From ab4dd18be4f4eaf56be5f1f776185b1a0482510e Mon Sep 17 00:00:00 2001 From: rembo10 Date: Fri, 14 Jan 2022 10:38:03 +0530 Subject: [PATCH] Initial python3 changes Mostly just updating libraries, removing string encoding/decoding, fixing some edge cases. No new functionality was added in this commit. --- Headphones.py | 4 + contrib/sni_test.py | 4 +- data/interfaces/default/config.html | 4 +- data/interfaces/default/history.html | 8 +- headphones/__init__.py | 4 +- headphones/api.py | 14 +- headphones/cache.py | 4 +- headphones/classes.py | 8 +- headphones/common.py | 13 +- headphones/config.py | 56 +- headphones/config_test.py | 6 +- headphones/crier.py | 2 +- headphones/cuesplit.py | 20 +- headphones/db.py | 14 +- headphones/deluge.py | 2 +- headphones/getXldProfile.py | 2 +- headphones/helpers.py | 175 +- headphones/helpers_test.py | 32 +- headphones/importer.py | 28 +- headphones/lastfm.py | 2 +- headphones/librarysync.py | 193 +- headphones/lock.py | 6 +- headphones/logger.py | 3 +- headphones/lyrics.py | 10 +- headphones/mb.py | 146 +- headphones/metacritic.py | 2 +- headphones/metadata.py | 35 +- headphones/metadata_test.py | 12 +- headphones/music_encoder.py | 54 +- headphones/notifiers.py | 131 +- headphones/nzbget.py | 32 +- headphones/pathrender.py | 24 +- headphones/pathrender_test.py | 42 +- headphones/postprocessor.py | 272 +- headphones/qbittorrent.py | 26 +- headphones/request.py | 4 +- headphones/rutracker.py | 16 +- headphones/sab.py | 15 +- headphones/searcher.py | 153 +- headphones/softchroot_test.py | 8 +- headphones/transmission.py | 10 +- headphones/unittestcompat.py | 2 +- headphones/utorrent.py | 32 +- headphones/versioncheck.py | 2 +- headphones/webserve.py | 113 +- headphones/webstart.py | 3 - lib/MultipartPostHandler.py | 16 +- lib/apscheduler/executors/asyncio.py | 2 +- lib/apscheduler/executors/gevent.py | 2 +- lib/apscheduler/executors/twisted.py | 2 +- lib/apscheduler/jobstores/memory.py | 2 +- lib/apscheduler/jobstores/mongodb.py | 4 +- lib/apscheduler/jobstores/redis.py | 4 +- lib/apscheduler/jobstores/sqlalchemy.py | 4 +- lib/apscheduler/schedulers/asyncio.py | 2 +- lib/apscheduler/schedulers/background.py | 2 +- lib/apscheduler/schedulers/base.py | 2 +- lib/apscheduler/schedulers/blocking.py | 2 +- lib/apscheduler/schedulers/gevent.py | 2 +- lib/apscheduler/schedulers/qt.py | 2 +- lib/apscheduler/schedulers/tornado.py | 2 +- lib/apscheduler/schedulers/twisted.py | 2 +- lib/apscheduler/util.py | 6 +- lib/beets/__init__.py | 33 +- lib/beets/__main__.py | 2 - lib/beets/art.py | 72 +- lib/beets/autotag/__init__.py | 127 +- lib/beets/autotag/hooks.py | 214 +- lib/beets/autotag/match.py | 55 +- lib/beets/autotag/mb.py | 233 +- lib/beets/config_default.yaml | 23 + lib/beets/dbcore/__init__.py | 2 - lib/beets/dbcore/db.py | 423 +- lib/beets/dbcore/query.py | 230 +- lib/beets/dbcore/queryparse.py | 70 +- lib/beets/dbcore/types.py | 70 +- lib/beets/importer.py | 379 +- lib/beets/library.py | 717 +- lib/beets/logging.py | 16 +- lib/beets/mediafile.py | 2055 +--- lib/beets/plugins.py | 346 +- lib/beets/random.py | 113 + lib/beets/ui/__init__.py | 386 +- lib/beets/ui/commands.py | 764 +- lib/beets/ui/completion_base.sh | 10 +- lib/beets/util/__init__.py | 310 +- lib/beets/util/artresizer.py | 380 +- lib/beets/util/bluelet.py | 28 +- lib/beets/util/confit.py | 1453 +-- lib/beets/util/enumeration.py | 2 - lib/beets/util/functemplate.py | 178 +- lib/beets/util/hidden.py | 2 - lib/beets/util/pipeline.py | 58 +- lib/beets/vfs.py | 2 - lib/beetsplug/__init__.py | 3 +- lib/beetsplug/absubmit.py | 196 + lib/beetsplug/acousticbrainz.py | 334 + lib/beetsplug/albumtypes.py | 65 + lib/beetsplug/aura.py | 984 ++ lib/beetsplug/badfiles.py | 215 + lib/beetsplug/bareasc.py | 82 + lib/beetsplug/beatport.py | 470 + lib/beetsplug/bench.py | 107 + lib/beetsplug/bpd/__init__.py | 1624 +++ lib/beetsplug/bpd/gstplayer.py | 304 + lib/beetsplug/bpm.py | 87 + lib/beetsplug/bpsync.py | 186 + lib/beetsplug/bucket.py | 242 + lib/beetsplug/chroma.py | 345 + lib/beetsplug/convert.py | 534 + lib/beetsplug/deezer.py | 230 + lib/beetsplug/discogs.py | 606 ++ lib/beetsplug/duplicates.py | 341 + lib/beetsplug/edit.py | 402 + lib/beetsplug/embedart.py | 341 +- lib/beetsplug/embyupdate.py | 205 + lib/beetsplug/export.py | 227 + lib/beetsplug/fetchart.py | 1321 ++- lib/beetsplug/filefilter.py | 79 + lib/beetsplug/fish.py | 285 + lib/beetsplug/freedesktop.py | 35 + lib/beetsplug/fromfilename.py | 162 + lib/beetsplug/ftintitle.py | 166 + lib/beetsplug/fuzzy.py | 46 + lib/beetsplug/gmusic.py | 25 + lib/beetsplug/hook.py | 115 + lib/beetsplug/ihate.py | 80 + lib/beetsplug/importadded.py | 132 + lib/beetsplug/importfeeds.py | 127 + lib/beetsplug/info.py | 229 + lib/beetsplug/inline.py | 126 + lib/beetsplug/ipfs.py | 295 + lib/beetsplug/keyfinder.py | 97 + lib/beetsplug/kodiupdate.py | 95 + lib/beetsplug/lastgenre/__init__.py | 486 + lib/beetsplug/lastgenre/genres-tree.yaml | 766 ++ lib/beetsplug/lastgenre/genres.txt | 1542 +++ lib/beetsplug/lastimport.py | 247 + lib/beetsplug/loadext.py | 44 + lib/beetsplug/lyrics.py | 1082 +- lib/beetsplug/mbcollection.py | 164 + lib/beetsplug/mbsubmit.py | 57 + lib/beetsplug/mbsync.py | 178 + lib/beetsplug/metasync/__init__.py | 138 + lib/beetsplug/metasync/amarok.py | 110 + lib/beetsplug/metasync/itunes.py | 125 + lib/beetsplug/missing.py | 226 + lib/beetsplug/mpdstats.py | 380 + lib/beetsplug/mpdupdate.py | 126 + lib/beetsplug/parentwork.py | 211 + lib/beetsplug/permissions.py | 121 + lib/beetsplug/play.py | 215 + lib/beetsplug/playlist.py | 185 + lib/beetsplug/plexupdate.py | 110 + lib/beetsplug/random.py | 57 + lib/beetsplug/replaygain.py | 1368 +++ lib/beetsplug/rewrite.py | 73 + lib/beetsplug/scrub.py | 149 + lib/beetsplug/smartplaylist.py | 224 + lib/beetsplug/sonosupdate.py | 46 + lib/beetsplug/spotify.py | 527 + lib/beetsplug/subsonicplaylist.py | 171 + lib/beetsplug/subsonicupdate.py | 144 + lib/beetsplug/the.py | 98 + lib/beetsplug/thumbnails.py | 291 + lib/beetsplug/types.py | 50 + lib/beetsplug/unimported.py | 68 + lib/beetsplug/web/__init__.py | 526 + lib/beetsplug/web/static/backbone.js | 1158 ++ lib/beetsplug/web/static/beets.css | 160 + lib/beetsplug/web/static/beets.js | 314 + lib/beetsplug/web/static/jquery.js | 9266 +++++++++++++++++ lib/beetsplug/web/static/underscore.js | 977 ++ lib/beetsplug/web/templates/index.html | 98 + lib/beetsplug/zero.py | 160 + lib/bencode.py | 131 - lib/bencode/__init__.py | 4 + lib/bencode/bencode.py | 270 + .../filters/__init__.py => bencode/py.typed} | 0 lib/bencode/torrent.py | 100 + lib/bencode/transform.py | 35 + lib/biplist/__init__.py | 30 +- lib/bs4/__init__.py | 592 +- lib/bs4/builder/__init__.py | 260 +- lib/bs4/builder/_html5lib.py | 236 +- lib/bs4/builder/_htmlparser.py | 298 +- lib/bs4/builder/_lxml.py | 174 +- lib/bs4/dammit.py | 2623 ++++- lib/bs4/diagnose.py | 105 +- lib/bs4/element.py | 1868 ++-- lib/bs4/formatter.py | 165 + lib/bs4/testing.py | 32 +- lib/certgen.py | 6 +- lib/certifi/__init__.py | 3 + lib/certifi/__main__.py | 12 + lib/{requests => certifi}/cacert.pem | 0 lib/certifi/core.py | 60 + lib/charset_normalizer/__init__.py | 56 + lib/charset_normalizer/api.py | 575 + lib/charset_normalizer/assets/__init__.py | 1244 +++ lib/charset_normalizer/cd.py | 340 + .../cli}/__init__.py | 0 lib/charset_normalizer/cli/normalizer.py | 290 + lib/charset_normalizer/constant.py | 500 + lib/charset_normalizer/legacy.py | 95 + lib/charset_normalizer/md.py | 559 + lib/charset_normalizer/models.py | 392 + .../py.typed} | 0 lib/charset_normalizer/utils.py | 342 + lib/charset_normalizer/version.py | 6 + lib/cheroot/__init__.py | 15 + lib/cheroot/__init__.pyi | 1 + lib/cheroot/__main__.py | 6 + lib/cheroot/_compat.py | 148 + lib/cheroot/cli.py | 247 + lib/cheroot/cli.pyi | 32 + lib/cheroot/connections.py | 397 + lib/cheroot/connections.pyi | 23 + lib/cheroot/errors.py | 88 + lib/cheroot/errors.pyi | 13 + lib/cheroot/makefile.py | 447 + lib/cheroot/makefile.pyi | 32 + lib/cheroot/py.typed | 0 lib/cheroot/server.py | 2204 ++++ lib/cheroot/server.pyi | 172 + lib/cheroot/ssl/__init__.py | 52 + lib/cheroot/ssl/__init__.pyi | 19 + lib/cheroot/ssl/builtin.py | 485 + lib/cheroot/ssl/builtin.pyi | 18 + lib/cheroot/ssl/pyopenssl.py | 382 + lib/cheroot/ssl/pyopenssl.pyi | 30 + lib/cheroot/test/__init__.py | 1 + lib/cheroot/test/_pytest_plugin.py | 50 + lib/cheroot/test/conftest.py | 71 + lib/cheroot/test/helper.py | 174 + lib/cheroot/test/test__compat.py | 66 + lib/cheroot/test/test_cli.py | 98 + lib/cheroot/test/test_conn.py | 1303 +++ lib/cheroot/test/test_core.py | 454 + lib/cheroot/test/test_dispatch.py | 55 + lib/cheroot/test/test_errors.py | 30 + lib/cheroot/test/test_makefile.py | 52 + lib/cheroot/test/test_server.py | 431 + lib/cheroot/test/test_ssl.py | 763 ++ lib/cheroot/test/test_wsgi.py | 83 + lib/cheroot/test/webtest.py | 613 ++ lib/cheroot/testing.py | 153 + lib/cheroot/testing.pyi | 17 + lib/cheroot/workers/__init__.py | 1 + lib/cheroot/workers/__init__.pyi | 0 lib/cheroot/workers/threadpool.py | 330 + lib/cheroot/workers/threadpool.pyi | 37 + lib/cheroot/wsgi.py | 435 + lib/cheroot/wsgi.pyi | 42 + lib/cherrypy/LICENSE.txt | 25 - lib/cherrypy/__init__.py | 446 +- lib/cherrypy/__main__.py | 5 + lib/cherrypy/_cpchecker.py | 191 +- lib/cherrypy/_cpcompat.py | 390 +- lib/cherrypy/_cpcompat_subprocess.py | 1544 --- lib/cherrypy/_cpconfig.py | 153 +- lib/cherrypy/_cpdispatch.py | 138 +- lib/cherrypy/_cperror.py | 212 +- lib/cherrypy/_cplogging.py | 212 +- lib/cherrypy/_cpmodpy.py | 85 +- lib/cherrypy/_cpnative_server.py | 78 +- lib/cherrypy/_cpreqbody.py | 212 +- lib/cherrypy/_cprequest.py | 431 +- lib/cherrypy/_cpserver.py | 135 +- lib/cherrypy/_cpthreadinglocal.py | 241 - lib/cherrypy/_cptools.py | 153 +- lib/cherrypy/_cptree.py | 131 +- lib/cherrypy/_cpwsgi.py | 207 +- lib/cherrypy/_cpwsgi_server.py | 92 +- lib/cherrypy/_helper.py | 348 + lib/cherrypy/_json.py | 25 + lib/cherrypy/{cherryd => daemon.py} | 67 +- lib/cherrypy/lib/__init__.py | 54 +- lib/cherrypy/lib/auth.py | 97 - lib/cherrypy/lib/auth_basic.py | 60 +- lib/cherrypy/lib/auth_digest.py | 211 +- lib/cherrypy/lib/caching.py | 58 +- lib/cherrypy/lib/covercp.py | 63 +- lib/cherrypy/lib/cpstats.py | 71 +- lib/cherrypy/lib/cptools.py | 143 +- lib/cherrypy/lib/encoding.py | 131 +- lib/cherrypy/lib/gctools.py | 63 +- lib/cherrypy/lib/http.py | 6 - lib/cherrypy/lib/httpauth.py | 371 - lib/cherrypy/lib/httputil.py | 282 +- lib/cherrypy/lib/jsontools.py | 21 +- lib/cherrypy/lib/lockfile.py | 147 - lib/cherrypy/lib/locking.py | 6 +- lib/cherrypy/lib/profiler.py | 89 +- lib/cherrypy/lib/reprconf.py | 262 +- lib/cherrypy/lib/sessions.py | 282 +- lib/cherrypy/lib/static.py | 148 +- lib/cherrypy/lib/xmlrpcutil.py | 37 +- lib/cherrypy/process/__init__.py | 7 +- lib/cherrypy/process/plugins.py | 275 +- lib/cherrypy/process/servers.py | 239 +- lib/cherrypy/process/win32.py | 29 +- lib/cherrypy/process/wspbus.py | 295 +- lib/cherrypy/scaffold/__init__.py | 36 +- lib/cherrypy/scaffold/apache-fcgi.conf | 2 +- lib/cherrypy/scaffold/example.conf | 2 +- lib/cherrypy/wsgiserver/__init__.py | 14 - lib/cherrypy/wsgiserver/ssl_builtin.py | 92 - lib/cherrypy/wsgiserver/ssl_pyopenssl.py | 253 - lib/cherrypy/wsgiserver/wsgiserver2.py | 2483 ----- lib/cherrypy/wsgiserver/wsgiserver3.py | 2176 ---- lib/concurrent/futures/_base.py | 2 +- lib/concurrent/futures/_compat.py | 2 +- lib/concurrent/futures/process.py | 4 +- lib/concurrent/futures/thread.py | 4 +- lib/configobj.py | 2468 ----- lib/confuse/__init__.py | 13 + lib/confuse/core.py | 724 ++ lib/confuse/exceptions.py | 56 + lib/confuse/sources.py | 184 + lib/confuse/templates.py | 741 ++ lib/confuse/util.py | 186 + lib/confuse/yaml_util.py | 228 + lib/enum/LICENSE | 32 - lib/enum/README | 2 - lib/enum/__init__.py | 790 -- lib/enum/doc/enum.rst | 725 -- lib/enum/enum.py | 790 -- lib/enum/test_enum.py | 1690 --- lib/feedparser.py | 3909 ------- lib/feedparser/__init__.py | 48 + lib/feedparser/api.py | 277 + lib/feedparser/datetimes/__init__.py | 70 + lib/feedparser/datetimes/asctime.py | 71 + lib/feedparser/datetimes/greek.py | 86 + lib/feedparser/datetimes/hungarian.py | 72 + lib/feedparser/datetimes/iso8601.py | 158 + lib/feedparser/datetimes/korean.py | 83 + lib/feedparser/datetimes/perforce.py | 46 + lib/feedparser/datetimes/rfc822.py | 150 + lib/feedparser/datetimes/w3dtf.py | 114 + lib/feedparser/encodings.py | 282 + lib/feedparser/exceptions.py | 55 + lib/feedparser/html.py | 355 + lib/feedparser/http.py | 227 + lib/feedparser/mixin.py | 784 ++ lib/feedparser/namespaces/__init__.py | 0 lib/feedparser/namespaces/_base.py | 503 + lib/feedparser/namespaces/admin.py | 53 + lib/feedparser/namespaces/cc.py | 69 + lib/feedparser/namespaces/dc.py | 134 + lib/feedparser/namespaces/georss.py | 276 + lib/feedparser/namespaces/itunes.py | 109 + lib/feedparser/namespaces/mediarss.py | 141 + lib/feedparser/namespaces/psc.py | 74 + lib/feedparser/parsers/__init__.py | 0 lib/feedparser/parsers/loose.py | 77 + lib/feedparser/parsers/strict.py | 135 + lib/feedparser/sanitizer.py | 950 ++ lib/feedparser/sgml.py | 98 + lib/feedparser/urls.py | 155 + lib/feedparser/util.py | 163 + lib/gntp/core.py | 10 +- lib/gntp/shim.py | 10 +- lib/html5lib/__init__.py | 25 - lib/html5lib/constants.py | 3102 ------ lib/html5lib/filters/_base.py | 12 - .../filters/alphabeticalattributes.py | 20 - lib/html5lib/filters/inject_meta_charset.py | 65 - lib/html5lib/filters/lint.py | 90 - lib/html5lib/filters/optionaltags.py | 205 - lib/html5lib/filters/sanitizer.py | 12 - lib/html5lib/filters/whitespace.py | 38 - lib/html5lib/html5parser.py | 2724 ----- lib/html5lib/ihatexml.py | 285 - lib/html5lib/inputstream.py | 903 -- lib/html5lib/sanitizer.py | 296 - lib/html5lib/serializer/__init__.py | 16 - lib/html5lib/serializer/htmlserializer.py | 317 - lib/html5lib/tokenizer.py | 1731 --- lib/html5lib/treeadapters/sax.py | 44 - lib/html5lib/treebuilders/__init__.py | 76 - lib/html5lib/treebuilders/_base.py | 377 - lib/html5lib/treebuilders/dom.py | 227 - lib/html5lib/treebuilders/etree.py | 337 - lib/html5lib/treebuilders/etree_lxml.py | 369 - lib/html5lib/treewalkers/__init__.py | 147 - lib/html5lib/treewalkers/_base.py | 200 - lib/html5lib/treewalkers/dom.py | 43 - lib/html5lib/treewalkers/etree.py | 136 - lib/html5lib/treewalkers/genshistream.py | 69 - lib/html5lib/treewalkers/lxmletree.py | 201 - lib/html5lib/treewalkers/pulldom.py | 63 - lib/html5lib/trie/__init__.py | 12 - lib/html5lib/trie/_base.py | 37 - lib/html5lib/trie/datrie.py | 44 - lib/html5lib/trie/py.py | 67 - lib/html5lib/utils.py | 103 - lib/httplib2/__init__.py | 1215 --- lib/httplib2/iri2uri.py | 110 - lib/idna/__init__.py | 44 + lib/idna/codec.py | 112 + lib/idna/compat.py | 13 + lib/idna/core.py | 397 + lib/idna/idnadata.py | 2137 ++++ lib/idna/intranges.py | 54 + lib/idna/package_data.py | 2 + lib/idna/py.typed | 0 lib/idna/uts46data.py | 8512 +++++++++++++++ lib/jaraco/classes/__init__.py | 0 lib/jaraco/classes/ancestry.py | 68 + lib/jaraco/classes/meta.py | 66 + lib/jaraco/classes/properties.py | 171 + lib/jaraco/collections.py | 1056 ++ lib/jaraco/functools.py | 525 + lib/jaraco/text/Lorem ipsum.txt | 2 + lib/jaraco/text/__init__.py | 529 + lib/jellyfish/_jellyfish.py | 2 +- lib/jellyfish/test.py | 32 +- lib/logutils/__init__.py | 2 +- lib/logutils/colorize.py | 6 +- lib/logutils/dictconfig.py | 40 +- lib/logutils/queue.py | 4 +- lib/logutils/redis.py | 6 +- lib/mako/LICENSE | 20 - lib/mako/README.rst | 52 - lib/mako/__init__.py | 4 +- lib/mako/_ast_util.py | 519 +- lib/mako/ast.py | 83 +- lib/mako/cache.py | 34 +- lib/mako/cmd.py | 77 +- lib/mako/codegen.py | 839 +- lib/mako/compat.py | 175 +- lib/mako/exceptions.py | 127 +- lib/mako/ext/autohandler.py | 47 +- lib/mako/ext/babelplugin.py | 31 +- lib/mako/ext/beaker_cache.py | 39 +- lib/mako/ext/extract.py | 58 +- lib/mako/ext/linguaplugin.py | 67 +- lib/mako/ext/preprocessors.py | 12 +- lib/mako/ext/pygmentplugin.py | 175 +- lib/mako/ext/turbogears.py | 25 +- lib/mako/filters.py | 102 +- lib/mako/lexer.py | 301 +- lib/mako/lookup.py | 183 +- lib/mako/parsetree.py | 485 +- lib/mako/pygen.py | 62 +- lib/mako/pyparser.py | 84 +- lib/mako/runtime.py | 254 +- lib/mako/template.py | 432 +- lib/mako/util.py | 126 +- lib/mediafile.py | 2370 +++++ lib/more_itertools/__init__.py | 4 + lib/more_itertools/__init__.pyi | 2 + lib/more_itertools/more.py | 4317 ++++++++ lib/more_itertools/more.pyi | 664 ++ lib/more_itertools/py.typed | 0 lib/more_itertools/recipes.py | 698 ++ lib/more_itertools/recipes.pyi | 112 + lib/munkres.py | 4 +- lib/musicbrainzngs/__init__.py | 0 lib/musicbrainzngs/caa.py | 45 +- lib/musicbrainzngs/compat.py | 4 +- lib/musicbrainzngs/mbxml.py | 42 +- lib/musicbrainzngs/musicbrainz.py | 174 +- lib/musicbrainzngs/util.py | 0 lib/mutagen/_compat.py | 26 +- lib/mutagen/_constants.py | 384 +- lib/mutagen/_file.py | 4 +- lib/mutagen/_senf/_compat.py | 15 +- lib/mutagen/_senf/_environ.py | 8 +- lib/mutagen/_senf/_fsnative.py | 24 +- lib/mutagen/_senf/_print.py | 8 +- lib/mutagen/_senf/_stdlib.py | 2 +- lib/mutagen/_senf/_winansi.py | 2 +- lib/mutagen/_tools/mid3cp.py | 14 +- lib/mutagen/_tools/mid3iconv.py | 12 +- lib/mutagen/_tools/mid3v2.py | 54 +- lib/mutagen/_tools/moggsplit.py | 2 +- lib/mutagen/_tools/mutagen_inspect.py | 8 +- lib/mutagen/_tools/mutagen_pony.py | 4 +- lib/mutagen/_util.py | 42 +- lib/mutagen/_vorbis.py | 12 +- lib/mutagen/aac.py | 10 +- lib/mutagen/aiff.py | 26 +- lib/mutagen/apev2.py | 38 +- lib/mutagen/asf/__init__.py | 18 +- lib/mutagen/asf/_objects.py | 36 +- lib/mutagen/asf/_util.py | 522 +- lib/mutagen/dsf.py | 12 +- lib/mutagen/easyid3.py | 50 +- lib/mutagen/easymp4.py | 22 +- lib/mutagen/flac.py | 10 +- lib/mutagen/id3/_file.py | 4 +- lib/mutagen/id3/_frames.py | 58 +- lib/mutagen/id3/_id3v1.py | 8 +- lib/mutagen/id3/_specs.py | 24 +- lib/mutagen/id3/_tags.py | 8 +- lib/mutagen/m4a.py | 4 +- lib/mutagen/monkeysaudio.py | 2 +- lib/mutagen/mp3/__init__.py | 20 +- lib/mutagen/mp3/_util.py | 82 +- lib/mutagen/mp4/__init__.py | 18 +- lib/mutagen/mp4/_as_entry.py | 6 +- lib/mutagen/mp4/_atom.py | 2 +- lib/mutagen/musepack.py | 8 +- lib/mutagen/ogg.py | 6 +- lib/mutagen/oggflac.py | 2 +- lib/mutagen/oggopus.py | 2 +- lib/mutagen/oggspeex.py | 2 +- lib/mutagen/oggtheora.py | 2 +- lib/mutagen/oggvorbis.py | 2 +- lib/mutagen/optimfrog.py | 2 +- lib/mutagen/smf.py | 8 +- lib/mutagen/trueaudio.py | 2 +- lib/mutagen/wavpack.py | 2 +- lib/oauth2/__init__.py | 704 -- lib/oauthlib/__init__.py | 34 + lib/oauthlib/common.py | 434 + lib/oauthlib/oauth1/__init__.py | 28 + lib/oauthlib/oauth1/rfc5849/__init__.py | 365 + .../oauth1/rfc5849/endpoints/__init__.py | 8 + .../oauth1/rfc5849/endpoints/access_token.py | 215 + .../oauth1/rfc5849/endpoints/authorization.py | 158 + lib/oauthlib/oauth1/rfc5849/endpoints/base.py | 245 + .../rfc5849/endpoints/pre_configured.py | 14 + .../oauth1/rfc5849/endpoints/request_token.py | 209 + .../oauth1/rfc5849/endpoints/resource.py | 163 + .../rfc5849/endpoints/signature_only.py | 82 + lib/oauthlib/oauth1/rfc5849/errors.py | 76 + lib/oauthlib/oauth1/rfc5849/parameters.py | 133 + .../oauth1/rfc5849/request_validator.py | 849 ++ lib/oauthlib/oauth1/rfc5849/signature.py | 843 ++ lib/oauthlib/oauth1/rfc5849/utils.py | 83 + lib/oauthlib/oauth2/__init__.py | 35 + lib/oauthlib/oauth2/rfc6749/__init__.py | 16 + .../oauth2/rfc6749/clients/__init__.py | 14 + .../rfc6749/clients/backend_application.py | 74 + lib/oauthlib/oauth2/rfc6749/clients/base.py | 522 + .../rfc6749/clients/legacy_application.py | 84 + .../rfc6749/clients/mobile_application.py | 174 + .../rfc6749/clients/service_application.py | 189 + .../oauth2/rfc6749/clients/web_application.py | 205 + .../oauth2/rfc6749/endpoints/__init__.py | 17 + .../oauth2/rfc6749/endpoints/authorization.py | 114 + lib/oauthlib/oauth2/rfc6749/endpoints/base.py | 113 + .../oauth2/rfc6749/endpoints/introspect.py | 122 + .../oauth2/rfc6749/endpoints/metadata.py | 237 + .../rfc6749/endpoints/pre_configured.py | 216 + .../oauth2/rfc6749/endpoints/resource.py | 84 + .../oauth2/rfc6749/endpoints/revocation.py | 126 + .../oauth2/rfc6749/endpoints/token.py | 119 + lib/oauthlib/oauth2/rfc6749/errors.py | 403 + .../oauth2/rfc6749/grant_types/__init__.py | 11 + .../rfc6749/grant_types/authorization_code.py | 545 + .../oauth2/rfc6749/grant_types/base.py | 250 + .../rfc6749/grant_types/client_credentials.py | 123 + .../oauth2/rfc6749/grant_types/implicit.py | 376 + .../rfc6749/grant_types/refresh_token.py | 135 + .../resource_owner_password_credentials.py | 199 + lib/oauthlib/oauth2/rfc6749/parameters.py | 455 + .../oauth2/rfc6749/request_validator.py | 651 ++ lib/oauthlib/oauth2/rfc6749/tokens.py | 355 + lib/oauthlib/oauth2/rfc6749/utils.py | 83 + lib/oauthlib/openid/__init__.py | 7 + lib/oauthlib/openid/connect/__init__.py | 0 lib/oauthlib/openid/connect/core/__init__.py | 0 .../openid/connect/core/endpoints/__init__.py | 9 + .../connect/core/endpoints/pre_configured.py | 97 + .../openid/connect/core/endpoints/userinfo.py | 99 + .../openid/connect/core/exceptions.py | 149 + .../connect/core/grant_types/__init__.py | 12 + .../core/grant_types/authorization_code.py | 43 + .../openid/connect/core/grant_types/base.py | 327 + .../connect/core/grant_types/dispatchers.py | 101 + .../openid/connect/core/grant_types/hybrid.py | 63 + .../connect/core/grant_types/implicit.py | 51 + .../openid/connect/core/request_validator.py | 308 + lib/oauthlib/openid/connect/core/tokens.py | 46 + lib/oauthlib/signals.py | 40 + lib/oauthlib/uri_validate.py | 190 + lib/ordereddict.py | 8 +- lib/osxnotify/registerapp.py | 2 +- .../__init__.py} | 2058 ++-- lib/pkg_resources/_vendor/__init__.py | 0 lib/pkg_resources/_vendor/appdirs.py | 608 ++ lib/pkg_resources/_vendor/packaging/LICENSE | 3 + .../_vendor/packaging/LICENSE.APACHE | 177 + .../_vendor/packaging/LICENSE.BSD | 23 + .../_vendor/packaging/__about__.py | 26 + .../_vendor/packaging/__init__.py | 25 + .../_vendor/packaging/_manylinux.py | 301 + .../_vendor/packaging/_musllinux.py | 136 + .../_vendor/packaging/_structures.py | 67 + .../_vendor/packaging/markers.py | 304 + lib/pkg_resources/_vendor/packaging/py.typed | 0 .../_vendor/packaging/requirements.py | 146 + .../_vendor/packaging/specifiers.py | 828 ++ lib/pkg_resources/_vendor/packaging/tags.py | 484 + lib/pkg_resources/_vendor/packaging/utils.py | 136 + .../_vendor/packaging/version.py | 504 + .../_vendor/pyparsing.LICENSE.txt | 18 + lib/pkg_resources/_vendor/pyparsing.py | 5742 ++++++++++ lib/pkg_resources/_vendor/vendored.txt | 3 + lib/pkg_resources/api_tests.txt | 401 + lib/pkg_resources/extern/__init__.py | 73 + lib/pkg_resources/tests/__init__.py | 0 .../data/my-test-package-source/setup.cfg | 0 .../data/my-test-package-source/setup.py | 6 + .../my-test-package-zip/my-test-package.zip | Bin 0 -> 1809 bytes .../EGG-INFO/PKG-INFO | 10 + .../EGG-INFO/SOURCES.txt | 7 + .../EGG-INFO/dependency_links.txt | 1 + .../EGG-INFO/top_level.txt | 1 + .../EGG-INFO/zip-safe | 1 + .../my_test_package-1.0-py3.7.egg | Bin 0 -> 843 bytes .../tests/test_find_distributions.py | 43 + lib/pkg_resources/tests/test_markers.py | 8 + lib/pkg_resources/tests/test_pkg_resources.py | 415 + lib/pkg_resources/tests/test_resources.py | 884 ++ lib/pkg_resources/tests/test_working_set.py | 482 + lib/portend.py | 240 + lib/pygazelle/api.py | 20 +- lib/pygazelle/artist.py | 2 +- lib/pygazelle/torrent.py | 2 +- lib/pygazelle/torrent_group.py | 10 +- lib/pygazelle/user.py | 4 +- lib/pynma/pynma.py | 4 +- lib/pythontwitter/__init__.py | 4651 --------- lib/pytz/__init__.py | 4 +- lib/pytz/lazy.py | 2 +- lib/pytz/tzfile.py | 2 +- lib/pytz/tzinfo.py | 2 +- lib/qbittorrentv2/client.py | 4 +- lib/requests/__init__.py | 127 +- lib/requests/__version__.py | 14 + lib/requests/_internal_utils.py | 42 + lib/requests/adapters.py | 194 +- lib/requests/api.py | 76 +- lib/requests/auth.py | 161 +- lib/requests/certs.py | 17 +- lib/requests/compat.py | 35 +- lib/requests/cookies.py | 181 +- lib/requests/exceptions.py | 56 +- lib/requests/help.py | 135 + lib/requests/hooks.py | 19 +- lib/requests/models.py | 321 +- lib/requests/packages.py | 26 + lib/requests/packages/README.rst | 8 - lib/requests/packages/__init__.py | 3 - lib/requests/packages/chardet/__init__.py | 32 - lib/requests/packages/chardet/big5freq.py | 925 -- lib/requests/packages/chardet/big5prober.py | 42 - lib/requests/packages/chardet/chardetect.py | 80 - .../packages/chardet/chardistribution.py | 231 - .../packages/chardet/charsetgroupprober.py | 106 - .../packages/chardet/charsetprober.py | 62 - .../packages/chardet/codingstatemachine.py | 61 - lib/requests/packages/chardet/compat.py | 34 - lib/requests/packages/chardet/constants.py | 39 - lib/requests/packages/chardet/cp949prober.py | 44 - lib/requests/packages/chardet/escprober.py | 86 - lib/requests/packages/chardet/escsm.py | 242 - lib/requests/packages/chardet/eucjpprober.py | 90 - lib/requests/packages/chardet/euckrfreq.py | 596 -- lib/requests/packages/chardet/euckrprober.py | 42 - lib/requests/packages/chardet/euctwfreq.py | 428 - lib/requests/packages/chardet/euctwprober.py | 41 - lib/requests/packages/chardet/gb2312freq.py | 472 - lib/requests/packages/chardet/gb2312prober.py | 41 - lib/requests/packages/chardet/hebrewprober.py | 283 - lib/requests/packages/chardet/jisfreq.py | 569 - lib/requests/packages/chardet/jpcntx.py | 227 - .../packages/chardet/langbulgarianmodel.py | 229 - .../packages/chardet/langcyrillicmodel.py | 329 - .../packages/chardet/langgreekmodel.py | 225 - .../packages/chardet/langhebrewmodel.py | 201 - .../packages/chardet/langhungarianmodel.py | 225 - .../packages/chardet/langthaimodel.py | 200 - lib/requests/packages/chardet/latin1prober.py | 139 - .../packages/chardet/mbcharsetprober.py | 86 - .../packages/chardet/mbcsgroupprober.py | 54 - lib/requests/packages/chardet/mbcssm.py | 572 - .../packages/chardet/sbcharsetprober.py | 120 - .../packages/chardet/sbcsgroupprober.py | 69 - lib/requests/packages/chardet/sjisprober.py | 91 - .../packages/chardet/universaldetector.py | 170 - lib/requests/packages/chardet/utf8prober.py | 76 - lib/requests/packages/urllib3/__init__.py | 71 - lib/requests/packages/urllib3/connection.py | 277 - .../packages/urllib3/contrib/appengine.py | 222 - .../packages/urllib3/contrib/ntlmpool.py | 114 - .../packages/urllib3/contrib/pyopenssl.py | 309 - lib/requests/packages/urllib3/exceptions.py | 193 - lib/requests/packages/urllib3/fields.py | 177 - .../packages/urllib3/packages/__init__.py | 4 - .../packages/urllib3/packages/ordered_dict.py | 259 - lib/requests/packages/urllib3/packages/six.py | 385 - .../packages/ssl_match_hostname/__init__.py | 13 - .../ssl_match_hostname/_implementation.py | 105 - lib/requests/packages/urllib3/poolmanager.py | 280 - lib/requests/packages/urllib3/response.py | 485 - .../packages/urllib3/util/__init__.py | 24 - .../packages/urllib3/util/connection.py | 100 - lib/requests/packages/urllib3/util/request.py | 71 - .../packages/urllib3/util/response.py | 73 - lib/requests/packages/urllib3/util/retry.py | 285 - lib/requests/packages/urllib3/util/ssl_.py | 286 - lib/requests/packages/urllib3/util/url.py | 214 - lib/requests/sessions.py | 455 +- lib/requests/status_codes.py | 46 +- lib/requests/structures.py | 19 +- lib/requests/utils.py | 600 +- lib/requests_oauthlib/__init__.py | 19 + .../compliance_fixes/__init__.py | 10 + .../compliance_fixes/douban.py | 17 + .../compliance_fixes/facebook.py | 33 + .../compliance_fixes/fitbit.py | 25 + .../compliance_fixes/instagram.py | 26 + .../compliance_fixes/linkedin.py | 21 + .../compliance_fixes/mailchimp.py | 23 + .../compliance_fixes/plentymarkets.py | 29 + .../compliance_fixes/slack.py | 37 + .../compliance_fixes/weibo.py | 15 + lib/requests_oauthlib/oauth1_auth.py | 117 + lib/requests_oauthlib/oauth1_session.py | 400 + lib/requests_oauthlib/oauth2_auth.py | 37 + lib/requests_oauthlib/oauth2_session.py | 534 + lib/sgmllib.py | 547 + lib/six.py | 188 +- lib/soupsieve/__init__.py | 166 + lib/soupsieve/__meta__.py | 196 + lib/soupsieve/css_match.py | 1584 +++ lib/soupsieve/css_parser.py | 1310 +++ lib/soupsieve/css_types.py | 407 + lib/soupsieve/pretty.py | 137 + lib/soupsieve/py.typed | 0 lib/soupsieve/util.py | 116 + lib/tempora/__init__.py | 682 ++ lib/tempora/schedule.py | 227 + lib/tempora/tests/test_schedule.py | 149 + lib/tempora/tests/test_timing.py | 50 + lib/tempora/timing.py | 266 + lib/tempora/utc.py | 36 + lib/twitter/__init__.py | 55 + lib/twitter/_file_cache.py | 102 + lib/twitter/api.py | 5035 +++++++++ lib/twitter/error.py | 25 + lib/twitter/models.py | 531 + lib/twitter/parse_tweet.py | 100 + lib/twitter/ratelimit.py | 191 + lib/twitter/twitter_utils.py | 321 + lib/tzlocal/darwin.py | 2 +- lib/tzlocal/unix.py | 2 +- lib/tzlocal/win32.py | 2 +- lib/unidecode/__init__.py | 6 +- lib/unittestcompat.py | 2 +- lib/urllib3/__init__.py | 85 + .../packages => }/urllib3/_collections.py | 98 +- lib/urllib3/_version.py | 2 + lib/urllib3/connection.py | 569 + .../packages => }/urllib3/connectionpool.py | 736 +- lib/urllib3/contrib/__init__.py | 0 lib/urllib3/contrib/_appengine_environ.py | 36 + .../contrib/_securetransport/__init__.py | 0 .../contrib/_securetransport/bindings.py | 519 + .../contrib/_securetransport/low_level.py | 397 + lib/urllib3/contrib/appengine.py | 314 + lib/urllib3/contrib/ntlmpool.py | 130 + lib/urllib3/contrib/pyopenssl.py | 511 + lib/urllib3/contrib/securetransport.py | 922 ++ lib/urllib3/contrib/socks.py | 216 + lib/urllib3/exceptions.py | 323 + lib/urllib3/fields.py | 274 + .../packages => }/urllib3/filepost.py | 27 +- lib/urllib3/packages/__init__.py | 0 lib/urllib3/packages/backports/__init__.py | 0 lib/urllib3/packages/backports/makefile.py | 51 + lib/urllib3/packages/six.py | 1077 ++ lib/urllib3/poolmanager.py | 536 + .../packages => }/urllib3/request.py | 101 +- lib/urllib3/response.py | 821 ++ lib/urllib3/util/__init__.py | 49 + lib/urllib3/util/connection.py | 149 + lib/urllib3/util/proxy.py | 57 + lib/urllib3/util/queue.py | 22 + lib/urllib3/util/request.py | 143 + lib/urllib3/util/response.py | 107 + lib/urllib3/util/retry.py | 620 ++ lib/urllib3/util/ssl_.py | 495 + lib/urllib3/util/ssl_match_hostname.py | 161 + lib/urllib3/util/ssltransport.py | 221 + .../packages => }/urllib3/util/timeout.py | 150 +- lib/urllib3/util/url.py | 432 + lib/urllib3/util/wait.py | 153 + lib/yaml/__init__.py | 155 +- lib/yaml/composer.py | 22 +- lib/yaml/constructor.py | 347 +- lib/yaml/cyaml.py | 48 +- lib/yaml/dumper.py | 26 +- lib/yaml/emitter.py | 319 +- lib/yaml/error.py | 8 +- lib/yaml/loader.py | 37 +- lib/yaml/parser.py | 34 +- lib/yaml/reader.py | 55 +- lib/yaml/representer.py | 243 +- lib/yaml/resolver.py | 87 +- lib/yaml/scanner.py | 458 +- lib/yaml/serializer.py | 10 +- lib/zc/__init__.py | 1 + lib/zc/lockfile/README.txt | 70 + lib/zc/lockfile/__init__.py | 125 + lib/zc/lockfile/tests.py | 201 + 813 files changed, 146338 insertions(+), 65753 deletions(-) mode change 100755 => 100644 lib/beets/__init__.py mode change 100755 => 100644 lib/beets/__main__.py mode change 100755 => 100644 lib/beets/art.py mode change 100755 => 100644 lib/beets/autotag/__init__.py mode change 100755 => 100644 lib/beets/autotag/hooks.py mode change 100755 => 100644 lib/beets/autotag/match.py mode change 100755 => 100644 lib/beets/autotag/mb.py mode change 100755 => 100644 lib/beets/config_default.yaml mode change 100755 => 100644 lib/beets/dbcore/__init__.py mode change 100755 => 100644 lib/beets/dbcore/query.py mode change 100755 => 100644 lib/beets/dbcore/queryparse.py mode change 100755 => 100644 lib/beets/dbcore/types.py mode change 100755 => 100644 lib/beets/importer.py mode change 100755 => 100644 lib/beets/library.py mode change 100755 => 100644 lib/beets/logging.py mode change 100755 => 100644 lib/beets/mediafile.py mode change 100755 => 100644 lib/beets/plugins.py create mode 100644 lib/beets/random.py mode change 100755 => 100644 lib/beets/ui/__init__.py mode change 100755 => 100644 lib/beets/ui/completion_base.sh mode change 100755 => 100644 lib/beets/util/__init__.py mode change 100755 => 100644 lib/beets/util/artresizer.py mode change 100755 => 100644 lib/beets/util/bluelet.py mode change 100755 => 100644 lib/beets/util/confit.py mode change 100755 => 100644 lib/beets/util/enumeration.py mode change 100755 => 100644 lib/beets/util/functemplate.py mode change 100755 => 100644 lib/beets/util/hidden.py mode change 100755 => 100644 lib/beets/util/pipeline.py mode change 100755 => 100644 lib/beets/vfs.py create mode 100644 lib/beetsplug/absubmit.py create mode 100644 lib/beetsplug/acousticbrainz.py create mode 100644 lib/beetsplug/albumtypes.py create mode 100644 lib/beetsplug/aura.py create mode 100644 lib/beetsplug/badfiles.py create mode 100644 lib/beetsplug/bareasc.py create mode 100644 lib/beetsplug/beatport.py create mode 100644 lib/beetsplug/bench.py create mode 100644 lib/beetsplug/bpd/__init__.py create mode 100644 lib/beetsplug/bpd/gstplayer.py create mode 100644 lib/beetsplug/bpm.py create mode 100644 lib/beetsplug/bpsync.py create mode 100644 lib/beetsplug/bucket.py create mode 100644 lib/beetsplug/chroma.py create mode 100644 lib/beetsplug/convert.py create mode 100644 lib/beetsplug/deezer.py create mode 100644 lib/beetsplug/discogs.py create mode 100644 lib/beetsplug/duplicates.py create mode 100644 lib/beetsplug/edit.py create mode 100644 lib/beetsplug/embyupdate.py create mode 100644 lib/beetsplug/export.py create mode 100644 lib/beetsplug/filefilter.py create mode 100644 lib/beetsplug/fish.py create mode 100644 lib/beetsplug/freedesktop.py create mode 100644 lib/beetsplug/fromfilename.py create mode 100644 lib/beetsplug/ftintitle.py create mode 100644 lib/beetsplug/fuzzy.py create mode 100644 lib/beetsplug/gmusic.py create mode 100644 lib/beetsplug/hook.py create mode 100644 lib/beetsplug/ihate.py create mode 100644 lib/beetsplug/importadded.py create mode 100644 lib/beetsplug/importfeeds.py create mode 100644 lib/beetsplug/info.py create mode 100644 lib/beetsplug/inline.py create mode 100644 lib/beetsplug/ipfs.py create mode 100644 lib/beetsplug/keyfinder.py create mode 100644 lib/beetsplug/kodiupdate.py create mode 100644 lib/beetsplug/lastgenre/__init__.py create mode 100644 lib/beetsplug/lastgenre/genres-tree.yaml create mode 100644 lib/beetsplug/lastgenre/genres.txt create mode 100644 lib/beetsplug/lastimport.py create mode 100644 lib/beetsplug/loadext.py create mode 100644 lib/beetsplug/mbcollection.py create mode 100644 lib/beetsplug/mbsubmit.py create mode 100644 lib/beetsplug/mbsync.py create mode 100644 lib/beetsplug/metasync/__init__.py create mode 100644 lib/beetsplug/metasync/amarok.py create mode 100644 lib/beetsplug/metasync/itunes.py create mode 100644 lib/beetsplug/missing.py create mode 100644 lib/beetsplug/mpdstats.py create mode 100644 lib/beetsplug/mpdupdate.py create mode 100644 lib/beetsplug/parentwork.py create mode 100644 lib/beetsplug/permissions.py create mode 100644 lib/beetsplug/play.py create mode 100644 lib/beetsplug/playlist.py create mode 100644 lib/beetsplug/plexupdate.py create mode 100644 lib/beetsplug/random.py create mode 100644 lib/beetsplug/replaygain.py create mode 100644 lib/beetsplug/rewrite.py create mode 100644 lib/beetsplug/scrub.py create mode 100644 lib/beetsplug/smartplaylist.py create mode 100644 lib/beetsplug/sonosupdate.py create mode 100644 lib/beetsplug/spotify.py create mode 100644 lib/beetsplug/subsonicplaylist.py create mode 100644 lib/beetsplug/subsonicupdate.py create mode 100644 lib/beetsplug/the.py create mode 100644 lib/beetsplug/thumbnails.py create mode 100644 lib/beetsplug/types.py create mode 100644 lib/beetsplug/unimported.py create mode 100644 lib/beetsplug/web/__init__.py create mode 100644 lib/beetsplug/web/static/backbone.js create mode 100644 lib/beetsplug/web/static/beets.css create mode 100644 lib/beetsplug/web/static/beets.js create mode 100644 lib/beetsplug/web/static/jquery.js create mode 100644 lib/beetsplug/web/static/underscore.js create mode 100644 lib/beetsplug/web/templates/index.html create mode 100644 lib/beetsplug/zero.py delete mode 100644 lib/bencode.py create mode 100644 lib/bencode/__init__.py create mode 100644 lib/bencode/bencode.py rename lib/{html5lib/filters/__init__.py => bencode/py.typed} (100%) create mode 100644 lib/bencode/torrent.py create mode 100644 lib/bencode/transform.py create mode 100644 lib/bs4/formatter.py create mode 100644 lib/certifi/__init__.py create mode 100644 lib/certifi/__main__.py rename lib/{requests => certifi}/cacert.pem (100%) mode change 100755 => 100644 create mode 100644 lib/certifi/core.py create mode 100644 lib/charset_normalizer/__init__.py create mode 100644 lib/charset_normalizer/api.py create mode 100644 lib/charset_normalizer/assets/__init__.py create mode 100644 lib/charset_normalizer/cd.py rename lib/{html5lib/treeadapters => charset_normalizer/cli}/__init__.py (100%) create mode 100644 lib/charset_normalizer/cli/normalizer.py create mode 100644 lib/charset_normalizer/constant.py create mode 100644 lib/charset_normalizer/legacy.py create mode 100644 lib/charset_normalizer/md.py create mode 100644 lib/charset_normalizer/models.py rename lib/{requests/packages/urllib3/contrib/__init__.py => charset_normalizer/py.typed} (100%) mode change 100755 => 100644 create mode 100644 lib/charset_normalizer/utils.py create mode 100644 lib/charset_normalizer/version.py create mode 100644 lib/cheroot/__init__.py create mode 100644 lib/cheroot/__init__.pyi create mode 100644 lib/cheroot/__main__.py create mode 100644 lib/cheroot/_compat.py create mode 100644 lib/cheroot/cli.py create mode 100644 lib/cheroot/cli.pyi create mode 100644 lib/cheroot/connections.py create mode 100644 lib/cheroot/connections.pyi create mode 100644 lib/cheroot/errors.py create mode 100644 lib/cheroot/errors.pyi create mode 100644 lib/cheroot/makefile.py create mode 100644 lib/cheroot/makefile.pyi create mode 100644 lib/cheroot/py.typed create mode 100644 lib/cheroot/server.py create mode 100644 lib/cheroot/server.pyi create mode 100644 lib/cheroot/ssl/__init__.py create mode 100644 lib/cheroot/ssl/__init__.pyi create mode 100644 lib/cheroot/ssl/builtin.py create mode 100644 lib/cheroot/ssl/builtin.pyi create mode 100644 lib/cheroot/ssl/pyopenssl.py create mode 100644 lib/cheroot/ssl/pyopenssl.pyi create mode 100644 lib/cheroot/test/__init__.py create mode 100644 lib/cheroot/test/_pytest_plugin.py create mode 100644 lib/cheroot/test/conftest.py create mode 100644 lib/cheroot/test/helper.py create mode 100644 lib/cheroot/test/test__compat.py create mode 100644 lib/cheroot/test/test_cli.py create mode 100644 lib/cheroot/test/test_conn.py create mode 100644 lib/cheroot/test/test_core.py create mode 100644 lib/cheroot/test/test_dispatch.py create mode 100644 lib/cheroot/test/test_errors.py create mode 100644 lib/cheroot/test/test_makefile.py create mode 100644 lib/cheroot/test/test_server.py create mode 100644 lib/cheroot/test/test_ssl.py create mode 100644 lib/cheroot/test/test_wsgi.py create mode 100644 lib/cheroot/test/webtest.py create mode 100644 lib/cheroot/testing.py create mode 100644 lib/cheroot/testing.pyi create mode 100644 lib/cheroot/workers/__init__.py create mode 100644 lib/cheroot/workers/__init__.pyi create mode 100644 lib/cheroot/workers/threadpool.py create mode 100644 lib/cheroot/workers/threadpool.pyi create mode 100644 lib/cheroot/wsgi.py create mode 100644 lib/cheroot/wsgi.pyi delete mode 100644 lib/cherrypy/LICENSE.txt create mode 100755 lib/cherrypy/__main__.py delete mode 100644 lib/cherrypy/_cpcompat_subprocess.py delete mode 100644 lib/cherrypy/_cpthreadinglocal.py create mode 100644 lib/cherrypy/_helper.py create mode 100644 lib/cherrypy/_json.py rename lib/cherrypy/{cherryd => daemon.py} (55%) mode change 100644 => 100755 delete mode 100644 lib/cherrypy/lib/auth.py delete mode 100644 lib/cherrypy/lib/http.py delete mode 100644 lib/cherrypy/lib/httpauth.py delete mode 100644 lib/cherrypy/lib/lockfile.py delete mode 100644 lib/cherrypy/wsgiserver/__init__.py delete mode 100644 lib/cherrypy/wsgiserver/ssl_builtin.py delete mode 100644 lib/cherrypy/wsgiserver/ssl_pyopenssl.py delete mode 100644 lib/cherrypy/wsgiserver/wsgiserver2.py delete mode 100644 lib/cherrypy/wsgiserver/wsgiserver3.py delete mode 100644 lib/configobj.py create mode 100644 lib/confuse/__init__.py create mode 100644 lib/confuse/core.py create mode 100644 lib/confuse/exceptions.py create mode 100644 lib/confuse/sources.py create mode 100644 lib/confuse/templates.py create mode 100644 lib/confuse/util.py create mode 100644 lib/confuse/yaml_util.py delete mode 100644 lib/enum/LICENSE delete mode 100644 lib/enum/README delete mode 100644 lib/enum/__init__.py delete mode 100644 lib/enum/doc/enum.rst delete mode 100644 lib/enum/enum.py delete mode 100644 lib/enum/test_enum.py delete mode 100644 lib/feedparser.py create mode 100644 lib/feedparser/__init__.py create mode 100644 lib/feedparser/api.py create mode 100644 lib/feedparser/datetimes/__init__.py create mode 100644 lib/feedparser/datetimes/asctime.py create mode 100644 lib/feedparser/datetimes/greek.py create mode 100644 lib/feedparser/datetimes/hungarian.py create mode 100644 lib/feedparser/datetimes/iso8601.py create mode 100644 lib/feedparser/datetimes/korean.py create mode 100644 lib/feedparser/datetimes/perforce.py create mode 100644 lib/feedparser/datetimes/rfc822.py create mode 100644 lib/feedparser/datetimes/w3dtf.py create mode 100644 lib/feedparser/encodings.py create mode 100644 lib/feedparser/exceptions.py create mode 100644 lib/feedparser/html.py create mode 100644 lib/feedparser/http.py create mode 100644 lib/feedparser/mixin.py create mode 100644 lib/feedparser/namespaces/__init__.py create mode 100644 lib/feedparser/namespaces/_base.py create mode 100644 lib/feedparser/namespaces/admin.py create mode 100644 lib/feedparser/namespaces/cc.py create mode 100644 lib/feedparser/namespaces/dc.py create mode 100644 lib/feedparser/namespaces/georss.py create mode 100644 lib/feedparser/namespaces/itunes.py create mode 100644 lib/feedparser/namespaces/mediarss.py create mode 100644 lib/feedparser/namespaces/psc.py create mode 100644 lib/feedparser/parsers/__init__.py create mode 100644 lib/feedparser/parsers/loose.py create mode 100644 lib/feedparser/parsers/strict.py create mode 100644 lib/feedparser/sanitizer.py create mode 100644 lib/feedparser/sgml.py create mode 100644 lib/feedparser/urls.py create mode 100644 lib/feedparser/util.py delete mode 100644 lib/html5lib/__init__.py delete mode 100644 lib/html5lib/constants.py delete mode 100644 lib/html5lib/filters/_base.py delete mode 100644 lib/html5lib/filters/alphabeticalattributes.py delete mode 100644 lib/html5lib/filters/inject_meta_charset.py delete mode 100644 lib/html5lib/filters/lint.py delete mode 100644 lib/html5lib/filters/optionaltags.py delete mode 100644 lib/html5lib/filters/sanitizer.py delete mode 100644 lib/html5lib/filters/whitespace.py delete mode 100644 lib/html5lib/html5parser.py delete mode 100644 lib/html5lib/ihatexml.py delete mode 100644 lib/html5lib/inputstream.py delete mode 100644 lib/html5lib/sanitizer.py delete mode 100644 lib/html5lib/serializer/__init__.py delete mode 100644 lib/html5lib/serializer/htmlserializer.py delete mode 100644 lib/html5lib/tokenizer.py delete mode 100644 lib/html5lib/treeadapters/sax.py delete mode 100644 lib/html5lib/treebuilders/__init__.py delete mode 100644 lib/html5lib/treebuilders/_base.py delete mode 100644 lib/html5lib/treebuilders/dom.py delete mode 100644 lib/html5lib/treebuilders/etree.py delete mode 100644 lib/html5lib/treebuilders/etree_lxml.py delete mode 100644 lib/html5lib/treewalkers/__init__.py delete mode 100644 lib/html5lib/treewalkers/_base.py delete mode 100644 lib/html5lib/treewalkers/dom.py delete mode 100644 lib/html5lib/treewalkers/etree.py delete mode 100644 lib/html5lib/treewalkers/genshistream.py delete mode 100644 lib/html5lib/treewalkers/lxmletree.py delete mode 100644 lib/html5lib/treewalkers/pulldom.py delete mode 100644 lib/html5lib/trie/__init__.py delete mode 100644 lib/html5lib/trie/_base.py delete mode 100644 lib/html5lib/trie/datrie.py delete mode 100644 lib/html5lib/trie/py.py delete mode 100644 lib/html5lib/utils.py delete mode 100755 lib/httplib2/__init__.py delete mode 100755 lib/httplib2/iri2uri.py create mode 100644 lib/idna/__init__.py create mode 100644 lib/idna/codec.py create mode 100644 lib/idna/compat.py create mode 100644 lib/idna/core.py create mode 100644 lib/idna/idnadata.py create mode 100644 lib/idna/intranges.py create mode 100644 lib/idna/package_data.py create mode 100644 lib/idna/py.typed create mode 100644 lib/idna/uts46data.py create mode 100644 lib/jaraco/classes/__init__.py create mode 100644 lib/jaraco/classes/ancestry.py create mode 100644 lib/jaraco/classes/meta.py create mode 100644 lib/jaraco/classes/properties.py create mode 100644 lib/jaraco/collections.py create mode 100644 lib/jaraco/functools.py create mode 100644 lib/jaraco/text/Lorem ipsum.txt create mode 100644 lib/jaraco/text/__init__.py delete mode 100644 lib/mako/LICENSE delete mode 100644 lib/mako/README.rst create mode 100644 lib/mediafile.py create mode 100644 lib/more_itertools/__init__.py create mode 100644 lib/more_itertools/__init__.pyi create mode 100755 lib/more_itertools/more.py create mode 100644 lib/more_itertools/more.pyi create mode 100644 lib/more_itertools/py.typed create mode 100644 lib/more_itertools/recipes.py create mode 100644 lib/more_itertools/recipes.pyi mode change 100755 => 100644 lib/musicbrainzngs/__init__.py mode change 100755 => 100644 lib/musicbrainzngs/caa.py mode change 100755 => 100644 lib/musicbrainzngs/compat.py mode change 100755 => 100644 lib/musicbrainzngs/mbxml.py mode change 100755 => 100644 lib/musicbrainzngs/musicbrainz.py mode change 100755 => 100644 lib/musicbrainzngs/util.py delete mode 100644 lib/oauth2/__init__.py create mode 100644 lib/oauthlib/__init__.py create mode 100644 lib/oauthlib/common.py create mode 100644 lib/oauthlib/oauth1/__init__.py create mode 100644 lib/oauthlib/oauth1/rfc5849/__init__.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/__init__.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/access_token.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/authorization.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/base.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/request_token.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/resource.py create mode 100644 lib/oauthlib/oauth1/rfc5849/endpoints/signature_only.py create mode 100644 lib/oauthlib/oauth1/rfc5849/errors.py create mode 100644 lib/oauthlib/oauth1/rfc5849/parameters.py create mode 100644 lib/oauthlib/oauth1/rfc5849/request_validator.py create mode 100644 lib/oauthlib/oauth1/rfc5849/signature.py create mode 100644 lib/oauthlib/oauth1/rfc5849/utils.py create mode 100644 lib/oauthlib/oauth2/__init__.py create mode 100644 lib/oauthlib/oauth2/rfc6749/__init__.py create mode 100644 lib/oauthlib/oauth2/rfc6749/clients/__init__.py create mode 100644 lib/oauthlib/oauth2/rfc6749/clients/backend_application.py create mode 100644 lib/oauthlib/oauth2/rfc6749/clients/base.py create mode 100644 lib/oauthlib/oauth2/rfc6749/clients/legacy_application.py create mode 100644 lib/oauthlib/oauth2/rfc6749/clients/mobile_application.py create mode 100644 lib/oauthlib/oauth2/rfc6749/clients/service_application.py create mode 100644 lib/oauthlib/oauth2/rfc6749/clients/web_application.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/__init__.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/authorization.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/base.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/introspect.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/metadata.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/resource.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/revocation.py create mode 100644 lib/oauthlib/oauth2/rfc6749/endpoints/token.py create mode 100644 lib/oauthlib/oauth2/rfc6749/errors.py create mode 100644 lib/oauthlib/oauth2/rfc6749/grant_types/__init__.py create mode 100644 lib/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py create mode 100644 lib/oauthlib/oauth2/rfc6749/grant_types/base.py create mode 100644 lib/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py create mode 100644 lib/oauthlib/oauth2/rfc6749/grant_types/implicit.py create mode 100644 lib/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py create mode 100644 lib/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py create mode 100644 lib/oauthlib/oauth2/rfc6749/parameters.py create mode 100644 lib/oauthlib/oauth2/rfc6749/request_validator.py create mode 100644 lib/oauthlib/oauth2/rfc6749/tokens.py create mode 100644 lib/oauthlib/oauth2/rfc6749/utils.py create mode 100644 lib/oauthlib/openid/__init__.py create mode 100644 lib/oauthlib/openid/connect/__init__.py create mode 100644 lib/oauthlib/openid/connect/core/__init__.py create mode 100644 lib/oauthlib/openid/connect/core/endpoints/__init__.py create mode 100644 lib/oauthlib/openid/connect/core/endpoints/pre_configured.py create mode 100644 lib/oauthlib/openid/connect/core/endpoints/userinfo.py create mode 100644 lib/oauthlib/openid/connect/core/exceptions.py create mode 100644 lib/oauthlib/openid/connect/core/grant_types/__init__.py create mode 100644 lib/oauthlib/openid/connect/core/grant_types/authorization_code.py create mode 100644 lib/oauthlib/openid/connect/core/grant_types/base.py create mode 100644 lib/oauthlib/openid/connect/core/grant_types/dispatchers.py create mode 100644 lib/oauthlib/openid/connect/core/grant_types/hybrid.py create mode 100644 lib/oauthlib/openid/connect/core/grant_types/implicit.py create mode 100644 lib/oauthlib/openid/connect/core/request_validator.py create mode 100644 lib/oauthlib/openid/connect/core/tokens.py create mode 100644 lib/oauthlib/signals.py create mode 100644 lib/oauthlib/uri_validate.py rename lib/{pkg_resources.py => pkg_resources/__init__.py} (64%) create mode 100644 lib/pkg_resources/_vendor/__init__.py create mode 100644 lib/pkg_resources/_vendor/appdirs.py create mode 100644 lib/pkg_resources/_vendor/packaging/LICENSE create mode 100644 lib/pkg_resources/_vendor/packaging/LICENSE.APACHE create mode 100644 lib/pkg_resources/_vendor/packaging/LICENSE.BSD create mode 100644 lib/pkg_resources/_vendor/packaging/__about__.py create mode 100644 lib/pkg_resources/_vendor/packaging/__init__.py create mode 100644 lib/pkg_resources/_vendor/packaging/_manylinux.py create mode 100644 lib/pkg_resources/_vendor/packaging/_musllinux.py create mode 100644 lib/pkg_resources/_vendor/packaging/_structures.py create mode 100644 lib/pkg_resources/_vendor/packaging/markers.py create mode 100644 lib/pkg_resources/_vendor/packaging/py.typed create mode 100644 lib/pkg_resources/_vendor/packaging/requirements.py create mode 100644 lib/pkg_resources/_vendor/packaging/specifiers.py create mode 100644 lib/pkg_resources/_vendor/packaging/tags.py create mode 100644 lib/pkg_resources/_vendor/packaging/utils.py create mode 100644 lib/pkg_resources/_vendor/packaging/version.py create mode 100644 lib/pkg_resources/_vendor/pyparsing.LICENSE.txt create mode 100644 lib/pkg_resources/_vendor/pyparsing.py create mode 100644 lib/pkg_resources/_vendor/vendored.txt create mode 100644 lib/pkg_resources/api_tests.txt create mode 100644 lib/pkg_resources/extern/__init__.py create mode 100644 lib/pkg_resources/tests/__init__.py create mode 100644 lib/pkg_resources/tests/data/my-test-package-source/setup.cfg create mode 100644 lib/pkg_resources/tests/data/my-test-package-source/setup.py create mode 100644 lib/pkg_resources/tests/data/my-test-package-zip/my-test-package.zip create mode 100644 lib/pkg_resources/tests/data/my-test-package_unpacked-egg/my_test_package-1.0-py3.7.egg/EGG-INFO/PKG-INFO create mode 100644 lib/pkg_resources/tests/data/my-test-package_unpacked-egg/my_test_package-1.0-py3.7.egg/EGG-INFO/SOURCES.txt create mode 100644 lib/pkg_resources/tests/data/my-test-package_unpacked-egg/my_test_package-1.0-py3.7.egg/EGG-INFO/dependency_links.txt create mode 100644 lib/pkg_resources/tests/data/my-test-package_unpacked-egg/my_test_package-1.0-py3.7.egg/EGG-INFO/top_level.txt create mode 100644 lib/pkg_resources/tests/data/my-test-package_unpacked-egg/my_test_package-1.0-py3.7.egg/EGG-INFO/zip-safe create mode 100644 lib/pkg_resources/tests/data/my-test-package_zipped-egg/my_test_package-1.0-py3.7.egg create mode 100644 lib/pkg_resources/tests/test_find_distributions.py create mode 100644 lib/pkg_resources/tests/test_markers.py create mode 100644 lib/pkg_resources/tests/test_pkg_resources.py create mode 100644 lib/pkg_resources/tests/test_resources.py create mode 100644 lib/pkg_resources/tests/test_working_set.py create mode 100644 lib/portend.py delete mode 100644 lib/pythontwitter/__init__.py mode change 100755 => 100644 lib/requests/__init__.py create mode 100644 lib/requests/__version__.py create mode 100644 lib/requests/_internal_utils.py mode change 100755 => 100644 lib/requests/adapters.py mode change 100755 => 100644 lib/requests/api.py mode change 100755 => 100644 lib/requests/auth.py mode change 100755 => 100644 lib/requests/certs.py mode change 100755 => 100644 lib/requests/compat.py mode change 100755 => 100644 lib/requests/cookies.py mode change 100755 => 100644 lib/requests/exceptions.py create mode 100644 lib/requests/help.py mode change 100755 => 100644 lib/requests/hooks.py mode change 100755 => 100644 lib/requests/models.py create mode 100644 lib/requests/packages.py delete mode 100755 lib/requests/packages/README.rst delete mode 100755 lib/requests/packages/__init__.py delete mode 100755 lib/requests/packages/chardet/__init__.py delete mode 100755 lib/requests/packages/chardet/big5freq.py delete mode 100755 lib/requests/packages/chardet/big5prober.py delete mode 100755 lib/requests/packages/chardet/chardetect.py delete mode 100755 lib/requests/packages/chardet/chardistribution.py delete mode 100755 lib/requests/packages/chardet/charsetgroupprober.py delete mode 100755 lib/requests/packages/chardet/charsetprober.py delete mode 100755 lib/requests/packages/chardet/codingstatemachine.py delete mode 100755 lib/requests/packages/chardet/compat.py delete mode 100755 lib/requests/packages/chardet/constants.py delete mode 100755 lib/requests/packages/chardet/cp949prober.py delete mode 100755 lib/requests/packages/chardet/escprober.py delete mode 100755 lib/requests/packages/chardet/escsm.py delete mode 100755 lib/requests/packages/chardet/eucjpprober.py delete mode 100755 lib/requests/packages/chardet/euckrfreq.py delete mode 100755 lib/requests/packages/chardet/euckrprober.py delete mode 100755 lib/requests/packages/chardet/euctwfreq.py delete mode 100755 lib/requests/packages/chardet/euctwprober.py delete mode 100755 lib/requests/packages/chardet/gb2312freq.py delete mode 100755 lib/requests/packages/chardet/gb2312prober.py delete mode 100755 lib/requests/packages/chardet/hebrewprober.py delete mode 100755 lib/requests/packages/chardet/jisfreq.py delete mode 100755 lib/requests/packages/chardet/jpcntx.py delete mode 100755 lib/requests/packages/chardet/langbulgarianmodel.py delete mode 100755 lib/requests/packages/chardet/langcyrillicmodel.py delete mode 100755 lib/requests/packages/chardet/langgreekmodel.py delete mode 100755 lib/requests/packages/chardet/langhebrewmodel.py delete mode 100755 lib/requests/packages/chardet/langhungarianmodel.py delete mode 100755 lib/requests/packages/chardet/langthaimodel.py delete mode 100755 lib/requests/packages/chardet/latin1prober.py delete mode 100755 lib/requests/packages/chardet/mbcharsetprober.py delete mode 100755 lib/requests/packages/chardet/mbcsgroupprober.py delete mode 100755 lib/requests/packages/chardet/mbcssm.py delete mode 100755 lib/requests/packages/chardet/sbcharsetprober.py delete mode 100755 lib/requests/packages/chardet/sbcsgroupprober.py delete mode 100755 lib/requests/packages/chardet/sjisprober.py delete mode 100755 lib/requests/packages/chardet/universaldetector.py delete mode 100755 lib/requests/packages/chardet/utf8prober.py delete mode 100755 lib/requests/packages/urllib3/__init__.py delete mode 100755 lib/requests/packages/urllib3/connection.py delete mode 100755 lib/requests/packages/urllib3/contrib/appengine.py delete mode 100755 lib/requests/packages/urllib3/contrib/ntlmpool.py delete mode 100755 lib/requests/packages/urllib3/contrib/pyopenssl.py delete mode 100755 lib/requests/packages/urllib3/exceptions.py delete mode 100755 lib/requests/packages/urllib3/fields.py delete mode 100755 lib/requests/packages/urllib3/packages/__init__.py delete mode 100755 lib/requests/packages/urllib3/packages/ordered_dict.py delete mode 100755 lib/requests/packages/urllib3/packages/six.py delete mode 100755 lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py delete mode 100755 lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py delete mode 100755 lib/requests/packages/urllib3/poolmanager.py delete mode 100755 lib/requests/packages/urllib3/response.py delete mode 100755 lib/requests/packages/urllib3/util/__init__.py delete mode 100755 lib/requests/packages/urllib3/util/connection.py delete mode 100755 lib/requests/packages/urllib3/util/request.py delete mode 100755 lib/requests/packages/urllib3/util/response.py delete mode 100755 lib/requests/packages/urllib3/util/retry.py delete mode 100755 lib/requests/packages/urllib3/util/ssl_.py delete mode 100755 lib/requests/packages/urllib3/util/url.py mode change 100755 => 100644 lib/requests/sessions.py mode change 100755 => 100644 lib/requests/status_codes.py mode change 100755 => 100644 lib/requests/structures.py mode change 100755 => 100644 lib/requests/utils.py create mode 100644 lib/requests_oauthlib/__init__.py create mode 100644 lib/requests_oauthlib/compliance_fixes/__init__.py create mode 100644 lib/requests_oauthlib/compliance_fixes/douban.py create mode 100644 lib/requests_oauthlib/compliance_fixes/facebook.py create mode 100644 lib/requests_oauthlib/compliance_fixes/fitbit.py create mode 100644 lib/requests_oauthlib/compliance_fixes/instagram.py create mode 100644 lib/requests_oauthlib/compliance_fixes/linkedin.py create mode 100644 lib/requests_oauthlib/compliance_fixes/mailchimp.py create mode 100644 lib/requests_oauthlib/compliance_fixes/plentymarkets.py create mode 100644 lib/requests_oauthlib/compliance_fixes/slack.py create mode 100644 lib/requests_oauthlib/compliance_fixes/weibo.py create mode 100644 lib/requests_oauthlib/oauth1_auth.py create mode 100644 lib/requests_oauthlib/oauth1_session.py create mode 100644 lib/requests_oauthlib/oauth2_auth.py create mode 100644 lib/requests_oauthlib/oauth2_session.py create mode 100644 lib/sgmllib.py create mode 100644 lib/soupsieve/__init__.py create mode 100644 lib/soupsieve/__meta__.py create mode 100644 lib/soupsieve/css_match.py create mode 100644 lib/soupsieve/css_parser.py create mode 100644 lib/soupsieve/css_types.py create mode 100644 lib/soupsieve/pretty.py create mode 100644 lib/soupsieve/py.typed create mode 100644 lib/soupsieve/util.py create mode 100644 lib/tempora/__init__.py create mode 100644 lib/tempora/schedule.py create mode 100644 lib/tempora/tests/test_schedule.py create mode 100644 lib/tempora/tests/test_timing.py create mode 100644 lib/tempora/timing.py create mode 100644 lib/tempora/utc.py create mode 100644 lib/twitter/__init__.py create mode 100644 lib/twitter/_file_cache.py create mode 100755 lib/twitter/api.py create mode 100644 lib/twitter/error.py create mode 100644 lib/twitter/models.py create mode 100644 lib/twitter/parse_tweet.py create mode 100644 lib/twitter/ratelimit.py create mode 100644 lib/twitter/twitter_utils.py create mode 100644 lib/urllib3/__init__.py rename lib/{requests/packages => }/urllib3/_collections.py (79%) mode change 100755 => 100644 create mode 100644 lib/urllib3/_version.py create mode 100644 lib/urllib3/connection.py rename lib/{requests/packages => }/urllib3/connectionpool.py (51%) mode change 100755 => 100644 create mode 100644 lib/urllib3/contrib/__init__.py create mode 100644 lib/urllib3/contrib/_appengine_environ.py create mode 100644 lib/urllib3/contrib/_securetransport/__init__.py create mode 100644 lib/urllib3/contrib/_securetransport/bindings.py create mode 100644 lib/urllib3/contrib/_securetransport/low_level.py create mode 100644 lib/urllib3/contrib/appengine.py create mode 100644 lib/urllib3/contrib/ntlmpool.py create mode 100644 lib/urllib3/contrib/pyopenssl.py create mode 100644 lib/urllib3/contrib/securetransport.py create mode 100644 lib/urllib3/contrib/socks.py create mode 100644 lib/urllib3/exceptions.py create mode 100644 lib/urllib3/fields.py rename lib/{requests/packages => }/urllib3/filepost.py (77%) mode change 100755 => 100644 create mode 100644 lib/urllib3/packages/__init__.py create mode 100644 lib/urllib3/packages/backports/__init__.py create mode 100644 lib/urllib3/packages/backports/makefile.py create mode 100644 lib/urllib3/packages/six.py create mode 100644 lib/urllib3/poolmanager.py rename lib/{requests/packages => }/urllib3/request.py (65%) mode change 100755 => 100644 create mode 100644 lib/urllib3/response.py create mode 100644 lib/urllib3/util/__init__.py create mode 100644 lib/urllib3/util/connection.py create mode 100644 lib/urllib3/util/proxy.py create mode 100644 lib/urllib3/util/queue.py create mode 100644 lib/urllib3/util/request.py create mode 100644 lib/urllib3/util/response.py create mode 100644 lib/urllib3/util/retry.py create mode 100644 lib/urllib3/util/ssl_.py create mode 100644 lib/urllib3/util/ssl_match_hostname.py create mode 100644 lib/urllib3/util/ssltransport.py rename lib/{requests/packages => }/urllib3/util/timeout.py (63%) mode change 100755 => 100644 create mode 100644 lib/urllib3/util/url.py create mode 100644 lib/urllib3/util/wait.py create mode 100644 lib/zc/__init__.py create mode 100644 lib/zc/lockfile/README.txt create mode 100644 lib/zc/lockfile/__init__.py create mode 100644 lib/zc/lockfile/tests.py diff --git a/Headphones.py b/Headphones.py index 5ad2454d..de513fd2 100755 --- a/Headphones.py +++ b/Headphones.py @@ -17,6 +17,10 @@ import os import sys +if sys.version_info <= (3, 5): + sys.stdout.write("Headphones requires Python >= 3.5\n") + sys.exit(1) + # Ensure lib added to path, before any other imports sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'lib/')) diff --git a/contrib/sni_test.py b/contrib/sni_test.py index 15b2b2c6..ce027b2a 100644 --- a/contrib/sni_test.py +++ b/contrib/sni_test.py @@ -6,7 +6,7 @@ import sys # Ensure that we use the Headphones provided libraries. sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../lib")) -import urlparse +import urllib.parse def can_import(module): @@ -89,7 +89,7 @@ def main(): url = sys.argv[1] # Check if it is a HTTPS website. - parts = urlparse.urlparse(url) + parts = urllib.parse.urlparse(url) if parts.scheme.lower() != "https": sys.stderr.write( diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index d09f3a7d..0cc2ddc0 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -1274,7 +1274,7 @@ - +
diff --git a/data/interfaces/default/history.html b/data/interfaces/default/history.html index f0736872..c76875e1 100644 --- a/data/interfaces/default/history.html +++ b/data/interfaces/default/history.html @@ -1,7 +1,7 @@ <%inherit file="base.html"/> <%! from headphones import helpers - import cgi + from html import escape as html_escape %> <%def name="headerIncludes()"> @@ -62,11 +62,11 @@ %> ${item['DateAdded']} - ${cgi.escape(item['Title'], quote=True)} [${fileid}][album page] + ${html_escape(item['Title'], quote=True)} [${fileid}][album page] ${helpers.bytes_to_mb(item['Size'])} ${item['Status']} - [retry][new] - + [retry][new] + %endfor diff --git a/headphones/__init__.py b/headphones/__init__.py index 9a0c4fc5..2952e0af 100644 --- a/headphones/__init__.py +++ b/headphones/__init__.py @@ -218,7 +218,7 @@ def daemonize(): pid = os.fork() # @UndefinedVariable - only available in UNIX if pid != 0: sys.exit(0) - except OSError, e: + except OSError as e: raise RuntimeError("1st fork failed: %s [%d]", e.strerror, e.errno) os.setsid() @@ -232,7 +232,7 @@ def daemonize(): pid = os.fork() # @UndefinedVariable - only available in UNIX if pid != 0: sys.exit(0) - except OSError, e: + except OSError as e: raise RuntimeError("2nd fork failed: %s [%d]", e.strerror, e.errno) dev_null = file('/dev/null', 'r') diff --git a/headphones/api.py b/headphones/api.py index 2ae742e3..025f12cf 100644 --- a/headphones/api.py +++ b/headphones/api.py @@ -86,7 +86,7 @@ class Api(object): methodToCall = getattr(self, "_" + self.cmd) methodToCall(**self.kwargs) if 'callback' not in self.kwargs: - if isinstance(self.data, basestring): + if isinstance(self.data, str): return self.data else: return json.dumps(self.data) @@ -106,7 +106,7 @@ class Api(object): rows_as_dic = [] for row in rows: - row_as_dic = dict(zip(row.keys(), row)) + row_as_dic = dict(list(zip(list(row.keys()), row))) rows_as_dic.append(row_as_dic) return rows_as_dic @@ -474,17 +474,17 @@ class Api(object): # Handle situations where the torrent url contains arguments that are # parsed if kwargs: - import urllib - import urllib2 - url = urllib2.quote( - url, safe=":?/=&") + '&' + urllib.urlencode(kwargs) + import urllib.request, urllib.parse, urllib.error + import urllib.request, urllib.error, urllib.parse + url = urllib.parse.quote( + url, safe=":?/=&") + '&' + urllib.parse.urlencode(kwargs) try: result = [(title, int(size), url, provider, kind)] except ValueError: result = [(title, float(size), url, provider, kind)] - logger.info(u"Making sure we can download the chosen result") + logger.info("Making sure we can download the chosen result") (data, bestqual) = searcher.preprocess(result) if data and bestqual: diff --git a/headphones/cache.py b/headphones/cache.py index db42bd18..fb5c1fbe 100644 --- a/headphones/cache.py +++ b/headphones/cache.py @@ -240,7 +240,7 @@ class Cache(object): # fallback to 1st album cover if none of the above elif 'albums' in data: - for mbid, art in data.get('albums', dict()).items(): + for mbid, art in list(data.get('albums', dict()).items()): if 'albumcover' in art: image_url = art['albumcover'][0]['url'] break @@ -352,7 +352,7 @@ class Cache(object): # fallback to 1st album cover if none of the above elif 'albums' in data: - for mbid, art in data.get('albums', dict()).items(): + for mbid, art in list(data.get('albums', dict()).items()): if 'albumcover' in art: image_url = art['albumcover'][0]['url'] break diff --git a/headphones/classes.py b/headphones/classes.py index 6015a0f2..a06f4f82 100644 --- a/headphones/classes.py +++ b/headphones/classes.py @@ -18,12 +18,12 @@ ####################################### -import urllib +import urllib.request, urllib.parse, urllib.error -from common import USER_AGENT +from .common import USER_AGENT -class HeadphonesURLopener(urllib.FancyURLopener): +class HeadphonesURLopener(urllib.request.FancyURLopener): version = USER_AGENT @@ -44,7 +44,7 @@ class AuthURLOpener(HeadphonesURLopener): self.numTries = 0 # call the base class - urllib.FancyURLopener.__init__(self) + urllib.request.FancyURLopener.__init__(self) def prompt_user_passwd(self, host, realm): """ diff --git a/headphones/common.py b/headphones/common.py index 0a33bde7..a311860a 100644 --- a/headphones/common.py +++ b/headphones/common.py @@ -24,6 +24,7 @@ import operator import os import re from headphones import version +from functools import reduce # Identify Our Application @@ -74,7 +75,7 @@ class Quality: @staticmethod def _getStatusStrings(status): toReturn = {} - for x in Quality.qualityStrings.keys(): + for x in list(Quality.qualityStrings.keys()): toReturn[Quality.compositeStatus(status, x)] = Quality.statusPrefixes[status] + " (" + \ Quality.qualityStrings[x] + ")" return toReturn @@ -93,7 +94,7 @@ class Quality: def splitQuality(quality): anyQualities = [] bestQualities = [] - for curQual in Quality.qualityStrings.keys(): + for curQual in list(Quality.qualityStrings.keys()): if curQual & quality: anyQualities.append(curQual) if curQual << 16 & quality: @@ -151,7 +152,7 @@ class Quality: @staticmethod def splitCompositeStatus(status): """Returns a tuple containing (status, quality)""" - for x in sorted(Quality.qualityStrings.keys(), reverse=True): + for x in sorted(list(Quality.qualityStrings.keys()), reverse=True): if status > x * 100: return (status - x * 100, x) @@ -169,10 +170,10 @@ class Quality: SNATCHED_PROPER = None -Quality.DOWNLOADED = [Quality.compositeStatus(DOWNLOADED, x) for x in Quality.qualityStrings.keys()] -Quality.SNATCHED = [Quality.compositeStatus(SNATCHED, x) for x in Quality.qualityStrings.keys()] +Quality.DOWNLOADED = [Quality.compositeStatus(DOWNLOADED, x) for x in list(Quality.qualityStrings.keys())] +Quality.SNATCHED = [Quality.compositeStatus(SNATCHED, x) for x in list(Quality.qualityStrings.keys())] Quality.SNATCHED_PROPER = [Quality.compositeStatus(SNATCHED_PROPER, x) for x in - Quality.qualityStrings.keys()] + list(Quality.qualityStrings.keys())] MP3 = Quality.combineQualities([Quality.B192, Quality.B256, Quality.B320, Quality.VBR], []) LOSSLESS = Quality.combineQualities([Quality.FLAC], []) diff --git a/headphones/config.py b/headphones/config.py index 7a8b93fc..69fdff08 100644 --- a/headphones/config.py +++ b/headphones/config.py @@ -2,15 +2,16 @@ import itertools import os import re +import ast +from configparser import ConfigParser import headphones.logger -from configobj import ConfigObj def bool_int(value): """ Casts a config value into a 0 or 1 """ - if isinstance(value, basestring): + if isinstance(value, str): if value.lower() in ('', '0', 'false', 'f', 'no', 'n', 'off'): value = 0 return int(bool(value)) @@ -326,8 +327,9 @@ class Config(object): def __init__(self, config_file): """ Initialize the config with values from a file """ self._config_file = config_file - self._config = ConfigObj(self._config_file, encoding='utf-8') - for key in _CONFIG_DEFINITIONS.keys(): + self._config = ConfigParser() + self._config.read(self._config_file) + for key in list(_CONFIG_DEFINITIONS.keys()): self.check_setting(key) self.ENCODER_MULTICORE_COUNT = max(0, self.ENCODER_MULTICORE_COUNT) self._upgrade() @@ -344,7 +346,7 @@ class Config(object): def check_section(self, section): """ Check if INI section exists, if not create it """ - if section not in self._config: + if not self._config.has_section(section): self._config[section] = {} return True else: @@ -354,28 +356,38 @@ class Config(object): """ Cast any value in the config to the right type or use the default """ key, definition_type, section, ini_key, default = self._define(key) self.check_section(section) + + # ConfigParser values are strings, so need to convert to actual list + if definition_type == list: + definition_type = ast.literal_eval + try: my_val = definition_type(self._config[section][ini_key]) + # ConfigParser interprets empty strings in the config + # literally, so we need to sanitize it. It's not really + # a config upgrade, since a user can at any time put + # some_key = '' + if my_val == '""' or my_val == "''": + my_val = '' except Exception: - my_val = definition_type(default) - self._config[section][ini_key] = my_val + my_val = default + self._config[section][ini_key] = str(my_val) return my_val def write(self): """ Make a copy of the stored config and write it to the configured file """ - new_config = ConfigObj(encoding="UTF-8") - new_config.filename = self._config_file + new_config = ConfigParser() # first copy over everything from the old config, even if it is not # correctly defined to keep from losing data - for key, subkeys in self._config.items(): + for key, subkeys in list(self._config.items()): if key not in new_config: new_config[key] = {} - for subkey, value in subkeys.items(): + for subkey, value in list(subkeys.items()): new_config[key][subkey] = value # next make sure that everything we expect to have defined is so - for key in _CONFIG_DEFINITIONS.keys(): + for key in list(_CONFIG_DEFINITIONS.keys()): key, definition_type, section, ini_key, default = self._define(key) self.check_setting(key) if section not in new_config: @@ -386,14 +398,15 @@ class Config(object): headphones.logger.info("Writing configuration to file") try: - new_config.write() + with open(self._config_file, 'w') as configfile: + new_config.write(configfile) except IOError as e: headphones.logger.error("Error writing configuration file: %s", e) def get_extra_newznabs(self): """ Return the extra newznab tuples """ extra_newznabs = list( - itertools.izip(*[itertools.islice(self.EXTRA_NEWZNABS, i, None, 3) + zip(*[itertools.islice(self.EXTRA_NEWZNABS, i, None, 3) for i in range(3)]) ) return extra_newznabs @@ -412,7 +425,7 @@ class Config(object): def get_extra_torznabs(self): """ Return the extra torznab tuples """ extra_torznabs = list( - itertools.izip(*[itertools.islice(self.EXTRA_TORZNABS, i, None, 4) + zip(*[itertools.islice(self.EXTRA_TORZNABS, i, None, 4) for i in range(4)]) ) return extra_torznabs @@ -448,20 +461,21 @@ class Config(object): return value else: key, definition_type, section, ini_key, default = self._define(name) - self._config[section][ini_key] = definition_type(value) + self._config[section][ini_key] = str(value) return self._config[section][ini_key] def process_kwargs(self, kwargs): """ Given a big bunch of key value pairs, apply them to the ini. """ - for name, value in kwargs.items(): + for name, value in list(kwargs.items()): key, definition_type, section, ini_key, default = self._define(name) - self._config[section][ini_key] = definition_type(value) + self._config[section][ini_key] = str(value) def _upgrade(self): """ - Bring old configs up to date + Bring old configs up to date. Although this is kind of a dumb + way to do it because it doesn't handle multi-step upgrades """ if self.CONFIG_VERSION == '2': # Update the config to use direct path to the encoder rather than the encoder folder @@ -488,12 +502,12 @@ class Config(object): # Add Seed Ratio to Torznabs if self.EXTRA_TORZNABS: extra_torznabs = list( - itertools.izip(*[itertools.islice(self.EXTRA_TORZNABS, i, None, 3) + zip(*[itertools.islice(self.EXTRA_TORZNABS, i, None, 3) for i in range(3)]) ) new_torznabs = [] for torznab in extra_torznabs: - new_torznabs.extend([torznab[0], torznab[1], u'', torznab[2]]) + new_torznabs.extend([torznab[0], torznab[1], '', torznab[2]]) if new_torznabs: self.EXTRA_TORZNABS = new_torznabs diff --git a/headphones/config_test.py b/headphones/config_test.py index 69e71efa..69424465 100644 --- a/headphones/config_test.py +++ b/headphones/config_test.py @@ -2,8 +2,8 @@ import mock from mock import MagicMock import headphones.config import re -import unittestcompat -from unittestcompat import TestCase, TestArgs +from . import unittestcompat +from .unittestcompat import TestCase, TestArgs class ConfigApiTest(TestCase): @@ -101,7 +101,7 @@ class ConfigApiTest(TestCase): # call methods c = headphones.config.Config(path) # assertions: - with self.assertRaisesRegexp(KeyError, exc_regex): + with self.assertRaisesRegex(KeyError, exc_regex): c.check_setting(setting_name) pass diff --git a/headphones/crier.py b/headphones/crier.py index 2399f6a9..d0bd54cf 100644 --- a/headphones/crier.py +++ b/headphones/crier.py @@ -21,7 +21,7 @@ def cry(): main_thread = t # Loop over each thread's current frame, writing info about it - for tid, frame in sys._current_frames().iteritems(): + for tid, frame in sys._current_frames().items(): thread = tmap.get(tid, main_thread) lines = [] diff --git a/headphones/cuesplit.py b/headphones/cuesplit.py index 6d7ae7a0..271d3049 100755 --- a/headphones/cuesplit.py +++ b/headphones/cuesplit.py @@ -87,7 +87,7 @@ def check_splitter(command): def split_baby(split_file, split_cmd): '''Let's split baby''' - logger.info('Splitting %s...', split_file.decode(headphones.SYS_ENCODING, 'replace')) + logger.info(f"Splitting {split_file}...") logger.debug(subprocess.list2cmdline(split_cmd)) # Prevent Windows from opening a terminal window @@ -108,16 +108,16 @@ def split_baby(split_file, split_cmd): process = subprocess.Popen(split_cmd, startupinfo=startupinfo, stdin=open(os.devnull, 'rb'), stdout=subprocess.PIPE, - stderr=subprocess.PIPE, env=env) + stderr=subprocess.PIPE, env=env, text=True) stdout, stderr = process.communicate() if process.returncode: - logger.error('Split failed for %s', split_file.decode(headphones.SYS_ENCODING, 'replace')) - out = stdout if stdout else stderr - logger.error('Error details: %s', out.decode(headphones.SYS_ENCODING, 'replace')) + logger.error(f"Split failed for {split_file}") + out = stdout or stderr + logger.error(f"Error details: {out}") return False else: - logger.info('Split success %s', split_file.decode(headphones.SYS_ENCODING, 'replace')) + logger.info(f"Split succeeded for {split_file}") return True @@ -232,7 +232,7 @@ class Directory: for i in list_dir: if not check_match(i): # music file - if os.path.splitext(i)[-1] in WAVE_FILE_TYPE_BY_EXTENSION.keys(): + if os.path.splitext(i)[-1] in list(WAVE_FILE_TYPE_BY_EXTENSION.keys()): track_nr = identify_track_number(i) if track_nr: self.content.append(WaveFile(self.path + os.sep + i, track_nr=track_nr)) @@ -378,7 +378,7 @@ class CueFile(File): except: raise ValueError('Cant encode CUE Sheet.') - if self.content[0] == u'\ufeff': + if self.content[0] == '\ufeff': self.content = self.content[1:] header = header_parser() @@ -581,7 +581,7 @@ def split(albumpath): # use xld profile to split cue if headphones.CONFIG.ENCODER == 'xld' and headphones.CONFIG.MUSIC_ENCODER and headphones.CONFIG.XLDPROFILE: - import getXldProfile + from . import getXldProfile xldprofile, xldformat, _ = getXldProfile.getXldProfile(headphones.CONFIG.XLDPROFILE) if not xldformat: raise ValueError( @@ -601,7 +601,7 @@ def split(albumpath): raise ValueError('Command not found, ensure shntool or xld installed') # Determine if file can be split - if wave.name_ext not in WAVE_FILE_TYPE_BY_EXTENSION.keys(): + if wave.name_ext not in list(WAVE_FILE_TYPE_BY_EXTENSION.keys()): raise ValueError('Cannot split, audio file has unsupported extension') # Split with xld diff --git a/headphones/db.py b/headphones/db.py index 35b447d6..e0b36afe 100644 --- a/headphones/db.py +++ b/headphones/db.py @@ -17,7 +17,7 @@ # Stolen from Sick-Beard's db.py # ################################### -from __future__ import with_statement + import time @@ -116,7 +116,7 @@ class DBConnection: break - except sqlite3.OperationalError, e: + except sqlite3.OperationalError as e: if "unable to open database file" in e.message or "database is locked" in e.message: dberror = e if args is None: @@ -128,7 +128,7 @@ class DBConnection: else: logger.error('Database error: %s', e) raise - except sqlite3.DatabaseError, e: + except sqlite3.DatabaseError as e: logger.error('Fatal Error executing %s :: %s', query, e) raise @@ -156,14 +156,14 @@ class DBConnection: If the table is not updated then the 'WHERE changes' will be 0 and the table inserted """ def genParams(myDict): - return [x + " = ?" for x in myDict.keys()] + return [x + " = ?" for x in list(myDict.keys())] update_query = "UPDATE " + tableName + " SET " + ", ".join(genParams(valueDict)) + " WHERE " + " AND ".join(genParams(keyDict)) - insert_query = ("INSERT INTO " + tableName + " (" + ", ".join(valueDict.keys() + keyDict.keys()) + ")" + " SELECT " + ", ".join( - ["?"] * len(valueDict.keys() + keyDict.keys())) + " WHERE changes()=0") + insert_query = ("INSERT INTO " + tableName + " (" + ", ".join(list(valueDict.keys()) + list(keyDict.keys())) + ")" + " SELECT " + ", ".join( + ["?"] * len(list(valueDict.keys()) + list(keyDict.keys()))) + " WHERE changes()=0") try: - self.action(update_query, valueDict.values() + keyDict.values(), upsert_insert_qry=insert_query) + self.action(update_query, list(valueDict.values()) + list(keyDict.values()), upsert_insert_qry=insert_query) except sqlite3.IntegrityError: logger.info('Queries failed: %s and %s', update_query, insert_query) diff --git a/headphones/deluge.py b/headphones/deluge.py index 6b0a13f6..e0e6b1f4 100644 --- a/headphones/deluge.py +++ b/headphones/deluge.py @@ -34,7 +34,7 @@ # You should have received a copy of the GNU General Public License # along with SickRage. If not, see . -from __future__ import unicode_literals + from headphones import logger diff --git a/headphones/getXldProfile.py b/headphones/getXldProfile.py index 6fd1ce2e..c368d3f7 100755 --- a/headphones/getXldProfile.py +++ b/headphones/getXldProfile.py @@ -15,7 +15,7 @@ def getXldProfile(xldProfile): # Get xld preferences plist try: preferences = biplist.readPlist(expanded) - except (biplist.InvalidPlistException, biplist.NotBinaryPlistException), e: + except (biplist.InvalidPlistException, biplist.NotBinaryPlistException) as e: logger.error("Error reading xld preferences plist: %s", e) return (xldProfileNotFound, None, None) diff --git a/headphones/helpers.py b/headphones/helpers.py index 07044a8c..b70ad08d 100644 --- a/headphones/helpers.py +++ b/headphones/helpers.py @@ -28,9 +28,10 @@ import six from contextlib import contextmanager import fnmatch +import functools import re import os -from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError +from mediafile import MediaFile, FileTypeError, UnreadableFileError import headphones @@ -40,6 +41,17 @@ RE_FEATURING = re.compile(r"[fF]t\.|[fF]eaturing|[fF]eat\.|\b[wW]ith\b|&|vs\.") RE_CD_ALBUM = re.compile(r"\(?((CD|disc)\s*[0-9]+)\)?", re.I) RE_CD = re.compile(r"^(CD|dics)\s*[0-9]+$", re.I) +def cmp(x, y): + """ + Replacement for built-in function cmp that was removed in Python 3 + + Compare the two objects x and y and return an integer according to + the outcome. The return value is negative if x < y, zero if x == y + and strictly positive if x > y. + + https://portingguide.readthedocs.io/en/latest/comparisons.html#the-cmp-function + """ + return (x > y) - (x < y) def multikeysort(items, columns): comparers = [ @@ -54,7 +66,7 @@ def multikeysort(items, columns): else: return 0 - return sorted(items, cmp=comparer) + return sorted(items, key=functools.cmp_to_key(comparer)) def checked(variable): @@ -210,7 +222,7 @@ def pattern_substitute(pattern, dic, normalize=False): if normalize: new_dic = {} - for i, j in dic.iteritems(): + for i, j in dic.items(): if j is not None: try: if sys.platform == 'darwin': @@ -229,7 +241,7 @@ def replace_all(text, dic): if not text: return '' - for i, j in dic.iteritems(): + for i, j in dic.items(): text = text.replace(i, j) return text @@ -242,8 +254,8 @@ def replace_illegal_chars(string, type="file"): return string -_CN_RE1 = re.compile(ur'[^\w]+', re.UNICODE) -_CN_RE2 = re.compile(ur'[\s_]+', re.UNICODE) +_CN_RE1 = re.compile(r'[^\w]+', re.UNICODE) +_CN_RE2 = re.compile(r'[\s_]+', re.UNICODE) _XLATE_GRAPHICAL_AND_DIACRITICAL = { @@ -253,33 +265,33 @@ _XLATE_GRAPHICAL_AND_DIACRITICAL = { # ©ª«®²³¹»¼½¾ÆÐØÞßæðøþĐđĦħıIJijĸĿŀŁłŒœŦŧDŽDždžLJLjljNJNjnjǤǥDZDzdzȤȥ. This # includes also some graphical symbols which can be easily replaced and # usually are written by people who don't have appropriate keyboard layout. - u'©': '(C)', u'ª': 'a.', u'«': '<<', u'®': '(R)', u'²': '2', u'³': '3', - u'¹': '1', u'»': '>>', u'¼': ' 1/4 ', u'½': ' 1/2 ', u'¾': ' 3/4 ', - u'Æ': 'AE', u'Ð': 'D', u'Ø': 'O', u'Þ': 'Th', u'ß': 'ss', u'æ': 'ae', - u'ð': 'd', u'ø': 'o', u'þ': 'th', u'Đ': 'D', u'đ': 'd', u'Ħ': 'H', - u'ħ': 'h', u'ı': 'i', u'IJ': 'IJ', u'ij': 'ij', u'ĸ': 'q', u'Ŀ': 'L', - u'ŀ': 'l', u'Ł': 'L', u'ł': 'l', u'Œ': 'OE', u'œ': 'oe', u'Ŧ': 'T', - u'ŧ': 't', u'DŽ': 'DZ', u'Dž': 'Dz', u'LJ': 'LJ', u'Lj': 'Lj', - u'lj': 'lj', u'NJ': 'NJ', u'Nj': 'Nj', u'nj': 'nj', - u'Ǥ': 'G', u'ǥ': 'g', u'DZ': 'DZ', u'Dz': 'Dz', u'dz': 'dz', - u'Ȥ': 'Z', u'ȥ': 'z', u'№': 'No.', - u'º': 'o.', # normalize Nº abbrev (popular w/ classical music), + '©': '(C)', 'ª': 'a.', '«': '<<', '®': '(R)', '²': '2', '³': '3', + '¹': '1', '»': '>>', '¼': ' 1/4 ', '½': ' 1/2 ', '¾': ' 3/4 ', + 'Æ': 'AE', 'Ð': 'D', 'Ø': 'O', 'Þ': 'Th', 'ß': 'ss', 'æ': 'ae', + 'ð': 'd', 'ø': 'o', 'þ': 'th', 'Đ': 'D', 'đ': 'd', 'Ħ': 'H', + 'ħ': 'h', 'ı': 'i', 'IJ': 'IJ', 'ij': 'ij', 'ĸ': 'q', 'Ŀ': 'L', + 'ŀ': 'l', 'Ł': 'L', 'ł': 'l', 'Œ': 'OE', 'œ': 'oe', 'Ŧ': 'T', + 'ŧ': 't', 'DŽ': 'DZ', 'Dž': 'Dz', 'LJ': 'LJ', 'Lj': 'Lj', + 'lj': 'lj', 'NJ': 'NJ', 'Nj': 'Nj', 'nj': 'nj', + 'Ǥ': 'G', 'ǥ': 'g', 'DZ': 'DZ', 'Dz': 'Dz', 'dz': 'dz', + 'Ȥ': 'Z', 'ȥ': 'z', '№': 'No.', + 'º': 'o.', # normalize Nº abbrev (popular w/ classical music), # this is 'masculine ordering indicator', not degree } _XLATE_SPECIAL = { # Translation table. # Cover additional special characters processing normalization. - u"'": '', # replace apostrophe with nothing - u"’": '', # replace musicbrainz style apostrophe with nothing - u'&': ' and ', # expand & to ' and ' + "'": '', # replace apostrophe with nothing + "’": '', # replace musicbrainz style apostrophe with nothing + '&': ' and ', # expand & to ' and ' } _XLATE_MUSICBRAINZ = { # Translation table for Musicbrainz. - u"…": '...', # HORIZONTAL ELLIPSIS (U+2026) - u"’": "'", # APOSTROPHE (U+0027) - u"‐": "-", # EN DASH (U+2013) + "…": '...', # HORIZONTAL ELLIPSIS (U+2026) + "’": "'", # APOSTROPHE (U+0027) + "‐": "-", # EN DASH (U+2013) } @@ -314,10 +326,10 @@ def _transliterate(u, xlate): Perform transliteration using the specified dictionary """ u = unicodedata.normalize('NFD', u) - u = u''.join([u'' if _is_unicode_combining(x) else x for x in u]) + u = ''.join(['' if _is_unicode_combining(x) else x for x in u]) u = _translate(u, xlate) # at this point output is either unicode, or plain ascii - return unicode(u) + return str(u) def clean_name(s): @@ -327,10 +339,10 @@ def clean_name(s): :param s: string to clean up, possibly unicode one. :return: cleaned-up version of input string. """ - if not isinstance(s, unicode): + if not isinstance(s, str): # ignore extended chars if someone was dumb enough to pass non-ascii # narrow string here, use only unicode for meaningful texts - u = unicode(s, 'ascii', 'replace') + u = str(s, 'ascii', 'replace') else: u = s # 1. don't bother doing normalization NFKC, rather transliterate @@ -341,9 +353,9 @@ def clean_name(s): # 3. translate spacials u = _translate(u, _XLATE_SPECIAL) # 4. replace any non-alphanumeric character sequences by spaces - u = _CN_RE1.sub(u' ', u) + u = _CN_RE1.sub(' ', u) # 5. coalesce interleaved space/underscore sequences - u = _CN_RE2.sub(u' ', u) + u = _CN_RE2.sub(' ', u) # 6. trim u = u.strip() # 7. lowercase @@ -357,8 +369,8 @@ def clean_musicbrainz_name(s, return_as_string=True): :param s: string to clean up, probably unicode. :return: cleaned-up version of input string. """ - if not isinstance(s, unicode): - u = unicode(s, 'ascii', 'replace') + if not isinstance(s, str): + u = str(s, 'ascii', 'replace') else: u = s u = _translate(u, _XLATE_MUSICBRAINZ) @@ -452,8 +464,7 @@ def expand_subfolders(f): if difference > 0: logger.info( - "Found %d media folders, but depth difference between lowest and deepest media folder is %d (expected zero). If this is a discography or a collection of albums, make sure albums are per folder.", - len(media_folders), difference) + f"Found {len(media_folders)} media folders, but depth difference between lowest and deepest media folder is {difference} (expected zero). If this is a discography or a collection of albums, make sure albums are per folder.") # While already failed, advice the user what he could try. We assume the # directory may contain separate CD's and maybe some extra's. The @@ -465,8 +476,7 @@ def expand_subfolders(f): set([os.path.join(*media_folder) for media_folder in extra_media_folders])) logger.info( - "Please look at the following folder(s), since they cause the depth difference: %s", - extra_media_folders) + f"Please look at the following folder(s), since they cause the depth difference: {extra_media_folders}") return # Convert back to paths and remove duplicates, which may be there after @@ -480,7 +490,7 @@ def expand_subfolders(f): logger.debug("Did not expand subfolder, as it resulted in one folder.") return - logger.debug("Expanded subfolders in folder: %s", media_folders) + logger.debug(f"Expanded subfolders in folder: {media_folders}") return media_folders @@ -498,7 +508,7 @@ def path_match_patterns(path, patterns): return False -def path_filter_patterns(paths, patterns, root=None): +def path_filter_patterns(paths, patterns, root=''): """ Scan for ignored paths based on glob patterns. Note that the whole path will be matched, therefore paths should only contain the relative paths. @@ -512,8 +522,7 @@ def path_filter_patterns(paths, patterns, root=None): for path in paths[:]: if path_match_patterns(path, patterns): - logger.debug("Path ignored by pattern: %s", - os.path.join(root or "", path)) + logger.debug(f"Path ignored by pattern: {os.path.join(root, path)}") ignored += 1 paths.remove(path) @@ -595,7 +604,7 @@ def extract_metadata(f): count_ratio = 0.75 if count < (count_ratio * len(results)): - logger.info("Counted %d media files, but only %d have tags, ignoring.", count, len(results)) + logger.info(f"Counted {count} media files, but only {len(results)} have tags, ignoring.") return (None, None, None) # Count distinct values @@ -613,8 +622,7 @@ def extract_metadata(f): old_album = new_albums[index] new_albums[index] = RE_CD_ALBUM.sub("", album).strip() - logger.debug("Stripped albumd number identifier: %s -> %s", old_album, - new_albums[index]) + logger.debug(f"Stripped album number identifier: {old_album} -> {new_albums[index]}") # Remove duplicates new_albums = list(set(new_albums)) @@ -632,7 +640,7 @@ def extract_metadata(f): if len(artists) > 1 and len(albums) == 1: split_artists = [RE_FEATURING.split(x) for x in artists] featurings = [len(split_artist) - 1 for split_artist in split_artists] - logger.info("Album seem to feature %d different artists", sum(featurings)) + logger.info("Album seem to feature {sum(featurings)} different artists") if sum(featurings) > 0: # Find the artist of which the least splits have been generated. @@ -644,9 +652,11 @@ def extract_metadata(f): return (artist, albums[0], years[0]) # Not sure what to do here. - logger.info("Found %d artists, %d albums and %d years in metadata, so ignoring", len(artists), - len(albums), len(years)) - logger.debug("Artists: %s, Albums: %s, Years: %s", artists, albums, years) + logger.info( + f"Found {len(artists)} artists, {len(albums)} albums and " + f"{len(years)} years in metadata, so ignoring" + ) + logger.debug("Artists: {artists}, Albums: {albums}, Years: {years}") return (None, None, None) @@ -678,8 +688,7 @@ def preserve_torrent_directory(albumpath, forced=False, single=False): else: tempdir = tempfile.gettempdir() - logger.info("Preparing to copy to a temporary directory for post processing: " + albumpath.decode( - headphones.SYS_ENCODING, 'replace')) + logger.info(f"Preparing to copy to a temporary directory for post processing: {albumpath}") try: file_name = os.path.basename(os.path.normpath(albumpath)) @@ -689,8 +698,7 @@ def preserve_torrent_directory(albumpath, forced=False, single=False): prefix = "headphones_" + os.path.splitext(file_name)[0] + "_@hp@_" new_folder = tempfile.mkdtemp(prefix=prefix, dir=tempdir) except Exception as e: - logger.error("Cannot create temp directory: " + tempdir.decode( - headphones.SYS_ENCODING, 'replace') + ". Error: " + str(e)) + logger.error(f"Cannot create temp directory: {tempdir}. Error: {e}") return None # Attempt to stop multiple temp dirs being created for the same albumpath @@ -701,17 +709,21 @@ def preserve_torrent_directory(albumpath, forced=False, single=False): workdir = re.sub(r'(?= 3: logger.error( - "Looks like a temp directory has previously been created for this albumpath, not continuing " + workdir.decode( - headphones.SYS_ENCODING, 'replace')) + "Looks like a temp directory has previously been created " + "for this albumpath, not continuing " + ) shutil.rmtree(new_folder) return None except Exception as e: - logger.warn("Cannot determine if already copied/processed, will copy anyway: Warning: " + str(e)) + logger.warn( + "Cannot determine if already copied/processed, will copy anyway. " + f"Warning: {e}" + ) # Copy to temp dir try: subdir = os.path.join(new_folder, "headphones") - logger.info("Copying files to " + subdir.decode(headphones.SYS_ENCODING, 'replace')) + logger.info(f"Copying files to {subdir}") if not single: shutil.copytree(albumpath, subdir) else: @@ -720,9 +732,10 @@ def preserve_torrent_directory(albumpath, forced=False, single=False): # Update the album path with the new location return subdir except Exception as e: - logger.warn("Cannot copy/move files to temp directory: " + new_folder.decode(headphones.SYS_ENCODING, - 'replace') + ". Not continuing. Error: " + str( - e)) + logger.warn( + f"Cannot copy/move files to temp directory: {new_folder}. " + f"Not continuing. Error: {e}" + ) shutil.rmtree(new_folder) return None @@ -767,7 +780,7 @@ def cue_split(albumpath, keep_original_folder=False): cuesplit.split(cue_dir) except Exception as e: os.chdir(cwd) - logger.warn("Cue not split: " + str(e)) + logger.warn(f"Cue not split. Error: {e}") return None os.chdir(cwd) @@ -805,7 +818,7 @@ def extract_song_data(s): year = match.group("year") return (name, album, year) else: - logger.info("Couldn't parse %s into a valid default format", s) + logger.info(f"Couldn't parse {s} into a valid default format") # newzbin default format pattern = re.compile(r'(?P.*?)\s\-\s(?P.*?)\s\((?P\d+?\))', re.VERBOSE) @@ -816,7 +829,7 @@ def extract_song_data(s): year = match.group("year") return (name, album, year) else: - logger.info("Couldn't parse %s into a valid Newbin format", s) + logger.info(f"Couldn't parse {s} into a valid Newbin format") return (name, album, year) @@ -829,7 +842,7 @@ def smartMove(src, dest, delete=True): dest_path = os.path.join(dest, filename) if os.path.isfile(dest_path): - logger.info('Destination file exists: %s', dest_path) + logger.info(f"Destination file exists: {dest_path}") title = os.path.splitext(filename)[0] ext = os.path.splitext(filename)[1] i = 1 @@ -838,13 +851,12 @@ def smartMove(src, dest, delete=True): if os.path.isfile(os.path.join(dest, newfile)): i += 1 else: - logger.info('Renaming to %s', newfile) + logger.info(f"Renaming to {newfile}") try: os.rename(src, os.path.join(source_dir, newfile)) filename = newfile except Exception as e: - logger.warn('Error renaming %s: %s', - src.decode(headphones.SYS_ENCODING, 'replace'), e) + logger.warn(f"Error renaming {src}: {e}") break if delete: @@ -854,8 +866,9 @@ def smartMove(src, dest, delete=True): except Exception as e: exists = os.path.exists(dest_path) if exists and os.path.getsize(source_path) == os.path.getsize(dest_path): - logger.warn('Successfully moved file "%s", but something went wrong: %s', - filename.decode(headphones.SYS_ENCODING, 'replace'), e) + logger.warn( + f"Successfully moved {filename}, but something went wrong: {e}" + ) os.unlink(source_path) else: # remove faultly copied file @@ -864,12 +877,11 @@ def smartMove(src, dest, delete=True): raise else: try: - logger.info('Copying "%s" to "%s"', source_path, dest_path) + logger.info(f"Copying {source_path} to {dest_path}") shutil.copy(source_path, dest_path) return True except Exception as e: - logger.warn('Error copying file %s: %s', filename.decode(headphones.SYS_ENCODING, 'replace'), - e) + logger.warn(f"Error copying {filename}: {e}") def walk_directory(basedir, followlinks=True): @@ -878,7 +890,7 @@ def walk_directory(basedir, followlinks=True): with care. In case a folder is already processed, don't traverse it again. """ - import logger + from . import logger # Add the base path, because symlinks poiting to the basedir should not be # traversed again. @@ -892,8 +904,10 @@ def walk_directory(basedir, followlinks=True): real_path = os.path.abspath(os.readlink(path)) if real_path in traversed: - logger.debug("Skipping '%s' since it is a symlink to " - "'%s', which is already visited.", path, real_path) + logger.debug( + f"Skipping {path} since it is a symlink to " + f"{real_path}, which is already visited." + ) else: traversed.append(real_path) @@ -935,22 +949,13 @@ def sab_sanitize_foldername(name): FL_ILLEGAL = CH_ILLEGAL + ':\x92"' FL_LEGAL = CH_LEGAL + "-''" - uFL_ILLEGAL = FL_ILLEGAL.decode('latin-1') - uFL_LEGAL = FL_LEGAL.decode('latin-1') - if not name: - return name - if isinstance(name, unicode): - illegal = uFL_ILLEGAL - legal = uFL_LEGAL - else: - illegal = FL_ILLEGAL - legal = FL_LEGAL + return lst = [] for ch in name.strip(): - if ch in illegal: - ch = legal[illegal.find(ch)] + if ch in FL_ILLEGAL: + ch = FL_LEGAL[FL_ILLEGAL.find(ch)] lst.append(ch) else: lst.append(ch) @@ -1006,7 +1011,7 @@ def create_https_certificates(ssl_cert, ssl_key): with open(ssl_cert, "w") as fp: fp.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert)) except IOError as e: - logger.error("Error creating SSL key and certificate: %s", e) + logger.error(f"Error creating SSL key and certificate: e") return False return True diff --git a/headphones/helpers_test.py b/headphones/helpers_test.py index 14b8540d..3fd09aa4 100644 --- a/headphones/helpers_test.py +++ b/headphones/helpers_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from unittestcompat import TestCase +from .unittestcompat import TestCase from headphones.helpers import clean_name @@ -8,28 +8,28 @@ class HelpersTest(TestCase): def test_clean_name(self): """helpers: check correctness of clean_name() function""" cases = { - u' Weiße & rose ': 'Weisse and rose', - u'Multiple / spaces': 'Multiple spaces', - u'Kevin\'s m²': 'Kevins m2', - u'Symphonęy Nº9': 'Symphoney No.9', - u'ÆæßðÞIJij': u'AeaessdThIJıj', - u'Obsessió (Cerebral Apoplexy remix)': 'obsessio cerebral ' + ' Weiße & rose ': 'Weisse and rose', + 'Multiple / spaces': 'Multiple spaces', + 'Kevin\'s m²': 'Kevins m2', + 'Symphonęy Nº9': 'Symphoney No.9', + 'ÆæßðÞIJij': 'AeaessdThIJıj', + 'Obsessió (Cerebral Apoplexy remix)': 'obsessio cerebral ' 'apoplexy remix', - u'Doktór Hałabała i siedmiu zbojów': 'doktor halabala i siedmiu ' + 'Doktór Hałabała i siedmiu zbojów': 'doktor halabala i siedmiu ' 'zbojow', - u'Arbetets Söner och Döttrar': 'arbetets soner och dottrar', - u'Björk Guðmundsdóttir': 'bjork gudmundsdottir', - u'L\'Arc~en~Ciel': 'larc en ciel', - u'Orquesta de la Luz (オルケスタ・デ・ラ・ルス)': - u'Orquesta de la Luz オルケスタ デ ラ ルス' + 'Arbetets Söner och Döttrar': 'arbetets soner och dottrar', + 'Björk Guðmundsdóttir': 'bjork gudmundsdottir', + 'L\'Arc~en~Ciel': 'larc en ciel', + 'Orquesta de la Luz (オルケスタ・デ・ラ・ルス)': + 'Orquesta de la Luz オルケスタ デ ラ ルス' } - for first, second in cases.iteritems(): + for first, second in cases.items(): nf = clean_name(first).lower() ns = clean_name(second).lower() self.assertEqual( - nf, ns, u"check cleaning of case (%s," - u"%s)" % (nf, ns) + nf, ns, "check cleaning of case (%s," + "%s)" % (nf, ns) ) def test_clean_name_nonunicode(self): diff --git a/headphones/importer.py b/headphones/importer.py index 5595d37e..b2459787 100644 --- a/headphones/importer.py +++ b/headphones/importer.py @@ -16,7 +16,7 @@ import time from headphones import logger, helpers, db, mb, lastfm, metacritic -from beets.mediafile import MediaFile +from mediafile import MediaFile import headphones blacklisted_special_artist_names = ['[anonymous]', '[data]', '[no artist]', @@ -39,7 +39,7 @@ def is_exists(artistid): if any(artistid in x for x in artistlist): logger.info(artistlist[0][ - 1] + u" is already in the database. Updating 'have tracks', but not artist information") + 1] + " is already in the database. Updating 'have tracks', but not artist information") return True else: return False @@ -53,7 +53,7 @@ def artistlist_to_mbids(artistlist, forced=False): # If adding artists through Manage New Artists, they're coming through as non-unicode (utf-8?) # and screwing everything up - if not isinstance(artist, unicode): + if not isinstance(artist, str): try: artist = artist.decode('utf-8', 'replace') except Exception: @@ -184,7 +184,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): else: sortname = artist['artist_name'] - logger.info(u"Now adding/updating: " + artist['artist_name']) + logger.info("Now adding/updating: " + artist['artist_name']) controlValueDict = {"ArtistID": artistid} newValueDict = {"ArtistName": artist['artist_name'], "ArtistSortName": sortname, @@ -263,8 +263,8 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): new_releases = mb.get_new_releases(rgid, includeExtras) else: - if check_release_date is None or check_release_date == u"None": - if headphones.CONFIG.MB_IGNORE_AGE_MISSING is not 1: + if check_release_date is None or check_release_date == "None": + if not headphones.CONFIG.MB_IGNORE_AGE_MISSING: logger.info("[%s] Now updating: %s (No Release Date)" % (artist['artist_name'], rg['title'])) new_releases = mb.get_new_releases(rgid, includeExtras, True) else: @@ -517,7 +517,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): marked_as_downloaded = True logger.info( - u"[%s] Seeing if we need album art for %s" % (artist['artist_name'], rg['title'])) + "[%s] Seeing if we need album art for %s" % (artist['artist_name'], rg['title'])) try: cache.getThumb(AlbumID=rg['id']) except Exception as e: @@ -530,19 +530,19 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): album_searches.append(rg['id']) else: if skip_log == 0: - logger.info(u"[%s] No new releases, so no changes made to %s" % ( + logger.info("[%s] No new releases, so no changes made to %s" % ( artist['artist_name'], rg['title'])) time.sleep(3) finalize_update(artistid, artist['artist_name'], errors) - logger.info(u"Seeing if we need album art for: %s" % artist['artist_name']) + logger.info("Seeing if we need album art for: %s" % artist['artist_name']) try: cache.getThumb(ArtistID=artistid) except Exception as e: logger.error("Error getting album art: %s", e) - logger.info(u"Fetching Metacritic reviews for: %s" % artist['artist_name']) + logger.info("Fetching Metacritic reviews for: %s" % artist['artist_name']) try: metacritic.update(artistid, artist['artist_name'], artist['releasegroups']) except Exception as e: @@ -554,7 +554,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False, type="artist"): artist['artist_name'], artist['artist_name'])) else: myDB.action('DELETE FROM newartists WHERE ArtistName = ?', [artist['artist_name']]) - logger.info(u"Updating complete for: %s" % artist['artist_name']) + logger.info("Updating complete for: %s" % artist['artist_name']) # Start searching for newly added albums if album_searches: @@ -663,7 +663,7 @@ def addReleaseById(rid, rgid=None): sortname = release_dict['artist_name'] logger.info( - u"Now manually adding: " + release_dict['artist_name'] + " - with status Paused") + "Now manually adding: " + release_dict['artist_name'] + " - with status Paused") controlValueDict = {"ArtistID": release_dict['artist_id']} newValueDict = {"ArtistName": release_dict['artist_name'], "ArtistSortName": sortname, @@ -696,7 +696,7 @@ def addReleaseById(rid, rgid=None): if not rg_exists and release_dict or status == 'Loading' and release_dict: # it should never be the case that we have an rg and not the artist # but if it is this will fail - logger.info(u"Now adding-by-id album (" + release_dict['title'] + ") from id: " + rgid) + logger.info("Now adding-by-id album (" + release_dict['title'] + ") from id: " + rgid) controlValueDict = {"AlbumID": rgid} if status != 'Loading': status = 'Wanted' @@ -772,7 +772,7 @@ def addReleaseById(rid, rgid=None): # Start a search for the album if headphones.CONFIG.AUTOWANT_MANUALLY_ADDED: - import searcher + from . import searcher searcher.searchforalbum(rgid, False) elif not rg_exists and not release_dict: diff --git a/headphones/lastfm.py b/headphones/lastfm.py index db3e1aec..5b23e447 100644 --- a/headphones/lastfm.py +++ b/headphones/lastfm.py @@ -91,7 +91,7 @@ def getSimilar(): for artist, mbid in artistlist: count[artist, mbid] += 1 - items = count.items() + items = list(count.items()) top_list = sorted(items, key=lambda x: x[1], reverse=True)[:25] random.shuffle(top_list) diff --git a/headphones/librarysync.py b/headphones/librarysync.py index dbf9fb4b..13619d37 100644 --- a/headphones/librarysync.py +++ b/headphones/librarysync.py @@ -17,7 +17,7 @@ import os import math import headphones -from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError +from mediafile import MediaFile, FileTypeError, UnreadableFileError from headphones import db, logger, helpers, importer, lastfm @@ -30,72 +30,64 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, if not dir: if not headphones.CONFIG.MUSIC_DIR: + logger.info( + "No music directory configured. Add it under " + "Manage -> Scan Music Library" + ) return else: dir = headphones.CONFIG.MUSIC_DIR - # If we're appending a dir, it's coming from the post processor which is - # already bytestring - if not append or artistScan: - dir = dir.encode(headphones.SYS_ENCODING) - if not os.path.isdir(dir): - logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(headphones.SYS_ENCODING, - 'replace')) + logger.warn(f"Cannot find music directory: {dir}") return myDB = db.DBConnection() new_artists = [] - logger.info('Scanning music directory: %s' % dir.decode(headphones.SYS_ENCODING, 'replace')) + logger.info(f"Scanning music directory: {dir}") if not append: # Clean up bad filepaths. Queries can take some time, ensure all results are loaded before processing if ArtistID: - tracks = myDB.action( + dbtracks = myDB.action( 'SELECT Location FROM alltracks WHERE ArtistID = ? AND Location IS NOT NULL UNION SELECT Location FROM tracks WHERE ArtistID = ? AND Location ' 'IS NOT NULL', [ArtistID, ArtistID]) else: - tracks = myDB.action( + dbtracks = myDB.action( 'SELECT Location FROM alltracks WHERE Location IS NOT NULL UNION SELECT Location FROM tracks WHERE Location IS NOT NULL') - locations = [] - for track in tracks: - locations.append(track['Location']) - for location in locations: - encoded_track_string = location.encode(headphones.SYS_ENCODING, 'replace') - if not os.path.isfile(encoded_track_string): + for track in dbtracks: + track_location = track['Location'] + if not os.path.isfile(track_location): myDB.action('UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?', - [None, None, None, location]) + [None, None, None, track_location]) myDB.action('UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?', - [None, None, None, location]) + [None, None, None, track_location]) if ArtistName: del_have_tracks = myDB.select('SELECT Location, Matched, ArtistName FROM have WHERE ArtistName = ? COLLATE NOCASE', [ArtistName]) else: del_have_tracks = myDB.select('SELECT Location, Matched, ArtistName FROM have') - locations = [] for track in del_have_tracks: - locations.append([track['Location'], track['ArtistName']]) - for location in locations: - encoded_track_string = location[0].encode(headphones.SYS_ENCODING, 'replace') - if not os.path.isfile(encoded_track_string): - if location[1]: + if not os.path.isfile(track['Location']): + if track['ArtistName']: # Make sure deleted files get accounted for when updating artist track counts - new_artists.append(location[1]) - myDB.action('DELETE FROM have WHERE Location=?', [location[0]]) + new_artists.append(track['ArtistName']) + myDB.action('DELETE FROM have WHERE Location=?', [Track['Location']]) logger.info( - 'File %s removed from Headphones, as it is no longer on disk' % encoded_track_string.decode( - headphones.SYS_ENCODING, 'replace')) + f"{Track['Location']} removed from Headphones, as it " + f"is no longer on disk" + ) bitrates = [] - song_list = [] + track_list = [] latest_subdirectory = [] - new_song_count = 0 + new_track_count = 0 file_count = 0 for r, d, f in helpers.walk_directory(dir): @@ -110,32 +102,16 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, subdirectory = r.replace(dir, '') latest_subdirectory.append(subdirectory) - if file_count == 0 and r.replace(dir, '') != '': - logger.info("[%s] Now scanning subdirectory %s" % ( - dir.decode(headphones.SYS_ENCODING, 'replace'), - subdirectory.decode(headphones.SYS_ENCODING, 'replace'))) - elif latest_subdirectory[file_count] != latest_subdirectory[ - file_count - 1] and file_count != 0: - logger.info("[%s] Now scanning subdirectory %s" % ( - dir.decode(headphones.SYS_ENCODING, 'replace'), - subdirectory.decode(headphones.SYS_ENCODING, 'replace'))) - - song = os.path.join(r, files) - - # We need the unicode path to use for logging, inserting into database - unicode_song_path = song.decode(headphones.SYS_ENCODING, 'replace') + track_path = os.path.join(r, files) # Try to read the metadata try: - f = MediaFile(song) + f = MediaFile(track_path) except (FileTypeError, UnreadableFileError): - logger.warning( - "Cannot read media file '%s', skipping. It may be corrupted or not a media file.", - unicode_song_path) + logger.warning(f"Cannot read `{track_path}`. It may be corrupted or not a media file.") continue except IOError: - logger.warning("Cannnot read media file '%s', skipping. Does the file exists?", - unicode_song_path) + logger.warning(f"Cannnot read `{track_path}`. Does the file exists?") continue # Grab the bitrates for the auto detect bit rate option @@ -150,15 +126,15 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, else: f_artist = None - # Add the song to our song list - - # TODO: skip adding songs without the minimum requisite information (just a matter of putting together the right if statements) + # Add the track to our track list - + # TODO: skip adding tracks without the minimum requisite information (just a matter of putting together the right if statements) if f_artist and f.album and f.title: CleanName = helpers.clean_name(f_artist + ' ' + f.album + ' ' + f.title) else: CleanName = None - controlValueDict = {'Location': unicode_song_path} + controlValueDict = {'Location': track_path} newValueDict = {'TrackID': f.mb_trackid, # 'ReleaseID' : f.mb_albumid, @@ -174,24 +150,24 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, 'CleanName': CleanName } - # song_list.append(song_dict) - check_exist_song = myDB.action("SELECT * FROM have WHERE Location=?", - [unicode_song_path]).fetchone() - # Only attempt to match songs that are new, haven't yet been matched, or metadata has changed. - if not check_exist_song: + # track_list.append(track_dict) + check_exist_track = myDB.action("SELECT * FROM have WHERE Location=?", + [track_path]).fetchone() + # Only attempt to match tracks that are new, haven't yet been matched, or metadata has changed. + if not check_exist_track: # This is a new track if f_artist: new_artists.append(f_artist) myDB.upsert("have", newValueDict, controlValueDict) - new_song_count += 1 + new_track_count += 1 else: - if check_exist_song['ArtistName'] != f_artist or check_exist_song[ - 'AlbumTitle'] != f.album or check_exist_song['TrackTitle'] != f.title: + if check_exist_track['ArtistName'] != f_artist or check_exist_track[ + 'AlbumTitle'] != f.album or check_exist_track['TrackTitle'] != f.title: # Important track metadata has been modified, need to run matcher again - if f_artist and f_artist != check_exist_song['ArtistName']: + if f_artist and f_artist != check_exist_track['ArtistName']: new_artists.append(f_artist) - elif f_artist and f_artist == check_exist_song['ArtistName'] and \ - check_exist_song['Matched'] != "Ignored": + elif f_artist and f_artist == check_exist_track['ArtistName'] and \ + check_exist_track['Matched'] != "Ignored": new_artists.append(f_artist) else: continue @@ -200,51 +176,59 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, myDB.upsert("have", newValueDict, controlValueDict) myDB.action( 'UPDATE tracks SET Location=?, BitRate=?, Format=? WHERE Location=?', - [None, None, None, unicode_song_path]) + [None, None, None, track_path]) myDB.action( 'UPDATE alltracks SET Location=?, BitRate=?, Format=? WHERE Location=?', - [None, None, None, unicode_song_path]) - new_song_count += 1 + [None, None, None, track_path]) + new_track_count += 1 else: # This track information hasn't changed - if f_artist and check_exist_song['Matched'] != "Ignored": + if f_artist and check_exist_track['Matched'] != "Ignored": new_artists.append(f_artist) file_count += 1 # Now we start track matching - logger.info("%s new/modified songs found and added to the database" % new_song_count) - song_list = myDB.action("SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", - [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]) - total_number_of_songs = \ - myDB.action("SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", - [dir.decode(headphones.SYS_ENCODING, 'replace') + "%"]).fetchone()[0] - logger.info("Found " + str(total_number_of_songs) + " new/modified tracks in: '" + dir.decode( - headphones.SYS_ENCODING, 'replace') + "'. Matching tracks to the appropriate releases....") + logger.info(f"{new_track_count} new/modified tracks found and added to the database") + dbtracks = myDB.action( + "SELECT * FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", + [f"{dir}%"] + ) + dbtracks_count = myDB.action( + "SELECT COUNT(*) FROM have WHERE Matched IS NULL AND LOCATION LIKE ?", + [f"{dir}%"] + ).fetchone()[0] + logger.info(f"Found {dbtracks_count} new/modified tracks in `{dir}`") + logger.info("Matching tracks to the appropriate releases....") - # Sort the song_list by most vague (e.g. no trackid or releaseid) to most specific (both trackid & releaseid) - # When we insert into the database, the tracks with the most specific information will overwrite the more general matches - # song_list = helpers.multikeysort(song_list, ['ReleaseID', 'TrackID']) - song_list = helpers.multikeysort(song_list, ['ArtistName', 'AlbumTitle']) - # We'll use this to give a % completion, just because the track matching might take a while - song_count = 0 - latest_artist = [] + # Sort the track_list by most vague (e.g. no trackid or releaseid) + # to most specific (both trackid & releaseid) + # When we insert into the database, the tracks with the most + # specific information will overwrite the more general matches + + sorted_dbtracks = helpers.multikeysort(dbtracks, ['ArtistName', 'AlbumTitle']) + + + # We'll use this to give a % completion, just because the + # track matching might take a while + tracks_completed = 0 + latest_artist = None last_completion_percentage = 0 prev_artist_name = None artistid = None - for song in song_list: + for track in sorted_dbtracks: - latest_artist.append(song['ArtistName']) - if song_count == 0: - logger.info("Now matching songs by %s" % song['ArtistName']) - elif latest_artist[song_count] != latest_artist[song_count - 1] and song_count != 0: - logger.info("Now matching songs by %s" % song['ArtistName']) + if latest_artist != track['ArtistName']: + logger.info(f"Now matching tracks by {track['ArtistName']}") + latest_artist = track['ArtistName'] - song_count += 1 - completion_percentage = math.floor(float(song_count) / total_number_of_songs * 1000) / 10 + tracks_completed += 1 + completion_percentage = math.floor( + float(tracks_completed) / dbtracks_count * 1000 + ) / 10 if completion_percentage >= (last_completion_percentage + 10): logger.info("Track matching is " + str(completion_percentage) + "% complete") @@ -257,9 +241,9 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, albumid = None - if song['ArtistName'] and song['CleanName']: - artist_name = song['ArtistName'] - clean_name = song['CleanName'] + if track['ArtistName'] and track['CleanName']: + artist_name = track['ArtistName'] + clean_name = track['CleanName'] # Only update if artist is in the db if artist_name != prev_artist_name: @@ -297,12 +281,12 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, # matching on CleanName should be enough, ensure it's the same artist just in case # Update tracks - track = myDB.action('SELECT AlbumID, ArtistName FROM tracks WHERE CleanName = ? AND ArtistID = ?', [clean_name, artistid]).fetchone() - if track: - albumid = track['AlbumID'] + dbtrack = myDB.action('SELECT AlbumID, ArtistName FROM tracks WHERE CleanName = ? AND ArtistID = ?', [clean_name, artistid]).fetchone() + if dbtrack: + albumid = dbtrack['AlbumID'] myDB.action( 'UPDATE tracks SET Location = ?, BitRate = ?, Format = ? WHERE CleanName = ? AND ArtistID = ?', - [song['Location'], song['BitRate'], song['Format'], clean_name, artistid]) + [track['Location'], track['BitRate'], track['Format'], clean_name, artistid]) # Update alltracks alltrack = myDB.action('SELECT AlbumID, ArtistName FROM alltracks WHERE CleanName = ? AND ArtistID = ?', [clean_name, artistid]).fetchone() @@ -310,26 +294,25 @@ def libraryScan(dir=None, append=False, ArtistID=None, ArtistName=None, albumid = alltrack['AlbumID'] myDB.action( 'UPDATE alltracks SET Location = ?, BitRate = ?, Format = ? WHERE CleanName = ? AND ArtistID = ?', - [song['Location'], song['BitRate'], song['Format'], clean_name, artistid]) + [track['Location'], track['BitRate'], track['Format'], clean_name, artistid]) # Update have - controlValueDict2 = {'Location': song['Location']} + controlValueDict2 = {'Location': track['Location']} if albumid: newValueDict2 = {'Matched': albumid} else: newValueDict2 = {'Matched': "Failed"} myDB.upsert("have", newValueDict2, controlValueDict2) - # myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [song['ArtistName'], song['AlbumTitle'], song['TrackNumber'], song['TrackTitle'], song['TrackLength'], song['BitRate'], song['Genre'], song['Date'], song['TrackID'], song['Location'], CleanName, song['Format']]) + # myDB.action('INSERT INTO have (ArtistName, AlbumTitle, TrackNumber, TrackTitle, TrackLength, BitRate, Genre, Date, TrackID, Location, CleanName, Format) VALUES( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', [track['ArtistName'], track['AlbumTitle'], track['TrackNumber'], track['TrackTitle'], track['TrackLength'], track['BitRate'], track['Genre'], track['Date'], track['TrackID'], track['Location'], CleanName, track['Format']]) - logger.info('Completed matching tracks from directory: %s' % dir.decode(headphones.SYS_ENCODING, - 'replace')) + logger.info(f"Completed matching tracks from `{dir}`") if not append or artistScan: logger.info('Updating scanned artist track counts') # Clean up the new artist list - unique_artists = {}.fromkeys(new_artists).keys() + unique_artists = list({}.fromkeys(new_artists).keys()) # # Don't think we need to do this, check the db instead below # diff --git a/headphones/lock.py b/headphones/lock.py index 8d385738..2eb28cfc 100644 --- a/headphones/lock.py +++ b/headphones/lock.py @@ -4,7 +4,7 @@ Locking-related classes import time import threading -import Queue +import queue import headphones.logger @@ -29,7 +29,7 @@ class TimedLock(object): self.lock = threading.Lock() self.last_used = 0 self.minimum_delta = minimum_delta - self.queue = Queue.Queue() + self.queue = queue.Queue() def __enter__(self): """ @@ -47,7 +47,7 @@ class TimedLock(object): seconds = self.queue.get(False) headphones.logger.debug('Sleeping %s (queued)', seconds) time.sleep(seconds) - except Queue.Empty: + except queue.Empty: continue self.queue.task_done() diff --git a/headphones/logger.py b/headphones/logger.py index 1eaac7cc..f49da8ac 100644 --- a/headphones/logger.py +++ b/headphones/logger.py @@ -153,7 +153,8 @@ def initLogger(console=False, log_dir=False, verbose=False): file_formatter = logging.Formatter( '%(asctime)s - %(levelname)-7s :: %(threadName)s : %(message)s', '%d-%b-%Y %H:%M:%S') file_handler = handlers.RotatingFileHandler(filename, maxBytes=MAX_SIZE, - backupCount=MAX_FILES) + backupCount=MAX_FILES, + encoding='utf8') file_handler.setLevel(logging.DEBUG) file_handler.setFormatter(file_formatter) diff --git a/headphones/lyrics.py b/headphones/lyrics.py index c0f45ed5..93b59cf3 100644 --- a/headphones/lyrics.py +++ b/headphones/lyrics.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -import htmlentitydefs +import html.entities import re from headphones import logger, request @@ -53,7 +53,7 @@ def getLyrics(artist, song): '''
''').search( lyricspage) if m: - return u'(Instrumental)' + return '(Instrumental)' else: logger.warn('Cannot find lyrics on: %s' % lyricsurl) return @@ -72,7 +72,7 @@ def convert_html_entities(s): name = hit[2:-1] try: entnum = int(name) - s = s.replace(hit, unichr(entnum)) + s = s.replace(hit, chr(entnum)) except ValueError: pass @@ -83,7 +83,7 @@ def convert_html_entities(s): hits.remove(amp) for hit in hits: name = hit[1:-1] - if name in htmlentitydefs.name2codepoint: - s = s.replace(hit, unichr(htmlentitydefs.name2codepoint[name])) + if name in html.entities.name2codepoint: + s = s.replace(hit, chr(html.entities.name2codepoint[name])) s = s.replace(amp, "&") return s diff --git a/headphones/mb.py b/headphones/mb.py index 7d9b9c50..45d35122 100644 --- a/headphones/mb.py +++ b/headphones/mb.py @@ -112,9 +112,9 @@ def findArtist(name, limit=1): return False for result in artistResults: if 'disambiguation' in result: - uniquename = unicode(result['sort-name'] + " (" + result['disambiguation'] + ")") + uniquename = str(result['sort-name'] + " (" + result['disambiguation'] + ")") else: - uniquename = unicode(result['sort-name']) + uniquename = str(result['sort-name']) if result['name'] != uniquename and limit == 1: logger.info( 'Found an artist with a disambiguation: %s - doing an album based search' % name) @@ -126,7 +126,7 @@ def findArtist(name, limit=1): # Just need the artist id if the limit is 1 # 'name': unicode(result['sort-name']), # 'uniquename': uniquename, - 'id': unicode(result['id']), + 'id': str(result['id']), # 'url': unicode("http://musicbrainz.org/artist/" + result['id']),#probably needs to be changed # 'score': int(result['ext:score']) }) @@ -134,10 +134,10 @@ def findArtist(name, limit=1): artistlist.append(artistdict) else: artistlist.append({ - 'name': unicode(result['sort-name']), + 'name': str(result['sort-name']), 'uniquename': uniquename, - 'id': unicode(result['id']), - 'url': unicode("http://musicbrainz.org/artist/" + result['id']), + 'id': str(result['id']), + 'url': str("http://musicbrainz.org/artist/" + result['id']), # probably needs to be changed 'score': int(result['ext:score']) }) @@ -187,7 +187,7 @@ def findRelease(name, limit=1, artist=None): if tracks: tracks += ' + ' tracks += str(medium['track-count']) - for format, count in format_dict.items(): + for format, count in list(format_dict.items()): if formats: formats += ' + ' if count > 1: @@ -203,22 +203,22 @@ def findRelease(name, limit=1, artist=None): rg_type = secondary_type releaselist.append({ - 'uniquename': unicode(result['artist-credit'][0]['artist']['name']), - 'title': unicode(title), - 'id': unicode(result['artist-credit'][0]['artist']['id']), - 'albumid': unicode(result['id']), - 'url': unicode( + 'uniquename': str(result['artist-credit'][0]['artist']['name']), + 'title': str(title), + 'id': str(result['artist-credit'][0]['artist']['id']), + 'albumid': str(result['id']), + 'url': str( "http://musicbrainz.org/artist/" + result['artist-credit'][0]['artist']['id']), # probably needs to be changed - 'albumurl': unicode("http://musicbrainz.org/release/" + result['id']), + 'albumurl': str("http://musicbrainz.org/release/" + result['id']), # probably needs to be changed 'score': int(result['ext:score']), - 'date': unicode(result['date']) if 'date' in result else '', - 'country': unicode(result['country']) if 'country' in result else '', - 'formats': unicode(formats), - 'tracks': unicode(tracks), - 'rgid': unicode(result['release-group']['id']), - 'rgtype': unicode(rg_type) + 'date': str(result['date']) if 'date' in result else '', + 'country': str(result['country']) if 'country' in result else '', + 'formats': str(formats), + 'tracks': str(tracks), + 'rgid': str(result['release-group']['id']), + 'rgtype': str(rg_type) }) return releaselist @@ -240,15 +240,15 @@ def findSeries(name, limit=1): return False for result in seriesResults: if 'disambiguation' in result: - uniquename = unicode(result['name'] + " (" + result['disambiguation'] + ")") + uniquename = str(result['name'] + " (" + result['disambiguation'] + ")") else: - uniquename = unicode(result['name']) + uniquename = str(result['name']) serieslist.append({ 'uniquename': uniquename, - 'name': unicode(result['name']), - 'type': unicode(result['type']), - 'id': unicode(result['id']), - 'url': unicode("http://musicbrainz.org/series/" + result['id']), + 'name': str(result['name']), + 'type': str(result['type']), + 'id': str(result['id']), + 'url': str("http://musicbrainz.org/series/" + result['id']), # probably needs to be changed 'score': int(result['ext:score']) }) @@ -284,19 +284,19 @@ def getArtist(artistid, extrasonly=False): if not artist: return False - artist_dict['artist_name'] = unicode(artist['name']) + artist_dict['artist_name'] = str(artist['name']) releasegroups = [] if not extrasonly: for rg in artist['release-group-list']: - if "secondary-type-list" in rg.keys(): # only add releases without a secondary type + if "secondary-type-list" in list(rg.keys()): # only add releases without a secondary type continue releasegroups.append({ - 'title': unicode(rg['title']), - 'id': unicode(rg['id']), - 'url': u"http://musicbrainz.org/release-group/" + rg['id'], - 'type': unicode(rg['type']) + 'title': str(rg['title']), + 'id': str(rg['id']), + 'url': "http://musicbrainz.org/release-group/" + rg['id'], + 'type': str(rg['type']) }) # See if we need to grab extras. Artist specific extras take precedence over global option @@ -314,7 +314,7 @@ def getArtist(artistid, extrasonly=False): # Need to convert extras string from something like '2,5.6' to ['ep','live','remix'] (append new extras to end) if db_artist['Extras']: - extras = map(int, db_artist['Extras'].split(',')) + extras = list(map(int, db_artist['Extras'].split(','))) else: extras = [] extras_list = headphones.POSSIBLE_EXTRAS @@ -354,10 +354,10 @@ def getArtist(artistid, extrasonly=False): rg_type = secondary_type releasegroups.append({ - 'title': unicode(rg['title']), - 'id': unicode(rg['id']), - 'url': u"http://musicbrainz.org/release-group/" + rg['id'], - 'type': unicode(rg_type) + 'title': str(rg['title']), + 'id': str(rg['id']), + 'url': "http://musicbrainz.org/release-group/" + rg['id'], + 'type': str(rg_type) }) artist_dict['releasegroups'] = releasegroups return artist_dict @@ -382,10 +382,10 @@ def getSeries(seriesid): return False if 'disambiguation' in series: - series_dict['artist_name'] = unicode( - series['name'] + " (" + unicode(series['disambiguation']) + ")") + series_dict['artist_name'] = str( + series['name'] + " (" + str(series['disambiguation']) + ")") else: - series_dict['artist_name'] = unicode(series['name']) + series_dict['artist_name'] = str(series['name']) releasegroups = [] @@ -448,42 +448,42 @@ def getRelease(releaseid, include_artist_info=True): if not results: return False - release['title'] = unicode(results['title']) - release['id'] = unicode(results['id']) - release['asin'] = unicode(results['asin']) if 'asin' in results else None - release['date'] = unicode(results['date']) if 'date' in results else None + release['title'] = str(results['title']) + release['id'] = str(results['id']) + release['asin'] = str(results['asin']) if 'asin' in results else None + release['date'] = str(results['date']) if 'date' in results else None try: - release['format'] = unicode(results['medium-list'][0]['format']) + release['format'] = str(results['medium-list'][0]['format']) except: - release['format'] = u'Unknown' + release['format'] = 'Unknown' try: - release['country'] = unicode(results['country']) + release['country'] = str(results['country']) except: - release['country'] = u'Unknown' + release['country'] = 'Unknown' if include_artist_info: if 'release-group' in results: - release['rgid'] = unicode(results['release-group']['id']) - release['rg_title'] = unicode(results['release-group']['title']) + release['rgid'] = str(results['release-group']['id']) + release['rg_title'] = str(results['release-group']['title']) try: - release['rg_type'] = unicode(results['release-group']['type']) + release['rg_type'] = str(results['release-group']['type']) if release['rg_type'] == 'Album' and 'secondary-type-list' in results[ 'release-group']: - secondary_type = unicode(results['release-group']['secondary-type-list'][0]) + secondary_type = str(results['release-group']['secondary-type-list'][0]) if secondary_type != release['rg_type']: release['rg_type'] = secondary_type except KeyError: - release['rg_type'] = u'Unknown' + release['rg_type'] = 'Unknown' else: logger.warn("Release " + releaseid + "had no ReleaseGroup associated") - release['artist_name'] = unicode(results['artist-credit'][0]['artist']['name']) - release['artist_id'] = unicode(results['artist-credit'][0]['artist']['id']) + release['artist_name'] = str(results['artist-credit'][0]['artist']['name']) + release['artist_id'] = str(results['artist-credit'][0]['artist']['id']) release['tracks'] = getTracksFromRelease(results) @@ -529,7 +529,7 @@ def get_new_releases(rgid, includeExtras=False, forcefull=False): force_repackage1 = 0 if len(results) != 0: for release_mark in results: - release_list.append(unicode(release_mark['id'])) + release_list.append(str(release_mark['id'])) release_title = release_mark['title'] remove_missing_releases = myDB.action("SELECT ReleaseID FROM allalbums WHERE AlbumID=?", [rgid]) @@ -561,31 +561,31 @@ def get_new_releases(rgid, includeExtras=False, forcefull=False): # DELETE all references to this release since we're updating it anyway. myDB.action('DELETE from allalbums WHERE ReleaseID=?', [rel_id_check]) myDB.action('DELETE from alltracks WHERE ReleaseID=?', [rel_id_check]) - release['AlbumTitle'] = unicode(releasedata['title']) - release['AlbumID'] = unicode(rgid) - release['AlbumASIN'] = unicode(releasedata['asin']) if 'asin' in releasedata else None - release['ReleaseDate'] = unicode(releasedata['date']) if 'date' in releasedata else None + release['AlbumTitle'] = str(releasedata['title']) + release['AlbumID'] = str(rgid) + release['AlbumASIN'] = str(releasedata['asin']) if 'asin' in releasedata else None + release['ReleaseDate'] = str(releasedata['date']) if 'date' in releasedata else None release['ReleaseID'] = releasedata['id'] if 'release-group' not in releasedata: raise Exception('No release group associated with release id ' + releasedata[ 'id'] + ' album id' + rgid) - release['Type'] = unicode(releasedata['release-group']['type']) + release['Type'] = str(releasedata['release-group']['type']) if release['Type'] == 'Album' and 'secondary-type-list' in releasedata['release-group']: - secondary_type = unicode(releasedata['release-group']['secondary-type-list'][0]) + secondary_type = str(releasedata['release-group']['secondary-type-list'][0]) if secondary_type != release['Type']: release['Type'] = secondary_type # making the assumption that the most important artist will be first in the list if 'artist-credit' in releasedata: - release['ArtistID'] = unicode(releasedata['artist-credit'][0]['artist']['id']) - release['ArtistName'] = unicode(releasedata['artist-credit-phrase']) + release['ArtistID'] = str(releasedata['artist-credit'][0]['artist']['id']) + release['ArtistName'] = str(releasedata['artist-credit-phrase']) else: logger.warn('Release ' + releasedata['id'] + ' has no Artists associated.') return False - release['ReleaseCountry'] = unicode( - releasedata['country']) if 'country' in releasedata else u'Unknown' + release['ReleaseCountry'] = str( + releasedata['country']) if 'country' in releasedata else 'Unknown' # assuming that the list will contain media and that the format will be consistent try: additional_medium = '' @@ -600,9 +600,9 @@ def get_new_releases(rgid, includeExtras=False, forcefull=False): disc_number = str(medium_count) + 'x' packaged_medium = disc_number + releasedata['medium-list'][0][ 'format'] + additional_medium - release['ReleaseFormat'] = unicode(packaged_medium) + release['ReleaseFormat'] = str(packaged_medium) except: - release['ReleaseFormat'] = u'Unknown' + release['ReleaseFormat'] = 'Unknown' release['Tracks'] = getTracksFromRelease(releasedata) @@ -684,14 +684,14 @@ def getTracksFromRelease(release): for medium in release['medium-list']: for track in medium['track-list']: try: - track_title = unicode(track['title']) + track_title = str(track['title']) except: - track_title = unicode(track['recording']['title']) + track_title = str(track['recording']['title']) tracks.append({ 'number': totalTracks, 'title': track_title, - 'id': unicode(track['recording']['id']), - 'url': u"http://musicbrainz.org/track/" + track['recording']['id'], + 'id': str(track['recording']['id']), + 'url': "http://musicbrainz.org/track/" + track['recording']['id'], 'duration': int(track['length']) if 'length' in track else 0 }) totalTracks += 1 @@ -739,7 +739,7 @@ def findArtistbyAlbum(name): # uniquename = unicode(newArtist['sort-name']) # artist_dict['name'] = unicode(newArtist['sort-name']) # artist_dict['uniquename'] = uniquename - artist_dict['id'] = unicode(newArtist['id']) + artist_dict['id'] = str(newArtist['id']) # artist_dict['url'] = u'http://musicbrainz.org/artist/' + newArtist['id'] # artist_dict['score'] = int(releaseGroup['ext:score']) @@ -768,7 +768,7 @@ def findAlbumID(artist=None, album=None): if len(results) < 1: return False - rgid = unicode(results[0]['id']) + rgid = str(results[0]['id']) return rgid diff --git a/headphones/metacritic.py b/headphones/metacritic.py index 4c65ffcd..fc9c7c40 100644 --- a/headphones/metacritic.py +++ b/headphones/metacritic.py @@ -35,7 +35,7 @@ def update(artistid, artist_name, release_groups): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'} - url = "http://www.metacritic.com/person/" + mc_artist_name + "?filter-options=music&sort_options=date&num_items=100" + url = "https://www.metacritic.com/person/" + mc_artist_name + "?filter-options=music&sort_options=date&num_items=100" res = request.request_soup(url, headers=headers, whitelist_status_code=404) diff --git a/headphones/metadata.py b/headphones/metadata.py index c8797801..20bdddd4 100644 --- a/headphones/metadata.py +++ b/headphones/metadata.py @@ -17,8 +17,8 @@ Track/album metadata handling routines. """ -from __future__ import print_function -from beets.mediafile import MediaFile, UnreadableFileError + +from mediafile import MediaFile, UnreadableFileError import headphones from headphones import logger import os.path @@ -60,7 +60,7 @@ class MetadataDict(dict): self._lower = {} if seq is not None: try: - self.add_items(seq.iteritems()) + self.add_items(iter(seq.items())) except KeyError: self.add_items(seq) @@ -103,11 +103,11 @@ def _verify_var_type(val): """ Check if type of value is allowed as a variable in pathname substitution. """ - return isinstance(val, (basestring, int, float, datetime.date)) + return isinstance(val, (str, int, float, datetime.date)) def _as_str(val): - if isinstance(val, basestring): + if isinstance(val, str): return val else: return str(val) @@ -134,7 +134,7 @@ def _row_to_dict(row, d): """ Populate dict with database row fields. """ - for fld in row.keys(): + for fld in list(row.keys()): val = row[fld] if val is None: val = '' @@ -184,9 +184,7 @@ def file_metadata(path, release): try: f = MediaFile(path) except UnreadableFileError as ex: - logger.info("MediaFile couldn't parse: %s (%s)", - path.decode(headphones.SYS_ENCODING, 'replace'), - str(ex)) + logger.info(f"MediaFile couldn't parse {path}: {e}") return None, None res = MetadataDict() @@ -207,8 +205,7 @@ def file_metadata(path, release): track_number = '%02d' % f.track if not f.title: - basename = os.path.basename( - path.decode(headphones.SYS_ENCODING, 'replace')) + basename = os.path.basename(path) title = os.path.splitext(basename)[0] from_metadata = False else: @@ -242,7 +239,7 @@ def file_metadata(path, release): Vars.SORT_ARTIST_LOWER: _lower(sort_name), Vars.ALBUM_LOWER: _lower(album_title), } - res.add_items(override_values.iteritems()) + res.add_items(iter(override_values.items())) return res, from_metadata @@ -252,7 +249,7 @@ def _intersect(d1, d2): Create intersection (common part) of two dictionaries. """ res = {} - for key, val in d1.iteritems(): + for key, val in d1.items(): if key in d2 and d2[key] == val: res[key] = val return res @@ -284,21 +281,19 @@ def album_metadata(path, release, common_tags): sort_name = artist if not sort_name or sort_name[0].isdigit(): - first_char = u'0-9' + first_char = '0-9' else: first_char = sort_name[0] - orig_folder = u'' + orig_folder = '' # Get from temp path if "_@hp@_" in path: orig_folder = path.rsplit("headphones_", 1)[1].split("_@hp@_")[0] - orig_folder = orig_folder.decode(headphones.SYS_ENCODING, 'replace') else: for r, d, f in os.walk(path): try: - orig_folder = os.path.basename( - os.path.normpath(r).decode(headphones.SYS_ENCODING, 'replace')) + orig_folder = os.path.basename(os.path.normpath(r)) break except: pass @@ -320,7 +315,7 @@ def album_metadata(path, release, common_tags): Vars.ORIGINAL_FOLDER_LOWER: _lower(orig_folder) } res = MetadataDict(common_tags) - res.add_items(override_values.iteritems()) + res.add_items(iter(override_values.items())) return res @@ -345,7 +340,7 @@ def albumart_metadata(release, common_tags): Vars.ALBUM_LOWER: _lower(album) } res = MetadataDict(common_tags) - res.add_items(override_values.iteritems()) + res.add_items(iter(override_values.items())) return res diff --git a/headphones/metadata_test.py b/headphones/metadata_test.py index 5d79d442..de4c2b31 100644 --- a/headphones/metadata_test.py +++ b/headphones/metadata_test.py @@ -22,7 +22,7 @@ import headphones.helpers as _hp from headphones.metadata import MetadataDict import datetime -from unittestcompat import TestCase +from .unittestcompat import TestCase __author__ = "Andrzej Ciarkowski " @@ -50,7 +50,7 @@ class _MockDatabaseRow(object): self._dict = dict(d) def keys(self): - return self._dict.iterkeys() + return iter(self._dict.keys()) def __getitem__(self, item): return self._dict[item] @@ -63,9 +63,9 @@ class MetadataTest(TestCase): def test_metadata_dict_ci(self): """MetadataDict: case-insensitive lookup""" - expected = u'naïve' + expected = 'naïve' key_var = '$TitlE' - m = MetadataDict({key_var.lower(): u'naïve'}) + m = MetadataDict({key_var.lower(): 'naïve'}) self.assertFalse('$track' in m) self.assertTrue('$tITLe' in m, "cross-case lookup with 'in'") self.assertEqual(m[key_var], expected, "cross-case lookup success") @@ -74,7 +74,7 @@ class MetadataTest(TestCase): def test_metadata_dict_cs(self): """MetadataDice: case-preserving lookup""" - expected_var = u'NaïVe' + expected_var = 'NaïVe' key_var = '$TitlE' m = MetadataDict({ key_var.lower(): expected_var.lower(), @@ -171,5 +171,5 @@ class MetadataTest(TestCase): res = _hp.pattern_substitute( "/music/$First/$Artist/$Artist - $Album{ [$Year]}", md, True) - self.assertEqual(res, u"/music/A/artist/artist - Album", + self.assertEqual(res, "/music/A/artist/artist - Album", "check correct rendering of None via pattern_substitute()") diff --git a/headphones/music_encoder.py b/headphones/music_encoder.py index b51001f6..bb9b672e 100644 --- a/headphones/music_encoder.py +++ b/headphones/music_encoder.py @@ -14,6 +14,7 @@ # along with Headphones. If not, see . import time +import datetime import shutil import subprocess import multiprocessing @@ -21,14 +22,15 @@ import multiprocessing import os import headphones from headphones import logger -from beets.mediafile import MediaFile +from mediafile import MediaFile # xld -import getXldProfile +from . import getXldProfile def encode(albumPath): + print(albumPath) use_xld = headphones.CONFIG.ENCODER == 'xld' # Return if xld details not found @@ -63,8 +65,7 @@ def encode(albumPath): for music in f: if any(music.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS): if not use_xld: - encoderFormat = headphones.CONFIG.ENCODEROUTPUTFORMAT.encode( - headphones.SYS_ENCODING) + encoderFormat = headphones.CONFIG.ENCODEROUTPUTFORMAT else: xldMusicFile = os.path.join(r, music) xldInfoMusic = MediaFile(xldMusicFile) @@ -86,7 +87,7 @@ def encode(albumPath): musicTempFiles.append(os.path.join(tempDirEncode, musicTemp)) if headphones.CONFIG.ENCODER_PATH: - encoder = headphones.CONFIG.ENCODER_PATH.encode(headphones.SYS_ENCODING) + encoder = headphones.CONFIG.ENCODER_PATH else: if use_xld: encoder = os.path.join('/Applications', 'xld') @@ -117,18 +118,17 @@ def encode(albumPath): if use_xld: if xldBitrate and (infoMusic.bitrate / 1000 <= xldBitrate): - logger.info('%s has bitrate <= %skb, will not be re-encoded', - music.decode(headphones.SYS_ENCODING, 'replace'), xldBitrate) + logger.info(f"{music} has bitrate <= {xldBitrate}kb, will not be re-encoded") else: encode = True elif headphones.CONFIG.ENCODER == 'lame': if not any( - music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.' + x) for x + music.lower().endswith('.' + x) for x in ["mp3", "wav"]): logger.warn('Lame cannot encode %s format for %s, use ffmpeg', os.path.splitext(music)[1], music) else: - if music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.mp3') and ( + if music.lower().endswith('.mp3') and ( int(infoMusic.bitrate / 1000) <= headphones.CONFIG.BITRATE): logger.info('%s has bitrate <= %skb, will not be re-encoded', music, headphones.CONFIG.BITRATE) @@ -136,13 +136,12 @@ def encode(albumPath): encode = True else: if headphones.CONFIG.ENCODEROUTPUTFORMAT == 'ogg': - if music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.ogg'): - logger.warn('Cannot re-encode .ogg %s', - music.decode(headphones.SYS_ENCODING, 'replace')) + if music.lower().endswith('.ogg'): + logger.warn(f"Cannot re-encode .ogg {music}") else: encode = True else: - if music.decode(headphones.SYS_ENCODING, 'replace').lower().endswith('.' + headphones.CONFIG.ENCODEROUTPUTFORMAT) and (int(infoMusic.bitrate / 1000) <= headphones.CONFIG.BITRATE): + if music.lower().endswith('.' + headphones.CONFIG.ENCODEROUTPUTFORMAT) and (int(infoMusic.bitrate / 1000) <= headphones.CONFIG.BITRATE): logger.info('%s has bitrate <= %skb, will not be re-encoded', music, headphones.CONFIG.BITRATE) else: encode = True @@ -185,13 +184,13 @@ def encode(albumPath): # Retrieve the results results = results.get() else: - results = map(command_map, jobs) + results = list(map(command_map, jobs)) # The results are either True or False, so determine if one is False encoder_failed = not all(results) - musicFiles = filter(None, musicFiles) - musicTempFiles = filter(None, musicTempFiles) + musicFiles = [_f for _f in musicFiles if _f] + musicTempFiles = [_f for _f in musicTempFiles if _f] # check all files to be encoded now exist in temp directory if not encoder_failed and musicTempFiles: @@ -352,36 +351,31 @@ def command(encoder, musicSource, musicDest, albumPath, xldProfile): startupinfo.dwFlags |= subprocess._subprocess.STARTF_USESHOWWINDOW # Encode - logger.info('Encoding %s...' % (musicSource.decode(headphones.SYS_ENCODING, 'replace'))) + logger.info(f"Encoding {musicSource}") logger.debug(subprocess.list2cmdline(cmd)) process = subprocess.Popen(cmd, startupinfo=startupinfo, stdin=open(os.devnull, 'rb'), stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, text=True) stdout, stderr = process.communicate(headphones.CONFIG.ENCODER) # Error if return code not zero if process.returncode: - logger.error( - 'Encoding failed for %s' % (musicSource.decode(headphones.SYS_ENCODING, 'replace'))) - out = stdout if stdout else stderr - out = out.decode(headphones.SYS_ENCODING, 'replace') + logger.error(f"Encoding failed for {musicSource}") + out = stdout or stderr outlast2lines = '\n'.join(out.splitlines()[-2:]) - logger.error('%s error details: %s' % (headphones.CONFIG.ENCODER, outlast2lines)) + logger.error(f"{headphones.CONFIG.ENCODER} error details: {outlast2lines}") out = out.rstrip("\n") logger.debug(out) encoded = False else: - logger.info('%s encoded in %s', musicSource, getTimeEncode(startMusicTime)) + logger.info(f"{musicSource} encoded in {getTimeEncode(startMusicTime)}") encoded = True return encoded def getTimeEncode(start): - seconds = int(time.time() - start) - hours = seconds / 3600 - seconds -= 3600 * hours - minutes = seconds / 60 - seconds -= 60 * minutes - return "%02d:%02d:%02d" % (hours, minutes, seconds) + finish = time.time() + seconds = int(finish - start) + return datetime.timedelta(seconds=seconds) diff --git a/headphones/notifiers.py b/headphones/notifiers.py index 5f3e8620..79b7f596 100644 --- a/headphones/notifiers.py +++ b/headphones/notifiers.py @@ -1,28 +1,13 @@ -# This file is part of Headphones. -# -# Headphones is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Headphones is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Headphones. If not, see . - -from urllib import urlencode, quote_plus -import urllib +from urllib.parse import urlencode, quote_plus +import urllib.request, urllib.parse, urllib.error import subprocess import json from email.mime.text import MIMEText import smtplib import email.utils -from httplib import HTTPSConnection -from urlparse import parse_qsl -import urllib2 +from http.client import HTTPSConnection +from urllib.parse import parse_qsl +import urllib.request, urllib.error, urllib.parse import requests as requests import os.path @@ -31,8 +16,8 @@ from pynma import pynma import cherrypy import headphones import gntp.notifier -import oauth2 as oauth -import pythontwitter as twitter +#import oauth2 as oauth +import twitter class GROWL(object): @@ -81,10 +66,10 @@ class GROWL(object): try: growl.register() except gntp.notifier.errors.NetworkError: - logger.warning(u'Growl notification failed: network error') + logger.warning('Growl notification failed: network error') return except gntp.notifier.errors.AuthError: - logger.warning(u'Growl notification failed: authentication error') + logger.warning('Growl notification failed: authentication error') return # Fix message @@ -105,10 +90,10 @@ class GROWL(object): icon=image ) except gntp.notifier.errors.NetworkError: - logger.warning(u'Growl notification failed: network error') + logger.warning('Growl notification failed: network error') return - logger.info(u"Growl notifications sent.") + logger.info("Growl notifications sent.") def updateLibrary(self): # For uniformity reasons not removed @@ -157,13 +142,13 @@ class PROWL(object): request_status = response.status if request_status == 200: - logger.info(u"Prowl notifications sent.") + logger.info("Prowl notifications sent.") return True elif request_status == 401: - logger.info(u"Prowl auth failed: %s" % response.reason) + logger.info("Prowl auth failed: %s" % response.reason) return False else: - logger.info(u"Prowl notification failed.") + logger.info("Prowl notification failed.") return False def updateLibrary(self): @@ -202,7 +187,7 @@ class XBMC(object): self.password = headphones.CONFIG.XBMC_PASSWORD def _sendhttp(self, host, command): - url_command = urllib.urlencode(command) + url_command = urllib.parse.urlencode(command) url = host + '/xbmcCmds/xbmcHttp/?' + url_command if self.password: @@ -295,10 +280,10 @@ class LMS(object): content = {'Content-Type': 'application/json'} - req = urllib2.Request(host + '/jsonrpc.js', data, content) + req = urllib.request.Request(host + '/jsonrpc.js', data, content) try: - handle = urllib2.urlopen(req) + handle = urllib.request.urlopen(req) except Exception as e: logger.warn('Error opening LMS url: %s' % e) return @@ -424,7 +409,7 @@ class Plex(object): sections = r.getElementsByTagName('Directory') if not sections: - logger.info(u"Plex Media Server not running on: " + host) + logger.info("Plex Media Server not running on: " + host) return False for s in sections: @@ -483,9 +468,9 @@ class NMA(object): api = headphones.CONFIG.NMA_APIKEY nma_priority = headphones.CONFIG.NMA_PRIORITY - logger.debug(u"NMA title: " + title) - logger.debug(u"NMA API: " + api) - logger.debug(u"NMA Priority: " + str(nma_priority)) + logger.debug("NMA title: " + title) + logger.debug("NMA API: " + api) + logger.debug("NMA Priority: " + str(nma_priority)) if snatched: event = snatched + " snatched!" @@ -495,8 +480,8 @@ class NMA(object): message = "Headphones has downloaded and postprocessed: " + \ artist + ' [' + album + ']' - logger.debug(u"NMA event: " + event) - logger.debug(u"NMA message: " + message) + logger.debug("NMA event: " + event) + logger.debug("NMA message: " + message) batch = False @@ -510,8 +495,8 @@ class NMA(object): response = p.push(title, event, message, priority=nma_priority, batch_mode=batch) - if not response[api][u'code'] == u'200': - logger.error(u'Could not send notification to NotifyMyAndroid') + if not response[api]['code'] == '200': + logger.error('Could not send notification to NotifyMyAndroid') return False else: return True @@ -543,10 +528,10 @@ class PUSHBULLET(object): data=json.dumps(data)) if response: - logger.info(u"PushBullet notifications sent.") + logger.info("PushBullet notifications sent.") return True else: - logger.info(u"PushBullet notification failed.") + logger.info("PushBullet notification failed.") return False @@ -557,9 +542,9 @@ class PUSHALOT(object): pushalot_authorizationtoken = headphones.CONFIG.PUSHALOT_APIKEY - logger.debug(u"Pushalot event: " + event) - logger.debug(u"Pushalot message: " + message) - logger.debug(u"Pushalot api: " + pushalot_authorizationtoken) + logger.debug("Pushalot event: " + event) + logger.debug("Pushalot message: " + message) + logger.debug("Pushalot api: " + pushalot_authorizationtoken) http_handler = HTTPSConnection("pushalot.com") @@ -576,18 +561,18 @@ class PUSHALOT(object): response = http_handler.getresponse() request_status = response.status - logger.debug(u"Pushalot response status: %r" % request_status) - logger.debug(u"Pushalot response headers: %r" % response.getheaders()) - logger.debug(u"Pushalot response body: %r" % response.read()) + logger.debug("Pushalot response status: %r" % request_status) + logger.debug("Pushalot response headers: %r" % response.getheaders()) + logger.debug("Pushalot response body: %r" % response.read()) if request_status == 200: - logger.info(u"Pushalot notifications sent.") + logger.info("Pushalot notifications sent.") return True elif request_status == 410: - logger.info(u"Pushalot auth failed: %s" % response.reason) + logger.info("Pushalot auth failed: %s" % response.reason) return False else: - logger.info(u"Pushalot notification failed.") + logger.info("Pushalot notification failed.") return False @@ -618,7 +603,7 @@ class JOIN(object): else: self.url += '&deviceId={deviceid}' - response = urllib2.urlopen(self.url.format(apikey=self.apikey, + response = urllib.request.urlopen(self.url.format(apikey=self.apikey, title=quote_plus(event), text=quote_plus( message.encode( @@ -627,10 +612,10 @@ class JOIN(object): deviceid=self.deviceid)) if response: - logger.info(u"Join notifications sent.") + logger.info("Join notifications sent.") return True else: - logger.error(u"Join notification failed.") + logger.error("Join notification failed.") return False @@ -669,7 +654,7 @@ class Synoindex(object): out, error = p.communicate() # synoindex never returns any codes other than '0', # highly irritating - except OSError, e: + except OSError as e: logger.warn("Error sending notification: %s" % str(e)) def notify_multiple(self, path_list): @@ -710,10 +695,10 @@ class PUSHOVER(object): headers=headers, data=data) if response: - logger.info(u"Pushover notifications sent.") + logger.info("Pushover notifications sent.") return True else: - logger.error(u"Pushover notification failed.") + logger.error("Pushover notification failed.") return False def updateLibrary(self): @@ -832,7 +817,7 @@ class TwitterNotifier(object): access_token_key = headphones.CONFIG.TWITTER_USERNAME access_token_secret = headphones.CONFIG.TWITTER_PASSWORD - logger.info(u"Sending tweet: " + message) + logger.info("Sending tweet: " + message) api = twitter.Api(username, password, access_token_key, access_token_secret) @@ -840,7 +825,7 @@ class TwitterNotifier(object): try: api.PostUpdate(message) except Exception as e: - logger.info(u"Error Sending Tweet: %s" % e) + logger.info("Error Sending Tweet: %s" % e) return False return True @@ -938,7 +923,7 @@ class BOXCAR(object): message += '

MusicBrainz' % rgid - data = urllib.urlencode({ + data = urllib.parse.urlencode({ 'user_credentials': headphones.CONFIG.BOXCAR_TOKEN, 'notification[title]': title.encode('utf-8'), 'notification[long_message]': message.encode('utf-8'), @@ -947,12 +932,12 @@ class BOXCAR(object): "/headphoneslogo.png" }) - req = urllib2.Request(self.url) - handle = urllib2.urlopen(req, data) + req = urllib.request.Request(self.url) + handle = urllib.request.urlopen(req, data) handle.close() return True - except urllib2.URLError as e: + except urllib.error.URLError as e: logger.warn('Error sending Boxcar2 Notification: %s' % e) return False @@ -1011,7 +996,7 @@ class Email(object): mailserver.quit() return True - except Exception, e: + except Exception as e: logger.warn('Error sending Email: %s' % e) return False @@ -1044,15 +1029,15 @@ class TELEGRAM(object): payload = {'chat_id': userid, 'parse_mode': "HTML", 'caption': status + message} try: response = requests.post(TELEGRAM_API % (token, "sendPhoto"), data=payload, files=image_file) - except Exception, e: - logger.info(u'Telegram notify failed: ' + str(e)) + except Exception as e: + logger.info('Telegram notify failed: ' + str(e)) # Sent text else: payload = {'chat_id': userid, 'parse_mode': "HTML", 'text': status + message} try: response = requests.post(TELEGRAM_API % (token, "sendMessage"), data=payload) - except Exception, e: - logger.info(u'Telegram notify failed: ' + str(e)) + except Exception as e: + logger.info('Telegram notify failed: ' + str(e)) # Error logging sent_successfuly = True @@ -1060,7 +1045,7 @@ class TELEGRAM(object): logger.info("Could not send notification to TelegramBot (token=%s). Response: [%s]", token, response.text) sent_successfuly = False - logger.info(u"Telegram notifications sent.") + logger.info("Telegram notifications sent.") return sent_successfuly @@ -1080,15 +1065,15 @@ class SLACK(object): try: response = requests.post(SLACK_URL, json=payload) - except Exception, e: - logger.info(u'Slack notify failed: ' + str(e)) + except Exception as e: + logger.info('Slack notify failed: ' + str(e)) sent_successfuly = True if not response.status_code == 200: logger.info( - u'Could not send notification to Slack. Response: [%s]', + 'Could not send notification to Slack. Response: [%s]', (response.text)) sent_successfuly = False - logger.info(u"Slack notifications sent.") + logger.info("Slack notifications sent.") return sent_successfuly diff --git a/headphones/nzbget.py b/headphones/nzbget.py index 779558ef..4884df7f 100644 --- a/headphones/nzbget.py +++ b/headphones/nzbget.py @@ -20,8 +20,8 @@ from base64 import standard_b64encode -import httplib -import xmlrpclib +import http.client +import xmlrpc.client import headphones from headphones import logger @@ -32,7 +32,7 @@ def sendNZB(nzb): nzbgetXMLrpc = "%(protocol)s://%(username)s:%(password)s@%(host)s/xmlrpc" if not headphones.CONFIG.NZBGET_HOST: - logger.error(u"No NZBget host found in configuration. Please configure it.") + logger.error("No NZBget host found in configuration. Please configure it.") return False if headphones.CONFIG.NZBGET_HOST.startswith('https://'): @@ -46,25 +46,25 @@ def sendNZB(nzb): "username": headphones.CONFIG.NZBGET_USERNAME, "password": headphones.CONFIG.NZBGET_PASSWORD} - nzbGetRPC = xmlrpclib.ServerProxy(url) + nzbGetRPC = xmlrpc.client.ServerProxy(url) try: if nzbGetRPC.writelog("INFO", "headphones connected to drop of %s any moment now." % ( nzb.name + ".nzb")): - logger.debug(u"Successfully connected to NZBget") + logger.debug("Successfully connected to NZBget") else: - logger.info(u"Successfully connected to NZBget, but unable to send a message" % ( + logger.info("Successfully connected to NZBget, but unable to send a message" % ( nzb.name + ".nzb")) - except httplib.socket.error: + except http.client.socket.error: logger.error( - u"Please check your NZBget host and port (if it is running). NZBget is not responding to this combination") + "Please check your NZBget host and port (if it is running). NZBget is not responding to this combination") return False - except xmlrpclib.ProtocolError, e: + except xmlrpc.client.ProtocolError as e: if e.errmsg == "Unauthorized": - logger.error(u"NZBget password is incorrect.") + logger.error("NZBget password is incorrect.") else: - logger.error(u"Protocol Error: " + e.errmsg) + logger.error("Protocol Error: " + e.errmsg) return False nzbcontent64 = None @@ -72,8 +72,8 @@ def sendNZB(nzb): data = nzb.extraInfo[0] nzbcontent64 = standard_b64encode(data) - logger.info(u"Sending NZB to NZBget") - logger.debug(u"URL: " + url) + logger.info("Sending NZB to NZBget") + logger.debug("URL: " + url) dupekey = "" dupescore = 0 @@ -131,12 +131,12 @@ def sendNZB(nzb): nzb.url) if nzbget_result: - logger.debug(u"NZB sent to NZBget successfully") + logger.debug("NZB sent to NZBget successfully") return True else: - logger.error(u"NZBget could not add %s to the queue" % (nzb.name + ".nzb")) + logger.error("NZBget could not add %s to the queue" % (nzb.name + ".nzb")) return False except: logger.error( - u"Connect Error to NZBget: could not add %s to the queue" % (nzb.name + ".nzb")) + "Connect Error to NZBget: could not add %s to the queue" % (nzb.name + ".nzb")) return False diff --git a/headphones/pathrender.py b/headphones/pathrender.py index 48798dd7..a456c059 100644 --- a/headphones/pathrender.py +++ b/headphones/pathrender.py @@ -30,7 +30,7 @@ syntax elements are supported: nonempty value only if any variable or optional inside returned nonempty value, ignoring literals (like {'{'$That'}'}). """ -from __future__ import print_function + from enum import Enum __author__ = "Andrzej Ciarkowski " @@ -111,9 +111,9 @@ class _OptionalBlock(_Generator): # type: (Mapping[str,str]) -> str res = [(isinstance(x, _Generator), x.render(replacement)) for x in self._scope] if any((t[0] and t[1] is not None and len(t[1]) != 0) for t in res): - return u"".join(t[1] for t in res) + return "".join(t[1] for t in res) else: - return u"" + return "" def __eq__(self, other): """ @@ -122,15 +122,15 @@ class _OptionalBlock(_Generator): return isinstance(other, _OptionalBlock) and self._scope == other._scope -_OPTIONAL_START = u'{' -_OPTIONAL_END = u'}' -_ESCAPE_CHAR = u'\'' -_REPLACEMENT_START = u'$' +_OPTIONAL_START = '{' +_OPTIONAL_END = '}' +_ESCAPE_CHAR = '\'' +_REPLACEMENT_START = '$' def _is_replacement_valid(c): # type: (str) -> bool - return c.isalnum() or c == u'_' + return c.isalnum() or c == '_' class _State(Enum): @@ -243,7 +243,7 @@ class Pattern(object): def __call__(self, replacement): # type: (Mapping[str,str]) -> str '''Execute path rendering/substitution based on replacement dictionary.''' - return u"".join(p.render(replacement) for p in self._pattern) + return "".join(p.render(replacement) for p in self._pattern) def _get_warnings(self): # type: () -> str @@ -262,6 +262,6 @@ def render(pattern, replacement): if __name__ == "__main__": # primitive test ;) - p = Pattern(u"{$Disc.}$Track - $Artist - $Title{ [$Year]}") - d = {'$Disc': '', '$Track': '05', '$Artist': u'Grzegżółka', '$Title': u'Błona kapłona', '$Year': '2019'} - assert p(d) == u"05 - Grzegżółka - Błona kapłona [2019]" + p = Pattern("{$Disc.}$Track - $Artist - $Title{ [$Year]}") + d = {'$Disc': '', '$Track': '05', '$Artist': 'Grzegżółka', '$Title': 'Błona kapłona', '$Year': '2019'} + assert p(d) == "05 - Grzegżółka - Błona kapłona [2019]" diff --git a/headphones/pathrender_test.py b/headphones/pathrender_test.py index 9bcb7f4c..9d67e68d 100644 --- a/headphones/pathrender_test.py +++ b/headphones/pathrender_test.py @@ -19,7 +19,7 @@ Test module for pathrender. import headphones.pathrender as _pr from headphones.pathrender import Pattern, Warnings -from unittestcompat import TestCase +from .unittestcompat import TestCase __author__ = "Andrzej Ciarkowski " @@ -32,21 +32,21 @@ class PathRenderTest(TestCase): def test_parsing(self): """pathrender: pattern parsing""" - pattern = Pattern(u"{$Disc.}$Track - $Artist - $Title{ [$Year]}") + pattern = Pattern("{$Disc.}$Track - $Artist - $Title{ [$Year]}") expected = [ _pr._OptionalBlock([ - _pr._Replacement(u"$Disc"), - _pr._LiteralText(u".") + _pr._Replacement("$Disc"), + _pr._LiteralText(".") ]), - _pr._Replacement(u"$Track"), - _pr._LiteralText(u" - "), - _pr._Replacement(u"$Artist"), - _pr._LiteralText(u" - "), - _pr._Replacement(u"$Title"), + _pr._Replacement("$Track"), + _pr._LiteralText(" - "), + _pr._Replacement("$Artist"), + _pr._LiteralText(" - "), + _pr._Replacement("$Title"), _pr._OptionalBlock([ - _pr._LiteralText(u" ["), - _pr._Replacement(u"$Year"), - _pr._LiteralText(u"]") + _pr._LiteralText(" ["), + _pr._Replacement("$Year"), + _pr._LiteralText("]") ]) ] self.assertEqual(expected, pattern._pattern) @@ -54,27 +54,27 @@ class PathRenderTest(TestCase): def test_parsing_warnings(self): """pathrender: pattern parsing with warnings""" - pattern = Pattern(u"{$Disc.}$Track - $Artist - $Title{ [$Year]") + pattern = Pattern("{$Disc.}$Track - $Artist - $Title{ [$Year]") self.assertEqual(set([Warnings.UNCLOSED_OPTIONAL]), pattern.warnings) - pattern = Pattern(u"{$Disc.}$Track - $Artist - $Title{ [$Year]'}") + pattern = Pattern("{$Disc.}$Track - $Artist - $Title{ [$Year]'}") self.assertEqual(set([Warnings.UNCLOSED_ESCAPE, Warnings.UNCLOSED_OPTIONAL]), pattern.warnings) def test_replacement(self): """pathrender: _Replacement variable substitution""" - r = _pr._Replacement(u"$Title") + r = _pr._Replacement("$Title") subst = {'$Title': 'foo', '$Track': 'bar'} res = r.render(subst) - self.assertEqual(res, u'foo', 'check valid replacement') + self.assertEqual(res, 'foo', 'check valid replacement') subst = {} res = r.render(subst) - self.assertEqual(res, u'$Title', 'check missing replacement') + self.assertEqual(res, '$Title', 'check missing replacement') subst = {'$Title': None} res = r.render(subst) self.assertEqual(res, '', 'check render() works with None') def test_literal(self): """pathrender: _Literal text rendering""" - l = _pr._LiteralText(u"foo") + l = _pr._LiteralText("foo") subst = {'$foo': 'bar'} res = l.render(subst) self.assertEqual(res, 'foo') @@ -82,12 +82,12 @@ class PathRenderTest(TestCase): def test_optional(self): """pathrender: _OptionalBlock element processing""" o = _pr._OptionalBlock([ - _pr._Replacement(u"$Title"), - _pr._LiteralText(u".foobar") + _pr._Replacement("$Title"), + _pr._LiteralText(".foobar") ]) subst = {'$Title': 'foo', '$Track': 'bar'} res = o.render(subst) - self.assertEqual(res, u'foo.foobar', 'check non-empty replacement') + self.assertEqual(res, 'foo.foobar', 'check non-empty replacement') subst = {'$Title': ''} res = o.render(subst) self.assertEqual(res, '', 'check empty replacement') diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py index 5c7f4ebe..875bad80 100755 --- a/headphones/postprocessor.py +++ b/headphones/postprocessor.py @@ -25,7 +25,7 @@ import headphones from beets import autotag from beets import config as beetsconfig from beets import logging as beetslogging -from beets.mediafile import MediaFile, FileTypeError, UnreadableFileError +from mediafile import MediaFile, FileTypeError, UnreadableFileError from beetsplug import lyrics as beetslyrics from headphones import notifiers, utorrent, transmission, deluge, qbittorrent from headphones import db, albumart, librarysync @@ -65,8 +65,8 @@ def checkFolder(): folder_name = torrent_folder_name if folder_name: - album_path = os.path.join(download_dir, folder_name).encode( - headphones.SYS_ENCODING, 'replace') + print(folder_name) + album_path = os.path.join(download_dir, folder_name) logger.debug("Checking if %s exists" % album_path) if os.path.exists(album_path): @@ -80,6 +80,7 @@ def checkFolder(): def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=False, single=False): + print(albumpath) myDB = db.DBConnection() release = myDB.action('SELECT * from albums WHERE AlbumID=?', [albumid]).fetchone() tracks = myDB.select('SELECT * from tracks WHERE AlbumID=?', [albumid]) @@ -135,11 +136,11 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal if headphones.CONFIG.RENAME_FROZEN: renameUnprocessedFolder(albumpath, tag="Frozen") else: - logger.warn(u"Won't rename %s to mark as 'Frozen', because it is disabled.", - albumpath.decode(headphones.SYS_ENCODING, 'replace')) + logger.warn("Won't rename %s to mark as 'Frozen', because it is disabled.", + albumpath) return - logger.info(u"Now adding/updating artist: " + release_dict['artist_name']) + logger.info("Now adding/updating artist: " + release_dict['artist_name']) if release_dict['artist_name'].startswith('The '): sortname = release_dict['artist_name'][4:] @@ -161,7 +162,7 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal myDB.upsert("artists", newValueDict, controlValueDict) - logger.info(u"Now adding album: " + release_dict['title']) + logger.info("Now adding album: " + release_dict['title']) controlValueDict = {"AlbumID": albumid} newValueDict = {"ArtistID": release_dict['artist_id'], @@ -202,7 +203,7 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal newValueDict = {"Status": "Paused"} myDB.upsert("artists", newValueDict, controlValueDict) - logger.info(u"Addition complete for: " + release_dict['title'] + " - " + release_dict[ + logger.info("Addition complete for: " + release_dict['title'] + " - " + release_dict[ 'artist_name']) release = myDB.action('SELECT * from albums WHERE AlbumID=?', [albumid]).fetchone() @@ -211,17 +212,18 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal downloaded_track_list = [] downloaded_cuecount = 0 - for r, d, f in os.walk(albumpath): - for files in f: - if any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS): - downloaded_track_list.append(os.path.join(r, files)) - elif files.lower().endswith('.cue'): + media_extensions = tuple(map(lambda x: '.' + x, headphones.MEDIA_FORMATS)) + + for root, dirs, files in os.walk(albumpath): + for file in files: + if file.endswith(media_extensions): + downloaded_track_list.append(os.path.join(root, file)) + elif file.endswith('.cue'): downloaded_cuecount += 1 # if any of the files end in *.part, we know the torrent isn't done yet. Process if forced, though - elif files.lower().endswith(('.part', '.utpart')) and not forced: + elif file.endswith(('.part', '.utpart')) and not forced: logger.info( - "Looks like " + os.path.basename(albumpath).decode(headphones.SYS_ENCODING, - 'replace') + " isn't complete yet. Will try again on the next run") + "Looks like " + os.path.basename(albumpath) + " isn't complete yet. Will try again on the next run") return # Force single file through @@ -264,10 +266,7 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal try: f = MediaFile(downloaded_track) except Exception as e: - logger.info( - u"Exception from MediaFile for: " + downloaded_track.decode(headphones.SYS_ENCODING, - 'replace') + u" : " + unicode( - e)) + logger.info(f"Exception from MediaFile for {downloaded_track}: {e}") continue if not f.artist: @@ -275,10 +274,10 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal if not f.album: continue - metaartist = helpers.latinToAscii(f.artist.lower()).encode('UTF-8') - dbartist = helpers.latinToAscii(release['ArtistName'].lower()).encode('UTF-8') - metaalbum = helpers.latinToAscii(f.album.lower()).encode('UTF-8') - dbalbum = helpers.latinToAscii(release['AlbumTitle'].lower()).encode('UTF-8') + metaartist = helpers.latinToAscii(f.artist.lower()) + dbartist = helpers.latinToAscii(release['ArtistName'].lower()) + metaalbum = helpers.latinToAscii(f.album.lower()) + dbalbum = helpers.latinToAscii(release['AlbumTitle'].lower()) logger.debug('Matching metadata artist: %s with artist name: %s' % (metaartist, dbartist)) logger.debug('Matching metadata album: %s with album name: %s' % (metaalbum, dbalbum)) @@ -298,8 +297,8 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal if not track['TrackTitle']: continue - dbtrack = helpers.latinToAscii(track['TrackTitle'].lower()).encode('UTF-8') - filetrack = helpers.latinToAscii(split_track_name).encode('UTF-8') + dbtrack = helpers.latinToAscii(track['TrackTitle'].lower()) + filetrack = helpers.latinToAscii(split_track_name) logger.debug('Checking if track title: %s is in file name: %s' % (dbtrack, filetrack)) if dbtrack in filetrack: @@ -340,11 +339,9 @@ def verify(albumid, albumpath, Kind=None, forced=False, keep_original_folder=Fal keep_original_folder, forced, single) return - logger.warn(u'Could not identify album: %s. It may not be the intended album.', - albumpath.decode(headphones.SYS_ENCODING, 'replace')) + logger.warn(f"Could not identify {albumpath}. It may not be the intended album") markAsUnprocessed(albumid, albumpath, keep_original_folder) - def markAsUnprocessed(albumid, albumpath, keep_original_folder=False): myDB = db.DBConnection() myDB.action( @@ -354,13 +351,19 @@ def markAsUnprocessed(albumid, albumpath, keep_original_folder=False): if headphones.CONFIG.RENAME_UNPROCESSED and not keep_original_folder: renameUnprocessedFolder(albumpath, tag="Unprocessed") else: - logger.warn(u"Won't rename %s to mark as 'Unprocessed', because it is disabled or folder is being kept.", - albumpath.decode(headphones.SYS_ENCODING, 'replace')) + logger.warn( + f"Won't rename {albumpath} to mark as 'Unprocessed', " + f"because it is disabled or folder is being kept." + ) + return def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, Kind=None, keep_original_folder=False, forced=False, single=False): - logger.info('Starting post-processing for: %s - %s' % (release['ArtistName'], release['AlbumTitle'])) + logger.info( + f"Starting post-processing for: {release['ArtistName']} - " + f"{release['AlbumTitle']}" + ) new_folder = None # Preserve the torrent dir @@ -393,12 +396,10 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, f = MediaFile(downloaded_track) builder.add_media_file(f) except (FileTypeError, UnreadableFileError): - logger.error("Track file is not a valid media file: %s. Not continuing.", - downloaded_track.decode(headphones.SYS_ENCODING, "replace")) + logger.error(f"`{downloaded_track}` is not a valid media file. Not continuing.") return except IOError: - logger.error("Unable to find media file: %s. Not continuing.", downloaded_track.decode( - headphones.SYS_ENCODING, "replace")) + logger.error(f"Unable to find `{downloaded_track}`. Not continuing.") if new_folder: shutil.rmtree(new_folder) return @@ -416,9 +417,10 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, fp.seek(0) except IOError as e: logger.debug("Write check exact error: %s", e) - logger.error("Track file is not writable. This is required " - "for some post processing steps: %s. Not continuing.", - downloaded_track.decode(headphones.SYS_ENCODING, "replace")) + logger.error( + f"`{downloaded_track}` is not writable. This is required " + "for some post processing steps. Not continuing." + ) if new_folder: shutil.rmtree(new_folder) return @@ -475,7 +477,8 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, else: albumpaths = [albumpath] - updateFilePermissions(albumpaths) + if headphones.CONFIG.FILE_PERMISSIONS_ENABLED: + updateFilePermissions(albumpaths) myDB = db.DBConnection() myDB.action('UPDATE albums SET status = "Downloaded" WHERE AlbumID=?', [albumid]) @@ -491,7 +494,7 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, if seed_snatched: hash = seed_snatched['TorrentHash'] torrent_removed = False - logger.info(u'%s - %s. Checking if torrent has finished seeding and can be removed' % ( + logger.info('%s - %s. Checking if torrent has finished seeding and can be removed' % ( release['ArtistName'], release['AlbumTitle'])) if headphones.CONFIG.TORRENT_DOWNLOADER == 1: torrent_removed = transmission.removeTorrent(hash, True) @@ -517,18 +520,18 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, ArtistName=release['ArtistName']) logger.info( - u'Post-processing for %s - %s complete' % (release['ArtistName'], release['AlbumTitle'])) + 'Post-processing for %s - %s complete' % (release['ArtistName'], release['AlbumTitle'])) pushmessage = release['ArtistName'] + ' - ' + release['AlbumTitle'] statusmessage = "Download and Postprocessing completed" if headphones.CONFIG.GROWL_ENABLED: - logger.info(u"Growl request") + logger.info("Growl request") growl = notifiers.GROWL() growl.notify(pushmessage, statusmessage) if headphones.CONFIG.PROWL_ENABLED: - logger.info(u"Prowl request") + logger.info("Prowl request") prowl = notifiers.PROWL() prowl.notify(pushmessage, statusmessage) @@ -559,7 +562,7 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, nma.notify(release['ArtistName'], release['AlbumTitle']) if headphones.CONFIG.PUSHALOT_ENABLED: - logger.info(u"Pushalot request") + logger.info("Pushalot request") pushalot = notifiers.PUSHALOT() pushalot.notify(pushmessage, statusmessage) @@ -569,35 +572,36 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, syno.notify(albumpath) if headphones.CONFIG.PUSHOVER_ENABLED: - logger.info(u"Pushover request") + logger.info("Pushover request") pushover = notifiers.PUSHOVER() pushover.notify(pushmessage, "Headphones") if headphones.CONFIG.PUSHBULLET_ENABLED: - logger.info(u"PushBullet request") + logger.info("PushBullet request") pushbullet = notifiers.PUSHBULLET() pushbullet.notify(pushmessage, statusmessage) if headphones.CONFIG.JOIN_ENABLED: - logger.info(u"Join request") + logger.info("Join request") join = notifiers.JOIN() join.notify(pushmessage, statusmessage) if headphones.CONFIG.TELEGRAM_ENABLED: - logger.info(u"Telegram request") + logger.info("Telegram request") telegram = notifiers.TELEGRAM() telegram.notify(statusmessage, pushmessage) if headphones.CONFIG.TWITTER_ENABLED: - logger.info(u"Sending Twitter notification") - twitter = notifiers.TwitterNotifier() - twitter.notify_download(pushmessage) + logger.info("Twitter notifications temporarily disabled") + #logger.info("Sending Twitter notification") + #twitter = notifiers.TwitterNotifier() + #twitter.notify_download(pushmessage) if headphones.CONFIG.OSX_NOTIFY_ENABLED: from headphones import cache c = cache.Cache() album_art = c.get_artwork_from_cache(None, release['AlbumID']) - logger.info(u"Sending OS X notification") + logger.info("Sending OS X notification") osx_notify = notifiers.OSX_NOTIFY() osx_notify.notify(release['ArtistName'], release['AlbumTitle'], @@ -605,13 +609,13 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, image=album_art) if headphones.CONFIG.BOXCAR_ENABLED: - logger.info(u"Sending Boxcar2 notification") + logger.info("Sending Boxcar2 notification") boxcar = notifiers.BOXCAR() boxcar.notify('Headphones processed: ' + pushmessage, statusmessage, release['AlbumID']) if headphones.CONFIG.SUBSONIC_ENABLED: - logger.info(u"Sending Subsonic update") + logger.info("Sending Subsonic update") subsonic = notifiers.SubSonicNotifier() subsonic.notify(albumpaths) @@ -620,7 +624,7 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list, mpc.notify() if headphones.CONFIG.EMAIL_ENABLED: - logger.info(u"Sending Email notification") + logger.info("Sending Email notification") email = notifiers.Email() subject = release['ArtistName'] + ' - ' + release['AlbumTitle'] email.notify(subject, "Download and Postprocessing completed") @@ -636,23 +640,21 @@ def embedAlbumArt(artwork, downloaded_track_list): try: f = MediaFile(downloaded_track) except: - logger.error(u'Could not read %s. Not adding album art' % downloaded_track.decode( - headphones.SYS_ENCODING, 'replace')) + logger.error(f"Could not read {downloaded_track}. Not adding album art") continue - logger.debug('Adding album art to: %s' % downloaded_track) + logger.debug(f"Adding album art to `{downloaded_track}`") try: f.art = artwork f.save() except Exception as e: - logger.error(u'Error embedding album art to: %s. Error: %s' % ( - downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), str(e))) + logger.error(f"Error embedding album art to `{downloaded_track}`: {e}") continue def addAlbumArt(artwork, albumpath, release, metadata_dict): - logger.info('Adding album art to folder') + logger.info(f"Adding album art to `{albumpath}`") md = metadata.album_metadata(albumpath, release, metadata_dict) ext = ".jpg" @@ -663,8 +665,7 @@ def addAlbumArt(artwork, albumpath, release, metadata_dict): album_art_name = helpers.pattern_substitute( headphones.CONFIG.ALBUM_ART_FORMAT.strip(), md) + ext - album_art_name = helpers.replace_illegal_chars(album_art_name).encode( - headphones.SYS_ENCODING, 'replace') + album_art_name = helpers.replace_illegal_chars(album_art_name) if headphones.CONFIG.FILE_UNDERSCORES: album_art_name = album_art_name.replace(' ', '_') @@ -684,14 +685,13 @@ def cleanupFiles(albumpath): logger.info('Cleaning up files') for r, d, f in os.walk(albumpath): - for files in f: - if not any(files.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS): - logger.debug('Removing: %s' % files) + for file in f: + if not any(file.lower().endswith('.' + x.lower()) for x in headphones.MEDIA_FORMATS): + logger.debug('Removing: %s' % file) try: - os.remove(os.path.join(r, files)) + os.remove(os.path.join(r, file)) except Exception as e: - logger.error(u'Could not remove file: %s. Error: %s' % ( - files.decode(headphones.SYS_ENCODING, 'replace'), e)) + logger.error('Could not remove file: %s. Error: %s' % (file, e)) def renameNFO(albumpath): @@ -701,19 +701,16 @@ def renameNFO(albumpath): for file in f: if file.lower().endswith('.nfo'): if not file.lower().endswith('.orig.nfo'): - logger.debug('Renaming: "%s" to "%s"' % ( - file.decode(headphones.SYS_ENCODING, 'replace'), - file.decode(headphones.SYS_ENCODING, 'replace') + '-orig')) try: new_file_name = os.path.join(r, file)[:-3] + 'orig.nfo' + logger.debug(f"Renaming `{file}` to `{new_file_name}`") os.rename(os.path.join(r, file), new_file_name) except Exception as e: - logger.error(u'Could not rename file: %s. Error: %s' % ( - os.path.join(r, file).decode(headphones.SYS_ENCODING, 'replace'), e)) + logger.error(f"Could not rename {file}: {e}") def moveFiles(albumpath, release, metadata_dict): - logger.info("Moving files: %s" % albumpath) + logger.info(f"Moving files: `{albumpath}`") md = metadata.album_metadata(albumpath, release, metadata_dict) folder = helpers.pattern_substitute( @@ -750,12 +747,8 @@ def moveFiles(albumpath, release, metadata_dict): make_lossy_folder = False make_lossless_folder = False - lossy_destination_path = os.path.normpath( - os.path.join(headphones.CONFIG.DESTINATION_DIR, folder)).encode(headphones.SYS_ENCODING, - 'replace') - lossless_destination_path = os.path.normpath( - os.path.join(headphones.CONFIG.LOSSLESS_DESTINATION_DIR, folder)).encode( - headphones.SYS_ENCODING, 'replace') + lossy_destination_path = os.path.join(headphones.CONFIG.DESTINATION_DIR, folder) + lossless_destination_path = os.path.join(headphones.CONFIG.LOSSLESS_DESTINATION_DIR, folder) # If they set a destination dir for lossless media, only create the lossy folder if there is lossy media if headphones.CONFIG.LOSSLESS_DESTINATION_DIR: @@ -780,8 +773,9 @@ def moveFiles(albumpath, release, metadata_dict): shutil.rmtree(lossless_destination_path) except Exception as e: logger.error( - "Error deleting existing folder: %s. Creating duplicate folder. Error: %s" % ( - lossless_destination_path.decode(headphones.SYS_ENCODING, 'replace'), e)) + f"Error deleting `{lossless_destination_path}`. " + f"Creating duplicate folder. Error: {e}" + ) create_duplicate_folder = True if not headphones.CONFIG.REPLACE_EXISTING_FOLDERS or create_duplicate_folder: @@ -791,8 +785,11 @@ def moveFiles(albumpath, release, metadata_dict): while True: newfolder = temp_folder + '[%i]' % i lossless_destination_path = os.path.normpath( - os.path.join(headphones.CONFIG.LOSSLESS_DESTINATION_DIR, newfolder)).encode( - headphones.SYS_ENCODING, 'replace') + os.path.join( + headphones.CONFIG.LOSSLESS_DESTINATION_DIR, + newfolder + ) + ) if os.path.exists(lossless_destination_path): i += 1 else: @@ -818,8 +815,9 @@ def moveFiles(albumpath, release, metadata_dict): shutil.rmtree(lossy_destination_path) except Exception as e: logger.error( - "Error deleting existing folder: %s. Creating duplicate folder. Error: %s" % ( - lossy_destination_path.decode(headphones.SYS_ENCODING, 'replace'), e)) + f"Error deleting `{lossy_destination_path}`. " + f"Creating duplicate folder. Error: {e}" + ) create_duplicate_folder = True if not headphones.CONFIG.REPLACE_EXISTING_FOLDERS or create_duplicate_folder: @@ -829,8 +827,11 @@ def moveFiles(albumpath, release, metadata_dict): while True: newfolder = temp_folder + '[%i]' % i lossy_destination_path = os.path.normpath( - os.path.join(headphones.CONFIG.DESTINATION_DIR, newfolder)).encode( - headphones.SYS_ENCODING, 'replace') + os.path.join( + headphones.CONFIG.DESTINATION_DIR, + newfolder + ) + ) if os.path.exists(lossy_destination_path): i += 1 else: @@ -876,12 +877,11 @@ def moveFiles(albumpath, release, metadata_dict): os.remove(file_to_move) except Exception as e: logger.error( - "Error deleting file '" + file_to_move.decode(headphones.SYS_ENCODING, - 'replace') + "' from source directory") + f"Error deleting `{file_to_move}` from source directory") else: - logger.error("Error copying '" + file_to_move.decode(headphones.SYS_ENCODING, - 'replace') + "'. Not deleting from download directory") - + logger.error( + f"Error copying `{file_to_move}`. " + f"Not deleting from download directory") elif make_lossless_folder and not make_lossy_folder: for file_to_move in files_to_move: @@ -910,20 +910,20 @@ def moveFiles(albumpath, release, metadata_dict): if headphones.CONFIG.FOLDER_PERMISSIONS_ENABLED: try: - os.chmod(os.path.normpath(temp_f).encode(headphones.SYS_ENCODING, 'replace'), + os.chmod(os.path.normpath(temp_f), int(headphones.CONFIG.FOLDER_PERMISSIONS, 8)) except Exception as e: - logger.error("Error trying to change permissions on folder: %s. %s", - temp_f.decode(headphones.SYS_ENCODING, 'replace'), e) + logger.error(f"Error trying to change permissions on `{temp_f}`: {e}") else: - logger.debug("Not changing folder permissions, since it is disabled: %s", - temp_f.decode(headphones.SYS_ENCODING, 'replace')) + logger.debug( + f"Not changing permissions on `{temp_f}`, " + "since it is disabled") # If we failed to move all the files out of the directory, this will fail too try: shutil.rmtree(albumpath) except Exception as e: - logger.error('Could not remove directory: %s. %s', albumpath, e) + logger.error(f"Could not remove `{albumpath}`: {e}") destination_paths = [] @@ -952,11 +952,15 @@ def correctMetadata(albumid, release, downloaded_track_list): headphones.LOSSY_MEDIA_FORMATS): lossy_items.append(beets.library.Item.from_path(downloaded_track)) else: - logger.warn("Skipping: %s because it is not a mutagen friendly file format", - downloaded_track.decode(headphones.SYS_ENCODING, 'replace')) + logger.warn( + f"Skipping `{downloaded_track}` because it is " + f"not a mutagen friendly file format" + ) + continue except Exception as e: - logger.error("Beets couldn't create an Item from: %s - not a media file? %s", - downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), str(e)) + logger.error( + f"Beets couldn't create an Item from `{downloaded_track}`: {e}") + continue for items in [lossy_items, lossless_items]: @@ -1018,11 +1022,9 @@ def correctMetadata(albumid, release, downloaded_track_list): for item in items: try: item.write() - logger.info("Successfully applied metadata to: %s", - item.path.decode(headphones.SYS_ENCODING, 'replace')) + logger.info(f"Successfully applied metadata to `{item.path}`") except Exception as e: - logger.warn("Error writing metadata to '%s': %s", - item.path.decode(headphones.SYS_ENCODING, 'replace'), str(e)) + logger.warn(f"Error writing metadata to `{item.path}: {e}") return False return True @@ -1048,11 +1050,11 @@ def embedLyrics(downloaded_track_list): headphones.LOSSY_MEDIA_FORMATS): lossy_items.append(beets.library.Item.from_path(downloaded_track)) else: - logger.warn("Skipping: %s because it is not a mutagen friendly file format", - downloaded_track.decode(headphones.SYS_ENCODING, 'replace')) + logger.warn( + f"Skipping `{downloaded_track}` because it is " + f"not a mutagen friendly file format") except Exception as e: - logger.error("Beets couldn't create an Item from: %s - not a media file? %s", - downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), str(e)) + logger.error(f"Beets couldn't create an Item from `{downloaded_track}`: {e}") for items in [lossy_items, lossless_items]: @@ -1067,7 +1069,7 @@ def embedLyrics(downloaded_track_list): if any(lyrics): break - lyrics = u"\n\n---\n\n".join([l for l in lyrics if l]) + lyrics = "\n\n---\n\n".join([l for l in lyrics if l]) if lyrics: logger.debug('Adding lyrics to: %s', item.title) @@ -1099,8 +1101,7 @@ def renameFiles(albumpath, downloaded_track_list, release): headphones.CONFIG.FILE_FORMAT.strip(), md ).replace('/', '_') + ext - new_file_name = helpers.replace_illegal_chars(new_file_name).encode( - headphones.SYS_ENCODING, 'replace') + new_file_name = helpers.replace_illegal_chars(new_file_name) if headphones.CONFIG.FILE_UNDERSCORES: new_file_name = new_file_name.replace(' ', '_') @@ -1111,37 +1112,28 @@ def renameFiles(albumpath, downloaded_track_list, release): new_file = os.path.join(albumpath, new_file_name) if downloaded_track == new_file_name: - logger.debug("Renaming for: " + downloaded_track.decode( - headphones.SYS_ENCODING, 'replace') + " is not neccessary") + logger.debug(f"Renaming for {downloaded_track} is not neccessary") continue - logger.debug('Renaming %s ---> %s', - downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), - new_file_name.decode(headphones.SYS_ENCODING, 'replace')) + logger.debug(f"Renaming {downloaded_track} ---> {new_file_name}") try: os.rename(downloaded_track, new_file) except Exception as e: - logger.error('Error renaming file: %s. Error: %s', - downloaded_track.decode(headphones.SYS_ENCODING, 'replace'), e) + logger.error(f"Error renaming {downloaded_track}: {e}") continue def updateFilePermissions(albumpaths): for folder in albumpaths: - logger.info("Updating file permissions in %s", folder) + logger.info(f"Updating file permissions in `{folder}`") for r, d, f in os.walk(folder): for files in f: full_path = os.path.join(r, files) - if headphones.CONFIG.FILE_PERMISSIONS_ENABLED: - try: - os.chmod(full_path, int(headphones.CONFIG.FILE_PERMISSIONS, 8)) - except: - logger.error("Could not change permissions for file: %s", full_path) - continue - else: - logger.debug("Not changing file permissions, since it is disabled: %s", - full_path.decode(headphones.SYS_ENCODING, 'replace')) - + try: + os.chmod(full_path, int(headphones.CONFIG.FILE_PERMISSIONS, 8)) + except: + logger.error(f"Could not change permissions for `{full_path}`") + continue def renameUnprocessedFolder(path, tag): """ @@ -1168,18 +1160,16 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig ignored = 0 if album_dir: - folders = [album_dir.encode(headphones.SYS_ENCODING, 'replace')] + folders = [album_dir] else: download_dirs = [] if dir: - download_dirs.append(dir.encode(headphones.SYS_ENCODING, 'replace')) + download_dirs.append(dir) if headphones.CONFIG.DOWNLOAD_DIR and not dir: - download_dirs.append( - headphones.CONFIG.DOWNLOAD_DIR.encode(headphones.SYS_ENCODING, 'replace')) + download_dirs.append(headphones.CONFIG.DOWNLOAD_DIR) if headphones.CONFIG.DOWNLOAD_TORRENT_DIR and not dir: - download_dirs.append( - headphones.CONFIG.DOWNLOAD_TORRENT_DIR.encode(headphones.SYS_ENCODING, 'replace')) + download_dirs.append(headphones.CONFIG.DOWNLOAD_TORRENT_DIR) # If DOWNLOAD_DIR and DOWNLOAD_TORRENT_DIR are the same, remove the duplicate to prevent us from trying to process the same folder twice. download_dirs = list(set(download_dirs)) @@ -1223,7 +1213,7 @@ def forcePostProcess(dir=None, expand_subfolders=True, album_dir=None, keep_orig myDB = db.DBConnection() for folder in folders: - folder_basename = os.path.basename(folder).decode(headphones.SYS_ENCODING, 'replace') + folder_basename = os.path.basename(folder) logger.info('Processing: %s', folder_basename) # Attempt 1: First try to see if there's a match in the snatched table, diff --git a/headphones/qbittorrent.py b/headphones/qbittorrent.py index f32c1155..236c9b00 100755 --- a/headphones/qbittorrent.py +++ b/headphones/qbittorrent.py @@ -13,9 +13,9 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -import urllib -import urllib2 -import cookielib +import urllib.request, urllib.parse, urllib.error +import urllib.request, urllib.error, urllib.parse +import http.cookiejar import json import time import mimetypes @@ -61,23 +61,23 @@ class qbittorrentclient(object): self.version = 2 except Exception as e: logger.warning("Error with qBittorrent v2 api, check settings or update, will try v1: %s" % e) - self.cookiejar = cookielib.CookieJar() + self.cookiejar = http.cookiejar.CookieJar() self.opener = self._make_opener() self._get_sid(self.base_url, self.username, self.password) self.version = 1 def _make_opener(self): # create opener with cookie handler to carry QBitTorrent SID cookie - cookie_handler = urllib2.HTTPCookieProcessor(self.cookiejar) + cookie_handler = urllib.request.HTTPCookieProcessor(self.cookiejar) handlers = [cookie_handler] - return urllib2.build_opener(*handlers) + return urllib.request.build_opener(*handlers) def _get_sid(self, base_url, username, password): # login so we can capture SID cookie - login_data = urllib.urlencode({'username': username, 'password': password}) + login_data = urllib.parse.urlencode({'username': username, 'password': password}) try: self.opener.open(base_url + '/login', login_data) - except urllib2.URLError as err: + except urllib.error.URLError as err: logger.debug('Error getting SID. qBittorrent responded with error: ' + str(err.reason)) return for cookie in self.cookiejar: @@ -95,14 +95,14 @@ class qbittorrentclient(object): data, headers = encode_multipart(args, files) else: if args: - data = urllib.urlencode(args) + data = urllib.parse.urlencode(args) if content_type: headers['Content-Type'] = content_type logger.debug('%s' % json.dumps(headers, indent=4)) logger.debug('%s' % data) - request = urllib2.Request(url, data, headers) + request = urllib.request.Request(url, data, headers) try: response = self.opener.open(request) info = response.info() @@ -117,7 +117,7 @@ class qbittorrentclient(object): return response.code, json.loads(resp) logger.debug('response code: %s' % str(response.code)) return response.code, None - except urllib2.URLError as err: + except urllib.error.URLError as err: logger.debug('Failed URL: %s' % url) logger.debug('QBitTorrent webUI raised the following error: %s' % str(err)) return None, None @@ -319,7 +319,7 @@ def encode_multipart(args, files, boundary=None): lines = [] if args: - for name, value in args.items(): + for name, value in list(args.items()): lines.extend(( '--{0}'.format(boundary), 'Content-Disposition: form-data; name="{0}"'.format(escape_quote(name)), @@ -329,7 +329,7 @@ def encode_multipart(args, files, boundary=None): logger.debug(''.join(lines)) if files: - for name, value in files.items(): + for name, value in list(files.items()): filename = value['filename'] if 'mimetype' in value: mimetype = value['mimetype'] diff --git a/headphones/request.py b/headphones/request.py index dac4ffb4..6b8a3630 100644 --- a/headphones/request.py +++ b/headphones/request.py @@ -138,7 +138,7 @@ def request_soup(url, **kwargs): no exceptions are raised. """ - parser = kwargs.pop("parser", "html5lib") + parser = kwargs.pop("parser", "html.parser") response = request_response(url, **kwargs) if response is not None: @@ -222,7 +222,7 @@ def server_message(response): if response.headers.get("content-type") and \ "text/html" in response.headers.get("content-type"): try: - soup = BeautifulSoup(response.content, "html5lib") + soup = BeautifulSoup(response.content, "html.parser") except Exception: pass diff --git a/headphones/rutracker.py b/headphones/rutracker.py index a9407194..8c04d33f 100644 --- a/headphones/rutracker.py +++ b/headphones/rutracker.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -import urllib +import urllib.request, urllib.parse, urllib.error import time -from urlparse import urlparse +from urllib.parse import urlparse import re import requests as requests @@ -68,10 +68,10 @@ class Rutracker(object): return self.loggedin def has_bb_session_cookie(self, response): - if 'bb_session' in response.cookies.keys(): + if 'bb_session' in list(response.cookies.keys()): return True # Rutracker randomly send a 302 redirect code, cookie may be present in response history - return next(('bb_session' in r.cookies.keys() for r in response.history), False) + return next(('bb_session' in list(r.cookies.keys()) for r in response.history), False) def searchurl(self, artist, album, year, format): """ @@ -99,10 +99,10 @@ class Rutracker(object): # sort by size, descending. sort = '&o=7&s=2' try: - searchurl = "%s?nm=%s%s%s" % (self.search_referer, urllib.quote(searchterm), format, sort) + searchurl = "%s?nm=%s%s%s" % (self.search_referer, urllib.parse.quote(searchterm), format, sort) except: searchterm = searchterm.encode('utf-8') - searchurl = "%s?nm=%s%s%s" % (self.search_referer, urllib.quote(searchterm), format, sort) + searchurl = "%s?nm=%s%s%s" % (self.search_referer, urllib.parse.quote(searchterm), format, sort) logger.info("Searching rutracker using term: %s", searchterm) return searchurl @@ -114,7 +114,7 @@ class Rutracker(object): try: headers = {'Referer': self.search_referer} r = self.session.get(url=searchurl, headers=headers, timeout=self.timeout) - soup = BeautifulSoup(r.content, 'html5lib') + soup = BeautifulSoup(r.content, 'html.parser') # Debug # logger.debug (soup.prettify()) @@ -123,7 +123,7 @@ class Rutracker(object): if not self.still_logged_in(soup): self.login() r = self.session.get(url=searchurl, timeout=self.timeout) - soup = BeautifulSoup(r.content, 'html5lib') + soup = BeautifulSoup(r.content, 'html.parser') if not self.still_logged_in(soup): logger.error("Error getting rutracker data") return None diff --git a/headphones/sab.py b/headphones/sab.py index fa7c3309..01ef6e21 100644 --- a/headphones/sab.py +++ b/headphones/sab.py @@ -17,7 +17,7 @@ # Stolen from Sick-Beard's sab.py # ################################### -import cookielib +import http.cookiejar import headphones from headphones.common import USER_AGENT @@ -74,29 +74,28 @@ def sendNZB(nzb): # if we get a raw data result we want to upload it to SAB elif nzb.resultType == "nzbdata": - # Sanitize the file a bit, since we can only use ascii chars with MultiPartPostHandler - nzbdata = helpers.latinToAscii(nzb.extraInfo[0]) + nzbdata = nzb.extraInfo[0] params['mode'] = 'addfile' - files = {"nzbfile": (helpers.latinToAscii(nzb.name) + ".nzb", nzbdata)} + files = {"nzbfile": (nzb.name + ".nzb", nzbdata)} headers = {'User-Agent': USER_AGENT} logger.info("Attempting to connect to SABnzbd on url: %s" % headphones.CONFIG.SAB_HOST) if nzb.resultType == "nzb": response = sab_api_call('send_nzb', params=params) elif nzb.resultType == "nzbdata": - cookies = cookielib.CookieJar() + cookies = http.cookiejar.CookieJar() response = sab_api_call('send_nzb', params=params, method="post", files=files, cookies=cookies, headers=headers) if not response: - logger.info(u"No data returned from SABnzbd, NZB not sent") + logger.info("No data returned from SABnzbd, NZB not sent") return False if response['status']: - logger.info(u"NZB sent to SABnzbd successfully") + logger.info("NZB sent to SABnzbd successfully") return True else: - logger.error(u"Error sending NZB to SABnzbd: %s" % response['error']) + logger.error("Error sending NZB to SABnzbd: %s" % response['error']) return False diff --git a/headphones/searcher.py b/headphones/searcher.py index 499b69b8..fbb4826d 100644 --- a/headphones/searcher.py +++ b/headphones/searcher.py @@ -19,11 +19,11 @@ from base64 import b16encode, b32decode from hashlib import sha1 import string import random -import urllib +import urllib.request, urllib.parse, urllib.error import datetime import subprocess import unicodedata -import urlparse +import urllib.parse import os import re @@ -35,7 +35,8 @@ import headphones from headphones.common import USER_AGENT from headphones import logger, db, helpers, classes, sab, nzbget, request from headphones import utorrent, transmission, notifiers, rutracker, deluge, qbittorrent -from bencode import bencode, bdecode +from bencode import encode as bencode +from bencode import decode as bdecode # Magnet to torrent services, for Black hole. Stolen from CouchPotato. TORRENT_TO_MAGNET_SERVICES = [ @@ -56,14 +57,11 @@ def fix_url(s, charset="utf-8"): Fix the URL so it is proper formatted and encoded. """ - if isinstance(s, unicode): - s = s.encode(charset, 'ignore') + scheme, netloc, path, qs, anchor = urllib.parse.urlsplit(s) + path = urllib.parse.quote(path, '/%') + qs = urllib.parse.quote_plus(qs, ':&=') - scheme, netloc, path, qs, anchor = urlparse.urlsplit(s) - path = urllib.quote(path, '/%') - qs = urllib.quote_plus(qs, ':&=') - - return urlparse.urlunsplit((scheme, netloc, path, qs, anchor)) + return urllib.parse.urlunsplit((scheme, netloc, path, qs, anchor)) def torrent_to_file(target_file, data): @@ -88,14 +86,10 @@ def torrent_to_file(target_file, data): try: os.chmod(target_file, int(headphones.CONFIG.FILE_PERMISSIONS, 8)) except OSError as e: - logger.warn( - "Could not change permissions for file '%s': %s. Continuing.", - target_file.decode(headphones.SYS_ENCODING, "replace"), - e.message) + logger.warn(f"Could not change permissions for `{target_file}`: {e}") else: logger.debug( - "Not changing file permissions, since it is disabled: %s", - target_file.decode(headphones.SYS_ENCODING, "replace")) + f"Not changing file permissions for `{target_file}, since it is disabled") # Done return True @@ -332,19 +326,13 @@ def do_sorted_search(album, new, losslessOnly, choose_specific_download=False): if not sorted_search_results: return - logger.info(u"Making sure we can download the best result") + logger.info("Making sure we can download the best result") (data, bestqual) = preprocess(sorted_search_results) if data and bestqual: send_to_downloader(data, bestqual, album) -def removeDisallowedFilenameChars(filename): - validFilenameChars = "-_.() %s%s" % (string.ascii_letters, string.digits) - cleanedFilename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore').lower() - return ''.join(c for c in cleanedFilename if c in validFilenameChars) - - def more_filtering(results, album, albumlength, new): low_size_limit = None high_size_limit = None @@ -533,9 +521,9 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, else: term = cleanartist + ' ' + cleanalbum - # Replace bad characters in the term and unicode it - term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') - artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8') + # Replace bad characters in the term + term = re.sub('[\.\-\/]', ' ', term) + artistterm = re.sub('[\.\-\/]', ' ', cleanartist) # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE and headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS: @@ -582,7 +570,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, # Process feed if data: if not len(data.entries): - logger.info(u"No results found from %s for %s" % ('Headphones Index', term)) + logger.info("No results found from %s for %s" % ('Headphones Index', term)) else: for item in data.entries: try: @@ -593,7 +581,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, resultlist.append((title, size, url, provider, 'nzb', True)) logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size))) except Exception as e: - logger.error(u"An unknown error occurred trying to parse the feed: %s" % e) + logger.error("An unknown error occurred trying to parse the feed: %s" % e) if headphones.CONFIG.NEWZNAB: provider = "newznab" @@ -653,7 +641,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, # Process feed if data: if not len(data.entries): - logger.info(u"No results found from %s for %s", newznab_host[0], term) + logger.info("No results found from %s for %s", newznab_host[0], term) else: for item in data.entries: try: @@ -703,7 +691,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, # Process feed if data: if not len(data.entries): - logger.info(u"No results found from nzbs.org for %s" % term) + logger.info("No results found from nzbs.org for %s" % term) else: for item in data.entries: try: @@ -750,7 +738,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, # Parse response if data: if 'notice' in data: - logger.info(u"No results returned from omgwtfnzbs: %s" % data['notice']) + logger.info("No results returned from omgwtfnzbs: %s" % data['notice']) else: for item in data: try: @@ -780,7 +768,7 @@ def searchNZB(album, new=False, losslessOnly=False, albumlength=None, def send_to_downloader(data, bestqual, album): - logger.info(u'Found best result from %s: %s - %s', bestqual[3], bestqual[2], + logger.info('Found best result from %s: %s - %s', bestqual[3], bestqual[2], bestqual[0], helpers.bytes_to_mb(bestqual[1])) # Get rid of any dodgy chars here so we can prevent sab from renaming our downloads kind = bestqual[4] @@ -831,8 +819,8 @@ def send_to_downloader(data, bestqual, album): return else: folder_name = '%s - %s [%s]' % ( - helpers.latinToAscii(album['ArtistName']).encode('UTF-8').replace('/', '_'), - helpers.latinToAscii(album['AlbumTitle']).encode('UTF-8').replace('/', '_'), + helpers.latinToAscii(album['ArtistName']).replace('/', '_'), + helpers.latinToAscii(album['AlbumTitle']).replace('/', '_'), get_year_from_release_date(album['ReleaseDate'])) # Blackhole @@ -1058,31 +1046,31 @@ def send_to_downloader(data, bestqual, album): name = folder_name if folder_name else None if headphones.CONFIG.GROWL_ENABLED and headphones.CONFIG.GROWL_ONSNATCH: - logger.info(u"Sending Growl notification") + logger.info("Sending Growl notification") growl = notifiers.GROWL() growl.notify(name, "Download started") if headphones.CONFIG.PROWL_ENABLED and headphones.CONFIG.PROWL_ONSNATCH: - logger.info(u"Sending Prowl notification") + logger.info("Sending Prowl notification") prowl = notifiers.PROWL() prowl.notify(name, "Download started") if headphones.CONFIG.PUSHOVER_ENABLED and headphones.CONFIG.PUSHOVER_ONSNATCH: - logger.info(u"Sending Pushover notification") + logger.info("Sending Pushover notification") prowl = notifiers.PUSHOVER() prowl.notify(name, "Download started") if headphones.CONFIG.PUSHBULLET_ENABLED and headphones.CONFIG.PUSHBULLET_ONSNATCH: - logger.info(u"Sending PushBullet notification") + logger.info("Sending PushBullet notification") pushbullet = notifiers.PUSHBULLET() pushbullet.notify(name, "Download started") if headphones.CONFIG.JOIN_ENABLED and headphones.CONFIG.JOIN_ONSNATCH: - logger.info(u"Sending Join notification") + logger.info("Sending Join notification") join = notifiers.JOIN() join.notify(name, "Download started") if headphones.CONFIG.SLACK_ENABLED and headphones.CONFIG.SLACK_ONSNATCH: - logger.info(u"Sending Slack notification") + logger.info("Sending Slack notification") slack = notifiers.SLACK() slack.notify(name, "Download started") if headphones.CONFIG.TELEGRAM_ENABLED and headphones.CONFIG.TELEGRAM_ONSNATCH: - logger.info(u"Sending Telegram notification") + logger.info("Sending Telegram notification") from headphones import cache c = cache.Cache() album_art = c.get_artwork_from_cache(None, rgid) @@ -1090,34 +1078,35 @@ def send_to_downloader(data, bestqual, album): message = 'Snatched from ' + provider + '. ' + name telegram.notify(message, "Snatched: " + title, rgid, image=album_art) if headphones.CONFIG.TWITTER_ENABLED and headphones.CONFIG.TWITTER_ONSNATCH: - logger.info(u"Sending Twitter notification") - twitter = notifiers.TwitterNotifier() - twitter.notify_snatch(name) + logger.info("Twitter notifications temporarily disabled") + #logger.info("Sending Twitter notification") + #twitter = notifiers.TwitterNotifier() + #twitter.notify_snatch(name) if headphones.CONFIG.NMA_ENABLED and headphones.CONFIG.NMA_ONSNATCH: - logger.info(u"Sending NMA notification") + logger.info("Sending NMA notification") nma = notifiers.NMA() nma.notify(snatched=name) if headphones.CONFIG.PUSHALOT_ENABLED and headphones.CONFIG.PUSHALOT_ONSNATCH: - logger.info(u"Sending Pushalot notification") + logger.info("Sending Pushalot notification") pushalot = notifiers.PUSHALOT() pushalot.notify(name, "Download started") if headphones.CONFIG.OSX_NOTIFY_ENABLED and headphones.CONFIG.OSX_NOTIFY_ONSNATCH: from headphones import cache c = cache.Cache() album_art = c.get_artwork_from_cache(None, rgid) - logger.info(u"Sending OS X notification") + logger.info("Sending OS X notification") osx_notify = notifiers.OSX_NOTIFY() osx_notify.notify(artist, albumname, 'Snatched: ' + provider + '. ' + name, image=album_art) if headphones.CONFIG.BOXCAR_ENABLED and headphones.CONFIG.BOXCAR_ONSNATCH: - logger.info(u"Sending Boxcar2 notification") + logger.info("Sending Boxcar2 notification") b2msg = 'From ' + provider + '

' + name boxcar = notifiers.BOXCAR() boxcar.notify('Headphones snatched: ' + title, b2msg, rgid) if headphones.CONFIG.EMAIL_ENABLED and headphones.CONFIG.EMAIL_ONSNATCH: - logger.info(u"Sending Email notification") + logger.info("Sending Email notification") email = notifiers.Email() message = 'Snatched from ' + provider + '. ' + name email.notify("Snatched: " + title, message) @@ -1252,12 +1241,12 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, else: usersearchterm = '' - semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist).encode('utf-8', 'replace') - semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum).encode('utf-8', 'replace') - # Replace bad characters in the term and unicode it - term = re.sub('[\.\-\/]', ' ', term).encode('utf-8') - artistterm = re.sub('[\.\-\/]', ' ', cleanartist).encode('utf-8', 'replace') - albumterm = re.sub('[\.\-\/]', ' ', cleanalbum).encode('utf-8', 'replace') + semi_clean_artist_term = re.sub('[\.\-\/]', ' ', semi_cleanartist) + semi_clean_album_term = re.sub('[\.\-\/]', ' ', semi_cleanalbum) + # Replace bad characters in the term + term = re.sub('[\.\-\/]', ' ', term) + artistterm = re.sub('[\.\-\/]', ' ', cleanartist) + albumterm = re.sub('[\.\-\/]', ' ', cleanalbum) # If Preferred Bitrate and High Limit and Allow Lossless then get both lossy and lossless if headphones.CONFIG.PREFERRED_QUALITY == 2 and headphones.CONFIG.PREFERRED_BITRATE and headphones.CONFIG.PREFERRED_BITRATE_HIGH_BUFFER and headphones.CONFIG.PREFERRED_BITRATE_ALLOW_LOSSLESS: @@ -1333,7 +1322,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, if data: items = data.find_all('item') if not items: - logger.info(u"No results found from %s for %s", provider, term) + logger.info("No results found from %s for %s", provider, term) else: for item in items: try: @@ -1427,7 +1416,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, # Process feed if data: if not len(data.entries): - logger.info(u"No results found from %s for %s", provider, term) + logger.info("No results found from %s for %s", provider, term) else: for item in data.entries: try: @@ -1439,7 +1428,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, logger.info('Found %s. Size: %s', title, helpers.bytes_to_mb(size)) except Exception as e: logger.error( - u"An error occurred while trying to parse the response from Waffles.ch: %s", + "An error occurred while trying to parse the response from Waffles.ch: %s", e) # rutracker.org @@ -1448,7 +1437,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, # Ignore if release date not specified, results too unpredictable if not year and not usersearchterm: - logger.info(u"Release date not specified, ignoring for rutracker.org") + logger.info("Release date not specified, ignoring for rutracker.org") else: if headphones.CONFIG.PREFERRED_QUALITY == 3 or losslessOnly: format = 'lossless' @@ -1501,7 +1490,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, bitrate_string = encoding_string if bitrate_string not in gazelleencoding.ALL_ENCODINGS: logger.info( - u"Your preferred bitrate is not one of the available Orpheus.network filters, so not using it as a search parameter.") + "Your preferred bitrate is not one of the available Orpheus.network filters, so not using it as a search parameter.") maxsize = 10000000000 elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: # Highest quality including lossless search_formats = [gazelleformat.FLAC, gazelleformat.MP3] @@ -1512,18 +1501,18 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, if not orpheusobj or not orpheusobj.logged_in(): try: - logger.info(u"Attempting to log in to Orpheus.network...") + logger.info("Attempting to log in to Orpheus.network...") orpheusobj = gazelleapi.GazelleAPI(headphones.CONFIG.ORPHEUS_USERNAME, headphones.CONFIG.ORPHEUS_PASSWORD, headphones.CONFIG.ORPHEUS_URL) orpheusobj._login() except Exception as e: orpheusobj = None - logger.error(u"Orpheus.network credentials incorrect or site is down. Error: %s %s" % ( + logger.error("Orpheus.network credentials incorrect or site is down. Error: %s %s" % ( e.__class__.__name__, str(e))) if orpheusobj and orpheusobj.logged_in(): - logger.info(u"Searching %s..." % provider) + logger.info("Searching %s..." % provider) all_torrents = [] album_type = "" @@ -1570,18 +1559,18 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, releasetype=album_type)['results']) # filter on format, size, and num seeders - logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...") + logger.info("Filtering torrents by format, maximum size, and minimum seeders...") match_torrents = [t for t in all_torrents if t.size <= maxsize and t.seeders >= minimumseeders] logger.info( - u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) + "Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) # sort by times d/l'd if not len(match_torrents): - logger.info(u"No results found from %s for %s after filtering" % (provider, term)) + logger.info("No results found from %s for %s after filtering" % (provider, term)) elif len(match_torrents) > 1: - logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % + logger.info("Found %d matching releases from %s for %s - %s after filtering" % (len(match_torrents), provider, artistterm, albumterm)) logger.info('Sorting torrents by number of seeders...') match_torrents.sort(key=lambda x: int(x.seeders), reverse=True) @@ -1595,7 +1584,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, # match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) # match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) logger.info( - u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) + "New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) for torrent in match_torrents: if not torrent.file_path: @@ -1632,7 +1621,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, bitrate_string = encoding_string if bitrate_string not in gazelleencoding.ALL_ENCODINGS: logger.info( - u"Your preferred bitrate is not one of the available RED filters, so not using it as a search parameter.") + "Your preferred bitrate is not one of the available RED filters, so not using it as a search parameter.") maxsize = 10000000000 elif headphones.CONFIG.PREFERRED_QUALITY == 1 or allow_lossless: # Highest quality including lossless search_formats = [gazelleformat.FLAC, gazelleformat.MP3] @@ -1643,18 +1632,18 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, if not redobj or not redobj.logged_in(): try: - logger.info(u"Attempting to log in to Redacted...") + logger.info("Attempting to log in to Redacted...") redobj = gazelleapi.GazelleAPI(headphones.CONFIG.REDACTED_USERNAME, headphones.CONFIG.REDACTED_PASSWORD, providerurl) redobj._login() except Exception as e: redobj = None - logger.error(u"Redacted credentials incorrect or site is down. Error: %s %s" % ( + logger.error("Redacted credentials incorrect or site is down. Error: %s %s" % ( e.__class__.__name__, str(e))) if redobj and redobj.logged_in(): - logger.info(u"Searching %s..." % provider) + logger.info("Searching %s..." % provider) all_torrents = [] for search_format in search_formats: if usersearchterm: @@ -1668,18 +1657,18 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, encoding=bitrate_string)['results']) # filter on format, size, and num seeders - logger.info(u"Filtering torrents by format, maximum size, and minimum seeders...") + logger.info("Filtering torrents by format, maximum size, and minimum seeders...") match_torrents = [t for t in all_torrents if t.size <= maxsize and t.seeders >= minimumseeders] logger.info( - u"Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) + "Remaining torrents: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) # sort by times d/l'd if not len(match_torrents): - logger.info(u"No results found from %s for %s after filtering" % (provider, term)) + logger.info("No results found from %s for %s after filtering" % (provider, term)) elif len(match_torrents) > 1: - logger.info(u"Found %d matching releases from %s for %s - %s after filtering" % + logger.info("Found %d matching releases from %s for %s - %s after filtering" % (len(match_torrents), provider, artistterm, albumterm)) logger.info( "Sorting torrents by times snatched and preferred bitrate %s..." % bitrate_string) @@ -1695,7 +1684,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, # match_torrents.sort(key=lambda x: re.match("mp3", x.getTorrentDetails(), flags=re.I), reverse=True) # match_torrents.sort(key=lambda x: str(bitrate) in x.getTorrentFolderName(), reverse=True) logger.info( - u"New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) + "New order: %s" % ", ".join(repr(torrent) for torrent in match_torrents)) for torrent in match_torrents: if not torrent.file_path: @@ -1767,8 +1756,8 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, if url.lower().startswith("//"): url = "http:" + url - formatted_size = re.search('Size (.*),', unicode(item)).group(1).replace( - u'\xa0', ' ') + formatted_size = re.search('Size (.*),', str(item)).group(1).replace( + '\xa0', ' ') size = helpers.piratesize(formatted_size) if size < maxsize and minimumseeders < seeds and url is not None: @@ -1781,7 +1770,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, resultlist.append((title, size, url, provider, "torrent", match)) except Exception as e: - logger.error(u"An unknown error occurred in the Pirate Bay parser: %s" % e) + logger.error("An unknown error occurred in the Pirate Bay parser: %s" % e) # Old Pirate Bay Compatible if headphones.CONFIG.OLDPIRATEBAY: @@ -1802,7 +1791,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2243.2 Safari/537.36'} provider_url = fix_url(headphones.CONFIG.OLDPIRATEBAY_URL) + \ - "/search.php?" + urllib.urlencode({"q": tpb_term, "iht": 6}) + "/search.php?" + urllib.parse.urlencode({"q": tpb_term, "iht": 6}) data = request.request_soup(url=provider_url, headers=headers) @@ -1836,7 +1825,7 @@ def searchTorrent(album, new=False, losslessOnly=False, albumlength=None, resultlist.append((title, size, url, provider, "torrent", match)) except Exception as e: logger.error( - u"An unknown error occurred in the Old Pirate Bay parser: %s" % e) + "An unknown error occurred in the Old Pirate Bay parser: %s" % e) # attempt to verify that this isn't a substring result # when looking for "Foo - Foo" we don't want "Foobar" diff --git a/headphones/softchroot_test.py b/headphones/softchroot_test.py index b310ce12..7a9b0831 100644 --- a/headphones/softchroot_test.py +++ b/headphones/softchroot_test.py @@ -38,8 +38,8 @@ class SoftChrootTest(TestCase): cf = SoftChroot(path) self.assertIsNone(cf) - self.assertRegexpMatches(str(exc.exception), r'No such directory') - self.assertRegexpMatches(str(exc.exception), path) + self.assertRegex(str(exc.exception), r'No such directory') + self.assertRegex(str(exc.exception), path) @mock.patch('headphones.softchroot.os', wrap=os, name='OsMock') def test_create_on_file(self, os_mock): @@ -57,8 +57,8 @@ class SoftChrootTest(TestCase): self.assertTrue(os_mock.path.isdir.called) - self.assertRegexpMatches(str(exc.exception), r'No such directory') - self.assertRegexpMatches(str(exc.exception), path) + self.assertRegex(str(exc.exception), r'No such directory') + self.assertRegex(str(exc.exception), path) @TestArgs( (None, None), diff --git a/headphones/transmission.py b/headphones/transmission.py index 6abc2d70..a787344c 100644 --- a/headphones/transmission.py +++ b/headphones/transmission.py @@ -16,7 +16,7 @@ import time import json import base64 -import urlparse +import urllib.parse import os from headphones import logger, request @@ -57,7 +57,7 @@ def addTorrent(link, data=None): else: retid = False - logger.info(u"Torrent sent to Transmission successfully") + logger.info("Torrent sent to Transmission successfully") return retid else: @@ -167,7 +167,7 @@ def torrentAction(method, arguments): # Fix the URL. We assume that the user does not point to the RPC endpoint, # so add it if it is missing. - parts = list(urlparse.urlparse(host)) + parts = list(urllib.parse.urlparse(host)) if not parts[0] in ("http", "https"): parts[0] = "http" @@ -175,7 +175,7 @@ def torrentAction(method, arguments): if not parts[2].endswith("/rpc"): parts[2] += "/transmission/rpc" - host = urlparse.urlunparse(parts) + host = urllib.parse.urlunparse(parts) data = {'method': method, 'arguments': arguments} data_json = json.dumps(data) auth = (username, password) if username and password else None @@ -205,5 +205,5 @@ def torrentAction(method, arguments): continue resp_json = response.json() - print resp_json + print(resp_json) return resp_json diff --git a/headphones/unittestcompat.py b/headphones/unittestcompat.py index 0bf26c3c..e11b2aee 100644 --- a/headphones/unittestcompat.py +++ b/headphones/unittestcompat.py @@ -44,7 +44,7 @@ class TestCase(TC): @_d def assertRegexpMatches(self, *args, **kw): - return super(TestCase, self).assertRegexpMatches(*args, **kw) + return super(TestCase, self).assertRegex(*args, **kw) # ----------------------------------------------------------- # NOT DUMMY ASSERTIONS diff --git a/headphones/utorrent.py b/headphones/utorrent.py index 43befbd2..1b4b2ab2 100644 --- a/headphones/utorrent.py +++ b/headphones/utorrent.py @@ -13,13 +13,13 @@ # You should have received a copy of the GNU General Public License # along with Headphones. If not, see . -import urllib +import urllib.request, urllib.parse, urllib.error import json import time from collections import namedtuple -import urllib2 -import urlparse -import cookielib +import urllib.request, urllib.error, urllib.parse +import urllib.parse +import http.cookiejar import re import os @@ -52,23 +52,23 @@ class utorrentclient(object): def _make_opener(self, realm, base_url, username, password): """uTorrent API need HTTP Basic Auth and cookie support for token verify.""" - auth = urllib2.HTTPBasicAuthHandler() + auth = urllib.request.HTTPBasicAuthHandler() auth.add_password(realm=realm, uri=base_url, user=username, passwd=password) - opener = urllib2.build_opener(auth) - urllib2.install_opener(opener) + opener = urllib.request.build_opener(auth) + urllib.request.install_opener(opener) - cookie_jar = cookielib.CookieJar() - cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar) + cookie_jar = http.cookiejar.CookieJar() + cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar) handlers = [auth, cookie_handler] - opener = urllib2.build_opener(*handlers) + opener = urllib.request.build_opener(*handlers) return opener def _get_token(self): - url = urlparse.urljoin(self.base_url, 'gui/token.html') + url = urllib.parse.urljoin(self.base_url, 'gui/token.html') try: response = self.opener.open(url) - except urllib2.HTTPError as err: + except urllib.error.HTTPError as err: logger.debug('URL: ' + str(url)) logger.debug('Error getting Token. uTorrent responded with error: ' + str(err)) return @@ -77,7 +77,7 @@ class utorrentclient(object): def list(self, **kwargs): params = [('list', '1')] - params += kwargs.items() + params += list(kwargs.items()) return self._action(params) def add_url(self, url): @@ -150,8 +150,8 @@ class utorrentclient(object): if not self.token: return - url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urllib.urlencode(params) - request = urllib2.Request(url) + url = self.base_url + '/gui/' + '?token=' + self.token + '&' + urllib.parse.urlencode(params) + request = urllib.request.Request(url) if body: request.add_data(body) @@ -162,7 +162,7 @@ class utorrentclient(object): try: response = self.opener.open(request) return response.code, json.loads(response.read()) - except urllib2.HTTPError as err: + except urllib.error.HTTPError as err: logger.debug('URL: ' + str(url)) logger.debug('uTorrent webUI raised the following error: ' + str(err)) diff --git a/headphones/versioncheck.py b/headphones/versioncheck.py index 11c4e225..a63129c2 100644 --- a/headphones/versioncheck.py +++ b/headphones/versioncheck.py @@ -43,7 +43,7 @@ def runGit(args): shell=True, cwd=headphones.PROG_DIR) output, err = p.communicate() - output = output.strip() + output = output.decode('utf-8').strip() logger.debug('Git output: ' + output) except OSError as e: diff --git a/headphones/webserve.py b/headphones/webserve.py index 9e2f1979..3f678da2 100644 --- a/headphones/webserve.py +++ b/headphones/webserve.py @@ -17,14 +17,14 @@ from operator import itemgetter import threading -import hashlib +import secrets import random -import urllib +import urllib.request, urllib.parse, urllib.error import json import time -import cgi import sys -import urllib2 +from html import escape as html_escape +import urllib.request, urllib.error, urllib.parse import os import re @@ -97,7 +97,7 @@ class WebInterface(object): # Serve the extras up as a dict to make things easier for new templates (append new extras to the end) extras_list = headphones.POSSIBLE_EXTRAS if artist['Extras']: - artist_extras = map(int, artist['Extras'].split(',')) + artist_extras = list(map(int, artist['Extras'].split(','))) else: artist_extras = [] @@ -158,8 +158,8 @@ class WebInterface(object): else: searchresults = mb.findSeries(name, limit=100) return serve_template(templatename="searchresults.html", - title='Search Results for: "' + cgi.escape(name) + '"', - searchresults=searchresults, name=cgi.escape(name), type=type) + title='Search Results for: "' + html_escape(name) + '"', + searchresults=searchresults, name=html_escape(name), type=type) @cherrypy.expose def addArtist(self, artistid): @@ -230,7 +230,7 @@ class WebInterface(object): @cherrypy.expose def pauseArtist(self, ArtistID): - logger.info(u"Pausing artist: " + ArtistID) + logger.info("Pausing artist: " + ArtistID) myDB = db.DBConnection() controlValueDict = {'ArtistID': ArtistID} newValueDict = {'Status': 'Paused'} @@ -239,7 +239,7 @@ class WebInterface(object): @cherrypy.expose def resumeArtist(self, ArtistID): - logger.info(u"Resuming artist: " + ArtistID) + logger.info("Resuming artist: " + ArtistID) myDB = db.DBConnection() controlValueDict = {'ArtistID': ArtistID} newValueDict = {'Status': 'Active'} @@ -252,9 +252,9 @@ class WebInterface(object): for name in namecheck: artistname = name['ArtistName'] try: - logger.info(u"Deleting all traces of artist: " + artistname) + logger.info("Deleting all traces of artist: " + artistname) except TypeError: - logger.info(u"Deleting all traces of artist: null") + logger.info("Deleting all traces of artist: null") myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID]) from headphones import cache @@ -291,7 +291,7 @@ class WebInterface(object): myDB = db.DBConnection() artist_name = myDB.select('SELECT DISTINCT ArtistName FROM artists WHERE ArtistID=?', [ArtistID])[0][0] - logger.info(u"Scanning artist: %s", artist_name) + logger.info("Scanning artist: %s", artist_name) full_folder_format = headphones.CONFIG.FOLDER_FORMAT folder_format = re.findall(r'(.*?[Aa]rtist?)\.*', full_folder_format)[0] @@ -314,7 +314,7 @@ class WebInterface(object): sortname = artist if sortname[0].isdigit(): - firstchar = u'0-9' + firstchar = '0-9' else: firstchar = sortname[0] @@ -363,7 +363,7 @@ class WebInterface(object): @cherrypy.expose def deleteEmptyArtists(self): - logger.info(u"Deleting all empty artists") + logger.info("Deleting all empty artists") myDB = db.DBConnection() emptyArtistIDs = [row['ArtistID'] for row in myDB.select("SELECT ArtistID FROM artists WHERE LatestAlbum IS NULL")] @@ -423,7 +423,7 @@ class WebInterface(object): @cherrypy.expose def queueAlbum(self, AlbumID, ArtistID=None, new=False, redirect=None, lossless=False): - logger.info(u"Marking album: " + AlbumID + " as wanted...") + logger.info("Marking album: " + AlbumID + " as wanted...") myDB = db.DBConnection() controlValueDict = {'AlbumID': AlbumID} if lossless: @@ -438,10 +438,11 @@ class WebInterface(object): raise cherrypy.HTTPRedirect(redirect) @cherrypy.expose + @cherrypy.tools.json_out() def choose_specific_download(self, AlbumID): results = searcher.searchforalbum(AlbumID, choose_specific_download=True) - results_as_dicts = [] + data = [] for result in results: result_dict = { @@ -452,35 +453,34 @@ class WebInterface(object): 'kind': result[4], 'matches': result[5] } - results_as_dicts.append(result_dict) - s = json.dumps(results_as_dicts) - cherrypy.response.headers['Content-type'] = 'application/json' - return s + data.append(result_dict) + return data @cherrypy.expose + @cherrypy.tools.json_out() def download_specific_release(self, AlbumID, title, size, url, provider, kind, **kwargs): # Handle situations where the torrent url contains arguments that are parsed if kwargs: - url = urllib2.quote(url, safe=":?/=&") + '&' + urllib.urlencode(kwargs) + url = urllib.parse.quote(url, safe=":?/=&") + '&' + urllib.parse.urlencode(kwargs) try: result = [(title, int(size), url, provider, kind)] except ValueError: result = [(title, float(size), url, provider, kind)] - logger.info(u"Making sure we can download the chosen result") + logger.info("Making sure we can download the chosen result") (data, bestqual) = searcher.preprocess(result) if data and bestqual: myDB = db.DBConnection() album = myDB.action('SELECT * from albums WHERE AlbumID=?', [AlbumID]).fetchone() searcher.send_to_downloader(data, bestqual, album) - return json.dumps({'result': 'success'}) + return {'result': 'success'} else: - return json.dumps({'result': 'failure'}) + return {'result': 'failure'} @cherrypy.expose def unqueueAlbum(self, AlbumID, ArtistID): - logger.info(u"Marking album: " + AlbumID + "as skipped...") + logger.info("Marking album: " + AlbumID + "as skipped...") myDB = db.DBConnection() controlValueDict = {'AlbumID': AlbumID} newValueDict = {'Status': 'Skipped'} @@ -489,7 +489,7 @@ class WebInterface(object): @cherrypy.expose def deleteAlbum(self, AlbumID, ArtistID=None): - logger.info(u"Deleting all traces of album: " + AlbumID) + logger.info("Deleting all traces of album: " + AlbumID) myDB = db.DBConnection() myDB.action('DELETE from have WHERE Matched=?', [AlbumID]) @@ -528,7 +528,7 @@ class WebInterface(object): @cherrypy.expose def editSearchTerm(self, AlbumID, SearchTerm): - logger.info(u"Updating search term for albumid: " + AlbumID) + logger.info("Updating search term for albumid: " + AlbumID) myDB = db.DBConnection() controlValueDict = {'AlbumID': AlbumID} newValueDict = {'SearchTerm': SearchTerm} @@ -959,6 +959,7 @@ class WebInterface(object): raise cherrypy.HTTPRedirect("logs") @cherrypy.expose + @cherrypy.tools.json_out() def getLog(self, iDisplayStart=0, iDisplayLength=100, iSortCol_0=0, sSortDir_0="desc", sSearch="", **kwargs): iDisplayStart = int(iDisplayStart) @@ -981,13 +982,14 @@ class WebInterface(object): rows = filtered[iDisplayStart:(iDisplayStart + iDisplayLength)] rows = [[row[0], row[2], row[1]] for row in rows] - return json.dumps({ + return { 'iTotalDisplayRecords': len(filtered), 'iTotalRecords': len(headphones.LOG_LIST), 'aaData': rows, - }) + } @cherrypy.expose + @cherrypy.tools.json_out() def getArtists_json(self, iDisplayStart=0, iDisplayLength=100, sSearch="", iSortCol_0='0', sSortDir_0='asc', **kwargs): iDisplayStart = int(iDisplayStart) @@ -1055,61 +1057,58 @@ class WebInterface(object): rows.append(row) - dict = {'iTotalDisplayRecords': len(filtered), + data = {'iTotalDisplayRecords': len(filtered), 'iTotalRecords': totalcount, 'aaData': rows, } - s = json.dumps(dict) - cherrypy.response.headers['Content-type'] = 'application/json' - return s + return data @cherrypy.expose + @cherrypy.tools.json_out() def getAlbumsByArtist_json(self, artist=None): myDB = db.DBConnection() - album_json = {} + data = {} counter = 0 album_list = myDB.select("SELECT AlbumTitle from albums WHERE ArtistName=?", [artist]) for album in album_list: - album_json[counter] = album['AlbumTitle'] + data[counter] = album['AlbumTitle'] counter += 1 - json_albums = json.dumps(album_json) - cherrypy.response.headers['Content-type'] = 'application/json' - return json_albums + return data @cherrypy.expose + @cherrypy.tools.json_out() def getArtistjson(self, ArtistID, **kwargs): myDB = db.DBConnection() artist = myDB.action('SELECT * FROM artists WHERE ArtistID=?', [ArtistID]).fetchone() - artist_json = json.dumps({ + return { 'ArtistName': artist['ArtistName'], 'Status': artist['Status'] - }) - return artist_json + } @cherrypy.expose + @cherrypy.tools.json_out() def getAlbumjson(self, AlbumID, **kwargs): myDB = db.DBConnection() album = myDB.action('SELECT * from albums WHERE AlbumID=?', [AlbumID]).fetchone() - album_json = json.dumps({ + return { 'AlbumTitle': album['AlbumTitle'], 'ArtistName': album['ArtistName'], 'Status': album['Status'] - }) - return album_json + } @cherrypy.expose def clearhistory(self, type=None, date_added=None, title=None): myDB = db.DBConnection() if type: if type == 'all': - logger.info(u"Clearing all history") + logger.info("Clearing all history") myDB.action('DELETE from snatched WHERE Status NOT LIKE "Seed%"') else: - logger.info(u"Clearing history where status is %s" % type) + logger.info("Clearing history where status is %s" % type) myDB.action('DELETE from snatched WHERE Status=?', [type]) else: - logger.info(u"Deleting '%s' from history" % title) + logger.info("Deleting '%s' from history" % title) myDB.action( 'DELETE from snatched WHERE Status NOT LIKE "Seed%" AND Title=? AND DateAdded=?', [title, date_added]) @@ -1117,7 +1116,7 @@ class WebInterface(object): @cherrypy.expose def generateAPI(self): - apikey = hashlib.sha224(str(random.getrandbits(256))).hexdigest()[0:32] + apikey = secrets.token_hex(nbytes=16) logger.info("New API generated") return apikey @@ -1418,7 +1417,7 @@ class WebInterface(object): "join_deviceid": headphones.CONFIG.JOIN_DEVICEID } - for k, v in config.iteritems(): + for k, v in config.items(): if isinstance(v, headphones.config.path): # need to apply SoftChroot to paths: nv = headphones.SOFT_CHROOT.apply(v) @@ -1435,7 +1434,7 @@ class WebInterface(object): extras_list = [extra_munges.get(x, x) for x in headphones.POSSIBLE_EXTRAS] if headphones.CONFIG.EXTRAS: - extras = map(int, headphones.CONFIG.EXTRAS.split(',')) + extras = list(map(int, headphones.CONFIG.EXTRAS.split(','))) else: extras = [] @@ -1496,7 +1495,7 @@ class WebInterface(object): kwargs[plain_config] = kwargs[use_config] del kwargs[use_config] - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): # TODO : HUGE crutch. It is all because there is no way to deal with options... try: _conf = headphones.CONFIG._define(k) @@ -1648,12 +1647,13 @@ class WebInterface(object): return a.fetchData() @cherrypy.expose + @cherrypy.tools.json_out() def getInfo(self, ArtistID=None, AlbumID=None): from headphones import cache info_dict = cache.getInfo(ArtistID, AlbumID) - return json.dumps(info_dict) + return info_dict @cherrypy.expose def getArtwork(self, ArtistID=None, AlbumID=None): @@ -1670,6 +1670,7 @@ class WebInterface(object): # If you just want to get the last.fm image links for an album, make sure # to pass a releaseid and not a releasegroupid @cherrypy.expose + @cherrypy.tools.json_out() def getImageLinks(self, ArtistID=None, AlbumID=None): from headphones import cache image_dict = cache.getImageLinks(ArtistID, AlbumID) @@ -1687,7 +1688,7 @@ class WebInterface(object): image_dict[ 'thumbnail'] = "http://coverartarchive.org/release/%s/front-250.jpg" % AlbumID - return json.dumps(image_dict) + return image_dict @cherrypy.expose def twitterStep1(self): @@ -1700,7 +1701,7 @@ class WebInterface(object): cherrypy.response.headers['Cache-Control'] = "max-age=0,no-cache,no-store" tweet = notifiers.TwitterNotifier() result = tweet._get_credentials(key) - logger.info(u"result: " + str(result)) + logger.info("result: " + str(result)) if result: return "Key verification successful" else: @@ -1732,14 +1733,14 @@ class WebInterface(object): @cherrypy.expose def testPushover(self): - logger.info(u"Sending Pushover notification") + logger.info("Sending Pushover notification") pushover = notifiers.PUSHOVER() result = pushover.notify("hooray!", "This is a test") return str(result) @cherrypy.expose def testPlex(self): - logger.info(u"Testing plex update") + logger.info("Testing plex update") plex = notifiers.Plex() plex.update() diff --git a/headphones/webstart.py b/headphones/webstart.py index 61417803..d5412c32 100644 --- a/headphones/webstart.py +++ b/headphones/webstart.py @@ -111,12 +111,9 @@ def initialize(options): }) conf['/api'] = {'tools.auth_basic.on': False} - # Prevent time-outs - cherrypy.engine.timeout_monitor.unsubscribe() cherrypy.tree.mount(WebInterface(), str(options['http_root']), config=conf) try: - cherrypy.process.servers.check_port(str(options['http_host']), options['http_port']) cherrypy.server.start() except IOError: sys.stderr.write( diff --git a/lib/MultipartPostHandler.py b/lib/MultipartPostHandler.py index ef63afa2..5d469bed 100644 --- a/lib/MultipartPostHandler.py +++ b/lib/MultipartPostHandler.py @@ -15,8 +15,8 @@ # Lesser General Public License for more details. # -import urllib -import urllib2 +import urllib.request, urllib.parse, urllib.error +import urllib.request, urllib.error, urllib.parse import mimetools, mimetypes import os, sys @@ -24,8 +24,8 @@ import os, sys # assigning a sequence. doseq = 1 -class MultipartPostHandler(urllib2.BaseHandler): - handler_order = urllib2.HTTPHandler.handler_order - 10 # needs to run first +class MultipartPostHandler(urllib.request.BaseHandler): + handler_order = urllib.request.HTTPHandler.handler_order - 10 # needs to run first def http_request(self, request): data = request.get_data() @@ -33,23 +33,23 @@ class MultipartPostHandler(urllib2.BaseHandler): v_files = [] v_vars = [] try: - for(key, value) in data.items(): + for(key, value) in list(data.items()): if type(value) in (file, list, tuple): v_files.append((key, value)) else: v_vars.append((key, value)) except TypeError: systype, value, traceback = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", traceback + raise TypeError("not a valid non-string sequence or mapping object").with_traceback(traceback) if len(v_files) == 0: - data = urllib.urlencode(v_vars, doseq) + data = urllib.parse.urlencode(v_vars, doseq) else: boundary, data = MultipartPostHandler.multipart_encode(v_vars, v_files) contenttype = 'multipart/form-data; boundary=%s' % boundary if(request.has_header('Content-Type') and request.get_header('Content-Type').find('multipart/form-data') != 0): - print "Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data') + print("Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data')) request.add_unredirected_header('Content-Type', contenttype) request.add_data(data) diff --git a/lib/apscheduler/executors/asyncio.py b/lib/apscheduler/executors/asyncio.py index fade99f8..c354eb98 100644 --- a/lib/apscheduler/executors/asyncio.py +++ b/lib/apscheduler/executors/asyncio.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + import sys from apscheduler.executors.base import BaseExecutor, run_job diff --git a/lib/apscheduler/executors/gevent.py b/lib/apscheduler/executors/gevent.py index 9f4db2fc..2f81ae06 100644 --- a/lib/apscheduler/executors/gevent.py +++ b/lib/apscheduler/executors/gevent.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + import sys from apscheduler.executors.base import BaseExecutor, run_job diff --git a/lib/apscheduler/executors/twisted.py b/lib/apscheduler/executors/twisted.py index 29217221..ba446eb3 100644 --- a/lib/apscheduler/executors/twisted.py +++ b/lib/apscheduler/executors/twisted.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from apscheduler.executors.base import BaseExecutor, run_job diff --git a/lib/apscheduler/jobstores/memory.py b/lib/apscheduler/jobstores/memory.py index 645391f3..011d13cb 100644 --- a/lib/apscheduler/jobstores/memory.py +++ b/lib/apscheduler/jobstores/memory.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError from apscheduler.util import datetime_to_utc_timestamp diff --git a/lib/apscheduler/jobstores/mongodb.py b/lib/apscheduler/jobstores/mongodb.py index ff762f7e..95f47ee4 100644 --- a/lib/apscheduler/jobstores/mongodb.py +++ b/lib/apscheduler/jobstores/mongodb.py @@ -1,11 +1,11 @@ -from __future__ import absolute_import + from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime from apscheduler.job import Job try: - import cPickle as pickle + import pickle as pickle except ImportError: # pragma: nocover import pickle diff --git a/lib/apscheduler/jobstores/redis.py b/lib/apscheduler/jobstores/redis.py index 2b4ffd52..cf29d388 100644 --- a/lib/apscheduler/jobstores/redis.py +++ b/lib/apscheduler/jobstores/redis.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + import six @@ -7,7 +7,7 @@ from apscheduler.util import datetime_to_utc_timestamp, utc_timestamp_to_datetim from apscheduler.job import Job try: - import cPickle as pickle + import pickle as pickle except ImportError: # pragma: nocover import pickle diff --git a/lib/apscheduler/jobstores/sqlalchemy.py b/lib/apscheduler/jobstores/sqlalchemy.py index f8a3c151..1de6468d 100644 --- a/lib/apscheduler/jobstores/sqlalchemy.py +++ b/lib/apscheduler/jobstores/sqlalchemy.py @@ -1,11 +1,11 @@ -from __future__ import absolute_import + from apscheduler.jobstores.base import BaseJobStore, JobLookupError, ConflictingIdError from apscheduler.util import maybe_ref, datetime_to_utc_timestamp, utc_timestamp_to_datetime from apscheduler.job import Job try: - import cPickle as pickle + import pickle as pickle except ImportError: # pragma: nocover import pickle diff --git a/lib/apscheduler/schedulers/asyncio.py b/lib/apscheduler/schedulers/asyncio.py index b91ee97a..2538bdc5 100644 --- a/lib/apscheduler/schedulers/asyncio.py +++ b/lib/apscheduler/schedulers/asyncio.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from functools import wraps from apscheduler.schedulers.base import BaseScheduler diff --git a/lib/apscheduler/schedulers/background.py b/lib/apscheduler/schedulers/background.py index 86ff2ba3..624db594 100644 --- a/lib/apscheduler/schedulers/background.py +++ b/lib/apscheduler/schedulers/background.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from threading import Thread, Event from apscheduler.schedulers.base import BaseScheduler diff --git a/lib/apscheduler/schedulers/base.py b/lib/apscheduler/schedulers/base.py index cdc0bf06..22735540 100644 --- a/lib/apscheduler/schedulers/base.py +++ b/lib/apscheduler/schedulers/base.py @@ -1,4 +1,4 @@ -from __future__ import print_function + from abc import ABCMeta, abstractmethod from collections import MutableMapping from threading import RLock diff --git a/lib/apscheduler/schedulers/blocking.py b/lib/apscheduler/schedulers/blocking.py index 2720822c..8bdb33f8 100644 --- a/lib/apscheduler/schedulers/blocking.py +++ b/lib/apscheduler/schedulers/blocking.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from threading import Event from apscheduler.schedulers.base import BaseScheduler diff --git a/lib/apscheduler/schedulers/gevent.py b/lib/apscheduler/schedulers/gevent.py index 9cce6589..b61b2417 100644 --- a/lib/apscheduler/schedulers/gevent.py +++ b/lib/apscheduler/schedulers/gevent.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from apscheduler.schedulers.blocking import BlockingScheduler from apscheduler.schedulers.base import BaseScheduler diff --git a/lib/apscheduler/schedulers/qt.py b/lib/apscheduler/schedulers/qt.py index dde5afaa..7de3b97f 100644 --- a/lib/apscheduler/schedulers/qt.py +++ b/lib/apscheduler/schedulers/qt.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from apscheduler.schedulers.base import BaseScheduler diff --git a/lib/apscheduler/schedulers/tornado.py b/lib/apscheduler/schedulers/tornado.py index 78093308..989d0225 100644 --- a/lib/apscheduler/schedulers/tornado.py +++ b/lib/apscheduler/schedulers/tornado.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from datetime import timedelta from functools import wraps diff --git a/lib/apscheduler/schedulers/twisted.py b/lib/apscheduler/schedulers/twisted.py index 166b6130..7236ec06 100644 --- a/lib/apscheduler/schedulers/twisted.py +++ b/lib/apscheduler/schedulers/twisted.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import + from functools import wraps from apscheduler.schedulers.base import BaseScheduler diff --git a/lib/apscheduler/util.py b/lib/apscheduler/util.py index 988f9427..86d96da3 100644 --- a/lib/apscheduler/util.py +++ b/lib/apscheduler/util.py @@ -1,6 +1,6 @@ """This module contains several handy functions primarily meant for internal use.""" -from __future__ import division + from datetime import date, datetime, time, timedelta, tzinfo from inspect import isfunction, ismethod, getargspec from calendar import timegm @@ -23,7 +23,7 @@ __all__ = ('asint', 'asbool', 'astimezone', 'convert_to_datetime', 'datetime_to_ class _Undefined(object): - def __nonzero__(self): + def __bool__(self): return False def __bool__(self): @@ -116,7 +116,7 @@ def convert_to_datetime(input, tz, arg_name): m = _DATE_REGEX.match(input) if not m: raise ValueError('Invalid date string') - values = [(k, int(v or 0)) for k, v in m.groupdict().items()] + values = [(k, int(v or 0)) for k, v in list(m.groupdict().items())] values = dict(values) datetime_ = datetime(**values) else: diff --git a/lib/beets/__init__.py b/lib/beets/__init__.py old mode 100755 new mode 100644 index 9291673b..9642a6f3 --- a/lib/beets/__init__.py +++ b/lib/beets/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,33 +12,29 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function -import os +import confuse +from sys import stderr -from beets.util import confit - -# This particular version has been slightly modified to work with Headphones -# https://github.com/rembo10/headphones -__version__ = u'1.4.4-headphones' -__author__ = u'Adrian Sampson ' +__version__ = '1.6.0' +__author__ = 'Adrian Sampson ' -class IncludeLazyConfig(confit.LazyConfig): - """A version of Confit's LazyConfig that also merges in data from +class IncludeLazyConfig(confuse.LazyConfig): + """A version of Confuse's LazyConfig that also merges in data from YAML files specified in an `include` setting. """ def read(self, user=True, defaults=True): - super(IncludeLazyConfig, self).read(user, defaults) + super().read(user, defaults) try: for view in self['include']: - filename = view.as_filename() - if os.path.isfile(filename): - self.set_file(filename) - except confit.NotFoundError: + self.set_file(view.as_filename()) + except confuse.NotFoundError: pass + except confuse.ConfigReadError as err: + stderr.write("configuration `import` failed: {}" + .format(err.reason)) -# headphones -#config = IncludeLazyConfig('beets', __name__) -config = IncludeLazyConfig(os.path.dirname(__file__), __name__) + +config = IncludeLazyConfig('beets', __name__) diff --git a/lib/beets/__main__.py b/lib/beets/__main__.py old mode 100755 new mode 100644 index 8010ca0d..ac829de9 --- a/lib/beets/__main__.py +++ b/lib/beets/__main__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2017, Adrian Sampson. # @@ -17,7 +16,6 @@ `python -m beets`. """ -from __future__ import division, absolute_import, print_function import sys from .ui import main diff --git a/lib/beets/art.py b/lib/beets/art.py old mode 100755 new mode 100644 index 979a6f72..13d5dfbd --- a/lib/beets/art.py +++ b/lib/beets/art.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,7 +16,6 @@ music and items' embedded album art. """ -from __future__ import division, absolute_import, print_function import subprocess import platform @@ -26,7 +24,7 @@ import os from beets.util import displayable_path, syspath, bytestring_path from beets.util.artresizer import ArtResizer -from beets import mediafile +import mediafile def mediafile_image(image_path, maxwidth=None): @@ -43,7 +41,7 @@ def get_art(log, item): try: mf = mediafile.MediaFile(syspath(item.path)) except mediafile.UnreadableFileError as exc: - log.warning(u'Could not extract art from {0}: {1}', + log.warning('Could not extract art from {0}: {1}', displayable_path(item.path), exc) return @@ -51,26 +49,27 @@ def get_art(log, item): def embed_item(log, item, imagepath, maxwidth=None, itempath=None, - compare_threshold=0, ifempty=False, as_album=False): + compare_threshold=0, ifempty=False, as_album=False, id3v23=None, + quality=0): """Embed an image into the item's media file. """ # Conditions and filters. if compare_threshold: if not check_art_similarity(log, item, imagepath, compare_threshold): - log.info(u'Image not similar; skipping.') + log.info('Image not similar; skipping.') return if ifempty and get_art(log, item): - log.info(u'media file already contained art') - return + log.info('media file already contained art') + return if maxwidth and not as_album: - imagepath = resize_image(log, imagepath, maxwidth) + imagepath = resize_image(log, imagepath, maxwidth, quality) # Get the `Image` object from the file. try: - log.debug(u'embedding {0}', displayable_path(imagepath)) + log.debug('embedding {0}', displayable_path(imagepath)) image = mediafile_image(imagepath, maxwidth) - except IOError as exc: - log.warning(u'could not read image file: {0}', exc) + except OSError as exc: + log.warning('could not read image file: {0}', exc) return # Make sure the image kind is safe (some formats only support PNG @@ -80,36 +79,39 @@ def embed_item(log, item, imagepath, maxwidth=None, itempath=None, image.mime_type) return - item.try_write(path=itempath, tags={'images': [image]}) + item.try_write(path=itempath, tags={'images': [image]}, id3v23=id3v23) -def embed_album(log, album, maxwidth=None, quiet=False, - compare_threshold=0, ifempty=False): +def embed_album(log, album, maxwidth=None, quiet=False, compare_threshold=0, + ifempty=False, quality=0): """Embed album art into all of the album's items. """ imagepath = album.artpath if not imagepath: - log.info(u'No album art present for {0}', album) + log.info('No album art present for {0}', album) return if not os.path.isfile(syspath(imagepath)): - log.info(u'Album art not found at {0} for {1}', + log.info('Album art not found at {0} for {1}', displayable_path(imagepath), album) return if maxwidth: - imagepath = resize_image(log, imagepath, maxwidth) + imagepath = resize_image(log, imagepath, maxwidth, quality) - log.info(u'Embedding album art into {0}', album) + log.info('Embedding album art into {0}', album) for item in album.items(): - embed_item(log, item, imagepath, maxwidth, None, - compare_threshold, ifempty, as_album=True) + embed_item(log, item, imagepath, maxwidth, None, compare_threshold, + ifempty, as_album=True, quality=quality) -def resize_image(log, imagepath, maxwidth): - """Returns path to an image resized to maxwidth. +def resize_image(log, imagepath, maxwidth, quality): + """Returns path to an image resized to maxwidth and encoded with the + specified quality level. """ - log.debug(u'Resizing album art to {0} pixels wide', maxwidth) - imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath)) + log.debug('Resizing album art to {0} pixels wide and encoding at quality \ + level {1}', maxwidth, quality) + imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath), + quality=quality) return imagepath @@ -131,7 +133,7 @@ def check_art_similarity(log, item, imagepath, compare_threshold): syspath(art, prefix=False), '-colorspace', 'gray', 'MIFF:-'] compare_cmd = ['compare', '-metric', 'PHASH', '-', 'null:'] - log.debug(u'comparing images with pipeline {} | {}', + log.debug('comparing images with pipeline {} | {}', convert_cmd, compare_cmd) convert_proc = subprocess.Popen( convert_cmd, @@ -155,7 +157,7 @@ def check_art_similarity(log, item, imagepath, compare_threshold): convert_proc.wait() if convert_proc.returncode: log.debug( - u'ImageMagick convert failed with status {}: {!r}', + 'ImageMagick convert failed with status {}: {!r}', convert_proc.returncode, convert_stderr, ) @@ -165,7 +167,7 @@ def check_art_similarity(log, item, imagepath, compare_threshold): stdout, stderr = compare_proc.communicate() if compare_proc.returncode: if compare_proc.returncode != 1: - log.debug(u'ImageMagick compare failed: {0}, {1}', + log.debug('ImageMagick compare failed: {0}, {1}', displayable_path(imagepath), displayable_path(art)) return @@ -176,10 +178,10 @@ def check_art_similarity(log, item, imagepath, compare_threshold): try: phash_diff = float(out_str) except ValueError: - log.debug(u'IM output is not a number: {0!r}', out_str) + log.debug('IM output is not a number: {0!r}', out_str) return - log.debug(u'ImageMagick compare score: {0}', phash_diff) + log.debug('ImageMagick compare score: {0}', phash_diff) return phash_diff <= compare_threshold return True @@ -189,18 +191,18 @@ def extract(log, outpath, item): art = get_art(log, item) outpath = bytestring_path(outpath) if not art: - log.info(u'No album art present in {0}, skipping.', item) + log.info('No album art present in {0}, skipping.', item) return # Add an extension to the filename. ext = mediafile.image_extension(art) if not ext: - log.warning(u'Unknown image type in {0}.', + log.warning('Unknown image type in {0}.', displayable_path(item.path)) return outpath += bytestring_path('.' + ext) - log.info(u'Extracting album art from: {0} to: {1}', + log.info('Extracting album art from: {0} to: {1}', item, displayable_path(outpath)) with open(syspath(outpath), 'wb') as f: f.write(art) @@ -216,7 +218,7 @@ def extract_first(log, outpath, items): def clear(log, lib, query): items = lib.items(query) - log.info(u'Clearing album art from {0} items', len(items)) + log.info('Clearing album art from {0} items', len(items)) for item in items: - log.debug(u'Clearing art for {0}', item) + log.debug('Clearing art for {0}', item) item.try_write(tags={'images': None}) diff --git a/lib/beets/autotag/__init__.py b/lib/beets/autotag/__init__.py old mode 100755 new mode 100644 index 54ce8e0c..e62f492c --- a/lib/beets/autotag/__init__.py +++ b/lib/beets/autotag/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,19 +15,59 @@ """Facilities for automatically determining files' correct metadata. """ -from __future__ import division, absolute_import, print_function from beets import logging from beets import config # Parts of external interface. -from .hooks import AlbumInfo, TrackInfo, AlbumMatch, TrackMatch # noqa +from .hooks import ( # noqa + AlbumInfo, + TrackInfo, + AlbumMatch, + TrackMatch, + Distance, +) from .match import tag_item, tag_album, Proposal # noqa from .match import Recommendation # noqa # Global logger. log = logging.getLogger('beets') +# Metadata fields that are already hardcoded, or where the tag name changes. +SPECIAL_FIELDS = { + 'album': ( + 'va', + 'releasegroup_id', + 'artist_id', + 'album_id', + 'mediums', + 'tracks', + 'year', + 'month', + 'day', + 'artist', + 'artist_credit', + 'artist_sort', + 'data_url' + ), + 'track': ( + 'track_alt', + 'artist_id', + 'release_track_id', + 'medium', + 'index', + 'medium_index', + 'title', + 'artist_credit', + 'artist_sort', + 'artist', + 'track_id', + 'medium_total', + 'data_url', + 'length' + ) +} + # Additional utilities for the main interface. @@ -40,17 +79,17 @@ def apply_item_metadata(item, track_info): item.artist_credit = track_info.artist_credit item.title = track_info.title item.mb_trackid = track_info.track_id + item.mb_releasetrackid = track_info.release_track_id if track_info.artist_id: item.mb_artistid = track_info.artist_id - if track_info.data_source: - item.data_source = track_info.data_source - if track_info.lyricist is not None: - item.lyricist = track_info.lyricist - if track_info.composer is not None: - item.composer = track_info.composer - if track_info.arranger is not None: - item.arranger = track_info.arranger + for field, value in track_info.items(): + # We only overwrite fields that are not already hardcoded. + if field in SPECIAL_FIELDS['track']: + continue + if value is None: + continue + item[field] = value # At the moment, the other metadata is left intact (including album # and track number). Perhaps these should be emptied? @@ -61,12 +100,19 @@ def apply_metadata(album_info, mapping): mapping from Items to TrackInfo objects. """ for item, track_info in mapping.items(): - # Album, artist, track count. - if track_info.artist: - item.artist = track_info.artist + # Artist or artist credit. + if config['artist_credit']: + item.artist = (track_info.artist_credit or + track_info.artist or + album_info.artist_credit or + album_info.artist) + item.albumartist = (album_info.artist_credit or + album_info.artist) else: - item.artist = album_info.artist - item.albumartist = album_info.artist + item.artist = (track_info.artist or album_info.artist) + item.albumartist = album_info.artist + + # Album. item.album = album_info.album # Artist sort and credit names. @@ -120,6 +166,7 @@ def apply_metadata(album_info, mapping): # MusicBrainz IDs. item.mb_trackid = track_info.track_id + item.mb_releasetrackid = track_info.release_track_id item.mb_albumid = album_info.album_id if track_info.artist_id: item.mb_artistid = track_info.artist_id @@ -131,34 +178,24 @@ def apply_metadata(album_info, mapping): # Compilation flag. item.comp = album_info.va - # Miscellaneous metadata. - for field in ('albumtype', - 'label', - 'asin', - 'catalognum', - 'script', - 'language', - 'country', - 'albumstatus', - 'albumdisambig', - 'data_source',): - value = getattr(album_info, field) - if value is not None: - item[field] = value - if track_info.disctitle is not None: - item.disctitle = track_info.disctitle - - if track_info.media is not None: - item.media = track_info.media - - if track_info.lyricist is not None: - item.lyricist = track_info.lyricist - if track_info.composer is not None: - item.composer = track_info.composer - if track_info.arranger is not None: - item.arranger = track_info.arranger - + # Track alt. item.track_alt = track_info.track_alt - # Headphones seal of approval - item.comments = 'tagged by headphones/beets' + # Don't overwrite fields with empty values unless the + # field is explicitly allowed to be overwritten + for field, value in album_info.items(): + if field in SPECIAL_FIELDS['album']: + continue + clobber = field in config['overwrite_null']['album'].as_str_seq() + if value is None and not clobber: + continue + item[field] = value + + for field, value in track_info.items(): + if field in SPECIAL_FIELDS['track']: + continue + clobber = field in config['overwrite_null']['track'].as_str_seq() + value = getattr(track_info, field) + if value is None and not clobber: + continue + item[field] = value diff --git a/lib/beets/autotag/hooks.py b/lib/beets/autotag/hooks.py old mode 100755 new mode 100644 index 3c403fcf..9cd6f2cd --- a/lib/beets/autotag/hooks.py +++ b/lib/beets/autotag/hooks.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -14,7 +13,6 @@ # included in all copies or substantial portions of the Software. """Glue between metadata sources and the matching logic.""" -from __future__ import division, absolute_import, print_function from collections import namedtuple from functools import total_ordering @@ -27,14 +25,36 @@ from beets.util import as_string from beets.autotag import mb from jellyfish import levenshtein_distance from unidecode import unidecode -import six log = logging.getLogger('beets') +# The name of the type for patterns in re changed in Python 3.7. +try: + Pattern = re._pattern_type +except AttributeError: + Pattern = re.Pattern + # Classes used to represent candidate options. +class AttrDict(dict): + """A dictionary that supports attribute ("dot") access, so `d.field` + is equivalent to `d['field']`. + """ -class AlbumInfo(object): + def __getattr__(self, attr): + if attr in self: + return self.get(attr) + else: + raise AttributeError + + def __setattr__(self, key, value): + self.__setitem__(key, value) + + def __hash__(self): + return id(self) + + +class AlbumInfo(AttrDict): """Describes a canonical release that may be used to match a release in the library. Consists of these data members: @@ -43,38 +63,22 @@ class AlbumInfo(object): - ``artist``: name of the release's primary artist - ``artist_id`` - ``tracks``: list of TrackInfo objects making up the release - - ``asin``: Amazon ASIN - - ``albumtype``: string describing the kind of release - - ``va``: boolean: whether the release has "various artists" - - ``year``: release year - - ``month``: release month - - ``day``: release day - - ``label``: music label responsible for the release - - ``mediums``: the number of discs in this release - - ``artist_sort``: name of the release's artist for sorting - - ``releasegroup_id``: MBID for the album's release group - - ``catalognum``: the label's catalog number for the release - - ``script``: character set used for metadata - - ``language``: human language of the metadata - - ``country``: the release country - - ``albumstatus``: MusicBrainz release status (Official, etc.) - - ``media``: delivery mechanism (Vinyl, etc.) - - ``albumdisambig``: MusicBrainz release disambiguation comment - - ``artist_credit``: Release-specific artist name - - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) - - ``data_url``: The data source release URL. - The fields up through ``tracks`` are required. The others are - optional and may be None. + ``mediums`` along with the fields up through ``tracks`` are required. + The others are optional and may be None. """ - def __init__(self, album, album_id, artist, artist_id, tracks, asin=None, - albumtype=None, va=False, year=None, month=None, day=None, - label=None, mediums=None, artist_sort=None, - releasegroup_id=None, catalognum=None, script=None, - language=None, country=None, albumstatus=None, media=None, - albumdisambig=None, artist_credit=None, original_year=None, - original_month=None, original_day=None, data_source=None, - data_url=None): + + def __init__(self, tracks, album=None, album_id=None, artist=None, + artist_id=None, asin=None, albumtype=None, va=False, + year=None, month=None, day=None, label=None, mediums=None, + artist_sort=None, releasegroup_id=None, catalognum=None, + script=None, language=None, country=None, style=None, + genre=None, albumstatus=None, media=None, albumdisambig=None, + releasegroupdisambig=None, artist_credit=None, + original_year=None, original_month=None, + original_day=None, data_source=None, data_url=None, + discogs_albumid=None, discogs_labelid=None, + discogs_artistid=None, **kwargs): self.album = album self.album_id = album_id self.artist = artist @@ -94,15 +98,22 @@ class AlbumInfo(object): self.script = script self.language = language self.country = country + self.style = style + self.genre = genre self.albumstatus = albumstatus self.media = media self.albumdisambig = albumdisambig + self.releasegroupdisambig = releasegroupdisambig self.artist_credit = artist_credit self.original_year = original_year self.original_month = original_month self.original_day = original_day self.data_source = data_source self.data_url = data_url + self.discogs_albumid = discogs_albumid + self.discogs_labelid = discogs_labelid + self.discogs_artistid = discogs_artistid + self.update(kwargs) # Work around a bug in python-musicbrainz-ngs that causes some # strings to be bytes rather than Unicode. @@ -112,53 +123,49 @@ class AlbumInfo(object): constituent `TrackInfo` objects, are decoded to Unicode. """ for fld in ['album', 'artist', 'albumtype', 'label', 'artist_sort', - 'catalognum', 'script', 'language', 'country', - 'albumstatus', 'albumdisambig', 'artist_credit', 'media']: + 'catalognum', 'script', 'language', 'country', 'style', + 'genre', 'albumstatus', 'albumdisambig', + 'releasegroupdisambig', 'artist_credit', + 'media', 'discogs_albumid', 'discogs_labelid', + 'discogs_artistid']: value = getattr(self, fld) if isinstance(value, bytes): setattr(self, fld, value.decode(codec, 'ignore')) - if self.tracks: - for track in self.tracks: - track.decode(codec) + for track in self.tracks: + track.decode(codec) + + def copy(self): + dupe = AlbumInfo([]) + dupe.update(self) + dupe.tracks = [track.copy() for track in self.tracks] + return dupe -class TrackInfo(object): +class TrackInfo(AttrDict): """Describes a canonical track present on a release. Appears as part of an AlbumInfo's ``tracks`` list. Consists of these data members: - ``title``: name of the track - ``track_id``: MusicBrainz ID; UUID fragment only - - ``artist``: individual track artist name - - ``artist_id`` - - ``length``: float: duration of the track in seconds - - ``index``: position on the entire release - - ``media``: delivery mechanism (Vinyl, etc.) - - ``medium``: the disc number this track appears on in the album - - ``medium_index``: the track's position on the disc - - ``medium_total``: the number of tracks on the item's disc - - ``artist_sort``: name of the track artist for sorting - - ``disctitle``: name of the individual medium (subtitle) - - ``artist_credit``: Recording-specific artist name - - ``data_source``: The original data source (MusicBrainz, Discogs, etc.) - - ``data_url``: The data source release URL. - - ``lyricist``: individual track lyricist name - - ``composer``: individual track composer name - - ``arranger`: individual track arranger name - - ``track_alt``: alternative track number (tape, vinyl, etc.) Only ``title`` and ``track_id`` are required. The rest of the fields may be None. The indices ``index``, ``medium``, and ``medium_index`` are all 1-based. """ - def __init__(self, title, track_id, artist=None, artist_id=None, - length=None, index=None, medium=None, medium_index=None, - medium_total=None, artist_sort=None, disctitle=None, - artist_credit=None, data_source=None, data_url=None, - media=None, lyricist=None, composer=None, arranger=None, - track_alt=None): + + def __init__(self, title=None, track_id=None, release_track_id=None, + artist=None, artist_id=None, length=None, index=None, + medium=None, medium_index=None, medium_total=None, + artist_sort=None, disctitle=None, artist_credit=None, + data_source=None, data_url=None, media=None, lyricist=None, + composer=None, composer_sort=None, arranger=None, + track_alt=None, work=None, mb_workid=None, + work_disambig=None, bpm=None, initial_key=None, genre=None, + **kwargs): self.title = title self.track_id = track_id + self.release_track_id = release_track_id self.artist = artist self.artist_id = artist_id self.length = length @@ -174,8 +181,16 @@ class TrackInfo(object): self.data_url = data_url self.lyricist = lyricist self.composer = composer + self.composer_sort = composer_sort self.arranger = arranger self.track_alt = track_alt + self.work = work + self.mb_workid = mb_workid + self.work_disambig = work_disambig + self.bpm = bpm + self.initial_key = initial_key + self.genre = genre + self.update(kwargs) # As above, work around a bug in python-musicbrainz-ngs. def decode(self, codec='utf-8'): @@ -188,6 +203,11 @@ class TrackInfo(object): if isinstance(value, bytes): setattr(self, fld, value.decode(codec, 'ignore')) + def copy(self): + dupe = TrackInfo() + dupe.update(self) + return dupe + # Candidate distance scoring. @@ -215,8 +235,8 @@ def _string_dist_basic(str1, str2): transliteration/lowering to ASCII characters. Normalized by string length. """ - assert isinstance(str1, six.text_type) - assert isinstance(str2, six.text_type) + assert isinstance(str1, str) + assert isinstance(str2, str) str1 = as_string(unidecode(str1)) str2 = as_string(unidecode(str2)) str1 = re.sub(r'[^a-z0-9]', '', str1.lower()) @@ -244,9 +264,9 @@ def string_dist(str1, str2): # "something, the". for word in SD_END_WORDS: if str1.endswith(', %s' % word): - str1 = '%s %s' % (word, str1[:-len(word) - 2]) + str1 = '{} {}'.format(word, str1[:-len(word) - 2]) if str2.endswith(', %s' % word): - str2 = '%s %s' % (word, str2[:-len(word) - 2]) + str2 = '{} {}'.format(word, str2[:-len(word) - 2]) # Perform a couple of basic normalizing substitutions. for pat, repl in SD_REPLACE: @@ -284,11 +304,12 @@ def string_dist(str1, str2): return base_dist + penalty -class LazyClassProperty(object): +class LazyClassProperty: """A decorator implementing a read-only property that is *lazy* in the sense that the getter is only invoked once. Subsequent accesses through *any* instance use the cached result. """ + def __init__(self, getter): self.getter = getter self.computed = False @@ -301,17 +322,17 @@ class LazyClassProperty(object): @total_ordering -@six.python_2_unicode_compatible -class Distance(object): +class Distance: """Keeps track of multiple distance penalties. Provides a single weighted distance for all penalties as well as a weighted distance for each individual penalty. """ + def __init__(self): self._penalties = {} @LazyClassProperty - def _weights(cls): # noqa + def _weights(cls): # noqa: N805 """A dictionary from keys to floating-point weights. """ weights_view = config['match']['distance_weights'] @@ -389,7 +410,7 @@ class Distance(object): return other - self.distance def __str__(self): - return "{0:.2f}".format(self.distance) + return f"{self.distance:.2f}" # Behave like a dict. @@ -416,7 +437,7 @@ class Distance(object): """ if not isinstance(dist, Distance): raise ValueError( - u'`dist` must be a Distance object, not {0}'.format(type(dist)) + '`dist` must be a Distance object, not {}'.format(type(dist)) ) for key, penalties in dist._penalties.items(): self._penalties.setdefault(key, []).extend(penalties) @@ -428,7 +449,7 @@ class Distance(object): be a compiled regular expression, in which case it will be matched against `value2`. """ - if isinstance(value1, re._pattern_type): + if isinstance(value1, Pattern): return bool(value1.match(value2)) return value1 == value2 @@ -440,7 +461,7 @@ class Distance(object): """ if not 0.0 <= dist <= 1.0: raise ValueError( - u'`dist` must be between 0.0 and 1.0, not {0}'.format(dist) + f'`dist` must be between 0.0 and 1.0, not {dist}' ) self._penalties.setdefault(key, []).append(dist) @@ -534,7 +555,10 @@ def album_for_mbid(release_id): if the ID is not found. """ try: - return mb.album_for_id(release_id) + album = mb.album_for_id(release_id) + if album: + plugins.send('albuminfo_received', info=album) + return album except mb.MusicBrainzAPIError as exc: exc.log(log) @@ -544,12 +568,14 @@ def track_for_mbid(recording_id): if the ID is not found. """ try: - return mb.track_for_id(recording_id) + track = mb.track_for_id(recording_id) + if track: + plugins.send('trackinfo_received', info=track) + return track except mb.MusicBrainzAPIError as exc: exc.log(log) -@plugins.notify_info_yielded(u'albuminfo_received') def albums_for_id(album_id): """Get a list of albums for an ID.""" a = album_for_mbid(album_id) @@ -557,10 +583,10 @@ def albums_for_id(album_id): yield a for a in plugins.album_for_id(album_id): if a: + plugins.send('albuminfo_received', info=a) yield a -@plugins.notify_info_yielded(u'trackinfo_received') def tracks_for_id(track_id): """Get a list of tracks for an ID.""" t = track_for_mbid(track_id) @@ -568,39 +594,43 @@ def tracks_for_id(track_id): yield t for t in plugins.track_for_id(track_id): if t: + plugins.send('trackinfo_received', info=t) yield t -@plugins.notify_info_yielded(u'albuminfo_received') -def album_candidates(items, artist, album, va_likely): +@plugins.notify_info_yielded('albuminfo_received') +def album_candidates(items, artist, album, va_likely, extra_tags): """Search for album matches. ``items`` is a list of Item objects that make up the album. ``artist`` and ``album`` are the respective names (strings), which may be derived from the item list or may be entered by the user. ``va_likely`` is a boolean indicating whether - the album is likely to be a "various artists" release. + the album is likely to be a "various artists" release. ``extra_tags`` + is an optional dictionary of additional tags used to further + constrain the search. """ + # Base candidates if we have album and artist to match. if artist and album: try: - for candidate in mb.match_album(artist, album, len(items)): - yield candidate + yield from mb.match_album(artist, album, len(items), + extra_tags) except mb.MusicBrainzAPIError as exc: exc.log(log) # Also add VA matches from MusicBrainz where appropriate. if va_likely and album: try: - for candidate in mb.match_album(None, album, len(items)): - yield candidate + yield from mb.match_album(None, album, len(items), + extra_tags) except mb.MusicBrainzAPIError as exc: exc.log(log) # Candidates from plugins. - for candidate in plugins.candidates(items, artist, album, va_likely): - yield candidate + yield from plugins.candidates(items, artist, album, va_likely, + extra_tags) -@plugins.notify_info_yielded(u'trackinfo_received') +@plugins.notify_info_yielded('trackinfo_received') def item_candidates(item, artist, title): """Search for item matches. ``item`` is the Item to be matched. ``artist`` and ``title`` are strings and either reflect the item or @@ -610,11 +640,9 @@ def item_candidates(item, artist, title): # MusicBrainz candidates. if artist and title: try: - for candidate in mb.match_track(artist, title): - yield candidate + yield from mb.match_track(artist, title) except mb.MusicBrainzAPIError as exc: exc.log(log) # Plugin candidates. - for candidate in plugins.item_candidates(item, artist, title): - yield candidate + yield from plugins.item_candidates(item, artist, title) diff --git a/lib/beets/autotag/match.py b/lib/beets/autotag/match.py old mode 100755 new mode 100644 index 71b62adb..d352a013 --- a/lib/beets/autotag/match.py +++ b/lib/beets/autotag/match.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,7 +16,6 @@ releases and tracks. """ -from __future__ import division, absolute_import, print_function import datetime import re @@ -35,7 +33,7 @@ from beets.util.enumeration import OrderedEnum # album level to determine whether a given release is likely a VA # release and also on the track level to to remove the penalty for # differing artists. -VA_ARTISTS = (u'', u'various artists', u'various', u'va', u'unknown') +VA_ARTISTS = ('', 'various artists', 'various', 'va', 'unknown') # Global logger. log = logging.getLogger('beets') @@ -108,7 +106,7 @@ def assign_items(items, tracks): log.debug('...done.') # Produce the output matching. - mapping = dict((items[i], tracks[j]) for (i, j) in matching) + mapping = {items[i]: tracks[j] for (i, j) in matching} extra_items = list(set(items) - set(mapping.keys())) extra_items.sort(key=lambda i: (i.disc, i.track, i.title)) extra_tracks = list(set(tracks) - set(mapping.values())) @@ -276,16 +274,16 @@ def match_by_id(items): try: first = next(albumids) except StopIteration: - log.debug(u'No album ID found.') + log.debug('No album ID found.') return None # Is there a consensus on the MB album ID? for other in albumids: if other != first: - log.debug(u'No album ID consensus.') + log.debug('No album ID consensus.') return None # If all album IDs are equal, look up the album. - log.debug(u'Searching for discovered album ID: {0}', first) + log.debug('Searching for discovered album ID: {0}', first) return hooks.album_for_mbid(first) @@ -351,23 +349,23 @@ def _add_candidate(items, results, info): checking the track count, ordering the items, checking for duplicates, and calculating the distance. """ - log.debug(u'Candidate: {0} - {1} ({2})', + log.debug('Candidate: {0} - {1} ({2})', info.artist, info.album, info.album_id) # Discard albums with zero tracks. if not info.tracks: - log.debug(u'No tracks.') + log.debug('No tracks.') return # Don't duplicate. if info.album_id in results: - log.debug(u'Duplicate.') + log.debug('Duplicate.') return # Discard matches without required tags. for req_tag in config['match']['required'].as_str_seq(): if getattr(info, req_tag) is None: - log.debug(u'Ignored. Missing required tag: {0}', req_tag) + log.debug('Ignored. Missing required tag: {0}', req_tag) return # Find mapping between the items and the track info. @@ -380,10 +378,10 @@ def _add_candidate(items, results, info): penalties = [key for key, _ in dist] for penalty in config['match']['ignored'].as_str_seq(): if penalty in penalties: - log.debug(u'Ignored. Penalty: {0}', penalty) + log.debug('Ignored. Penalty: {0}', penalty) return - log.debug(u'Success. Distance: {0}', dist) + log.debug('Success. Distance: {0}', dist) results[info.album_id] = hooks.AlbumMatch(dist, info, mapping, extra_items, extra_tracks) @@ -411,7 +409,7 @@ def tag_album(items, search_artist=None, search_album=None, likelies, consensus = current_metadata(items) cur_artist = likelies['artist'] cur_album = likelies['album'] - log.debug(u'Tagging {0} - {1}', cur_artist, cur_album) + log.debug('Tagging {0} - {1}', cur_artist, cur_album) # The output result (distance, AlbumInfo) tuples (keyed by MB album # ID). @@ -420,7 +418,7 @@ def tag_album(items, search_artist=None, search_album=None, # Search by explicit ID. if search_ids: for search_id in search_ids: - log.debug(u'Searching for album ID: {0}', search_id) + log.debug('Searching for album ID: {0}', search_id) for id_candidate in hooks.albums_for_id(search_id): _add_candidate(items, candidates, id_candidate) @@ -431,13 +429,13 @@ def tag_album(items, search_artist=None, search_album=None, if id_info: _add_candidate(items, candidates, id_info) rec = _recommendation(list(candidates.values())) - log.debug(u'Album ID match recommendation is {0}', rec) + log.debug('Album ID match recommendation is {0}', rec) if candidates and not config['import']['timid']: # If we have a very good MBID match, return immediately. # Otherwise, this match will compete against metadata-based # matches. if rec == Recommendation.strong: - log.debug(u'ID match.') + log.debug('ID match.') return cur_artist, cur_album, \ Proposal(list(candidates.values()), rec) @@ -445,22 +443,29 @@ def tag_album(items, search_artist=None, search_album=None, if not (search_artist and search_album): # No explicit search terms -- use current metadata. search_artist, search_album = cur_artist, cur_album - log.debug(u'Search terms: {0} - {1}', search_artist, search_album) + log.debug('Search terms: {0} - {1}', search_artist, search_album) + + extra_tags = None + if config['musicbrainz']['extra_tags']: + tag_list = config['musicbrainz']['extra_tags'].get() + extra_tags = {k: v for (k, v) in likelies.items() if k in tag_list} + log.debug('Additional search terms: {0}', extra_tags) # Is this album likely to be a "various artist" release? va_likely = ((not consensus['artist']) or (search_artist.lower() in VA_ARTISTS) or any(item.comp for item in items)) - log.debug(u'Album might be VA: {0}', va_likely) + log.debug('Album might be VA: {0}', va_likely) # Get the results from the data sources. for matched_candidate in hooks.album_candidates(items, search_artist, search_album, - va_likely): + va_likely, + extra_tags): _add_candidate(items, candidates, matched_candidate) - log.debug(u'Evaluating {0} candidates.', len(candidates)) + log.debug('Evaluating {0} candidates.', len(candidates)) # Sort and get the recommendation. candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) @@ -485,7 +490,7 @@ def tag_item(item, search_artist=None, search_title=None, trackids = search_ids or [t for t in [item.mb_trackid] if t] if trackids: for trackid in trackids: - log.debug(u'Searching for track ID: {0}', trackid) + log.debug('Searching for track ID: {0}', trackid) for track_info in hooks.tracks_for_id(trackid): dist = track_distance(item, track_info, incl_artist=True) candidates[track_info.track_id] = \ @@ -494,7 +499,7 @@ def tag_item(item, search_artist=None, search_title=None, rec = _recommendation(_sort_candidates(candidates.values())) if rec == Recommendation.strong and \ not config['import']['timid']: - log.debug(u'Track ID match.') + log.debug('Track ID match.') return Proposal(_sort_candidates(candidates.values()), rec) # If we're searching by ID, don't proceed. @@ -507,7 +512,7 @@ def tag_item(item, search_artist=None, search_title=None, # Search terms. if not (search_artist and search_title): search_artist, search_title = item.artist, item.title - log.debug(u'Item search terms: {0} - {1}', search_artist, search_title) + log.debug('Item search terms: {0} - {1}', search_artist, search_title) # Get and evaluate candidate metadata. for track_info in hooks.item_candidates(item, search_artist, search_title): @@ -515,7 +520,7 @@ def tag_item(item, search_artist=None, search_title=None, candidates[track_info.track_id] = hooks.TrackMatch(dist, track_info) # Sort by distance and return with recommendation. - log.debug(u'Found {0} candidates.', len(candidates)) + log.debug('Found {0} candidates.', len(candidates)) candidates = _sort_candidates(candidates.values()) rec = _recommendation(candidates) return Proposal(candidates, rec) diff --git a/lib/beets/autotag/mb.py b/lib/beets/autotag/mb.py old mode 100755 new mode 100644 index a6133adb..e6a2e277 --- a/lib/beets/autotag/mb.py +++ b/lib/beets/autotag/mb.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,55 +14,72 @@ """Searches for albums in the MusicBrainz database. """ -from __future__ import division, absolute_import, print_function import musicbrainzngs import re import traceback -from six.moves.urllib.parse import urljoin from beets import logging +from beets import plugins import beets.autotag.hooks import beets from beets import util from beets import config -import six +from collections import Counter +from urllib.parse import urljoin VARIOUS_ARTISTS_ID = '89ad4ac3-39f7-470e-963a-56509c546377' -if util.SNI_SUPPORTED: - BASE_URL = 'https://musicbrainz.org/' -else: - BASE_URL = 'http://musicbrainz.org/' +BASE_URL = 'https://musicbrainz.org/' + +SKIPPED_TRACKS = ['[data track]'] + +FIELDS_TO_MB_KEYS = { + 'catalognum': 'catno', + 'country': 'country', + 'label': 'label', + 'media': 'format', + 'year': 'date', +} musicbrainzngs.set_useragent('beets', beets.__version__, - 'http://beets.io/') + 'https://beets.io/') class MusicBrainzAPIError(util.HumanReadableException): """An error while talking to MusicBrainz. The `query` field is the parameter to the action and may have any type. """ + def __init__(self, reason, verb, query, tb=None): self.query = query if isinstance(reason, musicbrainzngs.WebServiceError): - reason = u'MusicBrainz not reachable' - super(MusicBrainzAPIError, self).__init__(reason, verb, tb) + reason = 'MusicBrainz not reachable' + super().__init__(reason, verb, tb) def get_message(self): - return u'{0} in {1} with query {2}'.format( + return '{} in {} with query {}'.format( self._reasonstr(), self.verb, repr(self.query) ) + log = logging.getLogger('beets') RELEASE_INCLUDES = ['artists', 'media', 'recordings', 'release-groups', 'labels', 'artist-credits', 'aliases', 'recording-level-rels', 'work-rels', - 'work-level-rels', 'artist-rels'] -TRACK_INCLUDES = ['artists', 'aliases'] + 'work-level-rels', 'artist-rels', 'isrcs'] +BROWSE_INCLUDES = ['artist-credits', 'work-rels', + 'artist-rels', 'recording-rels', 'release-rels'] +if "work-level-rels" in musicbrainzngs.VALID_BROWSE_INCLUDES['recording']: + BROWSE_INCLUDES.append("work-level-rels") +BROWSE_CHUNKSIZE = 100 +BROWSE_MAXTRACKS = 500 +TRACK_INCLUDES = ['artists', 'aliases', 'isrcs'] if 'work-level-rels' in musicbrainzngs.VALID_INCLUDES['recording']: TRACK_INCLUDES += ['work-level-rels', 'artist-rels'] +if 'genres' in musicbrainzngs.VALID_INCLUDES['recording']: + RELEASE_INCLUDES += ['genres'] def track_url(trackid): @@ -79,7 +95,11 @@ def configure(): from the beets configuration. This should be called at startup. """ hostname = config['musicbrainz']['host'].as_str() - musicbrainzngs.set_hostname(hostname) + https = config['musicbrainz']['https'].get(bool) + # Only call set_hostname when a custom server is configured. Since + # musicbrainz-ngs connects to musicbrainz.org with HTTPS by default + if hostname != "musicbrainz.org": + musicbrainzngs.set_hostname(hostname, https) musicbrainzngs.set_rate_limit( config['musicbrainz']['ratelimit_interval'].as_number(), config['musicbrainz']['ratelimit'].get(int), @@ -109,6 +129,24 @@ def _preferred_alias(aliases): return matches[0] +def _preferred_release_event(release): + """Given a release, select and return the user's preferred release + event as a tuple of (country, release_date). Fall back to the + default release event if a preferred event is not found. + """ + countries = config['match']['preferred']['countries'].as_str_seq() + + for country in countries: + for event in release.get('release-event-list', {}): + try: + if country in event['area']['iso-3166-1-code-list']: + return country, event['date'] + except KeyError: + pass + + return release.get('country'), release.get('date') + + def _flatten_artist_credit(credit): """Given a list representing an ``artist-credit`` block, flatten the data into a triple of joined artist name strings: canonical, sort, and @@ -118,7 +156,7 @@ def _flatten_artist_credit(credit): artist_sort_parts = [] artist_credit_parts = [] for el in credit: - if isinstance(el, six.string_types): + if isinstance(el, str): # Join phrase. artist_parts.append(el) artist_credit_parts.append(el) @@ -165,13 +203,13 @@ def track_info(recording, index=None, medium=None, medium_index=None, the number of tracks on the medium. Each number is a 1-based index. """ info = beets.autotag.hooks.TrackInfo( - recording['title'], - recording['id'], + title=recording['title'], + track_id=recording['id'], index=index, medium=medium, medium_index=medium_index, medium_total=medium_total, - data_source=u'MusicBrainz', + data_source='MusicBrainz', data_url=track_url(recording['id']), ) @@ -187,11 +225,22 @@ def track_info(recording, index=None, medium=None, medium_index=None, if recording.get('length'): info.length = int(recording['length']) / (1000.0) + info.trackdisambig = recording.get('disambiguation') + + if recording.get('isrc-list'): + info.isrc = ';'.join(recording['isrc-list']) + lyricist = [] composer = [] + composer_sort = [] for work_relation in recording.get('work-relation-list', ()): if work_relation['type'] != 'performance': continue + info.work = work_relation['work']['title'] + info.mb_workid = work_relation['work']['id'] + if 'disambiguation' in work_relation['work']: + info.work_disambig = work_relation['work']['disambiguation'] + for artist_relation in work_relation['work'].get( 'artist-relation-list', ()): if 'type' in artist_relation: @@ -200,10 +249,13 @@ def track_info(recording, index=None, medium=None, medium_index=None, lyricist.append(artist_relation['artist']['name']) elif type == 'composer': composer.append(artist_relation['artist']['name']) + composer_sort.append( + artist_relation['artist']['sort-name']) if lyricist: - info.lyricist = u', '.join(lyricist) + info.lyricist = ', '.join(lyricist) if composer: - info.composer = u', '.join(composer) + info.composer = ', '.join(composer) + info.composer_sort = ', '.join(composer_sort) arranger = [] for artist_relation in recording.get('artist-relation-list', ()): @@ -212,7 +264,12 @@ def track_info(recording, index=None, medium=None, medium_index=None, if type == 'arranger': arranger.append(artist_relation['artist']['name']) if arranger: - info.arranger = u', '.join(arranger) + info.arranger = ', '.join(arranger) + + # Supplementary fields provided by plugins + extra_trackdatas = plugins.send('mb_track_extract', data=recording) + for extra_trackdata in extra_trackdatas: + info.update(extra_trackdata) info.decode() return info @@ -246,6 +303,26 @@ def album_info(release): artist_name, artist_sort_name, artist_credit_name = \ _flatten_artist_credit(release['artist-credit']) + ntracks = sum(len(m['track-list']) for m in release['medium-list']) + + # The MusicBrainz API omits 'artist-relation-list' and 'work-relation-list' + # when the release has more than 500 tracks. So we use browse_recordings + # on chunks of tracks to recover the same information in this case. + if ntracks > BROWSE_MAXTRACKS: + log.debug('Album {} has too many tracks', release['id']) + recording_list = [] + for i in range(0, ntracks, BROWSE_CHUNKSIZE): + log.debug('Retrieving tracks starting at {}', i) + recording_list.extend(musicbrainzngs.browse_recordings( + release=release['id'], limit=BROWSE_CHUNKSIZE, + includes=BROWSE_INCLUDES, + offset=i)['recording-list']) + track_map = {r['id']: r for r in recording_list} + for medium in release['medium-list']: + for recording in medium['track-list']: + recording_info = track_map[recording['recording']['id']] + recording['recording'] = recording_info + # Basic info. track_infos = [] index = 0 @@ -253,11 +330,29 @@ def album_info(release): disctitle = medium.get('title') format = medium.get('format') + if format in config['match']['ignored_media'].as_str_seq(): + continue + all_tracks = medium['track-list'] + if ('data-track-list' in medium + and not config['match']['ignore_data_tracks']): + all_tracks += medium['data-track-list'] + track_count = len(all_tracks) + if 'pregap' in medium: all_tracks.insert(0, medium['pregap']) for track in all_tracks: + + if ('title' in track['recording'] and + track['recording']['title'] in SKIPPED_TRACKS): + continue + + if ('video' in track['recording'] and + track['recording']['video'] == 'true' and + config['match']['ignore_video_tracks']): + continue + # Basic information from the recording. index += 1 ti = track_info( @@ -265,8 +360,9 @@ def album_info(release): index, int(medium['position']), int(track['position']), - len(medium['track-list']), + track_count, ) + ti.release_track_id = track['id'] ti.disctitle = disctitle ti.media = format ti.track_alt = track['number'] @@ -285,15 +381,15 @@ def album_info(release): track_infos.append(ti) info = beets.autotag.hooks.AlbumInfo( - release['title'], - release['id'], - artist_name, - release['artist-credit'][0]['artist']['id'], - track_infos, + album=release['title'], + album_id=release['id'], + artist=artist_name, + artist_id=release['artist-credit'][0]['artist']['id'], + tracks=track_infos, mediums=len(release['medium-list']), artist_sort=artist_sort_name, artist_credit=artist_credit_name, - data_source=u'MusicBrainz', + data_source='MusicBrainz', data_url=album_url(release['id']), ) info.va = info.artist_id == VARIOUS_ARTISTS_ID @@ -301,25 +397,36 @@ def album_info(release): info.artist = config['va_name'].as_str() info.asin = release.get('asin') info.releasegroup_id = release['release-group']['id'] - info.country = release.get('country') info.albumstatus = release.get('status') - # Build up the disambiguation string from the release group and release. - disambig = [] + # Get the disambiguation strings at the release and release group level. if release['release-group'].get('disambiguation'): - disambig.append(release['release-group'].get('disambiguation')) + info.releasegroupdisambig = \ + release['release-group'].get('disambiguation') if release.get('disambiguation'): - disambig.append(release.get('disambiguation')) - info.albumdisambig = u', '.join(disambig) + info.albumdisambig = release.get('disambiguation') - # Release type not always populated. + # Get the "classic" Release type. This data comes from a legacy API + # feature before MusicBrainz supported multiple release types. if 'type' in release['release-group']: reltype = release['release-group']['type'] if reltype: info.albumtype = reltype.lower() - # Release dates. - release_date = release.get('date') + # Set the new-style "primary" and "secondary" release types. + albumtypes = [] + if 'primary-type' in release['release-group']: + rel_primarytype = release['release-group']['primary-type'] + if rel_primarytype: + albumtypes.append(rel_primarytype.lower()) + if 'secondary-type-list' in release['release-group']: + if release['release-group']['secondary-type-list']: + for sec_type in release['release-group']['secondary-type-list']: + albumtypes.append(sec_type.lower()) + info.albumtypes = '; '.join(albumtypes) + + # Release events. + info.country, release_date = _preferred_release_event(release) release_group_date = release['release-group'].get('first-release-date') if not release_date: # Fall back if release-specific date is not available. @@ -347,17 +454,33 @@ def album_info(release): first_medium = release['medium-list'][0] info.media = first_medium.get('format') + if config['musicbrainz']['genres']: + sources = [ + release['release-group'].get('genre-list', []), + release.get('genre-list', []), + ] + genres = Counter() + for source in sources: + for genreitem in source: + genres[genreitem['name']] += int(genreitem['count']) + info.genre = '; '.join(g[0] for g in sorted(genres.items(), + key=lambda g: -g[1])) + + extra_albumdatas = plugins.send('mb_album_extract', data=release) + for extra_albumdata in extra_albumdatas: + info.update(extra_albumdata) + info.decode() return info -def match_album(artist, album, tracks=None): +def match_album(artist, album, tracks=None, extra_tags=None): """Searches for a single album ("release" in MusicBrainz parlance) and returns an iterator over AlbumInfo objects. May raise a MusicBrainzAPIError. The query consists of an artist name, an album name, and, - optionally, a number of tracks on the album. + optionally, a number of tracks on the album and any other extra tags. """ # Build search criteria. criteria = {'release': album.lower().strip()} @@ -367,14 +490,24 @@ def match_album(artist, album, tracks=None): # Various Artists search. criteria['arid'] = VARIOUS_ARTISTS_ID if tracks is not None: - criteria['tracks'] = six.text_type(tracks) + criteria['tracks'] = str(tracks) + + # Additional search cues from existing metadata. + if extra_tags: + for tag in extra_tags: + key = FIELDS_TO_MB_KEYS[tag] + value = str(extra_tags.get(tag, '')).lower().strip() + if key == 'catno': + value = value.replace(' ', '') + if value: + criteria[key] = value # Abort if we have no search terms. if not any(criteria.values()): return try: - log.debug(u'Searching for MusicBrainz releases with: {!r}', criteria) + log.debug('Searching for MusicBrainz releases with: {!r}', criteria) res = musicbrainzngs.search_releases( limit=config['musicbrainz']['searchlimit'].get(int), **criteria) except musicbrainzngs.MusicBrainzError as exc: @@ -415,7 +548,7 @@ def _parse_id(s): no ID can be found, return None. """ # Find the first thing that looks like a UUID/MBID. - match = re.search(u'[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) + match = re.search('[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}', s) if match: return match.group() @@ -425,19 +558,19 @@ def album_for_id(releaseid): object or None if the album is not found. May raise a MusicBrainzAPIError. """ - log.debug(u'Requesting MusicBrainz release {}', releaseid) + log.debug('Requesting MusicBrainz release {}', releaseid) albumid = _parse_id(releaseid) if not albumid: - log.debug(u'Invalid MBID ({0}).', releaseid) + log.debug('Invalid MBID ({0}).', releaseid) return try: res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES) except musicbrainzngs.ResponseError: - log.debug(u'Album ID match failed.') + log.debug('Album ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, u'get release by ID', albumid, + raise MusicBrainzAPIError(exc, 'get release by ID', albumid, traceback.format_exc()) return album_info(res['release']) @@ -448,14 +581,14 @@ def track_for_id(releaseid): """ trackid = _parse_id(releaseid) if not trackid: - log.debug(u'Invalid MBID ({0}).', releaseid) + log.debug('Invalid MBID ({0}).', releaseid) return try: res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES) except musicbrainzngs.ResponseError: - log.debug(u'Track ID match failed.') + log.debug('Track ID match failed.') return None except musicbrainzngs.MusicBrainzError as exc: - raise MusicBrainzAPIError(exc, u'get recording by ID', trackid, + raise MusicBrainzAPIError(exc, 'get recording by ID', trackid, traceback.format_exc()) return track_info(res['recording']) diff --git a/lib/beets/config_default.yaml b/lib/beets/config_default.yaml old mode 100755 new mode 100644 index 3b037796..74540891 --- a/lib/beets/config_default.yaml +++ b/lib/beets/config_default.yaml @@ -7,9 +7,12 @@ import: move: no link: no hardlink: no + reflink: no delete: no resume: ask incremental: no + incremental_skip_later: no + from_scratch: no quiet_fallback: skip none_rec_action: ask timid: no @@ -25,6 +28,8 @@ import: pretend: false search_ids: [] duplicate_action: ask + bell: no + set_fields: {} clutter: ["Thumbs.DB", ".DS_Store"] ignore: [".*", "*~", "System Volume Information", "lost+found"] @@ -38,11 +43,22 @@ replace: '\.$': _ '\s+$': '' '^\s+': '' + '^-': _ path_sep_replace: _ +drive_sep_replace: _ asciify_paths: false art_filename: cover max_filename_length: 0 +aunique: + keys: albumartist album + disambiguators: albumtype year label catalognum albumdisambig releasegroupdisambig + bracket: '[]' + +overwrite_null: + album: [] + track: [] + plugins: [] pluginpath: [] threaded: yes @@ -51,6 +67,7 @@ per_disc_numbering: no verbose: 0 terminal_encoding: original_date: no +artist_credit: no id3v23: no va_name: "Various Artists" @@ -85,9 +102,12 @@ statefile: state.pickle musicbrainz: host: musicbrainz.org + https: no ratelimit: 1 ratelimit_interval: 1.0 searchlimit: 5 + extra_tags: [] + genres: no match: strong_rec_thresh: 0.04 @@ -122,5 +142,8 @@ match: original_year: no ignored: [] required: [] + ignored_media: [] + ignore_data_tracks: yes + ignore_video_tracks: yes track_length_grace: 10 track_length_max: 30 diff --git a/lib/beets/dbcore/__init__.py b/lib/beets/dbcore/__init__.py old mode 100755 new mode 100644 index 689e7202..923c34ca --- a/lib/beets/dbcore/__init__.py +++ b/lib/beets/dbcore/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,7 +15,6 @@ """DBCore is an abstract database package that forms the basis for beets' Library. """ -from __future__ import division, absolute_import, print_function from .db import Model, Database from .query import Query, FieldQuery, MatchQuery, AndQuery, OrQuery diff --git a/lib/beets/dbcore/db.py b/lib/beets/dbcore/db.py index 6b0ed8b4..acd131be 100755 --- a/lib/beets/dbcore/db.py +++ b/lib/beets/dbcore/db.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,38 +14,56 @@ """The central Model and Database constructs for DBCore. """ -from __future__ import division, absolute_import, print_function import time import os +import re from collections import defaultdict import threading import sqlite3 import contextlib -import collections import beets -from beets.util.functemplate import Template +from beets.util import functemplate from beets.util import py3_path from beets.dbcore import types from .query import MatchQuery, NullSort, TrueQuery -import six +from collections.abc import Mapping -class FormattedMapping(collections.Mapping): +class DBAccessError(Exception): + """The SQLite database became inaccessible. + + This can happen when trying to read or write the database when, for + example, the database file is deleted or otherwise disappears. There + is probably no way to recover from this error. + """ + + +class FormattedMapping(Mapping): """A `dict`-like formatted view of a model. The accessor `mapping[key]` returns the formatted version of `model[key]` as a unicode string. + The `included_keys` parameter allows filtering the fields that are + returned. By default all fields are returned. Limiting to specific keys can + avoid expensive per-item database queries. + If `for_path` is true, all path separators in the formatted values are replaced. """ - def __init__(self, model, for_path=False): + ALL_KEYS = '*' + + def __init__(self, model, included_keys=ALL_KEYS, for_path=False): self.for_path = for_path self.model = model - self.model_keys = model.keys(True) + if included_keys == self.ALL_KEYS: + # Performance note: this triggers a database query. + self.model_keys = self.model.keys(True) + else: + self.model_keys = included_keys def __getitem__(self, key): if key in self.model_keys: @@ -63,7 +80,7 @@ class FormattedMapping(collections.Mapping): def get(self, key, default=None): if default is None: default = self.model._type(key).format(None) - return super(FormattedMapping, self).get(key, default) + return super().get(key, default) def _get_formatted(self, model, key): value = model._type(key).format(model.get(key)) @@ -72,6 +89,11 @@ class FormattedMapping(collections.Mapping): if self.for_path: sep_repl = beets.config['path_sep_replace'].as_str() + sep_drive = beets.config['drive_sep_replace'].as_str() + + if re.match(r'^\w:', value): + value = re.sub(r'(?<=^\w):', sep_drive, value) + for sep in (os.path.sep, os.path.altsep): if sep: value = value.replace(sep, sep_repl) @@ -79,11 +101,105 @@ class FormattedMapping(collections.Mapping): return value +class LazyConvertDict: + """Lazily convert types for attributes fetched from the database + """ + + def __init__(self, model_cls): + """Initialize the object empty + """ + self.data = {} + self.model_cls = model_cls + self._converted = {} + + def init(self, data): + """Set the base data that should be lazily converted + """ + self.data = data + + def _convert(self, key, value): + """Convert the attribute type according the the SQL type + """ + return self.model_cls._type(key).from_sql(value) + + def __setitem__(self, key, value): + """Set an attribute value, assume it's already converted + """ + self._converted[key] = value + + def __getitem__(self, key): + """Get an attribute value, converting the type on demand + if needed + """ + if key in self._converted: + return self._converted[key] + elif key in self.data: + value = self._convert(key, self.data[key]) + self._converted[key] = value + return value + + def __delitem__(self, key): + """Delete both converted and base data + """ + if key in self._converted: + del self._converted[key] + if key in self.data: + del self.data[key] + + def keys(self): + """Get a list of available field names for this object. + """ + return list(self._converted.keys()) + list(self.data.keys()) + + def copy(self): + """Create a copy of the object. + """ + new = self.__class__(self.model_cls) + new.data = self.data.copy() + new._converted = self._converted.copy() + return new + + # Act like a dictionary. + + def update(self, values): + """Assign all values in the given dict. + """ + for key, value in values.items(): + self[key] = value + + def items(self): + """Iterate over (key, value) pairs that this object contains. + Computed fields are not included. + """ + for key in self: + yield key, self[key] + + def get(self, key, default=None): + """Get the value for a given key or `default` if it does not + exist. + """ + if key in self: + return self[key] + else: + return default + + def __contains__(self, key): + """Determine whether `key` is an attribute on this object. + """ + return key in self.keys() + + def __iter__(self): + """Iterate over the available field names (excluding computed + fields). + """ + return iter(self.keys()) + + # Abstract base for model classes. -class Model(object): +class Model: """An abstract object representing an object in the database. Model - objects act like dictionaries (i.e., the allow subscript access like + objects act like dictionaries (i.e., they allow subscript access like ``obj['field']``). The same field set is available via attribute access as a shortcut (i.e., ``obj.field``). Three kinds of attributes are available: @@ -134,12 +250,22 @@ class Model(object): are subclasses of `Sort`. """ + _queries = {} + """Named queries that use a field-like `name:value` syntax but which + do not relate to any specific field. + """ + _always_dirty = False """By default, fields only become "dirty" when their value actually changes. Enabling this flag marks fields as dirty even when the new value is the same as the old value (e.g., `o.f = o.f`). """ + _revision = -1 + """A revision number from when the model was loaded from or written + to the database. + """ + @classmethod def _getters(cls): """Return a mapping from field names to getter functions. @@ -163,8 +289,8 @@ class Model(object): """ self._db = db self._dirty = set() - self._values_fixed = {} - self._values_flex = {} + self._values_fixed = LazyConvertDict(self) + self._values_flex = LazyConvertDict(self) # Initial contents. self.update(values) @@ -178,23 +304,25 @@ class Model(object): ordinary construction are bypassed. """ obj = cls(db) - for key, value in fixed_values.items(): - obj._values_fixed[key] = cls._type(key).from_sql(value) - for key, value in flex_values.items(): - obj._values_flex[key] = cls._type(key).from_sql(value) + + obj._values_fixed.init(fixed_values) + obj._values_flex.init(flex_values) + return obj def __repr__(self): - return '{0}({1})'.format( + return '{}({})'.format( type(self).__name__, - ', '.join('{0}={1!r}'.format(k, v) for k, v in dict(self).items()), + ', '.join(f'{k}={v!r}' for k, v in dict(self).items()), ) def clear_dirty(self): """Mark all fields as *clean* (i.e., not needing to be stored to - the database). + the database). Also update the revision. """ self._dirty = set() + if self._db: + self._revision = self._db.revision def _check_db(self, need_id=True): """Ensure that this object is associated with a database row: it @@ -203,10 +331,25 @@ class Model(object): """ if not self._db: raise ValueError( - u'{0} has no database'.format(type(self).__name__) + '{} has no database'.format(type(self).__name__) ) if need_id and not self.id: - raise ValueError(u'{0} has no id'.format(type(self).__name__)) + raise ValueError('{} has no id'.format(type(self).__name__)) + + def copy(self): + """Create a copy of the model object. + + The field values and other state is duplicated, but the new copy + remains associated with the same database as the old object. + (A simple `copy.deepcopy` will not work because it would try to + duplicate the SQLite connection.) + """ + new = self.__class__() + new._db = self._db + new._values_fixed = self._values_fixed.copy() + new._values_flex = self._values_flex.copy() + new._dirty = self._dirty.copy() + return new # Essential field accessors. @@ -219,22 +362,36 @@ class Model(object): """ return cls._fields.get(key) or cls._types.get(key) or types.DEFAULT - def __getitem__(self, key): - """Get the value for a field. Raise a KeyError if the field is - not available. + def _get(self, key, default=None, raise_=False): + """Get the value for a field, or `default`. Alternatively, + raise a KeyError if the field is not available. """ getters = self._getters() if key in getters: # Computed. return getters[key](self) elif key in self._fields: # Fixed. - return self._values_fixed.get(key) + if key in self._values_fixed: + return self._values_fixed[key] + else: + return self._type(key).null elif key in self._values_flex: # Flexible. return self._values_flex[key] - else: + elif raise_: raise KeyError(key) + else: + return default - def __setitem__(self, key, value): - """Assign the value for a field. + get = _get + + def __getitem__(self, key): + """Get the value for a field. Raise a KeyError if the field is + not available. + """ + return self._get(key, raise_=True) + + def _setitem(self, key, value): + """Assign the value for a field, return whether new and old value + differ. """ # Choose where to place the value. if key in self._fields: @@ -248,21 +405,29 @@ class Model(object): # Assign value and possibly mark as dirty. old_value = source.get(key) source[key] = value - if self._always_dirty or old_value != value: + changed = old_value != value + if self._always_dirty or changed: self._dirty.add(key) + return changed + + def __setitem__(self, key, value): + """Assign the value for a field. + """ + self._setitem(key, value) + def __delitem__(self, key): """Remove a flexible attribute from the model. """ if key in self._values_flex: # Flexible. del self._values_flex[key] self._dirty.add(key) # Mark for dropping on store. + elif key in self._fields: # Fixed + setattr(self, key, self._type(key).null) elif key in self._getters(): # Computed. - raise KeyError(u'computed field {0} cannot be deleted'.format(key)) - elif key in self._fields: # Fixed. - raise KeyError(u'fixed field {0} cannot be deleted'.format(key)) + raise KeyError(f'computed field {key} cannot be deleted') else: - raise KeyError(u'no such field {0}'.format(key)) + raise KeyError(f'no such field {key}') def keys(self, computed=False): """Get a list of available field names for this object. The @@ -297,19 +462,10 @@ class Model(object): for key in self: yield key, self[key] - def get(self, key, default=None): - """Get the value for a given key or `default` if it does not - exist. - """ - if key in self: - return self[key] - else: - return default - def __contains__(self, key): """Determine whether `key` is an attribute on this object. """ - return key in self.keys(True) + return key in self.keys(computed=True) def __iter__(self): """Iterate over the available field names (excluding computed @@ -321,22 +477,22 @@ class Model(object): def __getattr__(self, key): if key.startswith('_'): - raise AttributeError(u'model has no attribute {0!r}'.format(key)) + raise AttributeError(f'model has no attribute {key!r}') else: try: return self[key] except KeyError: - raise AttributeError(u'no such field {0!r}'.format(key)) + raise AttributeError(f'no such field {key!r}') def __setattr__(self, key, value): if key.startswith('_'): - super(Model, self).__setattr__(key, value) + super().__setattr__(key, value) else: self[key] = value def __delattr__(self, key): if key.startswith('_'): - super(Model, self).__delattr__(key) + super().__delattr__(key) else: del self[key] @@ -365,7 +521,7 @@ class Model(object): with self._db.transaction() as tx: # Main table update. if assignments: - query = 'UPDATE {0} SET {1} WHERE id=?'.format( + query = 'UPDATE {} SET {} WHERE id=?'.format( self._table, assignments ) subvars.append(self.id) @@ -376,7 +532,7 @@ class Model(object): if key in self._dirty: self._dirty.remove(key) tx.mutate( - 'INSERT INTO {0} ' + 'INSERT INTO {} ' '(entity_id, key, value) ' 'VALUES (?, ?, ?);'.format(self._flex_table), (self.id, key, value), @@ -385,7 +541,7 @@ class Model(object): # Deleted flexible attributes. for key in self._dirty: tx.mutate( - 'DELETE FROM {0} ' + 'DELETE FROM {} ' 'WHERE entity_id=? AND key=?'.format(self._flex_table), (self.id, key) ) @@ -394,12 +550,18 @@ class Model(object): def load(self): """Refresh the object's metadata from the library database. + + If check_revision is true, the database is only queried loaded when a + transaction has been committed since the item was last loaded. """ self._check_db() + if not self._dirty and self._db.revision == self._revision: + # Exit early + return stored_obj = self._db._get(type(self), self.id) - assert stored_obj is not None, u"object {0} not in DB".format(self.id) - self._values_fixed = {} - self._values_flex = {} + assert stored_obj is not None, f"object {self.id} not in DB" + self._values_fixed = LazyConvertDict(self) + self._values_flex = LazyConvertDict(self) self.update(dict(stored_obj)) self.clear_dirty() @@ -409,11 +571,11 @@ class Model(object): self._check_db() with self._db.transaction() as tx: tx.mutate( - 'DELETE FROM {0} WHERE id=?'.format(self._table), + f'DELETE FROM {self._table} WHERE id=?', (self.id,) ) tx.mutate( - 'DELETE FROM {0} WHERE entity_id=?'.format(self._flex_table), + f'DELETE FROM {self._flex_table} WHERE entity_id=?', (self.id,) ) @@ -431,7 +593,7 @@ class Model(object): with self._db.transaction() as tx: new_id = tx.mutate( - 'INSERT INTO {0} DEFAULT VALUES'.format(self._table) + f'INSERT INTO {self._table} DEFAULT VALUES' ) self.id = new_id self.added = time.time() @@ -446,11 +608,11 @@ class Model(object): _formatter = FormattedMapping - def formatted(self, for_path=False): + def formatted(self, included_keys=_formatter.ALL_KEYS, for_path=False): """Get a mapping containing all values on this object formatted as human-readable unicode strings. """ - return self._formatter(self, for_path) + return self._formatter(self, included_keys, for_path) def evaluate_template(self, template, for_path=False): """Evaluate a template (a string or a `Template` object) using @@ -458,9 +620,9 @@ class Model(object): separators will be added to the template. """ # Perform substitution. - if isinstance(template, six.string_types): - template = Template(template) - return template.substitute(self.formatted(for_path), + if isinstance(template, str): + template = functemplate.template(template) + return template.substitute(self.formatted(for_path=for_path), self._template_funcs()) # Parsing. @@ -469,8 +631,8 @@ class Model(object): def _parse(cls, key, string): """Parse a string as a value for the given key. """ - if not isinstance(string, six.string_types): - raise TypeError(u"_parse() argument must be a string") + if not isinstance(string, str): + raise TypeError("_parse() argument must be a string") return cls._type(key).parse(string) @@ -482,11 +644,13 @@ class Model(object): # Database controller and supporting interfaces. -class Results(object): +class Results: """An item query result set. Iterating over the collection lazily constructs LibModel objects that reflect database rows. """ - def __init__(self, model_class, rows, db, query=None, sort=None): + + def __init__(self, model_class, rows, db, flex_rows, + query=None, sort=None): """Create a result set that will construct objects of type `model_class`. @@ -506,6 +670,7 @@ class Results(object): self.db = db self.query = query self.sort = sort + self.flex_rows = flex_rows # We keep a queue of rows we haven't yet consumed for # materialization. We preserve the original total number of @@ -527,6 +692,10 @@ class Results(object): a `Results` object a second time should be much faster than the first. """ + + # Index flexible attributes by the item ID, so we have easier access + flex_attrs = self._get_indexed_flex_attrs() + index = 0 # Position in the materialized objects. while index < len(self._objects) or self._rows: # Are there previously-materialized objects to produce? @@ -539,7 +708,7 @@ class Results(object): else: while self._rows: row = self._rows.pop(0) - obj = self._make_model(row) + obj = self._make_model(row, flex_attrs.get(row['id'], {})) # If there is a slow-query predicate, ensurer that the # object passes it. if not self.query or self.query.match(obj): @@ -561,20 +730,24 @@ class Results(object): # Objects are pre-sorted (i.e., by the database). return self._get_objects() - def _make_model(self, row): - # Get the flexible attributes for the object. - with self.db.transaction() as tx: - flex_rows = tx.query( - 'SELECT * FROM {0} WHERE entity_id=?'.format( - self.model_class._flex_table - ), - (row['id'],) - ) + def _get_indexed_flex_attrs(self): + """ Index flexible attributes by the entity id they belong to + """ + flex_values = {} + for row in self.flex_rows: + if row['entity_id'] not in flex_values: + flex_values[row['entity_id']] = {} + flex_values[row['entity_id']][row['key']] = row['value'] + + return flex_values + + def _make_model(self, row, flex_values={}): + """ Create a Model object for the given row + """ cols = dict(row) - values = dict((k, v) for (k, v) in cols.items() - if not k[:4] == 'flex') - flex_values = dict((row['key'], row['value']) for row in flex_rows) + values = {k: v for (k, v) in cols.items() + if not k[:4] == 'flex'} # Construct the Python object obj = self.model_class._awaken(self.db, values, flex_values) @@ -623,7 +796,7 @@ class Results(object): next(it) return next(it) except StopIteration: - raise IndexError(u'result index {0} out of range'.format(n)) + raise IndexError(f'result index {n} out of range') def get(self): """Return the first matching object, or None if no objects @@ -636,10 +809,16 @@ class Results(object): return None -class Transaction(object): +class Transaction: """A context manager for safe, concurrent access to the database. All SQL commands should be executed through a transaction. """ + + _mutated = False + """A flag storing whether a mutation has been executed in the + current transaction. + """ + def __init__(self, db): self.db = db @@ -661,12 +840,15 @@ class Transaction(object): entered but not yet exited transaction. If it is the last active transaction, the database updates are committed. """ + # Beware of races; currently secured by db._db_lock + self.db.revision += self._mutated with self.db._tx_stack() as stack: assert stack.pop() is self empty = not stack if empty: # Ending a "root" transaction. End the SQLite transaction. self.db._connection().commit() + self._mutated = False self.db._db_lock.release() def query(self, statement, subvals=()): @@ -680,28 +862,52 @@ class Transaction(object): """Execute an SQL statement with substitution values and return the row ID of the last affected row. """ - cursor = self.db._connection().execute(statement, subvals) - return cursor.lastrowid + try: + cursor = self.db._connection().execute(statement, subvals) + except sqlite3.OperationalError as e: + # In two specific cases, SQLite reports an error while accessing + # the underlying database file. We surface these exceptions as + # DBAccessError so the application can abort. + if e.args[0] in ("attempt to write a readonly database", + "unable to open database file"): + raise DBAccessError(e.args[0]) + else: + raise + else: + self._mutated = True + return cursor.lastrowid def script(self, statements): """Execute a string containing multiple SQL statements.""" + # We don't know whether this mutates, but quite likely it does. + self._mutated = True self.db._connection().executescript(statements) -class Database(object): +class Database: """A container for Model objects that wraps an SQLite database as the backend. """ + _models = () """The Model subclasses representing tables in this database. """ + supports_extensions = hasattr(sqlite3.Connection, 'enable_load_extension') + """Whether or not the current version of SQLite supports extensions""" + + revision = 0 + """The current revision of the database. To be increased whenever + data is written in a transaction. + """ + def __init__(self, path, timeout=5.0): self.path = path self.timeout = timeout self._connections = {} self._tx_stacks = defaultdict(list) + self._extensions = [] # A lock to protect the _connections and _tx_stacks maps, which # both map thread IDs to private resources. @@ -751,6 +957,13 @@ class Database(object): py3_path(self.path), timeout=self.timeout ) + if self.supports_extensions: + conn.enable_load_extension(True) + + # Load any extension that are already loaded for other connections. + for path in self._extensions: + conn.load_extension(path) + # Access SELECT results like dictionaries. conn.row_factory = sqlite3.Row return conn @@ -779,6 +992,18 @@ class Database(object): """ return Transaction(self) + def load_extension(self, path): + """Load an SQLite extension into all open connections.""" + if not self.supports_extensions: + raise ValueError( + 'this sqlite3 installation does not support extensions') + + self._extensions.append(path) + + # Load the extension into every open connection. + for conn in self._connections.values(): + conn.load_extension(path) + # Schema setup and migration. def _make_table(self, table, fields): @@ -788,7 +1013,7 @@ class Database(object): # Get current schema. with self.transaction() as tx: rows = tx.query('PRAGMA table_info(%s)' % table) - current_fields = set([row[1] for row in rows]) + current_fields = {row[1] for row in rows} field_names = set(fields.keys()) if current_fields.issuperset(field_names): @@ -799,9 +1024,9 @@ class Database(object): # No table exists. columns = [] for name, typ in fields.items(): - columns.append('{0} {1}'.format(name, typ.sql)) - setup_sql = 'CREATE TABLE {0} ({1});\n'.format(table, - ', '.join(columns)) + columns.append(f'{name} {typ.sql}') + setup_sql = 'CREATE TABLE {} ({});\n'.format(table, + ', '.join(columns)) else: # Table exists does not match the field set. @@ -809,7 +1034,7 @@ class Database(object): for name, typ in fields.items(): if name in current_fields: continue - setup_sql += 'ALTER TABLE {0} ADD COLUMN {1} {2};\n'.format( + setup_sql += 'ALTER TABLE {} ADD COLUMN {} {};\n'.format( table, name, typ.sql ) @@ -845,17 +1070,31 @@ class Database(object): where, subvals = query.clause() order_by = sort.order_clause() - sql = ("SELECT * FROM {0} WHERE {1} {2}").format( + sql = ("SELECT * FROM {} WHERE {} {}").format( model_cls._table, where or '1', - "ORDER BY {0}".format(order_by) if order_by else '', + f"ORDER BY {order_by}" if order_by else '', + ) + + # Fetch flexible attributes for items matching the main query. + # Doing the per-item filtering in python is faster than issuing + # one query per item to sqlite. + flex_sql = (""" + SELECT * FROM {} WHERE entity_id IN + (SELECT id FROM {} WHERE {}); + """.format( + model_cls._flex_table, + model_cls._table, + where or '1', + ) ) with self.transaction() as tx: rows = tx.query(sql, subvals) + flex_rows = tx.query(flex_sql, subvals) return Results( - model_cls, rows, self, + model_cls, rows, self, flex_rows, None if where else query, # Slow query component. sort if sort.is_slow() else None, # Slow sort component. ) diff --git a/lib/beets/dbcore/query.py b/lib/beets/dbcore/query.py old mode 100755 new mode 100644 index 470ca2ac..96476a5b --- a/lib/beets/dbcore/query.py +++ b/lib/beets/dbcore/query.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,7 +14,6 @@ """The Query type hierarchy for DBCore. """ -from __future__ import division, absolute_import, print_function import re from operator import mul @@ -23,10 +21,6 @@ from beets import util from datetime import datetime, timedelta import unicodedata from functools import reduce -import six - -if not six.PY2: - buffer = memoryview # sqlite won't accept memoryview in python 2 class ParsingError(ValueError): @@ -40,29 +34,32 @@ class InvalidQueryError(ParsingError): The query should be a unicode string or a list, which will be space-joined. """ + def __init__(self, query, explanation): if isinstance(query, list): query = " ".join(query) - message = u"'{0}': {1}".format(query, explanation) - super(InvalidQueryError, self).__init__(message) + message = f"'{query}': {explanation}" + super().__init__(message) -class InvalidQueryArgumentTypeError(ParsingError): +class InvalidQueryArgumentValueError(ParsingError): """Represent a query argument that could not be converted as expected. It exists to be caught in upper stack levels so a meaningful (i.e. with the query) InvalidQueryError can be raised. """ + def __init__(self, what, expected, detail=None): - message = u"'{0}' is not {1}".format(what, expected) + message = f"'{what}' is not {expected}" if detail: - message = u"{0}: {1}".format(message, detail) - super(InvalidQueryArgumentTypeError, self).__init__(message) + message = f"{message}: {detail}" + super().__init__(message) -class Query(object): +class Query: """An abstract class representing a query into the item database. """ + def clause(self): """Generate an SQLite expression implementing the query. @@ -79,7 +76,7 @@ class Query(object): raise NotImplementedError def __repr__(self): - return "{0.__class__.__name__}()".format(self) + return f"{self.__class__.__name__}()" def __eq__(self, other): return type(self) == type(other) @@ -95,6 +92,7 @@ class FieldQuery(Query): string. Subclasses may also provide `col_clause` to implement the same matching functionality in SQLite. """ + def __init__(self, field, pattern, fast=True): self.field = field self.pattern = pattern @@ -125,7 +123,7 @@ class FieldQuery(Query): "{0.fast})".format(self)) def __eq__(self, other): - return super(FieldQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.field == other.field and self.pattern == other.pattern def __hash__(self): @@ -134,6 +132,7 @@ class FieldQuery(Query): class MatchQuery(FieldQuery): """A query that looks for exact matches in an item field.""" + def col_clause(self): return self.field + " = ?", [self.pattern] @@ -143,19 +142,16 @@ class MatchQuery(FieldQuery): class NoneQuery(FieldQuery): + """A query that checks whether a field is null.""" def __init__(self, field, fast=True): - super(NoneQuery, self).__init__(field, None, fast) + super().__init__(field, None, fast) def col_clause(self): return self.field + " IS NULL", () - @classmethod - def match(cls, item): - try: - return item[cls.field] is None - except KeyError: - return True + def match(self, item): + return item.get(self.field) is None def __repr__(self): return "{0.__class__.__name__}({0.field!r}, {0.fast})".format(self) @@ -165,6 +161,7 @@ class StringFieldQuery(FieldQuery): """A FieldQuery that converts values to strings before matching them. """ + @classmethod def value_match(cls, pattern, value): """Determine whether the value matches the pattern. The value @@ -182,11 +179,12 @@ class StringFieldQuery(FieldQuery): class SubstringQuery(StringFieldQuery): """A query that matches a substring in a specific item field.""" + def col_clause(self): pattern = (self.pattern - .replace('\\', '\\\\') - .replace('%', '\\%') - .replace('_', '\\_')) + .replace('\\', '\\\\') + .replace('%', '\\%') + .replace('_', '\\_')) search = '%' + pattern + '%' clause = self.field + " like ? escape '\\'" subvals = [search] @@ -204,16 +202,17 @@ class RegexpQuery(StringFieldQuery): Raises InvalidQueryError when the pattern is not a valid regular expression. """ + def __init__(self, field, pattern, fast=True): - super(RegexpQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) pattern = self._normalize(pattern) try: self.pattern = re.compile(self.pattern) except re.error as exc: # Invalid regular expression. - raise InvalidQueryArgumentTypeError(pattern, - u"a regular expression", - format(exc)) + raise InvalidQueryArgumentValueError(pattern, + "a regular expression", + format(exc)) @staticmethod def _normalize(s): @@ -231,9 +230,10 @@ class BooleanQuery(MatchQuery): """Matches a boolean field. Pattern should either be a boolean or a string reflecting a boolean. """ + def __init__(self, field, pattern, fast=True): - super(BooleanQuery, self).__init__(field, pattern, fast) - if isinstance(pattern, six.string_types): + super().__init__(field, pattern, fast) + if isinstance(pattern, str): self.pattern = util.str2bool(pattern) self.pattern = int(self.pattern) @@ -244,17 +244,18 @@ class BytesQuery(MatchQuery): `unicode` equivalently in Python 2. Always use this query instead of `MatchQuery` when matching on BLOB values. """ + def __init__(self, field, pattern): - super(BytesQuery, self).__init__(field, pattern) + super().__init__(field, pattern) # Use a buffer/memoryview representation of the pattern for SQLite # matching. This instructs SQLite to treat the blob as binary # rather than encoded Unicode. - if isinstance(self.pattern, (six.text_type, bytes)): - if isinstance(self.pattern, six.text_type): + if isinstance(self.pattern, (str, bytes)): + if isinstance(self.pattern, str): self.pattern = self.pattern.encode('utf-8') - self.buf_pattern = buffer(self.pattern) - elif isinstance(self.pattern, buffer): + self.buf_pattern = memoryview(self.pattern) + elif isinstance(self.pattern, memoryview): self.buf_pattern = self.pattern self.pattern = bytes(self.pattern) @@ -270,6 +271,7 @@ class NumericQuery(FieldQuery): Raises InvalidQueryError when the pattern does not represent an int or a float. """ + def _convert(self, s): """Convert a string to a numeric type (float or int). @@ -285,10 +287,10 @@ class NumericQuery(FieldQuery): try: return float(s) except ValueError: - raise InvalidQueryArgumentTypeError(s, u"an int or a float") + raise InvalidQueryArgumentValueError(s, "an int or a float") def __init__(self, field, pattern, fast=True): - super(NumericQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) parts = pattern.split('..', 1) if len(parts) == 1: @@ -306,7 +308,7 @@ class NumericQuery(FieldQuery): if self.field not in item: return False value = item[self.field] - if isinstance(value, six.string_types): + if isinstance(value, str): value = self._convert(value) if self.point is not None: @@ -323,20 +325,21 @@ class NumericQuery(FieldQuery): return self.field + '=?', (self.point,) else: if self.rangemin is not None and self.rangemax is not None: - return (u'{0} >= ? AND {0} <= ?'.format(self.field), + return ('{0} >= ? AND {0} <= ?'.format(self.field), (self.rangemin, self.rangemax)) elif self.rangemin is not None: - return u'{0} >= ?'.format(self.field), (self.rangemin,) + return f'{self.field} >= ?', (self.rangemin,) elif self.rangemax is not None: - return u'{0} <= ?'.format(self.field), (self.rangemax,) + return f'{self.field} <= ?', (self.rangemax,) else: - return u'1', () + return '1', () class CollectionQuery(Query): """An abstract query class that aggregates other queries. Can be indexed like a list to access the sub-queries. """ + def __init__(self, subqueries=()): self.subqueries = subqueries @@ -374,7 +377,7 @@ class CollectionQuery(Query): return "{0.__class__.__name__}({0.subqueries!r})".format(self) def __eq__(self, other): - return super(CollectionQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.subqueries == other.subqueries def __hash__(self): @@ -389,6 +392,7 @@ class AnyFieldQuery(CollectionQuery): any field. The individual field query class is provided to the constructor. """ + def __init__(self, pattern, fields, cls): self.pattern = pattern self.fields = fields @@ -397,7 +401,7 @@ class AnyFieldQuery(CollectionQuery): subqueries = [] for field in self.fields: subqueries.append(cls(field, pattern, True)) - super(AnyFieldQuery, self).__init__(subqueries) + super().__init__(subqueries) def clause(self): return self.clause_with_joiner('or') @@ -413,7 +417,7 @@ class AnyFieldQuery(CollectionQuery): "{0.query_class.__name__})".format(self)) def __eq__(self, other): - return super(AnyFieldQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.query_class == other.query_class def __hash__(self): @@ -424,6 +428,7 @@ class MutableCollectionQuery(CollectionQuery): """A collection query whose subqueries may be modified after the query is initialized. """ + def __setitem__(self, key, value): self.subqueries[key] = value @@ -433,33 +438,36 @@ class MutableCollectionQuery(CollectionQuery): class AndQuery(MutableCollectionQuery): """A conjunction of a list of other queries.""" + def clause(self): return self.clause_with_joiner('and') def match(self, item): - return all([q.match(item) for q in self.subqueries]) + return all(q.match(item) for q in self.subqueries) class OrQuery(MutableCollectionQuery): """A conjunction of a list of other queries.""" + def clause(self): return self.clause_with_joiner('or') def match(self, item): - return any([q.match(item) for q in self.subqueries]) + return any(q.match(item) for q in self.subqueries) class NotQuery(Query): """A query that matches the negation of its `subquery`, as a shorcut for performing `not(subquery)` without using regular expressions. """ + def __init__(self, subquery): self.subquery = subquery def clause(self): clause, subvals = self.subquery.clause() if clause: - return 'not ({0})'.format(clause), subvals + return f'not ({clause})', subvals else: # If there is no clause, there is nothing to negate. All the logic # is handled by match() for slow queries. @@ -472,7 +480,7 @@ class NotQuery(Query): return "{0.__class__.__name__}({0.subquery!r})".format(self) def __eq__(self, other): - return super(NotQuery, self).__eq__(other) and \ + return super().__eq__(other) and \ self.subquery == other.subquery def __hash__(self): @@ -481,6 +489,7 @@ class NotQuery(Query): class TrueQuery(Query): """A query that always matches.""" + def clause(self): return '1', () @@ -490,6 +499,7 @@ class TrueQuery(Query): class FalseQuery(Query): """A query that never matches.""" + def clause(self): return '0', () @@ -526,42 +536,88 @@ def _parse_periods(pattern): return (start, end) -class Period(object): +class Period: """A period of time given by a date, time and precision. Example: 2014-01-01 10:50:30 with precision 'month' represents all instants of time during January 2014. """ - precisions = ('year', 'month', 'day') - date_formats = ('%Y', '%Y-%m', '%Y-%m-%d') + precisions = ('year', 'month', 'day', 'hour', 'minute', 'second') + date_formats = ( + ('%Y',), # year + ('%Y-%m',), # month + ('%Y-%m-%d',), # day + ('%Y-%m-%dT%H', '%Y-%m-%d %H'), # hour + ('%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M'), # minute + ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S') # second + ) + relative_units = {'y': 365, 'm': 30, 'w': 7, 'd': 1} + relative_re = '(?P[+|-]?)(?P[0-9]+)' + \ + '(?P[y|m|w|d])' def __init__(self, date, precision): """Create a period with the given date (a `datetime` object) and - precision (a string, one of "year", "month", or "day"). + precision (a string, one of "year", "month", "day", "hour", "minute", + or "second"). """ if precision not in Period.precisions: - raise ValueError(u'Invalid precision {0}'.format(precision)) + raise ValueError(f'Invalid precision {precision}') self.date = date self.precision = precision @classmethod def parse(cls, string): """Parse a date and return a `Period` object or `None` if the - string is empty. + string is empty, or raise an InvalidQueryArgumentValueError if + the string cannot be parsed to a date. + + The date may be absolute or relative. Absolute dates look like + `YYYY`, or `YYYY-MM-DD`, or `YYYY-MM-DD HH:MM:SS`, etc. Relative + dates have three parts: + + - Optionally, a ``+`` or ``-`` sign indicating the future or the + past. The default is the future. + - A number: how much to add or subtract. + - A letter indicating the unit: days, weeks, months or years + (``d``, ``w``, ``m`` or ``y``). A "month" is exactly 30 days + and a "year" is exactly 365 days. """ + + def find_date_and_format(string): + for ord, format in enumerate(cls.date_formats): + for format_option in format: + try: + date = datetime.strptime(string, format_option) + return date, ord + except ValueError: + # Parsing failed. + pass + return (None, None) + if not string: return None - ordinal = string.count('-') - if ordinal >= len(cls.date_formats): - # Too many components. - return None - date_format = cls.date_formats[ordinal] - try: - date = datetime.strptime(string, date_format) - except ValueError: - # Parsing failed. - return None + + # Check for a relative date. + match_dq = re.match(cls.relative_re, string) + if match_dq: + sign = match_dq.group('sign') + quantity = match_dq.group('quantity') + timespan = match_dq.group('timespan') + + # Add or subtract the given amount of time from the current + # date. + multiplier = -1 if sign == '-' else 1 + days = cls.relative_units[timespan] + date = datetime.now() + \ + timedelta(days=int(quantity) * days) * multiplier + return cls(date, cls.precisions[5]) + + # Check for an absolute date. + date, ordinal = find_date_and_format(string) + if date is None: + raise InvalidQueryArgumentValueError(string, + 'a valid date/time string') precision = cls.precisions[ordinal] return cls(date, precision) @@ -580,11 +636,17 @@ class Period(object): return date.replace(year=date.year + 1, month=1) elif 'day' == precision: return date + timedelta(days=1) + elif 'hour' == precision: + return date + timedelta(hours=1) + elif 'minute' == precision: + return date + timedelta(minutes=1) + elif 'second' == precision: + return date + timedelta(seconds=1) else: - raise ValueError(u'unhandled precision {0}'.format(precision)) + raise ValueError(f'unhandled precision {precision}') -class DateInterval(object): +class DateInterval: """A closed-open interval of dates. A left endpoint of None means since the beginning of time. @@ -593,7 +655,7 @@ class DateInterval(object): def __init__(self, start, end): if start is not None and end is not None and not start < end: - raise ValueError(u"start date {0} is not before end date {1}" + raise ValueError("start date {} is not before end date {}" .format(start, end)) self.start = start self.end = end @@ -614,7 +676,7 @@ class DateInterval(object): return True def __str__(self): - return '[{0}, {1})'.format(self.start, self.end) + return f'[{self.start}, {self.end})' class DateQuery(FieldQuery): @@ -626,8 +688,9 @@ class DateQuery(FieldQuery): The value of a date field can be matched against a date interval by using an ellipsis interval syntax similar to that of NumericQuery. """ + def __init__(self, field, pattern, fast=True): - super(DateQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) start, end = _parse_periods(pattern) self.interval = DateInterval.from_periods(start, end) @@ -635,7 +698,7 @@ class DateQuery(FieldQuery): if self.field not in item: return False timestamp = float(item[self.field]) - date = datetime.utcfromtimestamp(timestamp) + date = datetime.fromtimestamp(timestamp) return self.interval.contains(date) _clause_tmpl = "{0} {1} ?" @@ -669,6 +732,7 @@ class DurationQuery(NumericQuery): Raises InvalidQueryError when the pattern does not represent an int, float or M:SS time interval. """ + def _convert(self, s): """Convert a M:SS or numeric string to a float. @@ -683,14 +747,14 @@ class DurationQuery(NumericQuery): try: return float(s) except ValueError: - raise InvalidQueryArgumentTypeError( + raise InvalidQueryArgumentValueError( s, - u"a M:SS string or a float") + "a M:SS string or a float") # Sorting. -class Sort(object): +class Sort: """An abstract class representing a sort operation for a query into the item database. """ @@ -777,13 +841,13 @@ class MultipleSort(Sort): return items def __repr__(self): - return 'MultipleSort({!r})'.format(self.sorts) + return f'MultipleSort({self.sorts!r})' def __hash__(self): return hash(tuple(self.sorts)) def __eq__(self, other): - return super(MultipleSort, self).__eq__(other) and \ + return super().__eq__(other) and \ self.sorts == other.sorts @@ -791,6 +855,7 @@ class FieldSort(Sort): """An abstract sort criterion that orders by a specific field (of any kind). """ + def __init__(self, field, ascending=True, case_insensitive=True): self.field = field self.ascending = ascending @@ -803,14 +868,14 @@ class FieldSort(Sort): def key(item): field_val = item.get(self.field, '') - if self.case_insensitive and isinstance(field_val, six.text_type): + if self.case_insensitive and isinstance(field_val, str): field_val = field_val.lower() return field_val return sorted(objs, key=key, reverse=not self.ascending) def __repr__(self): - return '<{0}: {1}{2}>'.format( + return '<{}: {}{}>'.format( type(self).__name__, self.field, '+' if self.ascending else '-', @@ -820,7 +885,7 @@ class FieldSort(Sort): return hash((self.field, self.ascending)) def __eq__(self, other): - return super(FieldSort, self).__eq__(other) and \ + return super().__eq__(other) and \ self.field == other.field and \ self.ascending == other.ascending @@ -828,6 +893,7 @@ class FieldSort(Sort): class FixedFieldSort(FieldSort): """Sort object to sort on a fixed field. """ + def order_clause(self): order = "ASC" if self.ascending else "DESC" if self.case_insensitive: @@ -837,19 +903,21 @@ class FixedFieldSort(FieldSort): 'ELSE {0} END)'.format(self.field) else: field = self.field - return "{0} {1}".format(field, order) + return f"{field} {order}" class SlowFieldSort(FieldSort): """A sort criterion by some model field other than a fixed field: i.e., a computed or flexible field. """ + def is_slow(self): return True class NullSort(Sort): """No sorting. Leave results unsorted.""" + def sort(self, items): return items diff --git a/lib/beets/dbcore/queryparse.py b/lib/beets/dbcore/queryparse.py old mode 100755 new mode 100644 index bc9cc77e..3bf02e4d --- a/lib/beets/dbcore/queryparse.py +++ b/lib/beets/dbcore/queryparse.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,12 +14,10 @@ """Parsing of strings into DBCore queries. """ -from __future__ import division, absolute_import, print_function import re import itertools from . import query -import beets PARSE_QUERY_PART_REGEX = re.compile( # Non-capturing optional segment for the keyword. @@ -89,7 +86,7 @@ def parse_query_part(part, query_classes={}, prefixes={}, assert match # Regex should always match negate = bool(match.group(1)) key = match.group(2) - term = match.group(3).replace('\:', ':') + term = match.group(3).replace('\\:', ':') # Check whether there's a prefix in the query and use the # corresponding query type. @@ -119,12 +116,13 @@ def construct_query_part(model_cls, prefixes, query_part): if not query_part: return query.TrueQuery() - # Use `model_cls` to build up a map from field names to `Query` - # classes. + # Use `model_cls` to build up a map from field (or query) names to + # `Query` classes. query_classes = {} for k, t in itertools.chain(model_cls._fields.items(), model_cls._types.items()): query_classes[k] = t.query + query_classes.update(model_cls._queries) # Non-field queries. # Parse the string. key, pattern, query_class, negate = \ @@ -137,26 +135,27 @@ def construct_query_part(model_cls, prefixes, query_part): # The query type matches a specific field, but none was # specified. So we use a version of the query that matches # any field. - q = query.AnyFieldQuery(pattern, model_cls._search_fields, - query_class) - if negate: - return query.NotQuery(q) - else: - return q + out_query = query.AnyFieldQuery(pattern, model_cls._search_fields, + query_class) else: # Non-field query type. - if negate: - return query.NotQuery(query_class(pattern)) - else: - return query_class(pattern) + out_query = query_class(pattern) - # Otherwise, this must be a `FieldQuery`. Use the field name to - # construct the query object. - key = key.lower() - q = query_class(key.lower(), pattern, key in model_cls._fields) + # Field queries get constructed according to the name of the field + # they are querying. + elif issubclass(query_class, query.FieldQuery): + key = key.lower() + out_query = query_class(key.lower(), pattern, key in model_cls._fields) + + # Non-field (named) query. + else: + out_query = query_class(pattern) + + # Apply negation. if negate: - return query.NotQuery(q) - return q + return query.NotQuery(out_query) + else: + return out_query def query_from_strings(query_cls, model_cls, prefixes, query_parts): @@ -172,11 +171,13 @@ def query_from_strings(query_cls, model_cls, prefixes, query_parts): return query_cls(subqueries) -def construct_sort_part(model_cls, part): +def construct_sort_part(model_cls, part, case_insensitive=True): """Create a `Sort` from a single string criterion. `model_cls` is the `Model` being queried. `part` is a single string - ending in ``+`` or ``-`` indicating the sort. + ending in ``+`` or ``-`` indicating the sort. `case_insensitive` + indicates whether or not the sort should be performed in a case + sensitive manner. """ assert part, "part must be a field name and + or -" field = part[:-1] @@ -185,7 +186,6 @@ def construct_sort_part(model_cls, part): assert direction in ('+', '-'), "part must end with + or -" is_ascending = direction == '+' - case_insensitive = beets.config['sort_case_insensitive'].get(bool) if field in model_cls._sorts: sort = model_cls._sorts[field](model_cls, is_ascending, case_insensitive) @@ -197,21 +197,23 @@ def construct_sort_part(model_cls, part): return sort -def sort_from_strings(model_cls, sort_parts): +def sort_from_strings(model_cls, sort_parts, case_insensitive=True): """Create a `Sort` from a list of sort criteria (strings). """ if not sort_parts: sort = query.NullSort() elif len(sort_parts) == 1: - sort = construct_sort_part(model_cls, sort_parts[0]) + sort = construct_sort_part(model_cls, sort_parts[0], case_insensitive) else: sort = query.MultipleSort() for part in sort_parts: - sort.add_sort(construct_sort_part(model_cls, part)) + sort.add_sort(construct_sort_part(model_cls, part, + case_insensitive)) return sort -def parse_sorted_query(model_cls, parts, prefixes={}): +def parse_sorted_query(model_cls, parts, prefixes={}, + case_insensitive=True): """Given a list of strings, create the `Query` and `Sort` that they represent. """ @@ -222,8 +224,8 @@ def parse_sorted_query(model_cls, parts, prefixes={}): # Split up query in to comma-separated subqueries, each representing # an AndQuery, which need to be joined together in one OrQuery subquery_parts = [] - for part in parts + [u',']: - if part.endswith(u','): + for part in parts + [',']: + if part.endswith(','): # Ensure we can catch "foo, bar" as well as "foo , bar" last_subquery_part = part[:-1] if last_subquery_part: @@ -237,8 +239,8 @@ def parse_sorted_query(model_cls, parts, prefixes={}): else: # Sort parts (1) end in + or -, (2) don't have a field, and # (3) consist of more than just the + or -. - if part.endswith((u'+', u'-')) \ - and u':' not in part \ + if part.endswith(('+', '-')) \ + and ':' not in part \ and len(part) > 1: sort_parts.append(part) else: @@ -246,5 +248,5 @@ def parse_sorted_query(model_cls, parts, prefixes={}): # Avoid needlessly wrapping single statements in an OR q = query.OrQuery(query_parts) if len(query_parts) > 1 else query_parts[0] - s = sort_from_strings(model_cls, sort_parts) + s = sort_from_strings(model_cls, sort_parts, case_insensitive) return q, s diff --git a/lib/beets/dbcore/types.py b/lib/beets/dbcore/types.py old mode 100755 new mode 100644 index b909904b..40f6a080 --- a/lib/beets/dbcore/types.py +++ b/lib/beets/dbcore/types.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,25 +14,20 @@ """Representation of type information for DBCore model fields. """ -from __future__ import division, absolute_import, print_function from . import query from beets.util import str2bool -import six - -if not six.PY2: - buffer = memoryview # sqlite won't accept memoryview in python 2 # Abstract base. -class Type(object): +class Type: """An object encapsulating the type of a model field. Includes information about how to store, query, format, and parse a given field. """ - sql = u'TEXT' + sql = 'TEXT' """The SQLite column type for the value. """ @@ -41,7 +35,7 @@ class Type(object): """The `Query` subclass to be used when querying the field. """ - model_type = six.text_type + model_type = str """The Python type that is used to represent the value in the model. The model is guaranteed to return a value of this type if the field @@ -63,11 +57,11 @@ class Type(object): value = self.null # `self.null` might be `None` if value is None: - value = u'' + value = '' if isinstance(value, bytes): value = value.decode('utf-8', 'ignore') - return six.text_type(value) + return str(value) def parse(self, string): """Parse a (possibly human-written) string and return the @@ -97,16 +91,16 @@ class Type(object): For fixed fields the type of `value` is determined by the column type affinity given in the `sql` property and the SQL to Python mapping of the database adapter. For more information see: - http://www.sqlite.org/datatype3.html + https://www.sqlite.org/datatype3.html https://docs.python.org/2/library/sqlite3.html#sqlite-and-python-types Flexible fields have the type affinity `TEXT`. This means the - `sql_value` is either a `buffer`/`memoryview` or a `unicode` object` + `sql_value` is either a `memoryview` or a `unicode` object` and the method must handle these in addition. """ - if isinstance(sql_value, buffer): + if isinstance(sql_value, memoryview): sql_value = bytes(sql_value).decode('utf-8', 'ignore') - if isinstance(sql_value, six.text_type): + if isinstance(sql_value, str): return self.parse(sql_value) else: return self.normalize(sql_value) @@ -127,10 +121,18 @@ class Default(Type): class Integer(Type): """A basic integer type. """ - sql = u'INTEGER' + sql = 'INTEGER' query = query.NumericQuery model_type = int + def normalize(self, value): + try: + return self.model_type(round(float(value))) + except ValueError: + return self.null + except TypeError: + return self.null + class PaddedInt(Integer): """An integer field that is formatted with a given number of digits, @@ -140,19 +142,25 @@ class PaddedInt(Integer): self.digits = digits def format(self, value): - return u'{0:0{1}d}'.format(value or 0, self.digits) + return '{0:0{1}d}'.format(value or 0, self.digits) + + +class NullPaddedInt(PaddedInt): + """Same as `PaddedInt`, but does not normalize `None` to `0.0`. + """ + null = None class ScaledInt(Integer): """An integer whose formatting operation scales the number by a constant and adds a suffix. Good for units with large magnitudes. """ - def __init__(self, unit, suffix=u''): + def __init__(self, unit, suffix=''): self.unit = unit self.suffix = suffix def format(self, value): - return u'{0}{1}'.format((value or 0) // self.unit, self.suffix) + return '{}{}'.format((value or 0) // self.unit, self.suffix) class Id(Integer): @@ -163,18 +171,22 @@ class Id(Integer): def __init__(self, primary=True): if primary: - self.sql = u'INTEGER PRIMARY KEY' + self.sql = 'INTEGER PRIMARY KEY' class Float(Type): - """A basic floating-point type. + """A basic floating-point type. The `digits` parameter specifies how + many decimal places to use in the human-readable representation. """ - sql = u'REAL' + sql = 'REAL' query = query.NumericQuery model_type = float + def __init__(self, digits=1): + self.digits = digits + def format(self, value): - return u'{0:.1f}'.format(value or 0.0) + return '{0:.{1}f}'.format(value or 0, self.digits) class NullFloat(Float): @@ -186,19 +198,25 @@ class NullFloat(Float): class String(Type): """A Unicode string type. """ - sql = u'TEXT' + sql = 'TEXT' query = query.SubstringQuery + def normalize(self, value): + if value is None: + return self.null + else: + return self.model_type(value) + class Boolean(Type): """A boolean type. """ - sql = u'INTEGER' + sql = 'INTEGER' query = query.BooleanQuery model_type = bool def format(self, value): - return six.text_type(bool(value)) + return str(bool(value)) def parse(self, string): return str2bool(string) diff --git a/lib/beets/importer.py b/lib/beets/importer.py old mode 100755 new mode 100644 index 690a499f..561cedd2 --- a/lib/beets/importer.py +++ b/lib/beets/importer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function """Provides the basic, interface-agnostic workflow for importing and autotagging music files. @@ -37,10 +35,10 @@ from beets import dbcore from beets import plugins from beets import util from beets import config -from beets.util import pipeline, sorted_walk, ancestry +from beets.util import pipeline, sorted_walk, ancestry, MoveOperation from beets.util import syspath, normpath, displayable_path from enum import Enum -from beets import mediafile +import mediafile action = Enum('action', ['SKIP', 'ASIS', 'TRACKS', 'APPLY', 'ALBUMS', 'RETAG']) @@ -75,7 +73,7 @@ def _open_state(): # unpickling, including ImportError. We use a catch-all # exception to avoid enumerating them all (the docs don't even have a # full list!). - log.debug(u'state file could not be read: {0}', exc) + log.debug('state file could not be read: {0}', exc) return {} @@ -84,8 +82,8 @@ def _save_state(state): try: with open(config['statefile'].as_filename(), 'wb') as f: pickle.dump(state, f) - except IOError as exc: - log.error(u'state file could not be written: {0}', exc) + except OSError as exc: + log.error('state file could not be written: {0}', exc) # Utilities for reading and writing the beets progress file, which @@ -174,10 +172,11 @@ def history_get(): # Abstract session class. -class ImportSession(object): +class ImportSession: """Controls an import action. Subclasses should implement methods to communicate with the user or otherwise make decisions. """ + def __init__(self, lib, loghandler, paths, query): """Create a session. `lib` is a Library object. `loghandler` is a logging.Handler. Either `paths` or `query` is non-null and indicates @@ -187,7 +186,9 @@ class ImportSession(object): self.logger = self._setup_logging(loghandler) self.paths = paths self.query = query - self._is_resuming = dict() + self._is_resuming = {} + self._merged_items = set() + self._merged_dirs = set() # Normalize the paths. if self.paths: @@ -220,19 +221,31 @@ class ImportSession(object): iconfig['resume'] = False iconfig['incremental'] = False - # Copy, move, link, and hardlink are mutually exclusive. + if iconfig['reflink']: + iconfig['reflink'] = iconfig['reflink'] \ + .as_choice(['auto', True, False]) + + # Copy, move, reflink, link, and hardlink are mutually exclusive. if iconfig['move']: iconfig['copy'] = False iconfig['link'] = False iconfig['hardlink'] = False + iconfig['reflink'] = False elif iconfig['link']: iconfig['copy'] = False iconfig['move'] = False iconfig['hardlink'] = False + iconfig['reflink'] = False elif iconfig['hardlink']: iconfig['copy'] = False iconfig['move'] = False iconfig['link'] = False + iconfig['reflink'] = False + elif iconfig['reflink']: + iconfig['copy'] = False + iconfig['move'] = False + iconfig['link'] = False + iconfig['hardlink'] = False # Only delete when copying. if not iconfig['copy']: @@ -244,7 +257,7 @@ class ImportSession(object): """Log a message about a given album to the importer log. The status should reflect the reason the album couldn't be tagged. """ - self.logger.info(u'{0} {1}', status, displayable_path(paths)) + self.logger.info('{0} {1}', status, displayable_path(paths)) def log_choice(self, task, duplicate=False): """Logs the task's current choice if it should be logged. If @@ -255,17 +268,17 @@ class ImportSession(object): if duplicate: # Duplicate: log all three choices (skip, keep both, and trump). if task.should_remove_duplicates: - self.tag_log(u'duplicate-replace', paths) + self.tag_log('duplicate-replace', paths) elif task.choice_flag in (action.ASIS, action.APPLY): - self.tag_log(u'duplicate-keep', paths) + self.tag_log('duplicate-keep', paths) elif task.choice_flag is (action.SKIP): - self.tag_log(u'duplicate-skip', paths) + self.tag_log('duplicate-skip', paths) else: # Non-duplicate: log "skip" and "asis" choices. if task.choice_flag is action.ASIS: - self.tag_log(u'asis', paths) + self.tag_log('asis', paths) elif task.choice_flag is action.SKIP: - self.tag_log(u'skip', paths) + self.tag_log('skip', paths) def should_resume(self, path): raise NotImplementedError @@ -282,7 +295,7 @@ class ImportSession(object): def run(self): """Run the import task. """ - self.logger.info(u'import started {0}', time.asctime()) + self.logger.info('import started {0}', time.asctime()) self.set_config(config['import']) # Set up the pipeline. @@ -311,6 +324,8 @@ class ImportSession(object): stages += [import_asis(self)] # Plugin stages. + for stage_func in plugins.early_import_stages(): + stages.append(plugin_stage(self, stage_func)) for stage_func in plugins.import_stages(): stages.append(plugin_stage(self, stage_func)) @@ -350,6 +365,24 @@ class ImportSession(object): self._history_dirs = history_get() return self._history_dirs + def already_merged(self, paths): + """Returns true if all the paths being imported were part of a merge + during previous tasks. + """ + for path in paths: + if path not in self._merged_items \ + and path not in self._merged_dirs: + return False + return True + + def mark_merged(self, paths): + """Mark paths and directories as merged for future reimport tasks. + """ + self._merged_items.update(paths) + dirs = {os.path.dirname(path) if os.path.isfile(path) else path + for path in paths} + self._merged_dirs.update(dirs) + def is_resuming(self, toppath): """Return `True` if user wants to resume import of this path. @@ -367,7 +400,7 @@ class ImportSession(object): # Either accept immediately or prompt for input to decide. if self.want_resume is True or \ self.should_resume(toppath): - log.warning(u'Resuming interrupted import of {0}', + log.warning('Resuming interrupted import of {0}', util.displayable_path(toppath)) self._is_resuming[toppath] = True else: @@ -377,11 +410,12 @@ class ImportSession(object): # The importer task class. -class BaseImportTask(object): +class BaseImportTask: """An abstract base class for importer tasks. Tasks flow through the importer pipeline. Each stage can update them. """ + def __init__(self, toppath, paths, items): """Create a task. The primary fields that define a task are: @@ -419,7 +453,7 @@ class ImportTask(BaseImportTask): from the `candidates` list. * `find_duplicates()` Returns a list of albums from `lib` with the - same artist and album name as the task. + same artist and album name as the task. * `apply_metadata()` Sets the attributes of the items from the task's `match` attribute. @@ -429,17 +463,22 @@ class ImportTask(BaseImportTask): * `manipulate_files()` Copy, move, and write files depending on the session configuration. + * `set_fields()` Sets the fields given at CLI or configuration to + the specified values. + * `finalize()` Update the import progress and cleanup the file system. """ + def __init__(self, toppath, paths, items): - super(ImportTask, self).__init__(toppath, paths, items) + super().__init__(toppath, paths, items) self.choice_flag = None self.cur_album = None self.cur_artist = None self.candidates = [] self.rec = None self.should_remove_duplicates = False + self.should_merge_duplicates = False self.is_album = True self.search_ids = [] # user-supplied candidate IDs. @@ -510,6 +549,10 @@ class ImportTask(BaseImportTask): def apply_metadata(self): """Copy metadata from match info to the items. """ + if config['import']['from_scratch']: + for item in self.match.mapping: + item.clear() + autotag.apply_metadata(self.match.info, self.match.mapping) def duplicate_items(self, lib): @@ -520,23 +563,45 @@ class ImportTask(BaseImportTask): def remove_duplicates(self, lib): duplicate_items = self.duplicate_items(lib) - log.debug(u'removing {0} old duplicated items', len(duplicate_items)) + log.debug('removing {0} old duplicated items', len(duplicate_items)) for item in duplicate_items: item.remove() if lib.directory in util.ancestry(item.path): - log.debug(u'deleting duplicate {0}', + log.debug('deleting duplicate {0}', util.displayable_path(item.path)) util.remove(item.path) util.prune_dirs(os.path.dirname(item.path), lib.directory) + def set_fields(self, lib): + """Sets the fields given at CLI or configuration to the specified + values, for both the album and all its items. + """ + items = self.imported_items() + for field, view in config['import']['set_fields'].items(): + value = view.get() + log.debug('Set field {1}={2} for {0}', + displayable_path(self.paths), + field, + value) + self.album[field] = value + for item in items: + item[field] = value + with lib.transaction(): + for item in items: + item.store() + self.album.store() + def finalize(self, session): """Save progress, clean up files, and emit plugin event. """ # Update progress. if session.want_resume: self.save_progress() - if session.config['incremental']: + if session.config['incremental'] and not ( + # Should we skip recording to incremental list? + self.skip and session.config['incremental_skip_later'] + ): self.save_history() self.cleanup(copy=session.config['copy'], @@ -609,17 +674,18 @@ class ImportTask(BaseImportTask): return [] duplicates = [] - task_paths = set(i.path for i in self.items if i) + task_paths = {i.path for i in self.items if i} duplicate_query = dbcore.AndQuery(( dbcore.MatchQuery('albumartist', artist), dbcore.MatchQuery('album', album), )) for album in lib.albums(duplicate_query): - # Check whether the album is identical in contents, in which - # case it is not a duplicate (will be replaced). - album_paths = set(i.path for i in album.items()) - if album_paths != task_paths: + # Check whether the album paths are all present in the task + # i.e. album is being completely re-imported by the task, + # in which case it is not a duplicate (will be replaced). + album_paths = {i.path for i in album.items()} + if not (album_paths <= task_paths): duplicates.append(album) return duplicates @@ -659,20 +725,28 @@ class ImportTask(BaseImportTask): for item in self.items: item.update(changes) - def manipulate_files(self, move=False, copy=False, write=False, - link=False, hardlink=False, session=None): + def manipulate_files(self, operation=None, write=False, session=None): + """ Copy, move, link, hardlink or reflink (depending on `operation`) the files + as well as write metadata. + + `operation` should be an instance of `util.MoveOperation`. + + If `write` is `True` metadata is written to the files. + """ + items = self.imported_items() # Save the original paths of all items for deletion and pruning # in the next step (finalization). self.old_paths = [item.path for item in items] for item in items: - if move or copy or link or hardlink: + if operation is not None: # In copy and link modes, treat re-imports specially: # move in-library files. (Out-of-library files are # copied/moved as usual). old_path = item.path - if (copy or link or hardlink) and self.replaced_items[item] \ - and session.lib.directory in util.ancestry(old_path): + if (operation != MoveOperation.MOVE + and self.replaced_items[item] + and session.lib.directory in util.ancestry(old_path)): item.move() # We moved the item, so remove the # now-nonexistent file from old_paths. @@ -680,7 +754,7 @@ class ImportTask(BaseImportTask): else: # A normal import. Just copy files and keep track of # old paths. - item.move(copy, link, hardlink) + item.move(operation) if write and (self.apply or self.choice_flag == action.RETAG): item.try_write() @@ -699,6 +773,8 @@ class ImportTask(BaseImportTask): self.record_replaced(lib) self.remove_replaced(lib) self.album = lib.add_album(self.imported_items()) + if 'data_source' in self.imported_items()[0]: + self.album.data_source = self.imported_items()[0].data_source self.reimport_metadata(lib) def record_replaced(self, lib): @@ -717,7 +793,7 @@ class ImportTask(BaseImportTask): if (not dup_item.album_id or dup_item.album_id in replaced_album_ids): continue - replaced_album = dup_item.get_album() + replaced_album = dup_item._cached_album if replaced_album: replaced_album_ids.add(dup_item.album_id) self.replaced_albums[replaced_album.path] = replaced_album @@ -734,8 +810,8 @@ class ImportTask(BaseImportTask): self.album.artpath = replaced_album.artpath self.album.store() log.debug( - u'Reimported album: added {0}, flexible ' - u'attributes {1} from album {2} for {3}', + 'Reimported album: added {0}, flexible ' + 'attributes {1} from album {2} for {3}', self.album.added, replaced_album._values_flex.keys(), replaced_album.id, @@ -748,16 +824,16 @@ class ImportTask(BaseImportTask): if dup_item.added and dup_item.added != item.added: item.added = dup_item.added log.debug( - u'Reimported item added {0} ' - u'from item {1} for {2}', + 'Reimported item added {0} ' + 'from item {1} for {2}', item.added, dup_item.id, displayable_path(item.path) ) item.update(dup_item._values_flex) log.debug( - u'Reimported item flexible attributes {0} ' - u'from item {1} for {2}', + 'Reimported item flexible attributes {0} ' + 'from item {1} for {2}', dup_item._values_flex.keys(), dup_item.id, displayable_path(item.path) @@ -770,10 +846,10 @@ class ImportTask(BaseImportTask): """ for item in self.imported_items(): for dup_item in self.replaced_items[item]: - log.debug(u'Replacing item {0}: {1}', + log.debug('Replacing item {0}: {1}', dup_item.id, displayable_path(item.path)) dup_item.remove() - log.debug(u'{0} of {1} items replaced', + log.debug('{0} of {1} items replaced', sum(bool(l) for l in self.replaced_items.values()), len(self.imported_items())) @@ -811,7 +887,7 @@ class SingletonImportTask(ImportTask): """ def __init__(self, toppath, item): - super(SingletonImportTask, self).__init__(toppath, [item.path], [item]) + super().__init__(toppath, [item.path], [item]) self.item = item self.is_album = False self.paths = [item.path] @@ -877,6 +953,19 @@ class SingletonImportTask(ImportTask): def reload(self): self.item.load() + def set_fields(self, lib): + """Sets the fields given at CLI or configuration to the specified + values, for the singleton item. + """ + for field, view in config['import']['set_fields'].items(): + value = view.get() + log.debug('Set field {1}={2} for {0}', + displayable_path(self.paths), + field, + value) + self.item[field] = value + self.item.store() + # FIXME The inheritance relationships are inverted. This is why there # are so many methods which pass. More responsibility should be delegated to @@ -891,7 +980,7 @@ class SentinelImportTask(ImportTask): """ def __init__(self, toppath, paths): - super(SentinelImportTask, self).__init__(toppath, paths, ()) + super().__init__(toppath, paths, ()) # TODO Remove the remaining attributes eventually self.should_remove_duplicates = False self.is_album = True @@ -935,7 +1024,7 @@ class ArchiveImportTask(SentinelImportTask): """ def __init__(self, toppath): - super(ArchiveImportTask, self).__init__(toppath, ()) + super().__init__(toppath, ()) self.extracted = False @classmethod @@ -964,14 +1053,20 @@ class ArchiveImportTask(SentinelImportTask): cls._handlers = [] from zipfile import is_zipfile, ZipFile cls._handlers.append((is_zipfile, ZipFile)) - from tarfile import is_tarfile, TarFile - cls._handlers.append((is_tarfile, TarFile)) + import tarfile + cls._handlers.append((tarfile.is_tarfile, tarfile.open)) try: from rarfile import is_rarfile, RarFile except ImportError: pass else: cls._handlers.append((is_rarfile, RarFile)) + try: + from py7zr import is_7zfile, SevenZipFile + except ImportError: + pass + else: + cls._handlers.append((is_7zfile, SevenZipFile)) return cls._handlers @@ -979,7 +1074,7 @@ class ArchiveImportTask(SentinelImportTask): """Removes the temporary directory the archive was extracted to. """ if self.extracted: - log.debug(u'Removing extracted directory: {0}', + log.debug('Removing extracted directory: {0}', displayable_path(self.toppath)) shutil.rmtree(self.toppath) @@ -991,9 +1086,9 @@ class ArchiveImportTask(SentinelImportTask): if path_test(util.py3_path(self.toppath)): break + extract_to = mkdtemp() + archive = handler_class(util.py3_path(self.toppath), mode='r') try: - extract_to = mkdtemp() - archive = handler_class(util.py3_path(self.toppath), mode='r') archive.extractall(extract_to) finally: archive.close() @@ -1001,10 +1096,11 @@ class ArchiveImportTask(SentinelImportTask): self.toppath = extract_to -class ImportTaskFactory(object): +class ImportTaskFactory: """Generate album and singleton import tasks for all media files indicated by a path. """ + def __init__(self, toppath, session): """Create a new task factory. @@ -1042,14 +1138,12 @@ class ImportTaskFactory(object): if self.session.config['singletons']: for path in paths: tasks = self._create(self.singleton(path)) - for task in tasks: - yield task + yield from tasks yield self.sentinel(dirs) else: tasks = self._create(self.album(paths, dirs)) - for task in tasks: - yield task + yield from tasks # Produce the final sentinel for this toppath to indicate that # it is finished. This is usually just a SentinelImportTask, but @@ -1097,7 +1191,7 @@ class ImportTaskFactory(object): """Return a `SingletonImportTask` for the music file. """ if self.session.already_imported(self.toppath, [path]): - log.debug(u'Skipping previously-imported path: {0}', + log.debug('Skipping previously-imported path: {0}', displayable_path(path)) self.skipped += 1 return None @@ -1118,10 +1212,10 @@ class ImportTaskFactory(object): return None if dirs is None: - dirs = list(set(os.path.dirname(p) for p in paths)) + dirs = list({os.path.dirname(p) for p in paths}) if self.session.already_imported(self.toppath, dirs): - log.debug(u'Skipping previously-imported path: {0}', + log.debug('Skipping previously-imported path: {0}', displayable_path(dirs)) self.skipped += 1 return None @@ -1151,22 +1245,22 @@ class ImportTaskFactory(object): if not (self.session.config['move'] or self.session.config['copy']): - log.warning(u"Archive importing requires either " - u"'copy' or 'move' to be enabled.") + log.warning("Archive importing requires either " + "'copy' or 'move' to be enabled.") return - log.debug(u'Extracting archive: {0}', + log.debug('Extracting archive: {0}', displayable_path(self.toppath)) archive_task = ArchiveImportTask(self.toppath) try: archive_task.extract() except Exception as exc: - log.error(u'extraction failed: {0}', exc) + log.error('extraction failed: {0}', exc) return # Now read albums from the extracted directory. self.toppath = archive_task.toppath - log.debug(u'Archive extracted to: {0}', self.toppath) + log.debug('Archive extracted to: {0}', self.toppath) return archive_task def read_item(self, path): @@ -1182,12 +1276,33 @@ class ImportTaskFactory(object): # Silently ignore non-music files. pass elif isinstance(exc.reason, mediafile.UnreadableFileError): - log.warning(u'unreadable file: {0}', displayable_path(path)) + log.warning('unreadable file: {0}', displayable_path(path)) else: - log.error(u'error reading {0}: {1}', + log.error('error reading {0}: {1}', displayable_path(path), exc) +# Pipeline utilities + +def _freshen_items(items): + # Clear IDs from re-tagged items so they appear "fresh" when + # we add them back to the library. + for item in items: + item.id = None + item.album_id = None + + +def _extend_pipeline(tasks, *stages): + # Return pipeline extension for stages with list of tasks + if type(tasks) == list: + task_iter = iter(tasks) + else: + task_iter = tasks + + ipl = pipeline.Pipeline([task_iter] + list(stages)) + return pipeline.multiple(ipl.pull()) + + # Full-album pipeline stages. def read_tasks(session): @@ -1202,17 +1317,16 @@ def read_tasks(session): # Generate tasks. task_factory = ImportTaskFactory(toppath, session) - for t in task_factory.tasks(): - yield t + yield from task_factory.tasks() skipped += task_factory.skipped if not task_factory.imported: - log.warning(u'No files imported from {0}', + log.warning('No files imported from {0}', displayable_path(toppath)) # Show skipped directories (due to incremental/resume). if skipped: - log.info(u'Skipped {0} paths.', skipped) + log.info('Skipped {0} paths.', skipped) def query_tasks(session): @@ -1230,15 +1344,10 @@ def query_tasks(session): else: # Search for albums. for album in session.lib.albums(session.query): - log.debug(u'yielding album {0}: {1} - {2}', + log.debug('yielding album {0}: {1} - {2}', album.id, album.albumartist, album.album) items = list(album.items()) - - # Clear IDs from re-tagged items so they appear "fresh" when - # we add them back to the library. - for item in items: - item.id = None - item.album_id = None + _freshen_items(items) task = ImportTask(None, [album.item_dir()], items) for task in task.handle_created(session): @@ -1258,7 +1367,7 @@ def lookup_candidates(session, task): return plugins.send('import_task_start', session=session, task=task) - log.debug(u'Looking up: {0}', displayable_path(task.paths)) + log.debug('Looking up: {0}', displayable_path(task.paths)) # Restrict the initial lookup to IDs specified by the user via the -m # option. Currently all the IDs are passed onto the tasks directly. @@ -1284,6 +1393,9 @@ def user_query(session, task): if task.skip: return task + if session.already_merged(task.paths): + return pipeline.BUBBLE + # Ask the user for a choice. task.choose_match(session) plugins.send('import_task_choice', session=session, task=task) @@ -1294,28 +1406,41 @@ def user_query(session, task): def emitter(task): for item in task.items: task = SingletonImportTask(task.toppath, item) - for new_task in task.handle_created(session): - yield new_task + yield from task.handle_created(session) yield SentinelImportTask(task.toppath, task.paths) - ipl = pipeline.Pipeline([ - emitter(task), - lookup_candidates(session), - user_query(session), - ]) - return pipeline.multiple(ipl.pull()) + return _extend_pipeline(emitter(task), + lookup_candidates(session), + user_query(session)) # As albums: group items by albums and create task for each album if task.choice_flag is action.ALBUMS: - ipl = pipeline.Pipeline([ - iter([task]), - group_albums(session), - lookup_candidates(session), - user_query(session) - ]) - return pipeline.multiple(ipl.pull()) + return _extend_pipeline([task], + group_albums(session), + lookup_candidates(session), + user_query(session)) resolve_duplicates(session, task) + + if task.should_merge_duplicates: + # Create a new task for tagging the current items + # and duplicates together + duplicate_items = task.duplicate_items(session.lib) + + # Duplicates would be reimported so make them look "fresh" + _freshen_items(duplicate_items) + duplicate_paths = [item.path for item in duplicate_items] + + # Record merged paths in the session so they are not reimported + session.mark_merged(duplicate_paths) + + merged_task = ImportTask(None, task.paths + duplicate_paths, + task.items + duplicate_items) + + return _extend_pipeline([merged_task], + lookup_candidates(session), + user_query(session)) + apply_choice(session, task) return task @@ -1327,28 +1452,32 @@ def resolve_duplicates(session, task): if task.choice_flag in (action.ASIS, action.APPLY, action.RETAG): found_duplicates = task.find_duplicates(session.lib) if found_duplicates: - log.debug(u'found duplicates: {}'.format( + log.debug('found duplicates: {}'.format( [o.id for o in found_duplicates] )) # Get the default action to follow from config. duplicate_action = config['import']['duplicate_action'].as_choice({ - u'skip': u's', - u'keep': u'k', - u'remove': u'r', - u'ask': u'a', + 'skip': 's', + 'keep': 'k', + 'remove': 'r', + 'merge': 'm', + 'ask': 'a', }) - log.debug(u'default action for duplicates: {0}', duplicate_action) + log.debug('default action for duplicates: {0}', duplicate_action) - if duplicate_action == u's': + if duplicate_action == 's': # Skip new. task.set_choice(action.SKIP) - elif duplicate_action == u'k': + elif duplicate_action == 'k': # Keep both. Do nothing; leave the choice intact. pass - elif duplicate_action == u'r': + elif duplicate_action == 'r': # Remove old. task.should_remove_duplicates = True + elif duplicate_action == 'm': + # Merge duplicates together + task.should_merge_duplicates = True else: # No default action set; ask the session. session.resolve_duplicate(task, found_duplicates) @@ -1366,7 +1495,7 @@ def import_asis(session, task): if task.skip: return - log.info(u'{}', displayable_path(task.paths)) + log.info('{}', displayable_path(task.paths)) task.set_choice(action.ASIS) apply_choice(session, task) @@ -1385,6 +1514,14 @@ def apply_choice(session, task): task.add(session.lib) + # If ``set_fields`` is set, set those fields to the + # configured values. + # NOTE: This cannot be done before the ``task.add()`` call above, + # because then the ``ImportTask`` won't have an `album` for which + # it can set the fields. + if config['import']['set_fields']: + task.set_fields(session.lib) + @pipeline.mutator_stage def plugin_stage(session, func, task): @@ -1413,12 +1550,22 @@ def manipulate_files(session, task): if task.should_remove_duplicates: task.remove_duplicates(session.lib) + if session.config['move']: + operation = MoveOperation.MOVE + elif session.config['copy']: + operation = MoveOperation.COPY + elif session.config['link']: + operation = MoveOperation.LINK + elif session.config['hardlink']: + operation = MoveOperation.HARDLINK + elif session.config['reflink']: + operation = MoveOperation.REFLINK + else: + operation = None + task.manipulate_files( - move=session.config['move'], - copy=session.config['copy'], + operation, write=session.config['write'], - link=session.config['link'], - hardlink=session.config['hardlink'], session=session, ) @@ -1431,11 +1578,11 @@ def log_files(session, task): """A coroutine (pipeline stage) to log each file to be imported. """ if isinstance(task, SingletonImportTask): - log.info(u'Singleton: {0}', displayable_path(task.item['path'])) + log.info('Singleton: {0}', displayable_path(task.item['path'])) elif task.items: - log.info(u'Album: {0}', displayable_path(task.paths[0])) + log.info('Album: {0}', displayable_path(task.paths[0])) for item in task.items: - log.info(u' {0}', displayable_path(item['path'])) + log.info(' {0}', displayable_path(item['path'])) def group_albums(session): @@ -1469,6 +1616,14 @@ MULTIDISC_MARKERS = (br'dis[ck]', br'cd') MULTIDISC_PAT_FMT = br'^(.*%s[\W_]*)\d' +def is_subdir_of_any_in_list(path, dirs): + """Returns True if path os a subdirectory of any directory in dirs + (a list). In other case, returns False. + """ + ancestors = ancestry(path) + return any(d in ancestors for d in dirs) + + def albums_in_dir(path): """Recursively searches the given directory and returns an iterable of (paths, items) where paths is a list of directories and items is @@ -1488,7 +1643,7 @@ def albums_in_dir(path): # and add the current directory. If so, just add the directory # and move on to the next directory. If not, stop collapsing. if collapse_paths: - if (not collapse_pat and collapse_paths[0] in ancestry(root)) or \ + if (is_subdir_of_any_in_list(root, collapse_paths)) or \ (collapse_pat and collapse_pat.match(os.path.basename(root))): # Still collapsing. diff --git a/lib/beets/library.py b/lib/beets/library.py old mode 100755 new mode 100644 index b263ecd6..888836cd --- a/lib/beets/library.py +++ b/lib/beets/library.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,33 +14,30 @@ """The core data store and collection logic for beets. """ -from __future__ import division, absolute_import, print_function import os import sys import unicodedata import time import re -import six +import string +import shlex from beets import logging -from beets.mediafile import MediaFile, UnreadableFileError +from mediafile import MediaFile, UnreadableFileError from beets import plugins from beets import util -from beets.util import bytestring_path, syspath, normpath, samefile -from beets.util.functemplate import Template +from beets.util import bytestring_path, syspath, normpath, samefile, \ + MoveOperation, lazy_property +from beets.util.functemplate import template, Template from beets import dbcore from beets.dbcore import types import beets # To use the SQLite "blob" type, it doesn't suffice to provide a byte -# string; SQLite treats that as encoded text. Wrapping it in a `buffer` or a -# `memoryview`, depending on the Python version, tells it that we -# actually mean non-text data. -if six.PY2: - BLOB_TYPE = buffer # noqa: F821 -else: - BLOB_TYPE = memoryview +# string; SQLite treats that as encoded text. Wrapping it in a +# `memoryview` tells it that we actually mean non-text data. +BLOB_TYPE = memoryview log = logging.getLogger('beets') @@ -63,7 +59,7 @@ class PathQuery(dbcore.FieldQuery): `case_sensitive` can be a bool or `None`, indicating that the behavior should depend on the filesystem. """ - super(PathQuery, self).__init__(field, pattern, fast) + super().__init__(field, pattern, fast) # By default, the case sensitivity depends on the filesystem # that the query path is located on. @@ -144,10 +140,27 @@ class DateType(types.Float): class PathType(types.Type): - sql = u'BLOB' + """A dbcore type for filesystem paths. These are represented as + `bytes` objects, in keeping with the Unix filesystem abstraction. + """ + + sql = 'BLOB' query = PathQuery model_type = bytes + def __init__(self, nullable=False): + """Create a path type object. `nullable` controls whether the + type may be missing, i.e., None. + """ + self.nullable = nullable + + @property + def null(self): + if self.nullable: + return None + else: + return b'' + def format(self, value): return util.displayable_path(value) @@ -155,7 +168,7 @@ class PathType(types.Type): return normpath(bytestring_path(string)) def normalize(self, value): - if isinstance(value, six.text_type): + if isinstance(value, str): # Paths stored internally as encoded bytes. return bytestring_path(value) @@ -188,6 +201,8 @@ class MusicalKey(types.String): r'bb': 'a#', } + null = None + def parse(self, key): key = key.lower() for flat, sharp in self.ENHARMONIC.items(): @@ -231,6 +246,7 @@ class SmartArtistSort(dbcore.query.Sort): """Sort by artist (either album artist or track artist), prioritizing the sort field over the raw field. """ + def __init__(self, model_cls, ascending=True, case_insensitive=True): self.album = model_cls is Album self.ascending = ascending @@ -246,12 +262,15 @@ class SmartArtistSort(dbcore.query.Sort): def sort(self, objs): if self.album: - field = lambda a: a.albumartist_sort or a.albumartist + def field(a): + return a.albumartist_sort or a.albumartist else: - field = lambda i: i.artist_sort or i.artist + def field(i): + return i.artist_sort or i.artist if self.case_insensitive: - key = lambda x: field(x).lower() + def key(x): + return field(x).lower() else: key = field return sorted(objs, key=key, reverse=not self.ascending) @@ -262,17 +281,17 @@ PF_KEY_DEFAULT = 'default' # Exceptions. -@six.python_2_unicode_compatible class FileOperationError(Exception): """Indicates an error when interacting with a file on disk. Possibilities include an unsupported media type, a permissions error, and an unhandled Mutagen exception. """ + def __init__(self, path, reason): """Create an exception describing an operation on the file at `path` with the underlying (chained) exception `reason`. """ - super(FileOperationError, self).__init__(path, reason) + super().__init__(path, reason) self.path = path self.reason = reason @@ -280,9 +299,9 @@ class FileOperationError(Exception): """Get a string representing the error. Describes both the underlying reason and the file path in question. """ - return u'{0}: {1}'.format( + return '{}: {}'.format( util.displayable_path(self.path), - six.text_type(self.reason) + str(self.reason) ) # define __str__ as text to avoid infinite loop on super() calls @@ -290,25 +309,24 @@ class FileOperationError(Exception): __str__ = text -@six.python_2_unicode_compatible class ReadError(FileOperationError): """An error while reading a file (i.e. in `Item.read`). """ + def __str__(self): - return u'error reading ' + super(ReadError, self).text() + return 'error reading ' + super().text() -@six.python_2_unicode_compatible class WriteError(FileOperationError): """An error while writing a file (i.e. in `Item.write`). """ + def __str__(self): - return u'error writing ' + super(WriteError, self).text() + return 'error writing ' + super().text() # Item and Album model classes. -@six.python_2_unicode_compatible class LibModel(dbcore.Model): """Shared concrete functionality for Items and Albums. """ @@ -323,21 +341,21 @@ class LibModel(dbcore.Model): return funcs def store(self, fields=None): - super(LibModel, self).store(fields) + super().store(fields) plugins.send('database_change', lib=self._db, model=self) def remove(self): - super(LibModel, self).remove() + super().remove() plugins.send('database_change', lib=self._db, model=self) def add(self, lib=None): - super(LibModel, self).add(lib) + super().add(lib) plugins.send('database_change', lib=self._db, model=self) def __format__(self, spec): if not spec: spec = beets.config[self._format_config_key].as_str() - assert isinstance(spec, six.text_type) + assert isinstance(spec, str) return self.evaluate_template(spec) def __str__(self): @@ -353,15 +371,42 @@ class FormattedItemMapping(dbcore.db.FormattedMapping): Album-level fields take precedence if `for_path` is true. """ - def __init__(self, item, for_path=False): - super(FormattedItemMapping, self).__init__(item, for_path) - self.album = item.get_album() - self.album_keys = [] + ALL_KEYS = '*' + + def __init__(self, item, included_keys=ALL_KEYS, for_path=False): + # We treat album and item keys specially here, + # so exclude transitive album keys from the model's keys. + super().__init__(item, included_keys=[], + for_path=for_path) + self.included_keys = included_keys + if included_keys == self.ALL_KEYS: + # Performance note: this triggers a database query. + self.model_keys = item.keys(computed=True, with_album=False) + else: + self.model_keys = included_keys + self.item = item + + @lazy_property + def all_keys(self): + return set(self.model_keys).union(self.album_keys) + + @lazy_property + def album_keys(self): + album_keys = [] if self.album: - for key in self.album.keys(True): - if key in Album.item_keys or key not in item._fields.keys(): - self.album_keys.append(key) - self.all_keys = set(self.model_keys).union(self.album_keys) + if self.included_keys == self.ALL_KEYS: + # Performance note: this triggers a database query. + for key in self.album.keys(computed=True): + if key in Album.item_keys \ + or key not in self.item._fields.keys(): + album_keys.append(key) + else: + album_keys = self.included_keys + return album_keys + + @property + def album(self): + return self.item._cached_album def _get(self, key): """Get the value for a key, either from the album or the item. @@ -377,19 +422,23 @@ class FormattedItemMapping(dbcore.db.FormattedMapping): raise KeyError(key) def __getitem__(self, key): - """Get the value for a key. Certain unset values are remapped. + """Get the value for a key. `artist` and `albumartist` + are fallback values for each other when not set. """ value = self._get(key) # `artist` and `albumartist` fields fall back to one another. # This is helpful in path formats when the album artist is unset # on as-is imports. - if key == 'artist' and not value: - return self._get('albumartist') - elif key == 'albumartist' and not value: - return self._get('artist') - else: - return value + try: + if key == 'artist' and not value: + return self._get('albumartist') + elif key == 'albumartist' and not value: + return self._get('artist') + except KeyError: + pass + + return value def __iter__(self): return iter(self.all_keys) @@ -402,70 +451,85 @@ class Item(LibModel): _table = 'items' _flex_table = 'item_attributes' _fields = { - 'id': types.PRIMARY_ID, - 'path': PathType(), + 'id': types.PRIMARY_ID, + 'path': PathType(), 'album_id': types.FOREIGN_ID, - 'title': types.STRING, - 'artist': types.STRING, - 'artist_sort': types.STRING, - 'artist_credit': types.STRING, - 'album': types.STRING, - 'albumartist': types.STRING, - 'albumartist_sort': types.STRING, - 'albumartist_credit': types.STRING, - 'genre': types.STRING, - 'lyricist': types.STRING, - 'composer': types.STRING, - 'arranger': types.STRING, - 'grouping': types.STRING, - 'year': types.PaddedInt(4), - 'month': types.PaddedInt(2), - 'day': types.PaddedInt(2), - 'track': types.PaddedInt(2), - 'tracktotal': types.PaddedInt(2), - 'disc': types.PaddedInt(2), - 'disctotal': types.PaddedInt(2), - 'lyrics': types.STRING, - 'comments': types.STRING, - 'bpm': types.INTEGER, - 'comp': types.BOOLEAN, - 'mb_trackid': types.STRING, - 'mb_albumid': types.STRING, - 'mb_artistid': types.STRING, - 'mb_albumartistid': types.STRING, - 'albumtype': types.STRING, - 'label': types.STRING, + 'title': types.STRING, + 'artist': types.STRING, + 'artist_sort': types.STRING, + 'artist_credit': types.STRING, + 'album': types.STRING, + 'albumartist': types.STRING, + 'albumartist_sort': types.STRING, + 'albumartist_credit': types.STRING, + 'genre': types.STRING, + 'style': types.STRING, + 'discogs_albumid': types.INTEGER, + 'discogs_artistid': types.INTEGER, + 'discogs_labelid': types.INTEGER, + 'lyricist': types.STRING, + 'composer': types.STRING, + 'composer_sort': types.STRING, + 'work': types.STRING, + 'mb_workid': types.STRING, + 'work_disambig': types.STRING, + 'arranger': types.STRING, + 'grouping': types.STRING, + 'year': types.PaddedInt(4), + 'month': types.PaddedInt(2), + 'day': types.PaddedInt(2), + 'track': types.PaddedInt(2), + 'tracktotal': types.PaddedInt(2), + 'disc': types.PaddedInt(2), + 'disctotal': types.PaddedInt(2), + 'lyrics': types.STRING, + 'comments': types.STRING, + 'bpm': types.INTEGER, + 'comp': types.BOOLEAN, + 'mb_trackid': types.STRING, + 'mb_albumid': types.STRING, + 'mb_artistid': types.STRING, + 'mb_albumartistid': types.STRING, + 'mb_releasetrackid': types.STRING, + 'trackdisambig': types.STRING, + 'albumtype': types.STRING, + 'albumtypes': types.STRING, + 'label': types.STRING, 'acoustid_fingerprint': types.STRING, - 'acoustid_id': types.STRING, - 'mb_releasegroupid': types.STRING, - 'asin': types.STRING, - 'catalognum': types.STRING, - 'script': types.STRING, - 'language': types.STRING, - 'country': types.STRING, - 'albumstatus': types.STRING, - 'media': types.STRING, - 'albumdisambig': types.STRING, - 'disctitle': types.STRING, - 'encoder': types.STRING, - 'rg_track_gain': types.NULL_FLOAT, - 'rg_track_peak': types.NULL_FLOAT, - 'rg_album_gain': types.NULL_FLOAT, - 'rg_album_peak': types.NULL_FLOAT, - 'original_year': types.PaddedInt(4), - 'original_month': types.PaddedInt(2), - 'original_day': types.PaddedInt(2), - 'initial_key': MusicalKey(), + 'acoustid_id': types.STRING, + 'mb_releasegroupid': types.STRING, + 'asin': types.STRING, + 'isrc': types.STRING, + 'catalognum': types.STRING, + 'script': types.STRING, + 'language': types.STRING, + 'country': types.STRING, + 'albumstatus': types.STRING, + 'media': types.STRING, + 'albumdisambig': types.STRING, + 'releasegroupdisambig': types.STRING, + 'disctitle': types.STRING, + 'encoder': types.STRING, + 'rg_track_gain': types.NULL_FLOAT, + 'rg_track_peak': types.NULL_FLOAT, + 'rg_album_gain': types.NULL_FLOAT, + 'rg_album_peak': types.NULL_FLOAT, + 'r128_track_gain': types.NullPaddedInt(6), + 'r128_album_gain': types.NullPaddedInt(6), + 'original_year': types.PaddedInt(4), + 'original_month': types.PaddedInt(2), + 'original_day': types.PaddedInt(2), + 'initial_key': MusicalKey(), - 'length': DurationType(), - 'bitrate': types.ScaledInt(1000, u'kbps'), - 'format': types.STRING, - 'samplerate': types.ScaledInt(1000, u'kHz'), - 'bitdepth': types.INTEGER, - 'channels': types.INTEGER, - 'mtime': DateType(), - 'added': DateType(), + 'length': DurationType(), + 'bitrate': types.ScaledInt(1000, 'kbps'), + 'format': types.STRING, + 'samplerate': types.ScaledInt(1000, 'kHz'), + 'bitdepth': types.INTEGER, + 'channels': types.INTEGER, + 'mtime': DateType(), + 'added': DateType(), } _search_fields = ('artist', 'title', 'comments', @@ -498,6 +562,29 @@ class Item(LibModel): _format_config_key = 'format_item' + __album = None + """Cached album object. Read-only.""" + + @property + def _cached_album(self): + """The Album object that this item belongs to, if any, or + None if the item is a singleton or is not associated with a + library. + The instance is cached and refreshed on access. + + DO NOT MODIFY! + If you want a copy to modify, use :meth:`get_album`. + """ + if not self.__album and self._db: + self.__album = self._db.get_album(self) + elif self.__album: + self.__album.load() + return self.__album + + @_cached_album.setter + def _cached_album(self, album): + self.__album = album + @classmethod def _getters(cls): getters = plugins.item_field_getters() @@ -520,24 +607,74 @@ class Item(LibModel): """ # Encode unicode paths and read buffers. if key == 'path': - if isinstance(value, six.text_type): + if isinstance(value, str): value = bytestring_path(value) elif isinstance(value, BLOB_TYPE): value = bytes(value) + elif key == 'album_id': + self._cached_album = None - if key in MediaFile.fields(): + changed = super()._setitem(key, value) + + if changed and key in MediaFile.fields(): self.mtime = 0 # Reset mtime on dirty. - super(Item, self).__setitem__(key, value) + def __getitem__(self, key): + """Get the value for a field, falling back to the album if + necessary. Raise a KeyError if the field is not available. + """ + try: + return super().__getitem__(key) + except KeyError: + if self._cached_album: + return self._cached_album[key] + raise + + def __repr__(self): + # This must not use `with_album=True`, because that might access + # the database. When debugging, that is not guaranteed to succeed, and + # can even deadlock due to the database lock. + return '{}({})'.format( + type(self).__name__, + ', '.join('{}={!r}'.format(k, self[k]) + for k in self.keys(with_album=False)), + ) + + def keys(self, computed=False, with_album=True): + """Get a list of available field names. `with_album` + controls whether the album's fields are included. + """ + keys = super().keys(computed=computed) + if with_album and self._cached_album: + keys = set(keys) + keys.update(self._cached_album.keys(computed=computed)) + keys = list(keys) + return keys + + def get(self, key, default=None, with_album=True): + """Get the value for a given key or `default` if it does not + exist. Set `with_album` to false to skip album fallback. + """ + try: + return self._get(key, default, raise_=with_album) + except KeyError: + if self._cached_album: + return self._cached_album.get(key, default) + return default def update(self, values): """Set all key/value pairs in the mapping. If mtime is specified, it is not reset (as it might otherwise be). """ - super(Item, self).update(values) + super().update(values) if self.mtime == 0 and 'mtime' in values: self.mtime = values['mtime'] + def clear(self): + """Set all key/value pairs to None.""" + for key in self._media_tag_fields: + setattr(self, key, None) + def get_album(self): """Get the Album object that this item belongs to, if any, or None if the item is a singleton or is not associated with a @@ -569,7 +706,7 @@ class Item(LibModel): for key in self._media_fields: value = getattr(mediafile, key) - if isinstance(value, six.integer_types): + if isinstance(value, int): if value.bit_length() > 63: value = 0 self[key] = value @@ -580,7 +717,7 @@ class Item(LibModel): self.path = read_path - def write(self, path=None, tags=None): + def write(self, path=None, tags=None, id3v23=None): """Write the item's metadata to a media file. All fields in `_media_fields` are written to disk according to @@ -592,6 +729,9 @@ class Item(LibModel): `tags` is a dictionary of additional metadata the should be written to the file. (These tags need not be in `_media_fields`.) + `id3v23` will override the global `id3v23` config option if it is + set to something other than `None`. + Can raise either a `ReadError` or a `WriteError`. """ if path is None: @@ -599,6 +739,9 @@ class Item(LibModel): else: path = normpath(path) + if id3v23 is None: + id3v23 = beets.config['id3v23'].get(bool) + # Get the data to write to the file. item_tags = dict(self) item_tags = {k: v for k, v in item_tags.items() @@ -609,10 +752,9 @@ class Item(LibModel): # Open the file. try: - mediafile = MediaFile(syspath(path), - id3v23=beets.config['id3v23'].get(bool)) + mediafile = MediaFile(syspath(path), id3v23=id3v23) except UnreadableFileError as exc: - raise ReadError(self.path, exc) + raise ReadError(path, exc) # Write the tags to the file. mediafile.update(item_tags) @@ -626,17 +768,17 @@ class Item(LibModel): self.mtime = self.current_mtime() plugins.send('after_write', item=self, path=path) - def try_write(self, path=None, tags=None): + def try_write(self, *args, **kwargs): """Calls `write()` but catches and logs `FileOperationError` exceptions. Returns `False` an exception was caught and `True` otherwise. """ try: - self.write(path, tags) + self.write(*args, **kwargs) return True except FileOperationError as exc: - log.error(u"{0}", exc) + log.error("{0}", exc) return False def try_sync(self, write, move, with_album=True): @@ -656,38 +798,51 @@ class Item(LibModel): if move: # Check whether this file is inside the library directory. if self._db and self._db.directory in util.ancestry(self.path): - log.debug(u'moving {0} to synchronize path', + log.debug('moving {0} to synchronize path', util.displayable_path(self.path)) self.move(with_album=with_album) self.store() # Files themselves. - def move_file(self, dest, copy=False, link=False, hardlink=False): - """Moves or copies the item's file, updating the path value if - the move succeeds. If a file exists at ``dest``, then it is - slightly modified to be unique. + def move_file(self, dest, operation=MoveOperation.MOVE): + """Move, copy, link or hardlink the item's depending on `operation`, + updating the path value if the move succeeds. + + If a file exists at `dest`, then it is slightly modified to be unique. + + `operation` should be an instance of `util.MoveOperation`. """ if not util.samefile(self.path, dest): dest = util.unique_path(dest) - if copy: - util.copy(self.path, dest) - plugins.send("item_copied", item=self, source=self.path, - destination=dest) - elif link: - util.link(self.path, dest) - plugins.send("item_linked", item=self, source=self.path, - destination=dest) - elif hardlink: - util.hardlink(self.path, dest) - plugins.send("item_hardlinked", item=self, source=self.path, - destination=dest) - else: + if operation == MoveOperation.MOVE: plugins.send("before_item_moved", item=self, source=self.path, destination=dest) util.move(self.path, dest) plugins.send("item_moved", item=self, source=self.path, destination=dest) + elif operation == MoveOperation.COPY: + util.copy(self.path, dest) + plugins.send("item_copied", item=self, source=self.path, + destination=dest) + elif operation == MoveOperation.LINK: + util.link(self.path, dest) + plugins.send("item_linked", item=self, source=self.path, + destination=dest) + elif operation == MoveOperation.HARDLINK: + util.hardlink(self.path, dest) + plugins.send("item_hardlinked", item=self, source=self.path, + destination=dest) + elif operation == MoveOperation.REFLINK: + util.reflink(self.path, dest, fallback=False) + plugins.send("item_reflinked", item=self, source=self.path, + destination=dest) + elif operation == MoveOperation.REFLINK_AUTO: + util.reflink(self.path, dest, fallback=True) + plugins.send("item_reflinked", item=self, source=self.path, + destination=dest) + else: + assert False, 'unknown MoveOperation' # Either copying or moving succeeded, so update the stored path. self.path = dest @@ -706,7 +861,7 @@ class Item(LibModel): try: return os.path.getsize(syspath(self.path)) except (OSError, Exception) as exc: - log.warning(u'could not get filesize: {0}', exc) + log.warning('could not get filesize: {0}', exc) return 0 # Model methods. @@ -716,7 +871,7 @@ class Item(LibModel): removed from disk. If `with_album`, then the item's album (if any) is removed if it the item was the last in the album. """ - super(Item, self).remove() + super().remove() # Remove the album if it is empty. if with_album: @@ -734,29 +889,27 @@ class Item(LibModel): self._db._memotable = {} - def move(self, copy=False, link=False, hardlink=False, basedir=None, + def move(self, operation=MoveOperation.MOVE, basedir=None, with_album=True, store=True): """Move the item to its designated location within the library directory (provided by destination()). Subdirectories are created as needed. If the operation succeeds, the item's path field is updated to reflect the new location. - If `copy` is true, moving the file is copied rather than moved. - Similarly, `link` creates a symlink instead, and `hardlink` - creates a hardlink. + Instead of moving the item it can also be copied, linked or hardlinked + depending on `operation` which should be an instance of + `util.MoveOperation`. - basedir overrides the library base directory for the - destination. + `basedir` overrides the library base directory for the destination. - If the item is in an album, the album is given an opportunity to - move its art. (This can be disabled by passing - with_album=False.) + If the item is in an album and `with_album` is `True`, the album is + given an opportunity to move its art. By default, the item is stored to the database if it is in the database, so any dirty fields prior to the move() call will be written - as a side effect. You probably want to call save() to commit the DB - transaction. If `store` is true however, the item won't be stored, and - you'll have to manually store it after invoking this method. + as a side effect. + If `store` is `False` however, the item won't be stored and you'll + have to manually store it after invoking this method. """ self._check_db() dest = self.destination(basedir=basedir) @@ -766,7 +919,7 @@ class Item(LibModel): # Perform the move and store the change. old_path = self.path - self.move_file(dest, copy, link, hardlink) + self.move_file(dest, operation) if store: self.store() @@ -774,18 +927,18 @@ class Item(LibModel): if with_album: album = self.get_album() if album: - album.move_art(copy) + album.move_art(operation) if store: album.store() # Prune vacated directory. - if not copy: + if operation == MoveOperation.MOVE: util.prune_dirs(os.path.dirname(old_path), self._db.directory) # Templating. def destination(self, fragment=False, basedir=None, platform=None, - path_formats=None): + path_formats=None, replacements=None): """Returns the path in the library directory designated for the item (i.e., where the file ought to be). fragment makes this method return just the path fragment underneath the root library @@ -797,6 +950,8 @@ class Item(LibModel): platform = platform or sys.platform basedir = basedir or self._db.directory path_formats = path_formats or self._db.path_formats + if replacements is None: + replacements = self._db.replacements # Use a path format based on a query, falling back on the # default. @@ -814,11 +969,11 @@ class Item(LibModel): if query == PF_KEY_DEFAULT: break else: - assert False, u"no default path format" + assert False, "no default path format" if isinstance(path_format, Template): subpath_tmpl = path_format else: - subpath_tmpl = Template(path_format) + subpath_tmpl = template(path_format) # Evaluate the selected template. subpath = self.evaluate_template(subpath_tmpl, True) @@ -841,16 +996,16 @@ class Item(LibModel): maxlen = util.max_filename_length(self._db.directory) subpath, fellback = util.legalize_path( - subpath, self._db.replacements, maxlen, + subpath, replacements, maxlen, os.path.splitext(self.path)[1], fragment ) if fellback: # Print an error message if legalization fell back to # default replacements because of the maximum length. log.warning( - u'Fell back to default replacements when naming ' - u'file {}. Configure replacements to avoid lengthening ' - u'the filename.', + 'Fell back to default replacements when naming ' + 'file {}. Configure replacements to avoid lengthening ' + 'the filename.', subpath ) @@ -869,43 +1024,50 @@ class Album(LibModel): _flex_table = 'album_attributes' _always_dirty = True _fields = { - 'id': types.PRIMARY_ID, - 'artpath': PathType(), - 'added': DateType(), + 'id': types.PRIMARY_ID, + 'artpath': PathType(True), + 'added': DateType(), - 'albumartist': types.STRING, - 'albumartist_sort': types.STRING, + 'albumartist': types.STRING, + 'albumartist_sort': types.STRING, 'albumartist_credit': types.STRING, - 'album': types.STRING, - 'genre': types.STRING, - 'year': types.PaddedInt(4), - 'month': types.PaddedInt(2), - 'day': types.PaddedInt(2), - 'disctotal': types.PaddedInt(2), - 'comp': types.BOOLEAN, - 'mb_albumid': types.STRING, - 'mb_albumartistid': types.STRING, - 'albumtype': types.STRING, - 'label': types.STRING, - 'mb_releasegroupid': types.STRING, - 'asin': types.STRING, - 'catalognum': types.STRING, - 'script': types.STRING, - 'language': types.STRING, - 'country': types.STRING, - 'albumstatus': types.STRING, - 'albumdisambig': types.STRING, - 'rg_album_gain': types.NULL_FLOAT, - 'rg_album_peak': types.NULL_FLOAT, - 'original_year': types.PaddedInt(4), - 'original_month': types.PaddedInt(2), - 'original_day': types.PaddedInt(2), + 'album': types.STRING, + 'genre': types.STRING, + 'style': types.STRING, + 'discogs_albumid': types.INTEGER, + 'discogs_artistid': types.INTEGER, + 'discogs_labelid': types.INTEGER, + 'year': types.PaddedInt(4), + 'month': types.PaddedInt(2), + 'day': types.PaddedInt(2), + 'disctotal': types.PaddedInt(2), + 'comp': types.BOOLEAN, + 'mb_albumid': types.STRING, + 'mb_albumartistid': types.STRING, + 'albumtype': types.STRING, + 'albumtypes': types.STRING, + 'label': types.STRING, + 'mb_releasegroupid': types.STRING, + 'asin': types.STRING, + 'catalognum': types.STRING, + 'script': types.STRING, + 'language': types.STRING, + 'country': types.STRING, + 'albumstatus': types.STRING, + 'albumdisambig': types.STRING, + 'releasegroupdisambig': types.STRING, + 'rg_album_gain': types.NULL_FLOAT, + 'rg_album_peak': types.NULL_FLOAT, + 'r128_album_gain': types.NullPaddedInt(6), + 'original_year': types.PaddedInt(4), + 'original_month': types.PaddedInt(2), + 'original_day': types.PaddedInt(2), } _search_fields = ('album', 'albumartist', 'genre') _types = { - 'path': PathType(), + 'path': PathType(), 'data_source': types.STRING, } @@ -921,6 +1083,10 @@ class Album(LibModel): 'albumartist_credit', 'album', 'genre', + 'style', + 'discogs_albumid', + 'discogs_artistid', + 'discogs_labelid', 'year', 'month', 'day', @@ -929,6 +1095,7 @@ class Album(LibModel): 'mb_albumid', 'mb_albumartistid', 'albumtype', + 'albumtypes', 'label', 'mb_releasegroupid', 'asin', @@ -938,8 +1105,10 @@ class Album(LibModel): 'country', 'albumstatus', 'albumdisambig', + 'releasegroupdisambig', 'rg_album_gain', 'rg_album_peak', + 'r128_album_gain', 'original_year', 'original_month', 'original_day', @@ -971,7 +1140,10 @@ class Album(LibModel): containing the album are also removed (recursively) if empty. Set with_items to False to avoid removing the album's items. """ - super(Album, self).remove() + super().remove() + + # Send a 'album_removed' signal to plugins + plugins.send('album_removed', album=self) # Delete art file. if delete: @@ -984,45 +1156,60 @@ class Album(LibModel): for item in self.items(): item.remove(delete, False) - def move_art(self, copy=False, link=False, hardlink=False): - """Move or copy any existing album art so that it remains in the - same directory as the items. + def move_art(self, operation=MoveOperation.MOVE): + """Move, copy, link or hardlink (depending on `operation`) any + existing album art so that it remains in the same directory as + the items. + + `operation` should be an instance of `util.MoveOperation`. """ old_art = self.artpath if not old_art: return + if not os.path.exists(old_art): + log.error('removing reference to missing album art file {}', + util.displayable_path(old_art)) + self.artpath = None + return + new_art = self.art_destination(old_art) if new_art == old_art: return new_art = util.unique_path(new_art) - log.debug(u'moving album art {0} to {1}', + log.debug('moving album art {0} to {1}', util.displayable_path(old_art), util.displayable_path(new_art)) - if copy: - util.copy(old_art, new_art) - elif link: - util.link(old_art, new_art) - elif hardlink: - util.hardlink(old_art, new_art) - else: + if operation == MoveOperation.MOVE: util.move(old_art, new_art) + util.prune_dirs(os.path.dirname(old_art), self._db.directory) + elif operation == MoveOperation.COPY: + util.copy(old_art, new_art) + elif operation == MoveOperation.LINK: + util.link(old_art, new_art) + elif operation == MoveOperation.HARDLINK: + util.hardlink(old_art, new_art) + elif operation == MoveOperation.REFLINK: + util.reflink(old_art, new_art, fallback=False) + elif operation == MoveOperation.REFLINK_AUTO: + util.reflink(old_art, new_art, fallback=True) + else: + assert False, 'unknown MoveOperation' self.artpath = new_art - # Prune old path when moving. - if not copy: - util.prune_dirs(os.path.dirname(old_art), - self._db.directory) + def move(self, operation=MoveOperation.MOVE, basedir=None, store=True): + """Move, copy, link or hardlink (depending on `operation`) + all items to their destination. Any album art moves along with them. - def move(self, copy=False, link=False, hardlink=False, basedir=None, - store=True): - """Moves (or copies) all items to their destination. Any album - art moves along with them. basedir overrides the library base - directory for the destination. By default, the album is stored to the - database, persisting any modifications to its metadata. If `store` is - true however, the album is not stored automatically, and you'll have - to manually store it after invoking this method. + `basedir` overrides the library base directory for the destination. + + `operation` should be an instance of `util.MoveOperation`. + + By default, the album is stored to the database, persisting any + modifications to its metadata. If `store` is `False` however, + the album is not stored automatically, and you'll have to manually + store it after invoking this method. """ basedir = basedir or self._db.directory @@ -1034,11 +1221,11 @@ class Album(LibModel): # Move items. items = list(self.items()) for item in items: - item.move(copy, link, hardlink, basedir=basedir, with_album=False, + item.move(operation, basedir=basedir, with_album=False, store=store) # Move art. - self.move_art(copy, link, hardlink) + self.move_art(operation) if store: self.store() @@ -1048,7 +1235,7 @@ class Album(LibModel): """ item = self.items().get() if not item: - raise ValueError(u'empty album') + raise ValueError('empty album for album id %d' % self.id) return os.path.dirname(item.path) def _albumtotal(self): @@ -1084,7 +1271,7 @@ class Album(LibModel): image = bytestring_path(image) item_dir = item_dir or self.item_dir() - filename_tmpl = Template( + filename_tmpl = template( beets.config['art_filename'].as_str()) subpath = self.evaluate_template(filename_tmpl, True) if beets.config['asciify_paths']: @@ -1145,7 +1332,7 @@ class Album(LibModel): track_updates[key] = self[key] with self._db.transaction(): - super(Album, self).store(fields) + super().store(fields) if track_updates: for item in self.items(): for key, value in track_updates.items(): @@ -1189,8 +1376,10 @@ def parse_query_parts(parts, model_cls): else: non_path_parts.append(s) + case_insensitive = beets.config['sort_case_insensitive'].get(bool) + query, sort = dbcore.parse_sorted_query( - model_cls, non_path_parts, prefixes + model_cls, non_path_parts, prefixes, case_insensitive ) # Add path queries to aggregate query. @@ -1208,10 +1397,10 @@ def parse_query_string(s, model_cls): The string is split into components using shell-like syntax. """ - message = u"Query is not unicode: {0!r}".format(s) - assert isinstance(s, six.text_type), message + message = f"Query is not unicode: {s!r}" + assert isinstance(s, str), message try: - parts = util.shlex_split(s) + parts = shlex.split(s) except ValueError as exc: raise dbcore.InvalidQueryError(s, exc) return parse_query_parts(parts, model_cls) @@ -1224,10 +1413,7 @@ def _sqlite_bytelower(bytestring): ``-DSQLITE_LIKE_DOESNT_MATCH_BLOBS``. See ``https://github.com/beetbox/beets/issues/2172`` for details. """ - if not six.PY2: - return bytestring.lower() - - return buffer(bytes(bytestring).lower()) # noqa: F821 + return bytestring.lower() # The Library: interface to the database. @@ -1243,7 +1429,7 @@ class Library(dbcore.Database): '$artist/$album/$track $title'),), replacements=None): timeout = beets.config['timeout'].as_number() - super(Library, self).__init__(path, timeout=timeout) + super().__init__(path, timeout=timeout) self.directory = bytestring_path(normpath(directory)) self.path_formats = path_formats @@ -1252,7 +1438,7 @@ class Library(dbcore.Database): self._memotable = {} # Used for template substitution performance. def _create_connection(self): - conn = super(Library, self)._create_connection() + conn = super()._create_connection() conn.create_function('bytelower', 1, _sqlite_bytelower) return conn @@ -1274,10 +1460,10 @@ class Library(dbcore.Database): be empty. """ if not items: - raise ValueError(u'need at least one item') + raise ValueError('need at least one item') # Create the album structure using metadata from the first item. - values = dict((key, items[0][key]) for key in Album.item_keys) + values = {key: items[0][key] for key in Album.item_keys} album = Album(self, **values) # Add the album structure and set the items' album_id fields. @@ -1302,11 +1488,11 @@ class Library(dbcore.Database): # Parse the query, if necessary. try: parsed_sort = None - if isinstance(query, six.string_types): + if isinstance(query, str): query, parsed_sort = parse_query_string(query, model_cls) elif isinstance(query, (list, tuple)): query, parsed_sort = parse_query_parts(query, model_cls) - except dbcore.query.InvalidQueryArgumentTypeError as exc: + except dbcore.query.InvalidQueryArgumentValueError as exc: raise dbcore.InvalidQueryError(query, exc) # Any non-null sort specified by the parsed query overrides the @@ -1314,7 +1500,7 @@ class Library(dbcore.Database): if parsed_sort and not isinstance(parsed_sort, dbcore.query.NullSort): sort = parsed_sort - return super(Library, self)._fetch( + return super()._fetch( model_cls, query, sort ) @@ -1373,7 +1559,7 @@ def _int_arg(s): return int(s.strip()) -class DefaultTemplateFunctions(object): +class DefaultTemplateFunctions: """A container class for the default functions provided to path templates. These functions are contained in an object to provide additional context to the functions -- specifically, the Item being @@ -1412,7 +1598,7 @@ class DefaultTemplateFunctions(object): @staticmethod def tmpl_title(s): """Convert a string to title case.""" - return s.title() + return string.capwords(s) @staticmethod def tmpl_left(s, chars): @@ -1425,7 +1611,7 @@ class DefaultTemplateFunctions(object): return s[-_int_arg(chars):] @staticmethod - def tmpl_if(condition, trueval, falseval=u''): + def tmpl_if(condition, trueval, falseval=''): """If ``condition`` is nonempty and nonzero, emit ``trueval``; otherwise, emit ``falseval`` (if provided). """ @@ -1467,18 +1653,25 @@ class DefaultTemplateFunctions(object): """ # Fast paths: no album, no item or library, or memoized value. if not self.item or not self.lib: - return u'' - if self.item.album_id is None: - return u'' - memokey = ('aunique', keys, disam, self.item.album_id) + return '' + + if isinstance(self.item, Item): + album_id = self.item.album_id + elif isinstance(self.item, Album): + album_id = self.item.id + + if album_id is None: + return '' + + memokey = ('aunique', keys, disam, album_id) memoval = self.lib._memotable.get(memokey) if memoval is not None: return memoval - keys = keys or 'albumartist album' - disam = disam or 'albumtype year label catalognum albumdisambig' + keys = keys or beets.config['aunique']['keys'].as_str() + disam = disam or beets.config['aunique']['disambiguators'].as_str() if bracket is None: - bracket = '[]' + bracket = beets.config['aunique']['bracket'].as_str() keys = keys.split() disam = disam.split() @@ -1487,32 +1680,34 @@ class DefaultTemplateFunctions(object): bracket_l = bracket[0] bracket_r = bracket[1] else: - bracket_l = u'' - bracket_r = u'' + bracket_l = '' + bracket_r = '' - album = self.lib.get_album(self.item) + album = self.lib.get_album(album_id) if not album: # Do nothing for singletons. - self.lib._memotable[memokey] = u'' - return u'' + self.lib._memotable[memokey] = '' + return '' # Find matching albums to disambiguate with. subqueries = [] for key in keys: value = album.get(key, '') - subqueries.append(dbcore.MatchQuery(key, value)) + # Use slow queries for flexible attributes. + fast = key in album.item_keys + subqueries.append(dbcore.MatchQuery(key, value, fast)) albums = self.lib.albums(dbcore.AndQuery(subqueries)) # If there's only one album to matching these details, then do # nothing. if len(albums) == 1: - self.lib._memotable[memokey] = u'' - return u'' + self.lib._memotable[memokey] = '' + return '' # Find the first disambiguator that distinguishes the albums. for disambiguator in disam: # Get the value for each album for the current field. - disam_values = set([a.get(disambiguator, '') for a in albums]) + disam_values = {a.get(disambiguator, '') for a in albums} # If the set of unique values is equal to the number of # albums in the disambiguation set, we're done -- this is @@ -1522,24 +1717,24 @@ class DefaultTemplateFunctions(object): else: # No disambiguator distinguished all fields. - res = u' {1}{0}{2}'.format(album.id, bracket_l, bracket_r) + res = f' {bracket_l}{album.id}{bracket_r}' self.lib._memotable[memokey] = res return res # Flatten disambiguation value into a string. - disam_value = album.formatted(True).get(disambiguator) + disam_value = album.formatted(for_path=True).get(disambiguator) # Return empty string if disambiguator is empty. if disam_value: - res = u' {1}{0}{2}'.format(disam_value, bracket_l, bracket_r) + res = f' {bracket_l}{disam_value}{bracket_r}' else: - res = u'' + res = '' self.lib._memotable[memokey] = res return res @staticmethod - def tmpl_first(s, count=1, skip=0, sep=u'; ', join_str=u'; '): + def tmpl_first(s, count=1, skip=0, sep='; ', join_str='; '): """ Gets the item(s) from x to y in a string separated by something and join then with something @@ -1553,7 +1748,7 @@ class DefaultTemplateFunctions(object): count = skip + int(count) return join_str.join(s.split(sep)[skip:count]) - def tmpl_ifdef(self, field, trueval=u'', falseval=u''): + def tmpl_ifdef(self, field, trueval='', falseval=''): """ If field exists return trueval or the field (default) otherwise, emit return falseval (if provided). @@ -1562,7 +1757,7 @@ class DefaultTemplateFunctions(object): :param falseval: The string if the condition is false :return: The string, based on condition """ - if self.item.formatted().get(field): + if field in self.item: return trueval if trueval else self.item.formatted().get(field) else: return falseval diff --git a/lib/beets/logging.py b/lib/beets/logging.py old mode 100755 new mode 100644 index d5ec7b73..4f004f8d --- a/lib/beets/logging.py +++ b/lib/beets/logging.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -21,13 +20,11 @@ that when getLogger(name) instantiates a logger that logger uses {}-style formatting. """ -from __future__ import division, absolute_import, print_function from copy import copy from logging import * # noqa import subprocess import threading -import six def logsafe(val): @@ -43,7 +40,7 @@ def logsafe(val): example. """ # Already Unicode. - if isinstance(val, six.text_type): + if isinstance(val, str): return val # Bytestring: needs decoding. @@ -57,7 +54,7 @@ def logsafe(val): # A "problem" object: needs a workaround. elif isinstance(val, subprocess.CalledProcessError): try: - return six.text_type(val) + return str(val) except UnicodeDecodeError: # An object with a broken __unicode__ formatter. Use __str__ # instead. @@ -74,7 +71,7 @@ class StrFormatLogger(Logger): instead of %-style formatting. """ - class _LogMessage(object): + class _LogMessage: def __init__(self, msg, args, kwargs): self.msg = msg self.args = args @@ -82,22 +79,23 @@ class StrFormatLogger(Logger): def __str__(self): args = [logsafe(a) for a in self.args] - kwargs = dict((k, logsafe(v)) for (k, v) in self.kwargs.items()) + kwargs = {k: logsafe(v) for (k, v) in self.kwargs.items()} return self.msg.format(*args, **kwargs) def _log(self, level, msg, args, exc_info=None, extra=None, **kwargs): """Log msg.format(*args, **kwargs)""" m = self._LogMessage(msg, args, kwargs) - return super(StrFormatLogger, self)._log(level, m, (), exc_info, extra) + return super()._log(level, m, (), exc_info, extra) class ThreadLocalLevelLogger(Logger): """A version of `Logger` whose level is thread-local instead of shared. """ + def __init__(self, name, level=NOTSET): self._thread_level = threading.local() self.default_level = NOTSET - super(ThreadLocalLevelLogger, self).__init__(name, level) + super().__init__(name, level) @property def level(self): diff --git a/lib/beets/mediafile.py b/lib/beets/mediafile.py old mode 100755 new mode 100644 index 13f1b2df..82bcc973 --- a/lib/beets/mediafile.py +++ b/lib/beets/mediafile.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,2053 +12,15 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -"""Handles low-level interfacing for files' tags. Wraps Mutagen to -automatically detect file types and provide a unified interface for a -useful subset of music files' tags. -Usage: +import mediafile - >>> f = MediaFile('Lucy.mp3') - >>> f.title - u'Lucy in the Sky with Diamonds' - >>> f.artist = 'The Beatles' - >>> f.save() +import warnings +warnings.warn("beets.mediafile is deprecated; use mediafile instead") -A field will always return a reasonable value of the correct type, even -if no tag is present. If no value is available, the value will be false -(e.g., zero or the empty string). +# Import everything from the mediafile module into this module. +for key, value in mediafile.__dict__.items(): + if key not in ['__name__']: + globals()[key] = value -Internally ``MediaFile`` uses ``MediaField`` descriptors to access the -data from the tags. In turn ``MediaField`` uses a number of -``StorageStyle`` strategies to handle format specific logic. -""" -from __future__ import division, absolute_import, print_function - -import mutagen -import mutagen.id3 -import mutagen.mp4 -import mutagen.flac -import mutagen.asf - -import codecs -import datetime -import re -import base64 -import binascii -import math -import struct -import imghdr -import os -import traceback -import enum -import logging -import six - - -__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] - -log = logging.getLogger(__name__) - -# Human-readable type names. -TYPES = { - 'mp3': 'MP3', - 'aac': 'AAC', - 'alac': 'ALAC', - 'ogg': 'OGG', - 'opus': 'Opus', - 'flac': 'FLAC', - 'ape': 'APE', - 'wv': 'WavPack', - 'mpc': 'Musepack', - 'asf': 'Windows Media', - 'aiff': 'AIFF', - 'dsf': 'DSD Stream File', -} - -PREFERRED_IMAGE_EXTENSIONS = {'jpeg': 'jpg'} - - -# Exceptions. - -class UnreadableFileError(Exception): - """Mutagen is not able to extract information from the file. - """ - def __init__(self, path, msg): - Exception.__init__(self, msg if msg else repr(path)) - - -class FileTypeError(UnreadableFileError): - """Reading this type of file is not supported. - - If passed the `mutagen_type` argument this indicates that the - mutagen type is not supported by `Mediafile`. - """ - def __init__(self, path, mutagen_type=None): - if mutagen_type is None: - msg = repr(path) - else: - msg = u'{0}: of mutagen type {1}'.format(repr(path), mutagen_type) - Exception.__init__(self, msg) - - -class MutagenError(UnreadableFileError): - """Raised when Mutagen fails unexpectedly---probably due to a bug. - """ - def __init__(self, path, mutagen_exc): - msg = u'{0}: {1}'.format(repr(path), mutagen_exc) - Exception.__init__(self, msg) - - -# Interacting with Mutagen. - -def mutagen_call(action, path, func, *args, **kwargs): - """Call a Mutagen function with appropriate error handling. - - `action` is a string describing what the function is trying to do, - and `path` is the relevant filename. The rest of the arguments - describe the callable to invoke. - - We require at least Mutagen 1.33, where `IOError` is *never* used, - neither for internal parsing errors *nor* for ordinary IO error - conditions such as a bad filename. Mutagen-specific parsing errors and IO - errors are reraised as `UnreadableFileError`. Other exceptions - raised inside Mutagen---i.e., bugs---are reraised as `MutagenError`. - """ - try: - return func(*args, **kwargs) - except mutagen.MutagenError as exc: - log.debug(u'%s failed: %s', action, six.text_type(exc)) - raise UnreadableFileError(path, six.text_type(exc)) - except Exception as exc: - # Isolate bugs in Mutagen. - log.debug(u'%s', traceback.format_exc()) - log.error(u'uncaught Mutagen exception in %s: %s', action, exc) - raise MutagenError(path, exc) - - -# Utility. - -def _safe_cast(out_type, val): - """Try to covert val to out_type but never raise an exception. If - the value can't be converted, then a sensible default value is - returned. out_type should be bool, int, or unicode; otherwise, the - value is just passed through. - """ - if val is None: - return None - - if out_type == int: - if isinstance(val, int) or isinstance(val, float): - # Just a number. - return int(val) - else: - # Process any other type as a string. - if not isinstance(val, six.string_types): - val = six.text_type(val) - # Get a number from the front of the string. - val = re.match(r'[0-9]*', val.strip()).group(0) - if not val: - return 0 - else: - return int(val) - - elif out_type == bool: - try: - # Should work for strings, bools, ints: - return bool(int(val)) - except ValueError: - return False - - elif out_type == six.text_type: - if isinstance(val, bytes): - return val.decode('utf-8', 'ignore') - elif isinstance(val, six.text_type): - return val - else: - return six.text_type(val) - - elif out_type == float: - if isinstance(val, int) or isinstance(val, float): - return float(val) - else: - if isinstance(val, bytes): - val = val.decode('utf-8', 'ignore') - else: - val = six.text_type(val) - match = re.match(r'[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)', - val.strip()) - if match: - val = match.group(0) - if val: - return float(val) - return 0.0 - - else: - return val - - -# Image coding for ASF/WMA. - -def _unpack_asf_image(data): - """Unpack image data from a WM/Picture tag. Return a tuple - containing the MIME type, the raw image data, a type indicator, and - the image's description. - - This function is treated as "untrusted" and could throw all manner - of exceptions (out-of-bounds, etc.). We should clean this up - sometime so that the failure modes are well-defined. - """ - type, size = struct.unpack_from(' 0: - gain = math.log10(maxgain / 1000.0) * -10 - else: - # Invalid gain value found. - gain = 0.0 - - # SoundCheck stores peak values as the actual value of the sample, - # and again separately for the left and right channels. We need to - # convert this to a percentage of full scale, which is 32768 for a - # 16 bit sample. Once again, we play it safe by using the larger of - # the two values. - peak = max(soundcheck[6:8]) / 32768.0 - - return round(gain, 2), round(peak, 6) - - -def _sc_encode(gain, peak): - """Encode ReplayGain gain/peak values as a Sound Check string. - """ - # SoundCheck stores the peak value as the actual value of the - # sample, rather than the percentage of full scale that RG uses, so - # we do a simple conversion assuming 16 bit samples. - peak *= 32768.0 - - # SoundCheck stores absolute RMS values in some unknown units rather - # than the dB values RG uses. We can calculate these absolute values - # from the gain ratio using a reference value of 1000 units. We also - # enforce the maximum value here, which is equivalent to about - # -18.2dB. - g1 = int(min(round((10 ** (gain / -10)) * 1000), 65534)) - # Same as above, except our reference level is 2500 units. - g2 = int(min(round((10 ** (gain / -10)) * 2500), 65534)) - - # The purpose of these values are unknown, but they also seem to be - # unused so we just use zero. - uk = 0 - values = (g1, g1, g2, g2, uk, uk, int(peak), int(peak), uk, uk) - return (u' %08X' * 10) % values - - -# Cover art and other images. -def _imghdr_what_wrapper(data): - """A wrapper around imghdr.what to account for jpeg files that can only be - identified as such using their magic bytes - See #1545 - See https://github.com/file/file/blob/master/magic/Magdir/jpeg#L12 - """ - # imghdr.what returns none for jpegs with only the magic bytes, so - # _wider_test_jpeg is run in that case. It still returns None if it didn't - # match such a jpeg file. - return imghdr.what(None, h=data) or _wider_test_jpeg(data) - - -def _wider_test_jpeg(data): - """Test for a jpeg file following the UNIX file implementation which - uses the magic bytes rather than just looking for the bytes that - represent 'JFIF' or 'EXIF' at a fixed position. - """ - if data[:2] == b'\xff\xd8': - return 'jpeg' - - -def image_mime_type(data): - """Return the MIME type of the image data (a bytestring). - """ - # This checks for a jpeg file with only the magic bytes (unrecognized by - # imghdr.what). imghdr.what returns none for that type of file, so - # _wider_test_jpeg is run in that case. It still returns None if it didn't - # match such a jpeg file. - kind = _imghdr_what_wrapper(data) - if kind in ['gif', 'jpeg', 'png', 'tiff', 'bmp']: - return 'image/{0}'.format(kind) - elif kind == 'pgm': - return 'image/x-portable-graymap' - elif kind == 'pbm': - return 'image/x-portable-bitmap' - elif kind == 'ppm': - return 'image/x-portable-pixmap' - elif kind == 'xbm': - return 'image/x-xbitmap' - else: - return 'image/x-{0}'.format(kind) - - -def image_extension(data): - ext = _imghdr_what_wrapper(data) - return PREFERRED_IMAGE_EXTENSIONS.get(ext, ext) - - -class ImageType(enum.Enum): - """Indicates the kind of an `Image` stored in a file's tag. - """ - other = 0 - icon = 1 - other_icon = 2 - front = 3 - back = 4 - leaflet = 5 - media = 6 - lead_artist = 7 - artist = 8 - conductor = 9 - group = 10 - composer = 11 - lyricist = 12 - recording_location = 13 - recording_session = 14 - performance = 15 - screen_capture = 16 - fish = 17 - illustration = 18 - artist_logo = 19 - publisher_logo = 20 - - -class Image(object): - """Structure representing image data and metadata that can be - stored and retrieved from tags. - - The structure has four properties. - * ``data`` The binary data of the image - * ``desc`` An optional description of the image - * ``type`` An instance of `ImageType` indicating the kind of image - * ``mime_type`` Read-only property that contains the mime type of - the binary data - """ - def __init__(self, data, desc=None, type=None): - assert isinstance(data, bytes) - if desc is not None: - assert isinstance(desc, six.text_type) - self.data = data - self.desc = desc - if isinstance(type, int): - try: - type = list(ImageType)[type] - except IndexError: - log.debug(u"ignoring unknown image type index %s", type) - type = ImageType.other - self.type = type - - @property - def mime_type(self): - if self.data: - return image_mime_type(self.data) - - @property - def type_index(self): - if self.type is None: - # This method is used when a tag format requires the type - # index to be set, so we return "other" as the default value. - return 0 - return self.type.value - - -# StorageStyle classes describe strategies for accessing values in -# Mutagen file objects. - -class StorageStyle(object): - """A strategy for storing a value for a certain tag format (or set - of tag formats). This basic StorageStyle describes simple 1:1 - mapping from raw values to keys in a Mutagen file object; subclasses - describe more sophisticated translations or format-specific access - strategies. - - MediaFile uses a StorageStyle via three methods: ``get()``, - ``set()``, and ``delete()``. It passes a Mutagen file object to - each. - - Internally, the StorageStyle implements ``get()`` and ``set()`` - using two steps that may be overridden by subtypes. To get a value, - the StorageStyle first calls ``fetch()`` to retrieve the value - corresponding to a key and then ``deserialize()`` to convert the raw - Mutagen value to a consumable Python value. Similarly, to set a - field, we call ``serialize()`` to encode the value and then - ``store()`` to assign the result into the Mutagen object. - - Each StorageStyle type has a class-level `formats` attribute that is - a list of strings indicating the formats that the style applies to. - MediaFile only uses StorageStyles that apply to the correct type for - a given audio file. - """ - - formats = ['FLAC', 'OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', - 'OggFlac', 'APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio'] - """List of mutagen classes the StorageStyle can handle. - """ - - def __init__(self, key, as_type=six.text_type, suffix=None, - float_places=2): - """Create a basic storage strategy. Parameters: - - - `key`: The key on the Mutagen file object used to access the - field's data. - - `as_type`: The Python type that the value is stored as - internally (`unicode`, `int`, `bool`, or `bytes`). - - `suffix`: When `as_type` is a string type, append this before - storing the value. - - `float_places`: When the value is a floating-point number and - encoded as a string, the number of digits to store after the - decimal point. - """ - self.key = key - self.as_type = as_type - self.suffix = suffix - self.float_places = float_places - - # Convert suffix to correct string type. - if self.suffix and self.as_type is six.text_type \ - and not isinstance(self.suffix, six.text_type): - self.suffix = self.suffix.decode('utf-8') - - # Getter. - - def get(self, mutagen_file): - """Get the value for the field using this style. - """ - return self.deserialize(self.fetch(mutagen_file)) - - def fetch(self, mutagen_file): - """Retrieve the raw value of for this tag from the Mutagen file - object. - """ - try: - return mutagen_file[self.key][0] - except (KeyError, IndexError): - return None - - def deserialize(self, mutagen_value): - """Given a raw value stored on a Mutagen object, decode and - return the represented value. - """ - if self.suffix and isinstance(mutagen_value, six.text_type) \ - and mutagen_value.endswith(self.suffix): - return mutagen_value[:-len(self.suffix)] - else: - return mutagen_value - - # Setter. - - def set(self, mutagen_file, value): - """Assign the value for the field using this style. - """ - self.store(mutagen_file, self.serialize(value)) - - def store(self, mutagen_file, value): - """Store a serialized value in the Mutagen file object. - """ - mutagen_file[self.key] = [value] - - def serialize(self, value): - """Convert the external Python value to a type that is suitable for - storing in a Mutagen file object. - """ - if isinstance(value, float) and self.as_type is six.text_type: - value = u'{0:.{1}f}'.format(value, self.float_places) - value = self.as_type(value) - elif self.as_type is six.text_type: - if isinstance(value, bool): - # Store bools as 1/0 instead of True/False. - value = six.text_type(int(bool(value))) - elif isinstance(value, bytes): - value = value.decode('utf-8', 'ignore') - else: - value = six.text_type(value) - else: - value = self.as_type(value) - - if self.suffix: - value += self.suffix - - return value - - def delete(self, mutagen_file): - """Remove the tag from the file. - """ - if self.key in mutagen_file: - del mutagen_file[self.key] - - -class ListStorageStyle(StorageStyle): - """Abstract storage style that provides access to lists. - - The ListMediaField descriptor uses a ListStorageStyle via two - methods: ``get_list()`` and ``set_list()``. It passes a Mutagen file - object to each. - - Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must - return a (possibly empty) list and ``store`` receives a serialized - list of values as the second argument. - - The `serialize` and `deserialize` methods (from the base - `StorageStyle`) are still called with individual values. This class - handles packing and unpacking the values into lists. - """ - def get(self, mutagen_file): - """Get the first value in the field's value list. - """ - try: - return self.get_list(mutagen_file)[0] - except IndexError: - return None - - def get_list(self, mutagen_file): - """Get a list of all values for the field using this style. - """ - return [self.deserialize(item) for item in self.fetch(mutagen_file)] - - def fetch(self, mutagen_file): - """Get the list of raw (serialized) values. - """ - try: - return mutagen_file[self.key] - except KeyError: - return [] - - def set(self, mutagen_file, value): - """Set an individual value as the only value for the field using - this style. - """ - self.set_list(mutagen_file, [value]) - - def set_list(self, mutagen_file, values): - """Set all values for the field using this style. `values` - should be an iterable. - """ - self.store(mutagen_file, [self.serialize(value) for value in values]) - - def store(self, mutagen_file, values): - """Set the list of all raw (serialized) values for this field. - """ - mutagen_file[self.key] = values - - -class SoundCheckStorageStyleMixin(object): - """A mixin for storage styles that read and write iTunes SoundCheck - analysis values. The object must have an `index` field that - indicates which half of the gain/peak pair---0 or 1---the field - represents. - """ - def get(self, mutagen_file): - data = self.fetch(mutagen_file) - if data is not None: - return _sc_decode(data)[self.index] - - def set(self, mutagen_file, value): - data = self.fetch(mutagen_file) - if data is None: - gain_peak = [0, 0] - else: - gain_peak = list(_sc_decode(data)) - gain_peak[self.index] = value or 0 - data = self.serialize(_sc_encode(*gain_peak)) - self.store(mutagen_file, data) - - -class ASFStorageStyle(ListStorageStyle): - """A general storage style for Windows Media/ASF files. - """ - formats = ['ASF'] - - def deserialize(self, data): - if isinstance(data, mutagen.asf.ASFBaseAttribute): - data = data.value - return data - - -class MP4StorageStyle(StorageStyle): - """A general storage style for MPEG-4 tags. - """ - formats = ['MP4'] - - def serialize(self, value): - value = super(MP4StorageStyle, self).serialize(value) - if self.key.startswith('----:') and isinstance(value, six.text_type): - value = value.encode('utf-8') - return value - - -class MP4TupleStorageStyle(MP4StorageStyle): - """A style for storing values as part of a pair of numbers in an - MPEG-4 file. - """ - def __init__(self, key, index=0, **kwargs): - super(MP4TupleStorageStyle, self).__init__(key, **kwargs) - self.index = index - - def deserialize(self, mutagen_value): - items = mutagen_value or [] - packing_length = 2 - return list(items) + [0] * (packing_length - len(items)) - - def get(self, mutagen_file): - value = super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] - if value == 0: - # The values are always present and saved as integers. So we - # assume that "0" indicates it is not set. - return None - else: - return value - - def set(self, mutagen_file, value): - if value is None: - value = 0 - items = self.deserialize(self.fetch(mutagen_file)) - items[self.index] = int(value) - self.store(mutagen_file, items) - - def delete(self, mutagen_file): - if self.index == 0: - super(MP4TupleStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP4ListStorageStyle(ListStorageStyle, MP4StorageStyle): - pass - - -class MP4SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP4StorageStyle): - def __init__(self, key, index=0, **kwargs): - super(MP4SoundCheckStorageStyle, self).__init__(key, **kwargs) - self.index = index - - -class MP4BoolStorageStyle(MP4StorageStyle): - """A style for booleans in MPEG-4 files. (MPEG-4 has an atom type - specifically for representing booleans.) - """ - def get(self, mutagen_file): - try: - return mutagen_file[self.key] - except KeyError: - return None - - def get_list(self, mutagen_file): - raise NotImplementedError(u'MP4 bool storage does not support lists') - - def set(self, mutagen_file, value): - mutagen_file[self.key] = value - - def set_list(self, mutagen_file, values): - raise NotImplementedError(u'MP4 bool storage does not support lists') - - -class MP4ImageStorageStyle(MP4ListStorageStyle): - """Store images as MPEG-4 image atoms. Values are `Image` objects. - """ - def __init__(self, **kwargs): - super(MP4ImageStorageStyle, self).__init__(key='covr', **kwargs) - - def deserialize(self, data): - return Image(data) - - def serialize(self, image): - if image.mime_type == 'image/png': - kind = mutagen.mp4.MP4Cover.FORMAT_PNG - elif image.mime_type == 'image/jpeg': - kind = mutagen.mp4.MP4Cover.FORMAT_JPEG - else: - raise ValueError(u'MP4 files only supports PNG and JPEG images') - return mutagen.mp4.MP4Cover(image.data, kind) - - -class MP3StorageStyle(StorageStyle): - """Store data in ID3 frames. - """ - formats = ['MP3', 'AIFF', 'DSF'] - - def __init__(self, key, id3_lang=None, **kwargs): - """Create a new ID3 storage style. `id3_lang` is the value for - the language field of newly created frames. - """ - self.id3_lang = id3_lang - super(MP3StorageStyle, self).__init__(key, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text[0] - except (KeyError, IndexError): - return None - - def store(self, mutagen_file, value): - frame = mutagen.id3.Frames[self.key](encoding=3, text=[value]) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3PeopleStorageStyle(MP3StorageStyle): - """Store list of people in ID3 frames. - """ - def __init__(self, key, involvement='', **kwargs): - self.involvement = involvement - super(MP3PeopleStorageStyle, self).__init__(key, **kwargs) - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - - # Try modifying in place. - found = False - for frame in frames: - if frame.encoding == mutagen.id3.Encoding.UTF8: - for pair in frame.people: - if pair[0].lower() == self.involvement.lower(): - pair[1] = value - found = True - - # Try creating a new frame. - if not found: - frame = mutagen.id3.Frames[self.key]( - encoding=mutagen.id3.Encoding.UTF8, - people=[[self.involvement, value]] - ) - mutagen_file.tags.add(frame) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - for pair in frame.people: - if pair[0].lower() == self.involvement.lower(): - try: - return pair[1] - except IndexError: - return None - - -class MP3ListStorageStyle(ListStorageStyle, MP3StorageStyle): - """Store lists of data in multiple ID3 frames. - """ - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].text - except KeyError: - return [] - - def store(self, mutagen_file, values): - frame = mutagen.id3.Frames[self.key](encoding=3, text=values) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3UFIDStorageStyle(MP3StorageStyle): - """Store string data in a UFID ID3 frame with a particular owner. - """ - def __init__(self, owner, **kwargs): - self.owner = owner - super(MP3UFIDStorageStyle, self).__init__('UFID:' + owner, **kwargs) - - def fetch(self, mutagen_file): - try: - return mutagen_file[self.key].data - except KeyError: - return None - - def store(self, mutagen_file, value): - # This field type stores text data as encoded data. - assert isinstance(value, six.text_type) - value = value.encode('utf-8') - - frames = mutagen_file.tags.getall(self.key) - for frame in frames: - # Replace existing frame data. - if frame.owner == self.owner: - frame.data = value - else: - # New frame. - frame = mutagen.id3.UFID(owner=self.owner, data=value) - mutagen_file.tags.setall(self.key, [frame]) - - -class MP3DescStorageStyle(MP3StorageStyle): - """Store data in a TXXX (or similar) ID3 frame. The frame is - selected based its ``desc`` field. - """ - def __init__(self, desc=u'', key='TXXX', **kwargs): - assert isinstance(desc, six.text_type) - self.description = desc - super(MP3DescStorageStyle, self).__init__(key=key, **kwargs) - - def store(self, mutagen_file, value): - frames = mutagen_file.tags.getall(self.key) - if self.key != 'USLT': - value = [value] - - # Try modifying in place. - found = False - for frame in frames: - if frame.desc.lower() == self.description.lower(): - frame.text = value - frame.encoding = mutagen.id3.Encoding.UTF8 - found = True - - # Try creating a new frame. - if not found: - frame = mutagen.id3.Frames[self.key]( - desc=self.description, - text=value, - encoding=mutagen.id3.Encoding.UTF8, - ) - if self.id3_lang: - frame.lang = self.id3_lang - mutagen_file.tags.add(frame) - - def fetch(self, mutagen_file): - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - if self.key == 'USLT': - return frame.text - try: - return frame.text[0] - except IndexError: - return None - - def delete(self, mutagen_file): - found_frame = None - for frame in mutagen_file.tags.getall(self.key): - if frame.desc.lower() == self.description.lower(): - found_frame = frame - break - if found_frame is not None: - del mutagen_file[frame.HashKey] - - -class MP3SlashPackStorageStyle(MP3StorageStyle): - """Store value as part of pair that is serialized as a slash- - separated string. - """ - def __init__(self, key, pack_pos=0, **kwargs): - super(MP3SlashPackStorageStyle, self).__init__(key, **kwargs) - self.pack_pos = pack_pos - - def _fetch_unpacked(self, mutagen_file): - data = self.fetch(mutagen_file) - if data: - items = six.text_type(data).split('/') - else: - items = [] - packing_length = 2 - return list(items) + [None] * (packing_length - len(items)) - - def get(self, mutagen_file): - return self._fetch_unpacked(mutagen_file)[self.pack_pos] - - def set(self, mutagen_file, value): - items = self._fetch_unpacked(mutagen_file) - items[self.pack_pos] = value - if items[0] is None: - items[0] = '' - if items[1] is None: - items.pop() # Do not store last value - self.store(mutagen_file, '/'.join(map(six.text_type, items))) - - def delete(self, mutagen_file): - if self.pack_pos == 0: - super(MP3SlashPackStorageStyle, self).delete(mutagen_file) - else: - self.set(mutagen_file, None) - - -class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): - """Converts between APIC frames and ``Image`` instances. - - The `get_list` method inherited from ``ListStorageStyle`` returns a - list of ``Image``s. Similarly, the `set_list` method accepts a - list of ``Image``s as its ``values`` argument. - """ - def __init__(self): - super(MP3ImageStorageStyle, self).__init__(key='APIC') - self.as_type = bytes - - def deserialize(self, apic_frame): - """Convert APIC frame into Image.""" - return Image(data=apic_frame.data, desc=apic_frame.desc, - type=apic_frame.type) - - def fetch(self, mutagen_file): - return mutagen_file.tags.getall(self.key) - - def store(self, mutagen_file, frames): - mutagen_file.tags.setall(self.key, frames) - - def delete(self, mutagen_file): - mutagen_file.tags.delall(self.key) - - def serialize(self, image): - """Return an APIC frame populated with data from ``image``. - """ - assert isinstance(image, Image) - frame = mutagen.id3.Frames[self.key]() - frame.data = image.data - frame.mime = image.mime_type - frame.desc = image.desc or u'' - - # For compatibility with OS X/iTunes prefer latin-1 if possible. - # See issue #899 - try: - frame.desc.encode("latin-1") - except UnicodeEncodeError: - frame.encoding = mutagen.id3.Encoding.UTF16 - else: - frame.encoding = mutagen.id3.Encoding.LATIN1 - - frame.type = image.type_index - return frame - - -class MP3SoundCheckStorageStyle(SoundCheckStorageStyleMixin, - MP3DescStorageStyle): - def __init__(self, index=0, **kwargs): - super(MP3SoundCheckStorageStyle, self).__init__(**kwargs) - self.index = index - - -class ASFImageStorageStyle(ListStorageStyle): - """Store images packed into Windows Media/ASF byte array attributes. - Values are `Image` objects. - """ - formats = ['ASF'] - - def __init__(self): - super(ASFImageStorageStyle, self).__init__(key='WM/Picture') - - def deserialize(self, asf_picture): - mime, data, type, desc = _unpack_asf_image(asf_picture.value) - return Image(data, desc=desc, type=type) - - def serialize(self, image): - pic = mutagen.asf.ASFByteArrayAttribute() - pic.value = _pack_asf_image(image.mime_type, image.data, - type=image.type_index, - description=image.desc or u'') - return pic - - -class VorbisImageStorageStyle(ListStorageStyle): - """Store images in Vorbis comments. Both legacy COVERART fields and - modern METADATA_BLOCK_PICTURE tags are supported. Data is - base64-encoded. Values are `Image` objects. - """ - formats = ['OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', - 'OggFlac'] - - def __init__(self): - super(VorbisImageStorageStyle, self).__init__( - key='metadata_block_picture' - ) - self.as_type = bytes - - def fetch(self, mutagen_file): - images = [] - if 'metadata_block_picture' not in mutagen_file: - # Try legacy COVERART tags. - if 'coverart' in mutagen_file: - for data in mutagen_file['coverart']: - images.append(Image(base64.b64decode(data))) - return images - for data in mutagen_file["metadata_block_picture"]: - try: - pic = mutagen.flac.Picture(base64.b64decode(data)) - except (TypeError, AttributeError): - continue - images.append(Image(data=pic.data, desc=pic.desc, - type=pic.type)) - return images - - def store(self, mutagen_file, image_data): - # Strip all art, including legacy COVERART. - if 'coverart' in mutagen_file: - del mutagen_file['coverart'] - if 'coverartmime' in mutagen_file: - del mutagen_file['coverartmime'] - super(VorbisImageStorageStyle, self).store(mutagen_file, image_data) - - def serialize(self, image): - """Turn a Image into a base64 encoded FLAC picture block. - """ - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or u'' - - # Encoding with base64 returns bytes on both Python 2 and 3. - # Mutagen requires the data to be a Unicode string, so we decode - # it before passing it along. - return base64.b64encode(pic.write()).decode('ascii') - - -class FlacImageStorageStyle(ListStorageStyle): - """Converts between ``mutagen.flac.Picture`` and ``Image`` instances. - """ - formats = ['FLAC'] - - def __init__(self): - super(FlacImageStorageStyle, self).__init__(key='') - - def fetch(self, mutagen_file): - return mutagen_file.pictures - - def deserialize(self, flac_picture): - return Image(data=flac_picture.data, desc=flac_picture.desc, - type=flac_picture.type) - - def store(self, mutagen_file, pictures): - """``pictures`` is a list of mutagen.flac.Picture instances. - """ - mutagen_file.clear_pictures() - for pic in pictures: - mutagen_file.add_picture(pic) - - def serialize(self, image): - """Turn a Image into a mutagen.flac.Picture. - """ - pic = mutagen.flac.Picture() - pic.data = image.data - pic.type = image.type_index - pic.mime = image.mime_type - pic.desc = image.desc or u'' - return pic - - def delete(self, mutagen_file): - """Remove all images from the file. - """ - mutagen_file.clear_pictures() - - -class APEv2ImageStorageStyle(ListStorageStyle): - """Store images in APEv2 tags. Values are `Image` objects. - """ - formats = ['APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio', 'OptimFROG'] - - TAG_NAMES = { - ImageType.other: 'Cover Art (other)', - ImageType.icon: 'Cover Art (icon)', - ImageType.other_icon: 'Cover Art (other icon)', - ImageType.front: 'Cover Art (front)', - ImageType.back: 'Cover Art (back)', - ImageType.leaflet: 'Cover Art (leaflet)', - ImageType.media: 'Cover Art (media)', - ImageType.lead_artist: 'Cover Art (lead)', - ImageType.artist: 'Cover Art (artist)', - ImageType.conductor: 'Cover Art (conductor)', - ImageType.group: 'Cover Art (band)', - ImageType.composer: 'Cover Art (composer)', - ImageType.lyricist: 'Cover Art (lyricist)', - ImageType.recording_location: 'Cover Art (studio)', - ImageType.recording_session: 'Cover Art (recording)', - ImageType.performance: 'Cover Art (performance)', - ImageType.screen_capture: 'Cover Art (movie scene)', - ImageType.fish: 'Cover Art (colored fish)', - ImageType.illustration: 'Cover Art (illustration)', - ImageType.artist_logo: 'Cover Art (band logo)', - ImageType.publisher_logo: 'Cover Art (publisher logo)', - } - - def __init__(self): - super(APEv2ImageStorageStyle, self).__init__(key='') - - def fetch(self, mutagen_file): - images = [] - for cover_type, cover_tag in self.TAG_NAMES.items(): - try: - frame = mutagen_file[cover_tag] - text_delimiter_index = frame.value.find(b'\x00') - if text_delimiter_index > 0: - comment = frame.value[0:text_delimiter_index] - comment = comment.decode('utf-8', 'replace') - else: - comment = None - image_data = frame.value[text_delimiter_index + 1:] - images.append(Image(data=image_data, type=cover_type, - desc=comment)) - except KeyError: - pass - - return images - - def set_list(self, mutagen_file, values): - self.delete(mutagen_file) - - for image in values: - image_type = image.type or ImageType.other - comment = image.desc or '' - image_data = comment.encode('utf-8') + b'\x00' + image.data - cover_tag = self.TAG_NAMES[image_type] - mutagen_file[cover_tag] = image_data - - def delete(self, mutagen_file): - """Remove all images from the file. - """ - for cover_tag in self.TAG_NAMES.values(): - try: - del mutagen_file[cover_tag] - except KeyError: - pass - - -# MediaField is a descriptor that represents a single logical field. It -# aggregates several StorageStyles describing how to access the data for -# each file type. - -class MediaField(object): - """A descriptor providing access to a particular (abstract) metadata - field. - """ - def __init__(self, *styles, **kwargs): - """Creates a new MediaField. - - :param styles: `StorageStyle` instances that describe the strategy - for reading and writing the field in particular - formats. There must be at least one style for - each possible file format. - - :param out_type: the type of the value that should be returned when - getting this property. - - """ - self.out_type = kwargs.get('out_type', six.text_type) - self._styles = styles - - def styles(self, mutagen_file): - """Yields the list of storage styles of this field that can - handle the MediaFile's format. - """ - for style in self._styles: - if mutagen_file.__class__.__name__ in style.formats: - yield style - - def __get__(self, mediafile, owner=None): - out = None - for style in self.styles(mediafile.mgfile): - out = style.get(mediafile.mgfile) - if out: - break - return _safe_cast(self.out_type, out) - - def __set__(self, mediafile, value): - if value is None: - value = self._none_value() - for style in self.styles(mediafile.mgfile): - style.set(mediafile.mgfile, value) - - def __delete__(self, mediafile): - for style in self.styles(mediafile.mgfile): - style.delete(mediafile.mgfile) - - def _none_value(self): - """Get an appropriate "null" value for this field's type. This - is used internally when setting the field to None. - """ - if self.out_type == int: - return 0 - elif self.out_type == float: - return 0.0 - elif self.out_type == bool: - return False - elif self.out_type == six.text_type: - return u'' - - -class ListMediaField(MediaField): - """Property descriptor that retrieves a list of multiple values from - a tag. - - Uses ``get_list`` and set_list`` methods of its ``StorageStyle`` - strategies to do the actual work. - """ - def __get__(self, mediafile, _): - values = [] - for style in self.styles(mediafile.mgfile): - values.extend(style.get_list(mediafile.mgfile)) - return [_safe_cast(self.out_type, value) for value in values] - - def __set__(self, mediafile, values): - for style in self.styles(mediafile.mgfile): - style.set_list(mediafile.mgfile, values) - - def single_field(self): - """Returns a ``MediaField`` descriptor that gets and sets the - first item. - """ - options = {'out_type': self.out_type} - return MediaField(*self._styles, **options) - - -class DateField(MediaField): - """Descriptor that handles serializing and deserializing dates - - The getter parses value from tags into a ``datetime.date`` instance - and setter serializes such an instance into a string. - - For granular access to year, month, and day, use the ``*_field`` - methods to create corresponding `DateItemField`s. - """ - def __init__(self, *date_styles, **kwargs): - """``date_styles`` is a list of ``StorageStyle``s to store and - retrieve the whole date from. The ``year`` option is an - additional list of fallback styles for the year. The year is - always set on this style, but is only retrieved if the main - storage styles do not return a value. - """ - super(DateField, self).__init__(*date_styles) - year_style = kwargs.get('year', None) - if year_style: - self._year_field = MediaField(*year_style) - - def __get__(self, mediafile, owner=None): - year, month, day = self._get_date_tuple(mediafile) - if not year: - return None - try: - return datetime.date( - year, - month or 1, - day or 1 - ) - except ValueError: # Out of range values. - return None - - def __set__(self, mediafile, date): - if date is None: - self._set_date_tuple(mediafile, None, None, None) - else: - self._set_date_tuple(mediafile, date.year, date.month, date.day) - - def __delete__(self, mediafile): - super(DateField, self).__delete__(mediafile) - if hasattr(self, '_year_field'): - self._year_field.__delete__(mediafile) - - def _get_date_tuple(self, mediafile): - """Get a 3-item sequence representing the date consisting of a - year, month, and day number. Each number is either an integer or - None. - """ - # Get the underlying data and split on hyphens and slashes. - datestring = super(DateField, self).__get__(mediafile, None) - if isinstance(datestring, six.string_types): - datestring = re.sub(r'[Tt ].*$', '', six.text_type(datestring)) - items = re.split('[-/]', six.text_type(datestring)) - else: - items = [] - - # Ensure that we have exactly 3 components, possibly by - # truncating or padding. - items = items[:3] - if len(items) < 3: - items += [None] * (3 - len(items)) - - # Use year field if year is missing. - if not items[0] and hasattr(self, '_year_field'): - items[0] = self._year_field.__get__(mediafile) - - # Convert each component to an integer if possible. - items_ = [] - for item in items: - try: - items_.append(int(item)) - except: - items_.append(None) - return items_ - - def _set_date_tuple(self, mediafile, year, month=None, day=None): - """Set the value of the field given a year, month, and day - number. Each number can be an integer or None to indicate an - unset component. - """ - if year is None: - self.__delete__(mediafile) - return - - date = [u'{0:04d}'.format(int(year))] - if month: - date.append(u'{0:02d}'.format(int(month))) - if month and day: - date.append(u'{0:02d}'.format(int(day))) - date = map(six.text_type, date) - super(DateField, self).__set__(mediafile, u'-'.join(date)) - - if hasattr(self, '_year_field'): - self._year_field.__set__(mediafile, year) - - def year_field(self): - return DateItemField(self, 0) - - def month_field(self): - return DateItemField(self, 1) - - def day_field(self): - return DateItemField(self, 2) - - -class DateItemField(MediaField): - """Descriptor that gets and sets constituent parts of a `DateField`: - the month, day, or year. - """ - def __init__(self, date_field, item_pos): - self.date_field = date_field - self.item_pos = item_pos - - def __get__(self, mediafile, _): - return self.date_field._get_date_tuple(mediafile)[self.item_pos] - - def __set__(self, mediafile, value): - items = self.date_field._get_date_tuple(mediafile) - items[self.item_pos] = value - self.date_field._set_date_tuple(mediafile, *items) - - def __delete__(self, mediafile): - self.__set__(mediafile, None) - - -class CoverArtField(MediaField): - """A descriptor that provides access to the *raw image data* for the - cover image on a file. This is used for backwards compatibility: the - full `ImageListField` provides richer `Image` objects. - - When there are multiple images we try to pick the most likely to be a front - cover. - """ - def __init__(self): - pass - - def __get__(self, mediafile, _): - candidates = mediafile.images - if candidates: - return self.guess_cover_image(candidates).data - else: - return None - - @staticmethod - def guess_cover_image(candidates): - if len(candidates) == 1: - return candidates[0] - try: - return next(c for c in candidates if c.type == ImageType.front) - except StopIteration: - return candidates[0] - - def __set__(self, mediafile, data): - if data: - mediafile.images = [Image(data=data)] - else: - mediafile.images = [] - - def __delete__(self, mediafile): - delattr(mediafile, 'images') - - -class ImageListField(ListMediaField): - """Descriptor to access the list of images embedded in tags. - - The getter returns a list of `Image` instances obtained from - the tags. The setter accepts a list of `Image` instances to be - written to the tags. - """ - def __init__(self): - # The storage styles used here must implement the - # `ListStorageStyle` interface and get and set lists of - # `Image`s. - super(ImageListField, self).__init__( - MP3ImageStorageStyle(), - MP4ImageStorageStyle(), - ASFImageStorageStyle(), - VorbisImageStorageStyle(), - FlacImageStorageStyle(), - APEv2ImageStorageStyle(), - out_type=Image, - ) - - -# MediaFile is a collection of fields. - -class MediaFile(object): - """Represents a multimedia file on disk and provides access to its - metadata. - """ - def __init__(self, path, id3v23=False): - """Constructs a new `MediaFile` reflecting the file at path. May - throw `UnreadableFileError`. - - By default, MP3 files are saved with ID3v2.4 tags. You can use - the older ID3v2.3 standard by specifying the `id3v23` option. - """ - self.path = path - - self.mgfile = mutagen_call('open', path, mutagen.File, path) - - if self.mgfile is None: - # Mutagen couldn't guess the type - raise FileTypeError(path) - elif (type(self.mgfile).__name__ == 'M4A' or - type(self.mgfile).__name__ == 'MP4'): - info = self.mgfile.info - if info.codec and info.codec.startswith('alac'): - self.type = 'alac' - else: - self.type = 'aac' - elif (type(self.mgfile).__name__ == 'ID3' or - type(self.mgfile).__name__ == 'MP3'): - self.type = 'mp3' - elif type(self.mgfile).__name__ == 'FLAC': - self.type = 'flac' - elif type(self.mgfile).__name__ == 'OggOpus': - self.type = 'opus' - elif type(self.mgfile).__name__ == 'OggVorbis': - self.type = 'ogg' - elif type(self.mgfile).__name__ == 'MonkeysAudio': - self.type = 'ape' - elif type(self.mgfile).__name__ == 'WavPack': - self.type = 'wv' - elif type(self.mgfile).__name__ == 'Musepack': - self.type = 'mpc' - elif type(self.mgfile).__name__ == 'ASF': - self.type = 'asf' - elif type(self.mgfile).__name__ == 'AIFF': - self.type = 'aiff' - elif type(self.mgfile).__name__ == 'DSF': - self.type = 'dsf' - else: - raise FileTypeError(path, type(self.mgfile).__name__) - - # Add a set of tags if it's missing. - if self.mgfile.tags is None: - self.mgfile.add_tags() - - # Set the ID3v2.3 flag only for MP3s. - self.id3v23 = id3v23 and self.type == 'mp3' - - def save(self): - """Write the object's tags back to the file. May - throw `UnreadableFileError`. - """ - # Possibly save the tags to ID3v2.3. - kwargs = {} - if self.id3v23: - id3 = self.mgfile - if hasattr(id3, 'tags'): - # In case this is an MP3 object, not an ID3 object. - id3 = id3.tags - id3.update_to_v23() - kwargs['v2_version'] = 3 - - mutagen_call('save', self.path, self.mgfile.save, **kwargs) - - def delete(self): - """Remove the current metadata tag from the file. May - throw `UnreadableFileError`. - """ - mutagen_call('delete', self.path, self.mgfile.delete) - - # Convenient access to the set of available fields. - - @classmethod - def fields(cls): - """Get the names of all writable properties that reflect - metadata tags (i.e., those that are instances of - :class:`MediaField`). - """ - for property, descriptor in cls.__dict__.items(): - if isinstance(descriptor, MediaField): - if isinstance(property, bytes): - # On Python 2, class field names are bytes. This method - # produces text strings. - yield property.decode('utf8', 'ignore') - else: - yield property - - @classmethod - def _field_sort_name(cls, name): - """Get a sort key for a field name that determines the order - fields should be written in. - - Fields names are kept unchanged, unless they are instances of - :class:`DateItemField`, in which case `year`, `month`, and `day` - are replaced by `date0`, `date1`, and `date2`, respectively, to - make them appear in that order. - """ - if isinstance(cls.__dict__[name], DateItemField): - name = re.sub('year', 'date0', name) - name = re.sub('month', 'date1', name) - name = re.sub('day', 'date2', name) - return name - - @classmethod - def sorted_fields(cls): - """Get the names of all writable metadata fields, sorted in the - order that they should be written. - - This is a lexicographic order, except for instances of - :class:`DateItemField`, which are sorted in year-month-day - order. - """ - for property in sorted(cls.fields(), key=cls._field_sort_name): - yield property - - @classmethod - def readable_fields(cls): - """Get all metadata fields: the writable ones from - :meth:`fields` and also other audio properties. - """ - for property in cls.fields(): - yield property - for property in ('length', 'samplerate', 'bitdepth', 'bitrate', - 'channels', 'format'): - yield property - - @classmethod - def add_field(cls, name, descriptor): - """Add a field to store custom tags. - - :param name: the name of the property the field is accessed - through. It must not already exist on this class. - - :param descriptor: an instance of :class:`MediaField`. - """ - if not isinstance(descriptor, MediaField): - raise ValueError( - u'{0} must be an instance of MediaField'.format(descriptor)) - if name in cls.__dict__: - raise ValueError( - u'property "{0}" already exists on MediaField'.format(name)) - setattr(cls, name, descriptor) - - def update(self, dict): - """Set all field values from a dictionary. - - For any key in `dict` that is also a field to store tags the - method retrieves the corresponding value from `dict` and updates - the `MediaFile`. If a key has the value `None`, the - corresponding property is deleted from the `MediaFile`. - """ - for field in self.sorted_fields(): - if field in dict: - if dict[field] is None: - delattr(self, field) - else: - setattr(self, field, dict[field]) - - # Field definitions. - - title = MediaField( - MP3StorageStyle('TIT2'), - MP4StorageStyle('\xa9nam'), - StorageStyle('TITLE'), - ASFStorageStyle('Title'), - ) - artist = MediaField( - MP3StorageStyle('TPE1'), - MP4StorageStyle('\xa9ART'), - StorageStyle('ARTIST'), - ASFStorageStyle('Author'), - ) - album = MediaField( - MP3StorageStyle('TALB'), - MP4StorageStyle('\xa9alb'), - StorageStyle('ALBUM'), - ASFStorageStyle('WM/AlbumTitle'), - ) - genres = ListMediaField( - MP3ListStorageStyle('TCON'), - MP4ListStorageStyle('\xa9gen'), - ListStorageStyle('GENRE'), - ASFStorageStyle('WM/Genre'), - ) - genre = genres.single_field() - - lyricist = MediaField( - MP3StorageStyle('TEXT'), - MP4StorageStyle('----:com.apple.iTunes:LYRICIST'), - StorageStyle('LYRICIST'), - ASFStorageStyle('WM/Writer'), - ) - composer = MediaField( - MP3StorageStyle('TCOM'), - MP4StorageStyle('\xa9wrt'), - StorageStyle('COMPOSER'), - ASFStorageStyle('WM/Composer'), - ) - arranger = MediaField( - MP3PeopleStorageStyle('TIPL', involvement='arranger'), - MP4StorageStyle('----:com.apple.iTunes:Arranger'), - StorageStyle('ARRANGER'), - ASFStorageStyle('beets/Arranger'), - ) - - grouping = MediaField( - MP3StorageStyle('TIT1'), - MP4StorageStyle('\xa9grp'), - StorageStyle('GROUPING'), - ASFStorageStyle('WM/ContentGroupDescription'), - ) - track = MediaField( - MP3SlashPackStorageStyle('TRCK', pack_pos=0), - MP4TupleStorageStyle('trkn', index=0), - StorageStyle('TRACK'), - StorageStyle('TRACKNUMBER'), - ASFStorageStyle('WM/TrackNumber'), - out_type=int, - ) - tracktotal = MediaField( - MP3SlashPackStorageStyle('TRCK', pack_pos=1), - MP4TupleStorageStyle('trkn', index=1), - StorageStyle('TRACKTOTAL'), - StorageStyle('TRACKC'), - StorageStyle('TOTALTRACKS'), - ASFStorageStyle('TotalTracks'), - out_type=int, - ) - disc = MediaField( - MP3SlashPackStorageStyle('TPOS', pack_pos=0), - MP4TupleStorageStyle('disk', index=0), - StorageStyle('DISC'), - StorageStyle('DISCNUMBER'), - ASFStorageStyle('WM/PartOfSet'), - out_type=int, - ) - disctotal = MediaField( - MP3SlashPackStorageStyle('TPOS', pack_pos=1), - MP4TupleStorageStyle('disk', index=1), - StorageStyle('DISCTOTAL'), - StorageStyle('DISCC'), - StorageStyle('TOTALDISCS'), - ASFStorageStyle('TotalDiscs'), - out_type=int, - ) - lyrics = MediaField( - MP3DescStorageStyle(key='USLT'), - MP4StorageStyle('\xa9lyr'), - StorageStyle('LYRICS'), - ASFStorageStyle('WM/Lyrics'), - ) - comments = MediaField( - MP3DescStorageStyle(key='COMM'), - MP4StorageStyle('\xa9cmt'), - StorageStyle('DESCRIPTION'), - StorageStyle('COMMENT'), - ASFStorageStyle('WM/Comments'), - ASFStorageStyle('Description') - ) - bpm = MediaField( - MP3StorageStyle('TBPM'), - MP4StorageStyle('tmpo', as_type=int), - StorageStyle('BPM'), - ASFStorageStyle('WM/BeatsPerMinute'), - out_type=int, - ) - comp = MediaField( - MP3StorageStyle('TCMP'), - MP4BoolStorageStyle('cpil'), - StorageStyle('COMPILATION'), - ASFStorageStyle('WM/IsCompilation', as_type=bool), - out_type=bool, - ) - albumartist = MediaField( - MP3StorageStyle('TPE2'), - MP4StorageStyle('aART'), - StorageStyle('ALBUM ARTIST'), - StorageStyle('ALBUMARTIST'), - ASFStorageStyle('WM/AlbumArtist'), - ) - albumtype = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Type'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Type'), - StorageStyle('MUSICBRAINZ_ALBUMTYPE'), - ASFStorageStyle('MusicBrainz/Album Type'), - ) - label = MediaField( - MP3StorageStyle('TPUB'), - MP4StorageStyle('----:com.apple.iTunes:Label'), - MP4StorageStyle('----:com.apple.iTunes:publisher'), - StorageStyle('LABEL'), - StorageStyle('PUBLISHER'), # Traktor - ASFStorageStyle('WM/Publisher'), - ) - artist_sort = MediaField( - MP3StorageStyle('TSOP'), - MP4StorageStyle('soar'), - StorageStyle('ARTISTSORT'), - ASFStorageStyle('WM/ArtistSortOrder'), - ) - albumartist_sort = MediaField( - MP3DescStorageStyle(u'ALBUMARTISTSORT'), - MP4StorageStyle('soaa'), - StorageStyle('ALBUMARTISTSORT'), - ASFStorageStyle('WM/AlbumArtistSortOrder'), - ) - asin = MediaField( - MP3DescStorageStyle(u'ASIN'), - MP4StorageStyle('----:com.apple.iTunes:ASIN'), - StorageStyle('ASIN'), - ASFStorageStyle('MusicBrainz/ASIN'), - ) - catalognum = MediaField( - MP3DescStorageStyle(u'CATALOGNUMBER'), - MP4StorageStyle('----:com.apple.iTunes:CATALOGNUMBER'), - StorageStyle('CATALOGNUMBER'), - ASFStorageStyle('WM/CatalogNo'), - ) - disctitle = MediaField( - MP3StorageStyle('TSST'), - MP4StorageStyle('----:com.apple.iTunes:DISCSUBTITLE'), - StorageStyle('DISCSUBTITLE'), - ASFStorageStyle('WM/SetSubTitle'), - ) - encoder = MediaField( - MP3StorageStyle('TENC'), - MP4StorageStyle('\xa9too'), - StorageStyle('ENCODEDBY'), - StorageStyle('ENCODER'), - ASFStorageStyle('WM/EncodedBy'), - ) - script = MediaField( - MP3DescStorageStyle(u'Script'), - MP4StorageStyle('----:com.apple.iTunes:SCRIPT'), - StorageStyle('SCRIPT'), - ASFStorageStyle('WM/Script'), - ) - language = MediaField( - MP3StorageStyle('TLAN'), - MP4StorageStyle('----:com.apple.iTunes:LANGUAGE'), - StorageStyle('LANGUAGE'), - ASFStorageStyle('WM/Language'), - ) - country = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Release Country'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz ' - 'Album Release Country'), - StorageStyle('RELEASECOUNTRY'), - ASFStorageStyle('MusicBrainz/Album Release Country'), - ) - albumstatus = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Status'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Status'), - StorageStyle('MUSICBRAINZ_ALBUMSTATUS'), - ASFStorageStyle('MusicBrainz/Album Status'), - ) - media = MediaField( - MP3StorageStyle('TMED'), - MP4StorageStyle('----:com.apple.iTunes:MEDIA'), - StorageStyle('MEDIA'), - ASFStorageStyle('WM/Media'), - ) - albumdisambig = MediaField( - # This tag mapping was invented for beets (not used by Picard, etc). - MP3DescStorageStyle(u'MusicBrainz Album Comment'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Comment'), - StorageStyle('MUSICBRAINZ_ALBUMCOMMENT'), - ASFStorageStyle('MusicBrainz/Album Comment'), - ) - - # Release date. - date = DateField( - MP3StorageStyle('TDRC'), - MP4StorageStyle('\xa9day'), - StorageStyle('DATE'), - ASFStorageStyle('WM/Year'), - year=(StorageStyle('YEAR'),)) - - year = date.year_field() - month = date.month_field() - day = date.day_field() - - # *Original* release date. - original_date = DateField( - MP3StorageStyle('TDOR'), - MP4StorageStyle('----:com.apple.iTunes:ORIGINAL YEAR'), - StorageStyle('ORIGINALDATE'), - ASFStorageStyle('WM/OriginalReleaseYear')) - - original_year = original_date.year_field() - original_month = original_date.month_field() - original_day = original_date.day_field() - - # Nonstandard metadata. - artist_credit = MediaField( - MP3DescStorageStyle(u'Artist Credit'), - MP4StorageStyle('----:com.apple.iTunes:Artist Credit'), - StorageStyle('ARTIST_CREDIT'), - ASFStorageStyle('beets/Artist Credit'), - ) - albumartist_credit = MediaField( - MP3DescStorageStyle(u'Album Artist Credit'), - MP4StorageStyle('----:com.apple.iTunes:Album Artist Credit'), - StorageStyle('ALBUMARTIST_CREDIT'), - ASFStorageStyle('beets/Album Artist Credit'), - ) - - # Legacy album art field - art = CoverArtField() - - # Image list - images = ImageListField() - - # MusicBrainz IDs. - mb_trackid = MediaField( - MP3UFIDStorageStyle(owner='http://musicbrainz.org'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Track Id'), - StorageStyle('MUSICBRAINZ_TRACKID'), - ASFStorageStyle('MusicBrainz/Track Id'), - ) - mb_albumid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Id'), - StorageStyle('MUSICBRAINZ_ALBUMID'), - ASFStorageStyle('MusicBrainz/Album Id'), - ) - mb_artistid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Artist Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Artist Id'), - StorageStyle('MUSICBRAINZ_ARTISTID'), - ASFStorageStyle('MusicBrainz/Artist Id'), - ) - mb_albumartistid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Album Artist Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Artist Id'), - StorageStyle('MUSICBRAINZ_ALBUMARTISTID'), - ASFStorageStyle('MusicBrainz/Album Artist Id'), - ) - mb_releasegroupid = MediaField( - MP3DescStorageStyle(u'MusicBrainz Release Group Id'), - MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Group Id'), - StorageStyle('MUSICBRAINZ_RELEASEGROUPID'), - ASFStorageStyle('MusicBrainz/Release Group Id'), - ) - - # Acoustid fields. - acoustid_fingerprint = MediaField( - MP3DescStorageStyle(u'Acoustid Fingerprint'), - MP4StorageStyle('----:com.apple.iTunes:Acoustid Fingerprint'), - StorageStyle('ACOUSTID_FINGERPRINT'), - ASFStorageStyle('Acoustid/Fingerprint'), - ) - acoustid_id = MediaField( - MP3DescStorageStyle(u'Acoustid Id'), - MP4StorageStyle('----:com.apple.iTunes:Acoustid Id'), - StorageStyle('ACOUSTID_ID'), - ASFStorageStyle('Acoustid/Id'), - ) - - # ReplayGain fields. - rg_track_gain = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB' - ), - MP3DescStorageStyle( - u'replaygain_track_gain', - float_places=2, suffix=u' dB' - ), - MP3SoundCheckStorageStyle( - key='COMM', - index=0, desc=u'iTunNORM', - id3_lang='eng' - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_track_gain', - float_places=2, suffix=' dB' - ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=0 - ), - StorageStyle( - u'REPLAYGAIN_TRACK_GAIN', - float_places=2, suffix=u' dB' - ), - ASFStorageStyle( - u'replaygain_track_gain', - float_places=2, suffix=u' dB' - ), - out_type=float - ) - rg_album_gain = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB' - ), - MP3DescStorageStyle( - u'replaygain_album_gain', - float_places=2, suffix=u' dB' - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_album_gain', - float_places=2, suffix=' dB' - ), - StorageStyle( - u'REPLAYGAIN_ALBUM_GAIN', - float_places=2, suffix=u' dB' - ), - ASFStorageStyle( - u'replaygain_album_gain', - float_places=2, suffix=u' dB' - ), - out_type=float - ) - rg_track_peak = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_TRACK_PEAK', - float_places=6 - ), - MP3DescStorageStyle( - u'replaygain_track_peak', - float_places=6 - ), - MP3SoundCheckStorageStyle( - key=u'COMM', - index=1, desc=u'iTunNORM', - id3_lang='eng' - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_track_peak', - float_places=6 - ), - MP4SoundCheckStorageStyle( - '----:com.apple.iTunes:iTunNORM', - index=1 - ), - StorageStyle(u'REPLAYGAIN_TRACK_PEAK', float_places=6), - ASFStorageStyle(u'replaygain_track_peak', float_places=6), - out_type=float, - ) - rg_album_peak = MediaField( - MP3DescStorageStyle( - u'REPLAYGAIN_ALBUM_PEAK', - float_places=6 - ), - MP3DescStorageStyle( - u'replaygain_album_peak', - float_places=6 - ), - MP4StorageStyle( - '----:com.apple.iTunes:replaygain_album_peak', - float_places=6 - ), - StorageStyle(u'REPLAYGAIN_ALBUM_PEAK', float_places=6), - ASFStorageStyle(u'replaygain_album_peak', float_places=6), - out_type=float, - ) - - initial_key = MediaField( - MP3StorageStyle('TKEY'), - MP4StorageStyle('----:com.apple.iTunes:initialkey'), - StorageStyle('INITIALKEY'), - ASFStorageStyle('INITIALKEY'), - ) - - @property - def length(self): - """The duration of the audio in seconds (a float).""" - return self.mgfile.info.length - - @property - def samplerate(self): - """The audio's sample rate (an int).""" - if hasattr(self.mgfile.info, 'sample_rate'): - return self.mgfile.info.sample_rate - elif self.type == 'opus': - # Opus is always 48kHz internally. - return 48000 - return 0 - - @property - def bitdepth(self): - """The number of bits per sample in the audio encoding (an int). - Only available for certain file formats (zero where - unavailable). - """ - if hasattr(self.mgfile.info, 'bits_per_sample'): - return self.mgfile.info.bits_per_sample - return 0 - - @property - def channels(self): - """The number of channels in the audio (an int).""" - if hasattr(self.mgfile.info, 'channels'): - return self.mgfile.info.channels - return 0 - - @property - def bitrate(self): - """The number of bits per seconds used in the audio coding (an - int). If this is provided explicitly by the compressed file - format, this is a precise reflection of the encoding. Otherwise, - it is estimated from the on-disk file size. In this case, some - imprecision is possible because the file header is incorporated - in the file size. - """ - if hasattr(self.mgfile.info, 'bitrate') and self.mgfile.info.bitrate: - # Many formats provide it explicitly. - return self.mgfile.info.bitrate - else: - # Otherwise, we calculate bitrate from the file size. (This - # is the case for all of the lossless formats.) - if not self.length: - # Avoid division by zero if length is not available. - return 0 - size = os.path.getsize(self.path) - return int(size * 8 / self.length) - - @property - def format(self): - """A string describing the file format/codec.""" - return TYPES[self.type] +del key, value, warnings, mediafile diff --git a/lib/beets/plugins.py b/lib/beets/plugins.py old mode 100755 new mode 100644 index 2ecdb847..ed1f82d8 --- a/lib/beets/plugins.py +++ b/lib/beets/plugins.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,19 +14,19 @@ """Support for beets plugins.""" -from __future__ import division, absolute_import, print_function -import inspect import traceback import re +import inspect +import abc from collections import defaultdict from functools import wraps import beets from beets import logging -from beets import mediafile -import six +import mediafile + PLUGIN_NAMESPACE = 'beetsplug' @@ -50,26 +49,28 @@ class PluginLogFilter(logging.Filter): """A logging filter that identifies the plugin that emitted a log message. """ + def __init__(self, plugin): - self.prefix = u'{0}: '.format(plugin.name) + self.prefix = f'{plugin.name}: ' def filter(self, record): if hasattr(record.msg, 'msg') and isinstance(record.msg.msg, - six.string_types): + str): # A _LogMessage from our hacked-up Logging replacement. record.msg.msg = self.prefix + record.msg.msg - elif isinstance(record.msg, six.string_types): + elif isinstance(record.msg, str): record.msg = self.prefix + record.msg return True # Managing the plugins themselves. -class BeetsPlugin(object): +class BeetsPlugin: """The base class for all beets plugins. Plugins provide functionality by defining a subclass of BeetsPlugin and overriding the abstract methods defined here. """ + def __init__(self, name=None): """Perform one-time plugin setup. """ @@ -81,6 +82,7 @@ class BeetsPlugin(object): self.template_fields = {} if not self.album_template_fields: self.album_template_fields = {} + self.early_import_stages = [] self.import_stages = [] self._log = log.getChild(self.name) @@ -94,6 +96,22 @@ class BeetsPlugin(object): """ return () + def _set_stage_log_level(self, stages): + """Adjust all the stages in `stages` to WARNING logging level. + """ + return [self._set_log_level_and_params(logging.WARNING, stage) + for stage in stages] + + def get_early_import_stages(self): + """Return a list of functions that should be called as importer + pipelines stages early in the pipeline. + + The callables are wrapped versions of the functions in + `self.early_import_stages`. Wrapping provides some bookkeeping for the + plugin: specifically, the logging level is adjusted to WARNING. + """ + return self._set_stage_log_level(self.early_import_stages) + def get_import_stages(self): """Return a list of functions that should be called as importer pipelines stages. @@ -102,8 +120,7 @@ class BeetsPlugin(object): `self.import_stages`. Wrapping provides some bookkeeping for the plugin: specifically, the logging level is adjusted to WARNING. """ - return [self._set_log_level_and_params(logging.WARNING, import_stage) - for import_stage in self.import_stages] + return self._set_stage_log_level(self.import_stages) def _set_log_level_and_params(self, base_log_level, func): """Wrap `func` to temporarily set this plugin's logger level to @@ -111,27 +128,24 @@ class BeetsPlugin(object): value after the function returns). Also determines which params may not be sent for backwards-compatibility. """ - argspec = inspect.getargspec(func) + argspec = inspect.getfullargspec(func) @wraps(func) def wrapper(*args, **kwargs): assert self._log.level == logging.NOTSET + verbosity = beets.config['verbose'].get(int) log_level = max(logging.DEBUG, base_log_level - 10 * verbosity) self._log.setLevel(log_level) + if argspec.varkw is None: + kwargs = {k: v for k, v in kwargs.items() + if k in argspec.args} + try: - try: - return func(*args, **kwargs) - except TypeError as exc: - if exc.args[0].startswith(func.__name__): - # caused by 'func' and not stuff internal to 'func' - kwargs = dict((arg, val) for arg, val in kwargs.items() - if arg in argspec.args) - return func(*args, **kwargs) - else: - raise + return func(*args, **kwargs) finally: self._log.setLevel(logging.NOTSET) + return wrapper def queries(self): @@ -151,7 +165,7 @@ class BeetsPlugin(object): """ return beets.autotag.hooks.Distance() - def candidates(self, items, artist, album, va_likely): + def candidates(self, items, artist, album, va_likely, extra_tags=None): """Should return a sequence of AlbumInfo objects that match the album whose items are provided. """ @@ -185,7 +199,7 @@ class BeetsPlugin(object): ``descriptor`` must be an instance of ``mediafile.MediaField``. """ - # Defer impor to prevent circular dependency + # Defer import to prevent circular dependency from beets import library mediafile.MediaFile.add_field(name, descriptor) library.Item._media_fields.add(name) @@ -248,14 +262,14 @@ def load_plugins(names=()): BeetsPlugin subclasses desired. """ for name in names: - modname = '{0}.{1}'.format(PLUGIN_NAMESPACE, name) + modname = f'{PLUGIN_NAMESPACE}.{name}' try: try: namespace = __import__(modname, None, None) except ImportError as exc: # Again, this is hacky: if exc.args[0].endswith(' ' + name): - log.warning(u'** plugin {0} not found', name) + log.warning('** plugin {0} not found', name) else: raise else: @@ -264,9 +278,9 @@ def load_plugins(names=()): and obj != BeetsPlugin and obj not in _classes: _classes.add(obj) - except: + except Exception: log.warning( - u'** error loading plugin {}:\n{}', + '** error loading plugin {}:\n{}', name, traceback.format_exc(), ) @@ -280,6 +294,11 @@ def find_plugins(): currently loaded beets plugins. Loads the default plugin set first. """ + if _instances: + # After the first call, use cached instances for performance reasons. + # See https://github.com/beetbox/beets/pull/3810 + return list(_instances.values()) + load_plugins() plugins = [] for cls in _classes: @@ -313,21 +332,31 @@ def queries(): def types(model_cls): # Gives us `item_types` and `album_types` - attr_name = '{0}_types'.format(model_cls.__name__.lower()) + attr_name = f'{model_cls.__name__.lower()}_types' types = {} for plugin in find_plugins(): plugin_types = getattr(plugin, attr_name, {}) for field in plugin_types: if field in types and plugin_types[field] != types[field]: raise PluginConflictException( - u'Plugin {0} defines flexible field {1} ' - u'which has already been defined with ' - u'another type.'.format(plugin.name, field) + 'Plugin {} defines flexible field {} ' + 'which has already been defined with ' + 'another type.'.format(plugin.name, field) ) types.update(plugin_types) return types +def named_queries(model_cls): + # Gather `item_queries` and `album_queries` from the plugins. + attr_name = f'{model_cls.__name__.lower()}_queries' + queries = {} + for plugin in find_plugins(): + plugin_queries = getattr(plugin, attr_name, {}) + queries.update(plugin_queries) + return queries + + def track_distance(item, info): """Gets the track distance calculated by all loaded plugins. Returns a Distance object. @@ -348,20 +377,19 @@ def album_distance(items, album_info, mapping): return dist -def candidates(items, artist, album, va_likely): +def candidates(items, artist, album, va_likely, extra_tags=None): """Gets MusicBrainz candidates for an album from each plugin. """ for plugin in find_plugins(): - for candidate in plugin.candidates(items, artist, album, va_likely): - yield candidate + yield from plugin.candidates(items, artist, album, va_likely, + extra_tags) def item_candidates(item, artist, title): """Gets MusicBrainz candidates for an item from the plugins. """ for plugin in find_plugins(): - for item_candidate in plugin.item_candidates(item, artist, title): - yield item_candidate + yield from plugin.item_candidates(item, artist, title) def album_for_id(album_id): @@ -393,6 +421,14 @@ def template_funcs(): return funcs +def early_import_stages(): + """Get a list of early import stage functions defined by plugins.""" + stages = [] + for plugin in find_plugins(): + stages += plugin.get_early_import_stages() + return stages + + def import_stages(): """Get a list of import stage functions defined by plugins.""" stages = [] @@ -446,7 +482,7 @@ def send(event, **arguments): Return a list of non-None values returned from the handlers. """ - log.debug(u'Sending event: {0}', event) + log.debug('Sending event: {0}', event) results = [] for handler in event_handlers()[event]: result = handler(**arguments) @@ -464,7 +500,7 @@ def feat_tokens(for_artist=True): feat_words = ['ft', 'featuring', 'feat', 'feat.', 'ft.'] if for_artist: feat_words += ['with', 'vs', 'and', 'con', '&'] - return '(?<=\s)(?:{0})(?=\s)'.format( + return r'(?<=\s)(?:{})(?=\s)'.format( '|'.join(re.escape(x) for x in feat_words) ) @@ -478,9 +514,50 @@ def sanitize_choices(choices, choices_all): others = [x for x in choices_all if x not in choices] res = [] for s in choices: - if s in list(choices_all) + ['*']: - if not (s in seen or seen.add(s)): - res.extend(list(others) if s == '*' else [s]) + if s not in seen: + if s in list(choices_all): + res.append(s) + elif s == '*': + res.extend(others) + seen.add(s) + return res + + +def sanitize_pairs(pairs, pairs_all): + """Clean up a single-element mapping configuration attribute as returned + by Confuse's `Pairs` template: keep only two-element tuples present in + pairs_all, remove duplicate elements, expand ('str', '*') and ('*', '*') + wildcards while keeping the original order. Note that ('*', '*') and + ('*', 'whatever') have the same effect. + + For example, + + >>> sanitize_pairs( + ... [('foo', 'baz bar'), ('key', '*'), ('*', '*')], + ... [('foo', 'bar'), ('foo', 'baz'), ('foo', 'foobar'), + ... ('key', 'value')] + ... ) + [('foo', 'baz'), ('foo', 'bar'), ('key', 'value'), ('foo', 'foobar')] + """ + pairs_all = list(pairs_all) + seen = set() + others = [x for x in pairs_all if x not in pairs] + res = [] + for k, values in pairs: + for v in values.split(): + x = (k, v) + if x in pairs_all: + if x not in seen: + seen.add(x) + res.append(x) + elif k == '*': + new = [o for o in others if o not in seen] + seen.update(new) + res.extend(new) + elif v == '*': + new = [o for o in others if o not in seen and o[0] == k] + seen.update(new) + res.extend(new) return res @@ -498,3 +575,188 @@ def notify_info_yielded(event): yield v return decorated return decorator + + +def get_distance(config, data_source, info): + """Returns the ``data_source`` weight and the maximum source weight + for albums or individual tracks. + """ + dist = beets.autotag.Distance() + if info.data_source == data_source: + dist.add('source', config['source_weight'].as_number()) + return dist + + +def apply_item_changes(lib, item, move, pretend, write): + """Store, move, and write the item according to the arguments. + + :param lib: beets library. + :type lib: beets.library.Library + :param item: Item whose changes to apply. + :type item: beets.library.Item + :param move: Move the item if it's in the library. + :type move: bool + :param pretend: Return without moving, writing, or storing the item's + metadata. + :type pretend: bool + :param write: Write the item's metadata to its media file. + :type write: bool + """ + if pretend: + return + + from beets import util + + # Move the item if it's in the library. + if move and lib.directory in util.ancestry(item.path): + item.move(with_album=False) + + if write: + item.try_write() + + item.store() + + +class MetadataSourcePlugin(metaclass=abc.ABCMeta): + def __init__(self): + super().__init__() + self.config.add({'source_weight': 0.5}) + + @abc.abstractproperty + def id_regex(self): + raise NotImplementedError + + @abc.abstractproperty + def data_source(self): + raise NotImplementedError + + @abc.abstractproperty + def search_url(self): + raise NotImplementedError + + @abc.abstractproperty + def album_url(self): + raise NotImplementedError + + @abc.abstractproperty + def track_url(self): + raise NotImplementedError + + @abc.abstractmethod + def _search_api(self, query_type, filters, keywords=''): + raise NotImplementedError + + @abc.abstractmethod + def album_for_id(self, album_id): + raise NotImplementedError + + @abc.abstractmethod + def track_for_id(self, track_id=None, track_data=None): + raise NotImplementedError + + @staticmethod + def get_artist(artists, id_key='id', name_key='name'): + """Returns an artist string (all artists) and an artist_id (the main + artist) for a list of artist object dicts. + + For each artist, this function moves articles (such as 'a', 'an', + and 'the') to the front and strips trailing disambiguation numbers. It + returns a tuple containing the comma-separated string of all + normalized artists and the ``id`` of the main/first artist. + + :param artists: Iterable of artist dicts or lists returned by API. + :type artists: list[dict] or list[list] + :param id_key: Key or index corresponding to the value of ``id`` for + the main/first artist. Defaults to 'id'. + :type id_key: str or int + :param name_key: Key or index corresponding to values of names + to concatenate for the artist string (containing all artists). + Defaults to 'name'. + :type name_key: str or int + :return: Normalized artist string. + :rtype: str + """ + artist_id = None + artist_names = [] + for artist in artists: + if not artist_id: + artist_id = artist[id_key] + name = artist[name_key] + # Strip disambiguation number. + name = re.sub(r' \(\d+\)$', '', name) + # Move articles to the front. + name = re.sub(r'^(.*?), (a|an|the)$', r'\2 \1', name, flags=re.I) + artist_names.append(name) + artist = ', '.join(artist_names).replace(' ,', ',') or None + return artist, artist_id + + def _get_id(self, url_type, id_): + """Parse an ID from its URL if necessary. + + :param url_type: Type of URL. Either 'album' or 'track'. + :type url_type: str + :param id_: Album/track ID or URL. + :type id_: str + :return: Album/track ID. + :rtype: str + """ + self._log.debug( + "Searching {} for {} '{}'", self.data_source, url_type, id_ + ) + match = re.search(self.id_regex['pattern'].format(url_type), str(id_)) + if match: + id_ = match.group(self.id_regex['match_group']) + if id_: + return id_ + return None + + def candidates(self, items, artist, album, va_likely, extra_tags=None): + """Returns a list of AlbumInfo objects for Search API results + matching an ``album`` and ``artist`` (if not various). + + :param items: List of items comprised by an album to be matched. + :type items: list[beets.library.Item] + :param artist: The artist of the album to be matched. + :type artist: str + :param album: The name of the album to be matched. + :type album: str + :param va_likely: True if the album to be matched likely has + Various Artists. + :type va_likely: bool + :return: Candidate AlbumInfo objects. + :rtype: list[beets.autotag.hooks.AlbumInfo] + """ + query_filters = {'album': album} + if not va_likely: + query_filters['artist'] = artist + results = self._search_api(query_type='album', filters=query_filters) + albums = [self.album_for_id(album_id=r['id']) for r in results] + return [a for a in albums if a is not None] + + def item_candidates(self, item, artist, title): + """Returns a list of TrackInfo objects for Search API results + matching ``title`` and ``artist``. + + :param item: Singleton item to be matched. + :type item: beets.library.Item + :param artist: The artist of the track to be matched. + :type artist: str + :param title: The title of the track to be matched. + :type title: str + :return: Candidate TrackInfo objects. + :rtype: list[beets.autotag.hooks.TrackInfo] + """ + tracks = self._search_api( + query_type='track', keywords=title, filters={'artist': artist} + ) + return [self.track_for_id(track_data=track) for track in tracks] + + def album_distance(self, items, album_info, mapping): + return get_distance( + data_source=self.data_source, info=album_info, config=self.config + ) + + def track_distance(self, item, track_info): + return get_distance( + data_source=self.data_source, info=track_info, config=self.config + ) diff --git a/lib/beets/random.py b/lib/beets/random.py new file mode 100644 index 00000000..eb4f55af --- /dev/null +++ b/lib/beets/random.py @@ -0,0 +1,113 @@ +# This file is part of beets. +# Copyright 2016, Philippe Mongeau. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Get a random song or album from the library. +""" + +import random +from operator import attrgetter +from itertools import groupby + + +def _length(obj, album): + """Get the duration of an item or album. + """ + if album: + return sum(i.length for i in obj.items()) + else: + return obj.length + + +def _equal_chance_permutation(objs, field='albumartist', random_gen=None): + """Generate (lazily) a permutation of the objects where every group + with equal values for `field` have an equal chance of appearing in + any given position. + """ + rand = random_gen or random + + # Group the objects by artist so we can sample from them. + key = attrgetter(field) + objs.sort(key=key) + objs_by_artists = {} + for artist, v in groupby(objs, key): + objs_by_artists[artist] = list(v) + + # While we still have artists with music to choose from, pick one + # randomly and pick a track from that artist. + while objs_by_artists: + # Choose an artist and an object for that artist, removing + # this choice from the pool. + artist = rand.choice(list(objs_by_artists.keys())) + objs_from_artist = objs_by_artists[artist] + i = rand.randint(0, len(objs_from_artist) - 1) + yield objs_from_artist.pop(i) + + # Remove the artist if we've used up all of its objects. + if not objs_from_artist: + del objs_by_artists[artist] + + +def _take(iter, num): + """Return a list containing the first `num` values in `iter` (or + fewer, if the iterable ends early). + """ + out = [] + for val in iter: + out.append(val) + num -= 1 + if num <= 0: + break + return out + + +def _take_time(iter, secs, album): + """Return a list containing the first values in `iter`, which should + be Item or Album objects, that add up to the given amount of time in + seconds. + """ + out = [] + total_time = 0.0 + for obj in iter: + length = _length(obj, album) + if total_time + length <= secs: + out.append(obj) + total_time += length + return out + + +def random_objs(objs, album, number=1, time=None, equal_chance=False, + random_gen=None): + """Get a random subset of the provided `objs`. + + If `number` is provided, produce that many matches. Otherwise, if + `time` is provided, instead select a list whose total time is close + to that number of minutes. If `equal_chance` is true, give each + artist an equal chance of being included so that artists with more + songs are not represented disproportionately. + """ + rand = random_gen or random + + # Permute the objects either in a straightforward way or an + # artist-balanced way. + if equal_chance: + perm = _equal_chance_permutation(objs) + else: + perm = objs + rand.shuffle(perm) # N.B. This shuffles the original list. + + # Select objects by time our count. + if time: + return _take_time(perm, time * 60, album) + else: + return _take(perm, number) diff --git a/lib/beets/ui/__init__.py b/lib/beets/ui/__init__.py old mode 100755 new mode 100644 index df370b52..121cb5dc --- a/lib/beets/ui/__init__.py +++ b/lib/beets/ui/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -18,7 +17,6 @@ interface. To invoke the CLI, just call beets.ui.main(). The actual CLI commands are implemented in the ui.commands module. """ -from __future__ import division, absolute_import, print_function import optparse import textwrap @@ -30,18 +28,18 @@ import re import struct import traceback import os.path -from six.moves import input from beets import logging from beets import library from beets import plugins from beets import util -from beets.util.functemplate import Template +from beets.util.functemplate import template from beets import config -from beets.util import confit, as_string +from beets.util import as_string from beets.autotag import mb from beets.dbcore import query as db_query -import six +from beets.dbcore import db +import confuse # On Windows platforms, use colorama to support "ANSI" terminal colors. if sys.platform == 'win32': @@ -60,8 +58,8 @@ log.propagate = False # Don't propagate to root handler. PF_KEY_QUERIES = { - 'comp': u'comp:true', - 'singleton': u'singleton:true', + 'comp': 'comp:true', + 'singleton': 'singleton:true', } @@ -111,10 +109,7 @@ def decargs(arglist): """Given a list of command-line argument bytestrings, attempts to decode them to Unicode strings when running under Python 2. """ - if six.PY2: - return [s.decode(util.arg_encoding()) for s in arglist] - else: - return arglist + return arglist def print_(*strings, **kwargs): @@ -129,29 +124,25 @@ def print_(*strings, **kwargs): (it defaults to a newline). """ if not strings: - strings = [u''] - assert isinstance(strings[0], six.text_type) + strings = [''] + assert isinstance(strings[0], str) - txt = u' '.join(strings) - txt += kwargs.get('end', u'\n') + txt = ' '.join(strings) + txt += kwargs.get('end', '\n') # Encode the string and write it to stdout. - if six.PY2: - # On Python 2, sys.stdout expects bytes. + # On Python 3, sys.stdout expects text strings and uses the + # exception-throwing encoding error policy. To avoid throwing + # errors and use our configurable encoding override, we use the + # underlying bytes buffer instead. + if hasattr(sys.stdout, 'buffer'): out = txt.encode(_out_encoding(), 'replace') - sys.stdout.write(out) + sys.stdout.buffer.write(out) + sys.stdout.buffer.flush() else: - # On Python 3, sys.stdout expects text strings and uses the - # exception-throwing encoding error policy. To avoid throwing - # errors and use our configurable encoding override, we use the - # underlying bytes buffer instead. - if hasattr(sys.stdout, 'buffer'): - out = txt.encode(_out_encoding(), 'replace') - sys.stdout.buffer.write(out) - else: - # In our test harnesses (e.g., DummyOut), sys.stdout.buffer - # does not exist. We instead just record the text string. - sys.stdout.write(txt) + # In our test harnesses (e.g., DummyOut), sys.stdout.buffer + # does not exist. We instead just record the text string. + sys.stdout.write(txt) # Configuration wrappers. @@ -201,19 +192,16 @@ def input_(prompt=None): """ # raw_input incorrectly sends prompts to stderr, not stdout, so we # use print_() explicitly to display prompts. - # http://bugs.python.org/issue1927 + # https://bugs.python.org/issue1927 if prompt: - print_(prompt, end=u' ') + print_(prompt, end=' ') try: resp = input() except EOFError: - raise UserError(u'stdin stream ended while input required') + raise UserError('stdin stream ended while input required') - if six.PY2: - return resp.decode(_in_encoding(), 'ignore') - else: - return resp + return resp def input_options(options, require=False, prompt=None, fallback_prompt=None, @@ -257,7 +245,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, found_letter = letter break else: - raise ValueError(u'no unambiguous lettering found') + raise ValueError('no unambiguous lettering found') letters[found_letter.lower()] = option index = option.index(found_letter) @@ -265,7 +253,7 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Mark the option's shortcut letter for display. if not require and ( (default is None and not numrange and first) or - (isinstance(default, six.string_types) and + (isinstance(default, str) and found_letter.lower() == default.lower())): # The first option is the default; mark it. show_letter = '[%s]' % found_letter.upper() @@ -301,11 +289,11 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, prompt_part_lengths = [] if numrange: if isinstance(default, int): - default_name = six.text_type(default) + default_name = str(default) default_name = colorize('action_default', default_name) tmpl = '# selection (default %s)' prompt_parts.append(tmpl % default_name) - prompt_part_lengths.append(len(tmpl % six.text_type(default))) + prompt_part_lengths.append(len(tmpl % str(default))) else: prompt_parts.append('# selection') prompt_part_lengths.append(len(prompt_parts[-1])) @@ -340,9 +328,9 @@ def input_options(options, require=False, prompt=None, fallback_prompt=None, # Make a fallback prompt too. This is displayed if the user enters # something that is not recognized. if not fallback_prompt: - fallback_prompt = u'Enter one of ' + fallback_prompt = 'Enter one of ' if numrange: - fallback_prompt += u'%i-%i, ' % numrange + fallback_prompt += '%i-%i, ' % numrange fallback_prompt += ', '.join(display_letters) + ':' resp = input_(prompt) @@ -381,34 +369,41 @@ def input_yn(prompt, require=False): "yes" unless `require` is `True`, in which case there is no default. """ sel = input_options( - ('y', 'n'), require, prompt, u'Enter Y or N:' + ('y', 'n'), require, prompt, 'Enter Y or N:' ) - return sel == u'y' + return sel == 'y' -def input_select_objects(prompt, objs, rep): +def input_select_objects(prompt, objs, rep, prompt_all=None): """Prompt to user to choose all, none, or some of the given objects. Return the list of selected objects. `prompt` is the prompt string to use for each question (it should be - phrased as an imperative verb). `rep` is a function to call on each - object to print it out when confirming objects individually. + phrased as an imperative verb). If `prompt_all` is given, it is used + instead of `prompt` for the first (yes(/no/select) question. + `rep` is a function to call on each object to print it out when confirming + objects individually. """ choice = input_options( - (u'y', u'n', u's'), False, - u'%s? (Yes/no/select)' % prompt) + ('y', 'n', 's'), False, + '%s? (Yes/no/select)' % (prompt_all or prompt)) print() # Blank line. - if choice == u'y': # Yes. + if choice == 'y': # Yes. return objs - elif choice == u's': # Select. + elif choice == 's': # Select. out = [] for obj in objs: rep(obj) - if input_yn(u'%s? (yes/no)' % prompt, True): + answer = input_options( + ('y', 'n', 'q'), True, '%s? (yes/no/quit)' % prompt, + 'Enter Y or N:' + ) + if answer == 'y': out.append(obj) - print() # go to a new line + elif answer == 'q': + return out return out else: # No. @@ -419,14 +414,14 @@ def input_select_objects(prompt, objs, rep): def human_bytes(size): """Formats size, a number of bytes, in a human-readable way.""" - powers = [u'', u'K', u'M', u'G', u'T', u'P', u'E', u'Z', u'Y', u'H'] + powers = ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y', 'H'] unit = 'B' for power in powers: if size < 1024: - return u"%3.1f %s%s" % (size, power, unit) + return f"{size:3.1f} {power}{unit}" size /= 1024.0 - unit = u'iB' - return u"big" + unit = 'iB' + return "big" def human_seconds(interval): @@ -434,13 +429,13 @@ def human_seconds(interval): interval using English words. """ units = [ - (1, u'second'), - (60, u'minute'), - (60, u'hour'), - (24, u'day'), - (7, u'week'), - (52, u'year'), - (10, u'decade'), + (1, 'second'), + (60, 'minute'), + (60, 'hour'), + (24, 'day'), + (7, 'week'), + (52, 'year'), + (10, 'decade'), ] for i in range(len(units) - 1): increment, suffix = units[i] @@ -453,7 +448,7 @@ def human_seconds(interval): increment, suffix = units[-1] interval /= float(increment) - return u"%3.1f %ss" % (interval, suffix) + return f"{interval:3.1f} {suffix}s" def human_seconds_short(interval): @@ -461,13 +456,13 @@ def human_seconds_short(interval): string. """ interval = int(interval) - return u'%i:%02i' % (interval // 60, interval % 60) + return '%i:%02i' % (interval // 60, interval % 60) # Colorization. # ANSI terminal colorization code heavily inspired by pygments: -# http://dev.pocoo.org/hg/pygments-main/file/b2deea5b5030/pygments/console.py +# https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/console.py # (pygments is by Tim Hatch, Armin Ronacher, et al.) COLOR_ESCAPE = "\x1b[" DARK_COLORS = { @@ -514,7 +509,7 @@ def _colorize(color, text): elif color in LIGHT_COLORS: escape = COLOR_ESCAPE + "%i;01m" % (LIGHT_COLORS[color] + 30) else: - raise ValueError(u'no such color %s', color) + raise ValueError('no such color %s', color) return escape + text + RESET_COLOR @@ -522,22 +517,22 @@ def colorize(color_name, text): """Colorize text if colored output is enabled. (Like _colorize but conditional.) """ - if config['ui']['color']: - global COLORS - if not COLORS: - COLORS = dict((name, - config['ui']['colors'][name].as_str()) - for name in COLOR_NAMES) - # In case a 3rd party plugin is still passing the actual color ('red') - # instead of the abstract color name ('text_error') - color = COLORS.get(color_name) - if not color: - log.debug(u'Invalid color_name: {0}', color_name) - color = color_name - return _colorize(color, text) - else: + if not config['ui']['color'] or 'NO_COLOR' in os.environ.keys(): return text + global COLORS + if not COLORS: + COLORS = {name: + config['ui']['colors'][name].as_str() + for name in COLOR_NAMES} + # In case a 3rd party plugin is still passing the actual color ('red') + # instead of the abstract color name ('text_error') + color = COLORS.get(color_name) + if not color: + log.debug('Invalid color_name: {0}', color_name) + color = color_name + return _colorize(color, text) + def _colordiff(a, b, highlight='text_highlight', minor_highlight='text_highlight_minor'): @@ -546,11 +541,11 @@ def _colordiff(a, b, highlight='text_highlight', highlighted intelligently to show differences; other values are stringified and highlighted in their entirety. """ - if not isinstance(a, six.string_types) \ - or not isinstance(b, six.string_types): + if not isinstance(a, str) \ + or not isinstance(b, str): # Non-strings: use ordinary equality. - a = six.text_type(a) - b = six.text_type(b) + a = str(a) + b = str(b) if a == b: return a, b else: @@ -588,7 +583,7 @@ def _colordiff(a, b, highlight='text_highlight', else: assert(False) - return u''.join(a_out), u''.join(b_out) + return ''.join(a_out), ''.join(b_out) def colordiff(a, b, highlight='text_highlight'): @@ -598,7 +593,7 @@ def colordiff(a, b, highlight='text_highlight'): if config['ui']['color']: return _colordiff(a, b, highlight) else: - return six.text_type(a), six.text_type(b) + return str(a), str(b) def get_path_formats(subview=None): @@ -609,12 +604,12 @@ def get_path_formats(subview=None): subview = subview or config['paths'] for query, view in subview.items(): query = PF_KEY_QUERIES.get(query, query) # Expand common queries. - path_formats.append((query, Template(view.as_str()))) + path_formats.append((query, template(view.as_str()))) return path_formats def get_replacements(): - """Confit validation function that reads regex/string pairs. + """Confuse validation function that reads regex/string pairs. """ replacements = [] for pattern, repl in config['replace'].get(dict).items(): @@ -623,7 +618,7 @@ def get_replacements(): replacements.append((re.compile(pattern), repl)) except re.error: raise UserError( - u'malformed regular expression in replace: {0}'.format( + 'malformed regular expression in replace: {}'.format( pattern ) ) @@ -644,7 +639,7 @@ def term_width(): try: buf = fcntl.ioctl(0, termios.TIOCGWINSZ, ' ' * 4) - except IOError: + except OSError: return fallback try: height, width = struct.unpack('hh', buf) @@ -656,10 +651,10 @@ def term_width(): FLOAT_EPSILON = 0.01 -def _field_diff(field, old, new): - """Given two Model objects, format their values for `field` and - highlight changes among them. Return a human-readable string. If the - value has not changed, return None instead. +def _field_diff(field, old, old_fmt, new, new_fmt): + """Given two Model objects and their formatted views, format their values + for `field` and highlight changes among them. Return a human-readable + string. If the value has not changed, return None instead. """ oldval = old.get(field) newval = new.get(field) @@ -672,18 +667,18 @@ def _field_diff(field, old, new): return None # Get formatted values for output. - oldstr = old.formatted().get(field, u'') - newstr = new.formatted().get(field, u'') + oldstr = old_fmt.get(field, '') + newstr = new_fmt.get(field, '') # For strings, highlight changes. For others, colorize the whole # thing. - if isinstance(oldval, six.string_types): + if isinstance(oldval, str): oldstr, newstr = colordiff(oldval, newstr) else: oldstr = colorize('text_error', oldstr) newstr = colorize('text_error', newstr) - return u'{0} -> {1}'.format(oldstr, newstr) + return f'{oldstr} -> {newstr}' def show_model_changes(new, old=None, fields=None, always=False): @@ -698,6 +693,11 @@ def show_model_changes(new, old=None, fields=None, always=False): """ old = old or new._db._get(type(new), new.id) + # Keep the formatted views around instead of re-creating them in each + # iteration step + old_fmt = old.formatted() + new_fmt = new.formatted() + # Build up lines showing changed fields. changes = [] for field in old: @@ -706,25 +706,25 @@ def show_model_changes(new, old=None, fields=None, always=False): continue # Detect and show difference for this field. - line = _field_diff(field, old, new) + line = _field_diff(field, old, old_fmt, new, new_fmt) if line: - changes.append(u' {0}: {1}'.format(field, line)) + changes.append(f' {field}: {line}') # New fields. for field in set(new) - set(old): if fields and field not in fields: continue - changes.append(u' {0}: {1}'.format( + changes.append(' {}: {}'.format( field, - colorize('text_highlight', new.formatted()[field]) + colorize('text_highlight', new_fmt[field]) )) # Print changes. if changes or always: print_(format(old)) if changes: - print_(u'\n'.join(changes)) + print_('\n'.join(changes)) return bool(changes) @@ -757,18 +757,55 @@ def show_path_changes(path_changes): if max_width > col_width: # Print every change over two lines for source, dest in zip(sources, destinations): - log.info(u'{0} \n -> {1}', source, dest) + color_source, color_dest = colordiff(source, dest) + print_('{0} \n -> {1}'.format(color_source, color_dest)) else: # Print every change on a single line, and add a header title_pad = max_width - len('Source ') + len(' -> ') - log.info(u'Source {0} Destination', ' ' * title_pad) + print_('Source {0} Destination'.format(' ' * title_pad)) for source, dest in zip(sources, destinations): pad = max_width - len(source) - log.info(u'{0} {1} -> {2}', source, ' ' * pad, dest) + color_source, color_dest = colordiff(source, dest) + print_('{0} {1} -> {2}'.format( + color_source, + ' ' * pad, + color_dest, + )) -class CommonOptionsParser(optparse.OptionParser, object): +# Helper functions for option parsing. + +def _store_dict(option, opt_str, value, parser): + """Custom action callback to parse options which have ``key=value`` + pairs as values. All such pairs passed for this option are + aggregated into a dictionary. + """ + dest = option.dest + option_values = getattr(parser.values, dest, None) + + if option_values is None: + # This is the first supplied ``key=value`` pair of option. + # Initialize empty dictionary and get a reference to it. + setattr(parser.values, dest, {}) + option_values = getattr(parser.values, dest) + + # Decode the argument using the platform's argument encoding. + value = util.text_string(value, util.arg_encoding()) + + try: + key, value = value.split('=', 1) + if not (key and value): + raise ValueError + except ValueError: + raise UserError( + "supplied argument `{}' is not of the form `key=value'" + .format(value)) + + option_values[key] = value + + +class CommonOptionsParser(optparse.OptionParser): """Offers a simple way to add common formatting options. Options available include: @@ -783,8 +820,9 @@ class CommonOptionsParser(optparse.OptionParser, object): Each method is fully documented in the related method. """ + def __init__(self, *args, **kwargs): - super(CommonOptionsParser, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self._album_flags = False # this serves both as an indicator that we offer the feature AND allows # us to check whether it has been specified on the CLI - bypassing the @@ -798,7 +836,7 @@ class CommonOptionsParser(optparse.OptionParser, object): Sets the album property on the options extracted from the CLI. """ album = optparse.Option(*flags, action='store_true', - help=u'match albums instead of tracks') + help='match albums instead of tracks') self.add_option(album) self._album_flags = set(flags) @@ -816,7 +854,7 @@ class CommonOptionsParser(optparse.OptionParser, object): elif value: value, = decargs([value]) else: - value = u'' + value = '' parser.values.format = value if target: @@ -843,14 +881,14 @@ class CommonOptionsParser(optparse.OptionParser, object): By default this affects both items and albums. If add_album_option() is used then the target will be autodetected. - Sets the format property to u'$path' on the options extracted from the + Sets the format property to '$path' on the options extracted from the CLI. """ path = optparse.Option(*flags, nargs=0, action='callback', callback=self._set_format, - callback_kwargs={'fmt': u'$path', + callback_kwargs={'fmt': '$path', 'store_true': True}, - help=u'print paths for matched items or albums') + help='print paths for matched items or albums') self.add_option(path) def add_format_option(self, flags=('-f', '--format'), target=None): @@ -870,7 +908,7 @@ class CommonOptionsParser(optparse.OptionParser, object): """ kwargs = {} if target: - if isinstance(target, six.string_types): + if isinstance(target, str): target = {'item': library.Item, 'album': library.Album}[target] kwargs['target'] = target @@ -878,7 +916,7 @@ class CommonOptionsParser(optparse.OptionParser, object): opt = optparse.Option(*flags, action='callback', callback=self._set_format, callback_kwargs=kwargs, - help=u'print with custom format') + help='print with custom format') self.add_option(opt) def add_all_common_options(self): @@ -893,14 +931,15 @@ class CommonOptionsParser(optparse.OptionParser, object): # # This is a fairly generic subcommand parser for optparse. It is # maintained externally here: -# http://gist.github.com/462717 +# https://gist.github.com/462717 # There you will also find a better description of the code and a more # succinct example program. -class Subcommand(object): +class Subcommand: """A subcommand of a root command-line application that may be invoked by a SubcommandOptionParser. """ + def __init__(self, name, parser=None, help='', aliases=(), hide=False): """Creates a new subcommand. name is the primary way to invoke the subcommand; aliases are alternate names. parser is an @@ -928,7 +967,7 @@ class Subcommand(object): @root_parser.setter def root_parser(self, root_parser): self._root_parser = root_parser - self.parser.prog = '{0} {1}'.format( + self.parser.prog = '{} {}'.format( as_string(root_parser.get_prog_name()), self.name) @@ -944,13 +983,13 @@ class SubcommandsOptionParser(CommonOptionsParser): """ # A more helpful default usage. if 'usage' not in kwargs: - kwargs['usage'] = u""" + kwargs['usage'] = """ %prog COMMAND [ARGS...] %prog help COMMAND""" kwargs['add_help_option'] = False # Super constructor. - super(SubcommandsOptionParser, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) # Our root parser needs to stop on the first unrecognized argument. self.disable_interspersed_args() @@ -967,7 +1006,7 @@ class SubcommandsOptionParser(CommonOptionsParser): # Add the list of subcommands to the help message. def format_help(self, formatter=None): # Get the original help message, to which we will append. - out = super(SubcommandsOptionParser, self).format_help(formatter) + out = super().format_help(formatter) if formatter is None: formatter = self.formatter @@ -1053,7 +1092,7 @@ class SubcommandsOptionParser(CommonOptionsParser): cmdname = args.pop(0) subcommand = self._subcommand_for_name(cmdname) if not subcommand: - raise UserError(u"unknown command '{0}'".format(cmdname)) + raise UserError(f"unknown command '{cmdname}'") suboptions, subargs = subcommand.parse_args(args) return subcommand, suboptions, subargs @@ -1064,26 +1103,32 @@ optparse.Option.ALWAYS_TYPED_ACTIONS += ('callback',) # The main entry point and bootstrapping. -def _load_plugins(config): - """Load the plugins specified in the configuration. +def _load_plugins(options, config): + """Load the plugins specified on the command line or in the configuration. """ paths = config['pluginpath'].as_str_seq(split=False) paths = [util.normpath(p) for p in paths] - log.debug(u'plugin paths: {0}', util.displayable_path(paths)) + log.debug('plugin paths: {0}', util.displayable_path(paths)) # On Python 3, the search paths need to be unicode. paths = [util.py3_path(p) for p in paths] # Extend the `beetsplug` package to include the plugin paths. import beetsplug - beetsplug.__path__ = paths + beetsplug.__path__ + beetsplug.__path__ = paths + list(beetsplug.__path__) # For backwards compatibility, also support plugin paths that # *contain* a `beetsplug` package. sys.path += paths - plugins.load_plugins(config['plugins'].as_str_seq()) - plugins.send("pluginload") + # If we were given any plugins on the command line, use those. + if options.plugins is not None: + plugin_list = (options.plugins.split(',') + if len(options.plugins) > 0 else []) + else: + plugin_list = config['plugins'].as_str_seq() + + plugins.load_plugins(plugin_list) return plugins @@ -1097,7 +1142,20 @@ def _setup(options, lib=None): config = _configure(options) - plugins = _load_plugins(config) + plugins = _load_plugins(options, config) + + # Add types and queries defined by plugins. + plugin_types_album = plugins.types(library.Album) + library.Album._types.update(plugin_types_album) + item_types = plugin_types_album.copy() + item_types.update(library.Item._types) + item_types.update(plugins.types(library.Item)) + library.Item._types = item_types + + library.Item._queries.update(plugins.named_queries(library.Item)) + library.Album._queries.update(plugins.named_queries(library.Album)) + + plugins.send("pluginload") # Get the default subcommands. from beets.ui.commands import default_commands @@ -1108,8 +1166,6 @@ def _setup(options, lib=None): if lib is None: lib = _open_library(config) plugins.send("library_opened", lib=lib) - library.Item._types.update(plugins.types(library.Item)) - library.Album._types.update(plugins.types(library.Album)) return subcommands, plugins, lib @@ -1121,9 +1177,11 @@ def _configure(options): # special handling lets specified plugins get loaded before we # finish parsing the command line. if getattr(options, 'config', None) is not None: - config_path = options.config + overlay_path = options.config del options.config - config.set_file(config_path) + config.set_file(overlay_path) + else: + overlay_path = None config.set_args(options) # Configure the logger. @@ -1132,15 +1190,19 @@ def _configure(options): else: log.set_global_level(logging.INFO) + if overlay_path: + log.debug('overlaying configuration: {0}', + util.displayable_path(overlay_path)) + config_path = config.user_config_path() if os.path.isfile(config_path): - log.debug(u'user configuration: {0}', + log.debug('user configuration: {0}', util.displayable_path(config_path)) else: - log.debug(u'no user configuration found at {0}', + log.debug('no user configuration found at {0}', util.displayable_path(config_path)) - log.debug(u'data directory: {0}', + log.debug('data directory: {0}', util.displayable_path(config.config_dir())) return config @@ -1157,13 +1219,14 @@ def _open_library(config): get_replacements(), ) lib.get_item(0) # Test database connection. - except (sqlite3.OperationalError, sqlite3.DatabaseError): - log.debug(u'{}', traceback.format_exc()) - raise UserError(u"database file {0} could not be opened".format( - util.displayable_path(dbpath) + except (sqlite3.OperationalError, sqlite3.DatabaseError) as db_error: + log.debug('{}', traceback.format_exc()) + raise UserError("database file {} cannot not be opened: {}".format( + util.displayable_path(dbpath), + db_error )) - log.debug(u'library database: {0}\n' - u'library directory: {1}', + log.debug('library database: {0}\n' + 'library directory: {1}', util.displayable_path(lib.path), util.displayable_path(lib.directory)) return lib @@ -1177,15 +1240,17 @@ def _raw_main(args, lib=None): parser.add_format_option(flags=('--format-item',), target=library.Item) parser.add_format_option(flags=('--format-album',), target=library.Album) parser.add_option('-l', '--library', dest='library', - help=u'library database file to use') + help='library database file to use') parser.add_option('-d', '--directory', dest='directory', - help=u"destination music directory") + help="destination music directory") parser.add_option('-v', '--verbose', dest='verbose', action='count', - help=u'log more details (use twice for even more)') + help='log more details (use twice for even more)') parser.add_option('-c', '--config', dest='config', - help=u'path to configuration file') + help='path to configuration file') + parser.add_option('-p', '--plugins', dest='plugins', + help='a comma-separated list of plugins to load') parser.add_option('-h', '--help', dest='help', action='store_true', - help=u'show this help message and exit') + help='show this help message and exit') parser.add_option('--version', dest='version', action='store_true', help=optparse.SUPPRESS_HELP) @@ -1220,7 +1285,7 @@ def main(args=None): _raw_main(args) except UserError as exc: message = exc.args[0] if exc.args else None - log.error(u'error: {0}', message) + log.error('error: {0}', message) sys.exit(1) except util.HumanReadableException as exc: exc.log(log) @@ -1231,18 +1296,25 @@ def main(args=None): log.debug('{}', traceback.format_exc()) log.error('{}', exc) sys.exit(1) - except confit.ConfigError as exc: - log.error(u'configuration error: {0}', exc) + except confuse.ConfigError as exc: + log.error('configuration error: {0}', exc) sys.exit(1) except db_query.InvalidQueryError as exc: - log.error(u'invalid query: {0}', exc) + log.error('invalid query: {0}', exc) sys.exit(1) - except IOError as exc: + except OSError as exc: if exc.errno == errno.EPIPE: # "Broken pipe". End silently. - pass + sys.stderr.close() else: raise except KeyboardInterrupt: # Silently ignore ^C except in verbose mode. - log.debug(u'{}', traceback.format_exc()) + log.debug('{}', traceback.format_exc()) + except db.DBAccessError as exc: + log.error( + 'database access error: {0}\n' + 'the library file might have a permissions problem', + exc + ) + sys.exit(1) diff --git a/lib/beets/ui/commands.py b/lib/beets/ui/commands.py index 8a07f614..3a337401 100755 --- a/lib/beets/ui/commands.py +++ b/lib/beets/ui/commands.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -17,7 +16,6 @@ interface. """ -from __future__ import division, absolute_import, print_function import os import re @@ -34,14 +32,15 @@ from beets.autotag import hooks from beets import plugins from beets import importer from beets import util -from beets.util import syspath, normpath, ancestry, displayable_path +from beets.util import syspath, normpath, ancestry, displayable_path, \ + MoveOperation from beets import library from beets import config from beets import logging -from beets.util.confit import _package_path -import six -VARIOUS_ARTISTS = u'Various Artists' +from . import _store_dict + +VARIOUS_ARTISTS = 'Various Artists' PromptChoice = namedtuple('PromptChoice', ['short', 'long', 'callback']) # Global logger. @@ -73,9 +72,9 @@ def _do_query(lib, query, album, also_items=True): items = list(lib.items(query)) if album and not albums: - raise ui.UserError(u'No matching albums found.') + raise ui.UserError('No matching albums found.') elif not album and not items: - raise ui.UserError(u'No matching items found.') + raise ui.UserError('No matching items found.') return items, albums @@ -84,36 +83,37 @@ def _do_query(lib, query, album, also_items=True): def _print_keys(query): """Given a SQLite query result, print the `key` field of each - returned row, with identation of 2 spaces. + returned row, with indentation of 2 spaces. """ for row in query: - print_(u' ' * 2 + row['key']) + print_(' ' * 2 + row['key']) def fields_func(lib, opts, args): def _print_rows(names): names.sort() - print_(u' ' + u'\n '.join(names)) + print_(' ' + '\n '.join(names)) - print_(u"Item fields:") + print_("Item fields:") _print_rows(library.Item.all_keys()) - print_(u"Album fields:") + print_("Album fields:") _print_rows(library.Album.all_keys()) with lib.transaction() as tx: # The SQL uses the DISTINCT to get unique values from the query unique_fields = 'SELECT DISTINCT key FROM (%s)' - print_(u"Item flexible attributes:") + print_("Item flexible attributes:") _print_keys(tx.query(unique_fields % library.Item._flex_table)) - print_(u"Album flexible attributes:") + print_("Album flexible attributes:") _print_keys(tx.query(unique_fields % library.Album._flex_table)) + fields_cmd = ui.Subcommand( 'fields', - help=u'show fields available for queries and format strings' + help='show fields available for queries and format strings' ) fields_cmd.func = fields_func default_commands.append(fields_cmd) @@ -124,9 +124,9 @@ default_commands.append(fields_cmd) class HelpCommand(ui.Subcommand): def __init__(self): - super(HelpCommand, self).__init__( + super().__init__( 'help', aliases=('?',), - help=u'give detailed help on a specific sub-command', + help='give detailed help on a specific sub-command', ) def func(self, lib, opts, args): @@ -134,7 +134,7 @@ class HelpCommand(ui.Subcommand): cmdname = args[0] helpcommand = self.root_parser._subcommand_for_name(cmdname) if not helpcommand: - raise ui.UserError(u"unknown command '{0}'".format(cmdname)) + raise ui.UserError(f"unknown command '{cmdname}'") helpcommand.print_help() else: self.root_parser.print_help() @@ -159,29 +159,31 @@ def disambig_string(info): if isinstance(info, hooks.AlbumInfo): if info.media: if info.mediums and info.mediums > 1: - disambig.append(u'{0}x{1}'.format( + disambig.append('{}x{}'.format( info.mediums, info.media )) else: disambig.append(info.media) if info.year: - disambig.append(six.text_type(info.year)) + disambig.append(str(info.year)) if info.country: disambig.append(info.country) if info.label: disambig.append(info.label) + if info.catalognum: + disambig.append(info.catalognum) if info.albumdisambig: disambig.append(info.albumdisambig) if disambig: - return u', '.join(disambig) + return ', '.join(disambig) def dist_string(dist): """Formats a distance (a float) as a colorized similarity percentage string. """ - out = u'%.1f%%' % ((1 - dist) * 100) + out = '%.1f%%' % ((1 - dist) * 100) if dist <= config['match']['strong_rec_thresh'].as_number(): out = ui.colorize('text_success', out) elif dist <= config['match']['medium_rec_thresh'].as_number(): @@ -204,7 +206,7 @@ def penalty_string(distance, limit=None): if penalties: if limit and len(penalties) > limit: penalties = penalties[:limit] + ['...'] - return ui.colorize('text_warning', u'(%s)' % ', '.join(penalties)) + return ui.colorize('text_warning', '(%s)' % ', '.join(penalties)) def show_change(cur_artist, cur_album, match): @@ -214,11 +216,11 @@ def show_change(cur_artist, cur_album, match): """ def show_album(artist, album): if artist: - album_description = u' %s - %s' % (artist, album) + album_description = f' {artist} - {album}' elif album: - album_description = u' %s' % album + album_description = ' %s' % album else: - album_description = u' (unknown album)' + album_description = ' (unknown album)' print_(album_description) def format_index(track_info): @@ -235,41 +237,45 @@ def show_change(cur_artist, cur_album, match): medium = track_info.disc mediums = track_info.disctotal if config['per_disc_numbering']: - if mediums > 1: - return u'{0}-{1}'.format(medium, medium_index) + if mediums and mediums > 1: + return f'{medium}-{medium_index}' else: - return six.text_type(medium_index or index) + return str(medium_index if medium_index is not None + else index) else: - return six.text_type(index) + return str(index) # Identify the album in question. if cur_artist != match.info.artist or \ (cur_album != match.info.album and match.info.album != VARIOUS_ARTISTS): artist_l, artist_r = cur_artist or '', match.info.artist - album_l, album_r = cur_album or '', match.info.album + album_l, album_r = cur_album or '', match.info.album if artist_r == VARIOUS_ARTISTS: # Hide artists for VA releases. - artist_l, artist_r = u'', u'' + artist_l, artist_r = '', '' + + if config['artist_credit']: + artist_r = match.info.artist_credit artist_l, artist_r = ui.colordiff(artist_l, artist_r) album_l, album_r = ui.colordiff(album_l, album_r) - print_(u"Correcting tags from:") + print_("Correcting tags from:") show_album(artist_l, album_l) - print_(u"To:") + print_("To:") show_album(artist_r, album_r) else: - print_(u"Tagging:\n {0.artist} - {0.album}".format(match.info)) + print_("Tagging:\n {0.artist} - {0.album}".format(match.info)) # Data URL. if match.info.data_url: - print_(u'URL:\n %s' % match.info.data_url) + print_('URL:\n %s' % match.info.data_url) # Info line. info = [] # Similarity. - info.append(u'(Similarity: %s)' % dist_string(match.distance)) + info.append('(Similarity: %s)' % dist_string(match.distance)) # Penalties. penalties = penalty_string(match.distance) if penalties: @@ -277,7 +283,7 @@ def show_change(cur_artist, cur_album, match): # Disambiguation. disambig = disambig_string(match.info) if disambig: - info.append(ui.colorize('text_highlight_minor', u'(%s)' % disambig)) + info.append(ui.colorize('text_highlight_minor', '(%s)' % disambig)) print_(' '.join(info)) # Tracks. @@ -295,16 +301,16 @@ def show_change(cur_artist, cur_album, match): if medium != track_info.medium or disctitle != track_info.disctitle: media = match.info.media or 'Media' if match.info.mediums > 1 and track_info.disctitle: - lhs = u'%s %s: %s' % (media, track_info.medium, - track_info.disctitle) + lhs = '{} {}: {}'.format(media, track_info.medium, + track_info.disctitle) elif match.info.mediums > 1: - lhs = u'%s %s' % (media, track_info.medium) + lhs = f'{media} {track_info.medium}' elif track_info.disctitle: - lhs = u'%s: %s' % (media, track_info.disctitle) + lhs = f'{media}: {track_info.disctitle}' else: lhs = None if lhs: - lines.append((lhs, u'', 0)) + lines.append((lhs, '', 0)) medium, disctitle = track_info.medium, track_info.disctitle # Titles. @@ -325,7 +331,7 @@ def show_change(cur_artist, cur_album, match): color = 'text_highlight_minor' else: color = 'text_highlight' - templ = ui.colorize(color, u' (#{0})') + templ = ui.colorize(color, ' (#{0})') lhs += templ.format(cur_track) rhs += templ.format(new_track) lhs_width += len(cur_track) + 4 @@ -336,7 +342,7 @@ def show_change(cur_artist, cur_album, match): config['ui']['length_diff_thresh'].as_number(): cur_length = ui.human_seconds_short(item.length) new_length = ui.human_seconds_short(track_info.length) - templ = ui.colorize('text_highlight', u' ({0})') + templ = ui.colorize('text_highlight', ' ({0})') lhs += templ.format(cur_length) rhs += templ.format(new_length) lhs_width += len(cur_length) + 3 @@ -347,9 +353,9 @@ def show_change(cur_artist, cur_album, match): rhs += ' %s' % penalties if lhs != rhs: - lines.append((u' * %s' % lhs, rhs, lhs_width)) + lines.append((' * %s' % lhs, rhs, lhs_width)) elif config['import']['detail']: - lines.append((u' * %s' % lhs, '', lhs_width)) + lines.append((' * %s' % lhs, '', lhs_width)) # Print each track in two columns, or across two lines. col_width = (ui.term_width() - len(''.join([' * ', ' -> ']))) // 2 @@ -359,29 +365,36 @@ def show_change(cur_artist, cur_album, match): if not rhs: print_(lhs) elif max_width > col_width: - print_(u'%s ->\n %s' % (lhs, rhs)) + print_(f'{lhs} ->\n {rhs}') else: pad = max_width - lhs_width - print_(u'%s%s -> %s' % (lhs, ' ' * pad, rhs)) + print_('{}{} -> {}'.format(lhs, ' ' * pad, rhs)) # Missing and unmatched tracks. if match.extra_tracks: - print_(u'Missing tracks ({0}/{1} - {2:.1%}):'.format( + print_('Missing tracks ({}/{} - {:.1%}):'.format( len(match.extra_tracks), len(match.info.tracks), len(match.extra_tracks) / len(match.info.tracks) )) + pad_width = max(len(track_info.title) for track_info in + match.extra_tracks) for track_info in match.extra_tracks: - line = u' ! %s (#%s)' % (track_info.title, format_index(track_info)) + line = ' ! {0: <{width}} (#{1: >2})'.format(track_info.title, + format_index(track_info), + width=pad_width) if track_info.length: - line += u' (%s)' % ui.human_seconds_short(track_info.length) + line += ' (%s)' % ui.human_seconds_short(track_info.length) print_(ui.colorize('text_warning', line)) if match.extra_items: - print_(u'Unmatched tracks ({0}):'.format(len(match.extra_items))) + print_('Unmatched tracks ({}):'.format(len(match.extra_items))) + pad_width = max(len(item.title) for item in match.extra_items) for item in match.extra_items: - line = u' ! %s (#%s)' % (item.title, format_index(item)) + line = ' ! {0: <{width}} (#{1: >2})'.format(item.title, + format_index(item), + width=pad_width) if item.length: - line += u' (%s)' % ui.human_seconds_short(item.length) + line += ' (%s)' % ui.human_seconds_short(item.length) print_(ui.colorize('text_warning', line)) @@ -396,22 +409,22 @@ def show_item_change(item, match): cur_artist, new_artist = ui.colordiff(cur_artist, new_artist) cur_title, new_title = ui.colordiff(cur_title, new_title) - print_(u"Correcting track tags from:") - print_(u" %s - %s" % (cur_artist, cur_title)) - print_(u"To:") - print_(u" %s - %s" % (new_artist, new_title)) + print_("Correcting track tags from:") + print_(f" {cur_artist} - {cur_title}") + print_("To:") + print_(f" {new_artist} - {new_title}") else: - print_(u"Tagging track: %s - %s" % (cur_artist, cur_title)) + print_(f"Tagging track: {cur_artist} - {cur_title}") # Data URL. if match.info.data_url: - print_(u'URL:\n %s' % match.info.data_url) + print_('URL:\n %s' % match.info.data_url) # Info line. info = [] # Similarity. - info.append(u'(Similarity: %s)' % dist_string(match.distance)) + info.append('(Similarity: %s)' % dist_string(match.distance)) # Penalties. penalties = penalty_string(match.distance) if penalties: @@ -419,7 +432,7 @@ def show_item_change(item, match): # Disambiguation. disambig = disambig_string(match.info) if disambig: - info.append(ui.colorize('text_highlight_minor', u'(%s)' % disambig)) + info.append(ui.colorize('text_highlight_minor', '(%s)' % disambig)) print_(' '.join(info)) @@ -433,7 +446,7 @@ def summarize_items(items, singleton): """ summary_parts = [] if not singleton: - summary_parts.append(u"{0} items".format(len(items))) + summary_parts.append("{} items".format(len(items))) format_counts = {} for item in items: @@ -447,26 +460,31 @@ def summarize_items(items, singleton): format_counts.items(), key=lambda fmt_and_count: (-fmt_and_count[1], fmt_and_count[0]) ): - summary_parts.append('{0} {1}'.format(fmt, count)) + summary_parts.append(f'{fmt} {count}') if items: average_bitrate = sum([item.bitrate for item in items]) / len(items) total_duration = sum([item.length for item in items]) total_filesize = sum([item.filesize for item in items]) - summary_parts.append(u'{0}kbps'.format(int(average_bitrate / 1000))) + summary_parts.append('{}kbps'.format(int(average_bitrate / 1000))) + if items[0].format == "FLAC": + sample_bits = '{}kHz/{} bit'.format( + round(int(items[0].samplerate) / 1000, 1), items[0].bitdepth) + summary_parts.append(sample_bits) summary_parts.append(ui.human_seconds_short(total_duration)) summary_parts.append(ui.human_bytes(total_filesize)) - return u', '.join(summary_parts) + return ', '.join(summary_parts) def _summary_judgment(rec): """Determines whether a decision should be made without even asking the user. This occurs in quiet mode and when an action is chosen for - NONE recommendations. Return an action or None if the user should be - queried. May also print to the console if a summary judgment is - made. + NONE recommendations. Return None if the user should be queried. + Otherwise, returns an action. May also print to the console if a + summary judgment is made. """ + if config['import']['quiet']: if rec == Recommendation.strong: return importer.action.APPLY @@ -475,21 +493,21 @@ def _summary_judgment(rec): 'skip': importer.action.SKIP, 'asis': importer.action.ASIS, }) - + elif config['import']['timid']: + return None elif rec == Recommendation.none: action = config['import']['none_rec_action'].as_choice({ 'skip': importer.action.SKIP, 'asis': importer.action.ASIS, 'ask': None, }) - else: return None if action == importer.action.SKIP: - print_(u'Skipping.') + print_('Skipping.') elif action == importer.action.ASIS: - print_(u'Importing as-is.') + print_('Importing as-is.') return action @@ -524,12 +542,12 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Zero candidates. if not candidates: if singleton: - print_(u"No matching recordings found.") + print_("No matching recordings found.") else: - print_(u"No matching release found for {0} tracks." + print_("No matching release found for {} tracks." .format(itemcount)) - print_(u'For help, see: ' - u'http://beets.readthedocs.org/en/latest/faq.html#nomatch') + print_('For help, see: ' + 'https://beets.readthedocs.org/en/latest/faq.html#nomatch') sel = ui.input_options(choice_opts) if sel in choice_actions: return choice_actions[sel] @@ -548,22 +566,22 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, if not bypass_candidates: # Display list of candidates. - print_(u'Finding tags for {0} "{1} - {2}".'.format( - u'track' if singleton else u'album', + print_('Finding tags for {} "{} - {}".'.format( + 'track' if singleton else 'album', item.artist if singleton else cur_artist, item.title if singleton else cur_album, )) - print_(u'Candidates:') + print_('Candidates:') for i, match in enumerate(candidates): # Index, metadata, and distance. line = [ - u'{0}.'.format(i + 1), - u'{0} - {1}'.format( + '{}.'.format(i + 1), + '{} - {}'.format( match.info.artist, match.info.title if singleton else match.info.album, ), - u'({0})'.format(dist_string(match.distance)), + '({})'.format(dist_string(match.distance)), ] # Penalties. @@ -575,14 +593,14 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, disambig = disambig_string(match.info) if disambig: line.append(ui.colorize('text_highlight_minor', - u'(%s)' % disambig)) + '(%s)' % disambig)) - print_(u' '.join(line)) + print_(' '.join(line)) # Ask the user for a choice. sel = ui.input_options(choice_opts, numrange=(1, len(candidates))) - if sel == u'm': + if sel == 'm': pass elif sel in choice_actions: return choice_actions[sel] @@ -606,16 +624,19 @@ def choose_candidate(candidates, singleton, rec, cur_artist=None, # Ask for confirmation. default = config['import']['default_action'].as_choice({ - u'apply': u'a', - u'skip': u's', - u'asis': u'u', - u'none': None, + 'apply': 'a', + 'skip': 's', + 'asis': 'u', + 'none': None, }) if default is None: require = True - sel = ui.input_options((u'Apply', u'More candidates') + choice_opts, + # Bell ring when user interaction is needed. + if config['import']['bell']: + ui.print_('\a', end='') + sel = ui.input_options(('Apply', 'More candidates') + choice_opts, require=require, default=default) - if sel == u'a': + if sel == 'a': return match elif sel in choice_actions: return choice_actions[sel] @@ -627,8 +648,8 @@ def manual_search(session, task): Input either an artist and album (for full albums) or artist and track name (for singletons) for manual search. """ - artist = input_(u'Artist:').strip() - name = input_(u'Album:' if task.is_album else u'Track:').strip() + artist = input_('Artist:').strip() + name = input_('Album:' if task.is_album else 'Track:').strip() if task.is_album: _, _, prop = autotag.tag_album( @@ -644,8 +665,8 @@ def manual_id(session, task): Input an ID, either for an album ("release") or a track ("recording"). """ - prompt = u'Enter {0} ID:'.format(u'release' if task.is_album - else u'recording') + prompt = 'Enter {} ID:'.format('release' if task.is_album + else 'recording') search_id = input_(prompt).strip() if task.is_album: @@ -666,6 +687,7 @@ def abort_action(session, task): class TerminalImportSession(importer.ImportSession): """An import session that runs in a terminal. """ + def choose_match(self, task): """Given an initial autotagging of items, go through an interactive dance with the user to ask for a choice of metadata. Returns an @@ -673,8 +695,21 @@ class TerminalImportSession(importer.ImportSession): """ # Show what we're tagging. print_() - print_(displayable_path(task.paths, u'\n') + - u' ({0} items)'.format(len(task.items))) + print_(displayable_path(task.paths, '\n') + + ' ({} items)'.format(len(task.items))) + + # Let plugins display info or prompt the user before we go through the + # process of selecting candidate. + results = plugins.send('import_task_before_choice', + session=self, task=task) + actions = [action for action in results if action] + + if len(actions) == 1: + return actions[0] + elif len(actions) > 1: + raise plugins.PluginConflictException( + 'Only one handler for `import_task_before_choice` may return ' + 'an action.') # Take immediate action if appropriate. action = _summary_judgment(task.rec) @@ -686,7 +721,6 @@ class TerminalImportSession(importer.ImportSession): return action # Loop until we have a choice. - candidates, rec = task.candidates, task.rec while True: # Ask for a choice from the user. The result of # `choose_candidate` may be an `importer.action`, an @@ -694,8 +728,8 @@ class TerminalImportSession(importer.ImportSession): # `PromptChoice`. choices = self._get_choices(task) choice = choose_candidate( - candidates, False, rec, task.cur_artist, task.cur_album, - itemcount=len(task.items), choices=choices + task.candidates, False, task.rec, task.cur_artist, + task.cur_album, itemcount=len(task.items), choices=choices ) # Basic choices that require no more action here. @@ -711,8 +745,8 @@ class TerminalImportSession(importer.ImportSession): return post_choice elif isinstance(post_choice, autotag.Proposal): # Use the new candidates and continue around the loop. - candidates = post_choice.candidates - rec = post_choice.recommendation + task.candidates = post_choice.candidates + task.rec = post_choice.recommendation # Otherwise, we have a specific match selection. else: @@ -764,46 +798,48 @@ class TerminalImportSession(importer.ImportSession): """Decide what to do when a new album or item seems similar to one that's already in the library. """ - log.warning(u"This {0} is already in the library!", - (u"album" if task.is_album else u"item")) + log.warning("This {0} is already in the library!", + ("album" if task.is_album else "item")) if config['import']['quiet']: # In quiet mode, don't prompt -- just skip. - log.info(u'Skipping.') - sel = u's' + log.info('Skipping.') + sel = 's' else: # Print some detail about the existing and new items so the # user can make an informed decision. for duplicate in found_duplicates: - print_(u"Old: " + summarize_items( + print_("Old: " + summarize_items( list(duplicate.items()) if task.is_album else [duplicate], not task.is_album, )) - print_(u"New: " + summarize_items( + print_("New: " + summarize_items( task.imported_items(), not task.is_album, )) sel = ui.input_options( - (u'Skip new', u'Keep both', u'Remove old') + ('Skip new', 'Keep all', 'Remove old', 'Merge all') ) - if sel == u's': + if sel == 's': # Skip new. task.set_choice(importer.action.SKIP) - elif sel == u'k': + elif sel == 'k': # Keep both. Do nothing; leave the choice intact. pass - elif sel == u'r': + elif sel == 'r': # Remove old. task.should_remove_duplicates = True + elif sel == 'm': + task.should_merge_duplicates = True else: assert False def should_resume(self, path): - return ui.input_yn(u"Import of the directory:\n{0}\n" - u"was interrupted. Resume (Y/n)?" + return ui.input_yn("Import of the directory:\n{}\n" + "was interrupted. Resume (Y/n)?" .format(displayable_path(path))) def _get_choices(self, task): @@ -824,22 +860,22 @@ class TerminalImportSession(importer.ImportSession): """ # Standard, built-in choices. choices = [ - PromptChoice(u's', u'Skip', + PromptChoice('s', 'Skip', lambda s, t: importer.action.SKIP), - PromptChoice(u'u', u'Use as-is', + PromptChoice('u', 'Use as-is', lambda s, t: importer.action.ASIS) ] if task.is_album: choices += [ - PromptChoice(u't', u'as Tracks', + PromptChoice('t', 'as Tracks', lambda s, t: importer.action.TRACKS), - PromptChoice(u'g', u'Group albums', + PromptChoice('g', 'Group albums', lambda s, t: importer.action.ALBUMS), ] choices += [ - PromptChoice(u'e', u'Enter search', manual_search), - PromptChoice(u'i', u'enter Id', manual_id), - PromptChoice(u'b', u'aBort', abort_action), + PromptChoice('e', 'Enter search', manual_search), + PromptChoice('i', 'enter Id', manual_id), + PromptChoice('b', 'aBort', abort_action), ] # Send the before_choose_candidate event and flatten list. @@ -849,7 +885,7 @@ class TerminalImportSession(importer.ImportSession): # Add a "dummy" choice for the other baked-in option, for # duplicate checking. all_choices = [ - PromptChoice(u'a', u'Apply', None), + PromptChoice('a', 'Apply', None), ] + choices + extra_choices # Check for conflicts. @@ -862,8 +898,8 @@ class TerminalImportSession(importer.ImportSession): # Keep the first of the choices, removing the rest. dup_choices = [c for c in all_choices if c.short == short] for c in dup_choices[1:]: - log.warning(u"Prompt choice '{0}' removed due to conflict " - u"with '{1}' (short letter: '{2}')", + log.warning("Prompt choice '{0}' removed due to conflict " + "with '{1}' (short letter: '{2}')", c.long, dup_choices[0].long, c.short) extra_choices.remove(c) @@ -880,21 +916,21 @@ def import_files(lib, paths, query): # Check the user-specified directories. for path in paths: if not os.path.exists(syspath(normpath(path))): - raise ui.UserError(u'no such file or directory: {0}'.format( + raise ui.UserError('no such file or directory: {}'.format( displayable_path(path))) # Check parameter consistency. if config['import']['quiet'] and config['import']['timid']: - raise ui.UserError(u"can't be both quiet and timid") + raise ui.UserError("can't be both quiet and timid") # Open the log. if config['import']['log'].get() is not None: logpath = syspath(config['import']['log'].as_filename()) try: loghandler = logging.FileHandler(logpath) - except IOError: - raise ui.UserError(u"could not open log file for writing: " - u"{0}".format(displayable_path(logpath))) + except OSError: + raise ui.UserError("could not open log file for writing: " + "{}".format(displayable_path(logpath))) else: loghandler = None @@ -925,94 +961,111 @@ def import_func(lib, opts, args): query = None paths = args if not paths: - raise ui.UserError(u'no path specified') + raise ui.UserError('no path specified') + + # On Python 2, we used to get filenames as raw bytes, which is + # what we need. On Python 3, we need to undo the "helpful" + # conversion to Unicode strings to get the real bytestring + # filename. + paths = [p.encode(util.arg_encoding(), 'surrogateescape') + for p in paths] import_files(lib, paths, query) import_cmd = ui.Subcommand( - u'import', help=u'import new music', aliases=(u'imp', u'im') + 'import', help='import new music', aliases=('imp', 'im') ) import_cmd.parser.add_option( - u'-c', u'--copy', action='store_true', default=None, - help=u"copy tracks into library directory (default)" + '-c', '--copy', action='store_true', default=None, + help="copy tracks into library directory (default)" ) import_cmd.parser.add_option( - u'-C', u'--nocopy', action='store_false', dest='copy', - help=u"don't copy tracks (opposite of -c)" + '-C', '--nocopy', action='store_false', dest='copy', + help="don't copy tracks (opposite of -c)" ) import_cmd.parser.add_option( - u'-m', u'--move', action='store_true', dest='move', - help=u"move tracks into the library (overrides -c)" + '-m', '--move', action='store_true', dest='move', + help="move tracks into the library (overrides -c)" ) import_cmd.parser.add_option( - u'-w', u'--write', action='store_true', default=None, - help=u"write new metadata to files' tags (default)" + '-w', '--write', action='store_true', default=None, + help="write new metadata to files' tags (default)" ) import_cmd.parser.add_option( - u'-W', u'--nowrite', action='store_false', dest='write', - help=u"don't write metadata (opposite of -w)" + '-W', '--nowrite', action='store_false', dest='write', + help="don't write metadata (opposite of -w)" ) import_cmd.parser.add_option( - u'-a', u'--autotag', action='store_true', dest='autotag', - help=u"infer tags for imported files (default)" + '-a', '--autotag', action='store_true', dest='autotag', + help="infer tags for imported files (default)" ) import_cmd.parser.add_option( - u'-A', u'--noautotag', action='store_false', dest='autotag', - help=u"don't infer tags for imported files (opposite of -a)" + '-A', '--noautotag', action='store_false', dest='autotag', + help="don't infer tags for imported files (opposite of -a)" ) import_cmd.parser.add_option( - u'-p', u'--resume', action='store_true', default=None, - help=u"resume importing if interrupted" + '-p', '--resume', action='store_true', default=None, + help="resume importing if interrupted" ) import_cmd.parser.add_option( - u'-P', u'--noresume', action='store_false', dest='resume', - help=u"do not try to resume importing" + '-P', '--noresume', action='store_false', dest='resume', + help="do not try to resume importing" ) import_cmd.parser.add_option( - u'-q', u'--quiet', action='store_true', dest='quiet', - help=u"never prompt for input: skip albums instead" + '-q', '--quiet', action='store_true', dest='quiet', + help="never prompt for input: skip albums instead" ) import_cmd.parser.add_option( - u'-l', u'--log', dest='log', - help=u'file to log untaggable albums for later review' + '-l', '--log', dest='log', + help='file to log untaggable albums for later review' ) import_cmd.parser.add_option( - u'-s', u'--singletons', action='store_true', - help=u'import individual tracks instead of full albums' + '-s', '--singletons', action='store_true', + help='import individual tracks instead of full albums' ) import_cmd.parser.add_option( - u'-t', u'--timid', dest='timid', action='store_true', - help=u'always confirm all actions' + '-t', '--timid', dest='timid', action='store_true', + help='always confirm all actions' ) import_cmd.parser.add_option( - u'-L', u'--library', dest='library', action='store_true', - help=u'retag items matching a query' + '-L', '--library', dest='library', action='store_true', + help='retag items matching a query' ) import_cmd.parser.add_option( - u'-i', u'--incremental', dest='incremental', action='store_true', - help=u'skip already-imported directories' + '-i', '--incremental', dest='incremental', action='store_true', + help='skip already-imported directories' ) import_cmd.parser.add_option( - u'-I', u'--noincremental', dest='incremental', action='store_false', - help=u'do not skip already-imported directories' + '-I', '--noincremental', dest='incremental', action='store_false', + help='do not skip already-imported directories' ) import_cmd.parser.add_option( - u'--flat', dest='flat', action='store_true', - help=u'import an entire tree as a single album' + '--from-scratch', dest='from_scratch', action='store_true', + help='erase existing metadata before applying new metadata' ) import_cmd.parser.add_option( - u'-g', u'--group-albums', dest='group_albums', action='store_true', - help=u'group tracks in a folder into separate albums' + '--flat', dest='flat', action='store_true', + help='import an entire tree as a single album' ) import_cmd.parser.add_option( - u'--pretend', dest='pretend', action='store_true', - help=u'just print the files to import' + '-g', '--group-albums', dest='group_albums', action='store_true', + help='group tracks in a folder into separate albums' ) import_cmd.parser.add_option( - u'-S', u'--search-id', dest='search_ids', action='append', + '--pretend', dest='pretend', action='store_true', + help='just print the files to import' +) +import_cmd.parser.add_option( + '-S', '--search-id', dest='search_ids', action='append', metavar='ID', - help=u'restrict matching to a specific metadata backend ID' + help='restrict matching to a specific metadata backend ID' +) +import_cmd.parser.add_option( + '--set', dest='set_fields', action='callback', + callback=_store_dict, + metavar='FIELD=VALUE', + help='set the given fields to the supplied values' ) import_cmd.func = import_func default_commands.append(import_cmd) @@ -1020,7 +1073,7 @@ default_commands.append(import_cmd) # list: Query and show library contents. -def list_items(lib, query, album, fmt=u''): +def list_items(lib, query, album, fmt=''): """Print out items in lib matching query. If album, then search for albums instead of single items. """ @@ -1036,9 +1089,9 @@ def list_func(lib, opts, args): list_items(lib, decargs(args), opts.album) -list_cmd = ui.Subcommand(u'list', help=u'query the library', aliases=(u'ls',)) -list_cmd.parser.usage += u"\n" \ - u'Example: %prog -f \'$album: $title\' artist:beatles' +list_cmd = ui.Subcommand('list', help='query the library', aliases=('ls',)) +list_cmd.parser.usage += "\n" \ + 'Example: %prog -f \'$album: $title\' artist:beatles' list_cmd.parser.add_all_common_options() list_cmd.func = list_func default_commands.append(list_cmd) @@ -1066,7 +1119,7 @@ def update_items(lib, query, album, move, pretend, fields): # Item deleted? if not os.path.exists(syspath(item.path)): ui.print_(format(item)) - ui.print_(ui.colorize('text_error', u' deleted')) + ui.print_(ui.colorize('text_error', ' deleted')) if not pretend: item.remove(True) affected_albums.add(item.album_id) @@ -1074,7 +1127,7 @@ def update_items(lib, query, album, move, pretend, fields): # Did the item change since last checked? if item.current_mtime() <= item.mtime: - log.debug(u'skipping {0} because mtime is up to date ({1})', + log.debug('skipping {0} because mtime is up to date ({1})', displayable_path(item.path), item.mtime) continue @@ -1082,7 +1135,7 @@ def update_items(lib, query, album, move, pretend, fields): try: item.read() except library.ReadError as exc: - log.error(u'error reading {0}: {1}', + log.error('error reading {0}: {1}', displayable_path(item.path), exc) continue @@ -1093,7 +1146,7 @@ def update_items(lib, query, album, move, pretend, fields): old_item = lib.get_item(item.id) if old_item.albumartist == old_item.artist == item.artist: item.albumartist = old_item.albumartist - item._dirty.discard(u'albumartist') + item._dirty.discard('albumartist') # Check for and display changes. changed = ui.show_model_changes( @@ -1126,7 +1179,7 @@ def update_items(lib, query, album, move, pretend, fields): continue album = lib.get_album(album_id) if not album: # Empty albums have already been removed. - log.debug(u'emptied album {0}', album_id) + log.debug('emptied album {0}', album_id) continue first_item = album.items().get() @@ -1137,42 +1190,48 @@ def update_items(lib, query, album, move, pretend, fields): # Move album art (and any inconsistent items). if move and lib.directory in ancestry(first_item.path): - log.debug(u'moving album {0}', album_id) + log.debug('moving album {0}', album_id) # Manually moving and storing the album. items = list(album.items()) for item in items: - item.move(store=False) + item.move(store=False, with_album=False) item.store(fields=fields) album.move(store=False) album.store(fields=fields) def update_func(lib, opts, args): + # Verify that the library folder exists to prevent accidental wipes. + if not os.path.isdir(lib.directory): + ui.print_("Library path is unavailable or does not exist.") + ui.print_(lib.directory) + if not ui.input_yn("Are you sure you want to continue (y/n)?", True): + return update_items(lib, decargs(args), opts.album, ui.should_move(opts.move), opts.pretend, opts.fields) update_cmd = ui.Subcommand( - u'update', help=u'update the library', aliases=(u'upd', u'up',) + 'update', help='update the library', aliases=('upd', 'up',) ) update_cmd.parser.add_album_option() update_cmd.parser.add_format_option() update_cmd.parser.add_option( - u'-m', u'--move', action='store_true', dest='move', - help=u"move files in the library directory" + '-m', '--move', action='store_true', dest='move', + help="move files in the library directory" ) update_cmd.parser.add_option( - u'-M', u'--nomove', action='store_false', dest='move', - help=u"don't move files in library" + '-M', '--nomove', action='store_false', dest='move', + help="don't move files in library" ) update_cmd.parser.add_option( - u'-p', u'--pretend', action='store_true', - help=u"show all changes but do nothing" + '-p', '--pretend', action='store_true', + help="show all changes but do nothing" ) update_cmd.parser.add_option( - u'-F', u'--field', default=None, action='append', dest='fields', - help=u'list of fields to update' + '-F', '--field', default=None, action='append', dest='fields', + help='list of fields to update' ) update_cmd.func = update_func default_commands.append(update_cmd) @@ -1186,31 +1245,53 @@ def remove_items(lib, query, album, delete, force): """ # Get the matching items. items, albums = _do_query(lib, query, album) + objs = albums if album else items # Confirm file removal if not forcing removal. if not force: # Prepare confirmation with user. - print_() + album_str = " in {} album{}".format( + len(albums), 's' if len(albums) > 1 else '' + ) if album else "" + if delete: - fmt = u'$path - $title' - prompt = u'Really DELETE %i file%s (y/n)?' % \ - (len(items), 's' if len(items) > 1 else '') + fmt = '$path - $title' + prompt = 'Really DELETE' + prompt_all = 'Really DELETE {} file{}{}'.format( + len(items), 's' if len(items) > 1 else '', album_str + ) else: - fmt = u'' - prompt = u'Really remove %i item%s from the library (y/n)?' % \ - (len(items), 's' if len(items) > 1 else '') + fmt = '' + prompt = 'Really remove from the library?' + prompt_all = 'Really remove {} item{}{} from the library?'.format( + len(items), 's' if len(items) > 1 else '', album_str + ) + + # Helpers for printing affected items + def fmt_track(t): + ui.print_(format(t, fmt)) + + def fmt_album(a): + ui.print_() + for i in a.items(): + fmt_track(i) + + fmt_obj = fmt_album if album else fmt_track # Show all the items. - for item in items: - ui.print_(format(item, fmt)) + for o in objs: + fmt_obj(o) # Confirm with user. - if not ui.input_yn(prompt, True): - return + objs = ui.input_select_objects(prompt, objs, fmt_obj, + prompt_all=prompt_all) + + if not objs: + return # Remove (and possibly delete) items. with lib.transaction(): - for obj in (albums if album else items): + for obj in objs: obj.remove(delete) @@ -1219,15 +1300,15 @@ def remove_func(lib, opts, args): remove_cmd = ui.Subcommand( - u'remove', help=u'remove matching items from the library', aliases=(u'rm',) + 'remove', help='remove matching items from the library', aliases=('rm',) ) remove_cmd.parser.add_option( - u"-d", u"--delete", action="store_true", - help=u"also remove files from disk" + "-d", "--delete", action="store_true", + help="also remove files from disk" ) remove_cmd.parser.add_option( - u"-f", u"--force", action="store_true", - help=u"do not ask when removing items" + "-f", "--force", action="store_true", + help="do not ask when removing items" ) remove_cmd.parser.add_album_option() remove_cmd.func = remove_func @@ -1252,7 +1333,7 @@ def show_stats(lib, query, exact): try: total_size += os.path.getsize(syspath(item.path)) except OSError as exc: - log.info(u'could not get size of {}: {}', item.path, exc) + log.info('could not get size of {}: {}', item.path, exc) else: total_size += int(item.length * item.bitrate / 8) total_time += item.length @@ -1262,20 +1343,20 @@ def show_stats(lib, query, exact): if item.album_id: albums.add(item.album_id) - size_str = u'' + ui.human_bytes(total_size) + size_str = '' + ui.human_bytes(total_size) if exact: - size_str += u' ({0} bytes)'.format(total_size) + size_str += f' ({total_size} bytes)' - print_(u"""Tracks: {0} -Total time: {1}{2} -{3}: {4} -Artists: {5} -Albums: {6} -Album artists: {7}""".format( + print_("""Tracks: {} +Total time: {}{} +{}: {} +Artists: {} +Albums: {} +Album artists: {}""".format( total_items, ui.human_seconds(total_time), - u' ({0:.2f} seconds)'.format(total_time) if exact else '', - u'Total size' if exact else u'Approximate total size', + f' ({total_time:.2f} seconds)' if exact else '', + 'Total size' if exact else 'Approximate total size', size_str, len(artists), len(albums), @@ -1288,11 +1369,11 @@ def stats_func(lib, opts, args): stats_cmd = ui.Subcommand( - u'stats', help=u'show statistics about the library or a query' + 'stats', help='show statistics about the library or a query' ) stats_cmd.parser.add_option( - u'-e', u'--exact', action='store_true', - help=u'exact size and time' + '-e', '--exact', action='store_true', + help='exact size and time' ) stats_cmd.func = stats_func default_commands.append(stats_cmd) @@ -1301,18 +1382,18 @@ default_commands.append(stats_cmd) # version: Show current beets version. def show_version(lib, opts, args): - print_(u'beets version %s' % beets.__version__) - print_(u'Python version {}'.format(python_version())) + print_('beets version %s' % beets.__version__) + print_(f'Python version {python_version()}') # Show plugins. names = sorted(p.name for p in plugins.find_plugins()) if names: - print_(u'plugins:', ', '.join(names)) + print_('plugins:', ', '.join(names)) else: - print_(u'no plugins loaded') + print_('no plugins loaded') version_cmd = ui.Subcommand( - u'version', help=u'output version information' + 'version', help='output version information' ) version_cmd.func = show_version default_commands.append(version_cmd) @@ -1339,31 +1420,31 @@ def modify_items(lib, mods, dels, query, write, move, album, confirm): # Apply changes *temporarily*, preview them, and collect modified # objects. - print_(u'Modifying {0} {1}s.' - .format(len(objs), u'album' if album else u'item')) - changed = set() + print_('Modifying {} {}s.' + .format(len(objs), 'album' if album else 'item')) + changed = [] for obj in objs: - if print_and_modify(obj, mods, dels): - changed.add(obj) + if print_and_modify(obj, mods, dels) and obj not in changed: + changed.append(obj) # Still something to do? if not changed: - print_(u'No changes to make.') + print_('No changes to make.') return # Confirm action. if confirm: if write and move: - extra = u', move and write tags' + extra = ', move and write tags' elif write: - extra = u' and write tags' + extra = ' and write tags' elif move: - extra = u' and move' + extra = ' and move' else: - extra = u'' + extra = '' changed = ui.input_select_objects( - u'Really modify%s' % extra, changed, + 'Really modify%s' % extra, changed, lambda o: print_and_modify(o, mods, dels) ) @@ -1411,35 +1492,35 @@ def modify_parse_args(args): def modify_func(lib, opts, args): query, mods, dels = modify_parse_args(decargs(args)) if not mods and not dels: - raise ui.UserError(u'no modifications specified') + raise ui.UserError('no modifications specified') modify_items(lib, mods, dels, query, ui.should_write(opts.write), ui.should_move(opts.move), opts.album, not opts.yes) modify_cmd = ui.Subcommand( - u'modify', help=u'change metadata fields', aliases=(u'mod',) + 'modify', help='change metadata fields', aliases=('mod',) ) modify_cmd.parser.add_option( - u'-m', u'--move', action='store_true', dest='move', - help=u"move files in the library directory" + '-m', '--move', action='store_true', dest='move', + help="move files in the library directory" ) modify_cmd.parser.add_option( - u'-M', u'--nomove', action='store_false', dest='move', - help=u"don't move files in library" + '-M', '--nomove', action='store_false', dest='move', + help="don't move files in library" ) modify_cmd.parser.add_option( - u'-w', u'--write', action='store_true', default=None, - help=u"write new metadata to files' tags (default)" + '-w', '--write', action='store_true', default=None, + help="write new metadata to files' tags (default)" ) modify_cmd.parser.add_option( - u'-W', u'--nowrite', action='store_false', dest='write', - help=u"don't write metadata (opposite of -w)" + '-W', '--nowrite', action='store_false', dest='write', + help="don't write metadata (opposite of -w)" ) modify_cmd.parser.add_album_option() modify_cmd.parser.add_format_option(target='item') modify_cmd.parser.add_option( - u'-y', u'--yes', action='store_true', - help=u'skip confirmation' + '-y', '--yes', action='store_true', + help='skip confirmation' ) modify_cmd.func = modify_func default_commands.append(modify_cmd) @@ -1447,24 +1528,36 @@ default_commands.append(modify_cmd) # move: Move/copy files to the library or a new base directory. -def move_items(lib, dest, query, copy, album, pretend, confirm=False): +def move_items(lib, dest, query, copy, album, pretend, confirm=False, + export=False): """Moves or copies items to a new base directory, given by dest. If dest is None, then the library's base directory is used, making the command "consolidate" files. """ items, albums = _do_query(lib, query, album, False) objs = albums if album else items + num_objs = len(objs) # Filter out files that don't need to be moved. - isitemmoved = lambda item: item.path != item.destination(basedir=dest) - isalbummoved = lambda album: any(isitemmoved(i) for i in album.items()) - objs = [o for o in objs if (isalbummoved if album else isitemmoved)(o)] + def isitemmoved(item): + return item.path != item.destination(basedir=dest) - action = u'Copying' if copy else u'Moving' - act = u'copy' if copy else u'move' - entity = u'album' if album else u'item' - log.info(u'{0} {1} {2}{3}.', action, len(objs), entity, - u's' if len(objs) != 1 else u'') + def isalbummoved(album): + return any(isitemmoved(i) for i in album.items()) + + objs = [o for o in objs if (isalbummoved if album else isitemmoved)(o)] + num_unmoved = num_objs - len(objs) + # Report unmoved files that match the query. + unmoved_msg = '' + if num_unmoved > 0: + unmoved_msg = f' ({num_unmoved} already in place)' + + copy = copy or export # Exporting always copies. + action = 'Copying' if copy else 'Moving' + act = 'copy' if copy else 'move' + entity = 'album' if album else 'item' + log.info('{0} {1} {2}{3}{4}.', action, len(objs), entity, + 's' if len(objs) != 1 else '', unmoved_msg) if not objs: return @@ -1478,15 +1571,23 @@ def move_items(lib, dest, query, copy, album, pretend, confirm=False): else: if confirm: objs = ui.input_select_objects( - u'Really %s' % act, objs, + 'Really %s' % act, objs, lambda o: show_path_changes( [(o.path, o.destination(basedir=dest))])) for obj in objs: - log.debug(u'moving: {0}', util.displayable_path(obj.path)) + log.debug('moving: {0}', util.displayable_path(obj.path)) - obj.move(copy, basedir=dest) - obj.store() + if export: + # Copy without affecting the database. + obj.move(operation=MoveOperation.COPY, basedir=dest, + store=False) + else: + # Ordinary move/copy: store the new path. + if copy: + obj.move(operation=MoveOperation.COPY, basedir=dest) + else: + obj.move(operation=MoveOperation.MOVE, basedir=dest) def move_func(lib, opts, args): @@ -1494,30 +1595,34 @@ def move_func(lib, opts, args): if dest is not None: dest = normpath(dest) if not os.path.isdir(dest): - raise ui.UserError(u'no such directory: %s' % dest) + raise ui.UserError('no such directory: %s' % dest) move_items(lib, dest, decargs(args), opts.copy, opts.album, opts.pretend, - opts.timid) + opts.timid, opts.export) move_cmd = ui.Subcommand( - u'move', help=u'move or copy items', aliases=(u'mv',) + 'move', help='move or copy items', aliases=('mv',) ) move_cmd.parser.add_option( - u'-d', u'--dest', metavar='DIR', dest='dest', - help=u'destination directory' + '-d', '--dest', metavar='DIR', dest='dest', + help='destination directory' ) move_cmd.parser.add_option( - u'-c', u'--copy', default=False, action='store_true', - help=u'copy instead of moving' + '-c', '--copy', default=False, action='store_true', + help='copy instead of moving' ) move_cmd.parser.add_option( - u'-p', u'--pretend', default=False, action='store_true', - help=u'show how files would be moved, but don\'t touch anything' + '-p', '--pretend', default=False, action='store_true', + help='show how files would be moved, but don\'t touch anything' ) move_cmd.parser.add_option( - u'-t', u'--timid', dest='timid', action='store_true', - help=u'always confirm all actions' + '-t', '--timid', dest='timid', action='store_true', + help='always confirm all actions' +) +move_cmd.parser.add_option( + '-e', '--export', default=False, action='store_true', + help='copy without changing the database path' ) move_cmd.parser.add_album_option() move_cmd.func = move_func @@ -1535,14 +1640,14 @@ def write_items(lib, query, pretend, force): for item in items: # Item deleted? if not os.path.exists(syspath(item.path)): - log.info(u'missing file: {0}', util.displayable_path(item.path)) + log.info('missing file: {0}', util.displayable_path(item.path)) continue # Get an Item object reflecting the "clean" (on-disk) state. try: clean_item = library.Item.from_path(item.path) except library.ReadError as exc: - log.error(u'error reading {0}: {1}', + log.error('error reading {0}: {1}', displayable_path(item.path), exc) continue @@ -1559,14 +1664,14 @@ def write_func(lib, opts, args): write_items(lib, decargs(args), opts.pretend, opts.force) -write_cmd = ui.Subcommand(u'write', help=u'write tag information to files') +write_cmd = ui.Subcommand('write', help='write tag information to files') write_cmd.parser.add_option( - u'-p', u'--pretend', action='store_true', - help=u"show all changes but do nothing" + '-p', '--pretend', action='store_true', + help="show all changes but do nothing" ) write_cmd.parser.add_option( - u'-f', u'--force', action='store_true', - help=u"write tags even if the existing tags match the database" + '-f', '--force', action='store_true', + help="write tags even if the existing tags match the database" ) write_cmd.func = write_func default_commands.append(write_cmd) @@ -1603,7 +1708,10 @@ def config_func(lib, opts, args): # Dump configuration. else: config_out = config.dump(full=opts.defaults, redact=opts.redact) - print_(util.text_string(config_out)) + if config_out.strip() != '{}': + print_(util.text_string(config_out)) + else: + print("Empty configuration") def config_edit(): @@ -1617,29 +1725,30 @@ def config_edit(): open(path, 'w+').close() util.interactive_open([path], editor) except OSError as exc: - message = u"Could not edit configuration: {0}".format(exc) + message = f"Could not edit configuration: {exc}" if not editor: - message += u". Please set the EDITOR environment variable" + message += ". Please set the EDITOR environment variable" raise ui.UserError(message) -config_cmd = ui.Subcommand(u'config', - help=u'show or edit the user configuration') + +config_cmd = ui.Subcommand('config', + help='show or edit the user configuration') config_cmd.parser.add_option( - u'-p', u'--paths', action='store_true', - help=u'show files that configuration was loaded from' + '-p', '--paths', action='store_true', + help='show files that configuration was loaded from' ) config_cmd.parser.add_option( - u'-e', u'--edit', action='store_true', - help=u'edit user configuration with $EDITOR' + '-e', '--edit', action='store_true', + help='edit user configuration with $EDITOR' ) config_cmd.parser.add_option( - u'-d', u'--defaults', action='store_true', - help=u'include the default configuration' + '-d', '--defaults', action='store_true', + help='include the default configuration' ) config_cmd.parser.add_option( - u'-c', u'--clear', action='store_false', + '-c', '--clear', action='store_false', dest='redact', default=True, - help=u'do not redact sensitive fields' + help='do not redact sensitive fields' ) config_cmd.func = config_func default_commands.append(config_cmd) @@ -1649,19 +1758,20 @@ default_commands.append(config_cmd) def print_completion(*args): for line in completion_script(default_commands + plugins.commands()): - print_(line, end=u'') + print_(line, end='') if not any(map(os.path.isfile, BASH_COMPLETION_PATHS)): - log.warning(u'Warning: Unable to find the bash-completion package. ' - u'Command line completion might not work.') + log.warning('Warning: Unable to find the bash-completion package. ' + 'Command line completion might not work.') + BASH_COMPLETION_PATHS = map(syspath, [ - u'/etc/bash_completion', - u'/usr/share/bash-completion/bash_completion', - u'/usr/local/share/bash-completion/bash_completion', + '/etc/bash_completion', + '/usr/share/bash-completion/bash_completion', + '/usr/local/share/bash-completion/bash_completion', # SmartOS - u'/opt/local/share/bash-completion/bash_completion', + '/opt/local/share/bash-completion/bash_completion', # Homebrew (before bash-completion2) - u'/usr/local/etc/bash_completion', + '/usr/local/etc/bash_completion', ]) @@ -1671,8 +1781,8 @@ def completion_script(commands): ``commands`` is alist of ``ui.Subcommand`` instances to generate completion data for. """ - base_script = os.path.join(_package_path('beets.ui'), 'completion_base.sh') - with open(base_script, 'r') as base_script: + base_script = os.path.join(os.path.dirname(__file__), 'completion_base.sh') + with open(base_script) as base_script: yield util.text_string(base_script.read()) options = {} @@ -1688,12 +1798,12 @@ def completion_script(commands): if re.match(r'^\w+$', alias): aliases[alias] = name - options[name] = {u'flags': [], u'opts': []} + options[name] = {'flags': [], 'opts': []} for opts in cmd.parser._get_all_options()[1:]: if opts.action in ('store_true', 'store_false'): - option_type = u'flags' + option_type = 'flags' else: - option_type = u'opts' + option_type = 'opts' options[name][option_type].extend( opts._short_opts + opts._long_opts @@ -1701,31 +1811,31 @@ def completion_script(commands): # Add global options options['_global'] = { - u'flags': [u'-v', u'--verbose'], - u'opts': - u'-l --library -c --config -d --directory -h --help'.split(u' ') + 'flags': ['-v', '--verbose'], + 'opts': + '-l --library -c --config -d --directory -h --help'.split(' ') } # Add flags common to all commands options['_common'] = { - u'flags': [u'-h', u'--help'] + 'flags': ['-h', '--help'] } # Start generating the script - yield u"_beet() {\n" + yield "_beet() {\n" # Command names - yield u" local commands='%s'\n" % ' '.join(command_names) - yield u"\n" + yield " local commands='%s'\n" % ' '.join(command_names) + yield "\n" # Command aliases - yield u" local aliases='%s'\n" % ' '.join(aliases.keys()) + yield " local aliases='%s'\n" % ' '.join(aliases.keys()) for alias, cmd in aliases.items(): - yield u" local alias__%s=%s\n" % (alias, cmd) - yield u'\n' + yield " local alias__{}={}\n".format(alias.replace('-', '_'), cmd) + yield '\n' # Fields - yield u" fields='%s'\n" % ' '.join( + yield " fields='%s'\n" % ' '.join( set( list(library.Item._fields.keys()) + list(library.Album._fields.keys()) @@ -1736,17 +1846,17 @@ def completion_script(commands): for cmd, opts in options.items(): for option_type, option_list in opts.items(): if option_list: - option_list = u' '.join(option_list) - yield u" local %s__%s='%s'\n" % ( - option_type, cmd, option_list) + option_list = ' '.join(option_list) + yield " local {}__{}='{}'\n".format( + option_type, cmd.replace('-', '_'), option_list) - yield u' _beet_dispatch\n' - yield u'}\n' + yield ' _beet_dispatch\n' + yield '}\n' completion_cmd = ui.Subcommand( 'completion', - help=u'print shell script that provides command line completion' + help='print shell script that provides command line completion' ) completion_cmd.func = print_completion completion_cmd.hide = True diff --git a/lib/beets/ui/completion_base.sh b/lib/beets/ui/completion_base.sh old mode 100755 new mode 100644 index ce3fb6e2..1eaa4db3 --- a/lib/beets/ui/completion_base.sh +++ b/lib/beets/ui/completion_base.sh @@ -70,7 +70,7 @@ _beet_dispatch() { # Replace command shortcuts if [[ -n $cmd ]] && _list_include_item "$aliases" "$cmd"; then - eval "cmd=\$alias__$cmd" + eval "cmd=\$alias__${cmd//-/_}" fi case $cmd in @@ -94,8 +94,8 @@ _beet_dispatch() { _beet_complete() { if [[ $cur == -* ]]; then local opts flags completions - eval "opts=\$opts__$cmd" - eval "flags=\$flags__$cmd" + eval "opts=\$opts__${cmd//-/_}" + eval "flags=\$flags__${cmd//-/_}" completions="${flags___common} ${opts} ${flags}" COMPREPLY+=( $(compgen -W "$completions" -- $cur) ) else @@ -129,7 +129,7 @@ _beet_complete_global() { COMPREPLY+=( $(compgen -W "$completions" -- $cur) ) elif [[ -n $cur ]] && _list_include_item "$aliases" "$cur"; then local cmd - eval "cmd=\$alias__$cur" + eval "cmd=\$alias__${cur//-/_}" COMPREPLY+=( "$cmd" ) else COMPREPLY+=( $(compgen -W "$commands" -- $cur) ) @@ -138,7 +138,7 @@ _beet_complete_global() { _beet_complete_query() { local opts - eval "opts=\$opts__$cmd" + eval "opts=\$opts__${cmd//-/_}" if [[ $cur == -* ]] || _list_include_item "$opts" "$prev"; then _beet_complete diff --git a/lib/beets/util/__init__.py b/lib/beets/util/__init__.py old mode 100755 new mode 100644 index f6cd488d..d58bb28e --- a/lib/beets/util/__init__.py +++ b/lib/beets/util/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -15,27 +14,28 @@ """Miscellaneous utility functions.""" -from __future__ import division, absolute_import, print_function import os import sys import errno import locale import re +import tempfile import shutil import fnmatch -from collections import Counter +import functools +from collections import Counter, namedtuple +from multiprocessing.pool import ThreadPool import traceback import subprocess import platform import shlex from beets.util import hidden -import six from unidecode import unidecode +from enum import Enum MAX_FILENAME_LENGTH = 200 -WINDOWS_MAGIC_PREFIX = u'\\\\?\\' -SNI_SUPPORTED = sys.version_info >= (2, 7, 9) +WINDOWS_MAGIC_PREFIX = '\\\\?\\' class HumanReadableException(Exception): @@ -57,27 +57,27 @@ class HumanReadableException(Exception): self.reason = reason self.verb = verb self.tb = tb - super(HumanReadableException, self).__init__(self.get_message()) + super().__init__(self.get_message()) def _gerund(self): """Generate a (likely) gerund form of the English verb. """ - if u' ' in self.verb: + if ' ' in self.verb: return self.verb - gerund = self.verb[:-1] if self.verb.endswith(u'e') else self.verb - gerund += u'ing' + gerund = self.verb[:-1] if self.verb.endswith('e') else self.verb + gerund += 'ing' return gerund def _reasonstr(self): """Get the reason as a string.""" - if isinstance(self.reason, six.text_type): + if isinstance(self.reason, str): return self.reason elif isinstance(self.reason, bytes): return self.reason.decode('utf-8', 'ignore') elif hasattr(self.reason, 'strerror'): # i.e., EnvironmentError return self.reason.strerror else: - return u'"{0}"'.format(six.text_type(self.reason)) + return '"{}"'.format(str(self.reason)) def get_message(self): """Create the human-readable description of the error, sans @@ -91,7 +91,7 @@ class HumanReadableException(Exception): """ if self.tb: logger.debug(self.tb) - logger.error(u'{0}: {1}', self.error_kind, self.args[0]) + logger.error('{0}: {1}', self.error_kind, self.args[0]) class FilesystemError(HumanReadableException): @@ -99,29 +99,41 @@ class FilesystemError(HumanReadableException): via a function in this module. The `paths` field is a sequence of pathnames involved in the operation. """ + def __init__(self, reason, verb, paths, tb=None): self.paths = paths - super(FilesystemError, self).__init__(reason, verb, tb) + super().__init__(reason, verb, tb) def get_message(self): # Use a nicer English phrasing for some specific verbs. if self.verb in ('move', 'copy', 'rename'): - clause = u'while {0} {1} to {2}'.format( + clause = 'while {} {} to {}'.format( self._gerund(), displayable_path(self.paths[0]), displayable_path(self.paths[1]) ) elif self.verb in ('delete', 'write', 'create', 'read'): - clause = u'while {0} {1}'.format( + clause = 'while {} {}'.format( self._gerund(), displayable_path(self.paths[0]) ) else: - clause = u'during {0} of paths {1}'.format( - self.verb, u', '.join(displayable_path(p) for p in self.paths) + clause = 'during {} of paths {}'.format( + self.verb, ', '.join(displayable_path(p) for p in self.paths) ) - return u'{0} {1}'.format(self._reasonstr(), clause) + return f'{self._reasonstr()} {clause}' + + +class MoveOperation(Enum): + """The file operations that e.g. various move functions can carry out. + """ + MOVE = 0 + COPY = 1 + LINK = 2 + HARDLINK = 3 + REFLINK = 4 + REFLINK_AUTO = 5 def normpath(path): @@ -172,7 +184,7 @@ def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): contents = os.listdir(syspath(path)) except OSError as exc: if logger: - logger.warning(u'could not list directory {0}: {1}'.format( + logger.warning('could not list directory {}: {}'.format( displayable_path(path), exc.strerror )) return @@ -185,6 +197,10 @@ def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): skip = False for pat in ignore: if fnmatch.fnmatch(base, pat): + if logger: + logger.debug('ignoring {} due to ignore rule {}'.format( + base, pat + )) skip = True break if skip: @@ -207,8 +223,14 @@ def sorted_walk(path, ignore=(), ignore_hidden=False, logger=None): for base in dirs: cur = os.path.join(path, base) # yield from sorted_walk(...) - for res in sorted_walk(cur, ignore, ignore_hidden, logger): - yield res + yield from sorted_walk(cur, ignore, ignore_hidden, logger) + + +def path_as_posix(path): + """Return the string representation of the path with forward (/) + slashes. + """ + return path.replace(b'\\', b'/') def mkdirall(path): @@ -219,7 +241,7 @@ def mkdirall(path): if not os.path.isdir(syspath(ancestor)): try: os.mkdir(syspath(ancestor)) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'create', (ancestor,), traceback.format_exc()) @@ -272,13 +294,13 @@ def prune_dirs(path, root=None, clutter=('.DS_Store', 'Thumbs.db')): continue clutter = [bytestring_path(c) for c in clutter] match_paths = [bytestring_path(d) for d in os.listdir(directory)] - if fnmatch_all(match_paths, clutter): - # Directory contains only clutter (or nothing). - try: + try: + if fnmatch_all(match_paths, clutter): + # Directory contains only clutter (or nothing). shutil.rmtree(directory) - except OSError: + else: break - else: + except OSError: break @@ -357,18 +379,18 @@ def bytestring_path(path): PATH_SEP = bytestring_path(os.sep) -def displayable_path(path, separator=u'; '): +def displayable_path(path, separator='; '): """Attempts to decode a bytestring path to a unicode object for the purpose of displaying it to the user. If the `path` argument is a list or a tuple, the elements are joined with `separator`. """ if isinstance(path, (list, tuple)): return separator.join(displayable_path(p) for p in path) - elif isinstance(path, six.text_type): + elif isinstance(path, str): return path elif not isinstance(path, bytes): # A non-string object: just get its unicode representation. - return six.text_type(path) + return str(path) try: return path.decode(_fsencoding(), 'ignore') @@ -387,7 +409,7 @@ def syspath(path, prefix=True): if os.path.__name__ != 'ntpath': return path - if not isinstance(path, six.text_type): + if not isinstance(path, str): # Beets currently represents Windows paths internally with UTF-8 # arbitrarily. But earlier versions used MBCS because it is # reported as the FS encoding by Windows. Try both. @@ -400,11 +422,11 @@ def syspath(path, prefix=True): path = path.decode(encoding, 'replace') # Add the magic prefix if it isn't already there. - # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx + # https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx if prefix and not path.startswith(WINDOWS_MAGIC_PREFIX): - if path.startswith(u'\\\\'): + if path.startswith('\\\\'): # UNC path. Final path should look like \\?\UNC\... - path = u'UNC' + path[1:] + path = 'UNC' + path[1:] path = WINDOWS_MAGIC_PREFIX + path return path @@ -412,6 +434,8 @@ def syspath(path, prefix=True): def samefile(p1, p2): """Safer equality for paths.""" + if p1 == p2: + return True return shutil._samefile(syspath(p1), syspath(p2)) @@ -424,7 +448,7 @@ def remove(path, soft=True): return try: os.remove(path) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'delete', (path,), traceback.format_exc()) @@ -439,10 +463,10 @@ def copy(path, dest, replace=False): path = syspath(path) dest = syspath(dest) if not replace and os.path.exists(dest): - raise FilesystemError(u'file exists', 'copy', (path, dest)) + raise FilesystemError('file exists', 'copy', (path, dest)) try: shutil.copyfile(path, dest) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'copy', (path, dest), traceback.format_exc()) @@ -455,24 +479,37 @@ def move(path, dest, replace=False): instead, in which case metadata will *not* be preserved. Paths are translated to system paths. """ + if os.path.isdir(path): + raise FilesystemError(u'source is directory', 'move', (path, dest)) + if os.path.isdir(dest): + raise FilesystemError(u'destination is directory', 'move', + (path, dest)) if samefile(path, dest): return path = syspath(path) dest = syspath(dest) if os.path.exists(dest) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest)) + raise FilesystemError('file exists', 'rename', (path, dest)) # First, try renaming the file. try: - os.rename(path, dest) + os.replace(path, dest) except OSError: - # Otherwise, copy and delete the original. + tmp = tempfile.mktemp(suffix='.beets', + prefix=py3_path(b'.' + os.path.basename(dest)), + dir=py3_path(os.path.dirname(dest))) + tmp = syspath(tmp) try: - shutil.copyfile(path, dest) + shutil.copyfile(path, tmp) + os.replace(tmp, dest) + tmp = None os.remove(path) - except (OSError, IOError) as exc: + except OSError as exc: raise FilesystemError(exc, 'move', (path, dest), traceback.format_exc()) + finally: + if tmp is not None: + os.remove(tmp) def link(path, dest, replace=False): @@ -484,18 +521,18 @@ def link(path, dest, replace=False): return if os.path.exists(syspath(dest)) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest)) + raise FilesystemError('file exists', 'rename', (path, dest)) try: os.symlink(syspath(path), syspath(dest)) except NotImplementedError: # raised on python >= 3.2 and Windows versions before Vista - raise FilesystemError(u'OS does not support symbolic links.' + raise FilesystemError('OS does not support symbolic links.' 'link', (path, dest), traceback.format_exc()) except OSError as exc: # TODO: Windows version checks can be removed for python 3 if hasattr('sys', 'getwindowsversion'): if sys.getwindowsversion()[0] < 6: # is before Vista - exc = u'OS does not support symbolic links.' + exc = 'OS does not support symbolic links.' raise FilesystemError(exc, 'link', (path, dest), traceback.format_exc()) @@ -509,21 +546,50 @@ def hardlink(path, dest, replace=False): return if os.path.exists(syspath(dest)) and not replace: - raise FilesystemError(u'file exists', 'rename', (path, dest)) + raise FilesystemError('file exists', 'rename', (path, dest)) try: os.link(syspath(path), syspath(dest)) except NotImplementedError: - raise FilesystemError(u'OS does not support hard links.' + raise FilesystemError('OS does not support hard links.' 'link', (path, dest), traceback.format_exc()) except OSError as exc: if exc.errno == errno.EXDEV: - raise FilesystemError(u'Cannot hard link across devices.' + raise FilesystemError('Cannot hard link across devices.' 'link', (path, dest), traceback.format_exc()) else: raise FilesystemError(exc, 'link', (path, dest), traceback.format_exc()) +def reflink(path, dest, replace=False, fallback=False): + """Create a reflink from `dest` to `path`. + + Raise an `OSError` if `dest` already exists, unless `replace` is + True. If `path` == `dest`, then do nothing. + + If reflinking fails and `fallback` is enabled, try copying the file + instead. Otherwise, raise an error without trying a plain copy. + + May raise an `ImportError` if the `reflink` module is not available. + """ + import reflink as pyreflink + + if samefile(path, dest): + return + + if os.path.exists(syspath(dest)) and not replace: + raise FilesystemError('file exists', 'rename', (path, dest)) + + try: + pyreflink.reflink(path, dest) + except (NotImplementedError, pyreflink.ReflinkImpossibleError): + if fallback: + copy(path, dest, replace) + else: + raise FilesystemError('OS/filesystem does not support reflinks.', + 'link', (path, dest), traceback.format_exc()) + + def unique_path(path): """Returns a version of ``path`` that does not exist on the filesystem. Specifically, if ``path` itself already exists, then @@ -541,22 +607,23 @@ def unique_path(path): num = 0 while True: num += 1 - suffix = u'.{}'.format(num).encode() + ext + suffix = f'.{num}'.encode() + ext new_path = base + suffix if not os.path.exists(new_path): return new_path + # Note: The Windows "reserved characters" are, of course, allowed on # Unix. They are forbidden here because they cause problems on Samba # shares, which are sufficiently common as to cause frequent problems. -# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx +# https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx CHAR_REPLACE = [ - (re.compile(r'[\\/]'), u'_'), # / and \ -- forbidden everywhere. - (re.compile(r'^\.'), u'_'), # Leading dot (hidden files on Unix). - (re.compile(r'[\x00-\x1f]'), u''), # Control characters. - (re.compile(r'[<>:"\?\*\|]'), u'_'), # Windows "reserved characters". - (re.compile(r'\.$'), u'_'), # Trailing dots. - (re.compile(r'\s+$'), u''), # Trailing whitespace. + (re.compile(r'[\\/]'), '_'), # / and \ -- forbidden everywhere. + (re.compile(r'^\.'), '_'), # Leading dot (hidden files on Unix). + (re.compile(r'[\x00-\x1f]'), ''), # Control characters. + (re.compile(r'[<>:"\?\*\|]'), '_'), # Windows "reserved characters". + (re.compile(r'\.$'), '_'), # Trailing dots. + (re.compile(r'\s+$'), ''), # Trailing whitespace. ] @@ -680,36 +747,29 @@ def py3_path(path): it is. So this function helps us "smuggle" the true bytes data through APIs that took Python 3's Unicode mandate too seriously. """ - if isinstance(path, six.text_type): + if isinstance(path, str): return path assert isinstance(path, bytes) - if six.PY2: - return path return os.fsdecode(path) def str2bool(value): """Returns a boolean reflecting a human-entered string.""" - return value.lower() in (u'yes', u'1', u'true', u't', u'y') + return value.lower() in ('yes', '1', 'true', 't', 'y') def as_string(value): """Convert a value to a Unicode object for matching with a query. None becomes the empty string. Bytestrings are silently decoded. """ - if six.PY2: - buffer_types = buffer, memoryview # noqa: F821 - else: - buffer_types = memoryview - if value is None: - return u'' - elif isinstance(value, buffer_types): + return '' + elif isinstance(value, memoryview): return bytes(value).decode('utf-8', 'ignore') elif isinstance(value, bytes): return value.decode('utf-8', 'ignore') else: - return six.text_type(value) + return str(value) def text_string(value, encoding='utf-8'): @@ -732,7 +792,7 @@ def plurality(objs): """ c = Counter(objs) if not c: - raise ValueError(u'sequence must be non-empty') + raise ValueError('sequence must be non-empty') return c.most_common(1)[0] @@ -749,7 +809,11 @@ def cpu_count(): num = 0 elif sys.platform == 'darwin': try: - num = int(command_output(['/usr/sbin/sysctl', '-n', 'hw.ncpu'])) + num = int(command_output([ + '/usr/sbin/sysctl', + '-n', + 'hw.ncpu', + ]).stdout) except (ValueError, OSError, subprocess.CalledProcessError): num = 0 else: @@ -769,20 +833,23 @@ def convert_command_args(args): assert isinstance(args, list) def convert(arg): - if six.PY2: - if isinstance(arg, six.text_type): - arg = arg.encode(arg_encoding()) - else: - if isinstance(arg, bytes): - arg = arg.decode(arg_encoding(), 'surrogateescape') + if isinstance(arg, bytes): + arg = arg.decode(arg_encoding(), 'surrogateescape') return arg return [convert(a) for a in args] +# stdout and stderr as bytes +CommandOutput = namedtuple("CommandOutput", ("stdout", "stderr")) + + def command_output(cmd, shell=False): """Runs the command and returns its output after it has exited. + Returns a CommandOutput. The attributes ``stdout`` and ``stderr`` contain + byte strings of the respective output streams. + ``cmd`` is a list of arguments starting with the command names. The arguments are bytes on Unix and strings on Windows. If ``shell`` is true, ``cmd`` is assumed to be a string and passed to a @@ -797,10 +864,16 @@ def command_output(cmd, shell=False): """ cmd = convert_command_args(cmd) + try: # python >= 3.3 + devnull = subprocess.DEVNULL + except AttributeError: + devnull = open(os.devnull, 'r+b') + proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + stdin=devnull, close_fds=platform.system() != 'Windows', shell=shell ) @@ -811,7 +884,7 @@ def command_output(cmd, shell=False): cmd=' '.join(cmd), output=stdout + stderr, ) - return stdout + return CommandOutput(stdout, stderr) def max_filename_length(path, limit=MAX_FILENAME_LENGTH): @@ -858,25 +931,6 @@ def editor_command(): return open_anything() -def shlex_split(s): - """Split a Unicode or bytes string according to shell lexing rules. - - Raise `ValueError` if the string is not a well-formed shell string. - This is a workaround for a bug in some versions of Python. - """ - if not six.PY2 or isinstance(s, bytes): # Shlex works fine. - return shlex.split(s) - - elif isinstance(s, six.text_type): - # Work around a Python bug. - # http://bugs.python.org/issue6988 - bs = s.encode('utf-8') - return [c.decode('utf-8') for c in shlex.split(bs)] - - else: - raise TypeError(u'shlex_split called with non-string') - - def interactive_open(targets, command): """Open the files in `targets` by `exec`ing a new `command`, given as a Unicode string. (The new program takes over, and Python @@ -888,7 +942,7 @@ def interactive_open(targets, command): # Split the command string into its arguments. try: - args = shlex_split(command) + args = shlex.split(command) except ValueError: # Malformed shell tokens. args = [command] @@ -903,7 +957,7 @@ def _windows_long_path_name(short_path): """Use Windows' `GetLongPathNameW` via ctypes to get the canonical, long path given a short filename. """ - if not isinstance(short_path, six.text_type): + if not isinstance(short_path, str): short_path = short_path.decode(_fsencoding()) import ctypes @@ -964,7 +1018,7 @@ def raw_seconds_short(string): """ match = re.match(r'^(\d+):([0-5]\d)$', string) if not match: - raise ValueError(u'String not in M:SS format') + raise ValueError('String not in M:SS format') minutes, seconds = map(int, match.groups()) return float(minutes * 60 + seconds) @@ -991,3 +1045,59 @@ def asciify_path(path, sep_replace): sep_replace ) return os.sep.join(path_components) + + +def par_map(transform, items): + """Apply the function `transform` to all the elements in the + iterable `items`, like `map(transform, items)` but with no return + value. The map *might* happen in parallel: it's parallel on Python 3 + and sequential on Python 2. + + The parallelism uses threads (not processes), so this is only useful + for IO-bound `transform`s. + """ + pool = ThreadPool() + pool.map(transform, items) + pool.close() + pool.join() + + +def lazy_property(func): + """A decorator that creates a lazily evaluated property. On first access, + the property is assigned the return value of `func`. This first value is + stored, so that future accesses do not have to evaluate `func` again. + + This behaviour is useful when `func` is expensive to evaluate, and it is + not certain that the result will be needed. + """ + field_name = '_' + func.__name__ + + @property + @functools.wraps(func) + def wrapper(self): + if hasattr(self, field_name): + return getattr(self, field_name) + + value = func(self) + setattr(self, field_name, value) + return value + + return wrapper + + +def decode_commandline_path(path): + """Prepare a path for substitution into commandline template. + + On Python 3, we need to construct the subprocess commands to invoke as a + Unicode string. On Unix, this is a little unfortunate---the OS is + expecting bytes---so we use surrogate escaping and decode with the + argument encoding, which is the same encoding that will then be + *reversed* to recover the same bytes before invoking the OS. On + Windows, we want to preserve the Unicode filename "as is." + """ + # On Python 3, the template is a Unicode string, which only supports + # substitution of Unicode variables. + if platform.system() == 'Windows': + return path.decode(_fsencoding()) + else: + return path.decode(arg_encoding(), 'surrogateescape') diff --git a/lib/beets/util/artresizer.py b/lib/beets/util/artresizer.py old mode 100755 new mode 100644 index e84b775d..8683e228 --- a/lib/beets/util/artresizer.py +++ b/lib/beets/util/artresizer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Fabrice Laporte # @@ -16,38 +15,39 @@ """Abstraction layer to resize images using PIL, ImageMagick, or a public resizing proxy if neither is available. """ -from __future__ import division, absolute_import, print_function import subprocess import os +import os.path import re from tempfile import NamedTemporaryFile -from six.moves.urllib.parse import urlencode +from urllib.parse import urlencode from beets import logging from beets import util -import six # Resizing methods PIL = 1 IMAGEMAGICK = 2 WEBPROXY = 3 -if util.SNI_SUPPORTED: - PROXY_URL = 'https://images.weserv.nl/' -else: - PROXY_URL = 'http://images.weserv.nl/' +PROXY_URL = 'https://images.weserv.nl/' log = logging.getLogger('beets') -def resize_url(url, maxwidth): +def resize_url(url, maxwidth, quality=0): """Return a proxied image URL that resizes the original image to maxwidth (preserving aspect ratio). """ - return '{0}?{1}'.format(PROXY_URL, urlencode({ + params = { 'url': url.replace('http://', ''), 'w': maxwidth, - })) + } + + if quality > 0: + params['q'] = quality + + return '{}?{}'.format(PROXY_URL, urlencode(params)) def temp_file_for(path): @@ -59,49 +59,102 @@ def temp_file_for(path): return util.bytestring_path(f.name) -def pil_resize(maxwidth, path_in, path_out=None): +def pil_resize(maxwidth, path_in, path_out=None, quality=0, max_filesize=0): """Resize using Python Imaging Library (PIL). Return the output path of resized image. """ path_out = path_out or temp_file_for(path_in) from PIL import Image - log.debug(u'artresizer: PIL resizing {0} to {1}', + + log.debug('artresizer: PIL resizing {0} to {1}', util.displayable_path(path_in), util.displayable_path(path_out)) try: im = Image.open(util.syspath(path_in)) size = maxwidth, maxwidth im.thumbnail(size, Image.ANTIALIAS) - im.save(path_out) - return path_out - except IOError: - log.error(u"PIL cannot create thumbnail for '{0}'", + + if quality == 0: + # Use PIL's default quality. + quality = -1 + + # progressive=False only affects JPEGs and is the default, + # but we include it here for explicitness. + im.save(util.py3_path(path_out), quality=quality, progressive=False) + + if max_filesize > 0: + # If maximum filesize is set, we attempt to lower the quality of + # jpeg conversion by a proportional amount, up to 3 attempts + # First, set the maximum quality to either provided, or 95 + if quality > 0: + lower_qual = quality + else: + lower_qual = 95 + for i in range(5): + # 5 attempts is an abitrary choice + filesize = os.stat(util.syspath(path_out)).st_size + log.debug("PIL Pass {0} : Output size: {1}B", i, filesize) + if filesize <= max_filesize: + return path_out + # The relationship between filesize & quality will be + # image dependent. + lower_qual -= 10 + # Restrict quality dropping below 10 + if lower_qual < 10: + lower_qual = 10 + # Use optimize flag to improve filesize decrease + im.save(util.py3_path(path_out), quality=lower_qual, + optimize=True, progressive=False) + log.warning("PIL Failed to resize file to below {0}B", + max_filesize) + return path_out + + else: + return path_out + except OSError: + log.error("PIL cannot create thumbnail for '{0}'", util.displayable_path(path_in)) return path_in -def im_resize(maxwidth, path_in, path_out=None): - """Resize using ImageMagick's ``convert`` tool. - Return the output path of resized image. +def im_resize(maxwidth, path_in, path_out=None, quality=0, max_filesize=0): + """Resize using ImageMagick. + + Use the ``magick`` program or ``convert`` on older versions. Return + the output path of resized image. """ path_out = path_out or temp_file_for(path_in) - log.debug(u'artresizer: ImageMagick resizing {0} to {1}', + log.debug('artresizer: ImageMagick resizing {0} to {1}', util.displayable_path(path_in), util.displayable_path(path_out)) - # "-resize widthxheight>" shrinks images with dimension(s) larger - # than the corresponding width and/or height dimension(s). The > - # "only shrink" flag is prefixed by ^ escape char for Windows - # compatibility. + # "-resize WIDTHx>" shrinks images with the width larger + # than the given width while maintaining the aspect ratio + # with regards to the height. + # ImageMagick already seems to default to no interlace, but we include it + # here for the sake of explicitness. + cmd = ArtResizer.shared.im_convert_cmd + [ + util.syspath(path_in, prefix=False), + '-resize', f'{maxwidth}x>', + '-interlace', 'none', + ] + + if quality > 0: + cmd += ['-quality', f'{quality}'] + + # "-define jpeg:extent=SIZEb" sets the target filesize for imagemagick to + # SIZE in bytes. + if max_filesize > 0: + cmd += ['-define', f'jpeg:extent={max_filesize}b'] + + cmd.append(util.syspath(path_out, prefix=False)) + try: - util.command_output([ - 'convert', util.syspath(path_in, prefix=False), - '-resize', '{0}x^>'.format(maxwidth), - util.syspath(path_out, prefix=False), - ]) + util.command_output(cmd) except subprocess.CalledProcessError: - log.warning(u'artresizer: IM convert failed for {0}', + log.warning('artresizer: IM convert failed for {0}', util.displayable_path(path_in)) return path_in + return path_out @@ -113,31 +166,33 @@ BACKEND_FUNCS = { def pil_getsize(path_in): from PIL import Image + try: im = Image.open(util.syspath(path_in)) return im.size - except IOError as exc: - log.error(u"PIL could not read file {}: {}", + except OSError as exc: + log.error("PIL could not read file {}: {}", util.displayable_path(path_in), exc) def im_getsize(path_in): - cmd = ['identify', '-format', '%w %h', - util.syspath(path_in, prefix=False)] + cmd = ArtResizer.shared.im_identify_cmd + \ + ['-format', '%w %h', util.syspath(path_in, prefix=False)] + try: - out = util.command_output(cmd) + out = util.command_output(cmd).stdout except subprocess.CalledProcessError as exc: - log.warning(u'ImageMagick size query failed') + log.warning('ImageMagick size query failed') log.debug( - u'`convert` exited with (status {}) when ' - u'getting size with command {}:\n{}', + '`convert` exited with (status {}) when ' + 'getting size with command {}:\n{}', exc.returncode, cmd, exc.output.strip() ) return try: return tuple(map(int, out.split(b' '))) except IndexError: - log.warning(u'Could not understand IM output: {0!r}', out) + log.warning('Could not understand IM output: {0!r}', out) BACKEND_GET_SIZE = { @@ -146,24 +201,125 @@ BACKEND_GET_SIZE = { } +def pil_deinterlace(path_in, path_out=None): + path_out = path_out or temp_file_for(path_in) + from PIL import Image + + try: + im = Image.open(util.syspath(path_in)) + im.save(util.py3_path(path_out), progressive=False) + return path_out + except IOError: + return path_in + + +def im_deinterlace(path_in, path_out=None): + path_out = path_out or temp_file_for(path_in) + + cmd = ArtResizer.shared.im_convert_cmd + [ + util.syspath(path_in, prefix=False), + '-interlace', 'none', + util.syspath(path_out, prefix=False), + ] + + try: + util.command_output(cmd) + return path_out + except subprocess.CalledProcessError: + return path_in + + +DEINTERLACE_FUNCS = { + PIL: pil_deinterlace, + IMAGEMAGICK: im_deinterlace, +} + + +def im_get_format(filepath): + cmd = ArtResizer.shared.im_identify_cmd + [ + '-format', '%[magick]', + util.syspath(filepath) + ] + + try: + return util.command_output(cmd).stdout + except subprocess.CalledProcessError: + return None + + +def pil_get_format(filepath): + from PIL import Image, UnidentifiedImageError + + try: + with Image.open(util.syspath(filepath)) as im: + return im.format + except (ValueError, TypeError, UnidentifiedImageError, FileNotFoundError): + log.exception("failed to detect image format for {}", filepath) + return None + + +BACKEND_GET_FORMAT = { + PIL: pil_get_format, + IMAGEMAGICK: im_get_format, +} + + +def im_convert_format(source, target, deinterlaced): + cmd = ArtResizer.shared.im_convert_cmd + [ + util.syspath(source), + *(["-interlace", "none"] if deinterlaced else []), + util.syspath(target), + ] + + try: + subprocess.check_call( + cmd, + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL + ) + return target + except subprocess.CalledProcessError: + return source + + +def pil_convert_format(source, target, deinterlaced): + from PIL import Image, UnidentifiedImageError + + try: + with Image.open(util.syspath(source)) as im: + im.save(util.py3_path(target), progressive=not deinterlaced) + return target + except (ValueError, TypeError, UnidentifiedImageError, FileNotFoundError, + OSError): + log.exception("failed to convert image {} -> {}", source, target) + return source + + +BACKEND_CONVERT_IMAGE_FORMAT = { + PIL: pil_convert_format, + IMAGEMAGICK: im_convert_format, +} + + class Shareable(type): """A pseudo-singleton metaclass that allows both shared and non-shared instances. The ``MyClass.shared`` property holds a lazily-created shared instance of ``MyClass`` while calling ``MyClass()`` to construct a new object works as usual. """ - def __init__(self, name, bases, dict): - super(Shareable, self).__init__(name, bases, dict) - self._instance = None + + def __init__(cls, name, bases, dict): + super().__init__(name, bases, dict) + cls._instance = None @property - def shared(self): - if self._instance is None: - self._instance = self() - return self._instance + def shared(cls): + if cls._instance is None: + cls._instance = cls() + return cls._instance -class ArtResizer(six.with_metaclass(Shareable, object)): +class ArtResizer(metaclass=Shareable): """A singleton class that performs image resizes. """ @@ -171,21 +327,44 @@ class ArtResizer(six.with_metaclass(Shareable, object)): """Create a resizer object with an inferred method. """ self.method = self._check_method() - log.debug(u"artresizer: method is {0}", self.method) + log.debug("artresizer: method is {0}", self.method) self.can_compare = self._can_compare() - def resize(self, maxwidth, path_in, path_out=None): + # Use ImageMagick's magick binary when it's available. If it's + # not, fall back to the older, separate convert and identify + # commands. + if self.method[0] == IMAGEMAGICK: + self.im_legacy = self.method[2] + if self.im_legacy: + self.im_convert_cmd = ['convert'] + self.im_identify_cmd = ['identify'] + else: + self.im_convert_cmd = ['magick'] + self.im_identify_cmd = ['magick', 'identify'] + + def resize( + self, maxwidth, path_in, path_out=None, quality=0, max_filesize=0 + ): """Manipulate an image file according to the method, returning a new path. For PIL or IMAGEMAGIC methods, resizes the image to a - temporary file. For WEBPROXY, returns `path_in` unmodified. + temporary file and encodes with the specified quality level. + For WEBPROXY, returns `path_in` unmodified. """ if self.local: func = BACKEND_FUNCS[self.method[0]] - return func(maxwidth, path_in, path_out) + return func(maxwidth, path_in, path_out, + quality=quality, max_filesize=max_filesize) else: return path_in - def proxy_url(self, maxwidth, url): + def deinterlace(self, path_in, path_out=None): + if self.local: + func = DEINTERLACE_FUNCS[self.method[0]] + return func(path_in, path_out) + else: + return path_in + + def proxy_url(self, maxwidth, url, quality=0): """Modifies an image URL according the method, returning a new URL. For WEBPROXY, a URL on the proxy server is returned. Otherwise, the URL is returned unmodified. @@ -193,7 +372,7 @@ class ArtResizer(six.with_metaclass(Shareable, object)): if self.local: return url else: - return resize_url(url, maxwidth) + return resize_url(url, maxwidth, quality) @property def local(self): @@ -206,12 +385,50 @@ class ArtResizer(six.with_metaclass(Shareable, object)): """Return the size of an image file as an int couple (width, height) in pixels. - Only available locally + Only available locally. """ if self.local: func = BACKEND_GET_SIZE[self.method[0]] return func(path_in) + def get_format(self, path_in): + """Returns the format of the image as a string. + + Only available locally. + """ + if self.local: + func = BACKEND_GET_FORMAT[self.method[0]] + return func(path_in) + + def reformat(self, path_in, new_format, deinterlaced=True): + """Converts image to desired format, updating its extension, but + keeping the same filename. + + Only available locally. + """ + if not self.local: + return path_in + + new_format = new_format.lower() + # A nonexhaustive map of image "types" to extensions overrides + new_format = { + 'jpeg': 'jpg', + }.get(new_format, new_format) + + fname, ext = os.path.splitext(path_in) + path_new = fname + b'.' + new_format.encode('utf8') + func = BACKEND_CONVERT_IMAGE_FORMAT[self.method[0]] + + # allows the exception to propagate, while still making sure a changed + # file path was removed + result_path = path_in + try: + result_path = func(path_in, path_new, deinterlaced) + finally: + if result_path != path_in: + os.unlink(path_in) + return result_path + def _can_compare(self): """A boolean indicating whether image comparison is available""" @@ -219,10 +436,20 @@ class ArtResizer(six.with_metaclass(Shareable, object)): @staticmethod def _check_method(): - """Return a tuple indicating an available method and its version.""" + """Return a tuple indicating an available method and its version. + + The result has at least two elements: + - The method, eitehr WEBPROXY, PIL, or IMAGEMAGICK. + - The version. + + If the method is IMAGEMAGICK, there is also a third element: a + bool flag indicating whether to use the `magick` binary or + legacy single-purpose executables (`convert`, `identify`, etc.) + """ version = get_im_version() if version: - return IMAGEMAGICK, version + version, legacy = version + return IMAGEMAGICK, version, legacy version = get_pil_version() if version: @@ -232,31 +459,34 @@ class ArtResizer(six.with_metaclass(Shareable, object)): def get_im_version(): - """Return Image Magick version or None if it is unavailable - Try invoking ImageMagick's "convert". + """Get the ImageMagick version and legacy flag as a pair. Or return + None if ImageMagick is not available. """ - try: - out = util.command_output(['convert', '--version']) + for cmd_name, legacy in ((['magick'], False), (['convert'], True)): + cmd = cmd_name + ['--version'] - if b'imagemagick' in out.lower(): - pattern = br".+ (\d+)\.(\d+)\.(\d+).*" - match = re.search(pattern, out) - if match: - return (int(match.group(1)), - int(match.group(2)), - int(match.group(3))) - return (0,) + try: + out = util.command_output(cmd).stdout + except (subprocess.CalledProcessError, OSError) as exc: + log.debug('ImageMagick version check failed: {}', exc) + else: + if b'imagemagick' in out.lower(): + pattern = br".+ (\d+)\.(\d+)\.(\d+).*" + match = re.search(pattern, out) + if match: + version = (int(match.group(1)), + int(match.group(2)), + int(match.group(3))) + return version, legacy - except (subprocess.CalledProcessError, OSError) as exc: - log.debug(u'ImageMagick check `convert --version` failed: {}', exc) - return None + return None def get_pil_version(): - """Return Image Magick version or None if it is unavailable - Try importing PIL.""" + """Get the PIL/Pillow version, or None if it is unavailable. + """ try: - __import__('PIL', fromlist=[str('Image')]) + __import__('PIL', fromlist=['Image']) return (0,) except ImportError: return None diff --git a/lib/beets/util/bluelet.py b/lib/beets/util/bluelet.py old mode 100755 new mode 100644 index 48dd7bd9..a40f3b2f --- a/lib/beets/util/bluelet.py +++ b/lib/beets/util/bluelet.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Extremely simple pure-Python implementation of coroutine-style asynchronous socket I/O. Inspired by, but inferior to, Eventlet. Bluelet can also be thought of as a less-terrible replacement for @@ -7,9 +5,7 @@ asyncore. Bluelet: easy concurrency without all the messy parallelism. """ -from __future__ import division, absolute_import, print_function -import six import socket import select import sys @@ -22,7 +18,7 @@ import collections # Basic events used for thread scheduling. -class Event(object): +class Event: """Just a base class identifying Bluelet events. An event is an object yielded from a Bluelet thread coroutine to suspend operation and communicate with the scheduler. @@ -201,7 +197,7 @@ class ThreadException(Exception): self.exc_info = exc_info def reraise(self): - six.reraise(self.exc_info[0], self.exc_info[1], self.exc_info[2]) + raise self.exc_info[1].with_traceback(self.exc_info[2]) SUSPENDED = Event() # Special sentinel placeholder for suspended threads. @@ -269,7 +265,7 @@ def run(root_coro): except StopIteration: # Thread is done. complete_thread(coro, None) - except: + except BaseException: # Thread raised some other exception. del threads[coro] raise ThreadException(coro, sys.exc_info()) @@ -336,16 +332,20 @@ def run(root_coro): break # Wait and fire. - event2coro = dict((v, k) for k, v in threads.items()) + event2coro = {v: k for k, v in threads.items()} for event in _event_select(threads.values()): # Run the IO operation, but catch socket errors. try: value = event.fire() - except socket.error as exc: + except OSError as exc: if isinstance(exc.args, tuple) and \ exc.args[0] == errno.EPIPE: # Broken pipe. Remote host disconnected. pass + elif isinstance(exc.args, tuple) and \ + exc.args[0] == errno.ECONNRESET: + # Connection was reset by peer. + pass else: traceback.print_exc() # Abort the coroutine. @@ -366,7 +366,7 @@ def run(root_coro): exit_te = te break - except: + except BaseException: # For instance, KeyboardInterrupt during select(). Raise # into root thread and terminate others. threads = {root_coro: ExceptionEvent(sys.exc_info())} @@ -386,7 +386,7 @@ class SocketClosedError(Exception): pass -class Listener(object): +class Listener: """A socket wrapper object for listening sockets. """ def __init__(self, host, port): @@ -416,7 +416,7 @@ class Listener(object): self.sock.close() -class Connection(object): +class Connection: """A socket wrapper object for connected sockets. """ def __init__(self, sock, addr): @@ -541,7 +541,7 @@ def spawn(coro): and child coroutines run concurrently. """ if not isinstance(coro, types.GeneratorType): - raise ValueError(u'%s is not a coroutine' % coro) + raise ValueError('%s is not a coroutine' % coro) return SpawnEvent(coro) @@ -551,7 +551,7 @@ def call(coro): returns a value using end(), then this event returns that value. """ if not isinstance(coro, types.GeneratorType): - raise ValueError(u'%s is not a coroutine' % coro) + raise ValueError('%s is not a coroutine' % coro) return DelegationEvent(coro) diff --git a/lib/beets/util/confit.py b/lib/beets/util/confit.py old mode 100755 new mode 100644 index 373e05ff..dd912c44 --- a/lib/beets/util/confit.py +++ b/lib/beets/util/confit.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- -# This file is part of Confuse. -# Copyright 2016, Adrian Sampson. +# This file is part of beets. +# Copyright 2016-2019, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -13,1447 +12,17 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -"""Worry-free YAML configuration files. -""" -from __future__ import division, absolute_import, print_function -import platform -import os -import pkgutil -import sys -import yaml -import collections -import re -from collections import OrderedDict +import confuse -UNIX_DIR_VAR = 'XDG_CONFIG_HOME' -UNIX_DIR_FALLBACK = '~/.config' -WINDOWS_DIR_VAR = 'APPDATA' -WINDOWS_DIR_FALLBACK = '~\\AppData\\Roaming' -MAC_DIR = '~/Library/Application Support' +import warnings +warnings.warn("beets.util.confit is deprecated; use confuse instead") -CONFIG_FILENAME = 'config.yaml' -DEFAULT_FILENAME = 'config_default.yaml' -ROOT_NAME = 'root' +# Import everything from the confuse module into this module. +for key, value in confuse.__dict__.items(): + if key not in ['__name__']: + globals()[key] = value -YAML_TAB_PROBLEM = "found character '\\t' that cannot start any token" -REDACTED_TOMBSTONE = 'REDACTED' - - -# Utilities. - -PY3 = sys.version_info[0] == 3 -STRING = str if PY3 else unicode # noqa: F821 -BASESTRING = str if PY3 else basestring # noqa: F821 -NUMERIC_TYPES = (int, float) if PY3 else (int, float, long) # noqa: F821 - - -def iter_first(sequence): - """Get the first element from an iterable or raise a ValueError if - the iterator generates no values. - """ - it = iter(sequence) - try: - return next(it) - except StopIteration: - raise ValueError() - - -# Exceptions. - -class ConfigError(Exception): - """Base class for exceptions raised when querying a configuration. - """ - - -class NotFoundError(ConfigError): - """A requested value could not be found in the configuration trees. - """ - - -class ConfigValueError(ConfigError): - """The value in the configuration is illegal.""" - - -class ConfigTypeError(ConfigValueError): - """The value in the configuration did not match the expected type. - """ - - -class ConfigTemplateError(ConfigError): - """Base class for exceptions raised because of an invalid template. - """ - - -class ConfigReadError(ConfigError): - """A configuration file could not be read.""" - def __init__(self, filename, reason=None): - self.filename = filename - self.reason = reason - - message = u'file {0} could not be read'.format(filename) - if isinstance(reason, yaml.scanner.ScannerError) and \ - reason.problem == YAML_TAB_PROBLEM: - # Special-case error message for tab indentation in YAML markup. - message += u': found tab character at line {0}, column {1}'.format( - reason.problem_mark.line + 1, - reason.problem_mark.column + 1, - ) - elif reason: - # Generic error message uses exception's message. - message += u': {0}'.format(reason) - - super(ConfigReadError, self).__init__(message) - - -# Views and sources. - -class ConfigSource(dict): - """A dictionary augmented with metadata about the source of the - configuration. - """ - def __init__(self, value, filename=None, default=False): - super(ConfigSource, self).__init__(value) - if filename is not None and not isinstance(filename, BASESTRING): - raise TypeError(u'filename must be a string or None') - self.filename = filename - self.default = default - - def __repr__(self): - return 'ConfigSource({0!r}, {1!r}, {2!r})'.format( - super(ConfigSource, self), - self.filename, - self.default, - ) - - @classmethod - def of(cls, value): - """Given either a dictionary or a `ConfigSource` object, return - a `ConfigSource` object. This lets a function accept either type - of object as an argument. - """ - if isinstance(value, ConfigSource): - return value - elif isinstance(value, dict): - return ConfigSource(value) - else: - raise TypeError(u'source value must be a dict') - - -class ConfigView(object): - """A configuration "view" is a query into a program's configuration - data. A view represents a hypothetical location in the configuration - tree; to extract the data from the location, a client typically - calls the ``view.get()`` method. The client can access children in - the tree (subviews) by subscripting the parent view (i.e., - ``view[key]``). - """ - - name = None - """The name of the view, depicting the path taken through the - configuration in Python-like syntax (e.g., ``foo['bar'][42]``). - """ - - def resolve(self): - """The core (internal) data retrieval method. Generates (value, - source) pairs for each source that contains a value for this - view. May raise ConfigTypeError if a type error occurs while - traversing a source. - """ - raise NotImplementedError - - def first(self): - """Return a (value, source) pair for the first object found for - this view. This amounts to the first element returned by - `resolve`. If no values are available, a NotFoundError is - raised. - """ - pairs = self.resolve() - try: - return iter_first(pairs) - except ValueError: - raise NotFoundError(u"{0} not found".format(self.name)) - - def exists(self): - """Determine whether the view has a setting in any source. - """ - try: - self.first() - except NotFoundError: - return False - return True - - def add(self, value): - """Set the *default* value for this configuration view. The - specified value is added as the lowest-priority configuration - data source. - """ - raise NotImplementedError - - def set(self, value): - """*Override* the value for this configuration view. The - specified value is added as the highest-priority configuration - data source. - """ - raise NotImplementedError - - def root(self): - """The RootView object from which this view is descended. - """ - raise NotImplementedError - - def __repr__(self): - return '<{}: {}>'.format(self.__class__.__name__, self.name) - - def __iter__(self): - """Iterate over the keys of a dictionary view or the *subviews* - of a list view. - """ - # Try getting the keys, if this is a dictionary view. - try: - keys = self.keys() - for key in keys: - yield key - - except ConfigTypeError: - # Otherwise, try iterating over a list. - collection = self.get() - if not isinstance(collection, (list, tuple)): - raise ConfigTypeError( - u'{0} must be a dictionary or a list, not {1}'.format( - self.name, type(collection).__name__ - ) - ) - - # Yield all the indices in the list. - for index in range(len(collection)): - yield self[index] - - def __getitem__(self, key): - """Get a subview of this view.""" - return Subview(self, key) - - def __setitem__(self, key, value): - """Create an overlay source to assign a given key under this - view. - """ - self.set({key: value}) - - def __contains__(self, key): - return self[key].exists() - - def set_args(self, namespace): - """Overlay parsed command-line arguments, generated by a library - like argparse or optparse, onto this view's value. ``namespace`` - can be a ``dict`` or namespace object. - """ - args = {} - if isinstance(namespace, dict): - items = namespace.items() - else: - items = namespace.__dict__.items() - for key, value in items: - if value is not None: # Avoid unset options. - args[key] = value - self.set(args) - - # Magical conversions. These special methods make it possible to use - # View objects somewhat transparently in certain circumstances. For - # example, rather than using ``view.get(bool)``, it's possible to - # just say ``bool(view)`` or use ``view`` in a conditional. - - def __str__(self): - """Get the value for this view as a bytestring. - """ - if PY3: - return self.__unicode__() - else: - return bytes(self.get()) - - def __unicode__(self): - """Get the value for this view as a Unicode string. - """ - return STRING(self.get()) - - def __nonzero__(self): - """Gets the value for this view as a boolean. (Python 2 only.) - """ - return self.__bool__() - - def __bool__(self): - """Gets the value for this view as a boolean. (Python 3 only.) - """ - return bool(self.get()) - - # Dictionary emulation methods. - - def keys(self): - """Returns a list containing all the keys available as subviews - of the current views. This enumerates all the keys in *all* - dictionaries matching the current view, in contrast to - ``view.get(dict).keys()``, which gets all the keys for the - *first* dict matching the view. If the object for this view in - any source is not a dict, then a ConfigTypeError is raised. The - keys are ordered according to how they appear in each source. - """ - keys = [] - - for dic, _ in self.resolve(): - try: - cur_keys = dic.keys() - except AttributeError: - raise ConfigTypeError( - u'{0} must be a dict, not {1}'.format( - self.name, type(dic).__name__ - ) - ) - - for key in cur_keys: - if key not in keys: - keys.append(key) - - return keys - - def items(self): - """Iterates over (key, subview) pairs contained in dictionaries - from *all* sources at this view. If the object for this view in - any source is not a dict, then a ConfigTypeError is raised. - """ - for key in self.keys(): - yield key, self[key] - - def values(self): - """Iterates over all the subviews contained in dictionaries from - *all* sources at this view. If the object for this view in any - source is not a dict, then a ConfigTypeError is raised. - """ - for key in self.keys(): - yield self[key] - - # List/sequence emulation. - - def all_contents(self): - """Iterates over all subviews from collections at this view from - *all* sources. If the object for this view in any source is not - iterable, then a ConfigTypeError is raised. This method is - intended to be used when the view indicates a list; this method - will concatenate the contents of the list from all sources. - """ - for collection, _ in self.resolve(): - try: - it = iter(collection) - except TypeError: - raise ConfigTypeError( - u'{0} must be an iterable, not {1}'.format( - self.name, type(collection).__name__ - ) - ) - for value in it: - yield value - - # Validation and conversion. - - def flatten(self, redact=False): - """Create a hierarchy of OrderedDicts containing the data from - this view, recursively reifying all views to get their - represented values. - - If `redact` is set, then sensitive values are replaced with - the string "REDACTED". - """ - od = OrderedDict() - for key, view in self.items(): - if redact and view.redact: - od[key] = REDACTED_TOMBSTONE - else: - try: - od[key] = view.flatten(redact=redact) - except ConfigTypeError: - od[key] = view.get() - return od - - def get(self, template=None): - """Retrieve the value for this view according to the template. - - The `template` against which the values are checked can be - anything convertible to a `Template` using `as_template`. This - means you can pass in a default integer or string value, for - example, or a type to just check that something matches the type - you expect. - - May raise a `ConfigValueError` (or its subclass, - `ConfigTypeError`) or a `NotFoundError` when the configuration - doesn't satisfy the template. - """ - return as_template(template).value(self, template) - - # Shortcuts for common templates. - - def as_filename(self): - """Get the value as a path. Equivalent to `get(Filename())`. - """ - return self.get(Filename()) - - def as_choice(self, choices): - """Get the value from a list of choices. Equivalent to - `get(Choice(choices))`. - """ - return self.get(Choice(choices)) - - def as_number(self): - """Get the value as any number type: int or float. Equivalent to - `get(Number())`. - """ - return self.get(Number()) - - def as_str_seq(self, split=True): - """Get the value as a sequence of strings. Equivalent to - `get(StrSeq())`. - """ - return self.get(StrSeq(split=split)) - - def as_str(self): - """Get the value as a (Unicode) string. Equivalent to - `get(unicode)` on Python 2 and `get(str)` on Python 3. - """ - return self.get(String()) - - # Redaction. - - @property - def redact(self): - """Whether the view contains sensitive information and should be - redacted from output. - """ - return () in self.get_redactions() - - @redact.setter - def redact(self, flag): - self.set_redaction((), flag) - - def set_redaction(self, path, flag): - """Add or remove a redaction for a key path, which should be an - iterable of keys. - """ - raise NotImplementedError() - - def get_redactions(self): - """Get the set of currently-redacted sub-key-paths at this view. - """ - raise NotImplementedError() - - -class RootView(ConfigView): - """The base of a view hierarchy. This view keeps track of the - sources that may be accessed by subviews. - """ - def __init__(self, sources): - """Create a configuration hierarchy for a list of sources. At - least one source must be provided. The first source in the list - has the highest priority. - """ - self.sources = list(sources) - self.name = ROOT_NAME - self.redactions = set() - - def add(self, obj): - self.sources.append(ConfigSource.of(obj)) - - def set(self, value): - self.sources.insert(0, ConfigSource.of(value)) - - def resolve(self): - return ((dict(s), s) for s in self.sources) - - def clear(self): - """Remove all sources (and redactions) from this - configuration. - """ - del self.sources[:] - self.redactions.clear() - - def root(self): - return self - - def set_redaction(self, path, flag): - if flag: - self.redactions.add(path) - elif path in self.redactions: - self.redactions.remove(path) - - def get_redactions(self): - return self.redactions - - -class Subview(ConfigView): - """A subview accessed via a subscript of a parent view.""" - def __init__(self, parent, key): - """Make a subview of a parent view for a given subscript key. - """ - self.parent = parent - self.key = key - - # Choose a human-readable name for this view. - if isinstance(self.parent, RootView): - self.name = '' - else: - self.name = self.parent.name - if not isinstance(self.key, int): - self.name += '.' - if isinstance(self.key, int): - self.name += u'#{0}'.format(self.key) - elif isinstance(self.key, bytes): - self.name += self.key.decode('utf-8') - elif isinstance(self.key, STRING): - self.name += self.key - else: - self.name += repr(self.key) - - def resolve(self): - for collection, source in self.parent.resolve(): - try: - value = collection[self.key] - except IndexError: - # List index out of bounds. - continue - except KeyError: - # Dict key does not exist. - continue - except TypeError: - # Not subscriptable. - raise ConfigTypeError( - u"{0} must be a collection, not {1}".format( - self.parent.name, type(collection).__name__ - ) - ) - yield value, source - - def set(self, value): - self.parent.set({self.key: value}) - - def add(self, value): - self.parent.add({self.key: value}) - - def root(self): - return self.parent.root() - - def set_redaction(self, path, flag): - self.parent.set_redaction((self.key,) + path, flag) - - def get_redactions(self): - return (kp[1:] for kp in self.parent.get_redactions() - if kp and kp[0] == self.key) - - -# Config file paths, including platform-specific paths and in-package -# defaults. - -# Based on get_root_path from Flask by Armin Ronacher. -def _package_path(name): - """Returns the path to the package containing the named module or - None if the path could not be identified (e.g., if - ``name == "__main__"``). - """ - loader = pkgutil.get_loader(name) - if loader is None or name == '__main__': - return None - - if hasattr(loader, 'get_filename'): - filepath = loader.get_filename(name) - else: - # Fall back to importing the specified module. - __import__(name) - filepath = sys.modules[name].__file__ - - return os.path.dirname(os.path.abspath(filepath)) - - -def config_dirs(): - """Return a platform-specific list of candidates for user - configuration directories on the system. - - The candidates are in order of priority, from highest to lowest. The - last element is the "fallback" location to be used when no - higher-priority config file exists. - """ - paths = [] - - if platform.system() == 'Darwin': - paths.append(MAC_DIR) - paths.append(UNIX_DIR_FALLBACK) - if UNIX_DIR_VAR in os.environ: - paths.append(os.environ[UNIX_DIR_VAR]) - - elif platform.system() == 'Windows': - paths.append(WINDOWS_DIR_FALLBACK) - if WINDOWS_DIR_VAR in os.environ: - paths.append(os.environ[WINDOWS_DIR_VAR]) - - else: - # Assume Unix. - paths.append(UNIX_DIR_FALLBACK) - if UNIX_DIR_VAR in os.environ: - paths.append(os.environ[UNIX_DIR_VAR]) - - # Expand and deduplicate paths. - out = [] - for path in paths: - path = os.path.abspath(os.path.expanduser(path)) - if path not in out: - out.append(path) - return out - - -# YAML loading. - -class Loader(yaml.SafeLoader): - """A customized YAML loader. This loader deviates from the official - YAML spec in a few convenient ways: - - - All strings as are Unicode objects. - - All maps are OrderedDicts. - - Strings can begin with % without quotation. - """ - # All strings should be Unicode objects, regardless of contents. - def _construct_unicode(self, node): - return self.construct_scalar(node) - - # Use ordered dictionaries for every YAML map. - # From https://gist.github.com/844388 - def construct_yaml_map(self, node): - data = OrderedDict() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_mapping(self, node, deep=False): - if isinstance(node, yaml.MappingNode): - self.flatten_mapping(node) - else: - raise yaml.constructor.ConstructorError( - None, None, - u'expected a mapping node, but found %s' % node.id, - node.start_mark - ) - - mapping = OrderedDict() - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - try: - hash(key) - except TypeError as exc: - raise yaml.constructor.ConstructorError( - u'while constructing a mapping', - node.start_mark, 'found unacceptable key (%s)' % exc, - key_node.start_mark - ) - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - # Allow bare strings to begin with %. Directives are still detected. - def check_plain(self): - plain = super(Loader, self).check_plain() - return plain or self.peek() == '%' - - -Loader.add_constructor('tag:yaml.org,2002:str', Loader._construct_unicode) -Loader.add_constructor('tag:yaml.org,2002:map', Loader.construct_yaml_map) -Loader.add_constructor('tag:yaml.org,2002:omap', Loader.construct_yaml_map) - - -def load_yaml(filename): - """Read a YAML document from a file. If the file cannot be read or - parsed, a ConfigReadError is raised. - """ - try: - with open(filename, 'r') as f: - return yaml.load(f, Loader=Loader) - except (IOError, yaml.error.YAMLError) as exc: - raise ConfigReadError(filename, exc) - - -# YAML dumping. - -class Dumper(yaml.SafeDumper): - """A PyYAML Dumper that represents OrderedDicts as ordinary mappings - (in order, of course). - """ - # From http://pyyaml.org/attachment/ticket/161/use_ordered_dict.py - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = yaml.MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = False - if hasattr(mapping, 'items'): - mapping = list(mapping.items()) - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, yaml.ScalarNode) and - not node_key.style): - best_style = False - if not (isinstance(node_value, yaml.ScalarNode) and - not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_list(self, data): - """If a list has less than 4 items, represent it in inline style - (i.e. comma separated, within square brackets). - """ - node = super(Dumper, self).represent_list(data) - length = len(data) - if self.default_flow_style is None and length < 4: - node.flow_style = True - elif self.default_flow_style is None: - node.flow_style = False - return node - - def represent_bool(self, data): - """Represent bool as 'yes' or 'no' instead of 'true' or 'false'. - """ - if data: - value = u'yes' - else: - value = u'no' - return self.represent_scalar('tag:yaml.org,2002:bool', value) - - def represent_none(self, data): - """Represent a None value with nothing instead of 'none'. - """ - return self.represent_scalar('tag:yaml.org,2002:null', '') - - -Dumper.add_representer(OrderedDict, Dumper.represent_dict) -Dumper.add_representer(bool, Dumper.represent_bool) -Dumper.add_representer(type(None), Dumper.represent_none) -Dumper.add_representer(list, Dumper.represent_list) - - -def restore_yaml_comments(data, default_data): - """Scan default_data for comments (we include empty lines in our - definition of comments) and place them before the same keys in data. - Only works with comments that are on one or more own lines, i.e. - not next to a yaml mapping. - """ - comment_map = dict() - default_lines = iter(default_data.splitlines()) - for line in default_lines: - if not line: - comment = "\n" - elif line.startswith("#"): - comment = "{0}\n".format(line) - else: - continue - while True: - line = next(default_lines) - if line and not line.startswith("#"): - break - comment += "{0}\n".format(line) - key = line.split(':')[0].strip() - comment_map[key] = comment - out_lines = iter(data.splitlines()) - out_data = "" - for line in out_lines: - key = line.split(':')[0].strip() - if key in comment_map: - out_data += comment_map[key] - out_data += "{0}\n".format(line) - return out_data - - -# Main interface. - -class Configuration(RootView): - def __init__(self, appname, modname=None, read=True): - """Create a configuration object by reading the - automatically-discovered config files for the application for a - given name. If `modname` is specified, it should be the import - name of a module whose package will be searched for a default - config file. (Otherwise, no defaults are used.) Pass `False` for - `read` to disable automatic reading of all discovered - configuration files. Use this when creating a configuration - object at module load time and then call the `read` method - later. - """ - super(Configuration, self).__init__([]) - self.appname = appname - self.modname = modname - - self._env_var = '{0}DIR'.format(self.appname.upper()) - - if read: - self.read() - - def user_config_path(self): - """Points to the location of the user configuration. - - The file may not exist. - """ - return os.path.join(self.config_dir(), CONFIG_FILENAME) - - def _add_user_source(self): - """Add the configuration options from the YAML file in the - user's configuration directory (given by `config_dir`) if it - exists. - """ - filename = self.user_config_path() - if os.path.isfile(filename): - self.add(ConfigSource(load_yaml(filename) or {}, filename)) - - def _add_default_source(self): - """Add the package's default configuration settings. This looks - for a YAML file located inside the package for the module - `modname` if it was given. - """ - if self.modname: - pkg_path = _package_path(self.modname) - if pkg_path: - filename = os.path.join(pkg_path, DEFAULT_FILENAME) - if os.path.isfile(filename): - self.add(ConfigSource(load_yaml(filename), filename, True)) - - def read(self, user=True, defaults=True): - """Find and read the files for this configuration and set them - as the sources for this configuration. To disable either - discovered user configuration files or the in-package defaults, - set `user` or `defaults` to `False`. - """ - if user: - self._add_user_source() - if defaults: - self._add_default_source() - - def config_dir(self): - """Get the path to the user configuration directory. The - directory is guaranteed to exist as a postcondition (one may be - created if none exist). - - If the application's ``...DIR`` environment variable is set, it - is used as the configuration directory. Otherwise, - platform-specific standard configuration locations are searched - for a ``config.yaml`` file. If no configuration file is found, a - fallback path is used. - """ - # If environment variable is set, use it. - if self._env_var in os.environ: - appdir = os.environ[self._env_var] - appdir = os.path.abspath(os.path.expanduser(appdir)) - if os.path.isfile(appdir): - raise ConfigError(u'{0} must be a directory'.format( - self._env_var - )) - - else: - # Search platform-specific locations. If no config file is - # found, fall back to the final directory in the list. - for confdir in config_dirs(): - appdir = os.path.join(confdir, self.appname) - if os.path.isfile(os.path.join(appdir, CONFIG_FILENAME)): - break - - # Ensure that the directory exists. - if not os.path.isdir(appdir): - os.makedirs(appdir) - return appdir - - def set_file(self, filename): - """Parses the file as YAML and inserts it into the configuration - sources with highest priority. - """ - filename = os.path.abspath(filename) - self.set(ConfigSource(load_yaml(filename), filename)) - - def dump(self, full=True, redact=False): - """Dump the Configuration object to a YAML file. - - The order of the keys is determined from the default - configuration file. All keys not in the default configuration - will be appended to the end of the file. - - :param filename: The file to dump the configuration to, or None - if the YAML string should be returned instead - :type filename: unicode - :param full: Dump settings that don't differ from the defaults - as well - :param redact: Remove sensitive information (views with the `redact` - flag set) from the output - """ - if full: - out_dict = self.flatten(redact=redact) - else: - # Exclude defaults when flattening. - sources = [s for s in self.sources if not s.default] - temp_root = RootView(sources) - temp_root.redactions = self.redactions - out_dict = temp_root.flatten(redact=redact) - - yaml_out = yaml.dump(out_dict, Dumper=Dumper, - default_flow_style=None, indent=4, - width=1000) - - # Restore comments to the YAML text. - default_source = None - for source in self.sources: - if source.default: - default_source = source - break - if default_source and default_source.filename: - with open(default_source.filename, 'r') as fp: - default_data = fp.read() - yaml_out = restore_yaml_comments(yaml_out, default_data) - - return yaml_out - - -class LazyConfig(Configuration): - """A Configuration at reads files on demand when it is first - accessed. This is appropriate for using as a global config object at - the module level. - """ - def __init__(self, appname, modname=None): - super(LazyConfig, self).__init__(appname, modname, False) - self._materialized = False # Have we read the files yet? - self._lazy_prefix = [] # Pre-materialization calls to set(). - self._lazy_suffix = [] # Calls to add(). - - def read(self, user=True, defaults=True): - self._materialized = True - super(LazyConfig, self).read(user, defaults) - - def resolve(self): - if not self._materialized: - # Read files and unspool buffers. - self.read() - self.sources += self._lazy_suffix - self.sources[:0] = self._lazy_prefix - return super(LazyConfig, self).resolve() - - def add(self, value): - super(LazyConfig, self).add(value) - if not self._materialized: - # Buffer additions to end. - self._lazy_suffix += self.sources - del self.sources[:] - - def set(self, value): - super(LazyConfig, self).set(value) - if not self._materialized: - # Buffer additions to beginning. - self._lazy_prefix[:0] = self.sources - del self.sources[:] - - def clear(self): - """Remove all sources from this configuration.""" - super(LazyConfig, self).clear() - self._lazy_suffix = [] - self._lazy_prefix = [] - - -# "Validated" configuration views: experimental! - - -REQUIRED = object() -"""A sentinel indicating that there is no default value and an exception -should be raised when the value is missing. -""" - - -class Template(object): - """A value template for configuration fields. - - The template works like a type and instructs Confuse about how to - interpret a deserialized YAML value. This includes type conversions, - providing a default value, and validating for errors. For example, a - filepath type might expand tildes and check that the file exists. - """ - def __init__(self, default=REQUIRED): - """Create a template with a given default value. - - If `default` is the sentinel `REQUIRED` (as it is by default), - then an error will be raised when a value is missing. Otherwise, - missing values will instead return `default`. - """ - self.default = default - - def __call__(self, view): - """Invoking a template on a view gets the view's value according - to the template. - """ - return self.value(view, self) - - def value(self, view, template=None): - """Get the value for a `ConfigView`. - - May raise a `NotFoundError` if the value is missing (and the - template requires it) or a `ConfigValueError` for invalid values. - """ - if view.exists(): - value, _ = view.first() - return self.convert(value, view) - elif self.default is REQUIRED: - # Missing required value. This is an error. - raise NotFoundError(u"{0} not found".format(view.name)) - else: - # Missing value, but not required. - return self.default - - def convert(self, value, view): - """Convert the YAML-deserialized value to a value of the desired - type. - - Subclasses should override this to provide useful conversions. - May raise a `ConfigValueError` when the configuration is wrong. - """ - # Default implementation does no conversion. - return value - - def fail(self, message, view, type_error=False): - """Raise an exception indicating that a value cannot be - accepted. - - `type_error` indicates whether the error is due to a type - mismatch rather than a malformed value. In this case, a more - specific exception is raised. - """ - exc_class = ConfigTypeError if type_error else ConfigValueError - raise exc_class( - u'{0}: {1}'.format(view.name, message) - ) - - def __repr__(self): - return '{0}({1})'.format( - type(self).__name__, - '' if self.default is REQUIRED else repr(self.default), - ) - - -class Integer(Template): - """An integer configuration value template. - """ - def convert(self, value, view): - """Check that the value is an integer. Floats are rounded. - """ - if isinstance(value, int): - return value - elif isinstance(value, float): - return int(value) - else: - self.fail(u'must be a number', view, True) - - -class Number(Template): - """A numeric type: either an integer or a floating-point number. - """ - def convert(self, value, view): - """Check that the value is an int or a float. - """ - if isinstance(value, NUMERIC_TYPES): - return value - else: - self.fail( - u'must be numeric, not {0}'.format(type(value).__name__), - view, - True - ) - - -class MappingTemplate(Template): - """A template that uses a dictionary to specify other types for the - values for a set of keys and produce a validated `AttrDict`. - """ - def __init__(self, mapping): - """Create a template according to a dict (mapping). The - mapping's values should themselves either be Types or - convertible to Types. - """ - subtemplates = {} - for key, typ in mapping.items(): - subtemplates[key] = as_template(typ) - self.subtemplates = subtemplates - - def value(self, view, template=None): - """Get a dict with the same keys as the template and values - validated according to the value types. - """ - out = AttrDict() - for key, typ in self.subtemplates.items(): - out[key] = typ.value(view[key], self) - return out - - def __repr__(self): - return 'MappingTemplate({0})'.format(repr(self.subtemplates)) - - -class String(Template): - """A string configuration value template. - """ - def __init__(self, default=REQUIRED, pattern=None): - """Create a template with the added optional `pattern` argument, - a regular expression string that the value should match. - """ - super(String, self).__init__(default) - self.pattern = pattern - if pattern: - self.regex = re.compile(pattern) - - def __repr__(self): - args = [] - - if self.default is not REQUIRED: - args.append(repr(self.default)) - - if self.pattern is not None: - args.append('pattern=' + repr(self.pattern)) - - return 'String({0})'.format(', '.join(args)) - - def convert(self, value, view): - """Check that the value is a string and matches the pattern. - """ - if isinstance(value, BASESTRING): - if self.pattern and not self.regex.match(value): - self.fail( - u"must match the pattern {0}".format(self.pattern), - view - ) - return value - else: - self.fail(u'must be a string', view, True) - - -class Choice(Template): - """A template that permits values from a sequence of choices. - """ - def __init__(self, choices): - """Create a template that validates any of the values from the - iterable `choices`. - - If `choices` is a map, then the corresponding value is emitted. - Otherwise, the value itself is emitted. - """ - self.choices = choices - - def convert(self, value, view): - """Ensure that the value is among the choices (and remap if the - choices are a mapping). - """ - if value not in self.choices: - self.fail( - u'must be one of {0}, not {1}'.format( - repr(list(self.choices)), repr(value) - ), - view - ) - - if isinstance(self.choices, collections.Mapping): - return self.choices[value] - else: - return value - - def __repr__(self): - return 'Choice({0!r})'.format(self.choices) - - -class OneOf(Template): - """A template that permits values complying to one of the given templates. - """ - def __init__(self, allowed, default=REQUIRED): - super(OneOf, self).__init__(default) - self.allowed = list(allowed) - - def __repr__(self): - args = [] - - if self.allowed is not None: - args.append('allowed=' + repr(self.allowed)) - - if self.default is not REQUIRED: - args.append(repr(self.default)) - - return 'OneOf({0})'.format(', '.join(args)) - - def value(self, view, template): - self.template = template - return super(OneOf, self).value(view, template) - - def convert(self, value, view): - """Ensure that the value follows at least one template. - """ - is_mapping = isinstance(self.template, MappingTemplate) - - for candidate in self.allowed: - try: - if is_mapping: - if isinstance(candidate, Filename) and \ - candidate.relative_to: - next_template = candidate.template_with_relatives( - view, - self.template - ) - - next_template.subtemplates[view.key] = as_template( - candidate - ) - else: - next_template = MappingTemplate({view.key: candidate}) - - return view.parent.get(next_template)[view.key] - else: - return view.get(candidate) - except ConfigTemplateError: - raise - except ConfigError: - pass - except ValueError as exc: - raise ConfigTemplateError(exc) - - self.fail( - u'must be one of {0}, not {1}'.format( - repr(self.allowed), repr(value) - ), - view - ) - - -class StrSeq(Template): - """A template for values that are lists of strings. - - Validates both actual YAML string lists and single strings. Strings - can optionally be split on whitespace. - """ - def __init__(self, split=True): - """Create a new template. - - `split` indicates whether, when the underlying value is a single - string, it should be split on whitespace. Otherwise, the - resulting value is a list containing a single string. - """ - super(StrSeq, self).__init__() - self.split = split - - def convert(self, value, view): - if isinstance(value, bytes): - value = value.decode('utf-8', 'ignore') - - if isinstance(value, STRING): - if self.split: - return value.split() - else: - return [value] - - try: - value = list(value) - except TypeError: - self.fail(u'must be a whitespace-separated string or a list', - view, True) - - def convert(x): - if isinstance(x, STRING): - return x - elif isinstance(x, bytes): - return x.decode('utf-8', 'ignore') - else: - self.fail(u'must be a list of strings', view, True) - return list(map(convert, value)) - - -class Filename(Template): - """A template that validates strings as filenames. - - Filenames are returned as absolute, tilde-free paths. - - Relative paths are relative to the template's `cwd` argument - when it is specified, then the configuration directory (see - the `config_dir` method) if they come from a file. Otherwise, - they are relative to the current working directory. This helps - attain the expected behavior when using command-line options. - """ - def __init__(self, default=REQUIRED, cwd=None, relative_to=None, - in_app_dir=False): - """`relative_to` is the name of a sibling value that is - being validated at the same time. - - `in_app_dir` indicates whether the path should be resolved - inside the application's config directory (even when the setting - does not come from a file). - """ - super(Filename, self).__init__(default) - self.cwd = cwd - self.relative_to = relative_to - self.in_app_dir = in_app_dir - - def __repr__(self): - args = [] - - if self.default is not REQUIRED: - args.append(repr(self.default)) - - if self.cwd is not None: - args.append('cwd=' + repr(self.cwd)) - - if self.relative_to is not None: - args.append('relative_to=' + repr(self.relative_to)) - - if self.in_app_dir: - args.append('in_app_dir=True') - - return 'Filename({0})'.format(', '.join(args)) - - def resolve_relative_to(self, view, template): - if not isinstance(template, (collections.Mapping, MappingTemplate)): - # disallow config.get(Filename(relative_to='foo')) - raise ConfigTemplateError( - u'relative_to may only be used when getting multiple values.' - ) - - elif self.relative_to == view.key: - raise ConfigTemplateError( - u'{0} is relative to itself'.format(view.name) - ) - - elif self.relative_to not in view.parent.keys(): - # self.relative_to is not in the config - self.fail( - ( - u'needs sibling value "{0}" to expand relative path' - ).format(self.relative_to), - view - ) - - old_template = {} - old_template.update(template.subtemplates) - - # save time by skipping MappingTemplate's init loop - next_template = MappingTemplate({}) - next_relative = self.relative_to - - # gather all the needed templates and nothing else - while next_relative is not None: - try: - # pop to avoid infinite loop because of recursive - # relative paths - rel_to_template = old_template.pop(next_relative) - except KeyError: - if next_relative in template.subtemplates: - # we encountered this config key previously - raise ConfigTemplateError(( - u'{0} and {1} are recursively relative' - ).format(view.name, self.relative_to)) - else: - raise ConfigTemplateError(( - u'missing template for {0}, needed to expand {1}\'s' + - u'relative path' - ).format(self.relative_to, view.name)) - - next_template.subtemplates[next_relative] = rel_to_template - next_relative = rel_to_template.relative_to - - return view.parent.get(next_template)[self.relative_to] - - def value(self, view, template=None): - path, source = view.first() - if not isinstance(path, BASESTRING): - self.fail( - u'must be a filename, not {0}'.format(type(path).__name__), - view, - True - ) - path = os.path.expanduser(STRING(path)) - - if not os.path.isabs(path): - if self.cwd is not None: - # relative to the template's argument - path = os.path.join(self.cwd, path) - - elif self.relative_to is not None: - path = os.path.join( - self.resolve_relative_to(view, template), - path, - ) - - elif source.filename or self.in_app_dir: - # From defaults: relative to the app's directory. - path = os.path.join(view.root().config_dir(), path) - - return os.path.abspath(path) - - -class TypeTemplate(Template): - """A simple template that checks that a value is an instance of a - desired Python type. - """ - def __init__(self, typ, default=REQUIRED): - """Create a template that checks that the value is an instance - of `typ`. - """ - super(TypeTemplate, self).__init__(default) - self.typ = typ - - def convert(self, value, view): - if not isinstance(value, self.typ): - self.fail( - u'must be a {0}, not {1}'.format( - self.typ.__name__, - type(value).__name__, - ), - view, - True - ) - return value - - -class AttrDict(dict): - """A `dict` subclass that can be accessed via attributes (dot - notation) for convenience. - """ - def __getattr__(self, key): - if key in self: - return self[key] - else: - raise AttributeError(key) - - -def as_template(value): - """Convert a simple "shorthand" Python value to a `Template`. - """ - if isinstance(value, Template): - # If it's already a Template, pass it through. - return value - elif isinstance(value, collections.Mapping): - # Dictionaries work as templates. - return MappingTemplate(value) - elif value is int: - return Integer() - elif isinstance(value, int): - return Integer(value) - elif isinstance(value, type) and issubclass(value, BASESTRING): - return String() - elif isinstance(value, BASESTRING): - return String(value) - elif isinstance(value, set): - # convert to list to avoid hash related problems - return Choice(list(value)) - elif isinstance(value, list): - return OneOf(value) - elif value is float: - return Number() - elif value is None: - return Template() - elif value is dict: - return TypeTemplate(collections.Mapping) - elif value is list: - return TypeTemplate(collections.Sequence) - elif isinstance(value, type): - return TypeTemplate(value) - else: - raise ValueError(u'cannot convert to template: {0!r}'.format(value)) +# Cleanup namespace. +del key, value, warnings, confuse diff --git a/lib/beets/util/enumeration.py b/lib/beets/util/enumeration.py old mode 100755 new mode 100644 index 3e946718..e49f6fdd --- a/lib/beets/util/enumeration.py +++ b/lib/beets/util/enumeration.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -13,7 +12,6 @@ # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. -from __future__ import division, absolute_import, print_function from enum import Enum diff --git a/lib/beets/util/functemplate.py b/lib/beets/util/functemplate.py old mode 100755 new mode 100644 index 51716552..289a436d --- a/lib/beets/util/functemplate.py +++ b/lib/beets/util/functemplate.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -27,30 +26,30 @@ This is sort of like a tiny, horrible degeneration of a real templating engine like Jinja2 or Mustache. """ -from __future__ import division, absolute_import, print_function import re import ast import dis import types import sys -import six +import functools -SYMBOL_DELIM = u'$' -FUNC_DELIM = u'%' -GROUP_OPEN = u'{' -GROUP_CLOSE = u'}' -ARG_SEP = u',' -ESCAPE_CHAR = u'$' +SYMBOL_DELIM = '$' +FUNC_DELIM = '%' +GROUP_OPEN = '{' +GROUP_CLOSE = '}' +ARG_SEP = ',' +ESCAPE_CHAR = '$' VARIABLE_PREFIX = '__var_' FUNCTION_PREFIX = '__func_' -class Environment(object): +class Environment: """Contains the values and functions to be substituted into a template. """ + def __init__(self, values, functions): self.values = values self.functions = functions @@ -72,15 +71,7 @@ def ex_literal(val): """An int, float, long, bool, string, or None literal with the given value. """ - if val is None: - return ast.Name('None', ast.Load()) - elif isinstance(val, six.integer_types): - return ast.Num(val) - elif isinstance(val, bool): - return ast.Name(bytes(val), ast.Load()) - elif isinstance(val, six.string_types): - return ast.Str(val) - raise TypeError(u'no literal for {0}'.format(type(val))) + return ast.Constant(val) def ex_varassign(name, expr): @@ -97,7 +88,7 @@ def ex_call(func, args): function may be an expression or the name of a function. Each argument may be an expression or a value to be used as a literal. """ - if isinstance(func, six.string_types): + if isinstance(func, str): func = ex_rvalue(func) args = list(args) @@ -105,10 +96,7 @@ def ex_call(func, args): if not isinstance(args[i], ast.expr): args[i] = ex_literal(args[i]) - if sys.version_info[:2] < (3, 5): - return ast.Call(func, args, [], None, None) - else: - return ast.Call(func, args, []) + return ast.Call(func, args, []) def compile_func(arg_names, statements, name='_the_func', debug=False): @@ -116,32 +104,30 @@ def compile_func(arg_names, statements, name='_the_func', debug=False): the resulting Python function. If `debug`, then print out the bytecode of the compiled function. """ - if six.PY2: - func_def = ast.FunctionDef( - name=name.encode('utf-8'), - args=ast.arguments( - args=[ast.Name(n, ast.Param()) for n in arg_names], - vararg=None, - kwarg=None, - defaults=[ex_literal(None) for _ in arg_names], - ), - body=statements, - decorator_list=[], - ) - else: - func_def = ast.FunctionDef( - name=name, - args=ast.arguments( - args=[ast.arg(arg=n, annotation=None) for n in arg_names], - kwonlyargs=[], - kw_defaults=[], - defaults=[ex_literal(None) for _ in arg_names], - ), - body=statements, - decorator_list=[], - ) + args_fields = { + 'args': [ast.arg(arg=n, annotation=None) for n in arg_names], + 'kwonlyargs': [], + 'kw_defaults': [], + 'defaults': [ex_literal(None) for _ in arg_names], + } + if 'posonlyargs' in ast.arguments._fields: # Added in Python 3.8. + args_fields['posonlyargs'] = [] + args = ast.arguments(**args_fields) + + func_def = ast.FunctionDef( + name=name, + args=args, + body=statements, + decorator_list=[], + ) + + # The ast.Module signature changed in 3.8 to accept a list of types to + # ignore. + if sys.version_info >= (3, 8): + mod = ast.Module([func_def], []) + else: + mod = ast.Module([func_def]) - mod = ast.Module([func_def]) ast.fix_missing_locations(mod) prog = compile(mod, '', 'exec') @@ -160,14 +146,15 @@ def compile_func(arg_names, statements, name='_the_func', debug=False): # AST nodes for the template language. -class Symbol(object): +class Symbol: """A variable-substitution symbol in a template.""" + def __init__(self, ident, original): self.ident = ident self.original = original def __repr__(self): - return u'Symbol(%s)' % repr(self.ident) + return 'Symbol(%s)' % repr(self.ident) def evaluate(self, env): """Evaluate the symbol in the environment, returning a Unicode @@ -182,24 +169,22 @@ class Symbol(object): def translate(self): """Compile the variable lookup.""" - if six.PY2: - ident = self.ident.encode('utf-8') - else: - ident = self.ident + ident = self.ident expr = ex_rvalue(VARIABLE_PREFIX + ident) - return [expr], set([ident]), set() + return [expr], {ident}, set() -class Call(object): +class Call: """A function call in a template.""" + def __init__(self, ident, args, original): self.ident = ident self.args = args self.original = original def __repr__(self): - return u'Call(%s, %s, %s)' % (repr(self.ident), repr(self.args), - repr(self.original)) + return 'Call({}, {}, {})'.format(repr(self.ident), repr(self.args), + repr(self.original)) def evaluate(self, env): """Evaluate the function call in the environment, returning a @@ -212,19 +197,15 @@ class Call(object): except Exception as exc: # Function raised exception! Maybe inlining the name of # the exception will help debug. - return u'<%s>' % six.text_type(exc) - return six.text_type(out) + return '<%s>' % str(exc) + return str(out) else: return self.original def translate(self): """Compile the function call.""" varnames = set() - if six.PY2: - ident = self.ident.encode('utf-8') - else: - ident = self.ident - funcnames = set([ident]) + funcnames = {self.ident} arg_exprs = [] for arg in self.args: @@ -235,32 +216,33 @@ class Call(object): # Create a subexpression that joins the result components of # the arguments. arg_exprs.append(ex_call( - ast.Attribute(ex_literal(u''), 'join', ast.Load()), + ast.Attribute(ex_literal(''), 'join', ast.Load()), [ex_call( 'map', [ - ex_rvalue(six.text_type.__name__), + ex_rvalue(str.__name__), ast.List(subexprs, ast.Load()), ] )], )) subexpr_call = ex_call( - FUNCTION_PREFIX + ident, + FUNCTION_PREFIX + self.ident, arg_exprs ) return [subexpr_call], varnames, funcnames -class Expression(object): +class Expression: """Top-level template construct: contains a list of text blobs, Symbols, and Calls. """ + def __init__(self, parts): self.parts = parts def __repr__(self): - return u'Expression(%s)' % (repr(self.parts)) + return 'Expression(%s)' % (repr(self.parts)) def evaluate(self, env): """Evaluate the entire expression in the environment, returning @@ -268,11 +250,11 @@ class Expression(object): """ out = [] for part in self.parts: - if isinstance(part, six.string_types): + if isinstance(part, str): out.append(part) else: out.append(part.evaluate(env)) - return u''.join(map(six.text_type, out)) + return ''.join(map(str, out)) def translate(self): """Compile the expression to a list of Python AST expressions, a @@ -282,7 +264,7 @@ class Expression(object): varnames = set() funcnames = set() for part in self.parts: - if isinstance(part, six.string_types): + if isinstance(part, str): expressions.append(ex_literal(part)) else: e, v, f = part.translate() @@ -298,7 +280,7 @@ class ParseError(Exception): pass -class Parser(object): +class Parser: """Parses a template expression string. Instantiate the class with the template source and call ``parse_expression``. The ``pos`` field will indicate the character after the expression finished and @@ -311,6 +293,7 @@ class Parser(object): replaced with a real, accepted parsing technique (PEG, parser generator, etc.). """ + def __init__(self, string, in_argument=False): """ Create a new parser. :param in_arguments: boolean that indicates the parser is to be @@ -325,8 +308,8 @@ class Parser(object): # Common parsing resources. special_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_OPEN, GROUP_CLOSE, ESCAPE_CHAR) - special_char_re = re.compile(r'[%s]|$' % - u''.join(re.escape(c) for c in special_chars)) + special_char_re = re.compile(r'[%s]|\Z' % + ''.join(re.escape(c) for c in special_chars)) escapable_chars = (SYMBOL_DELIM, FUNC_DELIM, GROUP_CLOSE, ARG_SEP) terminator_chars = (GROUP_CLOSE,) @@ -343,8 +326,11 @@ class Parser(object): if self.in_argument: extra_special_chars = (ARG_SEP,) special_char_re = re.compile( - r'[%s]|$' % u''.join(re.escape(c) for c in - self.special_chars + extra_special_chars)) + r'[%s]|\Z' % ''.join( + re.escape(c) for c in + self.special_chars + extra_special_chars + ) + ) text_parts = [] @@ -384,7 +370,7 @@ class Parser(object): # Shift all characters collected so far into a single string. if text_parts: - self.parts.append(u''.join(text_parts)) + self.parts.append(''.join(text_parts)) text_parts = [] if char == SYMBOL_DELIM: @@ -406,7 +392,7 @@ class Parser(object): # If any parsed characters remain, shift them into a string. if text_parts: - self.parts.append(u''.join(text_parts)) + self.parts.append(''.join(text_parts)) def parse_symbol(self): """Parse a variable reference (like ``$foo`` or ``${foo}``) @@ -544,11 +530,27 @@ def _parse(template): return Expression(parts) -# External interface. +def cached(func): + """Like the `functools.lru_cache` decorator, but works (as a no-op) + on Python < 3.2. + """ + if hasattr(functools, 'lru_cache'): + return functools.lru_cache(maxsize=128)(func) + else: + # Do nothing when lru_cache is not available. + return func -class Template(object): + +@cached +def template(fmt): + return Template(fmt) + + +# External interface. +class Template: """A string template, including text, Symbols, and Calls. """ + def __init__(self, template): self.expr = _parse(template) self.original = template @@ -570,7 +572,7 @@ class Template(object): """ try: res = self.compiled(values, functions) - except: # Handle any exceptions thrown by compiled version. + except Exception: # Handle any exceptions thrown by compiled version. res = self.interpret(values, functions) return res @@ -597,7 +599,7 @@ class Template(object): for funcname in funcnames: args[FUNCTION_PREFIX + funcname] = functions[funcname] parts = func(**args) - return u''.join(parts) + return ''.join(parts) return wrapper_func @@ -606,9 +608,9 @@ class Template(object): if __name__ == '__main__': import timeit - _tmpl = Template(u'foo $bar %baz{foozle $bar barzle} $bar') + _tmpl = Template('foo $bar %baz{foozle $bar barzle} $bar') _vars = {'bar': 'qux'} - _funcs = {'baz': six.text_type.upper} + _funcs = {'baz': str.upper} interp_time = timeit.timeit('_tmpl.interpret(_vars, _funcs)', 'from __main__ import _tmpl, _vars, _funcs', number=10000) @@ -617,4 +619,4 @@ if __name__ == '__main__': 'from __main__ import _tmpl, _vars, _funcs', number=10000) print(comp_time) - print(u'Speedup:', interp_time / comp_time) + print('Speedup:', interp_time / comp_time) diff --git a/lib/beets/util/hidden.py b/lib/beets/util/hidden.py old mode 100755 new mode 100644 index ed97f2bf..881de1ac --- a/lib/beets/util/hidden.py +++ b/lib/beets/util/hidden.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -14,7 +13,6 @@ # included in all copies or substantial portions of the Software. """Simple library to work out if a file is hidden on different platforms.""" -from __future__ import division, absolute_import, print_function import os import stat diff --git a/lib/beets/util/pipeline.py b/lib/beets/util/pipeline.py old mode 100755 new mode 100644 index 367e5d98..d338cb51 --- a/lib/beets/util/pipeline.py +++ b/lib/beets/util/pipeline.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -32,12 +31,10 @@ To do so, pass an iterable of coroutines to the Pipeline constructor in place of any single coroutine. """ -from __future__ import division, absolute_import, print_function -from six.moves import queue +import queue from threading import Thread, Lock import sys -import six BUBBLE = '__PIPELINE_BUBBLE__' POISON = '__PIPELINE_POISON__' @@ -91,6 +88,7 @@ class CountedQueue(queue.Queue): still feeding into it. The queue is poisoned when all threads are finished with the queue. """ + def __init__(self, maxsize=0): queue.Queue.__init__(self, maxsize) self.nthreads = 0 @@ -135,10 +133,11 @@ class CountedQueue(queue.Queue): _invalidate_queue(self, POISON, False) -class MultiMessage(object): +class MultiMessage: """A message yielded by a pipeline stage encapsulating multiple values to be sent to the next stage. """ + def __init__(self, messages): self.messages = messages @@ -210,8 +209,9 @@ def _allmsgs(obj): class PipelineThread(Thread): """Abstract base class for pipeline-stage threads.""" + def __init__(self, all_threads): - super(PipelineThread, self).__init__() + super().__init__() self.abort_lock = Lock() self.abort_flag = False self.all_threads = all_threads @@ -241,15 +241,13 @@ class FirstPipelineThread(PipelineThread): """The thread running the first stage in a parallel pipeline setup. The coroutine should just be a generator. """ + def __init__(self, coro, out_queue, all_threads): - super(FirstPipelineThread, self).__init__(all_threads) + super().__init__(all_threads) self.coro = coro self.out_queue = out_queue self.out_queue.acquire() - self.abort_lock = Lock() - self.abort_flag = False - def run(self): try: while True: @@ -270,7 +268,7 @@ class FirstPipelineThread(PipelineThread): return self.out_queue.put(msg) - except: + except BaseException: self.abort_all(sys.exc_info()) return @@ -282,8 +280,9 @@ class MiddlePipelineThread(PipelineThread): """A thread running any stage in the pipeline except the first or last. """ + def __init__(self, coro, in_queue, out_queue, all_threads): - super(MiddlePipelineThread, self).__init__(all_threads) + super().__init__(all_threads) self.coro = coro self.in_queue = in_queue self.out_queue = out_queue @@ -318,7 +317,7 @@ class MiddlePipelineThread(PipelineThread): return self.out_queue.put(msg) - except: + except BaseException: self.abort_all(sys.exc_info()) return @@ -330,8 +329,9 @@ class LastPipelineThread(PipelineThread): """A thread running the last stage in a pipeline. The coroutine should yield nothing. """ + def __init__(self, coro, in_queue, all_threads): - super(LastPipelineThread, self).__init__(all_threads) + super().__init__(all_threads) self.coro = coro self.in_queue = in_queue @@ -357,22 +357,23 @@ class LastPipelineThread(PipelineThread): # Send to consumer. self.coro.send(msg) - except: + except BaseException: self.abort_all(sys.exc_info()) return -class Pipeline(object): +class Pipeline: """Represents a staged pattern of work. Each stage in the pipeline is a coroutine that receives messages from the previous stage and yields messages to be sent to the next stage. """ + def __init__(self, stages): """Makes a new pipeline from a list of coroutines. There must be at least two stages. """ if len(stages) < 2: - raise ValueError(u'pipeline must have at least two stages') + raise ValueError('pipeline must have at least two stages') self.stages = [] for stage in stages: if isinstance(stage, (list, tuple)): @@ -425,7 +426,7 @@ class Pipeline(object): while threads[-1].is_alive(): threads[-1].join(1) - except: + except BaseException: # Stop all the threads immediately. for thread in threads: thread.abort() @@ -442,7 +443,7 @@ class Pipeline(object): exc_info = thread.exc_info if exc_info: # Make the exception appear as it was raised originally. - six.reraise(exc_info[0], exc_info[1], exc_info[2]) + raise exc_info[1].with_traceback(exc_info[2]) def pull(self): """Yield elements from the end of the pipeline. Runs the stages @@ -469,6 +470,7 @@ class Pipeline(object): for msg in msgs: yield msg + # Smoke test. if __name__ == '__main__': import time @@ -477,14 +479,14 @@ if __name__ == '__main__': # in parallel. def produce(): for i in range(5): - print(u'generating %i' % i) + print('generating %i' % i) time.sleep(1) yield i def work(): num = yield while True: - print(u'processing %i' % num) + print('processing %i' % num) time.sleep(2) num = yield num * 2 @@ -492,7 +494,7 @@ if __name__ == '__main__': while True: num = yield time.sleep(1) - print(u'received %i' % num) + print('received %i' % num) ts_start = time.time() Pipeline([produce(), work(), consume()]).run_sequential() @@ -501,22 +503,22 @@ if __name__ == '__main__': ts_par = time.time() Pipeline([produce(), (work(), work()), consume()]).run_parallel() ts_end = time.time() - print(u'Sequential time:', ts_seq - ts_start) - print(u'Parallel time:', ts_par - ts_seq) - print(u'Multiply-parallel time:', ts_end - ts_par) + print('Sequential time:', ts_seq - ts_start) + print('Parallel time:', ts_par - ts_seq) + print('Multiply-parallel time:', ts_end - ts_par) print() # Test a pipeline that raises an exception. def exc_produce(): for i in range(10): - print(u'generating %i' % i) + print('generating %i' % i) time.sleep(1) yield i def exc_work(): num = yield while True: - print(u'processing %i' % num) + print('processing %i' % num) time.sleep(3) if num == 3: raise Exception() @@ -525,6 +527,6 @@ if __name__ == '__main__': def exc_consume(): while True: num = yield - print(u'received %i' % num) + print('received %i' % num) Pipeline([exc_produce(), exc_work(), exc_consume()]).run_parallel(1) diff --git a/lib/beets/vfs.py b/lib/beets/vfs.py old mode 100755 new mode 100644 index 7f9a049e..aef69650 --- a/lib/beets/vfs.py +++ b/lib/beets/vfs.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This file is part of beets. # Copyright 2016, Adrian Sampson. # @@ -16,7 +15,6 @@ """A simple utility for constructing filesystem-like trees from beets libraries. """ -from __future__ import division, absolute_import, print_function from collections import namedtuple from beets import util diff --git a/lib/beetsplug/__init__.py b/lib/beetsplug/__init__.py index 98a7ffd5..da248491 100644 --- a/lib/beetsplug/__init__.py +++ b/lib/beetsplug/__init__.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2013, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,6 +14,7 @@ """A namespace package for beets plugins.""" + # Make this a namespace package. from pkgutil import extend_path __path__ = extend_path(__path__, __name__) diff --git a/lib/beetsplug/absubmit.py b/lib/beetsplug/absubmit.py new file mode 100644 index 00000000..d1ea692f --- /dev/null +++ b/lib/beetsplug/absubmit.py @@ -0,0 +1,196 @@ +# This file is part of beets. +# Copyright 2016, Pieter Mulder. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Calculate acoustic information and submit to AcousticBrainz. +""" + + +import errno +import hashlib +import json +import os +import subprocess +import tempfile + +from distutils.spawn import find_executable +import requests + +from beets import plugins +from beets import util +from beets import ui + +# We use this field to check whether AcousticBrainz info is present. +PROBE_FIELD = 'mood_acoustic' + + +class ABSubmitError(Exception): + """Raised when failing to analyse file with extractor.""" + + +def call(args): + """Execute the command and return its output. + + Raise a AnalysisABSubmitError on failure. + """ + try: + return util.command_output(args).stdout + except subprocess.CalledProcessError as e: + raise ABSubmitError( + '{} exited with status {}'.format(args[0], e.returncode) + ) + + +class AcousticBrainzSubmitPlugin(plugins.BeetsPlugin): + + def __init__(self): + super().__init__() + + self.config.add({ + 'extractor': '', + 'force': False, + 'pretend': False + }) + + self.extractor = self.config['extractor'].as_str() + if self.extractor: + self.extractor = util.normpath(self.extractor) + # Expicit path to extractor + if not os.path.isfile(self.extractor): + raise ui.UserError( + 'Extractor command does not exist: {0}.'. + format(self.extractor) + ) + else: + # Implicit path to extractor, search for it in path + self.extractor = 'streaming_extractor_music' + try: + call([self.extractor]) + except OSError: + raise ui.UserError( + 'No extractor command found: please install the extractor' + ' binary from https://acousticbrainz.org/download' + ) + except ABSubmitError: + # Extractor found, will exit with an error if not called with + # the correct amount of arguments. + pass + + # Get the executable location on the system, which we need + # to calculate the SHA-1 hash. + self.extractor = find_executable(self.extractor) + + # Calculate extractor hash. + self.extractor_sha = hashlib.sha1() + with open(self.extractor, 'rb') as extractor: + self.extractor_sha.update(extractor.read()) + self.extractor_sha = self.extractor_sha.hexdigest() + + base_url = 'https://acousticbrainz.org/api/v1/{mbid}/low-level' + + def commands(self): + cmd = ui.Subcommand( + 'absubmit', + help='calculate and submit AcousticBrainz analysis' + ) + cmd.parser.add_option( + '-f', '--force', dest='force_refetch', + action='store_true', default=False, + help='re-download data when already present' + ) + cmd.parser.add_option( + '-p', '--pretend', dest='pretend_fetch', + action='store_true', default=False, + help='pretend to perform action, but show \ +only files which would be processed' + ) + cmd.func = self.command + return [cmd] + + def command(self, lib, opts, args): + # Get items from arguments + items = lib.items(ui.decargs(args)) + self.opts = opts + util.par_map(self.analyze_submit, items) + + def analyze_submit(self, item): + analysis = self._get_analysis(item) + if analysis: + self._submit_data(item, analysis) + + def _get_analysis(self, item): + mbid = item['mb_trackid'] + + # Avoid re-analyzing files that already have AB data. + if not self.opts.force_refetch and not self.config['force']: + if item.get(PROBE_FIELD): + return None + + # If file has no MBID, skip it. + if not mbid: + self._log.info('Not analysing {}, missing ' + 'musicbrainz track id.', item) + return None + + if self.opts.pretend_fetch or self.config['pretend']: + self._log.info('pretend action - extract item: {}', item) + return None + + # Temporary file to save extractor output to, extractor only works + # if an output file is given. Here we use a temporary file to copy + # the data into a python object and then remove the file from the + # system. + tmp_file, filename = tempfile.mkstemp(suffix='.json') + try: + # Close the file, so the extractor can overwrite it. + os.close(tmp_file) + try: + call([self.extractor, util.syspath(item.path), filename]) + except ABSubmitError as e: + self._log.warning( + 'Failed to analyse {item} for AcousticBrainz: {error}', + item=item, error=e + ) + return None + with open(filename) as tmp_file: + analysis = json.load(tmp_file) + # Add the hash to the output. + analysis['metadata']['version']['essentia_build_sha'] = \ + self.extractor_sha + return analysis + finally: + try: + os.remove(filename) + except OSError as e: + # ENOENT means file does not exist, just ignore this error. + if e.errno != errno.ENOENT: + raise + + def _submit_data(self, item, data): + mbid = item['mb_trackid'] + headers = {'Content-Type': 'application/json'} + response = requests.post(self.base_url.format(mbid=mbid), + json=data, headers=headers) + # Test that request was successful and raise an error on failure. + if response.status_code != 200: + try: + message = response.json()['message'] + except (ValueError, KeyError) as e: + message = f'unable to get error message: {e}' + self._log.error( + 'Failed to submit AcousticBrainz analysis of {item}: ' + '{message}).', item=item, message=message + ) + else: + self._log.debug('Successfully submitted AcousticBrainz analysis ' + 'for {}.', item) diff --git a/lib/beetsplug/acousticbrainz.py b/lib/beetsplug/acousticbrainz.py new file mode 100644 index 00000000..eabc5849 --- /dev/null +++ b/lib/beetsplug/acousticbrainz.py @@ -0,0 +1,334 @@ +# This file is part of beets. +# Copyright 2015-2016, Ohm Patel. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Fetch various AcousticBrainz metadata using MBID. +""" + +from collections import defaultdict + +import requests + +from beets import plugins, ui +from beets.dbcore import types + +ACOUSTIC_BASE = "https://acousticbrainz.org/" +LEVELS = ["/low-level", "/high-level"] +ABSCHEME = { + 'highlevel': { + 'danceability': { + 'all': { + 'danceable': 'danceable' + } + }, + 'gender': { + 'value': 'gender' + }, + 'genre_rosamerica': { + 'value': 'genre_rosamerica' + }, + 'mood_acoustic': { + 'all': { + 'acoustic': 'mood_acoustic' + } + }, + 'mood_aggressive': { + 'all': { + 'aggressive': 'mood_aggressive' + } + }, + 'mood_electronic': { + 'all': { + 'electronic': 'mood_electronic' + } + }, + 'mood_happy': { + 'all': { + 'happy': 'mood_happy' + } + }, + 'mood_party': { + 'all': { + 'party': 'mood_party' + } + }, + 'mood_relaxed': { + 'all': { + 'relaxed': 'mood_relaxed' + } + }, + 'mood_sad': { + 'all': { + 'sad': 'mood_sad' + } + }, + 'moods_mirex': { + 'value': 'moods_mirex' + }, + 'ismir04_rhythm': { + 'value': 'rhythm' + }, + 'tonal_atonal': { + 'all': { + 'tonal': 'tonal' + } + }, + 'timbre': { + 'value': 'timbre' + }, + 'voice_instrumental': { + 'value': 'voice_instrumental' + }, + }, + 'lowlevel': { + 'average_loudness': 'average_loudness' + }, + 'rhythm': { + 'bpm': 'bpm' + }, + 'tonal': { + 'chords_changes_rate': 'chords_changes_rate', + 'chords_key': 'chords_key', + 'chords_number_rate': 'chords_number_rate', + 'chords_scale': 'chords_scale', + 'key_key': ('initial_key', 0), + 'key_scale': ('initial_key', 1), + 'key_strength': 'key_strength' + + } +} + + +class AcousticPlugin(plugins.BeetsPlugin): + item_types = { + 'average_loudness': types.Float(6), + 'chords_changes_rate': types.Float(6), + 'chords_key': types.STRING, + 'chords_number_rate': types.Float(6), + 'chords_scale': types.STRING, + 'danceable': types.Float(6), + 'gender': types.STRING, + 'genre_rosamerica': types.STRING, + 'initial_key': types.STRING, + 'key_strength': types.Float(6), + 'mood_acoustic': types.Float(6), + 'mood_aggressive': types.Float(6), + 'mood_electronic': types.Float(6), + 'mood_happy': types.Float(6), + 'mood_party': types.Float(6), + 'mood_relaxed': types.Float(6), + 'mood_sad': types.Float(6), + 'moods_mirex': types.STRING, + 'rhythm': types.Float(6), + 'timbre': types.STRING, + 'tonal': types.Float(6), + 'voice_instrumental': types.STRING, + } + + def __init__(self): + super().__init__() + + self.config.add({ + 'auto': True, + 'force': False, + 'tags': [] + }) + + if self.config['auto']: + self.register_listener('import_task_files', + self.import_task_files) + + def commands(self): + cmd = ui.Subcommand('acousticbrainz', + help="fetch metadata from AcousticBrainz") + cmd.parser.add_option( + '-f', '--force', dest='force_refetch', + action='store_true', default=False, + help='re-download data when already present' + ) + + def func(lib, opts, args): + items = lib.items(ui.decargs(args)) + self._fetch_info(items, ui.should_write(), + opts.force_refetch or self.config['force']) + + cmd.func = func + return [cmd] + + def import_task_files(self, session, task): + """Function is called upon beet import. + """ + self._fetch_info(task.imported_items(), False, True) + + def _get_data(self, mbid): + data = {} + for url in _generate_urls(mbid): + self._log.debug('fetching URL: {}', url) + + try: + res = requests.get(url) + except requests.RequestException as exc: + self._log.info('request error: {}', exc) + return {} + + if res.status_code == 404: + self._log.info('recording ID {} not found', mbid) + return {} + + try: + data.update(res.json()) + except ValueError: + self._log.debug('Invalid Response: {}', res.text) + return {} + + return data + + def _fetch_info(self, items, write, force): + """Fetch additional information from AcousticBrainz for the `item`s. + """ + tags = self.config['tags'].as_str_seq() + for item in items: + # If we're not forcing re-downloading for all tracks, check + # whether the data is already present. We use one + # representative field name to check for previously fetched + # data. + if not force: + mood_str = item.get('mood_acoustic', '') + if mood_str: + self._log.info('data already present for: {}', item) + continue + + # We can only fetch data for tracks with MBIDs. + if not item.mb_trackid: + continue + + self._log.info('getting data for: {}', item) + data = self._get_data(item.mb_trackid) + if data: + for attr, val in self._map_data_to_scheme(data, ABSCHEME): + if not tags or attr in tags: + self._log.debug('attribute {} of {} set to {}', + attr, + item, + val) + setattr(item, attr, val) + else: + self._log.debug('skipping attribute {} of {}' + ' (value {}) due to config', + attr, + item, + val) + item.store() + if write: + item.try_write() + + def _map_data_to_scheme(self, data, scheme): + """Given `data` as a structure of nested dictionaries, and `scheme` as a + structure of nested dictionaries , `yield` tuples `(attr, val)` where + `attr` and `val` are corresponding leaf nodes in `scheme` and `data`. + + As its name indicates, `scheme` defines how the data is structured, + so this function tries to find leaf nodes in `data` that correspond + to the leafs nodes of `scheme`, and not the other way around. + Leaf nodes of `data` that do not exist in the `scheme` do not matter. + If a leaf node of `scheme` is not present in `data`, + no value is yielded for that attribute and a simple warning is issued. + + Finally, to account for attributes of which the value is split between + several leaf nodes in `data`, leaf nodes of `scheme` can be tuples + `(attr, order)` where `attr` is the attribute to which the leaf node + belongs, and `order` is the place at which it should appear in the + value. The different `value`s belonging to the same `attr` are simply + joined with `' '`. This is hardcoded and not very flexible, but it gets + the job done. + + For example: + + >>> scheme = { + 'key1': 'attribute', + 'key group': { + 'subkey1': 'subattribute', + 'subkey2': ('composite attribute', 0) + }, + 'key2': ('composite attribute', 1) + } + >>> data = { + 'key1': 'value', + 'key group': { + 'subkey1': 'subvalue', + 'subkey2': 'part 1 of composite attr' + }, + 'key2': 'part 2' + } + >>> print(list(_map_data_to_scheme(data, scheme))) + [('subattribute', 'subvalue'), + ('attribute', 'value'), + ('composite attribute', 'part 1 of composite attr part 2')] + """ + # First, we traverse `scheme` and `data`, `yield`ing all the non + # composites attributes straight away and populating the dictionary + # `composites` with the composite attributes. + + # When we are finished traversing `scheme`, `composites` should + # map each composite attribute to an ordered list of the values + # belonging to the attribute, for example: + # `composites = {'initial_key': ['B', 'minor']}`. + + # The recursive traversal. + composites = defaultdict(list) + yield from self._data_to_scheme_child(data, + scheme, + composites) + + # When composites has been populated, yield the composite attributes + # by joining their parts. + for composite_attr, value_parts in composites.items(): + yield composite_attr, ' '.join(value_parts) + + def _data_to_scheme_child(self, subdata, subscheme, composites): + """The recursive business logic of :meth:`_map_data_to_scheme`: + Traverse two structures of nested dictionaries in parallel and `yield` + tuples of corresponding leaf nodes. + + If a leaf node belongs to a composite attribute (is a `tuple`), + populate `composites` rather than yielding straight away. + All the child functions for a single traversal share the same + `composites` instance, which is passed along. + """ + for k, v in subscheme.items(): + if k in subdata: + if type(v) == dict: + yield from self._data_to_scheme_child(subdata[k], + v, + composites) + elif type(v) == tuple: + composite_attribute, part_number = v + attribute_parts = composites[composite_attribute] + # Parts are not guaranteed to be inserted in order + while len(attribute_parts) <= part_number: + attribute_parts.append('') + attribute_parts[part_number] = subdata[k] + else: + yield v, subdata[k] + else: + self._log.warning('Acousticbrainz did not provide info' + 'about {}', k) + self._log.debug('Data {} could not be mapped to scheme {} ' + 'because key {} was not found', subdata, v, k) + + +def _generate_urls(mbid): + """Generates AcousticBrainz end point urls for given `mbid`. + """ + for level in LEVELS: + yield ACOUSTIC_BASE + mbid + level diff --git a/lib/beetsplug/albumtypes.py b/lib/beetsplug/albumtypes.py new file mode 100644 index 00000000..47f8dc64 --- /dev/null +++ b/lib/beetsplug/albumtypes.py @@ -0,0 +1,65 @@ +# This file is part of beets. +# Copyright 2021, Edgars Supe. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds an album template field for formatted album types.""" + + +from beets.autotag.mb import VARIOUS_ARTISTS_ID +from beets.library import Album +from beets.plugins import BeetsPlugin + + +class AlbumTypesPlugin(BeetsPlugin): + """Adds an album template field for formatted album types.""" + + def __init__(self): + """Init AlbumTypesPlugin.""" + super().__init__() + self.album_template_fields['atypes'] = self._atypes + self.config.add({ + 'types': [ + ('ep', 'EP'), + ('single', 'Single'), + ('soundtrack', 'OST'), + ('live', 'Live'), + ('compilation', 'Anthology'), + ('remix', 'Remix') + ], + 'ignore_va': ['compilation'], + 'bracket': '[]' + }) + + def _atypes(self, item: Album): + """Returns a formatted string based on album's types.""" + types = self.config['types'].as_pairs() + ignore_va = self.config['ignore_va'].as_str_seq() + bracket = self.config['bracket'].as_str() + + # Assign a left and right bracket or leave blank if argument is empty. + if len(bracket) == 2: + bracket_l = bracket[0] + bracket_r = bracket[1] + else: + bracket_l = '' + bracket_r = '' + + res = '' + albumtypes = item.albumtypes.split('; ') + is_va = item.mb_albumartistid == VARIOUS_ARTISTS_ID + for type in types: + if type[0] in albumtypes and type[1]: + if not is_va or (type[0] not in ignore_va and is_va): + res += f'{bracket_l}{type[1]}{bracket_r}' + + return res diff --git a/lib/beetsplug/aura.py b/lib/beetsplug/aura.py new file mode 100644 index 00000000..f4ae5527 --- /dev/null +++ b/lib/beetsplug/aura.py @@ -0,0 +1,984 @@ +# This file is part of beets. +# Copyright 2020, Callum Brown. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""An AURA server using Flask.""" + + +from mimetypes import guess_type +import re +import os.path +from os.path import isfile, getsize + +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand, _open_library +from beets import config +from beets.util import py3_path +from beets.library import Item, Album +from beets.dbcore.query import ( + MatchQuery, + NotQuery, + RegexpQuery, + AndQuery, + FixedFieldSort, + SlowFieldSort, + MultipleSort, +) + +from flask import ( + Blueprint, + Flask, + current_app, + send_file, + make_response, + request, +) + + +# Constants + +# AURA server information +# TODO: Add version information +SERVER_INFO = { + "aura-version": "0", + "server": "beets-aura", + "server-version": "0.1", + "auth-required": False, + "features": ["albums", "artists", "images"], +} + +# Maps AURA Track attribute to beets Item attribute +TRACK_ATTR_MAP = { + # Required + "title": "title", + "artist": "artist", + # Optional + "album": "album", + "track": "track", # Track number on album + "tracktotal": "tracktotal", + "disc": "disc", + "disctotal": "disctotal", + "year": "year", + "month": "month", + "day": "day", + "bpm": "bpm", + "genre": "genre", + "recording-mbid": "mb_trackid", # beets trackid is MB recording + "track-mbid": "mb_releasetrackid", + "composer": "composer", + "albumartist": "albumartist", + "comments": "comments", + # Optional for Audio Metadata + # TODO: Support the mimetype attribute, format != mime type + # "mimetype": track.format, + "duration": "length", + "framerate": "samplerate", + # I don't think beets has a framecount field + # "framecount": ???, + "channels": "channels", + "bitrate": "bitrate", + "bitdepth": "bitdepth", + "size": "filesize", +} + +# Maps AURA Album attribute to beets Album attribute +ALBUM_ATTR_MAP = { + # Required + "title": "album", + "artist": "albumartist", + # Optional + "tracktotal": "albumtotal", + "disctotal": "disctotal", + "year": "year", + "month": "month", + "day": "day", + "genre": "genre", + "release-mbid": "mb_albumid", + "release-group-mbid": "mb_releasegroupid", +} + +# Maps AURA Artist attribute to beets Item field +# Artists are not first-class in beets, so information is extracted from +# beets Items. +ARTIST_ATTR_MAP = { + # Required + "name": "artist", + # Optional + "artist-mbid": "mb_artistid", +} + + +class AURADocument: + """Base class for building AURA documents.""" + + @staticmethod + def error(status, title, detail): + """Make a response for an error following the JSON:API spec. + + Args: + status: An HTTP status code string, e.g. "404 Not Found". + title: A short, human-readable summary of the problem. + detail: A human-readable explanation specific to this + occurrence of the problem. + """ + document = { + "errors": [{"status": status, "title": title, "detail": detail}] + } + return make_response(document, status) + + def translate_filters(self): + """Translate filters from request arguments to a beets Query.""" + # The format of each filter key in the request parameter is: + # filter[]. This regex extracts . + pattern = re.compile(r"filter\[(?P[a-zA-Z0-9_-]+)\]") + queries = [] + for key, value in request.args.items(): + match = pattern.match(key) + if match: + # Extract attribute name from key + aura_attr = match.group("attribute") + # Get the beets version of the attribute name + beets_attr = self.attribute_map.get(aura_attr, aura_attr) + converter = self.get_attribute_converter(beets_attr) + value = converter(value) + # Add exact match query to list + # Use a slow query so it works with all fields + queries.append(MatchQuery(beets_attr, value, fast=False)) + # NOTE: AURA doesn't officially support multiple queries + return AndQuery(queries) + + def translate_sorts(self, sort_arg): + """Translate an AURA sort parameter into a beets Sort. + + Args: + sort_arg: The value of the 'sort' query parameter; a comma + separated list of fields to sort by, in order. + E.g. "-year,title". + """ + # Change HTTP query parameter to a list + aura_sorts = sort_arg.strip(",").split(",") + sorts = [] + for aura_attr in aura_sorts: + if aura_attr[0] == "-": + ascending = False + # Remove leading "-" + aura_attr = aura_attr[1:] + else: + # JSON:API default + ascending = True + # Get the beets version of the attribute name + beets_attr = self.attribute_map.get(aura_attr, aura_attr) + # Use slow sort so it works with all fields (inc. computed) + sorts.append(SlowFieldSort(beets_attr, ascending=ascending)) + return MultipleSort(sorts) + + def paginate(self, collection): + """Get a page of the collection and the URL to the next page. + + Args: + collection: The raw data from which resource objects can be + built. Could be an sqlite3.Cursor object (tracks and + albums) or a list of strings (artists). + """ + # Pages start from zero + page = request.args.get("page", 0, int) + # Use page limit defined in config by default. + default_limit = config["aura"]["page_limit"].get(int) + limit = request.args.get("limit", default_limit, int) + # start = offset of first item to return + start = page * limit + # end = offset of last item + 1 + end = start + limit + if end > len(collection): + end = len(collection) + next_url = None + else: + # Not the last page so work out links.next url + if not request.args: + # No existing arguments, so current page is 0 + next_url = request.url + "?page=1" + elif not request.args.get("page", None): + # No existing page argument, so add one to the end + next_url = request.url + "&page=1" + else: + # Increment page token by 1 + next_url = request.url.replace( + f"page={page}", "page={}".format(page + 1) + ) + # Get only the items in the page range + data = [self.resource_object(collection[i]) for i in range(start, end)] + return data, next_url + + def get_included(self, data, include_str): + """Build a list of resource objects for inclusion. + + Args: + data: An array of dicts in the form of resource objects. + include_str: A comma separated list of resource types to + include. E.g. "tracks,images". + """ + # Change HTTP query parameter to a list + to_include = include_str.strip(",").split(",") + # Build a list of unique type and id combinations + # For each resource object in the primary data, iterate over it's + # relationships. If a relationship matches one of the types + # requested for inclusion (e.g. "albums") then add each type-id pair + # under the "data" key to unique_identifiers, checking first that + # it has not already been added. This ensures that no resources are + # included more than once. + unique_identifiers = [] + for res_obj in data: + for rel_name, rel_obj in res_obj["relationships"].items(): + if rel_name in to_include: + # NOTE: Assumes relationship is to-many + for identifier in rel_obj["data"]: + if identifier not in unique_identifiers: + unique_identifiers.append(identifier) + # TODO: I think this could be improved + included = [] + for identifier in unique_identifiers: + res_type = identifier["type"] + if res_type == "track": + track_id = int(identifier["id"]) + track = current_app.config["lib"].get_item(track_id) + included.append(TrackDocument.resource_object(track)) + elif res_type == "album": + album_id = int(identifier["id"]) + album = current_app.config["lib"].get_album(album_id) + included.append(AlbumDocument.resource_object(album)) + elif res_type == "artist": + artist_id = identifier["id"] + included.append(ArtistDocument.resource_object(artist_id)) + elif res_type == "image": + image_id = identifier["id"] + included.append(ImageDocument.resource_object(image_id)) + else: + raise ValueError(f"Invalid resource type: {res_type}") + return included + + def all_resources(self): + """Build document for /tracks, /albums or /artists.""" + query = self.translate_filters() + sort_arg = request.args.get("sort", None) + if sort_arg: + sort = self.translate_sorts(sort_arg) + # For each sort field add a query which ensures all results + # have a non-empty, non-zero value for that field. + for s in sort.sorts: + query.subqueries.append( + NotQuery( + # Match empty fields (^$) or zero fields, (^0$) + RegexpQuery(s.field, "(^$|^0$)", fast=False) + ) + ) + else: + sort = None + # Get information from the library + collection = self.get_collection(query=query, sort=sort) + # Convert info to AURA form and paginate it + data, next_url = self.paginate(collection) + document = {"data": data} + # If there are more pages then provide a way to access them + if next_url: + document["links"] = {"next": next_url} + # Include related resources for each element in "data" + include_str = request.args.get("include", None) + if include_str: + document["included"] = self.get_included(data, include_str) + return document + + def single_resource_document(self, resource_object): + """Build document for a specific requested resource. + + Args: + resource_object: A dictionary in the form of a JSON:API + resource object. + """ + document = {"data": resource_object} + include_str = request.args.get("include", None) + if include_str: + # [document["data"]] is because arg needs to be list + document["included"] = self.get_included( + [document["data"]], include_str + ) + return document + + +class TrackDocument(AURADocument): + """Class for building documents for /tracks endpoints.""" + + attribute_map = TRACK_ATTR_MAP + + def get_collection(self, query=None, sort=None): + """Get Item objects from the library. + + Args: + query: A beets Query object or a beets query string. + sort: A beets Sort object. + """ + return current_app.config["lib"].items(query, sort) + + def get_attribute_converter(self, beets_attr): + """Work out what data type an attribute should be for beets. + + Args: + beets_attr: The name of the beets attribute, e.g. "title". + """ + # filesize is a special field (read from disk not db?) + if beets_attr == "filesize": + converter = int + else: + try: + # Look for field in list of Item fields + # and get python type of database type. + # See beets.library.Item and beets.dbcore.types + converter = Item._fields[beets_attr].model_type + except KeyError: + # Fall back to string (NOTE: probably not good) + converter = str + return converter + + @staticmethod + def resource_object(track): + """Construct a JSON:API resource object from a beets Item. + + Args: + track: A beets Item object. + """ + attributes = {} + # Use aura => beets attribute map, e.g. size => filesize + for aura_attr, beets_attr in TRACK_ATTR_MAP.items(): + a = getattr(track, beets_attr) + # Only set attribute if it's not None, 0, "", etc. + # NOTE: This could result in required attributes not being set + if a: + attributes[aura_attr] = a + + # JSON:API one-to-many relationship to parent album + relationships = { + "artists": {"data": [{"type": "artist", "id": track.artist}]} + } + # Only add album relationship if not singleton + if not track.singleton: + relationships["albums"] = { + "data": [{"type": "album", "id": str(track.album_id)}] + } + + return { + "type": "track", + "id": str(track.id), + "attributes": attributes, + "relationships": relationships, + } + + def single_resource(self, track_id): + """Get track from the library and build a document. + + Args: + track_id: The beets id of the track (integer). + """ + track = current_app.config["lib"].get_item(track_id) + if not track: + return self.error( + "404 Not Found", + "No track with the requested id.", + "There is no track with an id of {} in the library.".format( + track_id + ), + ) + return self.single_resource_document(self.resource_object(track)) + + +class AlbumDocument(AURADocument): + """Class for building documents for /albums endpoints.""" + + attribute_map = ALBUM_ATTR_MAP + + def get_collection(self, query=None, sort=None): + """Get Album objects from the library. + + Args: + query: A beets Query object or a beets query string. + sort: A beets Sort object. + """ + return current_app.config["lib"].albums(query, sort) + + def get_attribute_converter(self, beets_attr): + """Work out what data type an attribute should be for beets. + + Args: + beets_attr: The name of the beets attribute, e.g. "title". + """ + try: + # Look for field in list of Album fields + # and get python type of database type. + # See beets.library.Album and beets.dbcore.types + converter = Album._fields[beets_attr].model_type + except KeyError: + # Fall back to string (NOTE: probably not good) + converter = str + return converter + + @staticmethod + def resource_object(album): + """Construct a JSON:API resource object from a beets Album. + + Args: + album: A beets Album object. + """ + attributes = {} + # Use aura => beets attribute name map + for aura_attr, beets_attr in ALBUM_ATTR_MAP.items(): + a = getattr(album, beets_attr) + # Only set attribute if it's not None, 0, "", etc. + # NOTE: This could mean required attributes are not set + if a: + attributes[aura_attr] = a + + # Get beets Item objects for all tracks in the album sorted by + # track number. Sorting is not required but it's nice. + query = MatchQuery("album_id", album.id) + sort = FixedFieldSort("track", ascending=True) + tracks = current_app.config["lib"].items(query, sort) + # JSON:API one-to-many relationship to tracks on the album + relationships = { + "tracks": { + "data": [{"type": "track", "id": str(t.id)} for t in tracks] + } + } + # Add images relationship if album has associated images + if album.artpath: + path = py3_path(album.artpath) + filename = path.split("/")[-1] + image_id = f"album-{album.id}-{filename}" + relationships["images"] = { + "data": [{"type": "image", "id": image_id}] + } + # Add artist relationship if artist name is same on tracks + # Tracks are used to define artists so don't albumartist + # Check for all tracks in case some have featured artists + if album.albumartist in [t.artist for t in tracks]: + relationships["artists"] = { + "data": [{"type": "artist", "id": album.albumartist}] + } + + return { + "type": "album", + "id": str(album.id), + "attributes": attributes, + "relationships": relationships, + } + + def single_resource(self, album_id): + """Get album from the library and build a document. + + Args: + album_id: The beets id of the album (integer). + """ + album = current_app.config["lib"].get_album(album_id) + if not album: + return self.error( + "404 Not Found", + "No album with the requested id.", + "There is no album with an id of {} in the library.".format( + album_id + ), + ) + return self.single_resource_document(self.resource_object(album)) + + +class ArtistDocument(AURADocument): + """Class for building documents for /artists endpoints.""" + + attribute_map = ARTIST_ATTR_MAP + + def get_collection(self, query=None, sort=None): + """Get a list of artist names from the library. + + Args: + query: A beets Query object or a beets query string. + sort: A beets Sort object. + """ + # Gets only tracks with matching artist information + tracks = current_app.config["lib"].items(query, sort) + collection = [] + for track in tracks: + # Do not add duplicates + if track.artist not in collection: + collection.append(track.artist) + return collection + + def get_attribute_converter(self, beets_attr): + """Work out what data type an attribute should be for beets. + + Args: + beets_attr: The name of the beets attribute, e.g. "artist". + """ + try: + # Look for field in list of Item fields + # and get python type of database type. + # See beets.library.Item and beets.dbcore.types + converter = Item._fields[beets_attr].model_type + except KeyError: + # Fall back to string (NOTE: probably not good) + converter = str + return converter + + @staticmethod + def resource_object(artist_id): + """Construct a JSON:API resource object for the given artist. + + Args: + artist_id: A string which is the artist's name. + """ + # Get tracks where artist field exactly matches artist_id + query = MatchQuery("artist", artist_id) + tracks = current_app.config["lib"].items(query) + if not tracks: + return None + + # Get artist information from the first track + # NOTE: It could be that the first track doesn't have a + # MusicBrainz id but later tracks do, which isn't ideal. + attributes = {} + # Use aura => beets attribute map, e.g. artist => name + for aura_attr, beets_attr in ARTIST_ATTR_MAP.items(): + a = getattr(tracks[0], beets_attr) + # Only set attribute if it's not None, 0, "", etc. + # NOTE: This could mean required attributes are not set + if a: + attributes[aura_attr] = a + + relationships = { + "tracks": { + "data": [{"type": "track", "id": str(t.id)} for t in tracks] + } + } + album_query = MatchQuery("albumartist", artist_id) + albums = current_app.config["lib"].albums(query=album_query) + if len(albums) != 0: + relationships["albums"] = { + "data": [{"type": "album", "id": str(a.id)} for a in albums] + } + + return { + "type": "artist", + "id": artist_id, + "attributes": attributes, + "relationships": relationships, + } + + def single_resource(self, artist_id): + """Get info for the requested artist and build a document. + + Args: + artist_id: A string which is the artist's name. + """ + artist_resource = self.resource_object(artist_id) + if not artist_resource: + return self.error( + "404 Not Found", + "No artist with the requested id.", + "There is no artist with an id of {} in the library.".format( + artist_id + ), + ) + return self.single_resource_document(artist_resource) + + +def safe_filename(fn): + """Check whether a string is a simple (non-path) filename. + + For example, `foo.txt` is safe because it is a "plain" filename. But + `foo/bar.txt` and `../foo.txt` and `.` are all non-safe because they + can traverse to other directories other than the current one. + """ + # Rule out any directories. + if os.path.basename(fn) != fn: + return False + + # In single names, rule out Unix directory traversal names. + if fn in ('.', '..'): + return False + + return True + + +class ImageDocument(AURADocument): + """Class for building documents for /images/(id) endpoints.""" + + @staticmethod + def get_image_path(image_id): + """Works out the full path to the image with the given id. + + Returns None if there is no such image. + + Args: + image_id: A string in the form + "--". + """ + # Split image_id into its constituent parts + id_split = image_id.split("-") + if len(id_split) < 3: + # image_id is not in the required format + return None + parent_type = id_split[0] + parent_id = id_split[1] + img_filename = "-".join(id_split[2:]) + if not safe_filename(img_filename): + return None + + # Get the path to the directory parent's images are in + if parent_type == "album": + album = current_app.config["lib"].get_album(int(parent_id)) + if not album or not album.artpath: + return None + # Cut the filename off of artpath + # This is in preparation for supporting images in the same + # directory that are not tracked by beets. + artpath = py3_path(album.artpath) + dir_path = "/".join(artpath.split("/")[:-1]) + else: + # Images for other resource types are not supported + return None + + img_path = os.path.join(dir_path, img_filename) + # Check the image actually exists + if isfile(img_path): + return img_path + else: + return None + + @staticmethod + def resource_object(image_id): + """Construct a JSON:API resource object for the given image. + + Args: + image_id: A string in the form + "--". + """ + # Could be called as a static method, so can't use + # self.get_image_path() + image_path = ImageDocument.get_image_path(image_id) + if not image_path: + return None + + attributes = { + "role": "cover", + "mimetype": guess_type(image_path)[0], + "size": getsize(image_path), + } + try: + from PIL import Image + except ImportError: + pass + else: + im = Image.open(image_path) + attributes["width"] = im.width + attributes["height"] = im.height + + relationships = {} + # Split id into [parent_type, parent_id, filename] + id_split = image_id.split("-") + relationships[id_split[0] + "s"] = { + "data": [{"type": id_split[0], "id": id_split[1]}] + } + + return { + "id": image_id, + "type": "image", + # Remove attributes that are None, 0, "", etc. + "attributes": {k: v for k, v in attributes.items() if v}, + "relationships": relationships, + } + + def single_resource(self, image_id): + """Get info for the requested image and build a document. + + Args: + image_id: A string in the form + "--". + """ + image_resource = self.resource_object(image_id) + if not image_resource: + return self.error( + "404 Not Found", + "No image with the requested id.", + "There is no image with an id of {} in the library.".format( + image_id + ), + ) + return self.single_resource_document(image_resource) + + +# Initialise flask blueprint +aura_bp = Blueprint("aura_bp", __name__) + + +@aura_bp.route("/server") +def server_info(): + """Respond with info about the server.""" + return {"data": {"type": "server", "id": "0", "attributes": SERVER_INFO}} + + +# Track endpoints + + +@aura_bp.route("/tracks") +def all_tracks(): + """Respond with a list of all tracks and related information.""" + doc = TrackDocument() + return doc.all_resources() + + +@aura_bp.route("/tracks/") +def single_track(track_id): + """Respond with info about the specified track. + + Args: + track_id: The id of the track provided in the URL (integer). + """ + doc = TrackDocument() + return doc.single_resource(track_id) + + +@aura_bp.route("/tracks//audio") +def audio_file(track_id): + """Supply an audio file for the specified track. + + Args: + track_id: The id of the track provided in the URL (integer). + """ + track = current_app.config["lib"].get_item(track_id) + if not track: + return AURADocument.error( + "404 Not Found", + "No track with the requested id.", + "There is no track with an id of {} in the library.".format( + track_id + ), + ) + + path = py3_path(track.path) + if not isfile(path): + return AURADocument.error( + "404 Not Found", + "No audio file for the requested track.", + ( + "There is no audio file for track {} at the expected location" + ).format(track_id), + ) + + file_mimetype = guess_type(path)[0] + if not file_mimetype: + return AURADocument.error( + "500 Internal Server Error", + "Requested audio file has an unknown mimetype.", + ( + "The audio file for track {} has an unknown mimetype. " + "Its file extension is {}." + ).format(track_id, path.split(".")[-1]), + ) + + # Check that the Accept header contains the file's mimetype + # Takes into account */* and audio/* + # Adding support for the bitrate parameter would require some effort so I + # left it out. This means the client could be sent an error even if the + # audio doesn't need transcoding. + if not request.accept_mimetypes.best_match([file_mimetype]): + return AURADocument.error( + "406 Not Acceptable", + "Unsupported MIME type or bitrate parameter in Accept header.", + ( + "The audio file for track {} is only available as {} and " + "bitrate parameters are not supported." + ).format(track_id, file_mimetype), + ) + + return send_file( + path, + mimetype=file_mimetype, + # Handles filename in Content-Disposition header + as_attachment=True, + # Tries to upgrade the stream to support range requests + conditional=True, + ) + + +# Album endpoints + + +@aura_bp.route("/albums") +def all_albums(): + """Respond with a list of all albums and related information.""" + doc = AlbumDocument() + return doc.all_resources() + + +@aura_bp.route("/albums/") +def single_album(album_id): + """Respond with info about the specified album. + + Args: + album_id: The id of the album provided in the URL (integer). + """ + doc = AlbumDocument() + return doc.single_resource(album_id) + + +# Artist endpoints +# Artist ids are their names + + +@aura_bp.route("/artists") +def all_artists(): + """Respond with a list of all artists and related information.""" + doc = ArtistDocument() + return doc.all_resources() + + +# Using the path converter allows slashes in artist_id +@aura_bp.route("/artists/") +def single_artist(artist_id): + """Respond with info about the specified artist. + + Args: + artist_id: The id of the artist provided in the URL. A string + which is the artist's name. + """ + doc = ArtistDocument() + return doc.single_resource(artist_id) + + +# Image endpoints +# Image ids are in the form -- +# For example: album-13-cover.jpg + + +@aura_bp.route("/images/") +def single_image(image_id): + """Respond with info about the specified image. + + Args: + image_id: The id of the image provided in the URL. A string in + the form "--". + """ + doc = ImageDocument() + return doc.single_resource(image_id) + + +@aura_bp.route("/images//file") +def image_file(image_id): + """Supply an image file for the specified image. + + Args: + image_id: The id of the image provided in the URL. A string in + the form "--". + """ + img_path = ImageDocument.get_image_path(image_id) + if not img_path: + return AURADocument.error( + "404 Not Found", + "No image with the requested id.", + "There is no image with an id of {} in the library".format( + image_id + ), + ) + return send_file(img_path) + + +# WSGI app + + +def create_app(): + """An application factory for use by a WSGI server.""" + config["aura"].add( + { + "host": "127.0.0.1", + "port": 8337, + "cors": [], + "cors_supports_credentials": False, + "page_limit": 500, + } + ) + + app = Flask(__name__) + # Register AURA blueprint view functions under a URL prefix + app.register_blueprint(aura_bp, url_prefix="/aura") + # AURA specifies mimetype MUST be this + app.config["JSONIFY_MIMETYPE"] = "application/vnd.api+json" + # Disable auto-sorting of JSON keys + app.config["JSON_SORT_KEYS"] = False + # Provide a way to access the beets library + # The normal method of using the Library and config provided in the + # command function is not used because create_app() could be called + # by an external WSGI server. + # NOTE: this uses a 'private' function from beets.ui.__init__ + app.config["lib"] = _open_library(config) + + # Enable CORS if required + cors = config["aura"]["cors"].as_str_seq(list) + if cors: + from flask_cors import CORS + + # "Accept" is the only header clients use + app.config["CORS_ALLOW_HEADERS"] = "Accept" + app.config["CORS_RESOURCES"] = {r"/aura/*": {"origins": cors}} + app.config["CORS_SUPPORTS_CREDENTIALS"] = config["aura"][ + "cors_supports_credentials" + ].get(bool) + CORS(app) + + return app + + +# Beets Plugin Hook + + +class AURAPlugin(BeetsPlugin): + """The BeetsPlugin subclass for the AURA server plugin.""" + + def __init__(self): + """Add configuration options for the AURA plugin.""" + super().__init__() + + def commands(self): + """Add subcommand used to run the AURA server.""" + + def run_aura(lib, opts, args): + """Run the application using Flask's built in-server. + + Args: + lib: A beets Library object (not used). + opts: Command line options. An optparse.Values object. + args: The list of arguments to process (not used). + """ + app = create_app() + # Start the built-in server (not intended for production) + app.run( + host=self.config["host"].get(str), + port=self.config["port"].get(int), + debug=opts.debug, + threaded=True, + ) + + run_aura_cmd = Subcommand("aura", help="run an AURA server") + run_aura_cmd.parser.add_option( + "-d", + "--debug", + action="store_true", + default=False, + help="use Flask debug mode", + ) + run_aura_cmd.func = run_aura + return [run_aura_cmd] diff --git a/lib/beetsplug/badfiles.py b/lib/beetsplug/badfiles.py new file mode 100644 index 00000000..ec465895 --- /dev/null +++ b/lib/beetsplug/badfiles.py @@ -0,0 +1,215 @@ +# This file is part of beets. +# Copyright 2016, François-Xavier Thomas. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Use command-line tools to check for audio file corruption. +""" + + +from subprocess import check_output, CalledProcessError, list2cmdline, STDOUT + +import shlex +import os +import errno +import sys +import confuse +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand +from beets.util import displayable_path, par_map +from beets import ui +from beets import importer + + +class CheckerCommandException(Exception): + """Raised when running a checker failed. + + Attributes: + checker: Checker command name. + path: Path to the file being validated. + errno: Error number from the checker execution error. + msg: Message from the checker execution error. + """ + + def __init__(self, cmd, oserror): + self.checker = cmd[0] + self.path = cmd[-1] + self.errno = oserror.errno + self.msg = str(oserror) + + +class BadFiles(BeetsPlugin): + def __init__(self): + super().__init__() + self.verbose = False + + self.register_listener('import_task_start', + self.on_import_task_start) + self.register_listener('import_task_before_choice', + self.on_import_task_before_choice) + + def run_command(self, cmd): + self._log.debug("running command: {}", + displayable_path(list2cmdline(cmd))) + try: + output = check_output(cmd, stderr=STDOUT) + errors = 0 + status = 0 + except CalledProcessError as e: + output = e.output + errors = 1 + status = e.returncode + except OSError as e: + raise CheckerCommandException(cmd, e) + output = output.decode(sys.getdefaultencoding(), 'replace') + return status, errors, [line for line in output.split("\n") if line] + + def check_mp3val(self, path): + status, errors, output = self.run_command(["mp3val", path]) + if status == 0: + output = [line for line in output if line.startswith("WARNING:")] + errors = len(output) + return status, errors, output + + def check_flac(self, path): + return self.run_command(["flac", "-wst", path]) + + def check_custom(self, command): + def checker(path): + cmd = shlex.split(command) + cmd.append(path) + return self.run_command(cmd) + return checker + + def get_checker(self, ext): + ext = ext.lower() + try: + command = self.config['commands'].get(dict).get(ext) + except confuse.NotFoundError: + command = None + if command: + return self.check_custom(command) + if ext == "mp3": + return self.check_mp3val + if ext == "flac": + return self.check_flac + + def check_item(self, item): + # First, check whether the path exists. If not, the user + # should probably run `beet update` to cleanup your library. + dpath = displayable_path(item.path) + self._log.debug("checking path: {}", dpath) + if not os.path.exists(item.path): + ui.print_("{}: file does not exist".format( + ui.colorize('text_error', dpath))) + + # Run the checker against the file if one is found + ext = os.path.splitext(item.path)[1][1:].decode('utf8', 'ignore') + checker = self.get_checker(ext) + if not checker: + self._log.error("no checker specified in the config for {}", + ext) + return [] + path = item.path + if not isinstance(path, str): + path = item.path.decode(sys.getfilesystemencoding()) + try: + status, errors, output = checker(path) + except CheckerCommandException as e: + if e.errno == errno.ENOENT: + self._log.error( + "command not found: {} when validating file: {}", + e.checker, + e.path + ) + else: + self._log.error("error invoking {}: {}", e.checker, e.msg) + return [] + + error_lines = [] + + if status > 0: + error_lines.append( + "{}: checker exited with status {}" + .format(ui.colorize('text_error', dpath), status)) + for line in output: + error_lines.append(f" {line}") + + elif errors > 0: + error_lines.append( + "{}: checker found {} errors or warnings" + .format(ui.colorize('text_warning', dpath), errors)) + for line in output: + error_lines.append(f" {line}") + elif self.verbose: + error_lines.append( + "{}: ok".format(ui.colorize('text_success', dpath))) + + return error_lines + + def on_import_task_start(self, task, session): + if not self.config['check_on_import'].get(False): + return + + checks_failed = [] + + for item in task.items: + error_lines = self.check_item(item) + if error_lines: + checks_failed.append(error_lines) + + if checks_failed: + task._badfiles_checks_failed = checks_failed + + def on_import_task_before_choice(self, task, session): + if hasattr(task, '_badfiles_checks_failed'): + ui.print_('{} one or more files failed checks:' + .format(ui.colorize('text_warning', 'BAD'))) + for error in task._badfiles_checks_failed: + for error_line in error: + ui.print_(error_line) + + ui.print_() + ui.print_('What would you like to do?') + + sel = ui.input_options(['aBort', 'skip', 'continue']) + + if sel == 's': + return importer.action.SKIP + elif sel == 'c': + return None + elif sel == 'b': + raise importer.ImportAbort() + else: + raise Exception(f'Unexpected selection: {sel}') + + def command(self, lib, opts, args): + # Get items from arguments + items = lib.items(ui.decargs(args)) + self.verbose = opts.verbose + + def check_and_print(item): + for error_line in self.check_item(item): + ui.print_(error_line) + + par_map(check_and_print, items) + + def commands(self): + bad_command = Subcommand('bad', + help='check for corrupt or missing files') + bad_command.parser.add_option( + '-v', '--verbose', + action='store_true', default=False, dest='verbose', + help='view results for both the bad and uncorrupted files' + ) + bad_command.func = self.command + return [bad_command] diff --git a/lib/beetsplug/bareasc.py b/lib/beetsplug/bareasc.py new file mode 100644 index 00000000..21836936 --- /dev/null +++ b/lib/beetsplug/bareasc.py @@ -0,0 +1,82 @@ +# This file is part of beets. +# Copyright 2016, Philippe Mongeau. +# Copyright 2021, Graham R. Cobb. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and ascociated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# This module is adapted from Fuzzy in accordance to the licence of +# that module + +"""Provides a bare-ASCII matching query.""" + + +from beets import ui +from beets.ui import print_, decargs +from beets.plugins import BeetsPlugin +from beets.dbcore.query import StringFieldQuery +from unidecode import unidecode + + +class BareascQuery(StringFieldQuery): + """Compare items using bare ASCII, without accents etc.""" + @classmethod + def string_match(cls, pattern, val): + """Convert both pattern and string to plain ASCII before matching. + + If pattern is all lower case, also convert string to lower case so + match is also case insensitive + """ + # smartcase + if pattern.islower(): + val = val.lower() + pattern = unidecode(pattern) + val = unidecode(val) + return pattern in val + + +class BareascPlugin(BeetsPlugin): + """Plugin to provide bare-ASCII option for beets matching.""" + def __init__(self): + """Default prefix for selecting bare-ASCII matching is #.""" + super().__init__() + self.config.add({ + 'prefix': '#', + }) + + def queries(self): + """Register bare-ASCII matching.""" + prefix = self.config['prefix'].as_str() + return {prefix: BareascQuery} + + def commands(self): + """Add bareasc command as unidecode version of 'list'.""" + cmd = ui.Subcommand('bareasc', + help='unidecode version of beet list command') + cmd.parser.usage += "\n" \ + 'Example: %prog -f \'$album: $title\' artist:beatles' + cmd.parser.add_all_common_options() + cmd.func = self.unidecode_list + return [cmd] + + def unidecode_list(self, lib, opts, args): + """Emulate normal 'list' command but with unidecode output.""" + query = decargs(args) + album = opts.album + # Copied from commands.py - list_items + if album: + for album in lib.albums(query): + bare = unidecode(str(album)) + print_(bare) + else: + for item in lib.items(query): + bare = unidecode(str(item)) + print_(bare) diff --git a/lib/beetsplug/beatport.py b/lib/beetsplug/beatport.py new file mode 100644 index 00000000..133441d7 --- /dev/null +++ b/lib/beetsplug/beatport.py @@ -0,0 +1,470 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds Beatport release and track search support to the autotagger +""" + +import json +import re +from datetime import datetime, timedelta + +from requests_oauthlib import OAuth1Session +from requests_oauthlib.oauth1_session import (TokenRequestDenied, TokenMissing, + VerifierMissing) + +import beets +import beets.ui +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance +import confuse + + +AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing) +USER_AGENT = f'beets/{beets.__version__} +https://beets.io/' + + +class BeatportAPIError(Exception): + pass + + +class BeatportObject: + def __init__(self, data): + self.beatport_id = data['id'] + self.name = str(data['name']) + if 'releaseDate' in data: + self.release_date = datetime.strptime(data['releaseDate'], + '%Y-%m-%d') + if 'artists' in data: + self.artists = [(x['id'], str(x['name'])) + for x in data['artists']] + if 'genres' in data: + self.genres = [str(x['name']) + for x in data['genres']] + + +class BeatportClient: + _api_base = 'https://oauth-api.beatport.com' + + def __init__(self, c_key, c_secret, auth_key=None, auth_secret=None): + """ Initiate the client with OAuth information. + + For the initial authentication with the backend `auth_key` and + `auth_secret` can be `None`. Use `get_authorize_url` and + `get_access_token` to obtain them for subsequent uses of the API. + + :param c_key: OAuth1 client key + :param c_secret: OAuth1 client secret + :param auth_key: OAuth1 resource owner key + :param auth_secret: OAuth1 resource owner secret + """ + self.api = OAuth1Session( + client_key=c_key, client_secret=c_secret, + resource_owner_key=auth_key, + resource_owner_secret=auth_secret, + callback_uri='oob') + self.api.headers = {'User-Agent': USER_AGENT} + + def get_authorize_url(self): + """ Generate the URL for the user to authorize the application. + + Retrieves a request token from the Beatport API and returns the + corresponding authorization URL on their end that the user has + to visit. + + This is the first step of the initial authorization process with the + API. Once the user has visited the URL, call + :py:method:`get_access_token` with the displayed data to complete + the process. + + :returns: Authorization URL for the user to visit + :rtype: unicode + """ + self.api.fetch_request_token( + self._make_url('/identity/1/oauth/request-token')) + return self.api.authorization_url( + self._make_url('/identity/1/oauth/authorize')) + + def get_access_token(self, auth_data): + """ Obtain the final access token and secret for the API. + + :param auth_data: URL-encoded authorization data as displayed at + the authorization url (obtained via + :py:meth:`get_authorize_url`) after signing in + :type auth_data: unicode + :returns: OAuth resource owner key and secret + :rtype: (unicode, unicode) tuple + """ + self.api.parse_authorization_response( + "https://beets.io/auth?" + auth_data) + access_data = self.api.fetch_access_token( + self._make_url('/identity/1/oauth/access-token')) + return access_data['oauth_token'], access_data['oauth_token_secret'] + + def search(self, query, release_type='release', details=True): + """ Perform a search of the Beatport catalogue. + + :param query: Query string + :param release_type: Type of releases to search for, can be + 'release' or 'track' + :param details: Retrieve additional information about the + search results. Currently this will fetch + the tracklist for releases and do nothing for + tracks + :returns: Search results + :rtype: generator that yields + py:class:`BeatportRelease` or + :py:class:`BeatportTrack` + """ + response = self._get('catalog/3/search', + query=query, perPage=5, + facets=[f'fieldType:{release_type}']) + for item in response: + if release_type == 'release': + if details: + release = self.get_release(item['id']) + else: + release = BeatportRelease(item) + yield release + elif release_type == 'track': + yield BeatportTrack(item) + + def get_release(self, beatport_id): + """ Get information about a single release. + + :param beatport_id: Beatport ID of the release + :returns: The matching release + :rtype: :py:class:`BeatportRelease` + """ + response = self._get('/catalog/3/releases', id=beatport_id) + if response: + release = BeatportRelease(response[0]) + release.tracks = self.get_release_tracks(beatport_id) + return release + return None + + def get_release_tracks(self, beatport_id): + """ Get all tracks for a given release. + + :param beatport_id: Beatport ID of the release + :returns: Tracks in the matching release + :rtype: list of :py:class:`BeatportTrack` + """ + response = self._get('/catalog/3/tracks', releaseId=beatport_id, + perPage=100) + return [BeatportTrack(t) for t in response] + + def get_track(self, beatport_id): + """ Get information about a single track. + + :param beatport_id: Beatport ID of the track + :returns: The matching track + :rtype: :py:class:`BeatportTrack` + """ + response = self._get('/catalog/3/tracks', id=beatport_id) + return BeatportTrack(response[0]) + + def _make_url(self, endpoint): + """ Get complete URL for a given API endpoint. """ + if not endpoint.startswith('/'): + endpoint = '/' + endpoint + return self._api_base + endpoint + + def _get(self, endpoint, **kwargs): + """ Perform a GET request on a given API endpoint. + + Automatically extracts result data from the response and converts HTTP + exceptions into :py:class:`BeatportAPIError` objects. + """ + try: + response = self.api.get(self._make_url(endpoint), params=kwargs) + except Exception as e: + raise BeatportAPIError("Error connecting to Beatport API: {}" + .format(e)) + if not response: + raise BeatportAPIError( + "Error {0.status_code} for '{0.request.path_url}" + .format(response)) + return response.json()['results'] + + +class BeatportRelease(BeatportObject): + def __str__(self): + if len(self.artists) < 4: + artist_str = ", ".join(x[1] for x in self.artists) + else: + artist_str = "Various Artists" + return "".format( + artist_str, + self.name, + self.catalog_number, + ) + + def __repr__(self): + return str(self).encode('utf-8') + + def __init__(self, data): + BeatportObject.__init__(self, data) + if 'catalogNumber' in data: + self.catalog_number = data['catalogNumber'] + if 'label' in data: + self.label_name = data['label']['name'] + if 'category' in data: + self.category = data['category'] + if 'slug' in data: + self.url = "https://beatport.com/release/{}/{}".format( + data['slug'], data['id']) + self.genre = data.get('genre') + + +class BeatportTrack(BeatportObject): + def __str__(self): + artist_str = ", ".join(x[1] for x in self.artists) + return ("" + .format(artist_str, self.name, self.mix_name)) + + def __repr__(self): + return str(self).encode('utf-8') + + def __init__(self, data): + BeatportObject.__init__(self, data) + if 'title' in data: + self.title = str(data['title']) + if 'mixName' in data: + self.mix_name = str(data['mixName']) + self.length = timedelta(milliseconds=data.get('lengthMs', 0) or 0) + if not self.length: + try: + min, sec = data.get('length', '0:0').split(':') + self.length = timedelta(minutes=int(min), seconds=int(sec)) + except ValueError: + pass + if 'slug' in data: + self.url = "https://beatport.com/track/{}/{}" \ + .format(data['slug'], data['id']) + self.track_number = data.get('trackNumber') + self.bpm = data.get('bpm') + self.initial_key = str( + (data.get('key') or {}).get('shortName') + ) + + # Use 'subgenre' and if not present, 'genre' as a fallback. + if data.get('subGenres'): + self.genre = str(data['subGenres'][0].get('name')) + elif data.get('genres'): + self.genre = str(data['genres'][0].get('name')) + + +class BeatportPlugin(BeetsPlugin): + data_source = 'Beatport' + + def __init__(self): + super().__init__() + self.config.add({ + 'apikey': '57713c3906af6f5def151b33601389176b37b429', + 'apisecret': 'b3fe08c93c80aefd749fe871a16cd2bb32e2b954', + 'tokenfile': 'beatport_token.json', + 'source_weight': 0.5, + }) + self.config['apikey'].redact = True + self.config['apisecret'].redact = True + self.client = None + self.register_listener('import_begin', self.setup) + + def setup(self, session=None): + c_key = self.config['apikey'].as_str() + c_secret = self.config['apisecret'].as_str() + + # Get the OAuth token from a file or log in. + try: + with open(self._tokenfile()) as f: + tokendata = json.load(f) + except OSError: + # No token yet. Generate one. + token, secret = self.authenticate(c_key, c_secret) + else: + token = tokendata['token'] + secret = tokendata['secret'] + + self.client = BeatportClient(c_key, c_secret, token, secret) + + def authenticate(self, c_key, c_secret): + # Get the link for the OAuth page. + auth_client = BeatportClient(c_key, c_secret) + try: + url = auth_client.get_authorize_url() + except AUTH_ERRORS as e: + self._log.debug('authentication error: {0}', e) + raise beets.ui.UserError('communication with Beatport failed') + + beets.ui.print_("To authenticate with Beatport, visit:") + beets.ui.print_(url) + + # Ask for the verifier data and validate it. + data = beets.ui.input_("Enter the string displayed in your browser:") + try: + token, secret = auth_client.get_access_token(data) + except AUTH_ERRORS as e: + self._log.debug('authentication error: {0}', e) + raise beets.ui.UserError('Beatport token request failed') + + # Save the token for later use. + self._log.debug('Beatport token {0}, secret {1}', token, secret) + with open(self._tokenfile(), 'w') as f: + json.dump({'token': token, 'secret': secret}, f) + + return token, secret + + def _tokenfile(self): + """Get the path to the JSON file for storing the OAuth token. + """ + return self.config['tokenfile'].get(confuse.Filename(in_app_dir=True)) + + def album_distance(self, items, album_info, mapping): + """Returns the Beatport source weight and the maximum source weight + for albums. + """ + return get_distance( + data_source=self.data_source, + info=album_info, + config=self.config + ) + + def track_distance(self, item, track_info): + """Returns the Beatport source weight and the maximum source weight + for individual tracks. + """ + return get_distance( + data_source=self.data_source, + info=track_info, + config=self.config + ) + + def candidates(self, items, artist, release, va_likely, extra_tags=None): + """Returns a list of AlbumInfo objects for beatport search results + matching release and artist (if not various). + """ + if va_likely: + query = release + else: + query = f'{artist} {release}' + try: + return self._get_releases(query) + except BeatportAPIError as e: + self._log.debug('API Error: {0} (query: {1})', e, query) + return [] + + def item_candidates(self, item, artist, title): + """Returns a list of TrackInfo objects for beatport search results + matching title and artist. + """ + query = f'{artist} {title}' + try: + return self._get_tracks(query) + except BeatportAPIError as e: + self._log.debug('API Error: {0} (query: {1})', e, query) + return [] + + def album_for_id(self, release_id): + """Fetches a release by its Beatport ID and returns an AlbumInfo object + or None if the query is not a valid ID or release is not found. + """ + self._log.debug('Searching for release {0}', release_id) + match = re.search(r'(^|beatport\.com/release/.+/)(\d+)$', release_id) + if not match: + self._log.debug('Not a valid Beatport release ID.') + return None + release = self.client.get_release(match.group(2)) + if release: + return self._get_album_info(release) + return None + + def track_for_id(self, track_id): + """Fetches a track by its Beatport ID and returns a TrackInfo object + or None if the track is not a valid Beatport ID or track is not found. + """ + self._log.debug('Searching for track {0}', track_id) + match = re.search(r'(^|beatport\.com/track/.+/)(\d+)$', track_id) + if not match: + self._log.debug('Not a valid Beatport track ID.') + return None + bp_track = self.client.get_track(match.group(2)) + if bp_track is not None: + return self._get_track_info(bp_track) + return None + + def _get_releases(self, query): + """Returns a list of AlbumInfo objects for a beatport search query. + """ + # Strip non-word characters from query. Things like "!" and "-" can + # cause a query to return no results, even if they match the artist or + # album title. Use `re.UNICODE` flag to avoid stripping non-english + # word characters. + query = re.sub(r'\W+', ' ', query, flags=re.UNICODE) + # Strip medium information from query, Things like "CD1" and "disk 1" + # can also negate an otherwise positive result. + query = re.sub(r'\b(CD|disc)\s*\d+', '', query, flags=re.I) + albums = [self._get_album_info(x) + for x in self.client.search(query)] + return albums + + def _get_album_info(self, release): + """Returns an AlbumInfo object for a Beatport Release object. + """ + va = len(release.artists) > 3 + artist, artist_id = self._get_artist(release.artists) + if va: + artist = "Various Artists" + tracks = [self._get_track_info(x) for x in release.tracks] + + return AlbumInfo(album=release.name, album_id=release.beatport_id, + artist=artist, artist_id=artist_id, tracks=tracks, + albumtype=release.category, va=va, + year=release.release_date.year, + month=release.release_date.month, + day=release.release_date.day, + label=release.label_name, + catalognum=release.catalog_number, media='Digital', + data_source=self.data_source, data_url=release.url, + genre=release.genre) + + def _get_track_info(self, track): + """Returns a TrackInfo object for a Beatport Track object. + """ + title = track.name + if track.mix_name != "Original Mix": + title += f" ({track.mix_name})" + artist, artist_id = self._get_artist(track.artists) + length = track.length.total_seconds() + return TrackInfo(title=title, track_id=track.beatport_id, + artist=artist, artist_id=artist_id, + length=length, index=track.track_number, + medium_index=track.track_number, + data_source=self.data_source, data_url=track.url, + bpm=track.bpm, initial_key=track.initial_key, + genre=track.genre) + + def _get_artist(self, artists): + """Returns an artist string (all artists) and an artist_id (the main + artist) for a list of Beatport release or track artists. + """ + return MetadataSourcePlugin.get_artist( + artists=artists, id_key=0, name_key=1 + ) + + def _get_tracks(self, query): + """Returns a list of TrackInfo objects for a Beatport query. + """ + bp_tracks = self.client.search(query, release_type='track') + tracks = [self._get_track_info(x) for x in bp_tracks] + return tracks diff --git a/lib/beetsplug/bench.py b/lib/beetsplug/bench.py new file mode 100644 index 00000000..6dffbdda --- /dev/null +++ b/lib/beetsplug/bench.py @@ -0,0 +1,107 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Some simple performance benchmarks for beets. +""" + + +from beets.plugins import BeetsPlugin +from beets import ui +from beets import vfs +from beets import library +from beets.util.functemplate import Template +from beets.autotag import match +from beets import plugins +from beets import importer +import cProfile +import timeit + + +def aunique_benchmark(lib, prof): + def _build_tree(): + vfs.libtree(lib) + + # Measure path generation performance with %aunique{} included. + lib.path_formats = [ + (library.PF_KEY_DEFAULT, + Template('$albumartist/$album%aunique{}/$track $title')), + ] + if prof: + cProfile.runctx('_build_tree()', {}, {'_build_tree': _build_tree}, + 'paths.withaunique.prof') + else: + interval = timeit.timeit(_build_tree, number=1) + print('With %aunique:', interval) + + # And with %aunique replaceed with a "cheap" no-op function. + lib.path_formats = [ + (library.PF_KEY_DEFAULT, + Template('$albumartist/$album%lower{}/$track $title')), + ] + if prof: + cProfile.runctx('_build_tree()', {}, {'_build_tree': _build_tree}, + 'paths.withoutaunique.prof') + else: + interval = timeit.timeit(_build_tree, number=1) + print('Without %aunique:', interval) + + +def match_benchmark(lib, prof, query=None, album_id=None): + # If no album ID is provided, we'll match against a suitably huge + # album. + if not album_id: + album_id = '9c5c043e-bc69-4edb-81a4-1aaf9c81e6dc' + + # Get an album from the library to use as the source for the match. + items = lib.albums(query).get().items() + + # Ensure fingerprinting is invoked (if enabled). + plugins.send('import_task_start', + task=importer.ImportTask(None, None, items), + session=importer.ImportSession(lib, None, None, None)) + + # Run the match. + def _run_match(): + match.tag_album(items, search_ids=[album_id]) + if prof: + cProfile.runctx('_run_match()', {}, {'_run_match': _run_match}, + 'match.prof') + else: + interval = timeit.timeit(_run_match, number=1) + print('match duration:', interval) + + +class BenchmarkPlugin(BeetsPlugin): + """A plugin for performing some simple performance benchmarks. + """ + def commands(self): + aunique_bench_cmd = ui.Subcommand('bench_aunique', + help='benchmark for %aunique{}') + aunique_bench_cmd.parser.add_option('-p', '--profile', + action='store_true', default=False, + help='performance profiling') + aunique_bench_cmd.func = lambda lib, opts, args: \ + aunique_benchmark(lib, opts.profile) + + match_bench_cmd = ui.Subcommand('bench_match', + help='benchmark for track matching') + match_bench_cmd.parser.add_option('-p', '--profile', + action='store_true', default=False, + help='performance profiling') + match_bench_cmd.parser.add_option('-i', '--id', default=None, + help='album ID to match against') + match_bench_cmd.func = lambda lib, opts, args: \ + match_benchmark(lib, opts.profile, ui.decargs(args), opts.id) + + return [aunique_bench_cmd, match_bench_cmd] diff --git a/lib/beetsplug/bpd/__init__.py b/lib/beetsplug/bpd/__init__.py new file mode 100644 index 00000000..07198b1b --- /dev/null +++ b/lib/beetsplug/bpd/__init__.py @@ -0,0 +1,1624 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""A clone of the Music Player Daemon (MPD) that plays music from a +Beets library. Attempts to implement a compatible protocol to allow +use of the wide range of MPD clients. +""" + + +import re +import sys +from string import Template +import traceback +import random +import time +import math +import inspect +import socket + +import beets +from beets.plugins import BeetsPlugin +import beets.ui +from beets import vfs +from beets.util import bluelet +from beets.library import Item +from beets import dbcore +from mediafile import MediaFile + +PROTOCOL_VERSION = '0.16.0' +BUFSIZE = 1024 + +HELLO = 'OK MPD %s' % PROTOCOL_VERSION +CLIST_BEGIN = 'command_list_begin' +CLIST_VERBOSE_BEGIN = 'command_list_ok_begin' +CLIST_END = 'command_list_end' +RESP_OK = 'OK' +RESP_CLIST_VERBOSE = 'list_OK' +RESP_ERR = 'ACK' + +NEWLINE = "\n" + +ERROR_NOT_LIST = 1 +ERROR_ARG = 2 +ERROR_PASSWORD = 3 +ERROR_PERMISSION = 4 +ERROR_UNKNOWN = 5 +ERROR_NO_EXIST = 50 +ERROR_PLAYLIST_MAX = 51 +ERROR_SYSTEM = 52 +ERROR_PLAYLIST_LOAD = 53 +ERROR_UPDATE_ALREADY = 54 +ERROR_PLAYER_SYNC = 55 +ERROR_EXIST = 56 + +VOLUME_MIN = 0 +VOLUME_MAX = 100 + +SAFE_COMMANDS = ( + # Commands that are available when unauthenticated. + 'close', 'commands', 'notcommands', 'password', 'ping', +) + +# List of subsystems/events used by the `idle` command. +SUBSYSTEMS = [ + 'update', 'player', 'mixer', 'options', 'playlist', 'database', + # Related to unsupported commands: + 'stored_playlist', 'output', 'subscription', 'sticker', 'message', + 'partition', +] + +ITEM_KEYS_WRITABLE = set(MediaFile.fields()).intersection(Item._fields.keys()) + + +# Gstreamer import error. +class NoGstreamerError(Exception): + pass + + +# Error-handling, exceptions, parameter parsing. + +class BPDError(Exception): + """An error that should be exposed to the client to the BPD + server. + """ + def __init__(self, code, message, cmd_name='', index=0): + self.code = code + self.message = message + self.cmd_name = cmd_name + self.index = index + + template = Template('$resp [$code@$index] {$cmd_name} $message') + + def response(self): + """Returns a string to be used as the response code for the + erring command. + """ + return self.template.substitute({ + 'resp': RESP_ERR, + 'code': self.code, + 'index': self.index, + 'cmd_name': self.cmd_name, + 'message': self.message, + }) + + +def make_bpd_error(s_code, s_message): + """Create a BPDError subclass for a static code and message. + """ + + class NewBPDError(BPDError): + code = s_code + message = s_message + cmd_name = '' + index = 0 + + def __init__(self): + pass + return NewBPDError + +ArgumentTypeError = make_bpd_error(ERROR_ARG, 'invalid type for argument') +ArgumentIndexError = make_bpd_error(ERROR_ARG, 'argument out of range') +ArgumentNotFoundError = make_bpd_error(ERROR_NO_EXIST, 'argument not found') + + +def cast_arg(t, val): + """Attempts to call t on val, raising a ArgumentTypeError + on ValueError. + + If 't' is the special string 'intbool', attempts to cast first + to an int and then to a bool (i.e., 1=True, 0=False). + """ + if t == 'intbool': + return cast_arg(bool, cast_arg(int, val)) + else: + try: + return t(val) + except ValueError: + raise ArgumentTypeError() + + +class BPDClose(Exception): + """Raised by a command invocation to indicate that the connection + should be closed. + """ + + +class BPDIdle(Exception): + """Raised by a command to indicate the client wants to enter the idle state + and should be notified when a relevant event happens. + """ + def __init__(self, subsystems): + super().__init__() + self.subsystems = set(subsystems) + + +# Generic server infrastructure, implementing the basic protocol. + + +class BaseServer: + """A MPD-compatible music player server. + + The functions with the `cmd_` prefix are invoked in response to + client commands. For instance, if the client says `status`, + `cmd_status` will be invoked. The arguments to the client's commands + are used as function arguments following the connection issuing the + command. The functions may send data on the connection. They may + also raise BPDError exceptions to report errors. + + This is a generic superclass and doesn't support many commands. + """ + + def __init__(self, host, port, password, ctrl_port, log, ctrl_host=None): + """Create a new server bound to address `host` and listening + on port `port`. If `password` is given, it is required to do + anything significant on the server. + A separate control socket is established listening to `ctrl_host` on + port `ctrl_port` which is used to forward notifications from the player + and can be sent debug commands (e.g. using netcat). + """ + self.host, self.port, self.password = host, port, password + self.ctrl_host, self.ctrl_port = ctrl_host or host, ctrl_port + self.ctrl_sock = None + self._log = log + + # Default server values. + self.random = False + self.repeat = False + self.consume = False + self.single = False + self.volume = VOLUME_MAX + self.crossfade = 0 + self.mixrampdb = 0.0 + self.mixrampdelay = float('nan') + self.replay_gain_mode = 'off' + self.playlist = [] + self.playlist_version = 0 + self.current_index = -1 + self.paused = False + self.error = None + + # Current connections + self.connections = set() + + # Object for random numbers generation + self.random_obj = random.Random() + + def connect(self, conn): + """A new client has connected. + """ + self.connections.add(conn) + + def disconnect(self, conn): + """Client has disconnected; clean up residual state. + """ + self.connections.remove(conn) + + def run(self): + """Block and start listening for connections from clients. An + interrupt (^C) closes the server. + """ + self.startup_time = time.time() + + def start(): + yield bluelet.spawn( + bluelet.server(self.ctrl_host, self.ctrl_port, + ControlConnection.handler(self))) + yield bluelet.server(self.host, self.port, + MPDConnection.handler(self)) + bluelet.run(start()) + + def dispatch_events(self): + """If any clients have idle events ready, send them. + """ + # We need a copy of `self.connections` here since clients might + # disconnect once we try and send to them, changing `self.connections`. + for conn in list(self.connections): + yield bluelet.spawn(conn.send_notifications()) + + def _ctrl_send(self, message): + """Send some data over the control socket. + If it's our first time, open the socket. The message should be a + string without a terminal newline. + """ + if not self.ctrl_sock: + self.ctrl_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.ctrl_sock.connect((self.ctrl_host, self.ctrl_port)) + self.ctrl_sock.sendall((message + '\n').encode('utf-8')) + + def _send_event(self, event): + """Notify subscribed connections of an event.""" + for conn in self.connections: + conn.notify(event) + + def _item_info(self, item): + """An abstract method that should response lines containing a + single song's metadata. + """ + raise NotImplementedError + + def _item_id(self, item): + """An abstract method returning the integer id for an item. + """ + raise NotImplementedError + + def _id_to_index(self, track_id): + """Searches the playlist for a song with the given id and + returns its index in the playlist. + """ + track_id = cast_arg(int, track_id) + for index, track in enumerate(self.playlist): + if self._item_id(track) == track_id: + return index + # Loop finished with no track found. + raise ArgumentNotFoundError() + + def _random_idx(self): + """Returns a random index different from the current one. + If there are no songs in the playlist it returns -1. + If there is only one song in the playlist it returns 0. + """ + if len(self.playlist) < 2: + return len(self.playlist) - 1 + new_index = self.random_obj.randint(0, len(self.playlist) - 1) + while new_index == self.current_index: + new_index = self.random_obj.randint(0, len(self.playlist) - 1) + return new_index + + def _succ_idx(self): + """Returns the index for the next song to play. + It also considers random, single and repeat flags. + No boundaries are checked. + """ + if self.repeat and self.single: + return self.current_index + if self.random: + return self._random_idx() + return self.current_index + 1 + + def _prev_idx(self): + """Returns the index for the previous song to play. + It also considers random and repeat flags. + No boundaries are checked. + """ + if self.repeat and self.single: + return self.current_index + if self.random: + return self._random_idx() + return self.current_index - 1 + + def cmd_ping(self, conn): + """Succeeds.""" + pass + + def cmd_idle(self, conn, *subsystems): + subsystems = subsystems or SUBSYSTEMS + for system in subsystems: + if system not in SUBSYSTEMS: + raise BPDError(ERROR_ARG, + f'Unrecognised idle event: {system}') + raise BPDIdle(subsystems) # put the connection into idle mode + + def cmd_kill(self, conn): + """Exits the server process.""" + sys.exit(0) + + def cmd_close(self, conn): + """Closes the connection.""" + raise BPDClose() + + def cmd_password(self, conn, password): + """Attempts password authentication.""" + if password == self.password: + conn.authenticated = True + else: + conn.authenticated = False + raise BPDError(ERROR_PASSWORD, 'incorrect password') + + def cmd_commands(self, conn): + """Lists the commands available to the user.""" + if self.password and not conn.authenticated: + # Not authenticated. Show limited list of commands. + for cmd in SAFE_COMMANDS: + yield 'command: ' + cmd + + else: + # Authenticated. Show all commands. + for func in dir(self): + if func.startswith('cmd_'): + yield 'command: ' + func[4:] + + def cmd_notcommands(self, conn): + """Lists all unavailable commands.""" + if self.password and not conn.authenticated: + # Not authenticated. Show privileged commands. + for func in dir(self): + if func.startswith('cmd_'): + cmd = func[4:] + if cmd not in SAFE_COMMANDS: + yield 'command: ' + cmd + + else: + # Authenticated. No commands are unavailable. + pass + + def cmd_status(self, conn): + """Returns some status information for use with an + implementation of cmd_status. + + Gives a list of response-lines for: volume, repeat, random, + playlist, playlistlength, and xfade. + """ + yield ( + 'repeat: ' + str(int(self.repeat)), + 'random: ' + str(int(self.random)), + 'consume: ' + str(int(self.consume)), + 'single: ' + str(int(self.single)), + 'playlist: ' + str(self.playlist_version), + 'playlistlength: ' + str(len(self.playlist)), + 'mixrampdb: ' + str(self.mixrampdb), + ) + + if self.volume > 0: + yield 'volume: ' + str(self.volume) + + if not math.isnan(self.mixrampdelay): + yield 'mixrampdelay: ' + str(self.mixrampdelay) + if self.crossfade > 0: + yield 'xfade: ' + str(self.crossfade) + + if self.current_index == -1: + state = 'stop' + elif self.paused: + state = 'pause' + else: + state = 'play' + yield 'state: ' + state + + if self.current_index != -1: # i.e., paused or playing + current_id = self._item_id(self.playlist[self.current_index]) + yield 'song: ' + str(self.current_index) + yield 'songid: ' + str(current_id) + if len(self.playlist) > self.current_index + 1: + # If there's a next song, report its index too. + next_id = self._item_id(self.playlist[self.current_index + 1]) + yield 'nextsong: ' + str(self.current_index + 1) + yield 'nextsongid: ' + str(next_id) + + if self.error: + yield 'error: ' + self.error + + def cmd_clearerror(self, conn): + """Removes the persistent error state of the server. This + error is set when a problem arises not in response to a + command (for instance, when playing a file). + """ + self.error = None + + def cmd_random(self, conn, state): + """Set or unset random (shuffle) mode.""" + self.random = cast_arg('intbool', state) + self._send_event('options') + + def cmd_repeat(self, conn, state): + """Set or unset repeat mode.""" + self.repeat = cast_arg('intbool', state) + self._send_event('options') + + def cmd_consume(self, conn, state): + """Set or unset consume mode.""" + self.consume = cast_arg('intbool', state) + self._send_event('options') + + def cmd_single(self, conn, state): + """Set or unset single mode.""" + # TODO support oneshot in addition to 0 and 1 [MPD 0.20] + self.single = cast_arg('intbool', state) + self._send_event('options') + + def cmd_setvol(self, conn, vol): + """Set the player's volume level (0-100).""" + vol = cast_arg(int, vol) + if vol < VOLUME_MIN or vol > VOLUME_MAX: + raise BPDError(ERROR_ARG, 'volume out of range') + self.volume = vol + self._send_event('mixer') + + def cmd_volume(self, conn, vol_delta): + """Deprecated command to change the volume by a relative amount.""" + vol_delta = cast_arg(int, vol_delta) + return self.cmd_setvol(conn, self.volume + vol_delta) + + def cmd_crossfade(self, conn, crossfade): + """Set the number of seconds of crossfading.""" + crossfade = cast_arg(int, crossfade) + if crossfade < 0: + raise BPDError(ERROR_ARG, 'crossfade time must be nonnegative') + self._log.warning('crossfade is not implemented in bpd') + self.crossfade = crossfade + self._send_event('options') + + def cmd_mixrampdb(self, conn, db): + """Set the mixramp normalised max volume in dB.""" + db = cast_arg(float, db) + if db > 0: + raise BPDError(ERROR_ARG, 'mixrampdb time must be negative') + self._log.warning('mixramp is not implemented in bpd') + self.mixrampdb = db + self._send_event('options') + + def cmd_mixrampdelay(self, conn, delay): + """Set the mixramp delay in seconds.""" + delay = cast_arg(float, delay) + if delay < 0: + raise BPDError(ERROR_ARG, 'mixrampdelay time must be nonnegative') + self._log.warning('mixramp is not implemented in bpd') + self.mixrampdelay = delay + self._send_event('options') + + def cmd_replay_gain_mode(self, conn, mode): + """Set the replay gain mode.""" + if mode not in ['off', 'track', 'album', 'auto']: + raise BPDError(ERROR_ARG, 'Unrecognised replay gain mode') + self._log.warning('replay gain is not implemented in bpd') + self.replay_gain_mode = mode + self._send_event('options') + + def cmd_replay_gain_status(self, conn): + """Get the replaygain mode.""" + yield 'replay_gain_mode: ' + str(self.replay_gain_mode) + + def cmd_clear(self, conn): + """Clear the playlist.""" + self.playlist = [] + self.playlist_version += 1 + self.cmd_stop(conn) + self._send_event('playlist') + + def cmd_delete(self, conn, index): + """Remove the song at index from the playlist.""" + index = cast_arg(int, index) + try: + del(self.playlist[index]) + except IndexError: + raise ArgumentIndexError() + self.playlist_version += 1 + + if self.current_index == index: # Deleted playing song. + self.cmd_stop(conn) + elif index < self.current_index: # Deleted before playing. + # Shift playing index down. + self.current_index -= 1 + self._send_event('playlist') + + def cmd_deleteid(self, conn, track_id): + self.cmd_delete(conn, self._id_to_index(track_id)) + + def cmd_move(self, conn, idx_from, idx_to): + """Move a track in the playlist.""" + idx_from = cast_arg(int, idx_from) + idx_to = cast_arg(int, idx_to) + try: + track = self.playlist.pop(idx_from) + self.playlist.insert(idx_to, track) + except IndexError: + raise ArgumentIndexError() + + # Update currently-playing song. + if idx_from == self.current_index: + self.current_index = idx_to + elif idx_from < self.current_index <= idx_to: + self.current_index -= 1 + elif idx_from > self.current_index >= idx_to: + self.current_index += 1 + + self.playlist_version += 1 + self._send_event('playlist') + + def cmd_moveid(self, conn, idx_from, idx_to): + idx_from = self._id_to_index(idx_from) + return self.cmd_move(conn, idx_from, idx_to) + + def cmd_swap(self, conn, i, j): + """Swaps two tracks in the playlist.""" + i = cast_arg(int, i) + j = cast_arg(int, j) + try: + track_i = self.playlist[i] + track_j = self.playlist[j] + except IndexError: + raise ArgumentIndexError() + + self.playlist[j] = track_i + self.playlist[i] = track_j + + # Update currently-playing song. + if self.current_index == i: + self.current_index = j + elif self.current_index == j: + self.current_index = i + + self.playlist_version += 1 + self._send_event('playlist') + + def cmd_swapid(self, conn, i_id, j_id): + i = self._id_to_index(i_id) + j = self._id_to_index(j_id) + return self.cmd_swap(conn, i, j) + + def cmd_urlhandlers(self, conn): + """Indicates supported URL schemes. None by default.""" + pass + + def cmd_playlistinfo(self, conn, index=None): + """Gives metadata information about the entire playlist or a + single track, given by its index. + """ + if index is None: + for track in self.playlist: + yield self._item_info(track) + else: + indices = self._parse_range(index, accept_single_number=True) + try: + tracks = [self.playlist[i] for i in indices] + except IndexError: + raise ArgumentIndexError() + for track in tracks: + yield self._item_info(track) + + def cmd_playlistid(self, conn, track_id=None): + if track_id is not None: + track_id = cast_arg(int, track_id) + track_id = self._id_to_index(track_id) + return self.cmd_playlistinfo(conn, track_id) + + def cmd_plchanges(self, conn, version): + """Sends playlist changes since the given version. + + This is a "fake" implementation that ignores the version and + just returns the entire playlist (rather like version=0). This + seems to satisfy many clients. + """ + return self.cmd_playlistinfo(conn) + + def cmd_plchangesposid(self, conn, version): + """Like plchanges, but only sends position and id. + + Also a dummy implementation. + """ + for idx, track in enumerate(self.playlist): + yield 'cpos: ' + str(idx) + yield 'Id: ' + str(track.id) + + def cmd_currentsong(self, conn): + """Sends information about the currently-playing song. + """ + if self.current_index != -1: # -1 means stopped. + track = self.playlist[self.current_index] + yield self._item_info(track) + + def cmd_next(self, conn): + """Advance to the next song in the playlist.""" + old_index = self.current_index + self.current_index = self._succ_idx() + if self.consume: + # TODO how does consume interact with single+repeat? + self.playlist.pop(old_index) + if self.current_index > old_index: + self.current_index -= 1 + self.playlist_version += 1 + self._send_event("playlist") + if self.current_index >= len(self.playlist): + # Fallen off the end. Move to stopped state or loop. + if self.repeat: + self.current_index = -1 + return self.cmd_play(conn) + return self.cmd_stop(conn) + elif self.single and not self.repeat: + return self.cmd_stop(conn) + else: + return self.cmd_play(conn) + + def cmd_previous(self, conn): + """Step back to the last song.""" + old_index = self.current_index + self.current_index = self._prev_idx() + if self.consume: + self.playlist.pop(old_index) + if self.current_index < 0: + if self.repeat: + self.current_index = len(self.playlist) - 1 + else: + self.current_index = 0 + return self.cmd_play(conn) + + def cmd_pause(self, conn, state=None): + """Set the pause state playback.""" + if state is None: + self.paused = not self.paused # Toggle. + else: + self.paused = cast_arg('intbool', state) + self._send_event('player') + + def cmd_play(self, conn, index=-1): + """Begin playback, possibly at a specified playlist index.""" + index = cast_arg(int, index) + + if index < -1 or index >= len(self.playlist): + raise ArgumentIndexError() + + if index == -1: # No index specified: start where we are. + if not self.playlist: # Empty playlist: stop immediately. + return self.cmd_stop(conn) + if self.current_index == -1: # No current song. + self.current_index = 0 # Start at the beginning. + # If we have a current song, just stay there. + + else: # Start with the specified index. + self.current_index = index + + self.paused = False + self._send_event('player') + + def cmd_playid(self, conn, track_id=0): + track_id = cast_arg(int, track_id) + if track_id == -1: + index = -1 + else: + index = self._id_to_index(track_id) + return self.cmd_play(conn, index) + + def cmd_stop(self, conn): + """Stop playback.""" + self.current_index = -1 + self.paused = False + self._send_event('player') + + def cmd_seek(self, conn, index, pos): + """Seek to a specified point in a specified song.""" + index = cast_arg(int, index) + if index < 0 or index >= len(self.playlist): + raise ArgumentIndexError() + self.current_index = index + self._send_event('player') + + def cmd_seekid(self, conn, track_id, pos): + index = self._id_to_index(track_id) + return self.cmd_seek(conn, index, pos) + + # Additions to the MPD protocol. + + def cmd_crash_TypeError(self, conn): # noqa: N802 + """Deliberately trigger a TypeError for testing purposes. + We want to test that the server properly responds with ERROR_SYSTEM + without crashing, and that this is not treated as ERROR_ARG (since it + is caused by a programming error, not a protocol error). + """ + 'a' + 2 + + +class Connection: + """A connection between a client and the server. + """ + def __init__(self, server, sock): + """Create a new connection for the accepted socket `client`. + """ + self.server = server + self.sock = sock + self.address = '{}:{}'.format(*sock.sock.getpeername()) + + def debug(self, message, kind=' '): + """Log a debug message about this connection. + """ + self.server._log.debug('{}[{}]: {}', kind, self.address, message) + + def run(self): + pass + + def send(self, lines): + """Send lines, which which is either a single string or an + iterable consisting of strings, to the client. A newline is + added after every string. Returns a Bluelet event that sends + the data. + """ + if isinstance(lines, str): + lines = [lines] + out = NEWLINE.join(lines) + NEWLINE + for l in out.split(NEWLINE)[:-1]: + self.debug(l, kind='>') + if isinstance(out, str): + out = out.encode('utf-8') + return self.sock.sendall(out) + + @classmethod + def handler(cls, server): + def _handle(sock): + """Creates a new `Connection` and runs it. + """ + return cls(server, sock).run() + return _handle + + +class MPDConnection(Connection): + """A connection that receives commands from an MPD-compatible client. + """ + def __init__(self, server, sock): + """Create a new connection for the accepted socket `client`. + """ + super().__init__(server, sock) + self.authenticated = False + self.notifications = set() + self.idle_subscriptions = set() + + def do_command(self, command): + """A coroutine that runs the given command and sends an + appropriate response.""" + try: + yield bluelet.call(command.run(self)) + except BPDError as e: + # Send the error. + yield self.send(e.response()) + else: + # Send success code. + yield self.send(RESP_OK) + + def disconnect(self): + """The connection has closed for any reason. + """ + self.server.disconnect(self) + self.debug('disconnected', kind='*') + + def notify(self, event): + """Queue up an event for sending to this client. + """ + self.notifications.add(event) + + def send_notifications(self, force_close_idle=False): + """Send the client any queued events now. + """ + pending = self.notifications.intersection(self.idle_subscriptions) + try: + for event in pending: + yield self.send(f'changed: {event}') + if pending or force_close_idle: + self.idle_subscriptions = set() + self.notifications = self.notifications.difference(pending) + yield self.send(RESP_OK) + except bluelet.SocketClosedError: + self.disconnect() # Client disappeared. + + def run(self): + """Send a greeting to the client and begin processing commands + as they arrive. + """ + self.debug('connected', kind='*') + self.server.connect(self) + yield self.send(HELLO) + + clist = None # Initially, no command list is being constructed. + while True: + line = yield self.sock.readline() + if not line: + self.disconnect() # Client disappeared. + break + line = line.strip() + if not line: + err = BPDError(ERROR_UNKNOWN, 'No command given') + yield self.send(err.response()) + self.disconnect() # Client sent a blank line. + break + line = line.decode('utf8') # MPD protocol uses UTF-8. + for l in line.split(NEWLINE): + self.debug(l, kind='<') + + if self.idle_subscriptions: + # The connection is in idle mode. + if line == 'noidle': + yield bluelet.call(self.send_notifications(True)) + else: + err = BPDError(ERROR_UNKNOWN, + f'Got command while idle: {line}') + yield self.send(err.response()) + break + continue + if line == 'noidle': + # When not in idle, this command sends no response. + continue + + if clist is not None: + # Command list already opened. + if line == CLIST_END: + yield bluelet.call(self.do_command(clist)) + clist = None # Clear the command list. + yield bluelet.call(self.server.dispatch_events()) + else: + clist.append(Command(line)) + + elif line == CLIST_BEGIN or line == CLIST_VERBOSE_BEGIN: + # Begin a command list. + clist = CommandList([], line == CLIST_VERBOSE_BEGIN) + + else: + # Ordinary command. + try: + yield bluelet.call(self.do_command(Command(line))) + except BPDClose: + # Command indicates that the conn should close. + self.sock.close() + self.disconnect() # Client explicitly closed. + return + except BPDIdle as e: + self.idle_subscriptions = e.subsystems + self.debug('awaiting: {}'.format(' '.join(e.subsystems)), + kind='z') + yield bluelet.call(self.server.dispatch_events()) + + +class ControlConnection(Connection): + """A connection used to control BPD for debugging and internal events. + """ + def __init__(self, server, sock): + """Create a new connection for the accepted socket `client`. + """ + super().__init__(server, sock) + + def debug(self, message, kind=' '): + self.server._log.debug('CTRL {}[{}]: {}', kind, self.address, message) + + def run(self): + """Listen for control commands and delegate to `ctrl_*` methods. + """ + self.debug('connected', kind='*') + while True: + line = yield self.sock.readline() + if not line: + break # Client disappeared. + line = line.strip() + if not line: + break # Client sent a blank line. + line = line.decode('utf8') # Protocol uses UTF-8. + for l in line.split(NEWLINE): + self.debug(l, kind='<') + command = Command(line) + try: + func = command.delegate('ctrl_', self) + yield bluelet.call(func(*command.args)) + except (AttributeError, TypeError) as e: + yield self.send('ERROR: {}'.format(e.args[0])) + except Exception: + yield self.send(['ERROR: server error', + traceback.format_exc().rstrip()]) + + def ctrl_play_finished(self): + """Callback from the player signalling a song finished playing. + """ + yield bluelet.call(self.server.dispatch_events()) + + def ctrl_profile(self): + """Memory profiling for debugging. + """ + from guppy import hpy + heap = hpy().heap() + yield self.send(heap) + + def ctrl_nickname(self, oldlabel, newlabel): + """Rename a client in the log messages. + """ + for c in self.server.connections: + if c.address == oldlabel: + c.address = newlabel + break + else: + yield self.send(f'ERROR: no such client: {oldlabel}') + + +class Command: + """A command issued by the client for processing by the server. + """ + + command_re = re.compile(r'^([^ \t]+)[ \t]*') + arg_re = re.compile(r'"((?:\\"|[^"])+)"|([^ \t"]+)') + + def __init__(self, s): + """Creates a new `Command` from the given string, `s`, parsing + the string for command name and arguments. + """ + command_match = self.command_re.match(s) + self.name = command_match.group(1) + + self.args = [] + arg_matches = self.arg_re.findall(s[command_match.end():]) + for match in arg_matches: + if match[0]: + # Quoted argument. + arg = match[0] + arg = arg.replace('\\"', '"').replace('\\\\', '\\') + else: + # Unquoted argument. + arg = match[1] + self.args.append(arg) + + def delegate(self, prefix, target, extra_args=0): + """Get the target method that corresponds to this command. + The `prefix` is prepended to the command name and then the resulting + name is used to search `target` for a method with a compatible number + of arguments. + """ + # Attempt to get correct command function. + func_name = prefix + self.name + if not hasattr(target, func_name): + raise AttributeError(f'unknown command "{self.name}"') + func = getattr(target, func_name) + + argspec = inspect.getfullargspec(func) + + # Check that `func` is able to handle the number of arguments sent + # by the client (so we can raise ERROR_ARG instead of ERROR_SYSTEM). + # Maximum accepted arguments: argspec includes "self". + max_args = len(argspec.args) - 1 - extra_args + # Minimum accepted arguments: some arguments might be optional. + min_args = max_args + if argspec.defaults: + min_args -= len(argspec.defaults) + wrong_num = (len(self.args) > max_args) or (len(self.args) < min_args) + # If the command accepts a variable number of arguments skip the check. + if wrong_num and not argspec.varargs: + raise TypeError('wrong number of arguments for "{}"' + .format(self.name), self.name) + + return func + + def run(self, conn): + """A coroutine that executes the command on the given + connection. + """ + try: + # `conn` is an extra argument to all cmd handlers. + func = self.delegate('cmd_', conn.server, extra_args=1) + except AttributeError as e: + raise BPDError(ERROR_UNKNOWN, e.args[0]) + except TypeError as e: + raise BPDError(ERROR_ARG, e.args[0], self.name) + + # Ensure we have permission for this command. + if conn.server.password and \ + not conn.authenticated and \ + self.name not in SAFE_COMMANDS: + raise BPDError(ERROR_PERMISSION, 'insufficient privileges') + + try: + args = [conn] + self.args + results = func(*args) + if results: + for data in results: + yield conn.send(data) + + except BPDError as e: + # An exposed error. Set the command name and then let + # the Connection handle it. + e.cmd_name = self.name + raise e + + except BPDClose: + # An indication that the connection should close. Send + # it on the Connection. + raise + + except BPDIdle: + raise + + except Exception: + # An "unintentional" error. Hide it from the client. + conn.server._log.error('{}', traceback.format_exc()) + raise BPDError(ERROR_SYSTEM, 'server error', self.name) + + +class CommandList(list): + """A list of commands issued by the client for processing by the + server. May be verbose, in which case the response is delimited, or + not. Should be a list of `Command` objects. + """ + + def __init__(self, sequence=None, verbose=False): + """Create a new `CommandList` from the given sequence of + `Command`s. If `verbose`, this is a verbose command list. + """ + if sequence: + for item in sequence: + self.append(item) + self.verbose = verbose + + def run(self, conn): + """Coroutine executing all the commands in this list. + """ + for i, command in enumerate(self): + try: + yield bluelet.call(command.run(conn)) + except BPDError as e: + # If the command failed, stop executing. + e.index = i # Give the error the correct index. + raise e + + # Otherwise, possibly send the output delimiter if we're in a + # verbose ("OK") command list. + if self.verbose: + yield conn.send(RESP_CLIST_VERBOSE) + + +# A subclass of the basic, protocol-handling server that actually plays +# music. + +class Server(BaseServer): + """An MPD-compatible server using GStreamer to play audio and beets + to store its library. + """ + + def __init__(self, library, host, port, password, ctrl_port, log): + try: + from beetsplug.bpd import gstplayer + except ImportError as e: + # This is a little hacky, but it's the best I know for now. + if e.args[0].endswith(' gst'): + raise NoGstreamerError() + else: + raise + log.info('Starting server...') + super().__init__(host, port, password, ctrl_port, log) + self.lib = library + self.player = gstplayer.GstPlayer(self.play_finished) + self.cmd_update(None) + log.info('Server ready and listening on {}:{}'.format( + host, port)) + log.debug('Listening for control signals on {}:{}'.format( + host, ctrl_port)) + + def run(self): + self.player.run() + super().run() + + def play_finished(self): + """A callback invoked every time our player finishes a track. + """ + self.cmd_next(None) + self._ctrl_send('play_finished') + + # Metadata helper functions. + + def _item_info(self, item): + info_lines = [ + 'file: ' + item.destination(fragment=True), + 'Time: ' + str(int(item.length)), + 'duration: ' + f'{item.length:.3f}', + 'Id: ' + str(item.id), + ] + + try: + pos = self._id_to_index(item.id) + info_lines.append('Pos: ' + str(pos)) + except ArgumentNotFoundError: + # Don't include position if not in playlist. + pass + + for tagtype, field in self.tagtype_map.items(): + info_lines.append('{}: {}'.format( + tagtype, str(getattr(item, field)))) + + return info_lines + + def _parse_range(self, items, accept_single_number=False): + """Convert a range of positions to a list of item info. + MPD specifies ranges as START:STOP (endpoint excluded) for some + commands. Sometimes a single number can be provided instead. + """ + try: + start, stop = str(items).split(':', 1) + except ValueError: + if accept_single_number: + return [cast_arg(int, items)] + raise BPDError(ERROR_ARG, 'bad range syntax') + start = cast_arg(int, start) + stop = cast_arg(int, stop) + return range(start, stop) + + def _item_id(self, item): + return item.id + + # Database updating. + + def cmd_update(self, conn, path='/'): + """Updates the catalog to reflect the current database state. + """ + # Path is ignored. Also, the real MPD does this asynchronously; + # this is done inline. + self._log.debug('Building directory tree...') + self.tree = vfs.libtree(self.lib) + self._log.debug('Finished building directory tree.') + self.updated_time = time.time() + self._send_event('update') + self._send_event('database') + + # Path (directory tree) browsing. + + def _resolve_path(self, path): + """Returns a VFS node or an item ID located at the path given. + If the path does not exist, raises a + """ + components = path.split('/') + node = self.tree + + for component in components: + if not component: + continue + + if isinstance(node, int): + # We're trying to descend into a file node. + raise ArgumentNotFoundError() + + if component in node.files: + node = node.files[component] + elif component in node.dirs: + node = node.dirs[component] + else: + raise ArgumentNotFoundError() + + return node + + def _path_join(self, p1, p2): + """Smashes together two BPD paths.""" + out = p1 + '/' + p2 + return out.replace('//', '/').replace('//', '/') + + def cmd_lsinfo(self, conn, path="/"): + """Sends info on all the items in the path.""" + node = self._resolve_path(path) + if isinstance(node, int): + # Trying to list a track. + raise BPDError(ERROR_ARG, 'this is not a directory') + else: + for name, itemid in iter(sorted(node.files.items())): + item = self.lib.get_item(itemid) + yield self._item_info(item) + for name, _ in iter(sorted(node.dirs.items())): + dirpath = self._path_join(path, name) + if dirpath.startswith("/"): + # Strip leading slash (libmpc rejects this). + dirpath = dirpath[1:] + yield 'directory: %s' % dirpath + + def _listall(self, basepath, node, info=False): + """Helper function for recursive listing. If info, show + tracks' complete info; otherwise, just show items' paths. + """ + if isinstance(node, int): + # List a single file. + if info: + item = self.lib.get_item(node) + yield self._item_info(item) + else: + yield 'file: ' + basepath + else: + # List a directory. Recurse into both directories and files. + for name, itemid in sorted(node.files.items()): + newpath = self._path_join(basepath, name) + # "yield from" + yield from self._listall(newpath, itemid, info) + for name, subdir in sorted(node.dirs.items()): + newpath = self._path_join(basepath, name) + yield 'directory: ' + newpath + yield from self._listall(newpath, subdir, info) + + def cmd_listall(self, conn, path="/"): + """Send the paths all items in the directory, recursively.""" + return self._listall(path, self._resolve_path(path), False) + + def cmd_listallinfo(self, conn, path="/"): + """Send info on all the items in the directory, recursively.""" + return self._listall(path, self._resolve_path(path), True) + + # Playlist manipulation. + + def _all_items(self, node): + """Generator yielding all items under a VFS node. + """ + if isinstance(node, int): + # Could be more efficient if we built up all the IDs and + # then issued a single SELECT. + yield self.lib.get_item(node) + else: + # Recurse into a directory. + for name, itemid in sorted(node.files.items()): + # "yield from" + yield from self._all_items(itemid) + for name, subdir in sorted(node.dirs.items()): + yield from self._all_items(subdir) + + def _add(self, path, send_id=False): + """Adds a track or directory to the playlist, specified by the + path. If `send_id`, write each item's id to the client. + """ + for item in self._all_items(self._resolve_path(path)): + self.playlist.append(item) + if send_id: + yield 'Id: ' + str(item.id) + self.playlist_version += 1 + self._send_event('playlist') + + def cmd_add(self, conn, path): + """Adds a track or directory to the playlist, specified by a + path. + """ + return self._add(path, False) + + def cmd_addid(self, conn, path): + """Same as `cmd_add` but sends an id back to the client.""" + return self._add(path, True) + + # Server info. + + def cmd_status(self, conn): + yield from super().cmd_status(conn) + if self.current_index > -1: + item = self.playlist[self.current_index] + + yield ( + 'bitrate: ' + str(item.bitrate / 1000), + 'audio: {}:{}:{}'.format( + str(item.samplerate), + str(item.bitdepth), + str(item.channels), + ), + ) + + (pos, total) = self.player.time() + yield ( + 'time: {}:{}'.format( + str(int(pos)), + str(int(total)), + ), + 'elapsed: ' + f'{pos:.3f}', + 'duration: ' + f'{total:.3f}', + ) + + # Also missing 'updating_db'. + + def cmd_stats(self, conn): + """Sends some statistics about the library.""" + with self.lib.transaction() as tx: + statement = 'SELECT COUNT(DISTINCT artist), ' \ + 'COUNT(DISTINCT album), ' \ + 'COUNT(id), ' \ + 'SUM(length) ' \ + 'FROM items' + artists, albums, songs, totaltime = tx.query(statement)[0] + + yield ( + 'artists: ' + str(artists), + 'albums: ' + str(albums), + 'songs: ' + str(songs), + 'uptime: ' + str(int(time.time() - self.startup_time)), + 'playtime: ' + '0', # Missing. + 'db_playtime: ' + str(int(totaltime)), + 'db_update: ' + str(int(self.updated_time)), + ) + + def cmd_decoders(self, conn): + """Send list of supported decoders and formats.""" + decoders = self.player.get_decoders() + for name, (mimes, exts) in decoders.items(): + yield f'plugin: {name}' + for ext in exts: + yield f'suffix: {ext}' + for mime in mimes: + yield f'mime_type: {mime}' + + # Searching. + + tagtype_map = { + 'Artist': 'artist', + 'ArtistSort': 'artist_sort', + 'Album': 'album', + 'Title': 'title', + 'Track': 'track', + 'AlbumArtist': 'albumartist', + 'AlbumArtistSort': 'albumartist_sort', + 'Label': 'label', + 'Genre': 'genre', + 'Date': 'year', + 'OriginalDate': 'original_year', + 'Composer': 'composer', + 'Disc': 'disc', + 'Comment': 'comments', + 'MUSICBRAINZ_TRACKID': 'mb_trackid', + 'MUSICBRAINZ_ALBUMID': 'mb_albumid', + 'MUSICBRAINZ_ARTISTID': 'mb_artistid', + 'MUSICBRAINZ_ALBUMARTISTID': 'mb_albumartistid', + 'MUSICBRAINZ_RELEASETRACKID': 'mb_releasetrackid', + } + + def cmd_tagtypes(self, conn): + """Returns a list of the metadata (tag) fields available for + searching. + """ + for tag in self.tagtype_map: + yield 'tagtype: ' + tag + + def _tagtype_lookup(self, tag): + """Uses `tagtype_map` to look up the beets column name for an + MPD tagtype (or throw an appropriate exception). Returns both + the canonical name of the MPD tagtype and the beets column + name. + """ + for test_tag, key in self.tagtype_map.items(): + # Match case-insensitively. + if test_tag.lower() == tag.lower(): + return test_tag, key + raise BPDError(ERROR_UNKNOWN, 'no such tagtype') + + def _metadata_query(self, query_type, any_query_type, kv): + """Helper function returns a query object that will find items + according to the library query type provided and the key-value + pairs specified. The any_query_type is used for queries of + type "any"; if None, then an error is thrown. + """ + if kv: # At least one key-value pair. + queries = [] + # Iterate pairwise over the arguments. + it = iter(kv) + for tag, value in zip(it, it): + if tag.lower() == 'any': + if any_query_type: + queries.append(any_query_type(value, + ITEM_KEYS_WRITABLE, + query_type)) + else: + raise BPDError(ERROR_UNKNOWN, 'no such tagtype') + else: + _, key = self._tagtype_lookup(tag) + queries.append(query_type(key, value)) + return dbcore.query.AndQuery(queries) + else: # No key-value pairs. + return dbcore.query.TrueQuery() + + def cmd_search(self, conn, *kv): + """Perform a substring match for items.""" + query = self._metadata_query(dbcore.query.SubstringQuery, + dbcore.query.AnyFieldQuery, + kv) + for item in self.lib.items(query): + yield self._item_info(item) + + def cmd_find(self, conn, *kv): + """Perform an exact match for items.""" + query = self._metadata_query(dbcore.query.MatchQuery, + None, + kv) + for item in self.lib.items(query): + yield self._item_info(item) + + def cmd_list(self, conn, show_tag, *kv): + """List distinct metadata values for show_tag, possibly + filtered by matching match_tag to match_term. + """ + show_tag_canon, show_key = self._tagtype_lookup(show_tag) + if len(kv) == 1: + if show_tag_canon == 'Album': + # If no tag was given, assume artist. This is because MPD + # supports a short version of this command for fetching the + # albums belonging to a particular artist, and some clients + # rely on this behaviour (e.g. MPDroid, M.A.L.P.). + kv = ('Artist', kv[0]) + else: + raise BPDError(ERROR_ARG, 'should be "Album" for 3 arguments') + elif len(kv) % 2 != 0: + raise BPDError(ERROR_ARG, 'Incorrect number of filter arguments') + query = self._metadata_query(dbcore.query.MatchQuery, None, kv) + + clause, subvals = query.clause() + statement = 'SELECT DISTINCT ' + show_key + \ + ' FROM items WHERE ' + clause + \ + ' ORDER BY ' + show_key + self._log.debug(statement) + with self.lib.transaction() as tx: + rows = tx.query(statement, subvals) + + for row in rows: + if not row[0]: + # Skip any empty values of the field. + continue + yield show_tag_canon + ': ' + str(row[0]) + + def cmd_count(self, conn, tag, value): + """Returns the number and total time of songs matching the + tag/value query. + """ + _, key = self._tagtype_lookup(tag) + songs = 0 + playtime = 0.0 + for item in self.lib.items(dbcore.query.MatchQuery(key, value)): + songs += 1 + playtime += item.length + yield 'songs: ' + str(songs) + yield 'playtime: ' + str(int(playtime)) + + # Persistent playlist manipulation. In MPD this is an optional feature so + # these dummy implementations match MPD's behaviour with the feature off. + + def cmd_listplaylist(self, conn, playlist): + raise BPDError(ERROR_NO_EXIST, 'No such playlist') + + def cmd_listplaylistinfo(self, conn, playlist): + raise BPDError(ERROR_NO_EXIST, 'No such playlist') + + def cmd_listplaylists(self, conn): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_load(self, conn, playlist): + raise BPDError(ERROR_NO_EXIST, 'Stored playlists are disabled') + + def cmd_playlistadd(self, conn, playlist, uri): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_playlistclear(self, conn, playlist): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_playlistdelete(self, conn, playlist, index): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_playlistmove(self, conn, playlist, from_index, to_index): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_rename(self, conn, playlist, new_name): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_rm(self, conn, playlist): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + def cmd_save(self, conn, playlist): + raise BPDError(ERROR_UNKNOWN, 'Stored playlists are disabled') + + # "Outputs." Just a dummy implementation because we don't control + # any outputs. + + def cmd_outputs(self, conn): + """List the available outputs.""" + yield ( + 'outputid: 0', + 'outputname: gstreamer', + 'outputenabled: 1', + ) + + def cmd_enableoutput(self, conn, output_id): + output_id = cast_arg(int, output_id) + if output_id != 0: + raise ArgumentIndexError() + + def cmd_disableoutput(self, conn, output_id): + output_id = cast_arg(int, output_id) + if output_id == 0: + raise BPDError(ERROR_ARG, 'cannot disable this output') + else: + raise ArgumentIndexError() + + # Playback control. The functions below hook into the + # half-implementations provided by the base class. Together, they're + # enough to implement all normal playback functionality. + + def cmd_play(self, conn, index=-1): + new_index = index != -1 and index != self.current_index + was_paused = self.paused + super().cmd_play(conn, index) + + if self.current_index > -1: # Not stopped. + if was_paused and not new_index: + # Just unpause. + self.player.play() + else: + self.player.play_file(self.playlist[self.current_index].path) + + def cmd_pause(self, conn, state=None): + super().cmd_pause(conn, state) + if self.paused: + self.player.pause() + elif self.player.playing: + self.player.play() + + def cmd_stop(self, conn): + super().cmd_stop(conn) + self.player.stop() + + def cmd_seek(self, conn, index, pos): + """Seeks to the specified position in the specified song.""" + index = cast_arg(int, index) + pos = cast_arg(float, pos) + super().cmd_seek(conn, index, pos) + self.player.seek(pos) + + # Volume control. + + def cmd_setvol(self, conn, vol): + vol = cast_arg(int, vol) + super().cmd_setvol(conn, vol) + self.player.volume = float(vol) / 100 + + +# Beets plugin hooks. + +class BPDPlugin(BeetsPlugin): + """Provides the "beet bpd" command for running a music player + server. + """ + def __init__(self): + super().__init__() + self.config.add({ + 'host': '', + 'port': 6600, + 'control_port': 6601, + 'password': '', + 'volume': VOLUME_MAX, + }) + self.config['password'].redact = True + + def start_bpd(self, lib, host, port, password, volume, ctrl_port): + """Starts a BPD server.""" + try: + server = Server(lib, host, port, password, ctrl_port, self._log) + server.cmd_setvol(None, volume) + server.run() + except NoGstreamerError: + self._log.error('Gstreamer Python bindings not found.') + self._log.error('Install "gstreamer1.0" and "python-gi"' + 'or similar package to use BPD.') + + def commands(self): + cmd = beets.ui.Subcommand( + 'bpd', help='run an MPD-compatible music player server' + ) + + def func(lib, opts, args): + host = self.config['host'].as_str() + host = args.pop(0) if args else host + port = args.pop(0) if args else self.config['port'].get(int) + if args: + ctrl_port = args.pop(0) + else: + ctrl_port = self.config['control_port'].get(int) + if args: + raise beets.ui.UserError('too many arguments') + password = self.config['password'].as_str() + volume = self.config['volume'].get(int) + self.start_bpd(lib, host, int(port), password, volume, + int(ctrl_port)) + + cmd.func = func + return [cmd] diff --git a/lib/beetsplug/bpd/gstplayer.py b/lib/beetsplug/bpd/gstplayer.py new file mode 100644 index 00000000..64954b1c --- /dev/null +++ b/lib/beetsplug/bpd/gstplayer.py @@ -0,0 +1,304 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""A wrapper for the GStreamer Python bindings that exposes a simple +music player. +""" + + +import sys +import time +import _thread +import os +import copy +import urllib +from beets import ui + +import gi +gi.require_version('Gst', '1.0') +from gi.repository import GLib, Gst # noqa: E402 + + +Gst.init(None) + + +class QueryError(Exception): + pass + + +class GstPlayer: + """A music player abstracting GStreamer's Playbin element. + + Create a player object, then call run() to start a thread with a + runloop. Then call play_file to play music. Use player.playing + to check whether music is currently playing. + + A basic play queue is also implemented (just a Python list, + player.queue, whose last element is next to play). To use it, + just call enqueue() and then play(). When a track finishes and + another is available on the queue, it is played automatically. + """ + + def __init__(self, finished_callback=None): + """Initialize a player. + + If a finished_callback is provided, it is called every time a + track started with play_file finishes. + + Once the player has been created, call run() to begin the main + runloop in a separate thread. + """ + + # Set up the Gstreamer player. From the pygst tutorial: + # https://pygstdocs.berlios.de/pygst-tutorial/playbin.html (gone) + # https://brettviren.github.io/pygst-tutorial-org/pygst-tutorial.html + #### + # Updated to GStreamer 1.0 with: + # https://wiki.ubuntu.com/Novacut/GStreamer1.0 + self.player = Gst.ElementFactory.make("playbin", "player") + + if self.player is None: + raise ui.UserError("Could not create playbin") + + fakesink = Gst.ElementFactory.make("fakesink", "fakesink") + + if fakesink is None: + raise ui.UserError("Could not create fakesink") + + self.player.set_property("video-sink", fakesink) + bus = self.player.get_bus() + bus.add_signal_watch() + bus.connect("message", self._handle_message) + + # Set up our own stuff. + self.playing = False + self.finished_callback = finished_callback + self.cached_time = None + self._volume = 1.0 + + def _get_state(self): + """Returns the current state flag of the playbin.""" + # gst's get_state function returns a 3-tuple; we just want the + # status flag in position 1. + return self.player.get_state(Gst.CLOCK_TIME_NONE)[1] + + def _handle_message(self, bus, message): + """Callback for status updates from GStreamer.""" + if message.type == Gst.MessageType.EOS: + # file finished playing + self.player.set_state(Gst.State.NULL) + self.playing = False + self.cached_time = None + if self.finished_callback: + self.finished_callback() + + elif message.type == Gst.MessageType.ERROR: + # error + self.player.set_state(Gst.State.NULL) + err, debug = message.parse_error() + print(f"Error: {err}") + self.playing = False + + def _set_volume(self, volume): + """Set the volume level to a value in the range [0, 1.5].""" + # And the volume for the playbin. + self._volume = volume + self.player.set_property("volume", volume) + + def _get_volume(self): + """Get the volume as a float in the range [0, 1.5].""" + return self._volume + + volume = property(_get_volume, _set_volume) + + def play_file(self, path): + """Immediately begin playing the audio file at the given + path. + """ + self.player.set_state(Gst.State.NULL) + if isinstance(path, str): + path = path.encode('utf-8') + uri = 'file://' + urllib.parse.quote(path) + self.player.set_property("uri", uri) + self.player.set_state(Gst.State.PLAYING) + self.playing = True + + def play(self): + """If paused, resume playback.""" + if self._get_state() == Gst.State.PAUSED: + self.player.set_state(Gst.State.PLAYING) + self.playing = True + + def pause(self): + """Pause playback.""" + self.player.set_state(Gst.State.PAUSED) + + def stop(self): + """Halt playback.""" + self.player.set_state(Gst.State.NULL) + self.playing = False + self.cached_time = None + + def run(self): + """Start a new thread for the player. + + Call this function before trying to play any music with + play_file() or play(). + """ + + # If we don't use the MainLoop, messages are never sent. + + def start(): + loop = GLib.MainLoop() + loop.run() + + _thread.start_new_thread(start, ()) + + def time(self): + """Returns a tuple containing (position, length) where both + values are integers in seconds. If no stream is available, + returns (0, 0). + """ + fmt = Gst.Format(Gst.Format.TIME) + try: + posq = self.player.query_position(fmt) + if not posq[0]: + raise QueryError("query_position failed") + pos = posq[1] / (10 ** 9) + + lengthq = self.player.query_duration(fmt) + if not lengthq[0]: + raise QueryError("query_duration failed") + length = lengthq[1] / (10 ** 9) + + self.cached_time = (pos, length) + return (pos, length) + + except QueryError: + # Stream not ready. For small gaps of time, for instance + # after seeking, the time values are unavailable. For this + # reason, we cache recent. + if self.playing and self.cached_time: + return self.cached_time + else: + return (0, 0) + + def seek(self, position): + """Seeks to position (in seconds).""" + cur_pos, cur_len = self.time() + if position > cur_len: + self.stop() + return + + fmt = Gst.Format(Gst.Format.TIME) + ns = position * 10 ** 9 # convert to nanoseconds + self.player.seek_simple(fmt, Gst.SeekFlags.FLUSH, ns) + + # save new cached time + self.cached_time = (position, cur_len) + + def block(self): + """Block until playing finishes.""" + while self.playing: + time.sleep(1) + + def get_decoders(self): + return get_decoders() + + +def get_decoders(): + """Get supported audio decoders from GStreamer. + Returns a dict mapping decoder element names to the associated media types + and file extensions. + """ + # We only care about audio decoder elements. + filt = (Gst.ELEMENT_FACTORY_TYPE_DEPAYLOADER | + Gst.ELEMENT_FACTORY_TYPE_DEMUXER | + Gst.ELEMENT_FACTORY_TYPE_PARSER | + Gst.ELEMENT_FACTORY_TYPE_DECODER | + Gst.ELEMENT_FACTORY_TYPE_MEDIA_AUDIO) + + decoders = {} + mime_types = set() + for f in Gst.ElementFactory.list_get_elements(filt, Gst.Rank.NONE): + for pad in f.get_static_pad_templates(): + if pad.direction == Gst.PadDirection.SINK: + caps = pad.static_caps.get() + mimes = set() + for i in range(caps.get_size()): + struct = caps.get_structure(i) + mime = struct.get_name() + if mime == 'unknown/unknown': + continue + mimes.add(mime) + mime_types.add(mime) + if mimes: + decoders[f.get_name()] = (mimes, set()) + + # Check all the TypeFindFactory plugin features form the registry. If they + # are associated with an audio media type that we found above, get the list + # of corresponding file extensions. + mime_extensions = {mime: set() for mime in mime_types} + for feat in Gst.Registry.get().get_feature_list(Gst.TypeFindFactory): + caps = feat.get_caps() + if caps: + for i in range(caps.get_size()): + struct = caps.get_structure(i) + mime = struct.get_name() + if mime in mime_types: + mime_extensions[mime].update(feat.get_extensions()) + + # Fill in the slot we left for file extensions. + for name, (mimes, exts) in decoders.items(): + for mime in mimes: + exts.update(mime_extensions[mime]) + + return decoders + + +def play_simple(paths): + """Play the files in paths in a straightforward way, without + using the player's callback function. + """ + p = GstPlayer() + p.run() + for path in paths: + p.play_file(path) + p.block() + + +def play_complicated(paths): + """Play the files in the path one after the other by using the + callback function to advance to the next song. + """ + my_paths = copy.copy(paths) + + def next_song(): + my_paths.pop(0) + p.play_file(my_paths[0]) + + p = GstPlayer(next_song) + p.run() + p.play_file(my_paths[0]) + while my_paths: + time.sleep(1) + + +if __name__ == '__main__': + # A very simple command-line player. Just give it names of audio + # files on the command line; these are all played in sequence. + paths = [os.path.abspath(os.path.expanduser(p)) + for p in sys.argv[1:]] + # play_simple(paths) + play_complicated(paths) diff --git a/lib/beetsplug/bpm.py b/lib/beetsplug/bpm.py new file mode 100644 index 00000000..5aa2d95a --- /dev/null +++ b/lib/beetsplug/bpm.py @@ -0,0 +1,87 @@ +# This file is part of beets. +# Copyright 2016, aroquen +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Determine BPM by pressing a key to the rhythm.""" + + +import time + +from beets import ui +from beets.plugins import BeetsPlugin + + +def bpm(max_strokes): + """Returns average BPM (possibly of a playing song) + listening to Enter keystrokes. + """ + t0 = None + dt = [] + for i in range(max_strokes): + # Press enter to the rhythm... + s = input() + if s == '': + t1 = time.time() + # Only start measuring at the second stroke + if t0: + dt.append(t1 - t0) + t0 = t1 + else: + break + + # Return average BPM + # bpm = (max_strokes-1) / sum(dt) * 60 + ave = sum([1.0 / dti * 60 for dti in dt]) / len(dt) + return ave + + +class BPMPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add({ + 'max_strokes': 3, + 'overwrite': True, + }) + + def commands(self): + cmd = ui.Subcommand('bpm', + help='determine bpm of a song by pressing ' + 'a key to the rhythm') + cmd.func = self.command + return [cmd] + + def command(self, lib, opts, args): + items = lib.items(ui.decargs(args)) + write = ui.should_write() + self.get_bpm(items, write) + + def get_bpm(self, items, write=False): + overwrite = self.config['overwrite'].get(bool) + if len(items) > 1: + raise ValueError('Can only get bpm of one song at time') + + item = items[0] + if item['bpm']: + self._log.info('Found bpm {0}', item['bpm']) + if not overwrite: + return + + self._log.info('Press Enter {0} times to the rhythm or Ctrl-D ' + 'to exit', self.config['max_strokes'].get(int)) + new_bpm = bpm(self.config['max_strokes'].get(int)) + item['bpm'] = int(new_bpm) + if write: + item.try_write() + item.store() + self._log.info('Added new bpm {0}', item['bpm']) diff --git a/lib/beetsplug/bpsync.py b/lib/beetsplug/bpsync.py new file mode 100644 index 00000000..5b28d6d2 --- /dev/null +++ b/lib/beetsplug/bpsync.py @@ -0,0 +1,186 @@ +# This file is part of beets. +# Copyright 2019, Rahul Ahuja. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Update library's tags using Beatport. +""" + +from beets.plugins import BeetsPlugin, apply_item_changes +from beets import autotag, library, ui, util + +from .beatport import BeatportPlugin + + +class BPSyncPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.beatport_plugin = BeatportPlugin() + self.beatport_plugin.setup() + + def commands(self): + cmd = ui.Subcommand('bpsync', help='update metadata from Beatport') + cmd.parser.add_option( + '-p', + '--pretend', + action='store_true', + help='show all changes but do nothing', + ) + cmd.parser.add_option( + '-m', + '--move', + action='store_true', + dest='move', + help="move files in the library directory", + ) + cmd.parser.add_option( + '-M', + '--nomove', + action='store_false', + dest='move', + help="don't move files in library", + ) + cmd.parser.add_option( + '-W', + '--nowrite', + action='store_false', + default=None, + dest='write', + help="don't write updated metadata to files", + ) + cmd.parser.add_format_option() + cmd.func = self.func + return [cmd] + + def func(self, lib, opts, args): + """Command handler for the bpsync function. + """ + move = ui.should_move(opts.move) + pretend = opts.pretend + write = ui.should_write(opts.write) + query = ui.decargs(args) + + self.singletons(lib, query, move, pretend, write) + self.albums(lib, query, move, pretend, write) + + def singletons(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for items matched by + query. + """ + for item in lib.items(query + ['singleton:true']): + if not item.mb_trackid: + self._log.info( + 'Skipping singleton with no mb_trackid: {}', item + ) + continue + + if not self.is_beatport_track(item): + self._log.info( + 'Skipping non-{} singleton: {}', + self.beatport_plugin.data_source, + item, + ) + continue + + # Apply. + trackinfo = self.beatport_plugin.track_for_id(item.mb_trackid) + with lib.transaction(): + autotag.apply_item_metadata(item, trackinfo) + apply_item_changes(lib, item, move, pretend, write) + + @staticmethod + def is_beatport_track(item): + return ( + item.get('data_source') == BeatportPlugin.data_source + and item.mb_trackid.isnumeric() + ) + + def get_album_tracks(self, album): + if not album.mb_albumid: + self._log.info('Skipping album with no mb_albumid: {}', album) + return False + if not album.mb_albumid.isnumeric(): + self._log.info( + 'Skipping album with invalid {} ID: {}', + self.beatport_plugin.data_source, + album, + ) + return False + items = list(album.items()) + if album.get('data_source') == self.beatport_plugin.data_source: + return items + if not all(self.is_beatport_track(item) for item in items): + self._log.info( + 'Skipping non-{} release: {}', + self.beatport_plugin.data_source, + album, + ) + return False + return items + + def albums(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for albums matched by + query and their items. + """ + # Process matching albums. + for album in lib.albums(query): + # Do we have a valid Beatport album? + items = self.get_album_tracks(album) + if not items: + continue + + # Get the Beatport album information. + albuminfo = self.beatport_plugin.album_for_id(album.mb_albumid) + if not albuminfo: + self._log.info( + 'Release ID {} not found for album {}', + album.mb_albumid, + album, + ) + continue + + beatport_trackid_to_trackinfo = { + track.track_id: track for track in albuminfo.tracks + } + library_trackid_to_item = { + int(item.mb_trackid): item for item in items + } + item_to_trackinfo = { + item: beatport_trackid_to_trackinfo[track_id] + for track_id, item in library_trackid_to_item.items() + } + + self._log.info('applying changes to {}', album) + with lib.transaction(): + autotag.apply_metadata(albuminfo, item_to_trackinfo) + changed = False + # Find any changed item to apply Beatport changes to album. + any_changed_item = items[0] + for item in items: + item_changed = ui.show_model_changes(item) + changed |= item_changed + if item_changed: + any_changed_item = item + apply_item_changes(lib, item, move, pretend, write) + + if pretend or not changed: + continue + + # Update album structure to reflect an item in it. + for key in library.Album.item_keys: + album[key] = any_changed_item[key] + album.store() + + # Move album art (and any inconsistent items). + if move and lib.directory in util.ancestry(items[0].path): + self._log.debug('moving album {}', album) + album.move() diff --git a/lib/beetsplug/bucket.py b/lib/beetsplug/bucket.py new file mode 100644 index 00000000..9ed50b45 --- /dev/null +++ b/lib/beetsplug/bucket.py @@ -0,0 +1,242 @@ +# This file is part of beets. +# Copyright 2016, Fabrice Laporte. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Provides the %bucket{} function for path formatting. +""" + + +from datetime import datetime +import re +import string +from itertools import tee + +from beets import plugins, ui + + +ASCII_DIGITS = string.digits + string.ascii_lowercase + + +class BucketError(Exception): + pass + + +def pairwise(iterable): + "s -> (s0,s1), (s1,s2), (s2, s3), ..." + a, b = tee(iterable) + next(b, None) + return zip(a, b) + + +def span_from_str(span_str): + """Build a span dict from the span string representation. + """ + + def normalize_year(d, yearfrom): + """Convert string to a 4 digits year + """ + if yearfrom < 100: + raise BucketError("%d must be expressed on 4 digits" % yearfrom) + + # if two digits only, pick closest year that ends by these two + # digits starting from yearfrom + if d < 100: + if (d % 100) < (yearfrom % 100): + d = (yearfrom - yearfrom % 100) + 100 + d + else: + d = (yearfrom - yearfrom % 100) + d + return d + + years = [int(x) for x in re.findall(r'\d+', span_str)] + if not years: + raise ui.UserError("invalid range defined for year bucket '%s': no " + "year found" % span_str) + try: + years = [normalize_year(x, years[0]) for x in years] + except BucketError as exc: + raise ui.UserError("invalid range defined for year bucket '%s': %s" % + (span_str, exc)) + + res = {'from': years[0], 'str': span_str} + if len(years) > 1: + res['to'] = years[-1] + return res + + +def complete_year_spans(spans): + """Set the `to` value of spans if empty and sort them chronologically. + """ + spans.sort(key=lambda x: x['from']) + for (x, y) in pairwise(spans): + if 'to' not in x: + x['to'] = y['from'] - 1 + if spans and 'to' not in spans[-1]: + spans[-1]['to'] = datetime.now().year + + +def extend_year_spans(spans, spanlen, start=1900, end=2014): + """Add new spans to given spans list so that every year of [start,end] + belongs to a span. + """ + extended_spans = spans[:] + for (x, y) in pairwise(spans): + # if a gap between two spans, fill the gap with as much spans of + # spanlen length as necessary + for span_from in range(x['to'] + 1, y['from'], spanlen): + extended_spans.append({'from': span_from}) + # Create spans prior to declared ones + for span_from in range(spans[0]['from'] - spanlen, start, -spanlen): + extended_spans.append({'from': span_from}) + # Create spans after the declared ones + for span_from in range(spans[-1]['to'] + 1, end, spanlen): + extended_spans.append({'from': span_from}) + + complete_year_spans(extended_spans) + return extended_spans + + +def build_year_spans(year_spans_str): + """Build a chronologically ordered list of spans dict from unordered spans + stringlist. + """ + spans = [] + for elem in year_spans_str: + spans.append(span_from_str(elem)) + complete_year_spans(spans) + return spans + + +def str2fmt(s): + """Deduces formatting syntax from a span string. + """ + regex = re.compile(r"(?P\D*)(?P\d+)(?P\D*)" + r"(?P\d*)(?P\D*)") + m = re.match(regex, s) + + res = {'fromnchars': len(m.group('fromyear')), + 'tonchars': len(m.group('toyear'))} + res['fmt'] = "{}%s{}{}{}".format(m.group('bef'), + m.group('sep'), + '%s' if res['tonchars'] else '', + m.group('after')) + return res + + +def format_span(fmt, yearfrom, yearto, fromnchars, tonchars): + """Return a span string representation. + """ + args = (str(yearfrom)[-fromnchars:]) + if tonchars: + args = (str(yearfrom)[-fromnchars:], str(yearto)[-tonchars:]) + + return fmt % args + + +def extract_modes(spans): + """Extract the most common spans lengths and representation formats + """ + rangelen = sorted([x['to'] - x['from'] + 1 for x in spans]) + deflen = sorted(rangelen, key=rangelen.count)[-1] + reprs = [str2fmt(x['str']) for x in spans] + deffmt = sorted(reprs, key=reprs.count)[-1] + return deflen, deffmt + + +def build_alpha_spans(alpha_spans_str, alpha_regexs): + """Extract alphanumerics from string and return sorted list of chars + [from...to] + """ + spans = [] + + for elem in alpha_spans_str: + if elem in alpha_regexs: + spans.append(re.compile(alpha_regexs[elem])) + else: + bucket = sorted([x for x in elem.lower() if x.isalnum()]) + if bucket: + begin_index = ASCII_DIGITS.index(bucket[0]) + end_index = ASCII_DIGITS.index(bucket[-1]) + else: + raise ui.UserError("invalid range defined for alpha bucket " + "'%s': no alphanumeric character found" % + elem) + spans.append( + re.compile( + "^[" + ASCII_DIGITS[begin_index:end_index + 1] + + ASCII_DIGITS[begin_index:end_index + 1].upper() + "]" + ) + ) + return spans + + +class BucketPlugin(plugins.BeetsPlugin): + def __init__(self): + super().__init__() + self.template_funcs['bucket'] = self._tmpl_bucket + + self.config.add({ + 'bucket_year': [], + 'bucket_alpha': [], + 'bucket_alpha_regex': {}, + 'extrapolate': False + }) + self.setup() + + def setup(self): + """Setup plugin from config options + """ + self.year_spans = build_year_spans(self.config['bucket_year'].get()) + if self.year_spans and self.config['extrapolate']: + [self.ys_len_mode, + self.ys_repr_mode] = extract_modes(self.year_spans) + self.year_spans = extend_year_spans(self.year_spans, + self.ys_len_mode) + + self.alpha_spans = build_alpha_spans( + self.config['bucket_alpha'].get(), + self.config['bucket_alpha_regex'].get() + ) + + def find_bucket_year(self, year): + """Return bucket that matches given year or return the year + if no matching bucket. + """ + for ys in self.year_spans: + if ys['from'] <= int(year) <= ys['to']: + if 'str' in ys: + return ys['str'] + else: + return format_span(self.ys_repr_mode['fmt'], + ys['from'], ys['to'], + self.ys_repr_mode['fromnchars'], + self.ys_repr_mode['tonchars']) + return year + + def find_bucket_alpha(self, s): + """Return alpha-range bucket that matches given string or return the + string initial if no matching bucket. + """ + for (i, span) in enumerate(self.alpha_spans): + if span.match(s): + return self.config['bucket_alpha'].get()[i] + return s[0].upper() + + def _tmpl_bucket(self, text, field=None): + if not field and len(text) == 4 and text.isdigit(): + field = 'year' + + if field == 'year': + func = self.find_bucket_year + else: + func = self.find_bucket_alpha + return func(text) diff --git a/lib/beetsplug/chroma.py b/lib/beetsplug/chroma.py new file mode 100644 index 00000000..353923aa --- /dev/null +++ b/lib/beetsplug/chroma.py @@ -0,0 +1,345 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds Chromaprint/Acoustid acoustic fingerprinting support to the +autotagger. Requires the pyacoustid library. +""" + +from beets import plugins +from beets import ui +from beets import util +from beets import config +from beets.autotag import hooks +import confuse +import acoustid +from collections import defaultdict +from functools import partial +import re + +API_KEY = '1vOwZtEn' +SCORE_THRESH = 0.5 +TRACK_ID_WEIGHT = 10.0 +COMMON_REL_THRESH = 0.6 # How many tracks must have an album in common? +MAX_RECORDINGS = 5 +MAX_RELEASES = 5 + +# Stores the Acoustid match information for each track. This is +# populated when an import task begins and then used when searching for +# candidates. It maps audio file paths to (recording_ids, release_ids) +# pairs. If a given path is not present in the mapping, then no match +# was found. +_matches = {} + +# Stores the fingerprint and Acoustid ID for each track. This is stored +# as metadata for each track for later use but is not relevant for +# autotagging. +_fingerprints = {} +_acoustids = {} + + +def prefix(it, count): + """Truncate an iterable to at most `count` items. + """ + for i, v in enumerate(it): + if i >= count: + break + yield v + + +def releases_key(release, countries, original_year): + """Used as a key to sort releases by date then preferred country + """ + date = release.get('date') + if date and original_year: + year = date.get('year', 9999) + month = date.get('month', 99) + day = date.get('day', 99) + else: + year = 9999 + month = 99 + day = 99 + + # Uses index of preferred countries to sort + country_key = 99 + if release.get('country'): + for i, country in enumerate(countries): + if country.match(release['country']): + country_key = i + break + + return (year, month, day, country_key) + + +def acoustid_match(log, path): + """Gets metadata for a file from Acoustid and populates the + _matches, _fingerprints, and _acoustids dictionaries accordingly. + """ + try: + duration, fp = acoustid.fingerprint_file(util.syspath(path)) + except acoustid.FingerprintGenerationError as exc: + log.error('fingerprinting of {0} failed: {1}', + util.displayable_path(repr(path)), exc) + return None + fp = fp.decode() + _fingerprints[path] = fp + try: + res = acoustid.lookup(API_KEY, fp, duration, + meta='recordings releases') + except acoustid.AcoustidError as exc: + log.debug('fingerprint matching {0} failed: {1}', + util.displayable_path(repr(path)), exc) + return None + log.debug('chroma: fingerprinted {0}', + util.displayable_path(repr(path))) + + # Ensure the response is usable and parse it. + if res['status'] != 'ok' or not res.get('results'): + log.debug('no match found') + return None + result = res['results'][0] # Best match. + if result['score'] < SCORE_THRESH: + log.debug('no results above threshold') + return None + _acoustids[path] = result['id'] + + # Get recording and releases from the result + if not result.get('recordings'): + log.debug('no recordings found') + return None + recording_ids = [] + releases = [] + for recording in result['recordings']: + recording_ids.append(recording['id']) + if 'releases' in recording: + releases.extend(recording['releases']) + + # The releases list is essentially in random order from the Acoustid lookup + # so we optionally sort it using the match.preferred configuration options. + # 'original_year' to sort the earliest first and + # 'countries' to then sort preferred countries first. + country_patterns = config['match']['preferred']['countries'].as_str_seq() + countries = [re.compile(pat, re.I) for pat in country_patterns] + original_year = config['match']['preferred']['original_year'] + releases.sort(key=partial(releases_key, + countries=countries, + original_year=original_year)) + release_ids = [rel['id'] for rel in releases] + + log.debug('matched recordings {0} on releases {1}', + recording_ids, release_ids) + _matches[path] = recording_ids, release_ids + + +# Plugin structure and autotagging logic. + + +def _all_releases(items): + """Given an iterable of Items, determines (according to Acoustid) + which releases the items have in common. Generates release IDs. + """ + # Count the number of "hits" for each release. + relcounts = defaultdict(int) + for item in items: + if item.path not in _matches: + continue + + _, release_ids = _matches[item.path] + for release_id in release_ids: + relcounts[release_id] += 1 + + for release_id, count in relcounts.items(): + if float(count) / len(items) > COMMON_REL_THRESH: + yield release_id + + +class AcoustidPlugin(plugins.BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({ + 'auto': True, + }) + config['acoustid']['apikey'].redact = True + + if self.config['auto']: + self.register_listener('import_task_start', self.fingerprint_task) + self.register_listener('import_task_apply', apply_acoustid_metadata) + + def fingerprint_task(self, task, session): + return fingerprint_task(self._log, task, session) + + def track_distance(self, item, info): + dist = hooks.Distance() + if item.path not in _matches or not info.track_id: + # Match failed or no track ID. + return dist + + recording_ids, _ = _matches[item.path] + dist.add_expr('track_id', info.track_id not in recording_ids) + return dist + + def candidates(self, items, artist, album, va_likely, extra_tags=None): + albums = [] + for relid in prefix(_all_releases(items), MAX_RELEASES): + album = hooks.album_for_mbid(relid) + if album: + albums.append(album) + + self._log.debug('acoustid album candidates: {0}', len(albums)) + return albums + + def item_candidates(self, item, artist, title): + if item.path not in _matches: + return [] + + recording_ids, _ = _matches[item.path] + tracks = [] + for recording_id in prefix(recording_ids, MAX_RECORDINGS): + track = hooks.track_for_mbid(recording_id) + if track: + tracks.append(track) + self._log.debug('acoustid item candidates: {0}', len(tracks)) + return tracks + + def commands(self): + submit_cmd = ui.Subcommand('submit', + help='submit Acoustid fingerprints') + + def submit_cmd_func(lib, opts, args): + try: + apikey = config['acoustid']['apikey'].as_str() + except confuse.NotFoundError: + raise ui.UserError('no Acoustid user API key provided') + submit_items(self._log, apikey, lib.items(ui.decargs(args))) + submit_cmd.func = submit_cmd_func + + fingerprint_cmd = ui.Subcommand( + 'fingerprint', + help='generate fingerprints for items without them' + ) + + def fingerprint_cmd_func(lib, opts, args): + for item in lib.items(ui.decargs(args)): + fingerprint_item(self._log, item, write=ui.should_write()) + fingerprint_cmd.func = fingerprint_cmd_func + + return [submit_cmd, fingerprint_cmd] + + +# Hooks into import process. + + +def fingerprint_task(log, task, session): + """Fingerprint each item in the task for later use during the + autotagging candidate search. + """ + items = task.items if task.is_album else [task.item] + for item in items: + acoustid_match(log, item.path) + + +def apply_acoustid_metadata(task, session): + """Apply Acoustid metadata (fingerprint and ID) to the task's items. + """ + for item in task.imported_items(): + if item.path in _fingerprints: + item.acoustid_fingerprint = _fingerprints[item.path] + if item.path in _acoustids: + item.acoustid_id = _acoustids[item.path] + + +# UI commands. + + +def submit_items(log, userkey, items, chunksize=64): + """Submit fingerprints for the items to the Acoustid server. + """ + data = [] # The running list of dictionaries to submit. + + def submit_chunk(): + """Submit the current accumulated fingerprint data.""" + log.info('submitting {0} fingerprints', len(data)) + try: + acoustid.submit(API_KEY, userkey, data) + except acoustid.AcoustidError as exc: + log.warning('acoustid submission error: {0}', exc) + del data[:] + + for item in items: + fp = fingerprint_item(log, item, write=ui.should_write()) + + # Construct a submission dictionary for this item. + item_data = { + 'duration': int(item.length), + 'fingerprint': fp, + } + if item.mb_trackid: + item_data['mbid'] = item.mb_trackid + log.debug('submitting MBID') + else: + item_data.update({ + 'track': item.title, + 'artist': item.artist, + 'album': item.album, + 'albumartist': item.albumartist, + 'year': item.year, + 'trackno': item.track, + 'discno': item.disc, + }) + log.debug('submitting textual metadata') + data.append(item_data) + + # If we have enough data, submit a chunk. + if len(data) >= chunksize: + submit_chunk() + + # Submit remaining data in a final chunk. + if data: + submit_chunk() + + +def fingerprint_item(log, item, write=False): + """Get the fingerprint for an Item. If the item already has a + fingerprint, it is not regenerated. If fingerprint generation fails, + return None. If the items are associated with a library, they are + saved to the database. If `write` is set, then the new fingerprints + are also written to files' metadata. + """ + # Get a fingerprint and length for this track. + if not item.length: + log.info('{0}: no duration available', + util.displayable_path(item.path)) + elif item.acoustid_fingerprint: + if write: + log.info('{0}: fingerprint exists, skipping', + util.displayable_path(item.path)) + else: + log.info('{0}: using existing fingerprint', + util.displayable_path(item.path)) + return item.acoustid_fingerprint + else: + log.info('{0}: fingerprinting', + util.displayable_path(item.path)) + try: + _, fp = acoustid.fingerprint_file(util.syspath(item.path)) + item.acoustid_fingerprint = fp.decode() + if write: + log.info('{0}: writing fingerprint', + util.displayable_path(item.path)) + item.try_write() + if item._db: + item.store() + return item.acoustid_fingerprint + except acoustid.FingerprintGenerationError as exc: + log.info('fingerprint generation failed: {0}', exc) diff --git a/lib/beetsplug/convert.py b/lib/beetsplug/convert.py new file mode 100644 index 00000000..6bc07c28 --- /dev/null +++ b/lib/beetsplug/convert.py @@ -0,0 +1,534 @@ +# This file is part of beets. +# Copyright 2016, Jakob Schnitzer. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Converts tracks or albums to external directory +""" +from beets.util import par_map, decode_commandline_path, arg_encoding + +import os +import threading +import subprocess +import tempfile +import shlex +from string import Template + +from beets import ui, util, plugins, config +from beets.plugins import BeetsPlugin +from confuse import ConfigTypeError +from beets import art +from beets.util.artresizer import ArtResizer +from beets.library import parse_query_string +from beets.library import Item + +_fs_lock = threading.Lock() +_temp_files = [] # Keep track of temporary transcoded files for deletion. + +# Some convenient alternate names for formats. +ALIASES = { + 'wma': 'windows media', + 'vorbis': 'ogg', +} + +LOSSLESS_FORMATS = ['ape', 'flac', 'alac', 'wav', 'aiff'] + + +def replace_ext(path, ext): + """Return the path with its extension replaced by `ext`. + + The new extension must not contain a leading dot. + """ + ext_dot = b'.' + ext + return os.path.splitext(path)[0] + ext_dot + + +def get_format(fmt=None): + """Return the command template and the extension from the config. + """ + if not fmt: + fmt = config['convert']['format'].as_str().lower() + fmt = ALIASES.get(fmt, fmt) + + try: + format_info = config['convert']['formats'][fmt].get(dict) + command = format_info['command'] + extension = format_info.get('extension', fmt) + except KeyError: + raise ui.UserError( + 'convert: format {} needs the "command" field' + .format(fmt) + ) + except ConfigTypeError: + command = config['convert']['formats'][fmt].get(str) + extension = fmt + + # Convenience and backwards-compatibility shortcuts. + keys = config['convert'].keys() + if 'command' in keys: + command = config['convert']['command'].as_str() + elif 'opts' in keys: + # Undocumented option for backwards compatibility with < 1.3.1. + command = 'ffmpeg -i $source -y {} $dest'.format( + config['convert']['opts'].as_str() + ) + if 'extension' in keys: + extension = config['convert']['extension'].as_str() + + return (command.encode('utf-8'), extension.encode('utf-8')) + + +def should_transcode(item, fmt): + """Determine whether the item should be transcoded as part of + conversion (i.e., its bitrate is high or it has the wrong format). + """ + no_convert_queries = config['convert']['no_convert'].as_str_seq() + if no_convert_queries: + for query_string in no_convert_queries: + query, _ = parse_query_string(query_string, Item) + if query.match(item): + return False + if config['convert']['never_convert_lossy_files'] and \ + not (item.format.lower() in LOSSLESS_FORMATS): + return False + maxbr = config['convert']['max_bitrate'].get(int) + return fmt.lower() != item.format.lower() or \ + item.bitrate >= 1000 * maxbr + + +class ConvertPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.config.add({ + 'dest': None, + 'pretend': False, + 'link': False, + 'hardlink': False, + 'threads': util.cpu_count(), + 'format': 'mp3', + 'id3v23': 'inherit', + 'formats': { + 'aac': { + 'command': 'ffmpeg -i $source -y -vn -acodec aac ' + '-aq 1 $dest', + 'extension': 'm4a', + }, + 'alac': { + 'command': 'ffmpeg -i $source -y -vn -acodec alac $dest', + 'extension': 'm4a', + }, + 'flac': 'ffmpeg -i $source -y -vn -acodec flac $dest', + 'mp3': 'ffmpeg -i $source -y -vn -aq 2 $dest', + 'opus': + 'ffmpeg -i $source -y -vn -acodec libopus -ab 96k $dest', + 'ogg': + 'ffmpeg -i $source -y -vn -acodec libvorbis -aq 3 $dest', + 'wma': + 'ffmpeg -i $source -y -vn -acodec wmav2 -vn $dest', + }, + 'max_bitrate': 500, + 'auto': False, + 'tmpdir': None, + 'quiet': False, + 'embed': True, + 'paths': {}, + 'no_convert': '', + 'never_convert_lossy_files': False, + 'copy_album_art': False, + 'album_art_maxwidth': 0, + 'delete_originals': False, + }) + self.early_import_stages = [self.auto_convert] + + self.register_listener('import_task_files', self._cleanup) + + def commands(self): + cmd = ui.Subcommand('convert', help='convert to external location') + cmd.parser.add_option('-p', '--pretend', action='store_true', + help='show actions but do nothing') + cmd.parser.add_option('-t', '--threads', action='store', type='int', + help='change the number of threads, \ + defaults to maximum available processors') + cmd.parser.add_option('-k', '--keep-new', action='store_true', + dest='keep_new', help='keep only the converted \ + and move the old files') + cmd.parser.add_option('-d', '--dest', action='store', + help='set the destination directory') + cmd.parser.add_option('-f', '--format', action='store', dest='format', + help='set the target format of the tracks') + cmd.parser.add_option('-y', '--yes', action='store_true', dest='yes', + help='do not ask for confirmation') + cmd.parser.add_option('-l', '--link', action='store_true', dest='link', + help='symlink files that do not \ + need transcoding.') + cmd.parser.add_option('-H', '--hardlink', action='store_true', + dest='hardlink', + help='hardlink files that do not \ + need transcoding. Overrides --link.') + cmd.parser.add_album_option() + cmd.func = self.convert_func + return [cmd] + + def auto_convert(self, config, task): + if self.config['auto']: + par_map(lambda item: self.convert_on_import(config.lib, item), + task.imported_items()) + + # Utilities converted from functions to methods on logging overhaul + + def encode(self, command, source, dest, pretend=False): + """Encode `source` to `dest` using command template `command`. + + Raises `subprocess.CalledProcessError` if the command exited with a + non-zero status code. + """ + # The paths and arguments must be bytes. + assert isinstance(command, bytes) + assert isinstance(source, bytes) + assert isinstance(dest, bytes) + + quiet = self.config['quiet'].get(bool) + + if not quiet and not pretend: + self._log.info('Encoding {0}', util.displayable_path(source)) + + command = command.decode(arg_encoding(), 'surrogateescape') + source = decode_commandline_path(source) + dest = decode_commandline_path(dest) + + # Substitute $source and $dest in the argument list. + args = shlex.split(command) + encode_cmd = [] + for i, arg in enumerate(args): + args[i] = Template(arg).safe_substitute({ + 'source': source, + 'dest': dest, + }) + encode_cmd.append(args[i].encode(util.arg_encoding())) + + if pretend: + self._log.info('{0}', ' '.join(ui.decargs(args))) + return + + try: + util.command_output(encode_cmd) + except subprocess.CalledProcessError as exc: + # Something went wrong (probably Ctrl+C), remove temporary files + self._log.info('Encoding {0} failed. Cleaning up...', + util.displayable_path(source)) + self._log.debug('Command {0} exited with status {1}: {2}', + args, + exc.returncode, + exc.output) + util.remove(dest) + util.prune_dirs(os.path.dirname(dest)) + raise + except OSError as exc: + raise ui.UserError( + "convert: couldn't invoke '{}': {}".format( + ' '.join(ui.decargs(args)), exc + ) + ) + + if not quiet and not pretend: + self._log.info('Finished encoding {0}', + util.displayable_path(source)) + + def convert_item(self, dest_dir, keep_new, path_formats, fmt, + pretend=False, link=False, hardlink=False): + """A pipeline thread that converts `Item` objects from a + library. + """ + command, ext = get_format(fmt) + item, original, converted = None, None, None + while True: + item = yield (item, original, converted) + dest = item.destination(basedir=dest_dir, + path_formats=path_formats) + + # When keeping the new file in the library, we first move the + # current (pristine) file to the destination. We'll then copy it + # back to its old path or transcode it to a new path. + if keep_new: + original = dest + converted = item.path + if should_transcode(item, fmt): + converted = replace_ext(converted, ext) + else: + original = item.path + if should_transcode(item, fmt): + dest = replace_ext(dest, ext) + converted = dest + + # Ensure that only one thread tries to create directories at a + # time. (The existence check is not atomic with the directory + # creation inside this function.) + if not pretend: + with _fs_lock: + util.mkdirall(dest) + + if os.path.exists(util.syspath(dest)): + self._log.info('Skipping {0} (target file exists)', + util.displayable_path(item.path)) + continue + + if keep_new: + if pretend: + self._log.info('mv {0} {1}', + util.displayable_path(item.path), + util.displayable_path(original)) + else: + self._log.info('Moving to {0}', + util.displayable_path(original)) + util.move(item.path, original) + + if should_transcode(item, fmt): + linked = False + try: + self.encode(command, original, converted, pretend) + except subprocess.CalledProcessError: + continue + else: + linked = link or hardlink + if pretend: + msg = 'ln' if hardlink else ('ln -s' if link else 'cp') + + self._log.info('{2} {0} {1}', + util.displayable_path(original), + util.displayable_path(converted), + msg) + else: + # No transcoding necessary. + msg = 'Hardlinking' if hardlink \ + else ('Linking' if link else 'Copying') + + self._log.info('{1} {0}', + util.displayable_path(item.path), + msg) + + if hardlink: + util.hardlink(original, converted) + elif link: + util.link(original, converted) + else: + util.copy(original, converted) + + if pretend: + continue + + id3v23 = self.config['id3v23'].as_choice([True, False, 'inherit']) + if id3v23 == 'inherit': + id3v23 = None + + # Write tags from the database to the converted file. + item.try_write(path=converted, id3v23=id3v23) + + if keep_new: + # If we're keeping the transcoded file, read it again (after + # writing) to get new bitrate, duration, etc. + item.path = converted + item.read() + item.store() # Store new path and audio data. + + if self.config['embed'] and not linked: + album = item._cached_album + if album and album.artpath: + self._log.debug('embedding album art from {}', + util.displayable_path(album.artpath)) + art.embed_item(self._log, item, album.artpath, + itempath=converted, id3v23=id3v23) + + if keep_new: + plugins.send('after_convert', item=item, + dest=dest, keepnew=True) + else: + plugins.send('after_convert', item=item, + dest=converted, keepnew=False) + + def copy_album_art(self, album, dest_dir, path_formats, pretend=False, + link=False, hardlink=False): + """Copies or converts the associated cover art of the album. Album must + have at least one track. + """ + if not album or not album.artpath: + return + + album_item = album.items().get() + # Album shouldn't be empty. + if not album_item: + return + + # Get the destination of the first item (track) of the album, we use + # this function to format the path accordingly to path_formats. + dest = album_item.destination(basedir=dest_dir, + path_formats=path_formats) + + # Remove item from the path. + dest = os.path.join(*util.components(dest)[:-1]) + + dest = album.art_destination(album.artpath, item_dir=dest) + if album.artpath == dest: + return + + if not pretend: + util.mkdirall(dest) + + if os.path.exists(util.syspath(dest)): + self._log.info('Skipping {0} (target file exists)', + util.displayable_path(album.artpath)) + return + + # Decide whether we need to resize the cover-art image. + resize = False + maxwidth = None + if self.config['album_art_maxwidth']: + maxwidth = self.config['album_art_maxwidth'].get(int) + size = ArtResizer.shared.get_size(album.artpath) + self._log.debug('image size: {}', size) + if size: + resize = size[0] > maxwidth + else: + self._log.warning('Could not get size of image (please see ' + 'documentation for dependencies).') + + # Either copy or resize (while copying) the image. + if resize: + self._log.info('Resizing cover art from {0} to {1}', + util.displayable_path(album.artpath), + util.displayable_path(dest)) + if not pretend: + ArtResizer.shared.resize(maxwidth, album.artpath, dest) + else: + if pretend: + msg = 'ln' if hardlink else ('ln -s' if link else 'cp') + + self._log.info('{2} {0} {1}', + util.displayable_path(album.artpath), + util.displayable_path(dest), + msg) + else: + msg = 'Hardlinking' if hardlink \ + else ('Linking' if link else 'Copying') + + self._log.info('{2} cover art from {0} to {1}', + util.displayable_path(album.artpath), + util.displayable_path(dest), + msg) + if hardlink: + util.hardlink(album.artpath, dest) + elif link: + util.link(album.artpath, dest) + else: + util.copy(album.artpath, dest) + + def convert_func(self, lib, opts, args): + dest = opts.dest or self.config['dest'].get() + if not dest: + raise ui.UserError('no convert destination set') + dest = util.bytestring_path(dest) + + threads = opts.threads or self.config['threads'].get(int) + + path_formats = ui.get_path_formats(self.config['paths'] or None) + + fmt = opts.format or self.config['format'].as_str().lower() + + if opts.pretend is not None: + pretend = opts.pretend + else: + pretend = self.config['pretend'].get(bool) + + if opts.hardlink is not None: + hardlink = opts.hardlink + link = False + elif opts.link is not None: + hardlink = False + link = opts.link + else: + hardlink = self.config['hardlink'].get(bool) + link = self.config['link'].get(bool) + + if opts.album: + albums = lib.albums(ui.decargs(args)) + items = [i for a in albums for i in a.items()] + if not pretend: + for a in albums: + ui.print_(format(a, '')) + else: + items = list(lib.items(ui.decargs(args))) + if not pretend: + for i in items: + ui.print_(format(i, '')) + + if not items: + self._log.error('Empty query result.') + return + if not (pretend or opts.yes or ui.input_yn("Convert? (Y/n)")): + return + + if opts.album and self.config['copy_album_art']: + for album in albums: + self.copy_album_art(album, dest, path_formats, pretend, + link, hardlink) + + convert = [self.convert_item(dest, + opts.keep_new, + path_formats, + fmt, + pretend, + link, + hardlink) + for _ in range(threads)] + pipe = util.pipeline.Pipeline([iter(items), convert]) + pipe.run_parallel() + + def convert_on_import(self, lib, item): + """Transcode a file automatically after it is imported into the + library. + """ + fmt = self.config['format'].as_str().lower() + if should_transcode(item, fmt): + command, ext = get_format() + + # Create a temporary file for the conversion. + tmpdir = self.config['tmpdir'].get() + if tmpdir: + tmpdir = util.py3_path(util.bytestring_path(tmpdir)) + fd, dest = tempfile.mkstemp(util.py3_path(b'.' + ext), dir=tmpdir) + os.close(fd) + dest = util.bytestring_path(dest) + _temp_files.append(dest) # Delete the transcode later. + + # Convert. + try: + self.encode(command, item.path, dest) + except subprocess.CalledProcessError: + return + + # Change the newly-imported database entry to point to the + # converted file. + source_path = item.path + item.path = dest + item.write() + item.read() # Load new audio information data. + item.store() + + if self.config['delete_originals']: + self._log.info('Removing original file {0}', source_path) + util.remove(source_path, False) + + def _cleanup(self, task, session): + for path in task.old_paths: + if path in _temp_files: + if os.path.isfile(path): + util.remove(path) + _temp_files.remove(path) diff --git a/lib/beetsplug/deezer.py b/lib/beetsplug/deezer.py new file mode 100644 index 00000000..5f158f93 --- /dev/null +++ b/lib/beetsplug/deezer.py @@ -0,0 +1,230 @@ +# This file is part of beets. +# Copyright 2019, Rahul Ahuja. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds Deezer release and track search support to the autotagger +""" + +import collections + +import unidecode +import requests + +from beets import ui +from beets.autotag import AlbumInfo, TrackInfo +from beets.plugins import MetadataSourcePlugin, BeetsPlugin + + +class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin): + data_source = 'Deezer' + + # Base URLs for the Deezer API + # Documentation: https://developers.deezer.com/api/ + search_url = 'https://api.deezer.com/search/' + album_url = 'https://api.deezer.com/album/' + track_url = 'https://api.deezer.com/track/' + + id_regex = { + 'pattern': r'(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)', + 'match_group': 4, + } + + def __init__(self): + super().__init__() + + def album_for_id(self, album_id): + """Fetch an album by its Deezer ID or URL and return an + AlbumInfo object or None if the album is not found. + + :param album_id: Deezer ID or URL for the album. + :type album_id: str + :return: AlbumInfo object for album. + :rtype: beets.autotag.hooks.AlbumInfo or None + """ + deezer_id = self._get_id('album', album_id) + if deezer_id is None: + return None + + album_data = requests.get(self.album_url + deezer_id).json() + artist, artist_id = self.get_artist(album_data['contributors']) + + release_date = album_data['release_date'] + date_parts = [int(part) for part in release_date.split('-')] + num_date_parts = len(date_parts) + + if num_date_parts == 3: + year, month, day = date_parts + elif num_date_parts == 2: + year, month = date_parts + day = None + elif num_date_parts == 1: + year = date_parts[0] + month = None + day = None + else: + raise ui.UserError( + "Invalid `release_date` returned " + "by {} API: '{}'".format(self.data_source, release_date) + ) + + tracks_data = requests.get( + self.album_url + deezer_id + '/tracks' + ).json()['data'] + if not tracks_data: + return None + tracks = [] + medium_totals = collections.defaultdict(int) + for i, track_data in enumerate(tracks_data, start=1): + track = self._get_track(track_data) + track.index = i + medium_totals[track.medium] += 1 + tracks.append(track) + for track in tracks: + track.medium_total = medium_totals[track.medium] + + return AlbumInfo( + album=album_data['title'], + album_id=deezer_id, + artist=artist, + artist_credit=self.get_artist([album_data['artist']])[0], + artist_id=artist_id, + tracks=tracks, + albumtype=album_data['record_type'], + va=len(album_data['contributors']) == 1 + and artist.lower() == 'various artists', + year=year, + month=month, + day=day, + label=album_data['label'], + mediums=max(medium_totals.keys()), + data_source=self.data_source, + data_url=album_data['link'], + ) + + def _get_track(self, track_data): + """Convert a Deezer track object dict to a TrackInfo object. + + :param track_data: Deezer Track object dict + :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo + """ + artist, artist_id = self.get_artist( + track_data.get('contributors', [track_data['artist']]) + ) + return TrackInfo( + title=track_data['title'], + track_id=track_data['id'], + artist=artist, + artist_id=artist_id, + length=track_data['duration'], + index=track_data['track_position'], + medium=track_data['disk_number'], + medium_index=track_data['track_position'], + data_source=self.data_source, + data_url=track_data['link'], + ) + + def track_for_id(self, track_id=None, track_data=None): + """Fetch a track by its Deezer ID or URL and return a + TrackInfo object or None if the track is not found. + + :param track_id: (Optional) Deezer ID or URL for the track. Either + ``track_id`` or ``track_data`` must be provided. + :type track_id: str + :param track_data: (Optional) Simplified track object dict. May be + provided instead of ``track_id`` to avoid unnecessary API calls. + :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo or None + """ + if track_data is None: + deezer_id = self._get_id('track', track_id) + if deezer_id is None: + return None + track_data = requests.get(self.track_url + deezer_id).json() + track = self._get_track(track_data) + + # Get album's tracks to set `track.index` (position on the entire + # release) and `track.medium_total` (total number of tracks on + # the track's disc). + album_tracks_data = requests.get( + self.album_url + str(track_data['album']['id']) + '/tracks' + ).json()['data'] + medium_total = 0 + for i, track_data in enumerate(album_tracks_data, start=1): + if track_data['disk_number'] == track.medium: + medium_total += 1 + if track_data['id'] == track.track_id: + track.index = i + track.medium_total = medium_total + return track + + @staticmethod + def _construct_search_query(filters=None, keywords=''): + """Construct a query string with the specified filters and keywords to + be provided to the Deezer Search API + (https://developers.deezer.com/api/search). + + :param filters: (Optional) Field filters to apply. + :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: Query string to be provided to the Search API. + :rtype: str + """ + query_components = [ + keywords, + ' '.join(f'{k}:"{v}"' for k, v in filters.items()), + ] + query = ' '.join([q for q in query_components if q]) + if not isinstance(query, str): + query = query.decode('utf8') + return unidecode.unidecode(query) + + def _search_api(self, query_type, filters=None, keywords=''): + """Query the Deezer Search API for the specified ``keywords``, applying + the provided ``filters``. + + :param query_type: The Deezer Search API method to use. Valid types + are: 'album', 'artist', 'history', 'playlist', 'podcast', + 'radio', 'track', 'user', and 'track'. + :type query_type: str + :param filters: (Optional) Field filters to apply. + :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: JSON data for the class:`Response ` object or None + if no search results are returned. + :rtype: dict or None + """ + query = self._construct_search_query( + keywords=keywords, filters=filters + ) + if not query: + return None + self._log.debug( + f"Searching {self.data_source} for '{query}'" + ) + response = requests.get( + self.search_url + query_type, params={'q': query} + ) + response.raise_for_status() + response_data = response.json().get('data', []) + self._log.debug( + "Found {} result(s) from {} for '{}'", + len(response_data), + self.data_source, + query, + ) + return response_data diff --git a/lib/beetsplug/discogs.py b/lib/beetsplug/discogs.py new file mode 100644 index 00000000..d015e420 --- /dev/null +++ b/lib/beetsplug/discogs.py @@ -0,0 +1,606 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds Discogs album search support to the autotagger. Requires the +python3-discogs-client library. +""" + +import beets.ui +from beets import config +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.plugins import MetadataSourcePlugin, BeetsPlugin, get_distance +import confuse +from discogs_client import Release, Master, Client +from discogs_client.exceptions import DiscogsAPIError +from requests.exceptions import ConnectionError +import http.client +import beets +import re +import time +import json +import socket +import os +import traceback +from string import ascii_lowercase + + +USER_AGENT = f'beets/{beets.__version__} +https://beets.io/' +API_KEY = 'rAzVUQYRaoFjeBjyWuWZ' +API_SECRET = 'plxtUTqoCzwxZpqdPysCwGuBSmZNdZVy' + +# Exceptions that discogs_client should really handle but does not. +CONNECTION_ERRORS = (ConnectionError, socket.error, http.client.HTTPException, + ValueError, # JSON decoding raises a ValueError. + DiscogsAPIError) + + +class DiscogsPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add({ + 'apikey': API_KEY, + 'apisecret': API_SECRET, + 'tokenfile': 'discogs_token.json', + 'source_weight': 0.5, + 'user_token': '', + 'separator': ', ', + 'index_tracks': False, + }) + self.config['apikey'].redact = True + self.config['apisecret'].redact = True + self.config['user_token'].redact = True + self.discogs_client = None + self.register_listener('import_begin', self.setup) + + def setup(self, session=None): + """Create the `discogs_client` field. Authenticate if necessary. + """ + c_key = self.config['apikey'].as_str() + c_secret = self.config['apisecret'].as_str() + + # Try using a configured user token (bypassing OAuth login). + user_token = self.config['user_token'].as_str() + if user_token: + # The rate limit for authenticated users goes up to 60 + # requests per minute. + self.discogs_client = Client(USER_AGENT, user_token=user_token) + return + + # Get the OAuth token from a file or log in. + try: + with open(self._tokenfile()) as f: + tokendata = json.load(f) + except OSError: + # No token yet. Generate one. + token, secret = self.authenticate(c_key, c_secret) + else: + token = tokendata['token'] + secret = tokendata['secret'] + + self.discogs_client = Client(USER_AGENT, c_key, c_secret, + token, secret) + + def reset_auth(self): + """Delete token file & redo the auth steps. + """ + os.remove(self._tokenfile()) + self.setup() + + def _tokenfile(self): + """Get the path to the JSON file for storing the OAuth token. + """ + return self.config['tokenfile'].get(confuse.Filename(in_app_dir=True)) + + def authenticate(self, c_key, c_secret): + # Get the link for the OAuth page. + auth_client = Client(USER_AGENT, c_key, c_secret) + try: + _, _, url = auth_client.get_authorize_url() + except CONNECTION_ERRORS as e: + self._log.debug('connection error: {0}', e) + raise beets.ui.UserError('communication with Discogs failed') + + beets.ui.print_("To authenticate with Discogs, visit:") + beets.ui.print_(url) + + # Ask for the code and validate it. + code = beets.ui.input_("Enter the code:") + try: + token, secret = auth_client.get_access_token(code) + except DiscogsAPIError: + raise beets.ui.UserError('Discogs authorization failed') + except CONNECTION_ERRORS as e: + self._log.debug('connection error: {0}', e) + raise beets.ui.UserError('Discogs token request failed') + + # Save the token for later use. + self._log.debug('Discogs token {0}, secret {1}', token, secret) + with open(self._tokenfile(), 'w') as f: + json.dump({'token': token, 'secret': secret}, f) + + return token, secret + + def album_distance(self, items, album_info, mapping): + """Returns the album distance. + """ + return get_distance( + data_source='Discogs', + info=album_info, + config=self.config + ) + + def track_distance(self, item, track_info): + """Returns the track distance. + """ + return get_distance( + data_source='Discogs', + info=track_info, + config=self.config + ) + + def candidates(self, items, artist, album, va_likely, extra_tags=None): + """Returns a list of AlbumInfo objects for discogs search results + matching an album and artist (if not various). + """ + if not self.discogs_client: + return + + if va_likely: + query = album + else: + query = f'{artist} {album}' + try: + return self.get_albums(query) + except DiscogsAPIError as e: + self._log.debug('API Error: {0} (query: {1})', e, query) + if e.status_code == 401: + self.reset_auth() + return self.candidates(items, artist, album, va_likely) + else: + return [] + except CONNECTION_ERRORS: + self._log.debug('Connection error in album search', exc_info=True) + return [] + + @staticmethod + def extract_release_id_regex(album_id): + """Returns the Discogs_id or None.""" + # Discogs-IDs are simple integers. In order to avoid confusion with + # other metadata plugins, we only look for very specific formats of the + # input string: + # - plain integer, optionally wrapped in brackets and prefixed by an + # 'r', as this is how discogs displays the release ID on its webpage. + # - legacy url format: discogs.com//release/ + # - current url format: discogs.com/release/- + # See #291, #4080 and #4085 for the discussions leading up to these + # patterns. + # Regex has been tested here https://regex101.com/r/wyLdB4/2 + + for pattern in [ + r'^\[?r?(?P\d+)\]?$', + r'discogs\.com/release/(?P\d+)-', + r'discogs\.com/[^/]+/release/(?P\d+)', + ]: + match = re.search(pattern, album_id) + if match: + return int(match.group('id')) + + return None + + def album_for_id(self, album_id): + """Fetches an album by its Discogs ID and returns an AlbumInfo object + or None if the album is not found. + """ + if not self.discogs_client: + return + + self._log.debug('Searching for release {0}', album_id) + + discogs_id = self.extract_release_id_regex(album_id) + + if not discogs_id: + return None + + result = Release(self.discogs_client, {'id': discogs_id}) + # Try to obtain title to verify that we indeed have a valid Release + try: + getattr(result, 'title') + except DiscogsAPIError as e: + if e.status_code != 404: + self._log.debug('API Error: {0} (query: {1})', e, + result.data['resource_url']) + if e.status_code == 401: + self.reset_auth() + return self.album_for_id(album_id) + return None + except CONNECTION_ERRORS: + self._log.debug('Connection error in album lookup', + exc_info=True) + return None + return self.get_album_info(result) + + def get_albums(self, query): + """Returns a list of AlbumInfo objects for a discogs search query. + """ + # Strip non-word characters from query. Things like "!" and "-" can + # cause a query to return no results, even if they match the artist or + # album title. Use `re.UNICODE` flag to avoid stripping non-english + # word characters. + query = re.sub(r'(?u)\W+', ' ', query) + # Strip medium information from query, Things like "CD1" and "disk 1" + # can also negate an otherwise positive result. + query = re.sub(r'(?i)\b(CD|disc)\s*\d+', '', query) + + try: + releases = self.discogs_client.search(query, + type='release').page(1) + + except CONNECTION_ERRORS: + self._log.debug("Communication error while searching for {0!r}", + query, exc_info=True) + return [] + return [album for album in map(self.get_album_info, releases[:5]) + if album] + + def get_master_year(self, master_id): + """Fetches a master release given its Discogs ID and returns its year + or None if the master release is not found. + """ + self._log.debug('Searching for master release {0}', master_id) + result = Master(self.discogs_client, {'id': master_id}) + + try: + year = result.fetch('year') + return year + except DiscogsAPIError as e: + if e.status_code != 404: + self._log.debug('API Error: {0} (query: {1})', e, + result.data['resource_url']) + if e.status_code == 401: + self.reset_auth() + return self.get_master_year(master_id) + return None + except CONNECTION_ERRORS: + self._log.debug('Connection error in master release lookup', + exc_info=True) + return None + + def get_album_info(self, result): + """Returns an AlbumInfo object for a discogs Release object. + """ + # Explicitly reload the `Release` fields, as they might not be yet + # present if the result is from a `discogs_client.search()`. + if not result.data.get('artists'): + result.refresh() + + # Sanity check for required fields. The list of required fields is + # defined at Guideline 1.3.1.a, but in practice some releases might be + # lacking some of these fields. This function expects at least: + # `artists` (>0), `title`, `id`, `tracklist` (>0) + # https://www.discogs.com/help/doc/submission-guidelines-general-rules + if not all([result.data.get(k) for k in ['artists', 'title', 'id', + 'tracklist']]): + self._log.warning("Release does not contain the required fields") + return None + + artist, artist_id = MetadataSourcePlugin.get_artist( + [a.data for a in result.artists] + ) + album = re.sub(r' +', ' ', result.title) + album_id = result.data['id'] + # Use `.data` to access the tracklist directly instead of the + # convenient `.tracklist` property, which will strip out useful artist + # information and leave us with skeleton `Artist` objects that will + # each make an API call just to get the same data back. + tracks = self.get_tracks(result.data['tracklist']) + + # Extract information for the optional AlbumInfo fields, if possible. + va = result.data['artists'][0].get('name', '').lower() == 'various' + year = result.data.get('year') + mediums = [t.medium for t in tracks] + country = result.data.get('country') + data_url = result.data.get('uri') + style = self.format(result.data.get('styles')) + genre = self.format(result.data.get('genres')) + discogs_albumid = self.extract_release_id(result.data.get('uri')) + + # Extract information for the optional AlbumInfo fields that are + # contained on nested discogs fields. + albumtype = media = label = catalogno = labelid = None + if result.data.get('formats'): + albumtype = ', '.join( + result.data['formats'][0].get('descriptions', [])) or None + media = result.data['formats'][0]['name'] + if result.data.get('labels'): + label = result.data['labels'][0].get('name') + catalogno = result.data['labels'][0].get('catno') + labelid = result.data['labels'][0].get('id') + + # Additional cleanups (various artists name, catalog number, media). + if va: + artist = config['va_name'].as_str() + if catalogno == 'none': + catalogno = None + # Explicitly set the `media` for the tracks, since it is expected by + # `autotag.apply_metadata`, and set `medium_total`. + for track in tracks: + track.media = media + track.medium_total = mediums.count(track.medium) + # Discogs does not have track IDs. Invent our own IDs as proposed + # in #2336. + track.track_id = str(album_id) + "-" + track.track_alt + + # Retrieve master release id (returns None if there isn't one). + master_id = result.data.get('master_id') + # Assume `original_year` is equal to `year` for releases without + # a master release, otherwise fetch the master release. + original_year = self.get_master_year(master_id) if master_id else year + + return AlbumInfo(album=album, album_id=album_id, artist=artist, + artist_id=artist_id, tracks=tracks, + albumtype=albumtype, va=va, year=year, + label=label, mediums=len(set(mediums)), + releasegroup_id=master_id, catalognum=catalogno, + country=country, style=style, genre=genre, + media=media, original_year=original_year, + data_source='Discogs', data_url=data_url, + discogs_albumid=discogs_albumid, + discogs_labelid=labelid, discogs_artistid=artist_id) + + def format(self, classification): + if classification: + return self.config['separator'].as_str() \ + .join(sorted(classification)) + else: + return None + + def extract_release_id(self, uri): + if uri: + return uri.split("/")[-1] + else: + return None + + def get_tracks(self, tracklist): + """Returns a list of TrackInfo objects for a discogs tracklist. + """ + try: + clean_tracklist = self.coalesce_tracks(tracklist) + except Exception as exc: + # FIXME: this is an extra precaution for making sure there are no + # side effects after #2222. It should be removed after further + # testing. + self._log.debug('{}', traceback.format_exc()) + self._log.error('uncaught exception in coalesce_tracks: {}', exc) + clean_tracklist = tracklist + tracks = [] + index_tracks = {} + index = 0 + # Distinct works and intra-work divisions, as defined by index tracks. + divisions, next_divisions = [], [] + for track in clean_tracklist: + # Only real tracks have `position`. Otherwise, it's an index track. + if track['position']: + index += 1 + if next_divisions: + # End of a block of index tracks: update the current + # divisions. + divisions += next_divisions + del next_divisions[:] + track_info = self.get_track_info(track, index, divisions) + track_info.track_alt = track['position'] + tracks.append(track_info) + else: + next_divisions.append(track['title']) + # We expect new levels of division at the beginning of the + # tracklist (and possibly elsewhere). + try: + divisions.pop() + except IndexError: + pass + index_tracks[index + 1] = track['title'] + + # Fix up medium and medium_index for each track. Discogs position is + # unreliable, but tracks are in order. + medium = None + medium_count, index_count, side_count = 0, 0, 0 + sides_per_medium = 1 + + # If a medium has two sides (ie. vinyl or cassette), each pair of + # consecutive sides should belong to the same medium. + if all([track.medium is not None for track in tracks]): + m = sorted({track.medium.lower() for track in tracks}) + # If all track.medium are single consecutive letters, assume it is + # a 2-sided medium. + if ''.join(m) in ascii_lowercase: + sides_per_medium = 2 + + for track in tracks: + # Handle special case where a different medium does not indicate a + # new disc, when there is no medium_index and the ordinal of medium + # is not sequential. For example, I, II, III, IV, V. Assume these + # are the track index, not the medium. + # side_count is the number of mediums or medium sides (in the case + # of two-sided mediums) that were seen before. + medium_is_index = track.medium and not track.medium_index and ( + len(track.medium) != 1 or + # Not within standard incremental medium values (A, B, C, ...). + ord(track.medium) - 64 != side_count + 1 + ) + + if not medium_is_index and medium != track.medium: + side_count += 1 + if sides_per_medium == 2: + if side_count % sides_per_medium: + # Two-sided medium changed. Reset index_count. + index_count = 0 + medium_count += 1 + else: + # Medium changed. Reset index_count. + medium_count += 1 + index_count = 0 + medium = track.medium + + index_count += 1 + medium_count = 1 if medium_count == 0 else medium_count + track.medium, track.medium_index = medium_count, index_count + + # Get `disctitle` from Discogs index tracks. Assume that an index track + # before the first track of each medium is a disc title. + for track in tracks: + if track.medium_index == 1: + if track.index in index_tracks: + disctitle = index_tracks[track.index] + else: + disctitle = None + track.disctitle = disctitle + + return tracks + + def coalesce_tracks(self, raw_tracklist): + """Pre-process a tracklist, merging subtracks into a single track. The + title for the merged track is the one from the previous index track, + if present; otherwise it is a combination of the subtracks titles. + """ + def add_merged_subtracks(tracklist, subtracks): + """Modify `tracklist` in place, merging a list of `subtracks` into + a single track into `tracklist`.""" + # Calculate position based on first subtrack, without subindex. + idx, medium_idx, sub_idx = \ + self.get_track_index(subtracks[0]['position']) + position = '{}{}'.format(idx or '', medium_idx or '') + + if tracklist and not tracklist[-1]['position']: + # Assume the previous index track contains the track title. + if sub_idx: + # "Convert" the track title to a real track, discarding the + # subtracks assuming they are logical divisions of a + # physical track (12.2.9 Subtracks). + tracklist[-1]['position'] = position + else: + # Promote the subtracks to real tracks, discarding the + # index track, assuming the subtracks are physical tracks. + index_track = tracklist.pop() + # Fix artists when they are specified on the index track. + if index_track.get('artists'): + for subtrack in subtracks: + if not subtrack.get('artists'): + subtrack['artists'] = index_track['artists'] + # Concatenate index with track title when index_tracks + # option is set + if self.config['index_tracks']: + for subtrack in subtracks: + subtrack['title'] = '{}: {}'.format( + index_track['title'], subtrack['title']) + tracklist.extend(subtracks) + else: + # Merge the subtracks, pick a title, and append the new track. + track = subtracks[0].copy() + track['title'] = ' / '.join([t['title'] for t in subtracks]) + tracklist.append(track) + + # Pre-process the tracklist, trying to identify subtracks. + subtracks = [] + tracklist = [] + prev_subindex = '' + for track in raw_tracklist: + # Regular subtrack (track with subindex). + if track['position']: + _, _, subindex = self.get_track_index(track['position']) + if subindex: + if subindex.rjust(len(raw_tracklist)) > prev_subindex: + # Subtrack still part of the current main track. + subtracks.append(track) + else: + # Subtrack part of a new group (..., 1.3, *2.1*, ...). + add_merged_subtracks(tracklist, subtracks) + subtracks = [track] + prev_subindex = subindex.rjust(len(raw_tracklist)) + continue + + # Index track with nested sub_tracks. + if not track['position'] and 'sub_tracks' in track: + # Append the index track, assuming it contains the track title. + tracklist.append(track) + add_merged_subtracks(tracklist, track['sub_tracks']) + continue + + # Regular track or index track without nested sub_tracks. + if subtracks: + add_merged_subtracks(tracklist, subtracks) + subtracks = [] + prev_subindex = '' + tracklist.append(track) + + # Merge and add the remaining subtracks, if any. + if subtracks: + add_merged_subtracks(tracklist, subtracks) + + return tracklist + + def get_track_info(self, track, index, divisions): + """Returns a TrackInfo object for a discogs track. + """ + title = track['title'] + if self.config['index_tracks']: + prefix = ', '.join(divisions) + if prefix: + title = f'{prefix}: {title}' + track_id = None + medium, medium_index, _ = self.get_track_index(track['position']) + artist, artist_id = MetadataSourcePlugin.get_artist( + track.get('artists', []) + ) + length = self.get_track_length(track['duration']) + return TrackInfo(title=title, track_id=track_id, artist=artist, + artist_id=artist_id, length=length, index=index, + medium=medium, medium_index=medium_index) + + def get_track_index(self, position): + """Returns the medium, medium index and subtrack index for a discogs + track position.""" + # Match the standard Discogs positions (12.2.9), which can have several + # forms (1, 1-1, A1, A1.1, A1a, ...). + match = re.match( + r'^(.*?)' # medium: everything before medium_index. + r'(\d*?)' # medium_index: a number at the end of + # `position`, except if followed by a subtrack + # index. + # subtrack_index: can only be matched if medium + # or medium_index have been matched, and can be + r'((?<=\w)\.[\w]+' # - a dot followed by a string (A.1, 2.A) + r'|(?<=\d)[A-Z]+' # - a string that follows a number (1A, B2a) + r')?' + r'$', + position.upper() + ) + + if match: + medium, index, subindex = match.groups() + + if subindex and subindex.startswith('.'): + subindex = subindex[1:] + else: + self._log.debug('Invalid position: {0}', position) + medium = index = subindex = None + return medium or None, index or None, subindex or None + + def get_track_length(self, duration): + """Returns the track length in seconds for a discogs duration. + """ + try: + length = time.strptime(duration, '%M:%S') + except ValueError: + return None + return length.tm_min * 60 + length.tm_sec diff --git a/lib/beetsplug/duplicates.py b/lib/beetsplug/duplicates.py new file mode 100644 index 00000000..fdd5c175 --- /dev/null +++ b/lib/beetsplug/duplicates.py @@ -0,0 +1,341 @@ +# This file is part of beets. +# Copyright 2016, Pedro Silva. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""List duplicate tracks or albums. +""" + +import shlex + +from beets.plugins import BeetsPlugin +from beets.ui import decargs, print_, Subcommand, UserError +from beets.util import command_output, displayable_path, subprocess, \ + bytestring_path, MoveOperation, decode_commandline_path +from beets.library import Item, Album + + +PLUGIN = 'duplicates' + + +class DuplicatesPlugin(BeetsPlugin): + """List duplicate tracks or albums + """ + def __init__(self): + super().__init__() + + self.config.add({ + 'album': False, + 'checksum': '', + 'copy': '', + 'count': False, + 'delete': False, + 'format': '', + 'full': False, + 'keys': [], + 'merge': False, + 'move': '', + 'path': False, + 'tiebreak': {}, + 'strict': False, + 'tag': '', + }) + + self._command = Subcommand('duplicates', + help=__doc__, + aliases=['dup']) + self._command.parser.add_option( + '-c', '--count', dest='count', + action='store_true', + help='show duplicate counts', + ) + self._command.parser.add_option( + '-C', '--checksum', dest='checksum', + action='store', metavar='PROG', + help='report duplicates based on arbitrary command', + ) + self._command.parser.add_option( + '-d', '--delete', dest='delete', + action='store_true', + help='delete items from library and disk', + ) + self._command.parser.add_option( + '-F', '--full', dest='full', + action='store_true', + help='show all versions of duplicate tracks or albums', + ) + self._command.parser.add_option( + '-s', '--strict', dest='strict', + action='store_true', + help='report duplicates only if all attributes are set', + ) + self._command.parser.add_option( + '-k', '--key', dest='keys', + action='append', metavar='KEY', + help='report duplicates based on keys (use multiple times)', + ) + self._command.parser.add_option( + '-M', '--merge', dest='merge', + action='store_true', + help='merge duplicate items', + ) + self._command.parser.add_option( + '-m', '--move', dest='move', + action='store', metavar='DEST', + help='move items to dest', + ) + self._command.parser.add_option( + '-o', '--copy', dest='copy', + action='store', metavar='DEST', + help='copy items to dest', + ) + self._command.parser.add_option( + '-t', '--tag', dest='tag', + action='store', + help='tag matched items with \'k=v\' attribute', + ) + self._command.parser.add_all_common_options() + + def commands(self): + + def _dup(lib, opts, args): + self.config.set_args(opts) + album = self.config['album'].get(bool) + checksum = self.config['checksum'].get(str) + copy = bytestring_path(self.config['copy'].as_str()) + count = self.config['count'].get(bool) + delete = self.config['delete'].get(bool) + fmt = self.config['format'].get(str) + full = self.config['full'].get(bool) + keys = self.config['keys'].as_str_seq() + merge = self.config['merge'].get(bool) + move = bytestring_path(self.config['move'].as_str()) + path = self.config['path'].get(bool) + tiebreak = self.config['tiebreak'].get(dict) + strict = self.config['strict'].get(bool) + tag = self.config['tag'].get(str) + + if album: + if not keys: + keys = ['mb_albumid'] + items = lib.albums(decargs(args)) + else: + if not keys: + keys = ['mb_trackid', 'mb_albumid'] + items = lib.items(decargs(args)) + + # If there's nothing to do, return early. The code below assumes + # `items` to be non-empty. + if not items: + return + + if path: + fmt = '$path' + + # Default format string for count mode. + if count and not fmt: + if album: + fmt = '$albumartist - $album' + else: + fmt = '$albumartist - $album - $title' + fmt += ': {0}' + + if checksum: + for i in items: + k, _ = self._checksum(i, checksum) + keys = [k] + + for obj_id, obj_count, objs in self._duplicates(items, + keys=keys, + full=full, + strict=strict, + tiebreak=tiebreak, + merge=merge): + if obj_id: # Skip empty IDs. + for o in objs: + self._process_item(o, + copy=copy, + move=move, + delete=delete, + tag=tag, + fmt=fmt.format(obj_count)) + + self._command.func = _dup + return [self._command] + + def _process_item(self, item, copy=False, move=False, delete=False, + tag=False, fmt=''): + """Process Item `item`. + """ + print_(format(item, fmt)) + if copy: + item.move(basedir=copy, operation=MoveOperation.COPY) + item.store() + if move: + item.move(basedir=move) + item.store() + if delete: + item.remove(delete=True) + if tag: + try: + k, v = tag.split('=') + except Exception: + raise UserError( + f"{PLUGIN}: can't parse k=v tag: {tag}" + ) + setattr(item, k, v) + item.store() + + def _checksum(self, item, prog): + """Run external `prog` on file path associated with `item`, cache + output as flexattr on a key that is the name of the program, and + return the key, checksum tuple. + """ + args = [p.format(file=decode_commandline_path(item.path)) + for p in shlex.split(prog)] + key = args[0] + checksum = getattr(item, key, False) + if not checksum: + self._log.debug('key {0} on item {1} not cached:' + 'computing checksum', + key, displayable_path(item.path)) + try: + checksum = command_output(args).stdout + setattr(item, key, checksum) + item.store() + self._log.debug('computed checksum for {0} using {1}', + item.title, key) + except subprocess.CalledProcessError as e: + self._log.debug('failed to checksum {0}: {1}', + displayable_path(item.path), e) + else: + self._log.debug('key {0} on item {1} cached:' + 'not computing checksum', + key, displayable_path(item.path)) + return key, checksum + + def _group_by(self, objs, keys, strict): + """Return a dictionary with keys arbitrary concatenations of attributes + and values lists of objects (Albums or Items) with those keys. + + If strict, all attributes must be defined for a duplicate match. + """ + import collections + counts = collections.defaultdict(list) + for obj in objs: + values = [getattr(obj, k, None) for k in keys] + values = [v for v in values if v not in (None, '')] + if strict and len(values) < len(keys): + self._log.debug('some keys {0} on item {1} are null or empty:' + ' skipping', + keys, displayable_path(obj.path)) + elif (not strict and not len(values)): + self._log.debug('all keys {0} on item {1} are null or empty:' + ' skipping', + keys, displayable_path(obj.path)) + else: + key = tuple(values) + counts[key].append(obj) + + return counts + + def _order(self, objs, tiebreak=None): + """Return the objects (Items or Albums) sorted by descending + order of priority. + + If provided, the `tiebreak` dict indicates the field to use to + prioritize the objects. Otherwise, Items are placed in order of + "completeness" (objects with more non-null fields come first) + and Albums are ordered by their track count. + """ + kind = 'items' if all(isinstance(o, Item) for o in objs) else 'albums' + + if tiebreak and kind in tiebreak.keys(): + key = lambda x: tuple(getattr(x, k) for k in tiebreak[kind]) + else: + if kind == 'items': + def truthy(v): + # Avoid a Unicode warning by avoiding comparison + # between a bytes object and the empty Unicode + # string ''. + return v is not None and \ + (v != '' if isinstance(v, str) else True) + fields = Item.all_keys() + key = lambda x: sum(1 for f in fields if truthy(getattr(x, f))) + else: + key = lambda x: len(x.items()) + + return sorted(objs, key=key, reverse=True) + + def _merge_items(self, objs): + """Merge Item objs by copying missing fields from items in the tail to + the head item. + + Return same number of items, with the head item modified. + """ + fields = Item.all_keys() + for f in fields: + for o in objs[1:]: + if getattr(objs[0], f, None) in (None, ''): + value = getattr(o, f, None) + if value: + self._log.debug('key {0} on item {1} is null ' + 'or empty: setting from item {2}', + f, displayable_path(objs[0].path), + displayable_path(o.path)) + setattr(objs[0], f, value) + objs[0].store() + break + return objs + + def _merge_albums(self, objs): + """Merge Album objs by copying missing items from albums in the tail + to the head album. + + Return same number of albums, with the head album modified.""" + ids = [i.mb_trackid for i in objs[0].items()] + for o in objs[1:]: + for i in o.items(): + if i.mb_trackid not in ids: + missing = Item.from_path(i.path) + missing.album_id = objs[0].id + missing.add(i._db) + self._log.debug('item {0} missing from album {1}:' + ' merging from {2} into {3}', + missing, + objs[0], + displayable_path(o.path), + displayable_path(missing.destination())) + missing.move(operation=MoveOperation.COPY) + return objs + + def _merge(self, objs): + """Merge duplicate items. See ``_merge_items`` and ``_merge_albums`` + for the relevant strategies. + """ + kind = Item if all(isinstance(o, Item) for o in objs) else Album + if kind is Item: + objs = self._merge_items(objs) + else: + objs = self._merge_albums(objs) + return objs + + def _duplicates(self, objs, keys, full, strict, tiebreak, merge): + """Generate triples of keys, duplicate counts, and constituent objects. + """ + offset = 0 if full else 1 + for k, objs in self._group_by(objs, keys, strict).items(): + if len(objs) > 1: + objs = self._order(objs, tiebreak) + if merge: + objs = self._merge(objs) + yield (k, len(objs) - offset, objs[offset:]) diff --git a/lib/beetsplug/edit.py b/lib/beetsplug/edit.py new file mode 100644 index 00000000..6f03fa4d --- /dev/null +++ b/lib/beetsplug/edit.py @@ -0,0 +1,402 @@ +# This file is part of beets. +# Copyright 2016 +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Open metadata information in a text editor to let the user edit it. +""" + +from beets import plugins +from beets import util +from beets import ui +from beets.dbcore import types +from beets.importer import action +from beets.ui.commands import _do_query, PromptChoice +import codecs +import subprocess +import yaml +from tempfile import NamedTemporaryFile +import os +import shlex + + +# These "safe" types can avoid the format/parse cycle that most fields go +# through: they are safe to edit with native YAML types. +SAFE_TYPES = (types.Float, types.Integer, types.Boolean) + + +class ParseError(Exception): + """The modified file is unreadable. The user should be offered a chance to + fix the error. + """ + + +def edit(filename, log): + """Open `filename` in a text editor. + """ + cmd = shlex.split(util.editor_command()) + cmd.append(filename) + log.debug('invoking editor command: {!r}', cmd) + try: + subprocess.call(cmd) + except OSError as exc: + raise ui.UserError('could not run editor command {!r}: {}'.format( + cmd[0], exc + )) + + +def dump(arg): + """Dump a sequence of dictionaries as YAML for editing. + """ + return yaml.safe_dump_all( + arg, + allow_unicode=True, + default_flow_style=False, + ) + + +def load(s): + """Read a sequence of YAML documents back to a list of dictionaries + with string keys. + + Can raise a `ParseError`. + """ + try: + out = [] + for d in yaml.safe_load_all(s): + if not isinstance(d, dict): + raise ParseError( + 'each entry must be a dictionary; found {}'.format( + type(d).__name__ + ) + ) + + # Convert all keys to strings. They started out as strings, + # but the user may have inadvertently messed this up. + out.append({str(k): v for k, v in d.items()}) + + except yaml.YAMLError as e: + raise ParseError(f'invalid YAML: {e}') + return out + + +def _safe_value(obj, key, value): + """Check whether the `value` is safe to represent in YAML and trust as + returned from parsed YAML. + + This ensures that values do not change their type when the user edits their + YAML representation. + """ + typ = obj._type(key) + return isinstance(typ, SAFE_TYPES) and isinstance(value, typ.model_type) + + +def flatten(obj, fields): + """Represent `obj`, a `dbcore.Model` object, as a dictionary for + serialization. Only include the given `fields` if provided; + otherwise, include everything. + + The resulting dictionary's keys are strings and the values are + safely YAML-serializable types. + """ + # Format each value. + d = {} + for key in obj.keys(): + value = obj[key] + if _safe_value(obj, key, value): + # A safe value that is faithfully representable in YAML. + d[key] = value + else: + # A value that should be edited as a string. + d[key] = obj.formatted()[key] + + # Possibly filter field names. + if fields: + return {k: v for k, v in d.items() if k in fields} + else: + return d + + +def apply_(obj, data): + """Set the fields of a `dbcore.Model` object according to a + dictionary. + + This is the opposite of `flatten`. The `data` dictionary should have + strings as values. + """ + for key, value in data.items(): + if _safe_value(obj, key, value): + # A safe value *stayed* represented as a safe type. Assign it + # directly. + obj[key] = value + else: + # Either the field was stringified originally or the user changed + # it from a safe type to an unsafe one. Parse it as a string. + obj.set_parse(key, str(value)) + + +class EditPlugin(plugins.BeetsPlugin): + + def __init__(self): + super().__init__() + + self.config.add({ + # The default fields to edit. + 'albumfields': 'album albumartist', + 'itemfields': 'track title artist album', + + # Silently ignore any changes to these fields. + 'ignore_fields': 'id path', + }) + + self.register_listener('before_choose_candidate', + self.before_choose_candidate_listener) + + def commands(self): + edit_command = ui.Subcommand( + 'edit', + help='interactively edit metadata' + ) + edit_command.parser.add_option( + '-f', '--field', + metavar='FIELD', + action='append', + help='edit this field also', + ) + edit_command.parser.add_option( + '--all', + action='store_true', dest='all', + help='edit all fields', + ) + edit_command.parser.add_album_option() + edit_command.func = self._edit_command + return [edit_command] + + def _edit_command(self, lib, opts, args): + """The CLI command function for the `beet edit` command. + """ + # Get the objects to edit. + query = ui.decargs(args) + items, albums = _do_query(lib, query, opts.album, False) + objs = albums if opts.album else items + if not objs: + ui.print_('Nothing to edit.') + return + + # Get the fields to edit. + if opts.all: + fields = None + else: + fields = self._get_fields(opts.album, opts.field) + self.edit(opts.album, objs, fields) + + def _get_fields(self, album, extra): + """Get the set of fields to edit. + """ + # Start with the configured base fields. + if album: + fields = self.config['albumfields'].as_str_seq() + else: + fields = self.config['itemfields'].as_str_seq() + + # Add the requested extra fields. + if extra: + fields += extra + + # Ensure we always have the `id` field for identification. + fields.append('id') + + return set(fields) + + def edit(self, album, objs, fields): + """The core editor function. + + - `album`: A flag indicating whether we're editing Items or Albums. + - `objs`: The `Item`s or `Album`s to edit. + - `fields`: The set of field names to edit (or None to edit + everything). + """ + # Present the YAML to the user and let her change it. + success = self.edit_objects(objs, fields) + + # Save the new data. + if success: + self.save_changes(objs) + + def edit_objects(self, objs, fields): + """Dump a set of Model objects to a file as text, ask the user + to edit it, and apply any changes to the objects. + + Return a boolean indicating whether the edit succeeded. + """ + # Get the content to edit as raw data structures. + old_data = [flatten(o, fields) for o in objs] + + # Set up a temporary file with the initial data for editing. + new = NamedTemporaryFile(mode='w', suffix='.yaml', delete=False, + encoding='utf-8') + old_str = dump(old_data) + new.write(old_str) + new.close() + + # Loop until we have parseable data and the user confirms. + try: + while True: + # Ask the user to edit the data. + edit(new.name, self._log) + + # Read the data back after editing and check whether anything + # changed. + with codecs.open(new.name, encoding='utf-8') as f: + new_str = f.read() + if new_str == old_str: + ui.print_("No changes; aborting.") + return False + + # Parse the updated data. + try: + new_data = load(new_str) + except ParseError as e: + ui.print_(f"Could not read data: {e}") + if ui.input_yn("Edit again to fix? (Y/n)", True): + continue + else: + return False + + # Show the changes. + # If the objects are not on the DB yet, we need a copy of their + # original state for show_model_changes. + objs_old = [obj.copy() if obj.id < 0 else None + for obj in objs] + self.apply_data(objs, old_data, new_data) + changed = False + for obj, obj_old in zip(objs, objs_old): + changed |= ui.show_model_changes(obj, obj_old) + if not changed: + ui.print_('No changes to apply.') + return False + + # Confirm the changes. + choice = ui.input_options( + ('continue Editing', 'apply', 'cancel') + ) + if choice == 'a': # Apply. + return True + elif choice == 'c': # Cancel. + return False + elif choice == 'e': # Keep editing. + # Reset the temporary changes to the objects. I we have a + # copy from above, use that, else reload from the database. + objs = [(old_obj or obj) + for old_obj, obj in zip(objs_old, objs)] + for obj in objs: + if not obj.id < 0: + obj.load() + continue + + # Remove the temporary file before returning. + finally: + os.remove(new.name) + + def apply_data(self, objs, old_data, new_data): + """Take potentially-updated data and apply it to a set of Model + objects. + + The objects are not written back to the database, so the changes + are temporary. + """ + if len(old_data) != len(new_data): + self._log.warning('number of objects changed from {} to {}', + len(old_data), len(new_data)) + + obj_by_id = {o.id: o for o in objs} + ignore_fields = self.config['ignore_fields'].as_str_seq() + for old_dict, new_dict in zip(old_data, new_data): + # Prohibit any changes to forbidden fields to avoid + # clobbering `id` and such by mistake. + forbidden = False + for key in ignore_fields: + if old_dict.get(key) != new_dict.get(key): + self._log.warning('ignoring object whose {} changed', key) + forbidden = True + break + if forbidden: + continue + + id_ = int(old_dict['id']) + apply_(obj_by_id[id_], new_dict) + + def save_changes(self, objs): + """Save a list of updated Model objects to the database. + """ + # Save to the database and possibly write tags. + for ob in objs: + if ob._dirty: + self._log.debug('saving changes to {}', ob) + ob.try_sync(ui.should_write(), ui.should_move()) + + # Methods for interactive importer execution. + + def before_choose_candidate_listener(self, session, task): + """Append an "Edit" choice and an "edit Candidates" choice (if + there are candidates) to the interactive importer prompt. + """ + choices = [PromptChoice('d', 'eDit', self.importer_edit)] + if task.candidates: + choices.append(PromptChoice('c', 'edit Candidates', + self.importer_edit_candidate)) + + return choices + + def importer_edit(self, session, task): + """Callback for invoking the functionality during an interactive + import session on the *original* item tags. + """ + # Assign negative temporary ids to Items that are not in the database + # yet. By using negative values, no clash with items in the database + # can occur. + for i, obj in enumerate(task.items, start=1): + # The importer may set the id to None when re-importing albums. + if not obj._db or obj.id is None: + obj.id = -i + + # Present the YAML to the user and let her change it. + fields = self._get_fields(album=False, extra=[]) + success = self.edit_objects(task.items, fields) + + # Remove temporary ids. + for obj in task.items: + if obj.id < 0: + obj.id = None + + # Save the new data. + if success: + # Return action.RETAG, which makes the importer write the tags + # to the files if needed without re-applying metadata. + return action.RETAG + else: + # Edit cancelled / no edits made. Revert changes. + for obj in task.items: + obj.read() + + def importer_edit_candidate(self, session, task): + """Callback for invoking the functionality during an interactive + import session on a *candidate*. The candidate's metadata is + applied to the original items. + """ + # Prompt the user for a candidate. + sel = ui.input_options([], numrange=(1, len(task.candidates))) + # Force applying the candidate on the items. + task.match = task.candidates[sel - 1] + task.apply_metadata() + + return self.importer_edit(session, task) diff --git a/lib/beetsplug/embedart.py b/lib/beetsplug/embedart.py index 49ed4792..6db46f8c 100644 --- a/lib/beetsplug/embedart.py +++ b/lib/beetsplug/embedart.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -13,46 +13,65 @@ # included in all copies or substantial portions of the Software. """Allows beets to embed album art into file metadata.""" + import os.path -import logging -import imghdr -import subprocess -import platform -from tempfile import NamedTemporaryFile from beets.plugins import BeetsPlugin -from beets import mediafile from beets import ui -from beets.ui import decargs -from beets.util import syspath, normpath, displayable_path +from beets.ui import print_, decargs +from beets.util import syspath, normpath, displayable_path, bytestring_path from beets.util.artresizer import ArtResizer from beets import config +from beets import art -log = logging.getLogger('beets') +def _confirm(objs, album): + """Show the list of affected objects (items or albums) and confirm + that the user wants to modify their artwork. + + `album` is a Boolean indicating whether these are albums (as opposed + to items). + """ + noun = 'album' if album else 'file' + prompt = 'Modify artwork for {} {}{} (Y/n)?'.format( + len(objs), + noun, + 's' if len(objs) > 1 else '' + ) + + # Show all the items or albums. + for obj in objs: + print_(format(obj)) + + # Confirm with user. + return ui.input_yn(prompt) class EmbedCoverArtPlugin(BeetsPlugin): """Allows albumart to be embedded into the actual files. """ def __init__(self): - super(EmbedCoverArtPlugin, self).__init__() + super().__init__() self.config.add({ 'maxwidth': 0, 'auto': True, 'compare_threshold': 0, 'ifempty': False, + 'remove_art_file': False, + 'quality': 0, }) if self.config['maxwidth'].get(int) and not ArtResizer.shared.local: self.config['maxwidth'] = 0 - log.warn(u"embedart: ImageMagick or PIL not found; " - u"'maxwidth' option ignored") + self._log.warning("ImageMagick or PIL not found; " + "'maxwidth' option ignored") if self.config['compare_threshold'].get(int) and not \ ArtResizer.shared.can_compare: self.config['compare_threshold'] = 0 - log.warn(u"embedart: ImageMagick 6.8.7 or higher not installed; " - u"'compare_threshold' option ignored") + self._log.warning("ImageMagick 6.8.7 or higher not installed; " + "'compare_threshold' option ignored") + + self.register_listener('art_set', self.process_album) def commands(self): # Embed command. @@ -62,219 +81,121 @@ class EmbedCoverArtPlugin(BeetsPlugin): embed_cmd.parser.add_option( '-f', '--file', metavar='PATH', help='the image file to embed' ) - maxwidth = config['embedart']['maxwidth'].get(int) - compare_threshold = config['embedart']['compare_threshold'].get(int) - ifempty = config['embedart']['ifempty'].get(bool) + embed_cmd.parser.add_option( + "-y", "--yes", action="store_true", help="skip confirmation" + ) + maxwidth = self.config['maxwidth'].get(int) + quality = self.config['quality'].get(int) + compare_threshold = self.config['compare_threshold'].get(int) + ifempty = self.config['ifempty'].get(bool) def embed_func(lib, opts, args): if opts.file: imagepath = normpath(opts.file) - for item in lib.items(decargs(args)): - embed_item(item, imagepath, maxwidth, None, - compare_threshold, ifempty) + if not os.path.isfile(syspath(imagepath)): + raise ui.UserError('image file {} not found'.format( + displayable_path(imagepath) + )) + + items = lib.items(decargs(args)) + + # Confirm with user. + if not opts.yes and not _confirm(items, not opts.file): + return + + for item in items: + art.embed_item(self._log, item, imagepath, maxwidth, + None, compare_threshold, ifempty, + quality=quality) else: - for album in lib.albums(decargs(args)): - embed_album(album, maxwidth) + albums = lib.albums(decargs(args)) + + # Confirm with user. + if not opts.yes and not _confirm(albums, not opts.file): + return + + for album in albums: + art.embed_album(self._log, album, maxwidth, + False, compare_threshold, ifempty, + quality=quality) + self.remove_artfile(album) embed_cmd.func = embed_func # Extract command. - extract_cmd = ui.Subcommand('extractart', - help='extract an image from file metadata') - extract_cmd.parser.add_option('-o', dest='outpath', - help='image output file') + extract_cmd = ui.Subcommand( + 'extractart', + help='extract an image from file metadata', + ) + extract_cmd.parser.add_option( + '-o', dest='outpath', + help='image output file', + ) + extract_cmd.parser.add_option( + '-n', dest='filename', + help='image filename to create for all matched albums', + ) + extract_cmd.parser.add_option( + '-a', dest='associate', action='store_true', + help='associate the extracted images with the album', + ) def extract_func(lib, opts, args): - outpath = normpath(opts.outpath or 'cover') - item = lib.items(decargs(args)).get() - extract(outpath, item) + if opts.outpath: + art.extract_first(self._log, normpath(opts.outpath), + lib.items(decargs(args))) + else: + filename = bytestring_path(opts.filename or + config['art_filename'].get()) + if os.path.dirname(filename) != b'': + self._log.error( + "Only specify a name rather than a path for -n") + return + for album in lib.albums(decargs(args)): + artpath = normpath(os.path.join(album.path, filename)) + artpath = art.extract_first(self._log, artpath, + album.items()) + if artpath and opts.associate: + album.set_art(artpath) + album.store() extract_cmd.func = extract_func # Clear command. - clear_cmd = ui.Subcommand('clearart', - help='remove images from file metadata') + clear_cmd = ui.Subcommand( + 'clearart', + help='remove images from file metadata', + ) + clear_cmd.parser.add_option( + "-y", "--yes", action="store_true", help="skip confirmation" + ) def clear_func(lib, opts, args): - clear(lib, decargs(args)) + items = lib.items(decargs(args)) + # Confirm with user. + if not opts.yes and not _confirm(items, False): + return + art.clear(self._log, lib, decargs(args)) clear_cmd.func = clear_func return [embed_cmd, extract_cmd, clear_cmd] + def process_album(self, album): + """Automatically embed art after art has been set + """ + if self.config['auto'] and ui.should_write(): + max_width = self.config['maxwidth'].get(int) + art.embed_album(self._log, album, max_width, True, + self.config['compare_threshold'].get(int), + self.config['ifempty'].get(bool)) + self.remove_artfile(album) -@EmbedCoverArtPlugin.listen('album_imported') -def album_imported(lib, album): - """Automatically embed art into imported albums. - """ - if album.artpath and config['embedart']['auto']: - embed_album(album, config['embedart']['maxwidth'].get(int), True) - - -def embed_item(item, imagepath, maxwidth=None, itempath=None, - compare_threshold=0, ifempty=False, as_album=False): - """Embed an image into the item's media file. - """ - if compare_threshold: - if not check_art_similarity(item, imagepath, compare_threshold): - log.warn(u'Image not similar; skipping.') - return - if ifempty: - art = get_art(item) - if not art: - pass - else: - log.debug(u'embedart: media file contained art already {0}'.format( - displayable_path(imagepath) - )) - return - if maxwidth and not as_album: - imagepath = resize_image(imagepath, maxwidth) - - try: - log.debug(u'embedart: embedding {0}'.format( - displayable_path(imagepath) - )) - item['images'] = [_mediafile_image(imagepath, maxwidth)] - except IOError as exc: - log.error(u'embedart: could not read image file: {0}'.format(exc)) - else: - # We don't want to store the image in the database. - item.try_write(itempath) - del item['images'] - - -def embed_album(album, maxwidth=None, quiet=False): - """Embed album art into all of the album's items. - """ - imagepath = album.artpath - if not imagepath: - log.info(u'No album art present: {0} - {1}'. - format(album.albumartist, album.album)) - return - if not os.path.isfile(syspath(imagepath)): - log.error(u'Album art not found at {0}' - .format(displayable_path(imagepath))) - return - if maxwidth: - imagepath = resize_image(imagepath, maxwidth) - - log.log( - logging.DEBUG if quiet else logging.INFO, - u'Embedding album art into {0.albumartist} - {0.album}.'.format(album), - ) - - for item in album.items(): - embed_item(item, imagepath, maxwidth, None, - config['embedart']['compare_threshold'].get(int), - config['embedart']['ifempty'].get(bool), as_album=True) - - -def resize_image(imagepath, maxwidth): - """Returns path to an image resized to maxwidth. - """ - log.info(u'Resizing album art to {0} pixels wide' - .format(maxwidth)) - imagepath = ArtResizer.shared.resize(maxwidth, syspath(imagepath)) - return imagepath - - -def check_art_similarity(item, imagepath, compare_threshold): - """A boolean indicating if an image is similar to embedded item art. - """ - with NamedTemporaryFile(delete=True) as f: - art = extract(f.name, item) - - if art: - # Converting images to grayscale tends to minimize the weight - # of colors in the diff score - cmd = 'convert {0} {1} -colorspace gray MIFF:- | ' \ - 'compare -metric PHASH - null:'.format(syspath(imagepath), - syspath(art)) - - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=platform.system() != 'Windows', - shell=True) - stdout, stderr = proc.communicate() - if proc.returncode: - if proc.returncode != 1: - log.warn(u'embedart: IM phashes compare failed for {0}, \ - {1}'.format(displayable_path(imagepath), - displayable_path(art))) - return - phashDiff = float(stderr) - else: - phashDiff = float(stdout) - - log.info(u'embedart: compare PHASH score is {0}'.format(phashDiff)) - if phashDiff > compare_threshold: - return False - - return True - - -def _mediafile_image(image_path, maxwidth=None): - """Return a `mediafile.Image` object for the path. - """ - - with open(syspath(image_path), 'rb') as f: - data = f.read() - return mediafile.Image(data, type=mediafile.ImageType.front) - - -def get_art(item): - # Extract the art. - try: - mf = mediafile.MediaFile(syspath(item.path)) - except mediafile.UnreadableFileError as exc: - log.error(u'Could not extract art from {0}: {1}'.format( - displayable_path(item.path), exc - )) - return - - return mf.art - -# 'extractart' command. - - -def extract(outpath, item): - if not item: - log.error(u'No item matches query.') - return - - art = get_art(item) - - if not art: - log.error(u'No album art present in {0} - {1}.' - .format(item.artist, item.title)) - return - - # Add an extension to the filename. - ext = imghdr.what(None, h=art) - if not ext: - log.error(u'Unknown image type.') - return - outpath += '.' + ext - - log.info(u'Extracting album art from: {0.artist} - {0.title} ' - u'to: {1}'.format(item, displayable_path(outpath))) - with open(syspath(outpath), 'wb') as f: - f.write(art) - return outpath - - -# 'clearart' command. - -def clear(lib, query): - log.info(u'Clearing album art from items:') - for item in lib.items(query): - log.info(u'{0} - {1}'.format(item.artist, item.title)) - try: - mf = mediafile.MediaFile(syspath(item.path), - config['id3v23'].get(bool)) - except mediafile.UnreadableFileError as exc: - log.error(u'Could not clear art from {0}: {1}'.format( - displayable_path(item.path), exc - )) - continue - del mf.art - mf.save() + def remove_artfile(self, album): + """Possibly delete the album art file for an album (if the + appropriate configuration option is enabled). + """ + if self.config['remove_art_file'] and album.artpath: + if os.path.isfile(album.artpath): + self._log.debug('Removing album art file for {0}', album) + os.remove(album.artpath) + album.artpath = None + album.store() diff --git a/lib/beetsplug/embyupdate.py b/lib/beetsplug/embyupdate.py new file mode 100644 index 00000000..c17fabad --- /dev/null +++ b/lib/beetsplug/embyupdate.py @@ -0,0 +1,205 @@ +"""Updates the Emby Library whenever the beets library is changed. + + emby: + host: localhost + port: 8096 + username: user + apikey: apikey + password: password +""" + +import hashlib +import requests + +from urllib.parse import urlencode, urljoin, parse_qs, urlsplit, urlunsplit +from beets import config +from beets.plugins import BeetsPlugin + + +def api_url(host, port, endpoint): + """Returns a joined url. + + Takes host, port and endpoint and generates a valid emby API url. + + :param host: Hostname of the emby server + :param port: Portnumber of the emby server + :param endpoint: API endpoint + :type host: str + :type port: int + :type endpoint: str + :returns: Full API url + :rtype: str + """ + # check if http or https is defined as host and create hostname + hostname_list = [host] + if host.startswith('http://') or host.startswith('https://'): + hostname = ''.join(hostname_list) + else: + hostname_list.insert(0, 'http://') + hostname = ''.join(hostname_list) + + joined = urljoin( + '{hostname}:{port}'.format( + hostname=hostname, + port=port + ), + endpoint + ) + + scheme, netloc, path, query_string, fragment = urlsplit(joined) + query_params = parse_qs(query_string) + + query_params['format'] = ['json'] + new_query_string = urlencode(query_params, doseq=True) + + return urlunsplit((scheme, netloc, path, new_query_string, fragment)) + + +def password_data(username, password): + """Returns a dict with username and its encoded password. + + :param username: Emby username + :param password: Emby password + :type username: str + :type password: str + :returns: Dictionary with username and encoded password + :rtype: dict + """ + return { + 'username': username, + 'password': hashlib.sha1(password.encode('utf-8')).hexdigest(), + 'passwordMd5': hashlib.md5(password.encode('utf-8')).hexdigest() + } + + +def create_headers(user_id, token=None): + """Return header dict that is needed to talk to the Emby API. + + :param user_id: Emby user ID + :param token: Authentication token for Emby + :type user_id: str + :type token: str + :returns: Headers for requests + :rtype: dict + """ + headers = {} + + authorization = ( + 'MediaBrowser UserId="{user_id}", ' + 'Client="other", ' + 'Device="beets", ' + 'DeviceId="beets", ' + 'Version="0.0.0"' + ).format(user_id=user_id) + + headers['x-emby-authorization'] = authorization + + if token: + headers['x-mediabrowser-token'] = token + + return headers + + +def get_token(host, port, headers, auth_data): + """Return token for a user. + + :param host: Emby host + :param port: Emby port + :param headers: Headers for requests + :param auth_data: Username and encoded password for authentication + :type host: str + :type port: int + :type headers: dict + :type auth_data: dict + :returns: Access Token + :rtype: str + """ + url = api_url(host, port, '/Users/AuthenticateByName') + r = requests.post(url, headers=headers, data=auth_data) + + return r.json().get('AccessToken') + + +def get_user(host, port, username): + """Return user dict from server or None if there is no user. + + :param host: Emby host + :param port: Emby port + :username: Username + :type host: str + :type port: int + :type username: str + :returns: Matched Users + :rtype: list + """ + url = api_url(host, port, '/Users/Public') + r = requests.get(url) + user = [i for i in r.json() if i['Name'] == username] + + return user + + +class EmbyUpdate(BeetsPlugin): + def __init__(self): + super().__init__() + + # Adding defaults. + config['emby'].add({ + 'host': 'http://localhost', + 'port': 8096, + 'apikey': None, + 'password': None, + }) + + self.register_listener('database_change', self.listen_for_db_change) + + def listen_for_db_change(self, lib, model): + """Listens for beets db change and register the update for the end. + """ + self.register_listener('cli_exit', self.update) + + def update(self, lib): + """When the client exists try to send refresh request to Emby. + """ + self._log.info('Updating Emby library...') + + host = config['emby']['host'].get() + port = config['emby']['port'].get() + username = config['emby']['username'].get() + password = config['emby']['password'].get() + token = config['emby']['apikey'].get() + + # Check if at least a apikey or password is given. + if not any([password, token]): + self._log.warning('Provide at least Emby password or apikey.') + return + + # Get user information from the Emby API. + user = get_user(host, port, username) + if not user: + self._log.warning(f'User {username} could not be found.') + return + + if not token: + # Create Authentication data and headers. + auth_data = password_data(username, password) + headers = create_headers(user[0]['Id']) + + # Get authentication token. + token = get_token(host, port, headers, auth_data) + if not token: + self._log.warning( + 'Could not get token for user {0}', username + ) + return + + # Recreate headers with a token. + headers = create_headers(user[0]['Id'], token=token) + + # Trigger the Update. + url = api_url(host, port, '/Library/Refresh') + r = requests.post(url, headers=headers) + if r.status_code != 204: + self._log.warning('Update could not be triggered') + else: + self._log.info('Update triggered.') diff --git a/lib/beetsplug/export.py b/lib/beetsplug/export.py new file mode 100644 index 00000000..99f6d706 --- /dev/null +++ b/lib/beetsplug/export.py @@ -0,0 +1,227 @@ +# This file is part of beets. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Exports data from beets +""" + + +import sys +import codecs +import json +import csv +from xml.etree import ElementTree + +from datetime import datetime, date +from beets.plugins import BeetsPlugin +from beets import ui +from beets import util +import mediafile +from beetsplug.info import library_data, tag_data + + +class ExportEncoder(json.JSONEncoder): + """Deals with dates because JSON doesn't have a standard""" + def default(self, o): + if isinstance(o, (datetime, date)): + return o.isoformat() + return json.JSONEncoder.default(self, o) + + +class ExportPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + + self.config.add({ + 'default_format': 'json', + 'json': { + # JSON module formatting options. + 'formatting': { + 'ensure_ascii': False, + 'indent': 4, + 'separators': (',', ': '), + 'sort_keys': True + } + }, + 'jsonlines': { + # JSON Lines formatting options. + 'formatting': { + 'ensure_ascii': False, + 'separators': (',', ': '), + 'sort_keys': True + } + }, + 'csv': { + # CSV module formatting options. + 'formatting': { + # The delimiter used to seperate columns. + 'delimiter': ',', + # The dialect to use when formating the file output. + 'dialect': 'excel' + } + }, + 'xml': { + # XML module formatting options. + 'formatting': {} + } + # TODO: Use something like the edit plugin + # 'item_fields': [] + }) + + def commands(self): + cmd = ui.Subcommand('export', help='export data from beets') + cmd.func = self.run + cmd.parser.add_option( + '-l', '--library', action='store_true', + help='show library fields instead of tags', + ) + cmd.parser.add_option( + '-a', '--album', action='store_true', + help='show album fields instead of tracks (implies "--library")', + ) + cmd.parser.add_option( + '--append', action='store_true', default=False, + help='if should append data to the file', + ) + cmd.parser.add_option( + '-i', '--include-keys', default=[], + action='append', dest='included_keys', + help='comma separated list of keys to show', + ) + cmd.parser.add_option( + '-o', '--output', + help='path for the output file. If not given, will print the data' + ) + cmd.parser.add_option( + '-f', '--format', default='json', + help="the output format: json (default), jsonlines, csv, or xml" + ) + return [cmd] + + def run(self, lib, opts, args): + file_path = opts.output + file_mode = 'a' if opts.append else 'w' + file_format = opts.format or self.config['default_format'].get(str) + file_format_is_line_based = (file_format == 'jsonlines') + format_options = self.config[file_format]['formatting'].get(dict) + + export_format = ExportFormat.factory( + file_type=file_format, + **{ + 'file_path': file_path, + 'file_mode': file_mode + } + ) + + if opts.library or opts.album: + data_collector = library_data + else: + data_collector = tag_data + + included_keys = [] + for keys in opts.included_keys: + included_keys.extend(keys.split(',')) + + items = [] + for data_emitter in data_collector( + lib, ui.decargs(args), + album=opts.album, + ): + try: + data, item = data_emitter(included_keys or '*') + except (mediafile.UnreadableFileError, OSError) as ex: + self._log.error('cannot read file: {0}', ex) + continue + + for key, value in data.items(): + if isinstance(value, bytes): + data[key] = util.displayable_path(value) + + if file_format_is_line_based: + export_format.export(data, **format_options) + else: + items += [data] + + if not file_format_is_line_based: + export_format.export(items, **format_options) + + +class ExportFormat: + """The output format type""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): + self.path = file_path + self.mode = file_mode + self.encoding = encoding + # creates a file object to write/append or sets to stdout + self.out_stream = codecs.open(self.path, self.mode, self.encoding) \ + if self.path else sys.stdout + + @classmethod + def factory(cls, file_type, **kwargs): + if file_type in ["json", "jsonlines"]: + return JsonFormat(**kwargs) + elif file_type == "csv": + return CSVFormat(**kwargs) + elif file_type == "xml": + return XMLFormat(**kwargs) + else: + raise NotImplementedError() + + def export(self, data, **kwargs): + raise NotImplementedError() + + +class JsonFormat(ExportFormat): + """Saves in a json file""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): + super().__init__(file_path, file_mode, encoding) + + def export(self, data, **kwargs): + json.dump(data, self.out_stream, cls=ExportEncoder, **kwargs) + self.out_stream.write('\n') + + +class CSVFormat(ExportFormat): + """Saves in a csv file""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): + super().__init__(file_path, file_mode, encoding) + + def export(self, data, **kwargs): + header = list(data[0].keys()) if data else [] + writer = csv.DictWriter(self.out_stream, fieldnames=header, **kwargs) + writer.writeheader() + writer.writerows(data) + + +class XMLFormat(ExportFormat): + """Saves in a xml file""" + def __init__(self, file_path, file_mode='w', encoding='utf-8'): + super().__init__(file_path, file_mode, encoding) + + def export(self, data, **kwargs): + # Creates the XML file structure. + library = ElementTree.Element('library') + tracks = ElementTree.SubElement(library, 'tracks') + if data and isinstance(data[0], dict): + for index, item in enumerate(data): + track = ElementTree.SubElement(tracks, 'track') + for key, value in item.items(): + track_details = ElementTree.SubElement(track, key) + track_details.text = value + # Depending on the version of python the encoding needs to change + try: + data = ElementTree.tostring(library, encoding='unicode', **kwargs) + except LookupError: + data = ElementTree.tostring(library, encoding='utf-8', **kwargs) + + self.out_stream.write(data) diff --git a/lib/beetsplug/fetchart.py b/lib/beetsplug/fetchart.py index b2a4620b..f2c1e5a7 100644 --- a/lib/beetsplug/fetchart.py +++ b/lib/beetsplug/fetchart.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,11 +14,12 @@ """Fetches album art. """ + from contextlib import closing -import logging import os import re from tempfile import NamedTemporaryFile +from collections import OrderedDict import requests @@ -27,370 +28,1122 @@ from beets import importer from beets import ui from beets import util from beets import config +from mediafile import image_mime_type from beets.util.artresizer import ArtResizer +from beets.util import sorted_walk +from beets.util import syspath, bytestring_path, py3_path +import confuse -try: - import itunes - HAVE_ITUNES = True -except ImportError: - HAVE_ITUNES = False - -IMAGE_EXTENSIONS = ['png', 'jpg', 'jpeg'] -CONTENT_TYPES = ('image/jpeg',) -DOWNLOAD_EXTENSION = '.jpg' - -log = logging.getLogger('beets') - -requests_session = requests.Session() -requests_session.headers = {'User-Agent': 'beets'} +CONTENT_TYPES = { + 'image/jpeg': [b'jpg', b'jpeg'], + 'image/png': [b'png'] +} +IMAGE_EXTENSIONS = [ext for exts in CONTENT_TYPES.values() for ext in exts] -def _fetch_image(url): - """Downloads an image from a URL and checks whether it seems to - actually be an image. If so, returns a path to the downloaded image. - Otherwise, returns None. +class Candidate: + """Holds information about a matching artwork, deals with validation of + dimension restrictions and resizing. """ - log.debug(u'fetchart: downloading art: {0}'.format(url)) - try: - with closing(requests_session.get(url, stream=True)) as resp: - if 'Content-Type' not in resp.headers \ - or resp.headers['Content-Type'] not in CONTENT_TYPES: - log.debug(u'fetchart: not an image') - return + CANDIDATE_BAD = 0 + CANDIDATE_EXACT = 1 + CANDIDATE_DOWNSCALE = 2 + CANDIDATE_DOWNSIZE = 3 + CANDIDATE_DEINTERLACE = 4 + CANDIDATE_REFORMAT = 5 - # Generate a temporary file with the correct extension. - with NamedTemporaryFile(suffix=DOWNLOAD_EXTENSION, delete=False) \ - as fh: - for chunk in resp.iter_content(): - fh.write(chunk) - log.debug(u'fetchart: downloaded art to: {0}'.format( - util.displayable_path(fh.name) - )) - return fh.name - except (IOError, requests.RequestException): - log.debug(u'fetchart: error fetching art') + MATCH_EXACT = 0 + MATCH_FALLBACK = 1 + + def __init__(self, log, path=None, url=None, source='', + match=None, size=None): + self._log = log + self.path = path + self.url = url + self.source = source + self.check = None + self.match = match + self.size = size + + def _validate(self, plugin): + """Determine whether the candidate artwork is valid based on + its dimensions (width and ratio). + + Return `CANDIDATE_BAD` if the file is unusable. + Return `CANDIDATE_EXACT` if the file is usable as-is. + Return `CANDIDATE_DOWNSCALE` if the file must be rescaled. + Return `CANDIDATE_DOWNSIZE` if the file must be resized, and possibly + also rescaled. + Return `CANDIDATE_DEINTERLACE` if the file must be deinterlaced. + Return `CANDIDATE_REFORMAT` if the file has to be converted. + """ + if not self.path: + return self.CANDIDATE_BAD + + if (not (plugin.enforce_ratio or plugin.minwidth or plugin.maxwidth + or plugin.max_filesize or plugin.deinterlace + or plugin.cover_format)): + return self.CANDIDATE_EXACT + + # get_size returns None if no local imaging backend is available + if not self.size: + self.size = ArtResizer.shared.get_size(self.path) + self._log.debug('image size: {}', self.size) + + if not self.size: + self._log.warning('Could not get size of image (please see ' + 'documentation for dependencies). ' + 'The configuration options `minwidth`, ' + '`enforce_ratio` and `max_filesize` ' + 'may be violated.') + return self.CANDIDATE_EXACT + + short_edge = min(self.size) + long_edge = max(self.size) + + # Check minimum dimension. + if plugin.minwidth and self.size[0] < plugin.minwidth: + self._log.debug('image too small ({} < {})', + self.size[0], plugin.minwidth) + return self.CANDIDATE_BAD + + # Check aspect ratio. + edge_diff = long_edge - short_edge + if plugin.enforce_ratio: + if plugin.margin_px: + if edge_diff > plugin.margin_px: + self._log.debug('image is not close enough to being ' + 'square, ({} - {} > {})', + long_edge, short_edge, plugin.margin_px) + return self.CANDIDATE_BAD + elif plugin.margin_percent: + margin_px = plugin.margin_percent * long_edge + if edge_diff > margin_px: + self._log.debug('image is not close enough to being ' + 'square, ({} - {} > {})', + long_edge, short_edge, margin_px) + return self.CANDIDATE_BAD + elif edge_diff: + # also reached for margin_px == 0 and margin_percent == 0.0 + self._log.debug('image is not square ({} != {})', + self.size[0], self.size[1]) + return self.CANDIDATE_BAD + + # Check maximum dimension. + downscale = False + if plugin.maxwidth and self.size[0] > plugin.maxwidth: + self._log.debug('image needs rescaling ({} > {})', + self.size[0], plugin.maxwidth) + downscale = True + + # Check filesize. + downsize = False + if plugin.max_filesize: + filesize = os.stat(syspath(self.path)).st_size + if filesize > plugin.max_filesize: + self._log.debug('image needs resizing ({}B > {}B)', + filesize, plugin.max_filesize) + downsize = True + + # Check image format + reformat = False + if plugin.cover_format: + fmt = ArtResizer.shared.get_format(self.path) + reformat = fmt != plugin.cover_format + if reformat: + self._log.debug('image needs reformatting: {} -> {}', + fmt, plugin.cover_format) + + if downscale: + return self.CANDIDATE_DOWNSCALE + elif downsize: + return self.CANDIDATE_DOWNSIZE + elif plugin.deinterlace: + return self.CANDIDATE_DEINTERLACE + elif reformat: + return self.CANDIDATE_REFORMAT + else: + return self.CANDIDATE_EXACT + + def validate(self, plugin): + self.check = self._validate(plugin) + return self.check + + def resize(self, plugin): + if self.check == self.CANDIDATE_DOWNSCALE: + self.path = \ + ArtResizer.shared.resize(plugin.maxwidth, self.path, + quality=plugin.quality, + max_filesize=plugin.max_filesize) + elif self.check == self.CANDIDATE_DOWNSIZE: + # dimensions are correct, so maxwidth is set to maximum dimension + self.path = \ + ArtResizer.shared.resize(max(self.size), self.path, + quality=plugin.quality, + max_filesize=plugin.max_filesize) + elif self.check == self.CANDIDATE_DEINTERLACE: + self.path = ArtResizer.shared.deinterlace(self.path) + elif self.check == self.CANDIDATE_REFORMAT: + self.path = ArtResizer.shared.reformat( + self.path, + plugin.cover_format, + deinterlaced=plugin.deinterlace, + ) + + +def _logged_get(log, *args, **kwargs): + """Like `requests.get`, but logs the effective URL to the specified + `log` at the `DEBUG` level. + + Use the optional `message` parameter to specify what to log before + the URL. By default, the string is "getting URL". + + Also sets the User-Agent header to indicate beets. + """ + # Use some arguments with the `send` call but most with the + # `Request` construction. This is a cheap, magic-filled way to + # emulate `requests.get` or, more pertinently, + # `requests.Session.request`. + req_kwargs = kwargs + send_kwargs = {} + for arg in ('stream', 'verify', 'proxies', 'cert', 'timeout'): + if arg in kwargs: + send_kwargs[arg] = req_kwargs.pop(arg) + + # Our special logging message parameter. + if 'message' in kwargs: + message = kwargs.pop('message') + else: + message = 'getting URL' + + req = requests.Request('GET', *args, **req_kwargs) + + with requests.Session() as s: + s.headers = {'User-Agent': 'beets'} + prepped = s.prepare_request(req) + settings = s.merge_environment_settings( + prepped.url, {}, None, None, None + ) + send_kwargs.update(settings) + log.debug('{}: {}', message, prepped.url) + return s.send(prepped, **send_kwargs) + + +class RequestMixin: + """Adds a Requests wrapper to the class that uses the logger, which + must be named `self._log`. + """ + + def request(self, *args, **kwargs): + """Like `requests.get`, but uses the logger `self._log`. + + See also `_logged_get`. + """ + return _logged_get(self._log, *args, **kwargs) # ART SOURCES ################################################################ -# Cover Art Archive. +class ArtSource(RequestMixin): + VALID_MATCHING_CRITERIA = ['default'] -CAA_URL = 'http://coverartarchive.org/release/{mbid}/front-500.jpg' -CAA_GROUP_URL = 'http://coverartarchive.org/release-group/{mbid}/front-500.jpg' + def __init__(self, log, config, match_by=None): + self._log = log + self._config = config + self.match_by = match_by or self.VALID_MATCHING_CRITERIA + + def get(self, album, plugin, paths): + raise NotImplementedError() + + def _candidate(self, **kwargs): + return Candidate(source=self, log=self._log, **kwargs) + + def fetch_image(self, candidate, plugin): + raise NotImplementedError() + + def cleanup(self, candidate): + pass -def caa_art(album): - """Return the Cover Art Archive and Cover Art Archive release group URLs - using album MusicBrainz release ID and release group ID. - """ - if album.mb_albumid: - yield CAA_URL.format(mbid=album.mb_albumid) - if album.mb_releasegroupid: - yield CAA_GROUP_URL.format(mbid=album.mb_releasegroupid) +class LocalArtSource(ArtSource): + IS_LOCAL = True + LOC_STR = 'local' + + def fetch_image(self, candidate, plugin): + pass -# Art from Amazon. +class RemoteArtSource(ArtSource): + IS_LOCAL = False + LOC_STR = 'remote' -AMAZON_URL = 'http://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' -AMAZON_INDICES = (1, 2) - - -def art_for_asin(album): - """Generate URLs using Amazon ID (ASIN) string. - """ - if album.asin: - for index in AMAZON_INDICES: - yield AMAZON_URL % (album.asin, index) - - -# AlbumArt.org scraper. - -AAO_URL = 'http://www.albumart.org/index_detail.php' -AAO_PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"' - - -def aao_art(album): - """Return art URL from AlbumArt.org using album ASIN. - """ - if not album.asin: - return - # Get the page from albumart.org. - try: - resp = requests_session.get(AAO_URL, params={'asin': album.asin}) - log.debug(u'fetchart: scraped art URL: {0}'.format(resp.url)) - except requests.RequestException: - log.debug(u'fetchart: error scraping art page') - return - - # Search the page for the image URL. - m = re.search(AAO_PAT, resp.text) - if m: - image_url = m.group(1) - yield image_url - else: - log.debug(u'fetchart: no image found on page') - - -# Google Images scraper. - -GOOGLE_URL = 'https://ajax.googleapis.com/ajax/services/search/images' - - -def google_art(album): - """Return art URL from google.org given an album title and - interpreter. - """ - if not (album.albumartist and album.album): - return - search_string = (album.albumartist + ',' + album.album).encode('utf-8') - response = requests_session.get(GOOGLE_URL, params={ - 'v': '1.0', - 'q': search_string, - 'start': '0', - }) - - # Get results using JSON. - try: - results = response.json() - data = results['responseData'] - dataInfo = data['results'] - for myUrl in dataInfo: - yield myUrl['unescapedUrl'] - except: - log.debug(u'fetchart: error scraping art page') - return - - -# Art from the iTunes Store. - -def itunes_art(album): - """Return art URL from iTunes Store given an album title. - """ - search_string = (album.albumartist + ' ' + album.album).encode('utf-8') - try: - # Isolate bugs in the iTunes library while searching. + def fetch_image(self, candidate, plugin): + """Downloads an image from a URL and checks whether it seems to + actually be an image. If so, returns a path to the downloaded image. + Otherwise, returns None. + """ + if plugin.maxwidth: + candidate.url = ArtResizer.shared.proxy_url(plugin.maxwidth, + candidate.url) try: - itunes_album = itunes.search_album(search_string)[0] - except Exception as exc: - log.debug('fetchart: iTunes search failed: {0}'.format(exc)) + with closing(self.request(candidate.url, stream=True, + message='downloading image')) as resp: + ct = resp.headers.get('Content-Type', None) + + # Download the image to a temporary file. As some servers + # (notably fanart.tv) have proven to return wrong Content-Types + # when images were uploaded with a bad file extension, do not + # rely on it. Instead validate the type using the file magic + # and only then determine the extension. + data = resp.iter_content(chunk_size=1024) + header = b'' + for chunk in data: + header += chunk + if len(header) >= 32: + # The imghdr module will only read 32 bytes, and our + # own additions in mediafile even less. + break + else: + # server didn't return enough data, i.e. corrupt image + return + + real_ct = image_mime_type(header) + if real_ct is None: + # detection by file magic failed, fall back to the + # server-supplied Content-Type + # Is our type detection failsafe enough to drop this? + real_ct = ct + + if real_ct not in CONTENT_TYPES: + self._log.debug('not a supported image: {}', + real_ct or 'unknown content type') + return + + ext = b'.' + CONTENT_TYPES[real_ct][0] + + if real_ct != ct: + self._log.warning('Server specified {}, but returned a ' + '{} image. Correcting the extension ' + 'to {}', + ct, real_ct, ext) + + suffix = py3_path(ext) + with NamedTemporaryFile(suffix=suffix, delete=False) as fh: + # write the first already loaded part of the image + fh.write(header) + # download the remaining part of the image + for chunk in data: + fh.write(chunk) + self._log.debug('downloaded art to: {0}', + util.displayable_path(fh.name)) + candidate.path = util.bytestring_path(fh.name) + return + + except (OSError, requests.RequestException, TypeError) as exc: + # Handling TypeError works around a urllib3 bug: + # https://github.com/shazow/urllib3/issues/556 + self._log.debug('error fetching art: {}', exc) return - if itunes_album.get_artwork()['100']: - small_url = itunes_album.get_artwork()['100'] - big_url = small_url.replace('100x100', '1200x1200') - yield big_url + def cleanup(self, candidate): + if candidate.path: + try: + util.remove(path=candidate.path) + except util.FilesystemError as exc: + self._log.debug('error cleaning up tmp art: {}', exc) + + +class CoverArtArchive(RemoteArtSource): + NAME = "Cover Art Archive" + VALID_MATCHING_CRITERIA = ['release', 'releasegroup'] + VALID_THUMBNAIL_SIZES = [250, 500, 1200] + + URL = 'https://coverartarchive.org/release/{mbid}' + GROUP_URL = 'https://coverartarchive.org/release-group/{mbid}' + + def get(self, album, plugin, paths): + """Return the Cover Art Archive and Cover Art Archive release group URLs + using album MusicBrainz release ID and release group ID. + """ + + def get_image_urls(url, size_suffix=None): + try: + response = self.request(url) + except requests.RequestException: + self._log.debug('{}: error receiving response' + .format(self.NAME)) + return + + try: + data = response.json() + except ValueError: + self._log.debug('{}: error loading response: {}' + .format(self.NAME, response.text)) + return + + for item in data.get('images', []): + try: + if 'Front' not in item['types']: + continue + + if size_suffix: + yield item['thumbnails'][size_suffix] + else: + yield item['image'] + except KeyError: + pass + + release_url = self.URL.format(mbid=album.mb_albumid) + release_group_url = self.GROUP_URL.format(mbid=album.mb_releasegroupid) + + # Cover Art Archive API offers pre-resized thumbnails at several sizes. + # If the maxwidth config matches one of the already available sizes + # fetch it directly intead of fetching the full sized image and + # resizing it. + size_suffix = None + if plugin.maxwidth in self.VALID_THUMBNAIL_SIZES: + size_suffix = "-" + str(plugin.maxwidth) + + if 'release' in self.match_by and album.mb_albumid: + for url in get_image_urls(release_url, size_suffix): + yield self._candidate(url=url, match=Candidate.MATCH_EXACT) + + if 'releasegroup' in self.match_by and album.mb_releasegroupid: + for url in get_image_urls(release_group_url): + yield self._candidate(url=url, match=Candidate.MATCH_FALLBACK) + + +class Amazon(RemoteArtSource): + NAME = "Amazon" + URL = 'https://images.amazon.com/images/P/%s.%02i.LZZZZZZZ.jpg' + INDICES = (1, 2) + + def get(self, album, plugin, paths): + """Generate URLs using Amazon ID (ASIN) string. + """ + if album.asin: + for index in self.INDICES: + yield self._candidate(url=self.URL % (album.asin, index), + match=Candidate.MATCH_EXACT) + + +class AlbumArtOrg(RemoteArtSource): + NAME = "AlbumArt.org scraper" + URL = 'https://www.albumart.org/index_detail.php' + PAT = r'href\s*=\s*"([^>"]*)"[^>]*title\s*=\s*"View larger image"' + + def get(self, album, plugin, paths): + """Return art URL from AlbumArt.org using album ASIN. + """ + if not album.asin: + return + # Get the page from albumart.org. + try: + resp = self.request(self.URL, params={'asin': album.asin}) + self._log.debug('scraped art URL: {0}', resp.url) + except requests.RequestException: + self._log.debug('error scraping art page') + return + + # Search the page for the image URL. + m = re.search(self.PAT, resp.text) + if m: + image_url = m.group(1) + yield self._candidate(url=image_url, match=Candidate.MATCH_EXACT) else: - log.debug(u'fetchart: album has no artwork in iTunes Store') - except IndexError: - log.debug(u'fetchart: album not found in iTunes Store') + self._log.debug('no image found on page') -# Art from the filesystem. +class GoogleImages(RemoteArtSource): + NAME = "Google Images" + URL = 'https://www.googleapis.com/customsearch/v1' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.key = self._config['google_key'].get(), + self.cx = self._config['google_engine'].get(), + + def get(self, album, plugin, paths): + """Return art URL from google custom search engine + given an album title and interpreter. + """ + if not (album.albumartist and album.album): + return + search_string = (album.albumartist + ',' + album.album).encode('utf-8') + + try: + response = self.request(self.URL, params={ + 'key': self.key, + 'cx': self.cx, + 'q': search_string, + 'searchType': 'image' + }) + except requests.RequestException: + self._log.debug('google: error receiving response') + return + + # Get results using JSON. + try: + data = response.json() + except ValueError: + self._log.debug('google: error loading response: {}' + .format(response.text)) + return + + if 'error' in data: + reason = data['error']['errors'][0]['reason'] + self._log.debug('google fetchart error: {0}', reason) + return + + if 'items' in data.keys(): + for item in data['items']: + yield self._candidate(url=item['link'], + match=Candidate.MATCH_EXACT) -def filename_priority(filename, cover_names): - """Sort order for image names. +class FanartTV(RemoteArtSource): + """Art from fanart.tv requested using their API""" + NAME = "fanart.tv" + API_URL = 'https://webservice.fanart.tv/v3/' + API_ALBUMS = API_URL + 'music/albums/' + PROJECT_KEY = '61a7d0ab4e67162b7a0c7c35915cd48e' - Return indexes of cover names found in the image filename. This - means that images with lower-numbered and more keywords will have higher - priority. - """ - return [idx for (idx, x) in enumerate(cover_names) if x in filename] + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.client_key = self._config['fanarttv_key'].get() + + def get(self, album, plugin, paths): + if not album.mb_releasegroupid: + return + + try: + response = self.request( + self.API_ALBUMS + album.mb_releasegroupid, + headers={'api-key': self.PROJECT_KEY, + 'client-key': self.client_key}) + except requests.RequestException: + self._log.debug('fanart.tv: error receiving response') + return + + try: + data = response.json() + except ValueError: + self._log.debug('fanart.tv: error loading response: {}', + response.text) + return + + if 'status' in data and data['status'] == 'error': + if 'not found' in data['error message'].lower(): + self._log.debug('fanart.tv: no image found') + elif 'api key' in data['error message'].lower(): + self._log.warning('fanart.tv: Invalid API key given, please ' + 'enter a valid one in your config file.') + else: + self._log.debug('fanart.tv: error on request: {}', + data['error message']) + return + + matches = [] + # can there be more than one releasegroupid per response? + for mbid, art in data.get('albums', {}).items(): + # there might be more art referenced, e.g. cdart, and an albumcover + # might not be present, even if the request was successful + if album.mb_releasegroupid == mbid and 'albumcover' in art: + matches.extend(art['albumcover']) + # can this actually occur? + else: + self._log.debug('fanart.tv: unexpected mb_releasegroupid in ' + 'response!') + + matches.sort(key=lambda x: x['likes'], reverse=True) + for item in matches: + # fanart.tv has a strict size requirement for album art to be + # uploaded + yield self._candidate(url=item['url'], + match=Candidate.MATCH_EXACT, + size=(1000, 1000)) -def art_in_path(path, cover_names, cautious): - """Look for album art files in a specified directory. - """ - if not os.path.isdir(path): - return +class ITunesStore(RemoteArtSource): + NAME = "iTunes Store" + API_URL = 'https://itunes.apple.com/search' - # Find all files that look like images in the directory. - images = [] - for fn in os.listdir(path): - for ext in IMAGE_EXTENSIONS: - if fn.lower().endswith('.' + ext): - images.append(fn) + def get(self, album, plugin, paths): + """Return art URL from iTunes Store given an album title. + """ + if not (album.albumartist and album.album): + return - # Look for "preferred" filenames. - images = sorted(images, key=lambda x: filename_priority(x, cover_names)) - cover_pat = r"(\b|_)({0})(\b|_)".format('|'.join(cover_names)) - for fn in images: - if re.search(cover_pat, os.path.splitext(fn)[0], re.I): - log.debug(u'fetchart: using well-named art file {0}'.format( - util.displayable_path(fn) - )) - return os.path.join(path, fn) + payload = { + 'term': album.albumartist + ' ' + album.album, + 'entity': 'album', + 'media': 'music', + 'limit': 200 + } + try: + r = self.request(self.API_URL, params=payload) + r.raise_for_status() + except requests.RequestException as e: + self._log.debug('iTunes search failed: {0}', e) + return - # Fall back to any image in the folder. - if images and not cautious: - log.debug(u'fetchart: using fallback art file {0}'.format( - util.displayable_path(images[0]) - )) - return os.path.join(path, images[0]) + try: + candidates = r.json()['results'] + except ValueError as e: + self._log.debug('Could not decode json response: {0}', e) + return + except KeyError as e: + self._log.debug('{} not found in json. Fields are {} ', + e, + list(r.json().keys())) + return + if not candidates: + self._log.debug('iTunes search for {!r} got no results', + payload['term']) + return + + if self._config['high_resolution']: + image_suffix = '100000x100000-999' + else: + image_suffix = '1200x1200bb' + + for c in candidates: + try: + if (c['artistName'] == album.albumartist + and c['collectionName'] == album.album): + art_url = c['artworkUrl100'] + art_url = art_url.replace('100x100bb', + image_suffix) + yield self._candidate(url=art_url, + match=Candidate.MATCH_EXACT) + except KeyError as e: + self._log.debug('Malformed itunes candidate: {} not found in {}', # NOQA E501 + e, + list(c.keys())) + + try: + fallback_art_url = candidates[0]['artworkUrl100'] + fallback_art_url = fallback_art_url.replace('100x100bb', + image_suffix) + yield self._candidate(url=fallback_art_url, + match=Candidate.MATCH_FALLBACK) + except KeyError as e: + self._log.debug('Malformed itunes candidate: {} not found in {}', + e, + list(c.keys())) + + +class Wikipedia(RemoteArtSource): + NAME = "Wikipedia (queried through DBpedia)" + DBPEDIA_URL = 'https://dbpedia.org/sparql' + WIKIPEDIA_URL = 'https://en.wikipedia.org/w/api.php' + SPARQL_QUERY = '''PREFIX rdf: + PREFIX dbpprop: + PREFIX owl: + PREFIX rdfs: + PREFIX foaf: + + SELECT DISTINCT ?pageId ?coverFilename WHERE {{ + ?subject owl:wikiPageID ?pageId . + ?subject dbpprop:name ?name . + ?subject rdfs:label ?label . + {{ ?subject dbpprop:artist ?artist }} + UNION + {{ ?subject owl:artist ?artist }} + {{ ?artist foaf:name "{artist}"@en }} + UNION + {{ ?artist dbpprop:name "{artist}"@en }} + ?subject rdf:type . + ?subject dbpprop:cover ?coverFilename . + FILTER ( regex(?name, "{album}", "i") ) + }} + Limit 1''' + + def get(self, album, plugin, paths): + if not (album.albumartist and album.album): + return + + # Find the name of the cover art filename on DBpedia + cover_filename, page_id = None, None + + try: + dbpedia_response = self.request( + self.DBPEDIA_URL, + params={ + 'format': 'application/sparql-results+json', + 'timeout': 2500, + 'query': self.SPARQL_QUERY.format( + artist=album.albumartist.title(), album=album.album) + }, + headers={'content-type': 'application/json'}, + ) + except requests.RequestException: + self._log.debug('dbpedia: error receiving response') + return + + try: + data = dbpedia_response.json() + results = data['results']['bindings'] + if results: + cover_filename = 'File:' + results[0]['coverFilename']['value'] + page_id = results[0]['pageId']['value'] + else: + self._log.debug('wikipedia: album not found on dbpedia') + except (ValueError, KeyError, IndexError): + self._log.debug('wikipedia: error scraping dbpedia response: {}', + dbpedia_response.text) + + # Ensure we have a filename before attempting to query wikipedia + if not (cover_filename and page_id): + return + + # DBPedia sometimes provides an incomplete cover_filename, indicated + # by the filename having a space before the extension, e.g., 'foo .bar' + # An additional Wikipedia call can help to find the real filename. + # This may be removed once the DBPedia issue is resolved, see: + # https://github.com/dbpedia/extraction-framework/issues/396 + if ' .' in cover_filename and \ + '.' not in cover_filename.split(' .')[-1]: + self._log.debug( + 'wikipedia: dbpedia provided incomplete cover_filename' + ) + lpart, rpart = cover_filename.rsplit(' .', 1) + + # Query all the images in the page + try: + wikipedia_response = self.request( + self.WIKIPEDIA_URL, + params={ + 'format': 'json', + 'action': 'query', + 'continue': '', + 'prop': 'images', + 'pageids': page_id, + }, + headers={'content-type': 'application/json'}, + ) + except requests.RequestException: + self._log.debug('wikipedia: error receiving response') + return + + # Try to see if one of the images on the pages matches our + # incomplete cover_filename + try: + data = wikipedia_response.json() + results = data['query']['pages'][page_id]['images'] + for result in results: + if re.match(re.escape(lpart) + r'.*?\.' + re.escape(rpart), + result['title']): + cover_filename = result['title'] + break + except (ValueError, KeyError): + self._log.debug( + 'wikipedia: failed to retrieve a cover_filename' + ) + return + + # Find the absolute url of the cover art on Wikipedia + try: + wikipedia_response = self.request( + self.WIKIPEDIA_URL, + params={ + 'format': 'json', + 'action': 'query', + 'continue': '', + 'prop': 'imageinfo', + 'iiprop': 'url', + 'titles': cover_filename.encode('utf-8'), + }, + headers={'content-type': 'application/json'}, + ) + except requests.RequestException: + self._log.debug('wikipedia: error receiving response') + return + + try: + data = wikipedia_response.json() + results = data['query']['pages'] + for _, result in results.items(): + image_url = result['imageinfo'][0]['url'] + yield self._candidate(url=image_url, + match=Candidate.MATCH_EXACT) + except (ValueError, KeyError, IndexError): + self._log.debug('wikipedia: error scraping imageinfo') + return + + +class FileSystem(LocalArtSource): + NAME = "Filesystem" + + @staticmethod + def filename_priority(filename, cover_names): + """Sort order for image names. + + Return indexes of cover names found in the image filename. This + means that images with lower-numbered and more keywords will have + higher priority. + """ + return [idx for (idx, x) in enumerate(cover_names) if x in filename] + + def get(self, album, plugin, paths): + """Look for album art files in the specified directories. + """ + if not paths: + return + cover_names = list(map(util.bytestring_path, plugin.cover_names)) + cover_names_str = b'|'.join(cover_names) + cover_pat = br''.join([br"(\b|_)(", cover_names_str, br")(\b|_)"]) + + for path in paths: + if not os.path.isdir(syspath(path)): + continue + + # Find all files that look like images in the directory. + images = [] + ignore = config['ignore'].as_str_seq() + ignore_hidden = config['ignore_hidden'].get(bool) + for _, _, files in sorted_walk(path, ignore=ignore, + ignore_hidden=ignore_hidden): + for fn in files: + fn = bytestring_path(fn) + for ext in IMAGE_EXTENSIONS: + if fn.lower().endswith(b'.' + ext) and \ + os.path.isfile(syspath(os.path.join(path, fn))): + images.append(fn) + + # Look for "preferred" filenames. + images = sorted(images, + key=lambda x: + self.filename_priority(x, cover_names)) + remaining = [] + for fn in images: + if re.search(cover_pat, os.path.splitext(fn)[0], re.I): + self._log.debug('using well-named art file {0}', + util.displayable_path(fn)) + yield self._candidate(path=os.path.join(path, fn), + match=Candidate.MATCH_EXACT) + else: + remaining.append(fn) + + # Fall back to any image in the folder. + if remaining and not plugin.cautious: + self._log.debug('using fallback art file {0}', + util.displayable_path(remaining[0])) + yield self._candidate(path=os.path.join(path, remaining[0]), + match=Candidate.MATCH_FALLBACK) + + +class LastFM(RemoteArtSource): + NAME = "Last.fm" + + # Sizes in priority order. + SIZES = OrderedDict([ + ('mega', (300, 300)), + ('extralarge', (300, 300)), + ('large', (174, 174)), + ('medium', (64, 64)), + ('small', (34, 34)), + ]) + + API_URL = 'https://ws.audioscrobbler.com/2.0' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.key = self._config['lastfm_key'].get(), + + def get(self, album, plugin, paths): + if not album.mb_albumid: + return + + try: + response = self.request(self.API_URL, params={ + 'method': 'album.getinfo', + 'api_key': self.key, + 'mbid': album.mb_albumid, + 'format': 'json', + }) + except requests.RequestException: + self._log.debug('lastfm: error receiving response') + return + + try: + data = response.json() + + if 'error' in data: + if data['error'] == 6: + self._log.debug('lastfm: no results for {}', + album.mb_albumid) + else: + self._log.error( + 'lastfm: failed to get album info: {} ({})', + data['message'], data['error']) + else: + images = {image['size']: image['#text'] + for image in data['album']['image']} + + # Provide candidates in order of size. + for size in self.SIZES.keys(): + if size in images: + yield self._candidate(url=images[size], + size=self.SIZES[size]) + except ValueError: + self._log.debug('lastfm: error loading response: {}' + .format(response.text)) + return # Try each source in turn. -SOURCES_ALL = [u'coverart', u'itunes', u'amazon', u'albumart', u'google'] +SOURCES_ALL = ['filesystem', + 'coverart', 'itunes', 'amazon', 'albumart', + 'wikipedia', 'google', 'fanarttv', 'lastfm'] -ART_FUNCS = { - u'coverart': caa_art, - u'itunes': itunes_art, - u'albumart': aao_art, - u'amazon': art_for_asin, - u'google': google_art, +ART_SOURCES = { + 'filesystem': FileSystem, + 'coverart': CoverArtArchive, + 'itunes': ITunesStore, + 'albumart': AlbumArtOrg, + 'amazon': Amazon, + 'wikipedia': Wikipedia, + 'google': GoogleImages, + 'fanarttv': FanartTV, + 'lastfm': LastFM, } - - -def _source_urls(album, sources=SOURCES_ALL): - """Generate possible source URLs for an album's art. The URLs are - not guaranteed to work so they each need to be attempted in turn. - This allows the main `art_for_album` function to abort iteration - through this sequence early to avoid the cost of scraping when not - necessary. - """ - for s in sources: - urls = ART_FUNCS[s](album) - for url in urls: - yield url - - -def art_for_album(album, paths, maxwidth=None, local_only=False): - """Given an Album object, returns a path to downloaded art for the - album (or None if no art is found). If `maxwidth`, then images are - resized to this maximum pixel size. If `local_only`, then only local - image files from the filesystem are returned; no network requests - are made. - """ - out = None - - # Local art. - cover_names = config['fetchart']['cover_names'].as_str_seq() - cover_names = map(util.bytestring_path, cover_names) - cautious = config['fetchart']['cautious'].get(bool) - if paths: - for path in paths: - out = art_in_path(path, cover_names, cautious) - if out: - break - - # Web art sources. - remote_priority = config['fetchart']['remote_priority'].get(bool) - if not local_only and (remote_priority or not out): - for url in _source_urls(album, - config['fetchart']['sources'].as_str_seq()): - if maxwidth: - url = ArtResizer.shared.proxy_url(maxwidth, url) - candidate = _fetch_image(url) - if candidate: - out = candidate - break - - if maxwidth and out: - out = ArtResizer.shared.resize(maxwidth, out) - return out - +SOURCE_NAMES = {v: k for k, v in ART_SOURCES.items()} # PLUGIN LOGIC ############################################################### -def batch_fetch_art(lib, albums, force, maxwidth=None): - """Fetch album art for each of the albums. This implements the manual - fetchart CLI command. - """ - for album in albums: - if album.artpath and not force: - message = 'has album art' - else: - # In ordinary invocations, look for images on the - # filesystem. When forcing, however, always go to the Web - # sources. - local_paths = None if force else [album.path] +class FetchArtPlugin(plugins.BeetsPlugin, RequestMixin): + PAT_PX = r"(0|[1-9][0-9]*)px" + PAT_PERCENT = r"(100(\.00?)?|[1-9]?[0-9](\.[0-9]{1,2})?)%" - path = art_for_album(album, local_paths, maxwidth) - if path: - album.set_art(path, False) - album.store() - message = ui.colorize('green', 'found album art') - else: - message = ui.colorize('red', 'no art found') - - log.info(u'{0} - {1}: {2}'.format(album.albumartist, album.album, - message)) - - -class FetchArtPlugin(plugins.BeetsPlugin): def __init__(self): - super(FetchArtPlugin, self).__init__() + super().__init__() + + # Holds candidates corresponding to downloaded images between + # fetching them and placing them in the filesystem. + self.art_candidates = {} self.config.add({ 'auto': True, + 'minwidth': 0, 'maxwidth': 0, - 'remote_priority': False, + 'quality': 0, + 'max_filesize': 0, + 'enforce_ratio': False, 'cautious': False, - 'google_search': False, 'cover_names': ['cover', 'front', 'art', 'album', 'folder'], - 'sources': SOURCES_ALL, + 'sources': ['filesystem', + 'coverart', 'itunes', 'amazon', 'albumart'], + 'google_key': None, + 'google_engine': '001442825323518660753:hrh5ch1gjzm', + 'fanarttv_key': None, + 'lastfm_key': None, + 'store_source': False, + 'high_resolution': False, + 'deinterlace': False, + 'cover_format': None, }) + self.config['google_key'].redact = True + self.config['fanarttv_key'].redact = True + self.config['lastfm_key'].redact = True - # Holds paths to downloaded images between fetching them and - # placing them in the filesystem. - self.art_paths = {} - + self.minwidth = self.config['minwidth'].get(int) self.maxwidth = self.config['maxwidth'].get(int) + self.max_filesize = self.config['max_filesize'].get(int) + self.quality = self.config['quality'].get(int) + + # allow both pixel and percentage-based margin specifications + self.enforce_ratio = self.config['enforce_ratio'].get( + confuse.OneOf([bool, + confuse.String(pattern=self.PAT_PX), + confuse.String(pattern=self.PAT_PERCENT)])) + self.margin_px = None + self.margin_percent = None + self.deinterlace = self.config['deinterlace'].get(bool) + if type(self.enforce_ratio) is str: + if self.enforce_ratio[-1] == '%': + self.margin_percent = float(self.enforce_ratio[:-1]) / 100 + elif self.enforce_ratio[-2:] == 'px': + self.margin_px = int(self.enforce_ratio[:-2]) + else: + # shouldn't happen + raise confuse.ConfigValueError() + self.enforce_ratio = True + + cover_names = self.config['cover_names'].as_str_seq() + self.cover_names = list(map(util.bytestring_path, cover_names)) + self.cautious = self.config['cautious'].get(bool) + self.store_source = self.config['store_source'].get(bool) + + self.src_removed = (config['import']['delete'].get(bool) or + config['import']['move'].get(bool)) + + self.cover_format = self.config['cover_format'].get( + confuse.Optional(str) + ) + if self.config['auto']: # Enable two import hooks when fetching is enabled. self.import_stages = [self.fetch_art] self.register_listener('import_task_files', self.assign_art) available_sources = list(SOURCES_ALL) - if not HAVE_ITUNES and u'itunes' in available_sources: - available_sources.remove(u'itunes') - self.config['sources'] = plugins.sanitize_choices( - self.config['sources'].as_str_seq(), available_sources) + if not self.config['google_key'].get() and \ + 'google' in available_sources: + available_sources.remove('google') + if not self.config['lastfm_key'].get() and \ + 'lastfm' in available_sources: + available_sources.remove('lastfm') + available_sources = [(s, c) + for s in available_sources + for c in ART_SOURCES[s].VALID_MATCHING_CRITERIA] + sources = plugins.sanitize_pairs( + self.config['sources'].as_pairs(default_value='*'), + available_sources) + + if 'remote_priority' in self.config: + self._log.warning( + 'The `fetch_art.remote_priority` configuration option has ' + 'been deprecated. Instead, place `filesystem` at the end of ' + 'your `sources` list.') + if self.config['remote_priority'].get(bool): + fs = [] + others = [] + for s, c in sources: + if s == 'filesystem': + fs.append((s, c)) + else: + others.append((s, c)) + sources = others + fs + + self.sources = [ART_SOURCES[s](self._log, self.config, match_by=[c]) + for s, c in sources] # Asynchronous; after music is added to the library. def fetch_art(self, session, task): """Find art for the album being imported.""" if task.is_album: # Only fetch art for full albums. + if task.album.artpath and os.path.isfile(task.album.artpath): + # Album already has art (probably a re-import); skip it. + return if task.choice_flag == importer.action.ASIS: # For as-is imports, don't search Web sources for art. local = True - elif task.choice_flag == importer.action.APPLY: + elif task.choice_flag in (importer.action.APPLY, + importer.action.RETAG): # Search everywhere for art. local = False else: # For any other choices (e.g., TRACKS), do nothing. return - path = art_for_album(task.album, task.paths, self.maxwidth, local) + candidate = self.art_for_album(task.album, task.paths, local) - if path: - self.art_paths[task] = path + if candidate: + self.art_candidates[task] = candidate + + def _set_art(self, album, candidate, delete=False): + album.set_art(candidate.path, delete) + if self.store_source: + # store the source of the chosen artwork in a flexible field + self._log.debug( + "Storing art_source for {0.albumartist} - {0.album}", + album) + album.art_source = SOURCE_NAMES[type(candidate.source)] + album.store() # Synchronous; after music files are put in place. def assign_art(self, session, task): """Place the discovered art in the filesystem.""" - if task in self.art_paths: - path = self.art_paths.pop(task) + if task in self.art_candidates: + candidate = self.art_candidates.pop(task) - album = task.album - src_removed = (config['import']['delete'].get(bool) or - config['import']['move'].get(bool)) - album.set_art(path, not src_removed) - album.store() - if src_removed: - task.prune(path) + self._set_art(task.album, candidate, not self.src_removed) + + if self.src_removed: + task.prune(candidate.path) # Manual album art fetching. def commands(self): cmd = ui.Subcommand('fetchart', help='download album art') - cmd.parser.add_option('-f', '--force', dest='force', - action='store_true', default=False, - help='re-download art when already present') + cmd.parser.add_option( + '-f', '--force', dest='force', + action='store_true', default=False, + help='re-download art when already present' + ) + cmd.parser.add_option( + '-q', '--quiet', dest='quiet', + action='store_true', default=False, + help='quiet mode: do not output albums that already have artwork' + ) def func(lib, opts, args): - batch_fetch_art(lib, lib.albums(ui.decargs(args)), opts.force, - self.maxwidth) + self.batch_fetch_art(lib, lib.albums(ui.decargs(args)), opts.force, + opts.quiet) cmd.func = func return [cmd] + + # Utilities converted from functions to methods on logging overhaul + + def art_for_album(self, album, paths, local_only=False): + """Given an Album object, returns a path to downloaded art for the + album (or None if no art is found). If `maxwidth`, then images are + resized to this maximum pixel size. If `quality` then resized images + are saved at the specified quality level. If `local_only`, then only + local image files from the filesystem are returned; no network + requests are made. + """ + out = None + + for source in self.sources: + if source.IS_LOCAL or not local_only: + self._log.debug( + 'trying source {0} for album {1.albumartist} - {1.album}', + SOURCE_NAMES[type(source)], + album, + ) + # URLs might be invalid at this point, or the image may not + # fulfill the requirements + for candidate in source.get(album, self, paths): + source.fetch_image(candidate, self) + if candidate.validate(self): + out = candidate + self._log.debug( + 'using {0.LOC_STR} image {1}'.format( + source, util.displayable_path(out.path))) + break + # Remove temporary files for invalid candidates. + source.cleanup(candidate) + if out: + break + + if out: + out.resize(self) + + return out + + def batch_fetch_art(self, lib, albums, force, quiet): + """Fetch album art for each of the albums. This implements the manual + fetchart CLI command. + """ + for album in albums: + if album.artpath and not force and os.path.isfile(album.artpath): + if not quiet: + message = ui.colorize('text_highlight_minor', + 'has album art') + self._log.info('{0}: {1}', album, message) + else: + # In ordinary invocations, look for images on the + # filesystem. When forcing, however, always go to the Web + # sources. + local_paths = None if force else [album.path] + + candidate = self.art_for_album(album, local_paths) + if candidate: + self._set_art(album, candidate) + message = ui.colorize('text_success', 'found album art') + else: + message = ui.colorize('text_error', 'no art found') + self._log.info('{0}: {1}', album, message) diff --git a/lib/beetsplug/filefilter.py b/lib/beetsplug/filefilter.py new file mode 100644 index 00000000..ec8fddb4 --- /dev/null +++ b/lib/beetsplug/filefilter.py @@ -0,0 +1,79 @@ +# This file is part of beets. +# Copyright 2016, Malte Ried. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Filter imported files using a regular expression. +""" + + +import re +from beets import config +from beets.util import bytestring_path +from beets.plugins import BeetsPlugin +from beets.importer import SingletonImportTask + + +class FileFilterPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.register_listener('import_task_created', + self.import_task_created_event) + self.config.add({ + 'path': '.*' + }) + + self.path_album_regex = \ + self.path_singleton_regex = \ + re.compile(bytestring_path(self.config['path'].get())) + + if 'album_path' in self.config: + self.path_album_regex = re.compile( + bytestring_path(self.config['album_path'].get())) + + if 'singleton_path' in self.config: + self.path_singleton_regex = re.compile( + bytestring_path(self.config['singleton_path'].get())) + + def import_task_created_event(self, session, task): + if task.items and len(task.items) > 0: + items_to_import = [] + for item in task.items: + if self.file_filter(item['path']): + items_to_import.append(item) + if len(items_to_import) > 0: + task.items = items_to_import + else: + # Returning an empty list of tasks from the handler + # drops the task from the rest of the importer pipeline. + return [] + + elif isinstance(task, SingletonImportTask): + if not self.file_filter(task.item['path']): + return [] + + # If not filtered, return the original task unchanged. + return [task] + + def file_filter(self, full_path): + """Checks if the configured regular expressions allow the import + of the file given in full_path. + """ + import_config = dict(config['import']) + full_path = bytestring_path(full_path) + if 'singletons' not in import_config or not import_config[ + 'singletons']: + # Album + return self.path_album_regex.match(full_path) is not None + else: + # Singleton + return self.path_singleton_regex.match(full_path) is not None diff --git a/lib/beetsplug/fish.py b/lib/beetsplug/fish.py new file mode 100644 index 00000000..21fd67f6 --- /dev/null +++ b/lib/beetsplug/fish.py @@ -0,0 +1,285 @@ +# This file is part of beets. +# Copyright 2015, winters jean-marie. +# Copyright 2020, Justin Mayer +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""This plugin generates tab completions for Beets commands for the Fish shell +, including completions for Beets commands, plugin +commands, and option flags. Also generated are completions for all the album +and track fields, suggesting for example `genre:` or `album:` when querying the +Beets database. Completions for the *values* of those fields are not generated +by default but can be added via the `-e` / `--extravalues` flag. For example: +`beet fish -e genre -e albumartist` +""" + + +from beets.plugins import BeetsPlugin +from beets import library, ui +from beets.ui import commands +from operator import attrgetter +import os +BL_NEED2 = """complete -c beet -n '__fish_beet_needs_command' {} {}\n""" +BL_USE3 = """complete -c beet -n '__fish_beet_using_command {}' {} {}\n""" +BL_SUBS = """complete -c beet -n '__fish_at_level {} ""' {} {}\n""" +BL_EXTRA3 = """complete -c beet -n '__fish_beet_use_extra {}' {} {}\n""" + +HEAD = ''' +function __fish_beet_needs_command + set cmd (commandline -opc) + if test (count $cmd) -eq 1 + return 0 + end + return 1 +end + +function __fish_beet_using_command + set cmd (commandline -opc) + set needle (count $cmd) + if test $needle -gt 1 + if begin test $argv[1] = $cmd[2]; + and not contains -- $cmd[$needle] $FIELDS; end + return 0 + end + end + return 1 +end + +function __fish_beet_use_extra + set cmd (commandline -opc) + set needle (count $cmd) + if test $argv[2] = $cmd[$needle] + return 0 + end + return 1 +end +''' + + +class FishPlugin(BeetsPlugin): + + def commands(self): + cmd = ui.Subcommand('fish', help='generate Fish shell tab completions') + cmd.func = self.run + cmd.parser.add_option('-f', '--noFields', action='store_true', + default=False, + help='omit album/track field completions') + cmd.parser.add_option( + '-e', + '--extravalues', + action='append', + type='choice', + choices=library.Item.all_keys() + + library.Album.all_keys(), + help='include specified field *values* in completions') + return [cmd] + + def run(self, lib, opts, args): + # Gather the commands from Beets core and its plugins. + # Collect the album and track fields. + # If specified, also collect the values for these fields. + # Make a giant string of all the above, formatted in a way that + # allows Fish to do tab completion for the `beet` command. + home_dir = os.path.expanduser("~") + completion_dir = os.path.join(home_dir, '.config/fish/completions') + try: + os.makedirs(completion_dir) + except OSError: + if not os.path.isdir(completion_dir): + raise + completion_file_path = os.path.join(completion_dir, 'beet.fish') + nobasicfields = opts.noFields # Do not complete for album/track fields + extravalues = opts.extravalues # e.g., Also complete artists names + beetcmds = sorted( + (commands.default_commands + + commands.plugins.commands()), + key=attrgetter('name')) + fields = sorted(set( + library.Album.all_keys() + library.Item.all_keys())) + # Collect commands, their aliases, and their help text + cmd_names_help = [] + for cmd in beetcmds: + names = list(cmd.aliases) + names.append(cmd.name) + for name in names: + cmd_names_help.append((name, cmd.help)) + # Concatenate the string + totstring = HEAD + "\n" + totstring += get_cmds_list([name[0] for name in cmd_names_help]) + totstring += '' if nobasicfields else get_standard_fields(fields) + totstring += get_extravalues(lib, extravalues) if extravalues else '' + totstring += "\n" + "# ====== {} =====".format( + "setup basic beet completion") + "\n" * 2 + totstring += get_basic_beet_options() + totstring += "\n" + "# ====== {} =====".format( + "setup field completion for subcommands") + "\n" + totstring += get_subcommands( + cmd_names_help, nobasicfields, extravalues) + # Set up completion for all the command options + totstring += get_all_commands(beetcmds) + + with open(completion_file_path, 'w') as fish_file: + fish_file.write(totstring) + + +def _escape(name): + # Escape ? in fish + if name == "?": + name = "\\" + name + return name + + +def get_cmds_list(cmds_names): + # Make a list of all Beets core & plugin commands + substr = '' + substr += ( + "set CMDS " + " ".join(cmds_names) + ("\n" * 2) + ) + return substr + + +def get_standard_fields(fields): + # Make a list of album/track fields and append with ':' + fields = (field + ":" for field in fields) + substr = '' + substr += ( + "set FIELDS " + " ".join(fields) + ("\n" * 2) + ) + return substr + + +def get_extravalues(lib, extravalues): + # Make a list of all values from an album/track field. + # 'beet ls albumartist: ' yields completions for ABBA, Beatles, etc. + word = '' + values_set = get_set_of_values_for_field(lib, extravalues) + for fld in extravalues: + extraname = fld.upper() + 'S' + word += ( + "set " + extraname + " " + " ".join(sorted(values_set[fld])) + + ("\n" * 2) + ) + return word + + +def get_set_of_values_for_field(lib, fields): + # Get unique values from a specified album/track field + fields_dict = {} + for each in fields: + fields_dict[each] = set() + for item in lib.items(): + for field in fields: + fields_dict[field].add(wrap(item[field])) + return fields_dict + + +def get_basic_beet_options(): + word = ( + BL_NEED2.format("-l format-item", + "-f -d 'print with custom format'") + + BL_NEED2.format("-l format-album", + "-f -d 'print with custom format'") + + BL_NEED2.format("-s l -l library", + "-f -r -d 'library database file to use'") + + BL_NEED2.format("-s d -l directory", + "-f -r -d 'destination music directory'") + + BL_NEED2.format("-s v -l verbose", + "-f -d 'print debugging information'") + + + BL_NEED2.format("-s c -l config", + "-f -r -d 'path to configuration file'") + + BL_NEED2.format("-s h -l help", + "-f -d 'print this help message and exit'")) + return word + + +def get_subcommands(cmd_name_and_help, nobasicfields, extravalues): + # Formatting for Fish to complete our fields/values + word = "" + for cmdname, cmdhelp in cmd_name_and_help: + cmdname = _escape(cmdname) + + word += "\n" + "# ------ {} -------".format( + "fieldsetups for " + cmdname) + "\n" + word += ( + BL_NEED2.format( + ("-a " + cmdname), + ("-f " + "-d " + wrap(clean_whitespace(cmdhelp))))) + + if nobasicfields is False: + word += ( + BL_USE3.format( + cmdname, + ("-a " + wrap("$FIELDS")), + ("-f " + "-d " + wrap("fieldname")))) + + if extravalues: + for f in extravalues: + setvar = wrap("$" + f.upper() + "S") + word += " ".join(BL_EXTRA3.format( + (cmdname + " " + f + ":"), + ('-f ' + '-A ' + '-a ' + setvar), + ('-d ' + wrap(f))).split()) + "\n" + return word + + +def get_all_commands(beetcmds): + # Formatting for Fish to complete command options + word = "" + for cmd in beetcmds: + names = list(cmd.aliases) + names.append(cmd.name) + for name in names: + name = _escape(name) + + word += "\n" + word += ("\n" * 2) + "# ====== {} =====".format( + "completions for " + name) + "\n" + + for option in cmd.parser._get_all_options()[1:]: + cmd_l = (" -l " + option._long_opts[0].replace('--', '') + )if option._long_opts else '' + cmd_s = (" -s " + option._short_opts[0].replace('-', '') + ) if option._short_opts else '' + cmd_need_arg = ' -r ' if option.nargs in [1] else '' + cmd_helpstr = (" -d " + wrap(' '.join(option.help.split())) + ) if option.help else '' + cmd_arglist = (' -a ' + wrap(" ".join(option.choices)) + ) if option.choices else '' + + word += " ".join(BL_USE3.format( + name, + (cmd_need_arg + cmd_s + cmd_l + " -f " + cmd_arglist), + cmd_helpstr).split()) + "\n" + + word = (word + " ".join(BL_USE3.format( + name, + ("-s " + "h " + "-l " + "help" + " -f "), + ('-d ' + wrap("print help") + "\n") + ).split())) + return word + + +def clean_whitespace(word): + # Remove excess whitespace and tabs in a string + return " ".join(word.split()) + + +def wrap(word): + # Need " or ' around strings but watch out if they're in the string + sptoken = '\"' + if ('"') in word and ("'") in word: + word.replace('"', sptoken) + return '"' + word + '"' + + tok = '"' if "'" in word else "'" + return tok + word + tok diff --git a/lib/beetsplug/freedesktop.py b/lib/beetsplug/freedesktop.py new file mode 100644 index 00000000..ba4d5879 --- /dev/null +++ b/lib/beetsplug/freedesktop.py @@ -0,0 +1,35 @@ +# This file is part of beets. +# Copyright 2016, Matt Lichtenberg. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Creates freedesktop.org-compliant .directory files on an album level. +""" + + +from beets.plugins import BeetsPlugin +from beets import ui + + +class FreedesktopPlugin(BeetsPlugin): + def commands(self): + deprecated = ui.Subcommand( + "freedesktop", + help="Print a message to redirect to thumbnails --dolphin") + deprecated.func = self.deprecation_message + return [deprecated] + + def deprecation_message(self, lib, opts, args): + ui.print_("This plugin is deprecated. Its functionality is " + "superseded by the 'thumbnails' plugin") + ui.print_("'thumbnails --dolphin' replaces freedesktop. See doc & " + "changelog for more information") diff --git a/lib/beetsplug/fromfilename.py b/lib/beetsplug/fromfilename.py new file mode 100644 index 00000000..55684a27 --- /dev/null +++ b/lib/beetsplug/fromfilename.py @@ -0,0 +1,162 @@ +# This file is part of beets. +# Copyright 2016, Jan-Erik Dahlin +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""If the title is empty, try to extract track and title from the +filename. +""" + +from beets import plugins +from beets.util import displayable_path +import os +import re + + +# Filename field extraction patterns. +PATTERNS = [ + # Useful patterns. + r'^(?P.+)[\-_](?P.+)[\-_](?P<tag>.*)$', + r'^(?P<track>\d+)[\s.\-_]+(?P<artist>.+)[\-_](?P<title>.+)[\-_](?P<tag>.*)$', + r'^(?P<artist>.+)[\-_](?P<title>.+)$', + r'^(?P<track>\d+)[\s.\-_]+(?P<artist>.+)[\-_](?P<title>.+)$', + r'^(?P<title>.+)$', + r'^(?P<track>\d+)[\s.\-_]+(?P<title>.+)$', + r'^(?P<track>\d+)\s+(?P<title>.+)$', + r'^(?P<title>.+) by (?P<artist>.+)$', + r'^(?P<track>\d+).*$', +] + +# Titles considered "empty" and in need of replacement. +BAD_TITLE_PATTERNS = [ + r'^$', +] + + +def equal(seq): + """Determine whether a sequence holds identical elements. + """ + return len(set(seq)) <= 1 + + +def equal_fields(matchdict, field): + """Do all items in `matchdict`, whose values are dictionaries, have + the same value for `field`? (If they do, the field is probably not + the title.) + """ + return equal(m[field] for m in matchdict.values()) + + +def all_matches(names, pattern): + """If all the filenames in the item/filename mapping match the + pattern, return a dictionary mapping the items to dictionaries + giving the value for each named subpattern in the match. Otherwise, + return None. + """ + matches = {} + for item, name in names.items(): + m = re.match(pattern, name, re.IGNORECASE) + if m and m.groupdict(): + # Only yield a match when the regex applies *and* has + # capture groups. Otherwise, no information can be extracted + # from the filename. + matches[item] = m.groupdict() + else: + return None + return matches + + +def bad_title(title): + """Determine whether a given title is "bad" (empty or otherwise + meaningless) and in need of replacement. + """ + for pat in BAD_TITLE_PATTERNS: + if re.match(pat, title, re.IGNORECASE): + return True + return False + + +def apply_matches(d): + """Given a mapping from items to field dicts, apply the fields to + the objects. + """ + some_map = list(d.values())[0] + keys = some_map.keys() + + # Only proceed if the "tag" field is equal across all filenames. + if 'tag' in keys and not equal_fields(d, 'tag'): + return + + # Given both an "artist" and "title" field, assume that one is + # *actually* the artist, which must be uniform, and use the other + # for the title. This, of course, won't work for VA albums. + if 'artist' in keys: + if equal_fields(d, 'artist'): + artist = some_map['artist'] + title_field = 'title' + elif equal_fields(d, 'title'): + artist = some_map['title'] + title_field = 'artist' + else: + # Both vary. Abort. + return + + for item in d: + if not item.artist: + item.artist = artist + + # No artist field: remaining field is the title. + else: + title_field = 'title' + + # Apply the title and track. + for item in d: + if bad_title(item.title): + item.title = str(d[item][title_field]) + if 'track' in d[item] and item.track == 0: + item.track = int(d[item]['track']) + + +# Plugin structure and hook into import process. + +class FromFilenamePlugin(plugins.BeetsPlugin): + def __init__(self): + super().__init__() + self.register_listener('import_task_start', filename_task) + + +def filename_task(task, session): + """Examine each item in the task to see if we can extract a title + from the filename. Try to match all filenames to a number of + regexps, starting with the most complex patterns and successively + trying less complex patterns. As soon as all filenames match the + same regex we can make an educated guess of which part of the + regex that contains the title. + """ + items = task.items if task.is_album else [task.item] + + # Look for suspicious (empty or meaningless) titles. + missing_titles = sum(bad_title(i.title) for i in items) + + if missing_titles: + # Get the base filenames (no path or extension). + names = {} + for item in items: + path = displayable_path(item.path) + name, _ = os.path.splitext(os.path.basename(path)) + names[item] = name + + # Look for useful information in the filenames. + for pattern in PATTERNS: + d = all_matches(names, pattern) + if d: + apply_matches(d) diff --git a/lib/beetsplug/ftintitle.py b/lib/beetsplug/ftintitle.py new file mode 100644 index 00000000..57863d2b --- /dev/null +++ b/lib/beetsplug/ftintitle.py @@ -0,0 +1,166 @@ +# This file is part of beets. +# Copyright 2016, Verrus, <github.com/Verrus/beets-plugin-featInTitle> +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Moves "featured" artists to the title from the artist field. +""" + +import re + +from beets import plugins +from beets import ui +from beets.util import displayable_path + + +def split_on_feat(artist): + """Given an artist string, split the "main" artist from any artist + on the right-hand side of a string like "feat". Return the main + artist, which is always a string, and the featuring artist, which + may be a string or None if none is present. + """ + # split on the first "feat". + regex = re.compile(plugins.feat_tokens(), re.IGNORECASE) + parts = [s.strip() for s in regex.split(artist, 1)] + if len(parts) == 1: + return parts[0], None + else: + return tuple(parts) + + +def contains_feat(title): + """Determine whether the title contains a "featured" marker. + """ + return bool(re.search(plugins.feat_tokens(), title, flags=re.IGNORECASE)) + + +def find_feat_part(artist, albumartist): + """Attempt to find featured artists in the item's artist fields and + return the results. Returns None if no featured artist found. + """ + # Look for the album artist in the artist field. If it's not + # present, give up. + albumartist_split = artist.split(albumartist, 1) + if len(albumartist_split) <= 1: + return None + + # If the last element of the split (the right-hand side of the + # album artist) is nonempty, then it probably contains the + # featured artist. + elif albumartist_split[1] != '': + # Extract the featured artist from the right-hand side. + _, feat_part = split_on_feat(albumartist_split[1]) + return feat_part + + # Otherwise, if there's nothing on the right-hand side, look for a + # featuring artist on the left-hand side. + else: + lhs, rhs = split_on_feat(albumartist_split[0]) + if lhs: + return lhs + + return None + + +class FtInTitlePlugin(plugins.BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({ + 'auto': True, + 'drop': False, + 'format': 'feat. {0}', + }) + + self._command = ui.Subcommand( + 'ftintitle', + help='move featured artists to the title field') + + self._command.parser.add_option( + '-d', '--drop', dest='drop', + action='store_true', default=None, + help='drop featuring from artists and ignore title update') + + if self.config['auto']: + self.import_stages = [self.imported] + + def commands(self): + + def func(lib, opts, args): + self.config.set_args(opts) + drop_feat = self.config['drop'].get(bool) + write = ui.should_write() + + for item in lib.items(ui.decargs(args)): + self.ft_in_title(item, drop_feat) + item.store() + if write: + item.try_write() + + self._command.func = func + return [self._command] + + def imported(self, session, task): + """Import hook for moving featuring artist automatically. + """ + drop_feat = self.config['drop'].get(bool) + + for item in task.imported_items(): + self.ft_in_title(item, drop_feat) + item.store() + + def update_metadata(self, item, feat_part, drop_feat): + """Choose how to add new artists to the title and set the new + metadata. Also, print out messages about any changes that are made. + If `drop_feat` is set, then do not add the artist to the title; just + remove it from the artist field. + """ + # In all cases, update the artist fields. + self._log.info('artist: {0} -> {1}', item.artist, item.albumartist) + item.artist = item.albumartist + if item.artist_sort: + # Just strip the featured artist from the sort name. + item.artist_sort, _ = split_on_feat(item.artist_sort) + + # Only update the title if it does not already contain a featured + # artist and if we do not drop featuring information. + if not drop_feat and not contains_feat(item.title): + feat_format = self.config['format'].as_str() + new_format = feat_format.format(feat_part) + new_title = f"{item.title} {new_format}" + self._log.info('title: {0} -> {1}', item.title, new_title) + item.title = new_title + + def ft_in_title(self, item, drop_feat): + """Look for featured artists in the item's artist fields and move + them to the title. + """ + artist = item.artist.strip() + albumartist = item.albumartist.strip() + + # Check whether there is a featured artist on this track and the + # artist field does not exactly match the album artist field. In + # that case, we attempt to move the featured artist to the title. + _, featured = split_on_feat(artist) + if featured and albumartist != artist and albumartist: + self._log.info('{}', displayable_path(item.path)) + + feat_part = None + + # Attempt to find the featured artist. + feat_part = find_feat_part(artist, albumartist) + + # If we have a featuring artist, move it to the title. + if feat_part: + self.update_metadata(item, feat_part, drop_feat) + else: + self._log.info('no featuring artists found') diff --git a/lib/beetsplug/fuzzy.py b/lib/beetsplug/fuzzy.py new file mode 100644 index 00000000..41829639 --- /dev/null +++ b/lib/beetsplug/fuzzy.py @@ -0,0 +1,46 @@ +# This file is part of beets. +# Copyright 2016, Philippe Mongeau. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Provides a fuzzy matching query. +""" + + +from beets.plugins import BeetsPlugin +from beets.dbcore.query import StringFieldQuery +from beets import config +import difflib + + +class FuzzyQuery(StringFieldQuery): + @classmethod + def string_match(cls, pattern, val): + # smartcase + if pattern.islower(): + val = val.lower() + query_matcher = difflib.SequenceMatcher(None, pattern, val) + threshold = config['fuzzy']['threshold'].as_number() + return query_matcher.quick_ratio() >= threshold + + +class FuzzyPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.config.add({ + 'prefix': '~', + 'threshold': 0.7, + }) + + def queries(self): + prefix = self.config['prefix'].as_str() + return {prefix: FuzzyQuery} diff --git a/lib/beetsplug/gmusic.py b/lib/beetsplug/gmusic.py new file mode 100644 index 00000000..844234f9 --- /dev/null +++ b/lib/beetsplug/gmusic.py @@ -0,0 +1,25 @@ +# This file is part of beets. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Deprecation warning for the removed gmusic plugin.""" + +from beets.plugins import BeetsPlugin + + +class Gmusic(BeetsPlugin): + def __init__(self): + super().__init__() + + self._log.warning("The 'gmusic' plugin has been removed following the" + " shutdown of Google Play Music. Remove the plugin" + " from your configuration to silence this warning.") diff --git a/lib/beetsplug/hook.py b/lib/beetsplug/hook.py new file mode 100644 index 00000000..0fe3bffc --- /dev/null +++ b/lib/beetsplug/hook.py @@ -0,0 +1,115 @@ +# This file is part of beets. +# Copyright 2015, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Allows custom commands to be run when an event is emitted by beets""" + +import string +import subprocess +import shlex + +from beets.plugins import BeetsPlugin +from beets.util import arg_encoding + + +class CodingFormatter(string.Formatter): + """A variant of `string.Formatter` that converts everything to `unicode` + strings. + + This is necessary on Python 2, where formatting otherwise occurs on + bytestrings. It intercepts two points in the formatting process to decode + the format string and all fields using the specified encoding. If decoding + fails, the values are used as-is. + """ + + def __init__(self, coding): + """Creates a new coding formatter with the provided coding.""" + self._coding = coding + + def format(self, format_string, *args, **kwargs): + """Formats the provided string using the provided arguments and keyword + arguments. + + This method decodes the format string using the formatter's coding. + + See str.format and string.Formatter.format. + """ + if isinstance(format_string, bytes): + format_string = format_string.decode(self._coding) + + return super().format(format_string, *args, + **kwargs) + + def convert_field(self, value, conversion): + """Converts the provided value given a conversion type. + + This method decodes the converted value using the formatter's coding. + + See string.Formatter.convert_field. + """ + converted = super().convert_field(value, + conversion) + + if isinstance(converted, bytes): + return converted.decode(self._coding) + + return converted + + +class HookPlugin(BeetsPlugin): + """Allows custom commands to be run when an event is emitted by beets""" + + def __init__(self): + super().__init__() + + self.config.add({ + 'hooks': [] + }) + + hooks = self.config['hooks'].get(list) + + for hook_index in range(len(hooks)): + hook = self.config['hooks'][hook_index] + + hook_event = hook['event'].as_str() + hook_command = hook['command'].as_str() + + self.create_and_register_hook(hook_event, hook_command) + + def create_and_register_hook(self, event, command): + def hook_function(**kwargs): + if command is None or len(command) == 0: + self._log.error('invalid command "{0}"', command) + return + + # Use a string formatter that works on Unicode strings. + formatter = CodingFormatter(arg_encoding()) + + command_pieces = shlex.split(command) + + for i, piece in enumerate(command_pieces): + command_pieces[i] = formatter.format(piece, event=event, + **kwargs) + + self._log.debug('running command "{0}" for event {1}', + ' '.join(command_pieces), event) + + try: + subprocess.check_call(command_pieces) + except subprocess.CalledProcessError as exc: + self._log.error('hook for {0} exited with status {1}', + event, exc.returncode) + except OSError as exc: + self._log.error('hook for {0} failed: {1}', event, exc) + + self.register_listener(event, hook_function) diff --git a/lib/beetsplug/ihate.py b/lib/beetsplug/ihate.py new file mode 100644 index 00000000..91850e09 --- /dev/null +++ b/lib/beetsplug/ihate.py @@ -0,0 +1,80 @@ +# This file is part of beets. +# Copyright 2016, Blemjhoo Tezoulbr <baobab@heresiarch.info>. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +"""Warns you about things you hate (or even blocks import).""" + +from beets.plugins import BeetsPlugin +from beets.importer import action +from beets.library import parse_query_string +from beets.library import Item +from beets.library import Album + + +__author__ = 'baobab@heresiarch.info' +__version__ = '2.0' + + +def summary(task): + """Given an ImportTask, produce a short string identifying the + object. + """ + if task.is_album: + return f'{task.cur_artist} - {task.cur_album}' + else: + return f'{task.item.artist} - {task.item.title}' + + +class IHatePlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.register_listener('import_task_choice', + self.import_task_choice_event) + self.config.add({ + 'warn': [], + 'skip': [], + }) + + @classmethod + def do_i_hate_this(cls, task, action_patterns): + """Process group of patterns (warn or skip) and returns True if + task is hated and not whitelisted. + """ + if action_patterns: + for query_string in action_patterns: + query, _ = parse_query_string( + query_string, + Album if task.is_album else Item, + ) + if any(query.match(item) for item in task.imported_items()): + return True + return False + + def import_task_choice_event(self, session, task): + skip_queries = self.config['skip'].as_str_seq() + warn_queries = self.config['warn'].as_str_seq() + + if task.choice_flag == action.APPLY: + if skip_queries or warn_queries: + self._log.debug('processing your hate') + if self.do_i_hate_this(task, skip_queries): + task.choice_flag = action.SKIP + self._log.info('skipped: {0}', summary(task)) + return + if self.do_i_hate_this(task, warn_queries): + self._log.info('you may hate this: {0}', summary(task)) + else: + self._log.debug('nothing to do') + else: + self._log.debug('user made a decision, nothing to do') diff --git a/lib/beetsplug/importadded.py b/lib/beetsplug/importadded.py new file mode 100644 index 00000000..e6665e0f --- /dev/null +++ b/lib/beetsplug/importadded.py @@ -0,0 +1,132 @@ +"""Populate an item's `added` and `mtime` fields by using the file +modification time (mtime) of the item's source file before import. + +Reimported albums and items are skipped. +""" + +import os + +from beets import util +from beets import importer +from beets.plugins import BeetsPlugin + + +class ImportAddedPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.config.add({ + 'preserve_mtimes': False, + 'preserve_write_mtimes': False, + }) + + # item.id for new items that were reimported + self.reimported_item_ids = None + # album.path for old albums that were replaced by a reimported album + self.replaced_album_paths = None + # item path in the library to the mtime of the source file + self.item_mtime = {} + + register = self.register_listener + register('import_task_created', self.check_config) + register('import_task_created', self.record_if_inplace) + register('import_task_files', self.record_reimported) + register('before_item_moved', self.record_import_mtime) + register('item_copied', self.record_import_mtime) + register('item_linked', self.record_import_mtime) + register('item_hardlinked', self.record_import_mtime) + register('album_imported', self.update_album_times) + register('item_imported', self.update_item_times) + register('after_write', self.update_after_write_time) + + def check_config(self, task, session): + self.config['preserve_mtimes'].get(bool) + + def reimported_item(self, item): + return item.id in self.reimported_item_ids + + def reimported_album(self, album): + return album.path in self.replaced_album_paths + + def record_if_inplace(self, task, session): + if not (session.config['copy'] or session.config['move'] or + session.config['link'] or session.config['hardlink']): + self._log.debug("In place import detected, recording mtimes from " + "source paths") + items = [task.item] \ + if isinstance(task, importer.SingletonImportTask) \ + else task.items + for item in items: + self.record_import_mtime(item, item.path, item.path) + + def record_reimported(self, task, session): + self.reimported_item_ids = {item.id for item, replaced_items + in task.replaced_items.items() + if replaced_items} + self.replaced_album_paths = set(task.replaced_albums.keys()) + + def write_file_mtime(self, path, mtime): + """Write the given mtime to the destination path. + """ + stat = os.stat(util.syspath(path)) + os.utime(util.syspath(path), (stat.st_atime, mtime)) + + def write_item_mtime(self, item, mtime): + """Write the given mtime to an item's `mtime` field and to the mtime + of the item's file. + """ + # The file's mtime on disk must be in sync with the item's mtime + self.write_file_mtime(util.syspath(item.path), mtime) + item.mtime = mtime + + def record_import_mtime(self, item, source, destination): + """Record the file mtime of an item's path before its import. + """ + mtime = os.stat(util.syspath(source)).st_mtime + self.item_mtime[destination] = mtime + self._log.debug("Recorded mtime {0} for item '{1}' imported from " + "'{2}'", mtime, util.displayable_path(destination), + util.displayable_path(source)) + + def update_album_times(self, lib, album): + if self.reimported_album(album): + self._log.debug("Album '{0}' is reimported, skipping import of " + "added dates for the album and its items.", + util.displayable_path(album.path)) + return + + album_mtimes = [] + for item in album.items(): + mtime = self.item_mtime.pop(item.path, None) + if mtime: + album_mtimes.append(mtime) + if self.config['preserve_mtimes'].get(bool): + self.write_item_mtime(item, mtime) + item.store() + album.added = min(album_mtimes) + self._log.debug("Import of album '{0}', selected album.added={1} " + "from item file mtimes.", album.album, album.added) + album.store() + + def update_item_times(self, lib, item): + if self.reimported_item(item): + self._log.debug("Item '{0}' is reimported, skipping import of " + "added date.", util.displayable_path(item.path)) + return + mtime = self.item_mtime.pop(item.path, None) + if mtime: + item.added = mtime + if self.config['preserve_mtimes'].get(bool): + self.write_item_mtime(item, mtime) + self._log.debug("Import of item '{0}', selected item.added={1}", + util.displayable_path(item.path), item.added) + item.store() + + def update_after_write_time(self, item, path): + """Update the mtime of the item's file with the item.added value + after each write of the item if `preserve_write_mtimes` is enabled. + """ + if item.added: + if self.config['preserve_write_mtimes'].get(bool): + self.write_item_mtime(item, item.added) + self._log.debug("Write of item '{0}', selected item.added={1}", + util.displayable_path(item.path), item.added) diff --git a/lib/beetsplug/importfeeds.py b/lib/beetsplug/importfeeds.py new file mode 100644 index 00000000..ad6d8415 --- /dev/null +++ b/lib/beetsplug/importfeeds.py @@ -0,0 +1,127 @@ +# This file is part of beets. +# Copyright 2016, Fabrice Laporte. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +"""Write paths of imported files in various formats to ease later import in a +music player. Also allow printing the new file locations to stdout in case +one wants to manually add music to a player by its path. +""" +import datetime +import os +import re + +from beets.plugins import BeetsPlugin +from beets.util import mkdirall, normpath, syspath, bytestring_path, link +from beets import config + +M3U_DEFAULT_NAME = 'imported.m3u' + + +def _build_m3u_filename(basename): + """Builds unique m3u filename by appending given basename to current + date.""" + + basename = re.sub(r"[\s,/\\'\"]", '_', basename) + date = datetime.datetime.now().strftime("%Y%m%d_%Hh%M") + path = normpath(os.path.join( + config['importfeeds']['dir'].as_filename(), + date + '_' + basename + '.m3u' + )) + return path + + +def _write_m3u(m3u_path, items_paths): + """Append relative paths to items into m3u file. + """ + mkdirall(m3u_path) + with open(syspath(m3u_path), 'ab') as f: + for path in items_paths: + f.write(path + b'\n') + + +class ImportFeedsPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({ + 'formats': [], + 'm3u_name': 'imported.m3u', + 'dir': None, + 'relative_to': None, + 'absolute_path': False, + }) + + relative_to = self.config['relative_to'].get() + if relative_to: + self.config['relative_to'] = normpath(relative_to) + else: + self.config['relative_to'] = self.get_feeds_dir() + + self.register_listener('album_imported', self.album_imported) + self.register_listener('item_imported', self.item_imported) + + def get_feeds_dir(self): + feeds_dir = self.config['dir'].get() + if feeds_dir: + return os.path.expanduser(bytestring_path(feeds_dir)) + return config['directory'].as_filename() + + def _record_items(self, lib, basename, items): + """Records relative paths to the given items for each feed format + """ + feedsdir = bytestring_path(self.get_feeds_dir()) + formats = self.config['formats'].as_str_seq() + relative_to = self.config['relative_to'].get() \ + or self.get_feeds_dir() + relative_to = bytestring_path(relative_to) + + paths = [] + for item in items: + if self.config['absolute_path']: + paths.append(item.path) + else: + try: + relpath = os.path.relpath(item.path, relative_to) + except ValueError: + # On Windows, it is sometimes not possible to construct a + # relative path (if the files are on different disks). + relpath = item.path + paths.append(relpath) + + if 'm3u' in formats: + m3u_basename = bytestring_path( + self.config['m3u_name'].as_str()) + m3u_path = os.path.join(feedsdir, m3u_basename) + _write_m3u(m3u_path, paths) + + if 'm3u_multi' in formats: + m3u_path = _build_m3u_filename(basename) + _write_m3u(m3u_path, paths) + + if 'link' in formats: + for path in paths: + dest = os.path.join(feedsdir, os.path.basename(path)) + if not os.path.exists(syspath(dest)): + link(path, dest) + + if 'echo' in formats: + self._log.info("Location of imported music:") + for path in paths: + self._log.info(" {0}", path) + + def album_imported(self, lib, album): + self._record_items(lib, album.album, album.items()) + + def item_imported(self, lib, item): + self._record_items(lib, item.title, [item]) diff --git a/lib/beetsplug/info.py b/lib/beetsplug/info.py new file mode 100644 index 00000000..1e6d4b32 --- /dev/null +++ b/lib/beetsplug/info.py @@ -0,0 +1,229 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Shows file metadata. +""" + + +import os + +from beets.plugins import BeetsPlugin +from beets import ui +import mediafile +from beets.library import Item +from beets.util import displayable_path, normpath, syspath + + +def tag_data(lib, args, album=False): + query = [] + for arg in args: + path = normpath(arg) + if os.path.isfile(syspath(path)): + yield tag_data_emitter(path) + else: + query.append(arg) + + if query: + for item in lib.items(query): + yield tag_data_emitter(item.path) + + +def tag_fields(): + fields = set(mediafile.MediaFile.readable_fields()) + fields.add('art') + return fields + + +def tag_data_emitter(path): + def emitter(included_keys): + if included_keys == '*': + fields = tag_fields() + else: + fields = included_keys + if 'images' in fields: + # We can't serialize the image data. + fields.remove('images') + mf = mediafile.MediaFile(syspath(path)) + tags = {} + for field in fields: + if field == 'art': + tags[field] = mf.art is not None + else: + tags[field] = getattr(mf, field, None) + + # create a temporary Item to take advantage of __format__ + item = Item.from_path(syspath(path)) + + return tags, item + return emitter + + +def library_data(lib, args, album=False): + for item in lib.albums(args) if album else lib.items(args): + yield library_data_emitter(item) + + +def library_data_emitter(item): + def emitter(included_keys): + data = dict(item.formatted(included_keys=included_keys)) + + return data, item + return emitter + + +def update_summary(summary, tags): + for key, value in tags.items(): + if key not in summary: + summary[key] = value + elif summary[key] != value: + summary[key] = '[various]' + return summary + + +def print_data(data, item=None, fmt=None): + """Print, with optional formatting, the fields of a single element. + + If no format string `fmt` is passed, the entries on `data` are printed one + in each line, with the format 'field: value'. If `fmt` is not `None`, the + `item` is printed according to `fmt`, using the `Item.__format__` + machinery. + """ + if fmt: + # use fmt specified by the user + ui.print_(format(item, fmt)) + return + + path = displayable_path(item.path) if item else None + formatted = {} + for key, value in data.items(): + if isinstance(value, list): + formatted[key] = '; '.join(value) + if value is not None: + formatted[key] = value + + if len(formatted) == 0: + return + + maxwidth = max(len(key) for key in formatted) + lineformat = f'{{0:>{maxwidth}}}: {{1}}' + + if path: + ui.print_(displayable_path(path)) + + for field in sorted(formatted): + value = formatted[field] + if isinstance(value, list): + value = '; '.join(value) + ui.print_(lineformat.format(field, value)) + + +def print_data_keys(data, item=None): + """Print only the keys (field names) for an item. + """ + path = displayable_path(item.path) if item else None + formatted = [] + for key, value in data.items(): + formatted.append(key) + + if len(formatted) == 0: + return + + line_format = '{0}{{0}}'.format(' ' * 4) + if path: + ui.print_(displayable_path(path)) + + for field in sorted(formatted): + ui.print_(line_format.format(field)) + + +class InfoPlugin(BeetsPlugin): + + def commands(self): + cmd = ui.Subcommand('info', help='show file metadata') + cmd.func = self.run + cmd.parser.add_option( + '-l', '--library', action='store_true', + help='show library fields instead of tags', + ) + cmd.parser.add_option( + '-a', '--album', action='store_true', + help='show album fields instead of tracks (implies "--library")', + ) + cmd.parser.add_option( + '-s', '--summarize', action='store_true', + help='summarize the tags of all files', + ) + cmd.parser.add_option( + '-i', '--include-keys', default=[], + action='append', dest='included_keys', + help='comma separated list of keys to show', + ) + cmd.parser.add_option( + '-k', '--keys-only', action='store_true', + help='show only the keys', + ) + cmd.parser.add_format_option(target='item') + return [cmd] + + def run(self, lib, opts, args): + """Print tag info or library data for each file referenced by args. + + Main entry point for the `beet info ARGS...` command. + + If an argument is a path pointing to an existing file, then the tags + of that file are printed. All other arguments are considered + queries, and for each item matching all those queries the tags from + the file are printed. + + If `opts.summarize` is true, the function merges all tags into one + dictionary and only prints that. If two files have different values + for the same tag, the value is set to '[various]' + """ + if opts.library or opts.album: + data_collector = library_data + else: + data_collector = tag_data + + included_keys = [] + for keys in opts.included_keys: + included_keys.extend(keys.split(',')) + # Drop path even if user provides it multiple times + included_keys = [k for k in included_keys if k != 'path'] + + first = True + summary = {} + for data_emitter in data_collector( + lib, ui.decargs(args), + album=opts.album, + ): + try: + data, item = data_emitter(included_keys or '*') + except (mediafile.UnreadableFileError, OSError) as ex: + self._log.error('cannot read file: {0}', ex) + continue + + if opts.summarize: + update_summary(summary, data) + else: + if not first: + ui.print_() + if opts.keys_only: + print_data_keys(data, item) + else: + fmt = ui.decargs([opts.format])[0] if opts.format else None + print_data(data, item, fmt) + first = False + + if opts.summarize: + print_data(summary) diff --git a/lib/beetsplug/inline.py b/lib/beetsplug/inline.py new file mode 100644 index 00000000..e19eaa9d --- /dev/null +++ b/lib/beetsplug/inline.py @@ -0,0 +1,126 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Allows inline path template customization code in the config file. +""" + +import traceback +import itertools + +from beets.plugins import BeetsPlugin +from beets import config + +FUNC_NAME = '__INLINE_FUNC__' + + +class InlineError(Exception): + """Raised when a runtime error occurs in an inline expression. + """ + def __init__(self, code, exc): + super().__init__( + ("error in inline path field code:\n" + "%s\n%s: %s") % (code, type(exc).__name__, str(exc)) + ) + + +def _compile_func(body): + """Given Python code for a function body, return a compiled + callable that invokes that code. + """ + body = 'def {}():\n {}'.format( + FUNC_NAME, + body.replace('\n', '\n ') + ) + code = compile(body, 'inline', 'exec') + env = {} + eval(code, env) + return env[FUNC_NAME] + + +class InlinePlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + config.add({ + 'pathfields': {}, # Legacy name. + 'item_fields': {}, + 'album_fields': {}, + }) + + # Item fields. + for key, view in itertools.chain(config['item_fields'].items(), + config['pathfields'].items()): + self._log.debug('adding item field {0}', key) + func = self.compile_inline(view.as_str(), False) + if func is not None: + self.template_fields[key] = func + + # Album fields. + for key, view in config['album_fields'].items(): + self._log.debug('adding album field {0}', key) + func = self.compile_inline(view.as_str(), True) + if func is not None: + self.album_template_fields[key] = func + + def compile_inline(self, python_code, album): + """Given a Python expression or function body, compile it as a path + field function. The returned function takes a single argument, an + Item, and returns a Unicode string. If the expression cannot be + compiled, then an error is logged and this function returns None. + """ + # First, try compiling as a single function. + try: + code = compile(f'({python_code})', 'inline', 'eval') + except SyntaxError: + # Fall back to a function body. + try: + func = _compile_func(python_code) + except SyntaxError: + self._log.error('syntax error in inline field definition:\n' + '{0}', traceback.format_exc()) + return + else: + is_expr = False + else: + is_expr = True + + def _dict_for(obj): + out = dict(obj) + if album: + out['items'] = list(obj.items()) + return out + + if is_expr: + # For expressions, just evaluate and return the result. + def _expr_func(obj): + values = _dict_for(obj) + try: + return eval(code, values) + except Exception as exc: + raise InlineError(python_code, exc) + return _expr_func + else: + # For function bodies, invoke the function with values as global + # variables. + def _func_func(obj): + old_globals = dict(func.__globals__) + func.__globals__.update(_dict_for(obj)) + try: + return func() + except Exception as exc: + raise InlineError(python_code, exc) + finally: + func.__globals__.clear() + func.__globals__.update(old_globals) + return _func_func diff --git a/lib/beetsplug/ipfs.py b/lib/beetsplug/ipfs.py new file mode 100644 index 00000000..3c42e7c8 --- /dev/null +++ b/lib/beetsplug/ipfs.py @@ -0,0 +1,295 @@ +# This file is part of beets. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds support for ipfs. Requires go-ipfs and a running ipfs daemon +""" + + +from beets import ui, util, library, config +from beets.plugins import BeetsPlugin + +import subprocess +import shutil +import os +import tempfile + + +class IPFSPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add({ + 'auto': True, + 'nocopy': False, + }) + + if self.config['auto']: + self.import_stages = [self.auto_add] + + def commands(self): + cmd = ui.Subcommand('ipfs', + help='interact with ipfs') + cmd.parser.add_option('-a', '--add', dest='add', + action='store_true', + help='Add to ipfs') + cmd.parser.add_option('-g', '--get', dest='get', + action='store_true', + help='Get from ipfs') + cmd.parser.add_option('-p', '--publish', dest='publish', + action='store_true', + help='Publish local library to ipfs') + cmd.parser.add_option('-i', '--import', dest='_import', + action='store_true', + help='Import remote library from ipfs') + cmd.parser.add_option('-l', '--list', dest='_list', + action='store_true', + help='Query imported libraries') + cmd.parser.add_option('-m', '--play', dest='play', + action='store_true', + help='Play music from remote libraries') + + def func(lib, opts, args): + if opts.add: + for album in lib.albums(ui.decargs(args)): + if len(album.items()) == 0: + self._log.info('{0} does not contain items, aborting', + album) + + self.ipfs_add(album) + album.store() + + if opts.get: + self.ipfs_get(lib, ui.decargs(args)) + + if opts.publish: + self.ipfs_publish(lib) + + if opts._import: + self.ipfs_import(lib, ui.decargs(args)) + + if opts._list: + self.ipfs_list(lib, ui.decargs(args)) + + if opts.play: + self.ipfs_play(lib, opts, ui.decargs(args)) + + cmd.func = func + return [cmd] + + def auto_add(self, session, task): + if task.is_album: + if self.ipfs_add(task.album): + task.album.store() + + def ipfs_play(self, lib, opts, args): + from beetsplug.play import PlayPlugin + + jlib = self.get_remote_lib(lib) + player = PlayPlugin() + config['play']['relative_to'] = None + player.album = True + player.play_music(jlib, player, args) + + def ipfs_add(self, album): + try: + album_dir = album.item_dir() + except AttributeError: + return False + try: + if album.ipfs: + self._log.debug('{0} already added', album_dir) + # Already added to ipfs + return False + except AttributeError: + pass + + self._log.info('Adding {0} to ipfs', album_dir) + + if self.config['nocopy']: + cmd = "ipfs add --nocopy -q -r".split() + else: + cmd = "ipfs add -q -r".split() + cmd.append(album_dir) + try: + output = util.command_output(cmd).stdout.split() + except (OSError, subprocess.CalledProcessError) as exc: + self._log.error('Failed to add {0}, error: {1}', album_dir, exc) + return False + length = len(output) + + for linenr, line in enumerate(output): + line = line.strip() + if linenr == length - 1: + # last printed line is the album hash + self._log.info("album: {0}", line) + album.ipfs = line + else: + try: + item = album.items()[linenr] + self._log.info("item: {0}", line) + item.ipfs = line + item.store() + except IndexError: + # if there's non music files in the to-add folder they'll + # get ignored here + pass + + return True + + def ipfs_get(self, lib, query): + query = query[0] + # Check if query is a hash + # TODO: generalize to other hashes; probably use a multihash + # implementation + if query.startswith("Qm") and len(query) == 46: + self.ipfs_get_from_hash(lib, query) + else: + albums = self.query(lib, query) + for album in albums: + self.ipfs_get_from_hash(lib, album.ipfs) + + def ipfs_get_from_hash(self, lib, _hash): + try: + cmd = "ipfs get".split() + cmd.append(_hash) + util.command_output(cmd) + except (OSError, subprocess.CalledProcessError) as err: + self._log.error('Failed to get {0} from ipfs.\n{1}', + _hash, err.output) + return False + + self._log.info('Getting {0} from ipfs', _hash) + imp = ui.commands.TerminalImportSession(lib, loghandler=None, + query=None, paths=[_hash]) + imp.run() + shutil.rmtree(_hash) + + def ipfs_publish(self, lib): + with tempfile.NamedTemporaryFile() as tmp: + self.ipfs_added_albums(lib, tmp.name) + try: + if self.config['nocopy']: + cmd = "ipfs add --nocopy -q ".split() + else: + cmd = "ipfs add -q ".split() + cmd.append(tmp.name) + output = util.command_output(cmd).stdout + except (OSError, subprocess.CalledProcessError) as err: + msg = f"Failed to publish library. Error: {err}" + self._log.error(msg) + return False + self._log.info("hash of library: {0}", output) + + def ipfs_import(self, lib, args): + _hash = args[0] + if len(args) > 1: + lib_name = args[1] + else: + lib_name = _hash + lib_root = os.path.dirname(lib.path) + remote_libs = os.path.join(lib_root, b"remotes") + if not os.path.exists(remote_libs): + try: + os.makedirs(remote_libs) + except OSError as e: + msg = f"Could not create {remote_libs}. Error: {e}" + self._log.error(msg) + return False + path = os.path.join(remote_libs, lib_name.encode() + b".db") + if not os.path.exists(path): + cmd = f"ipfs get {_hash} -o".split() + cmd.append(path) + try: + util.command_output(cmd) + except (OSError, subprocess.CalledProcessError): + self._log.error(f"Could not import {_hash}") + return False + + # add all albums from remotes into a combined library + jpath = os.path.join(remote_libs, b"joined.db") + jlib = library.Library(jpath) + nlib = library.Library(path) + for album in nlib.albums(): + if not self.already_added(album, jlib): + new_album = [] + for item in album.items(): + item.id = None + new_album.append(item) + added_album = jlib.add_album(new_album) + added_album.ipfs = album.ipfs + added_album.store() + + def already_added(self, check, jlib): + for jalbum in jlib.albums(): + if jalbum.mb_albumid == check.mb_albumid: + return True + return False + + def ipfs_list(self, lib, args): + fmt = config['format_album'].get() + try: + albums = self.query(lib, args) + except OSError: + ui.print_("No imported libraries yet.") + return + + for album in albums: + ui.print_(format(album, fmt), " : ", album.ipfs.decode()) + + def query(self, lib, args): + rlib = self.get_remote_lib(lib) + albums = rlib.albums(args) + return albums + + def get_remote_lib(self, lib): + lib_root = os.path.dirname(lib.path) + remote_libs = os.path.join(lib_root, b"remotes") + path = os.path.join(remote_libs, b"joined.db") + if not os.path.isfile(path): + raise OSError + return library.Library(path) + + def ipfs_added_albums(self, rlib, tmpname): + """ Returns a new library with only albums/items added to ipfs + """ + tmplib = library.Library(tmpname) + for album in rlib.albums(): + try: + if album.ipfs: + self.create_new_album(album, tmplib) + except AttributeError: + pass + return tmplib + + def create_new_album(self, album, tmplib): + items = [] + for item in album.items(): + try: + if not item.ipfs: + break + except AttributeError: + pass + item_path = os.path.basename(item.path).decode( + util._fsencoding(), 'ignore' + ) + # Clear current path from item + item.path = f'/ipfs/{album.ipfs}/{item_path}' + + item.id = None + items.append(item) + if len(items) < 1: + return False + self._log.info("Adding '{0}' to temporary library", album) + new_album = tmplib.add_album(items) + new_album.ipfs = album.ipfs + new_album.store() diff --git a/lib/beetsplug/keyfinder.py b/lib/beetsplug/keyfinder.py new file mode 100644 index 00000000..b695ab54 --- /dev/null +++ b/lib/beetsplug/keyfinder.py @@ -0,0 +1,97 @@ +# This file is part of beets. +# Copyright 2016, Thomas Scholtes. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Uses the `KeyFinder` program to add the `initial_key` field. +""" + + +import os.path +import subprocess + +from beets import ui +from beets import util +from beets.plugins import BeetsPlugin + + +class KeyFinderPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add({ + 'bin': 'KeyFinder', + 'auto': True, + 'overwrite': False, + }) + + if self.config['auto'].get(bool): + self.import_stages = [self.imported] + + def commands(self): + cmd = ui.Subcommand('keyfinder', + help='detect and add initial key from audio') + cmd.func = self.command + return [cmd] + + def command(self, lib, opts, args): + self.find_key(lib.items(ui.decargs(args)), write=ui.should_write()) + + def imported(self, session, task): + self.find_key(task.imported_items()) + + def find_key(self, items, write=False): + overwrite = self.config['overwrite'].get(bool) + command = [self.config['bin'].as_str()] + # The KeyFinder GUI program needs the -f flag before the path. + # keyfinder-cli is similar, but just wants the path with no flag. + if 'keyfinder-cli' not in os.path.basename(command[0]).lower(): + command.append('-f') + + for item in items: + if item['initial_key'] and not overwrite: + continue + + try: + output = util.command_output(command + [util.syspath( + item.path)]).stdout + except (subprocess.CalledProcessError, OSError) as exc: + self._log.error('execution failed: {0}', exc) + continue + except UnicodeEncodeError: + # Workaround for Python 2 Windows bug. + # https://bugs.python.org/issue1759845 + self._log.error('execution failed for Unicode path: {0!r}', + item.path) + continue + + try: + key_raw = output.rsplit(None, 1)[-1] + except IndexError: + # Sometimes keyfinder-cli returns 0 but with no key, usually + # when the file is silent or corrupt, so we log and skip. + self._log.error('no key returned for path: {0}', item.path) + continue + + try: + key = util.text_string(key_raw) + except UnicodeDecodeError: + self._log.error('output is invalid UTF-8') + continue + + item['initial_key'] = key + self._log.info('added computed initial key {0} for {1}', + key, util.displayable_path(item.path)) + + if write: + item.try_write() + item.store() diff --git a/lib/beetsplug/kodiupdate.py b/lib/beetsplug/kodiupdate.py new file mode 100644 index 00000000..2a885d2c --- /dev/null +++ b/lib/beetsplug/kodiupdate.py @@ -0,0 +1,95 @@ +# This file is part of beets. +# Copyright 2017, Pauli Kettunen. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Updates a Kodi library whenever the beets library is changed. +This is based on the Plex Update plugin. + +Put something like the following in your config.yaml to configure: + kodi: + host: localhost + port: 8080 + user: user + pwd: secret +""" + +import requests +from beets import config +from beets.plugins import BeetsPlugin + + +def update_kodi(host, port, user, password): + """Sends request to the Kodi api to start a library refresh. + """ + url = f"http://{host}:{port}/jsonrpc" + + """Content-Type: application/json is mandatory + according to the kodi jsonrpc documentation""" + + headers = {'Content-Type': 'application/json'} + + # Create the payload. Id seems to be mandatory. + payload = {'jsonrpc': '2.0', 'method': 'AudioLibrary.Scan', 'id': 1} + r = requests.post( + url, + auth=(user, password), + json=payload, + headers=headers) + + return r + + +class KodiUpdate(BeetsPlugin): + def __init__(self): + super().__init__() + + # Adding defaults. + config['kodi'].add({ + 'host': 'localhost', + 'port': 8080, + 'user': 'kodi', + 'pwd': 'kodi'}) + + config['kodi']['pwd'].redact = True + self.register_listener('database_change', self.listen_for_db_change) + + def listen_for_db_change(self, lib, model): + """Listens for beets db change and register the update""" + self.register_listener('cli_exit', self.update) + + def update(self, lib): + """When the client exists try to send refresh request to Kodi server. + """ + self._log.info('Requesting a Kodi library update...') + + # Try to send update request. + try: + r = update_kodi( + config['kodi']['host'].get(), + config['kodi']['port'].get(), + config['kodi']['user'].get(), + config['kodi']['pwd'].get()) + r.raise_for_status() + + except requests.exceptions.RequestException as e: + self._log.warning('Kodi update failed: {0}', + str(e)) + return + + json = r.json() + if json.get('result') != 'OK': + self._log.warning('Kodi update failed: JSON response was {0!r}', + json) + return + + self._log.info('Kodi update triggered') diff --git a/lib/beetsplug/lastgenre/__init__.py b/lib/beetsplug/lastgenre/__init__.py new file mode 100644 index 00000000..05412308 --- /dev/null +++ b/lib/beetsplug/lastgenre/__init__.py @@ -0,0 +1,486 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +"""Gets genres for imported music based on Last.fm tags. + +Uses a provided whitelist file to determine which tags are valid genres. +The included (default) genre list was originally produced by scraping Wikipedia +and has been edited to remove some questionable entries. +The scraper script used is available here: +https://gist.github.com/1241307 +""" +import pylast +import codecs +import os +import yaml +import traceback + +from beets import plugins +from beets import ui +from beets import config +from beets.util import normpath, plurality +from beets import library + + +LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY) + +PYLAST_EXCEPTIONS = ( + pylast.WSError, + pylast.MalformedResponseError, + pylast.NetworkError, +) + +REPLACE = { + '\u2010': '-', +} + + +def deduplicate(seq): + """Remove duplicates from sequence wile preserving order. + """ + seen = set() + return [x for x in seq if x not in seen and not seen.add(x)] + + +# Canonicalization tree processing. + +def flatten_tree(elem, path, branches): + """Flatten nested lists/dictionaries into lists of strings + (branches). + """ + if not path: + path = [] + + if isinstance(elem, dict): + for (k, v) in elem.items(): + flatten_tree(v, path + [k], branches) + elif isinstance(elem, list): + for sub in elem: + flatten_tree(sub, path, branches) + else: + branches.append(path + [str(elem)]) + + +def find_parents(candidate, branches): + """Find parents genre of a given genre, ordered from the closest to + the further parent. + """ + for branch in branches: + try: + idx = branch.index(candidate.lower()) + return list(reversed(branch[:idx + 1])) + except ValueError: + continue + return [candidate] + + +# Main plugin logic. + +WHITELIST = os.path.join(os.path.dirname(__file__), 'genres.txt') +C14N_TREE = os.path.join(os.path.dirname(__file__), 'genres-tree.yaml') + + +class LastGenrePlugin(plugins.BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({ + 'whitelist': True, + 'min_weight': 10, + 'count': 1, + 'fallback': None, + 'canonical': False, + 'source': 'album', + 'force': True, + 'auto': True, + 'separator': ', ', + 'prefer_specific': False, + 'title_case': True, + }) + + self.setup() + + def setup(self): + """Setup plugin from config options + """ + if self.config['auto']: + self.import_stages = [self.imported] + + self._genre_cache = {} + + # Read the whitelist file if enabled. + self.whitelist = set() + wl_filename = self.config['whitelist'].get() + if wl_filename in (True, ''): # Indicates the default whitelist. + wl_filename = WHITELIST + if wl_filename: + wl_filename = normpath(wl_filename) + with open(wl_filename, 'rb') as f: + for line in f: + line = line.decode('utf-8').strip().lower() + if line and not line.startswith('#'): + self.whitelist.add(line) + + # Read the genres tree for canonicalization if enabled. + self.c14n_branches = [] + c14n_filename = self.config['canonical'].get() + self.canonicalize = c14n_filename is not False + + # Default tree + if c14n_filename in (True, ''): + c14n_filename = C14N_TREE + elif not self.canonicalize and self.config['prefer_specific'].get(): + # prefer_specific requires a tree, load default tree + c14n_filename = C14N_TREE + + # Read the tree + if c14n_filename: + self._log.debug('Loading canonicalization tree {0}', c14n_filename) + c14n_filename = normpath(c14n_filename) + with codecs.open(c14n_filename, 'r', encoding='utf-8') as f: + genres_tree = yaml.safe_load(f) + flatten_tree(genres_tree, [], self.c14n_branches) + + @property + def sources(self): + """A tuple of allowed genre sources. May contain 'track', + 'album', or 'artist.' + """ + source = self.config['source'].as_choice(('track', 'album', 'artist')) + if source == 'track': + return 'track', 'album', 'artist' + elif source == 'album': + return 'album', 'artist' + elif source == 'artist': + return 'artist', + + def _get_depth(self, tag): + """Find the depth of a tag in the genres tree. + """ + depth = None + for key, value in enumerate(self.c14n_branches): + if tag in value: + depth = value.index(tag) + break + return depth + + def _sort_by_depth(self, tags): + """Given a list of tags, sort the tags by their depths in the + genre tree. + """ + depth_tag_pairs = [(self._get_depth(t), t) for t in tags] + depth_tag_pairs = [e for e in depth_tag_pairs if e[0] is not None] + depth_tag_pairs.sort(reverse=True) + return [p[1] for p in depth_tag_pairs] + + def _resolve_genres(self, tags): + """Given a list of strings, return a genre by joining them into a + single string and (optionally) canonicalizing each. + """ + if not tags: + return None + + count = self.config['count'].get(int) + if self.canonicalize: + # Extend the list to consider tags parents in the c14n tree + tags_all = [] + for tag in tags: + # Add parents that are in the whitelist, or add the oldest + # ancestor if no whitelist + if self.whitelist: + parents = [x for x in find_parents(tag, self.c14n_branches) + if self._is_allowed(x)] + else: + parents = [find_parents(tag, self.c14n_branches)[-1]] + + tags_all += parents + # Stop if we have enough tags already, unless we need to find + # the most specific tag (instead of the most popular). + if (not self.config['prefer_specific'] and + len(tags_all) >= count): + break + tags = tags_all + + tags = deduplicate(tags) + + # Sort the tags by specificity. + if self.config['prefer_specific']: + tags = self._sort_by_depth(tags) + + # c14n only adds allowed genres but we may have had forbidden genres in + # the original tags list + tags = [self._format_tag(x) for x in tags if self._is_allowed(x)] + + return self.config['separator'].as_str().join( + tags[:self.config['count'].get(int)] + ) + + def _format_tag(self, tag): + if self.config["title_case"]: + return tag.title() + return tag + + def fetch_genre(self, lastfm_obj): + """Return the genre for a pylast entity or None if no suitable genre + can be found. Ex. 'Electronic, House, Dance' + """ + min_weight = self.config['min_weight'].get(int) + return self._resolve_genres(self._tags_for(lastfm_obj, min_weight)) + + def _is_allowed(self, genre): + """Determine whether the genre is present in the whitelist, + returning a boolean. + """ + if genre is None: + return False + if not self.whitelist or genre in self.whitelist: + return True + return False + + # Cached entity lookups. + + def _last_lookup(self, entity, method, *args): + """Get a genre based on the named entity using the callable `method` + whose arguments are given in the sequence `args`. The genre lookup + is cached based on the entity name and the arguments. Before the + lookup, each argument is has some Unicode characters replaced with + rough ASCII equivalents in order to return better results from the + Last.fm database. + """ + # Shortcut if we're missing metadata. + if any(not s for s in args): + return None + + key = '{}.{}'.format(entity, + '-'.join(str(a) for a in args)) + if key in self._genre_cache: + return self._genre_cache[key] + else: + args_replaced = [] + for arg in args: + for k, v in REPLACE.items(): + arg = arg.replace(k, v) + args_replaced.append(arg) + + genre = self.fetch_genre(method(*args_replaced)) + self._genre_cache[key] = genre + return genre + + def fetch_album_genre(self, obj): + """Return the album genre for this Item or Album. + """ + return self._last_lookup( + 'album', LASTFM.get_album, obj.albumartist, obj.album + ) + + def fetch_album_artist_genre(self, obj): + """Return the album artist genre for this Item or Album. + """ + return self._last_lookup( + 'artist', LASTFM.get_artist, obj.albumartist + ) + + def fetch_artist_genre(self, item): + """Returns the track artist genre for this Item. + """ + return self._last_lookup( + 'artist', LASTFM.get_artist, item.artist + ) + + def fetch_track_genre(self, obj): + """Returns the track genre for this Item. + """ + return self._last_lookup( + 'track', LASTFM.get_track, obj.artist, obj.title + ) + + def _get_genre(self, obj): + """Get the genre string for an Album or Item object based on + self.sources. Return a `(genre, source)` pair. The + prioritization order is: + - track (for Items only) + - album + - artist + - original + - fallback + - None + """ + + # Shortcut to existing genre if not forcing. + if not self.config['force'] and self._is_allowed(obj.genre): + return obj.genre, 'keep' + + # Track genre (for Items only). + if isinstance(obj, library.Item): + if 'track' in self.sources: + result = self.fetch_track_genre(obj) + if result: + return result, 'track' + + # Album genre. + if 'album' in self.sources: + result = self.fetch_album_genre(obj) + if result: + return result, 'album' + + # Artist (or album artist) genre. + if 'artist' in self.sources: + result = None + if isinstance(obj, library.Item): + result = self.fetch_artist_genre(obj) + elif obj.albumartist != config['va_name'].as_str(): + result = self.fetch_album_artist_genre(obj) + else: + # For "Various Artists", pick the most popular track genre. + item_genres = [] + for item in obj.items(): + item_genre = None + if 'track' in self.sources: + item_genre = self.fetch_track_genre(item) + if not item_genre: + item_genre = self.fetch_artist_genre(item) + if item_genre: + item_genres.append(item_genre) + if item_genres: + result, _ = plurality(item_genres) + + if result: + return result, 'artist' + + # Filter the existing genre. + if obj.genre: + result = self._resolve_genres([obj.genre]) + if result: + return result, 'original' + + # Fallback string. + fallback = self.config['fallback'].get() + if fallback: + return fallback, 'fallback' + + return None, None + + def commands(self): + lastgenre_cmd = ui.Subcommand('lastgenre', help='fetch genres') + lastgenre_cmd.parser.add_option( + '-f', '--force', dest='force', + action='store_true', + help='re-download genre when already present' + ) + lastgenre_cmd.parser.add_option( + '-s', '--source', dest='source', type='string', + help='genre source: artist, album, or track' + ) + lastgenre_cmd.parser.add_option( + '-A', '--items', action='store_false', dest='album', + help='match items instead of albums') + lastgenre_cmd.parser.add_option( + '-a', '--albums', action='store_true', dest='album', + help='match albums instead of items') + lastgenre_cmd.parser.set_defaults(album=True) + + def lastgenre_func(lib, opts, args): + write = ui.should_write() + self.config.set_args(opts) + + if opts.album: + # Fetch genres for whole albums + for album in lib.albums(ui.decargs(args)): + album.genre, src = self._get_genre(album) + self._log.info('genre for album {0} ({1}): {0.genre}', + album, src) + album.store() + + for item in album.items(): + # If we're using track-level sources, also look up each + # track on the album. + if 'track' in self.sources: + item.genre, src = self._get_genre(item) + item.store() + self._log.info( + 'genre for track {0} ({1}): {0.genre}', + item, src) + + if write: + item.try_write() + else: + # Just query singletons, i.e. items that are not part of + # an album + for item in lib.items(ui.decargs(args)): + item.genre, src = self._get_genre(item) + self._log.debug('added last.fm item genre ({0}): {1}', + src, item.genre) + item.store() + + lastgenre_cmd.func = lastgenre_func + return [lastgenre_cmd] + + def imported(self, session, task): + """Event hook called when an import task finishes.""" + if task.is_album: + album = task.album + album.genre, src = self._get_genre(album) + self._log.debug('added last.fm album genre ({0}): {1}', + src, album.genre) + album.store() + + if 'track' in self.sources: + for item in album.items(): + item.genre, src = self._get_genre(item) + self._log.debug('added last.fm item genre ({0}): {1}', + src, item.genre) + item.store() + + else: + item = task.item + item.genre, src = self._get_genre(item) + self._log.debug('added last.fm item genre ({0}): {1}', + src, item.genre) + item.store() + + def _tags_for(self, obj, min_weight=None): + """Core genre identification routine. + + Given a pylast entity (album or track), return a list of + tag names for that entity. Return an empty list if the entity is + not found or another error occurs. + + If `min_weight` is specified, tags are filtered by weight. + """ + # Work around an inconsistency in pylast where + # Album.get_top_tags() does not return TopItem instances. + # https://github.com/pylast/pylast/issues/86 + if isinstance(obj, pylast.Album): + obj = super(pylast.Album, obj) + + try: + res = obj.get_top_tags() + except PYLAST_EXCEPTIONS as exc: + self._log.debug('last.fm error: {0}', exc) + return [] + except Exception as exc: + # Isolate bugs in pylast. + self._log.debug('{}', traceback.format_exc()) + self._log.error('error in pylast library: {0}', exc) + return [] + + # Filter by weight (optionally). + if min_weight: + res = [el for el in res if (int(el.weight or 0)) >= min_weight] + + # Get strings from tags. + res = [el.item.get_name().lower() for el in res] + + return res diff --git a/lib/beetsplug/lastgenre/genres-tree.yaml b/lib/beetsplug/lastgenre/genres-tree.yaml new file mode 100644 index 00000000..c8ae4247 --- /dev/null +++ b/lib/beetsplug/lastgenre/genres-tree.yaml @@ -0,0 +1,766 @@ +- african: + - african heavy metal + - african hip hop + - afrobeat + - apala + - benga + - bikutsi + - bongo flava + - cape jazz + - chimurenga + - coupé-décalé + - fuji music + - genge + - highlife + - hiplife + - isicathamiya + - jit + - jùjú + - kapuka + - kizomba + - kuduro + - kwaito + - kwela + - makossa + - maloya + - marrabenta + - mbalax + - mbaqanga + - mbube + - morna + - museve + - palm-wine + - raï + - sakara + - sega + - seggae + - semba + - soukous + - taarab + - zouglou +- asian: + - east asian: + - anison + - c-pop + - cantopop + - enka + - hong kong english pop + - j-pop + - k-pop + - kayōkyoku + - korean pop + - mandopop + - onkyokei + - taiwanese pop + - fann at-tanbura + - fijiri + - khaliji + - liwa + - sawt + - south and southeast asian: + - baila + - bhangra + - bhojpuri + - dangdut + - filmi + - indian pop + - lavani + - luk thung: + - luk krung + - manila sound + - morlam + - pinoy pop + - pop sunda + - ragini + - thai pop +- avant-garde: + - experimental music + - lo-fi + - musique concrète +- blues: + - african blues + - blues rock + - blues shouter + - british blues + - canadian blues + - chicago blues + - classic female blues + - contemporary r&b + - country blues + - delta blues + - detroit blues + - electric blues + - gospel blues + - hill country blues + - hokum blues + - jazz blues + - jump blues + - kansas city blues + - louisiana blues + - memphis blues + - piano blues + - piedmont blues + - punk blues + - soul blues + - st. louis blues + - swamp blues + - texas blues + - west coast blues +- caribbean and latin american: + - bachata + - baithak gana + - bolero + - brazilian: + - axé + - bossa nova + - brazilian rock + - brega + - choro + - forró + - frevo + - funk carioca + - lambada + - maracatu + - música popular brasileira + - música sertaneja + - pagode + - samba + - samba rock + - tecnobrega + - tropicalia + - zouk-lambada + - calypso + - chutney + - chutney soca + - compas + - mambo + - merengue + - méringue + - other latin: + - chicha + - criolla + - cumbia + - huayno + - mariachi + - ranchera + - tejano + - punta + - punta rock + - rasin + - reggaeton + - salsa + - soca + - son + - timba + - twoubadou + - zouk +- classical: + - ballet + - baroque: + - baroque music + - cantata + - chamber music: + - string quartet + - classical music + - concerto: + - concerto grosso + - contemporary classical + - modern classical + - opera + - oratorio + - orchestra: + - orchestral + - symphonic + - symphony + - organum + - mass: + - requiem + - sacred music: + - cantique + - gregorian chant + - sonata +- comedy: + - comedy music + - comedy rock + - humor + - parody music + - stand-up +- country: + - alternative country: + - cowpunk + - americana + - australian country music + - bakersfield sound + - bluegrass: + - progressive bluegrass + - reactionary bluegrass + - blues country + - cajun: + - cajun fiddle tunes + - christian country music + - classic country + - close harmony + - country pop + - country rap + - country rock + - country soul + - cowboy/western music + - dansband music + - franco-country + - gulf and western + - hellbilly music + - hokum + - honky tonk + - instrumental country + - lubbock sound + - nashville sound + - neotraditional country + - outlaw country + - progressive country + - psychobilly/punkabilly + - red dirt + - rockabilly + - sertanejo + - texas country + - traditional country music + - truck-driving country + - western swing + - zydeco +- easy listening: + - background music + - beautiful music + - elevator music + - furniture music + - lounge music + - middle of the road + - new-age music +- electronic: + - ambient: + - ambient dub + - ambient house + - ambient techno + - dark ambient + - drone music + - illbient + - isolationism + - lowercase + - asian underground + - breakbeat: + - 4-beat + - acid breaks + - baltimore club + - big beat + - breakbeat hardcore + - broken beat + - florida breaks + - nu skool breaks + - chiptune: + - bitpop + - game boy music + - nintendocore + - video game music + - yorkshire bleeps and bass + - disco: + - cosmic disco + - disco polo + - euro disco + - italo disco + - nu-disco + - space disco + - downtempo: + - acid jazz + - balearic beat + - chill out + - dub music + - dubtronica + - ethnic electronica + - moombahton + - nu jazz + - trip hop + - drum and bass: + - darkcore + - darkstep + - drumfunk + - drumstep + - hardstep + - intelligent drum and bass + - jump-up + - liquid funk + - neurofunk + - oldschool jungle: + - darkside jungle + - ragga jungle + - raggacore + - sambass + - techstep + - electro: + - crunk + - electro backbeat + - electro-grime + - electropop + - electroacoustic: + - acousmatic music + - computer music + - electroacoustic improvisation + - field recording + - live coding + - live electronics + - soundscape composition + - tape music + - electronic rock: + - alternative dance: + - baggy + - madchester + - dance-punk + - dance-rock + - dark wave + - electroclash + - electronicore + - electropunk + - ethereal wave + - indietronica + - new rave + - space rock + - synthpop + - synthpunk + - electronica: + - berlin school + - chillwave + - electronic art music + - electronic dance music + - folktronica + - freestyle music + - glitch + - idm + - laptronica + - skweee + - sound art + - synthcore + - eurodance: + - bubblegum dance + - italo dance + - turbofolk + - hardcore: + - bouncy house + - bouncy techno + - breakcore + - digital hardcore + - doomcore + - dubstyle + - gabber + - happy hardcore + - hardstyle + - jumpstyle + - makina + - speedcore + - terrorcore + - uk hardcore + - hi-nrg: + - eurobeat + - hard nrg + - new beat + - house: + - acid house + - chicago house + - deep house + - diva house + - dutch house + - electro house + - freestyle house + - french house + - funky house + - ghetto house + - hardbag + - hip house + - italo house + - latin house + - minimal house + - progressive house + - rave music + - swing house + - tech house + - tribal house + - uk hard house + - us garage + - vocal house + - industrial: + - aggrotech + - coldwave + - cybergrind + - dark electro + - death industrial + - electro-industrial + - electronic body music: + - futurepop + - industrial metal: + - neue deutsche härte + - industrial rock + - noise: + - japanoise + - power electronics + - power noise + - witch house + - post-disco: + - boogie + - dance-pop + - progressive: + - progressive house/trance: + - disco house + - dream house + - space house + - progressive breaks + - progressive drum & bass + - progressive techno + - techno: + - acid techno + - detroit techno + - free tekno + - ghettotech + - minimal + - nortec + - schranz + - techno-dnb + - technopop + - tecno brega + - toytown techno + - trance: + - acid trance + - classic trance + - dream trance + - goa trance: + - dark psytrance + - full on + - psybreaks + - psyprog + - suomisaundi + - hard trance + - tech trance + - uplifting trance: + - orchestral uplifting + - vocal trance + - uk garage: + - 2-step + - 4x4 + - bassline + - breakstep + - dubstep + - funky + - grime + - speed garage + - trap +- folk: + - american folk revival + - anti-folk + - british folk revival + - celtic music + - contemporary folk + - filk music + - freak folk + - indie folk + - industrial folk + - neofolk + - progressive folk + - psychedelic folk + - sung poetry + - techno-folk +- hip hop: + - alternative hip hop + - avant-garde hip hop + - chap hop + - christian hip hop + - conscious hip hop + - country-rap + - crunkcore + - cumbia rap + - east coast hip hop: + - brick city club + - hardcore hip hop + - mafioso rap + - new jersey hip hop + - electro music + - freestyle rap + - g-funk + - gangsta rap + - golden age hip hop + - hip hop soul + - hip pop + - hyphy + - industrial hip hop + - instrumental hip hop + - jazz rap + - low bap + - lyrical hip hop + - merenrap + - midwest hip hop: + - chicago hip hop + - detroit hip hop + - horrorcore + - st. louis hip hop + - twin cities hip hop + - motswako + - nerdcore + - new jack swing + - new school hip hop + - old school hip hop + - political hip hop + - rap opera + - rap rock: + - rap metal + - rapcore + - songo-salsa + - southern hip hop: + - atlanta hip hop: + - snap music + - bounce music + - houston hip hop: + - chopped and screwed + - miami bass + - turntablism + - underground hip hop + - urban pasifika + - west coast hip hop: + - chicano rap + - jerkin' +- jazz: + - asian american jazz + - avant-garde jazz + - bebop + - boogie-woogie + - british dance band + - chamber jazz + - continental jazz + - cool jazz + - crossover jazz + - cubop + - dixieland + - ethno jazz + - european free jazz + - free funk + - free improvisation + - free jazz + - gypsy jazz + - hard bop + - jazz fusion + - jazz rock + - jazz-funk + - kansas city jazz + - latin jazz + - livetronica + - m-base + - mainstream jazz + - modal jazz + - neo-bop jazz + - neo-swing + - novelty ragtime + - orchestral jazz + - post-bop + - punk jazz + - ragtime + - shibuya-kei + - ska jazz + - smooth jazz + - soul jazz + - straight-ahead jazz + - stride jazz + - swing + - third stream + - trad jazz + - vocal jazz + - west coast gypsy jazz + - west coast jazz +- other: + - worldbeat +- pop: + - adult contemporary + - arab pop + - baroque pop + - bubblegum pop + - chanson + - christian pop + - classical crossover + - europop: + - austropop + - balkan pop + - french pop + - latin pop + - laïkó + - nederpop + - russian pop + - iranian pop + - jangle pop + - latin ballad + - levenslied + - louisiana swamp pop + - mexican pop + - motorpop + - new romanticism + - pop rap + - popera + - psychedelic pop + - schlager + - soft rock + - sophisti-pop + - space age pop + - sunshine pop + - surf pop + - teen pop + - traditional pop music + - turkish pop + - vispop + - wonky pop +- rhythm and blues: + - funk: + - deep funk + - go-go + - p-funk + - soul: + - blue-eyed soul + - neo soul + - northern soul +- rock: + - alternative rock: + - britpop: + - post-britpop + - dream pop + - grunge: + - post-grunge + - indie pop: + - dunedin sound + - twee pop + - indie rock + - noise pop + - nu metal + - post-punk revival + - post-rock: + - post-metal + - sadcore + - shoegaze + - slowcore + - art rock + - beat music + - chinese rock + - christian rock + - dark cabaret + - desert rock + - experimental rock + - folk rock + - garage rock + - glam rock + - hard rock + - heavy metal: + - alternative metal: + - funk metal + - black metal: + - viking metal + - christian metal + - death metal: + - death/doom + - goregrind + - melodic death metal + - technical death metal + - doom metal: + - epic doom metal + - funeral doom + - drone metal + - epic metal + - folk metal: + - celtic metal + - medieval metal + - pagan metal + - funk metal + - glam metal + - gothic metal + - industrial metal: + - industrial death metal + - metalcore: + - deathcore + - mathcore: + - djent + - synthcore + - neoclassical metal + - post-metal + - power metal: + - progressive power metal + - progressive metal + - sludge metal + - speed metal + - stoner rock: + - stoner metal + - symphonic metal + - thrash metal: + - crossover thrash + - groove metal + - progressive thrash metal + - teutonic thrash metal + - traditional heavy metal + - math rock + - new wave: + - world fusion + - paisley underground + - pop rock + - post-punk: + - gothic rock + - no wave + - noise rock + - power pop + - progressive rock: + - canterbury scene + - krautrock + - new prog + - rock in opposition + - psychedelic rock: + - acid rock + - freakbeat + - neo-psychedelia + - raga rock + - punk rock: + - anarcho punk: + - crust punk: + - d-beat + - art punk + - christian punk + - deathrock + - folk punk: + - celtic punk + - gypsy punk + - garage punk + - grindcore: + - crustgrind + - noisegrind + - hardcore punk: + - post-hardcore: + - emo: + - screamo + - powerviolence + - street punk + - thrashcore + - horror punk + - oi! + - pop punk + - psychobilly + - riot grrrl + - ska punk: + - ska-core + - skate punk + - rock and roll + - southern rock + - sufi rock + - surf rock + - visual kei: + - nagoya kei +- reggae: + - roots reggae + - reggae fusion + - reggae en español: + - spanish reggae + - reggae 110 + - reggae bultrón + - romantic flow + - lovers rock + - raggamuffin: + - ragga + - dancehall + - ska: + - 2 tone + - dub + - rocksteady diff --git a/lib/beetsplug/lastgenre/genres.txt b/lib/beetsplug/lastgenre/genres.txt new file mode 100644 index 00000000..7ccd7ad3 --- /dev/null +++ b/lib/beetsplug/lastgenre/genres.txt @@ -0,0 +1,1542 @@ +2 tone +2-step garage +4-beat +4x4 garage +8-bit +acapella +acid +acid breaks +acid house +acid jazz +acid rock +acoustic music +acousticana +adult contemporary music +african popular music +african rumba +afrobeat +aleatoric music +alternative country +alternative dance +alternative hip hop +alternative metal +alternative rock +ambient +ambient house +ambient music +americana +anarcho punk +anti-folk +apala +ape haters +arab pop +arabesque +arabic pop +argentine rock +ars antiqua +ars nova +art punk +art rock +ashiq +asian american jazz +australian country music +australian hip hop +australian pub rock +austropop +avant-garde +avant-garde jazz +avant-garde metal +avant-garde music +axé +bac-bal +bachata +baggy +baila +baile funk +baisha xiyue +baithak gana +baião +bajourou +bakersfield sound +bakou +bakshy +bal-musette +balakadri +balinese gamelan +balkan pop +ballad +ballata +ballet +bamboo band +bambuco +banda +bangsawan +bantowbol +barbershop music +barndance +baroque +baroque music +baroque pop +bass music +batcave +batucada +batuco +batá-rumba +beach music +beat +beatboxing +beautiful music +bebop +beiguan +bel canto +bend-skin +benga +berlin school of electronic music +bhajan +bhangra +bhangra-wine +bhangragga +bhangramuffin +big band +big band music +big beat +biguine +bihu +bikutsi +biomusic +bitcore +bitpop +black metal +blackened death metal +blue-eyed soul +bluegrass +blues +blues ballad +blues-rock +boogie +boogie woogie +boogie-woogie +bossa nova +brass band +brazilian funk +brazilian jazz +breakbeat +breakbeat hardcore +breakcore +breton music +brill building pop +britfunk +british blues +british invasion +britpop +broken beat +brown-eyed soul +brukdown +brutal death metal +bubblegum dance +bubblegum pop +bulerias +bumba-meu-boi +bunraku +burger-highlife +burgundian school +byzantine chant +ca din tulnic +ca pe lunca +ca trù +cabaret +cadence +cadence rampa +cadence-lypso +café-aman +cai luong +cajun music +cakewalk +calenda +calentanos +calgia +calypso +calypso jazz +calypso-style baila +campursari +canatronic +candombe +canon +canrock +cantata +cante chico +cante jondo +canterbury scene +cantiga +cantique +cantiñas +canto livre +canto nuevo +canto popular +cantopop +canzone napoletana +cape jazz +capoeira music +caracoles +carceleras +cardas +cardiowave +carimbó +cariso +carnatic music +carol +cartageneras +cassette culture +casséy-co +cavacha +caveman +caña +celempungan +cello rock +celtic +celtic fusion +celtic metal +celtic punk +celtic reggae +celtic rock +cha-cha-cha +chakacha +chalga +chamamé +chamber jazz +chamber music +chamber pop +champeta +changuí +chanson +chant +charanga +charanga-vallenata +charikawi +chastushki +chau van +chemical breaks +chicago blues +chicago house +chicago soul +chicano rap +chicha +chicken scratch +children's music +chillout +chillwave +chimurenga +chinese music +chinese pop +chinese rock +chip music +cho-kantrum +chongak +chopera +chorinho +choro +chouval bwa +chowtal +christian alternative +christian black metal +christian electronic music +christian hardcore +christian hip hop +christian industrial +christian metal +christian music +christian punk +christian r&b +christian rock +christian ska +christmas carol +christmas music +chumba +chut-kai-pang +chutney +chutney soca +chutney-bhangra +chutney-hip hop +chutney-soca +chylandyk +chzalni +chèo +cigányzene +classic +classic country +classic female blues +classic rock +classical +classical music +classical music era +clicks n cuts +close harmony +club music +cocobale +coimbra fado +coladeira +colombianas +combined rhythm +comedy +comedy rap +comedy rock +comic opera +comparsa +compas direct +compas meringue +concert overture +concerto +concerto grosso +congo +conjunto +contemporary christian +contemporary christian music +contemporary classical +contemporary r&b +contonbley +contradanza +cool jazz +corrido +corsican polyphonic song +cothoza mfana +country +country blues +country gospel +country music +country pop +country r&b +country rock +country-rap +countrypolitan +couple de sonneurs +coupé-décalé +cowpunk +cretan music +crossover jazz +crossover music +crossover thrash +crossover thrash metal +crunk +crunk&b +crunkcore +crust punk +csárdás +cuarteto +cuban rumba +cuddlecore +cueca +cumbia +cumbia villera +cybergrind +dabka +dadra +daina +dalauna +dance +dance music +dance-pop +dance-punk +dance-rock +dancehall +dangdut +danger music +dansband +danza +danzón +dark ambient +dark cabaret +dark pop +darkcore +darkstep +darkwave +de ascultat la servici +de codru +de dragoste +de jale +de pahar +death industrial +death metal +death rock +death/doom +deathcore +deathgrind +deathrock +deep funk +deep house +deep soul +degung +delta blues +dementia +desert rock +desi +detroit blues +detroit techno +dhamar +dhimotiká +dhrupad +dhun +digital hardcore +dirge +dirty dutch +dirty rap +dirty rap/pornocore +dirty south +disco +disco house +disco polo +disney +disney hardcore +disney pop +diva house +divine rock +dixieland +dixieland jazz +djambadon +djent +dodompa +doina +dombola +dondang sayang +donegal fiddle tradition +dongjing +doo wop +doom metal +doomcore +downtempo +drag +dream pop +drone doom +drone metal +drone music +dronology +drum and bass +dub +dub house +dubanguthu +dubstep +dubtronica +dunedin sound +dunun +dutch jazz +décima +early music +east coast blues +east coast hip hop +easy listening +electric blues +electric folk +electro +electro backbeat +electro hop +electro house +electro punk +electro-industrial +electro-swing +electroclash +electrofunk +electronic +electronic art music +electronic body music +electronic dance +electronic luk thung +electronic music +electronic rock +electronica +electropop +elevator music +emo +emo pop +emo rap +emocore +emotronic +enka +epic doom metal +epic metal +eremwu eu +ethereal pop +ethereal wave +euro +euro disco +eurobeat +eurodance +europop +eurotrance +eurourban +exotica +experimental music +experimental noise +experimental pop +experimental rock +extreme metal +ezengileer +fado +falak +fandango +farruca +fife and drum blues +filk +film score +filmi +filmi-ghazal +finger-style +fjatpangarri +flamenco +flamenco rumba +flower power +foaie verde +fofa +folk hop +folk metal +folk music +folk pop +folk punk +folk rock +folktronica +forró +franco-country +freak-folk +freakbeat +free improvisation +free jazz +free music +freestyle +freestyle house +freetekno +french pop +frenchcore +frevo +fricote +fuji +fuji music +fulia +full on +funaná +funeral doom +funk +funk metal +funk rock +funkcore +funky house +furniture music +fusion jazz +g-funk +gaana +gabba +gabber +gagaku +gaikyoku +gaita +galant +gamad +gambang kromong +gamelan +gamelan angklung +gamelan bang +gamelan bebonangan +gamelan buh +gamelan degung +gamelan gede +gamelan kebyar +gamelan salendro +gamelan selunding +gamelan semar pegulingan +gamewave +gammeldans +gandrung +gangsta rap +gar +garage rock +garrotin +gavotte +gelugpa chanting +gender wayang +gending +german folk music +gharbi +gharnati +ghazal +ghazal-song +ghetto house +ghettotech +girl group +glam metal +glam punk +glam rock +glitch +gnawa +go-go +goa +goa trance +gong-chime music +goombay +goregrind +goshu ondo +gospel music +gothic metal +gothic rock +granadinas +grebo +gregorian chant +grime +grindcore +groove metal +group sounds +grunge +grupera +guaguanbo +guajira +guasca +guitarra baiana +guitarradas +gumbe +gunchei +gunka +guoyue +gwo ka +gwo ka moderne +gypsy jazz +gypsy punk +gypsybilly +gyu ke +habanera +hajnali +hakka +halling +hambo +hands up +hapa haole +happy hardcore +haqibah +hard +hard bop +hard house +hard rock +hard trance +hardcore hip hop +hardcore metal +hardcore punk +hardcore techno +hardstyle +harepa +harmonica blues +hasaposérviko +heart attack +heartland rock +heavy beat +heavy metal +hesher +hi-nrg +highlands +highlife +highlife fusion +hillybilly music +hindustani classical music +hip hop +hip hop & rap +hip hop soul +hip house +hiplife +hiragasy +hiva usu +hong kong and cantonese pop +hong kong english pop +honky tonk +honkyoku +hora lunga +hornpipe +horror punk +horrorcore +horrorcore rap +house +house music +hua'er +huasteco +huayno +hula +humor +humppa +hunguhungu +hyangak +hymn +hyphy +hát chau van +hát chèo +hát cãi luong +hát tuồng +ibiza music +icaro +idm +igbo music +ijexá +ilahije +illbient +impressionist music +improvisational +incidental music +indian pop +indie folk +indie music +indie pop +indie rock +indietronica +indo jazz +indo rock +indonesian pop +indoyíftika +industrial death metal +industrial hip-hop +industrial metal +industrial music +industrial musical +industrial rock +instrumental rock +intelligent dance music +international latin +inuit music +iranian pop +irish folk +irish rebel music +iscathamiya +isicathamiya +isikhwela jo +island +isolationist +italo dance +italo disco +italo house +itsmeños +izvorna bosanska muzika +j'ouvert +j-fusion +j-pop +j-rock +jaipongan +jaliscienses +jam band +jam rock +jamana kura +jamrieng samai +jangle pop +japanese pop +jarana +jariang +jarochos +jawaiian +jazz +jazz blues +jazz fusion +jazz metal +jazz rap +jazz-funk +jazz-rock +jegog +jenkka +jesus music +jibaro +jig +jig punk +jing ping +jingle +jit +jitterbug +jive +joged +joged bumbung +joik +jonnycore +joropo +jota +jtek +jug band +jujitsu +juju +juke joint blues +jump blues +jumpstyle +jungle +junkanoo +juré +jùjú +k-pop +kaba +kabuki +kachāshī +kadans +kagok +kagyupa chanting +kaiso +kalamatianó +kalattuut +kalinda +kamba pop +kan ha diskan +kansas city blues +kantrum +kantádhes +kargyraa +karma +kaseko +katajjaq +kawachi ondo +kayōkyoku +ke-kwe +kebyar +kecak +kecapi suling +kertok +khaleeji +khap +khelimaski djili +khene +khoomei +khorovodi +khplam wai +khrung sai +khyal +kilapanda +kinko +kirtan +kiwi rock +kizomba +klape +klasik +klezmer +kliningan +kléftiko +kochare +kolomyjka +komagaku +kompa +konpa +korean pop +koumpaneia +kpanlogo +krakowiak +krautrock +kriti +kroncong +krump +krzesany +kuduro +kulintang +kulning +kumina +kun-borrk +kundere +kundiman +kussundé +kutumba wake +kveding +kvæði +kwaito +kwassa kwassa +kwela +käng +kélé +kĩkũyũ pop +la la +latin american +latin jazz +latin pop +latin rap +lavway +laïko +laïkó +le leagan +legényes +lelio +letkajenkka +levenslied +lhamo +lieder +light music +light rock +likanos +liquid drum&bass +liquid funk +liquindi +llanera +llanto +lo-fi +lo-fi music +loki djili +long-song +louisiana blues +louisiana swamp pop +lounge music +lovers rock +lowercase +lubbock sound +lucknavi thumri +luhya omutibo +luk grung +lullaby +lundu +lundum +m-base +madchester +madrigal +mafioso rap +maglaal +magnificat +mahori +mainstream jazz +makossa +makossa-soukous +malagueñas +malawian jazz +malhun +maloya +maluf +maluka +mambo +manaschi +mandarin pop +manding swing +mango +mangue bit +mangulina +manikay +manila sound +manouche +manzuma +mapouka +mapouka-serré +marabi +maracatu +marga +mariachi +marimba +marinera +marrabenta +martial industrial +martinetes +maskanda +mass +matamuerte +math rock +mathcore +matt bello +maxixe +mazurka +mbalax +mbaqanga +mbube +mbumba +medh +medieval folk rock +medieval metal +medieval music +meditation +mejorana +melhoun +melhûn +melodic black metal +melodic death metal +melodic hardcore +melodic metalcore +melodic music +melodic trance +memphis blues +memphis rap +memphis soul +mento +merengue +merengue típico moderno +merengue-bomba +meringue +merseybeat +metal +metalcore +metallic hardcore +mexican pop +mexican rock +mexican son +meykhana +mezwed +miami bass +microhouse +middle of the road +midwest hip hop +milonga +min'yo +mineras +mini compas +mini-jazz +minimal techno +minimalist music +minimalist trance +minneapolis sound +minstrel show +minuet +mirolóyia +modal jazz +modern classical +modern classical music +modern laika +modern rock +modinha +mohabelo +montuno +monumental dance +mor lam +mor lam sing +morna +motorpop +motown +mozambique +mpb +mugam +multicultural +murga +musette +museve +mushroom jazz +music drama +music hall +musiqi-e assil +musique concrète +mutuashi +muwashshah +muzak +méringue +música campesina +música criolla +música de la interior +música llanera +música nordestina +música popular brasileira +música tropical +nagauta +nakasi +nangma +nanguan +narcocorrido +nardcore +narodna muzika +nasheed +nashville sound +nashville sound/countrypolitan +national socialist black metal +naturalismo +nederpop +neo soul +neo-classical metal +neo-medieval +neo-prog +neo-psychedelia +neoclassical +neoclassical metal +neoclassical music +neofolk +neotraditional country +nerdcore +neue deutsche härte +neue deutsche welle +new age music +new beat +new instrumental +new jack swing +new orleans blues +new orleans jazz +new pop +new prog +new rave +new romantic +new school hip hop +new taiwanese song +new wave +new wave of british heavy metal +new wave of new wave +new weird america +new york blues +new york house +newgrass +nganja +nightcore +nintendocore +nisiótika +no wave +noh +noise music +noise pop +noise rock +nongak +norae undong +nordic folk dance music +nordic folk music +nortec +norteño +northern soul +nota +nu breaks +nu jazz +nu metal +nu soul +nueva canción +nyatiti +néo kýma +obscuro +oi! +old school hip hop +old-time +oldies +olonkho +oltului +ondo +opera +operatic pop +oratorio +orchestra +orchestral +organ trio +organic ambient +organum +orgel +oriental metal +ottava rima +outlaw country +outsider music +p-funk +pagan metal +pagan rock +pagode +paisley underground +palm wine +palm-wine +pambiche +panambih +panchai baja +panchavadyam +pansori +paranda +parang +parody +parranda +partido alto +pasillo +patriotic +peace punk +pelimanni music +petenera +peyote song +philadelphia soul +piano blues +piano rock +piedmont blues +pimba +pinoy pop +pinoy rock +pinpeat orchestra +piphat +piyyutim +plainchant +plena +pleng phua cheewit +pleng thai sakorn +political hip hop +polka +polo +polonaise +pols +polska +pong lang +pop +pop folk +pop music +pop punk +pop rap +pop rock +pop sunda +pornocore +porro +post disco +post-britpop +post-disco +post-grunge +post-hardcore +post-industrial +post-metal +post-minimalism +post-punk +post-rock +post-romanticism +pow-wow +power electronics +power metal +power noise +power pop +powerviolence +ppongtchak +praise song +program symphony +progressive bluegrass +progressive country +progressive death metal +progressive electronic +progressive electronic music +progressive folk +progressive folk music +progressive house +progressive metal +progressive power metal +progressive rock +progressive trance +progressive thrash metal +protopunk +psych folk +psychedelic music +psychedelic pop +psychedelic rock +psychedelic trance +psychobilly +punk blues +punk cabaret +punk jazz +punk rock +punta +punta rock +qasidah +qasidah modern +qawwali +quadrille +quan ho +queercore +quiet storm +rada +raga +raga rock +ragga +ragga jungle +raggamuffin +ragtime +rai +rake-and-scrape +ramkbach +ramvong +ranchera +rap +rap metal +rap rock +rapcore +rara +rare groove +rasiya +rave +raw rock +raï +rebetiko +red dirt +reel +reggae +reggae 110 +reggae bultrón +reggae en español +reggae fusion +reggae highlife +reggaefusion +reggaeton +rekilaulu +relax music +religious +rembetiko +renaissance music +requiem +rhapsody +rhyming spiritual +rhythm & blues +rhythm and blues +ricercar +riot grrrl +rock +rock and roll +rock en español +rock opera +rockabilly +rocksteady +rococo +romantic flow +romantic period in music +rondeaux +ronggeng +roots reggae +roots rock +roots rock reggae +rumba +russian pop +rímur +sabar +sacred harp +sacred music +sadcore +saibara +sakara +salegy +salsa +salsa erotica +salsa romantica +saltarello +samba +samba-canção +samba-reggae +samba-rock +sambai +sanjo +sato kagura +sawt +saya +scat +schlager +schottisch +schranz +scottish baroque music +screamo +scrumpy and western +sea shanty +sean nós +second viennese school +sega music +seggae +seis +semba +sephardic music +serialism +set dance +sevdalinka +sevillana +shabab +shabad +shalako +shan'ge +shango +shape note +shibuya-kei +shidaiqu +shima uta +shock rock +shoegaze +shoegazer +shoka +shomyo +show tune +sica +siguiriyas +silat +sinawi +situational +ska +ska punk +skacore +skald +skate punk +skiffle +slack-key guitar +slide +slowcore +sludge metal +slängpolska +smooth jazz +soca +soft rock +son +son montuno +son-batá +sonata +songo +songo-salsa +sophisti-pop +soukous +soul +soul blues +soul jazz +soul music +southern gospel +southern harmony +southern hip hop +southern metal +southern rock +southern soul +space age pop +space music +space rock +spectralism +speed garage +speed metal +speedcore +spirituals +spouge +sprechgesang +square dance +squee +st. louis blues +stand-up +steelband +stoner metal +stoner rock +straight edge +strathspeys +stride +string +string quartet +sufi music +suite +sunshine pop +suomirock +super eurobeat +surf ballad +surf instrumental +surf music +surf pop +surf rock +swamp blues +swamp pop +swamp rock +swing +swing music +swingbeat +sygyt +symphonic +symphonic black metal +symphonic metal +symphonic poem +symphonic rock +symphony +synthcore +synthpop +synthpunk +t'ong guitar +taarab +tai tu +taiwanese pop +tala +talempong +tambu +tamburitza +tamil christian keerthanai +tango +tanguk +tappa +tarana +tarantella +taranto +tech +tech house +tech trance +technical death metal +technical metal +techno +technoid +technopop +techstep +techtonik +teen pop +tejano +tejano music +tekno +tembang sunda +teutonic thrash metal +texas blues +thai pop +thillana +thrash metal +thrashcore +thumri +tibetan pop +tiento +timbila +tin pan alley +tinga +tinku +toeshey +togaku +trad jazz +traditional bluegrass +traditional heavy metal +traditional pop music +trallalero +trance +tribal house +trikitixa +trip hop +trip rock +trip-hop +tropicalia +tropicalismo +tropipop +truck-driving country +tumba +turbo-folk +turkish music +turkish pop +turntablism +tuvan throat-singing +twee pop +twist +two tone +táncház +uk garage +uk pub rock +unblack metal +underground music +uplifting +uplifting trance +urban cowboy +urban folk +urban jazz +vallenato +vaudeville +venezuela +verbunkos +verismo +viking metal +villanella +virelai +vispop +visual kei +visual music +vocal +vocal house +vocal jazz +vocal music +volksmusik +waila +waltz +wangga +warabe uta +wassoulou +weld +were music +west coast hip hop +west coast jazz +western +western blues +western swing +witch house +wizard rock +women's music +wong shadow +wonky pop +wood +work song +world fusion +world fusion music +world music +worldbeat +xhosa music +xoomii +yo-pop +yodeling +yukar +yé-yé +zajal +zapin +zarzuela +zeibekiko +zeuhl +ziglibithy +zouglou +zouk +zouk chouv +zouklove +zulu music +zydeco diff --git a/lib/beetsplug/lastimport.py b/lib/beetsplug/lastimport.py new file mode 100644 index 00000000..16d53302 --- /dev/null +++ b/lib/beetsplug/lastimport.py @@ -0,0 +1,247 @@ +# This file is part of beets. +# Copyright 2016, Rafael Bodill https://github.com/rafi +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +import pylast +from pylast import TopItem, _extract, _number +from beets import ui +from beets import dbcore +from beets import config +from beets import plugins +from beets.dbcore import types + +API_URL = 'https://ws.audioscrobbler.com/2.0/' + + +class LastImportPlugin(plugins.BeetsPlugin): + def __init__(self): + super().__init__() + config['lastfm'].add({ + 'user': '', + 'api_key': plugins.LASTFM_KEY, + }) + config['lastfm']['api_key'].redact = True + self.config.add({ + 'per_page': 500, + 'retry_limit': 3, + }) + self.item_types = { + 'play_count': types.INTEGER, + } + + def commands(self): + cmd = ui.Subcommand('lastimport', help='import last.fm play-count') + + def func(lib, opts, args): + import_lastfm(lib, self._log) + + cmd.func = func + return [cmd] + + +class CustomUser(pylast.User): + """ Custom user class derived from pylast.User, and overriding the + _get_things method to return MBID and album. Also introduces new + get_top_tracks_by_page method to allow access to more than one page of top + tracks. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _get_things(self, method, thing, thing_type, params=None, + cacheable=True): + """Returns a list of the most played thing_types by this thing, in a + tuple with the total number of pages of results. Includes an MBID, if + found. + """ + doc = self._request( + self.ws_prefix + "." + method, cacheable, params) + + toptracks_node = doc.getElementsByTagName('toptracks')[0] + total_pages = int(toptracks_node.getAttribute('totalPages')) + + seq = [] + for node in doc.getElementsByTagName(thing): + title = _extract(node, "name") + artist = _extract(node, "name", 1) + mbid = _extract(node, "mbid") + playcount = _number(_extract(node, "playcount")) + + thing = thing_type(artist, title, self.network) + thing.mbid = mbid + seq.append(TopItem(thing, playcount)) + + return seq, total_pages + + def get_top_tracks_by_page(self, period=pylast.PERIOD_OVERALL, limit=None, + page=1, cacheable=True): + """Returns the top tracks played by a user, in a tuple with the total + number of pages of results. + * period: The period of time. Possible values: + o PERIOD_OVERALL + o PERIOD_7DAYS + o PERIOD_1MONTH + o PERIOD_3MONTHS + o PERIOD_6MONTHS + o PERIOD_12MONTHS + """ + + params = self._get_params() + params['period'] = period + params['page'] = page + if limit: + params['limit'] = limit + + return self._get_things( + "getTopTracks", "track", pylast.Track, params, cacheable) + + +def import_lastfm(lib, log): + user = config['lastfm']['user'].as_str() + per_page = config['lastimport']['per_page'].get(int) + + if not user: + raise ui.UserError('You must specify a user name for lastimport') + + log.info('Fetching last.fm library for @{0}', user) + + page_total = 1 + page_current = 0 + found_total = 0 + unknown_total = 0 + retry_limit = config['lastimport']['retry_limit'].get(int) + # Iterate through a yet to be known page total count + while page_current < page_total: + log.info('Querying page #{0}{1}...', + page_current + 1, + f'/{page_total}' if page_total > 1 else '') + + for retry in range(0, retry_limit): + tracks, page_total = fetch_tracks(user, page_current + 1, per_page) + if page_total < 1: + # It means nothing to us! + raise ui.UserError('Last.fm reported no data.') + + if tracks: + found, unknown = process_tracks(lib, tracks, log) + found_total += found + unknown_total += unknown + break + else: + log.error('ERROR: unable to read page #{0}', + page_current + 1) + if retry < retry_limit: + log.info( + 'Retrying page #{0}... ({1}/{2} retry)', + page_current + 1, retry + 1, retry_limit + ) + else: + log.error('FAIL: unable to fetch page #{0}, ', + 'tried {1} times', page_current, retry + 1) + page_current += 1 + + log.info('... done!') + log.info('finished processing {0} song pages', page_total) + log.info('{0} unknown play-counts', unknown_total) + log.info('{0} play-counts imported', found_total) + + +def fetch_tracks(user, page, limit): + """ JSON format: + [ + { + "mbid": "...", + "artist": "...", + "title": "...", + "playcount": "..." + } + ] + """ + network = pylast.LastFMNetwork(api_key=config['lastfm']['api_key']) + user_obj = CustomUser(user, network) + results, total_pages =\ + user_obj.get_top_tracks_by_page(limit=limit, page=page) + return [ + { + "mbid": track.item.mbid if track.item.mbid else '', + "artist": { + "name": track.item.artist.name + }, + "name": track.item.title, + "playcount": track.weight + } for track in results + ], total_pages + + +def process_tracks(lib, tracks, log): + total = len(tracks) + total_found = 0 + total_fails = 0 + log.info('Received {0} tracks in this page, processing...', total) + + for num in range(0, total): + song = None + trackid = tracks[num]['mbid'].strip() + artist = tracks[num]['artist'].get('name', '').strip() + title = tracks[num]['name'].strip() + album = '' + if 'album' in tracks[num]: + album = tracks[num]['album'].get('name', '').strip() + + log.debug('query: {0} - {1} ({2})', artist, title, album) + + # First try to query by musicbrainz's trackid + if trackid: + song = lib.items( + dbcore.query.MatchQuery('mb_trackid', trackid) + ).get() + + # If not, try just artist/title + if song is None: + log.debug('no album match, trying by artist/title') + query = dbcore.AndQuery([ + dbcore.query.SubstringQuery('artist', artist), + dbcore.query.SubstringQuery('title', title) + ]) + song = lib.items(query).get() + + # Last resort, try just replacing to utf-8 quote + if song is None: + title = title.replace("'", '\u2019') + log.debug('no title match, trying utf-8 single quote') + query = dbcore.AndQuery([ + dbcore.query.SubstringQuery('artist', artist), + dbcore.query.SubstringQuery('title', title) + ]) + song = lib.items(query).get() + + if song is not None: + count = int(song.get('play_count', 0)) + new_count = int(tracks[num]['playcount']) + log.debug('match: {0} - {1} ({2}) ' + 'updating: play_count {3} => {4}', + song.artist, song.title, song.album, count, new_count) + song['play_count'] = new_count + song.store() + total_found += 1 + else: + total_fails += 1 + log.info(' - No match: {0} - {1} ({2})', + artist, title, album) + + if total_fails > 0: + log.info('Acquired {0}/{1} play-counts ({2} unknown)', + total_found, total, total_fails) + + return total_found, total_fails diff --git a/lib/beetsplug/loadext.py b/lib/beetsplug/loadext.py new file mode 100644 index 00000000..191b97a2 --- /dev/null +++ b/lib/beetsplug/loadext.py @@ -0,0 +1,44 @@ +# This file is part of beets. +# Copyright 2019, Jack Wilsdon <jack.wilsdon@gmail.com> +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Load SQLite extensions. +""" + + +from beets.dbcore import Database +from beets.plugins import BeetsPlugin +import sqlite3 + + +class LoadExtPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + if not Database.supports_extensions: + self._log.warn('loadext is enabled but the current SQLite ' + 'installation does not support extensions') + return + + self.register_listener('library_opened', self.library_opened) + + def library_opened(self, lib): + for v in self.config: + ext = v.as_filename() + + self._log.debug('loading extension {}', ext) + + try: + lib.load_extension(ext) + except sqlite3.OperationalError as e: + self._log.error('failed to load extension {}: {}', ext, e) diff --git a/lib/beetsplug/lyrics.py b/lib/beetsplug/lyrics.py index a2ebe7c3..2cb50ca5 100644 --- a/lib/beetsplug/lyrics.py +++ b/lib/beetsplug/lyrics.py @@ -1,5 +1,5 @@ # This file is part of beets. -# Copyright 2014, Adrian Sampson. +# Copyright 2016, Adrian Sampson. # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -14,73 +14,122 @@ """Fetches, embeds, and displays lyrics. """ -from __future__ import print_function -import re -import logging -import requests -import json -import unicodedata -import urllib + import difflib +import errno import itertools -from HTMLParser import HTMLParseError +import json +import struct +import os.path +import re +import requests +import unicodedata +from unidecode import unidecode +import warnings +import urllib + +try: + import bs4 + from bs4 import SoupStrainer + HAS_BEAUTIFUL_SOUP = True +except ImportError: + HAS_BEAUTIFUL_SOUP = False + +try: + import langdetect + HAS_LANGDETECT = True +except ImportError: + HAS_LANGDETECT = False + +try: + # PY3: HTMLParseError was removed in 3.5 as strict mode + # was deprecated in 3.3. + # https://docs.python.org/3.3/library/html.parser.html + from html.parser import HTMLParseError +except ImportError: + class HTMLParseError(Exception): + pass from beets import plugins -from beets import config, ui - - -# Global logger. - -log = logging.getLogger('beets') +from beets import ui +import beets DIV_RE = re.compile(r'<(/?)div>?', re.I) COMMENT_RE = re.compile(r'<!--.*-->', re.S) TAG_RE = re.compile(r'<[^>]*>') BREAK_RE = re.compile(r'\n?\s*<br([\s|/][^>]*)*>\s*\n?', re.I) URL_CHARACTERS = { - u'\u2018': u"'", - u'\u2019': u"'", - u'\u201c': u'"', - u'\u201d': u'"', - u'\u2010': u'-', - u'\u2011': u'-', - u'\u2012': u'-', - u'\u2013': u'-', - u'\u2014': u'-', - u'\u2015': u'-', - u'\u2016': u'-', - u'\u2026': u'...', + '\u2018': "'", + '\u2019': "'", + '\u201c': '"', + '\u201d': '"', + '\u2010': '-', + '\u2011': '-', + '\u2012': '-', + '\u2013': '-', + '\u2014': '-', + '\u2015': '-', + '\u2016': '-', + '\u2026': '...', } +USER_AGENT = f'beets/{beets.__version__}' + +# The content for the base index.rst generated in ReST mode. +REST_INDEX_TEMPLATE = '''Lyrics +====== + +* :ref:`Song index <genindex>` +* :ref:`search` + +Artist index: + +.. toctree:: + :maxdepth: 1 + :glob: + + artists/* +''' + +# The content for the base conf.py generated. +REST_CONF_TEMPLATE = '''# -*- coding: utf-8 -*- +master_doc = 'index' +project = 'Lyrics' +copyright = 'none' +author = 'Various Authors' +latex_documents = [ + (master_doc, 'Lyrics.tex', project, + author, 'manual'), +] +epub_title = project +epub_author = author +epub_publisher = author +epub_copyright = copyright +epub_exclude_files = ['search.html'] +epub_tocdepth = 1 +epub_tocdup = False +''' # Utilities. -def fetch_url(url): - """Retrieve the content at a given URL, or return None if the source - is unreachable. - """ +def unichar(i): try: - r = requests.get(url, verify=False) - except requests.RequestException as exc: - log.debug(u'lyrics request failed: {0}'.format(exc)) - return - if r.status_code == requests.codes.ok: - return r.text - else: - log.debug(u'failed to fetch: {0} ({1})'.format(url, r.status_code)) + return chr(i) + except ValueError: + return struct.pack('i', i).decode('utf-32') def unescape(text): - """Resolves &#xxx; HTML entities (and some others).""" - if isinstance(text, str): - text = text.decode('utf8', 'ignore') - out = text.replace(u' ', u' ') + """Resolve &#xxx; HTML entities (and some others).""" + if isinstance(text, bytes): + text = text.decode('utf-8', 'ignore') + out = text.replace(' ', ' ') def replchar(m): num = m.group(1) - return unichr(int(num)) - out = re.sub(u"&#(\d+);", replchar, out) + return unichar(int(num)) + out = re.sub("&#(\\d+);", replchar, out) return out @@ -89,44 +138,10 @@ def extract_text_between(html, start_marker, end_marker): _, html = html.split(start_marker, 1) html, _ = html.split(end_marker, 1) except ValueError: - return u'' + return '' return html -def extract_text_in(html, starttag): - """Extract the text from a <DIV> tag in the HTML starting with - ``starttag``. Returns None if parsing fails. - """ - - # Strip off the leading text before opening tag. - try: - _, html = html.split(starttag, 1) - except ValueError: - return - - # Walk through balanced DIV tags. - level = 0 - parts = [] - pos = 0 - for match in DIV_RE.finditer(html): - if match.group(1): # Closing tag. - level -= 1 - if level == 0: - pos = match.end() - else: # Opening tag. - if level == 0: - parts.append(html[pos:match.start()]) - level += 1 - - if level == -1: - parts.append(html[pos:match.start()]) - break - else: - print('no closing tag found!') - return - return u''.join(parts) - - def search_pairs(item): """Yield a pairs of artists and titles to search for. @@ -136,32 +151,42 @@ def search_pairs(item): In addition to the artist and title obtained from the `item` the method tries to strip extra information like paranthesized suffixes and featured artists from the strings and add them as candidates. + The artist sort name is added as a fallback candidate to help in + cases where artist name includes special characters or is in a + non-latin script. The method also tries to split multiple titles separated with `/`. """ + def generate_alternatives(string, patterns): + """Generate string alternatives by extracting first matching group for + each given pattern. + """ + alternatives = [string] + for pattern in patterns: + match = re.search(pattern, string, re.IGNORECASE) + if match: + alternatives.append(match.group(1)) + return alternatives - title, artist = item.title, item.artist - titles = [title] - artists = [artist] + title, artist, artist_sort = item.title, item.artist, item.artist_sort - # Remove any featuring artists from the artists name - pattern = r"(.*?) {0}".format(plugins.feat_tokens()) - match = re.search(pattern, artist, re.IGNORECASE) - if match: - artists.append(match.group(1)) + patterns = [ + # Remove any featuring artists from the artists name + fr"(.*?) {plugins.feat_tokens()}"] + artists = generate_alternatives(artist, patterns) + # Use the artist_sort as fallback only if it differs from artist to avoid + # repeated remote requests with the same search terms + if artist != artist_sort: + artists.append(artist_sort) - # Remove a parenthesized suffix from a title string. Common - # examples include (live), (remix), and (acoustic). - pattern = r"(.+?)\s+[(].*[)]$" - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) - - # Remove any featuring artists from the title - pattern = r"(.*?) {0}".format(plugins.feat_tokens(for_artist=False)) - for title in titles[:]: - match = re.search(pattern, title, re.IGNORECASE) - if match: - titles.append(match.group(1)) + patterns = [ + # Remove a parenthesized suffix from a title string. Common + # examples include (live), (remix), and (acoustic). + r"(.+?)\s+[(].*[)]$", + # Remove any featuring artists from the title + r"(.*?) {}".format(plugins.feat_tokens(for_artist=False)), + # Remove part of title after colon ':' for songs with subtitles + r"(.+?)\s*:.*"] + titles = generate_alternatives(title, patterns) # Check for a dual song (e.g. Pink Floyd - Speak to Me / Breathe) # and each of them. @@ -174,131 +199,300 @@ def search_pairs(item): return itertools.product(artists, multi_titles) -def _encode(s): - """Encode the string for inclusion in a URL (common to both - LyricsWiki and Lyrics.com). +def slug(text): + """Make a URL-safe, human-readable version of the given text + + This will do the following: + + 1. decode unicode characters into ASCII + 2. shift everything to lowercase + 3. strip whitespace + 4. replace other non-word characters with dashes + 5. strip extra dashes + + This somewhat duplicates the :func:`Google.slugify` function but + slugify is not as generic as this one, which can be reused + elsewhere. """ - if isinstance(s, unicode): - for char, repl in URL_CHARACTERS.items(): - s = s.replace(char, repl) - s = s.encode('utf8', 'ignore') - return urllib.quote(s) - -# Musixmatch - -MUSIXMATCH_URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + return re.sub(r'\W+', '-', unidecode(text).lower().strip()).strip('-') -def fetch_musixmatch(artist, title): - url = MUSIXMATCH_URL_PATTERN % (_lw_encode(artist.title()), - _lw_encode(title.title())) - html = fetch_url(url) - if not html: - return - lyrics = extract_text_between(html, '"lyrics_body":', '"lyrics_language":') - return lyrics.strip(',"').replace('\\n', '\n') - -# LyricsWiki. - -LYRICSWIKI_URL_PATTERN = 'http://lyrics.wikia.com/%s:%s' +if HAS_BEAUTIFUL_SOUP: + def try_parse_html(html, **kwargs): + try: + return bs4.BeautifulSoup(html, 'html.parser', **kwargs) + except HTMLParseError: + return None +else: + def try_parse_html(html, **kwargs): + return None -def _lw_encode(s): - s = re.sub(r'\s+', '_', s) - s = s.replace("<", "Less_Than") - s = s.replace(">", "Greater_Than") - s = s.replace("#", "Number_") - s = re.sub(r'[\[\{]', '(', s) - s = re.sub(r'[\]\}]', ')', s) - return _encode(s) +class Backend: + REQUIRES_BS = False + + def __init__(self, config, log): + self._log = log + + @staticmethod + def _encode(s): + """Encode the string for inclusion in a URL""" + if isinstance(s, str): + for char, repl in URL_CHARACTERS.items(): + s = s.replace(char, repl) + s = s.encode('utf-8', 'ignore') + return urllib.parse.quote(s) + + def build_url(self, artist, title): + return self.URL_PATTERN % (self._encode(artist.title()), + self._encode(title.title())) + + def fetch_url(self, url): + """Retrieve the content at a given URL, or return None if the source + is unreachable. + """ + try: + # Disable the InsecureRequestWarning that comes from using + # `verify=false`. + # https://github.com/kennethreitz/requests/issues/2214 + # We're not overly worried about the NSA MITMing our lyrics scraper + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + r = requests.get(url, verify=False, headers={ + 'User-Agent': USER_AGENT, + }) + except requests.RequestException as exc: + self._log.debug('lyrics request failed: {0}', exc) + return + if r.status_code == requests.codes.ok: + return r.text + else: + self._log.debug('failed to fetch: {0} ({1})', url, r.status_code) + return None + + def fetch(self, artist, title): + raise NotImplementedError() -def fetch_lyricswiki(artist, title): - """Fetch lyrics from LyricsWiki.""" - url = LYRICSWIKI_URL_PATTERN % (_lw_encode(artist), _lw_encode(title)) - html = fetch_url(url) - if not html: - return +class MusiXmatch(Backend): + REPLACEMENTS = { + r'\s+': '-', + '<': 'Less_Than', + '>': 'Greater_Than', + '#': 'Number_', + r'[\[\{]': '(', + r'[\]\}]': ')', + } - lyrics = extract_text_in(html, u"<div class='lyricbox'>") - if lyrics and 'Unfortunately, we are not licensed' not in lyrics: + URL_PATTERN = 'https://www.musixmatch.com/lyrics/%s/%s' + + @classmethod + def _encode(cls, s): + for old, new in cls.REPLACEMENTS.items(): + s = re.sub(old, new, s) + + return super()._encode(s) + + def fetch(self, artist, title): + url = self.build_url(artist, title) + + html = self.fetch_url(url) + if not html: + return None + if "We detected that your IP is blocked" in html: + self._log.warning('we are blocked at MusixMatch: url %s failed' + % url) + return None + html_parts = html.split('<p class="mxm-lyrics__content') + # Sometimes lyrics come in 2 or more parts + lyrics_parts = [] + for html_part in html_parts: + lyrics_parts.append(extract_text_between(html_part, '>', '</p>')) + lyrics = '\n'.join(lyrics_parts) + lyrics = lyrics.strip(',"').replace('\\n', '\n') + # another odd case: sometimes only that string remains, for + # missing songs. this seems to happen after being blocked + # above, when filling in the CAPTCHA. + if "Instant lyrics for all your music." in lyrics: + return None + # sometimes there are non-existent lyrics with some content + if 'Lyrics | Musixmatch' in lyrics: + return None return lyrics -# Lyrics.com. +class Genius(Backend): + """Fetch lyrics from Genius via genius-api. -LYRICSCOM_URL_PATTERN = 'http://www.lyrics.com/%s-lyrics-%s.html' -LYRICSCOM_NOT_FOUND = ( - 'Sorry, we do not have the lyric', - 'Submit Lyrics', -) + Simply adapted from + bigishdata.com/2016/09/27/getting-song-lyrics-from-geniuss-api-scraping/ + """ + REQUIRES_BS = True -def _lc_encode(s): - s = re.sub(r'[^\w\s-]', '', s) - s = re.sub(r'\s+', '-', s) - return _encode(s).lower() + base_url = "https://api.genius.com" + def __init__(self, config, log): + super().__init__(config, log) + self.api_key = config['genius_api_key'].as_str() + self.headers = { + 'Authorization': "Bearer %s" % self.api_key, + 'User-Agent': USER_AGENT, + } -def fetch_lyricscom(artist, title): - """Fetch lyrics from Lyrics.com.""" - url = LYRICSCOM_URL_PATTERN % (_lc_encode(title), _lc_encode(artist)) - html = fetch_url(url) - if not html: - return - lyrics = extract_text_between(html, '<div id="lyrics" class="SCREENONLY" ' - 'itemprop="description">', '</div>') - if not lyrics: - return - for not_found_str in LYRICSCOM_NOT_FOUND: - if not_found_str in lyrics: + def fetch(self, artist, title): + """Fetch lyrics from genius.com + + Because genius doesn't allow accesssing lyrics via the api, + we first query the api for a url matching our artist & title, + then attempt to scrape that url for the lyrics. + """ + json = self._search(artist, title) + if not json: + self._log.debug('Genius API request returned invalid JSON') + return None + + # find a matching artist in the json + for hit in json["response"]["hits"]: + hit_artist = hit["result"]["primary_artist"]["name"] + + if slug(hit_artist) == slug(artist): + html = self.fetch_url(hit["result"]["url"]) + if not html: + return None + return self._scrape_lyrics_from_html(html) + + self._log.debug('Genius failed to find a matching artist for \'{0}\'', + artist) + return None + + def _search(self, artist, title): + """Searches the genius api for a given artist and title + + https://docs.genius.com/#search-h2 + + :returns: json response + """ + search_url = self.base_url + "/search" + data = {'q': title + " " + artist.lower()} + try: + response = requests.get( + search_url, data=data, headers=self.headers) + except requests.RequestException as exc: + self._log.debug('Genius API request failed: {0}', exc) + return None + + try: + return response.json() + except ValueError: + return None + + def _scrape_lyrics_from_html(self, html): + """Scrape lyrics from a given genius.com html""" + + soup = try_parse_html(html) + if not soup: return - parts = lyrics.split('\n---\nLyrics powered by', 1) - if parts: - return parts[0] + # Remove script tags that they put in the middle of the lyrics. + [h.extract() for h in soup('script')] + + # Most of the time, the page contains a div with class="lyrics" where + # all of the lyrics can be found already correctly formatted + # Sometimes, though, it packages the lyrics into separate divs, most + # likely for easier ad placement + lyrics_div = soup.find("div", class_="lyrics") + if not lyrics_div: + self._log.debug('Received unusual song page html') + verse_div = soup.find("div", + class_=re.compile("Lyrics__Container")) + if not verse_div: + if soup.find("div", + class_=re.compile("LyricsPlaceholder__Message"), + string="This song is an instrumental"): + self._log.debug('Detected instrumental') + return "[Instrumental]" + else: + self._log.debug("Couldn't scrape page using known layouts") + return None + + lyrics_div = verse_div.parent + for br in lyrics_div.find_all("br"): + br.replace_with("\n") + ads = lyrics_div.find_all("div", + class_=re.compile("InreadAd__Container")) + for ad in ads: + ad.replace_with("\n") + + return lyrics_div.get_text() -# Optional Google custom search API backend. +class Tekstowo(Backend): + # Fetch lyrics from Tekstowo.pl. + REQUIRES_BS = True -def slugify(text): - """Normalize a string and remove non-alphanumeric characters. - """ - text = re.sub(r"[-'_\s]", '_', text) - text = re.sub(r"_+", '_', text).strip('_') - pat = "([^,\(]*)\((.*?)\)" # Remove content within parentheses - text = re.sub(pat, '\g<1>', text).strip() - try: - text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore') - text = unicode(re.sub('[-\s]+', ' ', text)) - except UnicodeDecodeError: - log.exception(u"Failing to normalize '{0}'".format(text)) - return text + BASE_URL = 'http://www.tekstowo.pl' + URL_PATTERN = BASE_URL + '/wyszukaj.html?search-title=%s&search-artist=%s' + def fetch(self, artist, title): + url = self.build_url(title, artist) + search_results = self.fetch_url(url) + if not search_results: + return None -BY_TRANS = ['by', 'par', 'de', 'von'] -LYRICS_TRANS = ['lyrics', 'paroles', 'letras', 'liedtexte'] + song_page_url = self.parse_search_results(search_results) + if not song_page_url: + return None + song_page_html = self.fetch_url(song_page_url) + if not song_page_html: + return None -def is_page_candidate(urlLink, urlTitle, title, artist): - """Return True if the URL title makes it a good candidate to be a - page that contains lyrics of title by artist. - """ - title = slugify(title.lower()) - artist = slugify(artist.lower()) - sitename = re.search(u"//([^/]+)/.*", slugify(urlLink.lower())).group(1) - urlTitle = slugify(urlTitle.lower()) - # Check if URL title contains song title (exact match) - if urlTitle.find(title) != -1: - return True - # or try extracting song title from URL title and check if - # they are close enough - tokens = [by + '_' + artist for by in BY_TRANS] + \ - [artist, sitename, sitename.replace('www.', '')] + LYRICS_TRANS - songTitle = re.sub(u'(%s)' % u'|'.join(tokens), u'', urlTitle) - songTitle = songTitle.strip('_|') - typoRatio = .9 - return difflib.SequenceMatcher(None, songTitle, title).ratio() >= typoRatio + return self.extract_lyrics(song_page_html) + + def parse_search_results(self, html): + html = _scrape_strip_cruft(html) + html = _scrape_merge_paragraphs(html) + + soup = try_parse_html(html) + if not soup: + return None + + content_div = soup.find("div", class_="content") + if not content_div: + return None + + card_div = content_div.find("div", class_="card") + if not card_div: + return None + + song_rows = card_div.find_all("div", class_="box-przeboje") + if not song_rows: + return None + + song_row = song_rows[0] + if not song_row: + return None + + link = song_row.find('a') + if not link: + return None + + return self.BASE_URL + link.get('href') + + def extract_lyrics(self, html): + html = _scrape_strip_cruft(html) + html = _scrape_merge_paragraphs(html) + + soup = try_parse_html(html) + if not soup: + return None + + lyrics_div = soup.find("div", class_="song-text") + if not lyrics_div: + return None + + return lyrics_div.get_text() def remove_credits(text): @@ -315,36 +509,6 @@ def remove_credits(text): return text -def is_lyrics(text, artist=None): - """Determine whether the text seems to be valid lyrics. - """ - if not text: - return False - badTriggersOcc = [] - nbLines = text.count('\n') - if nbLines <= 1: - log.debug(u"Ignoring too short lyrics '{0}'".format(text)) - return False - elif nbLines < 5: - badTriggersOcc.append('too_short') - else: - # Lyrics look legit, remove credits to avoid being penalized further - # down - text = remove_credits(text) - - badTriggers = ['lyrics', 'copyright', 'property', 'links'] - if artist: - badTriggersOcc += [artist] - - for item in badTriggers: - badTriggersOcc += [item] * len(re.findall(r'\W%s\W' % item, - text, re.I)) - - if badTriggersOcc: - log.debug(u'Bad triggers detected: {0}'.format(badTriggersOcc)) - return len(badTriggersOcc) < 2 - - def _scrape_strip_cruft(html, plain_text_out=False): """Clean up HTML """ @@ -353,7 +517,8 @@ def _scrape_strip_cruft(html, plain_text_out=False): html = html.replace('\r', '\n') # Normalize EOL. html = re.sub(r' +', ' ', html) # Whitespaces collapse. html = BREAK_RE.sub('\n', html) # <br> eats up surrounding '\n'. - html = re.sub(r'<(script).*?</\1>(?s)', '', html) # Strip script tags. + html = re.sub(r'(?s)<(script).*?</\1>', '', html) # Strip script tags. + html = re.sub('\u2005', " ", html) # replace unicode with regular space if plain_text_out: # Strip remaining HTML tags html = COMMENT_RE.sub('', html) @@ -373,11 +538,6 @@ def scrape_lyrics_from_html(html): """Scrape lyrics from a URL. If no lyrics can be found, return None instead. """ - from bs4 import SoupStrainer, BeautifulSoup - - if not html: - return None - def is_text_notcode(text): length = len(text) return (length > 20 and @@ -387,123 +547,371 @@ def scrape_lyrics_from_html(html): html = _scrape_merge_paragraphs(html) # extract all long text blocks that are not code - try: - soup = BeautifulSoup(html, "html.parser", - parse_only=SoupStrainer(text=is_text_notcode)) - except HTMLParseError: + soup = try_parse_html(html, parse_only=SoupStrainer(text=is_text_notcode)) + if not soup: + return None + + # Get the longest text element (if any). + strings = sorted(soup.stripped_strings, key=len, reverse=True) + if strings: + return strings[0] + else: return None - soup = sorted(soup.stripped_strings, key=len)[-1] - return soup -def fetch_google(artist, title): - """Fetch lyrics from Google search results. - """ - query = u"%s %s" % (artist, title) - api_key = config['lyrics']['google_API_key'].get(unicode) - engine_id = config['lyrics']['google_engine_ID'].get(unicode) - url = u'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s' % \ - (api_key, engine_id, urllib.quote(query.encode('utf8'))) +class Google(Backend): + """Fetch lyrics from Google search results.""" - data = urllib.urlopen(url) - data = json.load(data) - if 'error' in data: - reason = data['error']['errors'][0]['reason'] - log.debug(u'google lyrics backend error: {0}'.format(reason)) - return + REQUIRES_BS = True - if 'items' in data.keys(): - for item in data['items']: - urlLink = item['link'] - urlTitle = item.get('title', u'') - if not is_page_candidate(urlLink, urlTitle, title, artist): - continue - html = fetch_url(urlLink) - lyrics = scrape_lyrics_from_html(html) - if not lyrics: - continue + def __init__(self, config, log): + super().__init__(config, log) + self.api_key = config['google_API_key'].as_str() + self.engine_id = config['google_engine_ID'].as_str() - if is_lyrics(lyrics, artist): - log.debug(u'got lyrics from {0}'.format(item['displayLink'])) - return lyrics + def is_lyrics(self, text, artist=None): + """Determine whether the text seems to be valid lyrics. + """ + if not text: + return False + bad_triggers_occ = [] + nb_lines = text.count('\n') + if nb_lines <= 1: + self._log.debug("Ignoring too short lyrics '{0}'", text) + return False + elif nb_lines < 5: + bad_triggers_occ.append('too_short') + else: + # Lyrics look legit, remove credits to avoid being penalized + # further down + text = remove_credits(text) + bad_triggers = ['lyrics', 'copyright', 'property', 'links'] + if artist: + bad_triggers += [artist] -# Plugin logic. + for item in bad_triggers: + bad_triggers_occ += [item] * len(re.findall(r'\W%s\W' % item, + text, re.I)) -SOURCES = ['google', 'lyricwiki', 'lyrics.com', 'musixmatch'] -SOURCE_BACKENDS = { - 'google': fetch_google, - 'lyricwiki': fetch_lyricswiki, - 'lyrics.com': fetch_lyricscom, - 'musixmatch': fetch_musixmatch, -} + if bad_triggers_occ: + self._log.debug('Bad triggers detected: {0}', bad_triggers_occ) + return len(bad_triggers_occ) < 2 + + def slugify(self, text): + """Normalize a string and remove non-alphanumeric characters. + """ + text = re.sub(r"[-'_\s]", '_', text) + text = re.sub(r"_+", '_', text).strip('_') + pat = r"([^,\(]*)\((.*?)\)" # Remove content within parentheses + text = re.sub(pat, r'\g<1>', text).strip() + try: + text = unicodedata.normalize('NFKD', text).encode('ascii', + 'ignore') + text = str(re.sub(r'[-\s]+', ' ', text.decode('utf-8'))) + except UnicodeDecodeError: + self._log.exception("Failing to normalize '{0}'", text) + return text + + BY_TRANS = ['by', 'par', 'de', 'von'] + LYRICS_TRANS = ['lyrics', 'paroles', 'letras', 'liedtexte'] + + def is_page_candidate(self, url_link, url_title, title, artist): + """Return True if the URL title makes it a good candidate to be a + page that contains lyrics of title by artist. + """ + title = self.slugify(title.lower()) + artist = self.slugify(artist.lower()) + sitename = re.search("//([^/]+)/.*", + self.slugify(url_link.lower())).group(1) + url_title = self.slugify(url_title.lower()) + + # Check if URL title contains song title (exact match) + if url_title.find(title) != -1: + return True + + # or try extracting song title from URL title and check if + # they are close enough + tokens = [by + '_' + artist for by in self.BY_TRANS] + \ + [artist, sitename, sitename.replace('www.', '')] + \ + self.LYRICS_TRANS + tokens = [re.escape(t) for t in tokens] + song_title = re.sub('(%s)' % '|'.join(tokens), '', url_title) + + song_title = song_title.strip('_|') + typo_ratio = .9 + ratio = difflib.SequenceMatcher(None, song_title, title).ratio() + return ratio >= typo_ratio + + def fetch(self, artist, title): + query = f"{artist} {title}" + url = 'https://www.googleapis.com/customsearch/v1?key=%s&cx=%s&q=%s' \ + % (self.api_key, self.engine_id, + urllib.parse.quote(query.encode('utf-8'))) + + data = self.fetch_url(url) + if not data: + self._log.debug('google backend returned no data') + return None + try: + data = json.loads(data) + except ValueError as exc: + self._log.debug('google backend returned malformed JSON: {}', exc) + if 'error' in data: + reason = data['error']['errors'][0]['reason'] + self._log.debug('google backend error: {0}', reason) + return None + + if 'items' in data.keys(): + for item in data['items']: + url_link = item['link'] + url_title = item.get('title', '') + if not self.is_page_candidate(url_link, url_title, + title, artist): + continue + html = self.fetch_url(url_link) + if not html: + continue + lyrics = scrape_lyrics_from_html(html) + if not lyrics: + continue + + if self.is_lyrics(lyrics, artist): + self._log.debug('got lyrics from {0}', + item['displayLink']) + return lyrics + + return None class LyricsPlugin(plugins.BeetsPlugin): + SOURCES = ['google', 'musixmatch', 'genius', 'tekstowo'] + SOURCE_BACKENDS = { + 'google': Google, + 'musixmatch': MusiXmatch, + 'genius': Genius, + 'tekstowo': Tekstowo, + } + def __init__(self): - super(LyricsPlugin, self).__init__() + super().__init__() self.import_stages = [self.imported] self.config.add({ 'auto': True, + 'bing_client_secret': None, + 'bing_lang_from': [], + 'bing_lang_to': None, 'google_API_key': None, - 'google_engine_ID': u'009217259823014548361:lndtuqkycfu', + 'google_engine_ID': '009217259823014548361:lndtuqkycfu', + 'genius_api_key': + "Ryq93pUGm8bM6eUWwD_M3NOFFDAtp2yEE7W" + "76V-uFL5jks5dNvcGCdarqFjDhP9c", 'fallback': None, 'force': False, - 'sources': SOURCES, + 'local': False, + 'sources': self.SOURCES, }) + self.config['bing_client_secret'].redact = True + self.config['google_API_key'].redact = True + self.config['google_engine_ID'].redact = True + self.config['genius_api_key'].redact = True - available_sources = list(SOURCES) - if not self.config['google_API_key'].get() and \ - 'google' in SOURCES: - available_sources.remove('google') - self.config['sources'] = plugins.sanitize_choices( + # State information for the ReST writer. + # First, the current artist we're writing. + self.artist = 'Unknown artist' + # The current album: False means no album yet. + self.album = False + # The current rest file content. None means the file is not + # open yet. + self.rest = None + + available_sources = list(self.SOURCES) + sources = plugins.sanitize_choices( self.config['sources'].as_str_seq(), available_sources) - self.backends = [] - for key in self.config['sources'].as_str_seq(): - self.backends.append(SOURCE_BACKENDS[key]) + + if not HAS_BEAUTIFUL_SOUP: + sources = self.sanitize_bs_sources(sources) + + if 'google' in sources: + if not self.config['google_API_key'].get(): + # We log a *debug* message here because the default + # configuration includes `google`. This way, the source + # is silent by default but can be enabled just by + # setting an API key. + self._log.debug('Disabling google source: ' + 'no API key configured.') + sources.remove('google') + + self.config['bing_lang_from'] = [ + x.lower() for x in self.config['bing_lang_from'].as_str_seq()] + self.bing_auth_token = None + + if not HAS_LANGDETECT and self.config['bing_client_secret'].get(): + self._log.warning('To use bing translations, you need to ' + 'install the langdetect module. See the ' + 'documentation for further details.') + + self.backends = [self.SOURCE_BACKENDS[source](self.config, self._log) + for source in sources] + + def sanitize_bs_sources(self, sources): + enabled_sources = [] + for source in sources: + if self.SOURCE_BACKENDS[source].REQUIRES_BS: + self._log.debug('To use the %s lyrics source, you must ' + 'install the beautifulsoup4 module. See ' + 'the documentation for further details.' + % source) + else: + enabled_sources.append(source) + + return enabled_sources + + def get_bing_access_token(self): + params = { + 'client_id': 'beets', + 'client_secret': self.config['bing_client_secret'], + 'scope': "https://api.microsofttranslator.com", + 'grant_type': 'client_credentials', + } + + oauth_url = 'https://datamarket.accesscontrol.windows.net/v2/OAuth2-13' + oauth_token = json.loads(requests.post( + oauth_url, + data=urllib.parse.urlencode(params)).content) + if 'access_token' in oauth_token: + return "Bearer " + oauth_token['access_token'] + else: + self._log.warning('Could not get Bing Translate API access token.' + ' Check your "bing_client_secret" password') def commands(self): cmd = ui.Subcommand('lyrics', help='fetch song lyrics') - cmd.parser.add_option('-p', '--print', dest='printlyr', - action='store_true', default=False, - help='print lyrics to console') - cmd.parser.add_option('-f', '--force', dest='force_refetch', - action='store_true', default=False, - help='always re-download lyrics') + cmd.parser.add_option( + '-p', '--print', dest='printlyr', + action='store_true', default=False, + help='print lyrics to console', + ) + cmd.parser.add_option( + '-r', '--write-rest', dest='writerest', + action='store', default=None, metavar='dir', + help='write lyrics to given directory as ReST files', + ) + cmd.parser.add_option( + '-f', '--force', dest='force_refetch', + action='store_true', default=False, + help='always re-download lyrics', + ) + cmd.parser.add_option( + '-l', '--local', dest='local_only', + action='store_true', default=False, + help='do not fetch missing lyrics', + ) def func(lib, opts, args): # The "write to files" option corresponds to the # import_write config value. - write = config['import']['write'].get(bool) - for item in lib.items(ui.decargs(args)): - self.fetch_item_lyrics( - lib, logging.INFO, item, write, - opts.force_refetch or self.config['force'], - ) - if opts.printlyr and item.lyrics: - ui.print_(item.lyrics) - + write = ui.should_write() + if opts.writerest: + self.writerest_indexes(opts.writerest) + items = lib.items(ui.decargs(args)) + for item in items: + if not opts.local_only and not self.config['local']: + self.fetch_item_lyrics( + lib, item, write, + opts.force_refetch or self.config['force'], + ) + if item.lyrics: + if opts.printlyr: + ui.print_(item.lyrics) + if opts.writerest: + self.appendrest(opts.writerest, item) + if opts.writerest and items: + # flush last artist & write to ReST + self.writerest(opts.writerest) + ui.print_('ReST files generated. to build, use one of:') + ui.print_(' sphinx-build -b html %s _build/html' + % opts.writerest) + ui.print_(' sphinx-build -b epub %s _build/epub' + % opts.writerest) + ui.print_((' sphinx-build -b latex %s _build/latex ' + '&& make -C _build/latex all-pdf') + % opts.writerest) cmd.func = func return [cmd] + def appendrest(self, directory, item): + """Append the item to an ReST file + + This will keep state (in the `rest` variable) in order to avoid + writing continuously to the same files. + """ + + if slug(self.artist) != slug(item.albumartist): + # Write current file and start a new one ~ item.albumartist + self.writerest(directory) + self.artist = item.albumartist.strip() + self.rest = "%s\n%s\n\n.. contents::\n :local:\n\n" \ + % (self.artist, + '=' * len(self.artist)) + + if self.album != item.album: + tmpalbum = self.album = item.album.strip() + if self.album == '': + tmpalbum = 'Unknown album' + self.rest += "{}\n{}\n\n".format(tmpalbum, '-' * len(tmpalbum)) + title_str = ":index:`%s`" % item.title.strip() + block = '| ' + item.lyrics.replace('\n', '\n| ') + self.rest += "{}\n{}\n\n{}\n\n".format(title_str, + '~' * len(title_str), + block) + + def writerest(self, directory): + """Write self.rest to a ReST file + """ + if self.rest is not None and self.artist is not None: + path = os.path.join(directory, 'artists', + slug(self.artist) + '.rst') + with open(path, 'wb') as output: + output.write(self.rest.encode('utf-8')) + + def writerest_indexes(self, directory): + """Write conf.py and index.rst files necessary for Sphinx + + We write minimal configurations that are necessary for Sphinx + to operate. We do not overwrite existing files so that + customizations are respected.""" + try: + os.makedirs(os.path.join(directory, 'artists')) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + indexfile = os.path.join(directory, 'index.rst') + if not os.path.exists(indexfile): + with open(indexfile, 'w') as output: + output.write(REST_INDEX_TEMPLATE) + conffile = os.path.join(directory, 'conf.py') + if not os.path.exists(conffile): + with open(conffile, 'w') as output: + output.write(REST_CONF_TEMPLATE) + def imported(self, session, task): """Import hook for fetching lyrics automatically. """ if self.config['auto']: for item in task.imported_items(): - self.fetch_item_lyrics(session.lib, logging.DEBUG, item, + self.fetch_item_lyrics(session.lib, item, False, self.config['force']) - def fetch_item_lyrics(self, lib, loglevel, item, write, force): + def fetch_item_lyrics(self, lib, item, write, force): """Fetch and store lyrics for a single item. If ``write``, then the - lyrics will also be written to the file itself. The ``loglevel`` - parameter controls the visibility of the function's status log - messages. + lyrics will also be written to the file itself. """ # Skip if the item already has lyrics. if not force and item.lyrics: - log.log(loglevel, u'lyrics already present: {0} - {1}' - .format(item.artist, item.title)) + self._log.info('lyrics already present: {0}', item) return lyrics = None @@ -512,22 +920,26 @@ class LyricsPlugin(plugins.BeetsPlugin): if any(lyrics): break - lyrics = u"\n\n---\n\n".join([l for l in lyrics if l]) + lyrics = "\n\n---\n\n".join([l for l in lyrics if l]) if lyrics: - log.log(loglevel, u'fetched lyrics: {0} - {1}' - .format(item.artist, item.title)) + self._log.info('fetched lyrics: {0}', item) + if HAS_LANGDETECT and self.config['bing_client_secret'].get(): + lang_from = langdetect.detect(lyrics) + if self.config['bing_lang_to'].get() != lang_from and ( + not self.config['bing_lang_from'] or ( + lang_from in self.config[ + 'bing_lang_from'].as_str_seq())): + lyrics = self.append_translation( + lyrics, self.config['bing_lang_to']) else: - log.log(loglevel, u'lyrics not found: {0} - {1}' - .format(item.artist, item.title)) + self._log.info('lyrics not found: {0}', item) fallback = self.config['fallback'].get() if fallback: lyrics = fallback else: return - item.lyrics = lyrics - if write: item.try_write() item.store() @@ -537,8 +949,36 @@ class LyricsPlugin(plugins.BeetsPlugin): None if no lyrics were found. """ for backend in self.backends: - lyrics = backend(artist, title) + lyrics = backend.fetch(artist, title) if lyrics: - log.debug(u'got lyrics from backend: {0}' - .format(backend.__name__)) + self._log.debug('got lyrics from backend: {0}', + backend.__class__.__name__) return _scrape_strip_cruft(lyrics, True) + + def append_translation(self, text, to_lang): + from xml.etree import ElementTree + + if not self.bing_auth_token: + self.bing_auth_token = self.get_bing_access_token() + if self.bing_auth_token: + # Extract unique lines to limit API request size per song + text_lines = set(text.split('\n')) + url = ('https://api.microsofttranslator.com/v2/Http.svc/' + 'Translate?text=%s&to=%s' % ('|'.join(text_lines), to_lang)) + r = requests.get(url, + headers={"Authorization ": self.bing_auth_token}) + if r.status_code != 200: + self._log.debug('translation API error {}: {}', r.status_code, + r.text) + if 'token has expired' in r.text: + self.bing_auth_token = None + return self.append_translation(text, to_lang) + return text + lines_translated = ElementTree.fromstring( + r.text.encode('utf-8')).text + # Use a translation mapping dict to build resulting lyrics + translations = dict(zip(text_lines, lines_translated.split('|'))) + result = '' + for line in text.split('\n'): + result += '{} / {}\n'.format(line, translations[line]) + return result diff --git a/lib/beetsplug/mbcollection.py b/lib/beetsplug/mbcollection.py new file mode 100644 index 00000000..f4a0d161 --- /dev/null +++ b/lib/beetsplug/mbcollection.py @@ -0,0 +1,164 @@ +# This file is part of beets. +# Copyright (c) 2011, Jeffrey Aylesworth <mail@jeffrey.red> +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand +from beets import ui +from beets import config +import musicbrainzngs + +import re + +SUBMISSION_CHUNK_SIZE = 200 +FETCH_CHUNK_SIZE = 100 +UUID_REGEX = r'^[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}$' + + +def mb_call(func, *args, **kwargs): + """Call a MusicBrainz API function and catch exceptions. + """ + try: + return func(*args, **kwargs) + except musicbrainzngs.AuthenticationError: + raise ui.UserError('authentication with MusicBrainz failed') + except (musicbrainzngs.ResponseError, musicbrainzngs.NetworkError) as exc: + raise ui.UserError(f'MusicBrainz API error: {exc}') + except musicbrainzngs.UsageError: + raise ui.UserError('MusicBrainz credentials missing') + + +def submit_albums(collection_id, release_ids): + """Add all of the release IDs to the indicated collection. Multiple + requests are made if there are many release IDs to submit. + """ + for i in range(0, len(release_ids), SUBMISSION_CHUNK_SIZE): + chunk = release_ids[i:i + SUBMISSION_CHUNK_SIZE] + mb_call( + musicbrainzngs.add_releases_to_collection, + collection_id, chunk + ) + + +class MusicBrainzCollectionPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + config['musicbrainz']['pass'].redact = True + musicbrainzngs.auth( + config['musicbrainz']['user'].as_str(), + config['musicbrainz']['pass'].as_str(), + ) + self.config.add({ + 'auto': False, + 'collection': '', + 'remove': False, + }) + if self.config['auto']: + self.import_stages = [self.imported] + + def _get_collection(self): + collections = mb_call(musicbrainzngs.get_collections) + if not collections['collection-list']: + raise ui.UserError('no collections exist for user') + + # Get all collection IDs, avoiding event collections + collection_ids = [x['id'] for x in collections['collection-list']] + if not collection_ids: + raise ui.UserError('No collection found.') + + # Check that the collection exists so we can present a nice error + collection = self.config['collection'].as_str() + if collection: + if collection not in collection_ids: + raise ui.UserError('invalid collection ID: {}' + .format(collection)) + return collection + + # No specified collection. Just return the first collection ID + return collection_ids[0] + + def _get_albums_in_collection(self, id): + def _fetch(offset): + res = mb_call( + musicbrainzngs.get_releases_in_collection, + id, + limit=FETCH_CHUNK_SIZE, + offset=offset + )['collection'] + return [x['id'] for x in res['release-list']], res['release-count'] + + offset = 0 + albums_in_collection, release_count = _fetch(offset) + for i in range(0, release_count, FETCH_CHUNK_SIZE): + albums_in_collection += _fetch(offset)[0] + offset += FETCH_CHUNK_SIZE + + return albums_in_collection + + def commands(self): + mbupdate = Subcommand('mbupdate', + help='Update MusicBrainz collection') + mbupdate.parser.add_option('-r', '--remove', + action='store_true', + default=None, + dest='remove', + help='Remove albums not in beets library') + mbupdate.func = self.update_collection + return [mbupdate] + + def remove_missing(self, collection_id, lib_albums): + lib_ids = {x.mb_albumid for x in lib_albums} + albums_in_collection = self._get_albums_in_collection(collection_id) + remove_me = list(set(albums_in_collection) - lib_ids) + for i in range(0, len(remove_me), FETCH_CHUNK_SIZE): + chunk = remove_me[i:i + FETCH_CHUNK_SIZE] + mb_call( + musicbrainzngs.remove_releases_from_collection, + collection_id, chunk + ) + + def update_collection(self, lib, opts, args): + self.config.set_args(opts) + remove_missing = self.config['remove'].get(bool) + self.update_album_list(lib, lib.albums(), remove_missing) + + def imported(self, session, task): + """Add each imported album to the collection. + """ + if task.is_album: + self.update_album_list(session.lib, [task.album]) + + def update_album_list(self, lib, album_list, remove_missing=False): + """Update the MusicBrainz collection from a list of Beets albums + """ + collection_id = self._get_collection() + + # Get a list of all the album IDs. + album_ids = [] + for album in album_list: + aid = album.mb_albumid + if aid: + if re.match(UUID_REGEX, aid): + album_ids.append(aid) + else: + self._log.info('skipping invalid MBID: {0}', aid) + + # Submit to MusicBrainz. + self._log.info( + 'Updating MusicBrainz collection {0}...', collection_id + ) + submit_albums(collection_id, album_ids) + if remove_missing: + self.remove_missing(collection_id, lib.albums()) + self._log.info('...MusicBrainz collection updated.') diff --git a/lib/beetsplug/mbsubmit.py b/lib/beetsplug/mbsubmit.py new file mode 100644 index 00000000..3ede0125 --- /dev/null +++ b/lib/beetsplug/mbsubmit.py @@ -0,0 +1,57 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson and Diego Moreda. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Aid in submitting information to MusicBrainz. + +This plugin allows the user to print track information in a format that is +parseable by the MusicBrainz track parser [1]. Programmatic submitting is not +implemented by MusicBrainz yet. + +[1] https://wiki.musicbrainz.org/History:How_To_Parse_Track_Listings +""" + + +from beets.autotag import Recommendation +from beets.plugins import BeetsPlugin +from beets.ui.commands import PromptChoice +from beetsplug.info import print_data + + +class MBSubmitPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({ + 'format': '$track. $title - $artist ($length)', + 'threshold': 'medium', + }) + + # Validate and store threshold. + self.threshold = self.config['threshold'].as_choice({ + 'none': Recommendation.none, + 'low': Recommendation.low, + 'medium': Recommendation.medium, + 'strong': Recommendation.strong + }) + + self.register_listener('before_choose_candidate', + self.before_choose_candidate_event) + + def before_choose_candidate_event(self, session, task): + if task.rec <= self.threshold: + return [PromptChoice('p', 'Print tracks', self.print_tracks)] + + def print_tracks(self, session, task): + for i in sorted(task.items, key=lambda i: i.track): + print_data(None, i, self.config['format'].as_str()) diff --git a/lib/beetsplug/mbsync.py b/lib/beetsplug/mbsync.py new file mode 100644 index 00000000..26778830 --- /dev/null +++ b/lib/beetsplug/mbsync.py @@ -0,0 +1,178 @@ +# This file is part of beets. +# Copyright 2016, Jakob Schnitzer. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Update library's tags using MusicBrainz. +""" + +from beets.plugins import BeetsPlugin, apply_item_changes +from beets import autotag, library, ui, util +from beets.autotag import hooks +from collections import defaultdict + +import re + +MBID_REGEX = r"(\d|\w){8}-(\d|\w){4}-(\d|\w){4}-(\d|\w){4}-(\d|\w){12}" + + +class MBSyncPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + def commands(self): + cmd = ui.Subcommand('mbsync', + help='update metadata from musicbrainz') + cmd.parser.add_option( + '-p', '--pretend', action='store_true', + help='show all changes but do nothing') + cmd.parser.add_option( + '-m', '--move', action='store_true', dest='move', + help="move files in the library directory") + cmd.parser.add_option( + '-M', '--nomove', action='store_false', dest='move', + help="don't move files in library") + cmd.parser.add_option( + '-W', '--nowrite', action='store_false', + default=None, dest='write', + help="don't write updated metadata to files") + cmd.parser.add_format_option() + cmd.func = self.func + return [cmd] + + def func(self, lib, opts, args): + """Command handler for the mbsync function. + """ + move = ui.should_move(opts.move) + pretend = opts.pretend + write = ui.should_write(opts.write) + query = ui.decargs(args) + + self.singletons(lib, query, move, pretend, write) + self.albums(lib, query, move, pretend, write) + + def singletons(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for items matched by + query. + """ + for item in lib.items(query + ['singleton:true']): + item_formatted = format(item) + if not item.mb_trackid: + self._log.info('Skipping singleton with no mb_trackid: {0}', + item_formatted) + continue + + # Do we have a valid MusicBrainz track ID? + if not re.match(MBID_REGEX, item.mb_trackid): + self._log.info('Skipping singleton with invalid mb_trackid:' + + ' {0}', item_formatted) + continue + + # Get the MusicBrainz recording info. + track_info = hooks.track_for_mbid(item.mb_trackid) + if not track_info: + self._log.info('Recording ID not found: {0} for track {0}', + item.mb_trackid, + item_formatted) + continue + + # Apply. + with lib.transaction(): + autotag.apply_item_metadata(item, track_info) + apply_item_changes(lib, item, move, pretend, write) + + def albums(self, lib, query, move, pretend, write): + """Retrieve and apply info from the autotagger for albums matched by + query and their items. + """ + # Process matching albums. + for a in lib.albums(query): + album_formatted = format(a) + if not a.mb_albumid: + self._log.info('Skipping album with no mb_albumid: {0}', + album_formatted) + continue + + items = list(a.items()) + + # Do we have a valid MusicBrainz album ID? + if not re.match(MBID_REGEX, a.mb_albumid): + self._log.info('Skipping album with invalid mb_albumid: {0}', + album_formatted) + continue + + # Get the MusicBrainz album information. + album_info = hooks.album_for_mbid(a.mb_albumid) + if not album_info: + self._log.info('Release ID {0} not found for album {1}', + a.mb_albumid, + album_formatted) + continue + + # Map release track and recording MBIDs to their information. + # Recordings can appear multiple times on a release, so each MBID + # maps to a list of TrackInfo objects. + releasetrack_index = {} + track_index = defaultdict(list) + for track_info in album_info.tracks: + releasetrack_index[track_info.release_track_id] = track_info + track_index[track_info.track_id].append(track_info) + + # Construct a track mapping according to MBIDs (release track MBIDs + # first, if available, and recording MBIDs otherwise). This should + # work for albums that have missing or extra tracks. + mapping = {} + for item in items: + if item.mb_releasetrackid and \ + item.mb_releasetrackid in releasetrack_index: + mapping[item] = releasetrack_index[item.mb_releasetrackid] + else: + candidates = track_index[item.mb_trackid] + if len(candidates) == 1: + mapping[item] = candidates[0] + else: + # If there are multiple copies of a recording, they are + # disambiguated using their disc and track number. + for c in candidates: + if (c.medium_index == item.track and + c.medium == item.disc): + mapping[item] = c + break + + # Apply. + self._log.debug('applying changes to {}', album_formatted) + with lib.transaction(): + autotag.apply_metadata(album_info, mapping) + changed = False + # Find any changed item to apply MusicBrainz changes to album. + any_changed_item = items[0] + for item in items: + item_changed = ui.show_model_changes(item) + changed |= item_changed + if item_changed: + any_changed_item = item + apply_item_changes(lib, item, move, pretend, write) + + if not changed: + # No change to any item. + continue + + if not pretend: + # Update album structure to reflect an item in it. + for key in library.Album.item_keys: + a[key] = any_changed_item[key] + a.store() + + # Move album art (and any inconsistent items). + if move and lib.directory in util.ancestry(items[0].path): + self._log.debug('moving album {0}', album_formatted) + a.move() diff --git a/lib/beetsplug/metasync/__init__.py b/lib/beetsplug/metasync/__init__.py new file mode 100644 index 00000000..361071fb --- /dev/null +++ b/lib/beetsplug/metasync/__init__.py @@ -0,0 +1,138 @@ +# This file is part of beets. +# Copyright 2016, Heinz Wiesinger. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Synchronize information from music player libraries +""" + + +from abc import abstractmethod, ABCMeta +from importlib import import_module + +from confuse import ConfigValueError +from beets import ui +from beets.plugins import BeetsPlugin + + +METASYNC_MODULE = 'beetsplug.metasync' + +# Dictionary to map the MODULE and the CLASS NAME of meta sources +SOURCES = { + 'amarok': 'Amarok', + 'itunes': 'Itunes', +} + + +class MetaSource(metaclass=ABCMeta): + def __init__(self, config, log): + self.item_types = {} + self.config = config + self._log = log + + @abstractmethod + def sync_from_source(self, item): + pass + + +def load_meta_sources(): + """ Returns a dictionary of all the MetaSources + E.g., {'itunes': Itunes} with isinstance(Itunes, MetaSource) true + """ + meta_sources = {} + + for module_path, class_name in SOURCES.items(): + module = import_module(METASYNC_MODULE + '.' + module_path) + meta_sources[class_name.lower()] = getattr(module, class_name) + + return meta_sources + + +META_SOURCES = load_meta_sources() + + +def load_item_types(): + """ Returns a dictionary containing the item_types of all the MetaSources + """ + item_types = {} + for meta_source in META_SOURCES.values(): + item_types.update(meta_source.item_types) + return item_types + + +class MetaSyncPlugin(BeetsPlugin): + + item_types = load_item_types() + + def __init__(self): + super().__init__() + + def commands(self): + cmd = ui.Subcommand('metasync', + help='update metadata from music player libraries') + cmd.parser.add_option('-p', '--pretend', action='store_true', + help='show all changes but do nothing') + cmd.parser.add_option('-s', '--source', default=[], + action='append', dest='sources', + help='comma-separated list of sources to sync') + cmd.parser.add_format_option() + cmd.func = self.func + return [cmd] + + def func(self, lib, opts, args): + """Command handler for the metasync function. + """ + pretend = opts.pretend + query = ui.decargs(args) + + sources = [] + for source in opts.sources: + sources.extend(source.split(',')) + + sources = sources or self.config['source'].as_str_seq() + + meta_source_instances = {} + items = lib.items(query) + + # Avoid needlessly instantiating meta sources (can be expensive) + if not items: + self._log.info('No items found matching query') + return + + # Instantiate the meta sources + for player in sources: + try: + cls = META_SOURCES[player] + except KeyError: + self._log.error('Unknown metadata source \'{}\''.format( + player)) + + try: + meta_source_instances[player] = cls(self.config, self._log) + except (ImportError, ConfigValueError) as e: + self._log.error('Failed to instantiate metadata source ' + '\'{}\': {}'.format(player, e)) + + # Avoid needlessly iterating over items + if not meta_source_instances: + self._log.error('No valid metadata sources found') + return + + # Sync the items with all of the meta sources + for item in items: + for meta_source in meta_source_instances.values(): + meta_source.sync_from_source(item) + + changed = ui.show_model_changes(item) + + if changed and not pretend: + item.store() diff --git a/lib/beetsplug/metasync/amarok.py b/lib/beetsplug/metasync/amarok.py new file mode 100644 index 00000000..a49eecc3 --- /dev/null +++ b/lib/beetsplug/metasync/amarok.py @@ -0,0 +1,110 @@ +# This file is part of beets. +# Copyright 2016, Heinz Wiesinger. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Synchronize information from amarok's library via dbus +""" + + +from os.path import basename +from datetime import datetime +from time import mktime +from xml.sax.saxutils import quoteattr + +from beets.util import displayable_path +from beets.dbcore import types +from beets.library import DateType +from beetsplug.metasync import MetaSource + + +def import_dbus(): + try: + return __import__('dbus') + except ImportError: + return None + +dbus = import_dbus() + + +class Amarok(MetaSource): + + item_types = { + 'amarok_rating': types.INTEGER, + 'amarok_score': types.FLOAT, + 'amarok_uid': types.STRING, + 'amarok_playcount': types.INTEGER, + 'amarok_firstplayed': DateType(), + 'amarok_lastplayed': DateType(), + } + + query_xml = '<query version="1.0"> \ + <filters> \ + <and><include field="filename" value=%s /></and> \ + </filters> \ + </query>' + + def __init__(self, config, log): + super().__init__(config, log) + + if not dbus: + raise ImportError('failed to import dbus') + + self.collection = \ + dbus.SessionBus().get_object('org.kde.amarok', '/Collection') + + def sync_from_source(self, item): + path = displayable_path(item.path) + + # amarok unfortunately doesn't allow searching for the full path, only + # for the patch relative to the mount point. But the full path is part + # of the result set. So query for the filename and then try to match + # the correct item from the results we get back + results = self.collection.Query( + self.query_xml % quoteattr(basename(path)) + ) + for result in results: + if result['xesam:url'] != path: + continue + + item.amarok_rating = result['xesam:userRating'] + item.amarok_score = result['xesam:autoRating'] + item.amarok_playcount = result['xesam:useCount'] + item.amarok_uid = \ + result['xesam:id'].replace('amarok-sqltrackuid://', '') + + if result['xesam:firstUsed'][0][0] != 0: + # These dates are stored as timestamps in amarok's db, but + # exposed over dbus as fixed integers in the current timezone. + first_played = datetime( + result['xesam:firstUsed'][0][0], + result['xesam:firstUsed'][0][1], + result['xesam:firstUsed'][0][2], + result['xesam:firstUsed'][1][0], + result['xesam:firstUsed'][1][1], + result['xesam:firstUsed'][1][2] + ) + + if result['xesam:lastUsed'][0][0] != 0: + last_played = datetime( + result['xesam:lastUsed'][0][0], + result['xesam:lastUsed'][0][1], + result['xesam:lastUsed'][0][2], + result['xesam:lastUsed'][1][0], + result['xesam:lastUsed'][1][1], + result['xesam:lastUsed'][1][2] + ) + else: + last_played = first_played + + item.amarok_firstplayed = mktime(first_played.timetuple()) + item.amarok_lastplayed = mktime(last_played.timetuple()) diff --git a/lib/beetsplug/metasync/itunes.py b/lib/beetsplug/metasync/itunes.py new file mode 100644 index 00000000..e50a5713 --- /dev/null +++ b/lib/beetsplug/metasync/itunes.py @@ -0,0 +1,125 @@ +# This file is part of beets. +# Copyright 2016, Tom Jaspers. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Synchronize information from iTunes's library +""" + + +from contextlib import contextmanager +import os +import shutil +import tempfile +import plistlib + +from urllib.parse import urlparse, unquote +from time import mktime + +from beets import util +from beets.dbcore import types +from beets.library import DateType +from confuse import ConfigValueError +from beetsplug.metasync import MetaSource + + +@contextmanager +def create_temporary_copy(path): + temp_dir = tempfile.mkdtemp() + temp_path = os.path.join(temp_dir, 'temp_itunes_lib') + shutil.copyfile(path, temp_path) + try: + yield temp_path + finally: + shutil.rmtree(temp_dir) + + +def _norm_itunes_path(path): + # Itunes prepends the location with 'file://' on posix systems, + # and with 'file://localhost/' on Windows systems. + # The actual path to the file is always saved as posix form + # E.g., 'file://Users/Music/bar' or 'file://localhost/G:/Music/bar' + + # The entire path will also be capitalized (e.g., '/Music/Alt-J') + # Note that this means the path will always have a leading separator, + # which is unwanted in the case of Windows systems. + # E.g., '\\G:\\Music\\bar' needs to be stripped to 'G:\\Music\\bar' + + return util.bytestring_path(os.path.normpath( + unquote(urlparse(path).path)).lstrip('\\')).lower() + + +class Itunes(MetaSource): + + item_types = { + 'itunes_rating': types.INTEGER, # 0..100 scale + 'itunes_playcount': types.INTEGER, + 'itunes_skipcount': types.INTEGER, + 'itunes_lastplayed': DateType(), + 'itunes_lastskipped': DateType(), + 'itunes_dateadded': DateType(), + } + + def __init__(self, config, log): + super().__init__(config, log) + + config.add({'itunes': { + 'library': '~/Music/iTunes/iTunes Library.xml' + }}) + + # Load the iTunes library, which has to be the .xml one (not the .itl) + library_path = config['itunes']['library'].as_filename() + + try: + self._log.debug( + f'loading iTunes library from {library_path}') + with create_temporary_copy(library_path) as library_copy: + with open(library_copy, 'rb') as library_copy_f: + raw_library = plistlib.load(library_copy_f) + except OSError as e: + raise ConfigValueError('invalid iTunes library: ' + e.strerror) + except Exception: + # It's likely the user configured their '.itl' library (<> xml) + if os.path.splitext(library_path)[1].lower() != '.xml': + hint = ': please ensure that the configured path' \ + ' points to the .XML library' + else: + hint = '' + raise ConfigValueError('invalid iTunes library' + hint) + + # Make the iTunes library queryable using the path + self.collection = {_norm_itunes_path(track['Location']): track + for track in raw_library['Tracks'].values() + if 'Location' in track} + + def sync_from_source(self, item): + result = self.collection.get(util.bytestring_path(item.path).lower()) + + if not result: + self._log.warning(f'no iTunes match found for {item}') + return + + item.itunes_rating = result.get('Rating') + item.itunes_playcount = result.get('Play Count') + item.itunes_skipcount = result.get('Skip Count') + + if result.get('Play Date UTC'): + item.itunes_lastplayed = mktime( + result.get('Play Date UTC').timetuple()) + + if result.get('Skip Date'): + item.itunes_lastskipped = mktime( + result.get('Skip Date').timetuple()) + + if result.get('Date Added'): + item.itunes_dateadded = mktime( + result.get('Date Added').timetuple()) diff --git a/lib/beetsplug/missing.py b/lib/beetsplug/missing.py new file mode 100644 index 00000000..771978c1 --- /dev/null +++ b/lib/beetsplug/missing.py @@ -0,0 +1,226 @@ +# This file is part of beets. +# Copyright 2016, Pedro Silva. +# Copyright 2017, Quentin Young. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""List missing tracks. +""" + +import musicbrainzngs + +from musicbrainzngs.musicbrainz import MusicBrainzError +from collections import defaultdict +from beets.autotag import hooks +from beets.library import Item +from beets.plugins import BeetsPlugin +from beets.ui import decargs, print_, Subcommand +from beets import config +from beets.dbcore import types + + +def _missing_count(album): + """Return number of missing items in `album`. + """ + return (album.albumtotal or 0) - len(album.items()) + + +def _item(track_info, album_info, album_id): + """Build and return `item` from `track_info` and `album info` + objects. `item` is missing what fields cannot be obtained from + MusicBrainz alone (encoder, rg_track_gain, rg_track_peak, + rg_album_gain, rg_album_peak, original_year, original_month, + original_day, length, bitrate, format, samplerate, bitdepth, + channels, mtime.) + """ + t = track_info + a = album_info + + return Item(**{ + 'album_id': album_id, + 'album': a.album, + 'albumartist': a.artist, + 'albumartist_credit': a.artist_credit, + 'albumartist_sort': a.artist_sort, + 'albumdisambig': a.albumdisambig, + 'albumstatus': a.albumstatus, + 'albumtype': a.albumtype, + 'artist': t.artist, + 'artist_credit': t.artist_credit, + 'artist_sort': t.artist_sort, + 'asin': a.asin, + 'catalognum': a.catalognum, + 'comp': a.va, + 'country': a.country, + 'day': a.day, + 'disc': t.medium, + 'disctitle': t.disctitle, + 'disctotal': a.mediums, + 'label': a.label, + 'language': a.language, + 'length': t.length, + 'mb_albumid': a.album_id, + 'mb_artistid': t.artist_id, + 'mb_releasegroupid': a.releasegroup_id, + 'mb_trackid': t.track_id, + 'media': t.media, + 'month': a.month, + 'script': a.script, + 'title': t.title, + 'track': t.index, + 'tracktotal': len(a.tracks), + 'year': a.year, + }) + + +class MissingPlugin(BeetsPlugin): + """List missing tracks + """ + + album_types = { + 'missing': types.INTEGER, + } + + def __init__(self): + super().__init__() + + self.config.add({ + 'count': False, + 'total': False, + 'album': False, + }) + + self.album_template_fields['missing'] = _missing_count + + self._command = Subcommand('missing', + help=__doc__, + aliases=['miss']) + self._command.parser.add_option( + '-c', '--count', dest='count', action='store_true', + help='count missing tracks per album') + self._command.parser.add_option( + '-t', '--total', dest='total', action='store_true', + help='count total of missing tracks') + self._command.parser.add_option( + '-a', '--album', dest='album', action='store_true', + help='show missing albums for artist instead of tracks') + self._command.parser.add_format_option() + + def commands(self): + def _miss(lib, opts, args): + self.config.set_args(opts) + albms = self.config['album'].get() + + helper = self._missing_albums if albms else self._missing_tracks + helper(lib, decargs(args)) + + self._command.func = _miss + return [self._command] + + def _missing_tracks(self, lib, query): + """Print a listing of tracks missing from each album in the library + matching query. + """ + albums = lib.albums(query) + + count = self.config['count'].get() + total = self.config['total'].get() + fmt = config['format_album' if count else 'format_item'].get() + + if total: + print(sum([_missing_count(a) for a in albums])) + return + + # Default format string for count mode. + if count: + fmt += ': $missing' + + for album in albums: + if count: + if _missing_count(album): + print_(format(album, fmt)) + + else: + for item in self._missing(album): + print_(format(item, fmt)) + + def _missing_albums(self, lib, query): + """Print a listing of albums missing from each artist in the library + matching query. + """ + total = self.config['total'].get() + + albums = lib.albums(query) + # build dict mapping artist to list of their albums in library + albums_by_artist = defaultdict(list) + for alb in albums: + artist = (alb['albumartist'], alb['mb_albumartistid']) + albums_by_artist[artist].append(alb) + + total_missing = 0 + + # build dict mapping artist to list of all albums + for artist, albums in albums_by_artist.items(): + if artist[1] is None or artist[1] == "": + albs_no_mbid = ["'" + a['album'] + "'" for a in albums] + self._log.info( + "No musicbrainz ID for artist '{}' found in album(s) {}; " + "skipping", artist[0], ", ".join(albs_no_mbid) + ) + continue + + try: + resp = musicbrainzngs.browse_release_groups(artist=artist[1]) + release_groups = resp['release-group-list'] + except MusicBrainzError as err: + self._log.info( + "Couldn't fetch info for artist '{}' ({}) - '{}'", + artist[0], artist[1], err + ) + continue + + missing = [] + present = [] + for rg in release_groups: + missing.append(rg) + for alb in albums: + if alb['mb_releasegroupid'] == rg['id']: + missing.remove(rg) + present.append(rg) + break + + total_missing += len(missing) + if total: + continue + + missing_titles = {rg['title'] for rg in missing} + + for release_title in missing_titles: + print_("{} - {}".format(artist[0], release_title)) + + if total: + print(total_missing) + + def _missing(self, album): + """Query MusicBrainz to determine items missing from `album`. + """ + item_mbids = [x.mb_trackid for x in album.items()] + if len(list(album.items())) < album.albumtotal: + # fetch missing items + # TODO: Implement caching that without breaking other stuff + album_info = hooks.album_for_mbid(album.mb_albumid) + for track_info in getattr(album_info, 'tracks', []): + if track_info.track_id not in item_mbids: + item = _item(track_info, album_info, album.id) + self._log.debug('track {0} in album {1}', + track_info.track_id, album_info.album_id) + yield item diff --git a/lib/beetsplug/mpdstats.py b/lib/beetsplug/mpdstats.py new file mode 100644 index 00000000..96291cf4 --- /dev/null +++ b/lib/beetsplug/mpdstats.py @@ -0,0 +1,380 @@ +# This file is part of beets. +# Copyright 2016, Peter Schnebel and Johann Klähn. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +import mpd +import time +import os + +from beets import ui +from beets import config +from beets import plugins +from beets import library +from beets.util import displayable_path +from beets.dbcore import types + +# If we lose the connection, how many times do we want to retry and how +# much time should we wait between retries? +RETRIES = 10 +RETRY_INTERVAL = 5 + + +mpd_config = config['mpd'] + + +def is_url(path): + """Try to determine if the path is an URL. + """ + if isinstance(path, bytes): # if it's bytes, then it's a path + return False + return path.split('://', 1)[0] in ['http', 'https'] + + +class MPDClientWrapper: + def __init__(self, log): + self._log = log + + self.music_directory = mpd_config['music_directory'].as_str() + self.strip_path = mpd_config['strip_path'].as_str() + + # Ensure strip_path end with '/' + if not self.strip_path.endswith('/'): + self.strip_path += '/' + + self._log.debug('music_directory: {0}', self.music_directory) + self._log.debug('strip_path: {0}', self.strip_path) + + self.client = mpd.MPDClient() + + def connect(self): + """Connect to the MPD. + """ + host = mpd_config['host'].as_str() + port = mpd_config['port'].get(int) + + if host[0] in ['/', '~']: + host = os.path.expanduser(host) + + self._log.info('connecting to {0}:{1}', host, port) + try: + self.client.connect(host, port) + except OSError as e: + raise ui.UserError(f'could not connect to MPD: {e}') + + password = mpd_config['password'].as_str() + if password: + try: + self.client.password(password) + except mpd.CommandError as e: + raise ui.UserError( + f'could not authenticate to MPD: {e}' + ) + + def disconnect(self): + """Disconnect from the MPD. + """ + self.client.close() + self.client.disconnect() + + def get(self, command, retries=RETRIES): + """Wrapper for requests to the MPD server. Tries to re-connect if the + connection was lost (f.ex. during MPD's library refresh). + """ + try: + return getattr(self.client, command)() + except (OSError, mpd.ConnectionError) as err: + self._log.error('{0}', err) + + if retries <= 0: + # if we exited without breaking, we couldn't reconnect in time :( + raise ui.UserError('communication with MPD server failed') + + time.sleep(RETRY_INTERVAL) + + try: + self.disconnect() + except mpd.ConnectionError: + pass + + self.connect() + return self.get(command, retries=retries - 1) + + def currentsong(self): + """Return the path to the currently playing song, along with its + songid. Prefixes paths with the music_directory, to get the absolute + path. + In some cases, we need to remove the local path from MPD server, + we replace 'strip_path' with ''. + `strip_path` defaults to ''. + """ + result = None + entry = self.get('currentsong') + if 'file' in entry: + if not is_url(entry['file']): + file = entry['file'] + if file.startswith(self.strip_path): + file = file[len(self.strip_path):] + result = os.path.join(self.music_directory, file) + else: + result = entry['file'] + self._log.debug('returning: {0}', result) + return result, entry.get('id') + + def status(self): + """Return the current status of the MPD. + """ + return self.get('status') + + def events(self): + """Return list of events. This may block a long time while waiting for + an answer from MPD. + """ + return self.get('idle') + + +class MPDStats: + def __init__(self, lib, log): + self.lib = lib + self._log = log + + self.do_rating = mpd_config['rating'].get(bool) + self.rating_mix = mpd_config['rating_mix'].get(float) + self.time_threshold = 10.0 # TODO: maybe add config option? + + self.now_playing = None + self.mpd = MPDClientWrapper(log) + + def rating(self, play_count, skip_count, rating, skipped): + """Calculate a new rating for a song based on play count, skip count, + old rating and the fact if it was skipped or not. + """ + if skipped: + rolling = (rating - rating / 2.0) + else: + rolling = (rating + (1.0 - rating) / 2.0) + stable = (play_count + 1.0) / (play_count + skip_count + 2.0) + return (self.rating_mix * stable + + (1.0 - self.rating_mix) * rolling) + + def get_item(self, path): + """Return the beets item related to path. + """ + query = library.PathQuery('path', path) + item = self.lib.items(query).get() + if item: + return item + else: + self._log.info('item not found: {0}', displayable_path(path)) + + def update_item(self, item, attribute, value=None, increment=None): + """Update the beets item. Set attribute to value or increment the value + of attribute. If the increment argument is used the value is cast to + the corresponding type. + """ + if item is None: + return + + if increment is not None: + item.load() + value = type(increment)(item.get(attribute, 0)) + increment + + if value is not None: + item[attribute] = value + item.store() + + self._log.debug('updated: {0} = {1} [{2}]', + attribute, + item[attribute], + displayable_path(item.path)) + + def update_rating(self, item, skipped): + """Update the rating for a beets item. The `item` can either be a + beets `Item` or None. If the item is None, nothing changes. + """ + if item is None: + return + + item.load() + rating = self.rating( + int(item.get('play_count', 0)), + int(item.get('skip_count', 0)), + float(item.get('rating', 0.5)), + skipped) + + self.update_item(item, 'rating', rating) + + def handle_song_change(self, song): + """Determine if a song was skipped or not and update its attributes. + To this end the difference between the song's supposed end time + and the current time is calculated. If it's greater than a threshold, + the song is considered skipped. + + Returns whether the change was manual (skipped previous song or not) + """ + diff = abs(song['remaining'] - (time.time() - song['started'])) + + skipped = diff >= self.time_threshold + + if skipped: + self.handle_skipped(song) + else: + self.handle_played(song) + + if self.do_rating: + self.update_rating(song['beets_item'], skipped) + + return skipped + + def handle_played(self, song): + """Updates the play count of a song. + """ + self.update_item(song['beets_item'], 'play_count', increment=1) + self._log.info('played {0}', displayable_path(song['path'])) + + def handle_skipped(self, song): + """Updates the skip count of a song. + """ + self.update_item(song['beets_item'], 'skip_count', increment=1) + self._log.info('skipped {0}', displayable_path(song['path'])) + + def on_stop(self, status): + self._log.info('stop') + + # if the current song stays the same it means that we stopped on the + # current track and should not record a skip. + if self.now_playing and self.now_playing['id'] != status.get('songid'): + self.handle_song_change(self.now_playing) + + self.now_playing = None + + def on_pause(self, status): + self._log.info('pause') + self.now_playing = None + + def on_play(self, status): + + path, songid = self.mpd.currentsong() + + if not path: + return + + played, duration = map(int, status['time'].split(':', 1)) + remaining = duration - played + + if self.now_playing: + if self.now_playing['path'] != path: + self.handle_song_change(self.now_playing) + else: + # In case we got mpd play event with same song playing + # multiple times, + # assume low diff means redundant second play event + # after natural song start. + diff = abs(time.time() - self.now_playing['started']) + + if diff <= self.time_threshold: + return + + if self.now_playing['path'] == path and played == 0: + self.handle_song_change(self.now_playing) + + if is_url(path): + self._log.info('playing stream {0}', displayable_path(path)) + self.now_playing = None + return + + self._log.info('playing {0}', displayable_path(path)) + + self.now_playing = { + 'started': time.time(), + 'remaining': remaining, + 'path': path, + 'id': songid, + 'beets_item': self.get_item(path), + } + + self.update_item(self.now_playing['beets_item'], + 'last_played', value=int(time.time())) + + def run(self): + self.mpd.connect() + events = ['player'] + + while True: + if 'player' in events: + status = self.mpd.status() + + handler = getattr(self, 'on_' + status['state'], None) + + if handler: + handler(status) + else: + self._log.debug('unhandled status "{0}"', status) + + events = self.mpd.events() + + +class MPDStatsPlugin(plugins.BeetsPlugin): + + item_types = { + 'play_count': types.INTEGER, + 'skip_count': types.INTEGER, + 'last_played': library.DateType(), + 'rating': types.FLOAT, + } + + def __init__(self): + super().__init__() + mpd_config.add({ + 'music_directory': config['directory'].as_filename(), + 'strip_path': '', + 'rating': True, + 'rating_mix': 0.75, + 'host': os.environ.get('MPD_HOST', 'localhost'), + 'port': int(os.environ.get('MPD_PORT', 6600)), + 'password': '', + }) + mpd_config['password'].redact = True + + def commands(self): + cmd = ui.Subcommand( + 'mpdstats', + help='run a MPD client to gather play statistics') + cmd.parser.add_option( + '--host', dest='host', type='string', + help='set the hostname of the server to connect to') + cmd.parser.add_option( + '--port', dest='port', type='int', + help='set the port of the MPD server to connect to') + cmd.parser.add_option( + '--password', dest='password', type='string', + help='set the password of the MPD server to connect to') + + def func(lib, opts, args): + mpd_config.set_args(opts) + + # Overrides for MPD settings. + if opts.host: + mpd_config['host'] = opts.host.decode('utf-8') + if opts.port: + mpd_config['host'] = int(opts.port) + if opts.password: + mpd_config['password'] = opts.password.decode('utf-8') + + try: + MPDStats(lib, self._log).run() + except KeyboardInterrupt: + pass + + cmd.func = func + return [cmd] diff --git a/lib/beetsplug/mpdupdate.py b/lib/beetsplug/mpdupdate.py new file mode 100644 index 00000000..e5264e18 --- /dev/null +++ b/lib/beetsplug/mpdupdate.py @@ -0,0 +1,126 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Updates an MPD index whenever the library is changed. + +Put something like the following in your config.yaml to configure: + mpd: + host: localhost + port: 6600 + password: seekrit +""" + +from beets.plugins import BeetsPlugin +import os +import socket +from beets import config + + +# No need to introduce a dependency on an MPD library for such a +# simple use case. Here's a simple socket abstraction to make things +# easier. +class BufferedSocket: + """Socket abstraction that allows reading by line.""" + def __init__(self, host, port, sep=b'\n'): + if host[0] in ['/', '~']: + self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + self.sock.connect(os.path.expanduser(host)) + else: + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.sock.connect((host, port)) + self.buf = b'' + self.sep = sep + + def readline(self): + while self.sep not in self.buf: + data = self.sock.recv(1024) + if not data: + break + self.buf += data + if self.sep in self.buf: + res, self.buf = self.buf.split(self.sep, 1) + return res + self.sep + else: + return b'' + + def send(self, data): + self.sock.send(data) + + def close(self): + self.sock.close() + + +class MPDUpdatePlugin(BeetsPlugin): + def __init__(self): + super().__init__() + config['mpd'].add({ + 'host': os.environ.get('MPD_HOST', 'localhost'), + 'port': int(os.environ.get('MPD_PORT', 6600)), + 'password': '', + }) + config['mpd']['password'].redact = True + + # For backwards compatibility, use any values from the + # plugin-specific "mpdupdate" section. + for key in config['mpd'].keys(): + if self.config[key].exists(): + config['mpd'][key] = self.config[key].get() + + self.register_listener('database_change', self.db_change) + + def db_change(self, lib, model): + self.register_listener('cli_exit', self.update) + + def update(self, lib): + self.update_mpd( + config['mpd']['host'].as_str(), + config['mpd']['port'].get(int), + config['mpd']['password'].as_str(), + ) + + def update_mpd(self, host='localhost', port=6600, password=None): + """Sends the "update" command to the MPD server indicated, + possibly authenticating with a password first. + """ + self._log.info('Updating MPD database...') + + try: + s = BufferedSocket(host, port) + except OSError as e: + self._log.warning('MPD connection failed: {0}', + str(e.strerror)) + return + + resp = s.readline() + if b'OK MPD' not in resp: + self._log.warning('MPD connection failed: {0!r}', resp) + return + + if password: + s.send(b'password "%s"\n' % password.encode('utf8')) + resp = s.readline() + if b'OK' not in resp: + self._log.warning('Authentication failed: {0!r}', resp) + s.send(b'close\n') + s.close() + return + + s.send(b'update\n') + resp = s.readline() + if b'updating_db' not in resp: + self._log.warning('Update failed: {0!r}', resp) + + s.send(b'close\n') + s.close() + self._log.info('Database updated.') diff --git a/lib/beetsplug/parentwork.py b/lib/beetsplug/parentwork.py new file mode 100644 index 00000000..75307b8f --- /dev/null +++ b/lib/beetsplug/parentwork.py @@ -0,0 +1,211 @@ +# This file is part of beets. +# Copyright 2017, Dorian Soergel. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Gets parent work, its disambiguation and id, composer, composer sort name +and work composition date +""" + + +from beets import ui +from beets.plugins import BeetsPlugin + +import musicbrainzngs + + +def direct_parent_id(mb_workid, work_date=None): + """Given a Musicbrainz work id, find the id one of the works the work is + part of and the first composition date it encounters. + """ + work_info = musicbrainzngs.get_work_by_id(mb_workid, + includes=["work-rels", + "artist-rels"]) + if 'artist-relation-list' in work_info['work'] and work_date is None: + for artist in work_info['work']['artist-relation-list']: + if artist['type'] == 'composer': + if 'end' in artist.keys(): + work_date = artist['end'] + + if 'work-relation-list' in work_info['work']: + for direct_parent in work_info['work']['work-relation-list']: + if direct_parent['type'] == 'parts' \ + and direct_parent.get('direction') == 'backward': + direct_id = direct_parent['work']['id'] + return direct_id, work_date + return None, work_date + + +def work_parent_id(mb_workid): + """Find the parent work id and composition date of a work given its id. + """ + work_date = None + while True: + new_mb_workid, work_date = direct_parent_id(mb_workid, work_date) + if not new_mb_workid: + return mb_workid, work_date + mb_workid = new_mb_workid + return mb_workid, work_date + + +def find_parentwork_info(mb_workid): + """Get the MusicBrainz information dict about a parent work, including + the artist relations, and the composition date for a work's parent work. + """ + parent_id, work_date = work_parent_id(mb_workid) + work_info = musicbrainzngs.get_work_by_id(parent_id, + includes=["artist-rels"]) + return work_info, work_date + + +class ParentWorkPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({ + 'auto': False, + 'force': False, + }) + + if self.config['auto']: + self.import_stages = [self.imported] + + def commands(self): + + def func(lib, opts, args): + self.config.set_args(opts) + force_parent = self.config['force'].get(bool) + write = ui.should_write() + + for item in lib.items(ui.decargs(args)): + changed = self.find_work(item, force_parent) + if changed: + item.store() + if write: + item.try_write() + command = ui.Subcommand( + 'parentwork', + help='fetch parent works, composers and dates') + + command.parser.add_option( + '-f', '--force', dest='force', + action='store_true', default=None, + help='re-fetch when parent work is already present') + + command.func = func + return [command] + + def imported(self, session, task): + """Import hook for fetching parent works automatically. + """ + force_parent = self.config['force'].get(bool) + + for item in task.imported_items(): + self.find_work(item, force_parent) + item.store() + + def get_info(self, item, work_info): + """Given the parent work info dict, fetch parent_composer, + parent_composer_sort, parentwork, parentwork_disambig, mb_workid and + composer_ids. + """ + + parent_composer = [] + parent_composer_sort = [] + parentwork_info = {} + + composer_exists = False + if 'artist-relation-list' in work_info['work']: + for artist in work_info['work']['artist-relation-list']: + if artist['type'] == 'composer': + composer_exists = True + parent_composer.append(artist['artist']['name']) + parent_composer_sort.append(artist['artist']['sort-name']) + if 'end' in artist.keys(): + parentwork_info["parentwork_date"] = artist['end'] + + parentwork_info['parent_composer'] = ', '.join(parent_composer) + parentwork_info['parent_composer_sort'] = ', '.join( + parent_composer_sort) + + if not composer_exists: + self._log.debug( + 'no composer for {}; add one at ' + 'https://musicbrainz.org/work/{}', + item, work_info['work']['id'], + ) + + parentwork_info['parentwork'] = work_info['work']['title'] + parentwork_info['mb_parentworkid'] = work_info['work']['id'] + + if 'disambiguation' in work_info['work']: + parentwork_info['parentwork_disambig'] = work_info[ + 'work']['disambiguation'] + + else: + parentwork_info['parentwork_disambig'] = None + + return parentwork_info + + def find_work(self, item, force): + """Finds the parent work of a recording and populates the tags + accordingly. + + The parent work is found recursively, by finding the direct parent + repeatedly until there are no more links in the chain. We return the + final, topmost work in the chain. + + Namely, the tags parentwork, parentwork_disambig, mb_parentworkid, + parent_composer, parent_composer_sort and work_date are populated. + """ + + if not item.mb_workid: + self._log.info('No work for {}, \ +add one at https://musicbrainz.org/recording/{}', item, item.mb_trackid) + return + + hasparent = hasattr(item, 'parentwork') + work_changed = True + if hasattr(item, 'parentwork_workid_current'): + work_changed = item.parentwork_workid_current != item.mb_workid + if force or not hasparent or work_changed: + try: + work_info, work_date = find_parentwork_info(item.mb_workid) + except musicbrainzngs.musicbrainz.WebServiceError as e: + self._log.debug("error fetching work: {}", e) + return + parent_info = self.get_info(item, work_info) + parent_info['parentwork_workid_current'] = item.mb_workid + if 'parent_composer' in parent_info: + self._log.debug("Work fetched: {} - {}", + parent_info['parentwork'], + parent_info['parent_composer']) + else: + self._log.debug("Work fetched: {} - no parent composer", + parent_info['parentwork']) + + elif hasparent: + self._log.debug("{}: Work present, skipping", item) + return + + # apply all non-null values to the item + for key, value in parent_info.items(): + if value: + item[key] = value + + if work_date: + item['work_date'] = work_date + return ui.show_model_changes( + item, fields=['parentwork', 'parentwork_disambig', + 'mb_parentworkid', 'parent_composer', + 'parent_composer_sort', 'work_date', + 'parentwork_workid_current', 'parentwork_date']) diff --git a/lib/beetsplug/permissions.py b/lib/beetsplug/permissions.py new file mode 100644 index 00000000..f5aab056 --- /dev/null +++ b/lib/beetsplug/permissions.py @@ -0,0 +1,121 @@ +"""Fixes file permissions after the file gets written on import. Put something +like the following in your config.yaml to configure: + + permissions: + file: 644 + dir: 755 +""" +import os +from beets import config, util +from beets.plugins import BeetsPlugin +from beets.util import ancestry + + +def convert_perm(perm): + """Convert a string to an integer, interpreting the text as octal. + Or, if `perm` is an integer, reinterpret it as an octal number that + has been "misinterpreted" as decimal. + """ + if isinstance(perm, int): + perm = str(perm) + return int(perm, 8) + + +def check_permissions(path, permission): + """Check whether the file's permissions equal the given vector. + Return a boolean. + """ + return oct(os.stat(path).st_mode & 0o777) == oct(permission) + + +def assert_permissions(path, permission, log): + """Check whether the file's permissions are as expected, otherwise, + log a warning message. Return a boolean indicating the match, like + `check_permissions`. + """ + if not check_permissions(util.syspath(path), permission): + log.warning( + 'could not set permissions on {}', + util.displayable_path(path), + ) + log.debug( + 'set permissions to {}, but permissions are now {}', + permission, + os.stat(util.syspath(path)).st_mode & 0o777, + ) + + +def dirs_in_library(library, item): + """Creates a list of ancestor directories in the beets library path. + """ + return [ancestor + for ancestor in ancestry(item) + if ancestor.startswith(library)][1:] + + +class Permissions(BeetsPlugin): + def __init__(self): + super().__init__() + + # Adding defaults. + self.config.add({ + 'file': '644', + 'dir': '755', + }) + + self.register_listener('item_imported', self.fix) + self.register_listener('album_imported', self.fix) + self.register_listener('art_set', self.fix_art) + + def fix(self, lib, item=None, album=None): + """Fix the permissions for an imported Item or Album. + """ + files = [] + dirs = set() + if item: + files.append(item.path) + dirs.update(dirs_in_library(lib.directory, item.path)) + elif album: + for album_item in album.items(): + files.append(album_item.path) + dirs.update(dirs_in_library(lib.directory, album_item.path)) + self.set_permissions(files=files, dirs=dirs) + + def fix_art(self, album): + """Fix the permission for Album art file. + """ + if album.artpath: + self.set_permissions(files=[album.artpath]) + + def set_permissions(self, files=[], dirs=[]): + # Get the configured permissions. The user can specify this either a + # string (in YAML quotes) or, for convenience, as an integer so the + # quotes can be omitted. In the latter case, we need to reinterpret the + # integer as octal, not decimal. + file_perm = config['permissions']['file'].get() + dir_perm = config['permissions']['dir'].get() + file_perm = convert_perm(file_perm) + dir_perm = convert_perm(dir_perm) + + for path in files: + # Changing permissions on the destination file. + self._log.debug( + 'setting file permissions on {}', + util.displayable_path(path), + ) + os.chmod(util.syspath(path), file_perm) + + # Checks if the destination path has the permissions configured. + assert_permissions(path, file_perm, self._log) + + # Change permissions for the directories. + for path in dirs: + # Changing permissions on the destination directory. + self._log.debug( + 'setting directory permissions on {}', + util.displayable_path(path), + ) + os.chmod(util.syspath(path), dir_perm) + + # Checks if the destination path has the permissions configured. + assert_permissions(path, dir_perm, self._log) diff --git a/lib/beetsplug/play.py b/lib/beetsplug/play.py new file mode 100644 index 00000000..f4233490 --- /dev/null +++ b/lib/beetsplug/play.py @@ -0,0 +1,215 @@ +# This file is part of beets. +# Copyright 2016, David Hamp-Gonsalves +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Send the results of a query to the configured music player as a playlist. +""" + +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand +from beets.ui.commands import PromptChoice +from beets import config +from beets import ui +from beets import util +from os.path import relpath +from tempfile import NamedTemporaryFile +import subprocess +import shlex + +# Indicate where arguments should be inserted into the command string. +# If this is missing, they're placed at the end. +ARGS_MARKER = '$args' + + +def play(command_str, selection, paths, open_args, log, item_type='track', + keep_open=False): + """Play items in paths with command_str and optional arguments. If + keep_open, return to beets, otherwise exit once command runs. + """ + # Print number of tracks or albums to be played, log command to be run. + item_type += 's' if len(selection) > 1 else '' + ui.print_('Playing {} {}.'.format(len(selection), item_type)) + log.debug('executing command: {} {!r}', command_str, open_args) + + try: + if keep_open: + command = shlex.split(command_str) + command = command + open_args + subprocess.call(command) + else: + util.interactive_open(open_args, command_str) + except OSError as exc: + raise ui.UserError( + f"Could not play the query: {exc}") + + +class PlayPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + + config['play'].add({ + 'command': None, + 'use_folders': False, + 'relative_to': None, + 'raw': False, + 'warning_threshold': 100, + 'bom': False, + }) + + self.register_listener('before_choose_candidate', + self.before_choose_candidate_listener) + + def commands(self): + play_command = Subcommand( + 'play', + help='send music to a player as a playlist' + ) + play_command.parser.add_album_option() + play_command.parser.add_option( + '-A', '--args', + action='store', + help='add additional arguments to the command', + ) + play_command.parser.add_option( + '-y', '--yes', + action="store_true", + help='skip the warning threshold', + ) + play_command.func = self._play_command + return [play_command] + + def _play_command(self, lib, opts, args): + """The CLI command function for `beet play`. Create a list of paths + from query, determine if tracks or albums are to be played. + """ + use_folders = config['play']['use_folders'].get(bool) + relative_to = config['play']['relative_to'].get() + if relative_to: + relative_to = util.normpath(relative_to) + # Perform search by album and add folders rather than tracks to + # playlist. + if opts.album: + selection = lib.albums(ui.decargs(args)) + paths = [] + + sort = lib.get_default_album_sort() + for album in selection: + if use_folders: + paths.append(album.item_dir()) + else: + paths.extend(item.path + for item in sort.sort(album.items())) + item_type = 'album' + + # Perform item query and add tracks to playlist. + else: + selection = lib.items(ui.decargs(args)) + paths = [item.path for item in selection] + item_type = 'track' + + if relative_to: + paths = [relpath(path, relative_to) for path in paths] + + if not selection: + ui.print_(ui.colorize('text_warning', + f'No {item_type} to play.')) + return + + open_args = self._playlist_or_paths(paths) + command_str = self._command_str(opts.args) + + # Check if the selection exceeds configured threshold. If True, + # cancel, otherwise proceed with play command. + if opts.yes or not self._exceeds_threshold( + selection, command_str, open_args, item_type): + play(command_str, selection, paths, open_args, self._log, + item_type) + + def _command_str(self, args=None): + """Create a command string from the config command and optional args. + """ + command_str = config['play']['command'].get() + if not command_str: + return util.open_anything() + # Add optional arguments to the player command. + if args: + if ARGS_MARKER in command_str: + return command_str.replace(ARGS_MARKER, args) + else: + return f"{command_str} {args}" + else: + # Don't include the marker in the command. + return command_str.replace(" " + ARGS_MARKER, "") + + def _playlist_or_paths(self, paths): + """Return either the raw paths of items or a playlist of the items. + """ + if config['play']['raw']: + return paths + else: + return [self._create_tmp_playlist(paths)] + + def _exceeds_threshold(self, selection, command_str, open_args, + item_type='track'): + """Prompt user whether to abort if playlist exceeds threshold. If + True, cancel playback. If False, execute play command. + """ + warning_threshold = config['play']['warning_threshold'].get(int) + + # Warn user before playing any huge playlists. + if warning_threshold and len(selection) > warning_threshold: + if len(selection) > 1: + item_type += 's' + + ui.print_(ui.colorize( + 'text_warning', + 'You are about to queue {} {}.'.format( + len(selection), item_type))) + + if ui.input_options(('Continue', 'Abort')) == 'a': + return True + + return False + + def _create_tmp_playlist(self, paths_list): + """Create a temporary .m3u file. Return the filename. + """ + utf8_bom = config['play']['bom'].get(bool) + m3u = NamedTemporaryFile('wb', suffix='.m3u', delete=False) + + if utf8_bom: + m3u.write(b'\xEF\xBB\xBF') + + for item in paths_list: + m3u.write(item + b'\n') + m3u.close() + return m3u.name + + def before_choose_candidate_listener(self, session, task): + """Append a "Play" choice to the interactive importer prompt. + """ + return [PromptChoice('y', 'plaY', self.importer_play)] + + def importer_play(self, session, task): + """Get items from current import task and send to play function. + """ + selection = task.items + paths = [item.path for item in selection] + + open_args = self._playlist_or_paths(paths) + command_str = self._command_str() + + if not self._exceeds_threshold(selection, command_str, open_args): + play(command_str, selection, paths, open_args, self._log, + keep_open=True) diff --git a/lib/beetsplug/playlist.py b/lib/beetsplug/playlist.py new file mode 100644 index 00000000..265b8bad --- /dev/null +++ b/lib/beetsplug/playlist.py @@ -0,0 +1,185 @@ +# This file is part of beets. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +import os +import fnmatch +import tempfile +import beets +from beets.util import path_as_posix + + +class PlaylistQuery(beets.dbcore.Query): + """Matches files listed by a playlist file. + """ + def __init__(self, pattern): + self.pattern = pattern + config = beets.config['playlist'] + + # Get the full path to the playlist + playlist_paths = ( + pattern, + os.path.abspath(os.path.join( + config['playlist_dir'].as_filename(), + f'{pattern}.m3u', + )), + ) + + self.paths = [] + for playlist_path in playlist_paths: + if not fnmatch.fnmatch(playlist_path, '*.[mM]3[uU]'): + # This is not am M3U playlist, skip this candidate + continue + + try: + f = open(beets.util.syspath(playlist_path), mode='rb') + except OSError: + continue + + if config['relative_to'].get() == 'library': + relative_to = beets.config['directory'].as_filename() + elif config['relative_to'].get() == 'playlist': + relative_to = os.path.dirname(playlist_path) + else: + relative_to = config['relative_to'].as_filename() + relative_to = beets.util.bytestring_path(relative_to) + + for line in f: + if line[0] == '#': + # ignore comments, and extm3u extension + continue + + self.paths.append(beets.util.normpath( + os.path.join(relative_to, line.rstrip()) + )) + f.close() + break + + def col_clause(self): + if not self.paths: + # Playlist is empty + return '0', () + clause = 'path IN ({})'.format(', '.join('?' for path in self.paths)) + return clause, (beets.library.BLOB_TYPE(p) for p in self.paths) + + def match(self, item): + return item.path in self.paths + + +class PlaylistPlugin(beets.plugins.BeetsPlugin): + item_queries = {'playlist': PlaylistQuery} + + def __init__(self): + super().__init__() + self.config.add({ + 'auto': False, + 'playlist_dir': '.', + 'relative_to': 'library', + 'forward_slash': False, + }) + + self.playlist_dir = self.config['playlist_dir'].as_filename() + self.changes = {} + + if self.config['relative_to'].get() == 'library': + self.relative_to = beets.util.bytestring_path( + beets.config['directory'].as_filename()) + elif self.config['relative_to'].get() != 'playlist': + self.relative_to = beets.util.bytestring_path( + self.config['relative_to'].as_filename()) + else: + self.relative_to = None + + if self.config['auto']: + self.register_listener('item_moved', self.item_moved) + self.register_listener('item_removed', self.item_removed) + self.register_listener('cli_exit', self.cli_exit) + + def item_moved(self, item, source, destination): + self.changes[source] = destination + + def item_removed(self, item): + if not os.path.exists(beets.util.syspath(item.path)): + self.changes[item.path] = None + + def cli_exit(self, lib): + for playlist in self.find_playlists(): + self._log.info(f'Updating playlist: {playlist}') + base_dir = beets.util.bytestring_path( + self.relative_to if self.relative_to + else os.path.dirname(playlist) + ) + + try: + self.update_playlist(playlist, base_dir) + except beets.util.FilesystemError: + self._log.error('Failed to update playlist: {}'.format( + beets.util.displayable_path(playlist))) + + def find_playlists(self): + """Find M3U playlists in the playlist directory.""" + try: + dir_contents = os.listdir(beets.util.syspath(self.playlist_dir)) + except OSError: + self._log.warning('Unable to open playlist directory {}'.format( + beets.util.displayable_path(self.playlist_dir))) + return + + for filename in dir_contents: + if fnmatch.fnmatch(filename, '*.[mM]3[uU]'): + yield os.path.join(self.playlist_dir, filename) + + def update_playlist(self, filename, base_dir): + """Find M3U playlists in the specified directory.""" + changes = 0 + deletions = 0 + + with tempfile.NamedTemporaryFile(mode='w+b', delete=False) as tempfp: + new_playlist = tempfp.name + with open(filename, mode='rb') as fp: + for line in fp: + original_path = line.rstrip(b'\r\n') + + # Ensure that path from playlist is absolute + is_relative = not os.path.isabs(line) + if is_relative: + lookup = os.path.join(base_dir, original_path) + else: + lookup = original_path + + try: + new_path = self.changes[beets.util.normpath(lookup)] + except KeyError: + if self.config['forward_slash']: + line = path_as_posix(line) + tempfp.write(line) + else: + if new_path is None: + # Item has been deleted + deletions += 1 + continue + + changes += 1 + if is_relative: + new_path = os.path.relpath(new_path, base_dir) + line = line.replace(original_path, new_path) + if self.config['forward_slash']: + line = path_as_posix(line) + tempfp.write(line) + + if changes or deletions: + self._log.info( + 'Updated playlist {} ({} changes, {} deletions)'.format( + filename, changes, deletions)) + beets.util.copy(new_playlist, filename, replace=True) + beets.util.remove(new_playlist) diff --git a/lib/beetsplug/plexupdate.py b/lib/beetsplug/plexupdate.py new file mode 100644 index 00000000..2261a55f --- /dev/null +++ b/lib/beetsplug/plexupdate.py @@ -0,0 +1,110 @@ +"""Updates an Plex library whenever the beets library is changed. + +Plex Home users enter the Plex Token to enable updating. +Put something like the following in your config.yaml to configure: + plex: + host: localhost + port: 32400 + token: token +""" + +import requests +from xml.etree import ElementTree +from urllib.parse import urljoin, urlencode +from beets import config +from beets.plugins import BeetsPlugin + + +def get_music_section(host, port, token, library_name, secure, + ignore_cert_errors): + """Getting the section key for the music library in Plex. + """ + api_endpoint = append_token('library/sections', token) + url = urljoin('{}://{}:{}'.format(get_protocol(secure), host, + port), api_endpoint) + + # Sends request. + r = requests.get(url, verify=not ignore_cert_errors) + + # Parse xml tree and extract music section key. + tree = ElementTree.fromstring(r.content) + for child in tree.findall('Directory'): + if child.get('title') == library_name: + return child.get('key') + + +def update_plex(host, port, token, library_name, secure, + ignore_cert_errors): + """Ignore certificate errors if configured to. + """ + if ignore_cert_errors: + import urllib3 + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + """Sends request to the Plex api to start a library refresh. + """ + # Getting section key and build url. + section_key = get_music_section(host, port, token, library_name, + secure, ignore_cert_errors) + api_endpoint = f'library/sections/{section_key}/refresh' + api_endpoint = append_token(api_endpoint, token) + url = urljoin('{}://{}:{}'.format(get_protocol(secure), host, + port), api_endpoint) + + # Sends request and returns requests object. + r = requests.get(url, verify=not ignore_cert_errors) + return r + + +def append_token(url, token): + """Appends the Plex Home token to the api call if required. + """ + if token: + url += '?' + urlencode({'X-Plex-Token': token}) + return url + + +def get_protocol(secure): + if secure: + return 'https' + else: + return 'http' + + +class PlexUpdate(BeetsPlugin): + def __init__(self): + super().__init__() + + # Adding defaults. + config['plex'].add({ + 'host': 'localhost', + 'port': 32400, + 'token': '', + 'library_name': 'Music', + 'secure': False, + 'ignore_cert_errors': False}) + + config['plex']['token'].redact = True + self.register_listener('database_change', self.listen_for_db_change) + + def listen_for_db_change(self, lib, model): + """Listens for beets db change and register the update for the end""" + self.register_listener('cli_exit', self.update) + + def update(self, lib): + """When the client exists try to send refresh request to Plex server. + """ + self._log.info('Updating Plex library...') + + # Try to send update request. + try: + update_plex( + config['plex']['host'].get(), + config['plex']['port'].get(), + config['plex']['token'].get(), + config['plex']['library_name'].get(), + config['plex']['secure'].get(bool), + config['plex']['ignore_cert_errors'].get(bool)) + self._log.info('... started.') + + except requests.exceptions.RequestException: + self._log.warning('Update failed.') diff --git a/lib/beetsplug/random.py b/lib/beetsplug/random.py new file mode 100644 index 00000000..ea9b7b98 --- /dev/null +++ b/lib/beetsplug/random.py @@ -0,0 +1,57 @@ +# This file is part of beets. +# Copyright 2016, Philippe Mongeau. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Get a random song or album from the library. +""" + +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand, decargs, print_ +from beets.random import random_objs + + +def random_func(lib, opts, args): + """Select some random items or albums and print the results. + """ + # Fetch all the objects matching the query into a list. + query = decargs(args) + if opts.album: + objs = list(lib.albums(query)) + else: + objs = list(lib.items(query)) + + # Print a random subset. + objs = random_objs(objs, opts.album, opts.number, opts.time, + opts.equal_chance) + for obj in objs: + print_(format(obj)) + + +random_cmd = Subcommand('random', + help='choose a random track or album') +random_cmd.parser.add_option( + '-n', '--number', action='store', type="int", + help='number of objects to choose', default=1) +random_cmd.parser.add_option( + '-e', '--equal-chance', action='store_true', + help='each artist has the same chance') +random_cmd.parser.add_option( + '-t', '--time', action='store', type="float", + help='total length in minutes of objects to choose') +random_cmd.parser.add_all_common_options() +random_cmd.func = random_func + + +class Random(BeetsPlugin): + def commands(self): + return [random_cmd] diff --git a/lib/beetsplug/replaygain.py b/lib/beetsplug/replaygain.py new file mode 100644 index 00000000..b6297d93 --- /dev/null +++ b/lib/beetsplug/replaygain.py @@ -0,0 +1,1368 @@ +# This file is part of beets. +# Copyright 2016, Fabrice Laporte, Yevgeny Bezman, and Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +import collections +import enum +import math +import os +import signal +import subprocess +import sys +import warnings +from multiprocessing.pool import ThreadPool, RUN +from six.moves import queue +from threading import Thread, Event + +from beets import ui +from beets.plugins import BeetsPlugin +from beets.util import (syspath, command_output, displayable_path, + py3_path, cpu_count) + + +# Utilities. + +class ReplayGainError(Exception): + """Raised when a local (to a track or an album) error occurs in one + of the backends. + """ + + +class FatalReplayGainError(Exception): + """Raised when a fatal error occurs in one of the backends. + """ + + +class FatalGstreamerPluginReplayGainError(FatalReplayGainError): + """Raised when a fatal error occurs in the GStreamerBackend when + loading the required plugins.""" + + +def call(args, **kwargs): + """Execute the command and return its output or raise a + ReplayGainError on failure. + """ + try: + return command_output(args, **kwargs) + except subprocess.CalledProcessError as e: + raise ReplayGainError( + "{} exited with status {}".format(args[0], e.returncode) + ) + except UnicodeEncodeError: + # Due to a bug in Python 2's subprocess on Windows, Unicode + # filenames can fail to encode on that platform. See: + # https://github.com/google-code-export/beets/issues/499 + raise ReplayGainError("argument encoding failed") + + +def after_version(version_a, version_b): + return tuple(int(s) for s in version_a.split('.')) \ + >= tuple(int(s) for s in version_b.split('.')) + + +def db_to_lufs(db): + """Convert db to LUFS. + + According to https://wiki.hydrogenaud.io/index.php?title= + ReplayGain_2.0_specification#Reference_level + """ + return db - 107 + + +def lufs_to_db(db): + """Convert LUFS to db. + + According to https://wiki.hydrogenaud.io/index.php?title= + ReplayGain_2.0_specification#Reference_level + """ + return db + 107 + + +# Backend base and plumbing classes. + +# gain: in LU to reference level +# peak: part of full scale (FS is 1.0) +Gain = collections.namedtuple("Gain", "gain peak") +# album_gain: Gain object +# track_gains: list of Gain objects +AlbumGain = collections.namedtuple("AlbumGain", "album_gain track_gains") + + +class Peak(enum.Enum): + none = 0 + true = 1 + sample = 2 + + +class Backend: + """An abstract class representing engine for calculating RG values. + """ + + do_parallel = False + + def __init__(self, config, log): + """Initialize the backend with the configuration view for the + plugin. + """ + self._log = log + + def compute_track_gain(self, items, target_level, peak): + """Computes the track gain of the given tracks, returns a list + of Gain objects. + """ + raise NotImplementedError() + + def compute_album_gain(self, items, target_level, peak): + """Computes the album gain of the given album, returns an + AlbumGain object. + """ + raise NotImplementedError() + + +# ffmpeg backend +class FfmpegBackend(Backend): + """A replaygain backend using ffmpeg's ebur128 filter. + """ + + do_parallel = True + + def __init__(self, config, log): + super().__init__(config, log) + self._ffmpeg_path = "ffmpeg" + + # check that ffmpeg is installed + try: + ffmpeg_version_out = call([self._ffmpeg_path, "-version"]) + except OSError: + raise FatalReplayGainError( + f"could not find ffmpeg at {self._ffmpeg_path}" + ) + incompatible_ffmpeg = True + for line in ffmpeg_version_out.stdout.splitlines(): + if line.startswith(b"configuration:"): + if b"--enable-libebur128" in line: + incompatible_ffmpeg = False + if line.startswith(b"libavfilter"): + version = line.split(b" ", 1)[1].split(b"/", 1)[0].split(b".") + version = tuple(map(int, version)) + if version >= (6, 67, 100): + incompatible_ffmpeg = False + if incompatible_ffmpeg: + raise FatalReplayGainError( + "Installed FFmpeg version does not support ReplayGain." + "calculation. Either libavfilter version 6.67.100 or above or" + "the --enable-libebur128 configuration option is required." + ) + + def compute_track_gain(self, items, target_level, peak): + """Computes the track gain of the given tracks, returns a list + of Gain objects (the track gains). + """ + gains = [] + for item in items: + gains.append( + self._analyse_item( + item, + target_level, + peak, + count_blocks=False, + )[0] # take only the gain, discarding number of gating blocks + ) + return gains + + def compute_album_gain(self, items, target_level, peak): + """Computes the album gain of the given album, returns an + AlbumGain object. + """ + target_level_lufs = db_to_lufs(target_level) + + # analyse tracks + # list of track Gain objects + track_gains = [] + # maximum peak + album_peak = 0 + # sum of BS.1770 gating block powers + sum_powers = 0 + # total number of BS.1770 gating blocks + n_blocks = 0 + + for item in items: + track_gain, track_n_blocks = self._analyse_item( + item, target_level, peak + ) + track_gains.append(track_gain) + + # album peak is maximum track peak + album_peak = max(album_peak, track_gain.peak) + + # prepare album_gain calculation + # total number of blocks is sum of track blocks + n_blocks += track_n_blocks + + # convert `LU to target_level` -> LUFS + track_loudness = target_level_lufs - track_gain.gain + # This reverses ITU-R BS.1770-4 p. 6 equation (5) to convert + # from loudness to power. The result is the average gating + # block power. + track_power = 10**((track_loudness + 0.691) / 10) + + # Weight that average power by the number of gating blocks to + # get the sum of all their powers. Add that to the sum of all + # block powers in this album. + sum_powers += track_power * track_n_blocks + + # calculate album gain + if n_blocks > 0: + # compare ITU-R BS.1770-4 p. 6 equation (5) + # Album gain is the replaygain of the concatenation of all tracks. + album_gain = -0.691 + 10 * math.log10(sum_powers / n_blocks) + else: + album_gain = -70 + # convert LUFS -> `LU to target_level` + album_gain = target_level_lufs - album_gain + + self._log.debug( + "{}: gain {} LU, peak {}" + .format(items, album_gain, album_peak) + ) + + return AlbumGain(Gain(album_gain, album_peak), track_gains) + + def _construct_cmd(self, item, peak_method): + """Construct the shell command to analyse items.""" + return [ + self._ffmpeg_path, + "-nostats", + "-hide_banner", + "-i", + item.path, + "-map", + "a:0", + "-filter", + f"ebur128=peak={peak_method}", + "-f", + "null", + "-", + ] + + def _analyse_item(self, item, target_level, peak, count_blocks=True): + """Analyse item. Return a pair of a Gain object and the number + of gating blocks above the threshold. + + If `count_blocks` is False, the number of gating blocks returned + will be 0. + """ + target_level_lufs = db_to_lufs(target_level) + peak_method = peak.name + + # call ffmpeg + self._log.debug(f"analyzing {item}") + cmd = self._construct_cmd(item, peak_method) + self._log.debug( + 'executing {0}', ' '.join(map(displayable_path, cmd)) + ) + output = call(cmd).stderr.splitlines() + + # parse output + + if peak == Peak.none: + peak = 0 + else: + line_peak = self._find_line( + output, + f" {peak_method.capitalize()} peak:".encode(), + start_line=len(output) - 1, step_size=-1, + ) + peak = self._parse_float( + output[self._find_line( + output, b" Peak:", + line_peak, + )] + ) + # convert TPFS -> part of FS + peak = 10**(peak / 20) + + line_integrated_loudness = self._find_line( + output, b" Integrated loudness:", + start_line=len(output) - 1, step_size=-1, + ) + gain = self._parse_float( + output[self._find_line( + output, b" I:", + line_integrated_loudness, + )] + ) + # convert LUFS -> LU from target level + gain = target_level_lufs - gain + + # count BS.1770 gating blocks + n_blocks = 0 + if count_blocks: + gating_threshold = self._parse_float( + output[self._find_line( + output, b" Threshold:", + start_line=line_integrated_loudness, + )] + ) + for line in output: + if not line.startswith(b"[Parsed_ebur128"): + continue + if line.endswith(b"Summary:"): + continue + line = line.split(b"M:", 1) + if len(line) < 2: + continue + if self._parse_float(b"M: " + line[1]) >= gating_threshold: + n_blocks += 1 + self._log.debug( + "{}: {} blocks over {} LUFS" + .format(item, n_blocks, gating_threshold) + ) + + self._log.debug( + "{}: gain {} LU, peak {}" + .format(item, gain, peak) + ) + + return Gain(gain, peak), n_blocks + + def _find_line(self, output, search, start_line=0, step_size=1): + """Return index of line beginning with `search`. + + Begins searching at index `start_line` in `output`. + """ + end_index = len(output) if step_size > 0 else -1 + for i in range(start_line, end_index, step_size): + if output[i].startswith(search): + return i + raise ReplayGainError( + "ffmpeg output: missing {} after line {}" + .format(repr(search), start_line) + ) + + def _parse_float(self, line): + """Extract a float from a key value pair in `line`. + + This format is expected: /[^:]:[[:space:]]*value.*/, where `value` is + the float. + """ + # extract value + value = line.split(b":", 1) + if len(value) < 2: + raise ReplayGainError( + "ffmpeg output: expected key value pair, found {}" + .format(line) + ) + value = value[1].lstrip() + # strip unit + value = value.split(b" ", 1)[0] + # cast value to float + try: + return float(value) + except ValueError: + raise ReplayGainError( + "ffmpeg output: expected float value, found {}" + .format(value) + ) + + +# mpgain/aacgain CLI tool backend. +class CommandBackend(Backend): + do_parallel = True + + def __init__(self, config, log): + super().__init__(config, log) + config.add({ + 'command': "", + 'noclip': True, + }) + + self.command = config["command"].as_str() + + if self.command: + # Explicit executable path. + if not os.path.isfile(self.command): + raise FatalReplayGainError( + 'replaygain command does not exist: {}'.format( + self.command) + ) + else: + # Check whether the program is in $PATH. + for cmd in ('mp3gain', 'aacgain'): + try: + call([cmd, '-v']) + self.command = cmd + except OSError: + pass + if not self.command: + raise FatalReplayGainError( + 'no replaygain command found: install mp3gain or aacgain' + ) + + self.noclip = config['noclip'].get(bool) + + def compute_track_gain(self, items, target_level, peak): + """Computes the track gain of the given tracks, returns a list + of TrackGain objects. + """ + supported_items = list(filter(self.format_supported, items)) + output = self.compute_gain(supported_items, target_level, False) + return output + + def compute_album_gain(self, items, target_level, peak): + """Computes the album gain of the given album, returns an + AlbumGain object. + """ + # TODO: What should be done when not all tracks in the album are + # supported? + + supported_items = list(filter(self.format_supported, items)) + if len(supported_items) != len(items): + self._log.debug('tracks are of unsupported format') + return AlbumGain(None, []) + + output = self.compute_gain(supported_items, target_level, True) + return AlbumGain(output[-1], output[:-1]) + + def format_supported(self, item): + """Checks whether the given item is supported by the selected tool. + """ + if 'mp3gain' in self.command and item.format != 'MP3': + return False + elif 'aacgain' in self.command and item.format not in ('MP3', 'AAC'): + return False + return True + + def compute_gain(self, items, target_level, is_album): + """Computes the track or album gain of a list of items, returns + a list of TrackGain objects. + + When computing album gain, the last TrackGain object returned is + the album gain + """ + if len(items) == 0: + self._log.debug('no supported tracks to analyze') + return [] + + """Compute ReplayGain values and return a list of results + dictionaries as given by `parse_tool_output`. + """ + # Construct shell command. The "-o" option makes the output + # easily parseable (tab-delimited). "-s s" forces gain + # recalculation even if tags are already present and disables + # tag-writing; this turns the mp3gain/aacgain tool into a gain + # calculator rather than a tag manipulator because we take care + # of changing tags ourselves. + cmd = [self.command, '-o', '-s', 's'] + if self.noclip: + # Adjust to avoid clipping. + cmd = cmd + ['-k'] + else: + # Disable clipping warning. + cmd = cmd + ['-c'] + cmd = cmd + ['-d', str(int(target_level - 89))] + cmd = cmd + [syspath(i.path) for i in items] + + self._log.debug('analyzing {0} files', len(items)) + self._log.debug("executing {0}", " ".join(map(displayable_path, cmd))) + output = call(cmd).stdout + self._log.debug('analysis finished') + return self.parse_tool_output(output, + len(items) + (1 if is_album else 0)) + + def parse_tool_output(self, text, num_lines): + """Given the tab-delimited output from an invocation of mp3gain + or aacgain, parse the text and return a list of dictionaries + containing information about each analyzed file. + """ + out = [] + for line in text.split(b'\n')[1:num_lines + 1]: + parts = line.split(b'\t') + if len(parts) != 6 or parts[0] == b'File': + self._log.debug('bad tool output: {0}', text) + raise ReplayGainError('mp3gain failed') + d = { + 'file': parts[0], + 'mp3gain': int(parts[1]), + 'gain': float(parts[2]), + 'peak': float(parts[3]) / (1 << 15), + 'maxgain': int(parts[4]), + 'mingain': int(parts[5]), + + } + out.append(Gain(d['gain'], d['peak'])) + return out + + +# GStreamer-based backend. + +class GStreamerBackend(Backend): + def __init__(self, config, log): + super().__init__(config, log) + self._import_gst() + + # Initialized a GStreamer pipeline of the form filesrc -> + # decodebin -> audioconvert -> audioresample -> rganalysis -> + # fakesink The connection between decodebin and audioconvert is + # handled dynamically after decodebin figures out the type of + # the input file. + self._src = self.Gst.ElementFactory.make("filesrc", "src") + self._decbin = self.Gst.ElementFactory.make("decodebin", "decbin") + self._conv = self.Gst.ElementFactory.make("audioconvert", "conv") + self._res = self.Gst.ElementFactory.make("audioresample", "res") + self._rg = self.Gst.ElementFactory.make("rganalysis", "rg") + + if self._src is None or self._decbin is None or self._conv is None \ + or self._res is None or self._rg is None: + raise FatalGstreamerPluginReplayGainError( + "Failed to load required GStreamer plugins" + ) + + # We check which files need gain ourselves, so all files given + # to rganalsys should have their gain computed, even if it + # already exists. + self._rg.set_property("forced", True) + self._sink = self.Gst.ElementFactory.make("fakesink", "sink") + + self._pipe = self.Gst.Pipeline() + self._pipe.add(self._src) + self._pipe.add(self._decbin) + self._pipe.add(self._conv) + self._pipe.add(self._res) + self._pipe.add(self._rg) + self._pipe.add(self._sink) + + self._src.link(self._decbin) + self._conv.link(self._res) + self._res.link(self._rg) + self._rg.link(self._sink) + + self._bus = self._pipe.get_bus() + self._bus.add_signal_watch() + self._bus.connect("message::eos", self._on_eos) + self._bus.connect("message::error", self._on_error) + self._bus.connect("message::tag", self._on_tag) + # Needed for handling the dynamic connection between decodebin + # and audioconvert + self._decbin.connect("pad-added", self._on_pad_added) + self._decbin.connect("pad-removed", self._on_pad_removed) + + self._main_loop = self.GLib.MainLoop() + + self._files = [] + + def _import_gst(self): + """Import the necessary GObject-related modules and assign `Gst` + and `GObject` fields on this object. + """ + + try: + import gi + except ImportError: + raise FatalReplayGainError( + "Failed to load GStreamer: python-gi not found" + ) + + try: + gi.require_version('Gst', '1.0') + except ValueError as e: + raise FatalReplayGainError( + f"Failed to load GStreamer 1.0: {e}" + ) + + from gi.repository import GObject, Gst, GLib + # Calling GObject.threads_init() is not needed for + # PyGObject 3.10.2+ + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + GObject.threads_init() + Gst.init([sys.argv[0]]) + + self.GObject = GObject + self.GLib = GLib + self.Gst = Gst + + def compute(self, files, target_level, album): + self._error = None + self._files = list(files) + + if len(self._files) == 0: + return + + self._file_tags = collections.defaultdict(dict) + + self._rg.set_property("reference-level", target_level) + + if album: + self._rg.set_property("num-tracks", len(self._files)) + + if self._set_first_file(): + self._main_loop.run() + if self._error is not None: + raise self._error + + def compute_track_gain(self, items, target_level, peak): + self.compute(items, target_level, False) + if len(self._file_tags) != len(items): + raise ReplayGainError("Some tracks did not receive tags") + + ret = [] + for item in items: + ret.append(Gain(self._file_tags[item]["TRACK_GAIN"], + self._file_tags[item]["TRACK_PEAK"])) + + return ret + + def compute_album_gain(self, items, target_level, peak): + items = list(items) + self.compute(items, target_level, True) + if len(self._file_tags) != len(items): + raise ReplayGainError("Some items in album did not receive tags") + + # Collect track gains. + track_gains = [] + for item in items: + try: + gain = self._file_tags[item]["TRACK_GAIN"] + peak = self._file_tags[item]["TRACK_PEAK"] + except KeyError: + raise ReplayGainError("results missing for track") + track_gains.append(Gain(gain, peak)) + + # Get album gain information from the last track. + last_tags = self._file_tags[items[-1]] + try: + gain = last_tags["ALBUM_GAIN"] + peak = last_tags["ALBUM_PEAK"] + except KeyError: + raise ReplayGainError("results missing for album") + + return AlbumGain(Gain(gain, peak), track_gains) + + def close(self): + self._bus.remove_signal_watch() + + def _on_eos(self, bus, message): + # A file finished playing in all elements of the pipeline. The + # RG tags have already been propagated. If we don't have a next + # file, we stop processing. + if not self._set_next_file(): + self._pipe.set_state(self.Gst.State.NULL) + self._main_loop.quit() + + def _on_error(self, bus, message): + self._pipe.set_state(self.Gst.State.NULL) + self._main_loop.quit() + err, debug = message.parse_error() + f = self._src.get_property("location") + # A GStreamer error, either an unsupported format or a bug. + self._error = ReplayGainError( + f"Error {err!r} - {debug!r} on file {f!r}" + ) + + def _on_tag(self, bus, message): + tags = message.parse_tag() + + def handle_tag(taglist, tag, userdata): + # The rganalysis element provides both the existing tags for + # files and the new computes tags. In order to ensure we + # store the computed tags, we overwrite the RG values of + # received a second time. + if tag == self.Gst.TAG_TRACK_GAIN: + self._file_tags[self._file]["TRACK_GAIN"] = \ + taglist.get_double(tag)[1] + elif tag == self.Gst.TAG_TRACK_PEAK: + self._file_tags[self._file]["TRACK_PEAK"] = \ + taglist.get_double(tag)[1] + elif tag == self.Gst.TAG_ALBUM_GAIN: + self._file_tags[self._file]["ALBUM_GAIN"] = \ + taglist.get_double(tag)[1] + elif tag == self.Gst.TAG_ALBUM_PEAK: + self._file_tags[self._file]["ALBUM_PEAK"] = \ + taglist.get_double(tag)[1] + elif tag == self.Gst.TAG_REFERENCE_LEVEL: + self._file_tags[self._file]["REFERENCE_LEVEL"] = \ + taglist.get_double(tag)[1] + + tags.foreach(handle_tag, None) + + def _set_first_file(self): + if len(self._files) == 0: + return False + + self._file = self._files.pop(0) + self._pipe.set_state(self.Gst.State.NULL) + self._src.set_property("location", py3_path(syspath(self._file.path))) + self._pipe.set_state(self.Gst.State.PLAYING) + return True + + def _set_file(self): + """Initialize the filesrc element with the next file to be analyzed. + """ + # No more files, we're done + if len(self._files) == 0: + return False + + self._file = self._files.pop(0) + + # Ensure the filesrc element received the paused state of the + # pipeline in a blocking manner + self._src.sync_state_with_parent() + self._src.get_state(self.Gst.CLOCK_TIME_NONE) + + # Ensure the decodebin element receives the paused state of the + # pipeline in a blocking manner + self._decbin.sync_state_with_parent() + self._decbin.get_state(self.Gst.CLOCK_TIME_NONE) + + # Disconnect the decodebin element from the pipeline, set its + # state to READY to to clear it. + self._decbin.unlink(self._conv) + self._decbin.set_state(self.Gst.State.READY) + + # Set a new file on the filesrc element, can only be done in the + # READY state + self._src.set_state(self.Gst.State.READY) + self._src.set_property("location", py3_path(syspath(self._file.path))) + + self._decbin.link(self._conv) + self._pipe.set_state(self.Gst.State.READY) + + return True + + def _set_next_file(self): + """Set the next file to be analyzed while keeping the pipeline + in the PAUSED state so that the rganalysis element can correctly + handle album gain. + """ + # A blocking pause + self._pipe.set_state(self.Gst.State.PAUSED) + self._pipe.get_state(self.Gst.CLOCK_TIME_NONE) + + # Try setting the next file + ret = self._set_file() + if ret: + # Seek to the beginning in order to clear the EOS state of the + # various elements of the pipeline + self._pipe.seek_simple(self.Gst.Format.TIME, + self.Gst.SeekFlags.FLUSH, + 0) + self._pipe.set_state(self.Gst.State.PLAYING) + + return ret + + def _on_pad_added(self, decbin, pad): + sink_pad = self._conv.get_compatible_pad(pad, None) + assert(sink_pad is not None) + pad.link(sink_pad) + + def _on_pad_removed(self, decbin, pad): + # Called when the decodebin element is disconnected from the + # rest of the pipeline while switching input files + peer = pad.get_peer() + assert(peer is None) + + +class AudioToolsBackend(Backend): + """ReplayGain backend that uses `Python Audio Tools + <http://audiotools.sourceforge.net/>`_ and its capabilities to read more + file formats and compute ReplayGain values using it replaygain module. + """ + + def __init__(self, config, log): + super().__init__(config, log) + self._import_audiotools() + + def _import_audiotools(self): + """Check whether it's possible to import the necessary modules. + There is no check on the file formats at runtime. + + :raises :exc:`ReplayGainError`: if the modules cannot be imported + """ + try: + import audiotools + import audiotools.replaygain + except ImportError: + raise FatalReplayGainError( + "Failed to load audiotools: audiotools not found" + ) + self._mod_audiotools = audiotools + self._mod_replaygain = audiotools.replaygain + + def open_audio_file(self, item): + """Open the file to read the PCM stream from the using + ``item.path``. + + :return: the audiofile instance + :rtype: :class:`audiotools.AudioFile` + :raises :exc:`ReplayGainError`: if the file is not found or the + file format is not supported + """ + try: + audiofile = self._mod_audiotools.open(py3_path(syspath(item.path))) + except OSError: + raise ReplayGainError( + f"File {item.path} was not found" + ) + except self._mod_audiotools.UnsupportedFile: + raise ReplayGainError( + f"Unsupported file type {item.format}" + ) + + return audiofile + + def init_replaygain(self, audiofile, item): + """Return an initialized :class:`audiotools.replaygain.ReplayGain` + instance, which requires the sample rate of the song(s) on which + the ReplayGain values will be computed. The item is passed in case + the sample rate is invalid to log the stored item sample rate. + + :return: initialized replagain object + :rtype: :class:`audiotools.replaygain.ReplayGain` + :raises: :exc:`ReplayGainError` if the sample rate is invalid + """ + try: + rg = self._mod_replaygain.ReplayGain(audiofile.sample_rate()) + except ValueError: + raise ReplayGainError( + f"Unsupported sample rate {item.samplerate}") + return + return rg + + def compute_track_gain(self, items, target_level, peak): + """Compute ReplayGain values for the requested items. + + :return list: list of :class:`Gain` objects + """ + return [self._compute_track_gain(item, target_level) for item in items] + + def _with_target_level(self, gain, target_level): + """Return `gain` relative to `target_level`. + + Assumes `gain` is relative to 89 db. + """ + return gain + (target_level - 89) + + def _title_gain(self, rg, audiofile, target_level): + """Get the gain result pair from PyAudioTools using the `ReplayGain` + instance `rg` for the given `audiofile`. + + Wraps `rg.title_gain(audiofile.to_pcm())` and throws a + `ReplayGainError` when the library fails. + """ + try: + # The method needs an audiotools.PCMReader instance that can + # be obtained from an audiofile instance. + gain, peak = rg.title_gain(audiofile.to_pcm()) + except ValueError as exc: + # `audiotools.replaygain` can raise a `ValueError` if the sample + # rate is incorrect. + self._log.debug('error in rg.title_gain() call: {}', exc) + raise ReplayGainError('audiotools audio data error') + return self._with_target_level(gain, target_level), peak + + def _compute_track_gain(self, item, target_level): + """Compute ReplayGain value for the requested item. + + :rtype: :class:`Gain` + """ + audiofile = self.open_audio_file(item) + rg = self.init_replaygain(audiofile, item) + + # Each call to title_gain on a ReplayGain object returns peak and gain + # of the track. + rg_track_gain, rg_track_peak = self._title_gain( + rg, audiofile, target_level + ) + + self._log.debug('ReplayGain for track {0} - {1}: {2:.2f}, {3:.2f}', + item.artist, item.title, rg_track_gain, rg_track_peak) + return Gain(gain=rg_track_gain, peak=rg_track_peak) + + def compute_album_gain(self, items, target_level, peak): + """Compute ReplayGain values for the requested album and its items. + + :rtype: :class:`AlbumGain` + """ + # The first item is taken and opened to get the sample rate to + # initialize the replaygain object. The object is used for all the + # tracks in the album to get the album values. + item = list(items)[0] + audiofile = self.open_audio_file(item) + rg = self.init_replaygain(audiofile, item) + + track_gains = [] + for item in items: + audiofile = self.open_audio_file(item) + rg_track_gain, rg_track_peak = self._title_gain( + rg, audiofile, target_level + ) + track_gains.append( + Gain(gain=rg_track_gain, peak=rg_track_peak) + ) + self._log.debug('ReplayGain for track {0}: {1:.2f}, {2:.2f}', + item, rg_track_gain, rg_track_peak) + + # After getting the values for all tracks, it's possible to get the + # album values. + rg_album_gain, rg_album_peak = rg.album_gain() + rg_album_gain = self._with_target_level(rg_album_gain, target_level) + self._log.debug('ReplayGain for album {0}: {1:.2f}, {2:.2f}', + items[0].album, rg_album_gain, rg_album_peak) + + return AlbumGain( + Gain(gain=rg_album_gain, peak=rg_album_peak), + track_gains=track_gains + ) + + +class ExceptionWatcher(Thread): + """Monitors a queue for exceptions asynchronously. + Once an exception occurs, raise it and execute a callback. + """ + + def __init__(self, queue, callback): + self._queue = queue + self._callback = callback + self._stopevent = Event() + Thread.__init__(self) + + def run(self): + while not self._stopevent.is_set(): + try: + exc = self._queue.get_nowait() + self._callback() + raise exc[1].with_traceback(exc[2]) + except queue.Empty: + # No exceptions yet, loop back to check + # whether `_stopevent` is set + pass + + def join(self, timeout=None): + self._stopevent.set() + Thread.join(self, timeout) + + +# Main plugin logic. + +class ReplayGainPlugin(BeetsPlugin): + """Provides ReplayGain analysis. + """ + + backends = { + "command": CommandBackend, + "gstreamer": GStreamerBackend, + "audiotools": AudioToolsBackend, + "ffmpeg": FfmpegBackend, + } + + peak_methods = { + "true": Peak.true, + "sample": Peak.sample, + } + + def __init__(self): + super().__init__() + + # default backend is 'command' for backward-compatibility. + self.config.add({ + 'overwrite': False, + 'auto': True, + 'backend': 'command', + 'threads': cpu_count(), + 'parallel_on_import': False, + 'per_disc': False, + 'peak': 'true', + 'targetlevel': 89, + 'r128': ['Opus'], + 'r128_targetlevel': lufs_to_db(-23), + }) + + self.overwrite = self.config['overwrite'].get(bool) + self.per_disc = self.config['per_disc'].get(bool) + + # Remember which backend is used for CLI feedback + self.backend_name = self.config['backend'].as_str() + + if self.backend_name not in self.backends: + raise ui.UserError( + "Selected ReplayGain backend {} is not supported. " + "Please select one of: {}".format( + self.backend_name, + ', '.join(self.backends.keys()) + ) + ) + peak_method = self.config["peak"].as_str() + if peak_method not in self.peak_methods: + raise ui.UserError( + "Selected ReplayGain peak method {} is not supported. " + "Please select one of: {}".format( + peak_method, + ', '.join(self.peak_methods.keys()) + ) + ) + self._peak_method = self.peak_methods[peak_method] + + # On-import analysis. + if self.config['auto']: + self.register_listener('import_begin', self.import_begin) + self.register_listener('import', self.import_end) + self.import_stages = [self.imported] + + # Formats to use R128. + self.r128_whitelist = self.config['r128'].as_str_seq() + + try: + self.backend_instance = self.backends[self.backend_name]( + self.config, self._log + ) + except (ReplayGainError, FatalReplayGainError) as e: + raise ui.UserError( + f'replaygain initialization failed: {e}') + + def should_use_r128(self, item): + """Checks the plugin setting to decide whether the calculation + should be done using the EBU R128 standard and use R128_ tags instead. + """ + return item.format in self.r128_whitelist + + def track_requires_gain(self, item): + return self.overwrite or \ + (self.should_use_r128(item) and not item.r128_track_gain) or \ + (not self.should_use_r128(item) and + (not item.rg_track_gain or not item.rg_track_peak)) + + def album_requires_gain(self, album): + # Skip calculating gain only when *all* files don't need + # recalculation. This way, if any file among an album's tracks + # needs recalculation, we still get an accurate album gain + # value. + return self.overwrite or \ + any([self.should_use_r128(item) and + (not item.r128_track_gain or not item.r128_album_gain) + for item in album.items()]) or \ + any([not self.should_use_r128(item) and + (not item.rg_album_gain or not item.rg_album_peak) + for item in album.items()]) + + def store_track_gain(self, item, track_gain): + item.rg_track_gain = track_gain.gain + item.rg_track_peak = track_gain.peak + item.store() + self._log.debug('applied track gain {0} LU, peak {1} of FS', + item.rg_track_gain, item.rg_track_peak) + + def store_album_gain(self, item, album_gain): + item.rg_album_gain = album_gain.gain + item.rg_album_peak = album_gain.peak + item.store() + self._log.debug('applied album gain {0} LU, peak {1} of FS', + item.rg_album_gain, item.rg_album_peak) + + def store_track_r128_gain(self, item, track_gain): + item.r128_track_gain = track_gain.gain + item.store() + + self._log.debug('applied r128 track gain {0} LU', + item.r128_track_gain) + + def store_album_r128_gain(self, item, album_gain): + item.r128_album_gain = album_gain.gain + item.store() + self._log.debug('applied r128 album gain {0} LU', + item.r128_album_gain) + + def tag_specific_values(self, items): + """Return some tag specific values. + + Returns a tuple (store_track_gain, store_album_gain, target_level, + peak_method). + """ + if any([self.should_use_r128(item) for item in items]): + store_track_gain = self.store_track_r128_gain + store_album_gain = self.store_album_r128_gain + target_level = self.config['r128_targetlevel'].as_number() + peak = Peak.none # R128_* tags do not store the track/album peak + else: + store_track_gain = self.store_track_gain + store_album_gain = self.store_album_gain + target_level = self.config['targetlevel'].as_number() + peak = self._peak_method + + return store_track_gain, store_album_gain, target_level, peak + + def handle_album(self, album, write, force=False): + """Compute album and track replay gain store it in all of the + album's items. + + If ``write`` is truthy then ``item.write()`` is called for each + item. If replay gain information is already present in all + items, nothing is done. + """ + if not force and not self.album_requires_gain(album): + self._log.info('Skipping album {0}', album) + return + + if (any([self.should_use_r128(item) for item in album.items()]) and not + all([self.should_use_r128(item) for item in album.items()])): + self._log.error( + "Cannot calculate gain for album {0} (incompatible formats)", + album) + return + + self._log.info('analyzing {0}', album) + + tag_vals = self.tag_specific_values(album.items()) + store_track_gain, store_album_gain, target_level, peak = tag_vals + + discs = {} + if self.per_disc: + for item in album.items(): + if discs.get(item.disc) is None: + discs[item.disc] = [] + discs[item.disc].append(item) + else: + discs[1] = album.items() + + for discnumber, items in discs.items(): + def _store_album(album_gain): + if not album_gain or not album_gain.album_gain \ + or len(album_gain.track_gains) != len(items): + # In some cases, backends fail to produce a valid + # `album_gain` without throwing FatalReplayGainError + # => raise non-fatal exception & continue + raise ReplayGainError( + "ReplayGain backend `{}` failed " + "for some tracks in album {}" + .format(self.backend_name, album) + ) + for item, track_gain in zip(items, + album_gain.track_gains): + store_track_gain(item, track_gain) + store_album_gain(item, album_gain.album_gain) + if write: + item.try_write() + self._log.debug('done analyzing {0}', item) + + try: + self._apply( + self.backend_instance.compute_album_gain, args=(), + kwds={ + "items": list(items), + "target_level": target_level, + "peak": peak + }, + callback=_store_album + ) + except ReplayGainError as e: + self._log.info("ReplayGain error: {0}", e) + except FatalReplayGainError as e: + raise ui.UserError( + f"Fatal replay gain error: {e}") + + def handle_track(self, item, write, force=False): + """Compute track replay gain and store it in the item. + + If ``write`` is truthy then ``item.write()`` is called to write + the data to disk. If replay gain information is already present + in the item, nothing is done. + """ + if not force and not self.track_requires_gain(item): + self._log.info('Skipping track {0}', item) + return + + tag_vals = self.tag_specific_values([item]) + store_track_gain, store_album_gain, target_level, peak = tag_vals + + def _store_track(track_gains): + if not track_gains or len(track_gains) != 1: + # In some cases, backends fail to produce a valid + # `track_gains` without throwing FatalReplayGainError + # => raise non-fatal exception & continue + raise ReplayGainError( + "ReplayGain backend `{}` failed for track {}" + .format(self.backend_name, item) + ) + + store_track_gain(item, track_gains[0]) + if write: + item.try_write() + self._log.debug('done analyzing {0}', item) + + try: + self._apply( + self.backend_instance.compute_track_gain, args=(), + kwds={ + "items": [item], + "target_level": target_level, + "peak": peak, + }, + callback=_store_track + ) + except ReplayGainError as e: + self._log.info("ReplayGain error: {0}", e) + except FatalReplayGainError as e: + raise ui.UserError(f"Fatal replay gain error: {e}") + + def _has_pool(self): + """Check whether a `ThreadPool` is running instance in `self.pool` + """ + if hasattr(self, 'pool'): + if isinstance(self.pool, ThreadPool) and self.pool._state == RUN: + return True + return False + + def open_pool(self, threads): + """Open a `ThreadPool` instance in `self.pool` + """ + if not self._has_pool() and self.backend_instance.do_parallel: + self.pool = ThreadPool(threads) + self.exc_queue = queue.Queue() + + signal.signal(signal.SIGINT, self._interrupt) + + self.exc_watcher = ExceptionWatcher( + self.exc_queue, # threads push exceptions here + self.terminate_pool # abort once an exception occurs + ) + self.exc_watcher.start() + + def _apply(self, func, args, kwds, callback): + if self._has_pool(): + def catch_exc(func, exc_queue, log): + """Wrapper to catch raised exceptions in threads + """ + def wfunc(*args, **kwargs): + try: + return func(*args, **kwargs) + except ReplayGainError as e: + log.info(e.args[0]) # log non-fatal exceptions + except Exception: + exc_queue.put(sys.exc_info()) + return wfunc + + # Wrap function and callback to catch exceptions + func = catch_exc(func, self.exc_queue, self._log) + callback = catch_exc(callback, self.exc_queue, self._log) + + self.pool.apply_async(func, args, kwds, callback) + else: + callback(func(*args, **kwds)) + + def terminate_pool(self): + """Terminate the `ThreadPool` instance in `self.pool` + (e.g. stop execution in case of exception) + """ + # Don't call self._as_pool() here, + # self.pool._state may not be == RUN + if hasattr(self, 'pool') and isinstance(self.pool, ThreadPool): + self.pool.terminate() + self.pool.join() + # self.exc_watcher.join() + + def _interrupt(self, signal, frame): + try: + self._log.info('interrupted') + self.terminate_pool() + sys.exit(0) + except SystemExit: + # Silence raised SystemExit ~ exit(0) + pass + + def close_pool(self): + """Close the `ThreadPool` instance in `self.pool` (if there is one) + """ + if self._has_pool(): + self.pool.close() + self.pool.join() + self.exc_watcher.join() + + def import_begin(self, session): + """Handle `import_begin` event -> open pool + """ + threads = self.config['threads'].get(int) + + if self.config['parallel_on_import'] \ + and self.config['auto'] \ + and threads: + self.open_pool(threads) + + def import_end(self, paths): + """Handle `import` event -> close pool + """ + self.close_pool() + + def imported(self, session, task): + """Add replay gain info to items or albums of ``task``. + """ + if self.config['auto']: + if task.is_album: + self.handle_album(task.album, False) + else: + self.handle_track(task.item, False) + + def command_func(self, lib, opts, args): + try: + write = ui.should_write(opts.write) + force = opts.force + + # Bypass self.open_pool() if called with `--threads 0` + if opts.threads != 0: + threads = opts.threads or self.config['threads'].get(int) + self.open_pool(threads) + + if opts.album: + albums = lib.albums(ui.decargs(args)) + self._log.info( + "Analyzing {} albums ~ {} backend..." + .format(len(albums), self.backend_name) + ) + for album in albums: + self.handle_album(album, write, force) + else: + items = lib.items(ui.decargs(args)) + self._log.info( + "Analyzing {} tracks ~ {} backend..." + .format(len(items), self.backend_name) + ) + for item in items: + self.handle_track(item, write, force) + + self.close_pool() + except (SystemExit, KeyboardInterrupt): + # Silence interrupt exceptions + pass + + def commands(self): + """Return the "replaygain" ui subcommand. + """ + cmd = ui.Subcommand('replaygain', help='analyze for ReplayGain') + cmd.parser.add_album_option() + cmd.parser.add_option( + "-t", "--threads", dest="threads", type=int, + help='change the number of threads, \ + defaults to maximum available processors' + ) + cmd.parser.add_option( + "-f", "--force", dest="force", action="store_true", default=False, + help="analyze all files, including those that " + "already have ReplayGain metadata") + cmd.parser.add_option( + "-w", "--write", default=None, action="store_true", + help="write new metadata to files' tags") + cmd.parser.add_option( + "-W", "--nowrite", dest="write", action="store_false", + help="don't write metadata (opposite of -w)") + cmd.func = self.command_func + return [cmd] diff --git a/lib/beetsplug/rewrite.py b/lib/beetsplug/rewrite.py new file mode 100644 index 00000000..e02e4080 --- /dev/null +++ b/lib/beetsplug/rewrite.py @@ -0,0 +1,73 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Uses user-specified rewriting rules to canonicalize names for path +formats. +""" + +import re +from collections import defaultdict + +from beets.plugins import BeetsPlugin +from beets import ui +from beets import library + + +def rewriter(field, rules): + """Create a template field function that rewrites the given field + with the given rewriting rules. ``rules`` must be a list of + (pattern, replacement) pairs. + """ + def fieldfunc(item): + value = item._values_fixed[field] + for pattern, replacement in rules: + if pattern.match(value.lower()): + # Rewrite activated. + return replacement + # Not activated; return original value. + return value + return fieldfunc + + +class RewritePlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + self.config.add({}) + + # Gather all the rewrite rules for each field. + rules = defaultdict(list) + for key, view in self.config.items(): + value = view.as_str() + try: + fieldname, pattern = key.split(None, 1) + except ValueError: + raise ui.UserError("invalid rewrite specification") + if fieldname not in library.Item._fields: + raise ui.UserError("invalid field name (%s) in rewriter" % + fieldname) + self._log.debug('adding template field {0}', key) + pattern = re.compile(pattern.lower()) + rules[fieldname].append((pattern, value)) + if fieldname == 'artist': + # Special case for the artist field: apply the same + # rewrite for "albumartist" as well. + rules['albumartist'].append((pattern, value)) + + # Replace each template field with the new rewriter function. + for fieldname, fieldrules in rules.items(): + getter = rewriter(fieldname, fieldrules) + self.template_fields[fieldname] = getter + if fieldname in library.Album._fields: + self.album_template_fields[fieldname] = getter diff --git a/lib/beetsplug/scrub.py b/lib/beetsplug/scrub.py new file mode 100644 index 00000000..d8044668 --- /dev/null +++ b/lib/beetsplug/scrub.py @@ -0,0 +1,149 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Cleans extraneous metadata from files' tags via a command or +automatically whenever tags are written. +""" + + +from beets.plugins import BeetsPlugin +from beets import ui +from beets import util +from beets import config +import mediafile +import mutagen + +_MUTAGEN_FORMATS = { + 'asf': 'ASF', + 'apev2': 'APEv2File', + 'flac': 'FLAC', + 'id3': 'ID3FileType', + 'mp3': 'MP3', + 'mp4': 'MP4', + 'oggflac': 'OggFLAC', + 'oggspeex': 'OggSpeex', + 'oggtheora': 'OggTheora', + 'oggvorbis': 'OggVorbis', + 'oggopus': 'OggOpus', + 'trueaudio': 'TrueAudio', + 'wavpack': 'WavPack', + 'monkeysaudio': 'MonkeysAudio', + 'optimfrog': 'OptimFROG', +} + + +class ScrubPlugin(BeetsPlugin): + """Removes extraneous metadata from files' tags.""" + def __init__(self): + super().__init__() + self.config.add({ + 'auto': True, + }) + + if self.config['auto']: + self.register_listener("import_task_files", self.import_task_files) + + def commands(self): + def scrub_func(lib, opts, args): + # Walk through matching files and remove tags. + for item in lib.items(ui.decargs(args)): + self._log.info('scrubbing: {0}', + util.displayable_path(item.path)) + self._scrub_item(item, opts.write) + + scrub_cmd = ui.Subcommand('scrub', help='clean audio tags') + scrub_cmd.parser.add_option( + '-W', '--nowrite', dest='write', + action='store_false', default=True, + help='leave tags empty') + scrub_cmd.func = scrub_func + + return [scrub_cmd] + + @staticmethod + def _mutagen_classes(): + """Get a list of file type classes from the Mutagen module. + """ + classes = [] + for modname, clsname in _MUTAGEN_FORMATS.items(): + mod = __import__(f'mutagen.{modname}', + fromlist=[clsname]) + classes.append(getattr(mod, clsname)) + return classes + + def _scrub(self, path): + """Remove all tags from a file. + """ + for cls in self._mutagen_classes(): + # Try opening the file with this type, but just skip in the + # event of any error. + try: + f = cls(util.syspath(path)) + except Exception: + continue + if f.tags is None: + continue + + # Remove the tag for this type. + try: + f.delete() + except NotImplementedError: + # Some Mutagen metadata subclasses (namely, ASFTag) do not + # support .delete(), presumably because it is impossible to + # remove them. In this case, we just remove all the tags. + for tag in f.keys(): + del f[tag] + f.save() + except (OSError, mutagen.MutagenError) as exc: + self._log.error('could not scrub {0}: {1}', + util.displayable_path(path), exc) + + def _scrub_item(self, item, restore=True): + """Remove tags from an Item's associated file and, if `restore` + is enabled, write the database's tags back to the file. + """ + # Get album art if we need to restore it. + if restore: + try: + mf = mediafile.MediaFile(util.syspath(item.path), + config['id3v23'].get(bool)) + except mediafile.UnreadableFileError as exc: + self._log.error('could not open file to scrub: {0}', + exc) + return + images = mf.images + + # Remove all tags. + self._scrub(item.path) + + # Restore tags, if enabled. + if restore: + self._log.debug('writing new tags after scrub') + item.try_write() + if images: + self._log.debug('restoring art') + try: + mf = mediafile.MediaFile(util.syspath(item.path), + config['id3v23'].get(bool)) + mf.images = images + mf.save() + except mediafile.UnreadableFileError as exc: + self._log.error('could not write tags: {0}', exc) + + def import_task_files(self, session, task): + """Automatically scrub imported files.""" + for item in task.imported_items(): + self._log.debug('auto-scrubbing {0}', + util.displayable_path(item.path)) + self._scrub_item(item) diff --git a/lib/beetsplug/smartplaylist.py b/lib/beetsplug/smartplaylist.py new file mode 100644 index 00000000..4c921ecc --- /dev/null +++ b/lib/beetsplug/smartplaylist.py @@ -0,0 +1,224 @@ +# This file is part of beets. +# Copyright 2016, Dang Mai <contact@dangmai.net>. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Generates smart playlists based on beets queries. +""" + + +from beets.plugins import BeetsPlugin +from beets import ui +from beets.util import (mkdirall, normpath, sanitize_path, syspath, + bytestring_path, path_as_posix) +from beets.library import Item, Album, parse_query_string +from beets.dbcore import OrQuery +from beets.dbcore.query import MultipleSort, ParsingError +import os + +try: + from urllib.request import pathname2url +except ImportError: + # python2 is a bit different + from urllib import pathname2url + + +class SmartPlaylistPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add({ + 'relative_to': None, + 'playlist_dir': '.', + 'auto': True, + 'playlists': [], + 'forward_slash': False, + 'prefix': '', + 'urlencode': False, + }) + + self.config['prefix'].redact = True # May contain username/password. + self._matched_playlists = None + self._unmatched_playlists = None + + if self.config['auto']: + self.register_listener('database_change', self.db_change) + + def commands(self): + spl_update = ui.Subcommand( + 'splupdate', + help='update the smart playlists. Playlist names may be ' + 'passed as arguments.' + ) + spl_update.func = self.update_cmd + return [spl_update] + + def update_cmd(self, lib, opts, args): + self.build_queries() + if args: + args = set(ui.decargs(args)) + for a in list(args): + if not a.endswith(".m3u"): + args.add(f"{a}.m3u") + + playlists = {(name, q, a_q) + for name, q, a_q in self._unmatched_playlists + if name in args} + if not playlists: + raise ui.UserError( + 'No playlist matching any of {} found'.format( + [name for name, _, _ in self._unmatched_playlists]) + ) + + self._matched_playlists = playlists + self._unmatched_playlists -= playlists + else: + self._matched_playlists = self._unmatched_playlists + + self.update_playlists(lib) + + def build_queries(self): + """ + Instantiate queries for the playlists. + + Each playlist has 2 queries: one or items one for albums, each with a + sort. We must also remember its name. _unmatched_playlists is a set of + tuples (name, (q, q_sort), (album_q, album_q_sort)). + + sort may be any sort, or NullSort, or None. None and NullSort are + equivalent and both eval to False. + More precisely + - it will be NullSort when a playlist query ('query' or 'album_query') + is a single item or a list with 1 element + - it will be None when there are multiple items i a query + """ + self._unmatched_playlists = set() + self._matched_playlists = set() + + for playlist in self.config['playlists'].get(list): + if 'name' not in playlist: + self._log.warning("playlist configuration is missing name") + continue + + playlist_data = (playlist['name'],) + try: + for key, model_cls in (('query', Item), + ('album_query', Album)): + qs = playlist.get(key) + if qs is None: + query_and_sort = None, None + elif isinstance(qs, str): + query_and_sort = parse_query_string(qs, model_cls) + elif len(qs) == 1: + query_and_sort = parse_query_string(qs[0], model_cls) + else: + # multiple queries and sorts + queries, sorts = zip(*(parse_query_string(q, model_cls) + for q in qs)) + query = OrQuery(queries) + final_sorts = [] + for s in sorts: + if s: + if isinstance(s, MultipleSort): + final_sorts += s.sorts + else: + final_sorts.append(s) + if not final_sorts: + sort = None + elif len(final_sorts) == 1: + sort, = final_sorts + else: + sort = MultipleSort(final_sorts) + query_and_sort = query, sort + + playlist_data += (query_and_sort,) + + except ParsingError as exc: + self._log.warning("invalid query in playlist {}: {}", + playlist['name'], exc) + continue + + self._unmatched_playlists.add(playlist_data) + + def matches(self, model, query, album_query): + if album_query and isinstance(model, Album): + return album_query.match(model) + if query and isinstance(model, Item): + return query.match(model) + return False + + def db_change(self, lib, model): + if self._unmatched_playlists is None: + self.build_queries() + + for playlist in self._unmatched_playlists: + n, (q, _), (a_q, _) = playlist + if self.matches(model, q, a_q): + self._log.debug( + "{0} will be updated because of {1}", n, model) + self._matched_playlists.add(playlist) + self.register_listener('cli_exit', self.update_playlists) + + self._unmatched_playlists -= self._matched_playlists + + def update_playlists(self, lib): + self._log.info("Updating {0} smart playlists...", + len(self._matched_playlists)) + + playlist_dir = self.config['playlist_dir'].as_filename() + playlist_dir = bytestring_path(playlist_dir) + relative_to = self.config['relative_to'].get() + if relative_to: + relative_to = normpath(relative_to) + + # Maps playlist filenames to lists of track filenames. + m3us = {} + + for playlist in self._matched_playlists: + name, (query, q_sort), (album_query, a_q_sort) = playlist + self._log.debug("Creating playlist {0}", name) + items = [] + + if query: + items.extend(lib.items(query, q_sort)) + if album_query: + for album in lib.albums(album_query, a_q_sort): + items.extend(album.items()) + + # As we allow tags in the m3u names, we'll need to iterate through + # the items and generate the correct m3u file names. + for item in items: + m3u_name = item.evaluate_template(name, True) + m3u_name = sanitize_path(m3u_name, lib.replacements) + if m3u_name not in m3us: + m3us[m3u_name] = [] + item_path = item.path + if relative_to: + item_path = os.path.relpath(item.path, relative_to) + if item_path not in m3us[m3u_name]: + m3us[m3u_name].append(item_path) + + prefix = bytestring_path(self.config['prefix'].as_str()) + # Write all of the accumulated track lists to files. + for m3u in m3us: + m3u_path = normpath(os.path.join(playlist_dir, + bytestring_path(m3u))) + mkdirall(m3u_path) + with open(syspath(m3u_path), 'wb') as f: + for path in m3us[m3u]: + if self.config['forward_slash'].get(): + path = path_as_posix(path) + if self.config['urlencode']: + path = bytestring_path(pathname2url(path)) + f.write(prefix + path + b'\n') + + self._log.info("{0} playlists updated", len(self._matched_playlists)) diff --git a/lib/beetsplug/sonosupdate.py b/lib/beetsplug/sonosupdate.py new file mode 100644 index 00000000..aeb211d8 --- /dev/null +++ b/lib/beetsplug/sonosupdate.py @@ -0,0 +1,46 @@ +# This file is part of beets. +# Copyright 2018, Tobias Sauerwein. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Updates a Sonos library whenever the beets library is changed. +This is based on the Kodi Update plugin. +""" + +from beets.plugins import BeetsPlugin +import soco + + +class SonosUpdate(BeetsPlugin): + def __init__(self): + super().__init__() + self.register_listener('database_change', self.listen_for_db_change) + + def listen_for_db_change(self, lib, model): + """Listens for beets db change and register the update""" + self.register_listener('cli_exit', self.update) + + def update(self, lib): + """When the client exists try to send refresh request to a Sonos + controler. + """ + self._log.info('Requesting a Sonos library update...') + + device = soco.discovery.any_soco() + + if device: + device.music_library.start_library_update() + else: + self._log.warning('Could not find a Sonos device.') + return + + self._log.info('Sonos update triggered') diff --git a/lib/beetsplug/spotify.py b/lib/beetsplug/spotify.py new file mode 100644 index 00000000..2529160d --- /dev/null +++ b/lib/beetsplug/spotify.py @@ -0,0 +1,527 @@ +# This file is part of beets. +# Copyright 2019, Rahul Ahuja. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Adds Spotify release and track search support to the autotagger, along with +Spotify playlist construction. +""" + +import re +import json +import base64 +import webbrowser +import collections + +import unidecode +import requests +import confuse + +from beets import ui +from beets.autotag.hooks import AlbumInfo, TrackInfo +from beets.plugins import MetadataSourcePlugin, BeetsPlugin + + +class SpotifyPlugin(MetadataSourcePlugin, BeetsPlugin): + data_source = 'Spotify' + + # Base URLs for the Spotify API + # Documentation: https://developer.spotify.com/web-api + oauth_token_url = 'https://accounts.spotify.com/api/token' + open_track_url = 'https://open.spotify.com/track/' + search_url = 'https://api.spotify.com/v1/search' + album_url = 'https://api.spotify.com/v1/albums/' + track_url = 'https://api.spotify.com/v1/tracks/' + + # Spotify IDs consist of 22 alphanumeric characters + # (zero-left-padded base62 representation of randomly generated UUID4) + id_regex = { + 'pattern': r'(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})', + 'match_group': 2, + } + + def __init__(self): + super().__init__() + self.config.add( + { + 'mode': 'list', + 'tiebreak': 'popularity', + 'show_failures': False, + 'artist_field': 'albumartist', + 'album_field': 'album', + 'track_field': 'title', + 'region_filter': None, + 'regex': [], + 'client_id': '4e414367a1d14c75a5c5129a627fcab8', + 'client_secret': 'f82bdc09b2254f1a8286815d02fd46dc', + 'tokenfile': 'spotify_token.json', + } + ) + self.config['client_secret'].redact = True + + self.tokenfile = self.config['tokenfile'].get( + confuse.Filename(in_app_dir=True) + ) # Path to the JSON file for storing the OAuth access token. + self.setup() + + def setup(self): + """Retrieve previously saved OAuth token or generate a new one.""" + try: + with open(self.tokenfile) as f: + token_data = json.load(f) + except OSError: + self._authenticate() + else: + self.access_token = token_data['access_token'] + + def _authenticate(self): + """Request an access token via the Client Credentials Flow: + https://developer.spotify.com/documentation/general/guides/authorization-guide/#client-credentials-flow + """ + headers = { + 'Authorization': 'Basic {}'.format( + base64.b64encode( + ':'.join( + self.config[k].as_str() + for k in ('client_id', 'client_secret') + ).encode() + ).decode() + ) + } + response = requests.post( + self.oauth_token_url, + data={'grant_type': 'client_credentials'}, + headers=headers, + ) + try: + response.raise_for_status() + except requests.exceptions.HTTPError as e: + raise ui.UserError( + 'Spotify authorization failed: {}\n{}'.format( + e, response.text + ) + ) + self.access_token = response.json()['access_token'] + + # Save the token for later use. + self._log.debug( + '{} access token: {}', self.data_source, self.access_token + ) + with open(self.tokenfile, 'w') as f: + json.dump({'access_token': self.access_token}, f) + + def _handle_response(self, request_type, url, params=None): + """Send a request, reauthenticating if necessary. + + :param request_type: Type of :class:`Request` constructor, + e.g. ``requests.get``, ``requests.post``, etc. + :type request_type: function + :param url: URL for the new :class:`Request` object. + :type url: str + :param params: (optional) list of tuples or bytes to send + in the query string for the :class:`Request`. + :type params: dict + :return: JSON data for the class:`Response <Response>` object. + :rtype: dict + """ + response = request_type( + url, + headers={'Authorization': f'Bearer {self.access_token}'}, + params=params, + ) + if response.status_code != 200: + if 'token expired' in response.text: + self._log.debug( + '{} access token has expired. Reauthenticating.', + self.data_source, + ) + self._authenticate() + return self._handle_response(request_type, url, params=params) + else: + raise ui.UserError( + '{} API error:\n{}\nURL:\n{}\nparams:\n{}'.format( + self.data_source, response.text, url, params + ) + ) + return response.json() + + def album_for_id(self, album_id): + """Fetch an album by its Spotify ID or URL and return an + AlbumInfo object or None if the album is not found. + + :param album_id: Spotify ID or URL for the album + :type album_id: str + :return: AlbumInfo object for album + :rtype: beets.autotag.hooks.AlbumInfo or None + """ + spotify_id = self._get_id('album', album_id) + if spotify_id is None: + return None + + album_data = self._handle_response( + requests.get, self.album_url + spotify_id + ) + artist, artist_id = self.get_artist(album_data['artists']) + + date_parts = [ + int(part) for part in album_data['release_date'].split('-') + ] + + release_date_precision = album_data['release_date_precision'] + if release_date_precision == 'day': + year, month, day = date_parts + elif release_date_precision == 'month': + year, month = date_parts + day = None + elif release_date_precision == 'year': + year = date_parts[0] + month = None + day = None + else: + raise ui.UserError( + "Invalid `release_date_precision` returned " + "by {} API: '{}'".format( + self.data_source, release_date_precision + ) + ) + + tracks = [] + medium_totals = collections.defaultdict(int) + for i, track_data in enumerate(album_data['tracks']['items'], start=1): + track = self._get_track(track_data) + track.index = i + medium_totals[track.medium] += 1 + tracks.append(track) + for track in tracks: + track.medium_total = medium_totals[track.medium] + + return AlbumInfo( + album=album_data['name'], + album_id=spotify_id, + artist=artist, + artist_id=artist_id, + tracks=tracks, + albumtype=album_data['album_type'], + va=len(album_data['artists']) == 1 + and artist.lower() == 'various artists', + year=year, + month=month, + day=day, + label=album_data['label'], + mediums=max(medium_totals.keys()), + data_source=self.data_source, + data_url=album_data['external_urls']['spotify'], + ) + + def _get_track(self, track_data): + """Convert a Spotify track object dict to a TrackInfo object. + + :param track_data: Simplified track object + (https://developer.spotify.com/documentation/web-api/reference/object-model/#track-object-simplified) + :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo + """ + artist, artist_id = self.get_artist(track_data['artists']) + return TrackInfo( + title=track_data['name'], + track_id=track_data['id'], + artist=artist, + artist_id=artist_id, + length=track_data['duration_ms'] / 1000, + index=track_data['track_number'], + medium=track_data['disc_number'], + medium_index=track_data['track_number'], + data_source=self.data_source, + data_url=track_data['external_urls']['spotify'], + ) + + def track_for_id(self, track_id=None, track_data=None): + """Fetch a track by its Spotify ID or URL and return a + TrackInfo object or None if the track is not found. + + :param track_id: (Optional) Spotify ID or URL for the track. Either + ``track_id`` or ``track_data`` must be provided. + :type track_id: str + :param track_data: (Optional) Simplified track object dict. May be + provided instead of ``track_id`` to avoid unnecessary API calls. + :type track_data: dict + :return: TrackInfo object for track + :rtype: beets.autotag.hooks.TrackInfo or None + """ + if track_data is None: + spotify_id = self._get_id('track', track_id) + if spotify_id is None: + return None + track_data = self._handle_response( + requests.get, self.track_url + spotify_id + ) + track = self._get_track(track_data) + + # Get album's tracks to set `track.index` (position on the entire + # release) and `track.medium_total` (total number of tracks on + # the track's disc). + album_data = self._handle_response( + requests.get, self.album_url + track_data['album']['id'] + ) + medium_total = 0 + for i, track_data in enumerate(album_data['tracks']['items'], start=1): + if track_data['disc_number'] == track.medium: + medium_total += 1 + if track_data['id'] == track.track_id: + track.index = i + track.medium_total = medium_total + return track + + @staticmethod + def _construct_search_query(filters=None, keywords=''): + """Construct a query string with the specified filters and keywords to + be provided to the Spotify Search API + (https://developer.spotify.com/documentation/web-api/reference/search/search/#writing-a-query---guidelines). + + :param filters: (Optional) Field filters to apply. + :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: Query string to be provided to the Search API. + :rtype: str + """ + query_components = [ + keywords, + ' '.join(':'.join((k, v)) for k, v in filters.items()), + ] + query = ' '.join([q for q in query_components if q]) + if not isinstance(query, str): + query = query.decode('utf8') + return unidecode.unidecode(query) + + def _search_api(self, query_type, filters=None, keywords=''): + """Query the Spotify Search API for the specified ``keywords``, applying + the provided ``filters``. + + :param query_type: Item type to search across. Valid types are: + 'album', 'artist', 'playlist', and 'track'. + :type query_type: str + :param filters: (Optional) Field filters to apply. + :type filters: dict + :param keywords: (Optional) Query keywords to use. + :type keywords: str + :return: JSON data for the class:`Response <Response>` object or None + if no search results are returned. + :rtype: dict or None + """ + query = self._construct_search_query( + keywords=keywords, filters=filters + ) + if not query: + return None + self._log.debug( + f"Searching {self.data_source} for '{query}'" + ) + response_data = ( + self._handle_response( + requests.get, + self.search_url, + params={'q': query, 'type': query_type}, + ) + .get(query_type + 's', {}) + .get('items', []) + ) + self._log.debug( + "Found {} result(s) from {} for '{}'", + len(response_data), + self.data_source, + query, + ) + return response_data + + def commands(self): + def queries(lib, opts, args): + success = self._parse_opts(opts) + if success: + results = self._match_library_tracks(lib, ui.decargs(args)) + self._output_match_results(results) + + spotify_cmd = ui.Subcommand( + 'spotify', help=f'build a {self.data_source} playlist' + ) + spotify_cmd.parser.add_option( + '-m', + '--mode', + action='store', + help='"open" to open {} with playlist, ' + '"list" to print (default)'.format(self.data_source), + ) + spotify_cmd.parser.add_option( + '-f', + '--show-failures', + action='store_true', + dest='show_failures', + help='list tracks that did not match a {} ID'.format( + self.data_source + ), + ) + spotify_cmd.func = queries + return [spotify_cmd] + + def _parse_opts(self, opts): + if opts.mode: + self.config['mode'].set(opts.mode) + + if opts.show_failures: + self.config['show_failures'].set(True) + + if self.config['mode'].get() not in ['list', 'open']: + self._log.warning( + '{0} is not a valid mode', self.config['mode'].get() + ) + return False + + self.opts = opts + return True + + def _match_library_tracks(self, library, keywords): + """Get a list of simplified track object dicts for library tracks + matching the specified ``keywords``. + + :param library: beets library object to query. + :type library: beets.library.Library + :param keywords: Query to match library items against. + :type keywords: str + :return: List of simplified track object dicts for library items + matching the specified query. + :rtype: list[dict] + """ + results = [] + failures = [] + + items = library.items(keywords) + + if not items: + self._log.debug( + 'Your beets query returned no items, skipping {}.', + self.data_source, + ) + return + + self._log.info('Processing {} tracks...', len(items)) + + for item in items: + # Apply regex transformations if provided + for regex in self.config['regex'].get(): + if ( + not regex['field'] + or not regex['search'] + or not regex['replace'] + ): + continue + + value = item[regex['field']] + item[regex['field']] = re.sub( + regex['search'], regex['replace'], value + ) + + # Custom values can be passed in the config (just in case) + artist = item[self.config['artist_field'].get()] + album = item[self.config['album_field'].get()] + keywords = item[self.config['track_field'].get()] + + # Query the Web API for each track, look for the items' JSON data + query_filters = {'artist': artist, 'album': album} + response_data_tracks = self._search_api( + query_type='track', keywords=keywords, filters=query_filters + ) + if not response_data_tracks: + query = self._construct_search_query( + keywords=keywords, filters=query_filters + ) + failures.append(query) + continue + + # Apply market filter if requested + region_filter = self.config['region_filter'].get() + if region_filter: + response_data_tracks = [ + track_data + for track_data in response_data_tracks + if region_filter in track_data['available_markets'] + ] + + if ( + len(response_data_tracks) == 1 + or self.config['tiebreak'].get() == 'first' + ): + self._log.debug( + '{} track(s) found, count: {}', + self.data_source, + len(response_data_tracks), + ) + chosen_result = response_data_tracks[0] + else: + # Use the popularity filter + self._log.debug( + 'Most popular track chosen, count: {}', + len(response_data_tracks), + ) + chosen_result = max( + response_data_tracks, key=lambda x: x['popularity'] + ) + results.append(chosen_result) + + failure_count = len(failures) + if failure_count > 0: + if self.config['show_failures'].get(): + self._log.info( + '{} track(s) did not match a {} ID:', + failure_count, + self.data_source, + ) + for track in failures: + self._log.info('track: {}', track) + self._log.info('') + else: + self._log.warning( + '{} track(s) did not match a {} ID:\n' + 'use --show-failures to display', + failure_count, + self.data_source, + ) + + return results + + def _output_match_results(self, results): + """Open a playlist or print Spotify URLs for the provided track + object dicts. + + :param results: List of simplified track object dicts + (https://developer.spotify.com/documentation/web-api/reference/object-model/#track-object-simplified) + :type results: list[dict] + """ + if results: + spotify_ids = [track_data['id'] for track_data in results] + if self.config['mode'].get() == 'open': + self._log.info( + 'Attempting to open {} with playlist'.format( + self.data_source + ) + ) + spotify_url = 'spotify:trackset:Playlist:' + ','.join( + spotify_ids + ) + webbrowser.open(spotify_url) + else: + for spotify_id in spotify_ids: + print(self.open_track_url + spotify_id) + else: + self._log.warning( + f'No {self.data_source} tracks found from beets query' + ) diff --git a/lib/beetsplug/subsonicplaylist.py b/lib/beetsplug/subsonicplaylist.py new file mode 100644 index 00000000..ead78919 --- /dev/null +++ b/lib/beetsplug/subsonicplaylist.py @@ -0,0 +1,171 @@ +# This file is part of beets. +# Copyright 2019, Joris Jensen +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +import random +import string +from xml.etree import ElementTree +from hashlib import md5 +from urllib.parse import urlencode + +import requests + +from beets.dbcore import AndQuery +from beets.dbcore.query import MatchQuery +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand + +__author__ = 'https://github.com/MrNuggelz' + + +def filter_to_be_removed(items, keys): + if len(items) > len(keys): + dont_remove = [] + for artist, album, title in keys: + for item in items: + if artist == item['artist'] and \ + album == item['album'] and \ + title == item['title']: + dont_remove.append(item) + return [item for item in items if item not in dont_remove] + else: + def to_be_removed(item): + for artist, album, title in keys: + if artist == item['artist'] and\ + album == item['album'] and\ + title == item['title']: + return False + return True + + return [item for item in items if to_be_removed(item)] + + +class SubsonicPlaylistPlugin(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add( + { + 'delete': False, + 'playlist_ids': [], + 'playlist_names': [], + 'username': '', + 'password': '' + } + ) + self.config['password'].redact = True + + def update_tags(self, playlist_dict, lib): + with lib.transaction(): + for query, playlist_tag in playlist_dict.items(): + query = AndQuery([MatchQuery("artist", query[0]), + MatchQuery("album", query[1]), + MatchQuery("title", query[2])]) + items = lib.items(query) + if not items: + self._log.warn("{} | track not found ({})", playlist_tag, + query) + continue + for item in items: + item.subsonic_playlist = playlist_tag + item.try_sync(write=True, move=False) + + def get_playlist(self, playlist_id): + xml = self.send('getPlaylist', {'id': playlist_id}).text + playlist = ElementTree.fromstring(xml)[0] + if playlist.attrib.get('code', '200') != '200': + alt_error = 'error getting playlist, but no error message found' + self._log.warn(playlist.attrib.get('message', alt_error)) + return + + name = playlist.attrib.get('name', 'undefined') + tracks = [(t.attrib['artist'], t.attrib['album'], t.attrib['title']) + for t in playlist] + return name, tracks + + def commands(self): + def build_playlist(lib, opts, args): + self.config.set_args(opts) + ids = self.config['playlist_ids'].as_str_seq() + if self.config['playlist_names'].as_str_seq(): + playlists = ElementTree.fromstring( + self.send('getPlaylists').text)[0] + if playlists.attrib.get('code', '200') != '200': + alt_error = 'error getting playlists,' \ + ' but no error message found' + self._log.warn( + playlists.attrib.get('message', alt_error)) + return + for name in self.config['playlist_names'].as_str_seq(): + for playlist in playlists: + if name == playlist.attrib['name']: + ids.append(playlist.attrib['id']) + + playlist_dict = self.get_playlists(ids) + + # delete old tags + if self.config['delete']: + existing = list(lib.items('subsonic_playlist:";"')) + to_be_removed = filter_to_be_removed( + existing, + playlist_dict.keys()) + for item in to_be_removed: + item['subsonic_playlist'] = '' + with lib.transaction(): + item.try_sync(write=True, move=False) + + self.update_tags(playlist_dict, lib) + + subsonicplaylist_cmds = Subcommand( + 'subsonicplaylist', help='import a subsonic playlist' + ) + subsonicplaylist_cmds.parser.add_option( + '-d', + '--delete', + action='store_true', + help='delete tag from items not in any playlist anymore', + ) + subsonicplaylist_cmds.func = build_playlist + return [subsonicplaylist_cmds] + + def generate_token(self): + salt = ''.join(random.choices(string.ascii_lowercase + string.digits)) + return md5( + (self.config['password'].get() + salt).encode()).hexdigest(), salt + + def send(self, endpoint, params=None): + if params is None: + params = {} + a, b = self.generate_token() + params['u'] = self.config['username'] + params['t'] = a + params['s'] = b + params['v'] = '1.12.0' + params['c'] = 'beets' + resp = requests.get('{}/rest/{}?{}'.format( + self.config['base_url'].get(), + endpoint, + urlencode(params)) + ) + return resp + + def get_playlists(self, ids): + output = {} + for playlist_id in ids: + name, tracks = self.get_playlist(playlist_id) + for track in tracks: + if track not in output: + output[track] = ';' + output[track] += name + ';' + return output diff --git a/lib/beetsplug/subsonicupdate.py b/lib/beetsplug/subsonicupdate.py new file mode 100644 index 00000000..9480bcb4 --- /dev/null +++ b/lib/beetsplug/subsonicupdate.py @@ -0,0 +1,144 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Updates Subsonic library on Beets import +Your Beets configuration file should contain +a "subsonic" section like the following: + subsonic: + url: https://mydomain.com:443/subsonic + user: username + pass: password + auth: token +For older Subsonic versions, token authentication +is not supported, use password instead: + subsonic: + url: https://mydomain.com:443/subsonic + user: username + pass: password + auth: pass +""" + +import hashlib +import random +import string + +import requests + +from binascii import hexlify +from beets import config +from beets.plugins import BeetsPlugin + +__author__ = 'https://github.com/maffo999' + + +class SubsonicUpdate(BeetsPlugin): + def __init__(self): + super().__init__() + # Set default configuration values + config['subsonic'].add({ + 'user': 'admin', + 'pass': 'admin', + 'url': 'http://localhost:4040', + 'auth': 'token', + }) + config['subsonic']['pass'].redact = True + self.register_listener('import', self.start_scan) + + @staticmethod + def __create_token(): + """Create salt and token from given password. + + :return: The generated salt and hashed token + """ + password = config['subsonic']['pass'].as_str() + + # Pick the random sequence and salt the password + r = string.ascii_letters + string.digits + salt = "".join([random.choice(r) for _ in range(6)]) + salted_password = password + salt + token = hashlib.md5(salted_password.encode('utf-8')).hexdigest() + + # Put together the payload of the request to the server and the URL + return salt, token + + @staticmethod + def __format_url(endpoint): + """Get the Subsonic URL to trigger the given endpoint. + Uses either the url config option or the deprecated host, port, + and context_path config options together. + + :return: Endpoint for updating Subsonic + """ + + url = config['subsonic']['url'].as_str() + if url and url.endswith('/'): + url = url[:-1] + + # @deprecated("Use url config option instead") + if not url: + host = config['subsonic']['host'].as_str() + port = config['subsonic']['port'].get(int) + context_path = config['subsonic']['contextpath'].as_str() + if context_path == '/': + context_path = '' + url = f"http://{host}:{port}{context_path}" + + return url + f'/rest/{endpoint}' + + def start_scan(self): + user = config['subsonic']['user'].as_str() + auth = config['subsonic']['auth'].as_str() + url = self.__format_url("startScan") + self._log.debug('URL is {0}', url) + self._log.debug('auth type is {0}', config['subsonic']['auth']) + + if auth == "token": + salt, token = self.__create_token() + payload = { + 'u': user, + 't': token, + 's': salt, + 'v': '1.13.0', # Subsonic 5.3 and newer + 'c': 'beets', + 'f': 'json' + } + elif auth == "password": + password = config['subsonic']['pass'].as_str() + encpass = hexlify(password.encode()).decode() + payload = { + 'u': user, + 'p': f'enc:{encpass}', + 'v': '1.12.0', + 'c': 'beets', + 'f': 'json' + } + else: + return + try: + response = requests.get(url, params=payload) + json = response.json() + + if response.status_code == 200 and \ + json['subsonic-response']['status'] == "ok": + count = json['subsonic-response']['scanStatus']['count'] + self._log.info( + f'Updating Subsonic; scanning {count} tracks') + elif response.status_code == 200 and \ + json['subsonic-response']['status'] == "failed": + error_message = json['subsonic-response']['error']['message'] + self._log.error(f'Error: {error_message}') + else: + self._log.error('Error: {0}', json) + except Exception as error: + self._log.error(f'Error: {error}') diff --git a/lib/beetsplug/the.py b/lib/beetsplug/the.py new file mode 100644 index 00000000..e6626d2b --- /dev/null +++ b/lib/beetsplug/the.py @@ -0,0 +1,98 @@ +# This file is part of beets. +# Copyright 2016, Blemjhoo Tezoulbr <baobab@heresiarch.info>. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Moves patterns in path formats (suitable for moving articles).""" + + +import re +from beets.plugins import BeetsPlugin + +__author__ = 'baobab@heresiarch.info' +__version__ = '1.1' + +PATTERN_THE = '^the\\s' +PATTERN_A = '^[a][n]?\\s' +FORMAT = '{0}, {1}' + + +class ThePlugin(BeetsPlugin): + + patterns = [] + + def __init__(self): + super().__init__() + + self.template_funcs['the'] = self.the_template_func + + self.config.add({ + 'the': True, + 'a': True, + 'format': '{0}, {1}', + 'strip': False, + 'patterns': [], + }) + + self.patterns = self.config['patterns'].as_str_seq() + for p in self.patterns: + if p: + try: + re.compile(p) + except re.error: + self._log.error('invalid pattern: {0}', p) + else: + if not (p.startswith('^') or p.endswith('$')): + self._log.warning('warning: \"{0}\" will not ' + 'match string start/end', p) + if self.config['a']: + self.patterns = [PATTERN_A] + self.patterns + if self.config['the']: + self.patterns = [PATTERN_THE] + self.patterns + if not self.patterns: + self._log.warning('no patterns defined!') + + def unthe(self, text, pattern): + """Moves pattern in the path format string or strips it + + text -- text to handle + pattern -- regexp pattern (case ignore is already on) + strip -- if True, pattern will be removed + """ + if text: + r = re.compile(pattern, flags=re.IGNORECASE) + try: + t = r.findall(text)[0] + except IndexError: + return text + else: + r = re.sub(r, '', text).strip() + if self.config['strip']: + return r + else: + fmt = self.config['format'].as_str() + return fmt.format(r, t.strip()).strip() + else: + return '' + + def the_template_func(self, text): + if not self.patterns: + return text + if text: + for p in self.patterns: + r = self.unthe(text, p) + if r != text: + self._log.debug('\"{0}\" -> \"{1}\"', text, r) + break + return r + else: + return '' diff --git a/lib/beetsplug/thumbnails.py b/lib/beetsplug/thumbnails.py new file mode 100644 index 00000000..6bd9cbac --- /dev/null +++ b/lib/beetsplug/thumbnails.py @@ -0,0 +1,291 @@ +# This file is part of beets. +# Copyright 2016, Bruno Cauet +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Create freedesktop.org-compliant thumbnails for album folders + +This plugin is POSIX-only. +Spec: standards.freedesktop.org/thumbnail-spec/latest/index.html +""" + + +from hashlib import md5 +import os +import shutil +from itertools import chain +from pathlib import PurePosixPath +import ctypes +import ctypes.util + +from xdg import BaseDirectory + +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand, decargs +from beets import util +from beets.util.artresizer import ArtResizer, get_im_version, get_pil_version + + +BASE_DIR = os.path.join(BaseDirectory.xdg_cache_home, "thumbnails") +NORMAL_DIR = util.bytestring_path(os.path.join(BASE_DIR, "normal")) +LARGE_DIR = util.bytestring_path(os.path.join(BASE_DIR, "large")) + + +class ThumbnailsPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.config.add({ + 'auto': True, + 'force': False, + 'dolphin': False, + }) + + self.write_metadata = None + if self.config['auto'] and self._check_local_ok(): + self.register_listener('art_set', self.process_album) + + def commands(self): + thumbnails_command = Subcommand("thumbnails", + help="Create album thumbnails") + thumbnails_command.parser.add_option( + '-f', '--force', + dest='force', action='store_true', default=False, + help='force regeneration of thumbnails deemed fine (existing & ' + 'recent enough)') + thumbnails_command.parser.add_option( + '--dolphin', dest='dolphin', action='store_true', default=False, + help="create Dolphin-compatible thumbnail information (for KDE)") + thumbnails_command.func = self.process_query + + return [thumbnails_command] + + def process_query(self, lib, opts, args): + self.config.set_args(opts) + if self._check_local_ok(): + for album in lib.albums(decargs(args)): + self.process_album(album) + + def _check_local_ok(self): + """Check that's everythings ready: + - local capability to resize images + - thumbnail dirs exist (create them if needed) + - detect whether we'll use PIL or IM + - detect whether we'll use GIO or Python to get URIs + """ + if not ArtResizer.shared.local: + self._log.warning("No local image resizing capabilities, " + "cannot generate thumbnails") + return False + + for dir in (NORMAL_DIR, LARGE_DIR): + if not os.path.exists(dir): + os.makedirs(dir) + + if get_im_version(): + self.write_metadata = write_metadata_im + tool = "IM" + else: + assert get_pil_version() # since we're local + self.write_metadata = write_metadata_pil + tool = "PIL" + self._log.debug("using {0} to write metadata", tool) + + uri_getter = GioURI() + if not uri_getter.available: + uri_getter = PathlibURI() + self._log.debug("using {0.name} to compute URIs", uri_getter) + self.get_uri = uri_getter.uri + + return True + + def process_album(self, album): + """Produce thumbnails for the album folder. + """ + self._log.debug('generating thumbnail for {0}', album) + if not album.artpath: + self._log.info('album {0} has no art', album) + return + + if self.config['dolphin']: + self.make_dolphin_cover_thumbnail(album) + + size = ArtResizer.shared.get_size(album.artpath) + if not size: + self._log.warning('problem getting the picture size for {0}', + album.artpath) + return + + wrote = True + if max(size) >= 256: + wrote &= self.make_cover_thumbnail(album, 256, LARGE_DIR) + wrote &= self.make_cover_thumbnail(album, 128, NORMAL_DIR) + + if wrote: + self._log.info('wrote thumbnail for {0}', album) + else: + self._log.info('nothing to do for {0}', album) + + def make_cover_thumbnail(self, album, size, target_dir): + """Make a thumbnail of given size for `album` and put it in + `target_dir`. + """ + target = os.path.join(target_dir, self.thumbnail_file_name(album.path)) + + if os.path.exists(target) and \ + os.stat(target).st_mtime > os.stat(album.artpath).st_mtime: + if self.config['force']: + self._log.debug("found a suitable {1}x{1} thumbnail for {0}, " + "forcing regeneration", album, size) + else: + self._log.debug("{1}x{1} thumbnail for {0} exists and is " + "recent enough", album, size) + return False + resized = ArtResizer.shared.resize(size, album.artpath, + util.syspath(target)) + self.add_tags(album, util.syspath(resized)) + shutil.move(resized, target) + return True + + def thumbnail_file_name(self, path): + """Compute the thumbnail file name + See https://standards.freedesktop.org/thumbnail-spec/latest/x227.html + """ + uri = self.get_uri(path) + hash = md5(uri.encode('utf-8')).hexdigest() + return util.bytestring_path(f"{hash}.png") + + def add_tags(self, album, image_path): + """Write required metadata to the thumbnail + See https://standards.freedesktop.org/thumbnail-spec/latest/x142.html + """ + mtime = os.stat(album.artpath).st_mtime + metadata = {"Thumb::URI": self.get_uri(album.artpath), + "Thumb::MTime": str(mtime)} + try: + self.write_metadata(image_path, metadata) + except Exception: + self._log.exception("could not write metadata to {0}", + util.displayable_path(image_path)) + + def make_dolphin_cover_thumbnail(self, album): + outfilename = os.path.join(album.path, b".directory") + if os.path.exists(outfilename): + return + artfile = os.path.split(album.artpath)[1] + with open(outfilename, 'w') as f: + f.write('[Desktop Entry]\n') + f.write('Icon=./{}'.format(artfile.decode('utf-8'))) + f.close() + self._log.debug("Wrote file {0}", util.displayable_path(outfilename)) + + +def write_metadata_im(file, metadata): + """Enrich the file metadata with `metadata` dict thanks to IM.""" + command = ['convert', file] + \ + list(chain.from_iterable(('-set', k, v) + for k, v in metadata.items())) + [file] + util.command_output(command) + return True + + +def write_metadata_pil(file, metadata): + """Enrich the file metadata with `metadata` dict thanks to PIL.""" + from PIL import Image, PngImagePlugin + im = Image.open(file) + meta = PngImagePlugin.PngInfo() + for k, v in metadata.items(): + meta.add_text(k, v, 0) + im.save(file, "PNG", pnginfo=meta) + return True + + +class URIGetter: + available = False + name = "Abstract base" + + def uri(self, path): + raise NotImplementedError() + + +class PathlibURI(URIGetter): + available = True + name = "Python Pathlib" + + def uri(self, path): + return PurePosixPath(util.py3_path(path)).as_uri() + + +def copy_c_string(c_string): + """Copy a `ctypes.POINTER(ctypes.c_char)` value into a new Python + string and return it. The old memory is then safe to free. + """ + # This is a pretty dumb way to get a string copy, but it seems to + # work. A more surefire way would be to allocate a ctypes buffer and copy + # the data with `memcpy` or somesuch. + s = ctypes.cast(c_string, ctypes.c_char_p).value + return b'' + s + + +class GioURI(URIGetter): + """Use gio URI function g_file_get_uri. Paths must be utf-8 encoded. + """ + name = "GIO" + + def __init__(self): + self.libgio = self.get_library() + self.available = bool(self.libgio) + if self.available: + self.libgio.g_type_init() # for glib < 2.36 + + self.libgio.g_file_get_uri.argtypes = [ctypes.c_char_p] + self.libgio.g_file_new_for_path.restype = ctypes.c_void_p + + self.libgio.g_file_get_uri.argtypes = [ctypes.c_void_p] + self.libgio.g_file_get_uri.restype = ctypes.POINTER(ctypes.c_char) + + self.libgio.g_object_unref.argtypes = [ctypes.c_void_p] + + def get_library(self): + lib_name = ctypes.util.find_library("gio-2") + try: + if not lib_name: + return False + return ctypes.cdll.LoadLibrary(lib_name) + except OSError: + return False + + def uri(self, path): + g_file_ptr = self.libgio.g_file_new_for_path(path) + if not g_file_ptr: + raise RuntimeError("No gfile pointer received for {}".format( + util.displayable_path(path))) + + try: + uri_ptr = self.libgio.g_file_get_uri(g_file_ptr) + finally: + self.libgio.g_object_unref(g_file_ptr) + if not uri_ptr: + self.libgio.g_free(uri_ptr) + raise RuntimeError("No URI received from the gfile pointer for " + "{}".format(util.displayable_path(path))) + + try: + uri = copy_c_string(uri_ptr) + finally: + self.libgio.g_free(uri_ptr) + + try: + return uri.decode(util._fsencoding()) + except UnicodeDecodeError: + raise RuntimeError( + f"Could not decode filename from GIO: {uri!r}" + ) diff --git a/lib/beetsplug/types.py b/lib/beetsplug/types.py new file mode 100644 index 00000000..930d5e86 --- /dev/null +++ b/lib/beetsplug/types.py @@ -0,0 +1,50 @@ +# This file is part of beets. +# Copyright 2016, Thomas Scholtes. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + + +from beets.plugins import BeetsPlugin +from beets.dbcore import types +from confuse import ConfigValueError +from beets import library + + +class TypesPlugin(BeetsPlugin): + + @property + def item_types(self): + return self._types() + + @property + def album_types(self): + return self._types() + + def _types(self): + if not self.config.exists(): + return {} + + mytypes = {} + for key, value in self.config.items(): + if value.get() == 'int': + mytypes[key] = types.INTEGER + elif value.get() == 'float': + mytypes[key] = types.FLOAT + elif value.get() == 'bool': + mytypes[key] = types.BOOLEAN + elif value.get() == 'date': + mytypes[key] = library.DateType() + else: + raise ConfigValueError( + "unknown type '{}' for the '{}' field" + .format(value, key)) + return mytypes diff --git a/lib/beetsplug/unimported.py b/lib/beetsplug/unimported.py new file mode 100644 index 00000000..7714ec83 --- /dev/null +++ b/lib/beetsplug/unimported.py @@ -0,0 +1,68 @@ +# This file is part of beets. +# Copyright 2019, Joris Jensen +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +""" +List all files in the library folder which are not listed in the + beets library database, including art files +""" + +import os + +from beets import util +from beets.plugins import BeetsPlugin +from beets.ui import Subcommand, print_ + +__author__ = 'https://github.com/MrNuggelz' + + +class Unimported(BeetsPlugin): + + def __init__(self): + super().__init__() + self.config.add( + { + 'ignore_extensions': [] + } + ) + + def commands(self): + def print_unimported(lib, opts, args): + ignore_exts = [ + ('.' + x).encode() + for x in self.config["ignore_extensions"].as_str_seq() + ] + ignore_dirs = [ + os.path.join(lib.directory, x.encode()) + for x in self.config["ignore_subdirectories"].as_str_seq() + ] + in_folder = { + os.path.join(r, file) + for r, d, f in os.walk(lib.directory) + for file in f + if not any( + [file.endswith(ext) for ext in ignore_exts] + + [r in ignore_dirs] + ) + } + in_library = {x.path for x in lib.items()} + art_files = {x.artpath for x in lib.albums()} + for f in in_folder - in_library - art_files: + print_(util.displayable_path(f)) + + unimported = Subcommand( + 'unimported', + help='list all files in the library folder which are not listed' + ' in the beets library database') + unimported.func = print_unimported + return [unimported] diff --git a/lib/beetsplug/web/__init__.py b/lib/beetsplug/web/__init__.py new file mode 100644 index 00000000..240126e9 --- /dev/null +++ b/lib/beetsplug/web/__init__.py @@ -0,0 +1,526 @@ +# This file is part of beets. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""A Web interface to beets.""" + +from beets.plugins import BeetsPlugin +from beets import ui +from beets import util +import beets.library +import flask +from flask import g, jsonify +from werkzeug.routing import BaseConverter, PathConverter +import os +from unidecode import unidecode +import json +import base64 + + +# Utilities. + +def _rep(obj, expand=False): + """Get a flat -- i.e., JSON-ish -- representation of a beets Item or + Album object. For Albums, `expand` dictates whether tracks are + included. + """ + out = dict(obj) + + if isinstance(obj, beets.library.Item): + if app.config.get('INCLUDE_PATHS', False): + out['path'] = util.displayable_path(out['path']) + else: + del out['path'] + + # Filter all bytes attributes and convert them to strings. + for key, value in out.items(): + if isinstance(out[key], bytes): + out[key] = base64.b64encode(value).decode('ascii') + + # Get the size (in bytes) of the backing file. This is useful + # for the Tomahawk resolver API. + try: + out['size'] = os.path.getsize(util.syspath(obj.path)) + except OSError: + out['size'] = 0 + + return out + + elif isinstance(obj, beets.library.Album): + if app.config.get('INCLUDE_PATHS', False): + out['artpath'] = util.displayable_path(out['artpath']) + else: + del out['artpath'] + if expand: + out['items'] = [_rep(item) for item in obj.items()] + return out + + +def json_generator(items, root, expand=False): + """Generator that dumps list of beets Items or Albums as JSON + + :param root: root key for JSON + :param items: list of :class:`Item` or :class:`Album` to dump + :param expand: If true every :class:`Album` contains its items in the json + representation + :returns: generator that yields strings + """ + yield '{"%s":[' % root + first = True + for item in items: + if first: + first = False + else: + yield ',' + yield json.dumps(_rep(item, expand=expand)) + yield ']}' + + +def is_expand(): + """Returns whether the current request is for an expanded response.""" + + return flask.request.args.get('expand') is not None + + +def is_delete(): + """Returns whether the current delete request should remove the selected + files. + """ + + return flask.request.args.get('delete') is not None + + +def get_method(): + """Returns the HTTP method of the current request.""" + return flask.request.method + + +def resource(name, patchable=False): + """Decorates a function to handle RESTful HTTP requests for a resource. + """ + def make_responder(retriever): + def responder(ids): + entities = [retriever(id) for id in ids] + entities = [entity for entity in entities if entity] + + if get_method() == "DELETE": + + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.remove(delete=is_delete()) + + return flask.make_response(jsonify({'deleted': True}), 200) + + elif get_method() == "PATCH" and patchable: + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.update(flask.request.get_json()) + entity.try_sync(True, False) # write, don't move + + if len(entities) == 1: + return flask.jsonify(_rep(entities[0], expand=is_expand())) + elif entities: + return app.response_class( + json_generator(entities, root=name), + mimetype='application/json' + ) + + elif get_method() == "GET": + if len(entities) == 1: + return flask.jsonify(_rep(entities[0], expand=is_expand())) + elif entities: + return app.response_class( + json_generator(entities, root=name), + mimetype='application/json' + ) + else: + return flask.abort(404) + + else: + return flask.abort(405) + + responder.__name__ = f'get_{name}' + + return responder + return make_responder + + +def resource_query(name, patchable=False): + """Decorates a function to handle RESTful HTTP queries for resources. + """ + def make_responder(query_func): + def responder(queries): + entities = query_func(queries) + + if get_method() == "DELETE": + + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.remove(delete=is_delete()) + + return flask.make_response(jsonify({'deleted': True}), 200) + + elif get_method() == "PATCH" and patchable: + if app.config.get('READONLY', True): + return flask.abort(405) + + for entity in entities: + entity.update(flask.request.get_json()) + entity.try_sync(True, False) # write, don't move + + return app.response_class( + json_generator(entities, root=name), + mimetype='application/json' + ) + + elif get_method() == "GET": + return app.response_class( + json_generator( + entities, + root='results', expand=is_expand() + ), + mimetype='application/json' + ) + + else: + return flask.abort(405) + + responder.__name__ = f'query_{name}' + + return responder + + return make_responder + + +def resource_list(name): + """Decorates a function to handle RESTful HTTP request for a list of + resources. + """ + def make_responder(list_all): + def responder(): + return app.response_class( + json_generator(list_all(), root=name, expand=is_expand()), + mimetype='application/json' + ) + responder.__name__ = f'all_{name}' + return responder + return make_responder + + +def _get_unique_table_field_values(model, field, sort_field): + """ retrieve all unique values belonging to a key from a model """ + if field not in model.all_keys() or sort_field not in model.all_keys(): + raise KeyError + with g.lib.transaction() as tx: + rows = tx.query('SELECT DISTINCT "{}" FROM "{}" ORDER BY "{}"' + .format(field, model._table, sort_field)) + return [row[0] for row in rows] + + +class IdListConverter(BaseConverter): + """Converts comma separated lists of ids in urls to integer lists. + """ + + def to_python(self, value): + ids = [] + for id in value.split(','): + try: + ids.append(int(id)) + except ValueError: + pass + return ids + + def to_url(self, value): + return ','.join(str(v) for v in value) + + +class QueryConverter(PathConverter): + """Converts slash separated lists of queries in the url to string list. + """ + + def to_python(self, value): + queries = value.split('/') + """Do not do path substitution on regex value tests""" + return [query if '::' in query else query.replace('\\', os.sep) + for query in queries] + + def to_url(self, value): + return ','.join([v.replace(os.sep, '\\') for v in value]) + + +class EverythingConverter(PathConverter): + regex = '.*?' + + +# Flask setup. + +app = flask.Flask(__name__) +app.url_map.converters['idlist'] = IdListConverter +app.url_map.converters['query'] = QueryConverter +app.url_map.converters['everything'] = EverythingConverter + + +@app.before_request +def before_request(): + g.lib = app.config['lib'] + + +# Items. + +@app.route('/item/<idlist:ids>', methods=["GET", "DELETE", "PATCH"]) +@resource('items', patchable=True) +def get_item(id): + return g.lib.get_item(id) + + +@app.route('/item/') +@app.route('/item/query/') +@resource_list('items') +def all_items(): + return g.lib.items() + + +@app.route('/item/<int:item_id>/file') +def item_file(item_id): + item = g.lib.get_item(item_id) + + # On Windows under Python 2, Flask wants a Unicode path. On Python 3, it + # *always* wants a Unicode path. + if os.name == 'nt': + item_path = util.syspath(item.path) + else: + item_path = util.py3_path(item.path) + + try: + unicode_item_path = util.text_string(item.path) + except (UnicodeDecodeError, UnicodeEncodeError): + unicode_item_path = util.displayable_path(item.path) + + base_filename = os.path.basename(unicode_item_path) + try: + # Imitate http.server behaviour + base_filename.encode("latin-1", "strict") + except UnicodeEncodeError: + safe_filename = unidecode(base_filename) + else: + safe_filename = base_filename + + response = flask.send_file( + item_path, + as_attachment=True, + attachment_filename=safe_filename + ) + response.headers['Content-Length'] = os.path.getsize(item_path) + return response + + +@app.route('/item/query/<query:queries>', methods=["GET", "DELETE", "PATCH"]) +@resource_query('items', patchable=True) +def item_query(queries): + return g.lib.items(queries) + + +@app.route('/item/path/<everything:path>') +def item_at_path(path): + query = beets.library.PathQuery('path', path.encode('utf-8')) + item = g.lib.items(query).get() + if item: + return flask.jsonify(_rep(item)) + else: + return flask.abort(404) + + +@app.route('/item/values/<string:key>') +def item_unique_field_values(key): + sort_key = flask.request.args.get('sort_key', key) + try: + values = _get_unique_table_field_values(beets.library.Item, key, + sort_key) + except KeyError: + return flask.abort(404) + return flask.jsonify(values=values) + + +# Albums. + +@app.route('/album/<idlist:ids>', methods=["GET", "DELETE"]) +@resource('albums') +def get_album(id): + return g.lib.get_album(id) + + +@app.route('/album/') +@app.route('/album/query/') +@resource_list('albums') +def all_albums(): + return g.lib.albums() + + +@app.route('/album/query/<query:queries>', methods=["GET", "DELETE"]) +@resource_query('albums') +def album_query(queries): + return g.lib.albums(queries) + + +@app.route('/album/<int:album_id>/art') +def album_art(album_id): + album = g.lib.get_album(album_id) + if album and album.artpath: + return flask.send_file(album.artpath.decode()) + else: + return flask.abort(404) + + +@app.route('/album/values/<string:key>') +def album_unique_field_values(key): + sort_key = flask.request.args.get('sort_key', key) + try: + values = _get_unique_table_field_values(beets.library.Album, key, + sort_key) + except KeyError: + return flask.abort(404) + return flask.jsonify(values=values) + + +# Artists. + +@app.route('/artist/') +def all_artists(): + with g.lib.transaction() as tx: + rows = tx.query("SELECT DISTINCT albumartist FROM albums") + all_artists = [row[0] for row in rows] + return flask.jsonify(artist_names=all_artists) + + +# Library information. + +@app.route('/stats') +def stats(): + with g.lib.transaction() as tx: + item_rows = tx.query("SELECT COUNT(*) FROM items") + album_rows = tx.query("SELECT COUNT(*) FROM albums") + return flask.jsonify({ + 'items': item_rows[0][0], + 'albums': album_rows[0][0], + }) + + +# UI. + +@app.route('/') +def home(): + return flask.render_template('index.html') + + +# Plugin hook. + +class WebPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + self.config.add({ + 'host': '127.0.0.1', + 'port': 8337, + 'cors': '', + 'cors_supports_credentials': False, + 'reverse_proxy': False, + 'include_paths': False, + 'readonly': True, + }) + + def commands(self): + cmd = ui.Subcommand('web', help='start a Web interface') + cmd.parser.add_option('-d', '--debug', action='store_true', + default=False, help='debug mode') + + def func(lib, opts, args): + args = ui.decargs(args) + if args: + self.config['host'] = args.pop(0) + if args: + self.config['port'] = int(args.pop(0)) + + app.config['lib'] = lib + # Normalizes json output + app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False + + app.config['INCLUDE_PATHS'] = self.config['include_paths'] + app.config['READONLY'] = self.config['readonly'] + + # Enable CORS if required. + if self.config['cors']: + self._log.info('Enabling CORS with origin: {0}', + self.config['cors']) + from flask_cors import CORS + app.config['CORS_ALLOW_HEADERS'] = "Content-Type" + app.config['CORS_RESOURCES'] = { + r"/*": {"origins": self.config['cors'].get(str)} + } + CORS( + app, + supports_credentials=self.config[ + 'cors_supports_credentials' + ].get(bool) + ) + + # Allow serving behind a reverse proxy + if self.config['reverse_proxy']: + app.wsgi_app = ReverseProxied(app.wsgi_app) + + # Start the web application. + app.run(host=self.config['host'].as_str(), + port=self.config['port'].get(int), + debug=opts.debug, threaded=True) + cmd.func = func + return [cmd] + + +class ReverseProxied: + '''Wrap the application in this middleware and configure the + front-end server to add these headers, to let you quietly bind + this to a URL other than / and to an HTTP scheme that is + different than what is used locally. + + In nginx: + location /myprefix { + proxy_pass http://192.168.0.1:5001; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Scheme $scheme; + proxy_set_header X-Script-Name /myprefix; + } + + From: http://flask.pocoo.org/snippets/35/ + + :param app: the WSGI application + ''' + def __init__(self, app): + self.app = app + + def __call__(self, environ, start_response): + script_name = environ.get('HTTP_X_SCRIPT_NAME', '') + if script_name: + environ['SCRIPT_NAME'] = script_name + path_info = environ['PATH_INFO'] + if path_info.startswith(script_name): + environ['PATH_INFO'] = path_info[len(script_name):] + + scheme = environ.get('HTTP_X_SCHEME', '') + if scheme: + environ['wsgi.url_scheme'] = scheme + return self.app(environ, start_response) diff --git a/lib/beetsplug/web/static/backbone.js b/lib/beetsplug/web/static/backbone.js new file mode 100644 index 00000000..b2e49322 --- /dev/null +++ b/lib/beetsplug/web/static/backbone.js @@ -0,0 +1,1158 @@ +// Backbone.js 0.5.3 +// (c) 2010 Jeremy Ashkenas, DocumentCloud Inc. +// Backbone may be freely distributed under the MIT license. +// For all details and documentation: +// http://documentcloud.github.com/backbone + +(function(){ + + // Initial Setup + // ------------- + + // Save a reference to the global object. + var root = this; + + // Save the previous value of the `Backbone` variable. + var previousBackbone = root.Backbone; + + // The top-level namespace. All public Backbone classes and modules will + // be attached to this. Exported for both CommonJS and the browser. + var Backbone; + if (typeof exports !== 'undefined') { + Backbone = exports; + } else { + Backbone = root.Backbone = {}; + } + + // Current version of the library. Keep in sync with `package.json`. + Backbone.VERSION = '0.5.3'; + + // Require Underscore, if we're on the server, and it's not already present. + var _ = root._; + if (!_ && (typeof require !== 'undefined')) _ = require('underscore')._; + + // For Backbone's purposes, jQuery or Zepto owns the `$` variable. + var $ = root.jQuery || root.Zepto; + + // Runs Backbone.js in *noConflict* mode, returning the `Backbone` variable + // to its previous owner. Returns a reference to this Backbone object. + Backbone.noConflict = function() { + root.Backbone = previousBackbone; + return this; + }; + + // Turn on `emulateHTTP` to support legacy HTTP servers. Setting this option will + // fake `"PUT"` and `"DELETE"` requests via the `_method` parameter and set a + // `X-Http-Method-Override` header. + Backbone.emulateHTTP = false; + + // Turn on `emulateJSON` to support legacy servers that can't deal with direct + // `application/json` requests ... will encode the body as + // `application/x-www-form-urlencoded` instead and will send the model in a + // form param named `model`. + Backbone.emulateJSON = false; + + // Backbone.Events + // ----------------- + + // A module that can be mixed in to *any object* in order to provide it with + // custom events. You may `bind` or `unbind` a callback function to an event; + // `trigger`-ing an event fires all callbacks in succession. + // + // var object = {}; + // _.extend(object, Backbone.Events); + // object.bind('expand', function(){ alert('expanded'); }); + // object.trigger('expand'); + // + Backbone.Events = { + + // Bind an event, specified by a string name, `ev`, to a `callback` function. + // Passing `"all"` will bind the callback to all events fired. + bind : function(ev, callback, context) { + var calls = this._callbacks || (this._callbacks = {}); + var list = calls[ev] || (calls[ev] = []); + list.push([callback, context]); + return this; + }, + + // Remove one or many callbacks. If `callback` is null, removes all + // callbacks for the event. If `ev` is null, removes all bound callbacks + // for all events. + unbind : function(ev, callback) { + var calls; + if (!ev) { + this._callbacks = {}; + } else if (calls = this._callbacks) { + if (!callback) { + calls[ev] = []; + } else { + var list = calls[ev]; + if (!list) return this; + for (var i = 0, l = list.length; i < l; i++) { + if (list[i] && callback === list[i][0]) { + list[i] = null; + break; + } + } + } + } + return this; + }, + + // Trigger an event, firing all bound callbacks. Callbacks are passed the + // same arguments as `trigger` is, apart from the event name. + // Listening for `"all"` passes the true event name as the first argument. + trigger : function(eventName) { + var list, calls, ev, callback, args; + var both = 2; + if (!(calls = this._callbacks)) return this; + while (both--) { + ev = both ? eventName : 'all'; + if (list = calls[ev]) { + for (var i = 0, l = list.length; i < l; i++) { + if (!(callback = list[i])) { + list.splice(i, 1); i--; l--; + } else { + args = both ? Array.prototype.slice.call(arguments, 1) : arguments; + callback[0].apply(callback[1] || this, args); + } + } + } + } + return this; + } + + }; + + // Backbone.Model + // -------------- + + // Create a new model, with defined attributes. A client id (`cid`) + // is automatically generated and assigned for you. + Backbone.Model = function(attributes, options) { + var defaults; + attributes || (attributes = {}); + if (defaults = this.defaults) { + if (_.isFunction(defaults)) defaults = defaults.call(this); + attributes = _.extend({}, defaults, attributes); + } + this.attributes = {}; + this._escapedAttributes = {}; + this.cid = _.uniqueId('c'); + this.set(attributes, {silent : true}); + this._changed = false; + this._previousAttributes = _.clone(this.attributes); + if (options && options.collection) this.collection = options.collection; + this.initialize(attributes, options); + }; + + // Attach all inheritable methods to the Model prototype. + _.extend(Backbone.Model.prototype, Backbone.Events, { + + // A snapshot of the model's previous attributes, taken immediately + // after the last `"change"` event was fired. + _previousAttributes : null, + + // Has the item been changed since the last `"change"` event? + _changed : false, + + // The default name for the JSON `id` attribute is `"id"`. MongoDB and + // CouchDB users may want to set this to `"_id"`. + idAttribute : 'id', + + // Initialize is an empty function by default. Override it with your own + // initialization logic. + initialize : function(){}, + + // Return a copy of the model's `attributes` object. + toJSON : function() { + return _.clone(this.attributes); + }, + + // Get the value of an attribute. + get : function(attr) { + return this.attributes[attr]; + }, + + // Get the HTML-escaped value of an attribute. + escape : function(attr) { + var html; + if (html = this._escapedAttributes[attr]) return html; + var val = this.attributes[attr]; + return this._escapedAttributes[attr] = escapeHTML(val == null ? '' : '' + val); + }, + + // Returns `true` if the attribute contains a value that is not null + // or undefined. + has : function(attr) { + return this.attributes[attr] != null; + }, + + // Set a hash of model attributes on the object, firing `"change"` unless you + // choose to silence it. + set : function(attrs, options) { + + // Extract attributes and options. + options || (options = {}); + if (!attrs) return this; + if (attrs.attributes) attrs = attrs.attributes; + var now = this.attributes, escaped = this._escapedAttributes; + + // Run validation. + if (!options.silent && this.validate && !this._performValidation(attrs, options)) return false; + + // Check for changes of `id`. + if (this.idAttribute in attrs) this.id = attrs[this.idAttribute]; + + // We're about to start triggering change events. + var alreadyChanging = this._changing; + this._changing = true; + + // Update attributes. + for (var attr in attrs) { + var val = attrs[attr]; + if (!_.isEqual(now[attr], val)) { + now[attr] = val; + delete escaped[attr]; + this._changed = true; + if (!options.silent) this.trigger('change:' + attr, this, val, options); + } + } + + // Fire the `"change"` event, if the model has been changed. + if (!alreadyChanging && !options.silent && this._changed) this.change(options); + this._changing = false; + return this; + }, + + // Remove an attribute from the model, firing `"change"` unless you choose + // to silence it. `unset` is a noop if the attribute doesn't exist. + unset : function(attr, options) { + if (!(attr in this.attributes)) return this; + options || (options = {}); + var value = this.attributes[attr]; + + // Run validation. + var validObj = {}; + validObj[attr] = void 0; + if (!options.silent && this.validate && !this._performValidation(validObj, options)) return false; + + // Remove the attribute. + delete this.attributes[attr]; + delete this._escapedAttributes[attr]; + if (attr == this.idAttribute) delete this.id; + this._changed = true; + if (!options.silent) { + this.trigger('change:' + attr, this, void 0, options); + this.change(options); + } + return this; + }, + + // Clear all attributes on the model, firing `"change"` unless you choose + // to silence it. + clear : function(options) { + options || (options = {}); + var attr; + var old = this.attributes; + + // Run validation. + var validObj = {}; + for (attr in old) validObj[attr] = void 0; + if (!options.silent && this.validate && !this._performValidation(validObj, options)) return false; + + this.attributes = {}; + this._escapedAttributes = {}; + this._changed = true; + if (!options.silent) { + for (attr in old) { + this.trigger('change:' + attr, this, void 0, options); + } + this.change(options); + } + return this; + }, + + // Fetch the model from the server. If the server's representation of the + // model differs from its current attributes, they will be overriden, + // triggering a `"change"` event. + fetch : function(options) { + options || (options = {}); + var model = this; + var success = options.success; + options.success = function(resp, status, xhr) { + if (!model.set(model.parse(resp, xhr), options)) return false; + if (success) success(model, resp); + }; + options.error = wrapError(options.error, model, options); + return (this.sync || Backbone.sync).call(this, 'read', this, options); + }, + + // Set a hash of model attributes, and sync the model to the server. + // If the server returns an attributes hash that differs, the model's + // state will be `set` again. + save : function(attrs, options) { + options || (options = {}); + if (attrs && !this.set(attrs, options)) return false; + var model = this; + var success = options.success; + options.success = function(resp, status, xhr) { + if (!model.set(model.parse(resp, xhr), options)) return false; + if (success) success(model, resp, xhr); + }; + options.error = wrapError(options.error, model, options); + var method = this.isNew() ? 'create' : 'update'; + return (this.sync || Backbone.sync).call(this, method, this, options); + }, + + // Destroy this model on the server if it was already persisted. Upon success, the model is removed + // from its collection, if it has one. + destroy : function(options) { + options || (options = {}); + if (this.isNew()) return this.trigger('destroy', this, this.collection, options); + var model = this; + var success = options.success; + options.success = function(resp) { + model.trigger('destroy', model, model.collection, options); + if (success) success(model, resp); + }; + options.error = wrapError(options.error, model, options); + return (this.sync || Backbone.sync).call(this, 'delete', this, options); + }, + + // Default URL for the model's representation on the server -- if you're + // using Backbone's restful methods, override this to change the endpoint + // that will be called. + url : function() { + var base = getUrl(this.collection) || this.urlRoot || urlError(); + if (this.isNew()) return base; + return base + (base.charAt(base.length - 1) == '/' ? '' : '/') + encodeURIComponent(this.id); + }, + + // **parse** converts a response into the hash of attributes to be `set` on + // the model. The default implementation is just to pass the response along. + parse : function(resp, xhr) { + return resp; + }, + + // Create a new model with identical attributes to this one. + clone : function() { + return new this.constructor(this); + }, + + // A model is new if it has never been saved to the server, and lacks an id. + isNew : function() { + return this.id == null; + }, + + // Call this method to manually fire a `change` event for this model. + // Calling this will cause all objects observing the model to update. + change : function(options) { + this.trigger('change', this, options); + this._previousAttributes = _.clone(this.attributes); + this._changed = false; + }, + + // Determine if the model has changed since the last `"change"` event. + // If you specify an attribute name, determine if that attribute has changed. + hasChanged : function(attr) { + if (attr) return this._previousAttributes[attr] != this.attributes[attr]; + return this._changed; + }, + + // Return an object containing all the attributes that have changed, or false + // if there are no changed attributes. Useful for determining what parts of a + // view need to be updated and/or what attributes need to be persisted to + // the server. + changedAttributes : function(now) { + now || (now = this.attributes); + var old = this._previousAttributes; + var changed = false; + for (var attr in now) { + if (!_.isEqual(old[attr], now[attr])) { + changed = changed || {}; + changed[attr] = now[attr]; + } + } + return changed; + }, + + // Get the previous value of an attribute, recorded at the time the last + // `"change"` event was fired. + previous : function(attr) { + if (!attr || !this._previousAttributes) return null; + return this._previousAttributes[attr]; + }, + + // Get all of the attributes of the model at the time of the previous + // `"change"` event. + previousAttributes : function() { + return _.clone(this._previousAttributes); + }, + + // Run validation against a set of incoming attributes, returning `true` + // if all is well. If a specific `error` callback has been passed, + // call that instead of firing the general `"error"` event. + _performValidation : function(attrs, options) { + var error = this.validate(attrs); + if (error) { + if (options.error) { + options.error(this, error, options); + } else { + this.trigger('error', this, error, options); + } + return false; + } + return true; + } + + }); + + // Backbone.Collection + // ------------------- + + // Provides a standard collection class for our sets of models, ordered + // or unordered. If a `comparator` is specified, the Collection will maintain + // its models in sort order, as they're added and removed. + Backbone.Collection = function(models, options) { + options || (options = {}); + if (options.comparator) this.comparator = options.comparator; + _.bindAll(this, '_onModelEvent', '_removeReference'); + this._reset(); + if (models) this.reset(models, {silent: true}); + this.initialize.apply(this, arguments); + }; + + // Define the Collection's inheritable methods. + _.extend(Backbone.Collection.prototype, Backbone.Events, { + + // The default model for a collection is just a **Backbone.Model**. + // This should be overridden in most cases. + model : Backbone.Model, + + // Initialize is an empty function by default. Override it with your own + // initialization logic. + initialize : function(){}, + + // The JSON representation of a Collection is an array of the + // models' attributes. + toJSON : function() { + return this.map(function(model){ return model.toJSON(); }); + }, + + // Add a model, or list of models to the set. Pass **silent** to avoid + // firing the `added` event for every new model. + add : function(models, options) { + if (_.isArray(models)) { + for (var i = 0, l = models.length; i < l; i++) { + this._add(models[i], options); + } + } else { + this._add(models, options); + } + return this; + }, + + // Remove a model, or a list of models from the set. Pass silent to avoid + // firing the `removed` event for every model removed. + remove : function(models, options) { + if (_.isArray(models)) { + for (var i = 0, l = models.length; i < l; i++) { + this._remove(models[i], options); + } + } else { + this._remove(models, options); + } + return this; + }, + + // Get a model from the set by id. + get : function(id) { + if (id == null) return null; + return this._byId[id.id != null ? id.id : id]; + }, + + // Get a model from the set by client id. + getByCid : function(cid) { + return cid && this._byCid[cid.cid || cid]; + }, + + // Get the model at the given index. + at: function(index) { + return this.models[index]; + }, + + // Force the collection to re-sort itself. You don't need to call this under normal + // circumstances, as the set will maintain sort order as each item is added. + sort : function(options) { + options || (options = {}); + if (!this.comparator) throw new Error('Cannot sort a set without a comparator'); + this.models = this.sortBy(this.comparator); + if (!options.silent) this.trigger('reset', this, options); + return this; + }, + + // Pluck an attribute from each model in the collection. + pluck : function(attr) { + return _.map(this.models, function(model){ return model.get(attr); }); + }, + + // When you have more items than you want to add or remove individually, + // you can reset the entire set with a new list of models, without firing + // any `added` or `removed` events. Fires `reset` when finished. + reset : function(models, options) { + models || (models = []); + options || (options = {}); + this.each(this._removeReference); + this._reset(); + this.add(models, {silent: true}); + if (!options.silent) this.trigger('reset', this, options); + return this; + }, + + // Fetch the default set of models for this collection, resetting the + // collection when they arrive. If `add: true` is passed, appends the + // models to the collection instead of resetting. + fetch : function(options) { + options || (options = {}); + var collection = this; + var success = options.success; + options.success = function(resp, status, xhr) { + collection[options.add ? 'add' : 'reset'](collection.parse(resp, xhr), options); + if (success) success(collection, resp); + }; + options.error = wrapError(options.error, collection, options); + return (this.sync || Backbone.sync).call(this, 'read', this, options); + }, + + // Create a new instance of a model in this collection. After the model + // has been created on the server, it will be added to the collection. + // Returns the model, or 'false' if validation on a new model fails. + create : function(model, options) { + var coll = this; + options || (options = {}); + model = this._prepareModel(model, options); + if (!model) return false; + var success = options.success; + options.success = function(nextModel, resp, xhr) { + coll.add(nextModel, options); + if (success) success(nextModel, resp, xhr); + }; + model.save(null, options); + return model; + }, + + // **parse** converts a response into a list of models to be added to the + // collection. The default implementation is just to pass it through. + parse : function(resp, xhr) { + return resp; + }, + + // Proxy to _'s chain. Can't be proxied the same way the rest of the + // underscore methods are proxied because it relies on the underscore + // constructor. + chain: function () { + return _(this.models).chain(); + }, + + // Reset all internal state. Called when the collection is reset. + _reset : function(options) { + this.length = 0; + this.models = []; + this._byId = {}; + this._byCid = {}; + }, + + // Prepare a model to be added to this collection + _prepareModel: function(model, options) { + if (!(model instanceof Backbone.Model)) { + var attrs = model; + model = new this.model(attrs, {collection: this}); + if (model.validate && !model._performValidation(attrs, options)) model = false; + } else if (!model.collection) { + model.collection = this; + } + return model; + }, + + // Internal implementation of adding a single model to the set, updating + // hash indexes for `id` and `cid` lookups. + // Returns the model, or 'false' if validation on a new model fails. + _add : function(model, options) { + options || (options = {}); + model = this._prepareModel(model, options); + if (!model) return false; + var already = this.getByCid(model); + if (already) throw new Error(["Can't add the same model to a set twice", already.id]); + this._byId[model.id] = model; + this._byCid[model.cid] = model; + var index = options.at != null ? options.at : + this.comparator ? this.sortedIndex(model, this.comparator) : + this.length; + this.models.splice(index, 0, model); + model.bind('all', this._onModelEvent); + this.length++; + if (!options.silent) model.trigger('add', model, this, options); + return model; + }, + + // Internal implementation of removing a single model from the set, updating + // hash indexes for `id` and `cid` lookups. + _remove : function(model, options) { + options || (options = {}); + model = this.getByCid(model) || this.get(model); + if (!model) return null; + delete this._byId[model.id]; + delete this._byCid[model.cid]; + this.models.splice(this.indexOf(model), 1); + this.length--; + if (!options.silent) model.trigger('remove', model, this, options); + this._removeReference(model); + return model; + }, + + // Internal method to remove a model's ties to a collection. + _removeReference : function(model) { + if (this == model.collection) { + delete model.collection; + } + model.unbind('all', this._onModelEvent); + }, + + // Internal method called every time a model in the set fires an event. + // Sets need to update their indexes when models change ids. All other + // events simply proxy through. "add" and "remove" events that originate + // in other collections are ignored. + _onModelEvent : function(ev, model, collection, options) { + if ((ev == 'add' || ev == 'remove') && collection != this) return; + if (ev == 'destroy') { + this._remove(model, options); + } + if (model && ev === 'change:' + model.idAttribute) { + delete this._byId[model.previous(model.idAttribute)]; + this._byId[model.id] = model; + } + this.trigger.apply(this, arguments); + } + + }); + + // Underscore methods that we want to implement on the Collection. + var methods = ['forEach', 'each', 'map', 'reduce', 'reduceRight', 'find', 'detect', + 'filter', 'select', 'reject', 'every', 'all', 'some', 'any', 'include', + 'contains', 'invoke', 'max', 'min', 'sortBy', 'sortedIndex', 'toArray', 'size', + 'first', 'rest', 'last', 'without', 'indexOf', 'lastIndexOf', 'isEmpty', 'groupBy']; + + // Mix in each Underscore method as a proxy to `Collection#models`. + _.each(methods, function(method) { + Backbone.Collection.prototype[method] = function() { + return _[method].apply(_, [this.models].concat(_.toArray(arguments))); + }; + }); + + // Backbone.Router + // ------------------- + + // Routers map faux-URLs to actions, and fire events when routes are + // matched. Creating a new one sets its `routes` hash, if not set statically. + Backbone.Router = function(options) { + options || (options = {}); + if (options.routes) this.routes = options.routes; + this._bindRoutes(); + this.initialize.apply(this, arguments); + }; + + // Cached regular expressions for matching named param parts and splatted + // parts of route strings. + var namedParam = /:([\w\d]+)/g; + var splatParam = /\*([\w\d]+)/g; + var escapeRegExp = /[-[\]{}()+?.,\\^$|#\s]/g; + + // Set up all inheritable **Backbone.Router** properties and methods. + _.extend(Backbone.Router.prototype, Backbone.Events, { + + // Initialize is an empty function by default. Override it with your own + // initialization logic. + initialize : function(){}, + + // Manually bind a single named route to a callback. For example: + // + // this.route('search/:query/p:num', 'search', function(query, num) { + // ... + // }); + // + route : function(route, name, callback) { + Backbone.history || (Backbone.history = new Backbone.History); + if (!_.isRegExp(route)) route = this._routeToRegExp(route); + Backbone.history.route(route, _.bind(function(fragment) { + var args = this._extractParameters(route, fragment); + callback.apply(this, args); + this.trigger.apply(this, ['route:' + name].concat(args)); + }, this)); + }, + + // Simple proxy to `Backbone.history` to save a fragment into the history. + navigate : function(fragment, triggerRoute) { + Backbone.history.navigate(fragment, triggerRoute); + }, + + // Bind all defined routes to `Backbone.history`. We have to reverse the + // order of the routes here to support behavior where the most general + // routes can be defined at the bottom of the route map. + _bindRoutes : function() { + if (!this.routes) return; + var routes = []; + for (var route in this.routes) { + routes.unshift([route, this.routes[route]]); + } + for (var i = 0, l = routes.length; i < l; i++) { + this.route(routes[i][0], routes[i][1], this[routes[i][1]]); + } + }, + + // Convert a route string into a regular expression, suitable for matching + // against the current location hash. + _routeToRegExp : function(route) { + route = route.replace(escapeRegExp, "\\$&") + .replace(namedParam, "([^\/]*)") + .replace(splatParam, "(.*?)"); + return new RegExp('^' + route + '$'); + }, + + // Given a route, and a URL fragment that it matches, return the array of + // extracted parameters. + _extractParameters : function(route, fragment) { + return route.exec(fragment).slice(1); + } + + }); + + // Backbone.History + // ---------------- + + // Handles cross-browser history management, based on URL fragments. If the + // browser does not support `onhashchange`, falls back to polling. + Backbone.History = function() { + this.handlers = []; + _.bindAll(this, 'checkUrl'); + }; + + // Cached regex for cleaning hashes. + var hashStrip = /^#*/; + + // Cached regex for detecting MSIE. + var isExplorer = /msie [\w.]+/; + + // Has the history handling already been started? + var historyStarted = false; + + // Set up all inheritable **Backbone.History** properties and methods. + _.extend(Backbone.History.prototype, { + + // The default interval to poll for hash changes, if necessary, is + // twenty times a second. + interval: 50, + + // Get the cross-browser normalized URL fragment, either from the URL, + // the hash, or the override. + getFragment : function(fragment, forcePushState) { + if (fragment == null) { + if (this._hasPushState || forcePushState) { + fragment = window.location.pathname; + var search = window.location.search; + if (search) fragment += search; + if (fragment.indexOf(this.options.root) == 0) fragment = fragment.substr(this.options.root.length); + } else { + fragment = window.location.hash; + } + } + return decodeURIComponent(fragment.replace(hashStrip, '')); + }, + + // Start the hash change handling, returning `true` if the current URL matches + // an existing route, and `false` otherwise. + start : function(options) { + + // Figure out the initial configuration. Do we need an iframe? + // Is pushState desired ... is it available? + if (historyStarted) throw new Error("Backbone.history has already been started"); + this.options = _.extend({}, {root: '/'}, this.options, options); + this._wantsPushState = !!this.options.pushState; + this._hasPushState = !!(this.options.pushState && window.history && window.history.pushState); + var fragment = this.getFragment(); + var docMode = document.documentMode; + var oldIE = (isExplorer.exec(navigator.userAgent.toLowerCase()) && (!docMode || docMode <= 7)); + if (oldIE) { + this.iframe = $('<iframe src="javascript:0" tabindex="-1" />').hide().appendTo('body')[0].contentWindow; + this.navigate(fragment); + } + + // Depending on whether we're using pushState or hashes, and whether + // 'onhashchange' is supported, determine how we check the URL state. + if (this._hasPushState) { + $(window).bind('popstate', this.checkUrl); + } else if ('onhashchange' in window && !oldIE) { + $(window).bind('hashchange', this.checkUrl); + } else { + setInterval(this.checkUrl, this.interval); + } + + // Determine if we need to change the base url, for a pushState link + // opened by a non-pushState browser. + this.fragment = fragment; + historyStarted = true; + var loc = window.location; + var atRoot = loc.pathname == this.options.root; + if (this._wantsPushState && !this._hasPushState && !atRoot) { + this.fragment = this.getFragment(null, true); + window.location.replace(this.options.root + '#' + this.fragment); + // Return immediately as browser will do redirect to new url + return true; + } else if (this._wantsPushState && this._hasPushState && atRoot && loc.hash) { + this.fragment = loc.hash.replace(hashStrip, ''); + window.history.replaceState({}, document.title, loc.protocol + '//' + loc.host + this.options.root + this.fragment); + } + + if (!this.options.silent) { + return this.loadUrl(); + } + }, + + // Add a route to be tested when the fragment changes. Routes added later may + // override previous routes. + route : function(route, callback) { + this.handlers.unshift({route : route, callback : callback}); + }, + + // Checks the current URL to see if it has changed, and if it has, + // calls `loadUrl`, normalizing across the hidden iframe. + checkUrl : function(e) { + var current = this.getFragment(); + if (current == this.fragment && this.iframe) current = this.getFragment(this.iframe.location.hash); + if (current == this.fragment || current == decodeURIComponent(this.fragment)) return false; + if (this.iframe) this.navigate(current); + this.loadUrl() || this.loadUrl(window.location.hash); + }, + + // Attempt to load the current URL fragment. If a route succeeds with a + // match, returns `true`. If no defined routes matches the fragment, + // returns `false`. + loadUrl : function(fragmentOverride) { + var fragment = this.fragment = this.getFragment(fragmentOverride); + var matched = _.any(this.handlers, function(handler) { + if (handler.route.test(fragment)) { + handler.callback(fragment); + return true; + } + }); + return matched; + }, + + // Save a fragment into the hash history. You are responsible for properly + // URL-encoding the fragment in advance. This does not trigger + // a `hashchange` event. + navigate : function(fragment, triggerRoute) { + var frag = (fragment || '').replace(hashStrip, ''); + if (this.fragment == frag || this.fragment == decodeURIComponent(frag)) return; + if (this._hasPushState) { + var loc = window.location; + if (frag.indexOf(this.options.root) != 0) frag = this.options.root + frag; + this.fragment = frag; + window.history.pushState({}, document.title, loc.protocol + '//' + loc.host + frag); + } else { + window.location.hash = this.fragment = frag; + if (this.iframe && (frag != this.getFragment(this.iframe.location.hash))) { + this.iframe.document.open().close(); + this.iframe.location.hash = frag; + } + } + if (triggerRoute) this.loadUrl(fragment); + } + + }); + + // Backbone.View + // ------------- + + // Creating a Backbone.View creates its initial element outside of the DOM, + // if an existing element is not provided... + Backbone.View = function(options) { + this.cid = _.uniqueId('view'); + this._configure(options || {}); + this._ensureElement(); + this.delegateEvents(); + this.initialize.apply(this, arguments); + }; + + // Element lookup, scoped to DOM elements within the current view. + // This should be prefered to global lookups, if you're dealing with + // a specific view. + var selectorDelegate = function(selector) { + return $(selector, this.el); + }; + + // Cached regex to split keys for `delegate`. + var eventSplitter = /^(\S+)\s*(.*)$/; + + // List of view options to be merged as properties. + var viewOptions = ['model', 'collection', 'el', 'id', 'attributes', 'className', 'tagName']; + + // Set up all inheritable **Backbone.View** properties and methods. + _.extend(Backbone.View.prototype, Backbone.Events, { + + // The default `tagName` of a View's element is `"div"`. + tagName : 'div', + + // Attach the `selectorDelegate` function as the `$` property. + $ : selectorDelegate, + + // Initialize is an empty function by default. Override it with your own + // initialization logic. + initialize : function(){}, + + // **render** is the core function that your view should override, in order + // to populate its element (`this.el`), with the appropriate HTML. The + // convention is for **render** to always return `this`. + render : function() { + return this; + }, + + // Remove this view from the DOM. Note that the view isn't present in the + // DOM by default, so calling this method may be a no-op. + remove : function() { + $(this.el).remove(); + return this; + }, + + // For small amounts of DOM Elements, where a full-blown template isn't + // needed, use **make** to manufacture elements, one at a time. + // + // var el = this.make('li', {'class': 'row'}, this.model.escape('title')); + // + make : function(tagName, attributes, content) { + var el = document.createElement(tagName); + if (attributes) $(el).attr(attributes); + if (content) $(el).html(content); + return el; + }, + + // Set callbacks, where `this.callbacks` is a hash of + // + // *{"event selector": "callback"}* + // + // { + // 'mousedown .title': 'edit', + // 'click .button': 'save' + // } + // + // pairs. Callbacks will be bound to the view, with `this` set properly. + // Uses event delegation for efficiency. + // Omitting the selector binds the event to `this.el`. + // This only works for delegate-able events: not `focus`, `blur`, and + // not `change`, `submit`, and `reset` in Internet Explorer. + delegateEvents : function(events) { + if (!(events || (events = this.events))) return; + if (_.isFunction(events)) events = events.call(this); + $(this.el).unbind('.delegateEvents' + this.cid); + for (var key in events) { + var method = this[events[key]]; + if (!method) throw new Error('Event "' + events[key] + '" does not exist'); + var match = key.match(eventSplitter); + var eventName = match[1], selector = match[2]; + method = _.bind(method, this); + eventName += '.delegateEvents' + this.cid; + if (selector === '') { + $(this.el).bind(eventName, method); + } else { + $(this.el).delegate(selector, eventName, method); + } + } + }, + + // Performs the initial configuration of a View with a set of options. + // Keys with special meaning *(model, collection, id, className)*, are + // attached directly to the view. + _configure : function(options) { + if (this.options) options = _.extend({}, this.options, options); + for (var i = 0, l = viewOptions.length; i < l; i++) { + var attr = viewOptions[i]; + if (options[attr]) this[attr] = options[attr]; + } + this.options = options; + }, + + // Ensure that the View has a DOM element to render into. + // If `this.el` is a string, pass it through `$()`, take the first + // matching element, and re-assign it to `el`. Otherwise, create + // an element from the `id`, `className` and `tagName` proeprties. + _ensureElement : function() { + if (!this.el) { + var attrs = this.attributes || {}; + if (this.id) attrs.id = this.id; + if (this.className) attrs['class'] = this.className; + this.el = this.make(this.tagName, attrs); + } else if (_.isString(this.el)) { + this.el = $(this.el).get(0); + } + } + + }); + + // The self-propagating extend function that Backbone classes use. + var extend = function (protoProps, classProps) { + var child = inherits(this, protoProps, classProps); + child.extend = this.extend; + return child; + }; + + // Set up inheritance for the model, collection, and view. + Backbone.Model.extend = Backbone.Collection.extend = + Backbone.Router.extend = Backbone.View.extend = extend; + + // Map from CRUD to HTTP for our default `Backbone.sync` implementation. + var methodMap = { + 'create': 'POST', + 'update': 'PUT', + 'delete': 'DELETE', + 'read' : 'GET' + }; + + // Backbone.sync + // ------------- + + // Override this function to change the manner in which Backbone persists + // models to the server. You will be passed the type of request, and the + // model in question. By default, uses makes a RESTful Ajax request + // to the model's `url()`. Some possible customizations could be: + // + // * Use `setTimeout` to batch rapid-fire updates into a single request. + // * Send up the models as XML instead of JSON. + // * Persist models via WebSockets instead of Ajax. + // + // Turn on `Backbone.emulateHTTP` in order to send `PUT` and `DELETE` requests + // as `POST`, with a `_method` parameter containing the true HTTP method, + // as well as all requests with the body as `application/x-www-form-urlencoded` instead of + // `application/json` with the model in a param named `model`. + // Useful when interfacing with server-side languages like **PHP** that make + // it difficult to read the body of `PUT` requests. + Backbone.sync = function(method, model, options) { + var type = methodMap[method]; + + // Default JSON-request options. + var params = _.extend({ + type: type, + dataType: 'json' + }, options); + + // Ensure that we have a URL. + if (!params.url) { + params.url = getUrl(model) || urlError(); + } + + // Ensure that we have the appropriate request data. + if (!params.data && model && (method == 'create' || method == 'update')) { + params.contentType = 'application/json'; + params.data = JSON.stringify(model.toJSON()); + } + + // For older servers, emulate JSON by encoding the request into an HTML-form. + if (Backbone.emulateJSON) { + params.contentType = 'application/x-www-form-urlencoded'; + params.data = params.data ? {model : params.data} : {}; + } + + // For older servers, emulate HTTP by mimicking the HTTP method with `_method` + // And an `X-HTTP-Method-Override` header. + if (Backbone.emulateHTTP) { + if (type === 'PUT' || type === 'DELETE') { + if (Backbone.emulateJSON) params.data._method = type; + params.type = 'POST'; + params.beforeSend = function(xhr) { + xhr.setRequestHeader('X-HTTP-Method-Override', type); + }; + } + } + + // Don't process data on a non-GET request. + if (params.type !== 'GET' && !Backbone.emulateJSON) { + params.processData = false; + } + + // Make the request. + return $.ajax(params); + }; + + // Helpers + // ------- + + // Shared empty constructor function to aid in prototype-chain creation. + var ctor = function(){}; + + // Helper function to correctly set up the prototype chain, for subclasses. + // Similar to `goog.inherits`, but uses a hash of prototype properties and + // class properties to be extended. + var inherits = function(parent, protoProps, staticProps) { + var child; + + // The constructor function for the new subclass is either defined by you + // (the "constructor" property in your `extend` definition), or defaulted + // by us to simply call `super()`. + if (protoProps && protoProps.hasOwnProperty('constructor')) { + child = protoProps.constructor; + } else { + child = function(){ return parent.apply(this, arguments); }; + } + + // Inherit class (static) properties from parent. + _.extend(child, parent); + + // Set the prototype chain to inherit from `parent`, without calling + // `parent`'s constructor function. + ctor.prototype = parent.prototype; + child.prototype = new ctor(); + + // Add prototype properties (instance properties) to the subclass, + // if supplied. + if (protoProps) _.extend(child.prototype, protoProps); + + // Add static properties to the constructor function, if supplied. + if (staticProps) _.extend(child, staticProps); + + // Correctly set child's `prototype.constructor`. + child.prototype.constructor = child; + + // Set a convenience property in case the parent's prototype is needed later. + child.__super__ = parent.prototype; + + return child; + }; + + // Helper function to get a URL from a Model or Collection as a property + // or as a function. + var getUrl = function(object) { + if (!(object && object.url)) return null; + return _.isFunction(object.url) ? object.url() : object.url; + }; + + // Throw an error when a URL is needed, and none is supplied. + var urlError = function() { + throw new Error('A "url" property or function must be specified'); + }; + + // Wrap an optional error callback with a fallback error event. + var wrapError = function(onError, model, options) { + return function(resp) { + if (onError) { + onError(model, resp, options); + } else { + model.trigger('error', model, resp, options); + } + }; + }; + + // Helper function to escape a string for HTML rendering. + var escapeHTML = function(string) { + return string.replace(/&(?!\w+;|#\d+;|#x[\da-f]+;)/gi, '&').replace(/</g, '<').replace(/>/g, '>').replace(/"/g, '"').replace(/'/g, ''').replace(/\//g,'/'); + }; + +}).call(this); diff --git a/lib/beetsplug/web/static/beets.css b/lib/beetsplug/web/static/beets.css new file mode 100644 index 00000000..2ca7fc83 --- /dev/null +++ b/lib/beetsplug/web/static/beets.css @@ -0,0 +1,160 @@ +body { + font-family: Helvetica, Arial, sans-serif; +} + +#header { + position: fixed; + left: 0; + right: 0; + top: 0; + height: 36px; + + color: white; + + cursor: default; + + /* shadowy border */ + box-shadow: 0 0 20px #999; + -webkit-box-shadow: 0 0 20px #999; + -moz-box-shadow: 0 0 20px #999; + + /* background gradient */ + background: #0e0e0e; + background: -moz-linear-gradient(top, #6b6b6b 0%, #0e0e0e 100%); + background: -webkit-linear-gradient(top, #6b6b6b 0%,#0e0e0e 100%); +} +#header h1 { + font-size: 1.1em; + font-weight: bold; + color: white; + margin: 0.35em; + float: left; +} + +#entities { + width: 17em; + + position: fixed; + top: 36px; + left: 0; + bottom: 0; + margin: 0; + + z-index: 1; + background: #dde4eb; + + /* shadowy border */ + box-shadow: 0 0 20px #666; + -webkit-box-shadow: 0 0 20px #666; + -moz-box-shadow: 0 0 20px #666; +} +#queryForm { + display: block; + text-align: center; + margin: 0.25em 0; +} +#query { + width: 95%; + font-size: 1em; +} +#entities ul { + width: 17em; + + position: fixed; + top: 36px; + left: 0; + bottom: 0; + margin: 2.2em 0 0 0; + padding: 0; + + overflow-y: auto; + overflow-x: hidden; +} +#entities ul li { + list-style: none; + padding: 4px 8px; + margin: 0; + cursor: default; +} +#entities ul li.selected { + background: #7abcff; + background: -moz-linear-gradient(top, #7abcff 0%, #60abf8 44%, #4096ee 100%); + background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#7abcff), color-stop(44%,#60abf8), color-stop(100%,#4096ee)); + color: white; +} +#entities ul li .playing { + margin-left: 5px; + font-size: 0.9em; +} + + +#main-detail, #extra-detail { + position: fixed; + left: 17em; + margin: 1.0em 0 0 1.5em; +} +#main-detail { + top: 36px; + height: 98px; +} +#main-detail .artist, #main-detail .album, #main-detail .title { + display: block; +} +#main-detail .title { + font-size: 1.3em; + font-weight: bold; +} +#main-detail .albumtitle { + font-style: italic; +} + +#extra-detail { + overflow-x: hidden; + overflow-y: auto; + top: 134px; + bottom: 0; + right: 0; +} +/*Fix for correctly displaying line breaks in lyrics*/ +#extra-detail .lyrics { + white-space: pre-wrap; +} +#extra-detail dl dt, #extra-detail dl dd { + list-style: none; + margin: 0; + padding: 0; +} +#extra-detail dl dt { + width: 10em; + float: left; + text-align: right; + font-weight: bold; + clear: both; +} +#extra-detail dl dd { + margin-left: 10.5em; +} + + +#player { + float: left; + width: 150px; + height: 36px; +} +#player .play, #player .pause, #player .disabled { + -webkit-appearance: none; + font-size: 1em; + font-family: Helvetica, Arial, sans-serif; + background: none; + border: none; + color: white; + padding: 5px; + margin: 0; + text-align: center; + + width: 36px; + height: 36px; +} +#player .disabled { + color: #666; +} diff --git a/lib/beetsplug/web/static/beets.js b/lib/beetsplug/web/static/beets.js new file mode 100644 index 00000000..97af7011 --- /dev/null +++ b/lib/beetsplug/web/static/beets.js @@ -0,0 +1,314 @@ +// Format times as minutes and seconds. +var timeFormat = function(secs) { + if (secs == undefined || isNaN(secs)) { + return '0:00'; + } + secs = Math.round(secs); + var mins = '' + Math.floor(secs / 60); + secs = '' + (secs % 60); + if (secs.length < 2) { + secs = '0' + secs; + } + return mins + ':' + secs; +} + +// jQuery extension encapsulating event hookups for audio element controls. +$.fn.player = function(debug) { + // Selected element should contain an HTML5 Audio element. + var audio = $('audio', this).get(0); + + // Control elements that may be present, identified by class. + var playBtn = $('.play', this); + var pauseBtn = $('.pause', this); + var disabledInd = $('.disabled', this); + var timesEl = $('.times', this); + var curTimeEl = $('.currentTime', this); + var totalTimeEl = $('.totalTime', this); + var sliderPlayedEl = $('.slider .played', this); + var sliderLoadedEl = $('.slider .loaded', this); + + // Button events. + playBtn.click(function() { + audio.play(); + }); + pauseBtn.click(function(ev) { + audio.pause(); + }); + + // Utilities. + var timePercent = function(cur, total) { + if (cur == undefined || isNaN(cur) || + total == undefined || isNaN(total) || total == 0) { + return 0; + } + var ratio = cur / total; + if (ratio > 1.0) { + ratio = 1.0; + } + return (Math.round(ratio * 10000) / 100) + '%'; + } + + // Event helpers. + var dbg = function(msg) { + if (debug) + console.log(msg); + } + var showState = function() { + if (audio.duration == undefined || isNaN(audio.duration)) { + playBtn.hide(); + pauseBtn.hide(); + disabledInd.show(); + timesEl.hide(); + } else if (audio.paused) { + playBtn.show(); + pauseBtn.hide(); + disabledInd.hide(); + timesEl.show(); + } else { + playBtn.hide(); + pauseBtn.show(); + disabledInd.hide(); + timesEl.show(); + } + } + var showTimes = function() { + curTimeEl.text(timeFormat(audio.currentTime)); + totalTimeEl.text(timeFormat(audio.duration)); + + sliderPlayedEl.css('width', + timePercent(audio.currentTime, audio.duration)); + + // last time buffered + var bufferEnd = 0; + for (var i = 0; i < audio.buffered.length; ++i) { + if (audio.buffered.end(i) > bufferEnd) + bufferEnd = audio.buffered.end(i); + } + sliderLoadedEl.css('width', + timePercent(bufferEnd, audio.duration)); + } + + // Initialize controls. + showState(); + showTimes(); + + // Bind events. + $('audio', this).bind({ + playing: function() { + dbg('playing'); + showState(); + }, + pause: function() { + dbg('pause'); + showState(); + }, + ended: function() { + dbg('ended'); + showState(); + }, + progress: function() { + dbg('progress ' + audio.buffered); + }, + timeupdate: function() { + dbg('timeupdate ' + audio.currentTime); + showTimes(); + }, + durationchange: function() { + dbg('durationchange ' + audio.duration); + showState(); + showTimes(); + }, + loadeddata: function() { + dbg('loadeddata'); + }, + loadedmetadata: function() { + dbg('loadedmetadata'); + } + }); +} + +// Simple selection disable for jQuery. +// Cut-and-paste from: +// https://stackoverflow.com/questions/2700000 +$.fn.disableSelection = function() { + $(this).attr('unselectable', 'on') + .css('-moz-user-select', 'none') + .each(function() { + this.onselectstart = function() { return false; }; + }); +}; + +$(function() { + +// Routes. +var BeetsRouter = Backbone.Router.extend({ + routes: { + "item/query/:query": "itemQuery", + }, + itemQuery: function(query) { + var queryURL = query.split(/\s+/).map(encodeURIComponent).join('/'); + $.getJSON('item/query/' + queryURL, function(data) { + var models = _.map( + data['results'], + function(d) { return new Item(d); } + ); + var results = new Items(models); + app.showItems(results); + }); + } +}); +var router = new BeetsRouter(); + +// Model. +var Item = Backbone.Model.extend({ + urlRoot: 'item' +}); +var Items = Backbone.Collection.extend({ + model: Item +}); + +// Item views. +var ItemEntryView = Backbone.View.extend({ + tagName: "li", + template: _.template($('#item-entry-template').html()), + events: { + 'click': 'select', + 'dblclick': 'play' + }, + initialize: function() { + this.playing = false; + }, + render: function() { + $(this.el).html(this.template(this.model.toJSON())); + this.setPlaying(this.playing); + return this; + }, + select: function() { + app.selectItem(this); + }, + play: function() { + app.playItem(this.model); + }, + setPlaying: function(val) { + this.playing = val; + if (val) + this.$('.playing').show(); + else + this.$('.playing').hide(); + } +}); +//Holds Title, Artist, Album etc. +var ItemMainDetailView = Backbone.View.extend({ + tagName: "div", + template: _.template($('#item-main-detail-template').html()), + events: { + 'click .play': 'play', + }, + render: function() { + $(this.el).html(this.template(this.model.toJSON())); + return this; + }, + play: function() { + app.playItem(this.model); + } +}); +// Holds Track no., Format, MusicBrainz link, Lyrics, Comments etc. +var ItemExtraDetailView = Backbone.View.extend({ + tagName: "div", + template: _.template($('#item-extra-detail-template').html()), + render: function() { + $(this.el).html(this.template(this.model.toJSON())); + return this; + } +}); +// Main app view. +var AppView = Backbone.View.extend({ + el: $('body'), + events: { + 'submit #queryForm': 'querySubmit', + }, + querySubmit: function(ev) { + ev.preventDefault(); + router.navigate('item/query/' + encodeURIComponent($('#query').val()), true); + }, + initialize: function() { + this.playingItem = null; + this.shownItems = null; + + // Not sure why these events won't bind automatically. + this.$('audio').bind({ + 'play': _.bind(this.audioPlay, this), + 'pause': _.bind(this.audioPause, this), + 'ended': _.bind(this.audioEnded, this) + }); + }, + showItems: function(items) { + this.shownItems = items; + $('#results').empty(); + items.each(function(item) { + var view = new ItemEntryView({model: item}); + item.entryView = view; + $('#results').append(view.render().el); + }); + }, + selectItem: function(view) { + // Mark row as selected. + $('#results li').removeClass("selected"); + $(view.el).addClass("selected"); + + // Show main and extra detail. + var mainDetailView = new ItemMainDetailView({model: view.model}); + $('#main-detail').empty().append(mainDetailView.render().el); + + var extraDetailView = new ItemExtraDetailView({model: view.model}); + $('#extra-detail').empty().append(extraDetailView.render().el); + }, + playItem: function(item) { + var url = 'item/' + item.get('id') + '/file'; + $('#player audio').attr('src', url); + $('#player audio').get(0).play(); + + if (this.playingItem != null) { + this.playingItem.entryView.setPlaying(false); + } + item.entryView.setPlaying(true); + this.playingItem = item; + }, + + audioPause: function() { + this.playingItem.entryView.setPlaying(false); + }, + audioPlay: function() { + if (this.playingItem != null) + this.playingItem.entryView.setPlaying(true); + }, + audioEnded: function() { + this.playingItem.entryView.setPlaying(false); + + // Try to play the next track. + var idx = this.shownItems.indexOf(this.playingItem); + if (idx == -1) { + // Not in current list. + return; + } + var nextIdx = idx + 1; + if (nextIdx >= this.shownItems.size()) { + // End of list. + return; + } + this.playItem(this.shownItems.at(nextIdx)); + } +}); +var app = new AppView(); + +// App setup. +Backbone.history.start({pushState: false}); + +// Disable selection on UI elements. +$('#entities ul').disableSelection(); +$('#header').disableSelection(); + +// Audio player setup. +$('#player').player(); + +}); diff --git a/lib/beetsplug/web/static/jquery.js b/lib/beetsplug/web/static/jquery.js new file mode 100644 index 00000000..e1414212 --- /dev/null +++ b/lib/beetsplug/web/static/jquery.js @@ -0,0 +1,9266 @@ +/*! + * jQuery JavaScript Library v1.7.1 + * http://jquery.com/ + * + * Copyright 2016, John Resig + * Dual licensed under the MIT or GPL Version 2 licenses. + * http://jquery.org/license + * + * Includes Sizzle.js + * http://sizzlejs.com/ + * Copyright 2016, The Dojo Foundation + * Released under the MIT, BSD, and GPL Licenses. + * + * Date: Mon Nov 21 21:11:03 2011 -0500 + */ +(function( window, undefined ) { + +// Use the correct document accordingly with window argument (sandbox) +var document = window.document, + navigator = window.navigator, + location = window.location; +var jQuery = (function() { + +// Define a local copy of jQuery +var jQuery = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context, rootjQuery ); + }, + + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + + // Map over the $ in case of overwrite + _$ = window.$, + + // A central reference to the root jQuery(document) + rootjQuery, + + // A simple way to check for HTML strings or ID strings + // Prioritize #id over <tag> to avoid XSS via location.hash (#9521) + quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/, + + // Check if a string has a non-whitespace character in it + rnotwhite = /\S/, + + // Used for trimming whitespace + trimLeft = /^\s+/, + trimRight = /\s+$/, + + // Match a standalone tag + rsingleTag = /^<(\w+)\s*\/?>(?:<\/\1>)?$/, + + // JSON RegExp + rvalidchars = /^[\],:{}\s]*$/, + rvalidescape = /\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g, + rvalidtokens = /"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g, + rvalidbraces = /(?:^|:|,)(?:\s*\[)+/g, + + // Useragent RegExp + rwebkit = /(webkit)[ \/]([\w.]+)/, + ropera = /(opera)(?:.*version)?[ \/]([\w.]+)/, + rmsie = /(msie) ([\w.]+)/, + rmozilla = /(mozilla)(?:.*? rv:([\w.]+))?/, + + // Matches dashed string for camelizing + rdashAlpha = /-([a-z]|[0-9])/ig, + rmsPrefix = /^-ms-/, + + // Used by jQuery.camelCase as callback to replace() + fcamelCase = function( all, letter ) { + return ( letter + "" ).toUpperCase(); + }, + + // Keep a UserAgent string for use with jQuery.browser + userAgent = navigator.userAgent, + + // For matching the engine and version of the browser + browserMatch, + + // The deferred used on DOM ready + readyList, + + // The ready event handler + DOMContentLoaded, + + // Save a reference to some core methods + toString = Object.prototype.toString, + hasOwn = Object.prototype.hasOwnProperty, + push = Array.prototype.push, + slice = Array.prototype.slice, + trim = String.prototype.trim, + indexOf = Array.prototype.indexOf, + + // [[Class]] -> type pairs + class2type = {}; + +jQuery.fn = jQuery.prototype = { + constructor: jQuery, + init: function( selector, context, rootjQuery ) { + var match, elem, ret, doc; + + // Handle $(""), $(null), or $(undefined) + if ( !selector ) { + return this; + } + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this.context = this[0] = selector; + this.length = 1; + return this; + } + + // The body element only exists once, optimize finding it + if ( selector === "body" && !context && document.body ) { + this.context = document; + this[0] = document.body; + this.selector = selector; + this.length = 1; + return this; + } + + // Handle HTML strings + if ( typeof selector === "string" ) { + // Are we dealing with HTML string or an ID? + if ( selector.charAt(0) === "<" && selector.charAt( selector.length - 1 ) === ">" && selector.length >= 3 ) { + // Assume that strings that start and end with <> are HTML and skip the regex check + match = [ null, selector, null ]; + + } else { + match = quickExpr.exec( selector ); + } + + // Verify a match, and that no context was specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) { + context = context instanceof jQuery ? context[0] : context; + doc = ( context ? context.ownerDocument || context : document ); + + // If a single string is passed in and it's a single tag + // just do a createElement and skip the rest + ret = rsingleTag.exec( selector ); + + if ( ret ) { + if ( jQuery.isPlainObject( context ) ) { + selector = [ document.createElement( ret[1] ) ]; + jQuery.fn.attr.call( selector, context, true ); + + } else { + selector = [ doc.createElement( ret[1] ) ]; + } + + } else { + ret = jQuery.buildFragment( [ match[1] ], [ doc ] ); + selector = ( ret.cacheable ? jQuery.clone(ret.fragment) : ret.fragment ).childNodes; + } + + return jQuery.merge( this, selector ); + + // HANDLE: $("#id") + } else { + elem = document.getElementById( match[2] ); + + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + if ( elem && elem.parentNode ) { + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem.id !== match[2] ) { + return rootjQuery.find( selector ); + } + + // Otherwise, we inject the element directly into the jQuery object + this.length = 1; + this[0] = elem; + } + + this.context = document; + this.selector = selector; + return this; + } + + // HANDLE: $(expr, $(...)) + } else if ( !context || context.jquery ) { + return ( context || rootjQuery ).find( selector ); + + // HANDLE: $(expr, context) + // (which is just equivalent to: $(context).find(expr) + } else { + return this.constructor( context ).find( selector ); + } + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) { + return rootjQuery.ready( selector ); + } + + if ( selector.selector !== undefined ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return jQuery.makeArray( selector, this ); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.7.1", + + // The default length of a jQuery object is 0 + length: 0, + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + toArray: function() { + return slice.call( this, 0 ); + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num == null ? + + // Return a 'clean' array + this.toArray() : + + // Return just the object + ( num < 0 ? this[ this.length + num ] : this[ num ] ); + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + // Build a new jQuery matched element set + var ret = this.constructor(); + + if ( jQuery.isArray( elems ) ) { + push.apply( ret, elems ); + + } else { + jQuery.merge( ret, elems ); + } + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) { + ret.selector = this.selector + ( this.selector ? " " : "" ) + selector; + } else if ( name ) { + ret.selector = this.selector + "." + name + "(" + selector + ")"; + } + + // Return the newly-formed element set + return ret; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + ready: function( fn ) { + // Attach the listeners + jQuery.bindReady(); + + // Add the callback + readyList.add( fn ); + + return this; + }, + + eq: function( i ) { + i = +i; + return i === -1 ? + this.slice( i ) : + this.slice( i, i + 1 ); + }, + + first: function() { + return this.eq( 0 ); + }, + + last: function() { + return this.eq( -1 ); + }, + + slice: function() { + return this.pushStack( slice.apply( this, arguments ), + "slice", slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function( elem, i ) { + return callback.call( elem, i, elem ); + })); + }, + + end: function() { + return this.prevObject || this.constructor(null); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: push, + sort: [].sort, + splice: [].splice +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +jQuery.extend = jQuery.fn.extend = function() { + var options, name, src, copy, copyIsArray, clone, + target = arguments[0] || {}, + i = 1, + length = arguments.length, + deep = false; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) { + target = {}; + } + + // extend jQuery itself if only one argument is passed + if ( length === i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) { + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) { + // Extend the base object + for ( name in options ) { + src = target[ name ]; + copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) { + continue; + } + + // Recurse if we're merging plain objects or arrays + if ( deep && copy && ( jQuery.isPlainObject(copy) || (copyIsArray = jQuery.isArray(copy)) ) ) { + if ( copyIsArray ) { + copyIsArray = false; + clone = src && jQuery.isArray(src) ? src : []; + + } else { + clone = src && jQuery.isPlainObject(src) ? src : {}; + } + + // Never move original objects, clone them + target[ name ] = jQuery.extend( deep, clone, copy ); + + // Don't bring in undefined values + } else if ( copy !== undefined ) { + target[ name ] = copy; + } + } + } + } + + // Return the modified object + return target; +}; + +jQuery.extend({ + noConflict: function( deep ) { + if ( window.$ === jQuery ) { + window.$ = _$; + } + + if ( deep && window.jQuery === jQuery ) { + window.jQuery = _jQuery; + } + + return jQuery; + }, + + // Is the DOM ready to be used? Set to true once it occurs. + isReady: false, + + // A counter to track how many items to wait for before + // the ready event fires. See #6781 + readyWait: 1, + + // Hold (or release) the ready event + holdReady: function( hold ) { + if ( hold ) { + jQuery.readyWait++; + } else { + jQuery.ready( true ); + } + }, + + // Handle when the DOM is ready + ready: function( wait ) { + // Either a released hold or an DOMready/load event and not yet ready + if ( (wait === true && !--jQuery.readyWait) || (wait !== true && !jQuery.isReady) ) { + // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443). + if ( !document.body ) { + return setTimeout( jQuery.ready, 1 ); + } + + // Remember that the DOM is ready + jQuery.isReady = true; + + // If a normal DOM Ready event fired, decrement, and wait if need be + if ( wait !== true && --jQuery.readyWait > 0 ) { + return; + } + + // If there are functions bound, to execute + readyList.fireWith( document, [ jQuery ] ); + + // Trigger any bound ready events + if ( jQuery.fn.trigger ) { + jQuery( document ).trigger( "ready" ).off( "ready" ); + } + } + }, + + bindReady: function() { + if ( readyList ) { + return; + } + + readyList = jQuery.Callbacks( "once memory" ); + + // Catch cases where $(document).ready() is called after the + // browser event has already occurred. + if ( document.readyState === "complete" ) { + // Handle it asynchronously to allow scripts the opportunity to delay ready + return setTimeout( jQuery.ready, 1 ); + } + + // Mozilla, Opera and webkit nightlies currently support this event + if ( document.addEventListener ) { + // Use the handy event callback + document.addEventListener( "DOMContentLoaded", DOMContentLoaded, false ); + + // A fallback to window.onload, that will always work + window.addEventListener( "load", jQuery.ready, false ); + + // If IE event model is used + } else if ( document.attachEvent ) { + // ensure firing before onload, + // maybe late but safe also for iframes + document.attachEvent( "onreadystatechange", DOMContentLoaded ); + + // A fallback to window.onload, that will always work + window.attachEvent( "onload", jQuery.ready ); + + // If IE and not a frame + // continually check to see if the document is ready + var toplevel = false; + + try { + toplevel = window.frameElement == null; + } catch(e) {} + + if ( document.documentElement.doScroll && toplevel ) { + doScrollCheck(); + } + } + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return jQuery.type(obj) === "function"; + }, + + isArray: Array.isArray || function( obj ) { + return jQuery.type(obj) === "array"; + }, + + // A crude way of determining if an object is a window + isWindow: function( obj ) { + return obj && typeof obj === "object" && "setInterval" in obj; + }, + + isNumeric: function( obj ) { + return !isNaN( parseFloat(obj) ) && isFinite( obj ); + }, + + type: function( obj ) { + return obj == null ? + String( obj ) : + class2type[ toString.call(obj) ] || "object"; + }, + + isPlainObject: function( obj ) { + // Must be an Object. + // Because of IE, we also have to check the presence of the constructor property. + // Make sure that DOM nodes and window objects don't pass through, as well + if ( !obj || jQuery.type(obj) !== "object" || obj.nodeType || jQuery.isWindow( obj ) ) { + return false; + } + + try { + // Not own constructor property must be Object + if ( obj.constructor && + !hasOwn.call(obj, "constructor") && + !hasOwn.call(obj.constructor.prototype, "isPrototypeOf") ) { + return false; + } + } catch ( e ) { + // IE8,9 Will throw exceptions on certain host objects #9897 + return false; + } + + // Own properties are enumerated firstly, so to speed up, + // if last one is own, then all properties are own. + + var key; + for ( key in obj ) {} + + return key === undefined || hasOwn.call( obj, key ); + }, + + isEmptyObject: function( obj ) { + for ( var name in obj ) { + return false; + } + return true; + }, + + error: function( msg ) { + throw new Error( msg ); + }, + + parseJSON: function( data ) { + if ( typeof data !== "string" || !data ) { + return null; + } + + // Make sure leading/trailing whitespace is removed (IE can't handle it) + data = jQuery.trim( data ); + + // Attempt to parse using the native JSON parser first + if ( window.JSON && window.JSON.parse ) { + return window.JSON.parse( data ); + } + + // Make sure the incoming data is actual JSON + // Logic borrowed from http://json.org/json2.js + if ( rvalidchars.test( data.replace( rvalidescape, "@" ) + .replace( rvalidtokens, "]" ) + .replace( rvalidbraces, "")) ) { + + return ( new Function( "return " + data ) )(); + + } + jQuery.error( "Invalid JSON: " + data ); + }, + + // Cross-browser xml parsing + parseXML: function( data ) { + var xml, tmp; + try { + if ( window.DOMParser ) { // Standard + tmp = new DOMParser(); + xml = tmp.parseFromString( data , "text/xml" ); + } else { // IE + xml = new ActiveXObject( "Microsoft.XMLDOM" ); + xml.async = "false"; + xml.loadXML( data ); + } + } catch( e ) { + xml = undefined; + } + if ( !xml || !xml.documentElement || xml.getElementsByTagName( "parsererror" ).length ) { + jQuery.error( "Invalid XML: " + data ); + } + return xml; + }, + + noop: function() {}, + + // Evaluates a script in a global context + // Workarounds based on findings by Jim Driscoll + // http://weblogs.java.net/blog/driscoll/archive/2009/09/08/eval-javascript-global-context + globalEval: function( data ) { + if ( data && rnotwhite.test( data ) ) { + // We use execScript on Internet Explorer + // We use an anonymous function so that context is window + // rather than jQuery in Firefox + ( window.execScript || function( data ) { + window[ "eval" ].call( window, data ); + } )( data ); + } + }, + + // Convert dashed to camelCase; used by the css and data modules + // Microsoft forgot to hump their vendor prefix (#9572) + camelCase: function( string ) { + return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toUpperCase() === name.toUpperCase(); + }, + + // args is for internal usage only + each: function( object, callback, args ) { + var name, i = 0, + length = object.length, + isObj = length === undefined || jQuery.isFunction( object ); + + if ( args ) { + if ( isObj ) { + for ( name in object ) { + if ( callback.apply( object[ name ], args ) === false ) { + break; + } + } + } else { + for ( ; i < length; ) { + if ( callback.apply( object[ i++ ], args ) === false ) { + break; + } + } + } + + // A special, fast, case for the most common use of each + } else { + if ( isObj ) { + for ( name in object ) { + if ( callback.call( object[ name ], name, object[ name ] ) === false ) { + break; + } + } + } else { + for ( ; i < length; ) { + if ( callback.call( object[ i ], i, object[ i++ ] ) === false ) { + break; + } + } + } + } + + return object; + }, + + // Use native String.trim function wherever possible + trim: trim ? + function( text ) { + return text == null ? + "" : + trim.call( text ); + } : + + // Otherwise use our own trimming functionality + function( text ) { + return text == null ? + "" : + text.toString().replace( trimLeft, "" ).replace( trimRight, "" ); + }, + + // results is for internal usage only + makeArray: function( array, results ) { + var ret = results || []; + + if ( array != null ) { + // The window, strings (and functions) also have 'length' + // Tweaked logic slightly to handle Blackberry 4.7 RegExp issues #6930 + var type = jQuery.type( array ); + + if ( array.length == null || type === "string" || type === "function" || type === "regexp" || jQuery.isWindow( array ) ) { + push.call( ret, array ); + } else { + jQuery.merge( ret, array ); + } + } + + return ret; + }, + + inArray: function( elem, array, i ) { + var len; + + if ( array ) { + if ( indexOf ) { + return indexOf.call( array, elem, i ); + } + + len = array.length; + i = i ? i < 0 ? Math.max( 0, len + i ) : i : 0; + + for ( ; i < len; i++ ) { + // Skip accessing in sparse arrays + if ( i in array && array[ i ] === elem ) { + return i; + } + } + } + + return -1; + }, + + merge: function( first, second ) { + var i = first.length, + j = 0; + + if ( typeof second.length === "number" ) { + for ( var l = second.length; j < l; j++ ) { + first[ i++ ] = second[ j ]; + } + + } else { + while ( second[j] !== undefined ) { + first[ i++ ] = second[ j++ ]; + } + } + + first.length = i; + + return first; + }, + + grep: function( elems, callback, inv ) { + var ret = [], retVal; + inv = !!inv; + + // Go through the array, only saving the items + // that pass the validator function + for ( var i = 0, length = elems.length; i < length; i++ ) { + retVal = !!callback( elems[ i ], i ); + if ( inv !== retVal ) { + ret.push( elems[ i ] ); + } + } + + return ret; + }, + + // arg is for internal usage only + map: function( elems, callback, arg ) { + var value, key, ret = [], + i = 0, + length = elems.length, + // jquery objects are treated as arrays + isArray = elems instanceof jQuery || length !== undefined && typeof length === "number" && ( ( length > 0 && elems[ 0 ] && elems[ length -1 ] ) || length === 0 || jQuery.isArray( elems ) ) ; + + // Go through the array, translating each of the items to their + if ( isArray ) { + for ( ; i < length; i++ ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret[ ret.length ] = value; + } + } + + // Go through every key on the object, + } else { + for ( key in elems ) { + value = callback( elems[ key ], key, arg ); + + if ( value != null ) { + ret[ ret.length ] = value; + } + } + } + + // Flatten any nested arrays + return ret.concat.apply( [], ret ); + }, + + // A global GUID counter for objects + guid: 1, + + // Bind a function to a context, optionally partially applying any + // arguments. + proxy: function( fn, context ) { + if ( typeof context === "string" ) { + var tmp = fn[ context ]; + context = fn; + fn = tmp; + } + + // Quick check to determine if target is callable, in the spec + // this throws a TypeError, but we will just return undefined. + if ( !jQuery.isFunction( fn ) ) { + return undefined; + } + + // Simulated bind + var args = slice.call( arguments, 2 ), + proxy = function() { + return fn.apply( context, args.concat( slice.call( arguments ) ) ); + }; + + // Set the guid of unique handler to the same of original handler, so it can be removed + proxy.guid = fn.guid = fn.guid || proxy.guid || jQuery.guid++; + + return proxy; + }, + + // Mutifunctional method to get and set values to a collection + // The value/s can optionally be executed if it's a function + access: function( elems, key, value, exec, fn, pass ) { + var length = elems.length; + + // Setting many attributes + if ( typeof key === "object" ) { + for ( var k in key ) { + jQuery.access( elems, k, key[k], exec, fn, value ); + } + return elems; + } + + // Setting one attribute + if ( value !== undefined ) { + // Optionally, function values get executed if exec is true + exec = !pass && exec && jQuery.isFunction(value); + + for ( var i = 0; i < length; i++ ) { + fn( elems[i], key, exec ? value.call( elems[i], i, fn( elems[i], key ) ) : value, pass ); + } + + return elems; + } + + // Getting an attribute + return length ? fn( elems[0], key ) : undefined; + }, + + now: function() { + return ( new Date() ).getTime(); + }, + + // Use of jQuery.browser is frowned upon. + // More details: http://docs.jquery.com/Utilities/jQuery.browser + uaMatch: function( ua ) { + ua = ua.toLowerCase(); + + var match = rwebkit.exec( ua ) || + ropera.exec( ua ) || + rmsie.exec( ua ) || + ua.indexOf("compatible") < 0 && rmozilla.exec( ua ) || + []; + + return { browser: match[1] || "", version: match[2] || "0" }; + }, + + sub: function() { + function jQuerySub( selector, context ) { + return new jQuerySub.fn.init( selector, context ); + } + jQuery.extend( true, jQuerySub, this ); + jQuerySub.superclass = this; + jQuerySub.fn = jQuerySub.prototype = this(); + jQuerySub.fn.constructor = jQuerySub; + jQuerySub.sub = this.sub; + jQuerySub.fn.init = function init( selector, context ) { + if ( context && context instanceof jQuery && !(context instanceof jQuerySub) ) { + context = jQuerySub( context ); + } + + return jQuery.fn.init.call( this, selector, context, rootjQuerySub ); + }; + jQuerySub.fn.init.prototype = jQuerySub.fn; + var rootjQuerySub = jQuerySub(document); + return jQuerySub; + }, + + browser: {} +}); + +// Populate the class2type map +jQuery.each("Boolean Number String Function Array Date RegExp Object".split(" "), function(i, name) { + class2type[ "[object " + name + "]" ] = name.toLowerCase(); +}); + +browserMatch = jQuery.uaMatch( userAgent ); +if ( browserMatch.browser ) { + jQuery.browser[ browserMatch.browser ] = true; + jQuery.browser.version = browserMatch.version; +} + +// Deprecated, use jQuery.browser.webkit instead +if ( jQuery.browser.webkit ) { + jQuery.browser.safari = true; +} + +// IE doesn't match non-breaking spaces with \s +if ( rnotwhite.test( "\xA0" ) ) { + trimLeft = /^[\s\xA0]+/; + trimRight = /[\s\xA0]+$/; +} + +// All jQuery objects should point back to these +rootjQuery = jQuery(document); + +// Cleanup functions for the document ready method +if ( document.addEventListener ) { + DOMContentLoaded = function() { + document.removeEventListener( "DOMContentLoaded", DOMContentLoaded, false ); + jQuery.ready(); + }; + +} else if ( document.attachEvent ) { + DOMContentLoaded = function() { + // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443). + if ( document.readyState === "complete" ) { + document.detachEvent( "onreadystatechange", DOMContentLoaded ); + jQuery.ready(); + } + }; +} + +// The DOM ready check for Internet Explorer +function doScrollCheck() { + if ( jQuery.isReady ) { + return; + } + + try { + // If IE is used, use the trick by Diego Perini + // http://javascript.nwbox.com/IEContentLoaded/ + document.documentElement.doScroll("left"); + } catch(e) { + setTimeout( doScrollCheck, 1 ); + return; + } + + // and execute any waiting functions + jQuery.ready(); +} + +return jQuery; + +})(); + + +// String to Object flags format cache +var flagsCache = {}; + +// Convert String-formatted flags into Object-formatted ones and store in cache +function createFlags( flags ) { + var object = flagsCache[ flags ] = {}, + i, length; + flags = flags.split( /\s+/ ); + for ( i = 0, length = flags.length; i < length; i++ ) { + object[ flags[i] ] = true; + } + return object; +} + +/* + * Create a callback list using the following parameters: + * + * flags: an optional list of space-separated flags that will change how + * the callback list behaves + * + * By default a callback list will act like an event callback list and can be + * "fired" multiple times. + * + * Possible flags: + * + * once: will ensure the callback list can only be fired once (like a Deferred) + * + * memory: will keep track of previous values and will call any callback added + * after the list has been fired right away with the latest "memorized" + * values (like a Deferred) + * + * unique: will ensure a callback can only be added once (no duplicate in the list) + * + * stopOnFalse: interrupt callings when a callback returns false + * + */ +jQuery.Callbacks = function( flags ) { + + // Convert flags from String-formatted to Object-formatted + // (we check in cache first) + flags = flags ? ( flagsCache[ flags ] || createFlags( flags ) ) : {}; + + var // Actual callback list + list = [], + // Stack of fire calls for repeatable lists + stack = [], + // Last fire value (for non-forgettable lists) + memory, + // Flag to know if list is currently firing + firing, + // First callback to fire (used internally by add and fireWith) + firingStart, + // End of the loop when firing + firingLength, + // Index of currently firing callback (modified by remove if needed) + firingIndex, + // Add one or several callbacks to the list + add = function( args ) { + var i, + length, + elem, + type, + actual; + for ( i = 0, length = args.length; i < length; i++ ) { + elem = args[ i ]; + type = jQuery.type( elem ); + if ( type === "array" ) { + // Inspect recursively + add( elem ); + } else if ( type === "function" ) { + // Add if not in unique mode and callback is not in + if ( !flags.unique || !self.has( elem ) ) { + list.push( elem ); + } + } + } + }, + // Fire callbacks + fire = function( context, args ) { + args = args || []; + memory = !flags.memory || [ context, args ]; + firing = true; + firingIndex = firingStart || 0; + firingStart = 0; + firingLength = list.length; + for ( ; list && firingIndex < firingLength; firingIndex++ ) { + if ( list[ firingIndex ].apply( context, args ) === false && flags.stopOnFalse ) { + memory = true; // Mark as halted + break; + } + } + firing = false; + if ( list ) { + if ( !flags.once ) { + if ( stack && stack.length ) { + memory = stack.shift(); + self.fireWith( memory[ 0 ], memory[ 1 ] ); + } + } else if ( memory === true ) { + self.disable(); + } else { + list = []; + } + } + }, + // Actual Callbacks object + self = { + // Add a callback or a collection of callbacks to the list + add: function() { + if ( list ) { + var length = list.length; + add( arguments ); + // Do we need to add the callbacks to the + // current firing batch? + if ( firing ) { + firingLength = list.length; + // With memory, if we're not firing then + // we should call right away, unless previous + // firing was halted (stopOnFalse) + } else if ( memory && memory !== true ) { + firingStart = length; + fire( memory[ 0 ], memory[ 1 ] ); + } + } + return this; + }, + // Remove a callback from the list + remove: function() { + if ( list ) { + var args = arguments, + argIndex = 0, + argLength = args.length; + for ( ; argIndex < argLength ; argIndex++ ) { + for ( var i = 0; i < list.length; i++ ) { + if ( args[ argIndex ] === list[ i ] ) { + // Handle firingIndex and firingLength + if ( firing ) { + if ( i <= firingLength ) { + firingLength--; + if ( i <= firingIndex ) { + firingIndex--; + } + } + } + // Remove the element + list.splice( i--, 1 ); + // If we have some unicity property then + // we only need to do this once + if ( flags.unique ) { + break; + } + } + } + } + } + return this; + }, + // Control if a given callback is in the list + has: function( fn ) { + if ( list ) { + var i = 0, + length = list.length; + for ( ; i < length; i++ ) { + if ( fn === list[ i ] ) { + return true; + } + } + } + return false; + }, + // Remove all callbacks from the list + empty: function() { + list = []; + return this; + }, + // Have the list do nothing anymore + disable: function() { + list = stack = memory = undefined; + return this; + }, + // Is it disabled? + disabled: function() { + return !list; + }, + // Lock the list in its current state + lock: function() { + stack = undefined; + if ( !memory || memory === true ) { + self.disable(); + } + return this; + }, + // Is it locked? + locked: function() { + return !stack; + }, + // Call all callbacks with the given context and arguments + fireWith: function( context, args ) { + if ( stack ) { + if ( firing ) { + if ( !flags.once ) { + stack.push( [ context, args ] ); + } + } else if ( !( flags.once && memory ) ) { + fire( context, args ); + } + } + return this; + }, + // Call all the callbacks with the given arguments + fire: function() { + self.fireWith( this, arguments ); + return this; + }, + // To know if the callbacks have already been called at least once + fired: function() { + return !!memory; + } + }; + + return self; +}; + + + + +var // Static reference to slice + sliceDeferred = [].slice; + +jQuery.extend({ + + Deferred: function( func ) { + var doneList = jQuery.Callbacks( "once memory" ), + failList = jQuery.Callbacks( "once memory" ), + progressList = jQuery.Callbacks( "memory" ), + state = "pending", + lists = { + resolve: doneList, + reject: failList, + notify: progressList + }, + promise = { + done: doneList.add, + fail: failList.add, + progress: progressList.add, + + state: function() { + return state; + }, + + // Deprecated + isResolved: doneList.fired, + isRejected: failList.fired, + + then: function( doneCallbacks, failCallbacks, progressCallbacks ) { + deferred.done( doneCallbacks ).fail( failCallbacks ).progress( progressCallbacks ); + return this; + }, + always: function() { + deferred.done.apply( deferred, arguments ).fail.apply( deferred, arguments ); + return this; + }, + pipe: function( fnDone, fnFail, fnProgress ) { + return jQuery.Deferred(function( newDefer ) { + jQuery.each( { + done: [ fnDone, "resolve" ], + fail: [ fnFail, "reject" ], + progress: [ fnProgress, "notify" ] + }, function( handler, data ) { + var fn = data[ 0 ], + action = data[ 1 ], + returned; + if ( jQuery.isFunction( fn ) ) { + deferred[ handler ](function() { + returned = fn.apply( this, arguments ); + if ( returned && jQuery.isFunction( returned.promise ) ) { + returned.promise().then( newDefer.resolve, newDefer.reject, newDefer.notify ); + } else { + newDefer[ action + "With" ]( this === deferred ? newDefer : this, [ returned ] ); + } + }); + } else { + deferred[ handler ]( newDefer[ action ] ); + } + }); + }).promise(); + }, + // Get a promise for this deferred + // If obj is provided, the promise aspect is added to the object + promise: function( obj ) { + if ( obj == null ) { + obj = promise; + } else { + for ( var key in promise ) { + obj[ key ] = promise[ key ]; + } + } + return obj; + } + }, + deferred = promise.promise({}), + key; + + for ( key in lists ) { + deferred[ key ] = lists[ key ].fire; + deferred[ key + "With" ] = lists[ key ].fireWith; + } + + // Handle state + deferred.done( function() { + state = "resolved"; + }, failList.disable, progressList.lock ).fail( function() { + state = "rejected"; + }, doneList.disable, progressList.lock ); + + // Call given func if any + if ( func ) { + func.call( deferred, deferred ); + } + + // All done! + return deferred; + }, + + // Deferred helper + when: function( firstParam ) { + var args = sliceDeferred.call( arguments, 0 ), + i = 0, + length = args.length, + pValues = new Array( length ), + count = length, + pCount = length, + deferred = length <= 1 && firstParam && jQuery.isFunction( firstParam.promise ) ? + firstParam : + jQuery.Deferred(), + promise = deferred.promise(); + function resolveFunc( i ) { + return function( value ) { + args[ i ] = arguments.length > 1 ? sliceDeferred.call( arguments, 0 ) : value; + if ( !( --count ) ) { + deferred.resolveWith( deferred, args ); + } + }; + } + function progressFunc( i ) { + return function( value ) { + pValues[ i ] = arguments.length > 1 ? sliceDeferred.call( arguments, 0 ) : value; + deferred.notifyWith( promise, pValues ); + }; + } + if ( length > 1 ) { + for ( ; i < length; i++ ) { + if ( args[ i ] && args[ i ].promise && jQuery.isFunction( args[ i ].promise ) ) { + args[ i ].promise().then( resolveFunc(i), deferred.reject, progressFunc(i) ); + } else { + --count; + } + } + if ( !count ) { + deferred.resolveWith( deferred, args ); + } + } else if ( deferred !== firstParam ) { + deferred.resolveWith( deferred, length ? [ firstParam ] : [] ); + } + return promise; + } +}); + + + + +jQuery.support = (function() { + + var support, + all, + a, + select, + opt, + input, + marginDiv, + fragment, + tds, + events, + eventName, + i, + isSupported, + div = document.createElement( "div" ), + documentElement = document.documentElement; + + // Preliminary tests + div.setAttribute("className", "t"); + div.innerHTML = " <link/><table></table><a href='/a' style='top:1px;float:left;opacity:.55;'>a</a><input type='checkbox'/>"; + + all = div.getElementsByTagName( "*" ); + a = div.getElementsByTagName( "a" )[ 0 ]; + + // Can't get basic test support + if ( !all || !all.length || !a ) { + return {}; + } + + // First batch of supports tests + select = document.createElement( "select" ); + opt = select.appendChild( document.createElement("option") ); + input = div.getElementsByTagName( "input" )[ 0 ]; + + support = { + // IE strips leading whitespace when .innerHTML is used + leadingWhitespace: ( div.firstChild.nodeType === 3 ), + + // Make sure that tbody elements aren't automatically inserted + // IE will insert them into empty tables + tbody: !div.getElementsByTagName("tbody").length, + + // Make sure that link elements get serialized correctly by innerHTML + // This requires a wrapper element in IE + htmlSerialize: !!div.getElementsByTagName("link").length, + + // Get the style information from getAttribute + // (IE uses .cssText instead) + style: /top/.test( a.getAttribute("style") ), + + // Make sure that URLs aren't manipulated + // (IE normalizes it by default) + hrefNormalized: ( a.getAttribute("href") === "/a" ), + + // Make sure that element opacity exists + // (IE uses filter instead) + // Use a regex to work around a WebKit issue. See #5145 + opacity: /^0.55/.test( a.style.opacity ), + + // Verify style float existence + // (IE uses styleFloat instead of cssFloat) + cssFloat: !!a.style.cssFloat, + + // Make sure that if no value is specified for a checkbox + // that it defaults to "on". + // (WebKit defaults to "" instead) + checkOn: ( input.value === "on" ), + + // Make sure that a selected-by-default option has a working selected property. + // (WebKit defaults to false instead of true, IE too, if it's in an optgroup) + optSelected: opt.selected, + + // Test setAttribute on camelCase class. If it works, we need attrFixes when doing get/setAttribute (ie6/7) + getSetAttribute: div.className !== "t", + + // Tests for enctype support on a form(#6743) + enctype: !!document.createElement("form").enctype, + + // Makes sure cloning an html5 element does not cause problems + // Where outerHTML is undefined, this still works + html5Clone: document.createElement("nav").cloneNode( true ).outerHTML !== "<:nav></:nav>", + + // Will be defined later + submitBubbles: true, + changeBubbles: true, + focusinBubbles: false, + deleteExpando: true, + noCloneEvent: true, + inlineBlockNeedsLayout: false, + shrinkWrapBlocks: false, + reliableMarginRight: true + }; + + // Make sure checked status is properly cloned + input.checked = true; + support.noCloneChecked = input.cloneNode( true ).checked; + + // Make sure that the options inside disabled selects aren't marked as disabled + // (WebKit marks them as disabled) + select.disabled = true; + support.optDisabled = !opt.disabled; + + // Test to see if it's possible to delete an expando from an element + // Fails in Internet Explorer + try { + delete div.test; + } catch( e ) { + support.deleteExpando = false; + } + + if ( !div.addEventListener && div.attachEvent && div.fireEvent ) { + div.attachEvent( "onclick", function() { + // Cloning a node shouldn't copy over any + // bound event handlers (IE does this) + support.noCloneEvent = false; + }); + div.cloneNode( true ).fireEvent( "onclick" ); + } + + // Check if a radio maintains its value + // after being appended to the DOM + input = document.createElement("input"); + input.value = "t"; + input.setAttribute("type", "radio"); + support.radioValue = input.value === "t"; + + input.setAttribute("checked", "checked"); + div.appendChild( input ); + fragment = document.createDocumentFragment(); + fragment.appendChild( div.lastChild ); + + // WebKit doesn't clone checked state correctly in fragments + support.checkClone = fragment.cloneNode( true ).cloneNode( true ).lastChild.checked; + + // Check if a disconnected checkbox will retain its checked + // value of true after appended to the DOM (IE6/7) + support.appendChecked = input.checked; + + fragment.removeChild( input ); + fragment.appendChild( div ); + + div.innerHTML = ""; + + // Check if div with explicit width and no margin-right incorrectly + // gets computed margin-right based on width of container. For more + // info see bug #3333 + // Fails in WebKit before Feb 2011 nightlies + // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right + if ( window.getComputedStyle ) { + marginDiv = document.createElement( "div" ); + marginDiv.style.width = "0"; + marginDiv.style.marginRight = "0"; + div.style.width = "2px"; + div.appendChild( marginDiv ); + support.reliableMarginRight = + ( parseInt( ( window.getComputedStyle( marginDiv, null ) || { marginRight: 0 } ).marginRight, 10 ) || 0 ) === 0; + } + + // Technique from Juriy Zaytsev + // http://perfectionkills.com/detecting-event-support-without-browser-sniffing/ + // We only care about the case where non-standard event systems + // are used, namely in IE. Short-circuiting here helps us to + // avoid an eval call (in setAttribute) which can cause CSP + // to go haywire. See: https://developer.mozilla.org/en/Security/CSP + if ( div.attachEvent ) { + for( i in { + submit: 1, + change: 1, + focusin: 1 + }) { + eventName = "on" + i; + isSupported = ( eventName in div ); + if ( !isSupported ) { + div.setAttribute( eventName, "return;" ); + isSupported = ( typeof div[ eventName ] === "function" ); + } + support[ i + "Bubbles" ] = isSupported; + } + } + + fragment.removeChild( div ); + + // Null elements to avoid leaks in IE + fragment = select = opt = marginDiv = div = input = null; + + // Run tests that need a body at doc ready + jQuery(function() { + var container, outer, inner, table, td, offsetSupport, + conMarginTop, ptlm, vb, style, html, + body = document.getElementsByTagName("body")[0]; + + if ( !body ) { + // Return for frameset docs that don't have a body + return; + } + + conMarginTop = 1; + ptlm = "position:absolute;top:0;left:0;width:1px;height:1px;margin:0;"; + vb = "visibility:hidden;border:0;"; + style = "style='" + ptlm + "border:5px solid #000;padding:0;'"; + html = "<div " + style + "><div></div></div>" + + "<table " + style + " cellpadding='0' cellspacing='0'>" + + "<tr><td></td></tr></table>"; + + container = document.createElement("div"); + container.style.cssText = vb + "width:0;height:0;position:static;top:0;margin-top:" + conMarginTop + "px"; + body.insertBefore( container, body.firstChild ); + + // Construct the test element + div = document.createElement("div"); + container.appendChild( div ); + + // Check if table cells still have offsetWidth/Height when they are set + // to display:none and there are still other visible table cells in a + // table row; if so, offsetWidth/Height are not reliable for use when + // determining if an element has been hidden directly using + // display:none (it is still safe to use offsets if a parent element is + // hidden; don safety goggles and see bug #4512 for more information). + // (only IE 8 fails this test) + div.innerHTML = "<table><tr><td style='padding:0;border:0;display:none'></td><td>t</td></tr></table>"; + tds = div.getElementsByTagName( "td" ); + isSupported = ( tds[ 0 ].offsetHeight === 0 ); + + tds[ 0 ].style.display = ""; + tds[ 1 ].style.display = "none"; + + // Check if empty table cells still have offsetWidth/Height + // (IE <= 8 fail this test) + support.reliableHiddenOffsets = isSupported && ( tds[ 0 ].offsetHeight === 0 ); + + // Figure out if the W3C box model works as expected + div.innerHTML = ""; + div.style.width = div.style.paddingLeft = "1px"; + jQuery.boxModel = support.boxModel = div.offsetWidth === 2; + + if ( typeof div.style.zoom !== "undefined" ) { + // Check if natively block-level elements act like inline-block + // elements when setting their display to 'inline' and giving + // them layout + // (IE < 8 does this) + div.style.display = "inline"; + div.style.zoom = 1; + support.inlineBlockNeedsLayout = ( div.offsetWidth === 2 ); + + // Check if elements with layout shrink-wrap their children + // (IE 6 does this) + div.style.display = ""; + div.innerHTML = "<div style='width:4px;'></div>"; + support.shrinkWrapBlocks = ( div.offsetWidth !== 2 ); + } + + div.style.cssText = ptlm + vb; + div.innerHTML = html; + + outer = div.firstChild; + inner = outer.firstChild; + td = outer.nextSibling.firstChild.firstChild; + + offsetSupport = { + doesNotAddBorder: ( inner.offsetTop !== 5 ), + doesAddBorderForTableAndCells: ( td.offsetTop === 5 ) + }; + + inner.style.position = "fixed"; + inner.style.top = "20px"; + + // safari subtracts parent border width here which is 5px + offsetSupport.fixedPosition = ( inner.offsetTop === 20 || inner.offsetTop === 15 ); + inner.style.position = inner.style.top = ""; + + outer.style.overflow = "hidden"; + outer.style.position = "relative"; + + offsetSupport.subtractsBorderForOverflowNotVisible = ( inner.offsetTop === -5 ); + offsetSupport.doesNotIncludeMarginInBodyOffset = ( body.offsetTop !== conMarginTop ); + + body.removeChild( container ); + div = container = null; + + jQuery.extend( support, offsetSupport ); + }); + + return support; +})(); + + + + +var rbrace = /^(?:\{.*\}|\[.*\])$/, + rmultiDash = /([A-Z])/g; + +jQuery.extend({ + cache: {}, + + // Please use with caution + uuid: 0, + + // Unique for each copy of jQuery on the page + // Non-digits removed to match rinlinejQuery + expando: "jQuery" + ( jQuery.fn.jquery + Math.random() ).replace( /\D/g, "" ), + + // The following elements throw uncatchable exceptions if you + // attempt to add expando properties to them. + noData: { + "embed": true, + // Ban all objects except for Flash (which handle expandos) + "object": "clsid:D27CDB6E-AE6D-11cf-96B8-444553540000", + "applet": true + }, + + hasData: function( elem ) { + elem = elem.nodeType ? jQuery.cache[ elem[jQuery.expando] ] : elem[ jQuery.expando ]; + return !!elem && !isEmptyDataObject( elem ); + }, + + data: function( elem, name, data, pvt /* Internal Use Only */ ) { + if ( !jQuery.acceptData( elem ) ) { + return; + } + + var privateCache, thisCache, ret, + internalKey = jQuery.expando, + getByName = typeof name === "string", + + // We have to handle DOM nodes and JS objects differently because IE6-7 + // can't GC object references properly across the DOM-JS boundary + isNode = elem.nodeType, + + // Only DOM nodes need the global jQuery cache; JS object data is + // attached directly to the object so GC can occur automatically + cache = isNode ? jQuery.cache : elem, + + // Only defining an ID for JS objects if its cache already exists allows + // the code to shortcut on the same path as a DOM node with no cache + id = isNode ? elem[ internalKey ] : elem[ internalKey ] && internalKey, + isEvents = name === "events"; + + // Avoid doing any more work than we need to when trying to get data on an + // object that has no data at all + if ( (!id || !cache[id] || (!isEvents && !pvt && !cache[id].data)) && getByName && data === undefined ) { + return; + } + + if ( !id ) { + // Only DOM nodes need a new unique ID for each element since their data + // ends up in the global cache + if ( isNode ) { + elem[ internalKey ] = id = ++jQuery.uuid; + } else { + id = internalKey; + } + } + + if ( !cache[ id ] ) { + cache[ id ] = {}; + + // Avoids exposing jQuery metadata on plain JS objects when the object + // is serialized using JSON.stringify + if ( !isNode ) { + cache[ id ].toJSON = jQuery.noop; + } + } + + // An object can be passed to jQuery.data instead of a key/value pair; this gets + // shallow copied over onto the existing cache + if ( typeof name === "object" || typeof name === "function" ) { + if ( pvt ) { + cache[ id ] = jQuery.extend( cache[ id ], name ); + } else { + cache[ id ].data = jQuery.extend( cache[ id ].data, name ); + } + } + + privateCache = thisCache = cache[ id ]; + + // jQuery data() is stored in a separate object inside the object's internal data + // cache in order to avoid key collisions between internal data and user-defined + // data. + if ( !pvt ) { + if ( !thisCache.data ) { + thisCache.data = {}; + } + + thisCache = thisCache.data; + } + + if ( data !== undefined ) { + thisCache[ jQuery.camelCase( name ) ] = data; + } + + // Users should not attempt to inspect the internal events object using jQuery.data, + // it is undocumented and subject to change. But does anyone listen? No. + if ( isEvents && !thisCache[ name ] ) { + return privateCache.events; + } + + // Check for both converted-to-camel and non-converted data property names + // If a data property was specified + if ( getByName ) { + + // First Try to find as-is property data + ret = thisCache[ name ]; + + // Test for null|undefined property data + if ( ret == null ) { + + // Try to find the camelCased property + ret = thisCache[ jQuery.camelCase( name ) ]; + } + } else { + ret = thisCache; + } + + return ret; + }, + + removeData: function( elem, name, pvt /* Internal Use Only */ ) { + if ( !jQuery.acceptData( elem ) ) { + return; + } + + var thisCache, i, l, + + // Reference to internal data cache key + internalKey = jQuery.expando, + + isNode = elem.nodeType, + + // See jQuery.data for more information + cache = isNode ? jQuery.cache : elem, + + // See jQuery.data for more information + id = isNode ? elem[ internalKey ] : internalKey; + + // If there is already no cache entry for this object, there is no + // purpose in continuing + if ( !cache[ id ] ) { + return; + } + + if ( name ) { + + thisCache = pvt ? cache[ id ] : cache[ id ].data; + + if ( thisCache ) { + + // Support array or space separated string names for data keys + if ( !jQuery.isArray( name ) ) { + + // try the string as a key before any manipulation + if ( name in thisCache ) { + name = [ name ]; + } else { + + // split the camel cased version by spaces unless a key with the spaces exists + name = jQuery.camelCase( name ); + if ( name in thisCache ) { + name = [ name ]; + } else { + name = name.split( " " ); + } + } + } + + for ( i = 0, l = name.length; i < l; i++ ) { + delete thisCache[ name[i] ]; + } + + // If there is no data left in the cache, we want to continue + // and let the cache object itself get destroyed + if ( !( pvt ? isEmptyDataObject : jQuery.isEmptyObject )( thisCache ) ) { + return; + } + } + } + + // See jQuery.data for more information + if ( !pvt ) { + delete cache[ id ].data; + + // Don't destroy the parent cache unless the internal data object + // had been the only thing left in it + if ( !isEmptyDataObject(cache[ id ]) ) { + return; + } + } + + // Browsers that fail expando deletion also refuse to delete expandos on + // the window, but it will allow it on all other JS objects; other browsers + // don't care + // Ensure that `cache` is not a window object #10080 + if ( jQuery.support.deleteExpando || !cache.setInterval ) { + delete cache[ id ]; + } else { + cache[ id ] = null; + } + + // We destroyed the cache and need to eliminate the expando on the node to avoid + // false lookups in the cache for entries that no longer exist + if ( isNode ) { + // IE does not allow us to delete expando properties from nodes, + // nor does it have a removeAttribute function on Document nodes; + // we must handle all of these cases + if ( jQuery.support.deleteExpando ) { + delete elem[ internalKey ]; + } else if ( elem.removeAttribute ) { + elem.removeAttribute( internalKey ); + } else { + elem[ internalKey ] = null; + } + } + }, + + // For internal use only. + _data: function( elem, name, data ) { + return jQuery.data( elem, name, data, true ); + }, + + // A method for determining if a DOM node can handle the data expando + acceptData: function( elem ) { + if ( elem.nodeName ) { + var match = jQuery.noData[ elem.nodeName.toLowerCase() ]; + + if ( match ) { + return !(match === true || elem.getAttribute("classid") !== match); + } + } + + return true; + } +}); + +jQuery.fn.extend({ + data: function( key, value ) { + var parts, attr, name, + data = null; + + if ( typeof key === "undefined" ) { + if ( this.length ) { + data = jQuery.data( this[0] ); + + if ( this[0].nodeType === 1 && !jQuery._data( this[0], "parsedAttrs" ) ) { + attr = this[0].attributes; + for ( var i = 0, l = attr.length; i < l; i++ ) { + name = attr[i].name; + + if ( name.indexOf( "data-" ) === 0 ) { + name = jQuery.camelCase( name.substring(5) ); + + dataAttr( this[0], name, data[ name ] ); + } + } + jQuery._data( this[0], "parsedAttrs", true ); + } + } + + return data; + + } else if ( typeof key === "object" ) { + return this.each(function() { + jQuery.data( this, key ); + }); + } + + parts = key.split("."); + parts[1] = parts[1] ? "." + parts[1] : ""; + + if ( value === undefined ) { + data = this.triggerHandler("getData" + parts[1] + "!", [parts[0]]); + + // Try to fetch any internally stored data first + if ( data === undefined && this.length ) { + data = jQuery.data( this[0], key ); + data = dataAttr( this[0], key, data ); + } + + return data === undefined && parts[1] ? + this.data( parts[0] ) : + data; + + } else { + return this.each(function() { + var self = jQuery( this ), + args = [ parts[0], value ]; + + self.triggerHandler( "setData" + parts[1] + "!", args ); + jQuery.data( this, key, value ); + self.triggerHandler( "changeData" + parts[1] + "!", args ); + }); + } + }, + + removeData: function( key ) { + return this.each(function() { + jQuery.removeData( this, key ); + }); + } +}); + +function dataAttr( elem, key, data ) { + // If nothing was found internally, try to fetch any + // data from the HTML5 data-* attribute + if ( data === undefined && elem.nodeType === 1 ) { + + var name = "data-" + key.replace( rmultiDash, "-$1" ).toLowerCase(); + + data = elem.getAttribute( name ); + + if ( typeof data === "string" ) { + try { + data = data === "true" ? true : + data === "false" ? false : + data === "null" ? null : + jQuery.isNumeric( data ) ? parseFloat( data ) : + rbrace.test( data ) ? jQuery.parseJSON( data ) : + data; + } catch( e ) {} + + // Make sure we set the data so it isn't changed later + jQuery.data( elem, key, data ); + + } else { + data = undefined; + } + } + + return data; +} + +// checks a cache object for emptiness +function isEmptyDataObject( obj ) { + for ( var name in obj ) { + + // if the public data object is empty, the private is still empty + if ( name === "data" && jQuery.isEmptyObject( obj[name] ) ) { + continue; + } + if ( name !== "toJSON" ) { + return false; + } + } + + return true; +} + + + + +function handleQueueMarkDefer( elem, type, src ) { + var deferDataKey = type + "defer", + queueDataKey = type + "queue", + markDataKey = type + "mark", + defer = jQuery._data( elem, deferDataKey ); + if ( defer && + ( src === "queue" || !jQuery._data(elem, queueDataKey) ) && + ( src === "mark" || !jQuery._data(elem, markDataKey) ) ) { + // Give room for hard-coded callbacks to fire first + // and eventually mark/queue something else on the element + setTimeout( function() { + if ( !jQuery._data( elem, queueDataKey ) && + !jQuery._data( elem, markDataKey ) ) { + jQuery.removeData( elem, deferDataKey, true ); + defer.fire(); + } + }, 0 ); + } +} + +jQuery.extend({ + + _mark: function( elem, type ) { + if ( elem ) { + type = ( type || "fx" ) + "mark"; + jQuery._data( elem, type, (jQuery._data( elem, type ) || 0) + 1 ); + } + }, + + _unmark: function( force, elem, type ) { + if ( force !== true ) { + type = elem; + elem = force; + force = false; + } + if ( elem ) { + type = type || "fx"; + var key = type + "mark", + count = force ? 0 : ( (jQuery._data( elem, key ) || 1) - 1 ); + if ( count ) { + jQuery._data( elem, key, count ); + } else { + jQuery.removeData( elem, key, true ); + handleQueueMarkDefer( elem, type, "mark" ); + } + } + }, + + queue: function( elem, type, data ) { + var q; + if ( elem ) { + type = ( type || "fx" ) + "queue"; + q = jQuery._data( elem, type ); + + // Speed up dequeue by getting out quickly if this is just a lookup + if ( data ) { + if ( !q || jQuery.isArray(data) ) { + q = jQuery._data( elem, type, jQuery.makeArray(data) ); + } else { + q.push( data ); + } + } + return q || []; + } + }, + + dequeue: function( elem, type ) { + type = type || "fx"; + + var queue = jQuery.queue( elem, type ), + fn = queue.shift(), + hooks = {}; + + // If the fx queue is dequeued, always remove the progress sentinel + if ( fn === "inprogress" ) { + fn = queue.shift(); + } + + if ( fn ) { + // Add a progress sentinel to prevent the fx queue from being + // automatically dequeued + if ( type === "fx" ) { + queue.unshift( "inprogress" ); + } + + jQuery._data( elem, type + ".run", hooks ); + fn.call( elem, function() { + jQuery.dequeue( elem, type ); + }, hooks ); + } + + if ( !queue.length ) { + jQuery.removeData( elem, type + "queue " + type + ".run", true ); + handleQueueMarkDefer( elem, type, "queue" ); + } + } +}); + +jQuery.fn.extend({ + queue: function( type, data ) { + if ( typeof type !== "string" ) { + data = type; + type = "fx"; + } + + if ( data === undefined ) { + return jQuery.queue( this[0], type ); + } + return this.each(function() { + var queue = jQuery.queue( this, type, data ); + + if ( type === "fx" && queue[0] !== "inprogress" ) { + jQuery.dequeue( this, type ); + } + }); + }, + dequeue: function( type ) { + return this.each(function() { + jQuery.dequeue( this, type ); + }); + }, + // Based off of the plugin by Clint Helfers, with permission. + // http://blindsignals.com/index.php/2009/07/jquery-delay/ + delay: function( time, type ) { + time = jQuery.fx ? jQuery.fx.speeds[ time ] || time : time; + type = type || "fx"; + + return this.queue( type, function( next, hooks ) { + var timeout = setTimeout( next, time ); + hooks.stop = function() { + clearTimeout( timeout ); + }; + }); + }, + clearQueue: function( type ) { + return this.queue( type || "fx", [] ); + }, + // Get a promise resolved when queues of a certain type + // are emptied (fx is the type by default) + promise: function( type, object ) { + if ( typeof type !== "string" ) { + object = type; + type = undefined; + } + type = type || "fx"; + var defer = jQuery.Deferred(), + elements = this, + i = elements.length, + count = 1, + deferDataKey = type + "defer", + queueDataKey = type + "queue", + markDataKey = type + "mark", + tmp; + function resolve() { + if ( !( --count ) ) { + defer.resolveWith( elements, [ elements ] ); + } + } + while( i-- ) { + if (( tmp = jQuery.data( elements[ i ], deferDataKey, undefined, true ) || + ( jQuery.data( elements[ i ], queueDataKey, undefined, true ) || + jQuery.data( elements[ i ], markDataKey, undefined, true ) ) && + jQuery.data( elements[ i ], deferDataKey, jQuery.Callbacks( "once memory" ), true ) )) { + count++; + tmp.add( resolve ); + } + } + resolve(); + return defer.promise(); + } +}); + + + + +var rclass = /[\n\t\r]/g, + rspace = /\s+/, + rreturn = /\r/g, + rtype = /^(?:button|input)$/i, + rfocusable = /^(?:button|input|object|select|textarea)$/i, + rclickable = /^a(?:rea)?$/i, + rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i, + getSetAttribute = jQuery.support.getSetAttribute, + nodeHook, boolHook, fixSpecified; + +jQuery.fn.extend({ + attr: function( name, value ) { + return jQuery.access( this, name, value, true, jQuery.attr ); + }, + + removeAttr: function( name ) { + return this.each(function() { + jQuery.removeAttr( this, name ); + }); + }, + + prop: function( name, value ) { + return jQuery.access( this, name, value, true, jQuery.prop ); + }, + + removeProp: function( name ) { + name = jQuery.propFix[ name ] || name; + return this.each(function() { + // try/catch handles cases where IE balks (such as removing a property on window) + try { + this[ name ] = undefined; + delete this[ name ]; + } catch( e ) {} + }); + }, + + addClass: function( value ) { + var classNames, i, l, elem, + setClass, c, cl; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( j ) { + jQuery( this ).addClass( value.call(this, j, this.className) ); + }); + } + + if ( value && typeof value === "string" ) { + classNames = value.split( rspace ); + + for ( i = 0, l = this.length; i < l; i++ ) { + elem = this[ i ]; + + if ( elem.nodeType === 1 ) { + if ( !elem.className && classNames.length === 1 ) { + elem.className = value; + + } else { + setClass = " " + elem.className + " "; + + for ( c = 0, cl = classNames.length; c < cl; c++ ) { + if ( !~setClass.indexOf( " " + classNames[ c ] + " " ) ) { + setClass += classNames[ c ] + " "; + } + } + elem.className = jQuery.trim( setClass ); + } + } + } + } + + return this; + }, + + removeClass: function( value ) { + var classNames, i, l, elem, className, c, cl; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( j ) { + jQuery( this ).removeClass( value.call(this, j, this.className) ); + }); + } + + if ( (value && typeof value === "string") || value === undefined ) { + classNames = ( value || "" ).split( rspace ); + + for ( i = 0, l = this.length; i < l; i++ ) { + elem = this[ i ]; + + if ( elem.nodeType === 1 && elem.className ) { + if ( value ) { + className = (" " + elem.className + " ").replace( rclass, " " ); + for ( c = 0, cl = classNames.length; c < cl; c++ ) { + className = className.replace(" " + classNames[ c ] + " ", " "); + } + elem.className = jQuery.trim( className ); + + } else { + elem.className = ""; + } + } + } + } + + return this; + }, + + toggleClass: function( value, stateVal ) { + var type = typeof value, + isBool = typeof stateVal === "boolean"; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( i ) { + jQuery( this ).toggleClass( value.call(this, i, this.className, stateVal), stateVal ); + }); + } + + return this.each(function() { + if ( type === "string" ) { + // toggle individual class names + var className, + i = 0, + self = jQuery( this ), + state = stateVal, + classNames = value.split( rspace ); + + while ( (className = classNames[ i++ ]) ) { + // check each className given, space seperated list + state = isBool ? state : !self.hasClass( className ); + self[ state ? "addClass" : "removeClass" ]( className ); + } + + } else if ( type === "undefined" || type === "boolean" ) { + if ( this.className ) { + // store className if set + jQuery._data( this, "__className__", this.className ); + } + + // toggle whole className + this.className = this.className || value === false ? "" : jQuery._data( this, "__className__" ) || ""; + } + }); + }, + + hasClass: function( selector ) { + var className = " " + selector + " ", + i = 0, + l = this.length; + for ( ; i < l; i++ ) { + if ( this[i].nodeType === 1 && (" " + this[i].className + " ").replace(rclass, " ").indexOf( className ) > -1 ) { + return true; + } + } + + return false; + }, + + val: function( value ) { + var hooks, ret, isFunction, + elem = this[0]; + + if ( !arguments.length ) { + if ( elem ) { + hooks = jQuery.valHooks[ elem.nodeName.toLowerCase() ] || jQuery.valHooks[ elem.type ]; + + if ( hooks && "get" in hooks && (ret = hooks.get( elem, "value" )) !== undefined ) { + return ret; + } + + ret = elem.value; + + return typeof ret === "string" ? + // handle most common string cases + ret.replace(rreturn, "") : + // handle cases where value is null/undef or number + ret == null ? "" : ret; + } + + return; + } + + isFunction = jQuery.isFunction( value ); + + return this.each(function( i ) { + var self = jQuery(this), val; + + if ( this.nodeType !== 1 ) { + return; + } + + if ( isFunction ) { + val = value.call( this, i, self.val() ); + } else { + val = value; + } + + // Treat null/undefined as ""; convert numbers to string + if ( val == null ) { + val = ""; + } else if ( typeof val === "number" ) { + val += ""; + } else if ( jQuery.isArray( val ) ) { + val = jQuery.map(val, function ( value ) { + return value == null ? "" : value + ""; + }); + } + + hooks = jQuery.valHooks[ this.nodeName.toLowerCase() ] || jQuery.valHooks[ this.type ]; + + // If set returns undefined, fall back to normal setting + if ( !hooks || !("set" in hooks) || hooks.set( this, val, "value" ) === undefined ) { + this.value = val; + } + }); + } +}); + +jQuery.extend({ + valHooks: { + option: { + get: function( elem ) { + // attributes.value is undefined in Blackberry 4.7 but + // uses .value. See #6932 + var val = elem.attributes.value; + return !val || val.specified ? elem.value : elem.text; + } + }, + select: { + get: function( elem ) { + var value, i, max, option, + index = elem.selectedIndex, + values = [], + options = elem.options, + one = elem.type === "select-one"; + + // Nothing was selected + if ( index < 0 ) { + return null; + } + + // Loop through all the selected options + i = one ? index : 0; + max = one ? index + 1 : options.length; + for ( ; i < max; i++ ) { + option = options[ i ]; + + // Don't return options that are disabled or in a disabled optgroup + if ( option.selected && (jQuery.support.optDisabled ? !option.disabled : option.getAttribute("disabled") === null) && + (!option.parentNode.disabled || !jQuery.nodeName( option.parentNode, "optgroup" )) ) { + + // Get the specific value for the option + value = jQuery( option ).val(); + + // We don't need an array for one selects + if ( one ) { + return value; + } + + // Multi-Selects return an array + values.push( value ); + } + } + + // Fixes Bug #2551 -- select.val() broken in IE after form.reset() + if ( one && !values.length && options.length ) { + return jQuery( options[ index ] ).val(); + } + + return values; + }, + + set: function( elem, value ) { + var values = jQuery.makeArray( value ); + + jQuery(elem).find("option").each(function() { + this.selected = jQuery.inArray( jQuery(this).val(), values ) >= 0; + }); + + if ( !values.length ) { + elem.selectedIndex = -1; + } + return values; + } + } + }, + + attrFn: { + val: true, + css: true, + html: true, + text: true, + data: true, + width: true, + height: true, + offset: true + }, + + attr: function( elem, name, value, pass ) { + var ret, hooks, notxml, + nType = elem.nodeType; + + // don't get/set attributes on text, comment and attribute nodes + if ( !elem || nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + if ( pass && name in jQuery.attrFn ) { + return jQuery( elem )[ name ]( value ); + } + + // Fallback to prop when attributes are not supported + if ( typeof elem.getAttribute === "undefined" ) { + return jQuery.prop( elem, name, value ); + } + + notxml = nType !== 1 || !jQuery.isXMLDoc( elem ); + + // All attributes are lowercase + // Grab necessary hook if one is defined + if ( notxml ) { + name = name.toLowerCase(); + hooks = jQuery.attrHooks[ name ] || ( rboolean.test( name ) ? boolHook : nodeHook ); + } + + if ( value !== undefined ) { + + if ( value === null ) { + jQuery.removeAttr( elem, name ); + return; + + } else if ( hooks && "set" in hooks && notxml && (ret = hooks.set( elem, value, name )) !== undefined ) { + return ret; + + } else { + elem.setAttribute( name, "" + value ); + return value; + } + + } else if ( hooks && "get" in hooks && notxml && (ret = hooks.get( elem, name )) !== null ) { + return ret; + + } else { + + ret = elem.getAttribute( name ); + + // Non-existent attributes return null, we normalize to undefined + return ret === null ? + undefined : + ret; + } + }, + + removeAttr: function( elem, value ) { + var propName, attrNames, name, l, + i = 0; + + if ( value && elem.nodeType === 1 ) { + attrNames = value.toLowerCase().split( rspace ); + l = attrNames.length; + + for ( ; i < l; i++ ) { + name = attrNames[ i ]; + + if ( name ) { + propName = jQuery.propFix[ name ] || name; + + // See #9699 for explanation of this approach (setting first, then removal) + jQuery.attr( elem, name, "" ); + elem.removeAttribute( getSetAttribute ? name : propName ); + + // Set corresponding property to false for boolean attributes + if ( rboolean.test( name ) && propName in elem ) { + elem[ propName ] = false; + } + } + } + } + }, + + attrHooks: { + type: { + set: function( elem, value ) { + // We can't allow the type property to be changed (since it causes problems in IE) + if ( rtype.test( elem.nodeName ) && elem.parentNode ) { + jQuery.error( "type property can't be changed" ); + } else if ( !jQuery.support.radioValue && value === "radio" && jQuery.nodeName(elem, "input") ) { + // Setting the type on a radio button after the value resets the value in IE6-9 + // Reset value to it's default in case type is set after value + // This is for element creation + var val = elem.value; + elem.setAttribute( "type", value ); + if ( val ) { + elem.value = val; + } + return value; + } + } + }, + // Use the value property for back compat + // Use the nodeHook for button elements in IE6/7 (#1954) + value: { + get: function( elem, name ) { + if ( nodeHook && jQuery.nodeName( elem, "button" ) ) { + return nodeHook.get( elem, name ); + } + return name in elem ? + elem.value : + null; + }, + set: function( elem, value, name ) { + if ( nodeHook && jQuery.nodeName( elem, "button" ) ) { + return nodeHook.set( elem, value, name ); + } + // Does not return so that setAttribute is also used + elem.value = value; + } + } + }, + + propFix: { + tabindex: "tabIndex", + readonly: "readOnly", + "for": "htmlFor", + "class": "className", + maxlength: "maxLength", + cellspacing: "cellSpacing", + cellpadding: "cellPadding", + rowspan: "rowSpan", + colspan: "colSpan", + usemap: "useMap", + frameborder: "frameBorder", + contenteditable: "contentEditable" + }, + + prop: function( elem, name, value ) { + var ret, hooks, notxml, + nType = elem.nodeType; + + // don't get/set properties on text, comment and attribute nodes + if ( !elem || nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + notxml = nType !== 1 || !jQuery.isXMLDoc( elem ); + + if ( notxml ) { + // Fix name and attach hooks + name = jQuery.propFix[ name ] || name; + hooks = jQuery.propHooks[ name ]; + } + + if ( value !== undefined ) { + if ( hooks && "set" in hooks && (ret = hooks.set( elem, value, name )) !== undefined ) { + return ret; + + } else { + return ( elem[ name ] = value ); + } + + } else { + if ( hooks && "get" in hooks && (ret = hooks.get( elem, name )) !== null ) { + return ret; + + } else { + return elem[ name ]; + } + } + }, + + propHooks: { + tabIndex: { + get: function( elem ) { + // elem.tabIndex doesn't always return the correct value when it hasn't been explicitly set + // http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/ + var attributeNode = elem.getAttributeNode("tabindex"); + + return attributeNode && attributeNode.specified ? + parseInt( attributeNode.value, 10 ) : + rfocusable.test( elem.nodeName ) || rclickable.test( elem.nodeName ) && elem.href ? + 0 : + undefined; + } + } + } +}); + +// Add the tabIndex propHook to attrHooks for back-compat (different case is intentional) +jQuery.attrHooks.tabindex = jQuery.propHooks.tabIndex; + +// Hook for boolean attributes +boolHook = { + get: function( elem, name ) { + // Align boolean attributes with corresponding properties + // Fall back to attribute presence where some booleans are not supported + var attrNode, + property = jQuery.prop( elem, name ); + return property === true || typeof property !== "boolean" && ( attrNode = elem.getAttributeNode(name) ) && attrNode.nodeValue !== false ? + name.toLowerCase() : + undefined; + }, + set: function( elem, value, name ) { + var propName; + if ( value === false ) { + // Remove boolean attributes when set to false + jQuery.removeAttr( elem, name ); + } else { + // value is true since we know at this point it's type boolean and not false + // Set boolean attributes to the same name and set the DOM property + propName = jQuery.propFix[ name ] || name; + if ( propName in elem ) { + // Only set the IDL specifically if it already exists on the element + elem[ propName ] = true; + } + + elem.setAttribute( name, name.toLowerCase() ); + } + return name; + } +}; + +// IE6/7 do not support getting/setting some attributes with get/setAttribute +if ( !getSetAttribute ) { + + fixSpecified = { + name: true, + id: true + }; + + // Use this for any attribute in IE6/7 + // This fixes almost every IE6/7 issue + nodeHook = jQuery.valHooks.button = { + get: function( elem, name ) { + var ret; + ret = elem.getAttributeNode( name ); + return ret && ( fixSpecified[ name ] ? ret.nodeValue !== "" : ret.specified ) ? + ret.nodeValue : + undefined; + }, + set: function( elem, value, name ) { + // Set the existing or create a new attribute node + var ret = elem.getAttributeNode( name ); + if ( !ret ) { + ret = document.createAttribute( name ); + elem.setAttributeNode( ret ); + } + return ( ret.nodeValue = value + "" ); + } + }; + + // Apply the nodeHook to tabindex + jQuery.attrHooks.tabindex.set = nodeHook.set; + + // Set width and height to auto instead of 0 on empty string( Bug #8150 ) + // This is for removals + jQuery.each([ "width", "height" ], function( i, name ) { + jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], { + set: function( elem, value ) { + if ( value === "" ) { + elem.setAttribute( name, "auto" ); + return value; + } + } + }); + }); + + // Set contenteditable to false on removals(#10429) + // Setting to empty string throws an error as an invalid value + jQuery.attrHooks.contenteditable = { + get: nodeHook.get, + set: function( elem, value, name ) { + if ( value === "" ) { + value = "false"; + } + nodeHook.set( elem, value, name ); + } + }; +} + + +// Some attributes require a special call on IE +if ( !jQuery.support.hrefNormalized ) { + jQuery.each([ "href", "src", "width", "height" ], function( i, name ) { + jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], { + get: function( elem ) { + var ret = elem.getAttribute( name, 2 ); + return ret === null ? undefined : ret; + } + }); + }); +} + +if ( !jQuery.support.style ) { + jQuery.attrHooks.style = { + get: function( elem ) { + // Return undefined in the case of empty string + // Normalize to lowercase since IE uppercases css property names + return elem.style.cssText.toLowerCase() || undefined; + }, + set: function( elem, value ) { + return ( elem.style.cssText = "" + value ); + } + }; +} + +// Safari mis-reports the default selected property of an option +// Accessing the parent's selectedIndex property fixes it +if ( !jQuery.support.optSelected ) { + jQuery.propHooks.selected = jQuery.extend( jQuery.propHooks.selected, { + get: function( elem ) { + var parent = elem.parentNode; + + if ( parent ) { + parent.selectedIndex; + + // Make sure that it also works with optgroups, see #5701 + if ( parent.parentNode ) { + parent.parentNode.selectedIndex; + } + } + return null; + } + }); +} + +// IE6/7 call enctype encoding +if ( !jQuery.support.enctype ) { + jQuery.propFix.enctype = "encoding"; +} + +// Radios and checkboxes getter/setter +if ( !jQuery.support.checkOn ) { + jQuery.each([ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = { + get: function( elem ) { + // Handle the case where in Webkit "" is returned instead of "on" if a value isn't specified + return elem.getAttribute("value") === null ? "on" : elem.value; + } + }; + }); +} +jQuery.each([ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = jQuery.extend( jQuery.valHooks[ this ], { + set: function( elem, value ) { + if ( jQuery.isArray( value ) ) { + return ( elem.checked = jQuery.inArray( jQuery(elem).val(), value ) >= 0 ); + } + } + }); +}); + + + + +var rformElems = /^(?:textarea|input|select)$/i, + rtypenamespace = /^([^\.]*)?(?:\.(.+))?$/, + rhoverHack = /\bhover(\.\S+)?\b/, + rkeyEvent = /^key/, + rmouseEvent = /^(?:mouse|contextmenu)|click/, + rfocusMorph = /^(?:focusinfocus|focusoutblur)$/, + rquickIs = /^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/, + quickParse = function( selector ) { + var quick = rquickIs.exec( selector ); + if ( quick ) { + // 0 1 2 3 + // [ _, tag, id, class ] + quick[1] = ( quick[1] || "" ).toLowerCase(); + quick[3] = quick[3] && new RegExp( "(?:^|\\s)" + quick[3] + "(?:\\s|$)" ); + } + return quick; + }, + quickIs = function( elem, m ) { + var attrs = elem.attributes || {}; + return ( + (!m[1] || elem.nodeName.toLowerCase() === m[1]) && + (!m[2] || (attrs.id || {}).value === m[2]) && + (!m[3] || m[3].test( (attrs[ "class" ] || {}).value )) + ); + }, + hoverHack = function( events ) { + return jQuery.event.special.hover ? events : events.replace( rhoverHack, "mouseenter$1 mouseleave$1" ); + }; + +/* + * Helper functions for managing events -- not part of the public interface. + * Props to Dean Edwards' addEvent library for many of the ideas. + */ +jQuery.event = { + + add: function( elem, types, handler, data, selector ) { + + var elemData, eventHandle, events, + t, tns, type, namespaces, handleObj, + handleObjIn, quick, handlers, special; + + // Don't attach events to noData or text/comment nodes (allow plain objects tho) + if ( elem.nodeType === 3 || elem.nodeType === 8 || !types || !handler || !(elemData = jQuery._data( elem )) ) { + return; + } + + // Caller can pass in an object of custom data in lieu of the handler + if ( handler.handler ) { + handleObjIn = handler; + handler = handleObjIn.handler; + } + + // Make sure that the handler has a unique ID, used to find/remove it later + if ( !handler.guid ) { + handler.guid = jQuery.guid++; + } + + // Init the element's event structure and main handler, if this is the first + events = elemData.events; + if ( !events ) { + elemData.events = events = {}; + } + eventHandle = elemData.handle; + if ( !eventHandle ) { + elemData.handle = eventHandle = function( e ) { + // Discard the second event of a jQuery.event.trigger() and + // when an event is called after a page has unloaded + return typeof jQuery !== "undefined" && (!e || jQuery.event.triggered !== e.type) ? + jQuery.event.dispatch.apply( eventHandle.elem, arguments ) : + undefined; + }; + // Add elem as a property of the handle fn to prevent a memory leak with IE non-native events + eventHandle.elem = elem; + } + + // Handle multiple events separated by a space + // jQuery(...).bind("mouseover mouseout", fn); + types = jQuery.trim( hoverHack(types) ).split( " " ); + for ( t = 0; t < types.length; t++ ) { + + tns = rtypenamespace.exec( types[t] ) || []; + type = tns[1]; + namespaces = ( tns[2] || "" ).split( "." ).sort(); + + // If event changes its type, use the special event handlers for the changed type + special = jQuery.event.special[ type ] || {}; + + // If selector defined, determine special event api type, otherwise given type + type = ( selector ? special.delegateType : special.bindType ) || type; + + // Update special based on newly reset type + special = jQuery.event.special[ type ] || {}; + + // handleObj is passed to all event handlers + handleObj = jQuery.extend({ + type: type, + origType: tns[1], + data: data, + handler: handler, + guid: handler.guid, + selector: selector, + quick: quickParse( selector ), + namespace: namespaces.join(".") + }, handleObjIn ); + + // Init the event handler queue if we're the first + handlers = events[ type ]; + if ( !handlers ) { + handlers = events[ type ] = []; + handlers.delegateCount = 0; + + // Only use addEventListener/attachEvent if the special events handler returns false + if ( !special.setup || special.setup.call( elem, data, namespaces, eventHandle ) === false ) { + // Bind the global event handler to the element + if ( elem.addEventListener ) { + elem.addEventListener( type, eventHandle, false ); + + } else if ( elem.attachEvent ) { + elem.attachEvent( "on" + type, eventHandle ); + } + } + } + + if ( special.add ) { + special.add.call( elem, handleObj ); + + if ( !handleObj.handler.guid ) { + handleObj.handler.guid = handler.guid; + } + } + + // Add to the element's handler list, delegates in front + if ( selector ) { + handlers.splice( handlers.delegateCount++, 0, handleObj ); + } else { + handlers.push( handleObj ); + } + + // Keep track of which events have ever been used, for event optimization + jQuery.event.global[ type ] = true; + } + + // Nullify elem to prevent memory leaks in IE + elem = null; + }, + + global: {}, + + // Detach an event or set of events from an element + remove: function( elem, types, handler, selector, mappedTypes ) { + + var elemData = jQuery.hasData( elem ) && jQuery._data( elem ), + t, tns, type, origType, namespaces, origCount, + j, events, special, handle, eventType, handleObj; + + if ( !elemData || !(events = elemData.events) ) { + return; + } + + // Once for each type.namespace in types; type may be omitted + types = jQuery.trim( hoverHack( types || "" ) ).split(" "); + for ( t = 0; t < types.length; t++ ) { + tns = rtypenamespace.exec( types[t] ) || []; + type = origType = tns[1]; + namespaces = tns[2]; + + // Unbind all events (on this namespace, if provided) for the element + if ( !type ) { + for ( type in events ) { + jQuery.event.remove( elem, type + types[ t ], handler, selector, true ); + } + continue; + } + + special = jQuery.event.special[ type ] || {}; + type = ( selector? special.delegateType : special.bindType ) || type; + eventType = events[ type ] || []; + origCount = eventType.length; + namespaces = namespaces ? new RegExp("(^|\\.)" + namespaces.split(".").sort().join("\\.(?:.*\\.)?") + "(\\.|$)") : null; + + // Remove matching events + for ( j = 0; j < eventType.length; j++ ) { + handleObj = eventType[ j ]; + + if ( ( mappedTypes || origType === handleObj.origType ) && + ( !handler || handler.guid === handleObj.guid ) && + ( !namespaces || namespaces.test( handleObj.namespace ) ) && + ( !selector || selector === handleObj.selector || selector === "**" && handleObj.selector ) ) { + eventType.splice( j--, 1 ); + + if ( handleObj.selector ) { + eventType.delegateCount--; + } + if ( special.remove ) { + special.remove.call( elem, handleObj ); + } + } + } + + // Remove generic event handler if we removed something and no more handlers exist + // (avoids potential for endless recursion during removal of special event handlers) + if ( eventType.length === 0 && origCount !== eventType.length ) { + if ( !special.teardown || special.teardown.call( elem, namespaces ) === false ) { + jQuery.removeEvent( elem, type, elemData.handle ); + } + + delete events[ type ]; + } + } + + // Remove the expando if it's no longer used + if ( jQuery.isEmptyObject( events ) ) { + handle = elemData.handle; + if ( handle ) { + handle.elem = null; + } + + // removeData also checks for emptiness and clears the expando if empty + // so use it instead of delete + jQuery.removeData( elem, [ "events", "handle" ], true ); + } + }, + + // Events that are safe to short-circuit if no handlers are attached. + // Native DOM events should not be added, they may have inline handlers. + customEvent: { + "getData": true, + "setData": true, + "changeData": true + }, + + trigger: function( event, data, elem, onlyHandlers ) { + // Don't do events on text and comment nodes + if ( elem && (elem.nodeType === 3 || elem.nodeType === 8) ) { + return; + } + + // Event object or event type + var type = event.type || event, + namespaces = [], + cache, exclusive, i, cur, old, ontype, special, handle, eventPath, bubbleType; + + // focus/blur morphs to focusin/out; ensure we're not firing them right now + if ( rfocusMorph.test( type + jQuery.event.triggered ) ) { + return; + } + + if ( type.indexOf( "!" ) >= 0 ) { + // Exclusive events trigger only for the exact event (no namespaces) + type = type.slice(0, -1); + exclusive = true; + } + + if ( type.indexOf( "." ) >= 0 ) { + // Namespaced trigger; create a regexp to match event type in handle() + namespaces = type.split("."); + type = namespaces.shift(); + namespaces.sort(); + } + + if ( (!elem || jQuery.event.customEvent[ type ]) && !jQuery.event.global[ type ] ) { + // No jQuery handlers for this event type, and it can't have inline handlers + return; + } + + // Caller can pass in an Event, Object, or just an event type string + event = typeof event === "object" ? + // jQuery.Event object + event[ jQuery.expando ] ? event : + // Object literal + new jQuery.Event( type, event ) : + // Just the event type (string) + new jQuery.Event( type ); + + event.type = type; + event.isTrigger = true; + event.exclusive = exclusive; + event.namespace = namespaces.join( "." ); + event.namespace_re = event.namespace? new RegExp("(^|\\.)" + namespaces.join("\\.(?:.*\\.)?") + "(\\.|$)") : null; + ontype = type.indexOf( ":" ) < 0 ? "on" + type : ""; + + // Handle a global trigger + if ( !elem ) { + + // TODO: Stop taunting the data cache; remove global events and always attach to document + cache = jQuery.cache; + for ( i in cache ) { + if ( cache[ i ].events && cache[ i ].events[ type ] ) { + jQuery.event.trigger( event, data, cache[ i ].handle.elem, true ); + } + } + return; + } + + // Clean up the event in case it is being reused + event.result = undefined; + if ( !event.target ) { + event.target = elem; + } + + // Clone any incoming data and prepend the event, creating the handler arg list + data = data != null ? jQuery.makeArray( data ) : []; + data.unshift( event ); + + // Allow special events to draw outside the lines + special = jQuery.event.special[ type ] || {}; + if ( special.trigger && special.trigger.apply( elem, data ) === false ) { + return; + } + + // Determine event propagation path in advance, per W3C events spec (#9951) + // Bubble up to document, then to window; watch for a global ownerDocument var (#9724) + eventPath = [[ elem, special.bindType || type ]]; + if ( !onlyHandlers && !special.noBubble && !jQuery.isWindow( elem ) ) { + + bubbleType = special.delegateType || type; + cur = rfocusMorph.test( bubbleType + type ) ? elem : elem.parentNode; + old = null; + for ( ; cur; cur = cur.parentNode ) { + eventPath.push([ cur, bubbleType ]); + old = cur; + } + + // Only add window if we got to document (e.g., not plain obj or detached DOM) + if ( old && old === elem.ownerDocument ) { + eventPath.push([ old.defaultView || old.parentWindow || window, bubbleType ]); + } + } + + // Fire handlers on the event path + for ( i = 0; i < eventPath.length && !event.isPropagationStopped(); i++ ) { + + cur = eventPath[i][0]; + event.type = eventPath[i][1]; + + handle = ( jQuery._data( cur, "events" ) || {} )[ event.type ] && jQuery._data( cur, "handle" ); + if ( handle ) { + handle.apply( cur, data ); + } + // Note that this is a bare JS function and not a jQuery handler + handle = ontype && cur[ ontype ]; + if ( handle && jQuery.acceptData( cur ) && handle.apply( cur, data ) === false ) { + event.preventDefault(); + } + } + event.type = type; + + // If nobody prevented the default action, do it now + if ( !onlyHandlers && !event.isDefaultPrevented() ) { + + if ( (!special._default || special._default.apply( elem.ownerDocument, data ) === false) && + !(type === "click" && jQuery.nodeName( elem, "a" )) && jQuery.acceptData( elem ) ) { + + // Call a native DOM method on the target with the same name name as the event. + // Can't use an .isFunction() check here because IE6/7 fails that test. + // Don't do default actions on window, that's where global variables be (#6170) + // IE<9 dies on focus/blur to hidden element (#1486) + if ( ontype && elem[ type ] && ((type !== "focus" && type !== "blur") || event.target.offsetWidth !== 0) && !jQuery.isWindow( elem ) ) { + + // Don't re-trigger an onFOO event when we call its FOO() method + old = elem[ ontype ]; + + if ( old ) { + elem[ ontype ] = null; + } + + // Prevent re-triggering of the same event, since we already bubbled it above + jQuery.event.triggered = type; + elem[ type ](); + jQuery.event.triggered = undefined; + + if ( old ) { + elem[ ontype ] = old; + } + } + } + } + + return event.result; + }, + + dispatch: function( event ) { + + // Make a writable jQuery.Event from the native event object + event = jQuery.event.fix( event || window.event ); + + var handlers = ( (jQuery._data( this, "events" ) || {} )[ event.type ] || []), + delegateCount = handlers.delegateCount, + args = [].slice.call( arguments, 0 ), + run_all = !event.exclusive && !event.namespace, + handlerQueue = [], + i, j, cur, jqcur, ret, selMatch, matched, matches, handleObj, sel, related; + + // Use the fix-ed jQuery.Event rather than the (read-only) native event + args[0] = event; + event.delegateTarget = this; + + // Determine handlers that should run if there are delegated events + // Avoid disabled elements in IE (#6911) and non-left-click bubbling in Firefox (#3861) + if ( delegateCount && !event.target.disabled && !(event.button && event.type === "click") ) { + + // Pregenerate a single jQuery object for reuse with .is() + jqcur = jQuery(this); + jqcur.context = this.ownerDocument || this; + + for ( cur = event.target; cur != this; cur = cur.parentNode || this ) { + selMatch = {}; + matches = []; + jqcur[0] = cur; + for ( i = 0; i < delegateCount; i++ ) { + handleObj = handlers[ i ]; + sel = handleObj.selector; + + if ( selMatch[ sel ] === undefined ) { + selMatch[ sel ] = ( + handleObj.quick ? quickIs( cur, handleObj.quick ) : jqcur.is( sel ) + ); + } + if ( selMatch[ sel ] ) { + matches.push( handleObj ); + } + } + if ( matches.length ) { + handlerQueue.push({ elem: cur, matches: matches }); + } + } + } + + // Add the remaining (directly-bound) handlers + if ( handlers.length > delegateCount ) { + handlerQueue.push({ elem: this, matches: handlers.slice( delegateCount ) }); + } + + // Run delegates first; they may want to stop propagation beneath us + for ( i = 0; i < handlerQueue.length && !event.isPropagationStopped(); i++ ) { + matched = handlerQueue[ i ]; + event.currentTarget = matched.elem; + + for ( j = 0; j < matched.matches.length && !event.isImmediatePropagationStopped(); j++ ) { + handleObj = matched.matches[ j ]; + + // Triggered event must either 1) be non-exclusive and have no namespace, or + // 2) have namespace(s) a subset or equal to those in the bound event (both can have no namespace). + if ( run_all || (!event.namespace && !handleObj.namespace) || event.namespace_re && event.namespace_re.test( handleObj.namespace ) ) { + + event.data = handleObj.data; + event.handleObj = handleObj; + + ret = ( (jQuery.event.special[ handleObj.origType ] || {}).handle || handleObj.handler ) + .apply( matched.elem, args ); + + if ( ret !== undefined ) { + event.result = ret; + if ( ret === false ) { + event.preventDefault(); + event.stopPropagation(); + } + } + } + } + } + + return event.result; + }, + + // Includes some event props shared by KeyEvent and MouseEvent + // *** attrChange attrName relatedNode srcElement are not normalized, non-W3C, deprecated, will be removed in 1.8 *** + props: "attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "), + + fixHooks: {}, + + keyHooks: { + props: "char charCode key keyCode".split(" "), + filter: function( event, original ) { + + // Add which for key events + if ( event.which == null ) { + event.which = original.charCode != null ? original.charCode : original.keyCode; + } + + return event; + } + }, + + mouseHooks: { + props: "button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(" "), + filter: function( event, original ) { + var eventDoc, doc, body, + button = original.button, + fromElement = original.fromElement; + + // Calculate pageX/Y if missing and clientX/Y available + if ( event.pageX == null && original.clientX != null ) { + eventDoc = event.target.ownerDocument || document; + doc = eventDoc.documentElement; + body = eventDoc.body; + + event.pageX = original.clientX + ( doc && doc.scrollLeft || body && body.scrollLeft || 0 ) - ( doc && doc.clientLeft || body && body.clientLeft || 0 ); + event.pageY = original.clientY + ( doc && doc.scrollTop || body && body.scrollTop || 0 ) - ( doc && doc.clientTop || body && body.clientTop || 0 ); + } + + // Add relatedTarget, if necessary + if ( !event.relatedTarget && fromElement ) { + event.relatedTarget = fromElement === event.target ? original.toElement : fromElement; + } + + // Add which for click: 1 === left; 2 === middle; 3 === right + // Note: button is not normalized, so don't use it + if ( !event.which && button !== undefined ) { + event.which = ( button & 1 ? 1 : ( button & 2 ? 3 : ( button & 4 ? 2 : 0 ) ) ); + } + + return event; + } + }, + + fix: function( event ) { + if ( event[ jQuery.expando ] ) { + return event; + } + + // Create a writable copy of the event object and normalize some properties + var i, prop, + originalEvent = event, + fixHook = jQuery.event.fixHooks[ event.type ] || {}, + copy = fixHook.props ? this.props.concat( fixHook.props ) : this.props; + + event = jQuery.Event( originalEvent ); + + for ( i = copy.length; i; ) { + prop = copy[ --i ]; + event[ prop ] = originalEvent[ prop ]; + } + + // Fix target property, if necessary (#1925, IE 6/7/8 & Safari2) + if ( !event.target ) { + event.target = originalEvent.srcElement || document; + } + + // Target should not be a text node (#504, Safari) + if ( event.target.nodeType === 3 ) { + event.target = event.target.parentNode; + } + + // For mouse/key events; add metaKey if it's not there (#3368, IE6/7/8) + if ( event.metaKey === undefined ) { + event.metaKey = event.ctrlKey; + } + + return fixHook.filter? fixHook.filter( event, originalEvent ) : event; + }, + + special: { + ready: { + // Make sure the ready event is setup + setup: jQuery.bindReady + }, + + load: { + // Prevent triggered image.load events from bubbling to window.load + noBubble: true + }, + + focus: { + delegateType: "focusin" + }, + blur: { + delegateType: "focusout" + }, + + beforeunload: { + setup: function( data, namespaces, eventHandle ) { + // We only want to do this special case on windows + if ( jQuery.isWindow( this ) ) { + this.onbeforeunload = eventHandle; + } + }, + + teardown: function( namespaces, eventHandle ) { + if ( this.onbeforeunload === eventHandle ) { + this.onbeforeunload = null; + } + } + } + }, + + simulate: function( type, elem, event, bubble ) { + // Piggyback on a donor event to simulate a different one. + // Fake originalEvent to avoid donor's stopPropagation, but if the + // simulated event prevents default then we do the same on the donor. + var e = jQuery.extend( + new jQuery.Event(), + event, + { type: type, + isSimulated: true, + originalEvent: {} + } + ); + if ( bubble ) { + jQuery.event.trigger( e, null, elem ); + } else { + jQuery.event.dispatch.call( elem, e ); + } + if ( e.isDefaultPrevented() ) { + event.preventDefault(); + } + } +}; + +// Some plugins are using, but it's undocumented/deprecated and will be removed. +// The 1.7 special event interface should provide all the hooks needed now. +jQuery.event.handle = jQuery.event.dispatch; + +jQuery.removeEvent = document.removeEventListener ? + function( elem, type, handle ) { + if ( elem.removeEventListener ) { + elem.removeEventListener( type, handle, false ); + } + } : + function( elem, type, handle ) { + if ( elem.detachEvent ) { + elem.detachEvent( "on" + type, handle ); + } + }; + +jQuery.Event = function( src, props ) { + // Allow instantiation without the 'new' keyword + if ( !(this instanceof jQuery.Event) ) { + return new jQuery.Event( src, props ); + } + + // Event object + if ( src && src.type ) { + this.originalEvent = src; + this.type = src.type; + + // Events bubbling up the document may have been marked as prevented + // by a handler lower down the tree; reflect the correct value. + this.isDefaultPrevented = ( src.defaultPrevented || src.returnValue === false || + src.getPreventDefault && src.getPreventDefault() ) ? returnTrue : returnFalse; + + // Event type + } else { + this.type = src; + } + + // Put explicitly provided properties onto the event object + if ( props ) { + jQuery.extend( this, props ); + } + + // Create a timestamp if incoming event doesn't have one + this.timeStamp = src && src.timeStamp || jQuery.now(); + + // Mark it as fixed + this[ jQuery.expando ] = true; +}; + +function returnFalse() { + return false; +} +function returnTrue() { + return true; +} + +// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding +// http://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html +jQuery.Event.prototype = { + preventDefault: function() { + this.isDefaultPrevented = returnTrue; + + var e = this.originalEvent; + if ( !e ) { + return; + } + + // if preventDefault exists run it on the original event + if ( e.preventDefault ) { + e.preventDefault(); + + // otherwise set the returnValue property of the original event to false (IE) + } else { + e.returnValue = false; + } + }, + stopPropagation: function() { + this.isPropagationStopped = returnTrue; + + var e = this.originalEvent; + if ( !e ) { + return; + } + // if stopPropagation exists run it on the original event + if ( e.stopPropagation ) { + e.stopPropagation(); + } + // otherwise set the cancelBubble property of the original event to true (IE) + e.cancelBubble = true; + }, + stopImmediatePropagation: function() { + this.isImmediatePropagationStopped = returnTrue; + this.stopPropagation(); + }, + isDefaultPrevented: returnFalse, + isPropagationStopped: returnFalse, + isImmediatePropagationStopped: returnFalse +}; + +// Create mouseenter/leave events using mouseover/out and event-time checks +jQuery.each({ + mouseenter: "mouseover", + mouseleave: "mouseout" +}, function( orig, fix ) { + jQuery.event.special[ orig ] = { + delegateType: fix, + bindType: fix, + + handle: function( event ) { + var target = this, + related = event.relatedTarget, + handleObj = event.handleObj, + selector = handleObj.selector, + ret; + + // For mousenter/leave call the handler if related is outside the target. + // NB: No relatedTarget if the mouse left/entered the browser window + if ( !related || (related !== target && !jQuery.contains( target, related )) ) { + event.type = handleObj.origType; + ret = handleObj.handler.apply( this, arguments ); + event.type = fix; + } + return ret; + } + }; +}); + +// IE submit delegation +if ( !jQuery.support.submitBubbles ) { + + jQuery.event.special.submit = { + setup: function() { + // Only need this for delegated form submit events + if ( jQuery.nodeName( this, "form" ) ) { + return false; + } + + // Lazy-add a submit handler when a descendant form may potentially be submitted + jQuery.event.add( this, "click._submit keypress._submit", function( e ) { + // Node name check avoids a VML-related crash in IE (#9807) + var elem = e.target, + form = jQuery.nodeName( elem, "input" ) || jQuery.nodeName( elem, "button" ) ? elem.form : undefined; + if ( form && !form._submit_attached ) { + jQuery.event.add( form, "submit._submit", function( event ) { + // If form was submitted by the user, bubble the event up the tree + if ( this.parentNode && !event.isTrigger ) { + jQuery.event.simulate( "submit", this.parentNode, event, true ); + } + }); + form._submit_attached = true; + } + }); + // return undefined since we don't need an event listener + }, + + teardown: function() { + // Only need this for delegated form submit events + if ( jQuery.nodeName( this, "form" ) ) { + return false; + } + + // Remove delegated handlers; cleanData eventually reaps submit handlers attached above + jQuery.event.remove( this, "._submit" ); + } + }; +} + +// IE change delegation and checkbox/radio fix +if ( !jQuery.support.changeBubbles ) { + + jQuery.event.special.change = { + + setup: function() { + + if ( rformElems.test( this.nodeName ) ) { + // IE doesn't fire change on a check/radio until blur; trigger it on click + // after a propertychange. Eat the blur-change in special.change.handle. + // This still fires onchange a second time for check/radio after blur. + if ( this.type === "checkbox" || this.type === "radio" ) { + jQuery.event.add( this, "propertychange._change", function( event ) { + if ( event.originalEvent.propertyName === "checked" ) { + this._just_changed = true; + } + }); + jQuery.event.add( this, "click._change", function( event ) { + if ( this._just_changed && !event.isTrigger ) { + this._just_changed = false; + jQuery.event.simulate( "change", this, event, true ); + } + }); + } + return false; + } + // Delegated event; lazy-add a change handler on descendant inputs + jQuery.event.add( this, "beforeactivate._change", function( e ) { + var elem = e.target; + + if ( rformElems.test( elem.nodeName ) && !elem._change_attached ) { + jQuery.event.add( elem, "change._change", function( event ) { + if ( this.parentNode && !event.isSimulated && !event.isTrigger ) { + jQuery.event.simulate( "change", this.parentNode, event, true ); + } + }); + elem._change_attached = true; + } + }); + }, + + handle: function( event ) { + var elem = event.target; + + // Swallow native change events from checkbox/radio, we already triggered them above + if ( this !== elem || event.isSimulated || event.isTrigger || (elem.type !== "radio" && elem.type !== "checkbox") ) { + return event.handleObj.handler.apply( this, arguments ); + } + }, + + teardown: function() { + jQuery.event.remove( this, "._change" ); + + return rformElems.test( this.nodeName ); + } + }; +} + +// Create "bubbling" focus and blur events +if ( !jQuery.support.focusinBubbles ) { + jQuery.each({ focus: "focusin", blur: "focusout" }, function( orig, fix ) { + + // Attach a single capturing handler while someone wants focusin/focusout + var attaches = 0, + handler = function( event ) { + jQuery.event.simulate( fix, event.target, jQuery.event.fix( event ), true ); + }; + + jQuery.event.special[ fix ] = { + setup: function() { + if ( attaches++ === 0 ) { + document.addEventListener( orig, handler, true ); + } + }, + teardown: function() { + if ( --attaches === 0 ) { + document.removeEventListener( orig, handler, true ); + } + } + }; + }); +} + +jQuery.fn.extend({ + + on: function( types, selector, data, fn, /*INTERNAL*/ one ) { + var origFn, type; + + // Types can be a map of types/handlers + if ( typeof types === "object" ) { + // ( types-Object, selector, data ) + if ( typeof selector !== "string" ) { + // ( types-Object, data ) + data = selector; + selector = undefined; + } + for ( type in types ) { + this.on( type, selector, data, types[ type ], one ); + } + return this; + } + + if ( data == null && fn == null ) { + // ( types, fn ) + fn = selector; + data = selector = undefined; + } else if ( fn == null ) { + if ( typeof selector === "string" ) { + // ( types, selector, fn ) + fn = data; + data = undefined; + } else { + // ( types, data, fn ) + fn = data; + data = selector; + selector = undefined; + } + } + if ( fn === false ) { + fn = returnFalse; + } else if ( !fn ) { + return this; + } + + if ( one === 1 ) { + origFn = fn; + fn = function( event ) { + // Can use an empty set, since event contains the info + jQuery().off( event ); + return origFn.apply( this, arguments ); + }; + // Use same guid so caller can remove using origFn + fn.guid = origFn.guid || ( origFn.guid = jQuery.guid++ ); + } + return this.each( function() { + jQuery.event.add( this, types, fn, data, selector ); + }); + }, + one: function( types, selector, data, fn ) { + return this.on.call( this, types, selector, data, fn, 1 ); + }, + off: function( types, selector, fn ) { + if ( types && types.preventDefault && types.handleObj ) { + // ( event ) dispatched jQuery.Event + var handleObj = types.handleObj; + jQuery( types.delegateTarget ).off( + handleObj.namespace? handleObj.type + "." + handleObj.namespace : handleObj.type, + handleObj.selector, + handleObj.handler + ); + return this; + } + if ( typeof types === "object" ) { + // ( types-object [, selector] ) + for ( var type in types ) { + this.off( type, selector, types[ type ] ); + } + return this; + } + if ( selector === false || typeof selector === "function" ) { + // ( types [, fn] ) + fn = selector; + selector = undefined; + } + if ( fn === false ) { + fn = returnFalse; + } + return this.each(function() { + jQuery.event.remove( this, types, fn, selector ); + }); + }, + + bind: function( types, data, fn ) { + return this.on( types, null, data, fn ); + }, + unbind: function( types, fn ) { + return this.off( types, null, fn ); + }, + + live: function( types, data, fn ) { + jQuery( this.context ).on( types, this.selector, data, fn ); + return this; + }, + die: function( types, fn ) { + jQuery( this.context ).off( types, this.selector || "**", fn ); + return this; + }, + + delegate: function( selector, types, data, fn ) { + return this.on( types, selector, data, fn ); + }, + undelegate: function( selector, types, fn ) { + // ( namespace ) or ( selector, types [, fn] ) + return arguments.length == 1? this.off( selector, "**" ) : this.off( types, selector, fn ); + }, + + trigger: function( type, data ) { + return this.each(function() { + jQuery.event.trigger( type, data, this ); + }); + }, + triggerHandler: function( type, data ) { + if ( this[0] ) { + return jQuery.event.trigger( type, data, this[0], true ); + } + }, + + toggle: function( fn ) { + // Save reference to arguments for access in closure + var args = arguments, + guid = fn.guid || jQuery.guid++, + i = 0, + toggler = function( event ) { + // Figure out which function to execute + var lastToggle = ( jQuery._data( this, "lastToggle" + fn.guid ) || 0 ) % i; + jQuery._data( this, "lastToggle" + fn.guid, lastToggle + 1 ); + + // Make sure that clicks stop + event.preventDefault(); + + // and execute the function + return args[ lastToggle ].apply( this, arguments ) || false; + }; + + // link all the functions, so any of them can unbind this click handler + toggler.guid = guid; + while ( i < args.length ) { + args[ i++ ].guid = guid; + } + + return this.click( toggler ); + }, + + hover: function( fnOver, fnOut ) { + return this.mouseenter( fnOver ).mouseleave( fnOut || fnOver ); + } +}); + +jQuery.each( ("blur focus focusin focusout load resize scroll unload click dblclick " + + "mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave " + + "change select submit keydown keypress keyup error contextmenu").split(" "), function( i, name ) { + + // Handle event binding + jQuery.fn[ name ] = function( data, fn ) { + if ( fn == null ) { + fn = data; + data = null; + } + + return arguments.length > 0 ? + this.on( name, null, data, fn ) : + this.trigger( name ); + }; + + if ( jQuery.attrFn ) { + jQuery.attrFn[ name ] = true; + } + + if ( rkeyEvent.test( name ) ) { + jQuery.event.fixHooks[ name ] = jQuery.event.keyHooks; + } + + if ( rmouseEvent.test( name ) ) { + jQuery.event.fixHooks[ name ] = jQuery.event.mouseHooks; + } +}); + + + +/*! + * Sizzle CSS Selector Engine + * Copyright 2016, The Dojo Foundation + * Released under the MIT, BSD, and GPL Licenses. + * More information: http://sizzlejs.com/ + */ +(function(){ + +var chunker = /((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^\[\]]*\]|['"][^'"]*['"]|[^\[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g, + expando = "sizcache" + (Math.random() + '').replace('.', ''), + done = 0, + toString = Object.prototype.toString, + hasDuplicate = false, + baseHasDuplicate = true, + rBackslash = /\\/g, + rReturn = /\r\n/g, + rNonWord = /\W/; + +// Here we check if the JavaScript engine is using some sort of +// optimization where it does not always call our comparision +// function. If that is the case, discard the hasDuplicate value. +// Thus far that includes Google Chrome. +[0, 0].sort(function() { + baseHasDuplicate = false; + return 0; +}); + +var Sizzle = function( selector, context, results, seed ) { + results = results || []; + context = context || document; + + var origContext = context; + + if ( context.nodeType !== 1 && context.nodeType !== 9 ) { + return []; + } + + if ( !selector || typeof selector !== "string" ) { + return results; + } + + var m, set, checkSet, extra, ret, cur, pop, i, + prune = true, + contextXML = Sizzle.isXML( context ), + parts = [], + soFar = selector; + + // Reset the position of the chunker regexp (start from head) + do { + chunker.exec( "" ); + m = chunker.exec( soFar ); + + if ( m ) { + soFar = m[3]; + + parts.push( m[1] ); + + if ( m[2] ) { + extra = m[3]; + break; + } + } + } while ( m ); + + if ( parts.length > 1 && origPOS.exec( selector ) ) { + + if ( parts.length === 2 && Expr.relative[ parts[0] ] ) { + set = posProcess( parts[0] + parts[1], context, seed ); + + } else { + set = Expr.relative[ parts[0] ] ? + [ context ] : + Sizzle( parts.shift(), context ); + + while ( parts.length ) { + selector = parts.shift(); + + if ( Expr.relative[ selector ] ) { + selector += parts.shift(); + } + + set = posProcess( selector, set, seed ); + } + } + + } else { + // Take a shortcut and set the context if the root selector is an ID + // (but not if it'll be faster if the inner selector is an ID) + if ( !seed && parts.length > 1 && context.nodeType === 9 && !contextXML && + Expr.match.ID.test(parts[0]) && !Expr.match.ID.test(parts[parts.length - 1]) ) { + + ret = Sizzle.find( parts.shift(), context, contextXML ); + context = ret.expr ? + Sizzle.filter( ret.expr, ret.set )[0] : + ret.set[0]; + } + + if ( context ) { + ret = seed ? + { expr: parts.pop(), set: makeArray(seed) } : + Sizzle.find( parts.pop(), parts.length === 1 && (parts[0] === "~" || parts[0] === "+") && context.parentNode ? context.parentNode : context, contextXML ); + + set = ret.expr ? + Sizzle.filter( ret.expr, ret.set ) : + ret.set; + + if ( parts.length > 0 ) { + checkSet = makeArray( set ); + + } else { + prune = false; + } + + while ( parts.length ) { + cur = parts.pop(); + pop = cur; + + if ( !Expr.relative[ cur ] ) { + cur = ""; + } else { + pop = parts.pop(); + } + + if ( pop == null ) { + pop = context; + } + + Expr.relative[ cur ]( checkSet, pop, contextXML ); + } + + } else { + checkSet = parts = []; + } + } + + if ( !checkSet ) { + checkSet = set; + } + + if ( !checkSet ) { + Sizzle.error( cur || selector ); + } + + if ( toString.call(checkSet) === "[object Array]" ) { + if ( !prune ) { + results.push.apply( results, checkSet ); + + } else if ( context && context.nodeType === 1 ) { + for ( i = 0; checkSet[i] != null; i++ ) { + if ( checkSet[i] && (checkSet[i] === true || checkSet[i].nodeType === 1 && Sizzle.contains(context, checkSet[i])) ) { + results.push( set[i] ); + } + } + + } else { + for ( i = 0; checkSet[i] != null; i++ ) { + if ( checkSet[i] && checkSet[i].nodeType === 1 ) { + results.push( set[i] ); + } + } + } + + } else { + makeArray( checkSet, results ); + } + + if ( extra ) { + Sizzle( extra, origContext, results, seed ); + Sizzle.uniqueSort( results ); + } + + return results; +}; + +Sizzle.uniqueSort = function( results ) { + if ( sortOrder ) { + hasDuplicate = baseHasDuplicate; + results.sort( sortOrder ); + + if ( hasDuplicate ) { + for ( var i = 1; i < results.length; i++ ) { + if ( results[i] === results[ i - 1 ] ) { + results.splice( i--, 1 ); + } + } + } + } + + return results; +}; + +Sizzle.matches = function( expr, set ) { + return Sizzle( expr, null, null, set ); +}; + +Sizzle.matchesSelector = function( node, expr ) { + return Sizzle( expr, null, null, [node] ).length > 0; +}; + +Sizzle.find = function( expr, context, isXML ) { + var set, i, len, match, type, left; + + if ( !expr ) { + return []; + } + + for ( i = 0, len = Expr.order.length; i < len; i++ ) { + type = Expr.order[i]; + + if ( (match = Expr.leftMatch[ type ].exec( expr )) ) { + left = match[1]; + match.splice( 1, 1 ); + + if ( left.substr( left.length - 1 ) !== "\\" ) { + match[1] = (match[1] || "").replace( rBackslash, "" ); + set = Expr.find[ type ]( match, context, isXML ); + + if ( set != null ) { + expr = expr.replace( Expr.match[ type ], "" ); + break; + } + } + } + } + + if ( !set ) { + set = typeof context.getElementsByTagName !== "undefined" ? + context.getElementsByTagName( "*" ) : + []; + } + + return { set: set, expr: expr }; +}; + +Sizzle.filter = function( expr, set, inplace, not ) { + var match, anyFound, + type, found, item, filter, left, + i, pass, + old = expr, + result = [], + curLoop = set, + isXMLFilter = set && set[0] && Sizzle.isXML( set[0] ); + + while ( expr && set.length ) { + for ( type in Expr.filter ) { + if ( (match = Expr.leftMatch[ type ].exec( expr )) != null && match[2] ) { + filter = Expr.filter[ type ]; + left = match[1]; + + anyFound = false; + + match.splice(1,1); + + if ( left.substr( left.length - 1 ) === "\\" ) { + continue; + } + + if ( curLoop === result ) { + result = []; + } + + if ( Expr.preFilter[ type ] ) { + match = Expr.preFilter[ type ]( match, curLoop, inplace, result, not, isXMLFilter ); + + if ( !match ) { + anyFound = found = true; + + } else if ( match === true ) { + continue; + } + } + + if ( match ) { + for ( i = 0; (item = curLoop[i]) != null; i++ ) { + if ( item ) { + found = filter( item, match, i, curLoop ); + pass = not ^ found; + + if ( inplace && found != null ) { + if ( pass ) { + anyFound = true; + + } else { + curLoop[i] = false; + } + + } else if ( pass ) { + result.push( item ); + anyFound = true; + } + } + } + } + + if ( found !== undefined ) { + if ( !inplace ) { + curLoop = result; + } + + expr = expr.replace( Expr.match[ type ], "" ); + + if ( !anyFound ) { + return []; + } + + break; + } + } + } + + // Improper expression + if ( expr === old ) { + if ( anyFound == null ) { + Sizzle.error( expr ); + + } else { + break; + } + } + + old = expr; + } + + return curLoop; +}; + +Sizzle.error = function( msg ) { + throw new Error( "Syntax error, unrecognized expression: " + msg ); +}; + +/** + * Utility function for retreiving the text value of an array of DOM nodes + * @param {Array|Element} elem + */ +var getText = Sizzle.getText = function( elem ) { + var i, node, + nodeType = elem.nodeType, + ret = ""; + + if ( nodeType ) { + if ( nodeType === 1 || nodeType === 9 ) { + // Use textContent || innerText for elements + if ( typeof elem.textContent === 'string' ) { + return elem.textContent; + } else if ( typeof elem.innerText === 'string' ) { + // Replace IE's carriage returns + return elem.innerText.replace( rReturn, '' ); + } else { + // Traverse it's children + for ( elem = elem.firstChild; elem; elem = elem.nextSibling) { + ret += getText( elem ); + } + } + } else if ( nodeType === 3 || nodeType === 4 ) { + return elem.nodeValue; + } + } else { + + // If no nodeType, this is expected to be an array + for ( i = 0; (node = elem[i]); i++ ) { + // Do not traverse comment nodes + if ( node.nodeType !== 8 ) { + ret += getText( node ); + } + } + } + return ret; +}; + +var Expr = Sizzle.selectors = { + order: [ "ID", "NAME", "TAG" ], + + match: { + ID: /#((?:[\w\u00c0-\uFFFF\-]|\\.)+)/, + CLASS: /\.((?:[\w\u00c0-\uFFFF\-]|\\.)+)/, + NAME: /\[name=['"]*((?:[\w\u00c0-\uFFFF\-]|\\.)+)['"]*\]/, + ATTR: /\[\s*((?:[\w\u00c0-\uFFFF\-]|\\.)+)\s*(?:(\S?=)\s*(?:(['"])(.*?)\3|(#?(?:[\w\u00c0-\uFFFF\-]|\\.)*)|)|)\s*\]/, + TAG: /^((?:[\w\u00c0-\uFFFF\*\-]|\\.)+)/, + CHILD: /:(only|nth|last|first)-child(?:\(\s*(even|odd|(?:[+\-]?\d+|(?:[+\-]?\d*)?n\s*(?:[+\-]\s*\d+)?))\s*\))?/, + POS: /:(nth|eq|gt|lt|first|last|even|odd)(?:\((\d*)\))?(?=[^\-]|$)/, + PSEUDO: /:((?:[\w\u00c0-\uFFFF\-]|\\.)+)(?:\((['"]?)((?:\([^\)]+\)|[^\(\)]*)+)\2\))?/ + }, + + leftMatch: {}, + + attrMap: { + "class": "className", + "for": "htmlFor" + }, + + attrHandle: { + href: function( elem ) { + return elem.getAttribute( "href" ); + }, + type: function( elem ) { + return elem.getAttribute( "type" ); + } + }, + + relative: { + "+": function(checkSet, part){ + var isPartStr = typeof part === "string", + isTag = isPartStr && !rNonWord.test( part ), + isPartStrNotTag = isPartStr && !isTag; + + if ( isTag ) { + part = part.toLowerCase(); + } + + for ( var i = 0, l = checkSet.length, elem; i < l; i++ ) { + if ( (elem = checkSet[i]) ) { + while ( (elem = elem.previousSibling) && elem.nodeType !== 1 ) {} + + checkSet[i] = isPartStrNotTag || elem && elem.nodeName.toLowerCase() === part ? + elem || false : + elem === part; + } + } + + if ( isPartStrNotTag ) { + Sizzle.filter( part, checkSet, true ); + } + }, + + ">": function( checkSet, part ) { + var elem, + isPartStr = typeof part === "string", + i = 0, + l = checkSet.length; + + if ( isPartStr && !rNonWord.test( part ) ) { + part = part.toLowerCase(); + + for ( ; i < l; i++ ) { + elem = checkSet[i]; + + if ( elem ) { + var parent = elem.parentNode; + checkSet[i] = parent.nodeName.toLowerCase() === part ? parent : false; + } + } + + } else { + for ( ; i < l; i++ ) { + elem = checkSet[i]; + + if ( elem ) { + checkSet[i] = isPartStr ? + elem.parentNode : + elem.parentNode === part; + } + } + + if ( isPartStr ) { + Sizzle.filter( part, checkSet, true ); + } + } + }, + + "": function(checkSet, part, isXML){ + var nodeCheck, + doneName = done++, + checkFn = dirCheck; + + if ( typeof part === "string" && !rNonWord.test( part ) ) { + part = part.toLowerCase(); + nodeCheck = part; + checkFn = dirNodeCheck; + } + + checkFn( "parentNode", part, doneName, checkSet, nodeCheck, isXML ); + }, + + "~": function( checkSet, part, isXML ) { + var nodeCheck, + doneName = done++, + checkFn = dirCheck; + + if ( typeof part === "string" && !rNonWord.test( part ) ) { + part = part.toLowerCase(); + nodeCheck = part; + checkFn = dirNodeCheck; + } + + checkFn( "previousSibling", part, doneName, checkSet, nodeCheck, isXML ); + } + }, + + find: { + ID: function( match, context, isXML ) { + if ( typeof context.getElementById !== "undefined" && !isXML ) { + var m = context.getElementById(match[1]); + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + return m && m.parentNode ? [m] : []; + } + }, + + NAME: function( match, context ) { + if ( typeof context.getElementsByName !== "undefined" ) { + var ret = [], + results = context.getElementsByName( match[1] ); + + for ( var i = 0, l = results.length; i < l; i++ ) { + if ( results[i].getAttribute("name") === match[1] ) { + ret.push( results[i] ); + } + } + + return ret.length === 0 ? null : ret; + } + }, + + TAG: function( match, context ) { + if ( typeof context.getElementsByTagName !== "undefined" ) { + return context.getElementsByTagName( match[1] ); + } + } + }, + preFilter: { + CLASS: function( match, curLoop, inplace, result, not, isXML ) { + match = " " + match[1].replace( rBackslash, "" ) + " "; + + if ( isXML ) { + return match; + } + + for ( var i = 0, elem; (elem = curLoop[i]) != null; i++ ) { + if ( elem ) { + if ( not ^ (elem.className && (" " + elem.className + " ").replace(/[\t\n\r]/g, " ").indexOf(match) >= 0) ) { + if ( !inplace ) { + result.push( elem ); + } + + } else if ( inplace ) { + curLoop[i] = false; + } + } + } + + return false; + }, + + ID: function( match ) { + return match[1].replace( rBackslash, "" ); + }, + + TAG: function( match, curLoop ) { + return match[1].replace( rBackslash, "" ).toLowerCase(); + }, + + CHILD: function( match ) { + if ( match[1] === "nth" ) { + if ( !match[2] ) { + Sizzle.error( match[0] ); + } + + match[2] = match[2].replace(/^\+|\s*/g, ''); + + // parse equations like 'even', 'odd', '5', '2n', '3n+2', '4n-1', '-n+6' + var test = /(-?)(\d*)(?:n([+\-]?\d*))?/.exec( + match[2] === "even" && "2n" || match[2] === "odd" && "2n+1" || + !/\D/.test( match[2] ) && "0n+" + match[2] || match[2]); + + // calculate the numbers (first)n+(last) including if they are negative + match[2] = (test[1] + (test[2] || 1)) - 0; + match[3] = test[3] - 0; + } + else if ( match[2] ) { + Sizzle.error( match[0] ); + } + + // TODO: Move to normal caching system + match[0] = done++; + + return match; + }, + + ATTR: function( match, curLoop, inplace, result, not, isXML ) { + var name = match[1] = match[1].replace( rBackslash, "" ); + + if ( !isXML && Expr.attrMap[name] ) { + match[1] = Expr.attrMap[name]; + } + + // Handle if an un-quoted value was used + match[4] = ( match[4] || match[5] || "" ).replace( rBackslash, "" ); + + if ( match[2] === "~=" ) { + match[4] = " " + match[4] + " "; + } + + return match; + }, + + PSEUDO: function( match, curLoop, inplace, result, not ) { + if ( match[1] === "not" ) { + // If we're dealing with a complex expression, or a simple one + if ( ( chunker.exec(match[3]) || "" ).length > 1 || /^\w/.test(match[3]) ) { + match[3] = Sizzle(match[3], null, null, curLoop); + + } else { + var ret = Sizzle.filter(match[3], curLoop, inplace, true ^ not); + + if ( !inplace ) { + result.push.apply( result, ret ); + } + + return false; + } + + } else if ( Expr.match.POS.test( match[0] ) || Expr.match.CHILD.test( match[0] ) ) { + return true; + } + + return match; + }, + + POS: function( match ) { + match.unshift( true ); + + return match; + } + }, + + filters: { + enabled: function( elem ) { + return elem.disabled === false && elem.type !== "hidden"; + }, + + disabled: function( elem ) { + return elem.disabled === true; + }, + + checked: function( elem ) { + return elem.checked === true; + }, + + selected: function( elem ) { + // Accessing this property makes selected-by-default + // options in Safari work properly + if ( elem.parentNode ) { + elem.parentNode.selectedIndex; + } + + return elem.selected === true; + }, + + parent: function( elem ) { + return !!elem.firstChild; + }, + + empty: function( elem ) { + return !elem.firstChild; + }, + + has: function( elem, i, match ) { + return !!Sizzle( match[3], elem ).length; + }, + + header: function( elem ) { + return (/h\d/i).test( elem.nodeName ); + }, + + text: function( elem ) { + var attr = elem.getAttribute( "type" ), type = elem.type; + // IE6 and 7 will map elem.type to 'text' for new HTML5 types (search, etc) + // use getAttribute instead to test this case + return elem.nodeName.toLowerCase() === "input" && "text" === type && ( attr === type || attr === null ); + }, + + radio: function( elem ) { + return elem.nodeName.toLowerCase() === "input" && "radio" === elem.type; + }, + + checkbox: function( elem ) { + return elem.nodeName.toLowerCase() === "input" && "checkbox" === elem.type; + }, + + file: function( elem ) { + return elem.nodeName.toLowerCase() === "input" && "file" === elem.type; + }, + + password: function( elem ) { + return elem.nodeName.toLowerCase() === "input" && "password" === elem.type; + }, + + submit: function( elem ) { + var name = elem.nodeName.toLowerCase(); + return (name === "input" || name === "button") && "submit" === elem.type; + }, + + image: function( elem ) { + return elem.nodeName.toLowerCase() === "input" && "image" === elem.type; + }, + + reset: function( elem ) { + var name = elem.nodeName.toLowerCase(); + return (name === "input" || name === "button") && "reset" === elem.type; + }, + + button: function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && "button" === elem.type || name === "button"; + }, + + input: function( elem ) { + return (/input|select|textarea|button/i).test( elem.nodeName ); + }, + + focus: function( elem ) { + return elem === elem.ownerDocument.activeElement; + } + }, + setFilters: { + first: function( elem, i ) { + return i === 0; + }, + + last: function( elem, i, match, array ) { + return i === array.length - 1; + }, + + even: function( elem, i ) { + return i % 2 === 0; + }, + + odd: function( elem, i ) { + return i % 2 === 1; + }, + + lt: function( elem, i, match ) { + return i < match[3] - 0; + }, + + gt: function( elem, i, match ) { + return i > match[3] - 0; + }, + + nth: function( elem, i, match ) { + return match[3] - 0 === i; + }, + + eq: function( elem, i, match ) { + return match[3] - 0 === i; + } + }, + filter: { + PSEUDO: function( elem, match, i, array ) { + var name = match[1], + filter = Expr.filters[ name ]; + + if ( filter ) { + return filter( elem, i, match, array ); + + } else if ( name === "contains" ) { + return (elem.textContent || elem.innerText || getText([ elem ]) || "").indexOf(match[3]) >= 0; + + } else if ( name === "not" ) { + var not = match[3]; + + for ( var j = 0, l = not.length; j < l; j++ ) { + if ( not[j] === elem ) { + return false; + } + } + + return true; + + } else { + Sizzle.error( name ); + } + }, + + CHILD: function( elem, match ) { + var first, last, + doneName, parent, cache, + count, diff, + type = match[1], + node = elem; + + switch ( type ) { + case "only": + case "first": + while ( (node = node.previousSibling) ) { + if ( node.nodeType === 1 ) { + return false; + } + } + + if ( type === "first" ) { + return true; + } + + node = elem; + + case "last": + while ( (node = node.nextSibling) ) { + if ( node.nodeType === 1 ) { + return false; + } + } + + return true; + + case "nth": + first = match[2]; + last = match[3]; + + if ( first === 1 && last === 0 ) { + return true; + } + + doneName = match[0]; + parent = elem.parentNode; + + if ( parent && (parent[ expando ] !== doneName || !elem.nodeIndex) ) { + count = 0; + + for ( node = parent.firstChild; node; node = node.nextSibling ) { + if ( node.nodeType === 1 ) { + node.nodeIndex = ++count; + } + } + + parent[ expando ] = doneName; + } + + diff = elem.nodeIndex - last; + + if ( first === 0 ) { + return diff === 0; + + } else { + return ( diff % first === 0 && diff / first >= 0 ); + } + } + }, + + ID: function( elem, match ) { + return elem.nodeType === 1 && elem.getAttribute("id") === match; + }, + + TAG: function( elem, match ) { + return (match === "*" && elem.nodeType === 1) || !!elem.nodeName && elem.nodeName.toLowerCase() === match; + }, + + CLASS: function( elem, match ) { + return (" " + (elem.className || elem.getAttribute("class")) + " ") + .indexOf( match ) > -1; + }, + + ATTR: function( elem, match ) { + var name = match[1], + result = Sizzle.attr ? + Sizzle.attr( elem, name ) : + Expr.attrHandle[ name ] ? + Expr.attrHandle[ name ]( elem ) : + elem[ name ] != null ? + elem[ name ] : + elem.getAttribute( name ), + value = result + "", + type = match[2], + check = match[4]; + + return result == null ? + type === "!=" : + !type && Sizzle.attr ? + result != null : + type === "=" ? + value === check : + type === "*=" ? + value.indexOf(check) >= 0 : + type === "~=" ? + (" " + value + " ").indexOf(check) >= 0 : + !check ? + value && result !== false : + type === "!=" ? + value !== check : + type === "^=" ? + value.indexOf(check) === 0 : + type === "$=" ? + value.substr(value.length - check.length) === check : + type === "|=" ? + value === check || value.substr(0, check.length + 1) === check + "-" : + false; + }, + + POS: function( elem, match, i, array ) { + var name = match[2], + filter = Expr.setFilters[ name ]; + + if ( filter ) { + return filter( elem, i, match, array ); + } + } + } +}; + +var origPOS = Expr.match.POS, + fescape = function(all, num){ + return "\\" + (num - 0 + 1); + }; + +for ( var type in Expr.match ) { + Expr.match[ type ] = new RegExp( Expr.match[ type ].source + (/(?![^\[]*\])(?![^\(]*\))/.source) ); + Expr.leftMatch[ type ] = new RegExp( /(^(?:.|\r|\n)*?)/.source + Expr.match[ type ].source.replace(/\\(\d+)/g, fescape) ); +} + +var makeArray = function( array, results ) { + array = Array.prototype.slice.call( array, 0 ); + + if ( results ) { + results.push.apply( results, array ); + return results; + } + + return array; +}; + +// Perform a simple check to determine if the browser is capable of +// converting a NodeList to an array using builtin methods. +// Also verifies that the returned array holds DOM nodes +// (which is not the case in the Blackberry browser) +try { + Array.prototype.slice.call( document.documentElement.childNodes, 0 )[0].nodeType; + +// Provide a fallback method if it does not work +} catch( e ) { + makeArray = function( array, results ) { + var i = 0, + ret = results || []; + + if ( toString.call(array) === "[object Array]" ) { + Array.prototype.push.apply( ret, array ); + + } else { + if ( typeof array.length === "number" ) { + for ( var l = array.length; i < l; i++ ) { + ret.push( array[i] ); + } + + } else { + for ( ; array[i]; i++ ) { + ret.push( array[i] ); + } + } + } + + return ret; + }; +} + +var sortOrder, siblingCheck; + +if ( document.documentElement.compareDocumentPosition ) { + sortOrder = function( a, b ) { + if ( a === b ) { + hasDuplicate = true; + return 0; + } + + if ( !a.compareDocumentPosition || !b.compareDocumentPosition ) { + return a.compareDocumentPosition ? -1 : 1; + } + + return a.compareDocumentPosition(b) & 4 ? -1 : 1; + }; + +} else { + sortOrder = function( a, b ) { + // The nodes are identical, we can exit early + if ( a === b ) { + hasDuplicate = true; + return 0; + + // Fallback to using sourceIndex (in IE) if it's available on both nodes + } else if ( a.sourceIndex && b.sourceIndex ) { + return a.sourceIndex - b.sourceIndex; + } + + var al, bl, + ap = [], + bp = [], + aup = a.parentNode, + bup = b.parentNode, + cur = aup; + + // If the nodes are siblings (or identical) we can do a quick check + if ( aup === bup ) { + return siblingCheck( a, b ); + + // If no parents were found then the nodes are disconnected + } else if ( !aup ) { + return -1; + + } else if ( !bup ) { + return 1; + } + + // Otherwise they're somewhere else in the tree so we need + // to build up a full list of the parentNodes for comparison + while ( cur ) { + ap.unshift( cur ); + cur = cur.parentNode; + } + + cur = bup; + + while ( cur ) { + bp.unshift( cur ); + cur = cur.parentNode; + } + + al = ap.length; + bl = bp.length; + + // Start walking down the tree looking for a discrepancy + for ( var i = 0; i < al && i < bl; i++ ) { + if ( ap[i] !== bp[i] ) { + return siblingCheck( ap[i], bp[i] ); + } + } + + // We ended someplace up the tree so do a sibling check + return i === al ? + siblingCheck( a, bp[i], -1 ) : + siblingCheck( ap[i], b, 1 ); + }; + + siblingCheck = function( a, b, ret ) { + if ( a === b ) { + return ret; + } + + var cur = a.nextSibling; + + while ( cur ) { + if ( cur === b ) { + return -1; + } + + cur = cur.nextSibling; + } + + return 1; + }; +} + +// Check to see if the browser returns elements by name when +// querying by getElementById (and provide a workaround) +(function(){ + // We're going to inject a fake input element with a specified name + var form = document.createElement("div"), + id = "script" + (new Date()).getTime(), + root = document.documentElement; + + form.innerHTML = "<a name='" + id + "'/>"; + + // Inject it into the root element, check its status, and remove it quickly + root.insertBefore( form, root.firstChild ); + + // The workaround has to do additional checks after a getElementById + // Which slows things down for other browsers (hence the branching) + if ( document.getElementById( id ) ) { + Expr.find.ID = function( match, context, isXML ) { + if ( typeof context.getElementById !== "undefined" && !isXML ) { + var m = context.getElementById(match[1]); + + return m ? + m.id === match[1] || typeof m.getAttributeNode !== "undefined" && m.getAttributeNode("id").nodeValue === match[1] ? + [m] : + undefined : + []; + } + }; + + Expr.filter.ID = function( elem, match ) { + var node = typeof elem.getAttributeNode !== "undefined" && elem.getAttributeNode("id"); + + return elem.nodeType === 1 && node && node.nodeValue === match; + }; + } + + root.removeChild( form ); + + // release memory in IE + root = form = null; +})(); + +(function(){ + // Check to see if the browser returns only elements + // when doing getElementsByTagName("*") + + // Create a fake element + var div = document.createElement("div"); + div.appendChild( document.createComment("") ); + + // Make sure no comments are found + if ( div.getElementsByTagName("*").length > 0 ) { + Expr.find.TAG = function( match, context ) { + var results = context.getElementsByTagName( match[1] ); + + // Filter out possible comments + if ( match[1] === "*" ) { + var tmp = []; + + for ( var i = 0; results[i]; i++ ) { + if ( results[i].nodeType === 1 ) { + tmp.push( results[i] ); + } + } + + results = tmp; + } + + return results; + }; + } + + // Check to see if an attribute returns normalized href attributes + div.innerHTML = "<a href='#'></a>"; + + if ( div.firstChild && typeof div.firstChild.getAttribute !== "undefined" && + div.firstChild.getAttribute("href") !== "#" ) { + + Expr.attrHandle.href = function( elem ) { + return elem.getAttribute( "href", 2 ); + }; + } + + // release memory in IE + div = null; +})(); + +if ( document.querySelectorAll ) { + (function(){ + var oldSizzle = Sizzle, + div = document.createElement("div"), + id = "__sizzle__"; + + div.innerHTML = "<p class='TEST'></p>"; + + // Safari can't handle uppercase or unicode characters when + // in quirks mode. + if ( div.querySelectorAll && div.querySelectorAll(".TEST").length === 0 ) { + return; + } + + Sizzle = function( query, context, extra, seed ) { + context = context || document; + + // Only use querySelectorAll on non-XML documents + // (ID selectors don't work in non-HTML documents) + if ( !seed && !Sizzle.isXML(context) ) { + // See if we find a selector to speed up + var match = /^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec( query ); + + if ( match && (context.nodeType === 1 || context.nodeType === 9) ) { + // Speed-up: Sizzle("TAG") + if ( match[1] ) { + return makeArray( context.getElementsByTagName( query ), extra ); + + // Speed-up: Sizzle(".CLASS") + } else if ( match[2] && Expr.find.CLASS && context.getElementsByClassName ) { + return makeArray( context.getElementsByClassName( match[2] ), extra ); + } + } + + if ( context.nodeType === 9 ) { + // Speed-up: Sizzle("body") + // The body element only exists once, optimize finding it + if ( query === "body" && context.body ) { + return makeArray( [ context.body ], extra ); + + // Speed-up: Sizzle("#ID") + } else if ( match && match[3] ) { + var elem = context.getElementById( match[3] ); + + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + if ( elem && elem.parentNode ) { + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem.id === match[3] ) { + return makeArray( [ elem ], extra ); + } + + } else { + return makeArray( [], extra ); + } + } + + try { + return makeArray( context.querySelectorAll(query), extra ); + } catch(qsaError) {} + + // qSA works strangely on Element-rooted queries + // We can work around this by specifying an extra ID on the root + // and working up from there (Thanks to Andrew Dupont for the technique) + // IE 8 doesn't work on object elements + } else if ( context.nodeType === 1 && context.nodeName.toLowerCase() !== "object" ) { + var oldContext = context, + old = context.getAttribute( "id" ), + nid = old || id, + hasParent = context.parentNode, + relativeHierarchySelector = /^\s*[+~]/.test( query ); + + if ( !old ) { + context.setAttribute( "id", nid ); + } else { + nid = nid.replace( /'/g, "\\$&" ); + } + if ( relativeHierarchySelector && hasParent ) { + context = context.parentNode; + } + + try { + if ( !relativeHierarchySelector || hasParent ) { + return makeArray( context.querySelectorAll( "[id='" + nid + "'] " + query ), extra ); + } + + } catch(pseudoError) { + } finally { + if ( !old ) { + oldContext.removeAttribute( "id" ); + } + } + } + } + + return oldSizzle(query, context, extra, seed); + }; + + for ( var prop in oldSizzle ) { + Sizzle[ prop ] = oldSizzle[ prop ]; + } + + // release memory in IE + div = null; + })(); +} + +(function(){ + var html = document.documentElement, + matches = html.matchesSelector || html.mozMatchesSelector || html.webkitMatchesSelector || html.msMatchesSelector; + + if ( matches ) { + // Check to see if it's possible to do matchesSelector + // on a disconnected node (IE 9 fails this) + var disconnectedMatch = !matches.call( document.createElement( "div" ), "div" ), + pseudoWorks = false; + + try { + // This should fail with an exception + // Gecko does not error, returns false instead + matches.call( document.documentElement, "[test!='']:sizzle" ); + + } catch( pseudoError ) { + pseudoWorks = true; + } + + Sizzle.matchesSelector = function( node, expr ) { + // Make sure that attribute selectors are quoted + expr = expr.replace(/\=\s*([^'"\]]*)\s*\]/g, "='$1']"); + + if ( !Sizzle.isXML( node ) ) { + try { + if ( pseudoWorks || !Expr.match.PSEUDO.test( expr ) && !/!=/.test( expr ) ) { + var ret = matches.call( node, expr ); + + // IE 9's matchesSelector returns false on disconnected nodes + if ( ret || !disconnectedMatch || + // As well, disconnected nodes are said to be in a document + // fragment in IE 9, so check for that + node.document && node.document.nodeType !== 11 ) { + return ret; + } + } + } catch(e) {} + } + + return Sizzle(expr, null, null, [node]).length > 0; + }; + } +})(); + +(function(){ + var div = document.createElement("div"); + + div.innerHTML = "<div class='test e'></div><div class='test'></div>"; + + // Opera can't find a second classname (in 9.6) + // Also, make sure that getElementsByClassName actually exists + if ( !div.getElementsByClassName || div.getElementsByClassName("e").length === 0 ) { + return; + } + + // Safari caches class attributes, doesn't catch changes (in 3.2) + div.lastChild.className = "e"; + + if ( div.getElementsByClassName("e").length === 1 ) { + return; + } + + Expr.order.splice(1, 0, "CLASS"); + Expr.find.CLASS = function( match, context, isXML ) { + if ( typeof context.getElementsByClassName !== "undefined" && !isXML ) { + return context.getElementsByClassName(match[1]); + } + }; + + // release memory in IE + div = null; +})(); + +function dirNodeCheck( dir, cur, doneName, checkSet, nodeCheck, isXML ) { + for ( var i = 0, l = checkSet.length; i < l; i++ ) { + var elem = checkSet[i]; + + if ( elem ) { + var match = false; + + elem = elem[dir]; + + while ( elem ) { + if ( elem[ expando ] === doneName ) { + match = checkSet[elem.sizset]; + break; + } + + if ( elem.nodeType === 1 && !isXML ){ + elem[ expando ] = doneName; + elem.sizset = i; + } + + if ( elem.nodeName.toLowerCase() === cur ) { + match = elem; + break; + } + + elem = elem[dir]; + } + + checkSet[i] = match; + } + } +} + +function dirCheck( dir, cur, doneName, checkSet, nodeCheck, isXML ) { + for ( var i = 0, l = checkSet.length; i < l; i++ ) { + var elem = checkSet[i]; + + if ( elem ) { + var match = false; + + elem = elem[dir]; + + while ( elem ) { + if ( elem[ expando ] === doneName ) { + match = checkSet[elem.sizset]; + break; + } + + if ( elem.nodeType === 1 ) { + if ( !isXML ) { + elem[ expando ] = doneName; + elem.sizset = i; + } + + if ( typeof cur !== "string" ) { + if ( elem === cur ) { + match = true; + break; + } + + } else if ( Sizzle.filter( cur, [elem] ).length > 0 ) { + match = elem; + break; + } + } + + elem = elem[dir]; + } + + checkSet[i] = match; + } + } +} + +if ( document.documentElement.contains ) { + Sizzle.contains = function( a, b ) { + return a !== b && (a.contains ? a.contains(b) : true); + }; + +} else if ( document.documentElement.compareDocumentPosition ) { + Sizzle.contains = function( a, b ) { + return !!(a.compareDocumentPosition(b) & 16); + }; + +} else { + Sizzle.contains = function() { + return false; + }; +} + +Sizzle.isXML = function( elem ) { + // documentElement is verified for cases where it doesn't yet exist + // (such as loading iframes in IE - #4833) + var documentElement = (elem ? elem.ownerDocument || elem : 0).documentElement; + + return documentElement ? documentElement.nodeName !== "HTML" : false; +}; + +var posProcess = function( selector, context, seed ) { + var match, + tmpSet = [], + later = "", + root = context.nodeType ? [context] : context; + + // Position selectors must be done after the filter + // And so must :not(positional) so we move all PSEUDOs to the end + while ( (match = Expr.match.PSEUDO.exec( selector )) ) { + later += match[0]; + selector = selector.replace( Expr.match.PSEUDO, "" ); + } + + selector = Expr.relative[selector] ? selector + "*" : selector; + + for ( var i = 0, l = root.length; i < l; i++ ) { + Sizzle( selector, root[i], tmpSet, seed ); + } + + return Sizzle.filter( later, tmpSet ); +}; + +// EXPOSE +// Override sizzle attribute retrieval +Sizzle.attr = jQuery.attr; +Sizzle.selectors.attrMap = {}; +jQuery.find = Sizzle; +jQuery.expr = Sizzle.selectors; +jQuery.expr[":"] = jQuery.expr.filters; +jQuery.unique = Sizzle.uniqueSort; +jQuery.text = Sizzle.getText; +jQuery.isXMLDoc = Sizzle.isXML; +jQuery.contains = Sizzle.contains; + + +})(); + + +var runtil = /Until$/, + rparentsprev = /^(?:parents|prevUntil|prevAll)/, + // Note: This RegExp should be improved, or likely pulled from Sizzle + rmultiselector = /,/, + isSimple = /^.[^:#\[\.,]*$/, + slice = Array.prototype.slice, + POS = jQuery.expr.match.POS, + // methods guaranteed to produce a unique set when starting from a unique set + guaranteedUnique = { + children: true, + contents: true, + next: true, + prev: true + }; + +jQuery.fn.extend({ + find: function( selector ) { + var self = this, + i, l; + + if ( typeof selector !== "string" ) { + return jQuery( selector ).filter(function() { + for ( i = 0, l = self.length; i < l; i++ ) { + if ( jQuery.contains( self[ i ], this ) ) { + return true; + } + } + }); + } + + var ret = this.pushStack( "", "find", selector ), + length, n, r; + + for ( i = 0, l = this.length; i < l; i++ ) { + length = ret.length; + jQuery.find( selector, this[i], ret ); + + if ( i > 0 ) { + // Make sure that the results are unique + for ( n = length; n < ret.length; n++ ) { + for ( r = 0; r < length; r++ ) { + if ( ret[r] === ret[n] ) { + ret.splice(n--, 1); + break; + } + } + } + } + } + + return ret; + }, + + has: function( target ) { + var targets = jQuery( target ); + return this.filter(function() { + for ( var i = 0, l = targets.length; i < l; i++ ) { + if ( jQuery.contains( this, targets[i] ) ) { + return true; + } + } + }); + }, + + not: function( selector ) { + return this.pushStack( winnow(this, selector, false), "not", selector); + }, + + filter: function( selector ) { + return this.pushStack( winnow(this, selector, true), "filter", selector ); + }, + + is: function( selector ) { + return !!selector && ( + typeof selector === "string" ? + // If this is a positional selector, check membership in the returned set + // so $("p:first").is("p:last") won't return true for a doc with two "p". + POS.test( selector ) ? + jQuery( selector, this.context ).index( this[0] ) >= 0 : + jQuery.filter( selector, this ).length > 0 : + this.filter( selector ).length > 0 ); + }, + + closest: function( selectors, context ) { + var ret = [], i, l, cur = this[0]; + + // Array (deprecated as of jQuery 1.7) + if ( jQuery.isArray( selectors ) ) { + var level = 1; + + while ( cur && cur.ownerDocument && cur !== context ) { + for ( i = 0; i < selectors.length; i++ ) { + + if ( jQuery( cur ).is( selectors[ i ] ) ) { + ret.push({ selector: selectors[ i ], elem: cur, level: level }); + } + } + + cur = cur.parentNode; + level++; + } + + return ret; + } + + // String + var pos = POS.test( selectors ) || typeof selectors !== "string" ? + jQuery( selectors, context || this.context ) : + 0; + + for ( i = 0, l = this.length; i < l; i++ ) { + cur = this[i]; + + while ( cur ) { + if ( pos ? pos.index(cur) > -1 : jQuery.find.matchesSelector(cur, selectors) ) { + ret.push( cur ); + break; + + } else { + cur = cur.parentNode; + if ( !cur || !cur.ownerDocument || cur === context || cur.nodeType === 11 ) { + break; + } + } + } + } + + ret = ret.length > 1 ? jQuery.unique( ret ) : ret; + + return this.pushStack( ret, "closest", selectors ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + + // No argument, return index in parent + if ( !elem ) { + return ( this[0] && this[0].parentNode ) ? this.prevAll().length : -1; + } + + // index in selector + if ( typeof elem === "string" ) { + return jQuery.inArray( this[0], jQuery( elem ) ); + } + + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem.jquery ? elem[0] : elem, this ); + }, + + add: function( selector, context ) { + var set = typeof selector === "string" ? + jQuery( selector, context ) : + jQuery.makeArray( selector && selector.nodeType ? [ selector ] : selector ), + all = jQuery.merge( this.get(), set ); + + return this.pushStack( isDisconnected( set[0] ) || isDisconnected( all[0] ) ? + all : + jQuery.unique( all ) ); + }, + + andSelf: function() { + return this.add( this.prevObject ); + } +}); + +// A painfully simple check to see if an element is disconnected +// from a document (should be improved, where feasible). +function isDisconnected( node ) { + return !node || !node.parentNode || node.parentNode.nodeType === 11; +} + +jQuery.each({ + parent: function( elem ) { + var parent = elem.parentNode; + return parent && parent.nodeType !== 11 ? parent : null; + }, + parents: function( elem ) { + return jQuery.dir( elem, "parentNode" ); + }, + parentsUntil: function( elem, i, until ) { + return jQuery.dir( elem, "parentNode", until ); + }, + next: function( elem ) { + return jQuery.nth( elem, 2, "nextSibling" ); + }, + prev: function( elem ) { + return jQuery.nth( elem, 2, "previousSibling" ); + }, + nextAll: function( elem ) { + return jQuery.dir( elem, "nextSibling" ); + }, + prevAll: function( elem ) { + return jQuery.dir( elem, "previousSibling" ); + }, + nextUntil: function( elem, i, until ) { + return jQuery.dir( elem, "nextSibling", until ); + }, + prevUntil: function( elem, i, until ) { + return jQuery.dir( elem, "previousSibling", until ); + }, + siblings: function( elem ) { + return jQuery.sibling( elem.parentNode.firstChild, elem ); + }, + children: function( elem ) { + return jQuery.sibling( elem.firstChild ); + }, + contents: function( elem ) { + return jQuery.nodeName( elem, "iframe" ) ? + elem.contentDocument || elem.contentWindow.document : + jQuery.makeArray( elem.childNodes ); + } +}, function( name, fn ) { + jQuery.fn[ name ] = function( until, selector ) { + var ret = jQuery.map( this, fn, until ); + + if ( !runtil.test( name ) ) { + selector = until; + } + + if ( selector && typeof selector === "string" ) { + ret = jQuery.filter( selector, ret ); + } + + ret = this.length > 1 && !guaranteedUnique[ name ] ? jQuery.unique( ret ) : ret; + + if ( (this.length > 1 || rmultiselector.test( selector )) && rparentsprev.test( name ) ) { + ret = ret.reverse(); + } + + return this.pushStack( ret, name, slice.call( arguments ).join(",") ); + }; +}); + +jQuery.extend({ + filter: function( expr, elems, not ) { + if ( not ) { + expr = ":not(" + expr + ")"; + } + + return elems.length === 1 ? + jQuery.find.matchesSelector(elems[0], expr) ? [ elems[0] ] : [] : + jQuery.find.matches(expr, elems); + }, + + dir: function( elem, dir, until ) { + var matched = [], + cur = elem[ dir ]; + + while ( cur && cur.nodeType !== 9 && (until === undefined || cur.nodeType !== 1 || !jQuery( cur ).is( until )) ) { + if ( cur.nodeType === 1 ) { + matched.push( cur ); + } + cur = cur[dir]; + } + return matched; + }, + + nth: function( cur, result, dir, elem ) { + result = result || 1; + var num = 0; + + for ( ; cur; cur = cur[dir] ) { + if ( cur.nodeType === 1 && ++num === result ) { + break; + } + } + + return cur; + }, + + sibling: function( n, elem ) { + var r = []; + + for ( ; n; n = n.nextSibling ) { + if ( n.nodeType === 1 && n !== elem ) { + r.push( n ); + } + } + + return r; + } +}); + +// Implement the identical functionality for filter and not +function winnow( elements, qualifier, keep ) { + + // Can't pass null or undefined to indexOf in Firefox 4 + // Set to 0 to skip string check + qualifier = qualifier || 0; + + if ( jQuery.isFunction( qualifier ) ) { + return jQuery.grep(elements, function( elem, i ) { + var retVal = !!qualifier.call( elem, i, elem ); + return retVal === keep; + }); + + } else if ( qualifier.nodeType ) { + return jQuery.grep(elements, function( elem, i ) { + return ( elem === qualifier ) === keep; + }); + + } else if ( typeof qualifier === "string" ) { + var filtered = jQuery.grep(elements, function( elem ) { + return elem.nodeType === 1; + }); + + if ( isSimple.test( qualifier ) ) { + return jQuery.filter(qualifier, filtered, !keep); + } else { + qualifier = jQuery.filter( qualifier, filtered ); + } + } + + return jQuery.grep(elements, function( elem, i ) { + return ( jQuery.inArray( elem, qualifier ) >= 0 ) === keep; + }); +} + + + + +function createSafeFragment( document ) { + var list = nodeNames.split( "|" ), + safeFrag = document.createDocumentFragment(); + + if ( safeFrag.createElement ) { + while ( list.length ) { + safeFrag.createElement( + list.pop() + ); + } + } + return safeFrag; +} + +var nodeNames = "abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|" + + "header|hgroup|mark|meter|nav|output|progress|section|summary|time|video", + rinlinejQuery = / jQuery\d+="(?:\d+|null)"/g, + rleadingWhitespace = /^\s+/, + rxhtmlTag = /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig, + rtagName = /<([\w:]+)/, + rtbody = /<tbody/i, + rhtml = /<|&#?\w+;/, + rnoInnerhtml = /<(?:script|style)/i, + rnocache = /<(?:script|object|embed|option|style)/i, + rnoshimcache = new RegExp("<(?:" + nodeNames + ")", "i"), + // checked="checked" or checked + rchecked = /checked\s*(?:[^=]|=\s*.checked.)/i, + rscriptType = /\/(java|ecma)script/i, + rcleanScript = /^\s*<!(?:\[CDATA\[|\-\-)/, + wrapMap = { + option: [ 1, "<select multiple='multiple'>", "</select>" ], + legend: [ 1, "<fieldset>", "</fieldset>" ], + thead: [ 1, "<table>", "</table>" ], + tr: [ 2, "<table><tbody>", "</tbody></table>" ], + td: [ 3, "<table><tbody><tr>", "</tr></tbody></table>" ], + col: [ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ], + area: [ 1, "<map>", "</map>" ], + _default: [ 0, "", "" ] + }, + safeFragment = createSafeFragment( document ); + +wrapMap.optgroup = wrapMap.option; +wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead; +wrapMap.th = wrapMap.td; + +// IE can't serialize <link> and <script> tags normally +if ( !jQuery.support.htmlSerialize ) { + wrapMap._default = [ 1, "div<div>", "</div>" ]; +} + +jQuery.fn.extend({ + text: function( text ) { + if ( jQuery.isFunction(text) ) { + return this.each(function(i) { + var self = jQuery( this ); + + self.text( text.call(this, i, self.text()) ); + }); + } + + if ( typeof text !== "object" && text !== undefined ) { + return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) ); + } + + return jQuery.text( this ); + }, + + wrapAll: function( html ) { + if ( jQuery.isFunction( html ) ) { + return this.each(function(i) { + jQuery(this).wrapAll( html.call(this, i) ); + }); + } + + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).eq(0).clone(true); + + if ( this[0].parentNode ) { + wrap.insertBefore( this[0] ); + } + + wrap.map(function() { + var elem = this; + + while ( elem.firstChild && elem.firstChild.nodeType === 1 ) { + elem = elem.firstChild; + } + + return elem; + }).append( this ); + } + + return this; + }, + + wrapInner: function( html ) { + if ( jQuery.isFunction( html ) ) { + return this.each(function(i) { + jQuery(this).wrapInner( html.call(this, i) ); + }); + } + + return this.each(function() { + var self = jQuery( this ), + contents = self.contents(); + + if ( contents.length ) { + contents.wrapAll( html ); + + } else { + self.append( html ); + } + }); + }, + + wrap: function( html ) { + var isFunction = jQuery.isFunction( html ); + + return this.each(function(i) { + jQuery( this ).wrapAll( isFunction ? html.call(this, i) : html ); + }); + }, + + unwrap: function() { + return this.parent().each(function() { + if ( !jQuery.nodeName( this, "body" ) ) { + jQuery( this ).replaceWith( this.childNodes ); + } + }).end(); + }, + + append: function() { + return this.domManip(arguments, true, function( elem ) { + if ( this.nodeType === 1 ) { + this.appendChild( elem ); + } + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function( elem ) { + if ( this.nodeType === 1 ) { + this.insertBefore( elem, this.firstChild ); + } + }); + }, + + before: function() { + if ( this[0] && this[0].parentNode ) { + return this.domManip(arguments, false, function( elem ) { + this.parentNode.insertBefore( elem, this ); + }); + } else if ( arguments.length ) { + var set = jQuery.clean( arguments ); + set.push.apply( set, this.toArray() ); + return this.pushStack( set, "before", arguments ); + } + }, + + after: function() { + if ( this[0] && this[0].parentNode ) { + return this.domManip(arguments, false, function( elem ) { + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + } else if ( arguments.length ) { + var set = this.pushStack( this, "after", arguments ); + set.push.apply( set, jQuery.clean(arguments) ); + return set; + } + }, + + // keepData is for internal use only--do not document + remove: function( selector, keepData ) { + for ( var i = 0, elem; (elem = this[i]) != null; i++ ) { + if ( !selector || jQuery.filter( selector, [ elem ] ).length ) { + if ( !keepData && elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName("*") ); + jQuery.cleanData( [ elem ] ); + } + + if ( elem.parentNode ) { + elem.parentNode.removeChild( elem ); + } + } + } + + return this; + }, + + empty: function() { + for ( var i = 0, elem; (elem = this[i]) != null; i++ ) { + // Remove element nodes and prevent memory leaks + if ( elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName("*") ); + } + + // Remove any remaining nodes + while ( elem.firstChild ) { + elem.removeChild( elem.firstChild ); + } + } + + return this; + }, + + clone: function( dataAndEvents, deepDataAndEvents ) { + dataAndEvents = dataAndEvents == null ? false : dataAndEvents; + deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents; + + return this.map( function () { + return jQuery.clone( this, dataAndEvents, deepDataAndEvents ); + }); + }, + + html: function( value ) { + if ( value === undefined ) { + return this[0] && this[0].nodeType === 1 ? + this[0].innerHTML.replace(rinlinejQuery, "") : + null; + + // See if we can take a shortcut and just use innerHTML + } else if ( typeof value === "string" && !rnoInnerhtml.test( value ) && + (jQuery.support.leadingWhitespace || !rleadingWhitespace.test( value )) && + !wrapMap[ (rtagName.exec( value ) || ["", ""])[1].toLowerCase() ] ) { + + value = value.replace(rxhtmlTag, "<$1></$2>"); + + try { + for ( var i = 0, l = this.length; i < l; i++ ) { + // Remove element nodes and prevent memory leaks + if ( this[i].nodeType === 1 ) { + jQuery.cleanData( this[i].getElementsByTagName("*") ); + this[i].innerHTML = value; + } + } + + // If using innerHTML throws an exception, use the fallback method + } catch(e) { + this.empty().append( value ); + } + + } else if ( jQuery.isFunction( value ) ) { + this.each(function(i){ + var self = jQuery( this ); + + self.html( value.call(this, i, self.html()) ); + }); + + } else { + this.empty().append( value ); + } + + return this; + }, + + replaceWith: function( value ) { + if ( this[0] && this[0].parentNode ) { + // Make sure that the elements are removed from the DOM before they are inserted + // this can help fix replacing a parent with child elements + if ( jQuery.isFunction( value ) ) { + return this.each(function(i) { + var self = jQuery(this), old = self.html(); + self.replaceWith( value.call( this, i, old ) ); + }); + } + + if ( typeof value !== "string" ) { + value = jQuery( value ).detach(); + } + + return this.each(function() { + var next = this.nextSibling, + parent = this.parentNode; + + jQuery( this ).remove(); + + if ( next ) { + jQuery(next).before( value ); + } else { + jQuery(parent).append( value ); + } + }); + } else { + return this.length ? + this.pushStack( jQuery(jQuery.isFunction(value) ? value() : value), "replaceWith", value ) : + this; + } + }, + + detach: function( selector ) { + return this.remove( selector, true ); + }, + + domManip: function( args, table, callback ) { + var results, first, fragment, parent, + value = args[0], + scripts = []; + + // We can't cloneNode fragments that contain checked, in WebKit + if ( !jQuery.support.checkClone && arguments.length === 3 && typeof value === "string" && rchecked.test( value ) ) { + return this.each(function() { + jQuery(this).domManip( args, table, callback, true ); + }); + } + + if ( jQuery.isFunction(value) ) { + return this.each(function(i) { + var self = jQuery(this); + args[0] = value.call(this, i, table ? self.html() : undefined); + self.domManip( args, table, callback ); + }); + } + + if ( this[0] ) { + parent = value && value.parentNode; + + // If we're in a fragment, just use that instead of building a new one + if ( jQuery.support.parentNode && parent && parent.nodeType === 11 && parent.childNodes.length === this.length ) { + results = { fragment: parent }; + + } else { + results = jQuery.buildFragment( args, this, scripts ); + } + + fragment = results.fragment; + + if ( fragment.childNodes.length === 1 ) { + first = fragment = fragment.firstChild; + } else { + first = fragment.firstChild; + } + + if ( first ) { + table = table && jQuery.nodeName( first, "tr" ); + + for ( var i = 0, l = this.length, lastIndex = l - 1; i < l; i++ ) { + callback.call( + table ? + root(this[i], first) : + this[i], + // Make sure that we do not leak memory by inadvertently discarding + // the original fragment (which might have attached data) instead of + // using it; in addition, use the original fragment object for the last + // item instead of first because it can end up being emptied incorrectly + // in certain situations (Bug #8070). + // Fragments from the fragment cache must always be cloned and never used + // in place. + results.cacheable || ( l > 1 && i < lastIndex ) ? + jQuery.clone( fragment, true, true ) : + fragment + ); + } + } + + if ( scripts.length ) { + jQuery.each( scripts, evalScript ); + } + } + + return this; + } +}); + +function root( elem, cur ) { + return jQuery.nodeName(elem, "table") ? + (elem.getElementsByTagName("tbody")[0] || + elem.appendChild(elem.ownerDocument.createElement("tbody"))) : + elem; +} + +function cloneCopyEvent( src, dest ) { + + if ( dest.nodeType !== 1 || !jQuery.hasData( src ) ) { + return; + } + + var type, i, l, + oldData = jQuery._data( src ), + curData = jQuery._data( dest, oldData ), + events = oldData.events; + + if ( events ) { + delete curData.handle; + curData.events = {}; + + for ( type in events ) { + for ( i = 0, l = events[ type ].length; i < l; i++ ) { + jQuery.event.add( dest, type + ( events[ type ][ i ].namespace ? "." : "" ) + events[ type ][ i ].namespace, events[ type ][ i ], events[ type ][ i ].data ); + } + } + } + + // make the cloned public data object a copy from the original + if ( curData.data ) { + curData.data = jQuery.extend( {}, curData.data ); + } +} + +function cloneFixAttributes( src, dest ) { + var nodeName; + + // We do not need to do anything for non-Elements + if ( dest.nodeType !== 1 ) { + return; + } + + // clearAttributes removes the attributes, which we don't want, + // but also removes the attachEvent events, which we *do* want + if ( dest.clearAttributes ) { + dest.clearAttributes(); + } + + // mergeAttributes, in contrast, only merges back on the + // original attributes, not the events + if ( dest.mergeAttributes ) { + dest.mergeAttributes( src ); + } + + nodeName = dest.nodeName.toLowerCase(); + + // IE6-8 fail to clone children inside object elements that use + // the proprietary classid attribute value (rather than the type + // attribute) to identify the type of content to display + if ( nodeName === "object" ) { + dest.outerHTML = src.outerHTML; + + } else if ( nodeName === "input" && (src.type === "checkbox" || src.type === "radio") ) { + // IE6-8 fails to persist the checked state of a cloned checkbox + // or radio button. Worse, IE6-7 fail to give the cloned element + // a checked appearance if the defaultChecked value isn't also set + if ( src.checked ) { + dest.defaultChecked = dest.checked = src.checked; + } + + // IE6-7 get confused and end up setting the value of a cloned + // checkbox/radio button to an empty string instead of "on" + if ( dest.value !== src.value ) { + dest.value = src.value; + } + + // IE6-8 fails to return the selected option to the default selected + // state when cloning options + } else if ( nodeName === "option" ) { + dest.selected = src.defaultSelected; + + // IE6-8 fails to set the defaultValue to the correct value when + // cloning other types of input fields + } else if ( nodeName === "input" || nodeName === "textarea" ) { + dest.defaultValue = src.defaultValue; + } + + // Event data gets referenced instead of copied if the expando + // gets copied too + dest.removeAttribute( jQuery.expando ); +} + +jQuery.buildFragment = function( args, nodes, scripts ) { + var fragment, cacheable, cacheresults, doc, + first = args[ 0 ]; + + // nodes may contain either an explicit document object, + // a jQuery collection or context object. + // If nodes[0] contains a valid object to assign to doc + if ( nodes && nodes[0] ) { + doc = nodes[0].ownerDocument || nodes[0]; + } + + // Ensure that an attr object doesn't incorrectly stand in as a document object + // Chrome and Firefox seem to allow this to occur and will throw exception + // Fixes #8950 + if ( !doc.createDocumentFragment ) { + doc = document; + } + + // Only cache "small" (1/2 KB) HTML strings that are associated with the main document + // Cloning options loses the selected state, so don't cache them + // IE 6 doesn't like it when you put <object> or <embed> elements in a fragment + // Also, WebKit does not clone 'checked' attributes on cloneNode, so don't cache + // Lastly, IE6,7,8 will not correctly reuse cached fragments that were created from unknown elems #10501 + if ( args.length === 1 && typeof first === "string" && first.length < 512 && doc === document && + first.charAt(0) === "<" && !rnocache.test( first ) && + (jQuery.support.checkClone || !rchecked.test( first )) && + (jQuery.support.html5Clone || !rnoshimcache.test( first )) ) { + + cacheable = true; + + cacheresults = jQuery.fragments[ first ]; + if ( cacheresults && cacheresults !== 1 ) { + fragment = cacheresults; + } + } + + if ( !fragment ) { + fragment = doc.createDocumentFragment(); + jQuery.clean( args, doc, fragment, scripts ); + } + + if ( cacheable ) { + jQuery.fragments[ first ] = cacheresults ? fragment : 1; + } + + return { fragment: fragment, cacheable: cacheable }; +}; + +jQuery.fragments = {}; + +jQuery.each({ + appendTo: "append", + prependTo: "prepend", + insertBefore: "before", + insertAfter: "after", + replaceAll: "replaceWith" +}, function( name, original ) { + jQuery.fn[ name ] = function( selector ) { + var ret = [], + insert = jQuery( selector ), + parent = this.length === 1 && this[0].parentNode; + + if ( parent && parent.nodeType === 11 && parent.childNodes.length === 1 && insert.length === 1 ) { + insert[ original ]( this[0] ); + return this; + + } else { + for ( var i = 0, l = insert.length; i < l; i++ ) { + var elems = ( i > 0 ? this.clone(true) : this ).get(); + jQuery( insert[i] )[ original ]( elems ); + ret = ret.concat( elems ); + } + + return this.pushStack( ret, name, insert.selector ); + } + }; +}); + +function getAll( elem ) { + if ( typeof elem.getElementsByTagName !== "undefined" ) { + return elem.getElementsByTagName( "*" ); + + } else if ( typeof elem.querySelectorAll !== "undefined" ) { + return elem.querySelectorAll( "*" ); + + } else { + return []; + } +} + +// Used in clean, fixes the defaultChecked property +function fixDefaultChecked( elem ) { + if ( elem.type === "checkbox" || elem.type === "radio" ) { + elem.defaultChecked = elem.checked; + } +} +// Finds all inputs and passes them to fixDefaultChecked +function findInputs( elem ) { + var nodeName = ( elem.nodeName || "" ).toLowerCase(); + if ( nodeName === "input" ) { + fixDefaultChecked( elem ); + // Skip scripts, get other children + } else if ( nodeName !== "script" && typeof elem.getElementsByTagName !== "undefined" ) { + jQuery.grep( elem.getElementsByTagName("input"), fixDefaultChecked ); + } +} + +// Derived From: http://www.iecss.com/shimprove/javascript/shimprove.1-0-1.js +function shimCloneNode( elem ) { + var div = document.createElement( "div" ); + safeFragment.appendChild( div ); + + div.innerHTML = elem.outerHTML; + return div.firstChild; +} + +jQuery.extend({ + clone: function( elem, dataAndEvents, deepDataAndEvents ) { + var srcElements, + destElements, + i, + // IE<=8 does not properly clone detached, unknown element nodes + clone = jQuery.support.html5Clone || !rnoshimcache.test( "<" + elem.nodeName ) ? + elem.cloneNode( true ) : + shimCloneNode( elem ); + + if ( (!jQuery.support.noCloneEvent || !jQuery.support.noCloneChecked) && + (elem.nodeType === 1 || elem.nodeType === 11) && !jQuery.isXMLDoc(elem) ) { + // IE copies events bound via attachEvent when using cloneNode. + // Calling detachEvent on the clone will also remove the events + // from the original. In order to get around this, we use some + // proprietary methods to clear the events. Thanks to MooTools + // guys for this hotness. + + cloneFixAttributes( elem, clone ); + + // Using Sizzle here is crazy slow, so we use getElementsByTagName instead + srcElements = getAll( elem ); + destElements = getAll( clone ); + + // Weird iteration because IE will replace the length property + // with an element if you are cloning the body and one of the + // elements on the page has a name or id of "length" + for ( i = 0; srcElements[i]; ++i ) { + // Ensure that the destination node is not null; Fixes #9587 + if ( destElements[i] ) { + cloneFixAttributes( srcElements[i], destElements[i] ); + } + } + } + + // Copy the events from the original to the clone + if ( dataAndEvents ) { + cloneCopyEvent( elem, clone ); + + if ( deepDataAndEvents ) { + srcElements = getAll( elem ); + destElements = getAll( clone ); + + for ( i = 0; srcElements[i]; ++i ) { + cloneCopyEvent( srcElements[i], destElements[i] ); + } + } + } + + srcElements = destElements = null; + + // Return the cloned set + return clone; + }, + + clean: function( elems, context, fragment, scripts ) { + var checkScriptType; + + context = context || document; + + // !context.createElement fails in IE with an error but returns typeof 'object' + if ( typeof context.createElement === "undefined" ) { + context = context.ownerDocument || context[0] && context[0].ownerDocument || document; + } + + var ret = [], j; + + for ( var i = 0, elem; (elem = elems[i]) != null; i++ ) { + if ( typeof elem === "number" ) { + elem += ""; + } + + if ( !elem ) { + continue; + } + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + if ( !rhtml.test( elem ) ) { + elem = context.createTextNode( elem ); + } else { + // Fix "XHTML"-style tags in all browsers + elem = elem.replace(rxhtmlTag, "<$1></$2>"); + + // Trim whitespace, otherwise indexOf won't work as expected + var tag = ( rtagName.exec( elem ) || ["", ""] )[1].toLowerCase(), + wrap = wrapMap[ tag ] || wrapMap._default, + depth = wrap[0], + div = context.createElement("div"); + + // Append wrapper element to unknown element safe doc fragment + if ( context === document ) { + // Use the fragment we've already created for this document + safeFragment.appendChild( div ); + } else { + // Use a fragment created with the owner document + createSafeFragment( context ).appendChild( div ); + } + + // Go to html and back, then peel off extra wrappers + div.innerHTML = wrap[1] + elem + wrap[2]; + + // Move to the right depth + while ( depth-- ) { + div = div.lastChild; + } + + // Remove IE's autoinserted <tbody> from table fragments + if ( !jQuery.support.tbody ) { + + // String was a <table>, *may* have spurious <tbody> + var hasBody = rtbody.test(elem), + tbody = tag === "table" && !hasBody ? + div.firstChild && div.firstChild.childNodes : + + // String was a bare <thead> or <tfoot> + wrap[1] === "<table>" && !hasBody ? + div.childNodes : + []; + + for ( j = tbody.length - 1; j >= 0 ; --j ) { + if ( jQuery.nodeName( tbody[ j ], "tbody" ) && !tbody[ j ].childNodes.length ) { + tbody[ j ].parentNode.removeChild( tbody[ j ] ); + } + } + } + + // IE completely kills leading whitespace when innerHTML is used + if ( !jQuery.support.leadingWhitespace && rleadingWhitespace.test( elem ) ) { + div.insertBefore( context.createTextNode( rleadingWhitespace.exec(elem)[0] ), div.firstChild ); + } + + elem = div.childNodes; + } + } + + // Resets defaultChecked for any radios and checkboxes + // about to be appended to the DOM in IE 6/7 (#8060) + var len; + if ( !jQuery.support.appendChecked ) { + if ( elem[0] && typeof (len = elem.length) === "number" ) { + for ( j = 0; j < len; j++ ) { + findInputs( elem[j] ); + } + } else { + findInputs( elem ); + } + } + + if ( elem.nodeType ) { + ret.push( elem ); + } else { + ret = jQuery.merge( ret, elem ); + } + } + + if ( fragment ) { + checkScriptType = function( elem ) { + return !elem.type || rscriptType.test( elem.type ); + }; + for ( i = 0; ret[i]; i++ ) { + if ( scripts && jQuery.nodeName( ret[i], "script" ) && (!ret[i].type || ret[i].type.toLowerCase() === "text/javascript") ) { + scripts.push( ret[i].parentNode ? ret[i].parentNode.removeChild( ret[i] ) : ret[i] ); + + } else { + if ( ret[i].nodeType === 1 ) { + var jsTags = jQuery.grep( ret[i].getElementsByTagName( "script" ), checkScriptType ); + + ret.splice.apply( ret, [i + 1, 0].concat( jsTags ) ); + } + fragment.appendChild( ret[i] ); + } + } + } + + return ret; + }, + + cleanData: function( elems ) { + var data, id, + cache = jQuery.cache, + special = jQuery.event.special, + deleteExpando = jQuery.support.deleteExpando; + + for ( var i = 0, elem; (elem = elems[i]) != null; i++ ) { + if ( elem.nodeName && jQuery.noData[elem.nodeName.toLowerCase()] ) { + continue; + } + + id = elem[ jQuery.expando ]; + + if ( id ) { + data = cache[ id ]; + + if ( data && data.events ) { + for ( var type in data.events ) { + if ( special[ type ] ) { + jQuery.event.remove( elem, type ); + + // This is a shortcut to avoid jQuery.event.remove's overhead + } else { + jQuery.removeEvent( elem, type, data.handle ); + } + } + + // Null the DOM reference to avoid IE6/7/8 leak (#7054) + if ( data.handle ) { + data.handle.elem = null; + } + } + + if ( deleteExpando ) { + delete elem[ jQuery.expando ]; + + } else if ( elem.removeAttribute ) { + elem.removeAttribute( jQuery.expando ); + } + + delete cache[ id ]; + } + } + } +}); + +function evalScript( i, elem ) { + if ( elem.src ) { + jQuery.ajax({ + url: elem.src, + async: false, + dataType: "script" + }); + } else { + jQuery.globalEval( ( elem.text || elem.textContent || elem.innerHTML || "" ).replace( rcleanScript, "/*$0*/" ) ); + } + + if ( elem.parentNode ) { + elem.parentNode.removeChild( elem ); + } +} + + + + +var ralpha = /alpha\([^)]*\)/i, + ropacity = /opacity=([^)]*)/, + // fixed for IE9, see #8346 + rupper = /([A-Z]|^ms)/g, + rnumpx = /^-?\d+(?:px)?$/i, + rnum = /^-?\d/, + rrelNum = /^([\-+])=([\-+.\de]+)/, + + cssShow = { position: "absolute", visibility: "hidden", display: "block" }, + cssWidth = [ "Left", "Right" ], + cssHeight = [ "Top", "Bottom" ], + curCSS, + + getComputedStyle, + currentStyle; + +jQuery.fn.css = function( name, value ) { + // Setting 'undefined' is a no-op + if ( arguments.length === 2 && value === undefined ) { + return this; + } + + return jQuery.access( this, name, value, true, function( elem, name, value ) { + return value !== undefined ? + jQuery.style( elem, name, value ) : + jQuery.css( elem, name ); + }); +}; + +jQuery.extend({ + // Add in style property hooks for overriding the default + // behavior of getting and setting a style property + cssHooks: { + opacity: { + get: function( elem, computed ) { + if ( computed ) { + // We should always get a number back from opacity + var ret = curCSS( elem, "opacity", "opacity" ); + return ret === "" ? "1" : ret; + + } else { + return elem.style.opacity; + } + } + } + }, + + // Exclude the following css properties to add px + cssNumber: { + "fillOpacity": true, + "fontWeight": true, + "lineHeight": true, + "opacity": true, + "orphans": true, + "widows": true, + "zIndex": true, + "zoom": true + }, + + // Add in properties whose names you wish to fix before + // setting or getting the value + cssProps: { + // normalize float css property + "float": jQuery.support.cssFloat ? "cssFloat" : "styleFloat" + }, + + // Get and set the style property on a DOM Node + style: function( elem, name, value, extra ) { + // Don't set styles on text and comment nodes + if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) { + return; + } + + // Make sure that we're working with the right name + var ret, type, origName = jQuery.camelCase( name ), + style = elem.style, hooks = jQuery.cssHooks[ origName ]; + + name = jQuery.cssProps[ origName ] || origName; + + // Check if we're setting a value + if ( value !== undefined ) { + type = typeof value; + + // convert relative number strings (+= or -=) to relative numbers. #7345 + if ( type === "string" && (ret = rrelNum.exec( value )) ) { + value = ( +( ret[1] + 1) * +ret[2] ) + parseFloat( jQuery.css( elem, name ) ); + // Fixes bug #9237 + type = "number"; + } + + // Make sure that NaN and null values aren't set. See: #7116 + if ( value == null || type === "number" && isNaN( value ) ) { + return; + } + + // If a number was passed in, add 'px' to the (except for certain CSS properties) + if ( type === "number" && !jQuery.cssNumber[ origName ] ) { + value += "px"; + } + + // If a hook was provided, use that value, otherwise just set the specified value + if ( !hooks || !("set" in hooks) || (value = hooks.set( elem, value )) !== undefined ) { + // Wrapped to prevent IE from throwing errors when 'invalid' values are provided + // Fixes bug #5509 + try { + style[ name ] = value; + } catch(e) {} + } + + } else { + // If a hook was provided get the non-computed value from there + if ( hooks && "get" in hooks && (ret = hooks.get( elem, false, extra )) !== undefined ) { + return ret; + } + + // Otherwise just get the value from the style object + return style[ name ]; + } + }, + + css: function( elem, name, extra ) { + var ret, hooks; + + // Make sure that we're working with the right name + name = jQuery.camelCase( name ); + hooks = jQuery.cssHooks[ name ]; + name = jQuery.cssProps[ name ] || name; + + // cssFloat needs a special treatment + if ( name === "cssFloat" ) { + name = "float"; + } + + // If a hook was provided get the computed value from there + if ( hooks && "get" in hooks && (ret = hooks.get( elem, true, extra )) !== undefined ) { + return ret; + + // Otherwise, if a way to get the computed value exists, use that + } else if ( curCSS ) { + return curCSS( elem, name ); + } + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var old = {}; + + // Remember the old values, and insert the new ones + for ( var name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + callback.call( elem ); + + // Revert the old values + for ( name in options ) { + elem.style[ name ] = old[ name ]; + } + } +}); + +// DEPRECATED, Use jQuery.css() instead +jQuery.curCSS = jQuery.css; + +jQuery.each(["height", "width"], function( i, name ) { + jQuery.cssHooks[ name ] = { + get: function( elem, computed, extra ) { + var val; + + if ( computed ) { + if ( elem.offsetWidth !== 0 ) { + return getWH( elem, name, extra ); + } else { + jQuery.swap( elem, cssShow, function() { + val = getWH( elem, name, extra ); + }); + } + + return val; + } + }, + + set: function( elem, value ) { + if ( rnumpx.test( value ) ) { + // ignore negative width and height values #1599 + value = parseFloat( value ); + + if ( value >= 0 ) { + return value + "px"; + } + + } else { + return value; + } + } + }; +}); + +if ( !jQuery.support.opacity ) { + jQuery.cssHooks.opacity = { + get: function( elem, computed ) { + // IE uses filters for opacity + return ropacity.test( (computed && elem.currentStyle ? elem.currentStyle.filter : elem.style.filter) || "" ) ? + ( parseFloat( RegExp.$1 ) / 100 ) + "" : + computed ? "1" : ""; + }, + + set: function( elem, value ) { + var style = elem.style, + currentStyle = elem.currentStyle, + opacity = jQuery.isNumeric( value ) ? "alpha(opacity=" + value * 100 + ")" : "", + filter = currentStyle && currentStyle.filter || style.filter || ""; + + // IE has trouble with opacity if it does not have layout + // Force it by setting the zoom level + style.zoom = 1; + + // if setting opacity to 1, and no other filters exist - attempt to remove filter attribute #6652 + if ( value >= 1 && jQuery.trim( filter.replace( ralpha, "" ) ) === "" ) { + + // Setting style.filter to null, "" & " " still leave "filter:" in the cssText + // if "filter:" is present at all, clearType is disabled, we want to avoid this + // style.removeAttribute is IE Only, but so apparently is this code path... + style.removeAttribute( "filter" ); + + // if there there is no filter style applied in a css rule, we are done + if ( currentStyle && !currentStyle.filter ) { + return; + } + } + + // otherwise, set new filter values + style.filter = ralpha.test( filter ) ? + filter.replace( ralpha, opacity ) : + filter + " " + opacity; + } + }; +} + +jQuery(function() { + // This hook cannot be added until DOM ready because the support test + // for it is not run until after DOM ready + if ( !jQuery.support.reliableMarginRight ) { + jQuery.cssHooks.marginRight = { + get: function( elem, computed ) { + // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right + // Work around by temporarily setting element display to inline-block + var ret; + jQuery.swap( elem, { "display": "inline-block" }, function() { + if ( computed ) { + ret = curCSS( elem, "margin-right", "marginRight" ); + } else { + ret = elem.style.marginRight; + } + }); + return ret; + } + }; + } +}); + +if ( document.defaultView && document.defaultView.getComputedStyle ) { + getComputedStyle = function( elem, name ) { + var ret, defaultView, computedStyle; + + name = name.replace( rupper, "-$1" ).toLowerCase(); + + if ( (defaultView = elem.ownerDocument.defaultView) && + (computedStyle = defaultView.getComputedStyle( elem, null )) ) { + ret = computedStyle.getPropertyValue( name ); + if ( ret === "" && !jQuery.contains( elem.ownerDocument.documentElement, elem ) ) { + ret = jQuery.style( elem, name ); + } + } + + return ret; + }; +} + +if ( document.documentElement.currentStyle ) { + currentStyle = function( elem, name ) { + var left, rsLeft, uncomputed, + ret = elem.currentStyle && elem.currentStyle[ name ], + style = elem.style; + + // Avoid setting ret to empty string here + // so we don't default to auto + if ( ret === null && style && (uncomputed = style[ name ]) ) { + ret = uncomputed; + } + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + if ( !rnumpx.test( ret ) && rnum.test( ret ) ) { + + // Remember the original values + left = style.left; + rsLeft = elem.runtimeStyle && elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + if ( rsLeft ) { + elem.runtimeStyle.left = elem.currentStyle.left; + } + style.left = name === "fontSize" ? "1em" : ( ret || 0 ); + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + if ( rsLeft ) { + elem.runtimeStyle.left = rsLeft; + } + } + + return ret === "" ? "auto" : ret; + }; +} + +curCSS = getComputedStyle || currentStyle; + +function getWH( elem, name, extra ) { + + // Start with offset property + var val = name === "width" ? elem.offsetWidth : elem.offsetHeight, + which = name === "width" ? cssWidth : cssHeight, + i = 0, + len = which.length; + + if ( val > 0 ) { + if ( extra !== "border" ) { + for ( ; i < len; i++ ) { + if ( !extra ) { + val -= parseFloat( jQuery.css( elem, "padding" + which[ i ] ) ) || 0; + } + if ( extra === "margin" ) { + val += parseFloat( jQuery.css( elem, extra + which[ i ] ) ) || 0; + } else { + val -= parseFloat( jQuery.css( elem, "border" + which[ i ] + "Width" ) ) || 0; + } + } + } + + return val + "px"; + } + + // Fall back to computed then uncomputed css if necessary + val = curCSS( elem, name, name ); + if ( val < 0 || val == null ) { + val = elem.style[ name ] || 0; + } + // Normalize "", auto, and prepare for extra + val = parseFloat( val ) || 0; + + // Add padding, border, margin + if ( extra ) { + for ( ; i < len; i++ ) { + val += parseFloat( jQuery.css( elem, "padding" + which[ i ] ) ) || 0; + if ( extra !== "padding" ) { + val += parseFloat( jQuery.css( elem, "border" + which[ i ] + "Width" ) ) || 0; + } + if ( extra === "margin" ) { + val += parseFloat( jQuery.css( elem, extra + which[ i ] ) ) || 0; + } + } + } + + return val + "px"; +} + +if ( jQuery.expr && jQuery.expr.filters ) { + jQuery.expr.filters.hidden = function( elem ) { + var width = elem.offsetWidth, + height = elem.offsetHeight; + + return ( width === 0 && height === 0 ) || (!jQuery.support.reliableHiddenOffsets && ((elem.style && elem.style.display) || jQuery.css( elem, "display" )) === "none"); + }; + + jQuery.expr.filters.visible = function( elem ) { + return !jQuery.expr.filters.hidden( elem ); + }; +} + + + + +var r20 = /%20/g, + rbracket = /\[\]$/, + rCRLF = /\r?\n/g, + rhash = /#.*$/, + rheaders = /^(.*?):[ \t]*([^\r\n]*)\r?$/mg, // IE leaves an \r character at EOL + rinput = /^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i, + // #7653, #8125, #8152: local protocol detection + rlocalProtocol = /^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/, + rnoContent = /^(?:GET|HEAD)$/, + rprotocol = /^\/\//, + rquery = /\?/, + rscript = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, + rselectTextarea = /^(?:select|textarea)/i, + rspacesAjax = /\s+/, + rts = /([?&])_=[^&]*/, + rurl = /^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/, + + // Keep a copy of the old load method + _load = jQuery.fn.load, + + /* Prefilters + * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example) + * 2) These are called: + * - BEFORE asking for a transport + * - AFTER param serialization (s.data is a string if s.processData is true) + * 3) key is the dataType + * 4) the catchall symbol "*" can be used + * 5) execution will start with transport dataType and THEN continue down to "*" if needed + */ + prefilters = {}, + + /* Transports bindings + * 1) key is the dataType + * 2) the catchall symbol "*" can be used + * 3) selection will start with transport dataType and THEN go to "*" if needed + */ + transports = {}, + + // Document location + ajaxLocation, + + // Document location segments + ajaxLocParts, + + // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression + allTypes = ["*/"] + ["*"]; + +// #8138, IE may throw an exception when accessing +// a field from window.location if document.domain has been set +try { + ajaxLocation = location.href; +} catch( e ) { + // Use the href attribute of an A element + // since IE will modify it given document.location + ajaxLocation = document.createElement( "a" ); + ajaxLocation.href = ""; + ajaxLocation = ajaxLocation.href; +} + +// Segment location into parts +ajaxLocParts = rurl.exec( ajaxLocation.toLowerCase() ) || []; + +// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport +function addToPrefiltersOrTransports( structure ) { + + // dataTypeExpression is optional and defaults to "*" + return function( dataTypeExpression, func ) { + + if ( typeof dataTypeExpression !== "string" ) { + func = dataTypeExpression; + dataTypeExpression = "*"; + } + + if ( jQuery.isFunction( func ) ) { + var dataTypes = dataTypeExpression.toLowerCase().split( rspacesAjax ), + i = 0, + length = dataTypes.length, + dataType, + list, + placeBefore; + + // For each dataType in the dataTypeExpression + for ( ; i < length; i++ ) { + dataType = dataTypes[ i ]; + // We control if we're asked to add before + // any existing element + placeBefore = /^\+/.test( dataType ); + if ( placeBefore ) { + dataType = dataType.substr( 1 ) || "*"; + } + list = structure[ dataType ] = structure[ dataType ] || []; + // then we add to the structure accordingly + list[ placeBefore ? "unshift" : "push" ]( func ); + } + } + }; +} + +// Base inspection function for prefilters and transports +function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR, + dataType /* internal */, inspected /* internal */ ) { + + dataType = dataType || options.dataTypes[ 0 ]; + inspected = inspected || {}; + + inspected[ dataType ] = true; + + var list = structure[ dataType ], + i = 0, + length = list ? list.length : 0, + executeOnly = ( structure === prefilters ), + selection; + + for ( ; i < length && ( executeOnly || !selection ); i++ ) { + selection = list[ i ]( options, originalOptions, jqXHR ); + // If we got redirected to another dataType + // we try there if executing only and not done already + if ( typeof selection === "string" ) { + if ( !executeOnly || inspected[ selection ] ) { + selection = undefined; + } else { + options.dataTypes.unshift( selection ); + selection = inspectPrefiltersOrTransports( + structure, options, originalOptions, jqXHR, selection, inspected ); + } + } + } + // If we're only executing or nothing was selected + // we try the catchall dataType if not done already + if ( ( executeOnly || !selection ) && !inspected[ "*" ] ) { + selection = inspectPrefiltersOrTransports( + structure, options, originalOptions, jqXHR, "*", inspected ); + } + // unnecessary when only executing (prefilters) + // but it'll be ignored by the caller in that case + return selection; +} + +// A special extend for ajax options +// that takes "flat" options (not to be deep extended) +// Fixes #9887 +function ajaxExtend( target, src ) { + var key, deep, + flatOptions = jQuery.ajaxSettings.flatOptions || {}; + for ( key in src ) { + if ( src[ key ] !== undefined ) { + ( flatOptions[ key ] ? target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ]; + } + } + if ( deep ) { + jQuery.extend( true, target, deep ); + } +} + +jQuery.fn.extend({ + load: function( url, params, callback ) { + if ( typeof url !== "string" && _load ) { + return _load.apply( this, arguments ); + + // Don't do a request if no elements are being requested + } else if ( !this.length ) { + return this; + } + + var off = url.indexOf( " " ); + if ( off >= 0 ) { + var selector = url.slice( off, url.length ); + url = url.slice( 0, off ); + } + + // Default to a GET request + var type = "GET"; + + // If the second parameter was provided + if ( params ) { + // If it's a function + if ( jQuery.isFunction( params ) ) { + // We assume that it's the callback + callback = params; + params = undefined; + + // Otherwise, build a param string + } else if ( typeof params === "object" ) { + params = jQuery.param( params, jQuery.ajaxSettings.traditional ); + type = "POST"; + } + } + + var self = this; + + // Request the remote document + jQuery.ajax({ + url: url, + type: type, + dataType: "html", + data: params, + // Complete callback (responseText is used internally) + complete: function( jqXHR, status, responseText ) { + // Store the response as specified by the jqXHR object + responseText = jqXHR.responseText; + // If successful, inject the HTML into all the matched elements + if ( jqXHR.isResolved() ) { + // #4825: Get the actual response in case + // a dataFilter is present in ajaxSettings + jqXHR.done(function( r ) { + responseText = r; + }); + // See if a selector was specified + self.html( selector ? + // Create a dummy div to hold the results + jQuery("<div>") + // inject the contents of the document in, removing the scripts + // to avoid any 'Permission Denied' errors in IE + .append(responseText.replace(rscript, "")) + + // Locate the specified elements + .find(selector) : + + // If not, just inject the full result + responseText ); + } + + if ( callback ) { + self.each( callback, [ responseText, status, jqXHR ] ); + } + } + }); + + return this; + }, + + serialize: function() { + return jQuery.param( this.serializeArray() ); + }, + + serializeArray: function() { + return this.map(function(){ + return this.elements ? jQuery.makeArray( this.elements ) : this; + }) + .filter(function(){ + return this.name && !this.disabled && + ( this.checked || rselectTextarea.test( this.nodeName ) || + rinput.test( this.type ) ); + }) + .map(function( i, elem ){ + var val = jQuery( this ).val(); + + return val == null ? + null : + jQuery.isArray( val ) ? + jQuery.map( val, function( val, i ){ + return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + }) : + { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + }).get(); + } +}); + +// Attach a bunch of functions for handling common AJAX events +jQuery.each( "ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split( " " ), function( i, o ){ + jQuery.fn[ o ] = function( f ){ + return this.on( o, f ); + }; +}); + +jQuery.each( [ "get", "post" ], function( i, method ) { + jQuery[ method ] = function( url, data, callback, type ) { + // shift arguments if data argument was omitted + if ( jQuery.isFunction( data ) ) { + type = type || callback; + callback = data; + data = undefined; + } + + return jQuery.ajax({ + type: method, + url: url, + data: data, + success: callback, + dataType: type + }); + }; +}); + +jQuery.extend({ + + getScript: function( url, callback ) { + return jQuery.get( url, undefined, callback, "script" ); + }, + + getJSON: function( url, data, callback ) { + return jQuery.get( url, data, callback, "json" ); + }, + + // Creates a full fledged settings object into target + // with both ajaxSettings and settings fields. + // If target is omitted, writes into ajaxSettings. + ajaxSetup: function( target, settings ) { + if ( settings ) { + // Building a settings object + ajaxExtend( target, jQuery.ajaxSettings ); + } else { + // Extending ajaxSettings + settings = target; + target = jQuery.ajaxSettings; + } + ajaxExtend( target, settings ); + return target; + }, + + ajaxSettings: { + url: ajaxLocation, + isLocal: rlocalProtocol.test( ajaxLocParts[ 1 ] ), + global: true, + type: "GET", + contentType: "application/x-www-form-urlencoded", + processData: true, + async: true, + /* + timeout: 0, + data: null, + dataType: null, + username: null, + password: null, + cache: null, + traditional: false, + headers: {}, + */ + + accepts: { + xml: "application/xml, text/xml", + html: "text/html", + text: "text/plain", + json: "application/json, text/javascript", + "*": allTypes + }, + + contents: { + xml: /xml/, + html: /html/, + json: /json/ + }, + + responseFields: { + xml: "responseXML", + text: "responseText" + }, + + // List of data converters + // 1) key format is "source_type destination_type" (a single space in-between) + // 2) the catchall symbol "*" can be used for source_type + converters: { + + // Convert anything to text + "* text": window.String, + + // Text to html (true = no transformation) + "text html": true, + + // Evaluate text as a json expression + "text json": jQuery.parseJSON, + + // Parse text as xml + "text xml": jQuery.parseXML + }, + + // For options that shouldn't be deep extended: + // you can add your own custom options here if + // and when you create one that shouldn't be + // deep extended (see ajaxExtend) + flatOptions: { + context: true, + url: true + } + }, + + ajaxPrefilter: addToPrefiltersOrTransports( prefilters ), + ajaxTransport: addToPrefiltersOrTransports( transports ), + + // Main method + ajax: function( url, options ) { + + // If url is an object, simulate pre-1.5 signature + if ( typeof url === "object" ) { + options = url; + url = undefined; + } + + // Force options to be an object + options = options || {}; + + var // Create the final options object + s = jQuery.ajaxSetup( {}, options ), + // Callbacks context + callbackContext = s.context || s, + // Context for global events + // It's the callbackContext if one was provided in the options + // and if it's a DOM node or a jQuery collection + globalEventContext = callbackContext !== s && + ( callbackContext.nodeType || callbackContext instanceof jQuery ) ? + jQuery( callbackContext ) : jQuery.event, + // Deferreds + deferred = jQuery.Deferred(), + completeDeferred = jQuery.Callbacks( "once memory" ), + // Status-dependent callbacks + statusCode = s.statusCode || {}, + // ifModified key + ifModifiedKey, + // Headers (they are sent all at once) + requestHeaders = {}, + requestHeadersNames = {}, + // Response headers + responseHeadersString, + responseHeaders, + // transport + transport, + // timeout handle + timeoutTimer, + // Cross-domain detection vars + parts, + // The jqXHR state + state = 0, + // To know if global events are to be dispatched + fireGlobals, + // Loop variable + i, + // Fake xhr + jqXHR = { + + readyState: 0, + + // Caches the header + setRequestHeader: function( name, value ) { + if ( !state ) { + var lname = name.toLowerCase(); + name = requestHeadersNames[ lname ] = requestHeadersNames[ lname ] || name; + requestHeaders[ name ] = value; + } + return this; + }, + + // Raw string + getAllResponseHeaders: function() { + return state === 2 ? responseHeadersString : null; + }, + + // Builds headers hashtable if needed + getResponseHeader: function( key ) { + var match; + if ( state === 2 ) { + if ( !responseHeaders ) { + responseHeaders = {}; + while( ( match = rheaders.exec( responseHeadersString ) ) ) { + responseHeaders[ match[1].toLowerCase() ] = match[ 2 ]; + } + } + match = responseHeaders[ key.toLowerCase() ]; + } + return match === undefined ? null : match; + }, + + // Overrides response content-type header + overrideMimeType: function( type ) { + if ( !state ) { + s.mimeType = type; + } + return this; + }, + + // Cancel the request + abort: function( statusText ) { + statusText = statusText || "abort"; + if ( transport ) { + transport.abort( statusText ); + } + done( 0, statusText ); + return this; + } + }; + + // Callback for when everything is done + // It is defined here because jslint complains if it is declared + // at the end of the function (which would be more logical and readable) + function done( status, nativeStatusText, responses, headers ) { + + // Called once + if ( state === 2 ) { + return; + } + + // State is "done" now + state = 2; + + // Clear timeout if it exists + if ( timeoutTimer ) { + clearTimeout( timeoutTimer ); + } + + // Dereference transport for early garbage collection + // (no matter how long the jqXHR object will be used) + transport = undefined; + + // Cache response headers + responseHeadersString = headers || ""; + + // Set readyState + jqXHR.readyState = status > 0 ? 4 : 0; + + var isSuccess, + success, + error, + statusText = nativeStatusText, + response = responses ? ajaxHandleResponses( s, jqXHR, responses ) : undefined, + lastModified, + etag; + + // If successful, handle type chaining + if ( status >= 200 && status < 300 || status === 304 ) { + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. + if ( s.ifModified ) { + + if ( ( lastModified = jqXHR.getResponseHeader( "Last-Modified" ) ) ) { + jQuery.lastModified[ ifModifiedKey ] = lastModified; + } + if ( ( etag = jqXHR.getResponseHeader( "Etag" ) ) ) { + jQuery.etag[ ifModifiedKey ] = etag; + } + } + + // If not modified + if ( status === 304 ) { + + statusText = "notmodified"; + isSuccess = true; + + // If we have data + } else { + + try { + success = ajaxConvert( s, response ); + statusText = "success"; + isSuccess = true; + } catch(e) { + // We have a parsererror + statusText = "parsererror"; + error = e; + } + } + } else { + // We extract error from statusText + // then normalize statusText and status for non-aborts + error = statusText; + if ( !statusText || status ) { + statusText = "error"; + if ( status < 0 ) { + status = 0; + } + } + } + + // Set data for the fake xhr object + jqXHR.status = status; + jqXHR.statusText = "" + ( nativeStatusText || statusText ); + + // Success/Error + if ( isSuccess ) { + deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] ); + } else { + deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] ); + } + + // Status-dependent callbacks + jqXHR.statusCode( statusCode ); + statusCode = undefined; + + if ( fireGlobals ) { + globalEventContext.trigger( "ajax" + ( isSuccess ? "Success" : "Error" ), + [ jqXHR, s, isSuccess ? success : error ] ); + } + + // Complete + completeDeferred.fireWith( callbackContext, [ jqXHR, statusText ] ); + + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] ); + // Handle the global AJAX counter + if ( !( --jQuery.active ) ) { + jQuery.event.trigger( "ajaxStop" ); + } + } + } + + // Attach deferreds + deferred.promise( jqXHR ); + jqXHR.success = jqXHR.done; + jqXHR.error = jqXHR.fail; + jqXHR.complete = completeDeferred.add; + + // Status-dependent callbacks + jqXHR.statusCode = function( map ) { + if ( map ) { + var tmp; + if ( state < 2 ) { + for ( tmp in map ) { + statusCode[ tmp ] = [ statusCode[tmp], map[tmp] ]; + } + } else { + tmp = map[ jqXHR.status ]; + jqXHR.then( tmp, tmp ); + } + } + return this; + }; + + // Remove hash character (#7531: and string promotion) + // Add protocol if not provided (#5866: IE7 issue with protocol-less urls) + // We also use the url parameter if available + s.url = ( ( url || s.url ) + "" ).replace( rhash, "" ).replace( rprotocol, ajaxLocParts[ 1 ] + "//" ); + + // Extract dataTypes list + s.dataTypes = jQuery.trim( s.dataType || "*" ).toLowerCase().split( rspacesAjax ); + + // Determine if a cross-domain request is in order + if ( s.crossDomain == null ) { + parts = rurl.exec( s.url.toLowerCase() ); + s.crossDomain = !!( parts && + ( parts[ 1 ] != ajaxLocParts[ 1 ] || parts[ 2 ] != ajaxLocParts[ 2 ] || + ( parts[ 3 ] || ( parts[ 1 ] === "http:" ? 80 : 443 ) ) != + ( ajaxLocParts[ 3 ] || ( ajaxLocParts[ 1 ] === "http:" ? 80 : 443 ) ) ) + ); + } + + // Convert data if not already a string + if ( s.data && s.processData && typeof s.data !== "string" ) { + s.data = jQuery.param( s.data, s.traditional ); + } + + // Apply prefilters + inspectPrefiltersOrTransports( prefilters, s, options, jqXHR ); + + // If request was aborted inside a prefiler, stop there + if ( state === 2 ) { + return false; + } + + // We can fire global events as of now if asked to + fireGlobals = s.global; + + // Uppercase the type + s.type = s.type.toUpperCase(); + + // Determine if request has content + s.hasContent = !rnoContent.test( s.type ); + + // Watch for a new set of requests + if ( fireGlobals && jQuery.active++ === 0 ) { + jQuery.event.trigger( "ajaxStart" ); + } + + // More options handling for requests with no content + if ( !s.hasContent ) { + + // If data is available, append data to url + if ( s.data ) { + s.url += ( rquery.test( s.url ) ? "&" : "?" ) + s.data; + // #9682: remove data so that it's not used in an eventual retry + delete s.data; + } + + // Get ifModifiedKey before adding the anti-cache parameter + ifModifiedKey = s.url; + + // Add anti-cache in url if needed + if ( s.cache === false ) { + + var ts = jQuery.now(), + // try replacing _= if it is there + ret = s.url.replace( rts, "$1_=" + ts ); + + // if nothing was replaced, add timestamp to the end + s.url = ret + ( ( ret === s.url ) ? ( rquery.test( s.url ) ? "&" : "?" ) + "_=" + ts : "" ); + } + } + + // Set the correct header, if data is being sent + if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) { + jqXHR.setRequestHeader( "Content-Type", s.contentType ); + } + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. + if ( s.ifModified ) { + ifModifiedKey = ifModifiedKey || s.url; + if ( jQuery.lastModified[ ifModifiedKey ] ) { + jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ ifModifiedKey ] ); + } + if ( jQuery.etag[ ifModifiedKey ] ) { + jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ ifModifiedKey ] ); + } + } + + // Set the Accepts header for the server, depending on the dataType + jqXHR.setRequestHeader( + "Accept", + s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[0] ] ? + s.accepts[ s.dataTypes[0] ] + ( s.dataTypes[ 0 ] !== "*" ? ", " + allTypes + "; q=0.01" : "" ) : + s.accepts[ "*" ] + ); + + // Check for headers option + for ( i in s.headers ) { + jqXHR.setRequestHeader( i, s.headers[ i ] ); + } + + // Allow custom headers/mimetypes and early abort + if ( s.beforeSend && ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || state === 2 ) ) { + // Abort if not done already + jqXHR.abort(); + return false; + + } + + // Install callbacks on deferreds + for ( i in { success: 1, error: 1, complete: 1 } ) { + jqXHR[ i ]( s[ i ] ); + } + + // Get transport + transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR ); + + // If no transport, we auto-abort + if ( !transport ) { + done( -1, "No Transport" ); + } else { + jqXHR.readyState = 1; + // Send global event + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] ); + } + // Timeout + if ( s.async && s.timeout > 0 ) { + timeoutTimer = setTimeout( function(){ + jqXHR.abort( "timeout" ); + }, s.timeout ); + } + + try { + state = 1; + transport.send( requestHeaders, done ); + } catch (e) { + // Propagate exception as error if not done + if ( state < 2 ) { + done( -1, e ); + // Simply rethrow otherwise + } else { + throw e; + } + } + } + + return jqXHR; + }, + + // Serialize an array of form elements or a set of + // key/values into a query string + param: function( a, traditional ) { + var s = [], + add = function( key, value ) { + // If value is a function, invoke it and return its value + value = jQuery.isFunction( value ) ? value() : value; + s[ s.length ] = encodeURIComponent( key ) + "=" + encodeURIComponent( value ); + }; + + // Set traditional to true for jQuery <= 1.3.2 behavior. + if ( traditional === undefined ) { + traditional = jQuery.ajaxSettings.traditional; + } + + // If an array was passed in, assume that it is an array of form elements. + if ( jQuery.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) { + // Serialize the form elements + jQuery.each( a, function() { + add( this.name, this.value ); + }); + + } else { + // If traditional, encode the "old" way (the way 1.3.2 or older + // did it), otherwise encode params recursively. + for ( var prefix in a ) { + buildParams( prefix, a[ prefix ], traditional, add ); + } + } + + // Return the resulting serialization + return s.join( "&" ).replace( r20, "+" ); + } +}); + +function buildParams( prefix, obj, traditional, add ) { + if ( jQuery.isArray( obj ) ) { + // Serialize array item. + jQuery.each( obj, function( i, v ) { + if ( traditional || rbracket.test( prefix ) ) { + // Treat each array item as a scalar. + add( prefix, v ); + + } else { + // If array item is non-scalar (array or object), encode its + // numeric index to resolve deserialization ambiguity issues. + // Note that rack (as of 1.0.0) can't currently deserialize + // nested arrays properly, and attempting to do so may cause + // a server error. Possible fixes are to modify rack's + // deserialization algorithm or to provide an option or flag + // to force array serialization to be shallow. + buildParams( prefix + "[" + ( typeof v === "object" || jQuery.isArray(v) ? i : "" ) + "]", v, traditional, add ); + } + }); + + } else if ( !traditional && obj != null && typeof obj === "object" ) { + // Serialize object item. + for ( var name in obj ) { + buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add ); + } + + } else { + // Serialize scalar item. + add( prefix, obj ); + } +} + +// This is still on the jQuery object... for now +// Want to move this to jQuery.ajax some day +jQuery.extend({ + + // Counter for holding the number of active queries + active: 0, + + // Last-Modified header cache for next request + lastModified: {}, + etag: {} + +}); + +/* Handles responses to an ajax request: + * - sets all responseXXX fields accordingly + * - finds the right dataType (mediates between content-type and expected dataType) + * - returns the corresponding response + */ +function ajaxHandleResponses( s, jqXHR, responses ) { + + var contents = s.contents, + dataTypes = s.dataTypes, + responseFields = s.responseFields, + ct, + type, + finalDataType, + firstDataType; + + // Fill responseXXX fields + for ( type in responseFields ) { + if ( type in responses ) { + jqXHR[ responseFields[type] ] = responses[ type ]; + } + } + + // Remove auto dataType and get content-type in the process + while( dataTypes[ 0 ] === "*" ) { + dataTypes.shift(); + if ( ct === undefined ) { + ct = s.mimeType || jqXHR.getResponseHeader( "content-type" ); + } + } + + // Check if we're dealing with a known content-type + if ( ct ) { + for ( type in contents ) { + if ( contents[ type ] && contents[ type ].test( ct ) ) { + dataTypes.unshift( type ); + break; + } + } + } + + // Check to see if we have a response for the expected dataType + if ( dataTypes[ 0 ] in responses ) { + finalDataType = dataTypes[ 0 ]; + } else { + // Try convertible dataTypes + for ( type in responses ) { + if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[0] ] ) { + finalDataType = type; + break; + } + if ( !firstDataType ) { + firstDataType = type; + } + } + // Or just use first one + finalDataType = finalDataType || firstDataType; + } + + // If we found a dataType + // We add the dataType to the list if needed + // and return the corresponding response + if ( finalDataType ) { + if ( finalDataType !== dataTypes[ 0 ] ) { + dataTypes.unshift( finalDataType ); + } + return responses[ finalDataType ]; + } +} + +// Chain conversions given the request and the original response +function ajaxConvert( s, response ) { + + // Apply the dataFilter if provided + if ( s.dataFilter ) { + response = s.dataFilter( response, s.dataType ); + } + + var dataTypes = s.dataTypes, + converters = {}, + i, + key, + length = dataTypes.length, + tmp, + // Current and previous dataTypes + current = dataTypes[ 0 ], + prev, + // Conversion expression + conversion, + // Conversion function + conv, + // Conversion functions (transitive conversion) + conv1, + conv2; + + // For each dataType in the chain + for ( i = 1; i < length; i++ ) { + + // Create converters map + // with lowercased keys + if ( i === 1 ) { + for ( key in s.converters ) { + if ( typeof key === "string" ) { + converters[ key.toLowerCase() ] = s.converters[ key ]; + } + } + } + + // Get the dataTypes + prev = current; + current = dataTypes[ i ]; + + // If current is auto dataType, update it to prev + if ( current === "*" ) { + current = prev; + // If no auto and dataTypes are actually different + } else if ( prev !== "*" && prev !== current ) { + + // Get the converter + conversion = prev + " " + current; + conv = converters[ conversion ] || converters[ "* " + current ]; + + // If there is no direct converter, search transitively + if ( !conv ) { + conv2 = undefined; + for ( conv1 in converters ) { + tmp = conv1.split( " " ); + if ( tmp[ 0 ] === prev || tmp[ 0 ] === "*" ) { + conv2 = converters[ tmp[1] + " " + current ]; + if ( conv2 ) { + conv1 = converters[ conv1 ]; + if ( conv1 === true ) { + conv = conv2; + } else if ( conv2 === true ) { + conv = conv1; + } + break; + } + } + } + } + // If we found no converter, dispatch an error + if ( !( conv || conv2 ) ) { + jQuery.error( "No conversion from " + conversion.replace(" "," to ") ); + } + // If found converter is not an equivalence + if ( conv !== true ) { + // Convert with 1 or 2 converters accordingly + response = conv ? conv( response ) : conv2( conv1(response) ); + } + } + } + return response; +} + + + + +var jsc = jQuery.now(), + jsre = /(\=)\?(&|$)|\?\?/i; + +// Default jsonp settings +jQuery.ajaxSetup({ + jsonp: "callback", + jsonpCallback: function() { + return jQuery.expando + "_" + ( jsc++ ); + } +}); + +// Detect, normalize options and install callbacks for jsonp requests +jQuery.ajaxPrefilter( "json jsonp", function( s, originalSettings, jqXHR ) { + + var inspectData = s.contentType === "application/x-www-form-urlencoded" && + ( typeof s.data === "string" ); + + if ( s.dataTypes[ 0 ] === "jsonp" || + s.jsonp !== false && ( jsre.test( s.url ) || + inspectData && jsre.test( s.data ) ) ) { + + var responseContainer, + jsonpCallback = s.jsonpCallback = + jQuery.isFunction( s.jsonpCallback ) ? s.jsonpCallback() : s.jsonpCallback, + previous = window[ jsonpCallback ], + url = s.url, + data = s.data, + replace = "$1" + jsonpCallback + "$2"; + + if ( s.jsonp !== false ) { + url = url.replace( jsre, replace ); + if ( s.url === url ) { + if ( inspectData ) { + data = data.replace( jsre, replace ); + } + if ( s.data === data ) { + // Add callback manually + url += (/\?/.test( url ) ? "&" : "?") + s.jsonp + "=" + jsonpCallback; + } + } + } + + s.url = url; + s.data = data; + + // Install callback + window[ jsonpCallback ] = function( response ) { + responseContainer = [ response ]; + }; + + // Clean-up function + jqXHR.always(function() { + // Set callback back to previous value + window[ jsonpCallback ] = previous; + // Call if it was a function and we have a response + if ( responseContainer && jQuery.isFunction( previous ) ) { + window[ jsonpCallback ]( responseContainer[ 0 ] ); + } + }); + + // Use data converter to retrieve json after script execution + s.converters["script json"] = function() { + if ( !responseContainer ) { + jQuery.error( jsonpCallback + " was not called" ); + } + return responseContainer[ 0 ]; + }; + + // force json dataType + s.dataTypes[ 0 ] = "json"; + + // Delegate to script + return "script"; + } +}); + + + + +// Install script dataType +jQuery.ajaxSetup({ + accepts: { + script: "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript" + }, + contents: { + script: /javascript|ecmascript/ + }, + converters: { + "text script": function( text ) { + jQuery.globalEval( text ); + return text; + } + } +}); + +// Handle cache's special case and global +jQuery.ajaxPrefilter( "script", function( s ) { + if ( s.cache === undefined ) { + s.cache = false; + } + if ( s.crossDomain ) { + s.type = "GET"; + s.global = false; + } +}); + +// Bind script tag hack transport +jQuery.ajaxTransport( "script", function(s) { + + // This transport only deals with cross domain requests + if ( s.crossDomain ) { + + var script, + head = document.head || document.getElementsByTagName( "head" )[0] || document.documentElement; + + return { + + send: function( _, callback ) { + + script = document.createElement( "script" ); + + script.async = "async"; + + if ( s.scriptCharset ) { + script.charset = s.scriptCharset; + } + + script.src = s.url; + + // Attach handlers for all browsers + script.onload = script.onreadystatechange = function( _, isAbort ) { + + if ( isAbort || !script.readyState || /loaded|complete/.test( script.readyState ) ) { + + // Handle memory leak in IE + script.onload = script.onreadystatechange = null; + + // Remove the script + if ( head && script.parentNode ) { + head.removeChild( script ); + } + + // Dereference the script + script = undefined; + + // Callback if not abort + if ( !isAbort ) { + callback( 200, "success" ); + } + } + }; + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709 and #4378). + head.insertBefore( script, head.firstChild ); + }, + + abort: function() { + if ( script ) { + script.onload( 0, 1 ); + } + } + }; + } +}); + + + + +var // #5280: Internet Explorer will keep connections alive if we don't abort on unload + xhrOnUnloadAbort = window.ActiveXObject ? function() { + // Abort all pending requests + for ( var key in xhrCallbacks ) { + xhrCallbacks[ key ]( 0, 1 ); + } + } : false, + xhrId = 0, + xhrCallbacks; + +// Functions to create xhrs +function createStandardXHR() { + try { + return new window.XMLHttpRequest(); + } catch( e ) {} +} + +function createActiveXHR() { + try { + return new window.ActiveXObject( "Microsoft.XMLHTTP" ); + } catch( e ) {} +} + +// Create the request object +// (This is still attached to ajaxSettings for backward compatibility) +jQuery.ajaxSettings.xhr = window.ActiveXObject ? + /* Microsoft failed to properly + * implement the XMLHttpRequest in IE7 (can't request local files), + * so we use the ActiveXObject when it is available + * Additionally XMLHttpRequest can be disabled in IE7/IE8 so + * we need a fallback. + */ + function() { + return !this.isLocal && createStandardXHR() || createActiveXHR(); + } : + // For all other browsers, use the standard XMLHttpRequest object + createStandardXHR; + +// Determine support properties +(function( xhr ) { + jQuery.extend( jQuery.support, { + ajax: !!xhr, + cors: !!xhr && ( "withCredentials" in xhr ) + }); +})( jQuery.ajaxSettings.xhr() ); + +// Create transport if the browser can provide an xhr +if ( jQuery.support.ajax ) { + + jQuery.ajaxTransport(function( s ) { + // Cross domain only allowed if supported through XMLHttpRequest + if ( !s.crossDomain || jQuery.support.cors ) { + + var callback; + + return { + send: function( headers, complete ) { + + // Get a new xhr + var xhr = s.xhr(), + handle, + i; + + // Open the socket + // Passing null username, generates a login popup on Opera (#2865) + if ( s.username ) { + xhr.open( s.type, s.url, s.async, s.username, s.password ); + } else { + xhr.open( s.type, s.url, s.async ); + } + + // Apply custom fields if provided + if ( s.xhrFields ) { + for ( i in s.xhrFields ) { + xhr[ i ] = s.xhrFields[ i ]; + } + } + + // Override mime type if needed + if ( s.mimeType && xhr.overrideMimeType ) { + xhr.overrideMimeType( s.mimeType ); + } + + // X-Requested-With header + // For cross-domain requests, seeing as conditions for a preflight are + // akin to a jigsaw puzzle, we simply never set it to be sure. + // (it can always be set on a per-request basis or even using ajaxSetup) + // For same-domain requests, won't change header if already provided. + if ( !s.crossDomain && !headers["X-Requested-With"] ) { + headers[ "X-Requested-With" ] = "XMLHttpRequest"; + } + + // Need an extra try/catch for cross domain requests in Firefox 3 + try { + for ( i in headers ) { + xhr.setRequestHeader( i, headers[ i ] ); + } + } catch( _ ) {} + + // Do send the request + // This may raise an exception which is actually + // handled in jQuery.ajax (so no try/catch here) + xhr.send( ( s.hasContent && s.data ) || null ); + + // Listener + callback = function( _, isAbort ) { + + var status, + statusText, + responseHeaders, + responses, + xml; + + // Firefox throws exceptions when accessing properties + // of an xhr when a network error occured + // http://helpful.knobs-dials.com/index.php/Component_returned_failure_code:_0x80040111_(NS_ERROR_NOT_AVAILABLE) + try { + + // Was never called and is aborted or complete + if ( callback && ( isAbort || xhr.readyState === 4 ) ) { + + // Only called once + callback = undefined; + + // Do not keep as active anymore + if ( handle ) { + xhr.onreadystatechange = jQuery.noop; + if ( xhrOnUnloadAbort ) { + delete xhrCallbacks[ handle ]; + } + } + + // If it's an abort + if ( isAbort ) { + // Abort it manually if needed + if ( xhr.readyState !== 4 ) { + xhr.abort(); + } + } else { + status = xhr.status; + responseHeaders = xhr.getAllResponseHeaders(); + responses = {}; + xml = xhr.responseXML; + + // Construct response list + if ( xml && xml.documentElement /* #4958 */ ) { + responses.xml = xml; + } + responses.text = xhr.responseText; + + // Firefox throws an exception when accessing + // statusText for faulty cross-domain requests + try { + statusText = xhr.statusText; + } catch( e ) { + // We normalize with Webkit giving an empty statusText + statusText = ""; + } + + // Filter status for non standard behaviors + + // If the request is local and we have data: assume a success + // (success with no data won't get notified, that's the best we + // can do given current implementations) + if ( !status && s.isLocal && !s.crossDomain ) { + status = responses.text ? 200 : 404; + // IE - #1450: sometimes returns 1223 when it should be 204 + } else if ( status === 1223 ) { + status = 204; + } + } + } + } catch( firefoxAccessException ) { + if ( !isAbort ) { + complete( -1, firefoxAccessException ); + } + } + + // Call complete if needed + if ( responses ) { + complete( status, statusText, responses, responseHeaders ); + } + }; + + // if we're in sync mode or it's in cache + // and has been retrieved directly (IE6 & IE7) + // we need to manually fire the callback + if ( !s.async || xhr.readyState === 4 ) { + callback(); + } else { + handle = ++xhrId; + if ( xhrOnUnloadAbort ) { + // Create the active xhrs callbacks list if needed + // and attach the unload handler + if ( !xhrCallbacks ) { + xhrCallbacks = {}; + jQuery( window ).unload( xhrOnUnloadAbort ); + } + // Add to list of active xhrs callbacks + xhrCallbacks[ handle ] = callback; + } + xhr.onreadystatechange = callback; + } + }, + + abort: function() { + if ( callback ) { + callback(0,1); + } + } + }; + } + }); +} + + + + +var elemdisplay = {}, + iframe, iframeDoc, + rfxtypes = /^(?:toggle|show|hide)$/, + rfxnum = /^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i, + timerId, + fxAttrs = [ + // height animations + [ "height", "marginTop", "marginBottom", "paddingTop", "paddingBottom" ], + // width animations + [ "width", "marginLeft", "marginRight", "paddingLeft", "paddingRight" ], + // opacity animations + [ "opacity" ] + ], + fxNow; + +jQuery.fn.extend({ + show: function( speed, easing, callback ) { + var elem, display; + + if ( speed || speed === 0 ) { + return this.animate( genFx("show", 3), speed, easing, callback ); + + } else { + for ( var i = 0, j = this.length; i < j; i++ ) { + elem = this[ i ]; + + if ( elem.style ) { + display = elem.style.display; + + // Reset the inline display of this element to learn if it is + // being hidden by cascaded rules or not + if ( !jQuery._data(elem, "olddisplay") && display === "none" ) { + display = elem.style.display = ""; + } + + // Set elements which have been overridden with display: none + // in a stylesheet to whatever the default browser style is + // for such an element + if ( display === "" && jQuery.css(elem, "display") === "none" ) { + jQuery._data( elem, "olddisplay", defaultDisplay(elem.nodeName) ); + } + } + } + + // Set the display of most of the elements in a second loop + // to avoid the constant reflow + for ( i = 0; i < j; i++ ) { + elem = this[ i ]; + + if ( elem.style ) { + display = elem.style.display; + + if ( display === "" || display === "none" ) { + elem.style.display = jQuery._data( elem, "olddisplay" ) || ""; + } + } + } + + return this; + } + }, + + hide: function( speed, easing, callback ) { + if ( speed || speed === 0 ) { + return this.animate( genFx("hide", 3), speed, easing, callback); + + } else { + var elem, display, + i = 0, + j = this.length; + + for ( ; i < j; i++ ) { + elem = this[i]; + if ( elem.style ) { + display = jQuery.css( elem, "display" ); + + if ( display !== "none" && !jQuery._data( elem, "olddisplay" ) ) { + jQuery._data( elem, "olddisplay", display ); + } + } + } + + // Set the display of the elements in a second loop + // to avoid the constant reflow + for ( i = 0; i < j; i++ ) { + if ( this[i].style ) { + this[i].style.display = "none"; + } + } + + return this; + } + }, + + // Save the old toggle function + _toggle: jQuery.fn.toggle, + + toggle: function( fn, fn2, callback ) { + var bool = typeof fn === "boolean"; + + if ( jQuery.isFunction(fn) && jQuery.isFunction(fn2) ) { + this._toggle.apply( this, arguments ); + + } else if ( fn == null || bool ) { + this.each(function() { + var state = bool ? fn : jQuery(this).is(":hidden"); + jQuery(this)[ state ? "show" : "hide" ](); + }); + + } else { + this.animate(genFx("toggle", 3), fn, fn2, callback); + } + + return this; + }, + + fadeTo: function( speed, to, easing, callback ) { + return this.filter(":hidden").css("opacity", 0).show().end() + .animate({opacity: to}, speed, easing, callback); + }, + + animate: function( prop, speed, easing, callback ) { + var optall = jQuery.speed( speed, easing, callback ); + + if ( jQuery.isEmptyObject( prop ) ) { + return this.each( optall.complete, [ false ] ); + } + + // Do not change referenced properties as per-property easing will be lost + prop = jQuery.extend( {}, prop ); + + function doAnimation() { + // XXX 'this' does not always have a nodeName when running the + // test suite + + if ( optall.queue === false ) { + jQuery._mark( this ); + } + + var opt = jQuery.extend( {}, optall ), + isElement = this.nodeType === 1, + hidden = isElement && jQuery(this).is(":hidden"), + name, val, p, e, + parts, start, end, unit, + method; + + // will store per property easing and be used to determine when an animation is complete + opt.animatedProperties = {}; + + for ( p in prop ) { + + // property name normalization + name = jQuery.camelCase( p ); + if ( p !== name ) { + prop[ name ] = prop[ p ]; + delete prop[ p ]; + } + + val = prop[ name ]; + + // easing resolution: per property > opt.specialEasing > opt.easing > 'swing' (default) + if ( jQuery.isArray( val ) ) { + opt.animatedProperties[ name ] = val[ 1 ]; + val = prop[ name ] = val[ 0 ]; + } else { + opt.animatedProperties[ name ] = opt.specialEasing && opt.specialEasing[ name ] || opt.easing || 'swing'; + } + + if ( val === "hide" && hidden || val === "show" && !hidden ) { + return opt.complete.call( this ); + } + + if ( isElement && ( name === "height" || name === "width" ) ) { + // Make sure that nothing sneaks out + // Record all 3 overflow attributes because IE does not + // change the overflow attribute when overflowX and + // overflowY are set to the same value + opt.overflow = [ this.style.overflow, this.style.overflowX, this.style.overflowY ]; + + // Set display property to inline-block for height/width + // animations on inline elements that are having width/height animated + if ( jQuery.css( this, "display" ) === "inline" && + jQuery.css( this, "float" ) === "none" ) { + + // inline-level elements accept inline-block; + // block-level elements need to be inline with layout + if ( !jQuery.support.inlineBlockNeedsLayout || defaultDisplay( this.nodeName ) === "inline" ) { + this.style.display = "inline-block"; + + } else { + this.style.zoom = 1; + } + } + } + } + + if ( opt.overflow != null ) { + this.style.overflow = "hidden"; + } + + for ( p in prop ) { + e = new jQuery.fx( this, opt, p ); + val = prop[ p ]; + + if ( rfxtypes.test( val ) ) { + + // Tracks whether to show or hide based on private + // data attached to the element + method = jQuery._data( this, "toggle" + p ) || ( val === "toggle" ? hidden ? "show" : "hide" : 0 ); + if ( method ) { + jQuery._data( this, "toggle" + p, method === "show" ? "hide" : "show" ); + e[ method ](); + } else { + e[ val ](); + } + + } else { + parts = rfxnum.exec( val ); + start = e.cur(); + + if ( parts ) { + end = parseFloat( parts[2] ); + unit = parts[3] || ( jQuery.cssNumber[ p ] ? "" : "px" ); + + // We need to compute starting value + if ( unit !== "px" ) { + jQuery.style( this, p, (end || 1) + unit); + start = ( (end || 1) / e.cur() ) * start; + jQuery.style( this, p, start + unit); + } + + // If a +=/-= token was provided, we're doing a relative animation + if ( parts[1] ) { + end = ( (parts[ 1 ] === "-=" ? -1 : 1) * end ) + start; + } + + e.custom( start, end, unit ); + + } else { + e.custom( start, val, "" ); + } + } + } + + // For JS strict compliance + return true; + } + + return optall.queue === false ? + this.each( doAnimation ) : + this.queue( optall.queue, doAnimation ); + }, + + stop: function( type, clearQueue, gotoEnd ) { + if ( typeof type !== "string" ) { + gotoEnd = clearQueue; + clearQueue = type; + type = undefined; + } + if ( clearQueue && type !== false ) { + this.queue( type || "fx", [] ); + } + + return this.each(function() { + var index, + hadTimers = false, + timers = jQuery.timers, + data = jQuery._data( this ); + + // clear marker counters if we know they won't be + if ( !gotoEnd ) { + jQuery._unmark( true, this ); + } + + function stopQueue( elem, data, index ) { + var hooks = data[ index ]; + jQuery.removeData( elem, index, true ); + hooks.stop( gotoEnd ); + } + + if ( type == null ) { + for ( index in data ) { + if ( data[ index ] && data[ index ].stop && index.indexOf(".run") === index.length - 4 ) { + stopQueue( this, data, index ); + } + } + } else if ( data[ index = type + ".run" ] && data[ index ].stop ){ + stopQueue( this, data, index ); + } + + for ( index = timers.length; index--; ) { + if ( timers[ index ].elem === this && (type == null || timers[ index ].queue === type) ) { + if ( gotoEnd ) { + + // force the next step to be the last + timers[ index ]( true ); + } else { + timers[ index ].saveState(); + } + hadTimers = true; + timers.splice( index, 1 ); + } + } + + // start the next in the queue if the last step wasn't forced + // timers currently will call their complete callbacks, which will dequeue + // but only if they were gotoEnd + if ( !( gotoEnd && hadTimers ) ) { + jQuery.dequeue( this, type ); + } + }); + } + +}); + +// Animations created synchronously will run synchronously +function createFxNow() { + setTimeout( clearFxNow, 0 ); + return ( fxNow = jQuery.now() ); +} + +function clearFxNow() { + fxNow = undefined; +} + +// Generate parameters to create a standard animation +function genFx( type, num ) { + var obj = {}; + + jQuery.each( fxAttrs.concat.apply([], fxAttrs.slice( 0, num )), function() { + obj[ this ] = type; + }); + + return obj; +} + +// Generate shortcuts for custom animations +jQuery.each({ + slideDown: genFx( "show", 1 ), + slideUp: genFx( "hide", 1 ), + slideToggle: genFx( "toggle", 1 ), + fadeIn: { opacity: "show" }, + fadeOut: { opacity: "hide" }, + fadeToggle: { opacity: "toggle" } +}, function( name, props ) { + jQuery.fn[ name ] = function( speed, easing, callback ) { + return this.animate( props, speed, easing, callback ); + }; +}); + +jQuery.extend({ + speed: function( speed, easing, fn ) { + var opt = speed && typeof speed === "object" ? jQuery.extend( {}, speed ) : { + complete: fn || !fn && easing || + jQuery.isFunction( speed ) && speed, + duration: speed, + easing: fn && easing || easing && !jQuery.isFunction( easing ) && easing + }; + + opt.duration = jQuery.fx.off ? 0 : typeof opt.duration === "number" ? opt.duration : + opt.duration in jQuery.fx.speeds ? jQuery.fx.speeds[ opt.duration ] : jQuery.fx.speeds._default; + + // normalize opt.queue - true/undefined/null -> "fx" + if ( opt.queue == null || opt.queue === true ) { + opt.queue = "fx"; + } + + // Queueing + opt.old = opt.complete; + + opt.complete = function( noUnmark ) { + if ( jQuery.isFunction( opt.old ) ) { + opt.old.call( this ); + } + + if ( opt.queue ) { + jQuery.dequeue( this, opt.queue ); + } else if ( noUnmark !== false ) { + jQuery._unmark( this ); + } + }; + + return opt; + }, + + easing: { + linear: function( p, n, firstNum, diff ) { + return firstNum + diff * p; + }, + swing: function( p, n, firstNum, diff ) { + return ( ( -Math.cos( p*Math.PI ) / 2 ) + 0.5 ) * diff + firstNum; + } + }, + + timers: [], + + fx: function( elem, options, prop ) { + this.options = options; + this.elem = elem; + this.prop = prop; + + options.orig = options.orig || {}; + } + +}); + +jQuery.fx.prototype = { + // Simple function for setting a style value + update: function() { + if ( this.options.step ) { + this.options.step.call( this.elem, this.now, this ); + } + + ( jQuery.fx.step[ this.prop ] || jQuery.fx.step._default )( this ); + }, + + // Get the current size + cur: function() { + if ( this.elem[ this.prop ] != null && (!this.elem.style || this.elem.style[ this.prop ] == null) ) { + return this.elem[ this.prop ]; + } + + var parsed, + r = jQuery.css( this.elem, this.prop ); + // Empty strings, null, undefined and "auto" are converted to 0, + // complex values such as "rotate(1rad)" are returned as is, + // simple values such as "10px" are parsed to Float. + return isNaN( parsed = parseFloat( r ) ) ? !r || r === "auto" ? 0 : r : parsed; + }, + + // Start an animation from one number to another + custom: function( from, to, unit ) { + var self = this, + fx = jQuery.fx; + + this.startTime = fxNow || createFxNow(); + this.end = to; + this.now = this.start = from; + this.pos = this.state = 0; + this.unit = unit || this.unit || ( jQuery.cssNumber[ this.prop ] ? "" : "px" ); + + function t( gotoEnd ) { + return self.step( gotoEnd ); + } + + t.queue = this.options.queue; + t.elem = this.elem; + t.saveState = function() { + if ( self.options.hide && jQuery._data( self.elem, "fxshow" + self.prop ) === undefined ) { + jQuery._data( self.elem, "fxshow" + self.prop, self.start ); + } + }; + + if ( t() && jQuery.timers.push(t) && !timerId ) { + timerId = setInterval( fx.tick, fx.interval ); + } + }, + + // Simple 'show' function + show: function() { + var dataShow = jQuery._data( this.elem, "fxshow" + this.prop ); + + // Remember where we started, so that we can go back to it later + this.options.orig[ this.prop ] = dataShow || jQuery.style( this.elem, this.prop ); + this.options.show = true; + + // Begin the animation + // Make sure that we start at a small width/height to avoid any flash of content + if ( dataShow !== undefined ) { + // This show is picking up where a previous hide or show left off + this.custom( this.cur(), dataShow ); + } else { + this.custom( this.prop === "width" || this.prop === "height" ? 1 : 0, this.cur() ); + } + + // Start by showing the element + jQuery( this.elem ).show(); + }, + + // Simple 'hide' function + hide: function() { + // Remember where we started, so that we can go back to it later + this.options.orig[ this.prop ] = jQuery._data( this.elem, "fxshow" + this.prop ) || jQuery.style( this.elem, this.prop ); + this.options.hide = true; + + // Begin the animation + this.custom( this.cur(), 0 ); + }, + + // Each step of an animation + step: function( gotoEnd ) { + var p, n, complete, + t = fxNow || createFxNow(), + done = true, + elem = this.elem, + options = this.options; + + if ( gotoEnd || t >= options.duration + this.startTime ) { + this.now = this.end; + this.pos = this.state = 1; + this.update(); + + options.animatedProperties[ this.prop ] = true; + + for ( p in options.animatedProperties ) { + if ( options.animatedProperties[ p ] !== true ) { + done = false; + } + } + + if ( done ) { + // Reset the overflow + if ( options.overflow != null && !jQuery.support.shrinkWrapBlocks ) { + + jQuery.each( [ "", "X", "Y" ], function( index, value ) { + elem.style[ "overflow" + value ] = options.overflow[ index ]; + }); + } + + // Hide the element if the "hide" operation was done + if ( options.hide ) { + jQuery( elem ).hide(); + } + + // Reset the properties, if the item has been hidden or shown + if ( options.hide || options.show ) { + for ( p in options.animatedProperties ) { + jQuery.style( elem, p, options.orig[ p ] ); + jQuery.removeData( elem, "fxshow" + p, true ); + // Toggle data is no longer needed + jQuery.removeData( elem, "toggle" + p, true ); + } + } + + // Execute the complete function + // in the event that the complete function throws an exception + // we must ensure it won't be called twice. #5684 + + complete = options.complete; + if ( complete ) { + + options.complete = false; + complete.call( elem ); + } + } + + return false; + + } else { + // classical easing cannot be used with an Infinity duration + if ( options.duration == Infinity ) { + this.now = t; + } else { + n = t - this.startTime; + this.state = n / options.duration; + + // Perform the easing function, defaults to swing + this.pos = jQuery.easing[ options.animatedProperties[this.prop] ]( this.state, n, 0, 1, options.duration ); + this.now = this.start + ( (this.end - this.start) * this.pos ); + } + // Perform the next step of the animation + this.update(); + } + + return true; + } +}; + +jQuery.extend( jQuery.fx, { + tick: function() { + var timer, + timers = jQuery.timers, + i = 0; + + for ( ; i < timers.length; i++ ) { + timer = timers[ i ]; + // Checks the timer has not already been removed + if ( !timer() && timers[ i ] === timer ) { + timers.splice( i--, 1 ); + } + } + + if ( !timers.length ) { + jQuery.fx.stop(); + } + }, + + interval: 13, + + stop: function() { + clearInterval( timerId ); + timerId = null; + }, + + speeds: { + slow: 600, + fast: 200, + // Default speed + _default: 400 + }, + + step: { + opacity: function( fx ) { + jQuery.style( fx.elem, "opacity", fx.now ); + }, + + _default: function( fx ) { + if ( fx.elem.style && fx.elem.style[ fx.prop ] != null ) { + fx.elem.style[ fx.prop ] = fx.now + fx.unit; + } else { + fx.elem[ fx.prop ] = fx.now; + } + } + } +}); + +// Adds width/height step functions +// Do not set anything below 0 +jQuery.each([ "width", "height" ], function( i, prop ) { + jQuery.fx.step[ prop ] = function( fx ) { + jQuery.style( fx.elem, prop, Math.max(0, fx.now) + fx.unit ); + }; +}); + +if ( jQuery.expr && jQuery.expr.filters ) { + jQuery.expr.filters.animated = function( elem ) { + return jQuery.grep(jQuery.timers, function( fn ) { + return elem === fn.elem; + }).length; + }; +} + +// Try to restore the default display value of an element +function defaultDisplay( nodeName ) { + + if ( !elemdisplay[ nodeName ] ) { + + var body = document.body, + elem = jQuery( "<" + nodeName + ">" ).appendTo( body ), + display = elem.css( "display" ); + elem.remove(); + + // If the simple way fails, + // get element's real default display by attaching it to a temp iframe + if ( display === "none" || display === "" ) { + // No iframe to use yet, so create it + if ( !iframe ) { + iframe = document.createElement( "iframe" ); + iframe.frameBorder = iframe.width = iframe.height = 0; + } + + body.appendChild( iframe ); + + // Create a cacheable copy of the iframe document on first call. + // IE and Opera will allow us to reuse the iframeDoc without re-writing the fake HTML + // document to it; WebKit & Firefox won't allow reusing the iframe document. + if ( !iframeDoc || !iframe.createElement ) { + iframeDoc = ( iframe.contentWindow || iframe.contentDocument ).document; + iframeDoc.write( ( document.compatMode === "CSS1Compat" ? "<!doctype html>" : "" ) + "<html><body>" ); + iframeDoc.close(); + } + + elem = iframeDoc.createElement( nodeName ); + + iframeDoc.body.appendChild( elem ); + + display = jQuery.css( elem, "display" ); + body.removeChild( iframe ); + } + + // Store the correct default display + elemdisplay[ nodeName ] = display; + } + + return elemdisplay[ nodeName ]; +} + + + + +var rtable = /^t(?:able|d|h)$/i, + rroot = /^(?:body|html)$/i; + +if ( "getBoundingClientRect" in document.documentElement ) { + jQuery.fn.offset = function( options ) { + var elem = this[0], box; + + if ( options ) { + return this.each(function( i ) { + jQuery.offset.setOffset( this, options, i ); + }); + } + + if ( !elem || !elem.ownerDocument ) { + return null; + } + + if ( elem === elem.ownerDocument.body ) { + return jQuery.offset.bodyOffset( elem ); + } + + try { + box = elem.getBoundingClientRect(); + } catch(e) {} + + var doc = elem.ownerDocument, + docElem = doc.documentElement; + + // Make sure we're not dealing with a disconnected DOM node + if ( !box || !jQuery.contains( docElem, elem ) ) { + return box ? { top: box.top, left: box.left } : { top: 0, left: 0 }; + } + + var body = doc.body, + win = getWindow(doc), + clientTop = docElem.clientTop || body.clientTop || 0, + clientLeft = docElem.clientLeft || body.clientLeft || 0, + scrollTop = win.pageYOffset || jQuery.support.boxModel && docElem.scrollTop || body.scrollTop, + scrollLeft = win.pageXOffset || jQuery.support.boxModel && docElem.scrollLeft || body.scrollLeft, + top = box.top + scrollTop - clientTop, + left = box.left + scrollLeft - clientLeft; + + return { top: top, left: left }; + }; + +} else { + jQuery.fn.offset = function( options ) { + var elem = this[0]; + + if ( options ) { + return this.each(function( i ) { + jQuery.offset.setOffset( this, options, i ); + }); + } + + if ( !elem || !elem.ownerDocument ) { + return null; + } + + if ( elem === elem.ownerDocument.body ) { + return jQuery.offset.bodyOffset( elem ); + } + + var computedStyle, + offsetParent = elem.offsetParent, + prevOffsetParent = elem, + doc = elem.ownerDocument, + docElem = doc.documentElement, + body = doc.body, + defaultView = doc.defaultView, + prevComputedStyle = defaultView ? defaultView.getComputedStyle( elem, null ) : elem.currentStyle, + top = elem.offsetTop, + left = elem.offsetLeft; + + while ( (elem = elem.parentNode) && elem !== body && elem !== docElem ) { + if ( jQuery.support.fixedPosition && prevComputedStyle.position === "fixed" ) { + break; + } + + computedStyle = defaultView ? defaultView.getComputedStyle(elem, null) : elem.currentStyle; + top -= elem.scrollTop; + left -= elem.scrollLeft; + + if ( elem === offsetParent ) { + top += elem.offsetTop; + left += elem.offsetLeft; + + if ( jQuery.support.doesNotAddBorder && !(jQuery.support.doesAddBorderForTableAndCells && rtable.test(elem.nodeName)) ) { + top += parseFloat( computedStyle.borderTopWidth ) || 0; + left += parseFloat( computedStyle.borderLeftWidth ) || 0; + } + + prevOffsetParent = offsetParent; + offsetParent = elem.offsetParent; + } + + if ( jQuery.support.subtractsBorderForOverflowNotVisible && computedStyle.overflow !== "visible" ) { + top += parseFloat( computedStyle.borderTopWidth ) || 0; + left += parseFloat( computedStyle.borderLeftWidth ) || 0; + } + + prevComputedStyle = computedStyle; + } + + if ( prevComputedStyle.position === "relative" || prevComputedStyle.position === "static" ) { + top += body.offsetTop; + left += body.offsetLeft; + } + + if ( jQuery.support.fixedPosition && prevComputedStyle.position === "fixed" ) { + top += Math.max( docElem.scrollTop, body.scrollTop ); + left += Math.max( docElem.scrollLeft, body.scrollLeft ); + } + + return { top: top, left: left }; + }; +} + +jQuery.offset = { + + bodyOffset: function( body ) { + var top = body.offsetTop, + left = body.offsetLeft; + + if ( jQuery.support.doesNotIncludeMarginInBodyOffset ) { + top += parseFloat( jQuery.css(body, "marginTop") ) || 0; + left += parseFloat( jQuery.css(body, "marginLeft") ) || 0; + } + + return { top: top, left: left }; + }, + + setOffset: function( elem, options, i ) { + var position = jQuery.css( elem, "position" ); + + // set position first, in-case top/left are set even on static elem + if ( position === "static" ) { + elem.style.position = "relative"; + } + + var curElem = jQuery( elem ), + curOffset = curElem.offset(), + curCSSTop = jQuery.css( elem, "top" ), + curCSSLeft = jQuery.css( elem, "left" ), + calculatePosition = ( position === "absolute" || position === "fixed" ) && jQuery.inArray("auto", [curCSSTop, curCSSLeft]) > -1, + props = {}, curPosition = {}, curTop, curLeft; + + // need to be able to calculate position if either top or left is auto and position is either absolute or fixed + if ( calculatePosition ) { + curPosition = curElem.position(); + curTop = curPosition.top; + curLeft = curPosition.left; + } else { + curTop = parseFloat( curCSSTop ) || 0; + curLeft = parseFloat( curCSSLeft ) || 0; + } + + if ( jQuery.isFunction( options ) ) { + options = options.call( elem, i, curOffset ); + } + + if ( options.top != null ) { + props.top = ( options.top - curOffset.top ) + curTop; + } + if ( options.left != null ) { + props.left = ( options.left - curOffset.left ) + curLeft; + } + + if ( "using" in options ) { + options.using.call( elem, props ); + } else { + curElem.css( props ); + } + } +}; + + +jQuery.fn.extend({ + + position: function() { + if ( !this[0] ) { + return null; + } + + var elem = this[0], + + // Get *real* offsetParent + offsetParent = this.offsetParent(), + + // Get correct offsets + offset = this.offset(), + parentOffset = rroot.test(offsetParent[0].nodeName) ? { top: 0, left: 0 } : offsetParent.offset(); + + // Subtract element margins + // note: when an element has margin: auto the offsetLeft and marginLeft + // are the same in Safari causing offset.left to incorrectly be 0 + offset.top -= parseFloat( jQuery.css(elem, "marginTop") ) || 0; + offset.left -= parseFloat( jQuery.css(elem, "marginLeft") ) || 0; + + // Add offsetParent borders + parentOffset.top += parseFloat( jQuery.css(offsetParent[0], "borderTopWidth") ) || 0; + parentOffset.left += parseFloat( jQuery.css(offsetParent[0], "borderLeftWidth") ) || 0; + + // Subtract the two offsets + return { + top: offset.top - parentOffset.top, + left: offset.left - parentOffset.left + }; + }, + + offsetParent: function() { + return this.map(function() { + var offsetParent = this.offsetParent || document.body; + while ( offsetParent && (!rroot.test(offsetParent.nodeName) && jQuery.css(offsetParent, "position") === "static") ) { + offsetParent = offsetParent.offsetParent; + } + return offsetParent; + }); + } +}); + + +// Create scrollLeft and scrollTop methods +jQuery.each( ["Left", "Top"], function( i, name ) { + var method = "scroll" + name; + + jQuery.fn[ method ] = function( val ) { + var elem, win; + + if ( val === undefined ) { + elem = this[ 0 ]; + + if ( !elem ) { + return null; + } + + win = getWindow( elem ); + + // Return the scroll offset + return win ? ("pageXOffset" in win) ? win[ i ? "pageYOffset" : "pageXOffset" ] : + jQuery.support.boxModel && win.document.documentElement[ method ] || + win.document.body[ method ] : + elem[ method ]; + } + + // Set the scroll offset + return this.each(function() { + win = getWindow( this ); + + if ( win ) { + win.scrollTo( + !i ? val : jQuery( win ).scrollLeft(), + i ? val : jQuery( win ).scrollTop() + ); + + } else { + this[ method ] = val; + } + }); + }; +}); + +function getWindow( elem ) { + return jQuery.isWindow( elem ) ? + elem : + elem.nodeType === 9 ? + elem.defaultView || elem.parentWindow : + false; +} + + + + +// Create width, height, innerHeight, innerWidth, outerHeight and outerWidth methods +jQuery.each([ "Height", "Width" ], function( i, name ) { + + var type = name.toLowerCase(); + + // innerHeight and innerWidth + jQuery.fn[ "inner" + name ] = function() { + var elem = this[0]; + return elem ? + elem.style ? + parseFloat( jQuery.css( elem, type, "padding" ) ) : + this[ type ]() : + null; + }; + + // outerHeight and outerWidth + jQuery.fn[ "outer" + name ] = function( margin ) { + var elem = this[0]; + return elem ? + elem.style ? + parseFloat( jQuery.css( elem, type, margin ? "margin" : "border" ) ) : + this[ type ]() : + null; + }; + + jQuery.fn[ type ] = function( size ) { + // Get window width or height + var elem = this[0]; + if ( !elem ) { + return size == null ? null : this; + } + + if ( jQuery.isFunction( size ) ) { + return this.each(function( i ) { + var self = jQuery( this ); + self[ type ]( size.call( this, i, self[ type ]() ) ); + }); + } + + if ( jQuery.isWindow( elem ) ) { + // Everyone else use document.documentElement or document.body depending on Quirks vs Standards mode + // 3rd condition allows Nokia support, as it supports the docElem prop but not CSS1Compat + var docElemProp = elem.document.documentElement[ "client" + name ], + body = elem.document.body; + return elem.document.compatMode === "CSS1Compat" && docElemProp || + body && body[ "client" + name ] || docElemProp; + + // Get document width or height + } else if ( elem.nodeType === 9 ) { + // Either scroll[Width/Height] or offset[Width/Height], whichever is greater + return Math.max( + elem.documentElement["client" + name], + elem.body["scroll" + name], elem.documentElement["scroll" + name], + elem.body["offset" + name], elem.documentElement["offset" + name] + ); + + // Get or set width or height on the element + } else if ( size === undefined ) { + var orig = jQuery.css( elem, type ), + ret = parseFloat( orig ); + + return jQuery.isNumeric( ret ) ? ret : orig; + + // Set the width or height on the element (default to pixels if value is unitless) + } else { + return this.css( type, typeof size === "string" ? size : size + "px" ); + } + }; + +}); + + + + +// Expose jQuery to the global object +window.jQuery = window.$ = jQuery; + +// Expose jQuery as an AMD module, but only for AMD loaders that +// understand the issues with loading multiple versions of jQuery +// in a page that all might call define(). The loader will indicate +// they have special allowances for multiple jQuery versions by +// specifying define.amd.jQuery = true. Register as a named module, +// since jQuery can be concatenated with other files that may use define, +// but not use a proper concatenation script that understands anonymous +// AMD modules. A named AMD is safest and most robust way to register. +// Lowercase jquery is used because AMD module names are derived from +// file names, and jQuery is normally delivered in a lowercase file name. +// Do this after creating the global so that if an AMD module wants to call +// noConflict to hide this version of jQuery, it will work. +if ( typeof define === "function" && define.amd && define.amd.jQuery ) { + define( "jquery", [], function () { return jQuery; } ); +} + + + +})( window ); diff --git a/lib/beetsplug/web/static/underscore.js b/lib/beetsplug/web/static/underscore.js new file mode 100644 index 00000000..5579c07d --- /dev/null +++ b/lib/beetsplug/web/static/underscore.js @@ -0,0 +1,977 @@ +// Underscore.js 1.2.2 +// (c) 2011 Jeremy Ashkenas, DocumentCloud Inc. +// Underscore is freely distributable under the MIT license. +// Portions of Underscore are inspired or borrowed from Prototype, +// Oliver Steele's Functional, and John Resig's Micro-Templating. +// For all details and documentation: +// http://documentcloud.github.com/underscore + +(function() { + + // Baseline setup + // -------------- + + // Establish the root object, `window` in the browser, or `global` on the server. + var root = this; + + // Save the previous value of the `_` variable. + var previousUnderscore = root._; + + // Establish the object that gets returned to break out of a loop iteration. + var breaker = {}; + + // Save bytes in the minified (but not gzipped) version: + var ArrayProto = Array.prototype, ObjProto = Object.prototype, FuncProto = Function.prototype; + + // Create quick reference variables for speed access to core prototypes. + var slice = ArrayProto.slice, + unshift = ArrayProto.unshift, + toString = ObjProto.toString, + hasOwnProperty = ObjProto.hasOwnProperty; + + // All **ECMAScript 5** native function implementations that we hope to use + // are declared here. + var + nativeForEach = ArrayProto.forEach, + nativeMap = ArrayProto.map, + nativeReduce = ArrayProto.reduce, + nativeReduceRight = ArrayProto.reduceRight, + nativeFilter = ArrayProto.filter, + nativeEvery = ArrayProto.every, + nativeSome = ArrayProto.some, + nativeIndexOf = ArrayProto.indexOf, + nativeLastIndexOf = ArrayProto.lastIndexOf, + nativeIsArray = Array.isArray, + nativeKeys = Object.keys, + nativeBind = FuncProto.bind; + + // Create a safe reference to the Underscore object for use below. + var _ = function(obj) { return new wrapper(obj); }; + + // Export the Underscore object for **Node.js** and **"CommonJS"**, with + // backwards-compatibility for the old `require()` API. If we're not in + // CommonJS, add `_` to the global object. + if (typeof exports !== 'undefined') { + if (typeof module !== 'undefined' && module.exports) { + exports = module.exports = _; + } + exports._ = _; + } else if (typeof define === 'function' && define.amd) { + // Register as a named module with AMD. + define('underscore', function() { + return _; + }); + } else { + // Exported as a string, for Closure Compiler "advanced" mode. + root['_'] = _; + } + + // Current version. + _.VERSION = '1.2.2'; + + // Collection Functions + // -------------------- + + // The cornerstone, an `each` implementation, aka `forEach`. + // Handles objects with the built-in `forEach`, arrays, and raw objects. + // Delegates to **ECMAScript 5**'s native `forEach` if available. + var each = _.each = _.forEach = function(obj, iterator, context) { + if (obj == null) return; + if (nativeForEach && obj.forEach === nativeForEach) { + obj.forEach(iterator, context); + } else if (obj.length === +obj.length) { + for (var i = 0, l = obj.length; i < l; i++) { + if (i in obj && iterator.call(context, obj[i], i, obj) === breaker) return; + } + } else { + for (var key in obj) { + if (hasOwnProperty.call(obj, key)) { + if (iterator.call(context, obj[key], key, obj) === breaker) return; + } + } + } + }; + + // Return the results of applying the iterator to each element. + // Delegates to **ECMAScript 5**'s native `map` if available. + _.map = function(obj, iterator, context) { + var results = []; + if (obj == null) return results; + if (nativeMap && obj.map === nativeMap) return obj.map(iterator, context); + each(obj, function(value, index, list) { + results[results.length] = iterator.call(context, value, index, list); + }); + return results; + }; + + // **Reduce** builds up a single result from a list of values, aka `inject`, + // or `foldl`. Delegates to **ECMAScript 5**'s native `reduce` if available. + _.reduce = _.foldl = _.inject = function(obj, iterator, memo, context) { + var initial = memo !== void 0; + if (obj == null) obj = []; + if (nativeReduce && obj.reduce === nativeReduce) { + if (context) iterator = _.bind(iterator, context); + return initial ? obj.reduce(iterator, memo) : obj.reduce(iterator); + } + each(obj, function(value, index, list) { + if (!initial) { + memo = value; + initial = true; + } else { + memo = iterator.call(context, memo, value, index, list); + } + }); + if (!initial) throw new TypeError("Reduce of empty array with no initial value"); + return memo; + }; + + // The right-associative version of reduce, also known as `foldr`. + // Delegates to **ECMAScript 5**'s native `reduceRight` if available. + _.reduceRight = _.foldr = function(obj, iterator, memo, context) { + if (obj == null) obj = []; + if (nativeReduceRight && obj.reduceRight === nativeReduceRight) { + if (context) iterator = _.bind(iterator, context); + return memo !== void 0 ? obj.reduceRight(iterator, memo) : obj.reduceRight(iterator); + } + var reversed = (_.isArray(obj) ? obj.slice() : _.toArray(obj)).reverse(); + return _.reduce(reversed, iterator, memo, context); + }; + + // Return the first value which passes a truth test. Aliased as `detect`. + _.find = _.detect = function(obj, iterator, context) { + var result; + any(obj, function(value, index, list) { + if (iterator.call(context, value, index, list)) { + result = value; + return true; + } + }); + return result; + }; + + // Return all the elements that pass a truth test. + // Delegates to **ECMAScript 5**'s native `filter` if available. + // Aliased as `select`. + _.filter = _.select = function(obj, iterator, context) { + var results = []; + if (obj == null) return results; + if (nativeFilter && obj.filter === nativeFilter) return obj.filter(iterator, context); + each(obj, function(value, index, list) { + if (iterator.call(context, value, index, list)) results[results.length] = value; + }); + return results; + }; + + // Return all the elements for which a truth test fails. + _.reject = function(obj, iterator, context) { + var results = []; + if (obj == null) return results; + each(obj, function(value, index, list) { + if (!iterator.call(context, value, index, list)) results[results.length] = value; + }); + return results; + }; + + // Determine whether all of the elements match a truth test. + // Delegates to **ECMAScript 5**'s native `every` if available. + // Aliased as `all`. + _.every = _.all = function(obj, iterator, context) { + var result = true; + if (obj == null) return result; + if (nativeEvery && obj.every === nativeEvery) return obj.every(iterator, context); + each(obj, function(value, index, list) { + if (!(result = result && iterator.call(context, value, index, list))) return breaker; + }); + return result; + }; + + // Determine if at least one element in the object matches a truth test. + // Delegates to **ECMAScript 5**'s native `some` if available. + // Aliased as `any`. + var any = _.some = _.any = function(obj, iterator, context) { + iterator = iterator || _.identity; + var result = false; + if (obj == null) return result; + if (nativeSome && obj.some === nativeSome) return obj.some(iterator, context); + each(obj, function(value, index, list) { + if (result || (result = iterator.call(context, value, index, list))) return breaker; + }); + return !!result; + }; + + // Determine if a given value is included in the array or object using `===`. + // Aliased as `contains`. + _.include = _.contains = function(obj, target) { + var found = false; + if (obj == null) return found; + if (nativeIndexOf && obj.indexOf === nativeIndexOf) return obj.indexOf(target) != -1; + found = any(obj, function(value) { + return value === target; + }); + return found; + }; + + // Invoke a method (with arguments) on every item in a collection. + _.invoke = function(obj, method) { + var args = slice.call(arguments, 2); + return _.map(obj, function(value) { + return (method.call ? method || value : value[method]).apply(value, args); + }); + }; + + // Convenience version of a common use case of `map`: fetching a property. + _.pluck = function(obj, key) { + return _.map(obj, function(value){ return value[key]; }); + }; + + // Return the maximum element or (element-based computation). + _.max = function(obj, iterator, context) { + if (!iterator && _.isArray(obj)) return Math.max.apply(Math, obj); + if (!iterator && _.isEmpty(obj)) return -Infinity; + var result = {computed : -Infinity}; + each(obj, function(value, index, list) { + var computed = iterator ? iterator.call(context, value, index, list) : value; + computed >= result.computed && (result = {value : value, computed : computed}); + }); + return result.value; + }; + + // Return the minimum element (or element-based computation). + _.min = function(obj, iterator, context) { + if (!iterator && _.isArray(obj)) return Math.min.apply(Math, obj); + if (!iterator && _.isEmpty(obj)) return Infinity; + var result = {computed : Infinity}; + each(obj, function(value, index, list) { + var computed = iterator ? iterator.call(context, value, index, list) : value; + computed < result.computed && (result = {value : value, computed : computed}); + }); + return result.value; + }; + + // Shuffle an array. + _.shuffle = function(obj) { + var shuffled = [], rand; + each(obj, function(value, index, list) { + if (index == 0) { + shuffled[0] = value; + } else { + rand = Math.floor(Math.random() * (index + 1)); + shuffled[index] = shuffled[rand]; + shuffled[rand] = value; + } + }); + return shuffled; + }; + + // Sort the object's values by a criterion produced by an iterator. + _.sortBy = function(obj, iterator, context) { + return _.pluck(_.map(obj, function(value, index, list) { + return { + value : value, + criteria : iterator.call(context, value, index, list) + }; + }).sort(function(left, right) { + var a = left.criteria, b = right.criteria; + return a < b ? -1 : a > b ? 1 : 0; + }), 'value'); + }; + + // Groups the object's values by a criterion. Pass either a string attribute + // to group by, or a function that returns the criterion. + _.groupBy = function(obj, val) { + var result = {}; + var iterator = _.isFunction(val) ? val : function(obj) { return obj[val]; }; + each(obj, function(value, index) { + var key = iterator(value, index); + (result[key] || (result[key] = [])).push(value); + }); + return result; + }; + + // Use a comparator function to figure out at what index an object should + // be inserted so as to maintain order. Uses binary search. + _.sortedIndex = function(array, obj, iterator) { + iterator || (iterator = _.identity); + var low = 0, high = array.length; + while (low < high) { + var mid = (low + high) >> 1; + iterator(array[mid]) < iterator(obj) ? low = mid + 1 : high = mid; + } + return low; + }; + + // Safely convert anything iterable into a real, live array. + _.toArray = function(iterable) { + if (!iterable) return []; + if (iterable.toArray) return iterable.toArray(); + if (_.isArray(iterable)) return slice.call(iterable); + if (_.isArguments(iterable)) return slice.call(iterable); + return _.values(iterable); + }; + + // Return the number of elements in an object. + _.size = function(obj) { + return _.toArray(obj).length; + }; + + // Array Functions + // --------------- + + // Get the first element of an array. Passing **n** will return the first N + // values in the array. Aliased as `head`. The **guard** check allows it to work + // with `_.map`. + _.first = _.head = function(array, n, guard) { + return (n != null) && !guard ? slice.call(array, 0, n) : array[0]; + }; + + // Returns everything but the last entry of the array. Especcialy useful on + // the arguments object. Passing **n** will return all the values in + // the array, excluding the last N. The **guard** check allows it to work with + // `_.map`. + _.initial = function(array, n, guard) { + return slice.call(array, 0, array.length - ((n == null) || guard ? 1 : n)); + }; + + // Get the last element of an array. Passing **n** will return the last N + // values in the array. The **guard** check allows it to work with `_.map`. + _.last = function(array, n, guard) { + if ((n != null) && !guard) { + return slice.call(array, Math.max(array.length - n, 0)); + } else { + return array[array.length - 1]; + } + }; + + // Returns everything but the first entry of the array. Aliased as `tail`. + // Especially useful on the arguments object. Passing an **index** will return + // the rest of the values in the array from that index onward. The **guard** + // check allows it to work with `_.map`. + _.rest = _.tail = function(array, index, guard) { + return slice.call(array, (index == null) || guard ? 1 : index); + }; + + // Trim out all falsy values from an array. + _.compact = function(array) { + return _.filter(array, function(value){ return !!value; }); + }; + + // Return a completely flattened version of an array. + _.flatten = function(array, shallow) { + return _.reduce(array, function(memo, value) { + if (_.isArray(value)) return memo.concat(shallow ? value : _.flatten(value)); + memo[memo.length] = value; + return memo; + }, []); + }; + + // Return a version of the array that does not contain the specified value(s). + _.without = function(array) { + return _.difference(array, slice.call(arguments, 1)); + }; + + // Produce a duplicate-free version of the array. If the array has already + // been sorted, you have the option of using a faster algorithm. + // Aliased as `unique`. + _.uniq = _.unique = function(array, isSorted, iterator) { + var initial = iterator ? _.map(array, iterator) : array; + var result = []; + _.reduce(initial, function(memo, el, i) { + if (0 == i || (isSorted === true ? _.last(memo) != el : !_.include(memo, el))) { + memo[memo.length] = el; + result[result.length] = array[i]; + } + return memo; + }, []); + return result; + }; + + // Produce an array that contains the union: each distinct element from all of + // the passed-in arrays. + _.union = function() { + return _.uniq(_.flatten(arguments, true)); + }; + + // Produce an array that contains every item shared between all the + // passed-in arrays. (Aliased as "intersect" for back-compat.) + _.intersection = _.intersect = function(array) { + var rest = slice.call(arguments, 1); + return _.filter(_.uniq(array), function(item) { + return _.every(rest, function(other) { + return _.indexOf(other, item) >= 0; + }); + }); + }; + + // Take the difference between one array and another. + // Only the elements present in just the first array will remain. + _.difference = function(array, other) { + return _.filter(array, function(value){ return !_.include(other, value); }); + }; + + // Zip together multiple lists into a single array -- elements that share + // an index go together. + _.zip = function() { + var args = slice.call(arguments); + var length = _.max(_.pluck(args, 'length')); + var results = new Array(length); + for (var i = 0; i < length; i++) results[i] = _.pluck(args, "" + i); + return results; + }; + + // If the browser doesn't supply us with indexOf (I'm looking at you, **MSIE**), + // we need this function. Return the position of the first occurrence of an + // item in an array, or -1 if the item is not included in the array. + // Delegates to **ECMAScript 5**'s native `indexOf` if available. + // If the array is large and already in sort order, pass `true` + // for **isSorted** to use binary search. + _.indexOf = function(array, item, isSorted) { + if (array == null) return -1; + var i, l; + if (isSorted) { + i = _.sortedIndex(array, item); + return array[i] === item ? i : -1; + } + if (nativeIndexOf && array.indexOf === nativeIndexOf) return array.indexOf(item); + for (i = 0, l = array.length; i < l; i++) if (array[i] === item) return i; + return -1; + }; + + // Delegates to **ECMAScript 5**'s native `lastIndexOf` if available. + _.lastIndexOf = function(array, item) { + if (array == null) return -1; + if (nativeLastIndexOf && array.lastIndexOf === nativeLastIndexOf) return array.lastIndexOf(item); + var i = array.length; + while (i--) if (array[i] === item) return i; + return -1; + }; + + // Generate an integer Array containing an arithmetic progression. A port of + // the native Python `range()` function. See + // [the Python documentation](http://docs.python.org/library/functions.html#range). + _.range = function(start, stop, step) { + if (arguments.length <= 1) { + stop = start || 0; + start = 0; + } + step = arguments[2] || 1; + + var len = Math.max(Math.ceil((stop - start) / step), 0); + var idx = 0; + var range = new Array(len); + + while(idx < len) { + range[idx++] = start; + start += step; + } + + return range; + }; + + // Function (ahem) Functions + // ------------------ + + // Reusable constructor function for prototype setting. + var ctor = function(){}; + + // Create a function bound to a given object (assigning `this`, and arguments, + // optionally). Binding with arguments is also known as `curry`. + // Delegates to **ECMAScript 5**'s native `Function.bind` if available. + // We check for `func.bind` first, to fail fast when `func` is undefined. + _.bind = function bind(func, context) { + var bound, args; + if (func.bind === nativeBind && nativeBind) return nativeBind.apply(func, slice.call(arguments, 1)); + if (!_.isFunction(func)) throw new TypeError; + args = slice.call(arguments, 2); + return bound = function() { + if (!(this instanceof bound)) return func.apply(context, args.concat(slice.call(arguments))); + ctor.prototype = func.prototype; + var self = new ctor; + var result = func.apply(self, args.concat(slice.call(arguments))); + if (Object(result) === result) return result; + return self; + }; + }; + + // Bind all of an object's methods to that object. Useful for ensuring that + // all callbacks defined on an object belong to it. + _.bindAll = function(obj) { + var funcs = slice.call(arguments, 1); + if (funcs.length == 0) funcs = _.functions(obj); + each(funcs, function(f) { obj[f] = _.bind(obj[f], obj); }); + return obj; + }; + + // Memoize an expensive function by storing its results. + _.memoize = function(func, hasher) { + var memo = {}; + hasher || (hasher = _.identity); + return function() { + var key = hasher.apply(this, arguments); + return hasOwnProperty.call(memo, key) ? memo[key] : (memo[key] = func.apply(this, arguments)); + }; + }; + + // Delays a function for the given number of milliseconds, and then calls + // it with the arguments supplied. + _.delay = function(func, wait) { + var args = slice.call(arguments, 2); + return setTimeout(function(){ return func.apply(func, args); }, wait); + }; + + // Defers a function, scheduling it to run after the current call stack has + // cleared. + _.defer = function(func) { + return _.delay.apply(_, [func, 1].concat(slice.call(arguments, 1))); + }; + + // Returns a function, that, when invoked, will only be triggered at most once + // during a given window of time. + _.throttle = function(func, wait) { + var context, args, timeout, throttling, more; + var whenDone = _.debounce(function(){ more = throttling = false; }, wait); + return function() { + context = this; args = arguments; + var later = function() { + timeout = null; + if (more) func.apply(context, args); + whenDone(); + }; + if (!timeout) timeout = setTimeout(later, wait); + if (throttling) { + more = true; + } else { + func.apply(context, args); + } + whenDone(); + throttling = true; + }; + }; + + // Returns a function, that, as long as it continues to be invoked, will not + // be triggered. The function will be called after it stops being called for + // N milliseconds. + _.debounce = function(func, wait) { + var timeout; + return function() { + var context = this, args = arguments; + var later = function() { + timeout = null; + func.apply(context, args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; + }; + + // Returns a function that will be executed at most one time, no matter how + // often you call it. Useful for lazy initialization. + _.once = function(func) { + var ran = false, memo; + return function() { + if (ran) return memo; + ran = true; + return memo = func.apply(this, arguments); + }; + }; + + // Returns the first function passed as an argument to the second, + // allowing you to adjust arguments, run code before and after, and + // conditionally execute the original function. + _.wrap = function(func, wrapper) { + return function() { + var args = [func].concat(slice.call(arguments)); + return wrapper.apply(this, args); + }; + }; + + // Returns a function that is the composition of a list of functions, each + // consuming the return value of the function that follows. + _.compose = function() { + var funcs = slice.call(arguments); + return function() { + var args = slice.call(arguments); + for (var i = funcs.length - 1; i >= 0; i--) { + args = [funcs[i].apply(this, args)]; + } + return args[0]; + }; + }; + + // Returns a function that will only be executed after being called N times. + _.after = function(times, func) { + if (times <= 0) return func(); + return function() { + if (--times < 1) { return func.apply(this, arguments); } + }; + }; + + // Object Functions + // ---------------- + + // Retrieve the names of an object's properties. + // Delegates to **ECMAScript 5**'s native `Object.keys` + _.keys = nativeKeys || function(obj) { + if (obj !== Object(obj)) throw new TypeError('Invalid object'); + var keys = []; + for (var key in obj) if (hasOwnProperty.call(obj, key)) keys[keys.length] = key; + return keys; + }; + + // Retrieve the values of an object's properties. + _.values = function(obj) { + return _.map(obj, _.identity); + }; + + // Return a sorted list of the function names available on the object. + // Aliased as `methods` + _.functions = _.methods = function(obj) { + var names = []; + for (var key in obj) { + if (_.isFunction(obj[key])) names.push(key); + } + return names.sort(); + }; + + // Extend a given object with all the properties in passed-in object(s). + _.extend = function(obj) { + each(slice.call(arguments, 1), function(source) { + for (var prop in source) { + if (source[prop] !== void 0) obj[prop] = source[prop]; + } + }); + return obj; + }; + + // Fill in a given object with default properties. + _.defaults = function(obj) { + each(slice.call(arguments, 1), function(source) { + for (var prop in source) { + if (obj[prop] == null) obj[prop] = source[prop]; + } + }); + return obj; + }; + + // Create a (shallow-cloned) duplicate of an object. + _.clone = function(obj) { + if (!_.isObject(obj)) return obj; + return _.isArray(obj) ? obj.slice() : _.extend({}, obj); + }; + + // Invokes interceptor with the obj, and then returns obj. + // The primary purpose of this method is to "tap into" a method chain, in + // order to perform operations on intermediate results within the chain. + _.tap = function(obj, interceptor) { + interceptor(obj); + return obj; + }; + + // Internal recursive comparison function. + function eq(a, b, stack) { + // Identical objects are equal. `0 === -0`, but they aren't identical. + // See the Harmony `egal` proposal: http://wiki.ecmascript.org/doku.php?id=harmony:egal. + if (a === b) return a !== 0 || 1 / a == 1 / b; + // A strict comparison is necessary because `null == undefined`. + if (a == null || b == null) return a === b; + // Unwrap any wrapped objects. + if (a._chain) a = a._wrapped; + if (b._chain) b = b._wrapped; + // Invoke a custom `isEqual` method if one is provided. + if (_.isFunction(a.isEqual)) return a.isEqual(b); + if (_.isFunction(b.isEqual)) return b.isEqual(a); + // Compare `[[Class]]` names. + var className = toString.call(a); + if (className != toString.call(b)) return false; + switch (className) { + // Strings, numbers, dates, and booleans are compared by value. + case '[object String]': + // Primitives and their corresponding object wrappers are equivalent; thus, `"5"` is + // equivalent to `new String("5")`. + return String(a) == String(b); + case '[object Number]': + a = +a; + b = +b; + // `NaN`s are equivalent, but non-reflexive. An `egal` comparison is performed for + // other numeric values. + return a != a ? b != b : (a == 0 ? 1 / a == 1 / b : a == b); + case '[object Date]': + case '[object Boolean]': + // Coerce dates and booleans to numeric primitive values. Dates are compared by their + // millisecond representations. Note that invalid dates with millisecond representations + // of `NaN` are not equivalent. + return +a == +b; + // RegExps are compared by their source patterns and flags. + case '[object RegExp]': + return a.source == b.source && + a.global == b.global && + a.multiline == b.multiline && + a.ignoreCase == b.ignoreCase; + } + if (typeof a != 'object' || typeof b != 'object') return false; + // Assume equality for cyclic structures. The algorithm for detecting cyclic + // structures is adapted from ES 5.1 section 15.12.3, abstract operation `JO`. + var length = stack.length; + while (length--) { + // Linear search. Performance is inversely proportional to the number of + // unique nested structures. + if (stack[length] == a) return true; + } + // Add the first object to the stack of traversed objects. + stack.push(a); + var size = 0, result = true; + // Recursively compare objects and arrays. + if (className == '[object Array]') { + // Compare array lengths to determine if a deep comparison is necessary. + size = a.length; + result = size == b.length; + if (result) { + // Deep compare the contents, ignoring non-numeric properties. + while (size--) { + // Ensure commutative equality for sparse arrays. + if (!(result = size in a == size in b && eq(a[size], b[size], stack))) break; + } + } + } else { + // Objects with different constructors are not equivalent. + if ("constructor" in a != "constructor" in b || a.constructor != b.constructor) return false; + // Deep compare objects. + for (var key in a) { + if (hasOwnProperty.call(a, key)) { + // Count the expected number of properties. + size++; + // Deep compare each member. + if (!(result = hasOwnProperty.call(b, key) && eq(a[key], b[key], stack))) break; + } + } + // Ensure that both objects contain the same number of properties. + if (result) { + for (key in b) { + if (hasOwnProperty.call(b, key) && !(size--)) break; + } + result = !size; + } + } + // Remove the first object from the stack of traversed objects. + stack.pop(); + return result; + } + + // Perform a deep comparison to check if two objects are equal. + _.isEqual = function(a, b) { + return eq(a, b, []); + }; + + // Is a given array, string, or object empty? + // An "empty" object has no enumerable own-properties. + _.isEmpty = function(obj) { + if (_.isArray(obj) || _.isString(obj)) return obj.length === 0; + for (var key in obj) if (hasOwnProperty.call(obj, key)) return false; + return true; + }; + + // Is a given value a DOM element? + _.isElement = function(obj) { + return !!(obj && obj.nodeType == 1); + }; + + // Is a given value an array? + // Delegates to ECMA5's native Array.isArray + _.isArray = nativeIsArray || function(obj) { + return toString.call(obj) == '[object Array]'; + }; + + // Is a given variable an object? + _.isObject = function(obj) { + return obj === Object(obj); + }; + + // Is a given variable an arguments object? + if (toString.call(arguments) == '[object Arguments]') { + _.isArguments = function(obj) { + return toString.call(obj) == '[object Arguments]'; + }; + } else { + _.isArguments = function(obj) { + return !!(obj && hasOwnProperty.call(obj, 'callee')); + }; + } + + // Is a given value a function? + _.isFunction = function(obj) { + return toString.call(obj) == '[object Function]'; + }; + + // Is a given value a string? + _.isString = function(obj) { + return toString.call(obj) == '[object String]'; + }; + + // Is a given value a number? + _.isNumber = function(obj) { + return toString.call(obj) == '[object Number]'; + }; + + // Is the given value `NaN`? + _.isNaN = function(obj) { + // `NaN` is the only value for which `===` is not reflexive. + return obj !== obj; + }; + + // Is a given value a boolean? + _.isBoolean = function(obj) { + return obj === true || obj === false || toString.call(obj) == '[object Boolean]'; + }; + + // Is a given value a date? + _.isDate = function(obj) { + return toString.call(obj) == '[object Date]'; + }; + + // Is the given value a regular expression? + _.isRegExp = function(obj) { + return toString.call(obj) == '[object RegExp]'; + }; + + // Is a given value equal to null? + _.isNull = function(obj) { + return obj === null; + }; + + // Is a given variable undefined? + _.isUndefined = function(obj) { + return obj === void 0; + }; + + // Utility Functions + // ----------------- + + // Run Underscore.js in *noConflict* mode, returning the `_` variable to its + // previous owner. Returns a reference to the Underscore object. + _.noConflict = function() { + root._ = previousUnderscore; + return this; + }; + + // Keep the identity function around for default iterators. + _.identity = function(value) { + return value; + }; + + // Run a function **n** times. + _.times = function (n, iterator, context) { + for (var i = 0; i < n; i++) iterator.call(context, i); + }; + + // Escape a string for HTML interpolation. + _.escape = function(string) { + return (''+string).replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>').replace(/"/g, '"').replace(/'/g, ''').replace(/\//g,'/'); + }; + + // Add your own custom functions to the Underscore object, ensuring that + // they're correctly added to the OOP wrapper as well. + _.mixin = function(obj) { + each(_.functions(obj), function(name){ + addToWrapper(name, _[name] = obj[name]); + }); + }; + + // Generate a unique integer id (unique within the entire client session). + // Useful for temporary DOM ids. + var idCounter = 0; + _.uniqueId = function(prefix) { + var id = idCounter++; + return prefix ? prefix + id : id; + }; + + // By default, Underscore uses ERB-style template delimiters, change the + // following template settings to use alternative delimiters. + _.templateSettings = { + evaluate : /<%([\s\S]+?)%>/g, + interpolate : /<%=([\s\S]+?)%>/g, + escape : /<%-([\s\S]+?)%>/g + }; + + // JavaScript micro-templating, similar to John Resig's implementation. + // Underscore templating handles arbitrary delimiters, preserves whitespace, + // and correctly escapes quotes within interpolated code. + _.template = function(str, data) { + var c = _.templateSettings; + var tmpl = 'var __p=[],print=function(){__p.push.apply(__p,arguments);};' + + 'with(obj||{}){__p.push(\'' + + str.replace(/\\/g, '\\\\') + .replace(/'/g, "\\'") + .replace(c.escape, function(match, code) { + return "',_.escape(" + code.replace(/\\'/g, "'") + "),'"; + }) + .replace(c.interpolate, function(match, code) { + return "'," + code.replace(/\\'/g, "'") + ",'"; + }) + .replace(c.evaluate || null, function(match, code) { + return "');" + code.replace(/\\'/g, "'") + .replace(/[\r\n\t]/g, ' ') + ";__p.push('"; + }) + .replace(/\r/g, '\\r') + .replace(/\n/g, '\\n') + .replace(/\t/g, '\\t') + + "');}return __p.join('');"; + var func = new Function('obj', '_', tmpl); + return data ? func(data, _) : function(data) { return func(data, _) }; + }; + + // The OOP Wrapper + // --------------- + + // If Underscore is called as a function, it returns a wrapped object that + // can be used OO-style. This wrapper holds altered versions of all the + // underscore functions. Wrapped objects may be chained. + var wrapper = function(obj) { this._wrapped = obj; }; + + // Expose `wrapper.prototype` as `_.prototype` + _.prototype = wrapper.prototype; + + // Helper function to continue chaining intermediate results. + var result = function(obj, chain) { + return chain ? _(obj).chain() : obj; + }; + + // A method to easily add functions to the OOP wrapper. + var addToWrapper = function(name, func) { + wrapper.prototype[name] = function() { + var args = slice.call(arguments); + unshift.call(args, this._wrapped); + return result(func.apply(_, args), this._chain); + }; + }; + + // Add all of the Underscore functions to the wrapper object. + _.mixin(_); + + // Add all mutator Array functions to the wrapper. + each(['pop', 'push', 'reverse', 'shift', 'sort', 'splice', 'unshift'], function(name) { + var method = ArrayProto[name]; + wrapper.prototype[name] = function() { + method.apply(this._wrapped, arguments); + return result(this._wrapped, this._chain); + }; + }); + + // Add all accessor Array functions to the wrapper. + each(['concat', 'join', 'slice'], function(name) { + var method = ArrayProto[name]; + wrapper.prototype[name] = function() { + return result(method.apply(this._wrapped, arguments), this._chain); + }; + }); + + // Start chaining a wrapped Underscore object. + wrapper.prototype.chain = function() { + this._chain = true; + return this; + }; + + // Extracts the result from a wrapped and chained object. + wrapper.prototype.value = function() { + return this._wrapped; + }; + +}).call(this); diff --git a/lib/beetsplug/web/templates/index.html b/lib/beetsplug/web/templates/index.html new file mode 100644 index 00000000..0fdd46d1 --- /dev/null +++ b/lib/beetsplug/web/templates/index.html @@ -0,0 +1,98 @@ +<!DOCTYPE html> +<html> + <head> + <title>beets + + + + + + + + + + + +
+
+ +
+
    +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/lib/beetsplug/zero.py b/lib/beetsplug/zero.py new file mode 100644 index 00000000..f05b1b5a --- /dev/null +++ b/lib/beetsplug/zero.py @@ -0,0 +1,160 @@ +# This file is part of beets. +# Copyright 2016, Blemjhoo Tezoulbr . +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +""" Clears tag fields in media files.""" + + +import re + +from beets.plugins import BeetsPlugin +from mediafile import MediaFile +from beets.importer import action +from beets.ui import Subcommand, decargs, input_yn +import confuse + +__author__ = 'baobab@heresiarch.info' + + +class ZeroPlugin(BeetsPlugin): + def __init__(self): + super().__init__() + + self.register_listener('write', self.write_event) + self.register_listener('import_task_choice', + self.import_task_choice_event) + + self.config.add({ + 'auto': True, + 'fields': [], + 'keep_fields': [], + 'update_database': False, + }) + + self.fields_to_progs = {} + self.warned = False + + """Read the bulk of the config into `self.fields_to_progs`. + After construction, `fields_to_progs` contains all the fields that + should be zeroed as keys and maps each of those to a list of compiled + regexes (progs) as values. + A field is zeroed if its value matches one of the associated progs. If + progs is empty, then the associated field is always zeroed. + """ + if self.config['fields'] and self.config['keep_fields']: + self._log.warning( + 'cannot blacklist and whitelist at the same time' + ) + # Blacklist mode. + elif self.config['fields']: + for field in self.config['fields'].as_str_seq(): + self._set_pattern(field) + # Whitelist mode. + elif self.config['keep_fields']: + for field in MediaFile.fields(): + if (field not in self.config['keep_fields'].as_str_seq() and + # These fields should always be preserved. + field not in ('id', 'path', 'album_id')): + self._set_pattern(field) + + def commands(self): + zero_command = Subcommand('zero', help='set fields to null') + + def zero_fields(lib, opts, args): + if not decargs(args) and not input_yn( + "Remove fields for all items? (Y/n)", + True): + return + for item in lib.items(decargs(args)): + self.process_item(item) + + zero_command.func = zero_fields + return [zero_command] + + def _set_pattern(self, field): + """Populate `self.fields_to_progs` for a given field. + Do some sanity checks then compile the regexes. + """ + if field not in MediaFile.fields(): + self._log.error('invalid field: {0}', field) + elif field in ('id', 'path', 'album_id'): + self._log.warning('field \'{0}\' ignored, zeroing ' + 'it would be dangerous', field) + else: + try: + for pattern in self.config[field].as_str_seq(): + prog = re.compile(pattern, re.IGNORECASE) + self.fields_to_progs.setdefault(field, []).append(prog) + except confuse.NotFoundError: + # Matches everything + self.fields_to_progs[field] = [] + + def import_task_choice_event(self, session, task): + if task.choice_flag == action.ASIS and not self.warned: + self._log.warning('cannot zero in \"as-is\" mode') + self.warned = True + # TODO request write in as-is mode + + def write_event(self, item, path, tags): + if self.config['auto']: + self.set_fields(item, tags) + + def set_fields(self, item, tags): + """Set values in `tags` to `None` if the field is in + `self.fields_to_progs` and any of the corresponding `progs` matches the + field value. + Also update the `item` itself if `update_database` is set in the + config. + """ + fields_set = False + + if not self.fields_to_progs: + self._log.warning('no fields, nothing to do') + return False + + for field, progs in self.fields_to_progs.items(): + if field in tags: + value = tags[field] + match = _match_progs(tags[field], progs) + else: + value = '' + match = not progs + + if match: + fields_set = True + self._log.debug('{0}: {1} -> None', field, value) + tags[field] = None + if self.config['update_database']: + item[field] = None + + return fields_set + + def process_item(self, item): + tags = dict(item) + + if self.set_fields(item, tags): + item.write(tags=tags) + if self.config['update_database']: + item.store(fields=tags) + + +def _match_progs(value, progs): + """Check if `value` (as string) is matching any of the compiled regexes in + the `progs` list. + """ + if not progs: + return True + for prog in progs: + if prog.search(str(value)): + return True + return False diff --git a/lib/bencode.py b/lib/bencode.py deleted file mode 100644 index 3181ee1c..00000000 --- a/lib/bencode.py +++ /dev/null @@ -1,131 +0,0 @@ -# Got this from here: https://gist.github.com/1126793 - -# The contents of this file are subject to the Python Software Foundation -# License Version 2.3 (the License). You may not copy or use this file, in -# either source code or executable form, except in compliance with the License. -# You may obtain a copy of the License at http://www.python.org/license. -# -# Software distributed under the License is distributed on an AS IS basis, -# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -# for the specific language governing rights and limitations under the -# License. - -# Written by Petru Paler - -# Minor modifications made by Andrew Resch to replace the BTFailure errors with Exceptions - -def decode_int(x, f): - f += 1 - newf = x.index('e', f) - n = int(x[f:newf]) - if x[f] == '-': - if x[f + 1] == '0': - raise ValueError - elif x[f] == '0' and newf != f+1: - raise ValueError - return (n, newf+1) - -def decode_string(x, f): - colon = x.index(':', f) - n = int(x[f:colon]) - if x[f] == '0' and colon != f+1: - raise ValueError - colon += 1 - return (x[colon:colon+n], colon+n) - -def decode_list(x, f): - r, f = [], f+1 - while x[f] != 'e': - v, f = decode_func[x[f]](x, f) - r.append(v) - return (r, f + 1) - -def decode_dict(x, f): - r, f = {}, f+1 - while x[f] != 'e': - k, f = decode_string(x, f) - r[k], f = decode_func[x[f]](x, f) - return (r, f + 1) - -decode_func = {} -decode_func['l'] = decode_list -decode_func['d'] = decode_dict -decode_func['i'] = decode_int -decode_func['0'] = decode_string -decode_func['1'] = decode_string -decode_func['2'] = decode_string -decode_func['3'] = decode_string -decode_func['4'] = decode_string -decode_func['5'] = decode_string -decode_func['6'] = decode_string -decode_func['7'] = decode_string -decode_func['8'] = decode_string -decode_func['9'] = decode_string - -def bdecode(x): - try: - r, l = decode_func[x[0]](x, 0) - except (IndexError, KeyError, ValueError): - raise Exception("not a valid bencoded string") - - return r - -from types import StringType, IntType, LongType, DictType, ListType, TupleType - - -class Bencached(object): - - __slots__ = ['bencoded'] - - def __init__(self, s): - self.bencoded = s - -def encode_bencached(x,r): - r.append(x.bencoded) - -def encode_int(x, r): - r.extend(('i', str(x), 'e')) - -def encode_bool(x, r): - if x: - encode_int(1, r) - else: - encode_int(0, r) - -def encode_string(x, r): - r.extend((str(len(x)), ':', x)) - -def encode_list(x, r): - r.append('l') - for i in x: - encode_func[type(i)](i, r) - r.append('e') - -def encode_dict(x,r): - r.append('d') - ilist = x.items() - ilist.sort() - for k, v in ilist: - r.extend((str(len(k)), ':', k)) - encode_func[type(v)](v, r) - r.append('e') - -encode_func = {} -encode_func[Bencached] = encode_bencached -encode_func[IntType] = encode_int -encode_func[LongType] = encode_int -encode_func[StringType] = encode_string -encode_func[ListType] = encode_list -encode_func[TupleType] = encode_list -encode_func[DictType] = encode_dict - -try: - from types import BooleanType - encode_func[BooleanType] = encode_bool -except ImportError: - pass - -def bencode(x): - r = [] - encode_func[type(x)](x, r) - return ''.join(r) diff --git a/lib/bencode/__init__.py b/lib/bencode/__init__.py new file mode 100644 index 00000000..5d06f31b --- /dev/null +++ b/lib/bencode/__init__.py @@ -0,0 +1,4 @@ +"""We import some functions here, so they are available on the package level""" +from .bencode import decode, encode # noqa +from .torrent import decode_torrent, encode_torrent # noqa +from .transform import be_to_str, str_to_be # noqa diff --git a/lib/bencode/bencode.py b/lib/bencode/bencode.py new file mode 100644 index 00000000..5f9171c7 --- /dev/null +++ b/lib/bencode/bencode.py @@ -0,0 +1,270 @@ +"""Code, which deals with bencoded data.""" +from dataclasses import dataclass +from typing import Union + +COLON = ord(":") +END_MARKER = ord("e") +START_DICT = ord("d") +START_INTEGER = ord("i") +START_LIST = ord("l") + + +@dataclass +class BencodedString: + """An internal container for bencoded strings""" + + def __init__(self, data): + """Called when the object is created, sets its attributes""" + self.bytes = bytearray(data) + + def del_prefix(self, index): + """Delete the prefix of specified length""" + del self.bytes[:index] + + def get_prefix(self, index): + """Get the prefix of specified length (as bytes)""" + return bytes(self.bytes[:index]) + + +def _decode(data: BencodedString) -> Union[bytes, dict, int, list]: + """Convert the given bencoded string to a Python object. + + Args: + Some BencodedString + + Raises: + ValueError: + If the argument is empty + If the first byte doesn't match a supported by bencode data type + + Returns: + A Python object + """ + if not data.bytes: + raise ValueError("Cannot decode an empty bencoded string.") + + if data.bytes[0] == START_DICT: + return _decode_dict(data) + + if data.bytes[0] == START_LIST: + return _decode_list(data) + + if data.bytes[0] == START_INTEGER: + return _decode_int(data) + + if chr(data.bytes[0]).isdigit(): + return _decode_bytes(data) + + raise ValueError( + "Cannot decode data, expected the first byte to be one of " + f"'d', 'i', 'l' or a digit, got {chr(data.bytes[0])!r} instead." + ) + + +def _decode_bytes(data: BencodedString) -> bytes: + """Extract the first byte string from the given bencoded string + + Args: + Some BencodedString, which starts with a byte string + + Raises: + ValueError: + If the byte string doesn't contain a delimiter + If the real string length is shorter, than the prefix length + + Returns: + An extracted byte string + """ + # Get byte string length + delimiter_index = data.bytes.find(COLON) + + if delimiter_index > 0: + length_prefix = data.get_prefix(delimiter_index) + string_length = int(length_prefix.decode("ascii")) + data.del_prefix(delimiter_index + 1) + else: + raise ValueError( + "Cannot decode a byte string, it doesn't contain a delimiter. " + "Most likely the bencoded string is incomplete or incorrect." + ) + + # Get byte string data + if len(data.bytes) >= string_length: + result_bytes = data.get_prefix(string_length) + data.del_prefix(string_length) + else: + raise ValueError( + f"Cannot decode a byte string (prefix length " + f"- {string_length}, real_length - {len(data.bytes)}. " + "Most likely the bencoded string is incomplete or incorrect." + ) + + return result_bytes + + +def _decode_dict(data: BencodedString) -> dict: + """Extract the first dict from the given bencoded string + + Args: + Some BencodedString, which starts with a dictionary + + Raises: + ValueError: If bencoded string ended before the end marker was found + + Returns: + An extracted dictionary + """ + result_dict = {} + data.del_prefix(1) + + while True: + if data.bytes: + if data.bytes[0] != END_MARKER: + key = _decode(data) + value = _decode(data) + result_dict[key] = value + else: + data.del_prefix(1) + break + else: + raise ValueError( + "Cannot decode a dictionary, reached end of the bencoded " + "string before the end marker was found. Most likely the " + "bencoded string is incomplete or incorrect." + ) + + return result_dict + + +def _decode_int(data: BencodedString) -> int: + """Extract the first integer from the given bencoded string + + Args: + Some BencodedString, which starts with an integer + + Raises: + ValueError: If bencoded string ended before the end marker was found + + Returns: + An extracted integer + """ + data.del_prefix(1) + end_marker_index = data.bytes.find(END_MARKER) + + if end_marker_index > 0: + result_bytes = data.get_prefix(end_marker_index) + data.del_prefix(end_marker_index + 1) + else: + raise ValueError( + "Cannot decode an integer, reached the end of the bencoded " + "string before the end marker was found. Most likely the " + "bencoded string is incomplete or incorrect." + ) + + return int(result_bytes.decode("ascii")) + + +def _decode_list(data: BencodedString) -> list: + """Extract the first list from the given bencoded string + + Args: + Some BencodedString, which starts with a list + + Raises: + ValueError: If bencoded string ended before the end marker was found + + Returns: + An extracted list + """ + result_list = [] + data.del_prefix(1) + + while True: + if data.bytes: + if data.bytes[0] != END_MARKER: + result_list.append(_decode(data)) + else: + data.del_prefix(1) + break + else: + raise ValueError( + "Cannot decode a list, reached end of the bencoded string " + "before the end marker was found. Most likely the bencoded " + "string is incomplete or incorrect." + ) + + return result_list + + +def _encode_bytes(source: bytes) -> bytes: + """Encode provided bytes as a bencoded string""" + return str(len(source)).encode("ascii") + b":" + source + + +def _encode_dict(source: dict) -> bytes: + """Encode provided dictionary as a bencoded string""" + result_data = b"d" + + for key, value in source.items(): + result_data += encode(key) + encode(value) + + return result_data + b"e" + + +def _encode_int(source: int) -> bytes: + """Encode provided integer as a bencoded string""" + return b"i" + str(source).encode("ascii") + b"e" + + +def _encode_list(source: list) -> bytes: + """Encode provided list as a bencoded string""" + result_data = b"l" + + for item in source: + result_data += encode(item) + + return result_data + b"e" + + +def decode(data: bytes) -> Union[bytes, dict, int, list]: + """Convert the given bencoded string to a Python object. + + Raises: + ValueError: + If the argument is not of type bytes or is empty + If the first byte doesn't match a supported by bencode data type + + Returns: + A Python object + """ + if not isinstance(data, bytes): + raise ValueError( + f"Cannot decode data, expected bytes, got {type(data)} instead." + ) + return _decode(BencodedString(data)) + + +def encode(data: Union[bytes, dict, int, list]) -> bytes: + """Convert the given Python object to a bencoded string. + + Raises: + ValueError: If the provided object type is not supported + + Returns: + A bencoded string + """ + if isinstance(data, bytes): + return _encode_bytes(data) + + if isinstance(data, dict): + return _encode_dict(data) + + if isinstance(data, int): + return _encode_int(data) + + if isinstance(data, list): + return _encode_list(data) + + raise ValueError( + f"Cannot encode data: objects of type {type(data)} are not supported." + ) diff --git a/lib/html5lib/filters/__init__.py b/lib/bencode/py.typed similarity index 100% rename from lib/html5lib/filters/__init__.py rename to lib/bencode/py.typed diff --git a/lib/bencode/torrent.py b/lib/bencode/torrent.py new file mode 100644 index 00000000..1cf7d366 --- /dev/null +++ b/lib/bencode/torrent.py @@ -0,0 +1,100 @@ +"""Code, which deals with torrent data.""" +from typing import Any + +from bencode.bencode import decode, encode + + +def _decode_object(data: Any, encoding: str, errors: str) -> Any: + """Replace bytes with strings in the provided Python object""" + if isinstance(data, bytes): + return data.decode(encoding, errors) + + if isinstance(data, dict): + result_dict = {} + for key, value in data.items(): + decoded_key = _decode_object(key, encoding, errors) + if decoded_key.endswith(".utf-8"): + decoded_value = _decode_object(value, "utf8", errors) + elif decoded_key in ["ed2k", "filehash", "pieces"]: + decoded_value = value.hex() + else: + decoded_value = _decode_object(value, encoding, errors) + result_dict[decoded_key] = decoded_value + return result_dict + + if isinstance(data, list): + return [_decode_object(item, encoding, errors) for item in data] + + return data + + +def _encode_object(data: Any, encoding: str, errors: str) -> Any: + """Replace strings with bytes in the provided Python object""" + if isinstance(data, str): + return data.encode(encoding, errors) + + if isinstance(data, dict): + result_dict = {} + for key, value in data.items(): + encoded_key = _encode_object(key, encoding, errors) + if encoded_key.endswith(b".utf-8"): + encoded_value = _encode_object(value, "utf8", errors) + elif encoded_key in [b"ed2k", b"filehash", b"pieces"]: + encoded_value = bytes.fromhex(value) + else: + encoded_value = _encode_object(value, encoding, errors) + result_dict[encoded_key] = encoded_value + return result_dict + + if isinstance(data, list): + return [_encode_object(item, encoding, errors) for item in data] + + return data + + +def decode_torrent( + data: bytes, encoding: str = "utf_8", errors: str = "strict" +) -> dict: + """Convert the given torrent to a Python dictionary. + + Fields are decoded: + - using utf8 (if the key ends with ".utf-8" suffix, like "name.utf-8") + - using the provided encoding (for other human readable fields) + - as hex (for binary fields) + + Args: + data: some binary data to decode + encoding: which encoding should be used + (https://docs.python.org/3/library/codecs.html#standard-encodings) + errors: what to do if decoding is not possible + (https://docs.python.org/3/library/codecs.html#error-handlers) + + Raises: + UnicodeDecodeError: If some key or value cannot be decoded using the + provided encoding + ValueError: If the first argument is not of type bytes + """ + if not isinstance(data, bytes): + raise ValueError( + f"Cannot decode data, expected bytes, got {type(data)} instead." + ) + return _decode_object(decode(data), encoding, errors) + + +def encode_torrent( + data: dict, encoding: str = "utf8", errors: str = "strict" +) -> bytes: + """Convert the given Python dictionary to a torrent + + Mirror function for the "decode_torrent" function. + + Raises: + UnicodeEncodeError: If some key or value cannot be encoded using the + provided encoding + ValueError: If the first argument is not of type dict + """ + if not isinstance(data, dict): + raise ValueError( + f"Cannot encode data, expected dict, got {type(data)} instead." + ) + return encode(_encode_object(data, encoding, errors)) diff --git a/lib/bencode/transform.py b/lib/bencode/transform.py new file mode 100644 index 00000000..713bd902 --- /dev/null +++ b/lib/bencode/transform.py @@ -0,0 +1,35 @@ +"""Code, for converting bencoded data to string and back.""" + + +def be_to_str(data: bytes) -> str: + """Convert bencoded data from bytes to string""" + result = [] + for num in data: + # Non-printable characters, double quotes, square brackets, accent + if num < 32 or num in [34, 91, 92, 93] or num > 126: + result.append("[%0.2x]" % num) + else: + result.append(chr(num)) + return "".join(result) + + +def str_to_be(data: str) -> bytes: + """Convert bencoded data from string to bytes""" + result = bytearray() + seq_marker = False + seq_chars = "" + + for char in data: + if char == "[": + seq_marker = True + continue + if char == "]": + result.append(int(seq_chars, 16)) + seq_marker = False + seq_chars = "" + continue + if seq_marker: + seq_chars += char + else: + result.append(ord(char)) + return bytes(result) diff --git a/lib/biplist/__init__.py b/lib/biplist/__init__.py index fe5b711b..d0db52d2 100755 --- a/lib/biplist/__init__.py +++ b/lib/biplist/__init__.py @@ -56,20 +56,20 @@ import sys import time try: - unicode + str unicodeEmpty = r'' except NameError: - unicode = str + str = str unicodeEmpty = '' try: - long + int except NameError: long = int try: {}.iteritems - iteritems = lambda x: x.iteritems() + iteritems = lambda x: iter(x.items()) except AttributeError: - iteritems = lambda x: x.items() + iteritems = lambda x: list(x.items()) __all__ = [ 'Uid', 'Data', 'readPlist', 'writePlist', 'readPlistFromString', @@ -101,7 +101,7 @@ def readPlist(pathOrFile): """Raises NotBinaryPlistException, InvalidPlistException""" didOpen = False result = None - if isinstance(pathOrFile, (bytes, unicode)): + if isinstance(pathOrFile, (bytes, str)): pathOrFile = open(pathOrFile, 'rb') didOpen = True try: @@ -113,7 +113,7 @@ def readPlist(pathOrFile): result = None if hasattr(plistlib, 'loads'): contents = None - if isinstance(pathOrFile, (bytes, unicode)): + if isinstance(pathOrFile, (bytes, str)): with open(pathOrFile, 'rb') as f: contents = f.read() else: @@ -152,7 +152,7 @@ def writePlist(rootObject, pathOrFile, binary=True): if not binary: rootObject = wrapDataObject(rootObject, binary) if hasattr(plistlib, "dump"): - if isinstance(pathOrFile, (bytes, unicode)): + if isinstance(pathOrFile, (bytes, str)): with open(pathOrFile, 'wb') as f: return plistlib.dump(rootObject, f) else: @@ -161,7 +161,7 @@ def writePlist(rootObject, pathOrFile, binary=True): return plistlib.writePlist(rootObject, pathOrFile) else: didOpen = False - if isinstance(pathOrFile, (bytes, unicode)): + if isinstance(pathOrFile, (bytes, str)): pathOrFile = open(pathOrFile, 'wb') didOpen = True writer = PlistWriter(pathOrFile) @@ -564,7 +564,7 @@ class PlistWriter(object): raise InvalidPlistException('Dictionary keys cannot be null in plists.') elif isinstance(key, Data): raise InvalidPlistException('Data cannot be dictionary keys in plists.') - elif not isinstance(key, (bytes, unicode)): + elif not isinstance(key, (bytes, str)): raise InvalidPlistException('Keys must be strings.') def proc_size(size): @@ -586,7 +586,7 @@ class PlistWriter(object): elif isinstance(obj, Uid): size = self.intSize(obj) self.incrementByteCount('uidBytes', incr=1+size) - elif isinstance(obj, (int, long)): + elif isinstance(obj, int): size = self.intSize(obj) self.incrementByteCount('intBytes', incr=1+size) elif isinstance(obj, FloatWrapper): @@ -597,7 +597,7 @@ class PlistWriter(object): elif isinstance(obj, Data): size = proc_size(len(obj)) self.incrementByteCount('dataBytes', incr=1+size) - elif isinstance(obj, (unicode, bytes)): + elif isinstance(obj, (str, bytes)): size = proc_size(len(obj)) self.incrementByteCount('stringBytes', incr=1+size) elif isinstance(obj, HashableWrapper): @@ -653,7 +653,7 @@ class PlistWriter(object): result += pack('!B', (format << 4) | length) return result - if isinstance(obj, (str, unicode)) and obj == unicodeEmpty: + if isinstance(obj, str) and obj == unicodeEmpty: # The Apple Plist decoder can't decode a zero length Unicode string. obj = b'' @@ -671,7 +671,7 @@ class PlistWriter(object): size = self.intSize(obj) output += pack('!B', (0b1000 << 4) | size - 1) output += self.binaryInt(obj) - elif isinstance(obj, (int, long)): + elif isinstance(obj, int): byteSize = self.intSize(obj) root = math.log(byteSize, 2) output += pack('!B', (0b0001 << 4) | int(root)) @@ -687,7 +687,7 @@ class PlistWriter(object): elif isinstance(obj, Data): output += proc_variable_length(0b0100, len(obj)) output += obj - elif isinstance(obj, unicode): + elif isinstance(obj, str): byteData = obj.encode('utf_16_be') output += proc_variable_length(0b0110, len(byteData)//2) output += byteData diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py index d35f765b..2a436d34 100644 --- a/lib/bs4/__init__.py +++ b/lib/bs4/__init__.py @@ -1,32 +1,40 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" +"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend". + http://www.crummy.com/software/BeautifulSoup/ Beautiful Soup uses a pluggable XML or HTML parser to parse a (possibly invalid) document into a tree representation. Beautiful Soup -provides provides methods and Pythonic idioms that make it easy to -navigate, search, and modify the parse tree. +provides methods and Pythonic idioms that make it easy to navigate, +search, and modify the parse tree. -Beautiful Soup works with Python 2.6 and up. It works better if lxml +Beautiful Soup works with Python 3.5 and up. It works better if lxml and/or html5lib is installed. For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/bs4/doc/ +documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.4.0" -__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" +__version__ = "4.10.0" +__copyright__ = "Copyright (c) 2004-2021 Leonard Richardson" +# Use of this source code is governed by the MIT license. __license__ = "MIT" __all__ = ['BeautifulSoup'] + +from collections import Counter import os import re +import sys +import traceback import warnings +# The very first thing we do is give a useful error if someone is +# running this code under Python 2. +if sys.version_info.major < 3: + raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.') + from .builder import builder_registry, ParserRejectedMarkup from .dammit import UnicodeDammit from .element import ( @@ -38,28 +46,49 @@ from .element import ( NavigableString, PageElement, ProcessingInstruction, + PYTHON_SPECIFIC_ENCODINGS, ResultSet, + Script, + Stylesheet, SoupStrainer, Tag, + TemplateString, ) -# The very first thing we do is give a useful error if someone is -# running this code under Python 3 without converting it. -'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' +# Define some custom warnings. +class GuessedAtParserWarning(UserWarning): + """The warning issued when BeautifulSoup has to guess what parser to + use -- probably because no parser was specified in the constructor. + """ + +class MarkupResemblesLocatorWarning(UserWarning): + """The warning issued when BeautifulSoup is given 'markup' that + actually looks like a resource locator -- a URL or a path to a file + on disk. + """ + class BeautifulSoup(Tag): - """ - This class defines the basic interface called by the tree builders. + """A data structure representing a parsed HTML or XML document. - These methods will be called by the parser: - reset() - feed(markup) + Most of the methods you'll call on a BeautifulSoup object are inherited from + PageElement or Tag. + + Internally, this class defines the basic interface called by the + tree builders when converting an HTML/XML document into a data + structure. The interface abstracts away the differences between + parsers. To write a new tree builder, you'll need to understand + these methods as a whole. + + These methods will be called by the BeautifulSoup constructor: + * reset() + * feed(markup) The tree builder may call these methods from its feed() implementation: - handle_starttag(name, attrs) # See note about return value - handle_endtag(name) - handle_data(data) # Appends to the current data node - endData(containerClass=NavigableString) # Ends the current data node + * handle_starttag(name, attrs) # See note about return value + * handle_endtag(name) + * handle_data(data) # Appends to the current data node + * endData(containerClass) # Ends the current data node No matter how complicated the underlying parser is, you should be able to build a tree using 'start tag' events, 'end tag' events, @@ -69,24 +98,77 @@ class BeautifulSoup(Tag): like HTML's
tag), call handle_starttag and then handle_endtag. """ - ROOT_TAG_NAME = u'[document]' + + # Since BeautifulSoup subclasses Tag, it's possible to treat it as + # a Tag with a .name. This name makes it clear the BeautifulSoup + # object isn't a real markup tag. + ROOT_TAG_NAME = '[document]' # If the end-user gives no indication which tree builder they # want, look for one with these features. DEFAULT_BUILDER_FEATURES = ['html', 'fast'] + # A string containing all ASCII whitespace characters, used in + # endData() to detect data chunks that seem 'empty'. ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" - + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" + def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, - **kwargs): - """The Soup object is initialized as the 'root tag', and the - provided markup (which can be a string or a file-like object) - is fed into the underlying parser.""" + element_classes=None, **kwargs): + """Constructor. + :param markup: A string or a file-like object representing + markup to be parsed. + + :param features: Desirable features of the parser to be + used. This may be the name of a specific parser ("lxml", + "lxml-xml", "html.parser", or "html5lib") or it may be the + type of markup to be used ("html", "html5", "xml"). It's + recommended that you name a specific parser, so that + Beautiful Soup gives you the same results across platforms + and virtual environments. + + :param builder: A TreeBuilder subclass to instantiate (or + instance to use) instead of looking one up based on + `features`. You only need to use this if you've implemented a + custom TreeBuilder. + + :param parse_only: A SoupStrainer. Only parts of the document + matching the SoupStrainer will be considered. This is useful + when parsing part of a document that would otherwise be too + large to fit into memory. + + :param from_encoding: A string indicating the encoding of the + document to be parsed. Pass this in if Beautiful Soup is + guessing wrongly about the document's encoding. + + :param exclude_encodings: A list of strings indicating + encodings known to be wrong. Pass this in if you don't know + the document's encoding but you know Beautiful Soup's guess is + wrong. + + :param element_classes: A dictionary mapping BeautifulSoup + classes like Tag and NavigableString, to other classes you'd + like to be instantiated instead as the parse tree is + built. This is useful for subclassing Tag or NavigableString + to modify default behavior. + + :param kwargs: For backwards compatibility purposes, the + constructor accepts certain keyword arguments used in + Beautiful Soup 3. None of these arguments do anything in + Beautiful Soup 4; they will result in a warning and then be + ignored. + + Apart from this, any keyword arguments passed into the + BeautifulSoup constructor are propagated to the TreeBuilder + constructor. This makes it possible to configure a + TreeBuilder by passing in arguments, not just by saying which + one to use. + """ if 'convertEntities' in kwargs: + del kwargs['convertEntities'] warnings.warn( "BS4 does not respect the convertEntities argument to the " "BeautifulSoup constructor. Entities are always converted " @@ -137,14 +219,24 @@ class BeautifulSoup(Tag): from_encoding = from_encoding or deprecated_argument( "fromEncoding", "from_encoding") - if len(kwargs) > 0: - arg = kwargs.keys().pop() - raise TypeError( - "__init__() got an unexpected keyword argument '%s'" % arg) + if from_encoding and isinstance(markup, str): + warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") + from_encoding = None - if builder is None: - original_features = features - if isinstance(features, basestring): + self.element_classes = element_classes or dict() + + # We need this information to track whether or not the builder + # was specified well enough that we can omit the 'you need to + # specify a parser' warning. + original_builder = builder + original_features = features + + if isinstance(builder, type): + # A builder class was passed in; it needs to be instantiated. + builder_class = builder + builder = None + elif builder is None: + if isinstance(features, str): features = [features] if features is None or len(features) == 0: features = self.DEFAULT_BUILDER_FEATURES @@ -154,58 +246,113 @@ class BeautifulSoup(Tag): "Couldn't find a tree builder with the features you " "requested: %s. Do you need to install a parser library?" % ",".join(features)) - builder = builder_class() - if not (original_features == builder.NAME or - original_features in builder.ALTERNATE_NAMES): + + # At this point either we have a TreeBuilder instance in + # builder, or we have a builder_class that we can instantiate + # with the remaining **kwargs. + if builder is None: + builder = builder_class(**kwargs) + if not original_builder and not ( + original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES + ) and markup: + # The user did not tell us which TreeBuilder to use, + # and we had to guess. Issue a warning. if builder.is_xml: markup_type = "XML" else: markup_type = "HTML" - warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( - parser=builder.NAME, - markup_type=markup_type)) + # This code adapted from warnings.py so that we get the same line + # of code as our warnings.warn() call gets, even if the answer is wrong + # (as it may be in a multithreading situation). + caller = None + try: + caller = sys._getframe(1) + except ValueError: + pass + if caller: + globals = caller.f_globals + line_number = caller.f_lineno + else: + globals = sys.__dict__ + line_number= 1 + filename = globals.get('__file__') + if filename: + fnl = filename.lower() + if fnl.endswith((".pyc", ".pyo")): + filename = filename[:-1] + if filename: + # If there is no filename at all, the user is most likely in a REPL, + # and the warning is not necessary. + values = dict( + filename=filename, + line_number=line_number, + parser=builder.NAME, + markup_type=markup_type + ) + warnings.warn( + self.NO_PARSER_SPECIFIED_WARNING % values, + GuessedAtParserWarning, stacklevel=2 + ) + else: + if kwargs: + warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.") + self.builder = builder self.is_xml = builder.is_xml - self.builder.soup = self - + self.known_xml = self.is_xml + self._namespaces = dict() self.parse_only = parse_only + self.builder.initialize_soup(self) + if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() - elif len(markup) <= 256: + elif len(markup) <= 256 and ( + (isinstance(markup, bytes) and not b'<' in markup) + or (isinstance(markup, str) and not '<' in markup) + ): # Print out warnings for a couple beginner problems # involving passing non-markup to Beautiful Soup. # Beautiful Soup will still parse the input as markup, # just in case that's what the user really wants. - if (isinstance(markup, unicode) + if (isinstance(markup, str) and not os.path.supports_unicode_filenames): possible_filename = markup.encode("utf8") else: possible_filename = markup is_file = False + is_directory = False try: is_file = os.path.exists(possible_filename) - except Exception, e: + if is_file: + is_directory = os.path.isdir(possible_filename) + except Exception as e: # This is almost certainly a problem involving # characters not valid in filenames on this # system. Just let it go. pass - if is_file: - if isinstance(markup, unicode): - markup = markup.encode("utf8") + if is_directory: warnings.warn( - '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) - if markup[:5] == "http:" or markup[:6] == "https:": - # TODO: This is ugly but I couldn't get it to work in - # Python 3 otherwise. - if ((isinstance(markup, bytes) and not b' ' in markup) - or (isinstance(markup, unicode) and not u' ' in markup)): - if isinstance(markup, unicode): - markup = markup.encode("utf8") - warnings.warn( - '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) + '"%s" looks like a directory name, not markup. You may' + ' want to open a file found in this directory and pass' + ' the filehandle into Beautiful Soup.' % ( + self._decode_markup(markup) + ), + MarkupResemblesLocatorWarning + ) + elif is_file: + warnings.warn( + '"%s" looks like a filename, not markup. You should' + ' probably open this file and pass the filehandle into' + ' Beautiful Soup.' % self._decode_markup(markup), + MarkupResemblesLocatorWarning + ) + self._check_markup_is_url(markup) + rejections = [] + success = False for (self.markup, self.original_encoding, self.declared_html_encoding, self.contains_replacement_characters) in ( self.builder.prepare_markup( @@ -213,26 +360,88 @@ class BeautifulSoup(Tag): self.reset() try: self._feed() + success = True break - except ParserRejectedMarkup: + except ParserRejectedMarkup as e: + rejections.append(e) pass + if not success: + other_exceptions = [str(e) for e in rejections] + raise ParserRejectedMarkup( + "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) + ) + # Clear out the markup and remove the builder's circular # reference to this object. self.markup = None self.builder.soup = None def __copy__(self): - return type(self)(self.encode(), builder=self.builder) + """Copy a BeautifulSoup object by converting the document to a string and parsing it again.""" + copy = type(self)( + self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' + ) + + # Although we encoded the tree to UTF-8, that may not have + # been the encoding of the original markup. Set the copy's + # .original_encoding to reflect the original object's + # .original_encoding. + copy.original_encoding = self.original_encoding + return copy def __getstate__(self): # Frequently a tree builder can't be pickled. d = dict(self.__dict__) if 'builder' in d and not self.builder.picklable: - del d['builder'] + d['builder'] = None return d + @classmethod + def _decode_markup(cls, markup): + """Ensure `markup` is bytes so it's safe to send into warnings.warn. + + TODO: warnings.warn had this problem back in 2010 but it might not + anymore. + """ + if isinstance(markup, bytes): + decoded = markup.decode('utf-8', 'replace') + else: + decoded = markup + return decoded + + @classmethod + def _check_markup_is_url(cls, markup): + """Error-handling method to raise a warning if incoming markup looks + like a URL. + + :param markup: A string. + """ + if isinstance(markup, bytes): + space = b' ' + cant_start_with = (b"http:", b"https:") + elif isinstance(markup, str): + space = ' ' + cant_start_with = ("http:", "https:") + else: + return + + if any(markup.startswith(prefix) for prefix in cant_start_with): + if not space in markup: + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an' + ' HTTP client. You should probably use an HTTP client like' + ' requests to get the document behind the URL, and feed' + ' that document to Beautiful Soup.' % cls._decode_markup( + markup + ), + MarkupResemblesLocatorWarning + ) + def _feed(self): + """Internal method that parses previously set markup, creating a large + number of Tag and NavigableString objects. + """ # Convert the document to Unicode. self.builder.reset() @@ -243,50 +452,112 @@ class BeautifulSoup(Tag): self.popTag() def reset(self): + """Reset this object to a state as though it had never parsed any + markup. + """ Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) self.hidden = 1 self.builder.reset() self.current_data = [] self.currentTag = None self.tagStack = [] + self.open_tag_counter = Counter() self.preserve_whitespace_tag_stack = [] + self.string_container_stack = [] self.pushTag(self) - def new_tag(self, name, namespace=None, nsprefix=None, **attrs): - """Create a new tag associated with this soup.""" - return Tag(None, self.builder, name, namespace, nsprefix, attrs) + def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, + sourceline=None, sourcepos=None, **kwattrs): + """Create a new Tag associated with this BeautifulSoup object. - def new_string(self, s, subclass=NavigableString): - """Create a new NavigableString associated with this soup.""" - return subclass(s) + :param name: The name of the new Tag. + :param namespace: The URI of the new Tag's XML namespace, if any. + :param prefix: The prefix for the new Tag's XML namespace, if any. + :param attrs: A dictionary of this Tag's attribute values; can + be used instead of `kwattrs` for attributes like 'class' + that are reserved words in Python. + :param sourceline: The line number where this tag was + (purportedly) found in its source document. + :param sourcepos: The character position within `sourceline` where this + tag was (purportedly) found. + :param kwattrs: Keyword arguments for the new Tag's attribute values. - def insert_before(self, successor): + """ + kwattrs.update(attrs) + return self.element_classes.get(Tag, Tag)( + None, self.builder, name, namespace, nsprefix, kwattrs, + sourceline=sourceline, sourcepos=sourcepos + ) + + def string_container(self, base_class=None): + container = base_class or NavigableString + + # There may be a general override of NavigableString. + container = self.element_classes.get( + container, container + ) + + # On top of that, we may be inside a tag that needs a special + # container class. + if self.string_container_stack and container is NavigableString: + container = self.builder.string_containers.get( + self.string_container_stack[-1].name, container + ) + return container + + def new_string(self, s, subclass=None): + """Create a new NavigableString associated with this BeautifulSoup + object. + """ + container = self.string_container(subclass) + return container(s) + + def insert_before(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ raise NotImplementedError("BeautifulSoup objects don't support insert_before().") - def insert_after(self, successor): + def insert_after(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ raise NotImplementedError("BeautifulSoup objects don't support insert_after().") def popTag(self): + """Internal method called by _popToTag when a tag is closed.""" tag = self.tagStack.pop() + if tag.name in self.open_tag_counter: + self.open_tag_counter[tag.name] -= 1 if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: self.preserve_whitespace_tag_stack.pop() - #print "Pop", tag.name + if self.string_container_stack and tag == self.string_container_stack[-1]: + self.string_container_stack.pop() + #print("Pop", tag.name) if self.tagStack: self.currentTag = self.tagStack[-1] return self.currentTag def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: + """Internal method called by handle_starttag when a tag is opened.""" + #print("Push", tag.name) + if self.currentTag is not None: self.currentTag.contents.append(tag) self.tagStack.append(tag) self.currentTag = self.tagStack[-1] + if tag.name != self.ROOT_TAG_NAME: + self.open_tag_counter[tag.name] += 1 if tag.name in self.builder.preserve_whitespace_tags: self.preserve_whitespace_tag_stack.append(tag) + if tag.name in self.builder.string_containers: + self.string_container_stack.append(tag) - def endData(self, containerClass=NavigableString): + def endData(self, containerClass=None): + """Method called by the TreeBuilder when the end of a data segment + occurs. + """ if self.current_data: - current_data = u''.join(self.current_data) + current_data = ''.join(self.current_data) # If whitespace is not preserved, and this string contains # nothing but ASCII spaces, replace it with a single space # or newline. @@ -311,61 +582,93 @@ class BeautifulSoup(Tag): not self.parse_only.search(current_data)): return + containerClass = self.string_container(containerClass) o = containerClass(current_data) self.object_was_parsed(o) def object_was_parsed(self, o, parent=None, most_recent_element=None): - """Add an object to the parse tree.""" - parent = parent or self.currentTag - previous_element = most_recent_element or self._most_recent_element + """Method called by the TreeBuilder to integrate an object into the parse tree.""" + if parent is None: + parent = self.currentTag + if most_recent_element is not None: + previous_element = most_recent_element + else: + previous_element = self._most_recent_element next_element = previous_sibling = next_sibling = None if isinstance(o, Tag): next_element = o.next_element next_sibling = o.next_sibling previous_sibling = o.previous_sibling - if not previous_element: + if previous_element is None: previous_element = o.previous_element + fix = parent.next_element is not None + o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) self._most_recent_element = o parent.contents.append(o) - if parent.next_sibling: - # This node is being inserted into an element that has - # already been parsed. Deal with any dangling references. - index = parent.contents.index(o) - if index == 0: - previous_element = parent - previous_sibling = None - else: - previous_element = previous_sibling = parent.contents[index-1] - if index == len(parent.contents)-1: - next_element = parent.next_sibling - next_sibling = None - else: - next_element = next_sibling = parent.contents[index+1] + # Check if we are inserting into an already parsed node. + if fix: + self._linkage_fixer(parent) - o.previous_element = previous_element - if previous_element: - previous_element.next_element = o - o.next_element = next_element - if next_element: - next_element.previous_element = o - o.next_sibling = next_sibling - if next_sibling: - next_sibling.previous_sibling = o - o.previous_sibling = previous_sibling - if previous_sibling: - previous_sibling.next_sibling = o + def _linkage_fixer(self, el): + """Make sure linkage of this fragment is sound.""" + + first = el.contents[0] + child = el.contents[-1] + descendant = child + + if child is first and el.parent is not None: + # Parent should be linked to first child + el.next_element = child + # We are no longer linked to whatever this element is + prev_el = child.previous_element + if prev_el is not None and prev_el is not el: + prev_el.next_element = None + # First child should be linked to the parent, and no previous siblings. + child.previous_element = el + child.previous_sibling = None + + # We have no sibling as we've been appended as the last. + child.next_sibling = None + + # This index is a tag, dig deeper for a "last descendant" + if isinstance(child, Tag) and child.contents: + descendant = child._last_descendant(False) + + # As the final step, link last descendant. It should be linked + # to the parent's next sibling (if found), else walk up the chain + # and find a parent with a sibling. It should have no next sibling. + descendant.next_element = None + descendant.next_sibling = None + target = el + while True: + if target is None: + break + elif target.next_sibling is not None: + descendant.next_element = target.next_sibling + target.next_sibling.previous_element = child + break + target = target.parent def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - #print "Popping to %s" % name + instance of the given tag. + + If there are no open tags with the given name, nothing will be + popped. + + :param name: Pop up to the most recent tag with this name. + :param nsprefix: The namespace prefix that goes with `name`. + :param inclusivePop: It this is false, pops the tag stack up + to but *not* including the most recent instqance of the + given tag. + + """ + #print("Popping to %s" % name) if name == self.ROOT_TAG_NAME: # The BeautifulSoup object itself can never be popped. return @@ -374,6 +677,8 @@ class BeautifulSoup(Tag): stack_size = len(self.tagStack) for i in range(stack_size - 1, 0, -1): + if not self.open_tag_counter.get(name): + break t = self.tagStack[i] if (name == t.name and nsprefix == t.prefix): if inclusivePop: @@ -383,16 +688,24 @@ class BeautifulSoup(Tag): return most_recently_popped - def handle_starttag(self, name, namespace, nsprefix, attrs): - """Push a start tag on to the stack. + def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None, + sourcepos=None): + """Called by the tree builder when a new tag is encountered. - If this method returns None, the tag was rejected by the - SoupStrainer. You should proceed as if the tag had not occured + :param name: Name of the tag. + :param nsprefix: Namespace prefix for the tag. + :param attrs: A dictionary of attribute values. + :param sourceline: The line number where this tag was found in its + source document. + :param sourcepos: The character position within `sourceline` where this + tag was found. + + If this method returns None, the tag was rejected by an active + SoupStrainer. You should proceed as if the tag had not occurred in the document. For instance, if this was a self-closing tag, don't call handle_endtag. """ - - # print "Start tag %s: %s" % (name, attrs) + # print("Start tag %s: %s" % (name, attrs)) self.endData() if (self.parse_only and len(self.tagStack) <= 1 @@ -400,38 +713,57 @@ class BeautifulSoup(Tag): or not self.parse_only.search_tag(name, attrs))): return None - tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, - self.currentTag, self._most_recent_element) + tag = self.element_classes.get(Tag, Tag)( + self, self.builder, name, namespace, nsprefix, attrs, + self.currentTag, self._most_recent_element, + sourceline=sourceline, sourcepos=sourcepos + ) if tag is None: return tag - if self._most_recent_element: + if self._most_recent_element is not None: self._most_recent_element.next_element = tag self._most_recent_element = tag self.pushTag(tag) return tag def handle_endtag(self, name, nsprefix=None): - #print "End tag: " + name + """Called by the tree builder when an ending tag is encountered. + + :param name: Name of the tag. + :param nsprefix: Namespace prefix for the tag. + """ + #print("End tag: " + name) self.endData() self._popToTag(name, nsprefix) def handle_data(self, data): + """Called by the tree builder when a chunk of textual data is encountered.""" self.current_data.append(data) - + def decode(self, pretty_print=False, eventual_encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"): - """Returns a string or Unicode representation of this document. - To get Unicode, pass None for encoding.""" + """Returns a string or Unicode representation of the parse tree + as an HTML or XML document. + :param pretty_print: If this is True, indentation will be used to + make the document more readable. + :param eventual_encoding: The encoding of the final document. + If this is None, the document will be a Unicode string. + """ if self.is_xml: # Print the XML declaration encoding_part = '' + if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS: + # This is a special Python encoding; it can't actually + # go into an XML document because it means nothing + # outside of Python. + eventual_encoding = None if eventual_encoding != None: encoding_part = ' encoding="%s"' % eventual_encoding - prefix = u'\n' % encoding_part + prefix = '\n' % encoding_part else: - prefix = u'' + prefix = '' if not pretty_print: indent_level = None else: @@ -439,7 +771,7 @@ class BeautifulSoup(Tag): return prefix + super(BeautifulSoup, self).decode( indent_level, eventual_encoding, formatter) -# Alias to make it easier to type import: 'from bs4 import _soup' +# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup' _s = BeautifulSoup _soup = BeautifulSoup @@ -455,14 +787,18 @@ class BeautifulStoneSoup(BeautifulSoup): class StopParsing(Exception): + """Exception raised by a TreeBuilder if it's unable to continue parsing.""" pass class FeatureNotFound(ValueError): + """Exception raised by the BeautifulSoup constructor if no parser with the + requested features is found. + """ pass -#By default, act as an HTML pretty-printer. +#If this file is run as a script, act as an HTML pretty-printer. if __name__ == '__main__': import sys soup = BeautifulSoup(sys.stdin) - print soup.prettify() + print((soup.prettify())) diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py index f8fce568..bd44905e 100644 --- a/lib/bs4/builder/__init__.py +++ b/lib/bs4/builder/__init__.py @@ -1,11 +1,17 @@ +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + from collections import defaultdict import itertools import sys from bs4.element import ( CharsetMetaAttributeValue, ContentMetaAttributeValue, - whitespace_re - ) + Stylesheet, + Script, + TemplateString, + nonwhitespace_re +) __all__ = [ 'HTMLTreeBuilder', @@ -24,18 +30,33 @@ HTML_5 = 'html5' class TreeBuilderRegistry(object): - + """A way of looking up TreeBuilder subclasses by their name or by desired + features. + """ + def __init__(self): self.builders_for_feature = defaultdict(list) self.builders = [] def register(self, treebuilder_class): - """Register a treebuilder based on its advertised features.""" + """Register a treebuilder based on its advertised features. + + :param treebuilder_class: A subclass of Treebuilder. its .features + attribute should list its features. + """ for feature in treebuilder_class.features: self.builders_for_feature[feature].insert(0, treebuilder_class) self.builders.insert(0, treebuilder_class) def lookup(self, *features): + """Look up a TreeBuilder subclass with the desired features. + + :param features: A list of features to look for. If none are + provided, the most recently registered TreeBuilder subclass + will be used. + :return: A TreeBuilder subclass, or None if there's no + registered subclass with all the requested features. + """ if len(self.builders) == 0: # There are no builders at all. return None @@ -78,7 +99,7 @@ class TreeBuilderRegistry(object): builder_registry = TreeBuilderRegistry() class TreeBuilder(object): - """Turn a document into a Beautiful Soup object tree.""" + """Turn a textual document into a Beautiful Soup object tree.""" NAME = "[Unknown tree builder]" ALTERNATE_NAMES = [] @@ -86,19 +107,89 @@ class TreeBuilder(object): is_xml = False picklable = False - preserve_whitespace_tags = set() empty_element_tags = None # A tag will be considered an empty-element # tag when and only when it has no contents. - + # A value for these tag/attribute combinations is a space- or # comma-separated list of CDATA, rather than a single CDATA. - cdata_list_attributes = {} + DEFAULT_CDATA_LIST_ATTRIBUTES = {} + # Whitespace should be preserved inside these tags. + DEFAULT_PRESERVE_WHITESPACE_TAGS = set() - def __init__(self): + # The textual contents of tags with these names should be + # instantiated with some class other than NavigableString. + DEFAULT_STRING_CONTAINERS = {} + + USE_DEFAULT = object() + + # Most parsers don't keep track of line numbers. + TRACKS_LINE_NUMBERS = False + + def __init__(self, multi_valued_attributes=USE_DEFAULT, + preserve_whitespace_tags=USE_DEFAULT, + store_line_numbers=USE_DEFAULT, + string_containers=USE_DEFAULT, + ): + """Constructor. + + :param multi_valued_attributes: If this is set to None, the + TreeBuilder will not turn any values for attributes like + 'class' into lists. Setting this to a dictionary will + customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES + for an example. + + Internally, these are called "CDATA list attributes", but that + probably doesn't make sense to an end-user, so the argument name + is `multi_valued_attributes`. + + :param preserve_whitespace_tags: A list of tags to treat + the way
 tags are treated in HTML. Tags in this list
+         are immune from pretty-printing; their contents will always be
+         output as-is.
+
+        :param string_containers: A dictionary mapping tag names to
+        the classes that should be instantiated to contain the textual
+        contents of those tags. The default is to use NavigableString
+        for every tag, no matter what the name. You can override the
+        default by changing DEFAULT_STRING_CONTAINERS.
+
+        :param store_line_numbers: If the parser keeps track of the
+         line numbers and positions of the original markup, that
+         information will, by default, be stored in each corresponding
+         `Tag` object. You can turn this off by passing
+         store_line_numbers=False. If the parser you're using doesn't 
+         keep track of this information, then setting store_line_numbers=True
+         will do nothing.
+        """
         self.soup = None
+        if multi_valued_attributes is self.USE_DEFAULT:
+            multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
+        self.cdata_list_attributes = multi_valued_attributes
+        if preserve_whitespace_tags is self.USE_DEFAULT:
+            preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
+        self.preserve_whitespace_tags = preserve_whitespace_tags
+        if store_line_numbers == self.USE_DEFAULT:
+            store_line_numbers = self.TRACKS_LINE_NUMBERS
+        self.store_line_numbers = store_line_numbers 
+        if string_containers == self.USE_DEFAULT:
+            string_containers = self.DEFAULT_STRING_CONTAINERS
+        self.string_containers = string_containers
+        
+    def initialize_soup(self, soup):
+        """The BeautifulSoup object has been initialized and is now
+        being associated with the TreeBuilder.
 
+        :param soup: A BeautifulSoup object.
+        """
+        self.soup = soup
+        
     def reset(self):
+        """Do any work necessary to reset the underlying parser
+        for a new document.
+
+        By default, this does nothing.
+        """
         pass
 
     def can_be_empty_element(self, tag_name):
@@ -110,24 +201,58 @@ class TreeBuilder(object):
         For instance: an HTMLBuilder does not consider a 

tag to be an empty-element tag (it's not in HTMLBuilder.empty_element_tags). This means an empty

tag - will be presented as "

", not "

". + will be presented as "

", not "

" or "

". The default implementation has no opinion about which tags are empty-element tags, so a tag will be presented as an - empty-element tag if and only if it has no contents. - "" will become "", and "bar" will + empty-element tag if and only if it has no children. + "" will become "", and "bar" will be left alone. + + :param tag_name: The name of a markup tag. """ if self.empty_element_tags is None: return True return tag_name in self.empty_element_tags - + def feed(self, markup): + """Run some incoming markup through some parsing process, + populating the `BeautifulSoup` object in self.soup. + + This method is not implemented in TreeBuilder; it must be + implemented in subclasses. + + :return: None. + """ raise NotImplementedError() def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None): - return markup, None, None, False + document_declared_encoding=None, exclude_encodings=None): + """Run any preliminary steps necessary to make incoming markup + acceptable to the parser. + + :param markup: Some markup -- probably a bytestring. + :param user_specified_encoding: The user asked to try this encoding. + :param document_declared_encoding: The markup itself claims to be + in this encoding. NOTE: This argument is not used by the + calling code and can probably be removed. + :param exclude_encodings: The user asked _not_ to try any of + these encodings. + + :yield: A series of 4-tuples: + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for converting the + document to Unicode and parsing it. Each strategy will be tried + in turn. + + By default, the only strategy is to parse the markup + as-is. See `LXMLTreeBuilderForXML` and + `HTMLParserTreeBuilder` for implementations that take into + account the quirks of particular parsers. + """ + yield markup, None, None, False def test_fragment_to_document(self, fragment): """Wrap an HTML fragment to make it look like a document. @@ -139,16 +264,36 @@ class TreeBuilder(object): results against other HTML fragments. This method should not be used outside of tests. + + :param fragment: A string -- fragment of HTML. + :return: A string -- a full HTML document. """ return fragment def set_up_substitutions(self, tag): + """Set up any substitutions that will need to be performed on + a `Tag` when it's output as a string. + + By default, this does nothing. See `HTMLTreeBuilder` for a + case where this is used. + + :param tag: A `Tag` + :return: Whether or not a substitution was performed. + """ return False def _replace_cdata_list_attribute_values(self, tag_name, attrs): - """Replaces class="foo bar" with class=["foo", "bar"] + """When an attribute value is associated with a tag that can + have multiple values for that attribute, convert the string + value to a list of strings. - Modifies its input in place. + Basically, replaces class="foo bar" with class=["foo", "bar"] + + NOTE: This method modifies its input in place. + + :param tag_name: The name of a tag. + :param attrs: A dictionary containing the tag's attributes. + Any appropriate attribute values will be modified in place. """ if not attrs: return attrs @@ -156,14 +301,14 @@ class TreeBuilder(object): universal = self.cdata_list_attributes.get('*', []) tag_specific = self.cdata_list_attributes.get( tag_name.lower(), None) - for attr in attrs.keys(): + for attr in list(attrs.keys()): if attr in universal or (tag_specific and attr in tag_specific): # We have a "class"-type attribute whose string # value is a whitespace-separated list of # values. Split it into a list. value = attrs[attr] - if isinstance(value, basestring): - values = whitespace_re.split(value) + if isinstance(value, str): + values = nonwhitespace_re.findall(value) else: # html5lib sometimes calls setAttributes twice # for the same tag when rearranging the parse @@ -176,7 +321,11 @@ class TreeBuilder(object): return attrs class SAXTreeBuilder(TreeBuilder): - """A Beautiful Soup treebuilder that listens for SAX events.""" + """A Beautiful Soup treebuilder that listens for SAX events. + + This is not currently used for anything, but it demonstrates + how a simple TreeBuilder would work. + """ def feed(self, markup): raise NotImplementedError() @@ -186,11 +335,11 @@ class SAXTreeBuilder(TreeBuilder): def startElement(self, name, attrs): attrs = dict((key[1], value) for key, value in list(attrs.items())) - #print "Start %s, %r" % (name, attrs) + #print("Start %s, %r" % (name, attrs)) self.soup.handle_starttag(name, attrs) def endElement(self, name): - #print "End %s" % name + #print("End %s" % name) self.soup.handle_endtag(name) def startElementNS(self, nsTuple, nodeName, attrs): @@ -227,10 +376,36 @@ class HTMLTreeBuilder(TreeBuilder): Such as which tags are empty-element tags. """ - preserve_whitespace_tags = set(['pre', 'textarea']) - empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) + empty_element_tags = set([ + # These are from HTML5. + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', + + # These are from earlier versions of HTML and are removed in HTML5. + 'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer' + ]) + # The HTML standard defines these as block-level elements. Beautiful + # Soup does not treat these elements differently from other elements, + # but it may do so eventually, and this information is available if + # you need to use it. + block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) + + # The HTML standard defines an unusual content model for these tags. + # We represent this by using a string class other than NavigableString + # inside these tags. + # + # I made this list by going through the HTML spec + # (https://html.spec.whatwg.org/#metadata-content) and looking for + # "metadata content" elements that can contain strings. + # + # TODO: Arguably

as a diff --git a/lib/bs4/builder/_lxml.py b/lib/bs4/builder/_lxml.py index 9e8f88fb..11c9a696 100644 --- a/lib/bs4/builder/_lxml.py +++ b/lib/bs4/builder/_lxml.py @@ -1,17 +1,25 @@ +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + __all__ = [ 'LXMLTreeBuilderForXML', 'LXMLTreeBuilder', ] +try: + from collections.abc import Callable # Python 3.6 +except ImportError as e: + from collections import Callable + from io import BytesIO -from StringIO import StringIO -import collections +from io import StringIO from lxml import etree from bs4.element import ( Comment, Doctype, NamespacedAttribute, ProcessingInstruction, + XMLProcessingInstruction, ) from bs4.builder import ( FAST, @@ -25,10 +33,15 @@ from bs4.dammit import EncodingDetector LXML = 'lxml' +def _invert(d): + "Invert a dictionary." + return dict((v,k) for k, v in list(d.items())) + class LXMLTreeBuilderForXML(TreeBuilder): DEFAULT_PARSER_CLASS = etree.XMLParser is_xml = True + processing_instruction_class = XMLProcessingInstruction NAME = "lxml-xml" ALTERNATE_NAMES = ["xml"] @@ -40,26 +53,70 @@ class LXMLTreeBuilderForXML(TreeBuilder): # This namespace mapping is specified in the XML Namespace # standard. - DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} + DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace') + + DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS) + + # NOTE: If we parsed Element objects and looked at .sourceline, + # we'd be able to see the line numbers from the original document. + # But instead we build an XMLParser or HTMLParser object to serve + # as the target of parse messages, and those messages don't include + # line numbers. + # See: https://bugs.launchpad.net/lxml/+bug/1846906 + + def initialize_soup(self, soup): + """Let the BeautifulSoup object know about the standard namespace + mapping. + + :param soup: A `BeautifulSoup`. + """ + super(LXMLTreeBuilderForXML, self).initialize_soup(soup) + self._register_namespaces(self.DEFAULT_NSMAPS) + + def _register_namespaces(self, mapping): + """Let the BeautifulSoup object know about namespaces encountered + while parsing the document. + + This might be useful later on when creating CSS selectors. + + :param mapping: A dictionary mapping namespace prefixes to URIs. + """ + for key, value in list(mapping.items()): + if key and key not in self.soup._namespaces: + # Let the BeautifulSoup object know about a new namespace. + # If there are multiple namespaces defined with the same + # prefix, the first one in the document takes precedence. + self.soup._namespaces[key] = value def default_parser(self, encoding): - # This can either return a parser object or a class, which - # will be instantiated with default arguments. + """Find the default parser for the given encoding. + + :param encoding: A string. + :return: Either a parser object or a class, which + will be instantiated with default arguments. + """ if self._default_parser is not None: return self._default_parser return etree.XMLParser( target=self, strip_cdata=False, recover=True, encoding=encoding) def parser_for(self, encoding): + """Instantiate an appropriate parser for the given encoding. + + :param encoding: A string. + :return: A parser object such as an `etree.XMLParser`. + """ # Use the default parser. parser = self.default_parser(encoding) - if isinstance(parser, collections.Callable): + if isinstance(parser, Callable): # Instantiate the parser with default arguments - parser = parser(target=self, strip_cdata=False, encoding=encoding) + parser = parser( + target=self, strip_cdata=False, recover=True, encoding=encoding + ) return parser - def __init__(self, parser=None, empty_element_tags=None): + def __init__(self, parser=None, empty_element_tags=None, **kwargs): # TODO: Issue a warning if parser is present but not a # callable, since that means there's no way to create new # parsers for different encodings. @@ -67,8 +124,9 @@ class LXMLTreeBuilderForXML(TreeBuilder): if empty_element_tags is not None: self.empty_element_tags = set(empty_element_tags) self.soup = None - self.nsmaps = [self.DEFAULT_NSMAPS] - + self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED] + super(LXMLTreeBuilderForXML, self).__init__(**kwargs) + def _getNsTag(self, tag): # Split the namespace URL out of a fully-qualified lxml tag # name. Copied from lxml's src/lxml/sax.py. @@ -80,39 +138,68 @@ class LXMLTreeBuilderForXML(TreeBuilder): def prepare_markup(self, markup, user_specified_encoding=None, exclude_encodings=None, document_declared_encoding=None): - """ - :yield: A series of 4-tuples. + """Run any preliminary steps necessary to make incoming markup + acceptable to the parser. + + lxml really wants to get a bytestring and convert it to + Unicode itself. So instead of using UnicodeDammit to convert + the bytestring to Unicode using different encodings, this + implementation uses EncodingDetector to iterate over the + encodings, and tell lxml to try to parse the document as each + one in turn. + + :param markup: Some markup -- hopefully a bytestring. + :param user_specified_encoding: The user asked to try this encoding. + :param document_declared_encoding: The markup itself claims to be + in this encoding. + :param exclude_encodings: The user asked _not_ to try any of + these encodings. + + :yield: A series of 4-tuples: (markup, encoding, declared encoding, has undergone character replacement) - Each 4-tuple represents a strategy for parsing the document. + Each 4-tuple represents a strategy for converting the + document to Unicode and parsing it. Each strategy will be tried + in turn. """ - if isinstance(markup, unicode): + is_html = not self.is_xml + if is_html: + self.processing_instruction_class = ProcessingInstruction + else: + self.processing_instruction_class = XMLProcessingInstruction + + if isinstance(markup, str): # We were given Unicode. Maybe lxml can parse Unicode on # this system? yield markup, None, document_declared_encoding, False - if isinstance(markup, unicode): + if isinstance(markup, str): # No, apparently not. Convert the Unicode to UTF-8 and # tell lxml to parse it as UTF-8. yield (markup.encode("utf8"), "utf8", document_declared_encoding, False) - # Instead of using UnicodeDammit to convert the bytestring to - # Unicode using different encodings, use EncodingDetector to - # iterate over the encodings, and tell lxml to try to parse - # the document as each one in turn. - is_html = not self.is_xml - try_encodings = [user_specified_encoding, document_declared_encoding] + # This was provided by the end-user; treat it as a known + # definite encoding per the algorithm laid out in the HTML5 + # spec. (See the EncodingDetector class for details.) + known_definite_encodings = [user_specified_encoding] + + # This was found in the document; treat it as a slightly lower-priority + # user encoding. + user_encodings = [document_declared_encoding] detector = EncodingDetector( - markup, try_encodings, is_html, exclude_encodings) + markup, known_definite_encodings=known_definite_encodings, + user_encodings=user_encodings, is_html=is_html, + exclude_encodings=exclude_encodings + ) for encoding in detector.encodings: yield (detector.markup, encoding, document_declared_encoding, False) def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) - elif isinstance(markup, unicode): + elif isinstance(markup, str): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, @@ -127,30 +214,36 @@ class LXMLTreeBuilderForXML(TreeBuilder): if len(data) != 0: self.parser.feed(data) self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError), e: - raise ParserRejectedMarkup(str(e)) + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(e) def close(self): - self.nsmaps = [self.DEFAULT_NSMAPS] + self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED] def start(self, name, attrs, nsmap={}): # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. attrs = dict(attrs) nsprefix = None # Invert each namespace map as it comes in. - if len(self.nsmaps) > 1: - # There are no new namespaces for this tag, but - # non-default namespaces are in play, so we need a - # separate tag stack to know when they end. - self.nsmaps.append(None) + if len(nsmap) == 0 and len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. + self.nsmaps.append(None) elif len(nsmap) > 0: # A new namespace mapping has come into play. - inverted_nsmap = dict((value, key) for key, value in nsmap.items()) - self.nsmaps.append(inverted_nsmap) + + # First, Let the BeautifulSoup object know about it. + self._register_namespaces(nsmap) + + # Then, add it to our running list of inverted namespace + # mappings. + self.nsmaps.append(_invert(nsmap)) + # Also treat the namespace mapping as a set of attributes on the # tag, so we can recreate it later. attrs = attrs.copy() - for prefix, namespace in nsmap.items(): + for prefix, namespace in list(nsmap.items()): attribute = NamespacedAttribute( "xmlns", prefix, "http://www.w3.org/2000/xmlns/") attrs[attribute] = namespace @@ -159,7 +252,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): # from lxml with namespaces attached to their names, and # turn then into NamespacedAttribute objects. new_attrs = {} - for attr, value in attrs.items(): + for attr, value in list(attrs.items()): namespace, attr = self._getNsTag(attr) if namespace is None: new_attrs[attr] = value @@ -201,7 +294,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): def pi(self, target, data): self.soup.endData() self.soup.handle_data(target + ' ' + data) - self.soup.endData(ProcessingInstruction) + self.soup.endData(self.processing_instruction_class) def data(self, content): self.soup.handle_data(content) @@ -219,7 +312,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" - return u'\n%s' % fragment + return '\n%s' % fragment class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): @@ -229,6 +322,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] is_xml = False + processing_instruction_class = ProcessingInstruction def default_parser(self, encoding): return etree.HTMLParser @@ -239,10 +333,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): self.parser = self.parser_for(encoding) self.parser.feed(markup) self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError), e: - raise ParserRejectedMarkup(str(e)) + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(e) def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" - return u'%s' % fragment + return '%s' % fragment diff --git a/lib/bs4/dammit.py b/lib/bs4/dammit.py index 317ad6d7..e017408b 100644 --- a/lib/bs4/dammit.py +++ b/lib/bs4/dammit.py @@ -6,10 +6,12 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal Feed Parser. It works best on XML and HTML, but it does not rewrite the XML or HTML to reflect a new encoding; that's the tree builder's job. """ +# Use of this source code is governed by the MIT license. +__license__ = "MIT" -from pdb import set_trace +from html.entities import codepoint2name +from collections import defaultdict import codecs -from htmlentitydefs import codepoint2name import re import logging import string @@ -21,6 +23,8 @@ try: # PyPI package: cchardet import cchardet def chardet_dammit(s): + if isinstance(s, str): + return None return cchardet.detect(s)['encoding'] except ImportError: try: @@ -29,6 +33,8 @@ except ImportError: # PyPI package: chardet import chardet def chardet_dammit(s): + if isinstance(s, str): + return None return chardet.detect(s)['encoding'] #import chardet.constants #chardet.constants._debug = 1 @@ -38,36 +44,2390 @@ except ImportError: return None # Available from http://cjkpython.i18n.org/. +# +# TODO: This doesn't work anymore and the closest thing, iconv_codecs, +# is GPL-licensed. Check whether this is still necessary. try: import iconv_codec except ImportError: pass -xml_encoding_re = re.compile( - '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I) -html_meta_re = re.compile( - '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) +# Build bytestring and Unicode versions of regular expressions for finding +# a declared encoding inside an XML or HTML document. +xml_encoding = '^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' +html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' +encoding_res = dict() +encoding_res[bytes] = { + 'html' : re.compile(html_meta.encode("ascii"), re.I), + 'xml' : re.compile(xml_encoding.encode("ascii"), re.I), +} +encoding_res[str] = { + 'html' : re.compile(html_meta, re.I), + 'xml' : re.compile(xml_encoding, re.I) +} + +try: + from html.entities import html5 +except ImportError: + # This is a copy of html.entities.html5 from Python 3.9. There's + # no equivalent table in Python 2, so we'll just provide a copy + # here. + html5 = { + 'Aacute': '\xc1', + 'aacute': '\xe1', + 'Aacute;': '\xc1', + 'aacute;': '\xe1', + 'Abreve;': '\u0102', + 'abreve;': '\u0103', + 'ac;': '\u223e', + 'acd;': '\u223f', + 'acE;': '\u223e\u0333', + 'Acirc': '\xc2', + 'acirc': '\xe2', + 'Acirc;': '\xc2', + 'acirc;': '\xe2', + 'acute': '\xb4', + 'acute;': '\xb4', + 'Acy;': '\u0410', + 'acy;': '\u0430', + 'AElig': '\xc6', + 'aelig': '\xe6', + 'AElig;': '\xc6', + 'aelig;': '\xe6', + 'af;': '\u2061', + 'Afr;': '\U0001d504', + 'afr;': '\U0001d51e', + 'Agrave': '\xc0', + 'agrave': '\xe0', + 'Agrave;': '\xc0', + 'agrave;': '\xe0', + 'alefsym;': '\u2135', + 'aleph;': '\u2135', + 'Alpha;': '\u0391', + 'alpha;': '\u03b1', + 'Amacr;': '\u0100', + 'amacr;': '\u0101', + 'amalg;': '\u2a3f', + 'AMP': '&', + 'amp': '&', + 'AMP;': '&', + 'amp;': '&', + 'And;': '\u2a53', + 'and;': '\u2227', + 'andand;': '\u2a55', + 'andd;': '\u2a5c', + 'andslope;': '\u2a58', + 'andv;': '\u2a5a', + 'ang;': '\u2220', + 'ange;': '\u29a4', + 'angle;': '\u2220', + 'angmsd;': '\u2221', + 'angmsdaa;': '\u29a8', + 'angmsdab;': '\u29a9', + 'angmsdac;': '\u29aa', + 'angmsdad;': '\u29ab', + 'angmsdae;': '\u29ac', + 'angmsdaf;': '\u29ad', + 'angmsdag;': '\u29ae', + 'angmsdah;': '\u29af', + 'angrt;': '\u221f', + 'angrtvb;': '\u22be', + 'angrtvbd;': '\u299d', + 'angsph;': '\u2222', + 'angst;': '\xc5', + 'angzarr;': '\u237c', + 'Aogon;': '\u0104', + 'aogon;': '\u0105', + 'Aopf;': '\U0001d538', + 'aopf;': '\U0001d552', + 'ap;': '\u2248', + 'apacir;': '\u2a6f', + 'apE;': '\u2a70', + 'ape;': '\u224a', + 'apid;': '\u224b', + 'apos;': "'", + 'ApplyFunction;': '\u2061', + 'approx;': '\u2248', + 'approxeq;': '\u224a', + 'Aring': '\xc5', + 'aring': '\xe5', + 'Aring;': '\xc5', + 'aring;': '\xe5', + 'Ascr;': '\U0001d49c', + 'ascr;': '\U0001d4b6', + 'Assign;': '\u2254', + 'ast;': '*', + 'asymp;': '\u2248', + 'asympeq;': '\u224d', + 'Atilde': '\xc3', + 'atilde': '\xe3', + 'Atilde;': '\xc3', + 'atilde;': '\xe3', + 'Auml': '\xc4', + 'auml': '\xe4', + 'Auml;': '\xc4', + 'auml;': '\xe4', + 'awconint;': '\u2233', + 'awint;': '\u2a11', + 'backcong;': '\u224c', + 'backepsilon;': '\u03f6', + 'backprime;': '\u2035', + 'backsim;': '\u223d', + 'backsimeq;': '\u22cd', + 'Backslash;': '\u2216', + 'Barv;': '\u2ae7', + 'barvee;': '\u22bd', + 'Barwed;': '\u2306', + 'barwed;': '\u2305', + 'barwedge;': '\u2305', + 'bbrk;': '\u23b5', + 'bbrktbrk;': '\u23b6', + 'bcong;': '\u224c', + 'Bcy;': '\u0411', + 'bcy;': '\u0431', + 'bdquo;': '\u201e', + 'becaus;': '\u2235', + 'Because;': '\u2235', + 'because;': '\u2235', + 'bemptyv;': '\u29b0', + 'bepsi;': '\u03f6', + 'bernou;': '\u212c', + 'Bernoullis;': '\u212c', + 'Beta;': '\u0392', + 'beta;': '\u03b2', + 'beth;': '\u2136', + 'between;': '\u226c', + 'Bfr;': '\U0001d505', + 'bfr;': '\U0001d51f', + 'bigcap;': '\u22c2', + 'bigcirc;': '\u25ef', + 'bigcup;': '\u22c3', + 'bigodot;': '\u2a00', + 'bigoplus;': '\u2a01', + 'bigotimes;': '\u2a02', + 'bigsqcup;': '\u2a06', + 'bigstar;': '\u2605', + 'bigtriangledown;': '\u25bd', + 'bigtriangleup;': '\u25b3', + 'biguplus;': '\u2a04', + 'bigvee;': '\u22c1', + 'bigwedge;': '\u22c0', + 'bkarow;': '\u290d', + 'blacklozenge;': '\u29eb', + 'blacksquare;': '\u25aa', + 'blacktriangle;': '\u25b4', + 'blacktriangledown;': '\u25be', + 'blacktriangleleft;': '\u25c2', + 'blacktriangleright;': '\u25b8', + 'blank;': '\u2423', + 'blk12;': '\u2592', + 'blk14;': '\u2591', + 'blk34;': '\u2593', + 'block;': '\u2588', + 'bne;': '=\u20e5', + 'bnequiv;': '\u2261\u20e5', + 'bNot;': '\u2aed', + 'bnot;': '\u2310', + 'Bopf;': '\U0001d539', + 'bopf;': '\U0001d553', + 'bot;': '\u22a5', + 'bottom;': '\u22a5', + 'bowtie;': '\u22c8', + 'boxbox;': '\u29c9', + 'boxDL;': '\u2557', + 'boxDl;': '\u2556', + 'boxdL;': '\u2555', + 'boxdl;': '\u2510', + 'boxDR;': '\u2554', + 'boxDr;': '\u2553', + 'boxdR;': '\u2552', + 'boxdr;': '\u250c', + 'boxH;': '\u2550', + 'boxh;': '\u2500', + 'boxHD;': '\u2566', + 'boxHd;': '\u2564', + 'boxhD;': '\u2565', + 'boxhd;': '\u252c', + 'boxHU;': '\u2569', + 'boxHu;': '\u2567', + 'boxhU;': '\u2568', + 'boxhu;': '\u2534', + 'boxminus;': '\u229f', + 'boxplus;': '\u229e', + 'boxtimes;': '\u22a0', + 'boxUL;': '\u255d', + 'boxUl;': '\u255c', + 'boxuL;': '\u255b', + 'boxul;': '\u2518', + 'boxUR;': '\u255a', + 'boxUr;': '\u2559', + 'boxuR;': '\u2558', + 'boxur;': '\u2514', + 'boxV;': '\u2551', + 'boxv;': '\u2502', + 'boxVH;': '\u256c', + 'boxVh;': '\u256b', + 'boxvH;': '\u256a', + 'boxvh;': '\u253c', + 'boxVL;': '\u2563', + 'boxVl;': '\u2562', + 'boxvL;': '\u2561', + 'boxvl;': '\u2524', + 'boxVR;': '\u2560', + 'boxVr;': '\u255f', + 'boxvR;': '\u255e', + 'boxvr;': '\u251c', + 'bprime;': '\u2035', + 'Breve;': '\u02d8', + 'breve;': '\u02d8', + 'brvbar': '\xa6', + 'brvbar;': '\xa6', + 'Bscr;': '\u212c', + 'bscr;': '\U0001d4b7', + 'bsemi;': '\u204f', + 'bsim;': '\u223d', + 'bsime;': '\u22cd', + 'bsol;': '\\', + 'bsolb;': '\u29c5', + 'bsolhsub;': '\u27c8', + 'bull;': '\u2022', + 'bullet;': '\u2022', + 'bump;': '\u224e', + 'bumpE;': '\u2aae', + 'bumpe;': '\u224f', + 'Bumpeq;': '\u224e', + 'bumpeq;': '\u224f', + 'Cacute;': '\u0106', + 'cacute;': '\u0107', + 'Cap;': '\u22d2', + 'cap;': '\u2229', + 'capand;': '\u2a44', + 'capbrcup;': '\u2a49', + 'capcap;': '\u2a4b', + 'capcup;': '\u2a47', + 'capdot;': '\u2a40', + 'CapitalDifferentialD;': '\u2145', + 'caps;': '\u2229\ufe00', + 'caret;': '\u2041', + 'caron;': '\u02c7', + 'Cayleys;': '\u212d', + 'ccaps;': '\u2a4d', + 'Ccaron;': '\u010c', + 'ccaron;': '\u010d', + 'Ccedil': '\xc7', + 'ccedil': '\xe7', + 'Ccedil;': '\xc7', + 'ccedil;': '\xe7', + 'Ccirc;': '\u0108', + 'ccirc;': '\u0109', + 'Cconint;': '\u2230', + 'ccups;': '\u2a4c', + 'ccupssm;': '\u2a50', + 'Cdot;': '\u010a', + 'cdot;': '\u010b', + 'cedil': '\xb8', + 'cedil;': '\xb8', + 'Cedilla;': '\xb8', + 'cemptyv;': '\u29b2', + 'cent': '\xa2', + 'cent;': '\xa2', + 'CenterDot;': '\xb7', + 'centerdot;': '\xb7', + 'Cfr;': '\u212d', + 'cfr;': '\U0001d520', + 'CHcy;': '\u0427', + 'chcy;': '\u0447', + 'check;': '\u2713', + 'checkmark;': '\u2713', + 'Chi;': '\u03a7', + 'chi;': '\u03c7', + 'cir;': '\u25cb', + 'circ;': '\u02c6', + 'circeq;': '\u2257', + 'circlearrowleft;': '\u21ba', + 'circlearrowright;': '\u21bb', + 'circledast;': '\u229b', + 'circledcirc;': '\u229a', + 'circleddash;': '\u229d', + 'CircleDot;': '\u2299', + 'circledR;': '\xae', + 'circledS;': '\u24c8', + 'CircleMinus;': '\u2296', + 'CirclePlus;': '\u2295', + 'CircleTimes;': '\u2297', + 'cirE;': '\u29c3', + 'cire;': '\u2257', + 'cirfnint;': '\u2a10', + 'cirmid;': '\u2aef', + 'cirscir;': '\u29c2', + 'ClockwiseContourIntegral;': '\u2232', + 'CloseCurlyDoubleQuote;': '\u201d', + 'CloseCurlyQuote;': '\u2019', + 'clubs;': '\u2663', + 'clubsuit;': '\u2663', + 'Colon;': '\u2237', + 'colon;': ':', + 'Colone;': '\u2a74', + 'colone;': '\u2254', + 'coloneq;': '\u2254', + 'comma;': ',', + 'commat;': '@', + 'comp;': '\u2201', + 'compfn;': '\u2218', + 'complement;': '\u2201', + 'complexes;': '\u2102', + 'cong;': '\u2245', + 'congdot;': '\u2a6d', + 'Congruent;': '\u2261', + 'Conint;': '\u222f', + 'conint;': '\u222e', + 'ContourIntegral;': '\u222e', + 'Copf;': '\u2102', + 'copf;': '\U0001d554', + 'coprod;': '\u2210', + 'Coproduct;': '\u2210', + 'COPY': '\xa9', + 'copy': '\xa9', + 'COPY;': '\xa9', + 'copy;': '\xa9', + 'copysr;': '\u2117', + 'CounterClockwiseContourIntegral;': '\u2233', + 'crarr;': '\u21b5', + 'Cross;': '\u2a2f', + 'cross;': '\u2717', + 'Cscr;': '\U0001d49e', + 'cscr;': '\U0001d4b8', + 'csub;': '\u2acf', + 'csube;': '\u2ad1', + 'csup;': '\u2ad0', + 'csupe;': '\u2ad2', + 'ctdot;': '\u22ef', + 'cudarrl;': '\u2938', + 'cudarrr;': '\u2935', + 'cuepr;': '\u22de', + 'cuesc;': '\u22df', + 'cularr;': '\u21b6', + 'cularrp;': '\u293d', + 'Cup;': '\u22d3', + 'cup;': '\u222a', + 'cupbrcap;': '\u2a48', + 'CupCap;': '\u224d', + 'cupcap;': '\u2a46', + 'cupcup;': '\u2a4a', + 'cupdot;': '\u228d', + 'cupor;': '\u2a45', + 'cups;': '\u222a\ufe00', + 'curarr;': '\u21b7', + 'curarrm;': '\u293c', + 'curlyeqprec;': '\u22de', + 'curlyeqsucc;': '\u22df', + 'curlyvee;': '\u22ce', + 'curlywedge;': '\u22cf', + 'curren': '\xa4', + 'curren;': '\xa4', + 'curvearrowleft;': '\u21b6', + 'curvearrowright;': '\u21b7', + 'cuvee;': '\u22ce', + 'cuwed;': '\u22cf', + 'cwconint;': '\u2232', + 'cwint;': '\u2231', + 'cylcty;': '\u232d', + 'Dagger;': '\u2021', + 'dagger;': '\u2020', + 'daleth;': '\u2138', + 'Darr;': '\u21a1', + 'dArr;': '\u21d3', + 'darr;': '\u2193', + 'dash;': '\u2010', + 'Dashv;': '\u2ae4', + 'dashv;': '\u22a3', + 'dbkarow;': '\u290f', + 'dblac;': '\u02dd', + 'Dcaron;': '\u010e', + 'dcaron;': '\u010f', + 'Dcy;': '\u0414', + 'dcy;': '\u0434', + 'DD;': '\u2145', + 'dd;': '\u2146', + 'ddagger;': '\u2021', + 'ddarr;': '\u21ca', + 'DDotrahd;': '\u2911', + 'ddotseq;': '\u2a77', + 'deg': '\xb0', + 'deg;': '\xb0', + 'Del;': '\u2207', + 'Delta;': '\u0394', + 'delta;': '\u03b4', + 'demptyv;': '\u29b1', + 'dfisht;': '\u297f', + 'Dfr;': '\U0001d507', + 'dfr;': '\U0001d521', + 'dHar;': '\u2965', + 'dharl;': '\u21c3', + 'dharr;': '\u21c2', + 'DiacriticalAcute;': '\xb4', + 'DiacriticalDot;': '\u02d9', + 'DiacriticalDoubleAcute;': '\u02dd', + 'DiacriticalGrave;': '`', + 'DiacriticalTilde;': '\u02dc', + 'diam;': '\u22c4', + 'Diamond;': '\u22c4', + 'diamond;': '\u22c4', + 'diamondsuit;': '\u2666', + 'diams;': '\u2666', + 'die;': '\xa8', + 'DifferentialD;': '\u2146', + 'digamma;': '\u03dd', + 'disin;': '\u22f2', + 'div;': '\xf7', + 'divide': '\xf7', + 'divide;': '\xf7', + 'divideontimes;': '\u22c7', + 'divonx;': '\u22c7', + 'DJcy;': '\u0402', + 'djcy;': '\u0452', + 'dlcorn;': '\u231e', + 'dlcrop;': '\u230d', + 'dollar;': '$', + 'Dopf;': '\U0001d53b', + 'dopf;': '\U0001d555', + 'Dot;': '\xa8', + 'dot;': '\u02d9', + 'DotDot;': '\u20dc', + 'doteq;': '\u2250', + 'doteqdot;': '\u2251', + 'DotEqual;': '\u2250', + 'dotminus;': '\u2238', + 'dotplus;': '\u2214', + 'dotsquare;': '\u22a1', + 'doublebarwedge;': '\u2306', + 'DoubleContourIntegral;': '\u222f', + 'DoubleDot;': '\xa8', + 'DoubleDownArrow;': '\u21d3', + 'DoubleLeftArrow;': '\u21d0', + 'DoubleLeftRightArrow;': '\u21d4', + 'DoubleLeftTee;': '\u2ae4', + 'DoubleLongLeftArrow;': '\u27f8', + 'DoubleLongLeftRightArrow;': '\u27fa', + 'DoubleLongRightArrow;': '\u27f9', + 'DoubleRightArrow;': '\u21d2', + 'DoubleRightTee;': '\u22a8', + 'DoubleUpArrow;': '\u21d1', + 'DoubleUpDownArrow;': '\u21d5', + 'DoubleVerticalBar;': '\u2225', + 'DownArrow;': '\u2193', + 'Downarrow;': '\u21d3', + 'downarrow;': '\u2193', + 'DownArrowBar;': '\u2913', + 'DownArrowUpArrow;': '\u21f5', + 'DownBreve;': '\u0311', + 'downdownarrows;': '\u21ca', + 'downharpoonleft;': '\u21c3', + 'downharpoonright;': '\u21c2', + 'DownLeftRightVector;': '\u2950', + 'DownLeftTeeVector;': '\u295e', + 'DownLeftVector;': '\u21bd', + 'DownLeftVectorBar;': '\u2956', + 'DownRightTeeVector;': '\u295f', + 'DownRightVector;': '\u21c1', + 'DownRightVectorBar;': '\u2957', + 'DownTee;': '\u22a4', + 'DownTeeArrow;': '\u21a7', + 'drbkarow;': '\u2910', + 'drcorn;': '\u231f', + 'drcrop;': '\u230c', + 'Dscr;': '\U0001d49f', + 'dscr;': '\U0001d4b9', + 'DScy;': '\u0405', + 'dscy;': '\u0455', + 'dsol;': '\u29f6', + 'Dstrok;': '\u0110', + 'dstrok;': '\u0111', + 'dtdot;': '\u22f1', + 'dtri;': '\u25bf', + 'dtrif;': '\u25be', + 'duarr;': '\u21f5', + 'duhar;': '\u296f', + 'dwangle;': '\u29a6', + 'DZcy;': '\u040f', + 'dzcy;': '\u045f', + 'dzigrarr;': '\u27ff', + 'Eacute': '\xc9', + 'eacute': '\xe9', + 'Eacute;': '\xc9', + 'eacute;': '\xe9', + 'easter;': '\u2a6e', + 'Ecaron;': '\u011a', + 'ecaron;': '\u011b', + 'ecir;': '\u2256', + 'Ecirc': '\xca', + 'ecirc': '\xea', + 'Ecirc;': '\xca', + 'ecirc;': '\xea', + 'ecolon;': '\u2255', + 'Ecy;': '\u042d', + 'ecy;': '\u044d', + 'eDDot;': '\u2a77', + 'Edot;': '\u0116', + 'eDot;': '\u2251', + 'edot;': '\u0117', + 'ee;': '\u2147', + 'efDot;': '\u2252', + 'Efr;': '\U0001d508', + 'efr;': '\U0001d522', + 'eg;': '\u2a9a', + 'Egrave': '\xc8', + 'egrave': '\xe8', + 'Egrave;': '\xc8', + 'egrave;': '\xe8', + 'egs;': '\u2a96', + 'egsdot;': '\u2a98', + 'el;': '\u2a99', + 'Element;': '\u2208', + 'elinters;': '\u23e7', + 'ell;': '\u2113', + 'els;': '\u2a95', + 'elsdot;': '\u2a97', + 'Emacr;': '\u0112', + 'emacr;': '\u0113', + 'empty;': '\u2205', + 'emptyset;': '\u2205', + 'EmptySmallSquare;': '\u25fb', + 'emptyv;': '\u2205', + 'EmptyVerySmallSquare;': '\u25ab', + 'emsp13;': '\u2004', + 'emsp14;': '\u2005', + 'emsp;': '\u2003', + 'ENG;': '\u014a', + 'eng;': '\u014b', + 'ensp;': '\u2002', + 'Eogon;': '\u0118', + 'eogon;': '\u0119', + 'Eopf;': '\U0001d53c', + 'eopf;': '\U0001d556', + 'epar;': '\u22d5', + 'eparsl;': '\u29e3', + 'eplus;': '\u2a71', + 'epsi;': '\u03b5', + 'Epsilon;': '\u0395', + 'epsilon;': '\u03b5', + 'epsiv;': '\u03f5', + 'eqcirc;': '\u2256', + 'eqcolon;': '\u2255', + 'eqsim;': '\u2242', + 'eqslantgtr;': '\u2a96', + 'eqslantless;': '\u2a95', + 'Equal;': '\u2a75', + 'equals;': '=', + 'EqualTilde;': '\u2242', + 'equest;': '\u225f', + 'Equilibrium;': '\u21cc', + 'equiv;': '\u2261', + 'equivDD;': '\u2a78', + 'eqvparsl;': '\u29e5', + 'erarr;': '\u2971', + 'erDot;': '\u2253', + 'Escr;': '\u2130', + 'escr;': '\u212f', + 'esdot;': '\u2250', + 'Esim;': '\u2a73', + 'esim;': '\u2242', + 'Eta;': '\u0397', + 'eta;': '\u03b7', + 'ETH': '\xd0', + 'eth': '\xf0', + 'ETH;': '\xd0', + 'eth;': '\xf0', + 'Euml': '\xcb', + 'euml': '\xeb', + 'Euml;': '\xcb', + 'euml;': '\xeb', + 'euro;': '\u20ac', + 'excl;': '!', + 'exist;': '\u2203', + 'Exists;': '\u2203', + 'expectation;': '\u2130', + 'ExponentialE;': '\u2147', + 'exponentiale;': '\u2147', + 'fallingdotseq;': '\u2252', + 'Fcy;': '\u0424', + 'fcy;': '\u0444', + 'female;': '\u2640', + 'ffilig;': '\ufb03', + 'fflig;': '\ufb00', + 'ffllig;': '\ufb04', + 'Ffr;': '\U0001d509', + 'ffr;': '\U0001d523', + 'filig;': '\ufb01', + 'FilledSmallSquare;': '\u25fc', + 'FilledVerySmallSquare;': '\u25aa', + 'fjlig;': 'fj', + 'flat;': '\u266d', + 'fllig;': '\ufb02', + 'fltns;': '\u25b1', + 'fnof;': '\u0192', + 'Fopf;': '\U0001d53d', + 'fopf;': '\U0001d557', + 'ForAll;': '\u2200', + 'forall;': '\u2200', + 'fork;': '\u22d4', + 'forkv;': '\u2ad9', + 'Fouriertrf;': '\u2131', + 'fpartint;': '\u2a0d', + 'frac12': '\xbd', + 'frac12;': '\xbd', + 'frac13;': '\u2153', + 'frac14': '\xbc', + 'frac14;': '\xbc', + 'frac15;': '\u2155', + 'frac16;': '\u2159', + 'frac18;': '\u215b', + 'frac23;': '\u2154', + 'frac25;': '\u2156', + 'frac34': '\xbe', + 'frac34;': '\xbe', + 'frac35;': '\u2157', + 'frac38;': '\u215c', + 'frac45;': '\u2158', + 'frac56;': '\u215a', + 'frac58;': '\u215d', + 'frac78;': '\u215e', + 'frasl;': '\u2044', + 'frown;': '\u2322', + 'Fscr;': '\u2131', + 'fscr;': '\U0001d4bb', + 'gacute;': '\u01f5', + 'Gamma;': '\u0393', + 'gamma;': '\u03b3', + 'Gammad;': '\u03dc', + 'gammad;': '\u03dd', + 'gap;': '\u2a86', + 'Gbreve;': '\u011e', + 'gbreve;': '\u011f', + 'Gcedil;': '\u0122', + 'Gcirc;': '\u011c', + 'gcirc;': '\u011d', + 'Gcy;': '\u0413', + 'gcy;': '\u0433', + 'Gdot;': '\u0120', + 'gdot;': '\u0121', + 'gE;': '\u2267', + 'ge;': '\u2265', + 'gEl;': '\u2a8c', + 'gel;': '\u22db', + 'geq;': '\u2265', + 'geqq;': '\u2267', + 'geqslant;': '\u2a7e', + 'ges;': '\u2a7e', + 'gescc;': '\u2aa9', + 'gesdot;': '\u2a80', + 'gesdoto;': '\u2a82', + 'gesdotol;': '\u2a84', + 'gesl;': '\u22db\ufe00', + 'gesles;': '\u2a94', + 'Gfr;': '\U0001d50a', + 'gfr;': '\U0001d524', + 'Gg;': '\u22d9', + 'gg;': '\u226b', + 'ggg;': '\u22d9', + 'gimel;': '\u2137', + 'GJcy;': '\u0403', + 'gjcy;': '\u0453', + 'gl;': '\u2277', + 'gla;': '\u2aa5', + 'glE;': '\u2a92', + 'glj;': '\u2aa4', + 'gnap;': '\u2a8a', + 'gnapprox;': '\u2a8a', + 'gnE;': '\u2269', + 'gne;': '\u2a88', + 'gneq;': '\u2a88', + 'gneqq;': '\u2269', + 'gnsim;': '\u22e7', + 'Gopf;': '\U0001d53e', + 'gopf;': '\U0001d558', + 'grave;': '`', + 'GreaterEqual;': '\u2265', + 'GreaterEqualLess;': '\u22db', + 'GreaterFullEqual;': '\u2267', + 'GreaterGreater;': '\u2aa2', + 'GreaterLess;': '\u2277', + 'GreaterSlantEqual;': '\u2a7e', + 'GreaterTilde;': '\u2273', + 'Gscr;': '\U0001d4a2', + 'gscr;': '\u210a', + 'gsim;': '\u2273', + 'gsime;': '\u2a8e', + 'gsiml;': '\u2a90', + 'GT': '>', + 'gt': '>', + 'GT;': '>', + 'Gt;': '\u226b', + 'gt;': '>', + 'gtcc;': '\u2aa7', + 'gtcir;': '\u2a7a', + 'gtdot;': '\u22d7', + 'gtlPar;': '\u2995', + 'gtquest;': '\u2a7c', + 'gtrapprox;': '\u2a86', + 'gtrarr;': '\u2978', + 'gtrdot;': '\u22d7', + 'gtreqless;': '\u22db', + 'gtreqqless;': '\u2a8c', + 'gtrless;': '\u2277', + 'gtrsim;': '\u2273', + 'gvertneqq;': '\u2269\ufe00', + 'gvnE;': '\u2269\ufe00', + 'Hacek;': '\u02c7', + 'hairsp;': '\u200a', + 'half;': '\xbd', + 'hamilt;': '\u210b', + 'HARDcy;': '\u042a', + 'hardcy;': '\u044a', + 'hArr;': '\u21d4', + 'harr;': '\u2194', + 'harrcir;': '\u2948', + 'harrw;': '\u21ad', + 'Hat;': '^', + 'hbar;': '\u210f', + 'Hcirc;': '\u0124', + 'hcirc;': '\u0125', + 'hearts;': '\u2665', + 'heartsuit;': '\u2665', + 'hellip;': '\u2026', + 'hercon;': '\u22b9', + 'Hfr;': '\u210c', + 'hfr;': '\U0001d525', + 'HilbertSpace;': '\u210b', + 'hksearow;': '\u2925', + 'hkswarow;': '\u2926', + 'hoarr;': '\u21ff', + 'homtht;': '\u223b', + 'hookleftarrow;': '\u21a9', + 'hookrightarrow;': '\u21aa', + 'Hopf;': '\u210d', + 'hopf;': '\U0001d559', + 'horbar;': '\u2015', + 'HorizontalLine;': '\u2500', + 'Hscr;': '\u210b', + 'hscr;': '\U0001d4bd', + 'hslash;': '\u210f', + 'Hstrok;': '\u0126', + 'hstrok;': '\u0127', + 'HumpDownHump;': '\u224e', + 'HumpEqual;': '\u224f', + 'hybull;': '\u2043', + 'hyphen;': '\u2010', + 'Iacute': '\xcd', + 'iacute': '\xed', + 'Iacute;': '\xcd', + 'iacute;': '\xed', + 'ic;': '\u2063', + 'Icirc': '\xce', + 'icirc': '\xee', + 'Icirc;': '\xce', + 'icirc;': '\xee', + 'Icy;': '\u0418', + 'icy;': '\u0438', + 'Idot;': '\u0130', + 'IEcy;': '\u0415', + 'iecy;': '\u0435', + 'iexcl': '\xa1', + 'iexcl;': '\xa1', + 'iff;': '\u21d4', + 'Ifr;': '\u2111', + 'ifr;': '\U0001d526', + 'Igrave': '\xcc', + 'igrave': '\xec', + 'Igrave;': '\xcc', + 'igrave;': '\xec', + 'ii;': '\u2148', + 'iiiint;': '\u2a0c', + 'iiint;': '\u222d', + 'iinfin;': '\u29dc', + 'iiota;': '\u2129', + 'IJlig;': '\u0132', + 'ijlig;': '\u0133', + 'Im;': '\u2111', + 'Imacr;': '\u012a', + 'imacr;': '\u012b', + 'image;': '\u2111', + 'ImaginaryI;': '\u2148', + 'imagline;': '\u2110', + 'imagpart;': '\u2111', + 'imath;': '\u0131', + 'imof;': '\u22b7', + 'imped;': '\u01b5', + 'Implies;': '\u21d2', + 'in;': '\u2208', + 'incare;': '\u2105', + 'infin;': '\u221e', + 'infintie;': '\u29dd', + 'inodot;': '\u0131', + 'Int;': '\u222c', + 'int;': '\u222b', + 'intcal;': '\u22ba', + 'integers;': '\u2124', + 'Integral;': '\u222b', + 'intercal;': '\u22ba', + 'Intersection;': '\u22c2', + 'intlarhk;': '\u2a17', + 'intprod;': '\u2a3c', + 'InvisibleComma;': '\u2063', + 'InvisibleTimes;': '\u2062', + 'IOcy;': '\u0401', + 'iocy;': '\u0451', + 'Iogon;': '\u012e', + 'iogon;': '\u012f', + 'Iopf;': '\U0001d540', + 'iopf;': '\U0001d55a', + 'Iota;': '\u0399', + 'iota;': '\u03b9', + 'iprod;': '\u2a3c', + 'iquest': '\xbf', + 'iquest;': '\xbf', + 'Iscr;': '\u2110', + 'iscr;': '\U0001d4be', + 'isin;': '\u2208', + 'isindot;': '\u22f5', + 'isinE;': '\u22f9', + 'isins;': '\u22f4', + 'isinsv;': '\u22f3', + 'isinv;': '\u2208', + 'it;': '\u2062', + 'Itilde;': '\u0128', + 'itilde;': '\u0129', + 'Iukcy;': '\u0406', + 'iukcy;': '\u0456', + 'Iuml': '\xcf', + 'iuml': '\xef', + 'Iuml;': '\xcf', + 'iuml;': '\xef', + 'Jcirc;': '\u0134', + 'jcirc;': '\u0135', + 'Jcy;': '\u0419', + 'jcy;': '\u0439', + 'Jfr;': '\U0001d50d', + 'jfr;': '\U0001d527', + 'jmath;': '\u0237', + 'Jopf;': '\U0001d541', + 'jopf;': '\U0001d55b', + 'Jscr;': '\U0001d4a5', + 'jscr;': '\U0001d4bf', + 'Jsercy;': '\u0408', + 'jsercy;': '\u0458', + 'Jukcy;': '\u0404', + 'jukcy;': '\u0454', + 'Kappa;': '\u039a', + 'kappa;': '\u03ba', + 'kappav;': '\u03f0', + 'Kcedil;': '\u0136', + 'kcedil;': '\u0137', + 'Kcy;': '\u041a', + 'kcy;': '\u043a', + 'Kfr;': '\U0001d50e', + 'kfr;': '\U0001d528', + 'kgreen;': '\u0138', + 'KHcy;': '\u0425', + 'khcy;': '\u0445', + 'KJcy;': '\u040c', + 'kjcy;': '\u045c', + 'Kopf;': '\U0001d542', + 'kopf;': '\U0001d55c', + 'Kscr;': '\U0001d4a6', + 'kscr;': '\U0001d4c0', + 'lAarr;': '\u21da', + 'Lacute;': '\u0139', + 'lacute;': '\u013a', + 'laemptyv;': '\u29b4', + 'lagran;': '\u2112', + 'Lambda;': '\u039b', + 'lambda;': '\u03bb', + 'Lang;': '\u27ea', + 'lang;': '\u27e8', + 'langd;': '\u2991', + 'langle;': '\u27e8', + 'lap;': '\u2a85', + 'Laplacetrf;': '\u2112', + 'laquo': '\xab', + 'laquo;': '\xab', + 'Larr;': '\u219e', + 'lArr;': '\u21d0', + 'larr;': '\u2190', + 'larrb;': '\u21e4', + 'larrbfs;': '\u291f', + 'larrfs;': '\u291d', + 'larrhk;': '\u21a9', + 'larrlp;': '\u21ab', + 'larrpl;': '\u2939', + 'larrsim;': '\u2973', + 'larrtl;': '\u21a2', + 'lat;': '\u2aab', + 'lAtail;': '\u291b', + 'latail;': '\u2919', + 'late;': '\u2aad', + 'lates;': '\u2aad\ufe00', + 'lBarr;': '\u290e', + 'lbarr;': '\u290c', + 'lbbrk;': '\u2772', + 'lbrace;': '{', + 'lbrack;': '[', + 'lbrke;': '\u298b', + 'lbrksld;': '\u298f', + 'lbrkslu;': '\u298d', + 'Lcaron;': '\u013d', + 'lcaron;': '\u013e', + 'Lcedil;': '\u013b', + 'lcedil;': '\u013c', + 'lceil;': '\u2308', + 'lcub;': '{', + 'Lcy;': '\u041b', + 'lcy;': '\u043b', + 'ldca;': '\u2936', + 'ldquo;': '\u201c', + 'ldquor;': '\u201e', + 'ldrdhar;': '\u2967', + 'ldrushar;': '\u294b', + 'ldsh;': '\u21b2', + 'lE;': '\u2266', + 'le;': '\u2264', + 'LeftAngleBracket;': '\u27e8', + 'LeftArrow;': '\u2190', + 'Leftarrow;': '\u21d0', + 'leftarrow;': '\u2190', + 'LeftArrowBar;': '\u21e4', + 'LeftArrowRightArrow;': '\u21c6', + 'leftarrowtail;': '\u21a2', + 'LeftCeiling;': '\u2308', + 'LeftDoubleBracket;': '\u27e6', + 'LeftDownTeeVector;': '\u2961', + 'LeftDownVector;': '\u21c3', + 'LeftDownVectorBar;': '\u2959', + 'LeftFloor;': '\u230a', + 'leftharpoondown;': '\u21bd', + 'leftharpoonup;': '\u21bc', + 'leftleftarrows;': '\u21c7', + 'LeftRightArrow;': '\u2194', + 'Leftrightarrow;': '\u21d4', + 'leftrightarrow;': '\u2194', + 'leftrightarrows;': '\u21c6', + 'leftrightharpoons;': '\u21cb', + 'leftrightsquigarrow;': '\u21ad', + 'LeftRightVector;': '\u294e', + 'LeftTee;': '\u22a3', + 'LeftTeeArrow;': '\u21a4', + 'LeftTeeVector;': '\u295a', + 'leftthreetimes;': '\u22cb', + 'LeftTriangle;': '\u22b2', + 'LeftTriangleBar;': '\u29cf', + 'LeftTriangleEqual;': '\u22b4', + 'LeftUpDownVector;': '\u2951', + 'LeftUpTeeVector;': '\u2960', + 'LeftUpVector;': '\u21bf', + 'LeftUpVectorBar;': '\u2958', + 'LeftVector;': '\u21bc', + 'LeftVectorBar;': '\u2952', + 'lEg;': '\u2a8b', + 'leg;': '\u22da', + 'leq;': '\u2264', + 'leqq;': '\u2266', + 'leqslant;': '\u2a7d', + 'les;': '\u2a7d', + 'lescc;': '\u2aa8', + 'lesdot;': '\u2a7f', + 'lesdoto;': '\u2a81', + 'lesdotor;': '\u2a83', + 'lesg;': '\u22da\ufe00', + 'lesges;': '\u2a93', + 'lessapprox;': '\u2a85', + 'lessdot;': '\u22d6', + 'lesseqgtr;': '\u22da', + 'lesseqqgtr;': '\u2a8b', + 'LessEqualGreater;': '\u22da', + 'LessFullEqual;': '\u2266', + 'LessGreater;': '\u2276', + 'lessgtr;': '\u2276', + 'LessLess;': '\u2aa1', + 'lesssim;': '\u2272', + 'LessSlantEqual;': '\u2a7d', + 'LessTilde;': '\u2272', + 'lfisht;': '\u297c', + 'lfloor;': '\u230a', + 'Lfr;': '\U0001d50f', + 'lfr;': '\U0001d529', + 'lg;': '\u2276', + 'lgE;': '\u2a91', + 'lHar;': '\u2962', + 'lhard;': '\u21bd', + 'lharu;': '\u21bc', + 'lharul;': '\u296a', + 'lhblk;': '\u2584', + 'LJcy;': '\u0409', + 'ljcy;': '\u0459', + 'Ll;': '\u22d8', + 'll;': '\u226a', + 'llarr;': '\u21c7', + 'llcorner;': '\u231e', + 'Lleftarrow;': '\u21da', + 'llhard;': '\u296b', + 'lltri;': '\u25fa', + 'Lmidot;': '\u013f', + 'lmidot;': '\u0140', + 'lmoust;': '\u23b0', + 'lmoustache;': '\u23b0', + 'lnap;': '\u2a89', + 'lnapprox;': '\u2a89', + 'lnE;': '\u2268', + 'lne;': '\u2a87', + 'lneq;': '\u2a87', + 'lneqq;': '\u2268', + 'lnsim;': '\u22e6', + 'loang;': '\u27ec', + 'loarr;': '\u21fd', + 'lobrk;': '\u27e6', + 'LongLeftArrow;': '\u27f5', + 'Longleftarrow;': '\u27f8', + 'longleftarrow;': '\u27f5', + 'LongLeftRightArrow;': '\u27f7', + 'Longleftrightarrow;': '\u27fa', + 'longleftrightarrow;': '\u27f7', + 'longmapsto;': '\u27fc', + 'LongRightArrow;': '\u27f6', + 'Longrightarrow;': '\u27f9', + 'longrightarrow;': '\u27f6', + 'looparrowleft;': '\u21ab', + 'looparrowright;': '\u21ac', + 'lopar;': '\u2985', + 'Lopf;': '\U0001d543', + 'lopf;': '\U0001d55d', + 'loplus;': '\u2a2d', + 'lotimes;': '\u2a34', + 'lowast;': '\u2217', + 'lowbar;': '_', + 'LowerLeftArrow;': '\u2199', + 'LowerRightArrow;': '\u2198', + 'loz;': '\u25ca', + 'lozenge;': '\u25ca', + 'lozf;': '\u29eb', + 'lpar;': '(', + 'lparlt;': '\u2993', + 'lrarr;': '\u21c6', + 'lrcorner;': '\u231f', + 'lrhar;': '\u21cb', + 'lrhard;': '\u296d', + 'lrm;': '\u200e', + 'lrtri;': '\u22bf', + 'lsaquo;': '\u2039', + 'Lscr;': '\u2112', + 'lscr;': '\U0001d4c1', + 'Lsh;': '\u21b0', + 'lsh;': '\u21b0', + 'lsim;': '\u2272', + 'lsime;': '\u2a8d', + 'lsimg;': '\u2a8f', + 'lsqb;': '[', + 'lsquo;': '\u2018', + 'lsquor;': '\u201a', + 'Lstrok;': '\u0141', + 'lstrok;': '\u0142', + 'LT': '<', + 'lt': '<', + 'LT;': '<', + 'Lt;': '\u226a', + 'lt;': '<', + 'ltcc;': '\u2aa6', + 'ltcir;': '\u2a79', + 'ltdot;': '\u22d6', + 'lthree;': '\u22cb', + 'ltimes;': '\u22c9', + 'ltlarr;': '\u2976', + 'ltquest;': '\u2a7b', + 'ltri;': '\u25c3', + 'ltrie;': '\u22b4', + 'ltrif;': '\u25c2', + 'ltrPar;': '\u2996', + 'lurdshar;': '\u294a', + 'luruhar;': '\u2966', + 'lvertneqq;': '\u2268\ufe00', + 'lvnE;': '\u2268\ufe00', + 'macr': '\xaf', + 'macr;': '\xaf', + 'male;': '\u2642', + 'malt;': '\u2720', + 'maltese;': '\u2720', + 'Map;': '\u2905', + 'map;': '\u21a6', + 'mapsto;': '\u21a6', + 'mapstodown;': '\u21a7', + 'mapstoleft;': '\u21a4', + 'mapstoup;': '\u21a5', + 'marker;': '\u25ae', + 'mcomma;': '\u2a29', + 'Mcy;': '\u041c', + 'mcy;': '\u043c', + 'mdash;': '\u2014', + 'mDDot;': '\u223a', + 'measuredangle;': '\u2221', + 'MediumSpace;': '\u205f', + 'Mellintrf;': '\u2133', + 'Mfr;': '\U0001d510', + 'mfr;': '\U0001d52a', + 'mho;': '\u2127', + 'micro': '\xb5', + 'micro;': '\xb5', + 'mid;': '\u2223', + 'midast;': '*', + 'midcir;': '\u2af0', + 'middot': '\xb7', + 'middot;': '\xb7', + 'minus;': '\u2212', + 'minusb;': '\u229f', + 'minusd;': '\u2238', + 'minusdu;': '\u2a2a', + 'MinusPlus;': '\u2213', + 'mlcp;': '\u2adb', + 'mldr;': '\u2026', + 'mnplus;': '\u2213', + 'models;': '\u22a7', + 'Mopf;': '\U0001d544', + 'mopf;': '\U0001d55e', + 'mp;': '\u2213', + 'Mscr;': '\u2133', + 'mscr;': '\U0001d4c2', + 'mstpos;': '\u223e', + 'Mu;': '\u039c', + 'mu;': '\u03bc', + 'multimap;': '\u22b8', + 'mumap;': '\u22b8', + 'nabla;': '\u2207', + 'Nacute;': '\u0143', + 'nacute;': '\u0144', + 'nang;': '\u2220\u20d2', + 'nap;': '\u2249', + 'napE;': '\u2a70\u0338', + 'napid;': '\u224b\u0338', + 'napos;': '\u0149', + 'napprox;': '\u2249', + 'natur;': '\u266e', + 'natural;': '\u266e', + 'naturals;': '\u2115', + 'nbsp': '\xa0', + 'nbsp;': '\xa0', + 'nbump;': '\u224e\u0338', + 'nbumpe;': '\u224f\u0338', + 'ncap;': '\u2a43', + 'Ncaron;': '\u0147', + 'ncaron;': '\u0148', + 'Ncedil;': '\u0145', + 'ncedil;': '\u0146', + 'ncong;': '\u2247', + 'ncongdot;': '\u2a6d\u0338', + 'ncup;': '\u2a42', + 'Ncy;': '\u041d', + 'ncy;': '\u043d', + 'ndash;': '\u2013', + 'ne;': '\u2260', + 'nearhk;': '\u2924', + 'neArr;': '\u21d7', + 'nearr;': '\u2197', + 'nearrow;': '\u2197', + 'nedot;': '\u2250\u0338', + 'NegativeMediumSpace;': '\u200b', + 'NegativeThickSpace;': '\u200b', + 'NegativeThinSpace;': '\u200b', + 'NegativeVeryThinSpace;': '\u200b', + 'nequiv;': '\u2262', + 'nesear;': '\u2928', + 'nesim;': '\u2242\u0338', + 'NestedGreaterGreater;': '\u226b', + 'NestedLessLess;': '\u226a', + 'NewLine;': '\n', + 'nexist;': '\u2204', + 'nexists;': '\u2204', + 'Nfr;': '\U0001d511', + 'nfr;': '\U0001d52b', + 'ngE;': '\u2267\u0338', + 'nge;': '\u2271', + 'ngeq;': '\u2271', + 'ngeqq;': '\u2267\u0338', + 'ngeqslant;': '\u2a7e\u0338', + 'nges;': '\u2a7e\u0338', + 'nGg;': '\u22d9\u0338', + 'ngsim;': '\u2275', + 'nGt;': '\u226b\u20d2', + 'ngt;': '\u226f', + 'ngtr;': '\u226f', + 'nGtv;': '\u226b\u0338', + 'nhArr;': '\u21ce', + 'nharr;': '\u21ae', + 'nhpar;': '\u2af2', + 'ni;': '\u220b', + 'nis;': '\u22fc', + 'nisd;': '\u22fa', + 'niv;': '\u220b', + 'NJcy;': '\u040a', + 'njcy;': '\u045a', + 'nlArr;': '\u21cd', + 'nlarr;': '\u219a', + 'nldr;': '\u2025', + 'nlE;': '\u2266\u0338', + 'nle;': '\u2270', + 'nLeftarrow;': '\u21cd', + 'nleftarrow;': '\u219a', + 'nLeftrightarrow;': '\u21ce', + 'nleftrightarrow;': '\u21ae', + 'nleq;': '\u2270', + 'nleqq;': '\u2266\u0338', + 'nleqslant;': '\u2a7d\u0338', + 'nles;': '\u2a7d\u0338', + 'nless;': '\u226e', + 'nLl;': '\u22d8\u0338', + 'nlsim;': '\u2274', + 'nLt;': '\u226a\u20d2', + 'nlt;': '\u226e', + 'nltri;': '\u22ea', + 'nltrie;': '\u22ec', + 'nLtv;': '\u226a\u0338', + 'nmid;': '\u2224', + 'NoBreak;': '\u2060', + 'NonBreakingSpace;': '\xa0', + 'Nopf;': '\u2115', + 'nopf;': '\U0001d55f', + 'not': '\xac', + 'Not;': '\u2aec', + 'not;': '\xac', + 'NotCongruent;': '\u2262', + 'NotCupCap;': '\u226d', + 'NotDoubleVerticalBar;': '\u2226', + 'NotElement;': '\u2209', + 'NotEqual;': '\u2260', + 'NotEqualTilde;': '\u2242\u0338', + 'NotExists;': '\u2204', + 'NotGreater;': '\u226f', + 'NotGreaterEqual;': '\u2271', + 'NotGreaterFullEqual;': '\u2267\u0338', + 'NotGreaterGreater;': '\u226b\u0338', + 'NotGreaterLess;': '\u2279', + 'NotGreaterSlantEqual;': '\u2a7e\u0338', + 'NotGreaterTilde;': '\u2275', + 'NotHumpDownHump;': '\u224e\u0338', + 'NotHumpEqual;': '\u224f\u0338', + 'notin;': '\u2209', + 'notindot;': '\u22f5\u0338', + 'notinE;': '\u22f9\u0338', + 'notinva;': '\u2209', + 'notinvb;': '\u22f7', + 'notinvc;': '\u22f6', + 'NotLeftTriangle;': '\u22ea', + 'NotLeftTriangleBar;': '\u29cf\u0338', + 'NotLeftTriangleEqual;': '\u22ec', + 'NotLess;': '\u226e', + 'NotLessEqual;': '\u2270', + 'NotLessGreater;': '\u2278', + 'NotLessLess;': '\u226a\u0338', + 'NotLessSlantEqual;': '\u2a7d\u0338', + 'NotLessTilde;': '\u2274', + 'NotNestedGreaterGreater;': '\u2aa2\u0338', + 'NotNestedLessLess;': '\u2aa1\u0338', + 'notni;': '\u220c', + 'notniva;': '\u220c', + 'notnivb;': '\u22fe', + 'notnivc;': '\u22fd', + 'NotPrecedes;': '\u2280', + 'NotPrecedesEqual;': '\u2aaf\u0338', + 'NotPrecedesSlantEqual;': '\u22e0', + 'NotReverseElement;': '\u220c', + 'NotRightTriangle;': '\u22eb', + 'NotRightTriangleBar;': '\u29d0\u0338', + 'NotRightTriangleEqual;': '\u22ed', + 'NotSquareSubset;': '\u228f\u0338', + 'NotSquareSubsetEqual;': '\u22e2', + 'NotSquareSuperset;': '\u2290\u0338', + 'NotSquareSupersetEqual;': '\u22e3', + 'NotSubset;': '\u2282\u20d2', + 'NotSubsetEqual;': '\u2288', + 'NotSucceeds;': '\u2281', + 'NotSucceedsEqual;': '\u2ab0\u0338', + 'NotSucceedsSlantEqual;': '\u22e1', + 'NotSucceedsTilde;': '\u227f\u0338', + 'NotSuperset;': '\u2283\u20d2', + 'NotSupersetEqual;': '\u2289', + 'NotTilde;': '\u2241', + 'NotTildeEqual;': '\u2244', + 'NotTildeFullEqual;': '\u2247', + 'NotTildeTilde;': '\u2249', + 'NotVerticalBar;': '\u2224', + 'npar;': '\u2226', + 'nparallel;': '\u2226', + 'nparsl;': '\u2afd\u20e5', + 'npart;': '\u2202\u0338', + 'npolint;': '\u2a14', + 'npr;': '\u2280', + 'nprcue;': '\u22e0', + 'npre;': '\u2aaf\u0338', + 'nprec;': '\u2280', + 'npreceq;': '\u2aaf\u0338', + 'nrArr;': '\u21cf', + 'nrarr;': '\u219b', + 'nrarrc;': '\u2933\u0338', + 'nrarrw;': '\u219d\u0338', + 'nRightarrow;': '\u21cf', + 'nrightarrow;': '\u219b', + 'nrtri;': '\u22eb', + 'nrtrie;': '\u22ed', + 'nsc;': '\u2281', + 'nsccue;': '\u22e1', + 'nsce;': '\u2ab0\u0338', + 'Nscr;': '\U0001d4a9', + 'nscr;': '\U0001d4c3', + 'nshortmid;': '\u2224', + 'nshortparallel;': '\u2226', + 'nsim;': '\u2241', + 'nsime;': '\u2244', + 'nsimeq;': '\u2244', + 'nsmid;': '\u2224', + 'nspar;': '\u2226', + 'nsqsube;': '\u22e2', + 'nsqsupe;': '\u22e3', + 'nsub;': '\u2284', + 'nsubE;': '\u2ac5\u0338', + 'nsube;': '\u2288', + 'nsubset;': '\u2282\u20d2', + 'nsubseteq;': '\u2288', + 'nsubseteqq;': '\u2ac5\u0338', + 'nsucc;': '\u2281', + 'nsucceq;': '\u2ab0\u0338', + 'nsup;': '\u2285', + 'nsupE;': '\u2ac6\u0338', + 'nsupe;': '\u2289', + 'nsupset;': '\u2283\u20d2', + 'nsupseteq;': '\u2289', + 'nsupseteqq;': '\u2ac6\u0338', + 'ntgl;': '\u2279', + 'Ntilde': '\xd1', + 'ntilde': '\xf1', + 'Ntilde;': '\xd1', + 'ntilde;': '\xf1', + 'ntlg;': '\u2278', + 'ntriangleleft;': '\u22ea', + 'ntrianglelefteq;': '\u22ec', + 'ntriangleright;': '\u22eb', + 'ntrianglerighteq;': '\u22ed', + 'Nu;': '\u039d', + 'nu;': '\u03bd', + 'num;': '#', + 'numero;': '\u2116', + 'numsp;': '\u2007', + 'nvap;': '\u224d\u20d2', + 'nVDash;': '\u22af', + 'nVdash;': '\u22ae', + 'nvDash;': '\u22ad', + 'nvdash;': '\u22ac', + 'nvge;': '\u2265\u20d2', + 'nvgt;': '>\u20d2', + 'nvHarr;': '\u2904', + 'nvinfin;': '\u29de', + 'nvlArr;': '\u2902', + 'nvle;': '\u2264\u20d2', + 'nvlt;': '<\u20d2', + 'nvltrie;': '\u22b4\u20d2', + 'nvrArr;': '\u2903', + 'nvrtrie;': '\u22b5\u20d2', + 'nvsim;': '\u223c\u20d2', + 'nwarhk;': '\u2923', + 'nwArr;': '\u21d6', + 'nwarr;': '\u2196', + 'nwarrow;': '\u2196', + 'nwnear;': '\u2927', + 'Oacute': '\xd3', + 'oacute': '\xf3', + 'Oacute;': '\xd3', + 'oacute;': '\xf3', + 'oast;': '\u229b', + 'ocir;': '\u229a', + 'Ocirc': '\xd4', + 'ocirc': '\xf4', + 'Ocirc;': '\xd4', + 'ocirc;': '\xf4', + 'Ocy;': '\u041e', + 'ocy;': '\u043e', + 'odash;': '\u229d', + 'Odblac;': '\u0150', + 'odblac;': '\u0151', + 'odiv;': '\u2a38', + 'odot;': '\u2299', + 'odsold;': '\u29bc', + 'OElig;': '\u0152', + 'oelig;': '\u0153', + 'ofcir;': '\u29bf', + 'Ofr;': '\U0001d512', + 'ofr;': '\U0001d52c', + 'ogon;': '\u02db', + 'Ograve': '\xd2', + 'ograve': '\xf2', + 'Ograve;': '\xd2', + 'ograve;': '\xf2', + 'ogt;': '\u29c1', + 'ohbar;': '\u29b5', + 'ohm;': '\u03a9', + 'oint;': '\u222e', + 'olarr;': '\u21ba', + 'olcir;': '\u29be', + 'olcross;': '\u29bb', + 'oline;': '\u203e', + 'olt;': '\u29c0', + 'Omacr;': '\u014c', + 'omacr;': '\u014d', + 'Omega;': '\u03a9', + 'omega;': '\u03c9', + 'Omicron;': '\u039f', + 'omicron;': '\u03bf', + 'omid;': '\u29b6', + 'ominus;': '\u2296', + 'Oopf;': '\U0001d546', + 'oopf;': '\U0001d560', + 'opar;': '\u29b7', + 'OpenCurlyDoubleQuote;': '\u201c', + 'OpenCurlyQuote;': '\u2018', + 'operp;': '\u29b9', + 'oplus;': '\u2295', + 'Or;': '\u2a54', + 'or;': '\u2228', + 'orarr;': '\u21bb', + 'ord;': '\u2a5d', + 'order;': '\u2134', + 'orderof;': '\u2134', + 'ordf': '\xaa', + 'ordf;': '\xaa', + 'ordm': '\xba', + 'ordm;': '\xba', + 'origof;': '\u22b6', + 'oror;': '\u2a56', + 'orslope;': '\u2a57', + 'orv;': '\u2a5b', + 'oS;': '\u24c8', + 'Oscr;': '\U0001d4aa', + 'oscr;': '\u2134', + 'Oslash': '\xd8', + 'oslash': '\xf8', + 'Oslash;': '\xd8', + 'oslash;': '\xf8', + 'osol;': '\u2298', + 'Otilde': '\xd5', + 'otilde': '\xf5', + 'Otilde;': '\xd5', + 'otilde;': '\xf5', + 'Otimes;': '\u2a37', + 'otimes;': '\u2297', + 'otimesas;': '\u2a36', + 'Ouml': '\xd6', + 'ouml': '\xf6', + 'Ouml;': '\xd6', + 'ouml;': '\xf6', + 'ovbar;': '\u233d', + 'OverBar;': '\u203e', + 'OverBrace;': '\u23de', + 'OverBracket;': '\u23b4', + 'OverParenthesis;': '\u23dc', + 'par;': '\u2225', + 'para': '\xb6', + 'para;': '\xb6', + 'parallel;': '\u2225', + 'parsim;': '\u2af3', + 'parsl;': '\u2afd', + 'part;': '\u2202', + 'PartialD;': '\u2202', + 'Pcy;': '\u041f', + 'pcy;': '\u043f', + 'percnt;': '%', + 'period;': '.', + 'permil;': '\u2030', + 'perp;': '\u22a5', + 'pertenk;': '\u2031', + 'Pfr;': '\U0001d513', + 'pfr;': '\U0001d52d', + 'Phi;': '\u03a6', + 'phi;': '\u03c6', + 'phiv;': '\u03d5', + 'phmmat;': '\u2133', + 'phone;': '\u260e', + 'Pi;': '\u03a0', + 'pi;': '\u03c0', + 'pitchfork;': '\u22d4', + 'piv;': '\u03d6', + 'planck;': '\u210f', + 'planckh;': '\u210e', + 'plankv;': '\u210f', + 'plus;': '+', + 'plusacir;': '\u2a23', + 'plusb;': '\u229e', + 'pluscir;': '\u2a22', + 'plusdo;': '\u2214', + 'plusdu;': '\u2a25', + 'pluse;': '\u2a72', + 'PlusMinus;': '\xb1', + 'plusmn': '\xb1', + 'plusmn;': '\xb1', + 'plussim;': '\u2a26', + 'plustwo;': '\u2a27', + 'pm;': '\xb1', + 'Poincareplane;': '\u210c', + 'pointint;': '\u2a15', + 'Popf;': '\u2119', + 'popf;': '\U0001d561', + 'pound': '\xa3', + 'pound;': '\xa3', + 'Pr;': '\u2abb', + 'pr;': '\u227a', + 'prap;': '\u2ab7', + 'prcue;': '\u227c', + 'prE;': '\u2ab3', + 'pre;': '\u2aaf', + 'prec;': '\u227a', + 'precapprox;': '\u2ab7', + 'preccurlyeq;': '\u227c', + 'Precedes;': '\u227a', + 'PrecedesEqual;': '\u2aaf', + 'PrecedesSlantEqual;': '\u227c', + 'PrecedesTilde;': '\u227e', + 'preceq;': '\u2aaf', + 'precnapprox;': '\u2ab9', + 'precneqq;': '\u2ab5', + 'precnsim;': '\u22e8', + 'precsim;': '\u227e', + 'Prime;': '\u2033', + 'prime;': '\u2032', + 'primes;': '\u2119', + 'prnap;': '\u2ab9', + 'prnE;': '\u2ab5', + 'prnsim;': '\u22e8', + 'prod;': '\u220f', + 'Product;': '\u220f', + 'profalar;': '\u232e', + 'profline;': '\u2312', + 'profsurf;': '\u2313', + 'prop;': '\u221d', + 'Proportion;': '\u2237', + 'Proportional;': '\u221d', + 'propto;': '\u221d', + 'prsim;': '\u227e', + 'prurel;': '\u22b0', + 'Pscr;': '\U0001d4ab', + 'pscr;': '\U0001d4c5', + 'Psi;': '\u03a8', + 'psi;': '\u03c8', + 'puncsp;': '\u2008', + 'Qfr;': '\U0001d514', + 'qfr;': '\U0001d52e', + 'qint;': '\u2a0c', + 'Qopf;': '\u211a', + 'qopf;': '\U0001d562', + 'qprime;': '\u2057', + 'Qscr;': '\U0001d4ac', + 'qscr;': '\U0001d4c6', + 'quaternions;': '\u210d', + 'quatint;': '\u2a16', + 'quest;': '?', + 'questeq;': '\u225f', + 'QUOT': '"', + 'quot': '"', + 'QUOT;': '"', + 'quot;': '"', + 'rAarr;': '\u21db', + 'race;': '\u223d\u0331', + 'Racute;': '\u0154', + 'racute;': '\u0155', + 'radic;': '\u221a', + 'raemptyv;': '\u29b3', + 'Rang;': '\u27eb', + 'rang;': '\u27e9', + 'rangd;': '\u2992', + 'range;': '\u29a5', + 'rangle;': '\u27e9', + 'raquo': '\xbb', + 'raquo;': '\xbb', + 'Rarr;': '\u21a0', + 'rArr;': '\u21d2', + 'rarr;': '\u2192', + 'rarrap;': '\u2975', + 'rarrb;': '\u21e5', + 'rarrbfs;': '\u2920', + 'rarrc;': '\u2933', + 'rarrfs;': '\u291e', + 'rarrhk;': '\u21aa', + 'rarrlp;': '\u21ac', + 'rarrpl;': '\u2945', + 'rarrsim;': '\u2974', + 'Rarrtl;': '\u2916', + 'rarrtl;': '\u21a3', + 'rarrw;': '\u219d', + 'rAtail;': '\u291c', + 'ratail;': '\u291a', + 'ratio;': '\u2236', + 'rationals;': '\u211a', + 'RBarr;': '\u2910', + 'rBarr;': '\u290f', + 'rbarr;': '\u290d', + 'rbbrk;': '\u2773', + 'rbrace;': '}', + 'rbrack;': ']', + 'rbrke;': '\u298c', + 'rbrksld;': '\u298e', + 'rbrkslu;': '\u2990', + 'Rcaron;': '\u0158', + 'rcaron;': '\u0159', + 'Rcedil;': '\u0156', + 'rcedil;': '\u0157', + 'rceil;': '\u2309', + 'rcub;': '}', + 'Rcy;': '\u0420', + 'rcy;': '\u0440', + 'rdca;': '\u2937', + 'rdldhar;': '\u2969', + 'rdquo;': '\u201d', + 'rdquor;': '\u201d', + 'rdsh;': '\u21b3', + 'Re;': '\u211c', + 'real;': '\u211c', + 'realine;': '\u211b', + 'realpart;': '\u211c', + 'reals;': '\u211d', + 'rect;': '\u25ad', + 'REG': '\xae', + 'reg': '\xae', + 'REG;': '\xae', + 'reg;': '\xae', + 'ReverseElement;': '\u220b', + 'ReverseEquilibrium;': '\u21cb', + 'ReverseUpEquilibrium;': '\u296f', + 'rfisht;': '\u297d', + 'rfloor;': '\u230b', + 'Rfr;': '\u211c', + 'rfr;': '\U0001d52f', + 'rHar;': '\u2964', + 'rhard;': '\u21c1', + 'rharu;': '\u21c0', + 'rharul;': '\u296c', + 'Rho;': '\u03a1', + 'rho;': '\u03c1', + 'rhov;': '\u03f1', + 'RightAngleBracket;': '\u27e9', + 'RightArrow;': '\u2192', + 'Rightarrow;': '\u21d2', + 'rightarrow;': '\u2192', + 'RightArrowBar;': '\u21e5', + 'RightArrowLeftArrow;': '\u21c4', + 'rightarrowtail;': '\u21a3', + 'RightCeiling;': '\u2309', + 'RightDoubleBracket;': '\u27e7', + 'RightDownTeeVector;': '\u295d', + 'RightDownVector;': '\u21c2', + 'RightDownVectorBar;': '\u2955', + 'RightFloor;': '\u230b', + 'rightharpoondown;': '\u21c1', + 'rightharpoonup;': '\u21c0', + 'rightleftarrows;': '\u21c4', + 'rightleftharpoons;': '\u21cc', + 'rightrightarrows;': '\u21c9', + 'rightsquigarrow;': '\u219d', + 'RightTee;': '\u22a2', + 'RightTeeArrow;': '\u21a6', + 'RightTeeVector;': '\u295b', + 'rightthreetimes;': '\u22cc', + 'RightTriangle;': '\u22b3', + 'RightTriangleBar;': '\u29d0', + 'RightTriangleEqual;': '\u22b5', + 'RightUpDownVector;': '\u294f', + 'RightUpTeeVector;': '\u295c', + 'RightUpVector;': '\u21be', + 'RightUpVectorBar;': '\u2954', + 'RightVector;': '\u21c0', + 'RightVectorBar;': '\u2953', + 'ring;': '\u02da', + 'risingdotseq;': '\u2253', + 'rlarr;': '\u21c4', + 'rlhar;': '\u21cc', + 'rlm;': '\u200f', + 'rmoust;': '\u23b1', + 'rmoustache;': '\u23b1', + 'rnmid;': '\u2aee', + 'roang;': '\u27ed', + 'roarr;': '\u21fe', + 'robrk;': '\u27e7', + 'ropar;': '\u2986', + 'Ropf;': '\u211d', + 'ropf;': '\U0001d563', + 'roplus;': '\u2a2e', + 'rotimes;': '\u2a35', + 'RoundImplies;': '\u2970', + 'rpar;': ')', + 'rpargt;': '\u2994', + 'rppolint;': '\u2a12', + 'rrarr;': '\u21c9', + 'Rrightarrow;': '\u21db', + 'rsaquo;': '\u203a', + 'Rscr;': '\u211b', + 'rscr;': '\U0001d4c7', + 'Rsh;': '\u21b1', + 'rsh;': '\u21b1', + 'rsqb;': ']', + 'rsquo;': '\u2019', + 'rsquor;': '\u2019', + 'rthree;': '\u22cc', + 'rtimes;': '\u22ca', + 'rtri;': '\u25b9', + 'rtrie;': '\u22b5', + 'rtrif;': '\u25b8', + 'rtriltri;': '\u29ce', + 'RuleDelayed;': '\u29f4', + 'ruluhar;': '\u2968', + 'rx;': '\u211e', + 'Sacute;': '\u015a', + 'sacute;': '\u015b', + 'sbquo;': '\u201a', + 'Sc;': '\u2abc', + 'sc;': '\u227b', + 'scap;': '\u2ab8', + 'Scaron;': '\u0160', + 'scaron;': '\u0161', + 'sccue;': '\u227d', + 'scE;': '\u2ab4', + 'sce;': '\u2ab0', + 'Scedil;': '\u015e', + 'scedil;': '\u015f', + 'Scirc;': '\u015c', + 'scirc;': '\u015d', + 'scnap;': '\u2aba', + 'scnE;': '\u2ab6', + 'scnsim;': '\u22e9', + 'scpolint;': '\u2a13', + 'scsim;': '\u227f', + 'Scy;': '\u0421', + 'scy;': '\u0441', + 'sdot;': '\u22c5', + 'sdotb;': '\u22a1', + 'sdote;': '\u2a66', + 'searhk;': '\u2925', + 'seArr;': '\u21d8', + 'searr;': '\u2198', + 'searrow;': '\u2198', + 'sect': '\xa7', + 'sect;': '\xa7', + 'semi;': ';', + 'seswar;': '\u2929', + 'setminus;': '\u2216', + 'setmn;': '\u2216', + 'sext;': '\u2736', + 'Sfr;': '\U0001d516', + 'sfr;': '\U0001d530', + 'sfrown;': '\u2322', + 'sharp;': '\u266f', + 'SHCHcy;': '\u0429', + 'shchcy;': '\u0449', + 'SHcy;': '\u0428', + 'shcy;': '\u0448', + 'ShortDownArrow;': '\u2193', + 'ShortLeftArrow;': '\u2190', + 'shortmid;': '\u2223', + 'shortparallel;': '\u2225', + 'ShortRightArrow;': '\u2192', + 'ShortUpArrow;': '\u2191', + 'shy': '\xad', + 'shy;': '\xad', + 'Sigma;': '\u03a3', + 'sigma;': '\u03c3', + 'sigmaf;': '\u03c2', + 'sigmav;': '\u03c2', + 'sim;': '\u223c', + 'simdot;': '\u2a6a', + 'sime;': '\u2243', + 'simeq;': '\u2243', + 'simg;': '\u2a9e', + 'simgE;': '\u2aa0', + 'siml;': '\u2a9d', + 'simlE;': '\u2a9f', + 'simne;': '\u2246', + 'simplus;': '\u2a24', + 'simrarr;': '\u2972', + 'slarr;': '\u2190', + 'SmallCircle;': '\u2218', + 'smallsetminus;': '\u2216', + 'smashp;': '\u2a33', + 'smeparsl;': '\u29e4', + 'smid;': '\u2223', + 'smile;': '\u2323', + 'smt;': '\u2aaa', + 'smte;': '\u2aac', + 'smtes;': '\u2aac\ufe00', + 'SOFTcy;': '\u042c', + 'softcy;': '\u044c', + 'sol;': '/', + 'solb;': '\u29c4', + 'solbar;': '\u233f', + 'Sopf;': '\U0001d54a', + 'sopf;': '\U0001d564', + 'spades;': '\u2660', + 'spadesuit;': '\u2660', + 'spar;': '\u2225', + 'sqcap;': '\u2293', + 'sqcaps;': '\u2293\ufe00', + 'sqcup;': '\u2294', + 'sqcups;': '\u2294\ufe00', + 'Sqrt;': '\u221a', + 'sqsub;': '\u228f', + 'sqsube;': '\u2291', + 'sqsubset;': '\u228f', + 'sqsubseteq;': '\u2291', + 'sqsup;': '\u2290', + 'sqsupe;': '\u2292', + 'sqsupset;': '\u2290', + 'sqsupseteq;': '\u2292', + 'squ;': '\u25a1', + 'Square;': '\u25a1', + 'square;': '\u25a1', + 'SquareIntersection;': '\u2293', + 'SquareSubset;': '\u228f', + 'SquareSubsetEqual;': '\u2291', + 'SquareSuperset;': '\u2290', + 'SquareSupersetEqual;': '\u2292', + 'SquareUnion;': '\u2294', + 'squarf;': '\u25aa', + 'squf;': '\u25aa', + 'srarr;': '\u2192', + 'Sscr;': '\U0001d4ae', + 'sscr;': '\U0001d4c8', + 'ssetmn;': '\u2216', + 'ssmile;': '\u2323', + 'sstarf;': '\u22c6', + 'Star;': '\u22c6', + 'star;': '\u2606', + 'starf;': '\u2605', + 'straightepsilon;': '\u03f5', + 'straightphi;': '\u03d5', + 'strns;': '\xaf', + 'Sub;': '\u22d0', + 'sub;': '\u2282', + 'subdot;': '\u2abd', + 'subE;': '\u2ac5', + 'sube;': '\u2286', + 'subedot;': '\u2ac3', + 'submult;': '\u2ac1', + 'subnE;': '\u2acb', + 'subne;': '\u228a', + 'subplus;': '\u2abf', + 'subrarr;': '\u2979', + 'Subset;': '\u22d0', + 'subset;': '\u2282', + 'subseteq;': '\u2286', + 'subseteqq;': '\u2ac5', + 'SubsetEqual;': '\u2286', + 'subsetneq;': '\u228a', + 'subsetneqq;': '\u2acb', + 'subsim;': '\u2ac7', + 'subsub;': '\u2ad5', + 'subsup;': '\u2ad3', + 'succ;': '\u227b', + 'succapprox;': '\u2ab8', + 'succcurlyeq;': '\u227d', + 'Succeeds;': '\u227b', + 'SucceedsEqual;': '\u2ab0', + 'SucceedsSlantEqual;': '\u227d', + 'SucceedsTilde;': '\u227f', + 'succeq;': '\u2ab0', + 'succnapprox;': '\u2aba', + 'succneqq;': '\u2ab6', + 'succnsim;': '\u22e9', + 'succsim;': '\u227f', + 'SuchThat;': '\u220b', + 'Sum;': '\u2211', + 'sum;': '\u2211', + 'sung;': '\u266a', + 'sup1': '\xb9', + 'sup1;': '\xb9', + 'sup2': '\xb2', + 'sup2;': '\xb2', + 'sup3': '\xb3', + 'sup3;': '\xb3', + 'Sup;': '\u22d1', + 'sup;': '\u2283', + 'supdot;': '\u2abe', + 'supdsub;': '\u2ad8', + 'supE;': '\u2ac6', + 'supe;': '\u2287', + 'supedot;': '\u2ac4', + 'Superset;': '\u2283', + 'SupersetEqual;': '\u2287', + 'suphsol;': '\u27c9', + 'suphsub;': '\u2ad7', + 'suplarr;': '\u297b', + 'supmult;': '\u2ac2', + 'supnE;': '\u2acc', + 'supne;': '\u228b', + 'supplus;': '\u2ac0', + 'Supset;': '\u22d1', + 'supset;': '\u2283', + 'supseteq;': '\u2287', + 'supseteqq;': '\u2ac6', + 'supsetneq;': '\u228b', + 'supsetneqq;': '\u2acc', + 'supsim;': '\u2ac8', + 'supsub;': '\u2ad4', + 'supsup;': '\u2ad6', + 'swarhk;': '\u2926', + 'swArr;': '\u21d9', + 'swarr;': '\u2199', + 'swarrow;': '\u2199', + 'swnwar;': '\u292a', + 'szlig': '\xdf', + 'szlig;': '\xdf', + 'Tab;': '\t', + 'target;': '\u2316', + 'Tau;': '\u03a4', + 'tau;': '\u03c4', + 'tbrk;': '\u23b4', + 'Tcaron;': '\u0164', + 'tcaron;': '\u0165', + 'Tcedil;': '\u0162', + 'tcedil;': '\u0163', + 'Tcy;': '\u0422', + 'tcy;': '\u0442', + 'tdot;': '\u20db', + 'telrec;': '\u2315', + 'Tfr;': '\U0001d517', + 'tfr;': '\U0001d531', + 'there4;': '\u2234', + 'Therefore;': '\u2234', + 'therefore;': '\u2234', + 'Theta;': '\u0398', + 'theta;': '\u03b8', + 'thetasym;': '\u03d1', + 'thetav;': '\u03d1', + 'thickapprox;': '\u2248', + 'thicksim;': '\u223c', + 'ThickSpace;': '\u205f\u200a', + 'thinsp;': '\u2009', + 'ThinSpace;': '\u2009', + 'thkap;': '\u2248', + 'thksim;': '\u223c', + 'THORN': '\xde', + 'thorn': '\xfe', + 'THORN;': '\xde', + 'thorn;': '\xfe', + 'Tilde;': '\u223c', + 'tilde;': '\u02dc', + 'TildeEqual;': '\u2243', + 'TildeFullEqual;': '\u2245', + 'TildeTilde;': '\u2248', + 'times': '\xd7', + 'times;': '\xd7', + 'timesb;': '\u22a0', + 'timesbar;': '\u2a31', + 'timesd;': '\u2a30', + 'tint;': '\u222d', + 'toea;': '\u2928', + 'top;': '\u22a4', + 'topbot;': '\u2336', + 'topcir;': '\u2af1', + 'Topf;': '\U0001d54b', + 'topf;': '\U0001d565', + 'topfork;': '\u2ada', + 'tosa;': '\u2929', + 'tprime;': '\u2034', + 'TRADE;': '\u2122', + 'trade;': '\u2122', + 'triangle;': '\u25b5', + 'triangledown;': '\u25bf', + 'triangleleft;': '\u25c3', + 'trianglelefteq;': '\u22b4', + 'triangleq;': '\u225c', + 'triangleright;': '\u25b9', + 'trianglerighteq;': '\u22b5', + 'tridot;': '\u25ec', + 'trie;': '\u225c', + 'triminus;': '\u2a3a', + 'TripleDot;': '\u20db', + 'triplus;': '\u2a39', + 'trisb;': '\u29cd', + 'tritime;': '\u2a3b', + 'trpezium;': '\u23e2', + 'Tscr;': '\U0001d4af', + 'tscr;': '\U0001d4c9', + 'TScy;': '\u0426', + 'tscy;': '\u0446', + 'TSHcy;': '\u040b', + 'tshcy;': '\u045b', + 'Tstrok;': '\u0166', + 'tstrok;': '\u0167', + 'twixt;': '\u226c', + 'twoheadleftarrow;': '\u219e', + 'twoheadrightarrow;': '\u21a0', + 'Uacute': '\xda', + 'uacute': '\xfa', + 'Uacute;': '\xda', + 'uacute;': '\xfa', + 'Uarr;': '\u219f', + 'uArr;': '\u21d1', + 'uarr;': '\u2191', + 'Uarrocir;': '\u2949', + 'Ubrcy;': '\u040e', + 'ubrcy;': '\u045e', + 'Ubreve;': '\u016c', + 'ubreve;': '\u016d', + 'Ucirc': '\xdb', + 'ucirc': '\xfb', + 'Ucirc;': '\xdb', + 'ucirc;': '\xfb', + 'Ucy;': '\u0423', + 'ucy;': '\u0443', + 'udarr;': '\u21c5', + 'Udblac;': '\u0170', + 'udblac;': '\u0171', + 'udhar;': '\u296e', + 'ufisht;': '\u297e', + 'Ufr;': '\U0001d518', + 'ufr;': '\U0001d532', + 'Ugrave': '\xd9', + 'ugrave': '\xf9', + 'Ugrave;': '\xd9', + 'ugrave;': '\xf9', + 'uHar;': '\u2963', + 'uharl;': '\u21bf', + 'uharr;': '\u21be', + 'uhblk;': '\u2580', + 'ulcorn;': '\u231c', + 'ulcorner;': '\u231c', + 'ulcrop;': '\u230f', + 'ultri;': '\u25f8', + 'Umacr;': '\u016a', + 'umacr;': '\u016b', + 'uml': '\xa8', + 'uml;': '\xa8', + 'UnderBar;': '_', + 'UnderBrace;': '\u23df', + 'UnderBracket;': '\u23b5', + 'UnderParenthesis;': '\u23dd', + 'Union;': '\u22c3', + 'UnionPlus;': '\u228e', + 'Uogon;': '\u0172', + 'uogon;': '\u0173', + 'Uopf;': '\U0001d54c', + 'uopf;': '\U0001d566', + 'UpArrow;': '\u2191', + 'Uparrow;': '\u21d1', + 'uparrow;': '\u2191', + 'UpArrowBar;': '\u2912', + 'UpArrowDownArrow;': '\u21c5', + 'UpDownArrow;': '\u2195', + 'Updownarrow;': '\u21d5', + 'updownarrow;': '\u2195', + 'UpEquilibrium;': '\u296e', + 'upharpoonleft;': '\u21bf', + 'upharpoonright;': '\u21be', + 'uplus;': '\u228e', + 'UpperLeftArrow;': '\u2196', + 'UpperRightArrow;': '\u2197', + 'Upsi;': '\u03d2', + 'upsi;': '\u03c5', + 'upsih;': '\u03d2', + 'Upsilon;': '\u03a5', + 'upsilon;': '\u03c5', + 'UpTee;': '\u22a5', + 'UpTeeArrow;': '\u21a5', + 'upuparrows;': '\u21c8', + 'urcorn;': '\u231d', + 'urcorner;': '\u231d', + 'urcrop;': '\u230e', + 'Uring;': '\u016e', + 'uring;': '\u016f', + 'urtri;': '\u25f9', + 'Uscr;': '\U0001d4b0', + 'uscr;': '\U0001d4ca', + 'utdot;': '\u22f0', + 'Utilde;': '\u0168', + 'utilde;': '\u0169', + 'utri;': '\u25b5', + 'utrif;': '\u25b4', + 'uuarr;': '\u21c8', + 'Uuml': '\xdc', + 'uuml': '\xfc', + 'Uuml;': '\xdc', + 'uuml;': '\xfc', + 'uwangle;': '\u29a7', + 'vangrt;': '\u299c', + 'varepsilon;': '\u03f5', + 'varkappa;': '\u03f0', + 'varnothing;': '\u2205', + 'varphi;': '\u03d5', + 'varpi;': '\u03d6', + 'varpropto;': '\u221d', + 'vArr;': '\u21d5', + 'varr;': '\u2195', + 'varrho;': '\u03f1', + 'varsigma;': '\u03c2', + 'varsubsetneq;': '\u228a\ufe00', + 'varsubsetneqq;': '\u2acb\ufe00', + 'varsupsetneq;': '\u228b\ufe00', + 'varsupsetneqq;': '\u2acc\ufe00', + 'vartheta;': '\u03d1', + 'vartriangleleft;': '\u22b2', + 'vartriangleright;': '\u22b3', + 'Vbar;': '\u2aeb', + 'vBar;': '\u2ae8', + 'vBarv;': '\u2ae9', + 'Vcy;': '\u0412', + 'vcy;': '\u0432', + 'VDash;': '\u22ab', + 'Vdash;': '\u22a9', + 'vDash;': '\u22a8', + 'vdash;': '\u22a2', + 'Vdashl;': '\u2ae6', + 'Vee;': '\u22c1', + 'vee;': '\u2228', + 'veebar;': '\u22bb', + 'veeeq;': '\u225a', + 'vellip;': '\u22ee', + 'Verbar;': '\u2016', + 'verbar;': '|', + 'Vert;': '\u2016', + 'vert;': '|', + 'VerticalBar;': '\u2223', + 'VerticalLine;': '|', + 'VerticalSeparator;': '\u2758', + 'VerticalTilde;': '\u2240', + 'VeryThinSpace;': '\u200a', + 'Vfr;': '\U0001d519', + 'vfr;': '\U0001d533', + 'vltri;': '\u22b2', + 'vnsub;': '\u2282\u20d2', + 'vnsup;': '\u2283\u20d2', + 'Vopf;': '\U0001d54d', + 'vopf;': '\U0001d567', + 'vprop;': '\u221d', + 'vrtri;': '\u22b3', + 'Vscr;': '\U0001d4b1', + 'vscr;': '\U0001d4cb', + 'vsubnE;': '\u2acb\ufe00', + 'vsubne;': '\u228a\ufe00', + 'vsupnE;': '\u2acc\ufe00', + 'vsupne;': '\u228b\ufe00', + 'Vvdash;': '\u22aa', + 'vzigzag;': '\u299a', + 'Wcirc;': '\u0174', + 'wcirc;': '\u0175', + 'wedbar;': '\u2a5f', + 'Wedge;': '\u22c0', + 'wedge;': '\u2227', + 'wedgeq;': '\u2259', + 'weierp;': '\u2118', + 'Wfr;': '\U0001d51a', + 'wfr;': '\U0001d534', + 'Wopf;': '\U0001d54e', + 'wopf;': '\U0001d568', + 'wp;': '\u2118', + 'wr;': '\u2240', + 'wreath;': '\u2240', + 'Wscr;': '\U0001d4b2', + 'wscr;': '\U0001d4cc', + 'xcap;': '\u22c2', + 'xcirc;': '\u25ef', + 'xcup;': '\u22c3', + 'xdtri;': '\u25bd', + 'Xfr;': '\U0001d51b', + 'xfr;': '\U0001d535', + 'xhArr;': '\u27fa', + 'xharr;': '\u27f7', + 'Xi;': '\u039e', + 'xi;': '\u03be', + 'xlArr;': '\u27f8', + 'xlarr;': '\u27f5', + 'xmap;': '\u27fc', + 'xnis;': '\u22fb', + 'xodot;': '\u2a00', + 'Xopf;': '\U0001d54f', + 'xopf;': '\U0001d569', + 'xoplus;': '\u2a01', + 'xotime;': '\u2a02', + 'xrArr;': '\u27f9', + 'xrarr;': '\u27f6', + 'Xscr;': '\U0001d4b3', + 'xscr;': '\U0001d4cd', + 'xsqcup;': '\u2a06', + 'xuplus;': '\u2a04', + 'xutri;': '\u25b3', + 'xvee;': '\u22c1', + 'xwedge;': '\u22c0', + 'Yacute': '\xdd', + 'yacute': '\xfd', + 'Yacute;': '\xdd', + 'yacute;': '\xfd', + 'YAcy;': '\u042f', + 'yacy;': '\u044f', + 'Ycirc;': '\u0176', + 'ycirc;': '\u0177', + 'Ycy;': '\u042b', + 'ycy;': '\u044b', + 'yen': '\xa5', + 'yen;': '\xa5', + 'Yfr;': '\U0001d51c', + 'yfr;': '\U0001d536', + 'YIcy;': '\u0407', + 'yicy;': '\u0457', + 'Yopf;': '\U0001d550', + 'yopf;': '\U0001d56a', + 'Yscr;': '\U0001d4b4', + 'yscr;': '\U0001d4ce', + 'YUcy;': '\u042e', + 'yucy;': '\u044e', + 'yuml': '\xff', + 'Yuml;': '\u0178', + 'yuml;': '\xff', + 'Zacute;': '\u0179', + 'zacute;': '\u017a', + 'Zcaron;': '\u017d', + 'zcaron;': '\u017e', + 'Zcy;': '\u0417', + 'zcy;': '\u0437', + 'Zdot;': '\u017b', + 'zdot;': '\u017c', + 'zeetrf;': '\u2128', + 'ZeroWidthSpace;': '\u200b', + 'Zeta;': '\u0396', + 'zeta;': '\u03b6', + 'Zfr;': '\u2128', + 'zfr;': '\U0001d537', + 'ZHcy;': '\u0416', + 'zhcy;': '\u0436', + 'zigrarr;': '\u21dd', + 'Zopf;': '\u2124', + 'zopf;': '\U0001d56b', + 'Zscr;': '\U0001d4b5', + 'zscr;': '\U0001d4cf', + 'zwj;': '\u200d', + 'zwnj;': '\u200c', +} + class EntitySubstitution(object): - - """Substitute XML or HTML entities for the corresponding characters.""" + """The ability to substitute XML or HTML entities for certain characters.""" def _populate_class_variables(): - lookup = {} - reverse_lookup = {} - characters_for_re = [] + """Initialize variables used by this class to manage the plethora of + HTML5 named entities. + + This function returns a 3-tuple containing two dictionaries + and a regular expression: + + unicode_to_name - A mapping of Unicode strings like "⦨" to + entity names like "angmsdaa". When a single Unicode string has + multiple entity names, we try to choose the most commonly-used + name. + + name_to_unicode: A mapping of entity names like "angmsdaa" to + Unicode strings like "⦨". + + named_entity_re: A regular expression matching (almost) any + Unicode string that corresponds to an HTML5 named entity. + """ + unicode_to_name = {} + name_to_unicode = {} + + short_entities = set() + long_entities_by_first_character = defaultdict(set) + + for name_with_semicolon, character in sorted(html5.items()): + # "It is intentional, for legacy compatibility, that many + # code points have multiple character reference names. For + # example, some appear both with and without the trailing + # semicolon, or with different capitalizations." + # - https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references + # + # The parsers are in charge of handling (or not) character + # references with no trailing semicolon, so we remove the + # semicolon whenever it appears. + if name_with_semicolon.endswith(';'): + name = name_with_semicolon[:-1] + else: + name = name_with_semicolon + + # When parsing HTML, we want to recognize any known named + # entity and convert it to a sequence of Unicode + # characters. + if name not in name_to_unicode: + name_to_unicode[name] = character + + # When _generating_ HTML, we want to recognize special + # character sequences that _could_ be converted to named + # entities. + unicode_to_name[character] = name + + # We also need to build a regular expression that lets us + # _find_ those characters in output strings so we can + # replace them. + # + # This is tricky, for two reasons. + + if (len(character) == 1 and ord(character) < 128 + and character not in '<>&'): + # First, it would be annoying to turn single ASCII + # characters like | into named entities like + # |. The exceptions are <>&, which we _must_ + # turn into named entities to produce valid HTML. + continue + + if len(character) > 1 and all(ord(x) < 128 for x in character): + # We also do not want to turn _combinations_ of ASCII + # characters like 'fj' into named entities like 'fj', + # though that's more debateable. + continue + + # Second, some named entities have a Unicode value that's + # a subset of the Unicode value for some _other_ named + # entity. As an example, \u2267' is ≧, + # but '\u2267\u0338' is ≧̸. Our regular + # expression needs to match the first two characters of + # "\u2267\u0338foo", but only the first character of + # "\u2267foo". + # + # In this step, we build two sets of characters that + # _eventually_ need to go into the regular expression. But + # we won't know exactly what the regular expression needs + # to look like until we've gone through the entire list of + # named entities. + if len(character) == 1: + short_entities.add(character) + else: + long_entities_by_first_character[character[0]].add(character) + + # Now that we've been through the entire list of entities, we + # can create a regular expression that matches any of them. + particles = set() + for short in short_entities: + long_versions = long_entities_by_first_character[short] + if not long_versions: + particles.add(short) + else: + ignore = "".join([x[1] for x in long_versions]) + # This finds, e.g. \u2267 but only if it is _not_ + # followed by \u0338. + particles.add("%s(?![%s])" % (short, ignore)) + + for long_entities in list(long_entities_by_first_character.values()): + for long_entity in long_entities: + particles.add(long_entity) + + re_definition = "(%s)" % "|".join(particles) + + # If an entity shows up in both html5 and codepoint2name, it's + # likely that HTML5 gives it several different names, such as + # 'rsquo' and 'rsquor'. When converting Unicode characters to + # named entities, the codepoint2name name should take + # precedence where possible, since that's the more easily + # recognizable one. for codepoint, name in list(codepoint2name.items()): - character = unichr(codepoint) - if codepoint != 34: - # There's no point in turning the quotation mark into - # ", unless it happens within an attribute value, which - # is handled elsewhere. - characters_for_re.append(character) - lookup[character] = name - # But we do want to turn " into the quotation mark. - reverse_lookup[name] = character - re_definition = "[%s]" % "".join(characters_for_re) - return lookup, reverse_lookup, re.compile(re_definition) + character = chr(codepoint) + unicode_to_name[character] = name + + return unicode_to_name, name_to_unicode, re.compile(re_definition) (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER, CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables() @@ -80,20 +2440,22 @@ class EntitySubstitution(object): } BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)" ")") AMPERSAND_OR_BRACKET = re.compile("([<>&])") @classmethod def _substitute_html_entity(cls, matchobj): + """Used with a regular expression to substitute the + appropriate HTML entity for a special character string.""" entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) return "&%s;" % entity @classmethod def _substitute_xml_entity(cls, matchobj): """Used with a regular expression to substitute the - appropriate XML entity for an XML special character.""" + appropriate XML entity for a special character string.""" entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)] return "&%s;" % entity @@ -188,6 +2550,8 @@ class EntitySubstitution(object): containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that character with "é" will make it more readable to some people. + + :param s: A Unicode string. """ return cls.CHARACTER_TO_HTML_ENTITY_RE.sub( cls._substitute_html_entity, s) @@ -199,23 +2563,65 @@ class EncodingDetector: Order of precedence: 1. Encodings you specifically tell EncodingDetector to try first - (the override_encodings argument to the constructor). + (the known_definite_encodings argument to the constructor). - 2. An encoding declared within the bytestring itself, either in an + 2. An encoding determined by sniffing the document's byte-order mark. + + 3. Encodings you specifically tell EncodingDetector to try if + byte-order mark sniffing fails (the user_encodings argument to the + constructor). + + 4. An encoding declared within the bytestring itself, either in an XML declaration (if the bytestring is to be interpreted as an XML document), or in a tag (if the bytestring is to be interpreted as an HTML document.) - 3. An encoding detected through textual analysis by chardet, + 5. An encoding detected through textual analysis by chardet, cchardet, or a similar external library. 4. UTF-8. 5. Windows-1252. + """ - def __init__(self, markup, override_encodings=None, is_html=False, - exclude_encodings=None): - self.override_encodings = override_encodings or [] + def __init__(self, markup, known_definite_encodings=None, + is_html=False, exclude_encodings=None, + user_encodings=None, override_encodings=None): + """Constructor. + + :param markup: Some markup in an unknown encoding. + + :param known_definite_encodings: When determining the encoding + of `markup`, these encodings will be tried first, in + order. In HTML terms, this corresponds to the "known + definite encoding" step defined here: + https://html.spec.whatwg.org/multipage/parsing.html#parsing-with-a-known-character-encoding + + :param user_encodings: These encodings will be tried after the + `known_definite_encodings` have been tried and failed, and + after an attempt to sniff the encoding by looking at a + byte order mark has failed. In HTML terms, this + corresponds to the step "user has explicitly instructed + the user agent to override the document's character + encoding", defined here: + https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding + + :param override_encodings: A deprecated alias for + known_definite_encodings. Any encodings here will be tried + immediately after the encodings in + known_definite_encodings. + + :param is_html: If True, this markup is considered to be + HTML. Otherwise it's assumed to be XML. + + :param exclude_encodings: These encodings will not be tried, + even if they otherwise would be. + + """ + self.known_definite_encodings = list(known_definite_encodings or []) + if override_encodings: + self.known_definite_encodings += override_encodings + self.user_encodings = user_encodings or [] exclude_encodings = exclude_encodings or [] self.exclude_encodings = set([x.lower() for x in exclude_encodings]) self.chardet_encoding = None @@ -226,6 +2632,12 @@ class EncodingDetector: self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) def _usable(self, encoding, tried): + """Should we even bother to try this encoding? + + :param encoding: Name of an encoding. + :param tried: Encodings that have already been tried. This will be modified + as a side effect. + """ if encoding is not None: encoding = encoding.lower() if encoding in self.exclude_encodings: @@ -237,9 +2649,14 @@ class EncodingDetector: @property def encodings(self): - """Yield a number of encodings that might work for this markup.""" + """Yield a number of encodings that might work for this markup. + + :yield: A sequence of strings. + """ tried = set() - for e in self.override_encodings: + + # First, try the known definite encodings + for e in self.known_definite_encodings: if self._usable(e, tried): yield e @@ -248,6 +2665,12 @@ class EncodingDetector: if self._usable(self.sniffed_encoding, tried): yield self.sniffed_encoding + # Sniffing the byte-order mark did nothing; try the user + # encodings. + for e in self.user_encodings: + if self._usable(e, tried): + yield e + # Look within the document for an XML or HTML encoding # declaration. if self.declared_encoding is None: @@ -270,9 +2693,13 @@ class EncodingDetector: @classmethod def strip_byte_order_mark(cls, data): - """If a byte-order mark is present, strip it and return the encoding it implies.""" + """If a byte-order mark is present, strip it and return the encoding it implies. + + :param data: Some markup. + :return: A 2-tuple (modified data, implied encoding) + """ encoding = None - if isinstance(data, unicode): + if isinstance(data, str): # Unicode data cannot have a byte-order mark. return data, encoding if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ @@ -302,21 +2729,36 @@ class EncodingDetector: An HTML encoding is declared in a tag, hopefully near the beginning of the document. + + :param markup: Some markup. + :param is_html: If True, this markup is considered to be HTML. Otherwise + it's assumed to be XML. + :param search_entire_document: Since an encoding is supposed to declared near the beginning + of the document, most of the time it's only necessary to search a few kilobytes of data. + Set this to True to force this method to search the entire document. """ if search_entire_document: xml_endpos = html_endpos = len(markup) else: xml_endpos = 1024 html_endpos = max(2048, int(len(markup) * 0.05)) - + + if isinstance(markup, bytes): + res = encoding_res[bytes] + else: + res = encoding_res[str] + + xml_re = res['xml'] + html_re = res['html'] declared_encoding = None - declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) + declared_encoding_match = xml_re.search(markup, endpos=xml_endpos) if not declared_encoding_match and is_html: - declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) + declared_encoding_match = html_re.search(markup, endpos=html_endpos) if declared_encoding_match is not None: - declared_encoding = declared_encoding_match.groups()[0].decode( - 'ascii', 'replace') + declared_encoding = declared_encoding_match.groups()[0] if declared_encoding: + if isinstance(declared_encoding, bytes): + declared_encoding = declared_encoding.decode('ascii', 'replace') return declared_encoding.lower() return None @@ -339,20 +2781,58 @@ class UnicodeDammit: "iso-8859-2", ] - def __init__(self, markup, override_encodings=[], - smart_quotes_to=None, is_html=False, exclude_encodings=[]): + def __init__(self, markup, known_definite_encodings=[], + smart_quotes_to=None, is_html=False, exclude_encodings=[], + user_encodings=None, override_encodings=None + ): + """Constructor. + + :param markup: A bytestring representing markup in an unknown encoding. + + :param known_definite_encodings: When determining the encoding + of `markup`, these encodings will be tried first, in + order. In HTML terms, this corresponds to the "known + definite encoding" step defined here: + https://html.spec.whatwg.org/multipage/parsing.html#parsing-with-a-known-character-encoding + + :param user_encodings: These encodings will be tried after the + `known_definite_encodings` have been tried and failed, and + after an attempt to sniff the encoding by looking at a + byte order mark has failed. In HTML terms, this + corresponds to the step "user has explicitly instructed + the user agent to override the document's character + encoding", defined here: + https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding + + :param override_encodings: A deprecated alias for + known_definite_encodings. Any encodings here will be tried + immediately after the encodings in + known_definite_encodings. + + :param smart_quotes_to: By default, Microsoft smart quotes will, like all other characters, be converted + to Unicode characters. Setting this to 'ascii' will convert them to ASCII quotes instead. + Setting it to 'xml' will convert them to XML entity references, and setting it to 'html' + will convert them to HTML entity references. + :param is_html: If True, this markup is considered to be HTML. Otherwise + it's assumed to be XML. + :param exclude_encodings: These encodings will not be considered, even + if the sniffing code thinks they might make sense. + + """ self.smart_quotes_to = smart_quotes_to self.tried_encodings = [] self.contains_replacement_characters = False self.is_html = is_html - + self.log = logging.getLogger(__name__) self.detector = EncodingDetector( - markup, override_encodings, is_html, exclude_encodings) + markup, known_definite_encodings, is_html, exclude_encodings, + user_encodings, override_encodings + ) # Short-circuit if the data is in Unicode to begin with. - if isinstance(markup, unicode) or markup == '': + if isinstance(markup, str) or markup == '': self.markup = markup - self.unicode_markup = unicode(markup) + self.unicode_markup = str(markup) self.original_encoding = None return @@ -375,9 +2855,10 @@ class UnicodeDammit: if encoding != "ascii": u = self._convert_from(encoding, "replace") if u is not None: - logging.warning( + self.log.warning( "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.") + "replaced with REPLACEMENT CHARACTER." + ) self.contains_replacement_characters = True break @@ -406,6 +2887,10 @@ class UnicodeDammit: return sub def _convert_from(self, proposed, errors="strict"): + """Attempt to convert the markup to the proposed encoding. + + :param proposed: The name of a character encoding. + """ proposed = self.find_codec(proposed) if not proposed or (proposed, errors) in self.tried_encodings: return None @@ -420,30 +2905,40 @@ class UnicodeDammit: markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) try: - #print "Trying to convert document to %s (errors=%s)" % ( - # proposed, errors) + #print("Trying to convert document to %s (errors=%s)" % ( + # proposed, errors)) u = self._to_unicode(markup, proposed, errors) self.markup = u self.original_encoding = proposed except Exception as e: - #print "That didn't work!" - #print e + #print("That didn't work!") + #print(e) return None - #print "Correct encoding: %s" % proposed + #print("Correct encoding: %s" % proposed) return self.markup def _to_unicode(self, data, encoding, errors="strict"): - '''Given a string and its encoding, decodes the string into Unicode. - %encoding is a string recognized by encodings.aliases''' - return unicode(data, encoding, errors) + """Given a string and its encoding, decodes the string into Unicode. + + :param encoding: The name of an encoding. + """ + return str(data, encoding, errors) @property def declared_html_encoding(self): + """If the markup is an HTML document, returns the encoding declared _within_ + the document. + """ if not self.is_html: return None return self.detector.declared_encoding def find_codec(self, charset): + """Convert the name of a character set to a codec name. + + :param charset: The name of a character set. + :return: The name of a codec. + """ value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) or (charset and self._codec(charset.replace("-", ""))) or (charset and self._codec(charset.replace("-", "_"))) @@ -733,7 +3228,7 @@ class UnicodeDammit: 0xde : b'\xc3\x9e', # Þ 0xdf : b'\xc3\x9f', # ß 0xe0 : b'\xc3\xa0', # à - 0xe1 : b'\xa1', # á + 0xe1 : b'\xa1', # á 0xe2 : b'\xc3\xa2', # â 0xe3 : b'\xc3\xa3', # ã 0xe4 : b'\xc3\xa4', # ä @@ -782,12 +3277,16 @@ class UnicodeDammit: Currently the only situation supported is Windows-1252 (or its subset ISO-8859-1), embedded in UTF-8. - The input must be a bytestring. If you've already converted - the document to Unicode, you're too late. - - The output is a bytestring in which `embedded_encoding` - characters have been converted to their `main_encoding` - equivalents. + :param in_bytes: A bytestring that you suspect contains + characters from multiple encodings. Note that this _must_ + be a bytestring. If you've already converted the document + to Unicode, you're too late. + :param main_encoding: The primary encoding of `in_bytes`. + :param embedded_encoding: The encoding that was used to embed characters + in the main document. + :return: A bytestring in which `embedded_encoding` + characters have been converted to their `main_encoding` + equivalents. """ if embedded_encoding.replace('_', '-').lower() not in ( 'windows-1252', 'windows_1252'): diff --git a/lib/bs4/diagnose.py b/lib/bs4/diagnose.py index 1b719830..500e92df 100644 --- a/lib/bs4/diagnose.py +++ b/lib/bs4/diagnose.py @@ -1,7 +1,11 @@ """Diagnostic functions, mainly for use when doing tech support.""" + +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + import cProfile -from StringIO import StringIO -from HTMLParser import HTMLParser +from io import StringIO +from html.parser import HTMLParser import bs4 from bs4 import BeautifulSoup, __version__ from bs4.builder import builder_registry @@ -16,9 +20,13 @@ import sys import cProfile def diagnose(data): - """Diagnostic suite for isolating common problems.""" - print "Diagnostic running on Beautiful Soup %s" % __version__ - print "Python version %s" % sys.version + """Diagnostic suite for isolating common problems. + + :param data: A string containing markup that needs to be explained. + :return: None; diagnostics are printed to standard output. + """ + print(("Diagnostic running on Beautiful Soup %s" % __version__)) + print(("Python version %s" % sys.version)) basic_parsers = ["html.parser", "html5lib", "lxml"] for name in basic_parsers: @@ -27,66 +35,84 @@ def diagnose(data): break else: basic_parsers.remove(name) - print ( + print(( "I noticed that %s is not installed. Installing it may help." % - name) + name)) if 'lxml' in basic_parsers: - basic_parsers.append(["lxml", "xml"]) + basic_parsers.append("lxml-xml") try: from lxml import etree - print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) - except ImportError, e: - print ( + print(("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))) + except ImportError as e: + print( "lxml is not installed or couldn't be imported.") if 'html5lib' in basic_parsers: try: import html5lib - print "Found html5lib version %s" % html5lib.__version__ - except ImportError, e: - print ( + print(("Found html5lib version %s" % html5lib.__version__)) + except ImportError as e: + print( "html5lib is not installed or couldn't be imported.") if hasattr(data, 'read'): data = data.read() - elif os.path.exists(data): - print '"%s" looks like a filename. Reading data from the file.' % data - data = open(data).read() elif data.startswith("http:") or data.startswith("https:"): - print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data - print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." + print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)) + print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") return - print + else: + try: + if os.path.exists(data): + print(('"%s" looks like a filename. Reading data from the file.' % data)) + with open(data) as fp: + data = fp.read() + except ValueError: + # This can happen on some platforms when the 'filename' is + # too long. Assume it's data and not a filename. + pass + print("") for parser in basic_parsers: - print "Trying to parse your markup with %s" % parser + print(("Trying to parse your markup with %s" % parser)) success = False try: - soup = BeautifulSoup(data, parser) + soup = BeautifulSoup(data, features=parser) success = True - except Exception, e: - print "%s could not parse the markup." % parser + except Exception as e: + print(("%s could not parse the markup." % parser)) traceback.print_exc() if success: - print "Here's what %s did with the markup:" % parser - print soup.prettify() + print(("Here's what %s did with the markup:" % parser)) + print((soup.prettify())) - print "-" * 80 + print(("-" * 80)) def lxml_trace(data, html=True, **kwargs): """Print out the lxml events that occur during parsing. This lets you see how lxml parses a document when no Beautiful - Soup code is running. + Soup code is running. You can use this to determine whether + an lxml-specific problem is in Beautiful Soup's lxml tree builders + or in lxml itself. + + :param data: Some markup. + :param html: If True, markup will be parsed with lxml's HTML parser. + if False, lxml's XML parser will be used. """ from lxml import etree for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): - print("%s, %4s, %s" % (event, element.tag, element.text)) + print(("%s, %4s, %s" % (event, element.tag, element.text))) class AnnouncingParser(HTMLParser): - """Announces HTMLParser parse events, without doing anything else.""" + """Subclass of HTMLParser that announces parse events, without doing + anything else. + + You can use this to get a picture of how html.parser sees a given + document. The easiest way to do this is to call `htmlparser_trace`. + """ def _p(self, s): print(s) @@ -123,6 +149,8 @@ def htmlparser_trace(data): This lets you see how HTMLParser parses a document when no Beautiful Soup code is running. + + :param data: Some markup. """ parser = AnnouncingParser() parser.feed(data) @@ -165,9 +193,9 @@ def rdoc(num_elements=1000): def benchmark_parsers(num_elements=100000): """Very basic head-to-head performance benchmark.""" - print "Comparative parser benchmark on Beautiful Soup %s" % __version__ + print(("Comparative parser benchmark on Beautiful Soup %s" % __version__)) data = rdoc(num_elements) - print "Generated a large invalid HTML document (%d bytes)." % len(data) + print(("Generated a large invalid HTML document (%d bytes)." % len(data))) for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: success = False @@ -176,27 +204,27 @@ def benchmark_parsers(num_elements=100000): soup = BeautifulSoup(data, parser) b = time.time() success = True - except Exception, e: - print "%s could not parse the markup." % parser + except Exception as e: + print(("%s could not parse the markup." % parser)) traceback.print_exc() if success: - print "BS4+%s parsed the markup in %.2fs." % (parser, b-a) + print(("BS4+%s parsed the markup in %.2fs." % (parser, b-a))) from lxml import etree a = time.time() etree.HTML(data) b = time.time() - print "Raw lxml parsed the markup in %.2fs." % (b-a) + print(("Raw lxml parsed the markup in %.2fs." % (b-a))) import html5lib parser = html5lib.HTMLParser() a = time.time() parser.parse(data) b = time.time() - print "Raw html5lib parsed the markup in %.2fs." % (b-a) + print(("Raw html5lib parsed the markup in %.2fs." % (b-a))) def profile(num_elements=100000, parser="lxml"): - + """Use Python's profiler on a randomly generated document.""" filehandle = tempfile.NamedTemporaryFile() filename = filehandle.name @@ -209,5 +237,6 @@ def profile(num_elements=100000, parser="lxml"): stats.sort_stats("cumulative") stats.print_stats('_html5lib|bs4', 50) +# If this file is run as a script, standard input is diagnosed. if __name__ == '__main__': diagnose(sys.stdin.read()) diff --git a/lib/bs4/element.py b/lib/bs4/element.py index c70ad5a0..82a986e4 100644 --- a/lib/bs4/element.py +++ b/lib/bs4/element.py @@ -1,14 +1,35 @@ -from pdb import set_trace -import collections +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +try: + from collections.abc import Callable # Python 3.6 +except ImportError as e: + from collections import Callable import re import sys import warnings -from bs4.dammit import EntitySubstitution +try: + import soupsieve +except ImportError as e: + soupsieve = None + warnings.warn( + 'The soupsieve package is not installed. CSS selectors cannot be used.' + ) + +from bs4.formatter import ( + Formatter, + HTMLFormatter, + XMLFormatter, +) DEFAULT_OUTPUT_ENCODING = "utf-8" PY3K = (sys.version_info[0] > 2) -whitespace_re = re.compile("\s+") +nonwhitespace_re = re.compile(r"\S+") + +# NOTE: This isn't used as of 4.7.0. I'm leaving it for a little bit on +# the off chance someone imported it for their own use. +whitespace_re = re.compile(r"\s+") def _alias(attr): """Alias one attribute name to another for backward compatibility""" @@ -22,22 +43,59 @@ def _alias(attr): return alias -class NamespacedAttribute(unicode): +# These encodings are recognized by Python (so PageElement.encode +# could theoretically support them) but XML and HTML don't recognize +# them (so they should not show up in an XML or HTML document as that +# document's encoding). +# +# If an XML document is encoded in one of these encodings, no encoding +# will be mentioned in the XML declaration. If an HTML document is +# encoded in one of these encodings, and the HTML document has a +# tag that mentions an encoding, the encoding will be given as +# the empty string. +# +# Source: +# https://docs.python.org/3/library/codecs.html#python-specific-encodings +PYTHON_SPECIFIC_ENCODINGS = set([ + "idna", + "mbcs", + "oem", + "palmos", + "punycode", + "raw_unicode_escape", + "undefined", + "unicode_escape", + "raw-unicode-escape", + "unicode-escape", + "string-escape", + "string_escape", +]) + - def __new__(cls, prefix, name, namespace=None): - if name is None: - obj = unicode.__new__(cls, prefix) - elif prefix is None: +class NamespacedAttribute(str): + """A namespaced string (e.g. 'xml:lang') that remembers the namespace + ('xml') and the name ('lang') that were used to create it. + """ + + def __new__(cls, prefix, name=None, namespace=None): + if not name: + # This is the default namespace. Its name "has no value" + # per https://www.w3.org/TR/xml-names/#defaulting + name = None + + if not name: + obj = str.__new__(cls, prefix) + elif not prefix: # Not really namespaced. - obj = unicode.__new__(cls, name) + obj = str.__new__(cls, name) else: - obj = unicode.__new__(cls, prefix + ":" + name) + obj = str.__new__(cls, prefix + ":" + name) obj.prefix = prefix obj.name = name obj.namespace = namespace return obj -class AttributeValueWithCharsetSubstitution(unicode): +class AttributeValueWithCharsetSubstitution(str): """A stand-in object for a character encoding specified in HTML.""" class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): @@ -48,11 +106,16 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): """ def __new__(cls, original_value): - obj = unicode.__new__(cls, original_value) + obj = str.__new__(cls, original_value) obj.original_value = original_value return obj def encode(self, encoding): + """When an HTML document is being encoded to a given encoding, the + value of a meta tag's 'charset' is the name of the encoding. + """ + if encoding in PYTHON_SPECIFIC_ENCODINGS: + return '' return encoding @@ -65,131 +128,52 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): The value of the 'content' attribute will be one of these objects. """ - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M) def __new__(cls, original_value): match = cls.CHARSET_RE.search(original_value) if match is None: # No substitution necessary. - return unicode.__new__(unicode, original_value) + return str.__new__(str, original_value) - obj = unicode.__new__(cls, original_value) + obj = str.__new__(cls, original_value) obj.original_value = original_value return obj def encode(self, encoding): + if encoding in PYTHON_SPECIFIC_ENCODINGS: + return '' def rewrite(match): return match.group(1) + encoding return self.CHARSET_RE.sub(rewrite, self.original_value) -class HTMLAwareEntitySubstitution(EntitySubstitution): - - """Entity substitution rules that are aware of some HTML quirks. - - Specifically, the contents of ') - # => <script> do_nasty_stuff() </script> - # sanitize_html('Click here for $100') - # => Click here for $100 - def sanitize_token(self, token): - - # accommodate filters which use token_type differently - token_type = token["type"] - if token_type in list(tokenTypes.keys()): - token_type = tokenTypes[token_type] - - if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"]): - if token["name"] in self.allowed_elements: - return self.allowed_token(token, token_type) - else: - return self.disallowed_token(token, token_type) - elif token_type == tokenTypes["Comment"]: - pass - else: - return token - - def allowed_token(self, token, token_type): - if "data" in token: - attrs = dict([(name, val) for name, val in - token["data"][::-1] - if name in self.allowed_attributes]) - for attr in self.attr_val_is_uri: - if attr not in attrs: - continue - val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', - unescape(attrs[attr])).lower() - # remove replacement characters from unescaped characters - val_unescaped = val_unescaped.replace("\ufffd", "") - uri = urlparse.urlparse(val_unescaped) - if uri and uri.scheme: - if uri.scheme not in self.allowed_protocols: - del attrs[attr] - if uri.scheme == 'data': - m = content_type_rgx.match(uri.path) - if not m: - del attrs[attr] - elif m.group('content_type') not in self.allowed_content_types: - del attrs[attr] - - for attr in self.svg_attr_val_allows_ref: - if attr in attrs: - attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', - ' ', - unescape(attrs[attr])) - if (token["name"] in self.svg_allow_local_href and - 'xlink:href' in attrs and re.search('^\s*[^#\s].*', - attrs['xlink:href'])): - del attrs['xlink:href'] - if 'style' in attrs: - attrs['style'] = self.sanitize_css(attrs['style']) - token["data"] = [[name, val] for name, val in list(attrs.items())] - return token - - def disallowed_token(self, token, token_type): - if token_type == tokenTypes["EndTag"]: - token["data"] = "" % token["name"] - elif token["data"]: - attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]]) - token["data"] = "<%s%s>" % (token["name"], attrs) - else: - token["data"] = "<%s>" % token["name"] - if token.get("selfClosing"): - token["data"] = token["data"][:-1] + "/>" - - if token["type"] in list(tokenTypes.keys()): - token["type"] = "Characters" - else: - token["type"] = tokenTypes["Characters"] - - del token["name"] - return token - - def sanitize_css(self, style): - # disallow urls - style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) - - # gauntlet - if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): - return '' - if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): - return '' - - clean = [] - for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): - if not value: - continue - if prop.lower() in self.allowed_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background', 'border', 'margin', - 'padding']: - for keyword in value.split(): - if keyword not in self.acceptable_css_keywords and \ - not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): - break - else: - clean.append(prop + ': ' + value + ';') - elif prop.lower() in self.allowed_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) - - -class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin): - def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, - lowercaseElementName=False, lowercaseAttrName=False, parser=None): - # Change case matching defaults as we only output lowercase html anyway - # This solution doesn't seem ideal... - HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet, - lowercaseElementName, lowercaseAttrName, parser=parser) - - def __iter__(self): - for token in HTMLTokenizer.__iter__(self): - token = self.sanitize_token(token) - if token: - yield token diff --git a/lib/html5lib/serializer/__init__.py b/lib/html5lib/serializer/__init__.py deleted file mode 100644 index 8380839a..00000000 --- a/lib/html5lib/serializer/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from .. import treewalkers - -from .htmlserializer import HTMLSerializer - - -def serialize(input, tree="etree", format="html", encoding=None, - **serializer_opts): - # XXX: Should we cache this? - walker = treewalkers.getTreeWalker(tree) - if format == "html": - s = HTMLSerializer(**serializer_opts) - else: - raise ValueError("type must be html") - return s.render(walker(input), encoding) diff --git a/lib/html5lib/serializer/htmlserializer.py b/lib/html5lib/serializer/htmlserializer.py deleted file mode 100644 index be4d6344..00000000 --- a/lib/html5lib/serializer/htmlserializer.py +++ /dev/null @@ -1,317 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - -try: - from functools import reduce -except ImportError: - pass - -from ..constants import voidElements, booleanAttributes, spaceCharacters -from ..constants import rcdataElements, entities, xmlEntities -from .. import utils -from xml.sax.saxutils import escape - -spaceCharacters = "".join(spaceCharacters) - -try: - from codecs import register_error, xmlcharrefreplace_errors -except ImportError: - unicode_encode_errors = "strict" -else: - unicode_encode_errors = "htmlentityreplace" - - encode_entity_map = {} - is_ucs4 = len("\U0010FFFF") == 1 - for k, v in list(entities.items()): - # skip multi-character entities - if ((is_ucs4 and len(v) > 1) or - (not is_ucs4 and len(v) > 2)): - continue - if v != "&": - if len(v) == 2: - v = utils.surrogatePairToCodepoint(v) - else: - v = ord(v) - if v not in encode_entity_map or k.islower(): - # prefer < over < and similarly for &, >, etc. - encode_entity_map[v] = k - - def htmlentityreplace_errors(exc): - if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): - res = [] - codepoints = [] - skip = False - for i, c in enumerate(exc.object[exc.start:exc.end]): - if skip: - skip = False - continue - index = i + exc.start - if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): - codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2]) - skip = True - else: - codepoint = ord(c) - codepoints.append(codepoint) - for cp in codepoints: - e = encode_entity_map.get(cp) - if e: - res.append("&") - res.append(e) - if not e.endswith(";"): - res.append(";") - else: - res.append("&#x%s;" % (hex(cp)[2:])) - return ("".join(res), exc.end) - else: - return xmlcharrefreplace_errors(exc) - - register_error(unicode_encode_errors, htmlentityreplace_errors) - - del register_error - - -class HTMLSerializer(object): - - # attribute quoting options - quote_attr_values = False - quote_char = '"' - use_best_quote_char = True - - # tag syntax options - omit_optional_tags = True - minimize_boolean_attributes = True - use_trailing_solidus = False - space_before_trailing_solidus = True - - # escaping options - escape_lt_in_attrs = False - escape_rcdata = False - resolve_entities = True - - # miscellaneous options - alphabetical_attributes = False - inject_meta_charset = True - strip_whitespace = False - sanitize = False - - options = ("quote_attr_values", "quote_char", "use_best_quote_char", - "omit_optional_tags", "minimize_boolean_attributes", - "use_trailing_solidus", "space_before_trailing_solidus", - "escape_lt_in_attrs", "escape_rcdata", "resolve_entities", - "alphabetical_attributes", "inject_meta_charset", - "strip_whitespace", "sanitize") - - def __init__(self, **kwargs): - """Initialize HTMLSerializer. - - Keyword options (default given first unless specified) include: - - inject_meta_charset=True|False - Whether it insert a meta element to define the character set of the - document. - quote_attr_values=True|False - Whether to quote attribute values that don't require quoting - per HTML5 parsing rules. - quote_char=u'"'|u"'" - Use given quote character for attribute quoting. Default is to - use double quote unless attribute value contains a double quote, - in which case single quotes are used instead. - escape_lt_in_attrs=False|True - Whether to escape < in attribute values. - escape_rcdata=False|True - Whether to escape characters that need to be escaped within normal - elements within rcdata elements such as style. - resolve_entities=True|False - Whether to resolve named character entities that appear in the - source tree. The XML predefined entities < > & " ' - are unaffected by this setting. - strip_whitespace=False|True - Whether to remove semantically meaningless whitespace. (This - compresses all whitespace to a single space except within pre.) - minimize_boolean_attributes=True|False - Shortens boolean attributes to give just the attribute value, - for example becomes . - use_trailing_solidus=False|True - Includes a close-tag slash at the end of the start tag of void - elements (empty elements whose end tag is forbidden). E.g.
. - space_before_trailing_solidus=True|False - Places a space immediately before the closing slash in a tag - using a trailing solidus. E.g.
. Requires use_trailing_solidus. - sanitize=False|True - Strip all unsafe or unknown constructs from output. - See `html5lib user documentation`_ - omit_optional_tags=True|False - Omit start/end tags that are optional. - alphabetical_attributes=False|True - Reorder attributes to be in alphabetical order. - - .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation - """ - if 'quote_char' in kwargs: - self.use_best_quote_char = False - for attr in self.options: - setattr(self, attr, kwargs.get(attr, getattr(self, attr))) - self.errors = [] - self.strict = False - - def encode(self, string): - assert(isinstance(string, text_type)) - if self.encoding: - return string.encode(self.encoding, unicode_encode_errors) - else: - return string - - def encodeStrict(self, string): - assert(isinstance(string, text_type)) - if self.encoding: - return string.encode(self.encoding, "strict") - else: - return string - - def serialize(self, treewalker, encoding=None): - self.encoding = encoding - in_cdata = False - self.errors = [] - - if encoding and self.inject_meta_charset: - from ..filters.inject_meta_charset import Filter - treewalker = Filter(treewalker, encoding) - # WhitespaceFilter should be used before OptionalTagFilter - # for maximum efficiently of this latter filter - if self.strip_whitespace: - from ..filters.whitespace import Filter - treewalker = Filter(treewalker) - if self.sanitize: - from ..filters.sanitizer import Filter - treewalker = Filter(treewalker) - if self.omit_optional_tags: - from ..filters.optionaltags import Filter - treewalker = Filter(treewalker) - # Alphabetical attributes must be last, as other filters - # could add attributes and alter the order - if self.alphabetical_attributes: - from ..filters.alphabeticalattributes import Filter - treewalker = Filter(treewalker) - - for token in treewalker: - type = token["type"] - if type == "Doctype": - doctype = "= 0: - if token["systemId"].find("'") >= 0: - self.serializeError("System identifer contains both single and double quote characters") - quote_char = "'" - else: - quote_char = '"' - doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char) - - doctype += ">" - yield self.encodeStrict(doctype) - - elif type in ("Characters", "SpaceCharacters"): - if type == "SpaceCharacters" or in_cdata: - if in_cdata and token["data"].find("= 0: - self.serializeError("Unexpected \"'=", False) - v = v.replace("&", "&") - if self.escape_lt_in_attrs: - v = v.replace("<", "<") - if quote_attr: - quote_char = self.quote_char - if self.use_best_quote_char: - if "'" in v and '"' not in v: - quote_char = '"' - elif '"' in v and "'" not in v: - quote_char = "'" - if quote_char == "'": - v = v.replace("'", "'") - else: - v = v.replace('"', """) - yield self.encodeStrict(quote_char) - yield self.encode(v) - yield self.encodeStrict(quote_char) - else: - yield self.encode(v) - if name in voidElements and self.use_trailing_solidus: - if self.space_before_trailing_solidus: - yield self.encodeStrict(" /") - else: - yield self.encodeStrict("/") - yield self.encode(">") - - elif type == "EndTag": - name = token["name"] - if name in rcdataElements: - in_cdata = False - elif in_cdata: - self.serializeError("Unexpected child element of a CDATA element") - yield self.encodeStrict("" % name) - - elif type == "Comment": - data = token["data"] - if data.find("--") >= 0: - self.serializeError("Comment contains --") - yield self.encodeStrict("" % token["data"]) - - elif type == "Entity": - name = token["name"] - key = name + ";" - if key not in entities: - self.serializeError("Entity %s not recognized" % name) - if self.resolve_entities and key not in xmlEntities: - data = entities[key] - else: - data = "&%s;" % name - yield self.encodeStrict(data) - - else: - self.serializeError(token["data"]) - - def render(self, treewalker, encoding=None): - if encoding: - return b"".join(list(self.serialize(treewalker, encoding))) - else: - return "".join(list(self.serialize(treewalker))) - - def serializeError(self, data="XXX ERROR MESSAGE NEEDED"): - # XXX The idea is to make data mandatory. - self.errors.append(data) - if self.strict: - raise SerializeError - - -def SerializeError(Exception): - """Error in serialized tree""" - pass diff --git a/lib/html5lib/tokenizer.py b/lib/html5lib/tokenizer.py deleted file mode 100644 index 79774578..00000000 --- a/lib/html5lib/tokenizer.py +++ /dev/null @@ -1,1731 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -try: - chr = unichr # flake8: noqa -except NameError: - pass - -from collections import deque - -from .constants import spaceCharacters -from .constants import entities -from .constants import asciiLetters, asciiUpper2Lower -from .constants import digits, hexDigits, EOF -from .constants import tokenTypes, tagTokenTypes -from .constants import replacementCharacters - -from .inputstream import HTMLInputStream - -from .trie import Trie - -entitiesTrie = Trie(entities) - - -class HTMLTokenizer(object): - """ This class takes care of tokenizing HTML. - - * self.currentToken - Holds the token that is currently being processed. - - * self.state - Holds a reference to the method to be invoked... XXX - - * self.stream - Points to HTMLInputStream object. - """ - - def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, - lowercaseElementName=True, lowercaseAttrName=True, parser=None): - - self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet) - self.parser = parser - - # Perform case conversions? - self.lowercaseElementName = lowercaseElementName - self.lowercaseAttrName = lowercaseAttrName - - # Setup the initial tokenizer state - self.escapeFlag = False - self.lastFourChars = [] - self.state = self.dataState - self.escape = False - - # The current token being created - self.currentToken = None - super(HTMLTokenizer, self).__init__() - - def __iter__(self): - """ This is where the magic happens. - - We do our usually processing through the states and when we have a token - to return we yield the token which pauses processing until the next token - is requested. - """ - self.tokenQueue = deque([]) - # Start processing. When EOF is reached self.state will return False - # instead of True and the loop will terminate. - while self.state(): - while self.stream.errors: - yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} - while self.tokenQueue: - yield self.tokenQueue.popleft() - - def consumeNumberEntity(self, isHex): - """This function returns either U+FFFD or the character based on the - decimal or hexadecimal representation. It also discards ";" if present. - If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. - """ - - allowed = digits - radix = 10 - if isHex: - allowed = hexDigits - radix = 16 - - charStack = [] - - # Consume all the characters that are in range while making sure we - # don't hit an EOF. - c = self.stream.char() - while c in allowed and c is not EOF: - charStack.append(c) - c = self.stream.char() - - # Convert the set of characters consumed to an int. - charAsInt = int("".join(charStack), radix) - - # Certain characters get replaced with others - if charAsInt in replacementCharacters: - char = replacementCharacters[charAsInt] - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - elif ((0xD800 <= charAsInt <= 0xDFFF) or - (charAsInt > 0x10FFFF)): - char = "\uFFFD" - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - else: - # Should speed up this check somehow (e.g. move the set to a constant) - if ((0x0001 <= charAsInt <= 0x0008) or - (0x000E <= charAsInt <= 0x001F) or - (0x007F <= charAsInt <= 0x009F) or - (0xFDD0 <= charAsInt <= 0xFDEF) or - charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, - 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, - 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, - 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, - 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, - 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, - 0xFFFFF, 0x10FFFE, 0x10FFFF])): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - try: - # Try/except needed as UCS-2 Python builds' unichar only works - # within the BMP. - char = chr(charAsInt) - except ValueError: - v = charAsInt - 0x10000 - char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) - - # Discard the ; if present. Otherwise, put it back on the queue and - # invoke parseError on parser. - if c != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "numeric-entity-without-semicolon"}) - self.stream.unget(c) - - return char - - def consumeEntity(self, allowedChar=None, fromAttribute=False): - # Initialise to the default output for when no entity is matched - output = "&" - - charStack = [self.stream.char()] - if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") - or (allowedChar is not None and allowedChar == charStack[0])): - self.stream.unget(charStack[0]) - - elif charStack[0] == "#": - # Read the next character to see if it's hex or decimal - hex = False - charStack.append(self.stream.char()) - if charStack[-1] in ("x", "X"): - hex = True - charStack.append(self.stream.char()) - - # charStack[-1] should be the first digit - if (hex and charStack[-1] in hexDigits) \ - or (not hex and charStack[-1] in digits): - # At least one digit found, so consume the whole number - self.stream.unget(charStack[-1]) - output = self.consumeNumberEntity(hex) - else: - # No digits found - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "expected-numeric-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - else: - # At this point in the process might have named entity. Entities - # are stored in the global variable "entities". - # - # Consume characters and compare to these to a substring of the - # entity names in the list until the substring no longer matches. - while (charStack[-1] is not EOF): - if not entitiesTrie.has_keys_with_prefix("".join(charStack)): - break - charStack.append(self.stream.char()) - - # At this point we have a string that starts with some characters - # that may match an entity - # Try to find the longest entity the string will match to take care - # of ¬i for instance. - try: - entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) - entityLength = len(entityName) - except KeyError: - entityName = None - - if entityName is not None: - if entityName[-1] != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "named-entity-without-semicolon"}) - if (entityName[-1] != ";" and fromAttribute and - (charStack[entityLength] in asciiLetters or - charStack[entityLength] in digits or - charStack[entityLength] == "=")): - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - else: - output = entities[entityName] - self.stream.unget(charStack.pop()) - output += "".join(charStack[entityLength:]) - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-named-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - if fromAttribute: - self.currentToken["data"][-1][1] += output - else: - if output in spaceCharacters: - tokenType = "SpaceCharacters" - else: - tokenType = "Characters" - self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) - - def processEntityInAttribute(self, allowedChar): - """This method replaces the need for "entityInAttributeValueState". - """ - self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) - - def emitCurrentToken(self): - """This method is a generic handler for emitting the tags. It also sets - the state to "data" because that's what's needed after a token has been - emitted. - """ - token = self.currentToken - # Add token to the queue to be yielded - if (token["type"] in tagTokenTypes): - if self.lowercaseElementName: - token["name"] = token["name"].translate(asciiUpper2Lower) - if token["type"] == tokenTypes["EndTag"]: - if token["data"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "attributes-in-end-tag"}) - if token["selfClosing"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "self-closing-flag-on-end-tag"}) - self.tokenQueue.append(token) - self.state = self.dataState - - # Below are the various tokenizer states worked out. - def dataState(self): - data = self.stream.char() - if data == "&": - self.state = self.entityDataState - elif data == "<": - self.state = self.tagOpenState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\u0000"}) - elif data is EOF: - # Tokenization ends. - return False - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def entityDataState(self): - self.consumeEntity() - self.state = self.dataState - return True - - def rcdataState(self): - data = self.stream.char() - if data == "&": - self.state = self.characterReferenceInRcdata - elif data == "<": - self.state = self.rcdataLessThanSignState - elif data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def characterReferenceInRcdata(self): - self.consumeEntity() - self.state = self.rcdataState - return True - - def rawtextState(self): - data = self.stream.char() - if data == "<": - self.state = self.rawtextLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def scriptDataState(self): - data = self.stream.char() - if data == "<": - self.state = self.scriptDataLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def plaintextState(self): - data = self.stream.char() - if data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + self.stream.charsUntil("\u0000")}) - return True - - def tagOpenState(self): - data = self.stream.char() - if data == "!": - self.state = self.markupDeclarationOpenState - elif data == "/": - self.state = self.closeTagOpenState - elif data in asciiLetters: - self.currentToken = {"type": tokenTypes["StartTag"], - "name": data, "data": [], - "selfClosing": False, - "selfClosingAcknowledged": False} - self.state = self.tagNameState - elif data == ">": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-right-bracket"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) - self.state = self.dataState - elif data == "?": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-question-mark"}) - self.stream.unget(data) - self.state = self.bogusCommentState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.dataState - return True - - def closeTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.currentToken = {"type": tokenTypes["EndTag"], "name": data, - "data": [], "selfClosing": False} - self.state = self.tagNameState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-right-bracket"}) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-eof"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "": - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-tag-name"}) - self.state = self.dataState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - else: - self.currentToken["name"] += data - # (Don't use charsUntil here, because tag names are - # very short and it's faster to not do anything fancy) - return True - - def rcdataLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.rcdataEndTagOpenState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.rcdataState - return True - - def rcdataEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer += data - self.state = self.rcdataEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataEscapedState - elif data == EOF: - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.scriptDataEscapedEndTagOpenState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) - self.temporaryBuffer = data - self.state = self.scriptDataDoubleEscapeStartState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer = data - self.state = self.scriptDataEscapedEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": ""))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataDoubleEscapedState - else: - self.state = self.scriptDataEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataDoubleEscapedState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - return True - - def scriptDataDoubleEscapedDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedDashDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) - self.temporaryBuffer = "" - self.state = self.scriptDataDoubleEscapeEndState - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapeEndState(self): - data = self.stream.char() - if data in (spaceCharacters | frozenset(("/", ">"))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataEscapedState - else: - self.state = self.scriptDataDoubleEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def beforeAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data in ("'", '"', "=", "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-name-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def attributeNameState(self): - data = self.stream.char() - leavingThisState = True - emitToken = False - if data == "=": - self.state = self.beforeAttributeValueState - elif data in asciiLetters: - self.currentToken["data"][-1][0] += data +\ - self.stream.charsUntil(asciiLetters, True) - leavingThisState = False - elif data == ">": - # XXX If we emit here the attributes are converted to a dict - # without being checked and when the code below runs we error - # because data is a dict not a list - emitToken = True - elif data in spaceCharacters: - self.state = self.afterAttributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][0] += "\uFFFD" - leavingThisState = False - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"][-1][0] += data - leavingThisState = False - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-attribute-name"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][0] += data - leavingThisState = False - - if leavingThisState: - # Attributes are not dropped at this stage. That happens when the - # start tag token is emitted so values can still be safely appended - # to attributes, but we do want to report the parse error in time. - if self.lowercaseAttrName: - self.currentToken["data"][-1][0] = ( - self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) - for name, value in self.currentToken["data"][:-1]: - if self.currentToken["data"][-1][0] == name: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "duplicate-attribute"}) - break - # XXX Fix for above XXX - if emitToken: - self.emitCurrentToken() - return True - - def afterAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "=": - self.state = self.beforeAttributeValueState - elif data == ">": - self.emitCurrentToken() - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-after-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-end-of-tag-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def beforeAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "\"": - self.state = self.attributeValueDoubleQuotedState - elif data == "&": - self.state = self.attributeValueUnQuotedState - self.stream.unget(data) - elif data == "'": - self.state = self.attributeValueSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-right-bracket"}) - self.emitCurrentToken() - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - self.state = self.attributeValueUnQuotedState - elif data in ("=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "equals-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - return True - - def attributeValueDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute('"') - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-double-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("\"", "&", "\u0000")) - return True - - def attributeValueSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute("'") - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-single-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("'", "&", "\u0000")) - return True - - def attributeValueUnQuotedState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == "&": - self.processEntityInAttribute(">") - elif data == ">": - self.emitCurrentToken() - elif data in ('"', "'", "=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-no-quotes"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data + self.stream.charsUntil( - frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) - return True - - def afterAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-EOF-after-attribute-value"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-attribute-value"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def selfClosingStartTagState(self): - data = self.stream.char() - if data == ">": - self.currentToken["selfClosing"] = True - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "unexpected-EOF-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def bogusCommentState(self): - # Make a new comment token and give it as value all the characters - # until the first > or EOF (charsUntil checks for EOF automatically) - # and emit it. - data = self.stream.charsUntil(">") - data = data.replace("\u0000", "\uFFFD") - self.tokenQueue.append( - {"type": tokenTypes["Comment"], "data": data}) - - # Eat the character directly after the bogus comment which is either a - # ">" or an EOF. - self.stream.char() - self.state = self.dataState - return True - - def markupDeclarationOpenState(self): - charStack = [self.stream.char()] - if charStack[-1] == "-": - charStack.append(self.stream.char()) - if charStack[-1] == "-": - self.currentToken = {"type": tokenTypes["Comment"], "data": ""} - self.state = self.commentStartState - return True - elif charStack[-1] in ('d', 'D'): - matched = True - for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), - ('y', 'Y'), ('p', 'P'), ('e', 'E')): - charStack.append(self.stream.char()) - if charStack[-1] not in expected: - matched = False - break - if matched: - self.currentToken = {"type": tokenTypes["Doctype"], - "name": "", - "publicId": None, "systemId": None, - "correct": True} - self.state = self.doctypeState - return True - elif (charStack[-1] == "[" and - self.parser is not None and - self.parser.tree.openElements and - self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): - matched = True - for expected in ["C", "D", "A", "T", "A", "["]: - charStack.append(self.stream.char()) - if charStack[-1] != expected: - matched = False - break - if matched: - self.state = self.cdataSectionState - return True - - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-dashes-or-doctype"}) - - while charStack: - self.stream.unget(charStack.pop()) - self.state = self.bogusCommentState - return True - - def commentStartState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentStartDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data - self.state = self.commentState - return True - - def commentStartDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data + \ - self.stream.charsUntil(("-", "\u0000")) - return True - - def commentEndDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentEndState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--\uFFFD" - self.state = self.commentState - elif data == "!": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-bang-after-double-dash-in-comment"}) - self.state = self.commentEndBangState - elif data == "-": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-dash-after-double-dash-in-comment"}) - self.currentToken["data"] += data - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-double-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-comment"}) - self.currentToken["data"] += "--" + data - self.state = self.commentState - return True - - def commentEndBangState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "-": - self.currentToken["data"] += "--!" - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--!\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-bang-state"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "--!" + data - self.state = self.commentState - return True - - def doctypeState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "need-space-after-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeNameState - return True - - def beforeDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-right-bracket"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] = "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] = data - self.state = self.doctypeNameState - return True - - def doctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.state = self.afterDoctypeNameState - elif data == ">": - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype-name"}) - self.currentToken["correct"] = False - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] += data - return True - - def afterDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.currentToken["correct"] = False - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - if data in ("p", "P"): - matched = True - for expected in (("u", "U"), ("b", "B"), ("l", "L"), - ("i", "I"), ("c", "C")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypePublicKeywordState - return True - elif data in ("s", "S"): - matched = True - for expected in (("y", "Y"), ("s", "S"), ("t", "T"), - ("e", "E"), ("m", "M")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypeSystemKeywordState - return True - - # All the characters read before the current 'data' will be - # [a-zA-Z], so they're garbage in the bogus doctype and can be - # discarded; only the latest character might be '>' or EOF - # and needs to be ungetted - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-space-or-right-bracket-in-doctype", "datavars": - {"data": data}}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - - return True - - def afterDoctypePublicKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypePublicIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - return True - - def beforeDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypePublicIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def doctypePublicIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def afterDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.betweenDoctypePublicAndSystemIdentifiersState - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def betweenDoctypePublicAndSystemIdentifiersState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def afterDoctypeSystemKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeSystemIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - return True - - def beforeDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypeSystemIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def doctypeSystemIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def afterDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.state = self.bogusDoctypeState - return True - - def bogusDoctypeState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - # XXX EMIT - self.stream.unget(data) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - pass - return True - - def cdataSectionState(self): - data = [] - while True: - data.append(self.stream.charsUntil("]")) - data.append(self.stream.charsUntil(">")) - char = self.stream.char() - if char == EOF: - break - else: - assert char == ">" - if data[-1][-2:] == "]]": - data[-1] = data[-1][:-2] - break - else: - data.append(char) - - data = "".join(data) - # Deal with null here rather than in the parser - nullCount = data.count("\u0000") - if nullCount > 0: - for i in range(nullCount): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - data = data.replace("\u0000", "\uFFFD") - if data: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": data}) - self.state = self.dataState - return True diff --git a/lib/html5lib/treeadapters/sax.py b/lib/html5lib/treeadapters/sax.py deleted file mode 100644 index ad47df95..00000000 --- a/lib/html5lib/treeadapters/sax.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.sax.xmlreader import AttributesNSImpl - -from ..constants import adjustForeignAttributes, unadjustForeignAttributes - -prefix_mapping = {} -for prefix, localName, namespace in adjustForeignAttributes.values(): - if prefix is not None: - prefix_mapping[prefix] = namespace - - -def to_sax(walker, handler): - """Call SAX-like content handler based on treewalker walker""" - handler.startDocument() - for prefix, namespace in prefix_mapping.items(): - handler.startPrefixMapping(prefix, namespace) - - for token in walker: - type = token["type"] - if type == "Doctype": - continue - elif type in ("StartTag", "EmptyTag"): - attrs = AttributesNSImpl(token["data"], - unadjustForeignAttributes) - handler.startElementNS((token["namespace"], token["name"]), - token["name"], - attrs) - if type == "EmptyTag": - handler.endElementNS((token["namespace"], token["name"]), - token["name"]) - elif type == "EndTag": - handler.endElementNS((token["namespace"], token["name"]), - token["name"]) - elif type in ("Characters", "SpaceCharacters"): - handler.characters(token["data"]) - elif type == "Comment": - pass - else: - assert False, "Unknown token type" - - for prefix, namespace in prefix_mapping.items(): - handler.endPrefixMapping(prefix) - handler.endDocument() diff --git a/lib/html5lib/treebuilders/__init__.py b/lib/html5lib/treebuilders/__init__.py deleted file mode 100644 index 6a6b2a4c..00000000 --- a/lib/html5lib/treebuilders/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -"""A collection of modules for building different kinds of tree from -HTML documents. - -To create a treebuilder for a new type of tree, you need to do -implement several things: - -1) A set of classes for various types of elements: Document, Doctype, -Comment, Element. These must implement the interface of -_base.treebuilders.Node (although comment nodes have a different -signature for their constructor, see treebuilders.etree.Comment) -Textual content may also be implemented as another node type, or not, as -your tree implementation requires. - -2) A treebuilder object (called TreeBuilder by convention) that -inherits from treebuilders._base.TreeBuilder. This has 4 required attributes: -documentClass - the class to use for the bottommost node of a document -elementClass - the class to use for HTML Elements -commentClass - the class to use for comments -doctypeClass - the class to use for doctypes -It also has one required method: -getDocument - Returns the root node of the complete document tree - -3) If you wish to run the unit tests, you must also create a -testSerializer method on your treebuilder which accepts a node and -returns a string containing Node and its children serialized according -to the format used in the unittests -""" - -from __future__ import absolute_import, division, unicode_literals - -from ..utils import default_etree - -treeBuilderCache = {} - - -def getTreeBuilder(treeType, implementation=None, **kwargs): - """Get a TreeBuilder class for various types of tree with built-in support - - treeType - the name of the tree type required (case-insensitive). Supported - values are: - - "dom" - A generic builder for DOM implementations, defaulting to - a xml.dom.minidom based implementation. - "etree" - A generic builder for tree implementations exposing an - ElementTree-like interface, defaulting to - xml.etree.cElementTree if available and - xml.etree.ElementTree if not. - "lxml" - A etree-based builder for lxml.etree, handling - limitations of lxml's implementation. - - implementation - (Currently applies to the "etree" and "dom" tree types). A - module implementing the tree type e.g. - xml.etree.ElementTree or xml.etree.cElementTree.""" - - treeType = treeType.lower() - if treeType not in treeBuilderCache: - if treeType == "dom": - from . import dom - # Come up with a sane default (pref. from the stdlib) - if implementation is None: - from xml.dom import minidom - implementation = minidom - # NEVER cache here, caching is done in the dom submodule - return dom.getDomModule(implementation, **kwargs).TreeBuilder - elif treeType == "lxml": - from . import etree_lxml - treeBuilderCache[treeType] = etree_lxml.TreeBuilder - elif treeType == "etree": - from . import etree - if implementation is None: - implementation = default_etree - # NEVER cache here, caching is done in the etree submodule - return etree.getETreeModule(implementation, **kwargs).TreeBuilder - else: - raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) - return treeBuilderCache.get(treeType) diff --git a/lib/html5lib/treebuilders/_base.py b/lib/html5lib/treebuilders/_base.py deleted file mode 100644 index 8b97cc11..00000000 --- a/lib/html5lib/treebuilders/_base.py +++ /dev/null @@ -1,377 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - -from ..constants import scopingElements, tableInsertModeElements, namespaces - -# The scope markers are inserted when entering object elements, -# marquees, table cells, and table captions, and are used to prevent formatting -# from "leaking" into tables, object elements, and marquees. -Marker = None - -listElementsMap = { - None: (frozenset(scopingElements), False), - "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), - "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), - (namespaces["html"], "ul")])), False), - "table": (frozenset([(namespaces["html"], "html"), - (namespaces["html"], "table")]), False), - "select": (frozenset([(namespaces["html"], "optgroup"), - (namespaces["html"], "option")]), True) -} - - -class Node(object): - def __init__(self, name): - """Node representing an item in the tree. - name - The tag name associated with the node - parent - The parent of the current node (or None for the document node) - value - The value of the current node (applies to text nodes and - comments - attributes - a dict holding name, value pairs for attributes of the node - childNodes - a list of child nodes of the current node. This must - include all elements but not necessarily other node types - _flags - A list of miscellaneous flags that can be set on the node - """ - self.name = name - self.parent = None - self.value = None - self.attributes = {} - self.childNodes = [] - self._flags = [] - - def __str__(self): - attributesStr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in - self.attributes.items()]) - if attributesStr: - return "<%s %s>" % (self.name, attributesStr) - else: - return "<%s>" % (self.name) - - def __repr__(self): - return "<%s>" % (self.name) - - def appendChild(self, node): - """Insert node as a child of the current node - """ - raise NotImplementedError - - def insertText(self, data, insertBefore=None): - """Insert data as text in the current node, positioned before the - start of node insertBefore or to the end of the node's text. - """ - raise NotImplementedError - - def insertBefore(self, node, refNode): - """Insert node as a child of the current node, before refNode in the - list of child nodes. Raises ValueError if refNode is not a child of - the current node""" - raise NotImplementedError - - def removeChild(self, node): - """Remove node from the children of the current node - """ - raise NotImplementedError - - def reparentChildren(self, newParent): - """Move all the children of the current node to newParent. - This is needed so that trees that don't store text as nodes move the - text in the correct way - """ - # XXX - should this method be made more general? - for child in self.childNodes: - newParent.appendChild(child) - self.childNodes = [] - - def cloneNode(self): - """Return a shallow copy of the current node i.e. a node with the same - name and attributes but with no parent or child nodes - """ - raise NotImplementedError - - def hasContent(self): - """Return true if the node has children or text, false otherwise - """ - raise NotImplementedError - - -class ActiveFormattingElements(list): - def append(self, node): - equalCount = 0 - if node != Marker: - for element in self[::-1]: - if element == Marker: - break - if self.nodesEqual(element, node): - equalCount += 1 - if equalCount == 3: - self.remove(element) - break - list.append(self, node) - - def nodesEqual(self, node1, node2): - if not node1.nameTuple == node2.nameTuple: - return False - - if not node1.attributes == node2.attributes: - return False - - return True - - -class TreeBuilder(object): - """Base treebuilder implementation - documentClass - the class to use for the bottommost node of a document - elementClass - the class to use for HTML Elements - commentClass - the class to use for comments - doctypeClass - the class to use for doctypes - """ - - # Document class - documentClass = None - - # The class to use for creating a node - elementClass = None - - # The class to use for creating comments - commentClass = None - - # The class to use for creating doctypes - doctypeClass = None - - # Fragment class - fragmentClass = None - - def __init__(self, namespaceHTMLElements): - if namespaceHTMLElements: - self.defaultNamespace = "http://www.w3.org/1999/xhtml" - else: - self.defaultNamespace = None - self.reset() - - def reset(self): - self.openElements = [] - self.activeFormattingElements = ActiveFormattingElements() - - # XXX - rename these to headElement, formElement - self.headPointer = None - self.formPointer = None - - self.insertFromTable = False - - self.document = self.documentClass() - - def elementInScope(self, target, variant=None): - - # If we pass a node in we match that. if we pass a string - # match any node with that name - exactNode = hasattr(target, "nameTuple") - - listElements, invert = listElementsMap[variant] - - for node in reversed(self.openElements): - if (node.name == target and not exactNode or - node == target and exactNode): - return True - elif (invert ^ (node.nameTuple in listElements)): - return False - - assert False # We should never reach this point - - def reconstructActiveFormattingElements(self): - # Within this algorithm the order of steps described in the - # specification is not quite the same as the order of steps in the - # code. It should still do the same though. - - # Step 1: stop the algorithm when there's nothing to do. - if not self.activeFormattingElements: - return - - # Step 2 and step 3: we start with the last element. So i is -1. - i = len(self.activeFormattingElements) - 1 - entry = self.activeFormattingElements[i] - if entry == Marker or entry in self.openElements: - return - - # Step 6 - while entry != Marker and entry not in self.openElements: - if i == 0: - # This will be reset to 0 below - i = -1 - break - i -= 1 - # Step 5: let entry be one earlier in the list. - entry = self.activeFormattingElements[i] - - while True: - # Step 7 - i += 1 - - # Step 8 - entry = self.activeFormattingElements[i] - clone = entry.cloneNode() # Mainly to get a new copy of the attributes - - # Step 9 - element = self.insertElement({"type": "StartTag", - "name": clone.name, - "namespace": clone.namespace, - "data": clone.attributes}) - - # Step 10 - self.activeFormattingElements[i] = element - - # Step 11 - if element == self.activeFormattingElements[-1]: - break - - def clearActiveFormattingElements(self): - entry = self.activeFormattingElements.pop() - while self.activeFormattingElements and entry != Marker: - entry = self.activeFormattingElements.pop() - - def elementInActiveFormattingElements(self, name): - """Check if an element exists between the end of the active - formatting elements and the last marker. If it does, return it, else - return false""" - - for item in self.activeFormattingElements[::-1]: - # Check for Marker first because if it's a Marker it doesn't have a - # name attribute. - if item == Marker: - break - elif item.name == name: - return item - return False - - def insertRoot(self, token): - element = self.createElement(token) - self.openElements.append(element) - self.document.appendChild(element) - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - doctype = self.doctypeClass(name, publicId, systemId) - self.document.appendChild(doctype) - - def insertComment(self, token, parent=None): - if parent is None: - parent = self.openElements[-1] - parent.appendChild(self.commentClass(token["data"])) - - def createElement(self, token): - """Create an element but don't insert it anywhere""" - name = token["name"] - namespace = token.get("namespace", self.defaultNamespace) - element = self.elementClass(name, namespace) - element.attributes = token["data"] - return element - - def _getInsertFromTable(self): - return self._insertFromTable - - def _setInsertFromTable(self, value): - """Switch the function used to insert an element from the - normal one to the misnested table one and back again""" - self._insertFromTable = value - if value: - self.insertElement = self.insertElementTable - else: - self.insertElement = self.insertElementNormal - - insertFromTable = property(_getInsertFromTable, _setInsertFromTable) - - def insertElementNormal(self, token): - name = token["name"] - assert isinstance(name, text_type), "Element %s not unicode" % name - namespace = token.get("namespace", self.defaultNamespace) - element = self.elementClass(name, namespace) - element.attributes = token["data"] - self.openElements[-1].appendChild(element) - self.openElements.append(element) - return element - - def insertElementTable(self, token): - """Create an element and insert it into the tree""" - element = self.createElement(token) - if self.openElements[-1].name not in tableInsertModeElements: - return self.insertElementNormal(token) - else: - # We should be in the InTable mode. This means we want to do - # special magic element rearranging - parent, insertBefore = self.getTableMisnestedNodePosition() - if insertBefore is None: - parent.appendChild(element) - else: - parent.insertBefore(element, insertBefore) - self.openElements.append(element) - return element - - def insertText(self, data, parent=None): - """Insert text data.""" - if parent is None: - parent = self.openElements[-1] - - if (not self.insertFromTable or (self.insertFromTable and - self.openElements[-1].name - not in tableInsertModeElements)): - parent.insertText(data) - else: - # We should be in the InTable mode. This means we want to do - # special magic element rearranging - parent, insertBefore = self.getTableMisnestedNodePosition() - parent.insertText(data, insertBefore) - - def getTableMisnestedNodePosition(self): - """Get the foster parent element, and sibling to insert before - (or None) when inserting a misnested table node""" - # The foster parent element is the one which comes before the most - # recently opened table element - # XXX - this is really inelegant - lastTable = None - fosterParent = None - insertBefore = None - for elm in self.openElements[::-1]: - if elm.name == "table": - lastTable = elm - break - if lastTable: - # XXX - we should really check that this parent is actually a - # node here - if lastTable.parent: - fosterParent = lastTable.parent - insertBefore = lastTable - else: - fosterParent = self.openElements[ - self.openElements.index(lastTable) - 1] - else: - fosterParent = self.openElements[0] - return fosterParent, insertBefore - - def generateImpliedEndTags(self, exclude=None): - name = self.openElements[-1].name - # XXX td, th and tr are not actually needed - if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) - and name != exclude): - self.openElements.pop() - # XXX This is not entirely what the specification says. We should - # investigate it more closely. - self.generateImpliedEndTags(exclude) - - def getDocument(self): - "Return the final tree" - return self.document - - def getFragment(self): - "Return the final fragment" - # assert self.innerHTML - fragment = self.fragmentClass() - self.openElements[0].reparentChildren(fragment) - return fragment - - def testSerializer(self, node): - """Serialize the subtree of node in the format required by unit tests - node - the node from which to start serializing""" - raise NotImplementedError diff --git a/lib/html5lib/treebuilders/dom.py b/lib/html5lib/treebuilders/dom.py deleted file mode 100644 index 234233b7..00000000 --- a/lib/html5lib/treebuilders/dom.py +++ /dev/null @@ -1,227 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - - -from xml.dom import minidom, Node -import weakref - -from . import _base -from .. import constants -from ..constants import namespaces -from ..utils import moduleFactoryFactory - - -def getDomBuilder(DomImplementation): - Dom = DomImplementation - - class AttrList(object): - def __init__(self, element): - self.element = element - - def __iter__(self): - return list(self.element.attributes.items()).__iter__() - - def __setitem__(self, name, value): - self.element.setAttribute(name, value) - - def __len__(self): - return len(list(self.element.attributes.items())) - - def items(self): - return [(item[0], item[1]) for item in - list(self.element.attributes.items())] - - def keys(self): - return list(self.element.attributes.keys()) - - def __getitem__(self, name): - return self.element.getAttribute(name) - - def __contains__(self, name): - if isinstance(name, tuple): - raise NotImplementedError - else: - return self.element.hasAttribute(name) - - class NodeBuilder(_base.Node): - def __init__(self, element): - _base.Node.__init__(self, element.nodeName) - self.element = element - - namespace = property(lambda self: hasattr(self.element, "namespaceURI") - and self.element.namespaceURI or None) - - def appendChild(self, node): - node.parent = self - self.element.appendChild(node.element) - - def insertText(self, data, insertBefore=None): - text = self.element.ownerDocument.createTextNode(data) - if insertBefore: - self.element.insertBefore(text, insertBefore.element) - else: - self.element.appendChild(text) - - def insertBefore(self, node, refNode): - self.element.insertBefore(node.element, refNode.element) - node.parent = self - - def removeChild(self, node): - if node.element.parentNode == self.element: - self.element.removeChild(node.element) - node.parent = None - - def reparentChildren(self, newParent): - while self.element.hasChildNodes(): - child = self.element.firstChild - self.element.removeChild(child) - newParent.element.appendChild(child) - self.childNodes = [] - - def getAttributes(self): - return AttrList(self.element) - - def setAttributes(self, attributes): - if attributes: - for name, value in list(attributes.items()): - if isinstance(name, tuple): - if name[0] is not None: - qualifiedName = (name[0] + ":" + name[1]) - else: - qualifiedName = name[1] - self.element.setAttributeNS(name[2], qualifiedName, - value) - else: - self.element.setAttribute( - name, value) - attributes = property(getAttributes, setAttributes) - - def cloneNode(self): - return NodeBuilder(self.element.cloneNode(False)) - - def hasContent(self): - return self.element.hasChildNodes() - - def getNameTuple(self): - if self.namespace is None: - return namespaces["html"], self.name - else: - return self.namespace, self.name - - nameTuple = property(getNameTuple) - - class TreeBuilder(_base.TreeBuilder): - def documentClass(self): - self.dom = Dom.getDOMImplementation().createDocument(None, None, None) - return weakref.proxy(self) - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - domimpl = Dom.getDOMImplementation() - doctype = domimpl.createDocumentType(name, publicId, systemId) - self.document.appendChild(NodeBuilder(doctype)) - if Dom == minidom: - doctype.ownerDocument = self.dom - - def elementClass(self, name, namespace=None): - if namespace is None and self.defaultNamespace is None: - node = self.dom.createElement(name) - else: - node = self.dom.createElementNS(namespace, name) - - return NodeBuilder(node) - - def commentClass(self, data): - return NodeBuilder(self.dom.createComment(data)) - - def fragmentClass(self): - return NodeBuilder(self.dom.createDocumentFragment()) - - def appendChild(self, node): - self.dom.appendChild(node.element) - - def testSerializer(self, element): - return testSerializer(element) - - def getDocument(self): - return self.dom - - def getFragment(self): - return _base.TreeBuilder.getFragment(self).element - - def insertText(self, data, parent=None): - data = data - if parent != self: - _base.TreeBuilder.insertText(self, data, parent) - else: - # HACK: allow text nodes as children of the document node - if hasattr(self.dom, '_child_node_types'): - if Node.TEXT_NODE not in self.dom._child_node_types: - self.dom._child_node_types = list(self.dom._child_node_types) - self.dom._child_node_types.append(Node.TEXT_NODE) - self.dom.appendChild(self.dom.createTextNode(data)) - - implementation = DomImplementation - name = None - - def testSerializer(element): - element.normalize() - rv = [] - - def serializeElement(element, indent=0): - if element.nodeType == Node.DOCUMENT_TYPE_NODE: - if element.name: - if element.publicId or element.systemId: - publicId = element.publicId or "" - systemId = element.systemId or "" - rv.append("""|%s""" % - (' ' * indent, element.name, publicId, systemId)) - else: - rv.append("|%s" % (' ' * indent, element.name)) - else: - rv.append("|%s" % (' ' * indent,)) - elif element.nodeType == Node.DOCUMENT_NODE: - rv.append("#document") - elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: - rv.append("#document-fragment") - elif element.nodeType == Node.COMMENT_NODE: - rv.append("|%s" % (' ' * indent, element.nodeValue)) - elif element.nodeType == Node.TEXT_NODE: - rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) - else: - if (hasattr(element, "namespaceURI") and - element.namespaceURI is not None): - name = "%s %s" % (constants.prefixes[element.namespaceURI], - element.nodeName) - else: - name = element.nodeName - rv.append("|%s<%s>" % (' ' * indent, name)) - if element.hasAttributes(): - attributes = [] - for i in range(len(element.attributes)): - attr = element.attributes.item(i) - name = attr.nodeName - value = attr.value - ns = attr.namespaceURI - if ns: - name = "%s %s" % (constants.prefixes[ns], attr.localName) - else: - name = attr.nodeName - attributes.append((name, value)) - - for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) - indent += 2 - for child in element.childNodes: - serializeElement(child, indent) - serializeElement(element, 0) - - return "\n".join(rv) - - return locals() - - -# The actual means to get a module! -getDomModule = moduleFactoryFactory(getDomBuilder) diff --git a/lib/html5lib/treebuilders/etree.py b/lib/html5lib/treebuilders/etree.py deleted file mode 100644 index 2c8ed19f..00000000 --- a/lib/html5lib/treebuilders/etree.py +++ /dev/null @@ -1,337 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - -import re - -from . import _base -from .. import ihatexml -from .. import constants -from ..constants import namespaces -from ..utils import moduleFactoryFactory - -tag_regexp = re.compile("{([^}]*)}(.*)") - - -def getETreeBuilder(ElementTreeImplementation, fullTree=False): - ElementTree = ElementTreeImplementation - ElementTreeCommentType = ElementTree.Comment("asd").tag - - class Element(_base.Node): - def __init__(self, name, namespace=None): - self._name = name - self._namespace = namespace - self._element = ElementTree.Element(self._getETreeTag(name, - namespace)) - if namespace is None: - self.nameTuple = namespaces["html"], self._name - else: - self.nameTuple = self._namespace, self._name - self.parent = None - self._childNodes = [] - self._flags = [] - - def _getETreeTag(self, name, namespace): - if namespace is None: - etree_tag = name - else: - etree_tag = "{%s}%s" % (namespace, name) - return etree_tag - - def _setName(self, name): - self._name = name - self._element.tag = self._getETreeTag(self._name, self._namespace) - - def _getName(self): - return self._name - - name = property(_getName, _setName) - - def _setNamespace(self, namespace): - self._namespace = namespace - self._element.tag = self._getETreeTag(self._name, self._namespace) - - def _getNamespace(self): - return self._namespace - - namespace = property(_getNamespace, _setNamespace) - - def _getAttributes(self): - return self._element.attrib - - def _setAttributes(self, attributes): - # Delete existing attributes first - # XXX - there may be a better way to do this... - for key in list(self._element.attrib.keys()): - del self._element.attrib[key] - for key, value in attributes.items(): - if isinstance(key, tuple): - name = "{%s}%s" % (key[2], key[1]) - else: - name = key - self._element.set(name, value) - - attributes = property(_getAttributes, _setAttributes) - - def _getChildNodes(self): - return self._childNodes - - def _setChildNodes(self, value): - del self._element[:] - self._childNodes = [] - for element in value: - self.insertChild(element) - - childNodes = property(_getChildNodes, _setChildNodes) - - def hasContent(self): - """Return true if the node has children or text""" - return bool(self._element.text or len(self._element)) - - def appendChild(self, node): - self._childNodes.append(node) - self._element.append(node._element) - node.parent = self - - def insertBefore(self, node, refNode): - index = list(self._element).index(refNode._element) - self._element.insert(index, node._element) - node.parent = self - - def removeChild(self, node): - self._element.remove(node._element) - node.parent = None - - def insertText(self, data, insertBefore=None): - if not(len(self._element)): - if not self._element.text: - self._element.text = "" - self._element.text += data - elif insertBefore is None: - # Insert the text as the tail of the last child element - if not self._element[-1].tail: - self._element[-1].tail = "" - self._element[-1].tail += data - else: - # Insert the text before the specified node - children = list(self._element) - index = children.index(insertBefore._element) - if index > 0: - if not self._element[index - 1].tail: - self._element[index - 1].tail = "" - self._element[index - 1].tail += data - else: - if not self._element.text: - self._element.text = "" - self._element.text += data - - def cloneNode(self): - element = type(self)(self.name, self.namespace) - for name, value in self.attributes.items(): - element.attributes[name] = value - return element - - def reparentChildren(self, newParent): - if newParent.childNodes: - newParent.childNodes[-1]._element.tail += self._element.text - else: - if not newParent._element.text: - newParent._element.text = "" - if self._element.text is not None: - newParent._element.text += self._element.text - self._element.text = "" - _base.Node.reparentChildren(self, newParent) - - class Comment(Element): - def __init__(self, data): - # Use the superclass constructor to set all properties on the - # wrapper element - self._element = ElementTree.Comment(data) - self.parent = None - self._childNodes = [] - self._flags = [] - - def _getData(self): - return self._element.text - - def _setData(self, value): - self._element.text = value - - data = property(_getData, _setData) - - class DocumentType(Element): - def __init__(self, name, publicId, systemId): - Element.__init__(self, "") - self._element.text = name - self.publicId = publicId - self.systemId = systemId - - def _getPublicId(self): - return self._element.get("publicId", "") - - def _setPublicId(self, value): - if value is not None: - self._element.set("publicId", value) - - publicId = property(_getPublicId, _setPublicId) - - def _getSystemId(self): - return self._element.get("systemId", "") - - def _setSystemId(self, value): - if value is not None: - self._element.set("systemId", value) - - systemId = property(_getSystemId, _setSystemId) - - class Document(Element): - def __init__(self): - Element.__init__(self, "DOCUMENT_ROOT") - - class DocumentFragment(Element): - def __init__(self): - Element.__init__(self, "DOCUMENT_FRAGMENT") - - def testSerializer(element): - rv = [] - - def serializeElement(element, indent=0): - if not(hasattr(element, "tag")): - element = element.getroot() - if element.tag == "": - if element.get("publicId") or element.get("systemId"): - publicId = element.get("publicId") or "" - systemId = element.get("systemId") or "" - rv.append("""""" % - (element.text, publicId, systemId)) - else: - rv.append("" % (element.text,)) - elif element.tag == "DOCUMENT_ROOT": - rv.append("#document") - if element.text is not None: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) - if element.tail is not None: - raise TypeError("Document node cannot have tail") - if hasattr(element, "attrib") and len(element.attrib): - raise TypeError("Document node cannot have attributes") - elif element.tag == ElementTreeCommentType: - rv.append("|%s" % (' ' * indent, element.text)) - else: - assert isinstance(element.tag, text_type), \ - "Expected unicode, got %s, %s" % (type(element.tag), element.tag) - nsmatch = tag_regexp.match(element.tag) - - if nsmatch is None: - name = element.tag - else: - ns, name = nsmatch.groups() - prefix = constants.prefixes[ns] - name = "%s %s" % (prefix, name) - rv.append("|%s<%s>" % (' ' * indent, name)) - - if hasattr(element, "attrib"): - attributes = [] - for name, value in element.attrib.items(): - nsmatch = tag_regexp.match(name) - if nsmatch is not None: - ns, name = nsmatch.groups() - prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) - else: - attr_string = name - attributes.append((attr_string, value)) - - for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) - if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) - indent += 2 - for child in element: - serializeElement(child, indent) - if element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) - serializeElement(element, 0) - - return "\n".join(rv) - - def tostring(element): - """Serialize an element and its child nodes to a string""" - rv = [] - filter = ihatexml.InfosetFilter() - - def serializeElement(element): - if isinstance(element, ElementTree.ElementTree): - element = element.getroot() - - if element.tag == "": - if element.get("publicId") or element.get("systemId"): - publicId = element.get("publicId") or "" - systemId = element.get("systemId") or "" - rv.append("""""" % - (element.text, publicId, systemId)) - else: - rv.append("" % (element.text,)) - elif element.tag == "DOCUMENT_ROOT": - if element.text is not None: - rv.append(element.text) - if element.tail is not None: - raise TypeError("Document node cannot have tail") - if hasattr(element, "attrib") and len(element.attrib): - raise TypeError("Document node cannot have attributes") - - for child in element: - serializeElement(child) - - elif element.tag == ElementTreeCommentType: - rv.append("" % (element.text,)) - else: - # This is assumed to be an ordinary element - if not element.attrib: - rv.append("<%s>" % (filter.fromXmlName(element.tag),)) - else: - attr = " ".join(["%s=\"%s\"" % ( - filter.fromXmlName(name), value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) - if element.text: - rv.append(element.text) - - for child in element: - serializeElement(child) - - rv.append("" % (element.tag,)) - - if element.tail: - rv.append(element.tail) - - serializeElement(element) - - return "".join(rv) - - class TreeBuilder(_base.TreeBuilder): - documentClass = Document - doctypeClass = DocumentType - elementClass = Element - commentClass = Comment - fragmentClass = DocumentFragment - implementation = ElementTreeImplementation - - def testSerializer(self, element): - return testSerializer(element) - - def getDocument(self): - if fullTree: - return self.document._element - else: - if self.defaultNamespace is not None: - return self.document._element.find( - "{%s}html" % self.defaultNamespace) - else: - return self.document._element.find("html") - - def getFragment(self): - return _base.TreeBuilder.getFragment(self)._element - - return locals() - - -getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/lib/html5lib/treebuilders/etree_lxml.py b/lib/html5lib/treebuilders/etree_lxml.py deleted file mode 100644 index 35d08efa..00000000 --- a/lib/html5lib/treebuilders/etree_lxml.py +++ /dev/null @@ -1,369 +0,0 @@ -"""Module for supporting the lxml.etree library. The idea here is to use as much -of the native library as possible, without using fragile hacks like custom element -names that break between releases. The downside of this is that we cannot represent -all possible trees; specifically the following are known to cause problems: - -Text or comments as siblings of the root element -Docypes with no name - -When any of these things occur, we emit a DataLossWarning -""" - -from __future__ import absolute_import, division, unicode_literals - -import warnings -import re -import sys - -from . import _base -from ..constants import DataLossWarning -from .. import constants -from . import etree as etree_builders -from .. import ihatexml - -import lxml.etree as etree - - -fullTree = True -tag_regexp = re.compile("{([^}]*)}(.*)") - -comment_type = etree.Comment("asd").tag - - -class DocumentType(object): - def __init__(self, name, publicId, systemId): - self.name = name - self.publicId = publicId - self.systemId = systemId - - -class Document(object): - def __init__(self): - self._elementTree = None - self._childNodes = [] - - def appendChild(self, element): - self._elementTree.getroot().addnext(element._element) - - def _getChildNodes(self): - return self._childNodes - - childNodes = property(_getChildNodes) - - -def testSerializer(element): - rv = [] - finalText = None - infosetFilter = ihatexml.InfosetFilter() - - def serializeElement(element, indent=0): - if not hasattr(element, "tag"): - if hasattr(element, "getroot"): - # Full tree case - rv.append("#document") - if element.docinfo.internalDTD: - if not (element.docinfo.public_id or - element.docinfo.system_url): - dtd_str = "" % element.docinfo.root_name - else: - dtd_str = """""" % ( - element.docinfo.root_name, - element.docinfo.public_id, - element.docinfo.system_url) - rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) - next_element = element.getroot() - while next_element.getprevious() is not None: - next_element = next_element.getprevious() - while next_element is not None: - serializeElement(next_element, indent + 2) - next_element = next_element.getnext() - elif isinstance(element, str) or isinstance(element, bytes): - # Text in a fragment - assert isinstance(element, str) or sys.version_info.major == 2 - rv.append("|%s\"%s\"" % (' ' * indent, element)) - else: - # Fragment case - rv.append("#document-fragment") - for next_element in element: - serializeElement(next_element, indent + 2) - elif element.tag == comment_type: - rv.append("|%s" % (' ' * indent, element.text)) - if hasattr(element, "tail") and element.tail: - rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) - else: - assert isinstance(element, etree._Element) - nsmatch = etree_builders.tag_regexp.match(element.tag) - if nsmatch is not None: - ns = nsmatch.group(1) - tag = nsmatch.group(2) - prefix = constants.prefixes[ns] - rv.append("|%s<%s %s>" % (' ' * indent, prefix, - infosetFilter.fromXmlName(tag))) - else: - rv.append("|%s<%s>" % (' ' * indent, - infosetFilter.fromXmlName(element.tag))) - - if hasattr(element, "attrib"): - attributes = [] - for name, value in element.attrib.items(): - nsmatch = tag_regexp.match(name) - if nsmatch is not None: - ns, name = nsmatch.groups() - name = infosetFilter.fromXmlName(name) - prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) - else: - attr_string = infosetFilter.fromXmlName(name) - attributes.append((attr_string, value)) - - for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) - - if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) - indent += 2 - for child in element: - serializeElement(child, indent) - if hasattr(element, "tail") and element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) - serializeElement(element, 0) - - if finalText is not None: - rv.append("|%s\"%s\"" % (' ' * 2, finalText)) - - return "\n".join(rv) - - -def tostring(element): - """Serialize an element and its child nodes to a string""" - rv = [] - finalText = None - - def serializeElement(element): - if not hasattr(element, "tag"): - if element.docinfo.internalDTD: - if element.docinfo.doctype: - dtd_str = element.docinfo.doctype - else: - dtd_str = "" % element.docinfo.root_name - rv.append(dtd_str) - serializeElement(element.getroot()) - - elif element.tag == comment_type: - rv.append("" % (element.text,)) - - else: - # This is assumed to be an ordinary element - if not element.attrib: - rv.append("<%s>" % (element.tag,)) - else: - attr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) - if element.text: - rv.append(element.text) - - for child in element: - serializeElement(child) - - rv.append("" % (element.tag,)) - - if hasattr(element, "tail") and element.tail: - rv.append(element.tail) - - serializeElement(element) - - if finalText is not None: - rv.append("%s\"" % (' ' * 2, finalText)) - - return "".join(rv) - - -class TreeBuilder(_base.TreeBuilder): - documentClass = Document - doctypeClass = DocumentType - elementClass = None - commentClass = None - fragmentClass = Document - implementation = etree - - def __init__(self, namespaceHTMLElements, fullTree=False): - builder = etree_builders.getETreeModule(etree, fullTree=fullTree) - infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() - self.namespaceHTMLElements = namespaceHTMLElements - - class Attributes(dict): - def __init__(self, element, value={}): - self._element = element - dict.__init__(self, value) - for key, value in self.items(): - if isinstance(key, tuple): - name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) - else: - name = infosetFilter.coerceAttribute(key) - self._element._element.attrib[name] = value - - def __setitem__(self, key, value): - dict.__setitem__(self, key, value) - if isinstance(key, tuple): - name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) - else: - name = infosetFilter.coerceAttribute(key) - self._element._element.attrib[name] = value - - class Element(builder.Element): - def __init__(self, name, namespace): - name = infosetFilter.coerceElement(name) - builder.Element.__init__(self, name, namespace=namespace) - self._attributes = Attributes(self) - - def _setName(self, name): - self._name = infosetFilter.coerceElement(name) - self._element.tag = self._getETreeTag( - self._name, self._namespace) - - def _getName(self): - return infosetFilter.fromXmlName(self._name) - - name = property(_getName, _setName) - - def _getAttributes(self): - return self._attributes - - def _setAttributes(self, attributes): - self._attributes = Attributes(self, attributes) - - attributes = property(_getAttributes, _setAttributes) - - def insertText(self, data, insertBefore=None): - data = infosetFilter.coerceCharacters(data) - builder.Element.insertText(self, data, insertBefore) - - def appendChild(self, child): - builder.Element.appendChild(self, child) - - class Comment(builder.Comment): - def __init__(self, data): - data = infosetFilter.coerceComment(data) - builder.Comment.__init__(self, data) - - def _setData(self, data): - data = infosetFilter.coerceComment(data) - self._element.text = data - - def _getData(self): - return self._element.text - - data = property(_getData, _setData) - - self.elementClass = Element - self.commentClass = builder.Comment - # self.fragmentClass = builder.DocumentFragment - _base.TreeBuilder.__init__(self, namespaceHTMLElements) - - def reset(self): - _base.TreeBuilder.reset(self) - self.insertComment = self.insertCommentInitial - self.initial_comments = [] - self.doctype = None - - def testSerializer(self, element): - return testSerializer(element) - - def getDocument(self): - if fullTree: - return self.document._elementTree - else: - return self.document._elementTree.getroot() - - def getFragment(self): - fragment = [] - element = self.openElements[0]._element - if element.text: - fragment.append(element.text) - fragment.extend(list(element)) - if element.tail: - fragment.append(element.tail) - return fragment - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - if not name: - warnings.warn("lxml cannot represent empty doctype", DataLossWarning) - self.doctype = None - else: - coercedName = self.infosetFilter.coerceElement(name) - if coercedName != name: - warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning) - - doctype = self.doctypeClass(coercedName, publicId, systemId) - self.doctype = doctype - - def insertCommentInitial(self, data, parent=None): - self.initial_comments.append(data) - - def insertCommentMain(self, data, parent=None): - if (parent == self.document and - self.document._elementTree.getroot()[-1].tag == comment_type): - warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) - super(TreeBuilder, self).insertComment(data, parent) - - def insertRoot(self, token): - """Create the document root""" - # Because of the way libxml2 works, it doesn't seem to be possible to - # alter information like the doctype after the tree has been parsed. - # Therefore we need to use the built-in parser to create our iniial - # tree, after which we can add elements like normal - docStr = "" - if self.doctype: - assert self.doctype.name - docStr += "= 0 and sysid.find('"') >= 0: - warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning) - sysid = sysid.replace("'", 'U00027') - if sysid.find("'") >= 0: - docStr += '"%s"' % sysid - else: - docStr += "'%s'" % sysid - else: - docStr += "''" - docStr += ">" - if self.doctype.name != token["name"]: - warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning) - docStr += "" - root = etree.fromstring(docStr) - - # Append the initial comments: - for comment_token in self.initial_comments: - root.addprevious(etree.Comment(comment_token["data"])) - - # Create the root document and add the ElementTree to it - self.document = self.documentClass() - self.document._elementTree = root.getroottree() - - # Give the root element the right name - name = token["name"] - namespace = token.get("namespace", self.defaultNamespace) - if namespace is None: - etree_tag = name - else: - etree_tag = "{%s}%s" % (namespace, name) - root.tag = etree_tag - - # Add the root element to the internal child/open data structures - root_element = self.elementClass(name, namespace) - root_element._element = root - self.document._childNodes.append(root_element) - self.openElements.append(root_element) - - # Reset to the default insert comment function - self.insertComment = self.insertCommentMain diff --git a/lib/html5lib/treewalkers/__init__.py b/lib/html5lib/treewalkers/__init__.py deleted file mode 100644 index 20b91b11..00000000 --- a/lib/html5lib/treewalkers/__init__.py +++ /dev/null @@ -1,147 +0,0 @@ -"""A collection of modules for iterating through different kinds of -tree, generating tokens identical to those produced by the tokenizer -module. - -To create a tree walker for a new type of tree, you need to do -implement a tree walker object (called TreeWalker by convention) that -implements a 'serialize' method taking a tree as sole argument and -returning an iterator generating tokens. -""" - -from __future__ import absolute_import, division, unicode_literals - -__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree", - "pulldom"] - -import sys - -from .. import constants -from ..utils import default_etree - -treeWalkerCache = {} - - -def getTreeWalker(treeType, implementation=None, **kwargs): - """Get a TreeWalker class for various types of tree with built-in support - - treeType - the name of the tree type required (case-insensitive). Supported - values are: - - "dom" - The xml.dom.minidom DOM implementation - "pulldom" - The xml.dom.pulldom event stream - "etree" - A generic walker for tree implementations exposing an - elementtree-like interface (known to work with - ElementTree, cElementTree and lxml.etree). - "lxml" - Optimized walker for lxml.etree - "genshi" - a Genshi stream - - implementation - (Currently applies to the "etree" tree type only). A module - implementing the tree type e.g. xml.etree.ElementTree or - cElementTree.""" - - treeType = treeType.lower() - if treeType not in treeWalkerCache: - if treeType in ("dom", "pulldom"): - name = "%s.%s" % (__name__, treeType) - __import__(name) - mod = sys.modules[name] - treeWalkerCache[treeType] = mod.TreeWalker - elif treeType == "genshi": - from . import genshistream - treeWalkerCache[treeType] = genshistream.TreeWalker - elif treeType == "lxml": - from . import lxmletree - treeWalkerCache[treeType] = lxmletree.TreeWalker - elif treeType == "etree": - from . import etree - if implementation is None: - implementation = default_etree - # XXX: NEVER cache here, caching is done in the etree submodule - return etree.getETreeModule(implementation, **kwargs).TreeWalker - return treeWalkerCache.get(treeType) - - -def concatenateCharacterTokens(tokens): - pendingCharacters = [] - for token in tokens: - type = token["type"] - if type in ("Characters", "SpaceCharacters"): - pendingCharacters.append(token["data"]) - else: - if pendingCharacters: - yield {"type": "Characters", "data": "".join(pendingCharacters)} - pendingCharacters = [] - yield token - if pendingCharacters: - yield {"type": "Characters", "data": "".join(pendingCharacters)} - - -def pprint(walker): - """Pretty printer for tree walkers""" - output = [] - indent = 0 - for token in concatenateCharacterTokens(walker): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - # tag name - if token["namespace"] and token["namespace"] != constants.namespaces["html"]: - if token["namespace"] in constants.prefixes: - ns = constants.prefixes[token["namespace"]] - else: - ns = token["namespace"] - name = "%s %s" % (ns, token["name"]) - else: - name = token["name"] - output.append("%s<%s>" % (" " * indent, name)) - indent += 2 - # attributes (sorted for consistent ordering) - attrs = token["data"] - for (namespace, localname), value in sorted(attrs.items()): - if namespace: - if namespace in constants.prefixes: - ns = constants.prefixes[namespace] - else: - ns = namespace - name = "%s %s" % (ns, localname) - else: - name = localname - output.append("%s%s=\"%s\"" % (" " * indent, name, value)) - # self-closing - if type == "EmptyTag": - indent -= 2 - - elif type == "EndTag": - indent -= 2 - - elif type == "Comment": - output.append("%s" % (" " * indent, token["data"])) - - elif type == "Doctype": - if token["name"]: - if token["publicId"]: - output.append("""%s""" % - (" " * indent, - token["name"], - token["publicId"], - token["systemId"] if token["systemId"] else "")) - elif token["systemId"]: - output.append("""%s""" % - (" " * indent, - token["name"], - token["systemId"])) - else: - output.append("%s" % (" " * indent, - token["name"])) - else: - output.append("%s" % (" " * indent,)) - - elif type == "Characters": - output.append("%s\"%s\"" % (" " * indent, token["data"])) - - elif type == "SpaceCharacters": - assert False, "concatenateCharacterTokens should have got rid of all Space tokens" - - else: - raise ValueError("Unknown token type, %s" % type) - - return "\n".join(output) diff --git a/lib/html5lib/treewalkers/_base.py b/lib/html5lib/treewalkers/_base.py deleted file mode 100644 index 4e11cd02..00000000 --- a/lib/html5lib/treewalkers/_base.py +++ /dev/null @@ -1,200 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type, string_types - -__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", - "TreeWalker", "NonRecursiveTreeWalker"] - -from xml.dom import Node - -DOCUMENT = Node.DOCUMENT_NODE -DOCTYPE = Node.DOCUMENT_TYPE_NODE -TEXT = Node.TEXT_NODE -ELEMENT = Node.ELEMENT_NODE -COMMENT = Node.COMMENT_NODE -ENTITY = Node.ENTITY_NODE -UNKNOWN = "<#UNKNOWN#>" - -from ..constants import voidElements, spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -def to_text(s, blank_if_none=True): - """Wrapper around six.text_type to convert None to empty string""" - if s is None: - if blank_if_none: - return "" - else: - return None - elif isinstance(s, text_type): - return s - else: - return text_type(s) - - -def is_text_or_none(string): - """Wrapper around isinstance(string_types) or is None""" - return string is None or isinstance(string, string_types) - - -class TreeWalker(object): - def __init__(self, tree): - self.tree = tree - - def __iter__(self): - raise NotImplementedError - - def error(self, msg): - return {"type": "SerializeError", "data": msg} - - def emptyTag(self, namespace, name, attrs, hasChildren=False): - assert namespace is None or isinstance(namespace, string_types), type(namespace) - assert isinstance(name, string_types), type(name) - assert all((namespace is None or isinstance(namespace, string_types)) and - isinstance(name, string_types) and - isinstance(value, string_types) - for (namespace, name), value in attrs.items()) - - yield {"type": "EmptyTag", "name": to_text(name, False), - "namespace": to_text(namespace), - "data": attrs} - if hasChildren: - yield self.error("Void element has children") - - def startTag(self, namespace, name, attrs): - assert namespace is None or isinstance(namespace, string_types), type(namespace) - assert isinstance(name, string_types), type(name) - assert all((namespace is None or isinstance(namespace, string_types)) and - isinstance(name, string_types) and - isinstance(value, string_types) - for (namespace, name), value in attrs.items()) - - return {"type": "StartTag", - "name": text_type(name), - "namespace": to_text(namespace), - "data": dict(((to_text(namespace, False), to_text(name)), - to_text(value, False)) - for (namespace, name), value in attrs.items())} - - def endTag(self, namespace, name): - assert namespace is None or isinstance(namespace, string_types), type(namespace) - assert isinstance(name, string_types), type(namespace) - - return {"type": "EndTag", - "name": to_text(name, False), - "namespace": to_text(namespace), - "data": {}} - - def text(self, data): - assert isinstance(data, string_types), type(data) - - data = to_text(data) - middle = data.lstrip(spaceCharacters) - left = data[:len(data) - len(middle)] - if left: - yield {"type": "SpaceCharacters", "data": left} - data = middle - middle = data.rstrip(spaceCharacters) - right = data[len(middle):] - if middle: - yield {"type": "Characters", "data": middle} - if right: - yield {"type": "SpaceCharacters", "data": right} - - def comment(self, data): - assert isinstance(data, string_types), type(data) - - return {"type": "Comment", "data": text_type(data)} - - def doctype(self, name, publicId=None, systemId=None, correct=True): - assert is_text_or_none(name), type(name) - assert is_text_or_none(publicId), type(publicId) - assert is_text_or_none(systemId), type(systemId) - - return {"type": "Doctype", - "name": to_text(name), - "publicId": to_text(publicId), - "systemId": to_text(systemId), - "correct": to_text(correct)} - - def entity(self, name): - assert isinstance(name, string_types), type(name) - - return {"type": "Entity", "name": text_type(name)} - - def unknown(self, nodeType): - return self.error("Unknown node type: " + nodeType) - - -class NonRecursiveTreeWalker(TreeWalker): - def getNodeDetails(self, node): - raise NotImplementedError - - def getFirstChild(self, node): - raise NotImplementedError - - def getNextSibling(self, node): - raise NotImplementedError - - def getParentNode(self, node): - raise NotImplementedError - - def __iter__(self): - currentNode = self.tree - while currentNode is not None: - details = self.getNodeDetails(currentNode) - type, details = details[0], details[1:] - hasChildren = False - - if type == DOCTYPE: - yield self.doctype(*details) - - elif type == TEXT: - for token in self.text(*details): - yield token - - elif type == ELEMENT: - namespace, name, attributes, hasChildren = details - if name in voidElements: - for token in self.emptyTag(namespace, name, attributes, - hasChildren): - yield token - hasChildren = False - else: - yield self.startTag(namespace, name, attributes) - - elif type == COMMENT: - yield self.comment(details[0]) - - elif type == ENTITY: - yield self.entity(details[0]) - - elif type == DOCUMENT: - hasChildren = True - - else: - yield self.unknown(details[0]) - - if hasChildren: - firstChild = self.getFirstChild(currentNode) - else: - firstChild = None - - if firstChild is not None: - currentNode = firstChild - else: - while currentNode is not None: - details = self.getNodeDetails(currentNode) - type, details = details[0], details[1:] - if type == ELEMENT: - namespace, name, attributes, hasChildren = details - if name not in voidElements: - yield self.endTag(namespace, name) - if self.tree is currentNode: - currentNode = None - break - nextSibling = self.getNextSibling(currentNode) - if nextSibling is not None: - currentNode = nextSibling - break - else: - currentNode = self.getParentNode(currentNode) diff --git a/lib/html5lib/treewalkers/dom.py b/lib/html5lib/treewalkers/dom.py deleted file mode 100644 index ac4dcf31..00000000 --- a/lib/html5lib/treewalkers/dom.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.dom import Node - -from . import _base - - -class TreeWalker(_base.NonRecursiveTreeWalker): - def getNodeDetails(self, node): - if node.nodeType == Node.DOCUMENT_TYPE_NODE: - return _base.DOCTYPE, node.name, node.publicId, node.systemId - - elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): - return _base.TEXT, node.nodeValue - - elif node.nodeType == Node.ELEMENT_NODE: - attrs = {} - for attr in list(node.attributes.keys()): - attr = node.getAttributeNode(attr) - if attr.namespaceURI: - attrs[(attr.namespaceURI, attr.localName)] = attr.value - else: - attrs[(None, attr.name)] = attr.value - return (_base.ELEMENT, node.namespaceURI, node.nodeName, - attrs, node.hasChildNodes()) - - elif node.nodeType == Node.COMMENT_NODE: - return _base.COMMENT, node.nodeValue - - elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): - return (_base.DOCUMENT,) - - else: - return _base.UNKNOWN, node.nodeType - - def getFirstChild(self, node): - return node.firstChild - - def getNextSibling(self, node): - return node.nextSibling - - def getParentNode(self, node): - return node.parentNode diff --git a/lib/html5lib/treewalkers/etree.py b/lib/html5lib/treewalkers/etree.py deleted file mode 100644 index 69840c21..00000000 --- a/lib/html5lib/treewalkers/etree.py +++ /dev/null @@ -1,136 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -try: - from collections import OrderedDict -except ImportError: - try: - from ordereddict import OrderedDict - except ImportError: - OrderedDict = dict - -import re - -from six import string_types - -from . import _base -from ..utils import moduleFactoryFactory - -tag_regexp = re.compile("{([^}]*)}(.*)") - - -def getETreeBuilder(ElementTreeImplementation): - ElementTree = ElementTreeImplementation - ElementTreeCommentType = ElementTree.Comment("asd").tag - - class TreeWalker(_base.NonRecursiveTreeWalker): - """Given the particular ElementTree representation, this implementation, - to avoid using recursion, returns "nodes" as tuples with the following - content: - - 1. The current element - - 2. The index of the element relative to its parent - - 3. A stack of ancestor elements - - 4. A flag "text", "tail" or None to indicate if the current node is a - text node; either the text or tail of the current element (1) - """ - def getNodeDetails(self, node): - if isinstance(node, tuple): # It might be the root Element - elt, key, parents, flag = node - if flag in ("text", "tail"): - return _base.TEXT, getattr(elt, flag) - else: - node = elt - - if not(hasattr(node, "tag")): - node = node.getroot() - - if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): - return (_base.DOCUMENT,) - - elif node.tag == "": - return (_base.DOCTYPE, node.text, - node.get("publicId"), node.get("systemId")) - - elif node.tag == ElementTreeCommentType: - return _base.COMMENT, node.text - - else: - assert isinstance(node.tag, string_types), type(node.tag) - # This is assumed to be an ordinary element - match = tag_regexp.match(node.tag) - if match: - namespace, tag = match.groups() - else: - namespace = None - tag = node.tag - attrs = OrderedDict() - for name, value in list(node.attrib.items()): - match = tag_regexp.match(name) - if match: - attrs[(match.group(1), match.group(2))] = value - else: - attrs[(None, name)] = value - return (_base.ELEMENT, namespace, tag, - attrs, len(node) or node.text) - - def getFirstChild(self, node): - if isinstance(node, tuple): - element, key, parents, flag = node - else: - element, key, parents, flag = node, None, [], None - - if flag in ("text", "tail"): - return None - else: - if element.text: - return element, key, parents, "text" - elif len(element): - parents.append(element) - return element[0], 0, parents, None - else: - return None - - def getNextSibling(self, node): - if isinstance(node, tuple): - element, key, parents, flag = node - else: - return None - - if flag == "text": - if len(element): - parents.append(element) - return element[0], 0, parents, None - else: - return None - else: - if element.tail and flag != "tail": - return element, key, parents, "tail" - elif key < len(parents[-1]) - 1: - return parents[-1][key + 1], key + 1, parents, None - else: - return None - - def getParentNode(self, node): - if isinstance(node, tuple): - element, key, parents, flag = node - else: - return None - - if flag == "text": - if not parents: - return element - else: - return element, key, parents, None - else: - parent = parents.pop() - if not parents: - return parent - else: - return parent, list(parents[-1]).index(parent), parents, None - - return locals() - -getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/lib/html5lib/treewalkers/genshistream.py b/lib/html5lib/treewalkers/genshistream.py deleted file mode 100644 index f559c45d..00000000 --- a/lib/html5lib/treewalkers/genshistream.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from genshi.core import QName -from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT -from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT - -from . import _base - -from ..constants import voidElements, namespaces - - -class TreeWalker(_base.TreeWalker): - def __iter__(self): - # Buffer the events so we can pass in the following one - previous = None - for event in self.tree: - if previous is not None: - for token in self.tokens(previous, event): - yield token - previous = event - - # Don't forget the final event! - if previous is not None: - for token in self.tokens(previous, None): - yield token - - def tokens(self, event, next): - kind, data, pos = event - if kind == START: - tag, attribs = data - name = tag.localname - namespace = tag.namespace - converted_attribs = {} - for k, v in attribs: - if isinstance(k, QName): - converted_attribs[(k.namespace, k.localname)] = v - else: - converted_attribs[(None, k)] = v - - if namespace == namespaces["html"] and name in voidElements: - for token in self.emptyTag(namespace, name, converted_attribs, - not next or next[0] != END - or next[1] != tag): - yield token - else: - yield self.startTag(namespace, name, converted_attribs) - - elif kind == END: - name = data.localname - namespace = data.namespace - if name not in voidElements: - yield self.endTag(namespace, name) - - elif kind == COMMENT: - yield self.comment(data) - - elif kind == TEXT: - for token in self.text(data): - yield token - - elif kind == DOCTYPE: - yield self.doctype(*data) - - elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, - START_CDATA, END_CDATA, PI): - pass - - else: - yield self.unknown(kind) diff --git a/lib/html5lib/treewalkers/lxmletree.py b/lib/html5lib/treewalkers/lxmletree.py deleted file mode 100644 index 90e116d3..00000000 --- a/lib/html5lib/treewalkers/lxmletree.py +++ /dev/null @@ -1,201 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - -from lxml import etree -from ..treebuilders.etree import tag_regexp - -from . import _base - -from .. import ihatexml - - -def ensure_str(s): - if s is None: - return None - elif isinstance(s, text_type): - return s - else: - return s.decode("utf-8", "strict") - - -class Root(object): - def __init__(self, et): - self.elementtree = et - self.children = [] - if et.docinfo.internalDTD: - self.children.append(Doctype(self, - ensure_str(et.docinfo.root_name), - ensure_str(et.docinfo.public_id), - ensure_str(et.docinfo.system_url))) - root = et.getroot() - node = root - - while node.getprevious() is not None: - node = node.getprevious() - while node is not None: - self.children.append(node) - node = node.getnext() - - self.text = None - self.tail = None - - def __getitem__(self, key): - return self.children[key] - - def getnext(self): - return None - - def __len__(self): - return 1 - - -class Doctype(object): - def __init__(self, root_node, name, public_id, system_id): - self.root_node = root_node - self.name = name - self.public_id = public_id - self.system_id = system_id - - self.text = None - self.tail = None - - def getnext(self): - return self.root_node.children[1] - - -class FragmentRoot(Root): - def __init__(self, children): - self.children = [FragmentWrapper(self, child) for child in children] - self.text = self.tail = None - - def getnext(self): - return None - - -class FragmentWrapper(object): - def __init__(self, fragment_root, obj): - self.root_node = fragment_root - self.obj = obj - if hasattr(self.obj, 'text'): - self.text = ensure_str(self.obj.text) - else: - self.text = None - if hasattr(self.obj, 'tail'): - self.tail = ensure_str(self.obj.tail) - else: - self.tail = None - - def __getattr__(self, name): - return getattr(self.obj, name) - - def getnext(self): - siblings = self.root_node.children - idx = siblings.index(self) - if idx < len(siblings) - 1: - return siblings[idx + 1] - else: - return None - - def __getitem__(self, key): - return self.obj[key] - - def __bool__(self): - return bool(self.obj) - - def getparent(self): - return None - - def __str__(self): - return str(self.obj) - - def __unicode__(self): - return str(self.obj) - - def __len__(self): - return len(self.obj) - - -class TreeWalker(_base.NonRecursiveTreeWalker): - def __init__(self, tree): - if hasattr(tree, "getroot"): - tree = Root(tree) - elif isinstance(tree, list): - tree = FragmentRoot(tree) - _base.NonRecursiveTreeWalker.__init__(self, tree) - self.filter = ihatexml.InfosetFilter() - - def getNodeDetails(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key - return _base.TEXT, ensure_str(getattr(node, key)) - - elif isinstance(node, Root): - return (_base.DOCUMENT,) - - elif isinstance(node, Doctype): - return _base.DOCTYPE, node.name, node.public_id, node.system_id - - elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): - return _base.TEXT, node.obj - - elif node.tag == etree.Comment: - return _base.COMMENT, ensure_str(node.text) - - elif node.tag == etree.Entity: - return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &; - - else: - # This is assumed to be an ordinary element - match = tag_regexp.match(ensure_str(node.tag)) - if match: - namespace, tag = match.groups() - else: - namespace = None - tag = ensure_str(node.tag) - attrs = {} - for name, value in list(node.attrib.items()): - name = ensure_str(name) - value = ensure_str(value) - match = tag_regexp.match(name) - if match: - attrs[(match.group(1), match.group(2))] = value - else: - attrs[(None, name)] = value - return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag), - attrs, len(node) > 0 or node.text) - - def getFirstChild(self, node): - assert not isinstance(node, tuple), "Text nodes have no children" - - assert len(node) or node.text, "Node has no children" - if node.text: - return (node, "text") - else: - return node[0] - - def getNextSibling(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key - if key == "text": - # XXX: we cannot use a "bool(node) and node[0] or None" construct here - # because node[0] might evaluate to False if it has no child element - if len(node): - return node[0] - else: - return None - else: # tail - return node.getnext() - - return (node, "tail") if node.tail else node.getnext() - - def getParentNode(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key - if key == "text": - return node - # else: fallback to "normal" processing - - return node.getparent() diff --git a/lib/html5lib/treewalkers/pulldom.py b/lib/html5lib/treewalkers/pulldom.py deleted file mode 100644 index 0b0f515f..00000000 --- a/lib/html5lib/treewalkers/pulldom.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \ - COMMENT, IGNORABLE_WHITESPACE, CHARACTERS - -from . import _base - -from ..constants import voidElements - - -class TreeWalker(_base.TreeWalker): - def __iter__(self): - ignore_until = None - previous = None - for event in self.tree: - if previous is not None and \ - (ignore_until is None or previous[1] is ignore_until): - if previous[1] is ignore_until: - ignore_until = None - for token in self.tokens(previous, event): - yield token - if token["type"] == "EmptyTag": - ignore_until = previous[1] - previous = event - if ignore_until is None or previous[1] is ignore_until: - for token in self.tokens(previous, None): - yield token - elif ignore_until is not None: - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") - - def tokens(self, event, next): - type, node = event - if type == START_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - attrs = {} - for attr in list(node.attributes.keys()): - attr = node.getAttributeNode(attr) - attrs[(attr.namespaceURI, attr.localName)] = attr.value - if name in voidElements: - for token in self.emptyTag(namespace, - name, - attrs, - not next or next[1] is not node): - yield token - else: - yield self.startTag(namespace, name, attrs) - - elif type == END_ELEMENT: - name = node.nodeName - namespace = node.namespaceURI - if name not in voidElements: - yield self.endTag(namespace, name) - - elif type == COMMENT: - yield self.comment(node.nodeValue) - - elif type in (IGNORABLE_WHITESPACE, CHARACTERS): - for token in self.text(node.nodeValue): - yield token - - else: - yield self.unknown(type) diff --git a/lib/html5lib/trie/__init__.py b/lib/html5lib/trie/__init__.py deleted file mode 100644 index a8cca8a9..00000000 --- a/lib/html5lib/trie/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from .py import Trie as PyTrie - -Trie = PyTrie - -try: - from .datrie import Trie as DATrie -except ImportError: - pass -else: - Trie = DATrie diff --git a/lib/html5lib/trie/_base.py b/lib/html5lib/trie/_base.py deleted file mode 100644 index 724486b1..00000000 --- a/lib/html5lib/trie/_base.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from collections import Mapping - - -class Trie(Mapping): - """Abstract base class for tries""" - - def keys(self, prefix=None): - keys = super().keys() - - if prefix is None: - return set(keys) - - # Python 2.6: no set comprehensions - return set([x for x in keys if x.startswith(prefix)]) - - def has_keys_with_prefix(self, prefix): - for key in self.keys(): - if key.startswith(prefix): - return True - - return False - - def longest_prefix(self, prefix): - if prefix in self: - return prefix - - for i in range(1, len(prefix) + 1): - if prefix[:-i] in self: - return prefix[:-i] - - raise KeyError(prefix) - - def longest_prefix_item(self, prefix): - lprefix = self.longest_prefix(prefix) - return (lprefix, self[lprefix]) diff --git a/lib/html5lib/trie/datrie.py b/lib/html5lib/trie/datrie.py deleted file mode 100644 index 51f3d046..00000000 --- a/lib/html5lib/trie/datrie.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from datrie import Trie as DATrie -from six import text_type - -from ._base import Trie as ABCTrie - - -class Trie(ABCTrie): - def __init__(self, data): - chars = set() - for key in data.keys(): - if not isinstance(key, text_type): - raise TypeError("All keys must be strings") - for char in key: - chars.add(char) - - self._data = DATrie("".join(chars)) - for key, value in data.items(): - self._data[key] = value - - def __contains__(self, key): - return key in self._data - - def __len__(self): - return len(self._data) - - def __iter__(self): - raise NotImplementedError() - - def __getitem__(self, key): - return self._data[key] - - def keys(self, prefix=None): - return self._data.keys(prefix) - - def has_keys_with_prefix(self, prefix): - return self._data.has_keys_with_prefix(prefix) - - def longest_prefix(self, prefix): - return self._data.longest_prefix(prefix) - - def longest_prefix_item(self, prefix): - return self._data.longest_prefix_item(prefix) diff --git a/lib/html5lib/trie/py.py b/lib/html5lib/trie/py.py deleted file mode 100644 index c2ba3da7..00000000 --- a/lib/html5lib/trie/py.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - -from bisect import bisect_left - -from ._base import Trie as ABCTrie - - -class Trie(ABCTrie): - def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): - raise TypeError("All keys must be strings") - - self._data = data - self._keys = sorted(data.keys()) - self._cachestr = "" - self._cachepoints = (0, len(data)) - - def __contains__(self, key): - return key in self._data - - def __len__(self): - return len(self._data) - - def __iter__(self): - return iter(self._data) - - def __getitem__(self, key): - return self._data[key] - - def keys(self, prefix=None): - if prefix is None or prefix == "" or not self._keys: - return set(self._keys) - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - start = i = bisect_left(self._keys, prefix, lo, hi) - else: - start = i = bisect_left(self._keys, prefix) - - keys = set() - if start == len(self._keys): - return keys - - while self._keys[i].startswith(prefix): - keys.add(self._keys[i]) - i += 1 - - self._cachestr = prefix - self._cachepoints = (start, i) - - return keys - - def has_keys_with_prefix(self, prefix): - if prefix in self._data: - return True - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - i = bisect_left(self._keys, prefix, lo, hi) - else: - i = bisect_left(self._keys, prefix) - - if i == len(self._keys): - return False - - return self._keys[i].startswith(prefix) diff --git a/lib/html5lib/utils.py b/lib/html5lib/utils.py deleted file mode 100644 index fdc18feb..00000000 --- a/lib/html5lib/utils.py +++ /dev/null @@ -1,103 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from types import ModuleType - -from six import text_type - -try: - import xml.etree.cElementTree as default_etree -except ImportError: - import xml.etree.ElementTree as default_etree - - -__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", - "surrogatePairToCodepoint", "moduleFactoryFactory", - "supports_lone_surrogates"] - - -# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be -# caught by the below test. In general this would be any platform -# using UTF-16 as its encoding of unicode strings, such as -# Jython. This is because UTF-16 itself is based on the use of such -# surrogates, and there is no mechanism to further escape such -# escapes. -try: - _x = eval('"\\uD800"') - if not isinstance(_x, text_type): - # We need this with u"" because of http://bugs.jython.org/issue2039 - _x = eval('u"\\uD800"') - assert isinstance(_x, text_type) -except: - supports_lone_surrogates = False -else: - supports_lone_surrogates = True - - -class MethodDispatcher(dict): - """Dict with 2 special properties: - - On initiation, keys that are lists, sets or tuples are converted to - multiple keys so accessing any one of the items in the original - list-like object returns the matching value - - md = MethodDispatcher({("foo", "bar"):"baz"}) - md["foo"] == "baz" - - A default value which can be set through the default attribute. - """ - - def __init__(self, items=()): - # Using _dictEntries instead of directly assigning to self is about - # twice as fast. Please do careful performance testing before changing - # anything here. - _dictEntries = [] - for name, value in items: - if type(name) in (list, tuple, frozenset, set): - for item in name: - _dictEntries.append((item, value)) - else: - _dictEntries.append((name, value)) - dict.__init__(self, _dictEntries) - self.default = None - - def __getitem__(self, key): - return dict.get(self, key, self.default) - - -# Some utility functions to dal with weirdness around UCS2 vs UCS4 -# python builds - -def isSurrogatePair(data): - return (len(data) == 2 and - ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and - ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) - - -def surrogatePairToCodepoint(data): - char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + - (ord(data[1]) - 0xDC00)) - return char_val - -# Module Factory Factory (no, this isn't Java, I know) -# Here to stop this being duplicated all over the place. - - -def moduleFactoryFactory(factory): - moduleCache = {} - - def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): - name = "_%s_factory" % baseModule.__name__ - else: - name = b"_%s_factory" % baseModule.__name__ - - if name in moduleCache: - return moduleCache[name] - else: - mod = ModuleType(name) - objs = factory(baseModule, *args, **kwargs) - mod.__dict__.update(objs) - moduleCache[name] = mod - return mod - - return moduleFactory diff --git a/lib/httplib2/__init__.py b/lib/httplib2/__init__.py deleted file mode 100755 index 3652df61..00000000 --- a/lib/httplib2/__init__.py +++ /dev/null @@ -1,1215 +0,0 @@ -from __future__ import generators -""" -httplib2 - -A caching http interface that supports ETags and gzip -to conserve bandwidth. - -Requires Python 2.3 or later - -Changelog: -2007-08-18, Rick: Modified so it's able to use a socks proxy if needed. - -""" - -__author__ = "Joe Gregorio (joe@bitworking.org)" -__copyright__ = "Copyright 2006, Joe Gregorio" -__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", - "James Antill", - "Xavier Verges Farrero", - "Jonathan Feinberg", - "Blair Zajac", - "Sam Ruby", - "Louis Nyffenegger"] -__license__ = "MIT" -__version__ = "$Rev$" - -import re -import sys -import email -import email.Utils -import email.Message -import email.FeedParser -import StringIO -import gzip -import zlib -import httplib -import urlparse -import base64 -import os -import copy -import calendar -import time -import random -import errno -# remove depracated warning in python2.6 -try: - from hashlib import sha1 as _sha, md5 as _md5 -except ImportError: - import sha - import md5 - _sha = sha.new - _md5 = md5.new -import hmac -from gettext import gettext as _ -import socket - -# Try using local version, followed by system, and none if neither are found -try: - import socks as socks -except ImportError: - try: - import socks as socks - except ImportError: - socks = None - -# Build the appropriate socket wrapper for ssl -try: - import ssl # python 2.6 - _ssl_wrap_socket = ssl.wrap_socket -except ImportError: - def _ssl_wrap_socket(sock, key_file, cert_file): - ssl_sock = socket.ssl(sock, key_file, cert_file) - return httplib.FakeSocket(sock, ssl_sock) - - -if sys.version_info >= (2,3): - from iri2uri import iri2uri -else: - def iri2uri(uri): - return uri - -def has_timeout(timeout): # python 2.6 - if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'): - return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT) - return (timeout is not None) - -__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error', - 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent', - 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError', - 'debuglevel'] - - -# The httplib debug level, set to a non-zero value to get debug output -debuglevel = 0 - - -# Python 2.3 support -if sys.version_info < (2,4): - def sorted(seq): - seq.sort() - return seq - -# Python 2.3 support -def HTTPResponse__getheaders(self): - """Return list of (header, value) tuples.""" - if self.msg is None: - raise httplib.ResponseNotReady() - return self.msg.items() - -if not hasattr(httplib.HTTPResponse, 'getheaders'): - httplib.HTTPResponse.getheaders = HTTPResponse__getheaders - -# All exceptions raised here derive from HttpLib2Error -class HttpLib2Error(Exception): pass - -# Some exceptions can be caught and optionally -# be turned back into responses. -class HttpLib2ErrorWithResponse(HttpLib2Error): - def __init__(self, desc, response, content): - self.response = response - self.content = content - HttpLib2Error.__init__(self, desc) - -class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass -class RedirectLimit(HttpLib2ErrorWithResponse): pass -class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass -class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass -class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass - -class RelativeURIError(HttpLib2Error): pass -class ServerNotFoundError(HttpLib2Error): pass - -# Open Items: -# ----------- -# Proxy support - -# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) - -# Pluggable cache storage (supports storing the cache in -# flat files by default. We need a plug-in architecture -# that can support Berkeley DB and Squid) - -# == Known Issues == -# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator. -# Does not handle Cache-Control: max-stale -# Does not use Age: headers when calculating cache freshness. - - -# The number of redirections to follow before giving up. -# Note that only GET redirects are automatically followed. -# Will also honor 301 requests by saving that info and never -# requesting that URI again. -DEFAULT_MAX_REDIRECTS = 5 - -# Which headers are hop-by-hop headers by default -HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade'] - -def _get_end2end_headers(response): - hopbyhop = list(HOP_BY_HOP) - hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')]) - return [header for header in response.keys() if header not in hopbyhop] - -URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") - -def parse_uri(uri): - """Parses a URI using the regex given in Appendix B of RFC 3986. - - (scheme, authority, path, query, fragment) = parse_uri(uri) - """ - groups = URI.match(uri).groups() - return (groups[1], groups[3], groups[4], groups[6], groups[8]) - -def urlnorm(uri): - (scheme, authority, path, query, fragment) = parse_uri(uri) - if not scheme or not authority: - raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) - authority = authority.lower() - scheme = scheme.lower() - if not path: - path = "/" - # Could do syntax based normalization of the URI before - # computing the digest. See Section 6.2.2 of Std 66. - request_uri = query and "?".join([path, query]) or path - scheme = scheme.lower() - defrag_uri = scheme + "://" + authority + request_uri - return scheme, authority, request_uri, defrag_uri - - -# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) -re_url_scheme = re.compile(r'^\w+://') -re_slash = re.compile(r'[?/:|]+') - -def safename(filename): - """Return a filename suitable for the cache. - - Strips dangerous and common characters to create a filename we - can use to store the cache in. - """ - - try: - if re_url_scheme.match(filename): - if isinstance(filename,str): - filename = filename.decode('utf-8') - filename = filename.encode('idna') - else: - filename = filename.encode('idna') - except UnicodeError: - pass - if isinstance(filename,unicode): - filename=filename.encode('utf-8') - filemd5 = _md5(filename).hexdigest() - filename = re_url_scheme.sub("", filename) - filename = re_slash.sub(",", filename) - - # limit length of filename - if len(filename)>200: - filename=filename[:200] - return ",".join((filename, filemd5)) - -NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+') -def _normalize_headers(headers): - return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()]) - -def _parse_cache_control(headers): - retval = {} - if headers.has_key('cache-control'): - parts = headers['cache-control'].split(',') - parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")] - parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")] - retval = dict(parts_with_args + parts_wo_args) - return retval - -# Whether to use a strict mode to parse WWW-Authenticate headers -# Might lead to bad results in case of ill-formed header value, -# so disabled by default, falling back to relaxed parsing. -# Set to true to turn on, usefull for testing servers. -USE_WWW_AUTH_STRICT_PARSING = 0 - -# In regex below: -# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP -# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space -# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both: -# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?@,;:\\\"/[\]?={} \t]+(?!\"))\"? -WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$") -WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(? current_age: - retval = "FRESH" - return retval - -def _decompressContent(response, new_content): - content = new_content - try: - encoding = response.get('content-encoding', None) - if encoding in ['gzip', 'deflate']: - if encoding == 'gzip': - content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read() - if encoding == 'deflate': - content = zlib.decompress(content) - response['content-length'] = str(len(content)) - # Record the historical presence of the encoding in a way the won't interfere. - response['-content-encoding'] = response['content-encoding'] - del response['content-encoding'] - except IOError: - content = "" - raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content) - return content - -def _updateCache(request_headers, response_headers, content, cache, cachekey): - if cachekey: - cc = _parse_cache_control(request_headers) - cc_response = _parse_cache_control(response_headers) - if cc.has_key('no-store') or cc_response.has_key('no-store'): - cache.delete(cachekey) - else: - info = email.Message.Message() - for key, value in response_headers.iteritems(): - if key not in ['status','content-encoding','transfer-encoding']: - info[key] = value - - # Add annotations to the cache to indicate what headers - # are variant for this request. - vary = response_headers.get('vary', None) - if vary: - vary_headers = vary.lower().replace(' ', '').split(',') - for header in vary_headers: - key = '-varied-%s' % header - try: - info[key] = request_headers[header] - except KeyError: - pass - - status = response_headers.status - if status == 304: - status = 200 - - status_header = 'status: %d\r\n' % response_headers.status - - header_str = info.as_string() - - header_str = re.sub("\r(?!\n)|(? 0: - service = "cl" - # No point in guessing Base or Spreadsheet - #elif request_uri.find("spreadsheets") > 0: - # service = "wise" - - auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent']) - resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'}) - lines = content.split('\n') - d = dict([tuple(line.split("=", 1)) for line in lines if line]) - if resp.status == 403: - self.Auth = "" - else: - self.Auth = d['Auth'] - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header.""" - headers['authorization'] = 'GoogleLogin Auth=' + self.Auth - - -AUTH_SCHEME_CLASSES = { - "basic": BasicAuthentication, - "wsse": WsseAuthentication, - "digest": DigestAuthentication, - "hmacdigest": HmacDigestAuthentication, - "googlelogin": GoogleLoginAuthentication -} - -AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] - -class FileCache(object): - """Uses a local directory as a store for cached files. - Not really safe to use if multiple threads or processes are going to - be running on the same cache. - """ - def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior - self.cache = cache - self.safe = safe - if not os.path.exists(cache): - os.makedirs(self.cache) - - def get(self, key): - retval = None - cacheFullPath = os.path.join(self.cache, self.safe(key)) - try: - f = file(cacheFullPath, "rb") - retval = f.read() - f.close() - except IOError, e: - pass - return retval - - def set(self, key, value): - cacheFullPath = os.path.join(self.cache, self.safe(key)) - f = file(cacheFullPath, "wb") - f.write(value) - f.close() - - def delete(self, key): - cacheFullPath = os.path.join(self.cache, self.safe(key)) - if os.path.exists(cacheFullPath): - os.remove(cacheFullPath) - -class Credentials(object): - def __init__(self): - self.credentials = [] - - def add(self, name, password, domain=""): - self.credentials.append((domain.lower(), name, password)) - - def clear(self): - self.credentials = [] - - def iter(self, domain): - for (cdomain, name, password) in self.credentials: - if cdomain == "" or domain == cdomain: - yield (name, password) - -class KeyCerts(Credentials): - """Identical to Credentials except that - name/password are mapped to key/cert.""" - pass - - -class ProxyInfo(object): - """Collect information required to use a proxy.""" - def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None): - """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX - constants. For example: - -p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000) - """ - self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass - - def astuple(self): - return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, - self.proxy_user, self.proxy_pass) - - def isgood(self): - return socks and (self.proxy_host != None) and (self.proxy_port != None) - - -class HTTPConnectionWithTimeout(httplib.HTTPConnection): - """HTTPConnection subclass that supports timeouts""" - - def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None): - httplib.HTTPConnection.__init__(self, host, port, strict) - self.timeout = timeout - self.proxy_info = proxy_info - - def connect(self): - """Connect to the host and port specified in __init__.""" - # Mostly verbatim from httplib.py. - msg = "getaddrinfo returns an empty list" - for res in socket.getaddrinfo(self.host, self.port, 0, - socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - try: - if self.proxy_info and self.proxy_info.isgood(): - self.sock = socks.socksocket(af, socktype, proto) - self.sock.setproxy(*self.proxy_info.astuple()) - else: - self.sock = socket.socket(af, socktype, proto) - # Different from httplib: support timeouts. - if has_timeout(self.timeout): - self.sock.settimeout(self.timeout) - # End of difference from httplib. - if self.debuglevel > 0: - print "connect: (%s, %s)" % (self.host, self.port) - - self.sock.connect(sa) - except socket.error, msg: - if self.debuglevel > 0: - print 'connect fail:', (self.host, self.port) - if self.sock: - self.sock.close() - self.sock = None - continue - break - if not self.sock: - raise socket.error, msg - -class HTTPSConnectionWithTimeout(httplib.HTTPSConnection): - "This class allows communication via SSL." - - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=None, proxy_info=None): - httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file, - cert_file=cert_file, strict=strict) - self.timeout = timeout - self.proxy_info = proxy_info - - def connect(self): - "Connect to a host on a given (SSL) port." - - if self.proxy_info and self.proxy_info.isgood(): - sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM) - sock.setproxy(*self.proxy_info.astuple()) - else: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - - if has_timeout(self.timeout): - sock.settimeout(self.timeout) - sock.connect((self.host, self.port)) - self.sock =_ssl_wrap_socket(sock, self.key_file, self.cert_file) - - - -class Http(object): - """An HTTP client that handles: -- all methods -- caching -- ETags -- compression, -- HTTPS -- Basic -- Digest -- WSSE - -and more. - """ - def __init__(self, cache=None, timeout=None, proxy_info=None): - """The value of proxy_info is a ProxyInfo instance. - -If 'cache' is a string then it is used as a directory name -for a disk cache. Otherwise it must be an object that supports -the same interface as FileCache.""" - self.proxy_info = proxy_info - # Map domain name to an httplib connection - self.connections = {} - # The location of the cache, for now a directory - # where cached responses are held. - if cache and isinstance(cache, str): - self.cache = FileCache(cache) - else: - self.cache = cache - - # Name/password - self.credentials = Credentials() - - # Key/cert - self.certificates = KeyCerts() - - # authorization objects - self.authorizations = [] - - # If set to False then no redirects are followed, even safe ones. - self.follow_redirects = True - - # Which HTTP methods do we apply optimistic concurrency to, i.e. - # which methods get an "if-match:" etag header added to them. - self.optimistic_concurrency_methods = ["PUT"] - - # If 'follow_redirects' is True, and this is set to True then - # all redirecs are followed, including unsafe ones. - self.follow_all_redirects = False - - self.ignore_etag = False - - self.force_exception_to_status_code = False - - self.timeout = timeout - - def _auth_from_challenge(self, host, request_uri, headers, response, content): - """A generator that creates Authorization objects - that can be applied to requests. - """ - challenges = _parse_www_authenticate(response, 'www-authenticate') - for cred in self.credentials.iter(host): - for scheme in AUTH_SCHEME_ORDER: - if challenges.has_key(scheme): - yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) - - def add_credentials(self, name, password, domain=""): - """Add a name and password that will be used - any time a request requires authentication.""" - self.credentials.add(name, password, domain) - - def add_certificate(self, key, cert, domain): - """Add a key and cert that will be used - any time a request requires authentication.""" - self.certificates.add(key, cert, domain) - - def clear_credentials(self): - """Remove all the names and passwords - that are used for authentication""" - self.credentials.clear() - self.authorizations = [] - - def _conn_request(self, conn, request_uri, method, body, headers): - for i in range(2): - try: - conn.request(method, request_uri, body, headers) - except socket.gaierror: - conn.close() - raise ServerNotFoundError("Unable to find the server at %s" % conn.host) - except socket.error, e: - if not hasattr(e, 'errno'): # I don't know what this is so lets raise it if it happens - raise - elif e.errno == errno.ECONNREFUSED: # Connection refused - raise - # Just because the server closed the connection doesn't apparently mean - # that the server didn't send a response. - pass - except httplib.HTTPException: - # Just because the server closed the connection doesn't apparently mean - # that the server didn't send a response. - pass - try: - response = conn.getresponse() - except (socket.error, httplib.HTTPException): - if i == 0: - conn.close() - conn.connect() - continue - else: - raise - else: - content = "" - if method == "HEAD": - response.close() - else: - content = response.read() - response = Response(response) - if method != "HEAD": - content = _decompressContent(response, content) - break - return (response, content) - - - def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey): - """Do the actual request using the connection object - and also follow one level of redirects if necessary""" - - auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] - auth = auths and sorted(auths)[0][1] or None - if auth: - auth.request(method, request_uri, headers, body) - - (response, content) = self._conn_request(conn, request_uri, method, body, headers) - - if auth: - if auth.response(response, body): - auth.request(method, request_uri, headers, body) - (response, content) = self._conn_request(conn, request_uri, method, body, headers ) - response._stale_digest = 1 - - if response.status == 401: - for authorization in self._auth_from_challenge(host, request_uri, headers, response, content): - authorization.request(method, request_uri, headers, body) - (response, content) = self._conn_request(conn, request_uri, method, body, headers, ) - if response.status != 401: - self.authorizations.append(authorization) - authorization.response(response, body) - break - - if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303): - if self.follow_redirects and response.status in [300, 301, 302, 303, 307]: - # Pick out the location header and basically start from the beginning - # remembering first to strip the ETag header and decrement our 'depth' - if redirections: - if not response.has_key('location') and response.status != 300: - raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content) - # Fix-up relative redirects (which violate an RFC 2616 MUST) - if response.has_key('location'): - location = response['location'] - (scheme, authority, path, query, fragment) = parse_uri(location) - if authority is None: - response['location'] = urlparse.urljoin(absolute_uri, location) - if response.status == 301 and method in ["GET", "HEAD"]: - response['-x-permanent-redirect-url'] = response['location'] - if not response.has_key('content-location'): - response['content-location'] = absolute_uri - _updateCache(headers, response, content, self.cache, cachekey) - if headers.has_key('if-none-match'): - del headers['if-none-match'] - if headers.has_key('if-modified-since'): - del headers['if-modified-since'] - if response.has_key('location'): - location = response['location'] - old_response = copy.deepcopy(response) - if not old_response.has_key('content-location'): - old_response['content-location'] = absolute_uri - redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method - (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1) - response.previous = old_response - else: - raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content) - elif response.status in [200, 203] and method == "GET": - # Don't cache 206's since we aren't going to handle byte range requests - if not response.has_key('content-location'): - response['content-location'] = absolute_uri - _updateCache(headers, response, content, self.cache, cachekey) - - return (response, content) - - def _normalize_headers(self, headers): - return _normalize_headers(headers) - -# Need to catch and rebrand some exceptions -# Then need to optionally turn all exceptions into status codes -# including all socket.* and httplib.* exceptions. - - - def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None): - """ Performs a single HTTP request. -The 'uri' is the URI of the HTTP resource and can begin -with either 'http' or 'https'. The value of 'uri' must be an absolute URI. - -The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc. -There is no restriction on the methods allowed. - -The 'body' is the entity body to be sent with the request. It is a string -object. - -Any extra headers that are to be sent with the request should be provided in the -'headers' dictionary. - -The maximum number of redirect to follow before raising an -exception is 'redirections. The default is 5. - -The return value is a tuple of (response, content), the first -being and instance of the 'Response' class, the second being -a string that contains the response entity body. - """ - try: - if headers is None: - headers = {} - else: - headers = self._normalize_headers(headers) - - if not headers.has_key('user-agent'): - headers['user-agent'] = "Python-httplib2/%s" % __version__ - - uri = iri2uri(uri) - - (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) - domain_port = authority.split(":")[0:2] - if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http': - scheme = 'https' - authority = domain_port[0] - - conn_key = scheme+":"+authority - if conn_key in self.connections: - conn = self.connections[conn_key] - else: - if not connection_type: - connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout - certs = list(self.certificates.iter(authority)) - if scheme == 'https' and certs: - conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0], - cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info) - else: - conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info) - conn.set_debuglevel(debuglevel) - - if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers: - headers['accept-encoding'] = 'gzip, deflate' - - info = email.Message.Message() - cached_value = None - if self.cache: - cachekey = defrag_uri - cached_value = self.cache.get(cachekey) - if cached_value: - # info = email.message_from_string(cached_value) - # - # Need to replace the line above with the kludge below - # to fix the non-existent bug not fixed in this - # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html - try: - info, content = cached_value.split('\r\n\r\n', 1) - feedparser = email.FeedParser.FeedParser() - feedparser.feed(info) - info = feedparser.close() - feedparser._parse = None - except IndexError, ValueError: - self.cache.delete(cachekey) - cachekey = None - cached_value = None - else: - cachekey = None - - if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers: - # http://www.w3.org/1999/04/Editing/ - headers['if-match'] = info['etag'] - - if method not in ["GET", "HEAD"] and self.cache and cachekey: - # RFC 2616 Section 13.10 - self.cache.delete(cachekey) - - # Check the vary header in the cache to see if this request - # matches what varies in the cache. - if method in ['GET', 'HEAD'] and 'vary' in info: - vary = info['vary'] - vary_headers = vary.lower().replace(' ', '').split(',') - for header in vary_headers: - key = '-varied-%s' % header - value = info[key] - if headers.get(header, '') != value: - cached_value = None - break - - if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers: - if info.has_key('-x-permanent-redirect-url'): - # Should cached permanent redirects be counted in our redirection count? For now, yes. - (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1) - response.previous = Response(info) - response.previous.fromcache = True - else: - # Determine our course of action: - # Is the cached entry fresh or stale? - # Has the client requested a non-cached response? - # - # There seems to be three possible answers: - # 1. [FRESH] Return the cache entry w/o doing a GET - # 2. [STALE] Do the GET (but add in cache validators if available) - # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request - entry_disposition = _entry_disposition(info, headers) - - if entry_disposition == "FRESH": - if not cached_value: - info['status'] = '504' - content = "" - response = Response(info) - if cached_value: - response.fromcache = True - return (response, content) - - if entry_disposition == "STALE": - if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers: - headers['if-none-match'] = info['etag'] - if info.has_key('last-modified') and not 'last-modified' in headers: - headers['if-modified-since'] = info['last-modified'] - elif entry_disposition == "TRANSPARENT": - pass - - (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) - - if response.status == 304 and method == "GET": - # Rewrite the cache entry with the new end-to-end headers - # Take all headers that are in response - # and overwrite their values in info. - # unless they are hop-by-hop, or are listed in the connection header. - - for key in _get_end2end_headers(response): - info[key] = response[key] - merged_response = Response(info) - if hasattr(response, "_stale_digest"): - merged_response._stale_digest = response._stale_digest - _updateCache(headers, merged_response, content, self.cache, cachekey) - response = merged_response - response.status = 200 - response.fromcache = True - - elif response.status == 200: - content = new_content - else: - self.cache.delete(cachekey) - content = new_content - else: - cc = _parse_cache_control(headers) - if cc.has_key('only-if-cached'): - info['status'] = '504' - response = Response(info) - content = "" - else: - (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) - except Exception, e: - if self.force_exception_to_status_code: - if isinstance(e, HttpLib2ErrorWithResponse): - response = e.response - content = e.content - response.status = 500 - response.reason = str(e) - elif isinstance(e, socket.timeout) or (isinstance(e, socket.error) and 'timed out' in str(e)): - content = "Request Timeout" - response = Response( { - "content-type": "text/plain", - "status": "408", - "content-length": len(content) - }) - response.reason = "Request Timeout" - else: - content = str(e) - response = Response( { - "content-type": "text/plain", - "status": "400", - "content-length": len(content) - }) - response.reason = "Bad Request" - else: - raise - - - return (response, content) - - - -class Response(dict): - """An object more like email.Message than httplib.HTTPResponse.""" - - """Is this response from our local cache""" - fromcache = False - - """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """ - version = 11 - - "Status code returned by server. " - status = 200 - - """Reason phrase returned by server.""" - reason = "Ok" - - previous = None - - def __init__(self, info): - # info is either an email.Message or - # an httplib.HTTPResponse object. - if isinstance(info, httplib.HTTPResponse): - for key, value in info.getheaders(): - self[key.lower()] = value - self.status = info.status - self['status'] = str(self.status) - self.reason = info.reason - self.version = info.version - elif isinstance(info, email.Message.Message): - for key, value in info.items(): - self[key] = value - self.status = int(self['status']) - else: - for key, value in info.iteritems(): - self[key] = value - self.status = int(self.get('status', self.status)) - - - def __getattr__(self, name): - if name == 'dict': - return self - else: - raise AttributeError, name diff --git a/lib/httplib2/iri2uri.py b/lib/httplib2/iri2uri.py deleted file mode 100755 index e4cda1d7..00000000 --- a/lib/httplib2/iri2uri.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -iri2uri - -Converts an IRI to a URI. - -""" -__author__ = "Joe Gregorio (joe@bitworking.org)" -__copyright__ = "Copyright 2006, Joe Gregorio" -__contributors__ = [] -__version__ = "1.0.0" -__license__ = "MIT" -__history__ = """ -""" - -import urlparse - - -# Convert an IRI to a URI following the rules in RFC 3987 -# -# The characters we need to enocde and escape are defined in the spec: -# -# iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD -# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF -# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD -# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD -# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD -# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD -# / %xD0000-DFFFD / %xE1000-EFFFD - -escape_range = [ - (0xA0, 0xD7FF ), - (0xE000, 0xF8FF ), - (0xF900, 0xFDCF ), - (0xFDF0, 0xFFEF), - (0x10000, 0x1FFFD ), - (0x20000, 0x2FFFD ), - (0x30000, 0x3FFFD), - (0x40000, 0x4FFFD ), - (0x50000, 0x5FFFD ), - (0x60000, 0x6FFFD), - (0x70000, 0x7FFFD ), - (0x80000, 0x8FFFD ), - (0x90000, 0x9FFFD), - (0xA0000, 0xAFFFD ), - (0xB0000, 0xBFFFD ), - (0xC0000, 0xCFFFD), - (0xD0000, 0xDFFFD ), - (0xE1000, 0xEFFFD), - (0xF0000, 0xFFFFD ), - (0x100000, 0x10FFFD) -] - -def encode(c): - retval = c - i = ord(c) - for low, high in escape_range: - if i < low: - break - if i >= low and i <= high: - retval = "".join(["%%%2X" % ord(o) for o in c.encode('utf-8')]) - break - return retval - - -def iri2uri(uri): - """Convert an IRI to a URI. Note that IRIs must be - passed in a unicode strings. That is, do not utf-8 encode - the IRI before passing it into the function.""" - if isinstance(uri ,unicode): - (scheme, authority, path, query, fragment) = urlparse.urlsplit(uri) - authority = authority.encode('idna') - # For each character in 'ucschar' or 'iprivate' - # 1. encode as utf-8 - # 2. then %-encode each octet of that utf-8 - uri = urlparse.urlunsplit((scheme, authority, path, query, fragment)) - uri = "".join([encode(c) for c in uri]) - return uri - -if __name__ == "__main__": - import unittest - - class Test(unittest.TestCase): - - def test_uris(self): - """Test that URIs are invariant under the transformation.""" - invariant = [ - u"ftp://ftp.is.co.za/rfc/rfc1808.txt", - u"http://www.ietf.org/rfc/rfc2396.txt", - u"ldap://[2001:db8::7]/c=GB?objectClass?one", - u"mailto:John.Doe@example.com", - u"news:comp.infosystems.www.servers.unix", - u"tel:+1-816-555-1212", - u"telnet://192.0.2.16:80/", - u"urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ] - for uri in invariant: - self.assertEqual(uri, iri2uri(uri)) - - def test_iri(self): - """ Test that the right type of escaping is done for each part of the URI.""" - self.assertEqual("http://xn--o3h.com/%E2%98%84", iri2uri(u"http://\N{COMET}.com/\N{COMET}")) - self.assertEqual("http://bitworking.org/?fred=%E2%98%84", iri2uri(u"http://bitworking.org/?fred=\N{COMET}")) - self.assertEqual("http://bitworking.org/#%E2%98%84", iri2uri(u"http://bitworking.org/#\N{COMET}")) - self.assertEqual("#%E2%98%84", iri2uri(u"#\N{COMET}")) - self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}")) - self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}"))) - self.assertNotEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}".encode('utf-8'))) - - unittest.main() - - diff --git a/lib/idna/__init__.py b/lib/idna/__init__.py new file mode 100644 index 00000000..a40eeafc --- /dev/null +++ b/lib/idna/__init__.py @@ -0,0 +1,44 @@ +from .package_data import __version__ +from .core import ( + IDNABidiError, + IDNAError, + InvalidCodepoint, + InvalidCodepointContext, + alabel, + check_bidi, + check_hyphen_ok, + check_initial_combiner, + check_label, + check_nfc, + decode, + encode, + ulabel, + uts46_remap, + valid_contextj, + valid_contexto, + valid_label_length, + valid_string_length, +) +from .intranges import intranges_contain + +__all__ = [ + "IDNABidiError", + "IDNAError", + "InvalidCodepoint", + "InvalidCodepointContext", + "alabel", + "check_bidi", + "check_hyphen_ok", + "check_initial_combiner", + "check_label", + "check_nfc", + "decode", + "encode", + "intranges_contain", + "ulabel", + "uts46_remap", + "valid_contextj", + "valid_contexto", + "valid_label_length", + "valid_string_length", +] diff --git a/lib/idna/codec.py b/lib/idna/codec.py new file mode 100644 index 00000000..1ca9ba62 --- /dev/null +++ b/lib/idna/codec.py @@ -0,0 +1,112 @@ +from .core import encode, decode, alabel, ulabel, IDNAError +import codecs +import re +from typing import Tuple, Optional + +_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]') + +class Codec(codecs.Codec): + + def encode(self, data: str, errors: str = 'strict') -> Tuple[bytes, int]: + if errors != 'strict': + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) + + if not data: + return b"", 0 + + return encode(data), len(data) + + def decode(self, data: bytes, errors: str = 'strict') -> Tuple[str, int]: + if errors != 'strict': + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) + + if not data: + return '', 0 + + return decode(data), len(data) + +class IncrementalEncoder(codecs.BufferedIncrementalEncoder): + def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore + if errors != 'strict': + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) + + if not data: + return "", 0 + + labels = _unicode_dots_re.split(data) + trailing_dot = '' + if labels: + if not labels[-1]: + trailing_dot = '.' + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = '.' + + result = [] + size = 0 + for label in labels: + result.append(alabel(label)) + if size: + size += 1 + size += len(label) + + # Join with U+002E + result_str = '.'.join(result) + trailing_dot # type: ignore + size += len(trailing_dot) + return result_str, size + +class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def _buffer_decode(self, data: str, errors: str, final: bool) -> Tuple[str, int]: # type: ignore + if errors != 'strict': + raise IDNAError('Unsupported error handling \"{}\"'.format(errors)) + + if not data: + return ('', 0) + + labels = _unicode_dots_re.split(data) + trailing_dot = '' + if labels: + if not labels[-1]: + trailing_dot = '.' + del labels[-1] + elif not final: + # Keep potentially unfinished label until the next call + del labels[-1] + if labels: + trailing_dot = '.' + + result = [] + size = 0 + for label in labels: + result.append(ulabel(label)) + if size: + size += 1 + size += len(label) + + result_str = '.'.join(result) + trailing_dot + size += len(trailing_dot) + return (result_str, size) + + +class StreamWriter(Codec, codecs.StreamWriter): + pass + + +class StreamReader(Codec, codecs.StreamReader): + pass + + +def getregentry() -> codecs.CodecInfo: + # Compatibility as a search_function for codecs.register() + return codecs.CodecInfo( + name='idna', + encode=Codec().encode, # type: ignore + decode=Codec().decode, # type: ignore + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + ) diff --git a/lib/idna/compat.py b/lib/idna/compat.py new file mode 100644 index 00000000..786e6bda --- /dev/null +++ b/lib/idna/compat.py @@ -0,0 +1,13 @@ +from .core import * +from .codec import * +from typing import Any, Union + +def ToASCII(label: str) -> bytes: + return encode(label) + +def ToUnicode(label: Union[bytes, bytearray]) -> str: + return decode(label) + +def nameprep(s: Any) -> None: + raise NotImplementedError('IDNA 2008 does not utilise nameprep protocol') + diff --git a/lib/idna/core.py b/lib/idna/core.py new file mode 100644 index 00000000..55ab9678 --- /dev/null +++ b/lib/idna/core.py @@ -0,0 +1,397 @@ +from . import idnadata +import bisect +import unicodedata +import re +from typing import Union, Optional +from .intranges import intranges_contain + +_virama_combining_class = 9 +_alabel_prefix = b'xn--' +_unicode_dots_re = re.compile('[\u002e\u3002\uff0e\uff61]') + +class IDNAError(UnicodeError): + """ Base exception for all IDNA-encoding related problems """ + pass + + +class IDNABidiError(IDNAError): + """ Exception when bidirectional requirements are not satisfied """ + pass + + +class InvalidCodepoint(IDNAError): + """ Exception when a disallowed or unallocated codepoint is used """ + pass + + +class InvalidCodepointContext(IDNAError): + """ Exception when the codepoint is not valid in the context it is used """ + pass + + +def _combining_class(cp: int) -> int: + v = unicodedata.combining(chr(cp)) + if v == 0: + if not unicodedata.name(chr(cp)): + raise ValueError('Unknown character in unicodedata') + return v + +def _is_script(cp: str, script: str) -> bool: + return intranges_contain(ord(cp), idnadata.scripts[script]) + +def _punycode(s: str) -> bytes: + return s.encode('punycode') + +def _unot(s: int) -> str: + return 'U+{:04X}'.format(s) + + +def valid_label_length(label: Union[bytes, str]) -> bool: + if len(label) > 63: + return False + return True + + +def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool: + if len(label) > (254 if trailing_dot else 253): + return False + return True + + +def check_bidi(label: str, check_ltr: bool = False) -> bool: + # Bidi rules should only be applied if string contains RTL characters + bidi_label = False + for (idx, cp) in enumerate(label, 1): + direction = unicodedata.bidirectional(cp) + if direction == '': + # String likely comes from a newer version of Unicode + raise IDNABidiError('Unknown directionality in label {} at position {}'.format(repr(label), idx)) + if direction in ['R', 'AL', 'AN']: + bidi_label = True + if not bidi_label and not check_ltr: + return True + + # Bidi rule 1 + direction = unicodedata.bidirectional(label[0]) + if direction in ['R', 'AL']: + rtl = True + elif direction == 'L': + rtl = False + else: + raise IDNABidiError('First codepoint in label {} must be directionality L, R or AL'.format(repr(label))) + + valid_ending = False + number_type = None # type: Optional[str] + for (idx, cp) in enumerate(label, 1): + direction = unicodedata.bidirectional(cp) + + if rtl: + # Bidi rule 2 + if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']: + raise IDNABidiError('Invalid direction for codepoint at position {} in a right-to-left label'.format(idx)) + # Bidi rule 3 + if direction in ['R', 'AL', 'EN', 'AN']: + valid_ending = True + elif direction != 'NSM': + valid_ending = False + # Bidi rule 4 + if direction in ['AN', 'EN']: + if not number_type: + number_type = direction + else: + if number_type != direction: + raise IDNABidiError('Can not mix numeral types in a right-to-left label') + else: + # Bidi rule 5 + if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']: + raise IDNABidiError('Invalid direction for codepoint at position {} in a left-to-right label'.format(idx)) + # Bidi rule 6 + if direction in ['L', 'EN']: + valid_ending = True + elif direction != 'NSM': + valid_ending = False + + if not valid_ending: + raise IDNABidiError('Label ends with illegal codepoint directionality') + + return True + + +def check_initial_combiner(label: str) -> bool: + if unicodedata.category(label[0])[0] == 'M': + raise IDNAError('Label begins with an illegal combining character') + return True + + +def check_hyphen_ok(label: str) -> bool: + if label[2:4] == '--': + raise IDNAError('Label has disallowed hyphens in 3rd and 4th position') + if label[0] == '-' or label[-1] == '-': + raise IDNAError('Label must not start or end with a hyphen') + return True + + +def check_nfc(label: str) -> None: + if unicodedata.normalize('NFC', label) != label: + raise IDNAError('Label must be in Normalization Form C') + + +def valid_contextj(label: str, pos: int) -> bool: + cp_value = ord(label[pos]) + + if cp_value == 0x200c: + + if pos > 0: + if _combining_class(ord(label[pos - 1])) == _virama_combining_class: + return True + + ok = False + for i in range(pos-1, -1, -1): + joining_type = idnadata.joining_types.get(ord(label[i])) + if joining_type == ord('T'): + continue + if joining_type in [ord('L'), ord('D')]: + ok = True + break + + if not ok: + return False + + ok = False + for i in range(pos+1, len(label)): + joining_type = idnadata.joining_types.get(ord(label[i])) + if joining_type == ord('T'): + continue + if joining_type in [ord('R'), ord('D')]: + ok = True + break + return ok + + if cp_value == 0x200d: + + if pos > 0: + if _combining_class(ord(label[pos - 1])) == _virama_combining_class: + return True + return False + + else: + + return False + + +def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: + cp_value = ord(label[pos]) + + if cp_value == 0x00b7: + if 0 < pos < len(label)-1: + if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c: + return True + return False + + elif cp_value == 0x0375: + if pos < len(label)-1 and len(label) > 1: + return _is_script(label[pos + 1], 'Greek') + return False + + elif cp_value == 0x05f3 or cp_value == 0x05f4: + if pos > 0: + return _is_script(label[pos - 1], 'Hebrew') + return False + + elif cp_value == 0x30fb: + for cp in label: + if cp == '\u30fb': + continue + if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'): + return True + return False + + elif 0x660 <= cp_value <= 0x669: + for cp in label: + if 0x6f0 <= ord(cp) <= 0x06f9: + return False + return True + + elif 0x6f0 <= cp_value <= 0x6f9: + for cp in label: + if 0x660 <= ord(cp) <= 0x0669: + return False + return True + + return False + + +def check_label(label: Union[str, bytes, bytearray]) -> None: + if isinstance(label, (bytes, bytearray)): + label = label.decode('utf-8') + if len(label) == 0: + raise IDNAError('Empty Label') + + check_nfc(label) + check_hyphen_ok(label) + check_initial_combiner(label) + + for (pos, cp) in enumerate(label): + cp_value = ord(cp) + if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']): + continue + elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']): + try: + if not valid_contextj(label, pos): + raise InvalidCodepointContext('Joiner {} not allowed at position {} in {}'.format( + _unot(cp_value), pos+1, repr(label))) + except ValueError: + raise IDNAError('Unknown codepoint adjacent to joiner {} at position {} in {}'.format( + _unot(cp_value), pos+1, repr(label))) + elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']): + if not valid_contexto(label, pos): + raise InvalidCodepointContext('Codepoint {} not allowed at position {} in {}'.format(_unot(cp_value), pos+1, repr(label))) + else: + raise InvalidCodepoint('Codepoint {} at position {} of {} not allowed'.format(_unot(cp_value), pos+1, repr(label))) + + check_bidi(label) + + +def alabel(label: str) -> bytes: + try: + label_bytes = label.encode('ascii') + ulabel(label_bytes) + if not valid_label_length(label_bytes): + raise IDNAError('Label too long') + return label_bytes + except UnicodeEncodeError: + pass + + if not label: + raise IDNAError('No Input') + + label = str(label) + check_label(label) + label_bytes = _punycode(label) + label_bytes = _alabel_prefix + label_bytes + + if not valid_label_length(label_bytes): + raise IDNAError('Label too long') + + return label_bytes + + +def ulabel(label: Union[str, bytes, bytearray]) -> str: + if not isinstance(label, (bytes, bytearray)): + try: + label_bytes = label.encode('ascii') + except UnicodeEncodeError: + check_label(label) + return label + else: + label_bytes = label + + label_bytes = label_bytes.lower() + if label_bytes.startswith(_alabel_prefix): + label_bytes = label_bytes[len(_alabel_prefix):] + if not label_bytes: + raise IDNAError('Malformed A-label, no Punycode eligible content found') + if label_bytes.decode('ascii')[-1] == '-': + raise IDNAError('A-label must not end with a hyphen') + else: + check_label(label_bytes) + return label_bytes.decode('ascii') + + try: + label = label_bytes.decode('punycode') + except UnicodeError: + raise IDNAError('Invalid A-label') + check_label(label) + return label + + +def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str: + """Re-map the characters in the string according to UTS46 processing.""" + from .uts46data import uts46data + output = '' + + for pos, char in enumerate(domain): + code_point = ord(char) + try: + uts46row = uts46data[code_point if code_point < 256 else + bisect.bisect_left(uts46data, (code_point, 'Z')) - 1] + status = uts46row[1] + replacement = None # type: Optional[str] + if len(uts46row) == 3: + replacement = uts46row[2] # type: ignore + if (status == 'V' or + (status == 'D' and not transitional) or + (status == '3' and not std3_rules and replacement is None)): + output += char + elif replacement is not None and (status == 'M' or + (status == '3' and not std3_rules) or + (status == 'D' and transitional)): + output += replacement + elif status != 'I': + raise IndexError() + except IndexError: + raise InvalidCodepoint( + 'Codepoint {} not allowed at position {} in {}'.format( + _unot(code_point), pos + 1, repr(domain))) + + return unicodedata.normalize('NFC', output) + + +def encode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False, transitional: bool = False) -> bytes: + if isinstance(s, (bytes, bytearray)): + s = s.decode('ascii') + if uts46: + s = uts46_remap(s, std3_rules, transitional) + trailing_dot = False + result = [] + if strict: + labels = s.split('.') + else: + labels = _unicode_dots_re.split(s) + if not labels or labels == ['']: + raise IDNAError('Empty domain') + if labels[-1] == '': + del labels[-1] + trailing_dot = True + for label in labels: + s = alabel(label) + if s: + result.append(s) + else: + raise IDNAError('Empty label') + if trailing_dot: + result.append(b'') + s = b'.'.join(result) + if not valid_string_length(s, trailing_dot): + raise IDNAError('Domain too long') + return s + + +def decode(s: Union[str, bytes, bytearray], strict: bool = False, uts46: bool = False, std3_rules: bool = False) -> str: + try: + if isinstance(s, (bytes, bytearray)): + s = s.decode('ascii') + except UnicodeDecodeError: + raise IDNAError('Invalid ASCII in A-label') + if uts46: + s = uts46_remap(s, std3_rules, False) + trailing_dot = False + result = [] + if not strict: + labels = _unicode_dots_re.split(s) + else: + labels = s.split('.') + if not labels or labels == ['']: + raise IDNAError('Empty domain') + if not labels[-1]: + del labels[-1] + trailing_dot = True + for label in labels: + s = ulabel(label) + if s: + result.append(s) + else: + raise IDNAError('Empty label') + if trailing_dot: + result.append('') + return '.'.join(result) diff --git a/lib/idna/idnadata.py b/lib/idna/idnadata.py new file mode 100644 index 00000000..1b5805d1 --- /dev/null +++ b/lib/idna/idnadata.py @@ -0,0 +1,2137 @@ +# This file is automatically generated by tools/idna-data + +__version__ = '14.0.0' +scripts = { + 'Greek': ( + 0x37000000374, + 0x37500000378, + 0x37a0000037e, + 0x37f00000380, + 0x38400000385, + 0x38600000387, + 0x3880000038b, + 0x38c0000038d, + 0x38e000003a2, + 0x3a3000003e2, + 0x3f000000400, + 0x1d2600001d2b, + 0x1d5d00001d62, + 0x1d6600001d6b, + 0x1dbf00001dc0, + 0x1f0000001f16, + 0x1f1800001f1e, + 0x1f2000001f46, + 0x1f4800001f4e, + 0x1f5000001f58, + 0x1f5900001f5a, + 0x1f5b00001f5c, + 0x1f5d00001f5e, + 0x1f5f00001f7e, + 0x1f8000001fb5, + 0x1fb600001fc5, + 0x1fc600001fd4, + 0x1fd600001fdc, + 0x1fdd00001ff0, + 0x1ff200001ff5, + 0x1ff600001fff, + 0x212600002127, + 0xab650000ab66, + 0x101400001018f, + 0x101a0000101a1, + 0x1d2000001d246, + ), + 'Han': ( + 0x2e8000002e9a, + 0x2e9b00002ef4, + 0x2f0000002fd6, + 0x300500003006, + 0x300700003008, + 0x30210000302a, + 0x30380000303c, + 0x340000004dc0, + 0x4e000000a000, + 0xf9000000fa6e, + 0xfa700000fada, + 0x16fe200016fe4, + 0x16ff000016ff2, + 0x200000002a6e0, + 0x2a7000002b739, + 0x2b7400002b81e, + 0x2b8200002cea2, + 0x2ceb00002ebe1, + 0x2f8000002fa1e, + 0x300000003134b, + ), + 'Hebrew': ( + 0x591000005c8, + 0x5d0000005eb, + 0x5ef000005f5, + 0xfb1d0000fb37, + 0xfb380000fb3d, + 0xfb3e0000fb3f, + 0xfb400000fb42, + 0xfb430000fb45, + 0xfb460000fb50, + ), + 'Hiragana': ( + 0x304100003097, + 0x309d000030a0, + 0x1b0010001b120, + 0x1b1500001b153, + 0x1f2000001f201, + ), + 'Katakana': ( + 0x30a1000030fb, + 0x30fd00003100, + 0x31f000003200, + 0x32d0000032ff, + 0x330000003358, + 0xff660000ff70, + 0xff710000ff9e, + 0x1aff00001aff4, + 0x1aff50001affc, + 0x1affd0001afff, + 0x1b0000001b001, + 0x1b1200001b123, + 0x1b1640001b168, + ), +} +joining_types = { + 0x600: 85, + 0x601: 85, + 0x602: 85, + 0x603: 85, + 0x604: 85, + 0x605: 85, + 0x608: 85, + 0x60b: 85, + 0x620: 68, + 0x621: 85, + 0x622: 82, + 0x623: 82, + 0x624: 82, + 0x625: 82, + 0x626: 68, + 0x627: 82, + 0x628: 68, + 0x629: 82, + 0x62a: 68, + 0x62b: 68, + 0x62c: 68, + 0x62d: 68, + 0x62e: 68, + 0x62f: 82, + 0x630: 82, + 0x631: 82, + 0x632: 82, + 0x633: 68, + 0x634: 68, + 0x635: 68, + 0x636: 68, + 0x637: 68, + 0x638: 68, + 0x639: 68, + 0x63a: 68, + 0x63b: 68, + 0x63c: 68, + 0x63d: 68, + 0x63e: 68, + 0x63f: 68, + 0x640: 67, + 0x641: 68, + 0x642: 68, + 0x643: 68, + 0x644: 68, + 0x645: 68, + 0x646: 68, + 0x647: 68, + 0x648: 82, + 0x649: 68, + 0x64a: 68, + 0x66e: 68, + 0x66f: 68, + 0x671: 82, + 0x672: 82, + 0x673: 82, + 0x674: 85, + 0x675: 82, + 0x676: 82, + 0x677: 82, + 0x678: 68, + 0x679: 68, + 0x67a: 68, + 0x67b: 68, + 0x67c: 68, + 0x67d: 68, + 0x67e: 68, + 0x67f: 68, + 0x680: 68, + 0x681: 68, + 0x682: 68, + 0x683: 68, + 0x684: 68, + 0x685: 68, + 0x686: 68, + 0x687: 68, + 0x688: 82, + 0x689: 82, + 0x68a: 82, + 0x68b: 82, + 0x68c: 82, + 0x68d: 82, + 0x68e: 82, + 0x68f: 82, + 0x690: 82, + 0x691: 82, + 0x692: 82, + 0x693: 82, + 0x694: 82, + 0x695: 82, + 0x696: 82, + 0x697: 82, + 0x698: 82, + 0x699: 82, + 0x69a: 68, + 0x69b: 68, + 0x69c: 68, + 0x69d: 68, + 0x69e: 68, + 0x69f: 68, + 0x6a0: 68, + 0x6a1: 68, + 0x6a2: 68, + 0x6a3: 68, + 0x6a4: 68, + 0x6a5: 68, + 0x6a6: 68, + 0x6a7: 68, + 0x6a8: 68, + 0x6a9: 68, + 0x6aa: 68, + 0x6ab: 68, + 0x6ac: 68, + 0x6ad: 68, + 0x6ae: 68, + 0x6af: 68, + 0x6b0: 68, + 0x6b1: 68, + 0x6b2: 68, + 0x6b3: 68, + 0x6b4: 68, + 0x6b5: 68, + 0x6b6: 68, + 0x6b7: 68, + 0x6b8: 68, + 0x6b9: 68, + 0x6ba: 68, + 0x6bb: 68, + 0x6bc: 68, + 0x6bd: 68, + 0x6be: 68, + 0x6bf: 68, + 0x6c0: 82, + 0x6c1: 68, + 0x6c2: 68, + 0x6c3: 82, + 0x6c4: 82, + 0x6c5: 82, + 0x6c6: 82, + 0x6c7: 82, + 0x6c8: 82, + 0x6c9: 82, + 0x6ca: 82, + 0x6cb: 82, + 0x6cc: 68, + 0x6cd: 82, + 0x6ce: 68, + 0x6cf: 82, + 0x6d0: 68, + 0x6d1: 68, + 0x6d2: 82, + 0x6d3: 82, + 0x6d5: 82, + 0x6dd: 85, + 0x6ee: 82, + 0x6ef: 82, + 0x6fa: 68, + 0x6fb: 68, + 0x6fc: 68, + 0x6ff: 68, + 0x70f: 84, + 0x710: 82, + 0x712: 68, + 0x713: 68, + 0x714: 68, + 0x715: 82, + 0x716: 82, + 0x717: 82, + 0x718: 82, + 0x719: 82, + 0x71a: 68, + 0x71b: 68, + 0x71c: 68, + 0x71d: 68, + 0x71e: 82, + 0x71f: 68, + 0x720: 68, + 0x721: 68, + 0x722: 68, + 0x723: 68, + 0x724: 68, + 0x725: 68, + 0x726: 68, + 0x727: 68, + 0x728: 82, + 0x729: 68, + 0x72a: 82, + 0x72b: 68, + 0x72c: 82, + 0x72d: 68, + 0x72e: 68, + 0x72f: 82, + 0x74d: 82, + 0x74e: 68, + 0x74f: 68, + 0x750: 68, + 0x751: 68, + 0x752: 68, + 0x753: 68, + 0x754: 68, + 0x755: 68, + 0x756: 68, + 0x757: 68, + 0x758: 68, + 0x759: 82, + 0x75a: 82, + 0x75b: 82, + 0x75c: 68, + 0x75d: 68, + 0x75e: 68, + 0x75f: 68, + 0x760: 68, + 0x761: 68, + 0x762: 68, + 0x763: 68, + 0x764: 68, + 0x765: 68, + 0x766: 68, + 0x767: 68, + 0x768: 68, + 0x769: 68, + 0x76a: 68, + 0x76b: 82, + 0x76c: 82, + 0x76d: 68, + 0x76e: 68, + 0x76f: 68, + 0x770: 68, + 0x771: 82, + 0x772: 68, + 0x773: 82, + 0x774: 82, + 0x775: 68, + 0x776: 68, + 0x777: 68, + 0x778: 82, + 0x779: 82, + 0x77a: 68, + 0x77b: 68, + 0x77c: 68, + 0x77d: 68, + 0x77e: 68, + 0x77f: 68, + 0x7ca: 68, + 0x7cb: 68, + 0x7cc: 68, + 0x7cd: 68, + 0x7ce: 68, + 0x7cf: 68, + 0x7d0: 68, + 0x7d1: 68, + 0x7d2: 68, + 0x7d3: 68, + 0x7d4: 68, + 0x7d5: 68, + 0x7d6: 68, + 0x7d7: 68, + 0x7d8: 68, + 0x7d9: 68, + 0x7da: 68, + 0x7db: 68, + 0x7dc: 68, + 0x7dd: 68, + 0x7de: 68, + 0x7df: 68, + 0x7e0: 68, + 0x7e1: 68, + 0x7e2: 68, + 0x7e3: 68, + 0x7e4: 68, + 0x7e5: 68, + 0x7e6: 68, + 0x7e7: 68, + 0x7e8: 68, + 0x7e9: 68, + 0x7ea: 68, + 0x7fa: 67, + 0x840: 82, + 0x841: 68, + 0x842: 68, + 0x843: 68, + 0x844: 68, + 0x845: 68, + 0x846: 82, + 0x847: 82, + 0x848: 68, + 0x849: 82, + 0x84a: 68, + 0x84b: 68, + 0x84c: 68, + 0x84d: 68, + 0x84e: 68, + 0x84f: 68, + 0x850: 68, + 0x851: 68, + 0x852: 68, + 0x853: 68, + 0x854: 82, + 0x855: 68, + 0x856: 82, + 0x857: 82, + 0x858: 82, + 0x860: 68, + 0x861: 85, + 0x862: 68, + 0x863: 68, + 0x864: 68, + 0x865: 68, + 0x866: 85, + 0x867: 82, + 0x868: 68, + 0x869: 82, + 0x86a: 82, + 0x870: 82, + 0x871: 82, + 0x872: 82, + 0x873: 82, + 0x874: 82, + 0x875: 82, + 0x876: 82, + 0x877: 82, + 0x878: 82, + 0x879: 82, + 0x87a: 82, + 0x87b: 82, + 0x87c: 82, + 0x87d: 82, + 0x87e: 82, + 0x87f: 82, + 0x880: 82, + 0x881: 82, + 0x882: 82, + 0x883: 67, + 0x884: 67, + 0x885: 67, + 0x886: 68, + 0x887: 85, + 0x888: 85, + 0x889: 68, + 0x88a: 68, + 0x88b: 68, + 0x88c: 68, + 0x88d: 68, + 0x88e: 82, + 0x890: 85, + 0x891: 85, + 0x8a0: 68, + 0x8a1: 68, + 0x8a2: 68, + 0x8a3: 68, + 0x8a4: 68, + 0x8a5: 68, + 0x8a6: 68, + 0x8a7: 68, + 0x8a8: 68, + 0x8a9: 68, + 0x8aa: 82, + 0x8ab: 82, + 0x8ac: 82, + 0x8ad: 85, + 0x8ae: 82, + 0x8af: 68, + 0x8b0: 68, + 0x8b1: 82, + 0x8b2: 82, + 0x8b3: 68, + 0x8b4: 68, + 0x8b5: 68, + 0x8b6: 68, + 0x8b7: 68, + 0x8b8: 68, + 0x8b9: 82, + 0x8ba: 68, + 0x8bb: 68, + 0x8bc: 68, + 0x8bd: 68, + 0x8be: 68, + 0x8bf: 68, + 0x8c0: 68, + 0x8c1: 68, + 0x8c2: 68, + 0x8c3: 68, + 0x8c4: 68, + 0x8c5: 68, + 0x8c6: 68, + 0x8c7: 68, + 0x8c8: 68, + 0x8e2: 85, + 0x1806: 85, + 0x1807: 68, + 0x180a: 67, + 0x180e: 85, + 0x1820: 68, + 0x1821: 68, + 0x1822: 68, + 0x1823: 68, + 0x1824: 68, + 0x1825: 68, + 0x1826: 68, + 0x1827: 68, + 0x1828: 68, + 0x1829: 68, + 0x182a: 68, + 0x182b: 68, + 0x182c: 68, + 0x182d: 68, + 0x182e: 68, + 0x182f: 68, + 0x1830: 68, + 0x1831: 68, + 0x1832: 68, + 0x1833: 68, + 0x1834: 68, + 0x1835: 68, + 0x1836: 68, + 0x1837: 68, + 0x1838: 68, + 0x1839: 68, + 0x183a: 68, + 0x183b: 68, + 0x183c: 68, + 0x183d: 68, + 0x183e: 68, + 0x183f: 68, + 0x1840: 68, + 0x1841: 68, + 0x1842: 68, + 0x1843: 68, + 0x1844: 68, + 0x1845: 68, + 0x1846: 68, + 0x1847: 68, + 0x1848: 68, + 0x1849: 68, + 0x184a: 68, + 0x184b: 68, + 0x184c: 68, + 0x184d: 68, + 0x184e: 68, + 0x184f: 68, + 0x1850: 68, + 0x1851: 68, + 0x1852: 68, + 0x1853: 68, + 0x1854: 68, + 0x1855: 68, + 0x1856: 68, + 0x1857: 68, + 0x1858: 68, + 0x1859: 68, + 0x185a: 68, + 0x185b: 68, + 0x185c: 68, + 0x185d: 68, + 0x185e: 68, + 0x185f: 68, + 0x1860: 68, + 0x1861: 68, + 0x1862: 68, + 0x1863: 68, + 0x1864: 68, + 0x1865: 68, + 0x1866: 68, + 0x1867: 68, + 0x1868: 68, + 0x1869: 68, + 0x186a: 68, + 0x186b: 68, + 0x186c: 68, + 0x186d: 68, + 0x186e: 68, + 0x186f: 68, + 0x1870: 68, + 0x1871: 68, + 0x1872: 68, + 0x1873: 68, + 0x1874: 68, + 0x1875: 68, + 0x1876: 68, + 0x1877: 68, + 0x1878: 68, + 0x1880: 85, + 0x1881: 85, + 0x1882: 85, + 0x1883: 85, + 0x1884: 85, + 0x1885: 84, + 0x1886: 84, + 0x1887: 68, + 0x1888: 68, + 0x1889: 68, + 0x188a: 68, + 0x188b: 68, + 0x188c: 68, + 0x188d: 68, + 0x188e: 68, + 0x188f: 68, + 0x1890: 68, + 0x1891: 68, + 0x1892: 68, + 0x1893: 68, + 0x1894: 68, + 0x1895: 68, + 0x1896: 68, + 0x1897: 68, + 0x1898: 68, + 0x1899: 68, + 0x189a: 68, + 0x189b: 68, + 0x189c: 68, + 0x189d: 68, + 0x189e: 68, + 0x189f: 68, + 0x18a0: 68, + 0x18a1: 68, + 0x18a2: 68, + 0x18a3: 68, + 0x18a4: 68, + 0x18a5: 68, + 0x18a6: 68, + 0x18a7: 68, + 0x18a8: 68, + 0x18aa: 68, + 0x200c: 85, + 0x200d: 67, + 0x202f: 85, + 0x2066: 85, + 0x2067: 85, + 0x2068: 85, + 0x2069: 85, + 0xa840: 68, + 0xa841: 68, + 0xa842: 68, + 0xa843: 68, + 0xa844: 68, + 0xa845: 68, + 0xa846: 68, + 0xa847: 68, + 0xa848: 68, + 0xa849: 68, + 0xa84a: 68, + 0xa84b: 68, + 0xa84c: 68, + 0xa84d: 68, + 0xa84e: 68, + 0xa84f: 68, + 0xa850: 68, + 0xa851: 68, + 0xa852: 68, + 0xa853: 68, + 0xa854: 68, + 0xa855: 68, + 0xa856: 68, + 0xa857: 68, + 0xa858: 68, + 0xa859: 68, + 0xa85a: 68, + 0xa85b: 68, + 0xa85c: 68, + 0xa85d: 68, + 0xa85e: 68, + 0xa85f: 68, + 0xa860: 68, + 0xa861: 68, + 0xa862: 68, + 0xa863: 68, + 0xa864: 68, + 0xa865: 68, + 0xa866: 68, + 0xa867: 68, + 0xa868: 68, + 0xa869: 68, + 0xa86a: 68, + 0xa86b: 68, + 0xa86c: 68, + 0xa86d: 68, + 0xa86e: 68, + 0xa86f: 68, + 0xa870: 68, + 0xa871: 68, + 0xa872: 76, + 0xa873: 85, + 0x10ac0: 68, + 0x10ac1: 68, + 0x10ac2: 68, + 0x10ac3: 68, + 0x10ac4: 68, + 0x10ac5: 82, + 0x10ac6: 85, + 0x10ac7: 82, + 0x10ac8: 85, + 0x10ac9: 82, + 0x10aca: 82, + 0x10acb: 85, + 0x10acc: 85, + 0x10acd: 76, + 0x10ace: 82, + 0x10acf: 82, + 0x10ad0: 82, + 0x10ad1: 82, + 0x10ad2: 82, + 0x10ad3: 68, + 0x10ad4: 68, + 0x10ad5: 68, + 0x10ad6: 68, + 0x10ad7: 76, + 0x10ad8: 68, + 0x10ad9: 68, + 0x10ada: 68, + 0x10adb: 68, + 0x10adc: 68, + 0x10add: 82, + 0x10ade: 68, + 0x10adf: 68, + 0x10ae0: 68, + 0x10ae1: 82, + 0x10ae2: 85, + 0x10ae3: 85, + 0x10ae4: 82, + 0x10aeb: 68, + 0x10aec: 68, + 0x10aed: 68, + 0x10aee: 68, + 0x10aef: 82, + 0x10b80: 68, + 0x10b81: 82, + 0x10b82: 68, + 0x10b83: 82, + 0x10b84: 82, + 0x10b85: 82, + 0x10b86: 68, + 0x10b87: 68, + 0x10b88: 68, + 0x10b89: 82, + 0x10b8a: 68, + 0x10b8b: 68, + 0x10b8c: 82, + 0x10b8d: 68, + 0x10b8e: 82, + 0x10b8f: 82, + 0x10b90: 68, + 0x10b91: 82, + 0x10ba9: 82, + 0x10baa: 82, + 0x10bab: 82, + 0x10bac: 82, + 0x10bad: 68, + 0x10bae: 68, + 0x10baf: 85, + 0x10d00: 76, + 0x10d01: 68, + 0x10d02: 68, + 0x10d03: 68, + 0x10d04: 68, + 0x10d05: 68, + 0x10d06: 68, + 0x10d07: 68, + 0x10d08: 68, + 0x10d09: 68, + 0x10d0a: 68, + 0x10d0b: 68, + 0x10d0c: 68, + 0x10d0d: 68, + 0x10d0e: 68, + 0x10d0f: 68, + 0x10d10: 68, + 0x10d11: 68, + 0x10d12: 68, + 0x10d13: 68, + 0x10d14: 68, + 0x10d15: 68, + 0x10d16: 68, + 0x10d17: 68, + 0x10d18: 68, + 0x10d19: 68, + 0x10d1a: 68, + 0x10d1b: 68, + 0x10d1c: 68, + 0x10d1d: 68, + 0x10d1e: 68, + 0x10d1f: 68, + 0x10d20: 68, + 0x10d21: 68, + 0x10d22: 82, + 0x10d23: 68, + 0x10f30: 68, + 0x10f31: 68, + 0x10f32: 68, + 0x10f33: 82, + 0x10f34: 68, + 0x10f35: 68, + 0x10f36: 68, + 0x10f37: 68, + 0x10f38: 68, + 0x10f39: 68, + 0x10f3a: 68, + 0x10f3b: 68, + 0x10f3c: 68, + 0x10f3d: 68, + 0x10f3e: 68, + 0x10f3f: 68, + 0x10f40: 68, + 0x10f41: 68, + 0x10f42: 68, + 0x10f43: 68, + 0x10f44: 68, + 0x10f45: 85, + 0x10f51: 68, + 0x10f52: 68, + 0x10f53: 68, + 0x10f54: 82, + 0x10f70: 68, + 0x10f71: 68, + 0x10f72: 68, + 0x10f73: 68, + 0x10f74: 82, + 0x10f75: 82, + 0x10f76: 68, + 0x10f77: 68, + 0x10f78: 68, + 0x10f79: 68, + 0x10f7a: 68, + 0x10f7b: 68, + 0x10f7c: 68, + 0x10f7d: 68, + 0x10f7e: 68, + 0x10f7f: 68, + 0x10f80: 68, + 0x10f81: 68, + 0x10fb0: 68, + 0x10fb1: 85, + 0x10fb2: 68, + 0x10fb3: 68, + 0x10fb4: 82, + 0x10fb5: 82, + 0x10fb6: 82, + 0x10fb7: 85, + 0x10fb8: 68, + 0x10fb9: 82, + 0x10fba: 82, + 0x10fbb: 68, + 0x10fbc: 68, + 0x10fbd: 82, + 0x10fbe: 68, + 0x10fbf: 68, + 0x10fc0: 85, + 0x10fc1: 68, + 0x10fc2: 82, + 0x10fc3: 82, + 0x10fc4: 68, + 0x10fc5: 85, + 0x10fc6: 85, + 0x10fc7: 85, + 0x10fc8: 85, + 0x10fc9: 82, + 0x10fca: 68, + 0x10fcb: 76, + 0x110bd: 85, + 0x110cd: 85, + 0x1e900: 68, + 0x1e901: 68, + 0x1e902: 68, + 0x1e903: 68, + 0x1e904: 68, + 0x1e905: 68, + 0x1e906: 68, + 0x1e907: 68, + 0x1e908: 68, + 0x1e909: 68, + 0x1e90a: 68, + 0x1e90b: 68, + 0x1e90c: 68, + 0x1e90d: 68, + 0x1e90e: 68, + 0x1e90f: 68, + 0x1e910: 68, + 0x1e911: 68, + 0x1e912: 68, + 0x1e913: 68, + 0x1e914: 68, + 0x1e915: 68, + 0x1e916: 68, + 0x1e917: 68, + 0x1e918: 68, + 0x1e919: 68, + 0x1e91a: 68, + 0x1e91b: 68, + 0x1e91c: 68, + 0x1e91d: 68, + 0x1e91e: 68, + 0x1e91f: 68, + 0x1e920: 68, + 0x1e921: 68, + 0x1e922: 68, + 0x1e923: 68, + 0x1e924: 68, + 0x1e925: 68, + 0x1e926: 68, + 0x1e927: 68, + 0x1e928: 68, + 0x1e929: 68, + 0x1e92a: 68, + 0x1e92b: 68, + 0x1e92c: 68, + 0x1e92d: 68, + 0x1e92e: 68, + 0x1e92f: 68, + 0x1e930: 68, + 0x1e931: 68, + 0x1e932: 68, + 0x1e933: 68, + 0x1e934: 68, + 0x1e935: 68, + 0x1e936: 68, + 0x1e937: 68, + 0x1e938: 68, + 0x1e939: 68, + 0x1e93a: 68, + 0x1e93b: 68, + 0x1e93c: 68, + 0x1e93d: 68, + 0x1e93e: 68, + 0x1e93f: 68, + 0x1e940: 68, + 0x1e941: 68, + 0x1e942: 68, + 0x1e943: 68, + 0x1e94b: 84, +} +codepoint_classes = { + 'PVALID': ( + 0x2d0000002e, + 0x300000003a, + 0x610000007b, + 0xdf000000f7, + 0xf800000100, + 0x10100000102, + 0x10300000104, + 0x10500000106, + 0x10700000108, + 0x1090000010a, + 0x10b0000010c, + 0x10d0000010e, + 0x10f00000110, + 0x11100000112, + 0x11300000114, + 0x11500000116, + 0x11700000118, + 0x1190000011a, + 0x11b0000011c, + 0x11d0000011e, + 0x11f00000120, + 0x12100000122, + 0x12300000124, + 0x12500000126, + 0x12700000128, + 0x1290000012a, + 0x12b0000012c, + 0x12d0000012e, + 0x12f00000130, + 0x13100000132, + 0x13500000136, + 0x13700000139, + 0x13a0000013b, + 0x13c0000013d, + 0x13e0000013f, + 0x14200000143, + 0x14400000145, + 0x14600000147, + 0x14800000149, + 0x14b0000014c, + 0x14d0000014e, + 0x14f00000150, + 0x15100000152, + 0x15300000154, + 0x15500000156, + 0x15700000158, + 0x1590000015a, + 0x15b0000015c, + 0x15d0000015e, + 0x15f00000160, + 0x16100000162, + 0x16300000164, + 0x16500000166, + 0x16700000168, + 0x1690000016a, + 0x16b0000016c, + 0x16d0000016e, + 0x16f00000170, + 0x17100000172, + 0x17300000174, + 0x17500000176, + 0x17700000178, + 0x17a0000017b, + 0x17c0000017d, + 0x17e0000017f, + 0x18000000181, + 0x18300000184, + 0x18500000186, + 0x18800000189, + 0x18c0000018e, + 0x19200000193, + 0x19500000196, + 0x1990000019c, + 0x19e0000019f, + 0x1a1000001a2, + 0x1a3000001a4, + 0x1a5000001a6, + 0x1a8000001a9, + 0x1aa000001ac, + 0x1ad000001ae, + 0x1b0000001b1, + 0x1b4000001b5, + 0x1b6000001b7, + 0x1b9000001bc, + 0x1bd000001c4, + 0x1ce000001cf, + 0x1d0000001d1, + 0x1d2000001d3, + 0x1d4000001d5, + 0x1d6000001d7, + 0x1d8000001d9, + 0x1da000001db, + 0x1dc000001de, + 0x1df000001e0, + 0x1e1000001e2, + 0x1e3000001e4, + 0x1e5000001e6, + 0x1e7000001e8, + 0x1e9000001ea, + 0x1eb000001ec, + 0x1ed000001ee, + 0x1ef000001f1, + 0x1f5000001f6, + 0x1f9000001fa, + 0x1fb000001fc, + 0x1fd000001fe, + 0x1ff00000200, + 0x20100000202, + 0x20300000204, + 0x20500000206, + 0x20700000208, + 0x2090000020a, + 0x20b0000020c, + 0x20d0000020e, + 0x20f00000210, + 0x21100000212, + 0x21300000214, + 0x21500000216, + 0x21700000218, + 0x2190000021a, + 0x21b0000021c, + 0x21d0000021e, + 0x21f00000220, + 0x22100000222, + 0x22300000224, + 0x22500000226, + 0x22700000228, + 0x2290000022a, + 0x22b0000022c, + 0x22d0000022e, + 0x22f00000230, + 0x23100000232, + 0x2330000023a, + 0x23c0000023d, + 0x23f00000241, + 0x24200000243, + 0x24700000248, + 0x2490000024a, + 0x24b0000024c, + 0x24d0000024e, + 0x24f000002b0, + 0x2b9000002c2, + 0x2c6000002d2, + 0x2ec000002ed, + 0x2ee000002ef, + 0x30000000340, + 0x34200000343, + 0x3460000034f, + 0x35000000370, + 0x37100000372, + 0x37300000374, + 0x37700000378, + 0x37b0000037e, + 0x39000000391, + 0x3ac000003cf, + 0x3d7000003d8, + 0x3d9000003da, + 0x3db000003dc, + 0x3dd000003de, + 0x3df000003e0, + 0x3e1000003e2, + 0x3e3000003e4, + 0x3e5000003e6, + 0x3e7000003e8, + 0x3e9000003ea, + 0x3eb000003ec, + 0x3ed000003ee, + 0x3ef000003f0, + 0x3f3000003f4, + 0x3f8000003f9, + 0x3fb000003fd, + 0x43000000460, + 0x46100000462, + 0x46300000464, + 0x46500000466, + 0x46700000468, + 0x4690000046a, + 0x46b0000046c, + 0x46d0000046e, + 0x46f00000470, + 0x47100000472, + 0x47300000474, + 0x47500000476, + 0x47700000478, + 0x4790000047a, + 0x47b0000047c, + 0x47d0000047e, + 0x47f00000480, + 0x48100000482, + 0x48300000488, + 0x48b0000048c, + 0x48d0000048e, + 0x48f00000490, + 0x49100000492, + 0x49300000494, + 0x49500000496, + 0x49700000498, + 0x4990000049a, + 0x49b0000049c, + 0x49d0000049e, + 0x49f000004a0, + 0x4a1000004a2, + 0x4a3000004a4, + 0x4a5000004a6, + 0x4a7000004a8, + 0x4a9000004aa, + 0x4ab000004ac, + 0x4ad000004ae, + 0x4af000004b0, + 0x4b1000004b2, + 0x4b3000004b4, + 0x4b5000004b6, + 0x4b7000004b8, + 0x4b9000004ba, + 0x4bb000004bc, + 0x4bd000004be, + 0x4bf000004c0, + 0x4c2000004c3, + 0x4c4000004c5, + 0x4c6000004c7, + 0x4c8000004c9, + 0x4ca000004cb, + 0x4cc000004cd, + 0x4ce000004d0, + 0x4d1000004d2, + 0x4d3000004d4, + 0x4d5000004d6, + 0x4d7000004d8, + 0x4d9000004da, + 0x4db000004dc, + 0x4dd000004de, + 0x4df000004e0, + 0x4e1000004e2, + 0x4e3000004e4, + 0x4e5000004e6, + 0x4e7000004e8, + 0x4e9000004ea, + 0x4eb000004ec, + 0x4ed000004ee, + 0x4ef000004f0, + 0x4f1000004f2, + 0x4f3000004f4, + 0x4f5000004f6, + 0x4f7000004f8, + 0x4f9000004fa, + 0x4fb000004fc, + 0x4fd000004fe, + 0x4ff00000500, + 0x50100000502, + 0x50300000504, + 0x50500000506, + 0x50700000508, + 0x5090000050a, + 0x50b0000050c, + 0x50d0000050e, + 0x50f00000510, + 0x51100000512, + 0x51300000514, + 0x51500000516, + 0x51700000518, + 0x5190000051a, + 0x51b0000051c, + 0x51d0000051e, + 0x51f00000520, + 0x52100000522, + 0x52300000524, + 0x52500000526, + 0x52700000528, + 0x5290000052a, + 0x52b0000052c, + 0x52d0000052e, + 0x52f00000530, + 0x5590000055a, + 0x56000000587, + 0x58800000589, + 0x591000005be, + 0x5bf000005c0, + 0x5c1000005c3, + 0x5c4000005c6, + 0x5c7000005c8, + 0x5d0000005eb, + 0x5ef000005f3, + 0x6100000061b, + 0x62000000640, + 0x64100000660, + 0x66e00000675, + 0x679000006d4, + 0x6d5000006dd, + 0x6df000006e9, + 0x6ea000006f0, + 0x6fa00000700, + 0x7100000074b, + 0x74d000007b2, + 0x7c0000007f6, + 0x7fd000007fe, + 0x8000000082e, + 0x8400000085c, + 0x8600000086b, + 0x87000000888, + 0x8890000088f, + 0x898000008e2, + 0x8e300000958, + 0x96000000964, + 0x96600000970, + 0x97100000984, + 0x9850000098d, + 0x98f00000991, + 0x993000009a9, + 0x9aa000009b1, + 0x9b2000009b3, + 0x9b6000009ba, + 0x9bc000009c5, + 0x9c7000009c9, + 0x9cb000009cf, + 0x9d7000009d8, + 0x9e0000009e4, + 0x9e6000009f2, + 0x9fc000009fd, + 0x9fe000009ff, + 0xa0100000a04, + 0xa0500000a0b, + 0xa0f00000a11, + 0xa1300000a29, + 0xa2a00000a31, + 0xa3200000a33, + 0xa3500000a36, + 0xa3800000a3a, + 0xa3c00000a3d, + 0xa3e00000a43, + 0xa4700000a49, + 0xa4b00000a4e, + 0xa5100000a52, + 0xa5c00000a5d, + 0xa6600000a76, + 0xa8100000a84, + 0xa8500000a8e, + 0xa8f00000a92, + 0xa9300000aa9, + 0xaaa00000ab1, + 0xab200000ab4, + 0xab500000aba, + 0xabc00000ac6, + 0xac700000aca, + 0xacb00000ace, + 0xad000000ad1, + 0xae000000ae4, + 0xae600000af0, + 0xaf900000b00, + 0xb0100000b04, + 0xb0500000b0d, + 0xb0f00000b11, + 0xb1300000b29, + 0xb2a00000b31, + 0xb3200000b34, + 0xb3500000b3a, + 0xb3c00000b45, + 0xb4700000b49, + 0xb4b00000b4e, + 0xb5500000b58, + 0xb5f00000b64, + 0xb6600000b70, + 0xb7100000b72, + 0xb8200000b84, + 0xb8500000b8b, + 0xb8e00000b91, + 0xb9200000b96, + 0xb9900000b9b, + 0xb9c00000b9d, + 0xb9e00000ba0, + 0xba300000ba5, + 0xba800000bab, + 0xbae00000bba, + 0xbbe00000bc3, + 0xbc600000bc9, + 0xbca00000bce, + 0xbd000000bd1, + 0xbd700000bd8, + 0xbe600000bf0, + 0xc0000000c0d, + 0xc0e00000c11, + 0xc1200000c29, + 0xc2a00000c3a, + 0xc3c00000c45, + 0xc4600000c49, + 0xc4a00000c4e, + 0xc5500000c57, + 0xc5800000c5b, + 0xc5d00000c5e, + 0xc6000000c64, + 0xc6600000c70, + 0xc8000000c84, + 0xc8500000c8d, + 0xc8e00000c91, + 0xc9200000ca9, + 0xcaa00000cb4, + 0xcb500000cba, + 0xcbc00000cc5, + 0xcc600000cc9, + 0xcca00000cce, + 0xcd500000cd7, + 0xcdd00000cdf, + 0xce000000ce4, + 0xce600000cf0, + 0xcf100000cf3, + 0xd0000000d0d, + 0xd0e00000d11, + 0xd1200000d45, + 0xd4600000d49, + 0xd4a00000d4f, + 0xd5400000d58, + 0xd5f00000d64, + 0xd6600000d70, + 0xd7a00000d80, + 0xd8100000d84, + 0xd8500000d97, + 0xd9a00000db2, + 0xdb300000dbc, + 0xdbd00000dbe, + 0xdc000000dc7, + 0xdca00000dcb, + 0xdcf00000dd5, + 0xdd600000dd7, + 0xdd800000de0, + 0xde600000df0, + 0xdf200000df4, + 0xe0100000e33, + 0xe3400000e3b, + 0xe4000000e4f, + 0xe5000000e5a, + 0xe8100000e83, + 0xe8400000e85, + 0xe8600000e8b, + 0xe8c00000ea4, + 0xea500000ea6, + 0xea700000eb3, + 0xeb400000ebe, + 0xec000000ec5, + 0xec600000ec7, + 0xec800000ece, + 0xed000000eda, + 0xede00000ee0, + 0xf0000000f01, + 0xf0b00000f0c, + 0xf1800000f1a, + 0xf2000000f2a, + 0xf3500000f36, + 0xf3700000f38, + 0xf3900000f3a, + 0xf3e00000f43, + 0xf4400000f48, + 0xf4900000f4d, + 0xf4e00000f52, + 0xf5300000f57, + 0xf5800000f5c, + 0xf5d00000f69, + 0xf6a00000f6d, + 0xf7100000f73, + 0xf7400000f75, + 0xf7a00000f81, + 0xf8200000f85, + 0xf8600000f93, + 0xf9400000f98, + 0xf9900000f9d, + 0xf9e00000fa2, + 0xfa300000fa7, + 0xfa800000fac, + 0xfad00000fb9, + 0xfba00000fbd, + 0xfc600000fc7, + 0x10000000104a, + 0x10500000109e, + 0x10d0000010fb, + 0x10fd00001100, + 0x120000001249, + 0x124a0000124e, + 0x125000001257, + 0x125800001259, + 0x125a0000125e, + 0x126000001289, + 0x128a0000128e, + 0x1290000012b1, + 0x12b2000012b6, + 0x12b8000012bf, + 0x12c0000012c1, + 0x12c2000012c6, + 0x12c8000012d7, + 0x12d800001311, + 0x131200001316, + 0x13180000135b, + 0x135d00001360, + 0x138000001390, + 0x13a0000013f6, + 0x14010000166d, + 0x166f00001680, + 0x16810000169b, + 0x16a0000016eb, + 0x16f1000016f9, + 0x170000001716, + 0x171f00001735, + 0x174000001754, + 0x17600000176d, + 0x176e00001771, + 0x177200001774, + 0x1780000017b4, + 0x17b6000017d4, + 0x17d7000017d8, + 0x17dc000017de, + 0x17e0000017ea, + 0x18100000181a, + 0x182000001879, + 0x1880000018ab, + 0x18b0000018f6, + 0x19000000191f, + 0x19200000192c, + 0x19300000193c, + 0x19460000196e, + 0x197000001975, + 0x1980000019ac, + 0x19b0000019ca, + 0x19d0000019da, + 0x1a0000001a1c, + 0x1a2000001a5f, + 0x1a6000001a7d, + 0x1a7f00001a8a, + 0x1a9000001a9a, + 0x1aa700001aa8, + 0x1ab000001abe, + 0x1abf00001acf, + 0x1b0000001b4d, + 0x1b5000001b5a, + 0x1b6b00001b74, + 0x1b8000001bf4, + 0x1c0000001c38, + 0x1c4000001c4a, + 0x1c4d00001c7e, + 0x1cd000001cd3, + 0x1cd400001cfb, + 0x1d0000001d2c, + 0x1d2f00001d30, + 0x1d3b00001d3c, + 0x1d4e00001d4f, + 0x1d6b00001d78, + 0x1d7900001d9b, + 0x1dc000001e00, + 0x1e0100001e02, + 0x1e0300001e04, + 0x1e0500001e06, + 0x1e0700001e08, + 0x1e0900001e0a, + 0x1e0b00001e0c, + 0x1e0d00001e0e, + 0x1e0f00001e10, + 0x1e1100001e12, + 0x1e1300001e14, + 0x1e1500001e16, + 0x1e1700001e18, + 0x1e1900001e1a, + 0x1e1b00001e1c, + 0x1e1d00001e1e, + 0x1e1f00001e20, + 0x1e2100001e22, + 0x1e2300001e24, + 0x1e2500001e26, + 0x1e2700001e28, + 0x1e2900001e2a, + 0x1e2b00001e2c, + 0x1e2d00001e2e, + 0x1e2f00001e30, + 0x1e3100001e32, + 0x1e3300001e34, + 0x1e3500001e36, + 0x1e3700001e38, + 0x1e3900001e3a, + 0x1e3b00001e3c, + 0x1e3d00001e3e, + 0x1e3f00001e40, + 0x1e4100001e42, + 0x1e4300001e44, + 0x1e4500001e46, + 0x1e4700001e48, + 0x1e4900001e4a, + 0x1e4b00001e4c, + 0x1e4d00001e4e, + 0x1e4f00001e50, + 0x1e5100001e52, + 0x1e5300001e54, + 0x1e5500001e56, + 0x1e5700001e58, + 0x1e5900001e5a, + 0x1e5b00001e5c, + 0x1e5d00001e5e, + 0x1e5f00001e60, + 0x1e6100001e62, + 0x1e6300001e64, + 0x1e6500001e66, + 0x1e6700001e68, + 0x1e6900001e6a, + 0x1e6b00001e6c, + 0x1e6d00001e6e, + 0x1e6f00001e70, + 0x1e7100001e72, + 0x1e7300001e74, + 0x1e7500001e76, + 0x1e7700001e78, + 0x1e7900001e7a, + 0x1e7b00001e7c, + 0x1e7d00001e7e, + 0x1e7f00001e80, + 0x1e8100001e82, + 0x1e8300001e84, + 0x1e8500001e86, + 0x1e8700001e88, + 0x1e8900001e8a, + 0x1e8b00001e8c, + 0x1e8d00001e8e, + 0x1e8f00001e90, + 0x1e9100001e92, + 0x1e9300001e94, + 0x1e9500001e9a, + 0x1e9c00001e9e, + 0x1e9f00001ea0, + 0x1ea100001ea2, + 0x1ea300001ea4, + 0x1ea500001ea6, + 0x1ea700001ea8, + 0x1ea900001eaa, + 0x1eab00001eac, + 0x1ead00001eae, + 0x1eaf00001eb0, + 0x1eb100001eb2, + 0x1eb300001eb4, + 0x1eb500001eb6, + 0x1eb700001eb8, + 0x1eb900001eba, + 0x1ebb00001ebc, + 0x1ebd00001ebe, + 0x1ebf00001ec0, + 0x1ec100001ec2, + 0x1ec300001ec4, + 0x1ec500001ec6, + 0x1ec700001ec8, + 0x1ec900001eca, + 0x1ecb00001ecc, + 0x1ecd00001ece, + 0x1ecf00001ed0, + 0x1ed100001ed2, + 0x1ed300001ed4, + 0x1ed500001ed6, + 0x1ed700001ed8, + 0x1ed900001eda, + 0x1edb00001edc, + 0x1edd00001ede, + 0x1edf00001ee0, + 0x1ee100001ee2, + 0x1ee300001ee4, + 0x1ee500001ee6, + 0x1ee700001ee8, + 0x1ee900001eea, + 0x1eeb00001eec, + 0x1eed00001eee, + 0x1eef00001ef0, + 0x1ef100001ef2, + 0x1ef300001ef4, + 0x1ef500001ef6, + 0x1ef700001ef8, + 0x1ef900001efa, + 0x1efb00001efc, + 0x1efd00001efe, + 0x1eff00001f08, + 0x1f1000001f16, + 0x1f2000001f28, + 0x1f3000001f38, + 0x1f4000001f46, + 0x1f5000001f58, + 0x1f6000001f68, + 0x1f7000001f71, + 0x1f7200001f73, + 0x1f7400001f75, + 0x1f7600001f77, + 0x1f7800001f79, + 0x1f7a00001f7b, + 0x1f7c00001f7d, + 0x1fb000001fb2, + 0x1fb600001fb7, + 0x1fc600001fc7, + 0x1fd000001fd3, + 0x1fd600001fd8, + 0x1fe000001fe3, + 0x1fe400001fe8, + 0x1ff600001ff7, + 0x214e0000214f, + 0x218400002185, + 0x2c3000002c60, + 0x2c6100002c62, + 0x2c6500002c67, + 0x2c6800002c69, + 0x2c6a00002c6b, + 0x2c6c00002c6d, + 0x2c7100002c72, + 0x2c7300002c75, + 0x2c7600002c7c, + 0x2c8100002c82, + 0x2c8300002c84, + 0x2c8500002c86, + 0x2c8700002c88, + 0x2c8900002c8a, + 0x2c8b00002c8c, + 0x2c8d00002c8e, + 0x2c8f00002c90, + 0x2c9100002c92, + 0x2c9300002c94, + 0x2c9500002c96, + 0x2c9700002c98, + 0x2c9900002c9a, + 0x2c9b00002c9c, + 0x2c9d00002c9e, + 0x2c9f00002ca0, + 0x2ca100002ca2, + 0x2ca300002ca4, + 0x2ca500002ca6, + 0x2ca700002ca8, + 0x2ca900002caa, + 0x2cab00002cac, + 0x2cad00002cae, + 0x2caf00002cb0, + 0x2cb100002cb2, + 0x2cb300002cb4, + 0x2cb500002cb6, + 0x2cb700002cb8, + 0x2cb900002cba, + 0x2cbb00002cbc, + 0x2cbd00002cbe, + 0x2cbf00002cc0, + 0x2cc100002cc2, + 0x2cc300002cc4, + 0x2cc500002cc6, + 0x2cc700002cc8, + 0x2cc900002cca, + 0x2ccb00002ccc, + 0x2ccd00002cce, + 0x2ccf00002cd0, + 0x2cd100002cd2, + 0x2cd300002cd4, + 0x2cd500002cd6, + 0x2cd700002cd8, + 0x2cd900002cda, + 0x2cdb00002cdc, + 0x2cdd00002cde, + 0x2cdf00002ce0, + 0x2ce100002ce2, + 0x2ce300002ce5, + 0x2cec00002ced, + 0x2cee00002cf2, + 0x2cf300002cf4, + 0x2d0000002d26, + 0x2d2700002d28, + 0x2d2d00002d2e, + 0x2d3000002d68, + 0x2d7f00002d97, + 0x2da000002da7, + 0x2da800002daf, + 0x2db000002db7, + 0x2db800002dbf, + 0x2dc000002dc7, + 0x2dc800002dcf, + 0x2dd000002dd7, + 0x2dd800002ddf, + 0x2de000002e00, + 0x2e2f00002e30, + 0x300500003008, + 0x302a0000302e, + 0x303c0000303d, + 0x304100003097, + 0x30990000309b, + 0x309d0000309f, + 0x30a1000030fb, + 0x30fc000030ff, + 0x310500003130, + 0x31a0000031c0, + 0x31f000003200, + 0x340000004dc0, + 0x4e000000a48d, + 0xa4d00000a4fe, + 0xa5000000a60d, + 0xa6100000a62c, + 0xa6410000a642, + 0xa6430000a644, + 0xa6450000a646, + 0xa6470000a648, + 0xa6490000a64a, + 0xa64b0000a64c, + 0xa64d0000a64e, + 0xa64f0000a650, + 0xa6510000a652, + 0xa6530000a654, + 0xa6550000a656, + 0xa6570000a658, + 0xa6590000a65a, + 0xa65b0000a65c, + 0xa65d0000a65e, + 0xa65f0000a660, + 0xa6610000a662, + 0xa6630000a664, + 0xa6650000a666, + 0xa6670000a668, + 0xa6690000a66a, + 0xa66b0000a66c, + 0xa66d0000a670, + 0xa6740000a67e, + 0xa67f0000a680, + 0xa6810000a682, + 0xa6830000a684, + 0xa6850000a686, + 0xa6870000a688, + 0xa6890000a68a, + 0xa68b0000a68c, + 0xa68d0000a68e, + 0xa68f0000a690, + 0xa6910000a692, + 0xa6930000a694, + 0xa6950000a696, + 0xa6970000a698, + 0xa6990000a69a, + 0xa69b0000a69c, + 0xa69e0000a6e6, + 0xa6f00000a6f2, + 0xa7170000a720, + 0xa7230000a724, + 0xa7250000a726, + 0xa7270000a728, + 0xa7290000a72a, + 0xa72b0000a72c, + 0xa72d0000a72e, + 0xa72f0000a732, + 0xa7330000a734, + 0xa7350000a736, + 0xa7370000a738, + 0xa7390000a73a, + 0xa73b0000a73c, + 0xa73d0000a73e, + 0xa73f0000a740, + 0xa7410000a742, + 0xa7430000a744, + 0xa7450000a746, + 0xa7470000a748, + 0xa7490000a74a, + 0xa74b0000a74c, + 0xa74d0000a74e, + 0xa74f0000a750, + 0xa7510000a752, + 0xa7530000a754, + 0xa7550000a756, + 0xa7570000a758, + 0xa7590000a75a, + 0xa75b0000a75c, + 0xa75d0000a75e, + 0xa75f0000a760, + 0xa7610000a762, + 0xa7630000a764, + 0xa7650000a766, + 0xa7670000a768, + 0xa7690000a76a, + 0xa76b0000a76c, + 0xa76d0000a76e, + 0xa76f0000a770, + 0xa7710000a779, + 0xa77a0000a77b, + 0xa77c0000a77d, + 0xa77f0000a780, + 0xa7810000a782, + 0xa7830000a784, + 0xa7850000a786, + 0xa7870000a789, + 0xa78c0000a78d, + 0xa78e0000a790, + 0xa7910000a792, + 0xa7930000a796, + 0xa7970000a798, + 0xa7990000a79a, + 0xa79b0000a79c, + 0xa79d0000a79e, + 0xa79f0000a7a0, + 0xa7a10000a7a2, + 0xa7a30000a7a4, + 0xa7a50000a7a6, + 0xa7a70000a7a8, + 0xa7a90000a7aa, + 0xa7af0000a7b0, + 0xa7b50000a7b6, + 0xa7b70000a7b8, + 0xa7b90000a7ba, + 0xa7bb0000a7bc, + 0xa7bd0000a7be, + 0xa7bf0000a7c0, + 0xa7c10000a7c2, + 0xa7c30000a7c4, + 0xa7c80000a7c9, + 0xa7ca0000a7cb, + 0xa7d10000a7d2, + 0xa7d30000a7d4, + 0xa7d50000a7d6, + 0xa7d70000a7d8, + 0xa7d90000a7da, + 0xa7f20000a7f5, + 0xa7f60000a7f8, + 0xa7fa0000a828, + 0xa82c0000a82d, + 0xa8400000a874, + 0xa8800000a8c6, + 0xa8d00000a8da, + 0xa8e00000a8f8, + 0xa8fb0000a8fc, + 0xa8fd0000a92e, + 0xa9300000a954, + 0xa9800000a9c1, + 0xa9cf0000a9da, + 0xa9e00000a9ff, + 0xaa000000aa37, + 0xaa400000aa4e, + 0xaa500000aa5a, + 0xaa600000aa77, + 0xaa7a0000aac3, + 0xaadb0000aade, + 0xaae00000aaf0, + 0xaaf20000aaf7, + 0xab010000ab07, + 0xab090000ab0f, + 0xab110000ab17, + 0xab200000ab27, + 0xab280000ab2f, + 0xab300000ab5b, + 0xab600000ab6a, + 0xabc00000abeb, + 0xabec0000abee, + 0xabf00000abfa, + 0xac000000d7a4, + 0xfa0e0000fa10, + 0xfa110000fa12, + 0xfa130000fa15, + 0xfa1f0000fa20, + 0xfa210000fa22, + 0xfa230000fa25, + 0xfa270000fa2a, + 0xfb1e0000fb1f, + 0xfe200000fe30, + 0xfe730000fe74, + 0x100000001000c, + 0x1000d00010027, + 0x100280001003b, + 0x1003c0001003e, + 0x1003f0001004e, + 0x100500001005e, + 0x10080000100fb, + 0x101fd000101fe, + 0x102800001029d, + 0x102a0000102d1, + 0x102e0000102e1, + 0x1030000010320, + 0x1032d00010341, + 0x103420001034a, + 0x103500001037b, + 0x103800001039e, + 0x103a0000103c4, + 0x103c8000103d0, + 0x104280001049e, + 0x104a0000104aa, + 0x104d8000104fc, + 0x1050000010528, + 0x1053000010564, + 0x10597000105a2, + 0x105a3000105b2, + 0x105b3000105ba, + 0x105bb000105bd, + 0x1060000010737, + 0x1074000010756, + 0x1076000010768, + 0x1078000010786, + 0x10787000107b1, + 0x107b2000107bb, + 0x1080000010806, + 0x1080800010809, + 0x1080a00010836, + 0x1083700010839, + 0x1083c0001083d, + 0x1083f00010856, + 0x1086000010877, + 0x108800001089f, + 0x108e0000108f3, + 0x108f4000108f6, + 0x1090000010916, + 0x109200001093a, + 0x10980000109b8, + 0x109be000109c0, + 0x10a0000010a04, + 0x10a0500010a07, + 0x10a0c00010a14, + 0x10a1500010a18, + 0x10a1900010a36, + 0x10a3800010a3b, + 0x10a3f00010a40, + 0x10a6000010a7d, + 0x10a8000010a9d, + 0x10ac000010ac8, + 0x10ac900010ae7, + 0x10b0000010b36, + 0x10b4000010b56, + 0x10b6000010b73, + 0x10b8000010b92, + 0x10c0000010c49, + 0x10cc000010cf3, + 0x10d0000010d28, + 0x10d3000010d3a, + 0x10e8000010eaa, + 0x10eab00010ead, + 0x10eb000010eb2, + 0x10f0000010f1d, + 0x10f2700010f28, + 0x10f3000010f51, + 0x10f7000010f86, + 0x10fb000010fc5, + 0x10fe000010ff7, + 0x1100000011047, + 0x1106600011076, + 0x1107f000110bb, + 0x110c2000110c3, + 0x110d0000110e9, + 0x110f0000110fa, + 0x1110000011135, + 0x1113600011140, + 0x1114400011148, + 0x1115000011174, + 0x1117600011177, + 0x11180000111c5, + 0x111c9000111cd, + 0x111ce000111db, + 0x111dc000111dd, + 0x1120000011212, + 0x1121300011238, + 0x1123e0001123f, + 0x1128000011287, + 0x1128800011289, + 0x1128a0001128e, + 0x1128f0001129e, + 0x1129f000112a9, + 0x112b0000112eb, + 0x112f0000112fa, + 0x1130000011304, + 0x113050001130d, + 0x1130f00011311, + 0x1131300011329, + 0x1132a00011331, + 0x1133200011334, + 0x113350001133a, + 0x1133b00011345, + 0x1134700011349, + 0x1134b0001134e, + 0x1135000011351, + 0x1135700011358, + 0x1135d00011364, + 0x113660001136d, + 0x1137000011375, + 0x114000001144b, + 0x114500001145a, + 0x1145e00011462, + 0x11480000114c6, + 0x114c7000114c8, + 0x114d0000114da, + 0x11580000115b6, + 0x115b8000115c1, + 0x115d8000115de, + 0x1160000011641, + 0x1164400011645, + 0x116500001165a, + 0x11680000116b9, + 0x116c0000116ca, + 0x117000001171b, + 0x1171d0001172c, + 0x117300001173a, + 0x1174000011747, + 0x118000001183b, + 0x118c0000118ea, + 0x118ff00011907, + 0x119090001190a, + 0x1190c00011914, + 0x1191500011917, + 0x1191800011936, + 0x1193700011939, + 0x1193b00011944, + 0x119500001195a, + 0x119a0000119a8, + 0x119aa000119d8, + 0x119da000119e2, + 0x119e3000119e5, + 0x11a0000011a3f, + 0x11a4700011a48, + 0x11a5000011a9a, + 0x11a9d00011a9e, + 0x11ab000011af9, + 0x11c0000011c09, + 0x11c0a00011c37, + 0x11c3800011c41, + 0x11c5000011c5a, + 0x11c7200011c90, + 0x11c9200011ca8, + 0x11ca900011cb7, + 0x11d0000011d07, + 0x11d0800011d0a, + 0x11d0b00011d37, + 0x11d3a00011d3b, + 0x11d3c00011d3e, + 0x11d3f00011d48, + 0x11d5000011d5a, + 0x11d6000011d66, + 0x11d6700011d69, + 0x11d6a00011d8f, + 0x11d9000011d92, + 0x11d9300011d99, + 0x11da000011daa, + 0x11ee000011ef7, + 0x11fb000011fb1, + 0x120000001239a, + 0x1248000012544, + 0x12f9000012ff1, + 0x130000001342f, + 0x1440000014647, + 0x1680000016a39, + 0x16a4000016a5f, + 0x16a6000016a6a, + 0x16a7000016abf, + 0x16ac000016aca, + 0x16ad000016aee, + 0x16af000016af5, + 0x16b0000016b37, + 0x16b4000016b44, + 0x16b5000016b5a, + 0x16b6300016b78, + 0x16b7d00016b90, + 0x16e6000016e80, + 0x16f0000016f4b, + 0x16f4f00016f88, + 0x16f8f00016fa0, + 0x16fe000016fe2, + 0x16fe300016fe5, + 0x16ff000016ff2, + 0x17000000187f8, + 0x1880000018cd6, + 0x18d0000018d09, + 0x1aff00001aff4, + 0x1aff50001affc, + 0x1affd0001afff, + 0x1b0000001b123, + 0x1b1500001b153, + 0x1b1640001b168, + 0x1b1700001b2fc, + 0x1bc000001bc6b, + 0x1bc700001bc7d, + 0x1bc800001bc89, + 0x1bc900001bc9a, + 0x1bc9d0001bc9f, + 0x1cf000001cf2e, + 0x1cf300001cf47, + 0x1da000001da37, + 0x1da3b0001da6d, + 0x1da750001da76, + 0x1da840001da85, + 0x1da9b0001daa0, + 0x1daa10001dab0, + 0x1df000001df1f, + 0x1e0000001e007, + 0x1e0080001e019, + 0x1e01b0001e022, + 0x1e0230001e025, + 0x1e0260001e02b, + 0x1e1000001e12d, + 0x1e1300001e13e, + 0x1e1400001e14a, + 0x1e14e0001e14f, + 0x1e2900001e2af, + 0x1e2c00001e2fa, + 0x1e7e00001e7e7, + 0x1e7e80001e7ec, + 0x1e7ed0001e7ef, + 0x1e7f00001e7ff, + 0x1e8000001e8c5, + 0x1e8d00001e8d7, + 0x1e9220001e94c, + 0x1e9500001e95a, + 0x1fbf00001fbfa, + 0x200000002a6e0, + 0x2a7000002b739, + 0x2b7400002b81e, + 0x2b8200002cea2, + 0x2ceb00002ebe1, + 0x300000003134b, + ), + 'CONTEXTJ': ( + 0x200c0000200e, + ), + 'CONTEXTO': ( + 0xb7000000b8, + 0x37500000376, + 0x5f3000005f5, + 0x6600000066a, + 0x6f0000006fa, + 0x30fb000030fc, + ), +} diff --git a/lib/idna/intranges.py b/lib/idna/intranges.py new file mode 100644 index 00000000..6a43b047 --- /dev/null +++ b/lib/idna/intranges.py @@ -0,0 +1,54 @@ +""" +Given a list of integers, made up of (hopefully) a small number of long runs +of consecutive integers, compute a representation of the form +((start1, end1), (start2, end2) ...). Then answer the question "was x present +in the original list?" in time O(log(# runs)). +""" + +import bisect +from typing import List, Tuple + +def intranges_from_list(list_: List[int]) -> Tuple[int, ...]: + """Represent a list of integers as a sequence of ranges: + ((start_0, end_0), (start_1, end_1), ...), such that the original + integers are exactly those x such that start_i <= x < end_i for some i. + + Ranges are encoded as single integers (start << 32 | end), not as tuples. + """ + + sorted_list = sorted(list_) + ranges = [] + last_write = -1 + for i in range(len(sorted_list)): + if i+1 < len(sorted_list): + if sorted_list[i] == sorted_list[i+1]-1: + continue + current_range = sorted_list[last_write+1:i+1] + ranges.append(_encode_range(current_range[0], current_range[-1] + 1)) + last_write = i + + return tuple(ranges) + +def _encode_range(start: int, end: int) -> int: + return (start << 32) | end + +def _decode_range(r: int) -> Tuple[int, int]: + return (r >> 32), (r & ((1 << 32) - 1)) + + +def intranges_contain(int_: int, ranges: Tuple[int, ...]) -> bool: + """Determine if `int_` falls into one of the ranges in `ranges`.""" + tuple_ = _encode_range(int_, 0) + pos = bisect.bisect_left(ranges, tuple_) + # we could be immediately ahead of a tuple (start, end) + # with start < int_ <= end + if pos > 0: + left, right = _decode_range(ranges[pos-1]) + if left <= int_ < right: + return True + # or we could be immediately behind a tuple (int_, end) + if pos < len(ranges): + left, _ = _decode_range(ranges[pos]) + if left == int_: + return True + return False diff --git a/lib/idna/package_data.py b/lib/idna/package_data.py new file mode 100644 index 00000000..f5ea87c1 --- /dev/null +++ b/lib/idna/package_data.py @@ -0,0 +1,2 @@ +__version__ = '3.3' + diff --git a/lib/idna/py.typed b/lib/idna/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/idna/uts46data.py b/lib/idna/uts46data.py new file mode 100644 index 00000000..8f65705e --- /dev/null +++ b/lib/idna/uts46data.py @@ -0,0 +1,8512 @@ +# This file is automatically generated by tools/idna-data +# vim: set fileencoding=utf-8 : + +from typing import List, Tuple, Union + + +"""IDNA Mapping Table from UTS46.""" + + +__version__ = '14.0.0' +def _seg_0() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x0, '3'), + (0x1, '3'), + (0x2, '3'), + (0x3, '3'), + (0x4, '3'), + (0x5, '3'), + (0x6, '3'), + (0x7, '3'), + (0x8, '3'), + (0x9, '3'), + (0xA, '3'), + (0xB, '3'), + (0xC, '3'), + (0xD, '3'), + (0xE, '3'), + (0xF, '3'), + (0x10, '3'), + (0x11, '3'), + (0x12, '3'), + (0x13, '3'), + (0x14, '3'), + (0x15, '3'), + (0x16, '3'), + (0x17, '3'), + (0x18, '3'), + (0x19, '3'), + (0x1A, '3'), + (0x1B, '3'), + (0x1C, '3'), + (0x1D, '3'), + (0x1E, '3'), + (0x1F, '3'), + (0x20, '3'), + (0x21, '3'), + (0x22, '3'), + (0x23, '3'), + (0x24, '3'), + (0x25, '3'), + (0x26, '3'), + (0x27, '3'), + (0x28, '3'), + (0x29, '3'), + (0x2A, '3'), + (0x2B, '3'), + (0x2C, '3'), + (0x2D, 'V'), + (0x2E, 'V'), + (0x2F, '3'), + (0x30, 'V'), + (0x31, 'V'), + (0x32, 'V'), + (0x33, 'V'), + (0x34, 'V'), + (0x35, 'V'), + (0x36, 'V'), + (0x37, 'V'), + (0x38, 'V'), + (0x39, 'V'), + (0x3A, '3'), + (0x3B, '3'), + (0x3C, '3'), + (0x3D, '3'), + (0x3E, '3'), + (0x3F, '3'), + (0x40, '3'), + (0x41, 'M', 'a'), + (0x42, 'M', 'b'), + (0x43, 'M', 'c'), + (0x44, 'M', 'd'), + (0x45, 'M', 'e'), + (0x46, 'M', 'f'), + (0x47, 'M', 'g'), + (0x48, 'M', 'h'), + (0x49, 'M', 'i'), + (0x4A, 'M', 'j'), + (0x4B, 'M', 'k'), + (0x4C, 'M', 'l'), + (0x4D, 'M', 'm'), + (0x4E, 'M', 'n'), + (0x4F, 'M', 'o'), + (0x50, 'M', 'p'), + (0x51, 'M', 'q'), + (0x52, 'M', 'r'), + (0x53, 'M', 's'), + (0x54, 'M', 't'), + (0x55, 'M', 'u'), + (0x56, 'M', 'v'), + (0x57, 'M', 'w'), + (0x58, 'M', 'x'), + (0x59, 'M', 'y'), + (0x5A, 'M', 'z'), + (0x5B, '3'), + (0x5C, '3'), + (0x5D, '3'), + (0x5E, '3'), + (0x5F, '3'), + (0x60, '3'), + (0x61, 'V'), + (0x62, 'V'), + (0x63, 'V'), + ] + +def _seg_1() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x64, 'V'), + (0x65, 'V'), + (0x66, 'V'), + (0x67, 'V'), + (0x68, 'V'), + (0x69, 'V'), + (0x6A, 'V'), + (0x6B, 'V'), + (0x6C, 'V'), + (0x6D, 'V'), + (0x6E, 'V'), + (0x6F, 'V'), + (0x70, 'V'), + (0x71, 'V'), + (0x72, 'V'), + (0x73, 'V'), + (0x74, 'V'), + (0x75, 'V'), + (0x76, 'V'), + (0x77, 'V'), + (0x78, 'V'), + (0x79, 'V'), + (0x7A, 'V'), + (0x7B, '3'), + (0x7C, '3'), + (0x7D, '3'), + (0x7E, '3'), + (0x7F, '3'), + (0x80, 'X'), + (0x81, 'X'), + (0x82, 'X'), + (0x83, 'X'), + (0x84, 'X'), + (0x85, 'X'), + (0x86, 'X'), + (0x87, 'X'), + (0x88, 'X'), + (0x89, 'X'), + (0x8A, 'X'), + (0x8B, 'X'), + (0x8C, 'X'), + (0x8D, 'X'), + (0x8E, 'X'), + (0x8F, 'X'), + (0x90, 'X'), + (0x91, 'X'), + (0x92, 'X'), + (0x93, 'X'), + (0x94, 'X'), + (0x95, 'X'), + (0x96, 'X'), + (0x97, 'X'), + (0x98, 'X'), + (0x99, 'X'), + (0x9A, 'X'), + (0x9B, 'X'), + (0x9C, 'X'), + (0x9D, 'X'), + (0x9E, 'X'), + (0x9F, 'X'), + (0xA0, '3', ' '), + (0xA1, 'V'), + (0xA2, 'V'), + (0xA3, 'V'), + (0xA4, 'V'), + (0xA5, 'V'), + (0xA6, 'V'), + (0xA7, 'V'), + (0xA8, '3', ' ̈'), + (0xA9, 'V'), + (0xAA, 'M', 'a'), + (0xAB, 'V'), + (0xAC, 'V'), + (0xAD, 'I'), + (0xAE, 'V'), + (0xAF, '3', ' ̄'), + (0xB0, 'V'), + (0xB1, 'V'), + (0xB2, 'M', '2'), + (0xB3, 'M', '3'), + (0xB4, '3', ' ́'), + (0xB5, 'M', 'μ'), + (0xB6, 'V'), + (0xB7, 'V'), + (0xB8, '3', ' ̧'), + (0xB9, 'M', '1'), + (0xBA, 'M', 'o'), + (0xBB, 'V'), + (0xBC, 'M', '1⁄4'), + (0xBD, 'M', '1⁄2'), + (0xBE, 'M', '3⁄4'), + (0xBF, 'V'), + (0xC0, 'M', 'à'), + (0xC1, 'M', 'á'), + (0xC2, 'M', 'â'), + (0xC3, 'M', 'ã'), + (0xC4, 'M', 'ä'), + (0xC5, 'M', 'å'), + (0xC6, 'M', 'æ'), + (0xC7, 'M', 'ç'), + ] + +def _seg_2() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xC8, 'M', 'è'), + (0xC9, 'M', 'é'), + (0xCA, 'M', 'ê'), + (0xCB, 'M', 'ë'), + (0xCC, 'M', 'ì'), + (0xCD, 'M', 'í'), + (0xCE, 'M', 'î'), + (0xCF, 'M', 'ï'), + (0xD0, 'M', 'ð'), + (0xD1, 'M', 'ñ'), + (0xD2, 'M', 'ò'), + (0xD3, 'M', 'ó'), + (0xD4, 'M', 'ô'), + (0xD5, 'M', 'õ'), + (0xD6, 'M', 'ö'), + (0xD7, 'V'), + (0xD8, 'M', 'ø'), + (0xD9, 'M', 'ù'), + (0xDA, 'M', 'ú'), + (0xDB, 'M', 'û'), + (0xDC, 'M', 'ü'), + (0xDD, 'M', 'ý'), + (0xDE, 'M', 'þ'), + (0xDF, 'D', 'ss'), + (0xE0, 'V'), + (0xE1, 'V'), + (0xE2, 'V'), + (0xE3, 'V'), + (0xE4, 'V'), + (0xE5, 'V'), + (0xE6, 'V'), + (0xE7, 'V'), + (0xE8, 'V'), + (0xE9, 'V'), + (0xEA, 'V'), + (0xEB, 'V'), + (0xEC, 'V'), + (0xED, 'V'), + (0xEE, 'V'), + (0xEF, 'V'), + (0xF0, 'V'), + (0xF1, 'V'), + (0xF2, 'V'), + (0xF3, 'V'), + (0xF4, 'V'), + (0xF5, 'V'), + (0xF6, 'V'), + (0xF7, 'V'), + (0xF8, 'V'), + (0xF9, 'V'), + (0xFA, 'V'), + (0xFB, 'V'), + (0xFC, 'V'), + (0xFD, 'V'), + (0xFE, 'V'), + (0xFF, 'V'), + (0x100, 'M', 'ā'), + (0x101, 'V'), + (0x102, 'M', 'ă'), + (0x103, 'V'), + (0x104, 'M', 'ą'), + (0x105, 'V'), + (0x106, 'M', 'ć'), + (0x107, 'V'), + (0x108, 'M', 'ĉ'), + (0x109, 'V'), + (0x10A, 'M', 'ċ'), + (0x10B, 'V'), + (0x10C, 'M', 'č'), + (0x10D, 'V'), + (0x10E, 'M', 'ď'), + (0x10F, 'V'), + (0x110, 'M', 'đ'), + (0x111, 'V'), + (0x112, 'M', 'ē'), + (0x113, 'V'), + (0x114, 'M', 'ĕ'), + (0x115, 'V'), + (0x116, 'M', 'ė'), + (0x117, 'V'), + (0x118, 'M', 'ę'), + (0x119, 'V'), + (0x11A, 'M', 'ě'), + (0x11B, 'V'), + (0x11C, 'M', 'ĝ'), + (0x11D, 'V'), + (0x11E, 'M', 'ğ'), + (0x11F, 'V'), + (0x120, 'M', 'ġ'), + (0x121, 'V'), + (0x122, 'M', 'ģ'), + (0x123, 'V'), + (0x124, 'M', 'ĥ'), + (0x125, 'V'), + (0x126, 'M', 'ħ'), + (0x127, 'V'), + (0x128, 'M', 'ĩ'), + (0x129, 'V'), + (0x12A, 'M', 'ī'), + (0x12B, 'V'), + ] + +def _seg_3() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x12C, 'M', 'ĭ'), + (0x12D, 'V'), + (0x12E, 'M', 'į'), + (0x12F, 'V'), + (0x130, 'M', 'i̇'), + (0x131, 'V'), + (0x132, 'M', 'ij'), + (0x134, 'M', 'ĵ'), + (0x135, 'V'), + (0x136, 'M', 'ķ'), + (0x137, 'V'), + (0x139, 'M', 'ĺ'), + (0x13A, 'V'), + (0x13B, 'M', 'ļ'), + (0x13C, 'V'), + (0x13D, 'M', 'ľ'), + (0x13E, 'V'), + (0x13F, 'M', 'l·'), + (0x141, 'M', 'ł'), + (0x142, 'V'), + (0x143, 'M', 'ń'), + (0x144, 'V'), + (0x145, 'M', 'ņ'), + (0x146, 'V'), + (0x147, 'M', 'ň'), + (0x148, 'V'), + (0x149, 'M', 'ʼn'), + (0x14A, 'M', 'ŋ'), + (0x14B, 'V'), + (0x14C, 'M', 'ō'), + (0x14D, 'V'), + (0x14E, 'M', 'ŏ'), + (0x14F, 'V'), + (0x150, 'M', 'ő'), + (0x151, 'V'), + (0x152, 'M', 'œ'), + (0x153, 'V'), + (0x154, 'M', 'ŕ'), + (0x155, 'V'), + (0x156, 'M', 'ŗ'), + (0x157, 'V'), + (0x158, 'M', 'ř'), + (0x159, 'V'), + (0x15A, 'M', 'ś'), + (0x15B, 'V'), + (0x15C, 'M', 'ŝ'), + (0x15D, 'V'), + (0x15E, 'M', 'ş'), + (0x15F, 'V'), + (0x160, 'M', 'š'), + (0x161, 'V'), + (0x162, 'M', 'ţ'), + (0x163, 'V'), + (0x164, 'M', 'ť'), + (0x165, 'V'), + (0x166, 'M', 'ŧ'), + (0x167, 'V'), + (0x168, 'M', 'ũ'), + (0x169, 'V'), + (0x16A, 'M', 'ū'), + (0x16B, 'V'), + (0x16C, 'M', 'ŭ'), + (0x16D, 'V'), + (0x16E, 'M', 'ů'), + (0x16F, 'V'), + (0x170, 'M', 'ű'), + (0x171, 'V'), + (0x172, 'M', 'ų'), + (0x173, 'V'), + (0x174, 'M', 'ŵ'), + (0x175, 'V'), + (0x176, 'M', 'ŷ'), + (0x177, 'V'), + (0x178, 'M', 'ÿ'), + (0x179, 'M', 'ź'), + (0x17A, 'V'), + (0x17B, 'M', 'ż'), + (0x17C, 'V'), + (0x17D, 'M', 'ž'), + (0x17E, 'V'), + (0x17F, 'M', 's'), + (0x180, 'V'), + (0x181, 'M', 'ɓ'), + (0x182, 'M', 'ƃ'), + (0x183, 'V'), + (0x184, 'M', 'ƅ'), + (0x185, 'V'), + (0x186, 'M', 'ɔ'), + (0x187, 'M', 'ƈ'), + (0x188, 'V'), + (0x189, 'M', 'ɖ'), + (0x18A, 'M', 'ɗ'), + (0x18B, 'M', 'ƌ'), + (0x18C, 'V'), + (0x18E, 'M', 'ǝ'), + (0x18F, 'M', 'ə'), + (0x190, 'M', 'ɛ'), + (0x191, 'M', 'ƒ'), + (0x192, 'V'), + (0x193, 'M', 'ɠ'), + ] + +def _seg_4() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x194, 'M', 'ɣ'), + (0x195, 'V'), + (0x196, 'M', 'ɩ'), + (0x197, 'M', 'ɨ'), + (0x198, 'M', 'ƙ'), + (0x199, 'V'), + (0x19C, 'M', 'ɯ'), + (0x19D, 'M', 'ɲ'), + (0x19E, 'V'), + (0x19F, 'M', 'ɵ'), + (0x1A0, 'M', 'ơ'), + (0x1A1, 'V'), + (0x1A2, 'M', 'ƣ'), + (0x1A3, 'V'), + (0x1A4, 'M', 'ƥ'), + (0x1A5, 'V'), + (0x1A6, 'M', 'ʀ'), + (0x1A7, 'M', 'ƨ'), + (0x1A8, 'V'), + (0x1A9, 'M', 'ʃ'), + (0x1AA, 'V'), + (0x1AC, 'M', 'ƭ'), + (0x1AD, 'V'), + (0x1AE, 'M', 'ʈ'), + (0x1AF, 'M', 'ư'), + (0x1B0, 'V'), + (0x1B1, 'M', 'ʊ'), + (0x1B2, 'M', 'ʋ'), + (0x1B3, 'M', 'ƴ'), + (0x1B4, 'V'), + (0x1B5, 'M', 'ƶ'), + (0x1B6, 'V'), + (0x1B7, 'M', 'ʒ'), + (0x1B8, 'M', 'ƹ'), + (0x1B9, 'V'), + (0x1BC, 'M', 'ƽ'), + (0x1BD, 'V'), + (0x1C4, 'M', 'dž'), + (0x1C7, 'M', 'lj'), + (0x1CA, 'M', 'nj'), + (0x1CD, 'M', 'ǎ'), + (0x1CE, 'V'), + (0x1CF, 'M', 'ǐ'), + (0x1D0, 'V'), + (0x1D1, 'M', 'ǒ'), + (0x1D2, 'V'), + (0x1D3, 'M', 'ǔ'), + (0x1D4, 'V'), + (0x1D5, 'M', 'ǖ'), + (0x1D6, 'V'), + (0x1D7, 'M', 'ǘ'), + (0x1D8, 'V'), + (0x1D9, 'M', 'ǚ'), + (0x1DA, 'V'), + (0x1DB, 'M', 'ǜ'), + (0x1DC, 'V'), + (0x1DE, 'M', 'ǟ'), + (0x1DF, 'V'), + (0x1E0, 'M', 'ǡ'), + (0x1E1, 'V'), + (0x1E2, 'M', 'ǣ'), + (0x1E3, 'V'), + (0x1E4, 'M', 'ǥ'), + (0x1E5, 'V'), + (0x1E6, 'M', 'ǧ'), + (0x1E7, 'V'), + (0x1E8, 'M', 'ǩ'), + (0x1E9, 'V'), + (0x1EA, 'M', 'ǫ'), + (0x1EB, 'V'), + (0x1EC, 'M', 'ǭ'), + (0x1ED, 'V'), + (0x1EE, 'M', 'ǯ'), + (0x1EF, 'V'), + (0x1F1, 'M', 'dz'), + (0x1F4, 'M', 'ǵ'), + (0x1F5, 'V'), + (0x1F6, 'M', 'ƕ'), + (0x1F7, 'M', 'ƿ'), + (0x1F8, 'M', 'ǹ'), + (0x1F9, 'V'), + (0x1FA, 'M', 'ǻ'), + (0x1FB, 'V'), + (0x1FC, 'M', 'ǽ'), + (0x1FD, 'V'), + (0x1FE, 'M', 'ǿ'), + (0x1FF, 'V'), + (0x200, 'M', 'ȁ'), + (0x201, 'V'), + (0x202, 'M', 'ȃ'), + (0x203, 'V'), + (0x204, 'M', 'ȅ'), + (0x205, 'V'), + (0x206, 'M', 'ȇ'), + (0x207, 'V'), + (0x208, 'M', 'ȉ'), + (0x209, 'V'), + (0x20A, 'M', 'ȋ'), + (0x20B, 'V'), + (0x20C, 'M', 'ȍ'), + ] + +def _seg_5() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x20D, 'V'), + (0x20E, 'M', 'ȏ'), + (0x20F, 'V'), + (0x210, 'M', 'ȑ'), + (0x211, 'V'), + (0x212, 'M', 'ȓ'), + (0x213, 'V'), + (0x214, 'M', 'ȕ'), + (0x215, 'V'), + (0x216, 'M', 'ȗ'), + (0x217, 'V'), + (0x218, 'M', 'ș'), + (0x219, 'V'), + (0x21A, 'M', 'ț'), + (0x21B, 'V'), + (0x21C, 'M', 'ȝ'), + (0x21D, 'V'), + (0x21E, 'M', 'ȟ'), + (0x21F, 'V'), + (0x220, 'M', 'ƞ'), + (0x221, 'V'), + (0x222, 'M', 'ȣ'), + (0x223, 'V'), + (0x224, 'M', 'ȥ'), + (0x225, 'V'), + (0x226, 'M', 'ȧ'), + (0x227, 'V'), + (0x228, 'M', 'ȩ'), + (0x229, 'V'), + (0x22A, 'M', 'ȫ'), + (0x22B, 'V'), + (0x22C, 'M', 'ȭ'), + (0x22D, 'V'), + (0x22E, 'M', 'ȯ'), + (0x22F, 'V'), + (0x230, 'M', 'ȱ'), + (0x231, 'V'), + (0x232, 'M', 'ȳ'), + (0x233, 'V'), + (0x23A, 'M', 'ⱥ'), + (0x23B, 'M', 'ȼ'), + (0x23C, 'V'), + (0x23D, 'M', 'ƚ'), + (0x23E, 'M', 'ⱦ'), + (0x23F, 'V'), + (0x241, 'M', 'ɂ'), + (0x242, 'V'), + (0x243, 'M', 'ƀ'), + (0x244, 'M', 'ʉ'), + (0x245, 'M', 'ʌ'), + (0x246, 'M', 'ɇ'), + (0x247, 'V'), + (0x248, 'M', 'ɉ'), + (0x249, 'V'), + (0x24A, 'M', 'ɋ'), + (0x24B, 'V'), + (0x24C, 'M', 'ɍ'), + (0x24D, 'V'), + (0x24E, 'M', 'ɏ'), + (0x24F, 'V'), + (0x2B0, 'M', 'h'), + (0x2B1, 'M', 'ɦ'), + (0x2B2, 'M', 'j'), + (0x2B3, 'M', 'r'), + (0x2B4, 'M', 'ɹ'), + (0x2B5, 'M', 'ɻ'), + (0x2B6, 'M', 'ʁ'), + (0x2B7, 'M', 'w'), + (0x2B8, 'M', 'y'), + (0x2B9, 'V'), + (0x2D8, '3', ' ̆'), + (0x2D9, '3', ' ̇'), + (0x2DA, '3', ' ̊'), + (0x2DB, '3', ' ̨'), + (0x2DC, '3', ' ̃'), + (0x2DD, '3', ' ̋'), + (0x2DE, 'V'), + (0x2E0, 'M', 'ɣ'), + (0x2E1, 'M', 'l'), + (0x2E2, 'M', 's'), + (0x2E3, 'M', 'x'), + (0x2E4, 'M', 'ʕ'), + (0x2E5, 'V'), + (0x340, 'M', '̀'), + (0x341, 'M', '́'), + (0x342, 'V'), + (0x343, 'M', '̓'), + (0x344, 'M', '̈́'), + (0x345, 'M', 'ι'), + (0x346, 'V'), + (0x34F, 'I'), + (0x350, 'V'), + (0x370, 'M', 'ͱ'), + (0x371, 'V'), + (0x372, 'M', 'ͳ'), + (0x373, 'V'), + (0x374, 'M', 'ʹ'), + (0x375, 'V'), + (0x376, 'M', 'ͷ'), + (0x377, 'V'), + ] + +def _seg_6() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x378, 'X'), + (0x37A, '3', ' ι'), + (0x37B, 'V'), + (0x37E, '3', ';'), + (0x37F, 'M', 'ϳ'), + (0x380, 'X'), + (0x384, '3', ' ́'), + (0x385, '3', ' ̈́'), + (0x386, 'M', 'ά'), + (0x387, 'M', '·'), + (0x388, 'M', 'έ'), + (0x389, 'M', 'ή'), + (0x38A, 'M', 'ί'), + (0x38B, 'X'), + (0x38C, 'M', 'ό'), + (0x38D, 'X'), + (0x38E, 'M', 'ύ'), + (0x38F, 'M', 'ώ'), + (0x390, 'V'), + (0x391, 'M', 'α'), + (0x392, 'M', 'β'), + (0x393, 'M', 'γ'), + (0x394, 'M', 'δ'), + (0x395, 'M', 'ε'), + (0x396, 'M', 'ζ'), + (0x397, 'M', 'η'), + (0x398, 'M', 'θ'), + (0x399, 'M', 'ι'), + (0x39A, 'M', 'κ'), + (0x39B, 'M', 'λ'), + (0x39C, 'M', 'μ'), + (0x39D, 'M', 'ν'), + (0x39E, 'M', 'ξ'), + (0x39F, 'M', 'ο'), + (0x3A0, 'M', 'π'), + (0x3A1, 'M', 'ρ'), + (0x3A2, 'X'), + (0x3A3, 'M', 'σ'), + (0x3A4, 'M', 'τ'), + (0x3A5, 'M', 'υ'), + (0x3A6, 'M', 'φ'), + (0x3A7, 'M', 'χ'), + (0x3A8, 'M', 'ψ'), + (0x3A9, 'M', 'ω'), + (0x3AA, 'M', 'ϊ'), + (0x3AB, 'M', 'ϋ'), + (0x3AC, 'V'), + (0x3C2, 'D', 'σ'), + (0x3C3, 'V'), + (0x3CF, 'M', 'ϗ'), + (0x3D0, 'M', 'β'), + (0x3D1, 'M', 'θ'), + (0x3D2, 'M', 'υ'), + (0x3D3, 'M', 'ύ'), + (0x3D4, 'M', 'ϋ'), + (0x3D5, 'M', 'φ'), + (0x3D6, 'M', 'π'), + (0x3D7, 'V'), + (0x3D8, 'M', 'ϙ'), + (0x3D9, 'V'), + (0x3DA, 'M', 'ϛ'), + (0x3DB, 'V'), + (0x3DC, 'M', 'ϝ'), + (0x3DD, 'V'), + (0x3DE, 'M', 'ϟ'), + (0x3DF, 'V'), + (0x3E0, 'M', 'ϡ'), + (0x3E1, 'V'), + (0x3E2, 'M', 'ϣ'), + (0x3E3, 'V'), + (0x3E4, 'M', 'ϥ'), + (0x3E5, 'V'), + (0x3E6, 'M', 'ϧ'), + (0x3E7, 'V'), + (0x3E8, 'M', 'ϩ'), + (0x3E9, 'V'), + (0x3EA, 'M', 'ϫ'), + (0x3EB, 'V'), + (0x3EC, 'M', 'ϭ'), + (0x3ED, 'V'), + (0x3EE, 'M', 'ϯ'), + (0x3EF, 'V'), + (0x3F0, 'M', 'κ'), + (0x3F1, 'M', 'ρ'), + (0x3F2, 'M', 'σ'), + (0x3F3, 'V'), + (0x3F4, 'M', 'θ'), + (0x3F5, 'M', 'ε'), + (0x3F6, 'V'), + (0x3F7, 'M', 'ϸ'), + (0x3F8, 'V'), + (0x3F9, 'M', 'σ'), + (0x3FA, 'M', 'ϻ'), + (0x3FB, 'V'), + (0x3FD, 'M', 'ͻ'), + (0x3FE, 'M', 'ͼ'), + (0x3FF, 'M', 'ͽ'), + (0x400, 'M', 'ѐ'), + (0x401, 'M', 'ё'), + (0x402, 'M', 'ђ'), + ] + +def _seg_7() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x403, 'M', 'ѓ'), + (0x404, 'M', 'є'), + (0x405, 'M', 'ѕ'), + (0x406, 'M', 'і'), + (0x407, 'M', 'ї'), + (0x408, 'M', 'ј'), + (0x409, 'M', 'љ'), + (0x40A, 'M', 'њ'), + (0x40B, 'M', 'ћ'), + (0x40C, 'M', 'ќ'), + (0x40D, 'M', 'ѝ'), + (0x40E, 'M', 'ў'), + (0x40F, 'M', 'џ'), + (0x410, 'M', 'а'), + (0x411, 'M', 'б'), + (0x412, 'M', 'в'), + (0x413, 'M', 'г'), + (0x414, 'M', 'д'), + (0x415, 'M', 'е'), + (0x416, 'M', 'ж'), + (0x417, 'M', 'з'), + (0x418, 'M', 'и'), + (0x419, 'M', 'й'), + (0x41A, 'M', 'к'), + (0x41B, 'M', 'л'), + (0x41C, 'M', 'м'), + (0x41D, 'M', 'н'), + (0x41E, 'M', 'о'), + (0x41F, 'M', 'п'), + (0x420, 'M', 'р'), + (0x421, 'M', 'с'), + (0x422, 'M', 'т'), + (0x423, 'M', 'у'), + (0x424, 'M', 'ф'), + (0x425, 'M', 'х'), + (0x426, 'M', 'ц'), + (0x427, 'M', 'ч'), + (0x428, 'M', 'ш'), + (0x429, 'M', 'щ'), + (0x42A, 'M', 'ъ'), + (0x42B, 'M', 'ы'), + (0x42C, 'M', 'ь'), + (0x42D, 'M', 'э'), + (0x42E, 'M', 'ю'), + (0x42F, 'M', 'я'), + (0x430, 'V'), + (0x460, 'M', 'ѡ'), + (0x461, 'V'), + (0x462, 'M', 'ѣ'), + (0x463, 'V'), + (0x464, 'M', 'ѥ'), + (0x465, 'V'), + (0x466, 'M', 'ѧ'), + (0x467, 'V'), + (0x468, 'M', 'ѩ'), + (0x469, 'V'), + (0x46A, 'M', 'ѫ'), + (0x46B, 'V'), + (0x46C, 'M', 'ѭ'), + (0x46D, 'V'), + (0x46E, 'M', 'ѯ'), + (0x46F, 'V'), + (0x470, 'M', 'ѱ'), + (0x471, 'V'), + (0x472, 'M', 'ѳ'), + (0x473, 'V'), + (0x474, 'M', 'ѵ'), + (0x475, 'V'), + (0x476, 'M', 'ѷ'), + (0x477, 'V'), + (0x478, 'M', 'ѹ'), + (0x479, 'V'), + (0x47A, 'M', 'ѻ'), + (0x47B, 'V'), + (0x47C, 'M', 'ѽ'), + (0x47D, 'V'), + (0x47E, 'M', 'ѿ'), + (0x47F, 'V'), + (0x480, 'M', 'ҁ'), + (0x481, 'V'), + (0x48A, 'M', 'ҋ'), + (0x48B, 'V'), + (0x48C, 'M', 'ҍ'), + (0x48D, 'V'), + (0x48E, 'M', 'ҏ'), + (0x48F, 'V'), + (0x490, 'M', 'ґ'), + (0x491, 'V'), + (0x492, 'M', 'ғ'), + (0x493, 'V'), + (0x494, 'M', 'ҕ'), + (0x495, 'V'), + (0x496, 'M', 'җ'), + (0x497, 'V'), + (0x498, 'M', 'ҙ'), + (0x499, 'V'), + (0x49A, 'M', 'қ'), + (0x49B, 'V'), + (0x49C, 'M', 'ҝ'), + (0x49D, 'V'), + ] + +def _seg_8() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x49E, 'M', 'ҟ'), + (0x49F, 'V'), + (0x4A0, 'M', 'ҡ'), + (0x4A1, 'V'), + (0x4A2, 'M', 'ң'), + (0x4A3, 'V'), + (0x4A4, 'M', 'ҥ'), + (0x4A5, 'V'), + (0x4A6, 'M', 'ҧ'), + (0x4A7, 'V'), + (0x4A8, 'M', 'ҩ'), + (0x4A9, 'V'), + (0x4AA, 'M', 'ҫ'), + (0x4AB, 'V'), + (0x4AC, 'M', 'ҭ'), + (0x4AD, 'V'), + (0x4AE, 'M', 'ү'), + (0x4AF, 'V'), + (0x4B0, 'M', 'ұ'), + (0x4B1, 'V'), + (0x4B2, 'M', 'ҳ'), + (0x4B3, 'V'), + (0x4B4, 'M', 'ҵ'), + (0x4B5, 'V'), + (0x4B6, 'M', 'ҷ'), + (0x4B7, 'V'), + (0x4B8, 'M', 'ҹ'), + (0x4B9, 'V'), + (0x4BA, 'M', 'һ'), + (0x4BB, 'V'), + (0x4BC, 'M', 'ҽ'), + (0x4BD, 'V'), + (0x4BE, 'M', 'ҿ'), + (0x4BF, 'V'), + (0x4C0, 'X'), + (0x4C1, 'M', 'ӂ'), + (0x4C2, 'V'), + (0x4C3, 'M', 'ӄ'), + (0x4C4, 'V'), + (0x4C5, 'M', 'ӆ'), + (0x4C6, 'V'), + (0x4C7, 'M', 'ӈ'), + (0x4C8, 'V'), + (0x4C9, 'M', 'ӊ'), + (0x4CA, 'V'), + (0x4CB, 'M', 'ӌ'), + (0x4CC, 'V'), + (0x4CD, 'M', 'ӎ'), + (0x4CE, 'V'), + (0x4D0, 'M', 'ӑ'), + (0x4D1, 'V'), + (0x4D2, 'M', 'ӓ'), + (0x4D3, 'V'), + (0x4D4, 'M', 'ӕ'), + (0x4D5, 'V'), + (0x4D6, 'M', 'ӗ'), + (0x4D7, 'V'), + (0x4D8, 'M', 'ә'), + (0x4D9, 'V'), + (0x4DA, 'M', 'ӛ'), + (0x4DB, 'V'), + (0x4DC, 'M', 'ӝ'), + (0x4DD, 'V'), + (0x4DE, 'M', 'ӟ'), + (0x4DF, 'V'), + (0x4E0, 'M', 'ӡ'), + (0x4E1, 'V'), + (0x4E2, 'M', 'ӣ'), + (0x4E3, 'V'), + (0x4E4, 'M', 'ӥ'), + (0x4E5, 'V'), + (0x4E6, 'M', 'ӧ'), + (0x4E7, 'V'), + (0x4E8, 'M', 'ө'), + (0x4E9, 'V'), + (0x4EA, 'M', 'ӫ'), + (0x4EB, 'V'), + (0x4EC, 'M', 'ӭ'), + (0x4ED, 'V'), + (0x4EE, 'M', 'ӯ'), + (0x4EF, 'V'), + (0x4F0, 'M', 'ӱ'), + (0x4F1, 'V'), + (0x4F2, 'M', 'ӳ'), + (0x4F3, 'V'), + (0x4F4, 'M', 'ӵ'), + (0x4F5, 'V'), + (0x4F6, 'M', 'ӷ'), + (0x4F7, 'V'), + (0x4F8, 'M', 'ӹ'), + (0x4F9, 'V'), + (0x4FA, 'M', 'ӻ'), + (0x4FB, 'V'), + (0x4FC, 'M', 'ӽ'), + (0x4FD, 'V'), + (0x4FE, 'M', 'ӿ'), + (0x4FF, 'V'), + (0x500, 'M', 'ԁ'), + (0x501, 'V'), + (0x502, 'M', 'ԃ'), + ] + +def _seg_9() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x503, 'V'), + (0x504, 'M', 'ԅ'), + (0x505, 'V'), + (0x506, 'M', 'ԇ'), + (0x507, 'V'), + (0x508, 'M', 'ԉ'), + (0x509, 'V'), + (0x50A, 'M', 'ԋ'), + (0x50B, 'V'), + (0x50C, 'M', 'ԍ'), + (0x50D, 'V'), + (0x50E, 'M', 'ԏ'), + (0x50F, 'V'), + (0x510, 'M', 'ԑ'), + (0x511, 'V'), + (0x512, 'M', 'ԓ'), + (0x513, 'V'), + (0x514, 'M', 'ԕ'), + (0x515, 'V'), + (0x516, 'M', 'ԗ'), + (0x517, 'V'), + (0x518, 'M', 'ԙ'), + (0x519, 'V'), + (0x51A, 'M', 'ԛ'), + (0x51B, 'V'), + (0x51C, 'M', 'ԝ'), + (0x51D, 'V'), + (0x51E, 'M', 'ԟ'), + (0x51F, 'V'), + (0x520, 'M', 'ԡ'), + (0x521, 'V'), + (0x522, 'M', 'ԣ'), + (0x523, 'V'), + (0x524, 'M', 'ԥ'), + (0x525, 'V'), + (0x526, 'M', 'ԧ'), + (0x527, 'V'), + (0x528, 'M', 'ԩ'), + (0x529, 'V'), + (0x52A, 'M', 'ԫ'), + (0x52B, 'V'), + (0x52C, 'M', 'ԭ'), + (0x52D, 'V'), + (0x52E, 'M', 'ԯ'), + (0x52F, 'V'), + (0x530, 'X'), + (0x531, 'M', 'ա'), + (0x532, 'M', 'բ'), + (0x533, 'M', 'գ'), + (0x534, 'M', 'դ'), + (0x535, 'M', 'ե'), + (0x536, 'M', 'զ'), + (0x537, 'M', 'է'), + (0x538, 'M', 'ը'), + (0x539, 'M', 'թ'), + (0x53A, 'M', 'ժ'), + (0x53B, 'M', 'ի'), + (0x53C, 'M', 'լ'), + (0x53D, 'M', 'խ'), + (0x53E, 'M', 'ծ'), + (0x53F, 'M', 'կ'), + (0x540, 'M', 'հ'), + (0x541, 'M', 'ձ'), + (0x542, 'M', 'ղ'), + (0x543, 'M', 'ճ'), + (0x544, 'M', 'մ'), + (0x545, 'M', 'յ'), + (0x546, 'M', 'ն'), + (0x547, 'M', 'շ'), + (0x548, 'M', 'ո'), + (0x549, 'M', 'չ'), + (0x54A, 'M', 'պ'), + (0x54B, 'M', 'ջ'), + (0x54C, 'M', 'ռ'), + (0x54D, 'M', 'ս'), + (0x54E, 'M', 'վ'), + (0x54F, 'M', 'տ'), + (0x550, 'M', 'ր'), + (0x551, 'M', 'ց'), + (0x552, 'M', 'ւ'), + (0x553, 'M', 'փ'), + (0x554, 'M', 'ք'), + (0x555, 'M', 'օ'), + (0x556, 'M', 'ֆ'), + (0x557, 'X'), + (0x559, 'V'), + (0x587, 'M', 'եւ'), + (0x588, 'V'), + (0x58B, 'X'), + (0x58D, 'V'), + (0x590, 'X'), + (0x591, 'V'), + (0x5C8, 'X'), + (0x5D0, 'V'), + (0x5EB, 'X'), + (0x5EF, 'V'), + (0x5F5, 'X'), + (0x606, 'V'), + (0x61C, 'X'), + (0x61D, 'V'), + ] + +def _seg_10() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x675, 'M', 'اٴ'), + (0x676, 'M', 'وٴ'), + (0x677, 'M', 'ۇٴ'), + (0x678, 'M', 'يٴ'), + (0x679, 'V'), + (0x6DD, 'X'), + (0x6DE, 'V'), + (0x70E, 'X'), + (0x710, 'V'), + (0x74B, 'X'), + (0x74D, 'V'), + (0x7B2, 'X'), + (0x7C0, 'V'), + (0x7FB, 'X'), + (0x7FD, 'V'), + (0x82E, 'X'), + (0x830, 'V'), + (0x83F, 'X'), + (0x840, 'V'), + (0x85C, 'X'), + (0x85E, 'V'), + (0x85F, 'X'), + (0x860, 'V'), + (0x86B, 'X'), + (0x870, 'V'), + (0x88F, 'X'), + (0x898, 'V'), + (0x8E2, 'X'), + (0x8E3, 'V'), + (0x958, 'M', 'क़'), + (0x959, 'M', 'ख़'), + (0x95A, 'M', 'ग़'), + (0x95B, 'M', 'ज़'), + (0x95C, 'M', 'ड़'), + (0x95D, 'M', 'ढ़'), + (0x95E, 'M', 'फ़'), + (0x95F, 'M', 'य़'), + (0x960, 'V'), + (0x984, 'X'), + (0x985, 'V'), + (0x98D, 'X'), + (0x98F, 'V'), + (0x991, 'X'), + (0x993, 'V'), + (0x9A9, 'X'), + (0x9AA, 'V'), + (0x9B1, 'X'), + (0x9B2, 'V'), + (0x9B3, 'X'), + (0x9B6, 'V'), + (0x9BA, 'X'), + (0x9BC, 'V'), + (0x9C5, 'X'), + (0x9C7, 'V'), + (0x9C9, 'X'), + (0x9CB, 'V'), + (0x9CF, 'X'), + (0x9D7, 'V'), + (0x9D8, 'X'), + (0x9DC, 'M', 'ড়'), + (0x9DD, 'M', 'ঢ়'), + (0x9DE, 'X'), + (0x9DF, 'M', 'য়'), + (0x9E0, 'V'), + (0x9E4, 'X'), + (0x9E6, 'V'), + (0x9FF, 'X'), + (0xA01, 'V'), + (0xA04, 'X'), + (0xA05, 'V'), + (0xA0B, 'X'), + (0xA0F, 'V'), + (0xA11, 'X'), + (0xA13, 'V'), + (0xA29, 'X'), + (0xA2A, 'V'), + (0xA31, 'X'), + (0xA32, 'V'), + (0xA33, 'M', 'ਲ਼'), + (0xA34, 'X'), + (0xA35, 'V'), + (0xA36, 'M', 'ਸ਼'), + (0xA37, 'X'), + (0xA38, 'V'), + (0xA3A, 'X'), + (0xA3C, 'V'), + (0xA3D, 'X'), + (0xA3E, 'V'), + (0xA43, 'X'), + (0xA47, 'V'), + (0xA49, 'X'), + (0xA4B, 'V'), + (0xA4E, 'X'), + (0xA51, 'V'), + (0xA52, 'X'), + (0xA59, 'M', 'ਖ਼'), + (0xA5A, 'M', 'ਗ਼'), + (0xA5B, 'M', 'ਜ਼'), + (0xA5C, 'V'), + (0xA5D, 'X'), + ] + +def _seg_11() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA5E, 'M', 'ਫ਼'), + (0xA5F, 'X'), + (0xA66, 'V'), + (0xA77, 'X'), + (0xA81, 'V'), + (0xA84, 'X'), + (0xA85, 'V'), + (0xA8E, 'X'), + (0xA8F, 'V'), + (0xA92, 'X'), + (0xA93, 'V'), + (0xAA9, 'X'), + (0xAAA, 'V'), + (0xAB1, 'X'), + (0xAB2, 'V'), + (0xAB4, 'X'), + (0xAB5, 'V'), + (0xABA, 'X'), + (0xABC, 'V'), + (0xAC6, 'X'), + (0xAC7, 'V'), + (0xACA, 'X'), + (0xACB, 'V'), + (0xACE, 'X'), + (0xAD0, 'V'), + (0xAD1, 'X'), + (0xAE0, 'V'), + (0xAE4, 'X'), + (0xAE6, 'V'), + (0xAF2, 'X'), + (0xAF9, 'V'), + (0xB00, 'X'), + (0xB01, 'V'), + (0xB04, 'X'), + (0xB05, 'V'), + (0xB0D, 'X'), + (0xB0F, 'V'), + (0xB11, 'X'), + (0xB13, 'V'), + (0xB29, 'X'), + (0xB2A, 'V'), + (0xB31, 'X'), + (0xB32, 'V'), + (0xB34, 'X'), + (0xB35, 'V'), + (0xB3A, 'X'), + (0xB3C, 'V'), + (0xB45, 'X'), + (0xB47, 'V'), + (0xB49, 'X'), + (0xB4B, 'V'), + (0xB4E, 'X'), + (0xB55, 'V'), + (0xB58, 'X'), + (0xB5C, 'M', 'ଡ଼'), + (0xB5D, 'M', 'ଢ଼'), + (0xB5E, 'X'), + (0xB5F, 'V'), + (0xB64, 'X'), + (0xB66, 'V'), + (0xB78, 'X'), + (0xB82, 'V'), + (0xB84, 'X'), + (0xB85, 'V'), + (0xB8B, 'X'), + (0xB8E, 'V'), + (0xB91, 'X'), + (0xB92, 'V'), + (0xB96, 'X'), + (0xB99, 'V'), + (0xB9B, 'X'), + (0xB9C, 'V'), + (0xB9D, 'X'), + (0xB9E, 'V'), + (0xBA0, 'X'), + (0xBA3, 'V'), + (0xBA5, 'X'), + (0xBA8, 'V'), + (0xBAB, 'X'), + (0xBAE, 'V'), + (0xBBA, 'X'), + (0xBBE, 'V'), + (0xBC3, 'X'), + (0xBC6, 'V'), + (0xBC9, 'X'), + (0xBCA, 'V'), + (0xBCE, 'X'), + (0xBD0, 'V'), + (0xBD1, 'X'), + (0xBD7, 'V'), + (0xBD8, 'X'), + (0xBE6, 'V'), + (0xBFB, 'X'), + (0xC00, 'V'), + (0xC0D, 'X'), + (0xC0E, 'V'), + (0xC11, 'X'), + (0xC12, 'V'), + (0xC29, 'X'), + (0xC2A, 'V'), + ] + +def _seg_12() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xC3A, 'X'), + (0xC3C, 'V'), + (0xC45, 'X'), + (0xC46, 'V'), + (0xC49, 'X'), + (0xC4A, 'V'), + (0xC4E, 'X'), + (0xC55, 'V'), + (0xC57, 'X'), + (0xC58, 'V'), + (0xC5B, 'X'), + (0xC5D, 'V'), + (0xC5E, 'X'), + (0xC60, 'V'), + (0xC64, 'X'), + (0xC66, 'V'), + (0xC70, 'X'), + (0xC77, 'V'), + (0xC8D, 'X'), + (0xC8E, 'V'), + (0xC91, 'X'), + (0xC92, 'V'), + (0xCA9, 'X'), + (0xCAA, 'V'), + (0xCB4, 'X'), + (0xCB5, 'V'), + (0xCBA, 'X'), + (0xCBC, 'V'), + (0xCC5, 'X'), + (0xCC6, 'V'), + (0xCC9, 'X'), + (0xCCA, 'V'), + (0xCCE, 'X'), + (0xCD5, 'V'), + (0xCD7, 'X'), + (0xCDD, 'V'), + (0xCDF, 'X'), + (0xCE0, 'V'), + (0xCE4, 'X'), + (0xCE6, 'V'), + (0xCF0, 'X'), + (0xCF1, 'V'), + (0xCF3, 'X'), + (0xD00, 'V'), + (0xD0D, 'X'), + (0xD0E, 'V'), + (0xD11, 'X'), + (0xD12, 'V'), + (0xD45, 'X'), + (0xD46, 'V'), + (0xD49, 'X'), + (0xD4A, 'V'), + (0xD50, 'X'), + (0xD54, 'V'), + (0xD64, 'X'), + (0xD66, 'V'), + (0xD80, 'X'), + (0xD81, 'V'), + (0xD84, 'X'), + (0xD85, 'V'), + (0xD97, 'X'), + (0xD9A, 'V'), + (0xDB2, 'X'), + (0xDB3, 'V'), + (0xDBC, 'X'), + (0xDBD, 'V'), + (0xDBE, 'X'), + (0xDC0, 'V'), + (0xDC7, 'X'), + (0xDCA, 'V'), + (0xDCB, 'X'), + (0xDCF, 'V'), + (0xDD5, 'X'), + (0xDD6, 'V'), + (0xDD7, 'X'), + (0xDD8, 'V'), + (0xDE0, 'X'), + (0xDE6, 'V'), + (0xDF0, 'X'), + (0xDF2, 'V'), + (0xDF5, 'X'), + (0xE01, 'V'), + (0xE33, 'M', 'ํา'), + (0xE34, 'V'), + (0xE3B, 'X'), + (0xE3F, 'V'), + (0xE5C, 'X'), + (0xE81, 'V'), + (0xE83, 'X'), + (0xE84, 'V'), + (0xE85, 'X'), + (0xE86, 'V'), + (0xE8B, 'X'), + (0xE8C, 'V'), + (0xEA4, 'X'), + (0xEA5, 'V'), + (0xEA6, 'X'), + (0xEA7, 'V'), + (0xEB3, 'M', 'ໍາ'), + (0xEB4, 'V'), + ] + +def _seg_13() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xEBE, 'X'), + (0xEC0, 'V'), + (0xEC5, 'X'), + (0xEC6, 'V'), + (0xEC7, 'X'), + (0xEC8, 'V'), + (0xECE, 'X'), + (0xED0, 'V'), + (0xEDA, 'X'), + (0xEDC, 'M', 'ຫນ'), + (0xEDD, 'M', 'ຫມ'), + (0xEDE, 'V'), + (0xEE0, 'X'), + (0xF00, 'V'), + (0xF0C, 'M', '་'), + (0xF0D, 'V'), + (0xF43, 'M', 'གྷ'), + (0xF44, 'V'), + (0xF48, 'X'), + (0xF49, 'V'), + (0xF4D, 'M', 'ཌྷ'), + (0xF4E, 'V'), + (0xF52, 'M', 'དྷ'), + (0xF53, 'V'), + (0xF57, 'M', 'བྷ'), + (0xF58, 'V'), + (0xF5C, 'M', 'ཛྷ'), + (0xF5D, 'V'), + (0xF69, 'M', 'ཀྵ'), + (0xF6A, 'V'), + (0xF6D, 'X'), + (0xF71, 'V'), + (0xF73, 'M', 'ཱི'), + (0xF74, 'V'), + (0xF75, 'M', 'ཱུ'), + (0xF76, 'M', 'ྲྀ'), + (0xF77, 'M', 'ྲཱྀ'), + (0xF78, 'M', 'ླྀ'), + (0xF79, 'M', 'ླཱྀ'), + (0xF7A, 'V'), + (0xF81, 'M', 'ཱྀ'), + (0xF82, 'V'), + (0xF93, 'M', 'ྒྷ'), + (0xF94, 'V'), + (0xF98, 'X'), + (0xF99, 'V'), + (0xF9D, 'M', 'ྜྷ'), + (0xF9E, 'V'), + (0xFA2, 'M', 'ྡྷ'), + (0xFA3, 'V'), + (0xFA7, 'M', 'ྦྷ'), + (0xFA8, 'V'), + (0xFAC, 'M', 'ྫྷ'), + (0xFAD, 'V'), + (0xFB9, 'M', 'ྐྵ'), + (0xFBA, 'V'), + (0xFBD, 'X'), + (0xFBE, 'V'), + (0xFCD, 'X'), + (0xFCE, 'V'), + (0xFDB, 'X'), + (0x1000, 'V'), + (0x10A0, 'X'), + (0x10C7, 'M', 'ⴧ'), + (0x10C8, 'X'), + (0x10CD, 'M', 'ⴭ'), + (0x10CE, 'X'), + (0x10D0, 'V'), + (0x10FC, 'M', 'ნ'), + (0x10FD, 'V'), + (0x115F, 'X'), + (0x1161, 'V'), + (0x1249, 'X'), + (0x124A, 'V'), + (0x124E, 'X'), + (0x1250, 'V'), + (0x1257, 'X'), + (0x1258, 'V'), + (0x1259, 'X'), + (0x125A, 'V'), + (0x125E, 'X'), + (0x1260, 'V'), + (0x1289, 'X'), + (0x128A, 'V'), + (0x128E, 'X'), + (0x1290, 'V'), + (0x12B1, 'X'), + (0x12B2, 'V'), + (0x12B6, 'X'), + (0x12B8, 'V'), + (0x12BF, 'X'), + (0x12C0, 'V'), + (0x12C1, 'X'), + (0x12C2, 'V'), + (0x12C6, 'X'), + (0x12C8, 'V'), + (0x12D7, 'X'), + (0x12D8, 'V'), + (0x1311, 'X'), + (0x1312, 'V'), + ] + +def _seg_14() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1316, 'X'), + (0x1318, 'V'), + (0x135B, 'X'), + (0x135D, 'V'), + (0x137D, 'X'), + (0x1380, 'V'), + (0x139A, 'X'), + (0x13A0, 'V'), + (0x13F6, 'X'), + (0x13F8, 'M', 'Ᏸ'), + (0x13F9, 'M', 'Ᏹ'), + (0x13FA, 'M', 'Ᏺ'), + (0x13FB, 'M', 'Ᏻ'), + (0x13FC, 'M', 'Ᏼ'), + (0x13FD, 'M', 'Ᏽ'), + (0x13FE, 'X'), + (0x1400, 'V'), + (0x1680, 'X'), + (0x1681, 'V'), + (0x169D, 'X'), + (0x16A0, 'V'), + (0x16F9, 'X'), + (0x1700, 'V'), + (0x1716, 'X'), + (0x171F, 'V'), + (0x1737, 'X'), + (0x1740, 'V'), + (0x1754, 'X'), + (0x1760, 'V'), + (0x176D, 'X'), + (0x176E, 'V'), + (0x1771, 'X'), + (0x1772, 'V'), + (0x1774, 'X'), + (0x1780, 'V'), + (0x17B4, 'X'), + (0x17B6, 'V'), + (0x17DE, 'X'), + (0x17E0, 'V'), + (0x17EA, 'X'), + (0x17F0, 'V'), + (0x17FA, 'X'), + (0x1800, 'V'), + (0x1806, 'X'), + (0x1807, 'V'), + (0x180B, 'I'), + (0x180E, 'X'), + (0x180F, 'I'), + (0x1810, 'V'), + (0x181A, 'X'), + (0x1820, 'V'), + (0x1879, 'X'), + (0x1880, 'V'), + (0x18AB, 'X'), + (0x18B0, 'V'), + (0x18F6, 'X'), + (0x1900, 'V'), + (0x191F, 'X'), + (0x1920, 'V'), + (0x192C, 'X'), + (0x1930, 'V'), + (0x193C, 'X'), + (0x1940, 'V'), + (0x1941, 'X'), + (0x1944, 'V'), + (0x196E, 'X'), + (0x1970, 'V'), + (0x1975, 'X'), + (0x1980, 'V'), + (0x19AC, 'X'), + (0x19B0, 'V'), + (0x19CA, 'X'), + (0x19D0, 'V'), + (0x19DB, 'X'), + (0x19DE, 'V'), + (0x1A1C, 'X'), + (0x1A1E, 'V'), + (0x1A5F, 'X'), + (0x1A60, 'V'), + (0x1A7D, 'X'), + (0x1A7F, 'V'), + (0x1A8A, 'X'), + (0x1A90, 'V'), + (0x1A9A, 'X'), + (0x1AA0, 'V'), + (0x1AAE, 'X'), + (0x1AB0, 'V'), + (0x1ACF, 'X'), + (0x1B00, 'V'), + (0x1B4D, 'X'), + (0x1B50, 'V'), + (0x1B7F, 'X'), + (0x1B80, 'V'), + (0x1BF4, 'X'), + (0x1BFC, 'V'), + (0x1C38, 'X'), + (0x1C3B, 'V'), + (0x1C4A, 'X'), + (0x1C4D, 'V'), + (0x1C80, 'M', 'в'), + ] + +def _seg_15() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1C81, 'M', 'д'), + (0x1C82, 'M', 'о'), + (0x1C83, 'M', 'с'), + (0x1C84, 'M', 'т'), + (0x1C86, 'M', 'ъ'), + (0x1C87, 'M', 'ѣ'), + (0x1C88, 'M', 'ꙋ'), + (0x1C89, 'X'), + (0x1C90, 'M', 'ა'), + (0x1C91, 'M', 'ბ'), + (0x1C92, 'M', 'გ'), + (0x1C93, 'M', 'დ'), + (0x1C94, 'M', 'ე'), + (0x1C95, 'M', 'ვ'), + (0x1C96, 'M', 'ზ'), + (0x1C97, 'M', 'თ'), + (0x1C98, 'M', 'ი'), + (0x1C99, 'M', 'კ'), + (0x1C9A, 'M', 'ლ'), + (0x1C9B, 'M', 'მ'), + (0x1C9C, 'M', 'ნ'), + (0x1C9D, 'M', 'ო'), + (0x1C9E, 'M', 'პ'), + (0x1C9F, 'M', 'ჟ'), + (0x1CA0, 'M', 'რ'), + (0x1CA1, 'M', 'ს'), + (0x1CA2, 'M', 'ტ'), + (0x1CA3, 'M', 'უ'), + (0x1CA4, 'M', 'ფ'), + (0x1CA5, 'M', 'ქ'), + (0x1CA6, 'M', 'ღ'), + (0x1CA7, 'M', 'ყ'), + (0x1CA8, 'M', 'შ'), + (0x1CA9, 'M', 'ჩ'), + (0x1CAA, 'M', 'ც'), + (0x1CAB, 'M', 'ძ'), + (0x1CAC, 'M', 'წ'), + (0x1CAD, 'M', 'ჭ'), + (0x1CAE, 'M', 'ხ'), + (0x1CAF, 'M', 'ჯ'), + (0x1CB0, 'M', 'ჰ'), + (0x1CB1, 'M', 'ჱ'), + (0x1CB2, 'M', 'ჲ'), + (0x1CB3, 'M', 'ჳ'), + (0x1CB4, 'M', 'ჴ'), + (0x1CB5, 'M', 'ჵ'), + (0x1CB6, 'M', 'ჶ'), + (0x1CB7, 'M', 'ჷ'), + (0x1CB8, 'M', 'ჸ'), + (0x1CB9, 'M', 'ჹ'), + (0x1CBA, 'M', 'ჺ'), + (0x1CBB, 'X'), + (0x1CBD, 'M', 'ჽ'), + (0x1CBE, 'M', 'ჾ'), + (0x1CBF, 'M', 'ჿ'), + (0x1CC0, 'V'), + (0x1CC8, 'X'), + (0x1CD0, 'V'), + (0x1CFB, 'X'), + (0x1D00, 'V'), + (0x1D2C, 'M', 'a'), + (0x1D2D, 'M', 'æ'), + (0x1D2E, 'M', 'b'), + (0x1D2F, 'V'), + (0x1D30, 'M', 'd'), + (0x1D31, 'M', 'e'), + (0x1D32, 'M', 'ǝ'), + (0x1D33, 'M', 'g'), + (0x1D34, 'M', 'h'), + (0x1D35, 'M', 'i'), + (0x1D36, 'M', 'j'), + (0x1D37, 'M', 'k'), + (0x1D38, 'M', 'l'), + (0x1D39, 'M', 'm'), + (0x1D3A, 'M', 'n'), + (0x1D3B, 'V'), + (0x1D3C, 'M', 'o'), + (0x1D3D, 'M', 'ȣ'), + (0x1D3E, 'M', 'p'), + (0x1D3F, 'M', 'r'), + (0x1D40, 'M', 't'), + (0x1D41, 'M', 'u'), + (0x1D42, 'M', 'w'), + (0x1D43, 'M', 'a'), + (0x1D44, 'M', 'ɐ'), + (0x1D45, 'M', 'ɑ'), + (0x1D46, 'M', 'ᴂ'), + (0x1D47, 'M', 'b'), + (0x1D48, 'M', 'd'), + (0x1D49, 'M', 'e'), + (0x1D4A, 'M', 'ə'), + (0x1D4B, 'M', 'ɛ'), + (0x1D4C, 'M', 'ɜ'), + (0x1D4D, 'M', 'g'), + (0x1D4E, 'V'), + (0x1D4F, 'M', 'k'), + (0x1D50, 'M', 'm'), + (0x1D51, 'M', 'ŋ'), + (0x1D52, 'M', 'o'), + (0x1D53, 'M', 'ɔ'), + ] + +def _seg_16() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D54, 'M', 'ᴖ'), + (0x1D55, 'M', 'ᴗ'), + (0x1D56, 'M', 'p'), + (0x1D57, 'M', 't'), + (0x1D58, 'M', 'u'), + (0x1D59, 'M', 'ᴝ'), + (0x1D5A, 'M', 'ɯ'), + (0x1D5B, 'M', 'v'), + (0x1D5C, 'M', 'ᴥ'), + (0x1D5D, 'M', 'β'), + (0x1D5E, 'M', 'γ'), + (0x1D5F, 'M', 'δ'), + (0x1D60, 'M', 'φ'), + (0x1D61, 'M', 'χ'), + (0x1D62, 'M', 'i'), + (0x1D63, 'M', 'r'), + (0x1D64, 'M', 'u'), + (0x1D65, 'M', 'v'), + (0x1D66, 'M', 'β'), + (0x1D67, 'M', 'γ'), + (0x1D68, 'M', 'ρ'), + (0x1D69, 'M', 'φ'), + (0x1D6A, 'M', 'χ'), + (0x1D6B, 'V'), + (0x1D78, 'M', 'н'), + (0x1D79, 'V'), + (0x1D9B, 'M', 'ɒ'), + (0x1D9C, 'M', 'c'), + (0x1D9D, 'M', 'ɕ'), + (0x1D9E, 'M', 'ð'), + (0x1D9F, 'M', 'ɜ'), + (0x1DA0, 'M', 'f'), + (0x1DA1, 'M', 'ɟ'), + (0x1DA2, 'M', 'ɡ'), + (0x1DA3, 'M', 'ɥ'), + (0x1DA4, 'M', 'ɨ'), + (0x1DA5, 'M', 'ɩ'), + (0x1DA6, 'M', 'ɪ'), + (0x1DA7, 'M', 'ᵻ'), + (0x1DA8, 'M', 'ʝ'), + (0x1DA9, 'M', 'ɭ'), + (0x1DAA, 'M', 'ᶅ'), + (0x1DAB, 'M', 'ʟ'), + (0x1DAC, 'M', 'ɱ'), + (0x1DAD, 'M', 'ɰ'), + (0x1DAE, 'M', 'ɲ'), + (0x1DAF, 'M', 'ɳ'), + (0x1DB0, 'M', 'ɴ'), + (0x1DB1, 'M', 'ɵ'), + (0x1DB2, 'M', 'ɸ'), + (0x1DB3, 'M', 'ʂ'), + (0x1DB4, 'M', 'ʃ'), + (0x1DB5, 'M', 'ƫ'), + (0x1DB6, 'M', 'ʉ'), + (0x1DB7, 'M', 'ʊ'), + (0x1DB8, 'M', 'ᴜ'), + (0x1DB9, 'M', 'ʋ'), + (0x1DBA, 'M', 'ʌ'), + (0x1DBB, 'M', 'z'), + (0x1DBC, 'M', 'ʐ'), + (0x1DBD, 'M', 'ʑ'), + (0x1DBE, 'M', 'ʒ'), + (0x1DBF, 'M', 'θ'), + (0x1DC0, 'V'), + (0x1E00, 'M', 'ḁ'), + (0x1E01, 'V'), + (0x1E02, 'M', 'ḃ'), + (0x1E03, 'V'), + (0x1E04, 'M', 'ḅ'), + (0x1E05, 'V'), + (0x1E06, 'M', 'ḇ'), + (0x1E07, 'V'), + (0x1E08, 'M', 'ḉ'), + (0x1E09, 'V'), + (0x1E0A, 'M', 'ḋ'), + (0x1E0B, 'V'), + (0x1E0C, 'M', 'ḍ'), + (0x1E0D, 'V'), + (0x1E0E, 'M', 'ḏ'), + (0x1E0F, 'V'), + (0x1E10, 'M', 'ḑ'), + (0x1E11, 'V'), + (0x1E12, 'M', 'ḓ'), + (0x1E13, 'V'), + (0x1E14, 'M', 'ḕ'), + (0x1E15, 'V'), + (0x1E16, 'M', 'ḗ'), + (0x1E17, 'V'), + (0x1E18, 'M', 'ḙ'), + (0x1E19, 'V'), + (0x1E1A, 'M', 'ḛ'), + (0x1E1B, 'V'), + (0x1E1C, 'M', 'ḝ'), + (0x1E1D, 'V'), + (0x1E1E, 'M', 'ḟ'), + (0x1E1F, 'V'), + (0x1E20, 'M', 'ḡ'), + (0x1E21, 'V'), + (0x1E22, 'M', 'ḣ'), + (0x1E23, 'V'), + ] + +def _seg_17() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E24, 'M', 'ḥ'), + (0x1E25, 'V'), + (0x1E26, 'M', 'ḧ'), + (0x1E27, 'V'), + (0x1E28, 'M', 'ḩ'), + (0x1E29, 'V'), + (0x1E2A, 'M', 'ḫ'), + (0x1E2B, 'V'), + (0x1E2C, 'M', 'ḭ'), + (0x1E2D, 'V'), + (0x1E2E, 'M', 'ḯ'), + (0x1E2F, 'V'), + (0x1E30, 'M', 'ḱ'), + (0x1E31, 'V'), + (0x1E32, 'M', 'ḳ'), + (0x1E33, 'V'), + (0x1E34, 'M', 'ḵ'), + (0x1E35, 'V'), + (0x1E36, 'M', 'ḷ'), + (0x1E37, 'V'), + (0x1E38, 'M', 'ḹ'), + (0x1E39, 'V'), + (0x1E3A, 'M', 'ḻ'), + (0x1E3B, 'V'), + (0x1E3C, 'M', 'ḽ'), + (0x1E3D, 'V'), + (0x1E3E, 'M', 'ḿ'), + (0x1E3F, 'V'), + (0x1E40, 'M', 'ṁ'), + (0x1E41, 'V'), + (0x1E42, 'M', 'ṃ'), + (0x1E43, 'V'), + (0x1E44, 'M', 'ṅ'), + (0x1E45, 'V'), + (0x1E46, 'M', 'ṇ'), + (0x1E47, 'V'), + (0x1E48, 'M', 'ṉ'), + (0x1E49, 'V'), + (0x1E4A, 'M', 'ṋ'), + (0x1E4B, 'V'), + (0x1E4C, 'M', 'ṍ'), + (0x1E4D, 'V'), + (0x1E4E, 'M', 'ṏ'), + (0x1E4F, 'V'), + (0x1E50, 'M', 'ṑ'), + (0x1E51, 'V'), + (0x1E52, 'M', 'ṓ'), + (0x1E53, 'V'), + (0x1E54, 'M', 'ṕ'), + (0x1E55, 'V'), + (0x1E56, 'M', 'ṗ'), + (0x1E57, 'V'), + (0x1E58, 'M', 'ṙ'), + (0x1E59, 'V'), + (0x1E5A, 'M', 'ṛ'), + (0x1E5B, 'V'), + (0x1E5C, 'M', 'ṝ'), + (0x1E5D, 'V'), + (0x1E5E, 'M', 'ṟ'), + (0x1E5F, 'V'), + (0x1E60, 'M', 'ṡ'), + (0x1E61, 'V'), + (0x1E62, 'M', 'ṣ'), + (0x1E63, 'V'), + (0x1E64, 'M', 'ṥ'), + (0x1E65, 'V'), + (0x1E66, 'M', 'ṧ'), + (0x1E67, 'V'), + (0x1E68, 'M', 'ṩ'), + (0x1E69, 'V'), + (0x1E6A, 'M', 'ṫ'), + (0x1E6B, 'V'), + (0x1E6C, 'M', 'ṭ'), + (0x1E6D, 'V'), + (0x1E6E, 'M', 'ṯ'), + (0x1E6F, 'V'), + (0x1E70, 'M', 'ṱ'), + (0x1E71, 'V'), + (0x1E72, 'M', 'ṳ'), + (0x1E73, 'V'), + (0x1E74, 'M', 'ṵ'), + (0x1E75, 'V'), + (0x1E76, 'M', 'ṷ'), + (0x1E77, 'V'), + (0x1E78, 'M', 'ṹ'), + (0x1E79, 'V'), + (0x1E7A, 'M', 'ṻ'), + (0x1E7B, 'V'), + (0x1E7C, 'M', 'ṽ'), + (0x1E7D, 'V'), + (0x1E7E, 'M', 'ṿ'), + (0x1E7F, 'V'), + (0x1E80, 'M', 'ẁ'), + (0x1E81, 'V'), + (0x1E82, 'M', 'ẃ'), + (0x1E83, 'V'), + (0x1E84, 'M', 'ẅ'), + (0x1E85, 'V'), + (0x1E86, 'M', 'ẇ'), + (0x1E87, 'V'), + ] + +def _seg_18() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E88, 'M', 'ẉ'), + (0x1E89, 'V'), + (0x1E8A, 'M', 'ẋ'), + (0x1E8B, 'V'), + (0x1E8C, 'M', 'ẍ'), + (0x1E8D, 'V'), + (0x1E8E, 'M', 'ẏ'), + (0x1E8F, 'V'), + (0x1E90, 'M', 'ẑ'), + (0x1E91, 'V'), + (0x1E92, 'M', 'ẓ'), + (0x1E93, 'V'), + (0x1E94, 'M', 'ẕ'), + (0x1E95, 'V'), + (0x1E9A, 'M', 'aʾ'), + (0x1E9B, 'M', 'ṡ'), + (0x1E9C, 'V'), + (0x1E9E, 'M', 'ss'), + (0x1E9F, 'V'), + (0x1EA0, 'M', 'ạ'), + (0x1EA1, 'V'), + (0x1EA2, 'M', 'ả'), + (0x1EA3, 'V'), + (0x1EA4, 'M', 'ấ'), + (0x1EA5, 'V'), + (0x1EA6, 'M', 'ầ'), + (0x1EA7, 'V'), + (0x1EA8, 'M', 'ẩ'), + (0x1EA9, 'V'), + (0x1EAA, 'M', 'ẫ'), + (0x1EAB, 'V'), + (0x1EAC, 'M', 'ậ'), + (0x1EAD, 'V'), + (0x1EAE, 'M', 'ắ'), + (0x1EAF, 'V'), + (0x1EB0, 'M', 'ằ'), + (0x1EB1, 'V'), + (0x1EB2, 'M', 'ẳ'), + (0x1EB3, 'V'), + (0x1EB4, 'M', 'ẵ'), + (0x1EB5, 'V'), + (0x1EB6, 'M', 'ặ'), + (0x1EB7, 'V'), + (0x1EB8, 'M', 'ẹ'), + (0x1EB9, 'V'), + (0x1EBA, 'M', 'ẻ'), + (0x1EBB, 'V'), + (0x1EBC, 'M', 'ẽ'), + (0x1EBD, 'V'), + (0x1EBE, 'M', 'ế'), + (0x1EBF, 'V'), + (0x1EC0, 'M', 'ề'), + (0x1EC1, 'V'), + (0x1EC2, 'M', 'ể'), + (0x1EC3, 'V'), + (0x1EC4, 'M', 'ễ'), + (0x1EC5, 'V'), + (0x1EC6, 'M', 'ệ'), + (0x1EC7, 'V'), + (0x1EC8, 'M', 'ỉ'), + (0x1EC9, 'V'), + (0x1ECA, 'M', 'ị'), + (0x1ECB, 'V'), + (0x1ECC, 'M', 'ọ'), + (0x1ECD, 'V'), + (0x1ECE, 'M', 'ỏ'), + (0x1ECF, 'V'), + (0x1ED0, 'M', 'ố'), + (0x1ED1, 'V'), + (0x1ED2, 'M', 'ồ'), + (0x1ED3, 'V'), + (0x1ED4, 'M', 'ổ'), + (0x1ED5, 'V'), + (0x1ED6, 'M', 'ỗ'), + (0x1ED7, 'V'), + (0x1ED8, 'M', 'ộ'), + (0x1ED9, 'V'), + (0x1EDA, 'M', 'ớ'), + (0x1EDB, 'V'), + (0x1EDC, 'M', 'ờ'), + (0x1EDD, 'V'), + (0x1EDE, 'M', 'ở'), + (0x1EDF, 'V'), + (0x1EE0, 'M', 'ỡ'), + (0x1EE1, 'V'), + (0x1EE2, 'M', 'ợ'), + (0x1EE3, 'V'), + (0x1EE4, 'M', 'ụ'), + (0x1EE5, 'V'), + (0x1EE6, 'M', 'ủ'), + (0x1EE7, 'V'), + (0x1EE8, 'M', 'ứ'), + (0x1EE9, 'V'), + (0x1EEA, 'M', 'ừ'), + (0x1EEB, 'V'), + (0x1EEC, 'M', 'ử'), + (0x1EED, 'V'), + (0x1EEE, 'M', 'ữ'), + (0x1EEF, 'V'), + (0x1EF0, 'M', 'ự'), + ] + +def _seg_19() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1EF1, 'V'), + (0x1EF2, 'M', 'ỳ'), + (0x1EF3, 'V'), + (0x1EF4, 'M', 'ỵ'), + (0x1EF5, 'V'), + (0x1EF6, 'M', 'ỷ'), + (0x1EF7, 'V'), + (0x1EF8, 'M', 'ỹ'), + (0x1EF9, 'V'), + (0x1EFA, 'M', 'ỻ'), + (0x1EFB, 'V'), + (0x1EFC, 'M', 'ỽ'), + (0x1EFD, 'V'), + (0x1EFE, 'M', 'ỿ'), + (0x1EFF, 'V'), + (0x1F08, 'M', 'ἀ'), + (0x1F09, 'M', 'ἁ'), + (0x1F0A, 'M', 'ἂ'), + (0x1F0B, 'M', 'ἃ'), + (0x1F0C, 'M', 'ἄ'), + (0x1F0D, 'M', 'ἅ'), + (0x1F0E, 'M', 'ἆ'), + (0x1F0F, 'M', 'ἇ'), + (0x1F10, 'V'), + (0x1F16, 'X'), + (0x1F18, 'M', 'ἐ'), + (0x1F19, 'M', 'ἑ'), + (0x1F1A, 'M', 'ἒ'), + (0x1F1B, 'M', 'ἓ'), + (0x1F1C, 'M', 'ἔ'), + (0x1F1D, 'M', 'ἕ'), + (0x1F1E, 'X'), + (0x1F20, 'V'), + (0x1F28, 'M', 'ἠ'), + (0x1F29, 'M', 'ἡ'), + (0x1F2A, 'M', 'ἢ'), + (0x1F2B, 'M', 'ἣ'), + (0x1F2C, 'M', 'ἤ'), + (0x1F2D, 'M', 'ἥ'), + (0x1F2E, 'M', 'ἦ'), + (0x1F2F, 'M', 'ἧ'), + (0x1F30, 'V'), + (0x1F38, 'M', 'ἰ'), + (0x1F39, 'M', 'ἱ'), + (0x1F3A, 'M', 'ἲ'), + (0x1F3B, 'M', 'ἳ'), + (0x1F3C, 'M', 'ἴ'), + (0x1F3D, 'M', 'ἵ'), + (0x1F3E, 'M', 'ἶ'), + (0x1F3F, 'M', 'ἷ'), + (0x1F40, 'V'), + (0x1F46, 'X'), + (0x1F48, 'M', 'ὀ'), + (0x1F49, 'M', 'ὁ'), + (0x1F4A, 'M', 'ὂ'), + (0x1F4B, 'M', 'ὃ'), + (0x1F4C, 'M', 'ὄ'), + (0x1F4D, 'M', 'ὅ'), + (0x1F4E, 'X'), + (0x1F50, 'V'), + (0x1F58, 'X'), + (0x1F59, 'M', 'ὑ'), + (0x1F5A, 'X'), + (0x1F5B, 'M', 'ὓ'), + (0x1F5C, 'X'), + (0x1F5D, 'M', 'ὕ'), + (0x1F5E, 'X'), + (0x1F5F, 'M', 'ὗ'), + (0x1F60, 'V'), + (0x1F68, 'M', 'ὠ'), + (0x1F69, 'M', 'ὡ'), + (0x1F6A, 'M', 'ὢ'), + (0x1F6B, 'M', 'ὣ'), + (0x1F6C, 'M', 'ὤ'), + (0x1F6D, 'M', 'ὥ'), + (0x1F6E, 'M', 'ὦ'), + (0x1F6F, 'M', 'ὧ'), + (0x1F70, 'V'), + (0x1F71, 'M', 'ά'), + (0x1F72, 'V'), + (0x1F73, 'M', 'έ'), + (0x1F74, 'V'), + (0x1F75, 'M', 'ή'), + (0x1F76, 'V'), + (0x1F77, 'M', 'ί'), + (0x1F78, 'V'), + (0x1F79, 'M', 'ό'), + (0x1F7A, 'V'), + (0x1F7B, 'M', 'ύ'), + (0x1F7C, 'V'), + (0x1F7D, 'M', 'ώ'), + (0x1F7E, 'X'), + (0x1F80, 'M', 'ἀι'), + (0x1F81, 'M', 'ἁι'), + (0x1F82, 'M', 'ἂι'), + (0x1F83, 'M', 'ἃι'), + (0x1F84, 'M', 'ἄι'), + (0x1F85, 'M', 'ἅι'), + (0x1F86, 'M', 'ἆι'), + (0x1F87, 'M', 'ἇι'), + ] + +def _seg_20() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1F88, 'M', 'ἀι'), + (0x1F89, 'M', 'ἁι'), + (0x1F8A, 'M', 'ἂι'), + (0x1F8B, 'M', 'ἃι'), + (0x1F8C, 'M', 'ἄι'), + (0x1F8D, 'M', 'ἅι'), + (0x1F8E, 'M', 'ἆι'), + (0x1F8F, 'M', 'ἇι'), + (0x1F90, 'M', 'ἠι'), + (0x1F91, 'M', 'ἡι'), + (0x1F92, 'M', 'ἢι'), + (0x1F93, 'M', 'ἣι'), + (0x1F94, 'M', 'ἤι'), + (0x1F95, 'M', 'ἥι'), + (0x1F96, 'M', 'ἦι'), + (0x1F97, 'M', 'ἧι'), + (0x1F98, 'M', 'ἠι'), + (0x1F99, 'M', 'ἡι'), + (0x1F9A, 'M', 'ἢι'), + (0x1F9B, 'M', 'ἣι'), + (0x1F9C, 'M', 'ἤι'), + (0x1F9D, 'M', 'ἥι'), + (0x1F9E, 'M', 'ἦι'), + (0x1F9F, 'M', 'ἧι'), + (0x1FA0, 'M', 'ὠι'), + (0x1FA1, 'M', 'ὡι'), + (0x1FA2, 'M', 'ὢι'), + (0x1FA3, 'M', 'ὣι'), + (0x1FA4, 'M', 'ὤι'), + (0x1FA5, 'M', 'ὥι'), + (0x1FA6, 'M', 'ὦι'), + (0x1FA7, 'M', 'ὧι'), + (0x1FA8, 'M', 'ὠι'), + (0x1FA9, 'M', 'ὡι'), + (0x1FAA, 'M', 'ὢι'), + (0x1FAB, 'M', 'ὣι'), + (0x1FAC, 'M', 'ὤι'), + (0x1FAD, 'M', 'ὥι'), + (0x1FAE, 'M', 'ὦι'), + (0x1FAF, 'M', 'ὧι'), + (0x1FB0, 'V'), + (0x1FB2, 'M', 'ὰι'), + (0x1FB3, 'M', 'αι'), + (0x1FB4, 'M', 'άι'), + (0x1FB5, 'X'), + (0x1FB6, 'V'), + (0x1FB7, 'M', 'ᾶι'), + (0x1FB8, 'M', 'ᾰ'), + (0x1FB9, 'M', 'ᾱ'), + (0x1FBA, 'M', 'ὰ'), + (0x1FBB, 'M', 'ά'), + (0x1FBC, 'M', 'αι'), + (0x1FBD, '3', ' ̓'), + (0x1FBE, 'M', 'ι'), + (0x1FBF, '3', ' ̓'), + (0x1FC0, '3', ' ͂'), + (0x1FC1, '3', ' ̈͂'), + (0x1FC2, 'M', 'ὴι'), + (0x1FC3, 'M', 'ηι'), + (0x1FC4, 'M', 'ήι'), + (0x1FC5, 'X'), + (0x1FC6, 'V'), + (0x1FC7, 'M', 'ῆι'), + (0x1FC8, 'M', 'ὲ'), + (0x1FC9, 'M', 'έ'), + (0x1FCA, 'M', 'ὴ'), + (0x1FCB, 'M', 'ή'), + (0x1FCC, 'M', 'ηι'), + (0x1FCD, '3', ' ̓̀'), + (0x1FCE, '3', ' ̓́'), + (0x1FCF, '3', ' ̓͂'), + (0x1FD0, 'V'), + (0x1FD3, 'M', 'ΐ'), + (0x1FD4, 'X'), + (0x1FD6, 'V'), + (0x1FD8, 'M', 'ῐ'), + (0x1FD9, 'M', 'ῑ'), + (0x1FDA, 'M', 'ὶ'), + (0x1FDB, 'M', 'ί'), + (0x1FDC, 'X'), + (0x1FDD, '3', ' ̔̀'), + (0x1FDE, '3', ' ̔́'), + (0x1FDF, '3', ' ̔͂'), + (0x1FE0, 'V'), + (0x1FE3, 'M', 'ΰ'), + (0x1FE4, 'V'), + (0x1FE8, 'M', 'ῠ'), + (0x1FE9, 'M', 'ῡ'), + (0x1FEA, 'M', 'ὺ'), + (0x1FEB, 'M', 'ύ'), + (0x1FEC, 'M', 'ῥ'), + (0x1FED, '3', ' ̈̀'), + (0x1FEE, '3', ' ̈́'), + (0x1FEF, '3', '`'), + (0x1FF0, 'X'), + (0x1FF2, 'M', 'ὼι'), + (0x1FF3, 'M', 'ωι'), + (0x1FF4, 'M', 'ώι'), + (0x1FF5, 'X'), + (0x1FF6, 'V'), + ] + +def _seg_21() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1FF7, 'M', 'ῶι'), + (0x1FF8, 'M', 'ὸ'), + (0x1FF9, 'M', 'ό'), + (0x1FFA, 'M', 'ὼ'), + (0x1FFB, 'M', 'ώ'), + (0x1FFC, 'M', 'ωι'), + (0x1FFD, '3', ' ́'), + (0x1FFE, '3', ' ̔'), + (0x1FFF, 'X'), + (0x2000, '3', ' '), + (0x200B, 'I'), + (0x200C, 'D', ''), + (0x200E, 'X'), + (0x2010, 'V'), + (0x2011, 'M', '‐'), + (0x2012, 'V'), + (0x2017, '3', ' ̳'), + (0x2018, 'V'), + (0x2024, 'X'), + (0x2027, 'V'), + (0x2028, 'X'), + (0x202F, '3', ' '), + (0x2030, 'V'), + (0x2033, 'M', '′′'), + (0x2034, 'M', '′′′'), + (0x2035, 'V'), + (0x2036, 'M', '‵‵'), + (0x2037, 'M', '‵‵‵'), + (0x2038, 'V'), + (0x203C, '3', '!!'), + (0x203D, 'V'), + (0x203E, '3', ' ̅'), + (0x203F, 'V'), + (0x2047, '3', '??'), + (0x2048, '3', '?!'), + (0x2049, '3', '!?'), + (0x204A, 'V'), + (0x2057, 'M', '′′′′'), + (0x2058, 'V'), + (0x205F, '3', ' '), + (0x2060, 'I'), + (0x2061, 'X'), + (0x2064, 'I'), + (0x2065, 'X'), + (0x2070, 'M', '0'), + (0x2071, 'M', 'i'), + (0x2072, 'X'), + (0x2074, 'M', '4'), + (0x2075, 'M', '5'), + (0x2076, 'M', '6'), + (0x2077, 'M', '7'), + (0x2078, 'M', '8'), + (0x2079, 'M', '9'), + (0x207A, '3', '+'), + (0x207B, 'M', '−'), + (0x207C, '3', '='), + (0x207D, '3', '('), + (0x207E, '3', ')'), + (0x207F, 'M', 'n'), + (0x2080, 'M', '0'), + (0x2081, 'M', '1'), + (0x2082, 'M', '2'), + (0x2083, 'M', '3'), + (0x2084, 'M', '4'), + (0x2085, 'M', '5'), + (0x2086, 'M', '6'), + (0x2087, 'M', '7'), + (0x2088, 'M', '8'), + (0x2089, 'M', '9'), + (0x208A, '3', '+'), + (0x208B, 'M', '−'), + (0x208C, '3', '='), + (0x208D, '3', '('), + (0x208E, '3', ')'), + (0x208F, 'X'), + (0x2090, 'M', 'a'), + (0x2091, 'M', 'e'), + (0x2092, 'M', 'o'), + (0x2093, 'M', 'x'), + (0x2094, 'M', 'ə'), + (0x2095, 'M', 'h'), + (0x2096, 'M', 'k'), + (0x2097, 'M', 'l'), + (0x2098, 'M', 'm'), + (0x2099, 'M', 'n'), + (0x209A, 'M', 'p'), + (0x209B, 'M', 's'), + (0x209C, 'M', 't'), + (0x209D, 'X'), + (0x20A0, 'V'), + (0x20A8, 'M', 'rs'), + (0x20A9, 'V'), + (0x20C1, 'X'), + (0x20D0, 'V'), + (0x20F1, 'X'), + (0x2100, '3', 'a/c'), + (0x2101, '3', 'a/s'), + (0x2102, 'M', 'c'), + (0x2103, 'M', '°c'), + (0x2104, 'V'), + ] + +def _seg_22() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2105, '3', 'c/o'), + (0x2106, '3', 'c/u'), + (0x2107, 'M', 'ɛ'), + (0x2108, 'V'), + (0x2109, 'M', '°f'), + (0x210A, 'M', 'g'), + (0x210B, 'M', 'h'), + (0x210F, 'M', 'ħ'), + (0x2110, 'M', 'i'), + (0x2112, 'M', 'l'), + (0x2114, 'V'), + (0x2115, 'M', 'n'), + (0x2116, 'M', 'no'), + (0x2117, 'V'), + (0x2119, 'M', 'p'), + (0x211A, 'M', 'q'), + (0x211B, 'M', 'r'), + (0x211E, 'V'), + (0x2120, 'M', 'sm'), + (0x2121, 'M', 'tel'), + (0x2122, 'M', 'tm'), + (0x2123, 'V'), + (0x2124, 'M', 'z'), + (0x2125, 'V'), + (0x2126, 'M', 'ω'), + (0x2127, 'V'), + (0x2128, 'M', 'z'), + (0x2129, 'V'), + (0x212A, 'M', 'k'), + (0x212B, 'M', 'å'), + (0x212C, 'M', 'b'), + (0x212D, 'M', 'c'), + (0x212E, 'V'), + (0x212F, 'M', 'e'), + (0x2131, 'M', 'f'), + (0x2132, 'X'), + (0x2133, 'M', 'm'), + (0x2134, 'M', 'o'), + (0x2135, 'M', 'א'), + (0x2136, 'M', 'ב'), + (0x2137, 'M', 'ג'), + (0x2138, 'M', 'ד'), + (0x2139, 'M', 'i'), + (0x213A, 'V'), + (0x213B, 'M', 'fax'), + (0x213C, 'M', 'π'), + (0x213D, 'M', 'γ'), + (0x213F, 'M', 'π'), + (0x2140, 'M', '∑'), + (0x2141, 'V'), + (0x2145, 'M', 'd'), + (0x2147, 'M', 'e'), + (0x2148, 'M', 'i'), + (0x2149, 'M', 'j'), + (0x214A, 'V'), + (0x2150, 'M', '1⁄7'), + (0x2151, 'M', '1⁄9'), + (0x2152, 'M', '1⁄10'), + (0x2153, 'M', '1⁄3'), + (0x2154, 'M', '2⁄3'), + (0x2155, 'M', '1⁄5'), + (0x2156, 'M', '2⁄5'), + (0x2157, 'M', '3⁄5'), + (0x2158, 'M', '4⁄5'), + (0x2159, 'M', '1⁄6'), + (0x215A, 'M', '5⁄6'), + (0x215B, 'M', '1⁄8'), + (0x215C, 'M', '3⁄8'), + (0x215D, 'M', '5⁄8'), + (0x215E, 'M', '7⁄8'), + (0x215F, 'M', '1⁄'), + (0x2160, 'M', 'i'), + (0x2161, 'M', 'ii'), + (0x2162, 'M', 'iii'), + (0x2163, 'M', 'iv'), + (0x2164, 'M', 'v'), + (0x2165, 'M', 'vi'), + (0x2166, 'M', 'vii'), + (0x2167, 'M', 'viii'), + (0x2168, 'M', 'ix'), + (0x2169, 'M', 'x'), + (0x216A, 'M', 'xi'), + (0x216B, 'M', 'xii'), + (0x216C, 'M', 'l'), + (0x216D, 'M', 'c'), + (0x216E, 'M', 'd'), + (0x216F, 'M', 'm'), + (0x2170, 'M', 'i'), + (0x2171, 'M', 'ii'), + (0x2172, 'M', 'iii'), + (0x2173, 'M', 'iv'), + (0x2174, 'M', 'v'), + (0x2175, 'M', 'vi'), + (0x2176, 'M', 'vii'), + (0x2177, 'M', 'viii'), + (0x2178, 'M', 'ix'), + (0x2179, 'M', 'x'), + (0x217A, 'M', 'xi'), + (0x217B, 'M', 'xii'), + (0x217C, 'M', 'l'), + ] + +def _seg_23() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x217D, 'M', 'c'), + (0x217E, 'M', 'd'), + (0x217F, 'M', 'm'), + (0x2180, 'V'), + (0x2183, 'X'), + (0x2184, 'V'), + (0x2189, 'M', '0⁄3'), + (0x218A, 'V'), + (0x218C, 'X'), + (0x2190, 'V'), + (0x222C, 'M', '∫∫'), + (0x222D, 'M', '∫∫∫'), + (0x222E, 'V'), + (0x222F, 'M', '∮∮'), + (0x2230, 'M', '∮∮∮'), + (0x2231, 'V'), + (0x2260, '3'), + (0x2261, 'V'), + (0x226E, '3'), + (0x2270, 'V'), + (0x2329, 'M', '〈'), + (0x232A, 'M', '〉'), + (0x232B, 'V'), + (0x2427, 'X'), + (0x2440, 'V'), + (0x244B, 'X'), + (0x2460, 'M', '1'), + (0x2461, 'M', '2'), + (0x2462, 'M', '3'), + (0x2463, 'M', '4'), + (0x2464, 'M', '5'), + (0x2465, 'M', '6'), + (0x2466, 'M', '7'), + (0x2467, 'M', '8'), + (0x2468, 'M', '9'), + (0x2469, 'M', '10'), + (0x246A, 'M', '11'), + (0x246B, 'M', '12'), + (0x246C, 'M', '13'), + (0x246D, 'M', '14'), + (0x246E, 'M', '15'), + (0x246F, 'M', '16'), + (0x2470, 'M', '17'), + (0x2471, 'M', '18'), + (0x2472, 'M', '19'), + (0x2473, 'M', '20'), + (0x2474, '3', '(1)'), + (0x2475, '3', '(2)'), + (0x2476, '3', '(3)'), + (0x2477, '3', '(4)'), + (0x2478, '3', '(5)'), + (0x2479, '3', '(6)'), + (0x247A, '3', '(7)'), + (0x247B, '3', '(8)'), + (0x247C, '3', '(9)'), + (0x247D, '3', '(10)'), + (0x247E, '3', '(11)'), + (0x247F, '3', '(12)'), + (0x2480, '3', '(13)'), + (0x2481, '3', '(14)'), + (0x2482, '3', '(15)'), + (0x2483, '3', '(16)'), + (0x2484, '3', '(17)'), + (0x2485, '3', '(18)'), + (0x2486, '3', '(19)'), + (0x2487, '3', '(20)'), + (0x2488, 'X'), + (0x249C, '3', '(a)'), + (0x249D, '3', '(b)'), + (0x249E, '3', '(c)'), + (0x249F, '3', '(d)'), + (0x24A0, '3', '(e)'), + (0x24A1, '3', '(f)'), + (0x24A2, '3', '(g)'), + (0x24A3, '3', '(h)'), + (0x24A4, '3', '(i)'), + (0x24A5, '3', '(j)'), + (0x24A6, '3', '(k)'), + (0x24A7, '3', '(l)'), + (0x24A8, '3', '(m)'), + (0x24A9, '3', '(n)'), + (0x24AA, '3', '(o)'), + (0x24AB, '3', '(p)'), + (0x24AC, '3', '(q)'), + (0x24AD, '3', '(r)'), + (0x24AE, '3', '(s)'), + (0x24AF, '3', '(t)'), + (0x24B0, '3', '(u)'), + (0x24B1, '3', '(v)'), + (0x24B2, '3', '(w)'), + (0x24B3, '3', '(x)'), + (0x24B4, '3', '(y)'), + (0x24B5, '3', '(z)'), + (0x24B6, 'M', 'a'), + (0x24B7, 'M', 'b'), + (0x24B8, 'M', 'c'), + (0x24B9, 'M', 'd'), + (0x24BA, 'M', 'e'), + (0x24BB, 'M', 'f'), + (0x24BC, 'M', 'g'), + ] + +def _seg_24() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x24BD, 'M', 'h'), + (0x24BE, 'M', 'i'), + (0x24BF, 'M', 'j'), + (0x24C0, 'M', 'k'), + (0x24C1, 'M', 'l'), + (0x24C2, 'M', 'm'), + (0x24C3, 'M', 'n'), + (0x24C4, 'M', 'o'), + (0x24C5, 'M', 'p'), + (0x24C6, 'M', 'q'), + (0x24C7, 'M', 'r'), + (0x24C8, 'M', 's'), + (0x24C9, 'M', 't'), + (0x24CA, 'M', 'u'), + (0x24CB, 'M', 'v'), + (0x24CC, 'M', 'w'), + (0x24CD, 'M', 'x'), + (0x24CE, 'M', 'y'), + (0x24CF, 'M', 'z'), + (0x24D0, 'M', 'a'), + (0x24D1, 'M', 'b'), + (0x24D2, 'M', 'c'), + (0x24D3, 'M', 'd'), + (0x24D4, 'M', 'e'), + (0x24D5, 'M', 'f'), + (0x24D6, 'M', 'g'), + (0x24D7, 'M', 'h'), + (0x24D8, 'M', 'i'), + (0x24D9, 'M', 'j'), + (0x24DA, 'M', 'k'), + (0x24DB, 'M', 'l'), + (0x24DC, 'M', 'm'), + (0x24DD, 'M', 'n'), + (0x24DE, 'M', 'o'), + (0x24DF, 'M', 'p'), + (0x24E0, 'M', 'q'), + (0x24E1, 'M', 'r'), + (0x24E2, 'M', 's'), + (0x24E3, 'M', 't'), + (0x24E4, 'M', 'u'), + (0x24E5, 'M', 'v'), + (0x24E6, 'M', 'w'), + (0x24E7, 'M', 'x'), + (0x24E8, 'M', 'y'), + (0x24E9, 'M', 'z'), + (0x24EA, 'M', '0'), + (0x24EB, 'V'), + (0x2A0C, 'M', '∫∫∫∫'), + (0x2A0D, 'V'), + (0x2A74, '3', '::='), + (0x2A75, '3', '=='), + (0x2A76, '3', '==='), + (0x2A77, 'V'), + (0x2ADC, 'M', '⫝̸'), + (0x2ADD, 'V'), + (0x2B74, 'X'), + (0x2B76, 'V'), + (0x2B96, 'X'), + (0x2B97, 'V'), + (0x2C00, 'M', 'ⰰ'), + (0x2C01, 'M', 'ⰱ'), + (0x2C02, 'M', 'ⰲ'), + (0x2C03, 'M', 'ⰳ'), + (0x2C04, 'M', 'ⰴ'), + (0x2C05, 'M', 'ⰵ'), + (0x2C06, 'M', 'ⰶ'), + (0x2C07, 'M', 'ⰷ'), + (0x2C08, 'M', 'ⰸ'), + (0x2C09, 'M', 'ⰹ'), + (0x2C0A, 'M', 'ⰺ'), + (0x2C0B, 'M', 'ⰻ'), + (0x2C0C, 'M', 'ⰼ'), + (0x2C0D, 'M', 'ⰽ'), + (0x2C0E, 'M', 'ⰾ'), + (0x2C0F, 'M', 'ⰿ'), + (0x2C10, 'M', 'ⱀ'), + (0x2C11, 'M', 'ⱁ'), + (0x2C12, 'M', 'ⱂ'), + (0x2C13, 'M', 'ⱃ'), + (0x2C14, 'M', 'ⱄ'), + (0x2C15, 'M', 'ⱅ'), + (0x2C16, 'M', 'ⱆ'), + (0x2C17, 'M', 'ⱇ'), + (0x2C18, 'M', 'ⱈ'), + (0x2C19, 'M', 'ⱉ'), + (0x2C1A, 'M', 'ⱊ'), + (0x2C1B, 'M', 'ⱋ'), + (0x2C1C, 'M', 'ⱌ'), + (0x2C1D, 'M', 'ⱍ'), + (0x2C1E, 'M', 'ⱎ'), + (0x2C1F, 'M', 'ⱏ'), + (0x2C20, 'M', 'ⱐ'), + (0x2C21, 'M', 'ⱑ'), + (0x2C22, 'M', 'ⱒ'), + (0x2C23, 'M', 'ⱓ'), + (0x2C24, 'M', 'ⱔ'), + (0x2C25, 'M', 'ⱕ'), + (0x2C26, 'M', 'ⱖ'), + (0x2C27, 'M', 'ⱗ'), + (0x2C28, 'M', 'ⱘ'), + ] + +def _seg_25() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2C29, 'M', 'ⱙ'), + (0x2C2A, 'M', 'ⱚ'), + (0x2C2B, 'M', 'ⱛ'), + (0x2C2C, 'M', 'ⱜ'), + (0x2C2D, 'M', 'ⱝ'), + (0x2C2E, 'M', 'ⱞ'), + (0x2C2F, 'M', 'ⱟ'), + (0x2C30, 'V'), + (0x2C60, 'M', 'ⱡ'), + (0x2C61, 'V'), + (0x2C62, 'M', 'ɫ'), + (0x2C63, 'M', 'ᵽ'), + (0x2C64, 'M', 'ɽ'), + (0x2C65, 'V'), + (0x2C67, 'M', 'ⱨ'), + (0x2C68, 'V'), + (0x2C69, 'M', 'ⱪ'), + (0x2C6A, 'V'), + (0x2C6B, 'M', 'ⱬ'), + (0x2C6C, 'V'), + (0x2C6D, 'M', 'ɑ'), + (0x2C6E, 'M', 'ɱ'), + (0x2C6F, 'M', 'ɐ'), + (0x2C70, 'M', 'ɒ'), + (0x2C71, 'V'), + (0x2C72, 'M', 'ⱳ'), + (0x2C73, 'V'), + (0x2C75, 'M', 'ⱶ'), + (0x2C76, 'V'), + (0x2C7C, 'M', 'j'), + (0x2C7D, 'M', 'v'), + (0x2C7E, 'M', 'ȿ'), + (0x2C7F, 'M', 'ɀ'), + (0x2C80, 'M', 'ⲁ'), + (0x2C81, 'V'), + (0x2C82, 'M', 'ⲃ'), + (0x2C83, 'V'), + (0x2C84, 'M', 'ⲅ'), + (0x2C85, 'V'), + (0x2C86, 'M', 'ⲇ'), + (0x2C87, 'V'), + (0x2C88, 'M', 'ⲉ'), + (0x2C89, 'V'), + (0x2C8A, 'M', 'ⲋ'), + (0x2C8B, 'V'), + (0x2C8C, 'M', 'ⲍ'), + (0x2C8D, 'V'), + (0x2C8E, 'M', 'ⲏ'), + (0x2C8F, 'V'), + (0x2C90, 'M', 'ⲑ'), + (0x2C91, 'V'), + (0x2C92, 'M', 'ⲓ'), + (0x2C93, 'V'), + (0x2C94, 'M', 'ⲕ'), + (0x2C95, 'V'), + (0x2C96, 'M', 'ⲗ'), + (0x2C97, 'V'), + (0x2C98, 'M', 'ⲙ'), + (0x2C99, 'V'), + (0x2C9A, 'M', 'ⲛ'), + (0x2C9B, 'V'), + (0x2C9C, 'M', 'ⲝ'), + (0x2C9D, 'V'), + (0x2C9E, 'M', 'ⲟ'), + (0x2C9F, 'V'), + (0x2CA0, 'M', 'ⲡ'), + (0x2CA1, 'V'), + (0x2CA2, 'M', 'ⲣ'), + (0x2CA3, 'V'), + (0x2CA4, 'M', 'ⲥ'), + (0x2CA5, 'V'), + (0x2CA6, 'M', 'ⲧ'), + (0x2CA7, 'V'), + (0x2CA8, 'M', 'ⲩ'), + (0x2CA9, 'V'), + (0x2CAA, 'M', 'ⲫ'), + (0x2CAB, 'V'), + (0x2CAC, 'M', 'ⲭ'), + (0x2CAD, 'V'), + (0x2CAE, 'M', 'ⲯ'), + (0x2CAF, 'V'), + (0x2CB0, 'M', 'ⲱ'), + (0x2CB1, 'V'), + (0x2CB2, 'M', 'ⲳ'), + (0x2CB3, 'V'), + (0x2CB4, 'M', 'ⲵ'), + (0x2CB5, 'V'), + (0x2CB6, 'M', 'ⲷ'), + (0x2CB7, 'V'), + (0x2CB8, 'M', 'ⲹ'), + (0x2CB9, 'V'), + (0x2CBA, 'M', 'ⲻ'), + (0x2CBB, 'V'), + (0x2CBC, 'M', 'ⲽ'), + (0x2CBD, 'V'), + (0x2CBE, 'M', 'ⲿ'), + (0x2CBF, 'V'), + (0x2CC0, 'M', 'ⳁ'), + (0x2CC1, 'V'), + (0x2CC2, 'M', 'ⳃ'), + ] + +def _seg_26() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2CC3, 'V'), + (0x2CC4, 'M', 'ⳅ'), + (0x2CC5, 'V'), + (0x2CC6, 'M', 'ⳇ'), + (0x2CC7, 'V'), + (0x2CC8, 'M', 'ⳉ'), + (0x2CC9, 'V'), + (0x2CCA, 'M', 'ⳋ'), + (0x2CCB, 'V'), + (0x2CCC, 'M', 'ⳍ'), + (0x2CCD, 'V'), + (0x2CCE, 'M', 'ⳏ'), + (0x2CCF, 'V'), + (0x2CD0, 'M', 'ⳑ'), + (0x2CD1, 'V'), + (0x2CD2, 'M', 'ⳓ'), + (0x2CD3, 'V'), + (0x2CD4, 'M', 'ⳕ'), + (0x2CD5, 'V'), + (0x2CD6, 'M', 'ⳗ'), + (0x2CD7, 'V'), + (0x2CD8, 'M', 'ⳙ'), + (0x2CD9, 'V'), + (0x2CDA, 'M', 'ⳛ'), + (0x2CDB, 'V'), + (0x2CDC, 'M', 'ⳝ'), + (0x2CDD, 'V'), + (0x2CDE, 'M', 'ⳟ'), + (0x2CDF, 'V'), + (0x2CE0, 'M', 'ⳡ'), + (0x2CE1, 'V'), + (0x2CE2, 'M', 'ⳣ'), + (0x2CE3, 'V'), + (0x2CEB, 'M', 'ⳬ'), + (0x2CEC, 'V'), + (0x2CED, 'M', 'ⳮ'), + (0x2CEE, 'V'), + (0x2CF2, 'M', 'ⳳ'), + (0x2CF3, 'V'), + (0x2CF4, 'X'), + (0x2CF9, 'V'), + (0x2D26, 'X'), + (0x2D27, 'V'), + (0x2D28, 'X'), + (0x2D2D, 'V'), + (0x2D2E, 'X'), + (0x2D30, 'V'), + (0x2D68, 'X'), + (0x2D6F, 'M', 'ⵡ'), + (0x2D70, 'V'), + (0x2D71, 'X'), + (0x2D7F, 'V'), + (0x2D97, 'X'), + (0x2DA0, 'V'), + (0x2DA7, 'X'), + (0x2DA8, 'V'), + (0x2DAF, 'X'), + (0x2DB0, 'V'), + (0x2DB7, 'X'), + (0x2DB8, 'V'), + (0x2DBF, 'X'), + (0x2DC0, 'V'), + (0x2DC7, 'X'), + (0x2DC8, 'V'), + (0x2DCF, 'X'), + (0x2DD0, 'V'), + (0x2DD7, 'X'), + (0x2DD8, 'V'), + (0x2DDF, 'X'), + (0x2DE0, 'V'), + (0x2E5E, 'X'), + (0x2E80, 'V'), + (0x2E9A, 'X'), + (0x2E9B, 'V'), + (0x2E9F, 'M', '母'), + (0x2EA0, 'V'), + (0x2EF3, 'M', '龟'), + (0x2EF4, 'X'), + (0x2F00, 'M', '一'), + (0x2F01, 'M', '丨'), + (0x2F02, 'M', '丶'), + (0x2F03, 'M', '丿'), + (0x2F04, 'M', '乙'), + (0x2F05, 'M', '亅'), + (0x2F06, 'M', '二'), + (0x2F07, 'M', '亠'), + (0x2F08, 'M', '人'), + (0x2F09, 'M', '儿'), + (0x2F0A, 'M', '入'), + (0x2F0B, 'M', '八'), + (0x2F0C, 'M', '冂'), + (0x2F0D, 'M', '冖'), + (0x2F0E, 'M', '冫'), + (0x2F0F, 'M', '几'), + (0x2F10, 'M', '凵'), + (0x2F11, 'M', '刀'), + (0x2F12, 'M', '力'), + (0x2F13, 'M', '勹'), + (0x2F14, 'M', '匕'), + (0x2F15, 'M', '匚'), + ] + +def _seg_27() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F16, 'M', '匸'), + (0x2F17, 'M', '十'), + (0x2F18, 'M', '卜'), + (0x2F19, 'M', '卩'), + (0x2F1A, 'M', '厂'), + (0x2F1B, 'M', '厶'), + (0x2F1C, 'M', '又'), + (0x2F1D, 'M', '口'), + (0x2F1E, 'M', '囗'), + (0x2F1F, 'M', '土'), + (0x2F20, 'M', '士'), + (0x2F21, 'M', '夂'), + (0x2F22, 'M', '夊'), + (0x2F23, 'M', '夕'), + (0x2F24, 'M', '大'), + (0x2F25, 'M', '女'), + (0x2F26, 'M', '子'), + (0x2F27, 'M', '宀'), + (0x2F28, 'M', '寸'), + (0x2F29, 'M', '小'), + (0x2F2A, 'M', '尢'), + (0x2F2B, 'M', '尸'), + (0x2F2C, 'M', '屮'), + (0x2F2D, 'M', '山'), + (0x2F2E, 'M', '巛'), + (0x2F2F, 'M', '工'), + (0x2F30, 'M', '己'), + (0x2F31, 'M', '巾'), + (0x2F32, 'M', '干'), + (0x2F33, 'M', '幺'), + (0x2F34, 'M', '广'), + (0x2F35, 'M', '廴'), + (0x2F36, 'M', '廾'), + (0x2F37, 'M', '弋'), + (0x2F38, 'M', '弓'), + (0x2F39, 'M', '彐'), + (0x2F3A, 'M', '彡'), + (0x2F3B, 'M', '彳'), + (0x2F3C, 'M', '心'), + (0x2F3D, 'M', '戈'), + (0x2F3E, 'M', '戶'), + (0x2F3F, 'M', '手'), + (0x2F40, 'M', '支'), + (0x2F41, 'M', '攴'), + (0x2F42, 'M', '文'), + (0x2F43, 'M', '斗'), + (0x2F44, 'M', '斤'), + (0x2F45, 'M', '方'), + (0x2F46, 'M', '无'), + (0x2F47, 'M', '日'), + (0x2F48, 'M', '曰'), + (0x2F49, 'M', '月'), + (0x2F4A, 'M', '木'), + (0x2F4B, 'M', '欠'), + (0x2F4C, 'M', '止'), + (0x2F4D, 'M', '歹'), + (0x2F4E, 'M', '殳'), + (0x2F4F, 'M', '毋'), + (0x2F50, 'M', '比'), + (0x2F51, 'M', '毛'), + (0x2F52, 'M', '氏'), + (0x2F53, 'M', '气'), + (0x2F54, 'M', '水'), + (0x2F55, 'M', '火'), + (0x2F56, 'M', '爪'), + (0x2F57, 'M', '父'), + (0x2F58, 'M', '爻'), + (0x2F59, 'M', '爿'), + (0x2F5A, 'M', '片'), + (0x2F5B, 'M', '牙'), + (0x2F5C, 'M', '牛'), + (0x2F5D, 'M', '犬'), + (0x2F5E, 'M', '玄'), + (0x2F5F, 'M', '玉'), + (0x2F60, 'M', '瓜'), + (0x2F61, 'M', '瓦'), + (0x2F62, 'M', '甘'), + (0x2F63, 'M', '生'), + (0x2F64, 'M', '用'), + (0x2F65, 'M', '田'), + (0x2F66, 'M', '疋'), + (0x2F67, 'M', '疒'), + (0x2F68, 'M', '癶'), + (0x2F69, 'M', '白'), + (0x2F6A, 'M', '皮'), + (0x2F6B, 'M', '皿'), + (0x2F6C, 'M', '目'), + (0x2F6D, 'M', '矛'), + (0x2F6E, 'M', '矢'), + (0x2F6F, 'M', '石'), + (0x2F70, 'M', '示'), + (0x2F71, 'M', '禸'), + (0x2F72, 'M', '禾'), + (0x2F73, 'M', '穴'), + (0x2F74, 'M', '立'), + (0x2F75, 'M', '竹'), + (0x2F76, 'M', '米'), + (0x2F77, 'M', '糸'), + (0x2F78, 'M', '缶'), + (0x2F79, 'M', '网'), + ] + +def _seg_28() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F7A, 'M', '羊'), + (0x2F7B, 'M', '羽'), + (0x2F7C, 'M', '老'), + (0x2F7D, 'M', '而'), + (0x2F7E, 'M', '耒'), + (0x2F7F, 'M', '耳'), + (0x2F80, 'M', '聿'), + (0x2F81, 'M', '肉'), + (0x2F82, 'M', '臣'), + (0x2F83, 'M', '自'), + (0x2F84, 'M', '至'), + (0x2F85, 'M', '臼'), + (0x2F86, 'M', '舌'), + (0x2F87, 'M', '舛'), + (0x2F88, 'M', '舟'), + (0x2F89, 'M', '艮'), + (0x2F8A, 'M', '色'), + (0x2F8B, 'M', '艸'), + (0x2F8C, 'M', '虍'), + (0x2F8D, 'M', '虫'), + (0x2F8E, 'M', '血'), + (0x2F8F, 'M', '行'), + (0x2F90, 'M', '衣'), + (0x2F91, 'M', '襾'), + (0x2F92, 'M', '見'), + (0x2F93, 'M', '角'), + (0x2F94, 'M', '言'), + (0x2F95, 'M', '谷'), + (0x2F96, 'M', '豆'), + (0x2F97, 'M', '豕'), + (0x2F98, 'M', '豸'), + (0x2F99, 'M', '貝'), + (0x2F9A, 'M', '赤'), + (0x2F9B, 'M', '走'), + (0x2F9C, 'M', '足'), + (0x2F9D, 'M', '身'), + (0x2F9E, 'M', '車'), + (0x2F9F, 'M', '辛'), + (0x2FA0, 'M', '辰'), + (0x2FA1, 'M', '辵'), + (0x2FA2, 'M', '邑'), + (0x2FA3, 'M', '酉'), + (0x2FA4, 'M', '釆'), + (0x2FA5, 'M', '里'), + (0x2FA6, 'M', '金'), + (0x2FA7, 'M', '長'), + (0x2FA8, 'M', '門'), + (0x2FA9, 'M', '阜'), + (0x2FAA, 'M', '隶'), + (0x2FAB, 'M', '隹'), + (0x2FAC, 'M', '雨'), + (0x2FAD, 'M', '靑'), + (0x2FAE, 'M', '非'), + (0x2FAF, 'M', '面'), + (0x2FB0, 'M', '革'), + (0x2FB1, 'M', '韋'), + (0x2FB2, 'M', '韭'), + (0x2FB3, 'M', '音'), + (0x2FB4, 'M', '頁'), + (0x2FB5, 'M', '風'), + (0x2FB6, 'M', '飛'), + (0x2FB7, 'M', '食'), + (0x2FB8, 'M', '首'), + (0x2FB9, 'M', '香'), + (0x2FBA, 'M', '馬'), + (0x2FBB, 'M', '骨'), + (0x2FBC, 'M', '高'), + (0x2FBD, 'M', '髟'), + (0x2FBE, 'M', '鬥'), + (0x2FBF, 'M', '鬯'), + (0x2FC0, 'M', '鬲'), + (0x2FC1, 'M', '鬼'), + (0x2FC2, 'M', '魚'), + (0x2FC3, 'M', '鳥'), + (0x2FC4, 'M', '鹵'), + (0x2FC5, 'M', '鹿'), + (0x2FC6, 'M', '麥'), + (0x2FC7, 'M', '麻'), + (0x2FC8, 'M', '黃'), + (0x2FC9, 'M', '黍'), + (0x2FCA, 'M', '黑'), + (0x2FCB, 'M', '黹'), + (0x2FCC, 'M', '黽'), + (0x2FCD, 'M', '鼎'), + (0x2FCE, 'M', '鼓'), + (0x2FCF, 'M', '鼠'), + (0x2FD0, 'M', '鼻'), + (0x2FD1, 'M', '齊'), + (0x2FD2, 'M', '齒'), + (0x2FD3, 'M', '龍'), + (0x2FD4, 'M', '龜'), + (0x2FD5, 'M', '龠'), + (0x2FD6, 'X'), + (0x3000, '3', ' '), + (0x3001, 'V'), + (0x3002, 'M', '.'), + (0x3003, 'V'), + (0x3036, 'M', '〒'), + (0x3037, 'V'), + (0x3038, 'M', '十'), + ] + +def _seg_29() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x3039, 'M', '卄'), + (0x303A, 'M', '卅'), + (0x303B, 'V'), + (0x3040, 'X'), + (0x3041, 'V'), + (0x3097, 'X'), + (0x3099, 'V'), + (0x309B, '3', ' ゙'), + (0x309C, '3', ' ゚'), + (0x309D, 'V'), + (0x309F, 'M', 'より'), + (0x30A0, 'V'), + (0x30FF, 'M', 'コト'), + (0x3100, 'X'), + (0x3105, 'V'), + (0x3130, 'X'), + (0x3131, 'M', 'ᄀ'), + (0x3132, 'M', 'ᄁ'), + (0x3133, 'M', 'ᆪ'), + (0x3134, 'M', 'ᄂ'), + (0x3135, 'M', 'ᆬ'), + (0x3136, 'M', 'ᆭ'), + (0x3137, 'M', 'ᄃ'), + (0x3138, 'M', 'ᄄ'), + (0x3139, 'M', 'ᄅ'), + (0x313A, 'M', 'ᆰ'), + (0x313B, 'M', 'ᆱ'), + (0x313C, 'M', 'ᆲ'), + (0x313D, 'M', 'ᆳ'), + (0x313E, 'M', 'ᆴ'), + (0x313F, 'M', 'ᆵ'), + (0x3140, 'M', 'ᄚ'), + (0x3141, 'M', 'ᄆ'), + (0x3142, 'M', 'ᄇ'), + (0x3143, 'M', 'ᄈ'), + (0x3144, 'M', 'ᄡ'), + (0x3145, 'M', 'ᄉ'), + (0x3146, 'M', 'ᄊ'), + (0x3147, 'M', 'ᄋ'), + (0x3148, 'M', 'ᄌ'), + (0x3149, 'M', 'ᄍ'), + (0x314A, 'M', 'ᄎ'), + (0x314B, 'M', 'ᄏ'), + (0x314C, 'M', 'ᄐ'), + (0x314D, 'M', 'ᄑ'), + (0x314E, 'M', 'ᄒ'), + (0x314F, 'M', 'ᅡ'), + (0x3150, 'M', 'ᅢ'), + (0x3151, 'M', 'ᅣ'), + (0x3152, 'M', 'ᅤ'), + (0x3153, 'M', 'ᅥ'), + (0x3154, 'M', 'ᅦ'), + (0x3155, 'M', 'ᅧ'), + (0x3156, 'M', 'ᅨ'), + (0x3157, 'M', 'ᅩ'), + (0x3158, 'M', 'ᅪ'), + (0x3159, 'M', 'ᅫ'), + (0x315A, 'M', 'ᅬ'), + (0x315B, 'M', 'ᅭ'), + (0x315C, 'M', 'ᅮ'), + (0x315D, 'M', 'ᅯ'), + (0x315E, 'M', 'ᅰ'), + (0x315F, 'M', 'ᅱ'), + (0x3160, 'M', 'ᅲ'), + (0x3161, 'M', 'ᅳ'), + (0x3162, 'M', 'ᅴ'), + (0x3163, 'M', 'ᅵ'), + (0x3164, 'X'), + (0x3165, 'M', 'ᄔ'), + (0x3166, 'M', 'ᄕ'), + (0x3167, 'M', 'ᇇ'), + (0x3168, 'M', 'ᇈ'), + (0x3169, 'M', 'ᇌ'), + (0x316A, 'M', 'ᇎ'), + (0x316B, 'M', 'ᇓ'), + (0x316C, 'M', 'ᇗ'), + (0x316D, 'M', 'ᇙ'), + (0x316E, 'M', 'ᄜ'), + (0x316F, 'M', 'ᇝ'), + (0x3170, 'M', 'ᇟ'), + (0x3171, 'M', 'ᄝ'), + (0x3172, 'M', 'ᄞ'), + (0x3173, 'M', 'ᄠ'), + (0x3174, 'M', 'ᄢ'), + (0x3175, 'M', 'ᄣ'), + (0x3176, 'M', 'ᄧ'), + (0x3177, 'M', 'ᄩ'), + (0x3178, 'M', 'ᄫ'), + (0x3179, 'M', 'ᄬ'), + (0x317A, 'M', 'ᄭ'), + (0x317B, 'M', 'ᄮ'), + (0x317C, 'M', 'ᄯ'), + (0x317D, 'M', 'ᄲ'), + (0x317E, 'M', 'ᄶ'), + (0x317F, 'M', 'ᅀ'), + (0x3180, 'M', 'ᅇ'), + (0x3181, 'M', 'ᅌ'), + (0x3182, 'M', 'ᇱ'), + (0x3183, 'M', 'ᇲ'), + (0x3184, 'M', 'ᅗ'), + ] + +def _seg_30() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x3185, 'M', 'ᅘ'), + (0x3186, 'M', 'ᅙ'), + (0x3187, 'M', 'ᆄ'), + (0x3188, 'M', 'ᆅ'), + (0x3189, 'M', 'ᆈ'), + (0x318A, 'M', 'ᆑ'), + (0x318B, 'M', 'ᆒ'), + (0x318C, 'M', 'ᆔ'), + (0x318D, 'M', 'ᆞ'), + (0x318E, 'M', 'ᆡ'), + (0x318F, 'X'), + (0x3190, 'V'), + (0x3192, 'M', '一'), + (0x3193, 'M', '二'), + (0x3194, 'M', '三'), + (0x3195, 'M', '四'), + (0x3196, 'M', '上'), + (0x3197, 'M', '中'), + (0x3198, 'M', '下'), + (0x3199, 'M', '甲'), + (0x319A, 'M', '乙'), + (0x319B, 'M', '丙'), + (0x319C, 'M', '丁'), + (0x319D, 'M', '天'), + (0x319E, 'M', '地'), + (0x319F, 'M', '人'), + (0x31A0, 'V'), + (0x31E4, 'X'), + (0x31F0, 'V'), + (0x3200, '3', '(ᄀ)'), + (0x3201, '3', '(ᄂ)'), + (0x3202, '3', '(ᄃ)'), + (0x3203, '3', '(ᄅ)'), + (0x3204, '3', '(ᄆ)'), + (0x3205, '3', '(ᄇ)'), + (0x3206, '3', '(ᄉ)'), + (0x3207, '3', '(ᄋ)'), + (0x3208, '3', '(ᄌ)'), + (0x3209, '3', '(ᄎ)'), + (0x320A, '3', '(ᄏ)'), + (0x320B, '3', '(ᄐ)'), + (0x320C, '3', '(ᄑ)'), + (0x320D, '3', '(ᄒ)'), + (0x320E, '3', '(가)'), + (0x320F, '3', '(나)'), + (0x3210, '3', '(다)'), + (0x3211, '3', '(라)'), + (0x3212, '3', '(마)'), + (0x3213, '3', '(바)'), + (0x3214, '3', '(사)'), + (0x3215, '3', '(아)'), + (0x3216, '3', '(자)'), + (0x3217, '3', '(차)'), + (0x3218, '3', '(카)'), + (0x3219, '3', '(타)'), + (0x321A, '3', '(파)'), + (0x321B, '3', '(하)'), + (0x321C, '3', '(주)'), + (0x321D, '3', '(오전)'), + (0x321E, '3', '(오후)'), + (0x321F, 'X'), + (0x3220, '3', '(一)'), + (0x3221, '3', '(二)'), + (0x3222, '3', '(三)'), + (0x3223, '3', '(四)'), + (0x3224, '3', '(五)'), + (0x3225, '3', '(六)'), + (0x3226, '3', '(七)'), + (0x3227, '3', '(八)'), + (0x3228, '3', '(九)'), + (0x3229, '3', '(十)'), + (0x322A, '3', '(月)'), + (0x322B, '3', '(火)'), + (0x322C, '3', '(水)'), + (0x322D, '3', '(木)'), + (0x322E, '3', '(金)'), + (0x322F, '3', '(土)'), + (0x3230, '3', '(日)'), + (0x3231, '3', '(株)'), + (0x3232, '3', '(有)'), + (0x3233, '3', '(社)'), + (0x3234, '3', '(名)'), + (0x3235, '3', '(特)'), + (0x3236, '3', '(財)'), + (0x3237, '3', '(祝)'), + (0x3238, '3', '(労)'), + (0x3239, '3', '(代)'), + (0x323A, '3', '(呼)'), + (0x323B, '3', '(学)'), + (0x323C, '3', '(監)'), + (0x323D, '3', '(企)'), + (0x323E, '3', '(資)'), + (0x323F, '3', '(協)'), + (0x3240, '3', '(祭)'), + (0x3241, '3', '(休)'), + (0x3242, '3', '(自)'), + (0x3243, '3', '(至)'), + (0x3244, 'M', '問'), + (0x3245, 'M', '幼'), + (0x3246, 'M', '文'), + ] + +def _seg_31() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x3247, 'M', '箏'), + (0x3248, 'V'), + (0x3250, 'M', 'pte'), + (0x3251, 'M', '21'), + (0x3252, 'M', '22'), + (0x3253, 'M', '23'), + (0x3254, 'M', '24'), + (0x3255, 'M', '25'), + (0x3256, 'M', '26'), + (0x3257, 'M', '27'), + (0x3258, 'M', '28'), + (0x3259, 'M', '29'), + (0x325A, 'M', '30'), + (0x325B, 'M', '31'), + (0x325C, 'M', '32'), + (0x325D, 'M', '33'), + (0x325E, 'M', '34'), + (0x325F, 'M', '35'), + (0x3260, 'M', 'ᄀ'), + (0x3261, 'M', 'ᄂ'), + (0x3262, 'M', 'ᄃ'), + (0x3263, 'M', 'ᄅ'), + (0x3264, 'M', 'ᄆ'), + (0x3265, 'M', 'ᄇ'), + (0x3266, 'M', 'ᄉ'), + (0x3267, 'M', 'ᄋ'), + (0x3268, 'M', 'ᄌ'), + (0x3269, 'M', 'ᄎ'), + (0x326A, 'M', 'ᄏ'), + (0x326B, 'M', 'ᄐ'), + (0x326C, 'M', 'ᄑ'), + (0x326D, 'M', 'ᄒ'), + (0x326E, 'M', '가'), + (0x326F, 'M', '나'), + (0x3270, 'M', '다'), + (0x3271, 'M', '라'), + (0x3272, 'M', '마'), + (0x3273, 'M', '바'), + (0x3274, 'M', '사'), + (0x3275, 'M', '아'), + (0x3276, 'M', '자'), + (0x3277, 'M', '차'), + (0x3278, 'M', '카'), + (0x3279, 'M', '타'), + (0x327A, 'M', '파'), + (0x327B, 'M', '하'), + (0x327C, 'M', '참고'), + (0x327D, 'M', '주의'), + (0x327E, 'M', '우'), + (0x327F, 'V'), + (0x3280, 'M', '一'), + (0x3281, 'M', '二'), + (0x3282, 'M', '三'), + (0x3283, 'M', '四'), + (0x3284, 'M', '五'), + (0x3285, 'M', '六'), + (0x3286, 'M', '七'), + (0x3287, 'M', '八'), + (0x3288, 'M', '九'), + (0x3289, 'M', '十'), + (0x328A, 'M', '月'), + (0x328B, 'M', '火'), + (0x328C, 'M', '水'), + (0x328D, 'M', '木'), + (0x328E, 'M', '金'), + (0x328F, 'M', '土'), + (0x3290, 'M', '日'), + (0x3291, 'M', '株'), + (0x3292, 'M', '有'), + (0x3293, 'M', '社'), + (0x3294, 'M', '名'), + (0x3295, 'M', '特'), + (0x3296, 'M', '財'), + (0x3297, 'M', '祝'), + (0x3298, 'M', '労'), + (0x3299, 'M', '秘'), + (0x329A, 'M', '男'), + (0x329B, 'M', '女'), + (0x329C, 'M', '適'), + (0x329D, 'M', '優'), + (0x329E, 'M', '印'), + (0x329F, 'M', '注'), + (0x32A0, 'M', '項'), + (0x32A1, 'M', '休'), + (0x32A2, 'M', '写'), + (0x32A3, 'M', '正'), + (0x32A4, 'M', '上'), + (0x32A5, 'M', '中'), + (0x32A6, 'M', '下'), + (0x32A7, 'M', '左'), + (0x32A8, 'M', '右'), + (0x32A9, 'M', '医'), + (0x32AA, 'M', '宗'), + (0x32AB, 'M', '学'), + (0x32AC, 'M', '監'), + (0x32AD, 'M', '企'), + (0x32AE, 'M', '資'), + (0x32AF, 'M', '協'), + (0x32B0, 'M', '夜'), + (0x32B1, 'M', '36'), + ] + +def _seg_32() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x32B2, 'M', '37'), + (0x32B3, 'M', '38'), + (0x32B4, 'M', '39'), + (0x32B5, 'M', '40'), + (0x32B6, 'M', '41'), + (0x32B7, 'M', '42'), + (0x32B8, 'M', '43'), + (0x32B9, 'M', '44'), + (0x32BA, 'M', '45'), + (0x32BB, 'M', '46'), + (0x32BC, 'M', '47'), + (0x32BD, 'M', '48'), + (0x32BE, 'M', '49'), + (0x32BF, 'M', '50'), + (0x32C0, 'M', '1月'), + (0x32C1, 'M', '2月'), + (0x32C2, 'M', '3月'), + (0x32C3, 'M', '4月'), + (0x32C4, 'M', '5月'), + (0x32C5, 'M', '6月'), + (0x32C6, 'M', '7月'), + (0x32C7, 'M', '8月'), + (0x32C8, 'M', '9月'), + (0x32C9, 'M', '10月'), + (0x32CA, 'M', '11月'), + (0x32CB, 'M', '12月'), + (0x32CC, 'M', 'hg'), + (0x32CD, 'M', 'erg'), + (0x32CE, 'M', 'ev'), + (0x32CF, 'M', 'ltd'), + (0x32D0, 'M', 'ア'), + (0x32D1, 'M', 'イ'), + (0x32D2, 'M', 'ウ'), + (0x32D3, 'M', 'エ'), + (0x32D4, 'M', 'オ'), + (0x32D5, 'M', 'カ'), + (0x32D6, 'M', 'キ'), + (0x32D7, 'M', 'ク'), + (0x32D8, 'M', 'ケ'), + (0x32D9, 'M', 'コ'), + (0x32DA, 'M', 'サ'), + (0x32DB, 'M', 'シ'), + (0x32DC, 'M', 'ス'), + (0x32DD, 'M', 'セ'), + (0x32DE, 'M', 'ソ'), + (0x32DF, 'M', 'タ'), + (0x32E0, 'M', 'チ'), + (0x32E1, 'M', 'ツ'), + (0x32E2, 'M', 'テ'), + (0x32E3, 'M', 'ト'), + (0x32E4, 'M', 'ナ'), + (0x32E5, 'M', 'ニ'), + (0x32E6, 'M', 'ヌ'), + (0x32E7, 'M', 'ネ'), + (0x32E8, 'M', 'ノ'), + (0x32E9, 'M', 'ハ'), + (0x32EA, 'M', 'ヒ'), + (0x32EB, 'M', 'フ'), + (0x32EC, 'M', 'ヘ'), + (0x32ED, 'M', 'ホ'), + (0x32EE, 'M', 'マ'), + (0x32EF, 'M', 'ミ'), + (0x32F0, 'M', 'ム'), + (0x32F1, 'M', 'メ'), + (0x32F2, 'M', 'モ'), + (0x32F3, 'M', 'ヤ'), + (0x32F4, 'M', 'ユ'), + (0x32F5, 'M', 'ヨ'), + (0x32F6, 'M', 'ラ'), + (0x32F7, 'M', 'リ'), + (0x32F8, 'M', 'ル'), + (0x32F9, 'M', 'レ'), + (0x32FA, 'M', 'ロ'), + (0x32FB, 'M', 'ワ'), + (0x32FC, 'M', 'ヰ'), + (0x32FD, 'M', 'ヱ'), + (0x32FE, 'M', 'ヲ'), + (0x32FF, 'M', '令和'), + (0x3300, 'M', 'アパート'), + (0x3301, 'M', 'アルファ'), + (0x3302, 'M', 'アンペア'), + (0x3303, 'M', 'アール'), + (0x3304, 'M', 'イニング'), + (0x3305, 'M', 'インチ'), + (0x3306, 'M', 'ウォン'), + (0x3307, 'M', 'エスクード'), + (0x3308, 'M', 'エーカー'), + (0x3309, 'M', 'オンス'), + (0x330A, 'M', 'オーム'), + (0x330B, 'M', 'カイリ'), + (0x330C, 'M', 'カラット'), + (0x330D, 'M', 'カロリー'), + (0x330E, 'M', 'ガロン'), + (0x330F, 'M', 'ガンマ'), + (0x3310, 'M', 'ギガ'), + (0x3311, 'M', 'ギニー'), + (0x3312, 'M', 'キュリー'), + (0x3313, 'M', 'ギルダー'), + (0x3314, 'M', 'キロ'), + (0x3315, 'M', 'キログラム'), + ] + +def _seg_33() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x3316, 'M', 'キロメートル'), + (0x3317, 'M', 'キロワット'), + (0x3318, 'M', 'グラム'), + (0x3319, 'M', 'グラムトン'), + (0x331A, 'M', 'クルゼイロ'), + (0x331B, 'M', 'クローネ'), + (0x331C, 'M', 'ケース'), + (0x331D, 'M', 'コルナ'), + (0x331E, 'M', 'コーポ'), + (0x331F, 'M', 'サイクル'), + (0x3320, 'M', 'サンチーム'), + (0x3321, 'M', 'シリング'), + (0x3322, 'M', 'センチ'), + (0x3323, 'M', 'セント'), + (0x3324, 'M', 'ダース'), + (0x3325, 'M', 'デシ'), + (0x3326, 'M', 'ドル'), + (0x3327, 'M', 'トン'), + (0x3328, 'M', 'ナノ'), + (0x3329, 'M', 'ノット'), + (0x332A, 'M', 'ハイツ'), + (0x332B, 'M', 'パーセント'), + (0x332C, 'M', 'パーツ'), + (0x332D, 'M', 'バーレル'), + (0x332E, 'M', 'ピアストル'), + (0x332F, 'M', 'ピクル'), + (0x3330, 'M', 'ピコ'), + (0x3331, 'M', 'ビル'), + (0x3332, 'M', 'ファラッド'), + (0x3333, 'M', 'フィート'), + (0x3334, 'M', 'ブッシェル'), + (0x3335, 'M', 'フラン'), + (0x3336, 'M', 'ヘクタール'), + (0x3337, 'M', 'ペソ'), + (0x3338, 'M', 'ペニヒ'), + (0x3339, 'M', 'ヘルツ'), + (0x333A, 'M', 'ペンス'), + (0x333B, 'M', 'ページ'), + (0x333C, 'M', 'ベータ'), + (0x333D, 'M', 'ポイント'), + (0x333E, 'M', 'ボルト'), + (0x333F, 'M', 'ホン'), + (0x3340, 'M', 'ポンド'), + (0x3341, 'M', 'ホール'), + (0x3342, 'M', 'ホーン'), + (0x3343, 'M', 'マイクロ'), + (0x3344, 'M', 'マイル'), + (0x3345, 'M', 'マッハ'), + (0x3346, 'M', 'マルク'), + (0x3347, 'M', 'マンション'), + (0x3348, 'M', 'ミクロン'), + (0x3349, 'M', 'ミリ'), + (0x334A, 'M', 'ミリバール'), + (0x334B, 'M', 'メガ'), + (0x334C, 'M', 'メガトン'), + (0x334D, 'M', 'メートル'), + (0x334E, 'M', 'ヤード'), + (0x334F, 'M', 'ヤール'), + (0x3350, 'M', 'ユアン'), + (0x3351, 'M', 'リットル'), + (0x3352, 'M', 'リラ'), + (0x3353, 'M', 'ルピー'), + (0x3354, 'M', 'ルーブル'), + (0x3355, 'M', 'レム'), + (0x3356, 'M', 'レントゲン'), + (0x3357, 'M', 'ワット'), + (0x3358, 'M', '0点'), + (0x3359, 'M', '1点'), + (0x335A, 'M', '2点'), + (0x335B, 'M', '3点'), + (0x335C, 'M', '4点'), + (0x335D, 'M', '5点'), + (0x335E, 'M', '6点'), + (0x335F, 'M', '7点'), + (0x3360, 'M', '8点'), + (0x3361, 'M', '9点'), + (0x3362, 'M', '10点'), + (0x3363, 'M', '11点'), + (0x3364, 'M', '12点'), + (0x3365, 'M', '13点'), + (0x3366, 'M', '14点'), + (0x3367, 'M', '15点'), + (0x3368, 'M', '16点'), + (0x3369, 'M', '17点'), + (0x336A, 'M', '18点'), + (0x336B, 'M', '19点'), + (0x336C, 'M', '20点'), + (0x336D, 'M', '21点'), + (0x336E, 'M', '22点'), + (0x336F, 'M', '23点'), + (0x3370, 'M', '24点'), + (0x3371, 'M', 'hpa'), + (0x3372, 'M', 'da'), + (0x3373, 'M', 'au'), + (0x3374, 'M', 'bar'), + (0x3375, 'M', 'ov'), + (0x3376, 'M', 'pc'), + (0x3377, 'M', 'dm'), + (0x3378, 'M', 'dm2'), + (0x3379, 'M', 'dm3'), + ] + +def _seg_34() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x337A, 'M', 'iu'), + (0x337B, 'M', '平成'), + (0x337C, 'M', '昭和'), + (0x337D, 'M', '大正'), + (0x337E, 'M', '明治'), + (0x337F, 'M', '株式会社'), + (0x3380, 'M', 'pa'), + (0x3381, 'M', 'na'), + (0x3382, 'M', 'μa'), + (0x3383, 'M', 'ma'), + (0x3384, 'M', 'ka'), + (0x3385, 'M', 'kb'), + (0x3386, 'M', 'mb'), + (0x3387, 'M', 'gb'), + (0x3388, 'M', 'cal'), + (0x3389, 'M', 'kcal'), + (0x338A, 'M', 'pf'), + (0x338B, 'M', 'nf'), + (0x338C, 'M', 'μf'), + (0x338D, 'M', 'μg'), + (0x338E, 'M', 'mg'), + (0x338F, 'M', 'kg'), + (0x3390, 'M', 'hz'), + (0x3391, 'M', 'khz'), + (0x3392, 'M', 'mhz'), + (0x3393, 'M', 'ghz'), + (0x3394, 'M', 'thz'), + (0x3395, 'M', 'μl'), + (0x3396, 'M', 'ml'), + (0x3397, 'M', 'dl'), + (0x3398, 'M', 'kl'), + (0x3399, 'M', 'fm'), + (0x339A, 'M', 'nm'), + (0x339B, 'M', 'μm'), + (0x339C, 'M', 'mm'), + (0x339D, 'M', 'cm'), + (0x339E, 'M', 'km'), + (0x339F, 'M', 'mm2'), + (0x33A0, 'M', 'cm2'), + (0x33A1, 'M', 'm2'), + (0x33A2, 'M', 'km2'), + (0x33A3, 'M', 'mm3'), + (0x33A4, 'M', 'cm3'), + (0x33A5, 'M', 'm3'), + (0x33A6, 'M', 'km3'), + (0x33A7, 'M', 'm∕s'), + (0x33A8, 'M', 'm∕s2'), + (0x33A9, 'M', 'pa'), + (0x33AA, 'M', 'kpa'), + (0x33AB, 'M', 'mpa'), + (0x33AC, 'M', 'gpa'), + (0x33AD, 'M', 'rad'), + (0x33AE, 'M', 'rad∕s'), + (0x33AF, 'M', 'rad∕s2'), + (0x33B0, 'M', 'ps'), + (0x33B1, 'M', 'ns'), + (0x33B2, 'M', 'μs'), + (0x33B3, 'M', 'ms'), + (0x33B4, 'M', 'pv'), + (0x33B5, 'M', 'nv'), + (0x33B6, 'M', 'μv'), + (0x33B7, 'M', 'mv'), + (0x33B8, 'M', 'kv'), + (0x33B9, 'M', 'mv'), + (0x33BA, 'M', 'pw'), + (0x33BB, 'M', 'nw'), + (0x33BC, 'M', 'μw'), + (0x33BD, 'M', 'mw'), + (0x33BE, 'M', 'kw'), + (0x33BF, 'M', 'mw'), + (0x33C0, 'M', 'kω'), + (0x33C1, 'M', 'mω'), + (0x33C2, 'X'), + (0x33C3, 'M', 'bq'), + (0x33C4, 'M', 'cc'), + (0x33C5, 'M', 'cd'), + (0x33C6, 'M', 'c∕kg'), + (0x33C7, 'X'), + (0x33C8, 'M', 'db'), + (0x33C9, 'M', 'gy'), + (0x33CA, 'M', 'ha'), + (0x33CB, 'M', 'hp'), + (0x33CC, 'M', 'in'), + (0x33CD, 'M', 'kk'), + (0x33CE, 'M', 'km'), + (0x33CF, 'M', 'kt'), + (0x33D0, 'M', 'lm'), + (0x33D1, 'M', 'ln'), + (0x33D2, 'M', 'log'), + (0x33D3, 'M', 'lx'), + (0x33D4, 'M', 'mb'), + (0x33D5, 'M', 'mil'), + (0x33D6, 'M', 'mol'), + (0x33D7, 'M', 'ph'), + (0x33D8, 'X'), + (0x33D9, 'M', 'ppm'), + (0x33DA, 'M', 'pr'), + (0x33DB, 'M', 'sr'), + (0x33DC, 'M', 'sv'), + (0x33DD, 'M', 'wb'), + ] + +def _seg_35() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x33DE, 'M', 'v∕m'), + (0x33DF, 'M', 'a∕m'), + (0x33E0, 'M', '1日'), + (0x33E1, 'M', '2日'), + (0x33E2, 'M', '3日'), + (0x33E3, 'M', '4日'), + (0x33E4, 'M', '5日'), + (0x33E5, 'M', '6日'), + (0x33E6, 'M', '7日'), + (0x33E7, 'M', '8日'), + (0x33E8, 'M', '9日'), + (0x33E9, 'M', '10日'), + (0x33EA, 'M', '11日'), + (0x33EB, 'M', '12日'), + (0x33EC, 'M', '13日'), + (0x33ED, 'M', '14日'), + (0x33EE, 'M', '15日'), + (0x33EF, 'M', '16日'), + (0x33F0, 'M', '17日'), + (0x33F1, 'M', '18日'), + (0x33F2, 'M', '19日'), + (0x33F3, 'M', '20日'), + (0x33F4, 'M', '21日'), + (0x33F5, 'M', '22日'), + (0x33F6, 'M', '23日'), + (0x33F7, 'M', '24日'), + (0x33F8, 'M', '25日'), + (0x33F9, 'M', '26日'), + (0x33FA, 'M', '27日'), + (0x33FB, 'M', '28日'), + (0x33FC, 'M', '29日'), + (0x33FD, 'M', '30日'), + (0x33FE, 'M', '31日'), + (0x33FF, 'M', 'gal'), + (0x3400, 'V'), + (0xA48D, 'X'), + (0xA490, 'V'), + (0xA4C7, 'X'), + (0xA4D0, 'V'), + (0xA62C, 'X'), + (0xA640, 'M', 'ꙁ'), + (0xA641, 'V'), + (0xA642, 'M', 'ꙃ'), + (0xA643, 'V'), + (0xA644, 'M', 'ꙅ'), + (0xA645, 'V'), + (0xA646, 'M', 'ꙇ'), + (0xA647, 'V'), + (0xA648, 'M', 'ꙉ'), + (0xA649, 'V'), + (0xA64A, 'M', 'ꙋ'), + (0xA64B, 'V'), + (0xA64C, 'M', 'ꙍ'), + (0xA64D, 'V'), + (0xA64E, 'M', 'ꙏ'), + (0xA64F, 'V'), + (0xA650, 'M', 'ꙑ'), + (0xA651, 'V'), + (0xA652, 'M', 'ꙓ'), + (0xA653, 'V'), + (0xA654, 'M', 'ꙕ'), + (0xA655, 'V'), + (0xA656, 'M', 'ꙗ'), + (0xA657, 'V'), + (0xA658, 'M', 'ꙙ'), + (0xA659, 'V'), + (0xA65A, 'M', 'ꙛ'), + (0xA65B, 'V'), + (0xA65C, 'M', 'ꙝ'), + (0xA65D, 'V'), + (0xA65E, 'M', 'ꙟ'), + (0xA65F, 'V'), + (0xA660, 'M', 'ꙡ'), + (0xA661, 'V'), + (0xA662, 'M', 'ꙣ'), + (0xA663, 'V'), + (0xA664, 'M', 'ꙥ'), + (0xA665, 'V'), + (0xA666, 'M', 'ꙧ'), + (0xA667, 'V'), + (0xA668, 'M', 'ꙩ'), + (0xA669, 'V'), + (0xA66A, 'M', 'ꙫ'), + (0xA66B, 'V'), + (0xA66C, 'M', 'ꙭ'), + (0xA66D, 'V'), + (0xA680, 'M', 'ꚁ'), + (0xA681, 'V'), + (0xA682, 'M', 'ꚃ'), + (0xA683, 'V'), + (0xA684, 'M', 'ꚅ'), + (0xA685, 'V'), + (0xA686, 'M', 'ꚇ'), + (0xA687, 'V'), + (0xA688, 'M', 'ꚉ'), + (0xA689, 'V'), + (0xA68A, 'M', 'ꚋ'), + (0xA68B, 'V'), + (0xA68C, 'M', 'ꚍ'), + (0xA68D, 'V'), + ] + +def _seg_36() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA68E, 'M', 'ꚏ'), + (0xA68F, 'V'), + (0xA690, 'M', 'ꚑ'), + (0xA691, 'V'), + (0xA692, 'M', 'ꚓ'), + (0xA693, 'V'), + (0xA694, 'M', 'ꚕ'), + (0xA695, 'V'), + (0xA696, 'M', 'ꚗ'), + (0xA697, 'V'), + (0xA698, 'M', 'ꚙ'), + (0xA699, 'V'), + (0xA69A, 'M', 'ꚛ'), + (0xA69B, 'V'), + (0xA69C, 'M', 'ъ'), + (0xA69D, 'M', 'ь'), + (0xA69E, 'V'), + (0xA6F8, 'X'), + (0xA700, 'V'), + (0xA722, 'M', 'ꜣ'), + (0xA723, 'V'), + (0xA724, 'M', 'ꜥ'), + (0xA725, 'V'), + (0xA726, 'M', 'ꜧ'), + (0xA727, 'V'), + (0xA728, 'M', 'ꜩ'), + (0xA729, 'V'), + (0xA72A, 'M', 'ꜫ'), + (0xA72B, 'V'), + (0xA72C, 'M', 'ꜭ'), + (0xA72D, 'V'), + (0xA72E, 'M', 'ꜯ'), + (0xA72F, 'V'), + (0xA732, 'M', 'ꜳ'), + (0xA733, 'V'), + (0xA734, 'M', 'ꜵ'), + (0xA735, 'V'), + (0xA736, 'M', 'ꜷ'), + (0xA737, 'V'), + (0xA738, 'M', 'ꜹ'), + (0xA739, 'V'), + (0xA73A, 'M', 'ꜻ'), + (0xA73B, 'V'), + (0xA73C, 'M', 'ꜽ'), + (0xA73D, 'V'), + (0xA73E, 'M', 'ꜿ'), + (0xA73F, 'V'), + (0xA740, 'M', 'ꝁ'), + (0xA741, 'V'), + (0xA742, 'M', 'ꝃ'), + (0xA743, 'V'), + (0xA744, 'M', 'ꝅ'), + (0xA745, 'V'), + (0xA746, 'M', 'ꝇ'), + (0xA747, 'V'), + (0xA748, 'M', 'ꝉ'), + (0xA749, 'V'), + (0xA74A, 'M', 'ꝋ'), + (0xA74B, 'V'), + (0xA74C, 'M', 'ꝍ'), + (0xA74D, 'V'), + (0xA74E, 'M', 'ꝏ'), + (0xA74F, 'V'), + (0xA750, 'M', 'ꝑ'), + (0xA751, 'V'), + (0xA752, 'M', 'ꝓ'), + (0xA753, 'V'), + (0xA754, 'M', 'ꝕ'), + (0xA755, 'V'), + (0xA756, 'M', 'ꝗ'), + (0xA757, 'V'), + (0xA758, 'M', 'ꝙ'), + (0xA759, 'V'), + (0xA75A, 'M', 'ꝛ'), + (0xA75B, 'V'), + (0xA75C, 'M', 'ꝝ'), + (0xA75D, 'V'), + (0xA75E, 'M', 'ꝟ'), + (0xA75F, 'V'), + (0xA760, 'M', 'ꝡ'), + (0xA761, 'V'), + (0xA762, 'M', 'ꝣ'), + (0xA763, 'V'), + (0xA764, 'M', 'ꝥ'), + (0xA765, 'V'), + (0xA766, 'M', 'ꝧ'), + (0xA767, 'V'), + (0xA768, 'M', 'ꝩ'), + (0xA769, 'V'), + (0xA76A, 'M', 'ꝫ'), + (0xA76B, 'V'), + (0xA76C, 'M', 'ꝭ'), + (0xA76D, 'V'), + (0xA76E, 'M', 'ꝯ'), + (0xA76F, 'V'), + (0xA770, 'M', 'ꝯ'), + (0xA771, 'V'), + (0xA779, 'M', 'ꝺ'), + (0xA77A, 'V'), + (0xA77B, 'M', 'ꝼ'), + ] + +def _seg_37() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA77C, 'V'), + (0xA77D, 'M', 'ᵹ'), + (0xA77E, 'M', 'ꝿ'), + (0xA77F, 'V'), + (0xA780, 'M', 'ꞁ'), + (0xA781, 'V'), + (0xA782, 'M', 'ꞃ'), + (0xA783, 'V'), + (0xA784, 'M', 'ꞅ'), + (0xA785, 'V'), + (0xA786, 'M', 'ꞇ'), + (0xA787, 'V'), + (0xA78B, 'M', 'ꞌ'), + (0xA78C, 'V'), + (0xA78D, 'M', 'ɥ'), + (0xA78E, 'V'), + (0xA790, 'M', 'ꞑ'), + (0xA791, 'V'), + (0xA792, 'M', 'ꞓ'), + (0xA793, 'V'), + (0xA796, 'M', 'ꞗ'), + (0xA797, 'V'), + (0xA798, 'M', 'ꞙ'), + (0xA799, 'V'), + (0xA79A, 'M', 'ꞛ'), + (0xA79B, 'V'), + (0xA79C, 'M', 'ꞝ'), + (0xA79D, 'V'), + (0xA79E, 'M', 'ꞟ'), + (0xA79F, 'V'), + (0xA7A0, 'M', 'ꞡ'), + (0xA7A1, 'V'), + (0xA7A2, 'M', 'ꞣ'), + (0xA7A3, 'V'), + (0xA7A4, 'M', 'ꞥ'), + (0xA7A5, 'V'), + (0xA7A6, 'M', 'ꞧ'), + (0xA7A7, 'V'), + (0xA7A8, 'M', 'ꞩ'), + (0xA7A9, 'V'), + (0xA7AA, 'M', 'ɦ'), + (0xA7AB, 'M', 'ɜ'), + (0xA7AC, 'M', 'ɡ'), + (0xA7AD, 'M', 'ɬ'), + (0xA7AE, 'M', 'ɪ'), + (0xA7AF, 'V'), + (0xA7B0, 'M', 'ʞ'), + (0xA7B1, 'M', 'ʇ'), + (0xA7B2, 'M', 'ʝ'), + (0xA7B3, 'M', 'ꭓ'), + (0xA7B4, 'M', 'ꞵ'), + (0xA7B5, 'V'), + (0xA7B6, 'M', 'ꞷ'), + (0xA7B7, 'V'), + (0xA7B8, 'M', 'ꞹ'), + (0xA7B9, 'V'), + (0xA7BA, 'M', 'ꞻ'), + (0xA7BB, 'V'), + (0xA7BC, 'M', 'ꞽ'), + (0xA7BD, 'V'), + (0xA7BE, 'M', 'ꞿ'), + (0xA7BF, 'V'), + (0xA7C0, 'M', 'ꟁ'), + (0xA7C1, 'V'), + (0xA7C2, 'M', 'ꟃ'), + (0xA7C3, 'V'), + (0xA7C4, 'M', 'ꞔ'), + (0xA7C5, 'M', 'ʂ'), + (0xA7C6, 'M', 'ᶎ'), + (0xA7C7, 'M', 'ꟈ'), + (0xA7C8, 'V'), + (0xA7C9, 'M', 'ꟊ'), + (0xA7CA, 'V'), + (0xA7CB, 'X'), + (0xA7D0, 'M', 'ꟑ'), + (0xA7D1, 'V'), + (0xA7D2, 'X'), + (0xA7D3, 'V'), + (0xA7D4, 'X'), + (0xA7D5, 'V'), + (0xA7D6, 'M', 'ꟗ'), + (0xA7D7, 'V'), + (0xA7D8, 'M', 'ꟙ'), + (0xA7D9, 'V'), + (0xA7DA, 'X'), + (0xA7F2, 'M', 'c'), + (0xA7F3, 'M', 'f'), + (0xA7F4, 'M', 'q'), + (0xA7F5, 'M', 'ꟶ'), + (0xA7F6, 'V'), + (0xA7F8, 'M', 'ħ'), + (0xA7F9, 'M', 'œ'), + (0xA7FA, 'V'), + (0xA82D, 'X'), + (0xA830, 'V'), + (0xA83A, 'X'), + (0xA840, 'V'), + (0xA878, 'X'), + (0xA880, 'V'), + (0xA8C6, 'X'), + ] + +def _seg_38() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xA8CE, 'V'), + (0xA8DA, 'X'), + (0xA8E0, 'V'), + (0xA954, 'X'), + (0xA95F, 'V'), + (0xA97D, 'X'), + (0xA980, 'V'), + (0xA9CE, 'X'), + (0xA9CF, 'V'), + (0xA9DA, 'X'), + (0xA9DE, 'V'), + (0xA9FF, 'X'), + (0xAA00, 'V'), + (0xAA37, 'X'), + (0xAA40, 'V'), + (0xAA4E, 'X'), + (0xAA50, 'V'), + (0xAA5A, 'X'), + (0xAA5C, 'V'), + (0xAAC3, 'X'), + (0xAADB, 'V'), + (0xAAF7, 'X'), + (0xAB01, 'V'), + (0xAB07, 'X'), + (0xAB09, 'V'), + (0xAB0F, 'X'), + (0xAB11, 'V'), + (0xAB17, 'X'), + (0xAB20, 'V'), + (0xAB27, 'X'), + (0xAB28, 'V'), + (0xAB2F, 'X'), + (0xAB30, 'V'), + (0xAB5C, 'M', 'ꜧ'), + (0xAB5D, 'M', 'ꬷ'), + (0xAB5E, 'M', 'ɫ'), + (0xAB5F, 'M', 'ꭒ'), + (0xAB60, 'V'), + (0xAB69, 'M', 'ʍ'), + (0xAB6A, 'V'), + (0xAB6C, 'X'), + (0xAB70, 'M', 'Ꭰ'), + (0xAB71, 'M', 'Ꭱ'), + (0xAB72, 'M', 'Ꭲ'), + (0xAB73, 'M', 'Ꭳ'), + (0xAB74, 'M', 'Ꭴ'), + (0xAB75, 'M', 'Ꭵ'), + (0xAB76, 'M', 'Ꭶ'), + (0xAB77, 'M', 'Ꭷ'), + (0xAB78, 'M', 'Ꭸ'), + (0xAB79, 'M', 'Ꭹ'), + (0xAB7A, 'M', 'Ꭺ'), + (0xAB7B, 'M', 'Ꭻ'), + (0xAB7C, 'M', 'Ꭼ'), + (0xAB7D, 'M', 'Ꭽ'), + (0xAB7E, 'M', 'Ꭾ'), + (0xAB7F, 'M', 'Ꭿ'), + (0xAB80, 'M', 'Ꮀ'), + (0xAB81, 'M', 'Ꮁ'), + (0xAB82, 'M', 'Ꮂ'), + (0xAB83, 'M', 'Ꮃ'), + (0xAB84, 'M', 'Ꮄ'), + (0xAB85, 'M', 'Ꮅ'), + (0xAB86, 'M', 'Ꮆ'), + (0xAB87, 'M', 'Ꮇ'), + (0xAB88, 'M', 'Ꮈ'), + (0xAB89, 'M', 'Ꮉ'), + (0xAB8A, 'M', 'Ꮊ'), + (0xAB8B, 'M', 'Ꮋ'), + (0xAB8C, 'M', 'Ꮌ'), + (0xAB8D, 'M', 'Ꮍ'), + (0xAB8E, 'M', 'Ꮎ'), + (0xAB8F, 'M', 'Ꮏ'), + (0xAB90, 'M', 'Ꮐ'), + (0xAB91, 'M', 'Ꮑ'), + (0xAB92, 'M', 'Ꮒ'), + (0xAB93, 'M', 'Ꮓ'), + (0xAB94, 'M', 'Ꮔ'), + (0xAB95, 'M', 'Ꮕ'), + (0xAB96, 'M', 'Ꮖ'), + (0xAB97, 'M', 'Ꮗ'), + (0xAB98, 'M', 'Ꮘ'), + (0xAB99, 'M', 'Ꮙ'), + (0xAB9A, 'M', 'Ꮚ'), + (0xAB9B, 'M', 'Ꮛ'), + (0xAB9C, 'M', 'Ꮜ'), + (0xAB9D, 'M', 'Ꮝ'), + (0xAB9E, 'M', 'Ꮞ'), + (0xAB9F, 'M', 'Ꮟ'), + (0xABA0, 'M', 'Ꮠ'), + (0xABA1, 'M', 'Ꮡ'), + (0xABA2, 'M', 'Ꮢ'), + (0xABA3, 'M', 'Ꮣ'), + (0xABA4, 'M', 'Ꮤ'), + (0xABA5, 'M', 'Ꮥ'), + (0xABA6, 'M', 'Ꮦ'), + (0xABA7, 'M', 'Ꮧ'), + (0xABA8, 'M', 'Ꮨ'), + (0xABA9, 'M', 'Ꮩ'), + (0xABAA, 'M', 'Ꮪ'), + ] + +def _seg_39() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xABAB, 'M', 'Ꮫ'), + (0xABAC, 'M', 'Ꮬ'), + (0xABAD, 'M', 'Ꮭ'), + (0xABAE, 'M', 'Ꮮ'), + (0xABAF, 'M', 'Ꮯ'), + (0xABB0, 'M', 'Ꮰ'), + (0xABB1, 'M', 'Ꮱ'), + (0xABB2, 'M', 'Ꮲ'), + (0xABB3, 'M', 'Ꮳ'), + (0xABB4, 'M', 'Ꮴ'), + (0xABB5, 'M', 'Ꮵ'), + (0xABB6, 'M', 'Ꮶ'), + (0xABB7, 'M', 'Ꮷ'), + (0xABB8, 'M', 'Ꮸ'), + (0xABB9, 'M', 'Ꮹ'), + (0xABBA, 'M', 'Ꮺ'), + (0xABBB, 'M', 'Ꮻ'), + (0xABBC, 'M', 'Ꮼ'), + (0xABBD, 'M', 'Ꮽ'), + (0xABBE, 'M', 'Ꮾ'), + (0xABBF, 'M', 'Ꮿ'), + (0xABC0, 'V'), + (0xABEE, 'X'), + (0xABF0, 'V'), + (0xABFA, 'X'), + (0xAC00, 'V'), + (0xD7A4, 'X'), + (0xD7B0, 'V'), + (0xD7C7, 'X'), + (0xD7CB, 'V'), + (0xD7FC, 'X'), + (0xF900, 'M', '豈'), + (0xF901, 'M', '更'), + (0xF902, 'M', '車'), + (0xF903, 'M', '賈'), + (0xF904, 'M', '滑'), + (0xF905, 'M', '串'), + (0xF906, 'M', '句'), + (0xF907, 'M', '龜'), + (0xF909, 'M', '契'), + (0xF90A, 'M', '金'), + (0xF90B, 'M', '喇'), + (0xF90C, 'M', '奈'), + (0xF90D, 'M', '懶'), + (0xF90E, 'M', '癩'), + (0xF90F, 'M', '羅'), + (0xF910, 'M', '蘿'), + (0xF911, 'M', '螺'), + (0xF912, 'M', '裸'), + (0xF913, 'M', '邏'), + (0xF914, 'M', '樂'), + (0xF915, 'M', '洛'), + (0xF916, 'M', '烙'), + (0xF917, 'M', '珞'), + (0xF918, 'M', '落'), + (0xF919, 'M', '酪'), + (0xF91A, 'M', '駱'), + (0xF91B, 'M', '亂'), + (0xF91C, 'M', '卵'), + (0xF91D, 'M', '欄'), + (0xF91E, 'M', '爛'), + (0xF91F, 'M', '蘭'), + (0xF920, 'M', '鸞'), + (0xF921, 'M', '嵐'), + (0xF922, 'M', '濫'), + (0xF923, 'M', '藍'), + (0xF924, 'M', '襤'), + (0xF925, 'M', '拉'), + (0xF926, 'M', '臘'), + (0xF927, 'M', '蠟'), + (0xF928, 'M', '廊'), + (0xF929, 'M', '朗'), + (0xF92A, 'M', '浪'), + (0xF92B, 'M', '狼'), + (0xF92C, 'M', '郎'), + (0xF92D, 'M', '來'), + (0xF92E, 'M', '冷'), + (0xF92F, 'M', '勞'), + (0xF930, 'M', '擄'), + (0xF931, 'M', '櫓'), + (0xF932, 'M', '爐'), + (0xF933, 'M', '盧'), + (0xF934, 'M', '老'), + (0xF935, 'M', '蘆'), + (0xF936, 'M', '虜'), + (0xF937, 'M', '路'), + (0xF938, 'M', '露'), + (0xF939, 'M', '魯'), + (0xF93A, 'M', '鷺'), + (0xF93B, 'M', '碌'), + (0xF93C, 'M', '祿'), + (0xF93D, 'M', '綠'), + (0xF93E, 'M', '菉'), + (0xF93F, 'M', '錄'), + (0xF940, 'M', '鹿'), + (0xF941, 'M', '論'), + (0xF942, 'M', '壟'), + (0xF943, 'M', '弄'), + (0xF944, 'M', '籠'), + (0xF945, 'M', '聾'), + ] + +def _seg_40() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xF946, 'M', '牢'), + (0xF947, 'M', '磊'), + (0xF948, 'M', '賂'), + (0xF949, 'M', '雷'), + (0xF94A, 'M', '壘'), + (0xF94B, 'M', '屢'), + (0xF94C, 'M', '樓'), + (0xF94D, 'M', '淚'), + (0xF94E, 'M', '漏'), + (0xF94F, 'M', '累'), + (0xF950, 'M', '縷'), + (0xF951, 'M', '陋'), + (0xF952, 'M', '勒'), + (0xF953, 'M', '肋'), + (0xF954, 'M', '凜'), + (0xF955, 'M', '凌'), + (0xF956, 'M', '稜'), + (0xF957, 'M', '綾'), + (0xF958, 'M', '菱'), + (0xF959, 'M', '陵'), + (0xF95A, 'M', '讀'), + (0xF95B, 'M', '拏'), + (0xF95C, 'M', '樂'), + (0xF95D, 'M', '諾'), + (0xF95E, 'M', '丹'), + (0xF95F, 'M', '寧'), + (0xF960, 'M', '怒'), + (0xF961, 'M', '率'), + (0xF962, 'M', '異'), + (0xF963, 'M', '北'), + (0xF964, 'M', '磻'), + (0xF965, 'M', '便'), + (0xF966, 'M', '復'), + (0xF967, 'M', '不'), + (0xF968, 'M', '泌'), + (0xF969, 'M', '數'), + (0xF96A, 'M', '索'), + (0xF96B, 'M', '參'), + (0xF96C, 'M', '塞'), + (0xF96D, 'M', '省'), + (0xF96E, 'M', '葉'), + (0xF96F, 'M', '說'), + (0xF970, 'M', '殺'), + (0xF971, 'M', '辰'), + (0xF972, 'M', '沈'), + (0xF973, 'M', '拾'), + (0xF974, 'M', '若'), + (0xF975, 'M', '掠'), + (0xF976, 'M', '略'), + (0xF977, 'M', '亮'), + (0xF978, 'M', '兩'), + (0xF979, 'M', '凉'), + (0xF97A, 'M', '梁'), + (0xF97B, 'M', '糧'), + (0xF97C, 'M', '良'), + (0xF97D, 'M', '諒'), + (0xF97E, 'M', '量'), + (0xF97F, 'M', '勵'), + (0xF980, 'M', '呂'), + (0xF981, 'M', '女'), + (0xF982, 'M', '廬'), + (0xF983, 'M', '旅'), + (0xF984, 'M', '濾'), + (0xF985, 'M', '礪'), + (0xF986, 'M', '閭'), + (0xF987, 'M', '驪'), + (0xF988, 'M', '麗'), + (0xF989, 'M', '黎'), + (0xF98A, 'M', '力'), + (0xF98B, 'M', '曆'), + (0xF98C, 'M', '歷'), + (0xF98D, 'M', '轢'), + (0xF98E, 'M', '年'), + (0xF98F, 'M', '憐'), + (0xF990, 'M', '戀'), + (0xF991, 'M', '撚'), + (0xF992, 'M', '漣'), + (0xF993, 'M', '煉'), + (0xF994, 'M', '璉'), + (0xF995, 'M', '秊'), + (0xF996, 'M', '練'), + (0xF997, 'M', '聯'), + (0xF998, 'M', '輦'), + (0xF999, 'M', '蓮'), + (0xF99A, 'M', '連'), + (0xF99B, 'M', '鍊'), + (0xF99C, 'M', '列'), + (0xF99D, 'M', '劣'), + (0xF99E, 'M', '咽'), + (0xF99F, 'M', '烈'), + (0xF9A0, 'M', '裂'), + (0xF9A1, 'M', '說'), + (0xF9A2, 'M', '廉'), + (0xF9A3, 'M', '念'), + (0xF9A4, 'M', '捻'), + (0xF9A5, 'M', '殮'), + (0xF9A6, 'M', '簾'), + (0xF9A7, 'M', '獵'), + (0xF9A8, 'M', '令'), + (0xF9A9, 'M', '囹'), + ] + +def _seg_41() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xF9AA, 'M', '寧'), + (0xF9AB, 'M', '嶺'), + (0xF9AC, 'M', '怜'), + (0xF9AD, 'M', '玲'), + (0xF9AE, 'M', '瑩'), + (0xF9AF, 'M', '羚'), + (0xF9B0, 'M', '聆'), + (0xF9B1, 'M', '鈴'), + (0xF9B2, 'M', '零'), + (0xF9B3, 'M', '靈'), + (0xF9B4, 'M', '領'), + (0xF9B5, 'M', '例'), + (0xF9B6, 'M', '禮'), + (0xF9B7, 'M', '醴'), + (0xF9B8, 'M', '隸'), + (0xF9B9, 'M', '惡'), + (0xF9BA, 'M', '了'), + (0xF9BB, 'M', '僚'), + (0xF9BC, 'M', '寮'), + (0xF9BD, 'M', '尿'), + (0xF9BE, 'M', '料'), + (0xF9BF, 'M', '樂'), + (0xF9C0, 'M', '燎'), + (0xF9C1, 'M', '療'), + (0xF9C2, 'M', '蓼'), + (0xF9C3, 'M', '遼'), + (0xF9C4, 'M', '龍'), + (0xF9C5, 'M', '暈'), + (0xF9C6, 'M', '阮'), + (0xF9C7, 'M', '劉'), + (0xF9C8, 'M', '杻'), + (0xF9C9, 'M', '柳'), + (0xF9CA, 'M', '流'), + (0xF9CB, 'M', '溜'), + (0xF9CC, 'M', '琉'), + (0xF9CD, 'M', '留'), + (0xF9CE, 'M', '硫'), + (0xF9CF, 'M', '紐'), + (0xF9D0, 'M', '類'), + (0xF9D1, 'M', '六'), + (0xF9D2, 'M', '戮'), + (0xF9D3, 'M', '陸'), + (0xF9D4, 'M', '倫'), + (0xF9D5, 'M', '崙'), + (0xF9D6, 'M', '淪'), + (0xF9D7, 'M', '輪'), + (0xF9D8, 'M', '律'), + (0xF9D9, 'M', '慄'), + (0xF9DA, 'M', '栗'), + (0xF9DB, 'M', '率'), + (0xF9DC, 'M', '隆'), + (0xF9DD, 'M', '利'), + (0xF9DE, 'M', '吏'), + (0xF9DF, 'M', '履'), + (0xF9E0, 'M', '易'), + (0xF9E1, 'M', '李'), + (0xF9E2, 'M', '梨'), + (0xF9E3, 'M', '泥'), + (0xF9E4, 'M', '理'), + (0xF9E5, 'M', '痢'), + (0xF9E6, 'M', '罹'), + (0xF9E7, 'M', '裏'), + (0xF9E8, 'M', '裡'), + (0xF9E9, 'M', '里'), + (0xF9EA, 'M', '離'), + (0xF9EB, 'M', '匿'), + (0xF9EC, 'M', '溺'), + (0xF9ED, 'M', '吝'), + (0xF9EE, 'M', '燐'), + (0xF9EF, 'M', '璘'), + (0xF9F0, 'M', '藺'), + (0xF9F1, 'M', '隣'), + (0xF9F2, 'M', '鱗'), + (0xF9F3, 'M', '麟'), + (0xF9F4, 'M', '林'), + (0xF9F5, 'M', '淋'), + (0xF9F6, 'M', '臨'), + (0xF9F7, 'M', '立'), + (0xF9F8, 'M', '笠'), + (0xF9F9, 'M', '粒'), + (0xF9FA, 'M', '狀'), + (0xF9FB, 'M', '炙'), + (0xF9FC, 'M', '識'), + (0xF9FD, 'M', '什'), + (0xF9FE, 'M', '茶'), + (0xF9FF, 'M', '刺'), + (0xFA00, 'M', '切'), + (0xFA01, 'M', '度'), + (0xFA02, 'M', '拓'), + (0xFA03, 'M', '糖'), + (0xFA04, 'M', '宅'), + (0xFA05, 'M', '洞'), + (0xFA06, 'M', '暴'), + (0xFA07, 'M', '輻'), + (0xFA08, 'M', '行'), + (0xFA09, 'M', '降'), + (0xFA0A, 'M', '見'), + (0xFA0B, 'M', '廓'), + (0xFA0C, 'M', '兀'), + (0xFA0D, 'M', '嗀'), + ] + +def _seg_42() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFA0E, 'V'), + (0xFA10, 'M', '塚'), + (0xFA11, 'V'), + (0xFA12, 'M', '晴'), + (0xFA13, 'V'), + (0xFA15, 'M', '凞'), + (0xFA16, 'M', '猪'), + (0xFA17, 'M', '益'), + (0xFA18, 'M', '礼'), + (0xFA19, 'M', '神'), + (0xFA1A, 'M', '祥'), + (0xFA1B, 'M', '福'), + (0xFA1C, 'M', '靖'), + (0xFA1D, 'M', '精'), + (0xFA1E, 'M', '羽'), + (0xFA1F, 'V'), + (0xFA20, 'M', '蘒'), + (0xFA21, 'V'), + (0xFA22, 'M', '諸'), + (0xFA23, 'V'), + (0xFA25, 'M', '逸'), + (0xFA26, 'M', '都'), + (0xFA27, 'V'), + (0xFA2A, 'M', '飯'), + (0xFA2B, 'M', '飼'), + (0xFA2C, 'M', '館'), + (0xFA2D, 'M', '鶴'), + (0xFA2E, 'M', '郞'), + (0xFA2F, 'M', '隷'), + (0xFA30, 'M', '侮'), + (0xFA31, 'M', '僧'), + (0xFA32, 'M', '免'), + (0xFA33, 'M', '勉'), + (0xFA34, 'M', '勤'), + (0xFA35, 'M', '卑'), + (0xFA36, 'M', '喝'), + (0xFA37, 'M', '嘆'), + (0xFA38, 'M', '器'), + (0xFA39, 'M', '塀'), + (0xFA3A, 'M', '墨'), + (0xFA3B, 'M', '層'), + (0xFA3C, 'M', '屮'), + (0xFA3D, 'M', '悔'), + (0xFA3E, 'M', '慨'), + (0xFA3F, 'M', '憎'), + (0xFA40, 'M', '懲'), + (0xFA41, 'M', '敏'), + (0xFA42, 'M', '既'), + (0xFA43, 'M', '暑'), + (0xFA44, 'M', '梅'), + (0xFA45, 'M', '海'), + (0xFA46, 'M', '渚'), + (0xFA47, 'M', '漢'), + (0xFA48, 'M', '煮'), + (0xFA49, 'M', '爫'), + (0xFA4A, 'M', '琢'), + (0xFA4B, 'M', '碑'), + (0xFA4C, 'M', '社'), + (0xFA4D, 'M', '祉'), + (0xFA4E, 'M', '祈'), + (0xFA4F, 'M', '祐'), + (0xFA50, 'M', '祖'), + (0xFA51, 'M', '祝'), + (0xFA52, 'M', '禍'), + (0xFA53, 'M', '禎'), + (0xFA54, 'M', '穀'), + (0xFA55, 'M', '突'), + (0xFA56, 'M', '節'), + (0xFA57, 'M', '練'), + (0xFA58, 'M', '縉'), + (0xFA59, 'M', '繁'), + (0xFA5A, 'M', '署'), + (0xFA5B, 'M', '者'), + (0xFA5C, 'M', '臭'), + (0xFA5D, 'M', '艹'), + (0xFA5F, 'M', '著'), + (0xFA60, 'M', '褐'), + (0xFA61, 'M', '視'), + (0xFA62, 'M', '謁'), + (0xFA63, 'M', '謹'), + (0xFA64, 'M', '賓'), + (0xFA65, 'M', '贈'), + (0xFA66, 'M', '辶'), + (0xFA67, 'M', '逸'), + (0xFA68, 'M', '難'), + (0xFA69, 'M', '響'), + (0xFA6A, 'M', '頻'), + (0xFA6B, 'M', '恵'), + (0xFA6C, 'M', '𤋮'), + (0xFA6D, 'M', '舘'), + (0xFA6E, 'X'), + (0xFA70, 'M', '並'), + (0xFA71, 'M', '况'), + (0xFA72, 'M', '全'), + (0xFA73, 'M', '侀'), + (0xFA74, 'M', '充'), + (0xFA75, 'M', '冀'), + (0xFA76, 'M', '勇'), + (0xFA77, 'M', '勺'), + (0xFA78, 'M', '喝'), + ] + +def _seg_43() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFA79, 'M', '啕'), + (0xFA7A, 'M', '喙'), + (0xFA7B, 'M', '嗢'), + (0xFA7C, 'M', '塚'), + (0xFA7D, 'M', '墳'), + (0xFA7E, 'M', '奄'), + (0xFA7F, 'M', '奔'), + (0xFA80, 'M', '婢'), + (0xFA81, 'M', '嬨'), + (0xFA82, 'M', '廒'), + (0xFA83, 'M', '廙'), + (0xFA84, 'M', '彩'), + (0xFA85, 'M', '徭'), + (0xFA86, 'M', '惘'), + (0xFA87, 'M', '慎'), + (0xFA88, 'M', '愈'), + (0xFA89, 'M', '憎'), + (0xFA8A, 'M', '慠'), + (0xFA8B, 'M', '懲'), + (0xFA8C, 'M', '戴'), + (0xFA8D, 'M', '揄'), + (0xFA8E, 'M', '搜'), + (0xFA8F, 'M', '摒'), + (0xFA90, 'M', '敖'), + (0xFA91, 'M', '晴'), + (0xFA92, 'M', '朗'), + (0xFA93, 'M', '望'), + (0xFA94, 'M', '杖'), + (0xFA95, 'M', '歹'), + (0xFA96, 'M', '殺'), + (0xFA97, 'M', '流'), + (0xFA98, 'M', '滛'), + (0xFA99, 'M', '滋'), + (0xFA9A, 'M', '漢'), + (0xFA9B, 'M', '瀞'), + (0xFA9C, 'M', '煮'), + (0xFA9D, 'M', '瞧'), + (0xFA9E, 'M', '爵'), + (0xFA9F, 'M', '犯'), + (0xFAA0, 'M', '猪'), + (0xFAA1, 'M', '瑱'), + (0xFAA2, 'M', '甆'), + (0xFAA3, 'M', '画'), + (0xFAA4, 'M', '瘝'), + (0xFAA5, 'M', '瘟'), + (0xFAA6, 'M', '益'), + (0xFAA7, 'M', '盛'), + (0xFAA8, 'M', '直'), + (0xFAA9, 'M', '睊'), + (0xFAAA, 'M', '着'), + (0xFAAB, 'M', '磌'), + (0xFAAC, 'M', '窱'), + (0xFAAD, 'M', '節'), + (0xFAAE, 'M', '类'), + (0xFAAF, 'M', '絛'), + (0xFAB0, 'M', '練'), + (0xFAB1, 'M', '缾'), + (0xFAB2, 'M', '者'), + (0xFAB3, 'M', '荒'), + (0xFAB4, 'M', '華'), + (0xFAB5, 'M', '蝹'), + (0xFAB6, 'M', '襁'), + (0xFAB7, 'M', '覆'), + (0xFAB8, 'M', '視'), + (0xFAB9, 'M', '調'), + (0xFABA, 'M', '諸'), + (0xFABB, 'M', '請'), + (0xFABC, 'M', '謁'), + (0xFABD, 'M', '諾'), + (0xFABE, 'M', '諭'), + (0xFABF, 'M', '謹'), + (0xFAC0, 'M', '變'), + (0xFAC1, 'M', '贈'), + (0xFAC2, 'M', '輸'), + (0xFAC3, 'M', '遲'), + (0xFAC4, 'M', '醙'), + (0xFAC5, 'M', '鉶'), + (0xFAC6, 'M', '陼'), + (0xFAC7, 'M', '難'), + (0xFAC8, 'M', '靖'), + (0xFAC9, 'M', '韛'), + (0xFACA, 'M', '響'), + (0xFACB, 'M', '頋'), + (0xFACC, 'M', '頻'), + (0xFACD, 'M', '鬒'), + (0xFACE, 'M', '龜'), + (0xFACF, 'M', '𢡊'), + (0xFAD0, 'M', '𢡄'), + (0xFAD1, 'M', '𣏕'), + (0xFAD2, 'M', '㮝'), + (0xFAD3, 'M', '䀘'), + (0xFAD4, 'M', '䀹'), + (0xFAD5, 'M', '𥉉'), + (0xFAD6, 'M', '𥳐'), + (0xFAD7, 'M', '𧻓'), + (0xFAD8, 'M', '齃'), + (0xFAD9, 'M', '龎'), + (0xFADA, 'X'), + (0xFB00, 'M', 'ff'), + (0xFB01, 'M', 'fi'), + ] + +def _seg_44() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFB02, 'M', 'fl'), + (0xFB03, 'M', 'ffi'), + (0xFB04, 'M', 'ffl'), + (0xFB05, 'M', 'st'), + (0xFB07, 'X'), + (0xFB13, 'M', 'մն'), + (0xFB14, 'M', 'մե'), + (0xFB15, 'M', 'մի'), + (0xFB16, 'M', 'վն'), + (0xFB17, 'M', 'մխ'), + (0xFB18, 'X'), + (0xFB1D, 'M', 'יִ'), + (0xFB1E, 'V'), + (0xFB1F, 'M', 'ײַ'), + (0xFB20, 'M', 'ע'), + (0xFB21, 'M', 'א'), + (0xFB22, 'M', 'ד'), + (0xFB23, 'M', 'ה'), + (0xFB24, 'M', 'כ'), + (0xFB25, 'M', 'ל'), + (0xFB26, 'M', 'ם'), + (0xFB27, 'M', 'ר'), + (0xFB28, 'M', 'ת'), + (0xFB29, '3', '+'), + (0xFB2A, 'M', 'שׁ'), + (0xFB2B, 'M', 'שׂ'), + (0xFB2C, 'M', 'שּׁ'), + (0xFB2D, 'M', 'שּׂ'), + (0xFB2E, 'M', 'אַ'), + (0xFB2F, 'M', 'אָ'), + (0xFB30, 'M', 'אּ'), + (0xFB31, 'M', 'בּ'), + (0xFB32, 'M', 'גּ'), + (0xFB33, 'M', 'דּ'), + (0xFB34, 'M', 'הּ'), + (0xFB35, 'M', 'וּ'), + (0xFB36, 'M', 'זּ'), + (0xFB37, 'X'), + (0xFB38, 'M', 'טּ'), + (0xFB39, 'M', 'יּ'), + (0xFB3A, 'M', 'ךּ'), + (0xFB3B, 'M', 'כּ'), + (0xFB3C, 'M', 'לּ'), + (0xFB3D, 'X'), + (0xFB3E, 'M', 'מּ'), + (0xFB3F, 'X'), + (0xFB40, 'M', 'נּ'), + (0xFB41, 'M', 'סּ'), + (0xFB42, 'X'), + (0xFB43, 'M', 'ףּ'), + (0xFB44, 'M', 'פּ'), + (0xFB45, 'X'), + (0xFB46, 'M', 'צּ'), + (0xFB47, 'M', 'קּ'), + (0xFB48, 'M', 'רּ'), + (0xFB49, 'M', 'שּ'), + (0xFB4A, 'M', 'תּ'), + (0xFB4B, 'M', 'וֹ'), + (0xFB4C, 'M', 'בֿ'), + (0xFB4D, 'M', 'כֿ'), + (0xFB4E, 'M', 'פֿ'), + (0xFB4F, 'M', 'אל'), + (0xFB50, 'M', 'ٱ'), + (0xFB52, 'M', 'ٻ'), + (0xFB56, 'M', 'پ'), + (0xFB5A, 'M', 'ڀ'), + (0xFB5E, 'M', 'ٺ'), + (0xFB62, 'M', 'ٿ'), + (0xFB66, 'M', 'ٹ'), + (0xFB6A, 'M', 'ڤ'), + (0xFB6E, 'M', 'ڦ'), + (0xFB72, 'M', 'ڄ'), + (0xFB76, 'M', 'ڃ'), + (0xFB7A, 'M', 'چ'), + (0xFB7E, 'M', 'ڇ'), + (0xFB82, 'M', 'ڍ'), + (0xFB84, 'M', 'ڌ'), + (0xFB86, 'M', 'ڎ'), + (0xFB88, 'M', 'ڈ'), + (0xFB8A, 'M', 'ژ'), + (0xFB8C, 'M', 'ڑ'), + (0xFB8E, 'M', 'ک'), + (0xFB92, 'M', 'گ'), + (0xFB96, 'M', 'ڳ'), + (0xFB9A, 'M', 'ڱ'), + (0xFB9E, 'M', 'ں'), + (0xFBA0, 'M', 'ڻ'), + (0xFBA4, 'M', 'ۀ'), + (0xFBA6, 'M', 'ہ'), + (0xFBAA, 'M', 'ھ'), + (0xFBAE, 'M', 'ے'), + (0xFBB0, 'M', 'ۓ'), + (0xFBB2, 'V'), + (0xFBC3, 'X'), + (0xFBD3, 'M', 'ڭ'), + (0xFBD7, 'M', 'ۇ'), + (0xFBD9, 'M', 'ۆ'), + (0xFBDB, 'M', 'ۈ'), + (0xFBDD, 'M', 'ۇٴ'), + (0xFBDE, 'M', 'ۋ'), + ] + +def _seg_45() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFBE0, 'M', 'ۅ'), + (0xFBE2, 'M', 'ۉ'), + (0xFBE4, 'M', 'ې'), + (0xFBE8, 'M', 'ى'), + (0xFBEA, 'M', 'ئا'), + (0xFBEC, 'M', 'ئە'), + (0xFBEE, 'M', 'ئو'), + (0xFBF0, 'M', 'ئۇ'), + (0xFBF2, 'M', 'ئۆ'), + (0xFBF4, 'M', 'ئۈ'), + (0xFBF6, 'M', 'ئې'), + (0xFBF9, 'M', 'ئى'), + (0xFBFC, 'M', 'ی'), + (0xFC00, 'M', 'ئج'), + (0xFC01, 'M', 'ئح'), + (0xFC02, 'M', 'ئم'), + (0xFC03, 'M', 'ئى'), + (0xFC04, 'M', 'ئي'), + (0xFC05, 'M', 'بج'), + (0xFC06, 'M', 'بح'), + (0xFC07, 'M', 'بخ'), + (0xFC08, 'M', 'بم'), + (0xFC09, 'M', 'بى'), + (0xFC0A, 'M', 'بي'), + (0xFC0B, 'M', 'تج'), + (0xFC0C, 'M', 'تح'), + (0xFC0D, 'M', 'تخ'), + (0xFC0E, 'M', 'تم'), + (0xFC0F, 'M', 'تى'), + (0xFC10, 'M', 'تي'), + (0xFC11, 'M', 'ثج'), + (0xFC12, 'M', 'ثم'), + (0xFC13, 'M', 'ثى'), + (0xFC14, 'M', 'ثي'), + (0xFC15, 'M', 'جح'), + (0xFC16, 'M', 'جم'), + (0xFC17, 'M', 'حج'), + (0xFC18, 'M', 'حم'), + (0xFC19, 'M', 'خج'), + (0xFC1A, 'M', 'خح'), + (0xFC1B, 'M', 'خم'), + (0xFC1C, 'M', 'سج'), + (0xFC1D, 'M', 'سح'), + (0xFC1E, 'M', 'سخ'), + (0xFC1F, 'M', 'سم'), + (0xFC20, 'M', 'صح'), + (0xFC21, 'M', 'صم'), + (0xFC22, 'M', 'ضج'), + (0xFC23, 'M', 'ضح'), + (0xFC24, 'M', 'ضخ'), + (0xFC25, 'M', 'ضم'), + (0xFC26, 'M', 'طح'), + (0xFC27, 'M', 'طم'), + (0xFC28, 'M', 'ظم'), + (0xFC29, 'M', 'عج'), + (0xFC2A, 'M', 'عم'), + (0xFC2B, 'M', 'غج'), + (0xFC2C, 'M', 'غم'), + (0xFC2D, 'M', 'فج'), + (0xFC2E, 'M', 'فح'), + (0xFC2F, 'M', 'فخ'), + (0xFC30, 'M', 'فم'), + (0xFC31, 'M', 'فى'), + (0xFC32, 'M', 'في'), + (0xFC33, 'M', 'قح'), + (0xFC34, 'M', 'قم'), + (0xFC35, 'M', 'قى'), + (0xFC36, 'M', 'قي'), + (0xFC37, 'M', 'كا'), + (0xFC38, 'M', 'كج'), + (0xFC39, 'M', 'كح'), + (0xFC3A, 'M', 'كخ'), + (0xFC3B, 'M', 'كل'), + (0xFC3C, 'M', 'كم'), + (0xFC3D, 'M', 'كى'), + (0xFC3E, 'M', 'كي'), + (0xFC3F, 'M', 'لج'), + (0xFC40, 'M', 'لح'), + (0xFC41, 'M', 'لخ'), + (0xFC42, 'M', 'لم'), + (0xFC43, 'M', 'لى'), + (0xFC44, 'M', 'لي'), + (0xFC45, 'M', 'مج'), + (0xFC46, 'M', 'مح'), + (0xFC47, 'M', 'مخ'), + (0xFC48, 'M', 'مم'), + (0xFC49, 'M', 'مى'), + (0xFC4A, 'M', 'مي'), + (0xFC4B, 'M', 'نج'), + (0xFC4C, 'M', 'نح'), + (0xFC4D, 'M', 'نخ'), + (0xFC4E, 'M', 'نم'), + (0xFC4F, 'M', 'نى'), + (0xFC50, 'M', 'ني'), + (0xFC51, 'M', 'هج'), + (0xFC52, 'M', 'هم'), + (0xFC53, 'M', 'هى'), + (0xFC54, 'M', 'هي'), + (0xFC55, 'M', 'يج'), + (0xFC56, 'M', 'يح'), + ] + +def _seg_46() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFC57, 'M', 'يخ'), + (0xFC58, 'M', 'يم'), + (0xFC59, 'M', 'يى'), + (0xFC5A, 'M', 'يي'), + (0xFC5B, 'M', 'ذٰ'), + (0xFC5C, 'M', 'رٰ'), + (0xFC5D, 'M', 'ىٰ'), + (0xFC5E, '3', ' ٌّ'), + (0xFC5F, '3', ' ٍّ'), + (0xFC60, '3', ' َّ'), + (0xFC61, '3', ' ُّ'), + (0xFC62, '3', ' ِّ'), + (0xFC63, '3', ' ّٰ'), + (0xFC64, 'M', 'ئر'), + (0xFC65, 'M', 'ئز'), + (0xFC66, 'M', 'ئم'), + (0xFC67, 'M', 'ئن'), + (0xFC68, 'M', 'ئى'), + (0xFC69, 'M', 'ئي'), + (0xFC6A, 'M', 'بر'), + (0xFC6B, 'M', 'بز'), + (0xFC6C, 'M', 'بم'), + (0xFC6D, 'M', 'بن'), + (0xFC6E, 'M', 'بى'), + (0xFC6F, 'M', 'بي'), + (0xFC70, 'M', 'تر'), + (0xFC71, 'M', 'تز'), + (0xFC72, 'M', 'تم'), + (0xFC73, 'M', 'تن'), + (0xFC74, 'M', 'تى'), + (0xFC75, 'M', 'تي'), + (0xFC76, 'M', 'ثر'), + (0xFC77, 'M', 'ثز'), + (0xFC78, 'M', 'ثم'), + (0xFC79, 'M', 'ثن'), + (0xFC7A, 'M', 'ثى'), + (0xFC7B, 'M', 'ثي'), + (0xFC7C, 'M', 'فى'), + (0xFC7D, 'M', 'في'), + (0xFC7E, 'M', 'قى'), + (0xFC7F, 'M', 'قي'), + (0xFC80, 'M', 'كا'), + (0xFC81, 'M', 'كل'), + (0xFC82, 'M', 'كم'), + (0xFC83, 'M', 'كى'), + (0xFC84, 'M', 'كي'), + (0xFC85, 'M', 'لم'), + (0xFC86, 'M', 'لى'), + (0xFC87, 'M', 'لي'), + (0xFC88, 'M', 'ما'), + (0xFC89, 'M', 'مم'), + (0xFC8A, 'M', 'نر'), + (0xFC8B, 'M', 'نز'), + (0xFC8C, 'M', 'نم'), + (0xFC8D, 'M', 'نن'), + (0xFC8E, 'M', 'نى'), + (0xFC8F, 'M', 'ني'), + (0xFC90, 'M', 'ىٰ'), + (0xFC91, 'M', 'ير'), + (0xFC92, 'M', 'يز'), + (0xFC93, 'M', 'يم'), + (0xFC94, 'M', 'ين'), + (0xFC95, 'M', 'يى'), + (0xFC96, 'M', 'يي'), + (0xFC97, 'M', 'ئج'), + (0xFC98, 'M', 'ئح'), + (0xFC99, 'M', 'ئخ'), + (0xFC9A, 'M', 'ئم'), + (0xFC9B, 'M', 'ئه'), + (0xFC9C, 'M', 'بج'), + (0xFC9D, 'M', 'بح'), + (0xFC9E, 'M', 'بخ'), + (0xFC9F, 'M', 'بم'), + (0xFCA0, 'M', 'به'), + (0xFCA1, 'M', 'تج'), + (0xFCA2, 'M', 'تح'), + (0xFCA3, 'M', 'تخ'), + (0xFCA4, 'M', 'تم'), + (0xFCA5, 'M', 'ته'), + (0xFCA6, 'M', 'ثم'), + (0xFCA7, 'M', 'جح'), + (0xFCA8, 'M', 'جم'), + (0xFCA9, 'M', 'حج'), + (0xFCAA, 'M', 'حم'), + (0xFCAB, 'M', 'خج'), + (0xFCAC, 'M', 'خم'), + (0xFCAD, 'M', 'سج'), + (0xFCAE, 'M', 'سح'), + (0xFCAF, 'M', 'سخ'), + (0xFCB0, 'M', 'سم'), + (0xFCB1, 'M', 'صح'), + (0xFCB2, 'M', 'صخ'), + (0xFCB3, 'M', 'صم'), + (0xFCB4, 'M', 'ضج'), + (0xFCB5, 'M', 'ضح'), + (0xFCB6, 'M', 'ضخ'), + (0xFCB7, 'M', 'ضم'), + (0xFCB8, 'M', 'طح'), + (0xFCB9, 'M', 'ظم'), + (0xFCBA, 'M', 'عج'), + ] + +def _seg_47() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFCBB, 'M', 'عم'), + (0xFCBC, 'M', 'غج'), + (0xFCBD, 'M', 'غم'), + (0xFCBE, 'M', 'فج'), + (0xFCBF, 'M', 'فح'), + (0xFCC0, 'M', 'فخ'), + (0xFCC1, 'M', 'فم'), + (0xFCC2, 'M', 'قح'), + (0xFCC3, 'M', 'قم'), + (0xFCC4, 'M', 'كج'), + (0xFCC5, 'M', 'كح'), + (0xFCC6, 'M', 'كخ'), + (0xFCC7, 'M', 'كل'), + (0xFCC8, 'M', 'كم'), + (0xFCC9, 'M', 'لج'), + (0xFCCA, 'M', 'لح'), + (0xFCCB, 'M', 'لخ'), + (0xFCCC, 'M', 'لم'), + (0xFCCD, 'M', 'له'), + (0xFCCE, 'M', 'مج'), + (0xFCCF, 'M', 'مح'), + (0xFCD0, 'M', 'مخ'), + (0xFCD1, 'M', 'مم'), + (0xFCD2, 'M', 'نج'), + (0xFCD3, 'M', 'نح'), + (0xFCD4, 'M', 'نخ'), + (0xFCD5, 'M', 'نم'), + (0xFCD6, 'M', 'نه'), + (0xFCD7, 'M', 'هج'), + (0xFCD8, 'M', 'هم'), + (0xFCD9, 'M', 'هٰ'), + (0xFCDA, 'M', 'يج'), + (0xFCDB, 'M', 'يح'), + (0xFCDC, 'M', 'يخ'), + (0xFCDD, 'M', 'يم'), + (0xFCDE, 'M', 'يه'), + (0xFCDF, 'M', 'ئم'), + (0xFCE0, 'M', 'ئه'), + (0xFCE1, 'M', 'بم'), + (0xFCE2, 'M', 'به'), + (0xFCE3, 'M', 'تم'), + (0xFCE4, 'M', 'ته'), + (0xFCE5, 'M', 'ثم'), + (0xFCE6, 'M', 'ثه'), + (0xFCE7, 'M', 'سم'), + (0xFCE8, 'M', 'سه'), + (0xFCE9, 'M', 'شم'), + (0xFCEA, 'M', 'شه'), + (0xFCEB, 'M', 'كل'), + (0xFCEC, 'M', 'كم'), + (0xFCED, 'M', 'لم'), + (0xFCEE, 'M', 'نم'), + (0xFCEF, 'M', 'نه'), + (0xFCF0, 'M', 'يم'), + (0xFCF1, 'M', 'يه'), + (0xFCF2, 'M', 'ـَّ'), + (0xFCF3, 'M', 'ـُّ'), + (0xFCF4, 'M', 'ـِّ'), + (0xFCF5, 'M', 'طى'), + (0xFCF6, 'M', 'طي'), + (0xFCF7, 'M', 'عى'), + (0xFCF8, 'M', 'عي'), + (0xFCF9, 'M', 'غى'), + (0xFCFA, 'M', 'غي'), + (0xFCFB, 'M', 'سى'), + (0xFCFC, 'M', 'سي'), + (0xFCFD, 'M', 'شى'), + (0xFCFE, 'M', 'شي'), + (0xFCFF, 'M', 'حى'), + (0xFD00, 'M', 'حي'), + (0xFD01, 'M', 'جى'), + (0xFD02, 'M', 'جي'), + (0xFD03, 'M', 'خى'), + (0xFD04, 'M', 'خي'), + (0xFD05, 'M', 'صى'), + (0xFD06, 'M', 'صي'), + (0xFD07, 'M', 'ضى'), + (0xFD08, 'M', 'ضي'), + (0xFD09, 'M', 'شج'), + (0xFD0A, 'M', 'شح'), + (0xFD0B, 'M', 'شخ'), + (0xFD0C, 'M', 'شم'), + (0xFD0D, 'M', 'شر'), + (0xFD0E, 'M', 'سر'), + (0xFD0F, 'M', 'صر'), + (0xFD10, 'M', 'ضر'), + (0xFD11, 'M', 'طى'), + (0xFD12, 'M', 'طي'), + (0xFD13, 'M', 'عى'), + (0xFD14, 'M', 'عي'), + (0xFD15, 'M', 'غى'), + (0xFD16, 'M', 'غي'), + (0xFD17, 'M', 'سى'), + (0xFD18, 'M', 'سي'), + (0xFD19, 'M', 'شى'), + (0xFD1A, 'M', 'شي'), + (0xFD1B, 'M', 'حى'), + (0xFD1C, 'M', 'حي'), + (0xFD1D, 'M', 'جى'), + (0xFD1E, 'M', 'جي'), + ] + +def _seg_48() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFD1F, 'M', 'خى'), + (0xFD20, 'M', 'خي'), + (0xFD21, 'M', 'صى'), + (0xFD22, 'M', 'صي'), + (0xFD23, 'M', 'ضى'), + (0xFD24, 'M', 'ضي'), + (0xFD25, 'M', 'شج'), + (0xFD26, 'M', 'شح'), + (0xFD27, 'M', 'شخ'), + (0xFD28, 'M', 'شم'), + (0xFD29, 'M', 'شر'), + (0xFD2A, 'M', 'سر'), + (0xFD2B, 'M', 'صر'), + (0xFD2C, 'M', 'ضر'), + (0xFD2D, 'M', 'شج'), + (0xFD2E, 'M', 'شح'), + (0xFD2F, 'M', 'شخ'), + (0xFD30, 'M', 'شم'), + (0xFD31, 'M', 'سه'), + (0xFD32, 'M', 'شه'), + (0xFD33, 'M', 'طم'), + (0xFD34, 'M', 'سج'), + (0xFD35, 'M', 'سح'), + (0xFD36, 'M', 'سخ'), + (0xFD37, 'M', 'شج'), + (0xFD38, 'M', 'شح'), + (0xFD39, 'M', 'شخ'), + (0xFD3A, 'M', 'طم'), + (0xFD3B, 'M', 'ظم'), + (0xFD3C, 'M', 'اً'), + (0xFD3E, 'V'), + (0xFD50, 'M', 'تجم'), + (0xFD51, 'M', 'تحج'), + (0xFD53, 'M', 'تحم'), + (0xFD54, 'M', 'تخم'), + (0xFD55, 'M', 'تمج'), + (0xFD56, 'M', 'تمح'), + (0xFD57, 'M', 'تمخ'), + (0xFD58, 'M', 'جمح'), + (0xFD5A, 'M', 'حمي'), + (0xFD5B, 'M', 'حمى'), + (0xFD5C, 'M', 'سحج'), + (0xFD5D, 'M', 'سجح'), + (0xFD5E, 'M', 'سجى'), + (0xFD5F, 'M', 'سمح'), + (0xFD61, 'M', 'سمج'), + (0xFD62, 'M', 'سمم'), + (0xFD64, 'M', 'صحح'), + (0xFD66, 'M', 'صمم'), + (0xFD67, 'M', 'شحم'), + (0xFD69, 'M', 'شجي'), + (0xFD6A, 'M', 'شمخ'), + (0xFD6C, 'M', 'شمم'), + (0xFD6E, 'M', 'ضحى'), + (0xFD6F, 'M', 'ضخم'), + (0xFD71, 'M', 'طمح'), + (0xFD73, 'M', 'طمم'), + (0xFD74, 'M', 'طمي'), + (0xFD75, 'M', 'عجم'), + (0xFD76, 'M', 'عمم'), + (0xFD78, 'M', 'عمى'), + (0xFD79, 'M', 'غمم'), + (0xFD7A, 'M', 'غمي'), + (0xFD7B, 'M', 'غمى'), + (0xFD7C, 'M', 'فخم'), + (0xFD7E, 'M', 'قمح'), + (0xFD7F, 'M', 'قمم'), + (0xFD80, 'M', 'لحم'), + (0xFD81, 'M', 'لحي'), + (0xFD82, 'M', 'لحى'), + (0xFD83, 'M', 'لجج'), + (0xFD85, 'M', 'لخم'), + (0xFD87, 'M', 'لمح'), + (0xFD89, 'M', 'محج'), + (0xFD8A, 'M', 'محم'), + (0xFD8B, 'M', 'محي'), + (0xFD8C, 'M', 'مجح'), + (0xFD8D, 'M', 'مجم'), + (0xFD8E, 'M', 'مخج'), + (0xFD8F, 'M', 'مخم'), + (0xFD90, 'X'), + (0xFD92, 'M', 'مجخ'), + (0xFD93, 'M', 'همج'), + (0xFD94, 'M', 'همم'), + (0xFD95, 'M', 'نحم'), + (0xFD96, 'M', 'نحى'), + (0xFD97, 'M', 'نجم'), + (0xFD99, 'M', 'نجى'), + (0xFD9A, 'M', 'نمي'), + (0xFD9B, 'M', 'نمى'), + (0xFD9C, 'M', 'يمم'), + (0xFD9E, 'M', 'بخي'), + (0xFD9F, 'M', 'تجي'), + (0xFDA0, 'M', 'تجى'), + (0xFDA1, 'M', 'تخي'), + (0xFDA2, 'M', 'تخى'), + (0xFDA3, 'M', 'تمي'), + (0xFDA4, 'M', 'تمى'), + (0xFDA5, 'M', 'جمي'), + (0xFDA6, 'M', 'جحى'), + ] + +def _seg_49() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFDA7, 'M', 'جمى'), + (0xFDA8, 'M', 'سخى'), + (0xFDA9, 'M', 'صحي'), + (0xFDAA, 'M', 'شحي'), + (0xFDAB, 'M', 'ضحي'), + (0xFDAC, 'M', 'لجي'), + (0xFDAD, 'M', 'لمي'), + (0xFDAE, 'M', 'يحي'), + (0xFDAF, 'M', 'يجي'), + (0xFDB0, 'M', 'يمي'), + (0xFDB1, 'M', 'ممي'), + (0xFDB2, 'M', 'قمي'), + (0xFDB3, 'M', 'نحي'), + (0xFDB4, 'M', 'قمح'), + (0xFDB5, 'M', 'لحم'), + (0xFDB6, 'M', 'عمي'), + (0xFDB7, 'M', 'كمي'), + (0xFDB8, 'M', 'نجح'), + (0xFDB9, 'M', 'مخي'), + (0xFDBA, 'M', 'لجم'), + (0xFDBB, 'M', 'كمم'), + (0xFDBC, 'M', 'لجم'), + (0xFDBD, 'M', 'نجح'), + (0xFDBE, 'M', 'جحي'), + (0xFDBF, 'M', 'حجي'), + (0xFDC0, 'M', 'مجي'), + (0xFDC1, 'M', 'فمي'), + (0xFDC2, 'M', 'بحي'), + (0xFDC3, 'M', 'كمم'), + (0xFDC4, 'M', 'عجم'), + (0xFDC5, 'M', 'صمم'), + (0xFDC6, 'M', 'سخي'), + (0xFDC7, 'M', 'نجي'), + (0xFDC8, 'X'), + (0xFDCF, 'V'), + (0xFDD0, 'X'), + (0xFDF0, 'M', 'صلے'), + (0xFDF1, 'M', 'قلے'), + (0xFDF2, 'M', 'الله'), + (0xFDF3, 'M', 'اكبر'), + (0xFDF4, 'M', 'محمد'), + (0xFDF5, 'M', 'صلعم'), + (0xFDF6, 'M', 'رسول'), + (0xFDF7, 'M', 'عليه'), + (0xFDF8, 'M', 'وسلم'), + (0xFDF9, 'M', 'صلى'), + (0xFDFA, '3', 'صلى الله عليه وسلم'), + (0xFDFB, '3', 'جل جلاله'), + (0xFDFC, 'M', 'ریال'), + (0xFDFD, 'V'), + (0xFE00, 'I'), + (0xFE10, '3', ','), + (0xFE11, 'M', '、'), + (0xFE12, 'X'), + (0xFE13, '3', ':'), + (0xFE14, '3', ';'), + (0xFE15, '3', '!'), + (0xFE16, '3', '?'), + (0xFE17, 'M', '〖'), + (0xFE18, 'M', '〗'), + (0xFE19, 'X'), + (0xFE20, 'V'), + (0xFE30, 'X'), + (0xFE31, 'M', '—'), + (0xFE32, 'M', '–'), + (0xFE33, '3', '_'), + (0xFE35, '3', '('), + (0xFE36, '3', ')'), + (0xFE37, '3', '{'), + (0xFE38, '3', '}'), + (0xFE39, 'M', '〔'), + (0xFE3A, 'M', '〕'), + (0xFE3B, 'M', '【'), + (0xFE3C, 'M', '】'), + (0xFE3D, 'M', '《'), + (0xFE3E, 'M', '》'), + (0xFE3F, 'M', '〈'), + (0xFE40, 'M', '〉'), + (0xFE41, 'M', '「'), + (0xFE42, 'M', '」'), + (0xFE43, 'M', '『'), + (0xFE44, 'M', '』'), + (0xFE45, 'V'), + (0xFE47, '3', '['), + (0xFE48, '3', ']'), + (0xFE49, '3', ' ̅'), + (0xFE4D, '3', '_'), + (0xFE50, '3', ','), + (0xFE51, 'M', '、'), + (0xFE52, 'X'), + (0xFE54, '3', ';'), + (0xFE55, '3', ':'), + (0xFE56, '3', '?'), + (0xFE57, '3', '!'), + (0xFE58, 'M', '—'), + (0xFE59, '3', '('), + (0xFE5A, '3', ')'), + (0xFE5B, '3', '{'), + (0xFE5C, '3', '}'), + (0xFE5D, 'M', '〔'), + ] + +def _seg_50() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFE5E, 'M', '〕'), + (0xFE5F, '3', '#'), + (0xFE60, '3', '&'), + (0xFE61, '3', '*'), + (0xFE62, '3', '+'), + (0xFE63, 'M', '-'), + (0xFE64, '3', '<'), + (0xFE65, '3', '>'), + (0xFE66, '3', '='), + (0xFE67, 'X'), + (0xFE68, '3', '\\'), + (0xFE69, '3', '$'), + (0xFE6A, '3', '%'), + (0xFE6B, '3', '@'), + (0xFE6C, 'X'), + (0xFE70, '3', ' ً'), + (0xFE71, 'M', 'ـً'), + (0xFE72, '3', ' ٌ'), + (0xFE73, 'V'), + (0xFE74, '3', ' ٍ'), + (0xFE75, 'X'), + (0xFE76, '3', ' َ'), + (0xFE77, 'M', 'ـَ'), + (0xFE78, '3', ' ُ'), + (0xFE79, 'M', 'ـُ'), + (0xFE7A, '3', ' ِ'), + (0xFE7B, 'M', 'ـِ'), + (0xFE7C, '3', ' ّ'), + (0xFE7D, 'M', 'ـّ'), + (0xFE7E, '3', ' ْ'), + (0xFE7F, 'M', 'ـْ'), + (0xFE80, 'M', 'ء'), + (0xFE81, 'M', 'آ'), + (0xFE83, 'M', 'أ'), + (0xFE85, 'M', 'ؤ'), + (0xFE87, 'M', 'إ'), + (0xFE89, 'M', 'ئ'), + (0xFE8D, 'M', 'ا'), + (0xFE8F, 'M', 'ب'), + (0xFE93, 'M', 'ة'), + (0xFE95, 'M', 'ت'), + (0xFE99, 'M', 'ث'), + (0xFE9D, 'M', 'ج'), + (0xFEA1, 'M', 'ح'), + (0xFEA5, 'M', 'خ'), + (0xFEA9, 'M', 'د'), + (0xFEAB, 'M', 'ذ'), + (0xFEAD, 'M', 'ر'), + (0xFEAF, 'M', 'ز'), + (0xFEB1, 'M', 'س'), + (0xFEB5, 'M', 'ش'), + (0xFEB9, 'M', 'ص'), + (0xFEBD, 'M', 'ض'), + (0xFEC1, 'M', 'ط'), + (0xFEC5, 'M', 'ظ'), + (0xFEC9, 'M', 'ع'), + (0xFECD, 'M', 'غ'), + (0xFED1, 'M', 'ف'), + (0xFED5, 'M', 'ق'), + (0xFED9, 'M', 'ك'), + (0xFEDD, 'M', 'ل'), + (0xFEE1, 'M', 'م'), + (0xFEE5, 'M', 'ن'), + (0xFEE9, 'M', 'ه'), + (0xFEED, 'M', 'و'), + (0xFEEF, 'M', 'ى'), + (0xFEF1, 'M', 'ي'), + (0xFEF5, 'M', 'لآ'), + (0xFEF7, 'M', 'لأ'), + (0xFEF9, 'M', 'لإ'), + (0xFEFB, 'M', 'لا'), + (0xFEFD, 'X'), + (0xFEFF, 'I'), + (0xFF00, 'X'), + (0xFF01, '3', '!'), + (0xFF02, '3', '"'), + (0xFF03, '3', '#'), + (0xFF04, '3', '$'), + (0xFF05, '3', '%'), + (0xFF06, '3', '&'), + (0xFF07, '3', '\''), + (0xFF08, '3', '('), + (0xFF09, '3', ')'), + (0xFF0A, '3', '*'), + (0xFF0B, '3', '+'), + (0xFF0C, '3', ','), + (0xFF0D, 'M', '-'), + (0xFF0E, 'M', '.'), + (0xFF0F, '3', '/'), + (0xFF10, 'M', '0'), + (0xFF11, 'M', '1'), + (0xFF12, 'M', '2'), + (0xFF13, 'M', '3'), + (0xFF14, 'M', '4'), + (0xFF15, 'M', '5'), + (0xFF16, 'M', '6'), + (0xFF17, 'M', '7'), + (0xFF18, 'M', '8'), + (0xFF19, 'M', '9'), + (0xFF1A, '3', ':'), + ] + +def _seg_51() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFF1B, '3', ';'), + (0xFF1C, '3', '<'), + (0xFF1D, '3', '='), + (0xFF1E, '3', '>'), + (0xFF1F, '3', '?'), + (0xFF20, '3', '@'), + (0xFF21, 'M', 'a'), + (0xFF22, 'M', 'b'), + (0xFF23, 'M', 'c'), + (0xFF24, 'M', 'd'), + (0xFF25, 'M', 'e'), + (0xFF26, 'M', 'f'), + (0xFF27, 'M', 'g'), + (0xFF28, 'M', 'h'), + (0xFF29, 'M', 'i'), + (0xFF2A, 'M', 'j'), + (0xFF2B, 'M', 'k'), + (0xFF2C, 'M', 'l'), + (0xFF2D, 'M', 'm'), + (0xFF2E, 'M', 'n'), + (0xFF2F, 'M', 'o'), + (0xFF30, 'M', 'p'), + (0xFF31, 'M', 'q'), + (0xFF32, 'M', 'r'), + (0xFF33, 'M', 's'), + (0xFF34, 'M', 't'), + (0xFF35, 'M', 'u'), + (0xFF36, 'M', 'v'), + (0xFF37, 'M', 'w'), + (0xFF38, 'M', 'x'), + (0xFF39, 'M', 'y'), + (0xFF3A, 'M', 'z'), + (0xFF3B, '3', '['), + (0xFF3C, '3', '\\'), + (0xFF3D, '3', ']'), + (0xFF3E, '3', '^'), + (0xFF3F, '3', '_'), + (0xFF40, '3', '`'), + (0xFF41, 'M', 'a'), + (0xFF42, 'M', 'b'), + (0xFF43, 'M', 'c'), + (0xFF44, 'M', 'd'), + (0xFF45, 'M', 'e'), + (0xFF46, 'M', 'f'), + (0xFF47, 'M', 'g'), + (0xFF48, 'M', 'h'), + (0xFF49, 'M', 'i'), + (0xFF4A, 'M', 'j'), + (0xFF4B, 'M', 'k'), + (0xFF4C, 'M', 'l'), + (0xFF4D, 'M', 'm'), + (0xFF4E, 'M', 'n'), + (0xFF4F, 'M', 'o'), + (0xFF50, 'M', 'p'), + (0xFF51, 'M', 'q'), + (0xFF52, 'M', 'r'), + (0xFF53, 'M', 's'), + (0xFF54, 'M', 't'), + (0xFF55, 'M', 'u'), + (0xFF56, 'M', 'v'), + (0xFF57, 'M', 'w'), + (0xFF58, 'M', 'x'), + (0xFF59, 'M', 'y'), + (0xFF5A, 'M', 'z'), + (0xFF5B, '3', '{'), + (0xFF5C, '3', '|'), + (0xFF5D, '3', '}'), + (0xFF5E, '3', '~'), + (0xFF5F, 'M', '⦅'), + (0xFF60, 'M', '⦆'), + (0xFF61, 'M', '.'), + (0xFF62, 'M', '「'), + (0xFF63, 'M', '」'), + (0xFF64, 'M', '、'), + (0xFF65, 'M', '・'), + (0xFF66, 'M', 'ヲ'), + (0xFF67, 'M', 'ァ'), + (0xFF68, 'M', 'ィ'), + (0xFF69, 'M', 'ゥ'), + (0xFF6A, 'M', 'ェ'), + (0xFF6B, 'M', 'ォ'), + (0xFF6C, 'M', 'ャ'), + (0xFF6D, 'M', 'ュ'), + (0xFF6E, 'M', 'ョ'), + (0xFF6F, 'M', 'ッ'), + (0xFF70, 'M', 'ー'), + (0xFF71, 'M', 'ア'), + (0xFF72, 'M', 'イ'), + (0xFF73, 'M', 'ウ'), + (0xFF74, 'M', 'エ'), + (0xFF75, 'M', 'オ'), + (0xFF76, 'M', 'カ'), + (0xFF77, 'M', 'キ'), + (0xFF78, 'M', 'ク'), + (0xFF79, 'M', 'ケ'), + (0xFF7A, 'M', 'コ'), + (0xFF7B, 'M', 'サ'), + (0xFF7C, 'M', 'シ'), + (0xFF7D, 'M', 'ス'), + (0xFF7E, 'M', 'セ'), + ] + +def _seg_52() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFF7F, 'M', 'ソ'), + (0xFF80, 'M', 'タ'), + (0xFF81, 'M', 'チ'), + (0xFF82, 'M', 'ツ'), + (0xFF83, 'M', 'テ'), + (0xFF84, 'M', 'ト'), + (0xFF85, 'M', 'ナ'), + (0xFF86, 'M', 'ニ'), + (0xFF87, 'M', 'ヌ'), + (0xFF88, 'M', 'ネ'), + (0xFF89, 'M', 'ノ'), + (0xFF8A, 'M', 'ハ'), + (0xFF8B, 'M', 'ヒ'), + (0xFF8C, 'M', 'フ'), + (0xFF8D, 'M', 'ヘ'), + (0xFF8E, 'M', 'ホ'), + (0xFF8F, 'M', 'マ'), + (0xFF90, 'M', 'ミ'), + (0xFF91, 'M', 'ム'), + (0xFF92, 'M', 'メ'), + (0xFF93, 'M', 'モ'), + (0xFF94, 'M', 'ヤ'), + (0xFF95, 'M', 'ユ'), + (0xFF96, 'M', 'ヨ'), + (0xFF97, 'M', 'ラ'), + (0xFF98, 'M', 'リ'), + (0xFF99, 'M', 'ル'), + (0xFF9A, 'M', 'レ'), + (0xFF9B, 'M', 'ロ'), + (0xFF9C, 'M', 'ワ'), + (0xFF9D, 'M', 'ン'), + (0xFF9E, 'M', '゙'), + (0xFF9F, 'M', '゚'), + (0xFFA0, 'X'), + (0xFFA1, 'M', 'ᄀ'), + (0xFFA2, 'M', 'ᄁ'), + (0xFFA3, 'M', 'ᆪ'), + (0xFFA4, 'M', 'ᄂ'), + (0xFFA5, 'M', 'ᆬ'), + (0xFFA6, 'M', 'ᆭ'), + (0xFFA7, 'M', 'ᄃ'), + (0xFFA8, 'M', 'ᄄ'), + (0xFFA9, 'M', 'ᄅ'), + (0xFFAA, 'M', 'ᆰ'), + (0xFFAB, 'M', 'ᆱ'), + (0xFFAC, 'M', 'ᆲ'), + (0xFFAD, 'M', 'ᆳ'), + (0xFFAE, 'M', 'ᆴ'), + (0xFFAF, 'M', 'ᆵ'), + (0xFFB0, 'M', 'ᄚ'), + (0xFFB1, 'M', 'ᄆ'), + (0xFFB2, 'M', 'ᄇ'), + (0xFFB3, 'M', 'ᄈ'), + (0xFFB4, 'M', 'ᄡ'), + (0xFFB5, 'M', 'ᄉ'), + (0xFFB6, 'M', 'ᄊ'), + (0xFFB7, 'M', 'ᄋ'), + (0xFFB8, 'M', 'ᄌ'), + (0xFFB9, 'M', 'ᄍ'), + (0xFFBA, 'M', 'ᄎ'), + (0xFFBB, 'M', 'ᄏ'), + (0xFFBC, 'M', 'ᄐ'), + (0xFFBD, 'M', 'ᄑ'), + (0xFFBE, 'M', 'ᄒ'), + (0xFFBF, 'X'), + (0xFFC2, 'M', 'ᅡ'), + (0xFFC3, 'M', 'ᅢ'), + (0xFFC4, 'M', 'ᅣ'), + (0xFFC5, 'M', 'ᅤ'), + (0xFFC6, 'M', 'ᅥ'), + (0xFFC7, 'M', 'ᅦ'), + (0xFFC8, 'X'), + (0xFFCA, 'M', 'ᅧ'), + (0xFFCB, 'M', 'ᅨ'), + (0xFFCC, 'M', 'ᅩ'), + (0xFFCD, 'M', 'ᅪ'), + (0xFFCE, 'M', 'ᅫ'), + (0xFFCF, 'M', 'ᅬ'), + (0xFFD0, 'X'), + (0xFFD2, 'M', 'ᅭ'), + (0xFFD3, 'M', 'ᅮ'), + (0xFFD4, 'M', 'ᅯ'), + (0xFFD5, 'M', 'ᅰ'), + (0xFFD6, 'M', 'ᅱ'), + (0xFFD7, 'M', 'ᅲ'), + (0xFFD8, 'X'), + (0xFFDA, 'M', 'ᅳ'), + (0xFFDB, 'M', 'ᅴ'), + (0xFFDC, 'M', 'ᅵ'), + (0xFFDD, 'X'), + (0xFFE0, 'M', '¢'), + (0xFFE1, 'M', '£'), + (0xFFE2, 'M', '¬'), + (0xFFE3, '3', ' ̄'), + (0xFFE4, 'M', '¦'), + (0xFFE5, 'M', '¥'), + (0xFFE6, 'M', '₩'), + (0xFFE7, 'X'), + (0xFFE8, 'M', '│'), + (0xFFE9, 'M', '←'), + ] + +def _seg_53() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0xFFEA, 'M', '↑'), + (0xFFEB, 'M', '→'), + (0xFFEC, 'M', '↓'), + (0xFFED, 'M', '■'), + (0xFFEE, 'M', '○'), + (0xFFEF, 'X'), + (0x10000, 'V'), + (0x1000C, 'X'), + (0x1000D, 'V'), + (0x10027, 'X'), + (0x10028, 'V'), + (0x1003B, 'X'), + (0x1003C, 'V'), + (0x1003E, 'X'), + (0x1003F, 'V'), + (0x1004E, 'X'), + (0x10050, 'V'), + (0x1005E, 'X'), + (0x10080, 'V'), + (0x100FB, 'X'), + (0x10100, 'V'), + (0x10103, 'X'), + (0x10107, 'V'), + (0x10134, 'X'), + (0x10137, 'V'), + (0x1018F, 'X'), + (0x10190, 'V'), + (0x1019D, 'X'), + (0x101A0, 'V'), + (0x101A1, 'X'), + (0x101D0, 'V'), + (0x101FE, 'X'), + (0x10280, 'V'), + (0x1029D, 'X'), + (0x102A0, 'V'), + (0x102D1, 'X'), + (0x102E0, 'V'), + (0x102FC, 'X'), + (0x10300, 'V'), + (0x10324, 'X'), + (0x1032D, 'V'), + (0x1034B, 'X'), + (0x10350, 'V'), + (0x1037B, 'X'), + (0x10380, 'V'), + (0x1039E, 'X'), + (0x1039F, 'V'), + (0x103C4, 'X'), + (0x103C8, 'V'), + (0x103D6, 'X'), + (0x10400, 'M', '𐐨'), + (0x10401, 'M', '𐐩'), + (0x10402, 'M', '𐐪'), + (0x10403, 'M', '𐐫'), + (0x10404, 'M', '𐐬'), + (0x10405, 'M', '𐐭'), + (0x10406, 'M', '𐐮'), + (0x10407, 'M', '𐐯'), + (0x10408, 'M', '𐐰'), + (0x10409, 'M', '𐐱'), + (0x1040A, 'M', '𐐲'), + (0x1040B, 'M', '𐐳'), + (0x1040C, 'M', '𐐴'), + (0x1040D, 'M', '𐐵'), + (0x1040E, 'M', '𐐶'), + (0x1040F, 'M', '𐐷'), + (0x10410, 'M', '𐐸'), + (0x10411, 'M', '𐐹'), + (0x10412, 'M', '𐐺'), + (0x10413, 'M', '𐐻'), + (0x10414, 'M', '𐐼'), + (0x10415, 'M', '𐐽'), + (0x10416, 'M', '𐐾'), + (0x10417, 'M', '𐐿'), + (0x10418, 'M', '𐑀'), + (0x10419, 'M', '𐑁'), + (0x1041A, 'M', '𐑂'), + (0x1041B, 'M', '𐑃'), + (0x1041C, 'M', '𐑄'), + (0x1041D, 'M', '𐑅'), + (0x1041E, 'M', '𐑆'), + (0x1041F, 'M', '𐑇'), + (0x10420, 'M', '𐑈'), + (0x10421, 'M', '𐑉'), + (0x10422, 'M', '𐑊'), + (0x10423, 'M', '𐑋'), + (0x10424, 'M', '𐑌'), + (0x10425, 'M', '𐑍'), + (0x10426, 'M', '𐑎'), + (0x10427, 'M', '𐑏'), + (0x10428, 'V'), + (0x1049E, 'X'), + (0x104A0, 'V'), + (0x104AA, 'X'), + (0x104B0, 'M', '𐓘'), + (0x104B1, 'M', '𐓙'), + (0x104B2, 'M', '𐓚'), + (0x104B3, 'M', '𐓛'), + (0x104B4, 'M', '𐓜'), + (0x104B5, 'M', '𐓝'), + ] + +def _seg_54() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x104B6, 'M', '𐓞'), + (0x104B7, 'M', '𐓟'), + (0x104B8, 'M', '𐓠'), + (0x104B9, 'M', '𐓡'), + (0x104BA, 'M', '𐓢'), + (0x104BB, 'M', '𐓣'), + (0x104BC, 'M', '𐓤'), + (0x104BD, 'M', '𐓥'), + (0x104BE, 'M', '𐓦'), + (0x104BF, 'M', '𐓧'), + (0x104C0, 'M', '𐓨'), + (0x104C1, 'M', '𐓩'), + (0x104C2, 'M', '𐓪'), + (0x104C3, 'M', '𐓫'), + (0x104C4, 'M', '𐓬'), + (0x104C5, 'M', '𐓭'), + (0x104C6, 'M', '𐓮'), + (0x104C7, 'M', '𐓯'), + (0x104C8, 'M', '𐓰'), + (0x104C9, 'M', '𐓱'), + (0x104CA, 'M', '𐓲'), + (0x104CB, 'M', '𐓳'), + (0x104CC, 'M', '𐓴'), + (0x104CD, 'M', '𐓵'), + (0x104CE, 'M', '𐓶'), + (0x104CF, 'M', '𐓷'), + (0x104D0, 'M', '𐓸'), + (0x104D1, 'M', '𐓹'), + (0x104D2, 'M', '𐓺'), + (0x104D3, 'M', '𐓻'), + (0x104D4, 'X'), + (0x104D8, 'V'), + (0x104FC, 'X'), + (0x10500, 'V'), + (0x10528, 'X'), + (0x10530, 'V'), + (0x10564, 'X'), + (0x1056F, 'V'), + (0x10570, 'M', '𐖗'), + (0x10571, 'M', '𐖘'), + (0x10572, 'M', '𐖙'), + (0x10573, 'M', '𐖚'), + (0x10574, 'M', '𐖛'), + (0x10575, 'M', '𐖜'), + (0x10576, 'M', '𐖝'), + (0x10577, 'M', '𐖞'), + (0x10578, 'M', '𐖟'), + (0x10579, 'M', '𐖠'), + (0x1057A, 'M', '𐖡'), + (0x1057B, 'X'), + (0x1057C, 'M', '𐖣'), + (0x1057D, 'M', '𐖤'), + (0x1057E, 'M', '𐖥'), + (0x1057F, 'M', '𐖦'), + (0x10580, 'M', '𐖧'), + (0x10581, 'M', '𐖨'), + (0x10582, 'M', '𐖩'), + (0x10583, 'M', '𐖪'), + (0x10584, 'M', '𐖫'), + (0x10585, 'M', '𐖬'), + (0x10586, 'M', '𐖭'), + (0x10587, 'M', '𐖮'), + (0x10588, 'M', '𐖯'), + (0x10589, 'M', '𐖰'), + (0x1058A, 'M', '𐖱'), + (0x1058B, 'X'), + (0x1058C, 'M', '𐖳'), + (0x1058D, 'M', '𐖴'), + (0x1058E, 'M', '𐖵'), + (0x1058F, 'M', '𐖶'), + (0x10590, 'M', '𐖷'), + (0x10591, 'M', '𐖸'), + (0x10592, 'M', '𐖹'), + (0x10593, 'X'), + (0x10594, 'M', '𐖻'), + (0x10595, 'M', '𐖼'), + (0x10596, 'X'), + (0x10597, 'V'), + (0x105A2, 'X'), + (0x105A3, 'V'), + (0x105B2, 'X'), + (0x105B3, 'V'), + (0x105BA, 'X'), + (0x105BB, 'V'), + (0x105BD, 'X'), + (0x10600, 'V'), + (0x10737, 'X'), + (0x10740, 'V'), + (0x10756, 'X'), + (0x10760, 'V'), + (0x10768, 'X'), + (0x10780, 'V'), + (0x10781, 'M', 'ː'), + (0x10782, 'M', 'ˑ'), + (0x10783, 'M', 'æ'), + (0x10784, 'M', 'ʙ'), + (0x10785, 'M', 'ɓ'), + (0x10786, 'X'), + (0x10787, 'M', 'ʣ'), + (0x10788, 'M', 'ꭦ'), + ] + +def _seg_55() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x10789, 'M', 'ʥ'), + (0x1078A, 'M', 'ʤ'), + (0x1078B, 'M', 'ɖ'), + (0x1078C, 'M', 'ɗ'), + (0x1078D, 'M', 'ᶑ'), + (0x1078E, 'M', 'ɘ'), + (0x1078F, 'M', 'ɞ'), + (0x10790, 'M', 'ʩ'), + (0x10791, 'M', 'ɤ'), + (0x10792, 'M', 'ɢ'), + (0x10793, 'M', 'ɠ'), + (0x10794, 'M', 'ʛ'), + (0x10795, 'M', 'ħ'), + (0x10796, 'M', 'ʜ'), + (0x10797, 'M', 'ɧ'), + (0x10798, 'M', 'ʄ'), + (0x10799, 'M', 'ʪ'), + (0x1079A, 'M', 'ʫ'), + (0x1079B, 'M', 'ɬ'), + (0x1079C, 'M', '𝼄'), + (0x1079D, 'M', 'ꞎ'), + (0x1079E, 'M', 'ɮ'), + (0x1079F, 'M', '𝼅'), + (0x107A0, 'M', 'ʎ'), + (0x107A1, 'M', '𝼆'), + (0x107A2, 'M', 'ø'), + (0x107A3, 'M', 'ɶ'), + (0x107A4, 'M', 'ɷ'), + (0x107A5, 'M', 'q'), + (0x107A6, 'M', 'ɺ'), + (0x107A7, 'M', '𝼈'), + (0x107A8, 'M', 'ɽ'), + (0x107A9, 'M', 'ɾ'), + (0x107AA, 'M', 'ʀ'), + (0x107AB, 'M', 'ʨ'), + (0x107AC, 'M', 'ʦ'), + (0x107AD, 'M', 'ꭧ'), + (0x107AE, 'M', 'ʧ'), + (0x107AF, 'M', 'ʈ'), + (0x107B0, 'M', 'ⱱ'), + (0x107B1, 'X'), + (0x107B2, 'M', 'ʏ'), + (0x107B3, 'M', 'ʡ'), + (0x107B4, 'M', 'ʢ'), + (0x107B5, 'M', 'ʘ'), + (0x107B6, 'M', 'ǀ'), + (0x107B7, 'M', 'ǁ'), + (0x107B8, 'M', 'ǂ'), + (0x107B9, 'M', '𝼊'), + (0x107BA, 'M', '𝼞'), + (0x107BB, 'X'), + (0x10800, 'V'), + (0x10806, 'X'), + (0x10808, 'V'), + (0x10809, 'X'), + (0x1080A, 'V'), + (0x10836, 'X'), + (0x10837, 'V'), + (0x10839, 'X'), + (0x1083C, 'V'), + (0x1083D, 'X'), + (0x1083F, 'V'), + (0x10856, 'X'), + (0x10857, 'V'), + (0x1089F, 'X'), + (0x108A7, 'V'), + (0x108B0, 'X'), + (0x108E0, 'V'), + (0x108F3, 'X'), + (0x108F4, 'V'), + (0x108F6, 'X'), + (0x108FB, 'V'), + (0x1091C, 'X'), + (0x1091F, 'V'), + (0x1093A, 'X'), + (0x1093F, 'V'), + (0x10940, 'X'), + (0x10980, 'V'), + (0x109B8, 'X'), + (0x109BC, 'V'), + (0x109D0, 'X'), + (0x109D2, 'V'), + (0x10A04, 'X'), + (0x10A05, 'V'), + (0x10A07, 'X'), + (0x10A0C, 'V'), + (0x10A14, 'X'), + (0x10A15, 'V'), + (0x10A18, 'X'), + (0x10A19, 'V'), + (0x10A36, 'X'), + (0x10A38, 'V'), + (0x10A3B, 'X'), + (0x10A3F, 'V'), + (0x10A49, 'X'), + (0x10A50, 'V'), + (0x10A59, 'X'), + (0x10A60, 'V'), + (0x10AA0, 'X'), + (0x10AC0, 'V'), + ] + +def _seg_56() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x10AE7, 'X'), + (0x10AEB, 'V'), + (0x10AF7, 'X'), + (0x10B00, 'V'), + (0x10B36, 'X'), + (0x10B39, 'V'), + (0x10B56, 'X'), + (0x10B58, 'V'), + (0x10B73, 'X'), + (0x10B78, 'V'), + (0x10B92, 'X'), + (0x10B99, 'V'), + (0x10B9D, 'X'), + (0x10BA9, 'V'), + (0x10BB0, 'X'), + (0x10C00, 'V'), + (0x10C49, 'X'), + (0x10C80, 'M', '𐳀'), + (0x10C81, 'M', '𐳁'), + (0x10C82, 'M', '𐳂'), + (0x10C83, 'M', '𐳃'), + (0x10C84, 'M', '𐳄'), + (0x10C85, 'M', '𐳅'), + (0x10C86, 'M', '𐳆'), + (0x10C87, 'M', '𐳇'), + (0x10C88, 'M', '𐳈'), + (0x10C89, 'M', '𐳉'), + (0x10C8A, 'M', '𐳊'), + (0x10C8B, 'M', '𐳋'), + (0x10C8C, 'M', '𐳌'), + (0x10C8D, 'M', '𐳍'), + (0x10C8E, 'M', '𐳎'), + (0x10C8F, 'M', '𐳏'), + (0x10C90, 'M', '𐳐'), + (0x10C91, 'M', '𐳑'), + (0x10C92, 'M', '𐳒'), + (0x10C93, 'M', '𐳓'), + (0x10C94, 'M', '𐳔'), + (0x10C95, 'M', '𐳕'), + (0x10C96, 'M', '𐳖'), + (0x10C97, 'M', '𐳗'), + (0x10C98, 'M', '𐳘'), + (0x10C99, 'M', '𐳙'), + (0x10C9A, 'M', '𐳚'), + (0x10C9B, 'M', '𐳛'), + (0x10C9C, 'M', '𐳜'), + (0x10C9D, 'M', '𐳝'), + (0x10C9E, 'M', '𐳞'), + (0x10C9F, 'M', '𐳟'), + (0x10CA0, 'M', '𐳠'), + (0x10CA1, 'M', '𐳡'), + (0x10CA2, 'M', '𐳢'), + (0x10CA3, 'M', '𐳣'), + (0x10CA4, 'M', '𐳤'), + (0x10CA5, 'M', '𐳥'), + (0x10CA6, 'M', '𐳦'), + (0x10CA7, 'M', '𐳧'), + (0x10CA8, 'M', '𐳨'), + (0x10CA9, 'M', '𐳩'), + (0x10CAA, 'M', '𐳪'), + (0x10CAB, 'M', '𐳫'), + (0x10CAC, 'M', '𐳬'), + (0x10CAD, 'M', '𐳭'), + (0x10CAE, 'M', '𐳮'), + (0x10CAF, 'M', '𐳯'), + (0x10CB0, 'M', '𐳰'), + (0x10CB1, 'M', '𐳱'), + (0x10CB2, 'M', '𐳲'), + (0x10CB3, 'X'), + (0x10CC0, 'V'), + (0x10CF3, 'X'), + (0x10CFA, 'V'), + (0x10D28, 'X'), + (0x10D30, 'V'), + (0x10D3A, 'X'), + (0x10E60, 'V'), + (0x10E7F, 'X'), + (0x10E80, 'V'), + (0x10EAA, 'X'), + (0x10EAB, 'V'), + (0x10EAE, 'X'), + (0x10EB0, 'V'), + (0x10EB2, 'X'), + (0x10F00, 'V'), + (0x10F28, 'X'), + (0x10F30, 'V'), + (0x10F5A, 'X'), + (0x10F70, 'V'), + (0x10F8A, 'X'), + (0x10FB0, 'V'), + (0x10FCC, 'X'), + (0x10FE0, 'V'), + (0x10FF7, 'X'), + (0x11000, 'V'), + (0x1104E, 'X'), + (0x11052, 'V'), + (0x11076, 'X'), + (0x1107F, 'V'), + (0x110BD, 'X'), + (0x110BE, 'V'), + ] + +def _seg_57() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x110C3, 'X'), + (0x110D0, 'V'), + (0x110E9, 'X'), + (0x110F0, 'V'), + (0x110FA, 'X'), + (0x11100, 'V'), + (0x11135, 'X'), + (0x11136, 'V'), + (0x11148, 'X'), + (0x11150, 'V'), + (0x11177, 'X'), + (0x11180, 'V'), + (0x111E0, 'X'), + (0x111E1, 'V'), + (0x111F5, 'X'), + (0x11200, 'V'), + (0x11212, 'X'), + (0x11213, 'V'), + (0x1123F, 'X'), + (0x11280, 'V'), + (0x11287, 'X'), + (0x11288, 'V'), + (0x11289, 'X'), + (0x1128A, 'V'), + (0x1128E, 'X'), + (0x1128F, 'V'), + (0x1129E, 'X'), + (0x1129F, 'V'), + (0x112AA, 'X'), + (0x112B0, 'V'), + (0x112EB, 'X'), + (0x112F0, 'V'), + (0x112FA, 'X'), + (0x11300, 'V'), + (0x11304, 'X'), + (0x11305, 'V'), + (0x1130D, 'X'), + (0x1130F, 'V'), + (0x11311, 'X'), + (0x11313, 'V'), + (0x11329, 'X'), + (0x1132A, 'V'), + (0x11331, 'X'), + (0x11332, 'V'), + (0x11334, 'X'), + (0x11335, 'V'), + (0x1133A, 'X'), + (0x1133B, 'V'), + (0x11345, 'X'), + (0x11347, 'V'), + (0x11349, 'X'), + (0x1134B, 'V'), + (0x1134E, 'X'), + (0x11350, 'V'), + (0x11351, 'X'), + (0x11357, 'V'), + (0x11358, 'X'), + (0x1135D, 'V'), + (0x11364, 'X'), + (0x11366, 'V'), + (0x1136D, 'X'), + (0x11370, 'V'), + (0x11375, 'X'), + (0x11400, 'V'), + (0x1145C, 'X'), + (0x1145D, 'V'), + (0x11462, 'X'), + (0x11480, 'V'), + (0x114C8, 'X'), + (0x114D0, 'V'), + (0x114DA, 'X'), + (0x11580, 'V'), + (0x115B6, 'X'), + (0x115B8, 'V'), + (0x115DE, 'X'), + (0x11600, 'V'), + (0x11645, 'X'), + (0x11650, 'V'), + (0x1165A, 'X'), + (0x11660, 'V'), + (0x1166D, 'X'), + (0x11680, 'V'), + (0x116BA, 'X'), + (0x116C0, 'V'), + (0x116CA, 'X'), + (0x11700, 'V'), + (0x1171B, 'X'), + (0x1171D, 'V'), + (0x1172C, 'X'), + (0x11730, 'V'), + (0x11747, 'X'), + (0x11800, 'V'), + (0x1183C, 'X'), + (0x118A0, 'M', '𑣀'), + (0x118A1, 'M', '𑣁'), + (0x118A2, 'M', '𑣂'), + (0x118A3, 'M', '𑣃'), + (0x118A4, 'M', '𑣄'), + (0x118A5, 'M', '𑣅'), + (0x118A6, 'M', '𑣆'), + ] + +def _seg_58() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x118A7, 'M', '𑣇'), + (0x118A8, 'M', '𑣈'), + (0x118A9, 'M', '𑣉'), + (0x118AA, 'M', '𑣊'), + (0x118AB, 'M', '𑣋'), + (0x118AC, 'M', '𑣌'), + (0x118AD, 'M', '𑣍'), + (0x118AE, 'M', '𑣎'), + (0x118AF, 'M', '𑣏'), + (0x118B0, 'M', '𑣐'), + (0x118B1, 'M', '𑣑'), + (0x118B2, 'M', '𑣒'), + (0x118B3, 'M', '𑣓'), + (0x118B4, 'M', '𑣔'), + (0x118B5, 'M', '𑣕'), + (0x118B6, 'M', '𑣖'), + (0x118B7, 'M', '𑣗'), + (0x118B8, 'M', '𑣘'), + (0x118B9, 'M', '𑣙'), + (0x118BA, 'M', '𑣚'), + (0x118BB, 'M', '𑣛'), + (0x118BC, 'M', '𑣜'), + (0x118BD, 'M', '𑣝'), + (0x118BE, 'M', '𑣞'), + (0x118BF, 'M', '𑣟'), + (0x118C0, 'V'), + (0x118F3, 'X'), + (0x118FF, 'V'), + (0x11907, 'X'), + (0x11909, 'V'), + (0x1190A, 'X'), + (0x1190C, 'V'), + (0x11914, 'X'), + (0x11915, 'V'), + (0x11917, 'X'), + (0x11918, 'V'), + (0x11936, 'X'), + (0x11937, 'V'), + (0x11939, 'X'), + (0x1193B, 'V'), + (0x11947, 'X'), + (0x11950, 'V'), + (0x1195A, 'X'), + (0x119A0, 'V'), + (0x119A8, 'X'), + (0x119AA, 'V'), + (0x119D8, 'X'), + (0x119DA, 'V'), + (0x119E5, 'X'), + (0x11A00, 'V'), + (0x11A48, 'X'), + (0x11A50, 'V'), + (0x11AA3, 'X'), + (0x11AB0, 'V'), + (0x11AF9, 'X'), + (0x11C00, 'V'), + (0x11C09, 'X'), + (0x11C0A, 'V'), + (0x11C37, 'X'), + (0x11C38, 'V'), + (0x11C46, 'X'), + (0x11C50, 'V'), + (0x11C6D, 'X'), + (0x11C70, 'V'), + (0x11C90, 'X'), + (0x11C92, 'V'), + (0x11CA8, 'X'), + (0x11CA9, 'V'), + (0x11CB7, 'X'), + (0x11D00, 'V'), + (0x11D07, 'X'), + (0x11D08, 'V'), + (0x11D0A, 'X'), + (0x11D0B, 'V'), + (0x11D37, 'X'), + (0x11D3A, 'V'), + (0x11D3B, 'X'), + (0x11D3C, 'V'), + (0x11D3E, 'X'), + (0x11D3F, 'V'), + (0x11D48, 'X'), + (0x11D50, 'V'), + (0x11D5A, 'X'), + (0x11D60, 'V'), + (0x11D66, 'X'), + (0x11D67, 'V'), + (0x11D69, 'X'), + (0x11D6A, 'V'), + (0x11D8F, 'X'), + (0x11D90, 'V'), + (0x11D92, 'X'), + (0x11D93, 'V'), + (0x11D99, 'X'), + (0x11DA0, 'V'), + (0x11DAA, 'X'), + (0x11EE0, 'V'), + (0x11EF9, 'X'), + (0x11FB0, 'V'), + (0x11FB1, 'X'), + (0x11FC0, 'V'), + ] + +def _seg_59() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x11FF2, 'X'), + (0x11FFF, 'V'), + (0x1239A, 'X'), + (0x12400, 'V'), + (0x1246F, 'X'), + (0x12470, 'V'), + (0x12475, 'X'), + (0x12480, 'V'), + (0x12544, 'X'), + (0x12F90, 'V'), + (0x12FF3, 'X'), + (0x13000, 'V'), + (0x1342F, 'X'), + (0x14400, 'V'), + (0x14647, 'X'), + (0x16800, 'V'), + (0x16A39, 'X'), + (0x16A40, 'V'), + (0x16A5F, 'X'), + (0x16A60, 'V'), + (0x16A6A, 'X'), + (0x16A6E, 'V'), + (0x16ABF, 'X'), + (0x16AC0, 'V'), + (0x16ACA, 'X'), + (0x16AD0, 'V'), + (0x16AEE, 'X'), + (0x16AF0, 'V'), + (0x16AF6, 'X'), + (0x16B00, 'V'), + (0x16B46, 'X'), + (0x16B50, 'V'), + (0x16B5A, 'X'), + (0x16B5B, 'V'), + (0x16B62, 'X'), + (0x16B63, 'V'), + (0x16B78, 'X'), + (0x16B7D, 'V'), + (0x16B90, 'X'), + (0x16E40, 'M', '𖹠'), + (0x16E41, 'M', '𖹡'), + (0x16E42, 'M', '𖹢'), + (0x16E43, 'M', '𖹣'), + (0x16E44, 'M', '𖹤'), + (0x16E45, 'M', '𖹥'), + (0x16E46, 'M', '𖹦'), + (0x16E47, 'M', '𖹧'), + (0x16E48, 'M', '𖹨'), + (0x16E49, 'M', '𖹩'), + (0x16E4A, 'M', '𖹪'), + (0x16E4B, 'M', '𖹫'), + (0x16E4C, 'M', '𖹬'), + (0x16E4D, 'M', '𖹭'), + (0x16E4E, 'M', '𖹮'), + (0x16E4F, 'M', '𖹯'), + (0x16E50, 'M', '𖹰'), + (0x16E51, 'M', '𖹱'), + (0x16E52, 'M', '𖹲'), + (0x16E53, 'M', '𖹳'), + (0x16E54, 'M', '𖹴'), + (0x16E55, 'M', '𖹵'), + (0x16E56, 'M', '𖹶'), + (0x16E57, 'M', '𖹷'), + (0x16E58, 'M', '𖹸'), + (0x16E59, 'M', '𖹹'), + (0x16E5A, 'M', '𖹺'), + (0x16E5B, 'M', '𖹻'), + (0x16E5C, 'M', '𖹼'), + (0x16E5D, 'M', '𖹽'), + (0x16E5E, 'M', '𖹾'), + (0x16E5F, 'M', '𖹿'), + (0x16E60, 'V'), + (0x16E9B, 'X'), + (0x16F00, 'V'), + (0x16F4B, 'X'), + (0x16F4F, 'V'), + (0x16F88, 'X'), + (0x16F8F, 'V'), + (0x16FA0, 'X'), + (0x16FE0, 'V'), + (0x16FE5, 'X'), + (0x16FF0, 'V'), + (0x16FF2, 'X'), + (0x17000, 'V'), + (0x187F8, 'X'), + (0x18800, 'V'), + (0x18CD6, 'X'), + (0x18D00, 'V'), + (0x18D09, 'X'), + (0x1AFF0, 'V'), + (0x1AFF4, 'X'), + (0x1AFF5, 'V'), + (0x1AFFC, 'X'), + (0x1AFFD, 'V'), + (0x1AFFF, 'X'), + (0x1B000, 'V'), + (0x1B123, 'X'), + (0x1B150, 'V'), + (0x1B153, 'X'), + (0x1B164, 'V'), + ] + +def _seg_60() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1B168, 'X'), + (0x1B170, 'V'), + (0x1B2FC, 'X'), + (0x1BC00, 'V'), + (0x1BC6B, 'X'), + (0x1BC70, 'V'), + (0x1BC7D, 'X'), + (0x1BC80, 'V'), + (0x1BC89, 'X'), + (0x1BC90, 'V'), + (0x1BC9A, 'X'), + (0x1BC9C, 'V'), + (0x1BCA0, 'I'), + (0x1BCA4, 'X'), + (0x1CF00, 'V'), + (0x1CF2E, 'X'), + (0x1CF30, 'V'), + (0x1CF47, 'X'), + (0x1CF50, 'V'), + (0x1CFC4, 'X'), + (0x1D000, 'V'), + (0x1D0F6, 'X'), + (0x1D100, 'V'), + (0x1D127, 'X'), + (0x1D129, 'V'), + (0x1D15E, 'M', '𝅗𝅥'), + (0x1D15F, 'M', '𝅘𝅥'), + (0x1D160, 'M', '𝅘𝅥𝅮'), + (0x1D161, 'M', '𝅘𝅥𝅯'), + (0x1D162, 'M', '𝅘𝅥𝅰'), + (0x1D163, 'M', '𝅘𝅥𝅱'), + (0x1D164, 'M', '𝅘𝅥𝅲'), + (0x1D165, 'V'), + (0x1D173, 'X'), + (0x1D17B, 'V'), + (0x1D1BB, 'M', '𝆹𝅥'), + (0x1D1BC, 'M', '𝆺𝅥'), + (0x1D1BD, 'M', '𝆹𝅥𝅮'), + (0x1D1BE, 'M', '𝆺𝅥𝅮'), + (0x1D1BF, 'M', '𝆹𝅥𝅯'), + (0x1D1C0, 'M', '𝆺𝅥𝅯'), + (0x1D1C1, 'V'), + (0x1D1EB, 'X'), + (0x1D200, 'V'), + (0x1D246, 'X'), + (0x1D2E0, 'V'), + (0x1D2F4, 'X'), + (0x1D300, 'V'), + (0x1D357, 'X'), + (0x1D360, 'V'), + (0x1D379, 'X'), + (0x1D400, 'M', 'a'), + (0x1D401, 'M', 'b'), + (0x1D402, 'M', 'c'), + (0x1D403, 'M', 'd'), + (0x1D404, 'M', 'e'), + (0x1D405, 'M', 'f'), + (0x1D406, 'M', 'g'), + (0x1D407, 'M', 'h'), + (0x1D408, 'M', 'i'), + (0x1D409, 'M', 'j'), + (0x1D40A, 'M', 'k'), + (0x1D40B, 'M', 'l'), + (0x1D40C, 'M', 'm'), + (0x1D40D, 'M', 'n'), + (0x1D40E, 'M', 'o'), + (0x1D40F, 'M', 'p'), + (0x1D410, 'M', 'q'), + (0x1D411, 'M', 'r'), + (0x1D412, 'M', 's'), + (0x1D413, 'M', 't'), + (0x1D414, 'M', 'u'), + (0x1D415, 'M', 'v'), + (0x1D416, 'M', 'w'), + (0x1D417, 'M', 'x'), + (0x1D418, 'M', 'y'), + (0x1D419, 'M', 'z'), + (0x1D41A, 'M', 'a'), + (0x1D41B, 'M', 'b'), + (0x1D41C, 'M', 'c'), + (0x1D41D, 'M', 'd'), + (0x1D41E, 'M', 'e'), + (0x1D41F, 'M', 'f'), + (0x1D420, 'M', 'g'), + (0x1D421, 'M', 'h'), + (0x1D422, 'M', 'i'), + (0x1D423, 'M', 'j'), + (0x1D424, 'M', 'k'), + (0x1D425, 'M', 'l'), + (0x1D426, 'M', 'm'), + (0x1D427, 'M', 'n'), + (0x1D428, 'M', 'o'), + (0x1D429, 'M', 'p'), + (0x1D42A, 'M', 'q'), + (0x1D42B, 'M', 'r'), + (0x1D42C, 'M', 's'), + (0x1D42D, 'M', 't'), + (0x1D42E, 'M', 'u'), + (0x1D42F, 'M', 'v'), + (0x1D430, 'M', 'w'), + ] + +def _seg_61() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D431, 'M', 'x'), + (0x1D432, 'M', 'y'), + (0x1D433, 'M', 'z'), + (0x1D434, 'M', 'a'), + (0x1D435, 'M', 'b'), + (0x1D436, 'M', 'c'), + (0x1D437, 'M', 'd'), + (0x1D438, 'M', 'e'), + (0x1D439, 'M', 'f'), + (0x1D43A, 'M', 'g'), + (0x1D43B, 'M', 'h'), + (0x1D43C, 'M', 'i'), + (0x1D43D, 'M', 'j'), + (0x1D43E, 'M', 'k'), + (0x1D43F, 'M', 'l'), + (0x1D440, 'M', 'm'), + (0x1D441, 'M', 'n'), + (0x1D442, 'M', 'o'), + (0x1D443, 'M', 'p'), + (0x1D444, 'M', 'q'), + (0x1D445, 'M', 'r'), + (0x1D446, 'M', 's'), + (0x1D447, 'M', 't'), + (0x1D448, 'M', 'u'), + (0x1D449, 'M', 'v'), + (0x1D44A, 'M', 'w'), + (0x1D44B, 'M', 'x'), + (0x1D44C, 'M', 'y'), + (0x1D44D, 'M', 'z'), + (0x1D44E, 'M', 'a'), + (0x1D44F, 'M', 'b'), + (0x1D450, 'M', 'c'), + (0x1D451, 'M', 'd'), + (0x1D452, 'M', 'e'), + (0x1D453, 'M', 'f'), + (0x1D454, 'M', 'g'), + (0x1D455, 'X'), + (0x1D456, 'M', 'i'), + (0x1D457, 'M', 'j'), + (0x1D458, 'M', 'k'), + (0x1D459, 'M', 'l'), + (0x1D45A, 'M', 'm'), + (0x1D45B, 'M', 'n'), + (0x1D45C, 'M', 'o'), + (0x1D45D, 'M', 'p'), + (0x1D45E, 'M', 'q'), + (0x1D45F, 'M', 'r'), + (0x1D460, 'M', 's'), + (0x1D461, 'M', 't'), + (0x1D462, 'M', 'u'), + (0x1D463, 'M', 'v'), + (0x1D464, 'M', 'w'), + (0x1D465, 'M', 'x'), + (0x1D466, 'M', 'y'), + (0x1D467, 'M', 'z'), + (0x1D468, 'M', 'a'), + (0x1D469, 'M', 'b'), + (0x1D46A, 'M', 'c'), + (0x1D46B, 'M', 'd'), + (0x1D46C, 'M', 'e'), + (0x1D46D, 'M', 'f'), + (0x1D46E, 'M', 'g'), + (0x1D46F, 'M', 'h'), + (0x1D470, 'M', 'i'), + (0x1D471, 'M', 'j'), + (0x1D472, 'M', 'k'), + (0x1D473, 'M', 'l'), + (0x1D474, 'M', 'm'), + (0x1D475, 'M', 'n'), + (0x1D476, 'M', 'o'), + (0x1D477, 'M', 'p'), + (0x1D478, 'M', 'q'), + (0x1D479, 'M', 'r'), + (0x1D47A, 'M', 's'), + (0x1D47B, 'M', 't'), + (0x1D47C, 'M', 'u'), + (0x1D47D, 'M', 'v'), + (0x1D47E, 'M', 'w'), + (0x1D47F, 'M', 'x'), + (0x1D480, 'M', 'y'), + (0x1D481, 'M', 'z'), + (0x1D482, 'M', 'a'), + (0x1D483, 'M', 'b'), + (0x1D484, 'M', 'c'), + (0x1D485, 'M', 'd'), + (0x1D486, 'M', 'e'), + (0x1D487, 'M', 'f'), + (0x1D488, 'M', 'g'), + (0x1D489, 'M', 'h'), + (0x1D48A, 'M', 'i'), + (0x1D48B, 'M', 'j'), + (0x1D48C, 'M', 'k'), + (0x1D48D, 'M', 'l'), + (0x1D48E, 'M', 'm'), + (0x1D48F, 'M', 'n'), + (0x1D490, 'M', 'o'), + (0x1D491, 'M', 'p'), + (0x1D492, 'M', 'q'), + (0x1D493, 'M', 'r'), + (0x1D494, 'M', 's'), + ] + +def _seg_62() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D495, 'M', 't'), + (0x1D496, 'M', 'u'), + (0x1D497, 'M', 'v'), + (0x1D498, 'M', 'w'), + (0x1D499, 'M', 'x'), + (0x1D49A, 'M', 'y'), + (0x1D49B, 'M', 'z'), + (0x1D49C, 'M', 'a'), + (0x1D49D, 'X'), + (0x1D49E, 'M', 'c'), + (0x1D49F, 'M', 'd'), + (0x1D4A0, 'X'), + (0x1D4A2, 'M', 'g'), + (0x1D4A3, 'X'), + (0x1D4A5, 'M', 'j'), + (0x1D4A6, 'M', 'k'), + (0x1D4A7, 'X'), + (0x1D4A9, 'M', 'n'), + (0x1D4AA, 'M', 'o'), + (0x1D4AB, 'M', 'p'), + (0x1D4AC, 'M', 'q'), + (0x1D4AD, 'X'), + (0x1D4AE, 'M', 's'), + (0x1D4AF, 'M', 't'), + (0x1D4B0, 'M', 'u'), + (0x1D4B1, 'M', 'v'), + (0x1D4B2, 'M', 'w'), + (0x1D4B3, 'M', 'x'), + (0x1D4B4, 'M', 'y'), + (0x1D4B5, 'M', 'z'), + (0x1D4B6, 'M', 'a'), + (0x1D4B7, 'M', 'b'), + (0x1D4B8, 'M', 'c'), + (0x1D4B9, 'M', 'd'), + (0x1D4BA, 'X'), + (0x1D4BB, 'M', 'f'), + (0x1D4BC, 'X'), + (0x1D4BD, 'M', 'h'), + (0x1D4BE, 'M', 'i'), + (0x1D4BF, 'M', 'j'), + (0x1D4C0, 'M', 'k'), + (0x1D4C1, 'M', 'l'), + (0x1D4C2, 'M', 'm'), + (0x1D4C3, 'M', 'n'), + (0x1D4C4, 'X'), + (0x1D4C5, 'M', 'p'), + (0x1D4C6, 'M', 'q'), + (0x1D4C7, 'M', 'r'), + (0x1D4C8, 'M', 's'), + (0x1D4C9, 'M', 't'), + (0x1D4CA, 'M', 'u'), + (0x1D4CB, 'M', 'v'), + (0x1D4CC, 'M', 'w'), + (0x1D4CD, 'M', 'x'), + (0x1D4CE, 'M', 'y'), + (0x1D4CF, 'M', 'z'), + (0x1D4D0, 'M', 'a'), + (0x1D4D1, 'M', 'b'), + (0x1D4D2, 'M', 'c'), + (0x1D4D3, 'M', 'd'), + (0x1D4D4, 'M', 'e'), + (0x1D4D5, 'M', 'f'), + (0x1D4D6, 'M', 'g'), + (0x1D4D7, 'M', 'h'), + (0x1D4D8, 'M', 'i'), + (0x1D4D9, 'M', 'j'), + (0x1D4DA, 'M', 'k'), + (0x1D4DB, 'M', 'l'), + (0x1D4DC, 'M', 'm'), + (0x1D4DD, 'M', 'n'), + (0x1D4DE, 'M', 'o'), + (0x1D4DF, 'M', 'p'), + (0x1D4E0, 'M', 'q'), + (0x1D4E1, 'M', 'r'), + (0x1D4E2, 'M', 's'), + (0x1D4E3, 'M', 't'), + (0x1D4E4, 'M', 'u'), + (0x1D4E5, 'M', 'v'), + (0x1D4E6, 'M', 'w'), + (0x1D4E7, 'M', 'x'), + (0x1D4E8, 'M', 'y'), + (0x1D4E9, 'M', 'z'), + (0x1D4EA, 'M', 'a'), + (0x1D4EB, 'M', 'b'), + (0x1D4EC, 'M', 'c'), + (0x1D4ED, 'M', 'd'), + (0x1D4EE, 'M', 'e'), + (0x1D4EF, 'M', 'f'), + (0x1D4F0, 'M', 'g'), + (0x1D4F1, 'M', 'h'), + (0x1D4F2, 'M', 'i'), + (0x1D4F3, 'M', 'j'), + (0x1D4F4, 'M', 'k'), + (0x1D4F5, 'M', 'l'), + (0x1D4F6, 'M', 'm'), + (0x1D4F7, 'M', 'n'), + (0x1D4F8, 'M', 'o'), + (0x1D4F9, 'M', 'p'), + (0x1D4FA, 'M', 'q'), + (0x1D4FB, 'M', 'r'), + ] + +def _seg_63() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D4FC, 'M', 's'), + (0x1D4FD, 'M', 't'), + (0x1D4FE, 'M', 'u'), + (0x1D4FF, 'M', 'v'), + (0x1D500, 'M', 'w'), + (0x1D501, 'M', 'x'), + (0x1D502, 'M', 'y'), + (0x1D503, 'M', 'z'), + (0x1D504, 'M', 'a'), + (0x1D505, 'M', 'b'), + (0x1D506, 'X'), + (0x1D507, 'M', 'd'), + (0x1D508, 'M', 'e'), + (0x1D509, 'M', 'f'), + (0x1D50A, 'M', 'g'), + (0x1D50B, 'X'), + (0x1D50D, 'M', 'j'), + (0x1D50E, 'M', 'k'), + (0x1D50F, 'M', 'l'), + (0x1D510, 'M', 'm'), + (0x1D511, 'M', 'n'), + (0x1D512, 'M', 'o'), + (0x1D513, 'M', 'p'), + (0x1D514, 'M', 'q'), + (0x1D515, 'X'), + (0x1D516, 'M', 's'), + (0x1D517, 'M', 't'), + (0x1D518, 'M', 'u'), + (0x1D519, 'M', 'v'), + (0x1D51A, 'M', 'w'), + (0x1D51B, 'M', 'x'), + (0x1D51C, 'M', 'y'), + (0x1D51D, 'X'), + (0x1D51E, 'M', 'a'), + (0x1D51F, 'M', 'b'), + (0x1D520, 'M', 'c'), + (0x1D521, 'M', 'd'), + (0x1D522, 'M', 'e'), + (0x1D523, 'M', 'f'), + (0x1D524, 'M', 'g'), + (0x1D525, 'M', 'h'), + (0x1D526, 'M', 'i'), + (0x1D527, 'M', 'j'), + (0x1D528, 'M', 'k'), + (0x1D529, 'M', 'l'), + (0x1D52A, 'M', 'm'), + (0x1D52B, 'M', 'n'), + (0x1D52C, 'M', 'o'), + (0x1D52D, 'M', 'p'), + (0x1D52E, 'M', 'q'), + (0x1D52F, 'M', 'r'), + (0x1D530, 'M', 's'), + (0x1D531, 'M', 't'), + (0x1D532, 'M', 'u'), + (0x1D533, 'M', 'v'), + (0x1D534, 'M', 'w'), + (0x1D535, 'M', 'x'), + (0x1D536, 'M', 'y'), + (0x1D537, 'M', 'z'), + (0x1D538, 'M', 'a'), + (0x1D539, 'M', 'b'), + (0x1D53A, 'X'), + (0x1D53B, 'M', 'd'), + (0x1D53C, 'M', 'e'), + (0x1D53D, 'M', 'f'), + (0x1D53E, 'M', 'g'), + (0x1D53F, 'X'), + (0x1D540, 'M', 'i'), + (0x1D541, 'M', 'j'), + (0x1D542, 'M', 'k'), + (0x1D543, 'M', 'l'), + (0x1D544, 'M', 'm'), + (0x1D545, 'X'), + (0x1D546, 'M', 'o'), + (0x1D547, 'X'), + (0x1D54A, 'M', 's'), + (0x1D54B, 'M', 't'), + (0x1D54C, 'M', 'u'), + (0x1D54D, 'M', 'v'), + (0x1D54E, 'M', 'w'), + (0x1D54F, 'M', 'x'), + (0x1D550, 'M', 'y'), + (0x1D551, 'X'), + (0x1D552, 'M', 'a'), + (0x1D553, 'M', 'b'), + (0x1D554, 'M', 'c'), + (0x1D555, 'M', 'd'), + (0x1D556, 'M', 'e'), + (0x1D557, 'M', 'f'), + (0x1D558, 'M', 'g'), + (0x1D559, 'M', 'h'), + (0x1D55A, 'M', 'i'), + (0x1D55B, 'M', 'j'), + (0x1D55C, 'M', 'k'), + (0x1D55D, 'M', 'l'), + (0x1D55E, 'M', 'm'), + (0x1D55F, 'M', 'n'), + (0x1D560, 'M', 'o'), + (0x1D561, 'M', 'p'), + (0x1D562, 'M', 'q'), + ] + +def _seg_64() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D563, 'M', 'r'), + (0x1D564, 'M', 's'), + (0x1D565, 'M', 't'), + (0x1D566, 'M', 'u'), + (0x1D567, 'M', 'v'), + (0x1D568, 'M', 'w'), + (0x1D569, 'M', 'x'), + (0x1D56A, 'M', 'y'), + (0x1D56B, 'M', 'z'), + (0x1D56C, 'M', 'a'), + (0x1D56D, 'M', 'b'), + (0x1D56E, 'M', 'c'), + (0x1D56F, 'M', 'd'), + (0x1D570, 'M', 'e'), + (0x1D571, 'M', 'f'), + (0x1D572, 'M', 'g'), + (0x1D573, 'M', 'h'), + (0x1D574, 'M', 'i'), + (0x1D575, 'M', 'j'), + (0x1D576, 'M', 'k'), + (0x1D577, 'M', 'l'), + (0x1D578, 'M', 'm'), + (0x1D579, 'M', 'n'), + (0x1D57A, 'M', 'o'), + (0x1D57B, 'M', 'p'), + (0x1D57C, 'M', 'q'), + (0x1D57D, 'M', 'r'), + (0x1D57E, 'M', 's'), + (0x1D57F, 'M', 't'), + (0x1D580, 'M', 'u'), + (0x1D581, 'M', 'v'), + (0x1D582, 'M', 'w'), + (0x1D583, 'M', 'x'), + (0x1D584, 'M', 'y'), + (0x1D585, 'M', 'z'), + (0x1D586, 'M', 'a'), + (0x1D587, 'M', 'b'), + (0x1D588, 'M', 'c'), + (0x1D589, 'M', 'd'), + (0x1D58A, 'M', 'e'), + (0x1D58B, 'M', 'f'), + (0x1D58C, 'M', 'g'), + (0x1D58D, 'M', 'h'), + (0x1D58E, 'M', 'i'), + (0x1D58F, 'M', 'j'), + (0x1D590, 'M', 'k'), + (0x1D591, 'M', 'l'), + (0x1D592, 'M', 'm'), + (0x1D593, 'M', 'n'), + (0x1D594, 'M', 'o'), + (0x1D595, 'M', 'p'), + (0x1D596, 'M', 'q'), + (0x1D597, 'M', 'r'), + (0x1D598, 'M', 's'), + (0x1D599, 'M', 't'), + (0x1D59A, 'M', 'u'), + (0x1D59B, 'M', 'v'), + (0x1D59C, 'M', 'w'), + (0x1D59D, 'M', 'x'), + (0x1D59E, 'M', 'y'), + (0x1D59F, 'M', 'z'), + (0x1D5A0, 'M', 'a'), + (0x1D5A1, 'M', 'b'), + (0x1D5A2, 'M', 'c'), + (0x1D5A3, 'M', 'd'), + (0x1D5A4, 'M', 'e'), + (0x1D5A5, 'M', 'f'), + (0x1D5A6, 'M', 'g'), + (0x1D5A7, 'M', 'h'), + (0x1D5A8, 'M', 'i'), + (0x1D5A9, 'M', 'j'), + (0x1D5AA, 'M', 'k'), + (0x1D5AB, 'M', 'l'), + (0x1D5AC, 'M', 'm'), + (0x1D5AD, 'M', 'n'), + (0x1D5AE, 'M', 'o'), + (0x1D5AF, 'M', 'p'), + (0x1D5B0, 'M', 'q'), + (0x1D5B1, 'M', 'r'), + (0x1D5B2, 'M', 's'), + (0x1D5B3, 'M', 't'), + (0x1D5B4, 'M', 'u'), + (0x1D5B5, 'M', 'v'), + (0x1D5B6, 'M', 'w'), + (0x1D5B7, 'M', 'x'), + (0x1D5B8, 'M', 'y'), + (0x1D5B9, 'M', 'z'), + (0x1D5BA, 'M', 'a'), + (0x1D5BB, 'M', 'b'), + (0x1D5BC, 'M', 'c'), + (0x1D5BD, 'M', 'd'), + (0x1D5BE, 'M', 'e'), + (0x1D5BF, 'M', 'f'), + (0x1D5C0, 'M', 'g'), + (0x1D5C1, 'M', 'h'), + (0x1D5C2, 'M', 'i'), + (0x1D5C3, 'M', 'j'), + (0x1D5C4, 'M', 'k'), + (0x1D5C5, 'M', 'l'), + (0x1D5C6, 'M', 'm'), + ] + +def _seg_65() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D5C7, 'M', 'n'), + (0x1D5C8, 'M', 'o'), + (0x1D5C9, 'M', 'p'), + (0x1D5CA, 'M', 'q'), + (0x1D5CB, 'M', 'r'), + (0x1D5CC, 'M', 's'), + (0x1D5CD, 'M', 't'), + (0x1D5CE, 'M', 'u'), + (0x1D5CF, 'M', 'v'), + (0x1D5D0, 'M', 'w'), + (0x1D5D1, 'M', 'x'), + (0x1D5D2, 'M', 'y'), + (0x1D5D3, 'M', 'z'), + (0x1D5D4, 'M', 'a'), + (0x1D5D5, 'M', 'b'), + (0x1D5D6, 'M', 'c'), + (0x1D5D7, 'M', 'd'), + (0x1D5D8, 'M', 'e'), + (0x1D5D9, 'M', 'f'), + (0x1D5DA, 'M', 'g'), + (0x1D5DB, 'M', 'h'), + (0x1D5DC, 'M', 'i'), + (0x1D5DD, 'M', 'j'), + (0x1D5DE, 'M', 'k'), + (0x1D5DF, 'M', 'l'), + (0x1D5E0, 'M', 'm'), + (0x1D5E1, 'M', 'n'), + (0x1D5E2, 'M', 'o'), + (0x1D5E3, 'M', 'p'), + (0x1D5E4, 'M', 'q'), + (0x1D5E5, 'M', 'r'), + (0x1D5E6, 'M', 's'), + (0x1D5E7, 'M', 't'), + (0x1D5E8, 'M', 'u'), + (0x1D5E9, 'M', 'v'), + (0x1D5EA, 'M', 'w'), + (0x1D5EB, 'M', 'x'), + (0x1D5EC, 'M', 'y'), + (0x1D5ED, 'M', 'z'), + (0x1D5EE, 'M', 'a'), + (0x1D5EF, 'M', 'b'), + (0x1D5F0, 'M', 'c'), + (0x1D5F1, 'M', 'd'), + (0x1D5F2, 'M', 'e'), + (0x1D5F3, 'M', 'f'), + (0x1D5F4, 'M', 'g'), + (0x1D5F5, 'M', 'h'), + (0x1D5F6, 'M', 'i'), + (0x1D5F7, 'M', 'j'), + (0x1D5F8, 'M', 'k'), + (0x1D5F9, 'M', 'l'), + (0x1D5FA, 'M', 'm'), + (0x1D5FB, 'M', 'n'), + (0x1D5FC, 'M', 'o'), + (0x1D5FD, 'M', 'p'), + (0x1D5FE, 'M', 'q'), + (0x1D5FF, 'M', 'r'), + (0x1D600, 'M', 's'), + (0x1D601, 'M', 't'), + (0x1D602, 'M', 'u'), + (0x1D603, 'M', 'v'), + (0x1D604, 'M', 'w'), + (0x1D605, 'M', 'x'), + (0x1D606, 'M', 'y'), + (0x1D607, 'M', 'z'), + (0x1D608, 'M', 'a'), + (0x1D609, 'M', 'b'), + (0x1D60A, 'M', 'c'), + (0x1D60B, 'M', 'd'), + (0x1D60C, 'M', 'e'), + (0x1D60D, 'M', 'f'), + (0x1D60E, 'M', 'g'), + (0x1D60F, 'M', 'h'), + (0x1D610, 'M', 'i'), + (0x1D611, 'M', 'j'), + (0x1D612, 'M', 'k'), + (0x1D613, 'M', 'l'), + (0x1D614, 'M', 'm'), + (0x1D615, 'M', 'n'), + (0x1D616, 'M', 'o'), + (0x1D617, 'M', 'p'), + (0x1D618, 'M', 'q'), + (0x1D619, 'M', 'r'), + (0x1D61A, 'M', 's'), + (0x1D61B, 'M', 't'), + (0x1D61C, 'M', 'u'), + (0x1D61D, 'M', 'v'), + (0x1D61E, 'M', 'w'), + (0x1D61F, 'M', 'x'), + (0x1D620, 'M', 'y'), + (0x1D621, 'M', 'z'), + (0x1D622, 'M', 'a'), + (0x1D623, 'M', 'b'), + (0x1D624, 'M', 'c'), + (0x1D625, 'M', 'd'), + (0x1D626, 'M', 'e'), + (0x1D627, 'M', 'f'), + (0x1D628, 'M', 'g'), + (0x1D629, 'M', 'h'), + (0x1D62A, 'M', 'i'), + ] + +def _seg_66() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D62B, 'M', 'j'), + (0x1D62C, 'M', 'k'), + (0x1D62D, 'M', 'l'), + (0x1D62E, 'M', 'm'), + (0x1D62F, 'M', 'n'), + (0x1D630, 'M', 'o'), + (0x1D631, 'M', 'p'), + (0x1D632, 'M', 'q'), + (0x1D633, 'M', 'r'), + (0x1D634, 'M', 's'), + (0x1D635, 'M', 't'), + (0x1D636, 'M', 'u'), + (0x1D637, 'M', 'v'), + (0x1D638, 'M', 'w'), + (0x1D639, 'M', 'x'), + (0x1D63A, 'M', 'y'), + (0x1D63B, 'M', 'z'), + (0x1D63C, 'M', 'a'), + (0x1D63D, 'M', 'b'), + (0x1D63E, 'M', 'c'), + (0x1D63F, 'M', 'd'), + (0x1D640, 'M', 'e'), + (0x1D641, 'M', 'f'), + (0x1D642, 'M', 'g'), + (0x1D643, 'M', 'h'), + (0x1D644, 'M', 'i'), + (0x1D645, 'M', 'j'), + (0x1D646, 'M', 'k'), + (0x1D647, 'M', 'l'), + (0x1D648, 'M', 'm'), + (0x1D649, 'M', 'n'), + (0x1D64A, 'M', 'o'), + (0x1D64B, 'M', 'p'), + (0x1D64C, 'M', 'q'), + (0x1D64D, 'M', 'r'), + (0x1D64E, 'M', 's'), + (0x1D64F, 'M', 't'), + (0x1D650, 'M', 'u'), + (0x1D651, 'M', 'v'), + (0x1D652, 'M', 'w'), + (0x1D653, 'M', 'x'), + (0x1D654, 'M', 'y'), + (0x1D655, 'M', 'z'), + (0x1D656, 'M', 'a'), + (0x1D657, 'M', 'b'), + (0x1D658, 'M', 'c'), + (0x1D659, 'M', 'd'), + (0x1D65A, 'M', 'e'), + (0x1D65B, 'M', 'f'), + (0x1D65C, 'M', 'g'), + (0x1D65D, 'M', 'h'), + (0x1D65E, 'M', 'i'), + (0x1D65F, 'M', 'j'), + (0x1D660, 'M', 'k'), + (0x1D661, 'M', 'l'), + (0x1D662, 'M', 'm'), + (0x1D663, 'M', 'n'), + (0x1D664, 'M', 'o'), + (0x1D665, 'M', 'p'), + (0x1D666, 'M', 'q'), + (0x1D667, 'M', 'r'), + (0x1D668, 'M', 's'), + (0x1D669, 'M', 't'), + (0x1D66A, 'M', 'u'), + (0x1D66B, 'M', 'v'), + (0x1D66C, 'M', 'w'), + (0x1D66D, 'M', 'x'), + (0x1D66E, 'M', 'y'), + (0x1D66F, 'M', 'z'), + (0x1D670, 'M', 'a'), + (0x1D671, 'M', 'b'), + (0x1D672, 'M', 'c'), + (0x1D673, 'M', 'd'), + (0x1D674, 'M', 'e'), + (0x1D675, 'M', 'f'), + (0x1D676, 'M', 'g'), + (0x1D677, 'M', 'h'), + (0x1D678, 'M', 'i'), + (0x1D679, 'M', 'j'), + (0x1D67A, 'M', 'k'), + (0x1D67B, 'M', 'l'), + (0x1D67C, 'M', 'm'), + (0x1D67D, 'M', 'n'), + (0x1D67E, 'M', 'o'), + (0x1D67F, 'M', 'p'), + (0x1D680, 'M', 'q'), + (0x1D681, 'M', 'r'), + (0x1D682, 'M', 's'), + (0x1D683, 'M', 't'), + (0x1D684, 'M', 'u'), + (0x1D685, 'M', 'v'), + (0x1D686, 'M', 'w'), + (0x1D687, 'M', 'x'), + (0x1D688, 'M', 'y'), + (0x1D689, 'M', 'z'), + (0x1D68A, 'M', 'a'), + (0x1D68B, 'M', 'b'), + (0x1D68C, 'M', 'c'), + (0x1D68D, 'M', 'd'), + (0x1D68E, 'M', 'e'), + ] + +def _seg_67() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D68F, 'M', 'f'), + (0x1D690, 'M', 'g'), + (0x1D691, 'M', 'h'), + (0x1D692, 'M', 'i'), + (0x1D693, 'M', 'j'), + (0x1D694, 'M', 'k'), + (0x1D695, 'M', 'l'), + (0x1D696, 'M', 'm'), + (0x1D697, 'M', 'n'), + (0x1D698, 'M', 'o'), + (0x1D699, 'M', 'p'), + (0x1D69A, 'M', 'q'), + (0x1D69B, 'M', 'r'), + (0x1D69C, 'M', 's'), + (0x1D69D, 'M', 't'), + (0x1D69E, 'M', 'u'), + (0x1D69F, 'M', 'v'), + (0x1D6A0, 'M', 'w'), + (0x1D6A1, 'M', 'x'), + (0x1D6A2, 'M', 'y'), + (0x1D6A3, 'M', 'z'), + (0x1D6A4, 'M', 'ı'), + (0x1D6A5, 'M', 'ȷ'), + (0x1D6A6, 'X'), + (0x1D6A8, 'M', 'α'), + (0x1D6A9, 'M', 'β'), + (0x1D6AA, 'M', 'γ'), + (0x1D6AB, 'M', 'δ'), + (0x1D6AC, 'M', 'ε'), + (0x1D6AD, 'M', 'ζ'), + (0x1D6AE, 'M', 'η'), + (0x1D6AF, 'M', 'θ'), + (0x1D6B0, 'M', 'ι'), + (0x1D6B1, 'M', 'κ'), + (0x1D6B2, 'M', 'λ'), + (0x1D6B3, 'M', 'μ'), + (0x1D6B4, 'M', 'ν'), + (0x1D6B5, 'M', 'ξ'), + (0x1D6B6, 'M', 'ο'), + (0x1D6B7, 'M', 'π'), + (0x1D6B8, 'M', 'ρ'), + (0x1D6B9, 'M', 'θ'), + (0x1D6BA, 'M', 'σ'), + (0x1D6BB, 'M', 'τ'), + (0x1D6BC, 'M', 'υ'), + (0x1D6BD, 'M', 'φ'), + (0x1D6BE, 'M', 'χ'), + (0x1D6BF, 'M', 'ψ'), + (0x1D6C0, 'M', 'ω'), + (0x1D6C1, 'M', '∇'), + (0x1D6C2, 'M', 'α'), + (0x1D6C3, 'M', 'β'), + (0x1D6C4, 'M', 'γ'), + (0x1D6C5, 'M', 'δ'), + (0x1D6C6, 'M', 'ε'), + (0x1D6C7, 'M', 'ζ'), + (0x1D6C8, 'M', 'η'), + (0x1D6C9, 'M', 'θ'), + (0x1D6CA, 'M', 'ι'), + (0x1D6CB, 'M', 'κ'), + (0x1D6CC, 'M', 'λ'), + (0x1D6CD, 'M', 'μ'), + (0x1D6CE, 'M', 'ν'), + (0x1D6CF, 'M', 'ξ'), + (0x1D6D0, 'M', 'ο'), + (0x1D6D1, 'M', 'π'), + (0x1D6D2, 'M', 'ρ'), + (0x1D6D3, 'M', 'σ'), + (0x1D6D5, 'M', 'τ'), + (0x1D6D6, 'M', 'υ'), + (0x1D6D7, 'M', 'φ'), + (0x1D6D8, 'M', 'χ'), + (0x1D6D9, 'M', 'ψ'), + (0x1D6DA, 'M', 'ω'), + (0x1D6DB, 'M', '∂'), + (0x1D6DC, 'M', 'ε'), + (0x1D6DD, 'M', 'θ'), + (0x1D6DE, 'M', 'κ'), + (0x1D6DF, 'M', 'φ'), + (0x1D6E0, 'M', 'ρ'), + (0x1D6E1, 'M', 'π'), + (0x1D6E2, 'M', 'α'), + (0x1D6E3, 'M', 'β'), + (0x1D6E4, 'M', 'γ'), + (0x1D6E5, 'M', 'δ'), + (0x1D6E6, 'M', 'ε'), + (0x1D6E7, 'M', 'ζ'), + (0x1D6E8, 'M', 'η'), + (0x1D6E9, 'M', 'θ'), + (0x1D6EA, 'M', 'ι'), + (0x1D6EB, 'M', 'κ'), + (0x1D6EC, 'M', 'λ'), + (0x1D6ED, 'M', 'μ'), + (0x1D6EE, 'M', 'ν'), + (0x1D6EF, 'M', 'ξ'), + (0x1D6F0, 'M', 'ο'), + (0x1D6F1, 'M', 'π'), + (0x1D6F2, 'M', 'ρ'), + (0x1D6F3, 'M', 'θ'), + (0x1D6F4, 'M', 'σ'), + ] + +def _seg_68() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D6F5, 'M', 'τ'), + (0x1D6F6, 'M', 'υ'), + (0x1D6F7, 'M', 'φ'), + (0x1D6F8, 'M', 'χ'), + (0x1D6F9, 'M', 'ψ'), + (0x1D6FA, 'M', 'ω'), + (0x1D6FB, 'M', '∇'), + (0x1D6FC, 'M', 'α'), + (0x1D6FD, 'M', 'β'), + (0x1D6FE, 'M', 'γ'), + (0x1D6FF, 'M', 'δ'), + (0x1D700, 'M', 'ε'), + (0x1D701, 'M', 'ζ'), + (0x1D702, 'M', 'η'), + (0x1D703, 'M', 'θ'), + (0x1D704, 'M', 'ι'), + (0x1D705, 'M', 'κ'), + (0x1D706, 'M', 'λ'), + (0x1D707, 'M', 'μ'), + (0x1D708, 'M', 'ν'), + (0x1D709, 'M', 'ξ'), + (0x1D70A, 'M', 'ο'), + (0x1D70B, 'M', 'π'), + (0x1D70C, 'M', 'ρ'), + (0x1D70D, 'M', 'σ'), + (0x1D70F, 'M', 'τ'), + (0x1D710, 'M', 'υ'), + (0x1D711, 'M', 'φ'), + (0x1D712, 'M', 'χ'), + (0x1D713, 'M', 'ψ'), + (0x1D714, 'M', 'ω'), + (0x1D715, 'M', '∂'), + (0x1D716, 'M', 'ε'), + (0x1D717, 'M', 'θ'), + (0x1D718, 'M', 'κ'), + (0x1D719, 'M', 'φ'), + (0x1D71A, 'M', 'ρ'), + (0x1D71B, 'M', 'π'), + (0x1D71C, 'M', 'α'), + (0x1D71D, 'M', 'β'), + (0x1D71E, 'M', 'γ'), + (0x1D71F, 'M', 'δ'), + (0x1D720, 'M', 'ε'), + (0x1D721, 'M', 'ζ'), + (0x1D722, 'M', 'η'), + (0x1D723, 'M', 'θ'), + (0x1D724, 'M', 'ι'), + (0x1D725, 'M', 'κ'), + (0x1D726, 'M', 'λ'), + (0x1D727, 'M', 'μ'), + (0x1D728, 'M', 'ν'), + (0x1D729, 'M', 'ξ'), + (0x1D72A, 'M', 'ο'), + (0x1D72B, 'M', 'π'), + (0x1D72C, 'M', 'ρ'), + (0x1D72D, 'M', 'θ'), + (0x1D72E, 'M', 'σ'), + (0x1D72F, 'M', 'τ'), + (0x1D730, 'M', 'υ'), + (0x1D731, 'M', 'φ'), + (0x1D732, 'M', 'χ'), + (0x1D733, 'M', 'ψ'), + (0x1D734, 'M', 'ω'), + (0x1D735, 'M', '∇'), + (0x1D736, 'M', 'α'), + (0x1D737, 'M', 'β'), + (0x1D738, 'M', 'γ'), + (0x1D739, 'M', 'δ'), + (0x1D73A, 'M', 'ε'), + (0x1D73B, 'M', 'ζ'), + (0x1D73C, 'M', 'η'), + (0x1D73D, 'M', 'θ'), + (0x1D73E, 'M', 'ι'), + (0x1D73F, 'M', 'κ'), + (0x1D740, 'M', 'λ'), + (0x1D741, 'M', 'μ'), + (0x1D742, 'M', 'ν'), + (0x1D743, 'M', 'ξ'), + (0x1D744, 'M', 'ο'), + (0x1D745, 'M', 'π'), + (0x1D746, 'M', 'ρ'), + (0x1D747, 'M', 'σ'), + (0x1D749, 'M', 'τ'), + (0x1D74A, 'M', 'υ'), + (0x1D74B, 'M', 'φ'), + (0x1D74C, 'M', 'χ'), + (0x1D74D, 'M', 'ψ'), + (0x1D74E, 'M', 'ω'), + (0x1D74F, 'M', '∂'), + (0x1D750, 'M', 'ε'), + (0x1D751, 'M', 'θ'), + (0x1D752, 'M', 'κ'), + (0x1D753, 'M', 'φ'), + (0x1D754, 'M', 'ρ'), + (0x1D755, 'M', 'π'), + (0x1D756, 'M', 'α'), + (0x1D757, 'M', 'β'), + (0x1D758, 'M', 'γ'), + (0x1D759, 'M', 'δ'), + (0x1D75A, 'M', 'ε'), + ] + +def _seg_69() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D75B, 'M', 'ζ'), + (0x1D75C, 'M', 'η'), + (0x1D75D, 'M', 'θ'), + (0x1D75E, 'M', 'ι'), + (0x1D75F, 'M', 'κ'), + (0x1D760, 'M', 'λ'), + (0x1D761, 'M', 'μ'), + (0x1D762, 'M', 'ν'), + (0x1D763, 'M', 'ξ'), + (0x1D764, 'M', 'ο'), + (0x1D765, 'M', 'π'), + (0x1D766, 'M', 'ρ'), + (0x1D767, 'M', 'θ'), + (0x1D768, 'M', 'σ'), + (0x1D769, 'M', 'τ'), + (0x1D76A, 'M', 'υ'), + (0x1D76B, 'M', 'φ'), + (0x1D76C, 'M', 'χ'), + (0x1D76D, 'M', 'ψ'), + (0x1D76E, 'M', 'ω'), + (0x1D76F, 'M', '∇'), + (0x1D770, 'M', 'α'), + (0x1D771, 'M', 'β'), + (0x1D772, 'M', 'γ'), + (0x1D773, 'M', 'δ'), + (0x1D774, 'M', 'ε'), + (0x1D775, 'M', 'ζ'), + (0x1D776, 'M', 'η'), + (0x1D777, 'M', 'θ'), + (0x1D778, 'M', 'ι'), + (0x1D779, 'M', 'κ'), + (0x1D77A, 'M', 'λ'), + (0x1D77B, 'M', 'μ'), + (0x1D77C, 'M', 'ν'), + (0x1D77D, 'M', 'ξ'), + (0x1D77E, 'M', 'ο'), + (0x1D77F, 'M', 'π'), + (0x1D780, 'M', 'ρ'), + (0x1D781, 'M', 'σ'), + (0x1D783, 'M', 'τ'), + (0x1D784, 'M', 'υ'), + (0x1D785, 'M', 'φ'), + (0x1D786, 'M', 'χ'), + (0x1D787, 'M', 'ψ'), + (0x1D788, 'M', 'ω'), + (0x1D789, 'M', '∂'), + (0x1D78A, 'M', 'ε'), + (0x1D78B, 'M', 'θ'), + (0x1D78C, 'M', 'κ'), + (0x1D78D, 'M', 'φ'), + (0x1D78E, 'M', 'ρ'), + (0x1D78F, 'M', 'π'), + (0x1D790, 'M', 'α'), + (0x1D791, 'M', 'β'), + (0x1D792, 'M', 'γ'), + (0x1D793, 'M', 'δ'), + (0x1D794, 'M', 'ε'), + (0x1D795, 'M', 'ζ'), + (0x1D796, 'M', 'η'), + (0x1D797, 'M', 'θ'), + (0x1D798, 'M', 'ι'), + (0x1D799, 'M', 'κ'), + (0x1D79A, 'M', 'λ'), + (0x1D79B, 'M', 'μ'), + (0x1D79C, 'M', 'ν'), + (0x1D79D, 'M', 'ξ'), + (0x1D79E, 'M', 'ο'), + (0x1D79F, 'M', 'π'), + (0x1D7A0, 'M', 'ρ'), + (0x1D7A1, 'M', 'θ'), + (0x1D7A2, 'M', 'σ'), + (0x1D7A3, 'M', 'τ'), + (0x1D7A4, 'M', 'υ'), + (0x1D7A5, 'M', 'φ'), + (0x1D7A6, 'M', 'χ'), + (0x1D7A7, 'M', 'ψ'), + (0x1D7A8, 'M', 'ω'), + (0x1D7A9, 'M', '∇'), + (0x1D7AA, 'M', 'α'), + (0x1D7AB, 'M', 'β'), + (0x1D7AC, 'M', 'γ'), + (0x1D7AD, 'M', 'δ'), + (0x1D7AE, 'M', 'ε'), + (0x1D7AF, 'M', 'ζ'), + (0x1D7B0, 'M', 'η'), + (0x1D7B1, 'M', 'θ'), + (0x1D7B2, 'M', 'ι'), + (0x1D7B3, 'M', 'κ'), + (0x1D7B4, 'M', 'λ'), + (0x1D7B5, 'M', 'μ'), + (0x1D7B6, 'M', 'ν'), + (0x1D7B7, 'M', 'ξ'), + (0x1D7B8, 'M', 'ο'), + (0x1D7B9, 'M', 'π'), + (0x1D7BA, 'M', 'ρ'), + (0x1D7BB, 'M', 'σ'), + (0x1D7BD, 'M', 'τ'), + (0x1D7BE, 'M', 'υ'), + (0x1D7BF, 'M', 'φ'), + (0x1D7C0, 'M', 'χ'), + ] + +def _seg_70() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1D7C1, 'M', 'ψ'), + (0x1D7C2, 'M', 'ω'), + (0x1D7C3, 'M', '∂'), + (0x1D7C4, 'M', 'ε'), + (0x1D7C5, 'M', 'θ'), + (0x1D7C6, 'M', 'κ'), + (0x1D7C7, 'M', 'φ'), + (0x1D7C8, 'M', 'ρ'), + (0x1D7C9, 'M', 'π'), + (0x1D7CA, 'M', 'ϝ'), + (0x1D7CC, 'X'), + (0x1D7CE, 'M', '0'), + (0x1D7CF, 'M', '1'), + (0x1D7D0, 'M', '2'), + (0x1D7D1, 'M', '3'), + (0x1D7D2, 'M', '4'), + (0x1D7D3, 'M', '5'), + (0x1D7D4, 'M', '6'), + (0x1D7D5, 'M', '7'), + (0x1D7D6, 'M', '8'), + (0x1D7D7, 'M', '9'), + (0x1D7D8, 'M', '0'), + (0x1D7D9, 'M', '1'), + (0x1D7DA, 'M', '2'), + (0x1D7DB, 'M', '3'), + (0x1D7DC, 'M', '4'), + (0x1D7DD, 'M', '5'), + (0x1D7DE, 'M', '6'), + (0x1D7DF, 'M', '7'), + (0x1D7E0, 'M', '8'), + (0x1D7E1, 'M', '9'), + (0x1D7E2, 'M', '0'), + (0x1D7E3, 'M', '1'), + (0x1D7E4, 'M', '2'), + (0x1D7E5, 'M', '3'), + (0x1D7E6, 'M', '4'), + (0x1D7E7, 'M', '5'), + (0x1D7E8, 'M', '6'), + (0x1D7E9, 'M', '7'), + (0x1D7EA, 'M', '8'), + (0x1D7EB, 'M', '9'), + (0x1D7EC, 'M', '0'), + (0x1D7ED, 'M', '1'), + (0x1D7EE, 'M', '2'), + (0x1D7EF, 'M', '3'), + (0x1D7F0, 'M', '4'), + (0x1D7F1, 'M', '5'), + (0x1D7F2, 'M', '6'), + (0x1D7F3, 'M', '7'), + (0x1D7F4, 'M', '8'), + (0x1D7F5, 'M', '9'), + (0x1D7F6, 'M', '0'), + (0x1D7F7, 'M', '1'), + (0x1D7F8, 'M', '2'), + (0x1D7F9, 'M', '3'), + (0x1D7FA, 'M', '4'), + (0x1D7FB, 'M', '5'), + (0x1D7FC, 'M', '6'), + (0x1D7FD, 'M', '7'), + (0x1D7FE, 'M', '8'), + (0x1D7FF, 'M', '9'), + (0x1D800, 'V'), + (0x1DA8C, 'X'), + (0x1DA9B, 'V'), + (0x1DAA0, 'X'), + (0x1DAA1, 'V'), + (0x1DAB0, 'X'), + (0x1DF00, 'V'), + (0x1DF1F, 'X'), + (0x1E000, 'V'), + (0x1E007, 'X'), + (0x1E008, 'V'), + (0x1E019, 'X'), + (0x1E01B, 'V'), + (0x1E022, 'X'), + (0x1E023, 'V'), + (0x1E025, 'X'), + (0x1E026, 'V'), + (0x1E02B, 'X'), + (0x1E100, 'V'), + (0x1E12D, 'X'), + (0x1E130, 'V'), + (0x1E13E, 'X'), + (0x1E140, 'V'), + (0x1E14A, 'X'), + (0x1E14E, 'V'), + (0x1E150, 'X'), + (0x1E290, 'V'), + (0x1E2AF, 'X'), + (0x1E2C0, 'V'), + (0x1E2FA, 'X'), + (0x1E2FF, 'V'), + (0x1E300, 'X'), + (0x1E7E0, 'V'), + (0x1E7E7, 'X'), + (0x1E7E8, 'V'), + (0x1E7EC, 'X'), + (0x1E7ED, 'V'), + (0x1E7EF, 'X'), + (0x1E7F0, 'V'), + ] + +def _seg_71() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1E7FF, 'X'), + (0x1E800, 'V'), + (0x1E8C5, 'X'), + (0x1E8C7, 'V'), + (0x1E8D7, 'X'), + (0x1E900, 'M', '𞤢'), + (0x1E901, 'M', '𞤣'), + (0x1E902, 'M', '𞤤'), + (0x1E903, 'M', '𞤥'), + (0x1E904, 'M', '𞤦'), + (0x1E905, 'M', '𞤧'), + (0x1E906, 'M', '𞤨'), + (0x1E907, 'M', '𞤩'), + (0x1E908, 'M', '𞤪'), + (0x1E909, 'M', '𞤫'), + (0x1E90A, 'M', '𞤬'), + (0x1E90B, 'M', '𞤭'), + (0x1E90C, 'M', '𞤮'), + (0x1E90D, 'M', '𞤯'), + (0x1E90E, 'M', '𞤰'), + (0x1E90F, 'M', '𞤱'), + (0x1E910, 'M', '𞤲'), + (0x1E911, 'M', '𞤳'), + (0x1E912, 'M', '𞤴'), + (0x1E913, 'M', '𞤵'), + (0x1E914, 'M', '𞤶'), + (0x1E915, 'M', '𞤷'), + (0x1E916, 'M', '𞤸'), + (0x1E917, 'M', '𞤹'), + (0x1E918, 'M', '𞤺'), + (0x1E919, 'M', '𞤻'), + (0x1E91A, 'M', '𞤼'), + (0x1E91B, 'M', '𞤽'), + (0x1E91C, 'M', '𞤾'), + (0x1E91D, 'M', '𞤿'), + (0x1E91E, 'M', '𞥀'), + (0x1E91F, 'M', '𞥁'), + (0x1E920, 'M', '𞥂'), + (0x1E921, 'M', '𞥃'), + (0x1E922, 'V'), + (0x1E94C, 'X'), + (0x1E950, 'V'), + (0x1E95A, 'X'), + (0x1E95E, 'V'), + (0x1E960, 'X'), + (0x1EC71, 'V'), + (0x1ECB5, 'X'), + (0x1ED01, 'V'), + (0x1ED3E, 'X'), + (0x1EE00, 'M', 'ا'), + (0x1EE01, 'M', 'ب'), + (0x1EE02, 'M', 'ج'), + (0x1EE03, 'M', 'د'), + (0x1EE04, 'X'), + (0x1EE05, 'M', 'و'), + (0x1EE06, 'M', 'ز'), + (0x1EE07, 'M', 'ح'), + (0x1EE08, 'M', 'ط'), + (0x1EE09, 'M', 'ي'), + (0x1EE0A, 'M', 'ك'), + (0x1EE0B, 'M', 'ل'), + (0x1EE0C, 'M', 'م'), + (0x1EE0D, 'M', 'ن'), + (0x1EE0E, 'M', 'س'), + (0x1EE0F, 'M', 'ع'), + (0x1EE10, 'M', 'ف'), + (0x1EE11, 'M', 'ص'), + (0x1EE12, 'M', 'ق'), + (0x1EE13, 'M', 'ر'), + (0x1EE14, 'M', 'ش'), + (0x1EE15, 'M', 'ت'), + (0x1EE16, 'M', 'ث'), + (0x1EE17, 'M', 'خ'), + (0x1EE18, 'M', 'ذ'), + (0x1EE19, 'M', 'ض'), + (0x1EE1A, 'M', 'ظ'), + (0x1EE1B, 'M', 'غ'), + (0x1EE1C, 'M', 'ٮ'), + (0x1EE1D, 'M', 'ں'), + (0x1EE1E, 'M', 'ڡ'), + (0x1EE1F, 'M', 'ٯ'), + (0x1EE20, 'X'), + (0x1EE21, 'M', 'ب'), + (0x1EE22, 'M', 'ج'), + (0x1EE23, 'X'), + (0x1EE24, 'M', 'ه'), + (0x1EE25, 'X'), + (0x1EE27, 'M', 'ح'), + (0x1EE28, 'X'), + (0x1EE29, 'M', 'ي'), + (0x1EE2A, 'M', 'ك'), + (0x1EE2B, 'M', 'ل'), + (0x1EE2C, 'M', 'م'), + (0x1EE2D, 'M', 'ن'), + (0x1EE2E, 'M', 'س'), + (0x1EE2F, 'M', 'ع'), + (0x1EE30, 'M', 'ف'), + (0x1EE31, 'M', 'ص'), + (0x1EE32, 'M', 'ق'), + (0x1EE33, 'X'), + ] + +def _seg_72() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1EE34, 'M', 'ش'), + (0x1EE35, 'M', 'ت'), + (0x1EE36, 'M', 'ث'), + (0x1EE37, 'M', 'خ'), + (0x1EE38, 'X'), + (0x1EE39, 'M', 'ض'), + (0x1EE3A, 'X'), + (0x1EE3B, 'M', 'غ'), + (0x1EE3C, 'X'), + (0x1EE42, 'M', 'ج'), + (0x1EE43, 'X'), + (0x1EE47, 'M', 'ح'), + (0x1EE48, 'X'), + (0x1EE49, 'M', 'ي'), + (0x1EE4A, 'X'), + (0x1EE4B, 'M', 'ل'), + (0x1EE4C, 'X'), + (0x1EE4D, 'M', 'ن'), + (0x1EE4E, 'M', 'س'), + (0x1EE4F, 'M', 'ع'), + (0x1EE50, 'X'), + (0x1EE51, 'M', 'ص'), + (0x1EE52, 'M', 'ق'), + (0x1EE53, 'X'), + (0x1EE54, 'M', 'ش'), + (0x1EE55, 'X'), + (0x1EE57, 'M', 'خ'), + (0x1EE58, 'X'), + (0x1EE59, 'M', 'ض'), + (0x1EE5A, 'X'), + (0x1EE5B, 'M', 'غ'), + (0x1EE5C, 'X'), + (0x1EE5D, 'M', 'ں'), + (0x1EE5E, 'X'), + (0x1EE5F, 'M', 'ٯ'), + (0x1EE60, 'X'), + (0x1EE61, 'M', 'ب'), + (0x1EE62, 'M', 'ج'), + (0x1EE63, 'X'), + (0x1EE64, 'M', 'ه'), + (0x1EE65, 'X'), + (0x1EE67, 'M', 'ح'), + (0x1EE68, 'M', 'ط'), + (0x1EE69, 'M', 'ي'), + (0x1EE6A, 'M', 'ك'), + (0x1EE6B, 'X'), + (0x1EE6C, 'M', 'م'), + (0x1EE6D, 'M', 'ن'), + (0x1EE6E, 'M', 'س'), + (0x1EE6F, 'M', 'ع'), + (0x1EE70, 'M', 'ف'), + (0x1EE71, 'M', 'ص'), + (0x1EE72, 'M', 'ق'), + (0x1EE73, 'X'), + (0x1EE74, 'M', 'ش'), + (0x1EE75, 'M', 'ت'), + (0x1EE76, 'M', 'ث'), + (0x1EE77, 'M', 'خ'), + (0x1EE78, 'X'), + (0x1EE79, 'M', 'ض'), + (0x1EE7A, 'M', 'ظ'), + (0x1EE7B, 'M', 'غ'), + (0x1EE7C, 'M', 'ٮ'), + (0x1EE7D, 'X'), + (0x1EE7E, 'M', 'ڡ'), + (0x1EE7F, 'X'), + (0x1EE80, 'M', 'ا'), + (0x1EE81, 'M', 'ب'), + (0x1EE82, 'M', 'ج'), + (0x1EE83, 'M', 'د'), + (0x1EE84, 'M', 'ه'), + (0x1EE85, 'M', 'و'), + (0x1EE86, 'M', 'ز'), + (0x1EE87, 'M', 'ح'), + (0x1EE88, 'M', 'ط'), + (0x1EE89, 'M', 'ي'), + (0x1EE8A, 'X'), + (0x1EE8B, 'M', 'ل'), + (0x1EE8C, 'M', 'م'), + (0x1EE8D, 'M', 'ن'), + (0x1EE8E, 'M', 'س'), + (0x1EE8F, 'M', 'ع'), + (0x1EE90, 'M', 'ف'), + (0x1EE91, 'M', 'ص'), + (0x1EE92, 'M', 'ق'), + (0x1EE93, 'M', 'ر'), + (0x1EE94, 'M', 'ش'), + (0x1EE95, 'M', 'ت'), + (0x1EE96, 'M', 'ث'), + (0x1EE97, 'M', 'خ'), + (0x1EE98, 'M', 'ذ'), + (0x1EE99, 'M', 'ض'), + (0x1EE9A, 'M', 'ظ'), + (0x1EE9B, 'M', 'غ'), + (0x1EE9C, 'X'), + (0x1EEA1, 'M', 'ب'), + (0x1EEA2, 'M', 'ج'), + (0x1EEA3, 'M', 'د'), + (0x1EEA4, 'X'), + (0x1EEA5, 'M', 'و'), + ] + +def _seg_73() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1EEA6, 'M', 'ز'), + (0x1EEA7, 'M', 'ح'), + (0x1EEA8, 'M', 'ط'), + (0x1EEA9, 'M', 'ي'), + (0x1EEAA, 'X'), + (0x1EEAB, 'M', 'ل'), + (0x1EEAC, 'M', 'م'), + (0x1EEAD, 'M', 'ن'), + (0x1EEAE, 'M', 'س'), + (0x1EEAF, 'M', 'ع'), + (0x1EEB0, 'M', 'ف'), + (0x1EEB1, 'M', 'ص'), + (0x1EEB2, 'M', 'ق'), + (0x1EEB3, 'M', 'ر'), + (0x1EEB4, 'M', 'ش'), + (0x1EEB5, 'M', 'ت'), + (0x1EEB6, 'M', 'ث'), + (0x1EEB7, 'M', 'خ'), + (0x1EEB8, 'M', 'ذ'), + (0x1EEB9, 'M', 'ض'), + (0x1EEBA, 'M', 'ظ'), + (0x1EEBB, 'M', 'غ'), + (0x1EEBC, 'X'), + (0x1EEF0, 'V'), + (0x1EEF2, 'X'), + (0x1F000, 'V'), + (0x1F02C, 'X'), + (0x1F030, 'V'), + (0x1F094, 'X'), + (0x1F0A0, 'V'), + (0x1F0AF, 'X'), + (0x1F0B1, 'V'), + (0x1F0C0, 'X'), + (0x1F0C1, 'V'), + (0x1F0D0, 'X'), + (0x1F0D1, 'V'), + (0x1F0F6, 'X'), + (0x1F101, '3', '0,'), + (0x1F102, '3', '1,'), + (0x1F103, '3', '2,'), + (0x1F104, '3', '3,'), + (0x1F105, '3', '4,'), + (0x1F106, '3', '5,'), + (0x1F107, '3', '6,'), + (0x1F108, '3', '7,'), + (0x1F109, '3', '8,'), + (0x1F10A, '3', '9,'), + (0x1F10B, 'V'), + (0x1F110, '3', '(a)'), + (0x1F111, '3', '(b)'), + (0x1F112, '3', '(c)'), + (0x1F113, '3', '(d)'), + (0x1F114, '3', '(e)'), + (0x1F115, '3', '(f)'), + (0x1F116, '3', '(g)'), + (0x1F117, '3', '(h)'), + (0x1F118, '3', '(i)'), + (0x1F119, '3', '(j)'), + (0x1F11A, '3', '(k)'), + (0x1F11B, '3', '(l)'), + (0x1F11C, '3', '(m)'), + (0x1F11D, '3', '(n)'), + (0x1F11E, '3', '(o)'), + (0x1F11F, '3', '(p)'), + (0x1F120, '3', '(q)'), + (0x1F121, '3', '(r)'), + (0x1F122, '3', '(s)'), + (0x1F123, '3', '(t)'), + (0x1F124, '3', '(u)'), + (0x1F125, '3', '(v)'), + (0x1F126, '3', '(w)'), + (0x1F127, '3', '(x)'), + (0x1F128, '3', '(y)'), + (0x1F129, '3', '(z)'), + (0x1F12A, 'M', '〔s〕'), + (0x1F12B, 'M', 'c'), + (0x1F12C, 'M', 'r'), + (0x1F12D, 'M', 'cd'), + (0x1F12E, 'M', 'wz'), + (0x1F12F, 'V'), + (0x1F130, 'M', 'a'), + (0x1F131, 'M', 'b'), + (0x1F132, 'M', 'c'), + (0x1F133, 'M', 'd'), + (0x1F134, 'M', 'e'), + (0x1F135, 'M', 'f'), + (0x1F136, 'M', 'g'), + (0x1F137, 'M', 'h'), + (0x1F138, 'M', 'i'), + (0x1F139, 'M', 'j'), + (0x1F13A, 'M', 'k'), + (0x1F13B, 'M', 'l'), + (0x1F13C, 'M', 'm'), + (0x1F13D, 'M', 'n'), + (0x1F13E, 'M', 'o'), + (0x1F13F, 'M', 'p'), + (0x1F140, 'M', 'q'), + (0x1F141, 'M', 'r'), + (0x1F142, 'M', 's'), + (0x1F143, 'M', 't'), + ] + +def _seg_74() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1F144, 'M', 'u'), + (0x1F145, 'M', 'v'), + (0x1F146, 'M', 'w'), + (0x1F147, 'M', 'x'), + (0x1F148, 'M', 'y'), + (0x1F149, 'M', 'z'), + (0x1F14A, 'M', 'hv'), + (0x1F14B, 'M', 'mv'), + (0x1F14C, 'M', 'sd'), + (0x1F14D, 'M', 'ss'), + (0x1F14E, 'M', 'ppv'), + (0x1F14F, 'M', 'wc'), + (0x1F150, 'V'), + (0x1F16A, 'M', 'mc'), + (0x1F16B, 'M', 'md'), + (0x1F16C, 'M', 'mr'), + (0x1F16D, 'V'), + (0x1F190, 'M', 'dj'), + (0x1F191, 'V'), + (0x1F1AE, 'X'), + (0x1F1E6, 'V'), + (0x1F200, 'M', 'ほか'), + (0x1F201, 'M', 'ココ'), + (0x1F202, 'M', 'サ'), + (0x1F203, 'X'), + (0x1F210, 'M', '手'), + (0x1F211, 'M', '字'), + (0x1F212, 'M', '双'), + (0x1F213, 'M', 'デ'), + (0x1F214, 'M', '二'), + (0x1F215, 'M', '多'), + (0x1F216, 'M', '解'), + (0x1F217, 'M', '天'), + (0x1F218, 'M', '交'), + (0x1F219, 'M', '映'), + (0x1F21A, 'M', '無'), + (0x1F21B, 'M', '料'), + (0x1F21C, 'M', '前'), + (0x1F21D, 'M', '後'), + (0x1F21E, 'M', '再'), + (0x1F21F, 'M', '新'), + (0x1F220, 'M', '初'), + (0x1F221, 'M', '終'), + (0x1F222, 'M', '生'), + (0x1F223, 'M', '販'), + (0x1F224, 'M', '声'), + (0x1F225, 'M', '吹'), + (0x1F226, 'M', '演'), + (0x1F227, 'M', '投'), + (0x1F228, 'M', '捕'), + (0x1F229, 'M', '一'), + (0x1F22A, 'M', '三'), + (0x1F22B, 'M', '遊'), + (0x1F22C, 'M', '左'), + (0x1F22D, 'M', '中'), + (0x1F22E, 'M', '右'), + (0x1F22F, 'M', '指'), + (0x1F230, 'M', '走'), + (0x1F231, 'M', '打'), + (0x1F232, 'M', '禁'), + (0x1F233, 'M', '空'), + (0x1F234, 'M', '合'), + (0x1F235, 'M', '満'), + (0x1F236, 'M', '有'), + (0x1F237, 'M', '月'), + (0x1F238, 'M', '申'), + (0x1F239, 'M', '割'), + (0x1F23A, 'M', '営'), + (0x1F23B, 'M', '配'), + (0x1F23C, 'X'), + (0x1F240, 'M', '〔本〕'), + (0x1F241, 'M', '〔三〕'), + (0x1F242, 'M', '〔二〕'), + (0x1F243, 'M', '〔安〕'), + (0x1F244, 'M', '〔点〕'), + (0x1F245, 'M', '〔打〕'), + (0x1F246, 'M', '〔盗〕'), + (0x1F247, 'M', '〔勝〕'), + (0x1F248, 'M', '〔敗〕'), + (0x1F249, 'X'), + (0x1F250, 'M', '得'), + (0x1F251, 'M', '可'), + (0x1F252, 'X'), + (0x1F260, 'V'), + (0x1F266, 'X'), + (0x1F300, 'V'), + (0x1F6D8, 'X'), + (0x1F6DD, 'V'), + (0x1F6ED, 'X'), + (0x1F6F0, 'V'), + (0x1F6FD, 'X'), + (0x1F700, 'V'), + (0x1F774, 'X'), + (0x1F780, 'V'), + (0x1F7D9, 'X'), + (0x1F7E0, 'V'), + (0x1F7EC, 'X'), + (0x1F7F0, 'V'), + (0x1F7F1, 'X'), + (0x1F800, 'V'), + ] + +def _seg_75() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x1F80C, 'X'), + (0x1F810, 'V'), + (0x1F848, 'X'), + (0x1F850, 'V'), + (0x1F85A, 'X'), + (0x1F860, 'V'), + (0x1F888, 'X'), + (0x1F890, 'V'), + (0x1F8AE, 'X'), + (0x1F8B0, 'V'), + (0x1F8B2, 'X'), + (0x1F900, 'V'), + (0x1FA54, 'X'), + (0x1FA60, 'V'), + (0x1FA6E, 'X'), + (0x1FA70, 'V'), + (0x1FA75, 'X'), + (0x1FA78, 'V'), + (0x1FA7D, 'X'), + (0x1FA80, 'V'), + (0x1FA87, 'X'), + (0x1FA90, 'V'), + (0x1FAAD, 'X'), + (0x1FAB0, 'V'), + (0x1FABB, 'X'), + (0x1FAC0, 'V'), + (0x1FAC6, 'X'), + (0x1FAD0, 'V'), + (0x1FADA, 'X'), + (0x1FAE0, 'V'), + (0x1FAE8, 'X'), + (0x1FAF0, 'V'), + (0x1FAF7, 'X'), + (0x1FB00, 'V'), + (0x1FB93, 'X'), + (0x1FB94, 'V'), + (0x1FBCB, 'X'), + (0x1FBF0, 'M', '0'), + (0x1FBF1, 'M', '1'), + (0x1FBF2, 'M', '2'), + (0x1FBF3, 'M', '3'), + (0x1FBF4, 'M', '4'), + (0x1FBF5, 'M', '5'), + (0x1FBF6, 'M', '6'), + (0x1FBF7, 'M', '7'), + (0x1FBF8, 'M', '8'), + (0x1FBF9, 'M', '9'), + (0x1FBFA, 'X'), + (0x20000, 'V'), + (0x2A6E0, 'X'), + (0x2A700, 'V'), + (0x2B739, 'X'), + (0x2B740, 'V'), + (0x2B81E, 'X'), + (0x2B820, 'V'), + (0x2CEA2, 'X'), + (0x2CEB0, 'V'), + (0x2EBE1, 'X'), + (0x2F800, 'M', '丽'), + (0x2F801, 'M', '丸'), + (0x2F802, 'M', '乁'), + (0x2F803, 'M', '𠄢'), + (0x2F804, 'M', '你'), + (0x2F805, 'M', '侮'), + (0x2F806, 'M', '侻'), + (0x2F807, 'M', '倂'), + (0x2F808, 'M', '偺'), + (0x2F809, 'M', '備'), + (0x2F80A, 'M', '僧'), + (0x2F80B, 'M', '像'), + (0x2F80C, 'M', '㒞'), + (0x2F80D, 'M', '𠘺'), + (0x2F80E, 'M', '免'), + (0x2F80F, 'M', '兔'), + (0x2F810, 'M', '兤'), + (0x2F811, 'M', '具'), + (0x2F812, 'M', '𠔜'), + (0x2F813, 'M', '㒹'), + (0x2F814, 'M', '內'), + (0x2F815, 'M', '再'), + (0x2F816, 'M', '𠕋'), + (0x2F817, 'M', '冗'), + (0x2F818, 'M', '冤'), + (0x2F819, 'M', '仌'), + (0x2F81A, 'M', '冬'), + (0x2F81B, 'M', '况'), + (0x2F81C, 'M', '𩇟'), + (0x2F81D, 'M', '凵'), + (0x2F81E, 'M', '刃'), + (0x2F81F, 'M', '㓟'), + (0x2F820, 'M', '刻'), + (0x2F821, 'M', '剆'), + (0x2F822, 'M', '割'), + (0x2F823, 'M', '剷'), + (0x2F824, 'M', '㔕'), + (0x2F825, 'M', '勇'), + (0x2F826, 'M', '勉'), + (0x2F827, 'M', '勤'), + (0x2F828, 'M', '勺'), + (0x2F829, 'M', '包'), + ] + +def _seg_76() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F82A, 'M', '匆'), + (0x2F82B, 'M', '北'), + (0x2F82C, 'M', '卉'), + (0x2F82D, 'M', '卑'), + (0x2F82E, 'M', '博'), + (0x2F82F, 'M', '即'), + (0x2F830, 'M', '卽'), + (0x2F831, 'M', '卿'), + (0x2F834, 'M', '𠨬'), + (0x2F835, 'M', '灰'), + (0x2F836, 'M', '及'), + (0x2F837, 'M', '叟'), + (0x2F838, 'M', '𠭣'), + (0x2F839, 'M', '叫'), + (0x2F83A, 'M', '叱'), + (0x2F83B, 'M', '吆'), + (0x2F83C, 'M', '咞'), + (0x2F83D, 'M', '吸'), + (0x2F83E, 'M', '呈'), + (0x2F83F, 'M', '周'), + (0x2F840, 'M', '咢'), + (0x2F841, 'M', '哶'), + (0x2F842, 'M', '唐'), + (0x2F843, 'M', '啓'), + (0x2F844, 'M', '啣'), + (0x2F845, 'M', '善'), + (0x2F847, 'M', '喙'), + (0x2F848, 'M', '喫'), + (0x2F849, 'M', '喳'), + (0x2F84A, 'M', '嗂'), + (0x2F84B, 'M', '圖'), + (0x2F84C, 'M', '嘆'), + (0x2F84D, 'M', '圗'), + (0x2F84E, 'M', '噑'), + (0x2F84F, 'M', '噴'), + (0x2F850, 'M', '切'), + (0x2F851, 'M', '壮'), + (0x2F852, 'M', '城'), + (0x2F853, 'M', '埴'), + (0x2F854, 'M', '堍'), + (0x2F855, 'M', '型'), + (0x2F856, 'M', '堲'), + (0x2F857, 'M', '報'), + (0x2F858, 'M', '墬'), + (0x2F859, 'M', '𡓤'), + (0x2F85A, 'M', '売'), + (0x2F85B, 'M', '壷'), + (0x2F85C, 'M', '夆'), + (0x2F85D, 'M', '多'), + (0x2F85E, 'M', '夢'), + (0x2F85F, 'M', '奢'), + (0x2F860, 'M', '𡚨'), + (0x2F861, 'M', '𡛪'), + (0x2F862, 'M', '姬'), + (0x2F863, 'M', '娛'), + (0x2F864, 'M', '娧'), + (0x2F865, 'M', '姘'), + (0x2F866, 'M', '婦'), + (0x2F867, 'M', '㛮'), + (0x2F868, 'X'), + (0x2F869, 'M', '嬈'), + (0x2F86A, 'M', '嬾'), + (0x2F86C, 'M', '𡧈'), + (0x2F86D, 'M', '寃'), + (0x2F86E, 'M', '寘'), + (0x2F86F, 'M', '寧'), + (0x2F870, 'M', '寳'), + (0x2F871, 'M', '𡬘'), + (0x2F872, 'M', '寿'), + (0x2F873, 'M', '将'), + (0x2F874, 'X'), + (0x2F875, 'M', '尢'), + (0x2F876, 'M', '㞁'), + (0x2F877, 'M', '屠'), + (0x2F878, 'M', '屮'), + (0x2F879, 'M', '峀'), + (0x2F87A, 'M', '岍'), + (0x2F87B, 'M', '𡷤'), + (0x2F87C, 'M', '嵃'), + (0x2F87D, 'M', '𡷦'), + (0x2F87E, 'M', '嵮'), + (0x2F87F, 'M', '嵫'), + (0x2F880, 'M', '嵼'), + (0x2F881, 'M', '巡'), + (0x2F882, 'M', '巢'), + (0x2F883, 'M', '㠯'), + (0x2F884, 'M', '巽'), + (0x2F885, 'M', '帨'), + (0x2F886, 'M', '帽'), + (0x2F887, 'M', '幩'), + (0x2F888, 'M', '㡢'), + (0x2F889, 'M', '𢆃'), + (0x2F88A, 'M', '㡼'), + (0x2F88B, 'M', '庰'), + (0x2F88C, 'M', '庳'), + (0x2F88D, 'M', '庶'), + (0x2F88E, 'M', '廊'), + (0x2F88F, 'M', '𪎒'), + (0x2F890, 'M', '廾'), + (0x2F891, 'M', '𢌱'), + ] + +def _seg_77() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F893, 'M', '舁'), + (0x2F894, 'M', '弢'), + (0x2F896, 'M', '㣇'), + (0x2F897, 'M', '𣊸'), + (0x2F898, 'M', '𦇚'), + (0x2F899, 'M', '形'), + (0x2F89A, 'M', '彫'), + (0x2F89B, 'M', '㣣'), + (0x2F89C, 'M', '徚'), + (0x2F89D, 'M', '忍'), + (0x2F89E, 'M', '志'), + (0x2F89F, 'M', '忹'), + (0x2F8A0, 'M', '悁'), + (0x2F8A1, 'M', '㤺'), + (0x2F8A2, 'M', '㤜'), + (0x2F8A3, 'M', '悔'), + (0x2F8A4, 'M', '𢛔'), + (0x2F8A5, 'M', '惇'), + (0x2F8A6, 'M', '慈'), + (0x2F8A7, 'M', '慌'), + (0x2F8A8, 'M', '慎'), + (0x2F8A9, 'M', '慌'), + (0x2F8AA, 'M', '慺'), + (0x2F8AB, 'M', '憎'), + (0x2F8AC, 'M', '憲'), + (0x2F8AD, 'M', '憤'), + (0x2F8AE, 'M', '憯'), + (0x2F8AF, 'M', '懞'), + (0x2F8B0, 'M', '懲'), + (0x2F8B1, 'M', '懶'), + (0x2F8B2, 'M', '成'), + (0x2F8B3, 'M', '戛'), + (0x2F8B4, 'M', '扝'), + (0x2F8B5, 'M', '抱'), + (0x2F8B6, 'M', '拔'), + (0x2F8B7, 'M', '捐'), + (0x2F8B8, 'M', '𢬌'), + (0x2F8B9, 'M', '挽'), + (0x2F8BA, 'M', '拼'), + (0x2F8BB, 'M', '捨'), + (0x2F8BC, 'M', '掃'), + (0x2F8BD, 'M', '揤'), + (0x2F8BE, 'M', '𢯱'), + (0x2F8BF, 'M', '搢'), + (0x2F8C0, 'M', '揅'), + (0x2F8C1, 'M', '掩'), + (0x2F8C2, 'M', '㨮'), + (0x2F8C3, 'M', '摩'), + (0x2F8C4, 'M', '摾'), + (0x2F8C5, 'M', '撝'), + (0x2F8C6, 'M', '摷'), + (0x2F8C7, 'M', '㩬'), + (0x2F8C8, 'M', '敏'), + (0x2F8C9, 'M', '敬'), + (0x2F8CA, 'M', '𣀊'), + (0x2F8CB, 'M', '旣'), + (0x2F8CC, 'M', '書'), + (0x2F8CD, 'M', '晉'), + (0x2F8CE, 'M', '㬙'), + (0x2F8CF, 'M', '暑'), + (0x2F8D0, 'M', '㬈'), + (0x2F8D1, 'M', '㫤'), + (0x2F8D2, 'M', '冒'), + (0x2F8D3, 'M', '冕'), + (0x2F8D4, 'M', '最'), + (0x2F8D5, 'M', '暜'), + (0x2F8D6, 'M', '肭'), + (0x2F8D7, 'M', '䏙'), + (0x2F8D8, 'M', '朗'), + (0x2F8D9, 'M', '望'), + (0x2F8DA, 'M', '朡'), + (0x2F8DB, 'M', '杞'), + (0x2F8DC, 'M', '杓'), + (0x2F8DD, 'M', '𣏃'), + (0x2F8DE, 'M', '㭉'), + (0x2F8DF, 'M', '柺'), + (0x2F8E0, 'M', '枅'), + (0x2F8E1, 'M', '桒'), + (0x2F8E2, 'M', '梅'), + (0x2F8E3, 'M', '𣑭'), + (0x2F8E4, 'M', '梎'), + (0x2F8E5, 'M', '栟'), + (0x2F8E6, 'M', '椔'), + (0x2F8E7, 'M', '㮝'), + (0x2F8E8, 'M', '楂'), + (0x2F8E9, 'M', '榣'), + (0x2F8EA, 'M', '槪'), + (0x2F8EB, 'M', '檨'), + (0x2F8EC, 'M', '𣚣'), + (0x2F8ED, 'M', '櫛'), + (0x2F8EE, 'M', '㰘'), + (0x2F8EF, 'M', '次'), + (0x2F8F0, 'M', '𣢧'), + (0x2F8F1, 'M', '歔'), + (0x2F8F2, 'M', '㱎'), + (0x2F8F3, 'M', '歲'), + (0x2F8F4, 'M', '殟'), + (0x2F8F5, 'M', '殺'), + (0x2F8F6, 'M', '殻'), + (0x2F8F7, 'M', '𣪍'), + ] + +def _seg_78() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F8F8, 'M', '𡴋'), + (0x2F8F9, 'M', '𣫺'), + (0x2F8FA, 'M', '汎'), + (0x2F8FB, 'M', '𣲼'), + (0x2F8FC, 'M', '沿'), + (0x2F8FD, 'M', '泍'), + (0x2F8FE, 'M', '汧'), + (0x2F8FF, 'M', '洖'), + (0x2F900, 'M', '派'), + (0x2F901, 'M', '海'), + (0x2F902, 'M', '流'), + (0x2F903, 'M', '浩'), + (0x2F904, 'M', '浸'), + (0x2F905, 'M', '涅'), + (0x2F906, 'M', '𣴞'), + (0x2F907, 'M', '洴'), + (0x2F908, 'M', '港'), + (0x2F909, 'M', '湮'), + (0x2F90A, 'M', '㴳'), + (0x2F90B, 'M', '滋'), + (0x2F90C, 'M', '滇'), + (0x2F90D, 'M', '𣻑'), + (0x2F90E, 'M', '淹'), + (0x2F90F, 'M', '潮'), + (0x2F910, 'M', '𣽞'), + (0x2F911, 'M', '𣾎'), + (0x2F912, 'M', '濆'), + (0x2F913, 'M', '瀹'), + (0x2F914, 'M', '瀞'), + (0x2F915, 'M', '瀛'), + (0x2F916, 'M', '㶖'), + (0x2F917, 'M', '灊'), + (0x2F918, 'M', '災'), + (0x2F919, 'M', '灷'), + (0x2F91A, 'M', '炭'), + (0x2F91B, 'M', '𠔥'), + (0x2F91C, 'M', '煅'), + (0x2F91D, 'M', '𤉣'), + (0x2F91E, 'M', '熜'), + (0x2F91F, 'X'), + (0x2F920, 'M', '爨'), + (0x2F921, 'M', '爵'), + (0x2F922, 'M', '牐'), + (0x2F923, 'M', '𤘈'), + (0x2F924, 'M', '犀'), + (0x2F925, 'M', '犕'), + (0x2F926, 'M', '𤜵'), + (0x2F927, 'M', '𤠔'), + (0x2F928, 'M', '獺'), + (0x2F929, 'M', '王'), + (0x2F92A, 'M', '㺬'), + (0x2F92B, 'M', '玥'), + (0x2F92C, 'M', '㺸'), + (0x2F92E, 'M', '瑇'), + (0x2F92F, 'M', '瑜'), + (0x2F930, 'M', '瑱'), + (0x2F931, 'M', '璅'), + (0x2F932, 'M', '瓊'), + (0x2F933, 'M', '㼛'), + (0x2F934, 'M', '甤'), + (0x2F935, 'M', '𤰶'), + (0x2F936, 'M', '甾'), + (0x2F937, 'M', '𤲒'), + (0x2F938, 'M', '異'), + (0x2F939, 'M', '𢆟'), + (0x2F93A, 'M', '瘐'), + (0x2F93B, 'M', '𤾡'), + (0x2F93C, 'M', '𤾸'), + (0x2F93D, 'M', '𥁄'), + (0x2F93E, 'M', '㿼'), + (0x2F93F, 'M', '䀈'), + (0x2F940, 'M', '直'), + (0x2F941, 'M', '𥃳'), + (0x2F942, 'M', '𥃲'), + (0x2F943, 'M', '𥄙'), + (0x2F944, 'M', '𥄳'), + (0x2F945, 'M', '眞'), + (0x2F946, 'M', '真'), + (0x2F948, 'M', '睊'), + (0x2F949, 'M', '䀹'), + (0x2F94A, 'M', '瞋'), + (0x2F94B, 'M', '䁆'), + (0x2F94C, 'M', '䂖'), + (0x2F94D, 'M', '𥐝'), + (0x2F94E, 'M', '硎'), + (0x2F94F, 'M', '碌'), + (0x2F950, 'M', '磌'), + (0x2F951, 'M', '䃣'), + (0x2F952, 'M', '𥘦'), + (0x2F953, 'M', '祖'), + (0x2F954, 'M', '𥚚'), + (0x2F955, 'M', '𥛅'), + (0x2F956, 'M', '福'), + (0x2F957, 'M', '秫'), + (0x2F958, 'M', '䄯'), + (0x2F959, 'M', '穀'), + (0x2F95A, 'M', '穊'), + (0x2F95B, 'M', '穏'), + (0x2F95C, 'M', '𥥼'), + (0x2F95D, 'M', '𥪧'), + ] + +def _seg_79() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F95F, 'X'), + (0x2F960, 'M', '䈂'), + (0x2F961, 'M', '𥮫'), + (0x2F962, 'M', '篆'), + (0x2F963, 'M', '築'), + (0x2F964, 'M', '䈧'), + (0x2F965, 'M', '𥲀'), + (0x2F966, 'M', '糒'), + (0x2F967, 'M', '䊠'), + (0x2F968, 'M', '糨'), + (0x2F969, 'M', '糣'), + (0x2F96A, 'M', '紀'), + (0x2F96B, 'M', '𥾆'), + (0x2F96C, 'M', '絣'), + (0x2F96D, 'M', '䌁'), + (0x2F96E, 'M', '緇'), + (0x2F96F, 'M', '縂'), + (0x2F970, 'M', '繅'), + (0x2F971, 'M', '䌴'), + (0x2F972, 'M', '𦈨'), + (0x2F973, 'M', '𦉇'), + (0x2F974, 'M', '䍙'), + (0x2F975, 'M', '𦋙'), + (0x2F976, 'M', '罺'), + (0x2F977, 'M', '𦌾'), + (0x2F978, 'M', '羕'), + (0x2F979, 'M', '翺'), + (0x2F97A, 'M', '者'), + (0x2F97B, 'M', '𦓚'), + (0x2F97C, 'M', '𦔣'), + (0x2F97D, 'M', '聠'), + (0x2F97E, 'M', '𦖨'), + (0x2F97F, 'M', '聰'), + (0x2F980, 'M', '𣍟'), + (0x2F981, 'M', '䏕'), + (0x2F982, 'M', '育'), + (0x2F983, 'M', '脃'), + (0x2F984, 'M', '䐋'), + (0x2F985, 'M', '脾'), + (0x2F986, 'M', '媵'), + (0x2F987, 'M', '𦞧'), + (0x2F988, 'M', '𦞵'), + (0x2F989, 'M', '𣎓'), + (0x2F98A, 'M', '𣎜'), + (0x2F98B, 'M', '舁'), + (0x2F98C, 'M', '舄'), + (0x2F98D, 'M', '辞'), + (0x2F98E, 'M', '䑫'), + (0x2F98F, 'M', '芑'), + (0x2F990, 'M', '芋'), + (0x2F991, 'M', '芝'), + (0x2F992, 'M', '劳'), + (0x2F993, 'M', '花'), + (0x2F994, 'M', '芳'), + (0x2F995, 'M', '芽'), + (0x2F996, 'M', '苦'), + (0x2F997, 'M', '𦬼'), + (0x2F998, 'M', '若'), + (0x2F999, 'M', '茝'), + (0x2F99A, 'M', '荣'), + (0x2F99B, 'M', '莭'), + (0x2F99C, 'M', '茣'), + (0x2F99D, 'M', '莽'), + (0x2F99E, 'M', '菧'), + (0x2F99F, 'M', '著'), + (0x2F9A0, 'M', '荓'), + (0x2F9A1, 'M', '菊'), + (0x2F9A2, 'M', '菌'), + (0x2F9A3, 'M', '菜'), + (0x2F9A4, 'M', '𦰶'), + (0x2F9A5, 'M', '𦵫'), + (0x2F9A6, 'M', '𦳕'), + (0x2F9A7, 'M', '䔫'), + (0x2F9A8, 'M', '蓱'), + (0x2F9A9, 'M', '蓳'), + (0x2F9AA, 'M', '蔖'), + (0x2F9AB, 'M', '𧏊'), + (0x2F9AC, 'M', '蕤'), + (0x2F9AD, 'M', '𦼬'), + (0x2F9AE, 'M', '䕝'), + (0x2F9AF, 'M', '䕡'), + (0x2F9B0, 'M', '𦾱'), + (0x2F9B1, 'M', '𧃒'), + (0x2F9B2, 'M', '䕫'), + (0x2F9B3, 'M', '虐'), + (0x2F9B4, 'M', '虜'), + (0x2F9B5, 'M', '虧'), + (0x2F9B6, 'M', '虩'), + (0x2F9B7, 'M', '蚩'), + (0x2F9B8, 'M', '蚈'), + (0x2F9B9, 'M', '蜎'), + (0x2F9BA, 'M', '蛢'), + (0x2F9BB, 'M', '蝹'), + (0x2F9BC, 'M', '蜨'), + (0x2F9BD, 'M', '蝫'), + (0x2F9BE, 'M', '螆'), + (0x2F9BF, 'X'), + (0x2F9C0, 'M', '蟡'), + (0x2F9C1, 'M', '蠁'), + (0x2F9C2, 'M', '䗹'), + ] + +def _seg_80() -> List[Union[Tuple[int, str], Tuple[int, str, str]]]: + return [ + (0x2F9C3, 'M', '衠'), + (0x2F9C4, 'M', '衣'), + (0x2F9C5, 'M', '𧙧'), + (0x2F9C6, 'M', '裗'), + (0x2F9C7, 'M', '裞'), + (0x2F9C8, 'M', '䘵'), + (0x2F9C9, 'M', '裺'), + (0x2F9CA, 'M', '㒻'), + (0x2F9CB, 'M', '𧢮'), + (0x2F9CC, 'M', '𧥦'), + (0x2F9CD, 'M', '䚾'), + (0x2F9CE, 'M', '䛇'), + (0x2F9CF, 'M', '誠'), + (0x2F9D0, 'M', '諭'), + (0x2F9D1, 'M', '變'), + (0x2F9D2, 'M', '豕'), + (0x2F9D3, 'M', '𧲨'), + (0x2F9D4, 'M', '貫'), + (0x2F9D5, 'M', '賁'), + (0x2F9D6, 'M', '贛'), + (0x2F9D7, 'M', '起'), + (0x2F9D8, 'M', '𧼯'), + (0x2F9D9, 'M', '𠠄'), + (0x2F9DA, 'M', '跋'), + (0x2F9DB, 'M', '趼'), + (0x2F9DC, 'M', '跰'), + (0x2F9DD, 'M', '𠣞'), + (0x2F9DE, 'M', '軔'), + (0x2F9DF, 'M', '輸'), + (0x2F9E0, 'M', '𨗒'), + (0x2F9E1, 'M', '𨗭'), + (0x2F9E2, 'M', '邔'), + (0x2F9E3, 'M', '郱'), + (0x2F9E4, 'M', '鄑'), + (0x2F9E5, 'M', '𨜮'), + (0x2F9E6, 'M', '鄛'), + (0x2F9E7, 'M', '鈸'), + (0x2F9E8, 'M', '鋗'), + (0x2F9E9, 'M', '鋘'), + (0x2F9EA, 'M', '鉼'), + (0x2F9EB, 'M', '鏹'), + (0x2F9EC, 'M', '鐕'), + (0x2F9ED, 'M', '𨯺'), + (0x2F9EE, 'M', '開'), + (0x2F9EF, 'M', '䦕'), + (0x2F9F0, 'M', '閷'), + (0x2F9F1, 'M', '𨵷'), + (0x2F9F2, 'M', '䧦'), + (0x2F9F3, 'M', '雃'), + (0x2F9F4, 'M', '嶲'), + (0x2F9F5, 'M', '霣'), + (0x2F9F6, 'M', '𩅅'), + (0x2F9F7, 'M', '𩈚'), + (0x2F9F8, 'M', '䩮'), + (0x2F9F9, 'M', '䩶'), + (0x2F9FA, 'M', '韠'), + (0x2F9FB, 'M', '𩐊'), + (0x2F9FC, 'M', '䪲'), + (0x2F9FD, 'M', '𩒖'), + (0x2F9FE, 'M', '頋'), + (0x2FA00, 'M', '頩'), + (0x2FA01, 'M', '𩖶'), + (0x2FA02, 'M', '飢'), + (0x2FA03, 'M', '䬳'), + (0x2FA04, 'M', '餩'), + (0x2FA05, 'M', '馧'), + (0x2FA06, 'M', '駂'), + (0x2FA07, 'M', '駾'), + (0x2FA08, 'M', '䯎'), + (0x2FA09, 'M', '𩬰'), + (0x2FA0A, 'M', '鬒'), + (0x2FA0B, 'M', '鱀'), + (0x2FA0C, 'M', '鳽'), + (0x2FA0D, 'M', '䳎'), + (0x2FA0E, 'M', '䳭'), + (0x2FA0F, 'M', '鵧'), + (0x2FA10, 'M', '𪃎'), + (0x2FA11, 'M', '䳸'), + (0x2FA12, 'M', '𪄅'), + (0x2FA13, 'M', '𪈎'), + (0x2FA14, 'M', '𪊑'), + (0x2FA15, 'M', '麻'), + (0x2FA16, 'M', '䵖'), + (0x2FA17, 'M', '黹'), + (0x2FA18, 'M', '黾'), + (0x2FA19, 'M', '鼅'), + (0x2FA1A, 'M', '鼏'), + (0x2FA1B, 'M', '鼖'), + (0x2FA1C, 'M', '鼻'), + (0x2FA1D, 'M', '𪘀'), + (0x2FA1E, 'X'), + (0x30000, 'V'), + (0x3134B, 'X'), + (0xE0100, 'I'), + (0xE01F0, 'X'), + ] + +uts46data = tuple( + _seg_0() + + _seg_1() + + _seg_2() + + _seg_3() + + _seg_4() + + _seg_5() + + _seg_6() + + _seg_7() + + _seg_8() + + _seg_9() + + _seg_10() + + _seg_11() + + _seg_12() + + _seg_13() + + _seg_14() + + _seg_15() + + _seg_16() + + _seg_17() + + _seg_18() + + _seg_19() + + _seg_20() + + _seg_21() + + _seg_22() + + _seg_23() + + _seg_24() + + _seg_25() + + _seg_26() + + _seg_27() + + _seg_28() + + _seg_29() + + _seg_30() + + _seg_31() + + _seg_32() + + _seg_33() + + _seg_34() + + _seg_35() + + _seg_36() + + _seg_37() + + _seg_38() + + _seg_39() + + _seg_40() + + _seg_41() + + _seg_42() + + _seg_43() + + _seg_44() + + _seg_45() + + _seg_46() + + _seg_47() + + _seg_48() + + _seg_49() + + _seg_50() + + _seg_51() + + _seg_52() + + _seg_53() + + _seg_54() + + _seg_55() + + _seg_56() + + _seg_57() + + _seg_58() + + _seg_59() + + _seg_60() + + _seg_61() + + _seg_62() + + _seg_63() + + _seg_64() + + _seg_65() + + _seg_66() + + _seg_67() + + _seg_68() + + _seg_69() + + _seg_70() + + _seg_71() + + _seg_72() + + _seg_73() + + _seg_74() + + _seg_75() + + _seg_76() + + _seg_77() + + _seg_78() + + _seg_79() + + _seg_80() +) # type: Tuple[Union[Tuple[int, str], Tuple[int, str, str]], ...] diff --git a/lib/jaraco/classes/__init__.py b/lib/jaraco/classes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/jaraco/classes/ancestry.py b/lib/jaraco/classes/ancestry.py new file mode 100644 index 00000000..dd9b2e92 --- /dev/null +++ b/lib/jaraco/classes/ancestry.py @@ -0,0 +1,68 @@ +""" +Routines for obtaining the class names +of an object and its parent classes. +""" + +from more_itertools import unique_everseen + + +def all_bases(c): + """ + return a tuple of all base classes the class c has as a parent. + >>> object in all_bases(list) + True + """ + return c.mro()[1:] + + +def all_classes(c): + """ + return a tuple of all classes to which c belongs + >>> list in all_classes(list) + True + """ + return c.mro() + + +# borrowed from +# http://code.activestate.com/recipes/576949-find-all-subclasses-of-a-given-class/ + + +def iter_subclasses(cls): + """ + Generator over all subclasses of a given class, in depth-first order. + + >>> bool in list(iter_subclasses(int)) + True + >>> class A(object): pass + >>> class B(A): pass + >>> class C(A): pass + >>> class D(B,C): pass + >>> class E(D): pass + >>> + >>> for cls in iter_subclasses(A): + ... print(cls.__name__) + B + D + E + C + >>> # get ALL classes currently defined + >>> res = [cls.__name__ for cls in iter_subclasses(object)] + >>> 'type' in res + True + >>> 'tuple' in res + True + >>> len(res) > 100 + True + """ + return unique_everseen(_iter_all_subclasses(cls)) + + +def _iter_all_subclasses(cls): + try: + subs = cls.__subclasses__() + except TypeError: # fails only when cls is type + subs = cls.__subclasses__(cls) + for sub in subs: + yield sub + yield from iter_subclasses(sub) diff --git a/lib/jaraco/classes/meta.py b/lib/jaraco/classes/meta.py new file mode 100644 index 00000000..bd41a1d9 --- /dev/null +++ b/lib/jaraco/classes/meta.py @@ -0,0 +1,66 @@ +""" +meta.py + +Some useful metaclasses. +""" + + +class LeafClassesMeta(type): + """ + A metaclass for classes that keeps track of all of them that + aren't base classes. + + >>> Parent = LeafClassesMeta('MyParentClass', (), {}) + >>> Parent in Parent._leaf_classes + True + >>> Child = LeafClassesMeta('MyChildClass', (Parent,), {}) + >>> Child in Parent._leaf_classes + True + >>> Parent in Parent._leaf_classes + False + + >>> Other = LeafClassesMeta('OtherClass', (), {}) + >>> Parent in Other._leaf_classes + False + >>> len(Other._leaf_classes) + 1 + """ + + def __init__(cls, name, bases, attrs): + if not hasattr(cls, '_leaf_classes'): + cls._leaf_classes = set() + leaf_classes = getattr(cls, '_leaf_classes') + leaf_classes.add(cls) + # remove any base classes + leaf_classes -= set(bases) + + +class TagRegistered(type): + """ + As classes of this metaclass are created, they keep a registry in the + base class of all classes by a class attribute, indicated by attr_name. + + >>> FooObject = TagRegistered('FooObject', (), dict(tag='foo')) + >>> FooObject._registry['foo'] is FooObject + True + >>> BarObject = TagRegistered('Barobject', (FooObject,), dict(tag='bar')) + >>> FooObject._registry is BarObject._registry + True + >>> len(FooObject._registry) + 2 + + '...' below should be 'jaraco.classes' but for pytest-dev/pytest#3396 + >>> FooObject._registry['bar'] + + """ + + attr_name = 'tag' + + def __init__(cls, name, bases, namespace): + super(TagRegistered, cls).__init__(name, bases, namespace) + if not hasattr(cls, '_registry'): + cls._registry = {} + meta = cls.__class__ + attr = getattr(cls, meta.attr_name, None) + if attr: + cls._registry[attr] = cls diff --git a/lib/jaraco/classes/properties.py b/lib/jaraco/classes/properties.py new file mode 100644 index 00000000..4fda4cb6 --- /dev/null +++ b/lib/jaraco/classes/properties.py @@ -0,0 +1,171 @@ +class NonDataProperty: + """Much like the property builtin, but only implements __get__, + making it a non-data property, and can be subsequently reset. + + See http://users.rcn.com/python/download/Descriptor.htm for more + information. + + >>> class X(object): + ... @NonDataProperty + ... def foo(self): + ... return 3 + >>> x = X() + >>> x.foo + 3 + >>> x.foo = 4 + >>> x.foo + 4 + + '...' below should be 'jaraco.classes' but for pytest-dev/pytest#3396 + >>> X.foo + <....properties.NonDataProperty object at ...> + """ + + def __init__(self, fget): + assert fget is not None, "fget cannot be none" + assert callable(fget), "fget must be callable" + self.fget = fget + + def __get__(self, obj, objtype=None): + if obj is None: + return self + return self.fget(obj) + + +class classproperty: + """ + Like @property but applies at the class level. + + + >>> class X(metaclass=classproperty.Meta): + ... val = None + ... @classproperty + ... def foo(cls): + ... return cls.val + ... @foo.setter + ... def foo(cls, val): + ... cls.val = val + >>> X.foo + >>> X.foo = 3 + >>> X.foo + 3 + >>> x = X() + >>> x.foo + 3 + >>> X.foo = 4 + >>> x.foo + 4 + + Setting the property on an instance affects the class. + + >>> x.foo = 5 + >>> x.foo + 5 + >>> X.foo + 5 + >>> vars(x) + {} + >>> X().foo + 5 + + Attempting to set an attribute where no setter was defined + results in an AttributeError: + + >>> class GetOnly(metaclass=classproperty.Meta): + ... @classproperty + ... def foo(cls): + ... return 'bar' + >>> GetOnly.foo = 3 + Traceback (most recent call last): + ... + AttributeError: can't set attribute + + It is also possible to wrap a classmethod or staticmethod in + a classproperty. + + >>> class Static(metaclass=classproperty.Meta): + ... @classproperty + ... @classmethod + ... def foo(cls): + ... return 'foo' + ... @classproperty + ... @staticmethod + ... def bar(): + ... return 'bar' + >>> Static.foo + 'foo' + >>> Static.bar + 'bar' + + *Legacy* + + For compatibility, if the metaclass isn't specified, the + legacy behavior will be invoked. + + >>> class X: + ... val = None + ... @classproperty + ... def foo(cls): + ... return cls.val + ... @foo.setter + ... def foo(cls, val): + ... cls.val = val + >>> X.foo + >>> X.foo = 3 + >>> X.foo + 3 + >>> x = X() + >>> x.foo + 3 + >>> X.foo = 4 + >>> x.foo + 4 + + Note, because the metaclass was not specified, setting + a value on an instance does not have the intended effect. + + >>> x.foo = 5 + >>> x.foo + 5 + >>> X.foo # should be 5 + 4 + >>> vars(x) # should be empty + {'foo': 5} + >>> X().foo # should be 5 + 4 + """ + + class Meta(type): + def __setattr__(self, key, value): + obj = self.__dict__.get(key, None) + if type(obj) is classproperty: + return obj.__set__(self, value) + return super().__setattr__(key, value) + + def __init__(self, fget, fset=None): + self.fget = self._fix_function(fget) + self.fset = fset + fset and self.setter(fset) + + def __get__(self, instance, owner=None): + return self.fget.__get__(None, owner)() + + def __set__(self, owner, value): + if not self.fset: + raise AttributeError("can't set attribute") + if type(owner) is not classproperty.Meta: + owner = type(owner) + return self.fset.__get__(None, owner)(value) + + def setter(self, fset): + self.fset = self._fix_function(fset) + return self + + @classmethod + def _fix_function(cls, fn): + """ + Ensure fn is a classmethod or staticmethod. + """ + if not isinstance(fn, (classmethod, staticmethod)): + return classmethod(fn) + return fn diff --git a/lib/jaraco/collections.py b/lib/jaraco/collections.py new file mode 100644 index 00000000..8323db78 --- /dev/null +++ b/lib/jaraco/collections.py @@ -0,0 +1,1056 @@ +import re +import operator +import collections.abc +import itertools +import copy +import functools +import random + +from jaraco.classes.properties import NonDataProperty +import jaraco.text + + +class Projection(collections.abc.Mapping): + """ + Project a set of keys over a mapping + + >>> sample = {'a': 1, 'b': 2, 'c': 3} + >>> prj = Projection(['a', 'c', 'd'], sample) + >>> prj == {'a': 1, 'c': 3} + True + + Keys should only appear if they were specified and exist in the space. + + >>> sorted(list(prj.keys())) + ['a', 'c'] + + Attempting to access a key not in the projection + results in a KeyError. + + >>> prj['b'] + Traceback (most recent call last): + ... + KeyError: 'b' + + Use the projection to update another dict. + + >>> target = {'a': 2, 'b': 2} + >>> target.update(prj) + >>> target == {'a': 1, 'b': 2, 'c': 3} + True + + Also note that Projection keeps a reference to the original dict, so + if you modify the original dict, that could modify the Projection. + + >>> del sample['a'] + >>> dict(prj) + {'c': 3} + """ + + def __init__(self, keys, space): + self._keys = tuple(keys) + self._space = space + + def __getitem__(self, key): + if key not in self._keys: + raise KeyError(key) + return self._space[key] + + def __iter__(self): + return iter(set(self._keys).intersection(self._space)) + + def __len__(self): + return len(tuple(iter(self))) + + +class DictFilter(object): + """ + Takes a dict, and simulates a sub-dict based on the keys. + + >>> sample = {'a': 1, 'b': 2, 'c': 3} + >>> filtered = DictFilter(sample, ['a', 'c']) + >>> filtered == {'a': 1, 'c': 3} + True + >>> set(filtered.values()) == {1, 3} + True + >>> set(filtered.items()) == {('a', 1), ('c', 3)} + True + + One can also filter by a regular expression pattern + + >>> sample['d'] = 4 + >>> sample['ef'] = 5 + + Here we filter for only single-character keys + + >>> filtered = DictFilter(sample, include_pattern='.$') + >>> filtered == {'a': 1, 'b': 2, 'c': 3, 'd': 4} + True + + >>> filtered['e'] + Traceback (most recent call last): + ... + KeyError: 'e' + + Also note that DictFilter keeps a reference to the original dict, so + if you modify the original dict, that could modify the filtered dict. + + >>> del sample['d'] + >>> del sample['a'] + >>> filtered == {'b': 2, 'c': 3} + True + >>> filtered != {'b': 2, 'c': 3} + False + """ + + def __init__(self, dict, include_keys=[], include_pattern=None): + self.dict = dict + self.specified_keys = set(include_keys) + if include_pattern is not None: + self.include_pattern = re.compile(include_pattern) + else: + # for performance, replace the pattern_keys property + self.pattern_keys = set() + + def get_pattern_keys(self): + keys = filter(self.include_pattern.match, self.dict.keys()) + return set(keys) + + pattern_keys = NonDataProperty(get_pattern_keys) + + @property + def include_keys(self): + return self.specified_keys.union(self.pattern_keys) + + def keys(self): + return self.include_keys.intersection(self.dict.keys()) + + def values(self): + return map(self.dict.get, self.keys()) + + def __getitem__(self, i): + if i not in self.include_keys: + raise KeyError(i) + return self.dict[i] + + def items(self): + keys = self.keys() + values = map(self.dict.get, keys) + return zip(keys, values) + + def __eq__(self, other): + return dict(self) == other + + def __ne__(self, other): + return dict(self) != other + + +def dict_map(function, dictionary): + """ + dict_map is much like the built-in function map. It takes a dictionary + and applys a function to the values of that dictionary, returning a + new dictionary with the mapped values in the original keys. + + >>> d = dict_map(lambda x:x+1, dict(a=1, b=2)) + >>> d == dict(a=2,b=3) + True + """ + return dict((key, function(value)) for key, value in dictionary.items()) + + +class RangeMap(dict): + """ + A dictionary-like object that uses the keys as bounds for a range. + Inclusion of the value for that range is determined by the + key_match_comparator, which defaults to less-than-or-equal. + A value is returned for a key if it is the first key that matches in + the sorted list of keys. + + One may supply keyword parameters to be passed to the sort function used + to sort keys (i.e. cmp [python 2 only], keys, reverse) as sort_params. + + Let's create a map that maps 1-3 -> 'a', 4-6 -> 'b' + + >>> r = RangeMap({3: 'a', 6: 'b'}) # boy, that was easy + >>> r[1], r[2], r[3], r[4], r[5], r[6] + ('a', 'a', 'a', 'b', 'b', 'b') + + Even float values should work so long as the comparison operator + supports it. + + >>> r[4.5] + 'b' + + But you'll notice that the way rangemap is defined, it must be open-ended + on one side. + + >>> r[0] + 'a' + >>> r[-1] + 'a' + + One can close the open-end of the RangeMap by using undefined_value + + >>> r = RangeMap({0: RangeMap.undefined_value, 3: 'a', 6: 'b'}) + >>> r[0] + Traceback (most recent call last): + ... + KeyError: 0 + + One can get the first or last elements in the range by using RangeMap.Item + + >>> last_item = RangeMap.Item(-1) + >>> r[last_item] + 'b' + + .last_item is a shortcut for Item(-1) + + >>> r[RangeMap.last_item] + 'b' + + Sometimes it's useful to find the bounds for a RangeMap + + >>> r.bounds() + (0, 6) + + RangeMap supports .get(key, default) + + >>> r.get(0, 'not found') + 'not found' + + >>> r.get(7, 'not found') + 'not found' + """ + + def __init__(self, source, sort_params={}, key_match_comparator=operator.le): + dict.__init__(self, source) + self.sort_params = sort_params + self.match = key_match_comparator + + def __getitem__(self, item): + sorted_keys = sorted(self.keys(), **self.sort_params) + if isinstance(item, RangeMap.Item): + result = self.__getitem__(sorted_keys[item]) + else: + key = self._find_first_match_(sorted_keys, item) + result = dict.__getitem__(self, key) + if result is RangeMap.undefined_value: + raise KeyError(key) + return result + + def get(self, key, default=None): + """ + Return the value for key if key is in the dictionary, else default. + If default is not given, it defaults to None, so that this method + never raises a KeyError. + """ + try: + return self[key] + except KeyError: + return default + + def _find_first_match_(self, keys, item): + is_match = functools.partial(self.match, item) + matches = list(filter(is_match, keys)) + if matches: + return matches[0] + raise KeyError(item) + + def bounds(self): + sorted_keys = sorted(self.keys(), **self.sort_params) + return (sorted_keys[RangeMap.first_item], sorted_keys[RangeMap.last_item]) + + # some special values for the RangeMap + undefined_value = type(str('RangeValueUndefined'), (object,), {})() + + class Item(int): + "RangeMap Item" + + first_item = Item(0) + last_item = Item(-1) + + +def __identity(x): + return x + + +def sorted_items(d, key=__identity, reverse=False): + """ + Return the items of the dictionary sorted by the keys + + >>> sample = dict(foo=20, bar=42, baz=10) + >>> tuple(sorted_items(sample)) + (('bar', 42), ('baz', 10), ('foo', 20)) + + >>> reverse_string = lambda s: ''.join(reversed(s)) + >>> tuple(sorted_items(sample, key=reverse_string)) + (('foo', 20), ('bar', 42), ('baz', 10)) + + >>> tuple(sorted_items(sample, reverse=True)) + (('foo', 20), ('baz', 10), ('bar', 42)) + """ + # wrap the key func so it operates on the first element of each item + def pairkey_key(item): + return key(item[0]) + + return sorted(d.items(), key=pairkey_key, reverse=reverse) + + +class KeyTransformingDict(dict): + """ + A dict subclass that transforms the keys before they're used. + Subclasses may override the default transform_key to customize behavior. + """ + + @staticmethod + def transform_key(key): # pragma: nocover + return key + + def __init__(self, *args, **kargs): + super(KeyTransformingDict, self).__init__() + # build a dictionary using the default constructs + d = dict(*args, **kargs) + # build this dictionary using transformed keys. + for item in d.items(): + self.__setitem__(*item) + + def __setitem__(self, key, val): + key = self.transform_key(key) + super(KeyTransformingDict, self).__setitem__(key, val) + + def __getitem__(self, key): + key = self.transform_key(key) + return super(KeyTransformingDict, self).__getitem__(key) + + def __contains__(self, key): + key = self.transform_key(key) + return super(KeyTransformingDict, self).__contains__(key) + + def __delitem__(self, key): + key = self.transform_key(key) + return super(KeyTransformingDict, self).__delitem__(key) + + def get(self, key, *args, **kwargs): + key = self.transform_key(key) + return super(KeyTransformingDict, self).get(key, *args, **kwargs) + + def setdefault(self, key, *args, **kwargs): + key = self.transform_key(key) + return super(KeyTransformingDict, self).setdefault(key, *args, **kwargs) + + def pop(self, key, *args, **kwargs): + key = self.transform_key(key) + return super(KeyTransformingDict, self).pop(key, *args, **kwargs) + + def matching_key_for(self, key): + """ + Given a key, return the actual key stored in self that matches. + Raise KeyError if the key isn't found. + """ + try: + return next(e_key for e_key in self.keys() if e_key == key) + except StopIteration: + raise KeyError(key) + + +class FoldedCaseKeyedDict(KeyTransformingDict): + """ + A case-insensitive dictionary (keys are compared as insensitive + if they are strings). + + >>> d = FoldedCaseKeyedDict() + >>> d['heLlo'] = 'world' + >>> list(d.keys()) == ['heLlo'] + True + >>> list(d.values()) == ['world'] + True + >>> d['hello'] == 'world' + True + >>> 'hello' in d + True + >>> 'HELLO' in d + True + >>> print(repr(FoldedCaseKeyedDict({'heLlo': 'world'})).replace("u'", "'")) + {'heLlo': 'world'} + >>> d = FoldedCaseKeyedDict({'heLlo': 'world'}) + >>> print(d['hello']) + world + >>> print(d['Hello']) + world + >>> list(d.keys()) + ['heLlo'] + >>> d = FoldedCaseKeyedDict({'heLlo': 'world', 'Hello': 'world'}) + >>> list(d.values()) + ['world'] + >>> key, = d.keys() + >>> key in ['heLlo', 'Hello'] + True + >>> del d['HELLO'] + >>> d + {} + + get should work + + >>> d['Sumthin'] = 'else' + >>> d.get('SUMTHIN') + 'else' + >>> d.get('OTHER', 'thing') + 'thing' + >>> del d['sumthin'] + + setdefault should also work + + >>> d['This'] = 'that' + >>> print(d.setdefault('this', 'other')) + that + >>> len(d) + 1 + >>> print(d['this']) + that + >>> print(d.setdefault('That', 'other')) + other + >>> print(d['THAT']) + other + + Make it pop! + + >>> print(d.pop('THAT')) + other + + To retrieve the key in its originally-supplied form, use matching_key_for + + >>> print(d.matching_key_for('this')) + This + + >>> d.matching_key_for('missing') + Traceback (most recent call last): + ... + KeyError: 'missing' + """ + + @staticmethod + def transform_key(key): + return jaraco.text.FoldedCase(key) + + +class DictAdapter(object): + """ + Provide a getitem interface for attributes of an object. + + Let's say you want to get at the string.lowercase property in a formatted + string. It's easy with DictAdapter. + + >>> import string + >>> print("lowercase is %(ascii_lowercase)s" % DictAdapter(string)) + lowercase is abcdefghijklmnopqrstuvwxyz + """ + + def __init__(self, wrapped_ob): + self.object = wrapped_ob + + def __getitem__(self, name): + return getattr(self.object, name) + + +class ItemsAsAttributes(object): + """ + Mix-in class to enable a mapping object to provide items as + attributes. + + >>> C = type(str('C'), (dict, ItemsAsAttributes), dict()) + >>> i = C() + >>> i['foo'] = 'bar' + >>> i.foo + 'bar' + + Natural attribute access takes precedence + + >>> i.foo = 'henry' + >>> i.foo + 'henry' + + But as you might expect, the mapping functionality is preserved. + + >>> i['foo'] + 'bar' + + A normal attribute error should be raised if an attribute is + requested that doesn't exist. + + >>> i.missing + Traceback (most recent call last): + ... + AttributeError: 'C' object has no attribute 'missing' + + It also works on dicts that customize __getitem__ + + >>> missing_func = lambda self, key: 'missing item' + >>> C = type( + ... str('C'), + ... (dict, ItemsAsAttributes), + ... dict(__missing__ = missing_func), + ... ) + >>> i = C() + >>> i.missing + 'missing item' + >>> i.foo + 'missing item' + """ + + def __getattr__(self, key): + try: + return getattr(super(ItemsAsAttributes, self), key) + except AttributeError as e: + # attempt to get the value from the mapping (return self[key]) + # but be careful not to lose the original exception context. + noval = object() + + def _safe_getitem(cont, key, missing_result): + try: + return cont[key] + except KeyError: + return missing_result + + result = _safe_getitem(self, key, noval) + if result is not noval: + return result + # raise the original exception, but use the original class + # name, not 'super'. + (message,) = e.args + message = message.replace('super', self.__class__.__name__, 1) + e.args = (message,) + raise + + +def invert_map(map): + """ + Given a dictionary, return another dictionary with keys and values + switched. If any of the values resolve to the same key, raises + a ValueError. + + >>> numbers = dict(a=1, b=2, c=3) + >>> letters = invert_map(numbers) + >>> letters[1] + 'a' + >>> numbers['d'] = 3 + >>> invert_map(numbers) + Traceback (most recent call last): + ... + ValueError: Key conflict in inverted mapping + """ + res = dict((v, k) for k, v in map.items()) + if not len(res) == len(map): + raise ValueError('Key conflict in inverted mapping') + return res + + +class IdentityOverrideMap(dict): + """ + A dictionary that by default maps each key to itself, but otherwise + acts like a normal dictionary. + + >>> d = IdentityOverrideMap() + >>> d[42] + 42 + >>> d['speed'] = 'speedo' + >>> print(d['speed']) + speedo + """ + + def __missing__(self, key): + return key + + +class DictStack(list, collections.abc.Mapping): + """ + A stack of dictionaries that behaves as a view on those dictionaries, + giving preference to the last. + + >>> stack = DictStack([dict(a=1, c=2), dict(b=2, a=2)]) + >>> stack['a'] + 2 + >>> stack['b'] + 2 + >>> stack['c'] + 2 + >>> len(stack) + 3 + >>> stack.push(dict(a=3)) + >>> stack['a'] + 3 + >>> set(stack.keys()) == set(['a', 'b', 'c']) + True + >>> set(stack.items()) == set([('a', 3), ('b', 2), ('c', 2)]) + True + >>> dict(**stack) == dict(stack) == dict(a=3, c=2, b=2) + True + >>> d = stack.pop() + >>> stack['a'] + 2 + >>> d = stack.pop() + >>> stack['a'] + 1 + >>> stack.get('b', None) + >>> 'c' in stack + True + """ + + def __iter__(self): + dicts = list.__iter__(self) + return iter(set(itertools.chain.from_iterable(c.keys() for c in dicts))) + + def __getitem__(self, key): + for scope in reversed(tuple(list.__iter__(self))): + if key in scope: + return scope[key] + raise KeyError(key) + + push = list.append + + def __contains__(self, other): + return collections.abc.Mapping.__contains__(self, other) + + def __len__(self): + return len(list(iter(self))) + + +class BijectiveMap(dict): + """ + A Bijective Map (two-way mapping). + + Implemented as a simple dictionary of 2x the size, mapping values back + to keys. + + Note, this implementation may be incomplete. If there's not a test for + your use case below, it's likely to fail, so please test and send pull + requests or patches for additional functionality needed. + + + >>> m = BijectiveMap() + >>> m['a'] = 'b' + >>> m == {'a': 'b', 'b': 'a'} + True + >>> print(m['b']) + a + + >>> m['c'] = 'd' + >>> len(m) + 2 + + Some weird things happen if you map an item to itself or overwrite a + single key of a pair, so it's disallowed. + + >>> m['e'] = 'e' + Traceback (most recent call last): + ValueError: Key cannot map to itself + + >>> m['d'] = 'e' + Traceback (most recent call last): + ValueError: Key/Value pairs may not overlap + + >>> m['e'] = 'd' + Traceback (most recent call last): + ValueError: Key/Value pairs may not overlap + + >>> print(m.pop('d')) + c + + >>> 'c' in m + False + + >>> m = BijectiveMap(dict(a='b')) + >>> len(m) + 1 + >>> print(m['b']) + a + + >>> m = BijectiveMap() + >>> m.update(a='b') + >>> m['b'] + 'a' + + >>> del m['b'] + >>> len(m) + 0 + >>> 'a' in m + False + """ + + def __init__(self, *args, **kwargs): + super(BijectiveMap, self).__init__() + self.update(*args, **kwargs) + + def __setitem__(self, item, value): + if item == value: + raise ValueError("Key cannot map to itself") + overlap = ( + item in self + and self[item] != value + or value in self + and self[value] != item + ) + if overlap: + raise ValueError("Key/Value pairs may not overlap") + super(BijectiveMap, self).__setitem__(item, value) + super(BijectiveMap, self).__setitem__(value, item) + + def __delitem__(self, item): + self.pop(item) + + def __len__(self): + return super(BijectiveMap, self).__len__() // 2 + + def pop(self, key, *args, **kwargs): + mirror = self[key] + super(BijectiveMap, self).__delitem__(mirror) + return super(BijectiveMap, self).pop(key, *args, **kwargs) + + def update(self, *args, **kwargs): + # build a dictionary using the default constructs + d = dict(*args, **kwargs) + # build this dictionary using transformed keys. + for item in d.items(): + self.__setitem__(*item) + + +class FrozenDict(collections.abc.Mapping, collections.abc.Hashable): + """ + An immutable mapping. + + >>> a = FrozenDict(a=1, b=2) + >>> b = FrozenDict(a=1, b=2) + >>> a == b + True + + >>> a == dict(a=1, b=2) + True + >>> dict(a=1, b=2) == a + True + >>> 'a' in a + True + >>> type(hash(a)) is type(0) + True + >>> set(iter(a)) == {'a', 'b'} + True + >>> len(a) + 2 + >>> a['a'] == a.get('a') == 1 + True + + >>> a['c'] = 3 + Traceback (most recent call last): + ... + TypeError: 'FrozenDict' object does not support item assignment + + >>> a.update(y=3) + Traceback (most recent call last): + ... + AttributeError: 'FrozenDict' object has no attribute 'update' + + Copies should compare equal + + >>> copy.copy(a) == a + True + + Copies should be the same type + + >>> isinstance(copy.copy(a), FrozenDict) + True + + FrozenDict supplies .copy(), even though + collections.abc.Mapping doesn't demand it. + + >>> a.copy() == a + True + >>> a.copy() is not a + True + """ + + __slots__ = ['__data'] + + def __new__(cls, *args, **kwargs): + self = super(FrozenDict, cls).__new__(cls) + self.__data = dict(*args, **kwargs) + return self + + # Container + def __contains__(self, key): + return key in self.__data + + # Hashable + def __hash__(self): + return hash(tuple(sorted(self.__data.items()))) + + # Mapping + def __iter__(self): + return iter(self.__data) + + def __len__(self): + return len(self.__data) + + def __getitem__(self, key): + return self.__data[key] + + # override get for efficiency provided by dict + def get(self, *args, **kwargs): + return self.__data.get(*args, **kwargs) + + # override eq to recognize underlying implementation + def __eq__(self, other): + if isinstance(other, FrozenDict): + other = other.__data + return self.__data.__eq__(other) + + def copy(self): + "Return a shallow copy of self" + return copy.copy(self) + + +class Enumeration(ItemsAsAttributes, BijectiveMap): + """ + A convenient way to provide enumerated values + + >>> e = Enumeration('a b c') + >>> e['a'] + 0 + + >>> e.a + 0 + + >>> e[1] + 'b' + + >>> set(e.names) == set('abc') + True + + >>> set(e.codes) == set(range(3)) + True + + >>> e.get('d') is None + True + + Codes need not start with 0 + + >>> e = Enumeration('a b c', range(1, 4)) + >>> e['a'] + 1 + + >>> e[3] + 'c' + """ + + def __init__(self, names, codes=None): + if isinstance(names, str): + names = names.split() + if codes is None: + codes = itertools.count() + super(Enumeration, self).__init__(zip(names, codes)) + + @property + def names(self): + return (key for key in self if isinstance(key, str)) + + @property + def codes(self): + return (self[name] for name in self.names) + + +class Everything(object): + """ + A collection "containing" every possible thing. + + >>> 'foo' in Everything() + True + + >>> import random + >>> random.randint(1, 999) in Everything() + True + + >>> random.choice([None, 'foo', 42, ('a', 'b', 'c')]) in Everything() + True + """ + + def __contains__(self, other): + return True + + +class InstrumentedDict(collections.UserDict): # type: ignore # buggy mypy + """ + Instrument an existing dictionary with additional + functionality, but always reference and mutate + the original dictionary. + + >>> orig = {'a': 1, 'b': 2} + >>> inst = InstrumentedDict(orig) + >>> inst['a'] + 1 + >>> inst['c'] = 3 + >>> orig['c'] + 3 + >>> inst.keys() == orig.keys() + True + """ + + def __init__(self, data): + super().__init__() + self.data = data + + +class Least(object): + """ + A value that is always lesser than any other + + >>> least = Least() + >>> 3 < least + False + >>> 3 > least + True + >>> least < 3 + True + >>> least <= 3 + True + >>> least > 3 + False + >>> 'x' > least + True + >>> None > least + True + """ + + def __le__(self, other): + return True + + __lt__ = __le__ + + def __ge__(self, other): + return False + + __gt__ = __ge__ + + +class Greatest(object): + """ + A value that is always greater than any other + + >>> greatest = Greatest() + >>> 3 < greatest + True + >>> 3 > greatest + False + >>> greatest < 3 + False + >>> greatest > 3 + True + >>> greatest >= 3 + True + >>> 'x' > greatest + False + >>> None > greatest + False + """ + + def __ge__(self, other): + return True + + __gt__ = __ge__ + + def __le__(self, other): + return False + + __lt__ = __le__ + + +def pop_all(items): + """ + Clear items in place and return a copy of items. + + >>> items = [1, 2, 3] + >>> popped = pop_all(items) + >>> popped is items + False + >>> popped + [1, 2, 3] + >>> items + [] + """ + result, items[:] = items[:], [] + return result + + +# mypy disabled for pytest-dev/pytest#8332 +class FreezableDefaultDict(collections.defaultdict): # type: ignore + """ + Often it is desirable to prevent the mutation of + a default dict after its initial construction, such + as to prevent mutation during iteration. + + >>> dd = FreezableDefaultDict(list) + >>> dd[0].append('1') + >>> dd.freeze() + >>> dd[1] + [] + >>> len(dd) + 1 + """ + + def __missing__(self, key): + return getattr(self, '_frozen', super().__missing__)(key) + + def freeze(self): + self._frozen = lambda key: self.default_factory() + + +class Accumulator: + def __init__(self, initial=0): + self.val = initial + + def __call__(self, val): + self.val += val + return self.val + + +class WeightedLookup(RangeMap): + """ + Given parameters suitable for a dict representing keys + and a weighted proportion, return a RangeMap representing + spans of values proportial to the weights: + + >>> even = WeightedLookup(a=1, b=1) + + [0, 1) -> a + [1, 2) -> b + + >>> lk = WeightedLookup(a=1, b=2) + + [0, 1) -> a + [1, 3) -> b + + >>> lk[.5] + 'a' + >>> lk[1.5] + 'b' + + Adds ``.random()`` to select a random weighted value: + + >>> lk.random() in ['a', 'b'] + True + + >>> choices = [lk.random() for x in range(1000)] + + Statistically speaking, choices should be .5 a:b + >>> ratio = choices.count('a') / choices.count('b') + >>> .4 < ratio < .6 + True + """ + + def __init__(self, *args, **kwargs): + raw = dict(*args, **kwargs) + + # allocate keys by weight + indexes = map(Accumulator(), raw.values()) + super().__init__(zip(indexes, raw.keys()), key_match_comparator=operator.lt) + + def random(self): + lower, upper = self.bounds() + selector = random.random() * upper + return self[selector] diff --git a/lib/jaraco/functools.py b/lib/jaraco/functools.py new file mode 100644 index 00000000..fcdbb4f9 --- /dev/null +++ b/lib/jaraco/functools.py @@ -0,0 +1,525 @@ +import functools +import time +import inspect +import collections +import types +import itertools + +import more_itertools + +from typing import Callable, TypeVar + + +CallableT = TypeVar("CallableT", bound=Callable[..., object]) + + +def compose(*funcs): + """ + Compose any number of unary functions into a single unary function. + + >>> import textwrap + >>> expected = str.strip(textwrap.dedent(compose.__doc__)) + >>> strip_and_dedent = compose(str.strip, textwrap.dedent) + >>> strip_and_dedent(compose.__doc__) == expected + True + + Compose also allows the innermost function to take arbitrary arguments. + + >>> round_three = lambda x: round(x, ndigits=3) + >>> f = compose(round_three, int.__truediv__) + >>> [f(3*x, x+1) for x in range(1,10)] + [1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7] + """ + + def compose_two(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + return functools.reduce(compose_two, funcs) + + +def method_caller(method_name, *args, **kwargs): + """ + Return a function that will call a named method on the + target object with optional positional and keyword + arguments. + + >>> lower = method_caller('lower') + >>> lower('MyString') + 'mystring' + """ + + def call_method(target): + func = getattr(target, method_name) + return func(*args, **kwargs) + + return call_method + + +def once(func): + """ + Decorate func so it's only ever called the first time. + + This decorator can ensure that an expensive or non-idempotent function + will not be expensive on subsequent calls and is idempotent. + + >>> add_three = once(lambda a: a+3) + >>> add_three(3) + 6 + >>> add_three(9) + 6 + >>> add_three('12') + 6 + + To reset the stored value, simply clear the property ``saved_result``. + + >>> del add_three.saved_result + >>> add_three(9) + 12 + >>> add_three(8) + 12 + + Or invoke 'reset()' on it. + + >>> add_three.reset() + >>> add_three(-3) + 0 + >>> add_three(0) + 0 + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not hasattr(wrapper, 'saved_result'): + wrapper.saved_result = func(*args, **kwargs) + return wrapper.saved_result + + wrapper.reset = lambda: vars(wrapper).__delitem__('saved_result') + return wrapper + + +def method_cache( + method: CallableT, + cache_wrapper: Callable[ + [CallableT], CallableT + ] = functools.lru_cache(), # type: ignore[assignment] +) -> CallableT: + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + + def wrapper(self: object, *args: object, **kwargs: object) -> object: + # it's the first call, replace the method with a cached, bound method + bound_method: CallableT = types.MethodType( # type: ignore[assignment] + method, self + ) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None # type: ignore[attr-defined] + + return ( # type: ignore[return-value] + _special_method_cache(method, cache_wrapper) or wrapper + ) + + +def _special_method_cache(method, cache_wrapper): + """ + Because Python treats special methods differently, it's not + possible to use instance attributes to implement the cached + methods. + + Instead, install the wrapper method under a different name + and return a simple proxy to that wrapper. + + https://github.com/jaraco/jaraco.functools/issues/5 + """ + name = method.__name__ + special_names = '__getattr__', '__getitem__' + if name not in special_names: + return + + wrapper_name = '__cached' + name + + def proxy(self, *args, **kwargs): + if wrapper_name not in vars(self): + bound = types.MethodType(method, self) + cache = cache_wrapper(bound) + setattr(self, wrapper_name, cache) + else: + cache = getattr(self, wrapper_name) + return cache(*args, **kwargs) + + return proxy + + +def apply(transform): + """ + Decorate a function with a transform function that is + invoked on results returned from the decorated function. + + >>> @apply(reversed) + ... def get_numbers(start): + ... "doc for get_numbers" + ... return range(start, start+3) + >>> list(get_numbers(4)) + [6, 5, 4] + >>> get_numbers.__doc__ + 'doc for get_numbers' + """ + + def wrap(func): + return functools.wraps(func)(compose(transform, func)) + + return wrap + + +def result_invoke(action): + r""" + Decorate a function with an action function that is + invoked on the results returned from the decorated + function (for its side-effect), then return the original + result. + + >>> @result_invoke(print) + ... def add_two(a, b): + ... return a + b + >>> x = add_two(2, 3) + 5 + >>> x + 5 + """ + + def wrap(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + action(result) + return result + + return wrapper + + return wrap + + +def call_aside(f, *args, **kwargs): + """ + Call a function for its side effect after initialization. + + >>> @call_aside + ... def func(): print("called") + called + >>> func() + called + + Use functools.partial to pass parameters to the initial call + + >>> @functools.partial(call_aside, name='bingo') + ... def func(name): print("called with", name) + called with bingo + """ + f(*args, **kwargs) + return f + + +class Throttler: + """ + Rate-limit a function (or other callable) + """ + + def __init__(self, func, max_rate=float('Inf')): + if isinstance(func, Throttler): + func = func.func + self.func = func + self.max_rate = max_rate + self.reset() + + def reset(self): + self.last_called = 0 + + def __call__(self, *args, **kwargs): + self._wait() + return self.func(*args, **kwargs) + + def _wait(self): + "ensure at least 1/max_rate seconds from last call" + elapsed = time.time() - self.last_called + must_wait = 1 / self.max_rate - elapsed + time.sleep(max(0, must_wait)) + self.last_called = time.time() + + def __get__(self, obj, type=None): + return first_invoke(self._wait, functools.partial(self.func, obj)) + + +def first_invoke(func1, func2): + """ + Return a function that when invoked will invoke func1 without + any parameters (for its side-effect) and then invoke func2 + with whatever parameters were passed, returning its result. + """ + + def wrapper(*args, **kwargs): + func1() + return func2(*args, **kwargs) + + return wrapper + + +def retry_call(func, cleanup=lambda: None, retries=0, trap=()): + """ + Given a callable func, trap the indicated exceptions + for up to 'retries' times, invoking cleanup on the + exception. On the final attempt, allow any exceptions + to propagate. + """ + attempts = itertools.count() if retries == float('inf') else range(retries) + for attempt in attempts: + try: + return func() + except trap: + cleanup() + + return func() + + +def retry(*r_args, **r_kwargs): + """ + Decorator wrapper for retry_call. Accepts arguments to retry_call + except func and then returns a decorator for the decorated function. + + Ex: + + >>> @retry(retries=3) + ... def my_func(a, b): + ... "this is my funk" + ... print(a, b) + >>> my_func.__doc__ + 'this is my funk' + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*f_args, **f_kwargs): + bound = functools.partial(func, *f_args, **f_kwargs) + return retry_call(bound, *r_args, **r_kwargs) + + return wrapper + + return decorate + + +def print_yielded(func): + """ + Convert a generator into a function that prints all yielded elements + + >>> @print_yielded + ... def x(): + ... yield 3; yield None + >>> x() + 3 + None + """ + print_all = functools.partial(map, print) + print_results = compose(more_itertools.consume, print_all, func) + return functools.wraps(func)(print_results) + + +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper + + +def assign_params(func, namespace): + """ + Assign parameters from namespace where func solicits. + + >>> def func(x, y=3): + ... print(x, y) + >>> assigned = assign_params(func, dict(x=2, z=4)) + >>> assigned() + 2 3 + + The usual errors are raised if a function doesn't receive + its required parameters: + + >>> assigned = assign_params(func, dict(y=3, z=4)) + >>> assigned() + Traceback (most recent call last): + TypeError: func() ...argument... + + It even works on methods: + + >>> class Handler: + ... def meth(self, arg): + ... print(arg) + >>> assign_params(Handler().meth, dict(arg='crystal', foo='clear'))() + crystal + """ + sig = inspect.signature(func) + params = sig.parameters.keys() + call_ns = {k: namespace[k] for k in params if k in namespace} + return functools.partial(func, **call_ns) + + +def save_method_args(method): + """ + Wrap a method such that when it is called, the args and kwargs are + saved on the method. + + >>> class MyClass: + ... @save_method_args + ... def method(self, a, b): + ... print(a, b) + >>> my_ob = MyClass() + >>> my_ob.method(1, 2) + 1 2 + >>> my_ob._saved_method.args + (1, 2) + >>> my_ob._saved_method.kwargs + {} + >>> my_ob.method(a=3, b='foo') + 3 foo + >>> my_ob._saved_method.args + () + >>> my_ob._saved_method.kwargs == dict(a=3, b='foo') + True + + The arguments are stored on the instance, allowing for + different instance to save different args. + + >>> your_ob = MyClass() + >>> your_ob.method({str('x'): 3}, b=[4]) + {'x': 3} [4] + >>> your_ob._saved_method.args + ({'x': 3},) + >>> my_ob._saved_method.args + () + """ + args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs') + + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + attr_name = '_saved_' + method.__name__ + attr = args_and_kwargs(args, kwargs) + setattr(self, attr_name, attr) + return method(self, *args, **kwargs) + + return wrapper + + +def except_(*exceptions, replace=None, use=None): + """ + Replace the indicated exceptions, if raised, with the indicated + literal replacement or evaluated expression (if present). + + >>> safe_int = except_(ValueError)(int) + >>> safe_int('five') + >>> safe_int('5') + 5 + + Specify a literal replacement with ``replace``. + + >>> safe_int_r = except_(ValueError, replace=0)(int) + >>> safe_int_r('five') + 0 + + Provide an expression to ``use`` to pass through particular parameters. + + >>> safe_int_pt = except_(ValueError, use='args[0]')(int) + >>> safe_int_pt('five') + 'five' + + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exceptions: + try: + return eval(use) + except TypeError: + return replace + + return wrapper + + return decorate diff --git a/lib/jaraco/text/Lorem ipsum.txt b/lib/jaraco/text/Lorem ipsum.txt new file mode 100644 index 00000000..986f944b --- /dev/null +++ b/lib/jaraco/text/Lorem ipsum.txt @@ -0,0 +1,2 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus magna felis sollicitudin mauris. Integer in mauris eu nibh euismod gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue, eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis, neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis, molestie eu, feugiat in, orci. In hac habitasse platea dictumst. diff --git a/lib/jaraco/text/__init__.py b/lib/jaraco/text/__init__.py new file mode 100644 index 00000000..87e18681 --- /dev/null +++ b/lib/jaraco/text/__init__.py @@ -0,0 +1,529 @@ +import re +import itertools +import textwrap +import functools + +try: + from importlib.resources import files # type: ignore +except ImportError: # pragma: nocover + from importlib_resources import files # type: ignore + +from jaraco.functools import compose, method_cache + + +def substitution(old, new): + """ + Return a function that will perform a substitution on a string + """ + return lambda s: s.replace(old, new) + + +def multi_substitution(*substitutions): + """ + Take a sequence of pairs specifying substitutions, and create + a function that performs those substitutions. + + >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo') + 'baz' + """ + substitutions = itertools.starmap(substitution, substitutions) + # compose function applies last function first, so reverse the + # substitutions to get the expected order. + substitutions = reversed(tuple(substitutions)) + return compose(*substitutions) + + +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use ``in_``: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) + + +def is_decodable(value): + r""" + Return True if the supplied value is decodable (using the default + encoding). + + >>> is_decodable(b'\xff') + False + >>> is_decodable(b'\x32') + True + """ + # TODO: This code could be expressed more consisely and directly + # with a jaraco.context.ExceptionTrap, but that adds an unfortunate + # long dependency tree, so for now, use boolean literals. + try: + value.decode() + except UnicodeDecodeError: + return False + return True + + +def is_binary(value): + r""" + Return True if the value appears to be binary (that is, it's a byte + string and isn't decodable). + + >>> is_binary(b'\xff') + True + >>> is_binary('\xff') + False + """ + return isinstance(value, bytes) and not is_decodable(value) + + +def trim(s): + r""" + Trim something like a docstring to remove the whitespace that + is common due to indentation and formatting. + + >>> trim("\n\tfoo = bar\n\t\tbar = baz\n") + 'foo = bar\n\tbar = baz' + """ + return textwrap.dedent(s).strip() + + +def wrap(s): + """ + Wrap lines of text, retaining existing newlines as + paragraph markers. + + >>> print(wrap(lorem_ipsum)) + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do + eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris nisi ut + aliquip ex ea commodo consequat. Duis aute irure dolor in + reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla + pariatur. Excepteur sint occaecat cupidatat non proident, sunt in + culpa qui officia deserunt mollit anim id est laborum. + + Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam + varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus + magna felis sollicitudin mauris. Integer in mauris eu nibh euismod + gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis + risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue, + eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas + fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla + a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis, + neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing + sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque + nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus + quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis, + molestie eu, feugiat in, orci. In hac habitasse platea dictumst. + """ + paragraphs = s.splitlines() + wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs) + return '\n\n'.join(wrapped) + + +def unwrap(s): + r""" + Given a multi-line string, return an unwrapped version. + + >>> wrapped = wrap(lorem_ipsum) + >>> wrapped.count('\n') + 20 + >>> unwrapped = unwrap(wrapped) + >>> unwrapped.count('\n') + 1 + >>> print(unwrapped) + Lorem ipsum dolor sit amet, consectetur adipiscing ... + Curabitur pretium tincidunt lacus. Nulla gravida orci ... + + """ + paragraphs = re.split(r'\n\n+', s) + cleaned = (para.replace('\n', ' ') for para in paragraphs) + return '\n'.join(cleaned) + + +lorem_ipsum: str = files(__name__).joinpath('Lorem ipsum.txt').read_text() + + +class Splitter(object): + """object that will split a string with the given arguments for each call + + >>> s = Splitter(',') + >>> s('hello, world, this is your, master calling') + ['hello', ' world', ' this is your', ' master calling'] + """ + + def __init__(self, *args): + self.args = args + + def __call__(self, s): + return s.split(*self.args) + + +def indent(string, prefix=' ' * 4): + """ + >>> indent('foo') + ' foo' + """ + return prefix + string + + +class WordSet(tuple): + """ + Given an identifier, return the words that identifier represents, + whether in camel case, underscore-separated, etc. + + >>> WordSet.parse("camelCase") + ('camel', 'Case') + + >>> WordSet.parse("under_sep") + ('under', 'sep') + + Acronyms should be retained + + >>> WordSet.parse("firstSNL") + ('first', 'SNL') + + >>> WordSet.parse("you_and_I") + ('you', 'and', 'I') + + >>> WordSet.parse("A simple test") + ('A', 'simple', 'test') + + Multiple caps should not interfere with the first cap of another word. + + >>> WordSet.parse("myABCClass") + ('my', 'ABC', 'Class') + + The result is a WordSet, so you can get the form you need. + + >>> WordSet.parse("myABCClass").underscore_separated() + 'my_ABC_Class' + + >>> WordSet.parse('a-command').camel_case() + 'ACommand' + + >>> WordSet.parse('someIdentifier').lowered().space_separated() + 'some identifier' + + Slices of the result should return another WordSet. + + >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated() + 'out_of_context' + + >>> WordSet.from_class_name(WordSet()).lowered().space_separated() + 'word set' + + >>> example = WordSet.parse('figured it out') + >>> example.headless_camel_case() + 'figuredItOut' + >>> example.dash_separated() + 'figured-it-out' + + """ + + _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))') + + def capitalized(self): + return WordSet(word.capitalize() for word in self) + + def lowered(self): + return WordSet(word.lower() for word in self) + + def camel_case(self): + return ''.join(self.capitalized()) + + def headless_camel_case(self): + words = iter(self) + first = next(words).lower() + new_words = itertools.chain((first,), WordSet(words).camel_case()) + return ''.join(new_words) + + def underscore_separated(self): + return '_'.join(self) + + def dash_separated(self): + return '-'.join(self) + + def space_separated(self): + return ' '.join(self) + + def trim_right(self, item): + """ + Remove the item from the end of the set. + + >>> WordSet.parse('foo bar').trim_right('foo') + ('foo', 'bar') + >>> WordSet.parse('foo bar').trim_right('bar') + ('foo',) + >>> WordSet.parse('').trim_right('bar') + () + """ + return self[:-1] if self and self[-1] == item else self + + def trim_left(self, item): + """ + Remove the item from the beginning of the set. + + >>> WordSet.parse('foo bar').trim_left('foo') + ('bar',) + >>> WordSet.parse('foo bar').trim_left('bar') + ('foo', 'bar') + >>> WordSet.parse('').trim_left('bar') + () + """ + return self[1:] if self and self[0] == item else self + + def trim(self, item): + """ + >>> WordSet.parse('foo bar').trim('foo') + ('bar',) + """ + return self.trim_left(item).trim_right(item) + + def __getitem__(self, item): + result = super(WordSet, self).__getitem__(item) + if isinstance(item, slice): + result = WordSet(result) + return result + + # for compatibility with Python 2 + def __getslice__(self, i, j): # pragma: nocover + return self.__getitem__(slice(i, j)) + + @classmethod + def parse(cls, identifier): + matches = cls._pattern.finditer(identifier) + return WordSet(match.group(0) for match in matches) + + @classmethod + def from_class_name(cls, subject): + return cls.parse(subject.__class__.__name__) + + +# for backward compatibility +words = WordSet.parse + + +def simple_html_strip(s): + r""" + Remove HTML from the string `s`. + + >>> str(simple_html_strip('')) + '' + + >>> print(simple_html_strip('A stormy day in paradise')) + A stormy day in paradise + + >>> print(simple_html_strip('Somebody tell the truth.')) + Somebody tell the truth. + + >>> print(simple_html_strip('What about
\nmultiple lines?')) + What about + multiple lines? + """ + html_stripper = re.compile('()|(<[^>]*>)|([^<]+)', re.DOTALL) + texts = (match.group(3) or '' for match in html_stripper.finditer(s)) + return ''.join(texts) + + +class SeparatedValues(str): + """ + A string separated by a separator. Overrides __iter__ for getting + the values. + + >>> list(SeparatedValues('a,b,c')) + ['a', 'b', 'c'] + + Whitespace is stripped and empty values are discarded. + + >>> list(SeparatedValues(' a, b , c, ')) + ['a', 'b', 'c'] + """ + + separator = ',' + + def __iter__(self): + parts = self.split(self.separator) + return filter(None, (part.strip() for part in parts)) + + +class Stripper: + r""" + Given a series of lines, find the common prefix and strip it from them. + + >>> lines = [ + ... 'abcdefg\n', + ... 'abc\n', + ... 'abcde\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix + 'abc' + >>> list(res.lines) + ['defg\n', '\n', 'de\n'] + + If no prefix is common, nothing should be stripped. + + >>> lines = [ + ... 'abcd\n', + ... '1234\n', + ... ] + >>> res = Stripper.strip_prefix(lines) + >>> res.prefix = '' + >>> list(res.lines) + ['abcd\n', '1234\n'] + """ + + def __init__(self, prefix, lines): + self.prefix = prefix + self.lines = map(self, lines) + + @classmethod + def strip_prefix(cls, lines): + prefix_lines, lines = itertools.tee(lines) + prefix = functools.reduce(cls.common_prefix, prefix_lines) + return cls(prefix, lines) + + def __call__(self, line): + if not self.prefix: + return line + null, prefix, rest = line.partition(self.prefix) + return rest + + @staticmethod + def common_prefix(s1, s2): + """ + Return the common prefix of two lines. + """ + index = min(len(s1), len(s2)) + while s1[:index] != s2[:index]: + index -= 1 + return s1[:index] + + +def remove_prefix(text, prefix): + """ + Remove the prefix from the text if it exists. + + >>> remove_prefix('underwhelming performance', 'underwhelming ') + 'performance' + + >>> remove_prefix('something special', 'sample') + 'something special' + """ + null, prefix, rest = text.rpartition(prefix) + return rest + + +def remove_suffix(text, suffix): + """ + Remove the suffix from the text if it exists. + + >>> remove_suffix('name.git', '.git') + 'name' + + >>> remove_suffix('something special', 'sample') + 'something special' + """ + rest, suffix, null = text.partition(suffix) + return rest + + +def normalize_newlines(text): + r""" + Replace alternate newlines with the canonical newline. + + >>> normalize_newlines('Lorem Ipsum\u2029') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\r\n') + 'Lorem Ipsum\n' + >>> normalize_newlines('Lorem Ipsum\x85') + 'Lorem Ipsum\n' + """ + newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029'] + pattern = '|'.join(newlines) + return re.sub(pattern, '\n', text) diff --git a/lib/jellyfish/_jellyfish.py b/lib/jellyfish/_jellyfish.py index f3a4c9b5..4e9defdd 100644 --- a/lib/jellyfish/_jellyfish.py +++ b/lib/jellyfish/_jellyfish.py @@ -23,7 +23,7 @@ def levenshtein_distance(s1, s2): return rows-1 prev = None - cur = range(cols) + cur = list(range(cols)) for r in _range(1, rows): prev, cur = cur, [r] + [0]*(cols-1) for c in _range(1, cols): diff --git a/lib/jellyfish/test.py b/lib/jellyfish/test.py index 72ef9344..95d47c87 100644 --- a/lib/jellyfish/test.py +++ b/lib/jellyfish/test.py @@ -107,7 +107,7 @@ if platform.python_implementation() == 'CPython': def test_match_rating_comparison_segfault(): import hashlib from jellyfish import cjellyfish as jf - sha1s = [u'{}'.format(hashlib.sha1(str(v).encode('ascii')).hexdigest()) + sha1s = ['{}'.format(hashlib.sha1(str(v).encode('ascii')).hexdigest()) for v in range(100)] # this segfaulted on 0.1.2 assert [[jf.match_rating_comparison(h1, h2) for h1 in sha1s] for h2 in sha1s] @@ -117,8 +117,8 @@ if platform.python_implementation() == 'CPython': # unfortunate difference in behavior between Py & C versions from jellyfish.cjellyfish import damerau_levenshtein_distance as c_dl from jellyfish._jellyfish import damerau_levenshtein_distance as py_dl - s1 = u'mylifeoutdoors' - s2 = u'нахлыст' + s1 = 'mylifeoutdoors' + s2 = 'нахлыст' with pytest.raises(ValueError): c_dl(s1, s2) with pytest.raises(ValueError): @@ -129,85 +129,85 @@ if platform.python_implementation() == 'CPython': def test_jaro_winkler_long_tolerance(jf): - no_lt = jf.jaro_winkler(u'two long strings', u'two long stringz', long_tolerance=False) - with_lt = jf.jaro_winkler(u'two long strings', u'two long stringz', long_tolerance=True) + no_lt = jf.jaro_winkler('two long strings', 'two long stringz', long_tolerance=False) + with_lt = jf.jaro_winkler('two long strings', 'two long stringz', long_tolerance=True) # make sure long_tolerance does something assertAlmostEqual(no_lt, 0.975) assertAlmostEqual(with_lt, 0.984) def test_damerau_levenshtein_distance_type(jf): - jf.damerau_levenshtein_distance(u'abc', u'abc') + jf.damerau_levenshtein_distance('abc', 'abc') with pytest.raises(TypeError) as exc: jf.damerau_levenshtein_distance(b'abc', b'abc') assert 'expected' in str(exc.value) def test_levenshtein_distance_type(jf): - assert jf.levenshtein_distance(u'abc', u'abc') == 0 + assert jf.levenshtein_distance('abc', 'abc') == 0 with pytest.raises(TypeError) as exc: jf.levenshtein_distance(b'abc', b'abc') assert 'expected' in str(exc.value) def test_jaro_distance_type(jf): - assert jf.jaro_distance(u'abc', u'abc') == 1 + assert jf.jaro_distance('abc', 'abc') == 1 with pytest.raises(TypeError) as exc: jf.jaro_distance(b'abc', b'abc') assert 'expected' in str(exc.value) def test_jaro_winkler_type(jf): - assert jf.jaro_winkler(u'abc', u'abc') == 1 + assert jf.jaro_winkler('abc', 'abc') == 1 with pytest.raises(TypeError) as exc: jf.jaro_winkler(b'abc', b'abc') assert 'expected' in str(exc.value) def test_mra_comparison_type(jf): - assert jf.match_rating_comparison(u'abc', u'abc') is True + assert jf.match_rating_comparison('abc', 'abc') is True with pytest.raises(TypeError) as exc: jf.match_rating_comparison(b'abc', b'abc') assert 'expected' in str(exc.value) def test_hamming_type(jf): - assert jf.hamming_distance(u'abc', u'abc') == 0 + assert jf.hamming_distance('abc', 'abc') == 0 with pytest.raises(TypeError) as exc: jf.hamming_distance(b'abc', b'abc') assert 'expected' in str(exc.value) def test_soundex_type(jf): - assert jf.soundex(u'ABC') == 'A120' + assert jf.soundex('ABC') == 'A120' with pytest.raises(TypeError) as exc: jf.soundex(b'ABC') assert 'expected' in str(exc.value) def test_metaphone_type(jf): - assert jf.metaphone(u'abc') == 'ABK' + assert jf.metaphone('abc') == 'ABK' with pytest.raises(TypeError) as exc: jf.metaphone(b'abc') assert 'expected' in str(exc.value) def test_nysiis_type(jf): - assert jf.nysiis(u'abc') == 'ABC' + assert jf.nysiis('abc') == 'ABC' with pytest.raises(TypeError) as exc: jf.nysiis(b'abc') assert 'expected' in str(exc.value) def test_mr_codex_type(jf): - assert jf.match_rating_codex(u'abc') == 'ABC' + assert jf.match_rating_codex('abc') == 'ABC' with pytest.raises(TypeError) as exc: jf.match_rating_codex(b'abc') assert 'expected' in str(exc.value) def test_porter_type(jf): - assert jf.porter_stem(u'abc') == 'abc' + assert jf.porter_stem('abc') == 'abc' with pytest.raises(TypeError) as exc: jf.porter_stem(b'abc') assert 'expected' in str(exc.value) diff --git a/lib/logutils/__init__.py b/lib/logutils/__init__.py index 44e261f8..1cf7dc13 100644 --- a/lib/logutils/__init__.py +++ b/lib/logutils/__init__.py @@ -111,7 +111,7 @@ class Formatter(logging.Formatter): """ if style not in _STYLES: raise ValueError('Style must be one of: %s' % ','.join( - _STYLES.keys())) + list(_STYLES.keys()))) self._style = _STYLES[style](fmt) self._fmt = self._style._fmt self.datefmt = datefmt diff --git a/lib/logutils/colorize.py b/lib/logutils/colorize.py index 2c396394..0b1d2b0a 100644 --- a/lib/logutils/colorize.py +++ b/lib/logutils/colorize.py @@ -6,9 +6,9 @@ import logging import os try: - unicode + str except NameError: - unicode = None + str = None class ColorizingStreamHandler(logging.StreamHandler): """ @@ -63,7 +63,7 @@ class ColorizingStreamHandler(logging.StreamHandler): try: message = self.format(record) stream = self.stream - if unicode and isinstance(message, unicode): + if str and isinstance(message, str): enc = getattr(stream, 'encoding', 'utf-8') message = message.encode(enc, 'replace') if not self.is_tty: diff --git a/lib/logutils/dictconfig.py b/lib/logutils/dictconfig.py index 4a2281f3..589f385c 100644 --- a/lib/logutils/dictconfig.py +++ b/lib/logutils/dictconfig.py @@ -7,13 +7,13 @@ import sys import types try: - basestring + str except NameError: - basestring = str + str = str try: - StandardError + Exception except NameError: - StandardError = Exception + Exception = Exception IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I) @@ -228,7 +228,7 @@ class BaseConfigurator(object): isinstance(value, tuple): value = ConvertingTuple(value) value.configurator = self - elif isinstance(value, basestring): + elif isinstance(value, str): m = self.CONVERT_PATTERN.match(value) if m: d = m.groupdict() @@ -243,14 +243,14 @@ class BaseConfigurator(object): def configure_custom(self, config): """Configure an object with a user-supplied factory.""" c = config.pop('()') - if isinstance(c, basestring): + if isinstance(c, str): c = self.resolve(c) props = config.pop('.', None) # Check for valid identifiers kwargs = dict([(k, config[k]) for k in config if valid_ident(k)]) result = c(**kwargs) if props: - for name, value in props.items(): + for name, value in list(props.items()): setattr(result, name, value) return result @@ -304,7 +304,7 @@ class DictConfigurator(BaseConfigurator): level = handler_config.get('level', None) if level: handler.setLevel(_checkLevel(level)) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure handler ' '%r: %s' % (name, e)) @@ -312,7 +312,7 @@ class DictConfigurator(BaseConfigurator): for name in loggers: try: self.configure_logger(name, loggers[name], True) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure logger ' '%r: %s' % (name, e)) @@ -320,7 +320,7 @@ class DictConfigurator(BaseConfigurator): if root: try: self.configure_root(root, True) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure root ' 'logger: %s' % e) @@ -336,7 +336,7 @@ class DictConfigurator(BaseConfigurator): try: formatters[name] = self.configure_formatter( formatters[name]) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure ' 'formatter %r: %s' % (name, e)) @@ -345,7 +345,7 @@ class DictConfigurator(BaseConfigurator): for name in filters: try: filters[name] = self.configure_filter(filters[name]) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure ' 'filter %r: %s' % (name, e)) @@ -359,7 +359,7 @@ class DictConfigurator(BaseConfigurator): handler = self.configure_handler(handlers[name]) handler.name = name handlers[name] = handler - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure handler ' '%r: %s' % (name, e)) @@ -398,7 +398,7 @@ class DictConfigurator(BaseConfigurator): existing.remove(name) try: self.configure_logger(name, loggers[name]) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure logger ' '%r: %s' % (name, e)) @@ -422,7 +422,7 @@ class DictConfigurator(BaseConfigurator): if root: try: self.configure_root(root) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to configure root ' 'logger: %s' % e) @@ -466,7 +466,7 @@ class DictConfigurator(BaseConfigurator): for f in filters: try: filterer.addFilter(self.config['filters'][f]) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to add filter %r: %s' % (f, e)) @@ -476,7 +476,7 @@ class DictConfigurator(BaseConfigurator): if formatter: try: formatter = self.config['formatters'][formatter] - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to set formatter ' '%r: %s' % (formatter, e)) @@ -484,7 +484,7 @@ class DictConfigurator(BaseConfigurator): filters = config.pop('filters', None) if '()' in config: c = config.pop('()') - if isinstance(c, basestring): + if isinstance(c, str): c = self.resolve(c) factory = c else: @@ -494,7 +494,7 @@ class DictConfigurator(BaseConfigurator): 'target' in config: try: config['target'] = self.config['handlers'][config['target']] - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to set target handler ' '%r: %s' % (config['target'], e)) @@ -531,7 +531,7 @@ class DictConfigurator(BaseConfigurator): for h in handlers: try: logger.addHandler(self.config['handlers'][h]) - except StandardError: + except Exception: e = sys.exc_info()[1] raise ValueError('Unable to add handler %r: %s' % (h, e)) diff --git a/lib/logutils/queue.py b/lib/logutils/queue.py index cced8c55..46c5c64b 100644 --- a/lib/logutils/queue.py +++ b/lib/logutils/queue.py @@ -21,9 +21,9 @@ version here is for use with earlier Python versions. """ import logging try: - import Queue as queue + import queue as queue except ImportError: - import queue + from . import queue import threading class QueueHandler(logging.Handler): diff --git a/lib/logutils/redis.py b/lib/logutils/redis.py index 0fea2d1e..4a7e84e1 100644 --- a/lib/logutils/redis.py +++ b/lib/logutils/redis.py @@ -7,7 +7,7 @@ This module contains classes which help you work with Redis queues. from logutils.queue import QueueHandler, QueueListener try: - import cPickle as pickle + import pickle as pickle except ImportError: import pickle @@ -25,7 +25,7 @@ class RedisQueueHandler(QueueHandler): """ def __init__(self, key='python.logging', redis=None, limit=0): if redis is None: - from redis import Redis + from .redis import Redis redis = Redis() self.key = key assert limit >= 0 @@ -51,7 +51,7 @@ class RedisQueueListener(QueueListener): def __init__(self, *handlers, **kwargs): redis = kwargs.get('redis') if redis is None: - from redis import Redis + from .redis import Redis redis = Redis() self.key = kwargs.get('key', 'python.logging') QueueListener.__init__(self, redis, *handlers) diff --git a/lib/mako/LICENSE b/lib/mako/LICENSE deleted file mode 100644 index 24eefa1f..00000000 --- a/lib/mako/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -This is the MIT license: http://www.opensource.org/licenses/mit-license.php - -Copyright (C) 2006-2015 the Mako authors and contributors . -Mako is a trademark of Michael Bayer. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this -software and associated documentation files (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or -substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE -FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/lib/mako/README.rst b/lib/mako/README.rst deleted file mode 100644 index 8da9d697..00000000 --- a/lib/mako/README.rst +++ /dev/null @@ -1,52 +0,0 @@ -========================= -Mako Templates for Python -========================= - -Mako is a template library written in Python. It provides a familiar, non-XML -syntax which compiles into Python modules for maximum performance. Mako's -syntax and API borrows from the best ideas of many others, including Django -templates, Cheetah, Myghty, and Genshi. Conceptually, Mako is an embedded -Python (i.e. Python Server Page) language, which refines the familiar ideas -of componentized layout and inheritance to produce one of the most -straightforward and flexible models available, while also maintaining close -ties to Python calling and scoping semantics. - -Nutshell -======== - -:: - - <%inherit file="base.html"/> - <% - rows = [[v for v in range(0,10)] for row in range(0,10)] - %> - - % for row in rows: - ${makerow(row)} - % endfor -
- - <%def name="makerow(row)"> - - % for name in row: - ${name}\ - % endfor - - - -Philosophy -=========== - -Python is a great scripting language. Don't reinvent the wheel...your templates can handle it ! - -Documentation -============== - -See documentation for Mako at http://www.makotemplates.org/docs/ - -License -======== - -Mako is licensed under an MIT-style license (see LICENSE). -Other incorporated projects may be licensed under different licenses. -All licenses allow for non-commercial and commercial use. diff --git a/lib/mako/__init__.py b/lib/mako/__init__.py index d9638481..df2ae480 100644 --- a/lib/mako/__init__.py +++ b/lib/mako/__init__.py @@ -1,8 +1,8 @@ # mako/__init__.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -__version__ = '1.0.1' +__version__ = "1.1.6" diff --git a/lib/mako/_ast_util.py b/lib/mako/_ast_util.py index efbc4fc2..bdcdbf69 100644 --- a/lib/mako/_ast_util.py +++ b/lib/mako/_ast_util.py @@ -1,5 +1,5 @@ # mako/_ast_util.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -8,69 +8,77 @@ ast ~~~ - The `ast` module helps Python applications to process trees of the Python - abstract syntax grammar. The abstract syntax itself might change with - each Python release; this module helps to find out programmatically what - the current grammar looks like and allows modifications of it. - - An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as - a flag to the `compile()` builtin function or by using the `parse()` - function from this module. The result will be a tree of objects whose - classes all inherit from `ast.AST`. - - A modified abstract syntax tree can be compiled into a Python code object - using the built-in `compile()` function. - - Additionally various helper functions are provided that make working with - the trees simpler. The main intention of the helper functions and this - module in general is to provide an easy to use interface for libraries - that work tightly with the python syntax (template engines for example). - + This is a stripped down version of Armin Ronacher's ast module. :copyright: Copyright 2008 by Armin Ronacher. :license: Python License. """ -from _ast import * + + +from _ast import Add +from _ast import And +from _ast import AST +from _ast import BitAnd +from _ast import BitOr +from _ast import BitXor +from _ast import Div +from _ast import Eq +from _ast import FloorDiv +from _ast import Gt +from _ast import GtE +from _ast import If +from _ast import In +from _ast import Invert +from _ast import Is +from _ast import IsNot +from _ast import LShift +from _ast import Lt +from _ast import LtE +from _ast import Mod +from _ast import Mult +from _ast import Name +from _ast import Not +from _ast import NotEq +from _ast import NotIn +from _ast import Or +from _ast import PyCF_ONLY_AST +from _ast import RShift +from _ast import Sub +from _ast import UAdd +from _ast import USub + from mako.compat import arg_stringname -BOOLOP_SYMBOLS = { - And: 'and', - Or: 'or' -} +BOOLOP_SYMBOLS = {And: "and", Or: "or"} BINOP_SYMBOLS = { - Add: '+', - Sub: '-', - Mult: '*', - Div: '/', - FloorDiv: '//', - Mod: '%', - LShift: '<<', - RShift: '>>', - BitOr: '|', - BitAnd: '&', - BitXor: '^' + Add: "+", + Sub: "-", + Mult: "*", + Div: "/", + FloorDiv: "//", + Mod: "%", + LShift: "<<", + RShift: ">>", + BitOr: "|", + BitAnd: "&", + BitXor: "^", } CMPOP_SYMBOLS = { - Eq: '==', - Gt: '>', - GtE: '>=', - In: 'in', - Is: 'is', - IsNot: 'is not', - Lt: '<', - LtE: '<=', - NotEq: '!=', - NotIn: 'not in' + Eq: "==", + Gt: ">", + GtE: ">=", + In: "in", + Is: "is", + IsNot: "is not", + Lt: "<", + LtE: "<=", + NotEq: "!=", + NotIn: "not in", } -UNARYOP_SYMBOLS = { - Invert: '~', - Not: 'not', - UAdd: '+', - USub: '-' -} +UNARYOP_SYMBOLS = {Invert: "~", Not: "not", UAdd: "+", USub: "-"} ALL_SYMBOLS = {} ALL_SYMBOLS.update(BOOLOP_SYMBOLS) @@ -79,105 +87,15 @@ ALL_SYMBOLS.update(CMPOP_SYMBOLS) ALL_SYMBOLS.update(UNARYOP_SYMBOLS) -def parse(expr, filename='', mode='exec'): +def parse(expr, filename="", mode="exec"): """Parse an expression into an AST node.""" return compile(expr, filename, mode, PyCF_ONLY_AST) -def to_source(node, indent_with=' ' * 4): - """ - This function can convert a node tree back into python sourcecode. This - is useful for debugging purposes, especially if you're dealing with custom - asts not generated by python itself. - - It could be that the sourcecode is evaluable when the AST itself is not - compilable / evaluable. The reason for this is that the AST contains some - more data than regular sourcecode does, which is dropped during - conversion. - - Each level of indentation is replaced with `indent_with`. Per default this - parameter is equal to four spaces as suggested by PEP 8, but it might be - adjusted to match the application's styleguide. - """ - generator = SourceGenerator(indent_with) - generator.visit(node) - return ''.join(generator.result) - - -def dump(node): - """ - A very verbose representation of the node passed. This is useful for - debugging purposes. - """ - def _format(node): - if isinstance(node, AST): - return '%s(%s)' % (node.__class__.__name__, - ', '.join('%s=%s' % (a, _format(b)) - for a, b in iter_fields(node))) - elif isinstance(node, list): - return '[%s]' % ', '.join(_format(x) for x in node) - return repr(node) - if not isinstance(node, AST): - raise TypeError('expected AST, got %r' % node.__class__.__name__) - return _format(node) - - -def copy_location(new_node, old_node): - """ - Copy the source location hint (`lineno` and `col_offset`) from the - old to the new node if possible and return the new one. - """ - for attr in 'lineno', 'col_offset': - if attr in old_node._attributes and attr in new_node._attributes \ - and hasattr(old_node, attr): - setattr(new_node, attr, getattr(old_node, attr)) - return new_node - - -def fix_missing_locations(node): - """ - Some nodes require a line number and the column offset. Without that - information the compiler will abort the compilation. Because it can be - a dull task to add appropriate line numbers and column offsets when - adding new nodes this function can help. It copies the line number and - column offset of the parent node to the child nodes without this - information. - - Unlike `copy_location` this works recursive and won't touch nodes that - already have a location information. - """ - def _fix(node, lineno, col_offset): - if 'lineno' in node._attributes: - if not hasattr(node, 'lineno'): - node.lineno = lineno - else: - lineno = node.lineno - if 'col_offset' in node._attributes: - if not hasattr(node, 'col_offset'): - node.col_offset = col_offset - else: - col_offset = node.col_offset - for child in iter_child_nodes(node): - _fix(child, lineno, col_offset) - _fix(node, 1, 0) - return node - - -def increment_lineno(node, n=1): - """ - Increment the line numbers of all nodes by `n` if they have line number - attributes. This is useful to "move code" to a different location in a - file. - """ - for node in zip((node,), walk(node)): - if 'lineno' in node._attributes: - node.lineno = getattr(node, 'lineno', 0) + n - - def iter_fields(node): """Iterate over all fields of a node, only yielding existing fields.""" # CPython 2.5 compat - if not hasattr(node, '_fields') or not node._fields: + if not hasattr(node, "_fields") or not node._fields: return for field in node._fields: try: @@ -186,66 +104,8 @@ def iter_fields(node): pass -def get_fields(node): - """Like `iter_fiels` but returns a dict.""" - return dict(iter_fields(node)) - - -def iter_child_nodes(node): - """Iterate over all child nodes or a node.""" - for name, field in iter_fields(node): - if isinstance(field, AST): - yield field - elif isinstance(field, list): - for item in field: - if isinstance(item, AST): - yield item - - -def get_child_nodes(node): - """Like `iter_child_nodes` but returns a list.""" - return list(iter_child_nodes(node)) - - -def get_compile_mode(node): - """ - Get the mode for `compile` of a given node. If the node is not a `mod` - node (`Expression`, `Module` etc.) a `TypeError` is thrown. - """ - if not isinstance(node, mod): - raise TypeError('expected mod node, got %r' % node.__class__.__name__) - return { - Expression: 'eval', - Interactive: 'single' - }.get(node.__class__, 'expr') - - -def get_docstring(node): - """ - Return the docstring for the given node or `None` if no docstring can be - found. If the node provided does not accept docstrings a `TypeError` - will be raised. - """ - if not isinstance(node, (FunctionDef, ClassDef, Module)): - raise TypeError("%r can't have docstrings" % node.__class__.__name__) - if node.body and isinstance(node.body[0], Str): - return node.body[0].s - - -def walk(node): - """ - Iterate over all nodes. This is useful if you only want to modify nodes in - place and don't care about the context or the order the nodes are returned. - """ - from collections import deque - todo = deque([node]) - while todo: - node = todo.popleft() - todo.extend(iter_child_nodes(node)) - yield node - - class NodeVisitor(object): + """ Walks the abstract syntax tree and call visitor functions for every node found. The visitor functions may return values which will be forwarded @@ -268,7 +128,7 @@ class NodeVisitor(object): exists for this node. In that case the generic visit function is used instead. """ - method = 'visit_' + node.__class__.__name__ + method = "visit_" + node.__class__.__name__ return getattr(self, method, None) def visit(self, node): @@ -290,6 +150,7 @@ class NodeVisitor(object): class NodeTransformer(NodeVisitor): + """ Walks the abstract syntax tree and allows modifications of nodes. @@ -349,6 +210,7 @@ class NodeTransformer(NodeVisitor): class SourceGenerator(NodeVisitor): + """ This visitor is able to transform a well formed syntax tree into python sourcecode. For more details have a look at the docstring of the @@ -364,7 +226,7 @@ class SourceGenerator(NodeVisitor): def write(self, x): if self.new_lines: if self.result: - self.result.append('\n' * self.new_lines) + self.result.append("\n" * self.new_lines) self.result.append(self.indent_with * self.indentation) self.new_lines = 0 self.result.append(x) @@ -383,14 +245,15 @@ class SourceGenerator(NodeVisitor): self.body(node.body) if node.orelse: self.newline() - self.write('else:') + self.write("else:") self.body(node.orelse) def signature(self, node): want_comma = [] + def write_comma(): if want_comma: - self.write(', ') + self.write(", ") else: want_comma.append(True) @@ -399,19 +262,19 @@ class SourceGenerator(NodeVisitor): write_comma() self.visit(arg) if default is not None: - self.write('=') + self.write("=") self.visit(default) if node.vararg is not None: write_comma() - self.write('*' + arg_stringname(node.vararg)) + self.write("*" + arg_stringname(node.vararg)) if node.kwarg is not None: write_comma() - self.write('**' + arg_stringname(node.kwarg)) + self.write("**" + arg_stringname(node.kwarg)) def decorators(self, node): for decorator in node.decorator_list: self.newline() - self.write('@') + self.write("@") self.visit(decorator) # Statements @@ -420,29 +283,29 @@ class SourceGenerator(NodeVisitor): self.newline() for idx, target in enumerate(node.targets): if idx: - self.write(', ') + self.write(", ") self.visit(target) - self.write(' = ') + self.write(" = ") self.visit(node.value) def visit_AugAssign(self, node): self.newline() self.visit(node.target) - self.write(BINOP_SYMBOLS[type(node.op)] + '=') + self.write(BINOP_SYMBOLS[type(node.op)] + "=") self.visit(node.value) def visit_ImportFrom(self, node): self.newline() - self.write('from %s%s import ' % ('.' * node.level, node.module)) + self.write("from %s%s import " % ("." * node.level, node.module)) for idx, item in enumerate(node.names): if idx: - self.write(', ') + self.write(", ") self.write(item) def visit_Import(self, node): self.newline() for item in node.names: - self.write('import ') + self.write("import ") self.visit(item) def visit_Expr(self, node): @@ -453,208 +316,210 @@ class SourceGenerator(NodeVisitor): self.newline(n=2) self.decorators(node) self.newline() - self.write('def %s(' % node.name) + self.write("def %s(" % node.name) self.signature(node.args) - self.write('):') + self.write("):") self.body(node.body) def visit_ClassDef(self, node): have_args = [] + def paren_or_comma(): if have_args: - self.write(', ') + self.write(", ") else: have_args.append(True) - self.write('(') + self.write("(") self.newline(n=3) self.decorators(node) self.newline() - self.write('class %s' % node.name) + self.write("class %s" % node.name) for base in node.bases: paren_or_comma() self.visit(base) # XXX: the if here is used to keep this module compatible # with python 2.6. - if hasattr(node, 'keywords'): + if hasattr(node, "keywords"): for keyword in node.keywords: paren_or_comma() - self.write(keyword.arg + '=') + self.write(keyword.arg + "=") self.visit(keyword.value) - if node.starargs is not None: + if getattr(node, "starargs", None): paren_or_comma() - self.write('*') + self.write("*") self.visit(node.starargs) - if node.kwargs is not None: + if getattr(node, "kwargs", None): paren_or_comma() - self.write('**') + self.write("**") self.visit(node.kwargs) - self.write(have_args and '):' or ':') + self.write(have_args and "):" or ":") self.body(node.body) def visit_If(self, node): self.newline() - self.write('if ') + self.write("if ") self.visit(node.test) - self.write(':') + self.write(":") self.body(node.body) while True: else_ = node.orelse if len(else_) == 1 and isinstance(else_[0], If): node = else_[0] self.newline() - self.write('elif ') + self.write("elif ") self.visit(node.test) - self.write(':') + self.write(":") self.body(node.body) else: self.newline() - self.write('else:') + self.write("else:") self.body(else_) break def visit_For(self, node): self.newline() - self.write('for ') + self.write("for ") self.visit(node.target) - self.write(' in ') + self.write(" in ") self.visit(node.iter) - self.write(':') + self.write(":") self.body_or_else(node) def visit_While(self, node): self.newline() - self.write('while ') + self.write("while ") self.visit(node.test) - self.write(':') + self.write(":") self.body_or_else(node) def visit_With(self, node): self.newline() - self.write('with ') + self.write("with ") self.visit(node.context_expr) if node.optional_vars is not None: - self.write(' as ') + self.write(" as ") self.visit(node.optional_vars) - self.write(':') + self.write(":") self.body(node.body) def visit_Pass(self, node): self.newline() - self.write('pass') + self.write("pass") def visit_Print(self, node): # XXX: python 2.6 only self.newline() - self.write('print ') + self.write("print ") want_comma = False if node.dest is not None: - self.write(' >> ') + self.write(" >> ") self.visit(node.dest) want_comma = True for value in node.values: if want_comma: - self.write(', ') + self.write(", ") self.visit(value) want_comma = True if not node.nl: - self.write(',') + self.write(",") def visit_Delete(self, node): self.newline() - self.write('del ') + self.write("del ") for idx, target in enumerate(node): if idx: - self.write(', ') + self.write(", ") self.visit(target) def visit_TryExcept(self, node): self.newline() - self.write('try:') + self.write("try:") self.body(node.body) for handler in node.handlers: self.visit(handler) def visit_TryFinally(self, node): self.newline() - self.write('try:') + self.write("try:") self.body(node.body) self.newline() - self.write('finally:') + self.write("finally:") self.body(node.finalbody) def visit_Global(self, node): self.newline() - self.write('global ' + ', '.join(node.names)) + self.write("global " + ", ".join(node.names)) def visit_Nonlocal(self, node): self.newline() - self.write('nonlocal ' + ', '.join(node.names)) + self.write("nonlocal " + ", ".join(node.names)) def visit_Return(self, node): self.newline() - self.write('return ') + self.write("return ") self.visit(node.value) def visit_Break(self, node): self.newline() - self.write('break') + self.write("break") def visit_Continue(self, node): self.newline() - self.write('continue') + self.write("continue") def visit_Raise(self, node): # XXX: Python 2.6 / 3.0 compatibility self.newline() - self.write('raise') - if hasattr(node, 'exc') and node.exc is not None: - self.write(' ') + self.write("raise") + if hasattr(node, "exc") and node.exc is not None: + self.write(" ") self.visit(node.exc) if node.cause is not None: - self.write(' from ') + self.write(" from ") self.visit(node.cause) - elif hasattr(node, 'type') and node.type is not None: + elif hasattr(node, "type") and node.type is not None: self.visit(node.type) if node.inst is not None: - self.write(', ') + self.write(", ") self.visit(node.inst) if node.tback is not None: - self.write(', ') + self.write(", ") self.visit(node.tback) # Expressions def visit_Attribute(self, node): self.visit(node.value) - self.write('.' + node.attr) + self.write("." + node.attr) def visit_Call(self, node): want_comma = [] + def write_comma(): if want_comma: - self.write(', ') + self.write(", ") else: want_comma.append(True) self.visit(node.func) - self.write('(') + self.write("(") for arg in node.args: write_comma() self.visit(arg) for keyword in node.keywords: write_comma() - self.write(keyword.arg + '=') + self.write(keyword.arg + "=") self.visit(keyword.value) - if node.starargs is not None: + if getattr(node, "starargs", None): write_comma() - self.write('*') + self.write("*") self.visit(node.starargs) - if node.kwargs is not None: + if getattr(node, "kwargs", None): write_comma() - self.write('**') + self.write("**") self.visit(node.kwargs) - self.write(')') + self.write(")") def visit_Name(self, node): self.write(node.id) @@ -674,106 +539,111 @@ class SourceGenerator(NodeVisitor): def visit_Num(self, node): self.write(repr(node.n)) + # newly needed in Python 3.8 + def visit_Constant(self, node): + self.write(repr(node.value)) + def visit_Tuple(self, node): - self.write('(') + self.write("(") idx = -1 for idx, item in enumerate(node.elts): if idx: - self.write(', ') + self.write(", ") self.visit(item) - self.write(idx and ')' or ',)') + self.write(idx and ")" or ",)") def sequence_visit(left, right): def visit(self, node): self.write(left) for idx, item in enumerate(node.elts): if idx: - self.write(', ') + self.write(", ") self.visit(item) self.write(right) + return visit - visit_List = sequence_visit('[', ']') - visit_Set = sequence_visit('{', '}') + visit_List = sequence_visit("[", "]") + visit_Set = sequence_visit("{", "}") del sequence_visit def visit_Dict(self, node): - self.write('{') + self.write("{") for idx, (key, value) in enumerate(zip(node.keys, node.values)): if idx: - self.write(', ') + self.write(", ") self.visit(key) - self.write(': ') + self.write(": ") self.visit(value) - self.write('}') + self.write("}") def visit_BinOp(self, node): - self.write('(') + self.write("(") self.visit(node.left) - self.write(' %s ' % BINOP_SYMBOLS[type(node.op)]) + self.write(" %s " % BINOP_SYMBOLS[type(node.op)]) self.visit(node.right) - self.write(')') + self.write(")") def visit_BoolOp(self, node): - self.write('(') + self.write("(") for idx, value in enumerate(node.values): if idx: - self.write(' %s ' % BOOLOP_SYMBOLS[type(node.op)]) + self.write(" %s " % BOOLOP_SYMBOLS[type(node.op)]) self.visit(value) - self.write(')') + self.write(")") def visit_Compare(self, node): - self.write('(') + self.write("(") self.visit(node.left) for op, right in zip(node.ops, node.comparators): - self.write(' %s ' % CMPOP_SYMBOLS[type(op)]) + self.write(" %s " % CMPOP_SYMBOLS[type(op)]) self.visit(right) - self.write(')') + self.write(")") def visit_UnaryOp(self, node): - self.write('(') + self.write("(") op = UNARYOP_SYMBOLS[type(node.op)] self.write(op) - if op == 'not': - self.write(' ') + if op == "not": + self.write(" ") self.visit(node.operand) - self.write(')') + self.write(")") def visit_Subscript(self, node): self.visit(node.value) - self.write('[') + self.write("[") self.visit(node.slice) - self.write(']') + self.write("]") def visit_Slice(self, node): if node.lower is not None: self.visit(node.lower) - self.write(':') + self.write(":") if node.upper is not None: self.visit(node.upper) if node.step is not None: - self.write(':') - if not (isinstance(node.step, Name) and node.step.id == 'None'): + self.write(":") + if not (isinstance(node.step, Name) and node.step.id == "None"): self.visit(node.step) def visit_ExtSlice(self, node): for idx, item in node.dims: if idx: - self.write(', ') + self.write(", ") self.visit(item) def visit_Yield(self, node): - self.write('yield ') + self.write("yield ") self.visit(node.value) def visit_Lambda(self, node): - self.write('lambda ') + self.write("lambda ") self.signature(node.args) - self.write(': ') + self.write(": ") self.visit(node.body) def visit_Ellipsis(self, node): - self.write('Ellipsis') + self.write("Ellipsis") def generator_visit(left, right): def visit(self, node): @@ -782,64 +652,65 @@ class SourceGenerator(NodeVisitor): for comprehension in node.generators: self.visit(comprehension) self.write(right) + return visit - visit_ListComp = generator_visit('[', ']') - visit_GeneratorExp = generator_visit('(', ')') - visit_SetComp = generator_visit('{', '}') + visit_ListComp = generator_visit("[", "]") + visit_GeneratorExp = generator_visit("(", ")") + visit_SetComp = generator_visit("{", "}") del generator_visit def visit_DictComp(self, node): - self.write('{') + self.write("{") self.visit(node.key) - self.write(': ') + self.write(": ") self.visit(node.value) for comprehension in node.generators: self.visit(comprehension) - self.write('}') + self.write("}") def visit_IfExp(self, node): self.visit(node.body) - self.write(' if ') + self.write(" if ") self.visit(node.test) - self.write(' else ') + self.write(" else ") self.visit(node.orelse) def visit_Starred(self, node): - self.write('*') + self.write("*") self.visit(node.value) def visit_Repr(self, node): # XXX: python 2.6 only - self.write('`') + self.write("`") self.visit(node.value) - self.write('`') + self.write("`") # Helper Nodes def visit_alias(self, node): self.write(node.name) if node.asname is not None: - self.write(' as ' + node.asname) + self.write(" as " + node.asname) def visit_comprehension(self, node): - self.write(' for ') + self.write(" for ") self.visit(node.target) - self.write(' in ') + self.write(" in ") self.visit(node.iter) if node.ifs: for if_ in node.ifs: - self.write(' if ') + self.write(" if ") self.visit(if_) def visit_excepthandler(self, node): self.newline() - self.write('except') + self.write("except") if node.type is not None: - self.write(' ') + self.write(" ") self.visit(node.type) if node.name is not None: - self.write(' as ') + self.write(" as ") self.visit(node.name) - self.write(':') + self.write(":") self.body(node.body) diff --git a/lib/mako/ast.py b/lib/mako/ast.py index 65fd84df..cfae2806 100644 --- a/lib/mako/ast.py +++ b/lib/mako/ast.py @@ -1,5 +1,5 @@ # mako/ast.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -7,11 +7,17 @@ """utilities for analyzing expressions and blocks of Python code, as well as generating Python from AST nodes""" -from mako import exceptions, pyparser, compat import re +from mako import compat +from mako import exceptions +from mako import pyparser + + class PythonCode(object): + """represents information about a string containing Python code""" + def __init__(self, code, **exception_kwargs): self.code = code @@ -41,8 +47,11 @@ class PythonCode(object): f = pyparser.FindIdentifiers(self, **exception_kwargs) f.visit(expr) + class ArgumentList(object): + """parses a fragment of code as a comma-separated list of expressions""" + def __init__(self, code, **exception_kwargs): self.codeargs = [] self.args = [] @@ -52,7 +61,7 @@ class ArgumentList(object): if re.match(r"\S", code) and not re.match(r",\s*$", code): # if theres text and no trailing comma, insure its parsed # as a tuple by adding a trailing comma - code += "," + code += "," expr = pyparser.parse(code, "exec", **exception_kwargs) else: expr = code @@ -60,58 +69,69 @@ class ArgumentList(object): f = pyparser.FindTuple(self, PythonCode, **exception_kwargs) f.visit(expr) + class PythonFragment(PythonCode): + """extends PythonCode to provide identifier lookups in partial control statements - e.g. + e.g.:: + for x in 5: elif y==9: except (MyException, e): - etc. + """ + def __init__(self, code, **exception_kwargs): - m = re.match(r'^(\w+)(?:\s+(.*?))?:\s*(#|$)', code.strip(), re.S) + m = re.match(r"^(\w+)(?:\s+(.*?))?:\s*(#|$)", code.strip(), re.S) if not m: raise exceptions.CompileException( - "Fragment '%s' is not a partial control statement" % - code, **exception_kwargs) + "Fragment '%s' is not a partial control statement" % code, + **exception_kwargs + ) if m.group(3): - code = code[:m.start(3)] - (keyword, expr) = m.group(1,2) - if keyword in ['for','if', 'while']: + code = code[: m.start(3)] + (keyword, expr) = m.group(1, 2) + if keyword in ["for", "if", "while"]: code = code + "pass" - elif keyword == 'try': + elif keyword == "try": code = code + "pass\nexcept:pass" - elif keyword == 'elif' or keyword == 'else': + elif keyword == "elif" or keyword == "else": code = "if False:pass\n" + code + "pass" - elif keyword == 'except': + elif keyword == "except": code = "try:pass\n" + code + "pass" - elif keyword == 'with': + elif keyword == "with": code = code + "pass" else: raise exceptions.CompileException( - "Unsupported control keyword: '%s'" % - keyword, **exception_kwargs) + "Unsupported control keyword: '%s'" % keyword, + **exception_kwargs + ) super(PythonFragment, self).__init__(code, **exception_kwargs) class FunctionDecl(object): + """function declaration""" + def __init__(self, code, allow_kwargs=True, **exception_kwargs): self.code = code expr = pyparser.parse(code, "exec", **exception_kwargs) f = pyparser.ParseFunc(self, **exception_kwargs) f.visit(expr) - if not hasattr(self, 'funcname'): + if not hasattr(self, "funcname"): raise exceptions.CompileException( - "Code '%s' is not a function declaration" % code, - **exception_kwargs) + "Code '%s' is not a function declaration" % code, + **exception_kwargs + ) if not allow_kwargs and self.kwargs: raise exceptions.CompileException( - "'**%s' keyword argument not allowed here" % - self.kwargnames[-1], **exception_kwargs) + "'**%s' keyword argument not allowed here" + % self.kwargnames[-1], + **exception_kwargs + ) def get_argument_expressions(self, as_call=False): """Return the argument declarations of this FunctionDecl as a printable @@ -146,8 +166,10 @@ class FunctionDecl(object): # `def foo(*, a=1, b, c=3)` namedecls.append(name) else: - namedecls.append("%s=%s" % ( - name, pyparser.ExpressionGenerator(default).value())) + namedecls.append( + "%s=%s" + % (name, pyparser.ExpressionGenerator(default).value()) + ) else: namedecls.append(name) @@ -160,8 +182,10 @@ class FunctionDecl(object): namedecls.append(name) else: default = defaults.pop(0) - namedecls.append("%s=%s" % ( - name, pyparser.ExpressionGenerator(default).value())) + namedecls.append( + "%s=%s" + % (name, pyparser.ExpressionGenerator(default).value()) + ) namedecls.reverse() return namedecls @@ -170,9 +194,12 @@ class FunctionDecl(object): def allargnames(self): return tuple(self.argnames) + tuple(self.kwargnames) + class FunctionArgs(FunctionDecl): + """the argument portion of a function declaration""" def __init__(self, code, **kwargs): - super(FunctionArgs, self).__init__("def ANON(%s):pass" % code, - **kwargs) + super(FunctionArgs, self).__init__( + "def ANON(%s):pass" % code, **kwargs + ) diff --git a/lib/mako/cache.py b/lib/mako/cache.py index c405c517..26aa93ee 100644 --- a/lib/mako/cache.py +++ b/lib/mako/cache.py @@ -1,10 +1,11 @@ # mako/cache.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from mako import compat, util +from mako import compat +from mako import util _cache_plugins = util.PluginLoader("mako.cache") @@ -13,6 +14,7 @@ register_plugin("beaker", "mako.ext.beaker_cache", "BeakerCacheImpl") class Cache(object): + """Represents a data content cache made available to the module space of a specific :class:`.Template` object. @@ -89,12 +91,11 @@ class Cache(object): return creation_function() return self.impl.get_or_create( - key, - creation_function, - **self._get_cache_kw(kw, context)) + key, creation_function, **self._get_cache_kw(kw, context) + ) def set(self, key, value, **kw): - """Place a value in the cache. + r"""Place a value in the cache. :param key: the value's key. :param value: the value. @@ -112,7 +113,7 @@ class Cache(object): """ def get(self, key, **kw): - """Retrieve a value from the cache. + r"""Retrieve a value from the cache. :param key: the value's key. :param \**kw: cache configuration arguments. The @@ -124,7 +125,7 @@ class Cache(object): return self.impl.get(key, **self._get_cache_kw(kw, None)) def invalidate(self, key, **kw): - """Invalidate a value in the cache. + r"""Invalidate a value in the cache. :param key: the value's key. :param \**kw: cache configuration arguments. The @@ -140,7 +141,7 @@ class Cache(object): template. """ - self.invalidate('render_body', __M_defname='render_body') + self.invalidate("render_body", __M_defname="render_body") def invalidate_def(self, name): """Invalidate the cached content of a particular ``<%def>`` within this @@ -148,7 +149,7 @@ class Cache(object): """ - self.invalidate('render_%s' % name, __M_defname='render_%s' % name) + self.invalidate("render_%s" % name, __M_defname="render_%s" % name) def invalidate_closure(self, name): """Invalidate a nested ``<%def>`` within this template. @@ -164,7 +165,7 @@ class Cache(object): self.invalidate(name, __M_defname=name) def _get_cache_kw(self, kw, context): - defname = kw.pop('__M_defname', None) + defname = kw.pop("__M_defname", None) if not defname: tmpl_kw = self.template.cache_args.copy() tmpl_kw.update(kw) @@ -176,11 +177,12 @@ class Cache(object): self._def_regions[defname] = tmpl_kw if context and self.impl.pass_context: tmpl_kw = tmpl_kw.copy() - tmpl_kw.setdefault('context', context) + tmpl_kw.setdefault("context", context) return tmpl_kw class CacheImpl(object): + """Provide a cache implementation for use by :class:`.Cache`.""" def __init__(self, cache): @@ -192,7 +194,7 @@ class CacheImpl(object): """ def get_or_create(self, key, creation_function, **kw): - """Retrieve a value from the cache, using the given creation function + r"""Retrieve a value from the cache, using the given creation function to generate a new value. This function *must* return a value, either from @@ -210,7 +212,7 @@ class CacheImpl(object): raise NotImplementedError() def set(self, key, value, **kw): - """Place a value in the cache. + r"""Place a value in the cache. :param key: the value's key. :param value: the value. @@ -220,7 +222,7 @@ class CacheImpl(object): raise NotImplementedError() def get(self, key, **kw): - """Retrieve a value from the cache. + r"""Retrieve a value from the cache. :param key: the value's key. :param \**kw: cache configuration arguments. @@ -229,7 +231,7 @@ class CacheImpl(object): raise NotImplementedError() def invalidate(self, key, **kw): - """Invalidate a value in the cache. + r"""Invalidate a value in the cache. :param key: the value's key. :param \**kw: cache configuration arguments. diff --git a/lib/mako/cmd.py b/lib/mako/cmd.py index 1a9ca566..c0f2c754 100755 --- a/lib/mako/cmd.py +++ b/lib/mako/cmd.py @@ -1,43 +1,73 @@ # mako/cmd.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php from argparse import ArgumentParser -from os.path import isfile, dirname +import io +from os.path import dirname +from os.path import isfile import sys -from mako.template import Template -from mako.lookup import TemplateLookup + from mako import exceptions +from mako.lookup import TemplateLookup +from mako.template import Template + def varsplit(var): if "=" not in var: return (var, "") return var.split("=", 1) + def _exit(): sys.stderr.write(exceptions.text_error_template().render()) sys.exit(1) + def cmdline(argv=None): - parser = ArgumentParser("usage: %prog [FILENAME]") - parser.add_argument("--var", default=[], action="append", - help="variable (can be used multiple times, use name=value)") - parser.add_argument("--template-dir", default=[], action="append", - help="Directory to use for template lookup (multiple " - "directories may be provided). If not given then if the " - "template is read from stdin, the value defaults to be " - "the current directory, otherwise it defaults to be the " - "parent directory of the file provided.") - parser.add_argument('input', nargs='?', default='-') + parser = ArgumentParser() + parser.add_argument( + "--var", + default=[], + action="append", + help="variable (can be used multiple times, use name=value)", + ) + parser.add_argument( + "--template-dir", + default=[], + action="append", + help="Directory to use for template lookup (multiple " + "directories may be provided). If not given then if the " + "template is read from stdin, the value defaults to be " + "the current directory, otherwise it defaults to be the " + "parent directory of the file provided.", + ) + parser.add_argument( + "--output-encoding", default=None, help="force output encoding" + ) + parser.add_argument( + "--output-file", + default=None, + help="Write to file upon successful render instead of stdout", + ) + parser.add_argument("input", nargs="?", default="-") options = parser.parse_args(argv) - if options.input == '-': + + output_encoding = options.output_encoding + output_file = options.output_file + + if options.input == "-": lookup_dirs = options.template_dir or ["."] lookup = TemplateLookup(lookup_dirs) try: - template = Template(sys.stdin.read(), lookup=lookup) + template = Template( + sys.stdin.read(), + lookup=lookup, + output_encoding=output_encoding, + ) except: _exit() else: @@ -47,15 +77,26 @@ def cmdline(argv=None): lookup_dirs = options.template_dir or [dirname(filename)] lookup = TemplateLookup(lookup_dirs) try: - template = Template(filename=filename, lookup=lookup) + template = Template( + filename=filename, + lookup=lookup, + output_encoding=output_encoding, + ) except: _exit() kw = dict([varsplit(var) for var in options.var]) try: - print(template.render(**kw)) + rendered = template.render(**kw) except: _exit() + else: + if output_file: + io.open(output_file, "wt", encoding=output_encoding).write( + rendered + ) + else: + sys.stdout.write(rendered) if __name__ == "__main__": diff --git a/lib/mako/codegen.py b/lib/mako/codegen.py index 4b0bda86..a9ae55b8 100644 --- a/lib/mako/codegen.py +++ b/lib/mako/codegen.py @@ -1,5 +1,5 @@ # mako/codegen.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -7,11 +7,17 @@ """provides functionality for rendering a parsetree constructing into module source code.""" -import time +import json import re -from mako.pygen import PythonPrinter -from mako import util, ast, parsetree, filters, exceptions +import time + +from mako import ast from mako import compat +from mako import exceptions +from mako import filters +from mako import parsetree +from mako import util +from mako.pygen import PythonPrinter MAGIC_NUMBER = 10 @@ -19,22 +25,25 @@ MAGIC_NUMBER = 10 # names which are hardwired into the # template and are not accessed via the # context itself -RESERVED_NAMES = set(['context', 'loop', 'UNDEFINED']) +TOPLEVEL_DECLARED = set(["UNDEFINED", "STOP_RENDERING"]) +RESERVED_NAMES = set(["context", "loop"]).union(TOPLEVEL_DECLARED) -def compile(node, - uri, - filename=None, - default_filters=None, - buffer_filters=None, - imports=None, - future_imports=None, - source_encoding=None, - generate_magic_comment=True, - disable_unicode=False, - strict_undefined=False, - enable_loop=True, - reserved_names=frozenset()): +def compile( # noqa + node, + uri, + filename=None, + default_filters=None, + buffer_filters=None, + imports=None, + future_imports=None, + source_encoding=None, + generate_magic_comment=True, + disable_unicode=False, + strict_undefined=False, + enable_loop=True, + reserved_names=frozenset(), +): """Generate module source code given a parsetree node, uri, and optional source filename""" @@ -45,40 +54,46 @@ def compile(node, if not compat.py3k and isinstance(source_encoding, compat.text_type): source_encoding = source_encoding.encode(source_encoding) - buf = util.FastEncodingBuffer() printer = PythonPrinter(buf) - _GenerateRenderMethod(printer, - _CompileContext(uri, - filename, - default_filters, - buffer_filters, - imports, - future_imports, - source_encoding, - generate_magic_comment, - disable_unicode, - strict_undefined, - enable_loop, - reserved_names), - node) + _GenerateRenderMethod( + printer, + _CompileContext( + uri, + filename, + default_filters, + buffer_filters, + imports, + future_imports, + source_encoding, + generate_magic_comment, + disable_unicode, + strict_undefined, + enable_loop, + reserved_names, + ), + node, + ) return buf.getvalue() + class _CompileContext(object): - def __init__(self, - uri, - filename, - default_filters, - buffer_filters, - imports, - future_imports, - source_encoding, - generate_magic_comment, - disable_unicode, - strict_undefined, - enable_loop, - reserved_names): + def __init__( + self, + uri, + filename, + default_filters, + buffer_filters, + imports, + future_imports, + source_encoding, + generate_magic_comment, + disable_unicode, + strict_undefined, + enable_loop, + reserved_names, + ): self.uri = uri self.filename = filename self.default_filters = default_filters @@ -92,11 +107,14 @@ class _CompileContext(object): self.enable_loop = enable_loop self.reserved_names = reserved_names + class _GenerateRenderMethod(object): + """A template visitor object which generates the full module source for a template. """ + def __init__(self, printer, compiler, node): self.printer = printer self.compiler = compiler @@ -108,12 +126,12 @@ class _GenerateRenderMethod(object): name = "render_%s" % node.funcname args = node.get_argument_expressions() filtered = len(node.filter_args.args) > 0 - buffered = eval(node.attributes.get('buffered', 'False')) - cached = eval(node.attributes.get('cached', 'False')) + buffered = eval(node.attributes.get("buffered", "False")) + cached = eval(node.attributes.get("cached", "False")) defs = None pagetag = None if node.is_block and not node.is_anonymous: - args += ['**pageargs'] + args += ["**pageargs"] else: defs = self.write_toplevel() pagetag = self.compiler.pagetag @@ -121,25 +139,23 @@ class _GenerateRenderMethod(object): if pagetag is not None: args = pagetag.body_decl.get_argument_expressions() if not pagetag.body_decl.kwargs: - args += ['**pageargs'] - cached = eval(pagetag.attributes.get('cached', 'False')) + args += ["**pageargs"] + cached = eval(pagetag.attributes.get("cached", "False")) self.compiler.enable_loop = self.compiler.enable_loop or eval( - pagetag.attributes.get( - 'enable_loop', 'False') - ) + pagetag.attributes.get("enable_loop", "False") + ) else: - args = ['**pageargs'] + args = ["**pageargs"] cached = False buffered = filtered = False if args is None: - args = ['context'] + args = ["context"] else: - args = [a for a in ['context'] + args] + args = [a for a in ["context"] + args] self.write_render_callable( - pagetag or node, - name, args, - buffered, filtered, cached) + pagetag or node, name, args, buffered, filtered, cached + ) if defs is not None: for node in defs: @@ -149,8 +165,9 @@ class _GenerateRenderMethod(object): self.write_metadata_struct() def write_metadata_struct(self): - self.printer.source_map[self.printer.lineno] = \ - max(self.printer.source_map) + self.printer.source_map[self.printer.lineno] = max( + self.printer.source_map + ) struct = { "filename": self.compiler.filename, "uri": self.compiler.uri, @@ -159,10 +176,9 @@ class _GenerateRenderMethod(object): } self.printer.writelines( '"""', - '__M_BEGIN_METADATA', - compat.json.dumps(struct), - '__M_END_METADATA\n' - '"""' + "__M_BEGIN_METADATA", + json.dumps(struct), + "__M_END_METADATA\n" '"""', ) @property @@ -183,10 +199,13 @@ class _GenerateRenderMethod(object): class FindTopLevel(object): def visitInheritTag(s, node): inherit.append(node) + def visitNamespaceTag(s, node): namespaces[node.name] = node + def visitPageTag(s, node): self.compiler.pagetag = node + def visitCode(s, node): if node.ismodule: module_code.append(node) @@ -205,52 +224,63 @@ class _GenerateRenderMethod(object): module_identifiers.declared = module_ident # module-level names, python code - if self.compiler.generate_magic_comment and \ - self.compiler.source_encoding: - self.printer.writeline("# -*- coding:%s -*-" % - self.compiler.source_encoding) + if ( + self.compiler.generate_magic_comment + and self.compiler.source_encoding + ): + self.printer.writeline( + "# -*- coding:%s -*-" % self.compiler.source_encoding + ) if self.compiler.future_imports: - self.printer.writeline("from __future__ import %s" % - (", ".join(self.compiler.future_imports),)) + self.printer.writeline( + "from __future__ import %s" + % (", ".join(self.compiler.future_imports),) + ) self.printer.writeline("from mako import runtime, filters, cache") self.printer.writeline("UNDEFINED = runtime.UNDEFINED") + self.printer.writeline("STOP_RENDERING = runtime.STOP_RENDERING") self.printer.writeline("__M_dict_builtin = dict") self.printer.writeline("__M_locals_builtin = locals") self.printer.writeline("_magic_number = %r" % MAGIC_NUMBER) self.printer.writeline("_modified_time = %r" % time.time()) self.printer.writeline("_enable_loop = %r" % self.compiler.enable_loop) self.printer.writeline( - "_template_filename = %r" % self.compiler.filename) + "_template_filename = %r" % self.compiler.filename + ) self.printer.writeline("_template_uri = %r" % self.compiler.uri) self.printer.writeline( - "_source_encoding = %r" % self.compiler.source_encoding) + "_source_encoding = %r" % self.compiler.source_encoding + ) if self.compiler.imports: - buf = '' + buf = "" for imp in self.compiler.imports: buf += imp + "\n" self.printer.writeline(imp) impcode = ast.PythonCode( - buf, - source='', lineno=0, - pos=0, - filename='template defined imports') + buf, + source="", + lineno=0, + pos=0, + filename="template defined imports", + ) else: impcode = None main_identifiers = module_identifiers.branch(self.node) - module_identifiers.topleveldefs = \ - module_identifiers.topleveldefs.\ - union(main_identifiers.topleveldefs) - module_identifiers.declared.add("UNDEFINED") + mit = module_identifiers.topleveldefs + module_identifiers.topleveldefs = mit.union( + main_identifiers.topleveldefs + ) + module_identifiers.declared.update(TOPLEVEL_DECLARED) if impcode: module_identifiers.declared.update(impcode.declared_identifiers) self.compiler.identifiers = module_identifiers - self.printer.writeline("_exports = %r" % - [n.name for n in - main_identifiers.topleveldefs.values()] - ) + self.printer.writeline( + "_exports = %r" + % [n.name for n in main_identifiers.topleveldefs.values()] + ) self.printer.write_blanks(2) if len(module_code): @@ -264,8 +294,9 @@ class _GenerateRenderMethod(object): return list(main_identifiers.topleveldefs.values()) - def write_render_callable(self, node, name, args, buffered, filtered, - cached): + def write_render_callable( + self, node, name, args, buffered, filtered, cached + ): """write a top-level render callable. this could be the main render() method or that of a top-level def.""" @@ -274,32 +305,38 @@ class _GenerateRenderMethod(object): decorator = node.decorator if decorator: self.printer.writeline( - "@runtime._decorate_toplevel(%s)" % decorator) + "@runtime._decorate_toplevel(%s)" % decorator + ) self.printer.start_source(node.lineno) self.printer.writelines( - "def %s(%s):" % (name, ','.join(args)), - # push new frame, assign current frame to __M_caller - "__M_caller = context.caller_stack._push_frame()", - "try:" + "def %s(%s):" % (name, ",".join(args)), + # push new frame, assign current frame to __M_caller + "__M_caller = context.caller_stack._push_frame()", + "try:", ) if buffered or filtered or cached: self.printer.writeline("context._push_buffer()") self.identifier_stack.append( - self.compiler.identifiers.branch(self.node)) - if (not self.in_def or self.node.is_block) and '**pageargs' in args: - self.identifier_stack[-1].argument_declared.add('pageargs') + self.compiler.identifiers.branch(self.node) + ) + if (not self.in_def or self.node.is_block) and "**pageargs" in args: + self.identifier_stack[-1].argument_declared.add("pageargs") if not self.in_def and ( - len(self.identifiers.locally_assigned) > 0 or - len(self.identifiers.argument_declared) > 0 - ): - self.printer.writeline("__M_locals = __M_dict_builtin(%s)" % - ','.join([ - "%s=%s" % (x, x) for x in - self.identifiers.argument_declared - ])) + len(self.identifiers.locally_assigned) > 0 + or len(self.identifiers.argument_declared) > 0 + ): + self.printer.writeline( + "__M_locals = __M_dict_builtin(%s)" + % ",".join( + [ + "%s=%s" % (x, x) + for x in self.identifiers.argument_declared + ] + ) + ) self.write_variable_declares(self.identifiers, toplevel=True) @@ -311,44 +348,42 @@ class _GenerateRenderMethod(object): self.printer.write_blanks(2) if cached: self.write_cache_decorator( - node, name, - args, buffered, - self.identifiers, toplevel=True) + node, name, args, buffered, self.identifiers, toplevel=True + ) def write_module_code(self, module_code): """write module-level template code, i.e. that which is enclosed in <%! %> tags in the template.""" for n in module_code: - self.printer.start_source(n.lineno) - self.printer.write_indented_block(n.text) + self.printer.write_indented_block(n.text, starting_lineno=n.lineno) def write_inherit(self, node): """write the module-level inheritance-determination callable.""" self.printer.writelines( "def _mako_inherit(template, context):", - "_mako_generate_namespaces(context)", - "return runtime._inherit_from(context, %s, _template_uri)" % - (node.parsed_attributes['file']), - None + "_mako_generate_namespaces(context)", + "return runtime._inherit_from(context, %s, _template_uri)" + % (node.parsed_attributes["file"]), + None, ) def write_namespaces(self, namespaces): """write the module-level namespace-generating callable.""" self.printer.writelines( "def _mako_get_namespace(context, name):", - "try:", - "return context.namespaces[(__name__, name)]", - "except KeyError:", - "_mako_generate_namespaces(context)", - "return context.namespaces[(__name__, name)]", - None, None + "try:", + "return context.namespaces[(__name__, name)]", + "except KeyError:", + "_mako_generate_namespaces(context)", + "return context.namespaces[(__name__, name)]", + None, + None, ) self.printer.writeline("def _mako_generate_namespaces(context):") - for node in namespaces.values(): - if 'import' in node.attributes: + if "import" in node.attributes: self.compiler.has_ns_imports = True self.printer.start_source(node.lineno) if len(node.nodes): @@ -356,6 +391,7 @@ class _GenerateRenderMethod(object): export = [] identifiers = self.compiler.identifiers.branch(node) self.in_def = True + class NSDefVisitor(object): def visitDefTag(s, node): s.visitDefOrBase(node) @@ -372,56 +408,54 @@ class _GenerateRenderMethod(object): ) self.write_inline_def(node, identifiers, nested=False) export.append(node.funcname) + vis = NSDefVisitor() for n in node.nodes: n.accept_visitor(vis) - self.printer.writeline("return [%s]" % (','.join(export))) + self.printer.writeline("return [%s]" % (",".join(export))) self.printer.writeline(None) self.in_def = False callable_name = "make_namespace()" else: callable_name = "None" - if 'file' in node.parsed_attributes: + if "file" in node.parsed_attributes: self.printer.writeline( - "ns = runtime.TemplateNamespace(%r," - " context._clean_inheritance_tokens()," - " templateuri=%s, callables=%s, " - " calling_uri=_template_uri)" % - ( - node.name, - node.parsed_attributes.get('file', 'None'), - callable_name, - ) - ) - elif 'module' in node.parsed_attributes: + "ns = runtime.TemplateNamespace(%r," + " context._clean_inheritance_tokens()," + " templateuri=%s, callables=%s, " + " calling_uri=_template_uri)" + % ( + node.name, + node.parsed_attributes.get("file", "None"), + callable_name, + ) + ) + elif "module" in node.parsed_attributes: self.printer.writeline( - "ns = runtime.ModuleNamespace(%r," - " context._clean_inheritance_tokens()," - " callables=%s, calling_uri=_template_uri," - " module=%s)" % - ( - node.name, - callable_name, - node.parsed_attributes.get( - 'module', 'None') - ) - ) + "ns = runtime.ModuleNamespace(%r," + " context._clean_inheritance_tokens()," + " callables=%s, calling_uri=_template_uri," + " module=%s)" + % ( + node.name, + callable_name, + node.parsed_attributes.get("module", "None"), + ) + ) else: self.printer.writeline( - "ns = runtime.Namespace(%r," - " context._clean_inheritance_tokens()," - " callables=%s, calling_uri=_template_uri)" % - ( - node.name, - callable_name, - ) - ) - if eval(node.attributes.get('inheritable', "False")): + "ns = runtime.Namespace(%r," + " context._clean_inheritance_tokens()," + " callables=%s, calling_uri=_template_uri)" + % (node.name, callable_name) + ) + if eval(node.attributes.get("inheritable", "False")): self.printer.writeline("context['self'].%s = ns" % (node.name)) self.printer.writeline( - "context.namespaces[(__name__, %s)] = ns" % repr(node.name)) + "context.namespaces[(__name__, %s)] = ns" % repr(node.name) + ) self.printer.write_blanks(1) if not len(namespaces): self.printer.writeline("pass") @@ -457,7 +491,8 @@ class _GenerateRenderMethod(object): # write closure functions for closures that we define # right here to_write = to_write.union( - [c.funcname for c in identifiers.closuredefs.values()]) + [c.funcname for c in identifiers.closuredefs.values()] + ) # remove identifiers that are declared in the argument # signature of the callable @@ -481,23 +516,22 @@ class _GenerateRenderMethod(object): if limit is not None: to_write = to_write.intersection(limit) - if toplevel and getattr(self.compiler, 'has_ns_imports', False): + if toplevel and getattr(self.compiler, "has_ns_imports", False): self.printer.writeline("_import_ns = {}") self.compiler.has_imports = True for ident, ns in self.compiler.namespaces.items(): - if 'import' in ns.attributes: + if "import" in ns.attributes: self.printer.writeline( - "_mako_get_namespace(context, %r)." - "_populate(_import_ns, %r)" % - ( - ident, - re.split(r'\s*,\s*', ns.attributes['import']) - )) + "_mako_get_namespace(context, %r)." + "_populate(_import_ns, %r)" + % ( + ident, + re.split(r"\s*,\s*", ns.attributes["import"]), + ) + ) if has_loop: - self.printer.writeline( - 'loop = __M_loop = runtime.LoopStack()' - ) + self.printer.writeline("loop = __M_loop = runtime.LoopStack()") for ident in to_write: if ident in comp_idents: @@ -515,36 +549,36 @@ class _GenerateRenderMethod(object): elif ident in self.compiler.namespaces: self.printer.writeline( - "%s = _mako_get_namespace(context, %r)" % - (ident, ident) - ) + "%s = _mako_get_namespace(context, %r)" % (ident, ident) + ) else: - if getattr(self.compiler, 'has_ns_imports', False): + if getattr(self.compiler, "has_ns_imports", False): if self.compiler.strict_undefined: self.printer.writelines( - "%s = _import_ns.get(%r, UNDEFINED)" % - (ident, ident), - "if %s is UNDEFINED:" % ident, + "%s = _import_ns.get(%r, UNDEFINED)" + % (ident, ident), + "if %s is UNDEFINED:" % ident, "try:", - "%s = context[%r]" % (ident, ident), + "%s = context[%r]" % (ident, ident), "except KeyError:", - "raise NameError(\"'%s' is not defined\")" % - ident, - None, None + "raise NameError(\"'%s' is not defined\")" % ident, + None, + None, ) else: self.printer.writeline( - "%s = _import_ns.get(%r, context.get(%r, UNDEFINED))" % - (ident, ident, ident)) + "%s = _import_ns.get" + "(%r, context.get(%r, UNDEFINED))" + % (ident, ident, ident) + ) else: if self.compiler.strict_undefined: self.printer.writelines( "try:", - "%s = context[%r]" % (ident, ident), + "%s = context[%r]" % (ident, ident), "except KeyError:", - "raise NameError(\"'%s' is not defined\")" % - ident, - None + "raise NameError(\"'%s' is not defined\")" % ident, + None, ) else: self.printer.writeline( @@ -560,14 +594,16 @@ class _GenerateRenderMethod(object): nameargs = node.get_argument_expressions(as_call=True) if not self.in_def and ( - len(self.identifiers.locally_assigned) > 0 or - len(self.identifiers.argument_declared) > 0): - nameargs.insert(0, 'context._locals(__M_locals)') + len(self.identifiers.locally_assigned) > 0 + or len(self.identifiers.argument_declared) > 0 + ): + nameargs.insert(0, "context._locals(__M_locals)") else: - nameargs.insert(0, 'context') + nameargs.insert(0, "context") self.printer.writeline("def %s(%s):" % (funcname, ",".join(namedecls))) self.printer.writeline( - "return render_%s(%s)" % (funcname, ",".join(nameargs))) + "return render_%s(%s)" % (funcname, ",".join(nameargs)) + ) self.printer.writeline(None) def write_inline_def(self, node, identifiers, nested): @@ -578,21 +614,21 @@ class _GenerateRenderMethod(object): decorator = node.decorator if decorator: self.printer.writeline( - "@runtime._decorate_inline(context, %s)" % decorator) + "@runtime._decorate_inline(context, %s)" % decorator + ) self.printer.writeline( - "def %s(%s):" % (node.funcname, ",".join(namedecls))) + "def %s(%s):" % (node.funcname, ",".join(namedecls)) + ) filtered = len(node.filter_args.args) > 0 - buffered = eval(node.attributes.get('buffered', 'False')) - cached = eval(node.attributes.get('cached', 'False')) + buffered = eval(node.attributes.get("buffered", "False")) + cached = eval(node.attributes.get("cached", "False")) self.printer.writelines( # push new frame, assign current frame to __M_caller "__M_caller = context.caller_stack._push_frame()", - "try:" + "try:", ) if buffered or filtered or cached: - self.printer.writelines( - "context._push_buffer()", - ) + self.printer.writelines("context._push_buffer()") identifiers = identifiers.branch(node, nested=nested) @@ -606,12 +642,19 @@ class _GenerateRenderMethod(object): self.write_def_finish(node, buffered, filtered, cached) self.printer.writeline(None) if cached: - self.write_cache_decorator(node, node.funcname, - namedecls, False, identifiers, - inline=True, toplevel=False) + self.write_cache_decorator( + node, + node.funcname, + namedecls, + False, + identifiers, + inline=True, + toplevel=False, + ) - def write_def_finish(self, node, buffered, filtered, cached, - callstack=True): + def write_def_finish( + self, node, buffered, filtered, cached, callstack=True + ): """write the end section of a rendering function, either outermost or inline. @@ -624,9 +667,7 @@ class _GenerateRenderMethod(object): self.printer.writeline("return ''") if callstack: self.printer.writelines( - "finally:", - "context.caller_stack._pop_frame()", - None + "finally:", "context.caller_stack._pop_frame()", None ) if buffered or filtered or cached: @@ -636,13 +677,12 @@ class _GenerateRenderMethod(object): # implemenation might be using a context with no # extra buffers self.printer.writelines( - "finally:", - "__M_buf = context._pop_buffer()" + "finally:", "__M_buf = context._pop_buffer()" ) else: self.printer.writelines( "finally:", - "__M_buf, __M_writer = context._pop_buffer_and_writer()" + "__M_buf, __M_writer = context._pop_buffer_and_writer()", ) if callstack: @@ -650,89 +690,100 @@ class _GenerateRenderMethod(object): s = "__M_buf.getvalue()" if filtered: - s = self.create_filter_callable(node.filter_args.args, s, - False) + s = self.create_filter_callable( + node.filter_args.args, s, False + ) self.printer.writeline(None) if buffered and not cached: - s = self.create_filter_callable(self.compiler.buffer_filters, - s, False) + s = self.create_filter_callable( + self.compiler.buffer_filters, s, False + ) if buffered or cached: self.printer.writeline("return %s" % s) else: - self.printer.writelines( - "__M_writer(%s)" % s, - "return ''" - ) + self.printer.writelines("__M_writer(%s)" % s, "return ''") - def write_cache_decorator(self, node_or_pagetag, name, - args, buffered, identifiers, - inline=False, toplevel=False): + def write_cache_decorator( + self, + node_or_pagetag, + name, + args, + buffered, + identifiers, + inline=False, + toplevel=False, + ): """write a post-function decorator to replace a rendering callable with a cached version of itself.""" self.printer.writeline("__M_%s = %s" % (name, name)) - cachekey = node_or_pagetag.parsed_attributes.get('cache_key', - repr(name)) + cachekey = node_or_pagetag.parsed_attributes.get( + "cache_key", repr(name) + ) cache_args = {} if self.compiler.pagetag is not None: cache_args.update( - ( - pa[6:], - self.compiler.pagetag.parsed_attributes[pa] - ) + (pa[6:], self.compiler.pagetag.parsed_attributes[pa]) for pa in self.compiler.pagetag.parsed_attributes - if pa.startswith('cache_') and pa != 'cache_key' + if pa.startswith("cache_") and pa != "cache_key" ) cache_args.update( - ( - pa[6:], - node_or_pagetag.parsed_attributes[pa] - ) for pa in node_or_pagetag.parsed_attributes - if pa.startswith('cache_') and pa != 'cache_key' + (pa[6:], node_or_pagetag.parsed_attributes[pa]) + for pa in node_or_pagetag.parsed_attributes + if pa.startswith("cache_") and pa != "cache_key" ) - if 'timeout' in cache_args: - cache_args['timeout'] = int(eval(cache_args['timeout'])) + if "timeout" in cache_args: + cache_args["timeout"] = int(eval(cache_args["timeout"])) - self.printer.writeline("def %s(%s):" % (name, ','.join(args))) + self.printer.writeline("def %s(%s):" % (name, ",".join(args))) # form "arg1, arg2, arg3=arg3, arg4=arg4", etc. pass_args = [ - "%s=%s" % ((a.split('=')[0],) * 2) if '=' in a else a - for a in args - ] + "%s=%s" % ((a.split("=")[0],) * 2) if "=" in a else a for a in args + ] self.write_variable_declares( - identifiers, - toplevel=toplevel, - limit=node_or_pagetag.undeclared_identifiers() - ) + identifiers, + toplevel=toplevel, + limit=node_or_pagetag.undeclared_identifiers(), + ) if buffered: - s = "context.get('local')."\ - "cache._ctx_get_or_create("\ - "%s, lambda:__M_%s(%s), context, %s__M_defname=%r)" % ( - cachekey, name, ','.join(pass_args), - ''.join(["%s=%s, " % (k, v) - for k, v in cache_args.items()]), - name - ) + s = ( + "context.get('local')." + "cache._ctx_get_or_create(" + "%s, lambda:__M_%s(%s), context, %s__M_defname=%r)" + % ( + cachekey, + name, + ",".join(pass_args), + "".join( + ["%s=%s, " % (k, v) for k, v in cache_args.items()] + ), + name, + ) + ) # apply buffer_filters - s = self.create_filter_callable(self.compiler.buffer_filters, s, - False) + s = self.create_filter_callable( + self.compiler.buffer_filters, s, False + ) self.printer.writelines("return " + s, None) else: self.printer.writelines( - "__M_writer(context.get('local')." - "cache._ctx_get_or_create(" - "%s, lambda:__M_%s(%s), context, %s__M_defname=%r))" % - ( - cachekey, name, ','.join(pass_args), - ''.join(["%s=%s, " % (k, v) - for k, v in cache_args.items()]), - name, + "__M_writer(context.get('local')." + "cache._ctx_get_or_create(" + "%s, lambda:__M_%s(%s), context, %s__M_defname=%r))" + % ( + cachekey, + name, + ",".join(pass_args), + "".join( + ["%s=%s, " % (k, v) for k, v in cache_args.items()] ), - "return ''", - None + name, + ), + "return ''", + None, ) def create_filter_callable(self, args, target, is_expression): @@ -741,24 +792,24 @@ class _GenerateRenderMethod(object): 'default' filter aliases as needed.""" def locate_encode(name): - if re.match(r'decode\..+', name): + if re.match(r"decode\..+", name): return "filters." + name elif self.compiler.disable_unicode: return filters.NON_UNICODE_ESCAPES.get(name, name) else: return filters.DEFAULT_ESCAPES.get(name, name) - if 'n' not in args: + if "n" not in args: if is_expression: if self.compiler.pagetag: args = self.compiler.pagetag.filter_args.args + args - if self.compiler.default_filters: + if self.compiler.default_filters and "n" not in args: args = self.compiler.default_filters + args for e in args: # if filter given as a function, get just the identifier portion - if e == 'n': + if e == "n": continue - m = re.match(r'(.+?)(\(.*\))', e) + m = re.match(r"(.+?)(\(.*\))", e) if m: ident, fargs = m.group(1, 2) f = locate_encode(ident) @@ -771,15 +822,18 @@ class _GenerateRenderMethod(object): def visitExpression(self, node): self.printer.start_source(node.lineno) - if len(node.escapes) or \ - ( - self.compiler.pagetag is not None and - len(self.compiler.pagetag.filter_args.args) - ) or \ - len(self.compiler.default_filters): + if ( + len(node.escapes) + or ( + self.compiler.pagetag is not None + and len(self.compiler.pagetag.filter_args.args) + ) + or len(self.compiler.default_filters) + ): - s = self.create_filter_callable(node.escapes_code.args, - "%s" % node.text, True) + s = self.create_filter_callable( + node.escapes_code.args, "%s" % node.text, True + ) self.printer.writeline("__M_writer(%s)" % s) else: self.printer.writeline("__M_writer(%s)" % node.text) @@ -788,12 +842,12 @@ class _GenerateRenderMethod(object): if node.isend: self.printer.writeline(None) if node.has_loop_context: - self.printer.writeline('finally:') + self.printer.writeline("finally:") self.printer.writeline("loop = __M_loop._exit()") self.printer.writeline(None) else: self.printer.start_source(node.lineno) - if self.compiler.enable_loop and node.keyword == 'for': + if self.compiler.enable_loop and node.keyword == "for": text = mangle_mako_loop(node, self.printer) else: text = node.text @@ -805,12 +859,16 @@ class _GenerateRenderMethod(object): # and end control lines, and # 3) any control line with no content other than comments if not children or ( - compat.all(isinstance(c, (parsetree.Comment, - parsetree.ControlLine)) - for c in children) and - compat.all((node.is_ternary(c.keyword) or c.isend) - for c in children - if isinstance(c, parsetree.ControlLine))): + compat.all( + isinstance(c, (parsetree.Comment, parsetree.ControlLine)) + for c in children + ) + and compat.all( + (node.is_ternary(c.keyword) or c.isend) + for c in children + if isinstance(c, parsetree.ControlLine) + ) + ): self.printer.writeline("pass") def visitText(self, node): @@ -821,8 +879,7 @@ class _GenerateRenderMethod(object): filtered = len(node.filter_args.args) > 0 if filtered: self.printer.writelines( - "__M_writer = context._push_writer()", - "try:", + "__M_writer = context._push_writer()", "try:" ) for n in node.nodes: n.accept_visitor(self) @@ -830,18 +887,18 @@ class _GenerateRenderMethod(object): self.printer.writelines( "finally:", "__M_buf, __M_writer = context._pop_buffer_and_writer()", - "__M_writer(%s)" % - self.create_filter_callable( - node.filter_args.args, - "__M_buf.getvalue()", - False), - None + "__M_writer(%s)" + % self.create_filter_callable( + node.filter_args.args, "__M_buf.getvalue()", False + ), + None, ) def visitCode(self, node): if not node.ismodule: - self.printer.start_source(node.lineno) - self.printer.write_indented_block(node.text) + self.printer.write_indented_block( + node.text, starting_lineno=node.lineno + ) if not self.in_def and len(self.identifiers.locally_assigned) > 0: # if we are the "template" def, fudge locally @@ -849,24 +906,28 @@ class _GenerateRenderMethod(object): # which is used for def calls within the same template, # to simulate "enclosing scope" self.printer.writeline( - '__M_locals_builtin_stored = __M_locals_builtin()') + "__M_locals_builtin_stored = __M_locals_builtin()" + ) self.printer.writeline( - '__M_locals.update(__M_dict_builtin([(__M_key,' - ' __M_locals_builtin_stored[__M_key]) for __M_key in' - ' [%s] if __M_key in __M_locals_builtin_stored]))' % - ','.join([repr(x) for x in node.declared_identifiers()])) + "__M_locals.update(__M_dict_builtin([(__M_key," + " __M_locals_builtin_stored[__M_key]) for __M_key in" + " [%s] if __M_key in __M_locals_builtin_stored]))" + % ",".join([repr(x) for x in node.declared_identifiers()]) + ) def visitIncludeTag(self, node): self.printer.start_source(node.lineno) - args = node.attributes.get('args') + args = node.attributes.get("args") if args: self.printer.writeline( - "runtime._include_file(context, %s, _template_uri, %s)" % - (node.parsed_attributes['file'], args)) + "runtime._include_file(context, %s, _template_uri, %s)" + % (node.parsed_attributes["file"], args) + ) else: self.printer.writeline( - "runtime._include_file(context, %s, _template_uri)" % - (node.parsed_attributes['file'])) + "runtime._include_file(context, %s, _template_uri)" + % (node.parsed_attributes["file"]) + ) def visitNamespaceTag(self, node): pass @@ -879,12 +940,14 @@ class _GenerateRenderMethod(object): self.printer.writeline("%s()" % node.funcname) else: nameargs = node.get_argument_expressions(as_call=True) - nameargs += ['**pageargs'] - self.printer.writeline("if 'parent' not in context._data or " - "not hasattr(context._data['parent'], '%s'):" - % node.funcname) + nameargs += ["**pageargs"] self.printer.writeline( - "context['self'].%s(%s)" % (node.funcname, ",".join(nameargs))) + "if 'parent' not in context._data or " + "not hasattr(context._data['parent'], '%s'):" % node.funcname + ) + self.printer.writeline( + "context['self'].%s(%s)" % (node.funcname, ",".join(nameargs)) + ) self.printer.writeline("\n") def visitCallNamespaceTag(self, node): @@ -895,16 +958,17 @@ class _GenerateRenderMethod(object): def visitCallTag(self, node): self.printer.writeline("def ccall(caller):") - export = ['body'] + export = ["body"] callable_identifiers = self.identifiers.branch(node, nested=True) body_identifiers = callable_identifiers.branch(node, nested=False) # we want the 'caller' passed to ccall to be used # for the body() function, but for other non-body() # <%def>s within <%call> we want the current caller # off the call stack (if any) - body_identifiers.add_declared('caller') + body_identifiers.add_declared("caller") self.identifier_stack.append(body_identifiers) + class DefVisitor(object): def visitDefTag(s, node): s.visitDefOrBase(node) @@ -927,16 +991,13 @@ class _GenerateRenderMethod(object): self.identifier_stack.pop() bodyargs = node.body_decl.get_argument_expressions() - self.printer.writeline("def body(%s):" % ','.join(bodyargs)) + self.printer.writeline("def body(%s):" % ",".join(bodyargs)) # TODO: figure out best way to specify # buffering/nonbuffering (at call time would be better) buffered = False if buffered: - self.printer.writelines( - "context._push_buffer()", - "try:" - ) + self.printer.writelines("context._push_buffer()", "try:") self.write_variable_declares(body_identifiers) self.identifier_stack.append(body_identifiers) @@ -945,28 +1006,27 @@ class _GenerateRenderMethod(object): self.identifier_stack.pop() self.write_def_finish(node, buffered, False, False, callstack=False) - self.printer.writelines( - None, - "return [%s]" % (','.join(export)), - None - ) + self.printer.writelines(None, "return [%s]" % (",".join(export)), None) self.printer.writelines( # push on caller for nested call "context.caller_stack.nextcaller = " - "runtime.Namespace('caller', context, " - "callables=ccall(__M_caller))", - "try:") + "runtime.Namespace('caller', context, " + "callables=ccall(__M_caller))", + "try:", + ) self.printer.start_source(node.lineno) self.printer.writelines( - "__M_writer(%s)" % self.create_filter_callable( - [], node.expression, True), + "__M_writer(%s)" + % self.create_filter_callable([], node.expression, True), "finally:", - "context.caller_stack.nextcaller = None", - None + "context.caller_stack.nextcaller = None", + None, ) + class _Identifiers(object): + """tracks the status of identifier names as template code is rendered.""" def __init__(self, compiler, node=None, parent=None, nested=False): @@ -979,10 +1039,12 @@ class _Identifiers(object): else: # things that have already been declared # in an enclosing namespace (i.e. names we can just use) - self.declared = set(parent.declared).\ - union([c.name for c in parent.closuredefs.values()]).\ - union(parent.locally_declared).\ - union(parent.argument_declared) + self.declared = ( + set(parent.declared) + .union([c.name for c in parent.closuredefs.values()]) + .union(parent.locally_declared) + .union(parent.argument_declared) + ) # if these identifiers correspond to a "nested" # scope, it means whatever the parent identifiers @@ -1026,12 +1088,13 @@ class _Identifiers(object): node.accept_visitor(self) illegal_names = self.compiler.reserved_names.intersection( - self.locally_declared) + self.locally_declared + ) if illegal_names: raise exceptions.NameConflictError( - "Reserved words declared in template: %s" % - ", ".join(illegal_names)) - + "Reserved words declared in template: %s" + % ", ".join(illegal_names) + ) def branch(self, node, **kwargs): """create a new Identifiers for a new Node, with @@ -1044,24 +1107,28 @@ class _Identifiers(object): return set(self.topleveldefs.union(self.closuredefs).values()) def __repr__(self): - return "Identifiers(declared=%r, locally_declared=%r, "\ - "undeclared=%r, topleveldefs=%r, closuredefs=%r, "\ - "argumentdeclared=%r)" %\ - ( - list(self.declared), - list(self.locally_declared), - list(self.undeclared), - [c.name for c in self.topleveldefs.values()], - [c.name for c in self.closuredefs.values()], - self.argument_declared) + return ( + "Identifiers(declared=%r, locally_declared=%r, " + "undeclared=%r, topleveldefs=%r, closuredefs=%r, " + "argumentdeclared=%r)" + % ( + list(self.declared), + list(self.locally_declared), + list(self.undeclared), + [c.name for c in self.topleveldefs.values()], + [c.name for c in self.closuredefs.values()], + self.argument_declared, + ) + ) def check_declared(self, node): """update the state of this Identifiers with the undeclared and declared identifiers of the given node.""" for ident in node.undeclared_identifiers(): - if ident != 'context' and\ - ident not in self.declared.union(self.locally_declared): + if ident != "context" and ident not in self.declared.union( + self.locally_declared + ): self.undeclared.add(ident) for ident in node.declared_identifiers(): self.locally_declared.add(ident) @@ -1081,7 +1148,8 @@ class _Identifiers(object): if not node.ismodule: self.check_declared(node) self.locally_assigned = self.locally_assigned.union( - node.declared_identifiers()) + node.declared_identifiers() + ) def visitNamespaceTag(self, node): # only traverse into the sub-elements of a @@ -1094,13 +1162,16 @@ class _Identifiers(object): def _check_name_exists(self, collection, node): existing = collection.get(node.funcname) collection[node.funcname] = node - if existing is not None and \ - existing is not node and \ - (node.is_block or existing.is_block): + if ( + existing is not None + and existing is not node + and (node.is_block or existing.is_block) + ): raise exceptions.CompileException( - "%%def or %%block named '%s' already " - "exists in this template." % - node.funcname, **node.exception_kwargs) + "%%def or %%block named '%s' already " + "exists in this template." % node.funcname, + **node.exception_kwargs + ) def visitDefTag(self, node): if node.is_root() and not node.is_anonymous: @@ -1109,8 +1180,9 @@ class _Identifiers(object): self._check_name_exists(self.closuredefs, node) for ident in node.undeclared_identifiers(): - if ident != 'context' and \ - ident not in self.declared.union(self.locally_declared): + if ident != "context" and ident not in self.declared.union( + self.locally_declared + ): self.undeclared.add(ident) # visit defs only one level deep @@ -1126,17 +1198,23 @@ class _Identifiers(object): if isinstance(self.node, parsetree.DefTag): raise exceptions.CompileException( - "Named block '%s' not allowed inside of def '%s'" - % (node.name, self.node.name), **node.exception_kwargs) - elif isinstance(self.node, - (parsetree.CallTag, parsetree.CallNamespaceTag)): + "Named block '%s' not allowed inside of def '%s'" + % (node.name, self.node.name), + **node.exception_kwargs + ) + elif isinstance( + self.node, (parsetree.CallTag, parsetree.CallNamespaceTag) + ): raise exceptions.CompileException( - "Named block '%s' not allowed inside of <%%call> tag" - % (node.name, ), **node.exception_kwargs) + "Named block '%s' not allowed inside of <%%call> tag" + % (node.name,), + **node.exception_kwargs + ) for ident in node.undeclared_identifiers(): - if ident != 'context' and \ - ident not in self.declared.union(self.locally_declared): + if ident != "context" and ident not in self.declared.union( + self.locally_declared + ): self.undeclared.add(ident) if not node.is_anonymous: @@ -1151,8 +1229,9 @@ class _Identifiers(object): def visitTextTag(self, node): for ident in node.undeclared_identifiers(): - if ident != 'context' and \ - ident not in self.declared.union(self.locally_declared): + if ident != "context" and ident not in self.declared.union( + self.locally_declared + ): self.undeclared.add(ident) def visitIncludeTag(self, node): @@ -1169,9 +1248,9 @@ class _Identifiers(object): def visitCallTag(self, node): if node is self.node: for ident in node.undeclared_identifiers(): - if ident != 'context' and \ - ident not in self.declared.union( - self.locally_declared): + if ident != "context" and ident not in self.declared.union( + self.locally_declared + ): self.undeclared.add(ident) for ident in node.declared_identifiers(): self.argument_declared.add(ident) @@ -1179,17 +1258,18 @@ class _Identifiers(object): n.accept_visitor(self) else: for ident in node.undeclared_identifiers(): - if ident != 'context' and \ - ident not in self.declared.union( - self.locally_declared): + if ident != "context" and ident not in self.declared.union( + self.locally_declared + ): self.undeclared.add(ident) _FOR_LOOP = re.compile( - r'^for\s+((?:\(?)\s*[A-Za-z_][A-Za-z_0-9]*' - r'(?:\s*,\s*(?:[A-Za-z_][A-Za-z0-9_]*),??)*\s*(?:\)?))\s+in\s+(.*):' + r"^for\s+((?:\(?)\s*[A-Za-z_][A-Za-z_0-9]*" + r"(?:\s*,\s*(?:[A-Za-z_][A-Za-z0-9_]*),??)*\s*(?:\)?))\s+in\s+(.*):" ) + def mangle_mako_loop(node, printer): """converts a for loop into a context manager wrapped around a for loop when access to the `loop` variable has been detected in the for loop body @@ -1201,11 +1281,11 @@ def mangle_mako_loop(node, printer): match = _FOR_LOOP.match(node.text) if match: printer.writelines( - 'loop = __M_loop._enter(%s)' % match.group(2), - 'try:' - #'with __M_loop(%s) as loop:' % match.group(2) + "loop = __M_loop._enter(%s)" % match.group(2), + "try:" + # 'with __M_loop(%s) as loop:' % match.group(2) ) - text = 'for %s in loop:' % match.group(1) + text = "for %s in loop:" % match.group(1) else: raise SyntaxError("Couldn't apply loop context: %s" % node.text) else: @@ -1214,6 +1294,7 @@ def mangle_mako_loop(node, printer): class LoopVariable(object): + """A node visitor which looks for the name 'loop' within undeclared identifiers.""" @@ -1221,7 +1302,7 @@ class LoopVariable(object): self.detected = False def _loop_reference_detected(self, node): - if 'loop' in node.undeclared_identifiers(): + if "loop" in node.undeclared_identifiers(): self.detected = True else: for n in node.get_children(): diff --git a/lib/mako/compat.py b/lib/mako/compat.py index fe277bbf..06bb8d99 100644 --- a/lib/mako/compat.py +++ b/lib/mako/compat.py @@ -1,20 +1,61 @@ +# mako/compat.py +# Copyright 2006-2020 the Mako authors and contributors +# +# This module is part of Mako and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +import collections +import inspect import sys -import time py3k = sys.version_info >= (3, 0) -py33 = sys.version_info >= (3, 3) py2k = sys.version_info < (3,) -py26 = sys.version_info >= (2, 6) -jython = sys.platform.startswith('java') -win32 = sys.platform.startswith('win') -pypy = hasattr(sys, 'pypy_version_info') +py27 = sys.version_info >= (2, 7) +jython = sys.platform.startswith("java") +win32 = sys.platform.startswith("win") +pypy = hasattr(sys, "pypy_version_info") + +ArgSpec = collections.namedtuple( + "ArgSpec", ["args", "varargs", "keywords", "defaults"] +) + + +def inspect_getargspec(func): + """getargspec based on fully vendored getfullargspec from Python 3.3.""" + + if inspect.ismethod(func): + func = func.__func__ + if not inspect.isfunction(func): + raise TypeError("{!r} is not a Python function".format(func)) + + co = func.__code__ + if not inspect.iscode(co): + raise TypeError("{!r} is not a code object".format(co)) + + nargs = co.co_argcount + names = co.co_varnames + nkwargs = co.co_kwonlyargcount if py3k else 0 + args = list(names[:nargs]) + + nargs += nkwargs + varargs = None + if co.co_flags & inspect.CO_VARARGS: + varargs = co.co_varnames[nargs] + nargs = nargs + 1 + varkw = None + if co.co_flags & inspect.CO_VARKEYWORDS: + varkw = co.co_varnames[nargs] + + return ArgSpec(args, varargs, varkw, func.__defaults__) + if py3k: from io import StringIO import builtins as compat_builtins from urllib.parse import quote_plus, unquote_plus from html.entities import codepoint2name, name2codepoint - string_types = str, + + string_types = (str,) binary_type = bytes text_type = str @@ -29,8 +70,10 @@ if py3k: def octal(lit): return eval("0o" + lit) + else: - import __builtin__ as compat_builtins + import __builtin__ as compat_builtins # noqa + try: from cStringIO import StringIO except: @@ -38,14 +81,15 @@ else: byte_buffer = StringIO - from urllib import quote_plus, unquote_plus - from htmlentitydefs import codepoint2name, name2codepoint - string_types = basestring, + from urllib import quote_plus, unquote_plus # noqa + from htmlentitydefs import codepoint2name, name2codepoint # noqa + + string_types = (basestring,) # noqa binary_type = str - text_type = unicode + text_type = unicode # noqa def u(s): - return unicode(s, "utf-8") + return unicode(s, "utf-8") # noqa def b(s): return s @@ -54,105 +98,62 @@ else: return eval("0" + lit) -if py33: - from importlib import machinery - def load_module(module_id, path): - return machinery.SourceFileLoader(module_id, path).load_module() +if py3k: + from importlib import machinery, util + + if hasattr(util, 'module_from_spec'): + # Python 3.5+ + def load_module(module_id, path): + spec = util.spec_from_file_location(module_id, path) + module = util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + else: + def load_module(module_id, path): + module = machinery.SourceFileLoader(module_id, path).load_module() + del sys.modules[module_id] + return module + else: import imp + def load_module(module_id, path): - fp = open(path, 'rb') + fp = open(path, "rb") try: - return imp.load_source(module_id, path, fp) + module = imp.load_source(module_id, path, fp) + del sys.modules[module_id] + return module finally: fp.close() if py3k: + def reraise(tp, value, tb=None, cause=None): if cause is not None: value.__cause__ = cause if value.__traceback__ is not tb: raise value.with_traceback(tb) raise value + + else: - exec("def reraise(tp, value, tb=None, cause=None):\n" - " raise tp, value, tb\n") + exec( + "def reraise(tp, value, tb=None, cause=None):\n" + " raise tp, value, tb\n" + ) def exception_as(): return sys.exc_info()[1] -try: - import threading - if py3k: - import _thread as thread - else: - import thread -except ImportError: - import dummy_threading as threading - if py3k: - import _dummy_thread as thread - else: - import dummy_thread as thread -if win32 or jython: - time_func = time.clock -else: - time_func = time.time +all = all # noqa -try: - from functools import partial -except: - def partial(func, *args, **keywords): - def newfunc(*fargs, **fkeywords): - newkeywords = keywords.copy() - newkeywords.update(fkeywords) - return func(*(args + fargs), **newkeywords) - return newfunc - - -all = all -import json def exception_name(exc): return exc.__class__.__name__ -try: - from inspect import CO_VARKEYWORDS, CO_VARARGS - def inspect_func_args(fn): - if py3k: - co = fn.__code__ - else: - co = fn.func_code - - nargs = co.co_argcount - names = co.co_varnames - args = list(names[:nargs]) - - varargs = None - if co.co_flags & CO_VARARGS: - varargs = co.co_varnames[nargs] - nargs = nargs + 1 - varkw = None - if co.co_flags & CO_VARKEYWORDS: - varkw = co.co_varnames[nargs] - - if py3k: - return args, varargs, varkw, fn.__defaults__ - else: - return args, varargs, varkw, fn.func_defaults -except ImportError: - import inspect - def inspect_func_args(fn): - return inspect.getargspec(fn) - -if py3k: - def callable(fn): - return hasattr(fn, '__call__') -else: - callable = callable - ################################################ # cross-compatible metaclass implementation @@ -160,6 +161,8 @@ else: def with_metaclass(meta, base=object): """Create a base class with a metaclass.""" return meta("%sBase" % meta.__name__, (base,), {}) + + ################################################ @@ -168,7 +171,7 @@ def arg_stringname(func_arg): In Python3.4 a function's args are of _ast.arg type not _ast.name """ - if hasattr(func_arg, 'arg'): + if hasattr(func_arg, "arg"): return func_arg.arg else: return str(func_arg) diff --git a/lib/mako/exceptions.py b/lib/mako/exceptions.py index c531f211..ea7b20db 100644 --- a/lib/mako/exceptions.py +++ b/lib/mako/exceptions.py @@ -1,21 +1,26 @@ # mako/exceptions.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php """exception classes""" -import traceback import sys -from mako import util, compat +import traceback + +from mako import compat +from mako import util + class MakoException(Exception): pass + class RuntimeException(MakoException): pass + def _format_filepos(lineno, pos, filename): if filename is None: return " at line: %d char: %d" % (lineno, pos) @@ -25,41 +30,53 @@ def _format_filepos(lineno, pos, filename): class CompileException(MakoException): def __init__(self, message, source, lineno, pos, filename): - MakoException.__init__(self, - message + _format_filepos(lineno, pos, filename)) + MakoException.__init__( + self, message + _format_filepos(lineno, pos, filename) + ) self.lineno = lineno self.pos = pos self.filename = filename self.source = source + class SyntaxException(MakoException): def __init__(self, message, source, lineno, pos, filename): - MakoException.__init__(self, - message + _format_filepos(lineno, pos, filename)) + MakoException.__init__( + self, message + _format_filepos(lineno, pos, filename) + ) self.lineno = lineno self.pos = pos self.filename = filename self.source = source + class UnsupportedError(MakoException): + """raised when a retired feature is used.""" + class NameConflictError(MakoException): + """raised when a reserved word is used inappropriately""" + class TemplateLookupException(MakoException): pass + class TopLevelLookupException(TemplateLookupException): pass + class RichTraceback(object): + """Pull the current exception from the ``sys`` traceback and extracts Mako-specific template information. See the usage examples in :ref:`handling_exceptions`. """ + def __init__(self, error=None, traceback=None): self.source, self.lineno = "", 0 @@ -98,7 +115,7 @@ class RichTraceback(object): # str(Exception(u'\xe6')) work in Python < 2.6 self.message = self.error.args[0] if not isinstance(self.message, compat.text_type): - self.message = compat.text_type(self.message, 'ascii', 'replace') + self.message = compat.text_type(self.message, "ascii", "replace") def _get_reformatted_records(self, records): for rec in records: @@ -134,25 +151,28 @@ class RichTraceback(object): source, and code line from that line number of the template.""" import mako.template + mods = {} rawrecords = traceback.extract_tb(trcback) new_trcback = [] for filename, lineno, function, line in rawrecords: if not line: - line = '' + line = "" try: - (line_map, template_lines) = mods[filename] + (line_map, template_lines, template_filename) = mods[filename] except KeyError: try: info = mako.template._get_module_info(filename) module_source = info.code template_source = info.source - template_filename = info.template_filename or filename + template_filename = ( + info.template_filename or info.template_uri or filename + ) except KeyError: # A normal .py file (not a Template) if not compat.py3k: try: - fp = open(filename, 'rb') + fp = open(filename, "rb") encoding = util.parse_encoding(fp) fp.close() except IOError: @@ -160,21 +180,33 @@ class RichTraceback(object): if encoding: line = line.decode(encoding) else: - line = line.decode('ascii', 'replace') - new_trcback.append((filename, lineno, function, line, - None, None, None, None)) + line = line.decode("ascii", "replace") + new_trcback.append( + ( + filename, + lineno, + function, + line, + None, + None, + None, + None, + ) + ) continue template_ln = 1 - source_map = mako.template.ModuleInfo.\ - get_module_source_metadata( - module_source, full_line_map=True) - line_map = source_map['full_line_map'] + mtm = mako.template.ModuleInfo + source_map = mtm.get_module_source_metadata( + module_source, full_line_map=True + ) + line_map = source_map["full_line_map"] - template_lines = [line for line in - template_source.split("\n")] - mods[filename] = (line_map, template_lines) + template_lines = [ + line_ for line_ in template_source.split("\n") + ] + mods[filename] = (line_map, template_lines, template_filename) template_ln = line_map[lineno - 1] @@ -182,9 +214,18 @@ class RichTraceback(object): template_line = template_lines[template_ln - 1] else: template_line = None - new_trcback.append((filename, lineno, function, - line, template_filename, template_ln, - template_line, template_source)) + new_trcback.append( + ( + filename, + lineno, + function, + line, + template_filename, + template_ln, + template_line, + template_source, + ) + ) if not self.source: for l in range(len(new_trcback) - 1, 0, -1): if new_trcback[l][5]: @@ -195,15 +236,17 @@ class RichTraceback(object): if new_trcback: try: # A normal .py file (not a Template) - fp = open(new_trcback[-1][0], 'rb') + fp = open(new_trcback[-1][0], "rb") encoding = util.parse_encoding(fp) + if compat.py3k and not encoding: + encoding = "utf-8" fp.seek(0) self.source = fp.read() fp.close() if encoding: self.source = self.source.decode(encoding) except IOError: - self.source = '' + self.source = "" self.lineno = new_trcback[-1][1] return new_trcback @@ -216,7 +259,9 @@ def text_error_template(lookup=None): """ import mako.template - return mako.template.Template(r""" + + return mako.template.Template( + r""" <%page args="error=None, traceback=None"/> <%! from mako.exceptions import RichTraceback @@ -230,28 +275,36 @@ Traceback (most recent call last): ${line | trim} % endfor ${tback.errorname}: ${tback.message} -""") +""" + ) def _install_pygments(): global syntax_highlight, pygments_html_formatter - from mako.ext.pygmentplugin import syntax_highlight,\ - pygments_html_formatter + from mako.ext.pygmentplugin import syntax_highlight # noqa + from mako.ext.pygmentplugin import pygments_html_formatter # noqa + def _install_fallback(): global syntax_highlight, pygments_html_formatter from mako.filters import html_escape + pygments_html_formatter = None - def syntax_highlight(filename='', language=None): + + def syntax_highlight(filename="", language=None): return html_escape + def _install_highlighting(): try: _install_pygments() except ImportError: _install_fallback() + + _install_highlighting() + def html_error_template(): """Provides a template that renders a stack trace in an HTML format, providing an excerpt of code as well as substituting source template @@ -266,7 +319,9 @@ def html_error_template(): """ import mako.template - return mako.template.Template(r""" + + return mako.template.Template( + r""" <%! from mako.exceptions import RichTraceback, syntax_highlight,\ pygments_html_formatter @@ -369,5 +424,7 @@ def html_error_template(): % endif -""", output_encoding=sys.getdefaultencoding(), - encoding_errors='htmlentityreplace') +""", + output_encoding=sys.getdefaultencoding(), + encoding_errors="htmlentityreplace", + ) diff --git a/lib/mako/ext/autohandler.py b/lib/mako/ext/autohandler.py index 8deaae19..8b1324ef 100644 --- a/lib/mako/ext/autohandler.py +++ b/lib/mako/ext/autohandler.py @@ -1,5 +1,5 @@ # ext/autohandler.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -8,26 +8,29 @@ requires that the TemplateLookup class is used with templates. -usage: +usage:: -<%! - from mako.ext.autohandler import autohandler -%> -<%inherit file="${autohandler(template, context)}"/> + <%! + from mako.ext.autohandler import autohandler + %> + <%inherit file="${autohandler(template, context)}"/> -or with custom autohandler filename: +or with custom autohandler filename:: -<%! - from mako.ext.autohandler import autohandler -%> -<%inherit file="${autohandler(template, context, name='somefilename')}"/> + <%! + from mako.ext.autohandler import autohandler + %> + <%inherit file="${autohandler(template, context, name='somefilename')}"/> """ -import posixpath, os, re +import os +import posixpath +import re -def autohandler(template, context, name='autohandler'): + +def autohandler(template, context, name="autohandler"): lookup = context.lookup _template_uri = template.module._template_uri if not lookup.filesystem_checks: @@ -36,30 +39,32 @@ def autohandler(template, context, name='autohandler'): except KeyError: pass - tokens = re.findall(r'([^/]+)', posixpath.dirname(_template_uri)) + [name] + tokens = re.findall(r"([^/]+)", posixpath.dirname(_template_uri)) + [name] while len(tokens): - path = '/' + '/'.join(tokens) + path = "/" + "/".join(tokens) if path != _template_uri and _file_exists(lookup, path): if not lookup.filesystem_checks: return lookup._uri_cache.setdefault( - (autohandler, _template_uri, name), path) + (autohandler, _template_uri, name), path + ) else: return path if len(tokens) == 1: break tokens[-2:] = [name] - + if not lookup.filesystem_checks: return lookup._uri_cache.setdefault( - (autohandler, _template_uri, name), None) + (autohandler, _template_uri, name), None + ) else: return None + def _file_exists(lookup, path): - psub = re.sub(r'^/', '',path) + psub = re.sub(r"^/", "", path) for d in lookup.directories: - if os.path.exists(d + '/' + psub): + if os.path.exists(d + "/" + psub): return True else: return False - diff --git a/lib/mako/ext/babelplugin.py b/lib/mako/ext/babelplugin.py index ead70811..76bbc5b0 100644 --- a/lib/mako/ext/babelplugin.py +++ b/lib/mako/ext/babelplugin.py @@ -1,11 +1,12 @@ # ext/babelplugin.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php """gettext message extraction via Babel: http://babel.edgewall.org/""" from babel.messages.extract import extract_python + from mako.ext.extract import MessageExtractor @@ -14,22 +15,30 @@ class BabelMakoExtractor(MessageExtractor): self.keywords = keywords self.options = options self.config = { - 'comment-tags': u' '.join(comment_tags), - 'encoding': options.get('input_encoding', - options.get('encoding', None)), - } + "comment-tags": u" ".join(comment_tags), + "encoding": options.get( + "input_encoding", options.get("encoding", None) + ), + } super(BabelMakoExtractor, self).__init__() def __call__(self, fileobj): return self.process_file(fileobj) def process_python(self, code, code_lineno, translator_strings): - comment_tags = self.config['comment-tags'] - for lineno, funcname, messages, python_translator_comments \ - in extract_python(code, - self.keywords, comment_tags, self.options): - yield (code_lineno + (lineno - 1), funcname, messages, - translator_strings + python_translator_comments) + comment_tags = self.config["comment-tags"] + for ( + lineno, + funcname, + messages, + python_translator_comments, + ) in extract_python(code, self.keywords, comment_tags, self.options): + yield ( + code_lineno + (lineno - 1), + funcname, + messages, + translator_strings + python_translator_comments, + ) def extract(fileobj, keywords, comment_tags, options): diff --git a/lib/mako/ext/beaker_cache.py b/lib/mako/ext/beaker_cache.py index 40ef7745..f65ce43a 100644 --- a/lib/mako/ext/beaker_cache.py +++ b/lib/mako/ext/beaker_cache.py @@ -1,7 +1,12 @@ +# ext/beaker_cache.py +# Copyright 2006-2020 the Mako authors and contributors +# +# This module is part of Mako and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + """Provide a :class:`.CacheImpl` for the Beaker caching system.""" from mako import exceptions - from mako.cache import CacheImpl try: @@ -15,6 +20,7 @@ _beaker_cache = None class BeakerCacheImpl(CacheImpl): + """A :class:`.CacheImpl` provided for the Beaker caching system. This plugin is used by default, based on the default @@ -26,36 +32,37 @@ class BeakerCacheImpl(CacheImpl): def __init__(self, cache): if not has_beaker: raise exceptions.RuntimeException( - "Can't initialize Beaker plugin; Beaker is not installed.") + "Can't initialize Beaker plugin; Beaker is not installed." + ) global _beaker_cache if _beaker_cache is None: - if 'manager' in cache.template.cache_args: - _beaker_cache = cache.template.cache_args['manager'] + if "manager" in cache.template.cache_args: + _beaker_cache = cache.template.cache_args["manager"] else: _beaker_cache = beaker_cache.CacheManager() super(BeakerCacheImpl, self).__init__(cache) def _get_cache(self, **kw): - expiretime = kw.pop('timeout', None) - if 'dir' in kw: - kw['data_dir'] = kw.pop('dir') + expiretime = kw.pop("timeout", None) + if "dir" in kw: + kw["data_dir"] = kw.pop("dir") elif self.cache.template.module_directory: - kw['data_dir'] = self.cache.template.module_directory + kw["data_dir"] = self.cache.template.module_directory - if 'manager' in kw: - kw.pop('manager') + if "manager" in kw: + kw.pop("manager") - if kw.get('type') == 'memcached': - kw['type'] = 'ext:memcached' + if kw.get("type") == "memcached": + kw["type"] = "ext:memcached" - if 'region' in kw: - region = kw.pop('region') + if "region" in kw: + region = kw.pop("region") cache = _beaker_cache.get_cache_region(self.cache.id, region, **kw) else: cache = _beaker_cache.get_cache(self.cache.id, **kw) - cache_args = {'starttime': self.cache.starttime} + cache_args = {"starttime": self.cache.starttime} if expiretime: - cache_args['expiretime'] = expiretime + cache_args["expiretime"] = expiretime return cache, cache_args def get_or_create(self, key, creation_function, **kw): diff --git a/lib/mako/ext/extract.py b/lib/mako/ext/extract.py index 5ce51757..ad2348a5 100644 --- a/lib/mako/ext/extract.py +++ b/lib/mako/ext/extract.py @@ -1,4 +1,11 @@ +# ext/extract.py +# Copyright 2006-2020 the Mako authors and contributors +# +# This module is part of Mako and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + import re + from mako import compat from mako import lexer from mako import parsetree @@ -7,22 +14,26 @@ from mako import parsetree class MessageExtractor(object): def process_file(self, fileobj): template_node = lexer.Lexer( - fileobj.read(), - input_encoding=self.config['encoding']).parse() + fileobj.read(), input_encoding=self.config["encoding"] + ).parse() for extracted in self.extract_nodes(template_node.get_children()): yield extracted def extract_nodes(self, nodes): translator_comments = [] in_translator_comments = False + input_encoding = self.config["encoding"] or "ascii" comment_tags = list( - filter(None, re.split(r'\s+', self.config['comment-tags']))) + filter(None, re.split(r"\s+", self.config["comment-tags"])) + ) for node in nodes: child_nodes = None - if in_translator_comments and \ - isinstance(node, parsetree.Text) and \ - not node.content.strip(): + if ( + in_translator_comments + and isinstance(node, parsetree.Text) + and not node.content.strip() + ): # Ignore whitespace within translator comments continue @@ -30,13 +41,15 @@ class MessageExtractor(object): value = node.text.strip() if in_translator_comments: translator_comments.extend( - self._split_comment(node.lineno, value)) + self._split_comment(node.lineno, value) + ) continue for comment_tag in comment_tags: if value.startswith(comment_tag): in_translator_comments = True translator_comments.extend( - self._split_comment(node.lineno, value)) + self._split_comment(node.lineno, value) + ) continue if isinstance(node, parsetree.DefTag): @@ -66,22 +79,31 @@ class MessageExtractor(object): else: continue - # Comments don't apply unless they immediately preceed the message - if translator_comments and \ - translator_comments[-1][0] < node.lineno - 1: + # Comments don't apply unless they immediately precede the message + if ( + translator_comments + and translator_comments[-1][0] < node.lineno - 1 + ): translator_comments = [] translator_strings = [ - comment[1] for comment in translator_comments] + comment[1] for comment in translator_comments + ] if isinstance(code, compat.text_type): - code = code.encode('ascii', 'backslashreplace') + code = code.encode(input_encoding, "backslashreplace") used_translator_comments = False - code = compat.byte_buffer(code) + # We add extra newline to work around a pybabel bug + # (see python-babel/babel#274, parse_encoding dies if the first + # input string of the input is non-ascii) + # Also, because we added it, we have to subtract one from + # node.lineno + code = compat.byte_buffer(compat.b("\n") + code) for message in self.process_python( - code, node.lineno, translator_strings): + code, node.lineno - 1, translator_strings + ): yield message used_translator_comments = True @@ -97,5 +119,7 @@ class MessageExtractor(object): def _split_comment(lineno, comment): """Return the multiline comment at lineno split into a list of comment line numbers and the accompanying comment line""" - return [(lineno + index, line) for index, line in - enumerate(comment.splitlines())] + return [ + (lineno + index, line) + for index, line in enumerate(comment.splitlines()) + ] diff --git a/lib/mako/ext/linguaplugin.py b/lib/mako/ext/linguaplugin.py index a809072c..c40fa748 100644 --- a/lib/mako/ext/linguaplugin.py +++ b/lib/mako/ext/linguaplugin.py @@ -1,38 +1,65 @@ +# ext/linguaplugin.py +# Copyright 2006-2020 the Mako authors and contributors +# +# This module is part of Mako and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + import io + from lingua.extractors import Extractor -from lingua.extractors import Message from lingua.extractors import get_extractor -from mako.ext.extract import MessageExtractor +from lingua.extractors import Message + from mako import compat +from mako.ext.extract import MessageExtractor class LinguaMakoExtractor(Extractor, MessageExtractor): - '''Mako templates''' - extensions = ['.mako'] - default_config = { - 'encoding': 'utf-8', - 'comment-tags': '', - } + + """Mako templates""" + + extensions = [".mako"] + default_config = {"encoding": "utf-8", "comment-tags": ""} def __call__(self, filename, options, fileobj=None): self.options = options self.filename = filename - self.python_extractor = get_extractor('x.py') + self.python_extractor = get_extractor("x.py") if fileobj is None: - fileobj = open(filename, 'rb') - return self.process_file(fileobj) + fileobj = open(filename, "rb") + must_close = True + else: + must_close = False + try: + for message in self.process_file(fileobj): + yield message + finally: + if must_close: + fileobj.close() def process_python(self, code, code_lineno, translator_strings): source = code.getvalue().strip() - if source.endswith(compat.b(':')): - source += compat.b(' pass') - code = io.BytesIO(source) + if source.endswith(compat.b(":")): + if source in ( + compat.b("try:"), + compat.b("else:"), + ) or source.startswith(compat.b("except")): + source = compat.b("") # Ignore try/except and else + elif source.startswith(compat.b("elif")): + source = source[2:] # Replace "elif" with "if" + source += compat.b("pass") + code = io.BytesIO(source) for msg in self.python_extractor( - self.filename, self.options, code, code_lineno): + self.filename, self.options, code, code_lineno - 1 + ): if translator_strings: - msg = Message(msg.msgctxt, msg.msgid, msg.msgid_plural, - msg.flags, - compat.u(' ').join( - translator_strings + [msg.comment]), - msg.tcomment, msg.location) + msg = Message( + msg.msgctxt, + msg.msgid, + msg.msgid_plural, + msg.flags, + compat.u(" ").join(translator_strings + [msg.comment]), + msg.tcomment, + msg.location, + ) yield msg diff --git a/lib/mako/ext/preprocessors.py b/lib/mako/ext/preprocessors.py index c24893bd..9cc06214 100644 --- a/lib/mako/ext/preprocessors.py +++ b/lib/mako/ext/preprocessors.py @@ -1,20 +1,20 @@ # ext/preprocessors.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -"""preprocessing functions, used with the 'preprocessor' +"""preprocessing functions, used with the 'preprocessor' argument on Template, TemplateLookup""" import re + def convert_comments(text): """preprocess old style comments. - + example: - + from mako.ext.preprocessors import convert_comments t = Template(..., preprocessor=convert_comments)""" - return re.sub(r'(?<=\n)\s*#[^#]', "##", text) - + return re.sub(r"(?<=\n)\s*#[^#]", "##", text) diff --git a/lib/mako/ext/pygmentplugin.py b/lib/mako/ext/pygmentplugin.py index 3adcfb8c..943a67a4 100644 --- a/lib/mako/ext/pygmentplugin.py +++ b/lib/mako/ext/pygmentplugin.py @@ -1,44 +1,73 @@ # ext/pygmentplugin.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -from pygments.lexers.web import \ - HtmlLexer, XmlLexer, JavascriptLexer, CssLexer -from pygments.lexers.agile import PythonLexer, Python3Lexer -from pygments.lexer import DelegatingLexer, RegexLexer, bygroups, \ - include, using -from pygments.token import \ - Text, Comment, Operator, Keyword, Name, String, Other -from pygments.formatters.html import HtmlFormatter from pygments import highlight +from pygments.formatters.html import HtmlFormatter +from pygments.lexer import bygroups +from pygments.lexer import DelegatingLexer +from pygments.lexer import include +from pygments.lexer import RegexLexer +from pygments.lexer import using +from pygments.lexers.agile import Python3Lexer +from pygments.lexers.agile import PythonLexer +from pygments.lexers.web import CssLexer +from pygments.lexers.web import HtmlLexer +from pygments.lexers.web import JavascriptLexer +from pygments.lexers.web import XmlLexer +from pygments.token import Comment +from pygments.token import Keyword +from pygments.token import Name +from pygments.token import Operator +from pygments.token import Other +from pygments.token import String +from pygments.token import Text + from mako import compat + class MakoLexer(RegexLexer): - name = 'Mako' - aliases = ['mako'] - filenames = ['*.mao'] + name = "Mako" + aliases = ["mako"] + filenames = ["*.mao"] tokens = { - 'root': [ - (r'(\s*)(\%)(\s*end(?:\w+))(\n|\Z)', - bygroups(Text, Comment.Preproc, Keyword, Other)), - (r'(\s*)(\%(?!%))([^\n]*)(\n|\Z)', - bygroups(Text, Comment.Preproc, using(PythonLexer), Other)), - (r'(\s*)(##[^\n]*)(\n|\Z)', - bygroups(Text, Comment.Preproc, Other)), - (r'''(?s)<%doc>.*?''', Comment.Preproc), - (r'(<%)([\w\.\:]+)', - bygroups(Comment.Preproc, Name.Builtin), 'tag'), - (r'()', - bygroups(Comment.Preproc, Name.Builtin, Comment.Preproc)), - (r'<%(?=([\w\.\:]+))', Comment.Preproc, 'ondeftags'), - (r'(<%(?:!?))(.*?)(%>)(?s)', - bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), - (r'(\$\{)(.*?)(\})', - bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc)), - (r'''(?sx) + "root": [ + ( + r"(\s*)(\%)(\s*end(?:\w+))(\n|\Z)", + bygroups(Text, Comment.Preproc, Keyword, Other), + ), + ( + r"(\s*)(\%(?!%))([^\n]*)(\n|\Z)", + bygroups(Text, Comment.Preproc, using(PythonLexer), Other), + ), + ( + r"(\s*)(##[^\n]*)(\n|\Z)", + bygroups(Text, Comment.Preproc, Other), + ), + (r"""(?s)<%doc>.*?""", Comment.Preproc), + ( + r"(<%)([\w\.\:]+)", + bygroups(Comment.Preproc, Name.Builtin), + "tag", + ), + ( + r"()", + bygroups(Comment.Preproc, Name.Builtin, Comment.Preproc), + ), + (r"<%(?=([\w\.\:]+))", Comment.Preproc, "ondeftags"), + ( + r"(?s)(<%(?:!?))(.*?)(%>)", + bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc), + ), + ( + r"(\$\{)(.*?)(\})", + bygroups(Comment.Preproc, using(PythonLexer), Comment.Preproc), + ), + ( + r"""(?sx) (.+?) # anything, followed by: (?: (?<=\n)(?=%(?!%)|\#\#) | # an eval or comment line @@ -51,72 +80,78 @@ class MakoLexer(RegexLexer): (\\\n) | # an escaped newline \Z # end of string ) - ''', bygroups(Other, Operator)), - (r'\s+', Text), + """, + bygroups(Other, Operator), + ), + (r"\s+", Text), ], - 'ondeftags': [ - (r'<%', Comment.Preproc), - (r'(?<=<%)(include|inherit|namespace|page)', Name.Builtin), - include('tag'), + "ondeftags": [ + (r"<%", Comment.Preproc), + (r"(?<=<%)(include|inherit|namespace|page)", Name.Builtin), + include("tag"), ], - 'tag': [ - (r'((?:\w+)\s*=)\s*(".*?")', - bygroups(Name.Attribute, String)), - (r'/?\s*>', Comment.Preproc, '#pop'), - (r'\s+', Text), + "tag": [ + (r'((?:\w+)\s*=)\s*(".*?")', bygroups(Name.Attribute, String)), + (r"/?\s*>", Comment.Preproc, "#pop"), + (r"\s+", Text), ], - 'attr': [ - ('".*?"', String, '#pop'), - ("'.*?'", String, '#pop'), - (r'[^\s>]+', String, '#pop'), + "attr": [ + ('".*?"', String, "#pop"), + ("'.*?'", String, "#pop"), + (r"[^\s>]+", String, "#pop"), ], } class MakoHtmlLexer(DelegatingLexer): - name = 'HTML+Mako' - aliases = ['html+mako'] + name = "HTML+Mako" + aliases = ["html+mako"] def __init__(self, **options): - super(MakoHtmlLexer, self).__init__(HtmlLexer, MakoLexer, - **options) + super(MakoHtmlLexer, self).__init__(HtmlLexer, MakoLexer, **options) + class MakoXmlLexer(DelegatingLexer): - name = 'XML+Mako' - aliases = ['xml+mako'] + name = "XML+Mako" + aliases = ["xml+mako"] def __init__(self, **options): - super(MakoXmlLexer, self).__init__(XmlLexer, MakoLexer, - **options) + super(MakoXmlLexer, self).__init__(XmlLexer, MakoLexer, **options) + class MakoJavascriptLexer(DelegatingLexer): - name = 'JavaScript+Mako' - aliases = ['js+mako', 'javascript+mako'] + name = "JavaScript+Mako" + aliases = ["js+mako", "javascript+mako"] def __init__(self, **options): - super(MakoJavascriptLexer, self).__init__(JavascriptLexer, - MakoLexer, **options) + super(MakoJavascriptLexer, self).__init__( + JavascriptLexer, MakoLexer, **options + ) + class MakoCssLexer(DelegatingLexer): - name = 'CSS+Mako' - aliases = ['css+mako'] + name = "CSS+Mako" + aliases = ["css+mako"] def __init__(self, **options): - super(MakoCssLexer, self).__init__(CssLexer, MakoLexer, - **options) + super(MakoCssLexer, self).__init__(CssLexer, MakoLexer, **options) -pygments_html_formatter = HtmlFormatter(cssclass='syntax-highlighted', - linenos=True) -def syntax_highlight(filename='', language=None): +pygments_html_formatter = HtmlFormatter( + cssclass="syntax-highlighted", linenos=True +) + + +def syntax_highlight(filename="", language=None): mako_lexer = MakoLexer() if compat.py3k: python_lexer = Python3Lexer() else: python_lexer = PythonLexer() - if filename.startswith('memory:') or language == 'mako': - return lambda string: highlight(string, mako_lexer, - pygments_html_formatter) - return lambda string: highlight(string, python_lexer, - pygments_html_formatter) - + if filename.startswith("memory:") or language == "mako": + return lambda string: highlight( + string, mako_lexer, pygments_html_formatter + ) + return lambda string: highlight( + string, python_lexer, pygments_html_formatter + ) diff --git a/lib/mako/ext/turbogears.py b/lib/mako/ext/turbogears.py index d3976d97..722a6b4b 100644 --- a/lib/mako/ext/turbogears.py +++ b/lib/mako/ext/turbogears.py @@ -1,18 +1,19 @@ # ext/turbogears.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import inspect from mako import compat from mako.lookup import TemplateLookup from mako.template import Template + class TGPlugin(object): + """TurboGears compatible Template Plugin.""" - def __init__(self, extra_vars_func=None, options=None, extension='mak'): + def __init__(self, extra_vars_func=None, options=None, extension="mak"): self.extra_vars_func = extra_vars_func self.extension = extension if not options: @@ -21,16 +22,16 @@ class TGPlugin(object): # Pull the options out and initialize the lookup lookup_options = {} for k, v in options.items(): - if k.startswith('mako.'): + if k.startswith("mako."): lookup_options[k[5:]] = v - elif k in ['directories', 'filesystem_checks', 'module_directory']: + elif k in ["directories", "filesystem_checks", "module_directory"]: lookup_options[k] = v self.lookup = TemplateLookup(**lookup_options) self.tmpl_options = {} # transfer lookup args to template args, based on those available # in getargspec - for kw in inspect.getargspec(Template.__init__)[0]: + for kw in compat.inspect_getargspec(Template.__init__)[0]: if kw in lookup_options: self.tmpl_options[kw] = lookup_options[kw] @@ -39,14 +40,17 @@ class TGPlugin(object): if template_string is not None: return Template(template_string, **self.tmpl_options) # Translate TG dot notation to normal / template path - if '/' not in templatename: - templatename = '/' + templatename.replace('.', '/') + '.' +\ - self.extension + if "/" not in templatename: + templatename = ( + "/" + templatename.replace(".", "/") + "." + self.extension + ) # Lookup template return self.lookup.get_template(templatename) - def render(self, info, format="html", fragment=False, template=None): + def render( + self, info, format="html", fragment=False, template=None # noqa + ): if isinstance(template, compat.string_types): template = self.load_template(template) @@ -55,4 +59,3 @@ class TGPlugin(object): info.update(self.extra_vars_func()) return template.render(**info) - diff --git a/lib/mako/filters.py b/lib/mako/filters.py index d79ce238..0ae33ff4 100644 --- a/lib/mako/filters.py +++ b/lib/mako/filters.py @@ -1,29 +1,31 @@ # mako/filters.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import re import codecs - -from mako.compat import quote_plus, unquote_plus, codepoint2name, \ - name2codepoint +import re from mako import compat +from mako.compat import codepoint2name +from mako.compat import name2codepoint +from mako.compat import quote_plus +from mako.compat import unquote_plus xml_escapes = { - '&': '&', - '>': '>', - '<': '<', - '"': '"', # also " in html-only - "'": ''' # also ' in html-only + "&": "&", + ">": ">", + "<": "<", + '"': """, # also " in html-only + "'": "'", # also ' in html-only } # XXX: " is valid in HTML and XML # ' is not valid HTML, but is valid XML + def legacy_html_escape(s): """legacy HTML escape for non-unicode mode.""" s = s.replace("&", "&") @@ -36,28 +38,34 @@ def legacy_html_escape(s): try: import markupsafe + html_escape = markupsafe.escape except ImportError: html_escape = legacy_html_escape + def xml_escape(string): return re.sub(r'([&<"\'>])', lambda m: xml_escapes[m.group()], string) + def url_escape(string): # convert into a list of octets string = string.encode("utf8") return quote_plus(string) + def legacy_url_escape(string): # convert into a list of octets return quote_plus(string) + def url_unescape(string): text = unquote_plus(string) if not is_ascii_str(text): text = text.decode("utf8") return text + def trim(string): return string.strip() @@ -71,21 +79,31 @@ class Decode(object): return decode(str(x)) else: return compat.text_type(x, encoding=key) + return decode + + decode = Decode() -_ASCII_re = re.compile(r'\A[\x00-\x7f]*\Z') +_ASCII_re = re.compile(r"\A[\x00-\x7f]*\Z") + def is_ascii_str(text): return isinstance(text, str) and _ASCII_re.match(text) + ################################################################ + class XMLEntityEscaper(object): def __init__(self, codepoint2name, name2codepoint): - self.codepoint2entity = dict([(c, compat.text_type('&%s;' % n)) - for c, n in codepoint2name.items()]) + self.codepoint2entity = dict( + [ + (c, compat.text_type("&%s;" % n)) + for c, n in codepoint2name.items() + ] + ) self.name2codepoint = name2codepoint def escape_entities(self, text): @@ -100,8 +118,7 @@ class XMLEntityEscaper(object): try: return self.codepoint2entity[codepoint] except (KeyError, IndexError): - return '&#x%X;' % codepoint - + return "&#x%X;" % codepoint __escapable = re.compile(r'["&<>]|[^\x00-\x7f]') @@ -114,19 +131,22 @@ class XMLEntityEscaper(object): The return value is guaranteed to be ASCII. """ - return self.__escapable.sub(self.__escape, compat.text_type(text) - ).encode('ascii') + return self.__escapable.sub( + self.__escape, compat.text_type(text) + ).encode("ascii") # XXX: This regexp will not match all valid XML entity names__. # (It punts on details involving involving CombiningChars and Extenders.) # # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef - __characterrefs = re.compile(r'''& (?: + __characterrefs = re.compile( + r"""& (?: \#(\d+) | \#x([\da-f]+) | ( (?!\d) [:\w] [-.:\w]+ ) - ) ;''', - re.X | re.UNICODE) + ) ;""", + re.X | re.UNICODE, + ) def __unescape(self, m): dval, hval, name = m.groups() @@ -135,7 +155,7 @@ class XMLEntityEscaper(object): elif hval: codepoint = int(hval, 16) else: - codepoint = self.name2codepoint.get(name, 0xfffd) + codepoint = self.name2codepoint.get(name, 0xFFFD) # U+FFFD = "REPLACEMENT CHARACTER" if codepoint < 128: return chr(codepoint) @@ -159,43 +179,41 @@ html_entities_unescape = _html_entities_escaper.unescape def htmlentityreplace_errors(ex): """An encoding error handler. - This python `codecs`_ error handler replaces unencodable + This python codecs error handler replaces unencodable characters with HTML entities, or, if no HTML entity exists for - the character, XML character references. + the character, XML character references:: - >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace') - 'The cost was €12.' + >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace') + 'The cost was €12.' """ if isinstance(ex, UnicodeEncodeError): # Handle encoding errors - bad_text = ex.object[ex.start:ex.end] + bad_text = ex.object[ex.start : ex.end] text = _html_entities_escaper.escape(bad_text) return (compat.text_type(text), ex.end) raise ex -codecs.register_error('htmlentityreplace', htmlentityreplace_errors) + +codecs.register_error("htmlentityreplace", htmlentityreplace_errors) # TODO: options to make this dynamic per-compilation will be added in a later # release DEFAULT_ESCAPES = { - 'x': 'filters.xml_escape', - 'h': 'filters.html_escape', - 'u': 'filters.url_escape', - 'trim': 'filters.trim', - 'entity': 'filters.html_entities_escape', - 'unicode': 'unicode', - 'decode': 'decode', - 'str': 'str', - 'n': 'n' + "x": "filters.xml_escape", + "h": "filters.html_escape", + "u": "filters.url_escape", + "trim": "filters.trim", + "entity": "filters.html_entities_escape", + "unicode": "unicode", + "decode": "decode", + "str": "str", + "n": "n", } if compat.py3k: - DEFAULT_ESCAPES.update({ - 'unicode': 'str' - }) + DEFAULT_ESCAPES.update({"unicode": "str"}) NON_UNICODE_ESCAPES = DEFAULT_ESCAPES.copy() -NON_UNICODE_ESCAPES['h'] = 'filters.legacy_html_escape' -NON_UNICODE_ESCAPES['u'] = 'filters.legacy_url_escape' - +NON_UNICODE_ESCAPES["h"] = "filters.legacy_html_escape" +NON_UNICODE_ESCAPES["u"] = "filters.legacy_url_escape" diff --git a/lib/mako/lexer.py b/lib/mako/lexer.py index 1dda3982..bbf0c3a5 100644 --- a/lib/mako/lexer.py +++ b/lib/mako/lexer.py @@ -1,22 +1,31 @@ # mako/lexer.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php """provides the Lexer class for parsing template strings into parse trees.""" -import re import codecs -from mako import parsetree, exceptions, compat +import re + +from mako import compat +from mako import exceptions +from mako import parsetree from mako.pygen import adjust_whitespace _regexp_cache = {} + class Lexer(object): - def __init__(self, text, filename=None, - disable_unicode=False, - input_encoding=None, preprocessor=None): + def __init__( + self, + text, + filename=None, + disable_unicode=False, + input_encoding=None, + preprocessor=None, + ): self.text = text self.filename = filename self.template = parsetree.TemplateNode(self.filename) @@ -32,22 +41,24 @@ class Lexer(object): if compat.py3k and disable_unicode: raise exceptions.UnsupportedError( - "Mako for Python 3 does not " - "support disabling Unicode") + "Mako for Python 3 does not " "support disabling Unicode" + ) if preprocessor is None: self.preprocessor = [] - elif not hasattr(preprocessor, '__iter__'): + elif not hasattr(preprocessor, "__iter__"): self.preprocessor = [preprocessor] else: self.preprocessor = preprocessor @property def exception_kwargs(self): - return {'source': self.text, - 'lineno': self.matched_lineno, - 'pos': self.matched_charpos, - 'filename': self.filename} + return { + "source": self.text, + "lineno": self.matched_lineno, + "pos": self.matched_charpos, + "filename": self.filename, + } def match(self, regexp, flags=None): """compile the given regexp, cache the reg, and call match_reg().""" @@ -81,54 +92,63 @@ class Lexer(object): else: self.match_position = end self.matched_lineno = self.lineno - lines = re.findall(r"\n", self.text[mp:self.match_position]) + lines = re.findall(r"\n", self.text[mp : self.match_position]) cp = mp - 1 - while (cp >= 0 and cp < self.textlength and self.text[cp] != '\n'): + while cp >= 0 and cp < self.textlength and self.text[cp] != "\n": cp -= 1 self.matched_charpos = mp - cp self.lineno += len(lines) - #print "MATCHED:", match.group(0), "LINE START:", + # print "MATCHED:", match.group(0), "LINE START:", # self.matched_lineno, "LINE END:", self.lineno - #print "MATCH:", regexp, "\n", self.text[mp : mp + 15], \ + # print "MATCH:", regexp, "\n", self.text[mp : mp + 15], \ # (match and "TRUE" or "FALSE") return match - def parse_until_text(self, *text): + def parse_until_text(self, watch_nesting, *text): startpos = self.match_position - text_re = r'|'.join(text) + text_re = r"|".join(text) brace_level = 0 + paren_level = 0 + bracket_level = 0 while True: - match = self.match(r'#.*\n') + match = self.match(r"#.*\n") if match: continue - match = self.match(r'(\"\"\"|\'\'\'|\"|\')((? 0 or paren_level > 0 or bracket_level > 0) + ): + return ( + self.text[ + startpos : self.match_position - len(match.group(1)) + ], + match.group(1), + ) + elif not match: + match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S) if match: - if match.group(1) == '}' and brace_level > 0: - brace_level -= 1 - continue - return \ - self.text[startpos: - self.match_position - len(match.group(1))],\ - match.group(1) - match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S) - if match: - brace_level += match.group(1).count('{') - brace_level -= match.group(1).count('}') + brace_level += match.group(1).count("{") + brace_level -= match.group(1).count("}") + paren_level += match.group(1).count("(") + paren_level -= match.group(1).count(")") + bracket_level += match.group(1).count("[") + bracket_level -= match.group(1).count("]") continue raise exceptions.SyntaxException( - "Expected: %s" % - ','.join(text), - **self.exception_kwargs) + "Expected: %s" % ",".join(text), **self.exception_kwargs + ) def append_node(self, nodecls, *args, **kwargs): - kwargs.setdefault('source', self.text) - kwargs.setdefault('lineno', self.matched_lineno) - kwargs.setdefault('pos', self.matched_charpos) - kwargs['filename'] = self.filename + kwargs.setdefault("source", self.text) + kwargs.setdefault("lineno", self.matched_lineno) + kwargs.setdefault("pos", self.matched_charpos) + kwargs["filename"] = self.filename node = nodecls(*args, **kwargs) if len(self.tag): self.tag[-1].nodes.append(node) @@ -141,8 +161,10 @@ class Lexer(object): if self.control_line: control_frame = self.control_line[-1] control_frame.nodes.append(node) - if not (isinstance(node, parsetree.ControlLine) and - control_frame.is_ternary(node.keyword)): + if not ( + isinstance(node, parsetree.ControlLine) + and control_frame.is_ternary(node.keyword) + ): if self.ternary_stack and self.ternary_stack[-1]: self.ternary_stack[-1][-1].nodes.append(node) if isinstance(node, parsetree.Tag): @@ -156,17 +178,20 @@ class Lexer(object): elif node.is_primary: self.control_line.append(node) self.ternary_stack.append([]) - elif self.control_line and \ - self.control_line[-1].is_ternary(node.keyword): + elif self.control_line and self.control_line[-1].is_ternary( + node.keyword + ): self.ternary_stack[-1].append(node) - elif self.control_line and \ - not self.control_line[-1].is_ternary(node.keyword): + elif self.control_line and not self.control_line[-1].is_ternary( + node.keyword + ): raise exceptions.SyntaxException( - "Keyword '%s' not a legal ternary for keyword '%s'" % - (node.keyword, self.control_line[-1].keyword), - **self.exception_kwargs) + "Keyword '%s' not a legal ternary for keyword '%s'" + % (node.keyword, self.control_line[-1].keyword), + **self.exception_kwargs + ) - _coding_re = re.compile(r'#.*coding[:=]\s*([-\w.]+).*\r?\n') + _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n") def decode_raw_stream(self, text, decode_raw, known_encoding, filename): """given string/unicode or bytes/string, determine encoding @@ -176,43 +201,48 @@ class Lexer(object): """ if isinstance(text, compat.text_type): m = self._coding_re.match(text) - encoding = m and m.group(1) or known_encoding or 'ascii' + encoding = m and m.group(1) or known_encoding or "utf-8" return encoding, text if text.startswith(codecs.BOM_UTF8): - text = text[len(codecs.BOM_UTF8):] - parsed_encoding = 'utf-8' - m = self._coding_re.match(text.decode('utf-8', 'ignore')) - if m is not None and m.group(1) != 'utf-8': + text = text[len(codecs.BOM_UTF8) :] + parsed_encoding = "utf-8" + m = self._coding_re.match(text.decode("utf-8", "ignore")) + if m is not None and m.group(1) != "utf-8": raise exceptions.CompileException( - "Found utf-8 BOM in file, with conflicting " - "magic encoding comment of '%s'" % m.group(1), - text.decode('utf-8', 'ignore'), - 0, 0, filename) + "Found utf-8 BOM in file, with conflicting " + "magic encoding comment of '%s'" % m.group(1), + text.decode("utf-8", "ignore"), + 0, + 0, + filename, + ) else: - m = self._coding_re.match(text.decode('utf-8', 'ignore')) + m = self._coding_re.match(text.decode("utf-8", "ignore")) if m: parsed_encoding = m.group(1) else: - parsed_encoding = known_encoding or 'ascii' + parsed_encoding = known_encoding or "utf-8" if decode_raw: try: text = text.decode(parsed_encoding) except UnicodeDecodeError: raise exceptions.CompileException( - "Unicode decode operation of encoding '%s' failed" % - parsed_encoding, - text.decode('utf-8', 'ignore'), - 0, 0, filename) + "Unicode decode operation of encoding '%s' failed" + % parsed_encoding, + text.decode("utf-8", "ignore"), + 0, + 0, + filename, + ) return parsed_encoding, text def parse(self): - self.encoding, self.text = self.decode_raw_stream(self.text, - not self.disable_unicode, - self.encoding, - self.filename,) + self.encoding, self.text = self.decode_raw_stream( + self.text, not self.disable_unicode, self.encoding, self.filename + ) for preproc in self.preprocessor: self.text = preproc(self.text) @@ -223,7 +253,7 @@ class Lexer(object): self.textlength = len(self.text) - while (True): + while True: if self.match_position > self.textlength: break @@ -249,20 +279,24 @@ class Lexer(object): raise exceptions.CompileException("assertion failed") if len(self.tag): - raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % - self.tag[-1].keyword, - **self.exception_kwargs) + raise exceptions.SyntaxException( + "Unclosed tag: <%%%s>" % self.tag[-1].keyword, + **self.exception_kwargs + ) if len(self.control_line): raise exceptions.SyntaxException( - "Unterminated control keyword: '%s'" % - self.control_line[-1].keyword, - self.text, - self.control_line[-1].lineno, - self.control_line[-1].pos, self.filename) + "Unterminated control keyword: '%s'" + % self.control_line[-1].keyword, + self.text, + self.control_line[-1].lineno, + self.control_line[-1].pos, + self.filename, + ) return self.template def match_tag_start(self): - match = self.match(r''' + match = self.match( + r""" \<% # opening tag ([\w\.\:]+) # keyword @@ -274,9 +308,9 @@ class Lexer(object): (/)?> # closing - ''', - - re.I | re.S | re.X) + """, + re.I | re.S | re.X, + ) if match: keyword, attr, isend = match.groups() @@ -284,22 +318,23 @@ class Lexer(object): attributes = {} if attr: for att in re.findall( - r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr): + r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr + ): key, val1, val2 = att text = val1 or val2 - text = text.replace('\r\n', '\n') + text = text.replace("\r\n", "\n") attributes[key] = text self.append_node(parsetree.Tag, keyword, attributes) if isend: self.tag.pop() else: - if keyword == 'text': - match = self.match(r'(.*?)(?=\)', re.S) + if keyword == "text": + match = self.match(r"(.*?)(?=\)", re.S) if not match: raise exceptions.SyntaxException( - "Unclosed tag: <%%%s>" % - self.tag[-1].keyword, - **self.exception_kwargs) + "Unclosed tag: <%%%s>" % self.tag[-1].keyword, + **self.exception_kwargs + ) self.append_node(parsetree.Text, match.group(1)) return self.match_tag_end() return True @@ -307,25 +342,27 @@ class Lexer(object): return False def match_tag_end(self): - match = self.match(r'\') + match = self.match(r"\") if match: if not len(self.tag): raise exceptions.SyntaxException( - "Closing tag without opening tag: " % - match.group(1), - **self.exception_kwargs) + "Closing tag without opening tag: " + % match.group(1), + **self.exception_kwargs + ) elif self.tag[-1].keyword != match.group(1): raise exceptions.SyntaxException( - "Closing tag does not match tag: <%%%s>" % - (match.group(1), self.tag[-1].keyword), - **self.exception_kwargs) + "Closing tag does not match tag: <%%%s>" + % (match.group(1), self.tag[-1].keyword), + **self.exception_kwargs + ) self.tag.pop() return True else: return False def match_end(self): - match = self.match(r'\Z', re.S) + match = self.match(r"\Z", re.S) if match: string = match.group() if string: @@ -336,7 +373,8 @@ class Lexer(object): return False def match_text(self): - match = self.match(r""" + match = self.match( + r""" (.*?) # anything, followed by: ( (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based @@ -351,7 +389,9 @@ class Lexer(object): (\\\r?\n) # an escaped newline - throw away | \Z # end of string - )""", re.X | re.S) + )""", + re.X | re.S, + ) if match: text = match.group(1) @@ -365,14 +405,17 @@ class Lexer(object): match = self.match(r"<%(!)?") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(r'%>') + text, end = self.parse_until_text(False, r"%>") # the trailing newline helps # compiler.parse() not complain about indentation text = adjust_whitespace(text) + "\n" self.append_node( - parsetree.Code, - text, - match.group(1) == '!', lineno=line, pos=pos) + parsetree.Code, + text, + match.group(1) == "!", + lineno=line, + pos=pos, + ) return True else: return False @@ -381,48 +424,55 @@ class Lexer(object): match = self.match(r"\${") if match: line, pos = self.matched_lineno, self.matched_charpos - text, end = self.parse_until_text(r'\|', r'}') - if end == '|': - escapes, end = self.parse_until_text(r'}') + text, end = self.parse_until_text(True, r"\|", r"}") + if end == "|": + escapes, end = self.parse_until_text(True, r"}") else: escapes = "" - text = text.replace('\r\n', '\n') + text = text.replace("\r\n", "\n") self.append_node( - parsetree.Expression, - text, escapes.strip(), - lineno=line, pos=pos) + parsetree.Expression, + text, + escapes.strip(), + lineno=line, + pos=pos, + ) return True else: return False def match_control_line(self): match = self.match( - r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\r?\n)|[^\r\n])*)" - r"(?:\r?\n|\Z)", re.M) + r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)" + r"(?:\r?\n|\Z)", + re.M, + ) if match: operator = match.group(1) text = match.group(2) - if operator == '%': - m2 = re.match(r'(end)?(\w+)\s*(.*)', text) + if operator == "%": + m2 = re.match(r"(end)?(\w+)\s*(.*)", text) if not m2: raise exceptions.SyntaxException( - "Invalid control line: '%s'" % - text, - **self.exception_kwargs) + "Invalid control line: '%s'" % text, + **self.exception_kwargs + ) isend, keyword = m2.group(1, 2) - isend = (isend is not None) + isend = isend is not None if isend: if not len(self.control_line): raise exceptions.SyntaxException( - "No starting keyword '%s' for '%s'" % - (keyword, text), - **self.exception_kwargs) + "No starting keyword '%s' for '%s'" + % (keyword, text), + **self.exception_kwargs + ) elif self.control_line[-1].keyword != keyword: raise exceptions.SyntaxException( - "Keyword '%s' doesn't match keyword '%s'" % - (text, self.control_line[-1].keyword), - **self.exception_kwargs) + "Keyword '%s' doesn't match keyword '%s'" + % (text, self.control_line[-1].keyword), + **self.exception_kwargs + ) self.append_node(parsetree.ControlLine, keyword, isend, text) else: self.append_node(parsetree.Comment, text) @@ -438,4 +488,3 @@ class Lexer(object): return True else: return False - diff --git a/lib/mako/lookup.py b/lib/mako/lookup.py index 2af54119..476326d4 100644 --- a/lib/mako/lookup.py +++ b/lib/mako/lookup.py @@ -1,11 +1,16 @@ # mako/lookup.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -import os, stat, posixpath, re -from mako import exceptions, util +import os +import posixpath +import re +import stat + +from mako import exceptions +from mako import util from mako.template import Template try: @@ -13,7 +18,9 @@ try: except: import dummy_threading as threading + class TemplateCollection(object): + """Represent a collection of :class:`.Template` objects, identifiable via URI. @@ -79,7 +86,9 @@ class TemplateCollection(object): """ return uri + class TemplateLookup(TemplateCollection): + """Represent a collection of templates that locates template source files from the local filesystem. @@ -144,40 +153,41 @@ class TemplateLookup(TemplateCollection): """ - def __init__(self, - directories=None, - module_directory=None, - filesystem_checks=True, - collection_size=-1, - format_exceptions=False, - error_handler=None, - disable_unicode=False, - bytestring_passthrough=False, - output_encoding=None, - encoding_errors='strict', + def __init__( + self, + directories=None, + module_directory=None, + filesystem_checks=True, + collection_size=-1, + format_exceptions=False, + error_handler=None, + disable_unicode=False, + bytestring_passthrough=False, + output_encoding=None, + encoding_errors="strict", + cache_args=None, + cache_impl="beaker", + cache_enabled=True, + cache_type=None, + cache_dir=None, + cache_url=None, + modulename_callable=None, + module_writer=None, + default_filters=None, + buffer_filters=(), + strict_undefined=False, + imports=None, + future_imports=None, + enable_loop=True, + input_encoding=None, + preprocessor=None, + lexer_cls=None, + include_error_handler=None, + ): - cache_args=None, - cache_impl='beaker', - cache_enabled=True, - cache_type=None, - cache_dir=None, - cache_url=None, - - modulename_callable=None, - module_writer=None, - default_filters=None, - buffer_filters=(), - strict_undefined=False, - imports=None, - future_imports=None, - enable_loop=True, - input_encoding=None, - preprocessor=None, - lexer_cls=None): - - self.directories = [posixpath.normpath(d) for d in - util.to_list(directories, ()) - ] + self.directories = [ + posixpath.normpath(d) for d in util.to_list(directories, ()) + ] self.module_directory = module_directory self.modulename_callable = modulename_callable self.filesystem_checks = filesystem_checks @@ -187,33 +197,34 @@ class TemplateLookup(TemplateCollection): cache_args = {} # transfer deprecated cache_* args if cache_dir: - cache_args.setdefault('dir', cache_dir) + cache_args.setdefault("dir", cache_dir) if cache_url: - cache_args.setdefault('url', cache_url) + cache_args.setdefault("url", cache_url) if cache_type: - cache_args.setdefault('type', cache_type) + cache_args.setdefault("type", cache_type) self.template_args = { - 'format_exceptions':format_exceptions, - 'error_handler':error_handler, - 'disable_unicode':disable_unicode, - 'bytestring_passthrough':bytestring_passthrough, - 'output_encoding':output_encoding, - 'cache_impl':cache_impl, - 'encoding_errors':encoding_errors, - 'input_encoding':input_encoding, - 'module_directory':module_directory, - 'module_writer':module_writer, - 'cache_args':cache_args, - 'cache_enabled':cache_enabled, - 'default_filters':default_filters, - 'buffer_filters':buffer_filters, - 'strict_undefined':strict_undefined, - 'imports':imports, - 'future_imports':future_imports, - 'enable_loop':enable_loop, - 'preprocessor':preprocessor, - 'lexer_cls':lexer_cls + "format_exceptions": format_exceptions, + "error_handler": error_handler, + "include_error_handler": include_error_handler, + "disable_unicode": disable_unicode, + "bytestring_passthrough": bytestring_passthrough, + "output_encoding": output_encoding, + "cache_impl": cache_impl, + "encoding_errors": encoding_errors, + "input_encoding": input_encoding, + "module_directory": module_directory, + "module_writer": module_writer, + "cache_args": cache_args, + "cache_enabled": cache_enabled, + "default_filters": default_filters, + "buffer_filters": buffer_filters, + "strict_undefined": strict_undefined, + "imports": imports, + "future_imports": future_imports, + "enable_loop": enable_loop, + "preprocessor": preprocessor, + "lexer_cls": lexer_cls, } if collection_size == -1: @@ -228,7 +239,8 @@ class TemplateLookup(TemplateCollection): """Return a :class:`.Template` object corresponding to the given ``uri``. - .. note:: The ``relativeto`` argument is not supported here at the moment. + .. note:: The ``relativeto`` argument is not supported here at + the moment. """ @@ -238,14 +250,18 @@ class TemplateLookup(TemplateCollection): else: return self._collection[uri] except KeyError: - u = re.sub(r'^\/+', '', uri) - for dir in self.directories: - srcfile = posixpath.normpath(posixpath.join(dir, u)) + u = re.sub(r"^\/+", "", uri) + for dir_ in self.directories: + # make sure the path seperators are posix - os.altsep is empty + # on POSIX and cannot be used. + dir_ = dir_.replace(os.path.sep, posixpath.sep) + srcfile = posixpath.normpath(posixpath.join(dir_, u)) if os.path.isfile(srcfile): return self._load(srcfile, uri) else: raise exceptions.TopLevelLookupException( - "Cant locate template for uri %r" % uri) + "Cant locate template for uri %r" % uri + ) def adjust_uri(self, uri, relativeto): """Adjust the given ``uri`` based on the given relative URI.""" @@ -254,17 +270,17 @@ class TemplateLookup(TemplateCollection): if key in self._uri_cache: return self._uri_cache[key] - if uri[0] != '/': + if uri[0] != "/": if relativeto is not None: v = self._uri_cache[key] = posixpath.join( - posixpath.dirname(relativeto), uri) + posixpath.dirname(relativeto), uri + ) else: - v = self._uri_cache[key] = '/' + uri + v = self._uri_cache[key] = "/" + uri else: v = self._uri_cache[key] = uri return v - def filename_to_uri(self, filename): """Convert the given ``filename`` to a URI relative to this :class:`.TemplateCollection`.""" @@ -283,9 +299,9 @@ class TemplateLookup(TemplateCollection): """ filename = posixpath.normpath(filename) - for dir in self.directories: - if filename[0:len(dir)] == dir: - return filename[len(dir):] + for dir_ in self.directories: + if filename[0 : len(dir_)] == dir_: + return filename[len(dir_) :] else: return None @@ -304,11 +320,12 @@ class TemplateLookup(TemplateCollection): else: module_filename = None self._collection[uri] = template = Template( - uri=uri, - filename=posixpath.normpath(filename), - lookup=self, - module_filename=module_filename, - **self.template_args) + uri=uri, + filename=posixpath.normpath(filename), + lookup=self, + module_filename=module_filename, + **self.template_args + ) return template except: # if compilation fails etc, ensure @@ -325,8 +342,7 @@ class TemplateLookup(TemplateCollection): try: template_stat = os.stat(template.filename) - if template.module._modified_time < \ - template_stat[stat.ST_MTIME]: + if template.module._modified_time < template_stat[stat.ST_MTIME]: self._collection.pop(uri, None) return self._load(template.filename, uri) else: @@ -334,8 +350,8 @@ class TemplateLookup(TemplateCollection): except OSError: self._collection.pop(uri, None) raise exceptions.TemplateLookupException( - "Cant locate template for uri %r" % uri) - + "Cant locate template for uri %r" % uri + ) def put_string(self, uri, text): """Place a new :class:`.Template` object into this @@ -344,10 +360,8 @@ class TemplateLookup(TemplateCollection): """ self._collection[uri] = Template( - text, - lookup=self, - uri=uri, - **self.template_args) + text, lookup=self, uri=uri, **self.template_args + ) def put_template(self, uri, template): """Place a new :class:`.Template` object into this @@ -356,4 +370,3 @@ class TemplateLookup(TemplateCollection): """ self._collection[uri] = template - diff --git a/lib/mako/parsetree.py b/lib/mako/parsetree.py index 49ec4e06..801e48a7 100644 --- a/lib/mako/parsetree.py +++ b/lib/mako/parsetree.py @@ -1,15 +1,22 @@ # mako/parsetree.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php """defines the parse tree components for Mako templates.""" -from mako import exceptions, ast, util, filters, compat import re +from mako import ast +from mako import compat +from mako import exceptions +from mako import filters +from mako import util + + class Node(object): + """base class for a Node in the parse tree.""" def __init__(self, source, lineno, pos, filename): @@ -20,8 +27,12 @@ class Node(object): @property def exception_kwargs(self): - return {'source': self.source, 'lineno': self.lineno, - 'pos': self.pos, 'filename': self.filename} + return { + "source": self.source, + "lineno": self.lineno, + "pos": self.pos, + "filename": self.filename, + } def get_children(self): return [] @@ -34,11 +45,13 @@ class Node(object): method = getattr(visitor, "visit" + self.__class__.__name__, traverse) method(self) + class TemplateNode(Node): + """a 'container' node that stores the overall collection of nodes.""" def __init__(self, filename): - super(TemplateNode, self).__init__('', 0, 0, filename) + super(TemplateNode, self).__init__("", 0, 0, filename) self.nodes = [] self.page_attributes = {} @@ -47,10 +60,13 @@ class TemplateNode(Node): def __repr__(self): return "TemplateNode(%s, %r)" % ( - util.sorted_dict_repr(self.page_attributes), - self.nodes) + util.sorted_dict_repr(self.page_attributes), + self.nodes, + ) + class ControlLine(Node): + """defines a control line, a line-oriented python line or end tag. e.g.:: @@ -68,7 +84,7 @@ class ControlLine(Node): self.text = text self.keyword = keyword self.isend = isend - self.is_primary = keyword in ['for', 'if', 'while', 'try', 'with'] + self.is_primary = keyword in ["for", "if", "while", "try", "with"] self.nodes = [] if self.isend: self._declared_identifiers = [] @@ -92,9 +108,9 @@ class ControlLine(Node): for this ControlLine""" return keyword in { - 'if':set(['else', 'elif']), - 'try':set(['except', 'finally']), - 'for':set(['else']) + "if": set(["else", "elif"]), + "try": set(["except", "finally"]), + "for": set(["else"]), }.get(self.keyword, []) def __repr__(self): @@ -102,10 +118,12 @@ class ControlLine(Node): self.keyword, self.text, self.isend, - (self.lineno, self.pos) + (self.lineno, self.pos), ) + class Text(Node): + """defines plain text in the template.""" def __init__(self, content, **kwargs): @@ -115,7 +133,9 @@ class Text(Node): def __repr__(self): return "Text(%r, %r)" % (self.content, (self.lineno, self.pos)) + class Code(Node): + """defines a Python code block, either inline or module level. e.g.:: @@ -148,10 +168,12 @@ class Code(Node): return "Code(%r, %r, %r)" % ( self.text, self.ismodule, - (self.lineno, self.pos) + (self.lineno, self.pos), ) + class Comment(Node): + """defines a comment line. # this is a comment @@ -165,7 +187,9 @@ class Comment(Node): def __repr__(self): return "Comment(%r, %r)" % (self.text, (self.lineno, self.pos)) + class Expression(Node): + """defines an inline expression. ${x+y} @@ -185,62 +209,76 @@ class Expression(Node): def undeclared_identifiers(self): # TODO: make the "filter" shortcut list configurable at parse/gen time return self.code.undeclared_identifiers.union( - self.escapes_code.undeclared_identifiers.difference( - set(filters.DEFAULT_ESCAPES.keys()) - ) - ).difference(self.code.declared_identifiers) + self.escapes_code.undeclared_identifiers.difference( + set(filters.DEFAULT_ESCAPES.keys()) + ) + ).difference(self.code.declared_identifiers) def __repr__(self): return "Expression(%r, %r, %r)" % ( self.text, self.escapes_code.args, - (self.lineno, self.pos) + (self.lineno, self.pos), ) + class _TagMeta(type): + """metaclass to allow Tag to produce a subclass according to its keyword""" _classmap = {} - def __init__(cls, clsname, bases, dict): - if getattr(cls, '__keyword__', None) is not None: + def __init__(cls, clsname, bases, dict_): + if getattr(cls, "__keyword__", None) is not None: cls._classmap[cls.__keyword__] = cls - super(_TagMeta, cls).__init__(clsname, bases, dict) + super(_TagMeta, cls).__init__(clsname, bases, dict_) def __call__(cls, keyword, attributes, **kwargs): if ":" in keyword: - ns, defname = keyword.split(':') - return type.__call__(CallNamespaceTag, ns, defname, - attributes, **kwargs) + ns, defname = keyword.split(":") + return type.__call__( + CallNamespaceTag, ns, defname, attributes, **kwargs + ) try: cls = _TagMeta._classmap[keyword] except KeyError: raise exceptions.CompileException( "No such tag: '%s'" % keyword, - source=kwargs['source'], - lineno=kwargs['lineno'], - pos=kwargs['pos'], - filename=kwargs['filename'] + source=kwargs["source"], + lineno=kwargs["lineno"], + pos=kwargs["pos"], + filename=kwargs["filename"], ) return type.__call__(cls, keyword, attributes, **kwargs) + class Tag(compat.with_metaclass(_TagMeta, Node)): """abstract base class for tags. - <%sometag/> + e.g.:: - <%someothertag> - stuff - + <%sometag/> + + <%someothertag> + stuff + """ + __keyword__ = None - def __init__(self, keyword, attributes, expressions, - nonexpressions, required, **kwargs): - """construct a new Tag instance. + def __init__( + self, + keyword, + attributes, + expressions, + nonexpressions, + required, + **kwargs + ): + r"""construct a new Tag instance. this constructor not called directly, and is only called by subclasses. @@ -266,9 +304,10 @@ class Tag(compat.with_metaclass(_TagMeta, Node)): missing = [r for r in required if r not in self.parsed_attributes] if len(missing): raise exceptions.CompileException( - "Missing attribute(s): %s" % - ",".join([repr(m) for m in missing]), - **self.exception_kwargs) + "Missing attribute(s): %s" + % ",".join([repr(m) for m in missing]), + **self.exception_kwargs + ) self.parent = None self.nodes = [] @@ -284,36 +323,40 @@ class Tag(compat.with_metaclass(_TagMeta, Node)): for key in self.attributes: if key in expressions: expr = [] - for x in re.compile(r'(\${.+?})', - re.S).split(self.attributes[key]): - m = re.compile(r'^\${(.+?)}$', re.S).match(x) + for x in re.compile(r"(\${.+?})", re.S).split( + self.attributes[key] + ): + m = re.compile(r"^\${(.+?)}$", re.S).match(x) if m: - code = ast.PythonCode(m.group(1).rstrip(), - **self.exception_kwargs) + code = ast.PythonCode( + m.group(1).rstrip(), **self.exception_kwargs + ) # we aren't discarding "declared_identifiers" here, # which we do so that list comprehension-declared # variables aren't counted. As yet can't find a # condition that requires it here. - undeclared_identifiers = \ - undeclared_identifiers.union( - code.undeclared_identifiers) - expr.append('(%s)' % m.group(1)) + undeclared_identifiers = undeclared_identifiers.union( + code.undeclared_identifiers + ) + expr.append("(%s)" % m.group(1)) else: if x: expr.append(repr(x)) - self.parsed_attributes[key] = " + ".join(expr) or repr('') + self.parsed_attributes[key] = " + ".join(expr) or repr("") elif key in nonexpressions: - if re.search(r'\${.+?}', self.attributes[key]): + if re.search(r"\${.+?}", self.attributes[key]): raise exceptions.CompileException( - "Attibute '%s' in tag '%s' does not allow embedded " - "expressions" % (key, self.keyword), - **self.exception_kwargs) + "Attibute '%s' in tag '%s' does not allow embedded " + "expressions" % (key, self.keyword), + **self.exception_kwargs + ) self.parsed_attributes[key] = repr(self.attributes[key]) else: raise exceptions.CompileException( - "Invalid attribute for tag '%s': '%s'" % - (self.keyword, key), - **self.exception_kwargs) + "Invalid attribute for tag '%s': '%s'" + % (self.keyword, key), + **self.exception_kwargs + ) self.expression_undeclared_identifiers = undeclared_identifiers def declared_identifiers(self): @@ -323,54 +366,64 @@ class Tag(compat.with_metaclass(_TagMeta, Node)): return self.expression_undeclared_identifiers def __repr__(self): - return "%s(%r, %s, %r, %r)" % (self.__class__.__name__, - self.keyword, - util.sorted_dict_repr(self.attributes), - (self.lineno, self.pos), - self.nodes - ) + return "%s(%r, %s, %r, %r)" % ( + self.__class__.__name__, + self.keyword, + util.sorted_dict_repr(self.attributes), + (self.lineno, self.pos), + self.nodes, + ) + class IncludeTag(Tag): - __keyword__ = 'include' + __keyword__ = "include" def __init__(self, keyword, attributes, **kwargs): super(IncludeTag, self).__init__( - keyword, - attributes, - ('file', 'import', 'args'), - (), ('file',), **kwargs) + keyword, + attributes, + ("file", "import", "args"), + (), + ("file",), + **kwargs + ) self.page_args = ast.PythonCode( - "__DUMMY(%s)" % attributes.get('args', ''), - **self.exception_kwargs) + "__DUMMY(%s)" % attributes.get("args", ""), **self.exception_kwargs + ) def declared_identifiers(self): return [] def undeclared_identifiers(self): - identifiers = self.page_args.undeclared_identifiers.\ - difference(set(["__DUMMY"])).\ - difference(self.page_args.declared_identifiers) - return identifiers.union(super(IncludeTag, self). - undeclared_identifiers()) + identifiers = self.page_args.undeclared_identifiers.difference( + set(["__DUMMY"]) + ).difference(self.page_args.declared_identifiers) + return identifiers.union( + super(IncludeTag, self).undeclared_identifiers() + ) + class NamespaceTag(Tag): - __keyword__ = 'namespace' + __keyword__ = "namespace" def __init__(self, keyword, attributes, **kwargs): super(NamespaceTag, self).__init__( - keyword, attributes, - ('file',), - ('name','inheritable', - 'import','module'), - (), **kwargs) + keyword, + attributes, + ("file",), + ("name", "inheritable", "import", "module"), + (), + **kwargs + ) - self.name = attributes.get('name', '__anon_%s' % hex(abs(id(self)))) - if not 'name' in attributes and not 'import' in attributes: + self.name = attributes.get("name", "__anon_%s" % hex(abs(id(self)))) + if "name" not in attributes and "import" not in attributes: raise exceptions.CompileException( "'name' and/or 'import' attributes are required " "for <%namespace>", - **self.exception_kwargs) - if 'file' in attributes and 'module' in attributes: + **self.exception_kwargs + ) + if "file" in attributes and "module" in attributes: raise exceptions.CompileException( "<%namespace> may only have one of 'file' or 'module'", **self.exception_kwargs @@ -379,52 +432,53 @@ class NamespaceTag(Tag): def declared_identifiers(self): return [] + class TextTag(Tag): - __keyword__ = 'text' + __keyword__ = "text" def __init__(self, keyword, attributes, **kwargs): super(TextTag, self).__init__( - keyword, - attributes, (), - ('filter'), (), **kwargs) + keyword, attributes, (), ("filter"), (), **kwargs + ) self.filter_args = ast.ArgumentList( - attributes.get('filter', ''), - **self.exception_kwargs) + attributes.get("filter", ""), **self.exception_kwargs + ) def undeclared_identifiers(self): - return self.filter_args.\ - undeclared_identifiers.\ - difference(filters.DEFAULT_ESCAPES.keys()).union( - self.expression_undeclared_identifiers - ) + return self.filter_args.undeclared_identifiers.difference( + filters.DEFAULT_ESCAPES.keys() + ).union(self.expression_undeclared_identifiers) + class DefTag(Tag): - __keyword__ = 'def' + __keyword__ = "def" def __init__(self, keyword, attributes, **kwargs): - expressions = ['buffered', 'cached'] + [ - c for c in attributes if c.startswith('cache_')] - + expressions = ["buffered", "cached"] + [ + c for c in attributes if c.startswith("cache_") + ] super(DefTag, self).__init__( - keyword, - attributes, - expressions, - ('name', 'filter', 'decorator'), - ('name',), - **kwargs) - name = attributes['name'] - if re.match(r'^[\w_]+$', name): + keyword, + attributes, + expressions, + ("name", "filter", "decorator"), + ("name",), + **kwargs + ) + name = attributes["name"] + if re.match(r"^[\w_]+$", name): raise exceptions.CompileException( - "Missing parenthesis in %def", - **self.exception_kwargs) - self.function_decl = ast.FunctionDecl("def " + name + ":pass", - **self.exception_kwargs) + "Missing parenthesis in %def", **self.exception_kwargs + ) + self.function_decl = ast.FunctionDecl( + "def " + name + ":pass", **self.exception_kwargs + ) self.name = self.function_decl.funcname - self.decorator = attributes.get('decorator', '') + self.decorator = attributes.get("decorator", "") self.filter_args = ast.ArgumentList( - attributes.get('filter', ''), - **self.exception_kwargs) + attributes.get("filter", ""), **self.exception_kwargs + ) is_anonymous = False is_block = False @@ -442,51 +496,58 @@ class DefTag(Tag): def undeclared_identifiers(self): res = [] for c in self.function_decl.defaults: - res += list(ast.PythonCode(c, **self.exception_kwargs). - undeclared_identifiers) - return set(res).union( - self.filter_args.\ - undeclared_identifiers.\ - difference(filters.DEFAULT_ESCAPES.keys()) - ).union( - self.expression_undeclared_identifiers - ).difference( - self.function_decl.allargnames + res += list( + ast.PythonCode( + c, **self.exception_kwargs + ).undeclared_identifiers + ) + return ( + set(res) + .union( + self.filter_args.undeclared_identifiers.difference( + filters.DEFAULT_ESCAPES.keys() + ) + ) + .union(self.expression_undeclared_identifiers) + .difference(self.function_decl.allargnames) ) + class BlockTag(Tag): - __keyword__ = 'block' + __keyword__ = "block" def __init__(self, keyword, attributes, **kwargs): - expressions = ['buffered', 'cached', 'args'] + [ - c for c in attributes if c.startswith('cache_')] + expressions = ["buffered", "cached", "args"] + [ + c for c in attributes if c.startswith("cache_") + ] super(BlockTag, self).__init__( - keyword, - attributes, - expressions, - ('name','filter', 'decorator'), - (), - **kwargs) - name = attributes.get('name') - if name and not re.match(r'^[\w_]+$',name): + keyword, + attributes, + expressions, + ("name", "filter", "decorator"), + (), + **kwargs + ) + name = attributes.get("name") + if name and not re.match(r"^[\w_]+$", name): raise exceptions.CompileException( - "%block may not specify an argument signature", - **self.exception_kwargs) - if not name and attributes.get('args', None): + "%block may not specify an argument signature", + **self.exception_kwargs + ) + if not name and attributes.get("args", None): raise exceptions.CompileException( - "Only named %blocks may specify args", - **self.exception_kwargs - ) - self.body_decl = ast.FunctionArgs(attributes.get('args', ''), - **self.exception_kwargs) + "Only named %blocks may specify args", **self.exception_kwargs + ) + self.body_decl = ast.FunctionArgs( + attributes.get("args", ""), **self.exception_kwargs + ) self.name = name - self.decorator = attributes.get('decorator', '') + self.decorator = attributes.get("decorator", "") self.filter_args = ast.ArgumentList( - attributes.get('filter', ''), - **self.exception_kwargs) - + attributes.get("filter", ""), **self.exception_kwargs + ) is_block = True @@ -496,7 +557,7 @@ class BlockTag(Tag): @property def funcname(self): - return self.name or "__M_anon_%d" % (self.lineno, ) + return self.name or "__M_anon_%d" % (self.lineno,) def get_argument_expressions(self, **kw): return self.body_decl.get_argument_expressions(**kw) @@ -505,90 +566,100 @@ class BlockTag(Tag): return self.body_decl.allargnames def undeclared_identifiers(self): - return (self.filter_args.\ - undeclared_identifiers.\ - difference(filters.DEFAULT_ESCAPES.keys()) - ).union(self.expression_undeclared_identifiers) - + return ( + self.filter_args.undeclared_identifiers.difference( + filters.DEFAULT_ESCAPES.keys() + ) + ).union(self.expression_undeclared_identifiers) class CallTag(Tag): - __keyword__ = 'call' + __keyword__ = "call" def __init__(self, keyword, attributes, **kwargs): - super(CallTag, self).__init__(keyword, attributes, - ('args'), ('expr',), ('expr',), **kwargs) - self.expression = attributes['expr'] - self.code = ast.PythonCode(self.expression, **self.exception_kwargs) - self.body_decl = ast.FunctionArgs(attributes.get('args', ''), - **self.exception_kwargs) - - def declared_identifiers(self): - return self.code.declared_identifiers.union(self.body_decl.allargnames) - - def undeclared_identifiers(self): - return self.code.undeclared_identifiers.\ - difference(self.code.declared_identifiers) - -class CallNamespaceTag(Tag): - - def __init__(self, namespace, defname, attributes, **kwargs): - super(CallNamespaceTag, self).__init__( - namespace + ":" + defname, - attributes, - tuple(attributes.keys()) + ('args', ), - (), - (), - **kwargs) - - self.expression = "%s.%s(%s)" % ( - namespace, - defname, - ",".join(["%s=%s" % (k, v) for k, v in - self.parsed_attributes.items() - if k != 'args']) - ) + super(CallTag, self).__init__( + keyword, attributes, ("args"), ("expr",), ("expr",), **kwargs + ) + self.expression = attributes["expr"] self.code = ast.PythonCode(self.expression, **self.exception_kwargs) self.body_decl = ast.FunctionArgs( - attributes.get('args', ''), - **self.exception_kwargs) + attributes.get("args", ""), **self.exception_kwargs + ) def declared_identifiers(self): return self.code.declared_identifiers.union(self.body_decl.allargnames) def undeclared_identifiers(self): - return self.code.undeclared_identifiers.\ - difference(self.code.declared_identifiers) + return self.code.undeclared_identifiers.difference( + self.code.declared_identifiers + ) + + +class CallNamespaceTag(Tag): + def __init__(self, namespace, defname, attributes, **kwargs): + super(CallNamespaceTag, self).__init__( + namespace + ":" + defname, + attributes, + tuple(attributes.keys()) + ("args",), + (), + (), + **kwargs + ) + + self.expression = "%s.%s(%s)" % ( + namespace, + defname, + ",".join( + [ + "%s=%s" % (k, v) + for k, v in self.parsed_attributes.items() + if k != "args" + ] + ), + ) + self.code = ast.PythonCode(self.expression, **self.exception_kwargs) + self.body_decl = ast.FunctionArgs( + attributes.get("args", ""), **self.exception_kwargs + ) + + def declared_identifiers(self): + return self.code.declared_identifiers.union(self.body_decl.allargnames) + + def undeclared_identifiers(self): + return self.code.undeclared_identifiers.difference( + self.code.declared_identifiers + ) + class InheritTag(Tag): - __keyword__ = 'inherit' + __keyword__ = "inherit" def __init__(self, keyword, attributes, **kwargs): super(InheritTag, self).__init__( - keyword, attributes, - ('file',), (), ('file',), **kwargs) + keyword, attributes, ("file",), (), ("file",), **kwargs + ) + class PageTag(Tag): - __keyword__ = 'page' + __keyword__ = "page" def __init__(self, keyword, attributes, **kwargs): - expressions = ['cached', 'args', 'expression_filter', 'enable_loop'] + [ - c for c in attributes if c.startswith('cache_')] + expressions = [ + "cached", + "args", + "expression_filter", + "enable_loop", + ] + [c for c in attributes if c.startswith("cache_")] super(PageTag, self).__init__( - keyword, - attributes, - expressions, - (), - (), - **kwargs) - self.body_decl = ast.FunctionArgs(attributes.get('args', ''), - **self.exception_kwargs) + keyword, attributes, expressions, (), (), **kwargs + ) + self.body_decl = ast.FunctionArgs( + attributes.get("args", ""), **self.exception_kwargs + ) self.filter_args = ast.ArgumentList( - attributes.get('expression_filter', ''), - **self.exception_kwargs) + attributes.get("expression_filter", ""), **self.exception_kwargs + ) def declared_identifiers(self): return self.body_decl.allargnames - - diff --git a/lib/mako/pygen.py b/lib/mako/pygen.py index 5ba5125a..947721f1 100644 --- a/lib/mako/pygen.py +++ b/lib/mako/pygen.py @@ -1,5 +1,5 @@ # mako/pygen.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -7,8 +7,10 @@ """utilities for generating and formatting literal Python code.""" import re + from mako import exceptions + class PythonPrinter(object): def __init__(self, stream): # indentation counter @@ -52,14 +54,16 @@ class PythonPrinter(object): self.stream.write("\n" * num) self._update_lineno(num) - def write_indented_block(self, block): + def write_indented_block(self, block, starting_lineno=None): """print a line or lines of python which already contain indentation. The indentation of the total block of lines will be adjusted to that of the current indent level.""" self.in_indent_lines = False - for l in re.split(r'\r?\n', block): + for i, l in enumerate(re.split(r"\r?\n", block)): self.line_buffer.append(l) + if starting_lineno is not None: + self.start_source(starting_lineno + i) self._update_lineno(1) def writelines(self, *lines): @@ -80,20 +84,19 @@ class PythonPrinter(object): self._flush_adjusted_lines() self.in_indent_lines = True - if (line is None or - re.match(r"^\s*#",line) or - re.match(r"^\s*$", line) - ): + if ( + line is None + or re.match(r"^\s*#", line) + or re.match(r"^\s*$", line) + ): hastext = False else: hastext = True - is_comment = line and len(line) and line[0] == '#' + is_comment = line and len(line) and line[0] == "#" # see if this line should decrease the indentation level - if (not is_comment and - (not hastext or self._is_unindentor(line)) - ): + if not is_comment and (not hastext or self._is_unindentor(line)): if self.indent > 0: self.indent -= 1 @@ -102,7 +105,8 @@ class PythonPrinter(object): # module wont compile. if len(self.indent_detail) == 0: raise exceptions.SyntaxException( - "Too many whitespace closures") + "Too many whitespace closures" + ) self.indent_detail.pop() if line is None: @@ -132,8 +136,9 @@ class PythonPrinter(object): # its not a "compound" keyword. but lets also # test for valid Python keywords that might be indenting us, # else assume its a non-indenting line - m2 = re.match(r"^\s*(def|class|else|elif|except|finally)", - line) + m2 = re.match( + r"^\s*(def|class|else|elif|except|finally)", line + ) if m2: self.indent += 1 self.indent_detail.append(indentor) @@ -172,27 +177,28 @@ class PythonPrinter(object): # should we decide that its not good enough, heres # more stuff to check. - #keyword = match.group(1) + # keyword = match.group(1) # match the original indent keyword - #for crit in [ + # for crit in [ # (r'if|elif', r'else|elif'), # (r'try', r'except|finally|else'), # (r'while|for', r'else'), - #]: + # ]: # if re.match(crit[0], indentor) and re.match(crit[1], keyword): # return True - #return False + # return False - def _indent_line(self, line, stripspace=''): + def _indent_line(self, line, stripspace=""): """indent the given line according to the current indent level. stripspace is a string of space that will be truncated from the start of the line before indenting.""" - return re.sub(r"^%s" % stripspace, self.indentstring - * self.indent, line) + return re.sub( + r"^%s" % stripspace, self.indentstring * self.indent, line + ) def _reset_multi_line_flags(self): """reset the flags which would indicate we are in a backslashed @@ -210,7 +216,7 @@ class PythonPrinter(object): # a literal multiline string with unfortunately placed # whitespace - current_state = (self.backslashed or self.triplequoted) + current_state = self.backslashed or self.triplequoted if re.search(r"\\$", line): self.backslashed = True @@ -247,7 +253,7 @@ def adjust_whitespace(text): (backslashed, triplequoted) = (0, 1) def in_multi_line(line): - start_state = (state[backslashed] or state[triplequoted]) + start_state = state[backslashed] or state[triplequoted] if re.search(r"\\$", line): state[backslashed] = True @@ -257,7 +263,7 @@ def adjust_whitespace(text): def match(reg, t): m = re.match(reg, t) if m: - return m, t[len(m.group(0)):] + return m, t[len(m.group(0)) :] else: return None, t @@ -269,7 +275,7 @@ def adjust_whitespace(text): else: m, line = match(r".*?(?=%s|$)" % state[triplequoted], line) else: - m, line = match(r'#', line) + m, line = match(r"#", line) if m: return start_state @@ -282,13 +288,13 @@ def adjust_whitespace(text): return start_state - def _indent_line(line, stripspace=''): - return re.sub(r"^%s" % stripspace, '', line) + def _indent_line(line, stripspace=""): + return re.sub(r"^%s" % stripspace, "", line) lines = [] stripspace = None - for line in re.split(r'\r?\n', text): + for line in re.split(r"\r?\n", text): if in_multi_line(line): lines.append(line) else: diff --git a/lib/mako/pyparser.py b/lib/mako/pyparser.py index bfa46a9f..b16672d6 100644 --- a/lib/mako/pyparser.py +++ b/lib/mako/pyparser.py @@ -1,5 +1,5 @@ # mako/pyparser.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -10,46 +10,52 @@ Parsing to AST is done via _ast on Python > 2.5, otherwise the compiler module is used. """ -from mako import exceptions, util, compat -from mako.compat import arg_stringname import operator +import _ast + +from mako import _ast_util +from mako import compat +from mako import exceptions +from mako import util +from mako.compat import arg_stringname + if compat.py3k: # words that cannot be assigned to (notably # smaller than the total keys in __builtins__) - reserved = set(['True', 'False', 'None', 'print']) + reserved = set(["True", "False", "None", "print"]) # the "id" attribute on a function node - arg_id = operator.attrgetter('arg') + arg_id = operator.attrgetter("arg") else: # words that cannot be assigned to (notably # smaller than the total keys in __builtins__) - reserved = set(['True', 'False', 'None']) + reserved = set(["True", "False", "None"]) # the "id" attribute on a function node - arg_id = operator.attrgetter('id') + arg_id = operator.attrgetter("id") -import _ast util.restore__ast(_ast) -from mako import _ast_util -def parse(code, mode='exec', **exception_kwargs): +def parse(code, mode="exec", **exception_kwargs): """Parse an expression into AST""" try: - return _ast_util.parse(code, '', mode) + return _ast_util.parse(code, "", mode) except Exception: raise exceptions.SyntaxException( - "(%s) %s (%r)" % ( - compat.exception_as().__class__.__name__, - compat.exception_as(), - code[0:50] - ), **exception_kwargs) + "(%s) %s (%r)" + % ( + compat.exception_as().__class__.__name__, + compat.exception_as(), + code[0:50], + ), + **exception_kwargs + ) class FindIdentifiers(_ast_util.NodeVisitor): - def __init__(self, listener, **exception_kwargs): self.in_function = False self.in_assign_targets = False @@ -119,9 +125,9 @@ class FindIdentifiers(_ast_util.NodeVisitor): self.in_function = True local_ident_stack = self.local_ident_stack - self.local_ident_stack = local_ident_stack.union([ - arg_id(arg) for arg in self._expand_tuples(node.args.args) - ]) + self.local_ident_stack = local_ident_stack.union( + [arg_id(arg) for arg in self._expand_tuples(node.args.args)] + ) if islambda: self.visit(node.body) else: @@ -146,9 +152,11 @@ class FindIdentifiers(_ast_util.NodeVisitor): # this is eqiuvalent to visit_AssName in # compiler self._add_declared(node.id) - elif node.id not in reserved and node.id \ - not in self.listener.declared_identifiers and node.id \ - not in self.local_ident_stack: + elif ( + node.id not in reserved + and node.id not in self.listener.declared_identifiers + and node.id not in self.local_ident_stack + ): self.listener.undeclared_identifiers.add(node.id) def visit_Import(self, node): @@ -156,24 +164,25 @@ class FindIdentifiers(_ast_util.NodeVisitor): if name.asname is not None: self._add_declared(name.asname) else: - self._add_declared(name.name.split('.')[0]) + self._add_declared(name.name.split(".")[0]) def visit_ImportFrom(self, node): for name in node.names: if name.asname is not None: self._add_declared(name.asname) else: - if name.name == '*': + if name.name == "*": raise exceptions.CompileException( "'import *' is not supported, since all identifier " "names must be explicitly declared. Please use the " "form 'from import , , " - "...' instead.", **self.exception_kwargs) + "...' instead.", + **self.exception_kwargs + ) self._add_declared(name.name) class FindTuple(_ast_util.NodeVisitor): - def __init__(self, listener, code_factory, **exception_kwargs): self.listener = listener self.exception_kwargs = exception_kwargs @@ -184,16 +193,17 @@ class FindTuple(_ast_util.NodeVisitor): p = self.code_factory(n, **self.exception_kwargs) self.listener.codeargs.append(p) self.listener.args.append(ExpressionGenerator(n).value()) - self.listener.declared_identifiers = \ - self.listener.declared_identifiers.union( - p.declared_identifiers) - self.listener.undeclared_identifiers = \ - self.listener.undeclared_identifiers.union( - p.undeclared_identifiers) + ldi = self.listener.declared_identifiers + self.listener.declared_identifiers = ldi.union( + p.declared_identifiers + ) + lui = self.listener.undeclared_identifiers + self.listener.undeclared_identifiers = lui.union( + p.undeclared_identifiers + ) class ParseFunc(_ast_util.NodeVisitor): - def __init__(self, listener, **exception_kwargs): self.listener = listener self.exception_kwargs = exception_kwargs @@ -222,11 +232,11 @@ class ParseFunc(_ast_util.NodeVisitor): self.listener.varargs = node.args.vararg self.listener.kwargs = node.args.kwarg -class ExpressionGenerator(object): +class ExpressionGenerator(object): def __init__(self, astnode): - self.generator = _ast_util.SourceGenerator(' ' * 4) + self.generator = _ast_util.SourceGenerator(" " * 4) self.generator.visit(astnode) def value(self): - return ''.join(self.generator.result) + return "".join(self.generator.result) diff --git a/lib/mako/runtime.py b/lib/mako/runtime.py index 6b6a35a9..465908e6 100644 --- a/lib/mako/runtime.py +++ b/lib/mako/runtime.py @@ -1,5 +1,5 @@ # mako/runtime.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -7,12 +7,17 @@ """provides runtime services for templates, including Context, Namespace, and various helper functions.""" -from mako import exceptions, util, compat -from mako.compat import compat_builtins +import functools import sys +from mako import compat +from mako import exceptions +from mako import util +from mako.compat import compat_builtins + class Context(object): + """Provides runtime namespace, output buffer, and various callstacks for templates. @@ -33,18 +38,19 @@ class Context(object): # "capture" function which proxies to the # generic "capture" function - self._data['capture'] = compat.partial(capture, self) + self._data["capture"] = functools.partial(capture, self) # "caller" stack used by def calls with content - self.caller_stack = self._data['caller'] = CallerStack() + self.caller_stack = self._data["caller"] = CallerStack() def _set_with_template(self, t): self._with_template = t illegal_names = t.reserved_names.intersection(self._data) if illegal_names: raise exceptions.NameConflictError( - "Reserved words passed to render(): %s" % - ", ".join(illegal_names)) + "Reserved words passed to render(): %s" + % ", ".join(illegal_names) + ) @property def lookup(self): @@ -80,7 +86,6 @@ class Context(object): """Push a ``caller`` callable onto the callstack for this :class:`.Context`.""" - self.caller_stack.append(caller) def pop_caller(self): @@ -177,11 +182,12 @@ class Context(object): c = self._copy() x = c._data - x.pop('self', None) - x.pop('parent', None) - x.pop('next', None) + x.pop("self", None) + x.pop("parent", None) + x.pop("next", None) return c + class CallerStack(list): def __init__(self): self.nextcaller = None @@ -211,6 +217,7 @@ class CallerStack(list): class Undefined(object): + """Represents an undefined value in a template. All template modules have a constant value @@ -218,6 +225,7 @@ class Undefined(object): object. """ + def __str__(self): raise NameError("Undefined") @@ -227,9 +235,13 @@ class Undefined(object): def __bool__(self): return False + UNDEFINED = Undefined() +STOP_RENDERING = "" + class LoopStack(object): + """a stack for LoopContexts that implements the context manager protocol to automatically pop off the top of the stack on context exit """ @@ -269,6 +281,7 @@ class LoopStack(object): class LoopContext(object): + """A magic loop variable. Automatically accessible in any ``% for`` block. @@ -336,6 +349,7 @@ class LoopContext(object): class _NSAttr(object): def __init__(self, parent): self.__parent = parent + def __getattr__(self, key): ns = self.__parent while ns: @@ -345,7 +359,9 @@ class _NSAttr(object): ns = ns.inherits raise AttributeError(key) + class Namespace(object): + """Provides access to collections of rendering methods, which can be local, from other templates, or from imported modules. @@ -361,9 +377,15 @@ class Namespace(object): """ - def __init__(self, name, context, - callables=None, inherits=None, - populate_self=True, calling_uri=None): + def __init__( + self, + name, + context, + callables=None, + inherits=None, + populate_self=True, + calling_uri=None, + ): self.name = name self.context = context self.inherits = inherits @@ -461,9 +483,12 @@ class Namespace(object): if key in self.context.namespaces: return self.context.namespaces[key] else: - ns = TemplateNamespace(uri, self.context._copy(), - templateuri=uri, - calling_uri=self._templateuri) + ns = TemplateNamespace( + uri, + self.context._copy(), + templateuri=uri, + calling_uri=self._templateuri, + ) self.context.namespaces[key] = ns return ns @@ -506,7 +531,7 @@ class Namespace(object): def _populate(self, d, l): for ident in l: - if ident == '*': + if ident == "*": for (k, v) in self._get_star(): d[k] = v else: @@ -524,17 +549,27 @@ class Namespace(object): val = getattr(self.inherits, key) else: raise AttributeError( - "Namespace '%s' has no member '%s'" % - (self.name, key)) + "Namespace '%s' has no member '%s'" % (self.name, key) + ) setattr(self, key, val) return val + class TemplateNamespace(Namespace): + """A :class:`.Namespace` specific to a :class:`.Template` instance.""" - def __init__(self, name, context, template=None, templateuri=None, - callables=None, inherits=None, - populate_self=True, calling_uri=None): + def __init__( + self, + name, + context, + template=None, + templateuri=None, + callables=None, + inherits=None, + populate_self=True, + calling_uri=None, + ): self.name = name self.context = context self.inherits = inherits @@ -542,8 +577,7 @@ class TemplateNamespace(Namespace): self.callables = dict([(c.__name__, c) for c in callables]) if templateuri is not None: - self.template = _lookup_template(context, templateuri, - calling_uri) + self.template = _lookup_template(context, templateuri, calling_uri) self._templateuri = self.template.module._template_uri elif template is not None: self.template = template @@ -552,9 +586,9 @@ class TemplateNamespace(Namespace): raise TypeError("'template' argument is required.") if populate_self: - lclcallable, lclcontext = \ - _populate_self_namespace(context, self.template, - self_ns=self) + lclcallable, lclcontext = _populate_self_namespace( + context, self.template, self_ns=self + ) @property def module(self): @@ -589,9 +623,11 @@ class TemplateNamespace(Namespace): if self.callables: for key in self.callables: yield (key, self.callables[key]) + def get(key): callable_ = self.template._get_def_callable(key) - return compat.partial(callable_, self.context) + return functools.partial(callable_, self.context) + for k in self.template.module._exports: yield (k, get(k)) @@ -600,23 +636,32 @@ class TemplateNamespace(Namespace): val = self.callables[key] elif self.template.has_def(key): callable_ = self.template._get_def_callable(key) - val = compat.partial(callable_, self.context) + val = functools.partial(callable_, self.context) elif self.inherits: val = getattr(self.inherits, key) else: raise AttributeError( - "Namespace '%s' has no member '%s'" % - (self.name, key)) + "Namespace '%s' has no member '%s'" % (self.name, key) + ) setattr(self, key, val) return val + class ModuleNamespace(Namespace): + """A :class:`.Namespace` specific to a Python module instance.""" - def __init__(self, name, context, module, - callables=None, inherits=None, - populate_self=True, calling_uri=None): + def __init__( + self, + name, + context, + module, + callables=None, + inherits=None, + populate_self=True, + calling_uri=None, + ): self.name = name self.context = context self.inherits = inherits @@ -624,7 +669,7 @@ class ModuleNamespace(Namespace): self.callables = dict([(c.__name__, c) for c in callables]) mod = __import__(module) - for token in module.split('.')[1:]: + for token in module.split(".")[1:]: mod = getattr(mod, token) self.module = mod @@ -640,27 +685,27 @@ class ModuleNamespace(Namespace): for key in self.callables: yield (key, self.callables[key]) for key in dir(self.module): - if key[0] != '_': + if key[0] != "_": callable_ = getattr(self.module, key) - if compat.callable(callable_): - yield key, compat.partial(callable_, self.context) - + if callable(callable_): + yield key, functools.partial(callable_, self.context) def __getattr__(self, key): if key in self.callables: val = self.callables[key] elif hasattr(self.module, key): callable_ = getattr(self.module, key) - val = compat.partial(callable_, self.context) + val = functools.partial(callable_, self.context) elif self.inherits: val = getattr(self.inherits, key) else: raise AttributeError( - "Namespace '%s' has no member '%s'" % - (self.name, key)) + "Namespace '%s' has no member '%s'" % (self.name, key) + ) setattr(self, key, val) return val + def supports_caller(func): """Apply a caller_stack compatibility decorator to a plain Python function. @@ -675,8 +720,10 @@ def supports_caller(func): return func(context, *args, **kwargs) finally: context.caller_stack._pop_frame() + return wrap_stackframe + def capture(context, callable_, *args, **kwargs): """Execute the given template def, capturing the output into a buffer. @@ -685,11 +732,11 @@ def capture(context, callable_, *args, **kwargs): """ - if not compat.callable(callable_): + if not callable(callable_): raise exceptions.RuntimeException( - "capture() function expects a callable as " - "its argument (i.e. capture(func, *args, **kwargs))" - ) + "capture() function expects a callable as " + "its argument (i.e. capture(func, *args, **kwargs))" + ) context._push_buffer() try: callable_(*args, **kwargs) @@ -697,37 +744,56 @@ def capture(context, callable_, *args, **kwargs): buf = context._pop_buffer() return buf.getvalue() + def _decorate_toplevel(fn): def decorate_render(render_fn): def go(context, *args, **kw): def y(*args, **kw): return render_fn(context, *args, **kw) + try: y.__name__ = render_fn.__name__[7:] except TypeError: # < Python 2.4 pass return fn(y)(context, *args, **kw) + return go + return decorate_render + def _decorate_inline(context, fn): def decorate_render(render_fn): dec = fn(render_fn) + def go(*args, **kw): return dec(context, *args, **kw) + return go + return decorate_render + def _include_file(context, uri, calling_uri, **kwargs): """locate the template from the given uri and include it in the current output.""" template = _lookup_template(context, uri, calling_uri) (callable_, ctx) = _populate_self_namespace( - context._clean_inheritance_tokens(), - template) - callable_(ctx, **_kwargs_for_include(callable_, context._data, **kwargs)) + context._clean_inheritance_tokens(), template + ) + kwargs = _kwargs_for_include(callable_, context._data, **kwargs) + if template.include_error_handler: + try: + callable_(ctx, **kwargs) + except Exception: + result = template.include_error_handler(ctx, compat.exception_as()) + if not result: + compat.reraise(*sys.exc_info()) + else: + callable_(ctx, **kwargs) + def _inherit_from(context, uri, calling_uri): """called by the _inherit method in template modules to set @@ -737,51 +803,60 @@ def _inherit_from(context, uri, calling_uri): if uri is None: return None template = _lookup_template(context, uri, calling_uri) - self_ns = context['self'] + self_ns = context["self"] ih = self_ns while ih.inherits is not None: ih = ih.inherits - lclcontext = context._locals({'next': ih}) - ih.inherits = TemplateNamespace("self:%s" % template.uri, - lclcontext, - template=template, - populate_self=False) - context._data['parent'] = lclcontext._data['local'] = ih.inherits - callable_ = getattr(template.module, '_mako_inherit', None) + lclcontext = context._locals({"next": ih}) + ih.inherits = TemplateNamespace( + "self:%s" % template.uri, + lclcontext, + template=template, + populate_self=False, + ) + context._data["parent"] = lclcontext._data["local"] = ih.inherits + callable_ = getattr(template.module, "_mako_inherit", None) if callable_ is not None: ret = callable_(template, lclcontext) if ret: return ret - gen_ns = getattr(template.module, '_mako_generate_namespaces', None) + gen_ns = getattr(template.module, "_mako_generate_namespaces", None) if gen_ns is not None: gen_ns(context) return (template.callable_, lclcontext) + def _lookup_template(context, uri, relativeto): lookup = context._with_template.lookup if lookup is None: raise exceptions.TemplateLookupException( - "Template '%s' has no TemplateLookup associated" % - context._with_template.uri) + "Template '%s' has no TemplateLookup associated" + % context._with_template.uri + ) uri = lookup.adjust_uri(uri, relativeto) try: return lookup.get_template(uri) except exceptions.TopLevelLookupException: raise exceptions.TemplateLookupException(str(compat.exception_as())) + def _populate_self_namespace(context, template, self_ns=None): if self_ns is None: - self_ns = TemplateNamespace('self:%s' % template.uri, - context, template=template, - populate_self=False) - context._data['self'] = context._data['local'] = self_ns - if hasattr(template.module, '_mako_inherit'): + self_ns = TemplateNamespace( + "self:%s" % template.uri, + context, + template=template, + populate_self=False, + ) + context._data["self"] = context._data["local"] = self_ns + if hasattr(template.module, "_mako_inherit"): ret = template.module._mako_inherit(template, context) if ret: return ret return (template.callable_, context) + def _render(template, callable_, args, data, as_unicode=False): """create a Context and return the string output of the given template and template callable.""" @@ -792,19 +867,26 @@ def _render(template, callable_, args, data, as_unicode=False): buf = compat.StringIO() else: buf = util.FastEncodingBuffer( - as_unicode=as_unicode, - encoding=template.output_encoding, - errors=template.encoding_errors) + as_unicode=as_unicode, + encoding=template.output_encoding, + errors=template.encoding_errors, + ) context = Context(buf, **data) context._outputting_as_unicode = as_unicode context._set_with_template(template) - _render_context(template, callable_, context, *args, - **_kwargs_for_callable(callable_, data)) + _render_context( + template, + callable_, + context, + *args, + **_kwargs_for_callable(callable_, data) + ) return context._pop_buffer().getvalue() + def _kwargs_for_callable(callable_, data): - argspec = compat.inspect_func_args(callable_) + argspec = compat.inspect_getargspec(callable_) # for normal pages, **pageargs is usually present if argspec[2]: return data @@ -813,20 +895,23 @@ def _kwargs_for_callable(callable_, data): namedargs = argspec[0] + [v for v in argspec[1:3] if v is not None] kwargs = {} for arg in namedargs: - if arg != 'context' and arg in data and arg not in kwargs: + if arg != "context" and arg in data and arg not in kwargs: kwargs[arg] = data[arg] return kwargs + def _kwargs_for_include(callable_, data, **kwargs): - argspec = compat.inspect_func_args(callable_) + argspec = compat.inspect_getargspec(callable_) namedargs = argspec[0] + [v for v in argspec[1:3] if v is not None] for arg in namedargs: - if arg != 'context' and arg in data and arg not in kwargs: + if arg != "context" and arg in data and arg not in kwargs: kwargs[arg] = data[arg] return kwargs + def _render_context(tmpl, callable_, context, *args, **kwargs): import mako.template as template + # create polymorphic 'self' namespace for this # template with possibly updated context if not isinstance(tmpl, template.DefTemplate): @@ -838,6 +923,7 @@ def _render_context(tmpl, callable_, context, *args, **kwargs): (inherit, lclcontext) = _populate_self_namespace(context, tmpl.parent) _exec_template(callable_, context, args=args, kwargs=kwargs) + def _exec_template(callable_, context, args=None, kwargs=None): """execute a rendering callable given the callable, a Context, and optional explicit arguments @@ -847,8 +933,9 @@ def _exec_template(callable_, context, args=None, kwargs=None): be interpreted here. """ template = context._with_template - if template is not None and \ - (template.format_exceptions or template.error_handler): + if template is not None and ( + template.format_exceptions or template.error_handler + ): try: callable_(context, *args, **kwargs) except Exception: @@ -859,6 +946,7 @@ def _exec_template(callable_, context, args=None, kwargs=None): else: callable_(context, *args, **kwargs) + def _render_error(template, context, error): if template.error_handler: result = template.error_handler(context, error) @@ -868,11 +956,15 @@ def _render_error(template, context, error): error_template = exceptions.html_error_template() if context._outputting_as_unicode: context._buffer_stack[:] = [ - util.FastEncodingBuffer(as_unicode=True)] + util.FastEncodingBuffer(as_unicode=True) + ] else: - context._buffer_stack[:] = [util.FastEncodingBuffer( - error_template.output_encoding, - error_template.encoding_errors)] + context._buffer_stack[:] = [ + util.FastEncodingBuffer( + error_template.output_encoding, + error_template.encoding_errors, + ) + ] context._set_with_template(error_template) error_template.render_context(context, error=error) diff --git a/lib/mako/template.py b/lib/mako/template.py index fb610628..5ed23204 100644 --- a/lib/mako/template.py +++ b/lib/mako/template.py @@ -1,5 +1,5 @@ # mako/template.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php @@ -7,20 +7,27 @@ """Provides the Template class, a facade for parsing, generating and executing template strings, as well as template runtime operations.""" -from mako.lexer import Lexer -from mako import runtime, util, exceptions, codegen, cache, compat +import json import os import re import shutil import stat -import sys import tempfile import types import weakref +from mako import cache +from mako import codegen +from mako import compat +from mako import exceptions +from mako import runtime +from mako import util +from mako.lexer import Lexer + class Template(object): - """Represents a compiled template. + + r"""Represents a compiled template. :class:`.Template` includes a reference to the original template source (via the :attr:`.source` attribute) @@ -108,6 +115,11 @@ class Template(object): completes. Is used to provide custom error-rendering functions. + .. seealso:: + + :paramref:`.Template.include_error_handler` - include-specific + error handler function + :param format_exceptions: if ``True``, exceptions which occur during the render phase of this template will be caught and formatted into an HTML error page, which then becomes the @@ -128,6 +140,16 @@ class Template(object): import will not appear as the first executed statement in the generated code and will therefore not have the desired effect. + :param include_error_handler: An error handler that runs when this template + is included within another one via the ``<%include>`` tag, and raises an + error. Compare to the :paramref:`.Template.error_handler` option. + + .. versionadded:: 1.0.6 + + .. seealso:: + + :paramref:`.Template.error_handler` - top-level error handler function + :param input_encoding: Encoding of the template's source code. Can be used in lieu of the coding comment. See :ref:`usage_unicode` as well as :ref:`unicode_toplevel` for @@ -214,40 +236,43 @@ class Template(object): lexer_cls = Lexer - def __init__(self, - text=None, - filename=None, - uri=None, - format_exceptions=False, - error_handler=None, - lookup=None, - output_encoding=None, - encoding_errors='strict', - module_directory=None, - cache_args=None, - cache_impl='beaker', - cache_enabled=True, - cache_type=None, - cache_dir=None, - cache_url=None, - module_filename=None, - input_encoding=None, - disable_unicode=False, - module_writer=None, - bytestring_passthrough=False, - default_filters=None, - buffer_filters=(), - strict_undefined=False, - imports=None, - future_imports=None, - enable_loop=True, - preprocessor=None, - lexer_cls=None): + def __init__( + self, + text=None, + filename=None, + uri=None, + format_exceptions=False, + error_handler=None, + lookup=None, + output_encoding=None, + encoding_errors="strict", + module_directory=None, + cache_args=None, + cache_impl="beaker", + cache_enabled=True, + cache_type=None, + cache_dir=None, + cache_url=None, + module_filename=None, + input_encoding=None, + disable_unicode=False, + module_writer=None, + bytestring_passthrough=False, + default_filters=None, + buffer_filters=(), + strict_undefined=False, + imports=None, + future_imports=None, + enable_loop=True, + preprocessor=None, + lexer_cls=None, + include_error_handler=None, + ): if uri: - self.module_id = re.sub(r'\W', "_", uri) + self.module_id = re.sub(r"\W", "_", uri) self.uri = uri elif filename: - self.module_id = re.sub(r'\W', "_", filename) + self.module_id = re.sub(r"\W", "_", filename) drive, path = os.path.splitdrive(filename) path = os.path.normpath(path).replace(os.path.sep, "/") self.uri = path @@ -261,9 +286,10 @@ class Template(object): u_norm = os.path.normpath(u_norm) if u_norm.startswith(".."): raise exceptions.TemplateLookupException( - "Template uri \"%s\" is invalid - " - "it cannot be relative outside " - "of the root path." % self.uri) + 'Template uri "%s" is invalid - ' + "it cannot be relative outside " + "of the root path." % self.uri + ) self.input_encoding = input_encoding self.output_encoding = output_encoding @@ -276,17 +302,18 @@ class Template(object): if compat.py3k and disable_unicode: raise exceptions.UnsupportedError( - "Mako for Python 3 does not " - "support disabling Unicode") + "Mako for Python 3 does not " "support disabling Unicode" + ) elif output_encoding and disable_unicode: raise exceptions.UnsupportedError( - "output_encoding must be set to " - "None when disable_unicode is used.") + "output_encoding must be set to " + "None when disable_unicode is used." + ) if default_filters is None: if compat.py3k or self.disable_unicode: - self.default_filters = ['str'] + self.default_filters = ["str"] else: - self.default_filters = ['unicode'] + self.default_filters = ["unicode"] else: self.default_filters = default_filters self.buffer_filters = buffer_filters @@ -303,7 +330,7 @@ class Template(object): (code, module) = _compile_text(self, text, filename) self._code = code self._source = text - ModuleInfo(module, None, self, filename, code, text) + ModuleInfo(module, None, self, filename, code, text, uri) elif filename is not None: # if template filename and a module directory, load # a filesystem-based module file, generating if needed @@ -311,43 +338,53 @@ class Template(object): path = module_filename elif module_directory is not None: path = os.path.abspath( - os.path.join( - os.path.normpath(module_directory), - u_norm + ".py" - ) - ) + os.path.join( + os.path.normpath(module_directory), u_norm + ".py" + ) + ) else: path = None module = self._compile_from_file(path, filename) else: raise exceptions.RuntimeException( - "Template requires text or filename") + "Template requires text or filename" + ) self.module = module self.filename = filename self.callable_ = self.module.render_body self.format_exceptions = format_exceptions self.error_handler = error_handler + self.include_error_handler = include_error_handler self.lookup = lookup self.module_directory = module_directory self._setup_cache_args( - cache_impl, cache_enabled, cache_args, - cache_type, cache_dir, cache_url + cache_impl, + cache_enabled, + cache_args, + cache_type, + cache_dir, + cache_url, ) - @util.memoized_property def reserved_names(self): if self.enable_loop: return codegen.RESERVED_NAMES else: - return codegen.RESERVED_NAMES.difference(['loop']) + return codegen.RESERVED_NAMES.difference(["loop"]) - def _setup_cache_args(self, - cache_impl, cache_enabled, cache_args, - cache_type, cache_dir, cache_url): + def _setup_cache_args( + self, + cache_impl, + cache_enabled, + cache_args, + cache_type, + cache_dir, + cache_url, + ): self.cache_impl = cache_impl self.cache_enabled = cache_enabled if cache_args: @@ -357,49 +394,40 @@ class Template(object): # transfer deprecated cache_* args if cache_type: - self.cache_args['type'] = cache_type + self.cache_args["type"] = cache_type if cache_dir: - self.cache_args['dir'] = cache_dir + self.cache_args["dir"] = cache_dir if cache_url: - self.cache_args['url'] = cache_url + self.cache_args["url"] = cache_url def _compile_from_file(self, path, filename): if path is not None: util.verify_directory(os.path.dirname(path)) filemtime = os.stat(filename)[stat.ST_MTIME] - if not os.path.exists(path) or \ - os.stat(path)[stat.ST_MTIME] < filemtime: + if ( + not os.path.exists(path) + or os.stat(path)[stat.ST_MTIME] < filemtime + ): data = util.read_file(filename) _compile_module_file( - self, - data, - filename, - path, - self.module_writer) + self, data, filename, path, self.module_writer + ) module = compat.load_module(self.module_id, path) - del sys.modules[self.module_id] if module._magic_number != codegen.MAGIC_NUMBER: data = util.read_file(filename) _compile_module_file( - self, - data, - filename, - path, - self.module_writer) + self, data, filename, path, self.module_writer + ) module = compat.load_module(self.module_id, path) - del sys.modules[self.module_id] - ModuleInfo(module, path, self, filename, None, None) + ModuleInfo(module, path, self, filename, None, None, None) else: # template filename and no module directory, compile code # in memory data = util.read_file(filename) - code, module = _compile_text( - self, - data, - filename) + code, module = _compile_text(self, data, filename) self._source = None self._code = code - ModuleInfo(module, None, self, filename, code, None) + ModuleInfo(module, None, self, filename, code, None, None) return module @property @@ -420,13 +448,15 @@ class Template(object): @property def cache_dir(self): - return self.cache_args['dir'] + return self.cache_args["dir"] + @property def cache_url(self): - return self.cache_args['url'] + return self.cache_args["url"] + @property def cache_type(self): - return self.cache_args['type'] + return self.cache_args["type"] def render(self, *args, **data): """Render the output of this template as a string. @@ -445,11 +475,9 @@ class Template(object): def render_unicode(self, *args, **data): """Render the output of this template as a unicode object.""" - return runtime._render(self, - self.callable_, - args, - data, - as_unicode=True) + return runtime._render( + self, self.callable_, args, data, as_unicode=True + ) def render_context(self, context, *args, **kwargs): """Render this :class:`.Template` with the given context. @@ -457,13 +485,9 @@ class Template(object): The data is written to the context's buffer. """ - if getattr(context, '_with_template', None) is None: + if getattr(context, "_with_template", None) is None: context._set_with_template(self) - runtime._render_context(self, - self.callable_, - context, - *args, - **kwargs) + runtime._render_context(self, self.callable_, context, *args, **kwargs) def has_def(self, name): return hasattr(self.module, "render_%s" % name) @@ -473,6 +497,14 @@ class Template(object): return DefTemplate(self, getattr(self.module, "render_%s" % name)) + def list_defs(self): + """return a list of defs in the template. + + .. versionadded:: 1.0.4 + + """ + return [i[7:] for i in dir(self.module) if i[:7] == "render_"] + def _get_def_callable(self, name): return getattr(self.module, "render_%s" % name) @@ -480,44 +512,49 @@ class Template(object): def last_modified(self): return self.module._modified_time + class ModuleTemplate(Template): + """A Template which is constructed given an existing Python module. - e.g.:: + e.g.:: - t = Template("this is a template") - f = file("mymodule.py", "w") - f.write(t.code) - f.close() + t = Template("this is a template") + f = file("mymodule.py", "w") + f.write(t.code) + f.close() - import mymodule + import mymodule - t = ModuleTemplate(mymodule) - print t.render() + t = ModuleTemplate(mymodule) + print(t.render()) """ - def __init__(self, module, - module_filename=None, - template=None, - template_filename=None, - module_source=None, - template_source=None, - output_encoding=None, - encoding_errors='strict', - disable_unicode=False, - bytestring_passthrough=False, - format_exceptions=False, - error_handler=None, - lookup=None, - cache_args=None, - cache_impl='beaker', - cache_enabled=True, - cache_type=None, - cache_dir=None, - cache_url=None, + def __init__( + self, + module, + module_filename=None, + template=None, + template_filename=None, + module_source=None, + template_source=None, + output_encoding=None, + encoding_errors="strict", + disable_unicode=False, + bytestring_passthrough=False, + format_exceptions=False, + error_handler=None, + lookup=None, + cache_args=None, + cache_impl="beaker", + cache_enabled=True, + cache_type=None, + cache_dir=None, + cache_url=None, + include_error_handler=None, ): - self.module_id = re.sub(r'\W', "_", module._template_uri) + self.module_id = re.sub(r"\W", "_", module._template_uri) self.uri = module._template_uri self.input_encoding = module._source_encoding self.output_encoding = output_encoding @@ -528,32 +565,43 @@ class ModuleTemplate(Template): if compat.py3k and disable_unicode: raise exceptions.UnsupportedError( - "Mako for Python 3 does not " - "support disabling Unicode") + "Mako for Python 3 does not " "support disabling Unicode" + ) elif output_encoding and disable_unicode: raise exceptions.UnsupportedError( - "output_encoding must be set to " - "None when disable_unicode is used.") + "output_encoding must be set to " + "None when disable_unicode is used." + ) self.module = module self.filename = template_filename - ModuleInfo(module, - module_filename, - self, - template_filename, - module_source, - template_source) + ModuleInfo( + module, + module_filename, + self, + template_filename, + module_source, + template_source, + module._template_uri, + ) self.callable_ = self.module.render_body self.format_exceptions = format_exceptions self.error_handler = error_handler + self.include_error_handler = include_error_handler self.lookup = lookup self._setup_cache_args( - cache_impl, cache_enabled, cache_args, - cache_type, cache_dir, cache_url + cache_impl, + cache_enabled, + cache_args, + cache_type, + cache_dir, + cache_url, ) + class DefTemplate(Template): + """A :class:`.Template` which represents a callable def in a parent template.""" @@ -565,6 +613,7 @@ class DefTemplate(Template): self.encoding_errors = parent.encoding_errors self.format_exceptions = parent.format_exceptions self.error_handler = parent.error_handler + self.include_error_handler = parent.include_error_handler self.enable_loop = parent.enable_loop self.lookup = parent.lookup self.bytestring_passthrough = parent.bytestring_passthrough @@ -572,26 +621,33 @@ class DefTemplate(Template): def get_def(self, name): return self.parent.get_def(name) + class ModuleInfo(object): + """Stores information about a module currently loaded into memory, provides reverse lookups of template source, module source code based on a module's identifier. """ + _modules = weakref.WeakValueDictionary() - def __init__(self, - module, - module_filename, - template, - template_filename, - module_source, - template_source): + def __init__( + self, + module, + module_filename, + template, + template_filename, + module_source, + template_source, + template_uri, + ): self.module = module self.module_filename = module_filename self.template_filename = template_filename self.module_source = module_source self.template_source = template_source + self.template_uri = template_uri self._modules[module.__name__] = template._mmarker = self if module_filename: self._modules[module_filename] = self @@ -599,14 +655,15 @@ class ModuleInfo(object): @classmethod def get_module_source_metadata(cls, module_source, full_line_map=False): source_map = re.search( - r"__M_BEGIN_METADATA(.+?)__M_END_METADATA", - module_source, re.S).group(1) - source_map = compat.json.loads(source_map) - source_map['line_map'] = dict((int(k), int(v)) - for k, v in source_map['line_map'].items()) + r"__M_BEGIN_METADATA(.+?)__M_END_METADATA", module_source, re.S + ).group(1) + source_map = json.loads(source_map) + source_map["line_map"] = dict( + (int(k), int(v)) for k, v in source_map["line_map"].items() + ) if full_line_map: - f_line_map = source_map['full_line_map'] = [] - line_map = source_map['line_map'] + f_line_map = source_map["full_line_map"] = [] + line_map = source_map["line_map"] curr_templ_line = 1 for mod_line in range(1, max(line_map)): @@ -625,10 +682,12 @@ class ModuleInfo(object): @property def source(self): if self.template_source is not None: - if self.module._source_encoding and \ - not isinstance(self.template_source, compat.text_type): + if self.module._source_encoding and not isinstance( + self.template_source, compat.text_type + ): return self.template_source.decode( - self.module._source_encoding) + self.module._source_encoding + ) else: return self.template_source else: @@ -638,49 +697,61 @@ class ModuleInfo(object): else: return data + def _compile(template, text, filename, generate_magic_comment): - lexer = template.lexer_cls(text, - filename, - disable_unicode=template.disable_unicode, - input_encoding=template.input_encoding, - preprocessor=template.preprocessor) + lexer = template.lexer_cls( + text, + filename, + disable_unicode=template.disable_unicode, + input_encoding=template.input_encoding, + preprocessor=template.preprocessor, + ) node = lexer.parse() - source = codegen.compile(node, - template.uri, - filename, - default_filters=template.default_filters, - buffer_filters=template.buffer_filters, - imports=template.imports, - future_imports=template.future_imports, - source_encoding=lexer.encoding, - generate_magic_comment=generate_magic_comment, - disable_unicode=template.disable_unicode, - strict_undefined=template.strict_undefined, - enable_loop=template.enable_loop, - reserved_names=template.reserved_names) + source = codegen.compile( + node, + template.uri, + filename, + default_filters=template.default_filters, + buffer_filters=template.buffer_filters, + imports=template.imports, + future_imports=template.future_imports, + source_encoding=lexer.encoding, + generate_magic_comment=generate_magic_comment, + disable_unicode=template.disable_unicode, + strict_undefined=template.strict_undefined, + enable_loop=template.enable_loop, + reserved_names=template.reserved_names, + ) return source, lexer + def _compile_text(template, text, filename): identifier = template.module_id - source, lexer = _compile(template, text, filename, - generate_magic_comment=template.disable_unicode) + source, lexer = _compile( + template, + text, + filename, + generate_magic_comment=template.disable_unicode, + ) cid = identifier if not compat.py3k and isinstance(cid, compat.text_type): cid = cid.encode() module = types.ModuleType(cid) - code = compile(source, cid, 'exec') + code = compile(source, cid, "exec") # this exec() works for 2.4->3.3. exec(code, module.__dict__, module.__dict__) return (source, module) + def _compile_module_file(template, text, filename, outputpath, module_writer): - source, lexer = _compile(template, text, filename, - generate_magic_comment=True) + source, lexer = _compile( + template, text, filename, generate_magic_comment=True + ) if isinstance(source, compat.text_type): - source = source.encode(lexer.encoding or 'ascii') + source = source.encode(lexer.encoding or "ascii") if module_writer: module_writer(source, outputpath) @@ -694,12 +765,13 @@ def _compile_module_file(template, text, filename, outputpath, module_writer): os.close(dest) shutil.move(name, outputpath) + def _get_module_info_from_callable(callable_): if compat.py3k: - return _get_module_info(callable_.__globals__['__name__']) + return _get_module_info(callable_.__globals__["__name__"]) else: - return _get_module_info(callable_.func_globals['__name__']) + return _get_module_info(callable_.func_globals["__name__"]) + def _get_module_info(filename): return ModuleInfo._modules[filename] - diff --git a/lib/mako/util.py b/lib/mako/util.py index cba2ab79..16e3c726 100644 --- a/lib/mako/util.py +++ b/lib/mako/util.py @@ -1,15 +1,20 @@ # mako/util.py -# Copyright (C) 2006-2015 the Mako authors and contributors +# Copyright 2006-2020 the Mako authors and contributors # # This module is part of Mako and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php +from __future__ import absolute_import -import re -import collections +from ast import parse import codecs -import os -from mako import compat +import collections import operator +import os +import re +import timeit + +from mako import compat + def update_wrapper(decorated, fn): decorated.__wrapped__ = fn @@ -27,16 +32,16 @@ class PluginLoader(object): return self.impls[name]() else: import pkg_resources - for impl in pkg_resources.iter_entry_points( - self.group, - name): + + for impl in pkg_resources.iter_entry_points(self.group, name): self.impls[name] = impl.load return impl.load() else: from mako import exceptions + raise exceptions.RuntimeException( - "Can't load plugin %s %s" % - (self.group, name)) + "Can't load plugin %s %s" % (self.group, name) + ) def register(self, name, modulepath, objname): def load(): @@ -44,21 +49,24 @@ class PluginLoader(object): for token in modulepath.split(".")[1:]: mod = getattr(mod, token) return getattr(mod, objname) + self.impls[name] = load -def verify_directory(dir): + +def verify_directory(dir_): """create and/or verify a filesystem directory.""" tries = 0 - while not os.path.exists(dir): + while not os.path.exists(dir_): try: tries += 1 - os.makedirs(dir, compat.octal("0775")) + os.makedirs(dir_, compat.octal("0775")) except: if tries > 5: raise + def to_list(x, default=None): if x is None: return default @@ -69,7 +77,9 @@ def to_list(x, default=None): class memoized_property(object): + """A read-only @property that is only evaluated once.""" + def __init__(self, fget, doc=None): self.fget = fget self.__doc__ = doc or fget.__doc__ @@ -81,7 +91,9 @@ class memoized_property(object): obj.__dict__[self.__name__] = result = self.fget(obj) return result + class memoized_instancemethod(object): + """Decorate a method memoize its return value. Best applied to no-arg methods: memoization is not sensitive to @@ -89,6 +101,7 @@ class memoized_instancemethod(object): called with different arguments. """ + def __init__(self, fget, doc=None): self.fget = fget self.__doc__ = doc or fget.__doc__ @@ -97,19 +110,27 @@ class memoized_instancemethod(object): def __get__(self, obj, cls): if obj is None: return self + def oneshot(*args, **kw): result = self.fget(obj, *args, **kw) - memo = lambda *a, **kw: result + + def memo(*a, **kw): + return result + memo.__name__ = self.__name__ memo.__doc__ = self.__doc__ obj.__dict__[self.__name__] = memo return result + oneshot.__name__ = self.__name__ oneshot.__doc__ = self.__doc__ return oneshot + class SetLikeDict(dict): + """a dictionary that has some setlike methods on it""" + def union(self, other): """produce a 'union' of this dict and another (at the key level). @@ -118,17 +139,19 @@ class SetLikeDict(dict): x.update(other) return x + class FastEncodingBuffer(object): + """a very rudimentary buffer that is faster than StringIO, but doesn't crash on unicode data like cStringIO.""" - def __init__(self, encoding=None, errors='strict', as_unicode=False): + def __init__(self, encoding=None, errors="strict", as_unicode=False): self.data = collections.deque() self.encoding = encoding if as_unicode: - self.delim = compat.u('') + self.delim = compat.u("") else: - self.delim = '' + self.delim = "" self.as_unicode = as_unicode self.errors = errors self.write = self.data.append @@ -139,12 +162,15 @@ class FastEncodingBuffer(object): def getvalue(self): if self.encoding: - return self.delim.join(self.data).encode(self.encoding, - self.errors) + return self.delim.join(self.data).encode( + self.encoding, self.errors + ) else: return self.delim.join(self.data) + class LRUCache(dict): + """A dictionary-like object that stores a limited number of items, discarding lesser used items periodically. @@ -157,17 +183,18 @@ class LRUCache(dict): def __init__(self, key, value): self.key = key self.value = value - self.timestamp = compat.time_func() + self.timestamp = timeit.default_timer() + def __repr__(self): return repr(self.value) - def __init__(self, capacity, threshold=.5): + def __init__(self, capacity, threshold=0.5): self.capacity = capacity self.threshold = threshold def __getitem__(self, key): item = dict.__getitem__(self, key) - item.timestamp = compat.time_func() + item.timestamp = timeit.default_timer() return item.value def values(self): @@ -191,9 +218,12 @@ class LRUCache(dict): def _manage_size(self): while len(self) > self.capacity + self.capacity * self.threshold: - bytime = sorted(dict.values(self), - key=operator.attrgetter('timestamp'), reverse=True) - for item in bytime[self.capacity:]: + bytime = sorted( + dict.values(self), + key=operator.attrgetter("timestamp"), + reverse=True, + ) + for item in bytime[self.capacity :]: try: del self[item.key] except KeyError: @@ -201,10 +231,12 @@ class LRUCache(dict): # broke in on us. loop around and try again break + # Regexp to match python magic encoding line _PYTHON_MAGIC_COMMENT_re = re.compile( - r'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', - re.VERBOSE) + r"[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)", re.VERBOSE +) + def parse_encoding(fp): """Deduce the encoding of a Python source file (binary mode) from magic @@ -222,13 +254,12 @@ def parse_encoding(fp): line1 = fp.readline() has_bom = line1.startswith(codecs.BOM_UTF8) if has_bom: - line1 = line1[len(codecs.BOM_UTF8):] + line1 = line1[len(codecs.BOM_UTF8) :] - m = _PYTHON_MAGIC_COMMENT_re.match(line1.decode('ascii', 'ignore')) + m = _PYTHON_MAGIC_COMMENT_re.match(line1.decode("ascii", "ignore")) if not m: try: - import parser - parser.suite(line1.decode('ascii', 'ignore')) + parse(line1.decode("ascii", "ignore")) except (ImportError, SyntaxError): # Either it's a real syntax error, in which case the source # is not valid python source, or line2 is a continuation of @@ -238,13 +269,16 @@ def parse_encoding(fp): else: line2 = fp.readline() m = _PYTHON_MAGIC_COMMENT_re.match( - line2.decode('ascii', 'ignore')) + line2.decode("ascii", "ignore") + ) if has_bom: if m: - raise SyntaxError("python refuses to compile code with both a UTF8" \ - " byte-order-mark and a magic encoding comment") - return 'utf_8' + raise SyntaxError( + "python refuses to compile code with both a UTF8" + " byte-order-mark and a magic encoding comment" + ) + return "utf_8" elif m: return m.group(1) else: @@ -252,6 +286,7 @@ def parse_encoding(fp): finally: fp.seek(pos) + def sorted_dict_repr(d): """repr() a dictionary with the keys in order. @@ -262,14 +297,16 @@ def sorted_dict_repr(d): keys.sort() return "{" + ", ".join(["%r: %r" % (k, d[k]) for k in keys]) + "}" + def restore__ast(_ast): """Attempt to restore the required classes to the _ast module if it appears to be missing them """ - if hasattr(_ast, 'AST'): + if hasattr(_ast, "AST"): return _ast.PyCF_ONLY_AST = 2 << 9 - m = compile("""\ + m = compile( + """\ def foo(): pass class Bar(object): pass if False: pass @@ -282,13 +319,17 @@ baz = 'mako' baz and 'foo' or 'bar' (mako is baz == baz) is not baz != mako mako > baz < mako >= baz <= mako -mako in baz not in mako""", '', 'exec', _ast.PyCF_ONLY_AST) +mako in baz not in mako""", + "", + "exec", + _ast.PyCF_ONLY_AST, + ) _ast.Module = type(m) for cls in _ast.Module.__mro__: - if cls.__name__ == 'mod': + if cls.__name__ == "mod": _ast.mod = cls - elif cls.__name__ == 'AST': + elif cls.__name__ == "AST": _ast.AST = cls _ast.FunctionDef = type(m.body[0]) @@ -338,8 +379,7 @@ mako in baz not in mako""", '', 'exec', _ast.PyCF_ONLY_AST) _ast.NotIn = type(m.body[12].value.ops[1]) - -def read_file(path, mode='rb'): +def read_file(path, mode="rb"): fp = open(path, mode) try: data = fp.read() @@ -347,6 +387,7 @@ def read_file(path, mode='rb'): finally: fp.close() + def read_python_file(path): fp = open(path, "rb") try: @@ -357,4 +398,3 @@ def read_python_file(path): return data finally: fp.close() - diff --git a/lib/mediafile.py b/lib/mediafile.py new file mode 100644 index 00000000..ca3747de --- /dev/null +++ b/lib/mediafile.py @@ -0,0 +1,2370 @@ +# -*- coding: utf-8 -*- +# This file is part of MediaFile. +# Copyright 2016, Adrian Sampson. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +"""Handles low-level interfacing for files' tags. Wraps Mutagen to +automatically detect file types and provide a unified interface for a +useful subset of music files' tags. + +Usage: + + >>> f = MediaFile('Lucy.mp3') + >>> f.title + u'Lucy in the Sky with Diamonds' + >>> f.artist = 'The Beatles' + >>> f.save() + +A field will always return a reasonable value of the correct type, even +if no tag is present. If no value is available, the value will be false +(e.g., zero or the empty string). + +Internally ``MediaFile`` uses ``MediaField`` descriptors to access the +data from the tags. In turn ``MediaField`` uses a number of +``StorageStyle`` strategies to handle format specific logic. +""" +from __future__ import division, absolute_import, print_function + +import mutagen +import mutagen.id3 +import mutagen.mp3 +import mutagen.mp4 +import mutagen.flac +import mutagen.asf +import mutagen._util + +import base64 +import binascii +import codecs +import datetime +import enum +import functools +import imghdr +import logging +import math +import os +import re +import six +import struct +import traceback + + +__version__ = '0.9.0' +__all__ = ['UnreadableFileError', 'FileTypeError', 'MediaFile'] + +log = logging.getLogger(__name__) + +# Human-readable type names. +TYPES = { + 'mp3': 'MP3', + 'aac': 'AAC', + 'alac': 'ALAC', + 'ogg': 'OGG', + 'opus': 'Opus', + 'flac': 'FLAC', + 'ape': 'APE', + 'wv': 'WavPack', + 'mpc': 'Musepack', + 'asf': 'Windows Media', + 'aiff': 'AIFF', + 'dsf': 'DSD Stream File', + 'wav': 'WAVE', +} + +PREFERRED_IMAGE_EXTENSIONS = {'jpeg': 'jpg'} + + +# Exceptions. + +class UnreadableFileError(Exception): + """Mutagen is not able to extract information from the file. + """ + def __init__(self, filename, msg): + Exception.__init__(self, msg if msg else repr(filename)) + + +class FileTypeError(UnreadableFileError): + """Reading this type of file is not supported. + + If passed the `mutagen_type` argument this indicates that the + mutagen type is not supported by `Mediafile`. + """ + def __init__(self, filename, mutagen_type=None): + if mutagen_type is None: + msg = u'{0!r}: not in a recognized format'.format(filename) + else: + msg = u'{0}: of mutagen type {1}'.format( + repr(filename), mutagen_type + ) + Exception.__init__(self, msg) + + +class MutagenError(UnreadableFileError): + """Raised when Mutagen fails unexpectedly---probably due to a bug. + """ + def __init__(self, filename, mutagen_exc): + msg = u'{0}: {1}'.format(repr(filename), mutagen_exc) + Exception.__init__(self, msg) + + +# Interacting with Mutagen. + + +def mutagen_call(action, filename, func, *args, **kwargs): + """Call a Mutagen function with appropriate error handling. + + `action` is a string describing what the function is trying to do, + and `filename` is the relevant filename. The rest of the arguments + describe the callable to invoke. + + We require at least Mutagen 1.33, where `IOError` is *never* used, + neither for internal parsing errors *nor* for ordinary IO error + conditions such as a bad filename. Mutagen-specific parsing errors and IO + errors are reraised as `UnreadableFileError`. Other exceptions + raised inside Mutagen---i.e., bugs---are reraised as `MutagenError`. + """ + try: + return func(*args, **kwargs) + except mutagen.MutagenError as exc: + log.debug(u'%s failed: %s', action, six.text_type(exc)) + raise UnreadableFileError(filename, six.text_type(exc)) + except UnreadableFileError: + # Reraise our errors without changes. + # Used in case of decorating functions (e.g. by `loadfile`). + raise + except Exception as exc: + # Isolate bugs in Mutagen. + log.debug(u'%s', traceback.format_exc()) + log.error(u'uncaught Mutagen exception in %s: %s', action, exc) + raise MutagenError(filename, exc) + + +def loadfile(method=True, writable=False, create=False): + """A decorator that works like `mutagen._util.loadfile` but with + additional error handling. + + Opens a file and passes a `mutagen._utils.FileThing` to the + decorated function. Should be used as a decorator for functions + using a `filething` parameter. + """ + def decorator(func): + f = mutagen._util.loadfile(method, writable, create)(func) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + return mutagen_call('loadfile', '', f, *args, **kwargs) + return wrapper + return decorator + + +# Utility. + +def _update_filething(filething): + """Reopen a `filething` if it's a local file. + + A filething that is *not* an actual file is left unchanged; a + filething with a filename is reopened and a new object is returned. + """ + if filething.filename: + return mutagen._util.FileThing( + None, filething.filename, filething.name + ) + else: + return filething + + +def _safe_cast(out_type, val): + """Try to covert val to out_type but never raise an exception. + + If the value does not exist, return None. Or, if the value + can't be converted, then a sensible default value is returned. + out_type should be bool, int, or unicode; otherwise, the value + is just passed through. + """ + if val is None: + return None + + if out_type == int: + if isinstance(val, int) or isinstance(val, float): + # Just a number. + return int(val) + else: + # Process any other type as a string. + if isinstance(val, bytes): + val = val.decode('utf-8', 'ignore') + elif not isinstance(val, six.string_types): + val = six.text_type(val) + # Get a number from the front of the string. + match = re.match(r'[\+-]?[0-9]+', val.strip()) + return int(match.group(0)) if match else 0 + + elif out_type == bool: + try: + # Should work for strings, bools, ints: + return bool(int(val)) + except ValueError: + return False + + elif out_type == six.text_type: + if isinstance(val, bytes): + return val.decode('utf-8', 'ignore') + elif isinstance(val, six.text_type): + return val + else: + return six.text_type(val) + + elif out_type == float: + if isinstance(val, int) or isinstance(val, float): + return float(val) + else: + if isinstance(val, bytes): + val = val.decode('utf-8', 'ignore') + else: + val = six.text_type(val) + match = re.match(r'[\+-]?([0-9]+\.?[0-9]*|[0-9]*\.[0-9]+)', + val.strip()) + if match: + val = match.group(0) + if val: + return float(val) + return 0.0 + + else: + return val + + +# Image coding for ASF/WMA. + +def _unpack_asf_image(data): + """Unpack image data from a WM/Picture tag. Return a tuple + containing the MIME type, the raw image data, a type indicator, and + the image's description. + + This function is treated as "untrusted" and could throw all manner + of exceptions (out-of-bounds, etc.). We should clean this up + sometime so that the failure modes are well-defined. + """ + type, size = struct.unpack_from(' 0: + gain = math.log10(maxgain / 1000.0) * -10 + else: + # Invalid gain value found. + gain = 0.0 + + # SoundCheck stores peak values as the actual value of the sample, + # and again separately for the left and right channels. We need to + # convert this to a percentage of full scale, which is 32768 for a + # 16 bit sample. Once again, we play it safe by using the larger of + # the two values. + peak = max(soundcheck[6:8]) / 32768.0 + + return round(gain, 2), round(peak, 6) + + +def _sc_encode(gain, peak): + """Encode ReplayGain gain/peak values as a Sound Check string. + """ + # SoundCheck stores the peak value as the actual value of the + # sample, rather than the percentage of full scale that RG uses, so + # we do a simple conversion assuming 16 bit samples. + peak *= 32768.0 + + # SoundCheck stores absolute RMS values in some unknown units rather + # than the dB values RG uses. We can calculate these absolute values + # from the gain ratio using a reference value of 1000 units. We also + # enforce the maximum and minimum value here, which is equivalent to + # about -18.2dB and 30.0dB. + g1 = int(min(round((10 ** (gain / -10)) * 1000), 65534)) or 1 + # Same as above, except our reference level is 2500 units. + g2 = int(min(round((10 ** (gain / -10)) * 2500), 65534)) or 1 + + # The purpose of these values are unknown, but they also seem to be + # unused so we just use zero. + uk = 0 + values = (g1, g1, g2, g2, uk, uk, int(peak), int(peak), uk, uk) + return (u' %08X' * 10) % values + + +# Cover art and other images. +def _imghdr_what_wrapper(data): + """A wrapper around imghdr.what to account for jpeg files that can only be + identified as such using their magic bytes + See #1545 + See https://github.com/file/file/blob/master/magic/Magdir/jpeg#L12 + """ + # imghdr.what returns none for jpegs with only the magic bytes, so + # _wider_test_jpeg is run in that case. It still returns None if it didn't + # match such a jpeg file. + return imghdr.what(None, h=data) or _wider_test_jpeg(data) + + +def _wider_test_jpeg(data): + """Test for a jpeg file following the UNIX file implementation which + uses the magic bytes rather than just looking for the bytes that + represent 'JFIF' or 'EXIF' at a fixed position. + """ + if data[:2] == b'\xff\xd8': + return 'jpeg' + + +def image_mime_type(data): + """Return the MIME type of the image data (a bytestring). + """ + # This checks for a jpeg file with only the magic bytes (unrecognized by + # imghdr.what). imghdr.what returns none for that type of file, so + # _wider_test_jpeg is run in that case. It still returns None if it didn't + # match such a jpeg file. + kind = _imghdr_what_wrapper(data) + if kind in ['gif', 'jpeg', 'png', 'tiff', 'bmp']: + return 'image/{0}'.format(kind) + elif kind == 'pgm': + return 'image/x-portable-graymap' + elif kind == 'pbm': + return 'image/x-portable-bitmap' + elif kind == 'ppm': + return 'image/x-portable-pixmap' + elif kind == 'xbm': + return 'image/x-xbitmap' + else: + return 'image/x-{0}'.format(kind) + + +def image_extension(data): + ext = _imghdr_what_wrapper(data) + return PREFERRED_IMAGE_EXTENSIONS.get(ext, ext) + + +class ImageType(enum.Enum): + """Indicates the kind of an `Image` stored in a file's tag. + """ + other = 0 + icon = 1 + other_icon = 2 + front = 3 + back = 4 + leaflet = 5 + media = 6 + lead_artist = 7 + artist = 8 + conductor = 9 + group = 10 + composer = 11 + lyricist = 12 + recording_location = 13 + recording_session = 14 + performance = 15 + screen_capture = 16 + fish = 17 + illustration = 18 + artist_logo = 19 + publisher_logo = 20 + + +class Image(object): + """Structure representing image data and metadata that can be + stored and retrieved from tags. + + The structure has four properties. + * ``data`` The binary data of the image + * ``desc`` An optional description of the image + * ``type`` An instance of `ImageType` indicating the kind of image + * ``mime_type`` Read-only property that contains the mime type of + the binary data + """ + def __init__(self, data, desc=None, type=None): + assert isinstance(data, bytes) + if desc is not None: + assert isinstance(desc, six.text_type) + self.data = data + self.desc = desc + if isinstance(type, int): + try: + type = list(ImageType)[type] + except IndexError: + log.debug(u"ignoring unknown image type index %s", type) + type = ImageType.other + self.type = type + + @property + def mime_type(self): + if self.data: + return image_mime_type(self.data) + + @property + def type_index(self): + if self.type is None: + # This method is used when a tag format requires the type + # index to be set, so we return "other" as the default value. + return 0 + return self.type.value + + +# StorageStyle classes describe strategies for accessing values in +# Mutagen file objects. + +class StorageStyle(object): + """A strategy for storing a value for a certain tag format (or set + of tag formats). This basic StorageStyle describes simple 1:1 + mapping from raw values to keys in a Mutagen file object; subclasses + describe more sophisticated translations or format-specific access + strategies. + + MediaFile uses a StorageStyle via three methods: ``get()``, + ``set()``, and ``delete()``. It passes a Mutagen file object to + each. + + Internally, the StorageStyle implements ``get()`` and ``set()`` + using two steps that may be overridden by subtypes. To get a value, + the StorageStyle first calls ``fetch()`` to retrieve the value + corresponding to a key and then ``deserialize()`` to convert the raw + Mutagen value to a consumable Python value. Similarly, to set a + field, we call ``serialize()`` to encode the value and then + ``store()`` to assign the result into the Mutagen object. + + Each StorageStyle type has a class-level `formats` attribute that is + a list of strings indicating the formats that the style applies to. + MediaFile only uses StorageStyles that apply to the correct type for + a given audio file. + """ + + formats = ['FLAC', 'OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', + 'OggFlac', 'APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio'] + """List of mutagen classes the StorageStyle can handle. + """ + + def __init__(self, key, as_type=six.text_type, suffix=None, + float_places=2, read_only=False): + """Create a basic storage strategy. Parameters: + + - `key`: The key on the Mutagen file object used to access the + field's data. + - `as_type`: The Python type that the value is stored as + internally (`unicode`, `int`, `bool`, or `bytes`). + - `suffix`: When `as_type` is a string type, append this before + storing the value. + - `float_places`: When the value is a floating-point number and + encoded as a string, the number of digits to store after the + decimal point. + - `read_only`: When true, writing to this field is disabled. + Primary use case is so wrongly named fields can be addressed + in a graceful manner. This does not block the delete method. + + """ + self.key = key + self.as_type = as_type + self.suffix = suffix + self.float_places = float_places + self.read_only = read_only + + # Convert suffix to correct string type. + if self.suffix and self.as_type is six.text_type \ + and not isinstance(self.suffix, six.text_type): + self.suffix = self.suffix.decode('utf-8') + + # Getter. + + def get(self, mutagen_file): + """Get the value for the field using this style. + """ + return self.deserialize(self.fetch(mutagen_file)) + + def fetch(self, mutagen_file): + """Retrieve the raw value of for this tag from the Mutagen file + object. + """ + try: + return mutagen_file[self.key][0] + except (KeyError, IndexError): + return None + + def deserialize(self, mutagen_value): + """Given a raw value stored on a Mutagen object, decode and + return the represented value. + """ + if self.suffix and isinstance(mutagen_value, six.text_type) \ + and mutagen_value.endswith(self.suffix): + return mutagen_value[:-len(self.suffix)] + else: + return mutagen_value + + # Setter. + + def set(self, mutagen_file, value): + """Assign the value for the field using this style. + """ + self.store(mutagen_file, self.serialize(value)) + + def store(self, mutagen_file, value): + """Store a serialized value in the Mutagen file object. + """ + mutagen_file[self.key] = [value] + + def serialize(self, value): + """Convert the external Python value to a type that is suitable for + storing in a Mutagen file object. + """ + if isinstance(value, float) and self.as_type is six.text_type: + value = u'{0:.{1}f}'.format(value, self.float_places) + value = self.as_type(value) + elif self.as_type is six.text_type: + if isinstance(value, bool): + # Store bools as 1/0 instead of True/False. + value = six.text_type(int(bool(value))) + elif isinstance(value, bytes): + value = value.decode('utf-8', 'ignore') + else: + value = six.text_type(value) + else: + value = self.as_type(value) + + if self.suffix: + value += self.suffix + + return value + + def delete(self, mutagen_file): + """Remove the tag from the file. + """ + if self.key in mutagen_file: + del mutagen_file[self.key] + + +class ListStorageStyle(StorageStyle): + """Abstract storage style that provides access to lists. + + The ListMediaField descriptor uses a ListStorageStyle via two + methods: ``get_list()`` and ``set_list()``. It passes a Mutagen file + object to each. + + Subclasses may overwrite ``fetch`` and ``store``. ``fetch`` must + return a (possibly empty) list and ``store`` receives a serialized + list of values as the second argument. + + The `serialize` and `deserialize` methods (from the base + `StorageStyle`) are still called with individual values. This class + handles packing and unpacking the values into lists. + """ + def get(self, mutagen_file): + """Get the first value in the field's value list. + """ + try: + return self.get_list(mutagen_file)[0] + except IndexError: + return None + + def get_list(self, mutagen_file): + """Get a list of all values for the field using this style. + """ + return [self.deserialize(item) for item in self.fetch(mutagen_file)] + + def fetch(self, mutagen_file): + """Get the list of raw (serialized) values. + """ + try: + return mutagen_file[self.key] + except KeyError: + return [] + + def set(self, mutagen_file, value): + """Set an individual value as the only value for the field using + this style. + """ + self.set_list(mutagen_file, [value]) + + def set_list(self, mutagen_file, values): + """Set all values for the field using this style. `values` + should be an iterable. + """ + self.store(mutagen_file, [self.serialize(value) for value in values]) + + def store(self, mutagen_file, values): + """Set the list of all raw (serialized) values for this field. + """ + mutagen_file[self.key] = values + + +class SoundCheckStorageStyleMixin(object): + """A mixin for storage styles that read and write iTunes SoundCheck + analysis values. The object must have an `index` field that + indicates which half of the gain/peak pair---0 or 1---the field + represents. + """ + def get(self, mutagen_file): + data = self.fetch(mutagen_file) + if data is not None: + return _sc_decode(data)[self.index] + + def set(self, mutagen_file, value): + data = self.fetch(mutagen_file) + if data is None: + gain_peak = [0, 0] + else: + gain_peak = list(_sc_decode(data)) + gain_peak[self.index] = value or 0 + data = self.serialize(_sc_encode(*gain_peak)) + self.store(mutagen_file, data) + + +class ASFStorageStyle(ListStorageStyle): + """A general storage style for Windows Media/ASF files. + """ + formats = ['ASF'] + + def deserialize(self, data): + if isinstance(data, mutagen.asf.ASFBaseAttribute): + data = data.value + return data + + +class MP4StorageStyle(StorageStyle): + """A general storage style for MPEG-4 tags. + """ + formats = ['MP4'] + + def serialize(self, value): + value = super(MP4StorageStyle, self).serialize(value) + if self.key.startswith('----:') and isinstance(value, six.text_type): + value = value.encode('utf-8') + return value + + +class MP4TupleStorageStyle(MP4StorageStyle): + """A style for storing values as part of a pair of numbers in an + MPEG-4 file. + """ + def __init__(self, key, index=0, **kwargs): + super(MP4TupleStorageStyle, self).__init__(key, **kwargs) + self.index = index + + def deserialize(self, mutagen_value): + items = mutagen_value or [] + packing_length = 2 + return list(items) + [0] * (packing_length - len(items)) + + def get(self, mutagen_file): + value = super(MP4TupleStorageStyle, self).get(mutagen_file)[self.index] + if value == 0: + # The values are always present and saved as integers. So we + # assume that "0" indicates it is not set. + return None + else: + return value + + def set(self, mutagen_file, value): + if value is None: + value = 0 + items = self.deserialize(self.fetch(mutagen_file)) + items[self.index] = int(value) + self.store(mutagen_file, items) + + def delete(self, mutagen_file): + if self.index == 0: + super(MP4TupleStorageStyle, self).delete(mutagen_file) + else: + self.set(mutagen_file, None) + + +class MP4ListStorageStyle(ListStorageStyle, MP4StorageStyle): + pass + + +class MP4SoundCheckStorageStyle(SoundCheckStorageStyleMixin, MP4StorageStyle): + def __init__(self, key, index=0, **kwargs): + super(MP4SoundCheckStorageStyle, self).__init__(key, **kwargs) + self.index = index + + +class MP4BoolStorageStyle(MP4StorageStyle): + """A style for booleans in MPEG-4 files. (MPEG-4 has an atom type + specifically for representing booleans.) + """ + def get(self, mutagen_file): + try: + return mutagen_file[self.key] + except KeyError: + return None + + def get_list(self, mutagen_file): + raise NotImplementedError(u'MP4 bool storage does not support lists') + + def set(self, mutagen_file, value): + mutagen_file[self.key] = value + + def set_list(self, mutagen_file, values): + raise NotImplementedError(u'MP4 bool storage does not support lists') + + +class MP4ImageStorageStyle(MP4ListStorageStyle): + """Store images as MPEG-4 image atoms. Values are `Image` objects. + """ + def __init__(self, **kwargs): + super(MP4ImageStorageStyle, self).__init__(key='covr', **kwargs) + + def deserialize(self, data): + return Image(data) + + def serialize(self, image): + if image.mime_type == 'image/png': + kind = mutagen.mp4.MP4Cover.FORMAT_PNG + elif image.mime_type == 'image/jpeg': + kind = mutagen.mp4.MP4Cover.FORMAT_JPEG + else: + raise ValueError(u'MP4 files only supports PNG and JPEG images') + return mutagen.mp4.MP4Cover(image.data, kind) + + +class MP3StorageStyle(StorageStyle): + """Store data in ID3 frames. + """ + formats = ['MP3', 'AIFF', 'DSF', 'WAVE'] + + def __init__(self, key, id3_lang=None, **kwargs): + """Create a new ID3 storage style. `id3_lang` is the value for + the language field of newly created frames. + """ + self.id3_lang = id3_lang + super(MP3StorageStyle, self).__init__(key, **kwargs) + + def fetch(self, mutagen_file): + try: + return mutagen_file[self.key].text[0] + except (KeyError, IndexError): + return None + + def store(self, mutagen_file, value): + frame = mutagen.id3.Frames[self.key](encoding=3, text=[value]) + mutagen_file.tags.setall(self.key, [frame]) + + +class MP3PeopleStorageStyle(MP3StorageStyle): + """Store list of people in ID3 frames. + """ + def __init__(self, key, involvement='', **kwargs): + self.involvement = involvement + super(MP3PeopleStorageStyle, self).__init__(key, **kwargs) + + def store(self, mutagen_file, value): + frames = mutagen_file.tags.getall(self.key) + + # Try modifying in place. + found = False + for frame in frames: + if frame.encoding == mutagen.id3.Encoding.UTF8: + for pair in frame.people: + if pair[0].lower() == self.involvement.lower(): + pair[1] = value + found = True + + # Try creating a new frame. + if not found: + frame = mutagen.id3.Frames[self.key]( + encoding=mutagen.id3.Encoding.UTF8, + people=[[self.involvement, value]] + ) + mutagen_file.tags.add(frame) + + def fetch(self, mutagen_file): + for frame in mutagen_file.tags.getall(self.key): + for pair in frame.people: + if pair[0].lower() == self.involvement.lower(): + try: + return pair[1] + except IndexError: + return None + + +class MP3ListStorageStyle(ListStorageStyle, MP3StorageStyle): + """Store lists of data in multiple ID3 frames. + """ + def fetch(self, mutagen_file): + try: + return mutagen_file[self.key].text + except KeyError: + return [] + + def store(self, mutagen_file, values): + frame = mutagen.id3.Frames[self.key](encoding=3, text=values) + mutagen_file.tags.setall(self.key, [frame]) + + +class MP3UFIDStorageStyle(MP3StorageStyle): + """Store string data in a UFID ID3 frame with a particular owner. + """ + def __init__(self, owner, **kwargs): + self.owner = owner + super(MP3UFIDStorageStyle, self).__init__('UFID:' + owner, **kwargs) + + def fetch(self, mutagen_file): + try: + return mutagen_file[self.key].data + except KeyError: + return None + + def store(self, mutagen_file, value): + # This field type stores text data as encoded data. + assert isinstance(value, six.text_type) + value = value.encode('utf-8') + + frames = mutagen_file.tags.getall(self.key) + for frame in frames: + # Replace existing frame data. + if frame.owner == self.owner: + frame.data = value + else: + # New frame. + frame = mutagen.id3.UFID(owner=self.owner, data=value) + mutagen_file.tags.setall(self.key, [frame]) + + +class MP3DescStorageStyle(MP3StorageStyle): + """Store data in a TXXX (or similar) ID3 frame. The frame is + selected based its ``desc`` field. + ``attr`` allows to specify name of data accessor property in the frame. + Most of frames use `text`. + ``multispec`` specifies if frame data is ``mutagen.id3.MultiSpec`` + which means that the data is being packed in the list. + """ + def __init__(self, desc=u'', key='TXXX', attr='text', multispec=True, + **kwargs): + assert isinstance(desc, six.text_type) + self.description = desc + self.attr = attr + self.multispec = multispec + super(MP3DescStorageStyle, self).__init__(key=key, **kwargs) + + def store(self, mutagen_file, value): + frames = mutagen_file.tags.getall(self.key) + if self.multispec: + value = [value] + + # Try modifying in place. + found = False + for frame in frames: + if frame.desc.lower() == self.description.lower(): + setattr(frame, self.attr, value) + frame.encoding = mutagen.id3.Encoding.UTF8 + found = True + + # Try creating a new frame. + if not found: + frame = mutagen.id3.Frames[self.key]( + desc=self.description, + encoding=mutagen.id3.Encoding.UTF8, + **{self.attr: value} + ) + if self.id3_lang: + frame.lang = self.id3_lang + mutagen_file.tags.add(frame) + + def fetch(self, mutagen_file): + for frame in mutagen_file.tags.getall(self.key): + if frame.desc.lower() == self.description.lower(): + if not self.multispec: + return getattr(frame, self.attr) + try: + return getattr(frame, self.attr)[0] + except IndexError: + return None + + def delete(self, mutagen_file): + found_frame = None + for frame in mutagen_file.tags.getall(self.key): + if frame.desc.lower() == self.description.lower(): + found_frame = frame + break + if found_frame is not None: + del mutagen_file[frame.HashKey] + + +class MP3ListDescStorageStyle(MP3DescStorageStyle, ListStorageStyle): + def __init__(self, desc=u'', key='TXXX', split_v23=False, **kwargs): + self.split_v23 = split_v23 + super(MP3ListDescStorageStyle, self).__init__( + desc=desc, key=key, **kwargs + ) + + def fetch(self, mutagen_file): + for frame in mutagen_file.tags.getall(self.key): + if frame.desc.lower() == self.description.lower(): + if mutagen_file.tags.version == (2, 3, 0) and self.split_v23: + return sum((el.split('/') for el in frame.text), []) + else: + return frame.text + return [] + + def store(self, mutagen_file, values): + self.delete(mutagen_file) + frame = mutagen.id3.Frames[self.key]( + desc=self.description, + text=values, + encoding=mutagen.id3.Encoding.UTF8, + ) + if self.id3_lang: + frame.lang = self.id3_lang + mutagen_file.tags.add(frame) + + +class MP3SlashPackStorageStyle(MP3StorageStyle): + """Store value as part of pair that is serialized as a slash- + separated string. + """ + def __init__(self, key, pack_pos=0, **kwargs): + super(MP3SlashPackStorageStyle, self).__init__(key, **kwargs) + self.pack_pos = pack_pos + + def _fetch_unpacked(self, mutagen_file): + data = self.fetch(mutagen_file) + if data: + items = six.text_type(data).split('/') + else: + items = [] + packing_length = 2 + return list(items) + [None] * (packing_length - len(items)) + + def get(self, mutagen_file): + return self._fetch_unpacked(mutagen_file)[self.pack_pos] + + def set(self, mutagen_file, value): + items = self._fetch_unpacked(mutagen_file) + items[self.pack_pos] = value + if items[0] is None: + items[0] = '' + if items[1] is None: + items.pop() # Do not store last value + self.store(mutagen_file, '/'.join(map(six.text_type, items))) + + def delete(self, mutagen_file): + if self.pack_pos == 0: + super(MP3SlashPackStorageStyle, self).delete(mutagen_file) + else: + self.set(mutagen_file, None) + + +class MP3ImageStorageStyle(ListStorageStyle, MP3StorageStyle): + """Converts between APIC frames and ``Image`` instances. + + The `get_list` method inherited from ``ListStorageStyle`` returns a + list of ``Image``s. Similarly, the `set_list` method accepts a + list of ``Image``s as its ``values`` argument. + """ + def __init__(self): + super(MP3ImageStorageStyle, self).__init__(key='APIC') + self.as_type = bytes + + def deserialize(self, apic_frame): + """Convert APIC frame into Image.""" + return Image(data=apic_frame.data, desc=apic_frame.desc, + type=apic_frame.type) + + def fetch(self, mutagen_file): + return mutagen_file.tags.getall(self.key) + + def store(self, mutagen_file, frames): + mutagen_file.tags.setall(self.key, frames) + + def delete(self, mutagen_file): + mutagen_file.tags.delall(self.key) + + def serialize(self, image): + """Return an APIC frame populated with data from ``image``. + """ + assert isinstance(image, Image) + frame = mutagen.id3.Frames[self.key]() + frame.data = image.data + frame.mime = image.mime_type + frame.desc = image.desc or u'' + + # For compatibility with OS X/iTunes prefer latin-1 if possible. + # See issue #899 + try: + frame.desc.encode("latin-1") + except UnicodeEncodeError: + frame.encoding = mutagen.id3.Encoding.UTF16 + else: + frame.encoding = mutagen.id3.Encoding.LATIN1 + + frame.type = image.type_index + return frame + + +class MP3SoundCheckStorageStyle(SoundCheckStorageStyleMixin, + MP3DescStorageStyle): + def __init__(self, index=0, **kwargs): + super(MP3SoundCheckStorageStyle, self).__init__(**kwargs) + self.index = index + + +class ASFImageStorageStyle(ListStorageStyle): + """Store images packed into Windows Media/ASF byte array attributes. + Values are `Image` objects. + """ + formats = ['ASF'] + + def __init__(self): + super(ASFImageStorageStyle, self).__init__(key='WM/Picture') + + def deserialize(self, asf_picture): + mime, data, type, desc = _unpack_asf_image(asf_picture.value) + return Image(data, desc=desc, type=type) + + def serialize(self, image): + pic = mutagen.asf.ASFByteArrayAttribute() + pic.value = _pack_asf_image(image.mime_type, image.data, + type=image.type_index, + description=image.desc or u'') + return pic + + +class VorbisImageStorageStyle(ListStorageStyle): + """Store images in Vorbis comments. Both legacy COVERART fields and + modern METADATA_BLOCK_PICTURE tags are supported. Data is + base64-encoded. Values are `Image` objects. + """ + formats = ['OggOpus', 'OggTheora', 'OggSpeex', 'OggVorbis', + 'OggFlac'] + + def __init__(self): + super(VorbisImageStorageStyle, self).__init__( + key='metadata_block_picture' + ) + self.as_type = bytes + + def fetch(self, mutagen_file): + images = [] + if 'metadata_block_picture' not in mutagen_file: + # Try legacy COVERART tags. + if 'coverart' in mutagen_file: + for data in mutagen_file['coverart']: + images.append(Image(base64.b64decode(data))) + return images + for data in mutagen_file["metadata_block_picture"]: + try: + pic = mutagen.flac.Picture(base64.b64decode(data)) + except (TypeError, AttributeError): + continue + images.append(Image(data=pic.data, desc=pic.desc, + type=pic.type)) + return images + + def store(self, mutagen_file, image_data): + # Strip all art, including legacy COVERART. + if 'coverart' in mutagen_file: + del mutagen_file['coverart'] + if 'coverartmime' in mutagen_file: + del mutagen_file['coverartmime'] + super(VorbisImageStorageStyle, self).store(mutagen_file, image_data) + + def serialize(self, image): + """Turn a Image into a base64 encoded FLAC picture block. + """ + pic = mutagen.flac.Picture() + pic.data = image.data + pic.type = image.type_index + pic.mime = image.mime_type + pic.desc = image.desc or u'' + + # Encoding with base64 returns bytes on both Python 2 and 3. + # Mutagen requires the data to be a Unicode string, so we decode + # it before passing it along. + return base64.b64encode(pic.write()).decode('ascii') + + +class FlacImageStorageStyle(ListStorageStyle): + """Converts between ``mutagen.flac.Picture`` and ``Image`` instances. + """ + formats = ['FLAC'] + + def __init__(self): + super(FlacImageStorageStyle, self).__init__(key='') + + def fetch(self, mutagen_file): + return mutagen_file.pictures + + def deserialize(self, flac_picture): + return Image(data=flac_picture.data, desc=flac_picture.desc, + type=flac_picture.type) + + def store(self, mutagen_file, pictures): + """``pictures`` is a list of mutagen.flac.Picture instances. + """ + mutagen_file.clear_pictures() + for pic in pictures: + mutagen_file.add_picture(pic) + + def serialize(self, image): + """Turn a Image into a mutagen.flac.Picture. + """ + pic = mutagen.flac.Picture() + pic.data = image.data + pic.type = image.type_index + pic.mime = image.mime_type + pic.desc = image.desc or u'' + return pic + + def delete(self, mutagen_file): + """Remove all images from the file. + """ + mutagen_file.clear_pictures() + + +class APEv2ImageStorageStyle(ListStorageStyle): + """Store images in APEv2 tags. Values are `Image` objects. + """ + formats = ['APEv2File', 'WavPack', 'Musepack', 'MonkeysAudio', 'OptimFROG'] + + TAG_NAMES = { + ImageType.other: 'Cover Art (other)', + ImageType.icon: 'Cover Art (icon)', + ImageType.other_icon: 'Cover Art (other icon)', + ImageType.front: 'Cover Art (front)', + ImageType.back: 'Cover Art (back)', + ImageType.leaflet: 'Cover Art (leaflet)', + ImageType.media: 'Cover Art (media)', + ImageType.lead_artist: 'Cover Art (lead)', + ImageType.artist: 'Cover Art (artist)', + ImageType.conductor: 'Cover Art (conductor)', + ImageType.group: 'Cover Art (band)', + ImageType.composer: 'Cover Art (composer)', + ImageType.lyricist: 'Cover Art (lyricist)', + ImageType.recording_location: 'Cover Art (studio)', + ImageType.recording_session: 'Cover Art (recording)', + ImageType.performance: 'Cover Art (performance)', + ImageType.screen_capture: 'Cover Art (movie scene)', + ImageType.fish: 'Cover Art (colored fish)', + ImageType.illustration: 'Cover Art (illustration)', + ImageType.artist_logo: 'Cover Art (band logo)', + ImageType.publisher_logo: 'Cover Art (publisher logo)', + } + + def __init__(self): + super(APEv2ImageStorageStyle, self).__init__(key='') + + def fetch(self, mutagen_file): + images = [] + for cover_type, cover_tag in self.TAG_NAMES.items(): + try: + frame = mutagen_file[cover_tag] + text_delimiter_index = frame.value.find(b'\x00') + if text_delimiter_index > 0: + comment = frame.value[0:text_delimiter_index] + comment = comment.decode('utf-8', 'replace') + else: + comment = None + image_data = frame.value[text_delimiter_index + 1:] + images.append(Image(data=image_data, type=cover_type, + desc=comment)) + except KeyError: + pass + + return images + + def set_list(self, mutagen_file, values): + self.delete(mutagen_file) + + for image in values: + image_type = image.type or ImageType.other + comment = image.desc or '' + image_data = comment.encode('utf-8') + b'\x00' + image.data + cover_tag = self.TAG_NAMES[image_type] + mutagen_file[cover_tag] = image_data + + def delete(self, mutagen_file): + """Remove all images from the file. + """ + for cover_tag in self.TAG_NAMES.values(): + try: + del mutagen_file[cover_tag] + except KeyError: + pass + + +# MediaField is a descriptor that represents a single logical field. It +# aggregates several StorageStyles describing how to access the data for +# each file type. + +class MediaField(object): + """A descriptor providing access to a particular (abstract) metadata + field. + """ + def __init__(self, *styles, **kwargs): + """Creates a new MediaField. + + :param styles: `StorageStyle` instances that describe the strategy + for reading and writing the field in particular + formats. There must be at least one style for + each possible file format. + + :param out_type: the type of the value that should be returned when + getting this property. + + """ + self.out_type = kwargs.get('out_type', six.text_type) + self._styles = styles + + def styles(self, mutagen_file): + """Yields the list of storage styles of this field that can + handle the MediaFile's format. + """ + for style in self._styles: + if mutagen_file.__class__.__name__ in style.formats: + yield style + + def __get__(self, mediafile, owner=None): + out = None + for style in self.styles(mediafile.mgfile): + out = style.get(mediafile.mgfile) + if out: + break + return _safe_cast(self.out_type, out) + + def __set__(self, mediafile, value): + if value is None: + value = self._none_value() + for style in self.styles(mediafile.mgfile): + if not style.read_only: + style.set(mediafile.mgfile, value) + + def __delete__(self, mediafile): + for style in self.styles(mediafile.mgfile): + style.delete(mediafile.mgfile) + + def _none_value(self): + """Get an appropriate "null" value for this field's type. This + is used internally when setting the field to None. + """ + if self.out_type == int: + return 0 + elif self.out_type == float: + return 0.0 + elif self.out_type == bool: + return False + elif self.out_type == six.text_type: + return u'' + + +class ListMediaField(MediaField): + """Property descriptor that retrieves a list of multiple values from + a tag. + + Uses ``get_list`` and set_list`` methods of its ``StorageStyle`` + strategies to do the actual work. + """ + def __get__(self, mediafile, _): + values = [] + for style in self.styles(mediafile.mgfile): + values.extend(style.get_list(mediafile.mgfile)) + return [_safe_cast(self.out_type, value) for value in values] + + def __set__(self, mediafile, values): + for style in self.styles(mediafile.mgfile): + if not style.read_only: + style.set_list(mediafile.mgfile, values) + + def single_field(self): + """Returns a ``MediaField`` descriptor that gets and sets the + first item. + """ + options = {'out_type': self.out_type} + return MediaField(*self._styles, **options) + + +class DateField(MediaField): + """Descriptor that handles serializing and deserializing dates + + The getter parses value from tags into a ``datetime.date`` instance + and setter serializes such an instance into a string. + + For granular access to year, month, and day, use the ``*_field`` + methods to create corresponding `DateItemField`s. + """ + def __init__(self, *date_styles, **kwargs): + """``date_styles`` is a list of ``StorageStyle``s to store and + retrieve the whole date from. The ``year`` option is an + additional list of fallback styles for the year. The year is + always set on this style, but is only retrieved if the main + storage styles do not return a value. + """ + super(DateField, self).__init__(*date_styles) + year_style = kwargs.get('year', None) + if year_style: + self._year_field = MediaField(*year_style) + + def __get__(self, mediafile, owner=None): + year, month, day = self._get_date_tuple(mediafile) + if not year: + return None + try: + return datetime.date( + year, + month or 1, + day or 1 + ) + except ValueError: # Out of range values. + return None + + def __set__(self, mediafile, date): + if date is None: + self._set_date_tuple(mediafile, None, None, None) + else: + self._set_date_tuple(mediafile, date.year, date.month, date.day) + + def __delete__(self, mediafile): + super(DateField, self).__delete__(mediafile) + if hasattr(self, '_year_field'): + self._year_field.__delete__(mediafile) + + def _get_date_tuple(self, mediafile): + """Get a 3-item sequence representing the date consisting of a + year, month, and day number. Each number is either an integer or + None. + """ + # Get the underlying data and split on hyphens and slashes. + datestring = super(DateField, self).__get__(mediafile, None) + if isinstance(datestring, six.string_types): + datestring = re.sub(r'[Tt ].*$', '', six.text_type(datestring)) + items = re.split('[-/]', six.text_type(datestring)) + else: + items = [] + + # Ensure that we have exactly 3 components, possibly by + # truncating or padding. + items = items[:3] + if len(items) < 3: + items += [None] * (3 - len(items)) + + # Use year field if year is missing. + if not items[0] and hasattr(self, '_year_field'): + items[0] = self._year_field.__get__(mediafile) + + # Convert each component to an integer if possible. + items_ = [] + for item in items: + try: + items_.append(int(item)) + except (TypeError, ValueError): + items_.append(None) + return items_ + + def _set_date_tuple(self, mediafile, year, month=None, day=None): + """Set the value of the field given a year, month, and day + number. Each number can be an integer or None to indicate an + unset component. + """ + if year is None: + self.__delete__(mediafile) + return + + date = [u'{0:04d}'.format(int(year))] + if month: + date.append(u'{0:02d}'.format(int(month))) + if month and day: + date.append(u'{0:02d}'.format(int(day))) + date = map(six.text_type, date) + super(DateField, self).__set__(mediafile, u'-'.join(date)) + + if hasattr(self, '_year_field'): + self._year_field.__set__(mediafile, year) + + def year_field(self): + return DateItemField(self, 0) + + def month_field(self): + return DateItemField(self, 1) + + def day_field(self): + return DateItemField(self, 2) + + +class DateItemField(MediaField): + """Descriptor that gets and sets constituent parts of a `DateField`: + the month, day, or year. + """ + def __init__(self, date_field, item_pos): + self.date_field = date_field + self.item_pos = item_pos + + def __get__(self, mediafile, _): + return self.date_field._get_date_tuple(mediafile)[self.item_pos] + + def __set__(self, mediafile, value): + items = self.date_field._get_date_tuple(mediafile) + items[self.item_pos] = value + self.date_field._set_date_tuple(mediafile, *items) + + def __delete__(self, mediafile): + self.__set__(mediafile, None) + + +class CoverArtField(MediaField): + """A descriptor that provides access to the *raw image data* for the + cover image on a file. This is used for backwards compatibility: the + full `ImageListField` provides richer `Image` objects. + + When there are multiple images we try to pick the most likely to be a front + cover. + """ + def __init__(self): + pass + + def __get__(self, mediafile, _): + candidates = mediafile.images + if candidates: + return self.guess_cover_image(candidates).data + else: + return None + + @staticmethod + def guess_cover_image(candidates): + if len(candidates) == 1: + return candidates[0] + try: + return next(c for c in candidates if c.type == ImageType.front) + except StopIteration: + return candidates[0] + + def __set__(self, mediafile, data): + if data: + mediafile.images = [Image(data=data)] + else: + mediafile.images = [] + + def __delete__(self, mediafile): + delattr(mediafile, 'images') + + +class QNumberField(MediaField): + """Access integer-represented Q number fields. + + Access a fixed-point fraction as a float. The stored value is shifted by + `fraction_bits` binary digits to the left and then rounded, yielding a + simple integer. + """ + def __init__(self, fraction_bits, *args, **kwargs): + super(QNumberField, self).__init__(out_type=int, *args, **kwargs) + self.__fraction_bits = fraction_bits + + def __get__(self, mediafile, owner=None): + q_num = super(QNumberField, self).__get__(mediafile, owner) + if q_num is None: + return None + return q_num / pow(2, self.__fraction_bits) + + def __set__(self, mediafile, value): + q_num = round(value * pow(2, self.__fraction_bits)) + q_num = int(q_num) # needed for py2.7 + super(QNumberField, self).__set__(mediafile, q_num) + + +class ImageListField(ListMediaField): + """Descriptor to access the list of images embedded in tags. + + The getter returns a list of `Image` instances obtained from + the tags. The setter accepts a list of `Image` instances to be + written to the tags. + """ + def __init__(self): + # The storage styles used here must implement the + # `ListStorageStyle` interface and get and set lists of + # `Image`s. + super(ImageListField, self).__init__( + MP3ImageStorageStyle(), + MP4ImageStorageStyle(), + ASFImageStorageStyle(), + VorbisImageStorageStyle(), + FlacImageStorageStyle(), + APEv2ImageStorageStyle(), + out_type=Image, + ) + + +# MediaFile is a collection of fields. + +class MediaFile(object): + """Represents a multimedia file on disk and provides access to its + metadata. + """ + @loadfile() + def __init__(self, filething, id3v23=False): + """Constructs a new `MediaFile` reflecting the provided file. + + `filething` can be a path to a file (i.e., a string) or a + file-like object. + + May throw `UnreadableFileError`. + + By default, MP3 files are saved with ID3v2.4 tags. You can use + the older ID3v2.3 standard by specifying the `id3v23` option. + """ + self.filething = filething + + self.mgfile = mutagen_call( + 'open', self.filename, mutagen.File, filething + ) + + if self.mgfile is None: + # Mutagen couldn't guess the type + raise FileTypeError(self.filename) + elif type(self.mgfile).__name__ in ['M4A', 'MP4']: + info = self.mgfile.info + if info.codec and info.codec.startswith('alac'): + self.type = 'alac' + else: + self.type = 'aac' + elif type(self.mgfile).__name__ in ['ID3', 'MP3']: + self.type = 'mp3' + elif type(self.mgfile).__name__ == 'FLAC': + self.type = 'flac' + elif type(self.mgfile).__name__ == 'OggOpus': + self.type = 'opus' + elif type(self.mgfile).__name__ == 'OggVorbis': + self.type = 'ogg' + elif type(self.mgfile).__name__ == 'MonkeysAudio': + self.type = 'ape' + elif type(self.mgfile).__name__ == 'WavPack': + self.type = 'wv' + elif type(self.mgfile).__name__ == 'Musepack': + self.type = 'mpc' + elif type(self.mgfile).__name__ == 'ASF': + self.type = 'asf' + elif type(self.mgfile).__name__ == 'AIFF': + self.type = 'aiff' + elif type(self.mgfile).__name__ == 'DSF': + self.type = 'dsf' + elif type(self.mgfile).__name__ == 'WAVE': + self.type = 'wav' + else: + raise FileTypeError(self.filename, type(self.mgfile).__name__) + + # Add a set of tags if it's missing. + if self.mgfile.tags is None: + self.mgfile.add_tags() + + # Set the ID3v2.3 flag only for MP3s. + self.id3v23 = id3v23 and self.type == 'mp3' + + @property + def filename(self): + """The name of the file. + + This is the path if this object was opened from the filesystem, + or the name of the file-like object. + """ + return self.filething.name + + @filename.setter + def filename(self, val): + """Silently skips setting filename. + Workaround for `mutagen._util._openfile` setting instance's filename. + """ + pass + + @property + def path(self): + """The path to the file. + + This is `None` if the data comes from a file-like object instead + of a filesystem path. + """ + return self.filething.filename + + @property + def filesize(self): + """The size (in bytes) of the underlying file. + """ + if self.filething.filename: + return os.path.getsize(self.filething.filename) + if hasattr(self.filething.fileobj, '__len__'): + return len(self.filething.fileobj) + else: + tell = self.filething.fileobj.tell() + filesize = self.filething.fileobj.seek(0, 2) + self.filething.fileobj.seek(tell) + return filesize + + def save(self, **kwargs): + """Write the object's tags back to the file. + + May throw `UnreadableFileError`. Accepts keyword arguments to be + passed to Mutagen's `save` function. + """ + # Possibly save the tags to ID3v2.3. + if self.id3v23: + id3 = self.mgfile + if hasattr(id3, 'tags'): + # In case this is an MP3 object, not an ID3 object. + id3 = id3.tags + id3.update_to_v23() + kwargs['v2_version'] = 3 + + mutagen_call('save', self.filename, self.mgfile.save, + _update_filething(self.filething), **kwargs) + + def delete(self): + """Remove the current metadata tag from the file. May + throw `UnreadableFileError`. + """ + mutagen_call('delete', self.filename, self.mgfile.delete, + _update_filething(self.filething)) + + # Convenient access to the set of available fields. + + @classmethod + def fields(cls): + """Get the names of all writable properties that reflect + metadata tags (i.e., those that are instances of + :class:`MediaField`). + """ + for property, descriptor in cls.__dict__.items(): + if isinstance(descriptor, MediaField): + if isinstance(property, bytes): + # On Python 2, class field names are bytes. This method + # produces text strings. + yield property.decode('utf8', 'ignore') + else: + yield property + + @classmethod + def _field_sort_name(cls, name): + """Get a sort key for a field name that determines the order + fields should be written in. + + Fields names are kept unchanged, unless they are instances of + :class:`DateItemField`, in which case `year`, `month`, and `day` + are replaced by `date0`, `date1`, and `date2`, respectively, to + make them appear in that order. + """ + if isinstance(cls.__dict__[name], DateItemField): + name = re.sub('year', 'date0', name) + name = re.sub('month', 'date1', name) + name = re.sub('day', 'date2', name) + return name + + @classmethod + def sorted_fields(cls): + """Get the names of all writable metadata fields, sorted in the + order that they should be written. + + This is a lexicographic order, except for instances of + :class:`DateItemField`, which are sorted in year-month-day + order. + """ + for property in sorted(cls.fields(), key=cls._field_sort_name): + yield property + + @classmethod + def readable_fields(cls): + """Get all metadata fields: the writable ones from + :meth:`fields` and also other audio properties. + """ + for property in cls.fields(): + yield property + for property in ('length', 'samplerate', 'bitdepth', 'bitrate', + 'bitrate_mode', 'channels', 'encoder_info', + 'encoder_settings', 'format'): + yield property + + @classmethod + def add_field(cls, name, descriptor): + """Add a field to store custom tags. + + :param name: the name of the property the field is accessed + through. It must not already exist on this class. + + :param descriptor: an instance of :class:`MediaField`. + """ + if not isinstance(descriptor, MediaField): + raise ValueError( + u'{0} must be an instance of MediaField'.format(descriptor)) + if name in cls.__dict__: + raise ValueError( + u'property "{0}" already exists on MediaFile'.format(name)) + setattr(cls, name, descriptor) + + def update(self, dict): + """Set all field values from a dictionary. + + For any key in `dict` that is also a field to store tags the + method retrieves the corresponding value from `dict` and updates + the `MediaFile`. If a key has the value `None`, the + corresponding property is deleted from the `MediaFile`. + """ + for field in self.sorted_fields(): + if field in dict: + if dict[field] is None: + delattr(self, field) + else: + setattr(self, field, dict[field]) + + def as_dict(self): + """Get a dictionary with all writable properties that reflect + metadata tags (i.e., those that are instances of + :class:`MediaField`). + """ + return dict((x, getattr(self, x)) for x in self.fields()) + + # Field definitions. + + title = MediaField( + MP3StorageStyle('TIT2'), + MP4StorageStyle('\xa9nam'), + StorageStyle('TITLE'), + ASFStorageStyle('Title'), + ) + artist = MediaField( + MP3StorageStyle('TPE1'), + MP4StorageStyle('\xa9ART'), + StorageStyle('ARTIST'), + ASFStorageStyle('Author'), + ) + artists = ListMediaField( + MP3ListDescStorageStyle(desc=u'ARTISTS'), + MP4ListStorageStyle('----:com.apple.iTunes:ARTISTS'), + ListStorageStyle('ARTISTS'), + ASFStorageStyle('WM/ARTISTS'), + ) + album = MediaField( + MP3StorageStyle('TALB'), + MP4StorageStyle('\xa9alb'), + StorageStyle('ALBUM'), + ASFStorageStyle('WM/AlbumTitle'), + ) + genres = ListMediaField( + MP3ListStorageStyle('TCON'), + MP4ListStorageStyle('\xa9gen'), + ListStorageStyle('GENRE'), + ASFStorageStyle('WM/Genre'), + ) + genre = genres.single_field() + + lyricist = MediaField( + MP3StorageStyle('TEXT'), + MP4StorageStyle('----:com.apple.iTunes:LYRICIST'), + StorageStyle('LYRICIST'), + ASFStorageStyle('WM/Writer'), + ) + composer = MediaField( + MP3StorageStyle('TCOM'), + MP4StorageStyle('\xa9wrt'), + StorageStyle('COMPOSER'), + ASFStorageStyle('WM/Composer'), + ) + composer_sort = MediaField( + MP3StorageStyle('TSOC'), + MP4StorageStyle('soco'), + StorageStyle('COMPOSERSORT'), + ASFStorageStyle('WM/Composersortorder'), + ) + arranger = MediaField( + MP3PeopleStorageStyle('TIPL', involvement='arranger'), + MP4StorageStyle('----:com.apple.iTunes:Arranger'), + StorageStyle('ARRANGER'), + ASFStorageStyle('beets/Arranger'), + ) + + grouping = MediaField( + MP3StorageStyle('TIT1'), + MP4StorageStyle('\xa9grp'), + StorageStyle('GROUPING'), + ASFStorageStyle('WM/ContentGroupDescription'), + ) + track = MediaField( + MP3SlashPackStorageStyle('TRCK', pack_pos=0), + MP4TupleStorageStyle('trkn', index=0), + StorageStyle('TRACK'), + StorageStyle('TRACKNUMBER'), + ASFStorageStyle('WM/TrackNumber'), + out_type=int, + ) + tracktotal = MediaField( + MP3SlashPackStorageStyle('TRCK', pack_pos=1), + MP4TupleStorageStyle('trkn', index=1), + StorageStyle('TRACKTOTAL'), + StorageStyle('TRACKC'), + StorageStyle('TOTALTRACKS'), + ASFStorageStyle('TotalTracks'), + out_type=int, + ) + disc = MediaField( + MP3SlashPackStorageStyle('TPOS', pack_pos=0), + MP4TupleStorageStyle('disk', index=0), + StorageStyle('DISC'), + StorageStyle('DISCNUMBER'), + ASFStorageStyle('WM/PartOfSet'), + out_type=int, + ) + disctotal = MediaField( + MP3SlashPackStorageStyle('TPOS', pack_pos=1), + MP4TupleStorageStyle('disk', index=1), + StorageStyle('DISCTOTAL'), + StorageStyle('DISCC'), + StorageStyle('TOTALDISCS'), + ASFStorageStyle('TotalDiscs'), + out_type=int, + ) + + url = MediaField( + MP3DescStorageStyle(key='WXXX', attr='url', multispec=False), + MP4StorageStyle('\xa9url'), + StorageStyle('URL'), + ASFStorageStyle('WM/URL'), + ) + lyrics = MediaField( + MP3DescStorageStyle(key='USLT', multispec=False), + MP4StorageStyle('\xa9lyr'), + StorageStyle('LYRICS'), + ASFStorageStyle('WM/Lyrics'), + ) + comments = MediaField( + MP3DescStorageStyle(key='COMM'), + MP4StorageStyle('\xa9cmt'), + StorageStyle('DESCRIPTION'), + StorageStyle('COMMENT'), + ASFStorageStyle('WM/Comments'), + ASFStorageStyle('Description') + ) + copyright = MediaField( + MP3StorageStyle('TCOP'), + MP4StorageStyle('cprt'), + StorageStyle('COPYRIGHT'), + ASFStorageStyle('Copyright'), + ) + bpm = MediaField( + MP3StorageStyle('TBPM'), + MP4StorageStyle('tmpo', as_type=int), + StorageStyle('BPM'), + ASFStorageStyle('WM/BeatsPerMinute'), + out_type=int, + ) + comp = MediaField( + MP3StorageStyle('TCMP'), + MP4BoolStorageStyle('cpil'), + StorageStyle('COMPILATION'), + ASFStorageStyle('WM/IsCompilation', as_type=bool), + out_type=bool, + ) + albumartist = MediaField( + MP3StorageStyle('TPE2'), + MP4StorageStyle('aART'), + StorageStyle('ALBUM ARTIST'), + StorageStyle('ALBUMARTIST'), + ASFStorageStyle('WM/AlbumArtist'), + ) + albumartists = ListMediaField( + MP3ListDescStorageStyle(desc=u'ALBUMARTISTS'), + MP4ListStorageStyle('----:com.apple.iTunes:ALBUMARTISTS'), + ListStorageStyle('ALBUMARTISTS'), + ASFStorageStyle('WM/AlbumArtists'), + ) + albumtype = MediaField( + MP3DescStorageStyle(u'MusicBrainz Album Type'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Type'), + StorageStyle('RELEASETYPE'), + StorageStyle('MUSICBRAINZ_ALBUMTYPE'), + ASFStorageStyle('MusicBrainz/Album Type'), + ) + label = MediaField( + MP3StorageStyle('TPUB'), + MP4StorageStyle('----:com.apple.iTunes:LABEL'), + MP4StorageStyle('----:com.apple.iTunes:publisher'), + MP4StorageStyle('----:com.apple.iTunes:Label', read_only=True), + StorageStyle('LABEL'), + StorageStyle('PUBLISHER'), # Traktor + ASFStorageStyle('WM/Publisher'), + ) + artist_sort = MediaField( + MP3StorageStyle('TSOP'), + MP4StorageStyle('soar'), + StorageStyle('ARTISTSORT'), + ASFStorageStyle('WM/ArtistSortOrder'), + ) + albumartist_sort = MediaField( + MP3DescStorageStyle(u'ALBUMARTISTSORT'), + MP4StorageStyle('soaa'), + StorageStyle('ALBUMARTISTSORT'), + ASFStorageStyle('WM/AlbumArtistSortOrder'), + ) + asin = MediaField( + MP3DescStorageStyle(u'ASIN'), + MP4StorageStyle('----:com.apple.iTunes:ASIN'), + StorageStyle('ASIN'), + ASFStorageStyle('MusicBrainz/ASIN'), + ) + catalognum = MediaField( + MP3DescStorageStyle(u'CATALOGNUMBER'), + MP4StorageStyle('----:com.apple.iTunes:CATALOGNUMBER'), + StorageStyle('CATALOGNUMBER'), + ASFStorageStyle('WM/CatalogNo'), + ) + barcode = MediaField( + MP3DescStorageStyle(u'BARCODE'), + MP4StorageStyle('----:com.apple.iTunes:BARCODE'), + StorageStyle('BARCODE'), + StorageStyle('UPC', read_only=True), + StorageStyle('EAN/UPN', read_only=True), + StorageStyle('EAN', read_only=True), + StorageStyle('UPN', read_only=True), + ASFStorageStyle('WM/Barcode'), + ) + isrc = MediaField( + MP3StorageStyle(u'TSRC'), + MP4StorageStyle('----:com.apple.iTunes:ISRC'), + StorageStyle('ISRC'), + ASFStorageStyle('WM/ISRC'), + ) + disctitle = MediaField( + MP3StorageStyle('TSST'), + MP4StorageStyle('----:com.apple.iTunes:DISCSUBTITLE'), + StorageStyle('DISCSUBTITLE'), + ASFStorageStyle('WM/SetSubTitle'), + ) + encoder = MediaField( + MP3StorageStyle('TENC'), + MP4StorageStyle('\xa9too'), + StorageStyle('ENCODEDBY'), + StorageStyle('ENCODER'), + ASFStorageStyle('WM/EncodedBy'), + ) + script = MediaField( + MP3DescStorageStyle(u'Script'), + MP4StorageStyle('----:com.apple.iTunes:SCRIPT'), + StorageStyle('SCRIPT'), + ASFStorageStyle('WM/Script'), + ) + language = MediaField( + MP3StorageStyle('TLAN'), + MP4StorageStyle('----:com.apple.iTunes:LANGUAGE'), + StorageStyle('LANGUAGE'), + ASFStorageStyle('WM/Language'), + ) + country = MediaField( + MP3DescStorageStyle(u'MusicBrainz Album Release Country'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz ' + 'Album Release Country'), + StorageStyle('RELEASECOUNTRY'), + ASFStorageStyle('MusicBrainz/Album Release Country'), + ) + albumstatus = MediaField( + MP3DescStorageStyle(u'MusicBrainz Album Status'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Status'), + StorageStyle('RELEASESTATUS'), + StorageStyle('MUSICBRAINZ_ALBUMSTATUS'), + ASFStorageStyle('MusicBrainz/Album Status'), + ) + media = MediaField( + MP3StorageStyle('TMED'), + MP4StorageStyle('----:com.apple.iTunes:MEDIA'), + StorageStyle('MEDIA'), + ASFStorageStyle('WM/Media'), + ) + albumdisambig = MediaField( + # This tag mapping was invented for beets (not used by Picard, etc). + MP3DescStorageStyle(u'MusicBrainz Album Comment'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Comment'), + StorageStyle('MUSICBRAINZ_ALBUMCOMMENT'), + ASFStorageStyle('MusicBrainz/Album Comment'), + ) + + # Release date. + date = DateField( + MP3StorageStyle('TDRC'), + MP4StorageStyle('\xa9day'), + StorageStyle('DATE'), + ASFStorageStyle('WM/Year'), + year=(StorageStyle('YEAR'),)) + + year = date.year_field() + month = date.month_field() + day = date.day_field() + + # *Original* release date. + original_date = DateField( + MP3StorageStyle('TDOR'), + MP4StorageStyle('----:com.apple.iTunes:ORIGINAL YEAR'), + StorageStyle('ORIGINALDATE'), + ASFStorageStyle('WM/OriginalReleaseYear')) + + original_year = original_date.year_field() + original_month = original_date.month_field() + original_day = original_date.day_field() + + # Nonstandard metadata. + artist_credit = MediaField( + MP3DescStorageStyle(u'Artist Credit'), + MP4StorageStyle('----:com.apple.iTunes:Artist Credit'), + StorageStyle('ARTIST_CREDIT'), + ASFStorageStyle('beets/Artist Credit'), + ) + albumartist_credit = MediaField( + MP3DescStorageStyle(u'Album Artist Credit'), + MP4StorageStyle('----:com.apple.iTunes:Album Artist Credit'), + StorageStyle('ALBUMARTIST_CREDIT'), + ASFStorageStyle('beets/Album Artist Credit'), + ) + + # Legacy album art field + art = CoverArtField() + + # Image list + images = ImageListField() + + # MusicBrainz IDs. + mb_trackid = MediaField( + MP3UFIDStorageStyle(owner='http://musicbrainz.org'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Track Id'), + StorageStyle('MUSICBRAINZ_TRACKID'), + ASFStorageStyle('MusicBrainz/Track Id'), + ) + mb_releasetrackid = MediaField( + MP3DescStorageStyle(u'MusicBrainz Release Track Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Track Id'), + StorageStyle('MUSICBRAINZ_RELEASETRACKID'), + ASFStorageStyle('MusicBrainz/Release Track Id'), + ) + mb_workid = MediaField( + MP3DescStorageStyle(u'MusicBrainz Work Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Work Id'), + StorageStyle('MUSICBRAINZ_WORKID'), + ASFStorageStyle('MusicBrainz/Work Id'), + ) + mb_albumid = MediaField( + MP3DescStorageStyle(u'MusicBrainz Album Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Album Id'), + StorageStyle('MUSICBRAINZ_ALBUMID'), + ASFStorageStyle('MusicBrainz/Album Id'), + ) + mb_artistids = ListMediaField( + MP3ListDescStorageStyle(u'MusicBrainz Artist Id', split_v23=True), + MP4ListStorageStyle('----:com.apple.iTunes:MusicBrainz Artist Id'), + ListStorageStyle('MUSICBRAINZ_ARTISTID'), + ASFStorageStyle('MusicBrainz/Artist Id'), + ) + mb_artistid = mb_artistids.single_field() + + mb_albumartistids = ListMediaField( + MP3ListDescStorageStyle( + u'MusicBrainz Album Artist Id', + split_v23=True, + ), + MP4ListStorageStyle( + '----:com.apple.iTunes:MusicBrainz Album Artist Id', + ), + ListStorageStyle('MUSICBRAINZ_ALBUMARTISTID'), + ASFStorageStyle('MusicBrainz/Album Artist Id'), + ) + mb_albumartistid = mb_albumartistids.single_field() + + mb_releasegroupid = MediaField( + MP3DescStorageStyle(u'MusicBrainz Release Group Id'), + MP4StorageStyle('----:com.apple.iTunes:MusicBrainz Release Group Id'), + StorageStyle('MUSICBRAINZ_RELEASEGROUPID'), + ASFStorageStyle('MusicBrainz/Release Group Id'), + ) + + # Acoustid fields. + acoustid_fingerprint = MediaField( + MP3DescStorageStyle(u'Acoustid Fingerprint'), + MP4StorageStyle('----:com.apple.iTunes:Acoustid Fingerprint'), + StorageStyle('ACOUSTID_FINGERPRINT'), + ASFStorageStyle('Acoustid/Fingerprint'), + ) + acoustid_id = MediaField( + MP3DescStorageStyle(u'Acoustid Id'), + MP4StorageStyle('----:com.apple.iTunes:Acoustid Id'), + StorageStyle('ACOUSTID_ID'), + ASFStorageStyle('Acoustid/Id'), + ) + + # ReplayGain fields. + rg_track_gain = MediaField( + MP3DescStorageStyle( + u'REPLAYGAIN_TRACK_GAIN', + float_places=2, suffix=u' dB' + ), + MP3DescStorageStyle( + u'replaygain_track_gain', + float_places=2, suffix=u' dB' + ), + MP3SoundCheckStorageStyle( + key='COMM', + index=0, desc=u'iTunNORM', + id3_lang='eng' + ), + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_track_gain', + float_places=2, suffix=' dB' + ), + MP4SoundCheckStorageStyle( + '----:com.apple.iTunes:iTunNORM', + index=0 + ), + StorageStyle( + u'REPLAYGAIN_TRACK_GAIN', + float_places=2, suffix=u' dB' + ), + ASFStorageStyle( + u'replaygain_track_gain', + float_places=2, suffix=u' dB' + ), + out_type=float + ) + rg_album_gain = MediaField( + MP3DescStorageStyle( + u'REPLAYGAIN_ALBUM_GAIN', + float_places=2, suffix=u' dB' + ), + MP3DescStorageStyle( + u'replaygain_album_gain', + float_places=2, suffix=u' dB' + ), + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_album_gain', + float_places=2, suffix=' dB' + ), + StorageStyle( + u'REPLAYGAIN_ALBUM_GAIN', + float_places=2, suffix=u' dB' + ), + ASFStorageStyle( + u'replaygain_album_gain', + float_places=2, suffix=u' dB' + ), + out_type=float + ) + rg_track_peak = MediaField( + MP3DescStorageStyle( + u'REPLAYGAIN_TRACK_PEAK', + float_places=6 + ), + MP3DescStorageStyle( + u'replaygain_track_peak', + float_places=6 + ), + MP3SoundCheckStorageStyle( + key=u'COMM', + index=1, desc=u'iTunNORM', + id3_lang='eng' + ), + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_track_peak', + float_places=6 + ), + MP4SoundCheckStorageStyle( + '----:com.apple.iTunes:iTunNORM', + index=1 + ), + StorageStyle(u'REPLAYGAIN_TRACK_PEAK', float_places=6), + ASFStorageStyle(u'replaygain_track_peak', float_places=6), + out_type=float, + ) + rg_album_peak = MediaField( + MP3DescStorageStyle( + u'REPLAYGAIN_ALBUM_PEAK', + float_places=6 + ), + MP3DescStorageStyle( + u'replaygain_album_peak', + float_places=6 + ), + MP4StorageStyle( + '----:com.apple.iTunes:replaygain_album_peak', + float_places=6 + ), + StorageStyle(u'REPLAYGAIN_ALBUM_PEAK', float_places=6), + ASFStorageStyle(u'replaygain_album_peak', float_places=6), + out_type=float, + ) + + # EBU R128 fields. + r128_track_gain = QNumberField( + 8, + MP3DescStorageStyle( + u'R128_TRACK_GAIN' + ), + MP4StorageStyle( + '----:com.apple.iTunes:R128_TRACK_GAIN' + ), + StorageStyle( + u'R128_TRACK_GAIN' + ), + ASFStorageStyle( + u'R128_TRACK_GAIN' + ), + ) + r128_album_gain = QNumberField( + 8, + MP3DescStorageStyle( + u'R128_ALBUM_GAIN' + ), + MP4StorageStyle( + '----:com.apple.iTunes:R128_ALBUM_GAIN' + ), + StorageStyle( + u'R128_ALBUM_GAIN' + ), + ASFStorageStyle( + u'R128_ALBUM_GAIN' + ), + ) + + initial_key = MediaField( + MP3StorageStyle('TKEY'), + MP4StorageStyle('----:com.apple.iTunes:initialkey'), + StorageStyle('INITIALKEY'), + ASFStorageStyle('INITIALKEY'), + ) + + @property + def length(self): + """The duration of the audio in seconds (a float).""" + return self.mgfile.info.length + + @property + def samplerate(self): + """The audio's sample rate (an int).""" + if hasattr(self.mgfile.info, 'sample_rate'): + return self.mgfile.info.sample_rate + elif self.type == 'opus': + # Opus is always 48kHz internally. + return 48000 + return 0 + + @property + def bitdepth(self): + """The number of bits per sample in the audio encoding (an int). + Only available for certain file formats (zero where + unavailable). + """ + if hasattr(self.mgfile.info, 'bits_per_sample'): + return self.mgfile.info.bits_per_sample + return 0 + + @property + def channels(self): + """The number of channels in the audio (an int).""" + if hasattr(self.mgfile.info, 'channels'): + return self.mgfile.info.channels + return 0 + + @property + def bitrate(self): + """The number of bits per seconds used in the audio coding (an + int). If this is provided explicitly by the compressed file + format, this is a precise reflection of the encoding. Otherwise, + it is estimated from the on-disk file size. In this case, some + imprecision is possible because the file header is incorporated + in the file size. + """ + if hasattr(self.mgfile.info, 'bitrate') and self.mgfile.info.bitrate: + # Many formats provide it explicitly. + return self.mgfile.info.bitrate + else: + # Otherwise, we calculate bitrate from the file size. (This + # is the case for all of the lossless formats.) + if not self.length: + # Avoid division by zero if length is not available. + return 0 + return int(self.filesize * 8 / self.length) + + @property + def bitrate_mode(self): + """The mode of the bitrate used in the audio coding + (a string, eg. "CBR", "VBR" or "ABR"). + Only available for the MP3 file format (empty where unavailable). + """ + if hasattr(self.mgfile.info, 'bitrate_mode'): + return { + mutagen.mp3.BitrateMode.CBR: 'CBR', + mutagen.mp3.BitrateMode.VBR: 'VBR', + mutagen.mp3.BitrateMode.ABR: 'ABR', + }.get(self.mgfile.info.bitrate_mode, '') + else: + return '' + + @property + def encoder_info(self): + """The name and/or version of the encoder used + (a string, eg. "LAME 3.97.0"). + Only available for some formats (empty where unavailable). + """ + if hasattr(self.mgfile.info, 'encoder_info'): + return self.mgfile.info.encoder_info + else: + return '' + + @property + def encoder_settings(self): + """A guess of the settings used for the encoder (a string, eg. "-V2"). + Only available for the MP3 file format (empty where unavailable). + """ + if hasattr(self.mgfile.info, 'encoder_settings'): + return self.mgfile.info.encoder_settings + else: + return '' + + @property + def format(self): + """A string describing the file format/codec.""" + return TYPES[self.type] diff --git a/lib/more_itertools/__init__.py b/lib/more_itertools/__init__.py new file mode 100644 index 00000000..ea38bef1 --- /dev/null +++ b/lib/more_itertools/__init__.py @@ -0,0 +1,4 @@ +from .more import * # noqa +from .recipes import * # noqa + +__version__ = '8.12.0' diff --git a/lib/more_itertools/__init__.pyi b/lib/more_itertools/__init__.pyi new file mode 100644 index 00000000..96f6e36c --- /dev/null +++ b/lib/more_itertools/__init__.pyi @@ -0,0 +1,2 @@ +from .more import * +from .recipes import * diff --git a/lib/more_itertools/more.py b/lib/more_itertools/more.py new file mode 100755 index 00000000..630af973 --- /dev/null +++ b/lib/more_itertools/more.py @@ -0,0 +1,4317 @@ +import warnings + +from collections import Counter, defaultdict, deque, abc +from collections.abc import Sequence +from concurrent.futures import ThreadPoolExecutor +from functools import partial, reduce, wraps +from heapq import merge, heapify, heapreplace, heappop +from itertools import ( + chain, + compress, + count, + cycle, + dropwhile, + groupby, + islice, + repeat, + starmap, + takewhile, + tee, + zip_longest, +) +from math import exp, factorial, floor, log +from queue import Empty, Queue +from random import random, randrange, uniform +from operator import itemgetter, mul, sub, gt, lt, ge, le +from sys import hexversion, maxsize +from time import monotonic + +from .recipes import ( + consume, + flatten, + pairwise, + powerset, + take, + unique_everseen, +) + +__all__ = [ + 'AbortThread', + 'SequenceView', + 'UnequalIterablesError', + 'adjacent', + 'all_unique', + 'always_iterable', + 'always_reversible', + 'bucket', + 'callback_iter', + 'chunked', + 'chunked_even', + 'circular_shifts', + 'collapse', + 'collate', + 'combination_index', + 'consecutive_groups', + 'consumer', + 'count_cycle', + 'countable', + 'difference', + 'distinct_combinations', + 'distinct_permutations', + 'distribute', + 'divide', + 'duplicates_everseen', + 'duplicates_justseen', + 'exactly_n', + 'filter_except', + 'first', + 'groupby_transform', + 'ichunked', + 'ilen', + 'interleave', + 'interleave_evenly', + 'interleave_longest', + 'intersperse', + 'is_sorted', + 'islice_extended', + 'iterate', + 'last', + 'locate', + 'lstrip', + 'make_decorator', + 'map_except', + 'map_if', + 'map_reduce', + 'mark_ends', + 'minmax', + 'nth_or_last', + 'nth_permutation', + 'nth_product', + 'numeric_range', + 'one', + 'only', + 'padded', + 'partitions', + 'peekable', + 'permutation_index', + 'product_index', + 'raise_', + 'repeat_each', + 'repeat_last', + 'replace', + 'rlocate', + 'rstrip', + 'run_length', + 'sample', + 'seekable', + 'set_partitions', + 'side_effect', + 'sliced', + 'sort_together', + 'split_after', + 'split_at', + 'split_before', + 'split_into', + 'split_when', + 'spy', + 'stagger', + 'strip', + 'strictly_n', + 'substrings', + 'substrings_indexes', + 'time_limited', + 'unique_in_window', + 'unique_to_each', + 'unzip', + 'value_chain', + 'windowed', + 'windowed_complete', + 'with_iter', + 'zip_broadcast', + 'zip_equal', + 'zip_offset', +] + + +_marker = object() + + +def chunked(iterable, n, strict=False): + """Break *iterable* into lists of length *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6], 3)) + [[1, 2, 3], [4, 5, 6]] + + By the default, the last yielded list will have fewer than *n* elements + if the length of *iterable* is not divisible by *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3)) + [[1, 2, 3], [4, 5, 6], [7, 8]] + + To use a fill-in value instead, see the :func:`grouper` recipe. + + If the length of *iterable* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + list is yielded. + + """ + iterator = iter(partial(take, n, iter(iterable)), []) + if strict: + if n is None: + raise ValueError('n must not be None when using strict mode.') + + def ret(): + for chunk in iterator: + if len(chunk) != n: + raise ValueError('iterable is not divisible by n.') + yield chunk + + return iter(ret()) + else: + return iterator + + +def first(iterable, default=_marker): + """Return the first item of *iterable*, or *default* if *iterable* is + empty. + + >>> first([0, 1, 2, 3]) + 0 + >>> first([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + + :func:`first` is useful when you have a generator of expensive-to-retrieve + values and want any arbitrary one. It is marginally shorter than + ``next(iter(iterable), default)``. + + """ + try: + return next(iter(iterable)) + except StopIteration as e: + if default is _marker: + raise ValueError( + 'first() was called on an empty iterable, and no ' + 'default value was provided.' + ) from e + return default + + +def last(iterable, default=_marker): + """Return the last item of *iterable*, or *default* if *iterable* is + empty. + + >>> last([0, 1, 2, 3]) + 3 + >>> last([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + try: + if isinstance(iterable, Sequence): + return iterable[-1] + # Work around https://bugs.python.org/issue38525 + elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): + return next(reversed(iterable)) + else: + return deque(iterable, maxlen=1)[-1] + except (IndexError, TypeError, StopIteration): + if default is _marker: + raise ValueError( + 'last() was called on an empty iterable, and no default was ' + 'provided.' + ) + return default + + +def nth_or_last(iterable, n, default=_marker): + """Return the nth or the last item of *iterable*, + or *default* if *iterable* is empty. + + >>> nth_or_last([0, 1, 2, 3], 2) + 2 + >>> nth_or_last([0, 1], 2) + 1 + >>> nth_or_last([], 0, 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + return last(islice(iterable, n + 1), default=default) + + +class peekable: + """Wrap an iterator to allow lookahead and prepending elements. + + Call :meth:`peek` on the result to get the value that will be returned + by :func:`next`. This won't advance the iterator: + + >>> p = peekable(['a', 'b']) + >>> p.peek() + 'a' + >>> next(p) + 'a' + + Pass :meth:`peek` a default value to return that instead of raising + ``StopIteration`` when the iterator is exhausted. + + >>> p = peekable([]) + >>> p.peek('hi') + 'hi' + + peekables also offer a :meth:`prepend` method, which "inserts" items + at the head of the iterable: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> p.peek() + 11 + >>> list(p) + [11, 12, 1, 2, 3] + + peekables can be indexed. Index 0 is the item that will be returned by + :func:`next`, index 1 is the item after that, and so on: + The values up to the given index will be cached. + + >>> p = peekable(['a', 'b', 'c', 'd']) + >>> p[0] + 'a' + >>> p[1] + 'b' + >>> next(p) + 'a' + + Negative indexes are supported, but be aware that they will cache the + remaining items in the source iterator, which may require significant + storage. + + To check whether a peekable is exhausted, check its truth value: + + >>> p = peekable(['a', 'b']) + >>> if p: # peekable has items + ... list(p) + ['a', 'b'] + >>> if not p: # peekable is exhausted + ... list(p) + [] + + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self._cache = deque() + + def __iter__(self): + return self + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + """Return the item that will be next returned from ``next()``. + + Return ``default`` if there are no items left. If ``default`` is not + provided, raise ``StopIteration``. + + """ + if not self._cache: + try: + self._cache.append(next(self._it)) + except StopIteration: + if default is _marker: + raise + return default + return self._cache[0] + + def prepend(self, *items): + """Stack up items to be the next ones returned from ``next()`` or + ``self.peek()``. The items will be returned in + first in, first out order:: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> list(p) + [11, 12, 1, 2, 3] + + It is possible, by prepending items, to "resurrect" a peekable that + previously raised ``StopIteration``. + + >>> p = peekable([]) + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + >>> p.prepend(1) + >>> next(p) + 1 + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + + """ + self._cache.extendleft(reversed(items)) + + def __next__(self): + if self._cache: + return self._cache.popleft() + + return next(self._it) + + def _get_slice(self, index): + # Normalize the slice's arguments + step = 1 if (index.step is None) else index.step + if step > 0: + start = 0 if (index.start is None) else index.start + stop = maxsize if (index.stop is None) else index.stop + elif step < 0: + start = -1 if (index.start is None) else index.start + stop = (-maxsize - 1) if (index.stop is None) else index.stop + else: + raise ValueError('slice step cannot be zero') + + # If either the start or stop index is negative, we'll need to cache + # the rest of the iterable in order to slice from the right side. + if (start < 0) or (stop < 0): + self._cache.extend(self._it) + # Otherwise we'll need to find the rightmost index and cache to that + # point. + else: + n = min(max(start, stop) + 1, maxsize) + cache_len = len(self._cache) + if n >= cache_len: + self._cache.extend(islice(self._it, n - cache_len)) + + return list(self._cache)[index] + + def __getitem__(self, index): + if isinstance(index, slice): + return self._get_slice(index) + + cache_len = len(self._cache) + if index < 0: + self._cache.extend(self._it) + elif index >= cache_len: + self._cache.extend(islice(self._it, index + 1 - cache_len)) + + return self._cache[index] + + +def collate(*iterables, **kwargs): + """Return a sorted merge of the items from each of several already-sorted + *iterables*. + + >>> list(collate('ACDZ', 'AZ', 'JKL')) + ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z'] + + Works lazily, keeping only the next value from each iterable in memory. Use + :func:`collate` to, for example, perform a n-way mergesort of items that + don't fit in memory. + + If a *key* function is specified, the iterables will be sorted according + to its result: + + >>> key = lambda s: int(s) # Sort by numeric value, not by string + >>> list(collate(['1', '10'], ['2', '11'], key=key)) + ['1', '2', '10', '11'] + + + If the *iterables* are sorted in descending order, set *reverse* to + ``True``: + + >>> list(collate([5, 3, 1], [4, 2, 0], reverse=True)) + [5, 4, 3, 2, 1, 0] + + If the elements of the passed-in iterables are out of order, you might get + unexpected results. + + On Python 3.5+, this function is an alias for :func:`heapq.merge`. + + """ + warnings.warn( + "collate is no longer part of more_itertools, use heapq.merge", + DeprecationWarning, + ) + return merge(*iterables, **kwargs) + + +def consumer(func): + """Decorator that automatically advances a PEP-342-style "reverse iterator" + to its first yield point so you don't have to call ``next()`` on it + manually. + + >>> @consumer + ... def tally(): + ... i = 0 + ... while True: + ... print('Thing number %s is %s.' % (i, (yield))) + ... i += 1 + ... + >>> t = tally() + >>> t.send('red') + Thing number 0 is red. + >>> t.send('fish') + Thing number 1 is fish. + + Without the decorator, you would have to call ``next(t)`` before + ``t.send()`` could be used. + + """ + + @wraps(func) + def wrapper(*args, **kwargs): + gen = func(*args, **kwargs) + next(gen) + return gen + + return wrapper + + +def ilen(iterable): + """Return the number of items in *iterable*. + + >>> ilen(x for x in range(1000000) if x % 3 == 0) + 333334 + + This consumes the iterable, so handle with care. + + """ + # This approach was selected because benchmarks showed it's likely the + # fastest of the known implementations at the time of writing. + # See GitHub tracker: #236, #230. + counter = count() + deque(zip(iterable, counter), maxlen=0) + return next(counter) + + +def iterate(func, start): + """Return ``start``, ``func(start)``, ``func(func(start))``, ... + + >>> from itertools import islice + >>> list(islice(iterate(lambda x: 2*x, 1), 10)) + [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] + + """ + while True: + yield start + start = func(start) + + +def with_iter(context_manager): + """Wrap an iterable in a ``with`` statement, so it closes once exhausted. + + For example, this will close the file when the iterator is exhausted:: + + upper_lines = (line.upper() for line in with_iter(open('foo'))) + + Any context manager which returns an iterable is a candidate for + ``with_iter``. + + """ + with context_manager as iterable: + yield from iterable + + +def one(iterable, too_short=None, too_long=None): + """Return the first item from *iterable*, which is expected to contain only + that item. Raise an exception if *iterable* is empty or has more than one + item. + + :func:`one` is useful for ensuring that an iterable contains only one item. + For example, it can be used to retrieve the result of a database query + that is expected to return a single row. + + If *iterable* is empty, ``ValueError`` will be raised. You may specify a + different exception with the *too_short* keyword: + + >>> it = [] + >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too many items in iterable (expected 1)' + >>> too_short = IndexError('too few items') + >>> one(it, too_short=too_short) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + IndexError: too few items + + Similarly, if *iterable* contains more than one item, ``ValueError`` will + be raised. You may specify a different exception with the *too_long* + keyword: + + >>> it = ['too', 'many'] + >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 'too', + 'many', and perhaps more. + >>> too_long = RuntimeError + >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + RuntimeError + + Note that :func:`one` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check iterable + contents less destructively. + + """ + it = iter(iterable) + + try: + first_value = next(it) + except StopIteration as e: + raise ( + too_short or ValueError('too few items in iterable (expected 1)') + ) from e + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value + + +def raise_(exception, *args): + raise exception(*args) + + +def strictly_n(iterable, n, too_short=None, too_long=None): + """Validate that *iterable* has exactly *n* items and return them if + it does. If it has fewer than *n* items, call function *too_short* + with those items. If it has more than *n* items, call function + *too_long* with the first ``n + 1`` items. + + >>> iterable = ['a', 'b', 'c', 'd'] + >>> n = 4 + >>> list(strictly_n(iterable, n)) + ['a', 'b', 'c', 'd'] + + By default, *too_short* and *too_long* are functions that raise + ``ValueError``. + + >>> list(strictly_n('ab', 3)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too few items in iterable (got 2) + + >>> list(strictly_n('abc', 2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too many items in iterable (got at least 3) + + You can instead supply functions that do something else. + *too_short* will be called with the number of items in *iterable*. + *too_long* will be called with `n + 1`. + + >>> def too_short(item_count): + ... raise RuntimeError + >>> it = strictly_n('abcd', 6, too_short=too_short) + >>> list(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + RuntimeError + + >>> def too_long(item_count): + ... print('The boss is going to hear about this') + >>> it = strictly_n('abcdef', 4, too_long=too_long) + >>> list(it) + The boss is going to hear about this + ['a', 'b', 'c', 'd'] + + """ + if too_short is None: + too_short = lambda item_count: raise_( + ValueError, + 'Too few items in iterable (got {})'.format(item_count), + ) + + if too_long is None: + too_long = lambda item_count: raise_( + ValueError, + 'Too many items in iterable (got at least {})'.format(item_count), + ) + + it = iter(iterable) + for i in range(n): + try: + item = next(it) + except StopIteration: + too_short(i) + return + else: + yield item + + try: + next(it) + except StopIteration: + pass + else: + too_long(n + 1) + + +def distinct_permutations(iterable, r=None): + """Yield successive distinct permutations of the elements in *iterable*. + + >>> sorted(distinct_permutations([1, 0, 1])) + [(0, 1, 1), (1, 0, 1), (1, 1, 0)] + + Equivalent to ``set(permutations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + Duplicate permutations arise when there are duplicated elements in the + input iterable. The number of items returned is + `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of + items input, and each `x_i` is the count of a distinct item in the input + sequence. + + If *r* is given, only the *r*-length permutations are yielded. + + >>> sorted(distinct_permutations([1, 0, 1], r=2)) + [(0, 1), (1, 0), (1, 1)] + >>> sorted(distinct_permutations(range(3), r=2)) + [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + + """ + # Algorithm: https://w.wiki/Qai + def _full(A): + while True: + # Yield the permutation we have + yield tuple(A) + + # Find the largest index i such that A[i] < A[i + 1] + for i in range(size - 2, -1, -1): + if A[i] < A[i + 1]: + break + # If no such index exists, this permutation is the last one + else: + return + + # Find the largest index j greater than j such that A[i] < A[j] + for j in range(size - 1, i, -1): + if A[i] < A[j]: + break + + # Swap the value of A[i] with that of A[j], then reverse the + # sequence from A[i + 1] to form the new permutation + A[i], A[j] = A[j], A[i] + A[i + 1 :] = A[: i - size : -1] # A[i + 1:][::-1] + + # Algorithm: modified from the above + def _partial(A, r): + # Split A into the first r items and the last r items + head, tail = A[:r], A[r:] + right_head_indexes = range(r - 1, -1, -1) + left_tail_indexes = range(len(tail)) + + while True: + # Yield the permutation we have + yield tuple(head) + + # Starting from the right, find the first index of the head with + # value smaller than the maximum value of the tail - call it i. + pivot = tail[-1] + for i in right_head_indexes: + if head[i] < pivot: + break + pivot = head[i] + else: + return + + # Starting from the left, find the first value of the tail + # with a value greater than head[i] and swap. + for j in left_tail_indexes: + if tail[j] > head[i]: + head[i], tail[j] = tail[j], head[i] + break + # If we didn't find one, start from the right and find the first + # index of the head with a value greater than head[i] and swap. + else: + for j in right_head_indexes: + if head[j] > head[i]: + head[i], head[j] = head[j], head[i] + break + + # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)] + tail += head[: i - r : -1] # head[i + 1:][::-1] + i += 1 + head[i:], tail[:] = tail[: r - i], tail[r - i :] + + items = sorted(iterable) + + size = len(items) + if r is None: + r = size + + if 0 < r <= size: + return _full(items) if (r == size) else _partial(items, r) + + return iter(() if r else ((),)) + + +def intersperse(e, iterable, n=1): + """Intersperse filler element *e* among the items in *iterable*, leaving + *n* items between each filler element. + + >>> list(intersperse('!', [1, 2, 3, 4, 5])) + [1, '!', 2, '!', 3, '!', 4, '!', 5] + + >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2)) + [1, 2, None, 3, 4, None, 5] + + """ + if n == 0: + raise ValueError('n must be > 0') + elif n == 1: + # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2... + # islice(..., 1, None) -> x_0, e, x_1, e, x_2... + return islice(interleave(repeat(e), iterable), 1, None) + else: + # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]... + # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]... + # flatten(...) -> x_0, x_1, e, x_2, x_3... + filler = repeat([e]) + chunks = chunked(iterable, n) + return flatten(islice(interleave(filler, chunks), 1, None)) + + +def unique_to_each(*iterables): + """Return the elements from each of the input iterables that aren't in the + other input iterables. + + For example, suppose you have a set of packages, each with a set of + dependencies:: + + {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}} + + If you remove one package, which dependencies can also be removed? + + If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not + associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for + ``pkg_2``, and ``D`` is only needed for ``pkg_3``:: + + >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'}) + [['A'], ['C'], ['D']] + + If there are duplicates in one input iterable that aren't in the others + they will be duplicated in the output. Input order is preserved:: + + >>> unique_to_each("mississippi", "missouri") + [['p', 'p'], ['o', 'u', 'r']] + + It is assumed that the elements of each iterable are hashable. + + """ + pool = [list(it) for it in iterables] + counts = Counter(chain.from_iterable(map(set, pool))) + uniques = {element for element in counts if counts[element] == 1} + return [list(filter(uniques.__contains__, it)) for it in pool] + + +def windowed(seq, n, fillvalue=None, step=1): + """Return a sliding window of width *n* over the given iterable. + + >>> all_windows = windowed([1, 2, 3, 4, 5], 3) + >>> list(all_windows) + [(1, 2, 3), (2, 3, 4), (3, 4, 5)] + + When the window is larger than the iterable, *fillvalue* is used in place + of missing values: + + >>> list(windowed([1, 2, 3], 4)) + [(1, 2, 3, None)] + + Each window will advance in increments of *step*: + + >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2)) + [(1, 2, 3), (3, 4, 5), (5, 6, '!')] + + To slide into the iterable's items, use :func:`chain` to add filler items + to the left: + + >>> iterable = [1, 2, 3, 4] + >>> n = 3 + >>> padding = [None] * (n - 1) + >>> list(windowed(chain(padding, iterable), 3)) + [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)] + """ + if n < 0: + raise ValueError('n must be >= 0') + if n == 0: + yield tuple() + return + if step < 1: + raise ValueError('step must be >= 1') + + window = deque(maxlen=n) + i = n + for _ in map(window.append, seq): + i -= 1 + if not i: + i = step + yield tuple(window) + + size = len(window) + if size < n: + yield tuple(chain(window, repeat(fillvalue, n - size))) + elif 0 < i < min(step, n): + window += (fillvalue,) * i + yield tuple(window) + + +def substrings(iterable): + """Yield all of the substrings of *iterable*. + + >>> [''.join(s) for s in substrings('more')] + ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more'] + + Note that non-string iterables can also be subdivided. + + >>> list(substrings([0, 1, 2])) + [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)] + + """ + # The length-1 substrings + seq = [] + for item in iter(iterable): + seq.append(item) + yield (item,) + seq = tuple(seq) + item_count = len(seq) + + # And the rest + for n in range(2, item_count + 1): + for i in range(item_count - n + 1): + yield seq[i : i + n] + + +def substrings_indexes(seq, reverse=False): + """Yield all substrings and their positions in *seq* + + The items yielded will be a tuple of the form ``(substr, i, j)``, where + ``substr == seq[i:j]``. + + This function only works for iterables that support slicing, such as + ``str`` objects. + + >>> for item in substrings_indexes('more'): + ... print(item) + ('m', 0, 1) + ('o', 1, 2) + ('r', 2, 3) + ('e', 3, 4) + ('mo', 0, 2) + ('or', 1, 3) + ('re', 2, 4) + ('mor', 0, 3) + ('ore', 1, 4) + ('more', 0, 4) + + Set *reverse* to ``True`` to yield the same items in the opposite order. + + + """ + r = range(1, len(seq) + 1) + if reverse: + r = reversed(r) + return ( + (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1) + ) + + +class bucket: + """Wrap *iterable* and return an object that buckets it iterable into + child iterables based on a *key* function. + + >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] + >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character + >>> sorted(list(s)) # Get the keys + ['a', 'b', 'c'] + >>> a_iterable = s['a'] + >>> next(a_iterable) + 'a1' + >>> next(a_iterable) + 'a2' + >>> list(s['b']) + ['b1', 'b2', 'b3'] + + The original iterable will be advanced and its items will be cached until + they are used by the child iterables. This may require significant storage. + + By default, attempting to select a bucket to which no items belong will + exhaust the iterable and cache all values. + If you specify a *validator* function, selected buckets will instead be + checked against it. + + >>> from itertools import count + >>> it = count(1, 2) # Infinite sequence of odd numbers + >>> key = lambda x: x % 10 # Bucket by last digit + >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only + >>> s = bucket(it, key=key, validator=validator) + >>> 2 in s + False + >>> list(s[2]) + [] + + """ + + def __init__(self, iterable, key, validator=None): + self._it = iter(iterable) + self._key = key + self._cache = defaultdict(deque) + self._validator = validator or (lambda x: True) + + def __contains__(self, value): + if not self._validator(value): + return False + + try: + item = next(self[value]) + except StopIteration: + return False + else: + self._cache[value].appendleft(item) + + return True + + def _get_values(self, value): + """ + Helper to yield items from the parent iterator that match *value*. + Items that don't match are stored in the local cache as they + are encountered. + """ + while True: + # If we've cached some items that match the target value, emit + # the first one and evict it from the cache. + if self._cache[value]: + yield self._cache[value].popleft() + # Otherwise we need to advance the parent iterator to search for + # a matching item, caching the rest. + else: + while True: + try: + item = next(self._it) + except StopIteration: + return + item_value = self._key(item) + if item_value == value: + yield item + break + elif self._validator(item_value): + self._cache[item_value].append(item) + + def __iter__(self): + for item in self._it: + item_value = self._key(item) + if self._validator(item_value): + self._cache[item_value].append(item) + + yield from self._cache.keys() + + def __getitem__(self, value): + if not self._validator(value): + return iter(()) + + return self._get_values(value) + + +def spy(iterable, n=1): + """Return a 2-tuple with a list containing the first *n* elements of + *iterable*, and an iterator with the same items as *iterable*. + This allows you to "look ahead" at the items in the iterable without + advancing it. + + There is one item in the list by default: + + >>> iterable = 'abcdefg' + >>> head, iterable = spy(iterable) + >>> head + ['a'] + >>> list(iterable) + ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + + You may use unpacking to retrieve items instead of lists: + + >>> (head,), iterable = spy('abcdefg') + >>> head + 'a' + >>> (first, second), iterable = spy('abcdefg', 2) + >>> first + 'a' + >>> second + 'b' + + The number of items requested can be larger than the number of items in + the iterable: + + >>> iterable = [1, 2, 3, 4, 5] + >>> head, iterable = spy(iterable, 10) + >>> head + [1, 2, 3, 4, 5] + >>> list(iterable) + [1, 2, 3, 4, 5] + + """ + it = iter(iterable) + head = take(n, it) + + return head.copy(), chain(head, it) + + +def interleave(*iterables): + """Return a new iterable yielding from each iterable in turn, + until the shortest is exhausted. + + >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7] + + For a version that doesn't terminate after the shortest iterable is + exhausted, see :func:`interleave_longest`. + + """ + return chain.from_iterable(zip(*iterables)) + + +def interleave_longest(*iterables): + """Return a new iterable yielding from each iterable in turn, + skipping any that are exhausted. + + >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7, 3, 8] + + This function produces the same output as :func:`roundrobin`, but may + perform better for some inputs (in particular when the number of iterables + is large). + + """ + i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker)) + return (x for x in i if x is not _marker) + + +def interleave_evenly(iterables, lengths=None): + """ + Interleave multiple iterables so that their elements are evenly distributed + throughout the output sequence. + + >>> iterables = [1, 2, 3, 4, 5], ['a', 'b'] + >>> list(interleave_evenly(iterables)) + [1, 2, 'a', 3, 4, 'b', 5] + + >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]] + >>> list(interleave_evenly(iterables)) + [1, 6, 4, 2, 7, 3, 8, 5] + + This function requires iterables of known length. Iterables without + ``__len__()`` can be used by manually specifying lengths with *lengths*: + + >>> from itertools import combinations, repeat + >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']] + >>> lengths = [4 * (4 - 1) // 2, 3] + >>> list(interleave_evenly(iterables, lengths=lengths)) + [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c'] + + Based on Bresenham's algorithm. + """ + if lengths is None: + try: + lengths = [len(it) for it in iterables] + except TypeError: + raise ValueError( + 'Iterable lengths could not be determined automatically. ' + 'Specify them with the lengths keyword.' + ) + elif len(iterables) != len(lengths): + raise ValueError('Mismatching number of iterables and lengths.') + + dims = len(lengths) + + # sort iterables by length, descending + lengths_permute = sorted( + range(dims), key=lambda i: lengths[i], reverse=True + ) + lengths_desc = [lengths[i] for i in lengths_permute] + iters_desc = [iter(iterables[i]) for i in lengths_permute] + + # the longest iterable is the primary one (Bresenham: the longest + # distance along an axis) + delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:] + iter_primary, iters_secondary = iters_desc[0], iters_desc[1:] + errors = [delta_primary // dims] * len(deltas_secondary) + + to_yield = sum(lengths) + while to_yield: + yield next(iter_primary) + to_yield -= 1 + # update errors for each secondary iterable + errors = [e - delta for e, delta in zip(errors, deltas_secondary)] + + # those iterables for which the error is negative are yielded + # ("diagonal step" in Bresenham) + for i, e in enumerate(errors): + if e < 0: + yield next(iters_secondary[i]) + to_yield -= 1 + errors[i] += delta_primary + + +def collapse(iterable, base_type=None, levels=None): + """Flatten an iterable with multiple levels of nesting (e.g., a list of + lists of tuples) into non-iterable types. + + >>> iterable = [(1, 2), ([3, 4], [[5], [6]])] + >>> list(collapse(iterable)) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and + will not be collapsed. + + To avoid collapsing other types, specify *base_type*: + + >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']] + >>> list(collapse(iterable, base_type=tuple)) + ['ab', ('cd', 'ef'), 'gh', 'ij'] + + Specify *levels* to stop flattening after a certain level: + + >>> iterable = [('a', ['b']), ('c', ['d'])] + >>> list(collapse(iterable)) # Fully flattened + ['a', 'b', 'c', 'd'] + >>> list(collapse(iterable, levels=1)) # Only one level flattened + ['a', ['b'], 'c', ['d']] + + """ + + def walk(node, level): + if ( + ((levels is not None) and (level > levels)) + or isinstance(node, (str, bytes)) + or ((base_type is not None) and isinstance(node, base_type)) + ): + yield node + return + + try: + tree = iter(node) + except TypeError: + yield node + return + else: + for child in tree: + yield from walk(child, level + 1) + + yield from walk(iterable, 0) + + +def side_effect(func, iterable, chunk_size=None, before=None, after=None): + """Invoke *func* on each item in *iterable* (or on each *chunk_size* group + of items) before yielding the item. + + `func` must be a function that takes a single argument. Its return value + will be discarded. + + *before* and *after* are optional functions that take no arguments. They + will be executed before iteration starts and after it ends, respectively. + + `side_effect` can be used for logging, updating progress bars, or anything + that is not functionally "pure." + + Emitting a status message: + + >>> from more_itertools import consume + >>> func = lambda item: print('Received {}'.format(item)) + >>> consume(side_effect(func, range(2))) + Received 0 + Received 1 + + Operating on chunks of items: + + >>> pair_sums = [] + >>> func = lambda chunk: pair_sums.append(sum(chunk)) + >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2)) + [0, 1, 2, 3, 4, 5] + >>> list(pair_sums) + [1, 5, 9] + + Writing to a file-like object: + + >>> from io import StringIO + >>> from more_itertools import consume + >>> f = StringIO() + >>> func = lambda x: print(x, file=f) + >>> before = lambda: print(u'HEADER', file=f) + >>> after = f.close + >>> it = [u'a', u'b', u'c'] + >>> consume(side_effect(func, it, before=before, after=after)) + >>> f.closed + True + + """ + try: + if before is not None: + before() + + if chunk_size is None: + for item in iterable: + func(item) + yield item + else: + for chunk in chunked(iterable, chunk_size): + func(chunk) + yield from chunk + finally: + if after is not None: + after() + + +def sliced(seq, n, strict=False): + """Yield slices of length *n* from the sequence *seq*. + + >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) + [(1, 2, 3), (4, 5, 6)] + + By the default, the last yielded slice will have fewer than *n* elements + if the length of *seq* is not divisible by *n*: + + >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) + [(1, 2, 3), (4, 5, 6), (7, 8)] + + If the length of *seq* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + slice is yielded. + + This function will only work for iterables that support slicing. + For non-sliceable iterables, see :func:`chunked`. + + """ + iterator = takewhile(len, (seq[i : i + n] for i in count(0, n))) + if strict: + + def ret(): + for _slice in iterator: + if len(_slice) != n: + raise ValueError("seq is not divisible by n.") + yield _slice + + return iter(ret()) + else: + return iterator + + +def split_at(iterable, pred, maxsplit=-1, keep_separator=False): + """Yield lists of items from *iterable*, where each list is delimited by + an item where callable *pred* returns ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b')) + [['a'], ['c', 'd', 'c'], ['a']] + + >>> list(split_at(range(10), lambda n: n % 2 == 1)) + [[0], [2], [4], [6], [8], []] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2)) + [[0], [2], [4, 5, 6, 7, 8, 9]] + + By default, the delimiting items are not included in the output. + The include them, set *keep_separator* to ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True)) + [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item): + yield buf + if keep_separator: + yield [item] + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + else: + buf.append(item) + yield buf + + +def split_before(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends just before + an item for which callable *pred* returns ``True``: + + >>> list(split_before('OneTwo', lambda s: s.isupper())) + [['O', 'n', 'e'], ['T', 'w', 'o']] + + >>> list(split_before(range(10), lambda n: n % 3 == 0)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield [item] + list(it) + return + buf = [] + maxsplit -= 1 + buf.append(item) + if buf: + yield buf + + +def split_after(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends with an + item where callable *pred* returns ``True``: + + >>> list(split_after('one1two2', lambda s: s.isdigit())) + [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']] + + >>> list(split_after(range(10), lambda n: n % 3 == 0)) + [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + buf.append(item) + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + if buf: + yield buf + + +def split_when(iterable, pred, maxsplit=-1): + """Split *iterable* into pieces based on the output of *pred*. + *pred* should be a function that takes successive pairs of items and + returns ``True`` if the iterable should be split in between them. + + For example, to find runs of increasing numbers, split the iterable when + element ``i`` is larger than element ``i + 1``: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y)) + [[1, 2, 3, 3], [2, 5], [2, 4], [2]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], + ... lambda x, y: x > y, maxsplit=2)) + [[1, 2, 3, 3], [2, 5], [2, 4, 2]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + it = iter(iterable) + try: + cur_item = next(it) + except StopIteration: + return + + buf = [cur_item] + for next_item in it: + if pred(cur_item, next_item): + yield buf + if maxsplit == 1: + yield [next_item] + list(it) + return + buf = [] + maxsplit -= 1 + + buf.append(next_item) + cur_item = next_item + + yield buf + + +def split_into(iterable, sizes): + """Yield a list of sequential items from *iterable* of length 'n' for each + integer 'n' in *sizes*. + + >>> list(split_into([1,2,3,4,5,6], [1,2,3])) + [[1], [2, 3], [4, 5, 6]] + + If the sum of *sizes* is smaller than the length of *iterable*, then the + remaining items of *iterable* will not be returned. + + >>> list(split_into([1,2,3,4,5,6], [2,3])) + [[1, 2], [3, 4, 5]] + + If the sum of *sizes* is larger than the length of *iterable*, fewer items + will be returned in the iteration that overruns *iterable* and further + lists will be empty: + + >>> list(split_into([1,2,3,4], [1,2,3,4])) + [[1], [2, 3], [4], []] + + When a ``None`` object is encountered in *sizes*, the returned list will + contain items up to the end of *iterable* the same way that itertools.slice + does: + + >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None])) + [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]] + + :func:`split_into` can be useful for grouping a series of items where the + sizes of the groups are not uniform. An example would be where in a row + from a table, multiple columns represent elements of the same feature + (e.g. a point represented by x,y,z) but, the format is not the same for + all columns. + """ + # convert the iterable argument into an iterator so its contents can + # be consumed by islice in case it is a generator + it = iter(iterable) + + for size in sizes: + if size is None: + yield list(it) + return + else: + yield list(islice(it, size)) + + +def padded(iterable, fillvalue=None, n=None, next_multiple=False): + """Yield the elements from *iterable*, followed by *fillvalue*, such that + at least *n* items are emitted. + + >>> list(padded([1, 2, 3], '?', 5)) + [1, 2, 3, '?', '?'] + + If *next_multiple* is ``True``, *fillvalue* will be emitted until the + number of items emitted is a multiple of *n*:: + + >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True)) + [1, 2, 3, 4, None, None] + + If *n* is ``None``, *fillvalue* will be emitted indefinitely. + + """ + it = iter(iterable) + if n is None: + yield from chain(it, repeat(fillvalue)) + elif n < 1: + raise ValueError('n must be at least 1') + else: + item_count = 0 + for item in it: + yield item + item_count += 1 + + remaining = (n - item_count) % n if next_multiple else n - item_count + for _ in range(remaining): + yield fillvalue + + +def repeat_each(iterable, n=2): + """Repeat each element in *iterable* *n* times. + + >>> list(repeat_each('ABC', 3)) + ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'] + """ + return chain.from_iterable(map(repeat, iterable, repeat(n))) + + +def repeat_last(iterable, default=None): + """After the *iterable* is exhausted, keep yielding its last element. + + >>> list(islice(repeat_last(range(3)), 5)) + [0, 1, 2, 2, 2] + + If the iterable is empty, yield *default* forever:: + + >>> list(islice(repeat_last(range(0), 42), 5)) + [42, 42, 42, 42, 42] + + """ + item = _marker + for item in iterable: + yield item + final = default if item is _marker else item + yield from repeat(final) + + +def distribute(n, iterable): + """Distribute the items from *iterable* among *n* smaller iterables. + + >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 3, 5] + >>> list(group_2) + [2, 4, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 4, 7], [2, 5], [3, 6]] + + If the length of *iterable* is smaller than *n*, then the last returned + iterables will be empty: + + >>> children = distribute(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function uses :func:`itertools.tee` and may require significant + storage. If you need the order items in the smaller iterables to match the + original iterable, see :func:`divide`. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + children = tee(iterable, n) + return [islice(it, index, None, n) for index, it in enumerate(children)] + + +def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None): + """Yield tuples whose elements are offset from *iterable*. + The amount by which the `i`-th item in each tuple is offset is given by + the `i`-th item in *offsets*. + + >>> list(stagger([0, 1, 2, 3])) + [(None, 0, 1), (0, 1, 2), (1, 2, 3)] + >>> list(stagger(range(8), offsets=(0, 2, 4))) + [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)] + + By default, the sequence will end when the final element of a tuple is the + last item in the iterable. To continue until the first element of a tuple + is the last item in the iterable, set *longest* to ``True``:: + + >>> list(stagger([0, 1, 2, 3], longest=True)) + [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + children = tee(iterable, len(offsets)) + + return zip_offset( + *children, offsets=offsets, longest=longest, fillvalue=fillvalue + ) + + +class UnequalIterablesError(ValueError): + def __init__(self, details=None): + msg = 'Iterables have different lengths' + if details is not None: + msg += (': index 0 has length {}; index {} has length {}').format( + *details + ) + + super().__init__(msg) + + +def _zip_equal_generator(iterables): + for combo in zip_longest(*iterables, fillvalue=_marker): + for val in combo: + if val is _marker: + raise UnequalIterablesError() + yield combo + + +def _zip_equal(*iterables): + # Check whether the iterables are all the same size. + try: + first_size = len(iterables[0]) + for i, it in enumerate(iterables[1:], 1): + size = len(it) + if size != first_size: + break + else: + # If we didn't break out, we can use the built-in zip. + return zip(*iterables) + + # If we did break out, there was a mismatch. + raise UnequalIterablesError(details=(first_size, i, size)) + # If any one of the iterables didn't have a length, start reading + # them until one runs out. + except TypeError: + return _zip_equal_generator(iterables) + + +def zip_equal(*iterables): + """``zip`` the input *iterables* together, but raise + ``UnequalIterablesError`` if they aren't all the same length. + + >>> it_1 = range(3) + >>> it_2 = iter('abc') + >>> list(zip_equal(it_1, it_2)) + [(0, 'a'), (1, 'b'), (2, 'c')] + + >>> it_1 = range(3) + >>> it_2 = iter('abcd') + >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + more_itertools.more.UnequalIterablesError: Iterables have different + lengths + + """ + if hexversion >= 0x30A00A6: + warnings.warn( + ( + 'zip_equal will be removed in a future version of ' + 'more-itertools. Use the builtin zip function with ' + 'strict=True instead.' + ), + DeprecationWarning, + ) + + return _zip_equal(*iterables) + + +def zip_offset(*iterables, offsets, longest=False, fillvalue=None): + """``zip`` the input *iterables* together, but offset the `i`-th iterable + by the `i`-th item in *offsets*. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1))) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')] + + This can be used as a lightweight alternative to SciPy or pandas to analyze + data sets in which some series have a lead or lag relationship. + + By default, the sequence will end when the shortest iterable is exhausted. + To continue until the longest iterable is exhausted, set *longest* to + ``True``. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True)) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. + + """ + if len(iterables) != len(offsets): + raise ValueError("Number of iterables and offsets didn't match") + + staggered = [] + for it, n in zip(iterables, offsets): + if n < 0: + staggered.append(chain(repeat(fillvalue, -n), it)) + elif n > 0: + staggered.append(islice(it, n, None)) + else: + staggered.append(it) + + if longest: + return zip_longest(*staggered, fillvalue=fillvalue) + + return zip(*staggered) + + +def sort_together(iterables, key_list=(0,), key=None, reverse=False): + """Return the input iterables sorted together, with *key_list* as the + priority for sorting. All iterables are trimmed to the length of the + shortest one. + + This can be used like the sorting function in a spreadsheet. If each + iterable represents a column of data, the key list determines which + columns are used for sorting. + + By default, all iterables are sorted using the ``0``-th iterable:: + + >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] + >>> sort_together(iterables) + [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] + + Set a different key list to sort according to another iterable. + Specifying multiple keys dictates how ties are broken:: + + >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] + >>> sort_together(iterables, key_list=(1, 2)) + [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] + + To sort by a function of the elements of the iterable, pass a *key* + function. Its arguments are the elements of the iterables corresponding to + the key list:: + + >>> names = ('a', 'b', 'c') + >>> lengths = (1, 2, 3) + >>> widths = (5, 2, 1) + >>> def area(length, width): + ... return length * width + >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) + [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] + + Set *reverse* to ``True`` to sort in descending order. + + >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) + [(3, 2, 1), ('a', 'b', 'c')] + + """ + if key is None: + # if there is no key function, the key argument to sorted is an + # itemgetter + key_argument = itemgetter(*key_list) + else: + # if there is a key function, call it with the items at the offsets + # specified by the key function as arguments + key_list = list(key_list) + if len(key_list) == 1: + # if key_list contains a single item, pass the item at that offset + # as the only argument to the key function + key_offset = key_list[0] + key_argument = lambda zipped_items: key(zipped_items[key_offset]) + else: + # if key_list contains multiple items, use itemgetter to return a + # tuple of items, which we pass as *args to the key function + get_key_items = itemgetter(*key_list) + key_argument = lambda zipped_items: key( + *get_key_items(zipped_items) + ) + + return list( + zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) + ) + + +def unzip(iterable): + """The inverse of :func:`zip`, this function disaggregates the elements + of the zipped *iterable*. + + The ``i``-th iterable contains the ``i``-th element from each element + of the zipped iterable. The first element is used to to determine the + length of the remaining elements. + + >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> letters, numbers = unzip(iterable) + >>> list(letters) + ['a', 'b', 'c', 'd'] + >>> list(numbers) + [1, 2, 3, 4] + + This is similar to using ``zip(*iterable)``, but it avoids reading + *iterable* into memory. Note, however, that this function uses + :func:`itertools.tee` and thus may require significant storage. + + """ + head, iterable = spy(iter(iterable)) + if not head: + # empty iterable, e.g. zip([], [], []) + return () + # spy returns a one-length iterable as head + head = head[0] + iterables = tee(iterable, len(head)) + + def itemgetter(i): + def getter(obj): + try: + return obj[i] + except IndexError: + # basically if we have an iterable like + # iter([(1, 2, 3), (4, 5), (6,)]) + # the second unzipped iterable would fail at the third tuple + # since it would try to access tup[1] + # same with the third unzipped iterable and the second tuple + # to support these "improperly zipped" iterables, + # we create a custom itemgetter + # which just stops the unzipped iterables + # at first length mismatch + raise StopIteration + + return getter + + return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) + + +def divide(n, iterable): + """Divide the elements from *iterable* into *n* parts, maintaining + order. + + >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 2, 3] + >>> list(group_2) + [4, 5, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 2, 3], [4, 5], [6, 7]] + + If the length of the iterable is smaller than n, then the last returned + iterables will be empty: + + >>> children = divide(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function will exhaust the iterable before returning and may require + significant storage. If order is not important, see :func:`distribute`, + which does not first pull the iterable into memory. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + try: + iterable[:0] + except TypeError: + seq = tuple(iterable) + else: + seq = iterable + + q, r = divmod(len(seq), n) + + ret = [] + stop = 0 + for i in range(1, n + 1): + start = stop + stop += q + 1 if i <= r else q + ret.append(iter(seq[start:stop])) + + return ret + + +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) + + +def adjacent(predicate, iterable, distance=1): + """Return an iterable over `(bool, item)` tuples where the `item` is + drawn from *iterable* and the `bool` indicates whether + that item satisfies the *predicate* or is adjacent to an item that does. + + For example, to find whether items are adjacent to a ``3``:: + + >>> list(adjacent(lambda x: x == 3, range(6))) + [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)] + + Set *distance* to change what counts as adjacent. For example, to find + whether items are two places away from a ``3``: + + >>> list(adjacent(lambda x: x == 3, range(6), distance=2)) + [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)] + + This is useful for contextualizing the results of a search function. + For example, a code comparison tool might want to identify lines that + have changed, but also surrounding lines to give the viewer of the diff + context. + + The predicate function will only be called once for each item in the + iterable. + + See also :func:`groupby_transform`, which can be used with this function + to group ranges of items with the same `bool` value. + + """ + # Allow distance=0 mainly for testing that it reproduces results with map() + if distance < 0: + raise ValueError('distance must be at least 0') + + i1, i2 = tee(iterable) + padding = [False] * distance + selected = chain(padding, map(predicate, i1), padding) + adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1)) + return zip(adjacent_to_selected, i2) + + +def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): + """An extension of :func:`itertools.groupby` that can apply transformations + to the grouped data. + + * *keyfunc* is a function computing a key value for each item in *iterable* + * *valuefunc* is a function that transforms the individual items from + *iterable* after grouping + * *reducefunc* is a function that transforms each group of items + + >>> iterable = 'aAAbBBcCC' + >>> keyfunc = lambda k: k.upper() + >>> valuefunc = lambda v: v.lower() + >>> reducefunc = lambda g: ''.join(g) + >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) + [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] + + Each optional argument defaults to an identity function if not specified. + + :func:`groupby_transform` is useful when grouping elements of an iterable + using a separate iterable as the key. To do this, :func:`zip` the iterables + and pass a *keyfunc* that extracts the first element and a *valuefunc* + that extracts the second element:: + + >>> from operator import itemgetter + >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3] + >>> values = 'abcdefghi' + >>> iterable = zip(keys, values) + >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1)) + >>> [(k, ''.join(g)) for k, g in grouper] + [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')] + + Note that the order of items in the iterable is significant. + Only adjacent items are grouped together, so if you don't want any + duplicate groups, you should sort the iterable by the key function. + + """ + ret = groupby(iterable, keyfunc) + if valuefunc: + ret = ((k, map(valuefunc, g)) for k, g in ret) + if reducefunc: + ret = ((k, reducefunc(g)) for k, g in ret) + + return ret + + +class numeric_range(abc.Sequence, abc.Hashable): + """An extension of the built-in ``range()`` function whose arguments can + be any orderable numeric type. + + With only *stop* specified, *start* defaults to ``0`` and *step* + defaults to ``1``. The output items will match the type of *stop*: + + >>> list(numeric_range(3.5)) + [0.0, 1.0, 2.0, 3.0] + + With only *start* and *stop* specified, *step* defaults to ``1``. The + output items will match the type of *start*: + + >>> from decimal import Decimal + >>> start = Decimal('2.1') + >>> stop = Decimal('5.1') + >>> list(numeric_range(start, stop)) + [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')] + + With *start*, *stop*, and *step* specified the output items will match + the type of ``start + step``: + + >>> from fractions import Fraction + >>> start = Fraction(1, 2) # Start at 1/2 + >>> stop = Fraction(5, 2) # End at 5/2 + >>> step = Fraction(1, 2) # Count by 1/2 + >>> list(numeric_range(start, stop, step)) + [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)] + + If *step* is zero, ``ValueError`` is raised. Negative steps are supported: + + >>> list(numeric_range(3, -1, -1.0)) + [3.0, 2.0, 1.0, 0.0] + + Be aware of the limitations of floating point numbers; the representation + of the yielded numbers may be surprising. + + ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* + is a ``datetime.timedelta`` object: + + >>> import datetime + >>> start = datetime.datetime(2019, 1, 1) + >>> stop = datetime.datetime(2019, 1, 3) + >>> step = datetime.timedelta(days=1) + >>> items = iter(numeric_range(start, stop, step)) + >>> next(items) + datetime.datetime(2019, 1, 1, 0, 0) + >>> next(items) + datetime.datetime(2019, 1, 2, 0, 0) + + """ + + _EMPTY_HASH = hash(range(0, 0)) + + def __init__(self, *args): + argc = len(args) + if argc == 1: + (self._stop,) = args + self._start = type(self._stop)(0) + self._step = type(self._stop - self._start)(1) + elif argc == 2: + self._start, self._stop = args + self._step = type(self._stop - self._start)(1) + elif argc == 3: + self._start, self._stop, self._step = args + elif argc == 0: + raise TypeError( + 'numeric_range expected at least ' + '1 argument, got {}'.format(argc) + ) + else: + raise TypeError( + 'numeric_range expected at most ' + '3 arguments, got {}'.format(argc) + ) + + self._zero = type(self._step)(0) + if self._step == self._zero: + raise ValueError('numeric_range() arg 3 must not be zero') + self._growing = self._step > self._zero + self._init_len() + + def __bool__(self): + if self._growing: + return self._start < self._stop + else: + return self._start > self._stop + + def __contains__(self, elem): + if self._growing: + if self._start <= elem < self._stop: + return (elem - self._start) % self._step == self._zero + else: + if self._start >= elem > self._stop: + return (self._start - elem) % (-self._step) == self._zero + + return False + + def __eq__(self, other): + if isinstance(other, numeric_range): + empty_self = not bool(self) + empty_other = not bool(other) + if empty_self or empty_other: + return empty_self and empty_other # True if both empty + else: + return ( + self._start == other._start + and self._step == other._step + and self._get_by_index(-1) == other._get_by_index(-1) + ) + else: + return False + + def __getitem__(self, key): + if isinstance(key, int): + return self._get_by_index(key) + elif isinstance(key, slice): + step = self._step if key.step is None else key.step * self._step + + if key.start is None or key.start <= -self._len: + start = self._start + elif key.start >= self._len: + start = self._stop + else: # -self._len < key.start < self._len + start = self._get_by_index(key.start) + + if key.stop is None or key.stop >= self._len: + stop = self._stop + elif key.stop <= -self._len: + stop = self._start + else: # -self._len < key.stop < self._len + stop = self._get_by_index(key.stop) + + return numeric_range(start, stop, step) + else: + raise TypeError( + 'numeric range indices must be ' + 'integers or slices, not {}'.format(type(key).__name__) + ) + + def __hash__(self): + if self: + return hash((self._start, self._get_by_index(-1), self._step)) + else: + return self._EMPTY_HASH + + def __iter__(self): + values = (self._start + (n * self._step) for n in count()) + if self._growing: + return takewhile(partial(gt, self._stop), values) + else: + return takewhile(partial(lt, self._stop), values) + + def __len__(self): + return self._len + + def _init_len(self): + if self._growing: + start = self._start + stop = self._stop + step = self._step + else: + start = self._stop + stop = self._start + step = -self._step + distance = stop - start + if distance <= self._zero: + self._len = 0 + else: # distance > 0 and step > 0: regular euclidean division + q, r = divmod(distance, step) + self._len = int(q) + int(r != self._zero) + + def __reduce__(self): + return numeric_range, (self._start, self._stop, self._step) + + def __repr__(self): + if self._step == 1: + return "numeric_range({}, {})".format( + repr(self._start), repr(self._stop) + ) + else: + return "numeric_range({}, {}, {})".format( + repr(self._start), repr(self._stop), repr(self._step) + ) + + def __reversed__(self): + return iter( + numeric_range( + self._get_by_index(-1), self._start - self._step, -self._step + ) + ) + + def count(self, value): + return int(value in self) + + def index(self, value): + if self._growing: + if self._start <= value < self._stop: + q, r = divmod(value - self._start, self._step) + if r == self._zero: + return int(q) + else: + if self._start >= value > self._stop: + q, r = divmod(self._start - value, -self._step) + if r == self._zero: + return int(q) + + raise ValueError("{} is not in numeric range".format(value)) + + def _get_by_index(self, i): + if i < 0: + i += self._len + if i < 0 or i >= self._len: + raise IndexError("numeric range object index out of range") + return self._start + i * self._step + + +def count_cycle(iterable, n=None): + """Cycle through the items from *iterable* up to *n* times, yielding + the number of completed cycles along with each item. If *n* is omitted the + process repeats indefinitely. + + >>> list(count_cycle('AB', 3)) + [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')] + + """ + iterable = tuple(iterable) + if not iterable: + return iter(()) + counter = count() if n is None else range(n) + return ((i, item) for i in counter for item in iterable) + + +def mark_ends(iterable): + """Yield 3-tuples of the form ``(is_first, is_last, item)``. + + >>> list(mark_ends('ABC')) + [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] + + Use this when looping over an iterable to take special action on its first + and/or last items: + + >>> iterable = ['Header', 100, 200, 'Footer'] + >>> total = 0 + >>> for is_first, is_last, item in mark_ends(iterable): + ... if is_first: + ... continue # Skip the header + ... if is_last: + ... continue # Skip the footer + ... total += item + >>> print(total) + 300 + """ + it = iter(iterable) + + try: + b = next(it) + except StopIteration: + return + + try: + for i in count(): + a = b + b = next(it) + yield i == 0, False, a + + except StopIteration: + yield i == 0, True, a + + +def locate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(locate([0, 1, 1, 0, 1, 0, 0])) + [1, 2, 4] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item. + + >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b')) + [1, 3] + + If *window_size* is given, then the *pred* function will be called with + that many items. This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(locate(iterable, pred=pred, window_size=3)) + [1, 5, 9] + + Use with :func:`seekable` to find indexes and then retrieve the associated + items: + + >>> from itertools import count + >>> from more_itertools import seekable + >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count()) + >>> it = seekable(source) + >>> pred = lambda x: x > 100 + >>> indexes = locate(it, pred=pred) + >>> i = next(indexes) + >>> it.seek(i) + >>> next(it) + 106 + + """ + if window_size is None: + return compress(count(), map(pred, iterable)) + + if window_size < 1: + raise ValueError('window size must be at least 1') + + it = windowed(iterable, window_size, fillvalue=_marker) + return compress(count(), starmap(pred, it)) + + +def lstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the beginning + for which *pred* returns ``True``. + + For example, to remove a set of items from the start of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(lstrip(iterable, pred)) + [1, 2, None, 3, False, None] + + This function is analogous to to :func:`str.lstrip`, and is essentially + an wrapper for :func:`itertools.dropwhile`. + + """ + return dropwhile(pred, iterable) + + +def rstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the end + for which *pred* returns ``True``. + + For example, to remove a set of items from the end of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(rstrip(iterable, pred)) + [None, False, None, 1, 2, None, 3] + + This function is analogous to :func:`str.rstrip`. + + """ + cache = [] + cache_append = cache.append + cache_clear = cache.clear + for x in iterable: + if pred(x): + cache_append(x) + else: + yield from cache + cache_clear() + yield x + + +def strip(iterable, pred): + """Yield the items from *iterable*, but strip any from the + beginning and end for which *pred* returns ``True``. + + For example, to remove a set of items from both ends of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(strip(iterable, pred)) + [1, 2, None, 3] + + This function is analogous to :func:`str.strip`. + + """ + return rstrip(lstrip(iterable, pred), pred) + + +class islice_extended: + """An extension of :func:`itertools.islice` that supports negative values + for *stop*, *start*, and *step*. + + >>> iterable = iter('abcdefgh') + >>> list(islice_extended(iterable, -4, -1)) + ['e', 'f', 'g'] + + Slices with negative values require some caching of *iterable*, but this + function takes care to minimize the amount of memory required. + + For example, you can use a negative step with an infinite iterator: + + >>> from itertools import count + >>> list(islice_extended(count(), 110, 99, -2)) + [110, 108, 106, 104, 102, 100] + + You can also use slice notation directly: + + >>> iterable = map(str, count()) + >>> it = islice_extended(iterable)[10:20:2] + >>> list(it) + ['10', '12', '14', '16', '18'] + + """ + + def __init__(self, iterable, *args): + it = iter(iterable) + if args: + self._iterable = _islice_helper(it, slice(*args)) + else: + self._iterable = it + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterable) + + def __getitem__(self, key): + if isinstance(key, slice): + return islice_extended(_islice_helper(self._iterable, key)) + + raise TypeError('islice_extended.__getitem__ argument must be a slice') + + +def _islice_helper(it, s): + start = s.start + stop = s.stop + if s.step == 0: + raise ValueError('step argument must be a non-zero integer or None.') + step = s.step or 1 + + if step > 0: + start = 0 if (start is None) else start + + if start < 0: + # Consume all but the last -start items + cache = deque(enumerate(it, 1), maxlen=-start) + len_iter = cache[-1][0] if cache else 0 + + # Adjust start to be positive + i = max(len_iter + start, 0) + + # Adjust stop to be positive + if stop is None: + j = len_iter + elif stop >= 0: + j = min(stop, len_iter) + else: + j = max(len_iter + stop, 0) + + # Slice the cache + n = j - i + if n <= 0: + return + + for index, item in islice(cache, 0, n, step): + yield item + elif (stop is not None) and (stop < 0): + # Advance to the start position + next(islice(it, start, start), None) + + # When stop is negative, we have to carry -stop items while + # iterating + cache = deque(islice(it, -stop), maxlen=-stop) + + for index, item in enumerate(it): + cached_item = cache.popleft() + if index % step == 0: + yield cached_item + cache.append(item) + else: + # When both start and stop are positive we have the normal case + yield from islice(it, start, stop, step) + else: + start = -1 if (start is None) else start + + if (stop is not None) and (stop < 0): + # Consume all but the last items + n = -stop - 1 + cache = deque(enumerate(it, 1), maxlen=n) + len_iter = cache[-1][0] if cache else 0 + + # If start and stop are both negative they are comparable and + # we can just slice. Otherwise we can adjust start to be negative + # and then slice. + if start < 0: + i, j = start, stop + else: + i, j = min(start - len_iter, -1), None + + for index, item in list(cache)[i:j:step]: + yield item + else: + # Advance to the stop position + if stop is not None: + m = stop + 1 + next(islice(it, m, m), None) + + # stop is positive, so if start is negative they are not comparable + # and we need the rest of the items. + if start < 0: + i = start + n = None + # stop is None and start is positive, so we just need items up to + # the start index. + elif stop is None: + i = None + n = start + 1 + # Both stop and start are positive, so they are comparable. + else: + i = None + n = start - stop + if n <= 0: + return + + cache = list(islice(it, n)) + + yield from cache[i::step] + + +def always_reversible(iterable): + """An extension of :func:`reversed` that supports all iterables, not + just those which implement the ``Reversible`` or ``Sequence`` protocols. + + >>> print(*always_reversible(x for x in range(3))) + 2 1 0 + + If the iterable is already reversible, this function returns the + result of :func:`reversed()`. If the iterable is not reversible, + this function will cache the remaining items in the iterable and + yield them in reverse order, which may require significant storage. + """ + try: + return reversed(iterable) + except TypeError: + return reversed(list(iterable)) + + +def consecutive_groups(iterable, ordering=lambda x: x): + """Yield groups of consecutive items using :func:`itertools.groupby`. + The *ordering* function determines whether two items are adjacent by + returning their position. + + By default, the ordering function is the identity function. This is + suitable for finding runs of numbers: + + >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40] + >>> for group in consecutive_groups(iterable): + ... print(list(group)) + [1] + [10, 11, 12] + [20] + [30, 31, 32, 33] + [40] + + For finding runs of adjacent letters, try using the :meth:`index` method + of a string of letters: + + >>> from string import ascii_lowercase + >>> iterable = 'abcdfgilmnop' + >>> ordering = ascii_lowercase.index + >>> for group in consecutive_groups(iterable, ordering): + ... print(list(group)) + ['a', 'b', 'c', 'd'] + ['f', 'g'] + ['i'] + ['l', 'm', 'n', 'o', 'p'] + + Each group of consecutive items is an iterator that shares it source with + *iterable*. When an an output group is advanced, the previous group is + no longer available unless its elements are copied (e.g., into a ``list``). + + >>> iterable = [1, 2, 11, 12, 21, 22] + >>> saved_groups = [] + >>> for group in consecutive_groups(iterable): + ... saved_groups.append(list(group)) # Copy group elements + >>> saved_groups + [[1, 2], [11, 12], [21, 22]] + + """ + for k, g in groupby( + enumerate(iterable), key=lambda x: x[0] - ordering(x[1]) + ): + yield map(itemgetter(1), g) + + +def difference(iterable, func=sub, *, initial=None): + """This function is the inverse of :func:`itertools.accumulate`. By default + it will compute the first difference of *iterable* using + :func:`operator.sub`: + + >>> from itertools import accumulate + >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10 + >>> list(difference(iterable)) + [0, 1, 2, 3, 4] + + *func* defaults to :func:`operator.sub`, but other functions can be + specified. They will be applied as follows:: + + A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ... + + For example, to do progressive division: + + >>> iterable = [1, 2, 6, 24, 120] + >>> func = lambda x, y: x // y + >>> list(difference(iterable, func)) + [1, 2, 3, 4, 5] + + If the *initial* keyword is set, the first element will be skipped when + computing successive differences. + + >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10) + >>> list(difference(it, initial=10)) + [1, 2, 3] + + """ + a, b = tee(iterable) + try: + first = [next(b)] + except StopIteration: + return iter([]) + + if initial is not None: + first = [] + + return chain(first, starmap(func, zip(b, a))) + + +class SequenceView(Sequence): + """Return a read-only view of the sequence object *target*. + + :class:`SequenceView` objects are analogous to Python's built-in + "dictionary view" types. They provide a dynamic view of a sequence's items, + meaning that when the sequence updates, so does the view. + + >>> seq = ['0', '1', '2'] + >>> view = SequenceView(seq) + >>> view + SequenceView(['0', '1', '2']) + >>> seq.append('3') + >>> view + SequenceView(['0', '1', '2', '3']) + + Sequence views support indexing, slicing, and length queries. They act + like the underlying sequence, except they don't allow assignment: + + >>> view[1] + '1' + >>> view[1:-1] + ['1', '2'] + >>> len(view) + 4 + + Sequence views are useful as an alternative to copying, as they don't + require (much) extra storage. + + """ + + def __init__(self, target): + if not isinstance(target, Sequence): + raise TypeError + self._target = target + + def __getitem__(self, index): + return self._target[index] + + def __len__(self): + return len(self._target) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self._target)) + + +class seekable: + """Wrap an iterator to allow for seeking backward and forward. This + progressively caches the items in the source iterable so they can be + re-visited. + + Call :meth:`seek` with an index to seek to that position in the source + iterable. + + To "reset" an iterator, seek to ``0``: + + >>> from itertools import count + >>> it = seekable((str(n) for n in count())) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> it.seek(0) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> next(it) + '3' + + You can also seek forward: + + >>> it = seekable((str(n) for n in range(20))) + >>> it.seek(10) + >>> next(it) + '10' + >>> it.seek(20) # Seeking past the end of the source isn't a problem + >>> list(it) + [] + >>> it.seek(0) # Resetting works even after hitting the end + >>> next(it), next(it), next(it) + ('0', '1', '2') + + Call :meth:`peek` to look ahead one item without advancing the iterator: + + >>> it = seekable('1234') + >>> it.peek() + '1' + >>> list(it) + ['1', '2', '3', '4'] + >>> it.peek(default='empty') + 'empty' + + Before the iterator is at its end, calling :func:`bool` on it will return + ``True``. After it will return ``False``: + + >>> it = seekable('5678') + >>> bool(it) + True + >>> list(it) + ['5', '6', '7', '8'] + >>> bool(it) + False + + You may view the contents of the cache with the :meth:`elements` method. + That returns a :class:`SequenceView`, a view that updates automatically: + + >>> it = seekable((str(n) for n in range(10))) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> elements = it.elements() + >>> elements + SequenceView(['0', '1', '2']) + >>> next(it) + '3' + >>> elements + SequenceView(['0', '1', '2', '3']) + + By default, the cache grows as the source iterable progresses, so beware of + wrapping very large or infinite iterables. Supply *maxlen* to limit the + size of the cache (this of course limits how far back you can seek). + + >>> from itertools import count + >>> it = seekable((str(n) for n in count()), maxlen=2) + >>> next(it), next(it), next(it), next(it) + ('0', '1', '2', '3') + >>> list(it.elements()) + ['2', '3'] + >>> it.seek(0) + >>> next(it), next(it), next(it), next(it) + ('2', '3', '4', '5') + >>> next(it) + '6' + + """ + + def __init__(self, iterable, maxlen=None): + self._source = iter(iterable) + if maxlen is None: + self._cache = [] + else: + self._cache = deque([], maxlen) + self._index = None + + def __iter__(self): + return self + + def __next__(self): + if self._index is not None: + try: + item = self._cache[self._index] + except IndexError: + self._index = None + else: + self._index += 1 + return item + + item = next(self._source) + self._cache.append(item) + return item + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + try: + peeked = next(self) + except StopIteration: + if default is _marker: + raise + return default + if self._index is None: + self._index = len(self._cache) + self._index -= 1 + return peeked + + def elements(self): + return SequenceView(self._cache) + + def seek(self, index): + self._index = index + remainder = index - len(self._cache) + if remainder > 0: + consume(self, remainder) + + +class run_length: + """ + :func:`run_length.encode` compresses an iterable with run-length encoding. + It yields groups of repeated items with the count of how many times they + were repeated: + + >>> uncompressed = 'abbcccdddd' + >>> list(run_length.encode(uncompressed)) + [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + + :func:`run_length.decode` decompresses an iterable that was previously + compressed with run-length encoding. It yields the items of the + decompressed iterable: + + >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> list(run_length.decode(compressed)) + ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd'] + + """ + + @staticmethod + def encode(iterable): + return ((k, ilen(g)) for k, g in groupby(iterable)) + + @staticmethod + def decode(iterable): + return chain.from_iterable(repeat(k, n) for k, n in iterable) + + +def exactly_n(iterable, n, predicate=bool): + """Return ``True`` if exactly ``n`` items in the iterable are ``True`` + according to the *predicate* function. + + >>> exactly_n([True, True, False], 2) + True + >>> exactly_n([True, True, False], 1) + False + >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3) + True + + The iterable will be advanced until ``n + 1`` truthy items are encountered, + so avoid calling it on infinite iterables. + + """ + return len(take(n + 1, filter(predicate, iterable))) == n + + +def circular_shifts(iterable): + """Return a list of circular shifts of *iterable*. + + >>> circular_shifts(range(4)) + [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] + """ + lst = list(iterable) + return take(len(lst), windowed(cycle(lst), len(lst))) + + +def make_decorator(wrapping_func, result_index=0): + """Return a decorator version of *wrapping_func*, which is a function that + modifies an iterable. *result_index* is the position in that function's + signature where the iterable goes. + + This lets you use itertools on the "production end," i.e. at function + definition. This can augment what the function returns without changing the + function's code. + + For example, to produce a decorator version of :func:`chunked`: + + >>> from more_itertools import chunked + >>> chunker = make_decorator(chunked, result_index=0) + >>> @chunker(3) + ... def iter_range(n): + ... return iter(range(n)) + ... + >>> list(iter_range(9)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8]] + + To only allow truthy items to be returned: + + >>> truth_serum = make_decorator(filter, result_index=1) + >>> @truth_serum(bool) + ... def boolean_test(): + ... return [0, 1, '', ' ', False, True] + ... + >>> list(boolean_test()) + [1, ' ', True] + + The :func:`peekable` and :func:`seekable` wrappers make for practical + decorators: + + >>> from more_itertools import peekable + >>> peekable_function = make_decorator(peekable) + >>> @peekable_function() + ... def str_range(*args): + ... return (str(x) for x in range(*args)) + ... + >>> it = str_range(1, 20, 2) + >>> next(it), next(it), next(it) + ('1', '3', '5') + >>> it.peek() + '7' + >>> next(it) + '7' + + """ + # See https://sites.google.com/site/bbayles/index/decorator_factory for + # notes on how this works. + def decorator(*wrapping_args, **wrapping_kwargs): + def outer_wrapper(f): + def inner_wrapper(*args, **kwargs): + result = f(*args, **kwargs) + wrapping_args_ = list(wrapping_args) + wrapping_args_.insert(result_index, result) + return wrapping_func(*wrapping_args_, **wrapping_kwargs) + + return inner_wrapper + + return outer_wrapper + + return decorator + + +def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None): + """Return a dictionary that maps the items in *iterable* to categories + defined by *keyfunc*, transforms them with *valuefunc*, and + then summarizes them by category with *reducefunc*. + + *valuefunc* defaults to the identity function if it is unspecified. + If *reducefunc* is unspecified, no summarization takes place: + + >>> keyfunc = lambda x: x.upper() + >>> result = map_reduce('abbccc', keyfunc) + >>> sorted(result.items()) + [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])] + + Specifying *valuefunc* transforms the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> result = map_reduce('abbccc', keyfunc, valuefunc) + >>> sorted(result.items()) + [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])] + + Specifying *reducefunc* summarizes the categorized items: + + >>> keyfunc = lambda x: x.upper() + >>> valuefunc = lambda x: 1 + >>> reducefunc = sum + >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc) + >>> sorted(result.items()) + [('A', 1), ('B', 2), ('C', 3)] + + You may want to filter the input iterable before applying the map/reduce + procedure: + + >>> all_items = range(30) + >>> items = [x for x in all_items if 10 <= x <= 20] # Filter + >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1 + >>> categories = map_reduce(items, keyfunc=keyfunc) + >>> sorted(categories.items()) + [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])] + >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum) + >>> sorted(summaries.items()) + [(0, 90), (1, 75)] + + Note that all items in the iterable are gathered into a list before the + summarization step, which may require significant storage. + + The returned object is a :obj:`collections.defaultdict` with the + ``default_factory`` set to ``None``, such that it behaves like a normal + dictionary. + + """ + valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc + + ret = defaultdict(list) + for item in iterable: + key = keyfunc(item) + value = valuefunc(item) + ret[key].append(value) + + if reducefunc is not None: + for key, value_list in ret.items(): + ret[key] = reducefunc(value_list) + + ret.default_factory = None + return ret + + +def rlocate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``, starting from the right and moving left. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(rlocate([0, 1, 1, 0, 1, 0, 0])) # Truthy at 1, 2, and 4 + [4, 2, 1] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item: + + >>> iterable = iter('abcb') + >>> pred = lambda x: x == 'b' + >>> list(rlocate(iterable, pred)) + [3, 1] + + If *window_size* is given, then the *pred* function will be called with + that many items. This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(rlocate(iterable, pred=pred, window_size=3)) + [9, 5, 1] + + Beware, this function won't return anything for infinite iterables. + If *iterable* is reversible, ``rlocate`` will reverse it and search from + the right. Otherwise, it will search from the left and return the results + in reverse order. + + See :func:`locate` to for other example applications. + + """ + if window_size is None: + try: + len_iter = len(iterable) + return (len_iter - i - 1 for i in locate(reversed(iterable), pred)) + except TypeError: + pass + + return reversed(list(locate(iterable, pred, window_size))) + + +def replace(iterable, pred, substitutes, count=None, window_size=1): + """Yield the items from *iterable*, replacing the items for which *pred* + returns ``True`` with the items from the iterable *substitutes*. + + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1] + >>> pred = lambda x: x == 0 + >>> substitutes = (2, 3) + >>> list(replace(iterable, pred, substitutes)) + [1, 1, 2, 3, 1, 1, 2, 3, 1, 1] + + If *count* is given, the number of replacements will be limited: + + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0] + >>> pred = lambda x: x == 0 + >>> substitutes = [None] + >>> list(replace(iterable, pred, substitutes, count=2)) + [1, 1, None, 1, 1, None, 1, 1, 0] + + Use *window_size* to control the number of items passed as arguments to + *pred*. This allows for locating and replacing subsequences. + + >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5] + >>> window_size = 3 + >>> pred = lambda *args: args == (0, 1, 2) # 3 items passed to pred + >>> substitutes = [3, 4] # Splice in these items + >>> list(replace(iterable, pred, substitutes, window_size=window_size)) + [3, 4, 5, 3, 4, 5] + + """ + if window_size < 1: + raise ValueError('window_size must be at least 1') + + # Save the substitutes iterable, since it's used more than once + substitutes = tuple(substitutes) + + # Add padding such that the number of windows matches the length of the + # iterable + it = chain(iterable, [_marker] * (window_size - 1)) + windows = windowed(it, window_size) + + n = 0 + for w in windows: + # If the current window matches our predicate (and we haven't hit + # our maximum number of replacements), splice in the substitutes + # and then consume the following windows that overlap with this one. + # For example, if the iterable is (0, 1, 2, 3, 4...) + # and the window size is 2, we have (0, 1), (1, 2), (2, 3)... + # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2) + if pred(*w): + if (count is None) or (n < count): + n += 1 + yield from substitutes + consume(windows, window_size - 1) + continue + + # If there was no match (or we've reached the replacement limit), + # yield the first item from the window. + if w and (w[0] is not _marker): + yield w[0] + + +def partitions(iterable): + """Yield all possible order-preserving partitions of *iterable*. + + >>> iterable = 'abc' + >>> for part in partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['a', 'b', 'c'] + + This is unrelated to :func:`partition`. + + """ + sequence = list(iterable) + n = len(sequence) + for i in powerset(range(1, n)): + yield [sequence[i:j] for i, j in zip((0,) + i, i + (n,))] + + +def set_partitions(iterable, k=None): + """ + Yield the set partitions of *iterable* into *k* parts. Set partitions are + not order-preserving. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable, 2): + ... print([''.join(p) for p in part]) + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + + + If *k* is not given, every set partition is generated. + + >>> iterable = 'abc' + >>> for part in set_partitions(iterable): + ... print([''.join(p) for p in part]) + ['abc'] + ['a', 'bc'] + ['ab', 'c'] + ['b', 'ac'] + ['a', 'b', 'c'] + + """ + L = list(iterable) + n = len(L) + if k is not None: + if k < 1: + raise ValueError( + "Can't partition in a negative or zero number of groups" + ) + elif k > n: + return + + def set_partitions_helper(L, k): + n = len(L) + if k == 1: + yield [L] + elif n == k: + yield [[s] for s in L] + else: + e, *M = L + for p in set_partitions_helper(M, k - 1): + yield [[e], *p] + for p in set_partitions_helper(M, k): + for i in range(len(p)): + yield p[:i] + [[e] + p[i]] + p[i + 1 :] + + if k is None: + for k in range(1, n + 1): + yield from set_partitions_helper(L, k) + else: + yield from set_partitions_helper(L, k) + + +class time_limited: + """ + Yield items from *iterable* until *limit_seconds* have passed. + If the time limit expires before all items have been yielded, the + ``timed_out`` parameter will be set to ``True``. + + >>> from time import sleep + >>> def generator(): + ... yield 1 + ... yield 2 + ... sleep(0.2) + ... yield 3 + >>> iterable = time_limited(0.1, generator()) + >>> list(iterable) + [1, 2] + >>> iterable.timed_out + True + + Note that the time is checked before each item is yielded, and iteration + stops if the time elapsed is greater than *limit_seconds*. If your time + limit is 1 second, but it takes 2 seconds to generate the first item from + the iterable, the function will run for 2 seconds and not yield anything. + + """ + + def __init__(self, limit_seconds, iterable): + if limit_seconds < 0: + raise ValueError('limit_seconds must be positive') + self.limit_seconds = limit_seconds + self._iterable = iter(iterable) + self._start_time = monotonic() + self.timed_out = False + + def __iter__(self): + return self + + def __next__(self): + item = next(self._iterable) + if monotonic() - self._start_time > self.limit_seconds: + self.timed_out = True + raise StopIteration + + return item + + +def only(iterable, default=None, too_long=None): + """If *iterable* has only one item, return it. + If it has zero items, return *default*. + If it has more than one item, raise the exception given by *too_long*, + which is ``ValueError`` by default. + + >>> only([], default='missing') + 'missing' + >>> only([1]) + 1 + >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 1, 2, + and perhaps more.' + >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError + + Note that :func:`only` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check + iterable contents less destructively. + """ + it = iter(iterable) + first_value = next(it, default) + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value + + +def ichunked(iterable, n): + """Break *iterable* into sub-iterables with *n* elements each. + :func:`ichunked` is like :func:`chunked`, but it yields iterables + instead of lists. + + If the sub-iterables are read in order, the elements of *iterable* + won't be stored in memory. + If they are read out of order, :func:`itertools.tee` is used to cache + elements as necessary. + + >>> from itertools import count + >>> all_chunks = ichunked(count(), 4) + >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks) + >>> list(c_2) # c_1's elements have been cached; c_3's haven't been + [4, 5, 6, 7] + >>> list(c_1) + [0, 1, 2, 3] + >>> list(c_3) + [8, 9, 10, 11] + + """ + source = iter(iterable) + + while True: + # Check to see whether we're at the end of the source iterable + item = next(source, _marker) + if item is _marker: + return + + # Clone the source and yield an n-length slice + source, it = tee(chain([item], source)) + yield islice(it, n) + + # Advance the source iterable + consume(source, n) + + +def distinct_combinations(iterable, r): + """Yield the distinct combinations of *r* items taken from *iterable*. + + >>> list(distinct_combinations([0, 0, 1], 2)) + [(0, 0), (0, 1)] + + Equivalent to ``set(combinations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + """ + if r < 0: + raise ValueError('r must be non-negative') + elif r == 0: + yield () + return + pool = tuple(iterable) + generators = [unique_everseen(enumerate(pool), key=itemgetter(1))] + current_combo = [None] * r + level = 0 + while generators: + try: + cur_idx, p = next(generators[-1]) + except StopIteration: + generators.pop() + level -= 1 + continue + current_combo[level] = p + if level + 1 == r: + yield tuple(current_combo) + else: + generators.append( + unique_everseen( + enumerate(pool[cur_idx + 1 :], cur_idx + 1), + key=itemgetter(1), + ) + ) + level += 1 + + +def filter_except(validator, iterable, *exceptions): + """Yield the items from *iterable* for which the *validator* function does + not raise one of the specified *exceptions*. + + *validator* is called for each item in *iterable*. + It should be a function that accepts one argument and raises an exception + if that item is not valid. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(filter_except(int, iterable, ValueError, TypeError)) + ['1', '2', '4'] + + If an exception other than one given by *exceptions* is raised by + *validator*, it is raised like normal. + """ + for item in iterable: + try: + validator(item) + except exceptions: + pass + else: + yield item + + +def map_except(function, iterable, *exceptions): + """Transform each item from *iterable* with *function* and yield the + result, unless *function* raises one of the specified *exceptions*. + + *function* is called to transform each item in *iterable*. + It should accept one argument. + + >>> iterable = ['1', '2', 'three', '4', None] + >>> list(map_except(int, iterable, ValueError, TypeError)) + [1, 2, 4] + + If an exception other than one given by *exceptions* is raised by + *function*, it is raised like normal. + """ + for item in iterable: + try: + yield function(item) + except exceptions: + pass + + +def map_if(iterable, pred, func, func_else=lambda x: x): + """Evaluate each item from *iterable* using *pred*. If the result is + equivalent to ``True``, transform the item with *func* and yield it. + Otherwise, transform the item with *func_else* and yield it. + + *pred*, *func*, and *func_else* should each be functions that accept + one argument. By default, *func_else* is the identity function. + + >>> from math import sqrt + >>> iterable = list(range(-5, 5)) + >>> iterable + [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4] + >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig')) + [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig'] + >>> list(map_if(iterable, lambda x: x >= 0, + ... lambda x: f'{sqrt(x):.2f}', lambda x: None)) + [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00'] + """ + for item in iterable: + yield func(item) if pred(item) else func_else(item) + + +def _sample_unweighted(iterable, k): + # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li: + # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))". + + # Fill up the reservoir (collection of samples) with the first `k` samples + reservoir = take(k, iterable) + + # Generate random number that's the largest in a sample of k U(0,1) numbers + # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic + W = exp(log(random()) / k) + + # The number of elements to skip before changing the reservoir is a random + # number with a geometric distribution. Sample it using random() and logs. + next_index = k + floor(log(random()) / log(1 - W)) + + for index, element in enumerate(iterable, k): + + if index == next_index: + reservoir[randrange(k)] = element + # The new W is the largest in a sample of k U(0, `old_W`) numbers + W *= exp(log(random()) / k) + next_index += floor(log(random()) / log(1 - W)) + 1 + + return reservoir + + +def _sample_weighted(iterable, k, weights): + # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. : + # "Weighted random sampling with a reservoir". + + # Log-transform for numerical stability for weights that are small/large + weight_keys = (log(random()) / weight for weight in weights) + + # Fill up the reservoir (collection of samples) with the first `k` + # weight-keys and elements, then heapify the list. + reservoir = take(k, zip(weight_keys, iterable)) + heapify(reservoir) + + # The number of jumps before changing the reservoir is a random variable + # with an exponential distribution. Sample it using random() and logs. + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + + for weight, element in zip(weights, iterable): + if weight >= weights_to_skip: + # The notation here is consistent with the paper, but we store + # the weight-keys in log-space for better numerical stability. + smallest_weight_key, _ = reservoir[0] + t_w = exp(weight * smallest_weight_key) + r_2 = uniform(t_w, 1) # generate U(t_w, 1) + weight_key = log(r_2) / weight + heapreplace(reservoir, (weight_key, element)) + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + else: + weights_to_skip -= weight + + # Equivalent to [element for weight_key, element in sorted(reservoir)] + return [heappop(reservoir)[1] for _ in range(k)] + + +def sample(iterable, k, weights=None): + """Return a *k*-length list of elements chosen (without replacement) + from the *iterable*. Like :func:`random.sample`, but works on iterables + of unknown length. + + >>> iterable = range(100) + >>> sample(iterable, 5) # doctest: +SKIP + [81, 60, 96, 16, 4] + + An iterable with *weights* may also be given: + + >>> iterable = range(100) + >>> weights = (i * i + 1 for i in range(100)) + >>> sampled = sample(iterable, 5, weights=weights) # doctest: +SKIP + [79, 67, 74, 66, 78] + + The algorithm can also be used to generate weighted random permutations. + The relative weight of each item determines the probability that it + appears late in the permutation. + + >>> data = "abcdefgh" + >>> weights = range(1, len(data) + 1) + >>> sample(data, k=len(data), weights=weights) # doctest: +SKIP + ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f'] + """ + if k == 0: + return [] + + iterable = iter(iterable) + if weights is None: + return _sample_unweighted(iterable, k) + else: + weights = iter(weights) + return _sample_weighted(iterable, k, weights) + + +def is_sorted(iterable, key=None, reverse=False, strict=False): + """Returns ``True`` if the items of iterable are in sorted order, and + ``False`` otherwise. *key* and *reverse* have the same meaning that they do + in the built-in :func:`sorted` function. + + >>> is_sorted(['1', '2', '3', '4', '5'], key=int) + True + >>> is_sorted([5, 4, 3, 1, 2], reverse=True) + False + + If *strict*, tests for strict sorting, that is, returns ``False`` if equal + elements are found: + + >>> is_sorted([1, 2, 2]) + True + >>> is_sorted([1, 2, 2], strict=True) + False + + The function returns ``False`` after encountering the first out-of-order + item. If there are no out-of-order items, the iterable is exhausted. + """ + + compare = (le if reverse else ge) if strict else (lt if reverse else gt) + it = iterable if key is None else map(key, iterable) + return not any(starmap(compare, pairwise(it))) + + +class AbortThread(BaseException): + pass + + +class callback_iter: + """Convert a function that uses callbacks to an iterator. + + Let *func* be a function that takes a `callback` keyword argument. + For example: + + >>> def func(callback=None): + ... for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]: + ... if callback: + ... callback(i, c) + ... return 4 + + + Use ``with callback_iter(func)`` to get an iterator over the parameters + that are delivered to the callback. + + >>> with callback_iter(func) as it: + ... for args, kwargs in it: + ... print(args) + (1, 'a') + (2, 'b') + (3, 'c') + + The function will be called in a background thread. The ``done`` property + indicates whether it has completed execution. + + >>> it.done + True + + If it completes successfully, its return value will be available + in the ``result`` property. + + >>> it.result + 4 + + Notes: + + * If the function uses some keyword argument besides ``callback``, supply + *callback_kwd*. + * If it finished executing, but raised an exception, accessing the + ``result`` property will raise the same exception. + * If it hasn't finished executing, accessing the ``result`` + property from within the ``with`` block will raise ``RuntimeError``. + * If it hasn't finished executing, accessing the ``result`` property from + outside the ``with`` block will raise a + ``more_itertools.AbortThread`` exception. + * Provide *wait_seconds* to adjust how frequently the it is polled for + output. + + """ + + def __init__(self, func, callback_kwd='callback', wait_seconds=0.1): + self._func = func + self._callback_kwd = callback_kwd + self._aborted = False + self._future = None + self._wait_seconds = wait_seconds + self._executor = ThreadPoolExecutor(max_workers=1) + self._iterator = self._reader() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._aborted = True + self._executor.shutdown() + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterator) + + @property + def done(self): + if self._future is None: + return False + return self._future.done() + + @property + def result(self): + if not self.done: + raise RuntimeError('Function has not yet completed') + + return self._future.result() + + def _reader(self): + q = Queue() + + def callback(*args, **kwargs): + if self._aborted: + raise AbortThread('canceled by user') + + q.put((args, kwargs)) + + self._future = self._executor.submit( + self._func, **{self._callback_kwd: callback} + ) + + while True: + try: + item = q.get(timeout=self._wait_seconds) + except Empty: + pass + else: + q.task_done() + yield item + + if self._future.done(): + break + + remaining = [] + while True: + try: + item = q.get_nowait() + except Empty: + break + else: + q.task_done() + remaining.append(item) + q.join() + yield from remaining + + +def windowed_complete(iterable, n): + """ + Yield ``(beginning, middle, end)`` tuples, where: + + * Each ``middle`` has *n* items from *iterable* + * Each ``beginning`` has the items before the ones in ``middle`` + * Each ``end`` has the items after the ones in ``middle`` + + >>> iterable = range(7) + >>> n = 3 + >>> for beginning, middle, end in windowed_complete(iterable, n): + ... print(beginning, middle, end) + () (0, 1, 2) (3, 4, 5, 6) + (0,) (1, 2, 3) (4, 5, 6) + (0, 1) (2, 3, 4) (5, 6) + (0, 1, 2) (3, 4, 5) (6,) + (0, 1, 2, 3) (4, 5, 6) () + + Note that *n* must be at least 0 and most equal to the length of + *iterable*. + + This function will exhaust the iterable and may require significant + storage. + """ + if n < 0: + raise ValueError('n must be >= 0') + + seq = tuple(iterable) + size = len(seq) + + if n > size: + raise ValueError('n must be <= len(seq)') + + for i in range(size - n + 1): + beginning = seq[:i] + middle = seq[i : i + n] + end = seq[i + n :] + yield beginning, middle, end + + +def all_unique(iterable, key=None): + """ + Returns ``True`` if all the elements of *iterable* are unique (no two + elements are equal). + + >>> all_unique('ABCB') + False + + If a *key* function is specified, it will be used to make comparisons. + + >>> all_unique('ABCb') + True + >>> all_unique('ABCb', str.lower) + False + + The function returns as soon as the first non-unique element is + encountered. Iterables with a mix of hashable and unhashable items can + be used, but the function will be slower for unhashable items. + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + for element in map(key, iterable) if key else iterable: + try: + if element in seenset: + return False + seenset_add(element) + except TypeError: + if element in seenlist: + return False + seenlist_add(element) + return True + + +def nth_product(index, *args): + """Equivalent to ``list(product(*args))[index]``. + + The products of *args* can be ordered lexicographically. + :func:`nth_product` computes the product at sort position *index* without + computing the previous products. + + >>> nth_product(8, range(2), range(2), range(2), range(2)) + (1, 0, 0, 0) + + ``IndexError`` will be raised if the given *index* is invalid. + """ + pools = list(map(tuple, reversed(args))) + ns = list(map(len, pools)) + + c = reduce(mul, ns) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + result = [] + for pool, n in zip(pools, ns): + result.append(pool[index % n]) + index //= n + + return tuple(reversed(result)) + + +def nth_permutation(iterable, r, index): + """Equivalent to ``list(permutations(iterable, r))[index]``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`nth_permutation` + computes the subsequence at sort position *index* directly, without + computing the previous subsequences. + + >>> nth_permutation('ghijk', 2, 5) + ('h', 'i') + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. + """ + pool = list(iterable) + n = len(pool) + + if r is None or r == n: + r, c = n, factorial(n) + elif not 0 <= r < n: + raise ValueError + else: + c = factorial(n) // factorial(n - r) + + if index < 0: + index += c + + if not 0 <= index < c: + raise IndexError + + if c == 0: + return tuple() + + result = [0] * r + q = index * factorial(n) // c if r < n else index + for d in range(1, n + 1): + q, i = divmod(q, d) + if 0 <= n - d < r: + result[n - d] = i + if q == 0: + break + + return tuple(map(pool.pop, result)) + + +def value_chain(*args): + """Yield all arguments passed to the function in the same order in which + they were passed. If an argument itself is iterable then iterate over its + values. + + >>> list(value_chain(1, 2, 3, [4, 5, 6])) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and are emitted + as-is: + + >>> list(value_chain('12', '34', ['56', '78'])) + ['12', '34', '56', '78'] + + + Multiple levels of nesting are not flattened. + + """ + for value in args: + if isinstance(value, (str, bytes)): + yield value + continue + try: + yield from value + except TypeError: + yield value + + +def product_index(element, *args): + """Equivalent to ``list(product(*args)).index(element)`` + + The products of *args* can be ordered lexicographically. + :func:`product_index` computes the first index of *element* without + computing the previous products. + + >>> product_index([8, 2], range(10), range(5)) + 42 + + ``ValueError`` will be raised if the given *element* isn't in the product + of *args*. + """ + index = 0 + + for x, pool in zip_longest(element, args, fillvalue=_marker): + if x is _marker or pool is _marker: + raise ValueError('element is not a product of args') + + pool = tuple(pool) + index = index * len(pool) + pool.index(x) + + return index + + +def combination_index(element, iterable): + """Equivalent to ``list(combinations(iterable, r)).index(element)`` + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`combination_index` computes the index of the + first *element*, without computing the previous combinations. + + >>> combination_index('adf', 'abcdefg') + 10 + + ``ValueError`` will be raised if the given *element* isn't one of the + combinations of *iterable*. + """ + element = enumerate(element) + k, y = next(element, (None, None)) + if k is None: + return 0 + + indexes = [] + pool = enumerate(iterable) + for n, x in pool: + if x == y: + indexes.append(n) + tmp, y = next(element, (None, None)) + if tmp is None: + break + else: + k = tmp + else: + raise ValueError('element is not a combination of iterable') + + n, _ = last(pool, default=(n, None)) + + # Python versiosn below 3.8 don't have math.comb + index = 1 + for i, j in enumerate(reversed(indexes), start=1): + j = n - j + if i <= j: + index += factorial(j) // (factorial(i) * factorial(j - i)) + + return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index + + +def permutation_index(element, iterable): + """Equivalent to ``list(permutations(iterable, r)).index(element)``` + + The subsequences of *iterable* that are of length *r* where order is + important can be ordered lexicographically. :func:`permutation_index` + computes the index of the first *element* directly, without computing + the previous permutations. + + >>> permutation_index([1, 3, 2], range(5)) + 19 + + ``ValueError`` will be raised if the given *element* isn't one of the + permutations of *iterable*. + """ + index = 0 + pool = list(iterable) + for i, x in zip(range(len(pool), -1, -1), element): + r = pool.index(x) + index = index * i + r + del pool[r] + + return index + + +class countable: + """Wrap *iterable* and keep a count of how many items have been consumed. + + The ``items_seen`` attribute starts at ``0`` and increments as the iterable + is consumed: + + >>> iterable = map(str, range(10)) + >>> it = countable(iterable) + >>> it.items_seen + 0 + >>> next(it), next(it) + ('0', '1') + >>> list(it) + ['2', '3', '4', '5', '6', '7', '8', '9'] + >>> it.items_seen + 10 + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self.items_seen = 0 + + def __iter__(self): + return self + + def __next__(self): + item = next(self._it) + self.items_seen += 1 + + return item + + +def chunked_even(iterable, n): + """Break *iterable* into lists of approximately length *n*. + Items are distributed such the lengths of the lists differ by at most + 1 item. + + >>> iterable = [1, 2, 3, 4, 5, 6, 7] + >>> n = 3 + >>> list(chunked_even(iterable, n)) # List lengths: 3, 2, 2 + [[1, 2, 3], [4, 5], [6, 7]] + >>> list(chunked(iterable, n)) # List lengths: 3, 3, 1 + [[1, 2, 3], [4, 5, 6], [7]] + + """ + + len_method = getattr(iterable, '__len__', None) + + if len_method is None: + return _chunked_even_online(iterable, n) + else: + return _chunked_even_finite(iterable, len_method(), n) + + +def _chunked_even_online(iterable, n): + buffer = [] + maxbuf = n + (n - 2) * (n - 1) + for x in iterable: + buffer.append(x) + if len(buffer) == maxbuf: + yield buffer[:n] + buffer = buffer[n:] + yield from _chunked_even_finite(buffer, len(buffer), n) + + +def _chunked_even_finite(iterable, N, n): + if N < 1: + return + + # Lists are either size `full_size <= n` or `partial_size = full_size - 1` + q, r = divmod(N, n) + num_lists = q + (1 if r > 0 else 0) + q, r = divmod(N, num_lists) + full_size = q + (1 if r > 0 else 0) + partial_size = full_size - 1 + num_full = N - partial_size * num_lists + num_partial = num_lists - num_full + + buffer = [] + iterator = iter(iterable) + + # Yield num_full lists of full_size + for x in iterator: + buffer.append(x) + if len(buffer) == full_size: + yield buffer + buffer = [] + num_full -= 1 + if num_full <= 0: + break + + # Yield num_partial lists of partial_size + for x in iterator: + buffer.append(x) + if len(buffer) == partial_size: + yield buffer + buffer = [] + num_partial -= 1 + + +def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False): + """A version of :func:`zip` that "broadcasts" any scalar + (i.e., non-iterable) items into output tuples. + + >>> iterable_1 = [1, 2, 3] + >>> iterable_2 = ['a', 'b', 'c'] + >>> scalar = '_' + >>> list(zip_broadcast(iterable_1, iterable_2, scalar)) + [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')] + + The *scalar_types* keyword argument determines what types are considered + scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to + treat strings and byte strings as iterable: + + >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None)) + [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')] + + If the *strict* keyword argument is ``True``, then + ``UnequalIterablesError`` will be raised if any of the iterables have + different lengthss. + """ + + def is_scalar(obj): + if scalar_types and isinstance(obj, scalar_types): + return True + try: + iter(obj) + except TypeError: + return True + else: + return False + + size = len(objects) + if not size: + return + + iterables, iterable_positions = [], [] + scalars, scalar_positions = [], [] + for i, obj in enumerate(objects): + if is_scalar(obj): + scalars.append(obj) + scalar_positions.append(i) + else: + iterables.append(iter(obj)) + iterable_positions.append(i) + + if len(scalars) == size: + yield tuple(objects) + return + + zipper = _zip_equal if strict else zip + for item in zipper(*iterables): + new_item = [None] * size + + for i, elem in zip(iterable_positions, item): + new_item[i] = elem + + for i, elem in zip(scalar_positions, scalars): + new_item[i] = elem + + yield tuple(new_item) + + +def unique_in_window(iterable, n, key=None): + """Yield the items from *iterable* that haven't been seen recently. + *n* is the size of the lookback window. + + >>> iterable = [0, 1, 0, 2, 3, 0] + >>> n = 3 + >>> list(unique_in_window(iterable, n)) + [0, 1, 2, 3, 0] + + The *key* function, if provided, will be used to determine uniqueness: + + >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower())) + ['a', 'b', 'c', 'd', 'a'] + + The items in *iterable* must be hashable. + + """ + if n <= 0: + raise ValueError('n must be greater than 0') + + window = deque(maxlen=n) + uniques = set() + use_key = key is not None + + for item in iterable: + k = key(item) if use_key else item + if k in uniques: + continue + + if len(uniques) == n: + uniques.discard(window[0]) + + uniques.add(k) + window.append(k) + + yield item + + +def duplicates_everseen(iterable, key=None): + """Yield duplicate elements after their first appearance. + + >>> list(duplicates_everseen('mississippi')) + ['s', 'i', 's', 's', 'i', 'p', 'i'] + >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower)) + ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a'] + + This function is analagous to :func:`unique_everseen` and is subject to + the same performance considerations. + + """ + seen_set = set() + seen_list = [] + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seen_set: + seen_set.add(k) + else: + yield element + except TypeError: + if k not in seen_list: + seen_list.append(k) + else: + yield element + + +def duplicates_justseen(iterable, key=None): + """Yields serially-duplicate elements after their first appearance. + + >>> list(duplicates_justseen('mississippi')) + ['s', 's', 'p'] + >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower)) + ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a'] + + This function is analagous to :func:`unique_justseen`. + + """ + return flatten( + map( + lambda group_tuple: islice_extended(group_tuple[1])[1:], + groupby(iterable, key), + ) + ) + + +def minmax(iterable_or_value, *others, key=None, default=_marker): + """Returns both the smallest and largest items in an iterable + or the largest of two or more arguments. + + >>> minmax([3, 1, 5]) + (1, 5) + + >>> minmax(4, 2, 6) + (2, 6) + + If a *key* function is provided, it will be used to transform the input + items for comparison. + + >>> minmax([5, 30], key=str) # '30' sorts before '5' + (30, 5) + + If a *default* value is provided, it will be returned if there are no + input items. + + >>> minmax([], default=(0, 0)) + (0, 0) + + Otherwise ``ValueError`` is raised. + + This function is based on the + `recipe `__ by + Raymond Hettinger and takes care to minimize the number of comparisons + performed. + """ + iterable = (iterable_or_value, *others) if others else iterable_or_value + + it = iter(iterable) + + try: + lo = hi = next(it) + except StopIteration as e: + if default is _marker: + raise ValueError( + '`minmax()` argument is an empty iterable. ' + 'Provide a `default` value to suppress this error.' + ) from e + return default + + # Different branches depending on the presence of key. This saves a lot + # of unimportant copies which would slow the "key=None" branch + # significantly down. + if key is None: + for x, y in zip_longest(it, it, fillvalue=lo): + if y < x: + x, y = y, x + if x < lo: + lo = x + if hi < y: + hi = y + + else: + lo_key = hi_key = key(lo) + + for x, y in zip_longest(it, it, fillvalue=lo): + + x_key, y_key = key(x), key(y) + + if y_key < x_key: + x, y, x_key, y_key = y, x, y_key, x_key + if x_key < lo_key: + lo, lo_key = x, x_key + if hi_key < y_key: + hi, hi_key = y, y_key + + return lo, hi diff --git a/lib/more_itertools/more.pyi b/lib/more_itertools/more.pyi new file mode 100644 index 00000000..fe7d4bdd --- /dev/null +++ b/lib/more_itertools/more.pyi @@ -0,0 +1,664 @@ +"""Stubs for more_itertools.more""" + +from typing import ( + Any, + Callable, + Container, + Dict, + Generic, + Hashable, + Iterable, + Iterator, + List, + Optional, + Reversible, + Sequence, + Sized, + Tuple, + Union, + TypeVar, + type_check_only, +) +from types import TracebackType +from typing_extensions import ContextManager, Protocol, Type, overload + +# Type and type variable definitions +_T = TypeVar('_T') +_T1 = TypeVar('_T1') +_T2 = TypeVar('_T2') +_U = TypeVar('_U') +_V = TypeVar('_V') +_W = TypeVar('_W') +_T_co = TypeVar('_T_co', covariant=True) +_GenFn = TypeVar('_GenFn', bound=Callable[..., Iterator[object]]) +_Raisable = Union[BaseException, 'Type[BaseException]'] + +@type_check_only +class _SizedIterable(Protocol[_T_co], Sized, Iterable[_T_co]): ... + +@type_check_only +class _SizedReversible(Protocol[_T_co], Sized, Reversible[_T_co]): ... + +def chunked( + iterable: Iterable[_T], n: Optional[int], strict: bool = ... +) -> Iterator[List[_T]]: ... +@overload +def first(iterable: Iterable[_T]) -> _T: ... +@overload +def first(iterable: Iterable[_T], default: _U) -> Union[_T, _U]: ... +@overload +def last(iterable: Iterable[_T]) -> _T: ... +@overload +def last(iterable: Iterable[_T], default: _U) -> Union[_T, _U]: ... +@overload +def nth_or_last(iterable: Iterable[_T], n: int) -> _T: ... +@overload +def nth_or_last( + iterable: Iterable[_T], n: int, default: _U +) -> Union[_T, _U]: ... + +class peekable(Generic[_T], Iterator[_T]): + def __init__(self, iterable: Iterable[_T]) -> None: ... + def __iter__(self) -> peekable[_T]: ... + def __bool__(self) -> bool: ... + @overload + def peek(self) -> _T: ... + @overload + def peek(self, default: _U) -> Union[_T, _U]: ... + def prepend(self, *items: _T) -> None: ... + def __next__(self) -> _T: ... + @overload + def __getitem__(self, index: int) -> _T: ... + @overload + def __getitem__(self, index: slice) -> List[_T]: ... + +def collate(*iterables: Iterable[_T], **kwargs: Any) -> Iterable[_T]: ... +def consumer(func: _GenFn) -> _GenFn: ... +def ilen(iterable: Iterable[object]) -> int: ... +def iterate(func: Callable[[_T], _T], start: _T) -> Iterator[_T]: ... +def with_iter( + context_manager: ContextManager[Iterable[_T]], +) -> Iterator[_T]: ... +def one( + iterable: Iterable[_T], + too_short: Optional[_Raisable] = ..., + too_long: Optional[_Raisable] = ..., +) -> _T: ... +def raise_(exception: _Raisable, *args: Any) -> None: ... +def strictly_n( + iterable: Iterable[_T], + n: int, + too_short: Optional[_GenFn] = ..., + too_long: Optional[_GenFn] = ..., +) -> List[_T]: ... +def distinct_permutations( + iterable: Iterable[_T], r: Optional[int] = ... +) -> Iterator[Tuple[_T, ...]]: ... +def intersperse( + e: _U, iterable: Iterable[_T], n: int = ... +) -> Iterator[Union[_T, _U]]: ... +def unique_to_each(*iterables: Iterable[_T]) -> List[List[_T]]: ... +@overload +def windowed( + seq: Iterable[_T], n: int, *, step: int = ... +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def windowed( + seq: Iterable[_T], n: int, fillvalue: _U, step: int = ... +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +def substrings(iterable: Iterable[_T]) -> Iterator[Tuple[_T, ...]]: ... +def substrings_indexes( + seq: Sequence[_T], reverse: bool = ... +) -> Iterator[Tuple[Sequence[_T], int, int]]: ... + +class bucket(Generic[_T, _U], Container[_U]): + def __init__( + self, + iterable: Iterable[_T], + key: Callable[[_T], _U], + validator: Optional[Callable[[object], object]] = ..., + ) -> None: ... + def __contains__(self, value: object) -> bool: ... + def __iter__(self) -> Iterator[_U]: ... + def __getitem__(self, value: object) -> Iterator[_T]: ... + +def spy( + iterable: Iterable[_T], n: int = ... +) -> Tuple[List[_T], Iterator[_T]]: ... +def interleave(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def interleave_longest(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def interleave_evenly( + iterables: List[Iterable[_T]], lengths: Optional[List[int]] = ... +) -> Iterator[_T]: ... +def collapse( + iterable: Iterable[Any], + base_type: Optional[type] = ..., + levels: Optional[int] = ..., +) -> Iterator[Any]: ... +@overload +def side_effect( + func: Callable[[_T], object], + iterable: Iterable[_T], + chunk_size: None = ..., + before: Optional[Callable[[], object]] = ..., + after: Optional[Callable[[], object]] = ..., +) -> Iterator[_T]: ... +@overload +def side_effect( + func: Callable[[List[_T]], object], + iterable: Iterable[_T], + chunk_size: int, + before: Optional[Callable[[], object]] = ..., + after: Optional[Callable[[], object]] = ..., +) -> Iterator[_T]: ... +def sliced( + seq: Sequence[_T], n: int, strict: bool = ... +) -> Iterator[Sequence[_T]]: ... +def split_at( + iterable: Iterable[_T], + pred: Callable[[_T], object], + maxsplit: int = ..., + keep_separator: bool = ..., +) -> Iterator[List[_T]]: ... +def split_before( + iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ... +) -> Iterator[List[_T]]: ... +def split_after( + iterable: Iterable[_T], pred: Callable[[_T], object], maxsplit: int = ... +) -> Iterator[List[_T]]: ... +def split_when( + iterable: Iterable[_T], + pred: Callable[[_T, _T], object], + maxsplit: int = ..., +) -> Iterator[List[_T]]: ... +def split_into( + iterable: Iterable[_T], sizes: Iterable[Optional[int]] +) -> Iterator[List[_T]]: ... +@overload +def padded( + iterable: Iterable[_T], + *, + n: Optional[int] = ..., + next_multiple: bool = ... +) -> Iterator[Optional[_T]]: ... +@overload +def padded( + iterable: Iterable[_T], + fillvalue: _U, + n: Optional[int] = ..., + next_multiple: bool = ..., +) -> Iterator[Union[_T, _U]]: ... +@overload +def repeat_last(iterable: Iterable[_T]) -> Iterator[_T]: ... +@overload +def repeat_last( + iterable: Iterable[_T], default: _U +) -> Iterator[Union[_T, _U]]: ... +def distribute(n: int, iterable: Iterable[_T]) -> List[Iterator[_T]]: ... +@overload +def stagger( + iterable: Iterable[_T], + offsets: _SizedIterable[int] = ..., + longest: bool = ..., +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def stagger( + iterable: Iterable[_T], + offsets: _SizedIterable[int] = ..., + longest: bool = ..., + fillvalue: _U = ..., +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... + +class UnequalIterablesError(ValueError): + def __init__( + self, details: Optional[Tuple[int, int, int]] = ... + ) -> None: ... + +@overload +def zip_equal(__iter1: Iterable[_T1]) -> Iterator[Tuple[_T1]]: ... +@overload +def zip_equal( + __iter1: Iterable[_T1], __iter2: Iterable[_T2] +) -> Iterator[Tuple[_T1, _T2]]: ... +@overload +def zip_equal( + __iter1: Iterable[_T], + __iter2: Iterable[_T], + __iter3: Iterable[_T], + *iterables: Iterable[_T] +) -> Iterator[Tuple[_T, ...]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: None = None +) -> Iterator[Tuple[Optional[_T1]]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + __iter2: Iterable[_T2], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: None = None +) -> Iterator[Tuple[Optional[_T1], Optional[_T2]]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T], + __iter2: Iterable[_T], + __iter3: Iterable[_T], + *iterables: Iterable[_T], + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: None = None +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: _U, +) -> Iterator[Tuple[Union[_T1, _U]]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T1], + __iter2: Iterable[_T2], + *, + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: _U, +) -> Iterator[Tuple[Union[_T1, _U], Union[_T2, _U]]]: ... +@overload +def zip_offset( + __iter1: Iterable[_T], + __iter2: Iterable[_T], + __iter3: Iterable[_T], + *iterables: Iterable[_T], + offsets: _SizedIterable[int], + longest: bool = ..., + fillvalue: _U, +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +def sort_together( + iterables: Iterable[Iterable[_T]], + key_list: Iterable[int] = ..., + key: Optional[Callable[..., Any]] = ..., + reverse: bool = ..., +) -> List[Tuple[_T, ...]]: ... +def unzip(iterable: Iterable[Sequence[_T]]) -> Tuple[Iterator[_T], ...]: ... +def divide(n: int, iterable: Iterable[_T]) -> List[Iterator[_T]]: ... +def always_iterable( + obj: object, + base_type: Union[ + type, Tuple[Union[type, Tuple[Any, ...]], ...], None + ] = ..., +) -> Iterator[Any]: ... +def adjacent( + predicate: Callable[[_T], bool], + iterable: Iterable[_T], + distance: int = ..., +) -> Iterator[Tuple[bool, _T]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None = None, + valuefunc: None = None, + reducefunc: None = None, +) -> Iterator[Tuple[_T, Iterator[_T]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None, + reducefunc: None, +) -> Iterator[Tuple[_U, Iterator[_T]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None, + valuefunc: Callable[[_T], _V], + reducefunc: None, +) -> Iterable[Tuple[_T, Iterable[_V]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: None, +) -> Iterable[Tuple[_U, Iterator[_V]]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None, + valuefunc: None, + reducefunc: Callable[[Iterator[_T]], _W], +) -> Iterable[Tuple[_T, _W]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None, + reducefunc: Callable[[Iterator[_T]], _W], +) -> Iterable[Tuple[_U, _W]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: None, + valuefunc: Callable[[_T], _V], + reducefunc: Callable[[Iterable[_V]], _W], +) -> Iterable[Tuple[_T, _W]]: ... +@overload +def groupby_transform( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: Callable[[Iterable[_V]], _W], +) -> Iterable[Tuple[_U, _W]]: ... + +class numeric_range(Generic[_T, _U], Sequence[_T], Hashable, Reversible[_T]): + @overload + def __init__(self, __stop: _T) -> None: ... + @overload + def __init__(self, __start: _T, __stop: _T) -> None: ... + @overload + def __init__(self, __start: _T, __stop: _T, __step: _U) -> None: ... + def __bool__(self) -> bool: ... + def __contains__(self, elem: object) -> bool: ... + def __eq__(self, other: object) -> bool: ... + @overload + def __getitem__(self, key: int) -> _T: ... + @overload + def __getitem__(self, key: slice) -> numeric_range[_T, _U]: ... + def __hash__(self) -> int: ... + def __iter__(self) -> Iterator[_T]: ... + def __len__(self) -> int: ... + def __reduce__( + self, + ) -> Tuple[Type[numeric_range[_T, _U]], Tuple[_T, _T, _U]]: ... + def __repr__(self) -> str: ... + def __reversed__(self) -> Iterator[_T]: ... + def count(self, value: _T) -> int: ... + def index(self, value: _T) -> int: ... # type: ignore + +def count_cycle( + iterable: Iterable[_T], n: Optional[int] = ... +) -> Iterable[Tuple[int, _T]]: ... +def mark_ends( + iterable: Iterable[_T], +) -> Iterable[Tuple[bool, bool, _T]]: ... +def locate( + iterable: Iterable[object], + pred: Callable[..., Any] = ..., + window_size: Optional[int] = ..., +) -> Iterator[int]: ... +def lstrip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... +def rstrip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... +def strip( + iterable: Iterable[_T], pred: Callable[[_T], object] +) -> Iterator[_T]: ... + +class islice_extended(Generic[_T], Iterator[_T]): + def __init__( + self, iterable: Iterable[_T], *args: Optional[int] + ) -> None: ... + def __iter__(self) -> islice_extended[_T]: ... + def __next__(self) -> _T: ... + def __getitem__(self, index: slice) -> islice_extended[_T]: ... + +def always_reversible(iterable: Iterable[_T]) -> Iterator[_T]: ... +def consecutive_groups( + iterable: Iterable[_T], ordering: Callable[[_T], int] = ... +) -> Iterator[Iterator[_T]]: ... +@overload +def difference( + iterable: Iterable[_T], + func: Callable[[_T, _T], _U] = ..., + *, + initial: None = ... +) -> Iterator[Union[_T, _U]]: ... +@overload +def difference( + iterable: Iterable[_T], func: Callable[[_T, _T], _U] = ..., *, initial: _U +) -> Iterator[_U]: ... + +class SequenceView(Generic[_T], Sequence[_T]): + def __init__(self, target: Sequence[_T]) -> None: ... + @overload + def __getitem__(self, index: int) -> _T: ... + @overload + def __getitem__(self, index: slice) -> Sequence[_T]: ... + def __len__(self) -> int: ... + +class seekable(Generic[_T], Iterator[_T]): + def __init__( + self, iterable: Iterable[_T], maxlen: Optional[int] = ... + ) -> None: ... + def __iter__(self) -> seekable[_T]: ... + def __next__(self) -> _T: ... + def __bool__(self) -> bool: ... + @overload + def peek(self) -> _T: ... + @overload + def peek(self, default: _U) -> Union[_T, _U]: ... + def elements(self) -> SequenceView[_T]: ... + def seek(self, index: int) -> None: ... + +class run_length: + @staticmethod + def encode(iterable: Iterable[_T]) -> Iterator[Tuple[_T, int]]: ... + @staticmethod + def decode(iterable: Iterable[Tuple[_T, int]]) -> Iterator[_T]: ... + +def exactly_n( + iterable: Iterable[_T], n: int, predicate: Callable[[_T], object] = ... +) -> bool: ... +def circular_shifts(iterable: Iterable[_T]) -> List[Tuple[_T, ...]]: ... +def make_decorator( + wrapping_func: Callable[..., _U], result_index: int = ... +) -> Callable[..., Callable[[Callable[..., Any]], Callable[..., _U]]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None = ..., + reducefunc: None = ..., +) -> Dict[_U, List[_T]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: None = ..., +) -> Dict[_U, List[_V]]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: None = ..., + reducefunc: Callable[[List[_T]], _W] = ..., +) -> Dict[_U, _W]: ... +@overload +def map_reduce( + iterable: Iterable[_T], + keyfunc: Callable[[_T], _U], + valuefunc: Callable[[_T], _V], + reducefunc: Callable[[List[_V]], _W], +) -> Dict[_U, _W]: ... +def rlocate( + iterable: Iterable[_T], + pred: Callable[..., object] = ..., + window_size: Optional[int] = ..., +) -> Iterator[int]: ... +def replace( + iterable: Iterable[_T], + pred: Callable[..., object], + substitutes: Iterable[_U], + count: Optional[int] = ..., + window_size: int = ..., +) -> Iterator[Union[_T, _U]]: ... +def partitions(iterable: Iterable[_T]) -> Iterator[List[List[_T]]]: ... +def set_partitions( + iterable: Iterable[_T], k: Optional[int] = ... +) -> Iterator[List[List[_T]]]: ... + +class time_limited(Generic[_T], Iterator[_T]): + def __init__( + self, limit_seconds: float, iterable: Iterable[_T] + ) -> None: ... + def __iter__(self) -> islice_extended[_T]: ... + def __next__(self) -> _T: ... + +@overload +def only( + iterable: Iterable[_T], *, too_long: Optional[_Raisable] = ... +) -> Optional[_T]: ... +@overload +def only( + iterable: Iterable[_T], default: _U, too_long: Optional[_Raisable] = ... +) -> Union[_T, _U]: ... +def ichunked(iterable: Iterable[_T], n: int) -> Iterator[Iterator[_T]]: ... +def distinct_combinations( + iterable: Iterable[_T], r: int +) -> Iterator[Tuple[_T, ...]]: ... +def filter_except( + validator: Callable[[Any], object], + iterable: Iterable[_T], + *exceptions: Type[BaseException] +) -> Iterator[_T]: ... +def map_except( + function: Callable[[Any], _U], + iterable: Iterable[_T], + *exceptions: Type[BaseException] +) -> Iterator[_U]: ... +def map_if( + iterable: Iterable[Any], + pred: Callable[[Any], bool], + func: Callable[[Any], Any], + func_else: Optional[Callable[[Any], Any]] = ..., +) -> Iterator[Any]: ... +def sample( + iterable: Iterable[_T], + k: int, + weights: Optional[Iterable[float]] = ..., +) -> List[_T]: ... +def is_sorted( + iterable: Iterable[_T], + key: Optional[Callable[[_T], _U]] = ..., + reverse: bool = False, + strict: bool = False, +) -> bool: ... + +class AbortThread(BaseException): + pass + +class callback_iter(Generic[_T], Iterator[_T]): + def __init__( + self, + func: Callable[..., Any], + callback_kwd: str = ..., + wait_seconds: float = ..., + ) -> None: ... + def __enter__(self) -> callback_iter[_T]: ... + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> Optional[bool]: ... + def __iter__(self) -> callback_iter[_T]: ... + def __next__(self) -> _T: ... + def _reader(self) -> Iterator[_T]: ... + @property + def done(self) -> bool: ... + @property + def result(self) -> Any: ... + +def windowed_complete( + iterable: Iterable[_T], n: int +) -> Iterator[Tuple[_T, ...]]: ... +def all_unique( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ... +) -> bool: ... +def nth_product(index: int, *args: Iterable[_T]) -> Tuple[_T, ...]: ... +def nth_permutation( + iterable: Iterable[_T], r: int, index: int +) -> Tuple[_T, ...]: ... +def value_chain(*args: Union[_T, Iterable[_T]]) -> Iterable[_T]: ... +def product_index(element: Iterable[_T], *args: Iterable[_T]) -> int: ... +def combination_index( + element: Iterable[_T], iterable: Iterable[_T] +) -> int: ... +def permutation_index( + element: Iterable[_T], iterable: Iterable[_T] +) -> int: ... +def repeat_each(iterable: Iterable[_T], n: int = ...) -> Iterator[_T]: ... + +class countable(Generic[_T], Iterator[_T]): + def __init__(self, iterable: Iterable[_T]) -> None: ... + def __iter__(self) -> countable[_T]: ... + def __next__(self) -> _T: ... + +def chunked_even(iterable: Iterable[_T], n: int) -> Iterator[List[_T]]: ... +def zip_broadcast( + *objects: Union[_T, Iterable[_T]], + scalar_types: Union[ + type, Tuple[Union[type, Tuple[Any, ...]], ...], None + ] = ..., + strict: bool = ... +) -> Iterable[Tuple[_T, ...]]: ... +def unique_in_window( + iterable: Iterable[_T], n: int, key: Optional[Callable[[_T], _U]] = ... +) -> Iterator[_T]: ... +def duplicates_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ... +) -> Iterator[_T]: ... +def duplicates_justseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ... +) -> Iterator[_T]: ... + +class _SupportsLessThan(Protocol): + def __lt__(self, __other: Any) -> bool: ... + +_SupportsLessThanT = TypeVar("_SupportsLessThanT", bound=_SupportsLessThan) + +@overload +def minmax( + iterable_or_value: Iterable[_SupportsLessThanT], *, key: None = None +) -> Tuple[_SupportsLessThanT, _SupportsLessThanT]: ... +@overload +def minmax( + iterable_or_value: Iterable[_T], *, key: Callable[[_T], _SupportsLessThan] +) -> Tuple[_T, _T]: ... +@overload +def minmax( + iterable_or_value: Iterable[_SupportsLessThanT], + *, + key: None = None, + default: _U +) -> Union[_U, Tuple[_SupportsLessThanT, _SupportsLessThanT]]: ... +@overload +def minmax( + iterable_or_value: Iterable[_T], + *, + key: Callable[[_T], _SupportsLessThan], + default: _U, +) -> Union[_U, Tuple[_T, _T]]: ... +@overload +def minmax( + iterable_or_value: _SupportsLessThanT, + __other: _SupportsLessThanT, + *others: _SupportsLessThanT +) -> Tuple[_SupportsLessThanT, _SupportsLessThanT]: ... +@overload +def minmax( + iterable_or_value: _T, + __other: _T, + *others: _T, + key: Callable[[_T], _SupportsLessThan] +) -> Tuple[_T, _T]: ... diff --git a/lib/more_itertools/py.typed b/lib/more_itertools/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/more_itertools/recipes.py b/lib/more_itertools/recipes.py new file mode 100644 index 00000000..a2596423 --- /dev/null +++ b/lib/more_itertools/recipes.py @@ -0,0 +1,698 @@ +"""Imported from the recipes section of the itertools documentation. + +All functions taken from the recipes section of the itertools library docs +[1]_. +Some backward-compatible usability improvements have been made. + +.. [1] http://docs.python.org/library/itertools.html#recipes + +""" +import warnings +from collections import deque +from itertools import ( + chain, + combinations, + count, + cycle, + groupby, + islice, + repeat, + starmap, + tee, + zip_longest, +) +import operator +from random import randrange, sample, choice + +__all__ = [ + 'all_equal', + 'before_and_after', + 'consume', + 'convolve', + 'dotproduct', + 'first_true', + 'flatten', + 'grouper', + 'iter_except', + 'ncycles', + 'nth', + 'nth_combination', + 'padnone', + 'pad_none', + 'pairwise', + 'partition', + 'powerset', + 'prepend', + 'quantify', + 'random_combination_with_replacement', + 'random_combination', + 'random_permutation', + 'random_product', + 'repeatfunc', + 'roundrobin', + 'sliding_window', + 'tabulate', + 'tail', + 'take', + 'triplewise', + 'unique_everseen', + 'unique_justseen', +] + + +def take(n, iterable): + """Return first *n* items of the iterable as a list. + + >>> take(3, range(10)) + [0, 1, 2] + + If there are fewer than *n* items in the iterable, all of them are + returned. + + >>> take(10, range(3)) + [0, 1, 2] + + """ + return list(islice(iterable, n)) + + +def tabulate(function, start=0): + """Return an iterator over the results of ``func(start)``, + ``func(start + 1)``, ``func(start + 2)``... + + *func* should be a function that accepts one integer argument. + + If *start* is not specified it defaults to 0. It will be incremented each + time the iterator is advanced. + + >>> square = lambda x: x ** 2 + >>> iterator = tabulate(square, -3) + >>> take(4, iterator) + [9, 4, 1, 0] + + """ + return map(function, count(start)) + + +def tail(n, iterable): + """Return an iterator over the last *n* items of *iterable*. + + >>> t = tail(3, 'ABCDEFG') + >>> list(t) + ['E', 'F', 'G'] + + """ + return iter(deque(iterable, maxlen=n)) + + +def consume(iterator, n=None): + """Advance *iterable* by *n* steps. If *n* is ``None``, consume it + entirely. + + Efficiently exhausts an iterator without returning values. Defaults to + consuming the whole iterator, but an optional second argument may be + provided to limit consumption. + + >>> i = (x for x in range(10)) + >>> next(i) + 0 + >>> consume(i, 3) + >>> next(i) + 4 + >>> consume(i) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + + If the iterator has fewer items remaining than the provided limit, the + whole iterator will be consumed. + + >>> i = (x for x in range(3)) + >>> consume(i, 5) + >>> next(i) + Traceback (most recent call last): + File "", line 1, in + StopIteration + + """ + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +def nth(iterable, n, default=None): + """Returns the nth item or a default value. + + >>> l = range(10) + >>> nth(l, 3) + 3 + >>> nth(l, 20, "zebra") + 'zebra' + + """ + return next(islice(iterable, n, None), default) + + +def all_equal(iterable): + """ + Returns ``True`` if all the elements are equal to each other. + + >>> all_equal('aaaa') + True + >>> all_equal('aaab') + False + + """ + g = groupby(iterable) + return next(g, True) and not next(g, False) + + +def quantify(iterable, pred=bool): + """Return the how many times the predicate is true. + + >>> quantify([True, False, True]) + 2 + + """ + return sum(map(pred, iterable)) + + +def pad_none(iterable): + """Returns the sequence of elements and then returns ``None`` indefinitely. + + >>> take(5, pad_none(range(3))) + [0, 1, 2, None, None] + + Useful for emulating the behavior of the built-in :func:`map` function. + + See also :func:`padded`. + + """ + return chain(iterable, repeat(None)) + + +padnone = pad_none + + +def ncycles(iterable, n): + """Returns the sequence elements *n* times + + >>> list(ncycles(["a", "b"], 3)) + ['a', 'b', 'a', 'b', 'a', 'b'] + + """ + return chain.from_iterable(repeat(tuple(iterable), n)) + + +def dotproduct(vec1, vec2): + """Returns the dot product of the two iterables. + + >>> dotproduct([10, 10], [20, 20]) + 400 + + """ + return sum(map(operator.mul, vec1, vec2)) + + +def flatten(listOfLists): + """Return an iterator flattening one level of nesting in a list of lists. + + >>> list(flatten([[0, 1], [2, 3]])) + [0, 1, 2, 3] + + See also :func:`collapse`, which can flatten multiple levels of nesting. + + """ + return chain.from_iterable(listOfLists) + + +def repeatfunc(func, times=None, *args): + """Call *func* with *args* repeatedly, returning an iterable over the + results. + + If *times* is specified, the iterable will terminate after that many + repetitions: + + >>> from operator import add + >>> times = 4 + >>> args = 3, 5 + >>> list(repeatfunc(add, times, *args)) + [8, 8, 8, 8] + + If *times* is ``None`` the iterable will not terminate: + + >>> from random import randrange + >>> times = None + >>> args = 1, 11 + >>> take(6, repeatfunc(randrange, times, *args)) # doctest:+SKIP + [2, 4, 8, 1, 8, 4] + + """ + if times is None: + return starmap(func, repeat(args)) + return starmap(func, repeat(args, times)) + + +def _pairwise(iterable): + """Returns an iterator of paired items, overlapping, from the original + + >>> take(4, pairwise(count())) + [(0, 1), (1, 2), (2, 3), (3, 4)] + + On Python 3.10 and above, this is an alias for :func:`itertools.pairwise`. + + """ + a, b = tee(iterable) + next(b, None) + yield from zip(a, b) + + +try: + from itertools import pairwise as itertools_pairwise +except ImportError: + pairwise = _pairwise +else: + + def pairwise(iterable): + yield from itertools_pairwise(iterable) + + pairwise.__doc__ = _pairwise.__doc__ + + +def grouper(iterable, n, fillvalue=None): + """Collect data into fixed-length chunks or blocks. + + >>> list(grouper('ABCDEFG', 3, 'x')) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] + + """ + if isinstance(iterable, int): + warnings.warn( + "grouper expects iterable as first parameter", DeprecationWarning + ) + n, iterable = iterable, n + args = [iter(iterable)] * n + return zip_longest(fillvalue=fillvalue, *args) + + +def roundrobin(*iterables): + """Yields an item from each iterable, alternating between them. + + >>> list(roundrobin('ABC', 'D', 'EF')) + ['A', 'D', 'E', 'B', 'F', 'C'] + + This function produces the same output as :func:`interleave_longest`, but + may perform better for some inputs (in particular when the number of + iterables is small). + + """ + # Recipe credited to George Sakkis + pending = len(iterables) + nexts = cycle(iter(it).__next__ for it in iterables) + while pending: + try: + for next in nexts: + yield next() + except StopIteration: + pending -= 1 + nexts = cycle(islice(nexts, pending)) + + +def partition(pred, iterable): + """ + Returns a 2-tuple of iterables derived from the input iterable. + The first yields the items that have ``pred(item) == False``. + The second yields the items that have ``pred(item) == True``. + + >>> is_odd = lambda x: x % 2 != 0 + >>> iterable = range(10) + >>> even_items, odd_items = partition(is_odd, iterable) + >>> list(even_items), list(odd_items) + ([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]) + + If *pred* is None, :func:`bool` is used. + + >>> iterable = [0, 1, False, True, '', ' '] + >>> false_items, true_items = partition(None, iterable) + >>> list(false_items), list(true_items) + ([0, False, ''], [1, True, ' ']) + + """ + if pred is None: + pred = bool + + evaluations = ((pred(x), x) for x in iterable) + t1, t2 = tee(evaluations) + return ( + (x for (cond, x) in t1 if not cond), + (x for (cond, x) in t2 if cond), + ) + + +def powerset(iterable): + """Yields all possible subsets of the iterable. + + >>> list(powerset([1, 2, 3])) + [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)] + + :func:`powerset` will operate on iterables that aren't :class:`set` + instances, so repeated elements in the input will produce repeated elements + in the output. Use :func:`unique_everseen` on the input to avoid generating + duplicates: + + >>> seq = [1, 1, 0] + >>> list(powerset(seq)) + [(), (1,), (1,), (0,), (1, 1), (1, 0), (1, 0), (1, 1, 0)] + >>> from more_itertools import unique_everseen + >>> list(powerset(unique_everseen(seq))) + [(), (1,), (0,), (1, 0)] + + """ + s = list(iterable) + return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) + + +def unique_everseen(iterable, key=None): + """ + Yield unique elements, preserving order. + + >>> list(unique_everseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D'] + >>> list(unique_everseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'D'] + + Sequences with a mix of hashable and unhashable items can be used. + The function will be slower (i.e., `O(n^2)`) for unhashable items. + + Remember that ``list`` objects are unhashable - you can use the *key* + parameter to transform the list to a tuple (which is hashable) to + avoid a slowdown. + + >>> iterable = ([1, 2], [2, 3], [1, 2]) + >>> list(unique_everseen(iterable)) # Slow + [[1, 2], [2, 3]] + >>> list(unique_everseen(iterable, key=tuple)) # Faster + [[1, 2], [2, 3]] + + Similary, you may want to convert unhashable ``set`` objects with + ``key=frozenset``. For ``dict`` objects, + ``key=lambda x: frozenset(x.items())`` can be used. + + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seenset: + seenset_add(k) + yield element + except TypeError: + if k not in seenlist: + seenlist_add(k) + yield element + + +def unique_justseen(iterable, key=None): + """Yields elements in order, ignoring serial duplicates + + >>> list(unique_justseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D', 'A', 'B'] + >>> list(unique_justseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'A', 'D'] + + """ + return map(next, map(operator.itemgetter(1), groupby(iterable, key))) + + +def iter_except(func, exception, first=None): + """Yields results from a function repeatedly until an exception is raised. + + Converts a call-until-exception interface to an iterator interface. + Like ``iter(func, sentinel)``, but uses an exception instead of a sentinel + to end the loop. + + >>> l = [0, 1, 2] + >>> list(iter_except(l.pop, IndexError)) + [2, 1, 0] + + Multiple exceptions can be specified as a stopping condition: + + >>> l = [1, 2, 3, '...', 4, 5, 6] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [7, 6, 5] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [4, 3, 2] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [] + + """ + try: + if first is not None: + yield first() + while 1: + yield func() + except exception: + pass + + +def first_true(iterable, default=None, pred=None): + """ + Returns the first true value in the iterable. + + If no true value is found, returns *default* + + If *pred* is not None, returns the first item for which + ``pred(item) == True`` . + + >>> first_true(range(10)) + 1 + >>> first_true(range(10), pred=lambda x: x > 5) + 6 + >>> first_true(range(10), default='missing', pred=lambda x: x > 9) + 'missing' + + """ + return next(filter(pred, iterable), default) + + +def random_product(*args, repeat=1): + """Draw an item at random from each of the input iterables. + + >>> random_product('abc', range(4), 'XYZ') # doctest:+SKIP + ('c', 3, 'Z') + + If *repeat* is provided as a keyword argument, that many items will be + drawn from each iterable. + + >>> random_product('abcd', range(4), repeat=2) # doctest:+SKIP + ('a', 2, 'd', 3) + + This equivalent to taking a random selection from + ``itertools.product(*args, **kwarg)``. + + """ + pools = [tuple(pool) for pool in args] * repeat + return tuple(choice(pool) for pool in pools) + + +def random_permutation(iterable, r=None): + """Return a random *r* length permutation of the elements in *iterable*. + + If *r* is not specified or is ``None``, then *r* defaults to the length of + *iterable*. + + >>> random_permutation(range(5)) # doctest:+SKIP + (3, 4, 0, 1, 2) + + This equivalent to taking a random selection from + ``itertools.permutations(iterable, r)``. + + """ + pool = tuple(iterable) + r = len(pool) if r is None else r + return tuple(sample(pool, r)) + + +def random_combination(iterable, r): + """Return a random *r* length subsequence of the elements in *iterable*. + + >>> random_combination(range(5), 3) # doctest:+SKIP + (2, 3, 4) + + This equivalent to taking a random selection from + ``itertools.combinations(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(sample(range(n), r)) + return tuple(pool[i] for i in indices) + + +def random_combination_with_replacement(iterable, r): + """Return a random *r* length subsequence of elements in *iterable*, + allowing individual elements to be repeated. + + >>> random_combination_with_replacement(range(3), 5) # doctest:+SKIP + (0, 0, 1, 2, 2) + + This equivalent to taking a random selection from + ``itertools.combinations_with_replacement(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(randrange(n) for i in range(r)) + return tuple(pool[i] for i in indices) + + +def nth_combination(iterable, r, index): + """Equivalent to ``list(combinations(iterable, r))[index]``. + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`nth_combination` computes the subsequence at + sort position *index* directly, without computing the previous + subsequences. + + >>> nth_combination(range(5), 3, 5) + (0, 3, 4) + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. + """ + pool = tuple(iterable) + n = len(pool) + if (r < 0) or (r > n): + raise ValueError + + c = 1 + k = min(r, n - r) + for i in range(1, k + 1): + c = c * (n - k + i) // i + + if index < 0: + index += c + + if (index < 0) or (index >= c): + raise IndexError + + result = [] + while r: + c, n, r = c * r // n, n - 1, r - 1 + while index >= c: + index -= c + c, n = c * (n - r) // n, n - 1 + result.append(pool[-1 - n]) + + return tuple(result) + + +def prepend(value, iterator): + """Yield *value*, followed by the elements in *iterator*. + + >>> value = '0' + >>> iterator = ['1', '2', '3'] + >>> list(prepend(value, iterator)) + ['0', '1', '2', '3'] + + To prepend multiple values, see :func:`itertools.chain` + or :func:`value_chain`. + + """ + return chain([value], iterator) + + +def convolve(signal, kernel): + """Convolve the iterable *signal* with the iterable *kernel*. + + >>> signal = (1, 2, 3, 4, 5) + >>> kernel = [3, 2, 1] + >>> list(convolve(signal, kernel)) + [3, 8, 14, 20, 26, 14, 5] + + Note: the input arguments are not interchangeable, as the *kernel* + is immediately consumed and stored. + + """ + kernel = tuple(kernel)[::-1] + n = len(kernel) + window = deque([0], maxlen=n) * n + for x in chain(signal, repeat(0, n - 1)): + window.append(x) + yield sum(map(operator.mul, kernel, window)) + + +def before_and_after(predicate, it): + """A variant of :func:`takewhile` that allows complete access to the + remainder of the iterator. + + >>> it = iter('ABCdEfGhI') + >>> all_upper, remainder = before_and_after(str.isupper, it) + >>> ''.join(all_upper) + 'ABC' + >>> ''.join(remainder) # takewhile() would lose the 'd' + 'dEfGhI' + + Note that the first iterator must be fully consumed before the second + iterator can generate valid results. + """ + it = iter(it) + transition = [] + + def true_iterator(): + for elem in it: + if predicate(elem): + yield elem + else: + transition.append(elem) + return + + def remainder_iterator(): + yield from transition + yield from it + + return true_iterator(), remainder_iterator() + + +def triplewise(iterable): + """Return overlapping triplets from *iterable*. + + >>> list(triplewise('ABCDE')) + [('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E')] + + """ + for (a, _), (b, c) in pairwise(pairwise(iterable)): + yield a, b, c + + +def sliding_window(iterable, n): + """Return a sliding window of width *n* over *iterable*. + + >>> list(sliding_window(range(6), 4)) + [(0, 1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5)] + + If *iterable* has fewer than *n* items, then nothing is yielded: + + >>> list(sliding_window(range(3), 4)) + [] + + For a variant with more features, see :func:`windowed`. + """ + it = iter(iterable) + window = deque(islice(it, n), maxlen=n) + if len(window) == n: + yield tuple(window) + for x in it: + window.append(x) + yield tuple(window) diff --git a/lib/more_itertools/recipes.pyi b/lib/more_itertools/recipes.pyi new file mode 100644 index 00000000..4648a41b --- /dev/null +++ b/lib/more_itertools/recipes.pyi @@ -0,0 +1,112 @@ +"""Stubs for more_itertools.recipes""" +from typing import ( + Any, + Callable, + Iterable, + Iterator, + List, + Optional, + Tuple, + TypeVar, + Union, +) +from typing_extensions import overload, Type + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') + +def take(n: int, iterable: Iterable[_T]) -> List[_T]: ... +def tabulate( + function: Callable[[int], _T], start: int = ... +) -> Iterator[_T]: ... +def tail(n: int, iterable: Iterable[_T]) -> Iterator[_T]: ... +def consume(iterator: Iterable[object], n: Optional[int] = ...) -> None: ... +@overload +def nth(iterable: Iterable[_T], n: int) -> Optional[_T]: ... +@overload +def nth(iterable: Iterable[_T], n: int, default: _U) -> Union[_T, _U]: ... +def all_equal(iterable: Iterable[object]) -> bool: ... +def quantify( + iterable: Iterable[_T], pred: Callable[[_T], bool] = ... +) -> int: ... +def pad_none(iterable: Iterable[_T]) -> Iterator[Optional[_T]]: ... +def padnone(iterable: Iterable[_T]) -> Iterator[Optional[_T]]: ... +def ncycles(iterable: Iterable[_T], n: int) -> Iterator[_T]: ... +def dotproduct(vec1: Iterable[object], vec2: Iterable[object]) -> object: ... +def flatten(listOfLists: Iterable[Iterable[_T]]) -> Iterator[_T]: ... +def repeatfunc( + func: Callable[..., _U], times: Optional[int] = ..., *args: Any +) -> Iterator[_U]: ... +def pairwise(iterable: Iterable[_T]) -> Iterator[Tuple[_T, _T]]: ... +@overload +def grouper( + iterable: Iterable[_T], n: int +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def grouper( + iterable: Iterable[_T], n: int, fillvalue: _U +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +@overload +def grouper( # Deprecated interface + iterable: int, n: Iterable[_T] +) -> Iterator[Tuple[Optional[_T], ...]]: ... +@overload +def grouper( # Deprecated interface + iterable: int, n: Iterable[_T], fillvalue: _U +) -> Iterator[Tuple[Union[_T, _U], ...]]: ... +def roundrobin(*iterables: Iterable[_T]) -> Iterator[_T]: ... +def partition( + pred: Optional[Callable[[_T], object]], iterable: Iterable[_T] +) -> Tuple[Iterator[_T], Iterator[_T]]: ... +def powerset(iterable: Iterable[_T]) -> Iterator[Tuple[_T, ...]]: ... +def unique_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = ... +) -> Iterator[_T]: ... +def unique_justseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], object]] = ... +) -> Iterator[_T]: ... +@overload +def iter_except( + func: Callable[[], _T], + exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]], + first: None = ..., +) -> Iterator[_T]: ... +@overload +def iter_except( + func: Callable[[], _T], + exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]], + first: Callable[[], _U], +) -> Iterator[Union[_T, _U]]: ... +@overload +def first_true( + iterable: Iterable[_T], *, pred: Optional[Callable[[_T], object]] = ... +) -> Optional[_T]: ... +@overload +def first_true( + iterable: Iterable[_T], + default: _U, + pred: Optional[Callable[[_T], object]] = ..., +) -> Union[_T, _U]: ... +def random_product( + *args: Iterable[_T], repeat: int = ... +) -> Tuple[_T, ...]: ... +def random_permutation( + iterable: Iterable[_T], r: Optional[int] = ... +) -> Tuple[_T, ...]: ... +def random_combination(iterable: Iterable[_T], r: int) -> Tuple[_T, ...]: ... +def random_combination_with_replacement( + iterable: Iterable[_T], r: int +) -> Tuple[_T, ...]: ... +def nth_combination( + iterable: Iterable[_T], r: int, index: int +) -> Tuple[_T, ...]: ... +def prepend(value: _T, iterator: Iterable[_U]) -> Iterator[Union[_T, _U]]: ... +def convolve(signal: Iterable[_T], kernel: Iterable[_T]) -> Iterator[_T]: ... +def before_and_after( + predicate: Callable[[_T], bool], it: Iterable[_T] +) -> Tuple[Iterator[_T], Iterator[_T]]: ... +def triplewise(iterable: Iterable[_T]) -> Iterator[Tuple[_T, _T, _T]]: ... +def sliding_window( + iterable: Iterable[_T], n: int +) -> Iterator[Tuple[_T, ...]]: ... diff --git a/lib/munkres.py b/lib/munkres.py index f5337223..ab982d7e 100644 --- a/lib/munkres.py +++ b/lib/munkres.py @@ -781,6 +781,6 @@ if __name__ == '__main__': for r, c in indexes: x = cost_matrix[r][c] total_cost += x - print('(%d, %d) -> %d' % (r, c, x)) - print('lowest cost=%d' % total_cost) + print(('(%d, %d) -> %d' % (r, c, x))) + print(('lowest cost=%d' % total_cost)) assert expected_total == total_cost diff --git a/lib/musicbrainzngs/__init__.py b/lib/musicbrainzngs/__init__.py old mode 100755 new mode 100644 diff --git a/lib/musicbrainzngs/caa.py b/lib/musicbrainzngs/caa.py old mode 100755 new mode 100644 index 235c560a..5eaf7357 --- a/lib/musicbrainzngs/caa.py +++ b/lib/musicbrainzngs/caa.py @@ -16,13 +16,21 @@ from musicbrainzngs import musicbrainz from musicbrainzngs.util import _unicode hostname = "coverartarchive.org" +https = True -def set_caa_hostname(new_hostname): +def set_caa_hostname(new_hostname, use_https=False): """Set the base hostname for Cover Art Archive requests. - Defaults to 'coverartarchive.org'.""" + Defaults to 'coverartarchive.org', accessing over https. + For backwards compatibility, `use_https` is False by default. + + :param str new_hostname: The hostname (and port) of the CAA server to connect to + :param bool use_https: `True` if the host should be accessed using https. Default is `False` +""" global hostname + global https hostname = new_hostname + https = use_https def _caa_request(mbid, imageid=None, size=None, entitytype="release"): @@ -32,7 +40,7 @@ def _caa_request(mbid, imageid=None, size=None, entitytype="release"): with :meth:`get_image_list`. :type imageid: str - :param size: 250, 500 + :param size: "250", "500", "1200" :type size: str or None :param entitytype: ``release`` or ``release-group`` @@ -46,7 +54,7 @@ def _caa_request(mbid, imageid=None, size=None, entitytype="release"): elif imageid: path.append(imageid) url = compat.urlunparse(( - 'http', + 'https' if https else 'http', hostname, '/%s' % '/'.join(path), '', @@ -93,8 +101,9 @@ def get_image_list(releaseid): If an error occurs then a :class:`~musicbrainzngs.ResponseError` will be raised with one of the following HTTP codes: - * 400: `Releaseid` is not a valid UUID - * 404: No release exists with an MBID of `releaseid` + * 400: `releaseid` is not a valid UUID + * 404: The release with an MBID of `releaseid` does not exist or + there is no cover art available for it. * 503: Ratelimit exceeded """ return _caa_request(releaseid) @@ -110,8 +119,9 @@ def get_release_group_image_list(releasegroupid): If an error occurs then a :class:`~musicbrainzngs.ResponseError` will be raised with one of the following HTTP codes: - * 400: `Releaseid` is not a valid UUID - * 404: No release exists with an MBID of `releaseid` + * 400: `releasegroupid` is not a valid UUID + * 404: The release group with an MBID of `releasegroupid` does not exist or + there is no cover art available for it. * 503: Ratelimit exceeded """ return _caa_request(releasegroupid, entitytype="release-group") @@ -123,7 +133,7 @@ def get_release_group_image_front(releasegroupid, size=None): :meth:`get_image`. """ return get_image(releasegroupid, "front", size=size, - entitytype="release-group") + entitytype="release-group") def get_image_front(releaseid, size=None): @@ -152,18 +162,19 @@ def get_image(mbid, coverid, size=None, entitytype="release"): If an error occurs then a :class:`~musicbrainzngs.ResponseError` will be raised with one of the following HTTP codes: - * 400: `Releaseid` is not a valid UUID or `coverid` is invalid - * 404: No release exists with an MBID of `releaseid` + * 400: `releaseid` is not a valid UUID or `coverid` is invalid + * 404: The release with an MBID of `releaseid` does not exist or no cover + art with an id of `coverid` exists. * 503: Ratelimit exceeded - :param coverid: ``front``, ``back`` or a number from the listing obtained with - :meth:`get_image_list` + :param coverid: ``front``, ``back`` or a number from the listing obtained + with :meth:`get_image_list` :type coverid: int or str - :param size: 250, 500 or None. If it is None, the largest available picture - will be downloaded. If the image originally uploaded to the - Cover Art Archive was smaller than the requested size, only - the original image will be returned. + :param size: "250", "500", "1200" or None. If it is None, the largest + available picture will be downloaded. If the image originally + uploaded to the Cover Art Archive was smaller than the + requested size, only the original image will be returned. :type size: str or None :param entitytype: The type of entity for which to download the cover art. diff --git a/lib/musicbrainzngs/compat.py b/lib/musicbrainzngs/compat.py old mode 100755 new mode 100644 index 47503992..689e0834 --- a/lib/musicbrainzngs/compat.py +++ b/lib/musicbrainzngs/compat.py @@ -43,7 +43,7 @@ if is_py2: HTTPHandler, build_opener, HTTPError, URLError from httplib import BadStatusLine, HTTPException from urlparse import urlunparse - from urllib import urlencode + from urllib import urlencode, quote_plus bytes = str unicode = unicode @@ -54,7 +54,7 @@ elif is_py3: HTTPHandler, build_opener from urllib.error import HTTPError, URLError from http.client import HTTPException, BadStatusLine - from urllib.parse import urlunparse, urlencode + from urllib.parse import urlunparse, urlencode, quote_plus unicode = str bytes = bytes diff --git a/lib/musicbrainzngs/mbxml.py b/lib/musicbrainzngs/mbxml.py old mode 100755 new mode 100644 index 60236dc7..e85fe7cb --- a/lib/musicbrainzngs/mbxml.py +++ b/lib/musicbrainzngs/mbxml.py @@ -7,29 +7,26 @@ import re import xml.etree.ElementTree as ET import logging -from musicbrainzngs import util +from . import util -try: - from ET import fixtag -except: - # Python < 2.7 - def fixtag(tag, namespaces): - # given a decorated tag (of the form {uri}tag), return prefixed - # tag and namespace declaration, if any - if isinstance(tag, ET.QName): - tag = tag.text - namespace_uri, tag = tag[1:].split("}", 1) - prefix = namespaces.get(namespace_uri) - if prefix is None: - prefix = "ns%d" % len(namespaces) - namespaces[namespace_uri] = prefix - if prefix == "xml": - xmlns = None - else: - xmlns = ("xmlns:%s" % prefix, namespace_uri) - else: + +def fixtag(tag, namespaces): + # given a decorated tag (of the form {uri}tag), return prefixed + # tag and namespace declaration, if any + if isinstance(tag, ET.QName): + tag = tag.text + namespace_uri, tag = tag[1:].split("}", 1) + prefix = namespaces.get(namespace_uri) + if prefix is None: + prefix = "ns%d" % len(namespaces) + namespaces[namespace_uri] = prefix + if prefix == "xml": xmlns = None - return "%s:%s" % (prefix, tag), xmlns + else: + xmlns = ("xmlns:%s" % prefix, namespace_uri) + else: + xmlns = None + return "%s:%s" % (prefix, tag), xmlns NS_MAP = {"http://musicbrainz.org/ns/mmd-2.0#": "ws2", @@ -377,7 +374,7 @@ def parse_relation(relation): result.update(parse_elements(elements, inner_els, relation)) # We parse attribute-list again to get attributes that have both # text and attribute values - result.update(parse_elements([], {"attribute-list": parse_relation_attribute_list}, relation)) + result.update(parse_elements(['target-credit'], {"attribute-list": parse_relation_attribute_list}, relation)) return result @@ -505,7 +502,6 @@ def parse_recording(recording): "user-tag-list": parse_tag_list, "rating": parse_rating, "isrc-list": parse_external_id_list, - "echoprint-list": parse_external_id_list, "relation-list": parse_relation_list, "annotation": parse_annotation} diff --git a/lib/musicbrainzngs/musicbrainz.py b/lib/musicbrainzngs/musicbrainz.py old mode 100755 new mode 100644 index e56e67ce..b0e5067c --- a/lib/musicbrainzngs/musicbrainz.py +++ b/lib/musicbrainzngs/musicbrainz.py @@ -3,6 +3,7 @@ # This file is distributed under a BSD-2-Clause type license. # See the COPYING file for more information. +import base64 import re import threading import time @@ -20,10 +21,8 @@ from musicbrainzngs import mbxml from musicbrainzngs import util from musicbrainzngs import compat -# headphones -import base64 +_version = "0.7.1modified" -_version = "0.7devheadphones" _log = logging.getLogger("musicbrainzngs") LUCENE_SPECIAL = r'([+\-&|!(){}\[\]\^"~*?:\\\/])' @@ -32,11 +31,11 @@ LUCENE_SPECIAL = r'([+\-&|!(){}\[\]\^"~*?:\\\/])' RELATABLE_TYPES = ['area', 'artist', 'label', 'place', 'event', 'recording', 'release', 'release-group', 'series', 'url', 'work', 'instrument'] RELATION_INCLUDES = [entity + '-rels' for entity in RELATABLE_TYPES] -TAG_INCLUDES = ["tags", "user-tags"] +TAG_INCLUDES = ["tags", "user-tags", "genres", "user-genres"] RATING_INCLUDES = ["ratings", "user-ratings"] VALID_INCLUDES = { - 'area' : ["aliases", "annotation"] + RELATION_INCLUDES, + 'area' : ["aliases", "annotation"] + RELATION_INCLUDES + TAG_INCLUDES, 'artist': [ "recordings", "releases", "release-groups", "works", # Subqueries "various-artists", "discids", "media", "isrcs", @@ -61,7 +60,7 @@ VALID_INCLUDES = { ] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, 'release': [ "artists", "labels", "recordings", "release-groups", "media", - "artist-credits", "discids", "puids", "isrcs", + "artist-credits", "discids", "isrcs", "recording-level-rels", "work-level-rels", "annotation", "aliases" ] + TAG_INCLUDES + RELATION_INCLUDES, 'release-group': [ @@ -70,18 +69,17 @@ VALID_INCLUDES = { ] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, 'series': [ "annotation", "aliases" - ] + RELATION_INCLUDES, + ] + RELATION_INCLUDES + TAG_INCLUDES, 'work': [ - "artists", # Subqueries "aliases", "annotation" ] + TAG_INCLUDES + RATING_INCLUDES + RELATION_INCLUDES, 'url': RELATION_INCLUDES, 'discid': [ # Discid should be the same as release "artists", "labels", "recordings", "release-groups", "media", - "artist-credits", "discids", "puids", "isrcs", + "artist-credits", "discids", "isrcs", "recording-level-rels", "work-level-rels", "annotation", "aliases" ] + RELATION_INCLUDES, - 'isrc': ["artists", "releases", "puids", "isrcs"], + 'isrc': ["artists", "releases", "isrcs"], 'iswc': ["artists"], 'collection': ['releases'], } @@ -103,8 +101,8 @@ VALID_RELEASE_TYPES = [ "nat", "album", "single", "ep", "broadcast", "other", # primary types "compilation", "soundtrack", "spokenword", "interview", "audiobook", - "live", "remix", "dj-mix", "mixtape/street", # secondary types - "demo", # headphones + "live", "remix", "dj-mix", "mixtape/street", "audio drama", # secondary types + "demo" #headphones ] #: These can be used to filter whenever releases or release-groups are involved VALID_RELEASE_STATUSES = ["official", "promotion", "bootleg", "pseudo-release"] @@ -113,48 +111,58 @@ VALID_SEARCH_FIELDS = { 'entity', 'name', 'text', 'type' ], 'area': [ - 'aid', 'area', 'alias', 'begin', 'comment', 'end', 'ended', - 'iso', 'iso1', 'iso2', 'iso3', 'type' + 'aid', 'alias', 'area', 'areaaccent', 'begin', 'comment', 'end', + 'ended', 'iso', 'iso1', 'iso2', 'iso3', 'sortname', 'tag', 'type' ], 'artist': [ - 'arid', 'artist', 'artistaccent', 'alias', 'begin', 'comment', - 'country', 'end', 'ended', 'gender', 'ipi', 'sortname', 'tag', 'type', - 'area', 'beginarea', 'endarea' + 'alias', 'area', 'arid', 'artist', 'artistaccent', 'begin', 'beginarea', + 'comment', 'country', 'end', 'endarea', 'ended', 'gender', + 'ipi', 'isni', 'primary_alias', 'sortname', 'tag', 'type' + ], + 'event': [ + 'aid', 'alias', 'area', 'arid', 'artist', 'begin', 'comment', 'eid', + 'end', 'ended', 'event', 'eventaccent', 'pid', 'place', 'tag', 'type' + ], + 'instrument': [ + 'alias', 'comment', 'description', 'iid', 'instrument', + 'instrumentaccent', 'tag', 'type' ], 'label': [ - 'alias', 'begin', 'code', 'comment', 'country', 'end', 'ended', - 'ipi', 'label', 'labelaccent', 'laid', 'sortname', 'type', 'tag', - 'area' - ], - 'recording': [ - 'arid', 'artist', 'artistname', 'creditname', 'comment', - 'country', 'date', 'dur', 'format', 'isrc', 'number', - 'position', 'primarytype', 'puid', 'qdur', 'recording', - 'recordingaccent', 'reid', 'release', 'rgid', 'rid', - 'secondarytype', 'status', 'tnum', 'tracks', 'tracksrelease', - 'tag', 'type', 'video' - ], - 'release-group': [ - 'arid', 'artist', 'artistname', 'comment', 'creditname', - 'primarytype', 'rgid', 'releasegroup', 'releasegroupaccent', - 'releases', 'release', 'reid', 'secondarytype', 'status', + 'alias', 'area', 'begin', 'code', 'comment', 'country', 'end', 'ended', + 'ipi', 'label', 'labelaccent', 'laid', 'release_count', 'sortname', 'tag', 'type' ], + 'place': [ + 'address', 'alias', 'area', 'begin', 'comment', 'end', 'ended', 'lat', 'long', + 'pid', 'place', 'placeaccent', 'type' + ], + 'recording': [ + 'alias', 'arid', 'artist', 'artistname', 'comment', 'country', + 'creditname', 'date', 'dur', 'format', 'isrc', 'number', 'position', + 'primarytype', 'qdur', 'recording', 'recordingaccent', 'reid', + 'release', 'rgid', 'rid', 'secondarytype', 'status', 'tag', 'tid', + 'tnum', 'tracks', 'tracksrelease', 'type', 'video'], + + 'release-group': [ + 'alias', 'arid', 'artist', 'artistname', 'comment', 'creditname', + 'primarytype', 'reid', 'release', 'releasegroup', 'releasegroupaccent', + 'releases', 'rgid', 'secondarytype', 'status', 'tag', 'type' + ], 'release': [ - 'arid', 'artist', 'artistname', 'asin', 'barcode', 'creditname', - 'catno', 'comment', 'country', 'creditname', 'date', 'discids', - 'discidsmedium', 'format', 'laid', 'label', 'lang', 'mediums', - 'primarytype', 'puid', 'quality', 'reid', 'release', 'releaseaccent', - 'rgid', 'script', 'secondarytype', 'status', 'tag', 'tracks', - 'tracksmedium', 'type' + 'alias', 'arid', 'artist', 'artistname', 'asin', 'barcode', 'catno', + 'comment', 'country', 'creditname', 'date', 'discids', 'discidsmedium', + 'format', 'label', 'laid', 'lang', 'mediums', 'primarytype', 'quality', + 'reid', 'release', 'releaseaccent', 'rgid', 'script', 'secondarytype', + 'status', 'tag', 'tracks', 'tracksmedium', 'type' ], 'series': [ - 'alias', 'comment', 'sid', 'series', 'type' + 'alias', 'comment', 'orderingattribute', 'series', 'seriesaccent', + 'sid', 'tag', 'type' ], 'work': [ - 'alias', 'arid', 'artist', 'comment', 'iswc', 'lang', 'tag', - 'type', 'wid', 'work', 'workaccent' - ], + 'alias', 'arid', 'artist', 'comment', 'iswc', 'lang', 'recording', + 'recording_count', 'rid', 'tag', 'type', 'wid', 'work', 'workaccent' + ] } # Constants @@ -163,6 +171,9 @@ class AUTH_NO: pass class AUTH_IFSET: pass +AUTH_REQUIRED_INCLUDES = ["user-tags", "user-ratings", "user-genres"] + + # Exceptions. class MusicBrainzError(Exception): @@ -282,8 +293,6 @@ def _docstring_search(entity): def _docstring_impl(name, values): def _decorator(func): - # puids are allowed so nothing breaks, but not documented - if "puids" in values: values.remove("puids") vstr = ", ".join(values) args = {name: vstr} if func.__doc__: @@ -297,6 +306,7 @@ def _docstring_impl(name, values): user = password = "" hostname = "musicbrainz.org" +https = True _client = "" _useragent = "" mb_auth = False @@ -309,7 +319,6 @@ def auth(u, p): user = u password = p -# headphones def hpauth(u, p): """Set the username and password to be used in subsequent queries to the MusicBrainz XML API that require authentication. @@ -338,12 +347,21 @@ def set_useragent(app, version, contact=None): _client = "%s-%s" % (app, version) _log.debug("set user-agent to %s" % _useragent) -def set_hostname(new_hostname): + +def set_hostname(new_hostname, use_https=False): """Set the hostname for MusicBrainz webservice requests. - Defaults to 'musicbrainz.org'. - You can also include a port: 'localhost:8000'.""" + Defaults to 'musicbrainz.org', accessing over https. + For backwards compatibility, `use_https` is False by default. + + :param str new_hostname: The hostname (and port) of the MusicBrainz server to connect to + :param bool use_https: `True` if the host should be accessed using https. Default is `False` + + Specify a non-standard port by adding it to the hostname, + for example 'localhost:8000'.""" global hostname + global https hostname = new_hostname + https = use_https # Rate limiting. @@ -482,7 +500,7 @@ class _MusicbrainzHttpRequest(compat.Request): # Core (internal) functions for calling the MB API. -def _safe_read(opener, req, body=None, max_retries=8, retry_delay_delta=2.0): +def _safe_read(opener, req, body=None, max_retries=3, retry_delay_delta=2.0): """Open an HTTP request with a given URL opener and (optionally) a request body. Transient errors lead to retries. Permanent errors and repeated errors are translated into a small set of handleable @@ -647,7 +665,7 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO, # Construct the full URL for the request, including hostname and # query string. url = compat.urlunparse(( - 'http', + 'https' if https else 'http', hostname, '/ws/2/%s' % path, '', @@ -687,8 +705,10 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO, # Add headphones credentials if mb_auth: - base64string = base64.encodestring('%s:%s' % (hpuser, hppassword)).replace('\n', '') - req.add_header("Authorization", "Basic %s" % base64string) + credentials = f"{hpuser}:{hppassword}" + base64bytes = base64.encodebytes(credentials.encode('utf-8')) + base64string = base64bytes.decode('utf-8').replace('\n', '') + req.add_header("Authorization", f"Basic {base64string}") _log.debug("requesting with UA %s" % _useragent) if body: @@ -701,11 +721,12 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO, return parser_fun(resp) + def _get_auth_type(entity, id, includes): """ Some calls require authentication. This returns - True if a call does, False otherwise + a constant (Yes, No, IfSet) for the auth status of the call. """ - if "user-tags" in includes or "user-ratings" in includes: + if "user-tags" in includes or "user-ratings" in includes or "user-genres" in includes: return AUTH_YES elif entity.startswith("collection"): if not id: @@ -715,6 +736,7 @@ def _get_auth_type(entity, id, includes): else: return AUTH_NO + def _do_mb_query(entity, id, includes=[], params={}): """Make a single GET call to the MusicBrainz XML API. `entity` is a string indicated the type of object to be retrieved. The id may be @@ -764,10 +786,6 @@ def _do_mb_search(entity, query='', fields={}, raise InvalidSearchFieldError( '%s is not a valid search field for %s' % (key, entity) ) - elif key == "puid": - warn("PUID support was removed from server\n" - "the 'puid' field is ignored", - Warning, stacklevel=2) # Escape Lucene's special characters. value = util._unicode(value) @@ -1051,27 +1069,6 @@ def get_releases_by_discid(id, includes=[], toc=None, cdstubs=True, media_format params["media-format"] = media_format return _do_mb_query("discid", id, includes, params) -@_docstring_get("recording") -def get_recordings_by_echoprint(echoprint, includes=[], release_status=[], - release_type=[]): - """Search for recordings with an `echoprint `_. - (not available on server)""" - warn("Echoprints were never introduced\n" - "and will not be found (404)", - Warning, stacklevel=2) - raise ResponseError(cause=compat.HTTPError( - None, 404, "Not Found", None, None)) - -@_docstring_get("recording") -def get_recordings_by_puid(puid, includes=[], release_status=[], - release_type=[]): - """Search for recordings with a :musicbrainz:`PUID`. - (not available on server)""" - warn("PUID support was removed from the server\n" - "and no PUIDs will be found (404)", - Warning, stacklevel=2) - raise ResponseError(cause=compat.HTTPError( - None, 404, "Not Found", None, None)) @_docstring_get("recording") def get_recordings_by_isrc(isrc, includes=[], release_status=[], @@ -1288,23 +1285,6 @@ def submit_barcodes(release_barcode): query = mbxml.make_barcode_request(release_barcode) return _do_mb_post("release", query) -def submit_puids(recording_puids): - """Submit PUIDs. - (Functionality removed from server) - """ - warn("PUID support was dropped at the server\n" - "nothing will be submitted", - Warning, stacklevel=2) - return {'message': {'text': 'OK'}} - -def submit_echoprints(recording_echoprints): - """Submit echoprints. - (Functionality removed from server) - """ - warn("Echoprints were never introduced\n" - "nothing will be submitted", - Warning, stacklevel=2) - return {'message': {'text': 'OK'}} def submit_isrcs(recording_isrcs): """Submit ISRCs. diff --git a/lib/musicbrainzngs/util.py b/lib/musicbrainzngs/util.py old mode 100755 new mode 100644 diff --git a/lib/mutagen/_compat.py b/lib/mutagen/_compat.py index ebb80ea4..167670ad 100755 --- a/lib/mutagen/_compat.py +++ b/lib/mutagen/_compat.py @@ -13,15 +13,15 @@ PY2 = sys.version_info[0] == 2 PY3 = not PY2 if PY2: - from StringIO import StringIO + from io import StringIO BytesIO = StringIO - from cStringIO import StringIO as cBytesIO - from itertools import izip + from io import StringIO as cBytesIO + - long_ = long - integer_types = (int, long) - string_types = (str, unicode) - text_type = unicode + long_ = int + integer_types = (int, int) + string_types = (str, str) + text_type = str xrange = xrange cmp = cmp @@ -30,9 +30,9 @@ if PY2: def endswith(text, end): return text.endswith(end) - iteritems = lambda d: d.iteritems() - itervalues = lambda d: d.itervalues() - iterkeys = lambda d: d.iterkeys() + iteritems = lambda d: iter(d.items()) + itervalues = lambda d: iter(d.values()) + iterkeys = lambda d: iter(d.keys()) iterbytes = lambda b: iter(b) @@ -73,9 +73,9 @@ elif PY3: end = end.encode("ascii") return text.endswith(end) - iteritems = lambda d: iter(d.items()) - itervalues = lambda d: iter(d.values()) - iterkeys = lambda d: iter(d.keys()) + iteritems = lambda d: iter(list(d.items())) + itervalues = lambda d: iter(list(d.values())) + iterkeys = lambda d: iter(list(d.keys())) iterbytes = lambda b: (bytes([v]) for v in b) diff --git a/lib/mutagen/_constants.py b/lib/mutagen/_constants.py index 5c1c1a10..a866b6f8 100755 --- a/lib/mutagen/_constants.py +++ b/lib/mutagen/_constants.py @@ -8,197 +8,197 @@ """Constants used by Mutagen.""" GENRES = [ - u"Blues", - u"Classic Rock", - u"Country", - u"Dance", - u"Disco", - u"Funk", - u"Grunge", - u"Hip-Hop", - u"Jazz", - u"Metal", - u"New Age", - u"Oldies", - u"Other", - u"Pop", - u"R&B", - u"Rap", - u"Reggae", - u"Rock", - u"Techno", - u"Industrial", - u"Alternative", - u"Ska", - u"Death Metal", - u"Pranks", - u"Soundtrack", - u"Euro-Techno", - u"Ambient", - u"Trip-Hop", - u"Vocal", - u"Jazz+Funk", - u"Fusion", - u"Trance", - u"Classical", - u"Instrumental", - u"Acid", - u"House", - u"Game", - u"Sound Clip", - u"Gospel", - u"Noise", - u"Alt. Rock", - u"Bass", - u"Soul", - u"Punk", - u"Space", - u"Meditative", - u"Instrumental Pop", - u"Instrumental Rock", - u"Ethnic", - u"Gothic", - u"Darkwave", - u"Techno-Industrial", - u"Electronic", - u"Pop-Folk", - u"Eurodance", - u"Dream", - u"Southern Rock", - u"Comedy", - u"Cult", - u"Gangsta Rap", - u"Top 40", - u"Christian Rap", - u"Pop/Funk", - u"Jungle", - u"Native American", - u"Cabaret", - u"New Wave", - u"Psychedelic", - u"Rave", - u"Showtunes", - u"Trailer", - u"Lo-Fi", - u"Tribal", - u"Acid Punk", - u"Acid Jazz", - u"Polka", - u"Retro", - u"Musical", - u"Rock & Roll", - u"Hard Rock", - u"Folk", - u"Folk-Rock", - u"National Folk", - u"Swing", - u"Fast-Fusion", - u"Bebop", - u"Latin", - u"Revival", - u"Celtic", - u"Bluegrass", - u"Avantgarde", - u"Gothic Rock", - u"Progressive Rock", - u"Psychedelic Rock", - u"Symphonic Rock", - u"Slow Rock", - u"Big Band", - u"Chorus", - u"Easy Listening", - u"Acoustic", - u"Humour", - u"Speech", - u"Chanson", - u"Opera", - u"Chamber Music", - u"Sonata", - u"Symphony", - u"Booty Bass", - u"Primus", - u"Porn Groove", - u"Satire", - u"Slow Jam", - u"Club", - u"Tango", - u"Samba", - u"Folklore", - u"Ballad", - u"Power Ballad", - u"Rhythmic Soul", - u"Freestyle", - u"Duet", - u"Punk Rock", - u"Drum Solo", - u"A Cappella", - u"Euro-House", - u"Dance Hall", - u"Goa", - u"Drum & Bass", - u"Club-House", - u"Hardcore", - u"Terror", - u"Indie", - u"BritPop", - u"Afro-Punk", - u"Polsk Punk", - u"Beat", - u"Christian Gangsta Rap", - u"Heavy Metal", - u"Black Metal", - u"Crossover", - u"Contemporary Christian", - u"Christian Rock", - u"Merengue", - u"Salsa", - u"Thrash Metal", - u"Anime", - u"JPop", - u"Synthpop", - u"Abstract", - u"Art Rock", - u"Baroque", - u"Bhangra", - u"Big Beat", - u"Breakbeat", - u"Chillout", - u"Downtempo", - u"Dub", - u"EBM", - u"Eclectic", - u"Electro", - u"Electroclash", - u"Emo", - u"Experimental", - u"Garage", - u"Global", - u"IDM", - u"Illbient", - u"Industro-Goth", - u"Jam Band", - u"Krautrock", - u"Leftfield", - u"Lounge", - u"Math Rock", - u"New Romantic", - u"Nu-Breakz", - u"Post-Punk", - u"Post-Rock", - u"Psytrance", - u"Shoegaze", - u"Space Rock", - u"Trop Rock", - u"World Music", - u"Neoclassical", - u"Audiobook", - u"Audio Theatre", - u"Neue Deutsche Welle", - u"Podcast", - u"Indie Rock", - u"G-Funk", - u"Dubstep", - u"Garage Rock", - u"Psybient", + "Blues", + "Classic Rock", + "Country", + "Dance", + "Disco", + "Funk", + "Grunge", + "Hip-Hop", + "Jazz", + "Metal", + "New Age", + "Oldies", + "Other", + "Pop", + "R&B", + "Rap", + "Reggae", + "Rock", + "Techno", + "Industrial", + "Alternative", + "Ska", + "Death Metal", + "Pranks", + "Soundtrack", + "Euro-Techno", + "Ambient", + "Trip-Hop", + "Vocal", + "Jazz+Funk", + "Fusion", + "Trance", + "Classical", + "Instrumental", + "Acid", + "House", + "Game", + "Sound Clip", + "Gospel", + "Noise", + "Alt. Rock", + "Bass", + "Soul", + "Punk", + "Space", + "Meditative", + "Instrumental Pop", + "Instrumental Rock", + "Ethnic", + "Gothic", + "Darkwave", + "Techno-Industrial", + "Electronic", + "Pop-Folk", + "Eurodance", + "Dream", + "Southern Rock", + "Comedy", + "Cult", + "Gangsta Rap", + "Top 40", + "Christian Rap", + "Pop/Funk", + "Jungle", + "Native American", + "Cabaret", + "New Wave", + "Psychedelic", + "Rave", + "Showtunes", + "Trailer", + "Lo-Fi", + "Tribal", + "Acid Punk", + "Acid Jazz", + "Polka", + "Retro", + "Musical", + "Rock & Roll", + "Hard Rock", + "Folk", + "Folk-Rock", + "National Folk", + "Swing", + "Fast-Fusion", + "Bebop", + "Latin", + "Revival", + "Celtic", + "Bluegrass", + "Avantgarde", + "Gothic Rock", + "Progressive Rock", + "Psychedelic Rock", + "Symphonic Rock", + "Slow Rock", + "Big Band", + "Chorus", + "Easy Listening", + "Acoustic", + "Humour", + "Speech", + "Chanson", + "Opera", + "Chamber Music", + "Sonata", + "Symphony", + "Booty Bass", + "Primus", + "Porn Groove", + "Satire", + "Slow Jam", + "Club", + "Tango", + "Samba", + "Folklore", + "Ballad", + "Power Ballad", + "Rhythmic Soul", + "Freestyle", + "Duet", + "Punk Rock", + "Drum Solo", + "A Cappella", + "Euro-House", + "Dance Hall", + "Goa", + "Drum & Bass", + "Club-House", + "Hardcore", + "Terror", + "Indie", + "BritPop", + "Afro-Punk", + "Polsk Punk", + "Beat", + "Christian Gangsta Rap", + "Heavy Metal", + "Black Metal", + "Crossover", + "Contemporary Christian", + "Christian Rock", + "Merengue", + "Salsa", + "Thrash Metal", + "Anime", + "JPop", + "Synthpop", + "Abstract", + "Art Rock", + "Baroque", + "Bhangra", + "Big Beat", + "Breakbeat", + "Chillout", + "Downtempo", + "Dub", + "EBM", + "Eclectic", + "Electro", + "Electroclash", + "Emo", + "Experimental", + "Garage", + "Global", + "IDM", + "Illbient", + "Industro-Goth", + "Jam Band", + "Krautrock", + "Leftfield", + "Lounge", + "Math Rock", + "New Romantic", + "Nu-Breakz", + "Post-Punk", + "Post-Rock", + "Psytrance", + "Shoegaze", + "Space Rock", + "Trop Rock", + "World Music", + "Neoclassical", + "Audiobook", + "Audio Theatre", + "Neue Deutsche Welle", + "Podcast", + "Indie Rock", + "G-Funk", + "Dubstep", + "Garage Rock", + "Psybient", ] """The ID3v1 genre list.""" diff --git a/lib/mutagen/_file.py b/lib/mutagen/_file.py index 1ccbf240..1f2b6d14 100755 --- a/lib/mutagen/_file.py +++ b/lib/mutagen/_file.py @@ -94,7 +94,7 @@ class FileType(DictMixin): if self.tags is None: return [] else: - return self.tags.keys() + return list(self.tags.keys()) @loadfile(writable=True) def delete(self, filething): @@ -286,7 +286,7 @@ def File(filething, options=None, easy=False): results = [(Kind.score(filething.name, fileobj, header), Kind.__name__) for Kind in options] - results = list(izip(results, options)) + results = list(zip(results, options)) results.sort() (score, name), Kind = results[-1] if score > 0: diff --git a/lib/mutagen/_senf/_compat.py b/lib/mutagen/_senf/_compat.py index a31c4b87..4de1ef95 100755 --- a/lib/mutagen/_senf/_compat.py +++ b/lib/mutagen/_senf/_compat.py @@ -20,20 +20,21 @@ PY3 = not PY2 if PY2: - from urlparse import urlparse, urlunparse + from urllib.parse import urlparse, urlunparse urlparse, urlunparse - from urllib import pathname2url, url2pathname, quote, unquote + from urllib.request import pathname2url, url2pathname + from urllib.parse import quote, unquote pathname2url, url2pathname, quote, unquote - from StringIO import StringIO + from io import StringIO BytesIO = StringIO from io import StringIO as TextIO TextIO - string_types = (str, unicode) - text_type = unicode + string_types = (str, str) + text_type = str - iteritems = lambda d: d.iteritems() + iteritems = lambda d: iter(d.items()) elif PY3: from urllib.parse import urlparse, quote, unquote, urlunparse urlparse, quote, unquote, urlunparse @@ -49,4 +50,4 @@ elif PY3: string_types = (str,) text_type = str - iteritems = lambda d: iter(d.items()) + iteritems = lambda d: iter(list(d.items())) diff --git a/lib/mutagen/_senf/_environ.py b/lib/mutagen/_senf/_environ.py index 4f8a0640..760bf19d 100755 --- a/lib/mutagen/_senf/_environ.py +++ b/lib/mutagen/_senf/_environ.py @@ -86,23 +86,23 @@ def read_windows_environ(): res = ctypes.cast(res, ctypes.POINTER(ctypes.c_wchar)) done = [] - current = u"" + current = "" i = 0 while 1: c = res[i] i += 1 - if c == u"\x00": + if c == "\x00": if not current: break done.append(current) - current = u"" + current = "" continue current += c dict_ = {} for entry in done: try: - key, value = entry.split(u"=", 1) + key, value = entry.split("=", 1) except ValueError: continue key = _norm_key(key) diff --git a/lib/mutagen/_senf/_fsnative.py b/lib/mutagen/_senf/_fsnative.py index c210995e..76626f3c 100755 --- a/lib/mutagen/_senf/_fsnative.py +++ b/lib/mutagen/_senf/_fsnative.py @@ -59,7 +59,7 @@ def _codec_fails_on_encode_surrogates(codec, _cache={}): return _cache[codec] except KeyError: try: - u"\uD800\uDC01".encode(codec) + "\uD800\uDC01".encode(codec) except UnicodeEncodeError: _cache[codec] = True else: @@ -76,7 +76,7 @@ def _codec_can_decode_with_surrogatepass(codec, _cache={}): return _cache[codec] except KeyError: try: - u"\ud83d".encode( + "\ud83d".encode( codec, _surrogatepass).decode(codec, _surrogatepass) except UnicodeDecodeError: _cache[codec] = False @@ -173,14 +173,14 @@ def _fsnative(text): path = text.encode("utf-8", _surrogatepass) if b"\x00" in path: - path = path.replace(b"\x00", fsn2bytes(_fsnative(u"\uFFFD"), None)) + path = path.replace(b"\x00", fsn2bytes(_fsnative("\uFFFD"), None)) if PY3: return path.decode(_encoding, "surrogateescape") return path else: - if u"\x00" in text: - text = text.replace(u"\x00", u"\uFFFD") + if "\x00" in text: + text = text.replace("\x00", "\uFFFD") return text @@ -235,7 +235,7 @@ def _create_fsnative(type_): the `str` only contains ASCII and no NULL. """ - def __new__(cls, text=u""): + def __new__(cls, text=""): return _fsnative(text) new_type = meta("fsnative", (object,), dict(impl.__dict__)) @@ -259,7 +259,7 @@ def _typecheck_fsnative(path): return False if PY3 or is_win: - if u"\x00" in path: + if "\x00" in path: return False if is_unix and not _is_unicode_encoding: @@ -309,7 +309,7 @@ def _fsn2native(path): if b"\x00" in path: raise TypeError("fsnative can't contain nulls") else: - if u"\x00" in path: + if "\x00" in path: raise TypeError("fsnative can't contain nulls") return path @@ -370,7 +370,7 @@ def path2fsn(path): if b"\x00" in data: raise ValueError("embedded null") else: - if u"\x00" in path: + if "\x00" in path: raise ValueError("embedded null") if not isinstance(path, fsnative_type): @@ -495,7 +495,7 @@ def bytes2fsn(data, encoding): path = _bytes2winpath(data, encoding) except LookupError: raise ValueError("invalid encoding %r" % encoding) - if u"\x00" in path: + if "\x00" in path: raise ValueError("contains nulls") return path else: @@ -548,7 +548,7 @@ def uri2fsn(uri): path = "\\\\" + path if PY2: path = path.decode("utf-8") - if u"\x00" in path: + if "\x00" in path: raise ValueError("embedded null") return path else: @@ -607,4 +607,4 @@ def fsn2uri(path): return _quote_path(uri.encode("utf-8", _surrogatepass)) else: - return u"file://" + _quote_path(path) + return "file://" + _quote_path(path) diff --git a/lib/mutagen/_senf/_print.py b/lib/mutagen/_senf/_print.py index 051118e5..ca19279a 100755 --- a/lib/mutagen/_senf/_print.py +++ b/lib/mutagen/_senf/_print.py @@ -155,7 +155,7 @@ def _print_windows(objects, sep, end, file, flush): if not isinstance(end, text_type): raise TypeError - if end == u"\n": + if end == "\n": end = os.linesep text = sep.join(parts) + end @@ -225,7 +225,7 @@ def _readline_windows(): buf = ctypes.create_string_buffer(buf_size * ctypes.sizeof(winapi.WCHAR)) read = winapi.DWORD() - text = u"" + text = "" while True: if winapi.ReadConsoleW( h, buf, buf_size, ctypes.byref(read), None) == 0: @@ -234,7 +234,7 @@ def _readline_windows(): raise ctypes.WinError() data = buf[:read.value * ctypes.sizeof(winapi.WCHAR)] text += data.decode("utf-16-le", _surrogatepass) - if text.endswith(u"\r\n"): + if text.endswith("\r\n"): return text[:-2] @@ -253,7 +253,7 @@ def _decode_codepage(codepage, data): assert isinstance(data, bytes) if not data: - return u"" + return "" # get the required buffer length first length = winapi.MultiByteToWideChar(codepage, 0, data, len(data), None, 0) diff --git a/lib/mutagen/_senf/_stdlib.py b/lib/mutagen/_senf/_stdlib.py index 82415035..bfaf271c 100755 --- a/lib/mutagen/_senf/_stdlib.py +++ b/lib/mutagen/_senf/_stdlib.py @@ -38,7 +38,7 @@ def getcwd(): """ if is_win and PY2: - return os.getcwdu() + return os.getcwd() return os.getcwd() diff --git a/lib/mutagen/_senf/_winansi.py b/lib/mutagen/_senf/_winansi.py index edcd01cf..e6f65537 100755 --- a/lib/mutagen/_senf/_winansi.py +++ b/lib/mutagen/_senf/_winansi.py @@ -25,7 +25,7 @@ def ansi_parse(code): return code[-1:], tuple([int(v or "0") for v in code[2:-1].split(";")]) -def ansi_split(text, _re=re.compile(u"(\x1b\[(\d*;?)*\S)")): +def ansi_split(text, _re=re.compile("(\x1b\[(\d*;?)*\S)")): """Yields (is_ansi, text)""" for part in _re.split(text): diff --git a/lib/mutagen/_tools/mid3cp.py b/lib/mutagen/_tools/mid3cp.py index 1339548d..4d4a5c44 100755 --- a/lib/mutagen/_tools/mid3cp.py +++ b/lib/mutagen/_tools/mid3cp.py @@ -52,14 +52,14 @@ def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): try: id3 = mutagen.id3.ID3(src, translate=False) except mutagen.id3.ID3NoHeaderError: - print_(u"No ID3 header found in ", src, file=sys.stderr) + print_("No ID3 header found in ", src, file=sys.stderr) return 1 except Exception as err: print_(str(err), file=sys.stderr) return 1 if verbose: - print_(u"File", src, u"contains:", file=sys.stderr) + print_("File", src, "contains:", file=sys.stderr) print_(id3.pprint(), file=sys.stderr) for tag in excluded_tags: @@ -75,7 +75,7 @@ def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): print_(str(err), file=sys.stderr) return 1 else: - for frame in id3.values(): + for frame in list(id3.values()): target.add(frame) id3 = target @@ -91,12 +91,12 @@ def copy(src, dst, merge, write_v1=True, excluded_tags=None, verbose=False): try: id3.save(dst, v1=(2 if write_v1 else 0), v2_version=v2_version) except Exception as err: - print_(u"Error saving", dst, u":\n%s" % text_type(err), + print_("Error saving", dst, ":\n%s" % text_type(err), file=sys.stderr) return 1 else: if verbose: - print_(u"Successfully saved", dst, file=sys.stderr) + print_("Successfully saved", dst, file=sys.stderr) return 0 @@ -120,12 +120,12 @@ def main(argv): (src, dst) = args if not os.path.isfile(src): - print_(u"File not found:", src, file=sys.stderr) + print_("File not found:", src, file=sys.stderr) parser.print_help(file=sys.stderr) return 1 if not os.path.isfile(dst): - printerr(u"File not found:", dst, file=sys.stderr) + printerr("File not found:", dst, file=sys.stderr) parser.print_help(file=sys.stderr) return 1 diff --git a/lib/mutagen/_tools/mid3iconv.py b/lib/mutagen/_tools/mid3iconv.py index f2d725a2..5faa3ba0 100755 --- a/lib/mutagen/_tools/mid3iconv.py +++ b/lib/mutagen/_tools/mid3iconv.py @@ -75,7 +75,7 @@ def update(options, filenames): for filename in filenames: with _sig.block(): if verbose != "quiet": - print_(u"Updating", filename) + print_("Updating", filename) if has_id3v1(filename) and not noupdate and force_v1: mutagen.id3.delete(filename, False, True) @@ -84,13 +84,13 @@ def update(options, filenames): id3 = mutagen.id3.ID3(filename) except mutagen.id3.ID3NoHeaderError: if verbose != "quiet": - print_(u"No ID3 header found; skipping...") + print_("No ID3 header found; skipping...") continue except Exception as err: print_(text_type(err), file=sys.stderr) continue - for tag in filter(lambda t: t.startswith(("T", "COMM")), id3): + for tag in [t for t in id3 if t.startswith(("T", "COMM"))]: frame = id3[tag] if isinstance(frame, mutagen.id3.TimeStampTextFrame): # non-unicode fields @@ -105,7 +105,7 @@ def update(options, filenames): continue else: frame.text = text - if not text or min(map(isascii, text)): + if not text or min(list(map(isascii, text))): frame.encoding = 3 else: frame.encoding = 1 @@ -154,9 +154,9 @@ def main(argv): for i, arg in enumerate(argv): if arg == "-v1": - argv[i] = fsnative(u"--force-v1") + argv[i] = fsnative("--force-v1") elif arg == "-removev1": - argv[i] = fsnative(u"--remove-v1") + argv[i] = fsnative("--remove-v1") (options, args) = parser.parse_args(argv[1:]) diff --git a/lib/mutagen/_tools/mid3v2.py b/lib/mutagen/_tools/mid3v2.py index 8d21c13e..4d813536 100755 --- a/lib/mutagen/_tools/mid3v2.py +++ b/lib/mutagen/_tools/mid3v2.py @@ -55,23 +55,23 @@ Any editing operation will cause the ID3 tag to be upgraded to ID3v2.4. def list_frames(option, opt, value, parser): - items = mutagen.id3.Frames.items() + items = list(mutagen.id3.Frames.items()) for name, frame in sorted(items): - print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) + print_(" --%s %s" % (name, frame.__doc__.split("\n")[0])) raise SystemExit def list_frames_2_2(option, opt, value, parser): - items = mutagen.id3.Frames_2_2.items() + items = list(mutagen.id3.Frames_2_2.items()) items.sort() for name, frame in items: - print_(u" --%s %s" % (name, frame.__doc__.split("\n")[0])) + print_(" --%s %s" % (name, frame.__doc__.split("\n")[0])) raise SystemExit def list_genres(option, opt, value, parser): for i, genre in enumerate(mutagen.id3.TCON.GENRES): - print_(u"%3d: %s" % (i, genre)) + print_("%3d: %s" % (i, genre)) raise SystemExit @@ -79,7 +79,7 @@ def delete_tags(filenames, v1, v2): for filename in filenames: with _sig.block(): if verbose: - print_(u"deleting ID3 tag info in", filename, file=sys.stderr) + print_("deleting ID3 tag info in", filename, file=sys.stderr) mutagen.id3.delete(filename, v1, v2) @@ -95,13 +95,13 @@ def delete_frames(deletes, filenames): for filename in filenames: with _sig.block(): if verbose: - print_(u"deleting %s from" % deletes, filename, + print_("deleting %s from" % deletes, filename, file=sys.stderr) try: id3 = mutagen.id3.ID3(filename) except mutagen.id3.ID3NoHeaderError: if verbose: - print_(u"No ID3 header found; skipping.", file=sys.stderr) + print_("No ID3 header found; skipping.", file=sys.stderr) except Exception as err: print_(text_type(err), file=sys.stderr) raise SystemExit(1) @@ -177,7 +177,7 @@ def write_files(edits, filenames, escape): try: value = value_from_fsnative(value, escape) except ValueError as err: - error(u"%s: %s" % (frame, text_type(err))) + error("%s: %s" % (frame, text_type(err))) assert isinstance(value, text_type) @@ -205,18 +205,18 @@ def write_files(edits, filenames, escape): for filename in filenames: with _sig.block(): if verbose: - print_(u"Writing", filename, file=sys.stderr) + print_("Writing", filename, file=sys.stderr) try: id3 = mutagen.id3.ID3(filename) except mutagen.id3.ID3NoHeaderError: if verbose: - print_(u"No ID3 header found; creating a new tag", + print_("No ID3 header found; creating a new tag", file=sys.stderr) id3 = mutagen.id3.ID3() except Exception as err: print_(str(err), file=sys.stderr) continue - for (frame, vlist) in edits.items(): + for (frame, vlist) in list(edits.items()): if frame == "POPM": for value in vlist: values = string_split(value, ":") @@ -240,13 +240,13 @@ def write_files(edits, filenames, escape): if len(values) >= 2: desc = values[1] else: - desc = u"cover" + desc = "cover" if len(values) >= 3: try: picture_type = int(values[2]) except ValueError: - error(u"Invalid picture type: %r" % values[1]) + error("Invalid picture type: %r" % values[1]) else: picture_type = PictureType.COVER_FRONT @@ -287,7 +287,7 @@ def write_files(edits, filenames, escape): for value in vlist: values = string_split(value, ":") if len(values) != 2: - error(u"Invalid value: %r" % values) + error("Invalid value: %r" % values) owner = values[0] data = values[1].encode("utf-8") frame = mutagen.id3.UFID(owner=owner, data=data) @@ -318,7 +318,7 @@ def list_tags(filenames): try: id3 = mutagen.id3.ID3(filename, translate=False) except mutagen.id3.ID3NoHeaderError: - print_(u"No ID3 header found; skipping.") + print_("No ID3 header found; skipping.") except Exception as err: print_(text_type(err), file=sys.stderr) raise SystemExit(1) @@ -332,12 +332,12 @@ def list_tags_raw(filenames): try: id3 = mutagen.id3.ID3(filename, translate=False) except mutagen.id3.ID3NoHeaderError: - print_(u"No ID3 header found; skipping.") + print_("No ID3 header found; skipping.") except Exception as err: print_(text_type(err), file=sys.stderr) raise SystemExit(1) else: - for frame in id3.values(): + for frame in list(id3.values()): print_(text_type(repr(frame))) @@ -387,46 +387,46 @@ def main(argv): parser.add_option( "-a", "--artist", metavar='"ARTIST"', action="callback", help="Set the artist information", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--TPE1"), + callback=lambda *args: args[3].edits.append((fsnative("--TPE1"), args[2]))) parser.add_option( "-A", "--album", metavar='"ALBUM"', action="callback", help="Set the album title information", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--TALB"), + callback=lambda *args: args[3].edits.append((fsnative("--TALB"), args[2]))) parser.add_option( "-t", "--song", metavar='"SONG"', action="callback", help="Set the song title information", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--TIT2"), + callback=lambda *args: args[3].edits.append((fsnative("--TIT2"), args[2]))) parser.add_option( "-c", "--comment", metavar='"DESCRIPTION":"COMMENT":"LANGUAGE"', action="callback", help="Set the comment information", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--COMM"), + callback=lambda *args: args[3].edits.append((fsnative("--COMM"), args[2]))) parser.add_option( "-p", "--picture", metavar='"FILENAME":"DESCRIPTION":"IMAGE-TYPE":"MIME-TYPE"', action="callback", help="Set the picture", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--APIC"), + callback=lambda *args: args[3].edits.append((fsnative("--APIC"), args[2]))) parser.add_option( "-g", "--genre", metavar='"GENRE"', action="callback", help="Set the genre or genre number", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--TCON"), + callback=lambda *args: args[3].edits.append((fsnative("--TCON"), args[2]))) parser.add_option( "-y", "--year", "--date", metavar='YYYY[-MM-DD]', action="callback", help="Set the year/date", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--TDRC"), + callback=lambda *args: args[3].edits.append((fsnative("--TDRC"), args[2]))) parser.add_option( "-T", "--track", metavar='"num/num"', action="callback", help="Set the track number/(optional) total tracks", type="string", - callback=lambda *args: args[3].edits.append((fsnative(u"--TRCK"), + callback=lambda *args: args[3].edits.append((fsnative("--TRCK"), args[2]))) - for key, frame in mutagen.id3.Frames.items(): + for key, frame in list(mutagen.id3.Frames.items()): if (issubclass(frame, mutagen.id3.TextFrame) or issubclass(frame, mutagen.id3.UrlFrame) or issubclass(frame, mutagen.id3.POPM) diff --git a/lib/mutagen/_tools/moggsplit.py b/lib/mutagen/_tools/moggsplit.py index 12cd2522..8246e132 100755 --- a/lib/mutagen/_tools/moggsplit.py +++ b/lib/mutagen/_tools/moggsplit.py @@ -66,7 +66,7 @@ def main(argv): if m3u: m3u.write(new_filename + "\r\n") fileobjs[page.serial].write(page.write()) - for f in fileobjs.values(): + for f in list(fileobjs.values()): f.close() diff --git a/lib/mutagen/_tools/mutagen_inspect.py b/lib/mutagen/_tools/mutagen_inspect.py index 6bd6c614..c16d38cf 100755 --- a/lib/mutagen/_tools/mutagen_inspect.py +++ b/lib/mutagen/_tools/mutagen_inspect.py @@ -30,14 +30,14 @@ def main(argv): raise SystemExit(parser.print_help() or 1) for filename in args: - print_(u"--", filename) + print_("--", filename) try: - print_(u"-", File(filename).pprint()) + print_("-", File(filename).pprint()) except AttributeError: - print_(u"- Unknown file type") + print_("- Unknown file type") except Exception as err: print_(text_type(err)) - print_(u"") + print_("") def entry_point(): diff --git a/lib/mutagen/_tools/mutagen_pony.py b/lib/mutagen/_tools/mutagen_pony.py index e4a496c7..7db4b668 100755 --- a/lib/mutagen/_tools/mutagen_pony.py +++ b/lib/mutagen/_tools/mutagen_pony.py @@ -83,7 +83,7 @@ def check_dir(path): from mutagen.mp3 import MP3 rep = Report(path) - print_(u"Scanning", path) + print_("Scanning", path) for path, dirs, files in os.walk(path): files.sort() for fn in files: @@ -105,7 +105,7 @@ def check_dir(path): def main(argv): if len(argv) == 1: - print_(u"Usage:", argv[0], u"directory ...") + print_("Usage:", argv[0], "directory ...") else: for path in argv[1:]: check_dir(path) diff --git a/lib/mutagen/_util.py b/lib/mutagen/_util.py index a178eaa4..887d4fa2 100755 --- a/lib/mutagen/_util.py +++ b/lib/mutagen/_util.py @@ -93,7 +93,7 @@ def fileobj_name(fileobj): path type, but might be empty or non-existent. """ - value = getattr(fileobj, "name", u"") + value = getattr(fileobj, "name", "") if not isinstance(value, (text_type, bytes)): value = text_type(value) return value @@ -360,7 +360,7 @@ def flags(cls): def str_(self): value = int(self) matches = [] - for k, v in map_.items(): + for k, v in list(map_.items()): if value & k: matches.append("%s.%s" % (type(self).__name__, v)) value &= ~k @@ -395,7 +395,7 @@ class DictMixin(object): """ def __iter__(self): - return iter(self.keys()) + return iter(list(self.keys())) def __has_key(self, key): try: @@ -411,19 +411,19 @@ class DictMixin(object): __contains__ = __has_key if PY2: - iterkeys = lambda self: iter(self.keys()) + iterkeys = lambda self: iter(list(self.keys())) def values(self): - return [self[k] for k in self.keys()] + return [self[k] for k in list(self.keys())] if PY2: - itervalues = lambda self: iter(self.values()) + itervalues = lambda self: iter(list(self.values())) def items(self): - return list(izip(self.keys(), self.values())) + return list(zip(list(self.keys()), list(self.values()))) if PY2: - iteritems = lambda s: iter(s.items()) + iteritems = lambda s: iter(list(s.items())) def clear(self): for key in list(self.keys()): @@ -443,7 +443,7 @@ class DictMixin(object): return value def popitem(self): - for key in self.keys(): + for key in list(self.keys()): break else: raise KeyError("dictionary is empty") @@ -455,7 +455,7 @@ class DictMixin(object): other = {} try: - for key, value in other.items(): + for key, value in list(other.items()): self.__setitem__(key, value) except AttributeError: for key, value in other: @@ -475,18 +475,18 @@ class DictMixin(object): return default def __repr__(self): - return repr(dict(self.items())) + return repr(dict(list(self.items()))) def __eq__(self, other): - return dict(self.items()) == other + return dict(list(self.items())) == other def __lt__(self, other): - return dict(self.items()) < other + return dict(list(self.items())) < other __hash__ = object.__hash__ def __len__(self): - return len(self.keys()) + return len(list(self.keys())) class DictProxy(DictMixin): @@ -504,7 +504,7 @@ class DictProxy(DictMixin): del(self.__dict[key]) def keys(self): - return self.__dict.keys() + return list(self.__dict.keys()) def _fill_cdata(cls): @@ -568,8 +568,8 @@ class cdata(object): error = error bitswap = b''.join( - chr_(sum(((val >> i) & 1) << (7 - i) for i in xrange(8))) - for val in xrange(256)) + chr_(sum(((val >> i) & 1) << (7 - i) for i in range(8))) + for val in range(256)) test_bit = staticmethod(lambda value, n: bool((value >> n) & 1)) @@ -976,15 +976,15 @@ def decode_terminated(data, encoding, strict=True): r = [] for i, b in enumerate(iterbytes(data)): c = decoder.decode(b) - if c == u"\x00": - return u"".join(r), data[i + 1:] + if c == "\x00": + return "".join(r), data[i + 1:] r.append(c) else: # make sure the decoder is finished r.append(decoder.decode(b"", True)) if strict: raise ValueError("not null terminated") - return u"".join(r), b"" + return "".join(r), b"" class BitReaderError(Exception): @@ -1037,7 +1037,7 @@ class BitReader(object): raise BitReaderError("not enough data") return data - return bytes(bytearray(self.bits(8) for _ in xrange(count))) + return bytes(bytearray(self.bits(8) for _ in range(count))) def skip(self, count): """Skip `count` bits. diff --git a/lib/mutagen/_vorbis.py b/lib/mutagen/_vorbis.py index 5831453c..ec2fc8b7 100755 --- a/lib/mutagen/_vorbis.py +++ b/lib/mutagen/_vorbis.py @@ -71,7 +71,7 @@ class VComment(mutagen.Tags, list): vendor (text): the stream 'vendor' (i.e. writer); default 'Mutagen' """ - vendor = u"Mutagen " + mutagen.version_string + vendor = "Mutagen " + mutagen.version_string def __init__(self, data=None, *args, **kwargs): self._size = 0 @@ -104,7 +104,7 @@ class VComment(mutagen.Tags, list): vendor_length = cdata.uint_le(fileobj.read(4)) self.vendor = fileobj.read(vendor_length).decode('utf-8', errors) count = cdata.uint_le(fileobj.read(4)) - for i in xrange(count): + for i in range(count): length = cdata.uint_le(fileobj.read(4)) try: string = fileobj.read(length).decode('utf-8', errors) @@ -116,7 +116,7 @@ class VComment(mutagen.Tags, list): if errors == "ignore": continue elif errors == "replace": - tag, value = u"unknown%d" % i, string + tag, value = "unknown%d" % i, string else: reraise(VorbisEncodingError, err, sys.exc_info()[2]) try: @@ -217,8 +217,8 @@ class VComment(mutagen.Tags, list): return value.decode('utf-8', 'replace') return value - tags = [u"%s=%s" % (_decode(k), _decode(v)) for k, v in self] - return u"\n".join(tags) + tags = ["%s=%s" % (_decode(k), _decode(v)) for k, v in self] + return "\n".join(tags) class VCommentDict(VComment, DictMixin): @@ -324,4 +324,4 @@ class VCommentDict(VComment, DictMixin): def as_dict(self): """Return a copy of the comment data in a real dict.""" - return dict([(key, self[key]) for key in self.keys()]) + return dict([(key, self[key]) for key in list(self.keys())]) diff --git a/lib/mutagen/aac.py b/lib/mutagen/aac.py index fa6f7064..ef220180 100755 --- a/lib/mutagen/aac.py +++ b/lib/mutagen/aac.py @@ -243,7 +243,7 @@ class ProgramConfigElement(object): elms = num_front_channel_elements + num_side_channel_elements + \ num_back_channel_elements channels = 0 - for i in xrange(elms): + for i in range(elms): channels += 1 element_is_cpe = r.bits(1) if element_is_cpe: @@ -323,7 +323,7 @@ class AACInfo(StreamInfo): self.channels = pce.channels # other pces.. - for i in xrange(npce): + for i in range(npce): ProgramConfigElement(r) r.align() except BitReaderError as e: @@ -347,7 +347,7 @@ class AACInfo(StreamInfo): # Try up to X times to find a sync word and read up to Y frames. # If more than Z frames are valid we assume a valid stream offset = start_offset - for i in xrange(max_sync_tries): + for i in range(max_sync_tries): fileobj.seek(offset) s = _ADTSStream.find_stream(fileobj, max_initial_read) if s is None: @@ -355,7 +355,7 @@ class AACInfo(StreamInfo): # start right after the last found offset offset += s.offset + 1 - for i in xrange(frames_max): + for i in range(frames_max): if not s.parse_frame(): break if not s.sync(max_resync_read): @@ -378,7 +378,7 @@ class AACInfo(StreamInfo): self.length = float(s.samples * stream_size) / (s.size * s.frequency) def pprint(self): - return u"AAC (%s), %d Hz, %.2f seconds, %d channel(s), %d bps" % ( + return "AAC (%s), %d Hz, %.2f seconds, %d channel(s), %d bps" % ( self._type, self.sample_rate, self.length, self.channels, self.bitrate) diff --git a/lib/mutagen/aiff.py b/lib/mutagen/aiff.py index 74cf5f88..814c042e 100755 --- a/lib/mutagen/aiff.py +++ b/lib/mutagen/aiff.py @@ -39,8 +39,8 @@ _HUGE_VAL = 1.79769313486231e+308 def is_valid_chunk_id(id): assert isinstance(id, text_type) - return ((len(id) <= 4) and (min(id) >= u' ') and - (max(id) <= u'~')) + return ((len(id) <= 4) and (min(id) >= ' ') and + (max(id) <= '~')) def read_float(data): # 10 bytes @@ -140,7 +140,7 @@ class IFFFile(object): # AIFF Files always start with the FORM chunk which contains a 4 byte # ID before the start of other chunks fileobj.seek(0) - self.__chunks[u'FORM'] = IFFChunk(fileobj) + self.__chunks['FORM'] = IFFChunk(fileobj) # Skip past the 4 byte FORM id fileobj.seek(IFFChunk.HEADER_SIZE + 4) @@ -153,7 +153,7 @@ class IFFFile(object): # Load all of the chunks while True: try: - chunk = IFFChunk(fileobj, self[u'FORM']) + chunk = IFFChunk(fileobj, self['FORM']) except InvalidChunk: break self.__chunks[chunk.id.strip()] = chunk @@ -209,8 +209,8 @@ class IFFFile(object): self.__fileobj.seek(self.__next_offset) self.__fileobj.write(pack('>4si', id_.ljust(4).encode('ascii'), 0)) self.__fileobj.seek(self.__next_offset) - chunk = IFFChunk(self.__fileobj, self[u'FORM']) - self[u'FORM']._update_size(self[u'FORM'].data_size + chunk.size) + chunk = IFFChunk(self.__fileobj, self['FORM']) + self['FORM']._update_size(self['FORM'].data_size + chunk.size) self.__chunks[id_] = chunk self.__next_offset = chunk.offset + chunk.size @@ -242,7 +242,7 @@ class AIFFInfo(StreamInfo): iff = IFFFile(fileobj) try: - common_chunk = iff[u'COMM'] + common_chunk = iff['COMM'] except KeyError as e: raise error(str(e)) @@ -260,7 +260,7 @@ class AIFFInfo(StreamInfo): self.length = frame_count / float(self.sample_rate) def pprint(self): - return u"%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % ( + return "%d channel AIFF @ %d bps, %s Hz, %.2f seconds" % ( self.channels, self.bitrate, self.sample_rate, self.length) @@ -269,7 +269,7 @@ class _IFFID3(ID3): def _pre_load_header(self, fileobj): try: - fileobj.seek(IFFFile(fileobj)[u'ID3'].data_offset) + fileobj.seek(IFFFile(fileobj)['ID3'].data_offset) except (InvalidChunk, KeyError): raise ID3NoHeaderError("No ID3 chunk") @@ -282,10 +282,10 @@ class _IFFID3(ID3): iff_file = IFFFile(fileobj) - if u'ID3' not in iff_file: - iff_file.insert_chunk(u'ID3') + if 'ID3' not in iff_file: + iff_file.insert_chunk('ID3') - chunk = iff_file[u'ID3'] + chunk = iff_file['ID3'] try: data = self._prepare_data( @@ -316,7 +316,7 @@ def delete(filething): """Completely removes the ID3 chunk from the AIFF file""" try: - del IFFFile(filething.fileobj)[u'ID3'] + del IFFFile(filething.fileobj)['ID3'] except KeyError: pass diff --git a/lib/mutagen/apev2.py b/lib/mutagen/apev2.py index f4d5c5da..7b485466 100755 --- a/lib/mutagen/apev2.py +++ b/lib/mutagen/apev2.py @@ -52,16 +52,16 @@ def is_valid_apev2_key(key): return False # PY26 - Change to set literal syntax (since set is faster than list here) - return ((2 <= len(key) <= 255) and (min(key) >= u' ') and - (max(key) <= u'~') and - (key not in [u"OggS", u"TAG", u"ID3", u"MP+"])) + return ((2 <= len(key) <= 255) and (min(key) >= ' ') and + (max(key) <= '~') and + (key not in ["OggS", "TAG", "ID3", "MP+"])) # There are three different kinds of APE tag values. # "0: Item contains text information coded in UTF-8 # 1: Item contains binary information # 2: Item is a locator of external stored information [e.g. URL] # 3: reserved" -TEXT, BINARY, EXTERNAL = xrange(3) +TEXT, BINARY, EXTERNAL = range(3) HAS_HEADER = 1 << 31 HAS_NO_FOOTER = 1 << 30 @@ -263,7 +263,7 @@ class _CIDictProxy(DictMixin): del(self.__dict[lower]) def keys(self): - return [self.__casemap.get(key, key) for key in self.__dict.keys()] + return [self.__casemap.get(key, key) for key in list(self.__dict.keys())] class APEv2(_CIDictProxy, Metadata): @@ -280,7 +280,7 @@ class APEv2(_CIDictProxy, Metadata): """Return tag key=value pairs in a human-readable format.""" items = sorted(self.items()) - return u"\n".join(u"%s=%s" % (k, v.pprint()) for k, v in items) + return "\n".join("%s=%s" % (k, v.pprint()) for k, v in items) @convert_error(IOError, error) @loadfile() @@ -303,7 +303,7 @@ class APEv2(_CIDictProxy, Metadata): fileobj = cBytesIO(tag) - for i in xrange(count): + for i in range(count): tag_data = fileobj.read(8) # someone writes wrong item counts if not tag_data: @@ -401,7 +401,7 @@ class APEv2(_CIDictProxy, Metadata): items.append(v) # list? text. - value = APEValue(u"\0".join(items), TEXT) + value = APEValue("\0".join(items), TEXT) else: if PY3: value = APEValue(value, BINARY) @@ -441,7 +441,7 @@ class APEv2(_CIDictProxy, Metadata): fileobj.seek(0, 2) tags = [] - for key, value in self.items(): + for key, value in list(self.items()): # Packed format for an item: # 4B: Value length # 4B: Value type @@ -627,13 +627,13 @@ class APETextValue(_APEUtf8Value, MutableSequence): def __iter__(self): """Iterate over the strings of the value (not the characters)""" - return iter(self.value.split(u"\0")) + return iter(self.value.split("\0")) def __getitem__(self, index): - return self.value.split(u"\0")[index] + return self.value.split("\0")[index] def __len__(self): - return self.value.count(u"\0") + 1 + return self.value.count("\0") + 1 def __setitem__(self, index, value): if not isinstance(value, text_type): @@ -644,7 +644,7 @@ class APETextValue(_APEUtf8Value, MutableSequence): values = list(self) values[index] = value - self.value = u"\0".join(values) + self.value = "\0".join(values) def insert(self, index, value): if not isinstance(value, text_type): @@ -655,15 +655,15 @@ class APETextValue(_APEUtf8Value, MutableSequence): values = list(self) values.insert(index, value) - self.value = u"\0".join(values) + self.value = "\0".join(values) def __delitem__(self, index): values = list(self) del values[index] - self.value = u"\0".join(values) + self.value = "\0".join(values) def pprint(self): - return u" / ".join(self) + return " / ".join(self) @swap_to_string @@ -697,7 +697,7 @@ class APEBinaryValue(_APEValue): return self.value < other def pprint(self): - return u"[%d bytes]" % len(self) + return "[%d bytes]" % len(self) class APEExtValue(_APEUtf8Value): @@ -709,7 +709,7 @@ class APEExtValue(_APEUtf8Value): kind = EXTERNAL def pprint(self): - return u"[External] %s" % self.value + return "[External] %s" % self.value class APEv2File(FileType): @@ -731,7 +731,7 @@ class APEv2File(FileType): @staticmethod def pprint(): - return u"Unknown format with APEv2 tag." + return "Unknown format with APEv2 tag." @loadfile() def load(self, filething): diff --git a/lib/mutagen/asf/__init__.py b/lib/mutagen/asf/__init__.py index c5d81596..bcee1be4 100755 --- a/lib/mutagen/asf/__init__.py +++ b/lib/mutagen/asf/__init__.py @@ -51,26 +51,26 @@ class ASFInfo(StreamInfo): sample_rate = 0 bitrate = 0 channels = 0 - codec_type = u"" - codec_name = u"" - codec_description = u"" + codec_type = "" + codec_name = "" + codec_description = "" def __init__(self): self.length = 0.0 self.sample_rate = 0 self.bitrate = 0 self.channels = 0 - self.codec_type = u"" - self.codec_name = u"" - self.codec_description = u"" + self.codec_type = "" + self.codec_name = "" + self.codec_description = "" def pprint(self): """Returns: text: a stream information text summary """ - s = u"ASF (%s) %d bps, %s Hz, %d channels, %.2f seconds" % ( - self.codec_type or self.codec_name or u"???", self.bitrate, + s = "ASF (%s) %d bps, %s Hz, %d channels, %.2f seconds" % ( + self.codec_type or self.codec_name or "???", self.bitrate, self.sample_rate, self.channels, self.length) return s @@ -163,7 +163,7 @@ class ASFTags(list, DictMixin, Tags): def keys(self): """Return a sequence of all keys in the comment.""" - return self and set(next(izip(*self))) + return self and set(next(zip(*self))) def as_dict(self): """Return a copy of the comment data in a real dict.""" diff --git a/lib/mutagen/asf/_objects.py b/lib/mutagen/asf/_objects.py index 1c15d613..ccb1b69b 100755 --- a/lib/mutagen/asf/_objects.py +++ b/lib/mutagen/asf/_objects.py @@ -89,7 +89,7 @@ class HeaderObject(BaseObject): remaining_header, num_objects = cls.parse_size(fileobj) remaining_header -= 30 - for i in xrange(num_objects): + for i in range(num_objects): obj_header_size = 24 if remaining_header < obj_header_size: raise ASFHeaderError("invalid header size") @@ -180,11 +180,11 @@ class ContentDescriptionObject(BaseObject): GUID = guid2bytes("75B22633-668E-11CF-A6D9-00AA0062CE6C") NAMES = [ - u"Title", - u"Author", - u"Copyright", - u"Description", - u"Rating", + "Title", + "Author", + "Copyright", + "Description", + "Rating", ] def parse(self, asf, data): @@ -195,12 +195,12 @@ class ContentDescriptionObject(BaseObject): for length in lengths: end = pos + length if length > 0: - texts.append(data[pos:end].decode("utf-16-le").strip(u"\x00")) + texts.append(data[pos:end].decode("utf-16-le").strip("\x00")) else: texts.append(None) pos = end - for key, value in izip(self.NAMES, texts): + for key, value in zip(self.NAMES, texts): if value is not None: value = ASFUnicodeAttribute(value=value) asf._tags.setdefault(self.GUID, []).append((key, value)) @@ -214,7 +214,7 @@ class ContentDescriptionObject(BaseObject): return b"" texts = [render_text(x) for x in self.NAMES] - data = struct.pack(" u'\x7f': + if v and max(v) > '\x7f': enc = 3 break @@ -215,7 +215,7 @@ class EasyID3(DictMixin, Metadata): def __setitem__(self, key, value): if PY2: - if isinstance(value, basestring): + if isinstance(value, str): value = [value] else: if isinstance(value, text_type): @@ -235,7 +235,7 @@ class EasyID3(DictMixin, Metadata): def keys(self): keys = [] - for key in self.Get.keys(): + for key in list(self.Get.keys()): if key in self.List: keys.extend(self.List[key](self.__id3, key)) elif key in self: @@ -398,7 +398,7 @@ def gain_get(id3, key): except KeyError: raise EasyID3KeyError(key) else: - return [u"%+f dB" % frame.gain] + return ["%+f dB" % frame.gain] def gain_set(id3, key, value): @@ -432,7 +432,7 @@ def peak_get(id3, key): except KeyError: raise EasyID3KeyError(key) else: - return [u"%f" % frame.peak] + return ["%f" % frame.peak] def peak_set(id3, key, value): @@ -520,26 +520,26 @@ EasyID3.RegisterKey("replaygain_*_peak", peak_get, peak_set, peak_delete) # http://bugs.musicbrainz.org/ticket/1383 # http://musicbrainz.org/doc/MusicBrainzTag for desc, key in iteritems({ - u"MusicBrainz Artist Id": "musicbrainz_artistid", - u"MusicBrainz Album Id": "musicbrainz_albumid", - u"MusicBrainz Album Artist Id": "musicbrainz_albumartistid", - u"MusicBrainz TRM Id": "musicbrainz_trmid", - u"MusicIP PUID": "musicip_puid", - u"MusicMagic Fingerprint": "musicip_fingerprint", - u"MusicBrainz Album Status": "musicbrainz_albumstatus", - u"MusicBrainz Album Type": "musicbrainz_albumtype", - u"MusicBrainz Album Release Country": "releasecountry", - u"MusicBrainz Disc Id": "musicbrainz_discid", - u"ASIN": "asin", - u"ALBUMARTISTSORT": "albumartistsort", - u"PERFORMER": "performer", - u"BARCODE": "barcode", - u"CATALOGNUMBER": "catalognumber", - u"MusicBrainz Release Track Id": "musicbrainz_releasetrackid", - u"MusicBrainz Release Group Id": "musicbrainz_releasegroupid", - u"MusicBrainz Work Id": "musicbrainz_workid", - u"Acoustid Fingerprint": "acoustid_fingerprint", - u"Acoustid Id": "acoustid_id", + "MusicBrainz Artist Id": "musicbrainz_artistid", + "MusicBrainz Album Id": "musicbrainz_albumid", + "MusicBrainz Album Artist Id": "musicbrainz_albumartistid", + "MusicBrainz TRM Id": "musicbrainz_trmid", + "MusicIP PUID": "musicip_puid", + "MusicMagic Fingerprint": "musicip_fingerprint", + "MusicBrainz Album Status": "musicbrainz_albumstatus", + "MusicBrainz Album Type": "musicbrainz_albumtype", + "MusicBrainz Album Release Country": "releasecountry", + "MusicBrainz Disc Id": "musicbrainz_discid", + "ASIN": "asin", + "ALBUMARTISTSORT": "albumartistsort", + "PERFORMER": "performer", + "BARCODE": "barcode", + "CATALOGNUMBER": "catalognumber", + "MusicBrainz Release Track Id": "musicbrainz_releasetrackid", + "MusicBrainz Release Group Id": "musicbrainz_releasegroupid", + "MusicBrainz Work Id": "musicbrainz_workid", + "Acoustid Fingerprint": "acoustid_fingerprint", + "Acoustid Id": "acoustid_id", }): EasyID3.RegisterTXXXKey(key, desc) diff --git a/lib/mutagen/easymp4.py b/lib/mutagen/easymp4.py index 98530c62..a026e754 100755 --- a/lib/mutagen/easymp4.py +++ b/lib/mutagen/easymp4.py @@ -121,7 +121,7 @@ class EasyMP4Tags(DictMixin, Tags): ret = [] for (track, total) in tags[atomid]: if total: - ret.append(u"%d/%d" % (track, total)) + ret.append("%d/%d" % (track, total)) else: ret.append(text_type(track)) return ret @@ -188,7 +188,7 @@ class EasyMP4Tags(DictMixin, Tags): key = key.lower() if PY2: - if isinstance(value, basestring): + if isinstance(value, str): value = [value] else: if isinstance(value, text_type): @@ -210,7 +210,7 @@ class EasyMP4Tags(DictMixin, Tags): def keys(self): keys = [] - for key in self.Get.keys(): + for key in list(self.Get.keys()): if key in self.List: keys.extend(self.List[key](self.__mp4, key)) elif key in self: @@ -226,7 +226,7 @@ class EasyMP4Tags(DictMixin, Tags): strings.append("%s=%s" % (key, value)) return "\n".join(strings) -for atomid, key in { +for atomid, key in list({ '\xa9nam': 'title', '\xa9alb': 'album', '\xa9ART': 'artist', @@ -242,10 +242,10 @@ for atomid, key in { 'soar': 'artistsort', 'sonm': 'titlesort', 'soco': 'composersort', -}.items(): +}.items()): EasyMP4Tags.RegisterTextKey(key, atomid) -for name, key in { +for name, key in list({ 'MusicBrainz Artist Id': 'musicbrainz_artistid', 'MusicBrainz Track Id': 'musicbrainz_trackid', 'MusicBrainz Album Id': 'musicbrainz_albumid', @@ -254,18 +254,18 @@ for name, key in { 'MusicBrainz Album Status': 'musicbrainz_albumstatus', 'MusicBrainz Album Type': 'musicbrainz_albumtype', 'MusicBrainz Release Country': 'releasecountry', -}.items(): +}.items()): EasyMP4Tags.RegisterFreeformKey(key, name) -for name, key in { +for name, key in list({ "tmpo": "bpm", -}.items(): +}.items()): EasyMP4Tags.RegisterIntKey(key, name) -for name, key in { +for name, key in list({ "trkn": "tracknumber", "disk": "discnumber", -}.items(): +}.items()): EasyMP4Tags.RegisterIntPairKey(key, name) diff --git a/lib/mutagen/flac.py b/lib/mutagen/flac.py index 3ce27dbb..825e947a 100755 --- a/lib/mutagen/flac.py +++ b/lib/mutagen/flac.py @@ -259,7 +259,7 @@ class StreamInfo(MetadataBlock, mutagen.StreamInfo): return f.getvalue() def pprint(self): - return u"FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) + return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate) class SeekPoint(tuple): @@ -484,7 +484,7 @@ class CueSheet(MetadataBlock): self.lead_in_samples = lead_in_samples self.compact_disc = bool(flags & 0x80) self.tracks = [] - for i in xrange(num_tracks): + for i in range(num_tracks): track = data.read(self.__CUESHEET_TRACK_SIZE) start_offset, track_number, isrc_padded, flags, num_indexes = \ struct.unpack(self.__CUESHEET_TRACK_FORMAT, track) @@ -493,7 +493,7 @@ class CueSheet(MetadataBlock): pre_emphasis = bool(flags & 0x40) val = CueSheetTrack( track_number, start_offset, isrc, type_, pre_emphasis) - for j in xrange(num_indexes): + for j in range(num_indexes): index = data.read(self.__CUESHEET_TRACKINDEX_SIZE) index_offset, index_number = struct.unpack( self.__CUESHEET_TRACKINDEX_FORMAT, index) @@ -574,8 +574,8 @@ class Picture(MetadataBlock): def __init__(self, data=None): self.type = 0 - self.mime = u'' - self.desc = u'' + self.mime = '' + self.desc = '' self.width = 0 self.height = 0 self.depth = 0 diff --git a/lib/mutagen/id3/_file.py b/lib/mutagen/id3/_file.py index 69c3a3f0..3d19389a 100755 --- a/lib/mutagen/id3/_file.py +++ b/lib/mutagen/id3/_file.py @@ -154,7 +154,7 @@ class ID3(ID3Tags, mutagen.Metadata): raise self.version = ID3Header._V11 - for v in frames.values(): + for v in list(frames.values()): self.add(v) else: # XXX: attach to the header object so we have it in spec parsing.. @@ -352,7 +352,7 @@ class ID3FileType(mutagen.FileType): @staticmethod def pprint(): - return u"Unknown format with ID3 tag" + return "Unknown format with ID3 tag" @staticmethod def score(filename, fileobj, header_data): diff --git a/lib/mutagen/id3/_frames.py b/lib/mutagen/id3/_frames.py index 5e3de5a6..593276f6 100755 --- a/lib/mutagen/id3/_frames.py +++ b/lib/mutagen/id3/_frames.py @@ -61,7 +61,7 @@ class Frame(object): # ask the sub class to fill in our data other._to_other(self) else: - for checker, val in izip(self._framespec, args): + for checker, val in zip(self._framespec, args): setattr(self, checker.name, val) for checker in self._framespec[len(args):]: setattr(self, checker.name, @@ -329,11 +329,11 @@ class CHAP(Frame): __hash__ = Frame.__hash__ def _pprint(self): - frame_pprint = u"" + frame_pprint = "" for frame in itervalues(self.sub_frames): for line in frame.pprint().splitlines(): frame_pprint += "\n" + " " * 4 + line - return u"%s time=%d..%d offset=%d..%d%s" % ( + return "%s time=%d..%d offset=%d..%d%s" % ( self.element_id, self.start_time, self.end_time, self.start_offset, self.end_offset, frame_pprint) @@ -368,13 +368,13 @@ class CTOC(Frame): self.child_element_ids == other.child_element_ids def _pprint(self): - frame_pprint = u"" + frame_pprint = "" if getattr(self, "sub_frames", None): frame_pprint += "\n" + "\n".join( - [" " * 4 + f.pprint() for f in self.sub_frames.values()]) - return u"%s flags=%d child_element_ids=%s%s" % ( + [" " * 4 + f.pprint() for f in list(self.sub_frames.values())]) + return "%s flags=%d child_element_ids=%s%s" % ( self.element_id, int(self.flags), - u",".join(self.child_element_ids), frame_pprint) + ",".join(self.child_element_ids), frame_pprint) @swap_to_string @@ -395,14 +395,14 @@ class TextFrame(Frame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), + MultiSpec('text', EncodedTextSpec('text'), sep='\u0000', default=[]), ] def __bytes__(self): return text_type(self).encode('utf-8') def __str__(self): - return u'\u0000'.join(self.text) + return '\u0000'.join(self.text) def __eq__(self, other): if isinstance(other, bytes): @@ -451,7 +451,7 @@ class NumericTextFrame(TextFrame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000', + MultiSpec('text', EncodedNumericTextSpec('text'), sep='\u0000', default=[]), ] @@ -472,7 +472,7 @@ class NumericPartTextFrame(TextFrame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000', + MultiSpec('text', EncodedNumericPartTextSpec('text'), sep='\u0000', default=[]), ] @@ -490,17 +490,17 @@ class TimeStampTextFrame(TextFrame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - MultiSpec('text', TimeStampSpec('stamp'), sep=u',', default=[]), + MultiSpec('text', TimeStampSpec('stamp'), sep=',', default=[]), ] def __bytes__(self): return text_type(self).encode('utf-8') def __str__(self): - return u','.join([stamp.text for stamp in self.text]) + return ','.join([stamp.text for stamp in self.text]) def _pprint(self): - return u" / ".join([stamp.text for stamp in self.text]) + return " / ".join([stamp.text for stamp in self.text]) @swap_to_string @@ -574,11 +574,11 @@ class TCON(TextFrame): try: genres.append(self.GENRES[int(value)]) except IndexError: - genres.append(u"Unknown") + genres.append("Unknown") elif value == "CR": - genres.append(u"Cover") + genres.append("Cover") elif value == "RX": - genres.append(u"Remix") + genres.append("Remix") elif value: newgenres = [] genreid, dummy, genrename = genre_re.match(value).groups() @@ -589,11 +589,11 @@ class TCON(TextFrame): gid = text_type(self.GENRES[int(gid)]) newgenres.append(gid) elif gid == "CR": - newgenres.append(u"Cover") + newgenres.append("Cover") elif gid == "RX": - newgenres.append(u"Remix") + newgenres.append("Remix") else: - newgenres.append(u"Unknown") + newgenres.append("Unknown") if genrename: # "Unescaping" the first parenthesis @@ -860,7 +860,7 @@ class TXXX(TextFrame): _framespec = [ EncodingSpec('encoding'), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), + MultiSpec('text', EncodedTextSpec('text'), sep='\u0000', default=[]), ] @property @@ -1045,7 +1045,7 @@ class USLT(Frame): _framespec = [ EncodingSpec('encoding', default=Encoding.UTF16), - StringSpec('lang', length=3, default=u"XXX"), + StringSpec('lang', length=3, default="XXX"), EncodedTextSpec('desc'), EncodedTextSpec('text'), ] @@ -1072,7 +1072,7 @@ class SYLT(Frame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', length=3, default=u"XXX"), + StringSpec('lang', length=3, default="XXX"), ByteSpec('format', default=1), ByteSpec('type', default=0), EncodedTextSpec('desc'), @@ -1089,7 +1089,7 @@ class SYLT(Frame): __hash__ = Frame.__hash__ def __str__(self): - return u"".join(text for (text, time) in self.text) + return "".join(text for (text, time) in self.text) def __bytes__(self): return text_type(self).encode("utf-8") @@ -1106,7 +1106,7 @@ class COMM(TextFrame): EncodingSpec('encoding'), StringSpec('lang', length=3, default="XXX"), EncodedTextSpec('desc'), - MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000', default=[]), + MultiSpec('text', EncodedTextSpec('text'), sep='\u0000', default=[]), ] @property @@ -1263,7 +1263,7 @@ class APIC(Frame): return '%s:%s' % (self.FrameID, self.desc) def _merge_frame(self, other): - other.desc += u" " + other.desc += " " return other def _pprint(self): @@ -1550,7 +1550,7 @@ class USER(Frame): _framespec = [ EncodingSpec('encoding'), - StringSpec('lang', length=3, default=u"XXX"), + StringSpec('lang', length=3, default="XXX"), EncodedTextSpec('text'), ] @@ -1580,7 +1580,7 @@ class OWNE(Frame): _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('date', length=8, default=u"19700101"), + StringSpec('date', length=8, default="19700101"), EncodedTextSpec('seller'), ] @@ -1602,7 +1602,7 @@ class COMR(Frame): _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'), - StringSpec('valid_until', length=8, default=u"19700101"), + StringSpec('valid_until', length=8, default="19700101"), Latin1TextSpec('contact'), ByteSpec('format', default=0), EncodedTextSpec('seller'), diff --git a/lib/mutagen/id3/_id3v1.py b/lib/mutagen/id3/_id3v1.py index d41d00d0..df9303be 100755 --- a/lib/mutagen/id3/_id3v1.py +++ b/lib/mutagen/id3/_id3v1.py @@ -94,8 +94,8 @@ def ParseID3v1(data): def fix(data): return data.split(b"\x00")[0].strip().decode('latin1') - title, artist, album, year, comment = map( - fix, [title, artist, album, year, comment]) + title, artist, album, year, comment = list(map( + fix, [title, artist, album, year, comment])) frames = {} if title: @@ -123,8 +123,8 @@ def MakeID3v1(id3): v1 = {} - for v2id, name in {"TIT2": "title", "TPE1": "artist", - "TALB": "album"}.items(): + for v2id, name in list({"TIT2": "title", "TPE1": "artist", + "TALB": "album"}.items()): if v2id in id3: text = id3[v2id].text[0].encode('latin1', 'replace')[:30] else: diff --git a/lib/mutagen/id3/_specs.py b/lib/mutagen/id3/_specs.py index bb27f72c..48ed6369 100755 --- a/lib/mutagen/id3/_specs.py +++ b/lib/mutagen/id3/_specs.py @@ -278,7 +278,7 @@ class StringSpec(Spec): def __init__(self, name, length, default=None): if default is None: - default = u" " * length + default = " " * length super(StringSpec, self).__init__(name, default) self.len = length @@ -402,7 +402,7 @@ class RVASpec(Spec): class FrameIDSpec(StringSpec): def __init__(self, name, length): - super(FrameIDSpec, self).__init__(name, length, u"X" * length) + super(FrameIDSpec, self).__init__(name, length, "X" * length) def validate(self, frame, value): value = super(FrameIDSpec, self).validate(frame, value) @@ -464,7 +464,7 @@ class EncodedTextSpec(Spec): Encoding.UTF8: ('utf8', b'\x00'), } - def __init__(self, name, default=u""): + def __init__(self, name, default=""): super(EncodedTextSpec, self).__init__(name, default) def read(self, header, frame, data): @@ -522,7 +522,7 @@ class MultiSpec(Spec): data.append(self.specs[0].write(config, frame, v)) else: for record in value: - for v, s in izip(record, self.specs): + for v, s in zip(record, self.specs): data.append(s.write(config, frame, v)) return b''.join(data) @@ -534,14 +534,14 @@ class MultiSpec(Spec): return [self.specs[0].validate(frame, v) for v in value] else: return [ - [s.validate(frame, v) for (v, s) in izip(val, self.specs)] + [s.validate(frame, v) for (v, s) in zip(val, self.specs)] for val in value] raise ValueError('Invalid MultiSpec data: %r' % value) def _validate23(self, frame, value, **kwargs): if len(self.specs) != 1: return [[s._validate23(frame, v, **kwargs) - for (v, s) in izip(val, self.specs)] + for (v, s) in zip(val, self.specs)] for val in value] spec = self.specs[0] @@ -568,7 +568,7 @@ class EncodedNumericPartTextSpec(EncodedTextSpec): class Latin1TextSpec(Spec): - def __init__(self, name, default=u""): + def __init__(self, name, default=""): super(Latin1TextSpec, self).__init__(name, default) def read(self, header, frame, data): @@ -602,7 +602,7 @@ class ID3FramesSpec(Spec): from ._tags import ID3Tags v = ID3Tags() - for frame in value.values(): + for frame in list(value.values()): v.add(frame._get_v23_frame(**kwargs)) return v @@ -632,7 +632,7 @@ class Latin1TextListSpec(Spec): def read(self, header, frame, data): count, data = self._bspec.read(header, frame, data) entries = [] - for i in xrange(count): + for i in range(count): entry, data = self._lspec.read(header, frame, data) entries.append(entry) return entries, data @@ -683,7 +683,7 @@ class ID3TimeStamp(object): if part is None: break pieces.append(self.__formats[i] % part + self.__seps[i]) - return u''.join(pieces)[:-1] + return ''.join(pieces)[:-1] def set_text(self, text, splitre=re.compile('[-T:/.]|\s+')): year, month, day, hour, minute, second = \ @@ -736,7 +736,7 @@ class TimeStampSpec(EncodedTextSpec): class ChannelSpec(ByteSpec): (OTHER, MASTER, FRONTRIGHT, FRONTLEFT, BACKRIGHT, BACKLEFT, FRONTCENTRE, - BACKCENTRE, SUBWOOFER) = xrange(9) + BACKCENTRE, SUBWOOFER) = range(9) class VolumeAdjustmentSpec(Spec): @@ -771,7 +771,7 @@ class VolumePeakSpec(Spec): if vol_bytes + 1 > len(data): raise SpecError("not enough frame data") shift = ((8 - (bits & 7)) & 7) + (4 - vol_bytes) * 8 - for i in xrange(1, vol_bytes + 1): + for i in range(1, vol_bytes + 1): peak *= 256 peak += data_array[i] peak *= 2 ** shift diff --git a/lib/mutagen/id3/_tags.py b/lib/mutagen/id3/_tags.py index 99845faa..5ffca7bc 100755 --- a/lib/mutagen/id3/_tags.py +++ b/lib/mutagen/id3/_tags.py @@ -236,7 +236,7 @@ class ID3Tags(DictProxy, Tags): return [self[key]] else: key = key + ":" - return [v for s, v in self.items() if s.startswith(key)] + return [v for s, v in list(self.items()) if s.startswith(key)] def setall(self, key, values): """Delete frames of the given type and add frames in 'values'. @@ -280,7 +280,7 @@ class ID3Tags(DictProxy, Tags): ``POPM=user@example.org=3 128/255`` """ - frames = sorted(Frame.pprint(s) for s in self.values()) + frames = sorted(Frame.pprint(s) for s in list(self.values())) return "\n".join(frames) def _add(self, frame, strict): @@ -371,7 +371,7 @@ class ID3Tags(DictProxy, Tags): # TDAT, TYER, and TIME have been turned into TDRC. try: date = text_type(self.get("TYER", "")) - if date.strip(u"\x00"): + if date.strip("\x00"): self.pop("TYER") dat = text_type(self.get("TDAT", "")) if dat.strip("\x00"): @@ -482,7 +482,7 @@ class ID3Tags(DictProxy, Tags): def _copy(self): """Creates a shallow copy of all tags""" - items = self.items() + items = list(self.items()) subs = {} for f in (self.getall("CHAP") + self.getall("CTOC")): subs[f.HashKey] = f.sub_frames._copy() diff --git a/lib/mutagen/m4a.py b/lib/mutagen/m4a.py index c7583f8e..5830e8b6 100755 --- a/lib/mutagen/m4a.py +++ b/lib/mutagen/m4a.py @@ -66,7 +66,7 @@ class M4ATags(DictProxy, Tags): raise error("deprecated") def pprint(self): - return u"" + return "" class M4AInfo(StreamInfo): @@ -77,7 +77,7 @@ class M4AInfo(StreamInfo): raise error("deprecated") def pprint(self): - return u"" + return "" class M4A(FileType): diff --git a/lib/mutagen/monkeysaudio.py b/lib/mutagen/monkeysaudio.py index ae7df766..5c6bdb50 100755 --- a/lib/mutagen/monkeysaudio.py +++ b/lib/mutagen/monkeysaudio.py @@ -74,7 +74,7 @@ class MonkeysAudioInfo(StreamInfo): self.length = float(total_blocks) / self.sample_rate def pprint(self): - return u"Monkey's Audio %.2f, %.2f seconds, %d Hz" % ( + return "Monkey's Audio %.2f, %.2f seconds, %d Hz" % ( self.version, self.length, self.sample_rate) diff --git a/lib/mutagen/mp3/__init__.py b/lib/mutagen/mp3/__init__.py index 9e26c6dc..ac4e2e2b 100755 --- a/lib/mutagen/mp3/__init__.py +++ b/lib/mutagen/mp3/__init__.py @@ -75,7 +75,7 @@ def _guess_xing_bitrate_mode(xing): # Mode values. -STEREO, JOINTSTEREO, DUALCHANNEL, MONO = xrange(4) +STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4) class MPEGFrame(object): @@ -95,7 +95,7 @@ class MPEGFrame(object): } __BITRATE[(2, 3)] = __BITRATE[(2, 2)] - for i in xrange(1, 4): + for i in range(1, 4): __BITRATE[(2.5, i)] = __BITRATE[(2, i)] # Map version to sample rates. @@ -197,7 +197,7 @@ class MPEGFrame(object): if xing.bytes != -1 and self.length: self.bitrate = int((xing.bytes * 8) / self.length) if xing.lame_version_desc: - self.encoder_info = u"LAME %s" % xing.lame_version_desc + self.encoder_info = "LAME %s" % xing.lame_version_desc if lame is not None: self.track_gain = lame.track_gain_adjustment self.track_peak = lame.track_peak @@ -213,7 +213,7 @@ class MPEGFrame(object): pass else: self.bitrate_mode = BitrateMode.VBR - self.encoder_info = u"FhG" + self.encoder_info = "FhG" self.sketchy = False self.length = float(frame_size * vbri.frames) / self.sample_rate if self.length: @@ -323,8 +323,8 @@ class MPEGInfo(StreamInfo): """ sketchy = False - encoder_info = u"" - encoder_settings = u"" + encoder_info = "" + encoder_settings = "" bitrate_mode = BitrateMode.UNKNOWN track_gain = track_peak = album_gain = album_peak = None @@ -363,7 +363,7 @@ class MPEGInfo(StreamInfo): if max_syncs <= 0: break - for _ in xrange(enough_frames): + for _ in range(enough_frames): try: frame = MPEGFrame(fileobj) except HeaderNotFoundError: @@ -411,16 +411,16 @@ class MPEGInfo(StreamInfo): def pprint(self): info = str(self.bitrate_mode).split(".", 1)[-1] if self.bitrate_mode == BitrateMode.UNKNOWN: - info = u"CBR?" + info = "CBR?" if self.encoder_info: info += ", %s" % self.encoder_info if self.encoder_settings: info += ", %s" % self.encoder_settings - s = u"MPEG %s layer %d, %d bps (%s), %s Hz, %d chn, %.2f seconds" % ( + s = "MPEG %s layer %d, %d bps (%s), %s Hz, %d chn, %.2f seconds" % ( self.version, self.layer, self.bitrate, info, self.sample_rate, self.channels, self.length) if self.sketchy: - s += u" (sketchy)" + s += " (sketchy)" return s diff --git a/lib/mutagen/mp3/_util.py b/lib/mutagen/mp3/_util.py index 40b28775..973b2101 100755 --- a/lib/mutagen/mp3/_util.py +++ b/lib/mutagen/mp3/_util.py @@ -194,64 +194,64 @@ class LAMEHeader(object): if self.vbr_method == 2: if version in ((3, 90), (3, 91), (3, 92)) and self.encoding_flags: if self.bitrate < 255: - return u"--alt-preset %d" % self.bitrate + return "--alt-preset %d" % self.bitrate else: - return u"--alt-preset %d+" % self.bitrate + return "--alt-preset %d+" % self.bitrate if self.preset_used != 0: - return u"--preset %d" % self.preset_used + return "--preset %d" % self.preset_used elif self.bitrate < 255: - return u"--abr %d" % self.bitrate + return "--abr %d" % self.bitrate else: - return u"--abr %d+" % self.bitrate + return "--abr %d+" % self.bitrate elif self.vbr_method == 1: if self.preset_used == 0: if self.bitrate < 255: - return u"-b %d" % self.bitrate + return "-b %d" % self.bitrate else: - return u"-b 255+" + return "-b 255+" elif self.preset_used == 1003: - return u"--preset insane" - return u"-b %d" % self.preset_used + return "--preset insane" + return "-b %d" % self.preset_used elif version in ((3, 90), (3, 91), (3, 92)): preset_key = (self.vbr_quality, self.quality, self.vbr_method, self.lowpass_filter, self.ath_type) if preset_key == (1, 2, 4, 19500, 3): - return u"--preset r3mix" + return "--preset r3mix" if preset_key == (2, 2, 3, 19000, 4): - return u"--alt-preset standard" + return "--alt-preset standard" if preset_key == (2, 2, 3, 19500, 2): - return u"--alt-preset extreme" + return "--alt-preset extreme" if self.vbr_method == 3: - return u"-V %s" % self.vbr_quality + return "-V %s" % self.vbr_quality elif self.vbr_method in (4, 5): - return u"-V %s --vbr-new" % self.vbr_quality + return "-V %s --vbr-new" % self.vbr_quality elif version in ((3, 93), (3, 94), (3, 95), (3, 96), (3, 97)): if self.preset_used == 1001: - return u"--preset standard" + return "--preset standard" elif self.preset_used == 1002: - return u"--preset extreme" + return "--preset extreme" elif self.preset_used == 1004: - return u"--preset fast standard" + return "--preset fast standard" elif self.preset_used == 1005: - return u"--preset fast extreme" + return "--preset fast extreme" elif self.preset_used == 1006: - return u"--preset medium" + return "--preset medium" elif self.preset_used == 1007: - return u"--preset fast medium" + return "--preset fast medium" if self.vbr_method == 3: - return u"-V %s" % self.vbr_quality + return "-V %s" % self.vbr_quality elif self.vbr_method in (4, 5): - return u"-V %s --vbr-new" % self.vbr_quality + return "-V %s --vbr-new" % self.vbr_quality elif version == (3, 98): if self.vbr_method == 3: - return u"-V %s --vbr-old" % self.vbr_quality + return "-V %s --vbr-old" % self.vbr_quality elif self.vbr_method in (4, 5): - return u"-V %s" % self.vbr_quality + return "-V %s" % self.vbr_quality elif version >= (3, 99): if self.vbr_method == 3: - return u"-V %s --vbr-old" % self.vbr_quality + return "-V %s --vbr-old" % self.vbr_quality elif self.vbr_method in (4, 5): p = self.vbr_quality adjust_key = (p, self.bitrate, self.lowpass_filter) @@ -261,9 +261,9 @@ class LAMEHeader(object): (5, 8, 0): 8, (6, 8, 0): 9, }.get(adjust_key, p) - return u"-V %s" % p + return "-V %s" % p - return u"" + return "" @classmethod def parse_version(cls, fileobj): @@ -303,36 +303,36 @@ class LAMEHeader(object): if (major, minor) < (3, 90) or ( (major, minor) == (3, 90) and data[-11:-10] == b"("): flag = data.strip(b"\x00").rstrip().decode("ascii") - return (major, minor), u"%d.%d%s" % (major, minor, flag), False + return (major, minor), "%d.%d%s" % (major, minor, flag), False if len(data) < 11: raise LAMEError("Invalid version: too long") flag = data[:-11].rstrip(b"\x00") - flag_string = u"" - patch = u"" + flag_string = "" + patch = "" if flag == b"a": - flag_string = u" (alpha)" + flag_string = " (alpha)" elif flag == b"b": - flag_string = u" (beta)" + flag_string = " (beta)" elif flag == b"r": - patch = u".1+" + patch = ".1+" elif flag == b" ": if (major, minor) > (3, 96): - patch = u".0" + patch = ".0" else: - patch = u".0+" + patch = ".0+" elif flag == b"" or flag == b".": - patch = u".0+" + patch = ".0+" else: - flag_string = u" (?)" + flag_string = " (?)" # extended header, seek back to 9 bytes for the caller fileobj.seek(-11, 1) return (major, minor), \ - u"%d.%d%s%s" % (major, minor, patch, flag_string), True + "%d.%d%s%s" % (major, minor, patch, flag_string), True class XingHeaderError(Exception): @@ -369,7 +369,7 @@ class XingHeader(object): lame_version = (0, 0) """The LAME version as two element tuple (major, minor)""" - lame_version_desc = u"" + lame_version_desc = "" """The version of the LAME encoder e.g. '3.99.0'. Empty if unknown""" is_info = False @@ -425,7 +425,7 @@ class XingHeader(object): """Returns the guessed encoder settings""" if self.lame_header is None: - return u"" + return "" return self.lame_header.guess_settings(*self.lame_version) @classmethod @@ -513,7 +513,7 @@ class VBRIHeader(object): else: raise VBRIHeaderError("Invalid TOC entry size") - self.toc = [unpack(i)[0] for i in xrange(0, toc_size, toc_entry_size)] + self.toc = [unpack(i)[0] for i in range(0, toc_size, toc_entry_size)] @classmethod def get_offset(cls, info): diff --git a/lib/mutagen/mp4/__init__.py b/lib/mutagen/mp4/__init__.py index d37474af..a74a303a 100755 --- a/lib/mutagen/mp4/__init__.py +++ b/lib/mutagen/mp4/__init__.py @@ -246,7 +246,7 @@ def _item_sort_key(key, value): "\xa9gen", "gnre", "trkn", "disk", "\xa9day", "cpil", "pgap", "pcst", "tmpo", "\xa9too", "----", "covr", "\xa9lyr"] - order = dict(izip(order, xrange(len(order)))) + order = dict(zip(order, range(len(order)))) last = len(order) # If there's no key-based way to distinguish, order by length. # If there's still no way, go by string comparison on the @@ -393,7 +393,7 @@ class MP4Tags(DictProxy, Tags): def save(self, filething, padding=None): values = [] - items = sorted(self.items(), key=lambda kv: _item_sort_key(*kv)) + items = sorted(list(self.items()), key=lambda kv: _item_sort_key(*kv)) for key, value in items: try: values.append(self._render(key, value)) @@ -868,22 +868,22 @@ class MP4Tags(DictProxy, Tags): def to_line(key, value): assert isinstance(key, text_type) if isinstance(value, text_type): - return u"%s=%s" % (key, value) - return u"%s=%r" % (key, value) + return "%s=%s" % (key, value) + return "%s=%r" % (key, value) values = [] for key, value in sorted(iteritems(self)): if not isinstance(key, text_type): key = key.decode("latin-1") if key == "covr": - values.append(u"%s=%s" % (key, u", ".join( - [u"[%d bytes of data]" % len(data) for data in value]))) + values.append("%s=%s" % (key, ", ".join( + ["[%d bytes of data]" % len(data) for data in value]))) elif isinstance(value, list): for v in value: values.append(to_line(key, v)) else: values.append(to_line(key, value)) - return u"\n".join(values) + return "\n".join(values) class MP4Info(StreamInfo): @@ -915,8 +915,8 @@ class MP4Info(StreamInfo): channels = 0 sample_rate = 0 bits_per_sample = 0 - codec = u"" - codec_description = u"" + codec = "" + codec_description = "" def __init__(self, *args, **kwargs): if args or kwargs: diff --git a/lib/mutagen/mp4/_as_entry.py b/lib/mutagen/mp4/_as_entry.py index 15b7e6bc..e2408351 100755 --- a/lib/mutagen/mp4/_as_entry.py +++ b/lib/mutagen/mp4/_as_entry.py @@ -211,7 +211,7 @@ class BaseDescriptor(object): """May raise ValueError""" value = 0 - for i in xrange(4): + for i in range(4): try: b = cdata.uint8(fileobj.read(1)) except cdata.error as e: @@ -318,10 +318,10 @@ class DecoderConfigDescriptor(BaseDescriptor): def codec_param(self): """string""" - param = u".%X" % self.objectTypeIndication + param = ".%X" % self.objectTypeIndication info = self.decSpecificInfo if info is not None: - param += u".%d" % info.audioObjectType + param += ".%d" % info.audioObjectType return param @property diff --git a/lib/mutagen/mp4/_atom.py b/lib/mutagen/mp4/_atom.py index cd43a1fe..4ff4f7b5 100755 --- a/lib/mutagen/mp4/_atom.py +++ b/lib/mutagen/mp4/_atom.py @@ -181,7 +181,7 @@ class Atoms(object): """ if PY2: - if isinstance(names, basestring): + if isinstance(names, str): names = names.split(b".") else: if isinstance(names, bytes): diff --git a/lib/mutagen/musepack.py b/lib/mutagen/musepack.py index f4d210d1..2c088b61 100755 --- a/lib/mutagen/musepack.py +++ b/lib/mutagen/musepack.py @@ -44,7 +44,7 @@ def _parse_sv8_int(fileobj, limit=9): """ num = 0 - for i in xrange(limit): + for i in range(limit): c = fileobj.read(1) if len(c) != 1: raise EOFError @@ -253,12 +253,12 @@ class MusepackInfo(StreamInfo): def pprint(self): rg_data = [] if hasattr(self, "title_gain"): - rg_data.append(u"%+0.2f (title)" % self.title_gain) + rg_data.append("%+0.2f (title)" % self.title_gain) if hasattr(self, "album_gain"): - rg_data.append(u"%+0.2f (album)" % self.album_gain) + rg_data.append("%+0.2f (album)" % self.album_gain) rg_data = (rg_data and ", Gain: " + ", ".join(rg_data)) or "" - return u"Musepack SV%d, %.2f seconds, %d Hz, %d bps%s" % ( + return "Musepack SV%d, %.2f seconds, %d Hz, %d bps%s" % ( self.version, self.length, self.sample_rate, self.bitrate, rg_data) diff --git a/lib/mutagen/ogg.py b/lib/mutagen/ogg.py index 8376e3ac..dd976bb4 100755 --- a/lib/mutagen/ogg.py +++ b/lib/mutagen/ogg.py @@ -387,8 +387,8 @@ class OggPage(object): # Number the new pages starting from the first old page. first = old_pages[0].sequence - for page, seq in izip(new_pages, - xrange(first, first + len(new_pages))): + for page, seq in zip(new_pages, + range(first, first + len(new_pages))): page.sequence = seq page.serial = old_pages[0].serial @@ -416,7 +416,7 @@ class OggPage(object): offset_adjust = 0 new_data_end = None assert len(old_pages) == len(new_data) - for old_page, data in izip(old_pages, new_data): + for old_page, data in zip(old_pages, new_data): offset = old_page.offset + offset_adjust data_size = len(data) resize_bytes(fileobj, old_page.size, data_size, offset) diff --git a/lib/mutagen/oggflac.py b/lib/mutagen/oggflac.py index 3dc3fe2e..029c4638 100755 --- a/lib/mutagen/oggflac.py +++ b/lib/mutagen/oggflac.py @@ -83,7 +83,7 @@ class OggFLACStreamInfo(StreamInfo): self.length = page.position / float(self.sample_rate) def pprint(self): - return u"Ogg FLAC, %.2f seconds, %d Hz" % ( + return "Ogg FLAC, %.2f seconds, %d Hz" % ( self.length, self.sample_rate) diff --git a/lib/mutagen/oggopus.py b/lib/mutagen/oggopus.py index 758fcf9a..baca7849 100755 --- a/lib/mutagen/oggopus.py +++ b/lib/mutagen/oggopus.py @@ -75,7 +75,7 @@ class OggOpusInfo(StreamInfo): self.length = (page.position - self.__pre_skip) / float(48000) def pprint(self): - return u"Ogg Opus, %.2f seconds" % (self.length) + return "Ogg Opus, %.2f seconds" % (self.length) class OggOpusVComment(VCommentDict): diff --git a/lib/mutagen/oggspeex.py b/lib/mutagen/oggspeex.py index b3c1a17b..0645ba0c 100755 --- a/lib/mutagen/oggspeex.py +++ b/lib/mutagen/oggspeex.py @@ -70,7 +70,7 @@ class OggSpeexInfo(StreamInfo): self.length = page.position / float(self.sample_rate) def pprint(self): - return u"Ogg Speex, %.2f seconds" % self.length + return "Ogg Speex, %.2f seconds" % self.length class OggSpeexVComment(VCommentDict): diff --git a/lib/mutagen/oggtheora.py b/lib/mutagen/oggtheora.py index 85512d08..cd1fa72f 100755 --- a/lib/mutagen/oggtheora.py +++ b/lib/mutagen/oggtheora.py @@ -76,7 +76,7 @@ class OggTheoraInfo(StreamInfo): self.length = frames / float(self.fps) def pprint(self): - return u"Ogg Theora, %.2f seconds, %d bps" % (self.length, + return "Ogg Theora, %.2f seconds, %d bps" % (self.length, self.bitrate) diff --git a/lib/mutagen/oggvorbis.py b/lib/mutagen/oggvorbis.py index 9c1d8fb0..4bc75265 100755 --- a/lib/mutagen/oggvorbis.py +++ b/lib/mutagen/oggvorbis.py @@ -89,7 +89,7 @@ class OggVorbisInfo(StreamInfo): self.length = page.position / float(self.sample_rate) def pprint(self): - return u"Ogg Vorbis, %.2f seconds, %d bps" % ( + return "Ogg Vorbis, %.2f seconds, %d bps" % ( self.length, self.bitrate) diff --git a/lib/mutagen/optimfrog.py b/lib/mutagen/optimfrog.py index 830224d6..5c929d01 100755 --- a/lib/mutagen/optimfrog.py +++ b/lib/mutagen/optimfrog.py @@ -62,7 +62,7 @@ class OptimFROGInfo(StreamInfo): self.length = 0.0 def pprint(self): - return u"OptimFROG, %.2f seconds, %d Hz" % (self.length, + return "OptimFROG, %.2f seconds, %d Hz" % (self.length, self.sample_rate) diff --git a/lib/mutagen/smf.py b/lib/mutagen/smf.py index f41d8142..abaa4ae8 100755 --- a/lib/mutagen/smf.py +++ b/lib/mutagen/smf.py @@ -36,7 +36,7 @@ def _var_int(data, offset=0): def _read_track(chunk): """Retuns a list of midi events and tempo change events""" - TEMPO, MIDI = range(2) + TEMPO, MIDI = list(range(2)) # Deviations: The running status should be reset on non midi events, but # some files contain meta events inbetween. @@ -91,7 +91,7 @@ def _read_track(chunk): def _read_midi_length(fileobj): """Returns the duration in seconds. Can raise all kind of errors...""" - TEMPO, MIDI = range(2) + TEMPO, MIDI = list(range(2)) def read_chunk(fileobj): info = fileobj.read(8) @@ -123,7 +123,7 @@ def _read_midi_length(fileobj): # get a list of events and tempo changes for each track tracks = [] first_tempos = None - for tracknum in xrange(ntracks): + for tracknum in range(ntracks): identifier, chunk = read_chunk(fileobj) if identifier != b"MTrk": continue @@ -178,7 +178,7 @@ class SMFInfo(StreamInfo): self.length = _read_midi_length(fileobj) def pprint(self): - return u"SMF, %.2f seconds" % self.length + return "SMF, %.2f seconds" % self.length class SMF(FileType): diff --git a/lib/mutagen/trueaudio.py b/lib/mutagen/trueaudio.py index 882e3a62..fd5ca050 100755 --- a/lib/mutagen/trueaudio.py +++ b/lib/mutagen/trueaudio.py @@ -54,7 +54,7 @@ class TrueAudioInfo(StreamInfo): self.length = float(samples) / self.sample_rate def pprint(self): - return u"True Audio, %.2f seconds, %d Hz." % ( + return "True Audio, %.2f seconds, %d Hz." % ( self.length, self.sample_rate) diff --git a/lib/mutagen/wavpack.py b/lib/mutagen/wavpack.py index 290b90c3..ca471b1e 100755 --- a/lib/mutagen/wavpack.py +++ b/lib/mutagen/wavpack.py @@ -109,7 +109,7 @@ class WavPackInfo(StreamInfo): self.length = float(samples) / self.sample_rate def pprint(self): - return u"WavPack, %.2f seconds, %d Hz" % (self.length, + return "WavPack, %.2f seconds, %d Hz" % (self.length, self.sample_rate) diff --git a/lib/oauth2/__init__.py b/lib/oauth2/__init__.py deleted file mode 100644 index 0ea3803e..00000000 --- a/lib/oauth2/__init__.py +++ /dev/null @@ -1,704 +0,0 @@ -""" -The MIT License - -Copyright (c) 2007 Leah Culver, Joe Stump, Mark Paschal, Vic Fryzel - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" - -import urllib -import time -import random -import urlparse -import hmac -import binascii -import httplib2 as httplib2 - -try: - from urlparse import parse_qs, parse_qsl -except ImportError: - from cgi import parse_qs, parse_qsl - - -VERSION = '1.0' # Hi Blaine! -HTTP_METHOD = 'GET' -SIGNATURE_METHOD = 'PLAINTEXT' - - -class Error(RuntimeError): - """Generic exception class.""" - - def __init__(self, message='OAuth error occured.'): - self._message = message - - @property - def message(self): - """A hack to get around the deprecation errors in 2.6.""" - return self._message - - def __str__(self): - return self._message - -class MissingSignature(Error): - pass - -def build_authenticate_header(realm=''): - """Optional WWW-Authenticate header (401 error)""" - return {'WWW-Authenticate': 'OAuth realm="%s"' % realm} - - -def escape(s): - """Escape a URL including any /.""" - return urllib.quote(s, safe='~') - - -def generate_timestamp(): - """Get seconds since epoch (UTC).""" - return int(time.time()) - - -def generate_nonce(length=8): - """Generate pseudorandom number.""" - return ''.join([str(random.randint(0, 9)) for i in range(length)]) - - -def generate_verifier(length=8): - """Generate pseudorandom number.""" - return ''.join([str(random.randint(0, 9)) for i in range(length)]) - - -class Consumer(object): - """A consumer of OAuth-protected services. - - The OAuth consumer is a "third-party" service that wants to access - protected resources from an OAuth service provider on behalf of an end - user. It's kind of the OAuth client. - - Usually a consumer must be registered with the service provider by the - developer of the consumer software. As part of that process, the service - provider gives the consumer a *key* and a *secret* with which the consumer - software can identify itself to the service. The consumer will include its - key in each request to identify itself, but will use its secret only when - signing requests, to prove that the request is from that particular - registered consumer. - - Once registered, the consumer can then use its consumer credentials to ask - the service provider for a request token, kicking off the OAuth - authorization process. - """ - - key = None - secret = None - - def __init__(self, key, secret): - self.key = key - self.secret = secret - - if self.key is None or self.secret is None: - raise ValueError("Key and secret must be set.") - - def __str__(self): - data = { - 'oauth_consumer_key': self.key, - 'oauth_consumer_secret': self.secret - } - - return urllib.urlencode(data) - - -class Token(object): - """An OAuth credential used to request authorization or a protected - resource. - - Tokens in OAuth comprise a *key* and a *secret*. The key is included in - requests to identify the token being used, but the secret is used only in - the signature, to prove that the requester is who the server gave the - token to. - - When first negotiating the authorization, the consumer asks for a *request - token* that the live user authorizes with the service provider. The - consumer then exchanges the request token for an *access token* that can - be used to access protected resources. - """ - - key = None - secret = None - callback = None - callback_confirmed = None - verifier = None - - def __init__(self, key, secret): - self.key = key - self.secret = secret - - if self.key is None or self.secret is None: - raise ValueError("Key and secret must be set.") - - def set_callback(self, callback): - self.callback = callback - self.callback_confirmed = 'true' - - def set_verifier(self, verifier=None): - if verifier is not None: - self.verifier = verifier - else: - self.verifier = generate_verifier() - - def get_callback_url(self): - if self.callback and self.verifier: - # Append the oauth_verifier. - parts = urlparse.urlparse(self.callback) - scheme, netloc, path, params, query, fragment = parts[:6] - if query: - query = '%s&oauth_verifier=%s' % (query, self.verifier) - else: - query = 'oauth_verifier=%s' % self.verifier - return urlparse.urlunparse((scheme, netloc, path, params, - query, fragment)) - return self.callback - - def to_string(self): - """Returns this token as a plain string, suitable for storage. - - The resulting string includes the token's secret, so you should never - send or store this string where a third party can read it. - """ - - data = { - 'oauth_token': self.key, - 'oauth_token_secret': self.secret, - } - - if self.callback_confirmed is not None: - data['oauth_callback_confirmed'] = self.callback_confirmed - return urllib.urlencode(data) - - @staticmethod - def from_string(s): - """Deserializes a token from a string like one returned by - `to_string()`.""" - - if not len(s): - raise ValueError("Invalid parameter string.") - - params = parse_qs(s, keep_blank_values=False) - if not len(params): - raise ValueError("Invalid parameter string.") - - try: - key = params['oauth_token'][0] - except Exception: - raise ValueError("'oauth_token' not found in OAuth request.") - - try: - secret = params['oauth_token_secret'][0] - except Exception: - raise ValueError("'oauth_token_secret' not found in " - "OAuth request.") - - token = Token(key, secret) - try: - token.callback_confirmed = params['oauth_callback_confirmed'][0] - except KeyError: - pass # 1.0, no callback confirmed. - return token - - def __str__(self): - return self.to_string() - - -def setter(attr): - name = attr.__name__ - - def getter(self): - try: - return self.__dict__[name] - except KeyError: - raise AttributeError(name) - - def deleter(self): - del self.__dict__[name] - - return property(getter, attr, deleter) - - -class Request(dict): - - """The parameters and information for an HTTP request, suitable for - authorizing with OAuth credentials. - - When a consumer wants to access a service's protected resources, it does - so using a signed HTTP request identifying itself (the consumer) with its - key, and providing an access token authorized by the end user to access - those resources. - - """ - - http_method = HTTP_METHOD - http_url = None - version = VERSION - - def __init__(self, method=HTTP_METHOD, url=None, parameters=None): - if method is not None: - self.method = method - - if url is not None: - self.url = url - - if parameters is not None: - self.update(parameters) - - @setter - def url(self, value): - parts = urlparse.urlparse(value) - scheme, netloc, path = parts[:3] - - # Exclude default port numbers. - if scheme == 'http' and netloc[-3:] == ':80': - netloc = netloc[:-3] - elif scheme == 'https' and netloc[-4:] == ':443': - netloc = netloc[:-4] - - if scheme != 'http' and scheme != 'https': - raise ValueError("Unsupported URL %s (%s)." % (value, scheme)) - - value = '%s://%s%s' % (scheme, netloc, path) - self.__dict__['url'] = value - - @setter - def method(self, value): - self.__dict__['method'] = value.upper() - - def _get_timestamp_nonce(self): - return self['oauth_timestamp'], self['oauth_nonce'] - - def get_nonoauth_parameters(self): - """Get any non-OAuth parameters.""" - return dict([(k, v) for k, v in self.iteritems() - if not k.startswith('oauth_')]) - - def to_header(self, realm=''): - """Serialize as a header for an HTTPAuth request.""" - oauth_params = ((k, v) for k, v in self.items() - if k.startswith('oauth_')) - stringy_params = ((k, escape(str(v))) for k, v in oauth_params) - header_params = ('%s="%s"' % (k, v) for k, v in stringy_params) - params_header = ', '.join(header_params) - - auth_header = 'OAuth realm="%s"' % realm - if params_header: - auth_header = "%s, %s" % (auth_header, params_header) - - return {'Authorization': auth_header} - - def to_postdata(self): - """Serialize as post data for a POST request.""" - return self.encode_postdata(self) - - def encode_postdata(self, data): - # tell urlencode to deal with sequence values and map them correctly - # to resulting querystring. for example self["k"] = ["v1", "v2"] will - # result in 'k=v1&k=v2' and not k=%5B%27v1%27%2C+%27v2%27%5D - return urllib.urlencode(data, True) - - def to_url(self): - """Serialize as a URL for a GET request.""" - return '%s?%s' % (self.url, self.to_postdata()) - - def get_parameter(self, parameter): - ret = self.get(parameter) - if ret is None: - raise Error('Parameter not found: %s' % parameter) - - return ret - - def get_normalized_parameters(self): - """Return a string that contains the parameters that must be signed.""" - items = [(k, v) for k, v in self.items() if k != 'oauth_signature'] - encoded_str = urllib.urlencode(sorted(items), True) - # Encode signature parameters per Oauth Core 1.0 protocol - # spec draft 7, section 3.6 - # (http://tools.ietf.org/html/draft-hammer-oauth-07#section-3.6) - # Spaces must be encoded with "%20" instead of "+" - return encoded_str.replace('+', '%20') - - def sign_request(self, signature_method, consumer, token): - """Set the signature parameter to the result of sign.""" - - if 'oauth_consumer_key' not in self: - self['oauth_consumer_key'] = consumer.key - - if token and 'oauth_token' not in self: - self['oauth_token'] = token.key - - self['oauth_signature_method'] = signature_method.name - self['oauth_signature'] = signature_method.sign(self, consumer, token) - - @classmethod - def make_timestamp(cls): - """Get seconds since epoch (UTC).""" - return str(int(time.time())) - - @classmethod - def make_nonce(cls): - """Generate pseudorandom number.""" - return str(random.randint(0, 100000000)) - - @classmethod - def from_request(cls, http_method, http_url, headers=None, parameters=None, - query_string=None): - """Combines multiple parameter sources.""" - if parameters is None: - parameters = {} - - # Headers - if headers and 'Authorization' in headers: - auth_header = headers['Authorization'] - # Check that the authorization header is OAuth. - if auth_header[:6] == 'OAuth ': - auth_header = auth_header[6:] - try: - # Get the parameters from the header. - header_params = cls._split_header(auth_header) - parameters.update(header_params) - except: - raise Error('Unable to parse OAuth parameters from ' - 'Authorization header.') - - # GET or POST query string. - if query_string: - query_params = cls._split_url_string(query_string) - parameters.update(query_params) - - # URL parameters. - param_str = urlparse.urlparse(http_url)[4] # query - url_params = cls._split_url_string(param_str) - parameters.update(url_params) - - if parameters: - return cls(http_method, http_url, parameters) - - return None - - @classmethod - def from_consumer_and_token(cls, consumer, token=None, - http_method=HTTP_METHOD, http_url=None, parameters=None): - if not parameters: - parameters = {} - - defaults = { - 'oauth_consumer_key': consumer.key, - 'oauth_timestamp': cls.make_timestamp(), - 'oauth_nonce': cls.make_nonce(), - 'oauth_version': cls.version, - } - - defaults.update(parameters) - parameters = defaults - - if token: - parameters['oauth_token'] = token.key - - return Request(http_method, http_url, parameters) - - @classmethod - def from_token_and_callback(cls, token, callback=None, - http_method=HTTP_METHOD, http_url=None, parameters=None): - - if not parameters: - parameters = {} - - parameters['oauth_token'] = token.key - - if callback: - parameters['oauth_callback'] = callback - - return cls(http_method, http_url, parameters) - - @staticmethod - def _split_header(header): - """Turn Authorization: header into parameters.""" - params = {} - parts = header.split(',') - for param in parts: - # Ignore realm parameter. - if param.find('realm') > -1: - continue - # Remove whitespace. - param = param.strip() - # Split key-value. - param_parts = param.split('=', 1) - # Remove quotes and unescape the value. - params[param_parts[0]] = urllib.unquote(param_parts[1].strip('\"')) - return params - - @staticmethod - def _split_url_string(param_str): - """Turn URL string into parameters.""" - parameters = parse_qs(param_str, keep_blank_values=False) - for k, v in parameters.iteritems(): - parameters[k] = urllib.unquote(v[0]) - return parameters - - -class Server(object): - """A skeletal implementation of a service provider, providing protected - resources to requests from authorized consumers. - - This class implements the logic to check requests for authorization. You - can use it with your web server or web framework to protect certain - resources with OAuth. - """ - - timestamp_threshold = 300 # In seconds, five minutes. - version = VERSION - signature_methods = None - - def __init__(self, signature_methods=None): - self.signature_methods = signature_methods or {} - - def add_signature_method(self, signature_method): - self.signature_methods[signature_method.name] = signature_method - return self.signature_methods - - def verify_request(self, request, consumer, token): - """Verifies an api call and checks all the parameters.""" - - version = self._get_version(request) - self._check_signature(request, consumer, token) - parameters = request.get_nonoauth_parameters() - return parameters - - def build_authenticate_header(self, realm=''): - """Optional support for the authenticate header.""" - return {'WWW-Authenticate': 'OAuth realm="%s"' % realm} - - def _get_version(self, request): - """Verify the correct version request for this server.""" - try: - version = request.get_parameter('oauth_version') - except: - version = VERSION - - if version and version != self.version: - raise Error('OAuth version %s not supported.' % str(version)) - - return version - - def _get_signature_method(self, request): - """Figure out the signature with some defaults.""" - try: - signature_method = request.get_parameter('oauth_signature_method') - except: - signature_method = SIGNATURE_METHOD - - try: - # Get the signature method object. - signature_method = self.signature_methods[signature_method] - except: - signature_method_names = ', '.join(self.signature_methods.keys()) - raise Error('Signature method %s not supported try one of the following: %s' % (signature_method, signature_method_names)) - - return signature_method - - def _get_verifier(self, request): - return request.get_parameter('oauth_verifier') - - def _check_signature(self, request, consumer, token): - timestamp, nonce = request._get_timestamp_nonce() - self._check_timestamp(timestamp) - signature_method = self._get_signature_method(request) - - try: - signature = request.get_parameter('oauth_signature') - except: - raise MissingSignature('Missing oauth_signature.') - - # Validate the signature. - valid = signature_method.check(request, consumer, token, signature) - - if not valid: - key, base = signature_method.signing_base(request, consumer, token) - - raise Error('Invalid signature. Expected signature base ' - 'string: %s' % base) - - built = signature_method.sign(request, consumer, token) - - def _check_timestamp(self, timestamp): - """Verify that timestamp is recentish.""" - timestamp = int(timestamp) - now = int(time.time()) - lapsed = now - timestamp - if lapsed > self.timestamp_threshold: - raise Error('Expired timestamp: given %d and now %s has a ' - 'greater difference than threshold %d' % (timestamp, now, self.timestamp_threshold)) - - -class Client(httplib2.Http): - """OAuthClient is a worker to attempt to execute a request.""" - - def __init__(self, consumer, token=None, cache=None, timeout=None, - proxy_info=None): - - if consumer is not None and not isinstance(consumer, Consumer): - raise ValueError("Invalid consumer.") - - if token is not None and not isinstance(token, Token): - raise ValueError("Invalid token.") - - self.consumer = consumer - self.token = token - self.method = SignatureMethod_HMAC_SHA1() - - httplib2.Http.__init__(self, cache=cache, timeout=timeout, - proxy_info=proxy_info) - - def set_signature_method(self, method): - if not isinstance(method, SignatureMethod): - raise ValueError("Invalid signature method.") - - self.method = method - - def request(self, uri, method="GET", body=None, headers=None, - redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None, - force_auth_header=False): - - if not isinstance(headers, dict): - headers = {} - - if body and method == "POST": - parameters = dict(parse_qsl(body)) - elif method == "GET": - parsed = urlparse.urlparse(uri) - parameters = parse_qs(parsed.query) - else: - parameters = None - - req = Request.from_consumer_and_token(self.consumer, token=self.token, - http_method=method, http_url=uri, parameters=parameters) - - req.sign_request(self.method, self.consumer, self.token) - - if force_auth_header: - # ensure we always send Authorization - headers.update(req.to_header()) - - if method == "POST": - if not force_auth_header: - body = req.to_postdata() - else: - body = req.encode_postdata(req.get_nonoauth_parameters()) - headers['Content-Type'] = 'application/x-www-form-urlencoded' - elif method == "GET": - if not force_auth_header: - uri = req.to_url() - else: - if not force_auth_header: - # don't call update twice. - headers.update(req.to_header()) - - return httplib2.Http.request(self, uri, method=method, body=body, - headers=headers, redirections=redirections, - connection_type=connection_type) - - -class SignatureMethod(object): - """A way of signing requests. - - The OAuth protocol lets consumers and service providers pick a way to sign - requests. This interface shows the methods expected by the other `oauth` - modules for signing requests. Subclass it and implement its methods to - provide a new way to sign requests. - """ - - def signing_base(self, request, consumer, token): - """Calculates the string that needs to be signed. - - This method returns a 2-tuple containing the starting key for the - signing and the message to be signed. The latter may be used in error - messages to help clients debug their software. - - """ - raise NotImplementedError - - def sign(self, request, consumer, token): - """Returns the signature for the given request, based on the consumer - and token also provided. - - You should use your implementation of `signing_base()` to build the - message to sign. Otherwise it may be less useful for debugging. - - """ - raise NotImplementedError - - def check(self, request, consumer, token, signature): - """Returns whether the given signature is the correct signature for - the given consumer and token signing the given request.""" - built = self.sign(request, consumer, token) - return built == signature - - -class SignatureMethod_HMAC_SHA1(SignatureMethod): - name = 'HMAC-SHA1' - - def signing_base(self, request, consumer, token): - sig = ( - escape(request.method), - escape(request.url), - escape(request.get_normalized_parameters()), - ) - - key = '%s&' % escape(consumer.secret) - if token: - key += escape(token.secret) - raw = '&'.join(sig) - return key, raw - - def sign(self, request, consumer, token): - """Builds the base signature string.""" - key, raw = self.signing_base(request, consumer, token) - - # HMAC object. - try: - import hashlib # 2.5 - hashed = hmac.new(key, raw, hashlib.sha1) - except ImportError: - import sha # Deprecated - hashed = hmac.new(key, raw, sha) - - # Calculate the digest base 64. - return binascii.b2a_base64(hashed.digest())[:-1] - -class SignatureMethod_PLAINTEXT(SignatureMethod): - - name = 'PLAINTEXT' - - def signing_base(self, request, consumer, token): - """Concatenates the consumer key and secret with the token's - secret.""" - sig = '%s&' % escape(consumer.secret) - if token: - sig = sig + escape(token.secret) - return sig, sig - - def sign(self, request, consumer, token): - key, raw = self.signing_base(request, consumer, token) - return raw - diff --git a/lib/oauthlib/__init__.py b/lib/oauthlib/__init__.py new file mode 100644 index 00000000..a94cf941 --- /dev/null +++ b/lib/oauthlib/__init__.py @@ -0,0 +1,34 @@ +""" + oauthlib + ~~~~~~~~ + + A generic, spec-compliant, thorough implementation of the OAuth + request-signing logic. + + :copyright: (c) 2019 by The OAuthlib Community + :license: BSD, see LICENSE for details. +""" +import logging +from logging import NullHandler + +__author__ = 'The OAuthlib Community' +__version__ = '3.1.1' + +logging.getLogger('oauthlib').addHandler(NullHandler()) + +_DEBUG = False + +def set_debug(debug_val): + """Set value of debug flag + + :param debug_val: Value to set. Must be a bool value. + """ + global _DEBUG + _DEBUG = debug_val + +def get_debug(): + """Get debug mode value. + + :return: `True` if debug mode is on, `False` otherwise + """ + return _DEBUG diff --git a/lib/oauthlib/common.py b/lib/oauthlib/common.py new file mode 100644 index 00000000..b5fbf52e --- /dev/null +++ b/lib/oauthlib/common.py @@ -0,0 +1,434 @@ +""" +oauthlib.common +~~~~~~~~~~~~~~ + +This module provides data structures and utilities common +to all implementations of OAuth. +""" +import collections +import datetime +import logging +import re +import time +import urllib.parse as urlparse +from urllib.parse import ( + quote as _quote, unquote as _unquote, urlencode as _urlencode, +) + +from . import get_debug + +try: + from secrets import randbits + from secrets import SystemRandom +except ImportError: + from random import getrandbits as randbits + from random import SystemRandom + +UNICODE_ASCII_CHARACTER_SET = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789') + +CLIENT_ID_CHARACTER_SET = (r' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMN' + 'OPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}') + +SANITIZE_PATTERN = re.compile(r'([^&;]*(?:password|token)[^=]*=)[^&;]+', re.IGNORECASE) +INVALID_HEX_PATTERN = re.compile(r'%[^0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]') + +always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' '_.-') + +log = logging.getLogger('oauthlib') + + +# 'safe' must be bytes (Python 2.6 requires bytes, other versions allow either) +def quote(s, safe=b'/'): + s = s.encode('utf-8') if isinstance(s, str) else s + s = _quote(s, safe) + # PY3 always returns unicode. PY2 may return either, depending on whether + # it had to modify the string. + if isinstance(s, bytes): + s = s.decode('utf-8') + return s + + +def unquote(s): + s = _unquote(s) + # PY3 always returns unicode. PY2 seems to always return what you give it, + # which differs from quote's behavior. Just to be safe, make sure it is + # unicode before we return. + if isinstance(s, bytes): + s = s.decode('utf-8') + return s + + +def urlencode(params): + utf8_params = encode_params_utf8(params) + urlencoded = _urlencode(utf8_params) + if isinstance(urlencoded, str): + return urlencoded + else: + return urlencoded.decode("utf-8") + + +def encode_params_utf8(params): + """Ensures that all parameters in a list of 2-element tuples are encoded to + bytestrings using UTF-8 + """ + encoded = [] + for k, v in params: + encoded.append(( + k.encode('utf-8') if isinstance(k, str) else k, + v.encode('utf-8') if isinstance(v, str) else v)) + return encoded + + +def decode_params_utf8(params): + """Ensures that all parameters in a list of 2-element tuples are decoded to + unicode using UTF-8. + """ + decoded = [] + for k, v in params: + decoded.append(( + k.decode('utf-8') if isinstance(k, bytes) else k, + v.decode('utf-8') if isinstance(v, bytes) else v)) + return decoded + + +urlencoded = set(always_safe) | set('=&;:%+~,*@!()/?\'$') + + +def urldecode(query): + """Decode a query string in x-www-form-urlencoded format into a sequence + of two-element tuples. + + Unlike urlparse.parse_qsl(..., strict_parsing=True) urldecode will enforce + correct formatting of the query string by validation. If validation fails + a ValueError will be raised. urllib.parse_qsl will only raise errors if + any of name-value pairs omits the equals sign. + """ + # Check if query contains invalid characters + if query and not set(query) <= urlencoded: + error = ("Error trying to decode a non urlencoded string. " + "Found invalid characters: %s " + "in the string: '%s'. " + "Please ensure the request/response body is " + "x-www-form-urlencoded.") + raise ValueError(error % (set(query) - urlencoded, query)) + + # Check for correctly hex encoded values using a regular expression + # All encoded values begin with % followed by two hex characters + # correct = %00, %A0, %0A, %FF + # invalid = %G0, %5H, %PO + if INVALID_HEX_PATTERN.search(query): + raise ValueError('Invalid hex encoding in query string.') + + # We want to allow queries such as "c2" whereas urlparse.parse_qsl + # with the strict_parsing flag will not. + params = urlparse.parse_qsl(query, keep_blank_values=True) + + # unicode all the things + return decode_params_utf8(params) + + +def extract_params(raw): + """Extract parameters and return them as a list of 2-tuples. + + Will successfully extract parameters from urlencoded query strings, + dicts, or lists of 2-tuples. Empty strings/dicts/lists will return an + empty list of parameters. Any other input will result in a return + value of None. + """ + if isinstance(raw, (bytes, str)): + try: + params = urldecode(raw) + except ValueError: + params = None + elif hasattr(raw, '__iter__'): + try: + dict(raw) + except ValueError: + params = None + except TypeError: + params = None + else: + params = list(raw.items() if isinstance(raw, dict) else raw) + params = decode_params_utf8(params) + else: + params = None + + return params + + +def generate_nonce(): + """Generate pseudorandom nonce that is unlikely to repeat. + + Per `section 3.3`_ of the OAuth 1 RFC 5849 spec. + Per `section 3.2.1`_ of the MAC Access Authentication spec. + + A random 64-bit number is appended to the epoch timestamp for both + randomness and to decrease the likelihood of collisions. + + .. _`section 3.2.1`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1 + .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3 + """ + return str(str(randbits(64)) + generate_timestamp()) + + +def generate_timestamp(): + """Get seconds since epoch (UTC). + + Per `section 3.3`_ of the OAuth 1 RFC 5849 spec. + Per `section 3.2.1`_ of the MAC Access Authentication spec. + + .. _`section 3.2.1`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-3.2.1 + .. _`section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3 + """ + return str(int(time.time())) + + +def generate_token(length=30, chars=UNICODE_ASCII_CHARACTER_SET): + """Generates a non-guessable OAuth token + + OAuth (1 and 2) does not specify the format of tokens except that they + should be strings of random characters. Tokens should not be guessable + and entropy when generating the random characters is important. Which is + why SystemRandom is used instead of the default random.choice method. + """ + rand = SystemRandom() + return ''.join(rand.choice(chars) for x in range(length)) + + +def generate_signed_token(private_pem, request): + import jwt + + now = datetime.datetime.utcnow() + + claims = { + 'scope': request.scope, + 'exp': now + datetime.timedelta(seconds=request.expires_in) + } + + claims.update(request.claims) + + token = jwt.encode(claims, private_pem, 'RS256') + token = to_unicode(token, "UTF-8") + + return token + + +def verify_signed_token(public_pem, token): + import jwt + + return jwt.decode(token, public_pem, algorithms=['RS256']) + + +def generate_client_id(length=30, chars=CLIENT_ID_CHARACTER_SET): + """Generates an OAuth client_id + + OAuth 2 specify the format of client_id in + https://tools.ietf.org/html/rfc6749#appendix-A. + """ + return generate_token(length, chars) + + +def add_params_to_qs(query, params): + """Extend a query with a list of two-tuples.""" + if isinstance(params, dict): + params = params.items() + queryparams = urlparse.parse_qsl(query, keep_blank_values=True) + queryparams.extend(params) + return urlencode(queryparams) + + +def add_params_to_uri(uri, params, fragment=False): + """Add a list of two-tuples to the uri query components.""" + sch, net, path, par, query, fra = urlparse.urlparse(uri) + if fragment: + fra = add_params_to_qs(fra, params) + else: + query = add_params_to_qs(query, params) + return urlparse.urlunparse((sch, net, path, par, query, fra)) + + +def safe_string_equals(a, b): + """ Near-constant time string comparison. + + Used in order to avoid timing attacks on sensitive information such + as secret keys during request verification (`rootLabs`_). + + .. _`rootLabs`: http://rdist.root.org/2010/01/07/timing-independent-array-comparison/ + + """ + if len(a) != len(b): + return False + + result = 0 + for x, y in zip(a, b): + result |= ord(x) ^ ord(y) + return result == 0 + + +def to_unicode(data, encoding='UTF-8'): + """Convert a number of different types of objects to unicode.""" + if isinstance(data, str): + return data + + if isinstance(data, bytes): + return str(data, encoding=encoding) + + if hasattr(data, '__iter__'): + try: + dict(data) + except TypeError: + pass + except ValueError: + # Assume it's a one dimensional data structure + return (to_unicode(i, encoding) for i in data) + else: + # We support 2.6 which lacks dict comprehensions + if hasattr(data, 'items'): + data = data.items() + return {to_unicode(k, encoding): to_unicode(v, encoding) for k, v in data} + + return data + + +class CaseInsensitiveDict(dict): + + """Basic case insensitive dict with strings only keys.""" + + proxy = {} + + def __init__(self, data): + self.proxy = {k.lower(): k for k in data} + for k in data: + self[k] = data[k] + + def __contains__(self, k): + return k.lower() in self.proxy + + def __delitem__(self, k): + key = self.proxy[k.lower()] + super().__delitem__(key) + del self.proxy[k.lower()] + + def __getitem__(self, k): + key = self.proxy[k.lower()] + return super().__getitem__(key) + + def get(self, k, default=None): + return self[k] if k in self else default + + def __setitem__(self, k, v): + super().__setitem__(k, v) + self.proxy[k.lower()] = k + + def update(self, *args, **kwargs): + super().update(*args, **kwargs) + for k in dict(*args, **kwargs): + self.proxy[k.lower()] = k + + +class Request: + + """A malleable representation of a signable HTTP request. + + Body argument may contain any data, but parameters will only be decoded if + they are one of: + + * urlencoded query string + * dict + * list of 2-tuples + + Anything else will be treated as raw body data to be passed through + unmolested. + """ + + def __init__(self, uri, http_method='GET', body=None, headers=None, + encoding='utf-8'): + # Convert to unicode using encoding if given, else assume unicode + encode = lambda x: to_unicode(x, encoding) if encoding else x + + self.uri = encode(uri) + self.http_method = encode(http_method) + self.headers = CaseInsensitiveDict(encode(headers or {})) + self.body = encode(body) + self.decoded_body = extract_params(self.body) + self.oauth_params = [] + self.validator_log = {} + + self._params = { + "access_token": None, + "client": None, + "client_id": None, + "client_secret": None, + "code": None, + "code_challenge": None, + "code_challenge_method": None, + "code_verifier": None, + "extra_credentials": None, + "grant_type": None, + "redirect_uri": None, + "refresh_token": None, + "request_token": None, + "response_type": None, + "scope": None, + "scopes": None, + "state": None, + "token": None, + "user": None, + "token_type_hint": None, + + # OpenID Connect + "response_mode": None, + "nonce": None, + "display": None, + "prompt": None, + "claims": None, + "max_age": None, + "ui_locales": None, + "id_token_hint": None, + "login_hint": None, + "acr_values": None + } + self._params.update(dict(urldecode(self.uri_query))) + self._params.update(dict(self.decoded_body or [])) + + def __getattr__(self, name): + if name in self._params: + return self._params[name] + else: + raise AttributeError(name) + + def __repr__(self): + if not get_debug(): + return "" + body = self.body + headers = self.headers.copy() + if body: + body = SANITIZE_PATTERN.sub('\1', str(body)) + if 'Authorization' in headers: + headers['Authorization'] = '' + return ''.format( + self.uri, self.http_method, headers, body) + + @property + def uri_query(self): + return urlparse.urlparse(self.uri).query + + @property + def uri_query_params(self): + if not self.uri_query: + return [] + return urlparse.parse_qsl(self.uri_query, keep_blank_values=True, + strict_parsing=True) + + @property + def duplicate_params(self): + seen_keys = collections.defaultdict(int) + all_keys = (p[0] + for p in (self.decoded_body or []) + self.uri_query_params) + for k in all_keys: + seen_keys[k] += 1 + return [k for k, c in seen_keys.items() if c > 1] diff --git a/lib/oauthlib/oauth1/__init__.py b/lib/oauthlib/oauth1/__init__.py new file mode 100644 index 00000000..07ef4226 --- /dev/null +++ b/lib/oauthlib/oauth1/__init__.py @@ -0,0 +1,28 @@ +""" +oauthlib.oauth1 +~~~~~~~~~~~~~~ + +This module is a wrapper for the most recent implementation of OAuth 1.0 Client +and Server classes. +""" +from .rfc5849 import Client +from .rfc5849 import (SIGNATURE_HMAC, + SIGNATURE_HMAC_SHA1, + SIGNATURE_HMAC_SHA256, + SIGNATURE_HMAC_SHA512, + SIGNATURE_RSA, + SIGNATURE_RSA_SHA1, + SIGNATURE_RSA_SHA256, + SIGNATURE_RSA_SHA512, + SIGNATURE_PLAINTEXT) +from .rfc5849 import SIGNATURE_TYPE_AUTH_HEADER, SIGNATURE_TYPE_QUERY +from .rfc5849 import SIGNATURE_TYPE_BODY +from .rfc5849.request_validator import RequestValidator +from .rfc5849.endpoints import RequestTokenEndpoint, AuthorizationEndpoint +from .rfc5849.endpoints import AccessTokenEndpoint, ResourceEndpoint +from .rfc5849.endpoints import SignatureOnlyEndpoint, WebApplicationServer +from .rfc5849.errors import (InsecureTransportError, + InvalidClientError, + InvalidRequestError, + InvalidSignatureMethodError, + OAuth1Error) diff --git a/lib/oauthlib/oauth1/rfc5849/__init__.py b/lib/oauthlib/oauth1/rfc5849/__init__.py new file mode 100644 index 00000000..c559251f --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/__init__.py @@ -0,0 +1,365 @@ +""" +oauthlib.oauth1.rfc5849 +~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for signing and checking OAuth 1.0 RFC 5849 requests. + +It supports all three standard signature methods defined in RFC 5849: + +- HMAC-SHA1 +- RSA-SHA1 +- PLAINTEXT + +It also supports signature methods that are not defined in RFC 5849. These are +based on the standard ones but replace SHA-1 with the more secure SHA-256: + +- HMAC-SHA256 +- RSA-SHA256 + +""" +import base64 +import hashlib +import logging +import urllib.parse as urlparse + +from oauthlib.common import ( + Request, generate_nonce, generate_timestamp, to_unicode, urlencode, +) + +from . import parameters, signature + +log = logging.getLogger(__name__) + +# Available signature methods +# +# Note: SIGNATURE_HMAC and SIGNATURE_RSA are kept for backward compatibility +# with previous versions of this library, when it the only HMAC-based and +# RSA-based signature methods were HMAC-SHA1 and RSA-SHA1. But now that it +# supports other hashing algorithms besides SHA1, explicitly identifying which +# hashing algorithm is being used is recommended. +# +# Note: if additional values are defined here, don't forget to update the +# imports in "../__init__.py" so they are available outside this module. + +SIGNATURE_HMAC_SHA1 = "HMAC-SHA1" +SIGNATURE_HMAC_SHA256 = "HMAC-SHA256" +SIGNATURE_HMAC_SHA512 = "HMAC-SHA512" +SIGNATURE_HMAC = SIGNATURE_HMAC_SHA1 # deprecated variable for HMAC-SHA1 + +SIGNATURE_RSA_SHA1 = "RSA-SHA1" +SIGNATURE_RSA_SHA256 = "RSA-SHA256" +SIGNATURE_RSA_SHA512 = "RSA-SHA512" +SIGNATURE_RSA = SIGNATURE_RSA_SHA1 # deprecated variable for RSA-SHA1 + +SIGNATURE_PLAINTEXT = "PLAINTEXT" + +SIGNATURE_METHODS = ( + SIGNATURE_HMAC_SHA1, + SIGNATURE_HMAC_SHA256, + SIGNATURE_HMAC_SHA512, + SIGNATURE_RSA_SHA1, + SIGNATURE_RSA_SHA256, + SIGNATURE_RSA_SHA512, + SIGNATURE_PLAINTEXT +) + +SIGNATURE_TYPE_AUTH_HEADER = 'AUTH_HEADER' +SIGNATURE_TYPE_QUERY = 'QUERY' +SIGNATURE_TYPE_BODY = 'BODY' + +CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' + + +class Client: + + """A client used to sign OAuth 1.0 RFC 5849 requests.""" + SIGNATURE_METHODS = { + SIGNATURE_HMAC_SHA1: signature.sign_hmac_sha1_with_client, + SIGNATURE_HMAC_SHA256: signature.sign_hmac_sha256_with_client, + SIGNATURE_HMAC_SHA512: signature.sign_hmac_sha512_with_client, + SIGNATURE_RSA_SHA1: signature.sign_rsa_sha1_with_client, + SIGNATURE_RSA_SHA256: signature.sign_rsa_sha256_with_client, + SIGNATURE_RSA_SHA512: signature.sign_rsa_sha512_with_client, + SIGNATURE_PLAINTEXT: signature.sign_plaintext_with_client + } + + @classmethod + def register_signature_method(cls, method_name, method_callback): + cls.SIGNATURE_METHODS[method_name] = method_callback + + def __init__(self, client_key, + client_secret=None, + resource_owner_key=None, + resource_owner_secret=None, + callback_uri=None, + signature_method=SIGNATURE_HMAC_SHA1, + signature_type=SIGNATURE_TYPE_AUTH_HEADER, + rsa_key=None, verifier=None, realm=None, + encoding='utf-8', decoding=None, + nonce=None, timestamp=None): + """Create an OAuth 1 client. + + :param client_key: Client key (consumer key), mandatory. + :param resource_owner_key: Resource owner key (oauth token). + :param resource_owner_secret: Resource owner secret (oauth token secret). + :param callback_uri: Callback used when obtaining request token. + :param signature_method: SIGNATURE_HMAC, SIGNATURE_RSA or SIGNATURE_PLAINTEXT. + :param signature_type: SIGNATURE_TYPE_AUTH_HEADER (default), + SIGNATURE_TYPE_QUERY or SIGNATURE_TYPE_BODY + depending on where you want to embed the oauth + credentials. + :param rsa_key: RSA key used with SIGNATURE_RSA. + :param verifier: Verifier used when obtaining an access token. + :param realm: Realm (scope) to which access is being requested. + :param encoding: If you provide non-unicode input you may use this + to have oauthlib automatically convert. + :param decoding: If you wish that the returned uri, headers and body + from sign be encoded back from unicode, then set + decoding to your preferred encoding, i.e. utf-8. + :param nonce: Use this nonce instead of generating one. (Mainly for testing) + :param timestamp: Use this timestamp instead of using current. (Mainly for testing) + """ + # Convert to unicode using encoding if given, else assume unicode + encode = lambda x: to_unicode(x, encoding) if encoding else x + + self.client_key = encode(client_key) + self.client_secret = encode(client_secret) + self.resource_owner_key = encode(resource_owner_key) + self.resource_owner_secret = encode(resource_owner_secret) + self.signature_method = encode(signature_method) + self.signature_type = encode(signature_type) + self.callback_uri = encode(callback_uri) + self.rsa_key = encode(rsa_key) + self.verifier = encode(verifier) + self.realm = encode(realm) + self.encoding = encode(encoding) + self.decoding = encode(decoding) + self.nonce = encode(nonce) + self.timestamp = encode(timestamp) + + def __repr__(self): + attrs = vars(self).copy() + attrs['client_secret'] = '****' if attrs['client_secret'] else None + attrs['rsa_key'] = '****' if attrs['rsa_key'] else None + attrs[ + 'resource_owner_secret'] = '****' if attrs['resource_owner_secret'] else None + attribute_str = ', '.join('{}={}'.format(k, v) for k, v in attrs.items()) + return '<{} {}>'.format(self.__class__.__name__, attribute_str) + + def get_oauth_signature(self, request): + """Get an OAuth signature to be used in signing a request + + To satisfy `section 3.4.1.2`_ item 2, if the request argument's + headers dict attribute contains a Host item, its value will + replace any netloc part of the request argument's uri attribute + value. + + .. _`section 3.4.1.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.2 + """ + if self.signature_method == SIGNATURE_PLAINTEXT: + # fast-path + return signature.sign_plaintext(self.client_secret, + self.resource_owner_secret) + + uri, headers, body = self._render(request) + + collected_params = signature.collect_parameters( + uri_query=urlparse.urlparse(uri).query, + body=body, + headers=headers) + log.debug("Collected params: {}".format(collected_params)) + + normalized_params = signature.normalize_parameters(collected_params) + normalized_uri = signature.base_string_uri(uri, headers.get('Host', None)) + log.debug("Normalized params: {}".format(normalized_params)) + log.debug("Normalized URI: {}".format(normalized_uri)) + + base_string = signature.signature_base_string(request.http_method, + normalized_uri, normalized_params) + + log.debug("Signing: signature base string: {}".format(base_string)) + + if self.signature_method not in self.SIGNATURE_METHODS: + raise ValueError('Invalid signature method.') + + sig = self.SIGNATURE_METHODS[self.signature_method](base_string, self) + + log.debug("Signature: {}".format(sig)) + return sig + + def get_oauth_params(self, request): + """Get the basic OAuth parameters to be used in generating a signature. + """ + nonce = (generate_nonce() + if self.nonce is None else self.nonce) + timestamp = (generate_timestamp() + if self.timestamp is None else self.timestamp) + params = [ + ('oauth_nonce', nonce), + ('oauth_timestamp', timestamp), + ('oauth_version', '1.0'), + ('oauth_signature_method', self.signature_method), + ('oauth_consumer_key', self.client_key), + ] + if self.resource_owner_key: + params.append(('oauth_token', self.resource_owner_key)) + if self.callback_uri: + params.append(('oauth_callback', self.callback_uri)) + if self.verifier: + params.append(('oauth_verifier', self.verifier)) + + # providing body hash for requests other than x-www-form-urlencoded + # as described in https://tools.ietf.org/html/draft-eaton-oauth-bodyhash-00#section-4.1.1 + # 4.1.1. When to include the body hash + # * [...] MUST NOT include an oauth_body_hash parameter on requests with form-encoded request bodies + # * [...] SHOULD include the oauth_body_hash parameter on all other requests. + # Note that SHA-1 is vulnerable. The spec acknowledges that in https://tools.ietf.org/html/draft-eaton-oauth-bodyhash-00#section-6.2 + # At this time, no further effort has been made to replace SHA-1 for the OAuth Request Body Hash extension. + content_type = request.headers.get('Content-Type', None) + content_type_eligible = content_type and content_type.find('application/x-www-form-urlencoded') < 0 + if request.body is not None and content_type_eligible: + params.append(('oauth_body_hash', base64.b64encode(hashlib.sha1(request.body.encode('utf-8')).digest()).decode('utf-8'))) + + return params + + def _render(self, request, formencode=False, realm=None): + """Render a signed request according to signature type + + Returns a 3-tuple containing the request URI, headers, and body. + + If the formencode argument is True and the body contains parameters, it + is escaped and returned as a valid formencoded string. + """ + # TODO what if there are body params on a header-type auth? + # TODO what if there are query params on a body-type auth? + + uri, headers, body = request.uri, request.headers, request.body + + # TODO: right now these prepare_* methods are very narrow in scope--they + # only affect their little thing. In some cases (for example, with + # header auth) it might be advantageous to allow these methods to touch + # other parts of the request, like the headers—so the prepare_headers + # method could also set the Content-Type header to x-www-form-urlencoded + # like the spec requires. This would be a fundamental change though, and + # I'm not sure how I feel about it. + if self.signature_type == SIGNATURE_TYPE_AUTH_HEADER: + headers = parameters.prepare_headers( + request.oauth_params, request.headers, realm=realm) + elif self.signature_type == SIGNATURE_TYPE_BODY and request.decoded_body is not None: + body = parameters.prepare_form_encoded_body( + request.oauth_params, request.decoded_body) + if formencode: + body = urlencode(body) + headers['Content-Type'] = 'application/x-www-form-urlencoded' + elif self.signature_type == SIGNATURE_TYPE_QUERY: + uri = parameters.prepare_request_uri_query( + request.oauth_params, request.uri) + else: + raise ValueError('Unknown signature type specified.') + + return uri, headers, body + + def sign(self, uri, http_method='GET', body=None, headers=None, realm=None): + """Sign a request + + Signs an HTTP request with the specified parts. + + Returns a 3-tuple of the signed request's URI, headers, and body. + Note that http_method is not returned as it is unaffected by the OAuth + signing process. Also worth noting is that duplicate parameters + will be included in the signature, regardless of where they are + specified (query, body). + + The body argument may be a dict, a list of 2-tuples, or a formencoded + string. The Content-Type header must be 'application/x-www-form-urlencoded' + if it is present. + + If the body argument is not one of the above, it will be returned + verbatim as it is unaffected by the OAuth signing process. Attempting to + sign a request with non-formencoded data using the OAuth body signature + type is invalid and will raise an exception. + + If the body does contain parameters, it will be returned as a properly- + formatted formencoded string. + + Body may not be included if the http_method is either GET or HEAD as + this changes the semantic meaning of the request. + + All string data MUST be unicode or be encoded with the same encoding + scheme supplied to the Client constructor, default utf-8. This includes + strings inside body dicts, for example. + """ + # normalize request data + request = Request(uri, http_method, body, headers, + encoding=self.encoding) + + # sanity check + content_type = request.headers.get('Content-Type', None) + multipart = content_type and content_type.startswith('multipart/') + should_have_params = content_type == CONTENT_TYPE_FORM_URLENCODED + has_params = request.decoded_body is not None + # 3.4.1.3.1. Parameter Sources + # [Parameters are collected from the HTTP request entity-body, but only + # if [...]: + # * The entity-body is single-part. + if multipart and has_params: + raise ValueError( + "Headers indicate a multipart body but body contains parameters.") + # * The entity-body follows the encoding requirements of the + # "application/x-www-form-urlencoded" content-type as defined by + # [W3C.REC-html40-19980424]. + elif should_have_params and not has_params: + raise ValueError( + "Headers indicate a formencoded body but body was not decodable.") + # * The HTTP request entity-header includes the "Content-Type" + # header field set to "application/x-www-form-urlencoded". + elif not should_have_params and has_params: + raise ValueError( + "Body contains parameters but Content-Type header was {} " + "instead of {}".format(content_type or "not set", + CONTENT_TYPE_FORM_URLENCODED)) + + # 3.5.2. Form-Encoded Body + # Protocol parameters can be transmitted in the HTTP request entity- + # body, but only if the following REQUIRED conditions are met: + # o The entity-body is single-part. + # o The entity-body follows the encoding requirements of the + # "application/x-www-form-urlencoded" content-type as defined by + # [W3C.REC-html40-19980424]. + # o The HTTP request entity-header includes the "Content-Type" header + # field set to "application/x-www-form-urlencoded". + elif self.signature_type == SIGNATURE_TYPE_BODY and not ( + should_have_params and has_params and not multipart): + raise ValueError( + 'Body signatures may only be used with form-urlencoded content') + + # We amend https://tools.ietf.org/html/rfc5849#section-3.4.1.3.1 + # with the clause that parameters from body should only be included + # in non GET or HEAD requests. Extracting the request body parameters + # and including them in the signature base string would give semantic + # meaning to the body, which it should not have according to the + # HTTP 1.1 spec. + elif http_method.upper() in ('GET', 'HEAD') and has_params: + raise ValueError('GET/HEAD requests should not include body.') + + # generate the basic OAuth parameters + request.oauth_params = self.get_oauth_params(request) + + # generate the signature + request.oauth_params.append( + ('oauth_signature', self.get_oauth_signature(request))) + + # render the signed request and return it + uri, headers, body = self._render(request, formencode=True, + realm=(realm or self.realm)) + + if self.decoding: + log.debug('Encoding URI, headers and body to %s.', self.decoding) + uri = uri.encode(self.decoding) + body = body.encode(self.decoding) if body else body + new_headers = {} + for k, v in headers.items(): + new_headers[k.encode(self.decoding)] = v.encode(self.decoding) + headers = new_headers + return uri, headers, body diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/__init__.py b/lib/oauthlib/oauth1/rfc5849/endpoints/__init__.py new file mode 100644 index 00000000..9f30389f --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/__init__.py @@ -0,0 +1,8 @@ +from .access_token import AccessTokenEndpoint +from .authorization import AuthorizationEndpoint +from .base import BaseEndpoint +from .request_token import RequestTokenEndpoint +from .resource import ResourceEndpoint +from .signature_only import SignatureOnlyEndpoint + +from .pre_configured import WebApplicationServer # isort:skip diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/access_token.py b/lib/oauthlib/oauth1/rfc5849/endpoints/access_token.py new file mode 100644 index 00000000..13665db0 --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/access_token.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth1.rfc5849.endpoints.access_token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of the access token provider logic of +OAuth 1.0 RFC 5849. It validates the correctness of access token requests, +creates and persists tokens as well as create the proper response to be +returned to the client. +""" +import logging + +from oauthlib.common import urlencode + +from .. import errors +from .base import BaseEndpoint + +log = logging.getLogger(__name__) + + +class AccessTokenEndpoint(BaseEndpoint): + + """An endpoint responsible for providing OAuth 1 access tokens. + + Typical use is to instantiate with a request validator and invoke the + ``create_access_token_response`` from a view function. The tuple returned + has all information necessary (body, status, headers) to quickly form + and return a proper response. See :doc:`/oauth1/validator` for details on which + validator methods to implement for this endpoint. + """ + + def create_access_token(self, request, credentials): + """Create and save a new access token. + + Similar to OAuth 2, indication of granted scopes will be included as a + space separated list in ``oauth_authorized_realms``. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The token as an urlencoded string. + """ + request.realms = self.request_validator.get_realms( + request.resource_owner_key, request) + token = { + 'oauth_token': self.token_generator(), + 'oauth_token_secret': self.token_generator(), + # Backport the authorized scopes indication used in OAuth2 + 'oauth_authorized_realms': ' '.join(request.realms) + } + token.update(credentials) + self.request_validator.save_access_token(token, request) + return urlencode(token.items()) + + def create_access_token_response(self, uri, http_method='GET', body=None, + headers=None, credentials=None): + """Create an access token response, with a new request token if valid. + + :param uri: The full URI of the token request. + :param http_method: A valid HTTP verb, i.e. GET, POST, PUT, HEAD, etc. + :param body: The request body as a string. + :param headers: The request headers as a dict. + :param credentials: A list of extra credentials to include in the token. + :returns: A tuple of 3 elements. + 1. A dict of headers to set on the response. + 2. The response body as a string. + 3. The response status code as an integer. + + An example of a valid request:: + + >>> from your_validator import your_validator + >>> from oauthlib.oauth1 import AccessTokenEndpoint + >>> endpoint = AccessTokenEndpoint(your_validator) + >>> h, b, s = endpoint.create_access_token_response( + ... 'https://your.provider/access_token?foo=bar', + ... headers={ + ... 'Authorization': 'OAuth oauth_token=234lsdkf....' + ... }, + ... credentials={ + ... 'my_specific': 'argument', + ... }) + >>> h + {'Content-Type': 'application/x-www-form-urlencoded'} + >>> b + 'oauth_token=lsdkfol23w54jlksdef&oauth_token_secret=qwe089234lkjsdf&oauth_authorized_realms=movies+pics&my_specific=argument' + >>> s + 200 + + An response to invalid request would have a different body and status:: + + >>> b + 'error=invalid_request&description=missing+resource+owner+key' + >>> s + 400 + + The same goes for an an unauthorized request: + + >>> b + '' + >>> s + 401 + """ + resp_headers = {'Content-Type': 'application/x-www-form-urlencoded'} + try: + request = self._create_request(uri, http_method, body, headers) + valid, processed_request = self.validate_access_token_request( + request) + if valid: + token = self.create_access_token(request, credentials or {}) + self.request_validator.invalidate_request_token( + request.client_key, + request.resource_owner_key, + request) + return resp_headers, token, 200 + else: + return {}, None, 401 + except errors.OAuth1Error as e: + return resp_headers, e.urlencoded, e.status_code + + def validate_access_token_request(self, request): + """Validate an access token request. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :raises: OAuth1Error if the request is invalid. + :returns: A tuple of 2 elements. + 1. The validation result (True or False). + 2. The request object. + """ + self._check_transport_security(request) + self._check_mandatory_parameters(request) + + if not request.resource_owner_key: + raise errors.InvalidRequestError( + description='Missing resource owner.') + + if not self.request_validator.check_request_token( + request.resource_owner_key): + raise errors.InvalidRequestError( + description='Invalid resource owner key format.') + + if not request.verifier: + raise errors.InvalidRequestError( + description='Missing verifier.') + + if not self.request_validator.check_verifier(request.verifier): + raise errors.InvalidRequestError( + description='Invalid verifier format.') + + if not self.request_validator.validate_timestamp_and_nonce( + request.client_key, request.timestamp, request.nonce, request, + request_token=request.resource_owner_key): + return False, request + + # The server SHOULD return a 401 (Unauthorized) status code when + # receiving a request with invalid client credentials. + # Note: This is postponed in order to avoid timing attacks, instead + # a dummy client is assigned and used to maintain near constant + # time request verification. + # + # Note that early exit would enable client enumeration + valid_client = self.request_validator.validate_client_key( + request.client_key, request) + if not valid_client: + request.client_key = self.request_validator.dummy_client + + # The server SHOULD return a 401 (Unauthorized) status code when + # receiving a request with invalid or expired token. + # Note: This is postponed in order to avoid timing attacks, instead + # a dummy token is assigned and used to maintain near constant + # time request verification. + # + # Note that early exit would enable resource owner enumeration + valid_resource_owner = self.request_validator.validate_request_token( + request.client_key, request.resource_owner_key, request) + if not valid_resource_owner: + request.resource_owner_key = self.request_validator.dummy_request_token + + # The server MUST verify (Section 3.2) the validity of the request, + # ensure that the resource owner has authorized the provisioning of + # token credentials to the client, and ensure that the temporary + # credentials have not expired or been used before. The server MUST + # also verify the verification code received from the client. + # .. _`Section 3.2`: https://tools.ietf.org/html/rfc5849#section-3.2 + # + # Note that early exit would enable resource owner authorization + # verifier enumertion. + valid_verifier = self.request_validator.validate_verifier( + request.client_key, + request.resource_owner_key, + request.verifier, + request) + + valid_signature = self._check_signature(request, is_token_request=True) + + # log the results to the validator_log + # this lets us handle internal reporting and analysis + request.validator_log['client'] = valid_client + request.validator_log['resource_owner'] = valid_resource_owner + request.validator_log['verifier'] = valid_verifier + request.validator_log['signature'] = valid_signature + + # We delay checking validity until the very end, using dummy values for + # calculations and fetching secrets/keys to ensure the flow of every + # request remains almost identical regardless of whether valid values + # have been supplied. This ensures near constant time execution and + # prevents malicious users from guessing sensitive information + v = all((valid_client, valid_resource_owner, valid_verifier, + valid_signature)) + if not v: + log.info("[Failure] request verification failed.") + log.info("Valid client:, %s", valid_client) + log.info("Valid token:, %s", valid_resource_owner) + log.info("Valid verifier:, %s", valid_verifier) + log.info("Valid signature:, %s", valid_signature) + return v, request diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/authorization.py b/lib/oauthlib/oauth1/rfc5849/endpoints/authorization.py new file mode 100644 index 00000000..00d9576b --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/authorization.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth1.rfc5849.endpoints.authorization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for signing and checking OAuth 1.0 RFC 5849 requests. +""" +from urllib.parse import urlencode + +from oauthlib.common import add_params_to_uri + +from .. import errors +from .base import BaseEndpoint + + +class AuthorizationEndpoint(BaseEndpoint): + + """An endpoint responsible for letting authenticated users authorize access + to their protected resources to a client. + + Typical use would be to have two views, one for displaying the authorization + form and one to process said form on submission. + + The first view will want to utilize ``get_realms_and_credentials`` to fetch + requested realms and useful client credentials, such as name and + description, to be used when creating the authorization form. + + During form processing you can use ``create_authorization_response`` to + validate the request, create a verifier as well as prepare the final + redirection URI used to send the user back to the client. + + See :doc:`/oauth1/validator` for details on which validator methods to implement + for this endpoint. + """ + + def create_verifier(self, request, credentials): + """Create and save a new request token. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param credentials: A dict of extra token credentials. + :returns: The verifier as a dict. + """ + verifier = { + 'oauth_token': request.resource_owner_key, + 'oauth_verifier': self.token_generator(), + } + verifier.update(credentials) + self.request_validator.save_verifier( + request.resource_owner_key, verifier, request) + return verifier + + def create_authorization_response(self, uri, http_method='GET', body=None, + headers=None, realms=None, credentials=None): + """Create an authorization response, with a new request token if valid. + + :param uri: The full URI of the token request. + :param http_method: A valid HTTP verb, i.e. GET, POST, PUT, HEAD, etc. + :param body: The request body as a string. + :param headers: The request headers as a dict. + :param credentials: A list of credentials to include in the verifier. + :returns: A tuple of 3 elements. + 1. A dict of headers to set on the response. + 2. The response body as a string. + 3. The response status code as an integer. + + If the callback URI tied to the current token is "oob", a response with + a 200 status code will be returned. In this case, it may be desirable to + modify the response to better display the verifier to the client. + + An example of an authorization request:: + + >>> from your_validator import your_validator + >>> from oauthlib.oauth1 import AuthorizationEndpoint + >>> endpoint = AuthorizationEndpoint(your_validator) + >>> h, b, s = endpoint.create_authorization_response( + ... 'https://your.provider/authorize?oauth_token=...', + ... credentials={ + ... 'extra': 'argument', + ... }) + >>> h + {'Location': 'https://the.client/callback?oauth_verifier=...&extra=argument'} + >>> b + None + >>> s + 302 + + An example of a request with an "oob" callback:: + + >>> from your_validator import your_validator + >>> from oauthlib.oauth1 import AuthorizationEndpoint + >>> endpoint = AuthorizationEndpoint(your_validator) + >>> h, b, s = endpoint.create_authorization_response( + ... 'https://your.provider/authorize?foo=bar', + ... credentials={ + ... 'extra': 'argument', + ... }) + >>> h + {'Content-Type': 'application/x-www-form-urlencoded'} + >>> b + 'oauth_verifier=...&extra=argument' + >>> s + 200 + """ + request = self._create_request(uri, http_method=http_method, body=body, + headers=headers) + + if not request.resource_owner_key: + raise errors.InvalidRequestError( + 'Missing mandatory parameter oauth_token.') + if not self.request_validator.verify_request_token( + request.resource_owner_key, request): + raise errors.InvalidClientError() + + request.realms = realms + if (request.realms and not self.request_validator.verify_realms( + request.resource_owner_key, request.realms, request)): + raise errors.InvalidRequestError( + description=('User granted access to realms outside of ' + 'what the client may request.')) + + verifier = self.create_verifier(request, credentials or {}) + redirect_uri = self.request_validator.get_redirect_uri( + request.resource_owner_key, request) + if redirect_uri == 'oob': + response_headers = { + 'Content-Type': 'application/x-www-form-urlencoded'} + response_body = urlencode(verifier) + return response_headers, response_body, 200 + else: + populated_redirect = add_params_to_uri( + redirect_uri, verifier.items()) + return {'Location': populated_redirect}, None, 302 + + def get_realms_and_credentials(self, uri, http_method='GET', body=None, + headers=None): + """Fetch realms and credentials for the presented request token. + + :param uri: The full URI of the token request. + :param http_method: A valid HTTP verb, i.e. GET, POST, PUT, HEAD, etc. + :param body: The request body as a string. + :param headers: The request headers as a dict. + :returns: A tuple of 2 elements. + 1. A list of request realms. + 2. A dict of credentials which may be useful in creating the + authorization form. + """ + request = self._create_request(uri, http_method=http_method, body=body, + headers=headers) + + if not self.request_validator.verify_request_token( + request.resource_owner_key, request): + raise errors.InvalidClientError() + + realms = self.request_validator.get_realms( + request.resource_owner_key, request) + return realms, {'resource_owner_key': request.resource_owner_key} diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/base.py b/lib/oauthlib/oauth1/rfc5849/endpoints/base.py new file mode 100644 index 00000000..3a8c2677 --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/base.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth1.rfc5849.endpoints.base +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for signing and checking OAuth 1.0 RFC 5849 requests. +""" +import time + +from oauthlib.common import CaseInsensitiveDict, Request, generate_token + +from .. import ( + CONTENT_TYPE_FORM_URLENCODED, + SIGNATURE_HMAC_SHA1, SIGNATURE_HMAC_SHA256, SIGNATURE_HMAC_SHA512, + SIGNATURE_RSA_SHA1, SIGNATURE_RSA_SHA256, SIGNATURE_RSA_SHA512, + SIGNATURE_PLAINTEXT, + SIGNATURE_TYPE_AUTH_HEADER, SIGNATURE_TYPE_BODY, + SIGNATURE_TYPE_QUERY, errors, signature, utils) + + +class BaseEndpoint: + + def __init__(self, request_validator, token_generator=None): + self.request_validator = request_validator + self.token_generator = token_generator or generate_token + + def _get_signature_type_and_params(self, request): + """Extracts parameters from query, headers and body. Signature type + is set to the source in which parameters were found. + """ + # Per RFC5849, only the Authorization header may contain the 'realm' + # optional parameter. + header_params = signature.collect_parameters(headers=request.headers, + exclude_oauth_signature=False, with_realm=True) + body_params = signature.collect_parameters(body=request.body, + exclude_oauth_signature=False) + query_params = signature.collect_parameters(uri_query=request.uri_query, + exclude_oauth_signature=False) + + params = [] + params.extend(header_params) + params.extend(body_params) + params.extend(query_params) + signature_types_with_oauth_params = list(filter(lambda s: s[2], ( + (SIGNATURE_TYPE_AUTH_HEADER, params, + utils.filter_oauth_params(header_params)), + (SIGNATURE_TYPE_BODY, params, + utils.filter_oauth_params(body_params)), + (SIGNATURE_TYPE_QUERY, params, + utils.filter_oauth_params(query_params)) + ))) + + if len(signature_types_with_oauth_params) > 1: + found_types = [s[0] for s in signature_types_with_oauth_params] + raise errors.InvalidRequestError( + description=('oauth_ params must come from only 1 signature' + 'type but were found in %s', + ', '.join(found_types))) + + try: + signature_type, params, oauth_params = signature_types_with_oauth_params[ + 0] + except IndexError: + raise errors.InvalidRequestError( + description='Missing mandatory OAuth parameters.') + + return signature_type, params, oauth_params + + def _create_request(self, uri, http_method, body, headers): + # Only include body data from x-www-form-urlencoded requests + headers = CaseInsensitiveDict(headers or {}) + if ("Content-Type" in headers and + CONTENT_TYPE_FORM_URLENCODED in headers["Content-Type"]): + request = Request(uri, http_method, body, headers) + else: + request = Request(uri, http_method, '', headers) + + signature_type, params, oauth_params = ( + self._get_signature_type_and_params(request)) + + # The server SHOULD return a 400 (Bad Request) status code when + # receiving a request with duplicated protocol parameters. + if len(dict(oauth_params)) != len(oauth_params): + raise errors.InvalidRequestError( + description='Duplicate OAuth1 entries.') + + oauth_params = dict(oauth_params) + request.signature = oauth_params.get('oauth_signature') + request.client_key = oauth_params.get('oauth_consumer_key') + request.resource_owner_key = oauth_params.get('oauth_token') + request.nonce = oauth_params.get('oauth_nonce') + request.timestamp = oauth_params.get('oauth_timestamp') + request.redirect_uri = oauth_params.get('oauth_callback') + request.verifier = oauth_params.get('oauth_verifier') + request.signature_method = oauth_params.get('oauth_signature_method') + request.realm = dict(params).get('realm') + request.oauth_params = oauth_params + + # Parameters to Client depend on signature method which may vary + # for each request. Note that HMAC-SHA1 and PLAINTEXT share parameters + request.params = [(k, v) for k, v in params if k != "oauth_signature"] + + if 'realm' in request.headers.get('Authorization', ''): + request.params = [(k, v) + for k, v in request.params if k != "realm"] + + return request + + def _check_transport_security(self, request): + # TODO: move into oauthlib.common from oauth2.utils + if (self.request_validator.enforce_ssl and + not request.uri.lower().startswith("https://")): + raise errors.InsecureTransportError() + + def _check_mandatory_parameters(self, request): + # The server SHOULD return a 400 (Bad Request) status code when + # receiving a request with missing parameters. + if not all((request.signature, request.client_key, + request.nonce, request.timestamp, + request.signature_method)): + raise errors.InvalidRequestError( + description='Missing mandatory OAuth parameters.') + + # OAuth does not mandate a particular signature method, as each + # implementation can have its own unique requirements. Servers are + # free to implement and document their own custom methods. + # Recommending any particular method is beyond the scope of this + # specification. Implementers should review the Security + # Considerations section (`Section 4`_) before deciding on which + # method to support. + # .. _`Section 4`: https://tools.ietf.org/html/rfc5849#section-4 + if (not request.signature_method in + self.request_validator.allowed_signature_methods): + raise errors.InvalidSignatureMethodError( + description="Invalid signature, {} not in {!r}.".format( + request.signature_method, + self.request_validator.allowed_signature_methods)) + + # Servers receiving an authenticated request MUST validate it by: + # If the "oauth_version" parameter is present, ensuring its value is + # "1.0". + if ('oauth_version' in request.oauth_params and + request.oauth_params['oauth_version'] != '1.0'): + raise errors.InvalidRequestError( + description='Invalid OAuth version.') + + # The timestamp value MUST be a positive integer. Unless otherwise + # specified by the server's documentation, the timestamp is expressed + # in the number of seconds since January 1, 1970 00:00:00 GMT. + if len(request.timestamp) != 10: + raise errors.InvalidRequestError( + description='Invalid timestamp size') + + try: + ts = int(request.timestamp) + + except ValueError: + raise errors.InvalidRequestError( + description='Timestamp must be an integer.') + + else: + # To avoid the need to retain an infinite number of nonce values for + # future checks, servers MAY choose to restrict the time period after + # which a request with an old timestamp is rejected. + if abs(time.time() - ts) > self.request_validator.timestamp_lifetime: + raise errors.InvalidRequestError( + description=('Timestamp given is invalid, differ from ' + 'allowed by over %s seconds.' % ( + self.request_validator.timestamp_lifetime))) + + # Provider specific validation of parameters, used to enforce + # restrictions such as character set and length. + if not self.request_validator.check_client_key(request.client_key): + raise errors.InvalidRequestError( + description='Invalid client key format.') + + if not self.request_validator.check_nonce(request.nonce): + raise errors.InvalidRequestError( + description='Invalid nonce format.') + + def _check_signature(self, request, is_token_request=False): + # ---- RSA Signature verification ---- + if request.signature_method == SIGNATURE_RSA_SHA1 or \ + request.signature_method == SIGNATURE_RSA_SHA256 or \ + request.signature_method == SIGNATURE_RSA_SHA512: + # RSA-based signature method + + # The server verifies the signature per `[RFC3447] section 8.2.2`_ + # .. _`[RFC3447] section 8.2.2`: https://tools.ietf.org/html/rfc3447#section-8.2.1 + + rsa_key = self.request_validator.get_rsa_key( + request.client_key, request) + + if request.signature_method == SIGNATURE_RSA_SHA1: + valid_signature = signature.verify_rsa_sha1(request, rsa_key) + elif request.signature_method == SIGNATURE_RSA_SHA256: + valid_signature = signature.verify_rsa_sha256(request, rsa_key) + elif request.signature_method == SIGNATURE_RSA_SHA512: + valid_signature = signature.verify_rsa_sha512(request, rsa_key) + else: + valid_signature = False + + # ---- HMAC or Plaintext Signature verification ---- + else: + # Non-RSA based signature method + + # Servers receiving an authenticated request MUST validate it by: + # Recalculating the request signature independently as described in + # `Section 3.4`_ and comparing it to the value received from the + # client via the "oauth_signature" parameter. + # .. _`Section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 + + client_secret = self.request_validator.get_client_secret( + request.client_key, request) + + resource_owner_secret = None + if request.resource_owner_key: + if is_token_request: + resource_owner_secret = \ + self.request_validator.get_request_token_secret( + request.client_key, request.resource_owner_key, + request) + else: + resource_owner_secret = \ + self.request_validator.get_access_token_secret( + request.client_key, request.resource_owner_key, + request) + + if request.signature_method == SIGNATURE_HMAC_SHA1: + valid_signature = signature.verify_hmac_sha1( + request, client_secret, resource_owner_secret) + elif request.signature_method == SIGNATURE_HMAC_SHA256: + valid_signature = signature.verify_hmac_sha256( + request, client_secret, resource_owner_secret) + elif request.signature_method == SIGNATURE_HMAC_SHA512: + valid_signature = signature.verify_hmac_sha512( + request, client_secret, resource_owner_secret) + elif request.signature_method == SIGNATURE_PLAINTEXT: + valid_signature = signature.verify_plaintext( + request, client_secret, resource_owner_secret) + else: + valid_signature = False + + return valid_signature diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py b/lib/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py new file mode 100644 index 00000000..23e3cfc8 --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/pre_configured.py @@ -0,0 +1,14 @@ +from . import ( + AccessTokenEndpoint, AuthorizationEndpoint, RequestTokenEndpoint, + ResourceEndpoint, +) + + +class WebApplicationServer(RequestTokenEndpoint, AuthorizationEndpoint, + AccessTokenEndpoint, ResourceEndpoint): + + def __init__(self, request_validator): + RequestTokenEndpoint.__init__(self, request_validator) + AuthorizationEndpoint.__init__(self, request_validator) + AccessTokenEndpoint.__init__(self, request_validator) + ResourceEndpoint.__init__(self, request_validator) diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/request_token.py b/lib/oauthlib/oauth1/rfc5849/endpoints/request_token.py new file mode 100644 index 00000000..bb67e71e --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/request_token.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth1.rfc5849.endpoints.request_token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of the request token provider logic of +OAuth 1.0 RFC 5849. It validates the correctness of request token requests, +creates and persists tokens as well as create the proper response to be +returned to the client. +""" +import logging + +from oauthlib.common import urlencode + +from .. import errors +from .base import BaseEndpoint + +log = logging.getLogger(__name__) + + +class RequestTokenEndpoint(BaseEndpoint): + + """An endpoint responsible for providing OAuth 1 request tokens. + + Typical use is to instantiate with a request validator and invoke the + ``create_request_token_response`` from a view function. The tuple returned + has all information necessary (body, status, headers) to quickly form + and return a proper response. See :doc:`/oauth1/validator` for details on which + validator methods to implement for this endpoint. + """ + + def create_request_token(self, request, credentials): + """Create and save a new request token. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param credentials: A dict of extra token credentials. + :returns: The token as an urlencoded string. + """ + token = { + 'oauth_token': self.token_generator(), + 'oauth_token_secret': self.token_generator(), + 'oauth_callback_confirmed': 'true' + } + token.update(credentials) + self.request_validator.save_request_token(token, request) + return urlencode(token.items()) + + def create_request_token_response(self, uri, http_method='GET', body=None, + headers=None, credentials=None): + """Create a request token response, with a new request token if valid. + + :param uri: The full URI of the token request. + :param http_method: A valid HTTP verb, i.e. GET, POST, PUT, HEAD, etc. + :param body: The request body as a string. + :param headers: The request headers as a dict. + :param credentials: A list of extra credentials to include in the token. + :returns: A tuple of 3 elements. + 1. A dict of headers to set on the response. + 2. The response body as a string. + 3. The response status code as an integer. + + An example of a valid request:: + + >>> from your_validator import your_validator + >>> from oauthlib.oauth1 import RequestTokenEndpoint + >>> endpoint = RequestTokenEndpoint(your_validator) + >>> h, b, s = endpoint.create_request_token_response( + ... 'https://your.provider/request_token?foo=bar', + ... headers={ + ... 'Authorization': 'OAuth realm=movies user, oauth_....' + ... }, + ... credentials={ + ... 'my_specific': 'argument', + ... }) + >>> h + {'Content-Type': 'application/x-www-form-urlencoded'} + >>> b + 'oauth_token=lsdkfol23w54jlksdef&oauth_token_secret=qwe089234lkjsdf&oauth_callback_confirmed=true&my_specific=argument' + >>> s + 200 + + An response to invalid request would have a different body and status:: + + >>> b + 'error=invalid_request&description=missing+callback+uri' + >>> s + 400 + + The same goes for an an unauthorized request: + + >>> b + '' + >>> s + 401 + """ + resp_headers = {'Content-Type': 'application/x-www-form-urlencoded'} + try: + request = self._create_request(uri, http_method, body, headers) + valid, processed_request = self.validate_request_token_request( + request) + if valid: + token = self.create_request_token(request, credentials or {}) + return resp_headers, token, 200 + else: + return {}, None, 401 + except errors.OAuth1Error as e: + return resp_headers, e.urlencoded, e.status_code + + def validate_request_token_request(self, request): + """Validate a request token request. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :raises: OAuth1Error if the request is invalid. + :returns: A tuple of 2 elements. + 1. The validation result (True or False). + 2. The request object. + """ + self._check_transport_security(request) + self._check_mandatory_parameters(request) + + if request.realm: + request.realms = request.realm.split(' ') + else: + request.realms = self.request_validator.get_default_realms( + request.client_key, request) + if not self.request_validator.check_realms(request.realms): + raise errors.InvalidRequestError( + description='Invalid realm {}. Allowed are {!r}.'.format( + request.realms, self.request_validator.realms)) + + if not request.redirect_uri: + raise errors.InvalidRequestError( + description='Missing callback URI.') + + if not self.request_validator.validate_timestamp_and_nonce( + request.client_key, request.timestamp, request.nonce, request, + request_token=request.resource_owner_key): + return False, request + + # The server SHOULD return a 401 (Unauthorized) status code when + # receiving a request with invalid client credentials. + # Note: This is postponed in order to avoid timing attacks, instead + # a dummy client is assigned and used to maintain near constant + # time request verification. + # + # Note that early exit would enable client enumeration + valid_client = self.request_validator.validate_client_key( + request.client_key, request) + if not valid_client: + request.client_key = self.request_validator.dummy_client + + # Note that `realm`_ is only used in authorization headers and how + # it should be interepreted is not included in the OAuth spec. + # However they could be seen as a scope or realm to which the + # client has access and as such every client should be checked + # to ensure it is authorized access to that scope or realm. + # .. _`realm`: https://tools.ietf.org/html/rfc2617#section-1.2 + # + # Note that early exit would enable client realm access enumeration. + # + # The require_realm indicates this is the first step in the OAuth + # workflow where a client requests access to a specific realm. + # This first step (obtaining request token) need not require a realm + # and can then be identified by checking the require_resource_owner + # flag and abscence of realm. + # + # Clients obtaining an access token will not supply a realm and it will + # not be checked. Instead the previously requested realm should be + # transferred from the request token to the access token. + # + # Access to protected resources will always validate the realm but note + # that the realm is now tied to the access token and not provided by + # the client. + valid_realm = self.request_validator.validate_requested_realms( + request.client_key, request.realms, request) + + # Callback is normally never required, except for requests for + # a Temporary Credential as described in `Section 2.1`_ + # .._`Section 2.1`: https://tools.ietf.org/html/rfc5849#section-2.1 + valid_redirect = self.request_validator.validate_redirect_uri( + request.client_key, request.redirect_uri, request) + if not request.redirect_uri: + raise NotImplementedError('Redirect URI must either be provided ' + 'or set to a default during validation.') + + valid_signature = self._check_signature(request) + + # log the results to the validator_log + # this lets us handle internal reporting and analysis + request.validator_log['client'] = valid_client + request.validator_log['realm'] = valid_realm + request.validator_log['callback'] = valid_redirect + request.validator_log['signature'] = valid_signature + + # We delay checking validity until the very end, using dummy values for + # calculations and fetching secrets/keys to ensure the flow of every + # request remains almost identical regardless of whether valid values + # have been supplied. This ensures near constant time execution and + # prevents malicious users from guessing sensitive information + v = all((valid_client, valid_realm, valid_redirect, valid_signature)) + if not v: + log.info("[Failure] request verification failed.") + log.info("Valid client: %s.", valid_client) + log.info("Valid realm: %s.", valid_realm) + log.info("Valid callback: %s.", valid_redirect) + log.info("Valid signature: %s.", valid_signature) + return v, request diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/resource.py b/lib/oauthlib/oauth1/rfc5849/endpoints/resource.py new file mode 100644 index 00000000..45bdaaac --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/resource.py @@ -0,0 +1,163 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth1.rfc5849.endpoints.resource +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of the resource protection provider logic of +OAuth 1.0 RFC 5849. +""" +import logging + +from .. import errors +from .base import BaseEndpoint + +log = logging.getLogger(__name__) + + +class ResourceEndpoint(BaseEndpoint): + + """An endpoint responsible for protecting resources. + + Typical use is to instantiate with a request validator and invoke the + ``validate_protected_resource_request`` in a decorator around a view + function. If the request is valid, invoke and return the response of the + view. If invalid create and return an error response directly from the + decorator. + + See :doc:`/oauth1/validator` for details on which validator methods to implement + for this endpoint. + + An example decorator:: + + from functools import wraps + from your_validator import your_validator + from oauthlib.oauth1 import ResourceEndpoint + endpoint = ResourceEndpoint(your_validator) + + def require_oauth(realms=None): + def decorator(f): + @wraps(f) + def wrapper(request, *args, **kwargs): + v, r = provider.validate_protected_resource_request( + request.url, + http_method=request.method, + body=request.data, + headers=request.headers, + realms=realms or []) + if v: + return f(*args, **kwargs) + else: + return abort(403) + """ + + def validate_protected_resource_request(self, uri, http_method='GET', + body=None, headers=None, realms=None): + """Create a request token response, with a new request token if valid. + + :param uri: The full URI of the token request. + :param http_method: A valid HTTP verb, i.e. GET, POST, PUT, HEAD, etc. + :param body: The request body as a string. + :param headers: The request headers as a dict. + :param realms: A list of realms the resource is protected under. + This will be supplied to the ``validate_realms`` + method of the request validator. + :returns: A tuple of 2 elements. + 1. True if valid, False otherwise. + 2. An oauthlib.common.Request object. + """ + try: + request = self._create_request(uri, http_method, body, headers) + except errors.OAuth1Error: + return False, None + + try: + self._check_transport_security(request) + self._check_mandatory_parameters(request) + except errors.OAuth1Error: + return False, request + + if not request.resource_owner_key: + return False, request + + if not self.request_validator.check_access_token( + request.resource_owner_key): + return False, request + + if not self.request_validator.validate_timestamp_and_nonce( + request.client_key, request.timestamp, request.nonce, request, + access_token=request.resource_owner_key): + return False, request + + # The server SHOULD return a 401 (Unauthorized) status code when + # receiving a request with invalid client credentials. + # Note: This is postponed in order to avoid timing attacks, instead + # a dummy client is assigned and used to maintain near constant + # time request verification. + # + # Note that early exit would enable client enumeration + valid_client = self.request_validator.validate_client_key( + request.client_key, request) + if not valid_client: + request.client_key = self.request_validator.dummy_client + + # The server SHOULD return a 401 (Unauthorized) status code when + # receiving a request with invalid or expired token. + # Note: This is postponed in order to avoid timing attacks, instead + # a dummy token is assigned and used to maintain near constant + # time request verification. + # + # Note that early exit would enable resource owner enumeration + valid_resource_owner = self.request_validator.validate_access_token( + request.client_key, request.resource_owner_key, request) + if not valid_resource_owner: + request.resource_owner_key = self.request_validator.dummy_access_token + + # Note that `realm`_ is only used in authorization headers and how + # it should be interepreted is not included in the OAuth spec. + # However they could be seen as a scope or realm to which the + # client has access and as such every client should be checked + # to ensure it is authorized access to that scope or realm. + # .. _`realm`: https://tools.ietf.org/html/rfc2617#section-1.2 + # + # Note that early exit would enable client realm access enumeration. + # + # The require_realm indicates this is the first step in the OAuth + # workflow where a client requests access to a specific realm. + # This first step (obtaining request token) need not require a realm + # and can then be identified by checking the require_resource_owner + # flag and abscence of realm. + # + # Clients obtaining an access token will not supply a realm and it will + # not be checked. Instead the previously requested realm should be + # transferred from the request token to the access token. + # + # Access to protected resources will always validate the realm but note + # that the realm is now tied to the access token and not provided by + # the client. + valid_realm = self.request_validator.validate_realms(request.client_key, + request.resource_owner_key, request, uri=request.uri, + realms=realms) + + valid_signature = self._check_signature(request) + + # log the results to the validator_log + # this lets us handle internal reporting and analysis + request.validator_log['client'] = valid_client + request.validator_log['resource_owner'] = valid_resource_owner + request.validator_log['realm'] = valid_realm + request.validator_log['signature'] = valid_signature + + # We delay checking validity until the very end, using dummy values for + # calculations and fetching secrets/keys to ensure the flow of every + # request remains almost identical regardless of whether valid values + # have been supplied. This ensures near constant time execution and + # prevents malicious users from guessing sensitive information + v = all((valid_client, valid_resource_owner, valid_realm, + valid_signature)) + if not v: + log.info("[Failure] request verification failed.") + log.info("Valid client: %s", valid_client) + log.info("Valid token: %s", valid_resource_owner) + log.info("Valid realm: %s", valid_realm) + log.info("Valid signature: %s", valid_signature) + return v, request diff --git a/lib/oauthlib/oauth1/rfc5849/endpoints/signature_only.py b/lib/oauthlib/oauth1/rfc5849/endpoints/signature_only.py new file mode 100644 index 00000000..d693ccb7 --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/endpoints/signature_only.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth1.rfc5849.endpoints.signature_only +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of the signing logic of OAuth 1.0 RFC 5849. +""" + +import logging + +from .. import errors +from .base import BaseEndpoint + +log = logging.getLogger(__name__) + + +class SignatureOnlyEndpoint(BaseEndpoint): + + """An endpoint only responsible for verifying an oauth signature.""" + + def validate_request(self, uri, http_method='GET', + body=None, headers=None): + """Validate a signed OAuth request. + + :param uri: The full URI of the token request. + :param http_method: A valid HTTP verb, i.e. GET, POST, PUT, HEAD, etc. + :param body: The request body as a string. + :param headers: The request headers as a dict. + :returns: A tuple of 2 elements. + 1. True if valid, False otherwise. + 2. An oauthlib.common.Request object. + """ + try: + request = self._create_request(uri, http_method, body, headers) + except errors.OAuth1Error as err: + log.info( + 'Exception caught while validating request, %s.' % err) + return False, None + + try: + self._check_transport_security(request) + self._check_mandatory_parameters(request) + except errors.OAuth1Error as err: + log.info( + 'Exception caught while validating request, %s.' % err) + return False, request + + if not self.request_validator.validate_timestamp_and_nonce( + request.client_key, request.timestamp, request.nonce, request): + log.debug('[Failure] verification failed: timestamp/nonce') + return False, request + + # The server SHOULD return a 401 (Unauthorized) status code when + # receiving a request with invalid client credentials. + # Note: This is postponed in order to avoid timing attacks, instead + # a dummy client is assigned and used to maintain near constant + # time request verification. + # + # Note that early exit would enable client enumeration + valid_client = self.request_validator.validate_client_key( + request.client_key, request) + if not valid_client: + request.client_key = self.request_validator.dummy_client + + valid_signature = self._check_signature(request) + + # log the results to the validator_log + # this lets us handle internal reporting and analysis + request.validator_log['client'] = valid_client + request.validator_log['signature'] = valid_signature + + # We delay checking validity until the very end, using dummy values for + # calculations and fetching secrets/keys to ensure the flow of every + # request remains almost identical regardless of whether valid values + # have been supplied. This ensures near constant time execution and + # prevents malicious users from guessing sensitive information + v = all((valid_client, valid_signature)) + if not v: + log.info("[Failure] request verification failed.") + log.info("Valid client: %s", valid_client) + log.info("Valid signature: %s", valid_signature) + return v, request diff --git a/lib/oauthlib/oauth1/rfc5849/errors.py b/lib/oauthlib/oauth1/rfc5849/errors.py new file mode 100644 index 00000000..8774d407 --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/errors.py @@ -0,0 +1,76 @@ +""" +oauthlib.oauth1.rfc5849.errors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Error used both by OAuth 1 clients and provicers to represent the spec +defined error responses for all four core grant types. +""" +from oauthlib.common import add_params_to_uri, urlencode + + +class OAuth1Error(Exception): + error = None + description = '' + + def __init__(self, description=None, uri=None, status_code=400, + request=None): + """ + description: A human-readable ASCII [USASCII] text providing + additional information, used to assist the client + developer in understanding the error that occurred. + Values for the "error_description" parameter MUST NOT + include characters outside the set + x20-21 / x23-5B / x5D-7E. + + uri: A URI identifying a human-readable web page with information + about the error, used to provide the client developer with + additional information about the error. Values for the + "error_uri" parameter MUST conform to the URI- Reference + syntax, and thus MUST NOT include characters outside the set + x21 / x23-5B / x5D-7E. + + state: A CSRF protection value received from the client. + + request: Oauthlib Request object + """ + self.description = description or self.description + message = '({}) {}'.format(self.error, self.description) + if request: + message += ' ' + repr(request) + super().__init__(message) + + self.uri = uri + self.status_code = status_code + + def in_uri(self, uri): + return add_params_to_uri(uri, self.twotuples) + + @property + def twotuples(self): + error = [('error', self.error)] + if self.description: + error.append(('error_description', self.description)) + if self.uri: + error.append(('error_uri', self.uri)) + return error + + @property + def urlencoded(self): + return urlencode(self.twotuples) + + +class InsecureTransportError(OAuth1Error): + error = 'insecure_transport_protocol' + description = 'Only HTTPS connections are permitted.' + + +class InvalidSignatureMethodError(OAuth1Error): + error = 'invalid_signature_method' + + +class InvalidRequestError(OAuth1Error): + error = 'invalid_request' + + +class InvalidClientError(OAuth1Error): + error = 'invalid_client' diff --git a/lib/oauthlib/oauth1/rfc5849/parameters.py b/lib/oauthlib/oauth1/rfc5849/parameters.py new file mode 100644 index 00000000..2163772d --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/parameters.py @@ -0,0 +1,133 @@ +""" +oauthlib.parameters +~~~~~~~~~~~~~~~~~~~ + +This module contains methods related to `section 3.5`_ of the OAuth 1.0a spec. + +.. _`section 3.5`: https://tools.ietf.org/html/rfc5849#section-3.5 +""" +from urllib.parse import urlparse, urlunparse + +from oauthlib.common import extract_params, urlencode + +from . import utils + + +# TODO: do we need filter_params now that oauth_params are handled by Request? +# We can easily pass in just oauth protocol params. +@utils.filter_params +def prepare_headers(oauth_params, headers=None, realm=None): + """**Prepare the Authorization header.** + Per `section 3.5.1`_ of the spec. + + Protocol parameters can be transmitted using the HTTP "Authorization" + header field as defined by `RFC2617`_ with the auth-scheme name set to + "OAuth" (case insensitive). + + For example:: + + Authorization: OAuth realm="Example", + oauth_consumer_key="0685bd9184jfhq22", + oauth_token="ad180jjd733klru7", + oauth_signature_method="HMAC-SHA1", + oauth_signature="wOJIO9A2W5mFwDgiDvZbTSMK%2FPY%3D", + oauth_timestamp="137131200", + oauth_nonce="4572616e48616d6d65724c61686176", + oauth_version="1.0" + + + .. _`section 3.5.1`: https://tools.ietf.org/html/rfc5849#section-3.5.1 + .. _`RFC2617`: https://tools.ietf.org/html/rfc2617 + """ + headers = headers or {} + + # Protocol parameters SHALL be included in the "Authorization" header + # field as follows: + authorization_header_parameters_parts = [] + for oauth_parameter_name, value in oauth_params: + # 1. Parameter names and values are encoded per Parameter Encoding + # (`Section 3.6`_) + # + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + escaped_name = utils.escape(oauth_parameter_name) + escaped_value = utils.escape(value) + + # 2. Each parameter's name is immediately followed by an "=" character + # (ASCII code 61), a """ character (ASCII code 34), the parameter + # value (MAY be empty), and another """ character (ASCII code 34). + part = '{}="{}"'.format(escaped_name, escaped_value) + + authorization_header_parameters_parts.append(part) + + # 3. Parameters are separated by a "," character (ASCII code 44) and + # OPTIONAL linear whitespace per `RFC2617`_. + # + # .. _`RFC2617`: https://tools.ietf.org/html/rfc2617 + authorization_header_parameters = ', '.join( + authorization_header_parameters_parts) + + # 4. The OPTIONAL "realm" parameter MAY be added and interpreted per + # `RFC2617 section 1.2`_. + # + # .. _`RFC2617 section 1.2`: https://tools.ietf.org/html/rfc2617#section-1.2 + if realm: + # NOTE: realm should *not* be escaped + authorization_header_parameters = ('realm="%s", ' % realm + + authorization_header_parameters) + + # the auth-scheme name set to "OAuth" (case insensitive). + authorization_header = 'OAuth %s' % authorization_header_parameters + + # contribute the Authorization header to the given headers + full_headers = {} + full_headers.update(headers) + full_headers['Authorization'] = authorization_header + return full_headers + + +def _append_params(oauth_params, params): + """Append OAuth params to an existing set of parameters. + + Both params and oauth_params is must be lists of 2-tuples. + + Per `section 3.5.2`_ and `3.5.3`_ of the spec. + + .. _`section 3.5.2`: https://tools.ietf.org/html/rfc5849#section-3.5.2 + .. _`3.5.3`: https://tools.ietf.org/html/rfc5849#section-3.5.3 + + """ + merged = list(params) + merged.extend(oauth_params) + # The request URI / entity-body MAY include other request-specific + # parameters, in which case, the protocol parameters SHOULD be appended + # following the request-specific parameters, properly separated by an "&" + # character (ASCII code 38) + merged.sort(key=lambda i: i[0].startswith('oauth_')) + return merged + + +def prepare_form_encoded_body(oauth_params, body): + """Prepare the Form-Encoded Body. + + Per `section 3.5.2`_ of the spec. + + .. _`section 3.5.2`: https://tools.ietf.org/html/rfc5849#section-3.5.2 + + """ + # append OAuth params to the existing body + return _append_params(oauth_params, body) + + +def prepare_request_uri_query(oauth_params, uri): + """Prepare the Request URI Query. + + Per `section 3.5.3`_ of the spec. + + .. _`section 3.5.3`: https://tools.ietf.org/html/rfc5849#section-3.5.3 + + """ + # append OAuth params to the existing set of query components + sch, net, path, par, query, fra = urlparse(uri) + query = urlencode( + _append_params(oauth_params, extract_params(query) or [])) + return urlunparse((sch, net, path, par, query, fra)) diff --git a/lib/oauthlib/oauth1/rfc5849/request_validator.py b/lib/oauthlib/oauth1/rfc5849/request_validator.py new file mode 100644 index 00000000..dc5bf0eb --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/request_validator.py @@ -0,0 +1,849 @@ +""" +oauthlib.oauth1.rfc5849 +~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for signing and checking OAuth 1.0 RFC 5849 requests. +""" +from . import SIGNATURE_METHODS, utils + + +class RequestValidator: + + """A validator/datastore interaction base class for OAuth 1 providers. + + OAuth providers should inherit from RequestValidator and implement the + methods and properties outlined below. Further details are provided in the + documentation for each method and property. + + Methods used to check the format of input parameters. Common tests include + length, character set, membership, range or pattern. These tests are + referred to as `whitelisting or blacklisting`_. Whitelisting is better + but blacklisting can be usefull to spot malicious activity. + The following have methods a default implementation: + + - check_client_key + - check_request_token + - check_access_token + - check_nonce + - check_verifier + - check_realms + + The methods above default to whitelist input parameters, checking that they + are alphanumerical and between a minimum and maximum length. Rather than + overloading the methods a few properties can be used to configure these + methods. + + * @safe_characters -> (character set) + * @client_key_length -> (min, max) + * @request_token_length -> (min, max) + * @access_token_length -> (min, max) + * @nonce_length -> (min, max) + * @verifier_length -> (min, max) + * @realms -> [list, of, realms] + + Methods used to validate/invalidate input parameters. These checks usually + hit either persistent or temporary storage such as databases or the + filesystem. See each methods documentation for detailed usage. + The following methods must be implemented: + + - validate_client_key + - validate_request_token + - validate_access_token + - validate_timestamp_and_nonce + - validate_redirect_uri + - validate_requested_realms + - validate_realms + - validate_verifier + - invalidate_request_token + + Methods used to retrieve sensitive information from storage. + The following methods must be implemented: + + - get_client_secret + - get_request_token_secret + - get_access_token_secret + - get_rsa_key + - get_realms + - get_default_realms + - get_redirect_uri + + Methods used to save credentials. + The following methods must be implemented: + + - save_request_token + - save_verifier + - save_access_token + + Methods used to verify input parameters. This methods are used during + authorizing request token by user (AuthorizationEndpoint), to check if + parameters are valid. During token authorization request is not signed, + thus 'validation' methods can not be used. The following methods must be + implemented: + + - verify_realms + - verify_request_token + + To prevent timing attacks it is necessary to not exit early even if the + client key or resource owner key is invalid. Instead dummy values should + be used during the remaining verification process. It is very important + that the dummy client and token are valid input parameters to the methods + get_client_secret, get_rsa_key and get_(access/request)_token_secret and + that the running time of those methods when given a dummy value remain + equivalent to the running time when given a valid client/resource owner. + The following properties must be implemented: + + * @dummy_client + * @dummy_request_token + * @dummy_access_token + + Example implementations have been provided, note that the database used is + a simple dictionary and serves only an illustrative purpose. Use whichever + database suits your project and how to access it is entirely up to you. + The methods are introduced in an order which should make understanding + their use more straightforward and as such it could be worth reading what + follows in chronological order. + + .. _`whitelisting or blacklisting`: https://www.schneier.com/blog/archives/2011/01/whitelisting_vs.html + """ + + def __init__(self): + pass + + @property + def allowed_signature_methods(self): + return SIGNATURE_METHODS + + @property + def safe_characters(self): + return set(utils.UNICODE_ASCII_CHARACTER_SET) + + @property + def client_key_length(self): + return 20, 30 + + @property + def request_token_length(self): + return 20, 30 + + @property + def access_token_length(self): + return 20, 30 + + @property + def timestamp_lifetime(self): + return 600 + + @property + def nonce_length(self): + return 20, 30 + + @property + def verifier_length(self): + return 20, 30 + + @property + def realms(self): + return [] + + @property + def enforce_ssl(self): + return True + + def check_client_key(self, client_key): + """Check that the client key only contains safe characters + and is no shorter than lower and no longer than upper. + """ + lower, upper = self.client_key_length + return (set(client_key) <= self.safe_characters and + lower <= len(client_key) <= upper) + + def check_request_token(self, request_token): + """Checks that the request token contains only safe characters + and is no shorter than lower and no longer than upper. + """ + lower, upper = self.request_token_length + return (set(request_token) <= self.safe_characters and + lower <= len(request_token) <= upper) + + def check_access_token(self, request_token): + """Checks that the token contains only safe characters + and is no shorter than lower and no longer than upper. + """ + lower, upper = self.access_token_length + return (set(request_token) <= self.safe_characters and + lower <= len(request_token) <= upper) + + def check_nonce(self, nonce): + """Checks that the nonce only contains only safe characters + and is no shorter than lower and no longer than upper. + """ + lower, upper = self.nonce_length + return (set(nonce) <= self.safe_characters and + lower <= len(nonce) <= upper) + + def check_verifier(self, verifier): + """Checks that the verifier contains only safe characters + and is no shorter than lower and no longer than upper. + """ + lower, upper = self.verifier_length + return (set(verifier) <= self.safe_characters and + lower <= len(verifier) <= upper) + + def check_realms(self, realms): + """Check that the realm is one of a set allowed realms.""" + return all(r in self.realms for r in realms) + + def _subclass_must_implement(self, fn): + """ + Returns a NotImplementedError for a function that should be implemented. + :param fn: name of the function + """ + m = "Missing function implementation in {}: {}".format(type(self), fn) + return NotImplementedError(m) + + @property + def dummy_client(self): + """Dummy client used when an invalid client key is supplied. + + :returns: The dummy client key string. + + The dummy client should be associated with either a client secret, + a rsa key or both depending on which signature methods are supported. + Providers should make sure that + + get_client_secret(dummy_client) + get_rsa_key(dummy_client) + + return a valid secret or key for the dummy client. + + This method is used by + + * AccessTokenEndpoint + * RequestTokenEndpoint + * ResourceEndpoint + * SignatureOnlyEndpoint + """ + raise self._subclass_must_implement("dummy_client") + + @property + def dummy_request_token(self): + """Dummy request token used when an invalid token was supplied. + + :returns: The dummy request token string. + + The dummy request token should be associated with a request token + secret such that get_request_token_secret(.., dummy_request_token) + returns a valid secret. + + This method is used by + + * AccessTokenEndpoint + """ + raise self._subclass_must_implement("dummy_request_token") + + @property + def dummy_access_token(self): + """Dummy access token used when an invalid token was supplied. + + :returns: The dummy access token string. + + The dummy access token should be associated with an access token + secret such that get_access_token_secret(.., dummy_access_token) + returns a valid secret. + + This method is used by + + * ResourceEndpoint + """ + raise self._subclass_must_implement("dummy_access_token") + + def get_client_secret(self, client_key, request): + """Retrieves the client secret associated with the client key. + + :param client_key: The client/consumer key. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The client secret as a string. + + This method must allow the use of a dummy client_key value. + Fetching the secret using the dummy key must take the same amount of + time as fetching a secret for a valid client:: + + # Unlikely to be near constant time as it uses two database + # lookups for a valid client, and only one for an invalid. + from your_datastore import ClientSecret + if ClientSecret.has(client_key): + return ClientSecret.get(client_key) + else: + return 'dummy' + + # Aim to mimic number of latency inducing operations no matter + # whether the client is valid or not. + from your_datastore import ClientSecret + return ClientSecret.get(client_key, 'dummy') + + Note that the returned key must be in plaintext. + + This method is used by + + * AccessTokenEndpoint + * RequestTokenEndpoint + * ResourceEndpoint + * SignatureOnlyEndpoint + """ + raise self._subclass_must_implement('get_client_secret') + + def get_request_token_secret(self, client_key, token, request): + """Retrieves the shared secret associated with the request token. + + :param client_key: The client/consumer key. + :param token: The request token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The token secret as a string. + + This method must allow the use of a dummy values and the running time + must be roughly equivalent to that of the running time of valid values:: + + # Unlikely to be near constant time as it uses two database + # lookups for a valid client, and only one for an invalid. + from your_datastore import RequestTokenSecret + if RequestTokenSecret.has(client_key): + return RequestTokenSecret.get((client_key, request_token)) + else: + return 'dummy' + + # Aim to mimic number of latency inducing operations no matter + # whether the client is valid or not. + from your_datastore import RequestTokenSecret + return ClientSecret.get((client_key, request_token), 'dummy') + + Note that the returned key must be in plaintext. + + This method is used by + + * AccessTokenEndpoint + """ + raise self._subclass_must_implement('get_request_token_secret') + + def get_access_token_secret(self, client_key, token, request): + """Retrieves the shared secret associated with the access token. + + :param client_key: The client/consumer key. + :param token: The access token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The token secret as a string. + + This method must allow the use of a dummy values and the running time + must be roughly equivalent to that of the running time of valid values:: + + # Unlikely to be near constant time as it uses two database + # lookups for a valid client, and only one for an invalid. + from your_datastore import AccessTokenSecret + if AccessTokenSecret.has(client_key): + return AccessTokenSecret.get((client_key, request_token)) + else: + return 'dummy' + + # Aim to mimic number of latency inducing operations no matter + # whether the client is valid or not. + from your_datastore import AccessTokenSecret + return ClientSecret.get((client_key, request_token), 'dummy') + + Note that the returned key must be in plaintext. + + This method is used by + + * ResourceEndpoint + """ + raise self._subclass_must_implement("get_access_token_secret") + + def get_default_realms(self, client_key, request): + """Get the default realms for a client. + + :param client_key: The client/consumer key. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The list of default realms associated with the client. + + The list of default realms will be set during client registration and + is outside the scope of OAuthLib. + + This method is used by + + * RequestTokenEndpoint + """ + raise self._subclass_must_implement("get_default_realms") + + def get_realms(self, token, request): + """Get realms associated with a request token. + + :param token: The request token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The list of realms associated with the request token. + + This method is used by + + * AuthorizationEndpoint + * AccessTokenEndpoint + """ + raise self._subclass_must_implement("get_realms") + + def get_redirect_uri(self, token, request): + """Get the redirect URI associated with a request token. + + :param token: The request token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The redirect URI associated with the request token. + + It may be desirable to return a custom URI if the redirect is set to "oob". + In this case, the user will be redirected to the returned URI and at that + endpoint the verifier can be displayed. + + This method is used by + + * AuthorizationEndpoint + """ + raise self._subclass_must_implement("get_redirect_uri") + + def get_rsa_key(self, client_key, request): + """Retrieves a previously stored client provided RSA key. + + :param client_key: The client/consumer key. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: The rsa public key as a string. + + This method must allow the use of a dummy client_key value. Fetching + the rsa key using the dummy key must take the same amount of time + as fetching a key for a valid client. The dummy key must also be of + the same bit length as client keys. + + Note that the key must be returned in plaintext. + + This method is used by + + * AccessTokenEndpoint + * RequestTokenEndpoint + * ResourceEndpoint + * SignatureOnlyEndpoint + """ + raise self._subclass_must_implement("get_rsa_key") + + def invalidate_request_token(self, client_key, request_token, request): + """Invalidates a used request token. + + :param client_key: The client/consumer key. + :param request_token: The request token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: None + + Per `Section 2.3`__ of the spec: + + "The server MUST (...) ensure that the temporary + credentials have not expired or been used before." + + .. _`Section 2.3`: https://tools.ietf.org/html/rfc5849#section-2.3 + + This method should ensure that provided token won't validate anymore. + It can be simply removing RequestToken from storage or setting + specific flag that makes it invalid (note that such flag should be + also validated during request token validation). + + This method is used by + + * AccessTokenEndpoint + """ + raise self._subclass_must_implement("invalidate_request_token") + + def validate_client_key(self, client_key, request): + """Validates that supplied client key is a registered and valid client. + + :param client_key: The client/consumer key. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + Note that if the dummy client is supplied it should validate in same + or nearly the same amount of time as a valid one. + + Ensure latency inducing tasks are mimiced even for dummy clients. + For example, use:: + + from your_datastore import Client + try: + return Client.exists(client_key, access_token) + except DoesNotExist: + return False + + Rather than:: + + from your_datastore import Client + if access_token == self.dummy_access_token: + return False + else: + return Client.exists(client_key, access_token) + + This method is used by + + * AccessTokenEndpoint + * RequestTokenEndpoint + * ResourceEndpoint + * SignatureOnlyEndpoint + """ + raise self._subclass_must_implement("validate_client_key") + + def validate_request_token(self, client_key, token, request): + """Validates that supplied request token is registered and valid. + + :param client_key: The client/consumer key. + :param token: The request token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + Note that if the dummy request_token is supplied it should validate in + the same nearly the same amount of time as a valid one. + + Ensure latency inducing tasks are mimiced even for dummy clients. + For example, use:: + + from your_datastore import RequestToken + try: + return RequestToken.exists(client_key, access_token) + except DoesNotExist: + return False + + Rather than:: + + from your_datastore import RequestToken + if access_token == self.dummy_access_token: + return False + else: + return RequestToken.exists(client_key, access_token) + + This method is used by + + * AccessTokenEndpoint + """ + raise self._subclass_must_implement("validate_request_token") + + def validate_access_token(self, client_key, token, request): + """Validates that supplied access token is registered and valid. + + :param client_key: The client/consumer key. + :param token: The access token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + Note that if the dummy access token is supplied it should validate in + the same or nearly the same amount of time as a valid one. + + Ensure latency inducing tasks are mimiced even for dummy clients. + For example, use:: + + from your_datastore import AccessToken + try: + return AccessToken.exists(client_key, access_token) + except DoesNotExist: + return False + + Rather than:: + + from your_datastore import AccessToken + if access_token == self.dummy_access_token: + return False + else: + return AccessToken.exists(client_key, access_token) + + This method is used by + + * ResourceEndpoint + """ + raise self._subclass_must_implement("validate_access_token") + + def validate_timestamp_and_nonce(self, client_key, timestamp, nonce, + request, request_token=None, access_token=None): + """Validates that the nonce has not been used before. + + :param client_key: The client/consumer key. + :param timestamp: The ``oauth_timestamp`` parameter. + :param nonce: The ``oauth_nonce`` parameter. + :param request_token: Request token string, if any. + :param access_token: Access token string, if any. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + Per `Section 3.3`_ of the spec. + + "A nonce is a random string, uniquely generated by the client to allow + the server to verify that a request has never been made before and + helps prevent replay attacks when requests are made over a non-secure + channel. The nonce value MUST be unique across all requests with the + same timestamp, client credentials, and token combinations." + + .. _`Section 3.3`: https://tools.ietf.org/html/rfc5849#section-3.3 + + One of the first validation checks that will be made is for the validity + of the nonce and timestamp, which are associated with a client key and + possibly a token. If invalid then immediately fail the request + by returning False. If the nonce/timestamp pair has been used before and + you may just have detected a replay attack. Therefore it is an essential + part of OAuth security that you not allow nonce/timestamp reuse. + Note that this validation check is done before checking the validity of + the client and token.:: + + nonces_and_timestamps_database = [ + (u'foo', 1234567890, u'rannoMstrInghere', u'bar') + ] + + def validate_timestamp_and_nonce(self, client_key, timestamp, nonce, + request_token=None, access_token=None): + + return ((client_key, timestamp, nonce, request_token or access_token) + not in self.nonces_and_timestamps_database) + + This method is used by + + * AccessTokenEndpoint + * RequestTokenEndpoint + * ResourceEndpoint + * SignatureOnlyEndpoint + """ + raise self._subclass_must_implement("validate_timestamp_and_nonce") + + def validate_redirect_uri(self, client_key, redirect_uri, request): + """Validates the client supplied redirection URI. + + :param client_key: The client/consumer key. + :param redirect_uri: The URI the client which to redirect back to after + authorization is successful. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + It is highly recommended that OAuth providers require their clients + to register all redirection URIs prior to using them in requests and + register them as absolute URIs. See `CWE-601`_ for more information + about open redirection attacks. + + By requiring registration of all redirection URIs it should be + straightforward for the provider to verify whether the supplied + redirect_uri is valid or not. + + Alternatively per `Section 2.1`_ of the spec: + + "If the client is unable to receive callbacks or a callback URI has + been established via other means, the parameter value MUST be set to + "oob" (case sensitive), to indicate an out-of-band configuration." + + .. _`CWE-601`: http://cwe.mitre.org/top25/index.html#CWE-601 + .. _`Section 2.1`: https://tools.ietf.org/html/rfc5849#section-2.1 + + This method is used by + + * RequestTokenEndpoint + """ + raise self._subclass_must_implement("validate_redirect_uri") + + def validate_requested_realms(self, client_key, realms, request): + """Validates that the client may request access to the realm. + + :param client_key: The client/consumer key. + :param realms: The list of realms that client is requesting access to. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + This method is invoked when obtaining a request token and should + tie a realm to the request token and after user authorization + this realm restriction should transfer to the access token. + + This method is used by + + * RequestTokenEndpoint + """ + raise self._subclass_must_implement("validate_requested_realms") + + def validate_realms(self, client_key, token, request, uri=None, + realms=None): + """Validates access to the request realm. + + :param client_key: The client/consumer key. + :param token: A request token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param uri: The URI the realms is protecting. + :param realms: A list of realms that must have been granted to + the access token. + :returns: True or False + + How providers choose to use the realm parameter is outside the OAuth + specification but it is commonly used to restrict access to a subset + of protected resources such as "photos". + + realms is a convenience parameter which can be used to provide + a per view method pre-defined list of allowed realms. + + Can be as simple as:: + + from your_datastore import RequestToken + request_token = RequestToken.get(token, None) + + if not request_token: + return False + return set(request_token.realms).issuperset(set(realms)) + + This method is used by + + * ResourceEndpoint + """ + raise self._subclass_must_implement("validate_realms") + + def validate_verifier(self, client_key, token, verifier, request): + """Validates a verification code. + + :param client_key: The client/consumer key. + :param token: A request token string. + :param verifier: The authorization verifier string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + OAuth providers issue a verification code to clients after the + resource owner authorizes access. This code is used by the client to + obtain token credentials and the provider must verify that the + verifier is valid and associated with the client as well as the + resource owner. + + Verifier validation should be done in near constant time + (to avoid verifier enumeration). To achieve this we need a + constant time string comparison which is provided by OAuthLib + in ``oauthlib.common.safe_string_equals``:: + + from your_datastore import Verifier + correct_verifier = Verifier.get(client_key, request_token) + from oauthlib.common import safe_string_equals + return safe_string_equals(verifier, correct_verifier) + + This method is used by + + * AccessTokenEndpoint + """ + raise self._subclass_must_implement("validate_verifier") + + def verify_request_token(self, token, request): + """Verify that the given OAuth1 request token is valid. + + :param token: A request token string. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + This method is used only in AuthorizationEndpoint to check whether the + oauth_token given in the authorization URL is valid or not. + This request is not signed and thus similar ``validate_request_token`` + method can not be used. + + This method is used by + + * AuthorizationEndpoint + """ + raise self._subclass_must_implement("verify_request_token") + + def verify_realms(self, token, realms, request): + """Verify authorized realms to see if they match those given to token. + + :param token: An access token string. + :param realms: A list of realms the client attempts to access. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :returns: True or False + + This prevents the list of authorized realms sent by the client during + the authorization step to be altered to include realms outside what + was bound with the request token. + + Can be as simple as:: + + valid_realms = self.get_realms(token) + return all((r in valid_realms for r in realms)) + + This method is used by + + * AuthorizationEndpoint + """ + raise self._subclass_must_implement("verify_realms") + + def save_access_token(self, token, request): + """Save an OAuth1 access token. + + :param token: A dict with token credentials. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + The token dictionary will at minimum include + + * ``oauth_token`` the access token string. + * ``oauth_token_secret`` the token specific secret used in signing. + * ``oauth_authorized_realms`` a space separated list of realms. + + Client key can be obtained from ``request.client_key``. + + The list of realms (not joined string) can be obtained from + ``request.realm``. + + This method is used by + + * AccessTokenEndpoint + """ + raise self._subclass_must_implement("save_access_token") + + def save_request_token(self, token, request): + """Save an OAuth1 request token. + + :param token: A dict with token credentials. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + The token dictionary will at minimum include + + * ``oauth_token`` the request token string. + * ``oauth_token_secret`` the token specific secret used in signing. + * ``oauth_callback_confirmed`` the string ``true``. + + Client key can be obtained from ``request.client_key``. + + This method is used by + + * RequestTokenEndpoint + """ + raise self._subclass_must_implement("save_request_token") + + def save_verifier(self, token, verifier, request): + """Associate an authorization verifier with a request token. + + :param token: A request token string. + :param verifier A dictionary containing the oauth_verifier and + oauth_token + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + We need to associate verifiers with tokens for validation during the + access token request. + + Note that unlike save_x_token token here is the ``oauth_token`` token + string from the request token saved previously. + + This method is used by + + * AuthorizationEndpoint + """ + raise self._subclass_must_implement("save_verifier") diff --git a/lib/oauthlib/oauth1/rfc5849/signature.py b/lib/oauthlib/oauth1/rfc5849/signature.py new file mode 100644 index 00000000..a370ccd6 --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/signature.py @@ -0,0 +1,843 @@ +""" +This module is an implementation of `section 3.4`_ of RFC 5849. + +**Usage** + +Steps for signing a request: + +1. Collect parameters from the request using ``collect_parameters``. +2. Normalize those parameters using ``normalize_parameters``. +3. Create the *base string URI* using ``base_string_uri``. +4. Create the *signature base string* from the above three components + using ``signature_base_string``. +5. Pass the *signature base string* and the client credentials to one of the + sign-with-client functions. The HMAC-based signing functions needs + client credentials with secrets. The RSA-based signing functions needs + client credentials with an RSA private key. + +To verify a request, pass the request and credentials to one of the verify +functions. The HMAC-based signing functions needs the shared secrets. The +RSA-based verify functions needs the RSA public key. + +**Scope** + +All of the functions in this module should be considered internal to OAuthLib, +since they are not imported into the "oauthlib.oauth1" module. Programs using +OAuthLib should not use directly invoke any of the functions in this module. + +**Deprecated functions** + +The "sign_" methods that are not "_with_client" have been deprecated. They may +be removed in a future release. Since they are all internal functions, this +should have no impact on properly behaving programs. + +.. _`section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 +""" + +import binascii +import hashlib +import hmac +import logging +import warnings + +from oauthlib.common import extract_params, safe_string_equals, urldecode +import urllib.parse as urlparse + +from . import utils + + +log = logging.getLogger(__name__) + + +# ==== Common functions ========================================== + +def signature_base_string( + http_method: str, + base_str_uri: str, + normalized_encoded_request_parameters: str) -> str: + """ + Construct the signature base string. + + The *signature base string* is the value that is calculated and signed by + the client. It is also independently calculated by the server to verify + the signature, and therefore must produce the exact same value at both + ends or the signature won't verify. + + The rules for calculating the *signature base string* are defined in + section 3.4.1.1`_ of RFC 5849. + + .. _`section 3.4.1.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.1 + """ + + # The signature base string is constructed by concatenating together, + # in order, the following HTTP request elements: + + # 1. The HTTP request method in uppercase. For example: "HEAD", + # "GET", "POST", etc. If the request uses a custom HTTP method, it + # MUST be encoded (`Section 3.6`_). + # + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + base_string = utils.escape(http_method.upper()) + + # 2. An "&" character (ASCII code 38). + base_string += '&' + + # 3. The base string URI from `Section 3.4.1.2`_, after being encoded + # (`Section 3.6`_). + # + # .. _`Section 3.4.1.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.2 + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + base_string += utils.escape(base_str_uri) + + # 4. An "&" character (ASCII code 38). + base_string += '&' + + # 5. The request parameters as normalized in `Section 3.4.1.3.2`_, after + # being encoded (`Section 3.6`). + # + # .. _`Sec 3.4.1.3.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.2 + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + base_string += utils.escape(normalized_encoded_request_parameters) + + return base_string + + +def base_string_uri(uri: str, host: str = None) -> str: + """ + Calculates the _base string URI_. + + The *base string URI* is one of the components that make up the + *signature base string*. + + The ``host`` is optional. If provided, it is used to override any host and + port values in the ``uri``. The value for ``host`` is usually extracted from + the "Host" request header from the HTTP request. Its value may be just the + hostname, or the hostname followed by a colon and a TCP/IP port number + (hostname:port). If a value for the``host`` is provided but it does not + contain a port number, the default port number is used (i.e. if the ``uri`` + contained a port number, it will be discarded). + + The rules for calculating the *base string URI* are defined in + section 3.4.1.2`_ of RFC 5849. + + .. _`section 3.4.1.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.2 + + :param uri: URI + :param host: hostname with optional port number, separated by a colon + :return: base string URI + """ + + if not isinstance(uri, str): + raise ValueError('uri must be a string.') + + # FIXME: urlparse does not support unicode + scheme, netloc, path, params, query, fragment = urlparse.urlparse(uri) + + # The scheme, authority, and path of the request resource URI `RFC3986` + # are included by constructing an "http" or "https" URI representing + # the request resource (without the query or fragment) as follows: + # + # .. _`RFC3986`: https://tools.ietf.org/html/rfc3986 + + if not scheme: + raise ValueError('missing scheme') + + # Per `RFC 2616 section 5.1.2`_: + # + # Note that the absolute path cannot be empty; if none is present in + # the original URI, it MUST be given as "/" (the server root). + # + # .. _`RFC 2616 5.1.2`: https://tools.ietf.org/html/rfc2616#section-5.1.2 + if not path: + path = '/' + + # 1. The scheme and host MUST be in lowercase. + scheme = scheme.lower() + netloc = netloc.lower() + # Note: if ``host`` is used, it will be converted to lowercase below + + # 2. The host and port values MUST match the content of the HTTP + # request "Host" header field. + if host is not None: + netloc = host.lower() # override value in uri with provided host + + # 3. The port MUST be included if it is not the default port for the + # scheme, and MUST be excluded if it is the default. Specifically, + # the port MUST be excluded when making an HTTP request `RFC2616`_ + # to port 80 or when making an HTTPS request `RFC2818`_ to port 443. + # All other non-default port numbers MUST be included. + # + # .. _`RFC2616`: https://tools.ietf.org/html/rfc2616 + # .. _`RFC2818`: https://tools.ietf.org/html/rfc2818 + + if ':' in netloc: + # Contains a colon ":", so try to parse as "host:port" + + hostname, port_str = netloc.split(':', 1) + + if len(hostname) == 0: + raise ValueError('missing host') # error: netloc was ":port" or ":" + + if len(port_str) == 0: + netloc = hostname # was "host:", so just use the host part + else: + try: + port_num = int(port_str) # try to parse into an integer number + except ValueError: + raise ValueError('port is not an integer') + + if port_num <= 0 or 65535 < port_num: + raise ValueError('port out of range') # 16-bit unsigned ints + if (scheme, port_num) in (('http', 80), ('https', 443)): + netloc = hostname # default port for scheme: exclude port num + else: + netloc = hostname + ':' + str(port_num) # use hostname:port + else: + # Does not contain a colon, so entire value must be the hostname + + if len(netloc) == 0: + raise ValueError('missing host') # error: netloc was empty string + + v = urlparse.urlunparse((scheme, netloc, path, params, '', '')) + + # RFC 5849 does not specify which characters are encoded in the + # "base string URI", nor how they are encoded - which is very bad, since + # the signatures won't match if there are any differences. Fortunately, + # most URIs only use characters that are clearly not encoded (e.g. digits + # and A-Z, a-z), so have avoided any differences between implementations. + # + # The example from its section 3.4.1.2 illustrates that spaces in + # the path are percent encoded. But it provides no guidance as to what other + # characters (if any) must be encoded (nor how); nor if characters in the + # other components are to be encoded or not. + # + # This implementation **assumes** that **only** the space is percent-encoded + # and it is done to the entire value (not just to spaces in the path). + # + # This code may need to be changed if it is discovered that other characters + # are expected to be encoded. + # + # Note: the "base string URI" returned by this function will be encoded + # again before being concatenated into the "signature base string". So any + # spaces in the URI will actually appear in the "signature base string" + # as "%2520" (the "%20" further encoded according to section 3.6). + + return v.replace(' ', '%20') + + +def collect_parameters(uri_query='', body=None, headers=None, + exclude_oauth_signature=True, with_realm=False): + """ + Gather the request parameters from all the parameter sources. + + This function is used to extract all the parameters, which are then passed + to ``normalize_parameters`` to produce one of the components that make up + the *signature base string*. + + Parameters starting with `oauth_` will be unescaped. + + Body parameters must be supplied as a dict, a list of 2-tuples, or a + form encoded query string. + + Headers must be supplied as a dict. + + The rules where the parameters must be sourced from are defined in + `section 3.4.1.3.1`_ of RFC 5849. + + .. _`Sec 3.4.1.3.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.1 + """ + if body is None: + body = [] + headers = headers or {} + params = [] + + # The parameters from the following sources are collected into a single + # list of name/value pairs: + + # * The query component of the HTTP request URI as defined by + # `RFC3986, Section 3.4`_. The query component is parsed into a list + # of name/value pairs by treating it as an + # "application/x-www-form-urlencoded" string, separating the names + # and values and decoding them as defined by W3C.REC-html40-19980424 + # `W3C-HTML-4.0`_, Section 17.13.4. + # + # .. _`RFC3986, Sec 3.4`: https://tools.ietf.org/html/rfc3986#section-3.4 + # .. _`W3C-HTML-4.0`: https://www.w3.org/TR/1998/REC-html40-19980424/ + if uri_query: + params.extend(urldecode(uri_query)) + + # * The OAuth HTTP "Authorization" header field (`Section 3.5.1`_) if + # present. The header's content is parsed into a list of name/value + # pairs excluding the "realm" parameter if present. The parameter + # values are decoded as defined by `Section 3.5.1`_. + # + # .. _`Section 3.5.1`: https://tools.ietf.org/html/rfc5849#section-3.5.1 + if headers: + headers_lower = {k.lower(): v for k, v in headers.items()} + authorization_header = headers_lower.get('authorization') + if authorization_header is not None: + params.extend([i for i in utils.parse_authorization_header( + authorization_header) if with_realm or i[0] != 'realm']) + + # * The HTTP request entity-body, but only if all of the following + # conditions are met: + # * The entity-body is single-part. + # + # * The entity-body follows the encoding requirements of the + # "application/x-www-form-urlencoded" content-type as defined by + # W3C.REC-html40-19980424 `W3C-HTML-4.0`_. + + # * The HTTP request entity-header includes the "Content-Type" + # header field set to "application/x-www-form-urlencoded". + # + # .. _`W3C-HTML-4.0`: https://www.w3.org/TR/1998/REC-html40-19980424/ + + # TODO: enforce header param inclusion conditions + bodyparams = extract_params(body) or [] + params.extend(bodyparams) + + # ensure all oauth params are unescaped + unescaped_params = [] + for k, v in params: + if k.startswith('oauth_'): + v = utils.unescape(v) + unescaped_params.append((k, v)) + + # The "oauth_signature" parameter MUST be excluded from the signature + # base string if present. + if exclude_oauth_signature: + unescaped_params = list(filter(lambda i: i[0] != 'oauth_signature', + unescaped_params)) + + return unescaped_params + + +def normalize_parameters(params) -> str: + """ + Calculate the normalized request parameters. + + The *normalized request parameters* is one of the components that make up + the *signature base string*. + + The rules for parameter normalization are defined in `section 3.4.1.3.2`_ of + RFC 5849. + + .. _`Sec 3.4.1.3.2`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3.2 + """ + + # The parameters collected in `Section 3.4.1.3`_ are normalized into a + # single string as follows: + # + # .. _`Section 3.4.1.3`: https://tools.ietf.org/html/rfc5849#section-3.4.1.3 + + # 1. First, the name and value of each parameter are encoded + # (`Section 3.6`_). + # + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + key_values = [(utils.escape(k), utils.escape(v)) for k, v in params] + + # 2. The parameters are sorted by name, using ascending byte value + # ordering. If two or more parameters share the same name, they + # are sorted by their value. + key_values.sort() + + # 3. The name of each parameter is concatenated to its corresponding + # value using an "=" character (ASCII code 61) as a separator, even + # if the value is empty. + parameter_parts = ['{}={}'.format(k, v) for k, v in key_values] + + # 4. The sorted name/value pairs are concatenated together into a + # single string by using an "&" character (ASCII code 38) as + # separator. + return '&'.join(parameter_parts) + + +# ==== Common functions for HMAC-based signature methods ========= + +def _sign_hmac(hash_algorithm_name: str, + sig_base_str: str, + client_secret: str, + resource_owner_secret: str): + """ + **HMAC-SHA256** + + The "HMAC-SHA256" signature method uses the HMAC-SHA256 signature + algorithm as defined in `RFC4634`_:: + + digest = HMAC-SHA256 (key, text) + + Per `section 3.4.2`_ of the spec. + + .. _`RFC4634`: https://tools.ietf.org/html/rfc4634 + .. _`section 3.4.2`: https://tools.ietf.org/html/rfc5849#section-3.4.2 + """ + + # The HMAC-SHA256 function variables are used in following way: + + # text is set to the value of the signature base string from + # `Section 3.4.1.1`_. + # + # .. _`Section 3.4.1.1`: https://tools.ietf.org/html/rfc5849#section-3.4.1.1 + text = sig_base_str + + # key is set to the concatenated values of: + # 1. The client shared-secret, after being encoded (`Section 3.6`_). + # + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + key = utils.escape(client_secret or '') + + # 2. An "&" character (ASCII code 38), which MUST be included + # even when either secret is empty. + key += '&' + + # 3. The token shared-secret, after being encoded (`Section 3.6`_). + # + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + key += utils.escape(resource_owner_secret or '') + + # Get the hashing algorithm to use + + m = { + 'SHA-1': hashlib.sha1, + 'SHA-256': hashlib.sha256, + 'SHA-512': hashlib.sha512, + } + hash_alg = m[hash_algorithm_name] + + # Calculate the signature + + # FIXME: HMAC does not support unicode! + key_utf8 = key.encode('utf-8') + text_utf8 = text.encode('utf-8') + signature = hmac.new(key_utf8, text_utf8, hash_alg) + + # digest is used to set the value of the "oauth_signature" protocol + # parameter, after the result octet string is base64-encoded + # per `RFC2045, Section 6.8`. + # + # .. _`RFC2045, Sec 6.8`: https://tools.ietf.org/html/rfc2045#section-6.8 + return binascii.b2a_base64(signature.digest())[:-1].decode('utf-8') + + +def _verify_hmac(hash_algorithm_name: str, + request, + client_secret=None, + resource_owner_secret=None): + """Verify a HMAC-SHA1 signature. + + Per `section 3.4`_ of the spec. + + .. _`section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 + + To satisfy `RFC2616 section 5.2`_ item 1, the request argument's uri + attribute MUST be an absolute URI whose netloc part identifies the + origin server or gateway on which the resource resides. Any Host + item of the request argument's headers dict attribute will be + ignored. + + .. _`RFC2616 section 5.2`: https://tools.ietf.org/html/rfc2616#section-5.2 + + """ + norm_params = normalize_parameters(request.params) + bs_uri = base_string_uri(request.uri) + sig_base_str = signature_base_string(request.http_method, bs_uri, + norm_params) + signature = _sign_hmac(hash_algorithm_name, sig_base_str, + client_secret, resource_owner_secret) + match = safe_string_equals(signature, request.signature) + if not match: + log.debug('Verify HMAC failed: signature base string: %s', sig_base_str) + return match + + +# ==== HMAC-SHA1 ================================================= + +def sign_hmac_sha1_with_client(sig_base_str, client): + return _sign_hmac('SHA-1', sig_base_str, + client.client_secret, client.resource_owner_secret) + + +def verify_hmac_sha1(request, client_secret=None, resource_owner_secret=None): + return _verify_hmac('SHA-1', request, client_secret, resource_owner_secret) + + +def sign_hmac_sha1(base_string, client_secret, resource_owner_secret): + """ + Deprecated function for calculating a HMAC-SHA1 signature. + + This function has been replaced by invoking ``sign_hmac`` with "SHA-1" + as the hash algorithm name. + + This function was invoked by sign_hmac_sha1_with_client and + test_signatures.py, but does any application invoke it directly? If not, + it can be removed. + """ + warnings.warn('use sign_hmac_sha1_with_client instead of sign_hmac_sha1', + DeprecationWarning) + + # For some unknown reason, the original implementation assumed base_string + # could either be bytes or str. The signature base string calculating + # function always returned a str, so the new ``sign_rsa`` only expects that. + + base_string = base_string.decode('ascii') \ + if isinstance(base_string, bytes) else base_string + + return _sign_hmac('SHA-1', base_string, + client_secret, resource_owner_secret) + + +# ==== HMAC-SHA256 =============================================== + +def sign_hmac_sha256_with_client(sig_base_str, client): + return _sign_hmac('SHA-256', sig_base_str, + client.client_secret, client.resource_owner_secret) + + +def verify_hmac_sha256(request, client_secret=None, resource_owner_secret=None): + return _verify_hmac('SHA-256', request, + client_secret, resource_owner_secret) + + +def sign_hmac_sha256(base_string, client_secret, resource_owner_secret): + """ + Deprecated function for calculating a HMAC-SHA256 signature. + + This function has been replaced by invoking ``sign_hmac`` with "SHA-256" + as the hash algorithm name. + + This function was invoked by sign_hmac_sha256_with_client and + test_signatures.py, but does any application invoke it directly? If not, + it can be removed. + """ + warnings.warn( + 'use sign_hmac_sha256_with_client instead of sign_hmac_sha256', + DeprecationWarning) + + # For some unknown reason, the original implementation assumed base_string + # could either be bytes or str. The signature base string calculating + # function always returned a str, so the new ``sign_rsa`` only expects that. + + base_string = base_string.decode('ascii') \ + if isinstance(base_string, bytes) else base_string + + return _sign_hmac('SHA-256', base_string, + client_secret, resource_owner_secret) + + +# ==== HMAC-SHA512 =============================================== + +def sign_hmac_sha512_with_client(sig_base_str: str, + client): + return _sign_hmac('SHA-512', sig_base_str, + client.client_secret, client.resource_owner_secret) + + +def verify_hmac_sha512(request, + client_secret: str = None, + resource_owner_secret: str = None): + return _verify_hmac('SHA-512', request, + client_secret, resource_owner_secret) + + +# ==== Common functions for RSA-based signature methods ========== + +_jwt_rsa = {} # cache of RSA-hash implementations from PyJWT jwt.algorithms + + +def _get_jwt_rsa_algorithm(hash_algorithm_name: str): + """ + Obtains an RSAAlgorithm object that implements RSA with the hash algorithm. + + This method maintains the ``_jwt_rsa`` cache. + + Returns a jwt.algorithm.RSAAlgorithm. + """ + if hash_algorithm_name in _jwt_rsa: + # Found in cache: return it + return _jwt_rsa[hash_algorithm_name] + else: + # Not in cache: instantiate a new RSAAlgorithm + + # PyJWT has some nice pycrypto/cryptography abstractions + import jwt.algorithms as jwt_algorithms + m = { + 'SHA-1': jwt_algorithms.hashes.SHA1, + 'SHA-256': jwt_algorithms.hashes.SHA256, + 'SHA-512': jwt_algorithms.hashes.SHA512, + } + v = jwt_algorithms.RSAAlgorithm(m[hash_algorithm_name]) + + _jwt_rsa[hash_algorithm_name] = v # populate cache + + return v + + +def _prepare_key_plus(alg, keystr): + """ + Prepare a PEM encoded key (public or private), by invoking the `prepare_key` + method on alg with the keystr. + + The keystr should be a string or bytes. If the keystr is bytes, it is + decoded as UTF-8 before being passed to prepare_key. Otherwise, it + is passed directly. + """ + if isinstance(keystr, bytes): + keystr = keystr.decode('utf-8') + return alg.prepare_key(keystr) + + +def _sign_rsa(hash_algorithm_name: str, + sig_base_str: str, + rsa_private_key: str): + """ + Calculate the signature for an RSA-based signature method. + + The ``alg`` is used to calculate the digest over the signature base string. + For the "RSA_SHA1" signature method, the alg must be SHA-1. While OAuth 1.0a + only defines the RSA-SHA1 signature method, this function can be used for + other non-standard signature methods that only differ from RSA-SHA1 by the + digest algorithm. + + Signing for the RSA-SHA1 signature method is defined in + `section 3.4.3`_ of RFC 5849. + + The RSASSA-PKCS1-v1_5 signature algorithm used defined by + `RFC3447, Section 8.2`_ (also known as PKCS#1), with the `alg` as the + hash function for EMSA-PKCS1-v1_5. To + use this method, the client MUST have established client credentials + with the server that included its RSA public key (in a manner that is + beyond the scope of this specification). + + .. _`section 3.4.3`: https://tools.ietf.org/html/rfc5849#section-3.4.3 + .. _`RFC3447, Section 8.2`: https://tools.ietf.org/html/rfc3447#section-8.2 + """ + + # Get the implementation of RSA-hash + + alg = _get_jwt_rsa_algorithm(hash_algorithm_name) + + # Check private key + + if not rsa_private_key: + raise ValueError('rsa_private_key required for RSA with ' + + alg.hash_alg.name + ' signature method') + + # Convert the "signature base string" into a sequence of bytes (M) + # + # The signature base string, by definition, only contain printable US-ASCII + # characters. So encoding it as 'ascii' will always work. It will raise a + # ``UnicodeError`` if it can't encode the value, which will never happen + # if the signature base string was created correctly. Therefore, using + # 'ascii' encoding provides an extra level of error checking. + + m = sig_base_str.encode('ascii') + + # Perform signing: S = RSASSA-PKCS1-V1_5-SIGN (K, M) + + key = _prepare_key_plus(alg, rsa_private_key) + s = alg.sign(m, key) + + # base64-encoded per RFC2045 section 6.8. + # + # 1. While b2a_base64 implements base64 defined by RFC 3548. As used here, + # it is the same as base64 defined by RFC 2045. + # 2. b2a_base64 includes a "\n" at the end of its result ([:-1] removes it) + # 3. b2a_base64 produces a binary string. Use decode to produce a str. + # It should only contain only printable US-ASCII characters. + + return binascii.b2a_base64(s)[:-1].decode('ascii') + + +def _verify_rsa(hash_algorithm_name: str, + request, + rsa_public_key: str): + """ + Verify a base64 encoded signature for a RSA-based signature method. + + The ``alg`` is used to calculate the digest over the signature base string. + For the "RSA_SHA1" signature method, the alg must be SHA-1. While OAuth 1.0a + only defines the RSA-SHA1 signature method, this function can be used for + other non-standard signature methods that only differ from RSA-SHA1 by the + digest algorithm. + + Verification for the RSA-SHA1 signature method is defined in + `section 3.4.3`_ of RFC 5849. + + .. _`section 3.4.3`: https://tools.ietf.org/html/rfc5849#section-3.4.3 + + To satisfy `RFC2616 section 5.2`_ item 1, the request argument's uri + attribute MUST be an absolute URI whose netloc part identifies the + origin server or gateway on which the resource resides. Any Host + item of the request argument's headers dict attribute will be + ignored. + + .. _`RFC2616 Sec 5.2`: https://tools.ietf.org/html/rfc2616#section-5.2 + """ + + try: + # Calculate the *signature base string* of the actual received request + + norm_params = normalize_parameters(request.params) + bs_uri = base_string_uri(request.uri) + sig_base_str = signature_base_string( + request.http_method, bs_uri, norm_params) + + # Obtain the signature that was received in the request + + sig = binascii.a2b_base64(request.signature.encode('ascii')) + + # Get the implementation of RSA-with-hash algorithm to use + + alg = _get_jwt_rsa_algorithm(hash_algorithm_name) + + # Verify the received signature was produced by the private key + # corresponding to the `rsa_public_key`, signing exact same + # *signature base string*. + # + # RSASSA-PKCS1-V1_5-VERIFY ((n, e), M, S) + + key = _prepare_key_plus(alg, rsa_public_key) + + # The signature base string only contain printable US-ASCII characters. + # The ``encode`` method with the default "strict" error handling will + # raise a ``UnicodeError`` if it can't encode the value. So using + # "ascii" will always work. + + verify_ok = alg.verify(sig_base_str.encode('ascii'), key, sig) + + if not verify_ok: + log.debug('Verify failed: RSA with ' + alg.hash_alg.name + + ': signature base string=%s' + sig_base_str) + return verify_ok + + except UnicodeError: + # A properly encoded signature will only contain printable US-ASCII + # characters. The ``encode`` method with the default "strict" error + # handling will raise a ``UnicodeError`` if it can't decode the value. + # So using "ascii" will work with all valid signatures. But an + # incorrectly or maliciously produced signature could contain other + # bytes. + # + # This implementation treats that situation as equivalent to the + # signature verification having failed. + # + # Note: simply changing the encode to use 'utf-8' will not remove this + # case, since an incorrect or malicious request can contain bytes which + # are invalid as UTF-8. + return False + + +# ==== RSA-SHA1 ================================================== + +def sign_rsa_sha1_with_client(sig_base_str, client): + # For some reason, this function originally accepts both str and bytes. + # This behaviour is preserved here. But won't be done for the newer + # sign_rsa_sha256_with_client and sign_rsa_sha512_with_client functions, + # which will only accept strings. The function to calculate a + # "signature base string" always produces a string, so it is not clear + # why support for bytes would ever be needed. + sig_base_str = sig_base_str.decode('ascii')\ + if isinstance(sig_base_str, bytes) else sig_base_str + + return _sign_rsa('SHA-1', sig_base_str, client.rsa_key) + + +def verify_rsa_sha1(request, rsa_public_key: str): + return _verify_rsa('SHA-1', request, rsa_public_key) + + +def sign_rsa_sha1(base_string, rsa_private_key): + """ + Deprecated function for calculating a RSA-SHA1 signature. + + This function has been replaced by invoking ``sign_rsa`` with "SHA-1" + as the hash algorithm name. + + This function was invoked by sign_rsa_sha1_with_client and + test_signatures.py, but does any application invoke it directly? If not, + it can be removed. + """ + warnings.warn('use _sign_rsa("SHA-1", ...) instead of sign_rsa_sha1', + DeprecationWarning) + + if isinstance(base_string, bytes): + base_string = base_string.decode('ascii') + + return _sign_rsa('SHA-1', base_string, rsa_private_key) + + +# ==== RSA-SHA256 ================================================ + +def sign_rsa_sha256_with_client(sig_base_str: str, client): + return _sign_rsa('SHA-256', sig_base_str, client.rsa_key) + + +def verify_rsa_sha256(request, rsa_public_key: str): + return _verify_rsa('SHA-256', request, rsa_public_key) + + +# ==== RSA-SHA512 ================================================ + +def sign_rsa_sha512_with_client(sig_base_str: str, client): + return _sign_rsa('SHA-512', sig_base_str, client.rsa_key) + + +def verify_rsa_sha512(request, rsa_public_key: str): + return _verify_rsa('SHA-512', request, rsa_public_key) + + +# ==== PLAINTEXT ================================================= + +def sign_plaintext_with_client(_signature_base_string, client): + # _signature_base_string is not used because the signature with PLAINTEXT + # is just the secret: it isn't a real signature. + return sign_plaintext(client.client_secret, client.resource_owner_secret) + + +def sign_plaintext(client_secret, resource_owner_secret): + """Sign a request using plaintext. + + Per `section 3.4.4`_ of the spec. + + The "PLAINTEXT" method does not employ a signature algorithm. It + MUST be used with a transport-layer mechanism such as TLS or SSL (or + sent over a secure channel with equivalent protections). It does not + utilize the signature base string or the "oauth_timestamp" and + "oauth_nonce" parameters. + + .. _`section 3.4.4`: https://tools.ietf.org/html/rfc5849#section-3.4.4 + + """ + + # The "oauth_signature" protocol parameter is set to the concatenated + # value of: + + # 1. The client shared-secret, after being encoded (`Section 3.6`_). + # + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + signature = utils.escape(client_secret or '') + + # 2. An "&" character (ASCII code 38), which MUST be included even + # when either secret is empty. + signature += '&' + + # 3. The token shared-secret, after being encoded (`Section 3.6`_). + # + # .. _`Section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + signature += utils.escape(resource_owner_secret or '') + + return signature + + +def verify_plaintext(request, client_secret=None, resource_owner_secret=None): + """Verify a PLAINTEXT signature. + + Per `section 3.4`_ of the spec. + + .. _`section 3.4`: https://tools.ietf.org/html/rfc5849#section-3.4 + """ + signature = sign_plaintext(client_secret, resource_owner_secret) + match = safe_string_equals(signature, request.signature) + if not match: + log.debug('Verify PLAINTEXT failed') + return match diff --git a/lib/oauthlib/oauth1/rfc5849/utils.py b/lib/oauthlib/oauth1/rfc5849/utils.py new file mode 100644 index 00000000..8fb8302e --- /dev/null +++ b/lib/oauthlib/oauth1/rfc5849/utils.py @@ -0,0 +1,83 @@ +""" +oauthlib.utils +~~~~~~~~~~~~~~ + +This module contains utility methods used by various parts of the OAuth +spec. +""" +import urllib.request as urllib2 + +from oauthlib.common import quote, unquote + +UNICODE_ASCII_CHARACTER_SET = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789') + + +def filter_params(target): + """Decorator which filters params to remove non-oauth_* parameters + + Assumes the decorated method takes a params dict or list of tuples as its + first argument. + """ + def wrapper(params, *args, **kwargs): + params = filter_oauth_params(params) + return target(params, *args, **kwargs) + + wrapper.__doc__ = target.__doc__ + return wrapper + + +def filter_oauth_params(params): + """Removes all non oauth parameters from a dict or a list of params.""" + is_oauth = lambda kv: kv[0].startswith("oauth_") + if isinstance(params, dict): + return list(filter(is_oauth, list(params.items()))) + else: + return list(filter(is_oauth, params)) + + +def escape(u): + """Escape a unicode string in an OAuth-compatible fashion. + + Per `section 3.6`_ of the spec. + + .. _`section 3.6`: https://tools.ietf.org/html/rfc5849#section-3.6 + + """ + if not isinstance(u, str): + raise ValueError('Only unicode objects are escapable. ' + + 'Got {!r} of type {}.'.format(u, type(u))) + # Letters, digits, and the characters '_.-' are already treated as safe + # by urllib.quote(). We need to add '~' to fully support rfc5849. + return quote(u, safe=b'~') + + +def unescape(u): + if not isinstance(u, str): + raise ValueError('Only unicode objects are unescapable.') + return unquote(u) + + +def parse_keqv_list(l): + """A unicode-safe version of urllib2.parse_keqv_list""" + # With Python 2.6, parse_http_list handles unicode fine + return urllib2.parse_keqv_list(l) + + +def parse_http_list(u): + """A unicode-safe version of urllib2.parse_http_list""" + # With Python 2.6, parse_http_list handles unicode fine + return urllib2.parse_http_list(u) + + +def parse_authorization_header(authorization_header): + """Parse an OAuth authorization header into a list of 2-tuples""" + auth_scheme = 'OAuth '.lower() + if authorization_header[:len(auth_scheme)].lower().startswith(auth_scheme): + items = parse_http_list(authorization_header[len(auth_scheme):]) + try: + return list(parse_keqv_list(items).items()) + except (IndexError, ValueError): + pass + raise ValueError('Malformed authorization header') diff --git a/lib/oauthlib/oauth2/__init__.py b/lib/oauthlib/oauth2/__init__.py new file mode 100644 index 00000000..a6e1cccd --- /dev/null +++ b/lib/oauthlib/oauth2/__init__.py @@ -0,0 +1,35 @@ +""" +oauthlib.oauth2 +~~~~~~~~~~~~~~ + +This module is a wrapper for the most recent implementation of OAuth 2.0 Client +and Server classes. +""" +from .rfc6749.clients import ( + BackendApplicationClient, Client, LegacyApplicationClient, + MobileApplicationClient, ServiceApplicationClient, WebApplicationClient, +) +from .rfc6749.endpoints import ( + AuthorizationEndpoint, BackendApplicationServer, IntrospectEndpoint, + LegacyApplicationServer, MetadataEndpoint, MobileApplicationServer, + ResourceEndpoint, RevocationEndpoint, Server, TokenEndpoint, + WebApplicationServer, +) +from .rfc6749.errors import ( + AccessDeniedError, FatalClientError, InsecureTransportError, + InvalidClientError, InvalidClientIdError, InvalidGrantError, + InvalidRedirectURIError, InvalidRequestError, InvalidRequestFatalError, + InvalidScopeError, MismatchingRedirectURIError, MismatchingStateError, + MissingClientIdError, MissingCodeError, MissingRedirectURIError, + MissingResponseTypeError, MissingTokenError, MissingTokenTypeError, + OAuth2Error, ServerError, TemporarilyUnavailableError, TokenExpiredError, + UnauthorizedClientError, UnsupportedGrantTypeError, + UnsupportedResponseTypeError, UnsupportedTokenTypeError, +) +from .rfc6749.grant_types import ( + AuthorizationCodeGrant, ClientCredentialsGrant, ImplicitGrant, + RefreshTokenGrant, ResourceOwnerPasswordCredentialsGrant, +) +from .rfc6749.request_validator import RequestValidator +from .rfc6749.tokens import BearerToken, OAuth2Token +from .rfc6749.utils import is_secure_transport diff --git a/lib/oauthlib/oauth2/rfc6749/__init__.py b/lib/oauthlib/oauth2/rfc6749/__init__.py new file mode 100644 index 00000000..4b75a8a1 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/__init__.py @@ -0,0 +1,16 @@ +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +import functools +import logging + +from .endpoints.base import BaseEndpoint, catch_errors_and_unavailability +from .errors import ( + FatalClientError, OAuth2Error, ServerError, TemporarilyUnavailableError, +) + +log = logging.getLogger(__name__) diff --git a/lib/oauthlib/oauth2/rfc6749/clients/__init__.py b/lib/oauthlib/oauth2/rfc6749/clients/__init__.py new file mode 100644 index 00000000..8fc6c955 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/clients/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming OAuth 2.0 RFC6749. +""" +from .backend_application import BackendApplicationClient +from .base import AUTH_HEADER, BODY, URI_QUERY, Client +from .legacy_application import LegacyApplicationClient +from .mobile_application import MobileApplicationClient +from .service_application import ServiceApplicationClient +from .web_application import WebApplicationClient diff --git a/lib/oauthlib/oauth2/rfc6749/clients/backend_application.py b/lib/oauthlib/oauth2/rfc6749/clients/backend_application.py new file mode 100644 index 00000000..0e2a8299 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/clients/backend_application.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +from ..parameters import prepare_token_request +from .base import Client + + +class BackendApplicationClient(Client): + + """A public client utilizing the client credentials grant workflow. + + The client can request an access token using only its client + credentials (or other supported means of authentication) when the + client is requesting access to the protected resources under its + control, or those of another resource owner which has been previously + arranged with the authorization server (the method of which is beyond + the scope of this specification). + + The client credentials grant type MUST only be used by confidential + clients. + + Since the client authentication is used as the authorization grant, + no additional authorization request is needed. + """ + + grant_type = 'client_credentials' + + def prepare_request_body(self, body='', scope=None, + include_client_id=False, **kwargs): + """Add the client credentials to the request body. + + The client makes a request to the token endpoint by adding the + following parameters using the "application/x-www-form-urlencoded" + format per `Appendix B`_ in the HTTP request entity-body: + + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra paramters. Default ''. + :param scope: The scope of the access request as described by + `Section 3.3`_. + + :param include_client_id: `True` to send the `client_id` in the + body of the upstream request. This is required + if the client is not authenticating with the + authorization server as described in + `Section 3.2.1`_. False otherwise (default). + :type include_client_id: Boolean + + :param kwargs: Extra credentials to include in the token request. + + The client MUST authenticate with the authorization server as + described in `Section 3.2.1`_. + + The prepared body will include all provided credentials as well as + the ``grant_type`` parameter set to ``client_credentials``:: + + >>> from oauthlib.oauth2 import BackendApplicationClient + >>> client = BackendApplicationClient('your_id') + >>> client.prepare_request_body(scope=['hello', 'world']) + 'grant_type=client_credentials&scope=hello+world' + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 3.2.1`: https://tools.ietf.org/html/rfc6749#section-3.2.1 + """ + kwargs['client_id'] = self.client_id + kwargs['include_client_id'] = include_client_id + scope = self.scope if scope is None else scope + return prepare_token_request(self.grant_type, body=body, + scope=scope, **kwargs) diff --git a/lib/oauthlib/oauth2/rfc6749/clients/base.py b/lib/oauthlib/oauth2/rfc6749/clients/base.py new file mode 100644 index 00000000..88065ab3 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/clients/base.py @@ -0,0 +1,522 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming OAuth 2.0 RFC6749. +""" +import time +import warnings + +from oauthlib.common import generate_token +from oauthlib.oauth2.rfc6749 import tokens +from oauthlib.oauth2.rfc6749.errors import ( + InsecureTransportError, TokenExpiredError, +) +from oauthlib.oauth2.rfc6749.parameters import ( + parse_token_response, prepare_token_request, + prepare_token_revocation_request, +) +from oauthlib.oauth2.rfc6749.utils import is_secure_transport + +AUTH_HEADER = 'auth_header' +URI_QUERY = 'query' +BODY = 'body' + +FORM_ENC_HEADERS = { + 'Content-Type': 'application/x-www-form-urlencoded' +} + + +class Client: + """Base OAuth2 client responsible for access token management. + + This class also acts as a generic interface providing methods common to all + client types such as ``prepare_authorization_request`` and + ``prepare_token_revocation_request``. The ``prepare_x_request`` methods are + the recommended way of interacting with clients (as opposed to the abstract + prepare uri/body/etc methods). They are recommended over the older set + because they are easier to use (more consistent) and add a few additional + security checks, such as HTTPS and state checking. + + Some of these methods require further implementation only provided by the + specific purpose clients such as + :py:class:`oauthlib.oauth2.MobileApplicationClient` and thus you should always + seek to use the client class matching the OAuth workflow you need. For + Python, this is usually :py:class:`oauthlib.oauth2.WebApplicationClient`. + + """ + refresh_token_key = 'refresh_token' + + def __init__(self, client_id, + default_token_placement=AUTH_HEADER, + token_type='Bearer', + access_token=None, + refresh_token=None, + mac_key=None, + mac_algorithm=None, + token=None, + scope=None, + state=None, + redirect_url=None, + state_generator=generate_token, + **kwargs): + """Initialize a client with commonly used attributes. + + :param client_id: Client identifier given by the OAuth provider upon + registration. + + :param default_token_placement: Tokens can be supplied in the Authorization + header (default), the URL query component (``query``) or the request + body (``body``). + + :param token_type: OAuth 2 token type. Defaults to Bearer. Change this + if you specify the ``access_token`` parameter and know it is of a + different token type, such as a MAC, JWT or SAML token. Can + also be supplied as ``token_type`` inside the ``token`` dict parameter. + + :param access_token: An access token (string) used to authenticate + requests to protected resources. Can also be supplied inside the + ``token`` dict parameter. + + :param refresh_token: A refresh token (string) used to refresh expired + tokens. Can also be supplied inside the ``token`` dict parameter. + + :param mac_key: Encryption key used with MAC tokens. + + :param mac_algorithm: Hashing algorithm for MAC tokens. + + :param token: A dict of token attributes such as ``access_token``, + ``token_type`` and ``expires_at``. + + :param scope: A list of default scopes to request authorization for. + + :param state: A CSRF protection string used during authorization. + + :param redirect_url: The redirection endpoint on the client side to which + the user returns after authorization. + + :param state_generator: A no argument state generation callable. Defaults + to :py:meth:`oauthlib.common.generate_token`. + """ + + self.client_id = client_id + self.default_token_placement = default_token_placement + self.token_type = token_type + self.access_token = access_token + self.refresh_token = refresh_token + self.mac_key = mac_key + self.mac_algorithm = mac_algorithm + self.token = token or {} + self.scope = scope + self.state_generator = state_generator + self.state = state + self.redirect_url = redirect_url + self.code = None + self.expires_in = None + self._expires_at = None + self.populate_token_attributes(self.token) + + @property + def token_types(self): + """Supported token types and their respective methods + + Additional tokens can be supported by extending this dictionary. + + The Bearer token spec is stable and safe to use. + + The MAC token spec is not yet stable and support for MAC tokens + is experimental and currently matching version 00 of the spec. + """ + return { + 'Bearer': self._add_bearer_token, + 'MAC': self._add_mac_token + } + + def prepare_request_uri(self, *args, **kwargs): + """Abstract method used to create request URIs.""" + raise NotImplementedError("Must be implemented by inheriting classes.") + + def prepare_request_body(self, *args, **kwargs): + """Abstract method used to create request bodies.""" + raise NotImplementedError("Must be implemented by inheriting classes.") + + def parse_request_uri_response(self, *args, **kwargs): + """Abstract method used to parse redirection responses.""" + raise NotImplementedError("Must be implemented by inheriting classes.") + + def add_token(self, uri, http_method='GET', body=None, headers=None, + token_placement=None, **kwargs): + """Add token to the request uri, body or authorization header. + + The access token type provides the client with the information + required to successfully utilize the access token to make a protected + resource request (along with type-specific attributes). The client + MUST NOT use an access token if it does not understand the token + type. + + For example, the "bearer" token type defined in + [`I-D.ietf-oauth-v2-bearer`_] is utilized by simply including the access + token string in the request: + + .. code-block:: http + + GET /resource/1 HTTP/1.1 + Host: example.com + Authorization: Bearer mF_9.B5f-4.1JqM + + while the "mac" token type defined in [`I-D.ietf-oauth-v2-http-mac`_] is + utilized by issuing a MAC key together with the access token which is + used to sign certain components of the HTTP requests: + + .. code-block:: http + + GET /resource/1 HTTP/1.1 + Host: example.com + Authorization: MAC id="h480djs93hd8", + nonce="274312:dj83hs9s", + mac="kDZvddkndxvhGRXZhvuDjEWhGeE=" + + .. _`I-D.ietf-oauth-v2-bearer`: https://tools.ietf.org/html/rfc6749#section-12.2 + .. _`I-D.ietf-oauth-v2-http-mac`: https://tools.ietf.org/html/rfc6749#section-12.2 + """ + if not is_secure_transport(uri): + raise InsecureTransportError() + + token_placement = token_placement or self.default_token_placement + + case_insensitive_token_types = { + k.lower(): v for k, v in self.token_types.items()} + if not self.token_type.lower() in case_insensitive_token_types: + raise ValueError("Unsupported token type: %s" % self.token_type) + + if not (self.access_token or self.token.get('access_token')): + raise ValueError("Missing access token.") + + if self._expires_at and self._expires_at < time.time(): + raise TokenExpiredError() + + return case_insensitive_token_types[self.token_type.lower()](uri, http_method, body, + headers, token_placement, **kwargs) + + def prepare_authorization_request(self, authorization_url, state=None, + redirect_url=None, scope=None, **kwargs): + """Prepare the authorization request. + + This is the first step in many OAuth flows in which the user is + redirected to a certain authorization URL. This method adds + required parameters to the authorization URL. + + :param authorization_url: Provider authorization endpoint URL. + + :param state: CSRF protection string. Will be automatically created if + not provided. The generated state is available via the ``state`` + attribute. Clients should verify that the state is unchanged and + present in the authorization response. This verification is done + automatically if using the ``authorization_response`` parameter + with ``prepare_token_request``. + + :param redirect_url: Redirect URL to which the user will be returned + after authorization. Must be provided unless previously setup with + the provider. If provided then it must also be provided in the + token request. + + :param scope: List of scopes to request. Must be equal to + or a subset of the scopes granted when obtaining the refresh + token. If none is provided, the ones provided in the constructor are + used. + + :param kwargs: Additional parameters to included in the request. + + :returns: The prepared request tuple with (url, headers, body). + """ + if not is_secure_transport(authorization_url): + raise InsecureTransportError() + + self.state = state or self.state_generator() + self.redirect_url = redirect_url or self.redirect_url + # do not assign scope to self automatically anymore + scope = self.scope if scope is None else scope + auth_url = self.prepare_request_uri( + authorization_url, redirect_uri=self.redirect_url, + scope=scope, state=self.state, **kwargs) + return auth_url, FORM_ENC_HEADERS, '' + + def prepare_token_request(self, token_url, authorization_response=None, + redirect_url=None, state=None, body='', **kwargs): + """Prepare a token creation request. + + Note that these requests usually require client authentication, either + by including client_id or a set of provider specific authentication + credentials. + + :param token_url: Provider token creation endpoint URL. + + :param authorization_response: The full redirection URL string, i.e. + the location to which the user was redirected after successfull + authorization. Used to mine credentials needed to obtain a token + in this step, such as authorization code. + + :param redirect_url: The redirect_url supplied with the authorization + request (if there was one). + + :param state: + + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra paramters. Default ''. + + :param kwargs: Additional parameters to included in the request. + + :returns: The prepared request tuple with (url, headers, body). + """ + if not is_secure_transport(token_url): + raise InsecureTransportError() + + state = state or self.state + if authorization_response: + self.parse_request_uri_response( + authorization_response, state=state) + self.redirect_url = redirect_url or self.redirect_url + body = self.prepare_request_body(body=body, + redirect_uri=self.redirect_url, **kwargs) + + return token_url, FORM_ENC_HEADERS, body + + def prepare_refresh_token_request(self, token_url, refresh_token=None, + body='', scope=None, **kwargs): + """Prepare an access token refresh request. + + Expired access tokens can be replaced by new access tokens without + going through the OAuth dance if the client obtained a refresh token. + This refresh token and authentication credentials can be used to + obtain a new access token, and possibly a new refresh token. + + :param token_url: Provider token refresh endpoint URL. + + :param refresh_token: Refresh token string. + + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra paramters. Default ''. + + :param scope: List of scopes to request. Must be equal to + or a subset of the scopes granted when obtaining the refresh + token. If none is provided, the ones provided in the constructor are + used. + + :param kwargs: Additional parameters to included in the request. + + :returns: The prepared request tuple with (url, headers, body). + """ + if not is_secure_transport(token_url): + raise InsecureTransportError() + + # do not assign scope to self automatically anymore + scope = self.scope if scope is None else scope + body = self.prepare_refresh_body(body=body, + refresh_token=refresh_token, scope=scope, **kwargs) + return token_url, FORM_ENC_HEADERS, body + + def prepare_token_revocation_request(self, revocation_url, token, + token_type_hint="access_token", body='', callback=None, **kwargs): + """Prepare a token revocation request. + + :param revocation_url: Provider token revocation endpoint URL. + + :param token: The access or refresh token to be revoked (string). + + :param token_type_hint: ``"access_token"`` (default) or + ``"refresh_token"``. This is optional and if you wish to not pass it you + must provide ``token_type_hint=None``. + + :param body: + + :param callback: A jsonp callback such as ``package.callback`` to be invoked + upon receiving the response. Not that it should not include a () suffix. + + :param kwargs: Additional parameters to included in the request. + + :returns: The prepared request tuple with (url, headers, body). + + Note that JSONP request may use GET requests as the parameters will + be added to the request URL query as opposed to the request body. + + An example of a revocation request + + .. code-block: http + + POST /revoke HTTP/1.1 + Host: server.example.com + Content-Type: application/x-www-form-urlencoded + Authorization: Basic czZCaGRSa3F0MzpnWDFmQmF0M2JW + + token=45ghiukldjahdnhzdauz&token_type_hint=refresh_token + + An example of a jsonp revocation request + + .. code-block: http + + GET /revoke?token=agabcdefddddafdd&callback=package.myCallback HTTP/1.1 + Host: server.example.com + Content-Type: application/x-www-form-urlencoded + Authorization: Basic czZCaGRSa3F0MzpnWDFmQmF0M2JW + + and an error response + + .. code-block: http + + package.myCallback({"error":"unsupported_token_type"}); + + Note that these requests usually require client credentials, client_id in + the case for public clients and provider specific authentication + credentials for confidential clients. + """ + if not is_secure_transport(revocation_url): + raise InsecureTransportError() + + return prepare_token_revocation_request(revocation_url, token, + token_type_hint=token_type_hint, body=body, callback=callback, + **kwargs) + + def parse_request_body_response(self, body, scope=None, **kwargs): + """Parse the JSON response body. + + If the access token request is valid and authorized, the + authorization server issues an access token as described in + `Section 5.1`_. A refresh token SHOULD NOT be included. If the request + failed client authentication or is invalid, the authorization server + returns an error response as described in `Section 5.2`_. + + :param body: The response body from the token request. + :param scope: Scopes originally requested. If none is provided, the ones + provided in the constructor are used. + :return: Dictionary of token parameters. + :raises: Warning if scope has changed. OAuth2Error if response is invalid. + + These response are json encoded and could easily be parsed without + the assistance of OAuthLib. However, there are a few subtle issues + to be aware of regarding the response which are helpfully addressed + through the raising of various errors. + + A successful response should always contain + + **access_token** + The access token issued by the authorization server. Often + a random string. + + **token_type** + The type of the token issued as described in `Section 7.1`_. + Commonly ``Bearer``. + + While it is not mandated it is recommended that the provider include + + **expires_in** + The lifetime in seconds of the access token. For + example, the value "3600" denotes that the access token will + expire in one hour from the time the response was generated. + If omitted, the authorization server SHOULD provide the + expiration time via other means or document the default value. + + **scope** + Providers may supply this in all responses but are required to only + if it has changed since the authorization request. + + .. _`Section 5.1`: https://tools.ietf.org/html/rfc6749#section-5.1 + .. _`Section 5.2`: https://tools.ietf.org/html/rfc6749#section-5.2 + .. _`Section 7.1`: https://tools.ietf.org/html/rfc6749#section-7.1 + """ + scope = self.scope if scope is None else scope + self.token = parse_token_response(body, scope=scope) + self.populate_token_attributes(self.token) + return self.token + + def prepare_refresh_body(self, body='', refresh_token=None, scope=None, **kwargs): + """Prepare an access token request, using a refresh token. + + If the authorization server issued a refresh token to the client, the + client makes a refresh request to the token endpoint by adding the + following parameters using the "application/x-www-form-urlencoded" + format in the HTTP request entity-body: + + grant_type + REQUIRED. Value MUST be set to "refresh_token". + refresh_token + REQUIRED. The refresh token issued to the client. + scope + OPTIONAL. The scope of the access request as described by + Section 3.3. The requested scope MUST NOT include any scope + not originally granted by the resource owner, and if omitted is + treated as equal to the scope originally granted by the + resource owner. Note that if none is provided, the ones provided + in the constructor are used if any. + """ + refresh_token = refresh_token or self.refresh_token + scope = self.scope if scope is None else scope + return prepare_token_request(self.refresh_token_key, body=body, scope=scope, + refresh_token=refresh_token, **kwargs) + + def _add_bearer_token(self, uri, http_method='GET', body=None, + headers=None, token_placement=None): + """Add a bearer token to the request uri, body or authorization header.""" + if token_placement == AUTH_HEADER: + headers = tokens.prepare_bearer_headers(self.access_token, headers) + + elif token_placement == URI_QUERY: + uri = tokens.prepare_bearer_uri(self.access_token, uri) + + elif token_placement == BODY: + body = tokens.prepare_bearer_body(self.access_token, body) + + else: + raise ValueError("Invalid token placement.") + return uri, headers, body + + def _add_mac_token(self, uri, http_method='GET', body=None, + headers=None, token_placement=AUTH_HEADER, ext=None, **kwargs): + """Add a MAC token to the request authorization header. + + Warning: MAC token support is experimental as the spec is not yet stable. + """ + if token_placement != AUTH_HEADER: + raise ValueError("Invalid token placement.") + + headers = tokens.prepare_mac_header(self.access_token, uri, + self.mac_key, http_method, headers=headers, body=body, ext=ext, + hash_algorithm=self.mac_algorithm, **kwargs) + return uri, headers, body + + def _populate_attributes(self, response): + warnings.warn("Please switch to the public method " + "populate_token_attributes.", DeprecationWarning) + return self.populate_token_attributes(response) + + def populate_code_attributes(self, response): + """Add attributes from an auth code response to self.""" + + if 'code' in response: + self.code = response.get('code') + + def populate_token_attributes(self, response): + """Add attributes from a token exchange response to self.""" + + if 'access_token' in response: + self.access_token = response.get('access_token') + + if 'refresh_token' in response: + self.refresh_token = response.get('refresh_token') + + if 'token_type' in response: + self.token_type = response.get('token_type') + + if 'expires_in' in response: + self.expires_in = response.get('expires_in') + self._expires_at = time.time() + int(self.expires_in) + + if 'expires_at' in response: + self._expires_at = int(response.get('expires_at')) + + if 'mac_key' in response: + self.mac_key = response.get('mac_key') + + if 'mac_algorithm' in response: + self.mac_algorithm = response.get('mac_algorithm') diff --git a/lib/oauthlib/oauth2/rfc6749/clients/legacy_application.py b/lib/oauthlib/oauth2/rfc6749/clients/legacy_application.py new file mode 100644 index 00000000..7af68f34 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/clients/legacy_application.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +from ..parameters import prepare_token_request +from .base import Client + + +class LegacyApplicationClient(Client): + + """A public client using the resource owner password and username directly. + + The resource owner password credentials grant type is suitable in + cases where the resource owner has a trust relationship with the + client, such as the device operating system or a highly privileged + application. The authorization server should take special care when + enabling this grant type, and only allow it when other flows are not + viable. + + The grant type is suitable for clients capable of obtaining the + resource owner's credentials (username and password, typically using + an interactive form). It is also used to migrate existing clients + using direct authentication schemes such as HTTP Basic or Digest + authentication to OAuth by converting the stored credentials to an + access token. + + The method through which the client obtains the resource owner + credentials is beyond the scope of this specification. The client + MUST discard the credentials once an access token has been obtained. + """ + + grant_type = 'password' + + def __init__(self, client_id, **kwargs): + super().__init__(client_id, **kwargs) + + def prepare_request_body(self, username, password, body='', scope=None, + include_client_id=False, **kwargs): + """Add the resource owner password and username to the request body. + + The client makes a request to the token endpoint by adding the + following parameters using the "application/x-www-form-urlencoded" + format per `Appendix B`_ in the HTTP request entity-body: + + :param username: The resource owner username. + :param password: The resource owner password. + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra paramters. Default ''. + :param scope: The scope of the access request as described by + `Section 3.3`_. + :param include_client_id: `True` to send the `client_id` in the + body of the upstream request. This is required + if the client is not authenticating with the + authorization server as described in + `Section 3.2.1`_. False otherwise (default). + :type include_client_id: Boolean + :param kwargs: Extra credentials to include in the token request. + + If the client type is confidential or the client was issued client + credentials (or assigned other authentication requirements), the + client MUST authenticate with the authorization server as described + in `Section 3.2.1`_. + + The prepared body will include all provided credentials as well as + the ``grant_type`` parameter set to ``password``:: + + >>> from oauthlib.oauth2 import LegacyApplicationClient + >>> client = LegacyApplicationClient('your_id') + >>> client.prepare_request_body(username='foo', password='bar', scope=['hello', 'world']) + 'grant_type=password&username=foo&scope=hello+world&password=bar' + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 3.2.1`: https://tools.ietf.org/html/rfc6749#section-3.2.1 + """ + kwargs['client_id'] = self.client_id + kwargs['include_client_id'] = include_client_id + scope = self.scope if scope is None else scope + return prepare_token_request(self.grant_type, body=body, username=username, + password=password, scope=scope, **kwargs) diff --git a/lib/oauthlib/oauth2/rfc6749/clients/mobile_application.py b/lib/oauthlib/oauth2/rfc6749/clients/mobile_application.py new file mode 100644 index 00000000..cd325f4e --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/clients/mobile_application.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +from ..parameters import parse_implicit_response, prepare_grant_uri +from .base import Client + + +class MobileApplicationClient(Client): + + """A public client utilizing the implicit code grant workflow. + + A user-agent-based application is a public client in which the + client code is downloaded from a web server and executes within a + user-agent (e.g. web browser) on the device used by the resource + owner. Protocol data and credentials are easily accessible (and + often visible) to the resource owner. Since such applications + reside within the user-agent, they can make seamless use of the + user-agent capabilities when requesting authorization. + + The implicit grant type is used to obtain access tokens (it does not + support the issuance of refresh tokens) and is optimized for public + clients known to operate a particular redirection URI. These clients + are typically implemented in a browser using a scripting language + such as JavaScript. + + As a redirection-based flow, the client must be capable of + interacting with the resource owner's user-agent (typically a web + browser) and capable of receiving incoming requests (via redirection) + from the authorization server. + + Unlike the authorization code grant type in which the client makes + separate requests for authorization and access token, the client + receives the access token as the result of the authorization request. + + The implicit grant type does not include client authentication, and + relies on the presence of the resource owner and the registration of + the redirection URI. Because the access token is encoded into the + redirection URI, it may be exposed to the resource owner and other + applications residing on the same device. + """ + + response_type = 'token' + + def prepare_request_uri(self, uri, redirect_uri=None, scope=None, + state=None, **kwargs): + """Prepare the implicit grant request URI. + + The client constructs the request URI by adding the following + parameters to the query component of the authorization endpoint URI + using the "application/x-www-form-urlencoded" format, per `Appendix B`_: + + :param redirect_uri: OPTIONAL. The redirect URI must be an absolute URI + and it should have been registerd with the OAuth + provider prior to use. As described in `Section 3.1.2`_. + + :param scope: OPTIONAL. The scope of the access request as described by + Section 3.3`_. These may be any string but are commonly + URIs or various categories such as ``videos`` or ``documents``. + + :param state: RECOMMENDED. An opaque value used by the client to maintain + state between the request and callback. The authorization + server includes this value when redirecting the user-agent back + to the client. The parameter SHOULD be used for preventing + cross-site request forgery as described in `Section 10.12`_. + + :param kwargs: Extra arguments to include in the request URI. + + In addition to supplied parameters, OAuthLib will append the ``client_id`` + that was provided in the constructor as well as the mandatory ``response_type`` + argument, set to ``token``:: + + >>> from oauthlib.oauth2 import MobileApplicationClient + >>> client = MobileApplicationClient('your_id') + >>> client.prepare_request_uri('https://example.com') + 'https://example.com?client_id=your_id&response_type=token' + >>> client.prepare_request_uri('https://example.com', redirect_uri='https://a.b/callback') + 'https://example.com?client_id=your_id&response_type=token&redirect_uri=https%3A%2F%2Fa.b%2Fcallback' + >>> client.prepare_request_uri('https://example.com', scope=['profile', 'pictures']) + 'https://example.com?client_id=your_id&response_type=token&scope=profile+pictures' + >>> client.prepare_request_uri('https://example.com', foo='bar') + 'https://example.com?client_id=your_id&response_type=token&foo=bar' + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + .. _`Section 2.2`: https://tools.ietf.org/html/rfc6749#section-2.2 + .. _`Section 3.1.2`: https://tools.ietf.org/html/rfc6749#section-3.1.2 + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 10.12`: https://tools.ietf.org/html/rfc6749#section-10.12 + """ + scope = self.scope if scope is None else scope + return prepare_grant_uri(uri, self.client_id, self.response_type, + redirect_uri=redirect_uri, state=state, scope=scope, **kwargs) + + def parse_request_uri_response(self, uri, state=None, scope=None): + """Parse the response URI fragment. + + If the resource owner grants the access request, the authorization + server issues an access token and delivers it to the client by adding + the following parameters to the fragment component of the redirection + URI using the "application/x-www-form-urlencoded" format: + + :param uri: The callback URI that resulted from the user being redirected + back from the provider to you, the client. + :param state: The state provided in the authorization request. + :param scope: The scopes provided in the authorization request. + :return: Dictionary of token parameters. + :raises: OAuth2Error if response is invalid. + + A successful response should always contain + + **access_token** + The access token issued by the authorization server. Often + a random string. + + **token_type** + The type of the token issued as described in `Section 7.1`_. + Commonly ``Bearer``. + + **state** + If you provided the state parameter in the authorization phase, then + the provider is required to include that exact state value in the + response. + + While it is not mandated it is recommended that the provider include + + **expires_in** + The lifetime in seconds of the access token. For + example, the value "3600" denotes that the access token will + expire in one hour from the time the response was generated. + If omitted, the authorization server SHOULD provide the + expiration time via other means or document the default value. + + **scope** + Providers may supply this in all responses but are required to only + if it has changed since the authorization request. + + A few example responses can be seen below:: + + >>> response_uri = 'https://example.com/callback#access_token=sdlfkj452&state=ss345asyht&token_type=Bearer&scope=hello+world' + >>> from oauthlib.oauth2 import MobileApplicationClient + >>> client = MobileApplicationClient('your_id') + >>> client.parse_request_uri_response(response_uri) + { + 'access_token': 'sdlfkj452', + 'token_type': 'Bearer', + 'state': 'ss345asyht', + 'scope': [u'hello', u'world'] + } + >>> client.parse_request_uri_response(response_uri, state='other') + Traceback (most recent call last): + File "", line 1, in + File "oauthlib/oauth2/rfc6749/__init__.py", line 598, in parse_request_uri_response + **scope** + File "oauthlib/oauth2/rfc6749/parameters.py", line 197, in parse_implicit_response + raise ValueError("Mismatching or missing state in params.") + ValueError: Mismatching or missing state in params. + >>> def alert_scope_changed(message, old, new): + ... print(message, old, new) + ... + >>> oauthlib.signals.scope_changed.connect(alert_scope_changed) + >>> client.parse_request_body_response(response_body, scope=['other']) + ('Scope has changed from "other" to "hello world".', ['other'], ['hello', 'world']) + + .. _`Section 7.1`: https://tools.ietf.org/html/rfc6749#section-7.1 + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + """ + scope = self.scope if scope is None else scope + self.token = parse_implicit_response(uri, state=state, scope=scope) + self.populate_token_attributes(self.token) + return self.token diff --git a/lib/oauthlib/oauth2/rfc6749/clients/service_application.py b/lib/oauthlib/oauth2/rfc6749/clients/service_application.py new file mode 100644 index 00000000..c751c8b0 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/clients/service_application.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +import time + +from oauthlib.common import to_unicode + +from ..parameters import prepare_token_request +from .base import Client + + +class ServiceApplicationClient(Client): + """A public client utilizing the JWT bearer grant. + + JWT bearer tokes can be used to request an access token when a client + wishes to utilize an existing trust relationship, expressed through the + semantics of (and digital signature or keyed message digest calculated + over) the JWT, without a direct user approval step at the authorization + server. + + This grant type does not involve an authorization step. It may be + used by both public and confidential clients. + """ + + grant_type = 'urn:ietf:params:oauth:grant-type:jwt-bearer' + + def __init__(self, client_id, private_key=None, subject=None, issuer=None, + audience=None, **kwargs): + """Initalize a JWT client with defaults for implicit use later. + + :param client_id: Client identifier given by the OAuth provider upon + registration. + + :param private_key: Private key used for signing and encrypting. + Must be given as a string. + + :param subject: The principal that is the subject of the JWT, i.e. + which user is the token requested on behalf of. + For example, ``foo@example.com. + + :param issuer: The JWT MUST contain an "iss" (issuer) claim that + contains a unique identifier for the entity that issued + the JWT. For example, ``your-client@provider.com``. + + :param audience: A value identifying the authorization server as an + intended audience, e.g. + ``https://provider.com/oauth2/token``. + + :param kwargs: Additional arguments to pass to base client, such as + state and token. See ``Client.__init__.__doc__`` for + details. + """ + super().__init__(client_id, **kwargs) + self.private_key = private_key + self.subject = subject + self.issuer = issuer + self.audience = audience + + def prepare_request_body(self, + private_key=None, + subject=None, + issuer=None, + audience=None, + expires_at=None, + issued_at=None, + extra_claims=None, + body='', + scope=None, + include_client_id=False, + **kwargs): + """Create and add a JWT assertion to the request body. + + :param private_key: Private key used for signing and encrypting. + Must be given as a string. + + :param subject: (sub) The principal that is the subject of the JWT, + i.e. which user is the token requested on behalf of. + For example, ``foo@example.com. + + :param issuer: (iss) The JWT MUST contain an "iss" (issuer) claim that + contains a unique identifier for the entity that issued + the JWT. For example, ``your-client@provider.com``. + + :param audience: (aud) A value identifying the authorization server as an + intended audience, e.g. + ``https://provider.com/oauth2/token``. + + :param expires_at: A unix expiration timestamp for the JWT. Defaults + to an hour from now, i.e. ``time.time() + 3600``. + + :param issued_at: A unix timestamp of when the JWT was created. + Defaults to now, i.e. ``time.time()``. + + :param extra_claims: A dict of additional claims to include in the JWT. + + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra paramters. Default ''. + + :param scope: The scope of the access request. + + :param include_client_id: `True` to send the `client_id` in the + body of the upstream request. This is required + if the client is not authenticating with the + authorization server as described in + `Section 3.2.1`_. False otherwise (default). + :type include_client_id: Boolean + + :param not_before: A unix timestamp after which the JWT may be used. + Not included unless provided. * + + :param jwt_id: A unique JWT token identifier. Not included unless + provided. * + + :param kwargs: Extra credentials to include in the token request. + + Parameters marked with a `*` above are not explicit arguments in the + function signature, but are specially documented arguments for items + appearing in the generic `**kwargs` keyworded input. + + The "scope" parameter may be used, as defined in the Assertion + Framework for OAuth 2.0 Client Authentication and Authorization Grants + [I-D.ietf-oauth-assertions] specification, to indicate the requested + scope. + + Authentication of the client is optional, as described in + `Section 3.2.1`_ of OAuth 2.0 [RFC6749] and consequently, the + "client_id" is only needed when a form of client authentication that + relies on the parameter is used. + + The following non-normative example demonstrates an Access Token + Request with a JWT as an authorization grant (with extra line breaks + for display purposes only): + + .. code-block: http + + POST /token.oauth2 HTTP/1.1 + Host: as.example.com + Content-Type: application/x-www-form-urlencoded + + grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Ajwt-bearer + &assertion=eyJhbGciOiJFUzI1NiJ9. + eyJpc3Mi[...omitted for brevity...]. + J9l-ZhwP[...omitted for brevity...] + + .. _`Section 3.2.1`: https://tools.ietf.org/html/rfc6749#section-3.2.1 + """ + import jwt + + key = private_key or self.private_key + if not key: + raise ValueError('An encryption key must be supplied to make JWT' + ' token requests.') + claim = { + 'iss': issuer or self.issuer, + 'aud': audience or self.audience, + 'sub': subject or self.subject, + 'exp': int(expires_at or time.time() + 3600), + 'iat': int(issued_at or time.time()), + } + + for attr in ('iss', 'aud', 'sub'): + if claim[attr] is None: + raise ValueError( + 'Claim must include %s but none was given.' % attr) + + if 'not_before' in kwargs: + claim['nbf'] = kwargs.pop('not_before') + + if 'jwt_id' in kwargs: + claim['jti'] = kwargs.pop('jwt_id') + + claim.update(extra_claims or {}) + + assertion = jwt.encode(claim, key, 'RS256') + assertion = to_unicode(assertion) + + kwargs['client_id'] = self.client_id + kwargs['include_client_id'] = include_client_id + scope = self.scope if scope is None else scope + return prepare_token_request(self.grant_type, + body=body, + assertion=assertion, + scope=scope, + **kwargs) diff --git a/lib/oauthlib/oauth2/rfc6749/clients/web_application.py b/lib/oauthlib/oauth2/rfc6749/clients/web_application.py new file mode 100644 index 00000000..a1f3db1d --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/clients/web_application.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +import warnings + +from ..parameters import ( + parse_authorization_code_response, prepare_grant_uri, + prepare_token_request, +) +from .base import Client + + +class WebApplicationClient(Client): + + """A client utilizing the authorization code grant workflow. + + A web application is a confidential client running on a web + server. Resource owners access the client via an HTML user + interface rendered in a user-agent on the device used by the + resource owner. The client credentials as well as any access + token issued to the client are stored on the web server and are + not exposed to or accessible by the resource owner. + + The authorization code grant type is used to obtain both access + tokens and refresh tokens and is optimized for confidential clients. + As a redirection-based flow, the client must be capable of + interacting with the resource owner's user-agent (typically a web + browser) and capable of receiving incoming requests (via redirection) + from the authorization server. + """ + + grant_type = 'authorization_code' + + def __init__(self, client_id, code=None, **kwargs): + super().__init__(client_id, **kwargs) + self.code = code + + def prepare_request_uri(self, uri, redirect_uri=None, scope=None, + state=None, **kwargs): + """Prepare the authorization code request URI + + The client constructs the request URI by adding the following + parameters to the query component of the authorization endpoint URI + using the "application/x-www-form-urlencoded" format, per `Appendix B`_: + + :param redirect_uri: OPTIONAL. The redirect URI must be an absolute URI + and it should have been registerd with the OAuth + provider prior to use. As described in `Section 3.1.2`_. + + :param scope: OPTIONAL. The scope of the access request as described by + Section 3.3`_. These may be any string but are commonly + URIs or various categories such as ``videos`` or ``documents``. + + :param state: RECOMMENDED. An opaque value used by the client to maintain + state between the request and callback. The authorization + server includes this value when redirecting the user-agent back + to the client. The parameter SHOULD be used for preventing + cross-site request forgery as described in `Section 10.12`_. + + :param kwargs: Extra arguments to include in the request URI. + + In addition to supplied parameters, OAuthLib will append the ``client_id`` + that was provided in the constructor as well as the mandatory ``response_type`` + argument, set to ``code``:: + + >>> from oauthlib.oauth2 import WebApplicationClient + >>> client = WebApplicationClient('your_id') + >>> client.prepare_request_uri('https://example.com') + 'https://example.com?client_id=your_id&response_type=code' + >>> client.prepare_request_uri('https://example.com', redirect_uri='https://a.b/callback') + 'https://example.com?client_id=your_id&response_type=code&redirect_uri=https%3A%2F%2Fa.b%2Fcallback' + >>> client.prepare_request_uri('https://example.com', scope=['profile', 'pictures']) + 'https://example.com?client_id=your_id&response_type=code&scope=profile+pictures' + >>> client.prepare_request_uri('https://example.com', foo='bar') + 'https://example.com?client_id=your_id&response_type=code&foo=bar' + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + .. _`Section 2.2`: https://tools.ietf.org/html/rfc6749#section-2.2 + .. _`Section 3.1.2`: https://tools.ietf.org/html/rfc6749#section-3.1.2 + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 10.12`: https://tools.ietf.org/html/rfc6749#section-10.12 + """ + scope = self.scope if scope is None else scope + return prepare_grant_uri(uri, self.client_id, 'code', + redirect_uri=redirect_uri, scope=scope, state=state, **kwargs) + + def prepare_request_body(self, code=None, redirect_uri=None, body='', + include_client_id=True, **kwargs): + """Prepare the access token request body. + + The client makes a request to the token endpoint by adding the + following parameters using the "application/x-www-form-urlencoded" + format in the HTTP request entity-body: + + :param code: REQUIRED. The authorization code received from the + authorization server. + + :param redirect_uri: REQUIRED, if the "redirect_uri" parameter was included in the + authorization request as described in `Section 4.1.1`_, and their + values MUST be identical. + + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra paramters. Default ''. + + :param include_client_id: `True` (default) to send the `client_id` in the + body of the upstream request. This is required + if the client is not authenticating with the + authorization server as described in `Section 3.2.1`_. + :type include_client_id: Boolean + + :param kwargs: Extra parameters to include in the token request. + + In addition OAuthLib will add the ``grant_type`` parameter set to + ``authorization_code``. + + If the client type is confidential or the client was issued client + credentials (or assigned other authentication requirements), the + client MUST authenticate with the authorization server as described + in `Section 3.2.1`_:: + + >>> from oauthlib.oauth2 import WebApplicationClient + >>> client = WebApplicationClient('your_id') + >>> client.prepare_request_body(code='sh35ksdf09sf') + 'grant_type=authorization_code&code=sh35ksdf09sf' + >>> client.prepare_request_body(code='sh35ksdf09sf', foo='bar') + 'grant_type=authorization_code&code=sh35ksdf09sf&foo=bar' + + `Section 3.2.1` also states: + In the "authorization_code" "grant_type" request to the token + endpoint, an unauthenticated client MUST send its "client_id" to + prevent itself from inadvertently accepting a code intended for a + client with a different "client_id". This protects the client from + substitution of the authentication code. (It provides no additional + security for the protected resource.) + + .. _`Section 4.1.1`: https://tools.ietf.org/html/rfc6749#section-4.1.1 + .. _`Section 3.2.1`: https://tools.ietf.org/html/rfc6749#section-3.2.1 + """ + code = code or self.code + if 'client_id' in kwargs: + warnings.warn("`client_id` has been deprecated in favor of " + "`include_client_id`, a boolean value which will " + "include the already configured `self.client_id`.", + DeprecationWarning) + if kwargs['client_id'] != self.client_id: + raise ValueError("`client_id` was supplied as an argument, but " + "it does not match `self.client_id`") + + kwargs['client_id'] = self.client_id + kwargs['include_client_id'] = include_client_id + return prepare_token_request(self.grant_type, code=code, body=body, + redirect_uri=redirect_uri, **kwargs) + + def parse_request_uri_response(self, uri, state=None): + """Parse the URI query for code and state. + + If the resource owner grants the access request, the authorization + server issues an authorization code and delivers it to the client by + adding the following parameters to the query component of the + redirection URI using the "application/x-www-form-urlencoded" format: + + :param uri: The callback URI that resulted from the user being redirected + back from the provider to you, the client. + :param state: The state provided in the authorization request. + + **code** + The authorization code generated by the authorization server. + The authorization code MUST expire shortly after it is issued + to mitigate the risk of leaks. A maximum authorization code + lifetime of 10 minutes is RECOMMENDED. The client MUST NOT + use the authorization code more than once. If an authorization + code is used more than once, the authorization server MUST deny + the request and SHOULD revoke (when possible) all tokens + previously issued based on that authorization code. + The authorization code is bound to the client identifier and + redirection URI. + + **state** + If the "state" parameter was present in the authorization request. + + This method is mainly intended to enforce strict state checking with + the added benefit of easily extracting parameters from the URI:: + + >>> from oauthlib.oauth2 import WebApplicationClient + >>> client = WebApplicationClient('your_id') + >>> uri = 'https://example.com/callback?code=sdfkjh345&state=sfetw45' + >>> client.parse_request_uri_response(uri, state='sfetw45') + {'state': 'sfetw45', 'code': 'sdfkjh345'} + >>> client.parse_request_uri_response(uri, state='other') + Traceback (most recent call last): + File "", line 1, in + File "oauthlib/oauth2/rfc6749/__init__.py", line 357, in parse_request_uri_response + back from the provider to you, the client. + File "oauthlib/oauth2/rfc6749/parameters.py", line 153, in parse_authorization_code_response + raise MismatchingStateError() + oauthlib.oauth2.rfc6749.errors.MismatchingStateError + """ + response = parse_authorization_code_response(uri, state=state) + self.populate_code_attributes(response) + return response diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/__init__.py b/lib/oauthlib/oauth2/rfc6749/endpoints/__init__.py new file mode 100644 index 00000000..1695b41b --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/__init__.py @@ -0,0 +1,17 @@ +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +from .authorization import AuthorizationEndpoint +from .introspect import IntrospectEndpoint +from .metadata import MetadataEndpoint +from .pre_configured import ( + BackendApplicationServer, LegacyApplicationServer, MobileApplicationServer, + Server, WebApplicationServer, +) +from .resource import ResourceEndpoint +from .revocation import RevocationEndpoint +from .token import TokenEndpoint diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/authorization.py b/lib/oauthlib/oauth2/rfc6749/endpoints/authorization.py new file mode 100644 index 00000000..71967865 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/authorization.py @@ -0,0 +1,114 @@ +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +import logging + +from oauthlib.common import Request +from oauthlib.oauth2.rfc6749 import utils + +from .base import BaseEndpoint, catch_errors_and_unavailability + +log = logging.getLogger(__name__) + + +class AuthorizationEndpoint(BaseEndpoint): + + """Authorization endpoint - used by the client to obtain authorization + from the resource owner via user-agent redirection. + + The authorization endpoint is used to interact with the resource + owner and obtain an authorization grant. The authorization server + MUST first verify the identity of the resource owner. The way in + which the authorization server authenticates the resource owner (e.g. + username and password login, session cookies) is beyond the scope of + this specification. + + The endpoint URI MAY include an "application/x-www-form-urlencoded" + formatted (per `Appendix B`_) query component, + which MUST be retained when adding additional query parameters. The + endpoint URI MUST NOT include a fragment component:: + + https://example.com/path?query=component # OK + https://example.com/path?query=component#fragment # Not OK + + Since requests to the authorization endpoint result in user + authentication and the transmission of clear-text credentials (in the + HTTP response), the authorization server MUST require the use of TLS + as described in Section 1.6 when sending requests to the + authorization endpoint:: + + # We will deny any request which URI schema is not with https + + The authorization server MUST support the use of the HTTP "GET" + method [RFC2616] for the authorization endpoint, and MAY support the + use of the "POST" method as well:: + + # HTTP method is currently not enforced + + Parameters sent without a value MUST be treated as if they were + omitted from the request. The authorization server MUST ignore + unrecognized request parameters. Request and response parameters + MUST NOT be included more than once:: + + # Enforced through the design of oauthlib.common.Request + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + """ + + def __init__(self, default_response_type, default_token_type, + response_types): + BaseEndpoint.__init__(self) + self._response_types = response_types + self._default_response_type = default_response_type + self._default_token_type = default_token_type + + @property + def response_types(self): + return self._response_types + + @property + def default_response_type(self): + return self._default_response_type + + @property + def default_response_type_handler(self): + return self.response_types.get(self.default_response_type) + + @property + def default_token_type(self): + return self._default_token_type + + @catch_errors_and_unavailability + def create_authorization_response(self, uri, http_method='GET', body=None, + headers=None, scopes=None, credentials=None): + """Extract response_type and route to the designated handler.""" + request = Request( + uri, http_method=http_method, body=body, headers=headers) + request.scopes = scopes + # TODO: decide whether this should be a required argument + request.user = None # TODO: explain this in docs + for k, v in (credentials or {}).items(): + setattr(request, k, v) + response_type_handler = self.response_types.get( + request.response_type, self.default_response_type_handler) + log.debug('Dispatching response_type %s request to %r.', + request.response_type, response_type_handler) + return response_type_handler.create_authorization_response( + request, self.default_token_type) + + @catch_errors_and_unavailability + def validate_authorization_request(self, uri, http_method='GET', body=None, + headers=None): + """Extract response_type and route to the designated handler.""" + request = Request( + uri, http_method=http_method, body=body, headers=headers) + + request.scopes = utils.scope_to_list(request.scope) + + response_type_handler = self.response_types.get( + request.response_type, self.default_response_type_handler) + return response_type_handler.validate_authorization_request(request) diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/base.py b/lib/oauthlib/oauth2/rfc6749/endpoints/base.py new file mode 100644 index 00000000..3f239917 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/base.py @@ -0,0 +1,113 @@ +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +import functools +import logging + +from ..errors import ( + FatalClientError, InvalidClientError, InvalidRequestError, OAuth2Error, + ServerError, TemporarilyUnavailableError, UnsupportedTokenTypeError, +) + +log = logging.getLogger(__name__) + + +class BaseEndpoint: + + def __init__(self): + self._available = True + self._catch_errors = False + self._valid_request_methods = None + + @property + def valid_request_methods(self): + return self._valid_request_methods + + @valid_request_methods.setter + def valid_request_methods(self, valid_request_methods): + if valid_request_methods is not None: + valid_request_methods = [x.upper() for x in valid_request_methods] + self._valid_request_methods = valid_request_methods + + + @property + def available(self): + return self._available + + @available.setter + def available(self, available): + self._available = available + + @property + def catch_errors(self): + return self._catch_errors + + @catch_errors.setter + def catch_errors(self, catch_errors): + self._catch_errors = catch_errors + + def _raise_on_missing_token(self, request): + """Raise error on missing token.""" + if not request.token: + raise InvalidRequestError(request=request, + description='Missing token parameter.') + def _raise_on_invalid_client(self, request): + """Raise on failed client authentication.""" + if self.request_validator.client_authentication_required(request): + if not self.request_validator.authenticate_client(request): + log.debug('Client authentication failed, %r.', request) + raise InvalidClientError(request=request) + elif not self.request_validator.authenticate_client_id(request.client_id, request): + log.debug('Client authentication failed, %r.', request) + raise InvalidClientError(request=request) + + def _raise_on_unsupported_token(self, request): + """Raise on unsupported tokens.""" + if (request.token_type_hint and + request.token_type_hint in self.valid_token_types and + request.token_type_hint not in self.supported_token_types): + raise UnsupportedTokenTypeError(request=request) + + def _raise_on_bad_method(self, request): + if self.valid_request_methods is None: + raise ValueError('Configure "valid_request_methods" property first') + if request.http_method.upper() not in self.valid_request_methods: + raise InvalidRequestError(request=request, + description=('Unsupported request method %s' % request.http_method.upper())) + + def _raise_on_bad_post_request(self, request): + """Raise if invalid POST request received + """ + if request.http_method.upper() == 'POST': + query_params = request.uri_query or "" + if query_params: + raise InvalidRequestError(request=request, + description=('URL query parameters are not allowed')) + +def catch_errors_and_unavailability(f): + @functools.wraps(f) + def wrapper(endpoint, uri, *args, **kwargs): + if not endpoint.available: + e = TemporarilyUnavailableError() + log.info('Endpoint unavailable, ignoring request %s.' % uri) + return {}, e.json, 503 + + if endpoint.catch_errors: + try: + return f(endpoint, uri, *args, **kwargs) + except OAuth2Error: + raise + except FatalClientError: + raise + except Exception as e: + error = ServerError() + log.warning( + 'Exception caught while processing request, %s.' % e) + return {}, error.json, 500 + else: + return f(endpoint, uri, *args, **kwargs) + return wrapper diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/introspect.py b/lib/oauthlib/oauth2/rfc6749/endpoints/introspect.py new file mode 100644 index 00000000..63570d9c --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/introspect.py @@ -0,0 +1,122 @@ +""" +oauthlib.oauth2.rfc6749.endpoint.introspect +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +An implementation of the OAuth 2.0 `Token Introspection`. + +.. _`Token Introspection`: https://tools.ietf.org/html/rfc7662 +""" +import json +import logging + +from oauthlib.common import Request + +from ..errors import OAuth2Error +from .base import BaseEndpoint, catch_errors_and_unavailability + +log = logging.getLogger(__name__) + + +class IntrospectEndpoint(BaseEndpoint): + + """Introspect token endpoint. + + This endpoint defines a method to query an OAuth 2.0 authorization + server to determine the active state of an OAuth 2.0 token and to + determine meta-information about this token. OAuth 2.0 deployments + can use this method to convey information about the authorization + context of the token from the authorization server to the protected + resource. + + To prevent the values of access tokens from leaking into + server-side logs via query parameters, an authorization server + offering token introspection MAY disallow the use of HTTP GET on + the introspection endpoint and instead require the HTTP POST method + to be used at the introspection endpoint. + """ + + valid_token_types = ('access_token', 'refresh_token') + valid_request_methods = ('POST',) + + def __init__(self, request_validator, supported_token_types=None): + BaseEndpoint.__init__(self) + self.request_validator = request_validator + self.supported_token_types = ( + supported_token_types or self.valid_token_types) + + @catch_errors_and_unavailability + def create_introspect_response(self, uri, http_method='POST', body=None, + headers=None): + """Create introspect valid or invalid response + + If the authorization server is unable to determine the state + of the token without additional information, it SHOULD return + an introspection response indicating the token is not active + as described in Section 2.2. + """ + resp_headers = { + 'Content-Type': 'application/json', + 'Cache-Control': 'no-store', + 'Pragma': 'no-cache', + } + request = Request(uri, http_method, body, headers) + try: + self.validate_introspect_request(request) + log.debug('Token introspect valid for %r.', request) + except OAuth2Error as e: + log.debug('Client error during validation of %r. %r.', request, e) + resp_headers.update(e.headers) + return resp_headers, e.json, e.status_code + + claims = self.request_validator.introspect_token( + request.token, + request.token_type_hint, + request + ) + if claims is None: + return resp_headers, json.dumps(dict(active=False)), 200 + if "active" in claims: + claims.pop("active") + return resp_headers, json.dumps(dict(active=True, **claims)), 200 + + def validate_introspect_request(self, request): + """Ensure the request is valid. + + The protected resource calls the introspection endpoint using + an HTTP POST request with parameters sent as + "application/x-www-form-urlencoded". + + token REQUIRED. The string value of the token. + + token_type_hint OPTIONAL. + A hint about the type of the token submitted for + introspection. The protected resource MAY pass this parameter to + help the authorization server optimize the token lookup. If the + server is unable to locate the token using the given hint, it MUST + extend its search across all of its supported token types. An + authorization server MAY ignore this parameter, particularly if it + is able to detect the token type automatically. + * access_token: An Access Token as defined in [`RFC6749`], + `section 1.4`_ + + * refresh_token: A Refresh Token as defined in [`RFC6749`], + `section 1.5`_ + + The introspection endpoint MAY accept other OPTIONAL + parameters to provide further context to the query. For + instance, an authorization server may desire to know the IP + address of the client accessing the protected resource to + determine if the correct client is likely to be presenting the + token. The definition of this or any other parameters are + outside the scope of this specification, to be defined by + service documentation or extensions to this specification. + + .. _`section 1.4`: http://tools.ietf.org/html/rfc6749#section-1.4 + .. _`section 1.5`: http://tools.ietf.org/html/rfc6749#section-1.5 + .. _`RFC6749`: http://tools.ietf.org/html/rfc6749 + """ + self._raise_on_bad_method(request) + self._raise_on_bad_post_request(request) + self._raise_on_missing_token(request) + self._raise_on_invalid_client(request) + self._raise_on_unsupported_token(request) diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/metadata.py b/lib/oauthlib/oauth2/rfc6749/endpoints/metadata.py new file mode 100644 index 00000000..81ee1def --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/metadata.py @@ -0,0 +1,237 @@ +""" +oauthlib.oauth2.rfc6749.endpoint.metadata +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +An implementation of the `OAuth 2.0 Authorization Server Metadata`. + +.. _`OAuth 2.0 Authorization Server Metadata`: https://tools.ietf.org/html/rfc8414 +""" +import copy +import json +import logging + +from .. import grant_types +from .authorization import AuthorizationEndpoint +from .base import BaseEndpoint, catch_errors_and_unavailability +from .introspect import IntrospectEndpoint +from .revocation import RevocationEndpoint +from .token import TokenEndpoint + +log = logging.getLogger(__name__) + + +class MetadataEndpoint(BaseEndpoint): + + """OAuth2.0 Authorization Server Metadata endpoint. + + This specification generalizes the metadata format defined by + `OpenID Connect Discovery 1.0` in a way that is compatible + with OpenID Connect Discovery while being applicable to a wider set + of OAuth 2.0 use cases. This is intentionally parallel to the way + that OAuth 2.0 Dynamic Client Registration Protocol [`RFC7591`_] + generalized the dynamic client registration mechanisms defined by + OpenID Connect Dynamic Client Registration 1.0 + in a way that is compatible with it. + + .. _`OpenID Connect Discovery 1.0`: https://openid.net/specs/openid-connect-discovery-1_0.html + .. _`RFC7591`: https://tools.ietf.org/html/rfc7591 + """ + + def __init__(self, endpoints, claims={}, raise_errors=True): + assert isinstance(claims, dict) + for endpoint in endpoints: + assert isinstance(endpoint, BaseEndpoint) + + BaseEndpoint.__init__(self) + self.raise_errors = raise_errors + self.endpoints = endpoints + self.initial_claims = claims + self.claims = self.validate_metadata_server() + + @catch_errors_and_unavailability + def create_metadata_response(self, uri, http_method='GET', body=None, + headers=None): + """Create metadata response + """ + headers = { + 'Content-Type': 'application/json' + } + return headers, json.dumps(self.claims), 200 + + def validate_metadata(self, array, key, is_required=False, is_list=False, is_url=False, is_issuer=False): + if not self.raise_errors: + return + + if key not in array: + if is_required: + raise ValueError("key {} is a mandatory metadata.".format(key)) + + elif is_issuer: + if not array[key].startswith("https"): + raise ValueError("key {}: {} must be an HTTPS URL".format(key, array[key])) + if "?" in array[key] or "&" in array[key] or "#" in array[key]: + raise ValueError("key {}: {} must not contain query or fragment components".format(key, array[key])) + + elif is_url: + if not array[key].startswith("http"): + raise ValueError("key {}: {} must be an URL".format(key, array[key])) + + elif is_list: + if not isinstance(array[key], list): + raise ValueError("key {}: {} must be an Array".format(key, array[key])) + for elem in array[key]: + if not isinstance(elem, str): + raise ValueError("array {}: {} must contains only string (not {})".format(key, array[key], elem)) + + def validate_metadata_token(self, claims, endpoint): + """ + If the token endpoint is used in the grant type, the value of this + parameter MUST be the same as the value of the "grant_type" + parameter passed to the token endpoint defined in the grant type + definition. + """ + self._grant_types.extend(endpoint._grant_types.keys()) + claims.setdefault("token_endpoint_auth_methods_supported", ["client_secret_post", "client_secret_basic"]) + + self.validate_metadata(claims, "token_endpoint_auth_methods_supported", is_list=True) + self.validate_metadata(claims, "token_endpoint_auth_signing_alg_values_supported", is_list=True) + self.validate_metadata(claims, "token_endpoint", is_required=True, is_url=True) + + def validate_metadata_authorization(self, claims, endpoint): + claims.setdefault("response_types_supported", + list(filter(lambda x: x != "none", endpoint._response_types.keys()))) + claims.setdefault("response_modes_supported", ["query", "fragment"]) + + # The OAuth2.0 Implicit flow is defined as a "grant type" but it is not + # using the "token" endpoint, as such, we have to add it explicitly to + # the list of "grant_types_supported" when enabled. + if "token" in claims["response_types_supported"]: + self._grant_types.append("implicit") + + self.validate_metadata(claims, "response_types_supported", is_required=True, is_list=True) + self.validate_metadata(claims, "response_modes_supported", is_list=True) + if "code" in claims["response_types_supported"]: + code_grant = endpoint._response_types["code"] + if not isinstance(code_grant, grant_types.AuthorizationCodeGrant) and hasattr(code_grant, "default_grant"): + code_grant = code_grant.default_grant + + claims.setdefault("code_challenge_methods_supported", + list(code_grant._code_challenge_methods.keys())) + self.validate_metadata(claims, "code_challenge_methods_supported", is_list=True) + self.validate_metadata(claims, "authorization_endpoint", is_required=True, is_url=True) + + def validate_metadata_revocation(self, claims, endpoint): + claims.setdefault("revocation_endpoint_auth_methods_supported", + ["client_secret_post", "client_secret_basic"]) + + self.validate_metadata(claims, "revocation_endpoint_auth_methods_supported", is_list=True) + self.validate_metadata(claims, "revocation_endpoint_auth_signing_alg_values_supported", is_list=True) + self.validate_metadata(claims, "revocation_endpoint", is_required=True, is_url=True) + + def validate_metadata_introspection(self, claims, endpoint): + claims.setdefault("introspection_endpoint_auth_methods_supported", + ["client_secret_post", "client_secret_basic"]) + + self.validate_metadata(claims, "introspection_endpoint_auth_methods_supported", is_list=True) + self.validate_metadata(claims, "introspection_endpoint_auth_signing_alg_values_supported", is_list=True) + self.validate_metadata(claims, "introspection_endpoint", is_required=True, is_url=True) + + def validate_metadata_server(self): + """ + Authorization servers can have metadata describing their + configuration. The following authorization server metadata values + are used by this specification. More details can be found in + `RFC8414 section 2`_ : + + issuer + REQUIRED + + authorization_endpoint + URL of the authorization server's authorization endpoint + [`RFC6749#Authorization`_]. This is REQUIRED unless no grant types are supported + that use the authorization endpoint. + + token_endpoint + URL of the authorization server's token endpoint [`RFC6749#Token`_]. This + is REQUIRED unless only the implicit grant type is supported. + + scopes_supported + RECOMMENDED. + + response_types_supported + REQUIRED. + + Other OPTIONAL fields: + jwks_uri, + registration_endpoint, + response_modes_supported + + grant_types_supported + OPTIONAL. JSON array containing a list of the OAuth 2.0 grant + type values that this authorization server supports. The array + values used are the same as those used with the "grant_types" + parameter defined by "OAuth 2.0 Dynamic Client Registration + Protocol" [`RFC7591`_]. If omitted, the default value is + "["authorization_code", "implicit"]". + + token_endpoint_auth_methods_supported + + token_endpoint_auth_signing_alg_values_supported + + service_documentation + + ui_locales_supported + + op_policy_uri + + op_tos_uri + + revocation_endpoint + + revocation_endpoint_auth_methods_supported + + revocation_endpoint_auth_signing_alg_values_supported + + introspection_endpoint + + introspection_endpoint_auth_methods_supported + + introspection_endpoint_auth_signing_alg_values_supported + + code_challenge_methods_supported + + Additional authorization server metadata parameters MAY also be used. + Some are defined by other specifications, such as OpenID Connect + Discovery 1.0 [`OpenID.Discovery`_]. + + .. _`RFC8414 section 2`: https://tools.ietf.org/html/rfc8414#section-2 + .. _`RFC6749#Authorization`: https://tools.ietf.org/html/rfc6749#section-3.1 + .. _`RFC6749#Token`: https://tools.ietf.org/html/rfc6749#section-3.2 + .. _`RFC7591`: https://tools.ietf.org/html/rfc7591 + .. _`OpenID.Discovery`: https://openid.net/specs/openid-connect-discovery-1_0.html + """ + claims = copy.deepcopy(self.initial_claims) + self.validate_metadata(claims, "issuer", is_required=True, is_issuer=True) + self.validate_metadata(claims, "jwks_uri", is_url=True) + self.validate_metadata(claims, "scopes_supported", is_list=True) + self.validate_metadata(claims, "service_documentation", is_url=True) + self.validate_metadata(claims, "ui_locales_supported", is_list=True) + self.validate_metadata(claims, "op_policy_uri", is_url=True) + self.validate_metadata(claims, "op_tos_uri", is_url=True) + + self._grant_types = [] + for endpoint in self.endpoints: + if isinstance(endpoint, TokenEndpoint): + self.validate_metadata_token(claims, endpoint) + if isinstance(endpoint, AuthorizationEndpoint): + self.validate_metadata_authorization(claims, endpoint) + if isinstance(endpoint, RevocationEndpoint): + self.validate_metadata_revocation(claims, endpoint) + if isinstance(endpoint, IntrospectEndpoint): + self.validate_metadata_introspection(claims, endpoint) + + # "grant_types_supported" is a combination of all OAuth2 grant types + # allowed in the current provider implementation. + claims.setdefault("grant_types_supported", self._grant_types) + self.validate_metadata(claims, "grant_types_supported", is_list=True) + return claims diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py b/lib/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py new file mode 100644 index 00000000..d64a1663 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/pre_configured.py @@ -0,0 +1,216 @@ +""" +oauthlib.oauth2.rfc6749.endpoints.pre_configured +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various endpoints needed +for providing OAuth 2.0 RFC6749 servers. +""" +from ..grant_types import ( + AuthorizationCodeGrant, ClientCredentialsGrant, ImplicitGrant, + RefreshTokenGrant, ResourceOwnerPasswordCredentialsGrant, +) +from ..tokens import BearerToken +from .authorization import AuthorizationEndpoint +from .introspect import IntrospectEndpoint +from .resource import ResourceEndpoint +from .revocation import RevocationEndpoint +from .token import TokenEndpoint + + +class Server(AuthorizationEndpoint, IntrospectEndpoint, TokenEndpoint, + ResourceEndpoint, RevocationEndpoint): + + """An all-in-one endpoint featuring all four major grant types.""" + + def __init__(self, request_validator, token_expires_in=None, + token_generator=None, refresh_token_generator=None, + *args, **kwargs): + """Construct a new all-grants-in-one server. + + :param request_validator: An implementation of + oauthlib.oauth2.RequestValidator. + :param token_expires_in: An int or a function to generate a token + expiration offset (in seconds) given a + oauthlib.common.Request object. + :param token_generator: A function to generate a token from a request. + :param refresh_token_generator: A function to generate a token from a + request for the refresh token. + :param kwargs: Extra parameters to pass to authorization-, + token-, resource-, and revocation-endpoint constructors. + """ + self.auth_grant = AuthorizationCodeGrant(request_validator) + self.implicit_grant = ImplicitGrant(request_validator) + self.password_grant = ResourceOwnerPasswordCredentialsGrant( + request_validator) + self.credentials_grant = ClientCredentialsGrant(request_validator) + self.refresh_grant = RefreshTokenGrant(request_validator) + + self.bearer = BearerToken(request_validator, token_generator, + token_expires_in, refresh_token_generator) + + AuthorizationEndpoint.__init__(self, default_response_type='code', + response_types={ + 'code': self.auth_grant, + 'token': self.implicit_grant, + 'none': self.auth_grant + }, + default_token_type=self.bearer) + + TokenEndpoint.__init__(self, default_grant_type='authorization_code', + grant_types={ + 'authorization_code': self.auth_grant, + 'password': self.password_grant, + 'client_credentials': self.credentials_grant, + 'refresh_token': self.refresh_grant, + }, + default_token_type=self.bearer) + ResourceEndpoint.__init__(self, default_token='Bearer', + token_types={'Bearer': self.bearer}) + RevocationEndpoint.__init__(self, request_validator) + IntrospectEndpoint.__init__(self, request_validator) + + +class WebApplicationServer(AuthorizationEndpoint, IntrospectEndpoint, TokenEndpoint, + ResourceEndpoint, RevocationEndpoint): + + """An all-in-one endpoint featuring Authorization code grant and Bearer tokens.""" + + def __init__(self, request_validator, token_generator=None, + token_expires_in=None, refresh_token_generator=None, **kwargs): + """Construct a new web application server. + + :param request_validator: An implementation of + oauthlib.oauth2.RequestValidator. + :param token_expires_in: An int or a function to generate a token + expiration offset (in seconds) given a + oauthlib.common.Request object. + :param token_generator: A function to generate a token from a request. + :param refresh_token_generator: A function to generate a token from a + request for the refresh token. + :param kwargs: Extra parameters to pass to authorization-, + token-, resource-, and revocation-endpoint constructors. + """ + self.auth_grant = AuthorizationCodeGrant(request_validator) + self.refresh_grant = RefreshTokenGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, + token_expires_in, refresh_token_generator) + AuthorizationEndpoint.__init__(self, default_response_type='code', + response_types={'code': self.auth_grant}, + default_token_type=self.bearer) + TokenEndpoint.__init__(self, default_grant_type='authorization_code', + grant_types={ + 'authorization_code': self.auth_grant, + 'refresh_token': self.refresh_grant, + }, + default_token_type=self.bearer) + ResourceEndpoint.__init__(self, default_token='Bearer', + token_types={'Bearer': self.bearer}) + RevocationEndpoint.__init__(self, request_validator) + IntrospectEndpoint.__init__(self, request_validator) + + +class MobileApplicationServer(AuthorizationEndpoint, IntrospectEndpoint, + ResourceEndpoint, RevocationEndpoint): + + """An all-in-one endpoint featuring Implicit code grant and Bearer tokens.""" + + def __init__(self, request_validator, token_generator=None, + token_expires_in=None, refresh_token_generator=None, **kwargs): + """Construct a new implicit grant server. + + :param request_validator: An implementation of + oauthlib.oauth2.RequestValidator. + :param token_expires_in: An int or a function to generate a token + expiration offset (in seconds) given a + oauthlib.common.Request object. + :param token_generator: A function to generate a token from a request. + :param refresh_token_generator: A function to generate a token from a + request for the refresh token. + :param kwargs: Extra parameters to pass to authorization-, + token-, resource-, and revocation-endpoint constructors. + """ + self.implicit_grant = ImplicitGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, + token_expires_in, refresh_token_generator) + AuthorizationEndpoint.__init__(self, default_response_type='token', + response_types={ + 'token': self.implicit_grant}, + default_token_type=self.bearer) + ResourceEndpoint.__init__(self, default_token='Bearer', + token_types={'Bearer': self.bearer}) + RevocationEndpoint.__init__(self, request_validator, + supported_token_types=['access_token']) + IntrospectEndpoint.__init__(self, request_validator, + supported_token_types=['access_token']) + + +class LegacyApplicationServer(TokenEndpoint, IntrospectEndpoint, + ResourceEndpoint, RevocationEndpoint): + + """An all-in-one endpoint featuring Resource Owner Password Credentials grant and Bearer tokens.""" + + def __init__(self, request_validator, token_generator=None, + token_expires_in=None, refresh_token_generator=None, **kwargs): + """Construct a resource owner password credentials grant server. + + :param request_validator: An implementation of + oauthlib.oauth2.RequestValidator. + :param token_expires_in: An int or a function to generate a token + expiration offset (in seconds) given a + oauthlib.common.Request object. + :param token_generator: A function to generate a token from a request. + :param refresh_token_generator: A function to generate a token from a + request for the refresh token. + :param kwargs: Extra parameters to pass to authorization-, + token-, resource-, and revocation-endpoint constructors. + """ + self.password_grant = ResourceOwnerPasswordCredentialsGrant( + request_validator) + self.refresh_grant = RefreshTokenGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, + token_expires_in, refresh_token_generator) + TokenEndpoint.__init__(self, default_grant_type='password', + grant_types={ + 'password': self.password_grant, + 'refresh_token': self.refresh_grant, + }, + default_token_type=self.bearer) + ResourceEndpoint.__init__(self, default_token='Bearer', + token_types={'Bearer': self.bearer}) + RevocationEndpoint.__init__(self, request_validator) + IntrospectEndpoint.__init__(self, request_validator) + + +class BackendApplicationServer(TokenEndpoint, IntrospectEndpoint, + ResourceEndpoint, RevocationEndpoint): + + """An all-in-one endpoint featuring Client Credentials grant and Bearer tokens.""" + + def __init__(self, request_validator, token_generator=None, + token_expires_in=None, refresh_token_generator=None, **kwargs): + """Construct a client credentials grant server. + + :param request_validator: An implementation of + oauthlib.oauth2.RequestValidator. + :param token_expires_in: An int or a function to generate a token + expiration offset (in seconds) given a + oauthlib.common.Request object. + :param token_generator: A function to generate a token from a request. + :param refresh_token_generator: A function to generate a token from a + request for the refresh token. + :param kwargs: Extra parameters to pass to authorization-, + token-, resource-, and revocation-endpoint constructors. + """ + self.credentials_grant = ClientCredentialsGrant(request_validator) + self.bearer = BearerToken(request_validator, token_generator, + token_expires_in, refresh_token_generator) + TokenEndpoint.__init__(self, default_grant_type='client_credentials', + grant_types={ + 'client_credentials': self.credentials_grant}, + default_token_type=self.bearer) + ResourceEndpoint.__init__(self, default_token='Bearer', + token_types={'Bearer': self.bearer}) + RevocationEndpoint.__init__(self, request_validator, + supported_token_types=['access_token']) + IntrospectEndpoint.__init__(self, request_validator, + supported_token_types=['access_token']) diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/resource.py b/lib/oauthlib/oauth2/rfc6749/endpoints/resource.py new file mode 100644 index 00000000..f7562255 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/resource.py @@ -0,0 +1,84 @@ +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +import logging + +from oauthlib.common import Request + +from .base import BaseEndpoint, catch_errors_and_unavailability + +log = logging.getLogger(__name__) + + +class ResourceEndpoint(BaseEndpoint): + + """Authorizes access to protected resources. + + The client accesses protected resources by presenting the access + token to the resource server. The resource server MUST validate the + access token and ensure that it has not expired and that its scope + covers the requested resource. The methods used by the resource + server to validate the access token (as well as any error responses) + are beyond the scope of this specification but generally involve an + interaction or coordination between the resource server and the + authorization server:: + + # For most cases, returning a 403 should suffice. + + The method in which the client utilizes the access token to + authenticate with the resource server depends on the type of access + token issued by the authorization server. Typically, it involves + using the HTTP "Authorization" request header field [RFC2617] with an + authentication scheme defined by the specification of the access + token type used, such as [RFC6750]:: + + # Access tokens may also be provided in query and body + https://example.com/protected?access_token=kjfch2345sdf # Query + access_token=sdf23409df # Body + """ + + def __init__(self, default_token, token_types): + BaseEndpoint.__init__(self) + self._tokens = token_types + self._default_token = default_token + + @property + def default_token(self): + return self._default_token + + @property + def default_token_type_handler(self): + return self.tokens.get(self.default_token) + + @property + def tokens(self): + return self._tokens + + @catch_errors_and_unavailability + def verify_request(self, uri, http_method='GET', body=None, headers=None, + scopes=None): + """Validate client, code etc, return body + headers""" + request = Request(uri, http_method, body, headers) + request.token_type = self.find_token_type(request) + request.scopes = scopes + token_type_handler = self.tokens.get(request.token_type, + self.default_token_type_handler) + log.debug('Dispatching token_type %s request to %r.', + request.token_type, token_type_handler) + return token_type_handler.validate_request(request), request + + def find_token_type(self, request): + """Token type identification. + + RFC 6749 does not provide a method for easily differentiating between + different token types during protected resource access. We estimate + the most likely token type (if any) by asking each known token type + to give an estimation based on the request. + """ + estimates = sorted(((t.estimate_type(request), n) + for n, t in self.tokens.items()), reverse=True) + return estimates[0][1] if len(estimates) else None diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/revocation.py b/lib/oauthlib/oauth2/rfc6749/endpoints/revocation.py new file mode 100644 index 00000000..4aa5ec6e --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/revocation.py @@ -0,0 +1,126 @@ +""" +oauthlib.oauth2.rfc6749.endpoint.revocation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +An implementation of the OAuth 2 `Token Revocation`_ spec (draft 11). + +.. _`Token Revocation`: https://tools.ietf.org/html/draft-ietf-oauth-revocation-11 +""" +import logging + +from oauthlib.common import Request + +from ..errors import OAuth2Error +from .base import BaseEndpoint, catch_errors_and_unavailability + +log = logging.getLogger(__name__) + + +class RevocationEndpoint(BaseEndpoint): + + """Token revocation endpoint. + + Endpoint used by authenticated clients to revoke access and refresh tokens. + Commonly this will be part of the Authorization Endpoint. + """ + + valid_token_types = ('access_token', 'refresh_token') + valid_request_methods = ('POST',) + + def __init__(self, request_validator, supported_token_types=None, + enable_jsonp=False): + BaseEndpoint.__init__(self) + self.request_validator = request_validator + self.supported_token_types = ( + supported_token_types or self.valid_token_types) + self.enable_jsonp = enable_jsonp + + @catch_errors_and_unavailability + def create_revocation_response(self, uri, http_method='POST', body=None, + headers=None): + """Revoke supplied access or refresh token. + + + The authorization server responds with HTTP status code 200 if the + token has been revoked sucessfully or if the client submitted an + invalid token. + + Note: invalid tokens do not cause an error response since the client + cannot handle such an error in a reasonable way. Moreover, the purpose + of the revocation request, invalidating the particular token, is + already achieved. + + The content of the response body is ignored by the client as all + necessary information is conveyed in the response code. + + An invalid token type hint value is ignored by the authorization server + and does not influence the revocation response. + """ + resp_headers = { + 'Content-Type': 'application/json', + 'Cache-Control': 'no-store', + 'Pragma': 'no-cache', + } + request = Request( + uri, http_method=http_method, body=body, headers=headers) + try: + self.validate_revocation_request(request) + log.debug('Token revocation valid for %r.', request) + except OAuth2Error as e: + log.debug('Client error during validation of %r. %r.', request, e) + response_body = e.json + if self.enable_jsonp and request.callback: + response_body = '{}({});'.format(request.callback, response_body) + resp_headers.update(e.headers) + return resp_headers, response_body, e.status_code + + self.request_validator.revoke_token(request.token, + request.token_type_hint, request) + + response_body = '' + if self.enable_jsonp and request.callback: + response_body = request.callback + '();' + return {}, response_body, 200 + + def validate_revocation_request(self, request): + """Ensure the request is valid. + + The client constructs the request by including the following parameters + using the "application/x-www-form-urlencoded" format in the HTTP + request entity-body: + + token (REQUIRED). The token that the client wants to get revoked. + + token_type_hint (OPTIONAL). A hint about the type of the token + submitted for revocation. Clients MAY pass this parameter in order to + help the authorization server to optimize the token lookup. If the + server is unable to locate the token using the given hint, it MUST + extend its search accross all of its supported token types. An + authorization server MAY ignore this parameter, particularly if it is + able to detect the token type automatically. This specification + defines two such values: + + * access_token: An Access Token as defined in [RFC6749], + `section 1.4`_ + + * refresh_token: A Refresh Token as defined in [RFC6749], + `section 1.5`_ + + Specific implementations, profiles, and extensions of this + specification MAY define other values for this parameter using + the registry defined in `Section 4.1.2`_. + + The client also includes its authentication credentials as described in + `Section 2.3`_. of [`RFC6749`_]. + + .. _`section 1.4`: https://tools.ietf.org/html/rfc6749#section-1.4 + .. _`section 1.5`: https://tools.ietf.org/html/rfc6749#section-1.5 + .. _`section 2.3`: https://tools.ietf.org/html/rfc6749#section-2.3 + .. _`Section 4.1.2`: https://tools.ietf.org/html/draft-ietf-oauth-revocation-11#section-4.1.2 + .. _`RFC6749`: https://tools.ietf.org/html/rfc6749 + """ + self._raise_on_bad_method(request) + self._raise_on_bad_post_request(request) + self._raise_on_missing_token(request) + self._raise_on_invalid_client(request) + self._raise_on_unsupported_token(request) diff --git a/lib/oauthlib/oauth2/rfc6749/endpoints/token.py b/lib/oauthlib/oauth2/rfc6749/endpoints/token.py new file mode 100644 index 00000000..ab9e0918 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/endpoints/token.py @@ -0,0 +1,119 @@ +""" +oauthlib.oauth2.rfc6749 +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OAuth 2.0 RFC6749. +""" +import logging + +from oauthlib.common import Request +from oauthlib.oauth2.rfc6749 import utils + +from .base import BaseEndpoint, catch_errors_and_unavailability + +log = logging.getLogger(__name__) + + +class TokenEndpoint(BaseEndpoint): + + """Token issuing endpoint. + + The token endpoint is used by the client to obtain an access token by + presenting its authorization grant or refresh token. The token + endpoint is used with every authorization grant except for the + implicit grant type (since an access token is issued directly). + + The means through which the client obtains the location of the token + endpoint are beyond the scope of this specification, but the location + is typically provided in the service documentation. + + The endpoint URI MAY include an "application/x-www-form-urlencoded" + formatted (per `Appendix B`_) query component, + which MUST be retained when adding additional query parameters. The + endpoint URI MUST NOT include a fragment component:: + + https://example.com/path?query=component # OK + https://example.com/path?query=component#fragment # Not OK + + Since requests to the token endpoint result in the transmission of + clear-text credentials (in the HTTP request and response), the + authorization server MUST require the use of TLS as described in + Section 1.6 when sending requests to the token endpoint:: + + # We will deny any request which URI schema is not with https + + The client MUST use the HTTP "POST" method when making access token + requests:: + + # HTTP method is currently not enforced + + Parameters sent without a value MUST be treated as if they were + omitted from the request. The authorization server MUST ignore + unrecognized request parameters. Request and response parameters + MUST NOT be included more than once:: + + # Delegated to each grant type. + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + """ + + valid_request_methods = ('POST',) + + def __init__(self, default_grant_type, default_token_type, grant_types): + BaseEndpoint.__init__(self) + self._grant_types = grant_types + self._default_token_type = default_token_type + self._default_grant_type = default_grant_type + + @property + def grant_types(self): + return self._grant_types + + @property + def default_grant_type(self): + return self._default_grant_type + + @property + def default_grant_type_handler(self): + return self.grant_types.get(self.default_grant_type) + + @property + def default_token_type(self): + return self._default_token_type + + @catch_errors_and_unavailability + def create_token_response(self, uri, http_method='POST', body=None, + headers=None, credentials=None, grant_type_for_scope=None, + claims=None): + """Extract grant_type and route to the designated handler.""" + request = Request( + uri, http_method=http_method, body=body, headers=headers) + self.validate_token_request(request) + # 'scope' is an allowed Token Request param in both the "Resource Owner Password Credentials Grant" + # and "Client Credentials Grant" flows + # https://tools.ietf.org/html/rfc6749#section-4.3.2 + # https://tools.ietf.org/html/rfc6749#section-4.4.2 + request.scopes = utils.scope_to_list(request.scope) + + request.extra_credentials = credentials + if grant_type_for_scope: + request.grant_type = grant_type_for_scope + + # OpenID Connect claims, if provided. The server using oauthlib might choose + # to implement the claims parameter of the Authorization Request. In this case + # it should retrieve those claims and pass them via the claims argument here, + # as a dict. + if claims: + request.claims = claims + + grant_type_handler = self.grant_types.get(request.grant_type, + self.default_grant_type_handler) + log.debug('Dispatching grant_type %s request to %r.', + request.grant_type, grant_type_handler) + return grant_type_handler.create_token_response( + request, self.default_token_type) + + def validate_token_request(self, request): + self._raise_on_bad_method(request) + self._raise_on_bad_post_request(request) diff --git a/lib/oauthlib/oauth2/rfc6749/errors.py b/lib/oauthlib/oauth2/rfc6749/errors.py new file mode 100644 index 00000000..b01e247b --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/errors.py @@ -0,0 +1,403 @@ +""" +oauthlib.oauth2.rfc6749.errors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Error used both by OAuth 2 clients and providers to represent the spec +defined error responses for all four core grant types. +""" +import json + +from oauthlib.common import add_params_to_uri, urlencode + + +class OAuth2Error(Exception): + error = None + status_code = 400 + description = '' + + def __init__(self, description=None, uri=None, state=None, + status_code=None, request=None): + """ + :param description: A human-readable ASCII [USASCII] text providing + additional information, used to assist the client + developer in understanding the error that occurred. + Values for the "error_description" parameter + MUST NOT include characters outside the set + x20-21 / x23-5B / x5D-7E. + + :param uri: A URI identifying a human-readable web page with information + about the error, used to provide the client developer with + additional information about the error. Values for the + "error_uri" parameter MUST conform to the URI- Reference + syntax, and thus MUST NOT include characters outside the set + x21 / x23-5B / x5D-7E. + + :param state: A CSRF protection value received from the client. + + :param status_code: + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + if description is not None: + self.description = description + + message = '({}) {}'.format(self.error, self.description) + if request: + message += ' ' + repr(request) + super().__init__(message) + + self.uri = uri + self.state = state + + if status_code: + self.status_code = status_code + + if request: + self.redirect_uri = request.redirect_uri + self.client_id = request.client_id + self.scopes = request.scopes + self.response_type = request.response_type + self.response_mode = request.response_mode + self.grant_type = request.grant_type + if not state: + self.state = request.state + else: + self.redirect_uri = None + self.client_id = None + self.scopes = None + self.response_type = None + self.response_mode = None + self.grant_type = None + + def in_uri(self, uri): + fragment = self.response_mode == "fragment" + return add_params_to_uri(uri, self.twotuples, fragment) + + @property + def twotuples(self): + error = [('error', self.error)] + if self.description: + error.append(('error_description', self.description)) + if self.uri: + error.append(('error_uri', self.uri)) + if self.state: + error.append(('state', self.state)) + return error + + @property + def urlencoded(self): + return urlencode(self.twotuples) + + @property + def json(self): + return json.dumps(dict(self.twotuples)) + + @property + def headers(self): + if self.status_code == 401: + """ + https://tools.ietf.org/html/rfc6750#section-3 + + All challenges defined by this specification MUST use the auth-scheme + value "Bearer". This scheme MUST be followed by one or more + auth-param values. + """ + authvalues = [ + "Bearer", + 'error="{}"'.format(self.error) + ] + if self.description: + authvalues.append('error_description="{}"'.format(self.description)) + if self.uri: + authvalues.append('error_uri="{}"'.format(self.uri)) + return {"WWW-Authenticate": ", ".join(authvalues)} + return {} + + +class TokenExpiredError(OAuth2Error): + error = 'token_expired' + + +class InsecureTransportError(OAuth2Error): + error = 'insecure_transport' + description = 'OAuth 2 MUST utilize https.' + + +class MismatchingStateError(OAuth2Error): + error = 'mismatching_state' + description = 'CSRF Warning! State not equal in request and response.' + + +class MissingCodeError(OAuth2Error): + error = 'missing_code' + + +class MissingTokenError(OAuth2Error): + error = 'missing_token' + + +class MissingTokenTypeError(OAuth2Error): + error = 'missing_token_type' + + +class FatalClientError(OAuth2Error): + """ + Errors during authorization where user should not be redirected back. + + If the request fails due to a missing, invalid, or mismatching + redirection URI, or if the client identifier is missing or invalid, + the authorization server SHOULD inform the resource owner of the + error and MUST NOT automatically redirect the user-agent to the + invalid redirection URI. + + Instead the user should be informed of the error by the provider itself. + """ + pass + + +class InvalidRequestFatalError(FatalClientError): + """ + For fatal errors, the request is missing a required parameter, includes + an invalid parameter value, includes a parameter more than once, or is + otherwise malformed. + """ + error = 'invalid_request' + + +class InvalidRedirectURIError(InvalidRequestFatalError): + description = 'Invalid redirect URI.' + + +class MissingRedirectURIError(InvalidRequestFatalError): + description = 'Missing redirect URI.' + + +class MismatchingRedirectURIError(InvalidRequestFatalError): + description = 'Mismatching redirect URI.' + + +class InvalidClientIdError(InvalidRequestFatalError): + description = 'Invalid client_id parameter value.' + + +class MissingClientIdError(InvalidRequestFatalError): + description = 'Missing client_id parameter.' + + +class InvalidRequestError(OAuth2Error): + """ + The request is missing a required parameter, includes an invalid + parameter value, includes a parameter more than once, or is + otherwise malformed. + """ + error = 'invalid_request' + + +class MissingResponseTypeError(InvalidRequestError): + description = 'Missing response_type parameter.' + + +class MissingCodeChallengeError(InvalidRequestError): + """ + If the server requires Proof Key for Code Exchange (PKCE) by OAuth + public clients and the client does not send the "code_challenge" in + the request, the authorization endpoint MUST return the authorization + error response with the "error" value set to "invalid_request". The + "error_description" or the response of "error_uri" SHOULD explain the + nature of error, e.g., code challenge required. + """ + description = 'Code challenge required.' + + +class MissingCodeVerifierError(InvalidRequestError): + """ + The request to the token endpoint, when PKCE is enabled, has + the parameter `code_verifier` REQUIRED. + """ + description = 'Code verifier required.' + + +class AccessDeniedError(OAuth2Error): + """ + The resource owner or authorization server denied the request. + """ + error = 'access_denied' + + +class UnsupportedResponseTypeError(OAuth2Error): + """ + The authorization server does not support obtaining an authorization + code using this method. + """ + error = 'unsupported_response_type' + + +class UnsupportedCodeChallengeMethodError(InvalidRequestError): + """ + If the server supporting PKCE does not support the requested + transformation, the authorization endpoint MUST return the + authorization error response with "error" value set to + "invalid_request". The "error_description" or the response of + "error_uri" SHOULD explain the nature of error, e.g., transform + algorithm not supported. + """ + description = 'Transform algorithm not supported.' + + +class InvalidScopeError(OAuth2Error): + """ + The requested scope is invalid, unknown, or malformed, or + exceeds the scope granted by the resource owner. + + https://tools.ietf.org/html/rfc6749#section-5.2 + """ + error = 'invalid_scope' + + +class ServerError(OAuth2Error): + """ + The authorization server encountered an unexpected condition that + prevented it from fulfilling the request. (This error code is needed + because a 500 Internal Server Error HTTP status code cannot be returned + to the client via a HTTP redirect.) + """ + error = 'server_error' + + +class TemporarilyUnavailableError(OAuth2Error): + """ + The authorization server is currently unable to handle the request + due to a temporary overloading or maintenance of the server. + (This error code is needed because a 503 Service Unavailable HTTP + status code cannot be returned to the client via a HTTP redirect.) + """ + error = 'temporarily_unavailable' + + +class InvalidClientError(FatalClientError): + """ + Client authentication failed (e.g. unknown client, no client + authentication included, or unsupported authentication method). + The authorization server MAY return an HTTP 401 (Unauthorized) status + code to indicate which HTTP authentication schemes are supported. + If the client attempted to authenticate via the "Authorization" request + header field, the authorization server MUST respond with an + HTTP 401 (Unauthorized) status code, and include the "WWW-Authenticate" + response header field matching the authentication scheme used by the + client. + """ + error = 'invalid_client' + status_code = 401 + + +class InvalidGrantError(OAuth2Error): + """ + The provided authorization grant (e.g. authorization code, resource + owner credentials) or refresh token is invalid, expired, revoked, does + not match the redirection URI used in the authorization request, or was + issued to another client. + + https://tools.ietf.org/html/rfc6749#section-5.2 + """ + error = 'invalid_grant' + status_code = 400 + + +class UnauthorizedClientError(OAuth2Error): + """ + The authenticated client is not authorized to use this authorization + grant type. + """ + error = 'unauthorized_client' + + +class UnsupportedGrantTypeError(OAuth2Error): + """ + The authorization grant type is not supported by the authorization + server. + """ + error = 'unsupported_grant_type' + + +class UnsupportedTokenTypeError(OAuth2Error): + """ + The authorization server does not support the hint of the + presented token type. I.e. the client tried to revoke an access token + on a server not supporting this feature. + """ + error = 'unsupported_token_type' + + +class InvalidTokenError(OAuth2Error): + """ + The access token provided is expired, revoked, malformed, or + invalid for other reasons. The resource SHOULD respond with + the HTTP 401 (Unauthorized) status code. The client MAY + request a new access token and retry the protected resource + request. + """ + error = 'invalid_token' + status_code = 401 + description = ("The access token provided is expired, revoked, malformed, " + "or invalid for other reasons.") + + +class InsufficientScopeError(OAuth2Error): + """ + The request requires higher privileges than provided by the + access token. The resource server SHOULD respond with the HTTP + 403 (Forbidden) status code and MAY include the "scope" + attribute with the scope necessary to access the protected + resource. + """ + error = 'insufficient_scope' + status_code = 403 + description = ("The request requires higher privileges than provided by " + "the access token.") + + +class ConsentRequired(OAuth2Error): + """ + The Authorization Server requires End-User consent. + + This error MAY be returned when the prompt parameter value in the + Authentication Request is none, but the Authentication Request cannot be + completed without displaying a user interface for End-User consent. + """ + error = 'consent_required' + + +class LoginRequired(OAuth2Error): + """ + The Authorization Server requires End-User authentication. + + This error MAY be returned when the prompt parameter value in the + Authentication Request is none, but the Authentication Request cannot be + completed without displaying a user interface for End-User authentication. + """ + error = 'login_required' + + +class CustomOAuth2Error(OAuth2Error): + """ + This error is a placeholder for all custom errors not described by the RFC. + Some of the popular OAuth2 providers are using custom errors. + """ + def __init__(self, error, *args, **kwargs): + self.error = error + super().__init__(*args, **kwargs) + + +def raise_from_error(error, params=None): + import inspect + import sys + kwargs = { + 'description': params.get('error_description'), + 'uri': params.get('error_uri'), + 'state': params.get('state') + } + for _, cls in inspect.getmembers(sys.modules[__name__], inspect.isclass): + if cls.error == error: + raise cls(**kwargs) + raise CustomOAuth2Error(error=error, **kwargs) diff --git a/lib/oauthlib/oauth2/rfc6749/grant_types/__init__.py b/lib/oauthlib/oauth2/rfc6749/grant_types/__init__.py new file mode 100644 index 00000000..eb88cfc2 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/grant_types/__init__.py @@ -0,0 +1,11 @@ +""" +oauthlib.oauth2.rfc6749.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +from .authorization_code import AuthorizationCodeGrant +from .client_credentials import ClientCredentialsGrant +from .implicit import ImplicitGrant +from .refresh_token import RefreshTokenGrant +from .resource_owner_password_credentials import ( + ResourceOwnerPasswordCredentialsGrant, +) diff --git a/lib/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py b/lib/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py new file mode 100644 index 00000000..bf42d889 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/grant_types/authorization_code.py @@ -0,0 +1,545 @@ +""" +oauthlib.oauth2.rfc6749.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import base64 +import hashlib +import json +import logging + +from oauthlib import common + +from .. import errors +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +def code_challenge_method_s256(verifier, challenge): + """ + If the "code_challenge_method" from `Section 4.3`_ was "S256", the + received "code_verifier" is hashed by SHA-256, base64url-encoded, and + then compared to the "code_challenge", i.e.: + + BASE64URL-ENCODE(SHA256(ASCII(code_verifier))) == code_challenge + + How to implement a base64url-encoding + function without padding, based upon the standard base64-encoding + function that uses padding. + + To be concrete, example C# code implementing these functions is shown + below. Similar code could be used in other languages. + + static string base64urlencode(byte [] arg) + { + string s = Convert.ToBase64String(arg); // Regular base64 encoder + s = s.Split('=')[0]; // Remove any trailing '='s + s = s.Replace('+', '-'); // 62nd char of encoding + s = s.Replace('/', '_'); // 63rd char of encoding + return s; + } + + In python urlsafe_b64encode is already replacing '+' and '/', but preserve + the trailing '='. So we have to remove it. + + .. _`Section 4.3`: https://tools.ietf.org/html/rfc7636#section-4.3 + """ + return base64.urlsafe_b64encode( + hashlib.sha256(verifier.encode()).digest() + ).decode().rstrip('=') == challenge + + +def code_challenge_method_plain(verifier, challenge): + """ + If the "code_challenge_method" from `Section 4.3`_ was "plain", they are + compared directly, i.e.: + + code_verifier == code_challenge. + + .. _`Section 4.3`: https://tools.ietf.org/html/rfc7636#section-4.3 + """ + return verifier == challenge + + +class AuthorizationCodeGrant(GrantTypeBase): + + """`Authorization Code Grant`_ + + The authorization code grant type is used to obtain both access + tokens and refresh tokens and is optimized for confidential clients. + Since this is a redirection-based flow, the client must be capable of + interacting with the resource owner's user-agent (typically a web + browser) and capable of receiving incoming requests (via redirection) + from the authorization server:: + + +----------+ + | Resource | + | Owner | + | | + +----------+ + ^ + | + (B) + +----|-----+ Client Identifier +---------------+ + | -+----(A)-- & Redirection URI ---->| | + | User- | | Authorization | + | Agent -+----(B)-- User authenticates --->| Server | + | | | | + | -+----(C)-- Authorization Code ---<| | + +-|----|---+ +---------------+ + | | ^ v + (A) (C) | | + | | | | + ^ v | | + +---------+ | | + | |>---(D)-- Authorization Code ---------' | + | Client | & Redirection URI | + | | | + | |<---(E)----- Access Token -------------------' + +---------+ (w/ Optional Refresh Token) + + Note: The lines illustrating steps (A), (B), and (C) are broken into + two parts as they pass through the user-agent. + + Figure 3: Authorization Code Flow + + The flow illustrated in Figure 3 includes the following steps: + + (A) The client initiates the flow by directing the resource owner's + user-agent to the authorization endpoint. The client includes + its client identifier, requested scope, local state, and a + redirection URI to which the authorization server will send the + user-agent back once access is granted (or denied). + + (B) The authorization server authenticates the resource owner (via + the user-agent) and establishes whether the resource owner + grants or denies the client's access request. + + (C) Assuming the resource owner grants access, the authorization + server redirects the user-agent back to the client using the + redirection URI provided earlier (in the request or during + client registration). The redirection URI includes an + authorization code and any local state provided by the client + earlier. + + (D) The client requests an access token from the authorization + server's token endpoint by including the authorization code + received in the previous step. When making the request, the + client authenticates with the authorization server. The client + includes the redirection URI used to obtain the authorization + code for verification. + + (E) The authorization server authenticates the client, validates the + authorization code, and ensures that the redirection URI + received matches the URI used to redirect the client in + step (C). If valid, the authorization server responds back with + an access token and, optionally, a refresh token. + + OAuth 2.0 public clients utilizing the Authorization Code Grant are + susceptible to the authorization code interception attack. + + A technique to mitigate against the threat through the use of Proof Key for Code + Exchange (PKCE, pronounced "pixy") is implemented in the current oauthlib + implementation. + + .. _`Authorization Code Grant`: https://tools.ietf.org/html/rfc6749#section-4.1 + .. _`PKCE`: https://tools.ietf.org/html/rfc7636 + """ + + default_response_mode = 'query' + response_types = ['code'] + + # This dict below is private because as RFC mention it: + # "S256" is Mandatory To Implement (MTI) on the server. + # + _code_challenge_methods = { + 'plain': code_challenge_method_plain, + 'S256': code_challenge_method_s256 + } + + def create_authorization_code(self, request): + """ + Generates an authorization grant represented as a dictionary. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + grant = {'code': common.generate_token()} + if hasattr(request, 'state') and request.state: + grant['state'] = request.state + log.debug('Created authorization code grant %r for request %r.', + grant, request) + return grant + + def create_authorization_response(self, request, token_handler): + """ + The client constructs the request URI by adding the following + parameters to the query component of the authorization endpoint URI + using the "application/x-www-form-urlencoded" format, per `Appendix B`_: + + response_type + REQUIRED. Value MUST be set to "code" for standard OAuth2 + authorization flow. For OpenID Connect it must be one of + "code token", "code id_token", or "code token id_token" - we + essentially test that "code" appears in the response_type. + client_id + REQUIRED. The client identifier as described in `Section 2.2`_. + redirect_uri + OPTIONAL. As described in `Section 3.1.2`_. + scope + OPTIONAL. The scope of the access request as described by + `Section 3.3`_. + state + RECOMMENDED. An opaque value used by the client to maintain + state between the request and callback. The authorization + server includes this value when redirecting the user-agent back + to the client. The parameter SHOULD be used for preventing + cross-site request forgery as described in `Section 10.12`_. + + The client directs the resource owner to the constructed URI using an + HTTP redirection response, or by other means available to it via the + user-agent. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + :returns: headers, body, status + :raises: FatalClientError on invalid redirect URI or client id. + + A few examples:: + + >>> from your_validator import your_validator + >>> request = Request('https://example.com/authorize?client_id=valid' + ... '&redirect_uri=http%3A%2F%2Fclient.com%2F') + >>> from oauthlib.common import Request + >>> from oauthlib.oauth2 import AuthorizationCodeGrant, BearerToken + >>> token = BearerToken(your_validator) + >>> grant = AuthorizationCodeGrant(your_validator) + >>> request.scopes = ['authorized', 'in', 'some', 'form'] + >>> grant.create_authorization_response(request, token) + (u'http://client.com/?error=invalid_request&error_description=Missing+response_type+parameter.', None, None, 400) + >>> request = Request('https://example.com/authorize?client_id=valid' + ... '&redirect_uri=http%3A%2F%2Fclient.com%2F' + ... '&response_type=code') + >>> request.scopes = ['authorized', 'in', 'some', 'form'] + >>> grant.create_authorization_response(request, token) + (u'http://client.com/?code=u3F05aEObJuP2k7DordviIgW5wl52N', None, None, 200) + >>> # If the client id or redirect uri fails validation + >>> grant.create_authorization_response(request, token) + Traceback (most recent call last): + File "", line 1, in + File "oauthlib/oauth2/rfc6749/grant_types.py", line 515, in create_authorization_response + >>> grant.create_authorization_response(request, token) + File "oauthlib/oauth2/rfc6749/grant_types.py", line 591, in validate_authorization_request + oauthlib.oauth2.rfc6749.errors.InvalidClientIdError + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + .. _`Section 2.2`: https://tools.ietf.org/html/rfc6749#section-2.2 + .. _`Section 3.1.2`: https://tools.ietf.org/html/rfc6749#section-3.1.2 + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 10.12`: https://tools.ietf.org/html/rfc6749#section-10.12 + """ + try: + self.validate_authorization_request(request) + log.debug('Pre resource owner authorization validation ok for %r.', + request) + + # If the request fails due to a missing, invalid, or mismatching + # redirection URI, or if the client identifier is missing or invalid, + # the authorization server SHOULD inform the resource owner of the + # error and MUST NOT automatically redirect the user-agent to the + # invalid redirection URI. + except errors.FatalClientError as e: + log.debug('Fatal client error during validation of %r. %r.', + request, e) + raise + + # If the resource owner denies the access request or if the request + # fails for reasons other than a missing or invalid redirection URI, + # the authorization server informs the client by adding the following + # parameters to the query component of the redirection URI using the + # "application/x-www-form-urlencoded" format, per Appendix B: + # https://tools.ietf.org/html/rfc6749#appendix-B + except errors.OAuth2Error as e: + log.debug('Client error during validation of %r. %r.', request, e) + request.redirect_uri = request.redirect_uri or self.error_uri + redirect_uri = common.add_params_to_uri( + request.redirect_uri, e.twotuples, + fragment=request.response_mode == "fragment") + return {'Location': redirect_uri}, None, 302 + + grant = self.create_authorization_code(request) + for modifier in self._code_modifiers: + grant = modifier(grant, token_handler, request) + log.debug('Saving grant %r for %r.', grant, request) + self.request_validator.save_authorization_code( + request.client_id, grant, request) + return self.prepare_authorization_response( + request, grant, {}, None, 302) + + def create_token_response(self, request, token_handler): + """Validate the authorization code. + + The client MUST NOT use the authorization code more than once. If an + authorization code is used more than once, the authorization server + MUST deny the request and SHOULD revoke (when possible) all tokens + previously issued based on that authorization code. The authorization + code is bound to the client identifier and redirection URI. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + + """ + headers = self._get_default_headers() + try: + self.validate_token_request(request) + log.debug('Token request validation ok for %r.', request) + except errors.OAuth2Error as e: + log.debug('Client error during validation of %r. %r.', request, e) + headers.update(e.headers) + return headers, e.json, e.status_code + + token = token_handler.create_token(request, refresh_token=self.refresh_token) + + for modifier in self._token_modifiers: + token = modifier(token, token_handler, request) + + self.request_validator.save_token(token, request) + self.request_validator.invalidate_authorization_code( + request.client_id, request.code, request) + return headers, json.dumps(token), 200 + + def validate_authorization_request(self, request): + """Check the authorization request for normal and fatal errors. + + A normal error could be a missing response_type parameter or the client + attempting to access scope it is not allowed to ask authorization for. + Normal errors can safely be included in the redirection URI and + sent back to the client. + + Fatal errors occur when the client_id or redirect_uri is invalid or + missing. These must be caught by the provider and handled, how this + is done is outside of the scope of OAuthLib but showing an error + page describing the issue is a good idea. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + + # First check for fatal errors + + # If the request fails due to a missing, invalid, or mismatching + # redirection URI, or if the client identifier is missing or invalid, + # the authorization server SHOULD inform the resource owner of the + # error and MUST NOT automatically redirect the user-agent to the + # invalid redirection URI. + + # First check duplicate parameters + for param in ('client_id', 'response_type', 'redirect_uri', 'scope', 'state'): + try: + duplicate_params = request.duplicate_params + except ValueError: + raise errors.InvalidRequestFatalError(description='Unable to parse query string', request=request) + if param in duplicate_params: + raise errors.InvalidRequestFatalError(description='Duplicate %s parameter.' % param, request=request) + + # REQUIRED. The client identifier as described in Section 2.2. + # https://tools.ietf.org/html/rfc6749#section-2.2 + if not request.client_id: + raise errors.MissingClientIdError(request=request) + + if not self.request_validator.validate_client_id(request.client_id, request): + raise errors.InvalidClientIdError(request=request) + + # OPTIONAL. As described in Section 3.1.2. + # https://tools.ietf.org/html/rfc6749#section-3.1.2 + log.debug('Validating redirection uri %s for client %s.', + request.redirect_uri, request.client_id) + + # OPTIONAL. As described in Section 3.1.2. + # https://tools.ietf.org/html/rfc6749#section-3.1.2 + self._handle_redirects(request) + + # Then check for normal errors. + + # If the resource owner denies the access request or if the request + # fails for reasons other than a missing or invalid redirection URI, + # the authorization server informs the client by adding the following + # parameters to the query component of the redirection URI using the + # "application/x-www-form-urlencoded" format, per Appendix B. + # https://tools.ietf.org/html/rfc6749#appendix-B + + # Note that the correct parameters to be added are automatically + # populated through the use of specific exceptions. + + request_info = {} + for validator in self.custom_validators.pre_auth: + request_info.update(validator(request)) + + # REQUIRED. + if request.response_type is None: + raise errors.MissingResponseTypeError(request=request) + # Value MUST be set to "code" or one of the OpenID authorization code including + # response_types "code token", "code id_token", "code token id_token" + elif not 'code' in request.response_type and request.response_type != 'none': + raise errors.UnsupportedResponseTypeError(request=request) + + if not self.request_validator.validate_response_type(request.client_id, + request.response_type, + request.client, request): + + log.debug('Client %s is not authorized to use response_type %s.', + request.client_id, request.response_type) + raise errors.UnauthorizedClientError(request=request) + + # OPTIONAL. Validate PKCE request or reply with "error"/"invalid_request" + # https://tools.ietf.org/html/rfc6749#section-4.4.1 + if self.request_validator.is_pkce_required(request.client_id, request) is True: + if request.code_challenge is None: + raise errors.MissingCodeChallengeError(request=request) + + if request.code_challenge is not None: + request_info["code_challenge"] = request.code_challenge + + # OPTIONAL, defaults to "plain" if not present in the request. + if request.code_challenge_method is None: + request.code_challenge_method = "plain" + + if request.code_challenge_method not in self._code_challenge_methods: + raise errors.UnsupportedCodeChallengeMethodError(request=request) + request_info["code_challenge_method"] = request.code_challenge_method + + # OPTIONAL. The scope of the access request as described by Section 3.3 + # https://tools.ietf.org/html/rfc6749#section-3.3 + self.validate_scopes(request) + + request_info.update({ + 'client_id': request.client_id, + 'redirect_uri': request.redirect_uri, + 'response_type': request.response_type, + 'state': request.state, + 'request': request + }) + + for validator in self.custom_validators.post_auth: + request_info.update(validator(request)) + + return request.scopes, request_info + + def validate_token_request(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + # REQUIRED. Value MUST be set to "authorization_code". + if request.grant_type not in ('authorization_code', 'openid'): + raise errors.UnsupportedGrantTypeError(request=request) + + for validator in self.custom_validators.pre_token: + validator(request) + + if request.code is None: + raise errors.InvalidRequestError( + description='Missing code parameter.', request=request) + + for param in ('client_id', 'grant_type', 'redirect_uri'): + if param in request.duplicate_params: + raise errors.InvalidRequestError(description='Duplicate %s parameter.' % param, + request=request) + + if self.request_validator.client_authentication_required(request): + # If the client type is confidential or the client was issued client + # credentials (or assigned other authentication requirements), the + # client MUST authenticate with the authorization server as described + # in Section 3.2.1. + # https://tools.ietf.org/html/rfc6749#section-3.2.1 + if not self.request_validator.authenticate_client(request): + log.debug('Client authentication failed, %r.', request) + raise errors.InvalidClientError(request=request) + elif not self.request_validator.authenticate_client_id(request.client_id, request): + # REQUIRED, if the client is not authenticating with the + # authorization server as described in Section 3.2.1. + # https://tools.ietf.org/html/rfc6749#section-3.2.1 + log.debug('Client authentication failed, %r.', request) + raise errors.InvalidClientError(request=request) + + if not hasattr(request.client, 'client_id'): + raise NotImplementedError('Authenticate client must set the ' + 'request.client.client_id attribute ' + 'in authenticate_client.') + + request.client_id = request.client_id or request.client.client_id + + # Ensure client is authorized use of this grant type + self.validate_grant_type(request) + + # REQUIRED. The authorization code received from the + # authorization server. + if not self.request_validator.validate_code(request.client_id, + request.code, request.client, request): + log.debug('Client, %r (%r), is not allowed access to scopes %r.', + request.client_id, request.client, request.scopes) + raise errors.InvalidGrantError(request=request) + + # OPTIONAL. Validate PKCE code_verifier + challenge = self.request_validator.get_code_challenge(request.code, request) + + if challenge is not None: + if request.code_verifier is None: + raise errors.MissingCodeVerifierError(request=request) + + challenge_method = self.request_validator.get_code_challenge_method(request.code, request) + if challenge_method is None: + raise errors.InvalidGrantError(request=request, description="Challenge method not found") + + if challenge_method not in self._code_challenge_methods: + raise errors.ServerError( + description="code_challenge_method {} is not supported.".format(challenge_method), + request=request + ) + + if not self.validate_code_challenge(challenge, + challenge_method, + request.code_verifier): + log.debug('request provided a invalid code_verifier.') + raise errors.InvalidGrantError(request=request) + elif self.request_validator.is_pkce_required(request.client_id, request) is True: + if request.code_verifier is None: + raise errors.MissingCodeVerifierError(request=request) + raise errors.InvalidGrantError(request=request, description="Challenge not found") + + for attr in ('user', 'scopes'): + if getattr(request, attr, None) is None: + log.debug('request.%s was not set on code validation.', attr) + + # REQUIRED, if the "redirect_uri" parameter was included in the + # authorization request as described in Section 4.1.1, and their + # values MUST be identical. + if request.redirect_uri is None: + request.using_default_redirect_uri = True + request.redirect_uri = self.request_validator.get_default_redirect_uri( + request.client_id, request) + log.debug('Using default redirect_uri %s.', request.redirect_uri) + if not request.redirect_uri: + raise errors.MissingRedirectURIError(request=request) + else: + request.using_default_redirect_uri = False + log.debug('Using provided redirect_uri %s', request.redirect_uri) + + if not self.request_validator.confirm_redirect_uri(request.client_id, request.code, + request.redirect_uri, request.client, + request): + log.debug('Redirect_uri (%r) invalid for client %r (%r).', + request.redirect_uri, request.client_id, request.client) + raise errors.MismatchingRedirectURIError(request=request) + + for validator in self.custom_validators.post_token: + validator(request) + + def validate_code_challenge(self, challenge, challenge_method, verifier): + if challenge_method in self._code_challenge_methods: + return self._code_challenge_methods[challenge_method](verifier, challenge) + raise NotImplementedError('Unknown challenge_method %s' % challenge_method) diff --git a/lib/oauthlib/oauth2/rfc6749/grant_types/base.py b/lib/oauthlib/oauth2/rfc6749/grant_types/base.py new file mode 100644 index 00000000..a64f168c --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/grant_types/base.py @@ -0,0 +1,250 @@ +""" +oauthlib.oauth2.rfc6749.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging +from itertools import chain + +from oauthlib.common import add_params_to_uri +from oauthlib.oauth2.rfc6749 import errors, utils +from oauthlib.uri_validate import is_absolute_uri + +from ..request_validator import RequestValidator + +log = logging.getLogger(__name__) + + +class ValidatorsContainer: + """ + Container object for holding custom validator callables to be invoked + as part of the grant type `validate_authorization_request()` or + `validate_authorization_request()` methods on the various grant types. + + Authorization validators must be callables that take a request object and + return a dict, which may contain items to be added to the `request_info` + returned from the grant_type after validation. + + Token validators must be callables that take a request object and + return None. + + Both authorization validators and token validators may raise OAuth2 + exceptions if validation conditions fail. + + Authorization validators added to `pre_auth` will be run BEFORE + the standard validations (but after the critical ones that raise + fatal errors) as part of `validate_authorization_request()` + + Authorization validators added to `post_auth` will be run AFTER + the standard validations as part of `validate_authorization_request()` + + Token validators added to `pre_token` will be run BEFORE + the standard validations as part of `validate_token_request()` + + Token validators added to `post_token` will be run AFTER + the standard validations as part of `validate_token_request()` + + For example: + + >>> def my_auth_validator(request): + ... return {'myval': True} + >>> auth_code_grant = AuthorizationCodeGrant(request_validator) + >>> auth_code_grant.custom_validators.pre_auth.append(my_auth_validator) + >>> def my_token_validator(request): + ... if not request.everything_okay: + ... raise errors.OAuth2Error("uh-oh") + >>> auth_code_grant.custom_validators.post_token.append(my_token_validator) + """ + + def __init__(self, post_auth, post_token, + pre_auth, pre_token): + self.pre_auth = pre_auth + self.post_auth = post_auth + self.pre_token = pre_token + self.post_token = post_token + + @property + def all_pre(self): + return chain(self.pre_auth, self.pre_token) + + @property + def all_post(self): + return chain(self.post_auth, self.post_token) + + +class GrantTypeBase: + error_uri = None + request_validator = None + default_response_mode = 'fragment' + refresh_token = True + response_types = ['code'] + + def __init__(self, request_validator=None, **kwargs): + self.request_validator = request_validator or RequestValidator() + + # Transforms class variables into instance variables: + self.response_types = self.response_types + self.refresh_token = self.refresh_token + self._setup_custom_validators(kwargs) + self._code_modifiers = [] + self._token_modifiers = [] + + for kw, val in kwargs.items(): + setattr(self, kw, val) + + def _setup_custom_validators(self, kwargs): + post_auth = kwargs.get('post_auth', []) + post_token = kwargs.get('post_token', []) + pre_auth = kwargs.get('pre_auth', []) + pre_token = kwargs.get('pre_token', []) + if not hasattr(self, 'validate_authorization_request'): + if post_auth or pre_auth: + msg = ("{} does not support authorization validators. Use " + "token validators instead.").format(self.__class__.__name__) + raise ValueError(msg) + # Using tuples here because they can't be appended to: + post_auth, pre_auth = (), () + self.custom_validators = ValidatorsContainer(post_auth, post_token, + pre_auth, pre_token) + + def register_response_type(self, response_type): + self.response_types.append(response_type) + + def register_code_modifier(self, modifier): + self._code_modifiers.append(modifier) + + def register_token_modifier(self, modifier): + self._token_modifiers.append(modifier) + + def create_authorization_response(self, request, token_handler): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + """ + raise NotImplementedError('Subclasses must implement this method.') + + def create_token_response(self, request, token_handler): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + """ + raise NotImplementedError('Subclasses must implement this method.') + + def add_token(self, token, token_handler, request): + """ + :param token: + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + # Only add a hybrid access token on auth step if asked for + if not request.response_type in ["token", "code token", "id_token token", "code id_token token"]: + return token + + token.update(token_handler.create_token(request, refresh_token=False)) + return token + + def validate_grant_type(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + client_id = getattr(request, 'client_id', None) + if not self.request_validator.validate_grant_type(client_id, + request.grant_type, request.client, request): + log.debug('Unauthorized from %r (%r) access to grant type %s.', + request.client_id, request.client, request.grant_type) + raise errors.UnauthorizedClientError(request=request) + + def validate_scopes(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + if not request.scopes: + request.scopes = utils.scope_to_list(request.scope) or utils.scope_to_list( + self.request_validator.get_default_scopes(request.client_id, request)) + log.debug('Validating access to scopes %r for client %r (%r).', + request.scopes, request.client_id, request.client) + if not self.request_validator.validate_scopes(request.client_id, + request.scopes, request.client, request): + raise errors.InvalidScopeError(request=request) + + def prepare_authorization_response(self, request, token, headers, body, status): + """Place token according to response mode. + + Base classes can define a default response mode for their authorization + response by overriding the static `default_response_mode` member. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token: + :param headers: + :param body: + :param status: + """ + request.response_mode = request.response_mode or self.default_response_mode + + if request.response_mode not in ('query', 'fragment'): + log.debug('Overriding invalid response mode %s with %s', + request.response_mode, self.default_response_mode) + request.response_mode = self.default_response_mode + + token_items = token.items() + + if request.response_type == 'none': + state = token.get('state', None) + if state: + token_items = [('state', state)] + else: + token_items = [] + + if request.response_mode == 'query': + headers['Location'] = add_params_to_uri( + request.redirect_uri, token_items, fragment=False) + return headers, body, status + + if request.response_mode == 'fragment': + headers['Location'] = add_params_to_uri( + request.redirect_uri, token_items, fragment=True) + return headers, body, status + + raise NotImplementedError( + 'Subclasses must set a valid default_response_mode') + + def _get_default_headers(self): + """Create default headers for grant responses.""" + return { + 'Content-Type': 'application/json', + 'Cache-Control': 'no-store', + 'Pragma': 'no-cache', + } + + def _handle_redirects(self, request): + if request.redirect_uri is not None: + request.using_default_redirect_uri = False + log.debug('Using provided redirect_uri %s', request.redirect_uri) + if not is_absolute_uri(request.redirect_uri): + raise errors.InvalidRedirectURIError(request=request) + + # The authorization server MUST verify that the redirection URI + # to which it will redirect the access token matches a + # redirection URI registered by the client as described in + # Section 3.1.2. + # https://tools.ietf.org/html/rfc6749#section-3.1.2 + if not self.request_validator.validate_redirect_uri( + request.client_id, request.redirect_uri, request): + raise errors.MismatchingRedirectURIError(request=request) + else: + request.redirect_uri = self.request_validator.get_default_redirect_uri( + request.client_id, request) + request.using_default_redirect_uri = True + log.debug('Using default redirect_uri %s.', request.redirect_uri) + if not request.redirect_uri: + raise errors.MissingRedirectURIError(request=request) + if not is_absolute_uri(request.redirect_uri): + raise errors.InvalidRedirectURIError(request=request) diff --git a/lib/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py b/lib/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py new file mode 100644 index 00000000..e7b46189 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/grant_types/client_credentials.py @@ -0,0 +1,123 @@ +""" +oauthlib.oauth2.rfc6749.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import json +import logging + +from .. import errors +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class ClientCredentialsGrant(GrantTypeBase): + + """`Client Credentials Grant`_ + + The client can request an access token using only its client + credentials (or other supported means of authentication) when the + client is requesting access to the protected resources under its + control, or those of another resource owner that have been previously + arranged with the authorization server (the method of which is beyond + the scope of this specification). + + The client credentials grant type MUST only be used by confidential + clients:: + + +---------+ +---------------+ + : : : : + : :>-- A - Client Authentication --->: Authorization : + : Client : : Server : + : :<-- B ---- Access Token ---------<: : + : : : : + +---------+ +---------------+ + + Figure 6: Client Credentials Flow + + The flow illustrated in Figure 6 includes the following steps: + + (A) The client authenticates with the authorization server and + requests an access token from the token endpoint. + + (B) The authorization server authenticates the client, and if valid, + issues an access token. + + .. _`Client Credentials Grant`: https://tools.ietf.org/html/rfc6749#section-4.4 + """ + + def create_token_response(self, request, token_handler): + """Return token or error in JSON format. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + + If the access token request is valid and authorized, the + authorization server issues an access token as described in + `Section 5.1`_. A refresh token SHOULD NOT be included. If the request + failed client authentication or is invalid, the authorization server + returns an error response as described in `Section 5.2`_. + + .. _`Section 5.1`: https://tools.ietf.org/html/rfc6749#section-5.1 + .. _`Section 5.2`: https://tools.ietf.org/html/rfc6749#section-5.2 + """ + headers = self._get_default_headers() + try: + log.debug('Validating access token request, %r.', request) + self.validate_token_request(request) + except errors.OAuth2Error as e: + log.debug('Client error in token request. %s.', e) + headers.update(e.headers) + return headers, e.json, e.status_code + + token = token_handler.create_token(request, refresh_token=False) + + for modifier in self._token_modifiers: + token = modifier(token) + + self.request_validator.save_token(token, request) + + log.debug('Issuing token to client id %r (%r), %r.', + request.client_id, request.client, token) + return headers, json.dumps(token), 200 + + def validate_token_request(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + for validator in self.custom_validators.pre_token: + validator(request) + + if not getattr(request, 'grant_type', None): + raise errors.InvalidRequestError('Request is missing grant type.', + request=request) + + if not request.grant_type == 'client_credentials': + raise errors.UnsupportedGrantTypeError(request=request) + + for param in ('grant_type', 'scope'): + if param in request.duplicate_params: + raise errors.InvalidRequestError(description='Duplicate %s parameter.' % param, + request=request) + + log.debug('Authenticating client, %r.', request) + if not self.request_validator.authenticate_client(request): + log.debug('Client authentication failed, %r.', request) + raise errors.InvalidClientError(request=request) + else: + if not hasattr(request.client, 'client_id'): + raise NotImplementedError('Authenticate client must set the ' + 'request.client.client_id attribute ' + 'in authenticate_client.') + # Ensure client is authorized use of this grant type + self.validate_grant_type(request) + + request.client_id = request.client_id or request.client.client_id + log.debug('Authorizing access to client %r.', request.client_id) + self.validate_scopes(request) + + for validator in self.custom_validators.post_token: + validator(request) diff --git a/lib/oauthlib/oauth2/rfc6749/grant_types/implicit.py b/lib/oauthlib/oauth2/rfc6749/grant_types/implicit.py new file mode 100644 index 00000000..6110b6f3 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/grant_types/implicit.py @@ -0,0 +1,376 @@ +""" +oauthlib.oauth2.rfc6749.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging + +from oauthlib import common + +from .. import errors +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class ImplicitGrant(GrantTypeBase): + + """`Implicit Grant`_ + + The implicit grant type is used to obtain access tokens (it does not + support the issuance of refresh tokens) and is optimized for public + clients known to operate a particular redirection URI. These clients + are typically implemented in a browser using a scripting language + such as JavaScript. + + Unlike the authorization code grant type, in which the client makes + separate requests for authorization and for an access token, the + client receives the access token as the result of the authorization + request. + + The implicit grant type does not include client authentication, and + relies on the presence of the resource owner and the registration of + the redirection URI. Because the access token is encoded into the + redirection URI, it may be exposed to the resource owner and other + applications residing on the same device:: + + +----------+ + | Resource | + | Owner | + | | + +----------+ + ^ + | + (B) + +----|-----+ Client Identifier +---------------+ + | -+----(A)-- & Redirection URI --->| | + | User- | | Authorization | + | Agent -|----(B)-- User authenticates -->| Server | + | | | | + | |<---(C)--- Redirection URI ----<| | + | | with Access Token +---------------+ + | | in Fragment + | | +---------------+ + | |----(D)--- Redirection URI ---->| Web-Hosted | + | | without Fragment | Client | + | | | Resource | + | (F) |<---(E)------- Script ---------<| | + | | +---------------+ + +-|--------+ + | | + (A) (G) Access Token + | | + ^ v + +---------+ + | | + | Client | + | | + +---------+ + + Note: The lines illustrating steps (A) and (B) are broken into two + parts as they pass through the user-agent. + + Figure 4: Implicit Grant Flow + + The flow illustrated in Figure 4 includes the following steps: + + (A) The client initiates the flow by directing the resource owner's + user-agent to the authorization endpoint. The client includes + its client identifier, requested scope, local state, and a + redirection URI to which the authorization server will send the + user-agent back once access is granted (or denied). + + (B) The authorization server authenticates the resource owner (via + the user-agent) and establishes whether the resource owner + grants or denies the client's access request. + + (C) Assuming the resource owner grants access, the authorization + server redirects the user-agent back to the client using the + redirection URI provided earlier. The redirection URI includes + the access token in the URI fragment. + + (D) The user-agent follows the redirection instructions by making a + request to the web-hosted client resource (which does not + include the fragment per [RFC2616]). The user-agent retains the + fragment information locally. + + (E) The web-hosted client resource returns a web page (typically an + HTML document with an embedded script) capable of accessing the + full redirection URI including the fragment retained by the + user-agent, and extracting the access token (and other + parameters) contained in the fragment. + + (F) The user-agent executes the script provided by the web-hosted + client resource locally, which extracts the access token. + + (G) The user-agent passes the access token to the client. + + See `Section 10.3`_ and `Section 10.16`_ for important security considerations + when using the implicit grant. + + .. _`Implicit Grant`: https://tools.ietf.org/html/rfc6749#section-4.2 + .. _`Section 10.3`: https://tools.ietf.org/html/rfc6749#section-10.3 + .. _`Section 10.16`: https://tools.ietf.org/html/rfc6749#section-10.16 + """ + + response_types = ['token'] + grant_allows_refresh_token = False + + def create_authorization_response(self, request, token_handler): + """Create an authorization response. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + + The client constructs the request URI by adding the following + parameters to the query component of the authorization endpoint URI + using the "application/x-www-form-urlencoded" format, per `Appendix B`_: + + response_type + REQUIRED. Value MUST be set to "token" for standard OAuth2 implicit flow + or "id_token token" or just "id_token" for OIDC implicit flow + + client_id + REQUIRED. The client identifier as described in `Section 2.2`_. + + redirect_uri + OPTIONAL. As described in `Section 3.1.2`_. + + scope + OPTIONAL. The scope of the access request as described by + `Section 3.3`_. + + state + RECOMMENDED. An opaque value used by the client to maintain + state between the request and callback. The authorization + server includes this value when redirecting the user-agent back + to the client. The parameter SHOULD be used for preventing + cross-site request forgery as described in `Section 10.12`_. + + The authorization server validates the request to ensure that all + required parameters are present and valid. The authorization server + MUST verify that the redirection URI to which it will redirect the + access token matches a redirection URI registered by the client as + described in `Section 3.1.2`_. + + .. _`Section 2.2`: https://tools.ietf.org/html/rfc6749#section-2.2 + .. _`Section 3.1.2`: https://tools.ietf.org/html/rfc6749#section-3.1.2 + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 10.12`: https://tools.ietf.org/html/rfc6749#section-10.12 + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + """ + return self.create_token_response(request, token_handler) + + def create_token_response(self, request, token_handler): + """Return token or error embedded in the URI fragment. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + + If the resource owner grants the access request, the authorization + server issues an access token and delivers it to the client by adding + the following parameters to the fragment component of the redirection + URI using the "application/x-www-form-urlencoded" format, per + `Appendix B`_: + + access_token + REQUIRED. The access token issued by the authorization server. + + token_type + REQUIRED. The type of the token issued as described in + `Section 7.1`_. Value is case insensitive. + + expires_in + RECOMMENDED. The lifetime in seconds of the access token. For + example, the value "3600" denotes that the access token will + expire in one hour from the time the response was generated. + If omitted, the authorization server SHOULD provide the + expiration time via other means or document the default value. + + scope + OPTIONAL, if identical to the scope requested by the client; + otherwise, REQUIRED. The scope of the access token as + described by `Section 3.3`_. + + state + REQUIRED if the "state" parameter was present in the client + authorization request. The exact value received from the + client. + + The authorization server MUST NOT issue a refresh token. + + .. _`Appendix B`: https://tools.ietf.org/html/rfc6749#appendix-B + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 7.1`: https://tools.ietf.org/html/rfc6749#section-7.1 + """ + try: + self.validate_token_request(request) + + # If the request fails due to a missing, invalid, or mismatching + # redirection URI, or if the client identifier is missing or invalid, + # the authorization server SHOULD inform the resource owner of the + # error and MUST NOT automatically redirect the user-agent to the + # invalid redirection URI. + except errors.FatalClientError as e: + log.debug('Fatal client error during validation of %r. %r.', + request, e) + raise + + # If the resource owner denies the access request or if the request + # fails for reasons other than a missing or invalid redirection URI, + # the authorization server informs the client by adding the following + # parameters to the fragment component of the redirection URI using the + # "application/x-www-form-urlencoded" format, per Appendix B: + # https://tools.ietf.org/html/rfc6749#appendix-B + except errors.OAuth2Error as e: + log.debug('Client error during validation of %r. %r.', request, e) + return {'Location': common.add_params_to_uri(request.redirect_uri, e.twotuples, + fragment=True)}, None, 302 + + # In OIDC implicit flow it is possible to have a request_type that does not include the access_token! + # "id_token token" - return the access token and the id token + # "id_token" - don't return the access token + if "token" in request.response_type.split(): + token = token_handler.create_token(request, refresh_token=False) + else: + token = {} + + if request.state is not None: + token['state'] = request.state + + for modifier in self._token_modifiers: + token = modifier(token, token_handler, request) + + # In OIDC implicit flow it is possible to have a request_type that does + # not include the access_token! In this case there is no need to save a token. + if "token" in request.response_type.split(): + self.request_validator.save_token(token, request) + + return self.prepare_authorization_response( + request, token, {}, None, 302) + + def validate_authorization_request(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + return self.validate_token_request(request) + + def validate_token_request(self, request): + """Check the token request for normal and fatal errors. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + This method is very similar to validate_authorization_request in + the AuthorizationCodeGrant but differ in a few subtle areas. + + A normal error could be a missing response_type parameter or the client + attempting to access scope it is not allowed to ask authorization for. + Normal errors can safely be included in the redirection URI and + sent back to the client. + + Fatal errors occur when the client_id or redirect_uri is invalid or + missing. These must be caught by the provider and handled, how this + is done is outside of the scope of OAuthLib but showing an error + page describing the issue is a good idea. + """ + + # First check for fatal errors + + # If the request fails due to a missing, invalid, or mismatching + # redirection URI, or if the client identifier is missing or invalid, + # the authorization server SHOULD inform the resource owner of the + # error and MUST NOT automatically redirect the user-agent to the + # invalid redirection URI. + + # First check duplicate parameters + for param in ('client_id', 'response_type', 'redirect_uri', 'scope', 'state'): + try: + duplicate_params = request.duplicate_params + except ValueError: + raise errors.InvalidRequestFatalError(description='Unable to parse query string', request=request) + if param in duplicate_params: + raise errors.InvalidRequestFatalError(description='Duplicate %s parameter.' % param, request=request) + + # REQUIRED. The client identifier as described in Section 2.2. + # https://tools.ietf.org/html/rfc6749#section-2.2 + if not request.client_id: + raise errors.MissingClientIdError(request=request) + + if not self.request_validator.validate_client_id(request.client_id, request): + raise errors.InvalidClientIdError(request=request) + + # OPTIONAL. As described in Section 3.1.2. + # https://tools.ietf.org/html/rfc6749#section-3.1.2 + self._handle_redirects(request) + + # Then check for normal errors. + + request_info = self._run_custom_validators(request, + self.custom_validators.all_pre) + + # If the resource owner denies the access request or if the request + # fails for reasons other than a missing or invalid redirection URI, + # the authorization server informs the client by adding the following + # parameters to the fragment component of the redirection URI using the + # "application/x-www-form-urlencoded" format, per Appendix B. + # https://tools.ietf.org/html/rfc6749#appendix-B + + # Note that the correct parameters to be added are automatically + # populated through the use of specific exceptions + + # REQUIRED. + if request.response_type is None: + raise errors.MissingResponseTypeError(request=request) + # Value MUST be one of our registered types: "token" by default or if using OIDC "id_token" or "id_token token" + elif not set(request.response_type.split()).issubset(self.response_types): + raise errors.UnsupportedResponseTypeError(request=request) + + log.debug('Validating use of response_type token for client %r (%r).', + request.client_id, request.client) + if not self.request_validator.validate_response_type(request.client_id, + request.response_type, + request.client, request): + + log.debug('Client %s is not authorized to use response_type %s.', + request.client_id, request.response_type) + raise errors.UnauthorizedClientError(request=request) + + # OPTIONAL. The scope of the access request as described by Section 3.3 + # https://tools.ietf.org/html/rfc6749#section-3.3 + self.validate_scopes(request) + + request_info.update({ + 'client_id': request.client_id, + 'redirect_uri': request.redirect_uri, + 'response_type': request.response_type, + 'state': request.state, + 'request': request, + }) + + request_info = self._run_custom_validators( + request, + self.custom_validators.all_post, + request_info + ) + + return request.scopes, request_info + + def _run_custom_validators(self, + request, + validations, + request_info=None): + # Make a copy so we don't modify the existing request_info dict + request_info = {} if request_info is None else request_info.copy() + # For implicit grant, auth_validators and token_validators are + # basically equivalent since the token is returned from the + # authorization endpoint. + for validator in validations: + result = validator(request) + if result is not None: + request_info.update(result) + return request_info diff --git a/lib/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py b/lib/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py new file mode 100644 index 00000000..8698a3d5 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/grant_types/refresh_token.py @@ -0,0 +1,135 @@ +""" +oauthlib.oauth2.rfc6749.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import json +import logging + +from .. import errors, utils +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class RefreshTokenGrant(GrantTypeBase): + + """`Refresh token grant`_ + + .. _`Refresh token grant`: https://tools.ietf.org/html/rfc6749#section-6 + """ + + def __init__(self, request_validator=None, + issue_new_refresh_tokens=True, + **kwargs): + super().__init__( + request_validator, + issue_new_refresh_tokens=issue_new_refresh_tokens, + **kwargs) + + def create_token_response(self, request, token_handler): + """Create a new access token from a refresh_token. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + + If valid and authorized, the authorization server issues an access + token as described in `Section 5.1`_. If the request failed + verification or is invalid, the authorization server returns an error + response as described in `Section 5.2`_. + + The authorization server MAY issue a new refresh token, in which case + the client MUST discard the old refresh token and replace it with the + new refresh token. The authorization server MAY revoke the old + refresh token after issuing a new refresh token to the client. If a + new refresh token is issued, the refresh token scope MUST be + identical to that of the refresh token included by the client in the + request. + + .. _`Section 5.1`: https://tools.ietf.org/html/rfc6749#section-5.1 + .. _`Section 5.2`: https://tools.ietf.org/html/rfc6749#section-5.2 + """ + headers = self._get_default_headers() + try: + log.debug('Validating refresh token request, %r.', request) + self.validate_token_request(request) + except errors.OAuth2Error as e: + log.debug('Client error in token request, %s.', e) + headers.update(e.headers) + return headers, e.json, e.status_code + + token = token_handler.create_token(request, + refresh_token=self.issue_new_refresh_tokens) + + for modifier in self._token_modifiers: + token = modifier(token) + + self.request_validator.save_token(token, request) + + log.debug('Issuing new token to client id %r (%r), %r.', + request.client_id, request.client, token) + return headers, json.dumps(token), 200 + + def validate_token_request(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + # REQUIRED. Value MUST be set to "refresh_token". + if request.grant_type != 'refresh_token': + raise errors.UnsupportedGrantTypeError(request=request) + + for validator in self.custom_validators.pre_token: + validator(request) + + if request.refresh_token is None: + raise errors.InvalidRequestError( + description='Missing refresh token parameter.', + request=request) + + # Because refresh tokens are typically long-lasting credentials used to + # request additional access tokens, the refresh token is bound to the + # client to which it was issued. If the client type is confidential or + # the client was issued client credentials (or assigned other + # authentication requirements), the client MUST authenticate with the + # authorization server as described in Section 3.2.1. + # https://tools.ietf.org/html/rfc6749#section-3.2.1 + if self.request_validator.client_authentication_required(request): + log.debug('Authenticating client, %r.', request) + if not self.request_validator.authenticate_client(request): + log.debug('Invalid client (%r), denying access.', request) + raise errors.InvalidClientError(request=request) + elif not self.request_validator.authenticate_client_id(request.client_id, request): + log.debug('Client authentication failed, %r.', request) + raise errors.InvalidClientError(request=request) + + # Ensure client is authorized use of this grant type + self.validate_grant_type(request) + + # REQUIRED. The refresh token issued to the client. + log.debug('Validating refresh token %s for client %r.', + request.refresh_token, request.client) + if not self.request_validator.validate_refresh_token( + request.refresh_token, request.client, request): + log.debug('Invalid refresh token, %s, for client %r.', + request.refresh_token, request.client) + raise errors.InvalidGrantError(request=request) + + original_scopes = utils.scope_to_list( + self.request_validator.get_original_scopes( + request.refresh_token, request)) + + if request.scope: + request.scopes = utils.scope_to_list(request.scope) + if (not all(s in original_scopes for s in request.scopes) + and not self.request_validator.is_within_original_scope( + request.scopes, request.refresh_token, request)): + log.debug('Refresh token %s lack requested scopes, %r.', + request.refresh_token, request.scopes) + raise errors.InvalidScopeError(request=request) + else: + request.scopes = original_scopes + + for validator in self.custom_validators.post_token: + validator(request) diff --git a/lib/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py b/lib/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py new file mode 100644 index 00000000..4b0de5bf --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/grant_types/resource_owner_password_credentials.py @@ -0,0 +1,199 @@ +""" +oauthlib.oauth2.rfc6749.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import json +import logging + +from .. import errors +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class ResourceOwnerPasswordCredentialsGrant(GrantTypeBase): + + """`Resource Owner Password Credentials Grant`_ + + The resource owner password credentials grant type is suitable in + cases where the resource owner has a trust relationship with the + client, such as the device operating system or a highly privileged + application. The authorization server should take special care when + enabling this grant type and only allow it when other flows are not + viable. + + This grant type is suitable for clients capable of obtaining the + resource owner's credentials (username and password, typically using + an interactive form). It is also used to migrate existing clients + using direct authentication schemes such as HTTP Basic or Digest + authentication to OAuth by converting the stored credentials to an + access token:: + + +----------+ + | Resource | + | Owner | + | | + +----------+ + v + | Resource Owner + (A) Password Credentials + | + v + +---------+ +---------------+ + | |>--(B)---- Resource Owner ------->| | + | | Password Credentials | Authorization | + | Client | | Server | + | |<--(C)---- Access Token ---------<| | + | | (w/ Optional Refresh Token) | | + +---------+ +---------------+ + + Figure 5: Resource Owner Password Credentials Flow + + The flow illustrated in Figure 5 includes the following steps: + + (A) The resource owner provides the client with its username and + password. + + (B) The client requests an access token from the authorization + server's token endpoint by including the credentials received + from the resource owner. When making the request, the client + authenticates with the authorization server. + + (C) The authorization server authenticates the client and validates + the resource owner credentials, and if valid, issues an access + token. + + .. _`Resource Owner Password Credentials Grant`: https://tools.ietf.org/html/rfc6749#section-4.3 + """ + + def create_token_response(self, request, token_handler): + """Return token or error in json format. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param token_handler: A token handler instance, for example of type + oauthlib.oauth2.BearerToken. + + If the access token request is valid and authorized, the + authorization server issues an access token and optional refresh + token as described in `Section 5.1`_. If the request failed client + authentication or is invalid, the authorization server returns an + error response as described in `Section 5.2`_. + + .. _`Section 5.1`: https://tools.ietf.org/html/rfc6749#section-5.1 + .. _`Section 5.2`: https://tools.ietf.org/html/rfc6749#section-5.2 + """ + headers = self._get_default_headers() + try: + if self.request_validator.client_authentication_required(request): + log.debug('Authenticating client, %r.', request) + if not self.request_validator.authenticate_client(request): + log.debug('Client authentication failed, %r.', request) + raise errors.InvalidClientError(request=request) + elif not self.request_validator.authenticate_client_id(request.client_id, request): + log.debug('Client authentication failed, %r.', request) + raise errors.InvalidClientError(request=request) + log.debug('Validating access token request, %r.', request) + self.validate_token_request(request) + except errors.OAuth2Error as e: + log.debug('Client error in token request, %s.', e) + headers.update(e.headers) + return headers, e.json, e.status_code + + token = token_handler.create_token(request, self.refresh_token) + + for modifier in self._token_modifiers: + token = modifier(token) + + self.request_validator.save_token(token, request) + + log.debug('Issuing token %r to client id %r (%r) and username %s.', + token, request.client_id, request.client, request.username) + return headers, json.dumps(token), 200 + + def validate_token_request(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + The client makes a request to the token endpoint by adding the + following parameters using the "application/x-www-form-urlencoded" + format per Appendix B with a character encoding of UTF-8 in the HTTP + request entity-body: + + grant_type + REQUIRED. Value MUST be set to "password". + + username + REQUIRED. The resource owner username. + + password + REQUIRED. The resource owner password. + + scope + OPTIONAL. The scope of the access request as described by + `Section 3.3`_. + + If the client type is confidential or the client was issued client + credentials (or assigned other authentication requirements), the + client MUST authenticate with the authorization server as described + in `Section 3.2.1`_. + + The authorization server MUST: + + o require client authentication for confidential clients or for any + client that was issued client credentials (or with other + authentication requirements), + + o authenticate the client if client authentication is included, and + + o validate the resource owner password credentials using its + existing password validation algorithm. + + Since this access token request utilizes the resource owner's + password, the authorization server MUST protect the endpoint against + brute force attacks (e.g., using rate-limitation or generating + alerts). + + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`Section 3.2.1`: https://tools.ietf.org/html/rfc6749#section-3.2.1 + """ + for validator in self.custom_validators.pre_token: + validator(request) + + for param in ('grant_type', 'username', 'password'): + if not getattr(request, param, None): + raise errors.InvalidRequestError( + 'Request is missing %s parameter.' % param, request=request) + + for param in ('grant_type', 'username', 'password', 'scope'): + if param in request.duplicate_params: + raise errors.InvalidRequestError(description='Duplicate %s parameter.' % param, request=request) + + # This error should rarely (if ever) occur if requests are routed to + # grant type handlers based on the grant_type parameter. + if not request.grant_type == 'password': + raise errors.UnsupportedGrantTypeError(request=request) + + log.debug('Validating username %s.', request.username) + if not self.request_validator.validate_user(request.username, + request.password, request.client, request): + raise errors.InvalidGrantError( + 'Invalid credentials given.', request=request) + else: + if not hasattr(request.client, 'client_id'): + raise NotImplementedError( + 'Validate user must set the ' + 'request.client.client_id attribute ' + 'in authenticate_client.') + log.debug('Authorizing access to user %r.', request.user) + + # Ensure client is authorized use of this grant type + self.validate_grant_type(request) + + if request.client: + request.client_id = request.client_id or request.client.client_id + self.validate_scopes(request) + + for validator in self.custom_validators.post_token: + validator(request) diff --git a/lib/oauthlib/oauth2/rfc6749/parameters.py b/lib/oauthlib/oauth2/rfc6749/parameters.py new file mode 100644 index 00000000..f07b8bd2 --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/parameters.py @@ -0,0 +1,455 @@ +""" +oauthlib.oauth2.rfc6749.parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module contains methods related to `Section 4`_ of the OAuth 2 RFC. + +.. _`Section 4`: https://tools.ietf.org/html/rfc6749#section-4 +""" +import json +import os +import time +import urllib.parse as urlparse + +from oauthlib.common import add_params_to_qs, add_params_to_uri +from oauthlib.signals import scope_changed + +from .errors import ( + InsecureTransportError, MismatchingStateError, MissingCodeError, + MissingTokenError, MissingTokenTypeError, raise_from_error, +) +from .tokens import OAuth2Token +from .utils import is_secure_transport, list_to_scope, scope_to_list + + +def prepare_grant_uri(uri, client_id, response_type, redirect_uri=None, + scope=None, state=None, **kwargs): + """Prepare the authorization grant request URI. + + The client constructs the request URI by adding the following + parameters to the query component of the authorization endpoint URI + using the ``application/x-www-form-urlencoded`` format as defined by + [`W3C.REC-html401-19991224`_]: + + :param uri: + :param client_id: The client identifier as described in `Section 2.2`_. + :param response_type: To indicate which OAuth 2 grant/flow is required, + "code" and "token". + :param redirect_uri: The client provided URI to redirect back to after + authorization as described in `Section 3.1.2`_. + :param scope: The scope of the access request as described by + `Section 3.3`_. + :param state: An opaque value used by the client to maintain + state between the request and callback. The authorization + server includes this value when redirecting the user-agent + back to the client. The parameter SHOULD be used for + preventing cross-site request forgery as described in + `Section 10.12`_. + :param kwargs: Extra arguments to embed in the grant/authorization URL. + + An example of an authorization code grant authorization URL: + + .. code-block:: http + + GET /authorize?response_type=code&client_id=s6BhdRkqt3&state=xyz + &redirect_uri=https%3A%2F%2Fclient%2Eexample%2Ecom%2Fcb HTTP/1.1 + Host: server.example.com + + .. _`W3C.REC-html401-19991224`: https://tools.ietf.org/html/rfc6749#ref-W3C.REC-html401-19991224 + .. _`Section 2.2`: https://tools.ietf.org/html/rfc6749#section-2.2 + .. _`Section 3.1.2`: https://tools.ietf.org/html/rfc6749#section-3.1.2 + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`section 10.12`: https://tools.ietf.org/html/rfc6749#section-10.12 + """ + if not is_secure_transport(uri): + raise InsecureTransportError() + + params = [(('response_type', response_type)), + (('client_id', client_id))] + + if redirect_uri: + params.append(('redirect_uri', redirect_uri)) + if scope: + params.append(('scope', list_to_scope(scope))) + if state: + params.append(('state', state)) + + for k in kwargs: + if kwargs[k]: + params.append((str(k), kwargs[k])) + + return add_params_to_uri(uri, params) + + +def prepare_token_request(grant_type, body='', include_client_id=True, **kwargs): + """Prepare the access token request. + + The client makes a request to the token endpoint by adding the + following parameters using the ``application/x-www-form-urlencoded`` + format in the HTTP request entity-body: + + :param grant_type: To indicate grant type being used, i.e. "password", + "authorization_code" or "client_credentials". + + :param body: Existing request body (URL encoded string) to embed parameters + into. This may contain extra parameters. Default ''. + + :param include_client_id: `True` (default) to send the `client_id` in the + body of the upstream request. This is required + if the client is not authenticating with the + authorization server as described in + `Section 3.2.1`_. + :type include_client_id: Boolean + + :param client_id: Unicode client identifier. Will only appear if + `include_client_id` is True. * + + :param client_secret: Unicode client secret. Will only appear if set to a + value that is not `None`. Invoking this function with + an empty string will send an empty `client_secret` + value to the server. * + + :param code: If using authorization_code grant, pass the previously + obtained authorization code as the ``code`` argument. * + + :param redirect_uri: If the "redirect_uri" parameter was included in the + authorization request as described in + `Section 4.1.1`_, and their values MUST be identical. * + + :param kwargs: Extra arguments to embed in the request body. + + Parameters marked with a `*` above are not explicit arguments in the + function signature, but are specially documented arguments for items + appearing in the generic `**kwargs` keyworded input. + + An example of an authorization code token request body: + + .. code-block:: http + + grant_type=authorization_code&code=SplxlOBeZQQYbYS6WxSbIA + &redirect_uri=https%3A%2F%2Fclient%2Eexample%2Ecom%2Fcb + + .. _`Section 4.1.1`: https://tools.ietf.org/html/rfc6749#section-4.1.1 + """ + params = [('grant_type', grant_type)] + + if 'scope' in kwargs: + kwargs['scope'] = list_to_scope(kwargs['scope']) + + # pull the `client_id` out of the kwargs. + client_id = kwargs.pop('client_id', None) + if include_client_id: + if client_id is not None: + params.append(('client_id', client_id)) + + # the kwargs iteration below only supports including boolean truth (truthy) + # values, but some servers may require an empty string for `client_secret` + client_secret = kwargs.pop('client_secret', None) + if client_secret is not None: + params.append(('client_secret', client_secret)) + + # this handles: `code`, `redirect_uri`, and other undocumented params + for k in kwargs: + if kwargs[k]: + params.append((str(k), kwargs[k])) + + return add_params_to_qs(body, params) + + +def prepare_token_revocation_request(url, token, token_type_hint="access_token", + callback=None, body='', **kwargs): + """Prepare a token revocation request. + + The client constructs the request by including the following parameters + using the ``application/x-www-form-urlencoded`` format in the HTTP request + entity-body: + + :param token: REQUIRED. The token that the client wants to get revoked. + + :param token_type_hint: OPTIONAL. A hint about the type of the token + submitted for revocation. Clients MAY pass this + parameter in order to help the authorization server + to optimize the token lookup. If the server is + unable to locate the token using the given hint, it + MUST extend its search across all of its supported + token types. An authorization server MAY ignore + this parameter, particularly if it is able to detect + the token type automatically. + + This specification defines two values for `token_type_hint`: + + * access_token: An access token as defined in [RFC6749], + `Section 1.4`_ + + * refresh_token: A refresh token as defined in [RFC6749], + `Section 1.5`_ + + Specific implementations, profiles, and extensions of this + specification MAY define other values for this parameter using the + registry defined in `Section 4.1.2`_. + + .. _`Section 1.4`: https://tools.ietf.org/html/rfc6749#section-1.4 + .. _`Section 1.5`: https://tools.ietf.org/html/rfc6749#section-1.5 + .. _`Section 4.1.2`: https://tools.ietf.org/html/rfc7009#section-4.1.2 + + """ + if not is_secure_transport(url): + raise InsecureTransportError() + + params = [('token', token)] + + if token_type_hint: + params.append(('token_type_hint', token_type_hint)) + + for k in kwargs: + if kwargs[k]: + params.append((str(k), kwargs[k])) + + headers = {'Content-Type': 'application/x-www-form-urlencoded'} + + if callback: + params.append(('callback', callback)) + return add_params_to_uri(url, params), headers, body + else: + return url, headers, add_params_to_qs(body, params) + + +def parse_authorization_code_response(uri, state=None): + """Parse authorization grant response URI into a dict. + + If the resource owner grants the access request, the authorization + server issues an authorization code and delivers it to the client by + adding the following parameters to the query component of the + redirection URI using the ``application/x-www-form-urlencoded`` format: + + **code** + REQUIRED. The authorization code generated by the + authorization server. The authorization code MUST expire + shortly after it is issued to mitigate the risk of leaks. A + maximum authorization code lifetime of 10 minutes is + RECOMMENDED. The client MUST NOT use the authorization code + more than once. If an authorization code is used more than + once, the authorization server MUST deny the request and SHOULD + revoke (when possible) all tokens previously issued based on + that authorization code. The authorization code is bound to + the client identifier and redirection URI. + + **state** + REQUIRED if the "state" parameter was present in the client + authorization request. The exact value received from the + client. + + :param uri: The full redirect URL back to the client. + :param state: The state parameter from the authorization request. + + For example, the authorization server redirects the user-agent by + sending the following HTTP response: + + .. code-block:: http + + HTTP/1.1 302 Found + Location: https://client.example.com/cb?code=SplxlOBeZQQYbYS6WxSbIA + &state=xyz + + """ + if not is_secure_transport(uri): + raise InsecureTransportError() + + query = urlparse.urlparse(uri).query + params = dict(urlparse.parse_qsl(query)) + + if state and params.get('state', None) != state: + raise MismatchingStateError() + + if 'error' in params: + raise_from_error(params.get('error'), params) + + if not 'code' in params: + raise MissingCodeError("Missing code parameter in response.") + + return params + + +def parse_implicit_response(uri, state=None, scope=None): + """Parse the implicit token response URI into a dict. + + If the resource owner grants the access request, the authorization + server issues an access token and delivers it to the client by adding + the following parameters to the fragment component of the redirection + URI using the ``application/x-www-form-urlencoded`` format: + + **access_token** + REQUIRED. The access token issued by the authorization server. + + **token_type** + REQUIRED. The type of the token issued as described in + Section 7.1. Value is case insensitive. + + **expires_in** + RECOMMENDED. The lifetime in seconds of the access token. For + example, the value "3600" denotes that the access token will + expire in one hour from the time the response was generated. + If omitted, the authorization server SHOULD provide the + expiration time via other means or document the default value. + + **scope** + OPTIONAL, if identical to the scope requested by the client, + otherwise REQUIRED. The scope of the access token as described + by Section 3.3. + + **state** + REQUIRED if the "state" parameter was present in the client + authorization request. The exact value received from the + client. + + :param uri: + :param state: + :param scope: + + Similar to the authorization code response, but with a full token provided + in the URL fragment: + + .. code-block:: http + + HTTP/1.1 302 Found + Location: http://example.com/cb#access_token=2YotnFZFEjr1zCsicMWpAA + &state=xyz&token_type=example&expires_in=3600 + """ + if not is_secure_transport(uri): + raise InsecureTransportError() + + fragment = urlparse.urlparse(uri).fragment + params = dict(urlparse.parse_qsl(fragment, keep_blank_values=True)) + + for key in ('expires_in',): + if key in params: # cast things to int + params[key] = int(params[key]) + + if 'scope' in params: + params['scope'] = scope_to_list(params['scope']) + + if 'expires_in' in params: + params['expires_at'] = time.time() + int(params['expires_in']) + + if state and params.get('state', None) != state: + raise ValueError("Mismatching or missing state in params.") + + params = OAuth2Token(params, old_scope=scope) + validate_token_parameters(params) + return params + + +def parse_token_response(body, scope=None): + """Parse the JSON token response body into a dict. + + The authorization server issues an access token and optional refresh + token, and constructs the response by adding the following parameters + to the entity body of the HTTP response with a 200 (OK) status code: + + access_token + REQUIRED. The access token issued by the authorization server. + token_type + REQUIRED. The type of the token issued as described in + `Section 7.1`_. Value is case insensitive. + expires_in + RECOMMENDED. The lifetime in seconds of the access token. For + example, the value "3600" denotes that the access token will + expire in one hour from the time the response was generated. + If omitted, the authorization server SHOULD provide the + expiration time via other means or document the default value. + refresh_token + OPTIONAL. The refresh token which can be used to obtain new + access tokens using the same authorization grant as described + in `Section 6`_. + scope + OPTIONAL, if identical to the scope requested by the client, + otherwise REQUIRED. The scope of the access token as described + by `Section 3.3`_. + + The parameters are included in the entity body of the HTTP response + using the "application/json" media type as defined by [`RFC4627`_]. The + parameters are serialized into a JSON structure by adding each + parameter at the highest structure level. Parameter names and string + values are included as JSON strings. Numerical values are included + as JSON numbers. The order of parameters does not matter and can + vary. + + :param body: The full json encoded response body. + :param scope: The scope requested during authorization. + + For example: + + .. code-block:: http + + HTTP/1.1 200 OK + Content-Type: application/json + Cache-Control: no-store + Pragma: no-cache + + { + "access_token":"2YotnFZFEjr1zCsicMWpAA", + "token_type":"example", + "expires_in":3600, + "refresh_token":"tGzv3JOkF0XG5Qx2TlKWIA", + "example_parameter":"example_value" + } + + .. _`Section 7.1`: https://tools.ietf.org/html/rfc6749#section-7.1 + .. _`Section 6`: https://tools.ietf.org/html/rfc6749#section-6 + .. _`Section 3.3`: https://tools.ietf.org/html/rfc6749#section-3.3 + .. _`RFC4627`: https://tools.ietf.org/html/rfc4627 + """ + try: + params = json.loads(body) + except ValueError: + + # Fall back to URL-encoded string, to support old implementations, + # including (at time of writing) Facebook. See: + # https://github.com/oauthlib/oauthlib/issues/267 + + params = dict(urlparse.parse_qsl(body)) + for key in ('expires_in',): + if key in params: # cast things to int + params[key] = int(params[key]) + + if 'scope' in params: + params['scope'] = scope_to_list(params['scope']) + + if 'expires_in' in params: + if params['expires_in'] is None: + params.pop('expires_in') + else: + params['expires_at'] = time.time() + int(params['expires_in']) + + params = OAuth2Token(params, old_scope=scope) + validate_token_parameters(params) + return params + + +def validate_token_parameters(params): + """Ensures token presence, token type, expiration and scope in params.""" + if 'error' in params: + raise_from_error(params.get('error'), params) + + if not 'access_token' in params: + raise MissingTokenError(description="Missing access token parameter.") + + if not 'token_type' in params: + if os.environ.get('OAUTHLIB_STRICT_TOKEN_TYPE'): + raise MissingTokenTypeError() + + # If the issued access token scope is different from the one requested by + # the client, the authorization server MUST include the "scope" response + # parameter to inform the client of the actual scope granted. + # https://tools.ietf.org/html/rfc6749#section-3.3 + if params.scope_changed: + message = 'Scope has changed from "{old}" to "{new}".'.format( + old=params.old_scope, new=params.scope, + ) + scope_changed.send(message=message, old=params.old_scopes, new=params.scopes) + if not os.environ.get('OAUTHLIB_RELAX_TOKEN_SCOPE', None): + w = Warning(message) + w.token = params + w.old_scope = params.old_scopes + w.new_scope = params.scopes + raise w diff --git a/lib/oauthlib/oauth2/rfc6749/request_validator.py b/lib/oauthlib/oauth2/rfc6749/request_validator.py new file mode 100644 index 00000000..817d594b --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/request_validator.py @@ -0,0 +1,651 @@ +""" +oauthlib.oauth2.rfc6749.request_validator +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging + +log = logging.getLogger(__name__) + + +class RequestValidator: + + def client_authentication_required(self, request, *args, **kwargs): + """Determine if client authentication is required for current request. + + According to the rfc6749, client authentication is required in the following cases: + - Resource Owner Password Credentials Grant, when Client type is Confidential or when + Client was issued client credentials or whenever Client provided client + authentication, see `Section 4.3.2`_. + - Authorization Code Grant, when Client type is Confidential or when Client was issued + client credentials or whenever Client provided client authentication, + see `Section 4.1.3`_. + - Refresh Token Grant, when Client type is Confidential or when Client was issued + client credentials or whenever Client provided client authentication, see + `Section 6`_ + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + - Resource Owner Password Credentials Grant + - Refresh Token Grant + + .. _`Section 4.3.2`: https://tools.ietf.org/html/rfc6749#section-4.3.2 + .. _`Section 4.1.3`: https://tools.ietf.org/html/rfc6749#section-4.1.3 + .. _`Section 6`: https://tools.ietf.org/html/rfc6749#section-6 + """ + return True + + def authenticate_client(self, request, *args, **kwargs): + """Authenticate client through means outside the OAuth 2 spec. + + Means of authentication is negotiated beforehand and may for example + be `HTTP Basic Authentication Scheme`_ which utilizes the Authorization + header. + + Headers may be accesses through request.headers and parameters found in + both body and query can be obtained by direct attribute access, i.e. + request.client_id for client_id in the URL query. + + The authentication process is required to contain the identification of + the client (i.e. search the database based on the client_id). In case the + client doesn't exist based on the received client_id, this method has to + return False and the HTTP response created by the library will contain + 'invalid_client' message. + + After the client identification succeeds, this method needs to set the + client on the request, i.e. request.client = client. A client object's + class must contain the 'client_id' attribute and the 'client_id' must have + a value. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + - Resource Owner Password Credentials Grant (may be disabled) + - Client Credentials Grant + - Refresh Token Grant + + .. _`HTTP Basic Authentication Scheme`: https://tools.ietf.org/html/rfc1945#section-11.1 + """ + raise NotImplementedError('Subclasses must implement this method.') + + def authenticate_client_id(self, client_id, request, *args, **kwargs): + """Ensure client_id belong to a non-confidential client. + + A non-confidential client is one that is not required to authenticate + through other means, such as using HTTP Basic. + + Note, while not strictly necessary it can often be very convenient + to set request.client to the client object associated with the + given client_id. + + :param client_id: Unicode client identifier. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def confirm_redirect_uri(self, client_id, code, redirect_uri, client, request, + *args, **kwargs): + """Ensure that the authorization process represented by this authorization + code began with this 'redirect_uri'. + + If the client specifies a redirect_uri when obtaining code then that + redirect URI must be bound to the code and verified equal in this + method, according to RFC 6749 section 4.1.3. Do not compare against + the client's allowed redirect URIs, but against the URI used when the + code was saved. + + :param client_id: Unicode client identifier. + :param code: Unicode authorization_code. + :param redirect_uri: Unicode absolute URI. + :param client: Client object set by you, see ``.authenticate_client``. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant (during token request) + """ + raise NotImplementedError('Subclasses must implement this method.') + + def get_default_redirect_uri(self, client_id, request, *args, **kwargs): + """Get the default redirect URI for the client. + + :param client_id: Unicode client identifier. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: The default redirect URI for the client + + Method is used by: + - Authorization Code Grant + - Implicit Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def get_default_scopes(self, client_id, request, *args, **kwargs): + """Get the default scopes for the client. + + :param client_id: Unicode client identifier. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: List of default scopes + + Method is used by all core grant types: + - Authorization Code Grant + - Implicit Grant + - Resource Owner Password Credentials Grant + - Client Credentials grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def get_original_scopes(self, refresh_token, request, *args, **kwargs): + """Get the list of scopes associated with the refresh token. + + :param refresh_token: Unicode refresh token. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: List of scopes. + + Method is used by: + - Refresh token grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def is_within_original_scope(self, request_scopes, refresh_token, request, *args, **kwargs): + """Check if requested scopes are within a scope of the refresh token. + + When access tokens are refreshed the scope of the new token + needs to be within the scope of the original token. This is + ensured by checking that all requested scopes strings are on + the list returned by the get_original_scopes. If this check + fails, is_within_original_scope is called. The method can be + used in situations where returning all valid scopes from the + get_original_scopes is not practical. + + :param request_scopes: A list of scopes that were requested by client. + :param refresh_token: Unicode refresh_token. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Refresh token grant + """ + return False + + def introspect_token(self, token, token_type_hint, request, *args, **kwargs): + """Introspect an access or refresh token. + + Called once the introspect request is validated. This method should + verify the *token* and either return a dictionary with the list of + claims associated, or `None` in case the token is unknown. + + Below the list of registered claims you should be interested in: + - scope : space-separated list of scopes + - client_id : client identifier + - username : human-readable identifier for the resource owner + - token_type : type of the token + - exp : integer timestamp indicating when this token will expire + - iat : integer timestamp indicating when this token was issued + - nbf : integer timestamp indicating when it can be "not-before" used + - sub : subject of the token - identifier of the resource owner + - aud : list of string identifiers representing the intended audience + - iss : string representing issuer of this token + - jti : string identifier for the token + + Note that most of them are coming directly from JWT RFC. More details + can be found in `Introspect Claims`_ or `_JWT Claims`_. + + The implementation can use *token_type_hint* to improve lookup + efficency, but must fallback to other types to be compliant with RFC. + + The dict of claims is added to request.token after this method. + + :param token: The token string. + :param token_type_hint: access_token or refresh_token. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + Method is used by: + - Introspect Endpoint (all grants are compatible) + + .. _`Introspect Claims`: https://tools.ietf.org/html/rfc7662#section-2.2 + .. _`JWT Claims`: https://tools.ietf.org/html/rfc7519#section-4 + """ + raise NotImplementedError('Subclasses must implement this method.') + + def invalidate_authorization_code(self, client_id, code, request, *args, **kwargs): + """Invalidate an authorization code after use. + + :param client_id: Unicode client identifier. + :param code: The authorization code grant (request.code). + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + Method is used by: + - Authorization Code Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def revoke_token(self, token, token_type_hint, request, *args, **kwargs): + """Revoke an access or refresh token. + + :param token: The token string. + :param token_type_hint: access_token or refresh_token. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + Method is used by: + - Revocation Endpoint + """ + raise NotImplementedError('Subclasses must implement this method.') + + def rotate_refresh_token(self, request): + """Determine whether to rotate the refresh token. Default, yes. + + When access tokens are refreshed the old refresh token can be kept + or replaced with a new one (rotated). Return True to rotate and + and False for keeping original. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Refresh Token Grant + """ + return True + + def save_authorization_code(self, client_id, code, request, *args, **kwargs): + """Persist the authorization_code. + + The code should at minimum be stored with: + - the client_id (``client_id``) + - the redirect URI used (``request.redirect_uri``) + - a resource owner / user (``request.user``) + - the authorized scopes (``request.scopes``) + + To support PKCE, you MUST associate the code with: + - Code Challenge (``request.code_challenge``) and + - Code Challenge Method (``request.code_challenge_method``) + + To support OIDC, you MUST associate the code with: + - nonce, if present (``code["nonce"]``) + + The ``code`` argument is actually a dictionary, containing at least a + ``code`` key with the actual authorization code: + + ``{'code': 'sdf345jsdf0934f'}`` + + It may also have a ``claims`` parameter which, when present, will be a dict + deserialized from JSON as described at + http://openid.net/specs/openid-connect-core-1_0.html#ClaimsParameter + This value should be saved in this method and used again in ``.validate_code``. + + :param client_id: Unicode client identifier. + :param code: A dict of the authorization code grant and, optionally, state. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + Method is used by: + - Authorization Code Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def save_token(self, token, request, *args, **kwargs): + """Persist the token with a token type specific method. + + Currently, only save_bearer_token is supported. + + :param token: A (Bearer) token dict. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + return self.save_bearer_token(token, request, *args, **kwargs) + + def save_bearer_token(self, token, request, *args, **kwargs): + """Persist the Bearer token. + + The Bearer token should at minimum be associated with: + - a client and it's client_id, if available + - a resource owner / user (request.user) + - authorized scopes (request.scopes) + - an expiration time + - a refresh token, if issued + - a claims document, if present in request.claims + + The Bearer token dict may hold a number of items:: + + { + 'token_type': 'Bearer', + 'access_token': 'askfjh234as9sd8', + 'expires_in': 3600, + 'scope': 'string of space separated authorized scopes', + 'refresh_token': '23sdf876234', # if issued + 'state': 'given_by_client', # if supplied by client (implicit ONLY) + } + + Note that while "scope" is a string-separated list of authorized scopes, + the original list is still available in request.scopes. + + The token dict is passed as a reference so any changes made to the dictionary + will go back to the user. If additional information must return to the client + user, and it is only possible to get this information after writing the token + to storage, it should be added to the token dictionary. If the token + dictionary must be modified but the changes should not go back to the user, + a copy of the dictionary must be made before making the changes. + + Also note that if an Authorization Code grant request included a valid claims + parameter (for OpenID Connect) then the request.claims property will contain + the claims dict, which should be saved for later use when generating the + id_token and/or UserInfo response content. + + :param token: A Bearer token dict. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: The default redirect URI for the client + + Method is used by all core grant types issuing Bearer tokens: + - Authorization Code Grant + - Implicit Grant + - Resource Owner Password Credentials Grant (might not associate a client) + - Client Credentials grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_bearer_token(self, token, scopes, request): + """Ensure the Bearer token is valid and authorized access to scopes. + + :param token: A string of random characters. + :param scopes: A list of scopes associated with the protected resource. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + + A key to OAuth 2 security and restricting impact of leaked tokens is + the short expiration time of tokens, *always ensure the token has not + expired!*. + + Two different approaches to scope validation: + + 1) all(scopes). The token must be authorized access to all scopes + associated with the resource. For example, the + token has access to ``read-only`` and ``images``, + thus the client can view images but not upload new. + Allows for fine grained access control through + combining various scopes. + + 2) any(scopes). The token must be authorized access to one of the + scopes associated with the resource. For example, + token has access to ``read-only-images``. + Allows for fine grained, although arguably less + convenient, access control. + + A powerful way to use scopes would mimic UNIX ACLs and see a scope + as a group with certain privileges. For a restful API these might + map to HTTP verbs instead of read, write and execute. + + Note, the request.user attribute can be set to the resource owner + associated with this token. Similarly the request.client and + request.scopes attribute can be set to associated client object + and authorized scopes. If you then use a decorator such as the + one provided for django these attributes will be made available + in all protected views as keyword arguments. + + :param token: Unicode Bearer token + :param scopes: List of scopes (defined by you) + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is indirectly used by all core Bearer token issuing grant types: + - Authorization Code Grant + - Implicit Grant + - Resource Owner Password Credentials Grant + - Client Credentials Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_client_id(self, client_id, request, *args, **kwargs): + """Ensure client_id belong to a valid and active client. + + Note, while not strictly necessary it can often be very convenient + to set request.client to the client object associated with the + given client_id. + + :param client_id: Unicode client identifier. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + - Implicit Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_code(self, client_id, code, client, request, *args, **kwargs): + """Verify that the authorization_code is valid and assigned to the given + client. + + Before returning true, set the following based on the information stored + with the code in 'save_authorization_code': + + - request.user + - request.scopes + - request.claims (if given) + OBS! The request.user attribute should be set to the resource owner + associated with this authorization code. Similarly request.scopes + must also be set. + + The request.claims property, if it was given, should assigned a dict. + + If PKCE is enabled (see 'is_pkce_required' and 'save_authorization_code') + you MUST set the following based on the information stored: + - request.code_challenge + - request.code_challenge_method + + :param client_id: Unicode client identifier. + :param code: Unicode authorization code. + :param client: Client object set by you, see ``.authenticate_client``. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_grant_type(self, client_id, grant_type, client, request, *args, **kwargs): + """Ensure client is authorized to use the grant_type requested. + + :param client_id: Unicode client identifier. + :param grant_type: Unicode grant type, i.e. authorization_code, password. + :param client: Client object set by you, see ``.authenticate_client``. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + - Resource Owner Password Credentials Grant + - Client Credentials Grant + - Refresh Token Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_redirect_uri(self, client_id, redirect_uri, request, *args, **kwargs): + """Ensure client is authorized to redirect to the redirect_uri requested. + + All clients should register the absolute URIs of all URIs they intend + to redirect to. The registration is outside of the scope of oauthlib. + + :param client_id: Unicode client identifier. + :param redirect_uri: Unicode absolute URI. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + - Implicit Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_refresh_token(self, refresh_token, client, request, *args, **kwargs): + """Ensure the Bearer token is valid and authorized access to scopes. + + OBS! The request.user attribute should be set to the resource owner + associated with this refresh token. + + :param refresh_token: Unicode refresh token. + :param client: Client object set by you, see ``.authenticate_client``. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant (indirectly by issuing refresh tokens) + - Resource Owner Password Credentials Grant (also indirectly) + - Refresh Token Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_response_type(self, client_id, response_type, client, request, *args, **kwargs): + """Ensure client is authorized to use the response_type requested. + + :param client_id: Unicode client identifier. + :param response_type: Unicode response type, i.e. code, token. + :param client: Client object set by you, see ``.authenticate_client``. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + - Implicit Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_scopes(self, client_id, scopes, client, request, *args, **kwargs): + """Ensure the client is authorized access to requested scopes. + + :param client_id: Unicode client identifier. + :param scopes: List of scopes (defined by you). + :param client: Client object set by you, see ``.authenticate_client``. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by all core grant types: + - Authorization Code Grant + - Implicit Grant + - Resource Owner Password Credentials Grant + - Client Credentials Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_user(self, username, password, client, request, *args, **kwargs): + """Ensure the username and password is valid. + + OBS! The validation should also set the user attribute of the request + to a valid resource owner, i.e. request.user = username or similar. If + not set you will be unable to associate a token with a user in the + persistance method used (commonly, save_bearer_token). + + :param username: Unicode username. + :param password: Unicode password. + :param client: Client object set by you, see ``.authenticate_client``. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Resource Owner Password Credentials Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def is_pkce_required(self, client_id, request): + """Determine if current request requires PKCE. Default, False. + This is called for both "authorization" and "token" requests. + + Override this method by ``return True`` to enable PKCE for everyone. + You might want to enable it only for public clients. + Note that PKCE can also be used in addition of a client authentication. + + OAuth 2.0 public clients utilizing the Authorization Code Grant are + susceptible to the authorization code interception attack. This + specification describes the attack as well as a technique to mitigate + against the threat through the use of Proof Key for Code Exchange + (PKCE, pronounced "pixy"). See `RFC7636`_. + + :param client_id: Client identifier. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - Authorization Code Grant + + .. _`RFC7636`: https://tools.ietf.org/html/rfc7636 + """ + return False + + def get_code_challenge(self, code, request): + """Is called for every "token" requests. + + When the server issues the authorization code in the authorization + response, it MUST associate the ``code_challenge`` and + ``code_challenge_method`` values with the authorization code so it can + be verified later. + + Typically, the ``code_challenge`` and ``code_challenge_method`` values + are stored in encrypted form in the ``code`` itself but could + alternatively be stored on the server associated with the code. The + server MUST NOT include the ``code_challenge`` value in client requests + in a form that other entities can extract. + + Return the ``code_challenge`` associated to the code. + If ``None`` is returned, code is considered to not be associated to any + challenges. + + :param code: Authorization code. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: code_challenge string + + Method is used by: + - Authorization Code Grant - when PKCE is active + + """ + return None + + def get_code_challenge_method(self, code, request): + """Is called during the "token" request processing, when a + ``code_verifier`` and a ``code_challenge`` has been provided. + + See ``.get_code_challenge``. + + Must return ``plain`` or ``S256``. You can return a custom value if you have + implemented your own ``AuthorizationCodeGrant`` class. + + :param code: Authorization code. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: code_challenge_method string + + Method is used by: + - Authorization Code Grant - when PKCE is active + + """ + raise NotImplementedError('Subclasses must implement this method.') diff --git a/lib/oauthlib/oauth2/rfc6749/tokens.py b/lib/oauthlib/oauth2/rfc6749/tokens.py new file mode 100644 index 00000000..6284248d --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/tokens.py @@ -0,0 +1,355 @@ +""" +oauthlib.oauth2.rfc6749.tokens +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module contains methods for adding two types of access tokens to requests. + +- Bearer https://tools.ietf.org/html/rfc6750 +- MAC https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01 +""" +import hashlib +import hmac +import warnings +from binascii import b2a_base64 +from urllib.parse import urlparse + +from oauthlib import common +from oauthlib.common import add_params_to_qs, add_params_to_uri + +from . import utils + + +class OAuth2Token(dict): + + def __init__(self, params, old_scope=None): + super().__init__(params) + self._new_scope = None + if 'scope' in params and params['scope']: + self._new_scope = set(utils.scope_to_list(params['scope'])) + if old_scope is not None: + self._old_scope = set(utils.scope_to_list(old_scope)) + if self._new_scope is None: + # the rfc says that if the scope hasn't changed, it's optional + # in params so set the new scope to the old scope + self._new_scope = self._old_scope + else: + self._old_scope = self._new_scope + + @property + def scope_changed(self): + return self._new_scope != self._old_scope + + @property + def old_scope(self): + return utils.list_to_scope(self._old_scope) + + @property + def old_scopes(self): + return list(self._old_scope) + + @property + def scope(self): + return utils.list_to_scope(self._new_scope) + + @property + def scopes(self): + return list(self._new_scope) + + @property + def missing_scopes(self): + return list(self._old_scope - self._new_scope) + + @property + def additional_scopes(self): + return list(self._new_scope - self._old_scope) + + +def prepare_mac_header(token, uri, key, http_method, + nonce=None, + headers=None, + body=None, + ext='', + hash_algorithm='hmac-sha-1', + issue_time=None, + draft=0): + """Add an `MAC Access Authentication`_ signature to headers. + + Unlike OAuth 1, this HMAC signature does not require inclusion of the + request payload/body, neither does it use a combination of client_secret + and token_secret but rather a mac_key provided together with the access + token. + + Currently two algorithms are supported, "hmac-sha-1" and "hmac-sha-256", + `extension algorithms`_ are not supported. + + Example MAC Authorization header, linebreaks added for clarity + + Authorization: MAC id="h480djs93hd8", + nonce="1336363200:dj83hs9s", + mac="bhCQXTVyfj5cmA9uKkPFx1zeOXM=" + + .. _`MAC Access Authentication`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01 + .. _`extension algorithms`: https://tools.ietf.org/html/draft-ietf-oauth-v2-http-mac-01#section-7.1 + + :param token: + :param uri: Request URI. + :param key: MAC given provided by token endpoint. + :param http_method: HTTP Request method. + :param nonce: + :param headers: Request headers as a dictionary. + :param body: + :param ext: + :param hash_algorithm: HMAC algorithm provided by token endpoint. + :param issue_time: Time when the MAC credentials were issued (datetime). + :param draft: MAC authentication specification version. + :return: headers dictionary with the authorization field added. + """ + http_method = http_method.upper() + host, port = utils.host_from_uri(uri) + + if hash_algorithm.lower() == 'hmac-sha-1': + h = hashlib.sha1 + elif hash_algorithm.lower() == 'hmac-sha-256': + h = hashlib.sha256 + else: + raise ValueError('unknown hash algorithm') + + if draft == 0: + nonce = nonce or '{}:{}'.format(utils.generate_age(issue_time), + common.generate_nonce()) + else: + ts = common.generate_timestamp() + nonce = common.generate_nonce() + + sch, net, path, par, query, fra = urlparse(uri) + + if query: + request_uri = path + '?' + query + else: + request_uri = path + + # Hash the body/payload + if body is not None and draft == 0: + body = body.encode('utf-8') + bodyhash = b2a_base64(h(body).digest())[:-1].decode('utf-8') + else: + bodyhash = '' + + # Create the normalized base string + base = [] + if draft == 0: + base.append(nonce) + else: + base.append(ts) + base.append(nonce) + base.append(http_method.upper()) + base.append(request_uri) + base.append(host) + base.append(port) + if draft == 0: + base.append(bodyhash) + base.append(ext or '') + base_string = '\n'.join(base) + '\n' + + # hmac struggles with unicode strings - http://bugs.python.org/issue5285 + if isinstance(key, str): + key = key.encode('utf-8') + sign = hmac.new(key, base_string.encode('utf-8'), h) + sign = b2a_base64(sign.digest())[:-1].decode('utf-8') + + header = [] + header.append('MAC id="%s"' % token) + if draft != 0: + header.append('ts="%s"' % ts) + header.append('nonce="%s"' % nonce) + if bodyhash: + header.append('bodyhash="%s"' % bodyhash) + if ext: + header.append('ext="%s"' % ext) + header.append('mac="%s"' % sign) + + headers = headers or {} + headers['Authorization'] = ', '.join(header) + return headers + + +def prepare_bearer_uri(token, uri): + """Add a `Bearer Token`_ to the request URI. + Not recommended, use only if client can't use authorization header or body. + + http://www.example.com/path?access_token=h480djs93hd8 + + .. _`Bearer Token`: https://tools.ietf.org/html/rfc6750 + + :param token: + :param uri: + """ + return add_params_to_uri(uri, [(('access_token', token))]) + + +def prepare_bearer_headers(token, headers=None): + """Add a `Bearer Token`_ to the request URI. + Recommended method of passing bearer tokens. + + Authorization: Bearer h480djs93hd8 + + .. _`Bearer Token`: https://tools.ietf.org/html/rfc6750 + + :param token: + :param headers: + """ + headers = headers or {} + headers['Authorization'] = 'Bearer %s' % token + return headers + + +def prepare_bearer_body(token, body=''): + """Add a `Bearer Token`_ to the request body. + + access_token=h480djs93hd8 + + .. _`Bearer Token`: https://tools.ietf.org/html/rfc6750 + + :param token: + :param body: + """ + return add_params_to_qs(body, [(('access_token', token))]) + + +def random_token_generator(request, refresh_token=False): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param refresh_token: + """ + return common.generate_token() + + +def signed_token_generator(private_pem, **kwargs): + """ + :param private_pem: + """ + def signed_token_generator(request): + request.claims = kwargs + return common.generate_signed_token(private_pem, request) + + return signed_token_generator + + +def get_token_from_header(request): + """ + Helper function to extract a token from the request header. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :return: Return the token or None if the Authorization header is malformed. + """ + token = None + + if 'Authorization' in request.headers: + split_header = request.headers.get('Authorization').split() + if len(split_header) == 2 and split_header[0].lower() == 'bearer': + token = split_header[1] + else: + token = request.access_token + + return token + + +class TokenBase: + + def __call__(self, request, refresh_token=False): + raise NotImplementedError('Subclasses must implement this method.') + + def validate_request(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + raise NotImplementedError('Subclasses must implement this method.') + + def estimate_type(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + raise NotImplementedError('Subclasses must implement this method.') + + +class BearerToken(TokenBase): + __slots__ = ( + 'request_validator', 'token_generator', + 'refresh_token_generator', 'expires_in' + ) + + def __init__(self, request_validator=None, token_generator=None, + expires_in=None, refresh_token_generator=None): + self.request_validator = request_validator + self.token_generator = token_generator or random_token_generator + self.refresh_token_generator = ( + refresh_token_generator or self.token_generator + ) + self.expires_in = expires_in or 3600 + + def create_token(self, request, refresh_token=False, **kwargs): + """ + Create a BearerToken, by default without refresh token. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :param refresh_token: + """ + if "save_token" in kwargs: + warnings.warn("`save_token` has been deprecated, it was not called internally." + "If you do, call `request_validator.save_token()` instead.", + DeprecationWarning) + + if callable(self.expires_in): + expires_in = self.expires_in(request) + else: + expires_in = self.expires_in + + request.expires_in = expires_in + + token = { + 'access_token': self.token_generator(request), + 'expires_in': expires_in, + 'token_type': 'Bearer', + } + + # If provided, include - this is optional in some cases https://tools.ietf.org/html/rfc6749#section-3.3 but + # there is currently no mechanism to coordinate issuing a token for only a subset of the requested scopes so + # all tokens issued are for the entire set of requested scopes. + if request.scopes is not None: + token['scope'] = ' '.join(request.scopes) + + if refresh_token: + if (request.refresh_token and + not self.request_validator.rotate_refresh_token(request)): + token['refresh_token'] = request.refresh_token + else: + token['refresh_token'] = self.refresh_token_generator(request) + + token.update(request.extra_credentials or {}) + return OAuth2Token(token) + + def validate_request(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + token = get_token_from_header(request) + return self.request_validator.validate_bearer_token( + token, request.scopes, request) + + def estimate_type(self, request): + """ + :param request: OAuthlib request. + :type request: oauthlib.common.Request + """ + if request.headers.get('Authorization', '').split(' ')[0].lower() == 'bearer': + return 9 + elif request.access_token is not None: + return 5 + else: + return 0 diff --git a/lib/oauthlib/oauth2/rfc6749/utils.py b/lib/oauthlib/oauth2/rfc6749/utils.py new file mode 100644 index 00000000..7dc27b3d --- /dev/null +++ b/lib/oauthlib/oauth2/rfc6749/utils.py @@ -0,0 +1,83 @@ +""" +oauthlib.utils +~~~~~~~~~~~~~~ + +This module contains utility methods used by various parts of the OAuth 2 spec. +""" +import datetime +import os +from urllib.parse import quote, urlparse + +from oauthlib.common import urldecode + + +def list_to_scope(scope): + """Convert a list of scopes to a space separated string.""" + if isinstance(scope, str) or scope is None: + return scope + elif isinstance(scope, (set, tuple, list)): + return " ".join([str(s) for s in scope]) + else: + raise ValueError("Invalid scope (%s), must be string, tuple, set, or list." % scope) + + +def scope_to_list(scope): + """Convert a space separated string to a list of scopes.""" + if isinstance(scope, (tuple, list, set)): + return [str(s) for s in scope] + elif scope is None: + return None + else: + return scope.strip().split(" ") + + +def params_from_uri(uri): + params = dict(urldecode(urlparse(uri).query)) + if 'scope' in params: + params['scope'] = scope_to_list(params['scope']) + return params + + +def host_from_uri(uri): + """Extract hostname and port from URI. + + Will use default port for HTTP and HTTPS if none is present in the URI. + """ + default_ports = { + 'HTTP': '80', + 'HTTPS': '443', + } + + sch, netloc, path, par, query, fra = urlparse(uri) + if ':' in netloc: + netloc, port = netloc.split(':', 1) + else: + port = default_ports.get(sch.upper()) + + return netloc, port + + +def escape(u): + """Escape a string in an OAuth-compatible fashion. + + TODO: verify whether this can in fact be used for OAuth 2 + + """ + if not isinstance(u, str): + raise ValueError('Only unicode objects are escapable.') + return quote(u.encode('utf-8'), safe=b'~') + + +def generate_age(issue_time): + """Generate a age parameter for MAC authentication draft 00.""" + td = datetime.datetime.now() - issue_time + age = (td.microseconds + (td.seconds + td.days * 24 * 3600) + * 10 ** 6) / 10 ** 6 + return str(age) + + +def is_secure_transport(uri): + """Check if the uri is over ssl.""" + if os.environ.get('OAUTHLIB_INSECURE_TRANSPORT'): + return True + return uri.lower().startswith('https://') diff --git a/lib/oauthlib/openid/__init__.py b/lib/oauthlib/openid/__init__.py new file mode 100644 index 00000000..e3174374 --- /dev/null +++ b/lib/oauthlib/openid/__init__.py @@ -0,0 +1,7 @@ +""" +oauthlib.openid +~~~~~~~~~~~~~~ + +""" +from .connect.core.endpoints import Server, UserInfoEndpoint +from .connect.core.request_validator import RequestValidator diff --git a/lib/oauthlib/openid/connect/__init__.py b/lib/oauthlib/openid/connect/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/oauthlib/openid/connect/core/__init__.py b/lib/oauthlib/openid/connect/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/oauthlib/openid/connect/core/endpoints/__init__.py b/lib/oauthlib/openid/connect/core/endpoints/__init__.py new file mode 100644 index 00000000..7017ff4f --- /dev/null +++ b/lib/oauthlib/openid/connect/core/endpoints/__init__.py @@ -0,0 +1,9 @@ +""" +oauthlib.oopenid.core +~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various logic needed +for consuming and providing OpenID Connect +""" +from .pre_configured import Server +from .userinfo import UserInfoEndpoint diff --git a/lib/oauthlib/openid/connect/core/endpoints/pre_configured.py b/lib/oauthlib/openid/connect/core/endpoints/pre_configured.py new file mode 100644 index 00000000..8ce8bee6 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/endpoints/pre_configured.py @@ -0,0 +1,97 @@ +""" +oauthlib.openid.connect.core.endpoints.pre_configured +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of various endpoints needed +for providing OpenID Connect servers. +""" +from oauthlib.oauth2.rfc6749.endpoints import ( + AuthorizationEndpoint, IntrospectEndpoint, ResourceEndpoint, + RevocationEndpoint, TokenEndpoint, +) +from oauthlib.oauth2.rfc6749.grant_types import ( + AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant, + ClientCredentialsGrant, ImplicitGrant as OAuth2ImplicitGrant, + RefreshTokenGrant, ResourceOwnerPasswordCredentialsGrant, +) +from oauthlib.oauth2.rfc6749.tokens import BearerToken + +from ..grant_types import AuthorizationCodeGrant, HybridGrant, ImplicitGrant +from ..grant_types.dispatchers import ( + AuthorizationCodeGrantDispatcher, AuthorizationTokenGrantDispatcher, + ImplicitTokenGrantDispatcher, +) +from ..tokens import JWTToken +from .userinfo import UserInfoEndpoint + + +class Server(AuthorizationEndpoint, IntrospectEndpoint, TokenEndpoint, + ResourceEndpoint, RevocationEndpoint, UserInfoEndpoint): + + """An all-in-one endpoint featuring all four major grant types.""" + + def __init__(self, request_validator, token_expires_in=None, + token_generator=None, refresh_token_generator=None, + *args, **kwargs): + """Construct a new all-grants-in-one server. + + :param request_validator: An implementation of + oauthlib.oauth2.RequestValidator. + :param token_expires_in: An int or a function to generate a token + expiration offset (in seconds) given a + oauthlib.common.Request object. + :param token_generator: A function to generate a token from a request. + :param refresh_token_generator: A function to generate a token from a + request for the refresh token. + :param kwargs: Extra parameters to pass to authorization-, + token-, resource-, and revocation-endpoint constructors. + """ + self.auth_grant = OAuth2AuthorizationCodeGrant(request_validator) + self.implicit_grant = OAuth2ImplicitGrant(request_validator) + self.password_grant = ResourceOwnerPasswordCredentialsGrant( + request_validator) + self.credentials_grant = ClientCredentialsGrant(request_validator) + self.refresh_grant = RefreshTokenGrant(request_validator) + self.openid_connect_auth = AuthorizationCodeGrant(request_validator) + self.openid_connect_implicit = ImplicitGrant(request_validator) + self.openid_connect_hybrid = HybridGrant(request_validator) + + self.bearer = BearerToken(request_validator, token_generator, + token_expires_in, refresh_token_generator) + + self.jwt = JWTToken(request_validator, token_generator, + token_expires_in, refresh_token_generator) + + self.auth_grant_choice = AuthorizationCodeGrantDispatcher(default_grant=self.auth_grant, oidc_grant=self.openid_connect_auth) + self.implicit_grant_choice = ImplicitTokenGrantDispatcher(default_grant=self.implicit_grant, oidc_grant=self.openid_connect_implicit) + + # See http://openid.net/specs/oauth-v2-multiple-response-types-1_0.html#Combinations for valid combinations + # internally our AuthorizationEndpoint will ensure they can appear in any order for any valid combination + AuthorizationEndpoint.__init__(self, default_response_type='code', + response_types={ + 'code': self.auth_grant_choice, + 'token': self.implicit_grant_choice, + 'id_token': self.openid_connect_implicit, + 'id_token token': self.openid_connect_implicit, + 'code token': self.openid_connect_hybrid, + 'code id_token': self.openid_connect_hybrid, + 'code id_token token': self.openid_connect_hybrid, + 'none': self.auth_grant + }, + default_token_type=self.bearer) + + self.token_grant_choice = AuthorizationTokenGrantDispatcher(request_validator, default_grant=self.auth_grant, oidc_grant=self.openid_connect_auth) + + TokenEndpoint.__init__(self, default_grant_type='authorization_code', + grant_types={ + 'authorization_code': self.token_grant_choice, + 'password': self.password_grant, + 'client_credentials': self.credentials_grant, + 'refresh_token': self.refresh_grant, + }, + default_token_type=self.bearer) + ResourceEndpoint.__init__(self, default_token='Bearer', + token_types={'Bearer': self.bearer, 'JWT': self.jwt}) + RevocationEndpoint.__init__(self, request_validator) + IntrospectEndpoint.__init__(self, request_validator) + UserInfoEndpoint.__init__(self, request_validator) diff --git a/lib/oauthlib/openid/connect/core/endpoints/userinfo.py b/lib/oauthlib/openid/connect/core/endpoints/userinfo.py new file mode 100644 index 00000000..1c29cc55 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/endpoints/userinfo.py @@ -0,0 +1,99 @@ +""" +oauthlib.openid.connect.core.endpoints.userinfo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module is an implementation of userinfo endpoint. +""" +import json +import logging + +from oauthlib.common import Request +from oauthlib.oauth2.rfc6749 import errors +from oauthlib.oauth2.rfc6749.endpoints.base import ( + BaseEndpoint, catch_errors_and_unavailability, +) +from oauthlib.oauth2.rfc6749.tokens import BearerToken + +log = logging.getLogger(__name__) + + +class UserInfoEndpoint(BaseEndpoint): + """Authorizes access to userinfo resource. + """ + def __init__(self, request_validator): + self.bearer = BearerToken(request_validator, None, None, None) + self.request_validator = request_validator + BaseEndpoint.__init__(self) + + @catch_errors_and_unavailability + def create_userinfo_response(self, uri, http_method='GET', body=None, headers=None): + """Validate BearerToken and return userinfo from RequestValidator + + The UserInfo Endpoint MUST return a + content-type header to indicate which format is being returned. The + content-type of the HTTP response MUST be application/json if the + response body is a text JSON object; the response body SHOULD be encoded + using UTF-8. + """ + request = Request(uri, http_method, body, headers) + request.scopes = ["openid"] + self.validate_userinfo_request(request) + + claims = self.request_validator.get_userinfo_claims(request) + if claims is None: + log.error('Userinfo MUST have claims for %r.', request) + raise errors.ServerError(status_code=500) + + if isinstance(claims, dict): + resp_headers = { + 'Content-Type': 'application/json' + } + if "sub" not in claims: + log.error('Userinfo MUST have "sub" for %r.', request) + raise errors.ServerError(status_code=500) + body = json.dumps(claims) + elif isinstance(claims, str): + resp_headers = { + 'Content-Type': 'application/jwt' + } + body = claims + else: + log.error('Userinfo return unknown response for %r.', request) + raise errors.ServerError(status_code=500) + log.debug('Userinfo access valid for %r.', request) + return resp_headers, body, 200 + + def validate_userinfo_request(self, request): + """Ensure the request is valid. + + 5.3.1. UserInfo Request + The Client sends the UserInfo Request using either HTTP GET or HTTP + POST. The Access Token obtained from an OpenID Connect Authentication + Request MUST be sent as a Bearer Token, per Section 2 of OAuth 2.0 + Bearer Token Usage [RFC6750]. + + It is RECOMMENDED that the request use the HTTP GET method and the + Access Token be sent using the Authorization header field. + + The following is a non-normative example of a UserInfo Request: + + GET /userinfo HTTP/1.1 + Host: server.example.com + Authorization: Bearer SlAV32hkKG + + 5.3.3. UserInfo Error Response + When an error condition occurs, the UserInfo Endpoint returns an Error + Response as defined in Section 3 of OAuth 2.0 Bearer Token Usage + [RFC6750]. (HTTP errors unrelated to RFC 6750 are returned to the User + Agent using the appropriate HTTP status code.) + + The following is a non-normative example of a UserInfo Error Response: + + HTTP/1.1 401 Unauthorized + WWW-Authenticate: Bearer error="invalid_token", + error_description="The Access Token expired" + """ + if not self.bearer.validate_request(request): + raise errors.InvalidTokenError() + if "openid" not in request.scopes: + raise errors.InsufficientScopeError() diff --git a/lib/oauthlib/openid/connect/core/exceptions.py b/lib/oauthlib/openid/connect/core/exceptions.py new file mode 100644 index 00000000..099b84e2 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/exceptions.py @@ -0,0 +1,149 @@ +""" +oauthlib.oauth2.rfc6749.errors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Error used both by OAuth 2 clients and providers to represent the spec +defined error responses for all four core grant types. +""" +from oauthlib.oauth2.rfc6749.errors import FatalClientError, OAuth2Error + + +class FatalOpenIDClientError(FatalClientError): + pass + + +class OpenIDClientError(OAuth2Error): + pass + + +class InteractionRequired(OpenIDClientError): + """ + The Authorization Server requires End-User interaction to proceed. + + This error MAY be returned when the prompt parameter value in the + Authentication Request is none, but the Authentication Request cannot be + completed without displaying a user interface for End-User interaction. + """ + error = 'interaction_required' + status_code = 401 + + +class LoginRequired(OpenIDClientError): + """ + The Authorization Server requires End-User authentication. + + This error MAY be returned when the prompt parameter value in the + Authentication Request is none, but the Authentication Request cannot be + completed without displaying a user interface for End-User authentication. + """ + error = 'login_required' + status_code = 401 + + +class AccountSelectionRequired(OpenIDClientError): + """ + The End-User is REQUIRED to select a session at the Authorization Server. + + The End-User MAY be authenticated at the Authorization Server with + different associated accounts, but the End-User did not select a session. + This error MAY be returned when the prompt parameter value in the + Authentication Request is none, but the Authentication Request cannot be + completed without displaying a user interface to prompt for a session to + use. + """ + error = 'account_selection_required' + + +class ConsentRequired(OpenIDClientError): + """ + The Authorization Server requires End-User consent. + + This error MAY be returned when the prompt parameter value in the + Authentication Request is none, but the Authentication Request cannot be + completed without displaying a user interface for End-User consent. + """ + error = 'consent_required' + status_code = 401 + + +class InvalidRequestURI(OpenIDClientError): + """ + The request_uri in the Authorization Request returns an error or + contains invalid data. + """ + error = 'invalid_request_uri' + description = 'The request_uri in the Authorization Request returns an ' \ + 'error or contains invalid data.' + + +class InvalidRequestObject(OpenIDClientError): + """ + The request parameter contains an invalid Request Object. + """ + error = 'invalid_request_object' + description = 'The request parameter contains an invalid Request Object.' + + +class RequestNotSupported(OpenIDClientError): + """ + The OP does not support use of the request parameter. + """ + error = 'request_not_supported' + description = 'The request parameter is not supported.' + + +class RequestURINotSupported(OpenIDClientError): + """ + The OP does not support use of the request_uri parameter. + """ + error = 'request_uri_not_supported' + description = 'The request_uri parameter is not supported.' + + +class RegistrationNotSupported(OpenIDClientError): + """ + The OP does not support use of the registration parameter. + """ + error = 'registration_not_supported' + description = 'The registration parameter is not supported.' + + +class InvalidTokenError(OAuth2Error): + """ + The access token provided is expired, revoked, malformed, or + invalid for other reasons. The resource SHOULD respond with + the HTTP 401 (Unauthorized) status code. The client MAY + request a new access token and retry the protected resource + request. + """ + error = 'invalid_token' + status_code = 401 + description = ("The access token provided is expired, revoked, malformed, " + "or invalid for other reasons.") + + +class InsufficientScopeError(OAuth2Error): + """ + The request requires higher privileges than provided by the + access token. The resource server SHOULD respond with the HTTP + 403 (Forbidden) status code and MAY include the "scope" + attribute with the scope necessary to access the protected + resource. + """ + error = 'insufficient_scope' + status_code = 403 + description = ("The request requires higher privileges than provided by " + "the access token.") + + +def raise_from_error(error, params=None): + import inspect + import sys + kwargs = { + 'description': params.get('error_description'), + 'uri': params.get('error_uri'), + 'state': params.get('state') + } + for _, cls in inspect.getmembers(sys.modules[__name__], inspect.isclass): + if cls.error == error: + raise cls(**kwargs) diff --git a/lib/oauthlib/openid/connect/core/grant_types/__init__.py b/lib/oauthlib/openid/connect/core/grant_types/__init__.py new file mode 100644 index 00000000..887a5850 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/grant_types/__init__.py @@ -0,0 +1,12 @@ +""" +oauthlib.openid.connect.core.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +from .authorization_code import AuthorizationCodeGrant +from .base import GrantTypeBase +from .dispatchers import ( + AuthorizationCodeGrantDispatcher, AuthorizationTokenGrantDispatcher, + ImplicitTokenGrantDispatcher, +) +from .hybrid import HybridGrant +from .implicit import ImplicitGrant diff --git a/lib/oauthlib/openid/connect/core/grant_types/authorization_code.py b/lib/oauthlib/openid/connect/core/grant_types/authorization_code.py new file mode 100644 index 00000000..6b2dcc3b --- /dev/null +++ b/lib/oauthlib/openid/connect/core/grant_types/authorization_code.py @@ -0,0 +1,43 @@ +""" +oauthlib.openid.connect.core.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging + +from oauthlib.oauth2.rfc6749.grant_types.authorization_code import ( + AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant, +) + +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class AuthorizationCodeGrant(GrantTypeBase): + + def __init__(self, request_validator=None, **kwargs): + self.proxy_target = OAuth2AuthorizationCodeGrant( + request_validator=request_validator, **kwargs) + self.custom_validators.post_auth.append( + self.openid_authorization_validator) + self.register_token_modifier(self.add_id_token) + + def add_id_token(self, token, token_handler, request): + """ + Construct an initial version of id_token, and let the + request_validator sign or encrypt it. + + The authorization_code version of this method is used to + retrieve the nonce accordingly to the code storage. + """ + # Treat it as normal OAuth 2 auth code request if openid is not present + if not request.scopes or 'openid' not in request.scopes: + return token + + nonce = self.request_validator.get_authorization_code_nonce( + request.client_id, + request.code, + request.redirect_uri, + request + ) + return super().add_id_token(token, token_handler, request, nonce=nonce) diff --git a/lib/oauthlib/openid/connect/core/grant_types/base.py b/lib/oauthlib/openid/connect/core/grant_types/base.py new file mode 100644 index 00000000..76173e6c --- /dev/null +++ b/lib/oauthlib/openid/connect/core/grant_types/base.py @@ -0,0 +1,327 @@ +import base64 +import hashlib +import logging +import time +from json import loads + +from oauthlib.oauth2.rfc6749.errors import ( + ConsentRequired, InvalidRequestError, LoginRequired, +) + + +log = logging.getLogger(__name__) + + +class GrantTypeBase: + + # Just proxy the majority of method calls through to the + # proxy_target grant type handler, which will usually be either + # the standard OAuth2 AuthCode or Implicit grant types. + def __getattr__(self, attr): + return getattr(self.proxy_target, attr) + + def __setattr__(self, attr, value): + proxied_attrs = {'refresh_token', 'response_types'} + if attr in proxied_attrs: + setattr(self.proxy_target, attr, value) + else: + super(OpenIDConnectBase, self).__setattr__(attr, value) + + def validate_authorization_request(self, request): + """Validates the OpenID Connect authorization request parameters. + + :returns: (list of scopes, dict of request info) + """ + return self.proxy_target.validate_authorization_request(request) + + def _inflate_claims(self, request): + # this may be called multiple times in a single request so make sure we only de-serialize the claims once + if request.claims and not isinstance(request.claims, dict): + # specific claims are requested during the Authorization Request and may be requested for inclusion + # in either the id_token or the UserInfo endpoint response + # see http://openid.net/specs/openid-connect-core-1_0.html#ClaimsParameter + try: + request.claims = loads(request.claims) + except Exception as ex: + raise InvalidRequestError(description="Malformed claims parameter", + uri="http://openid.net/specs/openid-connect-core-1_0.html#ClaimsParameter") + + def id_token_hash(self, value, hashfunc=hashlib.sha256): + """ + Its value is the base64url encoding of the left-most half of the + hash of the octets of the ASCII representation of the access_token + value, where the hash algorithm used is the hash algorithm used in + the alg Header Parameter of the ID Token's JOSE Header. + + For instance, if the alg is RS256, hash the access_token value + with SHA-256, then take the left-most 128 bits and + base64url-encode them. + For instance, if the alg is HS512, hash the code value with + SHA-512, then take the left-most 256 bits and base64url-encode + them. The c_hash value is a case-sensitive string. + + Example of hash from OIDC specification (bound to a JWS using RS256): + + code: + Qcb0Orv1zh30vL1MPRsbm-diHiMwcLyZvn1arpZv-Jxf_11jnpEX3Tgfvk + + c_hash: + LDktKdoQak3Pk0cnXxCltA + """ + digest = hashfunc(value.encode()).digest() + left_most = len(digest) // 2 + return base64.urlsafe_b64encode(digest[:left_most]).decode().rstrip("=") + + def add_id_token(self, token, token_handler, request, nonce=None): + """ + Construct an initial version of id_token, and let the + request_validator sign or encrypt it. + + The initial version can contain the fields below, accordingly + to the spec: + - aud + - iat + - nonce + - at_hash + - c_hash + """ + # Treat it as normal OAuth 2 auth code request if openid is not present + if not request.scopes or 'openid' not in request.scopes: + return token + + # Only add an id token on auth/token step if asked for. + if request.response_type and 'id_token' not in request.response_type: + return token + + # Implementation mint its own id_token without help. + id_token = self.request_validator.get_id_token(token, token_handler, request) + if id_token: + token['id_token'] = id_token + return token + + # Fallback for asking some help from oauthlib framework. + # Start with technicals fields bound to the specification. + id_token = {} + id_token['aud'] = request.client_id + id_token['iat'] = int(time.time()) + + # nonce is REQUIRED when response_type value is: + # - id_token token (Implicit) + # - id_token (Implicit) + # - code id_token (Hybrid) + # - code id_token token (Hybrid) + # + # nonce is OPTIONAL when response_type value is: + # - code (Authorization Code) + # - code token (Hybrid) + if nonce is not None: + id_token["nonce"] = nonce + + # at_hash is REQUIRED when response_type value is: + # - id_token token (Implicit) + # - code id_token token (Hybrid) + # + # at_hash is OPTIONAL when: + # - code (Authorization code) + # - code id_token (Hybrid) + # - code token (Hybrid) + # + # at_hash MAY NOT be used when: + # - id_token (Implicit) + if "access_token" in token: + id_token["at_hash"] = self.id_token_hash(token["access_token"]) + + # c_hash is REQUIRED when response_type value is: + # - code id_token (Hybrid) + # - code id_token token (Hybrid) + # + # c_hash is OPTIONAL for others. + if "code" in token: + id_token["c_hash"] = self.id_token_hash(token["code"]) + + # Call request_validator to complete/sign/encrypt id_token + token['id_token'] = self.request_validator.finalize_id_token(id_token, token, token_handler, request) + + return token + + def openid_authorization_validator(self, request): + """Perform OpenID Connect specific authorization request validation. + + nonce + OPTIONAL. String value used to associate a Client session with + an ID Token, and to mitigate replay attacks. The value is + passed through unmodified from the Authentication Request to + the ID Token. Sufficient entropy MUST be present in the nonce + values used to prevent attackers from guessing values + + display + OPTIONAL. ASCII string value that specifies how the + Authorization Server displays the authentication and consent + user interface pages to the End-User. The defined values are: + + page - The Authorization Server SHOULD display the + authentication and consent UI consistent with a full User + Agent page view. If the display parameter is not specified, + this is the default display mode. + + popup - The Authorization Server SHOULD display the + authentication and consent UI consistent with a popup User + Agent window. The popup User Agent window should be of an + appropriate size for a login-focused dialog and should not + obscure the entire window that it is popping up over. + + touch - The Authorization Server SHOULD display the + authentication and consent UI consistent with a device that + leverages a touch interface. + + wap - The Authorization Server SHOULD display the + authentication and consent UI consistent with a "feature + phone" type display. + + The Authorization Server MAY also attempt to detect the + capabilities of the User Agent and present an appropriate + display. + + prompt + OPTIONAL. Space delimited, case sensitive list of ASCII string + values that specifies whether the Authorization Server prompts + the End-User for reauthentication and consent. The defined + values are: + + none - The Authorization Server MUST NOT display any + authentication or consent user interface pages. An error is + returned if an End-User is not already authenticated or the + Client does not have pre-configured consent for the + requested Claims or does not fulfill other conditions for + processing the request. The error code will typically be + login_required, interaction_required, or another code + defined in Section 3.1.2.6. This can be used as a method to + check for existing authentication and/or consent. + + login - The Authorization Server SHOULD prompt the End-User + for reauthentication. If it cannot reauthenticate the + End-User, it MUST return an error, typically + login_required. + + consent - The Authorization Server SHOULD prompt the + End-User for consent before returning information to the + Client. If it cannot obtain consent, it MUST return an + error, typically consent_required. + + select_account - The Authorization Server SHOULD prompt the + End-User to select a user account. This enables an End-User + who has multiple accounts at the Authorization Server to + select amongst the multiple accounts that they might have + current sessions for. If it cannot obtain an account + selection choice made by the End-User, it MUST return an + error, typically account_selection_required. + + The prompt parameter can be used by the Client to make sure + that the End-User is still present for the current session or + to bring attention to the request. If this parameter contains + none with any other value, an error is returned. + + max_age + OPTIONAL. Maximum Authentication Age. Specifies the allowable + elapsed time in seconds since the last time the End-User was + actively authenticated by the OP. If the elapsed time is + greater than this value, the OP MUST attempt to actively + re-authenticate the End-User. (The max_age request parameter + corresponds to the OpenID 2.0 PAPE [OpenID.PAPE] max_auth_age + request parameter.) When max_age is used, the ID Token returned + MUST include an auth_time Claim Value. + + ui_locales + OPTIONAL. End-User's preferred languages and scripts for the + user interface, represented as a space-separated list of BCP47 + [RFC5646] language tag values, ordered by preference. For + instance, the value "fr-CA fr en" represents a preference for + French as spoken in Canada, then French (without a region + designation), followed by English (without a region + designation). An error SHOULD NOT result if some or all of the + requested locales are not supported by the OpenID Provider. + + id_token_hint + OPTIONAL. ID Token previously issued by the Authorization + Server being passed as a hint about the End-User's current or + past authenticated session with the Client. If the End-User + identified by the ID Token is logged in or is logged in by the + request, then the Authorization Server returns a positive + response; otherwise, it SHOULD return an error, such as + login_required. When possible, an id_token_hint SHOULD be + present when prompt=none is used and an invalid_request error + MAY be returned if it is not; however, the server SHOULD + respond successfully when possible, even if it is not present. + The Authorization Server need not be listed as an audience of + the ID Token when it is used as an id_token_hint value. If the + ID Token received by the RP from the OP is encrypted, to use it + as an id_token_hint, the Client MUST decrypt the signed ID + Token contained within the encrypted ID Token. The Client MAY + re-encrypt the signed ID token to the Authentication Server + using a key that enables the server to decrypt the ID Token, + and use the re-encrypted ID token as the id_token_hint value. + + login_hint + OPTIONAL. Hint to the Authorization Server about the login + identifier the End-User might use to log in (if necessary). + This hint can be used by an RP if it first asks the End-User + for their e-mail address (or other identifier) and then wants + to pass that value as a hint to the discovered authorization + service. It is RECOMMENDED that the hint value match the value + used for discovery. This value MAY also be a phone number in + the format specified for the phone_number Claim. The use of + this parameter is left to the OP's discretion. + + acr_values + OPTIONAL. Requested Authentication Context Class Reference + values. Space-separated string that specifies the acr values + that the Authorization Server is being requested to use for + processing this Authentication Request, with the values + appearing in order of preference. The Authentication Context + Class satisfied by the authentication performed is returned as + the acr Claim Value, as specified in Section 2. The acr Claim + is requested as a Voluntary Claim by this parameter. + """ + + # Treat it as normal OAuth 2 auth code request if openid is not present + if not request.scopes or 'openid' not in request.scopes: + return {} + + prompt = request.prompt if request.prompt else [] + if hasattr(prompt, 'split'): + prompt = prompt.strip().split() + prompt = set(prompt) + + if 'none' in prompt: + + if len(prompt) > 1: + msg = "Prompt none is mutually exclusive with other values." + raise InvalidRequestError(request=request, description=msg) + + if not self.request_validator.validate_silent_login(request): + raise LoginRequired(request=request) + + if not self.request_validator.validate_silent_authorization(request): + raise ConsentRequired(request=request) + + self._inflate_claims(request) + + if not self.request_validator.validate_user_match( + request.id_token_hint, request.scopes, request.claims, request): + msg = "Session user does not match client supplied user." + raise LoginRequired(request=request, description=msg) + + request_info = { + 'display': request.display, + 'nonce': request.nonce, + 'prompt': prompt, + 'ui_locales': request.ui_locales.split() if request.ui_locales else [], + 'id_token_hint': request.id_token_hint, + 'login_hint': request.login_hint, + 'claims': request.claims + } + + return request_info + + +OpenIDConnectBase = GrantTypeBase diff --git a/lib/oauthlib/openid/connect/core/grant_types/dispatchers.py b/lib/oauthlib/openid/connect/core/grant_types/dispatchers.py new file mode 100644 index 00000000..2734c387 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/grant_types/dispatchers.py @@ -0,0 +1,101 @@ +import logging + +log = logging.getLogger(__name__) + + +class Dispatcher: + default_grant = None + oidc_grant = None + + +class AuthorizationCodeGrantDispatcher(Dispatcher): + """ + This is an adapter class that will route simple Authorization Code + requests, those that have `response_type=code` and a scope including + `openid` to either the `default_grant` or the `oidc_grant` based on + the scopes requested. + """ + def __init__(self, default_grant=None, oidc_grant=None): + self.default_grant = default_grant + self.oidc_grant = oidc_grant + + def _handler_for_request(self, request): + handler = self.default_grant + + if request.scopes and "openid" in request.scopes: + handler = self.oidc_grant + + log.debug('Selecting handler for request %r.', handler) + return handler + + def create_authorization_response(self, request, token_handler): + """Read scope and route to the designated handler.""" + return self._handler_for_request(request).create_authorization_response(request, token_handler) + + def validate_authorization_request(self, request): + """Read scope and route to the designated handler.""" + return self._handler_for_request(request).validate_authorization_request(request) + + +class ImplicitTokenGrantDispatcher(Dispatcher): + """ + This is an adapter class that will route simple Authorization + requests, those that have `id_token` in `response_type` and a scope + including `openid` to either the `default_grant` or the `oidc_grant` + based on the scopes requested. + """ + def __init__(self, default_grant=None, oidc_grant=None): + self.default_grant = default_grant + self.oidc_grant = oidc_grant + + def _handler_for_request(self, request): + handler = self.default_grant + + if request.scopes and "openid" in request.scopes and 'id_token' in request.response_type: + handler = self.oidc_grant + + log.debug('Selecting handler for request %r.', handler) + return handler + + def create_authorization_response(self, request, token_handler): + """Read scope and route to the designated handler.""" + return self._handler_for_request(request).create_authorization_response(request, token_handler) + + def validate_authorization_request(self, request): + """Read scope and route to the designated handler.""" + return self._handler_for_request(request).validate_authorization_request(request) + + +class AuthorizationTokenGrantDispatcher(Dispatcher): + """ + This is an adapter class that will route simple Token requests, those that authorization_code have a scope + including 'openid' to either the default_grant or the oidc_grant based on the scopes requested. + """ + def __init__(self, request_validator, default_grant=None, oidc_grant=None): + self.default_grant = default_grant + self.oidc_grant = oidc_grant + self.request_validator = request_validator + + def _handler_for_request(self, request): + handler = self.default_grant + scopes = () + parameters = dict(request.decoded_body) + client_id = parameters.get('client_id', None) + code = parameters.get('code', None) + redirect_uri = parameters.get('redirect_uri', None) + + # If code is not pressent fallback to `default_grant` which will + # raise an error for the missing `code` in `create_token_response` step. + if code: + scopes = self.request_validator.get_authorization_code_scopes(client_id, code, redirect_uri, request) + + if 'openid' in scopes: + handler = self.oidc_grant + + log.debug('Selecting handler for request %r.', handler) + return handler + + def create_token_response(self, request, token_handler): + """Read scope and route to the designated handler.""" + handler = self._handler_for_request(request) + return handler.create_token_response(request, token_handler) diff --git a/lib/oauthlib/openid/connect/core/grant_types/hybrid.py b/lib/oauthlib/openid/connect/core/grant_types/hybrid.py new file mode 100644 index 00000000..7cb0758b --- /dev/null +++ b/lib/oauthlib/openid/connect/core/grant_types/hybrid.py @@ -0,0 +1,63 @@ +""" +oauthlib.openid.connect.core.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging + +from oauthlib.oauth2.rfc6749.errors import InvalidRequestError +from oauthlib.oauth2.rfc6749.grant_types.authorization_code import ( + AuthorizationCodeGrant as OAuth2AuthorizationCodeGrant, +) + +from ..request_validator import RequestValidator +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class HybridGrant(GrantTypeBase): + + def __init__(self, request_validator=None, **kwargs): + self.request_validator = request_validator or RequestValidator() + + self.proxy_target = OAuth2AuthorizationCodeGrant( + request_validator=request_validator, **kwargs) + # All hybrid response types should be fragment-encoded. + self.proxy_target.default_response_mode = "fragment" + self.register_response_type('code id_token') + self.register_response_type('code token') + self.register_response_type('code id_token token') + self.custom_validators.post_auth.append( + self.openid_authorization_validator) + # Hybrid flows can return the id_token from the authorization + # endpoint as part of the 'code' response + self.register_code_modifier(self.add_token) + self.register_code_modifier(self.add_id_token) + self.register_token_modifier(self.add_id_token) + + def add_id_token(self, token, token_handler, request): + return super().add_id_token(token, token_handler, request, nonce=request.nonce) + + def openid_authorization_validator(self, request): + """Additional validation when following the Authorization Code flow. + """ + request_info = super().openid_authorization_validator(request) + if not request_info: # returns immediately if OAuth2.0 + return request_info + + # REQUIRED if the Response Type of the request is `code + # id_token` or `code id_token token` and OPTIONAL when the + # Response Type of the request is `code token`. It is a string + # value used to associate a Client session with an ID Token, + # and to mitigate replay attacks. The value is passed through + # unmodified from the Authentication Request to the ID + # Token. Sufficient entropy MUST be present in the `nonce` + # values used to prevent attackers from guessing values. For + # implementation notes, see Section 15.5.2. + if request.response_type in ["code id_token", "code id_token token"]: + if not request.nonce: + raise InvalidRequestError( + request=request, + description='Request is missing mandatory nonce parameter.' + ) + return request_info diff --git a/lib/oauthlib/openid/connect/core/grant_types/implicit.py b/lib/oauthlib/openid/connect/core/grant_types/implicit.py new file mode 100644 index 00000000..a4fe6049 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/grant_types/implicit.py @@ -0,0 +1,51 @@ +""" +oauthlib.openid.connect.core.grant_types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging + +from oauthlib.oauth2.rfc6749.errors import InvalidRequestError +from oauthlib.oauth2.rfc6749.grant_types.implicit import ( + ImplicitGrant as OAuth2ImplicitGrant, +) + +from .base import GrantTypeBase + +log = logging.getLogger(__name__) + + +class ImplicitGrant(GrantTypeBase): + + def __init__(self, request_validator=None, **kwargs): + self.proxy_target = OAuth2ImplicitGrant( + request_validator=request_validator, **kwargs) + self.register_response_type('id_token') + self.register_response_type('id_token token') + self.custom_validators.post_auth.append( + self.openid_authorization_validator) + self.register_token_modifier(self.add_id_token) + + def add_id_token(self, token, token_handler, request): + if 'state' not in token and request.state: + token['state'] = request.state + return super().add_id_token(token, token_handler, request, nonce=request.nonce) + + def openid_authorization_validator(self, request): + """Additional validation when following the implicit flow. + """ + request_info = super().openid_authorization_validator(request) + if not request_info: # returns immediately if OAuth2.0 + return request_info + + # REQUIRED. String value used to associate a Client session with an ID + # Token, and to mitigate replay attacks. The value is passed through + # unmodified from the Authentication Request to the ID Token. + # Sufficient entropy MUST be present in the nonce values used to + # prevent attackers from guessing values. For implementation notes, see + # Section 15.5.2. + if not request.nonce: + raise InvalidRequestError( + request=request, + description='Request is missing mandatory nonce parameter.' + ) + return request_info diff --git a/lib/oauthlib/openid/connect/core/request_validator.py b/lib/oauthlib/openid/connect/core/request_validator.py new file mode 100644 index 00000000..e8f334b0 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/request_validator.py @@ -0,0 +1,308 @@ +""" +oauthlib.openid.connect.core.request_validator +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" +import logging + +from oauthlib.oauth2.rfc6749.request_validator import ( + RequestValidator as OAuth2RequestValidator, +) + +log = logging.getLogger(__name__) + + +class RequestValidator(OAuth2RequestValidator): + + def get_authorization_code_scopes(self, client_id, code, redirect_uri, request): + """ Extracts scopes from saved authorization code. + + The scopes returned by this method is used to route token requests + based on scopes passed to Authorization Code requests. + + With that the token endpoint knows when to include OpenIDConnect + id_token in token response only based on authorization code scopes. + + Only code param should be sufficient to retrieve grant code from + any storage you are using, `client_id` and `redirect_uri` can have a + blank value `""` don't forget to check it before using those values + in a select query if a database is used. + + :param client_id: Unicode client identifier + :param code: Unicode authorization code grant + :param redirect_uri: Unicode absolute URI + :return: A list of scope + + Method is used by: + - Authorization Token Grant Dispatcher + """ + raise NotImplementedError('Subclasses must implement this method.') + + def get_authorization_code_nonce(self, client_id, code, redirect_uri, request): + """ Extracts nonce from saved authorization code. + + If present in the Authentication Request, Authorization + Servers MUST include a nonce Claim in the ID Token with the + Claim Value being the nonce value sent in the Authentication + Request. Authorization Servers SHOULD perform no other + processing on nonce values used. The nonce value is a + case-sensitive string. + + Only code param should be sufficient to retrieve grant code from + any storage you are using. However, `client_id` and `redirect_uri` + have been validated and can be used also. + + :param client_id: Unicode client identifier + :param code: Unicode authorization code grant + :param redirect_uri: Unicode absolute URI + :return: Unicode nonce + + Method is used by: + - Authorization Token Grant Dispatcher + """ + raise NotImplementedError('Subclasses must implement this method.') + + def get_jwt_bearer_token(self, token, token_handler, request): + """Get JWT Bearer token or OpenID Connect ID token + + If using OpenID Connect this SHOULD call `oauthlib.oauth2.RequestValidator.get_id_token` + + :param token: A Bearer token dict + :param token_handler: the token handler (BearerToken class) + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :return: The JWT Bearer token or OpenID Connect ID token (a JWS signed JWT) + + Method is used by JWT Bearer and OpenID Connect tokens: + - JWTToken.create_token + """ + raise NotImplementedError('Subclasses must implement this method.') + + def get_id_token(self, token, token_handler, request): + """Get OpenID Connect ID token + + This method is OPTIONAL and is NOT RECOMMENDED. + `finalize_id_token` SHOULD be implemented instead. However, if you + want a full control over the minting of the `id_token`, you + MAY want to override `get_id_token` instead of using + `finalize_id_token`. + + In the OpenID Connect workflows when an ID Token is requested this method is called. + Subclasses should implement the construction, signing and optional encryption of the + ID Token as described in the OpenID Connect spec. + + In addition to the standard OAuth2 request properties, the request may also contain + these OIDC specific properties which are useful to this method: + + - nonce, if workflow is implicit or hybrid and it was provided + - claims, if provided to the original Authorization Code request + + The token parameter is a dict which may contain an ``access_token`` entry, in which + case the resulting ID Token *should* include a calculated ``at_hash`` claim. + + Similarly, when the request parameter has a ``code`` property defined, the ID Token + *should* include a calculated ``c_hash`` claim. + + http://openid.net/specs/openid-connect-core-1_0.html (sections `3.1.3.6`_, `3.2.2.10`_, `3.3.2.11`_) + + .. _`3.1.3.6`: http://openid.net/specs/openid-connect-core-1_0.html#CodeIDToken + .. _`3.2.2.10`: http://openid.net/specs/openid-connect-core-1_0.html#ImplicitIDToken + .. _`3.3.2.11`: http://openid.net/specs/openid-connect-core-1_0.html#HybridIDToken + + :param token: A Bearer token dict + :param token_handler: the token handler (BearerToken class) + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :return: The ID Token (a JWS signed JWT) + """ + return None + + def finalize_id_token(self, id_token, token, token_handler, request): + """Finalize OpenID Connect ID token & Sign or Encrypt. + + In the OpenID Connect workflows when an ID Token is requested + this method is called. Subclasses should implement the + construction, signing and optional encryption of the ID Token + as described in the OpenID Connect spec. + + The `id_token` parameter is a dict containing a couple of OIDC + technical fields related to the specification. Prepopulated + attributes are: + + - `aud`, equals to `request.client_id`. + - `iat`, equals to current time. + - `nonce`, if present, is equals to the `nonce` from the + authorization request. + - `at_hash`, hash of `access_token`, if relevant. + - `c_hash`, hash of `code`, if relevant. + + This method MUST provide required fields as below: + + - `iss`, REQUIRED. Issuer Identifier for the Issuer of the response. + - `sub`, REQUIRED. Subject Identifier + - `exp`, REQUIRED. Expiration time on or after which the ID + Token MUST NOT be accepted by the RP when performing + authentication with the OP. + + Additionals claims must be added, note that `request.scope` + should be used to determine the list of claims. + + More information can be found at `OpenID Connect Core#Claims`_ + + .. _`OpenID Connect Core#Claims`: https://openid.net/specs/openid-connect-core-1_0.html#Claims + + :param id_token: A dict containing technical fields of id_token + :param token: A Bearer token dict + :param token_handler: the token handler (BearerToken class) + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :return: The ID Token (a JWS signed JWT or JWE encrypted JWT) + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_jwt_bearer_token(self, token, scopes, request): + """Ensure the JWT Bearer token or OpenID Connect ID token are valids and authorized access to scopes. + + If using OpenID Connect this SHOULD call `oauthlib.oauth2.RequestValidator.get_id_token` + + If not using OpenID Connect this can `return None` to avoid 5xx rather 401/3 response. + + OpenID connect core 1.0 describe how to validate an id_token: + - http://openid.net/specs/openid-connect-core-1_0.html#IDTokenValidation + - http://openid.net/specs/openid-connect-core-1_0.html#ImplicitIDTValidation + - http://openid.net/specs/openid-connect-core-1_0.html#HybridIDTValidation + - http://openid.net/specs/openid-connect-core-1_0.html#HybridIDTValidation2 + + :param token: Unicode Bearer token + :param scopes: List of scopes (defined by you) + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is indirectly used by all core OpenID connect JWT token issuing grant types: + - Authorization Code Grant + - Implicit Grant + - Hybrid Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_id_token(self, token, scopes, request): + """Ensure the id token is valid and authorized access to scopes. + + OpenID connect core 1.0 describe how to validate an id_token: + - http://openid.net/specs/openid-connect-core-1_0.html#IDTokenValidation + - http://openid.net/specs/openid-connect-core-1_0.html#ImplicitIDTValidation + - http://openid.net/specs/openid-connect-core-1_0.html#HybridIDTValidation + - http://openid.net/specs/openid-connect-core-1_0.html#HybridIDTValidation2 + + :param token: Unicode Bearer token + :param scopes: List of scopes (defined by you) + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is indirectly used by all core OpenID connect JWT token issuing grant types: + - Authorization Code Grant + - Implicit Grant + - Hybrid Grant + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_silent_authorization(self, request): + """Ensure the logged in user has authorized silent OpenID authorization. + + Silent OpenID authorization allows access tokens and id tokens to be + granted to clients without any user prompt or interaction. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - OpenIDConnectAuthCode + - OpenIDConnectImplicit + - OpenIDConnectHybrid + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_silent_login(self, request): + """Ensure session user has authorized silent OpenID login. + + If no user is logged in or has not authorized silent login, this + method should return False. + + If the user is logged in but associated with multiple accounts and + not selected which one to link to the token then this method should + raise an oauthlib.oauth2.AccountSelectionRequired error. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - OpenIDConnectAuthCode + - OpenIDConnectImplicit + - OpenIDConnectHybrid + """ + raise NotImplementedError('Subclasses must implement this method.') + + def validate_user_match(self, id_token_hint, scopes, claims, request): + """Ensure client supplied user id hint matches session user. + + If the sub claim or id_token_hint is supplied then the session + user must match the given ID. + + :param id_token_hint: User identifier string. + :param scopes: List of OAuth 2 scopes and OpenID claims (strings). + :param claims: OpenID Connect claims dict. + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: True or False + + Method is used by: + - OpenIDConnectAuthCode + - OpenIDConnectImplicit + - OpenIDConnectHybrid + """ + raise NotImplementedError('Subclasses must implement this method.') + + def get_userinfo_claims(self, request): + """Return the UserInfo claims in JSON or Signed or Encrypted. + + The UserInfo Claims MUST be returned as the members of a JSON object + unless a signed or encrypted response was requested during Client + Registration. The Claims defined in Section 5.1 can be returned, as can + additional Claims not specified there. + + For privacy reasons, OpenID Providers MAY elect to not return values for + some requested Claims. + + If a Claim is not returned, that Claim Name SHOULD be omitted from the + JSON object representing the Claims; it SHOULD NOT be present with a + null or empty string value. + + The sub (subject) Claim MUST always be returned in the UserInfo + Response. + + Upon receipt of the UserInfo Request, the UserInfo Endpoint MUST return + the JSON Serialization of the UserInfo Response as in Section 13.3 in + the HTTP response body unless a different format was specified during + Registration [OpenID.Registration]. + + If the UserInfo Response is signed and/or encrypted, then the Claims are + returned in a JWT and the content-type MUST be application/jwt. The + response MAY be encrypted without also being signed. If both signing and + encryption are requested, the response MUST be signed then encrypted, + with the result being a Nested JWT, as defined in [JWT]. + + If signed, the UserInfo Response SHOULD contain the Claims iss (issuer) + and aud (audience) as members. The iss value SHOULD be the OP's Issuer + Identifier URL. The aud value SHOULD be or include the RP's Client ID + value. + + :param request: OAuthlib request. + :type request: oauthlib.common.Request + :rtype: Claims as a dict OR JWT/JWS/JWE as a string + + Method is used by: + UserInfoEndpoint + """ diff --git a/lib/oauthlib/openid/connect/core/tokens.py b/lib/oauthlib/openid/connect/core/tokens.py new file mode 100644 index 00000000..a312e2d2 --- /dev/null +++ b/lib/oauthlib/openid/connect/core/tokens.py @@ -0,0 +1,46 @@ +""" +authlib.openid.connect.core.tokens +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This module contains methods for adding JWT tokens to requests. +""" +from oauthlib.oauth2.rfc6749.tokens import TokenBase, random_token_generator, get_token_from_header + + +class JWTToken(TokenBase): + __slots__ = ( + 'request_validator', 'token_generator', + 'refresh_token_generator', 'expires_in' + ) + + def __init__(self, request_validator=None, token_generator=None, + expires_in=None, refresh_token_generator=None): + self.request_validator = request_validator + self.token_generator = token_generator or random_token_generator + self.refresh_token_generator = ( + refresh_token_generator or self.token_generator + ) + self.expires_in = expires_in or 3600 + + def create_token(self, request, refresh_token=False): + """Create a JWT Token, using requestvalidator method.""" + + if callable(self.expires_in): + expires_in = self.expires_in(request) + else: + expires_in = self.expires_in + + request.expires_in = expires_in + + return self.request_validator.get_jwt_bearer_token(None, None, request) + + def validate_request(self, request): + token = get_token_from_header(request) + return self.request_validator.validate_jwt_bearer_token( + token, request.scopes, request) + + def estimate_type(self, request): + token = get_token_from_header(request) + if token and token.startswith('ey') and token.count('.') in (2, 4): + return 10 + return 0 diff --git a/lib/oauthlib/signals.py b/lib/oauthlib/signals.py new file mode 100644 index 00000000..8fd347a5 --- /dev/null +++ b/lib/oauthlib/signals.py @@ -0,0 +1,40 @@ +""" + Implements signals based on blinker if available, otherwise + falls silently back to a noop. Shamelessly stolen from flask.signals: + https://github.com/mitsuhiko/flask/blob/master/flask/signals.py +""" +signals_available = False +try: + from blinker import Namespace + signals_available = True +except ImportError: # noqa + class Namespace: + def signal(self, name, doc=None): + return _FakeSignal(name, doc) + + class _FakeSignal: + """If blinker is unavailable, create a fake class with the same + interface that allows sending of signals but will fail with an + error on anything else. Instead of doing anything on send, it + will just ignore the arguments and do nothing instead. + """ + + def __init__(self, name, doc=None): + self.name = name + self.__doc__ = doc + def _fail(self, *args, **kwargs): + raise RuntimeError('signalling support is unavailable ' + 'because the blinker library is ' + 'not installed.') + send = lambda *a, **kw: None + connect = disconnect = has_receivers_for = receivers_for = \ + temporarily_connected_to = connected_to = _fail + del _fail + +# The namespace for code signals. If you are not oauthlib code, do +# not put signals in here. Create your own namespace instead. +_signals = Namespace() + + +# Core signals. +scope_changed = _signals.signal('scope-changed') diff --git a/lib/oauthlib/uri_validate.py b/lib/oauthlib/uri_validate.py new file mode 100644 index 00000000..8a6d9c27 --- /dev/null +++ b/lib/oauthlib/uri_validate.py @@ -0,0 +1,190 @@ +""" +Regex for URIs + +These regex are directly derived from the collected ABNF in RFC3986 +(except for DIGIT, ALPHA and HEXDIG, defined by RFC2234). + +They should be processed with re.VERBOSE. + +Thanks Mark Nottingham for this code - https://gist.github.com/138549 +""" +import re + +# basics + +DIGIT = r"[\x30-\x39]" + +ALPHA = r"[\x41-\x5A\x61-\x7A]" + +HEXDIG = r"[\x30-\x39A-Fa-f]" + +# pct-encoded = "%" HEXDIG HEXDIG +pct_encoded = r" %% %(HEXDIG)s %(HEXDIG)s" % locals() + +# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +unreserved = r"(?: %(ALPHA)s | %(DIGIT)s | \- | \. | _ | ~ )" % locals() + +# gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +gen_delims = r"(?: : | / | \? | \# | \[ | \] | @ )" + +# sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +# / "*" / "+" / "," / ";" / "=" +sub_delims = r"""(?: ! | \$ | & | ' | \( | \) | + \* | \+ | , | ; | = )""" + +# pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +pchar = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | : | @ )" % locals( +) + +# reserved = gen-delims / sub-delims +reserved = r"(?: %(gen_delims)s | %(sub_delims)s )" % locals() + + +# scheme + +# scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +scheme = r"%(ALPHA)s (?: %(ALPHA)s | %(DIGIT)s | \+ | \- | \. )*" % locals() + + +# authority + +# dec-octet = DIGIT ; 0-9 +# / %x31-39 DIGIT ; 10-99 +# / "1" 2DIGIT ; 100-199 +# / "2" %x30-34 DIGIT ; 200-249 +# / "25" %x30-35 ; 250-255 +dec_octet = r"""(?: %(DIGIT)s | + [\x31-\x39] %(DIGIT)s | + 1 %(DIGIT)s{2} | + 2 [\x30-\x34] %(DIGIT)s | + 25 [\x30-\x35] + ) +""" % locals() + +# IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +IPv4address = r"%(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s" % locals( +) + +# IPv6address +IPv6address = r"([A-Fa-f0-9:]+:+)+[A-Fa-f0-9]+" + +# IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +IPvFuture = r"v %(HEXDIG)s+ \. (?: %(unreserved)s | %(sub_delims)s | : )+" % locals() + +# IP-literal = "[" ( IPv6address / IPvFuture ) "]" +IP_literal = r"\[ (?: %(IPv6address)s | %(IPvFuture)s ) \]" % locals() + +# reg-name = *( unreserved / pct-encoded / sub-delims ) +reg_name = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s )*" % locals() + +# userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +userinfo = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | : )" % locals( +) + +# host = IP-literal / IPv4address / reg-name +host = r"(?: %(IP_literal)s | %(IPv4address)s | %(reg_name)s )" % locals() + +# port = *DIGIT +port = r"(?: %(DIGIT)s )*" % locals() + +# authority = [ userinfo "@" ] host [ ":" port ] +authority = r"(?: %(userinfo)s @)? %(host)s (?: : %(port)s)?" % locals() + +# Path + +# segment = *pchar +segment = r"%(pchar)s*" % locals() + +# segment-nz = 1*pchar +segment_nz = r"%(pchar)s+" % locals() + +# segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +# ; non-zero-length segment without any colon ":" +segment_nz_nc = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | @ )+" % locals() + +# path-abempty = *( "/" segment ) +path_abempty = r"(?: / %(segment)s )*" % locals() + +# path-absolute = "/" [ segment-nz *( "/" segment ) ] +path_absolute = r"/ (?: %(segment_nz)s (?: / %(segment)s )* )?" % locals() + +# path-noscheme = segment-nz-nc *( "/" segment ) +path_noscheme = r"%(segment_nz_nc)s (?: / %(segment)s )*" % locals() + +# path-rootless = segment-nz *( "/" segment ) +path_rootless = r"%(segment_nz)s (?: / %(segment)s )*" % locals() + +# path-empty = 0 +path_empty = r"" # FIXME + +# path = path-abempty ; begins with "/" or is empty +# / path-absolute ; begins with "/" but not "//" +# / path-noscheme ; begins with a non-colon segment +# / path-rootless ; begins with a segment +# / path-empty ; zero characters +path = r"""(?: %(path_abempty)s | + %(path_absolute)s | + %(path_noscheme)s | + %(path_rootless)s | + %(path_empty)s + ) +""" % locals() + +### Query and Fragment + +# query = *( pchar / "/" / "?" ) +query = r"(?: %(pchar)s | / | \? )*" % locals() + +# fragment = *( pchar / "/" / "?" ) +fragment = r"(?: %(pchar)s | / | \? )*" % locals() + +# URIs + +# hier-part = "//" authority path-abempty +# / path-absolute +# / path-rootless +# / path-empty +hier_part = r"""(?: (?: // %(authority)s %(path_abempty)s ) | + %(path_absolute)s | + %(path_rootless)s | + %(path_empty)s + ) +""" % locals() + +# relative-part = "//" authority path-abempty +# / path-absolute +# / path-noscheme +# / path-empty +relative_part = r"""(?: (?: // %(authority)s %(path_abempty)s ) | + %(path_absolute)s | + %(path_noscheme)s | + %(path_empty)s + ) +""" % locals() + +# relative-ref = relative-part [ "?" query ] [ "#" fragment ] +relative_ref = r"%(relative_part)s (?: \? %(query)s)? (?: \# %(fragment)s)?" % locals( +) + +# URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +URI = r"^(?: %(scheme)s : %(hier_part)s (?: \? %(query)s )? (?: \# %(fragment)s )? )$" % locals( +) + +# URI-reference = URI / relative-ref +URI_reference = r"^(?: %(URI)s | %(relative_ref)s )$" % locals() + +# absolute-URI = scheme ":" hier-part [ "?" query ] +absolute_URI = r"^(?: %(scheme)s : %(hier_part)s (?: \? %(query)s )? )$" % locals( +) + + +def is_uri(uri): + return re.match(URI, uri, re.VERBOSE) + + +def is_uri_reference(uri): + return re.match(URI_reference, uri, re.VERBOSE) + + +def is_absolute_uri(uri): + return re.match(absolute_URI, uri, re.VERBOSE) diff --git a/lib/ordereddict.py b/lib/ordereddict.py index 7242b506..a5b896dd 100644 --- a/lib/ordereddict.py +++ b/lib/ordereddict.py @@ -70,9 +70,9 @@ class OrderedDict(dict, DictMixin): if not self: raise KeyError('dictionary is empty') if last: - key = reversed(self).next() + key = next(reversed(self)) else: - key = iter(self).next() + key = next(iter(self)) value = self.pop(key) return key, value @@ -101,7 +101,7 @@ class OrderedDict(dict, DictMixin): def __repr__(self): if not self: return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) def copy(self): return self.__class__(self) @@ -117,7 +117,7 @@ class OrderedDict(dict, DictMixin): if isinstance(other, OrderedDict): if len(self) != len(other): return False - for p, q in zip(self.items(), other.items()): + for p, q in zip(list(self.items()), list(other.items())): if p != q: return False return True diff --git a/lib/osxnotify/registerapp.py b/lib/osxnotify/registerapp.py index 51197d73..ed9d4b5b 100644 --- a/lib/osxnotify/registerapp.py +++ b/lib/osxnotify/registerapp.py @@ -129,5 +129,5 @@ if __name__ == '__main__': return app_path, 'App registered' - except Exception, e: + except Exception as e: return None, 'Error creating App %s. %s' % (app_path, e) \ No newline at end of file diff --git a/lib/pkg_resources.py b/lib/pkg_resources/__init__.py similarity index 64% rename from lib/pkg_resources.py rename to lib/pkg_resources/__init__.py index 511068a6..f98516d1 100644 --- a/lib/pkg_resources.py +++ b/lib/pkg_resources/__init__.py @@ -19,36 +19,38 @@ import os import io import time import re -import imp +import types import zipfile import zipimport import warnings import stat import functools import pkgutil -import token -import symbol import operator import platform import collections import plistlib import email.parser +import errno import tempfile +import textwrap +import itertools +import inspect +import ntpath +import posixpath +import importlib from pkgutil import get_importer -PY3 = sys.version_info > (3,) -PY2 = not PY3 +try: + import _imp +except ImportError: + # Python 3.2 compatibility + import imp as _imp -if PY3: - from urllib.parse import urlparse, urlunparse - -if PY2: - from urlparse import urlparse, urlunparse - -if PY3: - string_types = str, -else: - string_types = str, eval('unicode') +try: + FileExistsError +except NameError: + FileExistsError = OSError # capture these to bypass sandboxing from os import utime @@ -62,50 +64,103 @@ except ImportError: from os import open as os_open from os.path import isdir, split -# Avoid try/except due to potential problems with delayed import mechanisms. -if sys.version_info >= (3, 3) and sys.implementation.name == "cpython": - import importlib._bootstrap as importlib_bootstrap -else: - importlib_bootstrap = None - try: - import parser + import importlib.machinery as importlib_machinery + # access attribute to force import under delayed import mechanisms. + importlib_machinery.__name__ except ImportError: - pass + importlib_machinery = None + +from pkg_resources.extern import appdirs +from pkg_resources.extern import packaging +__import__('pkg_resources.extern.packaging.version') +__import__('pkg_resources.extern.packaging.specifiers') +__import__('pkg_resources.extern.packaging.requirements') +__import__('pkg_resources.extern.packaging.markers') + +if sys.version_info < (3, 5): + raise RuntimeError("Python 3.5 or later is required") + +# declare some globals that will be defined later to +# satisfy the linters. +require = None +working_set = None +add_activation_listener = None +resources_stream = None +cleanup_resources = None +resource_dir = None +resource_stream = None +set_extraction_path = None +resource_isdir = None +resource_string = None +iter_entry_points = None +resource_listdir = None +resource_filename = None +resource_exists = None +_distribution_finders = None +_namespace_handlers = None +_namespace_packages = None + + +class PEP440Warning(RuntimeWarning): + """ + Used when there is an issue with a version or specifier not complying with + PEP 440. + """ + + +def parse_version(v): + try: + return packaging.version.Version(v) + except packaging.version.InvalidVersion: + warnings.warn( + f"{v} is an invalid version and will not be supported in " + "a future release", + PkgResourcesDeprecationWarning, + ) + return packaging.version.LegacyVersion(v) _state_vars = {} + def _declare_state(vartype, **kw): globals().update(kw) _state_vars.update(dict.fromkeys(kw, vartype)) + def __getstate__(): state = {} g = globals() for k, v in _state_vars.items(): - state[k] = g['_sget_'+v](g[k]) + state[k] = g['_sget_' + v](g[k]) return state + def __setstate__(state): g = globals() for k, v in state.items(): - g['_sset_'+_state_vars[k]](k, g[k], v) + g['_sset_' + _state_vars[k]](k, g[k], v) return state + def _sget_dict(val): return val.copy() + def _sset_dict(key, ob, state): ob.clear() ob.update(state) + def _sget_object(val): return val.__getstate__() + def _sset_object(key, ob, state): ob.__setstate__(state) + _sget_none = _sset_none = lambda *args: None @@ -113,10 +168,10 @@ def get_supported_platform(): """Return this platform's maximum compatible version. distutils.util.get_platform() normally reports the minimum version - of Mac OS X that would be required to *use* extensions produced by + of macOS that would be required to *use* extensions produced by distutils. But what we want when checking compatibility is to know the - version of Mac OS X that we are *running*. To allow usage of packages that - explicitly require a newer version of Mac OS X, we must also know the + version of macOS that we are *running*. To allow usage of packages that + explicitly require a newer version of macOS, we must also know the current version of the OS. If this condition occurs for any other platform with a version in its @@ -126,15 +181,16 @@ def get_supported_platform(): m = macosVersionString.match(plat) if m is not None and sys.platform == "darwin": try: - plat = 'macosx-%s-%s' % ('.'.join(_macosx_vers()[:2]), m.group(3)) + plat = 'macosx-%s-%s' % ('.'.join(_macos_vers()[:2]), m.group(3)) except ValueError: - # not Mac OS X + # not macOS pass return plat + __all__ = [ # Basic resource access and distribution/entry point discovery - 'require', 'run_script', 'get_provider', 'get_distribution', + 'require', 'run_script', 'get_provider', 'get_distribution', 'load_entry_point', 'get_entry_map', 'get_entry_info', 'iter_entry_points', 'resource_string', 'resource_stream', 'resource_filename', @@ -153,6 +209,9 @@ __all__ = [ 'ResolutionError', 'VersionConflict', 'DistributionNotFound', 'UnknownExtra', 'ExtractionError', + # Warnings + 'PEP440Warning', + # Parsing functions and string utilities 'parse_requirements', 'parse_version', 'safe_name', 'safe_version', 'get_platform', 'compatible_platforms', 'yield_lines', 'split_sections', @@ -171,32 +230,107 @@ __all__ = [ 'register_finder', 'register_namespace_handler', 'register_loader_type', 'fixup_namespace_packages', 'get_importer', + # Warnings + 'PkgResourcesDeprecationWarning', + # Deprecated/backward compatibility only 'run_main', 'AvailableDistributions', ] + class ResolutionError(Exception): """Abstract base for dependency resolution errors""" + def __repr__(self): - return self.__class__.__name__+repr(self.args) + return self.__class__.__name__ + repr(self.args) + class VersionConflict(ResolutionError): - """An already-installed version conflicts with the requested version""" + """ + An already-installed version conflicts with the requested version. + + Should be initialized with the installed Distribution and the requested + Requirement. + """ + + _template = "{self.dist} is installed but {self.req} is required" + + @property + def dist(self): + return self.args[0] + + @property + def req(self): + return self.args[1] + + def report(self): + return self._template.format(**locals()) + + def with_context(self, required_by): + """ + If required_by is non-empty, return a version of self that is a + ContextualVersionConflict. + """ + if not required_by: + return self + args = self.args + (required_by,) + return ContextualVersionConflict(*args) + + +class ContextualVersionConflict(VersionConflict): + """ + A VersionConflict that accepts a third parameter, the set of the + requirements that required the installed Distribution. + """ + + _template = VersionConflict._template + ' by {self.required_by}' + + @property + def required_by(self): + return self.args[2] + class DistributionNotFound(ResolutionError): """A requested distribution was not found""" + _template = ("The '{self.req}' distribution was not found " + "and is required by {self.requirers_str}") + + @property + def req(self): + return self.args[0] + + @property + def requirers(self): + return self.args[1] + + @property + def requirers_str(self): + if not self.requirers: + return 'the application' + return ', '.join(self.requirers) + + def report(self): + return self._template.format(**locals()) + + def __str__(self): + return self.report() + + class UnknownExtra(ResolutionError): """Distribution doesn't have an "extra feature" of the given name""" + + _provider_factories = {} -PY_MAJOR = sys.version[:3] +PY_MAJOR = '{}.{}'.format(*sys.version_info) EGG_DIST = 3 BINARY_DIST = 2 SOURCE_DIST = 1 CHECKOUT_DIST = 0 DEVELOP_DIST = -1 + def register_loader_type(loader_type, provider_factory): """Register `provider_factory` to make providers for `loader_type` @@ -206,6 +340,7 @@ def register_loader_type(loader_type, provider_factory): """ _provider_factories[loader_type] = provider_factory + def get_provider(moduleOrReq): """Return an IResourceProvider for the named module or requirement""" if isinstance(moduleOrReq, Requirement): @@ -218,7 +353,8 @@ def get_provider(moduleOrReq): loader = getattr(module, '__loader__', None) return _find_adapter(_provider_factories, loader)(module) -def _macosx_vers(_cache=[]): + +def _macos_vers(_cache=[]): if not _cache: version = platform.mac_ver()[0] # fallback for MacPorts @@ -233,34 +369,35 @@ def _macosx_vers(_cache=[]): _cache.append(version.split('.')) return _cache[0] -def _macosx_arch(machine): + +def _macos_arch(machine): return {'PowerPC': 'ppc', 'Power_Macintosh': 'ppc'}.get(machine, machine) + def get_build_platform(): """Return this platform's string for platform-specific distributions XXX Currently this is the same as ``distutils.util.get_platform()``, but it - needs some hacks for Linux and Mac OS X. + needs some hacks for Linux and macOS. """ - try: - # Python 2.7 or >=3.2 - from sysconfig import get_platform - except ImportError: - from distutils.util import get_platform + from sysconfig import get_platform plat = get_platform() if sys.platform == "darwin" and not plat.startswith('macosx-'): try: - version = _macosx_vers() + version = _macos_vers() machine = os.uname()[4].replace(" ", "_") - return "macosx-%d.%d-%s" % (int(version[0]), int(version[1]), - _macosx_arch(machine)) + return "macosx-%d.%d-%s" % ( + int(version[0]), int(version[1]), + _macos_arch(machine), + ) except ValueError: # if someone is running a non-Mac darwin system, this will fall # through to the default implementation pass return plat + macosVersionString = re.compile(r"macosx-(\d+)\.(\d+)-(.*)") darwinVersionString = re.compile(r"darwin-(\d+)\.(\d+)\.(\d+)-(.*)") # XXX backward compat @@ -274,11 +411,11 @@ def compatible_platforms(provided, required): XXX Needs compatibility checks for Linux and other unixy OSes. """ - if provided is None or required is None or provided==required: + if provided is None or required is None or provided == required: # easy case return True - # Mac OS X special cases + # macOS special cases reqMac = macosVersionString.match(required) if reqMac: provMac = macosVersionString.match(provided) @@ -287,7 +424,7 @@ def compatible_platforms(provided, required): if not provMac: # this is backwards compatibility for packages built before # setuptools 0.6. All packages built after this point will - # use the new macosx designation. + # use the new macOS designation. provDarwin = darwinVersionString.match(provided) if provDarwin: dversion = int(provDarwin.group(1)) @@ -295,7 +432,7 @@ def compatible_platforms(provided, required): if dversion == 7 and macosversion >= "10.3" or \ dversion == 8 and macosversion >= "10.4": return True - # egg isn't macosx or legacy darwin + # egg isn't macOS or legacy darwin return False # are they the same major version and machine type? @@ -321,12 +458,14 @@ def run_script(dist_spec, script_name): ns['__name__'] = name require(dist_spec)[0].run_script(script_name, ns) + # backward compatibility run_main = run_script + def get_distribution(dist): """Return a current distribution object for a Requirement or string""" - if isinstance(dist, string_types): + if isinstance(dist, str): dist = Requirement.parse(dist) if isinstance(dist, Requirement): dist = get_provider(dist) @@ -334,21 +473,23 @@ def get_distribution(dist): raise TypeError("Expected string, Requirement, or Distribution", dist) return dist + def load_entry_point(dist, group, name): """Return `name` entry point of `group` for `dist` or raise ImportError""" return get_distribution(dist).load_entry_point(group, name) + def get_entry_map(dist, group=None): """Return the entry point map for `group`, or the full entry map""" return get_distribution(dist).get_entry_map(group) + def get_entry_info(dist, group, name): """Return the EntryPoint object for `group`+`name`, or ``None``""" return get_distribution(dist).get_entry_info(group, name) class IMetadataProvider: - def has_metadata(name): """Does the package's distribution contain the named metadata?""" @@ -399,7 +540,7 @@ class IResourceProvider(IMetadataProvider): """List of resource names in the directory (like ``os.listdir()``)""" -class WorkingSet(object): +class WorkingSet: """A collection of active distributions on sys.path (or a similar list)""" def __init__(self, entries=None): @@ -490,8 +631,7 @@ class WorkingSet(object): if dist is not None and dist not in req: # XXX add more info raise VersionConflict(dist, req) - else: - return dist + return dist def iter_entry_points(self, group, name=None): """Yield entry point objects from `group` matching `name` @@ -500,13 +640,12 @@ class WorkingSet(object): distributions in the working set, otherwise only ones matching both `group` and `name` are yielded (in distribution order). """ - for dist in self: - entries = dist.get_entry_map(group) - if name is None: - for ep in entries.values(): - yield ep - elif name in entries: - yield entries[name] + return ( + entry + for dist in self + for entry in dist.get_entry_map(group).values() + if name is None or name == entry.name + ) def run_script(self, requires, script_name): """Locate distribution for `requires` and run `script_name` script""" @@ -530,7 +669,7 @@ class WorkingSet(object): for key in self.entry_keys[item]: if key not in seen: - seen[key]=1 + seen[key] = 1 yield self.by_key[key] def add(self, dist, entry=None, insert=True, replace=False): @@ -546,12 +685,12 @@ class WorkingSet(object): will be called. """ if insert: - dist.insert_on(self.entries, entry) + dist.insert_on(self.entries, entry, replace=replace) if entry is None: entry = dist.location - keys = self.entry_keys.setdefault(entry,[]) - keys2 = self.entry_keys.setdefault(dist.location,[]) + keys = self.entry_keys.setdefault(entry, []) + keys2 = self.entry_keys.setdefault(dist.location, []) if not replace and dist.key in self.by_key: # ignore hidden distros return @@ -563,8 +702,9 @@ class WorkingSet(object): keys2.append(dist.key) self._added_new(dist) - def resolve(self, requirements, env=None, installer=None, - replace_conflicting=False): + # FIXME: 'WorkingSet.resolve' is too complex (11) + def resolve(self, requirements, env=None, installer=None, # noqa: C901 + replace_conflicting=False, extras=None): """List all distributions needed to (recursively) meet `requirements` `requirements` must be a sequence of ``Requirement`` objects. `env`, @@ -575,11 +715,18 @@ class WorkingSet(object): already-installed distribution; it should return a ``Distribution`` or ``None``. - Unless `replace_conflicting=True`, raises a VersionConflict exception if + Unless `replace_conflicting=True`, raises a VersionConflict exception + if any requirements are found on the path that have the correct name but the wrong version. Otherwise, if an `installer` is supplied it will be invoked to obtain the correct version of the requirement and activate it. + + `extras` is a list of the extras to be used with these requirements. + This is important because extra requirements may look like `my_req; + extra = "my_extra"`, which would otherwise be interpreted as a purely + optional requirement. Instead, we want to be able to assert that these + requirements are truly required. """ # set up the stack @@ -590,6 +737,8 @@ class WorkingSet(object): best = {} to_activate = [] + req_extras = _ReqExtras() + # Mapping of requirement to set of distributions that required it; # useful for reporting info about conflicts. required_by = collections.defaultdict(set) @@ -600,6 +749,10 @@ class WorkingSet(object): if req in processed: # Ignore cyclic or redundant dependencies continue + + if not req_extras.markers_pass(req, extras): + continue + dist = best.get(req.key) if dist is None: # Find the best distribution and add it to the map @@ -615,21 +768,18 @@ class WorkingSet(object): # distribution env = Environment([]) ws = WorkingSet([]) - dist = best[req.key] = env.best_match(req, ws, installer) + dist = best[req.key] = env.best_match( + req, ws, installer, + replace_conflicting=replace_conflicting + ) if dist is None: - #msg = ("The '%s' distribution was not found on this " - # "system, and is required by this application.") - #raise DistributionNotFound(msg % req) - - # unfortunately, zc.buildout uses a str(err) - # to get the name of the distribution here.. - raise DistributionNotFound(req) + requirers = required_by.get(req, None) + raise DistributionNotFound(req, requirers) to_activate.append(dist) if dist not in req: # Oops, the "best" so far conflicts with a dependency - tmpl = "%s is installed but %s is required by %s" - args = dist, req, list(required_by.get(req, [])) - raise VersionConflict(tmpl % args) + dependent_req = required_by[req] + raise VersionConflict(dist, req).with_context(dependent_req) # push the new requirements onto the stack new_requirements = dist.requires(req.extras)[::-1] @@ -638,14 +788,15 @@ class WorkingSet(object): # Register the new requirements needed by req for new_requirement in new_requirements: required_by[new_requirement].add(req.project_name) + req_extras[new_requirement] = req.extras processed[req] = True # return list of distros to activate return to_activate - def find_plugins(self, plugin_env, full_env=None, installer=None, - fallback=True): + def find_plugins( + self, plugin_env, full_env=None, installer=None, fallback=True): """Find all activatable distributions in `plugin_env` Example usage:: @@ -706,8 +857,7 @@ class WorkingSet(object): try: resolvees = shadow_set.resolve(req, env, installer) - except ResolutionError: - v = sys.exc_info()[1] + except ResolutionError as v: # save error info error_info[dist] = v if fallback: @@ -745,11 +895,17 @@ class WorkingSet(object): return needed - def subscribe(self, callback): - """Invoke `callback` for all distributions (including existing ones)""" + def subscribe(self, callback, existing=True): + """Invoke `callback` for all distributions + + If `existing=True` (default), + call on all existing ones, as well. + """ if callback in self.callbacks: return self.callbacks.append(callback) + if not existing: + return for dist in self: callback(dist) @@ -771,10 +927,31 @@ class WorkingSet(object): self.callbacks = callbacks[:] -class Environment(object): +class _ReqExtras(dict): + """ + Map each requirement to the extras that demanded it. + """ + + def markers_pass(self, req, extras=None): + """ + Evaluate markers for req against each extra that + demanded it. + + Return False if the req has a marker and fails + evaluation. Otherwise, return True. + """ + extra_evals = ( + req.marker.evaluate({'extra': extra}) + for extra in self.get(req, ()) + (extras or (None,)) + ) + return not req.marker or any(extra_evals) + + +class Environment: """Searchable snapshot of distributions on a search path""" - def __init__(self, search_path=None, platform=get_supported_platform(), + def __init__( + self, search_path=None, platform=get_supported_platform(), python=PY_MAJOR): """Snapshot distributions available on a search path @@ -785,7 +962,7 @@ class Environment(object): `platform` is an optional string specifying the name of the platform that platform-specific distributions must be compatible with. If unspecified, it defaults to the current platform. `python` is an - optional string naming the desired version of Python (e.g. ``'3.3'``); + optional string naming the desired version of Python (e.g. ``'3.6'``); it defaults to the current version. You may explicitly set `platform` (and/or `python`) to ``None`` if you @@ -804,9 +981,12 @@ class Environment(object): requirements specified when this environment was created, or False is returned. """ - return (self.python is None or dist.py_version is None - or dist.py_version==self.python) \ - and compatible_platforms(dist.platform, self.platform) + py_compat = ( + self.python is None + or dist.py_version is None + or dist.py_version == self.python + ) + return py_compat and compatible_platforms(dist.platform, self.platform) def remove(self, dist): """Remove `dist` from the environment""" @@ -847,7 +1027,8 @@ class Environment(object): dists.append(dist) dists.sort(key=operator.attrgetter('hashcmp'), reverse=True) - def best_match(self, req, working_set, installer=None): + def best_match( + self, req, working_set, installer=None, replace_conflicting=False): """Find distribution best matching `req` and usable on `working_set` This calls the ``find(req)`` method of the `working_set` to see if a @@ -860,7 +1041,12 @@ class Environment(object): calling the environment's ``obtain(req, installer)`` method will be returned. """ - dist = working_set.find(req) + try: + dist = working_set.find(req) + except VersionConflict: + if not replace_conflicting: + raise + dist = None if dist is not None: return dist for dist in self[req.key]: @@ -974,22 +1160,23 @@ class ResourceManager: old_exc = sys.exc_info()[1] cache_path = self.extraction_path or get_default_cache() - err = ExtractionError("""Can't extract file(s) to egg cache + tmpl = textwrap.dedent(""" + Can't extract file(s) to egg cache -The following error occurred while trying to extract file(s) to the Python egg -cache: + The following error occurred while trying to extract file(s) + to the Python egg cache: - %s + {old_exc} -The Python egg cache directory is currently set to: + The Python egg cache directory is currently set to: - %s + {cache_path} -Perhaps your account does not have write access to this directory? You can -change the cache directory by setting the PYTHON_EGG_CACHE environment -variable to point to an accessible directory. -""" % (old_exc, cache_path) - ) + Perhaps your account does not have write access to this directory? + You can change the cache directory by setting the PYTHON_EGG_CACHE + environment variable to point to an accessible directory. + """).lstrip() + err = ExtractionError(tmpl.format(**locals())) err.manager = self err.cache_path = cache_path err.original_error = old_exc @@ -1009,10 +1196,10 @@ variable to point to an accessible directory. extract, as it tracks the generated names for possible cleanup later. """ extract_path = self.extraction_path or get_default_cache() - target_path = os.path.join(extract_path, archive_name+'-tmp', *names) + target_path = os.path.join(extract_path, archive_name + '-tmp', *names) try: _bypass_ensure_directory(target_path) - except: + except Exception: self.extraction_error() self._warn_unsafe_extraction_path(extract_path) @@ -1037,11 +1224,14 @@ variable to point to an accessible directory. return mode = os.stat(path).st_mode if mode & stat.S_IWOTH or mode & stat.S_IWGRP: - msg = ("%s is writable by group/others and vulnerable to attack " - "when " - "used with get_resource_filename. Consider a more secure " + msg = ( + "Extraction path is writable by group/others " + "and vulnerable to attack when " + "used with get_resource_filename ({path}). " + "Consider a more secure " "location (set with .set_extraction_path or the " - "PYTHON_EGG_CACHE environment variable)." % path) + "PYTHON_EGG_CACHE environment variable)." + ).format(**locals()) warnings.warn(msg, UserWarning) def postprocess(self, tempname, filename): @@ -1103,49 +1293,18 @@ variable to point to an accessible directory. """ # XXX + def get_default_cache(): - """Determine the default cache location - - This returns the ``PYTHON_EGG_CACHE`` environment variable, if set. - Otherwise, on Windows, it returns a "Python-Eggs" subdirectory of the - "Application Data" directory. On all other systems, it's "~/.python-eggs". """ - try: - return os.environ['PYTHON_EGG_CACHE'] - except KeyError: - pass + Return the ``PYTHON_EGG_CACHE`` environment variable + or a platform-relevant user cache dir for an app + named "Python-Eggs". + """ + return ( + os.environ.get('PYTHON_EGG_CACHE') + or appdirs.user_cache_dir(appname='Python-Eggs') + ) - if os.name!='nt': - return os.path.expanduser('~/.python-eggs') - - # XXX this may be locale-specific! - app_data = 'Application Data' - app_homes = [ - # best option, should be locale-safe - (('APPDATA',), None), - (('USERPROFILE',), app_data), - (('HOMEDRIVE','HOMEPATH'), app_data), - (('HOMEPATH',), app_data), - (('HOME',), None), - # 95/98/ME - (('WINDIR',), app_data), - ] - - for keys, subdir in app_homes: - dirname = '' - for key in keys: - if key in os.environ: - dirname = os.path.join(dirname, os.environ[key]) - else: - break - else: - if subdir: - dirname = os.path.join(dirname, subdir) - return os.path.join(dirname, 'Python-Eggs') - else: - raise RuntimeError( - "Please set the PYTHON_EGG_CACHE enviroment variable" - ) def safe_name(name): """Convert an arbitrary string to a standard distribution name @@ -1156,13 +1315,15 @@ def safe_name(name): def safe_version(version): - """Convert an arbitrary string to a standard version string - - Spaces become dots, and all other non-alphanumeric characters become - dashes, with runs of multiple dashes condensed to a single dash. """ - version = version.replace(' ','.') - return re.sub('[^A-Za-z0-9.]+', '-', version) + Convert an arbitrary string to a standard version string + """ + try: + # normalize the version + return str(packaging.version.Version(version)) + except packaging.version.InvalidVersion: + version = version.replace(' ', '.') + return re.sub('[^A-Za-z0-9.]+', '-', version) def safe_extra(extra): @@ -1171,7 +1332,7 @@ def safe_extra(extra): Any runs of non-alphanumeric characters are replaced with a single '_', and the result is always lowercased. """ - return re.sub('[^A-Za-z0-9.]+', '_', extra).lower() + return re.sub('[^A-Za-z0-9.-]+', '_', extra).lower() def to_filename(name): @@ -1179,194 +1340,37 @@ def to_filename(name): Any '-' characters are currently replaced with '_'. """ - return name.replace('-','_') + return name.replace('-', '_') -class MarkerEvaluation(object): - values = { - 'os_name': lambda: os.name, - 'sys_platform': lambda: sys.platform, - 'python_full_version': platform.python_version, - 'python_version': lambda: platform.python_version()[:3], - 'platform_version': platform.version, - 'platform_machine': platform.machine, - 'python_implementation': platform.python_implementation, - } +def invalid_marker(text): + """ + Validate text as a PEP 508 environment marker; return an exception + if invalid or False otherwise. + """ + try: + evaluate_marker(text) + except SyntaxError as e: + e.filename = None + e.lineno = None + return e + return False - @classmethod - def is_invalid_marker(cls, text): - """ - Validate text as a PEP 426 environment marker; return an exception - if invalid or False otherwise. - """ - try: - cls.evaluate_marker(text) - except SyntaxError: - return cls.normalize_exception(sys.exc_info()[1]) - return False - @staticmethod - def normalize_exception(exc): - """ - Given a SyntaxError from a marker evaluation, normalize the error - message: - - Remove indications of filename and line number. - - Replace platform-specific error messages with standard error - messages. - """ - subs = { - 'unexpected EOF while parsing': 'invalid syntax', - 'parenthesis is never closed': 'invalid syntax', - } - exc.filename = None - exc.lineno = None - exc.msg = subs.get(exc.msg, exc.msg) - return exc +def evaluate_marker(text, extra=None): + """ + Evaluate a PEP 508 environment marker. + Return a boolean indicating the marker result in this environment. + Raise SyntaxError if marker is invalid. - @classmethod - def and_test(cls, nodelist): - # MUST NOT short-circuit evaluation, or invalid syntax can be skipped! - items = [ - cls.interpret(nodelist[i]) - for i in range(1, len(nodelist), 2) - ] - return functools.reduce(operator.and_, items) + This implementation uses the 'pyparsing' module. + """ + try: + marker = packaging.markers.Marker(text) + return marker.evaluate() + except packaging.markers.InvalidMarker as e: + raise SyntaxError(e) from e - @classmethod - def test(cls, nodelist): - # MUST NOT short-circuit evaluation, or invalid syntax can be skipped! - items = [ - cls.interpret(nodelist[i]) - for i in range(1, len(nodelist), 2) - ] - return functools.reduce(operator.or_, items) - - @classmethod - def atom(cls, nodelist): - t = nodelist[1][0] - if t == token.LPAR: - if nodelist[2][0] == token.RPAR: - raise SyntaxError("Empty parentheses") - return cls.interpret(nodelist[2]) - msg = "Language feature not supported in environment markers" - raise SyntaxError(msg) - - @classmethod - def comparison(cls, nodelist): - if len(nodelist) > 4: - msg = "Chained comparison not allowed in environment markers" - raise SyntaxError(msg) - comp = nodelist[2][1] - cop = comp[1] - if comp[0] == token.NAME: - if len(nodelist[2]) == 3: - if cop == 'not': - cop = 'not in' - else: - cop = 'is not' - try: - cop = cls.get_op(cop) - except KeyError: - msg = repr(cop) + " operator not allowed in environment markers" - raise SyntaxError(msg) - return cop(cls.evaluate(nodelist[1]), cls.evaluate(nodelist[3])) - - @classmethod - def get_op(cls, op): - ops = { - symbol.test: cls.test, - symbol.and_test: cls.and_test, - symbol.atom: cls.atom, - symbol.comparison: cls.comparison, - 'not in': lambda x, y: x not in y, - 'in': lambda x, y: x in y, - '==': operator.eq, - '!=': operator.ne, - } - if hasattr(symbol, 'or_test'): - ops[symbol.or_test] = cls.test - return ops[op] - - @classmethod - def evaluate_marker(cls, text, extra=None): - """ - Evaluate a PEP 426 environment marker on CPython 2.4+. - Return a boolean indicating the marker result in this environment. - Raise SyntaxError if marker is invalid. - - This implementation uses the 'parser' module, which is not implemented - on - Jython and has been superseded by the 'ast' module in Python 2.6 and - later. - """ - return cls.interpret(parser.expr(text).totuple(1)[1]) - - @classmethod - def _markerlib_evaluate(cls, text): - """ - Evaluate a PEP 426 environment marker using markerlib. - Return a boolean indicating the marker result in this environment. - Raise SyntaxError if marker is invalid. - """ - import _markerlib - # markerlib implements Metadata 1.2 (PEP 345) environment markers. - # Translate the variables to Metadata 2.0 (PEP 426). - env = _markerlib.default_environment() - for key in env.keys(): - new_key = key.replace('.', '_') - env[new_key] = env.pop(key) - try: - result = _markerlib.interpret(text, env) - except NameError: - e = sys.exc_info()[1] - raise SyntaxError(e.args[0]) - return result - - if 'parser' not in globals(): - # Fall back to less-complete _markerlib implementation if 'parser' module - # is not available. - evaluate_marker = _markerlib_evaluate - - @classmethod - def interpret(cls, nodelist): - while len(nodelist)==2: nodelist = nodelist[1] - try: - op = cls.get_op(nodelist[0]) - except KeyError: - raise SyntaxError("Comparison or logical expression expected") - return op(nodelist) - - @classmethod - def evaluate(cls, nodelist): - while len(nodelist)==2: nodelist = nodelist[1] - kind = nodelist[0] - name = nodelist[1] - if kind==token.NAME: - try: - op = cls.values[name] - except KeyError: - raise SyntaxError("Unknown name %r" % name) - return op() - if kind==token.STRING: - s = nodelist[1] - if not cls._safe_string(s): - raise SyntaxError( - "Only plain strings allowed in environment markers") - return s[1:-1] - msg = "Language feature not supported in environment markers" - raise SyntaxError(msg) - - @staticmethod - def _safe_string(cand): - return ( - cand[:1] in "'\"" and - not cand.startswith('"""') and - not cand.startswith("'''") and - '\\' not in cand - ) - -invalid_marker = MarkerEvaluation.is_invalid_marker -evaluate_marker = MarkerEvaluation.evaluate_marker class NullProvider: """Try to implement resources and metadata for arbitrary PEP 302 loaders""" @@ -1391,19 +1395,28 @@ class NullProvider: def has_resource(self, resource_name): return self._has(self._fn(self.module_path, resource_name)) - def has_metadata(self, name): - return self.egg_info and self._has(self._fn(self.egg_info, name)) + def _get_metadata_path(self, name): + return self._fn(self.egg_info, name) - if sys.version_info <= (3,): - def get_metadata(self, name): - if not self.egg_info: - return "" - return self._get(self._fn(self.egg_info, name)) - else: - def get_metadata(self, name): - if not self.egg_info: - return "" - return self._get(self._fn(self.egg_info, name)).decode("utf-8") + def has_metadata(self, name): + if not self.egg_info: + return self.egg_info + + path = self._get_metadata_path(name) + return self._has(path) + + def get_metadata(self, name): + if not self.egg_info: + return "" + path = self._get_metadata_path(name) + value = self._get(path) + try: + return value.decode('utf-8') + except UnicodeDecodeError as exc: + # Include the path in the error message to simplify + # troubleshooting, and without changing the exception type. + exc.reason += ' in {} file at path: {}'.format(name, path) + raise def get_metadata_lines(self, name): return yield_lines(self.get_metadata(name)) @@ -1423,15 +1436,19 @@ class NullProvider: return [] def run_script(self, script_name, namespace): - script = 'scripts/'+script_name + script = 'scripts/' + script_name if not self.has_metadata(script): - raise ResolutionError("No script named %r" % script_name) + raise ResolutionError( + "Script {script!r} not found in metadata at {self.egg_info!r}" + .format(**locals()), + ) script_text = self.get_metadata(script).replace('\r\n', '\n') script_text = script_text.replace('\r', '\n') script_filename = self._fn(self.egg_info, script) namespace['__file__'] = script_filename if os.path.exists(script_filename): - source = open(script_filename).read() + with open(script_filename) as fid: + source = fid.read() code = compile(source, script_filename, 'exec') exec(code, namespace, namespace) else: @@ -1439,7 +1456,7 @@ class NullProvider: cache[script_filename] = ( len(script_text), 0, script_text.split('\n'), script_filename ) - script_code = compile(script_text, script_filename,'exec') + script_code = compile(script_text, script_filename, 'exec') exec(script_code, namespace, namespace) def _has(self, path): @@ -1458,10 +1475,86 @@ class NullProvider: ) def _fn(self, base, resource_name): + self._validate_resource_path(resource_name) if resource_name: return os.path.join(base, *resource_name.split('/')) return base + @staticmethod + def _validate_resource_path(path): + """ + Validate the resource paths according to the docs. + https://setuptools.pypa.io/en/latest/pkg_resources.html#basic-resource-access + + >>> warned = getfixture('recwarn') + >>> warnings.simplefilter('always') + >>> vrp = NullProvider._validate_resource_path + >>> vrp('foo/bar.txt') + >>> bool(warned) + False + >>> vrp('../foo/bar.txt') + >>> bool(warned) + True + >>> warned.clear() + >>> vrp('/foo/bar.txt') + >>> bool(warned) + True + >>> vrp('foo/../../bar.txt') + >>> bool(warned) + True + >>> warned.clear() + >>> vrp('foo/f../bar.txt') + >>> bool(warned) + False + + Windows path separators are straight-up disallowed. + >>> vrp(r'\\foo/bar.txt') + Traceback (most recent call last): + ... + ValueError: Use of .. or absolute path in a resource path \ +is not allowed. + + >>> vrp(r'C:\\foo/bar.txt') + Traceback (most recent call last): + ... + ValueError: Use of .. or absolute path in a resource path \ +is not allowed. + + Blank values are allowed + + >>> vrp('') + >>> bool(warned) + False + + Non-string values are not. + + >>> vrp(None) + Traceback (most recent call last): + ... + AttributeError: ... + """ + invalid = ( + os.path.pardir in path.split(posixpath.sep) or + posixpath.isabs(path) or + ntpath.isabs(path) + ) + if not invalid: + return + + msg = "Use of .. or absolute path in a resource path is not allowed." + + # Aggressively disallow Windows absolute paths + if ntpath.isabs(path) and not posixpath.isabs(path): + raise ValueError(msg) + + # for compatibility, warn; in future + # raise ValueError(msg) + warnings.warn( + msg[:-1] + " and will raise exceptions in a future release.", + DeprecationWarning, + stacklevel=4, + ) + def _get(self, path): if hasattr(self.loader, 'get_data'): return self.loader.get_data(path) @@ -1469,9 +1562,21 @@ class NullProvider: "Can't perform this operation for loaders without 'get_data()'" ) + register_loader_type(object, NullProvider) +def _parents(path): + """ + yield all parents of path including path + """ + last = None + while path != last: + yield path + last = path + path, _ = os.path.split(path) + + class EggProvider(NullProvider): """Provider based on a virtual filesystem""" @@ -1480,18 +1585,17 @@ class EggProvider(NullProvider): self._setup_prefix() def _setup_prefix(self): - # we assume here that our metadata may be nested inside a "basket" - # of multiple eggs; that's why we use module_path instead of .archive - path = self.module_path - old = None - while path!=old: - if path.lower().endswith('.egg'): - self.egg_name = os.path.basename(path) - self.egg_info = os.path.join(path, 'EGG-INFO') - self.egg_root = path - break - old = path - path, base = os.path.split(path) + # Assume that metadata may be nested inside a "basket" + # of multiple eggs and use module_path instead of .archive. + eggs = filter(_is_egg_path, _parents(self.module_path)) + egg = next(eggs, None) + egg and self._set_egg(egg) + + def _set_egg(self, path): + self.egg_name = os.path.basename(path) + self.egg_info = os.path.join(path, 'EGG-INFO') + self.egg_root = path + class DefaultProvider(EggProvider): """Provides access to package resources in the filesystem""" @@ -1512,23 +1616,34 @@ class DefaultProvider(EggProvider): with open(path, 'rb') as stream: return stream.read() -register_loader_type(type(None), DefaultProvider) + @classmethod + def _register(cls): + loader_names = 'SourceFileLoader', 'SourcelessFileLoader', + for name in loader_names: + loader_cls = getattr(importlib_machinery, name, type(None)) + register_loader_type(loader_cls, cls) -if importlib_bootstrap is not None: - register_loader_type(importlib_bootstrap.SourceFileLoader, DefaultProvider) + +DefaultProvider._register() class EmptyProvider(NullProvider): """Provider that returns nothing for all requests""" - _isdir = _has = lambda self, path: False - _get = lambda self, path: '' - _listdir = lambda self, path: [] module_path = None + _isdir = _has = lambda self, path: False + + def _get(self, path): + return '' + + def _listdir(self, path): + return [] + def __init__(self): pass + empty_provider = EmptyProvider() @@ -1546,7 +1661,7 @@ class ZipManifests(dict): Use a platform-specific path separator (os.sep) for the path keys for compatibility with pypy on Windows. """ - with ContextualZipFile(path) as zfile: + with zipfile.ZipFile(path) as zfile: items = ( ( name.replace('/', os.sep), @@ -1579,26 +1694,6 @@ class MemoizedZipManifests(ZipManifests): return self[path].manifest -class ContextualZipFile(zipfile.ZipFile): - """ - Supplement ZipFile class to support context manager for Python 2.6 - """ - - def __enter__(self): - return self - - def __exit__(self, type, value, traceback): - self.close() - - def __new__(cls, *args, **kwargs): - """ - Construct a ZipFile or ContextualZipFile as appropriate - """ - if hasattr(zipfile.ZipFile, '__exit__'): - return zipfile.ZipFile(*args, **kwargs) - return super(ContextualZipFile, cls).__new__(cls) - - class ZipProvider(EggProvider): """Resource support for zips and eggs""" @@ -1607,11 +1702,14 @@ class ZipProvider(EggProvider): def __init__(self, module): EggProvider.__init__(self, module) - self.zip_pre = self.loader.archive+os.sep + self.zip_pre = self.loader.archive + os.sep def _zipinfo_name(self, fspath): # Convert a virtual filename (full path to file) into a zipfile subpath # usable with the zipimport directory cache for our target archive + fspath = fspath.rstrip(os.sep) + if fspath == self.loader.archive: + return '' if fspath.startswith(self.zip_pre): return fspath[len(self.zip_pre):] raise AssertionError( @@ -1621,9 +1719,9 @@ class ZipProvider(EggProvider): def _parts(self, zip_path): # Convert a zipfile subpath into an egg-relative path part list. # pseudo-fs path - fspath = self.zip_pre+zip_path - if fspath.startswith(self.egg_root+os.sep): - return fspath[len(self.egg_root)+1:].split(os.sep) + fspath = self.zip_pre + zip_path + if fspath.startswith(self.egg_root + os.sep): + return fspath[len(self.egg_root) + 1:].split(os.sep) raise AssertionError( "%s is not a subpath of %s" % (fspath, self.egg_root) ) @@ -1654,7 +1752,8 @@ class ZipProvider(EggProvider): timestamp = time.mktime(date_time) return timestamp, size - def _extract_resource(self, manager, zip_path): + # FIXME: 'ZipProvider._extract_resource' is too complex (12) + def _extract_resource(self, manager, zip_path): # noqa: C901 if zip_path in self._index(): for name in self._index()[zip_path]: @@ -1678,7 +1777,10 @@ class ZipProvider(EggProvider): if self._is_current(real_path, zip_path): return real_path - outf, tmpnam = _mkstemp(".$extract", dir=os.path.dirname(real_path)) + outf, tmpnam = _mkstemp( + ".$extract", + dir=os.path.dirname(real_path), + ) os.write(outf, self.loader.get_data(zip_path)) os.close(outf) utime(tmpnam, (timestamp, timestamp)) @@ -1694,7 +1796,7 @@ class ZipProvider(EggProvider): # so proceed. return real_path # Windows, del old file and retry - elif os.name=='nt': + elif os.name == 'nt': unlink(real_path) rename(tmpnam, real_path) return real_path @@ -1714,7 +1816,7 @@ class ZipProvider(EggProvider): if not os.path.isfile(file_path): return False stat = os.stat(file_path) - if stat.st_size!=size or stat.st_mtime!=timestamp: + if stat.st_size != size or stat.st_mtime != timestamp: return False # check that the contents match zip_contents = self.loader.get_data(zip_path) @@ -1764,6 +1866,7 @@ class ZipProvider(EggProvider): def _resource_to_zip(self, resource_name): return self._zipinfo_name(self._fn(self.module_path, resource_name)) + register_loader_type(zipimport.zipimporter, ZipProvider) @@ -1782,15 +1885,27 @@ class FileMetadata(EmptyProvider): def __init__(self, path): self.path = path + def _get_metadata_path(self, name): + return self.path + def has_metadata(self, name): - return name=='PKG-INFO' + return name == 'PKG-INFO' and os.path.isfile(self.path) def get_metadata(self, name): - if name=='PKG-INFO': - with open(self.path,'rU') as f: - metadata = f.read() - return metadata - raise KeyError("No metadata except PKG-INFO is available") + if name != 'PKG-INFO': + raise KeyError("No metadata except PKG-INFO is available") + + with io.open(self.path, encoding='utf-8', errors="replace") as f: + metadata = f.read() + self._warn_on_replacement(metadata) + return metadata + + def _warn_on_replacement(self, metadata): + replacement_char = '�' + if replacement_char in metadata: + tmpl = "{self.path} could not be properly decoded in UTF-8" + msg = tmpl.format(**locals()) + warnings.warn(msg) def get_metadata_lines(self, name): return yield_lines(self.get_metadata(name)) @@ -1827,7 +1942,7 @@ class EggMetadata(ZipProvider): def __init__(self, importer): """Create a metadata provider from a zipimporter""" - self.zip_pre = importer.archive+os.sep + self.zip_pre = importer.archive + os.sep self.loader = importer if importer.prefix: self.module_path = os.path.join(importer.archive, importer.prefix) @@ -1835,7 +1950,9 @@ class EggMetadata(ZipProvider): self.module_path = importer.archive self._setup_prefix() -_declare_state('dict', _distribution_finders = {}) + +_declare_state('dict', _distribution_finders={}) + def register_finder(importer_type, distribution_finder): """Register `distribution_finder` to find distributions in sys.path items @@ -1853,6 +1970,7 @@ def find_distributions(path_item, only=False): finder = _find_adapter(_distribution_finders, importer) return finder(importer, path_item, only) + def find_eggs_in_zip(importer, path_item, only=False): """ Find eggs in zip files; possibly multiple nested eggs. @@ -1867,63 +1985,194 @@ def find_eggs_in_zip(importer, path_item, only=False): if only: # don't yield nested distros return - for subitem in metadata.resource_listdir('/'): - if subitem.endswith('.egg'): + for subitem in metadata.resource_listdir(''): + if _is_egg_path(subitem): subpath = os.path.join(path_item, subitem) - for dist in find_eggs_in_zip(zipimport.zipimporter(subpath), subpath): + dists = find_eggs_in_zip(zipimport.zipimporter(subpath), subpath) + for dist in dists: yield dist + elif subitem.lower().endswith(('.dist-info', '.egg-info')): + subpath = os.path.join(path_item, subitem) + submeta = EggMetadata(zipimport.zipimporter(subpath)) + submeta.egg_info = subpath + yield Distribution.from_location(path_item, subitem, submeta) + register_finder(zipimport.zipimporter, find_eggs_in_zip) + def find_nothing(importer, path_item, only=False): return () + + register_finder(object, find_nothing) + +def _by_version_descending(names): + """ + Given a list of filenames, return them in descending order + by version number. + + >>> names = 'bar', 'foo', 'Python-2.7.10.egg', 'Python-2.7.2.egg' + >>> _by_version_descending(names) + ['Python-2.7.10.egg', 'Python-2.7.2.egg', 'bar', 'foo'] + >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.egg' + >>> _by_version_descending(names) + ['Setuptools-1.2.3.egg', 'Setuptools-1.2.3b1.egg'] + >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.post1.egg' + >>> _by_version_descending(names) + ['Setuptools-1.2.3.post1.egg', 'Setuptools-1.2.3b1.egg'] + """ + def try_parse(name): + """ + Attempt to parse as a version or return a null version. + """ + try: + return packaging.version.Version(name) + except Exception: + return packaging.version.Version('0') + + def _by_version(name): + """ + Parse each component of the filename + """ + name, ext = os.path.splitext(name) + parts = itertools.chain(name.split('-'), [ext]) + return [try_parse(part) for part in parts] + + return sorted(names, key=_by_version, reverse=True) + + def find_on_path(importer, path_item, only=False): """Yield distributions accessible on a sys.path directory""" path_item = _normalize_cached(path_item) - if os.path.isdir(path_item) and os.access(path_item, os.R_OK): - if path_item.lower().endswith('.egg'): - # unpacked egg - yield Distribution.from_filename( - path_item, metadata=PathMetadata( - path_item, os.path.join(path_item,'EGG-INFO') - ) + if _is_unpacked_egg(path_item): + yield Distribution.from_filename( + path_item, metadata=PathMetadata( + path_item, os.path.join(path_item, 'EGG-INFO') ) - else: - # scan for .egg and .egg-info in directory - for entry in os.listdir(path_item): - lower = entry.lower() - if lower.endswith('.egg-info') or lower.endswith('.dist-info'): - fullpath = os.path.join(path_item, entry) - if os.path.isdir(fullpath): - # egg-info directory, allow getting metadata - metadata = PathMetadata(path_item, fullpath) - else: - metadata = FileMetadata(fullpath) - yield Distribution.from_location( - path_item, entry, metadata, precedence=DEVELOP_DIST - ) - elif not only and lower.endswith('.egg'): - dists = find_distributions(os.path.join(path_item, entry)) - for dist in dists: - yield dist - elif not only and lower.endswith('.egg-link'): - with open(os.path.join(path_item, entry)) as entry_file: - entry_lines = entry_file.readlines() - for line in entry_lines: - if not line.strip(): - continue - path = os.path.join(path_item, line.rstrip()) - dists = find_distributions(path) - for item in dists: - yield item - break + ) + return + + entries = ( + os.path.join(path_item, child) + for child in safe_listdir(path_item) + ) + + # for performance, before sorting by version, + # screen entries for only those that will yield + # distributions + filtered = ( + entry + for entry in entries + if dist_factory(path_item, entry, only) + ) + + # scan for .egg and .egg-info in directory + path_item_entries = _by_version_descending(filtered) + for entry in path_item_entries: + fullpath = os.path.join(path_item, entry) + factory = dist_factory(path_item, entry, only) + for dist in factory(fullpath): + yield dist + + +def dist_factory(path_item, entry, only): + """Return a dist_factory for the given entry.""" + lower = entry.lower() + is_egg_info = lower.endswith('.egg-info') + is_dist_info = ( + lower.endswith('.dist-info') and + os.path.isdir(os.path.join(path_item, entry)) + ) + is_meta = is_egg_info or is_dist_info + return ( + distributions_from_metadata + if is_meta else + find_distributions + if not only and _is_egg_path(entry) else + resolve_egg_link + if not only and lower.endswith('.egg-link') else + NoDists() + ) + + +class NoDists: + """ + >>> bool(NoDists()) + False + + >>> list(NoDists()('anything')) + [] + """ + def __bool__(self): + return False + + def __call__(self, fullpath): + return iter(()) + + +def safe_listdir(path): + """ + Attempt to list contents of path, but suppress some exceptions. + """ + try: + return os.listdir(path) + except (PermissionError, NotADirectoryError): + pass + except OSError as e: + # Ignore the directory if does not exist, not a directory or + # permission denied + if e.errno not in (errno.ENOTDIR, errno.EACCES, errno.ENOENT): + raise + return () + + +def distributions_from_metadata(path): + root = os.path.dirname(path) + if os.path.isdir(path): + if len(os.listdir(path)) == 0: + # empty metadata dir; skip + return + metadata = PathMetadata(root, path) + else: + metadata = FileMetadata(path) + entry = os.path.basename(path) + yield Distribution.from_location( + root, entry, metadata, precedence=DEVELOP_DIST, + ) + + +def non_empty_lines(path): + """ + Yield non-empty lines from file at path + """ + with open(path) as f: + for line in f: + line = line.strip() + if line: + yield line + + +def resolve_egg_link(path): + """ + Given a path to an .egg-link, resolve distributions + present in the referenced path. + """ + referenced_paths = non_empty_lines(path) + resolved_paths = ( + os.path.join(os.path.dirname(path), ref) + for ref in referenced_paths + ) + dist_groups = map(find_distributions, resolved_paths) + return next(dist_groups, ()) + + register_finder(pkgutil.ImpImporter, find_on_path) -if importlib_bootstrap is not None: - register_finder(importlib_bootstrap.FileFinder, find_on_path) +if hasattr(importlib_machinery, 'FileFinder'): + register_finder(importlib_machinery.FileFinder, find_on_path) _declare_state('dict', _namespace_handlers={}) _declare_state('dict', _namespace_packages={}) @@ -1946,56 +2195,100 @@ def register_namespace_handler(importer_type, namespace_handler): """ _namespace_handlers[importer_type] = namespace_handler + def _handle_ns(packageName, path_item): """Ensure that named package includes a subpath of path_item (if needed)""" importer = get_importer(path_item) if importer is None: return None - loader = importer.find_module(packageName) + + # use find_spec (PEP 451) and fall-back to find_module (PEP 302) + try: + loader = importer.find_spec(packageName).loader + except AttributeError: + # capture warnings due to #1111 + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + loader = importer.find_module(packageName) + if loader is None: return None module = sys.modules.get(packageName) if module is None: - module = sys.modules[packageName] = imp.new_module(packageName) + module = sys.modules[packageName] = types.ModuleType(packageName) module.__path__ = [] _set_parent_ns(packageName) - elif not hasattr(module,'__path__'): + elif not hasattr(module, '__path__'): raise TypeError("Not a package:", packageName) handler = _find_adapter(_namespace_handlers, importer) subpath = handler(importer, path_item, packageName, module) if subpath is not None: path = module.__path__ path.append(subpath) - loader.load_module(packageName) - for path_item in path: - if path_item not in module.__path__: - module.__path__.append(path_item) + importlib.import_module(packageName) + _rebuild_mod_path(path, packageName, module) return subpath + +def _rebuild_mod_path(orig_path, package_name, module): + """ + Rebuild module.__path__ ensuring that all entries are ordered + corresponding to their sys.path order + """ + sys_path = [_normalize_cached(p) for p in sys.path] + + def safe_sys_path_index(entry): + """ + Workaround for #520 and #513. + """ + try: + return sys_path.index(entry) + except ValueError: + return float('inf') + + def position_in_sys_path(path): + """ + Return the ordinal of the path based on its position in sys.path + """ + path_parts = path.split(os.sep) + module_parts = package_name.count('.') + 1 + parts = path_parts[:-module_parts] + return safe_sys_path_index(_normalize_cached(os.sep.join(parts))) + + new_path = sorted(orig_path, key=position_in_sys_path) + new_path = [_normalize_cached(p) for p in new_path] + + if isinstance(module.__path__, list): + module.__path__[:] = new_path + else: + module.__path__ = new_path + + def declare_namespace(packageName): """Declare that package 'packageName' is a namespace package""" - imp.acquire_lock() + _imp.acquire_lock() try: if packageName in _namespace_packages: return - path, parent = sys.path, None - if '.' in packageName: - parent = '.'.join(packageName.split('.')[:-1]) + path = sys.path + parent, _, _ = packageName.rpartition('.') + + if parent: declare_namespace(parent) if parent not in _namespace_packages: __import__(parent) try: path = sys.modules[parent].__path__ - except AttributeError: - raise TypeError("Not a package:", parent) + except AttributeError as e: + raise TypeError("Not a package:", parent) from e # Track what packages are namespaces, so when new path items are added, # they can be updated - _namespace_packages.setdefault(parent,[]).append(packageName) - _namespace_packages.setdefault(packageName,[]) + _namespace_packages.setdefault(parent or None, []).append(packageName) + _namespace_packages.setdefault(packageName, []) for path_item in path: # Ensure all the parent's path items are reflected in the child, @@ -2003,18 +2296,20 @@ def declare_namespace(packageName): _handle_ns(packageName, path_item) finally: - imp.release_lock() + _imp.release_lock() + def fixup_namespace_packages(path_item, parent=None): """Ensure that previously-declared namespace packages include path_item""" - imp.acquire_lock() + _imp.acquire_lock() try: - for package in _namespace_packages.get(parent,()): + for package in _namespace_packages.get(parent, ()): subpath = _handle_ns(package, path_item) if subpath: fixup_namespace_packages(subpath, package) finally: - imp.release_lock() + _imp.release_lock() + def file_ns_handler(importer, path_item, packageName, module): """Compute an ns-package subpath for a filesystem or zipfile importer""" @@ -2022,28 +2317,43 @@ def file_ns_handler(importer, path_item, packageName, module): subpath = os.path.join(path_item, packageName.split('.')[-1]) normalized = _normalize_cached(subpath) for item in module.__path__: - if _normalize_cached(item)==normalized: + if _normalize_cached(item) == normalized: break else: # Only return the path if it's not already there return subpath + register_namespace_handler(pkgutil.ImpImporter, file_ns_handler) register_namespace_handler(zipimport.zipimporter, file_ns_handler) -if importlib_bootstrap is not None: - register_namespace_handler(importlib_bootstrap.FileFinder, file_ns_handler) +if hasattr(importlib_machinery, 'FileFinder'): + register_namespace_handler(importlib_machinery.FileFinder, file_ns_handler) def null_ns_handler(importer, path_item, packageName, module): return None + register_namespace_handler(object, null_ns_handler) def normalize_path(filename): """Normalize a file/dir name for comparison purposes""" - return os.path.normcase(os.path.realpath(filename)) + return os.path.normcase(os.path.realpath(os.path.normpath( + _cygwin_patch(filename)))) + + +def _cygwin_patch(filename): # pragma: nocover + """ + Contrary to POSIX 2008, on Cygwin, getcwd (3) contains + symlink components. Using + os.path.abspath() works around this limitation. A fix in os.getcwd() + would probably better, in Cygwin even more so, except + that this seems to be by design... + """ + return os.path.abspath(filename) if sys.platform == 'cygwin' else filename + def _normalize_cached(filename, _cache={}): try: @@ -2052,6 +2362,32 @@ def _normalize_cached(filename, _cache={}): _cache[filename] = result = normalize_path(filename) return result + +def _is_egg_path(path): + """ + Determine if given path appears to be an egg. + """ + return _is_zip_egg(path) or _is_unpacked_egg(path) + + +def _is_zip_egg(path): + return ( + path.lower().endswith('.egg') and + os.path.isfile(path) and + zipfile.is_zipfile(path) + ) + + +def _is_unpacked_egg(path): + """ + Determine if given path appears to be an unpacked egg. + """ + return ( + path.lower().endswith('.egg') and + os.path.isfile(os.path.join(path, 'EGG-INFO', 'PKG-INFO')) + ) + + def _set_parent_ns(packageName): parts = packageName.split('.') name = parts.pop() @@ -2060,101 +2396,37 @@ def _set_parent_ns(packageName): setattr(sys.modules[parent], name, sys.modules[packageName]) -def yield_lines(strs): - """Yield non-empty/non-comment lines of a string or sequence""" - if isinstance(strs, string_types): - for s in strs.splitlines(): - s = s.strip() - # skip blank lines/comments - if s and not s.startswith('#'): - yield s - else: - for ss in strs: - for s in yield_lines(ss): - yield s +def _nonblank(str): + return str and not str.startswith('#') + + +@functools.singledispatch +def yield_lines(iterable): + """Yield valid lines of a string or iterable""" + return itertools.chain.from_iterable(map(yield_lines, iterable)) + + +@yield_lines.register(str) +def _(text): + return filter(_nonblank, map(str.strip, text.splitlines())) + -# whitespace and comment -LINE_END = re.compile(r"\s*(#.*)?$").match -# line continuation -CONTINUE = re.compile(r"\s*\\\s*(#.*)?$").match -# Distribution or extra -DISTRO = re.compile(r"\s*((\w|[-.])+)").match -# ver. info -VERSION = re.compile(r"\s*(<=?|>=?|==|!=)\s*((\w|[-.])+)").match -# comma between items -COMMA = re.compile(r"\s*,").match -OBRACKET = re.compile(r"\s*\[").match -CBRACKET = re.compile(r"\s*\]").match MODULE = re.compile(r"\w+(\.\w+)*$").match EGG_NAME = re.compile( - r"(?P[^-]+)" - r"( -(?P[^-]+) (-py(?P[^-]+) (-(?P.+))? )? )?", - re.VERBOSE | re.IGNORECASE + r""" + (?P[^-]+) ( + -(?P[^-]+) ( + -py(?P[^-]+) ( + -(?P.+) + )? + )? + )? + """, + re.VERBOSE | re.IGNORECASE, ).match -component_re = re.compile(r'(\d+ | [a-z]+ | \.| -)', re.VERBOSE) -replace = {'pre':'c', 'preview':'c','-':'final-','rc':'c','dev':'@'}.get -def _parse_version_parts(s): - for part in component_re.split(s): - part = replace(part, part) - if not part or part=='.': - continue - if part[:1] in '0123456789': - # pad for numeric comparison - yield part.zfill(8) - else: - yield '*'+part - - # ensure that alpha/beta/candidate are before final - yield '*final' - -def parse_version(s): - """Convert a version string to a chronologically-sortable key - - This is a rough cross between distutils' StrictVersion and LooseVersion; - if you give it versions that would work with StrictVersion, then it behaves - the same; otherwise it acts like a slightly-smarter LooseVersion. It is - *possible* to create pathological version coding schemes that will fool - this parser, but they should be very rare in practice. - - The returned value will be a tuple of strings. Numeric portions of the - version are padded to 8 digits so they will compare numerically, but - without relying on how numbers compare relative to strings. Dots are - dropped, but dashes are retained. Trailing zeros between alpha segments - or dashes are suppressed, so that e.g. "2.4.0" is considered the same as - "2.4". Alphanumeric parts are lower-cased. - - The algorithm assumes that strings like "-" and any alpha string that - alphabetically follows "final" represents a "patch level". So, "2.4-1" - is assumed to be a branch or patch of "2.4", and therefore "2.4.1" is - considered newer than "2.4-1", which in turn is newer than "2.4". - - Strings like "a", "b", "c", "alpha", "beta", "candidate" and so on (that - come before "final" alphabetically) are assumed to be pre-release versions, - so that the version "2.4" is considered newer than "2.4a1". - - Finally, to handle miscellaneous cases, the strings "pre", "preview", and - "rc" are treated as if they were "c", i.e. as though they were release - candidates, and therefore are not as new as a version string that does not - contain them, and "dev" is replaced with an '@' so that it sorts lower than - than any other pre-release tag. - """ - parts = [] - for part in _parse_version_parts(s.lower()): - if part.startswith('*'): - # remove '-' before a prerelease tag - if part < '*final': - while parts and parts[-1] == '*final-': - parts.pop() - # remove trailing zeros from each series of numeric parts - while parts and parts[-1]=='00000000': - parts.pop() - parts.append(part) - return tuple(parts) - - -class EntryPoint(object): +class EntryPoint: """Object representing an advertised importable object""" def __init__(self, name, module_name, attrs=(), extras=(), dist=None): @@ -2163,7 +2435,7 @@ class EntryPoint(object): self.name = name self.module_name = module_name self.attrs = tuple(attrs) - self.extras = Requirement.parse(("x[%s]" % ','.join(extras))).extras + self.extras = tuple(extras) self.dist = dist def __str__(self): @@ -2177,25 +2449,53 @@ class EntryPoint(object): def __repr__(self): return "EntryPoint.parse(%r)" % str(self) - def load(self, require=True, env=None, installer=None): + def load(self, require=True, *args, **kwargs): + """ + Require packages for this EntryPoint, then resolve it. + """ + if not require or args or kwargs: + warnings.warn( + "Parameters to load are deprecated. Call .resolve and " + ".require separately.", + PkgResourcesDeprecationWarning, + stacklevel=2, + ) if require: - self.require(env, installer) - entry = __import__(self.module_name, globals(), globals(), - ['__name__']) - for attr in self.attrs: - try: - entry = getattr(entry, attr) - except AttributeError: - raise ImportError("%r has no %r attribute" % (entry, attr)) - return entry + self.require(*args, **kwargs) + return self.resolve() + + def resolve(self): + """ + Resolve the entry point from its module and attrs. + """ + module = __import__(self.module_name, fromlist=['__name__'], level=0) + try: + return functools.reduce(getattr, self.attrs, module) + except AttributeError as exc: + raise ImportError(str(exc)) from exc def require(self, env=None, installer=None): if self.extras and not self.dist: raise UnknownExtra("Can't require() without a distribution", self) + + # Get the requirements for this entry point with all its extras and + # then resolve them. We have to pass `extras` along when resolving so + # that the working set knows what extras we want. Otherwise, for + # dist-info distributions, the working set will assume that the + # requirements for that extra are purely optional and skip over them. reqs = self.dist.requires(self.extras) - items = working_set.resolve(reqs, env, installer) + items = working_set.resolve(reqs, env, installer, extras=self.extras) list(map(working_set.add, items)) + pattern = re.compile( + r'\s*' + r'(?P.+?)\s*' + r'=\s*' + r'(?P[\w.]+)\s*' + r'(:\s*(?P[\w.]+))?\s*' + r'(?P\[.*\])?\s*$' + ) + @classmethod def parse(cls, src, dist=None): """Parse a single entry point from string `src` @@ -2207,25 +2507,23 @@ class EntryPoint(object): The entry name and module name are required, but the ``:attrs`` and ``[extras]`` parts are optional """ - try: - attrs = extras = () - name, value = src.split('=', 1) - if '[' in value: - value, extras = value.split('[', 1) - req = Requirement.parse("x[" + extras) - if req.specs: - raise ValueError - extras = req.extras - if ':' in value: - value, attrs = value.split(':', 1) - if not MODULE(attrs.rstrip()): - raise ValueError - attrs = attrs.rstrip().split('.') - except ValueError: + m = cls.pattern.match(src) + if not m: msg = "EntryPoint must be in 'name=module:attrs [extras]' format" raise ValueError(msg, src) - else: - return cls(name.strip(), value.strip(), attrs, extras, dist) + res = m.groupdict() + extras = cls._parse_extras(res['extras']) + attrs = res['attr'].split('.') if res['attr'] else () + return cls(res['name'], res['module'], attrs, extras, dist) + + @classmethod + def _parse_extras(cls, extras_spec): + if not extras_spec: + return () + req = Requirement.parse('x' + extras_spec) + if req.specs: + raise ValueError() + return req.extras @classmethod def parse_group(cls, group, lines, dist=None): @@ -2237,7 +2535,7 @@ class EntryPoint(object): ep = cls.parse(line, dist) if ep.name in this: raise ValueError("Duplicate entry point", group, ep.name) - this[ep.name]=ep + this[ep.name] = ep return this @classmethod @@ -2260,20 +2558,25 @@ class EntryPoint(object): return maps -def _remove_md5_fragment(location): - if not location: - return '' - parsed = urlparse(location) - if parsed[-1].startswith('md5='): - return urlunparse(parsed[:-1] + ('',)) - return location +def _version_from_file(lines): + """ + Given an iterable of lines from a Metadata file, return + the value of the Version field, if present, or None otherwise. + """ + def is_version_line(line): + return line.lower().startswith('version:') + version_lines = filter(is_version_line, lines) + line = next(iter(version_lines), '') + _, _, value = line.partition(':') + return safe_version(value.strip()) or None -class Distribution(object): +class Distribution: """Wrap an actual or potential sys.path entry w/metadata""" PKG_INFO = 'PKG-INFO' - def __init__(self, location=None, metadata=None, project_name=None, + def __init__( + self, location=None, metadata=None, project_name=None, version=None, py_version=PY_MAJOR, platform=None, precedence=EGG_DIST): self.project_name = safe_name(project_name or 'Unknown') @@ -2286,31 +2589,34 @@ class Distribution(object): self._provider = metadata or empty_provider @classmethod - def from_location(cls, location, basename, metadata=None,**kw): - project_name, version, py_version, platform = [None]*4 + def from_location(cls, location, basename, metadata=None, **kw): + project_name, version, py_version, platform = [None] * 4 basename, ext = os.path.splitext(basename) if ext.lower() in _distributionImpl: - # .dist-info gets much metadata differently + cls = _distributionImpl[ext.lower()] + match = EGG_NAME(basename) if match: project_name, version, py_version, platform = match.group( - 'name','ver','pyver','plat' + 'name', 'ver', 'pyver', 'plat' ) - cls = _distributionImpl[ext.lower()] return cls( location, metadata, project_name=project_name, version=version, py_version=py_version, platform=platform, **kw - ) + )._reload_version() + + def _reload_version(self): + return self @property def hashcmp(self): return ( - getattr(self, 'parsed_version', ()), + self.parsed_version, self.precedence, self.key, - _remove_md5_fragment(self.location), - self.py_version, - self.platform, + self.location, + self.py_version or '', + self.platform or '', ) def __hash__(self): @@ -2351,44 +2657,92 @@ class Distribution(object): @property def parsed_version(self): - try: - return self._parsed_version - except AttributeError: - self._parsed_version = pv = parse_version(self.version) - return pv + if not hasattr(self, "_parsed_version"): + self._parsed_version = parse_version(self.version) + + return self._parsed_version + + def _warn_legacy_version(self): + LV = packaging.version.LegacyVersion + is_legacy = isinstance(self._parsed_version, LV) + if not is_legacy: + return + + # While an empty version is technically a legacy version and + # is not a valid PEP 440 version, it's also unlikely to + # actually come from someone and instead it is more likely that + # it comes from setuptools attempting to parse a filename and + # including it in the list. So for that we'll gate this warning + # on if the version is anything at all or not. + if not self.version: + return + + tmpl = textwrap.dedent(""" + '{project_name} ({version})' is being parsed as a legacy, + non PEP 440, + version. You may find odd behavior and sort order. + In particular it will be sorted as less than 0.0. It + is recommended to migrate to PEP 440 compatible + versions. + """).strip().replace('\n', ' ') + + warnings.warn(tmpl.format(**vars(self)), PEP440Warning) @property def version(self): try: return self._version - except AttributeError: - for line in self._get_metadata(self.PKG_INFO): - if line.lower().startswith('version:'): - self._version = safe_version(line.split(':',1)[1].strip()) - return self._version - else: - tmpl = "Missing 'Version:' header and/or %s file" - raise ValueError(tmpl % self.PKG_INFO, self) + except AttributeError as e: + version = self._get_version() + if version is None: + path = self._get_metadata_path_for_display(self.PKG_INFO) + msg = ( + "Missing 'Version:' header and/or {} file at path: {}" + ).format(self.PKG_INFO, path) + raise ValueError(msg, self) from e + + return version @property def _dep_map(self): + """ + A map of extra to its list of (direct) requirements + for this distribution, including the null extra. + """ try: return self.__dep_map except AttributeError: - dm = self.__dep_map = {None: []} - for name in 'requires.txt', 'depends.txt': - for extra, reqs in split_sections(self._get_metadata(name)): - if extra: - if ':' in extra: - extra, marker = extra.split(':', 1) - if invalid_marker(marker): - # XXX warn - reqs=[] - elif not evaluate_marker(marker): - reqs=[] - extra = safe_extra(extra) or None - dm.setdefault(extra,[]).extend(parse_requirements(reqs)) - return dm + self.__dep_map = self._filter_extras(self._build_dep_map()) + return self.__dep_map + + @staticmethod + def _filter_extras(dm): + """ + Given a mapping of extras to dependencies, strip off + environment markers and filter out any dependencies + not matching the markers. + """ + for extra in list(filter(None, dm)): + new_extra = extra + reqs = dm.pop(extra) + new_extra, _, marker = extra.partition(':') + fails_marker = marker and ( + invalid_marker(marker) + or not evaluate_marker(marker) + ) + if fails_marker: + reqs = [] + new_extra = safe_extra(new_extra) or None + + dm.setdefault(new_extra, []).extend(reqs) + return dm + + def _build_dep_map(self): + dm = {} + for name in 'requires.txt', 'depends.txt': + for extra, reqs in split_sections(self._get_metadata(name)): + dm.setdefault(extra, []).extend(parse_requirements(reqs)) + return dm def requires(self, extras=()): """List of Requirements needed for this distro if `extras` are used""" @@ -2398,22 +2752,45 @@ class Distribution(object): for ext in extras: try: deps.extend(dm[safe_extra(ext)]) - except KeyError: + except KeyError as e: raise UnknownExtra( "%s has no such extra feature %r" % (self, ext) - ) + ) from e return deps + def _get_metadata_path_for_display(self, name): + """ + Return the path to the given metadata file, if available. + """ + try: + # We need to access _get_metadata_path() on the provider object + # directly rather than through this class's __getattr__() + # since _get_metadata_path() is marked private. + path = self._provider._get_metadata_path(name) + + # Handle exceptions e.g. in case the distribution's metadata + # provider doesn't support _get_metadata_path(). + except Exception: + return '[could not detect]' + + return path + def _get_metadata(self, name): if self.has_metadata(name): for line in self.get_metadata_lines(name): yield line - def activate(self, path=None): + def _get_version(self): + lines = self._get_metadata(self.PKG_INFO) + version = _version_from_file(lines) + + return version + + def activate(self, path=None, replace=False): """Ensure distribution is importable on `path` (default=sys.path)""" if path is None: path = sys.path - self.insert_on(path) + self.insert_on(path, replace=replace) if path is sys.path: fixup_namespace_packages(self.location) for pkg in self._get_metadata('namespace_packages.txt'): @@ -2451,6 +2828,15 @@ class Distribution(object): raise AttributeError(attr) return getattr(self._provider, attr) + def __dir__(self): + return list( + set(super(Distribution, self).__dir__()) + | set( + attr for attr in self._provider.__dir__() + if not attr.startswith('_') + ) + ) + @classmethod def from_filename(cls, filename, metadata=None, **kw): return cls.from_location( @@ -2460,7 +2846,12 @@ class Distribution(object): def as_requirement(self): """Return a ``Requirement`` that matches this distribution exactly""" - return Requirement.parse('%s==%s' % (self.project_name, self.version)) + if isinstance(self.parsed_version, packaging.version.Version): + spec = "%s==%s" % (self.project_name, self.parsed_version) + else: + spec = "%s===%s" % (self.project_name, self.parsed_version) + + return Requirement.parse(spec) def load_entry_point(self, group, name): """Return the `name` entry point of `group` or raise ImportError""" @@ -2478,15 +2869,33 @@ class Distribution(object): self._get_metadata('entry_points.txt'), self ) if group is not None: - return ep_map.get(group,{}) + return ep_map.get(group, {}) return ep_map def get_entry_info(self, group, name): """Return the EntryPoint object for `group`+`name`, or ``None``""" return self.get_entry_map(group).get(name) - def insert_on(self, path, loc = None): - """Insert self.location in path before its nearest parent directory""" + # FIXME: 'Distribution.insert_on' is too complex (13) + def insert_on(self, path, loc=None, replace=False): # noqa: C901 + """Ensure self.location is on path + + If replace=False (default): + - If location is already in path anywhere, do nothing. + - Else: + - If it's an egg and its parent directory is on path, + insert just ahead of the parent. + - Else: add to the end of path. + If replace=True: + - If location is already on path anywhere (not eggs) + or higher priority than its parent (eggs) + do nothing. + - Else: + - If it's an egg and its parent directory is on path, + insert just ahead of the parent, + removing any lower-priority entries. + - Else: add it to the front of path. + """ loc = loc or self.location if not loc: @@ -2494,13 +2903,21 @@ class Distribution(object): nloc = _normalize_cached(loc) bdir = os.path.dirname(nloc) - npath= [(p and _normalize_cached(p) or p) for p in path] + npath = [(p and _normalize_cached(p) or p) for p in path] for p, item in enumerate(npath): if item == nloc: - break + if replace: + break + else: + # don't modify path (even removing duplicates) if + # found and not replace + return elif item == bdir and self.precedence == EGG_DIST: # if it's an .egg, give it precedence over its directory + # UNLESS it's already been added to sys.path and replace=False + if (not replace) and nloc in npath[p:]: + return if path is sys.path: self.check_version_conflict() path.insert(p, loc) @@ -2509,13 +2926,16 @@ class Distribution(object): else: if path is sys.path: self.check_version_conflict() - path.append(loc) + if replace: + path.insert(0, loc) + else: + path.append(loc) return # p is the spot where we found or inserted loc; now remove duplicates while True: try: - np = npath.index(nloc, p+1) + np = npath.index(nloc, p + 1) except ValueError: break else: @@ -2555,7 +2975,7 @@ class Distribution(object): return False return True - def clone(self,**kw): + def clone(self, **kw): """Copy this distribution, substituting in any changed keyword args""" names = 'project_name version py_version platform location precedence' for attr in names.split(): @@ -2568,8 +2988,30 @@ class Distribution(object): return [dep for dep in self._dep_map if dep] +class EggInfoDistribution(Distribution): + def _reload_version(self): + """ + Packages installed by distutils (e.g. numpy or scipy), + which uses an old safe_version, and so + their version numbers can get mangled when + converted to filenames (e.g., 1.11.0.dev0+2329eae to + 1.11.0.dev0_2329eae). These distributions will not be + parsed properly + downstream by Distribution and safe_version, so + take an extra step and try to get the version number from + the metadata file itself instead of the filename. + """ + md_version = self._get_version() + if md_version: + self._version = md_version + return self + + class DistInfoDistribution(Distribution): - """Wrap an actual or potential sys.path entry w/metadata, .dist-info style""" + """ + Wrap an actual or potential sys.path entry + w/metadata, .dist-info style. + """ PKG_INFO = 'METADATA' EQEQ = re.compile(r"([\(,])\s*(\d.*?)\s*([,\)])") @@ -2591,54 +3033,38 @@ class DistInfoDistribution(Distribution): self.__dep_map = self._compute_dependencies() return self.__dep_map - def _preparse_requirement(self, requires_dist): - """Convert 'Foobar (1); baz' to ('Foobar ==1', 'baz') - Split environment marker, add == prefix to version specifiers as - necessary, and remove parenthesis. - """ - parts = requires_dist.split(';', 1) + [''] - distvers = parts[0].strip() - mark = parts[1].strip() - distvers = re.sub(self.EQEQ, r"\1==\2\3", distvers) - distvers = distvers.replace('(', '').replace(')', '') - return (distvers, mark) - def _compute_dependencies(self): """Recompute this distribution's dependencies.""" - from _markerlib import compile as compile_marker dm = self.__dep_map = {None: []} reqs = [] # Including any condition expressions for req in self._parsed_pkg_info.get_all('Requires-Dist') or []: - distvers, mark = self._preparse_requirement(req) - parsed = next(parse_requirements(distvers)) - parsed.marker_fn = compile_marker(mark) - reqs.append(parsed) + reqs.extend(parse_requirements(req)) def reqs_for_extra(extra): for req in reqs: - if req.marker_fn(override={'extra':extra}): + if not req.marker or req.marker.evaluate({'extra': extra}): yield req - common = frozenset(reqs_for_extra(None)) + common = types.MappingProxyType(dict.fromkeys(reqs_for_extra(None))) dm[None].extend(common) for extra in self._parsed_pkg_info.get_all('Provides-Extra') or []: - extra = safe_extra(extra.strip()) - dm[extra] = list(frozenset(reqs_for_extra(extra)) - common) + s_extra = safe_extra(extra.strip()) + dm[s_extra] = [r for r in reqs_for_extra(extra) if r not in common] return dm _distributionImpl = { '.egg': Distribution, - '.egg-info': Distribution, + '.egg-info': EggInfoDistribution, '.dist-info': DistInfoDistribution, - } +} -def issue_warning(*args,**kw): +def issue_warning(*args, **kw): level = 1 g = globals() try: @@ -2659,158 +3085,90 @@ def parse_requirements(strs): # create a steppable iterator, so we can handle \-continuations lines = iter(yield_lines(strs)) - def scan_list(ITEM, TERMINATOR, line, p, groups, item_name): - - items = [] - - while not TERMINATOR(line, p): - if CONTINUE(line, p): - try: - line = next(lines) - p = 0 - except StopIteration: - raise ValueError( - "\\ must not appear on the last nonblank line" - ) - - match = ITEM(line, p) - if not match: - msg = "Expected " + item_name + " in" - raise ValueError(msg, line, "at", line[p:]) - - items.append(match.group(*groups)) - p = match.end() - - match = COMMA(line, p) - if match: - # skip the comma - p = match.end() - elif not TERMINATOR(line, p): - msg = "Expected ',' or end-of-list in" - raise ValueError(msg, line, "at", line[p:]) - - match = TERMINATOR(line, p) - # skip the terminator, if any - if match: - p = match.end() - return line, p, items - for line in lines: - match = DISTRO(line) - if not match: - raise ValueError("Missing distribution spec", line) - project_name = match.group(1) - p = match.end() - extras = [] - - match = OBRACKET(line, p) - if match: - p = match.end() - line, p, extras = scan_list( - DISTRO, CBRACKET, line, p, (1,), "'extra' name" - ) - - line, p, specs = scan_list(VERSION, LINE_END, line, p, (1, 2), - "version spec") - specs = [(op, safe_version(val)) for op, val in specs] - yield Requirement(project_name, specs, extras) + # Drop comments -- a hash without a space may be in a URL. + if ' #' in line: + line = line[:line.find(' #')] + # If there is a line continuation, drop it, and append the next line. + if line.endswith('\\'): + line = line[:-2].strip() + try: + line += next(lines) + except StopIteration: + return + yield Requirement(line) -class Requirement: - def __init__(self, project_name, specs, extras): +class RequirementParseError(packaging.requirements.InvalidRequirement): + "Compatibility wrapper for InvalidRequirement" + + +class Requirement(packaging.requirements.Requirement): + def __init__(self, requirement_string): """DO NOT CALL THIS UNDOCUMENTED METHOD; use Requirement.parse()!""" - self.unsafe_name, project_name = project_name, safe_name(project_name) + super(Requirement, self).__init__(requirement_string) + self.unsafe_name = self.name + project_name = safe_name(self.name) self.project_name, self.key = project_name, project_name.lower() - index = [ - (parse_version(v), state_machine[op], op, v) - for op, v in specs - ] - index.sort() - self.specs = [(op, ver) for parsed, trans, op, ver in index] - self.index, self.extras = index, tuple(map(safe_extra, extras)) + self.specs = [ + (spec.operator, spec.version) for spec in self.specifier] + self.extras = tuple(map(safe_extra, self.extras)) self.hashCmp = ( self.key, - tuple((op, parsed) for parsed, trans, op, ver in index), + self.url, + self.specifier, frozenset(self.extras), + str(self.marker) if self.marker else None, ) self.__hash = hash(self.hashCmp) - def __str__(self): - specs = ','.join([''.join(s) for s in self.specs]) - extras = ','.join(self.extras) - if extras: - extras = '[%s]' % extras - return '%s%s%s' % (self.project_name, extras, specs) - def __eq__(self, other): return ( isinstance(other, Requirement) and self.hashCmp == other.hashCmp ) + def __ne__(self, other): + return not self == other + def __contains__(self, item): if isinstance(item, Distribution): if item.key != self.key: return False - # only get if we need it - if self.index: - item = item.parsed_version - elif isinstance(item, string_types): - item = parse_version(item) - last = None - # -1, 0, 1 - compare = lambda a, b: (a > b) - (a < b) - for parsed, trans, op, ver in self.index: - # Indexing: 0, 1, -1 - action = trans[compare(item, parsed)] - if action == 'F': - return False - elif action == 'T': - return True - elif action == '+': - last = True - elif action == '-' or last is None: - last = False - # no rules encountered - if last is None: - last = True - return last + + item = item.version + + # Allow prereleases always in order to match the previous behavior of + # this method. In the future this should be smarter and follow PEP 440 + # more accurately. + return self.specifier.contains(item, prereleases=True) def __hash__(self): return self.__hash - def __repr__(self): return "Requirement.parse(%r)" % str(self) + def __repr__(self): + return "Requirement.parse(%r)" % str(self) @staticmethod def parse(s): - reqs = list(parse_requirements(s)) - if reqs: - if len(reqs) == 1: - return reqs[0] - raise ValueError("Expected only one requirement", s) - raise ValueError("No requirements found", s) - -state_machine = { - # =>< - '<': '--T', - '<=': 'T-T', - '>': 'F+F', - '>=': 'T+F', - '==': 'T..', - '!=': 'F++', -} + req, = parse_requirements(s) + return req -def _get_mro(cls): - """Get an mro for a type or classic class""" - if not isinstance(cls, type): - class cls(cls, object): pass - return cls.__mro__[1:] - return cls.__mro__ +def _always_object(classes): + """ + Ensure object appears in the mro even + for old-style classes. + """ + if object not in classes: + return classes + (object,) + return classes + def _find_adapter(registry, ob): """Return an adapter factory for `ob` from `registry`""" - for t in _get_mro(getattr(ob, '__class__', type(ob))): + types = _always_object(inspect.getmro(getattr(ob, '__class__', type(ob)))) + for t in types: if t in registry: return registry[t] @@ -2818,18 +3176,20 @@ def _find_adapter(registry, ob): def ensure_directory(path): """Ensure that the parent directory of `path` exists""" dirname = os.path.dirname(path) - if not os.path.isdir(dirname): - os.makedirs(dirname) + os.makedirs(dirname, exist_ok=True) -def _bypass_ensure_directory(path, mode=0o777): +def _bypass_ensure_directory(path): """Sandbox-bypassing version of ensure_directory()""" if not WRITE_SUPPORT: raise IOError('"os.mkdir" not supported on this platform.') dirname, filename = split(path) if dirname and filename and not isdir(dirname): _bypass_ensure_directory(dirname) - mkdir(dirname, mode) + try: + mkdir(dirname, 0o755) + except FileExistsError: + pass def split_sections(s): @@ -2857,39 +3217,87 @@ def split_sections(s): # wrap up last segment yield section, content -def _mkstemp(*args,**kw): + +def _mkstemp(*args, **kw): old_open = os.open try: # temporarily bypass sandboxing os.open = os_open - return tempfile.mkstemp(*args,**kw) + return tempfile.mkstemp(*args, **kw) finally: # and then put it back os.open = old_open -# Set up global resource manager (deliberately not state-saved) -_manager = ResourceManager() -def _initialize(g): - for name in dir(_manager): - if not name.startswith('_'): - g[name] = getattr(_manager, name) -_initialize(globals()) +# Silence the PEP440Warning by default, so that end users don't get hit by it +# randomly just because they use pkg_resources. We want to append the rule +# because we want earlier uses of filterwarnings to take precedence over this +# one. +warnings.filterwarnings("ignore", category=PEP440Warning, append=True) -# Prepare the master working set and make the ``require()`` API available -working_set = WorkingSet._build_master() -_declare_state('object', working_set=working_set) -require = working_set.require -iter_entry_points = working_set.iter_entry_points -add_activation_listener = working_set.subscribe -run_script = working_set.run_script -# backward compatibility -run_main = run_script -# Activate all distributions already on sys.path, and ensure that -# all distributions added to the working set in the future (e.g. by -# calling ``require()``) will get activated as well. -add_activation_listener(lambda dist: dist.activate()) -working_set.entries=[] -# match order -list(map(working_set.add_entry, sys.path)) +# from jaraco.functools 1.3 +def _call_aside(f, *args, **kwargs): + f(*args, **kwargs) + return f + + +@_call_aside +def _initialize(g=globals()): + "Set up global resource manager (deliberately not state-saved)" + manager = ResourceManager() + g['_manager'] = manager + g.update( + (name, getattr(manager, name)) + for name in dir(manager) + if not name.startswith('_') + ) + + +class PkgResourcesDeprecationWarning(Warning): + """ + Base class for warning about deprecations in ``pkg_resources`` + + This class is not derived from ``DeprecationWarning``, and as such is + visible by default. + """ + + +@_call_aside +def _initialize_master_working_set(): + """ + Prepare the master working set and make the ``require()`` + API available. + + This function has explicit effects on the global state + of pkg_resources. It is intended to be invoked once at + the initialization of this module. + + Invocation by other packages is unsupported and done + at their own risk. + """ + working_set = WorkingSet._build_master() + _declare_state('object', working_set=working_set) + + require = working_set.require + iter_entry_points = working_set.iter_entry_points + add_activation_listener = working_set.subscribe + run_script = working_set.run_script + # backward compatibility + run_main = run_script + # Activate all distributions already on sys.path with replace=False and + # ensure that all distributions added to the working set in the future + # (e.g. by calling ``require()``) will get activated as well, + # with higher priority (replace=True). + tuple( + dist.activate(replace=False) + for dist in working_set + ) + add_activation_listener( + lambda dist: dist.activate(replace=True), + existing=False, + ) + working_set.entries = [] + # match order + list(map(working_set.add_entry, sys.path)) + globals().update(locals()) diff --git a/lib/pkg_resources/_vendor/__init__.py b/lib/pkg_resources/_vendor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/appdirs.py b/lib/pkg_resources/_vendor/appdirs.py new file mode 100644 index 00000000..ae67001a --- /dev/null +++ b/lib/pkg_resources/_vendor/appdirs.py @@ -0,0 +1,608 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2005-2010 ActiveState Software Inc. +# Copyright (c) 2013 Eddy Petrișor + +"""Utilities for determining application-specific dirs. + +See for details and usage. +""" +# Dev Notes: +# - MSDN on where to store app data files: +# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120 +# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html +# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html + +__version_info__ = (1, 4, 3) +__version__ = '.'.join(map(str, __version_info__)) + + +import sys +import os + +PY3 = sys.version_info[0] == 3 + +if PY3: + unicode = str + +if sys.platform.startswith('java'): + import platform + os_name = platform.java_ver()[3][0] + if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc. + system = 'win32' + elif os_name.startswith('Mac'): # "Mac OS X", etc. + system = 'darwin' + else: # "Linux", "SunOS", "FreeBSD", etc. + # Setting this to "linux2" is not ideal, but only Windows or Mac + # are actually checked for and the rest of the module expects + # *sys.platform* style strings. + system = 'linux2' +else: + system = sys.platform + + + +def user_data_dir(appname=None, appauthor=None, version=None, roaming=False): + r"""Return full path to the user-specific data dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "roaming" (boolean, default False) can be set True to use the Windows + roaming appdata directory. That means that for users on a Windows + network setup for roaming profiles, this user data will be + sync'd on login. See + + for a discussion of issues. + + Typical user data directories are: + Mac OS X: ~/Library/Application Support/ + Unix: ~/.local/share/ # or in $XDG_DATA_HOME, if defined + Win XP (not roaming): C:\Documents and Settings\\Application Data\\ + Win XP (roaming): C:\Documents and Settings\\Local Settings\Application Data\\ + Win 7 (not roaming): C:\Users\\AppData\Local\\ + Win 7 (roaming): C:\Users\\AppData\Roaming\\ + + For Unix, we follow the XDG spec and support $XDG_DATA_HOME. + That means, by default "~/.local/share/". + """ + if system == "win32": + if appauthor is None: + appauthor = appname + const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA" + path = os.path.normpath(_get_win_folder(const)) + if appname: + if appauthor is not False: + path = os.path.join(path, appauthor, appname) + else: + path = os.path.join(path, appname) + elif system == 'darwin': + path = os.path.expanduser('~/Library/Application Support/') + if appname: + path = os.path.join(path, appname) + else: + path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +def site_data_dir(appname=None, appauthor=None, version=None, multipath=False): + r"""Return full path to the user-shared data dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "multipath" is an optional parameter only applicable to *nix + which indicates that the entire list of data dirs should be + returned. By default, the first item from XDG_DATA_DIRS is + returned, or '/usr/local/share/', + if XDG_DATA_DIRS is not set + + Typical site data directories are: + Mac OS X: /Library/Application Support/ + Unix: /usr/local/share/ or /usr/share/ + Win XP: C:\Documents and Settings\All Users\Application Data\\ + Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) + Win 7: C:\ProgramData\\ # Hidden, but writeable on Win 7. + + For Unix, this is using the $XDG_DATA_DIRS[0] default. + + WARNING: Do not use this on Windows. See the Vista-Fail note above for why. + """ + if system == "win32": + if appauthor is None: + appauthor = appname + path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA")) + if appname: + if appauthor is not False: + path = os.path.join(path, appauthor, appname) + else: + path = os.path.join(path, appname) + elif system == 'darwin': + path = os.path.expanduser('/Library/Application Support') + if appname: + path = os.path.join(path, appname) + else: + # XDG default for $XDG_DATA_DIRS + # only first, if multipath is False + path = os.getenv('XDG_DATA_DIRS', + os.pathsep.join(['/usr/local/share', '/usr/share'])) + pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] + if appname: + if version: + appname = os.path.join(appname, version) + pathlist = [os.sep.join([x, appname]) for x in pathlist] + + if multipath: + path = os.pathsep.join(pathlist) + else: + path = pathlist[0] + return path + + if appname and version: + path = os.path.join(path, version) + return path + + +def user_config_dir(appname=None, appauthor=None, version=None, roaming=False): + r"""Return full path to the user-specific config dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "roaming" (boolean, default False) can be set True to use the Windows + roaming appdata directory. That means that for users on a Windows + network setup for roaming profiles, this user data will be + sync'd on login. See + + for a discussion of issues. + + Typical user config directories are: + Mac OS X: same as user_data_dir + Unix: ~/.config/ # or in $XDG_CONFIG_HOME, if defined + Win *: same as user_data_dir + + For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME. + That means, by default "~/.config/". + """ + if system in ["win32", "darwin"]: + path = user_data_dir(appname, appauthor, None, roaming) + else: + path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config")) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +def site_config_dir(appname=None, appauthor=None, version=None, multipath=False): + r"""Return full path to the user-shared data dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "multipath" is an optional parameter only applicable to *nix + which indicates that the entire list of config dirs should be + returned. By default, the first item from XDG_CONFIG_DIRS is + returned, or '/etc/xdg/', if XDG_CONFIG_DIRS is not set + + Typical site config directories are: + Mac OS X: same as site_data_dir + Unix: /etc/xdg/ or $XDG_CONFIG_DIRS[i]/ for each value in + $XDG_CONFIG_DIRS + Win *: same as site_data_dir + Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) + + For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False + + WARNING: Do not use this on Windows. See the Vista-Fail note above for why. + """ + if system in ["win32", "darwin"]: + path = site_data_dir(appname, appauthor) + if appname and version: + path = os.path.join(path, version) + else: + # XDG default for $XDG_CONFIG_DIRS + # only first, if multipath is False + path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg') + pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] + if appname: + if version: + appname = os.path.join(appname, version) + pathlist = [os.sep.join([x, appname]) for x in pathlist] + + if multipath: + path = os.pathsep.join(pathlist) + else: + path = pathlist[0] + return path + + +def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True): + r"""Return full path to the user-specific cache dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "opinion" (boolean) can be False to disable the appending of + "Cache" to the base app data dir for Windows. See + discussion below. + + Typical user cache directories are: + Mac OS X: ~/Library/Caches/ + Unix: ~/.cache/ (XDG default) + Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Cache + Vista: C:\Users\\AppData\Local\\\Cache + + On Windows the only suggestion in the MSDN docs is that local settings go in + the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming + app data dir (the default returned by `user_data_dir` above). Apps typically + put cache data somewhere *under* the given dir here. Some examples: + ...\Mozilla\Firefox\Profiles\\Cache + ...\Acme\SuperApp\Cache\1.0 + OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value. + This can be disabled with the `opinion=False` option. + """ + if system == "win32": + if appauthor is None: + appauthor = appname + path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) + if appname: + if appauthor is not False: + path = os.path.join(path, appauthor, appname) + else: + path = os.path.join(path, appname) + if opinion: + path = os.path.join(path, "Cache") + elif system == 'darwin': + path = os.path.expanduser('~/Library/Caches') + if appname: + path = os.path.join(path, appname) + else: + path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +def user_state_dir(appname=None, appauthor=None, version=None, roaming=False): + r"""Return full path to the user-specific state dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "roaming" (boolean, default False) can be set True to use the Windows + roaming appdata directory. That means that for users on a Windows + network setup for roaming profiles, this user data will be + sync'd on login. See + + for a discussion of issues. + + Typical user state directories are: + Mac OS X: same as user_data_dir + Unix: ~/.local/state/ # or in $XDG_STATE_HOME, if defined + Win *: same as user_data_dir + + For Unix, we follow this Debian proposal + to extend the XDG spec and support $XDG_STATE_HOME. + + That means, by default "~/.local/state/". + """ + if system in ["win32", "darwin"]: + path = user_data_dir(appname, appauthor, None, roaming) + else: + path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state")) + if appname: + path = os.path.join(path, appname) + if appname and version: + path = os.path.join(path, version) + return path + + +def user_log_dir(appname=None, appauthor=None, version=None, opinion=True): + r"""Return full path to the user-specific log dir for this application. + + "appname" is the name of application. + If None, just the system directory is returned. + "appauthor" (only used on Windows) is the name of the + appauthor or distributing body for this application. Typically + it is the owning company name. This falls back to appname. You may + pass False to disable it. + "version" is an optional version path element to append to the + path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this + would typically be ".". + Only applied when appname is present. + "opinion" (boolean) can be False to disable the appending of + "Logs" to the base app data dir for Windows, and "log" to the + base cache dir for Unix. See discussion below. + + Typical user log directories are: + Mac OS X: ~/Library/Logs/ + Unix: ~/.cache//log # or under $XDG_CACHE_HOME if defined + Win XP: C:\Documents and Settings\\Local Settings\Application Data\\\Logs + Vista: C:\Users\\AppData\Local\\\Logs + + On Windows the only suggestion in the MSDN docs is that local settings + go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in + examples of what some windows apps use for a logs dir.) + + OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA` + value for Windows and appends "log" to the user cache dir for Unix. + This can be disabled with the `opinion=False` option. + """ + if system == "darwin": + path = os.path.join( + os.path.expanduser('~/Library/Logs'), + appname) + elif system == "win32": + path = user_data_dir(appname, appauthor, version) + version = False + if opinion: + path = os.path.join(path, "Logs") + else: + path = user_cache_dir(appname, appauthor, version) + version = False + if opinion: + path = os.path.join(path, "log") + if appname and version: + path = os.path.join(path, version) + return path + + +class AppDirs(object): + """Convenience wrapper for getting application dirs.""" + def __init__(self, appname=None, appauthor=None, version=None, + roaming=False, multipath=False): + self.appname = appname + self.appauthor = appauthor + self.version = version + self.roaming = roaming + self.multipath = multipath + + @property + def user_data_dir(self): + return user_data_dir(self.appname, self.appauthor, + version=self.version, roaming=self.roaming) + + @property + def site_data_dir(self): + return site_data_dir(self.appname, self.appauthor, + version=self.version, multipath=self.multipath) + + @property + def user_config_dir(self): + return user_config_dir(self.appname, self.appauthor, + version=self.version, roaming=self.roaming) + + @property + def site_config_dir(self): + return site_config_dir(self.appname, self.appauthor, + version=self.version, multipath=self.multipath) + + @property + def user_cache_dir(self): + return user_cache_dir(self.appname, self.appauthor, + version=self.version) + + @property + def user_state_dir(self): + return user_state_dir(self.appname, self.appauthor, + version=self.version) + + @property + def user_log_dir(self): + return user_log_dir(self.appname, self.appauthor, + version=self.version) + + +#---- internal support stuff + +def _get_win_folder_from_registry(csidl_name): + """This is a fallback technique at best. I'm not sure if using the + registry for this guarantees us the correct answer for all CSIDL_* + names. + """ + if PY3: + import winreg as _winreg + else: + import _winreg + + shell_folder_name = { + "CSIDL_APPDATA": "AppData", + "CSIDL_COMMON_APPDATA": "Common AppData", + "CSIDL_LOCAL_APPDATA": "Local AppData", + }[csidl_name] + + key = _winreg.OpenKey( + _winreg.HKEY_CURRENT_USER, + r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" + ) + dir, type = _winreg.QueryValueEx(key, shell_folder_name) + return dir + + +def _get_win_folder_with_pywin32(csidl_name): + from win32com.shell import shellcon, shell + dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0) + # Try to make this a unicode path because SHGetFolderPath does + # not return unicode strings when there is unicode data in the + # path. + try: + dir = unicode(dir) + + # Downgrade to short path name if have highbit chars. See + # . + has_high_char = False + for c in dir: + if ord(c) > 255: + has_high_char = True + break + if has_high_char: + try: + import win32api + dir = win32api.GetShortPathName(dir) + except ImportError: + pass + except UnicodeError: + pass + return dir + + +def _get_win_folder_with_ctypes(csidl_name): + import ctypes + + csidl_const = { + "CSIDL_APPDATA": 26, + "CSIDL_COMMON_APPDATA": 35, + "CSIDL_LOCAL_APPDATA": 28, + }[csidl_name] + + buf = ctypes.create_unicode_buffer(1024) + ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) + + # Downgrade to short path name if have highbit chars. See + # . + has_high_char = False + for c in buf: + if ord(c) > 255: + has_high_char = True + break + if has_high_char: + buf2 = ctypes.create_unicode_buffer(1024) + if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): + buf = buf2 + + return buf.value + +def _get_win_folder_with_jna(csidl_name): + import array + from com.sun import jna + from com.sun.jna.platform import win32 + + buf_size = win32.WinDef.MAX_PATH * 2 + buf = array.zeros('c', buf_size) + shell = win32.Shell32.INSTANCE + shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf) + dir = jna.Native.toString(buf.tostring()).rstrip("\0") + + # Downgrade to short path name if have highbit chars. See + # . + has_high_char = False + for c in dir: + if ord(c) > 255: + has_high_char = True + break + if has_high_char: + buf = array.zeros('c', buf_size) + kernel = win32.Kernel32.INSTANCE + if kernel.GetShortPathName(dir, buf, buf_size): + dir = jna.Native.toString(buf.tostring()).rstrip("\0") + + return dir + +if system == "win32": + try: + import win32com.shell + _get_win_folder = _get_win_folder_with_pywin32 + except ImportError: + try: + from ctypes import windll + _get_win_folder = _get_win_folder_with_ctypes + except ImportError: + try: + import com.sun.jna + _get_win_folder = _get_win_folder_with_jna + except ImportError: + _get_win_folder = _get_win_folder_from_registry + + +#---- self test code + +if __name__ == "__main__": + appname = "MyApp" + appauthor = "MyCompany" + + props = ("user_data_dir", + "user_config_dir", + "user_cache_dir", + "user_state_dir", + "user_log_dir", + "site_data_dir", + "site_config_dir") + + print("-- app dirs %s --" % __version__) + + print("-- app dirs (with optional 'version')") + dirs = AppDirs(appname, appauthor, version="1.0") + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) + + print("\n-- app dirs (without optional 'version')") + dirs = AppDirs(appname, appauthor) + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) + + print("\n-- app dirs (without optional 'appauthor')") + dirs = AppDirs(appname) + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) + + print("\n-- app dirs (with disabled 'appauthor')") + dirs = AppDirs(appname, appauthor=False) + for prop in props: + print("%s: %s" % (prop, getattr(dirs, prop))) diff --git a/lib/pkg_resources/_vendor/packaging/LICENSE b/lib/pkg_resources/_vendor/packaging/LICENSE new file mode 100644 index 00000000..6f62d44e --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/LICENSE @@ -0,0 +1,3 @@ +This software is made available under the terms of *either* of the licenses +found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made +under the terms of *both* these licenses. diff --git a/lib/pkg_resources/_vendor/packaging/LICENSE.APACHE b/lib/pkg_resources/_vendor/packaging/LICENSE.APACHE new file mode 100644 index 00000000..f433b1a5 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/LICENSE.APACHE @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/lib/pkg_resources/_vendor/packaging/LICENSE.BSD b/lib/pkg_resources/_vendor/packaging/LICENSE.BSD new file mode 100644 index 00000000..42ce7b75 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/LICENSE.BSD @@ -0,0 +1,23 @@ +Copyright (c) Donald Stufft and individual contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lib/pkg_resources/_vendor/packaging/__about__.py b/lib/pkg_resources/_vendor/packaging/__about__.py new file mode 100644 index 00000000..c359122f --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/__about__.py @@ -0,0 +1,26 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +__all__ = [ + "__title__", + "__summary__", + "__uri__", + "__version__", + "__author__", + "__email__", + "__license__", + "__copyright__", +] + +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" + +__version__ = "21.2" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014-2019 %s" % __author__ diff --git a/lib/pkg_resources/_vendor/packaging/__init__.py b/lib/pkg_resources/_vendor/packaging/__init__.py new file mode 100644 index 00000000..3c50c5dc --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/__init__.py @@ -0,0 +1,25 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from .__about__ import ( + __author__, + __copyright__, + __email__, + __license__, + __summary__, + __title__, + __uri__, + __version__, +) + +__all__ = [ + "__title__", + "__summary__", + "__uri__", + "__version__", + "__author__", + "__email__", + "__license__", + "__copyright__", +] diff --git a/lib/pkg_resources/_vendor/packaging/_manylinux.py b/lib/pkg_resources/_vendor/packaging/_manylinux.py new file mode 100644 index 00000000..4c379aa6 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/_manylinux.py @@ -0,0 +1,301 @@ +import collections +import functools +import os +import re +import struct +import sys +import warnings +from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple + + +# Python does not provide platform information at sufficient granularity to +# identify the architecture of the running executable in some cases, so we +# determine it dynamically by reading the information from the running +# process. This only applies on Linux, which uses the ELF format. +class _ELFFileHeader: + # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header + class _InvalidELFFileHeader(ValueError): + """ + An invalid ELF file header was found. + """ + + ELF_MAGIC_NUMBER = 0x7F454C46 + ELFCLASS32 = 1 + ELFCLASS64 = 2 + ELFDATA2LSB = 1 + ELFDATA2MSB = 2 + EM_386 = 3 + EM_S390 = 22 + EM_ARM = 40 + EM_X86_64 = 62 + EF_ARM_ABIMASK = 0xFF000000 + EF_ARM_ABI_VER5 = 0x05000000 + EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + def __init__(self, file: IO[bytes]) -> None: + def unpack(fmt: str) -> int: + try: + data = file.read(struct.calcsize(fmt)) + result: Tuple[int, ...] = struct.unpack(fmt, data) + except struct.error: + raise _ELFFileHeader._InvalidELFFileHeader() + return result[0] + + self.e_ident_magic = unpack(">I") + if self.e_ident_magic != self.ELF_MAGIC_NUMBER: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_class = unpack("B") + if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_data = unpack("B") + if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_version = unpack("B") + self.e_ident_osabi = unpack("B") + self.e_ident_abiversion = unpack("B") + self.e_ident_pad = file.read(7) + format_h = "H" + format_i = "I" + format_q = "Q" + format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q + self.e_type = unpack(format_h) + self.e_machine = unpack(format_h) + self.e_version = unpack(format_i) + self.e_entry = unpack(format_p) + self.e_phoff = unpack(format_p) + self.e_shoff = unpack(format_p) + self.e_flags = unpack(format_i) + self.e_ehsize = unpack(format_h) + self.e_phentsize = unpack(format_h) + self.e_phnum = unpack(format_h) + self.e_shentsize = unpack(format_h) + self.e_shnum = unpack(format_h) + self.e_shstrndx = unpack(format_h) + + +def _get_elf_header() -> Optional[_ELFFileHeader]: + try: + with open(sys.executable, "rb") as f: + elf_header = _ELFFileHeader(f) + except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): + return None + return elf_header + + +def _is_linux_armhf() -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_ARM + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABIMASK + ) == elf_header.EF_ARM_ABI_VER5 + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD + ) == elf_header.EF_ARM_ABI_FLOAT_HARD + return result + + +def _is_linux_i686() -> bool: + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_386 + return result + + +def _have_compatible_abi(arch: str) -> bool: + if arch == "armv7l": + return _is_linux_armhf() + if arch == "i686": + return _is_linux_i686() + return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> Optional[str]: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". + version_string = os.confstr("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.split() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> Optional[str]: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> Optional[str]: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. + """ + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + "Expected glibc version with 2 components major.minor," + " got: %s" % version_str, + RuntimeWarning, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache() +def _get_glibc_version() -> Tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux # noqa + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(linux: str, arch: str) -> Iterator[str]: + if not _have_compatible_abi(arch): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if arch in {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(tag, arch, glibc_version): + yield linux.replace("linux", tag) + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(legacy_tag, arch, glibc_version): + yield linux.replace("linux", legacy_tag) diff --git a/lib/pkg_resources/_vendor/packaging/_musllinux.py b/lib/pkg_resources/_vendor/packaging/_musllinux.py new file mode 100644 index 00000000..85450faf --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/_musllinux.py @@ -0,0 +1,136 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. +""" + +import contextlib +import functools +import operator +import os +import re +import struct +import subprocess +import sys +from typing import IO, Iterator, NamedTuple, Optional, Tuple + + +def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, f.read(struct.calcsize(fmt))) + + +def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]: + """Detect musl libc location by parsing the Python executable. + + Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca + ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html + """ + f.seek(0) + try: + ident = _read_unpacked(f, "16B") + except struct.error: + return None + if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. + return None + f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, p_fmt, p_idx = { + 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. + 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. + }[ident[4]] + except KeyError: + return None + else: + p_get = operator.itemgetter(*p_idx) + + # Find the interpreter section and return its content. + try: + _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) + except struct.error: + return None + for i in range(e_phnum + 1): + f.seek(e_phoff + e_phentsize * i) + try: + p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) + except struct.error: + return None + if p_type != 3: # Not PT_INTERP. + continue + f.seek(p_offset) + interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") + if "musl" not in interpreter: + return None + return interpreter + return None + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> Optional[_MuslVersion]: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> Optional[_MuslVersion]: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + with contextlib.ExitStack() as stack: + try: + f = stack.enter_context(open(executable, "rb")) + except IOError: + return None + ld = _parse_ld_musl_from_elf(f) + if not ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(arch: str) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param arch: Should be the part of platform tag after the ``linux_`` + prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a + prerequisite for the current platform to be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. + return + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/lib/pkg_resources/_vendor/packaging/_structures.py b/lib/pkg_resources/_vendor/packaging/_structures.py new file mode 100644 index 00000000..95154975 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/_structures.py @@ -0,0 +1,67 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + + +class InfinityType: + def __repr__(self) -> str: + return "Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return False + + def __le__(self, other: object) -> bool: + return False + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __ne__(self, other: object) -> bool: + return not isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return True + + def __ge__(self, other: object) -> bool: + return True + + def __neg__(self: object) -> "NegativeInfinityType": + return NegativeInfinity + + +Infinity = InfinityType() + + +class NegativeInfinityType: + def __repr__(self) -> str: + return "-Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return True + + def __le__(self, other: object) -> bool: + return True + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __ne__(self, other: object) -> bool: + return not isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return False + + def __ge__(self, other: object) -> bool: + return False + + def __neg__(self: object) -> InfinityType: + return Infinity + + +NegativeInfinity = NegativeInfinityType() diff --git a/lib/pkg_resources/_vendor/packaging/markers.py b/lib/pkg_resources/_vendor/packaging/markers.py new file mode 100644 index 00000000..18769b09 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/markers.py @@ -0,0 +1,304 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import operator +import os +import platform +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from pkg_resources.extern.pyparsing import ( # noqa: N817 + Forward, + Group, + Literal as L, + ParseException, + ParseResults, + QuotedString, + ZeroOrMore, + stringEnd, + stringStart, +) + +from .specifiers import InvalidSpecifier, Specifier + +__all__ = [ + "InvalidMarker", + "UndefinedComparison", + "UndefinedEnvironmentName", + "Marker", + "default_environment", +] + +Operator = Callable[[str, str], bool] + + +class InvalidMarker(ValueError): + """ + An invalid marker was found, users should refer to PEP 508. + """ + + +class UndefinedComparison(ValueError): + """ + An invalid operation was attempted on a value that doesn't support it. + """ + + +class UndefinedEnvironmentName(ValueError): + """ + A name was attempted to be used that does not exist inside of the + environment. + """ + + +class Node: + def __init__(self, value: Any) -> None: + self.value = value + + def __str__(self) -> str: + return str(self.value) + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +VARIABLE = ( + L("implementation_version") + | L("platform_python_implementation") + | L("implementation_name") + | L("python_full_version") + | L("platform_release") + | L("platform_version") + | L("platform_machine") + | L("platform_system") + | L("python_version") + | L("sys_platform") + | L("os_name") + | L("os.name") # PEP-345 + | L("sys.platform") # PEP-345 + | L("platform.version") # PEP-345 + | L("platform.machine") # PEP-345 + | L("platform.python_implementation") # PEP-345 + | L("python_implementation") # undocumented setuptools legacy + | L("extra") # PEP-508 +) +ALIASES = { + "os.name": "os_name", + "sys.platform": "sys_platform", + "platform.version": "platform_version", + "platform.machine": "platform_machine", + "platform.python_implementation": "platform_python_implementation", + "python_implementation": "platform_python_implementation", +} +VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) + +VERSION_CMP = ( + L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") +) + +MARKER_OP = VERSION_CMP | L("not in") | L("in") +MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) + +MARKER_VALUE = QuotedString("'") | QuotedString('"') +MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) + +BOOLOP = L("and") | L("or") + +MARKER_VAR = VARIABLE | MARKER_VALUE + +MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) +MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) + +LPAREN = L("(").suppress() +RPAREN = L(")").suppress() + +MARKER_EXPR = Forward() +MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) +MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) + +MARKER = stringStart + MARKER_EXPR + stringEnd + + +def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: + if isinstance(results, ParseResults): + return [_coerce_parse_result(i) for i in results] + else: + return results + + +def _format_marker( + marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True +) -> str: + + assert isinstance(marker, (list, tuple, str)) + + # Sometimes we have a structure like [[...]] which is a single item list + # where the single item is itself it's own list. In that case we want skip + # the rest of this function so that we don't get extraneous () on the + # outside. + if ( + isinstance(marker, list) + and len(marker) == 1 + and isinstance(marker[0], (list, tuple)) + ): + return _format_marker(marker[0]) + + if isinstance(marker, list): + inner = (_format_marker(m, first=False) for m in marker) + if first: + return " ".join(inner) + else: + return "(" + " ".join(inner) + ")" + elif isinstance(marker, tuple): + return " ".join([m.serialize() for m in marker]) + else: + return marker + + +_operators: Dict[str, Operator] = { + "in": lambda lhs, rhs: lhs in rhs, + "not in": lambda lhs, rhs: lhs not in rhs, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, +} + + +def _eval_op(lhs: str, op: Op, rhs: str) -> bool: + try: + spec = Specifier("".join([op.serialize(), rhs])) + except InvalidSpecifier: + pass + else: + return spec.contains(lhs) + + oper: Optional[Operator] = _operators.get(op.serialize()) + if oper is None: + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") + + return oper(lhs, rhs) + + +class Undefined: + pass + + +_undefined = Undefined() + + +def _get_env(environment: Dict[str, str], name: str) -> str: + value: Union[str, Undefined] = environment.get(name, _undefined) + + if isinstance(value, Undefined): + raise UndefinedEnvironmentName( + f"{name!r} does not exist in evaluation environment." + ) + + return value + + +def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: + groups: List[List[bool]] = [[]] + + for marker in markers: + assert isinstance(marker, (list, tuple, str)) + + if isinstance(marker, list): + groups[-1].append(_evaluate_markers(marker, environment)) + elif isinstance(marker, tuple): + lhs, op, rhs = marker + + if isinstance(lhs, Variable): + lhs_value = _get_env(environment, lhs.value) + rhs_value = rhs.value + else: + lhs_value = lhs.value + rhs_value = _get_env(environment, rhs.value) + + groups[-1].append(_eval_op(lhs_value, op, rhs_value)) + else: + assert marker in ["and", "or"] + if marker == "or": + groups.append([]) + + return any(all(item) for item in groups) + + +def format_full_version(info: "sys._version_info") -> str: + version = "{0.major}.{0.minor}.{0.micro}".format(info) + kind = info.releaselevel + if kind != "final": + version += kind[0] + str(info.serial) + return version + + +def default_environment() -> Dict[str, str]: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name + return { + "implementation_name": implementation_name, + "implementation_version": iver, + "os_name": os.name, + "platform_machine": platform.machine(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "python_full_version": platform.python_version(), + "platform_python_implementation": platform.python_implementation(), + "python_version": ".".join(platform.python_version_tuple()[:2]), + "sys_platform": sys.platform, + } + + +class Marker: + def __init__(self, marker: str) -> None: + try: + self._markers = _coerce_parse_result(MARKER.parseString(marker)) + except ParseException as e: + raise InvalidMarker( + f"Invalid marker: {marker!r}, parse error at " + f"{marker[e.loc : e.loc + 8]!r}" + ) + + def __str__(self) -> str: + return _format_marker(self._markers) + + def __repr__(self) -> str: + return f"" + + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: + """Evaluate a marker. + + Return the boolean from evaluating the given marker against the + environment. environment is an optional argument to override all or + part of the determined environment. + + The environment is determined from the current Python process. + """ + current_environment = default_environment() + if environment is not None: + current_environment.update(environment) + + return _evaluate_markers(self._markers, current_environment) diff --git a/lib/pkg_resources/_vendor/packaging/py.typed b/lib/pkg_resources/_vendor/packaging/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/_vendor/packaging/requirements.py b/lib/pkg_resources/_vendor/packaging/requirements.py new file mode 100644 index 00000000..6af14ec4 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/requirements.py @@ -0,0 +1,146 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import re +import string +import urllib.parse +from typing import List, Optional as TOptional, Set + +from pkg_resources.extern.pyparsing import ( # noqa + Combine, + Literal as L, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + originalTextFor, + stringEnd, + stringStart, +) + +from .markers import MARKER_EXPR, Marker +from .specifiers import LegacySpecifier, Specifier, SpecifierSet + + +class InvalidRequirement(ValueError): + """ + An invalid requirement was found, users should refer to PEP 508. + """ + + +ALPHANUM = Word(string.ascii_letters + string.digits) + +LBRACKET = L("[").suppress() +RBRACKET = L("]").suppress() +LPAREN = L("(").suppress() +RPAREN = L(")").suppress() +COMMA = L(",").suppress() +SEMICOLON = L(";").suppress() +AT = L("@").suppress() + +PUNCTUATION = Word("-_.") +IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) +IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) + +NAME = IDENTIFIER("name") +EXTRA = IDENTIFIER + +URI = Regex(r"[^ ]+")("url") +URL = AT + URI + +EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) +EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") + +VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) +VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) + +VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY +VERSION_MANY = Combine( + VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False +)("_raw_spec") +_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) +_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") + +VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") +VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) + +MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") +MARKER_EXPR.setParseAction( + lambda s, l, t: Marker(s[t._original_start : t._original_end]) +) +MARKER_SEPARATOR = SEMICOLON +MARKER = MARKER_SEPARATOR + MARKER_EXPR + +VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) +URL_AND_MARKER = URL + Optional(MARKER) + +NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) + +REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd +# pkg_resources.extern.pyparsing isn't thread safe during initialization, so we do it eagerly, see +# issue #104 +REQUIREMENT.parseString("x[]") + + +class Requirement: + """Parse a requirement. + + Parse a given requirement string into its parts, such as name, specifier, + URL, and extras. Raises InvalidRequirement on a badly-formed requirement + string. + """ + + # TODO: Can we test whether something is contained within a requirement? + # If so how do we do that? Do we need to test against the _name_ of + # the thing as well as the version? What about the markers? + # TODO: Can we normalize the name and extra name? + + def __init__(self, requirement_string: str) -> None: + try: + req = REQUIREMENT.parseString(requirement_string) + except ParseException as e: + raise InvalidRequirement( + f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' + ) + + self.name: str = req.name + if req.url: + parsed_url = urllib.parse.urlparse(req.url) + if parsed_url.scheme == "file": + if urllib.parse.urlunparse(parsed_url) != req.url: + raise InvalidRequirement("Invalid URL given") + elif not (parsed_url.scheme and parsed_url.netloc) or ( + not parsed_url.scheme and not parsed_url.netloc + ): + raise InvalidRequirement(f"Invalid URL: {req.url}") + self.url: TOptional[str] = req.url + else: + self.url = None + self.extras: Set[str] = set(req.extras.asList() if req.extras else []) + self.specifier: SpecifierSet = SpecifierSet(req.specifier) + self.marker: TOptional[Marker] = req.marker if req.marker else None + + def __str__(self) -> str: + parts: List[str] = [self.name] + + if self.extras: + formatted_extras = ",".join(sorted(self.extras)) + parts.append(f"[{formatted_extras}]") + + if self.specifier: + parts.append(str(self.specifier)) + + if self.url: + parts.append(f"@ {self.url}") + if self.marker: + parts.append(" ") + + if self.marker: + parts.append(f"; {self.marker}") + + return "".join(parts) + + def __repr__(self) -> str: + return f"" diff --git a/lib/pkg_resources/_vendor/packaging/specifiers.py b/lib/pkg_resources/_vendor/packaging/specifiers.py new file mode 100644 index 00000000..ce66bd4a --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/specifiers.py @@ -0,0 +1,828 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import abc +import functools +import itertools +import re +import warnings +from typing import ( + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Pattern, + Set, + Tuple, + TypeVar, + Union, +) + +from .utils import canonicalize_version +from .version import LegacyVersion, Version, parse + +ParsedVersion = Union[Version, LegacyVersion] +UnparsedVersion = Union[Version, LegacyVersion, str] +VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) +CallableOperator = Callable[[ParsedVersion, str], bool] + + +class InvalidSpecifier(ValueError): + """ + An invalid specifier was found, users should refer to PEP 440. + """ + + +class BaseSpecifier(metaclass=abc.ABCMeta): + @abc.abstractmethod + def __str__(self) -> str: + """ + Returns the str representation of this Specifier like object. This + should be representative of the Specifier itself. + """ + + @abc.abstractmethod + def __hash__(self) -> int: + """ + Returns a hash value for this Specifier like object. + """ + + @abc.abstractmethod + def __eq__(self, other: object) -> bool: + """ + Returns a boolean representing whether or not the two Specifier like + objects are equal. + """ + + @abc.abstractmethod + def __ne__(self, other: object) -> bool: + """ + Returns a boolean representing whether or not the two Specifier like + objects are not equal. + """ + + @abc.abstractproperty + def prereleases(self) -> Optional[bool]: + """ + Returns whether or not pre-releases as a whole are allowed by this + specifier. + """ + + @prereleases.setter + def prereleases(self, value: bool) -> None: + """ + Sets whether or not pre-releases as a whole are allowed by this + specifier. + """ + + @abc.abstractmethod + def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: + """ + Determines if the given item is contained within this specifier. + """ + + @abc.abstractmethod + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: + """ + Takes an iterable of items and filters them so that only items which + are contained within this specifier are allowed in it. + """ + + +class _IndividualSpecifier(BaseSpecifier): + + _operators: Dict[str, str] = {} + _regex: Pattern[str] + + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + def __repr__(self) -> str: + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return "<{}({!r}{})>".format(self.__class__.__name__, str(self), pre) + + def __str__(self) -> str: + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + return self._spec[0], canonicalize_version(self._spec[1]) + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def __ne__(self, other: object) -> bool: + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._spec != other._spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: + if not isinstance(version, (LegacyVersion, Version)): + version = parse(version) + return version + + @property + def operator(self) -> str: + return self._spec[0] + + @property + def version(self) -> str: + return self._spec[1] + + @property + def prereleases(self) -> Optional[bool]: + return self._prereleases + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + def __contains__(self, item: str) -> bool: + return self.contains(item) + + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version or LegacyVersion, this allows us to have + # a shortcut for ``"2.0" in Specifier(">=2") + normalized_item = self._coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = self._coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version + + +class LegacySpecifier(_IndividualSpecifier): + + _regex_str = r""" + (?P(==|!=|<=|>=|<|>)) + \s* + (?P + [^,;\s)]* # Since this is a "legacy" specifier, and the version + # string can be just about anything, we match everything + # except for whitespace, a semi-colon for marker support, + # a closing paren since versions can be enclosed in + # them, and a comma since it's a version separator. + ) + """ + + _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + + _operators = { + "==": "equal", + "!=": "not_equal", + "<=": "less_than_equal", + ">=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + } + + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + super().__init__(spec, prereleases) + + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: + if not isinstance(version, LegacyVersion): + version = LegacyVersion(str(version)) + return version + + def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: + return prospective == self._coerce_version(spec) + + def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: + return prospective != self._coerce_version(spec) + + def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: + return prospective <= self._coerce_version(spec) + + def _compare_greater_than_equal( + self, prospective: LegacyVersion, spec: str + ) -> bool: + return prospective >= self._coerce_version(spec) + + def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: + return prospective < self._coerce_version(spec) + + def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: + return prospective > self._coerce_version(spec) + + +def _require_version_compare( + fn: Callable[["Specifier", ParsedVersion, str], bool] +) -> Callable[["Specifier", ParsedVersion, str], bool]: + @functools.wraps(fn) + def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: + if not isinstance(prospective, Version): + return False + return fn(self, prospective, spec) + + return wrapped + + +class Specifier(_IndividualSpecifier): + + _regex_str = r""" + (?P(~=|==|!=|<=|>=|<|>|===)) + (?P + (?: + # The identity operators allow for an escape hatch that will + # do an exact string match of the version you wish to install. + # This will not be parsed by PEP 440 and we cannot determine + # any semantic meaning from it. This operator is discouraged + # but included entirely as an escape hatch. + (?<====) # Only match for the identity operator + \s* + [^\s]* # We just match everything, except for whitespace + # since we are only testing for strict identity. + ) + | + (?: + # The (non)equality operators allow for wild card and local + # versions to be specified so we have to define these two + # operators separately to enable that. + (?<===|!=) # Only match for equals and not equals + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)* # release + (?: # pre release + [-_\.]? + (a|b|c|rc|alpha|beta|pre|preview) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + + # You cannot use a wild card and a dev or local version + # together so group them with a | and make them optional. + (?: + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local + | + \.\* # Wild card syntax of .* + )? + ) + | + (?: + # The compatible operator requires at least two digits in the + # release segment. + (?<=~=) # Only match for the compatible operator + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) + (?: # pre release + [-_\.]? + (a|b|c|rc|alpha|beta|pre|preview) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + ) + | + (?: + # All other operators only allow a sub set of what the + # (non)equality operators do. Specifically they do not allow + # local versions to be specified nor do they allow the prefix + # matching wild cards. + (?=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + "===": "arbitrary", + } + + @_require_version_compare + def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: + + # Compatible releases have an equivalent combination of >= and ==. That + # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to + # implement this in terms of the other specifiers instead of + # implementing it ourselves. The only thing we need to do is construct + # the other specifiers. + + # We want everything but the last item in the version, but we want to + # ignore suffix segments. + prefix = ".".join( + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] + ) + + # Add the prefix notation to the end of our string + prefix += ".*" + + return self._get_operator(">=")(prospective, spec) and self._get_operator("==")( + prospective, prefix + ) + + @_require_version_compare + def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + + # We need special logic to handle prefix matching + if spec.endswith(".*"): + # In the case of prefix matching we want to ignore local segment. + prospective = Version(prospective.public) + # Split the spec out by dots, and pretend that there is an implicit + # dot in between a release segment and a pre-release segment. + split_spec = _version_split(spec[:-2]) # Remove the trailing .* + + # Split the prospective version out by dots, and pretend that there + # is an implicit dot in between a release segment and a pre-release + # segment. + split_prospective = _version_split(str(prospective)) + + # Shorten the prospective version to be the same length as the spec + # so that we can determine if the specifier is a prefix of the + # prospective version or not. + shortened_prospective = split_prospective[: len(split_spec)] + + # Pad out our two sides with zeros so that they both equal the same + # length. + padded_spec, padded_prospective = _pad_version( + split_spec, shortened_prospective + ) + + return padded_prospective == padded_spec + else: + # Convert our spec string into a Version + spec_version = Version(spec) + + # If the specifier does not have a local segment, then we want to + # act as if the prospective version also does not have a local + # segment. + if not spec_version.local: + prospective = Version(prospective.public) + + return prospective == spec_version + + @_require_version_compare + def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: + return not self._compare_equal(prospective, spec) + + @_require_version_compare + def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) + + @_require_version_compare + def _compare_greater_than_equal( + self, prospective: ParsedVersion, spec: str + ) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) + + @_require_version_compare + def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is less than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective < spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a pre-release version, that we do not accept pre-release + # versions for the version mentioned in the specifier (e.g. <3.1 should + # not match 3.1.dev0, but should match 3.0.dev0). + if not spec.is_prerelease and prospective.is_prerelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # less than the spec version *and* it's not a pre-release of the same + # version in the spec. + return True + + @_require_version_compare + def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is greater than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective > spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a post-release version, that we do not accept + # post-release versions for the version mentioned in the specifier + # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0). + if not spec.is_postrelease and prospective.is_postrelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # Ensure that we do not allow a local version of the version mentioned + # in the specifier, which is technically greater than, to match. + if prospective.local is not None: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # greater than the spec version *and* it's not a pre-release of the + # same version in the spec. + return True + + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: + return str(prospective).lower() == str(spec).lower() + + @property + def prereleases(self) -> bool: + + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if parse(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + +_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") + + +def _version_split(version: str) -> List[str]: + result: List[str] = [] + for item in version.split("."): + match = _prefix_regex.search(item) + if match: + result.extend(match.groups()) + else: + result.append(item) + return result + + +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]: + left_split, right_split = [], [] + + # Get the release segment of our versions + left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left))) + right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right))) + + # Get the rest of our versions + left_split.append(left[len(left_split[0]) :]) + right_split.append(right[len(right_split[0]) :]) + + # Insert our padding + left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) + right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) + + return (list(itertools.chain(*left_split)), list(itertools.chain(*right_split))) + + +class SpecifierSet(BaseSpecifier): + def __init__( + self, specifiers: str = "", prereleases: Optional[bool] = None + ) -> None: + + # Split on , to break each individual specifier into it's own item, and + # strip each item to remove leading/trailing whitespace. + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + + # Parsed each individual specifier, attempting first to make it a + # Specifier and falling back to a LegacySpecifier. + parsed: Set[_IndividualSpecifier] = set() + for specifier in split_specifiers: + try: + parsed.add(Specifier(specifier)) + except InvalidSpecifier: + parsed.add(LegacySpecifier(specifier)) + + # Turn our parsed specifiers into a frozen set and save them for later. + self._specs = frozenset(parsed) + + # Store our prereleases value so we can use it later to determine if + # we accept prereleases or not. + self._prereleases = prereleases + + def __repr__(self) -> str: + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return "".format(str(self), pre) + + def __str__(self) -> str: + return ",".join(sorted(str(s) for s in self._specs)) + + def __hash__(self) -> int: + return hash(self._specs) + + def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + if isinstance(other, str): + other = SpecifierSet(other) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + specifier = SpecifierSet() + specifier._specs = frozenset(self._specs | other._specs) + + if self._prereleases is None and other._prereleases is not None: + specifier._prereleases = other._prereleases + elif self._prereleases is not None and other._prereleases is None: + specifier._prereleases = self._prereleases + elif self._prereleases == other._prereleases: + specifier._prereleases = self._prereleases + else: + raise ValueError( + "Cannot combine SpecifierSets with True and False prerelease " + "overrides." + ) + + return specifier + + def __eq__(self, other: object) -> bool: + if isinstance(other, (str, _IndividualSpecifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs == other._specs + + def __ne__(self, other: object) -> bool: + if isinstance(other, (str, _IndividualSpecifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs != other._specs + + def __len__(self) -> int: + return len(self._specs) + + def __iter__(self) -> Iterator[_IndividualSpecifier]: + return iter(self._specs) + + @property + def prereleases(self) -> Optional[bool]: + + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + def __contains__(self, item: UnparsedVersion) -> bool: + return self.contains(item) + + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + + # Ensure that our item is a Version or LegacyVersion instance. + if not isinstance(item, (LegacyVersion, Version)): + item = parse(item) + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # We can determine if we're going to allow pre-releases by looking to + # see if any of the underlying items supports them. If none of them do + # and this item is a pre-release then we do not allow it and we can + # short circuit that here. + # Note: This means that 1.0.dev1 would not be contained in something + # like >=1.0.devabc however it would be in >=1.0.debabc,>0.0.dev0 + if not prereleases and item.is_prerelease: + return False + + # We simply dispatch to the underlying specs here to make sure that the + # given version is contained within all of them. + # Note: This use of all() here means that an empty set of specifiers + # will always return True, this is an explicit design decision. + return all(s.contains(item, prereleases=prereleases) for s in self._specs) + + def filter( + self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None + ) -> Iterable[VersionTypeVar]: + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # If we have any specifiers, then we want to wrap our iterable in the + # filter method for each one, this will act as a logical AND amongst + # each specifier. + if self._specs: + for spec in self._specs: + iterable = spec.filter(iterable, prereleases=bool(prereleases)) + return iterable + # If we do not have any specifiers, then we need to have a rough filter + # which will filter out any pre-releases, unless there are no final + # releases, and which will filter out LegacyVersion in general. + else: + filtered: List[VersionTypeVar] = [] + found_prereleases: List[VersionTypeVar] = [] + + item: UnparsedVersion + parsed_version: Union[Version, LegacyVersion] + + for item in iterable: + # Ensure that we some kind of Version class for this item. + if not isinstance(item, (LegacyVersion, Version)): + parsed_version = parse(item) + else: + parsed_version = item + + # Filter out any item which is parsed as a LegacyVersion + if isinstance(parsed_version, LegacyVersion): + continue + + # Store any item which is a pre-release for later unless we've + # already found a final version or we are accepting prereleases + if parsed_version.is_prerelease and not prereleases: + if not filtered: + found_prereleases.append(item) + else: + filtered.append(item) + + # If we've found no items except for pre-releases, then we'll go + # ahead and use the pre-releases + if not filtered and found_prereleases and prereleases is None: + return found_prereleases + + return filtered diff --git a/lib/pkg_resources/_vendor/packaging/tags.py b/lib/pkg_resources/_vendor/packaging/tags.py new file mode 100644 index 00000000..e65890a9 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/tags.py @@ -0,0 +1,484 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import logging +import platform +import sys +import sysconfig +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from . import _manylinux, _musllinux + +logger = logging.getLogger(__name__) + +PythonVersion = Sequence[int] +MacVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: Dict[str, str] = { + "python": "py", # Generic. + "cpython": "cp", + "pypy": "pp", + "ironpython": "ip", + "jython": "jy", +} + + +_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 + + +class Tag: + """ + A representation of the tag triple for a wheel. + + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ + + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] + + def __init__(self, interpreter: str, abi: str, platform: str) -> None: + self._interpreter = interpreter.lower() + self._abi = abi.lower() + self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) + + @property + def interpreter(self) -> str: + return self._interpreter + + @property + def abi(self) -> str: + return self._abi + + @property + def platform(self) -> str: + return self._platform + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Tag): + return NotImplemented + + return ( + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. + and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) + ) + + def __hash__(self) -> int: + return self._hash + + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" + + def __repr__(self) -> str: + return "<{self} @ {self_id}>".format(self=self, self_id=id(self)) + + +def parse_tag(tag: str) -> FrozenSet[Tag]: + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ + tags = set() + interpreters, abis, platforms = tag.split("-") + for interpreter in interpreters.split("."): + for abi in abis.split("."): + for platform_ in platforms.split("."): + tags.add(Tag(interpreter, abi, platform_)) + return frozenset(tags) + + +def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: + value = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string: str) -> str: + return string.replace(".", "_").replace("-", "_") + + +def _abi3_applies(python_version: PythonVersion) -> bool: + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) + + +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: + py_version = tuple(py_version) # To allow for version comparison. + abis = [] + version = _version_nodot(py_version[:2]) + debug = pymalloc = ucs4 = "" + with_debug = _get_config_var("Py_DEBUG", warn) + has_refcount = hasattr(sys, "gettotalrefcount") + # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled + # extension modules is the best option. + # https://github.com/pypa/pip/issues/3383#issuecomment-173267692 + has_ext = "_d.pyd" in EXTENSION_SUFFIXES + if with_debug or (with_debug is None and (has_refcount or has_ext)): + debug = "d" + if py_version < (3, 8): + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) + if with_pymalloc or with_pymalloc is None: + pymalloc = "m" + if py_version < (3, 3): + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) + if unicode_size == 4 or ( + unicode_size is None and sys.maxunicode == 0x10FFFF + ): + ucs4 = "u" + elif debug: + # Debug builds can also load "normal" extension modules. + # We can also assume no UCS-4 or pymalloc requirement. + abis.append(f"cp{version}") + abis.insert( + 0, + "cp{version}{debug}{pymalloc}{ucs4}".format( + version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4 + ), + ) + return abis + + +def cpython_tags( + python_version: Optional[PythonVersion] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + if not python_version: + python_version = sys.version_info[:2] + + interpreter = "cp{}".format(_version_nodot(python_version[:2])) + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or platform_tags()) + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + if _abi3_applies(python_version): + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) + + if _abi3_applies(python_version): + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + interpreter = "cp{version}".format( + version=_version_nodot((python_version[0], minor_version)) + ) + yield Tag(interpreter, "abi3", platform_) + + +def _generic_abi() -> Iterator[str]: + abi = sysconfig.get_config_var("SOABI") + if abi: + yield _normalize_string(abi) + + +def generic_tags( + interpreter: Optional[str] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + platforms = list(platforms or platform_tags()) + abis = list(abis) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: + """ + Yields Python versions in descending order. + + After the latest version, the major-only version will be yielded, and then + all previous versions of that major version. + """ + if len(py_version) > 1: + yield "py{version}".format(version=_version_nodot(py_version[:2])) + yield "py{major}".format(major=py_version[0]) + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield "py{version}".format(version=_version_nodot((py_version[0], minor))) + + +def compatible_tags( + python_version: Optional[PythonVersion] = None, + interpreter: Optional[str] = None, + platforms: Optional[Iterable[str]] = None, +) -> Iterator[Tag]: + """ + Yields the sequence of tags that are compatible with a specific version of Python. + + The tags consist of: + - py*-none- + - -none-any # ... if `interpreter` is provided. + - py*-none-any + """ + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or platform_tags()) + for version in _py_interpreter_range(python_version): + for platform_ in platforms: + yield Tag(version, "none", platform_) + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): + yield Tag(version, "none", "any") + + +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: + if not is_32bit: + return arch + + if arch.startswith("ppc"): + return "ppc" + + return "i386" + + +def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]: + formats = [cpu_arch] + if cpu_arch == "x86_64": + if version < (10, 4): + return [] + formats.extend(["intel", "fat64", "fat32"]) + + elif cpu_arch == "i386": + if version < (10, 4): + return [] + formats.extend(["intel", "fat32", "fat"]) + + elif cpu_arch == "ppc64": + # TODO: Need to care about 32-bit PPC for ppc64 through 10.2? + if version > (10, 5) or version < (10, 4): + return [] + formats.append("fat64") + + elif cpu_arch == "ppc": + if version > (10, 6): + return [] + formats.extend(["fat32", "fat"]) + + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + + return formats + + +def mac_platforms( + version: Optional[MacVersion] = None, arch: Optional[str] = None +) -> Iterator[str]: + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ + version_str, _, cpu_arch = platform.mac_ver() + if version is None: + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version + if arch is None: + arch = _mac_arch(cpu_arch) + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + + +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv7l" + _, arch = linux.split("_", 1) + yield from _manylinux.platform_tags(linux, arch) + yield from _musllinux.platform_tags(arch) + yield linux + + +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) + + +def platform_tags() -> Iterator[str]: + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "Linux": + return _linux_platforms() + else: + return _generic_platforms() + + +def interpreter_name() -> str: + """ + Returns the name of the running interpreter. + """ + name = sys.implementation.name + return INTERPRETER_SHORT_NAMES.get(name) or name + + +def interpreter_version(*, warn: bool = False) -> str: + """ + Returns the version of the running interpreter. + """ + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version + + +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) + + +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: + """ + Returns the sequence of tag triples for the running interpreter. + + The order of the sequence corresponds to priority order for the + interpreter, from most to least important. + """ + + interp_name = interpreter_name() + if interp_name == "cp": + yield from cpython_tags(warn=warn) + else: + yield from generic_tags() + + yield from compatible_tags() diff --git a/lib/pkg_resources/_vendor/packaging/utils.py b/lib/pkg_resources/_vendor/packaging/utils.py new file mode 100644 index 00000000..bab11b80 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/utils.py @@ -0,0 +1,136 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import re +from typing import FrozenSet, NewType, Tuple, Union, cast + +from .tags import Tag, parse_tag +from .version import InvalidVersion, Version + +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ + + +_canonicalize_regex = re.compile(r"[-_.]+") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") + + +def canonicalize_name(name: str) -> NormalizedName: + # This is taken from PEP 503. + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) + + +def canonicalize_version(version: Union[Version, str]) -> str: + """ + This is very similar to Version.__str__, but has one subtle difference + with the way it handles the release segment. + """ + if isinstance(version, str): + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + else: + parsed = version + + parts = [] + + # Epoch + if parsed.epoch != 0: + parts.append(f"{parsed.epoch}!") + + # Release segment + # NB: This strips trailing '.0's to normalize + parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) + + # Pre-release + if parsed.pre is not None: + parts.append("".join(str(x) for x in parsed.pre)) + + # Post-release + if parsed.post is not None: + parts.append(f".post{parsed.post}") + + # Development release + if parsed.dev is not None: + parts.append(f".dev{parsed.dev}") + + # Local version segment + if parsed.local is not None: + parts.append(f"+{parsed.local}") + + return "".join(parts) + + +def parse_wheel_filename( + filename: str, +) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename}" + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename}") + name = canonicalize_name(name_part) + version = Version(parts[1]) + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in '{filename}'" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") + + name = canonicalize_name(name_part) + version = Version(version_part) + return (name, version) diff --git a/lib/pkg_resources/_vendor/packaging/version.py b/lib/pkg_resources/_vendor/packaging/version.py new file mode 100644 index 00000000..de9a09a4 --- /dev/null +++ b/lib/pkg_resources/_vendor/packaging/version.py @@ -0,0 +1,504 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import collections +import itertools +import re +import warnings +from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union + +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType + +__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] + +InfiniteTypes = Union[InfinityType, NegativeInfinityType] +PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] +SubLocalType = Union[InfiniteTypes, int, str] +LocalType = Union[ + NegativeInfinityType, + Tuple[ + Union[ + SubLocalType, + Tuple[SubLocalType, str], + Tuple[NegativeInfinityType, SubLocalType], + ], + ..., + ], +] +CmpKey = Tuple[ + int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType +] +LegacyCmpKey = Tuple[int, Tuple[str, ...]] +VersionComparisonMethod = Callable[ + [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool +] + +_Version = collections.namedtuple( + "_Version", ["epoch", "release", "dev", "pre", "post", "local"] +) + + +def parse(version: str) -> Union["LegacyVersion", "Version"]: + """ + Parse the given version string and return either a :class:`Version` object + or a :class:`LegacyVersion` object depending on if the given version is + a valid PEP 440 version or a legacy version. + """ + try: + return Version(version) + except InvalidVersion: + return LegacyVersion(version) + + +class InvalidVersion(ValueError): + """ + An invalid version was found, users should refer to PEP 440. + """ + + +class _BaseVersion: + _key: Union[CmpKey, LegacyCmpKey] + + def __hash__(self) -> int: + return hash(self._key) + + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key < other._key + + def __le__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key + + +class LegacyVersion(_BaseVersion): + def __init__(self, version: str) -> None: + self._version = str(version) + self._key = _legacy_cmpkey(self._version) + + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def __str__(self) -> str: + return self._version + + def __repr__(self) -> str: + return f"" + + @property + def public(self) -> str: + return self._version + + @property + def base_version(self) -> str: + return self._version + + @property + def epoch(self) -> int: + return -1 + + @property + def release(self) -> None: + return None + + @property + def pre(self) -> None: + return None + + @property + def post(self) -> None: + return None + + @property + def dev(self) -> None: + return None + + @property + def local(self) -> None: + return None + + @property + def is_prerelease(self) -> bool: + return False + + @property + def is_postrelease(self) -> bool: + return False + + @property + def is_devrelease(self) -> bool: + return False + + +_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) + +_legacy_version_replacement_map = { + "pre": "c", + "preview": "c", + "-": "final-", + "rc": "c", + "dev": "@", +} + + +def _parse_version_parts(s: str) -> Iterator[str]: + for part in _legacy_version_component_re.split(s): + part = _legacy_version_replacement_map.get(part, part) + + if not part or part == ".": + continue + + if part[:1] in "0123456789": + # pad for numeric comparison + yield part.zfill(8) + else: + yield "*" + part + + # ensure that alpha/beta/candidate are before final + yield "*final" + + +def _legacy_cmpkey(version: str) -> LegacyCmpKey: + + # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch + # greater than or equal to 0. This will effectively put the LegacyVersion, + # which uses the defacto standard originally implemented by setuptools, + # as before all PEP 440 versions. + epoch = -1 + + # This scheme is taken from pkg_resources.parse_version setuptools prior to + # it's adoption of the packaging library. + parts: List[str] = [] + for part in _parse_version_parts(version.lower()): + if part.startswith("*"): + # remove "-" before a prerelease tag + if part < "*final": + while parts and parts[-1] == "*final-": + parts.pop() + + # remove trailing zeros from each series of numeric parts + while parts and parts[-1] == "00000000": + parts.pop() + + parts.append(part) + + return epoch, tuple(parts) + + +# Deliberately not anchored to the start and end of the string, to make it +# easier for 3rd party code to reuse +VERSION_PATTERN = r""" + v? + (?: + (?:(?P[0-9]+)!)? # epoch + (?P[0-9]+(?:\.[0-9]+)*) # release segment + (?P
                                          # pre-release
+            [-_\.]?
+            (?P(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+
+class Version(_BaseVersion):
+
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    def __init__(self, version: str) -> None:
+
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion(f"Invalid version: '{version}'")
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self) -> str:
+        return f""
+
+    def __str__(self) -> str:
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join(str(x) for x in self.pre))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(f".post{self.post}")
+
+        # Development release
+        if self.dev is not None:
+            parts.append(f".dev{self.dev}")
+
+        # Local version segment
+        if self.local is not None:
+            parts.append(f"+{self.local}")
+
+        return "".join(parts)
+
+    @property
+    def epoch(self) -> int:
+        _epoch: int = self._version.epoch
+        return _epoch
+
+    @property
+    def release(self) -> Tuple[int, ...]:
+        _release: Tuple[int, ...] = self._version.release
+        return _release
+
+    @property
+    def pre(self) -> Optional[Tuple[str, int]]:
+        _pre: Optional[Tuple[str, int]] = self._version.pre
+        return _pre
+
+    @property
+    def post(self) -> Optional[int]:
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self) -> Optional[int]:
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self) -> Optional[str]:
+        if self._version.local:
+            return ".".join(str(x) for x in self._version.local)
+        else:
+            return None
+
+    @property
+    def public(self) -> str:
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self) -> str:
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self) -> bool:
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self) -> bool:
+        return self.post is not None
+
+    @property
+    def is_devrelease(self) -> bool:
+        return self.dev is not None
+
+    @property
+    def major(self) -> int:
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self) -> int:
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self) -> int:
+        return self.release[2] if len(self.release) >= 3 else 0
+
+
+def _parse_letter_version(
+    letter: str, number: Union[str, bytes, SupportsInt]
+) -> Optional[Tuple[str, int]]:
+
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local: str) -> Optional[LocalType]:
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
+
+def _cmpkey(
+    epoch: int,
+    release: Tuple[int, ...],
+    pre: Optional[Tuple[str, int]],
+    post: Optional[Tuple[str, int]],
+    dev: Optional[Tuple[str, int]],
+    local: Optional[Tuple[SubLocalType]],
+) -> CmpKey:
+
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll use a reverse the list, drop all the now
+    # leading zeros until we come to something non zero, then take the rest
+    # re-reverse it back into the correct order and make it a tuple and use
+    # that for our sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre: PrePostDevType = NegativeInfinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post: PrePostDevType = NegativeInfinity
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev: PrePostDevType = Infinity
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local: LocalType = NegativeInfinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
diff --git a/lib/pkg_resources/_vendor/pyparsing.LICENSE.txt b/lib/pkg_resources/_vendor/pyparsing.LICENSE.txt
new file mode 100644
index 00000000..1bf98523
--- /dev/null
+++ b/lib/pkg_resources/_vendor/pyparsing.LICENSE.txt
@@ -0,0 +1,18 @@
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/lib/pkg_resources/_vendor/pyparsing.py b/lib/pkg_resources/_vendor/pyparsing.py
new file mode 100644
index 00000000..cf75e1e5
--- /dev/null
+++ b/lib/pkg_resources/_vendor/pyparsing.py
@@ -0,0 +1,5742 @@
+# module pyparsing.py
+#
+# Copyright (c) 2003-2018  Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+=============================================================================
+
+The pyparsing module is an alternative approach to creating and executing simple grammars,
+vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+provides a library of classes that you use to construct the grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form 
+C{", !"}), built up using L{Word}, L{Literal}, and L{And} elements 
+(L{'+'} operator gives L{And} expressions, strings are auto-converted to
+L{Literal} expressions)::
+
+    from pyparsing import Word, alphas
+
+    # define grammar of a greeting
+    greet = Word(alphas) + "," + Word(alphas) + "!"
+
+    hello = "Hello, World!"
+    print (hello, "->", greet.parseString(hello))
+
+The program outputs the following::
+
+    Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the self-explanatory
+class names, and the use of '+', '|' and '^' operators.
+
+The L{ParseResults} object returned from L{ParserElement.parseString} can be accessed as a nested list, a dictionary, or an
+object with named attributes.
+
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
+ - quoted strings
+ - embedded comments
+
+
+Getting Started -
+-----------------
+Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
+classes inherit from. Use the docstrings for examples of how to:
+ - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
+ - construct character word-group expressions using the L{Word} class
+ - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
+ - use L{'+'}, L{'|'}, L{'^'}, and L{'&'} operators to combine simple expressions into more complex ones
+ - associate names with your parsed results using L{ParserElement.setResultsName}
+ - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
+ - find more useful common expressions in the L{pyparsing_common} namespace class
+"""
+
+__version__ = "2.2.1"
+__versionTime__ = "18 Sep 2018 00:49 UTC"
+__author__ = "Paul McGuire "
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+import collections
+import pprint
+import traceback
+import types
+from datetime import datetime
+
+try:
+    from _thread import RLock
+except ImportError:
+    from threading import RLock
+
+try:
+    # Python 3
+    from collections.abc import Iterable
+    from collections.abc import MutableMapping
+except ImportError:
+    # Python 2.7
+    from collections import Iterable
+    from collections import MutableMapping
+
+try:
+    from collections import OrderedDict as _OrderedDict
+except ImportError:
+    try:
+        from ordereddict import OrderedDict as _OrderedDict
+    except ImportError:
+        _OrderedDict = None
+
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+
+__all__ = [
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
+'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
+'CloseMatch', 'tokenMap', 'pyparsing_common',
+]
+
+system_version = tuple(sys.version_info)[:3]
+PY_3 = system_version[0] == 3
+if PY_3:
+    _MAX_INT = sys.maxsize
+    basestring = str
+    unichr = chr
+    _ustr = str
+
+    # build list of single arg builtins, that can be used as parse actions
+    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
+
+else:
+    _MAX_INT = sys.maxint
+    range = xrange
+
+    def _ustr(obj):
+        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
+           str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
+           then < returns the unicode object | encodes it with the default encoding | ... >.
+        """
+        if isinstance(obj,unicode):
+            return obj
+
+        try:
+            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
+            # it won't break any existing code.
+            return str(obj)
+
+        except UnicodeEncodeError:
+            # Else encode it
+            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+            xmlcharref = Regex(r'&#\d+;')
+            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+            return xmlcharref.transformString(ret)
+
+    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
+    singleArgBuiltins = []
+    import __builtin__
+    for fname in "sum len sorted reversed list tuple set any all min max".split():
+        try:
+            singleArgBuiltins.append(getattr(__builtin__,fname))
+        except AttributeError:
+            continue
+            
+_generatorType = type((y for y in range(1)))
+ 
+def _xml_escape(data):
+    """Escape &, <, >, ", ', etc. in a string of data."""
+
+    # ampersand must be replaced first
+    from_symbols = '&><"\''
+    to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
+    for from_,to_ in zip(from_symbols, to_symbols):
+        data = data.replace(from_, to_)
+    return data
+
+class _Constants(object):
+    pass
+
+alphas     = string.ascii_uppercase + string.ascii_lowercase
+nums       = "0123456789"
+hexnums    = nums + "ABCDEFabcdef"
+alphanums  = alphas + nums
+_bslash    = chr(92)
+printables = "".join(c for c in string.printable if c not in string.whitespace)
+
+class ParseBaseException(Exception):
+    """base exception class for all parsing runtime exceptions"""
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, pstr, loc=0, msg=None, elem=None ):
+        self.loc = loc
+        if msg is None:
+            self.msg = pstr
+            self.pstr = ""
+        else:
+            self.msg = msg
+            self.pstr = pstr
+        self.parserElement = elem
+        self.args = (pstr, loc, msg)
+
+    @classmethod
+    def _from_exception(cls, pe):
+        """
+        internal factory method to simplify creating one type of ParseException 
+        from another - avoids having __init__ signature conflicts among subclasses
+        """
+        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+    def __getattr__( self, aname ):
+        """supported attributes by name are:
+            - lineno - returns the line number of the exception text
+            - col - returns the column number of the exception text
+            - line - returns the line containing the exception text
+        """
+        if( aname == "lineno" ):
+            return lineno( self.loc, self.pstr )
+        elif( aname in ("col", "column") ):
+            return col( self.loc, self.pstr )
+        elif( aname == "line" ):
+            return line( self.loc, self.pstr )
+        else:
+            raise AttributeError(aname)
+
+    def __str__( self ):
+        return "%s (at char %d), (line:%d, col:%d)" % \
+                ( self.msg, self.loc, self.lineno, self.column )
+    def __repr__( self ):
+        return _ustr(self)
+    def markInputline( self, markerString = ">!<" ):
+        """Extracts the exception line from the input string, and marks
+           the location of the exception with a special symbol.
+        """
+        line_str = self.line
+        line_column = self.column - 1
+        if markerString:
+            line_str = "".join((line_str[:line_column],
+                                markerString, line_str[line_column:]))
+        return line_str.strip()
+    def __dir__(self):
+        return "lineno col line".split() + dir(type(self))
+
+class ParseException(ParseBaseException):
+    """
+    Exception thrown when parse expressions don't match class;
+    supported attributes by name are:
+     - lineno - returns the line number of the exception text
+     - col - returns the column number of the exception text
+     - line - returns the line containing the exception text
+        
+    Example::
+        try:
+            Word(nums).setName("integer").parseString("ABC")
+        except ParseException as pe:
+            print(pe)
+            print("column: {}".format(pe.col))
+            
+    prints::
+       Expected integer (at char 0), (line:1, col:1)
+        column: 1
+    """
+    pass
+
+class ParseFatalException(ParseBaseException):
+    """user-throwable exception thrown when inconsistent parse content
+       is found; stops all parsing immediately"""
+    pass
+
+class ParseSyntaxException(ParseFatalException):
+    """just like L{ParseFatalException}, but thrown internally when an
+       L{ErrorStop} ('-' operator) indicates that parsing is to stop 
+       immediately because an unbacktrackable syntax error has been found"""
+    pass
+
+#~ class ReparseException(ParseBaseException):
+    #~ """Experimental class - parse actions can raise this exception to cause
+       #~ pyparsing to reparse the input string:
+        #~ - with a modified input string, and/or
+        #~ - with a modified start location
+       #~ Set the values of the ReparseException in the constructor, and raise the
+       #~ exception in a parse action to cause pyparsing to use the new string/location.
+       #~ Setting the values as None causes no change to be made.
+       #~ """
+    #~ def __init_( self, newstring, restartLoc ):
+        #~ self.newParseText = newstring
+        #~ self.reparseLoc = restartLoc
+
+class RecursiveGrammarException(Exception):
+    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
+    def __init__( self, parseElementList ):
+        self.parseElementTrace = parseElementList
+
+    def __str__( self ):
+        return "RecursiveGrammarException: %s" % self.parseElementTrace
+
+class _ParseResultsWithOffset(object):
+    def __init__(self,p1,p2):
+        self.tup = (p1,p2)
+    def __getitem__(self,i):
+        return self.tup[i]
+    def __repr__(self):
+        return repr(self.tup[0])
+    def setOffset(self,i):
+        self.tup = (self.tup[0],i)
+
+class ParseResults(object):
+    """
+    Structured parse results, to provide multiple means of access to the parsed data:
+       - as a list (C{len(results)})
+       - by list index (C{results[0], results[1]}, etc.)
+       - by attribute (C{results.} - see L{ParserElement.setResultsName})
+
+    Example::
+        integer = Word(nums)
+        date_str = (integer.setResultsName("year") + '/' 
+                        + integer.setResultsName("month") + '/' 
+                        + integer.setResultsName("day"))
+        # equivalent form:
+        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+        # parseString returns a ParseResults object
+        result = date_str.parseString("1999/12/31")
+
+        def test(s, fn=repr):
+            print("%s -> %s" % (s, fn(eval(s))))
+        test("list(result)")
+        test("result[0]")
+        test("result['month']")
+        test("result.day")
+        test("'month' in result")
+        test("'minutes' in result")
+        test("result.dump()", str)
+    prints::
+        list(result) -> ['1999', '/', '12', '/', '31']
+        result[0] -> '1999'
+        result['month'] -> '12'
+        result.day -> '31'
+        'month' in result -> True
+        'minutes' in result -> False
+        result.dump() -> ['1999', '/', '12', '/', '31']
+        - day: 31
+        - month: 12
+        - year: 1999
+    """
+    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
+        if isinstance(toklist, cls):
+            return toklist
+        retobj = object.__new__(cls)
+        retobj.__doinit = True
+        return retobj
+
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
+        if self.__doinit:
+            self.__doinit = False
+            self.__name = None
+            self.__parent = None
+            self.__accumNames = {}
+            self.__asList = asList
+            self.__modal = modal
+            if toklist is None:
+                toklist = []
+            if isinstance(toklist, list):
+                self.__toklist = toklist[:]
+            elif isinstance(toklist, _generatorType):
+                self.__toklist = list(toklist)
+            else:
+                self.__toklist = [toklist]
+            self.__tokdict = dict()
+
+        if name is not None and name:
+            if not modal:
+                self.__accumNames[name] = 0
+            if isinstance(name,int):
+                name = _ustr(name) # will always return a str, but use _ustr for consistency
+            self.__name = name
+            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
+                if isinstance(toklist,basestring):
+                    toklist = [ toklist ]
+                if asList:
+                    if isinstance(toklist,ParseResults):
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
+                    else:
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
+                    self[name].__name = name
+                else:
+                    try:
+                        self[name] = toklist[0]
+                    except (KeyError,TypeError,IndexError):
+                        self[name] = toklist
+
+    def __getitem__( self, i ):
+        if isinstance( i, (int,slice) ):
+            return self.__toklist[i]
+        else:
+            if i not in self.__accumNames:
+                return self.__tokdict[i][-1][0]
+            else:
+                return ParseResults([ v[0] for v in self.__tokdict[i] ])
+
+    def __setitem__( self, k, v, isinstance=isinstance ):
+        if isinstance(v,_ParseResultsWithOffset):
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
+            sub = v[0]
+        elif isinstance(k,(int,slice)):
+            self.__toklist[k] = v
+            sub = v
+        else:
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
+            sub = v
+        if isinstance(sub,ParseResults):
+            sub.__parent = wkref(self)
+
+    def __delitem__( self, i ):
+        if isinstance(i,(int,slice)):
+            mylen = len( self.__toklist )
+            del self.__toklist[i]
+
+            # convert int to slice
+            if isinstance(i, int):
+                if i < 0:
+                    i += mylen
+                i = slice(i, i+1)
+            # get removed indices
+            removed = list(range(*i.indices(mylen)))
+            removed.reverse()
+            # fixup indices in token dictionary
+            for name,occurrences in self.__tokdict.items():
+                for j in removed:
+                    for k, (value, position) in enumerate(occurrences):
+                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+        else:
+            del self.__tokdict[i]
+
+    def __contains__( self, k ):
+        return k in self.__tokdict
+
+    def __len__( self ): return len( self.__toklist )
+    def __bool__(self): return ( not not self.__toklist )
+    __nonzero__ = __bool__
+    def __iter__( self ): return iter( self.__toklist )
+    def __reversed__( self ): return iter( self.__toklist[::-1] )
+    def _iterkeys( self ):
+        if hasattr(self.__tokdict, "iterkeys"):
+            return self.__tokdict.iterkeys()
+        else:
+            return iter(self.__tokdict)
+
+    def _itervalues( self ):
+        return (self[k] for k in self._iterkeys())
+            
+    def _iteritems( self ):
+        return ((k, self[k]) for k in self._iterkeys())
+
+    if PY_3:
+        keys = _iterkeys       
+        """Returns an iterator of all named result keys (Python 3.x only)."""
+
+        values = _itervalues
+        """Returns an iterator of all named result values (Python 3.x only)."""
+
+        items = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
+
+    else:
+        iterkeys = _iterkeys
+        """Returns an iterator of all named result keys (Python 2.x only)."""
+
+        itervalues = _itervalues
+        """Returns an iterator of all named result values (Python 2.x only)."""
+
+        iteritems = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
+
+        def keys( self ):
+            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iterkeys())
+
+        def values( self ):
+            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.itervalues())
+                
+        def items( self ):
+            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iteritems())
+
+    def haskeys( self ):
+        """Since keys() returns an iterator, this method is helpful in bypassing
+           code that looks for the existence of any defined results names."""
+        return bool(self.__tokdict)
+        
+    def pop( self, *args, **kwargs):
+        """
+        Removes and returns item at specified index (default=C{last}).
+        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
+        argument or an integer argument, it will use C{list} semantics
+        and pop tokens from the list of parsed tokens. If passed a 
+        non-integer argument (most likely a string), it will use C{dict}
+        semantics and pop the corresponding value from any defined 
+        results names. A second default return value argument is 
+        supported, just as in C{dict.pop()}.
+
+        Example::
+            def remove_first(tokens):
+                tokens.pop(0)
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
+
+            label = Word(alphas)
+            patt = label("LABEL") + OneOrMore(Word(nums))
+            print(patt.parseString("AAB 123 321").dump())
+
+            # Use pop() in a parse action to remove named result (note that corresponding value is not
+            # removed from list form of results)
+            def remove_LABEL(tokens):
+                tokens.pop("LABEL")
+                return tokens
+            patt.addParseAction(remove_LABEL)
+            print(patt.parseString("AAB 123 321").dump())
+        prints::
+            ['AAB', '123', '321']
+            - LABEL: AAB
+
+            ['AAB', '123', '321']
+        """
+        if not args:
+            args = [-1]
+        for k,v in kwargs.items():
+            if k == 'default':
+                args = (args[0], v)
+            else:
+                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
+        if (isinstance(args[0], int) or 
+                        len(args) == 1 or 
+                        args[0] in self):
+            index = args[0]
+            ret = self[index]
+            del self[index]
+            return ret
+        else:
+            defaultvalue = args[1]
+            return defaultvalue
+
+    def get(self, key, defaultValue=None):
+        """
+        Returns named result matching the given key, or if there is no
+        such name, then returns the given C{defaultValue} or C{None} if no
+        C{defaultValue} is specified.
+
+        Similar to C{dict.get()}.
+        
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            result = date_str.parseString("1999/12/31")
+            print(result.get("year")) # -> '1999'
+            print(result.get("hour", "not specified")) # -> 'not specified'
+            print(result.get("hour")) # -> None
+        """
+        if key in self:
+            return self[key]
+        else:
+            return defaultValue
+
+    def insert( self, index, insStr ):
+        """
+        Inserts new element at location index in the list of parsed tokens.
+        
+        Similar to C{list.insert()}.
+
+        Example::
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+            # use a parse action to insert the parse location in the front of the parsed results
+            def insert_locn(locn, tokens):
+                tokens.insert(0, locn)
+            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
+        """
+        self.__toklist.insert(index, insStr)
+        # fixup indices in token dictionary
+        for name,occurrences in self.__tokdict.items():
+            for k, (value, position) in enumerate(occurrences):
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+    def append( self, item ):
+        """
+        Add single element to end of ParseResults list of elements.
+
+        Example::
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+            
+            # use a parse action to compute the sum of the parsed integers, and add it to the end
+            def append_sum(tokens):
+                tokens.append(sum(map(int, tokens)))
+            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
+        """
+        self.__toklist.append(item)
+
+    def extend( self, itemseq ):
+        """
+        Add sequence of elements to end of ParseResults list of elements.
+
+        Example::
+            patt = OneOrMore(Word(alphas))
+            
+            # use a parse action to append the reverse of the matched strings, to make a palindrome
+            def make_palindrome(tokens):
+                tokens.extend(reversed([t[::-1] for t in tokens]))
+                return ''.join(tokens)
+            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
+        """
+        if isinstance(itemseq, ParseResults):
+            self += itemseq
+        else:
+            self.__toklist.extend(itemseq)
+
+    def clear( self ):
+        """
+        Clear all elements and results names.
+        """
+        del self.__toklist[:]
+        self.__tokdict.clear()
+
+    def __getattr__( self, name ):
+        try:
+            return self[name]
+        except KeyError:
+            return ""
+            
+        if name in self.__tokdict:
+            if name not in self.__accumNames:
+                return self.__tokdict[name][-1][0]
+            else:
+                return ParseResults([ v[0] for v in self.__tokdict[name] ])
+        else:
+            return ""
+
+    def __add__( self, other ):
+        ret = self.copy()
+        ret += other
+        return ret
+
+    def __iadd__( self, other ):
+        if other.__tokdict:
+            offset = len(self.__toklist)
+            addoffset = lambda a: offset if a<0 else a+offset
+            otheritems = other.__tokdict.items()
+            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
+                                for (k,vlist) in otheritems for v in vlist]
+            for k,v in otherdictitems:
+                self[k] = v
+                if isinstance(v[0],ParseResults):
+                    v[0].__parent = wkref(self)
+            
+        self.__toklist += other.__toklist
+        self.__accumNames.update( other.__accumNames )
+        return self
+
+    def __radd__(self, other):
+        if isinstance(other,int) and other == 0:
+            # useful for merging many ParseResults using sum() builtin
+            return self.copy()
+        else:
+            # this may raise a TypeError - so be it
+            return other + self
+        
+    def __repr__( self ):
+        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
+
+    def __str__( self ):
+        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
+
+    def _asStringList( self, sep='' ):
+        out = []
+        for item in self.__toklist:
+            if out and sep:
+                out.append(sep)
+            if isinstance( item, ParseResults ):
+                out += item._asStringList()
+            else:
+                out.append( _ustr(item) )
+        return out
+
+    def asList( self ):
+        """
+        Returns the parse results as a nested list of matching tokens, all converted to strings.
+
+        Example::
+            patt = OneOrMore(Word(alphas))
+            result = patt.parseString("sldkj lsdkj sldkj")
+            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
+            print(type(result), result) # ->  ['sldkj', 'lsdkj', 'sldkj']
+            
+            # Use asList() to create an actual list
+            result_list = result.asList()
+            print(type(result_list), result_list) # ->  ['sldkj', 'lsdkj', 'sldkj']
+        """
+        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
+
+    def asDict( self ):
+        """
+        Returns the named parse results as a nested dictionary.
+
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+            
+            result = date_str.parseString('12/31/1999')
+            print(type(result), repr(result)) # ->  (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
+            
+            result_dict = result.asDict()
+            print(type(result_dict), repr(result_dict)) # ->  {'day': '1999', 'year': '12', 'month': '31'}
+
+            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
+            import json
+            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
+            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
+        """
+        if PY_3:
+            item_fn = self.items
+        else:
+            item_fn = self.iteritems
+            
+        def toItem(obj):
+            if isinstance(obj, ParseResults):
+                if obj.haskeys():
+                    return obj.asDict()
+                else:
+                    return [toItem(v) for v in obj]
+            else:
+                return obj
+                
+        return dict((k,toItem(v)) for k,v in item_fn())
+
+    def copy( self ):
+        """
+        Returns a new copy of a C{ParseResults} object.
+        """
+        ret = ParseResults( self.__toklist )
+        ret.__tokdict = self.__tokdict.copy()
+        ret.__parent = self.__parent
+        ret.__accumNames.update( self.__accumNames )
+        ret.__name = self.__name
+        return ret
+
+    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
+        """
+        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
+        """
+        nl = "\n"
+        out = []
+        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
+                                                            for v in vlist)
+        nextLevelIndent = indent + "  "
+
+        # collapse out indents if formatting is not desired
+        if not formatted:
+            indent = ""
+            nextLevelIndent = ""
+            nl = ""
+
+        selfTag = None
+        if doctag is not None:
+            selfTag = doctag
+        else:
+            if self.__name:
+                selfTag = self.__name
+
+        if not selfTag:
+            if namedItemsOnly:
+                return ""
+            else:
+                selfTag = "ITEM"
+
+        out += [ nl, indent, "<", selfTag, ">" ]
+
+        for i,res in enumerate(self.__toklist):
+            if isinstance(res,ParseResults):
+                if i in namedItems:
+                    out += [ res.asXML(namedItems[i],
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+                else:
+                    out += [ res.asXML(None,
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+            else:
+                # individual token, see if there is a name for it
+                resTag = None
+                if i in namedItems:
+                    resTag = namedItems[i]
+                if not resTag:
+                    if namedItemsOnly:
+                        continue
+                    else:
+                        resTag = "ITEM"
+                xmlBodyText = _xml_escape(_ustr(res))
+                out += [ nl, nextLevelIndent, "<", resTag, ">",
+                                                xmlBodyText,
+                                                "" ]
+
+        out += [ nl, indent, "" ]
+        return "".join(out)
+
+    def __lookup(self,sub):
+        for k,vlist in self.__tokdict.items():
+            for v,loc in vlist:
+                if sub is v:
+                    return k
+        return None
+
+    def getName(self):
+        r"""
+        Returns the results name for this token expression. Useful when several 
+        different expressions might match at a particular location.
+
+        Example::
+            integer = Word(nums)
+            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
+            house_number_expr = Suppress('#') + Word(nums, alphanums)
+            user_data = (Group(house_number_expr)("house_number") 
+                        | Group(ssn_expr)("ssn")
+                        | Group(integer)("age"))
+            user_info = OneOrMore(user_data)
+            
+            result = user_info.parseString("22 111-22-3333 #221B")
+            for item in result:
+                print(item.getName(), ':', item[0])
+        prints::
+            age : 22
+            ssn : 111-22-3333
+            house_number : 221B
+        """
+        if self.__name:
+            return self.__name
+        elif self.__parent:
+            par = self.__parent()
+            if par:
+                return par.__lookup(self)
+            else:
+                return None
+        elif (len(self) == 1 and
+               len(self.__tokdict) == 1 and
+               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
+            return next(iter(self.__tokdict.keys()))
+        else:
+            return None
+
+    def dump(self, indent='', depth=0, full=True):
+        """
+        Diagnostic method for listing out the contents of a C{ParseResults}.
+        Accepts an optional C{indent} argument so that this string can be embedded
+        in a nested display of other data.
+
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+            
+            result = date_str.parseString('12/31/1999')
+            print(result.dump())
+        prints::
+            ['12', '/', '31', '/', '1999']
+            - day: 1999
+            - month: 31
+            - year: 12
+        """
+        out = []
+        NL = '\n'
+        out.append( indent+_ustr(self.asList()) )
+        if full:
+            if self.haskeys():
+                items = sorted((str(k), v) for k,v in self.items())
+                for k,v in items:
+                    if out:
+                        out.append(NL)
+                    out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
+                    if isinstance(v,ParseResults):
+                        if v:
+                            out.append( v.dump(indent,depth+1) )
+                        else:
+                            out.append(_ustr(v))
+                    else:
+                        out.append(repr(v))
+            elif any(isinstance(vv,ParseResults) for vv in self):
+                v = self
+                for i,vv in enumerate(v):
+                    if isinstance(vv,ParseResults):
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
+                    else:
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
+            
+        return "".join(out)
+
+    def pprint(self, *args, **kwargs):
+        """
+        Pretty-printer for parsed results as a list, using the C{pprint} module.
+        Accepts additional positional or keyword args as defined for the 
+        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
+
+        Example::
+            ident = Word(alphas, alphanums)
+            num = Word(nums)
+            func = Forward()
+            term = ident | num | Group('(' + func + ')')
+            func <<= ident + Group(Optional(delimitedList(term)))
+            result = func.parseString("fna a,b,(fnb c,d,200),100")
+            result.pprint(width=40)
+        prints::
+            ['fna',
+             ['a',
+              'b',
+              ['(', 'fnb', ['c', 'd', '200'], ')'],
+              '100']]
+        """
+        pprint.pprint(self.asList(), *args, **kwargs)
+
+    # add support for pickle protocol
+    def __getstate__(self):
+        return ( self.__toklist,
+                 ( self.__tokdict.copy(),
+                   self.__parent is not None and self.__parent() or None,
+                   self.__accumNames,
+                   self.__name ) )
+
+    def __setstate__(self,state):
+        self.__toklist = state[0]
+        (self.__tokdict,
+         par,
+         inAccumNames,
+         self.__name) = state[1]
+        self.__accumNames = {}
+        self.__accumNames.update(inAccumNames)
+        if par is not None:
+            self.__parent = wkref(par)
+        else:
+            self.__parent = None
+
+    def __getnewargs__(self):
+        return self.__toklist, self.__name, self.__asList, self.__modal
+
+    def __dir__(self):
+        return (dir(type(self)) + list(self.keys()))
+
+MutableMapping.register(ParseResults)
+
+def col (loc,strg):
+    """Returns current column within a string, counting newlines as line separators.
+   The first column is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See L{I{ParserElement.parseString}} for more information
+   on parsing strings containing C{}s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    s = strg
+    return 1 if 0} for more information
+   on parsing strings containing C{}s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    return strg.count("\n",0,loc) + 1
+
+def line( loc, strg ):
+    """Returns the line of text containing loc within a string, counting newlines as line separators.
+       """
+    lastCR = strg.rfind("\n", 0, loc)
+    nextCR = strg.find("\n", loc)
+    if nextCR >= 0:
+        return strg[lastCR+1:nextCR]
+    else:
+        return strg[lastCR+1:]
+
+def _defaultStartDebugAction( instring, loc, expr ):
+    print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
+
+def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+    print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
+
+def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+    print ("Exception raised:" + _ustr(exc))
+
+def nullDebugAction(*args):
+    """'Do-nothing' debug action, to suppress debugging output during parsing."""
+    pass
+
+# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
+#~ 'decorator to trim function calls to match the arity of the target'
+#~ def _trim_arity(func, maxargs=3):
+    #~ if func in singleArgBuiltins:
+        #~ return lambda s,l,t: func(t)
+    #~ limit = 0
+    #~ foundArity = False
+    #~ def wrapper(*args):
+        #~ nonlocal limit,foundArity
+        #~ while 1:
+            #~ try:
+                #~ ret = func(*args[limit:])
+                #~ foundArity = True
+                #~ return ret
+            #~ except TypeError:
+                #~ if limit == maxargs or foundArity:
+                    #~ raise
+                #~ limit += 1
+                #~ continue
+    #~ return wrapper
+
+# this version is Python 2.x-3.x cross-compatible
+'decorator to trim function calls to match the arity of the target'
+def _trim_arity(func, maxargs=2):
+    if func in singleArgBuiltins:
+        return lambda s,l,t: func(t)
+    limit = [0]
+    foundArity = [False]
+    
+    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
+    if system_version[:2] >= (3,5):
+        def extract_stack(limit=0):
+            # special handling for Python 3.5.0 - extra deep call stack by 1
+            offset = -3 if system_version == (3,5,0) else -2
+            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
+            return [frame_summary[:2]]
+        def extract_tb(tb, limit=0):
+            frames = traceback.extract_tb(tb, limit=limit)
+            frame_summary = frames[-1]
+            return [frame_summary[:2]]
+    else:
+        extract_stack = traceback.extract_stack
+        extract_tb = traceback.extract_tb
+    
+    # synthesize what would be returned by traceback.extract_stack at the call to 
+    # user's parse action 'func', so that we don't incur call penalty at parse time
+    
+    LINE_DIFF = 6
+    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND 
+    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
+    this_line = extract_stack(limit=2)[-1]
+    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
+
+    def wrapper(*args):
+        while 1:
+            try:
+                ret = func(*args[limit[0]:])
+                foundArity[0] = True
+                return ret
+            except TypeError:
+                # re-raise TypeErrors if they did not come from our arity testing
+                if foundArity[0]:
+                    raise
+                else:
+                    try:
+                        tb = sys.exc_info()[-1]
+                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
+                            raise
+                    finally:
+                        del tb
+
+                if limit[0] <= maxargs:
+                    limit[0] += 1
+                    continue
+                raise
+
+    # copy func name to wrapper for sensible debug output
+    func_name = ""
+    try:
+        func_name = getattr(func, '__name__', 
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    wrapper.__name__ = func_name
+
+    return wrapper
+
+class ParserElement(object):
+    """Abstract base level parser element class."""
+    DEFAULT_WHITE_CHARS = " \n\t\r"
+    verbose_stacktrace = False
+
+    @staticmethod
+    def setDefaultWhitespaceChars( chars ):
+        r"""
+        Overrides the default whitespace chars
+
+        Example::
+            # default whitespace chars are space,  and newline
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
+            
+            # change to just treat newline as significant
+            ParserElement.setDefaultWhitespaceChars(" \t")
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
+        """
+        ParserElement.DEFAULT_WHITE_CHARS = chars
+
+    @staticmethod
+    def inlineLiteralsUsing(cls):
+        """
+        Set class to be used for inclusion of string literals into a parser.
+        
+        Example::
+            # default literal class used is Literal
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+
+            # change to Suppress
+            ParserElement.inlineLiteralsUsing(Suppress)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
+        """
+        ParserElement._literalStringClass = cls
+
+    def __init__( self, savelist=False ):
+        self.parseAction = list()
+        self.failAction = None
+        #~ self.name = ""  # don't define self.name, let subclasses try/except upcall
+        self.strRepr = None
+        self.resultsName = None
+        self.saveAsList = savelist
+        self.skipWhitespace = True
+        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        self.copyDefaultWhiteChars = True
+        self.mayReturnEmpty = False # used when checking for left-recursion
+        self.keepTabs = False
+        self.ignoreExprs = list()
+        self.debug = False
+        self.streamlined = False
+        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+        self.errmsg = ""
+        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+        self.debugActions = ( None, None, None ) #custom debug actions
+        self.re = None
+        self.callPreparse = True # used to avoid redundant calls to preParse
+        self.callDuringTry = False
+
+    def copy( self ):
+        """
+        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
+        for the same parsing pattern, using copies of the original parse element.
+        
+        Example::
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
+            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+            
+            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
+        prints::
+            [5120, 100, 655360, 268435456]
+        Equivalent form of C{expr.copy()} is just C{expr()}::
+            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+        """
+        cpy = copy.copy( self )
+        cpy.parseAction = self.parseAction[:]
+        cpy.ignoreExprs = self.ignoreExprs[:]
+        if self.copyDefaultWhiteChars:
+            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        return cpy
+
+    def setName( self, name ):
+        """
+        Define name for this expression, makes debugging and exception messages clearer.
+        
+        Example::
+            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
+            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
+        """
+        self.name = name
+        self.errmsg = "Expected " + self.name
+        if hasattr(self,"exception"):
+            self.exception.msg = self.errmsg
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """
+        Define name for referencing matching tokens as a nested attribute
+        of the returned parse results.
+        NOTE: this returns a *copy* of the original C{ParserElement} object;
+        this is so that the client can define a basic element, such as an
+        integer, and reference it in multiple places with different names.
+
+        You can also set results names using the abbreviated syntax,
+        C{expr("name")} in place of C{expr.setResultsName("name")} - 
+        see L{I{__call__}<__call__>}.
+
+        Example::
+            date_str = (integer.setResultsName("year") + '/' 
+                        + integer.setResultsName("month") + '/' 
+                        + integer.setResultsName("day"))
+
+            # equivalent form:
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+        """
+        newself = self.copy()
+        if name.endswith("*"):
+            name = name[:-1]
+            listAllMatches=True
+        newself.resultsName = name
+        newself.modalResults = not listAllMatches
+        return newself
+
+    def setBreak(self,breakFlag = True):
+        """Method to invoke the Python pdb debugger when this element is
+           about to be parsed. Set C{breakFlag} to True to enable, False to
+           disable.
+        """
+        if breakFlag:
+            _parseMethod = self._parse
+            def breaker(instring, loc, doActions=True, callPreParse=True):
+                import pdb
+                pdb.set_trace()
+                return _parseMethod( instring, loc, doActions, callPreParse )
+            breaker._originalParseMethod = _parseMethod
+            self._parse = breaker
+        else:
+            if hasattr(self._parse,"_originalParseMethod"):
+                self._parse = self._parse._originalParseMethod
+        return self
+
+    def setParseAction( self, *fns, **kwargs ):
+        """
+        Define one or more actions to perform when successfully matching parse element definition.
+        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+        C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+         - s   = the original string being parsed (see note below)
+         - loc = the location of the matching substring
+         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+        If the functions in fns modify the tokens, they can return them as the return
+        value from fn, and the modified list of tokens will replace the original.
+        Otherwise, fn does not need to return any value.
+
+        Optional keyword arguments:
+         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
+
+        Note: the default parsing behavior is to expand tabs in the input string
+        before starting the parsing process.  See L{I{parseString}} for more information
+        on parsing strings containing C{}s, and suggested methods to maintain a
+        consistent view of the parsed string, the parse location, and line and column
+        positions within the parsed string.
+        
+        Example::
+            integer = Word(nums)
+            date_str = integer + '/' + integer + '/' + integer
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+            # use parse action to convert to ints at parse time
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            date_str = integer + '/' + integer + '/' + integer
+
+            # note that integer fields are now ints, not strings
+            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
+        """
+        self.parseAction = list(map(_trim_arity, list(fns)))
+        self.callDuringTry = kwargs.get("callDuringTry", False)
+        return self
+
+    def addParseAction( self, *fns, **kwargs ):
+        """
+        Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}}.
+        
+        See examples in L{I{copy}}.
+        """
+        self.parseAction += list(map(_trim_arity, list(fns)))
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def addCondition(self, *fns, **kwargs):
+        """Add a boolean predicate function to expression's list of parse actions. See 
+        L{I{setParseAction}} for function call signatures. Unlike C{setParseAction}, 
+        functions passed to C{addCondition} need to return boolean success/fail of the condition.
+
+        Optional keyword arguments:
+         - message = define a custom message to be used in the raised exception
+         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
+         
+        Example::
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            year_int = integer.copy()
+            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
+            date_str = year_int + '/' + integer + '/' + integer
+
+            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
+        """
+        msg = kwargs.get("message", "failed user-defined condition")
+        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
+        for fn in fns:
+            def pa(s,l,t):
+                if not bool(_trim_arity(fn)(s,l,t)):
+                    raise exc_type(s,l,msg)
+            self.parseAction.append(pa)
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def setFailAction( self, fn ):
+        """Define action to perform if parsing fails at this expression.
+           Fail acton fn is a callable function that takes the arguments
+           C{fn(s,loc,expr,err)} where:
+            - s = string being parsed
+            - loc = location where expression match was attempted and failed
+            - expr = the parse expression that failed
+            - err = the exception thrown
+           The function returns no value.  It may throw C{L{ParseFatalException}}
+           if it is desired to stop parsing immediately."""
+        self.failAction = fn
+        return self
+
+    def _skipIgnorables( self, instring, loc ):
+        exprsFound = True
+        while exprsFound:
+            exprsFound = False
+            for e in self.ignoreExprs:
+                try:
+                    while 1:
+                        loc,dummy = e._parse( instring, loc )
+                        exprsFound = True
+                except ParseException:
+                    pass
+        return loc
+
+    def preParse( self, instring, loc ):
+        if self.ignoreExprs:
+            loc = self._skipIgnorables( instring, loc )
+
+        if self.skipWhitespace:
+            wt = self.whiteChars
+            instrlen = len(instring)
+            while loc < instrlen and instring[loc] in wt:
+                loc += 1
+
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        return loc, []
+
+    def postParse( self, instring, loc, tokenlist ):
+        return tokenlist
+
+    #~ @profile
+    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+        debugging = ( self.debug ) #and doActions )
+
+        if debugging or self.failAction:
+            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+            if (self.debugActions[0] ):
+                self.debugActions[0]( instring, loc, self )
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = preloc
+            try:
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            except ParseBaseException as err:
+                #~ print ("Exception raised:", err)
+                if self.debugActions[2]:
+                    self.debugActions[2]( instring, tokensStart, self, err )
+                if self.failAction:
+                    self.failAction( instring, tokensStart, self, err )
+                raise
+        else:
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = preloc
+            if self.mayIndexError or preloc >= len(instring):
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            else:
+                loc,tokens = self.parseImpl( instring, preloc, doActions )
+
+        tokens = self.postParse( instring, loc, tokens )
+
+        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
+        if self.parseAction and (doActions or self.callDuringTry):
+            if debugging:
+                try:
+                    for fn in self.parseAction:
+                        tokens = fn( instring, tokensStart, retTokens )
+                        if tokens is not None:
+                            retTokens = ParseResults( tokens,
+                                                      self.resultsName,
+                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                      modal=self.modalResults )
+                except ParseBaseException as err:
+                    #~ print "Exception raised in user parse action:", err
+                    if (self.debugActions[2] ):
+                        self.debugActions[2]( instring, tokensStart, self, err )
+                    raise
+            else:
+                for fn in self.parseAction:
+                    tokens = fn( instring, tokensStart, retTokens )
+                    if tokens is not None:
+                        retTokens = ParseResults( tokens,
+                                                  self.resultsName,
+                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                  modal=self.modalResults )
+        if debugging:
+            #~ print ("Matched",self,"->",retTokens.asList())
+            if (self.debugActions[1] ):
+                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+
+        return loc, retTokens
+
+    def tryParse( self, instring, loc ):
+        try:
+            return self._parse( instring, loc, doActions=False )[0]
+        except ParseFatalException:
+            raise ParseException( instring, loc, self.errmsg, self)
+    
+    def canParseNext(self, instring, loc):
+        try:
+            self.tryParse(instring, loc)
+        except (ParseException, IndexError):
+            return False
+        else:
+            return True
+
+    class _UnboundedCache(object):
+        def __init__(self):
+            cache = {}
+            self.not_in_cache = not_in_cache = object()
+
+            def get(self, key):
+                return cache.get(key, not_in_cache)
+
+            def set(self, key, value):
+                cache[key] = value
+
+            def clear(self):
+                cache.clear()
+                
+            def cache_len(self):
+                return len(cache)
+
+            self.get = types.MethodType(get, self)
+            self.set = types.MethodType(set, self)
+            self.clear = types.MethodType(clear, self)
+            self.__len__ = types.MethodType(cache_len, self)
+
+    if _OrderedDict is not None:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = _OrderedDict()
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    while len(cache) > size:
+                        try:
+                            cache.popitem(False)
+                        except KeyError:
+                            pass
+
+                def clear(self):
+                    cache.clear()
+
+                def cache_len(self):
+                    return len(cache)
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+                self.__len__ = types.MethodType(cache_len, self)
+
+    else:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = {}
+                key_fifo = collections.deque([], size)
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    while len(key_fifo) > size:
+                        cache.pop(key_fifo.popleft(), None)
+                    key_fifo.append(key)
+
+                def clear(self):
+                    cache.clear()
+                    key_fifo.clear()
+
+                def cache_len(self):
+                    return len(cache)
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+                self.__len__ = types.MethodType(cache_len, self)
+
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
+    packrat_cache_lock = RLock()
+    packrat_cache_stats = [0, 0]
+
+    # this method gets repeatedly called during backtracking with the same arguments -
+    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
+        HIT, MISS = 0, 1
+        lookup = (self, instring, loc, callPreParse, doActions)
+        with ParserElement.packrat_cache_lock:
+            cache = ParserElement.packrat_cache
+            value = cache.get(lookup)
+            if value is cache.not_in_cache:
+                ParserElement.packrat_cache_stats[MISS] += 1
+                try:
+                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
+                except ParseBaseException as pe:
+                    # cache a copy of the exception, without the traceback
+                    cache.set(lookup, pe.__class__(*pe.args))
+                    raise
+                else:
+                    cache.set(lookup, (value[0], value[1].copy()))
+                    return value
+            else:
+                ParserElement.packrat_cache_stats[HIT] += 1
+                if isinstance(value, Exception):
+                    raise value
+                return (value[0], value[1].copy())
+
+    _parse = _parseNoCache
+
+    @staticmethod
+    def resetCache():
+        ParserElement.packrat_cache.clear()
+        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
+
+    _packratEnabled = False
+    @staticmethod
+    def enablePackrat(cache_size_limit=128):
+        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
+           Repeated parse attempts at the same string location (which happens
+           often in many complex grammars) can immediately return a cached value,
+           instead of re-executing parsing/validating code.  Memoizing is done of
+           both valid results and parsing exceptions.
+           
+           Parameters:
+            - cache_size_limit - (default=C{128}) - if an integer value is provided
+              will limit the size of the packrat cache; if None is passed, then
+              the cache size will be unbounded; if 0 is passed, the cache will
+              be effectively disabled.
+            
+           This speedup may break existing programs that use parse actions that
+           have side-effects.  For this reason, packrat parsing is disabled when
+           you first import pyparsing.  To activate the packrat feature, your
+           program must call the class method C{ParserElement.enablePackrat()}.  If
+           your program uses C{psyco} to "compile as you go", you must call
+           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
+           Python will crash.  For best results, call C{enablePackrat()} immediately
+           after importing pyparsing.
+           
+           Example::
+               import pyparsing
+               pyparsing.ParserElement.enablePackrat()
+        """
+        if not ParserElement._packratEnabled:
+            ParserElement._packratEnabled = True
+            if cache_size_limit is None:
+                ParserElement.packrat_cache = ParserElement._UnboundedCache()
+            else:
+                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
+            ParserElement._parse = ParserElement._parseCache
+
+    def parseString( self, instring, parseAll=False ):
+        """
+        Execute the parse expression with the given string.
+        This is the main interface to the client code, once the complete
+        expression has been built.
+
+        If you want the grammar to require that the entire input string be
+        successfully parsed, then set C{parseAll} to True (equivalent to ending
+        the grammar with C{L{StringEnd()}}).
+
+        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
+        in order to report proper column numbers in parse actions.
+        If the input string contains tabs and
+        the grammar uses parse actions that use the C{loc} argument to index into the
+        string being parsed, you can ensure you have a consistent view of the input
+        string by:
+         - calling C{parseWithTabs} on your grammar before calling C{parseString}
+           (see L{I{parseWithTabs}})
+         - define your parse action using the full C{(s,loc,toks)} signature, and
+           reference the input string using the parse action's C{s} argument
+         - explictly expand the tabs in your input string before calling
+           C{parseString}
+        
+        Example::
+            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
+            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
+        """
+        ParserElement.resetCache()
+        if not self.streamlined:
+            self.streamline()
+            #~ self.saveAsList = True
+        for e in self.ignoreExprs:
+            e.streamline()
+        if not self.keepTabs:
+            instring = instring.expandtabs()
+        try:
+            loc, tokens = self._parse( instring, 0 )
+            if parseAll:
+                loc = self.preParse( instring, loc )
+                se = Empty() + StringEnd()
+                se._parse( instring, loc )
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+        else:
+            return tokens
+
+    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
+        """
+        Scan the input string for expression matches.  Each match will return the
+        matching tokens, start location, and end location.  May be called with optional
+        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
+        C{overlap} is specified, then overlapping matches will be reported.
+
+        Note that the start and end locations are reported relative to the string
+        being parsed.  See L{I{parseString}} for more information on parsing
+        strings with embedded tabs.
+
+        Example::
+            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
+            print(source)
+            for tokens,start,end in Word(alphas).scanString(source):
+                print(' '*start + '^'*(end-start))
+                print(' '*start + tokens[0])
+        
+        prints::
+        
+            sldjf123lsdjjkf345sldkjf879lkjsfd987
+            ^^^^^
+            sldjf
+                    ^^^^^^^
+                    lsdjjkf
+                              ^^^^^^
+                              sldkjf
+                                       ^^^^^^
+                                       lkjsfd
+        """
+        if not self.streamlined:
+            self.streamline()
+        for e in self.ignoreExprs:
+            e.streamline()
+
+        if not self.keepTabs:
+            instring = _ustr(instring).expandtabs()
+        instrlen = len(instring)
+        loc = 0
+        preparseFn = self.preParse
+        parseFn = self._parse
+        ParserElement.resetCache()
+        matches = 0
+        try:
+            while loc <= instrlen and matches < maxMatches:
+                try:
+                    preloc = preparseFn( instring, loc )
+                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
+                except ParseException:
+                    loc = preloc+1
+                else:
+                    if nextLoc > loc:
+                        matches += 1
+                        yield tokens, preloc, nextLoc
+                        if overlap:
+                            nextloc = preparseFn( instring, loc )
+                            if nextloc > loc:
+                                loc = nextLoc
+                            else:
+                                loc += 1
+                        else:
+                            loc = nextLoc
+                    else:
+                        loc = preloc+1
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def transformString( self, instring ):
+        """
+        Extension to C{L{scanString}}, to modify matching text with modified tokens that may
+        be returned from a parse action.  To use C{transformString}, define a grammar and
+        attach a parse action to it that modifies the returned token list.
+        Invoking C{transformString()} on a target string will then scan for matches,
+        and replace the matched text patterns according to the logic in the parse
+        action.  C{transformString()} returns the resulting transformed string.
+        
+        Example::
+            wd = Word(alphas)
+            wd.setParseAction(lambda toks: toks[0].title())
+            
+            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
+        Prints::
+            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
+        """
+        out = []
+        lastE = 0
+        # force preservation of s, to minimize unwanted transformation of string, and to
+        # keep string locs straight between transformString and scanString
+        self.keepTabs = True
+        try:
+            for t,s,e in self.scanString( instring ):
+                out.append( instring[lastE:s] )
+                if t:
+                    if isinstance(t,ParseResults):
+                        out += t.asList()
+                    elif isinstance(t,list):
+                        out += t
+                    else:
+                        out.append(t)
+                lastE = e
+            out.append(instring[lastE:])
+            out = [o for o in out if o]
+            return "".join(map(_ustr,_flatten(out)))
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def searchString( self, instring, maxMatches=_MAX_INT ):
+        """
+        Another extension to C{L{scanString}}, simplifying the access to the tokens found
+        to match the given parse expression.  May be called with optional
+        C{maxMatches} argument, to clip searching after 'n' matches are found.
+        
+        Example::
+            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
+            cap_word = Word(alphas.upper(), alphas.lower())
+            
+            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
+
+            # the sum() builtin can be used to merge results into a single ParseResults object
+            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
+        prints::
+            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
+            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
+        """
+        try:
+            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
+        """
+        Generator method to split a string using the given expression as a separator.
+        May be called with optional C{maxsplit} argument, to limit the number of splits;
+        and the optional C{includeSeparators} argument (default=C{False}), if the separating
+        matching text should be included in the split results.
+        
+        Example::        
+            punc = oneOf(list(".,;:/-!?"))
+            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
+        prints::
+            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
+        """
+        splits = 0
+        last = 0
+        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
+            yield instring[last:s]
+            if includeSeparators:
+                yield t[0]
+            last = e
+        yield instring[last:]
+
+    def __add__(self, other ):
+        """
+        Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
+        converts them to L{Literal}s by default.
+        
+        Example::
+            greet = Word(alphas) + "," + Word(alphas) + "!"
+            hello = "Hello, World!"
+            print (hello, "->", greet.parseString(hello))
+        Prints::
+            Hello, World! -> ['Hello', ',', 'World', '!']
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return And( [ self, other ] )
+
+    def __radd__(self, other ):
+        """
+        Implementation of + operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other + self
+
+    def __sub__(self, other):
+        """
+        Implementation of - operator, returns C{L{And}} with error stop
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return self + And._ErrorStop() + other
+
+    def __rsub__(self, other ):
+        """
+        Implementation of - operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other - self
+
+    def __mul__(self,other):
+        """
+        Implementation of * operator, allows use of C{expr * 3} in place of
+        C{expr + expr + expr}.  Expressions may also me multiplied by a 2-integer
+        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
+        may also include C{None} as in:
+         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+              to C{expr*n + L{ZeroOrMore}(expr)}
+              (read as "at least n instances of C{expr}")
+         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+              (read as "0 to n instances of C{expr}")
+         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+        Note that C{expr*(None,n)} does not raise an exception if
+        more than n exprs exist in the input stream; that is,
+        C{expr*(None,n)} does not enforce a maximum number of expr
+        occurrences.  If this behavior is desired, then write
+        C{expr*(None,n) + ~expr}
+        """
+        if isinstance(other,int):
+            minElements, optElements = other,0
+        elif isinstance(other,tuple):
+            other = (other + (None, None))[:2]
+            if other[0] is None:
+                other = (0, other[1])
+            if isinstance(other[0],int) and other[1] is None:
+                if other[0] == 0:
+                    return ZeroOrMore(self)
+                if other[0] == 1:
+                    return OneOrMore(self)
+                else:
+                    return self*other[0] + ZeroOrMore(self)
+            elif isinstance(other[0],int) and isinstance(other[1],int):
+                minElements, optElements = other
+                optElements -= minElements
+            else:
+                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+        else:
+            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
+
+        if minElements < 0:
+            raise ValueError("cannot multiply ParserElement by negative value")
+        if optElements < 0:
+            raise ValueError("second tuple value must be greater or equal to first tuple value")
+        if minElements == optElements == 0:
+            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
+
+        if (optElements):
+            def makeOptionalList(n):
+                if n>1:
+                    return Optional(self + makeOptionalList(n-1))
+                else:
+                    return Optional(self)
+            if minElements:
+                if minElements == 1:
+                    ret = self + makeOptionalList(optElements)
+                else:
+                    ret = And([self]*minElements) + makeOptionalList(optElements)
+            else:
+                ret = makeOptionalList(optElements)
+        else:
+            if minElements == 1:
+                ret = self
+            else:
+                ret = And([self]*minElements)
+        return ret
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __or__(self, other ):
+        """
+        Implementation of | operator - returns C{L{MatchFirst}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return MatchFirst( [ self, other ] )
+
+    def __ror__(self, other ):
+        """
+        Implementation of | operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other | self
+
+    def __xor__(self, other ):
+        """
+        Implementation of ^ operator - returns C{L{Or}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Or( [ self, other ] )
+
+    def __rxor__(self, other ):
+        """
+        Implementation of ^ operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other ^ self
+
+    def __and__(self, other ):
+        """
+        Implementation of & operator - returns C{L{Each}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Each( [ self, other ] )
+
+    def __rand__(self, other ):
+        """
+        Implementation of & operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other & self
+
+    def __invert__( self ):
+        """
+        Implementation of ~ operator - returns C{L{NotAny}}
+        """
+        return NotAny( self )
+
+    def __call__(self, name=None):
+        """
+        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
+        
+        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+        passed as C{True}.
+           
+        If C{name} is omitted, same as calling C{L{copy}}.
+
+        Example::
+            # these are equivalent
+            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")             
+        """
+        if name is not None:
+            return self.setResultsName(name)
+        else:
+            return self.copy()
+
+    def suppress( self ):
+        """
+        Suppresses the output of this C{ParserElement}; useful to keep punctuation from
+        cluttering up returned output.
+        """
+        return Suppress( self )
+
+    def leaveWhitespace( self ):
+        """
+        Disables the skipping of whitespace before matching the characters in the
+        C{ParserElement}'s defined pattern.  This is normally only used internally by
+        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+        """
+        self.skipWhitespace = False
+        return self
+
+    def setWhitespaceChars( self, chars ):
+        """
+        Overrides the default whitespace chars
+        """
+        self.skipWhitespace = True
+        self.whiteChars = chars
+        self.copyDefaultWhiteChars = False
+        return self
+
+    def parseWithTabs( self ):
+        """
+        Overrides default behavior to expand C{}s to spaces before parsing the input string.
+        Must be called before C{parseString} when the input grammar contains elements that
+        match C{} characters.
+        """
+        self.keepTabs = True
+        return self
+
+    def ignore( self, other ):
+        """
+        Define expression to be ignored (e.g., comments) while doing pattern
+        matching; may be called repeatedly, to define multiple comment or other
+        ignorable patterns.
+        
+        Example::
+            patt = OneOrMore(Word(alphas))
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
+            
+            patt.ignore(cStyleComment)
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
+        """
+        if isinstance(other, basestring):
+            other = Suppress(other)
+
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                self.ignoreExprs.append(other)
+        else:
+            self.ignoreExprs.append( Suppress( other.copy() ) )
+        return self
+
+    def setDebugActions( self, startAction, successAction, exceptionAction ):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        """
+        self.debugActions = (startAction or _defaultStartDebugAction,
+                             successAction or _defaultSuccessDebugAction,
+                             exceptionAction or _defaultExceptionDebugAction)
+        self.debug = True
+        return self
+
+    def setDebug( self, flag=True ):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        Set C{flag} to True to enable, False to disable.
+
+        Example::
+            wd = Word(alphas).setName("alphaword")
+            integer = Word(nums).setName("numword")
+            term = wd | integer
+            
+            # turn on debugging for wd
+            wd.setDebug()
+
+            OneOrMore(term).parseString("abc 123 xyz 890")
+        
+        prints::
+            Match alphaword at loc 0(1,1)
+            Matched alphaword -> ['abc']
+            Match alphaword at loc 3(1,4)
+            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
+            Match alphaword at loc 7(1,8)
+            Matched alphaword -> ['xyz']
+            Match alphaword at loc 11(1,12)
+            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
+            Match alphaword at loc 15(1,16)
+            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
+
+        The output shown is that produced by the default debug actions - custom debug actions can be
+        specified using L{setDebugActions}. Prior to attempting
+        to match the C{wd} expression, the debugging message C{"Match  at loc (,)"}
+        is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
+        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
+        which makes debugging and exception messages easier to understand - for instance, the default
+        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
+        """
+        if flag:
+            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
+        else:
+            self.debug = False
+        return self
+
+    def __str__( self ):
+        return self.name
+
+    def __repr__( self ):
+        return _ustr(self)
+
+    def streamline( self ):
+        self.streamlined = True
+        self.strRepr = None
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        pass
+
+    def validate( self, validateTrace=[] ):
+        """
+        Check defined expressions for valid structure, check for infinite recursive definitions.
+        """
+        self.checkRecursion( [] )
+
+    def parseFile( self, file_or_filename, parseAll=False ):
+        """
+        Execute the parse expression on the given file or filename.
+        If a filename is specified (instead of a file object),
+        the entire file is opened, read, and closed before parsing.
+        """
+        try:
+            file_contents = file_or_filename.read()
+        except AttributeError:
+            with open(file_or_filename, "r") as f:
+                file_contents = f.read()
+        try:
+            return self.parseString(file_contents, parseAll)
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def __eq__(self,other):
+        if isinstance(other, ParserElement):
+            return self is other or vars(self) == vars(other)
+        elif isinstance(other, basestring):
+            return self.matches(other)
+        else:
+            return super(ParserElement,self)==other
+
+    def __ne__(self,other):
+        return not (self == other)
+
+    def __hash__(self):
+        return hash(id(self))
+
+    def __req__(self,other):
+        return self == other
+
+    def __rne__(self,other):
+        return not (self == other)
+
+    def matches(self, testString, parseAll=True):
+        """
+        Method for quick testing of a parser against a test string. Good for simple 
+        inline microtests of sub expressions while building up larger parser.
+           
+        Parameters:
+         - testString - to test against this expression for a match
+         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
+            
+        Example::
+            expr = Word(nums)
+            assert expr.matches("100")
+        """
+        try:
+            self.parseString(_ustr(testString), parseAll=parseAll)
+            return True
+        except ParseBaseException:
+            return False
+                
+    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
+        """
+        Execute the parse expression on a series of test strings, showing each
+        test, the parsed results or where the parse failed. Quick and easy way to
+        run a parse expression against a list of sample strings.
+           
+        Parameters:
+         - tests - a list of separate test strings, or a multiline string of test strings
+         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests           
+         - comment - (default=C{'#'}) - expression for indicating embedded comments in the test 
+              string; pass None to disable comment filtering
+         - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
+              if False, only dump nested list
+         - printResults - (default=C{True}) prints test output to stdout
+         - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
+
+        Returns: a (success, results) tuple, where success indicates that all tests succeeded
+        (or failed if C{failureTests} is True), and the results contain a list of lines of each 
+        test's output
+        
+        Example::
+            number_expr = pyparsing_common.number.copy()
+
+            result = number_expr.runTests('''
+                # unsigned integer
+                100
+                # negative integer
+                -100
+                # float with scientific notation
+                6.02e23
+                # integer with scientific notation
+                1e-12
+                ''')
+            print("Success" if result[0] else "Failed!")
+
+            result = number_expr.runTests('''
+                # stray character
+                100Z
+                # missing leading digit before '.'
+                -.100
+                # too many '.'
+                3.14.159
+                ''', failureTests=True)
+            print("Success" if result[0] else "Failed!")
+        prints::
+            # unsigned integer
+            100
+            [100]
+
+            # negative integer
+            -100
+            [-100]
+
+            # float with scientific notation
+            6.02e23
+            [6.02e+23]
+
+            # integer with scientific notation
+            1e-12
+            [1e-12]
+
+            Success
+            
+            # stray character
+            100Z
+               ^
+            FAIL: Expected end of text (at char 3), (line:1, col:4)
+
+            # missing leading digit before '.'
+            -.100
+            ^
+            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
+
+            # too many '.'
+            3.14.159
+                ^
+            FAIL: Expected end of text (at char 4), (line:1, col:5)
+
+            Success
+
+        Each test string must be on a single line. If you want to test a string that spans multiple
+        lines, create a test like this::
+
+            expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
+        
+        (Note that this is a raw string literal, you must include the leading 'r'.)
+        """
+        if isinstance(tests, basestring):
+            tests = list(map(str.strip, tests.rstrip().splitlines()))
+        if isinstance(comment, basestring):
+            comment = Literal(comment)
+        allResults = []
+        comments = []
+        success = True
+        for t in tests:
+            if comment is not None and comment.matches(t, False) or comments and not t:
+                comments.append(t)
+                continue
+            if not t:
+                continue
+            out = ['\n'.join(comments), t]
+            comments = []
+            try:
+                t = t.replace(r'\n','\n')
+                result = self.parseString(t, parseAll=parseAll)
+                out.append(result.dump(full=fullDump))
+                success = success and not failureTests
+            except ParseBaseException as pe:
+                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
+                if '\n' in t:
+                    out.append(line(pe.loc, t))
+                    out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
+                else:
+                    out.append(' '*pe.loc + '^' + fatal)
+                out.append("FAIL: " + str(pe))
+                success = success and failureTests
+                result = pe
+            except Exception as exc:
+                out.append("FAIL-EXCEPTION: " + str(exc))
+                success = success and failureTests
+                result = exc
+
+            if printResults:
+                if fullDump:
+                    out.append('')
+                print('\n'.join(out))
+
+            allResults.append((t, result))
+        
+        return success, allResults
+
+        
+class Token(ParserElement):
+    """
+    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
+    """
+    def __init__( self ):
+        super(Token,self).__init__( savelist=False )
+
+
+class Empty(Token):
+    """
+    An empty token, will always match.
+    """
+    def __init__( self ):
+        super(Empty,self).__init__()
+        self.name = "Empty"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+
+class NoMatch(Token):
+    """
+    A token that will never match.
+    """
+    def __init__( self ):
+        super(NoMatch,self).__init__()
+        self.name = "NoMatch"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.errmsg = "Unmatchable token"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Literal(Token):
+    """
+    Token to exactly match a specified string.
+    
+    Example::
+        Literal('blah').parseString('blah')  # -> ['blah']
+        Literal('blah').parseString('blahfooblah')  # -> ['blah']
+        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"
+    
+    For case-insensitive matching, use L{CaselessLiteral}.
+    
+    For keyword matching (force word break before and after the matched string),
+    use L{Keyword} or L{CaselessKeyword}.
+    """
+    def __init__( self, matchString ):
+        super(Literal,self).__init__()
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Literal; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+            self.__class__ = Empty
+        self.name = '"%s"' % _ustr(self.match)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+
+    # Performance tuning: this routine gets called a *lot*
+    # if this is a single character match string  and the first character matches,
+    # short-circuit as quickly as possible, and avoid calling startswith
+    #~ @profile
+    def parseImpl( self, instring, loc, doActions=True ):
+        if (instring[loc] == self.firstMatchChar and
+            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
+            return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+_L = Literal
+ParserElement._literalStringClass = Literal
+
+class Keyword(Token):
+    """
+    Token to exactly match a specified string as a keyword, that is, it must be
+    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
+     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+    Accepts two optional constructor arguments in addition to the keyword string:
+     - C{identChars} is a string of characters that would be valid identifier characters,
+          defaulting to all alphanumerics + "_" and "$"
+     - C{caseless} allows case-insensitive matching, default is C{False}.
+       
+    Example::
+        Keyword("start").parseString("start")  # -> ['start']
+        Keyword("start").parseString("starting")  # -> Exception
+
+    For case-insensitive matching, use L{CaselessKeyword}.
+    """
+    DEFAULT_KEYWORD_CHARS = alphanums+"_$"
+
+    def __init__( self, matchString, identChars=None, caseless=False ):
+        super(Keyword,self).__init__()
+        if identChars is None:
+            identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Keyword; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+        self.name = '"%s"' % self.match
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+        self.caseless = caseless
+        if caseless:
+            self.caselessmatch = matchString.upper()
+            identChars = identChars.upper()
+        self.identChars = set(identChars)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.caseless:
+            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
+                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        else:
+            if (instring[loc] == self.firstMatchChar and
+                (self.matchLen==1 or instring.startswith(self.match,loc)) and
+                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
+                (loc == 0 or instring[loc-1] not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+    def copy(self):
+        c = super(Keyword,self).copy()
+        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        return c
+
+    @staticmethod
+    def setDefaultKeywordChars( chars ):
+        """Overrides the default Keyword chars
+        """
+        Keyword.DEFAULT_KEYWORD_CHARS = chars
+
+class CaselessLiteral(Literal):
+    """
+    Token to match a specified string, ignoring case of letters.
+    Note: the matched results will always be in the case of the given
+    match string, NOT the case of the input text.
+
+    Example::
+        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
+        
+    (Contrast with example for L{CaselessKeyword}.)
+    """
+    def __init__( self, matchString ):
+        super(CaselessLiteral,self).__init__( matchString.upper() )
+        # Preserve the defining literal.
+        self.returnString = matchString
+        self.name = "'%s'" % self.returnString
+        self.errmsg = "Expected " + self.name
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if instring[ loc:loc+self.matchLen ].upper() == self.match:
+            return loc+self.matchLen, self.returnString
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class CaselessKeyword(Keyword):
+    """
+    Caseless version of L{Keyword}.
+
+    Example::
+        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
+        
+    (Contrast with example for L{CaselessLiteral}.)
+    """
+    def __init__( self, matchString, identChars=None ):
+        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
+            return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class CloseMatch(Token):
+    """
+    A variation on L{Literal} which matches "close" matches, that is, 
+    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
+     - C{match_string} - string to be matched
+     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
+    
+    The results from a successful parse will contain the matched text from the input string and the following named results:
+     - C{mismatches} - a list of the positions within the match_string where mismatches were found
+     - C{original} - the original match_string used to compare against the input string
+    
+    If C{mismatches} is an empty list, then the match was an exact match.
+    
+    Example::
+        patt = CloseMatch("ATCATCGAATGGA")
+        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
+        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
+
+        # exact match
+        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
+
+        # close match allowing up to 2 mismatches
+        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
+        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
+    """
+    def __init__(self, match_string, maxMismatches=1):
+        super(CloseMatch,self).__init__()
+        self.name = match_string
+        self.match_string = match_string
+        self.maxMismatches = maxMismatches
+        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
+        self.mayIndexError = False
+        self.mayReturnEmpty = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        start = loc
+        instrlen = len(instring)
+        maxloc = start + len(self.match_string)
+
+        if maxloc <= instrlen:
+            match_string = self.match_string
+            match_stringloc = 0
+            mismatches = []
+            maxMismatches = self.maxMismatches
+
+            for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
+                src,mat = s_m
+                if src != mat:
+                    mismatches.append(match_stringloc)
+                    if len(mismatches) > maxMismatches:
+                        break
+            else:
+                loc = match_stringloc + 1
+                results = ParseResults([instring[start:loc]])
+                results['original'] = self.match_string
+                results['mismatches'] = mismatches
+                return loc, results
+
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Word(Token):
+    """
+    Token for matching words composed of allowed character sets.
+    Defined with string containing all allowed initial characters,
+    an optional string containing allowed body characters (if omitted,
+    defaults to the initial character set), and an optional minimum,
+    maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction. An optional
+    C{excludeChars} parameter can list characters that might be found in 
+    the input C{bodyChars} string; useful to define a word of all printables
+    except for one or two characters, for instance.
+    
+    L{srange} is useful for defining custom character set strings for defining 
+    C{Word} expressions, using range notation from regular expression character sets.
+    
+    A common mistake is to use C{Word} to match a specific literal string, as in 
+    C{Word("Address")}. Remember that C{Word} uses the string argument to define
+    I{sets} of matchable characters. This expression would match "Add", "AAA",
+    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
+    To match an exact literal string, use L{Literal} or L{Keyword}.
+
+    pyparsing includes helper strings for building Words:
+     - L{alphas}
+     - L{nums}
+     - L{alphanums}
+     - L{hexnums}
+     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
+     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
+     - L{printables} (any non-whitespace character)
+
+    Example::
+        # a word composed of digits
+        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
+        
+        # a word with a leading capital, and zero or more lowercase
+        capital_word = Word(alphas.upper(), alphas.lower())
+
+        # hostnames are alphanumeric, with leading alpha, and '-'
+        hostname = Word(alphas, alphanums+'-')
+        
+        # roman numeral (not a strict parser, accepts invalid mix of characters)
+        roman = Word("IVXLCDM")
+        
+        # any string of non-whitespace characters, except for ','
+        csv_value = Word(printables, excludeChars=",")
+    """
+    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
+        super(Word,self).__init__()
+        if excludeChars:
+            initChars = ''.join(c for c in initChars if c not in excludeChars)
+            if bodyChars:
+                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
+        self.initCharsOrig = initChars
+        self.initChars = set(initChars)
+        if bodyChars :
+            self.bodyCharsOrig = bodyChars
+            self.bodyChars = set(bodyChars)
+        else:
+            self.bodyCharsOrig = initChars
+            self.bodyChars = set(initChars)
+
+        self.maxSpecified = max > 0
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.asKeyword = asKeyword
+
+        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
+            if self.bodyCharsOrig == self.initCharsOrig:
+                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
+            elif len(self.initCharsOrig) == 1:
+                self.reString = "%s[%s]*" % \
+                                      (re.escape(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            else:
+                self.reString = "[%s][%s]*" % \
+                                      (_escapeRegexRangeChars(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            if self.asKeyword:
+                self.reString = r"\b"+self.reString+r"\b"
+            try:
+                self.re = re.compile( self.reString )
+            except Exception:
+                self.re = None
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.re:
+            result = self.re.match(instring,loc)
+            if not result:
+                raise ParseException(instring, loc, self.errmsg, self)
+
+            loc = result.end()
+            return loc, result.group()
+
+        if not(instring[ loc ] in self.initChars):
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        instrlen = len(instring)
+        bodychars = self.bodyChars
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, instrlen )
+        while loc < maxloc and instring[loc] in bodychars:
+            loc += 1
+
+        throwException = False
+        if loc - start < self.minLen:
+            throwException = True
+        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
+            throwException = True
+        if self.asKeyword:
+            if (start>0 and instring[start-1] in bodychars) or (loc4:
+                    return s[:4]+"..."
+                else:
+                    return s
+
+            if ( self.initCharsOrig != self.bodyCharsOrig ):
+                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
+            else:
+                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
+
+        return self.strRepr
+
+
+class Regex(Token):
+    r"""
+    Token for matching strings that match a given regular expression.
+    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
+    If the given regex contains named groups (defined using C{(?P...)}), these will be preserved as 
+    named parse results.
+
+    Example::
+        realnum = Regex(r"[+-]?\d+\.\d*")
+        date = Regex(r'(?P\d{4})-(?P\d\d?)-(?P\d\d?)')
+        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
+        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
+    """
+    compiledREtype = type(re.compile("[A-Z]"))
+    def __init__( self, pattern, flags=0):
+        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
+        super(Regex,self).__init__()
+
+        if isinstance(pattern, basestring):
+            if not pattern:
+                warnings.warn("null string passed to Regex; use Empty() instead",
+                        SyntaxWarning, stacklevel=2)
+
+            self.pattern = pattern
+            self.flags = flags
+
+            try:
+                self.re = re.compile(self.pattern, self.flags)
+                self.reString = self.pattern
+            except sre_constants.error:
+                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+                    SyntaxWarning, stacklevel=2)
+                raise
+
+        elif isinstance(pattern, Regex.compiledREtype):
+            self.re = pattern
+            self.pattern = \
+            self.reString = str(pattern)
+            self.flags = flags
+            
+        else:
+            raise ValueError("Regex may only be constructed with a string or a compiled RE object")
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        result = self.re.match(instring,loc)
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        d = result.groupdict()
+        ret = ParseResults(result.group())
+        if d:
+            for k in d:
+                ret[k] = d[k]
+        return loc,ret
+
+    def __str__( self ):
+        try:
+            return super(Regex,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "Re:(%s)" % repr(self.pattern)
+
+        return self.strRepr
+
+
+class QuotedString(Token):
+    r"""
+    Token for matching strings that are delimited by quoting characters.
+    
+    Defined with the following parameters:
+        - quoteChar - string of one or more characters defining the quote delimiting string
+        - escChar - character to escape quotes, typically backslash (default=C{None})
+        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
+        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
+
+    Example::
+        qs = QuotedString('"')
+        print(qs.searchString('lsjdf "This is the quote" sldjf'))
+        complex_qs = QuotedString('{{', endQuoteChar='}}')
+        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
+        sql_qs = QuotedString('"', escQuote='""')
+        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
+    prints::
+        [['This is the quote']]
+        [['This is the "quote"']]
+        [['This is the quote with "embedded" quotes']]
+    """
+    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
+        super(QuotedString,self).__init__()
+
+        # remove white space from quote chars - wont work anyway
+        quoteChar = quoteChar.strip()
+        if not quoteChar:
+            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+            raise SyntaxError()
+
+        if endQuoteChar is None:
+            endQuoteChar = quoteChar
+        else:
+            endQuoteChar = endQuoteChar.strip()
+            if not endQuoteChar:
+                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+                raise SyntaxError()
+
+        self.quoteChar = quoteChar
+        self.quoteCharLen = len(quoteChar)
+        self.firstQuoteChar = quoteChar[0]
+        self.endQuoteChar = endQuoteChar
+        self.endQuoteCharLen = len(endQuoteChar)
+        self.escChar = escChar
+        self.escQuote = escQuote
+        self.unquoteResults = unquoteResults
+        self.convertWhitespaceEscapes = convertWhitespaceEscapes
+
+        if multiline:
+            self.flags = re.MULTILINE | re.DOTALL
+            self.pattern = r'%s(?:[^%s%s]' % \
+                ( re.escape(self.quoteChar),
+                  _escapeRegexRangeChars(self.endQuoteChar[0]),
+                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+        else:
+            self.flags = 0
+            self.pattern = r'%s(?:[^%s\n\r%s]' % \
+                ( re.escape(self.quoteChar),
+                  _escapeRegexRangeChars(self.endQuoteChar[0]),
+                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+        if len(self.endQuoteChar) > 1:
+            self.pattern += (
+                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
+                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
+                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
+                )
+        if escQuote:
+            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
+        if escChar:
+            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
+            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
+        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
+
+        try:
+            self.re = re.compile(self.pattern, self.flags)
+            self.reString = self.pattern
+        except sre_constants.error:
+            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
+                SyntaxWarning, stacklevel=2)
+            raise
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        ret = result.group()
+
+        if self.unquoteResults:
+
+            # strip off quotes
+            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
+
+            if isinstance(ret,basestring):
+                # replace escaped whitespace
+                if '\\' in ret and self.convertWhitespaceEscapes:
+                    ws_map = {
+                        r'\t' : '\t',
+                        r'\n' : '\n',
+                        r'\f' : '\f',
+                        r'\r' : '\r',
+                    }
+                    for wslit,wschar in ws_map.items():
+                        ret = ret.replace(wslit, wschar)
+
+                # replace escaped characters
+                if self.escChar:
+                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
+
+                # replace escaped quotes
+                if self.escQuote:
+                    ret = ret.replace(self.escQuote, self.endQuoteChar)
+
+        return loc, ret
+
+    def __str__( self ):
+        try:
+            return super(QuotedString,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
+
+        return self.strRepr
+
+
+class CharsNotIn(Token):
+    """
+    Token for matching words composed of characters I{not} in a given set (will
+    include whitespace in matched characters if not listed in the provided exclusion set - see example).
+    Defined with string containing all disallowed characters, and an optional
+    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction.
+
+    Example::
+        # define a comma-separated-value as anything that is not a ','
+        csv_value = CharsNotIn(',')
+        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
+    prints::
+        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
+    """
+    def __init__( self, notChars, min=1, max=0, exact=0 ):
+        super(CharsNotIn,self).__init__()
+        self.skipWhitespace = False
+        self.notChars = notChars
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = ( self.minLen == 0 )
+        self.mayIndexError = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if instring[loc] in self.notChars:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        notchars = self.notChars
+        maxlen = min( start+self.maxLen, len(instring) )
+        while loc < maxlen and \
+              (instring[loc] not in notchars):
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        try:
+            return super(CharsNotIn, self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            if len(self.notChars) > 4:
+                self.strRepr = "!W:(%s...)" % self.notChars[:4]
+            else:
+                self.strRepr = "!W:(%s)" % self.notChars
+
+        return self.strRepr
+
+class White(Token):
+    """
+    Special matching class for matching whitespace.  Normally, whitespace is ignored
+    by pyparsing grammars.  This class is included when some whitespace structures
+    are significant.  Define with a string containing the whitespace characters to be
+    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
+    as defined for the C{L{Word}} class.
+    """
+    whiteStrs = {
+        " " : "",
+        "\t": "",
+        "\n": "",
+        "\r": "",
+        "\f": "",
+        }
+    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
+        super(White,self).__init__()
+        self.matchWhite = ws
+        self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
+        #~ self.leaveWhitespace()
+        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
+        self.mayReturnEmpty = True
+        self.errmsg = "Expected " + self.name
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if not(instring[ loc ] in self.matchWhite):
+            raise ParseException(instring, loc, self.errmsg, self)
+        start = loc
+        loc += 1
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, len(instring) )
+        while loc < maxloc and instring[loc] in self.matchWhite:
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+
+class _PositionToken(Token):
+    def __init__( self ):
+        super(_PositionToken,self).__init__()
+        self.name=self.__class__.__name__
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+class GoToColumn(_PositionToken):
+    """
+    Token to advance to a specific column of input text; useful for tabular report scraping.
+    """
+    def __init__( self, colno ):
+        super(GoToColumn,self).__init__()
+        self.col = colno
+
+    def preParse( self, instring, loc ):
+        if col(loc,instring) != self.col:
+            instrlen = len(instring)
+            if self.ignoreExprs:
+                loc = self._skipIgnorables( instring, loc )
+            while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
+                loc += 1
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        thiscol = col( loc, instring )
+        if thiscol > self.col:
+            raise ParseException( instring, loc, "Text not in expected column", self )
+        newloc = loc + self.col - thiscol
+        ret = instring[ loc: newloc ]
+        return newloc, ret
+
+
+class LineStart(_PositionToken):
+    """
+    Matches if current position is at the beginning of a line within the parse string
+    
+    Example::
+    
+        test = '''\
+        AAA this line
+        AAA and this line
+          AAA but not this one
+        B AAA and definitely not this one
+        '''
+
+        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
+            print(t)
+    
+    Prints::
+        ['AAA', ' this line']
+        ['AAA', ' and this line']    
+
+    """
+    def __init__( self ):
+        super(LineStart,self).__init__()
+        self.errmsg = "Expected start of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if col(loc, instring) == 1:
+            return loc, []
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class LineEnd(_PositionToken):
+    """
+    Matches if current position is at the end of a line within the parse string
+    """
+    def __init__( self ):
+        super(LineEnd,self).__init__()
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
+        self.errmsg = "Expected end of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if loc len(instring):
+            return loc, []
+        else:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+class WordStart(_PositionToken):
+    """
+    Matches if the current position is at the beginning of a Word, and
+    is not preceded by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+    the string being parsed, or at the beginning of a line.
+    """
+    def __init__(self, wordChars = printables):
+        super(WordStart,self).__init__()
+        self.wordChars = set(wordChars)
+        self.errmsg = "Not at the start of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        if loc != 0:
+            if (instring[loc-1] in self.wordChars or
+                instring[loc] not in self.wordChars):
+                raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class WordEnd(_PositionToken):
+    """
+    Matches if the current position is at the end of a Word, and
+    is not followed by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+    the string being parsed, or at the end of a line.
+    """
+    def __init__(self, wordChars = printables):
+        super(WordEnd,self).__init__()
+        self.wordChars = set(wordChars)
+        self.skipWhitespace = False
+        self.errmsg = "Not at the end of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        instrlen = len(instring)
+        if instrlen>0 and loc maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+            else:
+                # save match among all matches, to retry longest to shortest
+                matches.append((loc2, e))
+
+        if matches:
+            matches.sort(key=lambda x: -x[0])
+            for _,e in matches:
+                try:
+                    return e._parse( instring, loc, doActions )
+                except ParseException as err:
+                    err.__traceback__ = None
+                    if err.loc > maxExcLoc:
+                        maxException = err
+                        maxExcLoc = err.loc
+
+        if maxException is not None:
+            maxException.msg = self.errmsg
+            raise maxException
+        else:
+            raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+
+    def __ixor__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #Or( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class MatchFirst(ParseExpression):
+    """
+    Requires that at least one C{ParseExpression} is found.
+    If two expressions match, the first one listed is the one that will match.
+    May be constructed using the C{'|'} operator.
+
+    Example::
+        # construct MatchFirst using '|' operator
+        
+        # watch the order of expressions to match
+        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
+        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]
+
+        # put more selective expression first
+        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
+        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(MatchFirst,self).__init__(exprs, savelist)
+        if self.exprs:
+            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+        else:
+            self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        maxExcLoc = -1
+        maxException = None
+        for e in self.exprs:
+            try:
+                ret = e._parse( instring, loc, doActions )
+                return ret
+            except ParseException as err:
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+
+        # only got here if no expression matched, raise exception for match that made it the furthest
+        else:
+            if maxException is not None:
+                maxException.msg = self.errmsg
+                raise maxException
+            else:
+                raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+    def __ior__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #MatchFirst( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class Each(ParseExpression):
+    """
+    Requires all given C{ParseExpression}s to be found, but in any order.
+    Expressions may be separated by whitespace.
+    May be constructed using the C{'&'} operator.
+
+    Example::
+        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
+        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
+        integer = Word(nums)
+        shape_attr = "shape:" + shape_type("shape")
+        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
+        color_attr = "color:" + color("color")
+        size_attr = "size:" + integer("size")
+
+        # use Each (using operator '&') to accept attributes in any order 
+        # (shape and posn are required, color and size are optional)
+        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
+
+        shape_spec.runTests('''
+            shape: SQUARE color: BLACK posn: 100, 120
+            shape: CIRCLE size: 50 color: BLUE posn: 50,80
+            color:GREEN size:20 shape:TRIANGLE posn:20,40
+            '''
+            )
+    prints::
+        shape: SQUARE color: BLACK posn: 100, 120
+        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
+        - color: BLACK
+        - posn: ['100', ',', '120']
+          - x: 100
+          - y: 120
+        - shape: SQUARE
+
+
+        shape: CIRCLE size: 50 color: BLUE posn: 50,80
+        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
+        - color: BLUE
+        - posn: ['50', ',', '80']
+          - x: 50
+          - y: 80
+        - shape: CIRCLE
+        - size: 50
+
+
+        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
+        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
+        - color: GREEN
+        - posn: ['20', ',', '40']
+          - x: 20
+          - y: 40
+        - shape: TRIANGLE
+        - size: 20
+    """
+    def __init__( self, exprs, savelist = True ):
+        super(Each,self).__init__(exprs, savelist)
+        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
+        self.skipWhitespace = True
+        self.initExprGroups = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.initExprGroups:
+            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
+            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
+            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
+            self.optionals = opt1 + opt2
+            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
+            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
+            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
+            self.required += self.multirequired
+            self.initExprGroups = False
+        tmpLoc = loc
+        tmpReqd = self.required[:]
+        tmpOpt  = self.optionals[:]
+        matchOrder = []
+
+        keepMatching = True
+        while keepMatching:
+            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
+            failed = []
+            for e in tmpExprs:
+                try:
+                    tmpLoc = e.tryParse( instring, tmpLoc )
+                except ParseException:
+                    failed.append(e)
+                else:
+                    matchOrder.append(self.opt1map.get(id(e),e))
+                    if e in tmpReqd:
+                        tmpReqd.remove(e)
+                    elif e in tmpOpt:
+                        tmpOpt.remove(e)
+            if len(failed) == len(tmpExprs):
+                keepMatching = False
+
+        if tmpReqd:
+            missing = ", ".join(_ustr(e) for e in tmpReqd)
+            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
+
+        # add any unmatched Optionals, in case they have default values defined
+        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
+
+        resultlist = []
+        for e in matchOrder:
+            loc,results = e._parse(instring,loc,doActions)
+            resultlist.append(results)
+
+        finalResults = sum(resultlist, ParseResults([]))
+        return loc, finalResults
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class ParseElementEnhance(ParserElement):
+    """
+    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(ParseElementEnhance,self).__init__(savelist)
+        if isinstance( expr, basestring ):
+            if issubclass(ParserElement._literalStringClass, Token):
+                expr = ParserElement._literalStringClass(expr)
+            else:
+                expr = ParserElement._literalStringClass(Literal(expr))
+        self.expr = expr
+        self.strRepr = None
+        if expr is not None:
+            self.mayIndexError = expr.mayIndexError
+            self.mayReturnEmpty = expr.mayReturnEmpty
+            self.setWhitespaceChars( expr.whiteChars )
+            self.skipWhitespace = expr.skipWhitespace
+            self.saveAsList = expr.saveAsList
+            self.callPreparse = expr.callPreparse
+            self.ignoreExprs.extend(expr.ignoreExprs)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.expr is not None:
+            return self.expr._parse( instring, loc, doActions, callPreParse=False )
+        else:
+            raise ParseException("",loc,self.errmsg,self)
+
+    def leaveWhitespace( self ):
+        self.skipWhitespace = False
+        self.expr = self.expr.copy()
+        if self.expr is not None:
+            self.expr.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseElementEnhance, self).ignore( other )
+                if self.expr is not None:
+                    self.expr.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseElementEnhance, self).ignore( other )
+            if self.expr is not None:
+                self.expr.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def streamline( self ):
+        super(ParseElementEnhance,self).streamline()
+        if self.expr is not None:
+            self.expr.streamline()
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        if self in parseElementList:
+            raise RecursiveGrammarException( parseElementList+[self] )
+        subRecCheckList = parseElementList[:] + [ self ]
+        if self.expr is not None:
+            self.expr.checkRecursion( subRecCheckList )
+
+    def validate( self, validateTrace=[] ):
+        tmp = validateTrace[:]+[self]
+        if self.expr is not None:
+            self.expr.validate(tmp)
+        self.checkRecursion( [] )
+
+    def __str__( self ):
+        try:
+            return super(ParseElementEnhance,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None and self.expr is not None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
+        return self.strRepr
+
+
+class FollowedBy(ParseElementEnhance):
+    """
+    Lookahead matching of the given parse expression.  C{FollowedBy}
+    does I{not} advance the parsing position within the input string, it only
+    verifies that the specified parse expression matches at the current
+    position.  C{FollowedBy} always returns a null token list.
+
+    Example::
+        # use FollowedBy to match a label only if it is followed by a ':'
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        
+        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
+    prints::
+        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
+    """
+    def __init__( self, expr ):
+        super(FollowedBy,self).__init__(expr)
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        self.expr.tryParse( instring, loc )
+        return loc, []
+
+
+class NotAny(ParseElementEnhance):
+    """
+    Lookahead to disallow matching with the given parse expression.  C{NotAny}
+    does I{not} advance the parsing position within the input string, it only
+    verifies that the specified parse expression does I{not} match at the current
+    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
+    always returns a null token list.  May be constructed using the '~' operator.
+
+    Example::
+        
+    """
+    def __init__( self, expr ):
+        super(NotAny,self).__init__(expr)
+        #~ self.leaveWhitespace()
+        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
+        self.mayReturnEmpty = True
+        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.expr.canParseNext(instring, loc):
+            raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "~{" + _ustr(self.expr) + "}"
+
+        return self.strRepr
+
+class _MultipleMatch(ParseElementEnhance):
+    def __init__( self, expr, stopOn=None):
+        super(_MultipleMatch, self).__init__(expr)
+        self.saveAsList = True
+        ender = stopOn
+        if isinstance(ender, basestring):
+            ender = ParserElement._literalStringClass(ender)
+        self.not_ender = ~ender if ender is not None else None
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        self_expr_parse = self.expr._parse
+        self_skip_ignorables = self._skipIgnorables
+        check_ender = self.not_ender is not None
+        if check_ender:
+            try_not_ender = self.not_ender.tryParse
+        
+        # must be at least one (but first see if we are the stopOn sentinel;
+        # if so, fail)
+        if check_ender:
+            try_not_ender(instring, loc)
+        loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
+        try:
+            hasIgnoreExprs = (not not self.ignoreExprs)
+            while 1:
+                if check_ender:
+                    try_not_ender(instring, loc)
+                if hasIgnoreExprs:
+                    preloc = self_skip_ignorables( instring, loc )
+                else:
+                    preloc = loc
+                loc, tmptokens = self_expr_parse( instring, preloc, doActions )
+                if tmptokens or tmptokens.haskeys():
+                    tokens += tmptokens
+        except (ParseException,IndexError):
+            pass
+
+        return loc, tokens
+        
+class OneOrMore(_MultipleMatch):
+    """
+    Repetition of one or more of the given expression.
+    
+    Parameters:
+     - expr - expression that must match one or more times
+     - stopOn - (default=C{None}) - expression for a terminating sentinel
+          (only required if the sentinel would ordinarily match the repetition 
+          expression)          
+
+    Example::
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+        text = "shape: SQUARE posn: upper left color: BLACK"
+        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
+
+        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
+        
+        # could also be written as
+        (attr_expr * (1,)).parseString(text).pprint()
+    """
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + _ustr(self.expr) + "}..."
+
+        return self.strRepr
+
+class ZeroOrMore(_MultipleMatch):
+    """
+    Optional repetition of zero or more of the given expression.
+    
+    Parameters:
+     - expr - expression that must match zero or more times
+     - stopOn - (default=C{None}) - expression for a terminating sentinel
+          (only required if the sentinel would ordinarily match the repetition 
+          expression)          
+
+    Example: similar to L{OneOrMore}
+    """
+    def __init__( self, expr, stopOn=None):
+        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
+        self.mayReturnEmpty = True
+        
+    def parseImpl( self, instring, loc, doActions=True ):
+        try:
+            return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
+        except (ParseException,IndexError):
+            return loc, []
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]..."
+
+        return self.strRepr
+
+class _NullToken(object):
+    def __bool__(self):
+        return False
+    __nonzero__ = __bool__
+    def __str__(self):
+        return ""
+
+_optionalNotMatched = _NullToken()
+class Optional(ParseElementEnhance):
+    """
+    Optional matching of the given expression.
+
+    Parameters:
+     - expr - expression that must match zero or more times
+     - default (optional) - value to be returned if the optional expression is not found.
+
+    Example::
+        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
+        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
+        zip.runTests('''
+            # traditional ZIP code
+            12345
+            
+            # ZIP+4 form
+            12101-0001
+            
+            # invalid ZIP
+            98765-
+            ''')
+    prints::
+        # traditional ZIP code
+        12345
+        ['12345']
+
+        # ZIP+4 form
+        12101-0001
+        ['12101-0001']
+
+        # invalid ZIP
+        98765-
+             ^
+        FAIL: Expected end of text (at char 5), (line:1, col:6)
+    """
+    def __init__( self, expr, default=_optionalNotMatched ):
+        super(Optional,self).__init__( expr, savelist=False )
+        self.saveAsList = self.expr.saveAsList
+        self.defaultValue = default
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        try:
+            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+        except (ParseException,IndexError):
+            if self.defaultValue is not _optionalNotMatched:
+                if self.expr.resultsName:
+                    tokens = ParseResults([ self.defaultValue ])
+                    tokens[self.expr.resultsName] = self.defaultValue
+                else:
+                    tokens = [ self.defaultValue ]
+            else:
+                tokens = []
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]"
+
+        return self.strRepr
+
+class SkipTo(ParseElementEnhance):
+    """
+    Token for skipping over all undefined text until the matched expression is found.
+
+    Parameters:
+     - expr - target expression marking the end of the data to be skipped
+     - include - (default=C{False}) if True, the target expression is also parsed 
+          (the skipped text and target expression are returned as a 2-element list).
+     - ignore - (default=C{None}) used to define grammars (typically quoted strings and 
+          comments) that might contain false matches to the target expression
+     - failOn - (default=C{None}) define expressions that are not allowed to be 
+          included in the skipped test; if found before the target expression is found, 
+          the SkipTo is not a match
+
+    Example::
+        report = '''
+            Outstanding Issues Report - 1 Jan 2000
+
+               # | Severity | Description                               |  Days Open
+            -----+----------+-------------------------------------------+-----------
+             101 | Critical | Intermittent system crash                 |          6
+              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
+              79 | Minor    | System slow when running too many reports |         47
+            '''
+        integer = Word(nums)
+        SEP = Suppress('|')
+        # use SkipTo to simply match everything up until the next SEP
+        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
+        # - parse action will call token.strip() for each matched token, i.e., the description body
+        string_data = SkipTo(SEP, ignore=quotedString)
+        string_data.setParseAction(tokenMap(str.strip))
+        ticket_expr = (integer("issue_num") + SEP 
+                      + string_data("sev") + SEP 
+                      + string_data("desc") + SEP 
+                      + integer("days_open"))
+        
+        for tkt in ticket_expr.searchString(report):
+            print tkt.dump()
+    prints::
+        ['101', 'Critical', 'Intermittent system crash', '6']
+        - days_open: 6
+        - desc: Intermittent system crash
+        - issue_num: 101
+        - sev: Critical
+        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
+        - days_open: 14
+        - desc: Spelling error on Login ('log|n')
+        - issue_num: 94
+        - sev: Cosmetic
+        ['79', 'Minor', 'System slow when running too many reports', '47']
+        - days_open: 47
+        - desc: System slow when running too many reports
+        - issue_num: 79
+        - sev: Minor
+    """
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
+        super( SkipTo, self ).__init__( other )
+        self.ignoreExpr = ignore
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.includeMatch = include
+        self.asList = False
+        if isinstance(failOn, basestring):
+            self.failOn = ParserElement._literalStringClass(failOn)
+        else:
+            self.failOn = failOn
+        self.errmsg = "No match found for "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        startloc = loc
+        instrlen = len(instring)
+        expr = self.expr
+        expr_parse = self.expr._parse
+        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
+        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
+        
+        tmploc = loc
+        while tmploc <= instrlen:
+            if self_failOn_canParseNext is not None:
+                # break if failOn expression matches
+                if self_failOn_canParseNext(instring, tmploc):
+                    break
+                    
+            if self_ignoreExpr_tryParse is not None:
+                # advance past ignore expressions
+                while 1:
+                    try:
+                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
+                    except ParseBaseException:
+                        break
+            
+            try:
+                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
+            except (ParseException, IndexError):
+                # no match, advance loc in string
+                tmploc += 1
+            else:
+                # matched skipto expr, done
+                break
+
+        else:
+            # ran off the end of the input string without matching skipto expr, fail
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        # build up return values
+        loc = tmploc
+        skiptext = instring[startloc:loc]
+        skipresult = ParseResults(skiptext)
+        
+        if self.includeMatch:
+            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
+            skipresult += mat
+
+        return loc, skipresult
+
+class Forward(ParseElementEnhance):
+    """
+    Forward declaration of an expression to be defined later -
+    used for recursive grammars, such as algebraic infix notation.
+    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+    Specifically, '|' has a lower precedence than '<<', so that::
+        fwdExpr << a | b | c
+    will actually be evaluated as::
+        (fwdExpr << a) | b | c
+    thereby leaving b and c out as parseable alternatives.  It is recommended that you
+    explicitly group the values inserted into the C{Forward}::
+        fwdExpr << (a | b | c)
+    Converting to use the '<<=' operator instead will avoid this problem.
+
+    See L{ParseResults.pprint} for an example of a recursive parser created using
+    C{Forward}.
+    """
+    def __init__( self, other=None ):
+        super(Forward,self).__init__( other, savelist=False )
+
+    def __lshift__( self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass(other)
+        self.expr = other
+        self.strRepr = None
+        self.mayIndexError = self.expr.mayIndexError
+        self.mayReturnEmpty = self.expr.mayReturnEmpty
+        self.setWhitespaceChars( self.expr.whiteChars )
+        self.skipWhitespace = self.expr.skipWhitespace
+        self.saveAsList = self.expr.saveAsList
+        self.ignoreExprs.extend(self.expr.ignoreExprs)
+        return self
+        
+    def __ilshift__(self, other):
+        return self << other
+    
+    def leaveWhitespace( self ):
+        self.skipWhitespace = False
+        return self
+
+    def streamline( self ):
+        if not self.streamlined:
+            self.streamlined = True
+            if self.expr is not None:
+                self.expr.streamline()
+        return self
+
+    def validate( self, validateTrace=[] ):
+        if self not in validateTrace:
+            tmp = validateTrace[:]+[self]
+            if self.expr is not None:
+                self.expr.validate(tmp)
+        self.checkRecursion([])
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+        return self.__class__.__name__ + ": ..."
+
+        # stubbed out for now - creates awful memory and perf issues
+        self._revertClass = self.__class__
+        self.__class__ = _ForwardNoRecurse
+        try:
+            if self.expr is not None:
+                retString = _ustr(self.expr)
+            else:
+                retString = "None"
+        finally:
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString
+
+    def copy(self):
+        if self.expr is not None:
+            return super(Forward,self).copy()
+        else:
+            ret = Forward()
+            ret <<= self
+            return ret
+
+class _ForwardNoRecurse(Forward):
+    def __str__( self ):
+        return "..."
+
+class TokenConverter(ParseElementEnhance):
+    """
+    Abstract subclass of C{ParseExpression}, for converting parsed results.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(TokenConverter,self).__init__( expr )#, savelist )
+        self.saveAsList = False
+
+class Combine(TokenConverter):
+    """
+    Converter to concatenate all matching tokens to a single string.
+    By default, the matching patterns must also be contiguous in the input string;
+    this can be disabled by specifying C{'adjacent=False'} in the constructor.
+
+    Example::
+        real = Word(nums) + '.' + Word(nums)
+        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
+        # will also erroneously match the following
+        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
+
+        real = Combine(Word(nums) + '.' + Word(nums))
+        print(real.parseString('3.1416')) # -> ['3.1416']
+        # no match when there are internal spaces
+        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
+    """
+    def __init__( self, expr, joinString="", adjacent=True ):
+        super(Combine,self).__init__( expr )
+        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
+        if adjacent:
+            self.leaveWhitespace()
+        self.adjacent = adjacent
+        self.skipWhitespace = True
+        self.joinString = joinString
+        self.callPreparse = True
+
+    def ignore( self, other ):
+        if self.adjacent:
+            ParserElement.ignore(self, other)
+        else:
+            super( Combine, self).ignore( other )
+        return self
+
+    def postParse( self, instring, loc, tokenlist ):
+        retToks = tokenlist.copy()
+        del retToks[:]
+        retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
+
+        if self.resultsName and retToks.haskeys():
+            return [ retToks ]
+        else:
+            return retToks
+
+class Group(TokenConverter):
+    """
+    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
+
+    Example::
+        ident = Word(alphas)
+        num = Word(nums)
+        term = ident | num
+        func = ident + Optional(delimitedList(term))
+        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']
+
+        func = ident + Group(Optional(delimitedList(term)))
+        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
+    """
+    def __init__( self, expr ):
+        super(Group,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        return [ tokenlist ]
+
+class Dict(TokenConverter):
+    """
+    Converter to return a repetitive expression as a list, but also as a dictionary.
+    Each element can also be referenced using the first token in the expression as its key.
+    Useful for tabular report scraping when the first column can be used as a item key.
+
+    Example::
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        
+        # print attributes as plain groups
+        print(OneOrMore(attr_expr).parseString(text).dump())
+        
+        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
+        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
+        print(result.dump())
+        
+        # access named fields as dict entries, or output as dict
+        print(result['shape'])        
+        print(result.asDict())
+    prints::
+        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
+
+        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+        - color: light blue
+        - posn: upper left
+        - shape: SQUARE
+        - texture: burlap
+        SQUARE
+        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
+    See more examples at L{ParseResults} of accessing fields by results name.
+    """
+    def __init__( self, expr ):
+        super(Dict,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        for i,tok in enumerate(tokenlist):
+            if len(tok) == 0:
+                continue
+            ikey = tok[0]
+            if isinstance(ikey,int):
+                ikey = _ustr(tok[0]).strip()
+            if len(tok)==1:
+                tokenlist[ikey] = _ParseResultsWithOffset("",i)
+            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
+                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
+            else:
+                dictvalue = tok.copy() #ParseResults(i)
+                del dictvalue[0]
+                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
+                else:
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
+
+        if self.resultsName:
+            return [ tokenlist ]
+        else:
+            return tokenlist
+
+
+class Suppress(TokenConverter):
+    """
+    Converter for ignoring the results of a parsed expression.
+
+    Example::
+        source = "a, b, c,d"
+        wd = Word(alphas)
+        wd_list1 = wd + ZeroOrMore(',' + wd)
+        print(wd_list1.parseString(source))
+
+        # often, delimiters that are useful during parsing are just in the
+        # way afterward - use Suppress to keep them out of the parsed output
+        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
+        print(wd_list2.parseString(source))
+    prints::
+        ['a', ',', 'b', ',', 'c', ',', 'd']
+        ['a', 'b', 'c', 'd']
+    (See also L{delimitedList}.)
+    """
+    def postParse( self, instring, loc, tokenlist ):
+        return []
+
+    def suppress( self ):
+        return self
+
+
+class OnlyOnce(object):
+    """
+    Wrapper for parse actions, to ensure they are only called once.
+    """
+    def __init__(self, methodCall):
+        self.callable = _trim_arity(methodCall)
+        self.called = False
+    def __call__(self,s,l,t):
+        if not self.called:
+            results = self.callable(s,l,t)
+            self.called = True
+            return results
+        raise ParseException(s,l,"")
+    def reset(self):
+        self.called = False
+
+def traceParseAction(f):
+    """
+    Decorator for debugging parse actions. 
+    
+    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
+    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
+
+    Example::
+        wd = Word(alphas)
+
+        @traceParseAction
+        def remove_duplicate_chars(tokens):
+            return ''.join(sorted(set(''.join(tokens))))
+
+        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
+        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
+    prints::
+        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
+        <3:
+            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
+        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
+        try:
+            ret = f(*paArgs)
+        except Exception as exc:
+            sys.stderr.write( "< ['aa', 'bb', 'cc']
+        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
+    """
+    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
+    if combine:
+        return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
+    else:
+        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
+
+def countedArray( expr, intExpr=None ):
+    """
+    Helper to define a counted list of expressions.
+    This helper defines a pattern of the form::
+        integer expr expr expr...
+    where the leading integer tells how many expr expressions follow.
+    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
+    
+    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
+
+    Example::
+        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']
+
+        # in this parser, the leading integer value is given in binary,
+        # '10' indicating that 2 values are in the array
+        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
+        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
+    """
+    arrayExpr = Forward()
+    def countFieldParseAction(s,l,t):
+        n = t[0]
+        arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
+        return []
+    if intExpr is None:
+        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
+    else:
+        intExpr = intExpr.copy()
+    intExpr.setName("arrayLen")
+    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
+    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
+
+def _flatten(L):
+    ret = []
+    for i in L:
+        if isinstance(i,list):
+            ret.extend(_flatten(i))
+        else:
+            ret.append(i)
+    return ret
+
+def matchPreviousLiteral(expr):
+    """
+    Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousLiteral(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
+    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
+    If this is not desired, use C{matchPreviousExpr}.
+    Do I{not} use with packrat parsing enabled.
+    """
+    rep = Forward()
+    def copyTokenToRepeater(s,l,t):
+        if t:
+            if len(t) == 1:
+                rep << t[0]
+            else:
+                # flatten t tokens
+                tflat = _flatten(t.asList())
+                rep << And(Literal(tt) for tt in tflat)
+        else:
+            rep << Empty()
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    rep.setName('(prev) ' + _ustr(expr))
+    return rep
+
+def matchPreviousExpr(expr):
+    """
+    Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousExpr(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
+    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
+    the expressions are evaluated first, and then compared, so
+    C{"1"} is compared with C{"10"}.
+    Do I{not} use with packrat parsing enabled.
+    """
+    rep = Forward()
+    e2 = expr.copy()
+    rep <<= e2
+    def copyTokenToRepeater(s,l,t):
+        matchTokens = _flatten(t.asList())
+        def mustMatchTheseTokens(s,l,t):
+            theseTokens = _flatten(t.asList())
+            if  theseTokens != matchTokens:
+                raise ParseException("",0,"")
+        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    rep.setName('(prev) ' + _ustr(expr))
+    return rep
+
+def _escapeRegexRangeChars(s):
+    #~  escape these chars: ^-]
+    for c in r"\^-]":
+        s = s.replace(c,_bslash+c)
+    s = s.replace("\n",r"\n")
+    s = s.replace("\t",r"\t")
+    return _ustr(s)
+
+def oneOf( strs, caseless=False, useRegex=True ):
+    """
+    Helper to quickly define a set of alternative Literals, and makes sure to do
+    longest-first testing when there is a conflict, regardless of the input order,
+    but returns a C{L{MatchFirst}} for best performance.
+
+    Parameters:
+     - strs - a string of space-delimited literals, or a collection of string literals
+     - caseless - (default=C{False}) - treat all literals as caseless
+     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
+          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
+          if creating a C{Regex} raises an exception)
+
+    Example::
+        comp_oper = oneOf("< = > <= >= !=")
+        var = Word(alphas)
+        number = Word(nums)
+        term = var | number
+        comparison_expr = term + comp_oper + term
+        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
+    prints::
+        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
+    """
+    if caseless:
+        isequal = ( lambda a,b: a.upper() == b.upper() )
+        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
+        parseElementClass = CaselessLiteral
+    else:
+        isequal = ( lambda a,b: a == b )
+        masks = ( lambda a,b: b.startswith(a) )
+        parseElementClass = Literal
+
+    symbols = []
+    if isinstance(strs,basestring):
+        symbols = strs.split()
+    elif isinstance(strs, Iterable):
+        symbols = list(strs)
+    else:
+        warnings.warn("Invalid argument to oneOf, expected string or iterable",
+                SyntaxWarning, stacklevel=2)
+    if not symbols:
+        return NoMatch()
+
+    i = 0
+    while i < len(symbols)-1:
+        cur = symbols[i]
+        for j,other in enumerate(symbols[i+1:]):
+            if ( isequal(other, cur) ):
+                del symbols[i+j+1]
+                break
+            elif ( masks(cur, other) ):
+                del symbols[i+j+1]
+                symbols.insert(i,other)
+                cur = other
+                break
+        else:
+            i += 1
+
+    if not caseless and useRegex:
+        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
+        try:
+            if len(symbols)==len("".join(symbols)):
+                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
+            else:
+                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
+        except Exception:
+            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
+                    SyntaxWarning, stacklevel=2)
+
+
+    # last resort, just use MatchFirst
+    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
+
+def dictOf( key, value ):
+    """
+    Helper to easily and clearly define a dictionary by specifying the respective patterns
+    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
+    in the proper order.  The key pattern can include delimiting markers or punctuation,
+    as long as they are suppressed, thereby leaving the significant key text.  The value
+    pattern can include named results, so that the C{Dict} results can include named token
+    fields.
+
+    Example::
+        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        print(OneOrMore(attr_expr).parseString(text).dump())
+        
+        attr_label = label
+        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
+
+        # similar to Dict, but simpler call format
+        result = dictOf(attr_label, attr_value).parseString(text)
+        print(result.dump())
+        print(result['shape'])
+        print(result.shape)  # object attribute access works too
+        print(result.asDict())
+    prints::
+        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+        - color: light blue
+        - posn: upper left
+        - shape: SQUARE
+        - texture: burlap
+        SQUARE
+        SQUARE
+        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
+    """
+    return Dict( ZeroOrMore( Group ( key + value ) ) )
+
+def originalTextFor(expr, asString=True):
+    """
+    Helper to return the original, untokenized text for a given expression.  Useful to
+    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+    revert separate tokens with intervening whitespace back to the original matching
+    input text. By default, returns astring containing the original parsed text.  
+       
+    If the optional C{asString} argument is passed as C{False}, then the return value is a 
+    C{L{ParseResults}} containing any results names that were originally matched, and a 
+    single token containing the original matched text from the input string.  So if 
+    the expression passed to C{L{originalTextFor}} contains expressions with defined
+    results names, you must set C{asString} to C{False} if you want to preserve those
+    results name values.
+
+    Example::
+        src = "this is test  bold text  normal text "
+        for tag in ("b","i"):
+            opener,closer = makeHTMLTags(tag)
+            patt = originalTextFor(opener + SkipTo(closer) + closer)
+            print(patt.searchString(src)[0])
+    prints::
+        [' bold text ']
+        ['text']
+    """
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    endlocMarker = locMarker.copy()
+    endlocMarker.callPreparse = False
+    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
+    if asString:
+        extractText = lambda s,l,t: s[t._original_start:t._original_end]
+    else:
+        def extractText(s,l,t):
+            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
+    matchExpr.setParseAction(extractText)
+    matchExpr.ignoreExprs = expr.ignoreExprs
+    return matchExpr
+
+def ungroup(expr): 
+    """
+    Helper to undo pyparsing's default grouping of And expressions, even
+    if all but one are non-empty.
+    """
+    return TokenConverter(expr).setParseAction(lambda t:t[0])
+
+def locatedExpr(expr):
+    """
+    Helper to decorate a returned token with its starting and ending locations in the input string.
+    This helper adds the following results names:
+     - locn_start = location where matched expression begins
+     - locn_end = location where matched expression ends
+     - value = the actual parsed results
+
+    Be careful if the input text contains C{} characters, you may want to call
+    C{L{ParserElement.parseWithTabs}}
+
+    Example::
+        wd = Word(alphas)
+        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+            print(match)
+    prints::
+        [[0, 'ljsdf', 5]]
+        [[8, 'lksdjjf', 15]]
+        [[18, 'lkkjj', 23]]
+    """
+    locator = Empty().setParseAction(lambda s,l,t: l)
+    return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
+
+
+# convenience constants for positional expressions
+empty       = Empty().setName("empty")
+lineStart   = LineStart().setName("lineStart")
+lineEnd     = LineEnd().setName("lineEnd")
+stringStart = StringStart().setName("stringStart")
+stringEnd   = StringEnd().setName("stringEnd")
+
+_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
+_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
+_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
+_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
+_charRange = Group(_singleChar + Suppress("-") + _singleChar)
+_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
+
+def srange(s):
+    r"""
+    Helper to easily define string ranges for use in Word construction.  Borrows
+    syntax from regexp '[]' string range definitions::
+        srange("[0-9]")   -> "0123456789"
+        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
+        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
+    The input string must be enclosed in []'s, and the returned string is the expanded
+    character set joined into a single string.
+    The values enclosed in the []'s may be:
+     - a single character
+     - an escaped character with a leading backslash (such as C{\-} or C{\]})
+     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) 
+         (C{\0x##} is also supported for backwards compatibility) 
+     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
+     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
+     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
+    """
+    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
+    try:
+        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
+    except Exception:
+        return ""
+
+def matchOnlyAtCol(n):
+    """
+    Helper method for defining parse actions that require matching at a specific
+    column in the input text.
+    """
+    def verifyCol(strg,locn,toks):
+        if col(locn,strg) != n:
+            raise ParseException(strg,locn,"matched token not at column %d" % n)
+    return verifyCol
+
+def replaceWith(replStr):
+    """
+    Helper method for common parse actions that simply return a literal value.  Especially
+    useful when used with C{L{transformString}()}.
+
+    Example::
+        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
+        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
+        term = na | num
+        
+        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
+    """
+    return lambda s,l,t: [replStr]
+
+def removeQuotes(s,l,t):
+    """
+    Helper parse action for removing quotation marks from parsed quoted strings.
+
+    Example::
+        # by default, quotation marks are included in parsed results
+        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
+
+        # use removeQuotes to strip quotation marks from parsed results
+        quotedString.setParseAction(removeQuotes)
+        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
+    """
+    return t[0][1:-1]
+
+def tokenMap(func, *args):
+    """
+    Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional 
+    args are passed, they are forwarded to the given function as additional arguments after
+    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
+    parsed data to an integer using base 16.
+
+    Example (compare the last to example in L{ParserElement.transformString}::
+        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
+        hex_ints.runTests('''
+            00 11 22 aa FF 0a 0d 1a
+            ''')
+        
+        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
+        OneOrMore(upperword).runTests('''
+            my kingdom for a horse
+            ''')
+
+        wd = Word(alphas).setParseAction(tokenMap(str.title))
+        OneOrMore(wd).setParseAction(' '.join).runTests('''
+            now is the winter of our discontent made glorious summer by this sun of york
+            ''')
+    prints::
+        00 11 22 aa FF 0a 0d 1a
+        [0, 17, 34, 170, 255, 10, 13, 26]
+
+        my kingdom for a horse
+        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
+
+        now is the winter of our discontent made glorious summer by this sun of york
+        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
+    """
+    def pa(s,l,t):
+        return [func(tokn, *args) for tokn in t]
+
+    try:
+        func_name = getattr(func, '__name__', 
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    pa.__name__ = func_name
+
+    return pa
+
+upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
+"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""
+
+downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
+"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
+    
+def _makeTags(tagStr, xml):
+    """Internal helper to construct opening and closing tag expressions, given a tag name"""
+    if isinstance(tagStr,basestring):
+        resname = tagStr
+        tagStr = Keyword(tagStr, caseless=not xml)
+    else:
+        resname = tagStr.name
+
+    tagAttrName = Word(alphas,alphanums+"_-:")
+    if (xml):
+        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    else:
+        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
+        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
+                Optional( Suppress("=") + tagAttrValue ) ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    closeTag = Combine(_L("")
+
+    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
+    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % resname)
+    openTag.tag = resname
+    closeTag.tag = resname
+    return openTag, closeTag
+
+def makeHTMLTags(tagStr):
+    """
+    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
+    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
+
+    Example::
+        text = 'More info at the pyparsing wiki page'
+        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
+        a,a_end = makeHTMLTags("A")
+        link_expr = a + SkipTo(a_end)("link_text") + a_end
+        
+        for link in link_expr.searchString(text):
+            # attributes in the  tag (like "href" shown here) are also accessible as named results
+            print(link.link_text, '->', link.href)
+    prints::
+        pyparsing -> http://pyparsing.wikispaces.com
+    """
+    return _makeTags( tagStr, False )
+
+def makeXMLTags(tagStr):
+    """
+    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
+    tags only in the given upper/lower case.
+
+    Example: similar to L{makeHTMLTags}
+    """
+    return _makeTags( tagStr, True )
+
+def withAttribute(*args,**attrDict):
+    """
+    Helper to create a validating parse action to be used with start tags created
+    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
+    with a required attribute value, to avoid false matches on common tags such as
+    C{} or C{
}. + + Call C{withAttribute} with a series of attribute names and values. Specify the list + of filter attributes names and values as: + - keyword arguments, as in C{(align="right")}, or + - as an explicit dict with C{**} operator, when an attribute name is also a Python + reserved word, as in C{**{"class":"Customer", "align":"right"}} + - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) + For attribute names with a namespace prefix, you must use the second form. Attribute + names are matched insensitive to upper/lower case. + + If just testing for C{class} (with or without a namespace), use C{L{withClass}}. + + To verify that the attribute exists, but without specifying a value, pass + C{withAttribute.ANY_VALUE} as the value. + + Example:: + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this has no type
+
+ + ''' + div,div_end = makeHTMLTags("div") + + # only match div tag having a type attribute with value "grid" + div_grid = div().setParseAction(withAttribute(type="grid")) + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.searchString(html): + print(grid_header.body) + + # construct a match with any div tag having a type attribute, regardless of the value + div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.searchString(html): + print(div_header.body) + prints:: + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + if args: + attrs = args[:] + else: + attrs = attrDict.items() + attrs = [(k,v) for k,v in attrs] + def pa(s,l,tokens): + for attrName,attrValue in attrs: + if attrName not in tokens: + raise ParseException(s,l,"no matching attribute " + attrName) + if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % + (attrName, tokens[attrName], attrValue)) + return pa +withAttribute.ANY_VALUE = object() + +def withClass(classname, namespace=''): + """ + Simplified version of C{L{withAttribute}} when matching on a div class - made + difficult because C{class} is a reserved word in Python. + + Example:: + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this <div> has no class
+
+ + ''' + div,div_end = makeHTMLTags("div") + div_grid = div().setParseAction(withClass("grid")) + + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.searchString(html): + print(grid_header.body) + + div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.searchString(html): + print(div_header.body) + prints:: + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + classattr = "%s:class" % namespace if namespace else "class" + return withAttribute(**{classattr : classname}) + +opAssoc = _Constants() +opAssoc.LEFT = object() +opAssoc.RIGHT = object() + +def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): + """ + Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary or + binary, left- or right-associative. Parse actions can also be attached + to operator expressions. The generated parser will also recognize the use + of parentheses to override operator precedences (see example below). + + Note: if you define a deep operator list, you may see performance issues + when using infixNotation. See L{ParserElement.enablePackrat} for a + mechanism to potentially improve your parser performance. + + Parameters: + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the + expression grammar; each tuple is of the form + (opExpr, numTerms, rightLeftAssoc, parseAction), where: + - opExpr is the pyparsing expression for the operator; + may also be a string, which will be converted to a Literal; + if numTerms is 3, opExpr is a tuple of two expressions, for the + two operators separating the 3 terms + - numTerms is the number of terms for this operator (must + be 1, 2, or 3) + - rightLeftAssoc is the indicator whether the operator is + right or left associative, using the pyparsing-defined + constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. + - parseAction is the parse action to be associated with + expressions matching this operator expression (the + parse action tuple member may be omitted); if the parse action + is passed a tuple or list of functions, this is equivalent to + calling C{setParseAction(*fn)} (L{ParserElement.setParseAction}) + - lpar - expression for matching left-parentheses (default=C{Suppress('(')}) + - rpar - expression for matching right-parentheses (default=C{Suppress(')')}) + + Example:: + # simple example of four-function arithmetic with ints and variable names + integer = pyparsing_common.signed_integer + varname = pyparsing_common.identifier + + arith_expr = infixNotation(integer | varname, + [ + ('-', 1, opAssoc.RIGHT), + (oneOf('* /'), 2, opAssoc.LEFT), + (oneOf('+ -'), 2, opAssoc.LEFT), + ]) + + arith_expr.runTests(''' + 5+3*6 + (5+3)*6 + -2--11 + ''', fullDump=False) + prints:: + 5+3*6 + [[5, '+', [3, '*', 6]]] + + (5+3)*6 + [[[5, '+', 3], '*', 6]] + + -2--11 + [[['-', 2], '-', ['-', 11]]] + """ + ret = Forward() + lastExpr = baseExpr | ( lpar + ret + rpar ) + for i,operDef in enumerate(opList): + opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] + termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr + if arity == 3: + if opExpr is None or len(opExpr) != 2: + raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") + opExpr1, opExpr2 = opExpr + thisExpr = Forward().setName(termName) + if rightLeftAssoc == opAssoc.LEFT: + if arity == 1: + matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) + else: + matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ + Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + elif rightLeftAssoc == opAssoc.RIGHT: + if arity == 1: + # try to avoid LR with this extra test + if not isinstance(opExpr, Optional): + opExpr = Optional(opExpr) + matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) + else: + matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ + Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + else: + raise ValueError("operator must indicate right or left associativity") + if pa: + if isinstance(pa, (tuple, list)): + matchExpr.setParseAction(*pa) + else: + matchExpr.setParseAction(pa) + thisExpr <<= ( matchExpr.setName(termName) | lastExpr ) + lastExpr = thisExpr + ret <<= lastExpr + return ret + +operatorPrecedence = infixNotation +"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release.""" + +dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes") +sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes") +quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'| + Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes") +unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") + +def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): + """ + Helper method for defining nested lists enclosed in opening and closing + delimiters ("(" and ")" are the default). + + Parameters: + - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression + - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression + - content - expression for items within the nested lists (default=C{None}) + - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString}) + + If an expression is not provided for the content argument, the nested + expression will capture all whitespace-delimited content between delimiters + as a list of separate values. + + Use the C{ignoreExpr} argument to define expressions that may contain + opening or closing characters that should not be treated as opening + or closing characters for nesting, such as quotedString or a comment + expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. + The default is L{quotedString}, but if no expressions are to be ignored, + then pass C{None} for this argument. + + Example:: + data_type = oneOf("void int short long char float double") + decl_data_type = Combine(data_type + Optional(Word('*'))) + ident = Word(alphas+'_', alphanums+'_') + number = pyparsing_common.number + arg = Group(decl_data_type + ident) + LPAR,RPAR = map(Suppress, "()") + + code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) + + c_function = (decl_data_type("type") + + ident("name") + + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + + code_body("body")) + c_function.ignore(cStyleComment) + + source_code = ''' + int is_odd(int x) { + return (x%2); + } + + int dec_to_hex(char hchar) { + if (hchar >= '0' && hchar <= '9') { + return (ord(hchar)-ord('0')); + } else { + return (10+ord(hchar)-ord('A')); + } + } + ''' + for func in c_function.searchString(source_code): + print("%(name)s (%(type)s) args: %(args)s" % func) + + prints:: + is_odd (int) args: [['int', 'x']] + dec_to_hex (int) args: [['char', 'hchar']] + """ + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener,basestring) and isinstance(closer,basestring): + if len(opener) == 1 and len(closer)==1: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t:t[0].strip())) + else: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + ~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + raise ValueError("opening and closing arguments must be strings if no content expression is given") + ret = Forward() + if ignoreExpr is not None: + ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) + else: + ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) + ret.setName('nested %s%s expression' % (opener,closer)) + return ret + +def indentedBlock(blockStatementExpr, indentStack, indent=True): + """ + Helper method for defining space-delimited indentation blocks, such as + those used to define block statements in Python source code. + + Parameters: + - blockStatementExpr - expression defining syntax of statement that + is repeated within the indented block + - indentStack - list created by caller to manage indentation stack + (multiple statementWithIndentedBlock expressions within a single grammar + should share a common indentStack) + - indent - boolean indicating whether block must be indented beyond the + the current level; set to False for block of left-most statements + (default=C{True}) + + A valid block must contain at least one C{blockStatement}. + + Example:: + data = ''' + def A(z): + A1 + B = 100 + G = A2 + A2 + A3 + B + def BB(a,b,c): + BB1 + def BBA(): + bba1 + bba2 + bba3 + C + D + def spam(x,y): + def eggs(z): + pass + ''' + + + indentStack = [1] + stmt = Forward() + + identifier = Word(alphas, alphanums) + funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":") + func_body = indentedBlock(stmt, indentStack) + funcDef = Group( funcDecl + func_body ) + + rvalue = Forward() + funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") + rvalue << (funcCall | identifier | Word(nums)) + assignment = Group(identifier + "=" + rvalue) + stmt << ( funcDef | assignment | identifier ) + + module_body = OneOrMore(stmt) + + parseTree = module_body.parseString(data) + parseTree.pprint() + prints:: + [['def', + 'A', + ['(', 'z', ')'], + ':', + [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], + 'B', + ['def', + 'BB', + ['(', 'a', 'b', 'c', ')'], + ':', + [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], + 'C', + 'D', + ['def', + 'spam', + ['(', 'x', 'y', ')'], + ':', + [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] + """ + def checkPeerIndent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseFatalException(s,l,"illegal nesting") + raise ParseException(s,l,"not a peer entry") + + def checkSubIndent(s,l,t): + curCol = col(l,s) + if curCol > indentStack[-1]: + indentStack.append( curCol ) + else: + raise ParseException(s,l,"not a subentry") + + def checkUnindent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + raise ParseException(s,l,"not an unindent") + indentStack.pop() + + NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) + INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') + PEER = Empty().setParseAction(checkPeerIndent).setName('') + UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') + if indent: + smExpr = Group( Optional(NL) + + #~ FollowedBy(blockStatementExpr) + + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) + else: + smExpr = Group( Optional(NL) + + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr.setName('indented block') + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag')) +_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\'')) +commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") +def replaceHTMLEntity(t): + """Helper parser action to replace common HTML entities with their special characters""" + return _htmlEntityMap.get(t.entity) + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") +"Comment of the form C{/* ... */}" + +htmlComment = Regex(r"").setName("HTML comment") +"Comment of the form C{}" + +restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") +dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") +"Comment of the form C{// ... (to end of line)}" + +cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment") +"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}" + +javaStyleComment = cppStyleComment +"Same as C{L{cppStyleComment}}" + +pythonStyleComment = Regex(r"#.*").setName("Python style comment") +"Comment of the form C{# ... (to end of line)}" + +_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + + Optional( Word(" \t") + + ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem") +commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList") +"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas. + This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}.""" + +# some other useful expressions - using lower-case class name since we are really using this as a namespace +class pyparsing_common: + """ + Here are some common low-level expressions that may be useful in jump-starting parser development: + - numeric forms (L{integers}, L{reals}, L{scientific notation}) + - common L{programming identifiers} + - network addresses (L{MAC}, L{IPv4}, L{IPv6}) + - ISO8601 L{dates} and L{datetime} + - L{UUID} + - L{comma-separated list} + Parse actions: + - C{L{convertToInteger}} + - C{L{convertToFloat}} + - C{L{convertToDate}} + - C{L{convertToDatetime}} + - C{L{stripHTMLTags}} + - C{L{upcaseTokens}} + - C{L{downcaseTokens}} + + Example:: + pyparsing_common.number.runTests(''' + # any int or real number, returned as the appropriate type + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.fnumber.runTests(''' + # any int or real number, returned as float + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.hex_integer.runTests(''' + # hex numbers + 100 + FF + ''') + + pyparsing_common.fraction.runTests(''' + # fractions + 1/2 + -3/4 + ''') + + pyparsing_common.mixed_integer.runTests(''' + # mixed fractions + 1 + 1/2 + -3/4 + 1-3/4 + ''') + + import uuid + pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) + pyparsing_common.uuid.runTests(''' + # uuid + 12345678-1234-5678-1234-567812345678 + ''') + prints:: + # any int or real number, returned as the appropriate type + 100 + [100] + + -100 + [-100] + + +100 + [100] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # any int or real number, returned as float + 100 + [100.0] + + -100 + [-100.0] + + +100 + [100.0] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # hex numbers + 100 + [256] + + FF + [255] + + # fractions + 1/2 + [0.5] + + -3/4 + [-0.75] + + # mixed fractions + 1 + [1] + + 1/2 + [0.5] + + -3/4 + [-0.75] + + 1-3/4 + [1.75] + + # uuid + 12345678-1234-5678-1234-567812345678 + [UUID('12345678-1234-5678-1234-567812345678')] + """ + + convertToInteger = tokenMap(int) + """ + Parse action for converting parsed integers to Python int + """ + + convertToFloat = tokenMap(float) + """ + Parse action for converting parsed numbers to Python float + """ + + integer = Word(nums).setName("integer").setParseAction(convertToInteger) + """expression that parses an unsigned integer, returns an int""" + + hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) + """expression that parses a hexadecimal integer, returns an int""" + + signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) + """expression that parses an integer with optional leading sign, returns an int""" + + fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") + """fractional expression of an integer divided by an integer, returns a float""" + fraction.addParseAction(lambda t: t[0]/t[-1]) + + mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") + """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" + mixed_integer.addParseAction(sum) + + real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) + """expression that parses a floating point number and returns a float""" + + sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) + """expression that parses a floating point number with optional scientific notation and returns a float""" + + # streamlining this expression makes the docs nicer-looking + number = (sci_real | real | signed_integer).streamline() + """any numeric expression, returns the corresponding Python type""" + + fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) + """any int or real number, returned as float""" + + identifier = Word(alphas+'_', alphanums+'_').setName("identifier") + """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" + + ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") + "IPv4 address (C{0.0.0.0 - 255.255.255.255})" + + _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") + _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") + _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") + _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) + _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") + ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") + "IPv6 address (long, short, or mixed form)" + + mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") + "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" + + @staticmethod + def convertToDate(fmt="%Y-%m-%d"): + """ + Helper to create a parse action for converting parsed date string to Python datetime.date + + Params - + - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) + + Example:: + date_expr = pyparsing_common.iso8601_date.copy() + date_expr.setParseAction(pyparsing_common.convertToDate()) + print(date_expr.parseString("1999-12-31")) + prints:: + [datetime.date(1999, 12, 31)] + """ + def cvt_fn(s,l,t): + try: + return datetime.strptime(t[0], fmt).date() + except ValueError as ve: + raise ParseException(s, l, str(ve)) + return cvt_fn + + @staticmethod + def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): + """ + Helper to create a parse action for converting parsed datetime string to Python datetime.datetime + + Params - + - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"}) + + Example:: + dt_expr = pyparsing_common.iso8601_datetime.copy() + dt_expr.setParseAction(pyparsing_common.convertToDatetime()) + print(dt_expr.parseString("1999-12-31T23:59:59.999")) + prints:: + [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] + """ + def cvt_fn(s,l,t): + try: + return datetime.strptime(t[0], fmt) + except ValueError as ve: + raise ParseException(s, l, str(ve)) + return cvt_fn + + iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") + "ISO8601 date (C{yyyy-mm-dd})" + + iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") + "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" + + uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") + "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" + + _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() + @staticmethod + def stripHTMLTags(s, l, tokens): + """ + Parse action to remove HTML tags from web page HTML source + + Example:: + # strip HTML links from normal text + text = 'More info at the
pyparsing wiki page' + td,td_end = makeHTMLTags("TD") + table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end + + print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page' + """ + return pyparsing_common._html_stripper.transformString(tokens[0]) + + _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') + + Optional( White(" \t") ) ) ).streamline().setName("commaItem") + comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list") + """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" + + upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) + """Parse action to convert tokens to upper case.""" + + downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) + """Parse action to convert tokens to lower case.""" + + +if __name__ == "__main__": + + selectToken = CaselessLiteral("select") + fromToken = CaselessLiteral("from") + + ident = Word(alphas, alphanums + "_$") + + columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + columnNameList = Group(delimitedList(columnName)).setName("columns") + columnSpec = ('*' | columnNameList) + + tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + tableNameList = Group(delimitedList(tableName)).setName("tables") + + simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") + + # demo runTests method, including embedded comments in test string + simpleSQL.runTests(""" + # '*' as column list and dotted table name + select * from SYS.XYZZY + + # caseless match on "SELECT", and casts back to "select" + SELECT * from XYZZY, ABC + + # list of column names, and mixed case SELECT keyword + Select AA,BB,CC from Sys.dual + + # multiple tables + Select A, B, C from Sys.dual, Table2 + + # invalid SELECT keyword - should fail + Xelect A, B, C from Sys.dual + + # incomplete command - should fail + Select + + # invalid column name - should fail + Select ^^^ frox Sys.dual + + """) + + pyparsing_common.number.runTests(""" + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + """) + + # any int or real number, returned as float + pyparsing_common.fnumber.runTests(""" + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + """) + + pyparsing_common.hex_integer.runTests(""" + 100 + FF + """) + + import uuid + pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) + pyparsing_common.uuid.runTests(""" + 12345678-1234-5678-1234-567812345678 + """) diff --git a/lib/pkg_resources/_vendor/vendored.txt b/lib/pkg_resources/_vendor/vendored.txt new file mode 100644 index 00000000..444ed25b --- /dev/null +++ b/lib/pkg_resources/_vendor/vendored.txt @@ -0,0 +1,3 @@ +packaging==21.2 +pyparsing==2.2.1 +appdirs==1.4.3 diff --git a/lib/pkg_resources/api_tests.txt b/lib/pkg_resources/api_tests.txt new file mode 100644 index 00000000..ded18800 --- /dev/null +++ b/lib/pkg_resources/api_tests.txt @@ -0,0 +1,401 @@ +Pluggable Distributions of Python Software +========================================== + +Distributions +------------- + +A "Distribution" is a collection of files that represent a "Release" of a +"Project" as of a particular point in time, denoted by a +"Version":: + + >>> import sys, pkg_resources + >>> from pkg_resources import Distribution + >>> Distribution(project_name="Foo", version="1.2") + Foo 1.2 + +Distributions have a location, which can be a filename, URL, or really anything +else you care to use:: + + >>> dist = Distribution( + ... location="http://example.com/something", + ... project_name="Bar", version="0.9" + ... ) + + >>> dist + Bar 0.9 (http://example.com/something) + + +Distributions have various introspectable attributes:: + + >>> dist.location + 'http://example.com/something' + + >>> dist.project_name + 'Bar' + + >>> dist.version + '0.9' + + >>> dist.py_version == '{}.{}'.format(*sys.version_info) + True + + >>> print(dist.platform) + None + +Including various computed attributes:: + + >>> from pkg_resources import parse_version + >>> dist.parsed_version == parse_version(dist.version) + True + + >>> dist.key # case-insensitive form of the project name + 'bar' + +Distributions are compared (and hashed) by version first:: + + >>> Distribution(version='1.0') == Distribution(version='1.0') + True + >>> Distribution(version='1.0') == Distribution(version='1.1') + False + >>> Distribution(version='1.0') < Distribution(version='1.1') + True + +but also by project name (case-insensitive), platform, Python version, +location, etc.:: + + >>> Distribution(project_name="Foo",version="1.0") == \ + ... Distribution(project_name="Foo",version="1.0") + True + + >>> Distribution(project_name="Foo",version="1.0") == \ + ... Distribution(project_name="foo",version="1.0") + True + + >>> Distribution(project_name="Foo",version="1.0") == \ + ... Distribution(project_name="Foo",version="1.1") + False + + >>> Distribution(project_name="Foo",py_version="2.3",version="1.0") == \ + ... Distribution(project_name="Foo",py_version="2.4",version="1.0") + False + + >>> Distribution(location="spam",version="1.0") == \ + ... Distribution(location="spam",version="1.0") + True + + >>> Distribution(location="spam",version="1.0") == \ + ... Distribution(location="baz",version="1.0") + False + + + +Hash and compare distribution by prio/plat + +Get version from metadata +provider capabilities +egg_name() +as_requirement() +from_location, from_filename (w/path normalization) + +Releases may have zero or more "Requirements", which indicate +what releases of another project the release requires in order to +function. A Requirement names the other project, expresses some criteria +as to what releases of that project are acceptable, and lists any "Extras" +that the requiring release may need from that project. (An Extra is an +optional feature of a Release, that can only be used if its additional +Requirements are satisfied.) + + + +The Working Set +--------------- + +A collection of active distributions is called a Working Set. Note that a +Working Set can contain any importable distribution, not just pluggable ones. +For example, the Python standard library is an importable distribution that +will usually be part of the Working Set, even though it is not pluggable. +Similarly, when you are doing development work on a project, the files you are +editing are also a Distribution. (And, with a little attention to the +directory names used, and including some additional metadata, such a +"development distribution" can be made pluggable as well.) + + >>> from pkg_resources import WorkingSet + +A working set's entries are the sys.path entries that correspond to the active +distributions. By default, the working set's entries are the items on +``sys.path``:: + + >>> ws = WorkingSet() + >>> ws.entries == sys.path + True + +But you can also create an empty working set explicitly, and add distributions +to it:: + + >>> ws = WorkingSet([]) + >>> ws.add(dist) + >>> ws.entries + ['http://example.com/something'] + >>> dist in ws + True + >>> Distribution('foo',version="") in ws + False + +And you can iterate over its distributions:: + + >>> list(ws) + [Bar 0.9 (http://example.com/something)] + +Adding the same distribution more than once is a no-op:: + + >>> ws.add(dist) + >>> list(ws) + [Bar 0.9 (http://example.com/something)] + +For that matter, adding multiple distributions for the same project also does +nothing, because a working set can only hold one active distribution per +project -- the first one added to it:: + + >>> ws.add( + ... Distribution( + ... 'http://example.com/something', project_name="Bar", + ... version="7.2" + ... ) + ... ) + >>> list(ws) + [Bar 0.9 (http://example.com/something)] + +You can append a path entry to a working set using ``add_entry()``:: + + >>> ws.entries + ['http://example.com/something'] + >>> ws.add_entry(pkg_resources.__file__) + >>> ws.entries + ['http://example.com/something', '...pkg_resources...'] + +Multiple additions result in multiple entries, even if the entry is already in +the working set (because ``sys.path`` can contain the same entry more than +once):: + + >>> ws.add_entry(pkg_resources.__file__) + >>> ws.entries + ['...example.com...', '...pkg_resources...', '...pkg_resources...'] + +And you can specify the path entry a distribution was found under, using the +optional second parameter to ``add()``:: + + >>> ws = WorkingSet([]) + >>> ws.add(dist,"foo") + >>> ws.entries + ['foo'] + +But even if a distribution is found under multiple path entries, it still only +shows up once when iterating the working set: + + >>> ws.add_entry(ws.entries[0]) + >>> list(ws) + [Bar 0.9 (http://example.com/something)] + +You can ask a WorkingSet to ``find()`` a distribution matching a requirement:: + + >>> from pkg_resources import Requirement + >>> print(ws.find(Requirement.parse("Foo==1.0"))) # no match, return None + None + + >>> ws.find(Requirement.parse("Bar==0.9")) # match, return distribution + Bar 0.9 (http://example.com/something) + +Note that asking for a conflicting version of a distribution already in a +working set triggers a ``pkg_resources.VersionConflict`` error: + + >>> try: + ... ws.find(Requirement.parse("Bar==1.0")) + ... except pkg_resources.VersionConflict as exc: + ... print(str(exc)) + ... else: + ... raise AssertionError("VersionConflict was not raised") + (Bar 0.9 (http://example.com/something), Requirement.parse('Bar==1.0')) + +You can subscribe a callback function to receive notifications whenever a new +distribution is added to a working set. The callback is immediately invoked +once for each existing distribution in the working set, and then is called +again for new distributions added thereafter:: + + >>> def added(dist): print("Added %s" % dist) + >>> ws.subscribe(added) + Added Bar 0.9 + >>> foo12 = Distribution(project_name="Foo", version="1.2", location="f12") + >>> ws.add(foo12) + Added Foo 1.2 + +Note, however, that only the first distribution added for a given project name +will trigger a callback, even during the initial ``subscribe()`` callback:: + + >>> foo14 = Distribution(project_name="Foo", version="1.4", location="f14") + >>> ws.add(foo14) # no callback, because Foo 1.2 is already active + + >>> ws = WorkingSet([]) + >>> ws.add(foo12) + >>> ws.add(foo14) + >>> ws.subscribe(added) + Added Foo 1.2 + +And adding a callback more than once has no effect, either:: + + >>> ws.subscribe(added) # no callbacks + + # and no double-callbacks on subsequent additions, either + >>> just_a_test = Distribution(project_name="JustATest", version="0.99") + >>> ws.add(just_a_test) + Added JustATest 0.99 + + +Finding Plugins +--------------- + +``WorkingSet`` objects can be used to figure out what plugins in an +``Environment`` can be loaded without any resolution errors:: + + >>> from pkg_resources import Environment + + >>> plugins = Environment([]) # normally, a list of plugin directories + >>> plugins.add(foo12) + >>> plugins.add(foo14) + >>> plugins.add(just_a_test) + +In the simplest case, we just get the newest version of each distribution in +the plugin environment:: + + >>> ws = WorkingSet([]) + >>> ws.find_plugins(plugins) + ([JustATest 0.99, Foo 1.4 (f14)], {}) + +But if there's a problem with a version conflict or missing requirements, the +method falls back to older versions, and the error info dict will contain an +exception instance for each unloadable plugin:: + + >>> ws.add(foo12) # this will conflict with Foo 1.4 + >>> ws.find_plugins(plugins) + ([JustATest 0.99, Foo 1.2 (f12)], {Foo 1.4 (f14): VersionConflict(...)}) + +But if you disallow fallbacks, the failed plugin will be skipped instead of +trying older versions:: + + >>> ws.find_plugins(plugins, fallback=False) + ([JustATest 0.99], {Foo 1.4 (f14): VersionConflict(...)}) + + + +Platform Compatibility Rules +---------------------------- + +On the Mac, there are potential compatibility issues for modules compiled +on newer versions of macOS than what the user is running. Additionally, +macOS will soon have two platforms to contend with: Intel and PowerPC. + +Basic equality works as on other platforms:: + + >>> from pkg_resources import compatible_platforms as cp + >>> reqd = 'macosx-10.4-ppc' + >>> cp(reqd, reqd) + True + >>> cp("win32", reqd) + False + +Distributions made on other machine types are not compatible:: + + >>> cp("macosx-10.4-i386", reqd) + False + +Distributions made on earlier versions of the OS are compatible, as +long as they are from the same top-level version. The patchlevel version +number does not matter:: + + >>> cp("macosx-10.4-ppc", reqd) + True + >>> cp("macosx-10.3-ppc", reqd) + True + >>> cp("macosx-10.5-ppc", reqd) + False + >>> cp("macosx-9.5-ppc", reqd) + False + +Backwards compatibility for packages made via earlier versions of +setuptools is provided as well:: + + >>> cp("darwin-8.2.0-Power_Macintosh", reqd) + True + >>> cp("darwin-7.2.0-Power_Macintosh", reqd) + True + >>> cp("darwin-8.2.0-Power_Macintosh", "macosx-10.3-ppc") + False + + +Environment Markers +------------------- + + >>> from pkg_resources import invalid_marker as im, evaluate_marker as em + >>> import os + + >>> print(im("sys_platform")) + Invalid marker: 'sys_platform', parse error at '' + + >>> print(im("sys_platform==")) + Invalid marker: 'sys_platform==', parse error at '' + + >>> print(im("sys_platform=='win32'")) + False + + >>> print(im("sys=='x'")) + Invalid marker: "sys=='x'", parse error at "sys=='x'" + + >>> print(im("(extra)")) + Invalid marker: '(extra)', parse error at ')' + + >>> print(im("(extra")) + Invalid marker: '(extra', parse error at '' + + >>> print(im("os.open('foo')=='y'")) + Invalid marker: "os.open('foo')=='y'", parse error at 'os.open(' + + >>> print(im("'x'=='y' and os.open('foo')=='y'")) # no short-circuit! + Invalid marker: "'x'=='y' and os.open('foo')=='y'", parse error at 'and os.o' + + >>> print(im("'x'=='x' or os.open('foo')=='y'")) # no short-circuit! + Invalid marker: "'x'=='x' or os.open('foo')=='y'", parse error at 'or os.op' + + >>> print(im("'x' < 'y' < 'z'")) + Invalid marker: "'x' < 'y' < 'z'", parse error at "< 'z'" + + >>> print(im("r'x'=='x'")) + Invalid marker: "r'x'=='x'", parse error at "r'x'=='x" + + >>> print(im("'''x'''=='x'")) + Invalid marker: "'''x'''=='x'", parse error at "'x'''=='" + + >>> print(im('"""x"""=="x"')) + Invalid marker: '"""x"""=="x"', parse error at '"x"""=="' + + >>> print(im(r"x\n=='x'")) + Invalid marker: "x\\n=='x'", parse error at "x\\n=='x'" + + >>> print(im("os.open=='y'")) + Invalid marker: "os.open=='y'", parse error at 'os.open=' + + >>> em("sys_platform=='win32'") == (sys.platform=='win32') + True + + >>> em("python_version >= '2.7'") + True + + >>> em("python_version > '2.6'") + True + + >>> im("implementation_name=='cpython'") + False + + >>> im("platform_python_implementation=='CPython'") + False + + >>> im("implementation_version=='3.5.1'") + False diff --git a/lib/pkg_resources/extern/__init__.py b/lib/pkg_resources/extern/__init__.py new file mode 100644 index 00000000..fed59295 --- /dev/null +++ b/lib/pkg_resources/extern/__init__.py @@ -0,0 +1,73 @@ +import importlib.util +import sys + + +class VendorImporter: + """ + A PEP 302 meta path importer for finding optionally-vendored + or otherwise naturally-installed packages from root_name. + """ + + def __init__(self, root_name, vendored_names=(), vendor_pkg=None): + self.root_name = root_name + self.vendored_names = set(vendored_names) + self.vendor_pkg = vendor_pkg or root_name.replace('extern', '_vendor') + + @property + def search_path(self): + """ + Search first the vendor package then as a natural package. + """ + yield self.vendor_pkg + '.' + yield '' + + def _module_matches_namespace(self, fullname): + """Figure out if the target module is vendored.""" + root, base, target = fullname.partition(self.root_name + '.') + return not root and any(map(target.startswith, self.vendored_names)) + + def load_module(self, fullname): + """ + Iterate over the search path to locate and load fullname. + """ + root, base, target = fullname.partition(self.root_name + '.') + for prefix in self.search_path: + try: + extant = prefix + target + __import__(extant) + mod = sys.modules[extant] + sys.modules[fullname] = mod + return mod + except ImportError: + pass + else: + raise ImportError( + "The '{target}' package is required; " + "normally this is bundled with this package so if you get " + "this warning, consult the packager of your " + "distribution.".format(**locals()) + ) + + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + + def find_spec(self, fullname, path=None, target=None): + """Return a module spec for vendored names.""" + return ( + importlib.util.spec_from_loader(fullname, self) + if self._module_matches_namespace(fullname) else None + ) + + def install(self): + """ + Install this importer into sys.meta_path if not already present. + """ + if self not in sys.meta_path: + sys.meta_path.append(self) + + +names = 'packaging', 'pyparsing', 'appdirs' +VendorImporter(__name__, names).install() diff --git a/lib/pkg_resources/tests/__init__.py b/lib/pkg_resources/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/tests/data/my-test-package-source/setup.cfg b/lib/pkg_resources/tests/data/my-test-package-source/setup.cfg new file mode 100644 index 00000000..e69de29b diff --git a/lib/pkg_resources/tests/data/my-test-package-source/setup.py b/lib/pkg_resources/tests/data/my-test-package-source/setup.py new file mode 100644 index 00000000..fe80d28f --- /dev/null +++ b/lib/pkg_resources/tests/data/my-test-package-source/setup.py @@ -0,0 +1,6 @@ +import setuptools +setuptools.setup( + name="my-test-package", + version="1.0", + zip_safe=True, +) diff --git a/lib/pkg_resources/tests/data/my-test-package-zip/my-test-package.zip b/lib/pkg_resources/tests/data/my-test-package-zip/my-test-package.zip new file mode 100644 index 0000000000000000000000000000000000000000..81f9a0170f7aca13a0d1d8e5201c2c186554ce7e GIT binary patch literal 1809 zcmWIWW@h1H0D)azeSu&Gl#pbQVaTnFFG(#fi7!Y@&Q45E)k{rJ*UijJ%hwML;bdUe zpMO0bgi9;985mh!GBYrMi2%5fTY*MSX1i|02s9Fe)$kcvl3x&?lUkOVqgPT<0ybX_ zXf_C=na_pW0$qF-q@)(4=B1?OC0EAhWaecTQ(%uGK6|P%3v`PU(^3igK?E2i91Ng9 z>Qt-#nFi!t0AdwCnp#)4OE7jTz7~^SAlTG(bAA8AyDPR;f4!dkX8HNea$EmA zwK}WtMDnKS#3`EQP;!goS%>XQ>Q?o zm|3%DM}%Z96?pah?Q{8e$DdA|KbtFL*1Rr(A|2h6C-j}3zh3rCE$x|tORdr5;LlwX zx9nUZrY@c;Hl^Wh6DQjfm1#?!ia%9yVL&9U4{oU0ffE=l#i=Ew1$xP8>0qa`%*2{J z5upMy;`CJ|oe*G%WB@S-+=zloxC!NRF%tOcGrA}AwE{K#bTxG~PM$xn`OL}S>j58L zJywMipb!W>g&G2(ml#h^T`DG;{_OcpEy-7s4y~(|Jj5D;Ma>j94s5Nm*O1WXqEz}f>TdjX9DWiPC$3=%$=nG4y~L2%*?dE<9UVG zTUYDcne&^246YbI_~d=YcWcmzwO4dKb@eXldib34^YMS`6etukYxeAjkj$k5ub#hs zF8}WM(~0wEbA`;B*CkM-qkHm%zSHy9%buyFJyUS0HJTj!xohH&~mV7-!xlG;;_T+IdotQYo{ zdscNMxpHZVUROLaU%=<_fu5xwzcf|f-Fr0ogxjs30UtHBu5OoL>{fg&CcQwgsq5zY z{)cy0Y^(lyJ^9V@^PAE-|c literal 0 HcmV?d00001 diff --git a/lib/pkg_resources/tests/test_find_distributions.py b/lib/pkg_resources/tests/test_find_distributions.py new file mode 100644 index 00000000..b01b4827 --- /dev/null +++ b/lib/pkg_resources/tests/test_find_distributions.py @@ -0,0 +1,43 @@ +import py +import pytest +import pkg_resources + + +TESTS_DATA_DIR = py.path.local(__file__).dirpath('data') + + +class TestFindDistributions: + + @pytest.fixture + def target_dir(self, tmpdir): + target_dir = tmpdir.mkdir('target') + # place a .egg named directory in the target that is not an egg: + target_dir.mkdir('not.an.egg') + return target_dir + + def test_non_egg_dir_named_egg(self, target_dir): + dists = pkg_resources.find_distributions(str(target_dir)) + assert not list(dists) + + def test_standalone_egg_directory(self, target_dir): + (TESTS_DATA_DIR / 'my-test-package_unpacked-egg').copy(target_dir) + dists = pkg_resources.find_distributions(str(target_dir)) + assert [dist.project_name for dist in dists] == ['my-test-package'] + dists = pkg_resources.find_distributions(str(target_dir), only=True) + assert not list(dists) + + def test_zipped_egg(self, target_dir): + (TESTS_DATA_DIR / 'my-test-package_zipped-egg').copy(target_dir) + dists = pkg_resources.find_distributions(str(target_dir)) + assert [dist.project_name for dist in dists] == ['my-test-package'] + dists = pkg_resources.find_distributions(str(target_dir), only=True) + assert not list(dists) + + def test_zipped_sdist_one_level_removed(self, target_dir): + (TESTS_DATA_DIR / 'my-test-package-zip').copy(target_dir) + dists = pkg_resources.find_distributions( + str(target_dir / "my-test-package.zip")) + assert [dist.project_name for dist in dists] == ['my-test-package'] + dists = pkg_resources.find_distributions( + str(target_dir / "my-test-package.zip"), only=True) + assert not list(dists) diff --git a/lib/pkg_resources/tests/test_markers.py b/lib/pkg_resources/tests/test_markers.py new file mode 100644 index 00000000..15a3b499 --- /dev/null +++ b/lib/pkg_resources/tests/test_markers.py @@ -0,0 +1,8 @@ +import mock + +from pkg_resources import evaluate_marker + + +@mock.patch('platform.python_version', return_value='2.7.10') +def test_ordering(python_version_mock): + assert evaluate_marker("python_full_version > '2.7.3'") is True diff --git a/lib/pkg_resources/tests/test_pkg_resources.py b/lib/pkg_resources/tests/test_pkg_resources.py new file mode 100644 index 00000000..6518820e --- /dev/null +++ b/lib/pkg_resources/tests/test_pkg_resources.py @@ -0,0 +1,415 @@ +import sys +import tempfile +import os +import zipfile +import datetime +import time +import subprocess +import stat +import distutils.dist +import distutils.command.install_egg_info + +try: + from unittest import mock +except ImportError: + import mock + +from pkg_resources import ( + DistInfoDistribution, Distribution, EggInfoDistribution, +) + +import pytest + +import pkg_resources + + +def timestamp(dt): + """ + Return a timestamp for a local, naive datetime instance. + """ + try: + return dt.timestamp() + except AttributeError: + # Python 3.2 and earlier + return time.mktime(dt.timetuple()) + + +class EggRemover(str): + def __call__(self): + if self in sys.path: + sys.path.remove(self) + if os.path.exists(self): + os.remove(self) + + +class TestZipProvider: + finalizers = [] + + ref_time = datetime.datetime(2013, 5, 12, 13, 25, 0) + "A reference time for a file modification" + + @classmethod + def setup_class(cls): + "create a zip egg and add it to sys.path" + egg = tempfile.NamedTemporaryFile(suffix='.egg', delete=False) + zip_egg = zipfile.ZipFile(egg, 'w') + zip_info = zipfile.ZipInfo() + zip_info.filename = 'mod.py' + zip_info.date_time = cls.ref_time.timetuple() + zip_egg.writestr(zip_info, 'x = 3\n') + zip_info = zipfile.ZipInfo() + zip_info.filename = 'data.dat' + zip_info.date_time = cls.ref_time.timetuple() + zip_egg.writestr(zip_info, 'hello, world!') + zip_info = zipfile.ZipInfo() + zip_info.filename = 'subdir/mod2.py' + zip_info.date_time = cls.ref_time.timetuple() + zip_egg.writestr(zip_info, 'x = 6\n') + zip_info = zipfile.ZipInfo() + zip_info.filename = 'subdir/data2.dat' + zip_info.date_time = cls.ref_time.timetuple() + zip_egg.writestr(zip_info, 'goodbye, world!') + zip_egg.close() + egg.close() + + sys.path.append(egg.name) + subdir = os.path.join(egg.name, 'subdir') + sys.path.append(subdir) + cls.finalizers.append(EggRemover(subdir)) + cls.finalizers.append(EggRemover(egg.name)) + + @classmethod + def teardown_class(cls): + for finalizer in cls.finalizers: + finalizer() + + def test_resource_listdir(self): + import mod + zp = pkg_resources.ZipProvider(mod) + + expected_root = ['data.dat', 'mod.py', 'subdir'] + assert sorted(zp.resource_listdir('')) == expected_root + + expected_subdir = ['data2.dat', 'mod2.py'] + assert sorted(zp.resource_listdir('subdir')) == expected_subdir + assert sorted(zp.resource_listdir('subdir/')) == expected_subdir + + assert zp.resource_listdir('nonexistent') == [] + assert zp.resource_listdir('nonexistent/') == [] + + import mod2 + zp2 = pkg_resources.ZipProvider(mod2) + + assert sorted(zp2.resource_listdir('')) == expected_subdir + + assert zp2.resource_listdir('subdir') == [] + assert zp2.resource_listdir('subdir/') == [] + + def test_resource_filename_rewrites_on_change(self): + """ + If a previous call to get_resource_filename has saved the file, but + the file has been subsequently mutated with different file of the + same size and modification time, it should not be overwritten on a + subsequent call to get_resource_filename. + """ + import mod + manager = pkg_resources.ResourceManager() + zp = pkg_resources.ZipProvider(mod) + filename = zp.get_resource_filename(manager, 'data.dat') + actual = datetime.datetime.fromtimestamp(os.stat(filename).st_mtime) + assert actual == self.ref_time + f = open(filename, 'w') + f.write('hello, world?') + f.close() + ts = timestamp(self.ref_time) + os.utime(filename, (ts, ts)) + filename = zp.get_resource_filename(manager, 'data.dat') + with open(filename) as f: + assert f.read() == 'hello, world!' + manager.cleanup_resources() + + +class TestResourceManager: + def test_get_cache_path(self): + mgr = pkg_resources.ResourceManager() + path = mgr.get_cache_path('foo') + type_ = str(type(path)) + message = "Unexpected type from get_cache_path: " + type_ + assert isinstance(path, str), message + + def test_get_cache_path_race(self, tmpdir): + # Patch to os.path.isdir to create a race condition + def patched_isdir(dirname, unpatched_isdir=pkg_resources.isdir): + patched_isdir.dirnames.append(dirname) + + was_dir = unpatched_isdir(dirname) + if not was_dir: + os.makedirs(dirname) + return was_dir + + patched_isdir.dirnames = [] + + # Get a cache path with a "race condition" + mgr = pkg_resources.ResourceManager() + mgr.set_extraction_path(str(tmpdir)) + + archive_name = os.sep.join(('foo', 'bar', 'baz')) + with mock.patch.object(pkg_resources, 'isdir', new=patched_isdir): + mgr.get_cache_path(archive_name) + + # Because this test relies on the implementation details of this + # function, these assertions are a sentinel to ensure that the + # test suite will not fail silently if the implementation changes. + called_dirnames = patched_isdir.dirnames + assert len(called_dirnames) == 2 + assert called_dirnames[0].split(os.sep)[-2:] == ['foo', 'bar'] + assert called_dirnames[1].split(os.sep)[-1:] == ['foo'] + + """ + Tests to ensure that pkg_resources runs independently from setuptools. + """ + + def test_setuptools_not_imported(self): + """ + In a separate Python environment, import pkg_resources and assert + that action doesn't cause setuptools to be imported. + """ + lines = ( + 'import pkg_resources', + 'import sys', + ( + 'assert "setuptools" not in sys.modules, ' + '"setuptools was imported"' + ), + ) + cmd = [sys.executable, '-c', '; '.join(lines)] + subprocess.check_call(cmd) + + +def make_test_distribution(metadata_path, metadata): + """ + Make a test Distribution object, and return it. + + :param metadata_path: the path to the metadata file that should be + created. This should be inside a distribution directory that should + also be created. For example, an argument value might end with + ".dist-info/METADATA". + :param metadata: the desired contents of the metadata file, as bytes. + """ + dist_dir = os.path.dirname(metadata_path) + os.mkdir(dist_dir) + with open(metadata_path, 'wb') as f: + f.write(metadata) + dists = list(pkg_resources.distributions_from_metadata(dist_dir)) + dist, = dists + + return dist + + +def test_get_metadata__bad_utf8(tmpdir): + """ + Test a metadata file with bytes that can't be decoded as utf-8. + """ + filename = 'METADATA' + # Convert the tmpdir LocalPath object to a string before joining. + metadata_path = os.path.join(str(tmpdir), 'foo.dist-info', filename) + # Encode a non-ascii string with the wrong encoding (not utf-8). + metadata = 'née'.encode('iso-8859-1') + dist = make_test_distribution(metadata_path, metadata=metadata) + + with pytest.raises(UnicodeDecodeError) as excinfo: + dist.get_metadata(filename) + + exc = excinfo.value + actual = str(exc) + expected = ( + # The error message starts with "'utf-8' codec ..." However, the + # spelling of "utf-8" can vary (e.g. "utf8") so we don't include it + "codec can't decode byte 0xe9 in position 1: " + 'invalid continuation byte in METADATA file at path: ' + ) + assert expected in actual, 'actual: {}'.format(actual) + assert actual.endswith(metadata_path), 'actual: {}'.format(actual) + + +def make_distribution_no_version(tmpdir, basename): + """ + Create a distribution directory with no file containing the version. + """ + dist_dir = tmpdir / basename + dist_dir.ensure_dir() + # Make the directory non-empty so distributions_from_metadata() + # will detect it and yield it. + dist_dir.join('temp.txt').ensure() + + if sys.version_info < (3, 6): + dist_dir = str(dist_dir) + + dists = list(pkg_resources.distributions_from_metadata(dist_dir)) + assert len(dists) == 1 + dist, = dists + + return dist, dist_dir + + +@pytest.mark.parametrize( + 'suffix, expected_filename, expected_dist_type', + [ + ('egg-info', 'PKG-INFO', EggInfoDistribution), + ('dist-info', 'METADATA', DistInfoDistribution), + ], +) +def test_distribution_version_missing( + tmpdir, suffix, expected_filename, expected_dist_type): + """ + Test Distribution.version when the "Version" header is missing. + """ + basename = 'foo.{}'.format(suffix) + dist, dist_dir = make_distribution_no_version(tmpdir, basename) + + expected_text = ( + "Missing 'Version:' header and/or {} file at path: " + ).format(expected_filename) + metadata_path = os.path.join(dist_dir, expected_filename) + + # Now check the exception raised when the "version" attribute is accessed. + with pytest.raises(ValueError) as excinfo: + dist.version + + err = str(excinfo.value) + # Include a string expression after the assert so the full strings + # will be visible for inspection on failure. + assert expected_text in err, str((expected_text, err)) + + # Also check the args passed to the ValueError. + msg, dist = excinfo.value.args + assert expected_text in msg + # Check that the message portion contains the path. + assert metadata_path in msg, str((metadata_path, msg)) + assert type(dist) == expected_dist_type + + +def test_distribution_version_missing_undetected_path(): + """ + Test Distribution.version when the "Version" header is missing and + the path can't be detected. + """ + # Create a Distribution object with no metadata argument, which results + # in an empty metadata provider. + dist = Distribution('/foo') + with pytest.raises(ValueError) as excinfo: + dist.version + + msg, dist = excinfo.value.args + expected = ( + "Missing 'Version:' header and/or PKG-INFO file at path: " + '[could not detect]' + ) + assert msg == expected + + +@pytest.mark.parametrize('only', [False, True]) +def test_dist_info_is_not_dir(tmp_path, only): + """Test path containing a file with dist-info extension.""" + dist_info = tmp_path / 'foobar.dist-info' + dist_info.touch() + assert not pkg_resources.dist_factory(str(tmp_path), str(dist_info), only) + + +class TestDeepVersionLookupDistutils: + @pytest.fixture + def env(self, tmpdir): + """ + Create a package environment, similar to a virtualenv, + in which packages are installed. + """ + + class Environment(str): + pass + + env = Environment(tmpdir) + tmpdir.chmod(stat.S_IRWXU) + subs = 'home', 'lib', 'scripts', 'data', 'egg-base' + env.paths = dict( + (dirname, str(tmpdir / dirname)) + for dirname in subs + ) + list(map(os.mkdir, env.paths.values())) + return env + + def create_foo_pkg(self, env, version): + """ + Create a foo package installed (distutils-style) to env.paths['lib'] + as version. + """ + ld = "This package has unicode metadata! ❄" + attrs = dict(name='foo', version=version, long_description=ld) + dist = distutils.dist.Distribution(attrs) + iei_cmd = distutils.command.install_egg_info.install_egg_info(dist) + iei_cmd.initialize_options() + iei_cmd.install_dir = env.paths['lib'] + iei_cmd.finalize_options() + iei_cmd.run() + + def test_version_resolved_from_egg_info(self, env): + version = '1.11.0.dev0+2329eae' + self.create_foo_pkg(env, version) + + # this requirement parsing will raise a VersionConflict unless the + # .egg-info file is parsed (see #419 on BitBucket) + req = pkg_resources.Requirement.parse('foo>=1.9') + dist = pkg_resources.WorkingSet([env.paths['lib']]).find(req) + assert dist.version == version + + @pytest.mark.parametrize( + 'unnormalized, normalized', + [ + ('foo', 'foo'), + ('foo/', 'foo'), + ('foo/bar', 'foo/bar'), + ('foo/bar/', 'foo/bar'), + ], + ) + def test_normalize_path_trailing_sep(self, unnormalized, normalized): + """Ensure the trailing slash is cleaned for path comparison. + + See pypa/setuptools#1519. + """ + result_from_unnormalized = pkg_resources.normalize_path(unnormalized) + result_from_normalized = pkg_resources.normalize_path(normalized) + assert result_from_unnormalized == result_from_normalized + + @pytest.mark.skipif( + os.path.normcase('A') != os.path.normcase('a'), + reason='Testing case-insensitive filesystems.', + ) + @pytest.mark.parametrize( + 'unnormalized, normalized', + [ + ('MiXeD/CasE', 'mixed/case'), + ], + ) + def test_normalize_path_normcase(self, unnormalized, normalized): + """Ensure mixed case is normalized on case-insensitive filesystems. + """ + result_from_unnormalized = pkg_resources.normalize_path(unnormalized) + result_from_normalized = pkg_resources.normalize_path(normalized) + assert result_from_unnormalized == result_from_normalized + + @pytest.mark.skipif( + os.path.sep != '\\', + reason='Testing systems using backslashes as path separators.', + ) + @pytest.mark.parametrize( + 'unnormalized, expected', + [ + ('forward/slash', 'forward\\slash'), + ('forward/slash/', 'forward\\slash'), + ('backward\\slash\\', 'backward\\slash'), + ], + ) + def test_normalize_path_backslash_sep(self, unnormalized, expected): + """Ensure path seps are cleaned on backslash path sep systems. + """ + result = pkg_resources.normalize_path(unnormalized) + assert result.endswith(expected) diff --git a/lib/pkg_resources/tests/test_resources.py b/lib/pkg_resources/tests/test_resources.py new file mode 100644 index 00000000..107dda7b --- /dev/null +++ b/lib/pkg_resources/tests/test_resources.py @@ -0,0 +1,884 @@ +import os +import sys +import string +import platform +import itertools + +import pytest +from pkg_resources.extern import packaging + +import pkg_resources +from pkg_resources import ( + parse_requirements, VersionConflict, parse_version, + Distribution, EntryPoint, Requirement, safe_version, safe_name, + WorkingSet) + + +# from Python 3.6 docs. +def pairwise(iterable): + "s -> (s0,s1), (s1,s2), (s2, s3), ..." + a, b = itertools.tee(iterable) + next(b, None) + return zip(a, b) + + +class Metadata(pkg_resources.EmptyProvider): + """Mock object to return metadata as if from an on-disk distribution""" + + def __init__(self, *pairs): + self.metadata = dict(pairs) + + def has_metadata(self, name): + return name in self.metadata + + def get_metadata(self, name): + return self.metadata[name] + + def get_metadata_lines(self, name): + return pkg_resources.yield_lines(self.get_metadata(name)) + + +dist_from_fn = pkg_resources.Distribution.from_filename + + +class TestDistro: + def testCollection(self): + # empty path should produce no distributions + ad = pkg_resources.Environment([], platform=None, python=None) + assert list(ad) == [] + assert ad['FooPkg'] == [] + ad.add(dist_from_fn("FooPkg-1.3_1.egg")) + ad.add(dist_from_fn("FooPkg-1.4-py2.4-win32.egg")) + ad.add(dist_from_fn("FooPkg-1.2-py2.4.egg")) + + # Name is in there now + assert ad['FooPkg'] + # But only 1 package + assert list(ad) == ['foopkg'] + + # Distributions sort by version + expected = ['1.4', '1.3-1', '1.2'] + assert [dist.version for dist in ad['FooPkg']] == expected + + # Removing a distribution leaves sequence alone + ad.remove(ad['FooPkg'][1]) + assert [dist.version for dist in ad['FooPkg']] == ['1.4', '1.2'] + + # And inserting adds them in order + ad.add(dist_from_fn("FooPkg-1.9.egg")) + assert [dist.version for dist in ad['FooPkg']] == ['1.9', '1.4', '1.2'] + + ws = WorkingSet([]) + foo12 = dist_from_fn("FooPkg-1.2-py2.4.egg") + foo14 = dist_from_fn("FooPkg-1.4-py2.4-win32.egg") + req, = parse_requirements("FooPkg>=1.3") + + # Nominal case: no distros on path, should yield all applicable + assert ad.best_match(req, ws).version == '1.9' + # If a matching distro is already installed, should return only that + ws.add(foo14) + assert ad.best_match(req, ws).version == '1.4' + + # If the first matching distro is unsuitable, it's a version conflict + ws = WorkingSet([]) + ws.add(foo12) + ws.add(foo14) + with pytest.raises(VersionConflict): + ad.best_match(req, ws) + + # If more than one match on the path, the first one takes precedence + ws = WorkingSet([]) + ws.add(foo14) + ws.add(foo12) + ws.add(foo14) + assert ad.best_match(req, ws).version == '1.4' + + def checkFooPkg(self, d): + assert d.project_name == "FooPkg" + assert d.key == "foopkg" + assert d.version == "1.3.post1" + assert d.py_version == "2.4" + assert d.platform == "win32" + assert d.parsed_version == parse_version("1.3-1") + + def testDistroBasics(self): + d = Distribution( + "/some/path", + project_name="FooPkg", + version="1.3-1", + py_version="2.4", + platform="win32", + ) + self.checkFooPkg(d) + + d = Distribution("/some/path") + assert d.py_version == '{}.{}'.format(*sys.version_info) + assert d.platform is None + + def testDistroParse(self): + d = dist_from_fn("FooPkg-1.3.post1-py2.4-win32.egg") + self.checkFooPkg(d) + d = dist_from_fn("FooPkg-1.3.post1-py2.4-win32.egg-info") + self.checkFooPkg(d) + + def testDistroMetadata(self): + d = Distribution( + "/some/path", project_name="FooPkg", + py_version="2.4", platform="win32", + metadata=Metadata( + ('PKG-INFO', "Metadata-Version: 1.0\nVersion: 1.3-1\n") + ), + ) + self.checkFooPkg(d) + + def distRequires(self, txt): + return Distribution("/foo", metadata=Metadata(('depends.txt', txt))) + + def checkRequires(self, dist, txt, extras=()): + assert list(dist.requires(extras)) == list(parse_requirements(txt)) + + def testDistroDependsSimple(self): + for v in "Twisted>=1.5", "Twisted>=1.5\nZConfig>=2.0": + self.checkRequires(self.distRequires(v), v) + + needs_object_dir = pytest.mark.skipif( + not hasattr(object, '__dir__'), + reason='object.__dir__ necessary for self.__dir__ implementation', + ) + + def test_distribution_dir(self): + d = pkg_resources.Distribution() + dir(d) + + @needs_object_dir + def test_distribution_dir_includes_provider_dir(self): + d = pkg_resources.Distribution() + before = d.__dir__() + assert 'test_attr' not in before + d._provider.test_attr = None + after = d.__dir__() + assert len(after) == len(before) + 1 + assert 'test_attr' in after + + @needs_object_dir + def test_distribution_dir_ignores_provider_dir_leading_underscore(self): + d = pkg_resources.Distribution() + before = d.__dir__() + assert '_test_attr' not in before + d._provider._test_attr = None + after = d.__dir__() + assert len(after) == len(before) + assert '_test_attr' not in after + + def testResolve(self): + ad = pkg_resources.Environment([]) + ws = WorkingSet([]) + # Resolving no requirements -> nothing to install + assert list(ws.resolve([], ad)) == [] + # Request something not in the collection -> DistributionNotFound + with pytest.raises(pkg_resources.DistributionNotFound): + ws.resolve(parse_requirements("Foo"), ad) + + Foo = Distribution.from_filename( + "/foo_dir/Foo-1.2.egg", + metadata=Metadata(('depends.txt', "[bar]\nBaz>=2.0")) + ) + ad.add(Foo) + ad.add(Distribution.from_filename("Foo-0.9.egg")) + + # Request thing(s) that are available -> list to activate + for i in range(3): + targets = list(ws.resolve(parse_requirements("Foo"), ad)) + assert targets == [Foo] + list(map(ws.add, targets)) + with pytest.raises(VersionConflict): + ws.resolve(parse_requirements("Foo==0.9"), ad) + ws = WorkingSet([]) # reset + + # Request an extra that causes an unresolved dependency for "Baz" + with pytest.raises(pkg_resources.DistributionNotFound): + ws.resolve(parse_requirements("Foo[bar]"), ad) + Baz = Distribution.from_filename( + "/foo_dir/Baz-2.1.egg", metadata=Metadata(('depends.txt', "Foo")) + ) + ad.add(Baz) + + # Activation list now includes resolved dependency + assert ( + list(ws.resolve(parse_requirements("Foo[bar]"), ad)) + == [Foo, Baz] + ) + # Requests for conflicting versions produce VersionConflict + with pytest.raises(VersionConflict) as vc: + ws.resolve(parse_requirements("Foo==1.2\nFoo!=1.2"), ad) + + msg = 'Foo 0.9 is installed but Foo==1.2 is required' + assert vc.value.report() == msg + + def test_environment_marker_evaluation_negative(self): + """Environment markers are evaluated at resolution time.""" + ad = pkg_resources.Environment([]) + ws = WorkingSet([]) + res = ws.resolve(parse_requirements("Foo;python_version<'2'"), ad) + assert list(res) == [] + + def test_environment_marker_evaluation_positive(self): + ad = pkg_resources.Environment([]) + ws = WorkingSet([]) + Foo = Distribution.from_filename("/foo_dir/Foo-1.2.dist-info") + ad.add(Foo) + res = ws.resolve(parse_requirements("Foo;python_version>='2'"), ad) + assert list(res) == [Foo] + + def test_environment_marker_evaluation_called(self): + """ + If one package foo requires bar without any extras, + markers should pass for bar without extras. + """ + parent_req, = parse_requirements("foo") + req, = parse_requirements("bar;python_version>='2'") + req_extras = pkg_resources._ReqExtras({req: parent_req.extras}) + assert req_extras.markers_pass(req) + + parent_req, = parse_requirements("foo[]") + req, = parse_requirements("bar;python_version>='2'") + req_extras = pkg_resources._ReqExtras({req: parent_req.extras}) + assert req_extras.markers_pass(req) + + def test_marker_evaluation_with_extras(self): + """Extras are also evaluated as markers at resolution time.""" + ad = pkg_resources.Environment([]) + ws = WorkingSet([]) + Foo = Distribution.from_filename( + "/foo_dir/Foo-1.2.dist-info", + metadata=Metadata(("METADATA", "Provides-Extra: baz\n" + "Requires-Dist: quux; extra=='baz'")) + ) + ad.add(Foo) + assert list(ws.resolve(parse_requirements("Foo"), ad)) == [Foo] + quux = Distribution.from_filename("/foo_dir/quux-1.0.dist-info") + ad.add(quux) + res = list(ws.resolve(parse_requirements("Foo[baz]"), ad)) + assert res == [Foo, quux] + + def test_marker_evaluation_with_extras_normlized(self): + """Extras are also evaluated as markers at resolution time.""" + ad = pkg_resources.Environment([]) + ws = WorkingSet([]) + Foo = Distribution.from_filename( + "/foo_dir/Foo-1.2.dist-info", + metadata=Metadata(("METADATA", "Provides-Extra: baz-lightyear\n" + "Requires-Dist: quux; extra=='baz-lightyear'")) + ) + ad.add(Foo) + assert list(ws.resolve(parse_requirements("Foo"), ad)) == [Foo] + quux = Distribution.from_filename("/foo_dir/quux-1.0.dist-info") + ad.add(quux) + res = list(ws.resolve(parse_requirements("Foo[baz-lightyear]"), ad)) + assert res == [Foo, quux] + + def test_marker_evaluation_with_multiple_extras(self): + ad = pkg_resources.Environment([]) + ws = WorkingSet([]) + Foo = Distribution.from_filename( + "/foo_dir/Foo-1.2.dist-info", + metadata=Metadata(("METADATA", "Provides-Extra: baz\n" + "Requires-Dist: quux; extra=='baz'\n" + "Provides-Extra: bar\n" + "Requires-Dist: fred; extra=='bar'\n")) + ) + ad.add(Foo) + quux = Distribution.from_filename("/foo_dir/quux-1.0.dist-info") + ad.add(quux) + fred = Distribution.from_filename("/foo_dir/fred-0.1.dist-info") + ad.add(fred) + res = list(ws.resolve(parse_requirements("Foo[baz,bar]"), ad)) + assert sorted(res) == [fred, quux, Foo] + + def test_marker_evaluation_with_extras_loop(self): + ad = pkg_resources.Environment([]) + ws = WorkingSet([]) + a = Distribution.from_filename( + "/foo_dir/a-0.2.dist-info", + metadata=Metadata(("METADATA", "Requires-Dist: c[a]")) + ) + b = Distribution.from_filename( + "/foo_dir/b-0.3.dist-info", + metadata=Metadata(("METADATA", "Requires-Dist: c[b]")) + ) + c = Distribution.from_filename( + "/foo_dir/c-1.0.dist-info", + metadata=Metadata(("METADATA", "Provides-Extra: a\n" + "Requires-Dist: b;extra=='a'\n" + "Provides-Extra: b\n" + "Requires-Dist: foo;extra=='b'")) + ) + foo = Distribution.from_filename("/foo_dir/foo-0.1.dist-info") + for dist in (a, b, c, foo): + ad.add(dist) + res = list(ws.resolve(parse_requirements("a"), ad)) + assert res == [a, c, b, foo] + + def testDistroDependsOptions(self): + d = self.distRequires(""" + Twisted>=1.5 + [docgen] + ZConfig>=2.0 + docutils>=0.3 + [fastcgi] + fcgiapp>=0.1""") + self.checkRequires(d, "Twisted>=1.5") + self.checkRequires( + d, "Twisted>=1.5 ZConfig>=2.0 docutils>=0.3".split(), ["docgen"] + ) + self.checkRequires( + d, "Twisted>=1.5 fcgiapp>=0.1".split(), ["fastcgi"] + ) + self.checkRequires( + d, "Twisted>=1.5 ZConfig>=2.0 docutils>=0.3 fcgiapp>=0.1".split(), + ["docgen", "fastcgi"] + ) + self.checkRequires( + d, "Twisted>=1.5 fcgiapp>=0.1 ZConfig>=2.0 docutils>=0.3".split(), + ["fastcgi", "docgen"] + ) + with pytest.raises(pkg_resources.UnknownExtra): + d.requires(["foo"]) + + +class TestWorkingSet: + def test_find_conflicting(self): + ws = WorkingSet([]) + Foo = Distribution.from_filename("/foo_dir/Foo-1.2.egg") + ws.add(Foo) + + # create a requirement that conflicts with Foo 1.2 + req = next(parse_requirements("Foo<1.2")) + + with pytest.raises(VersionConflict) as vc: + ws.find(req) + + msg = 'Foo 1.2 is installed but Foo<1.2 is required' + assert vc.value.report() == msg + + def test_resolve_conflicts_with_prior(self): + """ + A ContextualVersionConflict should be raised when a requirement + conflicts with a prior requirement for a different package. + """ + # Create installation where Foo depends on Baz 1.0 and Bar depends on + # Baz 2.0. + ws = WorkingSet([]) + md = Metadata(('depends.txt', "Baz==1.0")) + Foo = Distribution.from_filename("/foo_dir/Foo-1.0.egg", metadata=md) + ws.add(Foo) + md = Metadata(('depends.txt', "Baz==2.0")) + Bar = Distribution.from_filename("/foo_dir/Bar-1.0.egg", metadata=md) + ws.add(Bar) + Baz = Distribution.from_filename("/foo_dir/Baz-1.0.egg") + ws.add(Baz) + Baz = Distribution.from_filename("/foo_dir/Baz-2.0.egg") + ws.add(Baz) + + with pytest.raises(VersionConflict) as vc: + ws.resolve(parse_requirements("Foo\nBar\n")) + + msg = "Baz 1.0 is installed but Baz==2.0 is required by " + msg += repr(set(['Bar'])) + assert vc.value.report() == msg + + +class TestEntryPoints: + def assertfields(self, ep): + assert ep.name == "foo" + assert ep.module_name == "pkg_resources.tests.test_resources" + assert ep.attrs == ("TestEntryPoints",) + assert ep.extras == ("x",) + assert ep.load() is TestEntryPoints + expect = "foo = pkg_resources.tests.test_resources:TestEntryPoints [x]" + assert str(ep) == expect + + def setup_method(self, method): + self.dist = Distribution.from_filename( + "FooPkg-1.2-py2.4.egg", metadata=Metadata(('requires.txt', '[x]'))) + + def testBasics(self): + ep = EntryPoint( + "foo", "pkg_resources.tests.test_resources", ["TestEntryPoints"], + ["x"], self.dist + ) + self.assertfields(ep) + + def testParse(self): + s = "foo = pkg_resources.tests.test_resources:TestEntryPoints [x]" + ep = EntryPoint.parse(s, self.dist) + self.assertfields(ep) + + ep = EntryPoint.parse("bar baz= spammity[PING]") + assert ep.name == "bar baz" + assert ep.module_name == "spammity" + assert ep.attrs == () + assert ep.extras == ("ping",) + + ep = EntryPoint.parse(" fizzly = wocka:foo") + assert ep.name == "fizzly" + assert ep.module_name == "wocka" + assert ep.attrs == ("foo",) + assert ep.extras == () + + # plus in the name + spec = "html+mako = mako.ext.pygmentplugin:MakoHtmlLexer" + ep = EntryPoint.parse(spec) + assert ep.name == 'html+mako' + + reject_specs = "foo", "x=a:b:c", "q=x/na", "fez=pish:tush-z", "x=f[a]>2" + + @pytest.mark.parametrize("reject_spec", reject_specs) + def test_reject_spec(self, reject_spec): + with pytest.raises(ValueError): + EntryPoint.parse(reject_spec) + + def test_printable_name(self): + """ + Allow any printable character in the name. + """ + # Create a name with all printable characters; strip the whitespace. + name = string.printable.strip() + spec = "{name} = module:attr".format(**locals()) + ep = EntryPoint.parse(spec) + assert ep.name == name + + def checkSubMap(self, m): + assert len(m) == len(self.submap_expect) + for key, ep in self.submap_expect.items(): + assert m.get(key).name == ep.name + assert m.get(key).module_name == ep.module_name + assert sorted(m.get(key).attrs) == sorted(ep.attrs) + assert sorted(m.get(key).extras) == sorted(ep.extras) + + submap_expect = dict( + feature1=EntryPoint('feature1', 'somemodule', ['somefunction']), + feature2=EntryPoint( + 'feature2', 'another.module', ['SomeClass'], ['extra1', 'extra2']), + feature3=EntryPoint('feature3', 'this.module', extras=['something']) + ) + submap_str = """ + # define features for blah blah + feature1 = somemodule:somefunction + feature2 = another.module:SomeClass [extra1,extra2] + feature3 = this.module [something] + """ + + def testParseList(self): + self.checkSubMap(EntryPoint.parse_group("xyz", self.submap_str)) + with pytest.raises(ValueError): + EntryPoint.parse_group("x a", "foo=bar") + with pytest.raises(ValueError): + EntryPoint.parse_group("x", ["foo=baz", "foo=bar"]) + + def testParseMap(self): + m = EntryPoint.parse_map({'xyz': self.submap_str}) + self.checkSubMap(m['xyz']) + assert list(m.keys()) == ['xyz'] + m = EntryPoint.parse_map("[xyz]\n" + self.submap_str) + self.checkSubMap(m['xyz']) + assert list(m.keys()) == ['xyz'] + with pytest.raises(ValueError): + EntryPoint.parse_map(["[xyz]", "[xyz]"]) + with pytest.raises(ValueError): + EntryPoint.parse_map(self.submap_str) + + def testDeprecationWarnings(self): + ep = EntryPoint( + "foo", "pkg_resources.tests.test_resources", ["TestEntryPoints"], + ["x"] + ) + with pytest.warns(pkg_resources.PkgResourcesDeprecationWarning): + ep.load(require=False) + + +class TestRequirements: + def testBasics(self): + r = Requirement.parse("Twisted>=1.2") + assert str(r) == "Twisted>=1.2" + assert repr(r) == "Requirement.parse('Twisted>=1.2')" + assert r == Requirement("Twisted>=1.2") + assert r == Requirement("twisTed>=1.2") + assert r != Requirement("Twisted>=2.0") + assert r != Requirement("Zope>=1.2") + assert r != Requirement("Zope>=3.0") + assert r != Requirement("Twisted[extras]>=1.2") + + def testOrdering(self): + r1 = Requirement("Twisted==1.2c1,>=1.2") + r2 = Requirement("Twisted>=1.2,==1.2c1") + assert r1 == r2 + assert str(r1) == str(r2) + assert str(r2) == "Twisted==1.2c1,>=1.2" + assert ( + Requirement("Twisted") + != + Requirement("Twisted @ https://localhost/twisted.zip") + ) + + def testBasicContains(self): + r = Requirement("Twisted>=1.2") + foo_dist = Distribution.from_filename("FooPkg-1.3_1.egg") + twist11 = Distribution.from_filename("Twisted-1.1.egg") + twist12 = Distribution.from_filename("Twisted-1.2.egg") + assert parse_version('1.2') in r + assert parse_version('1.1') not in r + assert '1.2' in r + assert '1.1' not in r + assert foo_dist not in r + assert twist11 not in r + assert twist12 in r + + def testOptionsAndHashing(self): + r1 = Requirement.parse("Twisted[foo,bar]>=1.2") + r2 = Requirement.parse("Twisted[bar,FOO]>=1.2") + assert r1 == r2 + assert set(r1.extras) == set(("foo", "bar")) + assert set(r2.extras) == set(("foo", "bar")) + assert hash(r1) == hash(r2) + assert ( + hash(r1) + == + hash(( + "twisted", + None, + packaging.specifiers.SpecifierSet(">=1.2"), + frozenset(["foo", "bar"]), + None + )) + ) + assert ( + hash(Requirement.parse("Twisted @ https://localhost/twisted.zip")) + == + hash(( + "twisted", + "https://localhost/twisted.zip", + packaging.specifiers.SpecifierSet(), + frozenset(), + None + )) + ) + + def testVersionEquality(self): + r1 = Requirement.parse("foo==0.3a2") + r2 = Requirement.parse("foo!=0.3a4") + d = Distribution.from_filename + + assert d("foo-0.3a4.egg") not in r1 + assert d("foo-0.3a1.egg") not in r1 + assert d("foo-0.3a4.egg") not in r2 + + assert d("foo-0.3a2.egg") in r1 + assert d("foo-0.3a2.egg") in r2 + assert d("foo-0.3a3.egg") in r2 + assert d("foo-0.3a5.egg") in r2 + + def testSetuptoolsProjectName(self): + """ + The setuptools project should implement the setuptools package. + """ + + assert ( + Requirement.parse('setuptools').project_name == 'setuptools') + # setuptools 0.7 and higher means setuptools. + assert ( + Requirement.parse('setuptools == 0.7').project_name + == 'setuptools' + ) + assert ( + Requirement.parse('setuptools == 0.7a1').project_name + == 'setuptools' + ) + assert ( + Requirement.parse('setuptools >= 0.7').project_name + == 'setuptools' + ) + + +class TestParsing: + def testEmptyParse(self): + assert list(parse_requirements('')) == [] + + def testYielding(self): + for inp, out in [ + ([], []), ('x', ['x']), ([[]], []), (' x\n y', ['x', 'y']), + (['x\n\n', 'y'], ['x', 'y']), + ]: + assert list(pkg_resources.yield_lines(inp)) == out + + def testSplitting(self): + sample = """ + x + [Y] + z + + a + [b ] + # foo + c + [ d] + [q] + v + """ + assert ( + list(pkg_resources.split_sections(sample)) + == + [ + (None, ["x"]), + ("Y", ["z", "a"]), + ("b", ["c"]), + ("d", []), + ("q", ["v"]), + ] + ) + with pytest.raises(ValueError): + list(pkg_resources.split_sections("[foo")) + + def testSafeName(self): + assert safe_name("adns-python") == "adns-python" + assert safe_name("WSGI Utils") == "WSGI-Utils" + assert safe_name("WSGI Utils") == "WSGI-Utils" + assert safe_name("Money$$$Maker") == "Money-Maker" + assert safe_name("peak.web") != "peak-web" + + def testSafeVersion(self): + assert safe_version("1.2-1") == "1.2.post1" + assert safe_version("1.2 alpha") == "1.2.alpha" + assert safe_version("2.3.4 20050521") == "2.3.4.20050521" + assert safe_version("Money$$$Maker") == "Money-Maker" + assert safe_version("peak.web") == "peak.web" + + def testSimpleRequirements(self): + assert ( + list(parse_requirements('Twis-Ted>=1.2-1')) + == + [Requirement('Twis-Ted>=1.2-1')] + ) + assert ( + list(parse_requirements('Twisted >=1.2, \\ # more\n<2.0')) + == + [Requirement('Twisted>=1.2,<2.0')] + ) + assert ( + Requirement.parse("FooBar==1.99a3") + == + Requirement("FooBar==1.99a3") + ) + with pytest.raises(ValueError): + Requirement.parse(">=2.3") + with pytest.raises(ValueError): + Requirement.parse("x\\") + with pytest.raises(ValueError): + Requirement.parse("x==2 q") + with pytest.raises(ValueError): + Requirement.parse("X==1\nY==2") + with pytest.raises(ValueError): + Requirement.parse("#") + + def test_requirements_with_markers(self): + assert ( + Requirement.parse("foobar;os_name=='a'") + == + Requirement.parse("foobar;os_name=='a'") + ) + assert ( + Requirement.parse("name==1.1;python_version=='2.7'") + != + Requirement.parse("name==1.1;python_version=='3.6'") + ) + assert ( + Requirement.parse("name==1.0;python_version=='2.7'") + != + Requirement.parse("name==1.2;python_version=='2.7'") + ) + assert ( + Requirement.parse("name[foo]==1.0;python_version=='3.6'") + != + Requirement.parse("name[foo,bar]==1.0;python_version=='3.6'") + ) + + def test_local_version(self): + req, = parse_requirements('foo==1.0+org1') + + def test_spaces_between_multiple_versions(self): + req, = parse_requirements('foo>=1.0, <3') + req, = parse_requirements('foo >= 1.0, < 3') + + @pytest.mark.parametrize( + ['lower', 'upper'], + [ + ('1.2-rc1', '1.2rc1'), + ('0.4', '0.4.0'), + ('0.4.0.0', '0.4.0'), + ('0.4.0-0', '0.4-0'), + ('0post1', '0.0post1'), + ('0pre1', '0.0c1'), + ('0.0.0preview1', '0c1'), + ('0.0c1', '0-rc1'), + ('1.2a1', '1.2.a.1'), + ('1.2.a', '1.2a'), + ], + ) + def testVersionEquality(self, lower, upper): + assert parse_version(lower) == parse_version(upper) + + torture = """ + 0.80.1-3 0.80.1-2 0.80.1-1 0.79.9999+0.80.0pre4-1 + 0.79.9999+0.80.0pre2-3 0.79.9999+0.80.0pre2-2 + 0.77.2-1 0.77.1-1 0.77.0-1 + """ + + @pytest.mark.parametrize( + ['lower', 'upper'], + [ + ('2.1', '2.1.1'), + ('2a1', '2b0'), + ('2a1', '2.1'), + ('2.3a1', '2.3'), + ('2.1-1', '2.1-2'), + ('2.1-1', '2.1.1'), + ('2.1', '2.1post4'), + ('2.1a0-20040501', '2.1'), + ('1.1', '02.1'), + ('3.2', '3.2.post0'), + ('3.2post1', '3.2post2'), + ('0.4', '4.0'), + ('0.0.4', '0.4.0'), + ('0post1', '0.4post1'), + ('2.1.0-rc1', '2.1.0'), + ('2.1dev', '2.1a0'), + ] + list(pairwise(reversed(torture.split()))), + ) + def testVersionOrdering(self, lower, upper): + assert parse_version(lower) < parse_version(upper) + + def testVersionHashable(self): + """ + Ensure that our versions stay hashable even though we've subclassed + them and added some shim code to them. + """ + assert ( + hash(parse_version("1.0")) + == + hash(parse_version("1.0")) + ) + + +class TestNamespaces: + + ns_str = "__import__('pkg_resources').declare_namespace(__name__)\n" + + @pytest.fixture + def symlinked_tmpdir(self, tmpdir): + """ + Where available, return the tempdir as a symlink, + which as revealed in #231 is more fragile than + a natural tempdir. + """ + if not hasattr(os, 'symlink'): + yield str(tmpdir) + return + + link_name = str(tmpdir) + '-linked' + os.symlink(str(tmpdir), link_name) + try: + yield type(tmpdir)(link_name) + finally: + os.unlink(link_name) + + @pytest.fixture(autouse=True) + def patched_path(self, tmpdir): + """ + Patch sys.path to include the 'site-pkgs' dir. Also + restore pkg_resources._namespace_packages to its + former state. + """ + saved_ns_pkgs = pkg_resources._namespace_packages.copy() + saved_sys_path = sys.path[:] + site_pkgs = tmpdir.mkdir('site-pkgs') + sys.path.append(str(site_pkgs)) + try: + yield + finally: + pkg_resources._namespace_packages = saved_ns_pkgs + sys.path = saved_sys_path + + issue591 = pytest.mark.xfail(platform.system() == 'Windows', reason="#591") + + @issue591 + def test_two_levels_deep(self, symlinked_tmpdir): + """ + Test nested namespace packages + Create namespace packages in the following tree : + site-packages-1/pkg1/pkg2 + site-packages-2/pkg1/pkg2 + Check both are in the _namespace_packages dict and that their __path__ + is correct + """ + real_tmpdir = symlinked_tmpdir.realpath() + tmpdir = symlinked_tmpdir + sys.path.append(str(tmpdir / 'site-pkgs2')) + site_dirs = tmpdir / 'site-pkgs', tmpdir / 'site-pkgs2' + for site in site_dirs: + pkg1 = site / 'pkg1' + pkg2 = pkg1 / 'pkg2' + pkg2.ensure_dir() + (pkg1 / '__init__.py').write_text(self.ns_str, encoding='utf-8') + (pkg2 / '__init__.py').write_text(self.ns_str, encoding='utf-8') + import pkg1 + assert "pkg1" in pkg_resources._namespace_packages + # attempt to import pkg2 from site-pkgs2 + import pkg1.pkg2 + # check the _namespace_packages dict + assert "pkg1.pkg2" in pkg_resources._namespace_packages + assert pkg_resources._namespace_packages["pkg1"] == ["pkg1.pkg2"] + # check the __path__ attribute contains both paths + expected = [ + str(real_tmpdir / "site-pkgs" / "pkg1" / "pkg2"), + str(real_tmpdir / "site-pkgs2" / "pkg1" / "pkg2"), + ] + assert pkg1.pkg2.__path__ == expected + + @issue591 + def test_path_order(self, symlinked_tmpdir): + """ + Test that if multiple versions of the same namespace package subpackage + are on different sys.path entries, that only the one earliest on + sys.path is imported, and that the namespace package's __path__ is in + the correct order. + + Regression test for https://github.com/pypa/setuptools/issues/207 + """ + + tmpdir = symlinked_tmpdir + site_dirs = ( + tmpdir / "site-pkgs", + tmpdir / "site-pkgs2", + tmpdir / "site-pkgs3", + ) + + vers_str = "__version__ = %r" + + for number, site in enumerate(site_dirs, 1): + if number > 1: + sys.path.append(str(site)) + nspkg = site / 'nspkg' + subpkg = nspkg / 'subpkg' + subpkg.ensure_dir() + (nspkg / '__init__.py').write_text(self.ns_str, encoding='utf-8') + (subpkg / '__init__.py').write_text( + vers_str % number, encoding='utf-8') + + import nspkg.subpkg + import nspkg + expected = [ + str(site.realpath() / 'nspkg') + for site in site_dirs + ] + assert nspkg.__path__ == expected + assert nspkg.subpkg.__version__ == 1 diff --git a/lib/pkg_resources/tests/test_working_set.py b/lib/pkg_resources/tests/test_working_set.py new file mode 100644 index 00000000..db13c714 --- /dev/null +++ b/lib/pkg_resources/tests/test_working_set.py @@ -0,0 +1,482 @@ +import inspect +import re +import textwrap +import functools + +import pytest + +import pkg_resources + +from .test_resources import Metadata + + +def strip_comments(s): + return '\n'.join( + line for line in s.split('\n') + if line.strip() and not line.strip().startswith('#') + ) + + +def parse_distributions(s): + ''' + Parse a series of distribution specs of the form: + {project_name}-{version} + [optional, indented requirements specification] + + Example: + + foo-0.2 + bar-1.0 + foo>=3.0 + [feature] + baz + + yield 2 distributions: + - project_name=foo, version=0.2 + - project_name=bar, version=1.0, + requires=['foo>=3.0', 'baz; extra=="feature"'] + ''' + s = s.strip() + for spec in re.split(r'\n(?=[^\s])', s): + if not spec: + continue + fields = spec.split('\n', 1) + assert 1 <= len(fields) <= 2 + name, version = fields.pop(0).split('-') + if fields: + requires = textwrap.dedent(fields.pop(0)) + metadata = Metadata(('requires.txt', requires)) + else: + metadata = None + dist = pkg_resources.Distribution(project_name=name, + version=version, + metadata=metadata) + yield dist + + +class FakeInstaller: + + def __init__(self, installable_dists): + self._installable_dists = installable_dists + + def __call__(self, req): + return next(iter(filter(lambda dist: dist in req, + self._installable_dists)), None) + + +def parametrize_test_working_set_resolve(*test_list): + idlist = [] + argvalues = [] + for test in test_list: + ( + name, + installed_dists, + installable_dists, + requirements, + expected1, expected2 + ) = [ + strip_comments(s.lstrip()) for s in + textwrap.dedent(test).lstrip().split('\n\n', 5) + ] + installed_dists = list(parse_distributions(installed_dists)) + installable_dists = list(parse_distributions(installable_dists)) + requirements = list(pkg_resources.parse_requirements(requirements)) + for id_, replace_conflicting, expected in ( + (name, False, expected1), + (name + '_replace_conflicting', True, expected2), + ): + idlist.append(id_) + expected = strip_comments(expected.strip()) + if re.match(r'\w+$', expected): + expected = getattr(pkg_resources, expected) + assert issubclass(expected, Exception) + else: + expected = list(parse_distributions(expected)) + argvalues.append(pytest.param(installed_dists, installable_dists, + requirements, replace_conflicting, + expected)) + return pytest.mark.parametrize('installed_dists,installable_dists,' + 'requirements,replace_conflicting,' + 'resolved_dists_or_exception', + argvalues, ids=idlist) + + +@parametrize_test_working_set_resolve( + ''' + # id + noop + + # installed + + # installable + + # wanted + + # resolved + + # resolved [replace conflicting] + ''', + + ''' + # id + already_installed + + # installed + foo-3.0 + + # installable + + # wanted + foo>=2.1,!=3.1,<4 + + # resolved + foo-3.0 + + # resolved [replace conflicting] + foo-3.0 + ''', + + ''' + # id + installable_not_installed + + # installed + + # installable + foo-3.0 + foo-4.0 + + # wanted + foo>=2.1,!=3.1,<4 + + # resolved + foo-3.0 + + # resolved [replace conflicting] + foo-3.0 + ''', + + ''' + # id + not_installable + + # installed + + # installable + + # wanted + foo>=2.1,!=3.1,<4 + + # resolved + DistributionNotFound + + # resolved [replace conflicting] + DistributionNotFound + ''', + + ''' + # id + no_matching_version + + # installed + + # installable + foo-3.1 + + # wanted + foo>=2.1,!=3.1,<4 + + # resolved + DistributionNotFound + + # resolved [replace conflicting] + DistributionNotFound + ''', + + ''' + # id + installable_with_installed_conflict + + # installed + foo-3.1 + + # installable + foo-3.5 + + # wanted + foo>=2.1,!=3.1,<4 + + # resolved + VersionConflict + + # resolved [replace conflicting] + foo-3.5 + ''', + + ''' + # id + not_installable_with_installed_conflict + + # installed + foo-3.1 + + # installable + + # wanted + foo>=2.1,!=3.1,<4 + + # resolved + VersionConflict + + # resolved [replace conflicting] + DistributionNotFound + ''', + + ''' + # id + installed_with_installed_require + + # installed + foo-3.9 + baz-0.1 + foo>=2.1,!=3.1,<4 + + # installable + + # wanted + baz + + # resolved + foo-3.9 + baz-0.1 + + # resolved [replace conflicting] + foo-3.9 + baz-0.1 + ''', + + ''' + # id + installed_with_conflicting_installed_require + + # installed + foo-5 + baz-0.1 + foo>=2.1,!=3.1,<4 + + # installable + + # wanted + baz + + # resolved + VersionConflict + + # resolved [replace conflicting] + DistributionNotFound + ''', + + ''' + # id + installed_with_installable_conflicting_require + + # installed + foo-5 + baz-0.1 + foo>=2.1,!=3.1,<4 + + # installable + foo-2.9 + + # wanted + baz + + # resolved + VersionConflict + + # resolved [replace conflicting] + baz-0.1 + foo-2.9 + ''', + + ''' + # id + installed_with_installable_require + + # installed + baz-0.1 + foo>=2.1,!=3.1,<4 + + # installable + foo-3.9 + + # wanted + baz + + # resolved + foo-3.9 + baz-0.1 + + # resolved [replace conflicting] + foo-3.9 + baz-0.1 + ''', + + ''' + # id + installable_with_installed_require + + # installed + foo-3.9 + + # installable + baz-0.1 + foo>=2.1,!=3.1,<4 + + # wanted + baz + + # resolved + foo-3.9 + baz-0.1 + + # resolved [replace conflicting] + foo-3.9 + baz-0.1 + ''', + + ''' + # id + installable_with_installable_require + + # installed + + # installable + foo-3.9 + baz-0.1 + foo>=2.1,!=3.1,<4 + + # wanted + baz + + # resolved + foo-3.9 + baz-0.1 + + # resolved [replace conflicting] + foo-3.9 + baz-0.1 + ''', + + ''' + # id + installable_with_conflicting_installable_require + + # installed + foo-5 + + # installable + foo-2.9 + baz-0.1 + foo>=2.1,!=3.1,<4 + + # wanted + baz + + # resolved + VersionConflict + + # resolved [replace conflicting] + baz-0.1 + foo-2.9 + ''', + + ''' + # id + conflicting_installables + + # installed + + # installable + foo-2.9 + foo-5.0 + + # wanted + foo>=2.1,!=3.1,<4 + foo>=4 + + # resolved + VersionConflict + + # resolved [replace conflicting] + VersionConflict + ''', + + ''' + # id + installables_with_conflicting_requires + + # installed + + # installable + foo-2.9 + dep==1.0 + baz-5.0 + dep==2.0 + dep-1.0 + dep-2.0 + + # wanted + foo + baz + + # resolved + VersionConflict + + # resolved [replace conflicting] + VersionConflict + ''', + + ''' + # id + installables_with_conflicting_nested_requires + + # installed + + # installable + foo-2.9 + dep1 + dep1-1.0 + subdep<1.0 + baz-5.0 + dep2 + dep2-1.0 + subdep>1.0 + subdep-0.9 + subdep-1.1 + + # wanted + foo + baz + + # resolved + VersionConflict + + # resolved [replace conflicting] + VersionConflict + ''', +) +def test_working_set_resolve(installed_dists, installable_dists, requirements, + replace_conflicting, resolved_dists_or_exception): + ws = pkg_resources.WorkingSet([]) + list(map(ws.add, installed_dists)) + resolve_call = functools.partial( + ws.resolve, + requirements, installer=FakeInstaller(installable_dists), + replace_conflicting=replace_conflicting, + ) + if inspect.isclass(resolved_dists_or_exception): + with pytest.raises(resolved_dists_or_exception): + resolve_call() + else: + assert sorted(resolve_call()) == sorted(resolved_dists_or_exception) diff --git a/lib/portend.py b/lib/portend.py new file mode 100644 index 00000000..a9f6787f --- /dev/null +++ b/lib/portend.py @@ -0,0 +1,240 @@ +""" +A simple library for managing the availability of ports. +""" + +import time +import socket +import argparse +import sys +import itertools +import contextlib +import platform +from collections import abc +import urllib.parse + +from tempora import timing + + +def client_host(server_host): + """ + Return the host on which a client can connect to the given listener. + + >>> client_host('192.168.0.1') + '192.168.0.1' + >>> client_host('0.0.0.0') + '127.0.0.1' + >>> client_host('::') + '::1' + """ + if server_host == '0.0.0.0': + # 0.0.0.0 is INADDR_ANY, which should answer on localhost. + return '127.0.0.1' + if server_host in ('::', '::0', '::0.0.0.0'): + # :: is IN6ADDR_ANY, which should answer on localhost. + # ::0 and ::0.0.0.0 are non-canonical but common + # ways to write IN6ADDR_ANY. + return '::1' + return server_host + + +class Checker(object): + def __init__(self, timeout=1.0): + self.timeout = timeout + + def assert_free(self, host, port=None): + """ + Assert that the given addr is free + in that all attempts to connect fail within the timeout + or raise a PortNotFree exception. + + >>> free_port = find_available_local_port() + + >>> Checker().assert_free('localhost', free_port) + >>> Checker().assert_free('127.0.0.1', free_port) + >>> Checker().assert_free('::1', free_port) + + Also accepts an addr tuple + + >>> addr = '::1', free_port, 0, 0 + >>> Checker().assert_free(addr) + + Host might refer to a server bind address like '::', which + should use localhost to perform the check. + + >>> Checker().assert_free('::', free_port) + """ + if port is None and isinstance(host, abc.Sequence): + host, port = host[:2] + if platform.system() == 'Windows': + host = client_host(host) # pragma: nocover + info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, socket.SOCK_STREAM) + list(itertools.starmap(self._connect, info)) + + def _connect(self, af, socktype, proto, canonname, sa): + s = socket.socket(af, socktype, proto) + # fail fast with a small timeout + s.settimeout(self.timeout) + + with contextlib.closing(s): + try: + s.connect(sa) + except socket.error: + return + + # the connect succeeded, so the port isn't free + host, port = sa[:2] + tmpl = "Port {port} is in use on {host}." + raise PortNotFree(tmpl.format(**locals())) + + +class Timeout(IOError): + pass + + +class PortNotFree(IOError): + pass + + +def free(host, port, timeout=float('Inf')): + """ + Wait for the specified port to become free (dropping or rejecting + requests). Return when the port is free or raise a Timeout if timeout has + elapsed. + + Timeout may be specified in seconds or as a timedelta. + If timeout is None or ∞, the routine will run indefinitely. + + >>> free('localhost', find_available_local_port()) + + >>> free(None, None) + Traceback (most recent call last): + ... + ValueError: Host values of '' or None are not allowed. + """ + if not host: + raise ValueError("Host values of '' or None are not allowed.") + + timer = timing.Timer(timeout) + + while True: + try: + # Expect a free port, so use a small timeout + Checker(timeout=0.1).assert_free(host, port) + return + except PortNotFree: + if timer.expired(): + raise Timeout("Port {port} not free on {host}.".format(**locals())) + # Politely wait. + time.sleep(0.1) + + +def occupied(host, port, timeout=float('Inf')): + """ + Wait for the specified port to become occupied (accepting requests). + Return when the port is occupied or raise a Timeout if timeout has + elapsed. + + Timeout may be specified in seconds or as a timedelta. + If timeout is None or ∞, the routine will run indefinitely. + + >>> occupied('localhost', find_available_local_port(), .1) + Traceback (most recent call last): + ... + Timeout: Port ... not bound on localhost. + + >>> occupied(None, None) + Traceback (most recent call last): + ... + ValueError: Host values of '' or None are not allowed. + """ + if not host: + raise ValueError("Host values of '' or None are not allowed.") + + timer = timing.Timer(timeout) + + while True: + try: + Checker(timeout=0.5).assert_free(host, port) + if timer.expired(): + raise Timeout("Port {port} not bound on {host}.".format(**locals())) + # Politely wait + time.sleep(0.1) + except PortNotFree: + # port is occupied + return + + +def find_available_local_port(): + """ + Find a free port on localhost. + + >>> 0 < find_available_local_port() < 65536 + True + """ + infos = socket.getaddrinfo(None, 0, socket.AF_UNSPEC, socket.SOCK_STREAM) + family, proto, _, _, addr = next(iter(infos)) + sock = socket.socket(family, proto) + sock.bind(addr) + addr, port = sock.getsockname()[:2] + sock.close() + return port + + +class HostPort(str): + """ + A simple representation of a host/port pair as a string + + >>> hp = HostPort('localhost:32768') + + >>> hp.host + 'localhost' + + >>> hp.port + 32768 + + >>> len(hp) + 15 + + >>> hp = HostPort('[::1]:32768') + + >>> hp.host + '::1' + + >>> hp.port + 32768 + """ + + @property + def host(self): + return urllib.parse.urlparse(f'//{self}').hostname + + @property + def port(self): + return urllib.parse.urlparse(f'//{self}').port + + @classmethod + def from_addr(cls, addr): + listen_host, port = addr[:2] + plain_host = client_host(listen_host) + host = f'[{plain_host}]' if ':' in plain_host else plain_host + return cls(':'.join([host, str(port)])) + + +def _main(args=None): + parser = argparse.ArgumentParser() + + def global_lookup(key): + return globals()[key] + + parser.add_argument('target', metavar='host:port', type=HostPort) + parser.add_argument('func', metavar='state', type=global_lookup) + parser.add_argument('-t', '--timeout', default=None, type=float) + args = parser.parse_args(args) + try: + args.func(args.target.host, args.target.port, timeout=args.timeout) + except Timeout as timeout: + print(timeout, file=sys.stderr) + raise SystemExit(1) + + +__name__ == '__main__' and _main() diff --git a/lib/pygazelle/api.py b/lib/pygazelle/api.py index a8c55a72..5c5a7fe7 100644 --- a/lib/pygazelle/api.py +++ b/lib/pygazelle/api.py @@ -5,7 +5,7 @@ # # Loosely based on the API implementation from 'whatbetter', by Zachary Denton # See https://github.com/zacharydenton/whatbetter -from HTMLParser import HTMLParser +from html.parser import HTMLParser import sys import json @@ -172,7 +172,7 @@ class GazelleAPI(object): id = int(id) if id == self.userid: return self.logged_in_user - elif id in self.cached_users.keys(): + elif id in list(self.cached_users.keys()): return self.cached_users[id] else: return User(id, self) @@ -214,7 +214,7 @@ class GazelleAPI(object): """ if id: id = int(id) - if id in self.cached_artists.keys(): + if id in list(self.cached_artists.keys()): artist = self.cached_artists[id] else: artist = Artist(id, self) @@ -234,7 +234,7 @@ class GazelleAPI(object): pass it to update the object. There is no way to query the count directly from the API, but it can be retrieved from other calls such as 'artist', however. """ - if name in self.cached_tags.keys(): + if name in list(self.cached_tags.keys()): return self.cached_tags[name] else: return Tag(name, self) @@ -245,7 +245,7 @@ class GazelleAPI(object): if the request hasn't already been cached. This is done on demand to reduce unnecessary API calls. """ id = int(id) - if id in self.cached_requests.keys(): + if id in list(self.cached_requests.keys()): return self.cached_requests[id] else: return Request(id, self) @@ -255,7 +255,7 @@ class GazelleAPI(object): Returns a TorrentGroup for the passed ID, associated with this API object. """ id = int(id) - if id in self.cached_torrent_groups.keys(): + if id in list(self.cached_torrent_groups.keys()): return self.cached_torrent_groups[id] else: return TorrentGroup(id, self) @@ -265,7 +265,7 @@ class GazelleAPI(object): Returns a Torrent for the passed ID, associated with this API object. """ id = int(id) - if id in self.cached_torrents.keys(): + if id in list(self.cached_torrents.keys()): return self.cached_torrents[id] else: return Torrent(id, self) @@ -280,7 +280,7 @@ class GazelleAPI(object): return None id = int(response['torrent']['id']) - if id in self.cached_torrents.keys(): + if id in list(self.cached_torrents.keys()): torrent = self.cached_torrents[id] else: torrent = Torrent(id, self) @@ -293,7 +293,7 @@ class GazelleAPI(object): Returns a Category for the passed ID, associated with this API object. """ id = int(id) - if id in self.cached_categories.keys(): + if id in list(self.cached_categories.keys()): cat = self.cached_categories[id] else: cat = Category(id, self) @@ -416,4 +416,4 @@ class GazelleAPI(object): if sys.version_info[0] == 3: text_type = str else: - text_type = unicode + text_type = str diff --git a/lib/pygazelle/artist.py b/lib/pygazelle/artist.py index 60779db1..dba1c894 100644 --- a/lib/pygazelle/artist.py +++ b/lib/pygazelle/artist.py @@ -1,4 +1,4 @@ -from HTMLParser import HTMLParser +from html.parser import HTMLParser class InvalidArtistException(Exception): pass diff --git a/lib/pygazelle/torrent.py b/lib/pygazelle/torrent.py index 38400dd1..52ba13c7 100644 --- a/lib/pygazelle/torrent.py +++ b/lib/pygazelle/torrent.py @@ -1,4 +1,4 @@ -from HTMLParser import HTMLParser +from html.parser import HTMLParser import re class InvalidTorrentException(Exception): diff --git a/lib/pygazelle/torrent_group.py b/lib/pygazelle/torrent_group.py index e9d15625..a42d96d1 100644 --- a/lib/pygazelle/torrent_group.py +++ b/lib/pygazelle/torrent_group.py @@ -114,16 +114,16 @@ class TorrentGroup(object): tag = self.parent_api.get_tag(tag_name) self.tags.append(tag) # some of the below keys aren't in things like comics...should probably watch out for this elsewhere - if 'bookmarked' in search_json_response.keys(): + if 'bookmarked' in list(search_json_response.keys()): self.has_bookmarked = search_json_response['bookmarked'] - if 'vanityHouse' in search_json_response.keys(): + if 'vanityHouse' in list(search_json_response.keys()): self.vanity_house = search_json_response['vanityHouse'] - if 'groupYear' in search_json_response.keys(): + if 'groupYear' in list(search_json_response.keys()): self.year = search_json_response['groupYear'] - if 'releaseType' in search_json_response.keys(): + if 'releaseType' in list(search_json_response.keys()): self.release_type = search_json_response['releaseType'] self.time = search_json_response['groupTime'] - if 'torrentId' in search_json_response.keys(): + if 'torrentId' in list(search_json_response.keys()): search_json_response['torrents'] = [{'torrentId': search_json_response['torrentId']}] new_torrents = [] diff --git a/lib/pygazelle/user.py b/lib/pygazelle/user.py index fc1d6a9a..9c237691 100644 --- a/lib/pygazelle/user.py +++ b/lib/pygazelle/user.py @@ -48,7 +48,7 @@ class User(object): self.passkey = index_json_response['passkey'] self.notifications = index_json_response['notifications'] if self.stats: - self.stats = dict(self.stats.items() + index_json_response['userstats'].items()) # merge in new info + self.stats = dict(list(self.stats.items()) + list(index_json_response['userstats'].items())) # merge in new info else: self.stats = index_json_response['userstats'] @@ -76,7 +76,7 @@ class User(object): self.is_friend = user_json_response['isFriend'] self.profile_text = user_json_response['profileText'] if self.stats: - self.stats = dict(self.stats.items() + user_json_response['stats'].items()) # merge in new info + self.stats = dict(list(self.stats.items()) + list(user_json_response['stats'].items())) # merge in new info else: self.stats = user_json_response['stats'] self.ranks = user_json_response['ranks'] diff --git a/lib/pynma/pynma.py b/lib/pynma/pynma.py index 2fc55560..0caee913 100755 --- a/lib/pynma/pynma.py +++ b/lib/pynma/pynma.py @@ -5,12 +5,12 @@ from xml.dom.minidom import parseString try: from http.client import HTTPSConnection except ImportError: - from httplib import HTTPSConnection + from http.client import HTTPSConnection try: from urllib.parse import urlencode except ImportError: - from urllib import urlencode + from urllib.parse import urlencode __version__ = "1.0" diff --git a/lib/pythontwitter/__init__.py b/lib/pythontwitter/__init__.py deleted file mode 100644 index b13b46d9..00000000 --- a/lib/pythontwitter/__init__.py +++ /dev/null @@ -1,4651 +0,0 @@ -#!/usr/bin/env python -# -# vim: sw=2 ts=2 sts=2 -# -# Copyright 2007 The Python-Twitter Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -'''A library that provides a Python interface to the Twitter API''' - -__author__ = 'python-twitter@googlegroups.com' -__version__ = '1.0.1' - - -import base64 -import calendar -import datetime -import httplib -import os -import rfc822 -import sys -import tempfile -import textwrap -import time -import urllib -import urllib2 -import urlparse -import gzip -import StringIO - -try: - # Python >= 2.6 - import json as simplejson -except ImportError: - try: - # Python < 2.6 - import simplejson as simplejson - except ImportError: - try: - # Google App Engine - from django.utils import simplejson - except ImportError: - raise ImportError, "Unable to load a json library" - -# parse_qsl moved to urlparse module in v2.6 -try: - from urlparse import parse_qsl, parse_qs -except ImportError: - from cgi import parse_qsl, parse_qs - -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -import oauth2 as oauth - - -CHARACTER_LIMIT = 140 - -# A singleton representing a lazily instantiated FileCache. -DEFAULT_CACHE = object() - -REQUEST_TOKEN_URL = 'https://api.twitter.com/oauth/request_token' -ACCESS_TOKEN_URL = 'https://api.twitter.com/oauth/access_token' -AUTHORIZATION_URL = 'https://api.twitter.com/oauth/authorize' -SIGNIN_URL = 'https://api.twitter.com/oauth/authenticate' - - -class TwitterError(Exception): - '''Base class for Twitter errors''' - - @property - def message(self): - '''Returns the first argument used to construct this error.''' - return self.args[0] - - -class Status(object): - '''A class representing the Status structure used by the twitter API. - - The Status structure exposes the following properties: - - status.created_at - status.created_at_in_seconds # read only - status.favorited - status.favorite_count - status.in_reply_to_screen_name - status.in_reply_to_user_id - status.in_reply_to_status_id - status.truncated - status.source - status.id - status.text - status.location - status.relative_created_at # read only - status.user - status.urls - status.user_mentions - status.hashtags - status.geo - status.place - status.coordinates - status.contributors - ''' - def __init__(self, - created_at=None, - favorited=None, - favorite_count=None, - id=None, - text=None, - location=None, - user=None, - in_reply_to_screen_name=None, - in_reply_to_user_id=None, - in_reply_to_status_id=None, - truncated=None, - source=None, - now=None, - urls=None, - user_mentions=None, - hashtags=None, - media=None, - geo=None, - place=None, - coordinates=None, - contributors=None, - retweeted=None, - retweeted_status=None, - current_user_retweet=None, - retweet_count=None, - possibly_sensitive=None, - scopes=None, - withheld_copyright=None, - withheld_in_countries=None, - withheld_scope=None): - '''An object to hold a Twitter status message. - - This class is normally instantiated by the twitter.Api class and - returned in a sequence. - - Note: Dates are posted in the form "Sat Jan 27 04:17:38 +0000 2007" - - Args: - created_at: - The time this status message was posted. [Optional] - favorited: - Whether this is a favorite of the authenticated user. [Optional] - favorite_count: - Number of times this status message has been favorited. [Optional] - id: - The unique id of this status message. [Optional] - text: - The text of this status message. [Optional] - location: - the geolocation string associated with this message. [Optional] - relative_created_at: - A human readable string representing the posting time. [Optional] - user: - A twitter.User instance representing the person posting the - message. [Optional] - now: - The current time, if the client chooses to set it. - Defaults to the wall clock time. [Optional] - urls: - user_mentions: - hashtags: - geo: - place: - coordinates: - contributors: - retweeted: - retweeted_status: - current_user_retweet: - retweet_count: - possibly_sensitive: - scopes: - withheld_copyright: - withheld_in_countries: - withheld_scope: - ''' - self.created_at = created_at - self.favorited = favorited - self.favorite_count = favorite_count - self.id = id - self.text = text - self.location = location - self.user = user - self.now = now - self.in_reply_to_screen_name = in_reply_to_screen_name - self.in_reply_to_user_id = in_reply_to_user_id - self.in_reply_to_status_id = in_reply_to_status_id - self.truncated = truncated - self.retweeted = retweeted - self.source = source - self.urls = urls - self.user_mentions = user_mentions - self.hashtags = hashtags - self.media = media - self.geo = geo - self.place = place - self.coordinates = coordinates - self.contributors = contributors - self.retweeted_status = retweeted_status - self.current_user_retweet = current_user_retweet - self.retweet_count = retweet_count - self.possibly_sensitive = possibly_sensitive - self.scopes = scopes - self.withheld_copyright = withheld_copyright - self.withheld_in_countries = withheld_in_countries - self.withheld_scope = withheld_scope - - def GetCreatedAt(self): - '''Get the time this status message was posted. - - Returns: - The time this status message was posted - ''' - return self._created_at - - def SetCreatedAt(self, created_at): - '''Set the time this status message was posted. - - Args: - created_at: - The time this status message was created - ''' - self._created_at = created_at - - created_at = property(GetCreatedAt, SetCreatedAt, - doc='The time this status message was posted.') - - def GetCreatedAtInSeconds(self): - '''Get the time this status message was posted, in seconds since the epoch. - - Returns: - The time this status message was posted, in seconds since the epoch. - ''' - return calendar.timegm(rfc822.parsedate(self.created_at)) - - created_at_in_seconds = property(GetCreatedAtInSeconds, - doc="The time this status message was " - "posted, in seconds since the epoch") - - def GetFavorited(self): - '''Get the favorited setting of this status message. - - Returns: - True if this status message is favorited; False otherwise - ''' - return self._favorited - - def SetFavorited(self, favorited): - '''Set the favorited state of this status message. - - Args: - favorited: - boolean True/False favorited state of this status message - ''' - self._favorited = favorited - - favorited = property(GetFavorited, SetFavorited, - doc='The favorited state of this status message.') - - def GetFavoriteCount(self): - '''Get the favorite count of this status message. - - Returns: - number of times this status message has been favorited - ''' - return self._favorite_count - - def SetFavoriteCount(self, favorite_count): - '''Set the favorited state of this status message. - - Args: - favorite_count: - int number of favorites for this status message - ''' - self._favorite_count = favorite_count - - favorite_count = property(GetFavoriteCount, SetFavoriteCount, - doc='The number of favorites for this status message.') - - def GetId(self): - '''Get the unique id of this status message. - - Returns: - The unique id of this status message - ''' - return self._id - - def SetId(self, id): - '''Set the unique id of this status message. - - Args: - id: - The unique id of this status message - ''' - self._id = id - - id = property(GetId, SetId, - doc='The unique id of this status message.') - - def GetInReplyToScreenName(self): - return self._in_reply_to_screen_name - - def SetInReplyToScreenName(self, in_reply_to_screen_name): - self._in_reply_to_screen_name = in_reply_to_screen_name - - in_reply_to_screen_name = property(GetInReplyToScreenName, SetInReplyToScreenName, - doc='') - - def GetInReplyToUserId(self): - return self._in_reply_to_user_id - - def SetInReplyToUserId(self, in_reply_to_user_id): - self._in_reply_to_user_id = in_reply_to_user_id - - in_reply_to_user_id = property(GetInReplyToUserId, SetInReplyToUserId, - doc='') - - def GetInReplyToStatusId(self): - return self._in_reply_to_status_id - - def SetInReplyToStatusId(self, in_reply_to_status_id): - self._in_reply_to_status_id = in_reply_to_status_id - - in_reply_to_status_id = property(GetInReplyToStatusId, SetInReplyToStatusId, - doc='') - - def GetTruncated(self): - return self._truncated - - def SetTruncated(self, truncated): - self._truncated = truncated - - truncated = property(GetTruncated, SetTruncated, - doc='') - - def GetRetweeted(self): - return self._retweeted - - def SetRetweeted(self, retweeted): - self._retweeted = retweeted - - retweeted = property(GetRetweeted, SetRetweeted, - doc='') - - def GetSource(self): - return self._source - - def SetSource(self, source): - self._source = source - - source = property(GetSource, SetSource, - doc='') - - def GetText(self): - '''Get the text of this status message. - - Returns: - The text of this status message. - ''' - return self._text - - def SetText(self, text): - '''Set the text of this status message. - - Args: - text: - The text of this status message - ''' - self._text = text - - text = property(GetText, SetText, - doc='The text of this status message') - - def GetLocation(self): - '''Get the geolocation associated with this status message - - Returns: - The geolocation string of this status message. - ''' - return self._location - - def SetLocation(self, location): - '''Set the geolocation associated with this status message - - Args: - location: - The geolocation string of this status message - ''' - self._location = location - - location = property(GetLocation, SetLocation, - doc='The geolocation string of this status message') - - def GetRelativeCreatedAt(self): - '''Get a human readable string representing the posting time - - Returns: - A human readable string representing the posting time - ''' - fudge = 1.25 - delta = long(self.now) - long(self.created_at_in_seconds) - - if delta < (1 * fudge): - return 'about a second ago' - elif delta < (60 * (1/fudge)): - return 'about %d seconds ago' % (delta) - elif delta < (60 * fudge): - return 'about a minute ago' - elif delta < (60 * 60 * (1/fudge)): - return 'about %d minutes ago' % (delta / 60) - elif delta < (60 * 60 * fudge) or delta / (60 * 60) == 1: - return 'about an hour ago' - elif delta < (60 * 60 * 24 * (1/fudge)): - return 'about %d hours ago' % (delta / (60 * 60)) - elif delta < (60 * 60 * 24 * fudge) or delta / (60 * 60 * 24) == 1: - return 'about a day ago' - else: - return 'about %d days ago' % (delta / (60 * 60 * 24)) - - relative_created_at = property(GetRelativeCreatedAt, - doc='Get a human readable string representing ' - 'the posting time') - - def GetUser(self): - '''Get a twitter.User representing the entity posting this status message. - - Returns: - A twitter.User representing the entity posting this status message - ''' - return self._user - - def SetUser(self, user): - '''Set a twitter.User representing the entity posting this status message. - - Args: - user: - A twitter.User representing the entity posting this status message - ''' - self._user = user - - user = property(GetUser, SetUser, - doc='A twitter.User representing the entity posting this ' - 'status message') - - def GetNow(self): - '''Get the wallclock time for this status message. - - Used to calculate relative_created_at. Defaults to the time - the object was instantiated. - - Returns: - Whatever the status instance believes the current time to be, - in seconds since the epoch. - ''' - if self._now is None: - self._now = time.time() - return self._now - - def SetNow(self, now): - '''Set the wallclock time for this status message. - - Used to calculate relative_created_at. Defaults to the time - the object was instantiated. - - Args: - now: - The wallclock time for this instance. - ''' - self._now = now - - now = property(GetNow, SetNow, - doc='The wallclock time for this status instance.') - - def GetGeo(self): - return self._geo - - def SetGeo(self, geo): - self._geo = geo - - geo = property(GetGeo, SetGeo, - doc='') - - def GetPlace(self): - return self._place - - def SetPlace(self, place): - self._place = place - - place = property(GetPlace, SetPlace, - doc='') - - def GetCoordinates(self): - return self._coordinates - - def SetCoordinates(self, coordinates): - self._coordinates = coordinates - - coordinates = property(GetCoordinates, SetCoordinates, - doc='') - - def GetContributors(self): - return self._contributors - - def SetContributors(self, contributors): - self._contributors = contributors - - contributors = property(GetContributors, SetContributors, - doc='') - - def GetRetweeted_status(self): - return self._retweeted_status - - def SetRetweeted_status(self, retweeted_status): - self._retweeted_status = retweeted_status - - retweeted_status = property(GetRetweeted_status, SetRetweeted_status, - doc='') - - def GetRetweetCount(self): - return self._retweet_count - - def SetRetweetCount(self, retweet_count): - self._retweet_count = retweet_count - - retweet_count = property(GetRetweetCount, SetRetweetCount, - doc='') - - def GetCurrent_user_retweet(self): - return self._current_user_retweet - - def SetCurrent_user_retweet(self, current_user_retweet): - self._current_user_retweet = current_user_retweet - - current_user_retweet = property(GetCurrent_user_retweet, SetCurrent_user_retweet, - doc='') - - def GetPossibly_sensitive(self): - return self._possibly_sensitive - - def SetPossibly_sensitive(self, possibly_sensitive): - self._possibly_sensitive = possibly_sensitive - - possibly_sensitive = property(GetPossibly_sensitive, SetPossibly_sensitive, - doc='') - - def GetScopes(self): - return self._scopes - - def SetScopes(self, scopes): - self._scopes = scopes - - scopes = property(GetScopes, SetScopes, doc='') - - def GetWithheld_copyright(self): - return self._withheld_copyright - - def SetWithheld_copyright(self, withheld_copyright): - self._withheld_copyright = withheld_copyright - - withheld_copyright = property(GetWithheld_copyright, SetWithheld_copyright, - doc='') - - def GetWithheld_in_countries(self): - return self._withheld_in_countries - - def SetWithheld_in_countries(self, withheld_in_countries): - self._withheld_in_countries = withheld_in_countries - - withheld_in_countries = property(GetWithheld_in_countries, SetWithheld_in_countries, - doc='') - - def GetWithheld_scope(self): - return self._withheld_scope - - def SetWithheld_scope(self, withheld_scope): - self._withheld_scope = withheld_scope - - withheld_scope = property(GetWithheld_scope, SetWithheld_scope, - doc='') - - def __ne__(self, other): - return not self.__eq__(other) - - def __eq__(self, other): - try: - return other and \ - self.created_at == other.created_at and \ - self.id == other.id and \ - self.text == other.text and \ - self.location == other.location and \ - self.user == other.user and \ - self.in_reply_to_screen_name == other.in_reply_to_screen_name and \ - self.in_reply_to_user_id == other.in_reply_to_user_id and \ - self.in_reply_to_status_id == other.in_reply_to_status_id and \ - self.truncated == other.truncated and \ - self.retweeted == other.retweeted and \ - self.favorited == other.favorited and \ - self.favorite_count == other.favorite_count and \ - self.source == other.source and \ - self.geo == other.geo and \ - self.place == other.place and \ - self.coordinates == other.coordinates and \ - self.contributors == other.contributors and \ - self.retweeted_status == other.retweeted_status and \ - self.retweet_count == other.retweet_count and \ - self.current_user_retweet == other.current_user_retweet and \ - self.possibly_sensitive == other.possibly_sensitive and \ - self.scopes == other.scopes and \ - self.withheld_copyright == other.withheld_copyright and \ - self.withheld_in_countries == other.withheld_in_countries and \ - self.withheld_scope == other.withheld_scope - except AttributeError: - return False - - def __str__(self): - '''A string representation of this twitter.Status instance. - - The return value is the same as the JSON string representation. - - Returns: - A string representation of this twitter.Status instance. - ''' - return self.AsJsonString() - - def AsJsonString(self): - '''A JSON string representation of this twitter.Status instance. - - Returns: - A JSON string representation of this twitter.Status instance - ''' - return simplejson.dumps(self.AsDict(), sort_keys=True) - - def AsDict(self): - '''A dict representation of this twitter.Status instance. - - The return value uses the same key names as the JSON representation. - - Return: - A dict representing this twitter.Status instance - ''' - data = {} - if self.created_at: - data['created_at'] = self.created_at - if self.favorited: - data['favorited'] = self.favorited - if self.favorite_count: - data['favorite_count'] = self.favorite_count - if self.id: - data['id'] = self.id - if self.text: - data['text'] = self.text - if self.location: - data['location'] = self.location - if self.user: - data['user'] = self.user.AsDict() - if self.in_reply_to_screen_name: - data['in_reply_to_screen_name'] = self.in_reply_to_screen_name - if self.in_reply_to_user_id: - data['in_reply_to_user_id'] = self.in_reply_to_user_id - if self.in_reply_to_status_id: - data['in_reply_to_status_id'] = self.in_reply_to_status_id - if self.truncated is not None: - data['truncated'] = self.truncated - if self.retweeted is not None: - data['retweeted'] = self.retweeted - if self.favorited is not None: - data['favorited'] = self.favorited - if self.source: - data['source'] = self.source - if self.geo: - data['geo'] = self.geo - if self.place: - data['place'] = self.place - if self.coordinates: - data['coordinates'] = self.coordinates - if self.contributors: - data['contributors'] = self.contributors - if self.hashtags: - data['hashtags'] = [h.text for h in self.hashtags] - if self.retweeted_status: - data['retweeted_status'] = self.retweeted_status.AsDict() - if self.retweet_count: - data['retweet_count'] = self.retweet_count - if self.urls: - data['urls'] = dict([(url.url, url.expanded_url) for url in self.urls]) - if self.user_mentions: - data['user_mentions'] = [um.AsDict() for um in self.user_mentions] - if self.current_user_retweet: - data['current_user_retweet'] = self.current_user_retweet - if self.possibly_sensitive: - data['possibly_sensitive'] = self.possibly_sensitive - if self.scopes: - data['scopes'] = self.scopes - if self.withheld_copyright: - data['withheld_copyright'] = self.withheld_copyright - if self.withheld_in_countries: - data['withheld_in_countries'] = self.withheld_in_countries - if self.withheld_scope: - data['withheld_scope'] = self.withheld_scope - return data - - @staticmethod - def NewFromJsonDict(data): - '''Create a new instance based on a JSON dict. - - Args: - data: A JSON dict, as converted from the JSON in the twitter API - Returns: - A twitter.Status instance - ''' - if 'user' in data: - user = User.NewFromJsonDict(data['user']) - else: - user = None - if 'retweeted_status' in data: - retweeted_status = Status.NewFromJsonDict(data['retweeted_status']) - else: - retweeted_status = None - - if 'current_user_retweet' in data: - current_user_retweet = data['current_user_retweet']['id'] - else: - current_user_retweet = None - - urls = None - user_mentions = None - hashtags = None - media = None - if 'entities' in data: - if 'urls' in data['entities']: - urls = [Url.NewFromJsonDict(u) for u in data['entities']['urls']] - if 'user_mentions' in data['entities']: - user_mentions = [User.NewFromJsonDict(u) for u in data['entities']['user_mentions']] - if 'hashtags' in data['entities']: - hashtags = [Hashtag.NewFromJsonDict(h) for h in data['entities']['hashtags']] - if 'media' in data['entities']: - media = data['entities']['media'] - else: - media = [] - return Status(created_at=data.get('created_at', None), - favorited=data.get('favorited', None), - favorite_count=data.get('favorite_count', None), - id=data.get('id', None), - text=data.get('text', None), - location=data.get('location', None), - in_reply_to_screen_name=data.get('in_reply_to_screen_name', None), - in_reply_to_user_id=data.get('in_reply_to_user_id', None), - in_reply_to_status_id=data.get('in_reply_to_status_id', None), - truncated=data.get('truncated', None), - retweeted=data.get('retweeted', None), - source=data.get('source', None), - user=user, - urls=urls, - user_mentions=user_mentions, - hashtags=hashtags, - media=media, - geo=data.get('geo', None), - place=data.get('place', None), - coordinates=data.get('coordinates', None), - contributors=data.get('contributors', None), - retweeted_status=retweeted_status, - current_user_retweet=current_user_retweet, - retweet_count=data.get('retweet_count', None), - possibly_sensitive=data.get('possibly_sensitive', None), - scopes=data.get('scopes', None), - withheld_copyright=data.get('withheld_copyright', None), - withheld_in_countries=data.get('withheld_in_countries', None), - withheld_scope=data.get('withheld_scope', None)) - - -class User(object): - '''A class representing the User structure used by the twitter API. - - The User structure exposes the following properties: - - user.id - user.name - user.screen_name - user.location - user.description - user.profile_image_url - user.profile_background_tile - user.profile_background_image_url - user.profile_sidebar_fill_color - user.profile_background_color - user.profile_link_color - user.profile_text_color - user.protected - user.utc_offset - user.time_zone - user.url - user.status - user.statuses_count - user.followers_count - user.friends_count - user.favourites_count - user.geo_enabled - user.verified - user.lang - user.notifications - user.contributors_enabled - user.created_at - user.listed_count - ''' - def __init__(self, - id=None, - name=None, - screen_name=None, - location=None, - description=None, - profile_image_url=None, - profile_background_tile=None, - profile_background_image_url=None, - profile_sidebar_fill_color=None, - profile_background_color=None, - profile_link_color=None, - profile_text_color=None, - protected=None, - utc_offset=None, - time_zone=None, - followers_count=None, - friends_count=None, - statuses_count=None, - favourites_count=None, - url=None, - status=None, - geo_enabled=None, - verified=None, - lang=None, - notifications=None, - contributors_enabled=None, - created_at=None, - listed_count=None): - self.id = id - self.name = name - self.screen_name = screen_name - self.location = location - self.description = description - self.profile_image_url = profile_image_url - self.profile_background_tile = profile_background_tile - self.profile_background_image_url = profile_background_image_url - self.profile_sidebar_fill_color = profile_sidebar_fill_color - self.profile_background_color = profile_background_color - self.profile_link_color = profile_link_color - self.profile_text_color = profile_text_color - self.protected = protected - self.utc_offset = utc_offset - self.time_zone = time_zone - self.followers_count = followers_count - self.friends_count = friends_count - self.statuses_count = statuses_count - self.favourites_count = favourites_count - self.url = url - self.status = status - self.geo_enabled = geo_enabled - self.verified = verified - self.lang = lang - self.notifications = notifications - self.contributors_enabled = contributors_enabled - self.created_at = created_at - self.listed_count = listed_count - - def GetId(self): - '''Get the unique id of this user. - - Returns: - The unique id of this user - ''' - return self._id - - def SetId(self, id): - '''Set the unique id of this user. - - Args: - id: The unique id of this user. - ''' - self._id = id - - id = property(GetId, SetId, - doc='The unique id of this user.') - - def GetName(self): - '''Get the real name of this user. - - Returns: - The real name of this user - ''' - return self._name - - def SetName(self, name): - '''Set the real name of this user. - - Args: - name: The real name of this user - ''' - self._name = name - - name = property(GetName, SetName, - doc='The real name of this user.') - - def GetScreenName(self): - '''Get the short twitter name of this user. - - Returns: - The short twitter name of this user - ''' - return self._screen_name - - def SetScreenName(self, screen_name): - '''Set the short twitter name of this user. - - Args: - screen_name: the short twitter name of this user - ''' - self._screen_name = screen_name - - screen_name = property(GetScreenName, SetScreenName, - doc='The short twitter name of this user.') - - def GetLocation(self): - '''Get the geographic location of this user. - - Returns: - The geographic location of this user - ''' - return self._location - - def SetLocation(self, location): - '''Set the geographic location of this user. - - Args: - location: The geographic location of this user - ''' - self._location = location - - location = property(GetLocation, SetLocation, - doc='The geographic location of this user.') - - def GetDescription(self): - '''Get the short text description of this user. - - Returns: - The short text description of this user - ''' - return self._description - - def SetDescription(self, description): - '''Set the short text description of this user. - - Args: - description: The short text description of this user - ''' - self._description = description - - description = property(GetDescription, SetDescription, - doc='The short text description of this user.') - - def GetUrl(self): - '''Get the homepage url of this user. - - Returns: - The homepage url of this user - ''' - return self._url - - def SetUrl(self, url): - '''Set the homepage url of this user. - - Args: - url: The homepage url of this user - ''' - self._url = url - - url = property(GetUrl, SetUrl, - doc='The homepage url of this user.') - - def GetProfileImageUrl(self): - '''Get the url of the thumbnail of this user. - - Returns: - The url of the thumbnail of this user - ''' - return self._profile_image_url - - def SetProfileImageUrl(self, profile_image_url): - '''Set the url of the thumbnail of this user. - - Args: - profile_image_url: The url of the thumbnail of this user - ''' - self._profile_image_url = profile_image_url - - profile_image_url= property(GetProfileImageUrl, SetProfileImageUrl, - doc='The url of the thumbnail of this user.') - - def GetProfileBackgroundTile(self): - '''Boolean for whether to tile the profile background image. - - Returns: - True if the background is to be tiled, False if not, None if unset. - ''' - return self._profile_background_tile - - def SetProfileBackgroundTile(self, profile_background_tile): - '''Set the boolean flag for whether to tile the profile background image. - - Args: - profile_background_tile: Boolean flag for whether to tile or not. - ''' - self._profile_background_tile = profile_background_tile - - profile_background_tile = property(GetProfileBackgroundTile, SetProfileBackgroundTile, - doc='Boolean for whether to tile the background image.') - - def GetProfileBackgroundImageUrl(self): - return self._profile_background_image_url - - def SetProfileBackgroundImageUrl(self, profile_background_image_url): - self._profile_background_image_url = profile_background_image_url - - profile_background_image_url = property(GetProfileBackgroundImageUrl, SetProfileBackgroundImageUrl, - doc='The url of the profile background of this user.') - - def GetProfileSidebarFillColor(self): - return self._profile_sidebar_fill_color - - def SetProfileSidebarFillColor(self, profile_sidebar_fill_color): - self._profile_sidebar_fill_color = profile_sidebar_fill_color - - profile_sidebar_fill_color = property(GetProfileSidebarFillColor, SetProfileSidebarFillColor) - - def GetProfileBackgroundColor(self): - return self._profile_background_color - - def SetProfileBackgroundColor(self, profile_background_color): - self._profile_background_color = profile_background_color - - profile_background_color = property(GetProfileBackgroundColor, SetProfileBackgroundColor) - - def GetProfileLinkColor(self): - return self._profile_link_color - - def SetProfileLinkColor(self, profile_link_color): - self._profile_link_color = profile_link_color - - profile_link_color = property(GetProfileLinkColor, SetProfileLinkColor) - - def GetProfileTextColor(self): - return self._profile_text_color - - def SetProfileTextColor(self, profile_text_color): - self._profile_text_color = profile_text_color - - profile_text_color = property(GetProfileTextColor, SetProfileTextColor) - - def GetProtected(self): - return self._protected - - def SetProtected(self, protected): - self._protected = protected - - protected = property(GetProtected, SetProtected) - - def GetUtcOffset(self): - return self._utc_offset - - def SetUtcOffset(self, utc_offset): - self._utc_offset = utc_offset - - utc_offset = property(GetUtcOffset, SetUtcOffset) - - def GetTimeZone(self): - '''Returns the current time zone string for the user. - - Returns: - The descriptive time zone string for the user. - ''' - return self._time_zone - - def SetTimeZone(self, time_zone): - '''Sets the user's time zone string. - - Args: - time_zone: - The descriptive time zone to assign for the user. - ''' - self._time_zone = time_zone - - time_zone = property(GetTimeZone, SetTimeZone) - - def GetStatus(self): - '''Get the latest twitter.Status of this user. - - Returns: - The latest twitter.Status of this user - ''' - return self._status - - def SetStatus(self, status): - '''Set the latest twitter.Status of this user. - - Args: - status: - The latest twitter.Status of this user - ''' - self._status = status - - status = property(GetStatus, SetStatus, - doc='The latest twitter.Status of this user.') - - def GetFriendsCount(self): - '''Get the friend count for this user. - - Returns: - The number of users this user has befriended. - ''' - return self._friends_count - - def SetFriendsCount(self, count): - '''Set the friend count for this user. - - Args: - count: - The number of users this user has befriended. - ''' - self._friends_count = count - - friends_count = property(GetFriendsCount, SetFriendsCount, - doc='The number of friends for this user.') - - def GetListedCount(self): - '''Get the listed count for this user. - - Returns: - The number of lists this user belongs to. - ''' - return self._listed_count - - def SetListedCount(self, count): - '''Set the listed count for this user. - - Args: - count: - The number of lists this user belongs to. - ''' - self._listed_count = count - - listed_count = property(GetListedCount, SetListedCount, - doc='The number of lists this user belongs to.') - - def GetFollowersCount(self): - '''Get the follower count for this user. - - Returns: - The number of users following this user. - ''' - return self._followers_count - - def SetFollowersCount(self, count): - '''Set the follower count for this user. - - Args: - count: - The number of users following this user. - ''' - self._followers_count = count - - followers_count = property(GetFollowersCount, SetFollowersCount, - doc='The number of users following this user.') - - def GetStatusesCount(self): - '''Get the number of status updates for this user. - - Returns: - The number of status updates for this user. - ''' - return self._statuses_count - - def SetStatusesCount(self, count): - '''Set the status update count for this user. - - Args: - count: - The number of updates for this user. - ''' - self._statuses_count = count - - statuses_count = property(GetStatusesCount, SetStatusesCount, - doc='The number of updates for this user.') - - def GetFavouritesCount(self): - '''Get the number of favourites for this user. - - Returns: - The number of favourites for this user. - ''' - return self._favourites_count - - def SetFavouritesCount(self, count): - '''Set the favourite count for this user. - - Args: - count: - The number of favourites for this user. - ''' - self._favourites_count = count - - favourites_count = property(GetFavouritesCount, SetFavouritesCount, - doc='The number of favourites for this user.') - - def GetGeoEnabled(self): - '''Get the setting of geo_enabled for this user. - - Returns: - True/False if Geo tagging is enabled - ''' - return self._geo_enabled - - def SetGeoEnabled(self, geo_enabled): - '''Set the latest twitter.geo_enabled of this user. - - Args: - geo_enabled: - True/False if Geo tagging is to be enabled - ''' - self._geo_enabled = geo_enabled - - geo_enabled = property(GetGeoEnabled, SetGeoEnabled, - doc='The value of twitter.geo_enabled for this user.') - - def GetVerified(self): - '''Get the setting of verified for this user. - - Returns: - True/False if user is a verified account - ''' - return self._verified - - def SetVerified(self, verified): - '''Set twitter.verified for this user. - - Args: - verified: - True/False if user is a verified account - ''' - self._verified = verified - - verified = property(GetVerified, SetVerified, - doc='The value of twitter.verified for this user.') - - def GetLang(self): - '''Get the setting of lang for this user. - - Returns: - language code of the user - ''' - return self._lang - - def SetLang(self, lang): - '''Set twitter.lang for this user. - - Args: - lang: - language code for the user - ''' - self._lang = lang - - lang = property(GetLang, SetLang, - doc='The value of twitter.lang for this user.') - - def GetNotifications(self): - '''Get the setting of notifications for this user. - - Returns: - True/False for the notifications setting of the user - ''' - return self._notifications - - def SetNotifications(self, notifications): - '''Set twitter.notifications for this user. - - Args: - notifications: - True/False notifications setting for the user - ''' - self._notifications = notifications - - notifications = property(GetNotifications, SetNotifications, - doc='The value of twitter.notifications for this user.') - - def GetContributorsEnabled(self): - '''Get the setting of contributors_enabled for this user. - - Returns: - True/False contributors_enabled of the user - ''' - return self._contributors_enabled - - def SetContributorsEnabled(self, contributors_enabled): - '''Set twitter.contributors_enabled for this user. - - Args: - contributors_enabled: - True/False contributors_enabled setting for the user - ''' - self._contributors_enabled = contributors_enabled - - contributors_enabled = property(GetContributorsEnabled, SetContributorsEnabled, - doc='The value of twitter.contributors_enabled for this user.') - - def GetCreatedAt(self): - '''Get the setting of created_at for this user. - - Returns: - created_at value of the user - ''' - return self._created_at - - def SetCreatedAt(self, created_at): - '''Set twitter.created_at for this user. - - Args: - created_at: - created_at value for the user - ''' - self._created_at = created_at - - created_at = property(GetCreatedAt, SetCreatedAt, - doc='The value of twitter.created_at for this user.') - - def __ne__(self, other): - return not self.__eq__(other) - - def __eq__(self, other): - try: - return other and \ - self.id == other.id and \ - self.name == other.name and \ - self.screen_name == other.screen_name and \ - self.location == other.location and \ - self.description == other.description and \ - self.profile_image_url == other.profile_image_url and \ - self.profile_background_tile == other.profile_background_tile and \ - self.profile_background_image_url == other.profile_background_image_url and \ - self.profile_sidebar_fill_color == other.profile_sidebar_fill_color and \ - self.profile_background_color == other.profile_background_color and \ - self.profile_link_color == other.profile_link_color and \ - self.profile_text_color == other.profile_text_color and \ - self.protected == other.protected and \ - self.utc_offset == other.utc_offset and \ - self.time_zone == other.time_zone and \ - self.url == other.url and \ - self.statuses_count == other.statuses_count and \ - self.followers_count == other.followers_count and \ - self.favourites_count == other.favourites_count and \ - self.friends_count == other.friends_count and \ - self.status == other.status and \ - self.geo_enabled == other.geo_enabled and \ - self.verified == other.verified and \ - self.lang == other.lang and \ - self.notifications == other.notifications and \ - self.contributors_enabled == other.contributors_enabled and \ - self.created_at == other.created_at and \ - self.listed_count == other.listed_count - - except AttributeError: - return False - - def __str__(self): - '''A string representation of this twitter.User instance. - - The return value is the same as the JSON string representation. - - Returns: - A string representation of this twitter.User instance. - ''' - return self.AsJsonString() - - def AsJsonString(self): - '''A JSON string representation of this twitter.User instance. - - Returns: - A JSON string representation of this twitter.User instance - ''' - return simplejson.dumps(self.AsDict(), sort_keys=True) - - def AsDict(self): - '''A dict representation of this twitter.User instance. - - The return value uses the same key names as the JSON representation. - - Return: - A dict representing this twitter.User instance - ''' - data = {} - if self.id: - data['id'] = self.id - if self.name: - data['name'] = self.name - if self.screen_name: - data['screen_name'] = self.screen_name - if self.location: - data['location'] = self.location - if self.description: - data['description'] = self.description - if self.profile_image_url: - data['profile_image_url'] = self.profile_image_url - if self.profile_background_tile is not None: - data['profile_background_tile'] = self.profile_background_tile - if self.profile_background_image_url: - data['profile_sidebar_fill_color'] = self.profile_background_image_url - if self.profile_background_color: - data['profile_background_color'] = self.profile_background_color - if self.profile_link_color: - data['profile_link_color'] = self.profile_link_color - if self.profile_text_color: - data['profile_text_color'] = self.profile_text_color - if self.protected is not None: - data['protected'] = self.protected - if self.utc_offset: - data['utc_offset'] = self.utc_offset - if self.time_zone: - data['time_zone'] = self.time_zone - if self.url: - data['url'] = self.url - if self.status: - data['status'] = self.status.AsDict() - if self.friends_count: - data['friends_count'] = self.friends_count - if self.followers_count: - data['followers_count'] = self.followers_count - if self.statuses_count: - data['statuses_count'] = self.statuses_count - if self.favourites_count: - data['favourites_count'] = self.favourites_count - if self.geo_enabled: - data['geo_enabled'] = self.geo_enabled - if self.verified: - data['verified'] = self.verified - if self.lang: - data['lang'] = self.lang - if self.notifications: - data['notifications'] = self.notifications - if self.contributors_enabled: - data['contributors_enabled'] = self.contributors_enabled - if self.created_at: - data['created_at'] = self.created_at - if self.listed_count: - data['listed_count'] = self.listed_count - - return data - - @staticmethod - def NewFromJsonDict(data): - '''Create a new instance based on a JSON dict. - - Args: - data: - A JSON dict, as converted from the JSON in the twitter API - - Returns: - A twitter.User instance - ''' - if 'status' in data: - status = Status.NewFromJsonDict(data['status']) - else: - status = None - return User(id=data.get('id', None), - name=data.get('name', None), - screen_name=data.get('screen_name', None), - location=data.get('location', None), - description=data.get('description', None), - statuses_count=data.get('statuses_count', None), - followers_count=data.get('followers_count', None), - favourites_count=data.get('favourites_count', None), - friends_count=data.get('friends_count', None), - profile_image_url=data.get('profile_image_url_https', data.get('profile_image_url', None)), - profile_background_tile = data.get('profile_background_tile', None), - profile_background_image_url = data.get('profile_background_image_url', None), - profile_sidebar_fill_color = data.get('profile_sidebar_fill_color', None), - profile_background_color = data.get('profile_background_color', None), - profile_link_color = data.get('profile_link_color', None), - profile_text_color = data.get('profile_text_color', None), - protected = data.get('protected', None), - utc_offset = data.get('utc_offset', None), - time_zone = data.get('time_zone', None), - url=data.get('url', None), - status=status, - geo_enabled=data.get('geo_enabled', None), - verified=data.get('verified', None), - lang=data.get('lang', None), - notifications=data.get('notifications', None), - contributors_enabled=data.get('contributors_enabled', None), - created_at=data.get('created_at', None), - listed_count=data.get('listed_count', None)) - -class List(object): - '''A class representing the List structure used by the twitter API. - - The List structure exposes the following properties: - - list.id - list.name - list.slug - list.description - list.full_name - list.mode - list.uri - list.member_count - list.subscriber_count - list.following - ''' - def __init__(self, - id=None, - name=None, - slug=None, - description=None, - full_name=None, - mode=None, - uri=None, - member_count=None, - subscriber_count=None, - following=None, - user=None): - self.id = id - self.name = name - self.slug = slug - self.description = description - self.full_name = full_name - self.mode = mode - self.uri = uri - self.member_count = member_count - self.subscriber_count = subscriber_count - self.following = following - self.user = user - - def GetId(self): - '''Get the unique id of this list. - - Returns: - The unique id of this list - ''' - return self._id - - def SetId(self, id): - '''Set the unique id of this list. - - Args: - id: - The unique id of this list. - ''' - self._id = id - - id = property(GetId, SetId, - doc='The unique id of this list.') - - def GetName(self): - '''Get the real name of this list. - - Returns: - The real name of this list - ''' - return self._name - - def SetName(self, name): - '''Set the real name of this list. - - Args: - name: - The real name of this list - ''' - self._name = name - - name = property(GetName, SetName, - doc='The real name of this list.') - - def GetSlug(self): - '''Get the slug of this list. - - Returns: - The slug of this list - ''' - return self._slug - - def SetSlug(self, slug): - '''Set the slug of this list. - - Args: - slug: - The slug of this list. - ''' - self._slug = slug - - slug = property(GetSlug, SetSlug, - doc='The slug of this list.') - - def GetDescription(self): - '''Get the description of this list. - - Returns: - The description of this list - ''' - return self._description - - def SetDescription(self, description): - '''Set the description of this list. - - Args: - description: - The description of this list. - ''' - self._description = description - - description = property(GetDescription, SetDescription, - doc='The description of this list.') - - def GetFull_name(self): - '''Get the full_name of this list. - - Returns: - The full_name of this list - ''' - return self._full_name - - def SetFull_name(self, full_name): - '''Set the full_name of this list. - - Args: - full_name: - The full_name of this list. - ''' - self._full_name = full_name - - full_name = property(GetFull_name, SetFull_name, - doc='The full_name of this list.') - - def GetMode(self): - '''Get the mode of this list. - - Returns: - The mode of this list - ''' - return self._mode - - def SetMode(self, mode): - '''Set the mode of this list. - - Args: - mode: - The mode of this list. - ''' - self._mode = mode - - mode = property(GetMode, SetMode, - doc='The mode of this list.') - - def GetUri(self): - '''Get the uri of this list. - - Returns: - The uri of this list - ''' - return self._uri - - def SetUri(self, uri): - '''Set the uri of this list. - - Args: - uri: - The uri of this list. - ''' - self._uri = uri - - uri = property(GetUri, SetUri, - doc='The uri of this list.') - - def GetMember_count(self): - '''Get the member_count of this list. - - Returns: - The member_count of this list - ''' - return self._member_count - - def SetMember_count(self, member_count): - '''Set the member_count of this list. - - Args: - member_count: - The member_count of this list. - ''' - self._member_count = member_count - - member_count = property(GetMember_count, SetMember_count, - doc='The member_count of this list.') - - def GetSubscriber_count(self): - '''Get the subscriber_count of this list. - - Returns: - The subscriber_count of this list - ''' - return self._subscriber_count - - def SetSubscriber_count(self, subscriber_count): - '''Set the subscriber_count of this list. - - Args: - subscriber_count: - The subscriber_count of this list. - ''' - self._subscriber_count = subscriber_count - - subscriber_count = property(GetSubscriber_count, SetSubscriber_count, - doc='The subscriber_count of this list.') - - def GetFollowing(self): - '''Get the following status of this list. - - Returns: - The following status of this list - ''' - return self._following - - def SetFollowing(self, following): - '''Set the following status of this list. - - Args: - following: - The following of this list. - ''' - self._following = following - - following = property(GetFollowing, SetFollowing, - doc='The following status of this list.') - - def GetUser(self): - '''Get the user of this list. - - Returns: - The owner of this list - ''' - return self._user - - def SetUser(self, user): - '''Set the user of this list. - - Args: - user: - The owner of this list. - ''' - self._user = user - - user = property(GetUser, SetUser, - doc='The owner of this list.') - - def __ne__(self, other): - return not self.__eq__(other) - - def __eq__(self, other): - try: - return other and \ - self.id == other.id and \ - self.name == other.name and \ - self.slug == other.slug and \ - self.description == other.description and \ - self.full_name == other.full_name and \ - self.mode == other.mode and \ - self.uri == other.uri and \ - self.member_count == other.member_count and \ - self.subscriber_count == other.subscriber_count and \ - self.following == other.following and \ - self.user == other.user - - except AttributeError: - return False - - def __str__(self): - '''A string representation of this twitter.List instance. - - The return value is the same as the JSON string representation. - - Returns: - A string representation of this twitter.List instance. - ''' - return self.AsJsonString() - - def AsJsonString(self): - '''A JSON string representation of this twitter.List instance. - - Returns: - A JSON string representation of this twitter.List instance - ''' - return simplejson.dumps(self.AsDict(), sort_keys=True) - - def AsDict(self): - '''A dict representation of this twitter.List instance. - - The return value uses the same key names as the JSON representation. - - Return: - A dict representing this twitter.List instance - ''' - data = {} - if self.id: - data['id'] = self.id - if self.name: - data['name'] = self.name - if self.slug: - data['slug'] = self.slug - if self.description: - data['description'] = self.description - if self.full_name: - data['full_name'] = self.full_name - if self.mode: - data['mode'] = self.mode - if self.uri: - data['uri'] = self.uri - if self.member_count is not None: - data['member_count'] = self.member_count - if self.subscriber_count is not None: - data['subscriber_count'] = self.subscriber_count - if self.following is not None: - data['following'] = self.following - if self.user is not None: - data['user'] = self.user.AsDict() - return data - - @staticmethod - def NewFromJsonDict(data): - '''Create a new instance based on a JSON dict. - - Args: - data: - A JSON dict, as converted from the JSON in the twitter API - - Returns: - A twitter.List instance - ''' - if 'user' in data: - user = User.NewFromJsonDict(data['user']) - else: - user = None - return List(id=data.get('id', None), - name=data.get('name', None), - slug=data.get('slug', None), - description=data.get('description', None), - full_name=data.get('full_name', None), - mode=data.get('mode', None), - uri=data.get('uri', None), - member_count=data.get('member_count', None), - subscriber_count=data.get('subscriber_count', None), - following=data.get('following', None), - user=user) - -class DirectMessage(object): - '''A class representing the DirectMessage structure used by the twitter API. - - The DirectMessage structure exposes the following properties: - - direct_message.id - direct_message.created_at - direct_message.created_at_in_seconds # read only - direct_message.sender_id - direct_message.sender_screen_name - direct_message.recipient_id - direct_message.recipient_screen_name - direct_message.text - ''' - - def __init__(self, - id=None, - created_at=None, - sender_id=None, - sender_screen_name=None, - recipient_id=None, - recipient_screen_name=None, - text=None): - '''An object to hold a Twitter direct message. - - This class is normally instantiated by the twitter.Api class and - returned in a sequence. - - Note: Dates are posted in the form "Sat Jan 27 04:17:38 +0000 2007" - - Args: - id: - The unique id of this direct message. [Optional] - created_at: - The time this direct message was posted. [Optional] - sender_id: - The id of the twitter user that sent this message. [Optional] - sender_screen_name: - The name of the twitter user that sent this message. [Optional] - recipient_id: - The id of the twitter that received this message. [Optional] - recipient_screen_name: - The name of the twitter that received this message. [Optional] - text: - The text of this direct message. [Optional] - ''' - self.id = id - self.created_at = created_at - self.sender_id = sender_id - self.sender_screen_name = sender_screen_name - self.recipient_id = recipient_id - self.recipient_screen_name = recipient_screen_name - self.text = text - - def GetId(self): - '''Get the unique id of this direct message. - - Returns: - The unique id of this direct message - ''' - return self._id - - def SetId(self, id): - '''Set the unique id of this direct message. - - Args: - id: - The unique id of this direct message - ''' - self._id = id - - id = property(GetId, SetId, - doc='The unique id of this direct message.') - - def GetCreatedAt(self): - '''Get the time this direct message was posted. - - Returns: - The time this direct message was posted - ''' - return self._created_at - - def SetCreatedAt(self, created_at): - '''Set the time this direct message was posted. - - Args: - created_at: - The time this direct message was created - ''' - self._created_at = created_at - - created_at = property(GetCreatedAt, SetCreatedAt, - doc='The time this direct message was posted.') - - def GetCreatedAtInSeconds(self): - '''Get the time this direct message was posted, in seconds since the epoch. - - Returns: - The time this direct message was posted, in seconds since the epoch. - ''' - return calendar.timegm(rfc822.parsedate(self.created_at)) - - created_at_in_seconds = property(GetCreatedAtInSeconds, - doc="The time this direct message was " - "posted, in seconds since the epoch") - - def GetSenderId(self): - '''Get the unique sender id of this direct message. - - Returns: - The unique sender id of this direct message - ''' - return self._sender_id - - def SetSenderId(self, sender_id): - '''Set the unique sender id of this direct message. - - Args: - sender_id: - The unique sender id of this direct message - ''' - self._sender_id = sender_id - - sender_id = property(GetSenderId, SetSenderId, - doc='The unique sender id of this direct message.') - - def GetSenderScreenName(self): - '''Get the unique sender screen name of this direct message. - - Returns: - The unique sender screen name of this direct message - ''' - return self._sender_screen_name - - def SetSenderScreenName(self, sender_screen_name): - '''Set the unique sender screen name of this direct message. - - Args: - sender_screen_name: - The unique sender screen name of this direct message - ''' - self._sender_screen_name = sender_screen_name - - sender_screen_name = property(GetSenderScreenName, SetSenderScreenName, - doc='The unique sender screen name of this direct message.') - - def GetRecipientId(self): - '''Get the unique recipient id of this direct message. - - Returns: - The unique recipient id of this direct message - ''' - return self._recipient_id - - def SetRecipientId(self, recipient_id): - '''Set the unique recipient id of this direct message. - - Args: - recipient_id: - The unique recipient id of this direct message - ''' - self._recipient_id = recipient_id - - recipient_id = property(GetRecipientId, SetRecipientId, - doc='The unique recipient id of this direct message.') - - def GetRecipientScreenName(self): - '''Get the unique recipient screen name of this direct message. - - Returns: - The unique recipient screen name of this direct message - ''' - return self._recipient_screen_name - - def SetRecipientScreenName(self, recipient_screen_name): - '''Set the unique recipient screen name of this direct message. - - Args: - recipient_screen_name: - The unique recipient screen name of this direct message - ''' - self._recipient_screen_name = recipient_screen_name - - recipient_screen_name = property(GetRecipientScreenName, SetRecipientScreenName, - doc='The unique recipient screen name of this direct message.') - - def GetText(self): - '''Get the text of this direct message. - - Returns: - The text of this direct message. - ''' - return self._text - - def SetText(self, text): - '''Set the text of this direct message. - - Args: - text: - The text of this direct message - ''' - self._text = text - - text = property(GetText, SetText, - doc='The text of this direct message') - - def __ne__(self, other): - return not self.__eq__(other) - - def __eq__(self, other): - try: - return other and \ - self.id == other.id and \ - self.created_at == other.created_at and \ - self.sender_id == other.sender_id and \ - self.sender_screen_name == other.sender_screen_name and \ - self.recipient_id == other.recipient_id and \ - self.recipient_screen_name == other.recipient_screen_name and \ - self.text == other.text - except AttributeError: - return False - - def __str__(self): - '''A string representation of this twitter.DirectMessage instance. - - The return value is the same as the JSON string representation. - - Returns: - A string representation of this twitter.DirectMessage instance. - ''' - return self.AsJsonString() - - def AsJsonString(self): - '''A JSON string representation of this twitter.DirectMessage instance. - - Returns: - A JSON string representation of this twitter.DirectMessage instance - ''' - return simplejson.dumps(self.AsDict(), sort_keys=True) - - def AsDict(self): - '''A dict representation of this twitter.DirectMessage instance. - - The return value uses the same key names as the JSON representation. - - Return: - A dict representing this twitter.DirectMessage instance - ''' - data = {} - if self.id: - data['id'] = self.id - if self.created_at: - data['created_at'] = self.created_at - if self.sender_id: - data['sender_id'] = self.sender_id - if self.sender_screen_name: - data['sender_screen_name'] = self.sender_screen_name - if self.recipient_id: - data['recipient_id'] = self.recipient_id - if self.recipient_screen_name: - data['recipient_screen_name'] = self.recipient_screen_name - if self.text: - data['text'] = self.text - return data - - @staticmethod - def NewFromJsonDict(data): - '''Create a new instance based on a JSON dict. - - Args: - data: - A JSON dict, as converted from the JSON in the twitter API - - Returns: - A twitter.DirectMessage instance - ''' - return DirectMessage(created_at=data.get('created_at', None), - recipient_id=data.get('recipient_id', None), - sender_id=data.get('sender_id', None), - text=data.get('text', None), - sender_screen_name=data.get('sender_screen_name', None), - id=data.get('id', None), - recipient_screen_name=data.get('recipient_screen_name', None)) - -class Hashtag(object): - ''' A class representing a twitter hashtag - ''' - def __init__(self, - text=None): - self.text = text - - @staticmethod - def NewFromJsonDict(data): - '''Create a new instance based on a JSON dict. - - Args: - data: - A JSON dict, as converted from the JSON in the twitter API - - Returns: - A twitter.Hashtag instance - ''' - return Hashtag(text = data.get('text', None)) - -class Trend(object): - ''' A class representing a trending topic - ''' - def __init__(self, name=None, query=None, timestamp=None, url=None): - self.name = name - self.query = query - self.timestamp = timestamp - self.url = url - - def __str__(self): - return 'Name: %s\nQuery: %s\nTimestamp: %s\nSearch URL: %s\n' % (self.name, self.query, self.timestamp, self.url) - - def __ne__(self, other): - return not self.__eq__(other) - - def __eq__(self, other): - try: - return other and \ - self.name == other.name and \ - self.query == other.query and \ - self.timestamp == other.timestamp and \ - self.url == self.url - except AttributeError: - return False - - @staticmethod - def NewFromJsonDict(data, timestamp = None): - '''Create a new instance based on a JSON dict - - Args: - data: - A JSON dict - timestamp: - Gets set as the timestamp property of the new object - - Returns: - A twitter.Trend object - ''' - return Trend(name=data.get('name', None), - query=data.get('query', None), - url=data.get('url', None), - timestamp=timestamp) - -class Url(object): - '''A class representing an URL contained in a tweet''' - def __init__(self, - url=None, - expanded_url=None): - self.url = url - self.expanded_url = expanded_url - - @staticmethod - def NewFromJsonDict(data): - '''Create a new instance based on a JSON dict. - - Args: - data: - A JSON dict, as converted from the JSON in the twitter API - - Returns: - A twitter.Url instance - ''' - return Url(url=data.get('url', None), - expanded_url=data.get('expanded_url', None)) - -class Api(object): - '''A python interface into the Twitter API - - By default, the Api caches results for 1 minute. - - Example usage: - - To create an instance of the twitter.Api class, with no authentication: - - >>> import twitter - >>> api = twitter.Api() - - To fetch the most recently posted public twitter status messages: - - >>> statuses = api.GetPublicTimeline() - >>> print [s.user.name for s in statuses] - [u'DeWitt', u'Kesuke Miyagi', u'ev', u'Buzz Andersen', u'Biz Stone'] #... - - To fetch a single user's public status messages, where "user" is either - a Twitter "short name" or their user id. - - >>> statuses = api.GetUserTimeline(user) - >>> print [s.text for s in statuses] - - To use authentication, instantiate the twitter.Api class with a - consumer key and secret; and the oAuth key and secret: - - >>> api = twitter.Api(consumer_key='twitter consumer key', - consumer_secret='twitter consumer secret', - access_token_key='the_key_given', - access_token_secret='the_key_secret') - - To fetch your friends (after being authenticated): - - >>> users = api.GetFriends() - >>> print [u.name for u in users] - - To post a twitter status message (after being authenticated): - - >>> status = api.PostUpdate('I love python-twitter!') - >>> print status.text - I love python-twitter! - - There are many other methods, including: - - >>> api.PostUpdates(status) - >>> api.PostDirectMessage(user, text) - >>> api.GetUser(user) - >>> api.GetReplies() - >>> api.GetUserTimeline(user) - >>> api.GetHomeTimeLine() - >>> api.GetStatus(id) - >>> api.DestroyStatus(id) - >>> api.GetFriendsTimeline(user) - >>> api.GetFriends(user) - >>> api.GetFollowers() - >>> api.GetFeatured() - >>> api.GetDirectMessages() - >>> api.GetSentDirectMessages() - >>> api.PostDirectMessage(user, text) - >>> api.DestroyDirectMessage(id) - >>> api.DestroyFriendship(user) - >>> api.CreateFriendship(user) - >>> api.GetUserByEmail(email) - >>> api.VerifyCredentials() - ''' - - DEFAULT_CACHE_TIMEOUT = 60 # cache for 1 minute - _API_REALM = 'Twitter API' - - def __init__(self, - consumer_key=None, - consumer_secret=None, - access_token_key=None, - access_token_secret=None, - input_encoding=None, - request_headers=None, - cache=DEFAULT_CACHE, - shortner=None, - base_url=None, - use_gzip_compression=False, - debugHTTP=False): - '''Instantiate a new twitter.Api object. - - Args: - consumer_key: - Your Twitter user's consumer_key. - consumer_secret: - Your Twitter user's consumer_secret. - access_token_key: - The oAuth access token key value you retrieved - from running get_access_token.py. - access_token_secret: - The oAuth access token's secret, also retrieved - from the get_access_token.py run. - input_encoding: - The encoding used to encode input strings. [Optional] - request_header: - A dictionary of additional HTTP request headers. [Optional] - cache: - The cache instance to use. Defaults to DEFAULT_CACHE. - Use None to disable caching. [Optional] - shortner: - The shortner instance to use. Defaults to None. - See shorten_url.py for an example shortner. [Optional] - base_url: - The base URL to use to contact the Twitter API. - Defaults to https://api.twitter.com. [Optional] - use_gzip_compression: - Set to True to tell enable gzip compression for any call - made to Twitter. Defaults to False. [Optional] - debugHTTP: - Set to True to enable debug output from urllib2 when performing - any HTTP requests. Defaults to False. [Optional] - ''' - self.SetCache(cache) - self._urllib = urllib2 - self._cache_timeout = Api.DEFAULT_CACHE_TIMEOUT - self._input_encoding = input_encoding - self._use_gzip = use_gzip_compression - self._debugHTTP = debugHTTP - self._oauth_consumer = None - self._shortlink_size = 19 - - self._InitializeRequestHeaders(request_headers) - self._InitializeUserAgent() - self._InitializeDefaultParameters() - - if base_url is None: - self.base_url = 'https://api.twitter.com/1.1' - else: - self.base_url = base_url - - if consumer_key is not None and (access_token_key is None or - access_token_secret is None): - print >> sys.stderr, 'Twitter now requires an oAuth Access Token for API calls.' - print >> sys.stderr, 'If your using this library from a command line utility, please' - print >> sys.stderr, 'run the the included get_access_token.py tool to generate one.' - - raise TwitterError('Twitter requires oAuth Access Token for all API access') - - self.SetCredentials(consumer_key, consumer_secret, access_token_key, access_token_secret) - - def SetCredentials(self, - consumer_key, - consumer_secret, - access_token_key=None, - access_token_secret=None): - '''Set the consumer_key and consumer_secret for this instance - - Args: - consumer_key: - The consumer_key of the twitter account. - consumer_secret: - The consumer_secret for the twitter account. - access_token_key: - The oAuth access token key value you retrieved - from running get_access_token.py. - access_token_secret: - The oAuth access token's secret, also retrieved - from the get_access_token.py run. - ''' - self._consumer_key = consumer_key - self._consumer_secret = consumer_secret - self._access_token_key = access_token_key - self._access_token_secret = access_token_secret - self._oauth_consumer = None - - if consumer_key is not None and consumer_secret is not None and \ - access_token_key is not None and access_token_secret is not None: - self._signature_method_plaintext = oauth.SignatureMethod_PLAINTEXT() - self._signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1() - - self._oauth_token = oauth.Token(key=access_token_key, secret=access_token_secret) - self._oauth_consumer = oauth.Consumer(key=consumer_key, secret=consumer_secret) - - def ClearCredentials(self): - '''Clear the any credentials for this instance - ''' - self._consumer_key = None - self._consumer_secret = None - self._access_token_key = None - self._access_token_secret = None - self._oauth_consumer = None - - def GetSearch(self, - term=None, - geocode=None, - since_id=None, - max_id=None, - until=None, - count=15, - lang=None, - locale=None, - result_type="mixed", - include_entities=None): - '''Return twitter search results for a given term. - - Args: - term: - Term to search by. Optional if you include geocode. - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occurred since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns only statuses with an ID less than (that is, older - than) or equal to the specified ID. [Optional] - until: - Returns tweets generated before the given date. Date should be - formatted as YYYY-MM-DD. [Optional] - geocode: - Geolocation information in the form (latitude, longitude, radius) - [Optional] - count: - Number of results to return. Default is 15 [Optional] - lang: - Language for results as ISO 639-1 code. Default is None (all languages) - [Optional] - locale: - Language of the search query. Currently only 'ja' is effective. This is - intended for language-specific consumers and the default should work in - the majority of cases. - result_type: - Type of result which should be returned. Default is "mixed". Other - valid options are "recent" and "popular". [Optional] - include_entities: - If True, each tweet will include a node called "entities,". - This node offers a variety of metadata about the tweet in a - discrete structure, including: user_mentions, urls, and - hashtags. [Optional] - - Returns: - A sequence of twitter.Status instances, one for each message containing - the term - ''' - # Build request parameters - parameters = {} - - if since_id: - try: - parameters['since_id'] = long(since_id) - except: - raise TwitterError("since_id must be an integer") - - if max_id: - try: - parameters['max_id'] = long(max_id) - except: - raise TwitterError("max_id must be an integer") - - if until: - parameters['until'] = until - - if lang: - parameters['lang'] = lang - - if locale: - parameters['locale'] = locale - - if term is None and geocode is None: - return [] - - if term is not None: - parameters['q'] = term - - if geocode is not None: - parameters['geocode'] = ','.join(map(str, geocode)) - - if include_entities: - parameters['include_entities'] = 1 - - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - - if result_type in ["mixed", "popular", "recent"]: - parameters['result_type'] = result_type - - # Make and send requests - url = '%s/search/tweets.json' % self.base_url - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - - # Return built list of statuses - return [Status.NewFromJsonDict(x) for x in data['statuses']] - - def GetUsersSearch(self, - term=None, - page=1, - count=20, - include_entities=None): - '''Return twitter user search results for a given term. - - Args: - term: - Term to search by. - page: - Page of results to return. Default is 1 - [Optional] - count: - Number of results to return. Default is 20 - [Optional] - include_entities: - If True, each tweet will include a node called "entities,". - This node offers a variety of metadata about the tweet in a - discrete structure, including: user_mentions, urls, and hashtags. - [Optional] - - Returns: - A sequence of twitter.User instances, one for each message containing - the term - ''' - # Build request parameters - parameters = {} - - if term is not None: - parameters['q'] = term - - if include_entities: - parameters['include_entities'] = 1 - - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - - # Make and send requests - url = '%s/users/search.json' % self.base_url - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [User.NewFromJsonDict(x) for x in data] - - def GetTrendsCurrent(self, exclude=None): - '''Get the current top trending topics (global) - - Args: - exclude: - Appends the exclude parameter as a request parameter. - Currently only exclude=hashtags is supported. [Optional] - - Returns: - A list with 10 entries. Each entry contains a trend. - ''' - return self.GetTrendsWoeid(id=1, exclude=exclude) - - def GetTrendsWoeid(self, id, exclude=None): - '''Return the top 10 trending topics for a specific WOEID, if trending - information is available for it. - - Args: - woeid: - the Yahoo! Where On Earth ID for a location. - exclude: - Appends the exclude parameter as a request parameter. - Currently only exclude=hashtags is supported. [Optional] - - Returns: - A list with 10 entries. Each entry contains a trend. - ''' - url = '%s/trends/place.json' % (self.base_url) - parameters = {'id': id} - - if exclude: - parameters['exclude'] = exclude - - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - - trends = [] - timestamp = data[0]['as_of'] - - for trend in data[0]['trends']: - trends.append(Trend.NewFromJsonDict(trend, timestamp = timestamp)) - return trends - - def GetHomeTimeline(self, - count=None, - since_id=None, - max_id=None, - trim_user=False, - exclude_replies=False, - contributor_details=False, - include_entities=True): - ''' - Fetch a collection of the most recent Tweets and retweets posted by the - authenticating user and the users they follow. - - The home timeline is central to how most users interact with the Twitter - service. - - The twitter.Api instance must be authenticated. - - Args: - count: - Specifies the number of statuses to retrieve. May not be - greater than 200. Defaults to 20. [Optional] - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occurred since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns results with an ID less than (that is, older than) or - equal to the specified ID. [Optional] - trim_user: - When True, each tweet returned in a timeline will include a user - object including only the status authors numerical ID. Omit this - parameter to receive the complete user object. [Optional] - exclude_replies: - This parameter will prevent replies from appearing in the - returned timeline. Using exclude_replies with the count - parameter will mean you will receive up-to count tweets - - this is because the count parameter retrieves that many - tweets before filtering out retweets and replies. - [Optional] - contributor_details: - This parameter enhances the contributors element of the - status response to include the screen_name of the contributor. - By default only the user_id of the contributor is included. - [Optional] - include_entities: - The entities node will be disincluded when set to false. - This node offers a variety of metadata about the tweet in a - discreet structure, including: user_mentions, urls, and - hashtags. [Optional] - - Returns: - A sequence of twitter.Status instances, one for each message - ''' - url = '%s/statuses/home_timeline.json' % self.base_url - - if not self._oauth_consumer: - raise TwitterError("API must be authenticated.") - parameters = {} - if count is not None: - try: - if int(count) > 200: - raise TwitterError("'count' may not be greater than 200") - except ValueError: - raise TwitterError("'count' must be an integer") - parameters['count'] = count - if since_id: - try: - parameters['since_id'] = long(since_id) - except ValueError: - raise TwitterError("'since_id' must be an integer") - if max_id: - try: - parameters['max_id'] = long(max_id) - except ValueError: - raise TwitterError("'max_id' must be an integer") - if trim_user: - parameters['trim_user'] = 1 - if exclude_replies: - parameters['exclude_replies'] = 1 - if contributor_details: - parameters['contributor_details'] = 1 - if not include_entities: - parameters['include_entities'] = 'false' - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [Status.NewFromJsonDict(x) for x in data] - - def GetUserTimeline(self, - user_id=None, - screen_name=None, - since_id=None, - max_id=None, - count=None, - include_rts=None, - trim_user=None, - exclude_replies=None): - '''Fetch the sequence of public Status messages for a single user. - - The twitter.Api instance must be authenticated if the user is private. - - Args: - user_id: - Specifies the ID of the user for whom to return the - user_timeline. Helpful for disambiguating when a valid user ID - is also a valid screen name. [Optional] - screen_name: - Specifies the screen name of the user for whom to return the - user_timeline. Helpful for disambiguating when a valid screen - name is also a user ID. [Optional] - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occurred since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns only statuses with an ID less than (that is, older - than) or equal to the specified ID. [Optional] - count: - Specifies the number of statuses to retrieve. May not be - greater than 200. [Optional] - include_rts: - If True, the timeline will contain native retweets (if they - exist) in addition to the standard stream of tweets. [Optional] - trim_user: - If True, statuses will only contain the numerical user ID only. - Otherwise a full user object will be returned for each status. - [Optional] - exclude_replies: - If True, this will prevent replies from appearing in the returned - timeline. Using exclude_replies with the count parameter will mean you - will receive up-to count tweets - this is because the count parameter - retrieves that many tweets before filtering out retweets and replies. - This parameter is only supported for JSON and XML responses. [Optional] - - Returns: - A sequence of Status instances, one for each message up to count - ''' - parameters = {} - - url = '%s/statuses/user_timeline.json' % (self.base_url) - - if user_id: - parameters['user_id'] = user_id - elif screen_name: - parameters['screen_name'] = screen_name - - if since_id: - try: - parameters['since_id'] = long(since_id) - except: - raise TwitterError("since_id must be an integer") - - if max_id: - try: - parameters['max_id'] = long(max_id) - except: - raise TwitterError("max_id must be an integer") - - if count: - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - - if include_rts: - parameters['include_rts'] = 1 - - if trim_user: - parameters['trim_user'] = 1 - - if exclude_replies: - parameters['exclude_replies'] = 1 - - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [Status.NewFromJsonDict(x) for x in data] - - def GetStatus(self, - id, - trim_user=False, - include_my_retweet=True, - include_entities=True): - '''Returns a single status message, specified by the id parameter. - - The twitter.Api instance must be authenticated. - - Args: - id: - The numeric ID of the status you are trying to retrieve. - trim_user: - When set to True, each tweet returned in a timeline will include - a user object including only the status authors numerical ID. - Omit this parameter to receive the complete user object. - [Optional] - include_my_retweet: - When set to True, any Tweets returned that have been retweeted by - the authenticating user will include an additional - current_user_retweet node, containing the ID of the source status - for the retweet. [Optional] - include_entities: - If False, the entities node will be disincluded. - This node offers a variety of metadata about the tweet in a - discreet structure, including: user_mentions, urls, and - hashtags. [Optional] - Returns: - A twitter.Status instance representing that status message - ''' - url = '%s/statuses/show.json' % (self.base_url) - - if not self._oauth_consumer: - raise TwitterError("API must be authenticated.") - - parameters = {} - - try: - parameters['id'] = long(id) - except ValueError: - raise TwitterError("'id' must be an integer.") - - if trim_user: - parameters['trim_user'] = 1 - if include_my_retweet: - parameters['include_my_retweet'] = 1 - if not include_entities: - parameters['include_entities'] = 'none' - - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return Status.NewFromJsonDict(data) - - def DestroyStatus(self, id, trim_user=False): - '''Destroys the status specified by the required ID parameter. - - The twitter.Api instance must be authenticated and the - authenticating user must be the author of the specified status. - - Args: - id: - The numerical ID of the status you're trying to destroy. - - Returns: - A twitter.Status instance representing the destroyed status message - ''' - if not self._oauth_consumer: - raise TwitterError("API must be authenticated.") - - try: - post_data = {'id': long(id)} - except: - raise TwitterError("id must be an integer") - url = '%s/statuses/destroy/%s.json' % (self.base_url, id) - if trim_user: - post_data['trim_user'] = 1 - json = self._FetchUrl(url, post_data=post_data) - data = self._ParseAndCheckTwitter(json) - return Status.NewFromJsonDict(data) - - @classmethod - def _calculate_status_length(cls, status, linksize=19): - dummy_link_replacement = 'https://-%d-chars%s/' % (linksize, '-'*(linksize - 18)) - shortened = ' '.join([x if not (x.startswith('http://') or - x.startswith('https://')) - else - dummy_link_replacement - for x in status.split(' ')]) - return len(shortened) - - def PostUpdate(self, status, in_reply_to_status_id=None, latitude=None, longitude=None, place_id=None, display_coordinates=False, trim_user=False): - '''Post a twitter status message from the authenticated user. - - The twitter.Api instance must be authenticated. - - https://dev.twitter.com/docs/api/1.1/post/statuses/update - - Args: - status: - The message text to be posted. - Must be less than or equal to 140 characters. - in_reply_to_status_id: - The ID of an existing status that the status to be posted is - in reply to. This implicitly sets the in_reply_to_user_id - attribute of the resulting status to the user ID of the - message being replied to. Invalid/missing status IDs will be - ignored. [Optional] - latitude: - Latitude coordinate of the tweet in degrees. Will only work - in conjunction with longitude argument. Both longitude and - latitude will be ignored by twitter if the user has a false - geo_enabled setting. [Optional] - longitude: - Longitude coordinate of the tweet in degrees. Will only work - in conjunction with latitude argument. Both longitude and - latitude will be ignored by twitter if the user has a false - geo_enabled setting. [Optional] - place_id: - A place in the world. These IDs can be retrieved from - GET geo/reverse_geocode. [Optional] - display_coordinates: - Whether or not to put a pin on the exact coordinates a tweet - has been sent from. [Optional] - trim_user: - If True the returned payload will only contain the user IDs, - otherwise the payload will contain the full user data item. - [Optional] - Returns: - A twitter.Status instance representing the message posted. - ''' - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - - url = '%s/statuses/update.json' % self.base_url - - if isinstance(status, unicode) or self._input_encoding is None: - u_status = status - else: - u_status = unicode(status, self._input_encoding) - - #if self._calculate_status_length(u_status, self._shortlink_size) > CHARACTER_LIMIT: - # raise TwitterError("Text must be less than or equal to %d characters. " - # "Consider using PostUpdates." % CHARACTER_LIMIT) - - data = {'status': status} - if in_reply_to_status_id: - data['in_reply_to_status_id'] = in_reply_to_status_id - if latitude is not None and longitude is not None: - data['lat'] = str(latitude) - data['long'] = str(longitude) - if place_id is not None: - data['place_id'] = str(place_id) - if display_coordinates: - data['display_coordinates'] = 'true' - if trim_user: - data['trim_user'] = 'true' - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return Status.NewFromJsonDict(data) - - def PostUpdates(self, status, continuation=None, **kwargs): - '''Post one or more twitter status messages from the authenticated user. - - Unlike api.PostUpdate, this method will post multiple status updates - if the message is longer than 140 characters. - - The twitter.Api instance must be authenticated. - - Args: - status: - The message text to be posted. - May be longer than 140 characters. - continuation: - The character string, if any, to be appended to all but the - last message. Note that Twitter strips trailing '...' strings - from messages. Consider using the unicode \u2026 character - (horizontal ellipsis) instead. [Defaults to None] - **kwargs: - See api.PostUpdate for a list of accepted parameters. - - Returns: - A of list twitter.Status instance representing the messages posted. - ''' - results = list() - if continuation is None: - continuation = '' - line_length = CHARACTER_LIMIT - len(continuation) - lines = textwrap.wrap(status, line_length) - for line in lines[0:-1]: - results.append(self.PostUpdate(line + continuation, **kwargs)) - results.append(self.PostUpdate(lines[-1], **kwargs)) - return results - - def PostRetweet(self, original_id, trim_user=False): - '''Retweet a tweet with the Retweet API. - - The twitter.Api instance must be authenticated. - - Args: - original_id: - The numerical id of the tweet that will be retweeted - trim_user: - If True the returned payload will only contain the user IDs, - otherwise the payload will contain the full user data item. - [Optional] - - Returns: - A twitter.Status instance representing the original tweet with retweet details embedded. - ''' - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - - try: - if int(original_id) <= 0: - raise TwitterError("'original_id' must be a positive number") - except ValueError: - raise TwitterError("'original_id' must be an integer") - - url = '%s/statuses/retweet/%s.json' % (self.base_url, original_id) - - data = {'id': original_id} - if trim_user: - data['trim_user'] = 'true' - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return Status.NewFromJsonDict(data) - - def GetUserRetweets(self, count=None, since_id=None, max_id=None, trim_user=False): - '''Fetch the sequence of retweets made by the authenticated user. - - The twitter.Api instance must be authenticated. - - Args: - count: - The number of status messages to retrieve. [Optional] - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occurred since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns results with an ID less than (that is, older than) or - equal to the specified ID. [Optional] - trim_user: - If True the returned payload will only contain the user IDs, - otherwise the payload will contain the full user data item. - [Optional] - - Returns: - A sequence of twitter.Status instances, one for each message up to count - ''' - return self.GetUserTimeline(since_id=since_id, count=count, max_id=max_id, trim_user=trim_user, exclude_replies=True, include_rts=True) - - def GetReplies(self, since_id=None, count=None, max_id=None, trim_user=False): - '''Get a sequence of status messages representing the 20 most - recent replies (status updates prefixed with @twitterID) to the - authenticating user. - - Args: - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occurred since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns results with an ID less than (that is, older than) or - equal to the specified ID. [Optional] - trim_user: - If True the returned payload will only contain the user IDs, - otherwise the payload will contain the full user data item. - [Optional] - - Returns: - A sequence of twitter.Status instances, one for each reply to the user. - ''' - return self.GetUserTimeline(since_id=since_id, count=count, max_id=max_id, trim_user=trim_user, exclude_replies=False, include_rts=False) - - def GetRetweets(self, statusid, count=None, trim_user=False): - '''Returns up to 100 of the first retweets of the tweet identified - by statusid - - Args: - statusid: - The ID of the tweet for which retweets should be searched for - count: - The number of status messages to retrieve. [Optional] - trim_user: - If True the returned payload will only contain the user IDs, - otherwise the payload will contain the full user data item. - [Optional] - - Returns: - A list of twitter.Status instances, which are retweets of statusid - ''' - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instsance must be authenticated.") - url = '%s/statuses/retweets/%s.json' % (self.base_url, statusid) - parameters = {} - if trim_user: - parameters['trim_user'] = 'true' - if count: - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [Status.NewFromJsonDict(s) for s in data] - - def GetRetweetsOfMe(self, - count=None, - since_id=None, - max_id=None, - trim_user=False, - include_entities=True, - include_user_entities=True): - '''Returns up to 100 of the most recent tweets of the user that have been - retweeted by others. - - Args: - count: - The number of retweets to retrieve, up to 100. If omitted, 20 is - assumed. - since_id: - Returns results with an ID greater than (newer than) this ID. - max_id: - Returns results with an ID less than or equal to this ID. - trim_user: - When True, the user object for each tweet will only be an ID. - include_entities: - When True, the tweet entities will be included. - include_user_entities: - When True, the user entities will be included. - ''' - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - url = '%s/statuses/retweets_of_me.json' % self.base_url - parameters = {} - if count is not None: - try: - if int(count) > 100: - raise TwitterError("'count' may not be greater than 100") - except ValueError: - raise TwitterError("'count' must be an integer") - if count: - parameters['count'] = count - if since_id: - parameters['since_id'] = since_id - if max_id: - parameters['max_id'] = max_id - if trim_user: - parameters['trim_user'] = trim_user - if not include_entities: - parameters['include_entities'] = include_entities - if not include_user_entities: - parameters['include_user_entities'] = include_user_entities - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [Status.NewFromJsonDict(s) for s in data] - - def GetFriends(self, user_id=None, screen_name=None, cursor=-1, skip_status=False, include_user_entities=False): - '''Fetch the sequence of twitter.User instances, one for each friend. - - The twitter.Api instance must be authenticated. - - Args: - user_id: - The twitter id of the user whose friends you are fetching. - If not specified, defaults to the authenticated user. [Optional] - screen_name: - The twitter name of the user whose friends you are fetching. - If not specified, defaults to the authenticated user. [Optional] - cursor: - Should be set to -1 for the initial call and then is used to - control what result page Twitter returns [Optional(ish)] - skip_status: - If True the statuses will not be returned in the user items. - [Optional] - include_user_entities: - When True, the user entities will be included. - - Returns: - A sequence of twitter.User instances, one for each friend - ''' - if not self._oauth_consumer: - raise TwitterError("twitter.Api instance must be authenticated") - url = '%s/friends/list.json' % self.base_url - result = [] - parameters = {} - if user_id is not None: - parameters['user_id'] = user_id - if screen_name is not None: - parameters['screen_name'] = screen_name - if skip_status: - parameters['skip_status'] = True - if include_user_entities: - parameters['include_user_entities'] = True - while True: - parameters['cursor'] = cursor - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - result += [User.NewFromJsonDict(x) for x in data['users']] - if 'next_cursor' in data: - if data['next_cursor'] == 0 or data['next_cursor'] == data['previous_cursor']: - break - else: - cursor = data['next_cursor'] - else: - break - return result - - def GetFriendIDs(self, user_id=None, screen_name=None, cursor=-1, stringify_ids=False, count=None): - '''Returns a list of twitter user id's for every person - the specified user is following. - - Args: - user_id: - The id of the user to retrieve the id list for - [Optional] - screen_name: - The screen_name of the user to retrieve the id list for - [Optional] - cursor: - Specifies the Twitter API Cursor location to start at. - Note: there are pagination limits. - [Optional] - stringify_ids: - if True then twitter will return the ids as strings instead of integers. - [Optional] - count: - The number of status messages to retrieve. [Optional] - - Returns: - A list of integers, one for each user id. - ''' - url = '%s/friends/ids.json' % self.base_url - if not self._oauth_consumer: - raise TwitterError("twitter.Api instance must be authenticated") - parameters = {} - if user_id is not None: - parameters['user_id'] = user_id - if screen_name is not None: - parameters['screen_name'] = screen_name - if stringify_ids: - parameters['stringify_ids'] = True - if count is not None: - parameters['count'] = count - result = [] - while True: - parameters['cursor'] = cursor - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - result += [x for x in data['ids']] - if 'next_cursor' in data: - if data['next_cursor'] == 0 or data['next_cursor'] == data['previous_cursor']: - break - else: - cursor = data['next_cursor'] - else: - break - return result - - - def GetFollowerIDs(self, user_id=None, screen_name=None, cursor=-1, stringify_ids=False, count=None, total_count=None): - '''Returns a list of twitter user id's for every person - that is following the specified user. - - Args: - user_id: - The id of the user to retrieve the id list for - [Optional] - screen_name: - The screen_name of the user to retrieve the id list for - [Optional] - cursor: - Specifies the Twitter API Cursor location to start at. - Note: there are pagination limits. - [Optional] - stringify_ids: - if True then twitter will return the ids as strings instead of integers. - [Optional] - count: - The number of user id's to retrieve per API request. Please be aware that - this might get you rate-limited if set to a small number. By default Twitter - will retrieve 5000 UIDs per call. - [Optional] - total_count: - The total amount of UIDs to retrieve. Good if the account has many followers - and you don't want to get rate limited. The data returned might contain more - UIDs if total_count is not a multiple of count (5000 by default). - [Optional] - - - Returns: - A list of integers, one for each user id. - ''' - url = '%s/followers/ids.json' % self.base_url - if not self._oauth_consumer: - raise TwitterError("twitter.Api instance must be authenticated") - parameters = {} - if user_id is not None: - parameters['user_id'] = user_id - if screen_name is not None: - parameters['screen_name'] = screen_name - if stringify_ids: - parameters['stringify_ids'] = True - if count is not None: - parameters['count'] = count - result = [] - while True: - if total_count and total_count < count: - parameters['count'] = total_count - parameters['cursor'] = cursor - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - result += [x for x in data['ids']] - if 'next_cursor' in data: - if data['next_cursor'] == 0 or data['next_cursor'] == data['previous_cursor']: - break - else: - cursor = data['next_cursor'] - total_count -= len(data['ids']) - if total_count < 1: - break - else: - break - return result - - def GetFollowers(self, user_id=None, screen_name=None, cursor=-1, skip_status=False, include_user_entities=False): - '''Fetch the sequence of twitter.User instances, one for each follower - - The twitter.Api instance must be authenticated. - - Args: - user_id: - The twitter id of the user whose followers you are fetching. - If not specified, defaults to the authenticated user. [Optional] - screen_name: - The twitter name of the user whose followers you are fetching. - If not specified, defaults to the authenticated user. [Optional] - cursor: - Should be set to -1 for the initial call and then is used to - control what result page Twitter returns [Optional(ish)] - skip_status: - If True the statuses will not be returned in the user items. - [Optional] - include_user_entities: - When True, the user entities will be included. - - Returns: - A sequence of twitter.User instances, one for each follower - ''' - if not self._oauth_consumer: - raise TwitterError("twitter.Api instance must be authenticated") - url = '%s/followers/list.json' % self.base_url - result = [] - parameters = {} - if user_id is not None: - parameters['user_id'] = user_id - if screen_name is not None: - parameters['screen_name'] = screen_name - if skip_status: - parameters['skip_status'] = True - if include_user_entities: - parameters['include_user_entities'] = True - while True: - parameters['cursor'] = cursor - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - result += [User.NewFromJsonDict(x) for x in data['users']] - if 'next_cursor' in data: - if data['next_cursor'] == 0 or data['next_cursor'] == data['previous_cursor']: - break - else: - cursor = data['next_cursor'] - else: - break - return result - - def UsersLookup(self, user_id=None, screen_name=None, users=None, include_entities=True): - '''Fetch extended information for the specified users. - - Users may be specified either as lists of either user_ids, - screen_names, or twitter.User objects. The list of users that - are queried is the union of all specified parameters. - - The twitter.Api instance must be authenticated. - - Args: - user_id: - A list of user_ids to retrieve extended information. - [Optional] - screen_name: - A list of screen_names to retrieve extended information. - [Optional] - users: - A list of twitter.User objects to retrieve extended information. - [Optional] - include_entities: - The entities node that may appear within embedded statuses will be - disincluded when set to False. - [Optional] - - Returns: - A list of twitter.User objects for the requested users - ''' - - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - if not user_id and not screen_name and not users: - raise TwitterError("Specify at least one of user_id, screen_name, or users.") - url = '%s/users/lookup.json' % self.base_url - parameters = {} - uids = list() - if user_id: - uids.extend(user_id) - if users: - uids.extend([u.id for u in users]) - if len(uids): - parameters['user_id'] = ','.join(["%s" % u for u in uids]) - if screen_name: - parameters['screen_name'] = ','.join(screen_name) - if not include_entities: - parameters['include_entities'] = 'false' - json = self._FetchUrl(url, parameters=parameters) - try: - data = self._ParseAndCheckTwitter(json) - except TwitterError, e: - _, e, _ = sys.exc_info() - t = e.args[0] - if len(t) == 1 and ('code' in t[0]) and (t[0]['code'] == 34): - data = [] - else: - raise - - return [User.NewFromJsonDict(u) for u in data] - - def GetUser(self, user_id=None, screen_name=None, include_entities=True): - '''Returns a single user. - - The twitter.Api instance must be authenticated. - - Args: - user_id: - The id of the user to retrieve. - [Optional] - screen_name: - The screen name of the user for whom to return results for. Either a - user_id or screen_name is required for this method. - [Optional] - include_entities: - if set to False, the 'entities' node will not be included. - [Optional] - - - Returns: - A twitter.User instance representing that user - ''' - url = '%s/users/show.json' % (self.base_url) - parameters = {} - - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - - if user_id: - parameters['user_id'] = user_id - elif screen_name: - parameters['screen_name'] = screen_name - else: - raise TwitterError("Specify at least one of user_id or screen_name.") - - if not include_entities: - parameters['include_entities'] = 'false' - - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return User.NewFromJsonDict(data) - - def GetDirectMessages(self, since_id=None, max_id=None, count=None, include_entities=True, skip_status=False): - '''Returns a list of the direct messages sent to the authenticating user. - - The twitter.Api instance must be authenticated. - - Args: - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occurred since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns results with an ID less than (that is, older than) or - equal to the specified ID. [Optional] - count: - Specifies the number of direct messages to try and retrieve, up to a - maximum of 200. The value of count is best thought of as a limit to the - number of Tweets to return because suspended or deleted content is - removed after the count has been applied. [Optional] - include_entities: - The entities node will not be included when set to False. - [Optional] - skip_status: - When set to True statuses will not be included in the returned user - objects. [Optional] - - Returns: - A sequence of twitter.DirectMessage instances - ''' - url = '%s/direct_messages.json' % self.base_url - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - parameters = {} - if since_id: - parameters['since_id'] = since_id - if max_id: - parameters['max_id'] = max_id - if count: - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - if not include_entities: - parameters['include_entities'] = 'false' - if skip_status: - parameters['skip_status'] = 1 - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [DirectMessage.NewFromJsonDict(x) for x in data] - - def GetSentDirectMessages(self, since_id=None, max_id=None, count=None, page=None, include_entities=True): - '''Returns a list of the direct messages sent by the authenticating user. - - The twitter.Api instance must be authenticated. - - Args: - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occured since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns results with an ID less than (that is, older than) or - equal to the specified ID. [Optional] - count: - Specifies the number of direct messages to try and retrieve, up to a - maximum of 200. The value of count is best thought of as a limit to the - number of Tweets to return because suspended or deleted content is - removed after the count has been applied. [Optional] - page: - Specifies the page of results to retrieve. - Note: there are pagination limits. [Optional] - include_entities: - The entities node will not be included when set to False. - [Optional] - - Returns: - A sequence of twitter.DirectMessage instances - ''' - url = '%s/direct_messages/sent.json' % self.base_url - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - parameters = {} - if since_id: - parameters['since_id'] = since_id - if page: - parameters['page'] = page - if max_id: - parameters['max_id'] = max_id - if count: - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - if not include_entities: - parameters['include_entities'] = 'false' - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [DirectMessage.NewFromJsonDict(x) for x in data] - - def PostDirectMessage(self, text, user_id=None, screen_name=None): - '''Post a twitter direct message from the authenticated user - - The twitter.Api instance must be authenticated. user_id or screen_name - must be specified. - - Args: - text: The message text to be posted. Must be less than 140 characters. - user_id: - The ID of the user who should receive the direct message. - [Optional] - screen_name: - The screen name of the user who should receive the direct message. - [Optional] - - Returns: - A twitter.DirectMessage instance representing the message posted - ''' - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - url = '%s/direct_messages/new.json' % self.base_url - data = {'text': text} - if user_id: - data['user_id'] = user_id - elif screen_name: - data['screen_name'] = screen_name - else: - raise TwitterError("Specify at least one of user_id or screen_name.") - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return DirectMessage.NewFromJsonDict(data) - - def DestroyDirectMessage(self, id, include_entities=True): - '''Destroys the direct message specified in the required ID parameter. - - The twitter.Api instance must be authenticated, and the - authenticating user must be the recipient of the specified direct - message. - - Args: - id: The id of the direct message to be destroyed - - Returns: - A twitter.DirectMessage instance representing the message destroyed - ''' - url = '%s/direct_messages/destroy.json' % self.base_url - data = {'id': id} - if not include_entities: - data['include_entities'] = 'false' - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return DirectMessage.NewFromJsonDict(data) - - def CreateFriendship(self, user_id=None, screen_name=None, follow=True): - '''Befriends the user specified by the user_id or screen_name. - - The twitter.Api instance must be authenticated. - - Args: - user_id: - A user_id to follow [Optional] - screen_name: - A screen_name to follow [Optional] - follow: - Set to False to disable notifications for the target user - Returns: - A twitter.User instance representing the befriended user. - ''' - url = '%s/friendships/create.json' % (self.base_url) - data = {} - if user_id: - data['user_id'] = user_id - elif screen_name: - data['screen_name'] = screen_name - else: - raise TwitterError("Specify at least one of user_id or screen_name.") - if follow: - data['follow'] = 'true' - else: - data['follow'] = 'false' - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return User.NewFromJsonDict(data) - - def DestroyFriendship(self, user_id=None, screen_name=None): - '''Discontinues friendship with a user_id or screen_name. - - The twitter.Api instance must be authenticated. - - Args: - user_id: - A user_id to unfollow [Optional] - screen_name: - A screen_name to unfollow [Optional] - Returns: - A twitter.User instance representing the discontinued friend. - ''' - url = '%s/friendships/destroy.json' % self.base_url - data = {} - if user_id: - data['user_id'] = user_id - elif screen_name: - data['screen_name'] = screen_name - else: - raise TwitterError("Specify at least one of user_id or screen_name.") - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return User.NewFromJsonDict(data) - - def CreateFavorite(self, status=None, id=None, include_entities=True): - '''Favorites the specified status object or id as the authenticating user. - Returns the favorite status when successful. - - The twitter.Api instance must be authenticated. - - Args: - id: - The id of the twitter status to mark as a favorite. - [Optional] - status: - The twitter.Status object to mark as a favorite. - [Optional] - include_entities: - The entities node will be omitted when set to False. - Returns: - A twitter.Status instance representing the newly-marked favorite. - ''' - url = '%s/favorites/create.json' % self.base_url - data = {} - if id: - data['id'] = id - elif status: - data['id'] = status.id - else: - raise TwitterError("Specify id or status") - if not include_entities: - data['include_entities'] = 'false' - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return Status.NewFromJsonDict(data) - - def DestroyFavorite(self, status=None, id=None, include_entities=True): - '''Un-Favorites the specified status object or id as the authenticating user. - Returns the un-favorited status when successful. - - The twitter.Api instance must be authenticated. - - Args: - id: - The id of the twitter status to unmark as a favorite. - [Optional] - status: - The twitter.Status object to unmark as a favorite. - [Optional] - include_entities: - The entities node will be omitted when set to False. - Returns: - A twitter.Status instance representing the newly-unmarked favorite. - ''' - url = '%s/favorites/destroy.json' % self.base_url - data = {} - if id: - data['id'] = id - elif status: - data['id'] = status.id - else: - raise TwitterError("Specify id or status") - if not include_entities: - data['include_entities'] = 'false' - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return Status.NewFromJsonDict(data) - - def GetFavorites(self, - user_id=None, - screen_name=None, - count=None, - since_id=None, - max_id=None, - include_entities=True): - '''Return a list of Status objects representing favorited tweets. - By default, returns the (up to) 20 most recent tweets for the - authenticated user. - - Args: - user: - The twitter name or id of the user whose favorites you are fetching. - If not specified, defaults to the authenticated user. [Optional] - page: - Specifies the page of results to retrieve. - Note: there are pagination limits. [Optional] - ''' - parameters = {} - - url = '%s/favorites/list.json' % self.base_url - - if user_id: - parameters['user_id'] = user_id - elif screen_name: - parameters['screen_name'] = user_id - - if since_id: - try: - parameters['since_id'] = long(since_id) - except: - raise TwitterError("since_id must be an integer") - - if max_id: - try: - parameters['max_id'] = long(max_id) - except: - raise TwitterError("max_id must be an integer") - - if count: - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - - if include_entities: - parameters['include_entities'] = True - - - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [Status.NewFromJsonDict(x) for x in data] - - def GetMentions(self, - count=None, - since_id=None, - max_id=None, - trim_user=False, - contributor_details=False, - include_entities=True): - '''Returns the 20 most recent mentions (status containing @screen_name) - for the authenticating user. - - Args: - count: - Specifies the number of tweets to try and retrieve, up to a maximum of - 200. The value of count is best thought of as a limit to the number of - tweets to return because suspended or deleted content is removed after - the count has been applied. [Optional] - since_id: - Returns results with an ID greater than (that is, more recent - than) the specified ID. There are limits to the number of - Tweets which can be accessed through the API. If the limit of - Tweets has occurred since the since_id, the since_id will be - forced to the oldest ID available. [Optional] - max_id: - Returns only statuses with an ID less than - (that is, older than) the specified ID. [Optional] - trim_user: - When set to True, each tweet returned in a timeline will include a user - object including only the status authors numerical ID. Omit this - parameter to receive the complete user object. - contributor_details: - If set to True, this parameter enhances the contributors element of the - status response to include the screen_name of the contributor. By - default only the user_id of the contributor is included. - include_entities: - The entities node will be disincluded when set to False. - - Returns: - A sequence of twitter.Status instances, one for each mention of the user. - ''' - - url = '%s/statuses/mentions_timeline.json' % self.base_url - - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - - parameters = {} - - if count: - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - if since_id: - try: - parameters['since_id'] = long(since_id) - except: - raise TwitterError("since_id must be an integer") - if max_id: - try: - parameters['max_id'] = long(max_id) - except: - raise TwitterError("max_id must be an integer") - if trim_user: - parameters['trim_user'] = 1 - if contributor_details: - parameters['contributor_details'] = 'true' - if not include_entities: - parameters['include_entities'] = 'false' - - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [Status.NewFromJsonDict(x) for x in data] - - def CreateList(self, name, mode=None, description=None): - '''Creates a new list with the give name for the authenticated user. - - The twitter.Api instance must be authenticated. - - Args: - name: - New name for the list - mode: - 'public' or 'private'. - Defaults to 'public'. [Optional] - description: - Description of the list. [Optional] - - Returns: - A twitter.List instance representing the new list - ''' - url = '%s/lists/create.json' % self.base_url - - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - parameters = {'name': name} - if mode is not None: - parameters['mode'] = mode - if description is not None: - parameters['description'] = description - json = self._FetchUrl(url, post_data=parameters) - data = self._ParseAndCheckTwitter(json) - return List.NewFromJsonDict(data) - - def DestroyList(self, - owner_screen_name=False, - owner_id=False, - list_id=None, - slug=None): - ''' - Destroys the list identified by list_id or owner_screen_name/owner_id and - slug. - - The twitter.Api instance must be authenticated. - - Args: - owner_screen_name: - The screen_name of the user who owns the list being requested by a slug. - owner_id: - The user ID of the user who owns the list being requested by a slug. - list_id: - The numerical id of the list. - slug: - You can identify a list by its slug instead of its numerical id. If you - decide to do so, note that you'll also have to specify the list owner - using the owner_id or owner_screen_name parameters. - Returns: - A twitter.List instance representing the removed list. - ''' - url = '%s/lists/destroy.json' % self.base_url - data = {} - if list_id: - try: - data['list_id']= long(list_id) - except: - raise TwitterError("list_id must be an integer") - elif slug: - data['slug'] = slug - if owner_id: - try: - data['owner_id'] = long(owner_id) - except: - raise TwitterError("owner_id must be an integer") - elif owner_screen_name: - data['owner_screen_name'] = owner_screen_name - else: - raise TwitterError("Identify list by list_id or owner_screen_name/owner_id and slug") - else: - raise TwitterError("Identify list by list_id or owner_screen_name/owner_id and slug") - - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return List.NewFromJsonDict(data) - - def CreateSubscription(self, - owner_screen_name=False, - owner_id=False, - list_id=None, - slug=None): - '''Creates a subscription to a list by the authenticated user - - The twitter.Api instance must be authenticated. - - Args: - owner_screen_name: - The screen_name of the user who owns the list being requested by a slug. - owner_id: - The user ID of the user who owns the list being requested by a slug. - list_id: - The numerical id of the list. - slug: - You can identify a list by its slug instead of its numerical id. If you - decide to do so, note that you'll also have to specify the list owner - using the owner_id or owner_screen_name parameters. - Returns: - A twitter.List instance representing the list subscribed to - ''' - url = '%s/lists/subscribers/create.json' % (self.base_url) - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - data = {} - if list_id: - try: - data['list_id']= long(list_id) - except: - raise TwitterError("list_id must be an integer") - elif slug: - data['slug'] = slug - if owner_id: - try: - data['owner_id'] = long(owner_id) - except: - raise TwitterError("owner_id must be an integer") - elif owner_screen_name: - data['owner_screen_name'] = owner_screen_name - else: - raise TwitterError("Identify list by list_id or owner_screen_name/owner_id and slug") - else: - raise TwitterError("Identify list by list_id or owner_screen_name/owner_id and slug") - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return List.NewFromJsonDict(data) - - def DestroySubscription(self, - owner_screen_name=False, - owner_id=False, - list_id=None, - slug=None): - '''Destroys the subscription to a list for the authenticated user - - The twitter.Api instance must be authenticated. - - Args: - owner_screen_name: - The screen_name of the user who owns the list being requested by a slug. - owner_id: - The user ID of the user who owns the list being requested by a slug. - list_id: - The numerical id of the list. - slug: - You can identify a list by its slug instead of its numerical id. If you - decide to do so, note that you'll also have to specify the list owner - using the owner_id or owner_screen_name parameters. - Returns: - A twitter.List instance representing the removed list. - ''' - url = '%s/lists/subscribers/destroy.json' % (self.base_url) - if not self._oauth_consumer: - raise TwitterError("The twitter.Api instance must be authenticated.") - data = {} - if list_id: - try: - data['list_id']= long(list_id) - except: - raise TwitterError("list_id must be an integer") - elif slug: - data['slug'] = slug - if owner_id: - try: - data['owner_id'] = long(owner_id) - except: - raise TwitterError("owner_id must be an integer") - elif owner_screen_name: - data['owner_screen_name'] = owner_screen_name - else: - raise TwitterError("Identify list by list_id or owner_screen_name/owner_id and slug") - else: - raise TwitterError("Identify list by list_id or owner_screen_name/owner_id and slug") - json = self._FetchUrl(url, post_data=data) - data = self._ParseAndCheckTwitter(json) - return List.NewFromJsonDict(data) - - def GetSubscriptions(self, user_id=None, screen_name=None, count=20, cursor=-1): - ''' - Obtain a collection of the lists the specified user is subscribed to, 20 - lists per page by default. Does not include the user's own lists. - - The twitter.Api instance must be authenticated. - - Args: - user_id: - The ID of the user for whom to return results for. [Optional] - screen_name: - The screen name of the user for whom to return results for. - [Optional] - count: - The amount of results to return per page. Defaults to 20. - No more than 1000 results will ever be returned in a single page. - cursor: - "page" value that Twitter will use to start building the - list sequence from. -1 to start at the beginning. - Twitter will return in the result the values for next_cursor - and previous_cursor. [Optional] - - Returns: - A sequence of twitter.List instances, one for each list - ''' - if not self._oauth_consumer: - raise TwitterError("twitter.Api instance must be authenticated") - - url = '%s/lists/subscriptions.json' % (self.base_url) - parameters = {} - - try: - parameters['cursor'] = int(cursor) - except: - raise TwitterError("cursor must be an integer") - - try: - parameters['count'] = int(count) - except: - raise TwitterError("count must be an integer") - - if user_id is not None: - try: - parameters['user_id'] = long(user_id) - except: - raise TwitterError('user_id must be an integer') - elif screen_name is not None: - parameters['screen_name'] = screen_name - else: - raise TwitterError('Specify user_id or screen_name') - - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - return [List.NewFromJsonDict(x) for x in data['lists']] - - def GetLists(self, user_id=None, screen_name=None, count=None, cursor=-1): - '''Fetch the sequence of lists for a user. - - The twitter.Api instance must be authenticated. - - Args: - user_id: - The ID of the user for whom to return results for. [Optional] - screen_name: - The screen name of the user for whom to return results for. - [Optional] - count: - The amount of results to return per page. Defaults to 20. No more than - 1000 results will ever be returned in a single page. - [Optional] - cursor: - "page" value that Twitter will use to start building the - list sequence from. -1 to start at the beginning. - Twitter will return in the result the values for next_cursor - and previous_cursor. [Optional] - - Returns: - A sequence of twitter.List instances, one for each list - ''' - if not self._oauth_consumer: - raise TwitterError("twitter.Api instance must be authenticated") - - url = '%s/lists/ownerships.json' % self.base_url - result = [] - parameters = {} - if user_id is not None: - try: - parameters['user_id'] = long(user_id) - except: - raise TwitterError('user_id must be an integer') - elif screen_name is not None: - parameters['screen_name'] = screen_name - else: - raise TwitterError('Specify user_id or screen_name') - if count is not None: - parameters['count'] = count - - while True: - parameters['cursor'] = cursor - json = self._FetchUrl(url, parameters=parameters) - data = self._ParseAndCheckTwitter(json) - result += [List.NewFromJsonDict(x) for x in data['lists']] - if 'next_cursor' in data: - if data['next_cursor'] == 0 or data['next_cursor'] == data['previous_cursor']: - break - else: - cursor = data['next_cursor'] - else: - break - return result - - def VerifyCredentials(self): - '''Returns a twitter.User instance if the authenticating user is valid. - - Returns: - A twitter.User instance representing that user if the - credentials are valid, None otherwise. - ''' - if not self._oauth_consumer: - raise TwitterError("Api instance must first be given user credentials.") - url = '%s/account/verify_credentials.json' % self.base_url - try: - json = self._FetchUrl(url, no_cache=True) - except urllib2.HTTPError, http_error: - if http_error.code == httplib.UNAUTHORIZED: - return None - else: - raise http_error - data = self._ParseAndCheckTwitter(json) - return User.NewFromJsonDict(data) - - def SetCache(self, cache): - '''Override the default cache. Set to None to prevent caching. - - Args: - cache: - An instance that supports the same API as the twitter._FileCache - ''' - if cache == DEFAULT_CACHE: - self._cache = _FileCache() - else: - self._cache = cache - - def SetUrllib(self, urllib): - '''Override the default urllib implementation. - - Args: - urllib: - An instance that supports the same API as the urllib2 module - ''' - self._urllib = urllib - - def SetCacheTimeout(self, cache_timeout): - '''Override the default cache timeout. - - Args: - cache_timeout: - Time, in seconds, that responses should be reused. - ''' - self._cache_timeout = cache_timeout - - def SetUserAgent(self, user_agent): - '''Override the default user agent - - Args: - user_agent: - A string that should be send to the server as the User-agent - ''' - self._request_headers['User-Agent'] = user_agent - - def SetXTwitterHeaders(self, client, url, version): - '''Set the X-Twitter HTTP headers that will be sent to the server. - - Args: - client: - The client name as a string. Will be sent to the server as - the 'X-Twitter-Client' header. - url: - The URL of the meta.xml as a string. Will be sent to the server - as the 'X-Twitter-Client-URL' header. - version: - The client version as a string. Will be sent to the server - as the 'X-Twitter-Client-Version' header. - ''' - self._request_headers['X-Twitter-Client'] = client - self._request_headers['X-Twitter-Client-URL'] = url - self._request_headers['X-Twitter-Client-Version'] = version - - def SetSource(self, source): - '''Suggest the "from source" value to be displayed on the Twitter web site. - - The value of the 'source' parameter must be first recognized by - the Twitter server. New source values are authorized on a case by - case basis by the Twitter development team. - - Args: - source: - The source name as a string. Will be sent to the server as - the 'source' parameter. - ''' - self._default_params['source'] = source - - def GetRateLimitStatus(self, resources=None): - '''Fetch the rate limit status for the currently authorized user. - - Args: - resources: - A comma seperated list of resource families you want to know the current - rate limit disposition of. - [Optional] - - Returns: - A dictionary containing the time the limit will reset (reset_time), - the number of remaining hits allowed before the reset (remaining_hits), - the number of hits allowed in a 60-minute period (hourly_limit), and - the time of the reset in seconds since The Epoch (reset_time_in_seconds). - ''' - parameters = {} - if resources is not None: - parameters['resources'] = resources - - url = '%s/application/rate_limit_status.json' % self.base_url - json = self._FetchUrl(url, parameters=parameters, no_cache=True) - data = self._ParseAndCheckTwitter(json) - return data - - def MaximumHitFrequency(self): - '''Determines the minimum number of seconds that a program must wait - before hitting the server again without exceeding the rate_limit - imposed for the currently authenticated user. - - Returns: - The minimum second interval that a program must use so as to not - exceed the rate_limit imposed for the user. - ''' - rate_status = self.GetRateLimitStatus() - reset_time = rate_status.get('reset_time', None) - limit = rate_status.get('remaining_hits', None) - - if reset_time: - # put the reset time into a datetime object - reset = datetime.datetime(*rfc822.parsedate(reset_time)[:7]) - - # find the difference in time between now and the reset time + 1 hour - delta = reset + datetime.timedelta(hours=1) - datetime.datetime.utcnow() - - if not limit: - return int(delta.seconds) - - # determine the minimum number of seconds allowed as a regular interval - max_frequency = int(delta.seconds / limit) + 1 - - # return the number of seconds - return max_frequency - - return 60 - - def _BuildUrl(self, url, path_elements=None, extra_params=None): - # Break url into constituent parts - (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url) - - # Add any additional path elements to the path - if path_elements: - # Filter out the path elements that have a value of None - p = [i for i in path_elements if i] - if not path.endswith('/'): - path += '/' - path += '/'.join(p) - - # Add any additional query parameters to the query string - if extra_params and len(extra_params) > 0: - extra_query = self._EncodeParameters(extra_params) - # Add it to the existing query - if query: - query += '&' + extra_query - else: - query = extra_query - - # Return the rebuilt URL - return urlparse.urlunparse((scheme, netloc, path, params, query, fragment)) - - def _InitializeRequestHeaders(self, request_headers): - if request_headers: - self._request_headers = request_headers - else: - self._request_headers = {} - - def _InitializeUserAgent(self): - user_agent = 'Python-urllib/%s (python-twitter/%s)' % \ - (self._urllib.__version__, __version__) - self.SetUserAgent(user_agent) - - def _InitializeDefaultParameters(self): - self._default_params = {} - - def _DecompressGzippedResponse(self, response): - raw_data = response.read() - if response.headers.get('content-encoding', None) == 'gzip': - url_data = gzip.GzipFile(fileobj=StringIO.StringIO(raw_data)).read() - else: - url_data = raw_data - return url_data - - def _Encode(self, s): - if self._input_encoding: - return unicode(s, self._input_encoding).encode('utf-8') - else: - return unicode(s).encode('utf-8') - - def _EncodeParameters(self, parameters): - '''Return a string in key=value&key=value form - - Values of None are not included in the output string. - - Args: - parameters: - A dict of (key, value) tuples, where value is encoded as - specified by self._encoding - - Returns: - A URL-encoded string in "key=value&key=value" form - ''' - if parameters is None: - return None - else: - return urllib.urlencode(dict([(k, self._Encode(v)) for k, v in parameters.items() if v is not None])) - - def _EncodePostData(self, post_data): - '''Return a string in key=value&key=value form - - Values are assumed to be encoded in the format specified by self._encoding, - and are subsequently URL encoded. - - Args: - post_data: - A dict of (key, value) tuples, where value is encoded as - specified by self._encoding - - Returns: - A URL-encoded string in "key=value&key=value" form - ''' - if post_data is None: - return None - else: - return urllib.urlencode(dict([(k, self._Encode(v)) for k, v in post_data.items()])) - - def _ParseAndCheckTwitter(self, json): - """Try and parse the JSON returned from Twitter and return - an empty dictionary if there is any error. This is a purely - defensive check because during some Twitter network outages - it will return an HTML failwhale page.""" - try: - data = simplejson.loads(json) - self._CheckForTwitterError(data) - except ValueError: - if "Twitter / Over capacity" in json: - raise TwitterError("Capacity Error") - if "Twitter / Error" in json: - raise TwitterError("Technical Error") - raise TwitterError("json decoding") - - return data - - def _CheckForTwitterError(self, data): - """Raises a TwitterError if twitter returns an error message. - - Args: - data: - A python dict created from the Twitter json response - - Raises: - TwitterError wrapping the twitter error message if one exists. - """ - # Twitter errors are relatively unlikely, so it is faster - # to check first, rather than try and catch the exception - if 'error' in data: - raise TwitterError(data['error']) - if 'errors' in data: - raise TwitterError(data['errors']) - - def _FetchUrl(self, - url, - post_data=None, - parameters=None, - no_cache=None, - use_gzip_compression=None): - '''Fetch a URL, optionally caching for a specified time. - - Args: - url: - The URL to retrieve - post_data: - A dict of (str, unicode) key/value pairs. - If set, POST will be used. - parameters: - A dict whose key/value pairs should encoded and added - to the query string. [Optional] - no_cache: - If true, overrides the cache on the current request - use_gzip_compression: - If True, tells the server to gzip-compress the response. - It does not apply to POST requests. - Defaults to None, which will get the value to use from - the instance variable self._use_gzip [Optional] - - Returns: - A string containing the body of the response. - ''' - # Build the extra parameters dict - extra_params = {} - if self._default_params: - extra_params.update(self._default_params) - if parameters: - extra_params.update(parameters) - - if post_data: - http_method = "POST" - else: - http_method = "GET" - - if self._debugHTTP: - _debug = 1 - else: - _debug = 0 - - http_handler = self._urllib.HTTPHandler(debuglevel=_debug) - https_handler = self._urllib.HTTPSHandler(debuglevel=_debug) - http_proxy = os.environ.get('http_proxy') - https_proxy = os.environ.get('https_proxy') - - if http_proxy is None or https_proxy is None : - proxy_status = False - else : - proxy_status = True - - opener = self._urllib.OpenerDirector() - opener.add_handler(http_handler) - opener.add_handler(https_handler) - - if proxy_status is True : - proxy_handler = self._urllib.ProxyHandler({'http':str(http_proxy),'https': str(https_proxy)}) - opener.add_handler(proxy_handler) - - if use_gzip_compression is None: - use_gzip = self._use_gzip - else: - use_gzip = use_gzip_compression - - # Set up compression - if use_gzip and not post_data: - opener.addheaders.append(('Accept-Encoding', 'gzip')) - - if self._oauth_consumer is not None: - if post_data and http_method == "POST": - parameters = post_data.copy() - - req = oauth.Request.from_consumer_and_token(self._oauth_consumer, - token=self._oauth_token, - http_method=http_method, - http_url=url, parameters=parameters) - - req.sign_request(self._signature_method_hmac_sha1, self._oauth_consumer, self._oauth_token) - - headers = req.to_header() - - if http_method == "POST": - encoded_post_data = req.to_postdata() - else: - encoded_post_data = None - url = req.to_url() - else: - url = self._BuildUrl(url, extra_params=extra_params) - encoded_post_data = self._EncodePostData(post_data) - - # Open and return the URL immediately if we're not going to cache - if encoded_post_data or no_cache or not self._cache or not self._cache_timeout: - response = opener.open(url, encoded_post_data) - url_data = self._DecompressGzippedResponse(response) - opener.close() - else: - # Unique keys are a combination of the url and the oAuth Consumer Key - if self._consumer_key: - key = self._consumer_key + ':' + url - else: - key = url - - # See if it has been cached before - last_cached = self._cache.GetCachedTime(key) - - # If the cached version is outdated then fetch another and store it - if not last_cached or time.time() >= last_cached + self._cache_timeout: - try: - response = opener.open(url, encoded_post_data) - url_data = self._DecompressGzippedResponse(response) - self._cache.Set(key, url_data) - except urllib2.HTTPError, e: - print e - opener.close() - else: - url_data = self._cache.Get(key) - - # Always return the latest version - return url_data - -class _FileCacheError(Exception): - '''Base exception class for FileCache related errors''' - -class _FileCache(object): - - DEPTH = 3 - - def __init__(self,root_directory=None): - self._InitializeRootDirectory(root_directory) - - def Get(self,key): - path = self._GetPath(key) - if os.path.exists(path): - return open(path).read() - else: - return None - - def Set(self,key,data): - path = self._GetPath(key) - directory = os.path.dirname(path) - if not os.path.exists(directory): - os.makedirs(directory) - if not os.path.isdir(directory): - raise _FileCacheError('%s exists but is not a directory' % directory) - temp_fd, temp_path = tempfile.mkstemp() - temp_fp = os.fdopen(temp_fd, 'w') - temp_fp.write(data) - temp_fp.close() - if not path.startswith(self._root_directory): - raise _FileCacheError('%s does not appear to live under %s' % - (path, self._root_directory)) - if os.path.exists(path): - os.remove(path) - os.rename(temp_path, path) - - def Remove(self,key): - path = self._GetPath(key) - if not path.startswith(self._root_directory): - raise _FileCacheError('%s does not appear to live under %s' % - (path, self._root_directory )) - if os.path.exists(path): - os.remove(path) - - def GetCachedTime(self,key): - path = self._GetPath(key) - if os.path.exists(path): - return os.path.getmtime(path) - else: - return None - - def _GetUsername(self): - '''Attempt to find the username in a cross-platform fashion.''' - try: - return os.getenv('USER') or \ - os.getenv('LOGNAME') or \ - os.getenv('USERNAME') or \ - os.getlogin() or \ - 'nobody' - except (AttributeError, IOError, OSError), e: - return 'nobody' - - def _GetTmpCachePath(self): - username = self._GetUsername() - cache_directory = 'python.cache_' + username - return os.path.join(tempfile.gettempdir(), cache_directory) - - def _InitializeRootDirectory(self, root_directory): - if not root_directory: - root_directory = self._GetTmpCachePath() - root_directory = os.path.abspath(root_directory) - if not os.path.exists(root_directory): - os.mkdir(root_directory) - if not os.path.isdir(root_directory): - raise _FileCacheError('%s exists but is not a directory' % - root_directory) - self._root_directory = root_directory - - def _GetPath(self,key): - try: - hashed_key = md5(key).hexdigest() - except TypeError: - hashed_key = md5.new(key).hexdigest() - - return os.path.join(self._root_directory, - self._GetPrefix(hashed_key), - hashed_key) - - def _GetPrefix(self,hashed_key): - return os.path.sep.join(hashed_key[0:_FileCache.DEPTH]) diff --git a/lib/pytz/__init__.py b/lib/pytz/__init__.py index e5cbe56d..0a4e3a00 100644 --- a/lib/pytz/__init__.py +++ b/lib/pytz/__init__.py @@ -40,13 +40,13 @@ from pytz.tzfile import build_tzinfo, _byte_string try: - unicode + str except NameError: # Python 3.x # Python 3.x doesn't have unicode(), making writing code # for Python 2.3 and Python 3.x a pain. - unicode = str + str = str def ascii(s): r""" diff --git a/lib/pytz/lazy.py b/lib/pytz/lazy.py index f7fc597c..4d58a1fa 100644 --- a/lib/pytz/lazy.py +++ b/lib/pytz/lazy.py @@ -61,7 +61,7 @@ class LazyDict(DictMixin): self._fill() finally: _fill_lock.release() - return self.data.keys() + return list(self.data.keys()) class LazyList(list): diff --git a/lib/pytz/tzfile.py b/lib/pytz/tzfile.py index 9c007c80..5bae9f3f 100644 --- a/lib/pytz/tzfile.py +++ b/lib/pytz/tzfile.py @@ -4,7 +4,7 @@ $Id: tzfile.py,v 1.8 2004/06/03 00:15:24 zenzen Exp $ ''' try: - from cStringIO import StringIO + from io import StringIO except ImportError: from io import StringIO from datetime import datetime, timedelta diff --git a/lib/pytz/tzinfo.py b/lib/pytz/tzinfo.py index 1318872d..c7d4b72e 100644 --- a/lib/pytz/tzinfo.py +++ b/lib/pytz/tzinfo.py @@ -548,7 +548,7 @@ def unpickler(zone, utcoffset=None, dstoffset=None, tzname=None): # See if we can find an entry differing only by tzname. Abbreviations # get changed from the initial guess by the database maintainers to # match reality when this information is discovered. - for localized_tz in tz._tzinfos.values(): + for localized_tz in list(tz._tzinfos.values()): if (localized_tz._utcoffset == utcoffset and localized_tz._dst == dstoffset): return localized_tz diff --git a/lib/qbittorrentv2/client.py b/lib/qbittorrentv2/client.py index 4131d293..38c5d17c 100755 --- a/lib/qbittorrentv2/client.py +++ b/lib/qbittorrentv2/client.py @@ -185,7 +185,7 @@ class Client(object): For example: qb.torrents(filter='downloading', sort='ratio'). """ params = {} - for name, value in filters.items(): + for name, value in list(filters.items()): # make sure that old 'status' argument still works name = 'filter' if name == 'status' else name params[name] = value @@ -345,7 +345,7 @@ class Client(object): # convert old option names to new option names options = kwargs.copy() - for old_arg, new_arg in old_arg_map.items(): + for old_arg, new_arg in list(old_arg_map.items()): if options.get(old_arg) and not options.get(new_arg): options[new_arg] = options[old_arg] diff --git a/lib/requests/__init__.py b/lib/requests/__init__.py old mode 100755 new mode 100644 index d2471284..53a5b42a --- a/lib/requests/__init__.py +++ b/lib/requests/__init__.py @@ -6,72 +6,147 @@ # / """ -Requests HTTP library +Requests HTTP Library ~~~~~~~~~~~~~~~~~~~~~ -Requests is an HTTP library, written in Python, for human beings. Basic GET -usage: +Requests is an HTTP library, written in Python, for human beings. +Basic GET usage: >>> import requests >>> r = requests.get('https://www.python.org') >>> r.status_code 200 - >>> 'Python is a programming language' in r.content + >>> b'Python is a programming language' in r.content True ... or POST: >>> payload = dict(key1='value1', key2='value2') - >>> r = requests.post('http://httpbin.org/post', data=payload) + >>> r = requests.post('https://httpbin.org/post', data=payload) >>> print(r.text) { ... "form": { - "key2": "value2", - "key1": "value1" + "key1": "value1", + "key2": "value2" }, ... } The other HTTP methods are supported - see `requests.api`. Full documentation -is at . +is at . -:copyright: (c) 2015 by Kenneth Reitz. +:copyright: (c) 2017 by Kenneth Reitz. :license: Apache 2.0, see LICENSE for more details. - """ -__title__ = 'requests' -__version__ = '2.7.0' -__build__ = 0x020700 -__author__ = 'Kenneth Reitz' -__license__ = 'Apache 2.0' -__copyright__ = 'Copyright 2015 Kenneth Reitz' +import urllib3 +import warnings +from .exceptions import RequestsDependencyWarning -# Attempt to enable urllib3's SNI support, if possible try: - from .packages.urllib3.contrib import pyopenssl - pyopenssl.inject_into_urllib3() + from charset_normalizer import __version__ as charset_normalizer_version +except ImportError: + charset_normalizer_version = None + +try: + from chardet import __version__ as chardet_version +except ImportError: + chardet_version = None + +def check_compatibility(urllib3_version, chardet_version, charset_normalizer_version): + urllib3_version = urllib3_version.split('.') + assert urllib3_version != ['dev'] # Verify urllib3 isn't installed from git. + + # Sometimes, urllib3 only reports its version as 16.1. + if len(urllib3_version) == 2: + urllib3_version.append('0') + + # Check urllib3 for compatibility. + major, minor, patch = urllib3_version # noqa: F811 + major, minor, patch = int(major), int(minor), int(patch) + # urllib3 >= 1.21.1, <= 1.26 + assert major == 1 + assert minor >= 21 + assert minor <= 26 + + # Check charset_normalizer for compatibility. + if chardet_version: + major, minor, patch = chardet_version.split('.')[:3] + major, minor, patch = int(major), int(minor), int(patch) + # chardet_version >= 3.0.2, < 5.0.0 + assert (3, 0, 2) <= (major, minor, patch) < (5, 0, 0) + elif charset_normalizer_version: + major, minor, patch = charset_normalizer_version.split('.')[:3] + major, minor, patch = int(major), int(minor), int(patch) + # charset_normalizer >= 2.0.0 < 3.0.0 + assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0) + else: + raise Exception("You need either charset_normalizer or chardet installed") + +def _check_cryptography(cryptography_version): + # cryptography < 1.3.4 + try: + cryptography_version = list(map(int, cryptography_version.split('.'))) + except ValueError: + return + + if cryptography_version < [1, 3, 4]: + warning = 'Old version of cryptography ({}) may cause slowdown.'.format(cryptography_version) + warnings.warn(warning, RequestsDependencyWarning) + +# Check imported dependencies for compatibility. +try: + check_compatibility(urllib3.__version__, chardet_version, charset_normalizer_version) +except (AssertionError, ValueError): + warnings.warn("urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported " + "version!".format(urllib3.__version__, chardet_version, charset_normalizer_version), + RequestsDependencyWarning) + +# Attempt to enable urllib3's fallback for SNI support +# if the standard library doesn't support SNI or the +# 'ssl' library isn't available. +try: + try: + import ssl + except ImportError: + ssl = None + + if not getattr(ssl, "HAS_SNI", False): + from urllib3.contrib import pyopenssl + pyopenssl.inject_into_urllib3() + + # Check cryptography version + from cryptography import __version__ as cryptography_version + _check_cryptography(cryptography_version) except ImportError: pass +# urllib3's DependencyWarnings should be silenced. +from urllib3.exceptions import DependencyWarning +warnings.simplefilter('ignore', DependencyWarning) + +from .__version__ import __title__, __description__, __url__, __version__ +from .__version__ import __build__, __author__, __author_email__, __license__ +from .__version__ import __copyright__, __cake__ + from . import utils +from . import packages from .models import Request, Response, PreparedRequest from .api import request, get, head, post, patch, put, delete, options from .sessions import session, Session from .status_codes import codes from .exceptions import ( RequestException, Timeout, URLRequired, - TooManyRedirects, HTTPError, ConnectionError + TooManyRedirects, HTTPError, ConnectionError, + FileModeWarning, ConnectTimeout, ReadTimeout, JSONDecodeError ) # Set default logging handler to avoid "No handler found" warnings. import logging -try: # Python 2.7+ - from logging import NullHandler -except ImportError: - class NullHandler(logging.Handler): - def emit(self, record): - pass +from logging import NullHandler logging.getLogger(__name__).addHandler(NullHandler()) + +# FileModeWarnings go off per the default. +warnings.simplefilter('default', FileModeWarning, append=True) diff --git a/lib/requests/__version__.py b/lib/requests/__version__.py new file mode 100644 index 00000000..e973b03b --- /dev/null +++ b/lib/requests/__version__.py @@ -0,0 +1,14 @@ +# .-. .-. .-. . . .-. .-. .-. .-. +# |( |- |.| | | |- `-. | `-. +# ' ' `-' `-`.`-' `-' `-' ' `-' + +__title__ = 'requests' +__description__ = 'Python HTTP for Humans.' +__url__ = 'https://requests.readthedocs.io' +__version__ = '2.27.1' +__build__ = 0x022701 +__author__ = 'Kenneth Reitz' +__author_email__ = 'me@kennethreitz.org' +__license__ = 'Apache 2.0' +__copyright__ = 'Copyright 2022 Kenneth Reitz' +__cake__ = u'\u2728 \U0001f370 \u2728' diff --git a/lib/requests/_internal_utils.py b/lib/requests/_internal_utils.py new file mode 100644 index 00000000..759d9a56 --- /dev/null +++ b/lib/requests/_internal_utils.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +""" +requests._internal_utils +~~~~~~~~~~~~~~ + +Provides utility functions that are consumed internally by Requests +which depend on extremely few external helpers (such as compat) +""" + +from .compat import is_py2, builtin_str, str + + +def to_native_string(string, encoding='ascii'): + """Given a string object, regardless of type, returns a representation of + that string in the native string type, encoding and decoding where + necessary. This assumes ASCII unless told otherwise. + """ + if isinstance(string, builtin_str): + out = string + else: + if is_py2: + out = string.encode(encoding) + else: + out = string.decode(encoding) + + return out + + +def unicode_is_ascii(u_string): + """Determine if unicode string only contains ASCII characters. + + :param str u_string: unicode string to check. Must be unicode + and not Python 2 `str`. + :rtype: bool + """ + assert isinstance(u_string, str) + try: + u_string.encode('ascii') + return True + except UnicodeEncodeError: + return False diff --git a/lib/requests/adapters.py b/lib/requests/adapters.py old mode 100755 new mode 100644 index f911fc57..fe22ff45 --- a/lib/requests/adapters.py +++ b/lib/requests/adapters.py @@ -8,31 +8,45 @@ This module contains the transport adapters that Requests uses to define and maintain connections. """ +import os.path import socket +from urllib3.poolmanager import PoolManager, proxy_from_url +from urllib3.response import HTTPResponse +from urllib3.util import parse_url +from urllib3.util import Timeout as TimeoutSauce +from urllib3.util.retry import Retry +from urllib3.exceptions import ClosedPoolError +from urllib3.exceptions import ConnectTimeoutError +from urllib3.exceptions import HTTPError as _HTTPError +from urllib3.exceptions import InvalidHeader as _InvalidHeader +from urllib3.exceptions import MaxRetryError +from urllib3.exceptions import NewConnectionError +from urllib3.exceptions import ProxyError as _ProxyError +from urllib3.exceptions import ProtocolError +from urllib3.exceptions import ReadTimeoutError +from urllib3.exceptions import SSLError as _SSLError +from urllib3.exceptions import ResponseError +from urllib3.exceptions import LocationValueError + from .models import Response -from .packages.urllib3.poolmanager import PoolManager, proxy_from_url -from .packages.urllib3.response import HTTPResponse -from .packages.urllib3.util import Timeout as TimeoutSauce -from .packages.urllib3.util.retry import Retry from .compat import urlparse, basestring -from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers, - prepend_scheme_if_needed, get_auth_from_url, urldefragauth, - select_proxy) +from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths, + get_encoding_from_headers, prepend_scheme_if_needed, + get_auth_from_url, urldefragauth, select_proxy) from .structures import CaseInsensitiveDict -from .packages.urllib3.exceptions import ConnectTimeoutError -from .packages.urllib3.exceptions import HTTPError as _HTTPError -from .packages.urllib3.exceptions import MaxRetryError -from .packages.urllib3.exceptions import ProxyError as _ProxyError -from .packages.urllib3.exceptions import ProtocolError -from .packages.urllib3.exceptions import ReadTimeoutError -from .packages.urllib3.exceptions import SSLError as _SSLError -from .packages.urllib3.exceptions import ResponseError from .cookies import extract_cookies_to_jar from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError, - ProxyError, RetryError) + ProxyError, RetryError, InvalidSchema, InvalidProxyURL, + InvalidURL, InvalidHeader) from .auth import _basic_auth_str +try: + from urllib3.contrib.socks import SOCKSProxyManager +except ImportError: + def SOCKSProxyManager(*args, **kwargs): + raise InvalidSchema("Missing dependencies for SOCKS support.") + DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 DEFAULT_RETRIES = 0 @@ -45,10 +59,26 @@ class BaseAdapter(object): def __init__(self): super(BaseAdapter, self).__init__() - def send(self): + def send(self, request, stream=False, timeout=None, verify=True, + cert=None, proxies=None): + """Sends PreparedRequest object. Returns Response object. + + :param request: The :class:`PreparedRequest ` being sent. + :param stream: (optional) Whether to stream the request content. + :param timeout: (optional) How long to wait for the server to send + data before giving up, as a float, or a :ref:`(connect timeout, + read timeout) ` tuple. + :type timeout: float or tuple + :param verify: (optional) Either a boolean, in which case it controls whether we verify + the server's TLS certificate, or a string, in which case it must be a path + to a CA bundle to use + :param cert: (optional) Any user-provided SSL certificate to be trusted. + :param proxies: (optional) The proxies dictionary to apply to the request. + """ raise NotImplementedError def close(self): + """Cleans up adapter specific items.""" raise NotImplementedError @@ -62,7 +92,7 @@ class HTTPAdapter(BaseAdapter): :param pool_connections: The number of urllib3 connection pools to cache. :param pool_maxsize: The maximum number of connections to save in the pool. - :param int max_retries: The maximum number of retries each connection + :param max_retries: The maximum number of retries each connection should attempt. Note, this applies only to failed DNS lookups, socket connections and connection timeouts, never to requests where data has made it to the server. By default, Requests does not retry failed @@ -100,12 +130,11 @@ class HTTPAdapter(BaseAdapter): self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block) def __getstate__(self): - return dict((attr, getattr(self, attr, None)) for attr in - self.__attrs__) + return {attr: getattr(self, attr, None) for attr in self.__attrs__} def __setstate__(self, state): # Can't handle by adding 'proxy_manager' to self.__attrs__ because - # because self.poolmanager uses a lambda function, which isn't pickleable. + # self.poolmanager uses a lambda function, which isn't pickleable. self.proxy_manager = {} self.config = {} @@ -145,10 +174,24 @@ class HTTPAdapter(BaseAdapter): :param proxy: The proxy to return a urllib3 ProxyManager for. :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager. :returns: ProxyManager + :rtype: urllib3.ProxyManager """ - if not proxy in self.proxy_manager: + if proxy in self.proxy_manager: + manager = self.proxy_manager[proxy] + elif proxy.lower().startswith('socks'): + username, password = get_auth_from_url(proxy) + manager = self.proxy_manager[proxy] = SOCKSProxyManager( + proxy, + username=username, + password=password, + num_pools=self._pool_connections, + maxsize=self._pool_maxsize, + block=self._pool_block, + **proxy_kwargs + ) + else: proxy_headers = self.proxy_headers(proxy) - self.proxy_manager[proxy] = proxy_from_url( + manager = self.proxy_manager[proxy] = proxy_from_url( proxy, proxy_headers=proxy_headers, num_pools=self._pool_connections, @@ -156,7 +199,7 @@ class HTTPAdapter(BaseAdapter): block=self._pool_block, **proxy_kwargs) - return self.proxy_manager[proxy] + return manager def cert_verify(self, conn, url, verify, cert): """Verify a SSL certificate. This method should not be called from user @@ -165,7 +208,9 @@ class HTTPAdapter(BaseAdapter): :param conn: The urllib3 connection object associated with the cert. :param url: The requested URL. - :param verify: Whether we should actually verify the certificate. + :param verify: Either a boolean, in which case it controls whether we verify + the server's TLS certificate, or a string, in which case it must be a path + to a CA bundle to use :param cert: The SSL certificate to verify. """ if url.lower().startswith('https') and verify: @@ -177,16 +222,22 @@ class HTTPAdapter(BaseAdapter): cert_loc = verify if not cert_loc: - cert_loc = DEFAULT_CA_BUNDLE_PATH + cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH) - if not cert_loc: - raise Exception("Could not find a suitable SSL CA certificate bundle.") + if not cert_loc or not os.path.exists(cert_loc): + raise IOError("Could not find a suitable TLS CA certificate bundle, " + "invalid path: {}".format(cert_loc)) conn.cert_reqs = 'CERT_REQUIRED' - conn.ca_certs = cert_loc + + if not os.path.isdir(cert_loc): + conn.ca_certs = cert_loc + else: + conn.ca_cert_dir = cert_loc else: conn.cert_reqs = 'CERT_NONE' conn.ca_certs = None + conn.ca_cert_dir = None if cert: if not isinstance(cert, basestring): @@ -194,6 +245,13 @@ class HTTPAdapter(BaseAdapter): conn.key_file = cert[1] else: conn.cert_file = cert + conn.key_file = None + if conn.cert_file and not os.path.exists(conn.cert_file): + raise IOError("Could not find the TLS certificate file, " + "invalid path: {}".format(conn.cert_file)) + if conn.key_file and not os.path.exists(conn.key_file): + raise IOError("Could not find the TLS key file, " + "invalid path: {}".format(conn.key_file)) def build_response(self, req, resp): """Builds a :class:`Response ` object from a urllib3 @@ -203,6 +261,7 @@ class HTTPAdapter(BaseAdapter): :param req: The :class:`PreparedRequest ` used to generate the response. :param resp: The urllib3 response object. + :rtype: requests.Response """ response = Response() @@ -238,11 +297,16 @@ class HTTPAdapter(BaseAdapter): :param url: The URL to connect to. :param proxies: (optional) A Requests-style dictionary of proxies used on this request. + :rtype: urllib3.ConnectionPool """ proxy = select_proxy(url, proxies) if proxy: proxy = prepend_scheme_if_needed(proxy, 'http') + proxy_url = parse_url(proxy) + if not proxy_url.host: + raise InvalidProxyURL("Please check proxy URL. It is malformed" + " and could be missing the host.") proxy_manager = self.proxy_manager_for(proxy) conn = proxy_manager.connection_from_url(url) else: @@ -256,10 +320,12 @@ class HTTPAdapter(BaseAdapter): def close(self): """Disposes of any internal state. - Currently, this just closes the PoolManager, which closes pooled - connections. + Currently, this closes the PoolManager and any active ProxyManager, + which closes any pooled connections. """ self.poolmanager.clear() + for proxy in self.proxy_manager.values(): + proxy.clear() def request_url(self, request, proxies): """Obtain the url to use when making the final request. @@ -273,13 +339,20 @@ class HTTPAdapter(BaseAdapter): :param request: The :class:`PreparedRequest ` being sent. :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs. + :rtype: str """ proxy = select_proxy(request.url, proxies) scheme = urlparse(request.url).scheme - if proxy and scheme != 'https': + + is_proxied_http_request = (proxy and scheme != 'https') + using_socks_proxy = False + if proxy: + proxy_scheme = urlparse(proxy).scheme.lower() + using_socks_proxy = proxy_scheme.startswith('socks') + + url = request.path_url + if is_proxied_http_request and not using_socks_proxy: url = urldefragauth(request.url) - else: - url = request.path_url return url @@ -307,12 +380,13 @@ class HTTPAdapter(BaseAdapter): when subclassing the :class:`HTTPAdapter `. - :param proxies: The url of the proxy being used for this request. + :param proxy: The url of the proxy being used for this request. + :rtype: dict """ headers = {} username, password = get_auth_from_url(proxy) - if username and password: + if username: headers['Proxy-Authorization'] = _basic_auth_str(username, password) @@ -326,17 +400,23 @@ class HTTPAdapter(BaseAdapter): :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a :ref:`(connect timeout, read timeout) ` tuple. - :type timeout: float or tuple - :param verify: (optional) Whether to verify SSL certificates. + :type timeout: float or tuple or urllib3 Timeout object + :param verify: (optional) Either a boolean, in which case it controls whether + we verify the server's TLS certificate, or a string, in which case it + must be a path to a CA bundle to use :param cert: (optional) Any user-provided SSL certificate to be trusted. :param proxies: (optional) The proxies dictionary to apply to the request. + :rtype: requests.Response """ - conn = self.get_connection(request.url, proxies) + try: + conn = self.get_connection(request.url, proxies) + except LocationValueError as e: + raise InvalidURL(e, request=request) self.cert_verify(conn, request.url, verify, cert) url = self.request_url(request, proxies) - self.add_headers(request) + self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies) chunked = not (request.body is None or 'Content-Length' in request.headers) @@ -346,10 +426,12 @@ class HTTPAdapter(BaseAdapter): timeout = TimeoutSauce(connect=connect, read=read) except ValueError as e: # this may raise a string formatting error. - err = ("Invalid timeout {0}. Pass a (connect, read) " + err = ("Invalid timeout {}. Pass a (connect, read) " "timeout tuple, or a single float to set " "both timeouts to the same value".format(timeout)) raise ValueError(err) + elif isinstance(timeout, TimeoutSauce): + pass else: timeout = TimeoutSauce(connect=timeout, read=timeout) @@ -376,9 +458,11 @@ class HTTPAdapter(BaseAdapter): low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT) try: + skip_host = 'Host' in request.headers low_conn.putrequest(request.method, url, - skip_accept_encoding=True) + skip_accept_encoding=True, + skip_host=skip_host) for header, value in request.headers.items(): low_conn.putheader(header, value) @@ -392,7 +476,14 @@ class HTTPAdapter(BaseAdapter): low_conn.send(b'\r\n') low_conn.send(b'0\r\n\r\n') - r = low_conn.getresponse() + # Receive the response from the server + try: + # For Python 2.7, use buffering of HTTP responses + r = low_conn.getresponse(buffering=True) + except TypeError: + # For compatibility with Python 3.3+ + r = low_conn.getresponse() + resp = HTTPResponse.from_httplib( r, pool=conn, @@ -411,11 +502,23 @@ class HTTPAdapter(BaseAdapter): except MaxRetryError as e: if isinstance(e.reason, ConnectTimeoutError): - raise ConnectTimeout(e, request=request) + # TODO: Remove this in 3.0.0: see #2811 + if not isinstance(e.reason, NewConnectionError): + raise ConnectTimeout(e, request=request) if isinstance(e.reason, ResponseError): raise RetryError(e, request=request) + if isinstance(e.reason, _ProxyError): + raise ProxyError(e, request=request) + + if isinstance(e.reason, _SSLError): + # This branch is for urllib3 v1.22 and later. + raise SSLError(e, request=request) + + raise ConnectionError(e, request=request) + + except ClosedPoolError as e: raise ConnectionError(e, request=request) except _ProxyError as e: @@ -423,9 +526,12 @@ class HTTPAdapter(BaseAdapter): except (_SSLError, _HTTPError) as e: if isinstance(e, _SSLError): + # This branch is for urllib3 versions earlier than v1.22 raise SSLError(e, request=request) elif isinstance(e, ReadTimeoutError): raise ReadTimeout(e, request=request) + elif isinstance(e, _InvalidHeader): + raise InvalidHeader(e, request=request) else: raise diff --git a/lib/requests/api.py b/lib/requests/api.py old mode 100755 new mode 100644 index 72a777b2..4cba90ee --- a/lib/requests/api.py +++ b/lib/requests/api.py @@ -8,7 +8,6 @@ This module implements the Requests API. :copyright: (c) 2012 by Kenneth Reitz. :license: Apache2, see LICENSE for more details. - """ from . import sessions @@ -17,23 +16,31 @@ from . import sessions def request(method, url, **kwargs): """Constructs and sends a :class:`Request `. - :param method: method for the new :class:`Request` object. + :param method: method for the new :class:`Request` object: ``GET``, ``OPTIONS``, ``HEAD``, ``POST``, ``PUT``, ``PATCH``, or ``DELETE``. :param url: URL for the new :class:`Request` object. - :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. - :param json: (optional) json data to send in the body of the :class:`Request`. + :param params: (optional) Dictionary, list of tuples or bytes to send + in the query string for the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. + :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. - :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. + :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload. + ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')`` + or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string + defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers + to add for the file. :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. - :param timeout: (optional) How long to wait for the server to send data + :param timeout: (optional) How many seconds to wait for the server to send data before giving up, as a float, or a :ref:`(connect timeout, read timeout) ` tuple. :type timeout: float or tuple - :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. + :param allow_redirects: (optional) Boolean. Enable/disable GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection. Defaults to ``True``. :type allow_redirects: bool :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. - :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. + :param verify: (optional) Either a boolean, in which case it controls whether we verify + the server's TLS certificate, or a string, in which case it must be a path + to a CA bundle to use. Defaults to ``True``. :param stream: (optional) if ``False``, the response content will be immediately downloaded. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. :return: :class:`Response ` object @@ -42,35 +49,34 @@ def request(method, url, **kwargs): Usage:: >>> import requests - >>> req = requests.request('GET', 'http://httpbin.org/get') + >>> req = requests.request('GET', 'https://httpbin.org/get') + >>> req """ - session = sessions.Session() - response = session.request(method=method, url=url, **kwargs) - # By explicitly closing the session, we avoid leaving sockets open which - # can trigger a ResourceWarning in some cases, and look like a memory leak - # in others. - session.close() - return response + # By using the 'with' statement we are sure the session is closed, thus we + # avoid leaving sockets open which can trigger a ResourceWarning in some + # cases, and look like a memory leak in others. + with sessions.Session() as session: + return session.request(method=method, url=url, **kwargs) def get(url, params=None, **kwargs): - """Sends a GET request. + r"""Sends a GET request. :param url: URL for the new :class:`Request` object. - :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. + :param params: (optional) Dictionary, list of tuples or bytes to send + in the query string for the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) return request('get', url, params=params, **kwargs) def options(url, **kwargs): - """Sends a OPTIONS request. + r"""Sends an OPTIONS request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. @@ -78,15 +84,16 @@ def options(url, **kwargs): :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) return request('options', url, **kwargs) def head(url, **kwargs): - """Sends a HEAD request. + r"""Sends a HEAD request. :param url: URL for the new :class:`Request` object. - :param \*\*kwargs: Optional arguments that ``request`` takes. + :param \*\*kwargs: Optional arguments that ``request`` takes. If + `allow_redirects` is not provided, it will be set to `False` (as + opposed to the default :meth:`request` behavior). :return: :class:`Response ` object :rtype: requests.Response """ @@ -96,10 +103,11 @@ def head(url, **kwargs): def post(url, data=None, json=None, **kwargs): - """Sends a POST request. + r"""Sends a POST request. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object @@ -110,10 +118,12 @@ def post(url, data=None, json=None, **kwargs): def put(url, data=None, **kwargs): - """Sends a PUT request. + r"""Sends a PUT request. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object :rtype: requests.Response @@ -123,20 +133,22 @@ def put(url, data=None, **kwargs): def patch(url, data=None, **kwargs): - """Sends a PATCH request. + r"""Sends a PATCH request. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. + :param json: (optional) json data to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. :return: :class:`Response ` object :rtype: requests.Response """ - return request('patch', url, data=data, **kwargs) + return request('patch', url, data=data, **kwargs) def delete(url, **kwargs): - """Sends a DELETE request. + r"""Sends a DELETE request. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. diff --git a/lib/requests/auth.py b/lib/requests/auth.py old mode 100755 new mode 100644 index 03c3302a..eeface39 --- a/lib/requests/auth.py +++ b/lib/requests/auth.py @@ -11,13 +11,15 @@ import os import re import time import hashlib +import threading +import warnings from base64 import b64encode -from .compat import urlparse, str +from .compat import urlparse, str, basestring from .cookies import extract_cookies_to_jar -from .utils import parse_dict_header, to_native_string -from .status_codes import codes +from ._internal_utils import to_native_string +from .utils import parse_dict_header CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' CONTENT_TYPE_MULTI_PART = 'multipart/form-data' @@ -26,8 +28,42 @@ CONTENT_TYPE_MULTI_PART = 'multipart/form-data' def _basic_auth_str(username, password): """Returns a Basic Auth string.""" + # "I want us to put a big-ol' comment on top of it that + # says that this behaviour is dumb but we need to preserve + # it because people are relying on it." + # - Lukasa + # + # These are here solely to maintain backwards compatibility + # for things like ints. This will be removed in 3.0.0. + if not isinstance(username, basestring): + warnings.warn( + "Non-string usernames will no longer be supported in Requests " + "3.0.0. Please convert the object you've passed in ({!r}) to " + "a string or bytes object in the near future to avoid " + "problems.".format(username), + category=DeprecationWarning, + ) + username = str(username) + + if not isinstance(password, basestring): + warnings.warn( + "Non-string passwords will no longer be supported in Requests " + "3.0.0. Please convert the object you've passed in ({!r}) to " + "a string or bytes object in the near future to avoid " + "problems.".format(type(password)), + category=DeprecationWarning, + ) + password = str(password) + # -- End Removal -- + + if isinstance(username, str): + username = username.encode('latin1') + + if isinstance(password, str): + password = password.encode('latin1') + authstr = 'Basic ' + to_native_string( - b64encode(('%s:%s' % (username, password)).encode('latin1')).strip() + b64encode(b':'.join((username, password))).strip() ) return authstr @@ -42,10 +78,20 @@ class AuthBase(object): class HTTPBasicAuth(AuthBase): """Attaches HTTP Basic Authentication to the given Request object.""" + def __init__(self, username, password): self.username = username self.password = password + def __eq__(self, other): + return all([ + self.username == getattr(other, 'username', None), + self.password == getattr(other, 'password', None) + ]) + + def __ne__(self, other): + return not self == other + def __call__(self, r): r.headers['Authorization'] = _basic_auth_str(self.username, self.password) return r @@ -53,6 +99,7 @@ class HTTPBasicAuth(AuthBase): class HTTPProxyAuth(HTTPBasicAuth): """Attaches HTTP Proxy Authentication to a given Request object.""" + def __call__(self, r): r.headers['Proxy-Authorization'] = _basic_auth_str(self.username, self.password) return r @@ -60,22 +107,34 @@ class HTTPProxyAuth(HTTPBasicAuth): class HTTPDigestAuth(AuthBase): """Attaches HTTP Digest Authentication to the given Request object.""" + def __init__(self, username, password): self.username = username self.password = password - self.last_nonce = '' - self.nonce_count = 0 - self.chal = {} - self.pos = None - self.num_401_calls = 1 + # Keep state in per-thread local storage + self._thread_local = threading.local() + + def init_per_thread_state(self): + # Ensure state is initialized just once per-thread + if not hasattr(self._thread_local, 'init'): + self._thread_local.init = True + self._thread_local.last_nonce = '' + self._thread_local.nonce_count = 0 + self._thread_local.chal = {} + self._thread_local.pos = None + self._thread_local.num_401_calls = None def build_digest_header(self, method, url): + """ + :rtype: str + """ - realm = self.chal['realm'] - nonce = self.chal['nonce'] - qop = self.chal.get('qop') - algorithm = self.chal.get('algorithm') - opaque = self.chal.get('opaque') + realm = self._thread_local.chal['realm'] + nonce = self._thread_local.chal['nonce'] + qop = self._thread_local.chal.get('qop') + algorithm = self._thread_local.chal.get('algorithm') + opaque = self._thread_local.chal.get('opaque') + hash_utf8 = None if algorithm is None: _algorithm = 'MD5' @@ -94,6 +153,18 @@ class HTTPDigestAuth(AuthBase): x = x.encode('utf-8') return hashlib.sha1(x).hexdigest() hash_utf8 = sha_utf8 + elif _algorithm == 'SHA-256': + def sha256_utf8(x): + if isinstance(x, str): + x = x.encode('utf-8') + return hashlib.sha256(x).hexdigest() + hash_utf8 = sha256_utf8 + elif _algorithm == 'SHA-512': + def sha512_utf8(x): + if isinstance(x, str): + x = x.encode('utf-8') + return hashlib.sha512(x).hexdigest() + hash_utf8 = sha512_utf8 KD = lambda s, d: hash_utf8("%s:%s" % (s, d)) @@ -114,12 +185,12 @@ class HTTPDigestAuth(AuthBase): HA1 = hash_utf8(A1) HA2 = hash_utf8(A2) - if nonce == self.last_nonce: - self.nonce_count += 1 + if nonce == self._thread_local.last_nonce: + self._thread_local.nonce_count += 1 else: - self.nonce_count = 1 - ncvalue = '%08x' % self.nonce_count - s = str(self.nonce_count).encode('utf-8') + self._thread_local.nonce_count = 1 + ncvalue = '%08x' % self._thread_local.nonce_count + s = str(self._thread_local.nonce_count).encode('utf-8') s += nonce.encode('utf-8') s += time.ctime().encode('utf-8') s += os.urandom(8) @@ -128,18 +199,18 @@ class HTTPDigestAuth(AuthBase): if _algorithm == 'MD5-SESS': HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) - if qop is None: + if not qop: respdig = KD(HA1, "%s:%s" % (nonce, HA2)) elif qop == 'auth' or 'auth' in qop.split(','): noncebit = "%s:%s:%s:%s:%s" % ( nonce, ncvalue, cnonce, 'auth', HA2 - ) + ) respdig = KD(HA1, noncebit) else: # XXX handle auth-int. return None - self.last_nonce = nonce + self._thread_local.last_nonce = nonce # XXX should the partial digests be encoded too? base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ @@ -158,23 +229,32 @@ class HTTPDigestAuth(AuthBase): def handle_redirect(self, r, **kwargs): """Reset num_401_calls counter on redirects.""" if r.is_redirect: - self.num_401_calls = 1 + self._thread_local.num_401_calls = 1 def handle_401(self, r, **kwargs): - """Takes the given response and tries digest-auth, if needed.""" + """ + Takes the given response and tries digest-auth, if needed. - if self.pos is not None: + :rtype: requests.Response + """ + + # If response is not 4xx, do not auth + # See https://github.com/psf/requests/issues/3772 + if not 400 <= r.status_code < 500: + self._thread_local.num_401_calls = 1 + return r + + if self._thread_local.pos is not None: # Rewind the file position indicator of the body to where # it was to resend the request. - r.request.body.seek(self.pos) - num_401_calls = getattr(self, 'num_401_calls', 1) + r.request.body.seek(self._thread_local.pos) s_auth = r.headers.get('www-authenticate', '') - if 'digest' in s_auth.lower() and num_401_calls < 2: + if 'digest' in s_auth.lower() and self._thread_local.num_401_calls < 2: - self.num_401_calls += 1 + self._thread_local.num_401_calls += 1 pat = re.compile(r'digest ', flags=re.IGNORECASE) - self.chal = parse_dict_header(pat.sub('', s_auth, count=1)) + self._thread_local.chal = parse_dict_header(pat.sub('', s_auth, count=1)) # Consume content and release the original connection # to allow our new request to reuse the same one. @@ -192,21 +272,34 @@ class HTTPDigestAuth(AuthBase): return _r - self.num_401_calls = 1 + self._thread_local.num_401_calls = 1 return r def __call__(self, r): + # Initialize per-thread state, if needed + self.init_per_thread_state() # If we have a saved nonce, skip the 401 - if self.last_nonce: + if self._thread_local.last_nonce: r.headers['Authorization'] = self.build_digest_header(r.method, r.url) try: - self.pos = r.body.tell() + self._thread_local.pos = r.body.tell() except AttributeError: # In the case of HTTPDigestAuth being reused and the body of # the previous request was a file-like object, pos has the # file position of the previous body. Ensure it's set to # None. - self.pos = None + self._thread_local.pos = None r.register_hook('response', self.handle_401) r.register_hook('response', self.handle_redirect) + self._thread_local.num_401_calls = 1 + return r + + def __eq__(self, other): + return all([ + self.username == getattr(other, 'username', None), + self.password == getattr(other, 'password', None) + ]) + + def __ne__(self, other): + return not self == other diff --git a/lib/requests/certs.py b/lib/requests/certs.py old mode 100755 new mode 100644 index 07e64750..d1a378d7 --- a/lib/requests/certs.py +++ b/lib/requests/certs.py @@ -2,24 +2,17 @@ # -*- coding: utf-8 -*- """ -certs.py -~~~~~~~~ +requests.certs +~~~~~~~~~~~~~~ -This module returns the preferred default CA certificate bundle. +This module returns the preferred default CA certificate bundle. There is +only one — the one from the certifi package. If you are packaging Requests, e.g., for a Linux distribution or a managed environment, you can change the definition of where() to return a separately packaged CA bundle. """ -import os.path - -try: - from certifi import where -except ImportError: - def where(): - """Return the preferred certificate bundle.""" - # vendored bundle inside Requests - return os.path.join(os.path.dirname(__file__), 'cacert.pem') +from certifi import where if __name__ == '__main__': print(where()) diff --git a/lib/requests/compat.py b/lib/requests/compat.py old mode 100755 new mode 100644 index 70edff78..029ae62a --- a/lib/requests/compat.py +++ b/lib/requests/compat.py @@ -1,10 +1,17 @@ # -*- coding: utf-8 -*- """ -pythoncompat +requests.compat +~~~~~~~~~~~~~~~ + +This module handles import compatibility issues between Python 2 and +Python 3. """ -from .packages import chardet +try: + import chardet +except ImportError: + import charset_normalizer as chardet import sys @@ -21,11 +28,11 @@ is_py2 = (_ver[0] == 2) #: Python 3.x? is_py3 = (_ver[0] == 3) +has_simplejson = False try: import simplejson as json -except (ImportError, SyntaxError): - # simplejson does not support Python 3.2, it throws a SyntaxError - # because of u'...' Unicode literals. + has_simplejson = True +except ImportError: import json # --------- @@ -33,30 +40,42 @@ except (ImportError, SyntaxError): # --------- if is_py2: - from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass + from urllib import ( + quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, + proxy_bypass, proxy_bypass_environment, getproxies_environment) from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag from urllib2 import parse_http_list import cookielib from Cookie import Morsel from StringIO import StringIO - from .packages.urllib3.packages.ordered_dict import OrderedDict + # Keep OrderedDict for backwards compatibility. + from collections import Callable, Mapping, MutableMapping, OrderedDict builtin_str = str bytes = str str = unicode basestring = basestring numeric_types = (int, long, float) + integer_types = (int, long) + JSONDecodeError = ValueError elif is_py3: from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag - from urllib.request import parse_http_list, getproxies, proxy_bypass + from urllib.request import parse_http_list, getproxies, proxy_bypass, proxy_bypass_environment, getproxies_environment from http import cookiejar as cookielib from http.cookies import Morsel from io import StringIO + # Keep OrderedDict for backwards compatibility. from collections import OrderedDict + from collections.abc import Callable, Mapping, MutableMapping + if has_simplejson: + from simplejson import JSONDecodeError + else: + from json import JSONDecodeError builtin_str = str str = str bytes = bytes basestring = (str, bytes) numeric_types = (int, float) + integer_types = (int,) diff --git a/lib/requests/cookies.py b/lib/requests/cookies.py old mode 100755 new mode 100644 index 88b478c7..56fccd9c --- a/lib/requests/cookies.py +++ b/lib/requests/cookies.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- """ +requests.cookies +~~~~~~~~~~~~~~~~ + Compatibility code to be able to use `cookielib.CookieJar` with requests. requests.utils imports from here, so be careful with imports. @@ -8,13 +11,13 @@ requests.utils imports from here, so be careful with imports. import copy import time -import collections -from .compat import cookielib, urlparse, urlunparse, Morsel +import calendar + +from ._internal_utils import to_native_string +from .compat import cookielib, urlparse, urlunparse, Morsel, MutableMapping try: import threading - # grr, pyflakes: this fixes "redefinition of unused 'threading'" - threading except ImportError: import dummy_threading as threading @@ -51,7 +54,7 @@ class MockRequest(object): if not self._r.headers.get('Host'): return self._r.url # If they did set it, retrieve it and reconstruct the expected domain - host = self._r.headers['Host'] + host = to_native_string(self._r.headers['Host'], encoding='utf-8') parsed = urlparse(self._r.url) # Reconstruct the URL as we expect it return urlunparse([ @@ -130,7 +133,11 @@ def extract_cookies_to_jar(jar, request, response): def get_cookie_header(jar, request): - """Produce an appropriate Cookie header string to be sent with `request`, or None.""" + """ + Produce an appropriate Cookie header string to be sent with `request`, or None. + + :rtype: str + """ r = MockRequest(request) jar.add_cookie_header(r) return r.get_new_headers().get('Cookie') @@ -143,10 +150,13 @@ def remove_cookie_by_name(cookiejar, name, domain=None, path=None): """ clearables = [] for cookie in cookiejar: - if cookie.name == name: - if domain is None or domain == cookie.domain: - if path is None or path == cookie.path: - clearables.append((cookie.domain, cookie.path, cookie.name)) + if cookie.name != name: + continue + if domain is not None and domain != cookie.domain: + continue + if path is not None and path != cookie.path: + continue + clearables.append((cookie.domain, cookie.path, cookie.name)) for domain, path, name in clearables: cookiejar.clear(domain, path, name) @@ -154,10 +164,11 @@ def remove_cookie_by_name(cookiejar, name, domain=None, path=None): class CookieConflictError(RuntimeError): """There are two cookies that meet the criteria specified in the cookie jar. - Use .get and .set and include domain and path args in order to be more specific.""" + Use .get and .set and include domain and path args in order to be more specific. + """ -class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): +class RequestsCookieJar(cookielib.CookieJar, MutableMapping): """Compatibility class; is a cookielib.CookieJar, but exposes a dict interface. @@ -174,12 +185,14 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): .. warning:: dictionary operations that are normally O(1) may be O(n). """ + def get(self, name, default=None, domain=None, path=None): """Dict-like get() that also supports optional domain and path args in order to resolve naming collisions from using one cookie jar over multiple domains. - .. warning:: operation is O(n), not O(1).""" + .. warning:: operation is O(n), not O(1). + """ try: return self._find_no_duplicates(name, domain, path) except KeyError: @@ -188,7 +201,8 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): def set(self, name, value, **kwargs): """Dict-like set() that also supports optional domain and path args in order to resolve naming collisions from using one cookie jar over - multiple domains.""" + multiple domains. + """ # support client code that unsets cookies by assignment of a None value: if value is None: remove_cookie_by_name(self, name, domain=kwargs.get('domain'), path=kwargs.get('path')) @@ -203,37 +217,54 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): def iterkeys(self): """Dict-like iterkeys() that returns an iterator of names of cookies - from the jar. See itervalues() and iteritems().""" + from the jar. + + .. seealso:: itervalues() and iteritems(). + """ for cookie in iter(self): yield cookie.name def keys(self): """Dict-like keys() that returns a list of names of cookies from the - jar. See values() and items().""" + jar. + + .. seealso:: values() and items(). + """ return list(self.iterkeys()) def itervalues(self): """Dict-like itervalues() that returns an iterator of values of cookies - from the jar. See iterkeys() and iteritems().""" + from the jar. + + .. seealso:: iterkeys() and iteritems(). + """ for cookie in iter(self): yield cookie.value def values(self): """Dict-like values() that returns a list of values of cookies from the - jar. See keys() and items().""" + jar. + + .. seealso:: keys() and items(). + """ return list(self.itervalues()) def iteritems(self): """Dict-like iteritems() that returns an iterator of name-value tuples - from the jar. See iterkeys() and itervalues().""" + from the jar. + + .. seealso:: iterkeys() and itervalues(). + """ for cookie in iter(self): yield cookie.name, cookie.value def items(self): """Dict-like items() that returns a list of name-value tuples from the - jar. See keys() and values(). Allows client-code to call - ``dict(RequestsCookieJar)`` and get a vanilla python dict of key value - pairs.""" + jar. Allows client-code to call ``dict(RequestsCookieJar)`` and get a + vanilla python dict of key value pairs. + + .. seealso:: keys() and values(). + """ return list(self.iteritems()) def list_domains(self): @@ -254,7 +285,10 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): def multiple_domains(self): """Returns True if there are multiple domains in the jar. - Returns False otherwise.""" + Returns False otherwise. + + :rtype: bool + """ domains = [] for cookie in iter(self): if cookie.domain is not None and cookie.domain in domains: @@ -265,33 +299,45 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): def get_dict(self, domain=None, path=None): """Takes as an argument an optional domain and path and returns a plain old Python dict of name-value pairs of cookies that meet the - requirements.""" + requirements. + + :rtype: dict + """ dictionary = {} for cookie in iter(self): - if (domain is None or cookie.domain == domain) and (path is None - or cookie.path == path): + if ( + (domain is None or cookie.domain == domain) and + (path is None or cookie.path == path) + ): dictionary[cookie.name] = cookie.value return dictionary + def __contains__(self, name): + try: + return super(RequestsCookieJar, self).__contains__(name) + except CookieConflictError: + return True + def __getitem__(self, name): """Dict-like __getitem__() for compatibility with client code. Throws exception if there are more than one cookie with name. In that case, use the more explicit get() method instead. - .. warning:: operation is O(n), not O(1).""" - + .. warning:: operation is O(n), not O(1). + """ return self._find_no_duplicates(name) def __setitem__(self, name, value): """Dict-like __setitem__ for compatibility with client code. Throws exception if there is already a cookie of that name in the jar. In that - case, use the more explicit set() method instead.""" - + case, use the more explicit set() method instead. + """ self.set(name, value) def __delitem__(self, name): """Deletes a cookie given a name. Wraps ``cookielib.CookieJar``'s - ``remove_cookie_by_name()``.""" + ``remove_cookie_by_name()``. + """ remove_cookie_by_name(self, name) def set_cookie(self, cookie, *args, **kwargs): @@ -308,11 +354,17 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): super(RequestsCookieJar, self).update(other) def _find(self, name, domain=None, path=None): - """Requests uses this method internally to get cookie values. Takes as - args name and optional domain and path. Returns a cookie.value. If - there are conflicting cookies, _find arbitrarily chooses one. See - _find_no_duplicates if you want an exception thrown if there are - conflicting cookies.""" + """Requests uses this method internally to get cookie values. + + If there are conflicting cookies, _find arbitrarily chooses one. + See _find_no_duplicates if you want an exception thrown if there are + conflicting cookies. + + :param name: a string containing name of cookie + :param domain: (optional) string containing domain of cookie + :param path: (optional) string containing path of cookie + :return: cookie.value + """ for cookie in iter(self): if cookie.name == name: if domain is None or cookie.domain == domain: @@ -323,10 +375,16 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): def _find_no_duplicates(self, name, domain=None, path=None): """Both ``__get_item__`` and ``get`` call this function: it's never - used elsewhere in Requests. Takes as args name and optional domain and - path. Returns a cookie.value. Throws KeyError if cookie is not found - and CookieConflictError if there are multiple cookies that match name - and optionally domain and path.""" + used elsewhere in Requests. + + :param name: a string containing name of cookie + :param domain: (optional) string containing domain of cookie + :param path: (optional) string containing path of cookie + :raises KeyError: if cookie is not found + :raises CookieConflictError: if there are multiple cookies + that match name and optionally domain and path + :return: cookie.value + """ toReturn = None for cookie in iter(self): if cookie.name == name: @@ -356,16 +414,21 @@ class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping): def copy(self): """Return a copy of this RequestsCookieJar.""" new_cj = RequestsCookieJar() + new_cj.set_policy(self.get_policy()) new_cj.update(self) return new_cj + def get_policy(self): + """Return the CookiePolicy instance used.""" + return self._policy + def _copy_cookie_jar(jar): if jar is None: return None if hasattr(jar, 'copy'): - # We're dealing with an instane of RequestsCookieJar + # We're dealing with an instance of RequestsCookieJar return jar.copy() # We're dealing with a generic CookieJar instance new_jar = copy.copy(jar) @@ -381,20 +444,21 @@ def create_cookie(name, value, **kwargs): By default, the pair of `name` and `value` will be set for the domain '' and sent on every request (this is sometimes called a "supercookie"). """ - result = dict( - version=0, - name=name, - value=value, - port=None, - domain='', - path='/', - secure=False, - expires=None, - discard=True, - comment=None, - comment_url=None, - rest={'HttpOnly': None}, - rfc2109=False,) + result = { + 'version': 0, + 'name': name, + 'value': value, + 'port': None, + 'domain': '', + 'path': '/', + 'secure': False, + 'expires': None, + 'discard': True, + 'comment': None, + 'comment_url': None, + 'rest': {'HttpOnly': None}, + 'rfc2109': False, + } badargs = set(kwargs) - set(result) if badargs: @@ -421,8 +485,9 @@ def morsel_to_cookie(morsel): raise TypeError('max-age: %s must be integer' % morsel['max-age']) elif morsel['expires']: time_template = '%a, %d-%b-%Y %H:%M:%S GMT' - expires = int(time.mktime( - time.strptime(morsel['expires'], time_template)) - time.timezone) + expires = calendar.timegm( + time.strptime(morsel['expires'], time_template) + ) return create_cookie( comment=morsel['comment'], comment_url=bool(morsel['comment']), @@ -447,6 +512,7 @@ def cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True): :param cookiejar: (optional) A cookiejar to add the cookies to. :param overwrite: (optional) If False, will not replace cookies already in the jar with new ones. + :rtype: CookieJar """ if cookiejar is None: cookiejar = RequestsCookieJar() @@ -465,6 +531,7 @@ def merge_cookies(cookiejar, cookies): :param cookiejar: CookieJar object to add the cookies to. :param cookies: Dictionary or CookieJar object to be added. + :rtype: CookieJar """ if not isinstance(cookiejar, cookielib.CookieJar): raise ValueError('You can only merge into CookieJar') diff --git a/lib/requests/exceptions.py b/lib/requests/exceptions.py old mode 100755 new mode 100644 index 89135a80..79697635 --- a/lib/requests/exceptions.py +++ b/lib/requests/exceptions.py @@ -5,19 +5,19 @@ requests.exceptions ~~~~~~~~~~~~~~~~~~~ This module contains the set of Requests' exceptions. - """ -from .packages.urllib3.exceptions import HTTPError as BaseHTTPError +from urllib3.exceptions import HTTPError as BaseHTTPError + +from .compat import JSONDecodeError as CompatJSONDecodeError class RequestException(IOError): """There was an ambiguous exception that occurred while handling your - request.""" + request. + """ def __init__(self, *args, **kwargs): - """ - Initialize RequestException with `request` and `response` objects. - """ + """Initialize RequestException with `request` and `response` objects.""" response = kwargs.pop('response', None) self.response = response self.request = kwargs.pop('request', None) @@ -27,6 +27,14 @@ class RequestException(IOError): super(RequestException, self).__init__(*args, **kwargs) +class InvalidJSONError(RequestException): + """A JSON error occurred.""" + + +class JSONDecodeError(InvalidJSONError, CompatJSONDecodeError): + """Couldn't decode the text into json""" + + class HTTPError(RequestException): """An HTTP error occurred.""" @@ -72,15 +80,23 @@ class TooManyRedirects(RequestException): class MissingSchema(RequestException, ValueError): - """The URL schema (e.g. http or https) is missing.""" + """The URL scheme (e.g. http or https) is missing.""" class InvalidSchema(RequestException, ValueError): - """See defaults.py for valid schemas.""" + """The URL scheme provided is either invalid or unsupported.""" class InvalidURL(RequestException, ValueError): - """ The URL provided was somehow invalid. """ + """The URL provided was somehow invalid.""" + + +class InvalidHeader(RequestException, ValueError): + """The header value provided was somehow invalid.""" + + +class InvalidProxyURL(InvalidURL): + """The proxy URL provided is invalid.""" class ChunkedEncodingError(RequestException): @@ -88,12 +104,30 @@ class ChunkedEncodingError(RequestException): class ContentDecodingError(RequestException, BaseHTTPError): - """Failed to decode response content""" + """Failed to decode response content.""" class StreamConsumedError(RequestException, TypeError): - """The content for this response was already consumed""" + """The content for this response was already consumed.""" class RetryError(RequestException): """Custom retries logic failed""" + + +class UnrewindableBodyError(RequestException): + """Requests encountered an error when trying to rewind a body.""" + +# Warnings + + +class RequestsWarning(Warning): + """Base warning for Requests.""" + + +class FileModeWarning(RequestsWarning, DeprecationWarning): + """A file was opened in text mode, but Requests determined its binary length.""" + + +class RequestsDependencyWarning(RequestsWarning): + """An imported dependency doesn't match the expected version range.""" diff --git a/lib/requests/help.py b/lib/requests/help.py new file mode 100644 index 00000000..4cd6389f --- /dev/null +++ b/lib/requests/help.py @@ -0,0 +1,135 @@ +"""Module containing bug report helper(s).""" +from __future__ import print_function + +import json +import platform +import sys +import ssl + +import idna +import urllib3 + +from . import __version__ as requests_version + +try: + import charset_normalizer +except ImportError: + charset_normalizer = None + +try: + import chardet +except ImportError: + chardet = None + +try: + from urllib3.contrib import pyopenssl +except ImportError: + pyopenssl = None + OpenSSL = None + cryptography = None +else: + import OpenSSL + import cryptography + + +def _implementation(): + """Return a dict with the Python implementation and version. + + Provide both the name and the version of the Python implementation + currently running. For example, on CPython 2.7.5 it will return + {'name': 'CPython', 'version': '2.7.5'}. + + This function works best on CPython and PyPy: in particular, it probably + doesn't work for Jython or IronPython. Future investigation should be done + to work out the correct shape of the code for those platforms. + """ + implementation = platform.python_implementation() + + if implementation == 'CPython': + implementation_version = platform.python_version() + elif implementation == 'PyPy': + implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, + sys.pypy_version_info.minor, + sys.pypy_version_info.micro) + if sys.pypy_version_info.releaselevel != 'final': + implementation_version = ''.join([ + implementation_version, sys.pypy_version_info.releaselevel + ]) + elif implementation == 'Jython': + implementation_version = platform.python_version() # Complete Guess + elif implementation == 'IronPython': + implementation_version = platform.python_version() # Complete Guess + else: + implementation_version = 'Unknown' + + return {'name': implementation, 'version': implementation_version} + + +def info(): + """Generate information for a bug report.""" + try: + platform_info = { + 'system': platform.system(), + 'release': platform.release(), + } + except IOError: + platform_info = { + 'system': 'Unknown', + 'release': 'Unknown', + } + + implementation_info = _implementation() + urllib3_info = {'version': urllib3.__version__} + charset_normalizer_info = {'version': None} + chardet_info = {'version': None} + if charset_normalizer: + charset_normalizer_info = {'version': charset_normalizer.__version__} + if chardet: + chardet_info = {'version': chardet.__version__} + + pyopenssl_info = { + 'version': None, + 'openssl_version': '', + } + if OpenSSL: + pyopenssl_info = { + 'version': OpenSSL.__version__, + 'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER, + } + cryptography_info = { + 'version': getattr(cryptography, '__version__', ''), + } + idna_info = { + 'version': getattr(idna, '__version__', ''), + } + + system_ssl = ssl.OPENSSL_VERSION_NUMBER + system_ssl_info = { + 'version': '%x' % system_ssl if system_ssl is not None else '' + } + + return { + 'platform': platform_info, + 'implementation': implementation_info, + 'system_ssl': system_ssl_info, + 'using_pyopenssl': pyopenssl is not None, + 'using_charset_normalizer': chardet is None, + 'pyOpenSSL': pyopenssl_info, + 'urllib3': urllib3_info, + 'chardet': chardet_info, + 'charset_normalizer': charset_normalizer_info, + 'cryptography': cryptography_info, + 'idna': idna_info, + 'requests': { + 'version': requests_version, + }, + } + + +def main(): + """Pretty-print the bug information as JSON.""" + print(json.dumps(info(), sort_keys=True, indent=2)) + + +if __name__ == '__main__': + main() diff --git a/lib/requests/hooks.py b/lib/requests/hooks.py old mode 100755 new mode 100644 index 5dfaf6b6..7a51f212 --- a/lib/requests/hooks.py +++ b/lib/requests/hooks.py @@ -10,36 +10,25 @@ Available hooks: ``response``: The response generated from a Request. - """ - - HOOKS = ['response'] def default_hooks(): - hooks = {} - for event in HOOKS: - hooks[event] = [] - return hooks + return {event: [] for event in HOOKS} # TODO: response is the only one def dispatch_hook(key, hooks, hook_data, **kwargs): """Dispatches a hook dictionary on a given piece of data.""" - - hooks = hooks or dict() - - if key in hooks: - hooks = hooks.get(key) - + hooks = hooks or {} + hooks = hooks.get(key) + if hooks: if hasattr(hooks, '__call__'): hooks = [hooks] - for hook in hooks: _hook_data = hook(hook_data, **kwargs) if _hook_data is not None: hook_data = _hook_data - return hook_data diff --git a/lib/requests/models.py b/lib/requests/models.py old mode 100755 new mode 100644 index 4270c647..dfbea854 --- a/lib/requests/models.py +++ b/lib/requests/models.py @@ -7,41 +7,51 @@ requests.models This module contains the primary objects that power Requests. """ -import collections import datetime +import sys -from io import BytesIO, UnsupportedOperation +# Import encoding now, to avoid implicit import later. +# Implicit import within threads may cause LookupError when standard library is in a ZIP, +# such as in Embedded Python. See https://github.com/psf/requests/issues/3578. +import encodings.idna + +from urllib3.fields import RequestField +from urllib3.filepost import encode_multipart_formdata +from urllib3.util import parse_url +from urllib3.exceptions import ( + DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) + +from io import UnsupportedOperation from .hooks import default_hooks from .structures import CaseInsensitiveDict from .auth import HTTPBasicAuth from .cookies import cookiejar_from_dict, get_cookie_header, _copy_cookie_jar -from .packages.urllib3.fields import RequestField -from .packages.urllib3.filepost import encode_multipart_formdata -from .packages.urllib3.util import parse_url -from .packages.urllib3.exceptions import ( - DecodeError, ReadTimeoutError, ProtocolError, LocationParseError) from .exceptions import ( HTTPError, MissingSchema, InvalidURL, ChunkedEncodingError, - ContentDecodingError, ConnectionError, StreamConsumedError) + ContentDecodingError, ConnectionError, StreamConsumedError, + InvalidJSONError) +from .exceptions import JSONDecodeError as RequestsJSONDecodeError +from ._internal_utils import to_native_string, unicode_is_ascii from .utils import ( guess_filename, get_auth_from_url, requote_uri, stream_decode_response_unicode, to_key_val_list, parse_header_links, - iter_slices, guess_json_utf, super_len, to_native_string) + iter_slices, guess_json_utf, super_len, check_header_validity) from .compat import ( - cookielib, urlunparse, urlsplit, urlencode, str, bytes, StringIO, - is_py2, chardet, builtin_str, basestring) + Callable, Mapping, + cookielib, urlunparse, urlsplit, urlencode, str, bytes, + is_py2, chardet, builtin_str, basestring, JSONDecodeError) from .compat import json as complexjson from .status_codes import codes #: The set of HTTP status codes that indicate an automatically #: processable redirect. REDIRECT_STATI = ( - codes.moved, # 301 - codes.found, # 302 - codes.other, # 303 - codes.temporary_redirect, # 307 - codes.permanent_redirect, # 308 + codes.moved, # 301 + codes.found, # 302 + codes.other, # 303 + codes.temporary_redirect, # 307 + codes.permanent_redirect, # 308 ) DEFAULT_REDIRECT_LIMIT = 30 @@ -103,9 +113,10 @@ class RequestEncodingMixin(object): """Build the body for a multipart/form-data request. Will successfully encode files when passed as a dict or a list of - 2-tuples. Order is retained if data is a list of 2-tuples but arbitrary + tuples. Order is retained if data is a list of tuples but arbitrary if parameters are supplied as a dict. - + The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, contentype) + or 4-tuples (filename, fileobj, contentype, custom_headers). """ if (not files): raise ValueError("Files must be provided.") @@ -146,8 +157,12 @@ class RequestEncodingMixin(object): if isinstance(fp, (str, bytes, bytearray)): fdata = fp - else: + elif hasattr(fp, 'read'): fdata = fp.read() + elif fp is None: + continue + else: + fdata = fp rf = RequestField(name=k, data=fdata, filename=fn, headers=fh) rf.make_multipart(content_type=ft) @@ -165,10 +180,10 @@ class RequestHooksMixin(object): if event not in self.hooks: raise ValueError('Unsupported event specified, with event name "%s"' % (event)) - if isinstance(hook, collections.Callable): + if isinstance(hook, Callable): self.hooks[event].append(hook) elif hasattr(hook, '__iter__'): - self.hooks[event].extend(h for h in hook if isinstance(h, collections.Callable)) + self.hooks[event].extend(h for h in hook if isinstance(h, Callable)) def deregister_hook(self, event, hook): """Deregister a previously registered hook. @@ -191,9 +206,13 @@ class Request(RequestHooksMixin): :param url: URL to send. :param headers: dictionary of headers to send. :param files: dictionary of {filename: fileobject} files to multipart upload. - :param data: the body to attach to the request. If a dictionary is provided, form-encoding will take place. - :param json: json for the body to attach to the request (if data is not specified). - :param params: dictionary of URL parameters to append to the URL. + :param data: the body to attach to the request. If a dictionary or + list of tuples ``[(key, value)]`` is provided, form-encoding will + take place. + :param json: json for the body to attach to the request (if files or data is not specified). + :param params: URL parameters to append to the URL. If a dictionary or + list of tuples ``[(key, value)]`` is provided, form-encoding will + take place. :param auth: Auth handler or (user, pass) tuple. :param cookies: dictionary or CookieJar of cookies to attach to this request. :param hooks: dictionary of callback hooks, for internal usage. @@ -201,13 +220,14 @@ class Request(RequestHooksMixin): Usage:: >>> import requests - >>> req = requests.Request('GET', 'http://httpbin.org/get') + >>> req = requests.Request('GET', 'https://httpbin.org/get') >>> req.prepare() - """ - def __init__(self, method=None, url=None, headers=None, files=None, - data=None, params=None, auth=None, cookies=None, hooks=None, json=None): + + def __init__(self, + method=None, url=None, headers=None, files=None, data=None, + params=None, auth=None, cookies=None, hooks=None, json=None): # Default empty dicts for dict params. data = [] if data is None else data @@ -255,19 +275,21 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): """The fully mutable :class:`PreparedRequest ` object, containing the exact bytes that will be sent to the server. - Generated from either a :class:`Request ` object or manually. + Instances are generated from a :class:`Request ` object, and + should not be instantiated manually; doing so may produce undesirable + effects. Usage:: >>> import requests - >>> req = requests.Request('GET', 'http://httpbin.org/get') + >>> req = requests.Request('GET', 'https://httpbin.org/get') >>> r = req.prepare() + >>> r >>> s = requests.Session() >>> s.send(r) - """ def __init__(self): @@ -284,9 +306,12 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.body = None #: dictionary of callback hooks, for internal usage. self.hooks = default_hooks() + #: integer denoting starting position of a readable file-like body. + self._body_position = None - def prepare(self, method=None, url=None, headers=None, files=None, - data=None, params=None, auth=None, cookies=None, hooks=None, json=None): + def prepare(self, + method=None, url=None, headers=None, files=None, data=None, + params=None, auth=None, cookies=None, hooks=None, json=None): """Prepares the entire request with the given parameters.""" self.prepare_method(method) @@ -313,26 +338,40 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): p._cookies = _copy_cookie_jar(self._cookies) p.body = self.body p.hooks = self.hooks + p._body_position = self._body_position return p def prepare_method(self, method): """Prepares the given HTTP method.""" self.method = method if self.method is not None: - self.method = self.method.upper() + self.method = to_native_string(self.method.upper()) + + @staticmethod + def _get_idna_encoded_host(host): + import idna + + try: + host = idna.encode(host, uts46=True).decode('utf-8') + except idna.IDNAError: + raise UnicodeError + return host def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. - #: We're unable to blindy call unicode/str functions + #: We're unable to blindly call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. - #: https://github.com/kennethreitz/requests/pull/2238 + #: https://github.com/psf/requests/pull/2238 if isinstance(url, bytes): url = url.decode('utf8') else: url = unicode(url) if is_py2 else str(url) + # Remove leading whitespaces from url + url = url.lstrip() + # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. @@ -347,7 +386,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): raise InvalidURL(*e.args) if not scheme: - error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?") + error = ("Invalid URL {0!r}: No scheme supplied. Perhaps you meant http://{0}?") error = error.format(to_native_string(url, 'utf8')) raise MissingSchema(error) @@ -355,10 +394,16 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) - # Only want to apply IDNA to the hostname - try: - host = host.encode('idna').decode('utf-8') - except UnicodeError: + # In general, we want to try IDNA encoding the hostname if the string contains + # non-ASCII characters. This allows users to automatically get the correct IDNA + # behaviour. For strings containing only ASCII characters, we need to also verify + # it doesn't start with a wildcard (*), before allowing the unencoded hostname. + if not unicode_is_ascii(host): + try: + host = self._get_idna_encoded_host(host) + except UnicodeError: + raise InvalidURL('URL has an invalid label.') + elif host.startswith((u'*', u'.')): raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location @@ -385,6 +430,9 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if isinstance(fragment, str): fragment = fragment.encode('utf-8') + if isinstance(params, (str, bytes)): + params = to_native_string(params) + enc_params = self._encode_params(params) if enc_params: if query: @@ -398,10 +446,13 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): def prepare_headers(self, headers): """Prepares the given HTTP headers.""" + self.headers = CaseInsensitiveDict() if headers: - self.headers = CaseInsensitiveDict((to_native_string(name), value) for name, value in headers.items()) - else: - self.headers = CaseInsensitiveDict() + for header in headers.items(): + # Raise exception on invalid header value. + check_header_validity(header) + name, value = header + self.headers[to_native_string(name)] = value def prepare_body(self, data, files, json=None): """Prepares the given HTTP body data.""" @@ -412,29 +463,48 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): # Nottin' on you. body = None content_type = None - length = None - if json is not None: + if not data and json is not None: + # urllib3 requires a bytes-like body. Python 2's json.dumps + # provides this natively, but Python 3 gives a Unicode string. content_type = 'application/json' - body = complexjson.dumps(json) + + try: + body = complexjson.dumps(json, allow_nan=False) + except ValueError as ve: + raise InvalidJSONError(ve, request=self) + + if not isinstance(body, bytes): + body = body.encode('utf-8') is_stream = all([ hasattr(data, '__iter__'), - not isinstance(data, (basestring, list, tuple, dict)) + not isinstance(data, (basestring, list, tuple, Mapping)) ]) - try: - length = super_len(data) - except (TypeError, AttributeError, UnsupportedOperation): - length = None - if is_stream: + try: + length = super_len(data) + except (TypeError, AttributeError, UnsupportedOperation): + length = None + body = data + if getattr(body, 'tell', None) is not None: + # Record the current file position before reading. + # This will allow us to rewind a file in the event + # of a redirect. + try: + self._body_position = body.tell() + except (IOError, OSError): + # This differentiates from None, allowing us to catch + # a failed `tell()` later when trying to rewind the body + self._body_position = object() + if files: raise NotImplementedError('Streamed bodies and files are mutually exclusive.') - if length is not None: + if length: self.headers['Content-Length'] = builtin_str(length) else: self.headers['Transfer-Encoding'] = 'chunked' @@ -443,7 +513,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): if files: (body, content_type) = self._encode_files(files, data) else: - if data and json is None: + if data: body = self._encode_params(data) if isinstance(data, basestring) or hasattr(data, 'read'): content_type = None @@ -459,15 +529,16 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): self.body = body def prepare_content_length(self, body): - if hasattr(body, 'seek') and hasattr(body, 'tell'): - body.seek(0, 2) - self.headers['Content-Length'] = builtin_str(body.tell()) - body.seek(0, 0) - elif body is not None: - l = super_len(body) - if l: - self.headers['Content-Length'] = builtin_str(l) - elif (self.method not in ('GET', 'HEAD')) and (self.headers.get('Content-Length') is None): + """Prepare Content-Length header based on request method and body""" + if body is not None: + length = super_len(body) + if length: + # If length exists, set it. Otherwise, we fallback + # to Transfer-Encoding: chunked. + self.headers['Content-Length'] = builtin_str(length) + elif self.method not in ('GET', 'HEAD') and self.headers.get('Content-Length') is None: + # Set Content-Length to 0 for methods that can have a body + # but don't provide one. (i.e. not GET or HEAD) self.headers['Content-Length'] = '0' def prepare_auth(self, auth, url=''): @@ -501,8 +572,8 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): can only be called once for the life of the :class:`PreparedRequest ` object. Any subsequent calls to ``prepare_cookies`` will have no actual effect, unless the "Cookie" - header is removed beforehand.""" - + header is removed beforehand. + """ if isinstance(cookies, cookielib.CookieJar): self._cookies = cookies else: @@ -533,10 +604,9 @@ class Response(object): ] def __init__(self): - super(Response, self).__init__() - self._content = False self._content_consumed = False + self._next = None #: Integer Code of responded HTTP Status, e.g. 404 or 200. self.status_code = None @@ -548,7 +618,7 @@ class Response(object): #: File-like object representation of response (for advanced usage). #: Use of ``raw`` requires that ``stream=True`` be set on the request. - # This requirement does not apply for use internally to Requests. + #: This requirement does not apply for use internally to Requests. self.raw = None #: Final URL location of Response. @@ -580,16 +650,19 @@ class Response(object): #: is a response. self.request = None + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + def __getstate__(self): # Consume everything; accessing the content attribute makes # sure the content has been fully read. if not self._content_consumed: self.content - return dict( - (attr, getattr(self, attr, None)) - for attr in self.__attrs__ - ) + return {attr: getattr(self, attr, None) for attr in self.__attrs__} def __setstate__(self, state): for name, value in state.items(): @@ -603,11 +676,23 @@ class Response(object): return '' % (self.status_code) def __bool__(self): - """Returns true if :attr:`status_code` is 'OK'.""" + """Returns True if :attr:`status_code` is less than 400. + + This attribute checks if the status code of the response is between + 400 and 600 to see if there was a client error or a server error. If + the status code, is between 200 and 400, this will return True. This + is **not** a check to see if the response code is ``200 OK``. + """ return self.ok def __nonzero__(self): - """Returns true if :attr:`status_code` is 'OK'.""" + """Returns True if :attr:`status_code` is less than 400. + + This attribute checks if the status code of the response is between + 400 and 600 to see if there was a client error or a server error. If + the status code, is between 200 and 400, this will return True. This + is **not** a check to see if the response code is ``200 OK``. + """ return self.ok def __iter__(self): @@ -616,6 +701,13 @@ class Response(object): @property def ok(self): + """Returns True if :attr:`status_code` is less than 400, False if not. + + This attribute checks if the status code of the response is between + 400 and 600 to see if there was a client error or a server error. If + the status code is between 200 and 400, this will return True. This + is **not** a check to see if the response code is ``200 OK``. + """ try: self.raise_for_status() except HTTPError: @@ -631,12 +723,17 @@ class Response(object): @property def is_permanent_redirect(self): - """True if this Response one of the permanant versions of redirect""" + """True if this Response one of the permanent versions of redirect.""" return ('location' in self.headers and self.status_code in (codes.moved_permanently, codes.permanent_redirect)) + @property + def next(self): + """Returns a PreparedRequest for the next request in a redirect chain, if there is one.""" + return self._next + @property def apparent_encoding(self): - """The apparent encoding, provided by the chardet library""" + """The apparent encoding, provided by the charset_normalizer or chardet libraries.""" return chardet.detect(self.content)['encoding'] def iter_content(self, chunk_size=1, decode_unicode=False): @@ -646,6 +743,12 @@ class Response(object): read into memory. This is not necessarily the length of each item returned as decoding can take place. + chunk_size must be of type int or None. A value of None will + function differently depending on the value of `stream`. + stream=True will read data as it arrives in whatever size the + chunks are received. If stream=False, data is returned as + a single chunk. + If decode_unicode is True, content will be decoded using the best available encoding based on the response. """ @@ -674,6 +777,8 @@ class Response(object): if self._content_consumed and isinstance(self._content, bool): raise StreamConsumedError() + elif chunk_size is not None and not isinstance(chunk_size, int): + raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size)) # simulate reading small chunks of the content reused_chunks = iter_slices(self._content, chunk_size) @@ -686,7 +791,7 @@ class Response(object): return chunks - def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=None, delimiter=None): + def iter_lines(self, chunk_size=ITER_CHUNK_SIZE, decode_unicode=False, delimiter=None): """Iterates over the response data, one line at a time. When stream=True is set on the request, this avoids reading the content at once into memory for large responses. @@ -723,18 +828,14 @@ class Response(object): if self._content is False: # Read the contents. - try: - if self._content_consumed: - raise RuntimeError( - 'The content for this response was already consumed') + if self._content_consumed: + raise RuntimeError( + 'The content for this response was already consumed') - if self.status_code == 0: - self._content = None - else: - self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes() - - except AttributeError: + if self.status_code == 0 or self.raw is None: self._content = None + else: + self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b'' self._content_consumed = True # don't need to release the connection; that's been handled by urllib3 @@ -746,7 +847,7 @@ class Response(object): """Content of the response, in unicode. If Response.encoding is None, encoding will be guessed using - ``chardet``. + ``charset_normalizer`` or ``chardet``. The encoding of the response content is determined based solely on HTTP headers, following RFC 2616 to the letter. If you can take advantage of @@ -780,15 +881,17 @@ class Response(object): return content def json(self, **kwargs): - """Returns the json-encoded content of a response, if any. + r"""Returns the json-encoded content of a response, if any. :param \*\*kwargs: Optional arguments that ``json.loads`` takes. + :raises requests.exceptions.JSONDecodeError: If the response body does not + contain valid json. """ - if not self.encoding and len(self.content) > 3: + if not self.encoding and self.content and len(self.content) > 3: # No encoding set. JSON RFC 4627 section 3 states we should expect # UTF-8, -16 or -32. Detect which one to use; If the detection or - # decoding fails, fall back to `self.text` (using chardet to make + # decoding fails, fall back to `self.text` (using charset_normalizer to make # a best guess). encoding = guess_json_utf(self.content) if encoding is not None: @@ -802,7 +905,16 @@ class Response(object): # and the server didn't bother to tell us what codec *was* # used. pass - return complexjson.loads(self.text, **kwargs) + + try: + return complexjson.loads(self.text, **kwargs) + except JSONDecodeError as e: + # Catch JSON-related errors and raise as requests.JSONDecodeError + # This aliases json.JSONDecodeError and simplejson.JSONDecodeError + if is_py2: # e is a ValueError + raise RequestsJSONDecodeError(e.message) + else: + raise RequestsJSONDecodeError(e.msg, e.doc, e.pos) @property def links(self): @@ -823,15 +935,26 @@ class Response(object): return l def raise_for_status(self): - """Raises stored :class:`HTTPError`, if one occurred.""" + """Raises :class:`HTTPError`, if one occurred.""" http_error_msg = '' + if isinstance(self.reason, bytes): + # We attempt to decode utf-8 first because some servers + # choose to localize their reason strings. If the string + # isn't utf-8, we fall back to iso-8859-1 for all other + # encodings. (See PR #3538) + try: + reason = self.reason.decode('utf-8') + except UnicodeDecodeError: + reason = self.reason.decode('iso-8859-1') + else: + reason = self.reason if 400 <= self.status_code < 500: - http_error_msg = '%s Client Error: %s for url: %s' % (self.status_code, self.reason, self.url) + http_error_msg = u'%s Client Error: %s for url: %s' % (self.status_code, reason, self.url) elif 500 <= self.status_code < 600: - http_error_msg = '%s Server Error: %s for url: %s' % (self.status_code, self.reason, self.url) + http_error_msg = u'%s Server Error: %s for url: %s' % (self.status_code, reason, self.url) if http_error_msg: raise HTTPError(http_error_msg, response=self) @@ -843,6 +966,8 @@ class Response(object): *Note: Should not normally need to be called explicitly.* """ if not self._content_consumed: - return self.raw.close() + self.raw.close() - return self.raw.release_conn() + release_conn = getattr(self.raw, 'release_conn', None) + if release_conn is not None: + release_conn() diff --git a/lib/requests/packages.py b/lib/requests/packages.py new file mode 100644 index 00000000..00196bff --- /dev/null +++ b/lib/requests/packages.py @@ -0,0 +1,26 @@ +import sys + +try: + import chardet +except ImportError: + import charset_normalizer as chardet + import warnings + + warnings.filterwarnings('ignore', 'Trying to detect', module='charset_normalizer') + +# This code exists for backwards compatibility reasons. +# I don't like it either. Just look the other way. :) + +for package in ('urllib3', 'idna'): + locals()[package] = __import__(package) + # This traversal is apparently necessary such that the identities are + # preserved (requests.packages.urllib3.* is urllib3.*) + for mod in list(sys.modules): + if mod == package or mod.startswith(package + '.'): + sys.modules['requests.packages.' + mod] = sys.modules[mod] + +target = chardet.__name__ +for mod in list(sys.modules): + if mod == target or mod.startswith(target + '.'): + sys.modules['requests.packages.' + target.replace(target, 'chardet')] = sys.modules[mod] +# Kinda cool, though, right? diff --git a/lib/requests/packages/README.rst b/lib/requests/packages/README.rst deleted file mode 100755 index c42f376b..00000000 --- a/lib/requests/packages/README.rst +++ /dev/null @@ -1,8 +0,0 @@ -If you are planning to submit a pull request to requests with any changes in -this library do not go any further. These are independent libraries which we -vendor into requests. Any changes necessary to these libraries must be made in -them and submitted as separate pull requests to those libraries. - -urllib3 pull requests go here: https://github.com/shazow/urllib3 - -chardet pull requests go here: https://github.com/chardet/chardet diff --git a/lib/requests/packages/__init__.py b/lib/requests/packages/__init__.py deleted file mode 100755 index d62c4b71..00000000 --- a/lib/requests/packages/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from __future__ import absolute_import - -from . import urllib3 diff --git a/lib/requests/packages/chardet/__init__.py b/lib/requests/packages/chardet/__init__.py deleted file mode 100755 index 82c2a48d..00000000 --- a/lib/requests/packages/chardet/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -__version__ = "2.3.0" -from sys import version_info - - -def detect(aBuf): - if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or - (version_info >= (3, 0) and not isinstance(aBuf, bytes))): - raise ValueError('Expected a bytes object, not a unicode object') - - from . import universaldetector - u = universaldetector.UniversalDetector() - u.reset() - u.feed(aBuf) - u.close() - return u.result diff --git a/lib/requests/packages/chardet/big5freq.py b/lib/requests/packages/chardet/big5freq.py deleted file mode 100755 index 65bffc04..00000000 --- a/lib/requests/packages/chardet/big5freq.py +++ /dev/null @@ -1,925 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# Big5 frequency table -# by Taiwan's Mandarin Promotion Council -# -# -# 128 --> 0.42261 -# 256 --> 0.57851 -# 512 --> 0.74851 -# 1024 --> 0.89384 -# 2048 --> 0.97583 -# -# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98 -# Random Distribution Ration = 512/(5401-512)=0.105 -# -# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR - -BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 - -#Char to FreqOrder table -BIG5_TABLE_SIZE = 5376 - -Big5CharToFreqOrder = ( - 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 -3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 -1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48 - 63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64 -3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80 -4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96 -5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112 - 630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128 - 179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144 - 995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160 -2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176 -1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192 -3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208 - 706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224 -1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240 -3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256 -2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272 - 437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288 -3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304 -1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320 -5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336 - 266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352 -5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368 -1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384 - 32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400 - 188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416 -3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432 -3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448 - 324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464 -2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480 -2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496 - 314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512 - 287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528 -3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544 -1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560 -1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576 -1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592 -2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608 - 265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624 -4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640 -1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656 -5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672 -2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688 - 383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704 - 98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720 - 523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736 - 710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752 -5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768 - 379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784 -1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800 - 585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816 - 690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832 -5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848 -1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864 - 544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880 -3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896 -4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912 -3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928 - 279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944 - 610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960 -1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976 -4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992 -3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008 -3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024 -2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040 -5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056 -3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072 -5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088 -1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104 -2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120 -1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136 - 78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152 -1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168 -4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184 -3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200 - 534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216 - 165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232 - 626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248 -2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264 -5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280 -1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296 -2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312 -1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328 -1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344 -5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360 -5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376 -5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392 -3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408 -4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424 -4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440 -2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456 -5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472 -3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488 - 598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504 -5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520 -5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536 -1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552 -2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568 -3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584 -4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600 -5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616 -3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632 -4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648 -1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664 -1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680 -4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696 -1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712 - 240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728 -1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744 -1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760 -3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776 - 619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792 -5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808 -2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824 -1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840 -1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856 -5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872 - 829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888 -4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904 - 375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920 -2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936 - 444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952 -1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968 -1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984 - 730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000 -4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016 -4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032 -1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048 -3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064 -5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080 -5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096 -1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112 -2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128 -1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144 -3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160 -2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176 -3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192 -2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208 -4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224 -4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240 -3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256 - 97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272 -3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288 - 424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304 -3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320 -4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336 -3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352 -1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368 -5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384 - 199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400 -5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416 -1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432 - 391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448 -4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464 -4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480 - 397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496 -2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512 -2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528 -3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544 -1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560 -4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576 -2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592 -1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608 -1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624 -2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640 -3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656 -1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672 -5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688 -1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704 -4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720 -1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736 - 135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752 -1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768 -4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784 -4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800 -2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816 -1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832 -4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848 - 660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864 -5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880 -2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896 -3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912 -4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928 - 790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944 -5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960 -5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976 -1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992 -4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008 -4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024 -2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040 -3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056 -3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072 -2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088 -1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104 -4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120 -3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136 -3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152 -2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168 -4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184 -5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200 -3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216 -2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232 -3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248 -1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264 -2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280 -3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296 -4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312 -2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328 -2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344 -5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360 -1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376 -2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392 -1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408 -3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424 -4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440 -2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456 -3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472 -3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488 -2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504 -4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520 -2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536 -3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552 -4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568 -5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584 -3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600 - 194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616 -1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632 -4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648 -1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664 -4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680 -5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696 - 510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712 -5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728 -5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744 -2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760 -3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776 -2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792 -2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808 - 681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824 -1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840 -4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856 -3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872 -3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888 - 838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904 -2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920 - 625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936 -2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952 -4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968 -1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984 -4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000 -1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016 -3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032 - 574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048 -3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064 -5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080 -5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096 -3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112 -3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128 -1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144 -2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160 -5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176 -1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192 -1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208 -3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224 - 919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240 -1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256 -4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272 -5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288 -2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304 -3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320 - 516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336 -1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352 -2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368 -2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384 -5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400 -5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416 -5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432 -2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448 -2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464 -1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480 -4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496 -3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512 -3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528 -4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544 -4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560 -2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576 -2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592 -5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608 -4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624 -5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640 -4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656 - 502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672 - 121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688 -1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704 -3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720 -4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736 -1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752 -5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768 -2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784 -2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800 -3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816 -5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832 -1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848 -3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864 -5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880 -1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896 -5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912 -2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928 -3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944 -2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960 -3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976 -3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992 -3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008 -4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024 - 803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040 -2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056 -4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072 -3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088 -5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104 -1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120 -5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136 - 425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152 -1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168 - 479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184 -4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200 -1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216 -4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232 -1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248 - 433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264 -3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280 -4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296 -5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312 - 938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328 -3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344 - 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 -2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 #last 512 -#Everything below is of no interest for detection purpose -2522,1613,4812,5799,3345,3945,2523,5800,4162,5801,1637,4163,2471,4813,3946,5802, # 5392 -2500,3034,3800,5803,5804,2195,4814,5805,2163,5806,5807,5808,5809,5810,5811,5812, # 5408 -5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828, # 5424 -5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844, # 5440 -5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856,5857,5858,5859,5860, # 5456 -5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872,5873,5874,5875,5876, # 5472 -5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888,5889,5890,5891,5892, # 5488 -5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905,5906,5907,5908, # 5504 -5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920,5921,5922,5923,5924, # 5520 -5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936,5937,5938,5939,5940, # 5536 -5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952,5953,5954,5955,5956, # 5552 -5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968,5969,5970,5971,5972, # 5568 -5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984,5985,5986,5987,5988, # 5584 -5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004, # 5600 -6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020, # 5616 -6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032,6033,6034,6035,6036, # 5632 -6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052, # 5648 -6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068, # 5664 -6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084, # 5680 -6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100, # 5696 -6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116, # 5712 -6117,6118,6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,6132, # 5728 -6133,6134,6135,6136,6137,6138,6139,6140,6141,6142,6143,6144,6145,6146,6147,6148, # 5744 -6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163,6164, # 5760 -6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179,6180, # 5776 -6181,6182,6183,6184,6185,6186,6187,6188,6189,6190,6191,6192,6193,6194,6195,6196, # 5792 -6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210,6211,6212, # 5808 -6213,6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,3670,6224,6225,6226,6227, # 5824 -6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241,6242,6243, # 5840 -6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,6254,6255,6256,6257,6258,6259, # 5856 -6260,6261,6262,6263,6264,6265,6266,6267,6268,6269,6270,6271,6272,6273,6274,6275, # 5872 -6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,4815,6286,6287,6288,6289,6290, # 5888 -6291,6292,4816,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,6303,6304,6305, # 5904 -6306,6307,6308,6309,6310,6311,4817,4818,6312,6313,6314,6315,6316,6317,6318,4819, # 5920 -6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333,6334, # 5936 -6335,6336,6337,4820,6338,6339,6340,6341,6342,6343,6344,6345,6346,6347,6348,6349, # 5952 -6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363,6364,6365, # 5968 -6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379,6380,6381, # 5984 -6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395,6396,6397, # 6000 -6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,3441,6411,6412, # 6016 -6413,6414,6415,6416,6417,6418,6419,6420,6421,6422,6423,6424,6425,4440,6426,6427, # 6032 -6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,6439,6440,6441,6442,6443, # 6048 -6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,4821,6455,6456,6457,6458, # 6064 -6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472,6473,6474, # 6080 -6475,6476,6477,3947,3948,6478,6479,6480,6481,3272,4441,6482,6483,6484,6485,4442, # 6096 -6486,6487,6488,6489,6490,6491,6492,6493,6494,6495,6496,4822,6497,6498,6499,6500, # 6112 -6501,6502,6503,6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516, # 6128 -6517,6518,6519,6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532, # 6144 -6533,6534,6535,6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548, # 6160 -6549,6550,6551,6552,6553,6554,6555,6556,2784,6557,4823,6558,6559,6560,6561,6562, # 6176 -6563,6564,6565,6566,6567,6568,6569,3949,6570,6571,6572,4824,6573,6574,6575,6576, # 6192 -6577,6578,6579,6580,6581,6582,6583,4825,6584,6585,6586,3950,2785,6587,6588,6589, # 6208 -6590,6591,6592,6593,6594,6595,6596,6597,6598,6599,6600,6601,6602,6603,6604,6605, # 6224 -6606,6607,6608,6609,6610,6611,6612,4826,6613,6614,6615,4827,6616,6617,6618,6619, # 6240 -6620,6621,6622,6623,6624,6625,4164,6626,6627,6628,6629,6630,6631,6632,6633,6634, # 6256 -3547,6635,4828,6636,6637,6638,6639,6640,6641,6642,3951,2984,6643,6644,6645,6646, # 6272 -6647,6648,6649,4165,6650,4829,6651,6652,4830,6653,6654,6655,6656,6657,6658,6659, # 6288 -6660,6661,6662,4831,6663,6664,6665,6666,6667,6668,6669,6670,6671,4166,6672,4832, # 6304 -3952,6673,6674,6675,6676,4833,6677,6678,6679,4167,6680,6681,6682,3198,6683,6684, # 6320 -6685,6686,6687,6688,6689,6690,6691,6692,6693,6694,6695,6696,6697,4834,6698,6699, # 6336 -6700,6701,6702,6703,6704,6705,6706,6707,6708,6709,6710,6711,6712,6713,6714,6715, # 6352 -6716,6717,6718,6719,6720,6721,6722,6723,6724,6725,6726,6727,6728,6729,6730,6731, # 6368 -6732,6733,6734,4443,6735,6736,6737,6738,6739,6740,6741,6742,6743,6744,6745,4444, # 6384 -6746,6747,6748,6749,6750,6751,6752,6753,6754,6755,6756,6757,6758,6759,6760,6761, # 6400 -6762,6763,6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777, # 6416 -6778,6779,6780,6781,4168,6782,6783,3442,6784,6785,6786,6787,6788,6789,6790,6791, # 6432 -4169,6792,6793,6794,6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806, # 6448 -6807,6808,6809,6810,6811,4835,6812,6813,6814,4445,6815,6816,4446,6817,6818,6819, # 6464 -6820,6821,6822,6823,6824,6825,6826,6827,6828,6829,6830,6831,6832,6833,6834,6835, # 6480 -3548,6836,6837,6838,6839,6840,6841,6842,6843,6844,6845,6846,4836,6847,6848,6849, # 6496 -6850,6851,6852,6853,6854,3953,6855,6856,6857,6858,6859,6860,6861,6862,6863,6864, # 6512 -6865,6866,6867,6868,6869,6870,6871,6872,6873,6874,6875,6876,6877,3199,6878,6879, # 6528 -6880,6881,6882,4447,6883,6884,6885,6886,6887,6888,6889,6890,6891,6892,6893,6894, # 6544 -6895,6896,6897,6898,6899,6900,6901,6902,6903,6904,4170,6905,6906,6907,6908,6909, # 6560 -6910,6911,6912,6913,6914,6915,6916,6917,6918,6919,6920,6921,6922,6923,6924,6925, # 6576 -6926,6927,4837,6928,6929,6930,6931,6932,6933,6934,6935,6936,3346,6937,6938,4838, # 6592 -6939,6940,6941,4448,6942,6943,6944,6945,6946,4449,6947,6948,6949,6950,6951,6952, # 6608 -6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966,6967,6968, # 6624 -6969,6970,6971,6972,6973,6974,6975,6976,6977,6978,6979,6980,6981,6982,6983,6984, # 6640 -6985,6986,6987,6988,6989,6990,6991,6992,6993,6994,3671,6995,6996,6997,6998,4839, # 6656 -6999,7000,7001,7002,3549,7003,7004,7005,7006,7007,7008,7009,7010,7011,7012,7013, # 6672 -7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027,7028,7029, # 6688 -7030,4840,7031,7032,7033,7034,7035,7036,7037,7038,4841,7039,7040,7041,7042,7043, # 6704 -7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058,7059, # 6720 -7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,2985,7071,7072,7073,7074, # 6736 -7075,7076,7077,7078,7079,7080,4842,7081,7082,7083,7084,7085,7086,7087,7088,7089, # 6752 -7090,7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105, # 6768 -7106,7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,4450,7119,7120, # 6784 -7121,7122,7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136, # 6800 -7137,7138,7139,7140,7141,7142,7143,4843,7144,7145,7146,7147,7148,7149,7150,7151, # 6816 -7152,7153,7154,7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167, # 6832 -7168,7169,7170,7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183, # 6848 -7184,7185,7186,7187,7188,4171,4172,7189,7190,7191,7192,7193,7194,7195,7196,7197, # 6864 -7198,7199,7200,7201,7202,7203,7204,7205,7206,7207,7208,7209,7210,7211,7212,7213, # 6880 -7214,7215,7216,7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229, # 6896 -7230,7231,7232,7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245, # 6912 -7246,7247,7248,7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261, # 6928 -7262,7263,7264,7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277, # 6944 -7278,7279,7280,7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293, # 6960 -7294,7295,7296,4844,7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308, # 6976 -7309,7310,7311,7312,7313,7314,7315,7316,4451,7317,7318,7319,7320,7321,7322,7323, # 6992 -7324,7325,7326,7327,7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339, # 7008 -7340,7341,7342,7343,7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,4173,7354, # 7024 -7355,4845,7356,7357,7358,7359,7360,7361,7362,7363,7364,7365,7366,7367,7368,7369, # 7040 -7370,7371,7372,7373,7374,7375,7376,7377,7378,7379,7380,7381,7382,7383,7384,7385, # 7056 -7386,7387,7388,4846,7389,7390,7391,7392,7393,7394,7395,7396,7397,7398,7399,7400, # 7072 -7401,7402,7403,7404,7405,3672,7406,7407,7408,7409,7410,7411,7412,7413,7414,7415, # 7088 -7416,7417,7418,7419,7420,7421,7422,7423,7424,7425,7426,7427,7428,7429,7430,7431, # 7104 -7432,7433,7434,7435,7436,7437,7438,7439,7440,7441,7442,7443,7444,7445,7446,7447, # 7120 -7448,7449,7450,7451,7452,7453,4452,7454,3200,7455,7456,7457,7458,7459,7460,7461, # 7136 -7462,7463,7464,7465,7466,7467,7468,7469,7470,7471,7472,7473,7474,4847,7475,7476, # 7152 -7477,3133,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487,7488,7489,7490,7491, # 7168 -7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,3347,7503,7504,7505,7506, # 7184 -7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,4848, # 7200 -7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535,7536,7537, # 7216 -7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,3801,4849,7550,7551, # 7232 -7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, # 7248 -7568,7569,3035,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582, # 7264 -7583,7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598, # 7280 -7599,7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614, # 7296 -7615,7616,4850,7617,7618,3802,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628, # 7312 -7629,7630,7631,7632,4851,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643, # 7328 -7644,7645,7646,7647,7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659, # 7344 -7660,7661,7662,7663,7664,7665,7666,7667,7668,7669,7670,4453,7671,7672,7673,7674, # 7360 -7675,7676,7677,7678,7679,7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690, # 7376 -7691,7692,7693,7694,7695,7696,7697,3443,7698,7699,7700,7701,7702,4454,7703,7704, # 7392 -7705,7706,7707,7708,7709,7710,7711,7712,7713,2472,7714,7715,7716,7717,7718,7719, # 7408 -7720,7721,7722,7723,7724,7725,7726,7727,7728,7729,7730,7731,3954,7732,7733,7734, # 7424 -7735,7736,7737,7738,7739,7740,7741,7742,7743,7744,7745,7746,7747,7748,7749,7750, # 7440 -3134,7751,7752,4852,7753,7754,7755,4853,7756,7757,7758,7759,7760,4174,7761,7762, # 7456 -7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,7777,7778, # 7472 -7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791,7792,7793,7794, # 7488 -7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,4854,7806,7807,7808,7809, # 7504 -7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824,7825, # 7520 -4855,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7536 -7841,7842,7843,7844,7845,7846,7847,3955,7848,7849,7850,7851,7852,7853,7854,7855, # 7552 -7856,7857,7858,7859,7860,3444,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870, # 7568 -7871,7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886, # 7584 -7887,7888,7889,7890,7891,4175,7892,7893,7894,7895,7896,4856,4857,7897,7898,7899, # 7600 -7900,2598,7901,7902,7903,7904,7905,7906,7907,7908,4455,7909,7910,7911,7912,7913, # 7616 -7914,3201,7915,7916,7917,7918,7919,7920,7921,4858,7922,7923,7924,7925,7926,7927, # 7632 -7928,7929,7930,7931,7932,7933,7934,7935,7936,7937,7938,7939,7940,7941,7942,7943, # 7648 -7944,7945,7946,7947,7948,7949,7950,7951,7952,7953,7954,7955,7956,7957,7958,7959, # 7664 -7960,7961,7962,7963,7964,7965,7966,7967,7968,7969,7970,7971,7972,7973,7974,7975, # 7680 -7976,7977,7978,7979,7980,7981,4859,7982,7983,7984,7985,7986,7987,7988,7989,7990, # 7696 -7991,7992,7993,7994,7995,7996,4860,7997,7998,7999,8000,8001,8002,8003,8004,8005, # 7712 -8006,8007,8008,8009,8010,8011,8012,8013,8014,8015,8016,4176,8017,8018,8019,8020, # 7728 -8021,8022,8023,4861,8024,8025,8026,8027,8028,8029,8030,8031,8032,8033,8034,8035, # 7744 -8036,4862,4456,8037,8038,8039,8040,4863,8041,8042,8043,8044,8045,8046,8047,8048, # 7760 -8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063,8064, # 7776 -8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,8080, # 7792 -8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096, # 7808 -8097,8098,8099,4864,4177,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110, # 7824 -8111,8112,8113,8114,8115,8116,8117,8118,8119,8120,4178,8121,8122,8123,8124,8125, # 7840 -8126,8127,8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141, # 7856 -8142,8143,8144,8145,4865,4866,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155, # 7872 -8156,8157,8158,8159,8160,8161,8162,8163,8164,8165,4179,8166,8167,8168,8169,8170, # 7888 -8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181,4457,8182,8183,8184,8185, # 7904 -8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201, # 7920 -8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213,8214,8215,8216,8217, # 7936 -8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229,8230,8231,8232,8233, # 7952 -8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245,8246,8247,8248,8249, # 7968 -8250,8251,8252,8253,8254,8255,8256,3445,8257,8258,8259,8260,8261,8262,4458,8263, # 7984 -8264,8265,8266,8267,8268,8269,8270,8271,8272,4459,8273,8274,8275,8276,3550,8277, # 8000 -8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,4460,8290,8291,8292, # 8016 -8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,4867, # 8032 -8308,8309,8310,8311,8312,3551,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322, # 8048 -8323,8324,8325,8326,4868,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337, # 8064 -8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353, # 8080 -8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,4869,4461,8364,8365,8366,8367, # 8096 -8368,8369,8370,4870,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382, # 8112 -8383,8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398, # 8128 -8399,8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,4871,8411,8412,8413, # 8144 -8414,8415,8416,8417,8418,8419,8420,8421,8422,4462,8423,8424,8425,8426,8427,8428, # 8160 -8429,8430,8431,8432,8433,2986,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443, # 8176 -8444,8445,8446,8447,8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459, # 8192 -8460,8461,8462,8463,8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475, # 8208 -8476,8477,8478,4180,8479,8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490, # 8224 -8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506, # 8240 -8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522, # 8256 -8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538, # 8272 -8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554, # 8288 -8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,4872,8565,8566,8567,8568,8569, # 8304 -8570,8571,8572,8573,4873,8574,8575,8576,8577,8578,8579,8580,8581,8582,8583,8584, # 8320 -8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597,8598,8599,8600, # 8336 -8601,8602,8603,8604,8605,3803,8606,8607,8608,8609,8610,8611,8612,8613,4874,3804, # 8352 -8614,8615,8616,8617,8618,8619,8620,8621,3956,8622,8623,8624,8625,8626,8627,8628, # 8368 -8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,2865,8639,8640,8641,8642,8643, # 8384 -8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,4463,8657,8658, # 8400 -8659,4875,4876,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672, # 8416 -8673,8674,8675,8676,8677,8678,8679,8680,8681,4464,8682,8683,8684,8685,8686,8687, # 8432 -8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, # 8448 -8704,8705,8706,8707,8708,8709,2261,8710,8711,8712,8713,8714,8715,8716,8717,8718, # 8464 -8719,8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,4181, # 8480 -8734,8735,8736,8737,8738,8739,8740,8741,8742,8743,8744,8745,8746,8747,8748,8749, # 8496 -8750,8751,8752,8753,8754,8755,8756,8757,8758,8759,8760,8761,8762,8763,4877,8764, # 8512 -8765,8766,8767,8768,8769,8770,8771,8772,8773,8774,8775,8776,8777,8778,8779,8780, # 8528 -8781,8782,8783,8784,8785,8786,8787,8788,4878,8789,4879,8790,8791,8792,4880,8793, # 8544 -8794,8795,8796,8797,8798,8799,8800,8801,4881,8802,8803,8804,8805,8806,8807,8808, # 8560 -8809,8810,8811,8812,8813,8814,8815,3957,8816,8817,8818,8819,8820,8821,8822,8823, # 8576 -8824,8825,8826,8827,8828,8829,8830,8831,8832,8833,8834,8835,8836,8837,8838,8839, # 8592 -8840,8841,8842,8843,8844,8845,8846,8847,4882,8848,8849,8850,8851,8852,8853,8854, # 8608 -8855,8856,8857,8858,8859,8860,8861,8862,8863,8864,8865,8866,8867,8868,8869,8870, # 8624 -8871,8872,8873,8874,8875,8876,8877,8878,8879,8880,8881,8882,8883,8884,3202,8885, # 8640 -8886,8887,8888,8889,8890,8891,8892,8893,8894,8895,8896,8897,8898,8899,8900,8901, # 8656 -8902,8903,8904,8905,8906,8907,8908,8909,8910,8911,8912,8913,8914,8915,8916,8917, # 8672 -8918,8919,8920,8921,8922,8923,8924,4465,8925,8926,8927,8928,8929,8930,8931,8932, # 8688 -4883,8933,8934,8935,8936,8937,8938,8939,8940,8941,8942,8943,2214,8944,8945,8946, # 8704 -8947,8948,8949,8950,8951,8952,8953,8954,8955,8956,8957,8958,8959,8960,8961,8962, # 8720 -8963,8964,8965,4884,8966,8967,8968,8969,8970,8971,8972,8973,8974,8975,8976,8977, # 8736 -8978,8979,8980,8981,8982,8983,8984,8985,8986,8987,8988,8989,8990,8991,8992,4885, # 8752 -8993,8994,8995,8996,8997,8998,8999,9000,9001,9002,9003,9004,9005,9006,9007,9008, # 8768 -9009,9010,9011,9012,9013,9014,9015,9016,9017,9018,9019,9020,9021,4182,9022,9023, # 8784 -9024,9025,9026,9027,9028,9029,9030,9031,9032,9033,9034,9035,9036,9037,9038,9039, # 8800 -9040,9041,9042,9043,9044,9045,9046,9047,9048,9049,9050,9051,9052,9053,9054,9055, # 8816 -9056,9057,9058,9059,9060,9061,9062,9063,4886,9064,9065,9066,9067,9068,9069,4887, # 8832 -9070,9071,9072,9073,9074,9075,9076,9077,9078,9079,9080,9081,9082,9083,9084,9085, # 8848 -9086,9087,9088,9089,9090,9091,9092,9093,9094,9095,9096,9097,9098,9099,9100,9101, # 8864 -9102,9103,9104,9105,9106,9107,9108,9109,9110,9111,9112,9113,9114,9115,9116,9117, # 8880 -9118,9119,9120,9121,9122,9123,9124,9125,9126,9127,9128,9129,9130,9131,9132,9133, # 8896 -9134,9135,9136,9137,9138,9139,9140,9141,3958,9142,9143,9144,9145,9146,9147,9148, # 8912 -9149,9150,9151,4888,9152,9153,9154,9155,9156,9157,9158,9159,9160,9161,9162,9163, # 8928 -9164,9165,9166,9167,9168,9169,9170,9171,9172,9173,9174,9175,4889,9176,9177,9178, # 8944 -9179,9180,9181,9182,9183,9184,9185,9186,9187,9188,9189,9190,9191,9192,9193,9194, # 8960 -9195,9196,9197,9198,9199,9200,9201,9202,9203,4890,9204,9205,9206,9207,9208,9209, # 8976 -9210,9211,9212,9213,9214,9215,9216,9217,9218,9219,9220,9221,9222,4466,9223,9224, # 8992 -9225,9226,9227,9228,9229,9230,9231,9232,9233,9234,9235,9236,9237,9238,9239,9240, # 9008 -9241,9242,9243,9244,9245,4891,9246,9247,9248,9249,9250,9251,9252,9253,9254,9255, # 9024 -9256,9257,4892,9258,9259,9260,9261,4893,4894,9262,9263,9264,9265,9266,9267,9268, # 9040 -9269,9270,9271,9272,9273,4467,9274,9275,9276,9277,9278,9279,9280,9281,9282,9283, # 9056 -9284,9285,3673,9286,9287,9288,9289,9290,9291,9292,9293,9294,9295,9296,9297,9298, # 9072 -9299,9300,9301,9302,9303,9304,9305,9306,9307,9308,9309,9310,9311,9312,9313,9314, # 9088 -9315,9316,9317,9318,9319,9320,9321,9322,4895,9323,9324,9325,9326,9327,9328,9329, # 9104 -9330,9331,9332,9333,9334,9335,9336,9337,9338,9339,9340,9341,9342,9343,9344,9345, # 9120 -9346,9347,4468,9348,9349,9350,9351,9352,9353,9354,9355,9356,9357,9358,9359,9360, # 9136 -9361,9362,9363,9364,9365,9366,9367,9368,9369,9370,9371,9372,9373,4896,9374,4469, # 9152 -9375,9376,9377,9378,9379,4897,9380,9381,9382,9383,9384,9385,9386,9387,9388,9389, # 9168 -9390,9391,9392,9393,9394,9395,9396,9397,9398,9399,9400,9401,9402,9403,9404,9405, # 9184 -9406,4470,9407,2751,9408,9409,3674,3552,9410,9411,9412,9413,9414,9415,9416,9417, # 9200 -9418,9419,9420,9421,4898,9422,9423,9424,9425,9426,9427,9428,9429,3959,9430,9431, # 9216 -9432,9433,9434,9435,9436,4471,9437,9438,9439,9440,9441,9442,9443,9444,9445,9446, # 9232 -9447,9448,9449,9450,3348,9451,9452,9453,9454,9455,9456,9457,9458,9459,9460,9461, # 9248 -9462,9463,9464,9465,9466,9467,9468,9469,9470,9471,9472,4899,9473,9474,9475,9476, # 9264 -9477,4900,9478,9479,9480,9481,9482,9483,9484,9485,9486,9487,9488,3349,9489,9490, # 9280 -9491,9492,9493,9494,9495,9496,9497,9498,9499,9500,9501,9502,9503,9504,9505,9506, # 9296 -9507,9508,9509,9510,9511,9512,9513,9514,9515,9516,9517,9518,9519,9520,4901,9521, # 9312 -9522,9523,9524,9525,9526,4902,9527,9528,9529,9530,9531,9532,9533,9534,9535,9536, # 9328 -9537,9538,9539,9540,9541,9542,9543,9544,9545,9546,9547,9548,9549,9550,9551,9552, # 9344 -9553,9554,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568, # 9360 -9569,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,9581,9582,9583,9584, # 9376 -3805,9585,9586,9587,9588,9589,9590,9591,9592,9593,9594,9595,9596,9597,9598,9599, # 9392 -9600,9601,9602,4903,9603,9604,9605,9606,9607,4904,9608,9609,9610,9611,9612,9613, # 9408 -9614,4905,9615,9616,9617,9618,9619,9620,9621,9622,9623,9624,9625,9626,9627,9628, # 9424 -9629,9630,9631,9632,4906,9633,9634,9635,9636,9637,9638,9639,9640,9641,9642,9643, # 9440 -4907,9644,9645,9646,9647,9648,9649,9650,9651,9652,9653,9654,9655,9656,9657,9658, # 9456 -9659,9660,9661,9662,9663,9664,9665,9666,9667,9668,9669,9670,9671,9672,4183,9673, # 9472 -9674,9675,9676,9677,4908,9678,9679,9680,9681,4909,9682,9683,9684,9685,9686,9687, # 9488 -9688,9689,9690,4910,9691,9692,9693,3675,9694,9695,9696,2945,9697,9698,9699,9700, # 9504 -9701,9702,9703,9704,9705,4911,9706,9707,9708,9709,9710,9711,9712,9713,9714,9715, # 9520 -9716,9717,9718,9719,9720,9721,9722,9723,9724,9725,9726,9727,9728,9729,9730,9731, # 9536 -9732,9733,9734,9735,4912,9736,9737,9738,9739,9740,4913,9741,9742,9743,9744,9745, # 9552 -9746,9747,9748,9749,9750,9751,9752,9753,9754,9755,9756,9757,9758,4914,9759,9760, # 9568 -9761,9762,9763,9764,9765,9766,9767,9768,9769,9770,9771,9772,9773,9774,9775,9776, # 9584 -9777,9778,9779,9780,9781,9782,4915,9783,9784,9785,9786,9787,9788,9789,9790,9791, # 9600 -9792,9793,4916,9794,9795,9796,9797,9798,9799,9800,9801,9802,9803,9804,9805,9806, # 9616 -9807,9808,9809,9810,9811,9812,9813,9814,9815,9816,9817,9818,9819,9820,9821,9822, # 9632 -9823,9824,9825,9826,9827,9828,9829,9830,9831,9832,9833,9834,9835,9836,9837,9838, # 9648 -9839,9840,9841,9842,9843,9844,9845,9846,9847,9848,9849,9850,9851,9852,9853,9854, # 9664 -9855,9856,9857,9858,9859,9860,9861,9862,9863,9864,9865,9866,9867,9868,4917,9869, # 9680 -9870,9871,9872,9873,9874,9875,9876,9877,9878,9879,9880,9881,9882,9883,9884,9885, # 9696 -9886,9887,9888,9889,9890,9891,9892,4472,9893,9894,9895,9896,9897,3806,9898,9899, # 9712 -9900,9901,9902,9903,9904,9905,9906,9907,9908,9909,9910,9911,9912,9913,9914,4918, # 9728 -9915,9916,9917,4919,9918,9919,9920,9921,4184,9922,9923,9924,9925,9926,9927,9928, # 9744 -9929,9930,9931,9932,9933,9934,9935,9936,9937,9938,9939,9940,9941,9942,9943,9944, # 9760 -9945,9946,4920,9947,9948,9949,9950,9951,9952,9953,9954,9955,4185,9956,9957,9958, # 9776 -9959,9960,9961,9962,9963,9964,9965,4921,9966,9967,9968,4473,9969,9970,9971,9972, # 9792 -9973,9974,9975,9976,9977,4474,9978,9979,9980,9981,9982,9983,9984,9985,9986,9987, # 9808 -9988,9989,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000,10001,10002,10003, # 9824 -10004,10005,10006,10007,10008,10009,10010,10011,10012,10013,10014,10015,10016,10017,10018,10019, # 9840 -10020,10021,4922,10022,4923,10023,10024,10025,10026,10027,10028,10029,10030,10031,10032,10033, # 9856 -10034,10035,10036,10037,10038,10039,10040,10041,10042,10043,10044,10045,10046,10047,10048,4924, # 9872 -10049,10050,10051,10052,10053,10054,10055,10056,10057,10058,10059,10060,10061,10062,10063,10064, # 9888 -10065,10066,10067,10068,10069,10070,10071,10072,10073,10074,10075,10076,10077,10078,10079,10080, # 9904 -10081,10082,10083,10084,10085,10086,10087,4475,10088,10089,10090,10091,10092,10093,10094,10095, # 9920 -10096,10097,4476,10098,10099,10100,10101,10102,10103,10104,10105,10106,10107,10108,10109,10110, # 9936 -10111,2174,10112,10113,10114,10115,10116,10117,10118,10119,10120,10121,10122,10123,10124,10125, # 9952 -10126,10127,10128,10129,10130,10131,10132,10133,10134,10135,10136,10137,10138,10139,10140,3807, # 9968 -4186,4925,10141,10142,10143,10144,10145,10146,10147,4477,4187,10148,10149,10150,10151,10152, # 9984 -10153,4188,10154,10155,10156,10157,10158,10159,10160,10161,4926,10162,10163,10164,10165,10166, #10000 -10167,10168,10169,10170,10171,10172,10173,10174,10175,10176,10177,10178,10179,10180,10181,10182, #10016 -10183,10184,10185,10186,10187,10188,10189,10190,10191,10192,3203,10193,10194,10195,10196,10197, #10032 -10198,10199,10200,4478,10201,10202,10203,10204,4479,10205,10206,10207,10208,10209,10210,10211, #10048 -10212,10213,10214,10215,10216,10217,10218,10219,10220,10221,10222,10223,10224,10225,10226,10227, #10064 -10228,10229,10230,10231,10232,10233,10234,4927,10235,10236,10237,10238,10239,10240,10241,10242, #10080 -10243,10244,10245,10246,10247,10248,10249,10250,10251,10252,10253,10254,10255,10256,10257,10258, #10096 -10259,10260,10261,10262,10263,10264,10265,10266,10267,10268,10269,10270,10271,10272,10273,4480, #10112 -4928,4929,10274,10275,10276,10277,10278,10279,10280,10281,10282,10283,10284,10285,10286,10287, #10128 -10288,10289,10290,10291,10292,10293,10294,10295,10296,10297,10298,10299,10300,10301,10302,10303, #10144 -10304,10305,10306,10307,10308,10309,10310,10311,10312,10313,10314,10315,10316,10317,10318,10319, #10160 -10320,10321,10322,10323,10324,10325,10326,10327,10328,10329,10330,10331,10332,10333,10334,4930, #10176 -10335,10336,10337,10338,10339,10340,10341,10342,4931,10343,10344,10345,10346,10347,10348,10349, #10192 -10350,10351,10352,10353,10354,10355,3088,10356,2786,10357,10358,10359,10360,4189,10361,10362, #10208 -10363,10364,10365,10366,10367,10368,10369,10370,10371,10372,10373,10374,10375,4932,10376,10377, #10224 -10378,10379,10380,10381,10382,10383,10384,10385,10386,10387,10388,10389,10390,10391,10392,4933, #10240 -10393,10394,10395,4934,10396,10397,10398,10399,10400,10401,10402,10403,10404,10405,10406,10407, #10256 -10408,10409,10410,10411,10412,3446,10413,10414,10415,10416,10417,10418,10419,10420,10421,10422, #10272 -10423,4935,10424,10425,10426,10427,10428,10429,10430,4936,10431,10432,10433,10434,10435,10436, #10288 -10437,10438,10439,10440,10441,10442,10443,4937,10444,10445,10446,10447,4481,10448,10449,10450, #10304 -10451,10452,10453,10454,10455,10456,10457,10458,10459,10460,10461,10462,10463,10464,10465,10466, #10320 -10467,10468,10469,10470,10471,10472,10473,10474,10475,10476,10477,10478,10479,10480,10481,10482, #10336 -10483,10484,10485,10486,10487,10488,10489,10490,10491,10492,10493,10494,10495,10496,10497,10498, #10352 -10499,10500,10501,10502,10503,10504,10505,4938,10506,10507,10508,10509,10510,2552,10511,10512, #10368 -10513,10514,10515,10516,3447,10517,10518,10519,10520,10521,10522,10523,10524,10525,10526,10527, #10384 -10528,10529,10530,10531,10532,10533,10534,10535,10536,10537,10538,10539,10540,10541,10542,10543, #10400 -4482,10544,4939,10545,10546,10547,10548,10549,10550,10551,10552,10553,10554,10555,10556,10557, #10416 -10558,10559,10560,10561,10562,10563,10564,10565,10566,10567,3676,4483,10568,10569,10570,10571, #10432 -10572,3448,10573,10574,10575,10576,10577,10578,10579,10580,10581,10582,10583,10584,10585,10586, #10448 -10587,10588,10589,10590,10591,10592,10593,10594,10595,10596,10597,10598,10599,10600,10601,10602, #10464 -10603,10604,10605,10606,10607,10608,10609,10610,10611,10612,10613,10614,10615,10616,10617,10618, #10480 -10619,10620,10621,10622,10623,10624,10625,10626,10627,4484,10628,10629,10630,10631,10632,4940, #10496 -10633,10634,10635,10636,10637,10638,10639,10640,10641,10642,10643,10644,10645,10646,10647,10648, #10512 -10649,10650,10651,10652,10653,10654,10655,10656,4941,10657,10658,10659,2599,10660,10661,10662, #10528 -10663,10664,10665,10666,3089,10667,10668,10669,10670,10671,10672,10673,10674,10675,10676,10677, #10544 -10678,10679,10680,4942,10681,10682,10683,10684,10685,10686,10687,10688,10689,10690,10691,10692, #10560 -10693,10694,10695,10696,10697,4485,10698,10699,10700,10701,10702,10703,10704,4943,10705,3677, #10576 -10706,10707,10708,10709,10710,10711,10712,4944,10713,10714,10715,10716,10717,10718,10719,10720, #10592 -10721,10722,10723,10724,10725,10726,10727,10728,4945,10729,10730,10731,10732,10733,10734,10735, #10608 -10736,10737,10738,10739,10740,10741,10742,10743,10744,10745,10746,10747,10748,10749,10750,10751, #10624 -10752,10753,10754,10755,10756,10757,10758,10759,10760,10761,4946,10762,10763,10764,10765,10766, #10640 -10767,4947,4948,10768,10769,10770,10771,10772,10773,10774,10775,10776,10777,10778,10779,10780, #10656 -10781,10782,10783,10784,10785,10786,10787,10788,10789,10790,10791,10792,10793,10794,10795,10796, #10672 -10797,10798,10799,10800,10801,10802,10803,10804,10805,10806,10807,10808,10809,10810,10811,10812, #10688 -10813,10814,10815,10816,10817,10818,10819,10820,10821,10822,10823,10824,10825,10826,10827,10828, #10704 -10829,10830,10831,10832,10833,10834,10835,10836,10837,10838,10839,10840,10841,10842,10843,10844, #10720 -10845,10846,10847,10848,10849,10850,10851,10852,10853,10854,10855,10856,10857,10858,10859,10860, #10736 -10861,10862,10863,10864,10865,10866,10867,10868,10869,10870,10871,10872,10873,10874,10875,10876, #10752 -10877,10878,4486,10879,10880,10881,10882,10883,10884,10885,4949,10886,10887,10888,10889,10890, #10768 -10891,10892,10893,10894,10895,10896,10897,10898,10899,10900,10901,10902,10903,10904,10905,10906, #10784 -10907,10908,10909,10910,10911,10912,10913,10914,10915,10916,10917,10918,10919,4487,10920,10921, #10800 -10922,10923,10924,10925,10926,10927,10928,10929,10930,10931,10932,4950,10933,10934,10935,10936, #10816 -10937,10938,10939,10940,10941,10942,10943,10944,10945,10946,10947,10948,10949,4488,10950,10951, #10832 -10952,10953,10954,10955,10956,10957,10958,10959,4190,10960,10961,10962,10963,10964,10965,10966, #10848 -10967,10968,10969,10970,10971,10972,10973,10974,10975,10976,10977,10978,10979,10980,10981,10982, #10864 -10983,10984,10985,10986,10987,10988,10989,10990,10991,10992,10993,10994,10995,10996,10997,10998, #10880 -10999,11000,11001,11002,11003,11004,11005,11006,3960,11007,11008,11009,11010,11011,11012,11013, #10896 -11014,11015,11016,11017,11018,11019,11020,11021,11022,11023,11024,11025,11026,11027,11028,11029, #10912 -11030,11031,11032,4951,11033,11034,11035,11036,11037,11038,11039,11040,11041,11042,11043,11044, #10928 -11045,11046,11047,4489,11048,11049,11050,11051,4952,11052,11053,11054,11055,11056,11057,11058, #10944 -4953,11059,11060,11061,11062,11063,11064,11065,11066,11067,11068,11069,11070,11071,4954,11072, #10960 -11073,11074,11075,11076,11077,11078,11079,11080,11081,11082,11083,11084,11085,11086,11087,11088, #10976 -11089,11090,11091,11092,11093,11094,11095,11096,11097,11098,11099,11100,11101,11102,11103,11104, #10992 -11105,11106,11107,11108,11109,11110,11111,11112,11113,11114,11115,3808,11116,11117,11118,11119, #11008 -11120,11121,11122,11123,11124,11125,11126,11127,11128,11129,11130,11131,11132,11133,11134,4955, #11024 -11135,11136,11137,11138,11139,11140,11141,11142,11143,11144,11145,11146,11147,11148,11149,11150, #11040 -11151,11152,11153,11154,11155,11156,11157,11158,11159,11160,11161,4956,11162,11163,11164,11165, #11056 -11166,11167,11168,11169,11170,11171,11172,11173,11174,11175,11176,11177,11178,11179,11180,4957, #11072 -11181,11182,11183,11184,11185,11186,4958,11187,11188,11189,11190,11191,11192,11193,11194,11195, #11088 -11196,11197,11198,11199,11200,3678,11201,11202,11203,11204,11205,11206,4191,11207,11208,11209, #11104 -11210,11211,11212,11213,11214,11215,11216,11217,11218,11219,11220,11221,11222,11223,11224,11225, #11120 -11226,11227,11228,11229,11230,11231,11232,11233,11234,11235,11236,11237,11238,11239,11240,11241, #11136 -11242,11243,11244,11245,11246,11247,11248,11249,11250,11251,4959,11252,11253,11254,11255,11256, #11152 -11257,11258,11259,11260,11261,11262,11263,11264,11265,11266,11267,11268,11269,11270,11271,11272, #11168 -11273,11274,11275,11276,11277,11278,11279,11280,11281,11282,11283,11284,11285,11286,11287,11288, #11184 -11289,11290,11291,11292,11293,11294,11295,11296,11297,11298,11299,11300,11301,11302,11303,11304, #11200 -11305,11306,11307,11308,11309,11310,11311,11312,11313,11314,3679,11315,11316,11317,11318,4490, #11216 -11319,11320,11321,11322,11323,11324,11325,11326,11327,11328,11329,11330,11331,11332,11333,11334, #11232 -11335,11336,11337,11338,11339,11340,11341,11342,11343,11344,11345,11346,11347,4960,11348,11349, #11248 -11350,11351,11352,11353,11354,11355,11356,11357,11358,11359,11360,11361,11362,11363,11364,11365, #11264 -11366,11367,11368,11369,11370,11371,11372,11373,11374,11375,11376,11377,3961,4961,11378,11379, #11280 -11380,11381,11382,11383,11384,11385,11386,11387,11388,11389,11390,11391,11392,11393,11394,11395, #11296 -11396,11397,4192,11398,11399,11400,11401,11402,11403,11404,11405,11406,11407,11408,11409,11410, #11312 -11411,4962,11412,11413,11414,11415,11416,11417,11418,11419,11420,11421,11422,11423,11424,11425, #11328 -11426,11427,11428,11429,11430,11431,11432,11433,11434,11435,11436,11437,11438,11439,11440,11441, #11344 -11442,11443,11444,11445,11446,11447,11448,11449,11450,11451,11452,11453,11454,11455,11456,11457, #11360 -11458,11459,11460,11461,11462,11463,11464,11465,11466,11467,11468,11469,4963,11470,11471,4491, #11376 -11472,11473,11474,11475,4964,11476,11477,11478,11479,11480,11481,11482,11483,11484,11485,11486, #11392 -11487,11488,11489,11490,11491,11492,4965,11493,11494,11495,11496,11497,11498,11499,11500,11501, #11408 -11502,11503,11504,11505,11506,11507,11508,11509,11510,11511,11512,11513,11514,11515,11516,11517, #11424 -11518,11519,11520,11521,11522,11523,11524,11525,11526,11527,11528,11529,3962,11530,11531,11532, #11440 -11533,11534,11535,11536,11537,11538,11539,11540,11541,11542,11543,11544,11545,11546,11547,11548, #11456 -11549,11550,11551,11552,11553,11554,11555,11556,11557,11558,11559,11560,11561,11562,11563,11564, #11472 -4193,4194,11565,11566,11567,11568,11569,11570,11571,11572,11573,11574,11575,11576,11577,11578, #11488 -11579,11580,11581,11582,11583,11584,11585,11586,11587,11588,11589,11590,11591,4966,4195,11592, #11504 -11593,11594,11595,11596,11597,11598,11599,11600,11601,11602,11603,11604,3090,11605,11606,11607, #11520 -11608,11609,11610,4967,11611,11612,11613,11614,11615,11616,11617,11618,11619,11620,11621,11622, #11536 -11623,11624,11625,11626,11627,11628,11629,11630,11631,11632,11633,11634,11635,11636,11637,11638, #11552 -11639,11640,11641,11642,11643,11644,11645,11646,11647,11648,11649,11650,11651,11652,11653,11654, #11568 -11655,11656,11657,11658,11659,11660,11661,11662,11663,11664,11665,11666,11667,11668,11669,11670, #11584 -11671,11672,11673,11674,4968,11675,11676,11677,11678,11679,11680,11681,11682,11683,11684,11685, #11600 -11686,11687,11688,11689,11690,11691,11692,11693,3809,11694,11695,11696,11697,11698,11699,11700, #11616 -11701,11702,11703,11704,11705,11706,11707,11708,11709,11710,11711,11712,11713,11714,11715,11716, #11632 -11717,11718,3553,11719,11720,11721,11722,11723,11724,11725,11726,11727,11728,11729,11730,4969, #11648 -11731,11732,11733,11734,11735,11736,11737,11738,11739,11740,4492,11741,11742,11743,11744,11745, #11664 -11746,11747,11748,11749,11750,11751,11752,4970,11753,11754,11755,11756,11757,11758,11759,11760, #11680 -11761,11762,11763,11764,11765,11766,11767,11768,11769,11770,11771,11772,11773,11774,11775,11776, #11696 -11777,11778,11779,11780,11781,11782,11783,11784,11785,11786,11787,11788,11789,11790,4971,11791, #11712 -11792,11793,11794,11795,11796,11797,4972,11798,11799,11800,11801,11802,11803,11804,11805,11806, #11728 -11807,11808,11809,11810,4973,11811,11812,11813,11814,11815,11816,11817,11818,11819,11820,11821, #11744 -11822,11823,11824,11825,11826,11827,11828,11829,11830,11831,11832,11833,11834,3680,3810,11835, #11760 -11836,4974,11837,11838,11839,11840,11841,11842,11843,11844,11845,11846,11847,11848,11849,11850, #11776 -11851,11852,11853,11854,11855,11856,11857,11858,11859,11860,11861,11862,11863,11864,11865,11866, #11792 -11867,11868,11869,11870,11871,11872,11873,11874,11875,11876,11877,11878,11879,11880,11881,11882, #11808 -11883,11884,4493,11885,11886,11887,11888,11889,11890,11891,11892,11893,11894,11895,11896,11897, #11824 -11898,11899,11900,11901,11902,11903,11904,11905,11906,11907,11908,11909,11910,11911,11912,11913, #11840 -11914,11915,4975,11916,11917,11918,11919,11920,11921,11922,11923,11924,11925,11926,11927,11928, #11856 -11929,11930,11931,11932,11933,11934,11935,11936,11937,11938,11939,11940,11941,11942,11943,11944, #11872 -11945,11946,11947,11948,11949,4976,11950,11951,11952,11953,11954,11955,11956,11957,11958,11959, #11888 -11960,11961,11962,11963,11964,11965,11966,11967,11968,11969,11970,11971,11972,11973,11974,11975, #11904 -11976,11977,11978,11979,11980,11981,11982,11983,11984,11985,11986,11987,4196,11988,11989,11990, #11920 -11991,11992,4977,11993,11994,11995,11996,11997,11998,11999,12000,12001,12002,12003,12004,12005, #11936 -12006,12007,12008,12009,12010,12011,12012,12013,12014,12015,12016,12017,12018,12019,12020,12021, #11952 -12022,12023,12024,12025,12026,12027,12028,12029,12030,12031,12032,12033,12034,12035,12036,12037, #11968 -12038,12039,12040,12041,12042,12043,12044,12045,12046,12047,12048,12049,12050,12051,12052,12053, #11984 -12054,12055,12056,12057,12058,12059,12060,12061,4978,12062,12063,12064,12065,12066,12067,12068, #12000 -12069,12070,12071,12072,12073,12074,12075,12076,12077,12078,12079,12080,12081,12082,12083,12084, #12016 -12085,12086,12087,12088,12089,12090,12091,12092,12093,12094,12095,12096,12097,12098,12099,12100, #12032 -12101,12102,12103,12104,12105,12106,12107,12108,12109,12110,12111,12112,12113,12114,12115,12116, #12048 -12117,12118,12119,12120,12121,12122,12123,4979,12124,12125,12126,12127,12128,4197,12129,12130, #12064 -12131,12132,12133,12134,12135,12136,12137,12138,12139,12140,12141,12142,12143,12144,12145,12146, #12080 -12147,12148,12149,12150,12151,12152,12153,12154,4980,12155,12156,12157,12158,12159,12160,4494, #12096 -12161,12162,12163,12164,3811,12165,12166,12167,12168,12169,4495,12170,12171,4496,12172,12173, #12112 -12174,12175,12176,3812,12177,12178,12179,12180,12181,12182,12183,12184,12185,12186,12187,12188, #12128 -12189,12190,12191,12192,12193,12194,12195,12196,12197,12198,12199,12200,12201,12202,12203,12204, #12144 -12205,12206,12207,12208,12209,12210,12211,12212,12213,12214,12215,12216,12217,12218,12219,12220, #12160 -12221,4981,12222,12223,12224,12225,12226,12227,12228,12229,12230,12231,12232,12233,12234,12235, #12176 -4982,12236,12237,12238,12239,12240,12241,12242,12243,12244,12245,4983,12246,12247,12248,12249, #12192 -4984,12250,12251,12252,12253,12254,12255,12256,12257,12258,12259,12260,12261,12262,12263,12264, #12208 -4985,12265,4497,12266,12267,12268,12269,12270,12271,12272,12273,12274,12275,12276,12277,12278, #12224 -12279,12280,12281,12282,12283,12284,12285,12286,12287,4986,12288,12289,12290,12291,12292,12293, #12240 -12294,12295,12296,2473,12297,12298,12299,12300,12301,12302,12303,12304,12305,12306,12307,12308, #12256 -12309,12310,12311,12312,12313,12314,12315,12316,12317,12318,12319,3963,12320,12321,12322,12323, #12272 -12324,12325,12326,12327,12328,12329,12330,12331,12332,4987,12333,12334,12335,12336,12337,12338, #12288 -12339,12340,12341,12342,12343,12344,12345,12346,12347,12348,12349,12350,12351,12352,12353,12354, #12304 -12355,12356,12357,12358,12359,3964,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369, #12320 -12370,3965,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384, #12336 -12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400, #12352 -12401,12402,12403,12404,12405,12406,12407,12408,4988,12409,12410,12411,12412,12413,12414,12415, #12368 -12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,12429,12430,12431, #12384 -12432,12433,12434,12435,12436,12437,12438,3554,12439,12440,12441,12442,12443,12444,12445,12446, #12400 -12447,12448,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462, #12416 -12463,12464,4989,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477, #12432 -12478,12479,12480,4990,12481,12482,12483,12484,12485,12486,12487,12488,12489,4498,12490,12491, #12448 -12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507, #12464 -12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523, #12480 -12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,12535,12536,12537,12538,12539, #12496 -12540,12541,12542,12543,12544,12545,12546,12547,12548,12549,12550,12551,4991,12552,12553,12554, #12512 -12555,12556,12557,12558,12559,12560,12561,12562,12563,12564,12565,12566,12567,12568,12569,12570, #12528 -12571,12572,12573,12574,12575,12576,12577,12578,3036,12579,12580,12581,12582,12583,3966,12584, #12544 -12585,12586,12587,12588,12589,12590,12591,12592,12593,12594,12595,12596,12597,12598,12599,12600, #12560 -12601,12602,12603,12604,12605,12606,12607,12608,12609,12610,12611,12612,12613,12614,12615,12616, #12576 -12617,12618,12619,12620,12621,12622,12623,12624,12625,12626,12627,12628,12629,12630,12631,12632, #12592 -12633,12634,12635,12636,12637,12638,12639,12640,12641,12642,12643,12644,12645,12646,4499,12647, #12608 -12648,12649,12650,12651,12652,12653,12654,12655,12656,12657,12658,12659,12660,12661,12662,12663, #12624 -12664,12665,12666,12667,12668,12669,12670,12671,12672,12673,12674,12675,12676,12677,12678,12679, #12640 -12680,12681,12682,12683,12684,12685,12686,12687,12688,12689,12690,12691,12692,12693,12694,12695, #12656 -12696,12697,12698,4992,12699,12700,12701,12702,12703,12704,12705,12706,12707,12708,12709,12710, #12672 -12711,12712,12713,12714,12715,12716,12717,12718,12719,12720,12721,12722,12723,12724,12725,12726, #12688 -12727,12728,12729,12730,12731,12732,12733,12734,12735,12736,12737,12738,12739,12740,12741,12742, #12704 -12743,12744,12745,12746,12747,12748,12749,12750,12751,12752,12753,12754,12755,12756,12757,12758, #12720 -12759,12760,12761,12762,12763,12764,12765,12766,12767,12768,12769,12770,12771,12772,12773,12774, #12736 -12775,12776,12777,12778,4993,2175,12779,12780,12781,12782,12783,12784,12785,12786,4500,12787, #12752 -12788,12789,12790,12791,12792,12793,12794,12795,12796,12797,12798,12799,12800,12801,12802,12803, #12768 -12804,12805,12806,12807,12808,12809,12810,12811,12812,12813,12814,12815,12816,12817,12818,12819, #12784 -12820,12821,12822,12823,12824,12825,12826,4198,3967,12827,12828,12829,12830,12831,12832,12833, #12800 -12834,12835,12836,12837,12838,12839,12840,12841,12842,12843,12844,12845,12846,12847,12848,12849, #12816 -12850,12851,12852,12853,12854,12855,12856,12857,12858,12859,12860,12861,4199,12862,12863,12864, #12832 -12865,12866,12867,12868,12869,12870,12871,12872,12873,12874,12875,12876,12877,12878,12879,12880, #12848 -12881,12882,12883,12884,12885,12886,12887,4501,12888,12889,12890,12891,12892,12893,12894,12895, #12864 -12896,12897,12898,12899,12900,12901,12902,12903,12904,12905,12906,12907,12908,12909,12910,12911, #12880 -12912,4994,12913,12914,12915,12916,12917,12918,12919,12920,12921,12922,12923,12924,12925,12926, #12896 -12927,12928,12929,12930,12931,12932,12933,12934,12935,12936,12937,12938,12939,12940,12941,12942, #12912 -12943,12944,12945,12946,12947,12948,12949,12950,12951,12952,12953,12954,12955,12956,1772,12957, #12928 -12958,12959,12960,12961,12962,12963,12964,12965,12966,12967,12968,12969,12970,12971,12972,12973, #12944 -12974,12975,12976,12977,12978,12979,12980,12981,12982,12983,12984,12985,12986,12987,12988,12989, #12960 -12990,12991,12992,12993,12994,12995,12996,12997,4502,12998,4503,12999,13000,13001,13002,13003, #12976 -4504,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018, #12992 -13019,13020,13021,13022,13023,13024,13025,13026,13027,13028,13029,3449,13030,13031,13032,13033, #13008 -13034,13035,13036,13037,13038,13039,13040,13041,13042,13043,13044,13045,13046,13047,13048,13049, #13024 -13050,13051,13052,13053,13054,13055,13056,13057,13058,13059,13060,13061,13062,13063,13064,13065, #13040 -13066,13067,13068,13069,13070,13071,13072,13073,13074,13075,13076,13077,13078,13079,13080,13081, #13056 -13082,13083,13084,13085,13086,13087,13088,13089,13090,13091,13092,13093,13094,13095,13096,13097, #13072 -13098,13099,13100,13101,13102,13103,13104,13105,13106,13107,13108,13109,13110,13111,13112,13113, #13088 -13114,13115,13116,13117,13118,3968,13119,4995,13120,13121,13122,13123,13124,13125,13126,13127, #13104 -4505,13128,13129,13130,13131,13132,13133,13134,4996,4506,13135,13136,13137,13138,13139,4997, #13120 -13140,13141,13142,13143,13144,13145,13146,13147,13148,13149,13150,13151,13152,13153,13154,13155, #13136 -13156,13157,13158,13159,4998,13160,13161,13162,13163,13164,13165,13166,13167,13168,13169,13170, #13152 -13171,13172,13173,13174,13175,13176,4999,13177,13178,13179,13180,13181,13182,13183,13184,13185, #13168 -13186,13187,13188,13189,13190,13191,13192,13193,13194,13195,13196,13197,13198,13199,13200,13201, #13184 -13202,13203,13204,13205,13206,5000,13207,13208,13209,13210,13211,13212,13213,13214,13215,13216, #13200 -13217,13218,13219,13220,13221,13222,13223,13224,13225,13226,13227,4200,5001,13228,13229,13230, #13216 -13231,13232,13233,13234,13235,13236,13237,13238,13239,13240,3969,13241,13242,13243,13244,3970, #13232 -13245,13246,13247,13248,13249,13250,13251,13252,13253,13254,13255,13256,13257,13258,13259,13260, #13248 -13261,13262,13263,13264,13265,13266,13267,13268,3450,13269,13270,13271,13272,13273,13274,13275, #13264 -13276,5002,13277,13278,13279,13280,13281,13282,13283,13284,13285,13286,13287,13288,13289,13290, #13280 -13291,13292,13293,13294,13295,13296,13297,13298,13299,13300,13301,13302,3813,13303,13304,13305, #13296 -13306,13307,13308,13309,13310,13311,13312,13313,13314,13315,13316,13317,13318,13319,13320,13321, #13312 -13322,13323,13324,13325,13326,13327,13328,4507,13329,13330,13331,13332,13333,13334,13335,13336, #13328 -13337,13338,13339,13340,13341,5003,13342,13343,13344,13345,13346,13347,13348,13349,13350,13351, #13344 -13352,13353,13354,13355,13356,13357,13358,13359,13360,13361,13362,13363,13364,13365,13366,13367, #13360 -5004,13368,13369,13370,13371,13372,13373,13374,13375,13376,13377,13378,13379,13380,13381,13382, #13376 -13383,13384,13385,13386,13387,13388,13389,13390,13391,13392,13393,13394,13395,13396,13397,13398, #13392 -13399,13400,13401,13402,13403,13404,13405,13406,13407,13408,13409,13410,13411,13412,13413,13414, #13408 -13415,13416,13417,13418,13419,13420,13421,13422,13423,13424,13425,13426,13427,13428,13429,13430, #13424 -13431,13432,4508,13433,13434,13435,4201,13436,13437,13438,13439,13440,13441,13442,13443,13444, #13440 -13445,13446,13447,13448,13449,13450,13451,13452,13453,13454,13455,13456,13457,5005,13458,13459, #13456 -13460,13461,13462,13463,13464,13465,13466,13467,13468,13469,13470,4509,13471,13472,13473,13474, #13472 -13475,13476,13477,13478,13479,13480,13481,13482,13483,13484,13485,13486,13487,13488,13489,13490, #13488 -13491,13492,13493,13494,13495,13496,13497,13498,13499,13500,13501,13502,13503,13504,13505,13506, #13504 -13507,13508,13509,13510,13511,13512,13513,13514,13515,13516,13517,13518,13519,13520,13521,13522, #13520 -13523,13524,13525,13526,13527,13528,13529,13530,13531,13532,13533,13534,13535,13536,13537,13538, #13536 -13539,13540,13541,13542,13543,13544,13545,13546,13547,13548,13549,13550,13551,13552,13553,13554, #13552 -13555,13556,13557,13558,13559,13560,13561,13562,13563,13564,13565,13566,13567,13568,13569,13570, #13568 -13571,13572,13573,13574,13575,13576,13577,13578,13579,13580,13581,13582,13583,13584,13585,13586, #13584 -13587,13588,13589,13590,13591,13592,13593,13594,13595,13596,13597,13598,13599,13600,13601,13602, #13600 -13603,13604,13605,13606,13607,13608,13609,13610,13611,13612,13613,13614,13615,13616,13617,13618, #13616 -13619,13620,13621,13622,13623,13624,13625,13626,13627,13628,13629,13630,13631,13632,13633,13634, #13632 -13635,13636,13637,13638,13639,13640,13641,13642,5006,13643,13644,13645,13646,13647,13648,13649, #13648 -13650,13651,5007,13652,13653,13654,13655,13656,13657,13658,13659,13660,13661,13662,13663,13664, #13664 -13665,13666,13667,13668,13669,13670,13671,13672,13673,13674,13675,13676,13677,13678,13679,13680, #13680 -13681,13682,13683,13684,13685,13686,13687,13688,13689,13690,13691,13692,13693,13694,13695,13696, #13696 -13697,13698,13699,13700,13701,13702,13703,13704,13705,13706,13707,13708,13709,13710,13711,13712, #13712 -13713,13714,13715,13716,13717,13718,13719,13720,13721,13722,13723,13724,13725,13726,13727,13728, #13728 -13729,13730,13731,13732,13733,13734,13735,13736,13737,13738,13739,13740,13741,13742,13743,13744, #13744 -13745,13746,13747,13748,13749,13750,13751,13752,13753,13754,13755,13756,13757,13758,13759,13760, #13760 -13761,13762,13763,13764,13765,13766,13767,13768,13769,13770,13771,13772,13773,13774,3273,13775, #13776 -13776,13777,13778,13779,13780,13781,13782,13783,13784,13785,13786,13787,13788,13789,13790,13791, #13792 -13792,13793,13794,13795,13796,13797,13798,13799,13800,13801,13802,13803,13804,13805,13806,13807, #13808 -13808,13809,13810,13811,13812,13813,13814,13815,13816,13817,13818,13819,13820,13821,13822,13823, #13824 -13824,13825,13826,13827,13828,13829,13830,13831,13832,13833,13834,13835,13836,13837,13838,13839, #13840 -13840,13841,13842,13843,13844,13845,13846,13847,13848,13849,13850,13851,13852,13853,13854,13855, #13856 -13856,13857,13858,13859,13860,13861,13862,13863,13864,13865,13866,13867,13868,13869,13870,13871, #13872 -13872,13873,13874,13875,13876,13877,13878,13879,13880,13881,13882,13883,13884,13885,13886,13887, #13888 -13888,13889,13890,13891,13892,13893,13894,13895,13896,13897,13898,13899,13900,13901,13902,13903, #13904 -13904,13905,13906,13907,13908,13909,13910,13911,13912,13913,13914,13915,13916,13917,13918,13919, #13920 -13920,13921,13922,13923,13924,13925,13926,13927,13928,13929,13930,13931,13932,13933,13934,13935, #13936 -13936,13937,13938,13939,13940,13941,13942,13943,13944,13945,13946,13947,13948,13949,13950,13951, #13952 -13952,13953,13954,13955,13956,13957,13958,13959,13960,13961,13962,13963,13964,13965,13966,13967, #13968 -13968,13969,13970,13971,13972) #13973 - -# flake8: noqa diff --git a/lib/requests/packages/chardet/big5prober.py b/lib/requests/packages/chardet/big5prober.py deleted file mode 100755 index becce81e..00000000 --- a/lib/requests/packages/chardet/big5prober.py +++ /dev/null @@ -1,42 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import Big5DistributionAnalysis -from .mbcssm import Big5SMModel - - -class Big5Prober(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(Big5SMModel) - self._mDistributionAnalyzer = Big5DistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "Big5" diff --git a/lib/requests/packages/chardet/chardetect.py b/lib/requests/packages/chardet/chardetect.py deleted file mode 100755 index ffe892f2..00000000 --- a/lib/requests/packages/chardet/chardetect.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python -""" -Script which takes one or more file paths and reports on their detected -encodings - -Example:: - - % chardetect somefile someotherfile - somefile: windows-1252 with confidence 0.5 - someotherfile: ascii with confidence 1.0 - -If no paths are provided, it takes its input from stdin. - -""" - -from __future__ import absolute_import, print_function, unicode_literals - -import argparse -import sys -from io import open - -from chardet import __version__ -from chardet.universaldetector import UniversalDetector - - -def description_of(lines, name='stdin'): - """ - Return a string describing the probable encoding of a file or - list of strings. - - :param lines: The lines to get the encoding of. - :type lines: Iterable of bytes - :param name: Name of file or collection of lines - :type name: str - """ - u = UniversalDetector() - for line in lines: - u.feed(line) - u.close() - result = u.result - if result['encoding']: - return '{0}: {1} with confidence {2}'.format(name, result['encoding'], - result['confidence']) - else: - return '{0}: no result'.format(name) - - -def main(argv=None): - ''' - Handles command line arguments and gets things started. - - :param argv: List of arguments, as if specified on the command-line. - If None, ``sys.argv[1:]`` is used instead. - :type argv: list of str - ''' - # Get command line arguments - parser = argparse.ArgumentParser( - description="Takes one or more file paths and reports their detected \ - encodings", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - conflict_handler='resolve') - parser.add_argument('input', - help='File whose encoding we would like to determine.', - type=argparse.FileType('rb'), nargs='*', - default=[sys.stdin]) - parser.add_argument('--version', action='version', - version='%(prog)s {0}'.format(__version__)) - args = parser.parse_args(argv) - - for f in args.input: - if f.isatty(): - print("You are running chardetect interactively. Press " + - "CTRL-D twice at the start of a blank line to signal the " + - "end of your input. If you want help, run chardetect " + - "--help\n", file=sys.stderr) - print(description_of(f, f.name)) - - -if __name__ == '__main__': - main() diff --git a/lib/requests/packages/chardet/chardistribution.py b/lib/requests/packages/chardet/chardistribution.py deleted file mode 100755 index 4e64a00b..00000000 --- a/lib/requests/packages/chardet/chardistribution.py +++ /dev/null @@ -1,231 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, - EUCTW_TYPICAL_DISTRIBUTION_RATIO) -from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, - EUCKR_TYPICAL_DISTRIBUTION_RATIO) -from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, - GB2312_TYPICAL_DISTRIBUTION_RATIO) -from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, - BIG5_TYPICAL_DISTRIBUTION_RATIO) -from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, - JIS_TYPICAL_DISTRIBUTION_RATIO) -from .compat import wrap_ord - -ENOUGH_DATA_THRESHOLD = 1024 -SURE_YES = 0.99 -SURE_NO = 0.01 -MINIMUM_DATA_THRESHOLD = 3 - - -class CharDistributionAnalysis: - def __init__(self): - # Mapping table to get frequency order from char order (get from - # GetOrder()) - self._mCharToFreqOrder = None - self._mTableSize = None # Size of above table - # This is a constant value which varies from language to language, - # used in calculating confidence. See - # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html - # for further detail. - self._mTypicalDistributionRatio = None - self.reset() - - def reset(self): - """reset analyser, clear any state""" - # If this flag is set to True, detection is done and conclusion has - # been made - self._mDone = False - self._mTotalChars = 0 # Total characters encountered - # The number of characters whose frequency order is less than 512 - self._mFreqChars = 0 - - def feed(self, aBuf, aCharLen): - """feed a character with known length""" - if aCharLen == 2: - # we only care about 2-bytes character in our distribution analysis - order = self.get_order(aBuf) - else: - order = -1 - if order >= 0: - self._mTotalChars += 1 - # order is valid - if order < self._mTableSize: - if 512 > self._mCharToFreqOrder[order]: - self._mFreqChars += 1 - - def get_confidence(self): - """return confidence based on existing data""" - # if we didn't receive any character in our consideration range, - # return negative answer - if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: - return SURE_NO - - if self._mTotalChars != self._mFreqChars: - r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) - * self._mTypicalDistributionRatio)) - if r < SURE_YES: - return r - - # normalize confidence (we don't want to be 100% sure) - return SURE_YES - - def got_enough_data(self): - # It is not necessary to receive all data to draw conclusion. - # For charset detection, certain amount of data is enough - return self._mTotalChars > ENOUGH_DATA_THRESHOLD - - def get_order(self, aBuf): - # We do not handle characters based on the original encoding string, - # but convert this encoding string to a number, here called order. - # This allows multiple encodings of a language to share one frequency - # table. - return -1 - - -class EUCTWDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = EUCTWCharToFreqOrder - self._mTableSize = EUCTW_TABLE_SIZE - self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for euc-TW encoding, we are interested - # first byte range: 0xc4 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char = wrap_ord(aBuf[0]) - if first_char >= 0xC4: - return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 - else: - return -1 - - -class EUCKRDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = EUCKRCharToFreqOrder - self._mTableSize = EUCKR_TABLE_SIZE - self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for euc-KR encoding, we are interested - # first byte range: 0xb0 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char = wrap_ord(aBuf[0]) - if first_char >= 0xB0: - return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 - else: - return -1 - - -class GB2312DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = GB2312CharToFreqOrder - self._mTableSize = GB2312_TABLE_SIZE - self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for GB2312 encoding, we are interested - # first byte range: 0xb0 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) - if (first_char >= 0xB0) and (second_char >= 0xA1): - return 94 * (first_char - 0xB0) + second_char - 0xA1 - else: - return -1 - - -class Big5DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = Big5CharToFreqOrder - self._mTableSize = BIG5_TABLE_SIZE - self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for big5 encoding, we are interested - # first byte range: 0xa4 -- 0xfe - # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe - # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) - if first_char >= 0xA4: - if second_char >= 0xA1: - return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 - else: - return 157 * (first_char - 0xA4) + second_char - 0x40 - else: - return -1 - - -class SJISDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for sjis encoding, we are interested - # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe - # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe - # no validation needed here. State machine has done that - first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) - if (first_char >= 0x81) and (first_char <= 0x9F): - order = 188 * (first_char - 0x81) - elif (first_char >= 0xE0) and (first_char <= 0xEF): - order = 188 * (first_char - 0xE0 + 31) - else: - return -1 - order = order + second_char - 0x40 - if second_char > 0x7F: - order = -1 - return order - - -class EUCJPDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO - - def get_order(self, aBuf): - # for euc-JP encoding, we are interested - # first byte range: 0xa0 -- 0xfe - # second byte range: 0xa1 -- 0xfe - # no validation needed here. State machine has done that - char = wrap_ord(aBuf[0]) - if char >= 0xA0: - return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 - else: - return -1 diff --git a/lib/requests/packages/chardet/charsetgroupprober.py b/lib/requests/packages/chardet/charsetgroupprober.py deleted file mode 100755 index 85e7a1c6..00000000 --- a/lib/requests/packages/chardet/charsetgroupprober.py +++ /dev/null @@ -1,106 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -import sys -from .charsetprober import CharSetProber - - -class CharSetGroupProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mActiveNum = 0 - self._mProbers = [] - self._mBestGuessProber = None - - def reset(self): - CharSetProber.reset(self) - self._mActiveNum = 0 - for prober in self._mProbers: - if prober: - prober.reset() - prober.active = True - self._mActiveNum += 1 - self._mBestGuessProber = None - - def get_charset_name(self): - if not self._mBestGuessProber: - self.get_confidence() - if not self._mBestGuessProber: - return None -# self._mBestGuessProber = self._mProbers[0] - return self._mBestGuessProber.get_charset_name() - - def feed(self, aBuf): - for prober in self._mProbers: - if not prober: - continue - if not prober.active: - continue - st = prober.feed(aBuf) - if not st: - continue - if st == constants.eFoundIt: - self._mBestGuessProber = prober - return self.get_state() - elif st == constants.eNotMe: - prober.active = False - self._mActiveNum -= 1 - if self._mActiveNum <= 0: - self._mState = constants.eNotMe - return self.get_state() - return self.get_state() - - def get_confidence(self): - st = self.get_state() - if st == constants.eFoundIt: - return 0.99 - elif st == constants.eNotMe: - return 0.01 - bestConf = 0.0 - self._mBestGuessProber = None - for prober in self._mProbers: - if not prober: - continue - if not prober.active: - if constants._debug: - sys.stderr.write(prober.get_charset_name() - + ' not active\n') - continue - cf = prober.get_confidence() - if constants._debug: - sys.stderr.write('%s confidence = %s\n' % - (prober.get_charset_name(), cf)) - if bestConf < cf: - bestConf = cf - self._mBestGuessProber = prober - if not self._mBestGuessProber: - return 0.0 - return bestConf -# else: -# self._mBestGuessProber = self._mProbers[0] -# return self._mBestGuessProber.get_confidence() diff --git a/lib/requests/packages/chardet/charsetprober.py b/lib/requests/packages/chardet/charsetprober.py deleted file mode 100755 index 97581712..00000000 --- a/lib/requests/packages/chardet/charsetprober.py +++ /dev/null @@ -1,62 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -import re - - -class CharSetProber: - def __init__(self): - pass - - def reset(self): - self._mState = constants.eDetecting - - def get_charset_name(self): - return None - - def feed(self, aBuf): - pass - - def get_state(self): - return self._mState - - def get_confidence(self): - return 0.0 - - def filter_high_bit_only(self, aBuf): - aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) - return aBuf - - def filter_without_english_letters(self, aBuf): - aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) - return aBuf - - def filter_with_english_letters(self, aBuf): - # TODO - return aBuf diff --git a/lib/requests/packages/chardet/codingstatemachine.py b/lib/requests/packages/chardet/codingstatemachine.py deleted file mode 100755 index 8dd8c917..00000000 --- a/lib/requests/packages/chardet/codingstatemachine.py +++ /dev/null @@ -1,61 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .constants import eStart -from .compat import wrap_ord - - -class CodingStateMachine: - def __init__(self, sm): - self._mModel = sm - self._mCurrentBytePos = 0 - self._mCurrentCharLen = 0 - self.reset() - - def reset(self): - self._mCurrentState = eStart - - def next_state(self, c): - # for each byte we get its class - # if it is first byte, we also get byte length - # PY3K: aBuf is a byte stream, so c is an int, not a byte - byteCls = self._mModel['classTable'][wrap_ord(c)] - if self._mCurrentState == eStart: - self._mCurrentBytePos = 0 - self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] - # from byte's class and stateTable, we get its next state - curr_state = (self._mCurrentState * self._mModel['classFactor'] - + byteCls) - self._mCurrentState = self._mModel['stateTable'][curr_state] - self._mCurrentBytePos += 1 - return self._mCurrentState - - def get_current_charlen(self): - return self._mCurrentCharLen - - def get_coding_state_machine(self): - return self._mModel['name'] diff --git a/lib/requests/packages/chardet/compat.py b/lib/requests/packages/chardet/compat.py deleted file mode 100755 index d9e30add..00000000 --- a/lib/requests/packages/chardet/compat.py +++ /dev/null @@ -1,34 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# Contributor(s): -# Ian Cordasco - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys - - -if sys.version_info < (3, 0): - base_str = (str, unicode) -else: - base_str = (bytes, str) - - -def wrap_ord(a): - if sys.version_info < (3, 0) and isinstance(a, base_str): - return ord(a) - else: - return a diff --git a/lib/requests/packages/chardet/constants.py b/lib/requests/packages/chardet/constants.py deleted file mode 100755 index e4d148b3..00000000 --- a/lib/requests/packages/chardet/constants.py +++ /dev/null @@ -1,39 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -_debug = 0 - -eDetecting = 0 -eFoundIt = 1 -eNotMe = 2 - -eStart = 0 -eError = 1 -eItsMe = 2 - -SHORTCUT_THRESHOLD = 0.95 diff --git a/lib/requests/packages/chardet/cp949prober.py b/lib/requests/packages/chardet/cp949prober.py deleted file mode 100755 index ff4272f8..00000000 --- a/lib/requests/packages/chardet/cp949prober.py +++ /dev/null @@ -1,44 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCKRDistributionAnalysis -from .mbcssm import CP949SMModel - - -class CP949Prober(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(CP949SMModel) - # NOTE: CP949 is a superset of EUC-KR, so the distribution should be - # not different. - self._mDistributionAnalyzer = EUCKRDistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "CP949" diff --git a/lib/requests/packages/chardet/escprober.py b/lib/requests/packages/chardet/escprober.py deleted file mode 100755 index 80a844ff..00000000 --- a/lib/requests/packages/chardet/escprober.py +++ /dev/null @@ -1,86 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, - ISO2022KRSMModel) -from .charsetprober import CharSetProber -from .codingstatemachine import CodingStateMachine -from .compat import wrap_ord - - -class EscCharSetProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mCodingSM = [ - CodingStateMachine(HZSMModel), - CodingStateMachine(ISO2022CNSMModel), - CodingStateMachine(ISO2022JPSMModel), - CodingStateMachine(ISO2022KRSMModel) - ] - self.reset() - - def reset(self): - CharSetProber.reset(self) - for codingSM in self._mCodingSM: - if not codingSM: - continue - codingSM.active = True - codingSM.reset() - self._mActiveSM = len(self._mCodingSM) - self._mDetectedCharset = None - - def get_charset_name(self): - return self._mDetectedCharset - - def get_confidence(self): - if self._mDetectedCharset: - return 0.99 - else: - return 0.00 - - def feed(self, aBuf): - for c in aBuf: - # PY3K: aBuf is a byte array, so c is an int, not a byte - for codingSM in self._mCodingSM: - if not codingSM: - continue - if not codingSM.active: - continue - codingState = codingSM.next_state(wrap_ord(c)) - if codingState == constants.eError: - codingSM.active = False - self._mActiveSM -= 1 - if self._mActiveSM <= 0: - self._mState = constants.eNotMe - return self.get_state() - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 - return self.get_state() - - return self.get_state() diff --git a/lib/requests/packages/chardet/escsm.py b/lib/requests/packages/chardet/escsm.py deleted file mode 100755 index bd302b4c..00000000 --- a/lib/requests/packages/chardet/escsm.py +++ /dev/null @@ -1,242 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .constants import eStart, eError, eItsMe - -HZ_cls = ( -1,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,0,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,4,0,5,2,0, # 78 - 7f -1,1,1,1,1,1,1,1, # 80 - 87 -1,1,1,1,1,1,1,1, # 88 - 8f -1,1,1,1,1,1,1,1, # 90 - 97 -1,1,1,1,1,1,1,1, # 98 - 9f -1,1,1,1,1,1,1,1, # a0 - a7 -1,1,1,1,1,1,1,1, # a8 - af -1,1,1,1,1,1,1,1, # b0 - b7 -1,1,1,1,1,1,1,1, # b8 - bf -1,1,1,1,1,1,1,1, # c0 - c7 -1,1,1,1,1,1,1,1, # c8 - cf -1,1,1,1,1,1,1,1, # d0 - d7 -1,1,1,1,1,1,1,1, # d8 - df -1,1,1,1,1,1,1,1, # e0 - e7 -1,1,1,1,1,1,1,1, # e8 - ef -1,1,1,1,1,1,1,1, # f0 - f7 -1,1,1,1,1,1,1,1, # f8 - ff -) - -HZ_st = ( -eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f -eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 - 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f - 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 - 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f -) - -HZCharLenTable = (0, 0, 0, 0, 0, 0) - -HZSMModel = {'classTable': HZ_cls, - 'classFactor': 6, - 'stateTable': HZ_st, - 'charLenTable': HZCharLenTable, - 'name': "HZ-GB-2312"} - -ISO2022CN_cls = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,3,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,4,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022CN_st = ( -eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 -eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f -eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 -eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 - 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 -eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f -) - -ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) - -ISO2022CNSMModel = {'classTable': ISO2022CN_cls, - 'classFactor': 9, - 'stateTable': ISO2022CN_st, - 'charLenTable': ISO2022CNCharLenTable, - 'name': "ISO-2022-CN"} - -ISO2022JP_cls = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,2,2, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,7,0,0,0, # 20 - 27 -3,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -6,0,4,0,8,0,0,0, # 40 - 47 -0,9,5,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022JP_st = ( -eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 -eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 -eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f -eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 -eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f -eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 -eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f -eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 -) - -ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - -ISO2022JPSMModel = {'classTable': ISO2022JP_cls, - 'classFactor': 10, - 'stateTable': ISO2022JP_st, - 'charLenTable': ISO2022JPCharLenTable, - 'name': "ISO-2022-JP"} - -ISO2022KR_cls = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,3,0,0,0, # 20 - 27 -0,4,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,5,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff -) - -ISO2022KR_st = ( -eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 -eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f -eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 -eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f -eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 -) - -ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) - -ISO2022KRSMModel = {'classTable': ISO2022KR_cls, - 'classFactor': 6, - 'stateTable': ISO2022KR_st, - 'charLenTable': ISO2022KRCharLenTable, - 'name': "ISO-2022-KR"} - -# flake8: noqa diff --git a/lib/requests/packages/chardet/eucjpprober.py b/lib/requests/packages/chardet/eucjpprober.py deleted file mode 100755 index 8e64fdcc..00000000 --- a/lib/requests/packages/chardet/eucjpprober.py +++ /dev/null @@ -1,90 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from . import constants -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCJPDistributionAnalysis -from .jpcntx import EUCJPContextAnalysis -from .mbcssm import EUCJPSMModel - - -class EUCJPProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCJPSMModel) - self._mDistributionAnalyzer = EUCJPDistributionAnalysis() - self._mContextAnalyzer = EUCJPContextAnalysis() - self.reset() - - def reset(self): - MultiByteCharSetProber.reset(self) - self._mContextAnalyzer.reset() - - def get_charset_name(self): - return "EUC-JP" - - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() - if i == 0: - self._mLastChar[1] = aBuf[0] - self._mContextAnalyzer.feed(self._mLastChar, charLen) - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) - else: - self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) - - self._mLastChar[0] = aBuf[aLen - 1] - - if self.get_state() == constants.eDetecting: - if (self._mContextAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - contxtCf = self._mContextAnalyzer.get_confidence() - distribCf = self._mDistributionAnalyzer.get_confidence() - return max(contxtCf, distribCf) diff --git a/lib/requests/packages/chardet/euckrfreq.py b/lib/requests/packages/chardet/euckrfreq.py deleted file mode 100755 index a179e4c2..00000000 --- a/lib/requests/packages/chardet/euckrfreq.py +++ /dev/null @@ -1,596 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# Sampling from about 20M text materials include literature and computer technology - -# 128 --> 0.79 -# 256 --> 0.92 -# 512 --> 0.986 -# 1024 --> 0.99944 -# 2048 --> 0.99999 -# -# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24 -# Random Distribution Ration = 512 / (2350-512) = 0.279. -# -# Typical Distribution Ratio - -EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0 - -EUCKR_TABLE_SIZE = 2352 - -# Char to FreqOrder table , -EUCKRCharToFreqOrder = ( \ - 13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, -1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, -1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734, - 945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739, - 116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622, - 708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750, -1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856, - 344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205, - 709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779, -1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19, -1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567, -1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797, -1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802, -1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899, - 885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818, -1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409, -1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697, -1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770, -1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723, - 544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416, -1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300, - 119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083, - 893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857, -1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871, - 282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420, -1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885, - 127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889, - 0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893, -1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317, -1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841, -1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910, -1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610, - 269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375, -1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939, - 887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870, - 217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934, -1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888, -1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950, -1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065, -1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002, -1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965, -1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467, - 50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285, - 639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7, - 103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979, -1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985, - 818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994, -1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250, - 423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824, - 532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003, -2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745, - 619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61, - 191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023, -2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032, -2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912, -2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224, - 719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012, - 819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050, -2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681, - 499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414, -1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068, -2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075, -1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850, -2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606, -2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449, -1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452, - 949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112, -2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121, -2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130, - 22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274, - 962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139, -2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721, -1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298, -2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463, -2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747, -2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285, -2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187, -2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10, -2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350, -1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201, -2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972, -2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219, -2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233, -2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242, -2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247, -1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178, -1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255, -2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259, -1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262, -2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702, -1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273, - 295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541, -2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117, - 432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187, -2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800, - 808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312, -2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229, -2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315, - 501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484, -2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170, -1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335, - 425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601, -1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395, -2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354, -1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476, -2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035, - 416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498, -2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310, -1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389, -2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504, -1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505, -2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145, -1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624, - 593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700, -2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221, -2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377, - 644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448, - 915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485, -1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705, -1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465, - 291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471, -2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997, -2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486, - 797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494, - 434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771, - 585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323, -2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491, - 95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510, - 161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519, -2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532, -2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199, - 704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544, -2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247, -1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441, - 249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562, -2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362, -2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583, -2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465, - 3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431, - 202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151, - 974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596, -2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406, -2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611, -2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619, -1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628, -2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, - 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 -#Everything below is of no interest for detection purpose -2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658, -2659,2660,2661,2662,2663,2664,2665,2666,2667,2668,2669,2670,2671,2672,2673,2674, -2675,2676,2677,2678,2679,2680,2681,2682,2683,2684,2685,2686,2687,2688,2689,2690, -2691,2692,2693,2694,2695,2696,2697,2698,2699,1542, 880,2700,2701,2702,2703,2704, -2705,2706,2707,2708,2709,2710,2711,2712,2713,2714,2715,2716,2717,2718,2719,2720, -2721,2722,2723,2724,2725,1543,2726,2727,2728,2729,2730,2731,2732,1544,2733,2734, -2735,2736,2737,2738,2739,2740,2741,2742,2743,2744,2745,2746,2747,2748,2749,2750, -2751,2752,2753,2754,1545,2755,2756,2757,2758,2759,2760,2761,2762,2763,2764,2765, -2766,1546,2767,1547,2768,2769,2770,2771,2772,2773,2774,2775,2776,2777,2778,2779, -2780,2781,2782,2783,2784,2785,2786,1548,2787,2788,2789,1109,2790,2791,2792,2793, -2794,2795,2796,2797,2798,2799,2800,2801,2802,2803,2804,2805,2806,2807,2808,2809, -2810,2811,2812,1329,2813,2814,2815,2816,2817,2818,2819,2820,2821,2822,2823,2824, -2825,2826,2827,2828,2829,2830,2831,2832,2833,2834,2835,2836,2837,2838,2839,2840, -2841,2842,2843,2844,2845,2846,2847,2848,2849,2850,2851,2852,2853,2854,2855,2856, -1549,2857,2858,2859,2860,1550,2861,2862,1551,2863,2864,2865,2866,2867,2868,2869, -2870,2871,2872,2873,2874,1110,1330,2875,2876,2877,2878,2879,2880,2881,2882,2883, -2884,2885,2886,2887,2888,2889,2890,2891,2892,2893,2894,2895,2896,2897,2898,2899, -2900,2901,2902,2903,2904,2905,2906,2907,2908,2909,2910,2911,2912,2913,2914,2915, -2916,2917,2918,2919,2920,2921,2922,2923,2924,2925,2926,2927,2928,2929,2930,1331, -2931,2932,2933,2934,2935,2936,2937,2938,2939,2940,2941,2942,2943,1552,2944,2945, -2946,2947,2948,2949,2950,2951,2952,2953,2954,2955,2956,2957,2958,2959,2960,2961, -2962,2963,2964,1252,2965,2966,2967,2968,2969,2970,2971,2972,2973,2974,2975,2976, -2977,2978,2979,2980,2981,2982,2983,2984,2985,2986,2987,2988,2989,2990,2991,2992, -2993,2994,2995,2996,2997,2998,2999,3000,3001,3002,3003,3004,3005,3006,3007,3008, -3009,3010,3011,3012,1553,3013,3014,3015,3016,3017,1554,3018,1332,3019,3020,3021, -3022,3023,3024,3025,3026,3027,3028,3029,3030,3031,3032,3033,3034,3035,3036,3037, -3038,3039,3040,3041,3042,3043,3044,3045,3046,3047,3048,3049,3050,1555,3051,3052, -3053,1556,1557,3054,3055,3056,3057,3058,3059,3060,3061,3062,3063,3064,3065,3066, -3067,1558,3068,3069,3070,3071,3072,3073,3074,3075,3076,1559,3077,3078,3079,3080, -3081,3082,3083,1253,3084,3085,3086,3087,3088,3089,3090,3091,3092,3093,3094,3095, -3096,3097,3098,3099,3100,3101,3102,3103,3104,3105,3106,3107,3108,1152,3109,3110, -3111,3112,3113,1560,3114,3115,3116,3117,1111,3118,3119,3120,3121,3122,3123,3124, -3125,3126,3127,3128,3129,3130,3131,3132,3133,3134,3135,3136,3137,3138,3139,3140, -3141,3142,3143,3144,3145,3146,3147,3148,3149,3150,3151,3152,3153,3154,3155,3156, -3157,3158,3159,3160,3161,3162,3163,3164,3165,3166,3167,3168,3169,3170,3171,3172, -3173,3174,3175,3176,1333,3177,3178,3179,3180,3181,3182,3183,3184,3185,3186,3187, -3188,3189,1561,3190,3191,1334,3192,3193,3194,3195,3196,3197,3198,3199,3200,3201, -3202,3203,3204,3205,3206,3207,3208,3209,3210,3211,3212,3213,3214,3215,3216,3217, -3218,3219,3220,3221,3222,3223,3224,3225,3226,3227,3228,3229,3230,3231,3232,3233, -3234,1562,3235,3236,3237,3238,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248, -3249,3250,3251,3252,3253,3254,3255,3256,3257,3258,3259,3260,3261,3262,3263,3264, -3265,3266,3267,3268,3269,3270,3271,3272,3273,3274,3275,3276,3277,1563,3278,3279, -3280,3281,3282,3283,3284,3285,3286,3287,3288,3289,3290,3291,3292,3293,3294,3295, -3296,3297,3298,3299,3300,3301,3302,3303,3304,3305,3306,3307,3308,3309,3310,3311, -3312,3313,3314,3315,3316,3317,3318,3319,3320,3321,3322,3323,3324,3325,3326,3327, -3328,3329,3330,3331,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341,3342,3343, -3344,3345,3346,3347,3348,3349,3350,3351,3352,3353,3354,3355,3356,3357,3358,3359, -3360,3361,3362,3363,3364,1335,3365,3366,3367,3368,3369,3370,3371,3372,3373,3374, -3375,3376,3377,3378,3379,3380,3381,3382,3383,3384,3385,3386,3387,1336,3388,3389, -3390,3391,3392,3393,3394,3395,3396,3397,3398,3399,3400,3401,3402,3403,3404,3405, -3406,3407,3408,3409,3410,3411,3412,3413,3414,1337,3415,3416,3417,3418,3419,1338, -3420,3421,3422,1564,1565,3423,3424,3425,3426,3427,3428,3429,3430,3431,1254,3432, -3433,3434,1339,3435,3436,3437,3438,3439,1566,3440,3441,3442,3443,3444,3445,3446, -3447,3448,3449,3450,3451,3452,3453,3454,1255,3455,3456,3457,3458,3459,1567,1191, -3460,1568,1569,3461,3462,3463,1570,3464,3465,3466,3467,3468,1571,3469,3470,3471, -3472,3473,1572,3474,3475,3476,3477,3478,3479,3480,3481,3482,3483,3484,3485,3486, -1340,3487,3488,3489,3490,3491,3492,1021,3493,3494,3495,3496,3497,3498,1573,3499, -1341,3500,3501,3502,3503,3504,3505,3506,3507,3508,3509,3510,3511,1342,3512,3513, -3514,3515,3516,1574,1343,3517,3518,3519,1575,3520,1576,3521,3522,3523,3524,3525, -3526,3527,3528,3529,3530,3531,3532,3533,3534,3535,3536,3537,3538,3539,3540,3541, -3542,3543,3544,3545,3546,3547,3548,3549,3550,3551,3552,3553,3554,3555,3556,3557, -3558,3559,3560,3561,3562,3563,3564,3565,3566,3567,3568,3569,3570,3571,3572,3573, -3574,3575,3576,3577,3578,3579,3580,1577,3581,3582,1578,3583,3584,3585,3586,3587, -3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603, -3604,1579,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618, -3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,1580,3630,3631,1581,3632, -3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,3643,3644,3645,3646,3647,3648, -3649,3650,3651,3652,3653,3654,3655,3656,1582,3657,3658,3659,3660,3661,3662,3663, -3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,3676,3677,3678,3679, -3680,3681,3682,3683,3684,3685,3686,3687,3688,3689,3690,3691,3692,3693,3694,3695, -3696,3697,3698,3699,3700,1192,3701,3702,3703,3704,1256,3705,3706,3707,3708,1583, -1257,3709,3710,3711,3712,3713,3714,3715,3716,1584,3717,3718,3719,3720,3721,3722, -3723,3724,3725,3726,3727,3728,3729,3730,3731,3732,3733,3734,3735,3736,3737,3738, -3739,3740,3741,3742,3743,3744,3745,1344,3746,3747,3748,3749,3750,3751,3752,3753, -3754,3755,3756,1585,3757,3758,3759,3760,3761,3762,3763,3764,3765,3766,1586,3767, -3768,3769,3770,3771,3772,3773,3774,3775,3776,3777,3778,1345,3779,3780,3781,3782, -3783,3784,3785,3786,3787,3788,3789,3790,3791,3792,3793,3794,3795,1346,1587,3796, -3797,1588,3798,3799,3800,3801,3802,3803,3804,3805,3806,1347,3807,3808,3809,3810, -3811,1589,3812,3813,3814,3815,3816,3817,3818,3819,3820,3821,1590,3822,3823,1591, -1348,3824,3825,3826,3827,3828,3829,3830,1592,3831,3832,1593,3833,3834,3835,3836, -3837,3838,3839,3840,3841,3842,3843,3844,1349,3845,3846,3847,3848,3849,3850,3851, -3852,3853,3854,3855,3856,3857,3858,1594,3859,3860,3861,3862,3863,3864,3865,3866, -3867,3868,3869,1595,3870,3871,3872,3873,1596,3874,3875,3876,3877,3878,3879,3880, -3881,3882,3883,3884,3885,3886,1597,3887,3888,3889,3890,3891,3892,3893,3894,3895, -1598,3896,3897,3898,1599,1600,3899,1350,3900,1351,3901,3902,1352,3903,3904,3905, -3906,3907,3908,3909,3910,3911,3912,3913,3914,3915,3916,3917,3918,3919,3920,3921, -3922,3923,3924,1258,3925,3926,3927,3928,3929,3930,3931,1193,3932,1601,3933,3934, -3935,3936,3937,3938,3939,3940,3941,3942,3943,1602,3944,3945,3946,3947,3948,1603, -3949,3950,3951,3952,3953,3954,3955,3956,3957,3958,3959,3960,3961,3962,3963,3964, -3965,1604,3966,3967,3968,3969,3970,3971,3972,3973,3974,3975,3976,3977,1353,3978, -3979,3980,3981,3982,3983,3984,3985,3986,3987,3988,3989,3990,3991,1354,3992,3993, -3994,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004,4005,4006,4007,4008,4009, -4010,4011,4012,4013,4014,4015,4016,4017,4018,4019,4020,4021,4022,4023,1355,4024, -4025,4026,4027,4028,4029,4030,4031,4032,4033,4034,4035,4036,4037,4038,4039,4040, -1605,4041,4042,4043,4044,4045,4046,4047,4048,4049,4050,4051,4052,4053,4054,4055, -4056,4057,4058,4059,4060,1606,4061,4062,4063,4064,1607,4065,4066,4067,4068,4069, -4070,4071,4072,4073,4074,4075,4076,1194,4077,4078,1608,4079,4080,4081,4082,4083, -4084,4085,4086,4087,1609,4088,4089,4090,4091,4092,4093,4094,4095,4096,4097,4098, -4099,4100,4101,4102,4103,4104,4105,4106,4107,4108,1259,4109,4110,4111,4112,4113, -4114,4115,4116,4117,4118,4119,4120,4121,4122,4123,4124,1195,4125,4126,4127,1610, -4128,4129,4130,4131,4132,4133,4134,4135,4136,4137,1356,4138,4139,4140,4141,4142, -4143,4144,1611,4145,4146,4147,4148,4149,4150,4151,4152,4153,4154,4155,4156,4157, -4158,4159,4160,4161,4162,4163,4164,4165,4166,4167,4168,4169,4170,4171,4172,4173, -4174,4175,4176,4177,4178,4179,4180,4181,4182,4183,4184,4185,4186,4187,4188,4189, -4190,4191,4192,4193,4194,4195,4196,4197,4198,4199,4200,4201,4202,4203,4204,4205, -4206,4207,4208,4209,4210,4211,4212,4213,4214,4215,4216,4217,4218,4219,1612,4220, -4221,4222,4223,4224,4225,4226,4227,1357,4228,1613,4229,4230,4231,4232,4233,4234, -4235,4236,4237,4238,4239,4240,4241,4242,4243,1614,4244,4245,4246,4247,4248,4249, -4250,4251,4252,4253,4254,4255,4256,4257,4258,4259,4260,4261,4262,4263,4264,4265, -4266,4267,4268,4269,4270,1196,1358,4271,4272,4273,4274,4275,4276,4277,4278,4279, -4280,4281,4282,4283,4284,4285,4286,4287,1615,4288,4289,4290,4291,4292,4293,4294, -4295,4296,4297,4298,4299,4300,4301,4302,4303,4304,4305,4306,4307,4308,4309,4310, -4311,4312,4313,4314,4315,4316,4317,4318,4319,4320,4321,4322,4323,4324,4325,4326, -4327,4328,4329,4330,4331,4332,4333,4334,1616,4335,4336,4337,4338,4339,4340,4341, -4342,4343,4344,4345,4346,4347,4348,4349,4350,4351,4352,4353,4354,4355,4356,4357, -4358,4359,4360,1617,4361,4362,4363,4364,4365,1618,4366,4367,4368,4369,4370,4371, -4372,4373,4374,4375,4376,4377,4378,4379,4380,4381,4382,4383,4384,4385,4386,4387, -4388,4389,4390,4391,4392,4393,4394,4395,4396,4397,4398,4399,4400,4401,4402,4403, -4404,4405,4406,4407,4408,4409,4410,4411,4412,4413,4414,4415,4416,1619,4417,4418, -4419,4420,4421,4422,4423,4424,4425,1112,4426,4427,4428,4429,4430,1620,4431,4432, -4433,4434,4435,4436,4437,4438,4439,4440,4441,4442,1260,1261,4443,4444,4445,4446, -4447,4448,4449,4450,4451,4452,4453,4454,4455,1359,4456,4457,4458,4459,4460,4461, -4462,4463,4464,4465,1621,4466,4467,4468,4469,4470,4471,4472,4473,4474,4475,4476, -4477,4478,4479,4480,4481,4482,4483,4484,4485,4486,4487,4488,4489,1055,4490,4491, -4492,4493,4494,4495,4496,4497,4498,4499,4500,4501,4502,4503,4504,4505,4506,4507, -4508,4509,4510,4511,4512,4513,4514,4515,4516,4517,4518,1622,4519,4520,4521,1623, -4522,4523,4524,4525,4526,4527,4528,4529,4530,4531,4532,4533,4534,4535,1360,4536, -4537,4538,4539,4540,4541,4542,4543, 975,4544,4545,4546,4547,4548,4549,4550,4551, -4552,4553,4554,4555,4556,4557,4558,4559,4560,4561,4562,4563,4564,4565,4566,4567, -4568,4569,4570,4571,1624,4572,4573,4574,4575,4576,1625,4577,4578,4579,4580,4581, -4582,4583,4584,1626,4585,4586,4587,4588,4589,4590,4591,4592,4593,4594,4595,1627, -4596,4597,4598,4599,4600,4601,4602,4603,4604,4605,4606,4607,4608,4609,4610,4611, -4612,4613,4614,4615,1628,4616,4617,4618,4619,4620,4621,4622,4623,4624,4625,4626, -4627,4628,4629,4630,4631,4632,4633,4634,4635,4636,4637,4638,4639,4640,4641,4642, -4643,4644,4645,4646,4647,4648,4649,1361,4650,4651,4652,4653,4654,4655,4656,4657, -4658,4659,4660,4661,1362,4662,4663,4664,4665,4666,4667,4668,4669,4670,4671,4672, -4673,4674,4675,4676,4677,4678,4679,4680,4681,4682,1629,4683,4684,4685,4686,4687, -1630,4688,4689,4690,4691,1153,4692,4693,4694,1113,4695,4696,4697,4698,4699,4700, -4701,4702,4703,4704,4705,4706,4707,4708,4709,4710,4711,1197,4712,4713,4714,4715, -4716,4717,4718,4719,4720,4721,4722,4723,4724,4725,4726,4727,4728,4729,4730,4731, -4732,4733,4734,4735,1631,4736,1632,4737,4738,4739,4740,4741,4742,4743,4744,1633, -4745,4746,4747,4748,4749,1262,4750,4751,4752,4753,4754,1363,4755,4756,4757,4758, -4759,4760,4761,4762,4763,4764,4765,4766,4767,4768,1634,4769,4770,4771,4772,4773, -4774,4775,4776,4777,4778,1635,4779,4780,4781,4782,4783,4784,4785,4786,4787,4788, -4789,1636,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799,4800,4801,4802,4803, -4804,4805,4806,1637,4807,4808,4809,1638,4810,4811,4812,4813,4814,4815,4816,4817, -4818,1639,4819,4820,4821,4822,4823,4824,4825,4826,4827,4828,4829,4830,4831,4832, -4833,1077,4834,4835,4836,4837,4838,4839,4840,4841,4842,4843,4844,4845,4846,4847, -4848,4849,4850,4851,4852,4853,4854,4855,4856,4857,4858,4859,4860,4861,4862,4863, -4864,4865,4866,4867,4868,4869,4870,4871,4872,4873,4874,4875,4876,4877,4878,4879, -4880,4881,4882,4883,1640,4884,4885,1641,4886,4887,4888,4889,4890,4891,4892,4893, -4894,4895,4896,4897,4898,4899,4900,4901,4902,4903,4904,4905,4906,4907,4908,4909, -4910,4911,1642,4912,4913,4914,1364,4915,4916,4917,4918,4919,4920,4921,4922,4923, -4924,4925,4926,4927,4928,4929,4930,4931,1643,4932,4933,4934,4935,4936,4937,4938, -4939,4940,4941,4942,4943,4944,4945,4946,4947,4948,4949,4950,4951,4952,4953,4954, -4955,4956,4957,4958,4959,4960,4961,4962,4963,4964,4965,4966,4967,4968,4969,4970, -4971,4972,4973,4974,4975,4976,4977,4978,4979,4980,1644,4981,4982,4983,4984,1645, -4985,4986,1646,4987,4988,4989,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999, -5000,5001,5002,5003,5004,5005,1647,5006,1648,5007,5008,5009,5010,5011,5012,1078, -5013,5014,5015,5016,5017,5018,5019,5020,5021,5022,5023,5024,5025,5026,5027,5028, -1365,5029,5030,5031,5032,5033,5034,5035,5036,5037,5038,5039,1649,5040,5041,5042, -5043,5044,5045,1366,5046,5047,5048,5049,5050,5051,5052,5053,5054,5055,1650,5056, -5057,5058,5059,5060,5061,5062,5063,5064,5065,5066,5067,5068,5069,5070,5071,5072, -5073,5074,5075,5076,5077,1651,5078,5079,5080,5081,5082,5083,5084,5085,5086,5087, -5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102,5103, -5104,5105,5106,5107,5108,5109,5110,1652,5111,5112,5113,5114,5115,5116,5117,5118, -1367,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,1653,5130,5131,5132, -5133,5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148, -5149,1368,5150,1654,5151,1369,5152,5153,5154,5155,5156,5157,5158,5159,5160,5161, -5162,5163,5164,5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,5176,5177, -5178,1370,5179,5180,5181,5182,5183,5184,5185,5186,5187,5188,5189,5190,5191,5192, -5193,5194,5195,5196,5197,5198,1655,5199,5200,5201,5202,1656,5203,5204,5205,5206, -1371,5207,1372,5208,5209,5210,5211,1373,5212,5213,1374,5214,5215,5216,5217,5218, -5219,5220,5221,5222,5223,5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234, -5235,5236,5237,5238,5239,5240,5241,5242,5243,5244,5245,5246,5247,1657,5248,5249, -5250,5251,1658,1263,5252,5253,5254,5255,5256,1375,5257,5258,5259,5260,5261,5262, -5263,5264,5265,5266,5267,5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278, -5279,5280,5281,5282,5283,1659,5284,5285,5286,5287,5288,5289,5290,5291,5292,5293, -5294,5295,5296,5297,5298,5299,5300,1660,5301,5302,5303,5304,5305,5306,5307,5308, -5309,5310,5311,5312,5313,5314,5315,5316,5317,5318,5319,5320,5321,1376,5322,5323, -5324,5325,5326,5327,5328,5329,5330,5331,5332,5333,1198,5334,5335,5336,5337,5338, -5339,5340,5341,5342,5343,1661,5344,5345,5346,5347,5348,5349,5350,5351,5352,5353, -5354,5355,5356,5357,5358,5359,5360,5361,5362,5363,5364,5365,5366,5367,5368,5369, -5370,5371,5372,5373,5374,5375,5376,5377,5378,5379,5380,5381,5382,5383,5384,5385, -5386,5387,5388,5389,5390,5391,5392,5393,5394,5395,5396,5397,5398,1264,5399,5400, -5401,5402,5403,5404,5405,5406,5407,5408,5409,5410,5411,5412,1662,5413,5414,5415, -5416,1663,5417,5418,5419,5420,5421,5422,5423,5424,5425,5426,5427,5428,5429,5430, -5431,5432,5433,5434,5435,5436,5437,5438,1664,5439,5440,5441,5442,5443,5444,5445, -5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456,5457,5458,5459,5460,5461, -5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472,5473,5474,5475,5476,5477, -5478,1154,5479,5480,5481,5482,5483,5484,5485,1665,5486,5487,5488,5489,5490,5491, -5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504,5505,5506,5507, -5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520,5521,5522,5523, -5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536,5537,5538,5539, -5540,5541,5542,5543,5544,5545,5546,5547,5548,1377,5549,5550,5551,5552,5553,5554, -5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568,5569,5570, -1114,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584,5585, -5586,5587,5588,5589,5590,5591,5592,1378,5593,5594,5595,5596,5597,5598,5599,5600, -5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,1379,5615, -5616,5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631, -5632,5633,5634,1380,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646, -5647,5648,5649,1381,1056,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660, -1666,5661,5662,5663,5664,5665,5666,5667,5668,1667,5669,1668,5670,5671,5672,5673, -5674,5675,5676,5677,5678,1155,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688, -5689,5690,5691,5692,5693,5694,5695,5696,5697,5698,1669,5699,5700,5701,5702,5703, -5704,5705,1670,5706,5707,5708,5709,5710,1671,5711,5712,5713,5714,1382,5715,5716, -5717,5718,5719,5720,5721,5722,5723,5724,5725,1672,5726,5727,1673,1674,5728,5729, -5730,5731,5732,5733,5734,5735,5736,1675,5737,5738,5739,5740,5741,5742,5743,5744, -1676,5745,5746,5747,5748,5749,5750,5751,1383,5752,5753,5754,5755,5756,5757,5758, -5759,5760,5761,5762,5763,5764,5765,5766,5767,5768,1677,5769,5770,5771,5772,5773, -1678,5774,5775,5776, 998,5777,5778,5779,5780,5781,5782,5783,5784,5785,1384,5786, -5787,5788,5789,5790,5791,5792,5793,5794,5795,5796,5797,5798,5799,5800,1679,5801, -5802,5803,1115,1116,5804,5805,5806,5807,5808,5809,5810,5811,5812,5813,5814,5815, -5816,5817,5818,5819,5820,5821,5822,5823,5824,5825,5826,5827,5828,5829,5830,5831, -5832,5833,5834,5835,5836,5837,5838,5839,5840,5841,5842,5843,5844,5845,5846,5847, -5848,5849,5850,5851,5852,5853,5854,5855,1680,5856,5857,5858,5859,5860,5861,5862, -5863,5864,1681,5865,5866,5867,1682,5868,5869,5870,5871,5872,5873,5874,5875,5876, -5877,5878,5879,1683,5880,1684,5881,5882,5883,5884,1685,5885,5886,5887,5888,5889, -5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904,5905, -5906,5907,1686,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, -5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,1687, -5936,5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951, -5952,1688,1689,5953,1199,5954,5955,5956,5957,5958,5959,5960,5961,1690,5962,5963, -5964,5965,5966,5967,5968,5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979, -5980,5981,1385,5982,1386,5983,5984,5985,5986,5987,5988,5989,5990,5991,5992,5993, -5994,5995,5996,5997,5998,5999,6000,6001,6002,6003,6004,6005,6006,6007,6008,6009, -6010,6011,6012,6013,6014,6015,6016,6017,6018,6019,6020,6021,6022,6023,6024,6025, -6026,6027,1265,6028,6029,1691,6030,6031,6032,6033,6034,6035,6036,6037,6038,6039, -6040,6041,6042,6043,6044,6045,6046,6047,6048,6049,6050,6051,6052,6053,6054,6055, -6056,6057,6058,6059,6060,6061,6062,6063,6064,6065,6066,6067,6068,6069,6070,6071, -6072,6073,6074,6075,6076,6077,6078,6079,6080,6081,6082,6083,6084,1692,6085,6086, -6087,6088,6089,6090,6091,6092,6093,6094,6095,6096,6097,6098,6099,6100,6101,6102, -6103,6104,6105,6106,6107,6108,6109,6110,6111,6112,6113,6114,6115,6116,6117,6118, -6119,6120,6121,6122,6123,6124,6125,6126,6127,6128,6129,6130,6131,1693,6132,6133, -6134,6135,6136,1694,6137,6138,6139,6140,6141,1695,6142,6143,6144,6145,6146,6147, -6148,6149,6150,6151,6152,6153,6154,6155,6156,6157,6158,6159,6160,6161,6162,6163, -6164,6165,6166,6167,6168,6169,6170,6171,6172,6173,6174,6175,6176,6177,6178,6179, -6180,6181,6182,6183,6184,6185,1696,6186,6187,6188,6189,6190,6191,6192,6193,6194, -6195,6196,6197,6198,6199,6200,6201,6202,6203,6204,6205,6206,6207,6208,6209,6210, -6211,6212,6213,6214,6215,6216,6217,6218,6219,1697,6220,6221,6222,6223,6224,6225, -6226,6227,6228,6229,6230,6231,6232,6233,6234,6235,6236,6237,6238,6239,6240,6241, -6242,6243,6244,6245,6246,6247,6248,6249,6250,6251,6252,6253,1698,6254,6255,6256, -6257,6258,6259,6260,6261,6262,6263,1200,6264,6265,6266,6267,6268,6269,6270,6271, #1024 -6272,6273,6274,6275,6276,6277,6278,6279,6280,6281,6282,6283,6284,6285,6286,6287, -6288,6289,6290,6291,6292,6293,6294,6295,6296,6297,6298,6299,6300,6301,6302,1699, -6303,6304,1700,6305,6306,6307,6308,6309,6310,6311,6312,6313,6314,6315,6316,6317, -6318,6319,6320,6321,6322,6323,6324,6325,6326,6327,6328,6329,6330,6331,6332,6333, -6334,6335,6336,6337,6338,6339,1701,6340,6341,6342,6343,6344,1387,6345,6346,6347, -6348,6349,6350,6351,6352,6353,6354,6355,6356,6357,6358,6359,6360,6361,6362,6363, -6364,6365,6366,6367,6368,6369,6370,6371,6372,6373,6374,6375,6376,6377,6378,6379, -6380,6381,6382,6383,6384,6385,6386,6387,6388,6389,6390,6391,6392,6393,6394,6395, -6396,6397,6398,6399,6400,6401,6402,6403,6404,6405,6406,6407,6408,6409,6410,6411, -6412,6413,1702,6414,6415,6416,6417,6418,6419,6420,6421,6422,1703,6423,6424,6425, -6426,6427,6428,6429,6430,6431,6432,6433,6434,6435,6436,6437,6438,1704,6439,6440, -6441,6442,6443,6444,6445,6446,6447,6448,6449,6450,6451,6452,6453,6454,6455,6456, -6457,6458,6459,6460,6461,6462,6463,6464,6465,6466,6467,6468,6469,6470,6471,6472, -6473,6474,6475,6476,6477,6478,6479,6480,6481,6482,6483,6484,6485,6486,6487,6488, -6489,6490,6491,6492,6493,6494,6495,6496,6497,6498,6499,6500,6501,6502,6503,1266, -6504,6505,6506,6507,6508,6509,6510,6511,6512,6513,6514,6515,6516,6517,6518,6519, -6520,6521,6522,6523,6524,6525,6526,6527,6528,6529,6530,6531,6532,6533,6534,6535, -6536,6537,6538,6539,6540,6541,6542,6543,6544,6545,6546,6547,6548,6549,6550,6551, -1705,1706,6552,6553,6554,6555,6556,6557,6558,6559,6560,6561,6562,6563,6564,6565, -6566,6567,6568,6569,6570,6571,6572,6573,6574,6575,6576,6577,6578,6579,6580,6581, -6582,6583,6584,6585,6586,6587,6588,6589,6590,6591,6592,6593,6594,6595,6596,6597, -6598,6599,6600,6601,6602,6603,6604,6605,6606,6607,6608,6609,6610,6611,6612,6613, -6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,6628,6629, -6630,6631,6632,6633,6634,6635,6636,6637,1388,6638,6639,6640,6641,6642,6643,6644, -1707,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658,6659, -6660,6661,6662,6663,1708,6664,6665,6666,6667,6668,6669,6670,6671,6672,6673,6674, -1201,6675,6676,6677,6678,6679,6680,6681,6682,6683,6684,6685,6686,6687,6688,6689, -6690,6691,6692,6693,6694,6695,6696,6697,6698,6699,6700,6701,6702,6703,6704,6705, -6706,6707,6708,6709,6710,6711,6712,6713,6714,6715,6716,6717,6718,6719,6720,6721, -6722,6723,6724,6725,1389,6726,6727,6728,6729,6730,6731,6732,6733,6734,6735,6736, -1390,1709,6737,6738,6739,6740,6741,6742,1710,6743,6744,6745,6746,1391,6747,6748, -6749,6750,6751,6752,6753,6754,6755,6756,6757,1392,6758,6759,6760,6761,6762,6763, -6764,6765,6766,6767,6768,6769,6770,6771,6772,6773,6774,6775,6776,6777,6778,6779, -6780,1202,6781,6782,6783,6784,6785,6786,6787,6788,6789,6790,6791,6792,6793,6794, -6795,6796,6797,6798,6799,6800,6801,6802,6803,6804,6805,6806,6807,6808,6809,1711, -6810,6811,6812,6813,6814,6815,6816,6817,6818,6819,6820,6821,6822,6823,6824,6825, -6826,6827,6828,6829,6830,6831,6832,6833,6834,6835,6836,1393,6837,6838,6839,6840, -6841,6842,6843,6844,6845,6846,6847,6848,6849,6850,6851,6852,6853,6854,6855,6856, -6857,6858,6859,6860,6861,6862,6863,6864,6865,6866,6867,6868,6869,6870,6871,6872, -6873,6874,6875,6876,6877,6878,6879,6880,6881,6882,6883,6884,6885,6886,6887,6888, -6889,6890,6891,6892,6893,6894,6895,6896,6897,6898,6899,6900,6901,6902,1712,6903, -6904,6905,6906,6907,6908,6909,6910,1713,6911,6912,6913,6914,6915,6916,6917,6918, -6919,6920,6921,6922,6923,6924,6925,6926,6927,6928,6929,6930,6931,6932,6933,6934, -6935,6936,6937,6938,6939,6940,6941,6942,6943,6944,6945,6946,6947,6948,6949,6950, -6951,6952,6953,6954,6955,6956,6957,6958,6959,6960,6961,6962,6963,6964,6965,6966, -6967,6968,6969,6970,6971,6972,6973,6974,1714,6975,6976,6977,6978,6979,6980,6981, -6982,6983,6984,6985,6986,6987,6988,1394,6989,6990,6991,6992,6993,6994,6995,6996, -6997,6998,6999,7000,1715,7001,7002,7003,7004,7005,7006,7007,7008,7009,7010,7011, -7012,7013,7014,7015,7016,7017,7018,7019,7020,7021,7022,7023,7024,7025,7026,7027, -7028,1716,7029,7030,7031,7032,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042, -7043,7044,7045,7046,7047,7048,7049,7050,7051,7052,7053,7054,7055,7056,7057,7058, -7059,7060,7061,7062,7063,7064,7065,7066,7067,7068,7069,7070,7071,7072,7073,7074, -7075,7076,7077,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090, -7091,7092,7093,7094,7095,7096,7097,7098,7099,7100,7101,7102,7103,7104,7105,7106, -7107,7108,7109,7110,7111,7112,7113,7114,7115,7116,7117,7118,7119,7120,7121,7122, -7123,7124,7125,7126,7127,7128,7129,7130,7131,7132,7133,7134,7135,7136,7137,7138, -7139,7140,7141,7142,7143,7144,7145,7146,7147,7148,7149,7150,7151,7152,7153,7154, -7155,7156,7157,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167,7168,7169,7170, -7171,7172,7173,7174,7175,7176,7177,7178,7179,7180,7181,7182,7183,7184,7185,7186, -7187,7188,7189,7190,7191,7192,7193,7194,7195,7196,7197,7198,7199,7200,7201,7202, -7203,7204,7205,7206,7207,1395,7208,7209,7210,7211,7212,7213,1717,7214,7215,7216, -7217,7218,7219,7220,7221,7222,7223,7224,7225,7226,7227,7228,7229,7230,7231,7232, -7233,7234,7235,7236,7237,7238,7239,7240,7241,7242,7243,7244,7245,7246,7247,7248, -7249,7250,7251,7252,7253,7254,7255,7256,7257,7258,7259,7260,7261,7262,7263,7264, -7265,7266,7267,7268,7269,7270,7271,7272,7273,7274,7275,7276,7277,7278,7279,7280, -7281,7282,7283,7284,7285,7286,7287,7288,7289,7290,7291,7292,7293,7294,7295,7296, -7297,7298,7299,7300,7301,7302,7303,7304,7305,7306,7307,7308,7309,7310,7311,7312, -7313,1718,7314,7315,7316,7317,7318,7319,7320,7321,7322,7323,7324,7325,7326,7327, -7328,7329,7330,7331,7332,7333,7334,7335,7336,7337,7338,7339,7340,7341,7342,7343, -7344,7345,7346,7347,7348,7349,7350,7351,7352,7353,7354,7355,7356,7357,7358,7359, -7360,7361,7362,7363,7364,7365,7366,7367,7368,7369,7370,7371,7372,7373,7374,7375, -7376,7377,7378,7379,7380,7381,7382,7383,7384,7385,7386,7387,7388,7389,7390,7391, -7392,7393,7394,7395,7396,7397,7398,7399,7400,7401,7402,7403,7404,7405,7406,7407, -7408,7409,7410,7411,7412,7413,7414,7415,7416,7417,7418,7419,7420,7421,7422,7423, -7424,7425,7426,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436,7437,7438,7439, -7440,7441,7442,7443,7444,7445,7446,7447,7448,7449,7450,7451,7452,7453,7454,7455, -7456,7457,7458,7459,7460,7461,7462,7463,7464,7465,7466,7467,7468,7469,7470,7471, -7472,7473,7474,7475,7476,7477,7478,7479,7480,7481,7482,7483,7484,7485,7486,7487, -7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503, -7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519, -7520,7521,7522,7523,7524,7525,7526,7527,7528,7529,7530,7531,7532,7533,7534,7535, -7536,7537,7538,7539,7540,7541,7542,7543,7544,7545,7546,7547,7548,7549,7550,7551, -7552,7553,7554,7555,7556,7557,7558,7559,7560,7561,7562,7563,7564,7565,7566,7567, -7568,7569,7570,7571,7572,7573,7574,7575,7576,7577,7578,7579,7580,7581,7582,7583, -7584,7585,7586,7587,7588,7589,7590,7591,7592,7593,7594,7595,7596,7597,7598,7599, -7600,7601,7602,7603,7604,7605,7606,7607,7608,7609,7610,7611,7612,7613,7614,7615, -7616,7617,7618,7619,7620,7621,7622,7623,7624,7625,7626,7627,7628,7629,7630,7631, -7632,7633,7634,7635,7636,7637,7638,7639,7640,7641,7642,7643,7644,7645,7646,7647, -7648,7649,7650,7651,7652,7653,7654,7655,7656,7657,7658,7659,7660,7661,7662,7663, -7664,7665,7666,7667,7668,7669,7670,7671,7672,7673,7674,7675,7676,7677,7678,7679, -7680,7681,7682,7683,7684,7685,7686,7687,7688,7689,7690,7691,7692,7693,7694,7695, -7696,7697,7698,7699,7700,7701,7702,7703,7704,7705,7706,7707,7708,7709,7710,7711, -7712,7713,7714,7715,7716,7717,7718,7719,7720,7721,7722,7723,7724,7725,7726,7727, -7728,7729,7730,7731,7732,7733,7734,7735,7736,7737,7738,7739,7740,7741,7742,7743, -7744,7745,7746,7747,7748,7749,7750,7751,7752,7753,7754,7755,7756,7757,7758,7759, -7760,7761,7762,7763,7764,7765,7766,7767,7768,7769,7770,7771,7772,7773,7774,7775, -7776,7777,7778,7779,7780,7781,7782,7783,7784,7785,7786,7787,7788,7789,7790,7791, -7792,7793,7794,7795,7796,7797,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807, -7808,7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823, -7824,7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839, -7840,7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855, -7856,7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871, -7872,7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887, -7888,7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903, -7904,7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919, -7920,7921,7922,7923,7924,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, -7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, -7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, -7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, -7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, -8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, -8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, -8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, -8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, -8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, -8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, -8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, -8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, -8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, -8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, -8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, -8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, -8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, -8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, -8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, -8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, -8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, -8272,8273,8274,8275,8276,8277,8278,8279,8280,8281,8282,8283,8284,8285,8286,8287, -8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303, -8304,8305,8306,8307,8308,8309,8310,8311,8312,8313,8314,8315,8316,8317,8318,8319, -8320,8321,8322,8323,8324,8325,8326,8327,8328,8329,8330,8331,8332,8333,8334,8335, -8336,8337,8338,8339,8340,8341,8342,8343,8344,8345,8346,8347,8348,8349,8350,8351, -8352,8353,8354,8355,8356,8357,8358,8359,8360,8361,8362,8363,8364,8365,8366,8367, -8368,8369,8370,8371,8372,8373,8374,8375,8376,8377,8378,8379,8380,8381,8382,8383, -8384,8385,8386,8387,8388,8389,8390,8391,8392,8393,8394,8395,8396,8397,8398,8399, -8400,8401,8402,8403,8404,8405,8406,8407,8408,8409,8410,8411,8412,8413,8414,8415, -8416,8417,8418,8419,8420,8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431, -8432,8433,8434,8435,8436,8437,8438,8439,8440,8441,8442,8443,8444,8445,8446,8447, -8448,8449,8450,8451,8452,8453,8454,8455,8456,8457,8458,8459,8460,8461,8462,8463, -8464,8465,8466,8467,8468,8469,8470,8471,8472,8473,8474,8475,8476,8477,8478,8479, -8480,8481,8482,8483,8484,8485,8486,8487,8488,8489,8490,8491,8492,8493,8494,8495, -8496,8497,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511, -8512,8513,8514,8515,8516,8517,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527, -8528,8529,8530,8531,8532,8533,8534,8535,8536,8537,8538,8539,8540,8541,8542,8543, -8544,8545,8546,8547,8548,8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559, -8560,8561,8562,8563,8564,8565,8566,8567,8568,8569,8570,8571,8572,8573,8574,8575, -8576,8577,8578,8579,8580,8581,8582,8583,8584,8585,8586,8587,8588,8589,8590,8591, -8592,8593,8594,8595,8596,8597,8598,8599,8600,8601,8602,8603,8604,8605,8606,8607, -8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8621,8622,8623, -8624,8625,8626,8627,8628,8629,8630,8631,8632,8633,8634,8635,8636,8637,8638,8639, -8640,8641,8642,8643,8644,8645,8646,8647,8648,8649,8650,8651,8652,8653,8654,8655, -8656,8657,8658,8659,8660,8661,8662,8663,8664,8665,8666,8667,8668,8669,8670,8671, -8672,8673,8674,8675,8676,8677,8678,8679,8680,8681,8682,8683,8684,8685,8686,8687, -8688,8689,8690,8691,8692,8693,8694,8695,8696,8697,8698,8699,8700,8701,8702,8703, -8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719, -8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735, -8736,8737,8738,8739,8740,8741) - -# flake8: noqa diff --git a/lib/requests/packages/chardet/euckrprober.py b/lib/requests/packages/chardet/euckrprober.py deleted file mode 100755 index 5982a46b..00000000 --- a/lib/requests/packages/chardet/euckrprober.py +++ /dev/null @@ -1,42 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCKRDistributionAnalysis -from .mbcssm import EUCKRSMModel - - -class EUCKRProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCKRSMModel) - self._mDistributionAnalyzer = EUCKRDistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "EUC-KR" diff --git a/lib/requests/packages/chardet/euctwfreq.py b/lib/requests/packages/chardet/euctwfreq.py deleted file mode 100755 index 576e7504..00000000 --- a/lib/requests/packages/chardet/euctwfreq.py +++ /dev/null @@ -1,428 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# EUCTW frequency table -# Converted from big5 work -# by Taiwan's Mandarin Promotion Council -# - -# 128 --> 0.42261 -# 256 --> 0.57851 -# 512 --> 0.74851 -# 1024 --> 0.89384 -# 2048 --> 0.97583 -# -# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98 -# Random Distribution Ration = 512/(5401-512)=0.105 -# -# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR - -EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 - -# Char to FreqOrder table , -EUCTW_TABLE_SIZE = 8102 - -EUCTWCharToFreqOrder = ( - 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 -3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 -1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 - 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 -3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 -4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 -7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 - 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 - 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 - 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 -2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 -1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 -3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 - 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 -1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 -3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 -2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 - 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 -3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 -1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 -7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 - 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 -7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 -1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 - 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 - 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 -3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 -3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 - 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 -2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 -2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 - 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 - 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 -3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 -1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 -1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 -1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 -2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 - 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 -4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 -1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 -7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 -2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 - 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 - 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 - 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 - 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 -7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 - 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 -1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 - 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 - 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 -7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 -1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 - 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 -3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 -4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 -3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 - 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 - 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 -1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 -4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 -3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 -3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 -2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 -7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 -3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 -7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 -1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 -2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 -1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 - 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 -1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 -4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 -3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 - 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 - 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 - 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 -2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 -7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 -1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 -2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 -1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 -1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 -7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 -7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 -7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 -3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 -4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 -1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 -7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 -2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 -7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 -3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 -3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 -7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 -2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 -7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 - 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 -4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 -2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 -7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 -3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 -2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 -2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 - 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 -2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 -1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 -1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 -2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 -1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 -7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 -7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 -2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 -4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 -1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 -7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 - 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 -4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 - 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 -2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 - 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 -1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 -1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 - 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 -3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 -3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 -1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 -3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 -7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 -7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 -1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 -2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 -1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 -3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 -2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 -3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 -2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 -4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 -4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 -3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 - 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 -3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 - 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 -3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 -3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 -3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 -1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 -7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 - 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 -7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 -1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 - 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 -4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 -3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 - 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 -2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 -2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 -3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 -1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 -4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 -2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 -1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 -1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 -2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 -3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 -1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 -7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 -1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 -4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 -1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 - 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 -1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 -3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 -3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 -2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 -1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 -4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 - 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 -7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 -2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 -3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 -4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 - 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 -7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 -7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 -1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 -4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 -3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 -2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 -3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 -3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 -2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 -1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 -4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 -3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 -3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 -2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 -4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 -7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 -3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 -2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 -3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 -1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 -2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 -3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 -4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 -2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 -2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 -7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 -1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 -2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 -1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 -3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 -4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 -2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 -3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 -3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 -2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 -4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 -2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 -3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 -4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 -7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 -3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 - 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 -1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 -4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 -1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 -4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 -7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 - 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 -7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 -2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 -1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 -1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 -3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 - 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 - 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 - 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 -3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 -2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 - 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 -7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 -1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 -3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 -7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 -1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 -7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 -4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 -1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 -2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 -2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 -4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 - 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 - 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 -3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 -3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 -1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 -2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 -7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 -1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 -1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 -3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 - 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 -1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 -4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 -7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 -2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 -3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 - 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 -1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 -2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 -2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 -7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 -7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 -7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 -2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 -2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 -1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 -4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 -3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 -3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 -4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 -4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 -2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 -2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 -7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 -4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 -7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 -2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 -1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 -3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 -4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 -2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 - 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 -2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 -1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 -2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 -2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 -4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 -7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 -1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 -3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 -7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 -1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 -8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 -2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 -8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 -2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 -2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 -8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 -8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 -8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 - 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 -8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 -4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 -3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 -8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 -1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 -8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 - 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 -1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 - 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 -4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 -1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 -4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 -1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 - 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 -3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 -4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 -8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 - 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 -3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 - 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 -2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 -#Everything below is of no interest for detection purpose -2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118 -2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134 -8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150 -8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166 -8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182 -8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198 -8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214 -8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230 -8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246 -8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262 -8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278 -8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294 -8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310 -8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326 -8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342 -8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358 -8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374 -8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390 -8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406 -8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422 -8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438 -8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454 -8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470 -8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486 -8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502 -8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518 -8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534 -8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550 -8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566 -8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582 -8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598 -8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614 -8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630 -8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646 -8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662 -8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678 -8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694 -8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710 -8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726 -8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742 - -# flake8: noqa diff --git a/lib/requests/packages/chardet/euctwprober.py b/lib/requests/packages/chardet/euctwprober.py deleted file mode 100755 index fe652fe3..00000000 --- a/lib/requests/packages/chardet/euctwprober.py +++ /dev/null @@ -1,41 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import EUCTWDistributionAnalysis -from .mbcssm import EUCTWSMModel - -class EUCTWProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(EUCTWSMModel) - self._mDistributionAnalyzer = EUCTWDistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "EUC-TW" diff --git a/lib/requests/packages/chardet/gb2312freq.py b/lib/requests/packages/chardet/gb2312freq.py deleted file mode 100755 index 1238f510..00000000 --- a/lib/requests/packages/chardet/gb2312freq.py +++ /dev/null @@ -1,472 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# GB2312 most frequently used character table -# -# Char to FreqOrder table , from hz6763 - -# 512 --> 0.79 -- 0.79 -# 1024 --> 0.92 -- 0.13 -# 2048 --> 0.98 -- 0.06 -# 6768 --> 1.00 -- 0.02 -# -# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79 -# Random Distribution Ration = 512 / (3755 - 512) = 0.157 -# -# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR - -GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9 - -GB2312_TABLE_SIZE = 3760 - -GB2312CharToFreqOrder = ( -1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205, -2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842, -2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409, - 249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670, -1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820, -1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585, - 152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566, -1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575, -2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853, -3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061, - 544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155, -1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406, - 927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816, -2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606, - 360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023, -2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414, -1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513, -3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052, - 198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570, -1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575, - 253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250, -2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506, -1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26, -3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835, -1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686, -2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054, -1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894, - 585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105, -3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403, -3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694, - 252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873, -3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940, - 836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121, -1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648, -3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992, -2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233, -1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157, - 755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807, -1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094, -4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258, - 887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478, -3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152, -3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909, - 509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272, -1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221, -2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252, -1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301, -1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254, - 389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070, -3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461, -3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360, -4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124, - 296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535, -3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243, -1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713, -1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071, -4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442, - 215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946, - 814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257, -3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180, -1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427, - 602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781, -1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724, -2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937, - 930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943, - 432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789, - 396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552, -3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246, -4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451, -3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310, - 750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860, -2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297, -2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780, -2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745, - 776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936, -2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032, - 968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657, - 163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414, - 220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976, -3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436, -2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254, -2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536, -1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238, - 18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059, -2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741, - 90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447, - 286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601, -1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269, -1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894, - 915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173, - 681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994, -1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956, -2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437, -3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154, -2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240, -2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143, -2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634, -3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472, -1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541, -1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143, -2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312, -1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414, -3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754, -1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424, -1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302, -3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739, - 795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004, -2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484, -1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739, -4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535, -1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641, -1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307, -3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573, -1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533, - 47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965, - 504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99, -1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280, - 160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505, -1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012, -1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039, - 744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982, -3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530, -4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392, -3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656, -2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220, -2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766, -1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535, -3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728, -2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338, -1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627, -1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885, - 125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411, -2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671, -2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162, -3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774, -4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524, -3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346, - 180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040, -3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188, -2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280, -1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131, - 259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947, - 774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970, -3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814, -4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557, -2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997, -1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972, -1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369, - 766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376, -1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480, -3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610, - 955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128, - 642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769, -1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207, - 57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392, -1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623, - 193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782, -2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650, - 158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478, -2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773, -2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007, -1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323, -1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598, -2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961, - 819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302, -1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409, -1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683, -2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191, -2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616, -3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302, -1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774, -4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147, - 571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731, - 845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464, -3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377, -1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315, - 470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557, -3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903, -1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060, -4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261, -1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092, -2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810, -1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708, - 498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658, -1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871, -3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503, - 448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229, -2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112, - 136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504, -1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389, -1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27, -1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542, -3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861, -2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845, -3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700, -3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469, -3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582, - 996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999, -2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274, - 786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020, -2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601, - 12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628, -1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31, - 475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668, - 233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778, -1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169, -3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667, -3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881, -1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276, -1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320, -3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751, -2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432, -2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772, -1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843, -3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116, - 451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904, -4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652, -1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664, -2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770, -3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283, -3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626, -1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713, - 768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333, - 391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062, -2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555, - 931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014, -1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510, - 386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015, -1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459, -1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390, -1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238, -1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232, -1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624, - 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, - 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512 -#Everything below is of no interest for detection purpose -5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636, -5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874, -5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278, -3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806, -4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827, -5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512, -5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578, -4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828, -4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105, -4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189, -4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561, -3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226, -6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778, -4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039, -6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404, -4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213, -4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739, -4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328, -5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592, -3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424, -4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270, -3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232, -4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456, -4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121, -6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971, -6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409, -5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519, -4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367, -6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834, -4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460, -5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464, -5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709, -5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906, -6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530, -3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262, -6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920, -4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190, -5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318, -6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538, -6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697, -4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544, -5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016, -4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638, -5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006, -5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071, -4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552, -4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556, -5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432, -4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632, -4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885, -5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336, -4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729, -4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854, -4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332, -5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004, -5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419, -4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293, -3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580, -4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339, -6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341, -5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493, -5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046, -4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904, -6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728, -5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350, -6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233, -4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944, -5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413, -5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700, -3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999, -5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694, -6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571, -4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359, -6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178, -4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421, -4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330, -6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855, -3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587, -6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803, -4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791, -3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304, -3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445, -3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506, -4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856, -2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057, -5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777, -4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369, -5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028, -5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914, -5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175, -4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681, -5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534, -4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912, -5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054, -1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336, -3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666, -4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375, -4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113, -6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614, -4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173, -5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197, -3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271, -5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423, -5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529, -5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921, -3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837, -5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922, -5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187, -3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382, -5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628, -5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683, -5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053, -6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928, -4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662, -6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663, -4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554, -3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191, -4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013, -5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932, -5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055, -5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829, -3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096, -3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660, -6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199, -6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748, -5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402, -6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957, -6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668, -6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763, -6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407, -6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051, -5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429, -6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791, -6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028, -3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305, -3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159, -4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683, -4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372, -3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514, -5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544, -5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472, -5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716, -5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905, -5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327, -4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030, -5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281, -6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224, -5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327, -4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062, -4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354, -6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065, -3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953, -4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681, -4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708, -5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442, -6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387, -6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237, -4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713, -6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547, -5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957, -5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337, -5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074, -5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685, -5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455, -4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722, -5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615, -5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093, -5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989, -5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094, -6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212, -4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967, -5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733, -4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260, -4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864, -6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353, -4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095, -6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287, -3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504, -5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539, -6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750, -6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864, -6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213, -5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573, -6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252, -6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970, -3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703, -5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978, -4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767) - -# flake8: noqa diff --git a/lib/requests/packages/chardet/gb2312prober.py b/lib/requests/packages/chardet/gb2312prober.py deleted file mode 100755 index 0325a2d8..00000000 --- a/lib/requests/packages/chardet/gb2312prober.py +++ /dev/null @@ -1,41 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import GB2312DistributionAnalysis -from .mbcssm import GB2312SMModel - -class GB2312Prober(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(GB2312SMModel) - self._mDistributionAnalyzer = GB2312DistributionAnalysis() - self.reset() - - def get_charset_name(self): - return "GB2312" diff --git a/lib/requests/packages/chardet/hebrewprober.py b/lib/requests/packages/chardet/hebrewprober.py deleted file mode 100755 index ba225c5e..00000000 --- a/lib/requests/packages/chardet/hebrewprober.py +++ /dev/null @@ -1,283 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Shy Shalom -# Portions created by the Initial Developer are Copyright (C) 2005 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetprober import CharSetProber -from .constants import eNotMe, eDetecting -from .compat import wrap_ord - -# This prober doesn't actually recognize a language or a charset. -# It is a helper prober for the use of the Hebrew model probers - -### General ideas of the Hebrew charset recognition ### -# -# Four main charsets exist in Hebrew: -# "ISO-8859-8" - Visual Hebrew -# "windows-1255" - Logical Hebrew -# "ISO-8859-8-I" - Logical Hebrew -# "x-mac-hebrew" - ?? Logical Hebrew ?? -# -# Both "ISO" charsets use a completely identical set of code points, whereas -# "windows-1255" and "x-mac-hebrew" are two different proper supersets of -# these code points. windows-1255 defines additional characters in the range -# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific -# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6. -# x-mac-hebrew defines similar additional code points but with a different -# mapping. -# -# As far as an average Hebrew text with no diacritics is concerned, all four -# charsets are identical with respect to code points. Meaning that for the -# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters -# (including final letters). -# -# The dominant difference between these charsets is their directionality. -# "Visual" directionality means that the text is ordered as if the renderer is -# not aware of a BIDI rendering algorithm. The renderer sees the text and -# draws it from left to right. The text itself when ordered naturally is read -# backwards. A buffer of Visual Hebrew generally looks like so: -# "[last word of first line spelled backwards] [whole line ordered backwards -# and spelled backwards] [first word of first line spelled backwards] -# [end of line] [last word of second line] ... etc' " -# adding punctuation marks, numbers and English text to visual text is -# naturally also "visual" and from left to right. -# -# "Logical" directionality means the text is ordered "naturally" according to -# the order it is read. It is the responsibility of the renderer to display -# the text from right to left. A BIDI algorithm is used to place general -# punctuation marks, numbers and English text in the text. -# -# Texts in x-mac-hebrew are almost impossible to find on the Internet. From -# what little evidence I could find, it seems that its general directionality -# is Logical. -# -# To sum up all of the above, the Hebrew probing mechanism knows about two -# charsets: -# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are -# backwards while line order is natural. For charset recognition purposes -# the line order is unimportant (In fact, for this implementation, even -# word order is unimportant). -# Logical Hebrew - "windows-1255" - normal, naturally ordered text. -# -# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be -# specifically identified. -# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew -# that contain special punctuation marks or diacritics is displayed with -# some unconverted characters showing as question marks. This problem might -# be corrected using another model prober for x-mac-hebrew. Due to the fact -# that x-mac-hebrew texts are so rare, writing another model prober isn't -# worth the effort and performance hit. -# -#### The Prober #### -# -# The prober is divided between two SBCharSetProbers and a HebrewProber, -# all of which are managed, created, fed data, inquired and deleted by the -# SBCSGroupProber. The two SBCharSetProbers identify that the text is in -# fact some kind of Hebrew, Logical or Visual. The final decision about which -# one is it is made by the HebrewProber by combining final-letter scores -# with the scores of the two SBCharSetProbers to produce a final answer. -# -# The SBCSGroupProber is responsible for stripping the original text of HTML -# tags, English characters, numbers, low-ASCII punctuation characters, spaces -# and new lines. It reduces any sequence of such characters to a single space. -# The buffer fed to each prober in the SBCS group prober is pure text in -# high-ASCII. -# The two SBCharSetProbers (model probers) share the same language model: -# Win1255Model. -# The first SBCharSetProber uses the model normally as any other -# SBCharSetProber does, to recognize windows-1255, upon which this model was -# built. The second SBCharSetProber is told to make the pair-of-letter -# lookup in the language model backwards. This in practice exactly simulates -# a visual Hebrew model using the windows-1255 logical Hebrew model. -# -# The HebrewProber is not using any language model. All it does is look for -# final-letter evidence suggesting the text is either logical Hebrew or visual -# Hebrew. Disjointed from the model probers, the results of the HebrewProber -# alone are meaningless. HebrewProber always returns 0.00 as confidence -# since it never identifies a charset by itself. Instead, the pointer to the -# HebrewProber is passed to the model probers as a helper "Name Prober". -# When the Group prober receives a positive identification from any prober, -# it asks for the name of the charset identified. If the prober queried is a -# Hebrew model prober, the model prober forwards the call to the -# HebrewProber to make the final decision. In the HebrewProber, the -# decision is made according to the final-letters scores maintained and Both -# model probers scores. The answer is returned in the form of the name of the -# charset identified, either "windows-1255" or "ISO-8859-8". - -# windows-1255 / ISO-8859-8 code points of interest -FINAL_KAF = 0xea -NORMAL_KAF = 0xeb -FINAL_MEM = 0xed -NORMAL_MEM = 0xee -FINAL_NUN = 0xef -NORMAL_NUN = 0xf0 -FINAL_PE = 0xf3 -NORMAL_PE = 0xf4 -FINAL_TSADI = 0xf5 -NORMAL_TSADI = 0xf6 - -# Minimum Visual vs Logical final letter score difference. -# If the difference is below this, don't rely solely on the final letter score -# distance. -MIN_FINAL_CHAR_DISTANCE = 5 - -# Minimum Visual vs Logical model score difference. -# If the difference is below this, don't rely at all on the model score -# distance. -MIN_MODEL_DISTANCE = 0.01 - -VISUAL_HEBREW_NAME = "ISO-8859-8" -LOGICAL_HEBREW_NAME = "windows-1255" - - -class HebrewProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mLogicalProber = None - self._mVisualProber = None - self.reset() - - def reset(self): - self._mFinalCharLogicalScore = 0 - self._mFinalCharVisualScore = 0 - # The two last characters seen in the previous buffer, - # mPrev and mBeforePrev are initialized to space in order to simulate - # a word delimiter at the beginning of the data - self._mPrev = ' ' - self._mBeforePrev = ' ' - # These probers are owned by the group prober. - - def set_model_probers(self, logicalProber, visualProber): - self._mLogicalProber = logicalProber - self._mVisualProber = visualProber - - def is_final(self, c): - return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, - FINAL_TSADI] - - def is_non_final(self, c): - # The normal Tsadi is not a good Non-Final letter due to words like - # 'lechotet' (to chat) containing an apostrophe after the tsadi. This - # apostrophe is converted to a space in FilterWithoutEnglishLetters - # causing the Non-Final tsadi to appear at an end of a word even - # though this is not the case in the original text. - # The letters Pe and Kaf rarely display a related behavior of not being - # a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' - # for example legally end with a Non-Final Pe or Kaf. However, the - # benefit of these letters as Non-Final letters outweighs the damage - # since these words are quite rare. - return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE] - - def feed(self, aBuf): - # Final letter analysis for logical-visual decision. - # Look for evidence that the received buffer is either logical Hebrew - # or visual Hebrew. - # The following cases are checked: - # 1) A word longer than 1 letter, ending with a final letter. This is - # an indication that the text is laid out "naturally" since the - # final letter really appears at the end. +1 for logical score. - # 2) A word longer than 1 letter, ending with a Non-Final letter. In - # normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, - # should not end with the Non-Final form of that letter. Exceptions - # to this rule are mentioned above in isNonFinal(). This is an - # indication that the text is laid out backwards. +1 for visual - # score - # 3) A word longer than 1 letter, starting with a final letter. Final - # letters should not appear at the beginning of a word. This is an - # indication that the text is laid out backwards. +1 for visual - # score. - # - # The visual score and logical score are accumulated throughout the - # text and are finally checked against each other in GetCharSetName(). - # No checking for final letters in the middle of words is done since - # that case is not an indication for either Logical or Visual text. - # - # We automatically filter out all 7-bit characters (replace them with - # spaces) so the word boundary detection works properly. [MAP] - - if self.get_state() == eNotMe: - # Both model probers say it's not them. No reason to continue. - return eNotMe - - aBuf = self.filter_high_bit_only(aBuf) - - for cur in aBuf: - if cur == ' ': - # We stand on a space - a word just ended - if self._mBeforePrev != ' ': - # next-to-last char was not a space so self._mPrev is not a - # 1 letter word - if self.is_final(self._mPrev): - # case (1) [-2:not space][-1:final letter][cur:space] - self._mFinalCharLogicalScore += 1 - elif self.is_non_final(self._mPrev): - # case (2) [-2:not space][-1:Non-Final letter][ - # cur:space] - self._mFinalCharVisualScore += 1 - else: - # Not standing on a space - if ((self._mBeforePrev == ' ') and - (self.is_final(self._mPrev)) and (cur != ' ')): - # case (3) [-2:space][-1:final letter][cur:not space] - self._mFinalCharVisualScore += 1 - self._mBeforePrev = self._mPrev - self._mPrev = cur - - # Forever detecting, till the end or until both model probers return - # eNotMe (handled above) - return eDetecting - - def get_charset_name(self): - # Make the decision: is it Logical or Visual? - # If the final letter score distance is dominant enough, rely on it. - finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore - if finalsub >= MIN_FINAL_CHAR_DISTANCE: - return LOGICAL_HEBREW_NAME - if finalsub <= -MIN_FINAL_CHAR_DISTANCE: - return VISUAL_HEBREW_NAME - - # It's not dominant enough, try to rely on the model scores instead. - modelsub = (self._mLogicalProber.get_confidence() - - self._mVisualProber.get_confidence()) - if modelsub > MIN_MODEL_DISTANCE: - return LOGICAL_HEBREW_NAME - if modelsub < -MIN_MODEL_DISTANCE: - return VISUAL_HEBREW_NAME - - # Still no good, back to final letter distance, maybe it'll save the - # day. - if finalsub < 0.0: - return VISUAL_HEBREW_NAME - - # (finalsub > 0 - Logical) or (don't know what to do) default to - # Logical. - return LOGICAL_HEBREW_NAME - - def get_state(self): - # Remain active as long as any of the model probers are active. - if (self._mLogicalProber.get_state() == eNotMe) and \ - (self._mVisualProber.get_state() == eNotMe): - return eNotMe - return eDetecting diff --git a/lib/requests/packages/chardet/jisfreq.py b/lib/requests/packages/chardet/jisfreq.py deleted file mode 100755 index 064345b0..00000000 --- a/lib/requests/packages/chardet/jisfreq.py +++ /dev/null @@ -1,569 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# Sampling from about 20M text materials include literature and computer technology -# -# Japanese frequency table, applied to both S-JIS and EUC-JP -# They are sorted in order. - -# 128 --> 0.77094 -# 256 --> 0.85710 -# 512 --> 0.92635 -# 1024 --> 0.97130 -# 2048 --> 0.99431 -# -# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58 -# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191 -# -# Typical Distribution Ratio, 25% of IDR - -JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0 - -# Char to FreqOrder table , -JIS_TABLE_SIZE = 4368 - -JISCharToFreqOrder = ( - 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 -3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 -1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48 -2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64 -2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80 -5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96 -1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112 -5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128 -5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144 -5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160 -5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176 -5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192 -5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208 -1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224 -1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240 -1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256 -2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272 -3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288 -3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304 - 4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320 - 12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336 -1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352 - 109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368 -5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384 - 271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400 - 32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416 - 43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432 - 280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448 - 54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464 -5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480 -5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496 -5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512 -4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528 -5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544 -5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560 -5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576 -5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592 -5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608 -5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624 -5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640 -5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656 -5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672 -3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688 -5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704 -5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720 -5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736 -5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752 -5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768 -5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784 -5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800 -5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816 -5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832 -5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848 -5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864 -5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880 -5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896 -5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912 -5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928 -5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944 -5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960 -5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976 -5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992 -5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008 -5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024 -5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040 -5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056 -5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072 -5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088 -5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104 -5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120 -5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136 -5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152 -5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168 -5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184 -5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200 -5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216 -5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232 -5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248 -5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264 -5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280 -5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296 -6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312 -6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328 -6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344 -6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360 -6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376 -6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392 -6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408 -6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424 -4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440 - 854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456 - 665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472 -1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488 -1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504 - 896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520 -3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536 -3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552 - 804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568 -3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584 -3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600 - 586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616 -2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632 - 277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648 -3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664 -1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680 - 380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696 -1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712 - 850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728 -2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744 -2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760 -2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776 -2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792 -1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808 -1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824 -1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840 -1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856 -2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872 -1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888 -2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904 -1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920 -1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936 -1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952 -1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968 -1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984 -1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000 - 606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016 - 684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032 -1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048 -2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064 -2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080 -2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096 -3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112 -3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128 - 884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144 -3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160 -1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176 - 861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192 -2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208 -1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224 - 576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240 -3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256 -4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272 -2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288 -1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304 -2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320 -1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336 - 385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352 - 178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368 -1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384 -2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400 -2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416 -2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432 -3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448 -1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464 -2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480 - 359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496 - 837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512 - 855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528 -1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544 -2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560 - 633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576 -1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592 -1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608 - 353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624 -1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640 -1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656 -1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672 - 764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688 -2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704 - 278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720 -2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736 -3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752 -2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768 -1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784 -6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800 -1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816 -2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832 -1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848 - 470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864 - 72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880 -3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896 -3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912 -1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928 -1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944 -1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960 -1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976 - 123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992 - 913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008 -2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024 - 900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040 -3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056 -2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072 - 423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088 -1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104 -2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120 - 220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136 -1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152 - 745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168 -4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184 -2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200 -1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216 - 666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232 -1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248 -2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264 - 376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280 -6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296 -1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312 -1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328 -2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344 -3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360 - 914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376 -3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392 -1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408 - 674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424 -1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440 - 199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456 -3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472 - 370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488 -2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504 - 414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520 -4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536 -2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552 -1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568 -1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584 -1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600 - 166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616 -1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632 -3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648 -1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664 -3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680 - 264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696 - 543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712 - 983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728 -2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744 -1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760 - 867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776 -1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792 - 894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808 -1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824 - 530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840 - 839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856 - 480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872 -1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888 -1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904 -2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920 -4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936 - 227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952 -1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968 - 328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984 -1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000 -3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016 -1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032 -2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048 -2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064 -1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080 -1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096 -2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112 - 455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128 -2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144 -1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160 -1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176 -1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192 -1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208 -3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224 -2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240 -2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256 - 575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272 -3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288 -3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304 -1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320 -2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336 -1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 -2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 -#Everything below is of no interest for detection purpose -2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384 -6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400 -6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 4416 -6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432 -6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448 -4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464 -4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480 -3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496 -3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512 -4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528 -3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544 -6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560 -4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576 -6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592 -6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608 -6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624 -6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640 -6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656 -6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672 -3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688 -3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704 -6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720 -2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736 -4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752 -4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768 -4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784 -6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800 -3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816 -4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832 -4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848 -6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864 -4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880 -6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896 -3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912 -2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928 -4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944 -2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960 -6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976 -4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992 -6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008 -6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024 -6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040 -4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056 -6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072 -2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088 -6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104 -4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120 -6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136 -4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152 -4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168 -6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184 -6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200 -6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216 -3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232 -1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248 -3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264 -3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280 -4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296 -6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312 -3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328 -6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344 -3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360 -3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376 -2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392 -6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408 -6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424 -3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440 -6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456 -3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472 -6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488 -6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504 -6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520 -4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536 -6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552 -4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568 -3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584 -3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600 -6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616 -6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632 -4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648 -6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664 -6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 5680 -6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696 -6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712 -6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728 -6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744 -4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760 -4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776 -3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792 -6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808 -4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824 -2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840 -6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856 -6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872 -4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888 -2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904 -4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920 -2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936 -4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952 -4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968 -4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984 -6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000 -3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016 -6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032 -3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048 -6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064 -2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080 -3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096 -7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112 -2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128 -3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144 -3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160 -3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176 -3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192 -7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208 -7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224 -7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240 -7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256 -7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272 -4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288 -3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304 -3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320 -4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336 -3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352 -3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368 -7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384 -4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400 -7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416 -7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432 -7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448 -7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464 -7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480 -4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496 -4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512 -7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528 -3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544 -4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560 -7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576 -7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592 -4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608 -3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624 -3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640 -7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656 -4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672 -4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688 -4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704 -4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720 -4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736 -4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752 -7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768 -7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784 -7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800 -7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816 -7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832 -2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848 -3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864 -7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880 -7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896 -3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912 -4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928 -3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 6944 -3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960 -2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976 -7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992 -7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008 -4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024 -3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040 -3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056 -7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072 -7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088 -7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104 -4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120 -7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136 -2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152 -3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168 -4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184 -7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200 -4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216 -4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232 -7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248 -7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264 -5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280 -7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296 -7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312 -7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328 -7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344 -7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360 -5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376 -5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392 -7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408 -3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424 -7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440 -7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456 -3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472 -7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488 -7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504 -1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520 -3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536 -4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552 -2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568 -3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584 -2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600 -5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616 -4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632 -4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648 -5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664 -7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680 -7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696 -7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712 -7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728 -3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744 -7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760 -3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776 -7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792 -4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808 -7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824 -7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840 -7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856 -7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872 -7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888 -7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904 -7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920 -7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936 -7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952 -7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968 -7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984 -7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000 -8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016 -8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032 -8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048 -8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064 -8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080 -8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096 -8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112 -8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128 -8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144 -8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160 -8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176 -8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192 -8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 8208 -8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224 -8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240 -8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256 -8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272 - -# flake8: noqa diff --git a/lib/requests/packages/chardet/jpcntx.py b/lib/requests/packages/chardet/jpcntx.py deleted file mode 100755 index 59aeb6a8..00000000 --- a/lib/requests/packages/chardet/jpcntx.py +++ /dev/null @@ -1,227 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .compat import wrap_ord - -NUM_OF_CATEGORY = 6 -DONT_KNOW = -1 -ENOUGH_REL_THRESHOLD = 100 -MAX_REL_THRESHOLD = 1000 -MINIMUM_DATA_THRESHOLD = 4 - -# This is hiragana 2-char sequence table, the number in each cell represents its frequency category -jp2CharContext = ( -(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), -(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), -(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), -(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), -(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), -(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), -(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), -(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), -(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), -(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), -(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), -(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), -(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), -(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), -(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), -(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), -(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), -(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), -(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), -(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), -(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), -(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), -(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), -(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), -(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), -(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), -(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), -(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), -(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), -(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), -(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), -(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), -(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), -(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), -(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), -(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), -(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), -(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), -(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), -(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), -(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), -(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), -(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), -(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), -(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), -(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), -(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), -(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), -(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), -(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), -(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), -(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), -(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), -(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), -(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), -(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), -(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), -(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), -(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), -(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), -(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), -(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), -(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), -(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), -(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), -(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), -(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), -(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), -(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), -(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), -(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), -(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), -(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), -(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), -) - -class JapaneseContextAnalysis: - def __init__(self): - self.reset() - - def reset(self): - self._mTotalRel = 0 # total sequence received - # category counters, each interger counts sequence in its category - self._mRelSample = [0] * NUM_OF_CATEGORY - # if last byte in current buffer is not the last byte of a character, - # we need to know how many bytes to skip in next buffer - self._mNeedToSkipCharNum = 0 - self._mLastCharOrder = -1 # The order of previous char - # If this flag is set to True, detection is done and conclusion has - # been made - self._mDone = False - - def feed(self, aBuf, aLen): - if self._mDone: - return - - # The buffer we got is byte oriented, and a character may span in more than one - # buffers. In case the last one or two byte in last buffer is not - # complete, we record how many byte needed to complete that character - # and skip these bytes here. We can choose to record those bytes as - # well and analyse the character once it is complete, but since a - # character will not make much difference, by simply skipping - # this character will simply our logic and improve performance. - i = self._mNeedToSkipCharNum - while i < aLen: - order, charLen = self.get_order(aBuf[i:i + 2]) - i += charLen - if i > aLen: - self._mNeedToSkipCharNum = i - aLen - self._mLastCharOrder = -1 - else: - if (order != -1) and (self._mLastCharOrder != -1): - self._mTotalRel += 1 - if self._mTotalRel > MAX_REL_THRESHOLD: - self._mDone = True - break - self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1 - self._mLastCharOrder = order - - def got_enough_data(self): - return self._mTotalRel > ENOUGH_REL_THRESHOLD - - def get_confidence(self): - # This is just one way to calculate confidence. It works well for me. - if self._mTotalRel > MINIMUM_DATA_THRESHOLD: - return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel - else: - return DONT_KNOW - - def get_order(self, aBuf): - return -1, 1 - -class SJISContextAnalysis(JapaneseContextAnalysis): - def __init__(self): - self.charset_name = "SHIFT_JIS" - - def get_charset_name(self): - return self.charset_name - - def get_order(self, aBuf): - if not aBuf: - return -1, 1 - # find out current char's byte length - first_char = wrap_ord(aBuf[0]) - if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)): - charLen = 2 - if (first_char == 0x87) or (0xFA <= first_char <= 0xFC): - self.charset_name = "CP932" - else: - charLen = 1 - - # return its order if it is hiragana - if len(aBuf) > 1: - second_char = wrap_ord(aBuf[1]) - if (first_char == 202) and (0x9F <= second_char <= 0xF1): - return second_char - 0x9F, charLen - - return -1, charLen - -class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, aBuf): - if not aBuf: - return -1, 1 - # find out current char's byte length - first_char = wrap_ord(aBuf[0]) - if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE): - charLen = 2 - elif first_char == 0x8F: - charLen = 3 - else: - charLen = 1 - - # return its order if it is hiragana - if len(aBuf) > 1: - second_char = wrap_ord(aBuf[1]) - if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3): - return second_char - 0xA1, charLen - - return -1, charLen - -# flake8: noqa diff --git a/lib/requests/packages/chardet/langbulgarianmodel.py b/lib/requests/packages/chardet/langbulgarianmodel.py deleted file mode 100755 index e5788fc6..00000000 --- a/lib/requests/packages/chardet/langbulgarianmodel.py +++ /dev/null @@ -1,229 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Character Mapping Table: -# this table is modified base on win1251BulgarianCharToOrderMap, so -# only number <64 is sure valid - -Latin5_BulgarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 -110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 -253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 -116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 -194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, # 80 -210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, # 90 - 81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, # a0 - 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # b0 - 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, # c0 - 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # d0 - 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, # e0 - 62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, # f0 -) - -win1251BulgarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, # 40 -110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, # 50 -253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, # 60 -116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, # 70 -206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, # 80 -221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, # 90 - 88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, # a0 - 73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, # b0 - 31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, # c0 - 39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, # d0 - 1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, # e0 - 7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, # f0 -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 96.9392% -# first 1024 sequences:3.0618% -# rest sequences: 0.2992% -# negative sequences: 0.0020% -BulgarianLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, -3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1, -0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0, -0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0, -0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0, -0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0, -0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3, -2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1, -3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1, -3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2, -1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0, -3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1, -1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0, -2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2, -2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0, -3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2, -1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0, -2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2, -2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0, -3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2, -1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0, -2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2, -2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0, -2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2, -1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0, -2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2, -1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0, -3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2, -1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0, -3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1, -1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0, -2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1, -1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0, -2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2, -1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0, -2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1, -1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, -1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2, -1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1, -2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2, -1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0, -2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2, -1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1, -0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2, -1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1, -1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0, -1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1, -0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1, -0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, -0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0, -1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1, -0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, -0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, -1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1, -1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, -1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -) - -Latin5BulgarianModel = { - 'charToOrderMap': Latin5_BulgarianCharToOrderMap, - 'precedenceMatrix': BulgarianLangModel, - 'mTypicalPositiveRatio': 0.969392, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-5" -} - -Win1251BulgarianModel = { - 'charToOrderMap': win1251BulgarianCharToOrderMap, - 'precedenceMatrix': BulgarianLangModel, - 'mTypicalPositiveRatio': 0.969392, - 'keepEnglishLetter': False, - 'charsetName': "windows-1251" -} - - -# flake8: noqa diff --git a/lib/requests/packages/chardet/langcyrillicmodel.py b/lib/requests/packages/chardet/langcyrillicmodel.py deleted file mode 100755 index a86f54bd..00000000 --- a/lib/requests/packages/chardet/langcyrillicmodel.py +++ /dev/null @@ -1,329 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# KOI8-R language model -# Character Mapping Table: -KOI8R_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, # 80 -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, # 90 -223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, # a0 -238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, # b0 - 27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, # c0 - 15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, # d0 - 59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, # e0 - 35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0 -) - -win1251_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, -239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -) - -latin5_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, -) - -macCyrillic_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, -239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, -) - -IBM855_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 -191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, -206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, - 3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, -220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, -230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, - 8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, - 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, -250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, -) - -IBM866_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, # 40 -155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, # 50 -253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, # 60 - 67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, # 70 - 37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, - 45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, - 3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, -191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, -207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, -223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, - 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, -239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 97.6601% -# first 1024 sequences: 2.3389% -# rest sequences: 0.1237% -# negative sequences: 0.0009% -RussianLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, -3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, -0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1, -0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0, -0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1, -1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1, -1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0, -2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1, -1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0, -3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1, -1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0, -2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2, -1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1, -1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1, -1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, -2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1, -1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0, -3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2, -1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1, -2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1, -1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0, -2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0, -0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1, -1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0, -1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1, -1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0, -3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1, -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1, -3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1, -1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1, -1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1, -0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0, -2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1, -1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0, -1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1, -0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1, -1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0, -2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2, -2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1, -1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0, -1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0, -2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0, -1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1, -0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0, -2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1, -1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1, -1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0, -0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1, -0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1, -0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, -1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1, -0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0, -0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, -1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1, -0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, -2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0, -0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, -) - -Koi8rModel = { - 'charToOrderMap': KOI8R_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "KOI8-R" -} - -Win1251CyrillicModel = { - 'charToOrderMap': win1251_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "windows-1251" -} - -Latin5CyrillicModel = { - 'charToOrderMap': latin5_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-5" -} - -MacCyrillicModel = { - 'charToOrderMap': macCyrillic_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "MacCyrillic" -}; - -Ibm866Model = { - 'charToOrderMap': IBM866_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "IBM866" -} - -Ibm855Model = { - 'charToOrderMap': IBM855_CharToOrderMap, - 'precedenceMatrix': RussianLangModel, - 'mTypicalPositiveRatio': 0.976601, - 'keepEnglishLetter': False, - 'charsetName': "IBM855" -} - -# flake8: noqa diff --git a/lib/requests/packages/chardet/langgreekmodel.py b/lib/requests/packages/chardet/langgreekmodel.py deleted file mode 100755 index ddb58376..00000000 --- a/lib/requests/packages/chardet/langgreekmodel.py +++ /dev/null @@ -1,225 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Character Mapping Table: -Latin7_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 - 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 -253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 - 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 -253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 -253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, # b0 -110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 - 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 -124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 - 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 -) - -win1253_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, # 40 - 79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, # 50 -253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, # 60 - 78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, # 70 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 80 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 90 -253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, # a0 -253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, # b0 -110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, # c0 - 35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0 -124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0 - 9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0 -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 98.2851% -# first 1024 sequences:1.7001% -# rest sequences: 0.0359% -# negative sequences: 0.0148% -GreekLangModel = ( -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0, -3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, -0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0, -2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0, -0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0, -2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0, -2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0, -0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0, -2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0, -0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0, -3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0, -3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0, -2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0, -2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0, -0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0, -0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0, -0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2, -0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0, -0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2, -0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0, -0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2, -0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2, -0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, -0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2, -0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0, -0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0, -0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0, -0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0, -0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2, -0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0, -0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2, -0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2, -0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0, -0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2, -0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0, -0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1, -0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0, -0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2, -0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2, -0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2, -0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, -0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, -0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1, -0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0, -0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0, -0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -) - -Latin7GreekModel = { - 'charToOrderMap': Latin7_CharToOrderMap, - 'precedenceMatrix': GreekLangModel, - 'mTypicalPositiveRatio': 0.982851, - 'keepEnglishLetter': False, - 'charsetName': "ISO-8859-7" -} - -Win1253GreekModel = { - 'charToOrderMap': win1253_CharToOrderMap, - 'precedenceMatrix': GreekLangModel, - 'mTypicalPositiveRatio': 0.982851, - 'keepEnglishLetter': False, - 'charsetName': "windows-1253" -} - -# flake8: noqa diff --git a/lib/requests/packages/chardet/langhebrewmodel.py b/lib/requests/packages/chardet/langhebrewmodel.py deleted file mode 100755 index 75f2bc7f..00000000 --- a/lib/requests/packages/chardet/langhebrewmodel.py +++ /dev/null @@ -1,201 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Simon Montagu -# Portions created by the Initial Developer are Copyright (C) 2005 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# Shoshannah Forbes - original C code (?) -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Windows-1255 language model -# Character Mapping Table: -win1255_CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, # 40 - 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, # 50 -253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, # 60 - 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, # 70 -124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, -215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, - 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, -106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, - 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, -238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, - 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, - 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 98.4004% -# first 1024 sequences: 1.5981% -# rest sequences: 0.087% -# negative sequences: 0.0015% -HebrewLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, -3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, -1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, -1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, -1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, -1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, -0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, -0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, -0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, -0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, -0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, -0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, -0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, -0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, -0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, -0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, -0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, -0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, -1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, -0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, -0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, -0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, -0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, -0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, -2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, -0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, -0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, -1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, -0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, -2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, -1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, -2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, -2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, -0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, -0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, -) - -Win1255HebrewModel = { - 'charToOrderMap': win1255_CharToOrderMap, - 'precedenceMatrix': HebrewLangModel, - 'mTypicalPositiveRatio': 0.984004, - 'keepEnglishLetter': False, - 'charsetName': "windows-1255" -} - -# flake8: noqa diff --git a/lib/requests/packages/chardet/langhungarianmodel.py b/lib/requests/packages/chardet/langhungarianmodel.py deleted file mode 100755 index 49d2f0fe..00000000 --- a/lib/requests/packages/chardet/langhungarianmodel.py +++ /dev/null @@ -1,225 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# Character Mapping Table: -Latin2_HungarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, - 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, -253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, - 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, -159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174, -175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, -191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205, - 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, -221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231, -232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, - 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, -245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, -) - -win1250HungarianCharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, - 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253, -253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8, - 23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253, -161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176, -177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190, -191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205, - 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220, -221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231, -232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, - 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, -245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 94.7368% -# first 1024 sequences:5.2623% -# rest sequences: 0.8894% -# negative sequences: 0.0009% -HungarianLangModel = ( -0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, -3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, -3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3, -0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2, -0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, -3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, -3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2, -0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1, -0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, -3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0, -1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0, -1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0, -1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1, -3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1, -2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1, -2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1, -2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1, -2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0, -2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, -3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1, -2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1, -2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1, -2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1, -1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1, -1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1, -3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0, -1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1, -1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1, -2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1, -2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0, -2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1, -3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1, -2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1, -1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0, -1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0, -2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1, -2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1, -1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0, -1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1, -2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0, -1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0, -1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0, -2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1, -2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1, -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1, -1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1, -1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1, -1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0, -0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0, -2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1, -2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1, -1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1, -2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1, -1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0, -1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0, -2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0, -2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1, -2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0, -1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0, -2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0, -0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0, -0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, -0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0, -2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, -) - -Latin2HungarianModel = { - 'charToOrderMap': Latin2_HungarianCharToOrderMap, - 'precedenceMatrix': HungarianLangModel, - 'mTypicalPositiveRatio': 0.947368, - 'keepEnglishLetter': True, - 'charsetName': "ISO-8859-2" -} - -Win1250HungarianModel = { - 'charToOrderMap': win1250HungarianCharToOrderMap, - 'precedenceMatrix': HungarianLangModel, - 'mTypicalPositiveRatio': 0.947368, - 'keepEnglishLetter': True, - 'charsetName': "windows-1250" -} - -# flake8: noqa diff --git a/lib/requests/packages/chardet/langthaimodel.py b/lib/requests/packages/chardet/langthaimodel.py deleted file mode 100755 index 0508b1b1..00000000 --- a/lib/requests/packages/chardet/langthaimodel.py +++ /dev/null @@ -1,200 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Communicator client code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -# 255: Control characters that usually does not exist in any text -# 254: Carriage/Return -# 253: symbol (punctuation) that does not belong to word -# 252: 0 - 9 - -# The following result for thai was collected from a limited sample (1M). - -# Character Mapping Table: -TIS620CharToOrderMap = ( -255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 -255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 -253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 -252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 -253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, # 40 -188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, # 50 -253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, # 60 - 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, # 70 -209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222, -223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235, -236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57, - 49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54, - 45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63, - 22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244, - 11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247, - 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253, -) - -# Model Table: -# total sequences: 100% -# first 512 sequences: 92.6386% -# first 1024 sequences:7.3177% -# rest sequences: 1.0230% -# negative sequences: 0.0436% -ThaiLangModel = ( -0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3, -0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2, -3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3, -0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1, -3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2, -3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1, -3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2, -3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1, -3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1, -3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, -3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1, -2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1, -3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1, -0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1, -0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0, -3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2, -1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0, -3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3, -3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0, -1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2, -0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0, -2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3, -0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0, -3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1, -2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0, -3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2, -0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2, -3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, -3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0, -2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, -3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1, -2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1, -3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1, -3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0, -3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1, -3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1, -3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1, -1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2, -0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, -3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3, -0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, -3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0, -3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1, -1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0, -3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1, -3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2, -0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0, -0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0, -1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1, -1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1, -3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1, -0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0, -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, -3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0, -3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0, -0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1, -0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0, -0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1, -0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1, -0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0, -0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1, -0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0, -3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0, -0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0, -0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0, -3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1, -2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1, -0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0, -3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0, -0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0, -1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0, -1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, -1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0, -1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -) - -TIS620ThaiModel = { - 'charToOrderMap': TIS620CharToOrderMap, - 'precedenceMatrix': ThaiLangModel, - 'mTypicalPositiveRatio': 0.926386, - 'keepEnglishLetter': False, - 'charsetName': "TIS-620" -} - -# flake8: noqa diff --git a/lib/requests/packages/chardet/latin1prober.py b/lib/requests/packages/chardet/latin1prober.py deleted file mode 100755 index eef35735..00000000 --- a/lib/requests/packages/chardet/latin1prober.py +++ /dev/null @@ -1,139 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetprober import CharSetProber -from .constants import eNotMe -from .compat import wrap_ord - -FREQ_CAT_NUM = 4 - -UDF = 0 # undefined -OTH = 1 # other -ASC = 2 # ascii capital letter -ASS = 3 # ascii small letter -ACV = 4 # accent capital vowel -ACO = 5 # accent capital other -ASV = 6 # accent small vowel -ASO = 7 # accent small other -CLASS_NUM = 8 # total classes - -Latin1_CharToClass = ( - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F - OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 - ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F - ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 - ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F - OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 - ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F - ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 - ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F - OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 - OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F - UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 - OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 - OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF - ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 - ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF - ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 - ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF - ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 - ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF - ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 - ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF -) - -# 0 : illegal -# 1 : very unlikely -# 2 : normal -# 3 : very likely -Latin1ClassModel = ( - # UDF OTH ASC ASS ACV ACO ASV ASO - 0, 0, 0, 0, 0, 0, 0, 0, # UDF - 0, 3, 3, 3, 3, 3, 3, 3, # OTH - 0, 3, 3, 3, 3, 3, 3, 3, # ASC - 0, 3, 3, 3, 1, 1, 3, 3, # ASS - 0, 3, 3, 3, 1, 2, 1, 2, # ACV - 0, 3, 3, 3, 3, 3, 3, 3, # ACO - 0, 3, 1, 3, 1, 1, 1, 3, # ASV - 0, 3, 1, 3, 1, 1, 3, 3, # ASO -) - - -class Latin1Prober(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self.reset() - - def reset(self): - self._mLastCharClass = OTH - self._mFreqCounter = [0] * FREQ_CAT_NUM - CharSetProber.reset(self) - - def get_charset_name(self): - return "windows-1252" - - def feed(self, aBuf): - aBuf = self.filter_with_english_letters(aBuf) - for c in aBuf: - charClass = Latin1_CharToClass[wrap_ord(c)] - freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) - + charClass] - if freq == 0: - self._mState = eNotMe - break - self._mFreqCounter[freq] += 1 - self._mLastCharClass = charClass - - return self.get_state() - - def get_confidence(self): - if self.get_state() == eNotMe: - return 0.01 - - total = sum(self._mFreqCounter) - if total < 0.01: - confidence = 0.0 - else: - confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) - / total) - if confidence < 0.0: - confidence = 0.0 - # lower the confidence of latin1 so that other more accurate - # detector can take priority. - confidence = confidence * 0.73 - return confidence diff --git a/lib/requests/packages/chardet/mbcharsetprober.py b/lib/requests/packages/chardet/mbcharsetprober.py deleted file mode 100755 index bb42f2fb..00000000 --- a/lib/requests/packages/chardet/mbcharsetprober.py +++ /dev/null @@ -1,86 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# Proofpoint, Inc. -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from . import constants -from .charsetprober import CharSetProber - - -class MultiByteCharSetProber(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mDistributionAnalyzer = None - self._mCodingSM = None - self._mLastChar = [0, 0] - - def reset(self): - CharSetProber.reset(self) - if self._mCodingSM: - self._mCodingSM.reset() - if self._mDistributionAnalyzer: - self._mDistributionAnalyzer.reset() - self._mLastChar = [0, 0] - - def get_charset_name(self): - pass - - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() - if i == 0: - self._mLastChar[1] = aBuf[0] - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) - else: - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) - - self._mLastChar[0] = aBuf[aLen - 1] - - if self.get_state() == constants.eDetecting: - if (self._mDistributionAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - return self._mDistributionAnalyzer.get_confidence() diff --git a/lib/requests/packages/chardet/mbcsgroupprober.py b/lib/requests/packages/chardet/mbcsgroupprober.py deleted file mode 100755 index 03c9dcf3..00000000 --- a/lib/requests/packages/chardet/mbcsgroupprober.py +++ /dev/null @@ -1,54 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# Proofpoint, Inc. -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetgroupprober import CharSetGroupProber -from .utf8prober import UTF8Prober -from .sjisprober import SJISProber -from .eucjpprober import EUCJPProber -from .gb2312prober import GB2312Prober -from .euckrprober import EUCKRProber -from .cp949prober import CP949Prober -from .big5prober import Big5Prober -from .euctwprober import EUCTWProber - - -class MBCSGroupProber(CharSetGroupProber): - def __init__(self): - CharSetGroupProber.__init__(self) - self._mProbers = [ - UTF8Prober(), - SJISProber(), - EUCJPProber(), - GB2312Prober(), - EUCKRProber(), - CP949Prober(), - Big5Prober(), - EUCTWProber() - ] - self.reset() diff --git a/lib/requests/packages/chardet/mbcssm.py b/lib/requests/packages/chardet/mbcssm.py deleted file mode 100755 index efe678ca..00000000 --- a/lib/requests/packages/chardet/mbcssm.py +++ /dev/null @@ -1,572 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .constants import eStart, eError, eItsMe - -# BIG5 - -BIG5_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 4,4,4,4,4,4,4,4, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 4,3,3,3,3,3,3,3, # a0 - a7 - 3,3,3,3,3,3,3,3, # a8 - af - 3,3,3,3,3,3,3,3, # b0 - b7 - 3,3,3,3,3,3,3,3, # b8 - bf - 3,3,3,3,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff -) - -BIG5_st = ( - eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 - eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,#08-0f - eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart#10-17 -) - -Big5CharLenTable = (0, 1, 1, 2, 0) - -Big5SMModel = {'classTable': BIG5_cls, - 'classFactor': 5, - 'stateTable': BIG5_st, - 'charLenTable': Big5CharLenTable, - 'name': 'Big5'} - -# CP949 - -CP949_cls = ( - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f - 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f - 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f - 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f - 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f - 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f - 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f - 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f - 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af - 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf - 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff -) - -CP949_st = ( -#cls= 0 1 2 3 4 5 6 7 8 9 # previous state = - eError,eStart, 3,eError,eStart,eStart, 4, 5,eError, 6, # eStart - eError,eError,eError,eError,eError,eError,eError,eError,eError,eError, # eError - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe, # eItsMe - eError,eError,eStart,eStart,eError,eError,eError,eStart,eStart,eStart, # 3 - eError,eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 4 - eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart,eStart, # 5 - eError,eStart,eStart,eStart,eStart,eError,eError,eStart,eStart,eStart, # 6 -) - -CP949CharLenTable = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) - -CP949SMModel = {'classTable': CP949_cls, - 'classFactor': 10, - 'stateTable': CP949_st, - 'charLenTable': CP949CharLenTable, - 'name': 'CP949'} - -# EUC-JP - -EUCJP_cls = ( - 4,4,4,4,4,4,4,4, # 00 - 07 - 4,4,4,4,4,4,5,5, # 08 - 0f - 4,4,4,4,4,4,4,4, # 10 - 17 - 4,4,4,5,4,4,4,4, # 18 - 1f - 4,4,4,4,4,4,4,4, # 20 - 27 - 4,4,4,4,4,4,4,4, # 28 - 2f - 4,4,4,4,4,4,4,4, # 30 - 37 - 4,4,4,4,4,4,4,4, # 38 - 3f - 4,4,4,4,4,4,4,4, # 40 - 47 - 4,4,4,4,4,4,4,4, # 48 - 4f - 4,4,4,4,4,4,4,4, # 50 - 57 - 4,4,4,4,4,4,4,4, # 58 - 5f - 4,4,4,4,4,4,4,4, # 60 - 67 - 4,4,4,4,4,4,4,4, # 68 - 6f - 4,4,4,4,4,4,4,4, # 70 - 77 - 4,4,4,4,4,4,4,4, # 78 - 7f - 5,5,5,5,5,5,5,5, # 80 - 87 - 5,5,5,5,5,5,1,3, # 88 - 8f - 5,5,5,5,5,5,5,5, # 90 - 97 - 5,5,5,5,5,5,5,5, # 98 - 9f - 5,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,0,5 # f8 - ff -) - -EUCJP_st = ( - 3, 4, 3, 5,eStart,eError,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError,#10-17 - eError,eError,eStart,eError,eError,eError, 3,eError,#18-1f - 3,eError,eError,eError,eStart,eStart,eStart,eStart#20-27 -) - -EUCJPCharLenTable = (2, 2, 2, 3, 1, 0) - -EUCJPSMModel = {'classTable': EUCJP_cls, - 'classFactor': 6, - 'stateTable': EUCJP_st, - 'charLenTable': EUCJPCharLenTable, - 'name': 'EUC-JP'} - -# EUC-KR - -EUCKR_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,3,3,3, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,3,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 2,2,2,2,2,2,2,2, # e0 - e7 - 2,2,2,2,2,2,2,2, # e8 - ef - 2,2,2,2,2,2,2,2, # f0 - f7 - 2,2,2,2,2,2,2,0 # f8 - ff -) - -EUCKR_st = ( - eError,eStart, 3,eError,eError,eError,eError,eError,#00-07 - eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart #08-0f -) - -EUCKRCharLenTable = (0, 1, 2, 0) - -EUCKRSMModel = {'classTable': EUCKR_cls, - 'classFactor': 4, - 'stateTable': EUCKR_st, - 'charLenTable': EUCKRCharLenTable, - 'name': 'EUC-KR'} - -# EUC-TW - -EUCTW_cls = ( - 2,2,2,2,2,2,2,2, # 00 - 07 - 2,2,2,2,2,2,0,0, # 08 - 0f - 2,2,2,2,2,2,2,2, # 10 - 17 - 2,2,2,0,2,2,2,2, # 18 - 1f - 2,2,2,2,2,2,2,2, # 20 - 27 - 2,2,2,2,2,2,2,2, # 28 - 2f - 2,2,2,2,2,2,2,2, # 30 - 37 - 2,2,2,2,2,2,2,2, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,2, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,6,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,3,4,4,4,4,4,4, # a0 - a7 - 5,5,1,1,1,1,1,1, # a8 - af - 1,1,1,1,1,1,1,1, # b0 - b7 - 1,1,1,1,1,1,1,1, # b8 - bf - 1,1,3,1,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff -) - -EUCTW_st = ( - eError,eError,eStart, 3, 3, 3, 4,eError,#00-07 - eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError,#10-17 - eStart,eStart,eStart,eError,eError,eError,eError,eError,#18-1f - 5,eError,eError,eError,eStart,eError,eStart,eStart,#20-27 - eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f -) - -EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3) - -EUCTWSMModel = {'classTable': EUCTW_cls, - 'classFactor': 7, - 'stateTable': EUCTW_st, - 'charLenTable': EUCTWCharLenTable, - 'name': 'x-euc-tw'} - -# GB2312 - -GB2312_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 3,3,3,3,3,3,3,3, # 30 - 37 - 3,3,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,4, # 78 - 7f - 5,6,6,6,6,6,6,6, # 80 - 87 - 6,6,6,6,6,6,6,6, # 88 - 8f - 6,6,6,6,6,6,6,6, # 90 - 97 - 6,6,6,6,6,6,6,6, # 98 - 9f - 6,6,6,6,6,6,6,6, # a0 - a7 - 6,6,6,6,6,6,6,6, # a8 - af - 6,6,6,6,6,6,6,6, # b0 - b7 - 6,6,6,6,6,6,6,6, # b8 - bf - 6,6,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 6,6,6,6,6,6,6,6, # e0 - e7 - 6,6,6,6,6,6,6,6, # e8 - ef - 6,6,6,6,6,6,6,6, # f0 - f7 - 6,6,6,6,6,6,6,0 # f8 - ff -) - -GB2312_st = ( - eError,eStart,eStart,eStart,eStart,eStart, 3,eError,#00-07 - eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,#10-17 - 4,eError,eStart,eStart,eError,eError,eError,eError,#18-1f - eError,eError, 5,eError,eError,eError,eItsMe,eError,#20-27 - eError,eError,eStart,eStart,eStart,eStart,eStart,eStart #28-2f -) - -# To be accurate, the length of class 6 can be either 2 or 4. -# But it is not necessary to discriminate between the two since -# it is used for frequency analysis only, and we are validing -# each code range there as well. So it is safe to set it to be -# 2 here. -GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2) - -GB2312SMModel = {'classTable': GB2312_cls, - 'classFactor': 7, - 'stateTable': GB2312_st, - 'charLenTable': GB2312CharLenTable, - 'name': 'GB2312'} - -# Shift_JIS - -SJIS_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,2,2,3, # 80 - 87 - 3,3,3,3,3,3,3,3, # 88 - 8f - 3,3,3,3,3,3,3,3, # 90 - 97 - 3,3,3,3,3,3,3,3, # 98 - 9f - #0xa0 is illegal in sjis encoding, but some pages does - #contain such byte. We need to be more error forgiven. - 2,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,4,4,4, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,0,0,0) # f8 - ff - - -SJIS_st = ( - eError,eStart,eStart, 3,eError,eError,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart #10-17 -) - -SJISCharLenTable = (0, 1, 1, 2, 0, 0) - -SJISSMModel = {'classTable': SJIS_cls, - 'classFactor': 6, - 'stateTable': SJIS_st, - 'charLenTable': SJISCharLenTable, - 'name': 'Shift_JIS'} - -# UCS2-BE - -UCS2BE_cls = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff -) - -UCS2BE_st = ( - 5, 7, 7,eError, 4, 3,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe, 6, 6, 6, 6,eError,eError,#10-17 - 6, 6, 6, 6, 6,eItsMe, 6, 6,#18-1f - 6, 6, 6, 6, 5, 7, 7,eError,#20-27 - 5, 8, 6, 6,eError, 6, 6, 6,#28-2f - 6, 6, 6, 6,eError,eError,eStart,eStart #30-37 -) - -UCS2BECharLenTable = (2, 2, 2, 0, 2, 2) - -UCS2BESMModel = {'classTable': UCS2BE_cls, - 'classFactor': 6, - 'stateTable': UCS2BE_st, - 'charLenTable': UCS2BECharLenTable, - 'name': 'UTF-16BE'} - -# UCS2-LE - -UCS2LE_cls = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff -) - -UCS2LE_st = ( - 6, 6, 7, 6, 4, 3,eError,eError,#00-07 - eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,#08-0f - eItsMe,eItsMe, 5, 5, 5,eError,eItsMe,eError,#10-17 - 5, 5, 5,eError, 5,eError, 6, 6,#18-1f - 7, 6, 8, 8, 5, 5, 5,eError,#20-27 - 5, 5, 5,eError,eError,eError, 5, 5,#28-2f - 5, 5, 5,eError, 5,eError,eStart,eStart #30-37 -) - -UCS2LECharLenTable = (2, 2, 2, 2, 2, 2) - -UCS2LESMModel = {'classTable': UCS2LE_cls, - 'classFactor': 6, - 'stateTable': UCS2LE_st, - 'charLenTable': UCS2LECharLenTable, - 'name': 'UTF-16LE'} - -# UTF-8 - -UTF8_cls = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 2,2,2,2,3,3,3,3, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 5,5,5,5,5,5,5,5, # a0 - a7 - 5,5,5,5,5,5,5,5, # a8 - af - 5,5,5,5,5,5,5,5, # b0 - b7 - 5,5,5,5,5,5,5,5, # b8 - bf - 0,0,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 7,8,8,8,8,8,8,8, # e0 - e7 - 8,8,8,8,8,9,8,8, # e8 - ef - 10,11,11,11,11,11,11,11, # f0 - f7 - 12,13,13,13,14,15,0,0 # f8 - ff -) - -UTF8_st = ( - eError,eStart,eError,eError,eError,eError, 12, 10,#00-07 - 9, 11, 8, 7, 6, 5, 4, 3,#08-0f - eError,eError,eError,eError,eError,eError,eError,eError,#10-17 - eError,eError,eError,eError,eError,eError,eError,eError,#18-1f - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#20-27 - eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,#28-2f - eError,eError, 5, 5, 5, 5,eError,eError,#30-37 - eError,eError,eError,eError,eError,eError,eError,eError,#38-3f - eError,eError,eError, 5, 5, 5,eError,eError,#40-47 - eError,eError,eError,eError,eError,eError,eError,eError,#48-4f - eError,eError, 7, 7, 7, 7,eError,eError,#50-57 - eError,eError,eError,eError,eError,eError,eError,eError,#58-5f - eError,eError,eError,eError, 7, 7,eError,eError,#60-67 - eError,eError,eError,eError,eError,eError,eError,eError,#68-6f - eError,eError, 9, 9, 9, 9,eError,eError,#70-77 - eError,eError,eError,eError,eError,eError,eError,eError,#78-7f - eError,eError,eError,eError,eError, 9,eError,eError,#80-87 - eError,eError,eError,eError,eError,eError,eError,eError,#88-8f - eError,eError, 12, 12, 12, 12,eError,eError,#90-97 - eError,eError,eError,eError,eError,eError,eError,eError,#98-9f - eError,eError,eError,eError,eError, 12,eError,eError,#a0-a7 - eError,eError,eError,eError,eError,eError,eError,eError,#a8-af - eError,eError, 12, 12, 12,eError,eError,eError,#b0-b7 - eError,eError,eError,eError,eError,eError,eError,eError,#b8-bf - eError,eError,eStart,eStart,eStart,eStart,eError,eError,#c0-c7 - eError,eError,eError,eError,eError,eError,eError,eError #c8-cf -) - -UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) - -UTF8SMModel = {'classTable': UTF8_cls, - 'classFactor': 16, - 'stateTable': UTF8_st, - 'charLenTable': UTF8CharLenTable, - 'name': 'UTF-8'} diff --git a/lib/requests/packages/chardet/sbcharsetprober.py b/lib/requests/packages/chardet/sbcharsetprober.py deleted file mode 100755 index 37291bd2..00000000 --- a/lib/requests/packages/chardet/sbcharsetprober.py +++ /dev/null @@ -1,120 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from . import constants -from .charsetprober import CharSetProber -from .compat import wrap_ord - -SAMPLE_SIZE = 64 -SB_ENOUGH_REL_THRESHOLD = 1024 -POSITIVE_SHORTCUT_THRESHOLD = 0.95 -NEGATIVE_SHORTCUT_THRESHOLD = 0.05 -SYMBOL_CAT_ORDER = 250 -NUMBER_OF_SEQ_CAT = 4 -POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 -#NEGATIVE_CAT = 0 - - -class SingleByteCharSetProber(CharSetProber): - def __init__(self, model, reversed=False, nameProber=None): - CharSetProber.__init__(self) - self._mModel = model - # TRUE if we need to reverse every pair in the model lookup - self._mReversed = reversed - # Optional auxiliary prober for name decision - self._mNameProber = nameProber - self.reset() - - def reset(self): - CharSetProber.reset(self) - # char order of last character - self._mLastOrder = 255 - self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT - self._mTotalSeqs = 0 - self._mTotalChar = 0 - # characters that fall in our sampling range - self._mFreqChar = 0 - - def get_charset_name(self): - if self._mNameProber: - return self._mNameProber.get_charset_name() - else: - return self._mModel['charsetName'] - - def feed(self, aBuf): - if not self._mModel['keepEnglishLetter']: - aBuf = self.filter_without_english_letters(aBuf) - aLen = len(aBuf) - if not aLen: - return self.get_state() - for c in aBuf: - order = self._mModel['charToOrderMap'][wrap_ord(c)] - if order < SYMBOL_CAT_ORDER: - self._mTotalChar += 1 - if order < SAMPLE_SIZE: - self._mFreqChar += 1 - if self._mLastOrder < SAMPLE_SIZE: - self._mTotalSeqs += 1 - if not self._mReversed: - i = (self._mLastOrder * SAMPLE_SIZE) + order - model = self._mModel['precedenceMatrix'][i] - else: # reverse the order of the letters in the lookup - i = (order * SAMPLE_SIZE) + self._mLastOrder - model = self._mModel['precedenceMatrix'][i] - self._mSeqCounters[model] += 1 - self._mLastOrder = order - - if self.get_state() == constants.eDetecting: - if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: - cf = self.get_confidence() - if cf > POSITIVE_SHORTCUT_THRESHOLD: - if constants._debug: - sys.stderr.write('%s confidence = %s, we have a' - 'winner\n' % - (self._mModel['charsetName'], cf)) - self._mState = constants.eFoundIt - elif cf < NEGATIVE_SHORTCUT_THRESHOLD: - if constants._debug: - sys.stderr.write('%s confidence = %s, below negative' - 'shortcut threshhold %s\n' % - (self._mModel['charsetName'], cf, - NEGATIVE_SHORTCUT_THRESHOLD)) - self._mState = constants.eNotMe - - return self.get_state() - - def get_confidence(self): - r = 0.01 - if self._mTotalSeqs > 0: - r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs - / self._mModel['mTypicalPositiveRatio']) - r = r * self._mFreqChar / self._mTotalChar - if r >= 1.0: - r = 0.99 - return r diff --git a/lib/requests/packages/chardet/sbcsgroupprober.py b/lib/requests/packages/chardet/sbcsgroupprober.py deleted file mode 100755 index 1b6196cd..00000000 --- a/lib/requests/packages/chardet/sbcsgroupprober.py +++ /dev/null @@ -1,69 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from .charsetgroupprober import CharSetGroupProber -from .sbcharsetprober import SingleByteCharSetProber -from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, - Latin5CyrillicModel, MacCyrillicModel, - Ibm866Model, Ibm855Model) -from .langgreekmodel import Latin7GreekModel, Win1253GreekModel -from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel -from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel -from .langthaimodel import TIS620ThaiModel -from .langhebrewmodel import Win1255HebrewModel -from .hebrewprober import HebrewProber - - -class SBCSGroupProber(CharSetGroupProber): - def __init__(self): - CharSetGroupProber.__init__(self) - self._mProbers = [ - SingleByteCharSetProber(Win1251CyrillicModel), - SingleByteCharSetProber(Koi8rModel), - SingleByteCharSetProber(Latin5CyrillicModel), - SingleByteCharSetProber(MacCyrillicModel), - SingleByteCharSetProber(Ibm866Model), - SingleByteCharSetProber(Ibm855Model), - SingleByteCharSetProber(Latin7GreekModel), - SingleByteCharSetProber(Win1253GreekModel), - SingleByteCharSetProber(Latin5BulgarianModel), - SingleByteCharSetProber(Win1251BulgarianModel), - SingleByteCharSetProber(Latin2HungarianModel), - SingleByteCharSetProber(Win1250HungarianModel), - SingleByteCharSetProber(TIS620ThaiModel), - ] - hebrewProber = HebrewProber() - logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, - False, hebrewProber) - visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, - hebrewProber) - hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) - self._mProbers.extend([hebrewProber, logicalHebrewProber, - visualHebrewProber]) - - self.reset() diff --git a/lib/requests/packages/chardet/sjisprober.py b/lib/requests/packages/chardet/sjisprober.py deleted file mode 100755 index cd0e9e70..00000000 --- a/lib/requests/packages/chardet/sjisprober.py +++ /dev/null @@ -1,91 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -import sys -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine -from .chardistribution import SJISDistributionAnalysis -from .jpcntx import SJISContextAnalysis -from .mbcssm import SJISSMModel -from . import constants - - -class SJISProber(MultiByteCharSetProber): - def __init__(self): - MultiByteCharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(SJISSMModel) - self._mDistributionAnalyzer = SJISDistributionAnalysis() - self._mContextAnalyzer = SJISContextAnalysis() - self.reset() - - def reset(self): - MultiByteCharSetProber.reset(self) - self._mContextAnalyzer.reset() - - def get_charset_name(self): - return self._mContextAnalyzer.get_charset_name() - - def feed(self, aBuf): - aLen = len(aBuf) - for i in range(0, aLen): - codingState = self._mCodingSM.next_state(aBuf[i]) - if codingState == constants.eError: - if constants._debug: - sys.stderr.write(self.get_charset_name() - + ' prober hit error at byte ' + str(i) - + '\n') - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - charLen = self._mCodingSM.get_current_charlen() - if i == 0: - self._mLastChar[1] = aBuf[0] - self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], - charLen) - self._mDistributionAnalyzer.feed(self._mLastChar, charLen) - else: - self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 - - charLen], charLen) - self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], - charLen) - - self._mLastChar[0] = aBuf[aLen - 1] - - if self.get_state() == constants.eDetecting: - if (self._mContextAnalyzer.got_enough_data() and - (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - contxtCf = self._mContextAnalyzer.get_confidence() - distribCf = self._mDistributionAnalyzer.get_confidence() - return max(contxtCf, distribCf) diff --git a/lib/requests/packages/chardet/universaldetector.py b/lib/requests/packages/chardet/universaldetector.py deleted file mode 100755 index 476522b9..00000000 --- a/lib/requests/packages/chardet/universaldetector.py +++ /dev/null @@ -1,170 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is Mozilla Universal charset detector code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 2001 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# Shy Shalom - original C code -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -import sys -import codecs -from .latin1prober import Latin1Prober # windows-1252 -from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets -from .sbcsgroupprober import SBCSGroupProber # single-byte character sets -from .escprober import EscCharSetProber # ISO-2122, etc. -import re - -MINIMUM_THRESHOLD = 0.20 -ePureAscii = 0 -eEscAscii = 1 -eHighbyte = 2 - - -class UniversalDetector: - def __init__(self): - self._highBitDetector = re.compile(b'[\x80-\xFF]') - self._escDetector = re.compile(b'(\033|~{)') - self._mEscCharSetProber = None - self._mCharSetProbers = [] - self.reset() - - def reset(self): - self.result = {'encoding': None, 'confidence': 0.0} - self.done = False - self._mStart = True - self._mGotData = False - self._mInputState = ePureAscii - self._mLastChar = b'' - if self._mEscCharSetProber: - self._mEscCharSetProber.reset() - for prober in self._mCharSetProbers: - prober.reset() - - def feed(self, aBuf): - if self.done: - return - - aLen = len(aBuf) - if not aLen: - return - - if not self._mGotData: - # If the data starts with BOM, we know it is UTF - if aBuf[:3] == codecs.BOM_UTF8: - # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0} - elif aBuf[:4] == codecs.BOM_UTF32_LE: - # FF FE 00 00 UTF-32, little-endian BOM - self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} - elif aBuf[:4] == codecs.BOM_UTF32_BE: - # 00 00 FE FF UTF-32, big-endian BOM - self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} - elif aBuf[:4] == b'\xFE\xFF\x00\x00': - # FE FF 00 00 UCS-4, unusual octet order BOM (3412) - self.result = { - 'encoding': "X-ISO-10646-UCS-4-3412", - 'confidence': 1.0 - } - elif aBuf[:4] == b'\x00\x00\xFF\xFE': - # 00 00 FF FE UCS-4, unusual octet order BOM (2143) - self.result = { - 'encoding': "X-ISO-10646-UCS-4-2143", - 'confidence': 1.0 - } - elif aBuf[:2] == codecs.BOM_LE: - # FF FE UTF-16, little endian BOM - self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} - elif aBuf[:2] == codecs.BOM_BE: - # FE FF UTF-16, big endian BOM - self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} - - self._mGotData = True - if self.result['encoding'] and (self.result['confidence'] > 0.0): - self.done = True - return - - if self._mInputState == ePureAscii: - if self._highBitDetector.search(aBuf): - self._mInputState = eHighbyte - elif ((self._mInputState == ePureAscii) and - self._escDetector.search(self._mLastChar + aBuf)): - self._mInputState = eEscAscii - - self._mLastChar = aBuf[-1:] - - if self._mInputState == eEscAscii: - if not self._mEscCharSetProber: - self._mEscCharSetProber = EscCharSetProber() - if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: - self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), - 'confidence': self._mEscCharSetProber.get_confidence()} - self.done = True - elif self._mInputState == eHighbyte: - if not self._mCharSetProbers: - self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), - Latin1Prober()] - for prober in self._mCharSetProbers: - if prober.feed(aBuf) == constants.eFoundIt: - self.result = {'encoding': prober.get_charset_name(), - 'confidence': prober.get_confidence()} - self.done = True - break - - def close(self): - if self.done: - return - if not self._mGotData: - if constants._debug: - sys.stderr.write('no data received!\n') - return - self.done = True - - if self._mInputState == ePureAscii: - self.result = {'encoding': 'ascii', 'confidence': 1.0} - return self.result - - if self._mInputState == eHighbyte: - proberConfidence = None - maxProberConfidence = 0.0 - maxProber = None - for prober in self._mCharSetProbers: - if not prober: - continue - proberConfidence = prober.get_confidence() - if proberConfidence > maxProberConfidence: - maxProberConfidence = proberConfidence - maxProber = prober - if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): - self.result = {'encoding': maxProber.get_charset_name(), - 'confidence': maxProber.get_confidence()} - return self.result - - if constants._debug: - sys.stderr.write('no probers hit minimum threshhold\n') - for prober in self._mCharSetProbers[0].mProbers: - if not prober: - continue - sys.stderr.write('%s confidence = %s\n' % - (prober.get_charset_name(), - prober.get_confidence())) diff --git a/lib/requests/packages/chardet/utf8prober.py b/lib/requests/packages/chardet/utf8prober.py deleted file mode 100755 index 1c0bb5d8..00000000 --- a/lib/requests/packages/chardet/utf8prober.py +++ /dev/null @@ -1,76 +0,0 @@ -######################## BEGIN LICENSE BLOCK ######################## -# The Original Code is mozilla.org code. -# -# The Initial Developer of the Original Code is -# Netscape Communications Corporation. -# Portions created by the Initial Developer are Copyright (C) 1998 -# the Initial Developer. All Rights Reserved. -# -# Contributor(s): -# Mark Pilgrim - port to Python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA -# 02110-1301 USA -######################### END LICENSE BLOCK ######################### - -from . import constants -from .charsetprober import CharSetProber -from .codingstatemachine import CodingStateMachine -from .mbcssm import UTF8SMModel - -ONE_CHAR_PROB = 0.5 - - -class UTF8Prober(CharSetProber): - def __init__(self): - CharSetProber.__init__(self) - self._mCodingSM = CodingStateMachine(UTF8SMModel) - self.reset() - - def reset(self): - CharSetProber.reset(self) - self._mCodingSM.reset() - self._mNumOfMBChar = 0 - - def get_charset_name(self): - return "utf-8" - - def feed(self, aBuf): - for c in aBuf: - codingState = self._mCodingSM.next_state(c) - if codingState == constants.eError: - self._mState = constants.eNotMe - break - elif codingState == constants.eItsMe: - self._mState = constants.eFoundIt - break - elif codingState == constants.eStart: - if self._mCodingSM.get_current_charlen() >= 2: - self._mNumOfMBChar += 1 - - if self.get_state() == constants.eDetecting: - if self.get_confidence() > constants.SHORTCUT_THRESHOLD: - self._mState = constants.eFoundIt - - return self.get_state() - - def get_confidence(self): - unlike = 0.99 - if self._mNumOfMBChar < 6: - for i in range(0, self._mNumOfMBChar): - unlike = unlike * ONE_CHAR_PROB - return 1.0 - unlike - else: - return unlike diff --git a/lib/requests/packages/urllib3/__init__.py b/lib/requests/packages/urllib3/__init__.py deleted file mode 100755 index b80f19d2..00000000 --- a/lib/requests/packages/urllib3/__init__.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -urllib3 - Thread-safe connection pooling and re-using. -""" - -__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' -__license__ = 'MIT' -__version__ = 'dev' - - -from .connectionpool import ( - HTTPConnectionPool, - HTTPSConnectionPool, - connection_from_url -) - -from . import exceptions -from .filepost import encode_multipart_formdata -from .poolmanager import PoolManager, ProxyManager, proxy_from_url -from .response import HTTPResponse -from .util.request import make_headers -from .util.url import get_host -from .util.timeout import Timeout -from .util.retry import Retry - - -# Set default logging handler to avoid "No handler found" warnings. -import logging -try: # Python 2.7+ - from logging import NullHandler -except ImportError: - class NullHandler(logging.Handler): - def emit(self, record): - pass - -logging.getLogger(__name__).addHandler(NullHandler()) - -def add_stderr_logger(level=logging.DEBUG): - """ - Helper for quickly adding a StreamHandler to the logger. Useful for - debugging. - - Returns the handler after adding it. - """ - # This method needs to be in this __init__.py to get the __name__ correct - # even if urllib3 is vendored within another package. - logger = logging.getLogger(__name__) - handler = logging.StreamHandler() - handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) - logger.addHandler(handler) - logger.setLevel(level) - logger.debug('Added a stderr logging handler to logger: %s' % __name__) - return handler - -# ... Clean up. -del NullHandler - - -import warnings -# SecurityWarning's always go off by default. -warnings.simplefilter('always', exceptions.SecurityWarning, append=True) -# SubjectAltNameWarning's should go off once per host -warnings.simplefilter('default', exceptions.SubjectAltNameWarning) -# InsecurePlatformWarning's don't vary between requests, so we keep it default. -warnings.simplefilter('default', exceptions.InsecurePlatformWarning, - append=True) - -def disable_warnings(category=exceptions.HTTPWarning): - """ - Helper for quickly disabling all urllib3 warnings. - """ - warnings.simplefilter('ignore', category) diff --git a/lib/requests/packages/urllib3/connection.py b/lib/requests/packages/urllib3/connection.py deleted file mode 100755 index 3eab1e28..00000000 --- a/lib/requests/packages/urllib3/connection.py +++ /dev/null @@ -1,277 +0,0 @@ -import datetime -import sys -import socket -from socket import error as SocketError, timeout as SocketTimeout -import warnings -from .packages import six - -try: # Python 3 - from http.client import HTTPConnection as _HTTPConnection, HTTPException -except ImportError: - from httplib import HTTPConnection as _HTTPConnection, HTTPException - - -class DummyConnection(object): - "Used to detect a failed ConnectionCls import." - pass - - -try: # Compiled with SSL? - HTTPSConnection = DummyConnection - import ssl - BaseSSLError = ssl.SSLError -except (ImportError, AttributeError): # Platform-specific: No SSL. - ssl = None - - class BaseSSLError(BaseException): - pass - - -try: # Python 3: - # Not a no-op, we're adding this to the namespace so it can be imported. - ConnectionError = ConnectionError -except NameError: # Python 2: - class ConnectionError(Exception): - pass - - -from .exceptions import ( - NewConnectionError, - ConnectTimeoutError, - SubjectAltNameWarning, - SystemTimeWarning, -) -from .packages.ssl_match_hostname import match_hostname - -from .util.ssl_ import ( - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, - assert_fingerprint, -) - - -from .util import connection - -port_by_scheme = { - 'http': 80, - 'https': 443, -} - -RECENT_DATE = datetime.date(2014, 1, 1) - - -class HTTPConnection(_HTTPConnection, object): - """ - Based on httplib.HTTPConnection but provides an extra constructor - backwards-compatibility layer between older and newer Pythons. - - Additional keyword parameters are used to configure attributes of the connection. - Accepted parameters include: - - - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` - - ``source_address``: Set the source address for the current connection. - - .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x - - - ``socket_options``: Set specific options on the underlying socket. If not specified, then - defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling - Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. - - For example, if you wish to enable TCP Keep Alive in addition to the defaults, - you might pass:: - - HTTPConnection.default_socket_options + [ - (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), - ] - - Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). - """ - - default_port = port_by_scheme['http'] - - #: Disable Nagle's algorithm by default. - #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` - default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] - - #: Whether this connection verifies the host's certificate. - is_verified = False - - def __init__(self, *args, **kw): - if six.PY3: # Python 3 - kw.pop('strict', None) - - # Pre-set source_address in case we have an older Python like 2.6. - self.source_address = kw.get('source_address') - - if sys.version_info < (2, 7): # Python 2.6 - # _HTTPConnection on Python 2.6 will balk at this keyword arg, but - # not newer versions. We can still use it when creating a - # connection though, so we pop it *after* we have saved it as - # self.source_address. - kw.pop('source_address', None) - - #: The socket options provided by the user. If no options are - #: provided, we use the default options. - self.socket_options = kw.pop('socket_options', self.default_socket_options) - - # Superclass also sets self.source_address in Python 2.7+. - _HTTPConnection.__init__(self, *args, **kw) - - def _new_conn(self): - """ Establish a socket connection and set nodelay settings on it. - - :return: New socket connection. - """ - extra_kw = {} - if self.source_address: - extra_kw['source_address'] = self.source_address - - if self.socket_options: - extra_kw['socket_options'] = self.socket_options - - try: - conn = connection.create_connection( - (self.host, self.port), self.timeout, **extra_kw) - - except SocketTimeout as e: - raise ConnectTimeoutError( - self, "Connection to %s timed out. (connect timeout=%s)" % - (self.host, self.timeout)) - - except SocketError as e: - raise NewConnectionError( - self, "Failed to establish a new connection: %s" % e) - - return conn - - def _prepare_conn(self, conn): - self.sock = conn - # the _tunnel_host attribute was added in python 2.6.3 (via - # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do - # not have them. - if getattr(self, '_tunnel_host', None): - # TODO: Fix tunnel so it doesn't depend on self.sock state. - self._tunnel() - # Mark this connection as not reusable - self.auto_open = 0 - - def connect(self): - conn = self._new_conn() - self._prepare_conn(conn) - - -class HTTPSConnection(HTTPConnection): - default_port = port_by_scheme['https'] - - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): - - HTTPConnection.__init__(self, host, port, strict=strict, - timeout=timeout, **kw) - - self.key_file = key_file - self.cert_file = cert_file - - # Required property for Google AppEngine 1.9.0 which otherwise causes - # HTTPS requests to go out as HTTP. (See Issue #356) - self._protocol = 'https' - - def connect(self): - conn = self._new_conn() - self._prepare_conn(conn) - self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) - - -class VerifiedHTTPSConnection(HTTPSConnection): - """ - Based on httplib.HTTPSConnection but wraps the socket with - SSL certification. - """ - cert_reqs = None - ca_certs = None - ca_cert_dir = None - ssl_version = None - assert_fingerprint = None - - def set_cert(self, key_file=None, cert_file=None, - cert_reqs=None, ca_certs=None, - assert_hostname=None, assert_fingerprint=None, - ca_cert_dir=None): - - if (ca_certs or ca_cert_dir) and cert_reqs is None: - cert_reqs = 'CERT_REQUIRED' - - self.key_file = key_file - self.cert_file = cert_file - self.cert_reqs = cert_reqs - self.ca_certs = ca_certs - self.ca_cert_dir = ca_cert_dir - self.assert_hostname = assert_hostname - self.assert_fingerprint = assert_fingerprint - - def connect(self): - # Add certificate verification - conn = self._new_conn() - - resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) - resolved_ssl_version = resolve_ssl_version(self.ssl_version) - - hostname = self.host - if getattr(self, '_tunnel_host', None): - # _tunnel_host was added in Python 2.6.3 - # (See: http://hg.python.org/cpython/rev/0f57b30a152f) - - self.sock = conn - # Calls self._set_hostport(), so self.host is - # self._tunnel_host below. - self._tunnel() - # Mark this connection as not reusable - self.auto_open = 0 - - # Override the host with the one we're requesting data from. - hostname = self._tunnel_host - - is_time_off = datetime.date.today() < RECENT_DATE - if is_time_off: - warnings.warn(( - 'System time is way off (before {0}). This will probably ' - 'lead to SSL verification errors').format(RECENT_DATE), - SystemTimeWarning - ) - - # Wrap socket using verification with the root certs in - # trusted_root_certs - self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, - cert_reqs=resolved_cert_reqs, - ca_certs=self.ca_certs, - ca_cert_dir=self.ca_cert_dir, - server_hostname=hostname, - ssl_version=resolved_ssl_version) - - if self.assert_fingerprint: - assert_fingerprint(self.sock.getpeercert(binary_form=True), - self.assert_fingerprint) - elif resolved_cert_reqs != ssl.CERT_NONE \ - and self.assert_hostname is not False: - cert = self.sock.getpeercert() - if not cert.get('subjectAltName', ()): - warnings.warn(( - 'Certificate for {0} has no `subjectAltName`, falling back to check for a ' - '`commonName` for now. This feature is being removed by major browsers and ' - 'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 ' - 'for details.)'.format(hostname)), - SubjectAltNameWarning - ) - match_hostname(cert, self.assert_hostname or hostname) - - self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED - or self.assert_fingerprint is not None) - - -if ssl: - # Make a copy for testing. - UnverifiedHTTPSConnection = HTTPSConnection - HTTPSConnection = VerifiedHTTPSConnection -else: - HTTPSConnection = DummyConnection diff --git a/lib/requests/packages/urllib3/contrib/appengine.py b/lib/requests/packages/urllib3/contrib/appengine.py deleted file mode 100755 index ed9d8b81..00000000 --- a/lib/requests/packages/urllib3/contrib/appengine.py +++ /dev/null @@ -1,222 +0,0 @@ -import logging -import os -import warnings - -from ..exceptions import ( - HTTPError, - HTTPWarning, - MaxRetryError, - ProtocolError, - TimeoutError, - SSLError -) - -from ..packages.six import BytesIO -from ..request import RequestMethods -from ..response import HTTPResponse -from ..util.timeout import Timeout -from ..util.retry import Retry - -try: - from google.appengine.api import urlfetch -except ImportError: - urlfetch = None - - -log = logging.getLogger(__name__) - - -class AppEnginePlatformWarning(HTTPWarning): - pass - - -class AppEnginePlatformError(HTTPError): - pass - - -class AppEngineManager(RequestMethods): - """ - Connection manager for Google App Engine sandbox applications. - - This manager uses the URLFetch service directly instead of using the - emulated httplib, and is subject to URLFetch limitations as described in - the App Engine documentation here: - - https://cloud.google.com/appengine/docs/python/urlfetch - - Notably it will raise an AppEnginePlatformError if: - * URLFetch is not available. - * If you attempt to use this on GAEv2 (Managed VMs), as full socket - support is available. - * If a request size is more than 10 megabytes. - * If a response size is more than 32 megabtyes. - * If you use an unsupported request method such as OPTIONS. - - Beyond those cases, it will raise normal urllib3 errors. - """ - - def __init__(self, headers=None, retries=None, validate_certificate=True): - if not urlfetch: - raise AppEnginePlatformError( - "URLFetch is not available in this environment.") - - if is_prod_appengine_v2(): - raise AppEnginePlatformError( - "Use normal urllib3.PoolManager instead of AppEngineManager" - "on Managed VMs, as using URLFetch is not necessary in " - "this environment.") - - warnings.warn( - "urllib3 is using URLFetch on Google App Engine sandbox instead " - "of sockets. To use sockets directly instead of URLFetch see " - "https://urllib3.readthedocs.org/en/latest/contrib.html.", - AppEnginePlatformWarning) - - RequestMethods.__init__(self, headers) - self.validate_certificate = validate_certificate - - self.retries = retries or Retry.DEFAULT - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - # Return False to re-raise any potential exceptions - return False - - def urlopen(self, method, url, body=None, headers=None, - retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT, - **response_kw): - - retries = self._get_retries(retries, redirect) - - try: - response = urlfetch.fetch( - url, - payload=body, - method=method, - headers=headers or {}, - allow_truncated=False, - follow_redirects=( - redirect and - retries.redirect != 0 and - retries.total), - deadline=self._get_absolute_timeout(timeout), - validate_certificate=self.validate_certificate, - ) - except urlfetch.DeadlineExceededError as e: - raise TimeoutError(self, e) - - except urlfetch.InvalidURLError as e: - if 'too large' in e.message: - raise AppEnginePlatformError( - "URLFetch request too large, URLFetch only " - "supports requests up to 10mb in size.", e) - raise ProtocolError(e) - - except urlfetch.DownloadError as e: - if 'Too many redirects' in e.message: - raise MaxRetryError(self, url, reason=e) - raise ProtocolError(e) - - except urlfetch.ResponseTooLargeError as e: - raise AppEnginePlatformError( - "URLFetch response too large, URLFetch only supports" - "responses up to 32mb in size.", e) - - except urlfetch.SSLCertificateError as e: - raise SSLError(e) - - except urlfetch.InvalidMethodError as e: - raise AppEnginePlatformError( - "URLFetch does not support method: %s" % method, e) - - http_response = self._urlfetch_response_to_http_response( - response, **response_kw) - - # Check for redirect response - if (http_response.get_redirect_location() and - retries.raise_on_redirect and redirect): - raise MaxRetryError(self, url, "too many redirects") - - # Check if we should retry the HTTP response. - if retries.is_forced_retry(method, status_code=http_response.status): - retries = retries.increment( - method, url, response=http_response, _pool=self) - log.info("Forced retry: %s" % url) - retries.sleep() - return self.urlopen( - method, url, - body=body, headers=headers, - retries=retries, redirect=redirect, - timeout=timeout, **response_kw) - - return http_response - - def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw): - - if is_prod_appengine_v1(): - # Production GAE handles deflate encoding automatically, but does - # not remove the encoding header. - content_encoding = urlfetch_resp.headers.get('content-encoding') - - if content_encoding == 'deflate': - del urlfetch_resp.headers['content-encoding'] - - return HTTPResponse( - # In order for decoding to work, we must present the content as - # a file-like object. - body=BytesIO(urlfetch_resp.content), - headers=urlfetch_resp.headers, - status=urlfetch_resp.status_code, - **response_kw - ) - - def _get_absolute_timeout(self, timeout): - if timeout is Timeout.DEFAULT_TIMEOUT: - return 5 # 5s is the default timeout for URLFetch. - if isinstance(timeout, Timeout): - if not timeout.read is timeout.connect: - warnings.warn( - "URLFetch does not support granular timeout settings, " - "reverting to total timeout.", AppEnginePlatformWarning) - return timeout.total - return timeout - - def _get_retries(self, retries, redirect): - if not isinstance(retries, Retry): - retries = Retry.from_int( - retries, redirect=redirect, default=self.retries) - - if retries.connect or retries.read or retries.redirect: - warnings.warn( - "URLFetch only supports total retries and does not " - "recognize connect, read, or redirect retry parameters.", - AppEnginePlatformWarning) - - return retries - - -def is_appengine(): - return (is_local_appengine() or - is_prod_appengine_v1() or - is_prod_appengine_v2()) - - -def is_appengine_sandbox(): - return is_appengine() and not is_prod_appengine_v2() - - -def is_local_appengine(): - return ('APPENGINE_RUNTIME' in os.environ and - 'Development/' in os.environ['SERVER_SOFTWARE']) - - -def is_prod_appengine_v1(): - return ('APPENGINE_RUNTIME' in os.environ and - 'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and - not is_prod_appengine_v2()) - - -def is_prod_appengine_v2(): - return os.environ.get('GAE_VM', False) == 'true' diff --git a/lib/requests/packages/urllib3/contrib/ntlmpool.py b/lib/requests/packages/urllib3/contrib/ntlmpool.py deleted file mode 100755 index c6b266f5..00000000 --- a/lib/requests/packages/urllib3/contrib/ntlmpool.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -NTLM authenticating pool, contributed by erikcederstran - -Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 -""" - -try: - from http.client import HTTPSConnection -except ImportError: - from httplib import HTTPSConnection -from logging import getLogger -from ntlm import ntlm - -from urllib3 import HTTPSConnectionPool - - -log = getLogger(__name__) - - -class NTLMConnectionPool(HTTPSConnectionPool): - """ - Implements an NTLM authentication version of an urllib3 connection pool - """ - - scheme = 'https' - - def __init__(self, user, pw, authurl, *args, **kwargs): - """ - authurl is a random URL on the server that is protected by NTLM. - user is the Windows user, probably in the DOMAIN\\username format. - pw is the password for the user. - """ - super(NTLMConnectionPool, self).__init__(*args, **kwargs) - self.authurl = authurl - self.rawuser = user - user_parts = user.split('\\', 1) - self.domain = user_parts[0].upper() - self.user = user_parts[1] - self.pw = pw - - def _new_conn(self): - # Performs the NTLM handshake that secures the connection. The socket - # must be kept open while requests are performed. - self.num_connections += 1 - log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % - (self.num_connections, self.host, self.authurl)) - - headers = {} - headers['Connection'] = 'Keep-Alive' - req_header = 'Authorization' - resp_header = 'www-authenticate' - - conn = HTTPSConnection(host=self.host, port=self.port) - - # Send negotiation message - headers[req_header] = ( - 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) - log.debug('Request headers: %s' % headers) - conn.request('GET', self.authurl, None, headers) - res = conn.getresponse() - reshdr = dict(res.getheaders()) - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % reshdr) - log.debug('Response data: %s [...]' % res.read(100)) - - # Remove the reference to the socket, so that it can not be closed by - # the response object (we want to keep the socket open) - res.fp = None - - # Server should respond with a challenge message - auth_header_values = reshdr[resp_header].split(', ') - auth_header_value = None - for s in auth_header_values: - if s[:5] == 'NTLM ': - auth_header_value = s[5:] - if auth_header_value is None: - raise Exception('Unexpected %s response header: %s' % - (resp_header, reshdr[resp_header])) - - # Send authentication message - ServerChallenge, NegotiateFlags = \ - ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) - auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, - self.user, - self.domain, - self.pw, - NegotiateFlags) - headers[req_header] = 'NTLM %s' % auth_msg - log.debug('Request headers: %s' % headers) - conn.request('GET', self.authurl, None, headers) - res = conn.getresponse() - log.debug('Response status: %s %s' % (res.status, res.reason)) - log.debug('Response headers: %s' % dict(res.getheaders())) - log.debug('Response data: %s [...]' % res.read()[:100]) - if res.status != 200: - if res.status == 401: - raise Exception('Server rejected request: wrong ' - 'username or password') - raise Exception('Wrong server response: %s %s' % - (res.status, res.reason)) - - res.fp = None - log.debug('Connection established') - return conn - - def urlopen(self, method, url, body=None, headers=None, retries=3, - redirect=True, assert_same_host=True): - if headers is None: - headers = {} - headers['Connection'] = 'Keep-Alive' - return super(NTLMConnectionPool, self).urlopen(method, url, body, - headers, retries, - redirect, - assert_same_host) diff --git a/lib/requests/packages/urllib3/contrib/pyopenssl.py b/lib/requests/packages/urllib3/contrib/pyopenssl.py deleted file mode 100755 index c20ae46d..00000000 --- a/lib/requests/packages/urllib3/contrib/pyopenssl.py +++ /dev/null @@ -1,309 +0,0 @@ -'''SSL with SNI_-support for Python 2. Follow these instructions if you would -like to verify SSL certificates in Python 2. Note, the default libraries do -*not* do certificate checking; you need to do additional work to validate -certificates yourself. - -This needs the following packages installed: - -* pyOpenSSL (tested with 0.13) -* ndg-httpsclient (tested with 0.3.2) -* pyasn1 (tested with 0.1.6) - -You can install them with the following command: - - pip install pyopenssl ndg-httpsclient pyasn1 - -To activate certificate checking, call -:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code -before you begin making HTTP requests. This can be done in a ``sitecustomize`` -module, or at any other time before your application begins using ``urllib3``, -like this:: - - try: - import urllib3.contrib.pyopenssl - urllib3.contrib.pyopenssl.inject_into_urllib3() - except ImportError: - pass - -Now you can use :mod:`urllib3` as you normally would, and it will support SNI -when the required modules are installed. - -Activating this module also has the positive side effect of disabling SSL/TLS -compression in Python 2 (see `CRIME attack`_). - -If you want to configure the default list of supported cipher suites, you can -set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable. - -Module Variables ----------------- - -:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites. - -.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication -.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit) - -''' - -try: - from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT - from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName -except SyntaxError as e: - raise ImportError(e) - -import OpenSSL.SSL -from pyasn1.codec.der import decoder as der_decoder -from pyasn1.type import univ, constraint -from socket import _fileobject, timeout -import ssl -import select - -from .. import connection -from .. import util - -__all__ = ['inject_into_urllib3', 'extract_from_urllib3'] - -# SNI only *really* works if we can read the subjectAltName of certificates. -HAS_SNI = SUBJ_ALT_NAME_SUPPORT - -# Map from urllib3 to PyOpenSSL compatible parameter-values. -_openssl_versions = { - ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD, - ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD, -} - -try: - _openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD}) -except AttributeError: - pass - -_openssl_verify = { - ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE, - ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER, - ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER - + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT, -} - -DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS - -# OpenSSL will only write 16K at a time -SSL_WRITE_BLOCKSIZE = 16384 - -try: - _ = memoryview - has_memoryview = True -except NameError: - has_memoryview = False - -orig_util_HAS_SNI = util.HAS_SNI -orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket - - -def inject_into_urllib3(): - 'Monkey-patch urllib3 with PyOpenSSL-backed SSL-support.' - - connection.ssl_wrap_socket = ssl_wrap_socket - util.HAS_SNI = HAS_SNI - - -def extract_from_urllib3(): - 'Undo monkey-patching by :func:`inject_into_urllib3`.' - - connection.ssl_wrap_socket = orig_connection_ssl_wrap_socket - util.HAS_SNI = orig_util_HAS_SNI - - -### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. -class SubjectAltName(BaseSubjectAltName): - '''ASN.1 implementation for subjectAltNames support''' - - # There is no limit to how many SAN certificates a certificate may have, - # however this needs to have some limit so we'll set an arbitrarily high - # limit. - sizeSpec = univ.SequenceOf.sizeSpec + \ - constraint.ValueSizeConstraint(1, 1024) - - -### Note: This is a slightly bug-fixed version of same from ndg-httpsclient. -def get_subj_alt_name(peer_cert): - # Search through extensions - dns_name = [] - if not SUBJ_ALT_NAME_SUPPORT: - return dns_name - - general_names = SubjectAltName() - for i in range(peer_cert.get_extension_count()): - ext = peer_cert.get_extension(i) - ext_name = ext.get_short_name() - if ext_name != 'subjectAltName': - continue - - # PyOpenSSL returns extension data in ASN.1 encoded form - ext_dat = ext.get_data() - decoded_dat = der_decoder.decode(ext_dat, - asn1Spec=general_names) - - for name in decoded_dat: - if not isinstance(name, SubjectAltName): - continue - for entry in range(len(name)): - component = name.getComponentByPosition(entry) - if component.getName() != 'dNSName': - continue - dns_name.append(str(component.getComponent())) - - return dns_name - - -class WrappedSocket(object): - '''API-compatibility wrapper for Python OpenSSL's Connection-class. - - Note: _makefile_refs, _drop() and _reuse() are needed for the garbage - collector of pypy. - ''' - - def __init__(self, connection, socket, suppress_ragged_eofs=True): - self.connection = connection - self.socket = socket - self.suppress_ragged_eofs = suppress_ragged_eofs - self._makefile_refs = 0 - - def fileno(self): - return self.socket.fileno() - - def makefile(self, mode, bufsize=-1): - self._makefile_refs += 1 - return _fileobject(self, mode, bufsize, close=True) - - def recv(self, *args, **kwargs): - try: - data = self.connection.recv(*args, **kwargs) - except OpenSSL.SSL.SysCallError as e: - if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'): - return b'' - else: - raise - except OpenSSL.SSL.ZeroReturnError as e: - if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN: - return b'' - else: - raise - except OpenSSL.SSL.WantReadError: - rd, wd, ed = select.select( - [self.socket], [], [], self.socket.gettimeout()) - if not rd: - raise timeout('The read operation timed out') - else: - return self.recv(*args, **kwargs) - else: - return data - - def settimeout(self, timeout): - return self.socket.settimeout(timeout) - - def _send_until_done(self, data): - while True: - try: - return self.connection.send(data) - except OpenSSL.SSL.WantWriteError: - _, wlist, _ = select.select([], [self.socket], [], - self.socket.gettimeout()) - if not wlist: - raise timeout() - continue - - def sendall(self, data): - if has_memoryview and not isinstance(data, memoryview): - data = memoryview(data) - - total_sent = 0 - while total_sent < len(data): - sent = self._send_until_done(data[total_sent:total_sent+SSL_WRITE_BLOCKSIZE]) - total_sent += sent - - def shutdown(self): - # FIXME rethrow compatible exceptions should we ever use this - self.connection.shutdown() - - def close(self): - if self._makefile_refs < 1: - return self.connection.close() - else: - self._makefile_refs -= 1 - - def getpeercert(self, binary_form=False): - x509 = self.connection.get_peer_certificate() - - if not x509: - return x509 - - if binary_form: - return OpenSSL.crypto.dump_certificate( - OpenSSL.crypto.FILETYPE_ASN1, - x509) - - return { - 'subject': ( - (('commonName', x509.get_subject().CN),), - ), - 'subjectAltName': [ - ('DNS', value) - for value in get_subj_alt_name(x509) - ] - } - - def _reuse(self): - self._makefile_refs += 1 - - def _drop(self): - if self._makefile_refs < 1: - self.close() - else: - self._makefile_refs -= 1 - - -def _verify_callback(cnx, x509, err_no, err_depth, return_code): - return err_no == 0 - - -def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None, ca_cert_dir=None): - ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version]) - if certfile: - keyfile = keyfile or certfile # Match behaviour of the normal python ssl library - ctx.use_certificate_file(certfile) - if keyfile: - ctx.use_privatekey_file(keyfile) - if cert_reqs != ssl.CERT_NONE: - ctx.set_verify(_openssl_verify[cert_reqs], _verify_callback) - if ca_certs or ca_cert_dir: - try: - ctx.load_verify_locations(ca_certs, ca_cert_dir) - except OpenSSL.SSL.Error as e: - raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e) - else: - ctx.set_default_verify_paths() - - # Disable TLS compression to migitate CRIME attack (issue #309) - OP_NO_COMPRESSION = 0x20000 - ctx.set_options(OP_NO_COMPRESSION) - - # Set list of supported ciphersuites. - ctx.set_cipher_list(DEFAULT_SSL_CIPHER_LIST) - - cnx = OpenSSL.SSL.Connection(ctx, sock) - cnx.set_tlsext_host_name(server_hostname) - cnx.set_connect_state() - while True: - try: - cnx.do_handshake() - except OpenSSL.SSL.WantReadError: - rd, _, _ = select.select([sock], [], [], sock.gettimeout()) - if not rd: - raise timeout('select timed out') - continue - except OpenSSL.SSL.Error as e: - raise ssl.SSLError('bad handshake: %r' % e) - break - - return WrappedSocket(cnx, sock) diff --git a/lib/requests/packages/urllib3/exceptions.py b/lib/requests/packages/urllib3/exceptions.py deleted file mode 100755 index 9607d65f..00000000 --- a/lib/requests/packages/urllib3/exceptions.py +++ /dev/null @@ -1,193 +0,0 @@ - -## Base Exceptions - -class HTTPError(Exception): - "Base exception used by this module." - pass - -class HTTPWarning(Warning): - "Base warning used by this module." - pass - - - -class PoolError(HTTPError): - "Base exception for errors caused within a pool." - def __init__(self, pool, message): - self.pool = pool - HTTPError.__init__(self, "%s: %s" % (pool, message)) - - def __reduce__(self): - # For pickling purposes. - return self.__class__, (None, None) - - -class RequestError(PoolError): - "Base exception for PoolErrors that have associated URLs." - def __init__(self, pool, url, message): - self.url = url - PoolError.__init__(self, pool, message) - - def __reduce__(self): - # For pickling purposes. - return self.__class__, (None, self.url, None) - - -class SSLError(HTTPError): - "Raised when SSL certificate fails in an HTTPS connection." - pass - - -class ProxyError(HTTPError): - "Raised when the connection to a proxy fails." - pass - - -class DecodeError(HTTPError): - "Raised when automatic decoding based on Content-Type fails." - pass - - -class ProtocolError(HTTPError): - "Raised when something unexpected happens mid-request/response." - pass - - -#: Renamed to ProtocolError but aliased for backwards compatibility. -ConnectionError = ProtocolError - - -## Leaf Exceptions - -class MaxRetryError(RequestError): - """Raised when the maximum number of retries is exceeded. - - :param pool: The connection pool - :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` - :param string url: The requested Url - :param exceptions.Exception reason: The underlying error - - """ - - def __init__(self, pool, url, reason=None): - self.reason = reason - - message = "Max retries exceeded with url: %s (Caused by %r)" % ( - url, reason) - - RequestError.__init__(self, pool, url, message) - - -class HostChangedError(RequestError): - "Raised when an existing pool gets a request for a foreign host." - - def __init__(self, pool, url, retries=3): - message = "Tried to open a foreign host with url: %s" % url - RequestError.__init__(self, pool, url, message) - self.retries = retries - - -class TimeoutStateError(HTTPError): - """ Raised when passing an invalid state to a timeout """ - pass - - -class TimeoutError(HTTPError): - """ Raised when a socket timeout error occurs. - - Catching this error will catch both :exc:`ReadTimeoutErrors - ` and :exc:`ConnectTimeoutErrors `. - """ - pass - - -class ReadTimeoutError(TimeoutError, RequestError): - "Raised when a socket timeout occurs while receiving data from a server" - pass - - -# This timeout error does not have a URL attached and needs to inherit from the -# base HTTPError -class ConnectTimeoutError(TimeoutError): - "Raised when a socket timeout occurs while connecting to a server" - pass - -class NewConnectionError(ConnectTimeoutError, PoolError): - "Raised when we fail to establish a new connection. Usually ECONNREFUSED." - pass - -class EmptyPoolError(PoolError): - "Raised when a pool runs out of connections and no more are allowed." - pass - - -class ClosedPoolError(PoolError): - "Raised when a request enters a pool after the pool has been closed." - pass - - -class LocationValueError(ValueError, HTTPError): - "Raised when there is something wrong with a given URL input." - pass - - -class LocationParseError(LocationValueError): - "Raised when get_host or similar fails to parse the URL input." - - def __init__(self, location): - message = "Failed to parse: %s" % location - HTTPError.__init__(self, message) - - self.location = location - - -class ResponseError(HTTPError): - "Used as a container for an error reason supplied in a MaxRetryError." - GENERIC_ERROR = 'too many error responses' - SPECIFIC_ERROR = 'too many {status_code} error responses' - - -class SecurityWarning(HTTPWarning): - "Warned when perfoming security reducing actions" - pass - - -class SubjectAltNameWarning(SecurityWarning): - "Warned when connecting to a host with a certificate missing a SAN." - pass - - -class InsecureRequestWarning(SecurityWarning): - "Warned when making an unverified HTTPS request." - pass - - -class SystemTimeWarning(SecurityWarning): - "Warned when system time is suspected to be wrong" - pass - - -class InsecurePlatformWarning(SecurityWarning): - "Warned when certain SSL configuration is not available on a platform." - pass - - -class ResponseNotChunked(ProtocolError, ValueError): - "Response needs to be chunked in order to read it as chunks." - pass - - -class ProxySchemeUnknown(AssertionError, ValueError): - "ProxyManager does not support the supplied scheme" - # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. - - def __init__(self, scheme): - message = "Not supported proxy scheme %s" % scheme - super(ProxySchemeUnknown, self).__init__(message) - - -class HeaderParsingError(HTTPError): - "Raised by assert_header_parsing, but we convert it to a log.warning statement." - def __init__(self, defects, unparsed_data): - message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data) - super(HeaderParsingError, self).__init__(message) diff --git a/lib/requests/packages/urllib3/fields.py b/lib/requests/packages/urllib3/fields.py deleted file mode 100755 index c853f8d5..00000000 --- a/lib/requests/packages/urllib3/fields.py +++ /dev/null @@ -1,177 +0,0 @@ -import email.utils -import mimetypes - -from .packages import six - - -def guess_content_type(filename, default='application/octet-stream'): - """ - Guess the "Content-Type" of a file. - - :param filename: - The filename to guess the "Content-Type" of using :mod:`mimetypes`. - :param default: - If no "Content-Type" can be guessed, default to `default`. - """ - if filename: - return mimetypes.guess_type(filename)[0] or default - return default - - -def format_header_param(name, value): - """ - Helper function to format and quote a single header parameter. - - Particularly useful for header parameters which might contain - non-ASCII values, like file names. This follows RFC 2231, as - suggested by RFC 2388 Section 4.4. - - :param name: - The name of the parameter, a string expected to be ASCII only. - :param value: - The value of the parameter, provided as a unicode string. - """ - if not any(ch in value for ch in '"\\\r\n'): - result = '%s="%s"' % (name, value) - try: - result.encode('ascii') - except UnicodeEncodeError: - pass - else: - return result - if not six.PY3: # Python 2: - value = value.encode('utf-8') - value = email.utils.encode_rfc2231(value, 'utf-8') - value = '%s*=%s' % (name, value) - return value - - -class RequestField(object): - """ - A data container for request body parameters. - - :param name: - The name of this request field. - :param data: - The data/value body. - :param filename: - An optional filename of the request field. - :param headers: - An optional dict-like object of headers to initially use for the field. - """ - def __init__(self, name, data, filename=None, headers=None): - self._name = name - self._filename = filename - self.data = data - self.headers = {} - if headers: - self.headers = dict(headers) - - @classmethod - def from_tuples(cls, fieldname, value): - """ - A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. - - Supports constructing :class:`~urllib3.fields.RequestField` from - parameter of key/value strings AND key/filetuple. A filetuple is a - (filename, data, MIME type) tuple where the MIME type is optional. - For example:: - - 'foo': 'bar', - 'fakefile': ('foofile.txt', 'contents of foofile'), - 'realfile': ('barfile.txt', open('realfile').read()), - 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), - 'nonamefile': 'contents of nonamefile field', - - Field names and filenames must be unicode. - """ - if isinstance(value, tuple): - if len(value) == 3: - filename, data, content_type = value - else: - filename, data = value - content_type = guess_content_type(filename) - else: - filename = None - content_type = None - data = value - - request_param = cls(fieldname, data, filename=filename) - request_param.make_multipart(content_type=content_type) - - return request_param - - def _render_part(self, name, value): - """ - Overridable helper function to format a single header parameter. - - :param name: - The name of the parameter, a string expected to be ASCII only. - :param value: - The value of the parameter, provided as a unicode string. - """ - return format_header_param(name, value) - - def _render_parts(self, header_parts): - """ - Helper function to format and quote a single header. - - Useful for single headers that are composed of multiple items. E.g., - 'Content-Disposition' fields. - - :param header_parts: - A sequence of (k, v) typles or a :class:`dict` of (k, v) to format - as `k1="v1"; k2="v2"; ...`. - """ - parts = [] - iterable = header_parts - if isinstance(header_parts, dict): - iterable = header_parts.items() - - for name, value in iterable: - if value: - parts.append(self._render_part(name, value)) - - return '; '.join(parts) - - def render_headers(self): - """ - Renders the headers for this request field. - """ - lines = [] - - sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] - for sort_key in sort_keys: - if self.headers.get(sort_key, False): - lines.append('%s: %s' % (sort_key, self.headers[sort_key])) - - for header_name, header_value in self.headers.items(): - if header_name not in sort_keys: - if header_value: - lines.append('%s: %s' % (header_name, header_value)) - - lines.append('\r\n') - return '\r\n'.join(lines) - - def make_multipart(self, content_disposition=None, content_type=None, - content_location=None): - """ - Makes this request field into a multipart request field. - - This method overrides "Content-Disposition", "Content-Type" and - "Content-Location" headers to the request parameter. - - :param content_type: - The 'Content-Type' of the request body. - :param content_location: - The 'Content-Location' of the request body. - - """ - self.headers['Content-Disposition'] = content_disposition or 'form-data' - self.headers['Content-Disposition'] += '; '.join([ - '', self._render_parts( - (('name', self._name), ('filename', self._filename)) - ) - ]) - self.headers['Content-Type'] = content_type - self.headers['Content-Location'] = content_location diff --git a/lib/requests/packages/urllib3/packages/__init__.py b/lib/requests/packages/urllib3/packages/__init__.py deleted file mode 100755 index 37e83515..00000000 --- a/lib/requests/packages/urllib3/packages/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from __future__ import absolute_import - -from . import ssl_match_hostname - diff --git a/lib/requests/packages/urllib3/packages/ordered_dict.py b/lib/requests/packages/urllib3/packages/ordered_dict.py deleted file mode 100755 index 4479363c..00000000 --- a/lib/requests/packages/urllib3/packages/ordered_dict.py +++ /dev/null @@ -1,259 +0,0 @@ -# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. -# Passes Python2.7's test suite and incorporates all the latest updates. -# Copyright 2009 Raymond Hettinger, released under the MIT License. -# http://code.activestate.com/recipes/576693/ -try: - from thread import get_ident as _get_ident -except ImportError: - from dummy_thread import get_ident as _get_ident - -try: - from _abcoll import KeysView, ValuesView, ItemsView -except ImportError: - pass - - -class OrderedDict(dict): - 'Dictionary that remembers insertion order' - # An inherited dict maps keys to values. - # The inherited dict provides __getitem__, __len__, __contains__, and get. - # The remaining methods are order-aware. - # Big-O running times for all methods are the same as for regular dictionaries. - - # The internal self.__map dictionary maps keys to links in a doubly linked list. - # The circular doubly linked list starts and ends with a sentinel element. - # The sentinel element never gets deleted (this simplifies the algorithm). - # Each link is stored as a list of length three: [PREV, NEXT, KEY]. - - def __init__(self, *args, **kwds): - '''Initialize an ordered dictionary. Signature is the same as for - regular dictionaries, but keyword arguments are not recommended - because their insertion order is arbitrary. - - ''' - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__root - except AttributeError: - self.__root = root = [] # sentinel node - root[:] = [root, root, None] - self.__map = {} - self.__update(*args, **kwds) - - def __setitem__(self, key, value, dict_setitem=dict.__setitem__): - 'od.__setitem__(i, y) <==> od[i]=y' - # Setting a new item creates a new link which goes at the end of the linked - # list, and the inherited dictionary is updated with the new key/value pair. - if key not in self: - root = self.__root - last = root[0] - last[1] = root[0] = self.__map[key] = [last, root, key] - dict_setitem(self, key, value) - - def __delitem__(self, key, dict_delitem=dict.__delitem__): - 'od.__delitem__(y) <==> del od[y]' - # Deleting an existing item uses self.__map to find the link which is - # then removed by updating the links in the predecessor and successor nodes. - dict_delitem(self, key) - link_prev, link_next, key = self.__map.pop(key) - link_prev[1] = link_next - link_next[0] = link_prev - - def __iter__(self): - 'od.__iter__() <==> iter(od)' - root = self.__root - curr = root[1] - while curr is not root: - yield curr[2] - curr = curr[1] - - def __reversed__(self): - 'od.__reversed__() <==> reversed(od)' - root = self.__root - curr = root[0] - while curr is not root: - yield curr[2] - curr = curr[0] - - def clear(self): - 'od.clear() -> None. Remove all items from od.' - try: - for node in self.__map.itervalues(): - del node[:] - root = self.__root - root[:] = [root, root, None] - self.__map.clear() - except AttributeError: - pass - dict.clear(self) - - def popitem(self, last=True): - '''od.popitem() -> (k, v), return and remove a (key, value) pair. - Pairs are returned in LIFO order if last is true or FIFO order if false. - - ''' - if not self: - raise KeyError('dictionary is empty') - root = self.__root - if last: - link = root[0] - link_prev = link[0] - link_prev[1] = root - root[0] = link_prev - else: - link = root[1] - link_next = link[1] - root[1] = link_next - link_next[0] = root - key = link[2] - del self.__map[key] - value = dict.pop(self, key) - return key, value - - # -- the following methods do not depend on the internal structure -- - - def keys(self): - 'od.keys() -> list of keys in od' - return list(self) - - def values(self): - 'od.values() -> list of values in od' - return [self[key] for key in self] - - def items(self): - 'od.items() -> list of (key, value) pairs in od' - return [(key, self[key]) for key in self] - - def iterkeys(self): - 'od.iterkeys() -> an iterator over the keys in od' - return iter(self) - - def itervalues(self): - 'od.itervalues -> an iterator over the values in od' - for k in self: - yield self[k] - - def iteritems(self): - 'od.iteritems -> an iterator over the (key, value) items in od' - for k in self: - yield (k, self[k]) - - def update(*args, **kwds): - '''od.update(E, **F) -> None. Update od from dict/iterable E and F. - - If E is a dict instance, does: for k in E: od[k] = E[k] - If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] - Or if E is an iterable of items, does: for k, v in E: od[k] = v - In either case, this is followed by: for k, v in F.items(): od[k] = v - - ''' - if len(args) > 2: - raise TypeError('update() takes at most 2 positional ' - 'arguments (%d given)' % (len(args),)) - elif not args: - raise TypeError('update() takes at least 1 argument (0 given)') - self = args[0] - # Make progressively weaker assumptions about "other" - other = () - if len(args) == 2: - other = args[1] - if isinstance(other, dict): - for key in other: - self[key] = other[key] - elif hasattr(other, 'keys'): - for key in other.keys(): - self[key] = other[key] - else: - for key, value in other: - self[key] = value - for key, value in kwds.items(): - self[key] = value - - __update = update # let subclasses override update without breaking __init__ - - __marker = object() - - def pop(self, key, default=__marker): - '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. - - ''' - if key in self: - result = self[key] - del self[key] - return result - if default is self.__marker: - raise KeyError(key) - return default - - def setdefault(self, key, default=None): - 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' - if key in self: - return self[key] - self[key] = default - return default - - def __repr__(self, _repr_running={}): - 'od.__repr__() <==> repr(od)' - call_key = id(self), _get_ident() - if call_key in _repr_running: - return '...' - _repr_running[call_key] = 1 - try: - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) - finally: - del _repr_running[call_key] - - def __reduce__(self): - 'Return state information for pickling' - items = [[k, self[k]] for k in self] - inst_dict = vars(self).copy() - for k in vars(OrderedDict()): - inst_dict.pop(k, None) - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def copy(self): - 'od.copy() -> a shallow copy of od' - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S - and values equal to v (which defaults to None). - - ''' - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive - while comparison to a regular mapping is order-insensitive. - - ''' - if isinstance(other, OrderedDict): - return len(self)==len(other) and self.items() == other.items() - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other - - # -- the following methods are only used in Python 2.7 -- - - def viewkeys(self): - "od.viewkeys() -> a set-like object providing a view on od's keys" - return KeysView(self) - - def viewvalues(self): - "od.viewvalues() -> an object providing a view on od's values" - return ValuesView(self) - - def viewitems(self): - "od.viewitems() -> a set-like object providing a view on od's items" - return ItemsView(self) diff --git a/lib/requests/packages/urllib3/packages/six.py b/lib/requests/packages/urllib3/packages/six.py deleted file mode 100755 index 27d80112..00000000 --- a/lib/requests/packages/urllib3/packages/six.py +++ /dev/null @@ -1,385 +0,0 @@ -"""Utilities for writing code that runs on Python 2 and 3""" - -#Copyright (c) 2010-2011 Benjamin Peterson - -#Permission is hereby granted, free of charge, to any person obtaining a copy of -#this software and associated documentation files (the "Software"), to deal in -#the Software without restriction, including without limitation the rights to -#use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -#the Software, and to permit persons to whom the Software is furnished to do so, -#subject to the following conditions: - -#The above copyright notice and this permission notice shall be included in all -#copies or substantial portions of the Software. - -#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -#FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -#COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -#IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -#CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -import operator -import sys -import types - -__author__ = "Benjamin Peterson " -__version__ = "1.2.0" # Revision 41c74fef2ded - - -# True if we are running on Python 3. -PY3 = sys.version_info[0] == 3 - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - MAXSIZE = sys.maxsize -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - if sys.platform.startswith("java"): - # Jython always uses 32 bits. - MAXSIZE = int((1 << 31) - 1) - else: - # It's possible to have sizeof(long) != sizeof(Py_ssize_t). - class X(object): - def __len__(self): - return 1 << 31 - try: - len(X()) - except OverflowError: - # 32-bit - MAXSIZE = int((1 << 31) - 1) - else: - # 64-bit - MAXSIZE = int((1 << 63) - 1) - del X - - -def _add_doc(func, doc): - """Add documentation to a function.""" - func.__doc__ = doc - - -def _import_module(name): - """Import module, returning the module after the last dot.""" - __import__(name) - return sys.modules[name] - - -class _LazyDescr(object): - - def __init__(self, name): - self.name = name - - def __get__(self, obj, tp): - result = self._resolve() - setattr(obj, self.name, result) - # This is a bit ugly, but it avoids running this again. - delattr(tp, self.name) - return result - - -class MovedModule(_LazyDescr): - - def __init__(self, name, old, new=None): - super(MovedModule, self).__init__(name) - if PY3: - if new is None: - new = name - self.mod = new - else: - self.mod = old - - def _resolve(self): - return _import_module(self.mod) - - -class MovedAttribute(_LazyDescr): - - def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None): - super(MovedAttribute, self).__init__(name) - if PY3: - if new_mod is None: - new_mod = name - self.mod = new_mod - if new_attr is None: - if old_attr is None: - new_attr = name - else: - new_attr = old_attr - self.attr = new_attr - else: - self.mod = old_mod - if old_attr is None: - old_attr = name - self.attr = old_attr - - def _resolve(self): - module = _import_module(self.mod) - return getattr(module, self.attr) - - - -class _MovedItems(types.ModuleType): - """Lazy loading of moved objects""" - - -_moved_attributes = [ - MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"), - MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"), - MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"), - MovedAttribute("map", "itertools", "builtins", "imap", "map"), - MovedAttribute("reload_module", "__builtin__", "imp", "reload"), - MovedAttribute("reduce", "__builtin__", "functools"), - MovedAttribute("StringIO", "StringIO", "io"), - MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"), - MovedAttribute("zip", "itertools", "builtins", "izip", "zip"), - - MovedModule("builtins", "__builtin__"), - MovedModule("configparser", "ConfigParser"), - MovedModule("copyreg", "copy_reg"), - MovedModule("http_cookiejar", "cookielib", "http.cookiejar"), - MovedModule("http_cookies", "Cookie", "http.cookies"), - MovedModule("html_entities", "htmlentitydefs", "html.entities"), - MovedModule("html_parser", "HTMLParser", "html.parser"), - MovedModule("http_client", "httplib", "http.client"), - MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"), - MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"), - MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"), - MovedModule("cPickle", "cPickle", "pickle"), - MovedModule("queue", "Queue"), - MovedModule("reprlib", "repr"), - MovedModule("socketserver", "SocketServer"), - MovedModule("tkinter", "Tkinter"), - MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"), - MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"), - MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"), - MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"), - MovedModule("tkinter_tix", "Tix", "tkinter.tix"), - MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"), - MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"), - MovedModule("tkinter_colorchooser", "tkColorChooser", - "tkinter.colorchooser"), - MovedModule("tkinter_commondialog", "tkCommonDialog", - "tkinter.commondialog"), - MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"), - MovedModule("tkinter_font", "tkFont", "tkinter.font"), - MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"), - MovedModule("tkinter_tksimpledialog", "tkSimpleDialog", - "tkinter.simpledialog"), - MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"), - MovedModule("winreg", "_winreg"), -] -for attr in _moved_attributes: - setattr(_MovedItems, attr.name, attr) -del attr - -moves = sys.modules[__name__ + ".moves"] = _MovedItems("moves") - - -def add_move(move): - """Add an item to six.moves.""" - setattr(_MovedItems, move.name, move) - - -def remove_move(name): - """Remove item from six.moves.""" - try: - delattr(_MovedItems, name) - except AttributeError: - try: - del moves.__dict__[name] - except KeyError: - raise AttributeError("no such move, %r" % (name,)) - - -if PY3: - _meth_func = "__func__" - _meth_self = "__self__" - - _func_code = "__code__" - _func_defaults = "__defaults__" - - _iterkeys = "keys" - _itervalues = "values" - _iteritems = "items" -else: - _meth_func = "im_func" - _meth_self = "im_self" - - _func_code = "func_code" - _func_defaults = "func_defaults" - - _iterkeys = "iterkeys" - _itervalues = "itervalues" - _iteritems = "iteritems" - - -try: - advance_iterator = next -except NameError: - def advance_iterator(it): - return it.next() -next = advance_iterator - - -if PY3: - def get_unbound_function(unbound): - return unbound - - Iterator = object - - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) -else: - def get_unbound_function(unbound): - return unbound.im_func - - class Iterator(object): - - def next(self): - return type(self).__next__(self) - - callable = callable -_add_doc(get_unbound_function, - """Get the function out of a possibly unbound function""") - - -get_method_function = operator.attrgetter(_meth_func) -get_method_self = operator.attrgetter(_meth_self) -get_function_code = operator.attrgetter(_func_code) -get_function_defaults = operator.attrgetter(_func_defaults) - - -def iterkeys(d): - """Return an iterator over the keys of a dictionary.""" - return iter(getattr(d, _iterkeys)()) - -def itervalues(d): - """Return an iterator over the values of a dictionary.""" - return iter(getattr(d, _itervalues)()) - -def iteritems(d): - """Return an iterator over the (key, value) pairs of a dictionary.""" - return iter(getattr(d, _iteritems)()) - - -if PY3: - def b(s): - return s.encode("latin-1") - def u(s): - return s - if sys.version_info[1] <= 1: - def int2byte(i): - return bytes((i,)) - else: - # This is about 2x faster than the implementation above on 3.2+ - int2byte = operator.methodcaller("to_bytes", 1, "big") - import io - StringIO = io.StringIO - BytesIO = io.BytesIO -else: - def b(s): - return s - def u(s): - return unicode(s, "unicode_escape") - int2byte = chr - import StringIO - StringIO = BytesIO = StringIO.StringIO -_add_doc(b, """Byte literal""") -_add_doc(u, """Text literal""") - - -if PY3: - import builtins - exec_ = getattr(builtins, "exec") - - - def reraise(tp, value, tb=None): - if value.__traceback__ is not tb: - raise value.with_traceback(tb) - raise value - - - print_ = getattr(builtins, "print") - del builtins - -else: - def exec_(code, globs=None, locs=None): - """Execute code in a namespace.""" - if globs is None: - frame = sys._getframe(1) - globs = frame.f_globals - if locs is None: - locs = frame.f_locals - del frame - elif locs is None: - locs = globs - exec("""exec code in globs, locs""") - - - exec_("""def reraise(tp, value, tb=None): - raise tp, value, tb -""") - - - def print_(*args, **kwargs): - """The new-style print function.""" - fp = kwargs.pop("file", sys.stdout) - if fp is None: - return - def write(data): - if not isinstance(data, basestring): - data = str(data) - fp.write(data) - want_unicode = False - sep = kwargs.pop("sep", None) - if sep is not None: - if isinstance(sep, unicode): - want_unicode = True - elif not isinstance(sep, str): - raise TypeError("sep must be None or a string") - end = kwargs.pop("end", None) - if end is not None: - if isinstance(end, unicode): - want_unicode = True - elif not isinstance(end, str): - raise TypeError("end must be None or a string") - if kwargs: - raise TypeError("invalid keyword arguments to print()") - if not want_unicode: - for arg in args: - if isinstance(arg, unicode): - want_unicode = True - break - if want_unicode: - newline = unicode("\n") - space = unicode(" ") - else: - newline = "\n" - space = " " - if sep is None: - sep = space - if end is None: - end = newline - for i, arg in enumerate(args): - if i: - write(sep) - write(arg) - write(end) - -_add_doc(reraise, """Reraise an exception.""") - - -def with_metaclass(meta, base=object): - """Create a base class with a metaclass.""" - return meta("NewBase", (base,), {}) diff --git a/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py b/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py deleted file mode 100755 index dd59a75f..00000000 --- a/lib/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -try: - # Python 3.2+ - from ssl import CertificateError, match_hostname -except ImportError: - try: - # Backport of the function from a pypi module - from backports.ssl_match_hostname import CertificateError, match_hostname - except ImportError: - # Our vendored copy - from ._implementation import CertificateError, match_hostname - -# Not needed, but documenting what we provide. -__all__ = ('CertificateError', 'match_hostname') diff --git a/lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py b/lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py deleted file mode 100755 index 52f42873..00000000 --- a/lib/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py +++ /dev/null @@ -1,105 +0,0 @@ -"""The match_hostname() function from Python 3.3.3, essential when using SSL.""" - -# Note: This file is under the PSF license as the code comes from the python -# stdlib. http://docs.python.org/3/license.html - -import re - -__version__ = '3.4.0.2' - -class CertificateError(ValueError): - pass - - -def _dnsname_match(dn, hostname, max_wildcards=1): - """Matching according to RFC 6125, section 6.4.3 - - http://tools.ietf.org/html/rfc6125#section-6.4.3 - """ - pats = [] - if not dn: - return False - - # Ported from python3-syntax: - # leftmost, *remainder = dn.split(r'.') - parts = dn.split(r'.') - leftmost = parts[0] - remainder = parts[1:] - - wildcards = leftmost.count('*') - if wildcards > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survey of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - - # speed up common case w/o wildcards - if not wildcards: - return dn.lower() == hostname.lower() - - # RFC 6125, section 6.4.3, subitem 1. - # The client SHOULD NOT attempt to match a presented identifier in which - # the wildcard character comprises a label other than the left-most label. - if leftmost == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - elif leftmost.startswith('xn--') or hostname.startswith('xn--'): - # RFC 6125, section 6.4.3, subitem 3. - # The client SHOULD NOT attempt to match a presented identifier - # where the wildcard character is embedded within an A-label or - # U-label of an internationalized domain name. - pats.append(re.escape(leftmost)) - else: - # Otherwise, '*' matches any dotless string, e.g. www* - pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) - - # add the remaining fragments, ignore any wildcards - for frag in remainder: - pats.append(re.escape(frag)) - - pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - return pat.match(hostname) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") diff --git a/lib/requests/packages/urllib3/poolmanager.py b/lib/requests/packages/urllib3/poolmanager.py deleted file mode 100755 index 76b6a129..00000000 --- a/lib/requests/packages/urllib3/poolmanager.py +++ /dev/null @@ -1,280 +0,0 @@ -import logging - -try: # Python 3 - from urllib.parse import urljoin -except ImportError: - from urlparse import urljoin - -from ._collections import RecentlyUsedContainer -from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool -from .connectionpool import port_by_scheme -from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown -from .request import RequestMethods -from .util.url import parse_url -from .util.retry import Retry - - -__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] - - -pool_classes_by_scheme = { - 'http': HTTPConnectionPool, - 'https': HTTPSConnectionPool, -} - -log = logging.getLogger(__name__) - -SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', - 'ssl_version') - - -class PoolManager(RequestMethods): - """ - Allows for arbitrary requests while transparently keeping track of - necessary connection pools for you. - - :param num_pools: - Number of connection pools to cache before discarding the least - recently used pool. - - :param headers: - Headers to include with all requests, unless other headers are given - explicitly. - - :param \**connection_pool_kw: - Additional parameters are used to create fresh - :class:`urllib3.connectionpool.ConnectionPool` instances. - - Example:: - - >>> manager = PoolManager(num_pools=2) - >>> r = manager.request('GET', 'http://google.com/') - >>> r = manager.request('GET', 'http://google.com/mail') - >>> r = manager.request('GET', 'http://yahoo.com/') - >>> len(manager.pools) - 2 - - """ - - proxy = None - - def __init__(self, num_pools=10, headers=None, **connection_pool_kw): - RequestMethods.__init__(self, headers) - self.connection_pool_kw = connection_pool_kw - self.pools = RecentlyUsedContainer(num_pools, - dispose_func=lambda p: p.close()) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.clear() - # Return False to re-raise any potential exceptions - return False - - def _new_pool(self, scheme, host, port): - """ - Create a new :class:`ConnectionPool` based on host, port and scheme. - - This method is used to actually create the connection pools handed out - by :meth:`connection_from_url` and companion methods. It is intended - to be overridden for customization. - """ - pool_cls = pool_classes_by_scheme[scheme] - kwargs = self.connection_pool_kw - if scheme == 'http': - kwargs = self.connection_pool_kw.copy() - for kw in SSL_KEYWORDS: - kwargs.pop(kw, None) - - return pool_cls(host, port, **kwargs) - - def clear(self): - """ - Empty our store of pools and direct them all to close. - - This will not affect in-flight connections, but they will not be - re-used after completion. - """ - self.pools.clear() - - def connection_from_host(self, host, port=None, scheme='http'): - """ - Get a :class:`ConnectionPool` based on the host, port, and scheme. - - If ``port`` isn't given, it will be derived from the ``scheme`` using - ``urllib3.connectionpool.port_by_scheme``. - """ - - if not host: - raise LocationValueError("No host specified.") - - scheme = scheme or 'http' - port = port or port_by_scheme.get(scheme, 80) - pool_key = (scheme, host, port) - - with self.pools.lock: - # If the scheme, host, or port doesn't match existing open - # connections, open a new ConnectionPool. - pool = self.pools.get(pool_key) - if pool: - return pool - - # Make a fresh ConnectionPool of the desired type - pool = self._new_pool(scheme, host, port) - self.pools[pool_key] = pool - - return pool - - def connection_from_url(self, url): - """ - Similar to :func:`urllib3.connectionpool.connection_from_url` but - doesn't pass any additional parameters to the - :class:`urllib3.connectionpool.ConnectionPool` constructor. - - Additional parameters are taken from the :class:`.PoolManager` - constructor. - """ - u = parse_url(url) - return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - - def urlopen(self, method, url, redirect=True, **kw): - """ - Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` - with custom cross-host redirect logic and only sends the request-uri - portion of the ``url``. - - The given ``url`` parameter must be absolute, such that an appropriate - :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. - """ - u = parse_url(url) - conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) - - kw['assert_same_host'] = False - kw['redirect'] = False - if 'headers' not in kw: - kw['headers'] = self.headers - - if self.proxy is not None and u.scheme == "http": - response = conn.urlopen(method, url, **kw) - else: - response = conn.urlopen(method, u.request_uri, **kw) - - redirect_location = redirect and response.get_redirect_location() - if not redirect_location: - return response - - # Support relative URLs for redirecting. - redirect_location = urljoin(url, redirect_location) - - # RFC 7231, Section 6.4.4 - if response.status == 303: - method = 'GET' - - retries = kw.get('retries') - if not isinstance(retries, Retry): - retries = Retry.from_int(retries, redirect=redirect) - - try: - retries = retries.increment(method, url, response=response, _pool=conn) - except MaxRetryError: - if retries.raise_on_redirect: - raise - return response - - kw['retries'] = retries - kw['redirect'] = redirect - - log.info("Redirecting %s -> %s" % (url, redirect_location)) - return self.urlopen(method, redirect_location, **kw) - - -class ProxyManager(PoolManager): - """ - Behaves just like :class:`PoolManager`, but sends all requests through - the defined proxy, using the CONNECT method for HTTPS URLs. - - :param proxy_url: - The URL of the proxy to be used. - - :param proxy_headers: - A dictionary contaning headers that will be sent to the proxy. In case - of HTTP they are being sent with each request, while in the - HTTPS/CONNECT case they are sent only once. Could be used for proxy - authentication. - - Example: - >>> proxy = urllib3.ProxyManager('http://localhost:3128/') - >>> r1 = proxy.request('GET', 'http://google.com/') - >>> r2 = proxy.request('GET', 'http://httpbin.org/') - >>> len(proxy.pools) - 1 - >>> r3 = proxy.request('GET', 'https://httpbin.org/') - >>> r4 = proxy.request('GET', 'https://twitter.com/') - >>> len(proxy.pools) - 3 - - """ - - def __init__(self, proxy_url, num_pools=10, headers=None, - proxy_headers=None, **connection_pool_kw): - - if isinstance(proxy_url, HTTPConnectionPool): - proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, - proxy_url.port) - proxy = parse_url(proxy_url) - if not proxy.port: - port = port_by_scheme.get(proxy.scheme, 80) - proxy = proxy._replace(port=port) - - if proxy.scheme not in ("http", "https"): - raise ProxySchemeUnknown(proxy.scheme) - - self.proxy = proxy - self.proxy_headers = proxy_headers or {} - - connection_pool_kw['_proxy'] = self.proxy - connection_pool_kw['_proxy_headers'] = self.proxy_headers - - super(ProxyManager, self).__init__( - num_pools, headers, **connection_pool_kw) - - def connection_from_host(self, host, port=None, scheme='http'): - if scheme == "https": - return super(ProxyManager, self).connection_from_host( - host, port, scheme) - - return super(ProxyManager, self).connection_from_host( - self.proxy.host, self.proxy.port, self.proxy.scheme) - - def _set_proxy_headers(self, url, headers=None): - """ - Sets headers needed by proxies: specifically, the Accept and Host - headers. Only sets headers not provided by the user. - """ - headers_ = {'Accept': '*/*'} - - netloc = parse_url(url).netloc - if netloc: - headers_['Host'] = netloc - - if headers: - headers_.update(headers) - return headers_ - - def urlopen(self, method, url, redirect=True, **kw): - "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." - u = parse_url(url) - - if u.scheme == "http": - # For proxied HTTPS requests, httplib sets the necessary headers - # on the CONNECT to the proxy. For HTTP, we'll definitely - # need to set 'Host' at the very least. - headers = kw.get('headers', self.headers) - kw['headers'] = self._set_proxy_headers(url, headers) - - return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) - - -def proxy_from_url(url, **kw): - return ProxyManager(proxy_url=url, **kw) diff --git a/lib/requests/packages/urllib3/response.py b/lib/requests/packages/urllib3/response.py deleted file mode 100755 index 788eb6ca..00000000 --- a/lib/requests/packages/urllib3/response.py +++ /dev/null @@ -1,485 +0,0 @@ -from contextlib import contextmanager -import zlib -import io -from socket import timeout as SocketTimeout - -from ._collections import HTTPHeaderDict -from .exceptions import ( - ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked -) -from .packages.six import string_types as basestring, binary_type, PY3 -from .packages.six.moves import http_client as httplib -from .connection import HTTPException, BaseSSLError -from .util.response import is_fp_closed, is_response_to_head - - -class DeflateDecoder(object): - - def __init__(self): - self._first_try = True - self._data = binary_type() - self._obj = zlib.decompressobj() - - def __getattr__(self, name): - return getattr(self._obj, name) - - def decompress(self, data): - if not data: - return data - - if not self._first_try: - return self._obj.decompress(data) - - self._data += data - try: - return self._obj.decompress(data) - except zlib.error: - self._first_try = False - self._obj = zlib.decompressobj(-zlib.MAX_WBITS) - try: - return self.decompress(self._data) - finally: - self._data = None - - -class GzipDecoder(object): - - def __init__(self): - self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) - - def __getattr__(self, name): - return getattr(self._obj, name) - - def decompress(self, data): - if not data: - return data - return self._obj.decompress(data) - - -def _get_decoder(mode): - if mode == 'gzip': - return GzipDecoder() - - return DeflateDecoder() - - -class HTTPResponse(io.IOBase): - """ - HTTP Response container. - - Backwards-compatible to httplib's HTTPResponse but the response ``body`` is - loaded and decoded on-demand when the ``data`` property is accessed. This - class is also compatible with the Python standard library's :mod:`io` - module, and can hence be treated as a readable object in the context of that - framework. - - Extra parameters for behaviour not present in httplib.HTTPResponse: - - :param preload_content: - If True, the response's body will be preloaded during construction. - - :param decode_content: - If True, attempts to decode specific content-encoding's based on headers - (like 'gzip' and 'deflate') will be skipped and raw data will be used - instead. - - :param original_response: - When this HTTPResponse wrapper is generated from an httplib.HTTPResponse - object, it's convenient to include the original for debug purposes. It's - otherwise unused. - """ - - CONTENT_DECODERS = ['gzip', 'deflate'] - REDIRECT_STATUSES = [301, 302, 303, 307, 308] - - def __init__(self, body='', headers=None, status=0, version=0, reason=None, - strict=0, preload_content=True, decode_content=True, - original_response=None, pool=None, connection=None): - - if isinstance(headers, HTTPHeaderDict): - self.headers = headers - else: - self.headers = HTTPHeaderDict(headers) - self.status = status - self.version = version - self.reason = reason - self.strict = strict - self.decode_content = decode_content - - self._decoder = None - self._body = None - self._fp = None - self._original_response = original_response - self._fp_bytes_read = 0 - - if body and isinstance(body, (basestring, binary_type)): - self._body = body - - self._pool = pool - self._connection = connection - - if hasattr(body, 'read'): - self._fp = body - - # Are we using the chunked-style of transfer encoding? - self.chunked = False - self.chunk_left = None - tr_enc = self.headers.get('transfer-encoding', '').lower() - # Don't incur the penalty of creating a list and then discarding it - encodings = (enc.strip() for enc in tr_enc.split(",")) - if "chunked" in encodings: - self.chunked = True - - # We certainly don't want to preload content when the response is chunked. - if not self.chunked and preload_content and not self._body: - self._body = self.read(decode_content=decode_content) - - def get_redirect_location(self): - """ - Should we redirect and where to? - - :returns: Truthy redirect location string if we got a redirect status - code and valid location. ``None`` if redirect status and no - location. ``False`` if not a redirect status code. - """ - if self.status in self.REDIRECT_STATUSES: - return self.headers.get('location') - - return False - - def release_conn(self): - if not self._pool or not self._connection: - return - - self._pool._put_conn(self._connection) - self._connection = None - - @property - def data(self): - # For backwords-compat with earlier urllib3 0.4 and earlier. - if self._body: - return self._body - - if self._fp: - return self.read(cache_content=True) - - def tell(self): - """ - Obtain the number of bytes pulled over the wire so far. May differ from - the amount of content returned by :meth:``HTTPResponse.read`` if bytes - are encoded on the wire (e.g, compressed). - """ - return self._fp_bytes_read - - def _init_decoder(self): - """ - Set-up the _decoder attribute if necessar. - """ - # Note: content-encoding value should be case-insensitive, per RFC 7230 - # Section 3.2 - content_encoding = self.headers.get('content-encoding', '').lower() - if self._decoder is None and content_encoding in self.CONTENT_DECODERS: - self._decoder = _get_decoder(content_encoding) - - def _decode(self, data, decode_content, flush_decoder): - """ - Decode the data passed in and potentially flush the decoder. - """ - try: - if decode_content and self._decoder: - data = self._decoder.decompress(data) - except (IOError, zlib.error) as e: - content_encoding = self.headers.get('content-encoding', '').lower() - raise DecodeError( - "Received response with content-encoding: %s, but " - "failed to decode it." % content_encoding, e) - - if flush_decoder and decode_content and self._decoder: - buf = self._decoder.decompress(binary_type()) - data += buf + self._decoder.flush() - - return data - - @contextmanager - def _error_catcher(self): - """ - Catch low-level python exceptions, instead re-raising urllib3 - variants, so that low-level exceptions are not leaked in the - high-level api. - - On exit, release the connection back to the pool. - """ - try: - try: - yield - - except SocketTimeout: - # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but - # there is yet no clean way to get at it from this context. - raise ReadTimeoutError(self._pool, None, 'Read timed out.') - - except BaseSSLError as e: - # FIXME: Is there a better way to differentiate between SSLErrors? - if 'read operation timed out' not in str(e): # Defensive: - # This shouldn't happen but just in case we're missing an edge - # case, let's avoid swallowing SSL errors. - raise - - raise ReadTimeoutError(self._pool, None, 'Read timed out.') - - except HTTPException as e: - # This includes IncompleteRead. - raise ProtocolError('Connection broken: %r' % e, e) - except Exception: - # The response may not be closed but we're not going to use it anymore - # so close it now to ensure that the connection is released back to the pool. - if self._original_response and not self._original_response.isclosed(): - self._original_response.close() - - raise - finally: - if self._original_response and self._original_response.isclosed(): - self.release_conn() - - def read(self, amt=None, decode_content=None, cache_content=False): - """ - Similar to :meth:`httplib.HTTPResponse.read`, but with two additional - parameters: ``decode_content`` and ``cache_content``. - - :param amt: - How much of the content to read. If specified, caching is skipped - because it doesn't make sense to cache partial content as the full - response. - - :param decode_content: - If True, will attempt to decode the body based on the - 'content-encoding' header. - - :param cache_content: - If True, will save the returned data such that the same result is - returned despite of the state of the underlying file object. This - is useful if you want the ``.data`` property to continue working - after having ``.read()`` the file object. (Overridden if ``amt`` is - set.) - """ - self._init_decoder() - if decode_content is None: - decode_content = self.decode_content - - if self._fp is None: - return - - flush_decoder = False - data = None - - with self._error_catcher(): - if amt is None: - # cStringIO doesn't like amt=None - data = self._fp.read() - flush_decoder = True - else: - cache_content = False - data = self._fp.read(amt) - if amt != 0 and not data: # Platform-specific: Buggy versions of Python. - # Close the connection when no data is returned - # - # This is redundant to what httplib/http.client _should_ - # already do. However, versions of python released before - # December 15, 2012 (http://bugs.python.org/issue16298) do - # not properly close the connection in all cases. There is - # no harm in redundantly calling close. - self._fp.close() - flush_decoder = True - - if data: - self._fp_bytes_read += len(data) - - data = self._decode(data, decode_content, flush_decoder) - - if cache_content: - self._body = data - - return data - - - def stream(self, amt=2**16, decode_content=None): - """ - A generator wrapper for the read() method. A call will block until - ``amt`` bytes have been read from the connection or until the - connection is closed. - - :param amt: - How much of the content to read. The generator will return up to - much data per iteration, but may return less. This is particularly - likely when using compressed data. However, the empty string will - never be returned. - - :param decode_content: - If True, will attempt to decode the body based on the - 'content-encoding' header. - """ - if self.chunked: - for line in self.read_chunked(amt, decode_content=decode_content): - yield line - else: - while not is_fp_closed(self._fp): - data = self.read(amt=amt, decode_content=decode_content) - - if data: - yield data - - @classmethod - def from_httplib(ResponseCls, r, **response_kw): - """ - Given an :class:`httplib.HTTPResponse` instance ``r``, return a - corresponding :class:`urllib3.response.HTTPResponse` object. - - Remaining parameters are passed to the HTTPResponse constructor, along - with ``original_response=r``. - """ - headers = r.msg - - if not isinstance(headers, HTTPHeaderDict): - if PY3: # Python 3 - headers = HTTPHeaderDict(headers.items()) - else: # Python 2 - headers = HTTPHeaderDict.from_httplib(headers) - - # HTTPResponse objects in Python 3 don't have a .strict attribute - strict = getattr(r, 'strict', 0) - resp = ResponseCls(body=r, - headers=headers, - status=r.status, - version=r.version, - reason=r.reason, - strict=strict, - original_response=r, - **response_kw) - return resp - - # Backwards-compatibility methods for httplib.HTTPResponse - def getheaders(self): - return self.headers - - def getheader(self, name, default=None): - return self.headers.get(name, default) - - # Overrides from io.IOBase - def close(self): - if not self.closed: - self._fp.close() - - @property - def closed(self): - if self._fp is None: - return True - elif hasattr(self._fp, 'closed'): - return self._fp.closed - elif hasattr(self._fp, 'isclosed'): # Python 2 - return self._fp.isclosed() - else: - return True - - def fileno(self): - if self._fp is None: - raise IOError("HTTPResponse has no file to get a fileno from") - elif hasattr(self._fp, "fileno"): - return self._fp.fileno() - else: - raise IOError("The file-like object this HTTPResponse is wrapped " - "around has no file descriptor") - - def flush(self): - if self._fp is not None and hasattr(self._fp, 'flush'): - return self._fp.flush() - - def readable(self): - # This method is required for `io` module compatibility. - return True - - def readinto(self, b): - # This method is required for `io` module compatibility. - temp = self.read(len(b)) - if len(temp) == 0: - return 0 - else: - b[:len(temp)] = temp - return len(temp) - - def _update_chunk_length(self): - # First, we'll figure out length of a chunk and then - # we'll try to read it from socket. - if self.chunk_left is not None: - return - line = self._fp.fp.readline() - line = line.split(b';', 1)[0] - try: - self.chunk_left = int(line, 16) - except ValueError: - # Invalid chunked protocol response, abort. - self.close() - raise httplib.IncompleteRead(line) - - def _handle_chunk(self, amt): - returned_chunk = None - if amt is None: - chunk = self._fp._safe_read(self.chunk_left) - returned_chunk = chunk - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - elif amt < self.chunk_left: - value = self._fp._safe_read(amt) - self.chunk_left = self.chunk_left - amt - returned_chunk = value - elif amt == self.chunk_left: - value = self._fp._safe_read(amt) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - returned_chunk = value - else: # amt > self.chunk_left - returned_chunk = self._fp._safe_read(self.chunk_left) - self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. - self.chunk_left = None - return returned_chunk - - def read_chunked(self, amt=None, decode_content=None): - """ - Similar to :meth:`HTTPResponse.read`, but with an additional - parameter: ``decode_content``. - - :param decode_content: - If True, will attempt to decode the body based on the - 'content-encoding' header. - """ - self._init_decoder() - # FIXME: Rewrite this method and make it a class with a better structured logic. - if not self.chunked: - raise ResponseNotChunked("Response is not chunked. " - "Header 'transfer-encoding: chunked' is missing.") - - # Don't bother reading the body of a HEAD request. - if self._original_response and is_response_to_head(self._original_response): - self._original_response.close() - return - - with self._error_catcher(): - while True: - self._update_chunk_length() - if self.chunk_left == 0: - break - chunk = self._handle_chunk(amt) - yield self._decode(chunk, decode_content=decode_content, - flush_decoder=True) - - # Chunk content ends with \r\n: discard it. - while True: - line = self._fp.fp.readline() - if not line: - # Some sites may not end with '\r\n'. - break - if line == b'\r\n': - break - - # We read everything; close the "file". - if self._original_response: - self._original_response.close() diff --git a/lib/requests/packages/urllib3/util/__init__.py b/lib/requests/packages/urllib3/util/__init__.py deleted file mode 100755 index 8becc814..00000000 --- a/lib/requests/packages/urllib3/util/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# For backwards compatibility, provide imports that used to be here. -from .connection import is_connection_dropped -from .request import make_headers -from .response import is_fp_closed -from .ssl_ import ( - SSLContext, - HAS_SNI, - assert_fingerprint, - resolve_cert_reqs, - resolve_ssl_version, - ssl_wrap_socket, -) -from .timeout import ( - current_time, - Timeout, -) - -from .retry import Retry -from .url import ( - get_host, - parse_url, - split_first, - Url, -) diff --git a/lib/requests/packages/urllib3/util/connection.py b/lib/requests/packages/urllib3/util/connection.py deleted file mode 100755 index 4f2f0f18..00000000 --- a/lib/requests/packages/urllib3/util/connection.py +++ /dev/null @@ -1,100 +0,0 @@ -import socket -try: - from select import poll, POLLIN -except ImportError: # `poll` doesn't exist on OSX and other platforms - poll = False - try: - from select import select - except ImportError: # `select` doesn't exist on AppEngine. - select = False - - -def is_connection_dropped(conn): # Platform-specific - """ - Returns True if the connection is dropped and should be closed. - - :param conn: - :class:`httplib.HTTPConnection` object. - - Note: For platforms like AppEngine, this will always return ``False`` to - let the platform handle connection recycling transparently for us. - """ - sock = getattr(conn, 'sock', False) - if sock is False: # Platform-specific: AppEngine - return False - if sock is None: # Connection already closed (such as by httplib). - return True - - if not poll: - if not select: # Platform-specific: AppEngine - return False - - try: - return select([sock], [], [], 0.0)[0] - except socket.error: - return True - - # This version is better on platforms that support it. - p = poll() - p.register(sock, POLLIN) - for (fno, ev) in p.poll(0.0): - if fno == sock.fileno(): - # Either data is buffered (bad), or the connection is dropped. - return True - - -# This function is copied from socket.py in the Python 2.7 standard -# library test suite. Added to its signature is only `socket_options`. -def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, socket_options=None): - """Connect to *address* and return the socket object. - - Convenience function. Connect to *address* (a 2-tuple ``(host, - port)``) and return the socket object. Passing the optional - *timeout* parameter will set the timeout on the socket instance - before attempting to connect. If no *timeout* is supplied, the - global default timeout setting returned by :func:`getdefaulttimeout` - is used. If *source_address* is set it must be a tuple of (host, port) - for the socket to bind as a source address before making the connection. - An host of '' or port 0 tells the OS to use the default. - """ - - host, port = address - if host.startswith('['): - host = host.strip('[]') - err = None - for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - sock = None - try: - sock = socket.socket(af, socktype, proto) - - # If provided, set socket level options before connecting. - # This is the only addition urllib3 makes to this function. - _set_socket_options(sock, socket_options) - - if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: - sock.settimeout(timeout) - if source_address: - sock.bind(source_address) - sock.connect(sa) - return sock - - except socket.error as e: - err = e - if sock is not None: - sock.close() - sock = None - - if err is not None: - raise err - - raise socket.error("getaddrinfo returns an empty list") - - -def _set_socket_options(sock, options): - if options is None: - return - - for opt in options: - sock.setsockopt(*opt) diff --git a/lib/requests/packages/urllib3/util/request.py b/lib/requests/packages/urllib3/util/request.py deleted file mode 100755 index bc64f6b1..00000000 --- a/lib/requests/packages/urllib3/util/request.py +++ /dev/null @@ -1,71 +0,0 @@ -from base64 import b64encode - -from ..packages.six import b - -ACCEPT_ENCODING = 'gzip,deflate' - - -def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, - basic_auth=None, proxy_basic_auth=None, disable_cache=None): - """ - Shortcuts for generating request headers. - - :param keep_alive: - If ``True``, adds 'connection: keep-alive' header. - - :param accept_encoding: - Can be a boolean, list, or string. - ``True`` translates to 'gzip,deflate'. - List will get joined by comma. - String will be used as provided. - - :param user_agent: - String representing the user-agent you want, such as - "python-urllib3/0.6" - - :param basic_auth: - Colon-separated username:password string for 'authorization: basic ...' - auth header. - - :param proxy_basic_auth: - Colon-separated username:password string for 'proxy-authorization: basic ...' - auth header. - - :param disable_cache: - If ``True``, adds 'cache-control: no-cache' header. - - Example:: - - >>> make_headers(keep_alive=True, user_agent="Batman/1.0") - {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} - >>> make_headers(accept_encoding=True) - {'accept-encoding': 'gzip,deflate'} - """ - headers = {} - if accept_encoding: - if isinstance(accept_encoding, str): - pass - elif isinstance(accept_encoding, list): - accept_encoding = ','.join(accept_encoding) - else: - accept_encoding = ACCEPT_ENCODING - headers['accept-encoding'] = accept_encoding - - if user_agent: - headers['user-agent'] = user_agent - - if keep_alive: - headers['connection'] = 'keep-alive' - - if basic_auth: - headers['authorization'] = 'Basic ' + \ - b64encode(b(basic_auth)).decode('utf-8') - - if proxy_basic_auth: - headers['proxy-authorization'] = 'Basic ' + \ - b64encode(b(proxy_basic_auth)).decode('utf-8') - - if disable_cache: - headers['cache-control'] = 'no-cache' - - return headers diff --git a/lib/requests/packages/urllib3/util/response.py b/lib/requests/packages/urllib3/util/response.py deleted file mode 100755 index 2c1de154..00000000 --- a/lib/requests/packages/urllib3/util/response.py +++ /dev/null @@ -1,73 +0,0 @@ -from ..packages.six.moves import http_client as httplib - -from ..exceptions import HeaderParsingError - - -def is_fp_closed(obj): - """ - Checks whether a given file-like object is closed. - - :param obj: - The file-like object to check. - """ - - try: - # Check via the official file-like-object way. - return obj.closed - except AttributeError: - pass - - try: - # Check if the object is a container for another file-like object that - # gets released on exhaustion (e.g. HTTPResponse). - return obj.fp is None - except AttributeError: - pass - - raise ValueError("Unable to determine whether fp is closed.") - - -def assert_header_parsing(headers): - """ - Asserts whether all headers have been successfully parsed. - Extracts encountered errors from the result of parsing headers. - - Only works on Python 3. - - :param headers: Headers to verify. - :type headers: `httplib.HTTPMessage`. - - :raises urllib3.exceptions.HeaderParsingError: - If parsing errors are found. - """ - - # This will fail silently if we pass in the wrong kind of parameter. - # To make debugging easier add an explicit check. - if not isinstance(headers, httplib.HTTPMessage): - raise TypeError('expected httplib.Message, got {}.'.format( - type(headers))) - - defects = getattr(headers, 'defects', None) - get_payload = getattr(headers, 'get_payload', None) - - unparsed_data = None - if get_payload: # Platform-specific: Python 3. - unparsed_data = get_payload() - - if defects or unparsed_data: - raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) - - -def is_response_to_head(response): - """ - Checks, wether a the request of a response has been a HEAD-request. - Handles the quirks of AppEngine. - - :param conn: - :type conn: :class:`httplib.HTTPResponse` - """ - # FIXME: Can we do this somehow without accessing private httplib _method? - method = response._method - if isinstance(method, int): # Platform-specific: Appengine - return method == 3 - return method.upper() == 'HEAD' diff --git a/lib/requests/packages/urllib3/util/retry.py b/lib/requests/packages/urllib3/util/retry.py deleted file mode 100755 index 1fb1f23b..00000000 --- a/lib/requests/packages/urllib3/util/retry.py +++ /dev/null @@ -1,285 +0,0 @@ -import time -import logging - -from ..exceptions import ( - ConnectTimeoutError, - MaxRetryError, - ProtocolError, - ReadTimeoutError, - ResponseError, -) -from ..packages import six - - -log = logging.getLogger(__name__) - - -class Retry(object): - """ Retry configuration. - - Each retry attempt will create a new Retry object with updated values, so - they can be safely reused. - - Retries can be defined as a default for a pool:: - - retries = Retry(connect=5, read=2, redirect=5) - http = PoolManager(retries=retries) - response = http.request('GET', 'http://example.com/') - - Or per-request (which overrides the default for the pool):: - - response = http.request('GET', 'http://example.com/', retries=Retry(10)) - - Retries can be disabled by passing ``False``:: - - response = http.request('GET', 'http://example.com/', retries=False) - - Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless - retries are disabled, in which case the causing exception will be raised. - - :param int total: - Total number of retries to allow. Takes precedence over other counts. - - Set to ``None`` to remove this constraint and fall back on other - counts. It's a good idea to set this to some sensibly-high value to - account for unexpected edge cases and avoid infinite retry loops. - - Set to ``0`` to fail on the first retry. - - Set to ``False`` to disable and imply ``raise_on_redirect=False``. - - :param int connect: - How many connection-related errors to retry on. - - These are errors raised before the request is sent to the remote server, - which we assume has not triggered the server to process the request. - - Set to ``0`` to fail on the first retry of this type. - - :param int read: - How many times to retry on read errors. - - These errors are raised after the request was sent to the server, so the - request may have side-effects. - - Set to ``0`` to fail on the first retry of this type. - - :param int redirect: - How many redirects to perform. Limit this to avoid infinite redirect - loops. - - A redirect is a HTTP response with a status code 301, 302, 303, 307 or - 308. - - Set to ``0`` to fail on the first retry of this type. - - Set to ``False`` to disable and imply ``raise_on_redirect=False``. - - :param iterable method_whitelist: - Set of uppercased HTTP method verbs that we should retry on. - - By default, we only retry on methods which are considered to be - indempotent (multiple requests with the same parameters end with the - same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`. - - :param iterable status_forcelist: - A set of HTTP status codes that we should force a retry on. - - By default, this is disabled with ``None``. - - :param float backoff_factor: - A backoff factor to apply between attempts. urllib3 will sleep for:: - - {backoff factor} * (2 ^ ({number of total retries} - 1)) - - seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep - for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer - than :attr:`Retry.BACKOFF_MAX`. - - By default, backoff is disabled (set to 0). - - :param bool raise_on_redirect: Whether, if the number of redirects is - exhausted, to raise a MaxRetryError, or to return a response with a - response code in the 3xx range. - """ - - DEFAULT_METHOD_WHITELIST = frozenset([ - 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']) - - #: Maximum backoff time. - BACKOFF_MAX = 120 - - def __init__(self, total=10, connect=None, read=None, redirect=None, - method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None, - backoff_factor=0, raise_on_redirect=True, _observed_errors=0): - - self.total = total - self.connect = connect - self.read = read - - if redirect is False or total is False: - redirect = 0 - raise_on_redirect = False - - self.redirect = redirect - self.status_forcelist = status_forcelist or set() - self.method_whitelist = method_whitelist - self.backoff_factor = backoff_factor - self.raise_on_redirect = raise_on_redirect - self._observed_errors = _observed_errors # TODO: use .history instead? - - def new(self, **kw): - params = dict( - total=self.total, - connect=self.connect, read=self.read, redirect=self.redirect, - method_whitelist=self.method_whitelist, - status_forcelist=self.status_forcelist, - backoff_factor=self.backoff_factor, - raise_on_redirect=self.raise_on_redirect, - _observed_errors=self._observed_errors, - ) - params.update(kw) - return type(self)(**params) - - @classmethod - def from_int(cls, retries, redirect=True, default=None): - """ Backwards-compatibility for the old retries format.""" - if retries is None: - retries = default if default is not None else cls.DEFAULT - - if isinstance(retries, Retry): - return retries - - redirect = bool(redirect) and None - new_retries = cls(retries, redirect=redirect) - log.debug("Converted retries value: %r -> %r" % (retries, new_retries)) - return new_retries - - def get_backoff_time(self): - """ Formula for computing the current backoff - - :rtype: float - """ - if self._observed_errors <= 1: - return 0 - - backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1)) - return min(self.BACKOFF_MAX, backoff_value) - - def sleep(self): - """ Sleep between retry attempts using an exponential backoff. - - By default, the backoff factor is 0 and this method will return - immediately. - """ - backoff = self.get_backoff_time() - if backoff <= 0: - return - time.sleep(backoff) - - def _is_connection_error(self, err): - """ Errors when we're fairly sure that the server did not receive the - request, so it should be safe to retry. - """ - return isinstance(err, ConnectTimeoutError) - - def _is_read_error(self, err): - """ Errors that occur after the request has been started, so we should - assume that the server began processing it. - """ - return isinstance(err, (ReadTimeoutError, ProtocolError)) - - def is_forced_retry(self, method, status_code): - """ Is this method/status code retryable? (Based on method/codes whitelists) - """ - if self.method_whitelist and method.upper() not in self.method_whitelist: - return False - - return self.status_forcelist and status_code in self.status_forcelist - - def is_exhausted(self): - """ Are we out of retries? """ - retry_counts = (self.total, self.connect, self.read, self.redirect) - retry_counts = list(filter(None, retry_counts)) - if not retry_counts: - return False - - return min(retry_counts) < 0 - - def increment(self, method=None, url=None, response=None, error=None, _pool=None, _stacktrace=None): - """ Return a new Retry object with incremented retry counters. - - :param response: A response object, or None, if the server did not - return a response. - :type response: :class:`~urllib3.response.HTTPResponse` - :param Exception error: An error encountered during the request, or - None if the response was received successfully. - - :return: A new ``Retry`` object. - """ - if self.total is False and error: - # Disabled, indicate to re-raise the error. - raise six.reraise(type(error), error, _stacktrace) - - total = self.total - if total is not None: - total -= 1 - - _observed_errors = self._observed_errors - connect = self.connect - read = self.read - redirect = self.redirect - cause = 'unknown' - - if error and self._is_connection_error(error): - # Connect retry? - if connect is False: - raise six.reraise(type(error), error, _stacktrace) - elif connect is not None: - connect -= 1 - _observed_errors += 1 - - elif error and self._is_read_error(error): - # Read retry? - if read is False: - raise six.reraise(type(error), error, _stacktrace) - elif read is not None: - read -= 1 - _observed_errors += 1 - - elif response and response.get_redirect_location(): - # Redirect retry? - if redirect is not None: - redirect -= 1 - cause = 'too many redirects' - - else: - # Incrementing because of a server error like a 500 in - # status_forcelist and a the given method is in the whitelist - _observed_errors += 1 - cause = ResponseError.GENERIC_ERROR - if response and response.status: - cause = ResponseError.SPECIFIC_ERROR.format( - status_code=response.status) - - new_retry = self.new( - total=total, - connect=connect, read=read, redirect=redirect, - _observed_errors=_observed_errors) - - if new_retry.is_exhausted(): - raise MaxRetryError(_pool, url, error or ResponseError(cause)) - - log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry)) - - return new_retry - - - def __repr__(self): - return ('{cls.__name__}(total={self.total}, connect={self.connect}, ' - 'read={self.read}, redirect={self.redirect})').format( - cls=type(self), self=self) - - -# For backwards compatibility (equivalent to pre-v1.9): -Retry.DEFAULT = Retry(3) diff --git a/lib/requests/packages/urllib3/util/ssl_.py b/lib/requests/packages/urllib3/util/ssl_.py deleted file mode 100755 index 47b817e3..00000000 --- a/lib/requests/packages/urllib3/util/ssl_.py +++ /dev/null @@ -1,286 +0,0 @@ -from binascii import hexlify, unhexlify -from hashlib import md5, sha1, sha256 - -from ..exceptions import SSLError, InsecurePlatformWarning - - -SSLContext = None -HAS_SNI = False -create_default_context = None - -# Maps the length of a digest to a possible hash function producing this digest -HASHFUNC_MAP = { - 32: md5, - 40: sha1, - 64: sha256, -} - -import errno -import warnings - -try: # Test for SSL features - import ssl - from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 - from ssl import HAS_SNI # Has SNI? -except ImportError: - pass - - -try: - from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION -except ImportError: - OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 - OP_NO_COMPRESSION = 0x20000 - -# A secure default. -# Sources for more information on TLS ciphers: -# -# - https://wiki.mozilla.org/Security/Server_Side_TLS -# - https://www.ssllabs.com/projects/best-practices/index.html -# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ -# -# The general intent is: -# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE), -# - prefer ECDHE over DHE for better performance, -# - prefer any AES-GCM over any AES-CBC for better performance and security, -# - use 3DES as fallback which is secure but slow, -# - disable NULL authentication, MD5 MACs and DSS for security reasons. -DEFAULT_CIPHERS = ( - 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' - 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:' - '!eNULL:!MD5' -) - -try: - from ssl import SSLContext # Modern SSL? -except ImportError: - import sys - - class SSLContext(object): # Platform-specific: Python 2 & 3.1 - supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or - (3, 2) <= sys.version_info) - - def __init__(self, protocol_version): - self.protocol = protocol_version - # Use default values from a real SSLContext - self.check_hostname = False - self.verify_mode = ssl.CERT_NONE - self.ca_certs = None - self.options = 0 - self.certfile = None - self.keyfile = None - self.ciphers = None - - def load_cert_chain(self, certfile, keyfile): - self.certfile = certfile - self.keyfile = keyfile - - def load_verify_locations(self, cafile=None, capath=None): - self.ca_certs = cafile - - if capath is not None: - raise SSLError("CA directories not supported in older Pythons") - - def set_ciphers(self, cipher_suite): - if not self.supports_set_ciphers: - raise TypeError( - 'Your version of Python does not support setting ' - 'a custom cipher suite. Please upgrade to Python ' - '2.7, 3.2, or later if you need this functionality.' - ) - self.ciphers = cipher_suite - - def wrap_socket(self, socket, server_hostname=None): - warnings.warn( - 'A true SSLContext object is not available. This prevents ' - 'urllib3 from configuring SSL appropriately and may cause ' - 'certain SSL connections to fail. For more information, see ' - 'https://urllib3.readthedocs.org/en/latest/security.html' - '#insecureplatformwarning.', - InsecurePlatformWarning - ) - kwargs = { - 'keyfile': self.keyfile, - 'certfile': self.certfile, - 'ca_certs': self.ca_certs, - 'cert_reqs': self.verify_mode, - 'ssl_version': self.protocol, - } - if self.supports_set_ciphers: # Platform-specific: Python 2.7+ - return wrap_socket(socket, ciphers=self.ciphers, **kwargs) - else: # Platform-specific: Python 2.6 - return wrap_socket(socket, **kwargs) - - -def assert_fingerprint(cert, fingerprint): - """ - Checks if given fingerprint matches the supplied certificate. - - :param cert: - Certificate as bytes object. - :param fingerprint: - Fingerprint as string of hexdigits, can be interspersed by colons. - """ - - fingerprint = fingerprint.replace(':', '').lower() - digest_length = len(fingerprint) - hashfunc = HASHFUNC_MAP.get(digest_length) - if not hashfunc: - raise SSLError( - 'Fingerprint of invalid length: {0}'.format(fingerprint)) - - # We need encode() here for py32; works on py2 and p33. - fingerprint_bytes = unhexlify(fingerprint.encode()) - - cert_digest = hashfunc(cert).digest() - - if cert_digest != fingerprint_bytes: - raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".' - .format(fingerprint, hexlify(cert_digest))) - - -def resolve_cert_reqs(candidate): - """ - Resolves the argument to a numeric constant, which can be passed to - the wrap_socket function/method from the ssl module. - Defaults to :data:`ssl.CERT_NONE`. - If given a string it is assumed to be the name of the constant in the - :mod:`ssl` module or its abbrevation. - (So you can specify `REQUIRED` instead of `CERT_REQUIRED`. - If it's neither `None` nor a string we assume it is already the numeric - constant which can directly be passed to wrap_socket. - """ - if candidate is None: - return CERT_NONE - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'CERT_' + candidate) - return res - - return candidate - - -def resolve_ssl_version(candidate): - """ - like resolve_cert_reqs - """ - if candidate is None: - return PROTOCOL_SSLv23 - - if isinstance(candidate, str): - res = getattr(ssl, candidate, None) - if res is None: - res = getattr(ssl, 'PROTOCOL_' + candidate) - return res - - return candidate - - -def create_urllib3_context(ssl_version=None, cert_reqs=None, - options=None, ciphers=None): - """All arguments have the same meaning as ``ssl_wrap_socket``. - - By default, this function does a lot of the same work that - ``ssl.create_default_context`` does on Python 3.4+. It: - - - Disables SSLv2, SSLv3, and compression - - Sets a restricted set of server ciphers - - If you wish to enable SSLv3, you can do:: - - from urllib3.util import ssl_ - context = ssl_.create_urllib3_context() - context.options &= ~ssl_.OP_NO_SSLv3 - - You can do the same to enable compression (substituting ``COMPRESSION`` - for ``SSLv3`` in the last line above). - - :param ssl_version: - The desired protocol version to use. This will default to - PROTOCOL_SSLv23 which will negotiate the highest protocol that both - the server and your installation of OpenSSL support. - :param cert_reqs: - Whether to require the certificate verification. This defaults to - ``ssl.CERT_REQUIRED``. - :param options: - Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``, - ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``. - :param ciphers: - Which cipher suites to allow the server to select. - :returns: - Constructed SSLContext object with specified options - :rtype: SSLContext - """ - context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) - - # Setting the default here, as we may have no ssl module on import - cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs - - if options is None: - options = 0 - # SSLv2 is easily broken and is considered harmful and dangerous - options |= OP_NO_SSLv2 - # SSLv3 has several problems and is now dangerous - options |= OP_NO_SSLv3 - # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ - # (issue #309) - options |= OP_NO_COMPRESSION - - context.options |= options - - if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 - context.set_ciphers(ciphers or DEFAULT_CIPHERS) - - context.verify_mode = cert_reqs - if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 - # We do our own verification, including fingerprints and alternative - # hostnames. So disable it here - context.check_hostname = False - return context - - -def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, - ca_certs=None, server_hostname=None, - ssl_version=None, ciphers=None, ssl_context=None, - ca_cert_dir=None): - """ - All arguments except for server_hostname, ssl_context, and ca_cert_dir have - the same meaning as they do when using :func:`ssl.wrap_socket`. - - :param server_hostname: - When SNI is supported, the expected hostname of the certificate - :param ssl_context: - A pre-made :class:`SSLContext` object. If none is provided, one will - be created using :func:`create_urllib3_context`. - :param ciphers: - A string of ciphers we wish the client to support. This is not - supported on Python 2.6 as the ssl module does not support it. - :param ca_cert_dir: - A directory containing CA certificates in multiple separate files, as - supported by OpenSSL's -CApath flag or the capath argument to - SSLContext.load_verify_locations(). - """ - context = ssl_context - if context is None: - context = create_urllib3_context(ssl_version, cert_reqs, - ciphers=ciphers) - - if ca_certs or ca_cert_dir: - try: - context.load_verify_locations(ca_certs, ca_cert_dir) - except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2 - raise SSLError(e) - # Py33 raises FileNotFoundError which subclasses OSError - # These are not equivalent unless we check the errno attribute - except OSError as e: # Platform-specific: Python 3.3 and beyond - if e.errno == errno.ENOENT: - raise SSLError(e) - raise - - if certfile: - context.load_cert_chain(certfile, keyfile) - if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI - return context.wrap_socket(sock, server_hostname=server_hostname) - return context.wrap_socket(sock) diff --git a/lib/requests/packages/urllib3/util/url.py b/lib/requests/packages/urllib3/util/url.py deleted file mode 100755 index e58050cd..00000000 --- a/lib/requests/packages/urllib3/util/url.py +++ /dev/null @@ -1,214 +0,0 @@ -from collections import namedtuple - -from ..exceptions import LocationParseError - - -url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] - - -class Url(namedtuple('Url', url_attrs)): - """ - Datastructure for representing an HTTP URL. Used as a return value for - :func:`parse_url`. - """ - slots = () - - def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, - query=None, fragment=None): - if path and not path.startswith('/'): - path = '/' + path - return super(Url, cls).__new__(cls, scheme, auth, host, port, path, - query, fragment) - - @property - def hostname(self): - """For backwards-compatibility with urlparse. We're nice like that.""" - return self.host - - @property - def request_uri(self): - """Absolute path including the query string.""" - uri = self.path or '/' - - if self.query is not None: - uri += '?' + self.query - - return uri - - @property - def netloc(self): - """Network location including host and port""" - if self.port: - return '%s:%d' % (self.host, self.port) - return self.host - - @property - def url(self): - """ - Convert self into a url - - This function should more or less round-trip with :func:`.parse_url`. The - returned url may not be exactly the same as the url inputted to - :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls - with a blank port will have : removed). - - Example: :: - - >>> U = parse_url('http://google.com/mail/') - >>> U.url - 'http://google.com/mail/' - >>> Url('http', 'username:password', 'host.com', 80, - ... '/path', 'query', 'fragment').url - 'http://username:password@host.com:80/path?query#fragment' - """ - scheme, auth, host, port, path, query, fragment = self - url = '' - - # We use "is not None" we want things to happen with empty strings (or 0 port) - if scheme is not None: - url += scheme + '://' - if auth is not None: - url += auth + '@' - if host is not None: - url += host - if port is not None: - url += ':' + str(port) - if path is not None: - url += path - if query is not None: - url += '?' + query - if fragment is not None: - url += '#' + fragment - - return url - - def __str__(self): - return self.url - -def split_first(s, delims): - """ - Given a string and an iterable of delimiters, split on the first found - delimiter. Return two split parts and the matched delimiter. - - If not found, then the first part is the full input string. - - Example:: - - >>> split_first('foo/bar?baz', '?/=') - ('foo', 'bar?baz', '/') - >>> split_first('foo/bar?baz', '123') - ('foo/bar?baz', '', None) - - Scales linearly with number of delims. Not ideal for large number of delims. - """ - min_idx = None - min_delim = None - for d in delims: - idx = s.find(d) - if idx < 0: - continue - - if min_idx is None or idx < min_idx: - min_idx = idx - min_delim = d - - if min_idx is None or min_idx < 0: - return s, '', None - - return s[:min_idx], s[min_idx+1:], min_delim - - -def parse_url(url): - """ - Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is - performed to parse incomplete urls. Fields not provided will be None. - - Partly backwards-compatible with :mod:`urlparse`. - - Example:: - - >>> parse_url('http://google.com/mail/') - Url(scheme='http', host='google.com', port=None, path='/mail/', ...) - >>> parse_url('google.com:80') - Url(scheme=None, host='google.com', port=80, path=None, ...) - >>> parse_url('/foo?bar') - Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) - """ - - # While this code has overlap with stdlib's urlparse, it is much - # simplified for our needs and less annoying. - # Additionally, this implementations does silly things to be optimal - # on CPython. - - if not url: - # Empty - return Url() - - scheme = None - auth = None - host = None - port = None - path = None - fragment = None - query = None - - # Scheme - if '://' in url: - scheme, url = url.split('://', 1) - - # Find the earliest Authority Terminator - # (http://tools.ietf.org/html/rfc3986#section-3.2) - url, path_, delim = split_first(url, ['/', '?', '#']) - - if delim: - # Reassemble the path - path = delim + path_ - - # Auth - if '@' in url: - # Last '@' denotes end of auth part - auth, url = url.rsplit('@', 1) - - # IPv6 - if url and url[0] == '[': - host, url = url.split(']', 1) - host += ']' - - # Port - if ':' in url: - _host, port = url.split(':', 1) - - if not host: - host = _host - - if port: - # If given, ports must be integers. - if not port.isdigit(): - raise LocationParseError(url) - port = int(port) - else: - # Blank ports are cool, too. (rfc3986#section-3.2.3) - port = None - - elif not host and url: - host = url - - if not path: - return Url(scheme, auth, host, port, path, query, fragment) - - # Fragment - if '#' in path: - path, fragment = path.split('#', 1) - - # Query - if '?' in path: - path, query = path.split('?', 1) - - return Url(scheme, auth, host, port, path, query, fragment) - -def get_host(url): - """ - Deprecated. Use :func:`.parse_url` instead. - """ - p = parse_url(url) - return p.scheme or 'http', p.hostname, p.port diff --git a/lib/requests/sessions.py b/lib/requests/sessions.py old mode 100755 new mode 100644 index 9c0dd73d..3f59cab9 --- a/lib/requests/sessions.py +++ b/lib/requests/sessions.py @@ -1,34 +1,35 @@ # -*- coding: utf-8 -*- """ -requests.session -~~~~~~~~~~~~~~~~ +requests.sessions +~~~~~~~~~~~~~~~~~ This module provides a Session object to manage and persist settings across requests (cookies, auth, proxies). - """ import os -from collections import Mapping -from datetime import datetime +import sys +import time +from datetime import timedelta +from collections import OrderedDict from .auth import _basic_auth_str -from .compat import cookielib, OrderedDict, urljoin, urlparse +from .compat import cookielib, is_py3, urljoin, urlparse, Mapping from .cookies import ( cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies) from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT from .hooks import default_hooks, dispatch_hook -from .utils import to_key_val_list, default_headers, to_native_string +from ._internal_utils import to_native_string +from .utils import to_key_val_list, default_headers, DEFAULT_PORTS from .exceptions import ( TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError) -from .packages.urllib3._collections import RecentlyUsedContainer -from .structures import CaseInsensitiveDict +from .structures import CaseInsensitiveDict from .adapters import HTTPAdapter from .utils import ( requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies, - get_auth_from_url + get_auth_from_url, rewind_body, resolve_proxies ) from .status_codes import codes @@ -36,13 +37,19 @@ from .status_codes import codes # formerly defined here, reexposed here for backward compatibility from .models import REDIRECT_STATI -REDIRECT_CACHE_SIZE = 1000 +# Preferred clock, based on which one is more accurate on a given system. +if sys.platform == 'win32': + try: # Python 3.4+ + preferred_clock = time.perf_counter + except AttributeError: # Earlier than Python 3. + preferred_clock = time.clock +else: + preferred_clock = time.time def merge_setting(request_setting, session_setting, dict_class=OrderedDict): - """ - Determines appropriate setting for a given request, taking into account the - explicit setting on that request, and the setting in the session. If a + """Determines appropriate setting for a given request, taking into account + the explicit setting on that request, and the setting in the session. If a setting is a dictionary, they will be merged together using `dict_class` """ @@ -72,8 +79,7 @@ def merge_setting(request_setting, session_setting, dict_class=OrderedDict): def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict): - """ - Properly merges both requests and session hooks. + """Properly merges both requests and session hooks. This is necessary because when request_hooks == {'response': []}, the merge breaks Session hooks entirely. @@ -88,43 +94,91 @@ def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict): class SessionRedirectMixin(object): + + def get_redirect_target(self, resp): + """Receives a Response. Returns a redirect URI or ``None``""" + # Due to the nature of how requests processes redirects this method will + # be called at least once upon the original response and at least twice + # on each subsequent redirect response (if any). + # If a custom mixin is used to handle this logic, it may be advantageous + # to cache the redirect location onto the response object as a private + # attribute. + if resp.is_redirect: + location = resp.headers['location'] + # Currently the underlying http module on py3 decode headers + # in latin1, but empirical evidence suggests that latin1 is very + # rarely used with non-ASCII characters in HTTP headers. + # It is more likely to get UTF8 header rather than latin1. + # This causes incorrect handling of UTF8 encoded location headers. + # To solve this, we re-encode the location in latin1. + if is_py3: + location = location.encode('latin1') + return to_native_string(location, 'utf8') + return None + + def should_strip_auth(self, old_url, new_url): + """Decide whether Authorization header should be removed when redirecting""" + old_parsed = urlparse(old_url) + new_parsed = urlparse(new_url) + if old_parsed.hostname != new_parsed.hostname: + return True + # Special case: allow http -> https redirect when using the standard + # ports. This isn't specified by RFC 7235, but is kept to avoid + # breaking backwards compatibility with older versions of requests + # that allowed any redirects on the same host. + if (old_parsed.scheme == 'http' and old_parsed.port in (80, None) + and new_parsed.scheme == 'https' and new_parsed.port in (443, None)): + return False + + # Handle default port usage corresponding to scheme. + changed_port = old_parsed.port != new_parsed.port + changed_scheme = old_parsed.scheme != new_parsed.scheme + default_port = (DEFAULT_PORTS.get(old_parsed.scheme, None), None) + if (not changed_scheme and old_parsed.port in default_port + and new_parsed.port in default_port): + return False + + # Standard case: root URI must match + return changed_port or changed_scheme + def resolve_redirects(self, resp, req, stream=False, timeout=None, - verify=True, cert=None, proxies=None, **adapter_kwargs): - """Receives a Response. Returns a generator of Responses.""" + verify=True, cert=None, proxies=None, yield_requests=False, **adapter_kwargs): + """Receives a Response. Returns a generator of Responses or Requests.""" - i = 0 - hist = [] # keep track of history + hist = [] # keep track of history - while resp.is_redirect: + url = self.get_redirect_target(resp) + previous_fragment = urlparse(req.url).fragment + while url: prepared_request = req.copy() - if i > 0: - # Update history and keep track of redirects. - hist.append(resp) - new_hist = list(hist) - resp.history = new_hist + # Update history and keep track of redirects. + # resp.history must ignore the original request in this loop + hist.append(resp) + resp.history = hist[1:] try: resp.content # Consume socket so it can be released except (ChunkedEncodingError, ContentDecodingError, RuntimeError): resp.raw.read(decode_content=False) - if i >= self.max_redirects: - raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects) + if len(resp.history) >= self.max_redirects: + raise TooManyRedirects('Exceeded {} redirects.'.format(self.max_redirects), response=resp) # Release the connection back into the pool. resp.close() - url = resp.headers['location'] - method = req.method - # Handle redirection without scheme (see: RFC 1808 Section 4) if url.startswith('//'): parsed_rurl = urlparse(resp.url) - url = '%s:%s' % (parsed_rurl.scheme, url) + url = ':'.join([to_native_string(parsed_rurl.scheme), url]) - # The scheme should be lower case... + # Normalize url case and attach previous fragment if needed (RFC 7231 7.1.2) parsed = urlparse(url) + if parsed.fragment == '' and previous_fragment: + parsed = parsed._replace(fragment=previous_fragment) + elif parsed.fragment: + previous_fragment = parsed.fragment url = parsed.geturl() # Facilitate relative 'location' headers, as allowed by RFC 7231. @@ -136,98 +190,87 @@ class SessionRedirectMixin(object): url = requote_uri(url) prepared_request.url = to_native_string(url) - # Cache the url, unless it redirects to itself. - if resp.is_permanent_redirect and req.url != prepared_request.url: - self.redirect_cache[req.url] = prepared_request.url - # http://tools.ietf.org/html/rfc7231#section-6.4.4 - if (resp.status_code == codes.see_other and - method != 'HEAD'): - method = 'GET' + self.rebuild_method(prepared_request, resp) - # Do what the browsers do, despite standards... - # First, turn 302s into GETs. - if resp.status_code == codes.found and method != 'HEAD': - method = 'GET' - - # Second, if a POST is responded to with a 301, turn it into a GET. - # This bizarre behaviour is explained in Issue 1704. - if resp.status_code == codes.moved and method == 'POST': - method = 'GET' - - prepared_request.method = method - - # https://github.com/kennethreitz/requests/issues/1084 + # https://github.com/psf/requests/issues/1084 if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): - if 'Content-Length' in prepared_request.headers: - del prepared_request.headers['Content-Length'] - + # https://github.com/psf/requests/issues/3490 + purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding') + for header in purged_headers: + prepared_request.headers.pop(header, None) prepared_request.body = None headers = prepared_request.headers - try: - del headers['Cookie'] - except KeyError: - pass + headers.pop('Cookie', None) # Extract any cookies sent on the response to the cookiejar # in the new request. Because we've mutated our copied prepared # request, use the old one that we haven't yet touched. extract_cookies_to_jar(prepared_request._cookies, req, resp.raw) - prepared_request._cookies.update(self.cookies) + merge_cookies(prepared_request._cookies, self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) # Rebuild auth and proxy information. proxies = self.rebuild_proxies(prepared_request, proxies) self.rebuild_auth(prepared_request, resp) + # A failed tell() sets `_body_position` to `object()`. This non-None + # value ensures `rewindable` will be True, allowing us to raise an + # UnrewindableBodyError, instead of hanging the connection. + rewindable = ( + prepared_request._body_position is not None and + ('Content-Length' in headers or 'Transfer-Encoding' in headers) + ) + + # Attempt to rewind consumed file-like object. + if rewindable: + rewind_body(prepared_request) + # Override the original request. req = prepared_request - resp = self.send( - req, - stream=stream, - timeout=timeout, - verify=verify, - cert=cert, - proxies=proxies, - allow_redirects=False, - **adapter_kwargs - ) + if yield_requests: + yield req + else: - extract_cookies_to_jar(self.cookies, prepared_request, resp.raw) + resp = self.send( + req, + stream=stream, + timeout=timeout, + verify=verify, + cert=cert, + proxies=proxies, + allow_redirects=False, + **adapter_kwargs + ) - i += 1 - yield resp + extract_cookies_to_jar(self.cookies, prepared_request, resp.raw) + + # extract redirect url, if any, for the next loop + url = self.get_redirect_target(resp) + yield resp def rebuild_auth(self, prepared_request, response): - """ - When being redirected we may want to strip authentication from the + """When being redirected we may want to strip authentication from the request to avoid leaking credentials. This method intelligently removes and reapplies authentication where possible to avoid credential loss. """ headers = prepared_request.headers url = prepared_request.url - if 'Authorization' in headers: + if 'Authorization' in headers and self.should_strip_auth(response.request.url, url): # If we get redirected to a new host, we should strip out any - # authentication headers. - original_parsed = urlparse(response.request.url) - redirect_parsed = urlparse(url) - - if (original_parsed.hostname != redirect_parsed.hostname): - del headers['Authorization'] + # authentication headers. + del headers['Authorization'] # .netrc might have more auth for us on our new host. new_auth = get_netrc_auth(url) if self.trust_env else None if new_auth is not None: prepared_request.prepare_auth(new_auth) - return - def rebuild_proxies(self, prepared_request, proxies): - """ - This method re-evaluates the proxy configuration by considering the + """This method re-evaluates the proxy configuration by considering the environment variables. If we are redirected to a URL covered by NO_PROXY, we strip the proxy configuration. Otherwise, we set missing proxy keys for this URL (in case they were stripped by a previous @@ -235,19 +278,12 @@ class SessionRedirectMixin(object): This method also replaces the Proxy-Authorization header where necessary. + + :rtype: dict """ headers = prepared_request.headers - url = prepared_request.url - scheme = urlparse(url).scheme - new_proxies = proxies.copy() if proxies is not None else {} - - if self.trust_env and not should_bypass_proxies(url): - environ_proxies = get_environ_proxies(url) - - proxy = environ_proxies.get(scheme) - - if proxy: - new_proxies.setdefault(scheme, environ_proxies[scheme]) + scheme = urlparse(prepared_request.url).scheme + new_proxies = resolve_proxies(prepared_request, proxies, self.trust_env) if 'Proxy-Authorization' in headers: del headers['Proxy-Authorization'] @@ -262,6 +298,28 @@ class SessionRedirectMixin(object): return new_proxies + def rebuild_method(self, prepared_request, response): + """When being redirected we may want to change the method of the request + based on certain specs or browser behavior. + """ + method = prepared_request.method + + # https://tools.ietf.org/html/rfc7231#section-6.4.4 + if response.status_code == codes.see_other and method != 'HEAD': + method = 'GET' + + # Do what the browsers do, despite standards... + # First, turn 302s into GETs. + if response.status_code == codes.found and method != 'HEAD': + method = 'GET' + + # Second, if a POST is responded to with a 301, turn it into a GET. + # This bizarre behaviour is explained in Issue 1704. + if response.status_code == codes.moved and method == 'POST': + method = 'GET' + + prepared_request.method = method + class Session(SessionRedirectMixin): """A Requests session. @@ -272,19 +330,19 @@ class Session(SessionRedirectMixin): >>> import requests >>> s = requests.Session() - >>> s.get('http://httpbin.org/get') - 200 + >>> s.get('https://httpbin.org/get') + Or as a context manager:: >>> with requests.Session() as s: - >>> s.get('http://httpbin.org/get') - 200 + ... s.get('https://httpbin.org/get') + """ __attrs__ = [ 'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'prefetch', 'adapters', 'stream', 'trust_env', + 'cert', 'adapters', 'stream', 'trust_env', 'max_redirects', ] @@ -316,16 +374,27 @@ class Session(SessionRedirectMixin): self.stream = False #: SSL Verification default. + #: Defaults to `True`, requiring requests to verify the TLS certificate at the + #: remote end. + #: If verify is set to `False`, requests will accept any TLS certificate + #: presented by the server, and will ignore hostname mismatches and/or + #: expired certificates, which will make your application vulnerable to + #: man-in-the-middle (MitM) attacks. + #: Only set this to `False` for testing. self.verify = True - #: SSL certificate default. + #: SSL client certificate default, if String, path to ssl client + #: cert file (.pem). If Tuple, ('cert', 'key') pair. self.cert = None #: Maximum number of redirects allowed. If the request exceeds this #: limit, a :class:`TooManyRedirects` exception is raised. + #: This defaults to requests.models.DEFAULT_REDIRECT_LIMIT, which is + #: 30. self.max_redirects = DEFAULT_REDIRECT_LIMIT - #: Should we trust the environment? + #: Trust environment settings for proxy configuration, default + #: authentication and similar. self.trust_env = True #: A CookieJar containing all currently outstanding cookies set on this @@ -339,9 +408,6 @@ class Session(SessionRedirectMixin): self.mount('https://', HTTPAdapter()) self.mount('http://', HTTPAdapter()) - # Only store 1000 redirects to prevent using infinite memory - self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE) - def __enter__(self): return self @@ -356,6 +422,7 @@ class Session(SessionRedirectMixin): :param request: :class:`Request` instance to prepare with this session's settings. + :rtype: requests.PreparedRequest """ cookies = request.cookies or {} @@ -367,7 +434,6 @@ class Session(SessionRedirectMixin): merged_cookies = merge_cookies( merge_cookies(RequestsCookieJar(), self.cookies), cookies) - # Set environment's basic authentication if not explicitly set. auth = request.auth if self.trust_env and not auth and not self.auth: @@ -389,20 +455,9 @@ class Session(SessionRedirectMixin): return p def request(self, method, url, - params=None, - data=None, - headers=None, - cookies=None, - files=None, - auth=None, - timeout=None, - allow_redirects=True, - proxies=None, - hooks=None, - stream=None, - verify=None, - cert=None, - json=None): + params=None, data=None, headers=None, cookies=None, files=None, + auth=None, timeout=None, allow_redirects=True, proxies=None, + hooks=None, stream=None, verify=None, cert=None, json=None): """Constructs a :class:`Request `, prepares it and sends it. Returns :class:`Response ` object. @@ -410,8 +465,8 @@ class Session(SessionRedirectMixin): :param url: URL for the new :class:`Request` object. :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. - :param data: (optional) Dictionary, bytes, or file-like object to send - in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json to send in the body of the :class:`Request`. :param headers: (optional) Dictionary of HTTP Headers to send with the @@ -432,26 +487,30 @@ class Session(SessionRedirectMixin): hostname to the URL of the proxy. :param stream: (optional) whether to immediately download the response content. Defaults to ``False``. - :param verify: (optional) if ``True``, the SSL cert will be verified. - A CA_BUNDLE path can also be provided. + :param verify: (optional) Either a boolean, in which case it controls whether we verify + the server's TLS certificate, or a string, in which case it must be a path + to a CA bundle to use. Defaults to ``True``. When set to + ``False``, requests will accept any TLS certificate presented by + the server, and will ignore hostname mismatches and/or expired + certificates, which will make your application vulnerable to + man-in-the-middle (MitM) attacks. Setting verify to ``False`` + may be useful during local development or testing. :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. + :rtype: requests.Response """ - - method = to_native_string(method) - # Create the Request. req = Request( - method = method.upper(), - url = url, - headers = headers, - files = files, - data = data or {}, - json = json, - params = params or {}, - auth = auth, - cookies = cookies, - hooks = hooks, + method=method.upper(), + url=url, + headers=headers, + files=files, + data=data or {}, + json=json, + params=params or {}, + auth=auth, + cookies=cookies, + hooks=hooks, ) prep = self.prepare_request(req) @@ -472,97 +531,105 @@ class Session(SessionRedirectMixin): return resp def get(self, url, **kwargs): - """Sends a GET request. Returns :class:`Response` object. + r"""Sends a GET request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) return self.request('GET', url, **kwargs) def options(self, url, **kwargs): - """Sends a OPTIONS request. Returns :class:`Response` object. + r"""Sends a OPTIONS request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', True) return self.request('OPTIONS', url, **kwargs) def head(self, url, **kwargs): - """Sends a HEAD request. Returns :class:`Response` object. + r"""Sends a HEAD request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :rtype: requests.Response """ kwargs.setdefault('allow_redirects', False) return self.request('HEAD', url, **kwargs) def post(self, url, data=None, json=None, **kwargs): - """Sends a POST request. Returns :class:`Response` object. + r"""Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param json: (optional) json to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :rtype: requests.Response """ return self.request('POST', url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): - """Sends a PUT request. Returns :class:`Response` object. + r"""Sends a PUT request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :rtype: requests.Response """ return self.request('PUT', url, data=data, **kwargs) def patch(self, url, data=None, **kwargs): - """Sends a PATCH request. Returns :class:`Response` object. + r"""Sends a PATCH request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. - :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. + :param data: (optional) Dictionary, list of tuples, bytes, or file-like + object to send in the body of the :class:`Request`. :param \*\*kwargs: Optional arguments that ``request`` takes. + :rtype: requests.Response """ - return self.request('PATCH', url, data=data, **kwargs) + return self.request('PATCH', url, data=data, **kwargs) def delete(self, url, **kwargs): - """Sends a DELETE request. Returns :class:`Response` object. + r"""Sends a DELETE request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. :param \*\*kwargs: Optional arguments that ``request`` takes. + :rtype: requests.Response """ return self.request('DELETE', url, **kwargs) def send(self, request, **kwargs): - """Send a given PreparedRequest.""" + """Send a given PreparedRequest. + + :rtype: requests.Response + """ # Set defaults that the hooks can utilize to ensure they always have # the correct parameters to reproduce the previous request. kwargs.setdefault('stream', self.stream) kwargs.setdefault('verify', self.verify) kwargs.setdefault('cert', self.cert) - kwargs.setdefault('proxies', self.proxies) + if 'proxies' not in kwargs: + kwargs['proxies'] = resolve_proxies( + request, self.proxies, self.trust_env + ) # It's possible that users might accidentally send a Request object. # Guard against that specific failure case. - if not isinstance(request, PreparedRequest): + if isinstance(request, Request): raise ValueError('You can only send PreparedRequests.') - checked_urls = set() - while request.url in self.redirect_cache: - checked_urls.add(request.url) - new_url = self.redirect_cache.get(request.url) - if new_url in checked_urls: - break - request.url = new_url - # Set up variables needed for resolve_redirects and dispatching of hooks allow_redirects = kwargs.pop('allow_redirects', True) stream = kwargs.get('stream') @@ -572,13 +639,14 @@ class Session(SessionRedirectMixin): adapter = self.get_adapter(url=request.url) # Start time (approximately) of the request - start = datetime.utcnow() + start = preferred_clock() # Send the request r = adapter.send(request, **kwargs) # Total elapsed time of the request (approximately) - r.elapsed = datetime.utcnow() - start + elapsed = preferred_clock() - start + r.elapsed = timedelta(seconds=elapsed) # Response manipulation hooks r = dispatch_hook('response', hooks, r, **kwargs) @@ -592,11 +660,13 @@ class Session(SessionRedirectMixin): extract_cookies_to_jar(self.cookies, request, r.raw) - # Redirect resolving generator. - gen = self.resolve_redirects(r, request, **kwargs) - # Resolve redirects if allowed. - history = [resp for resp in gen] if allow_redirects else [] + if allow_redirects: + # Redirect resolving generator. + gen = self.resolve_redirects(r, request, **kwargs) + history = [resp for resp in gen] + else: + history = [] # Shuffle things around if there's history. if history: @@ -606,17 +676,29 @@ class Session(SessionRedirectMixin): r = history.pop() r.history = history + # If redirects aren't being followed, store the response on the Request for Response.next(). + if not allow_redirects: + try: + r._next = next(self.resolve_redirects(r, request, yield_requests=True, **kwargs)) + except StopIteration: + pass + if not stream: r.content return r def merge_environment_settings(self, url, proxies, stream, verify, cert): - """Check the environment and merge it with some settings.""" + """ + Check the environment and merge it with some settings. + + :rtype: dict + """ # Gather clues from the surrounding environment. if self.trust_env: # Set environment's proxies. - env_proxies = get_environ_proxies(url) or {} + no_proxy = proxies.get('no_proxy') if proxies is not None else None + env_proxies = get_environ_proxies(url, no_proxy=no_proxy) for (k, v) in env_proxies.items(): proxies.setdefault(k, v) @@ -636,14 +718,18 @@ class Session(SessionRedirectMixin): 'cert': cert} def get_adapter(self, url): - """Returns the appropriate connnection adapter for the given URL.""" + """ + Returns the appropriate connection adapter for the given URL. + + :rtype: requests.adapters.BaseAdapter + """ for (prefix, adapter) in self.adapters.items(): - if url.lower().startswith(prefix): + if url.lower().startswith(prefix.lower()): return adapter # Nothing matches :-/ - raise InvalidSchema("No connection adapters were found for '%s'" % url) + raise InvalidSchema("No connection adapters were found for {!r}".format(url)) def close(self): """Closes all adapters and as such the session""" @@ -653,8 +739,8 @@ class Session(SessionRedirectMixin): def mount(self, prefix, adapter): """Registers a connection adapter to a prefix. - Adapters are sorted in descending order by key length.""" - + Adapters are sorted in descending order by prefix length. + """ self.adapters[prefix] = adapter keys_to_move = [k for k in self.adapters if len(k) < len(prefix)] @@ -662,21 +748,24 @@ class Session(SessionRedirectMixin): self.adapters[key] = self.adapters.pop(key) def __getstate__(self): - state = dict((attr, getattr(self, attr, None)) for attr in self.__attrs__) - state['redirect_cache'] = dict(self.redirect_cache) + state = {attr: getattr(self, attr, None) for attr in self.__attrs__} return state def __setstate__(self, state): - redirect_cache = state.pop('redirect_cache', {}) for attr, value in state.items(): setattr(self, attr, value) - self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE) - for redirect, to in redirect_cache.items(): - self.redirect_cache[redirect] = to - def session(): - """Returns a :class:`Session` for context-management.""" + """ + Returns a :class:`Session` for context-management. + .. deprecated:: 1.0.0 + + This method has been deprecated since version 1.0.0 and is only kept for + backwards compatibility. New code should use :class:`~requests.sessions.Session` + to create a session. This may be removed at a future date. + + :rtype: Session + """ return Session() diff --git a/lib/requests/status_codes.py b/lib/requests/status_codes.py old mode 100755 new mode 100644 index e0887f21..d80a7cd4 --- a/lib/requests/status_codes.py +++ b/lib/requests/status_codes.py @@ -1,5 +1,25 @@ # -*- coding: utf-8 -*- +r""" +The ``codes`` object defines a mapping from common names for HTTP statuses +to their numerical codes, accessible either as attributes or as dictionary +items. + +Example:: + + >>> import requests + >>> requests.codes['temporary_redirect'] + 307 + >>> requests.codes.teapot + 418 + >>> requests.codes['\o/'] + 200 + +Some codes have multiple names, and both upper- and lower-case versions of +the names are allowed. For example, ``codes.ok``, ``codes.OK``, and +``codes.okay`` all correspond to the HTTP status code 200. +""" + from .structures import LookupDict _codes = { @@ -31,7 +51,7 @@ _codes = { 306: ('switch_proxy',), 307: ('temporary_redirect', 'temporary_moved', 'temporary'), 308: ('permanent_redirect', - 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 + 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 # Client Error. 400: ('bad_request', 'bad'), @@ -53,6 +73,7 @@ _codes = { 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), 417: ('expectation_failed',), 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), + 421: ('misdirected_request',), 422: ('unprocessable_entity', 'unprocessable'), 423: ('locked',), 424: ('failed_dependency', 'dependency'), @@ -78,12 +99,25 @@ _codes = { 507: ('insufficient_storage',), 509: ('bandwidth_limit_exceeded', 'bandwidth'), 510: ('not_extended',), + 511: ('network_authentication_required', 'network_auth', 'network_authentication'), } codes = LookupDict(name='status_codes') -for (code, titles) in list(_codes.items()): - for title in titles: - setattr(codes, title, code) - if not title.startswith('\\'): - setattr(codes, title.upper(), code) +def _init(): + for code, titles in _codes.items(): + for title in titles: + setattr(codes, title, code) + if not title.startswith(('\\', '/')): + setattr(codes, title.upper(), code) + + def doc(code): + names = ', '.join('``%s``' % n for n in _codes[code]) + return '* %d: %s' % (code, names) + + global __doc__ + __doc__ = (__doc__ + '\n' + + '\n'.join(doc(code) for code in sorted(_codes)) + if __doc__ is not None else None) + +_init() diff --git a/lib/requests/structures.py b/lib/requests/structures.py old mode 100755 new mode 100644 index 3e5f2faa..8ee0ba7a --- a/lib/requests/structures.py +++ b/lib/requests/structures.py @@ -5,18 +5,18 @@ requests.structures ~~~~~~~~~~~~~~~~~~~ Data structures that power Requests. - """ -import collections +from collections import OrderedDict + +from .compat import Mapping, MutableMapping -class CaseInsensitiveDict(collections.MutableMapping): - """ - A case-insensitive ``dict``-like object. +class CaseInsensitiveDict(MutableMapping): + """A case-insensitive ``dict``-like object. Implements all methods and operations of - ``collections.MutableMapping`` as well as dict's ``copy``. Also + ``MutableMapping`` as well as dict's ``copy``. Also provides ``lower_items``. All keys are expected to be strings. The structure remembers the @@ -37,10 +37,10 @@ class CaseInsensitiveDict(collections.MutableMapping): If the constructor, ``.update``, or equality comparison operations are given keys that have equal ``.lower()``s, the behavior is undefined. - """ + def __init__(self, data=None, **kwargs): - self._store = dict() + self._store = OrderedDict() if data is None: data = {} self.update(data, **kwargs) @@ -71,7 +71,7 @@ class CaseInsensitiveDict(collections.MutableMapping): ) def __eq__(self, other): - if isinstance(other, collections.Mapping): + if isinstance(other, Mapping): other = CaseInsensitiveDict(other) else: return NotImplemented @@ -85,6 +85,7 @@ class CaseInsensitiveDict(collections.MutableMapping): def __repr__(self): return str(dict(self.items())) + class LookupDict(dict): """Dictionary lookup object.""" diff --git a/lib/requests/utils.py b/lib/requests/utils.py old mode 100755 new mode 100644 index 3d4c7945..153776c7 --- a/lib/requests/utils.py +++ b/lib/requests/utils.py @@ -6,37 +6,102 @@ requests.utils This module provides utility functions that are used within Requests that are also useful for external consumption. - """ -import cgi import codecs -import collections +import contextlib import io import os -import platform import re -import sys import socket import struct +import sys +import tempfile import warnings +import zipfile +from collections import OrderedDict +from urllib3.util import make_headers +from urllib3.util import parse_url -from . import __version__ +from .__version__ import __version__ from . import certs +# to_native_string is unused here, but imported here for backwards compatibility +from ._internal_utils import to_native_string from .compat import parse_http_list as _parse_list_header -from .compat import (quote, urlparse, bytes, str, OrderedDict, unquote, is_py2, - builtin_str, getproxies, proxy_bypass, urlunparse, - basestring) -from .cookies import RequestsCookieJar, cookiejar_from_dict +from .compat import ( + quote, urlparse, bytes, str, unquote, getproxies, + proxy_bypass, urlunparse, basestring, integer_types, is_py3, + proxy_bypass_environment, getproxies_environment, Mapping) +from .cookies import cookiejar_from_dict from .structures import CaseInsensitiveDict -from .exceptions import InvalidURL - -_hush_pyflakes = (RequestsCookieJar,) +from .exceptions import ( + InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError) NETRC_FILES = ('.netrc', '_netrc') DEFAULT_CA_BUNDLE_PATH = certs.where() +DEFAULT_PORTS = {'http': 80, 'https': 443} + +# Ensure that ', ' is used to preserve previous delimiter behavior. +DEFAULT_ACCEPT_ENCODING = ", ".join( + re.split(r",\s*", make_headers(accept_encoding=True)["accept-encoding"]) +) + + +if sys.platform == 'win32': + # provide a proxy_bypass version on Windows without DNS lookups + + def proxy_bypass_registry(host): + try: + if is_py3: + import winreg + else: + import _winreg as winreg + except ImportError: + return False + + try: + internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') + # ProxyEnable could be REG_SZ or REG_DWORD, normalizing it + proxyEnable = int(winreg.QueryValueEx(internetSettings, + 'ProxyEnable')[0]) + # ProxyOverride is almost always a string + proxyOverride = winreg.QueryValueEx(internetSettings, + 'ProxyOverride')[0] + except OSError: + return False + if not proxyEnable or not proxyOverride: + return False + + # make a check value list from the registry entry: replace the + # '' string by the localhost entry and the corresponding + # canonical entry. + proxyOverride = proxyOverride.split(';') + # now check if we match one of the registry values. + for test in proxyOverride: + if test == '': + if '.' not in host: + return True + test = test.replace(".", r"\.") # mask dots + test = test.replace("*", r".*") # change glob sequence + test = test.replace("?", r".") # change glob char + if re.match(test, host, re.I): + return True + return False + + def proxy_bypass(host): # noqa + """Return True, if the host should be bypassed. + + Checks proxy settings gathered from the environment, if specified, + or the registry. + """ + if getproxies_environment(): + return proxy_bypass_environment(host) + else: + return proxy_bypass_registry(host) + def dict_to_sequence(d): """Returns an internal sequence dictionary update.""" @@ -48,40 +113,90 @@ def dict_to_sequence(d): def super_len(o): + total_length = None + current_position = 0 + if hasattr(o, '__len__'): - return len(o) + total_length = len(o) - if hasattr(o, 'len'): - return o.len + elif hasattr(o, 'len'): + total_length = o.len - if hasattr(o, 'fileno'): + elif hasattr(o, 'fileno'): try: fileno = o.fileno() - except io.UnsupportedOperation: + except (io.UnsupportedOperation, AttributeError): + # AttributeError is a surprising exception, seeing as how we've just checked + # that `hasattr(o, 'fileno')`. It happens for objects obtained via + # `Tarfile.extractfile()`, per issue 5229. pass else: - return os.fstat(fileno).st_size + total_length = os.fstat(fileno).st_size - if hasattr(o, 'getvalue'): - # e.g. BytesIO, cStringIO.StringIO - return len(o.getvalue()) + # Having used fstat to determine the file length, we need to + # confirm that this file was opened up in binary mode. + if 'b' not in o.mode: + warnings.warn(( + "Requests has determined the content-length for this " + "request using the binary size of the file: however, the " + "file has been opened in text mode (i.e. without the 'b' " + "flag in the mode). This may lead to an incorrect " + "content-length. In Requests 3.0, support will be removed " + "for files in text mode."), + FileModeWarning + ) + + if hasattr(o, 'tell'): + try: + current_position = o.tell() + except (OSError, IOError): + # This can happen in some weird situations, such as when the file + # is actually a special file descriptor like stdin. In this + # instance, we don't know what the length is, so set it to zero and + # let requests chunk it instead. + if total_length is not None: + current_position = total_length + else: + if hasattr(o, 'seek') and total_length is None: + # StringIO and BytesIO have seek but no usable fileno + try: + # seek to end of file + o.seek(0, 2) + total_length = o.tell() + + # seek back to current position to support + # partially read file-like objects + o.seek(current_position or 0) + except (OSError, IOError): + total_length = 0 + + if total_length is None: + total_length = 0 + + return max(0, total_length - current_position) def get_netrc_auth(url, raise_errors=False): """Returns the Requests tuple auth for a given url from netrc.""" + netrc_file = os.environ.get('NETRC') + if netrc_file is not None: + netrc_locations = (netrc_file,) + else: + netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES) + try: from netrc import netrc, NetrcParseError netrc_path = None - for f in NETRC_FILES: + for f in netrc_locations: try: - loc = os.path.expanduser('~/{0}'.format(f)) + loc = os.path.expanduser(f) except KeyError: # os.path.expanduser can fail when $HOME is undefined and - # getpwuid fails. See http://bugs.python.org/issue20164 & - # https://github.com/kennethreitz/requests/issues/1846 + # getpwuid fails. See https://bugs.python.org/issue20164 & + # https://github.com/psf/requests/issues/1846 return if os.path.exists(loc): @@ -94,8 +209,12 @@ def get_netrc_auth(url, raise_errors=False): ri = urlparse(url) - # Strip port numbers from netloc - host = ri.netloc.split(':')[0] + # Strip port numbers from netloc. This weird `if...encode`` dance is + # used for Python 3.2, which doesn't support unicode literals. + splitstr = b':' + if isinstance(url, str): + splitstr = splitstr.decode('ascii') + host = ri.netloc.split(splitstr)[0] try: _netrc = netrc(netrc_path).authenticators(host) @@ -109,7 +228,7 @@ def get_netrc_auth(url, raise_errors=False): if raise_errors: raise - # AppEngine hackiness. + # App Engine hackiness. except (ImportError, AttributeError): pass @@ -122,6 +241,57 @@ def guess_filename(obj): return os.path.basename(name) +def extract_zipped_paths(path): + """Replace nonexistent paths that look like they refer to a member of a zip + archive with the location of an extracted copy of the target, or else + just return the provided path unchanged. + """ + if os.path.exists(path): + # this is already a valid path, no need to do anything further + return path + + # find the first valid part of the provided path and treat that as a zip archive + # assume the rest of the path is the name of a member in the archive + archive, member = os.path.split(path) + while archive and not os.path.exists(archive): + archive, prefix = os.path.split(archive) + if not prefix: + # If we don't check for an empty prefix after the split (in other words, archive remains unchanged after the split), + # we _can_ end up in an infinite loop on a rare corner case affecting a small number of users + break + member = '/'.join([prefix, member]) + + if not zipfile.is_zipfile(archive): + return path + + zip_file = zipfile.ZipFile(archive) + if member not in zip_file.namelist(): + return path + + # we have a valid zip archive and a valid member of that archive + tmp = tempfile.gettempdir() + extracted_path = os.path.join(tmp, member.split('/')[-1]) + if not os.path.exists(extracted_path): + # use read + write to avoid the creating nested folders, we only want the file, avoids mkdir racing condition + with atomic_open(extracted_path) as file_handler: + file_handler.write(zip_file.read(member)) + return extracted_path + + +@contextlib.contextmanager +def atomic_open(filename): + """Write a file to the disk in an atomic fashion""" + replacer = os.rename if sys.version_info[0] == 2 else os.replace + tmp_descriptor, tmp_name = tempfile.mkstemp(dir=os.path.dirname(filename)) + try: + with os.fdopen(tmp_descriptor, 'wb') as tmp_handler: + yield tmp_handler + replacer(tmp_name, filename) + except BaseException: + os.remove(tmp_name) + raise + + def from_key_val_list(value): """Take an object and test to see if it can be represented as a dictionary. Unless it can not be represented as such, return an @@ -132,9 +302,13 @@ def from_key_val_list(value): >>> from_key_val_list([('key', 'val')]) OrderedDict([('key', 'val')]) >>> from_key_val_list('string') - ValueError: need more than 1 value to unpack + Traceback (most recent call last): + ... + ValueError: cannot encode objects that are not 2-tuples >>> from_key_val_list({'key': 'val'}) OrderedDict([('key', 'val')]) + + :rtype: OrderedDict """ if value is None: return None @@ -156,7 +330,11 @@ def to_key_val_list(value): >>> to_key_val_list({'key': 'val'}) [('key', 'val')] >>> to_key_val_list('string') - ValueError: cannot encode objects that are not 2-tuples. + Traceback (most recent call last): + ... + ValueError: cannot encode objects that are not 2-tuples + + :rtype: list """ if value is None: return None @@ -164,7 +342,7 @@ def to_key_val_list(value): if isinstance(value, (str, bytes, bool, int)): raise ValueError('cannot encode objects that are not 2-tuples') - if isinstance(value, collections.Mapping): + if isinstance(value, Mapping): value = value.items() return list(value) @@ -192,6 +370,7 @@ def parse_list_header(value): :param value: a string with a list header. :return: :class:`list` + :rtype: list """ result = [] for item in _parse_list_header(value): @@ -222,6 +401,7 @@ def parse_dict_header(value): :param value: a string with a dict header. :return: :class:`dict` + :rtype: dict """ result = {} for item in _parse_list_header(value): @@ -242,6 +422,7 @@ def unquote_header_value(value, is_filename=False): using for quoting. :param value: the header value to unquote. + :rtype: str """ if value and value[0] == value[-1] == '"': # this is not the real unquoting, but fixing this so that the @@ -264,6 +445,7 @@ def dict_from_cookiejar(cj): """Returns a key/value dictionary from a CookieJar. :param cj: CookieJar object to extract cookies from. + :rtype: dict """ cookie_dict = {} @@ -279,11 +461,10 @@ def add_dict_to_cookiejar(cj, cookie_dict): :param cj: CookieJar to insert cookies into. :param cookie_dict: Dict of key/values to insert into CookieJar. + :rtype: CookieJar """ - cj2 = cookiejar_from_dict(cookie_dict) - cj.update(cj2) - return cj + return cookiejar_from_dict(cookie_dict, cj) def get_encodings_from_content(content): @@ -306,10 +487,36 @@ def get_encodings_from_content(content): xml_re.findall(content)) +def _parse_content_type_header(header): + """Returns content type and parameters from given header + + :param header: string + :return: tuple containing content type and dictionary of + parameters + """ + + tokens = header.split(';') + content_type, params = tokens[0].strip(), tokens[1:] + params_dict = {} + items_to_strip = "\"' " + + for param in params: + param = param.strip() + if param: + key, value = param, True + index_of_equals = param.find("=") + if index_of_equals != -1: + key = param[:index_of_equals].strip(items_to_strip) + value = param[index_of_equals + 1:].strip(items_to_strip) + params_dict[key.lower()] = value + return content_type, params_dict + + def get_encoding_from_headers(headers): """Returns encodings from given HTTP Header Dict. :param headers: dictionary to extract encoding from. + :rtype: str """ content_type = headers.get('content-type') @@ -317,7 +524,7 @@ def get_encoding_from_headers(headers): if not content_type: return None - content_type, params = cgi.parse_header(content_type) + content_type, params = _parse_content_type_header(content_type) if 'charset' in params: return params['charset'].strip("'\"") @@ -325,6 +532,10 @@ def get_encoding_from_headers(headers): if 'text' in content_type: return 'ISO-8859-1' + if 'application/json' in content_type: + # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset + return 'utf-8' + def stream_decode_response_unicode(iterator, r): """Stream decodes a iterator.""" @@ -347,6 +558,8 @@ def stream_decode_response_unicode(iterator, r): def iter_slices(string, slice_length): """Iterate over slices of a string.""" pos = 0 + if slice_length is None or slice_length <= 0: + slice_length = len(string) while pos < len(string): yield string[pos:pos + slice_length] pos += slice_length @@ -362,6 +575,7 @@ def get_unicode_from_response(r): 1. charset from content-type 2. fall back and replace all unicode characters + :rtype: str """ warnings.warn(( 'In requests 3.0, get_unicode_from_response will be removed. For ' @@ -389,13 +603,14 @@ def get_unicode_from_response(r): # The unreserved URI characters (RFC 3986) UNRESERVED_SET = frozenset( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - + "0123456789-._~") + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~") def unquote_unreserved(uri): """Un-escape any percent-escape sequences in a URI that are unreserved characters. This leaves all reserved, illegal and non-ASCII bytes encoded. + + :rtype: str """ parts = uri.split('%') for i in range(1, len(parts)): @@ -420,6 +635,8 @@ def requote_uri(uri): This function passes the given URI through an unquote/quote cycle to ensure that it is fully and consistently quoted. + + :rtype: str """ safe_with_percent = "!#$%&'()*+,/:;=?@[]~" safe_without_percent = "!#$&'()*+,/:;=?@[]~" @@ -436,10 +653,12 @@ def requote_uri(uri): def address_in_network(ip, net): - """ - This function allows you to check if on IP belongs to a network subnet + """This function allows you to check if an IP belongs to a network subnet + Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24 returns False if ip = 192.168.1.1 and net = 192.168.100.0/24 + + :rtype: bool """ ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0] netaddr, bits = net.split('/') @@ -449,15 +668,20 @@ def address_in_network(ip, net): def dotted_netmask(mask): - """ - Converts mask from /xx format to xxx.xxx.xxx.xxx + """Converts mask from /xx format to xxx.xxx.xxx.xxx + Example: if mask is 24 function returns 255.255.255.0 + + :rtype: str """ bits = 0xffffffff ^ (1 << 32 - mask) - 1 return socket.inet_ntoa(struct.pack('>I', bits)) def is_ipv4_address(string_ip): + """ + :rtype: bool + """ try: socket.inet_aton(string_ip) except socket.error: @@ -466,7 +690,11 @@ def is_ipv4_address(string_ip): def is_valid_cidr(string_network): - """Very simple check of the cidr format in no_proxy variable""" + """ + Very simple check of the cidr format in no_proxy variable. + + :rtype: bool + """ if string_network.count('/') == 1: try: mask = int(string_network.split('/')[1]) @@ -485,58 +713,101 @@ def is_valid_cidr(string_network): return True -def should_bypass_proxies(url): +@contextlib.contextmanager +def set_environ(env_name, value): + """Set the environment variable 'env_name' to 'value' + + Save previous value, yield, and then restore the previous value stored in + the environment variable 'env_name'. + + If 'value' is None, do nothing""" + value_changed = value is not None + if value_changed: + old_value = os.environ.get(env_name) + os.environ[env_name] = value + try: + yield + finally: + if value_changed: + if old_value is None: + del os.environ[env_name] + else: + os.environ[env_name] = old_value + + +def should_bypass_proxies(url, no_proxy): """ Returns whether we should bypass proxies or not. + + :rtype: bool """ + # Prioritize lowercase environment variables over uppercase + # to keep a consistent behaviour with other http projects (curl, wget). get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL # we're getting isn't in the no_proxy list. - no_proxy = get_proxy('no_proxy') - netloc = urlparse(url).netloc + no_proxy_arg = no_proxy + if no_proxy is None: + no_proxy = get_proxy('no_proxy') + parsed = urlparse(url) + + if parsed.hostname is None: + # URLs don't always have hostnames, e.g. file:/// urls. + return True if no_proxy: # We need to check whether we match here. We need to see if we match - # the end of the netloc, both with and without the port. - no_proxy = no_proxy.replace(' ', '').split(',') + # the end of the hostname, both with and without the port. + no_proxy = ( + host for host in no_proxy.replace(' ', '').split(',') if host + ) - ip = netloc.split(':')[0] - if is_ipv4_address(ip): + if is_ipv4_address(parsed.hostname): for proxy_ip in no_proxy: if is_valid_cidr(proxy_ip): - if address_in_network(ip, proxy_ip): + if address_in_network(parsed.hostname, proxy_ip): return True + elif parsed.hostname == proxy_ip: + # If no_proxy ip was defined in plain IP notation instead of cidr notation & + # matches the IP of the index + return True else: + host_with_port = parsed.hostname + if parsed.port: + host_with_port += ':{}'.format(parsed.port) + for host in no_proxy: - if netloc.endswith(host) or netloc.split(':')[0].endswith(host): + if parsed.hostname.endswith(host) or host_with_port.endswith(host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. return True - # If the system proxy settings indicate that this URL should be bypassed, - # don't proxy. - # The proxy_bypass function is incredibly buggy on OS X in early versions - # of Python 2.6, so allow this call to fail. Only catch the specific - # exceptions we've seen, though: this call failing in other ways can reveal - # legitimate problems. - try: - bypass = proxy_bypass(netloc) - except (TypeError, socket.gaierror): - bypass = False + with set_environ('no_proxy', no_proxy_arg): + # parsed.hostname can be `None` in cases such as a file URI. + try: + bypass = proxy_bypass(parsed.hostname) + except (TypeError, socket.gaierror): + bypass = False if bypass: return True return False -def get_environ_proxies(url): - """Return a dict of environment proxies.""" - if should_bypass_proxies(url): + +def get_environ_proxies(url, no_proxy=None): + """ + Return a dict of environment proxies. + + :rtype: dict + """ + if should_bypass_proxies(url, no_proxy=no_proxy): return {} else: return getproxies() + def select_proxy(url, proxies): """Select a proxy for the url, if applicable. @@ -545,75 +816,99 @@ def select_proxy(url, proxies): """ proxies = proxies or {} urlparts = urlparse(url) - proxy = proxies.get(urlparts.scheme+'://'+urlparts.hostname) - if proxy is None: - proxy = proxies.get(urlparts.scheme) + if urlparts.hostname is None: + return proxies.get(urlparts.scheme, proxies.get('all')) + + proxy_keys = [ + urlparts.scheme + '://' + urlparts.hostname, + urlparts.scheme, + 'all://' + urlparts.hostname, + 'all', + ] + proxy = None + for proxy_key in proxy_keys: + if proxy_key in proxies: + proxy = proxies[proxy_key] + break + return proxy + +def resolve_proxies(request, proxies, trust_env=True): + """This method takes proxy information from a request and configuration + input to resolve a mapping of target proxies. This will consider settings + such a NO_PROXY to strip proxy configurations. + + :param request: Request or PreparedRequest + :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs + :param trust_env: Boolean declaring whether to trust environment configs + + :rtype: dict + """ + proxies = proxies if proxies is not None else {} + url = request.url + scheme = urlparse(url).scheme + no_proxy = proxies.get('no_proxy') + new_proxies = proxies.copy() + + if trust_env and not should_bypass_proxies(url, no_proxy=no_proxy): + environ_proxies = get_environ_proxies(url, no_proxy=no_proxy) + + proxy = environ_proxies.get(scheme, environ_proxies.get('all')) + + if proxy: + new_proxies.setdefault(scheme, proxy) + return new_proxies + + def default_user_agent(name="python-requests"): - """Return a string representing the default user agent.""" - _implementation = platform.python_implementation() + """ + Return a string representing the default user agent. - if _implementation == 'CPython': - _implementation_version = platform.python_version() - elif _implementation == 'PyPy': - _implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major, - sys.pypy_version_info.minor, - sys.pypy_version_info.micro) - if sys.pypy_version_info.releaselevel != 'final': - _implementation_version = ''.join([_implementation_version, sys.pypy_version_info.releaselevel]) - elif _implementation == 'Jython': - _implementation_version = platform.python_version() # Complete Guess - elif _implementation == 'IronPython': - _implementation_version = platform.python_version() # Complete Guess - else: - _implementation_version = 'Unknown' - - try: - p_system = platform.system() - p_release = platform.release() - except IOError: - p_system = 'Unknown' - p_release = 'Unknown' - - return " ".join(['%s/%s' % (name, __version__), - '%s/%s' % (_implementation, _implementation_version), - '%s/%s' % (p_system, p_release)]) + :rtype: str + """ + return '%s/%s' % (name, __version__) def default_headers(): + """ + :rtype: requests.structures.CaseInsensitiveDict + """ return CaseInsensitiveDict({ 'User-Agent': default_user_agent(), - 'Accept-Encoding': ', '.join(('gzip', 'deflate')), + 'Accept-Encoding': DEFAULT_ACCEPT_ENCODING, 'Accept': '*/*', 'Connection': 'keep-alive', }) def parse_header_links(value): - """Return a dict of parsed link headers proxies. + """Return a list of parsed link headers proxies. i.e. Link: ; rel=front; type="image/jpeg",; rel=back;type="image/jpeg" + :rtype: list """ links = [] - replace_chars = " '\"" + replace_chars = ' \'"' - for val in re.split(", *<", value): + value = value.strip(replace_chars) + if not value: + return links + + for val in re.split(', *<', value): try: - url, params = val.split(";", 1) + url, params = val.split(';', 1) except ValueError: url, params = val, '' - link = {} + link = {'url': url.strip('<> \'"')} - link["url"] = url.strip("<> '\"") - - for param in params.split(";"): + for param in params.split(';'): try: - key, value = param.split("=") + key, value = param.split('=') except ValueError: break @@ -631,11 +926,14 @@ _null3 = _null * 3 def guess_json_utf(data): + """ + :rtype: str + """ # JSON always starts with two ASCII characters, so detection is as # easy as counting the nulls and from their location and count # determine the encoding. Also detect a BOM, if present. sample = data[:4] - if sample in (codecs.BOM_UTF32_LE, codecs.BOM32_BE): + if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): return 'utf-32' # BOM included if sample[:3] == codecs.BOM_UTF8: return 'utf-8-sig' # BOM included, MS style (discouraged) @@ -660,22 +958,40 @@ def guess_json_utf(data): def prepend_scheme_if_needed(url, new_scheme): - '''Given a URL that may or may not have a scheme, prepend the given scheme. - Does not replace a present scheme with the one provided as an argument.''' - scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) + """Given a URL that may or may not have a scheme, prepend the given scheme. + Does not replace a present scheme with the one provided as an argument. - # urlparse is a finicky beast, and sometimes decides that there isn't a - # netloc present. Assume that it's being over-cautious, and switch netloc - # and path if urlparse decided there was no netloc. + :rtype: str + """ + parsed = parse_url(url) + scheme, auth, host, port, path, query, fragment = parsed + + # A defect in urlparse determines that there isn't a netloc present in some + # urls. We previously assumed parsing was overly cautious, and swapped the + # netloc and path. Due to a lack of tests on the original defect, this is + # maintained with parse_url for backwards compatibility. + netloc = parsed.netloc if not netloc: netloc, path = path, netloc - return urlunparse((scheme, netloc, path, params, query, fragment)) + if auth: + # parse_url doesn't provide the netloc with auth + # so we'll add it ourselves. + netloc = '@'.join([auth, netloc]) + if scheme is None: + scheme = new_scheme + if path is None: + path = '' + + return urlunparse((scheme, netloc, path, '', query, fragment)) def get_auth_from_url(url): """Given a url with authentication components, extract them into a tuple of - username,password.""" + username,password. + + :rtype: (str,str) + """ parsed = urlparse(url) try: @@ -686,28 +1002,37 @@ def get_auth_from_url(url): return auth -def to_native_string(string, encoding='ascii'): - """ - Given a string object, regardless of type, returns a representation of that - string in the native string type, encoding and decoding where necessary. - This assumes ASCII unless told otherwise. - """ - out = None +# Moved outside of function to avoid recompile every call +_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$') +_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$') - if isinstance(string, builtin_str): - out = string + +def check_header_validity(header): + """Verifies that header value is a string which doesn't contain + leading whitespace or return characters. This prevents unintended + header injection. + + :param header: tuple, in the format (name, value). + """ + name, value = header + + if isinstance(value, bytes): + pat = _CLEAN_HEADER_REGEX_BYTE else: - if is_py2: - out = string.encode(encoding) - else: - out = string.decode(encoding) - - return out + pat = _CLEAN_HEADER_REGEX_STR + try: + if not pat.match(value): + raise InvalidHeader("Invalid return character or leading space in header: %s" % name) + except TypeError: + raise InvalidHeader("Value for header {%s: %s} must be of type str or " + "bytes, not %s" % (name, value, type(value))) def urldefragauth(url): """ - Given a url remove the fragment and the authentication part + Given a url remove the fragment and the authentication part. + + :rtype: str """ scheme, netloc, path, params, query, fragment = urlparse(url) @@ -718,3 +1043,18 @@ def urldefragauth(url): netloc = netloc.rsplit('@', 1)[-1] return urlunparse((scheme, netloc, path, params, query, '')) + + +def rewind_body(prepared_request): + """Move file pointer back to its recorded starting position + so it can be read again on redirect. + """ + body_seek = getattr(prepared_request.body, 'seek', None) + if body_seek is not None and isinstance(prepared_request._body_position, integer_types): + try: + body_seek(prepared_request._body_position) + except (IOError, OSError): + raise UnrewindableBodyError("An error occurred when rewinding request " + "body for redirect.") + else: + raise UnrewindableBodyError("Unable to rewind request body for redirect.") diff --git a/lib/requests_oauthlib/__init__.py b/lib/requests_oauthlib/__init__.py new file mode 100644 index 00000000..a4e03a4e --- /dev/null +++ b/lib/requests_oauthlib/__init__.py @@ -0,0 +1,19 @@ +import logging + +from .oauth1_auth import OAuth1 +from .oauth1_session import OAuth1Session +from .oauth2_auth import OAuth2 +from .oauth2_session import OAuth2Session, TokenUpdated + +__version__ = "1.3.0" + +import requests + +if requests.__version__ < "2.0.0": + msg = ( + "You are using requests version %s, which is older than " + "requests-oauthlib expects, please upgrade to 2.0.0 or later." + ) + raise Warning(msg % requests.__version__) + +logging.getLogger("requests_oauthlib").addHandler(logging.NullHandler()) diff --git a/lib/requests_oauthlib/compliance_fixes/__init__.py b/lib/requests_oauthlib/compliance_fixes/__init__.py new file mode 100644 index 00000000..02fa5120 --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/__init__.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import + +from .facebook import facebook_compliance_fix +from .fitbit import fitbit_compliance_fix +from .linkedin import linkedin_compliance_fix +from .slack import slack_compliance_fix +from .instagram import instagram_compliance_fix +from .mailchimp import mailchimp_compliance_fix +from .weibo import weibo_compliance_fix +from .plentymarkets import plentymarkets_compliance_fix diff --git a/lib/requests_oauthlib/compliance_fixes/douban.py b/lib/requests_oauthlib/compliance_fixes/douban.py new file mode 100644 index 00000000..ecc57b08 --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/douban.py @@ -0,0 +1,17 @@ +import json + +from oauthlib.common import to_unicode + + +def douban_compliance_fix(session): + def fix_token_type(r): + token = json.loads(r.text) + token.setdefault("token_type", "Bearer") + fixed_token = json.dumps(token) + r._content = to_unicode(fixed_token).encode("utf-8") + return r + + session._client_default_token_placement = "query" + session.register_compliance_hook("access_token_response", fix_token_type) + + return session diff --git a/lib/requests_oauthlib/compliance_fixes/facebook.py b/lib/requests_oauthlib/compliance_fixes/facebook.py new file mode 100644 index 00000000..90e79212 --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/facebook.py @@ -0,0 +1,33 @@ +from json import dumps + +try: + from urlparse import parse_qsl +except ImportError: + from urllib.parse import parse_qsl + +from oauthlib.common import to_unicode + + +def facebook_compliance_fix(session): + def _compliance_fix(r): + # if Facebook claims to be sending us json, let's trust them. + if "application/json" in r.headers.get("content-type", {}): + return r + + # Facebook returns a content-type of text/plain when sending their + # x-www-form-urlencoded responses, along with a 200. If not, let's + # assume we're getting JSON and bail on the fix. + if "text/plain" in r.headers.get("content-type", {}) and r.status_code == 200: + token = dict(parse_qsl(r.text, keep_blank_values=True)) + else: + return r + + expires = token.get("expires") + if expires is not None: + token["expires_in"] = expires + token["token_type"] = "Bearer" + r._content = to_unicode(dumps(token)).encode("UTF-8") + return r + + session.register_compliance_hook("access_token_response", _compliance_fix) + return session diff --git a/lib/requests_oauthlib/compliance_fixes/fitbit.py b/lib/requests_oauthlib/compliance_fixes/fitbit.py new file mode 100644 index 00000000..7e627024 --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/fitbit.py @@ -0,0 +1,25 @@ +""" +The Fitbit API breaks from the OAuth2 RFC standard by returning an "errors" +object list, rather than a single "error" string. This puts hooks in place so +that oauthlib can process an error in the results from access token and refresh +token responses. This is necessary to prevent getting the generic red herring +MissingTokenError. +""" + +from json import loads, dumps + +from oauthlib.common import to_unicode + + +def fitbit_compliance_fix(session): + def _missing_error(r): + token = loads(r.text) + if "errors" in token: + # Set the error to the first one we have + token["error"] = token["errors"][0]["errorType"] + r._content = to_unicode(dumps(token)).encode("UTF-8") + return r + + session.register_compliance_hook("access_token_response", _missing_error) + session.register_compliance_hook("refresh_token_response", _missing_error) + return session diff --git a/lib/requests_oauthlib/compliance_fixes/instagram.py b/lib/requests_oauthlib/compliance_fixes/instagram.py new file mode 100644 index 00000000..4e07fe08 --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/instagram.py @@ -0,0 +1,26 @@ +try: + from urlparse import urlparse, parse_qs +except ImportError: + from urllib.parse import urlparse, parse_qs + +from oauthlib.common import add_params_to_uri + + +def instagram_compliance_fix(session): + def _non_compliant_param_name(url, headers, data): + # If the user has already specified the token in the URL + # then there's nothing to do. + # If the specified token is different from ``session.access_token``, + # we assume the user intends to override the access token. + url_query = dict(parse_qs(urlparse(url).query)) + token = url_query.get("access_token") + if token: + # Nothing to do, just return. + return url, headers, data + + token = [("access_token", session.access_token)] + url = add_params_to_uri(url, token) + return url, headers, data + + session.register_compliance_hook("protected_request", _non_compliant_param_name) + return session diff --git a/lib/requests_oauthlib/compliance_fixes/linkedin.py b/lib/requests_oauthlib/compliance_fixes/linkedin.py new file mode 100644 index 00000000..cd5b4ace --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/linkedin.py @@ -0,0 +1,21 @@ +from json import loads, dumps + +from oauthlib.common import add_params_to_uri, to_unicode + + +def linkedin_compliance_fix(session): + def _missing_token_type(r): + token = loads(r.text) + token["token_type"] = "Bearer" + r._content = to_unicode(dumps(token)).encode("UTF-8") + return r + + def _non_compliant_param_name(url, headers, data): + token = [("oauth2_access_token", session.access_token)] + url = add_params_to_uri(url, token) + return url, headers, data + + session._client.default_token_placement = "query" + session.register_compliance_hook("access_token_response", _missing_token_type) + session.register_compliance_hook("protected_request", _non_compliant_param_name) + return session diff --git a/lib/requests_oauthlib/compliance_fixes/mailchimp.py b/lib/requests_oauthlib/compliance_fixes/mailchimp.py new file mode 100644 index 00000000..c69ce9fd --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/mailchimp.py @@ -0,0 +1,23 @@ +import json + +from oauthlib.common import to_unicode + + +def mailchimp_compliance_fix(session): + def _null_scope(r): + token = json.loads(r.text) + if "scope" in token and token["scope"] is None: + token.pop("scope") + r._content = to_unicode(json.dumps(token)).encode("utf-8") + return r + + def _non_zero_expiration(r): + token = json.loads(r.text) + if "expires_in" in token and token["expires_in"] == 0: + token["expires_in"] = 3600 + r._content = to_unicode(json.dumps(token)).encode("utf-8") + return r + + session.register_compliance_hook("access_token_response", _null_scope) + session.register_compliance_hook("access_token_response", _non_zero_expiration) + return session diff --git a/lib/requests_oauthlib/compliance_fixes/plentymarkets.py b/lib/requests_oauthlib/compliance_fixes/plentymarkets.py new file mode 100644 index 00000000..9f605f05 --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/plentymarkets.py @@ -0,0 +1,29 @@ +from json import dumps, loads +import re + +from oauthlib.common import to_unicode + + +def plentymarkets_compliance_fix(session): + def _to_snake_case(n): + return re.sub("(.)([A-Z][a-z]+)", r"\1_\2", n).lower() + + def _compliance_fix(r): + # Plenty returns the Token in CamelCase instead of _ + if ( + "application/json" in r.headers.get("content-type", {}) + and r.status_code == 200 + ): + token = loads(r.text) + else: + return r + + fixed_token = {} + for k, v in token.items(): + fixed_token[_to_snake_case(k)] = v + + r._content = to_unicode(dumps(fixed_token)).encode("UTF-8") + return r + + session.register_compliance_hook("access_token_response", _compliance_fix) + return session diff --git a/lib/requests_oauthlib/compliance_fixes/slack.py b/lib/requests_oauthlib/compliance_fixes/slack.py new file mode 100644 index 00000000..3f574b03 --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/slack.py @@ -0,0 +1,37 @@ +try: + from urlparse import urlparse, parse_qs +except ImportError: + from urllib.parse import urlparse, parse_qs + +from oauthlib.common import add_params_to_uri + + +def slack_compliance_fix(session): + def _non_compliant_param_name(url, headers, data): + # If the user has already specified the token, either in the URL + # or in a data dictionary, then there's nothing to do. + # If the specified token is different from ``session.access_token``, + # we assume the user intends to override the access token. + url_query = dict(parse_qs(urlparse(url).query)) + token = url_query.get("token") + if not token and isinstance(data, dict): + token = data.get("token") + + if token: + # Nothing to do, just return. + return url, headers, data + + if not data: + data = {"token": session.access_token} + elif isinstance(data, dict): + data["token"] = session.access_token + else: + # ``data`` is something other than a dict: maybe a stream, + # maybe a file object, maybe something else. We can't easily + # modify it, so we'll set the token by modifying the URL instead. + token = [("token", session.access_token)] + url = add_params_to_uri(url, token) + return url, headers, data + + session.register_compliance_hook("protected_request", _non_compliant_param_name) + return session diff --git a/lib/requests_oauthlib/compliance_fixes/weibo.py b/lib/requests_oauthlib/compliance_fixes/weibo.py new file mode 100644 index 00000000..6733abeb --- /dev/null +++ b/lib/requests_oauthlib/compliance_fixes/weibo.py @@ -0,0 +1,15 @@ +from json import loads, dumps + +from oauthlib.common import to_unicode + + +def weibo_compliance_fix(session): + def _missing_token_type(r): + token = loads(r.text) + token["token_type"] = "Bearer" + r._content = to_unicode(dumps(token)).encode("UTF-8") + return r + + session._client.default_token_placement = "query" + session.register_compliance_hook("access_token_response", _missing_token_type) + return session diff --git a/lib/requests_oauthlib/oauth1_auth.py b/lib/requests_oauthlib/oauth1_auth.py new file mode 100644 index 00000000..cfbbd590 --- /dev/null +++ b/lib/requests_oauthlib/oauth1_auth.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import logging + +from oauthlib.common import extract_params +from oauthlib.oauth1 import Client, SIGNATURE_HMAC, SIGNATURE_TYPE_AUTH_HEADER +from oauthlib.oauth1 import SIGNATURE_TYPE_BODY +from requests.compat import is_py3 +from requests.utils import to_native_string +from requests.auth import AuthBase + +CONTENT_TYPE_FORM_URLENCODED = "application/x-www-form-urlencoded" +CONTENT_TYPE_MULTI_PART = "multipart/form-data" + +if is_py3: + unicode = str + +log = logging.getLogger(__name__) + +# OBS!: Correct signing of requests are conditional on invoking OAuth1 +# as the last step of preparing a request, or at least having the +# content-type set properly. +class OAuth1(AuthBase): + """Signs the request using OAuth 1 (RFC5849)""" + + client_class = Client + + def __init__( + self, + client_key, + client_secret=None, + resource_owner_key=None, + resource_owner_secret=None, + callback_uri=None, + signature_method=SIGNATURE_HMAC, + signature_type=SIGNATURE_TYPE_AUTH_HEADER, + rsa_key=None, + verifier=None, + decoding="utf-8", + client_class=None, + force_include_body=False, + **kwargs + ): + + try: + signature_type = signature_type.upper() + except AttributeError: + pass + + client_class = client_class or self.client_class + + self.force_include_body = force_include_body + + self.client = client_class( + client_key, + client_secret, + resource_owner_key, + resource_owner_secret, + callback_uri, + signature_method, + signature_type, + rsa_key, + verifier, + decoding=decoding, + **kwargs + ) + + def __call__(self, r): + """Add OAuth parameters to the request. + + Parameters may be included from the body if the content-type is + urlencoded, if no content type is set a guess is made. + """ + # Overwriting url is safe here as request will not modify it past + # this point. + log.debug("Signing request %s using client %s", r, self.client) + + content_type = r.headers.get("Content-Type", "") + if ( + not content_type + and extract_params(r.body) + or self.client.signature_type == SIGNATURE_TYPE_BODY + ): + content_type = CONTENT_TYPE_FORM_URLENCODED + if not isinstance(content_type, unicode): + content_type = content_type.decode("utf-8") + + is_form_encoded = CONTENT_TYPE_FORM_URLENCODED in content_type + + log.debug( + "Including body in call to sign: %s", + is_form_encoded or self.force_include_body, + ) + + if is_form_encoded: + r.headers["Content-Type"] = CONTENT_TYPE_FORM_URLENCODED + r.url, headers, r.body = self.client.sign( + unicode(r.url), unicode(r.method), r.body or "", r.headers + ) + elif self.force_include_body: + # To allow custom clients to work on non form encoded bodies. + r.url, headers, r.body = self.client.sign( + unicode(r.url), unicode(r.method), r.body or "", r.headers + ) + else: + # Omit body data in the signing of non form-encoded requests + r.url, headers, _ = self.client.sign( + unicode(r.url), unicode(r.method), None, r.headers + ) + + r.prepare_headers(headers) + r.url = to_native_string(r.url) + log.debug("Updated url: %s", r.url) + log.debug("Updated headers: %s", headers) + log.debug("Updated body: %r", r.body) + return r diff --git a/lib/requests_oauthlib/oauth1_session.py b/lib/requests_oauthlib/oauth1_session.py new file mode 100644 index 00000000..aa17f28f --- /dev/null +++ b/lib/requests_oauthlib/oauth1_session.py @@ -0,0 +1,400 @@ +from __future__ import unicode_literals + +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse + +import logging + +from oauthlib.common import add_params_to_uri +from oauthlib.common import urldecode as _urldecode +from oauthlib.oauth1 import SIGNATURE_HMAC, SIGNATURE_RSA, SIGNATURE_TYPE_AUTH_HEADER +import requests + +from . import OAuth1 + + +log = logging.getLogger(__name__) + + +def urldecode(body): + """Parse query or json to python dictionary""" + try: + return _urldecode(body) + except Exception: + import json + + return json.loads(body) + + +class TokenRequestDenied(ValueError): + def __init__(self, message, response): + super(TokenRequestDenied, self).__init__(message) + self.response = response + + @property + def status_code(self): + """For backwards-compatibility purposes""" + return self.response.status_code + + +class TokenMissing(ValueError): + def __init__(self, message, response): + super(TokenMissing, self).__init__(message) + self.response = response + + +class VerifierMissing(ValueError): + pass + + +class OAuth1Session(requests.Session): + """Request signing and convenience methods for the oauth dance. + + What is the difference between OAuth1Session and OAuth1? + + OAuth1Session actually uses OAuth1 internally and its purpose is to assist + in the OAuth workflow through convenience methods to prepare authorization + URLs and parse the various token and redirection responses. It also provide + rudimentary validation of responses. + + An example of the OAuth workflow using a basic CLI app and Twitter. + + >>> # Credentials obtained during the registration. + >>> client_key = 'client key' + >>> client_secret = 'secret' + >>> callback_uri = 'https://127.0.0.1/callback' + >>> + >>> # Endpoints found in the OAuth provider API documentation + >>> request_token_url = 'https://api.twitter.com/oauth/request_token' + >>> authorization_url = 'https://api.twitter.com/oauth/authorize' + >>> access_token_url = 'https://api.twitter.com/oauth/access_token' + >>> + >>> oauth_session = OAuth1Session(client_key,client_secret=client_secret, callback_uri=callback_uri) + >>> + >>> # First step, fetch the request token. + >>> oauth_session.fetch_request_token(request_token_url) + { + 'oauth_token': 'kjerht2309u', + 'oauth_token_secret': 'lsdajfh923874', + } + >>> + >>> # Second step. Follow this link and authorize + >>> oauth_session.authorization_url(authorization_url) + 'https://api.twitter.com/oauth/authorize?oauth_token=sdf0o9823sjdfsdf&oauth_callback=https%3A%2F%2F127.0.0.1%2Fcallback' + >>> + >>> # Third step. Fetch the access token + >>> redirect_response = raw_input('Paste the full redirect URL here.') + >>> oauth_session.parse_authorization_response(redirect_response) + { + 'oauth_token: 'kjerht2309u', + 'oauth_token_secret: 'lsdajfh923874', + 'oauth_verifier: 'w34o8967345', + } + >>> oauth_session.fetch_access_token(access_token_url) + { + 'oauth_token': 'sdf0o9823sjdfsdf', + 'oauth_token_secret': '2kjshdfp92i34asdasd', + } + >>> # Done. You can now make OAuth requests. + >>> status_url = 'http://api.twitter.com/1/statuses/update.json' + >>> new_status = {'status': 'hello world!'} + >>> oauth_session.post(status_url, data=new_status) + + """ + + def __init__( + self, + client_key, + client_secret=None, + resource_owner_key=None, + resource_owner_secret=None, + callback_uri=None, + signature_method=SIGNATURE_HMAC, + signature_type=SIGNATURE_TYPE_AUTH_HEADER, + rsa_key=None, + verifier=None, + client_class=None, + force_include_body=False, + **kwargs + ): + """Construct the OAuth 1 session. + + :param client_key: A client specific identifier. + :param client_secret: A client specific secret used to create HMAC and + plaintext signatures. + :param resource_owner_key: A resource owner key, also referred to as + request token or access token depending on + when in the workflow it is used. + :param resource_owner_secret: A resource owner secret obtained with + either a request or access token. Often + referred to as token secret. + :param callback_uri: The URL the user is redirect back to after + authorization. + :param signature_method: Signature methods determine how the OAuth + signature is created. The three options are + oauthlib.oauth1.SIGNATURE_HMAC (default), + oauthlib.oauth1.SIGNATURE_RSA and + oauthlib.oauth1.SIGNATURE_PLAIN. + :param signature_type: Signature type decides where the OAuth + parameters are added. Either in the + Authorization header (default) or to the URL + query parameters or the request body. Defined as + oauthlib.oauth1.SIGNATURE_TYPE_AUTH_HEADER, + oauthlib.oauth1.SIGNATURE_TYPE_QUERY and + oauthlib.oauth1.SIGNATURE_TYPE_BODY + respectively. + :param rsa_key: The private RSA key as a string. Can only be used with + signature_method=oauthlib.oauth1.SIGNATURE_RSA. + :param verifier: A verifier string to prove authorization was granted. + :param client_class: A subclass of `oauthlib.oauth1.Client` to use with + `requests_oauthlib.OAuth1` instead of the default + :param force_include_body: Always include the request body in the + signature creation. + :param **kwargs: Additional keyword arguments passed to `OAuth1` + """ + super(OAuth1Session, self).__init__() + self._client = OAuth1( + client_key, + client_secret=client_secret, + resource_owner_key=resource_owner_key, + resource_owner_secret=resource_owner_secret, + callback_uri=callback_uri, + signature_method=signature_method, + signature_type=signature_type, + rsa_key=rsa_key, + verifier=verifier, + client_class=client_class, + force_include_body=force_include_body, + **kwargs + ) + self.auth = self._client + + @property + def token(self): + oauth_token = self._client.client.resource_owner_key + oauth_token_secret = self._client.client.resource_owner_secret + oauth_verifier = self._client.client.verifier + + token_dict = {} + if oauth_token: + token_dict["oauth_token"] = oauth_token + if oauth_token_secret: + token_dict["oauth_token_secret"] = oauth_token_secret + if oauth_verifier: + token_dict["oauth_verifier"] = oauth_verifier + + return token_dict + + @token.setter + def token(self, value): + self._populate_attributes(value) + + @property + def authorized(self): + """Boolean that indicates whether this session has an OAuth token + or not. If `self.authorized` is True, you can reasonably expect + OAuth-protected requests to the resource to succeed. If + `self.authorized` is False, you need the user to go through the OAuth + authentication dance before OAuth-protected requests to the resource + will succeed. + """ + if self._client.client.signature_method == SIGNATURE_RSA: + # RSA only uses resource_owner_key + return bool(self._client.client.resource_owner_key) + else: + # other methods of authentication use all three pieces + return ( + bool(self._client.client.client_secret) + and bool(self._client.client.resource_owner_key) + and bool(self._client.client.resource_owner_secret) + ) + + def authorization_url(self, url, request_token=None, **kwargs): + """Create an authorization URL by appending request_token and optional + kwargs to url. + + This is the second step in the OAuth 1 workflow. The user should be + redirected to this authorization URL, grant access to you, and then + be redirected back to you. The redirection back can either be specified + during client registration or by supplying a callback URI per request. + + :param url: The authorization endpoint URL. + :param request_token: The previously obtained request token. + :param kwargs: Optional parameters to append to the URL. + :returns: The authorization URL with new parameters embedded. + + An example using a registered default callback URI. + + >>> request_token_url = 'https://api.twitter.com/oauth/request_token' + >>> authorization_url = 'https://api.twitter.com/oauth/authorize' + >>> oauth_session = OAuth1Session('client-key', client_secret='secret') + >>> oauth_session.fetch_request_token(request_token_url) + { + 'oauth_token': 'sdf0o9823sjdfsdf', + 'oauth_token_secret': '2kjshdfp92i34asdasd', + } + >>> oauth_session.authorization_url(authorization_url) + 'https://api.twitter.com/oauth/authorize?oauth_token=sdf0o9823sjdfsdf' + >>> oauth_session.authorization_url(authorization_url, foo='bar') + 'https://api.twitter.com/oauth/authorize?oauth_token=sdf0o9823sjdfsdf&foo=bar' + + An example using an explicit callback URI. + + >>> request_token_url = 'https://api.twitter.com/oauth/request_token' + >>> authorization_url = 'https://api.twitter.com/oauth/authorize' + >>> oauth_session = OAuth1Session('client-key', client_secret='secret', callback_uri='https://127.0.0.1/callback') + >>> oauth_session.fetch_request_token(request_token_url) + { + 'oauth_token': 'sdf0o9823sjdfsdf', + 'oauth_token_secret': '2kjshdfp92i34asdasd', + } + >>> oauth_session.authorization_url(authorization_url) + 'https://api.twitter.com/oauth/authorize?oauth_token=sdf0o9823sjdfsdf&oauth_callback=https%3A%2F%2F127.0.0.1%2Fcallback' + """ + kwargs["oauth_token"] = request_token or self._client.client.resource_owner_key + log.debug("Adding parameters %s to url %s", kwargs, url) + return add_params_to_uri(url, kwargs.items()) + + def fetch_request_token(self, url, realm=None, **request_kwargs): + r"""Fetch a request token. + + This is the first step in the OAuth 1 workflow. A request token is + obtained by making a signed post request to url. The token is then + parsed from the application/x-www-form-urlencoded response and ready + to be used to construct an authorization url. + + :param url: The request token endpoint URL. + :param realm: A list of realms to request access to. + :param \*\*request_kwargs: Optional arguments passed to ''post'' + function in ''requests.Session'' + :returns: The response in dict format. + + Note that a previously set callback_uri will be reset for your + convenience, or else signature creation will be incorrect on + consecutive requests. + + >>> request_token_url = 'https://api.twitter.com/oauth/request_token' + >>> oauth_session = OAuth1Session('client-key', client_secret='secret') + >>> oauth_session.fetch_request_token(request_token_url) + { + 'oauth_token': 'sdf0o9823sjdfsdf', + 'oauth_token_secret': '2kjshdfp92i34asdasd', + } + """ + self._client.client.realm = " ".join(realm) if realm else None + token = self._fetch_token(url, **request_kwargs) + log.debug("Resetting callback_uri and realm (not needed in next phase).") + self._client.client.callback_uri = None + self._client.client.realm = None + return token + + def fetch_access_token(self, url, verifier=None, **request_kwargs): + """Fetch an access token. + + This is the final step in the OAuth 1 workflow. An access token is + obtained using all previously obtained credentials, including the + verifier from the authorization step. + + Note that a previously set verifier will be reset for your + convenience, or else signature creation will be incorrect on + consecutive requests. + + >>> access_token_url = 'https://api.twitter.com/oauth/access_token' + >>> redirect_response = 'https://127.0.0.1/callback?oauth_token=kjerht2309uf&oauth_token_secret=lsdajfh923874&oauth_verifier=w34o8967345' + >>> oauth_session = OAuth1Session('client-key', client_secret='secret') + >>> oauth_session.parse_authorization_response(redirect_response) + { + 'oauth_token: 'kjerht2309u', + 'oauth_token_secret: 'lsdajfh923874', + 'oauth_verifier: 'w34o8967345', + } + >>> oauth_session.fetch_access_token(access_token_url) + { + 'oauth_token': 'sdf0o9823sjdfsdf', + 'oauth_token_secret': '2kjshdfp92i34asdasd', + } + """ + if verifier: + self._client.client.verifier = verifier + if not getattr(self._client.client, "verifier", None): + raise VerifierMissing("No client verifier has been set.") + token = self._fetch_token(url, **request_kwargs) + log.debug("Resetting verifier attribute, should not be used anymore.") + self._client.client.verifier = None + return token + + def parse_authorization_response(self, url): + """Extract parameters from the post authorization redirect response URL. + + :param url: The full URL that resulted from the user being redirected + back from the OAuth provider to you, the client. + :returns: A dict of parameters extracted from the URL. + + >>> redirect_response = 'https://127.0.0.1/callback?oauth_token=kjerht2309uf&oauth_token_secret=lsdajfh923874&oauth_verifier=w34o8967345' + >>> oauth_session = OAuth1Session('client-key', client_secret='secret') + >>> oauth_session.parse_authorization_response(redirect_response) + { + 'oauth_token: 'kjerht2309u', + 'oauth_token_secret: 'lsdajfh923874', + 'oauth_verifier: 'w34o8967345', + } + """ + log.debug("Parsing token from query part of url %s", url) + token = dict(urldecode(urlparse(url).query)) + log.debug("Updating internal client token attribute.") + self._populate_attributes(token) + self.token = token + return token + + def _populate_attributes(self, token): + if "oauth_token" in token: + self._client.client.resource_owner_key = token["oauth_token"] + else: + raise TokenMissing( + "Response does not contain a token: {resp}".format(resp=token), token + ) + if "oauth_token_secret" in token: + self._client.client.resource_owner_secret = token["oauth_token_secret"] + if "oauth_verifier" in token: + self._client.client.verifier = token["oauth_verifier"] + + def _fetch_token(self, url, **request_kwargs): + log.debug("Fetching token from %s using client %s", url, self._client.client) + r = self.post(url, **request_kwargs) + + if r.status_code >= 400: + error = "Token request failed with code %s, response was '%s'." + raise TokenRequestDenied(error % (r.status_code, r.text), r) + + log.debug('Decoding token from response "%s"', r.text) + try: + token = dict(urldecode(r.text.strip())) + except ValueError as e: + error = ( + "Unable to decode token from token response. " + "This is commonly caused by an unsuccessful request where" + " a non urlencoded error message is returned. " + "The decoding error was %s" + "" % e + ) + raise ValueError(error) + + log.debug("Obtained token %s", token) + log.debug("Updating internal client attributes from token data.") + self._populate_attributes(token) + self.token = token + return token + + def rebuild_auth(self, prepared_request, response): + """ + When being redirected we should always strip Authorization + header, since nonce may not be reused as per OAuth spec. + """ + if "Authorization" in prepared_request.headers: + # If we get redirected to a new host, we should strip out + # any authentication headers. + prepared_request.headers.pop("Authorization", True) + prepared_request.prepare_auth(self.auth) + return diff --git a/lib/requests_oauthlib/oauth2_auth.py b/lib/requests_oauthlib/oauth2_auth.py new file mode 100644 index 00000000..b880f72f --- /dev/null +++ b/lib/requests_oauthlib/oauth2_auth.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals +from oauthlib.oauth2 import WebApplicationClient, InsecureTransportError +from oauthlib.oauth2 import is_secure_transport +from requests.auth import AuthBase + + +class OAuth2(AuthBase): + """Adds proof of authorization (OAuth2 token) to the request.""" + + def __init__(self, client_id=None, client=None, token=None): + """Construct a new OAuth 2 authorization object. + + :param client_id: Client id obtained during registration + :param client: :class:`oauthlib.oauth2.Client` to be used. Default is + WebApplicationClient which is useful for any + hosted application but not mobile or desktop. + :param token: Token dictionary, must include access_token + and token_type. + """ + self._client = client or WebApplicationClient(client_id, token=token) + if token: + for k, v in token.items(): + setattr(self._client, k, v) + + def __call__(self, r): + """Append an OAuth 2 token to the request. + + Note that currently HTTPS is required for all requests. There may be + a token type that allows for plain HTTP in the future and then this + should be updated to allow plain HTTP on a white list basis. + """ + if not is_secure_transport(r.url): + raise InsecureTransportError() + r.url, r.headers, r.body = self._client.add_token( + r.url, http_method=r.method, body=r.body, headers=r.headers + ) + return r diff --git a/lib/requests_oauthlib/oauth2_session.py b/lib/requests_oauthlib/oauth2_session.py new file mode 100644 index 00000000..eea4ac6f --- /dev/null +++ b/lib/requests_oauthlib/oauth2_session.py @@ -0,0 +1,534 @@ +from __future__ import unicode_literals + +import logging + +from oauthlib.common import generate_token, urldecode +from oauthlib.oauth2 import WebApplicationClient, InsecureTransportError +from oauthlib.oauth2 import LegacyApplicationClient +from oauthlib.oauth2 import TokenExpiredError, is_secure_transport +import requests + +log = logging.getLogger(__name__) + + +class TokenUpdated(Warning): + def __init__(self, token): + super(TokenUpdated, self).__init__() + self.token = token + + +class OAuth2Session(requests.Session): + """Versatile OAuth 2 extension to :class:`requests.Session`. + + Supports any grant type adhering to :class:`oauthlib.oauth2.Client` spec + including the four core OAuth 2 grants. + + Can be used to create authorization urls, fetch tokens and access protected + resources using the :class:`requests.Session` interface you are used to. + + - :class:`oauthlib.oauth2.WebApplicationClient` (default): Authorization Code Grant + - :class:`oauthlib.oauth2.MobileApplicationClient`: Implicit Grant + - :class:`oauthlib.oauth2.LegacyApplicationClient`: Password Credentials Grant + - :class:`oauthlib.oauth2.BackendApplicationClient`: Client Credentials Grant + + Note that the only time you will be using Implicit Grant from python is if + you are driving a user agent able to obtain URL fragments. + """ + + def __init__( + self, + client_id=None, + client=None, + auto_refresh_url=None, + auto_refresh_kwargs=None, + scope=None, + redirect_uri=None, + token=None, + state=None, + token_updater=None, + **kwargs + ): + """Construct a new OAuth 2 client session. + + :param client_id: Client id obtained during registration + :param client: :class:`oauthlib.oauth2.Client` to be used. Default is + WebApplicationClient which is useful for any + hosted application but not mobile or desktop. + :param scope: List of scopes you wish to request access to + :param redirect_uri: Redirect URI you registered as callback + :param token: Token dictionary, must include access_token + and token_type. + :param state: State string used to prevent CSRF. This will be given + when creating the authorization url and must be supplied + when parsing the authorization response. + Can be either a string or a no argument callable. + :auto_refresh_url: Refresh token endpoint URL, must be HTTPS. Supply + this if you wish the client to automatically refresh + your access tokens. + :auto_refresh_kwargs: Extra arguments to pass to the refresh token + endpoint. + :token_updater: Method with one argument, token, to be used to update + your token database on automatic token refresh. If not + set a TokenUpdated warning will be raised when a token + has been refreshed. This warning will carry the token + in its token argument. + :param kwargs: Arguments to pass to the Session constructor. + """ + super(OAuth2Session, self).__init__(**kwargs) + self._client = client or WebApplicationClient(client_id, token=token) + self.token = token or {} + self.scope = scope + self.redirect_uri = redirect_uri + self.state = state or generate_token + self._state = state + self.auto_refresh_url = auto_refresh_url + self.auto_refresh_kwargs = auto_refresh_kwargs or {} + self.token_updater = token_updater + + # Ensure that requests doesn't do any automatic auth. See #278. + # The default behavior can be re-enabled by setting auth to None. + self.auth = lambda r: r + + # Allow customizations for non compliant providers through various + # hooks to adjust requests and responses. + self.compliance_hook = { + "access_token_response": set(), + "refresh_token_response": set(), + "protected_request": set(), + } + + def new_state(self): + """Generates a state string to be used in authorizations.""" + try: + self._state = self.state() + log.debug("Generated new state %s.", self._state) + except TypeError: + self._state = self.state + log.debug("Re-using previously supplied state %s.", self._state) + return self._state + + @property + def client_id(self): + return getattr(self._client, "client_id", None) + + @client_id.setter + def client_id(self, value): + self._client.client_id = value + + @client_id.deleter + def client_id(self): + del self._client.client_id + + @property + def token(self): + return getattr(self._client, "token", None) + + @token.setter + def token(self, value): + self._client.token = value + self._client.populate_token_attributes(value) + + @property + def access_token(self): + return getattr(self._client, "access_token", None) + + @access_token.setter + def access_token(self, value): + self._client.access_token = value + + @access_token.deleter + def access_token(self): + del self._client.access_token + + @property + def authorized(self): + """Boolean that indicates whether this session has an OAuth token + or not. If `self.authorized` is True, you can reasonably expect + OAuth-protected requests to the resource to succeed. If + `self.authorized` is False, you need the user to go through the OAuth + authentication dance before OAuth-protected requests to the resource + will succeed. + """ + return bool(self.access_token) + + def authorization_url(self, url, state=None, **kwargs): + """Form an authorization URL. + + :param url: Authorization endpoint url, must be HTTPS. + :param state: An optional state string for CSRF protection. If not + given it will be generated for you. + :param kwargs: Extra parameters to include. + :return: authorization_url, state + """ + state = state or self.new_state() + return ( + self._client.prepare_request_uri( + url, + redirect_uri=self.redirect_uri, + scope=self.scope, + state=state, + **kwargs + ), + state, + ) + + def fetch_token( + self, + token_url, + code=None, + authorization_response=None, + body="", + auth=None, + username=None, + password=None, + method="POST", + force_querystring=False, + timeout=None, + headers=None, + verify=True, + proxies=None, + include_client_id=None, + client_secret=None, + **kwargs + ): + """Generic method for fetching an access token from the token endpoint. + + If you are using the MobileApplicationClient you will want to use + `token_from_fragment` instead of `fetch_token`. + + The current implementation enforces the RFC guidelines. + + :param token_url: Token endpoint URL, must use HTTPS. + :param code: Authorization code (used by WebApplicationClients). + :param authorization_response: Authorization response URL, the callback + URL of the request back to you. Used by + WebApplicationClients instead of code. + :param body: Optional application/x-www-form-urlencoded body to add the + include in the token request. Prefer kwargs over body. + :param auth: An auth tuple or method as accepted by `requests`. + :param username: Username required by LegacyApplicationClients to appear + in the request body. + :param password: Password required by LegacyApplicationClients to appear + in the request body. + :param method: The HTTP method used to make the request. Defaults + to POST, but may also be GET. Other methods should + be added as needed. + :param force_querystring: If True, force the request body to be sent + in the querystring instead. + :param timeout: Timeout of the request in seconds. + :param headers: Dict to default request headers with. + :param verify: Verify SSL certificate. + :param proxies: The `proxies` argument is passed onto `requests`. + :param include_client_id: Should the request body include the + `client_id` parameter. Default is `None`, + which will attempt to autodetect. This can be + forced to always include (True) or never + include (False). + :param client_secret: The `client_secret` paired to the `client_id`. + This is generally required unless provided in the + `auth` tuple. If the value is `None`, it will be + omitted from the request, however if the value is + an empty string, an empty string will be sent. + :param kwargs: Extra parameters to include in the token request. + :return: A token dict + """ + if not is_secure_transport(token_url): + raise InsecureTransportError() + + if not code and authorization_response: + self._client.parse_request_uri_response( + authorization_response, state=self._state + ) + code = self._client.code + elif not code and isinstance(self._client, WebApplicationClient): + code = self._client.code + if not code: + raise ValueError( + "Please supply either code or " "authorization_response parameters." + ) + + # Earlier versions of this library build an HTTPBasicAuth header out of + # `username` and `password`. The RFC states, however these attributes + # must be in the request body and not the header. + # If an upstream server is not spec compliant and requires them to + # appear as an Authorization header, supply an explicit `auth` header + # to this function. + # This check will allow for empty strings, but not `None`. + # + # References + # 4.3.2 - Resource Owner Password Credentials Grant + # https://tools.ietf.org/html/rfc6749#section-4.3.2 + + if isinstance(self._client, LegacyApplicationClient): + if username is None: + raise ValueError( + "`LegacyApplicationClient` requires both the " + "`username` and `password` parameters." + ) + if password is None: + raise ValueError( + "The required parameter `username` was supplied, " + "but `password` was not." + ) + + # merge username and password into kwargs for `prepare_request_body` + if username is not None: + kwargs["username"] = username + if password is not None: + kwargs["password"] = password + + # is an auth explicitly supplied? + if auth is not None: + # if we're dealing with the default of `include_client_id` (None): + # we will assume the `auth` argument is for an RFC compliant server + # and we should not send the `client_id` in the body. + # This approach allows us to still force the client_id by submitting + # `include_client_id=True` along with an `auth` object. + if include_client_id is None: + include_client_id = False + + # otherwise we may need to create an auth header + else: + # since we don't have an auth header, we MAY need to create one + # it is possible that we want to send the `client_id` in the body + # if so, `include_client_id` should be set to True + # otherwise, we will generate an auth header + if include_client_id is not True: + client_id = self.client_id + if client_id: + log.debug( + 'Encoding `client_id` "%s" with `client_secret` ' + "as Basic auth credentials.", + client_id, + ) + client_secret = client_secret if client_secret is not None else "" + auth = requests.auth.HTTPBasicAuth(client_id, client_secret) + + if include_client_id: + # this was pulled out of the params + # it needs to be passed into prepare_request_body + if client_secret is not None: + kwargs["client_secret"] = client_secret + + body = self._client.prepare_request_body( + code=code, + body=body, + redirect_uri=self.redirect_uri, + include_client_id=include_client_id, + **kwargs + ) + + headers = headers or { + "Accept": "application/json", + "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8", + } + self.token = {} + request_kwargs = {} + if method.upper() == "POST": + request_kwargs["params" if force_querystring else "data"] = dict( + urldecode(body) + ) + elif method.upper() == "GET": + request_kwargs["params"] = dict(urldecode(body)) + else: + raise ValueError("The method kwarg must be POST or GET.") + + r = self.request( + method=method, + url=token_url, + timeout=timeout, + headers=headers, + auth=auth, + verify=verify, + proxies=proxies, + **request_kwargs + ) + + log.debug("Request to fetch token completed with status %s.", r.status_code) + log.debug("Request url was %s", r.request.url) + log.debug("Request headers were %s", r.request.headers) + log.debug("Request body was %s", r.request.body) + log.debug("Response headers were %s and content %s.", r.headers, r.text) + log.debug( + "Invoking %d token response hooks.", + len(self.compliance_hook["access_token_response"]), + ) + for hook in self.compliance_hook["access_token_response"]: + log.debug("Invoking hook %s.", hook) + r = hook(r) + + self._client.parse_request_body_response(r.text, scope=self.scope) + self.token = self._client.token + log.debug("Obtained token %s.", self.token) + return self.token + + def token_from_fragment(self, authorization_response): + """Parse token from the URI fragment, used by MobileApplicationClients. + + :param authorization_response: The full URL of the redirect back to you + :return: A token dict + """ + self._client.parse_request_uri_response( + authorization_response, state=self._state + ) + self.token = self._client.token + return self.token + + def refresh_token( + self, + token_url, + refresh_token=None, + body="", + auth=None, + timeout=None, + headers=None, + verify=True, + proxies=None, + **kwargs + ): + """Fetch a new access token using a refresh token. + + :param token_url: The token endpoint, must be HTTPS. + :param refresh_token: The refresh_token to use. + :param body: Optional application/x-www-form-urlencoded body to add the + include in the token request. Prefer kwargs over body. + :param auth: An auth tuple or method as accepted by `requests`. + :param timeout: Timeout of the request in seconds. + :param headers: A dict of headers to be used by `requests`. + :param verify: Verify SSL certificate. + :param proxies: The `proxies` argument will be passed to `requests`. + :param kwargs: Extra parameters to include in the token request. + :return: A token dict + """ + if not token_url: + raise ValueError("No token endpoint set for auto_refresh.") + + if not is_secure_transport(token_url): + raise InsecureTransportError() + + refresh_token = refresh_token or self.token.get("refresh_token") + + log.debug( + "Adding auto refresh key word arguments %s.", self.auto_refresh_kwargs + ) + kwargs.update(self.auto_refresh_kwargs) + body = self._client.prepare_refresh_body( + body=body, refresh_token=refresh_token, scope=self.scope, **kwargs + ) + log.debug("Prepared refresh token request body %s", body) + + if headers is None: + headers = { + "Accept": "application/json", + "Content-Type": ("application/x-www-form-urlencoded;charset=UTF-8"), + } + + r = self.post( + token_url, + data=dict(urldecode(body)), + auth=auth, + timeout=timeout, + headers=headers, + verify=verify, + withhold_token=True, + proxies=proxies, + ) + log.debug("Request to refresh token completed with status %s.", r.status_code) + log.debug("Response headers were %s and content %s.", r.headers, r.text) + log.debug( + "Invoking %d token response hooks.", + len(self.compliance_hook["refresh_token_response"]), + ) + for hook in self.compliance_hook["refresh_token_response"]: + log.debug("Invoking hook %s.", hook) + r = hook(r) + + self.token = self._client.parse_request_body_response(r.text, scope=self.scope) + if not "refresh_token" in self.token: + log.debug("No new refresh token given. Re-using old.") + self.token["refresh_token"] = refresh_token + return self.token + + def request( + self, + method, + url, + data=None, + headers=None, + withhold_token=False, + client_id=None, + client_secret=None, + **kwargs + ): + """Intercept all requests and add the OAuth 2 token if present.""" + if not is_secure_transport(url): + raise InsecureTransportError() + if self.token and not withhold_token: + log.debug( + "Invoking %d protected resource request hooks.", + len(self.compliance_hook["protected_request"]), + ) + for hook in self.compliance_hook["protected_request"]: + log.debug("Invoking hook %s.", hook) + url, headers, data = hook(url, headers, data) + + log.debug("Adding token %s to request.", self.token) + try: + url, headers, data = self._client.add_token( + url, http_method=method, body=data, headers=headers + ) + # Attempt to retrieve and save new access token if expired + except TokenExpiredError: + if self.auto_refresh_url: + log.debug( + "Auto refresh is set, attempting to refresh at %s.", + self.auto_refresh_url, + ) + + # We mustn't pass auth twice. + auth = kwargs.pop("auth", None) + if client_id and client_secret and (auth is None): + log.debug( + 'Encoding client_id "%s" with client_secret as Basic auth credentials.', + client_id, + ) + auth = requests.auth.HTTPBasicAuth(client_id, client_secret) + token = self.refresh_token( + self.auto_refresh_url, auth=auth, **kwargs + ) + if self.token_updater: + log.debug( + "Updating token to %s using %s.", token, self.token_updater + ) + self.token_updater(token) + url, headers, data = self._client.add_token( + url, http_method=method, body=data, headers=headers + ) + else: + raise TokenUpdated(token) + else: + raise + + log.debug("Requesting url %s using method %s.", url, method) + log.debug("Supplying headers %s and data %s", headers, data) + log.debug("Passing through key word arguments %s.", kwargs) + return super(OAuth2Session, self).request( + method, url, headers=headers, data=data, **kwargs + ) + + def register_compliance_hook(self, hook_type, hook): + """Register a hook for request/response tweaking. + + Available hooks are: + access_token_response invoked before token parsing. + refresh_token_response invoked before refresh token parsing. + protected_request invoked before making a request. + + If you find a new hook is needed please send a GitHub PR request + or open an issue. + """ + if hook_type not in self.compliance_hook: + raise ValueError( + "Hook type %s is not in %s.", hook_type, self.compliance_hook + ) + self.compliance_hook[hook_type].add(hook) diff --git a/lib/sgmllib.py b/lib/sgmllib.py new file mode 100644 index 00000000..88a02a30 --- /dev/null +++ b/lib/sgmllib.py @@ -0,0 +1,547 @@ +"""A parser for SGML, using the derived class as a static DTD.""" + +# XXX This only supports those SGML features used by HTML. + +# XXX There should be a way to distinguish between PCDATA (parsed +# character data -- the normal case), RCDATA (replaceable character +# data -- only char and entity references and end tags are special) +# and CDATA (character data -- only end tags are special). RCDATA is +# not supported at all. + +import _markupbase +import re + +__all__ = ["SGMLParser", "SGMLParseError"] + +# Regular expressions used for parsing + +interesting = re.compile('[&<]') +incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' + '<([a-zA-Z][^<>]*|' + '/([a-zA-Z][^<>]*)?|' + '![^<>]*)?') + +entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') +charref = re.compile('&#([0-9]+)[^0-9]') + +starttagopen = re.compile('<[>a-zA-Z]') +shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') +shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') +piclose = re.compile('>') +endbracket = re.compile('[<>]') +tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') +attrfind = re.compile( + r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' + r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?') + + +class SGMLParseError(RuntimeError): + """Exception raised for all parse errors.""" + pass + + +# SGML parser base class -- find tags and call handler functions. +# Usage: p = SGMLParser(); p.feed(data); ...; p.close(). +# The dtd is defined by deriving a class which defines methods +# with special names to handle tags: start_foo and end_foo to handle +# and , respectively, or do_foo to handle by itself. +# (Tags are converted to lower case for this purpose.) The data +# between tags is passed to the parser by calling self.handle_data() +# with some data as argument (the data may be split up in arbitrary +# chunks). Entity references are passed by calling +# self.handle_entityref() with the entity reference as argument. + +class SGMLParser(_markupbase.ParserBase): + # Definition of entities -- derived classes may override + entity_or_charref = re.compile('&(?:' + '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)' + ')(;?)') + + def __init__(self, verbose=0): + """Initialize and reset this instance.""" + self.verbose = verbose + self.reset() + + def reset(self): + """Reset this instance. Loses all unprocessed data.""" + self.__starttag_text = None + self.rawdata = '' + self.stack = [] + self.lasttag = '???' + self.nomoretags = 0 + self.literal = 0 + _markupbase.ParserBase.reset(self) + + def setnomoretags(self): + """Enter literal mode (CDATA) till EOF. + + Intended for derived classes only. + """ + self.nomoretags = self.literal = 1 + + def setliteral(self, *args): + """Enter literal mode (CDATA). + + Intended for derived classes only. + """ + self.literal = 1 + + def feed(self, data): + """Feed some data to the parser. + + Call this as often as you want, with as little or as much text + as you want (may include '\n'). (This just saves the text, + all the processing is done by goahead().) + """ + + self.rawdata = self.rawdata + data + self.goahead(0) + + def close(self): + """Handle the remaining data.""" + self.goahead(1) + + def error(self, message): + raise SGMLParseError(message) + + # Internal -- handle data as far as reasonable. May leave state + # and data to be processed by a subsequent call. If 'end' is + # true, force handling all data as if followed by EOF marker. + def goahead(self, end): + rawdata = self.rawdata + i = 0 + n = len(rawdata) + while i < n: + if self.nomoretags: + self.handle_data(rawdata[i:n]) + i = n + break + match = interesting.search(rawdata, i) + if match: j = match.start() + else: j = n + if i < j: + self.handle_data(rawdata[i:j]) + i = j + if i == n: break + if rawdata[i] == '<': + if starttagopen.match(rawdata, i): + if self.literal: + self.handle_data(rawdata[i]) + i = i+1 + continue + k = self.parse_starttag(i) + if k < 0: break + i = k + continue + if rawdata.startswith(" (i + 1): + self.handle_data("<") + i = i+1 + else: + # incomplete + break + continue + if rawdata.startswith(" {!r} at position {}".format(name, m.group(0), m.start(0))) + index = m.end(0) + yield name, m + break + if m is None: + c = pattern[index] + # If the character represents the start of one of the known selector types, + # throw an exception mentioning that the known selector type is in error; + # otherwise, report the invalid character. + if c == '[': + msg = "Malformed attribute selector at position {}".format(index) + elif c == '.': + msg = "Malformed class selector at position {}".format(index) + elif c == '#': + msg = "Malformed id selector at position {}".format(index) + elif c == ':': + msg = "Malformed pseudo-class selector at position {}".format(index) + else: + msg = "Invalid character {!r} position {}".format(c, index) + raise SelectorSyntaxError(msg, self.pattern, index) + if self.debug: # pragma: no cover + print('## END PARSING') + + def process_selectors(self, index: int = 0, flags: int = 0) -> ct.SelectorList: + """Process selectors.""" + + return self.parse_selectors(self.selector_iter(self.pattern), index, flags) + + +# Precompile CSS selector lists for pseudo-classes (additional logic may be required beyond the pattern) +# A few patterns are order dependent as they use patterns previous compiled. + +# CSS pattern for `:link` and `:any-link` +CSS_LINK = CSSParser( + 'html|*:is(a, area)[href]' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:checked` +CSS_CHECKED = CSSParser( + ''' + html|*:is(input[type=checkbox], input[type=radio])[checked], html|option[selected] + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:default` (must compile CSS_CHECKED first) +CSS_DEFAULT = CSSParser( + ''' + :checked, + + /* + This pattern must be at the end. + Special logic is applied to the last selector. + */ + html|form html|*:is(button, input)[type="submit"] + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_DEFAULT) +# CSS pattern for `:indeterminate` +CSS_INDETERMINATE = CSSParser( + ''' + html|input[type="checkbox"][indeterminate], + html|input[type="radio"]:is(:not([name]), [name=""]):not([checked]), + html|progress:not([value]), + + /* + This pattern must be at the end. + Special logic is applied to the last selector. + */ + html|input[type="radio"][name]:not([name='']):not([checked]) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_INDETERMINATE) +# CSS pattern for `:disabled` +CSS_DISABLED = CSSParser( + ''' + html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset)[disabled], + html|optgroup[disabled] > html|option, + html|fieldset[disabled] > html|*:is(input:not([type=hidden]), button, select, textarea, fieldset), + html|fieldset[disabled] > + html|*:not(legend:nth-of-type(1)) html|*:is(input:not([type=hidden]), button, select, textarea, fieldset) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:enabled` +CSS_ENABLED = CSSParser( + ''' + html|*:is(input:not([type=hidden]), button, select, textarea, fieldset, optgroup, option, fieldset):not(:disabled) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:required` +CSS_REQUIRED = CSSParser( + 'html|*:is(input, textarea, select)[required]' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:optional` +CSS_OPTIONAL = CSSParser( + 'html|*:is(input, textarea, select):not([required])' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:placeholder-shown` +CSS_PLACEHOLDER_SHOWN = CSSParser( + ''' + html|input:is( + :not([type]), + [type=""], + [type=text], + [type=search], + [type=url], + [type=tel], + [type=email], + [type=password], + [type=number] + )[placeholder]:not([placeholder='']):is(:not([value]), [value=""]), + html|textarea[placeholder]:not([placeholder='']) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML | FLG_PLACEHOLDER_SHOWN) +# CSS pattern default for `:nth-child` "of S" feature +CSS_NTH_OF_S_DEFAULT = CSSParser( + '*|*' +).process_selectors(flags=FLG_PSEUDO) +# CSS pattern for `:read-write` (CSS_DISABLED must be compiled first) +CSS_READ_WRITE = CSSParser( + ''' + html|*:is( + textarea, + input:is( + :not([type]), + [type=""], + [type=text], + [type=search], + [type=url], + [type=tel], + [type=email], + [type=number], + [type=password], + [type=date], + [type=datetime-local], + [type=month], + [type=time], + [type=week] + ) + ):not([readonly], :disabled), + html|*:is([contenteditable=""], [contenteditable="true" i]) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:read-only` +CSS_READ_ONLY = CSSParser( + ''' + html|*:not(:read-write) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_HTML) +# CSS pattern for `:in-range` +CSS_IN_RANGE = CSSParser( + ''' + html|input:is( + [type="date"], + [type="month"], + [type="week"], + [type="time"], + [type="datetime-local"], + [type="number"], + [type="range"] + ):is( + [min], + [max] + ) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_IN_RANGE | FLG_HTML) +# CSS pattern for `:out-of-range` +CSS_OUT_OF_RANGE = CSSParser( + ''' + html|input:is( + [type="date"], + [type="month"], + [type="week"], + [type="time"], + [type="datetime-local"], + [type="number"], + [type="range"] + ):is( + [min], + [max] + ) + ''' +).process_selectors(flags=FLG_PSEUDO | FLG_OUT_OF_RANGE | FLG_HTML) diff --git a/lib/soupsieve/css_types.py b/lib/soupsieve/css_types.py new file mode 100644 index 00000000..e5a6e49c --- /dev/null +++ b/lib/soupsieve/css_types.py @@ -0,0 +1,407 @@ +"""CSS selector structure items.""" +import copyreg +from .pretty import pretty +from typing import Any, Type, Tuple, Union, Dict, Iterator, Hashable, Optional, Pattern, Iterable, Mapping + +__all__ = ( + 'Selector', + 'SelectorNull', + 'SelectorTag', + 'SelectorAttribute', + 'SelectorContains', + 'SelectorNth', + 'SelectorLang', + 'SelectorList', + 'Namespaces', + 'CustomSelectors' +) + + +SEL_EMPTY = 0x1 +SEL_ROOT = 0x2 +SEL_DEFAULT = 0x4 +SEL_INDETERMINATE = 0x8 +SEL_SCOPE = 0x10 +SEL_DIR_LTR = 0x20 +SEL_DIR_RTL = 0x40 +SEL_IN_RANGE = 0x80 +SEL_OUT_OF_RANGE = 0x100 +SEL_DEFINED = 0x200 +SEL_PLACEHOLDER_SHOWN = 0x400 + + +class Immutable: + """Immutable.""" + + __slots__: Tuple[str, ...] = ('_hash',) + + _hash: int + + def __init__(self, **kwargs: Any) -> None: + """Initialize.""" + + temp = [] + for k, v in kwargs.items(): + temp.append(type(v)) + temp.append(v) + super(Immutable, self).__setattr__(k, v) + super(Immutable, self).__setattr__('_hash', hash(tuple(temp))) + + @classmethod + def __base__(cls) -> "Type[Immutable]": + """Get base class.""" + + return cls + + def __eq__(self, other: Any) -> bool: + """Equal.""" + + return ( + isinstance(other, self.__base__()) and + all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash']) + ) + + def __ne__(self, other: Any) -> bool: + """Equal.""" + + return ( + not isinstance(other, self.__base__()) or + any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash']) + ) + + def __hash__(self) -> int: + """Hash.""" + + return self._hash + + def __setattr__(self, name: str, value: Any) -> None: + """Prevent mutability.""" + + raise AttributeError("'{}' is immutable".format(self.__class__.__name__)) + + def __repr__(self) -> str: # pragma: no cover + """Representation.""" + + return "{}({})".format( + self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) + ) + + __str__ = __repr__ + + def pretty(self) -> None: # pragma: no cover + """Pretty print.""" + + print(pretty(self)) + + +class ImmutableDict(Mapping[Any, Any]): + """Hashable, immutable dictionary.""" + + def __init__( + self, + arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]] + ) -> None: + """Initialize.""" + + self._validate(arg) + self._d = dict(arg) + self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())])) + + def _validate(self, arg: Union[Dict[Any, Any], Iterable[Tuple[Any, Any]]]) -> None: + """Validate arguments.""" + + if isinstance(arg, dict): + if not all([isinstance(v, Hashable) for v in arg.values()]): + raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) + elif not all([isinstance(k, Hashable) and isinstance(v, Hashable) for k, v in arg]): + raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) + + def __iter__(self) -> Iterator[Any]: + """Iterator.""" + + return iter(self._d) + + def __len__(self) -> int: + """Length.""" + + return len(self._d) + + def __getitem__(self, key: Any) -> Any: + """Get item: `namespace['key']`.""" + + return self._d[key] + + def __hash__(self) -> int: + """Hash.""" + + return self._hash + + def __repr__(self) -> str: # pragma: no cover + """Representation.""" + + return "{!r}".format(self._d) + + __str__ = __repr__ + + +class Namespaces(ImmutableDict): + """Namespaces.""" + + def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + """Initialize.""" + + super().__init__(arg) + + def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + """Validate arguments.""" + + if isinstance(arg, dict): + if not all([isinstance(v, str) for v in arg.values()]): + raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) + elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): + raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) + + +class CustomSelectors(ImmutableDict): + """Custom selectors.""" + + def __init__(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + """Initialize.""" + + super().__init__(arg) + + def _validate(self, arg: Union[Dict[str, str], Iterable[Tuple[str, str]]]) -> None: + """Validate arguments.""" + + if isinstance(arg, dict): + if not all([isinstance(v, str) for v in arg.values()]): + raise TypeError('{} values must be hashable'.format(self.__class__.__name__)) + elif not all([isinstance(k, str) and isinstance(v, str) for k, v in arg]): + raise TypeError('{} keys and values must be Unicode strings'.format(self.__class__.__name__)) + + +class Selector(Immutable): + """Selector.""" + + __slots__ = ( + 'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors', + 'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash' + ) + + tag: Optional['SelectorTag'] + ids: Tuple[str, ...] + classes: Tuple[str, ...] + attributes: Tuple['SelectorAttribute', ...] + nth: Tuple['SelectorNth', ...] + selectors: Tuple['SelectorList', ...] + relation: 'SelectorList' + rel_type: Optional[str] + contains: Tuple['SelectorContains', ...] + lang: Tuple['SelectorLang', ...] + flags: int + + def __init__( + self, + tag: Optional['SelectorTag'], + ids: Tuple[str, ...], + classes: Tuple[str, ...], + attributes: Tuple['SelectorAttribute', ...], + nth: Tuple['SelectorNth', ...], + selectors: Tuple['SelectorList', ...], + relation: 'SelectorList', + rel_type: Optional[str], + contains: Tuple['SelectorContains', ...], + lang: Tuple['SelectorLang', ...], + flags: int + ): + """Initialize.""" + + super().__init__( + tag=tag, + ids=ids, + classes=classes, + attributes=attributes, + nth=nth, + selectors=selectors, + relation=relation, + rel_type=rel_type, + contains=contains, + lang=lang, + flags=flags + ) + + +class SelectorNull(Immutable): + """Null Selector.""" + + def __init__(self) -> None: + """Initialize.""" + + super().__init__() + + +class SelectorTag(Immutable): + """Selector tag.""" + + __slots__ = ("name", "prefix", "_hash") + + name: str + prefix: Optional[str] + + def __init__(self, name: str, prefix: Optional[str]) -> None: + """Initialize.""" + + super().__init__(name=name, prefix=prefix) + + +class SelectorAttribute(Immutable): + """Selector attribute rule.""" + + __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash") + + attribute: str + prefix: str + pattern: Optional[Pattern[str]] + xml_type_pattern: Optional[Pattern[str]] + + def __init__( + self, + attribute: str, + prefix: str, + pattern: Optional[Pattern[str]], + xml_type_pattern: Optional[Pattern[str]] + ) -> None: + """Initialize.""" + + super().__init__( + attribute=attribute, + prefix=prefix, + pattern=pattern, + xml_type_pattern=xml_type_pattern + ) + + +class SelectorContains(Immutable): + """Selector contains rule.""" + + __slots__ = ("text", "own", "_hash") + + text: Tuple[str, ...] + own: bool + + def __init__(self, text: Iterable[str], own: bool) -> None: + """Initialize.""" + + super().__init__(text=tuple(text), own=own) + + +class SelectorNth(Immutable): + """Selector nth type.""" + + __slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash") + + a: int + n: bool + b: int + of_type: bool + last: bool + selectors: 'SelectorList' + + def __init__(self, a: int, n: bool, b: int, of_type: bool, last: bool, selectors: 'SelectorList') -> None: + """Initialize.""" + + super().__init__( + a=a, + n=n, + b=b, + of_type=of_type, + last=last, + selectors=selectors + ) + + +class SelectorLang(Immutable): + """Selector language rules.""" + + __slots__ = ("languages", "_hash",) + + languages: Tuple[str, ...] + + def __init__(self, languages: Iterable[str]): + """Initialize.""" + + super().__init__(languages=tuple(languages)) + + def __iter__(self) -> Iterator[str]: + """Iterator.""" + + return iter(self.languages) + + def __len__(self) -> int: # pragma: no cover + """Length.""" + + return len(self.languages) + + def __getitem__(self, index: int) -> str: # pragma: no cover + """Get item.""" + + return self.languages[index] + + +class SelectorList(Immutable): + """Selector list.""" + + __slots__ = ("selectors", "is_not", "is_html", "_hash") + + selectors: Tuple[Union['Selector', 'SelectorNull'], ...] + is_not: bool + is_html: bool + + def __init__( + self, + selectors: Optional[Iterable[Union['Selector', 'SelectorNull']]] = None, + is_not: bool = False, + is_html: bool = False + ) -> None: + """Initialize.""" + + super().__init__( + selectors=tuple(selectors) if selectors is not None else tuple(), + is_not=is_not, + is_html=is_html + ) + + def __iter__(self) -> Iterator[Union['Selector', 'SelectorNull']]: + """Iterator.""" + + return iter(self.selectors) + + def __len__(self) -> int: + """Length.""" + + return len(self.selectors) + + def __getitem__(self, index: int) -> Union['Selector', 'SelectorNull']: + """Get item.""" + + return self.selectors[index] + + +def _pickle(p: Any) -> Any: + return p.__base__(), tuple([getattr(p, s) for s in p.__slots__[:-1]]) + + +def pickle_register(obj: Any) -> None: + """Allow object to be pickled.""" + + copyreg.pickle(obj, _pickle) + + +pickle_register(Selector) +pickle_register(SelectorNull) +pickle_register(SelectorTag) +pickle_register(SelectorAttribute) +pickle_register(SelectorContains) +pickle_register(SelectorNth) +pickle_register(SelectorLang) +pickle_register(SelectorList) diff --git a/lib/soupsieve/pretty.py b/lib/soupsieve/pretty.py new file mode 100644 index 00000000..57d16c97 --- /dev/null +++ b/lib/soupsieve/pretty.py @@ -0,0 +1,137 @@ +""" +Format a pretty string of a `SoupSieve` object for easy debugging. + +This won't necessarily support all types and such, and definitely +not support custom outputs. + +It is mainly geared towards our types as the `SelectorList` +object is a beast to look at without some indentation and newlines. +The format and various output types is fairly known (though it +hasn't been tested extensively to make sure we aren't missing corners). + +Example: + +``` +>>> import soupsieve as sv +>>> sv.compile('this > that.class[name=value]').selectors.pretty() +SelectorList( + selectors=( + Selector( + tag=SelectorTag( + name='that', + prefix=None), + ids=(), + classes=( + 'class', + ), + attributes=( + SelectorAttribute( + attribute='name', + prefix='', + pattern=re.compile( + '^value$'), + xml_type_pattern=None), + ), + nth=(), + selectors=(), + relation=SelectorList( + selectors=( + Selector( + tag=SelectorTag( + name='this', + prefix=None), + ids=(), + classes=(), + attributes=(), + nth=(), + selectors=(), + relation=SelectorList( + selectors=(), + is_not=False, + is_html=False), + rel_type='>', + contains=(), + lang=(), + flags=0), + ), + is_not=False, + is_html=False), + rel_type=None, + contains=(), + lang=(), + flags=0), + ), + is_not=False, + is_html=False) +``` +""" +import re +from typing import Any + +RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(') +RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=') +RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}') +RE_LSTRT = re.compile(r'\[') +RE_DSTRT = re.compile(r'\{') +RE_TSTRT = re.compile(r'\(') +RE_LEND = re.compile(r'\]') +RE_DEND = re.compile(r'\}') +RE_TEND = re.compile(r'\)') +RE_INT = re.compile(r'\d+') +RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+') +RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"') +RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'") +RE_SEP = re.compile(r'\s*(,)\s*') +RE_DSEP = re.compile(r'\s*(:)\s*') + +TOKENS = { + 'class': RE_CLASS, + 'param': RE_PARAM, + 'empty': RE_EMPTY, + 'lstrt': RE_LSTRT, + 'dstrt': RE_DSTRT, + 'tstrt': RE_TSTRT, + 'lend': RE_LEND, + 'dend': RE_DEND, + 'tend': RE_TEND, + 'sqstr': RE_SQSTR, + 'sep': RE_SEP, + 'dsep': RE_DSEP, + 'int': RE_INT, + 'kword': RE_KWORD, + 'dqstr': RE_DQSTR +} + + +def pretty(obj: Any) -> str: # pragma: no cover + """Make the object output string pretty.""" + + sel = str(obj) + index = 0 + end = len(sel) - 1 + indent = 0 + output = [] + + while index <= end: + m = None + for k, v in TOKENS.items(): + m = v.match(sel, index) + + if m: + name = k + index = m.end(0) + if name in ('class', 'lstrt', 'dstrt', 'tstrt'): + indent += 4 + output.append('{}\n{}'.format(m.group(0), " " * indent)) + elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'): + output.append(m.group(0)) + elif name in ('lend', 'dend', 'tend'): + indent -= 4 + output.append(m.group(0)) + elif name in ('sep',): + output.append('{}\n{}'.format(m.group(1), " " * indent)) + elif name in ('dsep',): + output.append('{} '.format(m.group(1))) + break + + return ''.join(output) diff --git a/lib/soupsieve/py.typed b/lib/soupsieve/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/lib/soupsieve/util.py b/lib/soupsieve/util.py new file mode 100644 index 00000000..2b1ed24b --- /dev/null +++ b/lib/soupsieve/util.py @@ -0,0 +1,116 @@ +"""Utility.""" +from functools import wraps, lru_cache +import warnings +import re +from typing import Callable, Any, Optional, Tuple, List + +DEBUG = 0x00001 + +RE_PATTERN_LINE_SPLIT = re.compile(r'(?:\r\n|(?!\r\n)[\n\r])|$') + +UC_A = ord('A') +UC_Z = ord('Z') + + +@lru_cache(maxsize=512) +def lower(string: str) -> str: + """Lower.""" + + new_string = [] + for c in string: + o = ord(c) + new_string.append(chr(o + 32) if UC_A <= o <= UC_Z else c) + return ''.join(new_string) + + +class SelectorSyntaxError(Exception): + """Syntax error in a CSS selector.""" + + def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None: + """Initialize.""" + + self.line = None + self.col = None + self.context = None + + if pattern is not None and index is not None: + # Format pattern to show line and column position + self.context, self.line, self.col = get_pattern_context(pattern, index) + msg = '{}\n line {}:\n{}'.format(msg, self.line, self.context) + + super().__init__(msg) + + +def deprecated(message: str, stacklevel: int = 2) -> Callable[..., Any]: # pragma: no cover + """ + Raise a `DeprecationWarning` when wrapped function/method is called. + + Usage: + + @deprecated("This method will be removed in version X; use Y instead.") + def some_method()" + pass + """ + + def _wrapper(func: Callable[..., Any]) -> Callable[..., Any]: + @wraps(func) + def _deprecated_func(*args: Any, **kwargs: Any) -> Any: + warnings.warn( + f"'{func.__name__}' is deprecated. {message}", + category=DeprecationWarning, + stacklevel=stacklevel + ) + return func(*args, **kwargs) + return _deprecated_func + return _wrapper + + +def warn_deprecated(message: str, stacklevel: int = 2) -> None: # pragma: no cover + """Warn deprecated.""" + + warnings.warn( + message, + category=DeprecationWarning, + stacklevel=stacklevel + ) + + +def get_pattern_context(pattern: str, index: int) -> Tuple[str, int, int]: + """Get the pattern context.""" + + last = 0 + current_line = 1 + col = 1 + text = [] # type: List[str] + line = 1 + offset = None # type: Optional[int] + + # Split pattern by newline and handle the text before the newline + for m in RE_PATTERN_LINE_SPLIT.finditer(pattern): + linetext = pattern[last:m.start(0)] + if not len(m.group(0)) and not len(text): + indent = '' + offset = -1 + col = index - last + 1 + elif last <= index < m.end(0): + indent = '--> ' + offset = (-1 if index > m.start(0) else 0) + 3 + col = index - last + 1 + else: + indent = ' ' + offset = None + if len(text): + # Regardless of whether we are presented with `\r\n`, `\r`, or `\n`, + # we will render the output with just `\n`. We will still log the column + # correctly though. + text.append('\n') + text.append('{}{}'.format(indent, linetext)) + if offset is not None: + text.append('\n') + text.append(' ' * (col + offset) + '^') + line = current_line + + current_line += 1 + last = m.end(0) + + return ''.join(text), line, col diff --git a/lib/tempora/__init__.py b/lib/tempora/__init__.py new file mode 100644 index 00000000..224ba5a5 --- /dev/null +++ b/lib/tempora/__init__.py @@ -0,0 +1,682 @@ +"Objects and routines pertaining to date and time (tempora)" + +import datetime +import time +import re +import numbers +import functools +import warnings +import contextlib + +from jaraco.functools import once + + +class Parser: + """ + *deprecated* + + Datetime parser: parses a date-time string using multiple possible + formats. + + >>> p = Parser(('%H%M', '%H:%M')) + >>> tuple(p.parse('1319')) + (1900, 1, 1, 13, 19, 0, 0, 1, -1) + >>> dateParser = Parser(('%m/%d/%Y', '%Y-%m-%d', '%d-%b-%Y')) + >>> tuple(dateParser.parse('2003-12-20')) + (2003, 12, 20, 0, 0, 0, 5, 354, -1) + >>> tuple(dateParser.parse('16-Dec-1994')) + (1994, 12, 16, 0, 0, 0, 4, 350, -1) + >>> tuple(dateParser.parse('5/19/2003')) + (2003, 5, 19, 0, 0, 0, 0, 139, -1) + >>> dtParser = Parser(('%Y-%m-%d %H:%M:%S', '%a %b %d %H:%M:%S %Y')) + >>> tuple(dtParser.parse('2003-12-20 19:13:26')) + (2003, 12, 20, 19, 13, 26, 5, 354, -1) + >>> tuple(dtParser.parse('Tue Jan 20 16:19:33 2004')) + (2004, 1, 20, 16, 19, 33, 1, 20, -1) + + Be forewarned, a ValueError will be raised if more than one format + matches: + + >>> Parser(('%H%M', '%H%M%S')).parse('732') + Traceback (most recent call last): + ... + ValueError: More than one format string matched target 732. + + >>> Parser(('%H',)).parse('22:21') + Traceback (most recent call last): + ... + ValueError: No format strings matched the target 22:21. + """ + + formats = ('%m/%d/%Y', '%m/%d/%y', '%Y-%m-%d', '%d-%b-%Y', '%d-%b-%y') + "some common default formats" + + def __init__(self, formats=None): + warnings.warn("Use dateutil.parser", DeprecationWarning) + if formats: + self.formats = formats + + def parse(self, target): + self.target = target + results = tuple(filter(None, map(self._parse, self.formats))) + del self.target + if not results: + tmpl = "No format strings matched the target {target}." + raise ValueError(tmpl.format(**locals())) + if not len(results) == 1: + tmpl = "More than one format string matched target {target}." + raise ValueError(tmpl.format(**locals())) + return results[0] + + def _parse(self, format): + try: + result = time.strptime(self.target, format) + except ValueError: + result = False + return result + + +# some useful constants +osc_per_year = 290091329207984000 +""" +mean vernal equinox year expressed in oscillations of atomic cesium at the +year 2000 (see http://webexhibits.org/calendars/timeline.html for more info). +""" +osc_per_second = 9192631770 +seconds_per_second = 1 +seconds_per_year = 31556940 +seconds_per_minute = 60 +minutes_per_hour = 60 +hours_per_day = 24 +seconds_per_hour = seconds_per_minute * minutes_per_hour +seconds_per_day = seconds_per_hour * hours_per_day +days_per_year = seconds_per_year / seconds_per_day +thirty_days = datetime.timedelta(days=30) +# these values provide useful averages +six_months = datetime.timedelta(days=days_per_year / 2) +seconds_per_month = seconds_per_year / 12 +hours_per_month = hours_per_day * days_per_year / 12 + + +@once +def _needs_year_help(): + """ + Some versions of Python render %Y with only three characters :( + https://bugs.python.org/issue39103 + """ + return len(datetime.date(900, 1, 1).strftime('%Y')) != 4 + + +def ensure_datetime(ob): + """ + Given a datetime or date or time object from the ``datetime`` + module, always return a datetime using default values. + """ + if isinstance(ob, datetime.datetime): + return ob + date = time = ob + if isinstance(ob, datetime.date): + time = datetime.time() + if isinstance(ob, datetime.time): + date = datetime.date(1900, 1, 1) + return datetime.datetime.combine(date, time) + + +def strftime(fmt, t): + """ + Portable strftime. + + In the stdlib, strftime has `known portability problems + `_. This function + aims to smooth over those issues and provide a + consistent experience across the major platforms. + + >>> strftime('%Y', datetime.datetime(1890, 1, 1)) + '1890' + >>> strftime('%Y', datetime.datetime(900, 1, 1)) + '0900' + + Supports time.struct_time, tuples, and datetime.datetime objects. + + >>> strftime('%Y-%m-%d', (1976, 5, 7)) + '1976-05-07' + + Also supports date objects + + >>> strftime('%Y', datetime.date(1976, 5, 7)) + '1976' + + Also supports milliseconds using %s. + + >>> strftime('%s', datetime.time(microsecond=20000)) + '020' + + Also supports microseconds (3 digits) using %µ + + >>> strftime('%µ', datetime.time(microsecond=123456)) + '456' + + Historically, %u was used for microseconds, but now + it honors the value rendered by stdlib. + + >>> strftime('%u', datetime.date(1976, 5, 7)) + '5' + + Also supports microseconds (6 digits) using %f + + >>> strftime('%f', datetime.time(microsecond=23456)) + '023456' + + Even supports time values on date objects (discouraged): + + >>> strftime('%f', datetime.date(1976, 1, 1)) + '000000' + >>> strftime('%µ', datetime.date(1976, 1, 1)) + '000' + >>> strftime('%s', datetime.date(1976, 1, 1)) + '000' + + And vice-versa: + + >>> strftime('%Y', datetime.time()) + '1900' + """ + if isinstance(t, (time.struct_time, tuple)): + t = datetime.datetime(*t[:6]) + t = ensure_datetime(t) + subs = ( + ('%s', '%03d' % (t.microsecond // 1000)), + ('%µ', '%03d' % (t.microsecond % 1000)), + ) + if _needs_year_help(): # pragma: nocover + subs += (('%Y', '%04d' % t.year),) + + def doSub(s, sub): + return s.replace(*sub) + + def doSubs(s): + return functools.reduce(doSub, subs, s) + + fmt = '%%'.join(map(doSubs, fmt.split('%%'))) + return t.strftime(fmt) + + +def datetime_mod(dt, period, start=None): + """ + Find the time which is the specified date/time truncated to the time delta + relative to the start date/time. + By default, the start time is midnight of the same day as the specified + date/time. + + >>> datetime_mod(datetime.datetime(2004, 1, 2, 3), + ... datetime.timedelta(days = 1.5), + ... start = datetime.datetime(2004, 1, 1)) + datetime.datetime(2004, 1, 1, 0, 0) + >>> datetime_mod(datetime.datetime(2004, 1, 2, 13), + ... datetime.timedelta(days = 1.5), + ... start = datetime.datetime(2004, 1, 1)) + datetime.datetime(2004, 1, 2, 12, 0) + >>> datetime_mod(datetime.datetime(2004, 1, 2, 13), + ... datetime.timedelta(days = 7), + ... start = datetime.datetime(2004, 1, 1)) + datetime.datetime(2004, 1, 1, 0, 0) + >>> datetime_mod(datetime.datetime(2004, 1, 10, 13), + ... datetime.timedelta(days = 7), + ... start = datetime.datetime(2004, 1, 1)) + datetime.datetime(2004, 1, 8, 0, 0) + """ + if start is None: + # use midnight of the same day + start = datetime.datetime.combine(dt.date(), datetime.time()) + # calculate the difference between the specified time and the start date. + delta = dt - start + + # now aggregate the delta and the period into microseconds + # Use microseconds because that's the highest precision of these time + # pieces. Also, using microseconds ensures perfect precision (no floating + # point errors). + def get_time_delta_microseconds(td): + return (td.days * seconds_per_day + td.seconds) * 1000000 + td.microseconds + + delta, period = map(get_time_delta_microseconds, (delta, period)) + offset = datetime.timedelta(microseconds=delta % period) + # the result is the original specified time minus the offset + result = dt - offset + return result + + +def datetime_round(dt, period, start=None): + """ + Find the nearest even period for the specified date/time. + + >>> datetime_round(datetime.datetime(2004, 11, 13, 8, 11, 13), + ... datetime.timedelta(hours = 1)) + datetime.datetime(2004, 11, 13, 8, 0) + >>> datetime_round(datetime.datetime(2004, 11, 13, 8, 31, 13), + ... datetime.timedelta(hours = 1)) + datetime.datetime(2004, 11, 13, 9, 0) + >>> datetime_round(datetime.datetime(2004, 11, 13, 8, 30), + ... datetime.timedelta(hours = 1)) + datetime.datetime(2004, 11, 13, 9, 0) + """ + result = datetime_mod(dt, period, start) + if abs(dt - result) >= period // 2: + result += period + return result + + +def get_nearest_year_for_day(day): + """ + Returns the nearest year to now inferred from a Julian date. + + >>> freezer = getfixture('freezer') + >>> freezer.move_to('2019-05-20') + >>> get_nearest_year_for_day(20) + 2019 + >>> get_nearest_year_for_day(340) + 2018 + >>> freezer.move_to('2019-12-15') + >>> get_nearest_year_for_day(20) + 2020 + """ + now = time.gmtime() + result = now.tm_year + # if the day is far greater than today, it must be from last year + if day - now.tm_yday > 365 // 2: + result -= 1 + # if the day is far less than today, it must be for next year. + if now.tm_yday - day > 365 // 2: + result += 1 + return result + + +def gregorian_date(year, julian_day): + """ + Gregorian Date is defined as a year and a julian day (1-based + index into the days of the year). + + >>> gregorian_date(2007, 15) + datetime.date(2007, 1, 15) + """ + result = datetime.date(year, 1, 1) + result += datetime.timedelta(days=julian_day - 1) + return result + + +def get_period_seconds(period): + """ + return the number of seconds in the specified period + + >>> get_period_seconds('day') + 86400 + >>> get_period_seconds(86400) + 86400 + >>> get_period_seconds(datetime.timedelta(hours=24)) + 86400 + >>> get_period_seconds('day + os.system("rm -Rf *")') + Traceback (most recent call last): + ... + ValueError: period not in (second, minute, hour, day, month, year) + """ + if isinstance(period, str): + try: + name = 'seconds_per_' + period.lower() + result = globals()[name] + except KeyError: + msg = "period not in (second, minute, hour, day, month, year)" + raise ValueError(msg) + elif isinstance(period, numbers.Number): + result = period + elif isinstance(period, datetime.timedelta): + result = period.days * get_period_seconds('day') + period.seconds + else: + raise TypeError('period must be a string or integer') + return result + + +def get_date_format_string(period): + """ + For a given period (e.g. 'month', 'day', or some numeric interval + such as 3600 (in secs)), return the format string that can be + used with strftime to format that time to specify the times + across that interval, but no more detailed. + For example, + + >>> get_date_format_string('month') + '%Y-%m' + >>> get_date_format_string(3600) + '%Y-%m-%d %H' + >>> get_date_format_string('hour') + '%Y-%m-%d %H' + >>> get_date_format_string(None) + Traceback (most recent call last): + ... + TypeError: period must be a string or integer + >>> get_date_format_string('garbage') + Traceback (most recent call last): + ... + ValueError: period not in (second, minute, hour, day, month, year) + """ + # handle the special case of 'month' which doesn't have + # a static interval in seconds + if isinstance(period, str) and period.lower() == 'month': + return '%Y-%m' + file_period_secs = get_period_seconds(period) + format_pieces = ('%Y', '-%m-%d', ' %H', '-%M', '-%S') + seconds_per_second = 1 + intervals = ( + seconds_per_year, + seconds_per_day, + seconds_per_hour, + seconds_per_minute, + seconds_per_second, + ) + mods = list(map(lambda interval: file_period_secs % interval, intervals)) + format_pieces = format_pieces[: mods.index(0) + 1] + return ''.join(format_pieces) + + +def divide_timedelta_float(td, divisor): + """ + Divide a timedelta by a float value + + >>> one_day = datetime.timedelta(days=1) + >>> half_day = datetime.timedelta(days=.5) + >>> divide_timedelta_float(one_day, 2.0) == half_day + True + >>> divide_timedelta_float(one_day, 2) == half_day + True + """ + warnings.warn("Use native division", DeprecationWarning) + return td / divisor + + +def calculate_prorated_values(): + """ + >>> monkeypatch = getfixture('monkeypatch') + >>> import builtins + >>> monkeypatch.setattr(builtins, 'input', lambda prompt: '3/hour') + >>> calculate_prorated_values() + per minute: 0.05 + per hour: 3.0 + per day: 72.0 + per month: 2191.454166666667 + per year: 26297.45 + """ + rate = input("Enter the rate (3/hour, 50/month)> ") + for period, value in _prorated_values(rate): + print("per {period}: {value}".format(**locals())) + + +def _prorated_values(rate): + """ + Given a rate (a string in units per unit time), and return that same + rate for various time periods. + + >>> for period, value in _prorated_values('20/hour'): + ... print('{period}: {value:0.3f}'.format(**locals())) + minute: 0.333 + hour: 20.000 + day: 480.000 + month: 14609.694 + year: 175316.333 + + """ + res = re.match(r'(?P[\d.]+)/(?P\w+)$', rate).groupdict() + value = float(res['value']) + value_per_second = value / get_period_seconds(res['period']) + for period in ('minute', 'hour', 'day', 'month', 'year'): + period_value = value_per_second * get_period_seconds(period) + yield period, period_value + + +def parse_timedelta(str): + """ + Take a string representing a span of time and parse it to a time delta. + Accepts any string of comma-separated numbers each with a unit indicator. + + >>> parse_timedelta('1 day') + datetime.timedelta(days=1) + + >>> parse_timedelta('1 day, 30 seconds') + datetime.timedelta(days=1, seconds=30) + + >>> parse_timedelta('47.32 days, 20 minutes, 15.4 milliseconds') + datetime.timedelta(days=47, seconds=28848, microseconds=15400) + + Supports weeks, months, years + + >>> parse_timedelta('1 week') + datetime.timedelta(days=7) + + >>> parse_timedelta('1 year, 1 month') + datetime.timedelta(days=395, seconds=58685) + + Note that months and years strict intervals, not aligned + to a calendar: + + >>> now = datetime.datetime.now() + >>> later = now + parse_timedelta('1 year') + >>> diff = later.replace(year=now.year) - now + >>> diff.seconds + 20940 + + >>> parse_timedelta('14 seconds foo') + Traceback (most recent call last): + ... + ValueError: Unexpected 'foo' + + Supports abbreviations: + + >>> parse_timedelta('1s') + datetime.timedelta(seconds=1) + + >>> parse_timedelta('1sec') + datetime.timedelta(seconds=1) + + >>> parse_timedelta('5min1sec') + datetime.timedelta(seconds=301) + + >>> parse_timedelta('1 ms') + datetime.timedelta(microseconds=1000) + + >>> parse_timedelta('1 µs') + datetime.timedelta(microseconds=1) + + >>> parse_timedelta('1 us') + datetime.timedelta(microseconds=1) + + And supports the common colon-separated duration: + + >>> parse_timedelta('14:00:35.362') + datetime.timedelta(seconds=50435, microseconds=362000) + + TODO: Should this be 14 hours or 14 minutes? + + >>> parse_timedelta('14:00') + datetime.timedelta(seconds=50400) + + >>> parse_timedelta('14:00 minutes') + Traceback (most recent call last): + ... + ValueError: Cannot specify units with composite delta + + Nanoseconds get rounded to the nearest microsecond: + + >>> parse_timedelta('600 ns') + datetime.timedelta(microseconds=1) + + >>> parse_timedelta('.002 µs, 499 ns') + datetime.timedelta(microseconds=1) + """ + return _parse_timedelta_nanos(str).resolve() + + +def _parse_timedelta_nanos(str): + parts = re.finditer(r'(?P[\d.:]+)\s?(?P[^\W\d_]+)?', str) + chk_parts = _check_unmatched(parts, str) + deltas = map(_parse_timedelta_part, chk_parts) + return sum(deltas, _Saved_NS()) + + +def _check_unmatched(matches, text): + """ + Ensure no words appear in unmatched text. + """ + + def check_unmatched(unmatched): + found = re.search(r'\w+', unmatched) + if found: + raise ValueError(f"Unexpected {found.group(0)!r}") + + pos = 0 + for match in matches: + check_unmatched(text[pos : match.start()]) + yield match + pos = match.end() + check_unmatched(text[match.end() :]) + + +_unit_lookup = { + 'µs': 'microsecond', + 'µsec': 'microsecond', + 'us': 'microsecond', + 'usec': 'microsecond', + 'micros': 'microsecond', + 'ms': 'millisecond', + 'msec': 'millisecond', + 'millis': 'millisecond', + 's': 'second', + 'sec': 'second', + 'h': 'hour', + 'hr': 'hour', + 'm': 'minute', + 'min': 'minute', + 'w': 'week', + 'wk': 'week', + 'd': 'day', + 'ns': 'nanosecond', + 'nsec': 'nanosecond', + 'nanos': 'nanosecond', +} + + +def _resolve_unit(raw_match): + if raw_match is None: + return 'second' + text = raw_match.lower() + return _unit_lookup.get(text, text) + + +def _parse_timedelta_composite(raw_value, unit): + if unit != 'seconds': + raise ValueError("Cannot specify units with composite delta") + values = raw_value.split(':') + units = 'hours', 'minutes', 'seconds' + composed = ' '.join(f'{value} {unit}' for value, unit in zip(values, units)) + return _parse_timedelta_nanos(composed) + + +def _parse_timedelta_part(match): + unit = _resolve_unit(match.group('unit')) + if not unit.endswith('s'): + unit += 's' + raw_value = match.group('value') + if ':' in raw_value: + return _parse_timedelta_composite(raw_value, unit) + value = float(raw_value) + if unit == 'months': + unit = 'years' + value = value / 12 + if unit == 'years': + unit = 'days' + value = value * days_per_year + return _Saved_NS.derive(unit, value) + + +class _Saved_NS: + """ + Bundle a timedelta with nanoseconds. + + >>> _Saved_NS.derive('microseconds', .001) + _Saved_NS(td=datetime.timedelta(0), nanoseconds=1) + """ + + td = datetime.timedelta() + nanoseconds = 0 + multiplier = dict( + seconds=1000000000, + milliseconds=1000000, + microseconds=1000, + ) + + def __init__(self, **kwargs): + vars(self).update(kwargs) + + @classmethod + def derive(cls, unit, value): + if unit == 'nanoseconds': + return _Saved_NS(nanoseconds=value) + + res = _Saved_NS(td=datetime.timedelta(**{unit: value})) + with contextlib.suppress(KeyError): + res.nanoseconds = int(value * cls.multiplier[unit]) % 1000 + return res + + def __add__(self, other): + return _Saved_NS( + td=self.td + other.td, nanoseconds=self.nanoseconds + other.nanoseconds + ) + + def resolve(self): + """ + Resolve any nanoseconds into the microseconds field, + discarding any nanosecond resolution (but honoring partial + microseconds). + """ + addl_micros = round(self.nanoseconds / 1000) + return self.td + datetime.timedelta(microseconds=addl_micros) + + def __repr__(self): + return f'_Saved_NS(td={self.td!r}, nanoseconds={self.nanoseconds!r})' + + +def divide_timedelta(td1, td2): + """ + Get the ratio of two timedeltas + + >>> one_day = datetime.timedelta(days=1) + >>> one_hour = datetime.timedelta(hours=1) + >>> divide_timedelta(one_hour, one_day) == 1 / 24 + True + """ + warnings.warn("Use native division", DeprecationWarning) + return td1 / td2 + + +def date_range(start=None, stop=None, step=None): + """ + Much like the built-in function range, but works with dates + + >>> range_items = date_range( + ... datetime.datetime(2005,12,21), + ... datetime.datetime(2005,12,25), + ... ) + >>> my_range = tuple(range_items) + >>> datetime.datetime(2005,12,21) in my_range + True + >>> datetime.datetime(2005,12,22) in my_range + True + >>> datetime.datetime(2005,12,25) in my_range + False + >>> from_now = date_range(stop=datetime.datetime(2099, 12, 31)) + >>> next(from_now) + datetime.datetime(...) + """ + if step is None: + step = datetime.timedelta(days=1) + if start is None: + start = datetime.datetime.now() + while start < stop: + yield start + start += step diff --git a/lib/tempora/schedule.py b/lib/tempora/schedule.py new file mode 100644 index 00000000..a94c9819 --- /dev/null +++ b/lib/tempora/schedule.py @@ -0,0 +1,227 @@ +""" +Classes for calling functions a schedule. Has time zone support. + +For example, to run a job at 08:00 every morning in 'Asia/Calcutta': + +>>> job = lambda: print("time is now", datetime.datetime()) +>>> time = datetime.time(8, tzinfo=pytz.timezone('Asia/Calcutta')) +>>> cmd = PeriodicCommandFixedDelay.daily_at(time, job) +>>> sched = InvokeScheduler() +>>> sched.add(cmd) +>>> while True: # doctest: +SKIP +... sched.run_pending() +... time.sleep(.1) +""" + +import datetime +import numbers +import abc +import bisect + +import pytz + + +def now(): + """ + Provide the current timezone-aware datetime. + + A client may override this function to change the default behavior, + such as to use local time or timezone-naïve times. + """ + return datetime.datetime.utcnow().replace(tzinfo=pytz.utc) + + +def from_timestamp(ts): + """ + Convert a numeric timestamp to a timezone-aware datetime. + + A client may override this function to change the default behavior, + such as to use local time or timezone-naïve times. + """ + return datetime.datetime.utcfromtimestamp(ts).replace(tzinfo=pytz.utc) + + +class DelayedCommand(datetime.datetime): + """ + A command to be executed after some delay (seconds or timedelta). + """ + + @classmethod + def from_datetime(cls, other): + return cls( + other.year, + other.month, + other.day, + other.hour, + other.minute, + other.second, + other.microsecond, + other.tzinfo, + ) + + @classmethod + def after(cls, delay, target): + if not isinstance(delay, datetime.timedelta): + delay = datetime.timedelta(seconds=delay) + due_time = now() + delay + cmd = cls.from_datetime(due_time) + cmd.delay = delay + cmd.target = target + return cmd + + @staticmethod + def _from_timestamp(input): + """ + If input is a real number, interpret it as a Unix timestamp + (seconds sinc Epoch in UTC) and return a timezone-aware + datetime object. Otherwise return input unchanged. + """ + if not isinstance(input, numbers.Real): + return input + return from_timestamp(input) + + @classmethod + def at_time(cls, at, target): + """ + Construct a DelayedCommand to come due at `at`, where `at` may be + a datetime or timestamp. + """ + at = cls._from_timestamp(at) + cmd = cls.from_datetime(at) + cmd.delay = at - now() + cmd.target = target + return cmd + + def due(self): + return now() >= self + + +class PeriodicCommand(DelayedCommand): + """ + Like a delayed command, but expect this command to run every delay + seconds. + """ + + def _next_time(self): + """ + Add delay to self, localized + """ + return self._localize(self + self.delay) + + @staticmethod + def _localize(dt): + """ + Rely on pytz.localize to ensure new result honors DST. + """ + try: + tz = dt.tzinfo + return tz.localize(dt.replace(tzinfo=None)) + except AttributeError: + return dt + + def next(self): + cmd = self.__class__.from_datetime(self._next_time()) + cmd.delay = self.delay + cmd.target = self.target + return cmd + + def __setattr__(self, key, value): + if key == 'delay' and not value > datetime.timedelta(): + raise ValueError( + "A PeriodicCommand must have a positive, " "non-zero delay." + ) + super(PeriodicCommand, self).__setattr__(key, value) + + +class PeriodicCommandFixedDelay(PeriodicCommand): + """ + Like a periodic command, but don't calculate the delay based on + the current time. Instead use a fixed delay following the initial + run. + """ + + @classmethod + def at_time(cls, at, delay, target): + """ + >>> cmd = PeriodicCommandFixedDelay.at_time(0, 30, None) + >>> cmd.delay.total_seconds() + 30.0 + """ + at = cls._from_timestamp(at) + cmd = cls.from_datetime(at) + if isinstance(delay, numbers.Number): + delay = datetime.timedelta(seconds=delay) + cmd.delay = delay + cmd.target = target + return cmd + + @classmethod + def daily_at(cls, at, target): + """ + Schedule a command to run at a specific time each day. + + >>> from tempora import utc + >>> noon = utc.time(12, 0) + >>> cmd = PeriodicCommandFixedDelay.daily_at(noon, None) + >>> cmd.delay.total_seconds() + 86400.0 + """ + daily = datetime.timedelta(days=1) + # convert when to the next datetime matching this time + when = datetime.datetime.combine(datetime.date.today(), at) + when -= daily + while when < now(): + when += daily + return cls.at_time(cls._localize(when), daily, target) + + +class Scheduler: + """ + A rudimentary abstract scheduler accepting DelayedCommands + and dispatching them on schedule. + """ + + def __init__(self): + self.queue = [] + + def add(self, command): + assert isinstance(command, DelayedCommand) + bisect.insort(self.queue, command) + + def run_pending(self): + while self.queue: + command = self.queue[0] + if not command.due(): + break + self.run(command) + if isinstance(command, PeriodicCommand): + self.add(command.next()) + del self.queue[0] + + @abc.abstractmethod + def run(self, command): + """ + Run the command + """ + + +class InvokeScheduler(Scheduler): + """ + Command targets are functions to be invoked on schedule. + """ + + def run(self, command): + command.target() + + +class CallbackScheduler(Scheduler): + """ + Command targets are passed to a dispatch callable on schedule. + """ + + def __init__(self, dispatch): + super().__init__() + self.dispatch = dispatch + + def run(self, command): + self.dispatch(command.target) diff --git a/lib/tempora/tests/test_schedule.py b/lib/tempora/tests/test_schedule.py new file mode 100644 index 00000000..0ce35435 --- /dev/null +++ b/lib/tempora/tests/test_schedule.py @@ -0,0 +1,149 @@ +import time +import random +import datetime +from unittest import mock + +import pytest +import pytz +import freezegun + +from tempora import schedule + + +do_nothing = type(None) + + +def test_delayed_command_order(): + """ + delayed commands should be sorted by delay time + """ + delays = [random.randint(0, 99) for x in range(5)] + cmds = sorted( + [schedule.DelayedCommand.after(delay, do_nothing) for delay in delays] + ) + assert [c.delay.seconds for c in cmds] == sorted(delays) + + +def test_periodic_command_delay(): + "A PeriodicCommand must have a positive, non-zero delay." + with pytest.raises(ValueError) as exc_info: + schedule.PeriodicCommand.after(0, None) + assert str(exc_info.value) == test_periodic_command_delay.__doc__ + + +def test_periodic_command_fixed_delay(): + """ + Test that we can construct a periodic command with a fixed initial + delay. + """ + fd = schedule.PeriodicCommandFixedDelay.at_time( + at=schedule.now(), delay=datetime.timedelta(seconds=2), target=lambda: None + ) + assert fd.due() is True + assert fd.next().due() is False + + +class TestCommands: + def test_delayed_command_from_timestamp(self): + """ + Ensure a delayed command can be constructed from a timestamp. + """ + t = time.time() + schedule.DelayedCommand.at_time(t, do_nothing) + + def test_command_at_noon(self): + """ + Create a periodic command that's run at noon every day. + """ + when = datetime.time(12, 0, tzinfo=pytz.utc) + cmd = schedule.PeriodicCommandFixedDelay.daily_at(when, target=None) + assert cmd.due() is False + next_cmd = cmd.next() + daily = datetime.timedelta(days=1) + day_from_now = schedule.now() + daily + two_days_from_now = day_from_now + daily + assert day_from_now < next_cmd < two_days_from_now + + @pytest.mark.parametrize("hour", range(10, 14)) + @pytest.mark.parametrize("tz_offset", (14, -14)) + def test_command_at_noon_distant_local(self, hour, tz_offset): + """ + Run test_command_at_noon, but with the local timezone + more than 12 hours away from UTC. + """ + with freezegun.freeze_time(f"2020-01-10 {hour:02}:01", tz_offset=tz_offset): + self.test_command_at_noon() + + +class TestTimezones: + def test_alternate_timezone_west(self): + target_tz = pytz.timezone('US/Pacific') + target = schedule.now().astimezone(target_tz) + cmd = schedule.DelayedCommand.at_time(target, target=None) + assert cmd.due() + + def test_alternate_timezone_east(self): + target_tz = pytz.timezone('Europe/Amsterdam') + target = schedule.now().astimezone(target_tz) + cmd = schedule.DelayedCommand.at_time(target, target=None) + assert cmd.due() + + def test_daylight_savings(self): + """ + A command at 9am should always be 9am regardless of + a DST boundary. + """ + with freezegun.freeze_time('2018-03-10 08:00:00'): + target_tz = pytz.timezone('US/Eastern') + target_time = datetime.time(9, tzinfo=target_tz) + cmd = schedule.PeriodicCommandFixedDelay.daily_at( + target_time, target=lambda: None + ) + + def naive(dt): + return dt.replace(tzinfo=None) + + assert naive(cmd) == datetime.datetime(2018, 3, 10, 9, 0, 0) + next_ = cmd.next() + assert naive(next_) == datetime.datetime(2018, 3, 11, 9, 0, 0) + assert next_ - cmd == datetime.timedelta(hours=23) + + +class TestScheduler: + def test_invoke_scheduler(self): + sched = schedule.InvokeScheduler() + target = mock.MagicMock() + cmd = schedule.DelayedCommand.after(0, target) + sched.add(cmd) + sched.run_pending() + target.assert_called_once() + assert not sched.queue + + def test_callback_scheduler(self): + callback = mock.MagicMock() + sched = schedule.CallbackScheduler(callback) + target = mock.MagicMock() + cmd = schedule.DelayedCommand.after(0, target) + sched.add(cmd) + sched.run_pending() + callback.assert_called_once_with(target) + + def test_periodic_command(self): + sched = schedule.InvokeScheduler() + target = mock.MagicMock() + + before = datetime.datetime.utcnow() + + cmd = schedule.PeriodicCommand.after(10, target) + sched.add(cmd) + sched.run_pending() + target.assert_not_called() + + with freezegun.freeze_time(before + datetime.timedelta(seconds=15)): + sched.run_pending() + assert sched.queue + target.assert_called_once() + + with freezegun.freeze_time(before + datetime.timedelta(seconds=25)): + sched.run_pending() + assert target.call_count == 2 diff --git a/lib/tempora/tests/test_timing.py b/lib/tempora/tests/test_timing.py new file mode 100644 index 00000000..43bf7efc --- /dev/null +++ b/lib/tempora/tests/test_timing.py @@ -0,0 +1,50 @@ +import datetime +import time +import contextlib +import os +from unittest import mock + +import pytest +from tempora import timing + + +def test_IntervalGovernor(): + """ + IntervalGovernor should prevent a function from being called more than + once per interval. + """ + func_under_test = mock.MagicMock() + # to look like a function, it needs a __name__ attribute + func_under_test.__name__ = 'func_under_test' + interval = datetime.timedelta(seconds=1) + governed = timing.IntervalGovernor(interval)(func_under_test) + governed('a') + governed('b') + governed(3, 'sir') + func_under_test.assert_called_once_with('a') + + +@pytest.fixture +def alt_tz(monkeypatch): + hasattr(time, 'tzset') or pytest.skip("tzset not available") + + @contextlib.contextmanager + def change(): + val = 'AEST-10AEDT-11,M10.5.0,M3.5.0' + with monkeypatch.context() as ctx: + ctx.setitem(os.environ, 'TZ', val) + time.tzset() + yield + time.tzset() + + return change() + + +def test_Stopwatch_timezone_change(alt_tz): + """ + The stopwatch should provide a consistent duration even + if the timezone changes. + """ + watch = timing.Stopwatch() + with alt_tz: + assert abs(watch.split().total_seconds()) < 0.1 diff --git a/lib/tempora/timing.py b/lib/tempora/timing.py new file mode 100644 index 00000000..6b3147a9 --- /dev/null +++ b/lib/tempora/timing.py @@ -0,0 +1,266 @@ +import datetime +import functools +import numbers +import time +import collections.abc +import contextlib + +import jaraco.functools + + +class Stopwatch: + """ + A simple stopwatch which starts automatically. + + >>> w = Stopwatch() + >>> _1_sec = datetime.timedelta(seconds=1) + >>> w.split() < _1_sec + True + >>> import time + >>> time.sleep(1.0) + >>> w.split() >= _1_sec + True + >>> w.stop() >= _1_sec + True + >>> w.reset() + >>> w.start() + >>> w.split() < _1_sec + True + + It should be possible to launch the Stopwatch in a context: + + >>> with Stopwatch() as watch: + ... assert isinstance(watch.split(), datetime.timedelta) + + In that case, the watch is stopped when the context is exited, + so to read the elapsed time: + + >>> watch.elapsed + datetime.timedelta(...) + >>> watch.elapsed.seconds + 0 + """ + + def __init__(self): + self.reset() + self.start() + + def reset(self): + self.elapsed = datetime.timedelta(0) + with contextlib.suppress(AttributeError): + del self.start_time + + def start(self): + self.start_time = datetime.datetime.utcnow() + + def stop(self): + stop_time = datetime.datetime.utcnow() + self.elapsed += stop_time - self.start_time + del self.start_time + return self.elapsed + + def split(self): + local_duration = datetime.datetime.utcnow() - self.start_time + return self.elapsed + local_duration + + # context manager support + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.stop() + + +class IntervalGovernor: + """ + Decorate a function to only allow it to be called once per + min_interval. Otherwise, it returns None. + + >>> gov = IntervalGovernor(30) + >>> gov.min_interval.total_seconds() + 30.0 + """ + + def __init__(self, min_interval): + if isinstance(min_interval, numbers.Number): + min_interval = datetime.timedelta(seconds=min_interval) + self.min_interval = min_interval + self.last_call = None + + def decorate(self, func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + allow = not self.last_call or self.last_call.split() > self.min_interval + if allow: + self.last_call = Stopwatch() + return func(*args, **kwargs) + + return wrapper + + __call__ = decorate + + +class Timer(Stopwatch): + """ + Watch for a target elapsed time. + + >>> t = Timer(0.1) + >>> t.expired() + False + >>> __import__('time').sleep(0.15) + >>> t.expired() + True + """ + + def __init__(self, target=float('Inf')): + self.target = self._accept(target) + super(Timer, self).__init__() + + @staticmethod + def _accept(target): + """ + Accept None or ∞ or datetime or numeric for target + + >>> Timer._accept(datetime.timedelta(seconds=30)) + 30.0 + >>> Timer._accept(None) + inf + """ + if isinstance(target, datetime.timedelta): + target = target.total_seconds() + + if target is None: + # treat None as infinite target + target = float('Inf') + + return target + + def expired(self): + return self.split().total_seconds() > self.target + + +class BackoffDelay(collections.abc.Iterator): + """ + Exponential backoff delay. + + Useful for defining delays between retries. Consider for use + with ``jaraco.functools.retry_call`` as the cleanup. + + Default behavior has no effect; a delay or jitter must + be supplied for the call to be non-degenerate. + + >>> bd = BackoffDelay() + >>> bd() + >>> bd() + + The following instance will delay 10ms for the first call, + 20ms for the second, etc. + + >>> bd = BackoffDelay(delay=0.01, factor=2) + >>> bd() + >>> bd() + + Inspect and adjust the state of the delay anytime. + + >>> bd.delay + 0.04 + >>> bd.delay = 0.01 + + Set limit to prevent the delay from exceeding bounds. + + >>> bd = BackoffDelay(delay=0.01, factor=2, limit=0.015) + >>> bd() + >>> bd.delay + 0.015 + + To reset the backoff, simply call ``.reset()``: + + >>> bd.reset() + >>> bd.delay + 0.01 + + Iterate on the object to retrieve/advance the delay values. + + >>> next(bd) + 0.01 + >>> next(bd) + 0.015 + >>> import itertools + >>> tuple(itertools.islice(bd, 3)) + (0.015, 0.015, 0.015) + + Limit may be a callable taking a number and returning + the limited number. + + >>> at_least_one = lambda n: max(n, 1) + >>> bd = BackoffDelay(delay=0.01, factor=2, limit=at_least_one) + >>> next(bd) + 0.01 + >>> next(bd) + 1 + + Pass a jitter to add or subtract seconds to the delay. + + >>> bd = BackoffDelay(jitter=0.01) + >>> next(bd) + 0 + >>> next(bd) + 0.01 + + Jitter may be a callable. To supply a non-deterministic jitter + between -0.5 and 0.5, consider: + + >>> import random + >>> jitter=functools.partial(random.uniform, -0.5, 0.5) + >>> bd = BackoffDelay(jitter=jitter) + >>> next(bd) + 0 + >>> 0 <= next(bd) <= 0.5 + True + """ + + delay = 0 + + factor = 1 + "Multiplier applied to delay" + + jitter = 0 + "Number or callable returning extra seconds to add to delay" + + @jaraco.functools.save_method_args + def __init__(self, delay=0, factor=1, limit=float('inf'), jitter=0): + self.delay = delay + self.factor = factor + if isinstance(limit, numbers.Number): + limit_ = limit + + def limit(n): + return max(0, min(limit_, n)) + + self.limit = limit + if isinstance(jitter, numbers.Number): + jitter_ = jitter + + def jitter(): + return jitter_ + + self.jitter = jitter + + def __call__(self): + time.sleep(next(self)) + + def __next__(self): + delay = self.delay + self.bump() + return delay + + def __iter__(self): + return self + + def bump(self): + self.delay = self.limit(self.delay * self.factor + self.jitter()) + + def reset(self): + saved = self._saved___init__ + self.__init__(*saved.args, **saved.kwargs) diff --git a/lib/tempora/utc.py b/lib/tempora/utc.py new file mode 100644 index 00000000..a585fb54 --- /dev/null +++ b/lib/tempora/utc.py @@ -0,0 +1,36 @@ +""" +Facilities for common time operations in UTC. + +Inspired by the `utc project `_. + +>>> dt = now() +>>> dt == fromtimestamp(dt.timestamp()) +True +>>> dt.tzinfo +datetime.timezone.utc + +>>> from time import time as timestamp +>>> now().timestamp() - timestamp() < 0.1 +True + +>>> (now() - fromtimestamp(timestamp())).total_seconds() < 0.1 +True + +>>> datetime(2018, 6, 26, 0).tzinfo +datetime.timezone.utc + +>>> time(0, 0).tzinfo +datetime.timezone.utc +""" + +import datetime as std +import functools + + +__all__ = ['now', 'fromtimestamp', 'datetime', 'time'] + + +now = functools.partial(std.datetime.now, std.timezone.utc) +fromtimestamp = functools.partial(std.datetime.fromtimestamp, tz=std.timezone.utc) +datetime = functools.partial(std.datetime, tzinfo=std.timezone.utc) +time = functools.partial(std.time, tzinfo=std.timezone.utc) diff --git a/lib/twitter/__init__.py b/lib/twitter/__init__.py new file mode 100644 index 00000000..b4836243 --- /dev/null +++ b/lib/twitter/__init__.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright 2007-2018 The Python-Twitter Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A library that provides a Python interface to the Twitter API.""" +from __future__ import absolute_import + +__author__ = 'The Python-Twitter Developers' +__email__ = 'python-twitter@googlegroups.com' +__copyright__ = 'Copyright (c) 2007-2016 The Python-Twitter Developers' +__license__ = 'Apache License 2.0' +__version__ = '3.5' +__url__ = 'https://github.com/bear/python-twitter' +__download_url__ = 'https://pypi.python.org/pypi/python-twitter' +__description__ = 'A Python wrapper around the Twitter API' + + +import json # noqa + +try: + from hashlib import md5 # noqa +except ImportError: + from md5 import md5 # noqa + +from ._file_cache import _FileCache # noqa +from .error import TwitterError # noqa +from .parse_tweet import ParseTweet # noqa + +from .models import ( # noqa + Category, # noqa + DirectMessage, # noqa + Hashtag, # noqa + List, # noqa + Media, # noqa + Trend, # noqa + Url, # noqa + User, # noqa + UserStatus, # noqa + Status # noqa +) + +from .api import Api # noqa diff --git a/lib/twitter/_file_cache.py b/lib/twitter/_file_cache.py new file mode 100644 index 00000000..39962457 --- /dev/null +++ b/lib/twitter/_file_cache.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +import errno +import os +import tempfile + +from hashlib import md5 + + +class _FileCacheError(Exception): + """Base exception class for FileCache related errors""" + + +class _FileCache(object): + DEPTH = 3 + + def __init__(self, root_directory=None): + self._InitializeRootDirectory(root_directory) + + def Get(self, key): + path = self._GetPath(key) + if os.path.exists(path): + with open(path) as f: + return f.read() + else: + return None + + def Set(self, key, data): + path = self._GetPath(key) + directory = os.path.dirname(path) + if not os.path.exists(directory): + os.makedirs(directory) + if not os.path.isdir(directory): + raise _FileCacheError('%s exists but is not a directory' % directory) + temp_fd, temp_path = tempfile.mkstemp() + temp_fp = os.fdopen(temp_fd, 'w') + temp_fp.write(data) + temp_fp.close() + if not path.startswith(self._root_directory): + raise _FileCacheError('%s does not appear to live under %s' % + (path, self._root_directory)) + if os.path.exists(path): + os.remove(path) + os.rename(temp_path, path) + + def Remove(self, key): + path = self._GetPath(key) + if not path.startswith(self._root_directory): + raise _FileCacheError('%s does not appear to live under %s' % + (path, self._root_directory)) + if os.path.exists(path): + os.remove(path) + + def GetCachedTime(self, key): + path = self._GetPath(key) + if os.path.exists(path): + return os.path.getmtime(path) + else: + return None + + def _GetUsername(self): + """Attempt to find the username in a cross-platform fashion.""" + try: + return os.getenv('USER') or \ + os.getenv('LOGNAME') or \ + os.getenv('USERNAME') or \ + os.getlogin() or \ + 'nobody' + except (AttributeError, IOError, OSError): + return 'nobody' + + def _GetTmpCachePath(self): + username = self._GetUsername() + cache_directory = 'python.cache_' + username + return os.path.join(tempfile.gettempdir(), cache_directory) + + def _InitializeRootDirectory(self, root_directory): + if not root_directory: + root_directory = self._GetTmpCachePath() + root_directory = os.path.abspath(root_directory) + try: + os.mkdir(root_directory) + except OSError as e: + if e.errno == errno.EEXIST and os.path.isdir(root_directory): + # directory already exists + pass + else: + # exists but is a file, or no permissions, or... + raise + self._root_directory = root_directory + + def _GetPath(self, key): + try: + hashed_key = md5(key.encode('utf-8')).hexdigest() + except TypeError: + hashed_key = md5.new(key).hexdigest() + + return os.path.join(self._root_directory, + self._GetPrefix(hashed_key), + hashed_key) + + def _GetPrefix(self, hashed_key): + return os.path.sep.join(hashed_key[0:_FileCache.DEPTH]) diff --git a/lib/twitter/api.py b/lib/twitter/api.py new file mode 100755 index 00000000..15a778f1 --- /dev/null +++ b/lib/twitter/api.py @@ -0,0 +1,5035 @@ +#!/usr/bin/env python + +# +# +# Copyright 2007-2016, 2018 The Python-Twitter Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A library that provides a Python interface to the Twitter API""" +from __future__ import division +from __future__ import print_function + +import json +import sys +import gzip +import time +import base64 +import re +import logging +import requests +from requests_oauthlib import OAuth1, OAuth2 +import io +import warnings +from uuid import uuid4 +import os + +try: + # python 3 + from urllib.parse import urlparse, urlunparse, urlencode, quote_plus + from urllib.request import __version__ as urllib_version +except ImportError: + from urlparse import urlparse, urlunparse + from urllib import urlencode, quote_plus + from urllib import __version__ as urllib_version + +from twitter import ( + __version__, + _FileCache, + Category, + DirectMessage, + List, + Status, + Trend, + User, + UserStatus, +) + +from twitter.ratelimit import RateLimit + +from twitter.twitter_utils import ( + calc_expected_status_length, + is_url, + parse_media_file, + enf_type, + parse_arg_list) + +from twitter.error import ( + TwitterError, + PythonTwitterDeprecationWarning330, +) + +if sys.version_info > (3,): + long = int # pylint: disable=invalid-name,redefined-builtin + +CHARACTER_LIMIT = 280 + +# A singleton representing a lazily instantiated FileCache. +DEFAULT_CACHE = object() + +logger = logging.getLogger(__name__) + + +class Api(object): + """A python interface into the Twitter API + + By default, the Api caches results for 1 minute. + + Example usage: + + To create an instance of the twitter.Api class, with no authentication: + + >>> import twitter + >>> api = twitter.Api() + + To fetch a single user's public status messages, where "user" is either + a Twitter "short name" or their user id. + + >>> statuses = api.GetUserTimeline(user) + >>> print([s.text for s in statuses]) + + To use authentication, instantiate the twitter.Api class with a + consumer key and secret; and the oAuth key and secret: + + >>> api = twitter.Api(consumer_key='twitter consumer key', + consumer_secret='twitter consumer secret', + access_token_key='the_key_given', + access_token_secret='the_key_secret') + + To fetch your friends (after being authenticated): + + >>> users = api.GetFriends() + >>> print([u.name for u in users]) + + To post a twitter status message (after being authenticated): + + >>> status = api.PostUpdate('I love python-twitter!') + >>> print(status.text) + I love python-twitter! + + There are many other methods, including: + + >>> api.PostUpdates(status) + >>> api.PostDirectMessage(user, text) + >>> api.GetUser(user) + >>> api.GetReplies() + >>> api.GetUserTimeline(user) + >>> api.GetHomeTimeline() + >>> api.GetStatus(status_id) + >>> api.GetStatuses(status_ids) + >>> api.DestroyStatus(status_id) + >>> api.GetFriends(user) + >>> api.GetFollowers() + >>> api.GetFeatured() + >>> api.GetDirectMessages() + >>> api.GetSentDirectMessages() + >>> api.PostDirectMessage(user, text) + >>> api.DestroyDirectMessage(message_id) + >>> api.DestroyFriendship(user) + >>> api.CreateFriendship(user) + >>> api.LookupFriendship(user) + >>> api.VerifyCredentials() + """ + + DEFAULT_CACHE_TIMEOUT = 60 # cache for 1 minute + _API_REALM = 'Twitter API' + + def __init__(self, + consumer_key=None, + consumer_secret=None, + access_token_key=None, + access_token_secret=None, + application_only_auth=False, + input_encoding=None, + request_headers=None, + cache=DEFAULT_CACHE, + base_url=None, + stream_url=None, + upload_url=None, + chunk_size=1024 * 1024, + use_gzip_compression=False, + debugHTTP=False, + timeout=None, + sleep_on_rate_limit=False, + tweet_mode='compat', + proxies=None): + """Instantiate a new twitter.Api object. + + Args: + consumer_key (str): + Your Twitter user's consumer_key. + consumer_secret (str): + Your Twitter user's consumer_secret. + access_token_key (str): + The oAuth access token key value you retrieved + from running get_access_token.py. + access_token_secret (str): + The oAuth access token's secret, also retrieved + from the get_access_token.py run. + application_only_auth: + Use Application-Only Auth instead of User Auth. + Defaults to False [Optional] + input_encoding (str, optional): + The encoding used to encode input strings. + request_header (dict, optional): + A dictionary of additional HTTP request headers. + cache (object, optional): + The cache instance to use. Defaults to DEFAULT_CACHE. + Use None to disable caching. + base_url (str, optional): + The base URL to use to contact the Twitter API. + Defaults to https://api.twitter.com. + stream_url (str, optional): + The base URL to use for streaming endpoints. + Defaults to 'https://stream.twitter.com/1.1'. + upload_url (str, optional): + The base URL to use for uploads. Defaults to 'https://upload.twitter.com/1.1'. + chunk_size (int, optional): + Chunk size to use for chunked (multi-part) uploads of images/videos/gifs. + Defaults to 1MB. Anything under 16KB and you run the risk of erroring out + on 15MB files. + use_gzip_compression (bool, optional): + Set to True to tell enable gzip compression for any call + made to Twitter. Defaults to False. + debugHTTP (bool, optional): + Set to True to enable debug output from urllib2 when performing + any HTTP requests. Defaults to False. + timeout (int, optional): + Set timeout (in seconds) of the http/https requests. If None the + requests lib default will be used. Defaults to None. + sleep_on_rate_limit (bool, optional): + Whether to sleep an appropriate amount of time if a rate limit is hit for + an endpoint. + tweet_mode (str, optional): + Whether to use the new (as of Sept. 2016) extended tweet mode. See docs for + details. Choices are ['compatibility', 'extended']. + proxies (dict, optional): + A dictionary of proxies for the request to pass through, if not specified + allows requests lib to use environmental variables for proxy if any. + """ + + # check to see if the library is running on a Google App Engine instance + # see GAE.rst for more information + if os.environ: + if 'APPENGINE_RUNTIME' in os.environ.keys(): + # Adapter ensures requests use app engine's urlfetch + import requests_toolbelt.adapters.appengine + requests_toolbelt.adapters.appengine.monkeypatch() + # App Engine does not like this caching strategy, disable caching + cache = None + + self.SetCache(cache) + self._cache_timeout = Api.DEFAULT_CACHE_TIMEOUT + self._input_encoding = input_encoding + self._use_gzip = use_gzip_compression + self._debugHTTP = debugHTTP + self._shortlink_size = 19 + if timeout and timeout < 30: + warnings.warn("Warning: The Twitter streaming API sends 30s keepalives, the given timeout is shorter!") + self._timeout = timeout + self.__auth = None + + self._InitializeRequestHeaders(request_headers) + self._InitializeUserAgent() + self._InitializeDefaultParameters() + + self.rate_limit = RateLimit() + self.sleep_on_rate_limit = sleep_on_rate_limit + self.tweet_mode = tweet_mode + self.proxies = proxies + + if base_url is None: + self.base_url = 'https://api.twitter.com/1.1' + else: + self.base_url = base_url + + if stream_url is None: + self.stream_url = 'https://stream.twitter.com/1.1' + else: + self.stream_url = stream_url + + if upload_url is None: + self.upload_url = 'https://upload.twitter.com/1.1' + else: + self.upload_url = upload_url + + self.chunk_size = chunk_size + + if self.chunk_size < 1024 * 16: + warnings.warn(( + "A chunk size lower than 16384 may result in too many " + "requests to the Twitter API when uploading videos. You are " + "strongly advised to increase it above 16384")) + + if (consumer_key and not + (application_only_auth or all([access_token_key, access_token_secret]))): + raise TwitterError({'message': "Missing oAuth Consumer Key or Access Token"}) + + self.SetCredentials(consumer_key, consumer_secret, access_token_key, access_token_secret, + application_only_auth) + + if debugHTTP: + try: + import http.client as http_client # python3 + except ImportError: + import httplib as http_client # python2 + + http_client.HTTPConnection.debuglevel = 1 + + logging.basicConfig() # you need to initialize logging, otherwise you will not see anything from requests + logging.getLogger().setLevel(logging.DEBUG) + requests_log = logging.getLogger("requests.packages.urllib3") + requests_log.setLevel(logging.DEBUG) + requests_log.propagate = True + + @staticmethod + def GetAppOnlyAuthToken(consumer_key, consumer_secret): + """ + Generate a Bearer Token from consumer_key and consumer_secret + """ + key = quote_plus(consumer_key) + secret = quote_plus(consumer_secret) + bearer_token = base64.b64encode('{}:{}'.format(key, secret).encode('utf8')) + + post_headers = { + 'Authorization': 'Basic {0}'.format(bearer_token.decode('utf8')), + 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8' + } + + res = requests.post(url='https://api.twitter.com/oauth2/token', + data={'grant_type': 'client_credentials'}, + headers=post_headers) + bearer_creds = res.json() + return bearer_creds + + def SetCredentials(self, + consumer_key, + consumer_secret, + access_token_key=None, + access_token_secret=None, + application_only_auth=False): + """Set the consumer_key and consumer_secret for this instance + + Args: + consumer_key: + The consumer_key of the twitter account. + consumer_secret: + The consumer_secret for the twitter account. + access_token_key: + The oAuth access token key value you retrieved + from running get_access_token.py. + access_token_secret: + The oAuth access token's secret, also retrieved + from the get_access_token.py run. + application_only_auth: + Whether to generate a bearer token and use Application-Only Auth + """ + self._consumer_key = consumer_key + self._consumer_secret = consumer_secret + self._access_token_key = access_token_key + self._access_token_secret = access_token_secret + + if application_only_auth: + self._bearer_token = self.GetAppOnlyAuthToken(consumer_key, consumer_secret) + self.__auth = OAuth2(token=self._bearer_token) + else: + auth_list = [consumer_key, consumer_secret, + access_token_key, access_token_secret] + if all(auth_list): + self.__auth = OAuth1(consumer_key, consumer_secret, + access_token_key, access_token_secret) + + self._config = None + + def GetHelpConfiguration(self): + """Get basic help configuration details from Twitter. + + Args: + None + + Returns: + dict: Sets self._config and returns dict of help config values. + """ + if self._config is None: + url = '%s/help/configuration.json' % self.base_url + resp = self._RequestUrl(url, 'GET') + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + self._config = data + return self._config + + def GetShortUrlLength(self, https=False): + """Returns number of characters reserved per URL included in a tweet. + + Args: + https (bool, optional): + If True, return number of characters reserved for https urls + or, if False, return number of character reserved for http urls. + Returns: + (int): Number of characters reserved per URL. + """ + config = self.GetHelpConfiguration() + if https: + return config['short_url_length_https'] + else: + return config['short_url_length'] + + def ClearCredentials(self): + """Clear any credentials for this instance + """ + self._consumer_key = None + self._consumer_secret = None + self._access_token_key = None + self._access_token_secret = None + self._bearer_token = None + self.__auth = None # for request upgrade + + def GetSearch(self, + term=None, + raw_query=None, + geocode=None, + since_id=None, + max_id=None, + until=None, + since=None, + count=15, + lang=None, + locale=None, + result_type="mixed", + include_entities=None, + return_json=False): + """Return twitter search results for a given term. You must specify one + of term, geocode, or raw_query. + + Args: + term (str, optional): + Term to search by. Optional if you include geocode. + raw_query (str, optional): + A raw query as a string. This should be everything after the "?" in + the URL (i.e., the query parameters). You are responsible for all + type checking and ensuring that the query string is properly + formatted, as it will only be URL-encoded before be passed directly + to Twitter with no other checks performed. For advanced usage only. + *This will override any other parameters passed* + since_id (int, optional): + Returns results with an ID greater than (that is, more recent + than) the specified ID. There are limits to the number of + Tweets which can be accessed through the API. If the limit of + Tweets has occurred since the since_id, the since_id will be + forced to the oldest ID available. + max_id (int, optional): + Returns only statuses with an ID less than (that is, older + than) or equal to the specified ID. + until (str, optional): + Returns tweets generated before the given date. Date should be + formatted as YYYY-MM-DD. + since (str, optional): + Returns tweets generated since the given date. Date should be + formatted as YYYY-MM-DD. + geocode (str or list or tuple, optional): + Geolocation within which to search for tweets. Can be either a + string in the form of "latitude,longitude,radius" where latitude + and longitude are floats and radius is a string such as "1mi" or + "1km" ("mi" or "km" are the only units allowed). For example: + >>> api.GetSearch(geocode="37.781157,-122.398720,1mi"). + Otherwise, you can pass a list of either floats or strings for + lat/long and a string for radius: + >>> api.GetSearch(geocode=[37.781157, -122.398720, "1mi"]) + >>> # or: + >>> api.GetSearch(geocode=(37.781157, -122.398720, "1mi")) + >>> # or: + >>> api.GetSearch(geocode=("37.781157", "-122.398720", "1mi")) + count (int, optional): + Number of results to return. Default is 15 and maxmimum that + Twitter returns is 100 irrespective of what you type in. + lang (str, optional): + Language for results as ISO 639-1 code. Default is None + (all languages). + locale (str, optional): + Language of the search query. Currently only 'ja' is effective. + This is intended for language-specific consumers and the default + should work in the majority of cases. + result_type (str, optional): + Type of result which should be returned. Default is "mixed". + Valid options are "mixed, "recent", and "popular". + include_entities (bool, optional): + If True, each tweet will include a node called "entities". + This node offers a variety of metadata about the tweet in a + discrete structure, including: user_mentions, urls, and + hashtags. + return_json (bool, optional): + If True JSON data will be returned, instead of twitter.Userret + Returns: + list: A sequence of twitter.Status instances, one for each message + containing the term, within the bounds of the geocoded area, or + given by the raw_query. + """ + + url = '%s/search/tweets.json' % self.base_url + parameters = {} + + if since_id: + parameters['since_id'] = enf_type('since_id', int, since_id) + + if max_id: + parameters['max_id'] = enf_type('max_id', int, max_id) + + if until: + parameters['until'] = enf_type('until', str, until) + + if since: + parameters['since'] = enf_type('since', str, since) + + if lang: + parameters['lang'] = enf_type('lang', str, lang) + + if locale: + parameters['locale'] = enf_type('locale', str, locale) + + if term is None and geocode is None and raw_query is None: + return [] + + if term is not None: + parameters['q'] = term + + if geocode is not None: + if isinstance(geocode, list) or isinstance(geocode, tuple): + parameters['geocode'] = ','.join([str(geo) for geo in geocode]) + else: + parameters['geocode'] = enf_type('geocode', str, geocode) + + if include_entities: + parameters['include_entities'] = enf_type('include_entities', + bool, + include_entities) + + parameters['count'] = enf_type('count', int, count) + + if result_type in ["mixed", "popular", "recent"]: + parameters['result_type'] = result_type + + if raw_query is not None: + url = "{url}?{raw_query}".format( + url=url, + raw_query=raw_query) + resp = self._RequestUrl(url, 'GET') + else: + resp = self._RequestUrl(url, 'GET', data=parameters) + + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + if return_json: + return data + else: + return [Status.NewFromJsonDict(x) for x in data.get('statuses', '')] + + def GetUsersSearch(self, + term=None, + page=1, + count=20, + include_entities=None): + """Return twitter user search results for a given term. + + Args: + term: + Term to search by. + page: + Page of results to return. Default is 1 + [Optional] + count: + Number of results to return. Default is 20 + [Optional] + include_entities: + If True, each tweet will include a node called "entities,". + This node offers a variety of metadata about the tweet in a + discrete structure, including: user_mentions, urls, and hashtags. + [Optional] + + Returns: + A sequence of twitter.User instances, one for each message containing + the term + """ + # Build request parameters + parameters = {} + + if term is not None: + parameters['q'] = term + + if page != 1: + parameters['page'] = page + + if include_entities: + parameters['include_entities'] = 1 + + try: + parameters['count'] = int(count) + except ValueError: + raise TwitterError({'message': "count must be an integer"}) + + # Make and send requests + url = '%s/users/search.json' % self.base_url + resp = self._RequestUrl(url, 'GET', data=parameters) + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + return [User.NewFromJsonDict(x) for x in data] + + def GetTrendsCurrent(self, exclude=None): + """Get the current top trending topics (global) + + Args: + exclude: + Appends the exclude parameter as a request parameter. + Currently only exclude=hashtags is supported. [Optional] + + Returns: + A list with 10 entries. Each entry contains a trend. + """ + return self.GetTrendsWoeid(woeid=1, exclude=exclude) + + def GetTrendsWoeid(self, woeid, exclude=None): + """Return the top 10 trending topics for a specific WOEID, if trending + information is available for it. + + Args: + woeid: + the Yahoo! Where On Earth ID for a location. + exclude: + Appends the exclude parameter as a request parameter. + Currently only exclude=hashtags is supported. [Optional] + + Returns: + A list with 10 entries. Each entry contains a trend. + """ + url = '%s/trends/place.json' % (self.base_url) + parameters = {'id': woeid} + + if exclude: + parameters['exclude'] = exclude + + resp = self._RequestUrl(url, verb='GET', data=parameters) + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + trends = [] + timestamp = data[0]['as_of'] + + for trend in data[0]['trends']: + trends.append(Trend.NewFromJsonDict(trend, timestamp=timestamp)) + return trends + + def GetUserSuggestionCategories(self): + """ Return the list of suggested user categories, this can be used in + GetUserSuggestion function + Returns: + A list of categories + """ + url = '%s/users/suggestions.json' % (self.base_url) + resp = self._RequestUrl(url, verb='GET') + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + + categories = [] + + for category in data: + categories.append(Category.NewFromJsonDict(category)) + return categories + + def GetUserSuggestion(self, category): + """ Returns a list of users in a category + Args: + category: + The Category object to limit the search by + Returns: + A list of users in that category + """ + url = '%s/users/suggestions/%s.json' % (self.base_url, category.slug) + + resp = self._RequestUrl(url, verb='GET') + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + + users = [] + for user in data['users']: + users.append(User.NewFromJsonDict(user)) + return users + + def GetHomeTimeline(self, + count=None, + since_id=None, + max_id=None, + trim_user=False, + exclude_replies=False, + contributor_details=False, + include_entities=True): + """Fetch a collection of the most recent Tweets and retweets posted + by the authenticating user and the users they follow. + + The home timeline is central to how most users interact with Twitter. + + Args: + count: + Specifies the number of statuses to retrieve. May not be + greater than 200. Defaults to 20. [Optional] + since_id: + Returns results with an ID greater than (that is, more recent + than) the specified ID. There are limits to the number of + Tweets which can be accessed through the API. If the limit of + Tweets has occurred since the since_id, the since_id will be + forced to the oldest ID available. [Optional] + max_id: + Returns results with an ID less than (that is, older than) or + equal to the specified ID. [Optional] + trim_user: + When True, each tweet returned in a timeline will include a user + object including only the status authors numerical ID. Omit this + parameter to receive the complete user object. [Optional] + exclude_replies: + This parameter will prevent replies from appearing in the + returned timeline. Using exclude_replies with the count + parameter will mean you will receive up-to count tweets - + this is because the count parameter retrieves that many + tweets before filtering out retweets and replies. [Optional] + contributor_details: + This parameter enhances the contributors element of the + status response to include the screen_name of the contributor. + By default only the user_id of the contributor is included. [Optional] + include_entities: + The entities node will be disincluded when set to false. + This node offers a variety of metadata about the tweet in a + discreet structure, including: user_mentions, urls, and + hashtags. [Optional] + + Returns: + A sequence of twitter.Status instances, one for each message + """ + url = '%s/statuses/home_timeline.json' % self.base_url + + parameters = {} + if count is not None: + try: + if int(count) > 200: + raise TwitterError({'message': "'count' may not be greater than 200"}) + except ValueError: + raise TwitterError({'message': "'count' must be an integer"}) + parameters['count'] = count + if since_id: + try: + parameters['since_id'] = int(since_id) + except ValueError: + raise TwitterError({'message': "'since_id' must be an integer"}) + if max_id: + try: + parameters['max_id'] = int(max_id) + except ValueError: + raise TwitterError({'message': "'max_id' must be an integer"}) + if trim_user: + parameters['trim_user'] = 1 + if exclude_replies: + parameters['exclude_replies'] = 1 + if contributor_details: + parameters['contributor_details'] = 1 + if not include_entities: + parameters['include_entities'] = 'false' + resp = self._RequestUrl(url, 'GET', data=parameters) + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + + return [Status.NewFromJsonDict(x) for x in data] + + def GetUserTimeline(self, + user_id=None, + screen_name=None, + since_id=None, + max_id=None, + count=None, + include_rts=True, + trim_user=False, + exclude_replies=False): + """Fetch the sequence of public Status messages for a single user. + + The twitter.Api instance must be authenticated if the user is private. + + Args: + user_id (int, optional): + Specifies the ID of the user for whom to return the + user_timeline. Helpful for disambiguating when a valid user ID + is also a valid screen name. + screen_name (str, optional): + Specifies the screen name of the user for whom to return the + user_timeline. Helpful for disambiguating when a valid screen + name is also a user ID. + since_id (int, optional): + Returns results with an ID greater than (that is, more recent + than) the specified ID. There are limits to the number of + Tweets which can be accessed through the API. If the limit of + Tweets has occurred since the since_id, the since_id will be + forced to the oldest ID available. + max_id (int, optional): + Returns only statuses with an ID less than (that is, older + than) or equal to the specified ID. + count (int, optional): + Specifies the number of statuses to retrieve. May not be + greater than 200. + include_rts (bool, optional): + If True, the timeline will contain native retweets (if they + exist) in addition to the standard stream of tweets. + trim_user (bool, optional): + If True, statuses will only contain the numerical user ID only. + Otherwise a full user object will be returned for each status. + exclude_replies (bool, optional) + If True, this will prevent replies from appearing in the returned + timeline. Using exclude_replies with the count parameter will mean you + will receive up-to count tweets - this is because the count parameter + retrieves that many tweets before filtering out retweets and replies. + This parameter is only supported for JSON and XML responses. + + Returns: + A sequence of Status instances, one for each message up to count + """ + url = '%s/statuses/user_timeline.json' % (self.base_url) + parameters = {} + + if user_id: + parameters['user_id'] = enf_type('user_id', int, user_id) + elif screen_name: + parameters['screen_name'] = screen_name + if since_id: + parameters['since_id'] = enf_type('since_id', int, since_id) + if max_id: + parameters['max_id'] = enf_type('max_id', int, max_id) + if count: + parameters['count'] = enf_type('count', int, count) + + parameters['include_rts'] = enf_type('include_rts', bool, include_rts) + parameters['trim_user'] = enf_type('trim_user', bool, trim_user) + parameters['exclude_replies'] = enf_type('exclude_replies', bool, exclude_replies) + + resp = self._RequestUrl(url, 'GET', data=parameters) + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + + return [Status.NewFromJsonDict(x) for x in data] + + def GetStatus(self, + status_id, + trim_user=False, + include_my_retweet=True, + include_entities=True, + include_ext_alt_text=True): + """Returns a single status message, specified by the status_id parameter. + + Args: + status_id: + The numeric ID of the status you are trying to retrieve. + trim_user: + When set to True, each tweet returned in a timeline will include + a user object including only the status authors numerical ID. + Omit this parameter to receive the complete user object. [Optional] + include_my_retweet: + When set to True, any Tweets returned that have been retweeted by + the authenticating user will include an additional + current_user_retweet node, containing the ID of the source status + for the retweet. [Optional] + include_entities: + If False, the entities node will be disincluded. + This node offers a variety of metadata about the tweet in a + discreet structure, including: user_mentions, urls, and + hashtags. [Optional] + Returns: + A twitter.Status instance representing that status message + """ + url = '%s/statuses/show.json' % (self.base_url) + + parameters = { + 'id': enf_type('status_id', int, status_id), + 'trim_user': enf_type('trim_user', bool, trim_user), + 'include_my_retweet': enf_type('include_my_retweet', bool, include_my_retweet), + 'include_entities': enf_type('include_entities', bool, include_entities), + 'include_ext_alt_text': enf_type('include_ext_alt_text', bool, include_ext_alt_text) + } + + resp = self._RequestUrl(url, 'GET', data=parameters) + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + + return Status.NewFromJsonDict(data) + + def GetStatuses(self, + status_ids, + trim_user=False, + include_entities=True, + map=False): + """Returns a list of status messages, specified by the status_ids parameter. + + Args: + status_ids: + A list of the numeric ID of the statuses you are trying to retrieve. + trim_user: + When set to True, each tweet returned in a timeline will include + a user object including only the status authors numerical ID. + Omit this parameter to receive the complete user object. [Optional] + include_entities: + If False, the entities node will be disincluded. + This node offers a variety of metadata about the tweet in a + discreet structure, including: user_mentions, urls, and + hashtags. [Optional] + map: + If True, returns a dictionary with status id as key and returned + status data (or None if tweet does not exist or is inaccessible) + as value. Otherwise returns an unordered list of successfully + retrieved Tweets. [Optional] + Returns: + A dictionary or unordered list (depending on the parameter 'map') of + twitter Status instances representing the status messages. + """ + url = '%s/statuses/lookup.json' % (self.base_url) + + map = enf_type('map', bool, map) + + if map: + result = {} + else: + result = [] + offset = 0 + parameters = { + 'trim_user': enf_type('trim_user', bool, trim_user), + 'include_entities': enf_type('include_entities', bool, include_entities), + 'map': map + } + while offset < len(status_ids): + parameters['id'] = ','.join([str(enf_type('status_id', int, status_id)) for status_id in status_ids[offset:offset + 100]]) + + resp = self._RequestUrl(url, 'GET', data=parameters) + data = self._ParseAndCheckTwitter(resp.content.decode('utf-8')) + if map: + result.update({int(key): (Status.NewFromJsonDict(value) if value else None) for key, value in data['id'].items()}) + else: + result += [Status.NewFromJsonDict(dataitem) for dataitem in data] + + offset += 100 + + return result + + def GetStatusOembed(self, + status_id=None, + url=None, + maxwidth=None, + hide_media=False, + hide_thread=False, + omit_script=False, + align=None, + related=None, + lang=None): + """Returns information allowing the creation of an embedded representation of a + Tweet on third party sites. + + Specify tweet by the id or url parameter. + + Args: + status_id: + The numeric ID of the status you are trying to embed. + url: + The url of the status you are trying to embed. + maxwidth: + The maximum width in pixels that the embed should be rendered at. + This value is constrained to be between 250 and 550 pixels. [Optional] + hide_media: + Specifies whether the embedded Tweet should automatically expand images. [Optional] + hide_thread: + Specifies whether the embedded Tweet should automatically show the original + message in the case that the embedded Tweet is a reply. [Optional] + omit_script: + Specifies whether the embedded Tweet HTML should include a