Upgraded unidecode to 0.04.17

This commit is contained in:
Bas Stottelaar
2015-01-27 22:01:22 +01:00
parent f6b5c0190d
commit 527d2087c1
7 changed files with 539 additions and 153 deletions

View File

@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
# vi:tabstop=4:expandtab:sw=4
"""Transliterate Unicode text into plain 7-bit ASCII.
Example usage:
@@ -39,10 +40,15 @@ def unidecode(string):
if codepoint < 0x80: # Basic ASCII
retval.append(str(char))
continue
if codepoint > 0xeffff:
continue # Characters in Private Use Area and above are ignored
if 0xd800 <= codepoint <= 0xdfff:
warnings.warn( "Surrogate character %r will be ignored. "
"You might be using a narrow Python build." % (char,),
RuntimeWarning, 2)
section = codepoint >> 8 # Chop off the last two hex digits
position = codepoint % 256 # Last two hex digits
@@ -50,7 +56,7 @@ def unidecode(string):
table = Cache[section]
except KeyError:
try:
mod = __import__('unidecode.x%03x'%(section), [], [], ['data'])
mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data'])
except ImportError:
Cache[section] = None
continue # No match: ignore this character and carry on.