utils: add normalization of Unicode names

This commit is contained in:
Lephe 2019-06-05 18:24:41 -04:00
parent 21ebfb7d2a
commit e5ff934c4a
1 changed file with 53 additions and 0 deletions

View File

@@ -0,0 +1,53 @@
# Replacement table used to reduce characters to a plain ASCII form.
#
# Each key is the first code point of a contiguous run. The value is a string
# whose i-th character is the replacement for code point (key + i). A space
# marks a code point that has no replacement. Every source line below covers
# exactly 16 code points, so rows must stay 16 characters wide or all later
# lookups in the same run shift.
#
# NOTE(review): the U+0020 run was restored to 16-column rows after its
# padding had been collapsed. The '-' in the U+0070 row is assumed to sit at
# '~' (U+007E) -- confirm against the intended mapping.
pseudo_map = {
    # U+0020..U+007F: ASCII punctuation, digits and letters
    0x0020:
        "             -. "   # U+0020..U+002F: only '-' and '.' are kept
        "0123456789      "   # U+0030..U+003F: digits
        " abcdefghijklmno"   # U+0040..U+004F: upper case -> lower case
        "pqrstuvwxyz    _"   # U+0050..U+005F: upper case, '_' kept
        " abcdefghijklmno"   # U+0060..U+006F: lower case
        "pqrstuvwxyz   - ",  # U+0070..U+007F: lower case, '~' -> '-'
    # U+00C0..U+00FF: accented Latin-1 letters
    0x00c0:
        "aaaaaaaceeeeiiii"
        "dnooooo ouuuuy b"
        "aaaaaaaceeeeiiii"
        "dnooooo ouuuuy y",
    # U+0100..U+017F: Latin Extended-A
    0x0100:
        "aaaaaaccccccccdd"
        "ddeeeeeeeeeegggg"
        "gggghhhhiiiiiiii"
        "iiiijjkkklllllll"
        "lllnnnnnnnnnoooo"
        "oooorrrrrrssssss"
        "ssttttttuuuuuuuu"
        "uuuuwwyyyzzzzzz ",
}
def _normalize_char(c):
    """Return the replacement character for *c* from ``pseudo_map``.

    Each ``pseudo_map`` entry is scanned in turn; the first one whose run
    contains the code point of *c* decides the result.

    Raises:
        ValueError: if no run contains *c*, or if the table holds a space
            for it (the "no replacement" marker). The exception argument
            is *c* itself.
    """
    codepoint = ord(c)
    for base, table in pseudo_map.items():
        offset = codepoint - base
        if 0 <= offset < len(table):
            replacement = table[offset]
            if replacement != ' ':
                return replacement
            # A space in the table marks a character that is rejected.
            raise ValueError(c)
    raise ValueError(c)
def normalize(string):
    """Return *string* with every character replaced via ``_normalize_char``.

    The whole input is scanned before failing, so the error reports every
    rejected character rather than only the first one.

    Raises:
        ValueError: if at least one character is rejected. The exception
            argument is the list of rejected characters, in input order.
    """
    converted = []
    rejected = []
    for char in string:
        try:
            replacement = _normalize_char(char)
        except ValueError:
            rejected.append(char)
        else:
            converted.append(replacement)
    if rejected:
        raise ValueError(rejected)
    return ''.join(converted)