utils: add normalization of Unicode names

pull/12/head
Lephe 3 years ago
parent 21ebfb7d2a
commit e5ff934c4a
  1. 53
      app/utils/unicode_names.py

@ -0,0 +1,53 @@
# Lookup tables for folding Unicode characters to plain ASCII.
#
# Each key is the first code point of a contiguous range; the value is a
# single string holding one replacement character per code point of that
# range, so the replacement for code point n is value[n - key].  A space
# entry means "no replacement" (the lookup code treats it as an error).
# Rows are written 16 characters wide, one row per 0x10 code points.
#
# NOTE(review): the U+0020 rows had lost their padding (runs of spaces were
# collapsed), which shifted every later entry of that table; they are
# restored to 16 columns here.  The column of '-' in the last ASCII row is a
# reconstruction (assumed to stand for '~') -- confirm against upstream.
pseudo_map = {
    # U+0020..U+007F.  The comment above each row lists the source
    # characters, column-aligned with their replacements.  Letters are
    # lowercased; digits, '-', '.' and '_' map to themselves.
    0x0020:
        # !"#$%&'()*+,-./
        "             -. "
        #0123456789:;<=>?
        "0123456789      "
        #@ABCDEFGHIJKLMNO
        " abcdefghijklmno"
        #PQRSTUVWXYZ[\]^_
        "pqrstuvwxyz    _"
        #`abcdefghijklmno
        " abcdefghijklmno"
        #pqrstuvwxyz{|}~ (then U+007F)
        "pqrstuvwxyz   - ",
    # U+00C0..U+00FF.  U+00D7, U+00DE, U+00F7 and U+00FE have no
    # replacement; U+00DF maps to 'b'.
    0x00c0:
        "aaaaaaaceeeeiiii"
        "dnooooo ouuuuy b"
        "aaaaaaaceeeeiiii"
        "dnooooo ouuuuy y",
    # U+0100..U+017F.  Only the last code point (U+017F) has no replacement.
    0x0100:
        "aaaaaaccccccccdd"
        "ddeeeeeeeeeegggg"
        "gggghhhhiiiiiiii"
        "iiiijjkkklllllll"
        "lllnnnnnnnnnoooo"
        "oooorrrrrrssssss"
        "ssttttttuuuuuuuu"
        "uuuuwwyyyzzzzzz ",
}
def _normalize_char(c):
    """Return the replacement that ``pseudo_map`` assigns to character *c*.

    Each ``pseudo_map`` entry is tried in turn; the first one whose range
    contains the code point of *c* decides the result.

    Raises:
        ValueError: with *c* as its argument, if no table covers the
            character or the table entry for it is a space (the marker
            for "no replacement").
    """
    code_point = ord(c)
    for base, table in pseudo_map.items():
        offset = code_point - base
        if 0 <= offset < len(table):
            replacement = table[offset]
            # A space in the table flags a character with no substitute.
            if replacement == ' ':
                raise ValueError(c)
            return replacement
    # The code point lies outside every table range.
    raise ValueError(c)
def normalize(string):
    """Return *string* with every character replaced via ``_normalize_char``.

    The whole input is scanned before failing, so the error reports every
    offending character rather than only the first one.

    Raises:
        ValueError: if any character has no replacement; the exception's
            single argument is the list of those characters, in input
            order (repeated characters appear once per occurrence).
    """
    converted = []
    rejected = []
    for char in string:
        try:
            replacement = _normalize_char(char)
        except ValueError:
            rejected.append(char)
        else:
            converted.append(replacement)
    if rejected:
        raise ValueError(rejected)
    return ''.join(converted)
Loading…
Cancel
Save