Browse Source

utils: add normalization of Unicode names

pull/12/head
Lephe 3 months ago
parent
commit
e5ff934c4a
1 changed files with 53 additions and 0 deletions
  1. 53
    0
      app/utils/unicode_names.py

+ 53
- 0
app/utils/unicode_names.py View File

@@ -0,0 +1,53 @@
# Transliteration tables, keyed by the first code point of a contiguous range.
# The character at offset i of a table is the replacement for code point
# (key + i). Tables are laid out as rows of exactly 16 characters, one row per
# 0x10 code points, so every row must keep its full width: dropping padding
# spaces shifts every following entry. A space entry marks a code point that
# has no replacement (the lookup code rejects it).
pseudo_map = {
    # U+0020..U+007F: '-', '.', '_' and digits map to themselves, letters are
    # lowercased, '~' becomes '-', everything else is rejected.
    0x0020:
        "             -. "
        "0123456789      "
        " abcdefghijklmno"
        "pqrstuvwxyz    _"
        " abcdefghijklmno"
        "pqrstuvwxyz   - ",
    # U+00C0..U+00FF
    0x00c0:
        "aaaaaaaceeeeiiii"
        "dnooooo ouuuuy b"
        "aaaaaaaceeeeiiii"
        "dnooooo ouuuuy y",
    # U+0100..U+017F
    0x0100:
        "aaaaaaccccccccdd"
        "ddeeeeeeeeeegggg"
        "gggghhhhiiiiiiii"
        "iiiijjkkklllllll"
        "lllnnnnnnnnnoooo"
        "oooorrrrrrssssss"
        "ssttttttuuuuuuuu"
        "uuuuwwyyyzzzzzz ",
}

def _normalize_char(c):
    """Return the replacement for the single character *c* from pseudo_map.

    Each pseudo_map entry is treated as a table starting at its key: the
    replacement is the table character at offset ``ord(c) - key``.

    Raises:
        ValueError: carrying *c*, when its code point lies outside every
            table or the table entry for it is a space.
    """
    codepoint = ord(c)

    for base, table in pseudo_map.items():
        offset = codepoint - base
        if offset < 0 or offset >= len(table):
            continue

        replacement = table[offset]
        if replacement != ' ':
            return replacement
        # A space entry means "no replacement"; stop searching and reject.
        break

    raise ValueError(c)

def normalize(string):
    """Return *string* with every character replaced via _normalize_char.

    The whole input is scanned before failing, so the error reports every
    rejected character rather than only the first one.

    Raises:
        ValueError: carrying the list of rejected characters, in input
            order, when at least one character could not be converted.
    """
    converted = []
    rejected = []

    for char in string:
        try:
            replacement = _normalize_char(char)
        except ValueError:
            rejected.append(char)
        else:
            converted.append(replacement)

    if rejected:
        raise ValueError(rejected)

    return ''.join(converted)


Loading…
Cancel
Save