diff --git a/app/utils/unicode_names.py b/app/utils/unicode_names.py new file mode 100644 index 0000000..5022910 --- /dev/null +++ b/app/utils/unicode_names.py @@ -0,0 +1,53 @@ +pseudo_map = { + 0x0020: + " -. " + "0123456789 " + " abcdefghijklmno" + "pqrstuvwxyz _" + " abcdefghijklmno" + "pqrstuvwxyz - ", + 0x00c0: + "aaaaaaaceeeeiiii" + "dnooooo ouuuuy b" + "aaaaaaaceeeeiiii" + "dnooooo ouuuuy y", + 0x0100: + "aaaaaaccccccccdd" + "ddeeeeeeeeeegggg" + "gggghhhhiiiiiiii" + "iiiijjkkklllllll" + "lllnnnnnnnnnoooo" + "oooorrrrrrssssss" + "ssttttttuuuuuuuu" + "uuuuwwyyyzzzzzz ", +} + +def _normalize_char(c): + n = ord(c) + + for (start, codes) in pseudo_map.items(): + index = n - start + if index not in range(len(codes)): continue + + r = codes[index] + if r == ' ': + raise ValueError(c) + return r + + raise ValueError(c) + +def normalize(string): + out = [] + errors = [] + + for c in string: + try: + out.append(_normalize_char(c)) + except ValueError: + errors.append(c) + + if errors: + raise ValueError(errors) + + return ''.join(out) +