pseudo_map = { 0x0020: " -. " "0123456789 " " abcdefghijklmno" "pqrstuvwxyz _" " abcdefghijklmno" "pqrstuvwxyz - ", 0x00c0: "aaaaaaaceeeeiiii" "dnooooo ouuuuy b" "aaaaaaaceeeeiiii" "dnooooo ouuuuy y", 0x0100: "aaaaaaccccccccdd" "ddeeeeeeeeeegggg" "gggghhhhiiiiiiii" "iiiijjkkklllllll" "lllnnnnnnnnnoooo" "oooorrrrrrssssss" "ssttttttuuuuuuuu" "uuuuwwyyyzzzzzz ", } def _normalize_char(c): n = ord(c) for (start, codes) in pseudo_map.items(): index = n - start if index not in range(len(codes)): continue r = codes[index] if r == ' ': raise ValueError(c) return r raise ValueError(c) def normalize(string): out = [] errors = [] for c in string: try: out.append(_normalize_char(c)) except ValueError: errors.append(c) if errors: raise ValueError(errors) return ''.join(out)