#!/usr/bin/env python3
""" Utility to dump data from a set binary file.
    Mainly here to check that the format is correct for now. """

import os
import unicodedata
from functools import cmp_to_key
from argparse import ArgumentParser

# ---
# Decoding function.
# ---

# Number of bytes read as the fixed header (magic, version, counts, flags,
# checksum and sizes); the leader table starts right after it.
_HEADER_SIZE = 32


def frombytes(bnum):
    """ Convert a big-endian unsigned byte string to an integer.

        Indexing a `bytes` object already yields an `int`, so integers
        are passed through unchanged. """

    if isinstance(bnum, int):
        return bnum
    return int.from_bytes(bnum, byteorder='big', signed=False)


# Leaders decoded by the last call to `decode_set1`, indexed by leader code.
# Each value is a dict with the keys 'start', 'count', 'chars' and 'pos'.
leaders = {}


def decode_set1(braw, only_check=True):
    """ Check, and optionally dump, a version 0x01 set file.

        `braw` is the whole file content as bytes, magic included.
        When `only_check` is true, only error messages are printed;
        otherwise a human-readable dump is printed on standard output.

        The module-level `leaders` dictionary is emptied then filled with
        what has been decoded.

        Returns 0 if the file is valid, 1 if an error has been reported. """

    # Forget what a previous call may have decoded; cleared in place so
    # that other references to the dictionary stay valid.
    leaders.clear()

    if len(braw) < _HEADER_SIZE:
        print("ERROR: Truncated header!",
            "Expected at least %dB, got %dB"%(_HEADER_SIZE, len(braw)))
        return 1

    # Get the rest of the header, check the sum and filesize.
    # Bytes 13 to 19 of the header are not used by this function.
    cmajors = frombytes(braw[9])
    cchars = frombytes(braw[10:12])
    flags = frombytes(braw[12])
    checksum = frombytes(braw[20:24])
    filesize = frombytes(braw[24:28])
    datasize = frombytes(braw[28:32])

    # Get the flags.
    with_unicode = flags & 0x01 != 0
    with_cat = flags & 0x02 != 0
    with_newcat = flags & 0x04 != 0
    with_ctf = flags & 0x08 != 0
    with_casemul = flags & 0x10 != 0

    # Make the entry size: 12 fixed bytes, plus 4 bytes per enabled flag.
    centsize = 8 + 4 + 4 * (with_unicode + with_cat + with_newcat
        + with_ctf + with_casemul)

    # Check the sizes and the checksum.
    if filesize != len(braw):
        print("ERROR: Invalid filesize!",
            "Declared %dB, calculated %dB"%(filesize, len(braw)))
        return 1

    calc_datasize = filesize - 32 - 4 * cmajors - centsize * cchars
    if datasize != calc_datasize:
        print("ERROR: Invalid datasize!",
            "Declared %dB, calculated %dB"%(datasize, calc_datasize))
        return 1

    calc_checksum = sum(braw[32:])
    if checksum != calc_checksum:
        print("ERROR: Invalid checksum!",
            "Declared 0x%08X, calculated 0x%08X"%(checksum, calc_checksum))
        return 1

    # Get the binary data, located at the end of the file.
    data_off = filesize - datasize
    bdata = braw[data_off:]

    # Get leaders: 4 bytes each, the code in the first byte and the index
    # of the leader's first character in the last two bytes.
    braw = braw[32:data_off]
    leads = []
    for id_major in range(cmajors):
        bmajor = braw[id_major * 4:id_major * 4 + 4]
        code = frombytes(bmajor[0])
        if code in leads:
            print("ERROR: Duplicate major 0x%02X."%code)
            return 1

        leads.append(code)
        leaders[code] = {
            'start': frombytes(bmajor[2:]),
            'count': 0,
            'chars': {},
            'pos': id_major
        }

    # Sort leaders by their first character index, using the position in
    # the file to break ties.
    leads.sort(key=lambda lead: (leaders[lead]['start'],
        leaders[lead]['pos']))

    # Get counts: a leader's characters run up to the next leader's start,
    # and the last leader takes everything up to the end of the table.
    for lead, next_lead in zip(leads, leads[1:]):
        leaders[lead]['count'] = \
            leaders[next_lead]['start'] - leaders[lead]['start']
    if leads:
        leaders[leads[-1]]['count'] = cchars - leaders[leads[-1]]['start']

    # Get characters according to their leader.
    braw = braw[cmajors * 4:]
    for lead in leaders:
        first = leaders[lead]['start']
        for id_char in range(first, first + leaders[lead]['count']):
            bchar = braw[id_char * centsize:id_char * centsize + centsize]
            code = frombytes(bchar[:2])
            if code >> 8 != lead:
                print("ERROR: character 0x%04X at position %d"%(code, id_char),
                    "should have leader 0x%02X but"%lead,
                    "has leader 0x%02X!"%(code >> 8))
                return 1
            if code in leaders[lead]['chars']:
                print("ERROR: duplicate character 0x%04X"%code,
                    "at position %d"%id_char,
                    "(prev. %d)"%leaders[lead]['chars'][code]['pos'])
                return 1

            # Get the FONTCHARACTER sequence: a byte which is a known
            # leader introduces a two-byte code, any other byte stands
            # for itself.
            mul_off = frombytes(bchar[8:12])
            mul_sz = frombytes(bchar[2])
            mul = None
            if mul_sz:
                rmul = bdata[mul_off:mul_off + mul_sz]
                mul = []
                while rmul:
                    if rmul[0] in leaders:
                        if len(rmul) < 2:
                            print("ERROR: truncated sequence for character",
                                "0x%04X at position %d"%(code, id_char))
                            return 1
                        mul.append((rmul[0] << 8) | rmul[1])
                        rmul = rmul[2:]
                        continue

                    mul.append(rmul[0])
                    rmul = rmul[1:]

            # Get the Unicode string, stored as UTF-8 in the data zone.
            uni = None
            off = 12
            if with_unicode:
                uni_sz = frombytes(bchar[3])
                uni_off = frombytes(bchar[off:off + 4])
                off += 4

                if uni_sz:
                    runi = bdata[uni_off:uni_off + uni_sz]
                    uni = runi.decode('utf-8')

            # TODO: get the rest

            leaders[lead]['chars'][code] = {
                'uni': uni,
                'mul': mul,
                'pos': id_char
            }

    if only_check:
        return 0

    print("OVERALL HEADER")
    print("%d bytes (data zone is %dB)"%(filesize, datasize))
    print("%d leader characters, %d characters"%(cmajors, cchars))
    print("")
    print("Tokens and sequences in this file:")
    print("- FONTCHARACTER sequences")
    if with_unicode:
        print("- Unicode equivalents")
    if with_cat:
        print("- CAT tokens")
    if with_newcat:
        print("- Newcat tokens")
    if with_ctf:
        print("- CTF tokens")
    if with_casemul:
        print("- Casemul tokens")
    print("")

    for lead, data in leaders.items():
        print("0x%02X LEADER"%lead)
        print("Starts at character 0x%04X, stops at 0x%04X (count: %d)"
            %(data['start'], data['start'] + data['count'] - 1,
            data['count']))
        if data['chars']:
            print("")

        for code, char in data['chars'].items():
            print("- 0x%0*X"%(4 if code > 0xFF else 2, code), end='')

            mul = char['mul']
            if mul:
                m = ', '.join("0x%0*X"%(4 if x > 0xFF else 2, x)
                    for x in mul)
                print(" - seq: %s"%m, end='')

            uni = char['uni']
            if uni:
                # Escape the whole string as soon as one of its characters
                # is in a Unicode category starting with 'C'.
                if any(unicodedata.category(c).startswith('C') for c in uni):
                    uni = ''.join('\\x%02X'%ord(x) for x in uni)
                print(" - unicode: \"%s\""%uni, end='')
            print("")
        print("")

    return 0


def decode_set(braw, only_check=True):
    """ Check, and optionally dump, a set file of any managed version.

        `braw` is the whole file content as bytes. The magic string and
        the version byte are checked here, then the decoding is delegated
        to the function managing that version.

        Returns 0 if the file is valid, 1 if an error has been reported. """

    # Check the magic.
    if braw[:8] != b"CASIOFC\x7f":
        print("ERROR: Invalid magic string!")
        return 1

    # The version byte comes right after the magic; make sure it and the
    # rest of the header are there before reading them.
    if len(braw) < _HEADER_SIZE:
        print("ERROR: Truncated header!",
            "Expected at least %dB, got %dB"%(_HEADER_SIZE, len(braw)))
        return 1

    version = braw[8]
    if version == 0x01:
        return decode_set1(braw, only_check)

    print("ERROR: Unmanaged version 0x%02X!"%version)
    return 1

# ---
# Main function.
# ---

if __name__ == '__main__':
    # Parse the arguments.
    ap = ArgumentParser(description='FONTCHARACTER binary file dumper')
    ap.add_argument('--only-check',
        help='Should only check if the file is valid',
        action="store_true")
    ap.add_argument('input', help='The file which to dump the content.')
    args = ap.parse_args()

    # Obtain the file, closing it as soon as it has been read.
    with open(args.input, "rb") as fp:
        braw = fp.read()

    # Decode it, and use the result as the exit status.
    raise SystemExit(decode_set(braw, args.only_check))

# End of file.