casio_doc/fontcharacter/tools/dumpbin.py

217 lines
5.8 KiB
Python
Executable File

#!/usr/bin/env python3
""" Utility to dump data from a set binary file.
Mainly here to check that the format is correct for now.
"""
import os, unicodedata
from functools import cmp_to_key
from argparse import ArgumentParser
# ---
# Decoding function.
# ---
def frombytes(bnum):
if type(bnum) == int: return bnum
return int.from_bytes(bnum, byteorder='big', signed=False)
leaders = {}
def decode_set1(braw, only_check=True):
# Get the rest of the header, check the sum and filesize
cmajors = frombytes(braw[9])
cchars = frombytes(braw[10:12])
flags = frombytes(braw[12])
pic_h = frombytes(braw[13])
pic_fmt = frombytes(braw[14:16])
checksum = frombytes(braw[20:24])
filesize = frombytes(braw[24:28])
datasize = frombytes(braw[28:32])
# Get the flags.
with_unicode = flags & 0x01 != 0
with_cat = flags & 0x02 != 0
with_newcat = flags & 0x04 != 0
with_ctf = flags & 0x08 != 0
with_casemul = flags & 0x10 != 0
# Make the entry size.
centsize = 8 + 4 + 4 * \
(with_unicode + with_cat + with_newcat + with_ctf + with_casemul)
# Check the sizes and the checksum.
if filesize != len(braw):
print("ERROR: Invalid filesize!",
"Declared %dB, calculated %dB"%(filesize, len(braw)))
return 1
calc_datasize = filesize - 32 - 4 * cmajors - centsize * cchars
if datasize != calc_datasize:
print("ERROR: Invalid datasize!",
"Declared %dB, calculated %dB"%(datasize, calc_datasize))
return 1
calc_checksum = sum(braw[32:])
if checksum != calc_checksum:
print("ERROR: Invalid checksum!",
"Declared 0x%08X, calculated 0x%08X"%(checksum, calc_checksum))
return 1
# Get the binary data.
data_off = filesize - datasize
bdata = braw[data_off:]
# Get leaders.
braw = braw[32:data_off]
leads = []
for id_major in range(cmajors):
bmajor = braw[id_major * 4:id_major * 4 + 4]
code = frombytes(bmajor[0])
if code in leads:
print("ERROR: Duplicate major 0x%02X."%code)
return 1
leads += [code]
leaders[code] = {
'start': frombytes(bmajor[2:]),
'count': 0,
'chars': {},
'pos': id_major
}
# Sort leaders.
def cmp_lead(x, y):
global leaders
if leaders[x]['start'] == leaders[y]['start']:
return x if leaders[x]['pos'] < leaders[y]['pos'] else y
return x if leaders[x]['start'] < leaders[y]['start'] else y
leads.sort(key=cmp_to_key(cmp_lead))
# Get counts.
for id in range(len(leads) - 1):
leaders[leads[id]]['count'] = \
leaders[leads[id + 1]]['start'] - leaders[leads[id]]['start']
leaders[leads[-1]]['count'] = cchars - leaders[leads[-1]]['start']
# Get characters according to their leader.
braw = braw[cmajors * 4:]
for lead in leaders:
for id_char in range(leaders[lead]['start'], \
leaders[lead]['start'] + leaders[lead]['count']):
bchar = braw[id_char * centsize:id_char * centsize + centsize]
code = frombytes(bchar[:2])
if code >> 8 != lead:
print("ERROR: character 0x%04X at position %d"%(code, id_char),
"should have leader 0x%02X but"%lead,
"has leader 0x%02X!"%(code >> 8))
return 1
if code in leaders[lead]['chars']:
print("ERROR: duplicate character 0x%04X"%code,
"at position %d"%id_char,
"(prev. %d)"%leaders[lead]['chars']['pos'])
return 1
# Get the FONTCHARACTER sequence.
mul_off = frombytes(bchar[8:12])
mul_sz = frombytes(bchar[2])
mul = None
if mul_sz:
rmul = bdata[mul_off:mul_off + mul_sz]
mul = []
while rmul:
if rmul[0] in leaders:
mul += [(rmul[0] << 8) | rmul[1]]
rmul = rmul[2:]
continue
mul += [rmul[0]]
rmul = rmul[1:]
# Get the Unicode string.
uni = None
off = 12
if with_unicode:
uni_sz = frombytes(bchar[3])
uni_off = frombytes(bchar[off:off + 4])
off += 4
if uni_sz:
runi = bdata[uni_off:uni_off + uni_sz]
uni = runi.decode('utf-8')
# TODO: get the rest
leaders[lead]['chars'][code] = {
'uni': uni,
'mul': mul,
'pos': id_char
}
if only_check:
return 0
print("OVERALL HEADER")
print("%d bytes (data zone is %dB)"%(filesize, datasize))
print("%d leader characters, %d characters"%(cmajors, cchars))
print("")
print("Tokens and sequences in this file:")
print("- FONTCHARACTER sequences")
if with_unicode: print("- Unicode equivalents")
if with_cat: print("- CAT tokens")
if with_newcat: print("- Newcat tokens")
if with_ctf: print("- CTF tokens")
if with_casemul: print("- Casemul tokens")
print("")
for lead, data in leaders.items():
print("0x%02X LEADER"%lead)
print("Starts at character 0x%04X, stops at 0x%04X (count: %d)"
%(data['start'], data['start'] + data['count'] - 1,
data['count']))
if data['chars']:
print("")
for code, char in data['chars'].items():
print("- 0x%0*X"%(4 if code > 0xFF else 2, code), end='')
mul = char['mul']
if mul:
m = ', '.join(map(lambda x:"0x%0*X"%(4 if x > 0xFF else 2, x),
mul))
print(" - seq: %s"%m, end='')
uni = char['uni']
if uni:
if any(map(lambda c:unicodedata.category(c).startswith('C'), \
uni)):
uni = ''.join(map(lambda x:'\\x%02X'%ord(x), uni))
print(" - unicode: \"%s\""%uni, end='')
print("")
print("")
return 0
def decode_set(braw, only_check=True):
    """Check the magic and version of a set file, then decode it.

    braw -- the whole file content, as bytes.
    only_check -- when true, validate the file without printing its content.

    Returns 0 if the file is valid; otherwise prints an error message and
    returns 1.
    """
    # Check the magic.
    if braw[:8] != b"CASIOFC\x7f":
        print("ERROR: Invalid magic string!")
        return 1

    # The version byte follows the magic; a file that stops right after the
    # magic has none to read.
    if len(braw) < 9:
        print("ERROR: Missing version byte!")
        return 1

    version = braw[8]
    if version == 0x01:
        return decode_set1(braw, only_check)
    print("ERROR: Unmanaged version 0x%02X!" % version)
    return 1
# ---
# Main function.
# ---
if __name__ == '__main__':
    # Parse the arguments.
    ap = ArgumentParser(description='FONTCHARACTER binary file dumper')
    ap.add_argument('--only-check', help='Should only check if the file is valid',
        action="store_true")
    ap.add_argument('input', help='The file which to dump the content.')
    args = ap.parse_args()

    # Obtain the file, closing it as soon as it has been read.
    with open(args.input, "rb") as fp:
        braw = fp.read()

    # Decode it; the decoder's status (0 or 1) becomes the exit code.
    # SystemExit is raised directly because the `exit` helper is only
    # installed by the `site` module.
    raise SystemExit(decode_set(braw, args.only_check))
# End of file.