1
0
Fork 0

Compare commits

...

7 Commits

45 changed files with 12783 additions and 224 deletions

View File

@ -140,9 +140,17 @@ add_compile_options(
# Target definitions.
# ---
# Generate lib/chars.c from the character reference.
#
# All paths are anchored on CMAKE_CURRENT_SOURCE_DIR so that OUTPUT names the
# same file as the relative "lib/chars.c" source entry of the library target,
# including when this project is added as a subdirectory of another build
# (where CMAKE_SOURCE_DIR points at the outer project instead).
add_custom_command(
    OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/lib/chars.c"
    COMMAND python3 "${CMAKE_CURRENT_SOURCE_DIR}/chars/process_chars.py"
    DEPENDS
        "${CMAKE_CURRENT_SOURCE_DIR}/chars/process_chars.py"
        "${CMAKE_CURRENT_SOURCE_DIR}/chars/chars.toml"
    COMMENT "Generating lib/chars.c from chars/chars.toml"
    VERBATIM
)
add_library(${PROJECT_NAME} STATIC
lib/casiolink.c
lib/cdefs.c
lib/chars.c
lib/data.c
lib/detection.c
lib/link.c
@ -154,6 +162,7 @@ add_library(${PROJECT_NAME} STATIC
lib/picture.c
lib/seven.c
lib/seven_ohp.c
lib/text.c
)
target_include_directories(${PROJECT_NAME} PRIVATE ${LIB_INCLUDE_DIRS})
target_compile_definitions(${PROJECT_NAME} PRIVATE ${LIB_COMPILE_DEFS})
@ -235,7 +244,9 @@ if(ENABLE_CLI)
install(TARGETS p7os RUNTIME)
install(TARGETS p7screen RUNTIME)
install(TARGETS xfer9860 RUNTIME)
install(TARGETS CaS RUNTIME)
# TODO: CaS is in its very early stages and supports too few features
# as of today to be useful installed on a target system.
#install(TARGETS CaS RUNTIME)
endif()
if(ENABLE_UDEV)

8532
chars/chars.toml Normal file

File diff suppressed because it is too large Load Diff

572
chars/process_chars.py Normal file
View File

@ -0,0 +1,572 @@
#!/usr/bin/env python
# *****************************************************************************
# Copyright (C) 2024 Thomas Touhey <thomas@touhey.fr>
#
# This software is governed by the CeCILL 2.1 license under French law and
# abiding by the rules of distribution of free software. You can use, modify
# and/or redistribute the software under the terms of the CeCILL 2.1 license
# as circulated by CEA, CNRS and INRIA at the following
# URL: https://cecill.info
#
# As a counterpart to the access to the source code and rights to copy, modify
# and redistribute granted by the license, users are provided only with a
# limited warranty and the software's author, the holder of the economic
# rights, and the successive licensors have only limited liability.
#
# In this respect, the user's attention is drawn to the risks associated with
# loading, using, modifying and/or developing or reproducing the software by
# the user in light of its specific status of free software, that may mean
# that it is complicated to manipulate, and that also therefore means that it
# is reserved for developers and experienced professionals having in-depth
# computer knowledge. Users are therefore encouraged to load and test the
# software's suitability as regards their requirements in conditions enabling
# the security of their systems and/or data to be ensured and, more generally,
# to use and operate it in the same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL 2.1 license and that you accept its terms.
# *****************************************************************************
"""Process the character references.
This script requires the ``toml``, ``pydantic`` and ``coloredlogs`` packages
to be installed.
"""
from __future__ import annotations
import argparse
from collections import defaultdict
from enum import Enum
from logging import getLogger
from pathlib import Path
from typing import Annotated, Any, Literal
import coloredlogs
from pydantic import BaseModel, ConfigDict, Field, field_validator
import toml
# Keys identifying the two character tables handled by this script.
CharacterTableKey = Literal["legacy"] | Literal["9860"]
"""Type representing a character table."""
# Used when no output path is given on the command line.
DEFAULT_OUTPUT_PATH = Path(__file__).parent.parent / "lib" / "chars.c"
"""Default output path."""
# Used when ``--reference`` is not given on the command line.
DEFAULT_REFERENCE_PATH = Path(__file__).parent / "chars.toml"
"""Default path to the character reference."""
# For each table, the values accepted for the high byte of a character code,
# i.e. ``(code >> 8) & 255``. One ``cahute_chars_<table>_<leader>`` array of
# 256 entries is generated per leader listed here.
MULTI_BYTE_LEADERS: dict[CharacterTableKey, tuple[int, ...]] = {
    "legacy": (0x00, 0x7F, 0xF7),
    "9860": (0x00, 0x7F, 0xE5, 0xE6, 0xE7, 0xF7, 0xF9),
}
"""Multi-byte leaders per encoding."""
logger = getLogger(__name__)
"""Logger."""
class Character(BaseModel):
    """One entry of the character reference."""

    model_config = ConfigDict(extra="forbid")
    """Reject keys that are not declared on the model."""

    code: int
    """Code of the character in its table."""

    table: CharacterTableKey | None = None
    """Table the character is restricted to; ``None`` means every table."""

    code_legacy: int | None = None
    """Equivalent code in the legacy table, if the table is 9860."""

    code_9860: int | None = None
    """Equivalent code in the fx-9860G table, if the table is legacy."""

    name: str
    """Name of the character."""

    opcode: list[int] | None = None
    """Characters to resolve the character as for display purposes."""

    unicode: list[list[int]] = []
    """Unicode sequences equivalent to the character."""

    cat: list[str] = []
    """CAT sequences equivalent to the character."""

    @property
    def symbol(self) -> str:
        """Name of the C symbol generated for this character.

        :return: The symbol name, built from the table (or ``all``) and
            the code as four uppercase hexadecimal digits.
        """
        table_part = self.table if self.table else "all"
        return "char_{}_{:04X}".format(table_part, self.code)

    @field_validator('unicode', mode='before')
    @staticmethod
    def _validate_unicode_sequences(value: Any, /) -> Any:
        """Normalize the raw value provided for the Unicode sequences.

        A missing value becomes an empty list, and a single flat sequence
        of integers is wrapped into a one-element list of sequences.

        :param value: Raw value provided for the field.
        :return: Value to hand over to the regular field validation.
        """
        if value is None:
            return []

        is_flat_sequence = (
            isinstance(value, list)
            and len(value) > 0
            and isinstance(value[0], int)
        )
        return [value] if is_flat_sequence else value

    @field_validator('cat', mode='before')
    @staticmethod
    def _validate_cat_sequences(value: Any, /) -> Any:
        """Normalize the raw value provided for the CAT sequences.

        A missing value becomes an empty list, and a single string is
        wrapped into a one-element list.

        :param value: Raw value provided for the field.
        :return: Value to hand over to the regular field validation.
        """
        if value is None:
            return []

        return [value] if isinstance(value, str) else value
class SequenceParsingTree(BaseModel):
    """Prefix tree mapping sequences of integers to characters."""

    subtrees: dict[tuple[int, ...], SequenceParsingTree] = {}
    """Subtrees to match, indexed by the sequence leading to them."""

    leaf: Character | None = None
    """Leaf to take if none of the subtrees match."""

    def add_character(self, sequence: tuple[int, ...], character: Character, /) -> None:
        """Add the character in the sequence.

        Say we want to insert the character as the sequence [1, 2, 3, 4]:

        * If there is a subtree with that exact sequence, we want to set the
          leaf on it to the character.
        * If there is a subtree being the prefix of the sequence, e.g. [1, 2],
          we want to recursively add the character into that subtree as the
          [3, 4] sequence.
        * If any subtree has a common prefix, e.g. [1, 2, 5, 6, 7]
          ([1, 2] prefix), we want to transform the following::

              {[1, 2, 5, 6, 7]: Tree(...)}

          Into the following::

              {[1, 2]: Tree(subtrees={
                  [3, 4]: Tree(leaf=character),
                  [5, 6, 7]: Tree(...),
              })}

          If the sequence to insert is itself the common prefix (e.g.
          inserting [1, 2] next to [1, 2, 5, 6, 7]), the character becomes
          the leaf of the new [1, 2] tree rather than a subtree with an
          empty key.
        * Otherwise, we want to create the new subtree with the sequence
          as its key.

        :param sequence: Sequence to add the character as.
        :param character: Character to reference as the sequence in the
            parsing tree.
        :raises ValueError: A character with a different code is already
            defined for the sequence.
        """
        # Look for the longest prefix shared with an existing key first.
        for common_len in range(len(sequence), 0, -1):
            prefix = sequence[:common_len]
            subtree_key = next(
                (key for key in self.subtrees if key[:common_len] == prefix),
                None,
            )
            if subtree_key is None:
                continue

            if subtree_key == sequence:
                leaf = self.subtrees[subtree_key].leaf
                if leaf is not None and leaf.code != character.code:
                    raise ValueError(
                        "sequence already used by "
                        + f"character 0x{leaf.code:04X}",
                    )

                if leaf is None:
                    self.subtrees[subtree_key].leaf = character
            elif subtree_key == prefix:
                # The existing key is a strict prefix of the sequence: the
                # rest of the sequence belongs inside that subtree.
                self.subtrees[subtree_key].add_character(
                    sequence[common_len:],
                    character,
                )
            else:
                # The key and the sequence only share a prefix: split the
                # existing entry at the end of that prefix.
                subtree = self.subtrees.pop(subtree_key)
                remainder = sequence[common_len:]

                children = {}
                if remainder:
                    children[remainder] = self.__class__(leaf=character)
                children[subtree_key[common_len:]] = subtree

                # With no remainder, the sequence ends exactly on the new
                # node, so the character is that node's leaf.
                self.subtrees[prefix] = self.__class__(
                    subtrees=children,
                    leaf=None if remainder else character,
                )

            return

        self.subtrees[sequence] = SequenceParsingTree(leaf=character)

    def print(self, *, indent: str = "") -> None:
        """Print the tree on the standard output, for debugging purposes.

        :param indent: Indentation to prefix every printed line with.
        """
        if self.leaf is not None:
            # ``Character`` has no ``id`` attribute; its symbol identifies it.
            print(f"{indent}<leaf: character {self.leaf.symbol}>")

        for key, subtree in self.subtrees.items():
            print(f"{indent}<subtree: {key!r}>")
            subtree.print(indent=indent + " ")
class RawCharacterReference(BaseModel):
    """Character reference as stored in the TOML file."""

    model_config = ConfigDict(extra="forbid")
    """Reject keys that are not declared on the model."""

    chars: list[Character] = Field(default_factory=list)
    """Characters declared in the reference."""
class CharacterTable(BaseModel):
    """Characters and parsing trees of a single character table."""

    characters: dict[int, Character] = Field(default_factory=dict)
    """Characters of the table, indexed by character code."""

    cat_parsing_tree: SequenceParsingTree = Field(
        default_factory=SequenceParsingTree,
    )
    """Parsing tree for CAT sequences."""

    unicode_parsing_tree: SequenceParsingTree = Field(
        default_factory=SequenceParsingTree,
    )
    """Parsing tree for Unicode sequences."""
class CharacterReference(BaseModel):
    """Validated character reference, split per character table."""

    tables: dict[CharacterTableKey, CharacterTable]
    """Character tables, indexed by table key."""

    @classmethod
    def from_toml_file(
        cls: type[CharacterReference],
        path: str | Path,
        /,
    ) -> CharacterReference:
        """Build a character reference out of a TOML file.

        Every problem found is logged as a warning, and the whole file is
        processed before failing, so that all problems get reported at once.

        :param path: Path to the TOML file.
        :return: Decoded character reference.
        :raises ValueError: At least one character or sequence of the
            reference was found to be invalid.
        """
        tables = {key: CharacterTable() for key in ("legacy", "9860")}
        has_errors = False

        raw_ref = RawCharacterReference(**toml.load(path))
        for char in raw_ref.chars:
            for table_key, table in tables.items():
                # Characters bound to another table do not apply here.
                if char.table not in (None, table_key):
                    continue

                if char.code in table.characters:
                    has_errors = True
                    logger.warning(
                        "Duplicate character 0x%04X in character table %s.",
                        char.code,
                        table_key,
                    )
                    continue

                leader = (char.code >> 8) & 0xFF
                if leader not in MULTI_BYTE_LEADERS[table_key]:
                    has_errors = True
                    logger.warning(
                        "Unsupported leader 0x%02X for character 0x%04X in "
                        "character table %s.",
                        leader,
                        char.code,
                        table_key,
                    )
                    continue

                table.characters[char.code] = char

                for cat_sequence in char.cat:
                    # The encoding step stays inside the ``try`` block, as
                    # encoding errors are ``ValueError`` subclasses too.
                    try:
                        table.cat_parsing_tree.add_character(
                            tuple(cat_sequence.encode("ascii")),
                            char,
                        )
                    except ValueError as exc:
                        logger.warning(
                            "Could not add CAT sequence \"%s\" for char "
                            "0x%04X in table %s: %s.",
                            cat_sequence,
                            char.code,
                            table_key,
                            str(exc),
                        )
                        has_errors = True

                for unicode_sequence in char.unicode:
                    try:
                        table.unicode_parsing_tree.add_character(
                            tuple(unicode_sequence),
                            char,
                        )
                    except ValueError as exc:
                        readable = ", ".join(
                            f"0x{n:02X}" for n in unicode_sequence
                        )
                        logger.warning(
                            "Could not add Unicode sequence \"%s\" for char "
                            "0x%04X in table %s: %s.",
                            "[" + readable + "]",
                            char.code,
                            table_key,
                            str(exc),
                        )
                        has_errors = True

        if has_errors:
            logger.error(
                "One or more errors have occurred while parsing the "
                "reference.",
            )
            raise ValueError()

        return cls(tables=tables)
def get_sequence_parsing_tree_lines(
    tree: SequenceParsingTree,
    /,
    *,
    kind: Literal["byte", "u32"] = "byte",
    symbol: str,
) -> Iterator[str]:
    """Get the chars.c lines defining a parsing tree.

    :param tree: Parsing tree to represent.
    :param kind: Kind of elements in the sequences; selects the C types
        used for the tree, its match nodes and the sequence arrays.
    :param symbol: Name of the symbol to define the root tree as.
    :return: Iterator for the lines required to define the tree.
    """
    byte_names = (
        "cahute_byte_parsing_tree",
        "cahute_byte_match",
        "(cahute_u8 const [])",
    )
    u32_names = (
        "cahute_u32_parsing_tree",
        "cahute_u32_match",
        "(cahute_u32 const [])",
    )
    tree_type, match_type, seq_cast = u32_names if kind == "u32" else byte_names

    def emit_node(
        name: str,
        node: SequenceParsingTree,
        /,
        *,
        is_local: bool = True,
    ) -> Iterator[str]:
        """Yield the definition of a tree, after the ones it depends on.

        :param name: Symbol to define the tree as.
        :param node: Tree to yield lines for.
        :param is_local: Whether the tree symbol is local to the file.
        :return: Line iterator.
        """
        # Match nodes are chained: each one points to the one defined just
        # before it, and the tree points to the last one defined.
        previous_match = "NULL"

        for index, (sequence, child) in enumerate(node.subtrees.items()):
            child_name = f"{name}_{index}"
            match_name = f"{name}_m{index}"
            items = ", ".join(str(item) for item in sequence)

            yield from emit_node(child_name, child)
            yield ""
            yield f"CAHUTE_LOCAL_DATA(struct {match_type} const) {match_name} = {{"
            yield f" {previous_match},"  # Next node.
            yield f" &{child_name},"  # Subtree.
            yield f" {seq_cast}{{{items}}},"  # Sequence.
            yield f" {len(sequence)}"  # Sequence length.
            yield "};"
            yield ""

            previous_match = f"&{match_name}"

        if is_local:
            yield f"CAHUTE_LOCAL_DATA(struct {tree_type} const) {name} = {{"
        else:
            yield f"struct {tree_type} const {name} = {{"

        yield f" {previous_match},"
        yield f" &{node.leaf.symbol}" if node.leaf else " NULL"
        yield "};"

    yield ""
    yield from emit_node(symbol, tree, is_local=False)
def get_chars_c_lines(*, ref: CharacterReference) -> Iterator[str]:
    """Get the chars.c lines.

    The produced file defines, in order: one entry per character, one
    256-entry table per multi-byte leader of every character table, the
    CAT parsing trees, then the Unicode parsing trees.

    :param ref: Reference to produce the chars.c from.
    :return: Iterator for the lines of the file.
    """

    def array_or_null(c_type: str, values: Any) -> str:
        """Get the struct member line for an array of values.

        Empty arrays are represented as ``NULL``, since an empty compound
        literal (``(T const []){}``) is not valid C before C23.

        :param c_type: C type of the array elements.
        :param values: Values to place in the array.
        :return: The line, including the trailing comma.
        """
        items = ", ".join(map(str, values))
        if not items:
            return " NULL,"

        return f" ({c_type} const []){{{items}}},"

    yield '#include "chars.h"'
    yield ""

    # ---
    # Define every character, so that they can be referenced by later
    # definitions.
    # ---

    chars_per_symbol = {}
    for table in ref.tables.values():
        for char in table.characters.values():
            chars_per_symbol[char.symbol] = char

    for symbol, char in sorted(chars_per_symbol.items()):
        # Only the first Unicode and CAT sequences are exported with the
        # character; a missing sequence is treated as an empty one.
        first_unicode = char.unicode[0] if char.unicode else []
        first_cat = char.cat[0] if char.cat else ""
        opcode = char.opcode or []

        # See ``cahute_char_entry`` in ``lib/chars.h`` for more information.
        yield f"CAHUTE_LOCAL_DATA(struct cahute_char_entry const) {symbol} = " + "{"

        # Legacy character code.
        if char.table is None or char.table == "legacy":
            yield f' {char.code},'
        elif char.code_legacy is not None:
            yield f' {char.code_legacy},'
        else:
            yield ' 0,'

        # fx-9860G character code.
        if char.table is None or char.table == "9860":
            yield f' {char.code},'
        elif char.code_9860 is not None:
            yield f' {char.code_9860},'
        else:
            yield ' 0,'

        # Sequences, then their lengths in the same order.
        yield array_or_null("cahute_u32", first_unicode)
        yield array_or_null("char", (ord(x) for x in first_cat))
        yield array_or_null("cahute_u16", opcode)

        yield f' {len(first_unicode)},'
        yield f' {len(first_cat)},'
        yield f' {len(opcode)}'

        yield "};"
        yield ""

    # ---
    # Export all tables directly.
    # ---

    for table_key, table in ref.tables.items():
        for lead in MULTI_BYTE_LEADERS[table_key]:
            yield f"struct cahute_char_entry const *cahute_chars_{table_key}_{lead:02X}[] = " + "{"

            for index in range(256):
                suffix = ',' if index < 255 else ''
                code = (lead << 8) | index

                if code in table.characters:
                    yield f" &{table.characters[code].symbol}{suffix}"
                else:
                    yield f" NULL{suffix}"

            yield "};"
            yield ""

    # ---
    # Export the CAT parsing trees.
    # ---

    for table_key, table in ref.tables.items():
        yield from get_sequence_parsing_tree_lines(
            table.cat_parsing_tree,
            symbol=f"cahute_cat_{table_key}_parsing_tree",
        )
        yield ""

    # ---
    # Export the Unicode parsing trees.
    # ---

    for table_key, table in ref.tables.items():
        yield from get_sequence_parsing_tree_lines(
            table.unicode_parsing_tree,
            symbol=f"cahute_unicode_{table_key}_parsing_tree",
            kind="u32",
        )
        yield ""
# Command-line interface: an optional output path, and an optional path to
# the reference to read.
argument_parser = argparse.ArgumentParser(
    prog=Path(__file__).name,
    description="Produce the character source file from the reference.",
)
argument_parser.add_argument("path", type=Path, nargs="?")
argument_parser.add_argument("--reference", type=Path, default=DEFAULT_REFERENCE_PATH)

if __name__ == "__main__":
    coloredlogs.install(level="DEBUG")

    args = argument_parser.parse_args()
    output_path = args.path or DEFAULT_OUTPUT_PATH
    ref_path = args.reference

    try:
        ref = CharacterReference.from_toml_file(ref_path)
    except ValueError as exc:
        # TOML decoding errors and pydantic validation errors are
        # ``ValueError`` subclasses as well, and have not been logged by
        # the loader; report them instead of exiting silently.
        if str(exc):
            logger.error("%s", exc)

        # ``exit()`` is an interactive helper installed by ``site``;
        # raising ``SystemExit`` works in every execution context.
        raise SystemExit(1)

    # Produce all of the lines before opening the output, so that a failure
    # during generation does not leave a truncated file behind.
    lines = list(get_chars_c_lines(ref=ref))

    with open(output_path, "w", encoding="utf-8") as fp:
        for line in lines:
            print(line, file=fp)

View File

@ -27,6 +27,10 @@
* ************************************************************************* */
#include "cas.h"
#include "common.h"
/* TODO: This should probably be set to CTF once it has been implemented. */
#define OUTPUT_ENCODING CAHUTE_TEXT_ENCODING_UTF8
/**
* Read data.
@ -90,28 +94,37 @@ fail:
*/
CAHUTE_LOCAL(int)
list_data_types(struct args const *args, cahute_data const *data) {
/* TODO: Use character conversion to ensure that we have
* terminal-compatible strings here. */
printf("\n");
for (; data; data = data->cahute_data_next) {
switch (data->cahute_data_type) {
case CAHUTE_DATA_TYPE_PROGRAM: {
size_t name_size =
size_t program_name_size =
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_name_size;
size_t program_size =
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_size;
printf(
name_size ? "%" CAHUTE_PRIuSIZE " bytes \tProgram \"%.*s\".\n"
: "%" CAHUTE_PRIuSIZE " bytes \tProgram.\n",
program_size,
(int)name_size,
(char const *)
if (program_name_size) {
printf(
"%" CAHUTE_PRIuSIZE " bytes \tProgram \"",
program_size
);
print_content(
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_name
);
.cahute_data_content_program_name,
program_name_size,
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_encoding,
OUTPUT_ENCODING
);
printf("\".\n");
} else
printf(
"%" CAHUTE_PRIuSIZE " bytes \tProgram.\n",
program_size
);
} break;
default:
@ -130,6 +143,56 @@ list_data_types(struct args const *args, cahute_data const *data) {
* @return Return code.
*/
CAHUTE_LOCAL(int) list_data(struct args const *args, cahute_data const *data) {
int is_first = 1;
for (; data; data = data->cahute_data_next, is_first = 0) {
/* TODO: If the 'pager' flag is set, and is_first is not set,
* we want to put a break here. */
switch (data->cahute_data_type) {
case CAHUTE_DATA_TYPE_PROGRAM: {
size_t program_password_size =
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_password_size;
printf("@@display program \"");
print_content(
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_name,
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_name_size,
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_encoding,
OUTPUT_ENCODING
);
printf("\"");
if (program_password_size) {
printf(" (");
print_content(
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_password,
program_password_size,
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_encoding,
OUTPUT_ENCODING
);
printf(")\n");
} else
printf("\n");
print_content(
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_content,
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_size,
data->cahute_data_content.cahute_data_content_program
.cahute_data_content_program_encoding,
OUTPUT_ENCODING
);
printf("\n");
} break;
}
}
/* TODO */
return 0;
}

View File

@ -685,7 +685,7 @@ int load_default_casrc(struct casrc_database *db) {
pathbuf[len++] = '/';
sprintf(&pathbuf[len], ".casrc");
filep = fopen(pathbuf, "r");
filep = fopen(pathbuf, "rb");
if (filep) {
free(pathbuf);
@ -697,7 +697,7 @@ int load_default_casrc(struct casrc_database *db) {
}
/* Read the system casrc next. */
filep = fopen("/etc/system.casrc", "r");
filep = fopen("/etc/system.casrc", "rb");
if (filep)
goto read;
#endif

View File

@ -74,6 +74,54 @@ extern void set_log_level(char const *loglevel) {
cahute_set_log_level(value);
}
/**
* Print content from an encoding into a destination one.
*
* @param data Data to convert on-the-fly.
* @param data_size Size of the data to convert.
* @param encoding Encoding of the data.
* @param dest_encoding Encoding to display the data as.
*/
extern void print_content(
    void const *data,
    size_t data_size,
    int encoding,
    int dest_encoding
) {
    cahute_u8 buf[128];
    void *dest;
    size_t dest_size, converted;
    int err;

    /* The conversion is done in passes through a fixed-size buffer, which
     * is flushed to the standard output after every pass. */
    while (1) {
        dest = buf;
        dest_size = sizeof(buf);

        err = cahute_convert_text(
            &dest,
            &dest_size,
            &data,
            &data_size,
            dest_encoding,
            encoding
        );

        converted = sizeof(buf) - dest_size;
        if (converted)
            fwrite(buf, 1, converted, stdout);

        /* Reaching the end of the input or a sentinel both mean the
         * conversion is complete, even if this pass produced nothing,
         * e.g. when the sentinel was the first thing left to read. */
        if (!err || err == CAHUTE_ERROR_TERMINATED)
            return;

        /* The buffer was too small for what is left: make another pass,
         * unless nothing was produced, in which case the buffer cannot
         * even hold the next character and retrying would loop forever. */
        if (err == CAHUTE_ERROR_SIZE && converted)
            continue;

        break;
    }

    fprintf(stdout, "<CONVERSION FAILED: 0x%04X>", err);
}
/**
* Get a line and allocate it.
*

View File

@ -33,6 +33,13 @@
extern char const *get_current_log_level(void);
extern void set_log_level(char const *loglevel);
extern void print_content(
void const *data,
size_t data_size,
int encoding,
int dest_encoding
);
/* Portable getdelim() implementation. */
extern ssize_t
portable_getdelim(char **sp, size_t *np, int delim, FILE *filep);

View File

@ -723,7 +723,7 @@ int parse_args(int argc, char **argv, struct args *args) {
/* Open the local source path if a path is given. */
if (args->local_source_path && !args->local_source_fp) {
args->local_source_fp = fopen(args->local_source_path, "r");
args->local_source_fp = fopen(args->local_source_path, "rb");
if (!args->local_source_fp) {
fprintf(
stderr,
@ -737,7 +737,7 @@ int parse_args(int argc, char **argv, struct args *args) {
/* Open the local target path if a path is given. */
if (args->local_target_path && !args->local_target_fp) {
args->local_target_fp = fopen(args->local_target_path, "w+");
args->local_target_fp = fopen(args->local_target_path, "wb+");
if (!args->local_target_fp) {
fprintf(
stderr,

View File

@ -152,7 +152,7 @@ read_file_contents(char const *path, cahute_u8 **datap, size_t *sizep) {
size_t size;
FILE *fp;
fp = fopen(path, "r");
fp = fopen(path, "rb");
if (!fp) {
fprintf(stderr, "Unable to open the file: %s\n", strerror(errno));
goto fail;
@ -337,7 +337,7 @@ int parse_args(int argc, char **argv, struct args *args) {
}
args->command = COMMAND_BACKUP;
args->output_fp = fopen(output_path, "w");
args->output_fp = fopen(output_path, "wb");
args->upload_uexe = 0;
if (!args->output_fp) {
fprintf(

View File

@ -247,7 +247,7 @@ process_params:
}
if (args->local_target_path) {
args->local_target_fp = fopen(args->local_target_path, "w");
args->local_target_fp = fopen(args->local_target_path, "wb");
if (!args->local_target_fp) {
fprintf(
stderr,

View File

@ -8,6 +8,6 @@ the Cahute C library directly.
:maxdepth: 1
developer-guides/build
developer-guides/detect-usb
developer-guides/detect-serial
developer-guides/open-usb-link
developer-guides/detection
developer-guides/links
developer-guides/text

View File

@ -0,0 +1,11 @@
Using device detection
======================
These sections describe specific problems that you may want to solve using
device detection utilities with the Cahute C library directly.
.. toctree::
:maxdepth: 1
detection/usb
detection/serial

View File

@ -0,0 +1,10 @@
Using links
===========
These sections describe specific problems that you may want to solve using
links with the Cahute C library directly.
.. toctree::
:maxdepth: 1
links/open-usb-link

View File

@ -0,0 +1,10 @@
Using text conversion utilities
===============================
These sections describe specific problems that you may want to solve using
text conversion utilities with the Cahute C library directly.
.. toctree::
:maxdepth: 1
text/convert

View File

@ -0,0 +1,135 @@
/* Compile using: gcc convert-multi-in.c `pkg-config cahute --cflags --libs`. */
#include <stdio.h>
#include <string.h>
#include <cahute.h>
/* Example long buffer. */
static cahute_u8 const example[] =
"4\x0EN\x0D"
"8\x0ES\x0D"
"1\x0E"
"C\x0D\"ENTREZ LES OPERATEURS\"\x0D"
"\xF7\x08N\x0D\xF7\x08\x7F\x8F:\xF7\x09:\xF7\x0A:\x7F\x8F\x0E"
"D\x0D"
"\xF7\x0B"
"D=0\x0D\x0D\xF7\x00"
"D=43:\xF7\x01\x0D\xF7\x10S,4,\"\xA9"
"\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"3\x7F\xB0N\x11"
"2\x7F\xB0"
"N\x11"
"0\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=42:\xF7\x01\x0D"
"\xF7\x10S,4,\"\x89\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=32:\xF7\x01\x0D\xF7\x10S,4,\"\x99\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"1\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=33:\xF7\x01\x0D\xF7\x10S,4,\"\xB9\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN"
"\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x09\x0D\x0D\xF7\x00"
"C=1:\xF7\x01\x0D"
"\"GOLDORAK\"\x0D\xF7\x02\x0D\"INVALIDE\"\x0D\xF7\x03\x00";
/* Convert the example data to UTF-8 by feeding it to the conversion
 * function through a 32-byte read buffer, as if it was read from a stream,
 * then print the converted text. Returns 1 on conversion failure. */
int main(void) {
    char final_buf[1024];
    cahute_u8 read_buf[32];
    size_t read_offset = 0;
    void *dest = final_buf;
    size_t dest_size = sizeof(final_buf);
    void const *src;
    size_t src_size;
    size_t present = 0;
    int i, err;

    for (i = 0;; i++) {
        size_t read_size;

        /* Start by completing the buffer.
         * If there are ``present`` bytes already in the buffer, we want
         * to add ``sizeof(read_buf) - present`` bytes in the buffer. */
        /* NOTE(review): ``read_offset`` is only ever increased by a size
         * clamped to what is left in ``example``, so this condition does
         * not look reachable; the loop appears to rely on the sentinel at
         * the end of the data to stop -- confirm. */
        if (read_offset > sizeof(example))
            break;

        src_size = sizeof(read_buf) - present;
        if (src_size > sizeof(example) - read_offset) {
            /* There may be fewer bytes to read than expected, we want to
             * complete it this way. */
            src_size = sizeof(example) - read_offset;
        }

        memcpy(&read_buf[present], &example[read_offset], src_size);
        read_offset += src_size;

        /* We now want to incorporate the already-present bytes into the
         * buffer, to prepare for the conversion. */
        src = read_buf;
        src_size += present;
        present = src_size;

        /* We now have an ``src`` buffer of ``src_size`` bytes to read,
         * we can operate the conversion. */
        err = cahute_convert_text(
            &dest,
            &dest_size,
            &src,
            &src_size,
            CAHUTE_TEXT_ENCODING_UTF8,
            CAHUTE_TEXT_ENCODING_9860_8
        );

        /* ``present`` holds the size given to the conversion, and
         * ``src_size`` what is left of it, hence the difference. */
        printf(
            "Pass %d: %zu bytes read, error set to 0x%04X\n",
            i,
            present - src_size,
            err
        );

        if (err == CAHUTE_ERROR_TERMINATED)
            break; /* A sentinel was found! */

        if (!err) {
            present = 0;
            continue; /* There may be some more bytes to read. */
        }

        if (err == CAHUTE_ERROR_TRUNC) {
            /* Truncated input, we must check that at least one byte has
             * been read from the source data to avoid an infinite loop. */
            if (src_size == present)
                return 1;

            /* Otherwise, we want to copy the leftover bytes at
             * the beginning and complete.
             *
             * NOTE: Both memory areas may overlap, we must use memmove()
             * to avoid overwriting data we're trying to copy! */
            memmove(read_buf, src, src_size);
            present = src_size;
            continue;
        }

        /* Other failure, we must stop! */
        return 1;
    }

    /* Print the result of the conversion. */
    printf("---\n");
    fwrite(final_buf, 1, sizeof(final_buf) - dest_size, stdout);
    printf("\n---\n");

    return 0;
}

View File

@ -0,0 +1,42 @@
Pass 0: 32 bytes read, error set to 0x0000
Pass 1: 31 bytes read, error set to 0x0007
Pass 2: 32 bytes read, error set to 0x0000
Pass 3: 31 bytes read, error set to 0x0007
Pass 4: 32 bytes read, error set to 0x0000
Pass 5: 32 bytes read, error set to 0x0000
Pass 6: 32 bytes read, error set to 0x0000
Pass 7: 32 bytes read, error set to 0x0000
Pass 8: 23 bytes read, error set to 0x000A
---
4→N
8→S
1→C
"ENTREZ LES OPERATEURS"
While N
While Getkey:WhileEnd:Do:Getkey→D
LpWhile D=0
If D=43:Then
Locate S,4,"×"
S+1→S:N1→N
N≠3 And N≠2 And N≠0⇒0→C:IfEnd
If D=42:Then
Locate S,4,"+"
S+1→S:N1→N
0→C:IfEnd
If D=32:Then
Locate S,4,""
S+1→S:N1→N
N≠1⇒0→C:IfEnd
If D=33:Then
Locate S,4,"÷"
S+1→S:N1→N
0→C:IfEnd
WhileEnd
If C=1:Then
"GOLDORAK"
Else
"INVALIDE"
IfEnd
---

View File

@ -0,0 +1,101 @@
/* Compile using: gcc convert-multi-out.c `pkg-config cahute --cflags --libs`. */
#include <stdio.h>
#include <cahute.h>
/* Example long buffer. */
static cahute_u8 const example[] =
"4\x0EN\x0D"
"8\x0ES\x0D"
"1\x0E"
"C\x0D\"ENTREZ LES OPERATEURS\"\x0D"
"\xF7\x08N\x0D\xF7\x08\x7F\x8F:\xF7\x09:\xF7\x0A:\x7F\x8F\x0E"
"D\x0D"
"\xF7\x0B"
"D=0\x0D\x0D\xF7\x00"
"D=43:\xF7\x01\x0D\xF7\x10S,4,\"\xA9"
"\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"3\x7F\xB0N\x11"
"2\x7F\xB0"
"N\x11"
"0\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=42:\xF7\x01\x0D"
"\xF7\x10S,4,\"\x89\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=32:\xF7\x01\x0D\xF7\x10S,4,\"\x99\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN\x0DN\x11"
"1\x13"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x00"
"D=33:\xF7\x01\x0D\xF7\x10S,4,\"\xB9\"\x0DS\x89"
"1\x0ES:N\x99"
"1\x0EN"
"\x0D"
"0\x0E"
"C:\xF7\x03\x0D\xF7\x09\x0D\x0D\xF7\x00"
"C=1:\xF7\x01\x0D"
"\"GOLDORAK\"\x0D\xF7\x02\x0D\"INVALIDE\"\x0D\xF7\x03\x00";
/* Convert the example data to UTF-8 through a 64-byte output buffer,
 * printing what every pass has produced. */
int main(void) {
    cahute_u8 buf[64];
    void const *src = example;
    size_t src_size = sizeof(example);
    void *dest;
    size_t dest_size;
    int i, err;

    for (i = 0;; i++) {
        size_t converted;

        /* Every pass starts over at the beginning of the output buffer,
         * while the source pointer and size carry over between passes. */
        dest = buf;
        dest_size = sizeof(buf);

        err = cahute_convert_text(
            &dest,
            &dest_size,
            &src,
            &src_size,
            CAHUTE_TEXT_ENCODING_UTF8,
            CAHUTE_TEXT_ENCODING_9860_8
        );

        converted = sizeof(buf) - dest_size;
        printf(
            "Pass %d: %zu bytes converted, error set to 0x%04X:\n",
            i,
            converted,
            err
        );

        if (converted) {
            printf("---\n");
            fwrite(buf, 1, converted, stdout);
            printf("\n---\n");
        }

        if (err == CAHUTE_ERROR_SIZE) {
            /* Not enough bytes in the destination buffer.
             * We want to check that at least one byte has been converted,
             * otherwise it means our buffer is not big enough for the
             * first byte. */
            if (!converted)
                break;

            continue;
        }

        /* Success, sentinel found or any other error: no more passes. */
        break;
    }

    /* Finding a sentinel is a regular way for the conversion to end. */
    if (err && err != CAHUTE_ERROR_TERMINATED)
        printf("Conversion has failed.\n");

    return 0;
}

View File

@ -0,0 +1,58 @@
Pass 0: 63 bytes converted, error set to 0x0006:
---
4→N
8→S
1→C
"ENTREZ LES OPERATEURS"
While N
While Getkey:
---
Pass 1: 64 bytes converted, error set to 0x0006:
---
WhileEnd:Do:Getkey→D
LpWhile D=0
If D=43:Then
Locate S,4,"×
---
Pass 2: 64 bytes converted, error set to 0x0006:
---
"
S+1→S:N1→N
N≠3 And N≠2 And N≠0⇒0→C:IfEnd
If D
---
Pass 3: 63 bytes converted, error set to 0x0006:
---
=42:Then
Locate S,4,"+"
S+1→S:N1→N
0→C:IfEnd
If D=32:
---
Pass 4: 64 bytes converted, error set to 0x0006:
---
Then
Locate S,4,""
S+1→S:N1→N
N≠1⇒0→C:IfEnd
If
---
Pass 5: 57 bytes converted, error set to 0x0006:
---
D=33:Then
Locate S,4,"÷"
S+1→S:N1→N
0→C:IfEnd
---
Pass 6: 56 bytes converted, error set to 0x0000:
---
WhileEnd
If C=1:Then
"GOLDORAK"
Else
"INVALIDE"
IfEnd
---

View File

@ -0,0 +1,54 @@
/* Compile using: gcc convert-simple.c `pkg-config cahute --cflags --libs`. */
#include <stdio.h>
#include <cahute.h>
/* Example buffer to convert. */
static cahute_u16 example[] = {
'\\',
'\\',
'f',
'l',
's',
'0',
'\\',
'a',
'n',
'g',
0xCE,
'.',
't',
'x',
't'
};
/* Convert the example buffer to UTF-8 in a single pass, then print the
 * result as a C string. Returns 1 on conversion failure. */
int main(void) {
    char buf[128];
    void *dest = buf; /* Moved forward by the conversion function. */
    size_t dest_size = sizeof(buf);
    void const *source = example;
    size_t source_size = sizeof(example);
    int err;

    err = cahute_convert_text(
        &dest,
        &dest_size,
        &source,
        &source_size,
        CAHUTE_TEXT_ENCODING_UTF8,
        CAHUTE_TEXT_ENCODING_9860_16_HOST
    );

    /* Both reading the whole input and finding a sentinel mean that the
     * conversion went well. */
    if (err == CAHUTE_ERROR_TERMINATED)
        err = 0;

    if (!err && !dest_size) {
        /* We need enough space to add a terminating zero here. */
        err = CAHUTE_ERROR_SIZE;
    }

    if (err) {
        printf("Conversion has failed: error 0x%04X has occurred.\n", err);
        return 1;
    }

    /* ``dest`` now points right after the converted text. */
    *(char *)dest = 0;
    printf("Result: %s\n", buf);

    return 0;
}

View File

@ -0,0 +1,92 @@
.. _guide-developer-convert-text:
Converting text from an encoding to another
===========================================
In order to convert text from one text encoding to another, you must use
the :c:func:`cahute_convert_text` function. There are several possible
approaches you can take.
Using single pass conversion for small buffers
----------------------------------------------
For small blobs of data, such as the name of a program, or the name of a file
or directory, you can use a one pass approach with a static buffer.
In this case, you can make a single call to :c:func:`cahute_convert_text`,
which in nominal cases, should return either:
* :c:macro:`CAHUTE_OK`, if the input data has been read in its entirety,
and no sentinels were detected.
* :c:macro:`CAHUTE_ERROR_TERMINATED`, if a sentinel has been encountered
in the source data.
An example implementation is the following:
.. literalinclude:: convert-single.c
:language: c
This program displays the following output:
.. code-block:: text
Result: \\fls0\angθ.txt
Using multi pass conversion on output
-------------------------------------
If your source data is larger, you can do multiple passes into a buffer
before either placing the result into a stream, or reallocating a buffer
progressively using ``realloc()``.
For every pass, you need to call :c:func:`cahute_convert_text` with the
output set to your buffer, and the input set to your source memory.
On every pass, in nominal circumstances, the function will return one of
the following:
* :c:macro:`CAHUTE_OK`, if the conversion has terminated successfully,
i.e. if there was no more contents to read from the input.
* :c:macro:`CAHUTE_ERROR_TERMINATED`, if the conversion has been interrupted
due to a sentinel being found in the source data.
* :c:macro:`CAHUTE_ERROR_SIZE`, if the conversion has run out of space
in the output buffer, prompting you to make another pass after reading
the contents of the output buffer.
An example that places the result of each pass into the standard output is
the following:
.. literalinclude:: convert-multi-out.c
:language: c
This program displays the following output:
.. literalinclude:: convert-multi-out.txt
:language: text
Multi pass conversion on input
------------------------------
If you read your source data from a stream, you can do multiple passes
on the input.
For every pass, you need to call :c:func:`cahute_convert_text` with
the input set to your read buffer. On every pass, in nominal circumstances,
the function will return one of the following:
* :c:macro:`CAHUTE_ERROR_TERMINATED`, if a sentinel was found in the source
data.
* :c:macro:`CAHUTE_ERROR_TRUNC`, if the input was found to be truncated,
prompting you to do another pass while keeping the rest of the data.
* :c:macro:`CAHUTE_OK`, if all of the source data was converted, but no
sentinel was found, prompting you to do another pass but not crash if
no more bytes were available.
An example that reads from a memory area into a read buffer is the following:
.. literalinclude:: convert-multi-in.c
:language: c
This program displays the following output:
.. literalinclude:: convert-multi-in.txt
:language: text

View File

@ -74,8 +74,8 @@ Type definitions
.. c:member:: void *cahute_data_content_program_content
PRogram content, in the encoding set in
:c:member:`cahute_data_content_program_content`.
Program content, in the encoding set in
:c:member:`cahute_data_content_program_encoding`.
Function declarations
---------------------

View File

@ -48,6 +48,30 @@ Macro definitions
Error raised if an incoming message was too big for the corresponding
internal buffers.
.. c:macro:: CAHUTE_ERROR_TRUNC
Error raised when the source data was incomplete or truncated.
.. c:macro:: CAHUTE_ERROR_INVALID
Error raised when the source data was invalid.
.. c:macro:: CAHUTE_ERROR_INCOMPAT
Error raised if:
* A device was not suitable to be opened to be used by a link.
* An input buffer contained a character that had no equivalent in
the destination encoding.
.. c:macro:: CAHUTE_ERROR_TERMINATED
Error raised if:
* A connection to a device over a link has been terminated, and the
requested operation is therefore not possible.
* An input buffer contained a sentinel, for text conversion.
.. c:macro:: CAHUTE_ERROR_NOT_FOUND
Error code raised if a device could not be found using the provided
@ -58,16 +82,6 @@ Macro definitions
Error raised if only a single device was expected, but multiple were
found.
.. c:macro:: CAHUTE_ERROR_INCOMPAT
Error raised if a device was not suitable to be opened to be used by
a link.
.. c:macro:: CAHUTE_ERROR_TERMINATED
Error raised if a device is still present, but has terminated the
communication.
.. c:macro:: CAHUTE_ERROR_GONE
Error raised if a device with which communication was previously

View File

@ -9,10 +9,207 @@ Macro definitions
``CAHUTE_TEXT_ENCODING_*`` are constants representing how a given
text's data is encoded.
.. c:macro:: CAHUTE_TEXT_ENCODING_FONTCHARACTER_VARIABLE
.. c:macro:: CAHUTE_TEXT_ENCODING_LEGACY_8
Constant representing the :ref:`text-encoding-fontcharacter-variable`.
Constant representing the :ref:`text-encoding-fc8` with
the legacy character table.
.. c:macro:: CAHUTE_TEXT_ENCODING_FONTCHARACTER_FIXED
.. c:macro:: CAHUTE_TEXT_ENCODING_LEGACY_16_HOST
Constant representing the :ref:`text-encoding-fontcharacter-fixed`.
Constant representing the :ref:`text-encoding-fc16` with
the legacy character table, and host endianness.
.. c:macro:: CAHUTE_TEXT_ENCODING_LEGACY_16_BE
Constant representing the :ref:`text-encoding-fc16` with
the legacy character table, and big endian.
.. c:macro:: CAHUTE_TEXT_ENCODING_LEGACY_16_LE
Constant representing the :ref:`text-encoding-fc16` with
the legacy character table, and little endian.
.. c:macro:: CAHUTE_TEXT_ENCODING_9860_8
Constant representing the :ref:`text-encoding-fc8` with
the fx-9860G character table.
.. c:macro:: CAHUTE_TEXT_ENCODING_9860_16_HOST
Constant representing the :ref:`text-encoding-fc16` with
the fx-9860G character table, and host endianness.
.. c:macro:: CAHUTE_TEXT_ENCODING_9860_16_BE
Constant representing the :ref:`text-encoding-fc16` with
the fx-9860G character table, and big endian.
.. c:macro:: CAHUTE_TEXT_ENCODING_9860_16_LE
Constant representing the :ref:`text-encoding-fc16` with
the fx-9860G character table, and little endian.
.. c:macro:: CAHUTE_TEXT_ENCODING_CAT
Constant representing the :ref:`text-encoding-cat`.
.. c:macro:: CAHUTE_TEXT_ENCODING_CTF
Constant representing the :ref:`text-encoding-ctf`.
.. c:macro:: CAHUTE_TEXT_ENCODING_UTF32_HOST
Constant representing the :ref:`text-encoding-utf32`, with
host endianness.
.. c:macro:: CAHUTE_TEXT_ENCODING_UTF32_BE
Constant representing the :ref:`text-encoding-utf32`, with
big endian.
.. c:macro:: CAHUTE_TEXT_ENCODING_UTF32_LE
Constant representing the :ref:`text-encoding-utf32`, with
little endian.
.. c:macro:: CAHUTE_TEXT_ENCODING_UTF8
Constant representing the :ref:`text-encoding-utf8`.
Function declarations
---------------------
.. c:function:: int cahute_convert_text(void **bufp, size_t *buf_sizep, \
void const **datap, size_t *data_sizep, int dest_encoding, \
int source_encoding)
Convert text from one encoding to another.
.. note::
When :c:macro:`CAHUTE_TEXT_ENCODING_UTF32_HOST`,
:c:macro:`CAHUTE_TEXT_ENCODING_UTF32_BE`,
:c:macro:`CAHUTE_TEXT_ENCODING_UTF32_LE` or
:c:macro:`CAHUTE_TEXT_ENCODING_UTF8` is used as the destination
encoding, **Normalization Form C (NFC)** is employed; see
`Unicode Normalization Forms`_ for more information.
Errors you can expect from this function are the following:
:c:macro:`CAHUTE_OK`
The conversion has finished successfully, and there are no
more bytes in the input buffer to read.
:c:macro:`CAHUTE_ERROR_TERMINATED`
A sentinel has been found, and the conversion has been interrupted.
.. note::
If this error is raised, ``*datap`` is set to **after** the
sentinel, and ``*data_sizep`` is set accordingly.
This is useful in case you have multiple text blobs placed
back-to-back.
:c:macro:`CAHUTE_ERROR_SIZE`
The destination buffer had insufficient space, and the procedure
was interrupted.
:c:macro:`CAHUTE_ERROR_TRUNC`
The source data had an incomplete sequence, and the procedure
was interrupted.
:c:macro:`CAHUTE_ERROR_INVALID`
The source data contained an unknown or invalid sequence, and
the procedure was interrupted.
:c:macro:`CAHUTE_ERROR_INCOMPAT`
The source data contained a sequence that could not be translated
to the destination encoding.
At the end of its process, this function updates ``*bufp``, ``*buf_sizep``,
``*datap`` and ``*data_sizep`` to the final state of the function,
even in case of error, so that:
* You can determine how much of the destination buffer was filled,
by subtracting the final buffer size from the original buffer size.
* In case of :c:macro:`CAHUTE_ERROR_SIZE`, you can get the place at
which to get the leftover bytes in the source data.
* In case of :c:macro:`CAHUTE_ERROR_TRUNC`, you can get the place at
which to get the leftover bytes in the source data to complete with
additional data for the next conversion.
* In case of :c:macro:`CAHUTE_ERROR_INVALID` or
:c:macro:`CAHUTE_ERROR_INCOMPAT`, you can get the place of the
problematic input sequence.
Currently supported conversions are the following:
.. list-table::
:header-rows: 1
:width: 100%
* - | Src. ⯈
| ▼ Dst.
- ``LEGACY_*``
- ``9860_*``
- ``CAT``
- ``CTF``
- ``UTF*``
* - ``LEGACY_*``
- x
- x
-
-
-
* - ``9860_*``
- x
- x
-
-
-
* - ``CAT``
-
-
-
-
-
* - ``CTF``
-
-
-
-
-
* - ``UTF*``
- x
- x
-
-
- x
For specific guides on how to use this function, see
:ref:`guide-developer-convert-text`.
:param bufp: Pointer to the destination buffer pointer.
:param buf_sizep: Pointer to the destination buffer size.
:param datap: Pointer to the source data pointer.
:param data_sizep: Pointer to the source data size.
:param dest_encoding: Destination encoding.
:param source_encoding: Source encoding.
:return: Error, or 0 if the operation was successful.
.. c:function:: int cahute_convert_to_utf8(char *buf, size_t buf_size, \
void const *data, size_t data_size, int encoding)
Convert the provided data to UTF-8, and place a terminating NUL character.
This is a utility that calls :c:func:`cahute_convert_text`, for
simple scripts using the Cahute library.
:param buf: Destination buffer.
:param buf_size: Destination buffer size.
:param data: Source data.
:param data_size: Size of the source data.
:param encoding: Encoding of the source data.
:return: Error, or 0 if the operation was successful.
.. _Unicode Normalization Forms: https://www.unicode.org/reports/tr15/

View File

@ -14,3 +14,4 @@ happening in this project, and why it is happening.
:maxdepth: 2
internals/links
internals/text

180
docs/internals/text.rst Normal file
View File

@ -0,0 +1,180 @@
Characters, encodings and conversions
=====================================
Cahute supports text encoding representation and conversions, and requires
several mechanisms to do so.
Character reference
-------------------
Cahute has its own database for representing CASIO's character tables, present
in the ``chars/chars.toml`` file. This file contains one entry per character in
one or both character tables, as a ``chars`` entry, with the following
properties:
``code``
**(required)** Code of the targeted character tables, e.g. ``0x56`` or
``0xE560``.
``table``
**(optional)** Targeted character table, among ``9860`` and ``legacy``.
If this property is not set, the character is considered to belong to all
character tables.
``code_legacy``
**(optional)** Equivalent code in the legacy character table, if the
table is not ``legacy`` and an equivalent exists.
``code_9860``
**(optional)** Equivalent code in the fx-9860G character table, if the
table is not ``9860`` and an equivalent exists.
``opcode``
**(optional)** List of graph character codes the character resolves to for
display purposes, if the character is an opcode.
For example, this can be set to ``[0x66, 0xE5D6]``.
``unicode``
**(optional)** Unicode character sequence, or list of character sequences,
if the character has one or more equivalents as Unicode.
A Unicode character sequence is represented as a list of integers in TOML.
This can be set to a list of character sequences if e.g. the Unicode
sequence has different forms depending on the source Unicode normalization
form. **In this case, the NFC normalization must be placed first, since
it will be used for conversions if the destination encoding is
Unicode-based.**
For example, the value can be set to the following:
* For character ``0x45`` (``-``): ``[0x45]``.
* For character ``0xE609`` (``é``): ``[[0xE9], [0x65, 0x301]]``
*(NFC normalization first)*.
``cat``
**(optional)** CAT sequence, or list of sequences, if the character has
one or more equivalent sequences in the CAT data encoding.
A sequence is represented as a string in TOML.
If multiple sequences are provided, the first one will be used for
conversions if the destination encoding is CAT.
For example, the value can be set to the following:
* For character ``0x21`` (``!``): ``["\\!mark", "!"]``.
* For character ``0xF712``: ``"\\Send("``.
.. note::
Multiple character entries defining the same CAT sequence are allowed
as long as they bear the same code, e.g. ``\LinearReg`` defined
for both the legacy and fx-9860G character table.
Compiled character reference
----------------------------
The character reference present above is transpiled into a C source file,
``lib/chars.c``, by the Python script at ``chars/process_chars.py``.
The structures used and globals defined in this file are declared in
``lib/chars.h``, as included and used by ``lib/text.c`` which contains
the effective conversion and description utilities.
Character entries that can be gathered through character tables or parsing
trees are of the following format:
.. c:struct:: cahute_char_entry
Character entry.
.. c:member:: int code_legacy
Code of the character in the legacy character table.
Note that it is defined even if ``code_legacy`` is not defined in
the character entry in ``chars.toml``, as long as ``table`` is
defined to either ``legacy``, or not defined.
If no code is available for the character in the legacy character
table, this is set to 0.
.. c:member:: int code_9860
Code of the character in the fx-9860G character table.
Note that it is defined even if ``code_9860`` is not defined in
the character entry in ``chars.toml``, as long as ``table`` is
defined to either ``9860``, or not defined.
If no code is available for the character in the fx-9860G character
table, this is set to 0.
.. c:member:: cahute_u16 const *opcode
Sequence of characters, in the same character table, the character
resolves to for display purposes, if the character is an opcode.
.. c:member:: size_t opcode_len
Number of elements in :c:member:`opcode`.
If this is set to 0, the character is not an opcode, and therefore,
the aforementioned member should not be used.
.. c:member:: cahute_u32 const *unicode
Sequence of Unicode codepoints the character can be translated to,
in Normalization Form C (NFC).
.. c:member:: size_t unicode_len
Number of elements in :c:member:`unicode`.
If this is set to 0, the character does not have a Unicode translation,
and therefore, the aforementioned member should not be used.
.. c:member:: char const *cat
Sequence of bytes the character can be translated to in Catalog
files; see :ref:`file-format-cat` for more information.
.. c:member:: size_t cat_len
Number of elements in :c:member:`cat`.
If this is set to 0, the character does not have a CAT representation,
and therefore, the aforementioned member should not be used.
Character entries are available through the following input / parsing oriented
utilities:
* ``cahute_chars_<table>_<leader>``: tables of 256
:c:struct:`cahute_char_entry` pointers that are either ``NULL`` or
defined, depending on whether a character exists with that code with that
multi-byte leader.
For example, ``cahute_chars_9860_E5`` defines all 256 character entries
from ``0xE500`` to ``0xE5FF`` included for the fx-9860G character table.
* Parsing trees.
Conversion logic
----------------
The conversion logic is implemented within :c:func:`cahute_convert_text`.
The structure of a conversion loop is always the same:
* Get the character from the source buffer.
* Convert the character from a table to another, if possible.
* Place the character into the destination buffer.
The available character conversion loops are the following:
* CASIO character based, where the intermediate product is a pointer to a
:c:struct:`cahute_char_entry` structure.
* Unicode-based, where the intermediate product is a sequence of UTF-32
encoded Unicode characters in host endianness.
.. _TOML: https://toml.io/en/

View File

@ -101,6 +101,24 @@ are available:
:c:func:`cahute_be32toh`, :c:func:`cahute_le32toh`, :c:func:`cahute_htobe16`,
:c:func:`cahute_htole16`, :c:func:`cahute_htobe32`, :c:func:`cahute_htole32`.
.. warning::
When ``fopen()`` is used, the ``b`` flag **must** be used to ensure that
platforms don't operate conversions by default:
.. code-block:: c
/* WRONG. */
FILE *fp = fopen("myfile.txt", "r");
/* Correct! */
FILE *fp = fopen("myfile.txt", "rb");
This is notably useful for platforms such as Windows, which perform
conversions by default (i.e. use ``rt`` instead of ``rb`` by default),
except on seeking / telling, which could cause problems where the
computed file size does not match the size of the data actually read.
.. _pre-commit: https://pre-commit.com/
.. _clang-format: https://clang.llvm.org/docs/ClangFormat.html
.. _Black: https://github.com/psf/black

View File

@ -62,6 +62,18 @@ The format of such headers is the following:
-
-
.. _casiolink-cas40-al-end:
``\x17\x17`` CAS40 AL End
~~~~~~~~~~~~~~~~~~~~~~~~~
This header represents an end of sequence when in ``AL`` mode. It is only
used with the CASIOLINK protocol, when using the CAS40 header format, and
when the ``AL`` data type has been sent and received at least once;
see :ref:`casiolink-cas40-al-mode` for more information.
This is not followed by any data parts.
.. _casiolink-cas40-end:
``\x17\xFF`` CAS40 End
@ -72,33 +84,137 @@ protocol, when using the CAS40 header format.
This is not followed by any data parts.
.. note::
This prefix is common to all sentinels in the CAS40 variant, i.e.
headers and data parts, of the size corresponding to the expected
data part size.
.. note::
This data type does not end the communication when ``AL`` mode has been
enabled; see :ref:`casiolink-cas40-al-mode`.
.. _casiolink-cas40-a1:
``A1`` CAS40 Dynamic Graph
~~~~~~~~~~~~~~~~~~~~~~~~~~
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 1 B
- Reserved
-
- Set to ``\0``.
* - 2 (0x02)
- 2 B
- Length (*L*)
- Size of the data part, plus 2.
- Big endian 16-bit integer.
This is followed by 1 data part of *L - 2* bytes, being the definition of the
dynamic graph with a ``\xFF`` sentinel.
This data type is final.
.. _casiolink-cas40-aa:
``AA`` CAS40 Dynamic Graphs
~~~~~~~~~~~~~~~~~~~~~~~~~~~
``AA`` CAS40 Dynamic Graph in Bulk
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
This file type is actually the same as :ref:`casiolink-cas40-a1`, except
it is in a context where multiple dynamic graphs are being sent, i.e.
the data type is non-final.
.. _casiolink-cas40-ad:
``AD`` CAS40 All Memories
~~~~~~~~~~~~~~~~~~~~~~~~~
``AD`` CAS40 All Variable Memories
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
This data type contains all variable memories currently defined on the
device.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Unknown
-
- Set to ``"CA"``.
* - 2 (0x02)
- 2 B
- Count (*C*)
- Number of elements, including the sentinel.
- Big endian 16-bit integer.
There are *C* data parts of 22 bytes each, the last one being
the sentinel, with the following data:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Type
- Data part type.
- ``\0\0`` for the cells, ``\x17\xFF`` for the sentinel.
* - 2 (0x02)
- 10 B
- Value (real part)
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- Value (imaginary part)
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-al:
``AL`` CAS40 All
~~~~~~~~~~~~~~~~
.. todo:: Describe this.
This data type signals that the calculator is about to send all of its data.
This does not have type-specific data, and is not followed by any data parts.
.. note::
If this data type is received at least once, it means that all final
data types become non-final, and that a special sentinel header is
required; see :ref:`casiolink-cas40-al-mode` for more information.
.. _casiolink-cas40-am:
``AM`` CAS40 Variable Memories
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
This is equivalent to :ref:`casiolink-cas40-ad`, but only returns variables
designated using a letter or symbol.
.. _casiolink-cas40-bu:
@ -122,6 +238,7 @@ Type-specific data for such files are the following:
- Backup type, among:
* ``TYPEA00``: fx-9700GH style backup (32768 bytes).
* ``TYPEA02``: CFX-9800G style backup (32768 bytes).
There is one data part, for which the size depends on the backup type.
@ -189,10 +306,12 @@ Type-specific data for such files are the following:
-
- Should be set to ``\x03``.
.. todo:: Document the role of the different fields here!
This is followed by 3 data parts, each representing a monochrome picture with
a one-byte prefix representing the color.
.. todo:: Document the role of the different fields here!
This data type is final.
.. _casiolink-cas40-dd:
@ -251,14 +370,19 @@ Type-specific data for such files are the following:
-
- ``F`` (?)
.. todo:: Document the role of the different fields here!
This is followed by a single data part representing the monochrome picture.
This data type is final.
.. _casiolink-cas40-dm:
``DM`` CAS40 Defined Memories
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
This is equivalent to :ref:`casiolink-cas40-ad`, but only returns defined
memories.
.. _casiolink-cas40-en:
@ -379,7 +503,7 @@ Type-specific data is the following:
-
- Should be set to ``\0``.
This is followed by a single program being the program's content.
This is followed by a single data part being the program's content.
This data type is final.
@ -445,6 +569,11 @@ Type-specific data is the following:
-
- Big endian 16-bit length of the function 6 definition.
This is followed by a single data part with the contents of all of the
functions.
This data type is final.
.. _casiolink-cas40-fn:
``FN`` CAS40 Single Editor Program in Bulk
@ -456,40 +585,328 @@ the data is non-final.
.. _casiolink-cas40-fp:
``FN`` Single Password Protected Editor Program in Bulk
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``FP`` CAS40 Single Password Protected Editor Program in Bulk
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This file type is actually the same as :ref:`casiolink-cas40-ep`, except
it is in a context where multiple editor programs are being sent, i.e.
the data is non-final.
.. _casiolink-cas40-g1:
``G1`` CAS40 Graph Function
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 1 B
- Reserved
-
- Set to ``\0``.
* - 1 (0x01)
- 2 B
- Length (*L*)
- Length of the contents, plus two.
- Big-endian 16-bit integer.
* - 3 (0x03)
- 2 B
- Unknown
-
- Set to ``\0`` by default.
* - 5 (0x05)
- 2 B
- Type (*T*)
-
- Big-endian 16-bit integer, for which the values are:
.. list-table::
:header-rows: 1
* - Value
- Description
* - ``0x0000``
- Unset
* - ``0x0100``
- Rect (``Y=...X``)
* - ``0x0102``
- Pol (``r=...θ``), with optional ``0xF6`` (``,``) separator.
* - ``0x0103``
- Parm (``Xt=...T``)
* - ``0x0104``
- Ineq (``Y>...X``)
* - ``0x0105``
- Ineq (``Y<...X``)
* - ``0x0106``
- Ineq (``Y≥...X``)
* - ``0x0107``
- Ineq (``Y≤...X``)
There is exactly 1 data part of *L* - 2 bytes, containing the source of
the Graph Function.
This data type is final.
.. _casiolink-cas40-ga:
``GA`` CAS40 Graph
~~~~~~~~~~~~~~~~~~
``GA`` CAS40 Graph Function in Bulk
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
This file type is actually the same as :ref:`casiolink-cas40-g1`, except
it is in a context where multiple graph functions are being sent, i.e.
the data is non-final.
.. _casiolink-cas40-gf:
``GF`` CAS40 Graph Zoom
~~~~~~~~~~~~~~~~~~~~~~~
``GF`` CAS40 Factor
~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Unknown
-
- Set to ``"RA"``.
* - 2 (0x02)
- 2 B
- Unknown
-
- Set to ``\x00\x02``.
There is exactly 1 data part of 22 bytes, of the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0``.
* - 2 (0x02)
- 10 B
- Xfact
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- Yfact
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-gr:
``GR`` CAS40 Graph Range
~~~~~~~~~~~~~~~~~~~~~~~~
``GR`` CAS40 Range
~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Unknown
-
- Set to ``"RA"``.
* - 2 (0x02)
- 2 B
- Unknown
-
- Set to ``\x00\x09``.
There is exactly 1 data part of 92 bytes, of the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0``.
* - 2 (0x02)
- 10 B
- Xmin
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- Xmax
-
- :ref:`number-format-casiolink-bcd`
* - 22 (0x16)
- 10 B
- Xscale
-
- :ref:`number-format-casiolink-bcd`
* - 32 (0x20)
- 10 B
- Ymin
-
- :ref:`number-format-casiolink-bcd`
* - 42 (0x2A)
- 10 B
- Ymax
-
- :ref:`number-format-casiolink-bcd`
* - 52 (0x34)
- 10 B
- Yscale
-
- :ref:`number-format-casiolink-bcd`
* - 62 (0x3E)
- 10 B
- Tmin, θmin
-
- :ref:`number-format-casiolink-bcd`
* - 72 (0x48)
- 10 B
- Tmax, θmax
-
- :ref:`number-format-casiolink-bcd`
* - 82 (0x52)
- 10 B
- Tpitch, θpitch
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-gt:
``GT`` CAS40 Function Table
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``"RA"``.
* - 2 (0x02)
- 2 B
- Length (*L*)
- Length of the function definition, plus two.
- Big endian 16-bit integer.
* - 4 (0x04)
- 2 B
- Count (*C*)
-
- Big endian 16-bit integer.
* - 6 (0x06)
- 2 B
- Unknown
-
- Set to ``\0\0``.
There are *C* + 2 data parts, where:
* The first data part is the source function from which the table is
computed, which is *L - 2* bytes long and includes a sentinel (``\xFF``).
* The second data part contains the table properties, which are 32 bytes long.
They have the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0\0``.
* - 2 (0x02)
- 10 B
- Start
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- End
-
- :ref:`number-format-casiolink-bcd`
* - 22 (0x16)
- 10 B
- Pitch
-
- :ref:`number-format-casiolink-bcd`
* The next *C* data parts are the cells, which are 22 bytes long.
They have the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0\0``.
* - 2 (0x02)
- 10 B
- X
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- Y
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-m1:
@ -508,7 +925,7 @@ Type-specific data is the following:
- Values
* - 0 (0x00)
- 2 B
- Unknown
- Reserved
-
- Set to ``"RA"``.
* - 2 (0x02)
@ -563,17 +980,10 @@ This data type is final.
Equivalent to :ref:`casiolink-cas40-m1`, except:
* There are *W* times *H* data parts instead of *W* times *H* + 1, as the
* There are *W* times *H* data parts instead of *W* times *H*, as the
sentinel is not present;
* The data type is not final.
.. _casiolink-cas40-pd:
``PD`` CAS40 Polynomial Equation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
.. _casiolink-cas40-p1:
``P1`` CAS40 Single Numbered Program
@ -631,6 +1041,107 @@ Type-specific data for such files are the following:
This is followed by a single data part containing the program's content.
This data type is final.
.. _casiolink-cas40-pd:
``PD`` CAS40 Polynomial Equation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``"RA"``.
* - 2 (0x02)
- 2 B
- Degree (*D*)
-
- Big endian 16-bit integer.
The contents depend on the degree (*D*) field:
* For degree 2, there is 1 data part which is 32 bytes long, and contains the
components of the ``ax²+bx+c=0`` equation, in the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0\0``
* - 2 (0x02)
- 10 B
- a
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- b
-
- :ref:`number-format-casiolink-bcd`
* - 22 (0x16)
- 10 B
- c
-
- :ref:`number-format-casiolink-bcd`
* For degree 3, there is 1 data part which is 42 bytes long, and contains the
components of the ``ax³+bx²+cx+d=0`` equation, in the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0\0``
* - 2 (0x02)
- 10 B
- a
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- b
-
- :ref:`number-format-casiolink-bcd`
* - 22 (0x16)
- 10 B
- c
-
- :ref:`number-format-casiolink-bcd`
* - 32 (0x20)
- 10 B
- d
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-pz:
``PZ`` CAS40 Multiple Numbered Programs
@ -674,33 +1185,287 @@ This is followed by 2 data parts:
See :ref:`casiolink-cas40-p1` for more information.
This data type is final.
.. _casiolink-cas40-rt:
``RT`` CAS40 Recursion Table
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``"RA"``.
* - 2 (0x02)
- 2 B
- Length (*L*)
- Length of the function definition, plus two.
- Big endian 16-bit integer.
* - 4 (0x04)
- 2 B
- Count (*C*)
-
- Big endian 16-bit integer.
* - 6 (0x06)
- 2 B
- Unknown
-
- Set to ``\0\0``.
There are *C* + 2 data parts, where:
* The first data part is the source function from which the table is
computed, which is *L - 2* bytes long and includes a sentinel (``\xFF``).
* The second data part contains the table properties, which are 22 bytes long.
They have the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0\0``.
* - 2 (0x02)
- 10 B
- nStart
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- nEnd
-
- :ref:`number-format-casiolink-bcd`
* The next *C* data parts are the cells, which are 32 bytes long.
They have the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``\0\0``.
* - 2 (0x02)
- 10 B
- n
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- an
-
- :ref:`number-format-casiolink-bcd`
* - 22 (0x16)
- 10 B
- Σan
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-sd:
``SD`` CAS40 Simultaneous Equations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``"RA"``.
* - 2 (0x02)
- 1 B
- Width (*W*)
-
- 8-bit integer.
* - 3 (0x03)
- 1 B
- Height (*H*)
-
- 8-bit integer.
There are *W* * *H* + 1 data parts, each 14 bytes long, of the following format:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Type
- Data part type.
- ``\0\0`` for the cells, ``\x17\xFF`` for the sentinel.
* - 2 (0x02)
- 1 B
- X
- Horizontal coordinate in the matrix, starting from 1.
- 8-bit integer.
* - 3 (0x03)
- 1 B
- Y
- Vertical coordinate in the matrix, starting from 1.
- 8-bit integer.
* - 4 (0x04)
- 10 B
- Value for the cell.
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-sr:
``SR`` CAS40 Paired Variable Data
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``"RA"``.
* - 2 (0x02)
- 2 B
- Count (*C*)
- Number of elements, including the sentinel.
- Big endian 16-bit integer.
There are *C* data parts of 32 bytes each, the last one being
the sentinel, with the following data:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Type
- Data part type.
- ``\0\0`` for the cells, ``\x17\xFF`` for the sentinel.
* - 2 (0x02)
- 10 B
- X value
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- Y value
-
- :ref:`number-format-casiolink-bcd`
* - 22 (0x16)
- 10 B
- f value
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas40-ss:
``SS`` CAS40 Single Variable Data
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. todo:: Describe this.
Type-specific data is the following:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Reserved
-
- Set to ``"RA"``.
* - 2 (0x02)
- 2 B
- Count (*C*)
- Number of elements, including the sentinel.
- Big endian 16-bit integer.
There are *C* + 1 data parts of 22 bytes each, the last one being
the sentinel, with the following data:
.. list-table::
:header-rows: 1
* - Offset
- Size
- Field name
- Description
- Values
* - 0 (0x00)
- 2 B
- Type
- Data part type.
- ``\0\0`` for the cells, ``\x17\xFF`` for the sentinel.
* - 2 (0x02)
- 10 B
- X value
-
- :ref:`number-format-casiolink-bcd`
* - 12 (0x0C)
- 10 B
- f value
-
- :ref:`number-format-casiolink-bcd`
This data type is final.
.. _casiolink-cas50:

View File

@ -22,4 +22,96 @@ In order to initiate the connection, the communication schema is the following:
sender->>receiver: Send a 0x16 (START)
receiver->>sender: Send a 0x13 (ESTABLISHED)
.. todo:: Determine the role of all of that.
Send or receive data using the CAS40 or CAS50 variant
-----------------------------------------------------
For the :ref:`receive-protocol-rationale` or :ref:`transmit-protocol-rationale`
using the CAS40 or CAS50 variants, the flow is the following:
.. mermaid::
sequenceDiagram
Participant sender as Sender
Participant receiver as Receiver
sender->>receiver: Send header (0x3A)
alt An error has occurred
receiver->>sender: Send an error
else
receiver->>sender: Acknowledge (0x06)
loop A data part is expected
sender->>receiver: Send a data part (0x3A)
receiver->>sender: Acknowledge (0x06)
end
end
The number of data parts, and the size of each of them, are determined from
the header, and are **not** communicated using the protocol itself.
.. note::
This is the opposite approach from Protocol 7.00, where the interpretation
can be done entirely after the file has been sent, even for unknown
data types. Here, you need to make at least a partial interpretation
of the header.
On CAS40 headers, the interpretation must be done per data type, whereas
on CAS50 headers, you can generally rely on the 3-letter format instead
(with a few exceptions), which is easier to support as it has fewer
values.
Every data type can either be final, i.e. be the last one to be transmitted
on a given communication before the sender considers the communication as
completed, or not.
When only non-final data types are used, the sender can end the communication
manually by sending a sentinel header, in the form of a
:ref:`casiolink-cas40-end` or :ref:`casiolink-cas50-end` data type.
.. _casiolink-cas40-al-mode:
CAS40 AL Mode
~~~~~~~~~~~~~
When a sender using CAS40 wants to transmit all of its data, it can use an
AL mode, which causes the following flow:
.. mermaid::
sequenceDiagram
Participant sender as Sender
Participant receiver as Receiver
sender->>receiver: Send AL header (0x3A, "AL" data type)
receiver->>sender: Acknowledge (0x06)
loop Data is to be sent
Note over sender,receiver: Sender sends header and data parts
end
sender->>receiver: Send AL End header (0x3A, "\x17\x17" data type)
receiver->>sender: Acknowledge (0x06)
Note over sender,receiver: Communication ends
In this mode, **all data types that are normally final become non-final**.
This includes :ref:`casiolink-cas40-end`, which does not end the communication
anymore, as once this mode is enabled, only :ref:`casiolink-cas40-al-end`
is able to do this.
See the following for more information:
* :ref:`casiolink-cas40-al`
* :ref:`casiolink-cas40-al-end`
Request data using the CAS50 variant
------------------------------------
.. todo:: Write this!
Send or receive data using the CAS100 variant
---------------------------------------------
.. todo:: Write this!

View File

@ -1,22 +1,122 @@
Text encodings
==============
.. todo::
CASIO uses specific character tables on their calculators.
Introduce this section.
We will call CASIO's character table "FONTCHARACTER".
.. note::
These tables or encodings are sometimes named ``FONTCHARACTER`` by the
community, since that was the name of the type that could contain a
character code in the fx-9860G SDK published by CASIO in 2004.
.. _text-encoding-fontcharacter-variable:
In these tables, every code can represent either:
Variable width FONTCHARACTER encoding
-------------------------------------
* Control characters, e.g. ``0x000A`` (newline);
* Graph characters, e.g. ``0x0023`` (``#``);
* Operation codes, or "opcodes" for short, e.g. ``0xF710`` (``Locate``).
.. todo:: Write this.
.. note::
.. _text-encoding-fontcharacter-fixed:
All of these types will be named "characters" in this section.
Fixed-width FONTCHARACTER encoding
----------------------------------
CASIO has had two separate character tables following similar logics:
.. todo:: Write this: fixed-width 16-bit, big endian.
* The legacy character table, applied on calculators up to, but excluding,
the fx-9860G.
* The fx-9860G character table, applied on all compatible calculators
post-2004, including fx-CG and fx-CP calculators.
Both have the same multi-byte leader logic, i.e. characters have a "lead"
character within a specific set, then a code relative to this set.
Sets for the above character tables are the following:
* For legacy: ``0x00``, ``0x7F``, ``0xF7``.
* For fx-9860G: ``0x00``, ``0x7F``, ``0xE5``, ``0xE6``, ``0xE7``, ``0xF7``,
``0xF9``.
It is important to distinguish both, as while a lot of characters are
common between both tables, some characters have been removed or replaced
from one to the other, and the legacy table uses some of the fx-9860G table's
multi-byte leaders as characters.
The following sections will present the character encodings and associated
tables used within and surrounding CASIO calculators.
.. _text-encoding-fc8:
Variable width encoding
-----------------------
This encoding can be used with either the legacy or fx-9860G character table.
Every character is represented with either one or two bytes, depending on
the first byte of the sequence:
* If the first byte is a multi-byte leader for the character table, the
sequence is two-bytes long;
* Otherwise, the sequence is one-byte long.
For example, take the encoded sequence ``\x12\xE5\xAB``:
* With the legacy character table, since none of the characters are multi-byte
leaders, the sequence represents three characters ``0x0012``, ``0x00E5``,
``0x00AB``.
* With the fx-9860G character table, ``\xE5`` is a multi-byte leader, which
means that the sequence represents two characters ``0x0012`` and ``0xE5AB``.
.. _text-encoding-fc16:
Fixed-width encoding
--------------------
This encoding can be used with either the legacy or fx-9860G character table.
Every character is represented using two bytes, using either big or little
endian.
For example, take the sequence of characters ``0x0012`` and ``0xE5AB``:
* If using big endian, the encoded sequence will be ``\x00\x12\xE5\xAB``;
* If using little endian, the encoded sequence will be ``\x12\x00\xAB\xE5``.
.. _text-encoding-cat:
CAT data encoding
-----------------
This encoding can be used with either the legacy or fx-9860G character table,
and represents every supported character with an ASCII-compatible character
sequence.
Some example sequences are the following:
* The legacy or fx-9860G character ``0x0040`` (``-``) is represented in CAT
data encoding using the ASCII sequence ``-``;
* The legacy or fx-9860G character ``0xF718`` is represented in CAT data
encoding using the ASCII sequence ``\ClrText``;
* The legacy character ``0x00E6`` is represented in CAT data encoding using
the ASCII sequence ``CL``.
.. _text-encoding-ctf:
CTF data encoding
-----------------
.. todo:: Write this. ASCII-based.
.. _text-encoding-utf32:
UTF-32 encoding
---------------
Cahute supports the `UTF-32`_ fixed-length encoding without
Byte-Order Mark (BOM), with big and little endiannesses.
.. _text-encoding-utf8:
UTF-8 encoding
--------------
Cahute supports the `UTF-8`_ variable-length encoding without
Byte-Order Mark (BOM).
.. _UTF-32: https://en.wikipedia.org/wiki/UTF-32
.. _UTF-8: https://en.wikipedia.org/wiki/UTF-8

View File

@ -40,22 +40,24 @@ CAHUTE_BEGIN_NAMESPACE
#define CAHUTE_OK 0x0000 /* No error has occurred. */
#define CAHUTE_ERROR_UNKNOWN 0x0001 /* An unknown error has occurred. */
#define CAHUTE_ERROR_IMPL 0x0002 /* A feature was unimplemented. */
#define CAHUTE_ERROR_ALLOC 0x0003 /* A memory allocation has failed. */
#define CAHUTE_ERROR_PRIV 0x0004 /* Insufficient privileges were found. */
#define CAHUTE_ERROR_INT 0x0005 /* Interrupted by a callback. */
#define CAHUTE_ERROR_SIZE 0x0006 /* Some received data was too big. */
#define CAHUTE_ERROR_UNKNOWN 0x0001 /* An unknown error has occurred. */
#define CAHUTE_ERROR_IMPL 0x0002 /* A feature was unimplemented. */
#define CAHUTE_ERROR_ALLOC 0x0003 /* A memory allocation has failed. */
#define CAHUTE_ERROR_PRIV 0x0004 /* Insufficient privileges were found. */
#define CAHUTE_ERROR_INT 0x0005 /* Interrupted by a callback. */
#define CAHUTE_ERROR_SIZE 0x0006 /* Some received data was too big. */
#define CAHUTE_ERROR_TRUNC 0x0007 /* Input was truncated. */
#define CAHUTE_ERROR_INVALID 0x0008 /* Invalid data or behaviour. */
#define CAHUTE_ERROR_INCOMPAT 0x0009 /* Incompatibility detected. */
#define CAHUTE_ERROR_TERMINATED 0x000A /* Input or link was terminated. */
#define CAHUTE_ERROR_NOT_FOUND 0x0101 /* Device could not be found. */
#define CAHUTE_ERROR_TOO_MANY 0x0102 /* Too Many Devices found. */
#define CAHUTE_ERROR_INCOMPAT 0x0103 /* Found device is incompatible. */
#define CAHUTE_ERROR_GONE 0x0104 /* Device is gone, I/O has failed. */
#define CAHUTE_ERROR_TERMINATED 0x0105 /* Device has terminated the comm. */
#define CAHUTE_ERROR_TIMEOUT_START 0x0106 /* A timeout has occurred on 1st. */
#define CAHUTE_ERROR_TIMEOUT 0x0107 /* A timeout has occurred on next. */
#define CAHUTE_ERROR_CORRUPT 0x0108 /* Corrupted packet. */
#define CAHUTE_ERROR_IRRECOV 0x0109 /* Irrecoverable link */
#define CAHUTE_ERROR_GONE 0x0103 /* Device is gone, I/O has failed. */
#define CAHUTE_ERROR_TIMEOUT_START 0x0104 /* A timeout has occurred on 1st. */
#define CAHUTE_ERROR_TIMEOUT 0x0105 /* A timeout has occurred on next. */
#define CAHUTE_ERROR_CORRUPT 0x0106 /* Corrupted packet. */
#define CAHUTE_ERROR_IRRECOV 0x0107 /* Irrecoverable link */
#define CAHUTE_ERROR_NOOW 0x0201 /* File was not overwritten. */

View File

@ -32,9 +32,46 @@
CAHUTE_BEGIN_NAMESPACE
#define CAHUTE_TEXT_ENCODING_FONTCHARACTER_VARIABLE 1
#define CAHUTE_TEXT_ENCODING_FONTCHARACTER_FIXED 2
#define CAHUTE_TEXT_ENCODING_LEGACY_8 1
#define CAHUTE_TEXT_ENCODING_LEGACY_16_HOST 2
#define CAHUTE_TEXT_ENCODING_LEGACY_16_BE 3
#define CAHUTE_TEXT_ENCODING_LEGACY_16_LE 4
#define CAHUTE_TEXT_ENCODING_9860_8 5
#define CAHUTE_TEXT_ENCODING_9860_16_HOST 6
#define CAHUTE_TEXT_ENCODING_9860_16_BE 7
#define CAHUTE_TEXT_ENCODING_9860_16_LE 8
#define CAHUTE_TEXT_ENCODING_CAT 10
#define CAHUTE_TEXT_ENCODING_CTF 11
#define CAHUTE_TEXT_ENCODING_UTF32_HOST 20
#define CAHUTE_TEXT_ENCODING_UTF32_BE 21
#define CAHUTE_TEXT_ENCODING_UTF32_LE 22
#define CAHUTE_TEXT_ENCODING_UTF8 23
CAHUTE_BEGIN_DECLS
/* Convert text from a source encoding to a destination encoding.
*
* NOTE(review): both encoding parameters are presumably expected to be
* CAHUTE_TEXT_ENCODING_* constants -- confirm against the implementation.
*
* NOTE(review): the destination buffer, the source data and both of their
* sizes are passed by address, which suggests they are updated as the
* conversion progresses so that it can be resumed -- confirm against the
* implementation in lib/text.c before relying on it.
*
* The return value is an int; presumably CAHUTE_OK or a CAHUTE_ERROR_*
* code, to be confirmed against the implementation. */
CAHUTE_EXTERN(int)
cahute_convert_text(
void **cahute__bufp,
size_t *cahute__buf_sizep,
void const **cahute__datap,
size_t *cahute__data_sizep,
int cahute__dest_encoding,
int cahute__source_encoding
);
/* Convert text in the given encoding to UTF-8, into a caller-provided
* character buffer.
*
* Unlike a by-address interface, the buffer and data are passed by value
* here, so the caller's pointers and sizes are left untouched.
*
* NOTE(review): the encoding is presumably a CAHUTE_TEXT_ENCODING_*
* constant, and whether the output gets NUL-terminated or what the returned
* int represents cannot be told from this declaration -- confirm against
* the implementation in lib/text.c. */
CAHUTE_EXTERN(int)
cahute_convert_to_utf8(
char *cahute__buf,
size_t cahute__buf_size,
void const *cahute__data,
size_t cahute__data_size,
int cahute__encoding
);
CAHUTE_END_DECLS
CAHUTE_END_NAMESPACE
#endif /* CAHUTE_TEXT_H */

2
lib/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# this file is generated by the build process
chars.c

View File

@ -38,7 +38,7 @@ pz_program_names =
/* TIMEOUT_PACKET_TYPE is the timeout before reading the packet type, i.e.
* the first byte, while TIMEOUT_PACKET_CONTENTS is the timeout before
* reading any of the following bytes. */
#define TIMEOUT_PACKET_CONTENTS 1000
#define TIMEOUT_PACKET_CONTENTS 2000
#define PACKET_TYPE_ACK 0x06
#define PACKET_TYPE_ESTABLISHED 0x13
@ -112,9 +112,9 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
cahute_u8 *buf = link->data_buffer;
size_t buf_capacity = link->data_buffer_capacity;
size_t buf_size, part_count = 1, part_repeat = 1;
size_t part_sizes[2];
size_t part_sizes[5];
int packet_type, err, variant = 0, checksum, checksum_alt;
int log_part_data = 1, is_end = 0, is_final = 0;
int log_part_data = 1, is_al_end = 0, is_end = 0, is_final = 0;
part_sizes[0] = 0;
@ -265,31 +265,55 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
* NOTE: These sections can either define:
*
* - 'part_sizes[0]' only, if there's only one part.
* - 'part_sizes[...]' and 'part_count' if there's multiple parts,
* and/or with 'part_repeat' set if the number of repetitions is
* arbitrary.
* - 'part_sizes[...]' and 'part_count' if there's multiple parts.
* - 'part_count' to 0 if there's no data part associated with the
* header. */
* header.
*
* Note that if 'part_repeat' is set, it represents how many times the
* last data size is read. */
switch (variant) {
case CAHUTE_CASIOLINK_VARIANT_CAS40:
if (!memcmp(&buf[1], "\x17\xFF", 2)) {
if (!memcmp(&buf[1], "\x17\x17", 2)) {
/* CAS40 AL End */
part_count = 0;
is_al_end = 1;
} else if (!memcmp(&buf[1], "\x17\xFF", 2)) {
/* CAS40 End */
part_count = 0;
is_end = 1;
} else if (!memcmp(&buf[1], "A1", 2)) {
/* CAS40 Dynamic Graph */
part_sizes[0] = ((buf[4] << 8) | buf[5]);
if (part_sizes[0] > 2)
part_sizes[0] -= 2;
is_final = 1;
} else if (!memcmp(&buf[1], "AA", 2)) {
/* CAS40 Dynamic Graph in Bulk */
part_sizes[0] = ((buf[4] << 8) | buf[5]);
if (part_sizes[0] > 2)
part_sizes[0] -= 2;
} else if (!memcmp(&buf[1], "AD", 2)) {
/* CAS40 All Memories */
part_repeat = (buf[5] << 8) | buf[6];
part_sizes[0] = 22;
is_final = 1;
} else if (!memcmp(&buf[1], "AL", 2)) {
/* CAS40 All */
part_count = 0;
link->flags |= CAHUTE_LINK_FLAG_ALMODE;
} else if (!memcmp(&buf[1], "AM", 2)) {
/* CAS40 Variable Memories */
part_repeat = (buf[5] << 8) | buf[6];
part_sizes[0] = 22;
is_final = 1;
} else if (!memcmp(&buf[1], "BU", 2)) {
/* CAS40 Backup */
if (!memcmp(&buf[3], "TYPEA00", 7))
part_sizes[0] = 32768;
else if (!memcmp(&buf[3], "TYPEA02", 7))
part_sizes[0] = 32768;
is_final = 1;
} else if (!memcmp(&buf[1], "DD", 2)) {
/* CAS40 Monochrome Screenshot. */
int width = buf[3], height = buf[4];
if (!memcmp(&buf[5], "\x10\x44WF", 4))
part_sizes[0] = ((width >> 3) + !!(width & 7)) * height;
log_part_data = 0;
is_final = 1;
} else if (!memcmp(&buf[1], "DC", 2)) {
/* CAS40 Color Screenshot. */
@ -302,6 +326,20 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
log_part_data = 0;
is_final = 1;
} else if (!memcmp(&buf[1], "DD", 2)) {
/* CAS40 Monochrome Screenshot. */
int width = buf[3], height = buf[4];
if (!memcmp(&buf[5], "\x10\x44WF", 4))
part_sizes[0] = ((width >> 3) + !!(width & 7)) * height;
log_part_data = 0;
is_final = 1;
} else if (!memcmp(&buf[1], "DM", 2)) {
/* CAS40 Defined Memories */
part_repeat = (buf[5] << 8) | buf[6];
part_sizes[0] = 22;
is_final = 1;
} else if (!memcmp(&buf[1], "EN", 2)) {
/* CAS40 Single Editor Program */
part_sizes[0] = ((buf[4] << 8) | buf[5]) - 2;
@ -324,6 +362,29 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
} else if (!memcmp(&buf[1], "FP", 2)) {
/* CAS40 Single Password Protected Editor Program in Bulk */
part_sizes[0] = ((buf[4] << 8) | buf[5]) - 2;
} else if (!memcmp(&buf[1], "G1", 2)) {
/* CAS40 Graph Function */
part_sizes[0] = ((buf[4] << 8) | buf[5]) - 2;
is_final = 1;
} else if (!memcmp(&buf[1], "GA", 2)) {
/* CAS40 Graph Function in Bulk */
part_sizes[0] = ((buf[4] << 8) | buf[5]) - 2;
} else if (!memcmp(&buf[1], "GF", 2)) {
/* CAS40 Factor */
part_sizes[0] = 2 + buf[6] * 10;
is_final = 1;
} else if (!memcmp(&buf[1], "GR", 2)) {
/* CAS40 Range */
part_sizes[0] = 92;
is_final = 1;
} else if (!memcmp(&buf[1], "GT", 2)) {
/* CAS40 Function Table */
part_count = 3;
part_repeat = (buf[7] << 8) | buf[8];
part_sizes[0] = buf[6] - 2;
part_sizes[1] = 32;
part_sizes[2] = 22;
is_final = 1;
} else if (!memcmp(&buf[1], "M1", 2)) {
/* CAS40 Single Matrix */
int width = buf[5], height = buf[6];
@ -341,12 +402,39 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
/* CAS40 Single Numbered Program. */
part_sizes[0] = ((buf[4] << 8) | buf[5]) - 2;
is_final = 1;
} else if (!memcmp(&buf[1], "PD", 2)) {
/* CAS40 Polynomial Equation */
part_sizes[0] = buf[6] * 10 + 12;
is_final = 1;
} else if (!memcmp(&buf[1], "PZ", 2)) {
/* CAS40 Multiple Numbered Programs */
part_count = 2;
part_sizes[0] = 190;
part_sizes[1] = ((buf[4] << 8) | buf[5]) - 2;
is_final = 1;
} else if (!memcmp(&buf[1], "RT", 2)) {
/* CAS40 Recursion Table */
part_count = 3;
part_repeat = (buf[7] << 8) | buf[8];
part_sizes[0] = buf[6] - 2;
part_sizes[1] = 22;
part_sizes[2] = 32;
is_final = 1;
} else if (!memcmp(&buf[1], "SD", 2)) {
/* CAS40 Simultaneous Equations */
part_repeat = buf[5] * buf[6] + 1;
part_sizes[0] = 14;
is_final = 1;
} else if (!memcmp(&buf[1], "SR", 2)) {
/* CAS40 Paired Variable Data */
part_repeat = (buf[5] << 8) | buf[6];
part_sizes[0] = 32;
is_final = 1;
} else if (!memcmp(&buf[1], "SS", 2)) {
/* CAS40 Single Variable Data */
part_repeat = (buf[5] << 8) | buf[6];
part_sizes[0] = 22;
is_final = 1;
}
break;
@ -452,7 +540,7 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
if (part_count) {
cahute_u8 tmp_buf[2];
size_t part_i, part_j, index, total;
size_t part_i, index, total;
/* There is data to be read.
* The method to transfer data here varies depending on the variant:
@ -467,61 +555,64 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
buf = &buf[buf_size];
index = 1;
total = part_count * part_repeat;
for (part_j = 0; part_j < part_repeat; part_j++) {
for (part_i = 0; part_i < part_count; part_i++, index++) {
size_t part_size = part_sizes[part_i];
total = part_count - 1 + part_repeat;
for (part_i = 0; part_i < total; part_i++, index++) {
size_t part_size =
part_sizes[part_i >= part_count ? part_count - 1 : part_i];
err = cahute_read_from_link(
link,
tmp_buf,
1,
TIMEOUT_PACKET_CONTENTS,
TIMEOUT_PACKET_CONTENTS
);
if (err == CAHUTE_ERROR_TIMEOUT_START)
return CAHUTE_ERROR_TIMEOUT;
if (err)
return err;
msg(ll_info,
"Reading data part %d/%d (%" CAHUTE_PRIuSIZE "o).",
index,
total,
part_size);
if (tmp_buf[0] != PACKET_TYPE_HEADER) {
msg(ll_error,
"Expected 0x3A (':') packet type, got 0x%02X.",
buf[0]);
return CAHUTE_ERROR_UNKNOWN;
err = cahute_read_from_link(
link,
tmp_buf,
1,
TIMEOUT_PACKET_CONTENTS,
TIMEOUT_PACKET_CONTENTS
);
if (err == CAHUTE_ERROR_TIMEOUT_START)
return CAHUTE_ERROR_TIMEOUT;
if (err)
return err;
if (tmp_buf[0] != PACKET_TYPE_HEADER) {
msg(ll_error,
"Expected 0x3A (':') packet type, got 0x%02X.",
buf[0]);
return CAHUTE_ERROR_UNKNOWN;
}
if (part_size) {
size_t part_size_left = part_size;
cahute_u8 *p = buf;
/* Use a loop to be able to follow the transfer progress
* using logs. */
while (part_size_left) {
size_t to_read =
part_size_left > 512 ? 512 : part_size_left;
err = cahute_read_from_link(
link,
p,
to_read,
TIMEOUT_PACKET_CONTENTS,
TIMEOUT_PACKET_CONTENTS
);
if (err == CAHUTE_ERROR_TIMEOUT_START)
return CAHUTE_ERROR_TIMEOUT;
if (err)
return err;
part_size_left -= to_read;
p += to_read;
}
}
msg(ll_info,
"Reading data part %d/%d (%" CAHUTE_PRIuSIZE "o).",
index,
total,
part_size);
err = cahute_read_from_link(
link,
buf,
part_size,
TIMEOUT_PACKET_CONTENTS,
TIMEOUT_PACKET_CONTENTS
);
if (err == CAHUTE_ERROR_TIMEOUT_START)
return CAHUTE_ERROR_TIMEOUT;
if (err)
return err;
/* Read and check the checksum. */
err = cahute_read_from_link(
link,
tmp_buf + 1,
1,
TIMEOUT_PACKET_CONTENTS,
TIMEOUT_PACKET_CONTENTS
);
if (err == CAHUTE_ERROR_TIMEOUT_START)
return CAHUTE_ERROR_TIMEOUT;
if (err)
return err;
if (part_size) {
/* For color screenshots, sometimes the first byte is not
* taken into account in the checksum calculation, as it's
* metadata for the sheet and not the "actual data" of the
@ -531,47 +622,62 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
checksum = cahute_casiolink_checksum(buf, part_size);
checksum_alt =
cahute_casiolink_checksum(buf + 1, part_size - 1);
if (checksum != tmp_buf[1] && checksum_alt != tmp_buf[1]) {
cahute_u8 const send_buf[] = {PACKET_TYPE_INVALID_DATA
};
msg(ll_warn,
"Invalid checksum (expected: 0x%02X, computed: "
"0x%02X).",
tmp_buf[1],
checksum);
mem(ll_info, buf, part_size);
msg(ll_error, "Transfer will abort.");
link->flags |= CAHUTE_LINK_FLAG_IRRECOVERABLE;
err = cahute_write_to_link(link, send_buf, 1);
if (err)
return err;
return CAHUTE_ERROR_CORRUPT;
}
/* Acknowledge the data. */
{
cahute_u8 const send_buf[] = {PACKET_TYPE_ACK};
err = cahute_write_to_link(link, send_buf, 1);
if (err)
return err;
}
msg(ll_info,
"Data part %d/%d received and acknowledged.",
index,
total);
if (log_part_data)
mem(ll_info, buf, part_size);
buf += part_size;
buf_size += part_size;
} else {
checksum = 0;
checksum_alt = 0;
}
/* Read and check the checksum. */
err = cahute_read_from_link(
link,
tmp_buf + 1,
1,
TIMEOUT_PACKET_CONTENTS,
TIMEOUT_PACKET_CONTENTS
);
if (err == CAHUTE_ERROR_TIMEOUT_START)
return CAHUTE_ERROR_TIMEOUT;
if (err)
return err;
if (checksum != tmp_buf[1] && checksum_alt != tmp_buf[1]) {
cahute_u8 const send_buf[] = {PACKET_TYPE_INVALID_DATA};
msg(ll_warn,
"Invalid checksum (expected: 0x%02X, computed: "
"0x%02X).",
tmp_buf[1],
checksum);
mem(ll_info, buf, part_size);
msg(ll_error, "Transfer will abort.");
link->flags |= CAHUTE_LINK_FLAG_IRRECOVERABLE;
err = cahute_write_to_link(link, send_buf, 1);
if (err)
return err;
return CAHUTE_ERROR_CORRUPT;
}
/* Acknowledge the data. */
{
cahute_u8 const send_buf[] = {PACKET_TYPE_ACK};
err = cahute_write_to_link(link, send_buf, 1);
if (err)
return err;
}
msg(ll_info,
"Data part %d/%d received and acknowledged.",
index,
total);
if (log_part_data)
mem(ll_info, buf, part_size);
buf += part_size;
buf_size += part_size;
}
break;
@ -586,14 +692,14 @@ cahute_casiolink_receive_raw_data(cahute_link *link, unsigned long timeout) {
link->protocol_state.casiolink.last_variant = variant;
link->data_buffer_size = buf_size;
if (is_end) {
if (is_al_end || (is_end && (~link->flags & CAHUTE_LINK_FLAG_ALMODE))) {
/* The packet was an end packet. */
link->flags |= CAHUTE_LINK_FLAG_TERMINATED;
msg(ll_info, "Received data was a sentinel!");
return CAHUTE_ERROR_TERMINATED;
}
if (is_final) {
if (is_final && (~link->flags & CAHUTE_LINK_FLAG_ALMODE)) {
/* The packet was a final one in the communication. */
link->flags |= CAHUTE_LINK_FLAG_TERMINATED;
msg(ll_info, "Received data was final!");
@ -750,7 +856,7 @@ cahute_casiolink_receive_data(
if (!memcmp(&buf[1], "P1", 2))
return cahute_create_program(
datap,
CAHUTE_TEXT_ENCODING_FONTCHARACTER_VARIABLE,
CAHUTE_TEXT_ENCODING_LEGACY_8,
NULL, /* No program name, this is anonymous. */
0,
NULL, /* No password. */
@ -774,7 +880,7 @@ cahute_casiolink_receive_data(
err = cahute_create_program(
datap,
CAHUTE_TEXT_ENCODING_FONTCHARACTER_VARIABLE,
CAHUTE_TEXT_ENCODING_LEGACY_8,
names++,
1,
NULL, /* No password. */
@ -816,7 +922,7 @@ cahute_casiolink_receive_data(
if (!memcmp(&buf[5], "PG", 2))
return cahute_create_program(
datap,
CAHUTE_TEXT_ENCODING_FONTCHARACTER_VARIABLE,
CAHUTE_TEXT_ENCODING_LEGACY_8,
&buf[11],
name_size,
&buf[27],

141
lib/chars.h Normal file
View File

@ -0,0 +1,141 @@
/* ****************************************************************************
* Copyright (C) 2024 Thomas Touhey <thomas@touhey.fr>
*
* This software is governed by the CeCILL 2.1 license under French law and
* abiding by the rules of distribution of free software. You can use, modify
* and/or redistribute the software under the terms of the CeCILL 2.1 license
* as circulated by CEA, CNRS and INRIA at the following
* URL: https://cecill.info
*
* As a counterpart to the access to the source code and rights to copy, modify
* and redistribute granted by the license, users are provided only with a
* limited warranty and the software's author, the holder of the economic
* rights, and the successive licensors have only limited liability.
*
* In this respect, the user's attention is drawn to the risks associated with
* loading, using, modifying and/or developing or reproducing the software by
* the user in light of its specific status of free software, that may mean
* that it is complicated to manipulate, and that also therefore means that it
* is reserved for developers and experienced professionals having in-depth
* computer knowledge. Users are therefore encouraged to load and test the
* software's suitability as regards their requirements in conditions enabling
* the security of their systems and/or data to be ensured and, more generally,
* to use and operate it in the same conditions as regards security.
*
* The fact that you are presently reading this means that you have had
* knowledge of the CeCILL 2.1 license and that you accept its terms.
* ************************************************************************* */
#ifndef INTERNAL_CHARS_H
#define INTERNAL_CHARS_H 1
#include <cahute/cdefs.h>
/**
* FONTCHARACTER entry.
*
* Describes one character: its code in each of the two character tables,
* and the Unicode, CAT and opcode sequences associated with it. Every
* sequence pointer is paired with a length member.
*
* NOTE(review): lengths are presumably counted in elements of the
* pointed-to type rather than in bytes -- confirm against chars.c.
*
* NOTE(review): chars.c is generated by the build process; if the generator
* emits positional initializers, the member order below must stay in sync
* with it -- confirm before reordering anything.
*
* @property code_legacy Code in the legacy table (0 if undefined).
* @property code_9860 Code in the fx-9860G table (0 if undefined).
* @property unicode Unicode sequence corresponding to the character.
* @property cat CAT sequence for the character.
* @property opcode Characters, if the character is a multi-character
*           sequence.
* @property unicode_len Length of the Unicode sequence.
* @property cat_len CAT sequence length.
* @property opcode_len Multi-character sequence length.
*/
struct cahute_char_entry {
unsigned int code_legacy;
unsigned int code_9860;
cahute_u32 const *unicode;
char const *cat;
cahute_u16 const *opcode;
size_t unicode_len;
size_t cat_len;
size_t opcode_len;
};
/**
* Byte parsing tree.
*
* This tree contains the nodes to match from this point on, and the
* character entry to use if no nodes were matched.
*
* The candidate nodes are of type ``cahute_byte_match`` and are chained
* through their ``next`` member, ``matches`` being the first of them.
*
* NOTE(review): presumably ``matches`` is NULL when there is nothing further
* to match, and ``entry`` is NULL when stopping at this point is not a valid
* character -- confirm against the generated chars.c.
*
* @property matches Nodes to match to update the tree.
* @property entry Character entry to fall back to.
*/
struct cahute_byte_parsing_tree {
struct cahute_byte_match const *matches;
struct cahute_char_entry const *entry;
};
/**
* Byte matching node for a parsing tree.
*
* This node contains the sequence to match to go to the matching tree.
* Nodes belonging to the same tree form a singly linked list through
* ``next``.
*
* NOTE(review): the end of the list is presumably marked by a NULL ``next``
* -- confirm against the generated chars.c.
*
* @property next Next node if no match was found.
* @property subtree Tree to use if the sequence matches.
* @property sequence Sequence to match.
* @property sequence_len Size of the sequence to match.
*/
struct cahute_byte_match {
struct cahute_byte_match const *next;
struct cahute_byte_parsing_tree const *subtree;
cahute_u8 const *sequence;
size_t sequence_len;
};
/**
* 32-bit integer parsing tree.
*
* Equivalent of ``cahute_byte_parsing_tree`` for 32-bit integer sequences:
* it holds the nodes to match from this point on, and the character entry
* to use if no nodes were matched.
*
* @property matches Nodes to match to update the tree.
* @property entry Character entry to fall back to.
*/
struct cahute_u32_parsing_tree {
struct cahute_u32_match const *matches;
struct cahute_char_entry const *entry;
};
/**
* 32-bit matching node for a parsing tree.
*
* Equivalent of ``cahute_byte_match`` for 32-bit integer sequences: the
* sequence to match is made of ``cahute_u32`` elements instead of bytes.
*
* @property next Next node if no match was found.
* @property subtree Tree to use if the sequence matches.
* @property sequence Sequence to match.
* @property sequence_len Size of the sequence to match.
*/
struct cahute_u32_match {
struct cahute_u32_match const *next;
struct cahute_u32_parsing_tree const *subtree;
cahute_u32 const *sequence;
size_t sequence_len;
};
/* ---
* Definitions made in chars.c
*
* NOTE(review): chars.c is generated by the build process, so none of the
* objects declared here can be inspected in the source tree.
* --- */
/* Tables of character entry pointers for the legacy character table, one
* per lead byte value appearing in the name (0x00, 0x7F, 0xF7).
* NOTE(review): presumably indexed by the remaining byte of the character
* code, with NULL for undefined codes -- confirm against the generator. */
extern struct cahute_char_entry const *cahute_chars_legacy_00[];
extern struct cahute_char_entry const *cahute_chars_legacy_7F[];
extern struct cahute_char_entry const *cahute_chars_legacy_F7[];
/* Same tables for the fx-9860G character table, one per lead byte value
* appearing in the name (0x00, 0x7F, 0xE5, 0xE6, 0xE7, 0xF7, 0xF9). */
extern struct cahute_char_entry const *cahute_chars_9860_00[];
extern struct cahute_char_entry const *cahute_chars_9860_7F[];
extern struct cahute_char_entry const *cahute_chars_9860_E5[];
extern struct cahute_char_entry const *cahute_chars_9860_E6[];
extern struct cahute_char_entry const *cahute_chars_9860_E7[];
extern struct cahute_char_entry const *cahute_chars_9860_F7[];
extern struct cahute_char_entry const *cahute_chars_9860_F9[];
/* Root parsing trees matching Unicode (32-bit) sequences, one per
* character table. */
extern struct cahute_u32_parsing_tree const cahute_unicode_legacy_parsing_tree;
extern struct cahute_u32_parsing_tree const cahute_unicode_9860_parsing_tree;
/* Root parsing trees matching CAT (byte) sequences, one per character
* table. */
extern struct cahute_byte_parsing_tree const cahute_cat_legacy_parsing_tree;
extern struct cahute_byte_parsing_tree const cahute_cat_9860_parsing_tree;
#endif /* INTERNAL_CHARS_H */

View File

@ -182,6 +182,7 @@ cahute__log_win_error(
#define CAHUTE_LINK_FLAG_GONE 0x00000100 /* Underlying medium gone. */
#define CAHUTE_LINK_FLAG_TERMINATED 0x00000200 /* Was terminated! */
#define CAHUTE_LINK_FLAG_IRRECOVERABLE 0x00000400 /* Cannot recover. */
#define CAHUTE_LINK_FLAG_ALMODE 0x00000800 /* CAS40 AL data received. */
/* Medium types allowed. */
#if POSIX_ENABLED

View File

@ -67,7 +67,7 @@ cahute_mcs_decode_data(
return cahute_create_program(
datap,
CAHUTE_TEXT_ENCODING_FONTCHARACTER_VARIABLE,
CAHUTE_TEXT_ENCODING_9860_8,
name,
name_size,
content,

View File

@ -50,16 +50,20 @@ CAHUTE_EXTERN(char const *) cahute_get_error_name(int code) {
return "CAHUTE_ERROR_INT";
case CAHUTE_ERROR_SIZE:
return "CAHUTE_ERROR_SIZE";
case CAHUTE_ERROR_TRUNC:
return "CAHUTE_ERROR_TRUNC";
case CAHUTE_ERROR_INVALID:
return "CAHUTE_ERROR_INVALID";
case CAHUTE_ERROR_INCOMPAT:
return "CAHUTE_ERROR_INCOMPAT";
case CAHUTE_ERROR_TERMINATED:
return "CAHUTE_ERROR_TERMINATED";
case CAHUTE_ERROR_NOT_FOUND:
return "CAHUTE_ERROR_NOT_FOUND";
case CAHUTE_ERROR_TOO_MANY:
return "CAHUTE_ERROR_TOO_MANY";
case CAHUTE_ERROR_INCOMPAT:
return "CAHUTE_ERROR_INCOMPAT";
case CAHUTE_ERROR_GONE:
return "CAHUTE_ERROR_GONE";
case CAHUTE_ERROR_TERMINATED:
return "CAHUTE_ERROR_TERMINATED";
case CAHUTE_ERROR_TIMEOUT_START:
return "CAHUTE_ERROR_TIMEOUT_START";
case CAHUTE_ERROR_TIMEOUT:

View File

@ -2164,10 +2164,10 @@ cahute_seven_receive_data(
);
if (err) {
/* We have successfully negotiated with the device to switch
* serial settings but have not managed to change settings
* ourselves. We can no longer communicate with the device,
* hence can no longer negotiate the serial settings back.
* Therefore, we consider the link to be irrecoverable. */
* serial settings but have not managed to change settings
* ourselves. We can no longer communicate with the device,
* hence can no longer negotiate the serial settings back.
* Therefore, we consider the link to be irrecoverable. */
msg(ll_error,
"Could not set the serial params; that makes our "
"connection "

1153
lib/text.c Normal file

File diff suppressed because it is too large Load Diff