From 291eea1bfaa364fcfb920fecec550ce22541bea9 Mon Sep 17 00:00:00 2001 From: Babz Date: Sat, 18 Sep 2021 21:44:06 +0200 Subject: [PATCH] fix string padding with variable-length chars (utf-8) --- CMakeLists.txt | 1 + src/term.c | 14 +++----------- src/ui.c | 22 ++++++++++++---------- src/utf8.c | 28 ++++++++++++++++++++++++++++ src/utf8.h | 7 +++++++ 5 files changed, 51 insertions(+), 21 deletions(-) create mode 100644 src/utf8.c create mode 100644 src/utf8.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d6a406..c8f54d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ set(SOURCES src/syscalls.S src/job.c src/wren_utils.c + src/utf8.c ) set(SOURCES_WREN wren/src/wren_compiler.c diff --git a/src/term.c b/src/term.c index 53b2c54..c5a5c57 100644 --- a/src/term.c +++ b/src/term.c @@ -8,6 +8,8 @@ #include #include +#include "utf8.h" + extern font_t uf5x7; struct tcell { @@ -36,17 +38,7 @@ static int tgrid_sets(int *row, int *col, int fg, int bg, const char *s) { if (*col >= UNS_TERM_COLS) return i; - // detect utf-8 char length - unsigned char lb = s[i]; - int charlen = 0; - if ((lb & 0x80) == 0) // lead bit is zero, must be a single ascii - charlen = 1; - else if ((lb & 0xE0) == 0xC0) // 110x xxxx - charlen = 2; - else if ((lb & 0xF0) == 0xE0) // 1110 xxxx - charlen = 3; - else if ((lb & 0xF8) == 0xF0) // 1111 0xxx - charlen = 4; + int charlen = charlen_utf8(s[i]); char unichar[4]; int j; diff --git a/src/ui.c b/src/ui.c index 7357fbf..6c5cef2 100644 --- a/src/ui.c +++ b/src/ui.c @@ -1,13 +1,15 @@ #include "ui.h" -#include "term.h" +#include +#include + #include #include #include #include -#include -#include +#include "term.h" +#include "utf8.h" static void date_str(char *buf) { rtc_time_t t; @@ -17,24 +19,24 @@ static void date_str(char *buf) { } static void str_pad(char *buf, const char *prefix, const char *suffix, int wanted_len) { - const int l1 = strlen(prefix); - const int l2 = strlen(suffix); + const size_t l1 = strlen(prefix); + const size_t l2 = strlen(suffix); - const int toadd = wanted_len - (l1 + l2); + const size_t toadd = wanted_len - (strlen_utf8(prefix) + strlen_utf8(suffix)); - for (int i = 0; i < l1; i++) { + for (size_t i = 0; i < l1; i++) { buf[i] = prefix[i]; } - for (int i = 0; i < toadd; i++) { + for (size_t i = 0; i < toadd; i++) { buf[l1 + i] = ' '; } - for (int i = 0; i < l2; i++) { + for (size_t i = 0; i < l2; i++) { buf[l1 + toadd + i] = suffix[i]; } - buf[wanted_len] = '\0'; + buf[l1 + l2 + toadd] = '\0'; } void set_statusbar(int tick_ctr, int shift_state, int alpha_state, int battery) { diff --git a/src/utf8.c b/src/utf8.c new file mode 100644 index 0000000..cf07e00 --- /dev/null +++ b/src/utf8.c @@ -0,0 +1,28 @@ +#include "utf8.h" + +int charlen_utf8(char c) { + int charlen = 0; + + if ((c & 0x80) == 0) // lead bit is zero, must be a single ascii + charlen = 1; + else if ((c & 0xE0) == 0xC0) // 110x xxxx + charlen = 2; + else if ((c & 0xF0) == 0xE0) // 1110 xxxx + charlen = 3; + else if ((c & 0xF8) == 0xF0) // 1111 0xxx + charlen = 4; + + return charlen; +} + +int strlen_utf8(const char *str) { + int utf8_len = 0; + int i = 0; + + while (str[i] != '\0') { + i += charlen_utf8(str[i]); + utf8_len++; + } + + return utf8_len; +} \ No newline at end of file diff --git a/src/utf8.h b/src/utf8.h new file mode 100644 index 0000000..46acea4 --- /dev/null +++ b/src/utf8.h @@ -0,0 +1,7 @@ +#ifndef UNS_UTF8_H +#define UNS_UTF8_H + +int charlen_utf8(char c); +int strlen_utf8(const char *str); + +#endif // #ifndef UNS_UTF8_H \ No newline at end of file