From 048490485acc97f8e3f2749e67392d9219bb9f34 Mon Sep 17 00:00:00 2001 From: Takashi Yano Date: Mon, 25 Jun 2018 13:34:47 +0900 Subject: [PATCH] Fix Unicode table. * (mkcategories): Fix a bug that outputs an incorrect Unicode category table for code point ranges. * (categories.t): Rebuild it using the fixed mkcategories. This fixes the problem reported in the following post: https://cygwin.com/ml/cygwin/2018-06/msg00248.html --- newlib/libc/ctype/categories.t | 32 ++++++++++---------------------- newlib/libc/ctype/mkcategories | 10 ++++++---- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/newlib/libc/ctype/categories.t b/newlib/libc/ctype/categories.t index dd5cf7d08..670457713 100644 --- a/newlib/libc/ctype/categories.t +++ b/newlib/libc/ctype/categories.t @@ -1217,11 +1217,9 @@ {CAT_No, 0x32B1, 14}, {CAT_So, 0x32C0, 62}, {CAT_So, 0x3300, 255}, - {CAT_Lo, 0x3400, 0}, - {CAT_Lo, 0x4DB5, 0}, + {CAT_Lo, 0x3400, 6581}, {CAT_So, 0x4DC0, 63}, - {CAT_Lo, 0x4E00, 0}, - {CAT_Lo, 0x9FEA, 0}, + {CAT_Lo, 0x4E00, 20970}, {CAT_Lo, 0xA000, 20}, {CAT_Lm, 0xA015, 0}, {CAT_Lo, 0xA016, 1142}, @@ -1394,14 +1392,10 @@ {CAT_Mc, 0xABEC, 0}, {CAT_Mn, 0xABED, 0}, {CAT_Nd, 0xABF0, 9}, - {CAT_Lo, 0xAC00, 0}, - {CAT_Lo, 0xD7A3, 0}, + {CAT_Lo, 0xAC00, 11171}, {CAT_Lo, 0xD7B0, 22}, {CAT_Lo, 0xD7CB, 48}, - {CAT_Cs, 0xD800, 0}, - {CAT_Cs, 0xDB7F, 1}, - {CAT_Cs, 0xDBFF, 1}, - {CAT_Cs, 0xDFFF, 0}, + {CAT_Cs, 0xD800, 2047}, {CAT_Lo, 0xF900, 365}, {CAT_Lo, 0xFA70, 105}, {CAT_Ll, 0xFB00, 6}, @@ -1876,8 +1870,7 @@ {CAT_Mn, 0x16F8F, 3}, {CAT_Lm, 0x16F93, 12}, {CAT_Lm, 0x16FE0, 1}, - {CAT_Lo, 0x17000, 0}, - {CAT_Lo, 0x187EC, 0}, + {CAT_Lo, 0x17000, 6124}, {CAT_Lo, 0x18800, 754}, {CAT_Lo, 0x1B000, 286}, {CAT_Lo, 0x1B170, 395}, @@ -2070,16 +2063,11 @@ {CAT_So, 0x1F980, 23}, {CAT_So, 0x1F9C0, 0}, {CAT_So, 0x1F9D0, 22}, - {CAT_Lo, 0x20000, 0}, - {CAT_Lo, 0x2A6D6, 0}, - {CAT_Lo, 0x2A700, 0}, - {CAT_Lo, 0x2B734, 0}, - {CAT_Lo, 0x2B740, 0}, - {CAT_Lo, 0x2B81D, 0}, - {CAT_Lo, 0x2B820, 0}, - 
{CAT_Lo, 0x2CEA1, 0}, - {CAT_Lo, 0x2CEB0, 0}, - {CAT_Lo, 0x2EBE0, 0}, + {CAT_Lo, 0x20000, 42710}, + {CAT_Lo, 0x2A700, 4148}, + {CAT_Lo, 0x2B740, 221}, + {CAT_Lo, 0x2B820, 5761}, + {CAT_Lo, 0x2CEB0, 7472}, {CAT_Lo, 0x2F800, 541}, {CAT_Cf, 0xE0001, 0}, {CAT_Cf, 0xE0020, 95}, diff --git a/newlib/libc/ctype/mkcategories b/newlib/libc/ctype/mkcategories index 24dd93ad0..22c7d4d5a 100755 --- a/newlib/libc/ctype/mkcategories +++ b/newlib/libc/ctype/mkcategories @@ -24,14 +24,15 @@ cat <<\/EOS first= item () { if [ -n "$first" ] - then if [ $(( 0x$1 )) -ne $(( 0x${last-0} + 1 )) ] + then if [ "$2" != "isRangeLast" \ + -a $(( 0x$1 )) -ne $(( 0x${last-0} + 1 )) ] then range fi fi if [ -z "$first" ] then first=$1 - val=$2 + val=$3 fi last=$1 @@ -60,8 +61,9 @@ then -e '/;Co;/ d' else cat fi | -sed -e "s,^\([^;]*\);[^;]*;\([^;]*\);.*,\1 \2," | -uniq -f1 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," +sed -e "s,^\([^;]*\);<[^;]*\, Last>;\([^;]*\);.*,\1 isRangeLast \2," \ + -e "s,^\([^;]*\);[^;]*;\([^;]*\);.*,\1 isNormalOrRangeFirst \2," | +uniq -f2 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," ) | sh > categories.t sed -e "s/.*\(CAT_[A-Za-z]*\).*/ \1,/" categories.t |