use generated character data

The tow* functions use an included case conversion table which can be
generated from Unicode data.
The isw* functions use a character categories table (provided by
categories.c) which can be generated from Unicode data.
Delegation between the current-locale functions and the explicit-locale
functions was reversed: the current-locale functions now call the generic
locale-dependent functions (*_l.c). This is however only relevant on systems
with non-Unicode wide character locales, thus not on Cygwin.
This commit is contained in:
Thomas Wolff 2018-03-09 13:30:33 +01:00 committed by Corinna Vinschen
parent 3ccfb407af
commit 41f72ab4d7
38 changed files with 441 additions and 2683 deletions

View File

@ -24,6 +24,7 @@ if ELIX_LEVEL_1
ELIX_SOURCES =
else
ELIX_SOURCES = \
categories.c \
isalnum_l.c \
isalpha_l.c \
isascii.c \

View File

@ -79,7 +79,8 @@ am__objects_1 = lib_a-ctype_.$(OBJEXT) lib_a-isalnum.$(OBJEXT) \
lib_a-ispunct.$(OBJEXT) lib_a-isspace.$(OBJEXT) \
lib_a-isxdigit.$(OBJEXT) lib_a-tolower.$(OBJEXT) \
lib_a-toupper.$(OBJEXT)
@ELIX_LEVEL_1_FALSE@am__objects_2 = lib_a-isalnum_l.$(OBJEXT) \
@ELIX_LEVEL_1_FALSE@am__objects_2 = lib_a-categories.$(OBJEXT) \
@ELIX_LEVEL_1_FALSE@ lib_a-isalnum_l.$(OBJEXT) \
@ELIX_LEVEL_1_FALSE@ lib_a-isalpha_l.$(OBJEXT) \
@ELIX_LEVEL_1_FALSE@ lib_a-isascii.$(OBJEXT) \
@ELIX_LEVEL_1_FALSE@ lib_a-isascii_l.$(OBJEXT) \
@ -142,7 +143,7 @@ libctype_la_LIBADD =
am__objects_3 = ctype_.lo isalnum.lo isalpha.lo iscntrl.lo isdigit.lo \
islower.lo isupper.lo isprint.lo ispunct.lo isspace.lo \
isxdigit.lo tolower.lo toupper.lo
@ELIX_LEVEL_1_FALSE@am__objects_4 = isalnum_l.lo isalpha_l.lo \
@ELIX_LEVEL_1_FALSE@am__objects_4 = categories.lo isalnum_l.lo isalpha_l.lo \
@ELIX_LEVEL_1_FALSE@ isascii.lo isascii_l.lo isblank.lo \
@ELIX_LEVEL_1_FALSE@ isblank_l.lo iscntrl_l.lo isdigit_l.lo \
@ELIX_LEVEL_1_FALSE@ islower_l.lo isupper_l.lo isprint_l.lo \
@ -351,6 +352,7 @@ GENERAL_SOURCES = \
toupper.c
@ELIX_LEVEL_1_FALSE@ELIX_SOURCES = \
@ELIX_LEVEL_1_FALSE@ categories.c \
@ELIX_LEVEL_1_FALSE@ isalnum_l.c \
@ELIX_LEVEL_1_FALSE@ isalpha_l.c \
@ELIX_LEVEL_1_FALSE@ isascii.c \
@ -609,6 +611,12 @@ lib_a-toupper.o: toupper.c
lib_a-toupper.obj: toupper.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-toupper.obj `if test -f 'toupper.c'; then $(CYGPATH_W) 'toupper.c'; else $(CYGPATH_W) '$(srcdir)/toupper.c'; fi`
lib_a-categories.o: categories.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-categories.o `test -f 'categories.c' || echo '$(srcdir)/'`categories.c
lib_a-categories.obj: categories.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-categories.obj `if test -f 'categories.c'; then $(CYGPATH_W) 'categories.c'; else $(CYGPATH_W) '$(srcdir)/categories.c'; fi`
lib_a-isalnum_l.o: isalnum_l.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-isalnum_l.o `test -f 'isalnum_l.c' || echo '$(srcdir)/'`isalnum_l.c

View File

@ -0,0 +1,39 @@
#include <wctype.h>
#include "categories.h"
/* One run of consecutive code points that share a category.
   cat   - category of every code point in the run
   first - first code point of the run
   delta - distance from `first` to the last code point of the run; the
           lookup code treats first .. first + delta as an inclusive range
   The bit-fields and the packed attribute keep each table entry small. */
struct _category {
enum category cat: 11;
unsigned int first: 21;
unsigned short delta;
} __attribute__((packed));
/* Category table; its entries come from the included file categories.t.
   It is searched with a binary search, so the entries are presumably sorted
   by ascending `first` -- NOTE(review): confirm against the generator. */
static const struct _category categories[] = {
#include "categories.t"
};
/* Binary-search TABLE for the run of code points that contains UCS.
   TABLE must be ordered by ascending `first`; MAX is the index of its
   last entry.
   Returns the category of the matching run, or -1 if no run contains UCS.  */
static enum category
bisearch_cat(wint_t ucs, const struct _category *table, int max)
{
  int min = 0;

  /* A value outside the span covered by the table has no category.
     Report that with -1, exactly like a failed search below, so callers
     that test for -1 do not mistake it for the enumerator with value 0.  */
  if (ucs < table[0].first || ucs > table[max].first + table[max].delta)
    return -1;
  while (max >= min)
    {
      int mid = min + (max - min) / 2;

      if (ucs > table[mid].first + table[mid].delta)
        min = mid + 1;
      else if (ucs < table[mid].first)
        max = mid - 1;
      else
        return table[mid].cat;
    }
  /* UCS falls into a gap between two runs.  */
  return -1;
}
/* Return the category of the code point UCS, looked up in the
   `categories` table through bisearch_cat.  */
enum category
category(wint_t ucs)
{
  /* index of the final table entry, as bisearch_cat expects */
  const int last = (int) (sizeof(categories) / sizeof(categories[0])) - 1;

  return bisearch_cat(ucs, categories, last);
}

View File

@ -0,0 +1,7 @@
/* category data */
/* Character categories.  The enumerators are not spelled out here; they
   are pulled in from categories.cat (the classification functions refer to
   them by names such as CAT_Lu or CAT_Zs). */
enum category {
#include "categories.cat"
};
/* Look up the category of the code point UCS.  The result may be -1 rather
   than an enumerator when the lookup finds no entry for UCS.
   NOTE(review): wint_t is used here without including a header that
   defines it, so includers have to provide it first -- confirm intended. */
extern enum category category(wint_t ucs);

View File

@ -39,5 +39,5 @@ No supporting OS subroutines are required.
/* Return non-zero if C is an alphanumeric wide character.
   The classification itself lives in iswalnum_l; it is called with a
   null locale argument.  */
int
iswalnum (wint_t c)
{
  return iswalnum_l (c, 0);
}

View File

@ -1,10 +1,23 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is an alphanumeric wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE and is
   then classified by its category: any of the letter categories,
   Letter_Number or Decimal_Number.
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   isalnum; everything else yields 0.  */
int
iswalnum_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
         || cat == CAT_Lm || cat == CAT_Lo
         || cat == CAT_Nl // Letter_Number
         || cat == CAT_Nd // Decimal_Number
         ;
#else
  return c < (wint_t)0x100 ? isalnum (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -63,377 +64,10 @@ PORTABILITY
No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <newlib.h>
#include <wctype.h>
#include <string.h>
#include <ctype.h>
#include "local.h"
#ifdef _MB_CAPABLE
#include "utf8alpha.h"
#endif /* _MB_CAPABLE */
/* Return non-zero if C is an alphabetic wide character.
   The classification is table driven and lives in iswalpha_l; it is
   called with a null locale argument.  The hand-maintained per-block
   tables formerly scanned here are no longer consulted.  */
int
iswalpha (wint_t c)
{
  return iswalpha_l (c, 0);
}

View File

@ -1,10 +1,21 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is an alphabetic wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE and is
   then classified by its category: any of the letter categories or
   Letter_Number.
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   isalpha; everything else yields 0.  */
int
iswalpha_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  return cat == CAT_LC || cat == CAT_Lu || cat == CAT_Ll || cat == CAT_Lt
         || cat == CAT_Lm || cat == CAT_Lo
         || cat == CAT_Nl // Letter_Number
         ;
#else
  return c < (wint_t)0x100 ? isalpha (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -62,26 +63,10 @@ PORTABILITY
No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <newlib.h>
#include <wctype.h>
#include <ctype.h>
#include <string.h>
#include "local.h"
/* Return non-zero if C is a blank wide character.
   The classification lives in iswblank_l; it is called with a null
   locale argument.  */
int
iswblank (wint_t c)
{
  return iswblank_l (c, 0);
}

View File

@ -1,10 +1,20 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is a blank wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it is
   blank if its category is Zs or if it is the tab character.
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   isblank; everything else yields 0.  */
int
iswblank_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  // open question kept from the original: exclude "<noBreak>" characters?
  return cat == CAT_Zs
         || c == '\t';
#else
  return c < 0x100 ? isblank (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -62,24 +63,10 @@ PORTABILITY
No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <newlib.h>
#include <wctype.h>
#include <ctype.h>
#include <string.h>
#include "local.h"
/* Return non-zero if C is a control wide character.
   The classification lives in iswcntrl_l; it is called with a null
   locale argument.  */
int
iswcntrl (wint_t c)
{
  return iswcntrl_l (c, 0);
}

View File

@ -1,10 +1,20 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is a control wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it is
   a control character if its category is Cc, or Zl/Zp (line and paragraph
   separator).
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   iscntrl; everything else yields 0.  */
int
iswcntrl_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  return cat == CAT_Cc
         || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
         ;
#else
  return c < 0x100 ? iscntrl (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -1,10 +1,41 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <wctype.h>
#include "local.h"
/* Test whether C belongs to the character class DESC, using LOCALE.
   DESC is one of the WC_* class constants; each one is forwarded to the
   matching isw*_l function together with LOCALE.
   Returns that function's result, or 0 for an unknown DESC.  */
int
iswctype_l (wint_t c, wctype_t desc, struct __locale_t *locale)
{
  switch (desc)
    {
    case WC_ALNUM:
      return iswalnum_l (c, locale);
    case WC_ALPHA:
      return iswalpha_l (c, locale);
    case WC_BLANK:
      return iswblank_l (c, locale);
    case WC_CNTRL:
      return iswcntrl_l (c, locale);
    case WC_DIGIT:
      return iswdigit_l (c, locale);
    case WC_GRAPH:
      return iswgraph_l (c, locale);
    case WC_LOWER:
      return iswlower_l (c, locale);
    case WC_PRINT:
      return iswprint_l (c, locale);
    case WC_PUNCT:
      return iswpunct_l (c, locale);
    case WC_SPACE:
      return iswspace_l (c, locale);
    case WC_UPPER:
      return iswupper_l (c, locale);
    case WC_XDIGIT:
      return iswxdigit_l (c, locale);
    default:
      break;
    }

  /* unknown class descriptor */
  return 0;
}

View File

@ -38,5 +38,6 @@ No supporting OS subroutines are required.
/* Return non-zero if C is one of the decimal digits '0' through '9'.  */
int
iswdigit (wint_t c)
{
  // category (c) == CAT_Nd not to be included as of C-99
  return c >= (wint_t)'0' && c <= (wint_t)'9';
}

View File

@ -4,5 +4,5 @@
/* Return non-zero if C is one of the decimal digits '0' through '9'.
   LOCALE is accepted for interface symmetry but is not consulted.  */
int
iswdigit_l (wint_t c, struct __locale_t *locale)
{
  return c >= (wint_t)'0' && c <= (wint_t)'9';
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -67,5 +68,5 @@ No supporting OS subroutines are required.
/* Return non-zero if C is a graphic wide character.
   The classification lives in iswgraph_l; it is called with a null
   locale argument.  */
int
iswgraph (wint_t c)
{
  return iswgraph_l (c, 0);
}

View File

@ -1,10 +1,23 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is a graphic wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it is
   graphic if it has a category (the lookup did not yield -1) and that
   category is none of Cc, Cf, Cs (surrogate), Zs, Zl or Zp.
   Without _MB_CAPABLE, the result is
   iswprint_l (c, locale) && !iswspace_l (c, locale).  */
int
iswgraph_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  return cat != -1
         && cat != CAT_Cc && cat != CAT_Cf
         && cat != CAT_Cs // Surrogate
         && cat != CAT_Zs
         && cat != CAT_Zl && cat != CAT_Zp // Line/Paragraph Separator
         ;
#else
  return iswprint_l (c, locale) && !iswspace_l (c, locale);
#endif /* _MB_CAPABLE */
}

View File

@ -17,7 +17,7 @@ SYNOPSIS
DESCRIPTION
<<iswlower>> is a function which classifies wide-character values that
are categorized as lowercase.
<<iswlower_l>> is like <<iswlower>> but performs the check based on the
locale specified by the locale object locale. If <[locale]> is
@ -38,5 +38,5 @@ No supporting OS subroutines are required.
/* Return non-zero if C is a lowercase wide character.
   The classification lives in iswlower_l; it is called with a null
   locale argument.  */
int
iswlower (wint_t c)
{
  return iswlower_l (c, 0);
}

View File

@ -1,10 +1,20 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is a lowercase wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it is
   lowercase if its category is Ll, or if its category is LC and towlower
   leaves it unchanged.
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   islower; everything else yields 0.  */
int
iswlower_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  // The wide-character class "lower" contains at least those characters wc
  // which are equal to towlower(wc) and different from towupper(wc).
  enum category cat = category (c);
  return cat == CAT_Ll || (cat == CAT_LC && towlower (c) == c);
#else
  return c < 0x100 ? islower (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -62,440 +63,10 @@ PORTABILITY
No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <newlib.h>
#include <wctype.h>
#include <string.h>
#include <ctype.h>
#include "local.h"
#ifdef _MB_CAPABLE
#include "utf8print.h"
#endif /* _MB_CAPABLE */
/* Return non-zero if C is a printable wide character.
   The classification is table driven and lives in iswprint_l; it is
   called with a null locale argument.  The hand-maintained per-block
   tables formerly scanned here are no longer consulted.  */
int
iswprint (wint_t c)
{
  return iswprint_l (c, 0);
}

View File

@ -1,10 +1,21 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is a printable wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it is
   printable if it has a category (the lookup did not yield -1) and that
   category is none of Cc, Cf or Cs (surrogate).
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   isprint; everything else yields 0.  */
int
iswprint_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  return cat != -1
         && cat != CAT_Cc && cat != CAT_Cf
         && cat != CAT_Cs // Surrogate
         ;
#else
  return c < (wint_t)0x100 ? isprint (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -62,14 +63,10 @@ PORTABILITY
No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <newlib.h>
#include <wctype.h>
#include <string.h>
#include <ctype.h>
#include "local.h"
/* Return non-zero if C is a punctuation wide character.
   The classification lives in iswpunct_l; it is called with a null
   locale argument.  */
int
iswpunct (wint_t c)
{
  return iswpunct_l (c, 0);
}

View File

@ -1,10 +1,26 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is a punctuation wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it
   counts as punctuation if its category is one of the P* categories or
   Sm, and additionally Sc, Sk, So or No, which are accepted for
   backwards consistency.
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   ispunct; everything else yields 0.  */
int
iswpunct_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  return cat == CAT_Pc || cat == CAT_Pd || cat == CAT_Pe || cat == CAT_Pf
         || cat == CAT_Pi || cat == CAT_Po || cat == CAT_Ps
         || cat == CAT_Sm // Math Symbols
         // the following are included for backwards consistency:
         || cat == CAT_Sc // Currency Symbols
         || cat == CAT_Sk // Modifier_Symbol
         || cat == CAT_So // Other_Symbol
         || cat == CAT_No // Other_Number
         ;
#else
  return c < (wint_t)0x100 ? ispunct (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -62,27 +63,10 @@ PORTABILITY
No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <newlib.h>
#include <wctype.h>
#include <ctype.h>
#include <string.h>
#include "local.h"
/* Return non-zero if C is a white-space wide character.
   The classification lives in iswspace_l; it is called with a null
   locale argument.  */
int
iswspace (wint_t c)
{
  return iswspace_l (c, 0);
}

View File

@ -1,10 +1,21 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is a white-space wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it is
   white space if its category is Zs, Zl or Zp, or if it lies in the
   range 0x9 .. 0xD.
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   isspace; everything else yields 0.  */
int
iswspace_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  enum category cat = category (c);
  // open question kept from the original: exclude "<noBreak>" characters?
  return cat == CAT_Zs
         || cat == CAT_Zl || cat == CAT_Zp // Line/Paragraph Separator
         || (c >= 0x9 && c <= 0xD);
#else
  return c < 0x100 ? isspace (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -17,14 +17,14 @@ SYNOPSIS
DESCRIPTION
<<iswupper>> is a function which classifies wide-character values that
are categorized as uppercase.
<<iswupper_l>> is like <<iswupper>> but performs the check based on the
locale specified by the locale object locale. If <[locale]> is
LC_GLOBAL_LOCALE or not a valid locale object, the behaviour is undefined.
RETURNS
<<iswupper>>, <<iswupper_l>> return non-zero if <[c]> is an uppercase wide character.
PORTABILITY
<<iswupper>> is C99.
@ -38,5 +38,5 @@ No supporting OS subroutines are required.
/* Return non-zero if C is an uppercase wide character.
   The classification lives in iswupper_l; it is called with a null
   locale argument.  */
int
iswupper (wint_t c)
{
  return iswupper_l (c, 0);
}

View File

@ -1,10 +1,20 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
#include "categories.h"
/* Return non-zero if C is an uppercase wide character.
   With _MB_CAPABLE, C is first passed through _jp2uc_l for LOCALE; it is
   uppercase if its category is Lu, or if its category is LC and towupper
   leaves it unchanged.
   Without _MB_CAPABLE, only values below 0x100 are classified, by
   isupper; everything else yields 0.  */
int
iswupper_l (wint_t c, struct __locale_t *locale)
{
#ifdef _MB_CAPABLE
  c = _jp2uc_l (c, locale);
  // The wide-character class "upper" contains at least those characters wc
  // which are equal to towupper(wc) and different from towlower(wc).
  enum category cat = category (c);
  return cat == CAT_Lu || (cat == CAT_LC && towupper (c) == c);
#else
  /* isupper, not islower: this is the uppercase test.  */
  return c < 0x100 ? isupper (c) : 0;
#endif /* _MB_CAPABLE */
}

View File

@ -38,7 +38,7 @@ No supporting OS subroutines are required.
/* Return non-zero if C is a hexadecimal digit:
   '0'..'9', 'a'..'f' or 'A'..'F'.  */
int
iswxdigit (wint_t c)
{
  return (c >= (wint_t)'0' && c <= (wint_t)'9')
         || (c >= (wint_t)'a' && c <= (wint_t)'f')
         || (c >= (wint_t)'A' && c <= (wint_t)'F');
}

View File

@ -1,7 +1,8 @@
/* Routine to translate from Japanese characters to Unicode */
/* Routine to translate between Japanese characters and Unicode */
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff: consider locale, add dummy uc2jp
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -25,7 +26,7 @@
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@ -40,13 +41,15 @@
#include <string.h>
#include <wctype.h>
#include "local.h"
#include "jp2uc.h"
/* Japanese encoding types supported */
#define JP_JIS 1
#define JP_SJIS 2
#define JP_EUCJP 3
/* Japanese to Unicode conversion routine */
#include "jp2uc.h"
static wint_t
__jp2uc (wint_t c, int type)
{
@ -104,7 +107,7 @@ __jp2uc (wint_t c, int type)
return d02f4[index];
}
/* handle smaller ranges here */
/* handle smaller ranges here */
switch (byte1)
{
case 0xA1:
@ -148,18 +151,48 @@ __jp2uc (wint_t c, int type)
return WEOF;
}
return WEOF;
return WEOF;
}
/* Unicode to Japanese conversion routine.

   Placeholder only: the back-conversion is not implemented, so the
   character is returned unchanged and TYPE (one of the JP_* encoding
   constants passed by the caller) is ignored.  The #warning below makes
   this visible at build time.  */
static wint_t
__uc2jp (wint_t c, int type)
{
#warning back-conversion Unicode to Japanese not implemented; needed for towupper/towlower
return c;
}
/* Japanese to Unicode conversion interface.

   c  wide character in the encoding of the selected locale
   l  locale to consult; a null pointer selects the current locale

   If the locale's charset is one of "JIS", "SJIS" or "EUCJP" the
   character is run through __jp2uc with the matching encoding type;
   for any other charset it is returned unchanged.  */
wint_t
_jp2uc_l (wint_t c, struct __locale_t * l)
{
  const char *charset;

  if (l)
    charset = __locale_charset (l);
  else
    charset = __current_locale_charset ();

  if (!strcmp (charset, "JIS"))
    return __jp2uc (c, JP_JIS);
  if (!strcmp (charset, "SJIS"))
    return __jp2uc (c, JP_SJIS);
  if (!strcmp (charset, "EUCJP"))
    return __jp2uc (c, JP_EUCJP);

  return c;
}
/* Japanese to Unicode conversion for the current locale.

   Thin wrapper around _jp2uc_l: a null locale makes _jp2uc_l look up the
   current locale's charset.  The conversion must happen exactly once, so
   no charset handling is done here; converting locally and then
   delegating would feed an already converted value through __jp2uc a
   second time.  */
wint_t
_jp2uc (wint_t c)
{
  return _jp2uc_l (c, 0);
}
/* Unicode to Japanese conversion interface.

   c  Unicode wide character
   l  locale to consult; a null pointer selects the current locale

   If the locale's charset is one of "JIS", "SJIS" or "EUCJP" the
   character is passed to __uc2jp with the matching encoding type;
   for any other charset it is returned unchanged.  */
wint_t
_uc2jp_l (wint_t c, struct __locale_t * l)
{
  const char *charset;

  if (l)
    charset = __locale_charset (l);
  else
    charset = __current_locale_charset ();

  if (!strcmp (charset, "JIS"))
    return __uc2jp (c, JP_JIS);
  if (!strcmp (charset, "SJIS"))
    return __uc2jp (c, JP_SJIS);
  if (!strcmp (charset, "EUCJP"))
    return __uc2jp (c, JP_EUCJP);

  return c;
}

View File

@ -1,3 +1,5 @@
/* Modified (m) 2017 Thomas Wolff: fixed locale/wchar handling */
/* wctrans constants */
#include <_ansi.h>
@ -21,11 +23,22 @@
#define WC_UPPER 11
#define WC_XDIGIT 12
/* internal function to translate JP to Unicode */
/* internal functions to translate between JP and Unicode */
/* note this is not applicable to Cygwin, where wchar_t is always Unicode,
and should not be applicable to most other platforms either;
* platforms for which wchar_t is not Unicode should be explicitly listed
* the transformation should be applied to all non-Unicode locales
(also Chinese, Korean, and even 8-bit locales such as *.CP1252)
* for towupper and towlower, the result must be back-transformed
into the respective locale encoding; currently NOT IMPLEMENTED
*/
#ifdef __CYGWIN__
/* Under Cygwin, the incoming wide character is already given in UTF due
to the requirements of the underlying OS. */
/* Under Cygwin, wchar_t (or its extension wint_t) is Unicode */
#define _jp2uc(c) (c)
#define _jp2uc_l(c, l) (c)
#define _uc2jp_l(c, l) (c)
#else
wint_t _jp2uc (wint_t);
wint_t _jp2uc_l (wint_t, struct __locale_t *);
wint_t _uc2jp_l (wint_t, struct __locale_t *);
#endif

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -69,10 +70,9 @@ No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <string.h>
#include <reent.h>
#include <wctype.h>
#include <errno.h>
//#include <errno.h>
#include "local.h"
wint_t
@ -80,13 +80,13 @@ _towctrans_r (struct _reent *r,
wint_t c,
wctrans_t w)
{
if (w == WCT_TOLOWER)
return towlower (c);
else if (w == WCT_TOUPPER)
return towupper (c);
if (w == WCT_TOLOWER || w == WCT_TOUPPER)
return towctrans_l (c, w, 0);
else
{
r->_errno = EINVAL;
// skipping this because it was causing trouble (cygwin crash)
// and there is no errno specified for towctrans
//r->_errno = EINVAL;
return c;
}
}
@ -94,7 +94,7 @@ _towctrans_r (struct _reent *r,
#ifndef _REENT_ONLY
wint_t
towctrans (wint_t c,
wctrans_t w)
wctrans_t w)
{
return _towctrans_r (_REENT, c, w);
}

View File

@ -1,10 +1,101 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <wctype.h>
//#include <errno.h>
#include "local.h"
/* Sub-kinds of a TO1 entry, stored in its `delta' field: they tell which
   parity of code point is the capital of an adjacent upper/lower pair.
   EVENCAP: even code points convert to lower case by adding 1.
   ODDCAP:  odd code points convert to lower case by adding 1.  */
enum {EVENCAP, ODDCAP};

/* Values of the `mode' field of a table entry.
   TO1:    alternating upper/lower pairs; `delta' holds EVENCAP or ODDCAP.
   TOLO:   lower case is c + delta.
   TOUP:   upper case is c + delta.
   TOBOTH: lower case is c + 1 and upper case is c - 1.  */
enum {TO1, TOLO, TOUP, TOBOTH};

/* Case conversion table: one entry per range of code points sharing the
   same conversion rule.  The entries come from the included generated
   file and are searched with a binary search, so they are expected to be
   sorted by code point (NOTE(review): confirm against the generator).
   The table is read-only, hence const.

   first  first code point of the range
   diff   distance from `first' to the last code point of the range
   mode   conversion rule, one of TO1/TOLO/TOUP/TOBOTH
   delta  signed offset for TOLO/TOUP, or EVENCAP/ODDCAP for TO1

   The bit-field widths add up to 48 bits; the packed attribute asks the
   compiler not to insert padding.  */
static const struct caseconv_entry {
  unsigned int first: 21;
  unsigned short diff: 8;
  unsigned char mode: 2;
  int delta: 17;
} __attribute__ ((packed))
caseconv_table [] = {
#include "caseconv.t"
};

/* Inclusive bounds of the range described by table entry CE.  */
#define first(ce) ((ce).first)
#define last(ce) ((ce).first + (ce).diff)
/* Binary search in the interval table.

   ucs    code point to look up
   table  array of range entries
   max    index of the last entry in TABLE (not the entry count)

   Returns a pointer to the entry whose [first, last] range contains UCS,
   or a null pointer if no entry does.  */
static const struct caseconv_entry *
bisearch(wint_t ucs, const struct caseconv_entry *table, int max)
{
  int lo = 0;
  int hi = max;

  /* Quick rejection of anything outside the span covered by the table.  */
  if (ucs < first(table[0]) || ucs > last(table[max]))
    return 0;

  while (lo <= hi)
    {
      int probe = (lo + hi) / 2;

      if (ucs < first(table[probe]))
	hi = probe - 1;
      else if (ucs > last(table[probe]))
	lo = probe + 1;
      else
	return &table[probe];
    }

  return 0;
}
/* Convert Unicode character C to lower case using caseconv_table.
   Returns C itself when the table has no entry for it or when the entry
   found does not describe a lower-case mapping for C.  */
static wint_t
toulower (wint_t c)
{
  const struct caseconv_entry * cce =
    bisearch(c, caseconv_table,
	     sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);

  if (cce)
    switch (cce->mode)
      {
      case TOLO:
	return c + cce->delta;
      case TOBOTH:
	return c + 1;
      case TO1:
	switch (cce->delta)
	  {
	  case EVENCAP:
	    if (!(c & 1))
	      return c + 1;
	    break;
	  case ODDCAP:
	    if (c & 1)
	      return c + 1;
	    break;
	  default:
	    break;
	  }
	break;
      default:
	/* Entry only describes an upper-case mapping.  */
	break;
      }

  /* Every path that did not convert ends here, so the function never
     runs off its end without a value.  */
  return c;
}

/* Convert Unicode character C to upper case using caseconv_table.
   Returns C itself when the table has no entry for it or when the entry
   found does not describe an upper-case mapping for C.  */
static wint_t
touupper (wint_t c)
{
  const struct caseconv_entry * cce =
    bisearch(c, caseconv_table,
	     sizeof(caseconv_table) / sizeof(*caseconv_table) - 1);

  if (cce)
    switch (cce->mode)
      {
      case TOUP:
	return c + cce->delta;
      case TOBOTH:
	return c - 1;
      case TO1:
	switch (cce->delta)
	  {
	  case EVENCAP:
	    if (c & 1)
	      return c - 1;
	    break;
	  case ODDCAP:
	    if (!(c & 1))
	      return c - 1;
	    break;
	  default:
	    break;
	  }
	break;
      default:
	/* Entry only describes a lower-case mapping.  */
	break;
      }

  /* Every path that did not convert ends here, so the function never
     runs off its end without a value.  */
  return c;
}

/* Apply the character mapping W to wide character C in LOCALE.

   c       wide character in the encoding of LOCALE
   w       WCT_TOLOWER or WCT_TOUPPER; any other value is not a mapping
           known here
   locale  locale used to map C to Unicode and the result back

   Returns the converted character, or C unchanged if W is not a known
   mapping or the conversion leaves the character as it is.  */
wint_t
towctrans_l (wint_t c, wctrans_t w, struct __locale_t *locale)
{
  /* The case tables are indexed by Unicode value.  */
  wint_t u = _jp2uc_l (c, locale);
  wint_t res;

  if (w == WCT_TOLOWER)
    res = toulower (u);
  else if (w == WCT_TOUPPER)
    res = touupper (u);
  else
    {
      // skipping the errno setting that was previously involved
      // by delegating to towctrans; it was causing trouble (cygwin crash)
      // and there is no errno specified for towctrans
      return c;
    }

  /* Only a character that actually changed is converted back; otherwise
     the caller's original value is returned untouched.  */
  if (res != u)
    return _uc2jp_l (res, locale);
  else
    return c;
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -65,9 +66,6 @@ No supporting OS subroutines are required.
*/
#include <_ansi.h>
#include <newlib.h>
#include <string.h>
#include <reent.h>
#include <ctype.h>
#include <wctype.h>
#include "local.h"
@ -76,500 +74,8 @@ wint_t
towlower (wint_t c)
{
#ifdef _MB_CAPABLE
c = _jp2uc (c);
/* Based on and tested against Unicode 5.2 */
/* Expression used to filter out the characters for the below code:
awk -F\; '{ if ( $14 != "" ) print $1; }' UnicodeData.txt
*/
if (c < 0x100)
{
if ((c >= 0x0041 && c <= 0x005a) ||
(c >= 0x00c0 && c <= 0x00d6) ||
(c >= 0x00d8 && c <= 0x00de))
return (c + 0x20);
return c;
}
else if (c < 0x300)
{
if ((c >= 0x0100 && c <= 0x012e) ||
(c >= 0x0132 && c <= 0x0136) ||
(c >= 0x014a && c <= 0x0176) ||
(c >= 0x01de && c <= 0x01ee) ||
(c >= 0x01f8 && c <= 0x021e) ||
(c >= 0x0222 && c <= 0x0232))
{
if (!(c & 0x01))
return (c + 1);
return c;
}
if (c == 0x0130)
return 0x0069;
if ((c >= 0x0139 && c <= 0x0147) ||
(c >= 0x01cd && c <= 0x01db))
{
if (c & 0x01)
return (c + 1);
return c;
}
if (c >= 0x178 && c <= 0x01f7)
{
wint_t k;
switch (c)
{
case 0x0178:
k = 0x00ff;
break;
case 0x0179:
case 0x017b:
case 0x017d:
case 0x0182:
case 0x0184:
case 0x0187:
case 0x018b:
case 0x0191:
case 0x0198:
case 0x01a0:
case 0x01a2:
case 0x01a4:
case 0x01a7:
case 0x01ac:
case 0x01af:
case 0x01b3:
case 0x01b5:
case 0x01b8:
case 0x01bc:
case 0x01c5:
case 0x01c8:
case 0x01cb:
case 0x01cd:
case 0x01cf:
case 0x01d1:
case 0x01d3:
case 0x01d5:
case 0x01d7:
case 0x01d9:
case 0x01db:
case 0x01f2:
case 0x01f4:
k = c + 1;
break;
case 0x0181:
k = 0x0253;
break;
case 0x0186:
k = 0x0254;
break;
case 0x0189:
k = 0x0256;
break;
case 0x018a:
k = 0x0257;
break;
case 0x018e:
k = 0x01dd;
break;
case 0x018f:
k = 0x0259;
break;
case 0x0190:
k = 0x025b;
break;
case 0x0193:
k = 0x0260;
break;
case 0x0194:
k = 0x0263;
break;
case 0x0196:
k = 0x0269;
break;
case 0x0197:
k = 0x0268;
break;
case 0x019c:
k = 0x026f;
break;
case 0x019d:
k = 0x0272;
break;
case 0x019f:
k = 0x0275;
break;
case 0x01a6:
k = 0x0280;
break;
case 0x01a9:
k = 0x0283;
break;
case 0x01ae:
k = 0x0288;
break;
case 0x01b1:
k = 0x028a;
break;
case 0x01b2:
k = 0x028b;
break;
case 0x01b7:
k = 0x0292;
break;
case 0x01c4:
case 0x01c7:
case 0x01ca:
case 0x01f1:
k = c + 2;
break;
case 0x01f6:
k = 0x0195;
break;
case 0x01f7:
k = 0x01bf;
break;
default:
k = 0;
}
if (k != 0)
return k;
}
else if (c == 0x0220)
return 0x019e;
else if (c >= 0x023a && c <= 0x024e)
{
wint_t k;
switch (c)
{
case 0x023a:
k = 0x2c65;
break;
case 0x023b:
case 0x0241:
case 0x0246:
case 0x0248:
case 0x024a:
case 0x024c:
case 0x024e:
k = c + 1;
break;
case 0x023d:
k = 0x019a;
break;
case 0x023e:
k = 0x2c66;
break;
case 0x0243:
k = 0x0180;
break;
case 0x0244:
k = 0x0289;
break;
case 0x0245:
k = 0x028c;
break;
default:
k = 0;
}
if (k != 0)
return k;
}
}
else if (c < 0x0400)
{
if (c == 0x0370 || c == 0x0372 || c == 0x0376)
return (c + 1);
if (c >= 0x0391 && c <= 0x03ab && c != 0x03a2)
return (c + 0x20);
if (c >= 0x03d8 && c <= 0x03ee && !(c & 0x01))
return (c + 1);
if (c >= 0x0386 && c <= 0x03ff)
{
wint_t k;
switch (c)
{
case 0x0386:
k = 0x03ac;
break;
case 0x0388:
k = 0x03ad;
break;
case 0x0389:
k = 0x03ae;
break;
case 0x038a:
k = 0x03af;
break;
case 0x038c:
k = 0x03cc;
break;
case 0x038e:
k = 0x03cd;
break;
case 0x038f:
k = 0x03ce;
break;
case 0x03cf:
k = 0x03d7;
break;
case 0x03f4:
k = 0x03b8;
break;
case 0x03f7:
k = 0x03f8;
break;
case 0x03f9:
k = 0x03f2;
break;
case 0x03fa:
k = 0x03fb;
break;
case 0x03fd:
k = 0x037b;
break;
case 0x03fe:
k = 0x037c;
break;
case 0x03ff:
k = 0x037d;
break;
default:
k = 0;
}
if (k != 0)
return k;
}
}
else if (c < 0x500)
{
if (c >= 0x0400 && c <= 0x040f)
return (c + 0x50);
if (c >= 0x0410 && c <= 0x042f)
return (c + 0x20);
if ((c >= 0x0460 && c <= 0x0480) ||
(c >= 0x048a && c <= 0x04be) ||
(c >= 0x04d0 && c <= 0x04fe))
{
if (!(c & 0x01))
return (c + 1);
return c;
}
if (c == 0x04c0)
return 0x04cf;
if (c >= 0x04c1 && c <= 0x04cd)
{
if (c & 0x01)
return (c + 1);
return c;
}
}
else if (c < 0x1f00)
{
if ((c >= 0x0500 && c <= 0x050e) ||
(c >= 0x0510 && c <= 0x0524) ||
(c >= 0x1e00 && c <= 0x1e94) ||
(c >= 0x1ea0 && c <= 0x1ef8))
{
if (!(c & 0x01))
return (c + 1);
return c;
}
if (c >= 0x0531 && c <= 0x0556)
return (c + 0x30);
if (c >= 0x10a0 && c <= 0x10c5)
return (c + 0x1c60);
if (c == 0x1e9e)
return 0x00df;
if (c >= 0x1efa && c <= 0x1efe && !(c & 0x01))
return (c + 1);
}
else if (c < 0x2000)
{
if ((c >= 0x1f08 && c <= 0x1f0f) ||
(c >= 0x1f18 && c <= 0x1f1d) ||
(c >= 0x1f28 && c <= 0x1f2f) ||
(c >= 0x1f38 && c <= 0x1f3f) ||
(c >= 0x1f48 && c <= 0x1f4d) ||
(c >= 0x1f68 && c <= 0x1f6f) ||
(c >= 0x1f88 && c <= 0x1f8f) ||
(c >= 0x1f98 && c <= 0x1f9f) ||
(c >= 0x1fa8 && c <= 0x1faf))
return (c - 0x08);
if (c >= 0x1f59 && c <= 0x1f5f)
{
if (c & 0x01)
return (c - 0x08);
return c;
}
if (c >= 0x1fb8 && c <= 0x1ffc)
{
wint_t k;
switch (c)
{
case 0x1fb8:
case 0x1fb9:
case 0x1fd8:
case 0x1fd9:
case 0x1fe8:
case 0x1fe9:
k = c - 0x08;
break;
case 0x1fba:
case 0x1fbb:
k = c - 0x4a;
break;
case 0x1fbc:
k = 0x1fb3;
break;
case 0x1fc8:
case 0x1fc9:
case 0x1fca:
case 0x1fcb:
k = c - 0x56;
break;
case 0x1fcc:
k = 0x1fc3;
break;
case 0x1fda:
case 0x1fdb:
k = c - 0x64;
break;
case 0x1fea:
case 0x1feb:
k = c - 0x70;
break;
case 0x1fec:
k = 0x1fe5;
break;
case 0x1ff8:
case 0x1ff9:
k = c - 0x80;
break;
case 0x1ffa:
case 0x1ffb:
k = c - 0x7e;
break;
case 0x1ffc:
k = 0x1ff3;
break;
default:
k = 0;
}
if (k != 0)
return k;
}
}
else if (c < 0x2c00)
{
if (c >= 0x2160 && c <= 0x216f)
return (c + 0x10);
if (c >= 0x24b6 && c <= 0x24cf)
return (c + 0x1a);
switch (c)
{
case 0x2126:
return 0x03c9;
case 0x212a:
return 0x006b;
case 0x212b:
return 0x00e5;
case 0x2132:
return 0x214e;
case 0x2183:
return 0x2184;
}
}
else if (c < 0x2d00)
{
if (c >= 0x2c00 && c <= 0x2c2e)
return (c + 0x30);
if (c >= 0x2c80 && c <= 0x2ce2 && !(c & 0x01))
return (c + 1);
switch (c)
{
case 0x2c60:
return 0x2c61;
case 0x2c62:
return 0x026b;
case 0x2c63:
return 0x1d7d;
case 0x2c64:
return 0x027d;
case 0x2c67:
case 0x2c69:
case 0x2c6b:
case 0x2c72:
case 0x2c75:
case 0x2ceb:
case 0x2ced:
return c + 1;
case 0x2c6d:
return 0x0251;
case 0x2c6e:
return 0x0271;
case 0x2c6f:
return 0x0250;
case 0x2c70:
return 0x0252;
case 0x2c7e:
return 0x023f;
case 0x2c7f:
return 0x0240;
}
}
else if (c >= 0xa600 && c < 0xa800)
{
if ((c >= 0xa640 && c <= 0xa65e) ||
(c >= 0xa662 && c <= 0xa66c) ||
(c >= 0xa680 && c <= 0xa696) ||
(c >= 0xa722 && c <= 0xa72e) ||
(c >= 0xa732 && c <= 0xa76e) ||
(c >= 0xa77f && c <= 0xa786))
{
if (!(c & 1))
return (c + 1);
return c;
}
switch (c)
{
case 0xa779:
case 0xa77b:
case 0xa77e:
case 0xa78b:
return (c + 1);
case 0xa77d:
return 0x1d79;
}
}
else
{
if (c >= 0xff21 && c <= 0xff3a)
return (c + 0x20);
if (c >= 0x10400 && c <= 0x10427)
return (c + 0x28);
}
return c;
return towctrans (c, WCT_TOLOWER);
#else
return (c < 0x00ff ? (wint_t)(tolower ((int)c)) : c);
return c < 0x00ff ? (wint_t)(tolower ((int)c)) : c;
#endif /* _MB_CAPABLE */
}

View File

@ -1,3 +1,4 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <newlib.h>
#include <wctype.h>
@ -6,7 +7,9 @@
/* Convert wide character C to lower case for LOCALE.
   Returns the converted character as produced by the function this
   wrapper delegates to.  */
wint_t
towlower_l (wint_t c, struct __locale_t *locale)
{
/* With multibyte support the conversion is delegated to towctrans_l,
which receives LOCALE; otherwise LOCALE is ignored and the plain
towlower is used. */
#ifdef _MB_CAPABLE
return towctrans_l (c, WCT_TOLOWER, locale);
#else
return towlower (c);
#endif /* _MB_CAPABLE */
}

View File

@ -1,5 +1,6 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Modified (m) 2017 Thomas Wolff to refer to generated Unicode data tables.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -76,518 +77,8 @@ wint_t
towupper (wint_t c)
{
#ifdef _MB_CAPABLE
c = _jp2uc (c);
/* Based on and tested against Unicode 5.2 */
/* Expression used to filter out the characters for the below code:
awk -F\; '{ if ( $13 != "" ) print $1; }' UnicodeData.txt
*/
if (c < 0x100)
{
if (c == 0x00b5)
return 0x039c;
if ((c >= 0x00e0 && c <= 0x00fe && c != 0x00f7) ||
(c >= 0x0061 && c <= 0x007a))
return (c - 0x20);
if (c == 0xff)
return 0x0178;
return c;
}
else if (c < 0x300)
{
if ((c >= 0x0101 && c <= 0x012f) ||
(c >= 0x0133 && c <= 0x0137) ||
(c >= 0x014b && c <= 0x0177) ||
(c >= 0x01df && c <= 0x01ef) ||
(c >= 0x01f9 && c <= 0x021f) ||
(c >= 0x0223 && c <= 0x0233) ||
(c >= 0x0247 && c <= 0x024f))
{
if (c & 0x01)
return (c - 1);
return c;
}
if ((c >= 0x013a && c <= 0x0148) ||
(c >= 0x01ce && c <= 0x01dc) ||
c == 0x023c || c == 0x0242)
{
if (!(c & 0x01))
return (c - 1);
return c;
}
if (c == 0x0131)
return 0x0049;
if (c == 0x017a || c == 0x017c || c == 0x017e)
return (c - 1);
if (c >= 0x017f && c <= 0x0292)
{
wint_t k;
switch (c)
{
case 0x017f:
k = 0x0053;
break;
case 0x0180:
k = 0x0243;
break;
case 0x0183:
k = 0x0182;
break;
case 0x0185:
k = 0x0184;
break;
case 0x0188:
k = 0x0187;
break;
case 0x018c:
k = 0x018b;
break;
case 0x0192:
k = 0x0191;
break;
case 0x0195:
k = 0x01f6;
break;
case 0x0199:
k = 0x0198;
break;
case 0x019a:
k = 0x023d;
break;
case 0x019e:
k = 0x0220;
break;
case 0x01a1:
case 0x01a3:
case 0x01a5:
case 0x01a8:
case 0x01ad:
case 0x01b0:
case 0x01b4:
case 0x01b6:
case 0x01b9:
case 0x01bd:
case 0x01c5:
case 0x01c8:
case 0x01cb:
case 0x01f2:
case 0x01f5:
k = c - 1;
break;
case 0x01bf:
k = 0x01f7;
break;
case 0x01c6:
case 0x01c9:
case 0x01cc:
k = c - 2;
break;
case 0x01dd:
k = 0x018e;
break;
case 0x01f3:
k = 0x01f1;
break;
case 0x023f:
k = 0x2c7e;
break;
case 0x0240:
k = 0x2c7f;
break;
case 0x0250:
k = 0x2c6f;
break;
case 0x0251:
k = 0x2c6d;
break;
case 0x0252:
k = 0x2c70;
break;
case 0x0253:
k = 0x0181;
break;
case 0x0254:
k = 0x0186;
break;
case 0x0256:
k = 0x0189;
break;
case 0x0257:
k = 0x018a;
break;
case 0x0259:
k = 0x018f;
break;
case 0x025b:
k = 0x0190;
break;
case 0x0260:
k = 0x0193;
break;
case 0x0263:
k = 0x0194;
break;
case 0x0268:
k = 0x0197;
break;
case 0x0269:
k = 0x0196;
break;
case 0x026b:
k = 0x2c62;
break;
case 0x026f:
k = 0x019c;
break;
case 0x0271:
k = 0x2c6e;
break;
case 0x0272:
k = 0x019d;
break;
case 0x0275:
k = 0x019f;
break;
case 0x027d:
k = 0x2c64;
break;
case 0x0280:
k = 0x01a6;
break;
case 0x0283:
k = 0x01a9;
break;
case 0x0288:
k = 0x01ae;
break;
case 0x0289:
k = 0x0244;
break;
case 0x028a:
k = 0x01b1;
break;
case 0x028b:
k = 0x01b2;
break;
case 0x028c:
k = 0x0245;
break;
case 0x0292:
k = 0x01b7;
break;
default:
k = 0;
}
if (k != 0)
return k;
}
}
else if (c < 0x0400)
{
wint_t k;
if (c >= 0x03ad && c <= 0x03af)
return (c - 0x25);
if (c >= 0x03b1 && c <= 0x03cb && c != 0x03c2)
return (c - 0x20);
if (c >= 0x03d9 && c <= 0x03ef && (c & 1))
return (c - 1);
switch (c)
{
case 0x0345:
k = 0x0399;
break;
case 0x0371:
case 0x0373:
case 0x0377:
case 0x03f8:
case 0x03fb:
k = c - 1;
break;
case 0x037b:
case 0x037c:
case 0x037d:
k = c + 0x82;
break;
case 0x03ac:
k = 0x0386;
break;
case 0x03c2:
k = 0x03a3;
break;
case 0x03cc:
k = 0x038c;
break;
case 0x03cd:
case 0x03ce:
k = c - 0x3f;
break;
case 0x03d0:
k = 0x0392;
break;
case 0x03d1:
k = 0x0398;
break;
case 0x03d5:
k = 0x03a6;
break;
case 0x03d6:
k = 0x03a0;
break;
case 0x03d7:
k = 0x03cf;
break;
case 0x03f0:
k = 0x039a;
break;
case 0x03f1:
k = 0x03a1;
break;
case 0x03f2:
k = 0x03f9;
break;
case 0x03f5:
k = 0x0395;
break;
default:
k = 0;
}
if (k != 0)
return k;
}
else if (c < 0x500)
{
if (c >= 0x0430 && c <= 0x044f)
return (c - 0x20);
if (c >= 0x0450 && c <= 0x045f)
return (c - 0x50);
if ((c >= 0x0461 && c <= 0x0481) ||
(c >= 0x048b && c <= 0x04bf) ||
(c >= 0x04d1 && c <= 0x04ff))
{
if (c & 0x01)
return (c - 1);
return c;
}
if (c >= 0x04c2 && c <= 0x04ce)
{
if (!(c & 0x01))
return (c - 1);
return c;
}
if (c == 0x04cf)
return 0x04c0;
}
else if (c < 0x0600)
{
if (c >= 0x0501 && c <= 0x0525 && (c & 1))
return c - 1;
if (c >= 0x0561 && c <= 0x0586)
return (c - 0x30);
}
else if (c < 0x1f00)
{
if (c == 0x1d79)
return 0xa77d;
if (c == 0x1d7d)
return 0x2c63;
if ((c >= 0x1e01 && c <= 0x1e95) ||
(c >= 0x1ea1 && c <= 0x1eff))
{
if (c & 0x01)
return (c - 1);
return c;
}
if (c == 0x1e9b)
return 0x1e60;
}
else if (c < 0x2000)
{
if ((c >= 0x1f00 && c <= 0x1f07) ||
(c >= 0x1f10 && c <= 0x1f15) ||
(c >= 0x1f20 && c <= 0x1f27) ||
(c >= 0x1f30 && c <= 0x1f37) ||
(c >= 0x1f40 && c <= 0x1f45) ||
(c >= 0x1f60 && c <= 0x1f67) ||
(c >= 0x1f80 && c <= 0x1f87) ||
(c >= 0x1f90 && c <= 0x1f97) ||
(c >= 0x1fa0 && c <= 0x1fa7))
return (c + 0x08);
if (c >= 0x1f51 && c <= 0x1f57 && (c & 0x01))
return (c + 0x08);
if (c >= 0x1f70 && c <= 0x1ff3)
{
wint_t k;
switch (c)
{
case 0x1fb0:
k = 0x1fb8;
break;
case 0x1fb1:
k = 0x1fb9;
break;
case 0x1f70:
k = 0x1fba;
break;
case 0x1f71:
k = 0x1fbb;
break;
case 0x1fb3:
k = 0x1fbc;
break;
case 0x1fbe:
k = 0x0399;
break;
case 0x1f72:
k = 0x1fc8;
break;
case 0x1f73:
k = 0x1fc9;
break;
case 0x1f74:
k = 0x1fca;
break;
case 0x1f75:
k = 0x1fcb;
break;
case 0x1fc3:
k = 0x1fcc;
break;
case 0x1fd0:
k = 0x1fd8;
break;
case 0x1fd1:
k = 0x1fd9;
break;
case 0x1f76:
k = 0x1fda;
break;
case 0x1f77:
k = 0x1fdb;
break;
case 0x1fe0:
k = 0x1fe8;
break;
case 0x1fe1:
k = 0x1fe9;
break;
case 0x1f7a:
k = 0x1fea;
break;
case 0x1f7b:
k = 0x1feb;
break;
case 0x1fe5:
k = 0x1fec;
break;
case 0x1f78:
k = 0x1ff8;
break;
case 0x1f79:
k = 0x1ff9;
break;
case 0x1f7c:
k = 0x1ffa;
break;
case 0x1f7d:
k = 0x1ffb;
break;
case 0x1ff3:
k = 0x1ffc;
break;
default:
k = 0;
}
if (k != 0)
return k;
}
}
else if (c < 0x3000)
{
if (c == 0x214e)
return 0x2132;
if (c == 0x2184)
return 0x2183;
if (c >= 0x2170 && c <= 0x217f)
return (c - 0x10);
if (c >= 0x24d0 && c <= 0x24e9)
return (c - 0x1a);
if (c >= 0x2c30 && c <= 0x2c5e)
return (c - 0x30);
if ((c >= 0x2c68 && c <= 0x2c6c && !(c & 1)) ||
(c >= 0x2c81 && c <= 0x2ce3 && (c & 1)) ||
c == 0x2c73 || c == 0x2c76 ||
c == 0x2cec || c == 0x2cee)
return (c - 1);
if (c >= 0x2c81 && c <= 0x2ce3 && (c & 1))
return (c - 1);
if (c >= 0x2d00 && c <= 0x2d25)
return (c - 0x1c60);
switch (c)
{
case 0x2c61:
return 0x2c60;
case 0x2c65:
return 0x023a;
case 0x2c66:
return 0x023e;
}
}
else if (c >= 0xa000 && c < 0xb000)
{
if (((c >= 0xa641 && c <= 0xa65f) ||
(c >= 0xa663 && c <= 0xa66d) ||
(c >= 0xa681 && c <= 0xa697) ||
(c >= 0xa723 && c <= 0xa72f) ||
(c >= 0xa733 && c <= 0xa76f) ||
(c >= 0xa77f && c <= 0xa787)) &&
(c & 1))
return (c - 1);
if (c == 0xa77a || c == 0xa77c || c == 0xa78c)
return (c - 1);
}
else
{
if (c >= 0xff41 && c <= 0xff5a)
return (c - 0x20);
if (c >= 0x10428 && c <= 0x1044f)
return (c - 0x28);
}
return c;
return towctrans (c, WCT_TOUPPER);
#else
return (c < 0x00ff ? (wint_t)(toupper ((int)c)) : c);
return c < 0x00ff ? (wint_t)(toupper ((int)c)) : c;
#endif /* _MB_CAPABLE */
}

View File

@ -1,10 +1,14 @@
/* Modified (m) 2017 Thomas Wolff: revise Unicode and locale/wchar handling */
#include <_ansi.h>
#include <wctype.h>
#include "local.h"
/* Convert wide character C to upper case for LOCALE.
   Returns the converted character as produced by the function this
   wrapper delegates to.  */
wint_t
towupper_l (wint_t c, struct __locale_t *locale)
{
/* With multibyte support the conversion is delegated to towctrans_l,
which receives LOCALE; otherwise LOCALE is ignored and the plain
towupper is used. */
#ifdef _MB_CAPABLE
return towctrans_l (c, WCT_TOUPPER, locale);
#else
return towupper (c);
#endif /* _MB_CAPABLE */
}

View File

@ -1,355 +0,0 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The name of Red Hat Incorporated may not be used to endorse
or promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Generated using UnicodeData.txt 5.2 */
/* Expression used to filter out the characters for the below tables:
awk -F\; \
'{ \
VAL = strtonum (sprintf("0x%s", $1)); \
# All of general category "L", except for two Thai characters which \
# are actually punctuation characters. Old Unicode weirdness. \
# The character "COMBINING GREEK YPOGEGRAMMENI", as well as all Thai \
# characters which are in "Mn" category. Old Unicode weirdness. \
# All numerical digit or letter characters, except the ASCII variants. \
# This is necessary due to the unfortunate ISO C definition for the \
# iswdigit class, otherwise these characters are missing in iswalnum. \
# All "Other Symbols" which are named as "LETTER" characters. \
# \
# Before running this test, make sure to expand all Unicode blocks \
# which are just marked by their first and last character! \
# \
if ( (match($3, "^L") && VAL != 0x0e2f && VAL != 0x0e46) \
|| (match($3, "^Mn") && (VAL == 0x0345 || match($2, "\\<CHARACTER\\>"))) \
|| (match($3, "^N[dl]") && VAL >= 0x100) \
|| (match($3, "^So") && match($2, "\\<LETTER\\>"))) \
print $1; \
}' UnicodeData.txt
*/
static const unsigned char u0[] = {
0x41, 0x0, 0x5a, 0x61, 0x0, 0x7a, 0xaa, 0xb5,
0xba, 0xc0, 0x0, 0xd6, 0xd8, 0x0, 0xf6, 0xf8,
0x0, 0xff };
/* u1 all alphabetic */
static const unsigned char u2[] = {
0x00, 0x0, 0xc1, 0xc6, 0x0, 0xd1,
0xe0, 0x0, 0xe4, 0xec, 0xee };
static const unsigned char u3[] = {
0x45, 0x70, 0x0, 0x74, 0x76, 0x77,
0x7a, 0x0, 0x7d, 0x86, 0x88, 0x0, 0x8a, 0x8c,
0x8e, 0x0, 0xa1, 0xa3, 0x0, 0xf5,
0xf7, 0x0, 0xff };
static const unsigned char u4[] = {
0x00, 0x0, 0x81, 0x8a, 0x0, 0xff };
static const unsigned char u5[] = {
0x00, 0x0, 0x25, 0x31, 0x0, 0x56, 0x59,
0x61, 0x0, 0x87, 0xd0, 0x0, 0xea,
0xf0, 0x0, 0xf2 };
static const unsigned char u6[] = {
0x21, 0x0, 0x4a, 0x60, 0x0, 0x69,
0x6e, 0x0, 0x6f, 0x71, 0x0, 0xd3,
0xd5, 0xe5, 0x0, 0xe6, 0xee, 0x0, 0xfc, 0xff };
static const unsigned char u7[] = {
0x10, 0x12, 0x0, 0x2f, 0x4d, 0x0, 0xa5, 0xb1,
0xc0, 0x0, 0xea, 0xf4, 0xf5, 0xfa };
static const unsigned char u8[] = {
0x00, 0x0, 0x15, 0x1a, 0x24, 0x28 };
static const unsigned char u9[] = {
0x04, 0x0, 0x39, 0x3d, 0x50, 0x58, 0x0, 0x61,
0x66, 0x0, 0x6f, 0x71, 0x72, 0x79, 0x0, 0x7f,
0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90,
0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0, 0xb2,
0xb6, 0x0, 0xb9, 0xbd, 0xce, 0xdc, 0x0, 0xdd,
0xdf, 0x0, 0xe1, 0xe6, 0x0, 0xf1 };
static const unsigned char ua[] = {
0x05, 0x0, 0x0a, 0x0f, 0x0, 0x10,
0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
0x32, 0x0, 0x33, 0x35, 0x0, 0x36,
0x38, 0x0, 0x39, 0x59, 0x0, 0x5c,
0x5e, 0x66, 0x0, 0x6f, 0x72, 0x0, 0x74,
0x85, 0x0, 0x8d, 0x8f, 0x0, 0x91,
0x93, 0x0, 0xa8, 0xaa, 0x0, 0xb0,
0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9,
0xbd, 0xd0, 0xe0, 0xe1, 0xe6, 0x0, 0xef };
static const unsigned char ub[] = {
0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10,
0x13, 0x0, 0x28, 0x2a, 0x0, 0x30,
0x32, 0x0, 0x33, 0x35, 0x0, 0x39, 0x3d,
0x5c, 0x0, 0x5d, 0x5f, 0x0, 0x61,
0x66, 0x0, 0x6f, 0x71, 0x83, 0x85, 0x0, 0x8a,
0x8e, 0x0, 0x90, 0x92, 0x0, 0x95,
0x99, 0x0, 0x9a, 0x9c, 0x9e, 0x0, 0x9f,
0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa,
0xae, 0x0, 0xb9, 0xd0, 0xe6, 0x0, 0xef };
static const unsigned char uc[] = {
0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10,
0x12, 0x0, 0x28, 0x2a, 0x0, 0x33,
0x35, 0x0, 0x39, 0x3d, 0x58, 0x59,
0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
0x85, 0x0, 0x8c, 0x8e, 0x0, 0x90,
0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
0xb5, 0x0, 0xb9, 0xbd, 0xde, 0xe0, 0x0, 0xe1,
0xe6, 0x0, 0xef };
static const unsigned char ud[] = {
0x05, 0x0, 0x0c, 0x0e, 0x0, 0x10,
0x12, 0x0, 0x28, 0x2a, 0x0, 0x39, 0x3d,
0x60, 0x0, 0x61, 0x66, 0x0, 0x6f,
0x7a, 0x0, 0x7f, 0x85, 0x0, 0x96, 0x9a,
0x0, 0xb1, 0xb3, 0x0, 0xbb, 0xbd,
0xc0, 0x0, 0xc6 };
static const unsigned char ue[] = {
0x01, 0x0, 0x2e, 0x30, 0x0, 0x3a, 0x40,
0x0, 0x45, 0x47, 0x0, 0x4e, 0x50, 0x0, 0x59,
0x81, 0x0, 0x82, 0x84, 0x87, 0x0, 0x88, 0x8a,
0x8d, 0x94, 0x0, 0x97, 0x99, 0x0, 0x9f, 0xa1,
0x0, 0xa3, 0xa5, 0xa7, 0xaa, 0x0, 0xab, 0xad,
0x0, 0xb0, 0xb2, 0x0, 0xb3, 0xbd, 0xc0, 0x0,
0xc4, 0xc6, 0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd };
static const unsigned char uf[] = {
0x00, 0x20, 0x0, 0x29, 0x40, 0x0, 0x47, 0x49,
0x0, 0x6c, 0x88, 0x0, 0x8b };
static const unsigned char u10[] = {
0x00, 0x0, 0x2a, 0x3f, 0x0, 0x49,
0x50, 0x0, 0x55, 0x5a, 0x0, 0x5d,
0x61, 0x65, 0x66, 0x6e, 0x0, 0x70,
0x75, 0x0, 0x81, 0x8e, 0x90, 0x0, 0x99,
0xa0, 0x0, 0xc5, 0xd0, 0x0, 0xfa, 0xfc };
/* u11 all alphabetic */
static const unsigned char u12[] = {
0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d,
0x50, 0x0, 0x56, 0x58, 0x5a, 0x0, 0x5d,
0x60, 0x0, 0x88, 0x8a, 0x0, 0x8d,
0x90, 0x0, 0xb0, 0xb2, 0x0, 0xb5,
0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0, 0xc5,
0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
static const unsigned char u13[] = {
0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
0x18, 0x0, 0x5a, 0x80, 0x0, 0x8f,
0xa0, 0x0, 0xf4 };
static const unsigned char u14[] = {
0x01, 0x0, 0xff };
/* u15 all alphabetic */
static const unsigned char u16[] = {
0x00, 0x0, 0x6c, 0x6f, 0x0, 0x7f,
0x81, 0x0, 0x9a, 0xa0, 0x0, 0xea,
0xee, 0x0, 0xf0 };
static const unsigned char u17[] = {
0x00, 0x0, 0x0c, 0x0e, 0x0, 0x11,
0x20, 0x0, 0x31, 0x40, 0x0, 0x51,
0x60, 0x0, 0x6c, 0x6e, 0x0, 0x70,
0x80, 0x0, 0xb3, 0xd7, 0xdc, 0xe0, 0x0, 0xe9 };
static const unsigned char u18[] = {
0x10, 0x0, 0x19, 0x20, 0x0, 0x77,
0x80, 0x0, 0xa8, 0xaa, 0xb0, 0x0, 0xf5 };
static const unsigned char u19[] = {
0x00, 0x0, 0x1c, 0x46, 0x0, 0x6d,
0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
0xc1, 0x0, 0xc7, 0xd0, 0x0, 0xda };
static const unsigned char u1a[] = {
0x00, 0x0, 0x16, 0x20, 0x0, 0x54,
0x80, 0x0, 0x89, 0x90, 0x0, 0x99, 0xa7 };
static const unsigned char u1b[] = {
0x05, 0x0, 0x33, 0x45, 0x0, 0x4b,
0x50, 0x0, 0x59, 0x83, 0x0, 0xa0,
0xae, 0x0, 0xb9 };
static const unsigned char u1c[] = {
0x00, 0x0, 0x23, 0x40, 0x0, 0x49,
0x4d, 0x0, 0x7d, 0xe9, 0x0, 0xec,
0xee, 0x0, 0xf1 };
static const unsigned char u1d[] = {
0x00, 0x0, 0xbf };
/* u1e all alphabetic */
static const unsigned char u1f[] = {
0x00, 0x0, 0x15, 0x18, 0x0, 0x1d,
0x20, 0x0, 0x45, 0x48, 0x0, 0x4d, 0x50, 0x0, 0x57, 0x59,
0x5b, 0x5d, 0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4,
0xb6, 0x0, 0xbc, 0xbe, 0xc2, 0x0, 0xc4, 0xc6,
0x0, 0xcc, 0xd0, 0x0, 0xd3, 0xd6, 0x0, 0xdb,
0xe0, 0x0, 0xec, 0xf2, 0x0, 0xf4, 0xf6, 0x0,
0xfc };
static const unsigned char u20[] = {
0x71, 0x7f, 0x90, 0x0, 0x94 };
static const unsigned char u21[] = {
0x02, 0x07, 0x0a, 0x0, 0x13, 0x15,
0x19, 0x0, 0x1d, 0x24, 0x26, 0x28, 0x0, 0x2d,
0x2f, 0x0, 0x39, 0x3c, 0x0, 0x3f,
0x45, 0x0, 0x49, 0x4e, 0x60, 0x0, 0x88 };
static const unsigned char u24[] = {
0x9c, 0x0, 0xe9 };
static const unsigned char u2c[] = {
0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
0x60, 0x0, 0xe4, 0xeb, 0x0, 0xee };
static const unsigned char u2d[] = {
0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
0xd8, 0x0, 0xde };
static const unsigned char u2e[] = {
0x2f };
static const unsigned char u30[] = {
0x05, 0x0, 0x07, 0x21, 0x0,
0x29, 0x31, 0x0, 0x35, 0x38, 0x0, 0x3c, 0x41,
0x0, 0x96, 0x9d, 0x0, 0x9f, 0xa1, 0x0, 0xfa,
0xfc, 0x0, 0xff };
static const unsigned char u31[] = {
0x05, 0x0, 0x2d, 0x31, 0x0,
0x8e, 0xa0, 0x0, 0xb7, 0xf0, 0x0, 0xff };
/* u34 to u4c all alphabetic */
static const unsigned char u4d[] = {
0x00, 0x0, 0xb5 };
/* u4e to u9e all alphabetic */
static const unsigned char u9f[] = {
0x00, 0x0, 0xcb };
/* ua0 to ua3 all alphabetic */
static const unsigned char ua4[] = {
0x00, 0x0, 0x8c, 0xd0, 0x0, 0xfd };
/* ua5 all alphabetic */
static const unsigned char ua6[] = {
0x00, 0x0, 0x0c, 0x10, 0x0, 0x2b,
0x40, 0x0, 0x5f, 0x62, 0x0, 0x6e,
0x7f, 0x0, 0x97, 0xa0, 0x0, 0xef };
static const unsigned char ua7[] = {
0x17, 0x0, 0x1f, 0x22, 0x0, 0x88,
0x8b, 0x8c,
0xfb, 0x0, 0xff };
static const unsigned char ua8[] = {
0x00, 0x01, 0x03, 0x0, 0x05, 0x07, 0x0, 0x0a,
0x0c, 0x0, 0x22, 0x40, 0x0, 0x73,
0x82, 0x0, 0xb3, 0xd0, 0x0, 0xd9,
0xf2, 0x0, 0xf7, 0xfb };
static const unsigned char ua9[] = {
0x00, 0x0, 0x25, 0x30, 0x0, 0x46,
0x60, 0x0, 0x7c, 0x84, 0x0, 0xb2,
0xcf, 0x0, 0xd9 };
static const unsigned char uaa[] = {
0x00, 0x0, 0x28, 0x40, 0x0, 0x42,
0x44, 0x0, 0x4b, 0x50, 0x0, 0x59,
0x60, 0x0, 0x76, 0x7a, 0x80, 0x0, 0xaf,
0xb1, 0xb5, 0xb6, 0xb9, 0x0, 0xbd,
0xc0, 0xc2, 0xdb, 0x0, 0xdd };
static const unsigned char uab[] = {
0xc0, 0x0, 0xe2, 0xf0, 0x0, 0xf9 };
/* uac to ud6 all alphabetic */
static const unsigned char ud7[] = {
0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
0xcb, 0x0, 0xfb };
/* uf9 all alphabetic */
static const unsigned char ufa[] = {
0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
0x70, 0x0, 0xd9 };
static const unsigned char ufb[] = {
0x00, 0x0, 0x06, 0x13, 0x0, 0x17, 0x1d,
0x1f, 0x0, 0x28, 0x2a, 0x0, 0x36, 0x38, 0x0,
0x3c, 0x3e, 0x40, 0x0, 0x41, 0x43, 0x0, 0x44,
0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
/* ufc all alphabetic */
static const unsigned char ufd[] = {
0x00, 0x0, 0x3d, 0x50, 0x0,
0x8f, 0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfb };
static const unsigned char ufe[] = {
0x70,
0x0, 0x74, 0x76, 0x0, 0xfc };
static const unsigned char uff[] = {
0x10, 0x0, 0x19,
0x21, 0x0, 0x3a, 0x41, 0x0, 0x5a, 0x66, 0x0,
0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0, 0xcf, 0xd2,
0x0, 0xd7, 0xda, 0x0, 0xdc };
static const unsigned char u100[] = {
0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
static const unsigned char u101[] = {
0x40, 0x0, 0x74 };
static const unsigned char u102[] = {
0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
static const unsigned char u103[] = {
0x00, 0x0, 0x1e, 0x30, 0x0, 0x4a,
0x80, 0x0, 0x9d, 0xa0, 0x0, 0xc3,
0xc8, 0x0, 0xcf, 0xd1, 0x0, 0xd5 };
static const unsigned char u104[] = {
0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
static const unsigned char u108[] = {
0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55 };
static const unsigned char u109[] = {
0x00, 0x0, 0x15, 0x20, 0x0, 0x39 };
static const unsigned char u10a[] = {
0x00, 0x10, 0x0, 0x13, 0x15, 0x0, 0x17,
0x19, 0x0, 0x33, 0x60, 0x0, 0x7c };
static const unsigned char u10b[] = {
0x00, 0x0, 0x35, 0x40, 0x0, 0x55,
0x60, 0x0, 0x72 };
static const unsigned char u10c[] = {
0x00, 0x0, 0x48 };
static const unsigned char u110[] = {
0x83, 0x0, 0xaf };
/* u120 to u122 all alphabetic */
static const unsigned char u123[] = {
0x00, 0x0, 0x6e };
static const unsigned char u124[] = {
0x00, 0x0, 0x62 };
/* u130 to u133 all alphabetic */
static const unsigned char u134[] = {
0x00, 0x0, 0x2e };
static const unsigned char u1d4[] = {
0x00, 0x0, 0x54, 0x56, 0x0, 0x9c,
0x9e, 0x0, 0x9f, 0xa2, 0xa5, 0x0, 0xa6,
0xa9, 0x0, 0xac, 0xae, 0x0, 0xb9, 0xbb,
0xbd, 0x0, 0xc3, 0xc5, 0x0, 0xff };
static const unsigned char u1d5[] = {
0x00, 0x0, 0x05, 0x07, 0x0,
0x0a, 0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e,
0x0, 0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44,
0x46, 0x4a, 0x0, 0x50, 0x52, 0x0, 0xff };
static const unsigned char u1d6[] = {
0x00, 0x0, 0xa5, 0xa8, 0x0, 0xc0,
0xc2, 0x0, 0xda, 0xdc, 0x0, 0xfa,
0xfc, 0x0, 0xff };
static const unsigned char u1d7[] = {
0x00, 0x0, 0x14, 0x16, 0x0, 0x34,
0x36, 0x0, 0x4e, 0x50, 0x0, 0x6e,
0x70, 0x0, 0x88, 0x8a, 0x0, 0xa8,
0xaa, 0x0, 0xc2, 0xc4, 0x0, 0xcb,
0xce, 0x0, 0xff };
static const unsigned char u1f1[] = {
0x10, 0x0, 0x2c, 0x31, 0x3d, 0x3f, 0x42, 0x46,
0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a };
/* u200 to u2a5 all alphabetic */
static const unsigned char u2a6[] = {
0x00, 0x0, 0xd6 };
/* u2a7 to u2b6 all alphabetic */
static const unsigned char u2b7[] = {
0x00, 0x0, 0x34 };
/* u2f8 to u2f9 all alphabetic */
static const unsigned char u2fa[] = {
0x00, 0x0, 0x1d };

View File

@ -1,389 +0,0 @@
/* Copyright (c) 2002 Red Hat Incorporated.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
The name of Red Hat Incorporated may not be used to endorse
or promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL RED HAT INCORPORATED BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Generated using UnicodeData.txt 5.2 */
/* Expression used to filter out the characters for the below tables:
awk -F\; \
'{ \
VAL = strtonum (sprintf("0x%s", $1)); \
# All valid characters except from categories Cc (C0 or C1 control code), \
# Cs (Surrogates), Zl (Line separator), and Zp (Paragraph separator).\
# \
# Before running this test, make sure to expand all Unicode blocks \
# which are just marked by their first and last character! \
# \
if (!match($3, "^C[cs]") && !match($3, "^Z[lp]")) \
print $1; \
}' UnicodeData.txt
*/
static const unsigned char u0[] = {
0x20, 0x0, 0x7e, 0xa0, 0x0, 0xff };
/* u1 is all-print */
/* u2 is all-print */
static const unsigned char u3[] = {
0x00, 0x0, 0x77, 0x7a, 0x0, 0x7e,
0x84, 0x0, 0x8a, 0x8c, 0x8e, 0x0,
0xa1, 0xa3, 0x0, 0xff };
/* u4 is all-print */
static const unsigned char u5[] = {
0x00, 0x0, 0x25, 0x31, 0x0,
0x56, 0x59, 0x0, 0x5f, 0x61, 0x0, 0x87, 0x89,
0x0, 0x8a, 0x91, 0x0, 0xc7, 0xd0, 0x0, 0xea,
0xf0, 0x0, 0xf4 };
static const unsigned char u6[] = {
0x00, 0x0, 0x03, 0x06, 0x0, 0x1b, 0x1e, 0x1f,
0x21, 0x0, 0x5e, 0x60, 0x0, 0xff };
static const unsigned char u7[] = {
0x00, 0x0, 0x0d, 0x0f, 0x0, 0x4a, 0x4d, 0x0, 0xb1,
0xc0, 0x0, 0xfa };
static const unsigned char u8[] = {
0x00, 0x0, 0x2d, 0x30, 0x0, 0x3e, };
static const unsigned char u9[] = {
0x00, 0x0, 0x39, 0x3c, 0x0, 0x4e, 0x50, 0x0, 0x55,
0x58, 0x0, 0x72, 0x79, 0x0, 0x7f, 0x81, 0x0, 0x83,
0x85, 0x0, 0x8c, 0x8f, 0x0, 0x90, 0x93, 0x0, 0xa8,
0xaa, 0x0, 0xb0, 0xb2, 0xb6, 0x0, 0xb9, 0xbc,
0x0, 0xc4, 0xc7, 0xc8, 0xcb, 0x0, 0xce,
0xd7, 0xdc, 0x0, 0xdd, 0xdf, 0x0, 0xe3, 0xe6,
0x0, 0xfb };
static const unsigned char ua[] = {
0x01, 0x0, 0x03, 0x05, 0x0, 0x0a, 0x0f, 0x0,
0x10, 0x13, 0x0, 0x28, 0x2a, 0x0, 0x30, 0x32,
0x0, 0x33, 0x35, 0x0, 0x36, 0x38, 0x0, 0x39,
0x3c, 0x3e, 0x0, 0x42, 0x47, 0x0, 0x48, 0x4b,
0x0, 0x4d, 0x51, 0x59, 0x0, 0x5c, 0x5e, 0x66, 0x0,
0x75, 0x81, 0x0, 0x83, 0x85, 0x0, 0x8d,
0x8f, 0x0, 0x91, 0x93, 0x0, 0xa8, 0xaa, 0x0,
0xb0, 0xb2, 0x0, 0xb3, 0xb5, 0x0, 0xb9, 0xbc,
0x0, 0xc5, 0xc7, 0x0, 0xc9, 0xcb, 0x0, 0xcd,
0xd0, 0xe0, 0x0, 0xe3, 0xe6, 0x0, 0xef, 0xf1 };
static const unsigned char ub[] = {
0x01, 0x0, 0x03,
0x05, 0x0, 0x0c, 0x0f, 0x0, 0x10, 0x13, 0x0,
0x28, 0x2a, 0x0, 0x30, 0x32, 0x0, 0x33, 0x35,
0x0, 0x39, 0x3c, 0x0, 0x44, 0x47, 0x0, 0x48,
0x4b, 0x0, 0x4d, 0x56, 0x0, 0x57, 0x5c, 0x0,
0x5d, 0x5f, 0x0, 0x63, 0x66, 0x0, 0x71, 0x82,
0x0, 0x83, 0x85, 0x0, 0x8a, 0x8e, 0x0, 0x90,
0x92, 0x0, 0x95, 0x99, 0x0, 0x9a, 0x9c, 0x9e,
0x0, 0x9f, 0xa3, 0x0, 0xa4, 0xa8, 0x0, 0xaa,
0xae, 0x0, 0xb9, 0xbe, 0x0,
0xc2, 0xc6, 0x0, 0xc8, 0xca, 0x0, 0xcd, 0xd0,
0xd7, 0xe6, 0xe7, 0x0, 0xfa };
static const unsigned char uc[] = {
0x01, 0x0, 0x03, 0x05, 0x0,
0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28, 0x2a,
0x0, 0x33, 0x35, 0x0, 0x39, 0x3d, 0x0, 0x44,
0x46, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x55, 0x0,
0x56, 0x58, 0x59, 0x60, 0x0, 0x63, 0x66, 0x0, 0x6f,
0x78, 0x0, 0x7f, 0x82, 0x83, 0x85, 0x0, 0x8c,
0x8e, 0x0, 0x90, 0x92, 0x0, 0xa8, 0xaa, 0x0, 0xb3,
0xb5, 0x0, 0xb9, 0xbc, 0x0, 0xc4, 0xc6, 0x0, 0xc8,
0xca, 0x0, 0xcd, 0xd5, 0x0, 0xd6, 0xde, 0xe0, 0x0,
0xe3, 0xe6, 0x0, 0xef, 0xf1, 0xf2 };
static const unsigned char ud[] = {
0x02, 0x0, 0x03, 0x05,
0x0, 0x0c, 0x0e, 0x0, 0x10, 0x12, 0x0, 0x28,
0x2a, 0x0, 0x39, 0x3d, 0x0, 0x44, 0x46, 0x0,
0x48, 0x4a, 0x0, 0x4d, 0x57, 0x60, 0x0, 0x63,
0x66, 0x0, 0x75, 0x79, 0x0, 0x7f, 0x82, 0x0, 0x83,
0x85, 0x0, 0x96, 0x9a, 0x0, 0xb1, 0xb3, 0x0, 0xbb,
0xbd, 0xc0, 0x0, 0xc6, 0xca, 0xcf, 0x0, 0xd4, 0xd6,
0xd8, 0x0, 0xdf, 0xf2, 0x0, 0xf4 };
static const unsigned char ue[] = {
0x01, 0x0,
0x3a, 0x3f, 0x0, 0x5b, 0x81, 0x0, 0x82, 0x84,
0x87, 0x0, 0x88, 0x8a, 0x8d, 0x94, 0x0, 0x97,
0x99, 0x0, 0x9f, 0xa1, 0x0, 0xa3, 0xa5, 0xa7,
0xaa, 0x0, 0xab, 0xad, 0x0, 0xb9, 0xbb, 0x0,
0xbd, 0xc0, 0x0, 0xc4, 0xc6, 0xc8, 0x0, 0xcd,
0xd0, 0x0, 0xd9, 0xdc, 0x0, 0xdd };
static const unsigned char uf[] = {
0x00, 0x0, 0x47, 0x49, 0x0, 0x6c,
0x71, 0x0, 0x8b, 0x90, 0x0, 0x97,
0x99, 0x0, 0xbc, 0xbe, 0x0, 0xcc,
0xce, 0x0, 0xd8 };
static const unsigned char u10[] = {
0x00, 0x0, 0xc5, 0xd0, 0x0, 0xfc };
/* u11 is all-print */
static const unsigned char u12[] = {
0x00, 0x0, 0x48, 0x4a, 0x0, 0x4d, 0x50, 0x0, 0x56,
0x58, 0x5a, 0x0, 0x5d, 0x60, 0x0, 0x88,
0x8a, 0x0, 0x8d, 0x90, 0x0, 0xb0, 0xb2,
0x0, 0xb5, 0xb8, 0x0, 0xbe, 0xc0, 0xc2, 0x0,
0xc5, 0xc8, 0x0, 0xd6, 0xd8, 0x0, 0xff };
static const unsigned char u13[] = {
0x00, 0x0, 0x10, 0x12, 0x0, 0x15,
0x18, 0x0, 0x5a, 0x5f, 0x0, 0x7c,
0x80, 0x0, 0x99, 0xa0, 0x0, 0xf4 };
/* u14 is all-print */
/* u15 is all-print */
static const unsigned char u16[] = {
0x00, 0x0, 0x9c, 0xa0, 0x0, 0xf0 };
static const unsigned char u17[] = {
0x00, 0x0, 0x0c, 0x0e, 0x0, 0x14, 0x20,
0x0, 0x36, 0x40, 0x0, 0x53, 0x60, 0x0, 0x6c,
0x6e, 0x0, 0x70, 0x72, 0x0, 0x73, 0x80, 0x0,
0xdd, 0xe0, 0x0, 0xe9, 0xf0, 0x0, 0xf9 };
static const unsigned char u18[] = {
0x00, 0x0, 0x0e, 0x10,
0x0, 0x19, 0x20, 0x0, 0x77, 0x80, 0x0, 0xaa,
0xb0, 0x0, 0xf5 };
static const unsigned char u19[] = {
0x00, 0x0, 0x1c, 0x20, 0x0, 0x2b,
0x30, 0x0, 0x3b, 0x40, 0x44, 0x0, 0x6d,
0x70, 0x0, 0x74, 0x80, 0x0, 0xab,
0xb0, 0x0, 0xc9, 0xd0, 0x0, 0xda,
0xde, 0x0, 0xff };
static const unsigned char u1a[] = {
0x00, 0x0, 0x1b, 0x1e, 0x0, 0x5e,
0x60, 0x0, 0x7c, 0x7f, 0x0, 0x89,
0x90, 0x0, 0x99, 0xa0, 0x0, 0xad };
static const unsigned char u1b[] = {
0x00, 0x0, 0x4b, 0x50, 0x0, 0x7c,
0x80, 0x0, 0xaa, 0xae, 0x0, 0xb9 };
static const unsigned char u1c[] = {
0x00, 0x0, 0x37, 0x3b, 0x0, 0x49,
0x4d, 0x0, 0x7f, 0xd0, 0x0, 0xf2 };
static const unsigned char u1d[] = {
0x00, 0x0, 0xe6, 0xfd, 0x0, 0xff };
/* u1e is all-print */
static const unsigned char u1f[] = {
0x00, 0x0,
0x15, 0x18, 0x0, 0x1d, 0x20, 0x0, 0x45, 0x48,
0x0, 0x4d, 0x50, 0x0, 0x57, 0x59, 0x5b, 0x5d,
0x5f, 0x0, 0x7d, 0x80, 0x0, 0xb4, 0xb6, 0x0,
0xc4, 0xc6, 0x0, 0xd3, 0xd6, 0x0, 0xdb, 0xdd,
0x0, 0xef, 0xf2, 0x0, 0xf4, 0xf6, 0x0, 0xfe };
static const unsigned char u20[] = {
0x00, 0x0, 0x27, 0x2a, 0x0, 0x64,
0x6a, 0x0, 0x71, 0x74, 0x0, 0x8e,
0x90, 0x0, 0x94, 0xa0, 0x0, 0xb8,
0xd0, 0x0, 0xf0 };
static const unsigned char u21[] = {
0x00, 0x0, 0x89, 0x90, 0x0, 0xff };
/* u22 is all-print */
static const unsigned char u23[] = {
0x00, 0x0, 0xe8 };
static const unsigned char u24[] = {
0x00, 0x0, 0x26, 0x40, 0x0, 0x4a,
0x60, 0x0, 0xff };
/* u25 is all-print */
static const unsigned char u26[] = {
0x00, 0x0, 0xcd, 0xcf, 0x0, 0xe1,
0xe3, 0xe8, 0x0, 0xff };
static const unsigned char u27[] = {
0x01, 0x0, 0x04, 0x06, 0x0, 0x09,
0x0c, 0x0, 0x27, 0x29, 0x0, 0x4b, 0x4d,
0x4f, 0x0, 0x52, 0x56, 0x0, 0x5e,
0x61, 0x0, 0x94, 0x98, 0x0, 0xaf,
0xb1, 0x0, 0xbe, 0xc0, 0x0, 0xca, 0xcc,
0xd0, 0x0, 0xff };
/* u28 to u2a are all-print */
static const unsigned char u2b[] = {
0x00, 0x0, 0x4c, 0x50, 0x0, 0x59 };
static const unsigned char u2c[] = {
0x00, 0x0, 0x2e, 0x30, 0x0, 0x5e,
0x60, 0x0, 0xf1, 0xf9, 0x0, 0xff };
static const unsigned char u2d[] = {
0x00, 0x0, 0x25, 0x30, 0x0, 0x65, 0x6f,
0x80, 0x0, 0x96, 0xa0, 0x0, 0xa6,
0xa8, 0x0, 0xae, 0xb0, 0x0, 0xb6,
0xb8, 0x0, 0xbe, 0xc0, 0x0, 0xc6,
0xc8, 0x0, 0xce, 0xd0, 0x0, 0xd6,
0xd8, 0x0, 0xde, 0xe0, 0x0, 0xff };
static const unsigned char u2e[] = {
0x00, 0x0, 0x31, 0x80, 0x0, 0x99,
0x9b, 0x0, 0xf3 };
static const unsigned char u2f[] = {
0x00, 0x0, 0xd5, 0xf0, 0x0, 0xfb };
static const unsigned char u30[] = {
0x00, 0x0,
0x3f, 0x41, 0x0, 0x96, 0x99, 0x0, 0xff };
static const unsigned char u31[] = {
0x05, 0x0, 0x2d, 0x31, 0x0, 0x8e,
0x90, 0x0, 0xb7, 0xc0, 0x0, 0xe3,
0xf0, 0x0, 0xff };
static const unsigned char u32[] = {
0x00, 0x0, 0x1e, 0x20, 0x0, 0xfe };
/* u33 to u4c is all-print */
static const unsigned char u4d[] = {
0x00, 0x0, 0xb5, 0xc0, 0x0, 0xff };
/* u4e to u9e is all-print */
static const unsigned char u9f[] = {
0x00, 0x0, 0xcb };
/* ua0 to ua3 is all-print */
static const unsigned char ua4[] = {
0x00, 0x0, 0x8c, 0x90, 0x0, 0xc6,
0xd0, 0x0, 0xff };
/* ua5 is all-print */
static const unsigned char ua6[] = {
0x00, 0x0, 0x2b, 0x40, 0x0, 0x5f,
0x62, 0x0, 0x73, 0x7c, 0x0, 0x97,
0xa0, 0x0, 0xf7 };
static const unsigned char ua7[] = {
0x00, 0x0, 0x8c, 0xfb, 0x0, 0xff };
static const unsigned char ua8[] = {
0x00, 0x0, 0x2b, 0x30, 0x0, 0x39,
0x40, 0x0, 0x77, 0x80, 0x0, 0xc4,
0xce, 0x0, 0xd9, 0xe0, 0x0, 0xfb };
static const unsigned char ua9[] = {
0x00, 0x0, 0x53, 0x5f, 0x0, 0x7c,
0x80, 0x0, 0xcd, 0xcf, 0x0, 0xd9,
0xde, 0xdf };
static const unsigned char uaa[] = {
0x00, 0x0, 0x36, 0x40, 0x0, 0x4d,
0x50, 0x0, 0x59, 0x5c, 0x0, 0x7b,
0x80, 0x0, 0xc2, 0xdb, 0x0, 0xdf };
static const unsigned char uab[] = {
0xc0, 0x0, 0xed, 0xf0, 0x0, 0xf9 };
/* uac to ud6 is all-print */
static const unsigned char ud7[] = {
0x00, 0x0, 0xa3, 0xb0, 0x0, 0xc6,
0xcb, 0x0, 0xfb };
/* ud8 to udf are UTF-16 surrogates, non-printable */
/* ue0 to uf9 is all-print */
static const unsigned char ufa[] = {
0x00, 0x0, 0x2d, 0x30, 0x0, 0x6d,
0x70, 0x0, 0xd9 };
static const unsigned char ufb[] = {
0x00, 0x0, 0x06, 0x13, 0x0, 0x17,
0x1d, 0x0, 0x36, 0x38, 0x0, 0x3c,
0x3e, 0x40, 0x41, 0x43, 0x44,
0x46, 0x0, 0xb1, 0xd3, 0x0, 0xff };
/* ufc is all-print */
static const unsigned char ufd[] = {
0x00, 0x0, 0x3f, 0x50, 0x0, 0x8f,
0x92, 0x0, 0xc7, 0xf0, 0x0, 0xfd };
static const unsigned char ufe[] = {
0x00, 0x0, 0x19, 0x20, 0x0, 0x26,
0x30, 0x0, 0x52, 0x54, 0x0, 0x66,
0x68, 0x0, 0x6b, 0x70, 0x0, 0x74,
0x76, 0x0, 0xfc, 0xff };
static const unsigned char uff[] = {
0x01, 0x0, 0xbe, 0xc2, 0x0, 0xc7, 0xca, 0x0,
0xcf, 0xd2, 0x0, 0xd7, 0xda, 0x0, 0xdc, 0xe0,
0x0, 0xe6, 0xe8, 0x0, 0xee, 0xf9, 0x0, 0xfd };
static const unsigned char u100[] = {
0x00, 0x0, 0x0b, 0x0d, 0x0, 0x26,
0x28, 0x0, 0x3a, 0x3c, 0x3d, 0x3f, 0x0, 0x4d,
0x50, 0x0, 0x5d, 0x80, 0x0, 0xfa };
static const unsigned char u101[] = {
0x00, 0x0, 0x02, 0x07, 0x0, 0x33,
0x37, 0x0, 0x8a, 0x90, 0x0, 0x9b,
0xd0, 0x0, 0xfd };
static const unsigned char u102[] = {
0x80, 0x0, 0x9c, 0xa0, 0x0, 0xd0 };
static const unsigned char u103[] = {
0x00, 0x0, 0x1e, 0x20, 0x0, 0x23,
0x30, 0x0, 0x4a, 0x80, 0x0, 0x9d,
0x9f, 0x0, 0xc3, 0xc8, 0x0, 0xd5 };
static const unsigned char u104[] = {
0x00, 0x0, 0x9d, 0xa0, 0x0, 0xa9 };
static const unsigned char u108[] = {
0x00, 0x0, 0x05, 0x08, 0x0a, 0x0, 0x35,
0x37, 0x38, 0x3c, 0x3f, 0x0, 0x55,
0x57, 0x0, 0x5f };
static const unsigned char u109[] = {
0x00, 0x0, 0x1b, 0x1f, 0x0, 0x39, 0x3f };
static const unsigned char u10a[] = {
0x00, 0x0, 0x03, 0x05, 0x06, 0x0c, 0x0, 0x13,
0x15, 0x0, 0x17, 0x19, 0x0, 0x33,
0x38, 0x0, 0x3a, 0x3f, 0x0, 0x47,
0x50, 0x0, 0x58, 0x60, 0x0, 0x7f };
static const unsigned char u10b[] = {
0x00, 0x0, 0x35, 0x39, 0x0, 0x55,
0x58, 0x0, 0x72, 0x78, 0x0, 0x7f };
static const unsigned char u10c[] = {
0x00, 0x0, 0x48 };
static const unsigned char u10e[] = {
0x60, 0x0, 0x7e };
static const unsigned char u110[] = {
0x80, 0x0, 0xc1 };
/* u120 to u122 is all-print */
static const unsigned char u123[] = {
0x00, 0x0, 0x6e };
static const unsigned char u124[] = {
0x00, 0x0, 0x62, 0x70, 0x0, 0x73 };
/* u130 to u133 is all-print */
static const unsigned char u134[] = {
0x00, 0x0, 0x2e };
static const unsigned char u1d0[] = {
0x00, 0x0, 0xf5 };
static const unsigned char u1d1[] = {
0x00, 0x0, 0x26, 0x29, 0x0, 0xdd };
static const unsigned char u1d2[] = {
0x00, 0x0, 0x45 };
static const unsigned char u1d3[] = {
0x00, 0x0, 0x56, 0x60, 0x0, 0x71 };
static const unsigned char u1d4[] = {
0x00, 0x0, 0x54, 0x56, 0x0, 0x9c, 0x9e, 0x0,
0x9f, 0xa2, 0xa5, 0x0, 0xa6, 0xa9, 0x0, 0xac,
0xae, 0x0, 0xb9, 0xbb, 0xbd, 0x0, 0xc3,
0xc5, 0x0, 0xff };
static const unsigned char u1d5[] = {
0x00, 0x0, 0x05, 0x07, 0x0, 0x0a,
0x0d, 0x0, 0x14, 0x16, 0x0, 0x1c, 0x1e, 0x0,
0x39, 0x3b, 0x0, 0x3e, 0x40, 0x0, 0x44, 0x46,
0x4a, 0x0, 0x50, 0x52, 0x0, 0xff };
static const unsigned char u1d6[] = {
0x00, 0x0, 0xa5, 0xa8, 0x0, 0xff };
static const unsigned char u1d7[] = {
0x00, 0x0, 0xcb, 0xce, 0x0, 0xff };
static const unsigned char u1f0[] = {
0x00, 0x0, 0x2b, 0x30, 0x0, 0x93 };
static const unsigned char u1f1[] = {
0x00, 0x0, 0x0a, 0x10, 0x0, 0x2e,
0x31, 0x3d, 0x3f, 0x42, 0x46, 0x4a, 0x0, 0x4e,
0x57, 0x5f, 0x79, 0x7b, 0x7c, 0x7f, 0x8a, 0x0,
0x8c, 0x8d, 0x90 };
static const unsigned char u1f2[] = {
0x00, 0x10, 0x0, 0x31, 0x40, 0x0, 0x48 };
/* u200 to u2a5 is all-print */
static const unsigned char u2a6[] = {
0x00, 0x0, 0xd6 };
/* u2a7 to u2b6 is all-print */
static const unsigned char u2b7[] = {
0x00, 0x0, 0x34 };
/* u2f8 to u2f9 is all-print */
static const unsigned char u2fa[] = {
0x00,
0x0, 0x1d };
static const unsigned char ue00[] = {
0x01, 0x20, 0x0, 0x7f };
static const unsigned char ue01[] = {
0x00, 0x0, 0xef };
/* uf00 to uffe is all-print */
static const unsigned char ufff[] = {
0x00, 0x0, 0xfd };
/* u1000 to u10fe is all-print */
static const unsigned char u10ff[] = {
0x00, 0x0, 0xfd };