2002-09-27 Jeff Johnston <jjohnstn@redhat.com>

* libc/ctype/jp2uc.c: Change to use multiple arrays in jp2uc.h.
        Also convert to EUCJP before using arrays.  For values not in
        the conversion arrays, return WEOF.
        * libc/ctype/jp2uc.h: Change from one array to a number of
        arrays to account for the fact that the originating table
        is not contiguous for the input values since some are invalid.
This commit is contained in:
Jeff Johnston 2002-09-27 20:17:52 +00:00
parent 8921a7a7a3
commit 1ed3c86dd7
3 changed files with 292 additions and 260 deletions

View File

@ -1,3 +1,12 @@
2002-09-27 Jeff Johnston <jjohnstn@redhat.com>
* libc/ctype/jp2uc.c: Change to use multiple arrays in jp2uc.h.
Also convert to EUCJP before using arrays. For values not in
the conversion arrays, return WEOF.
* libc/ctype/jp2uc.h: Change from one array to a number of
arrays to account for the fact that the originating table
is not contiguous for the input values since some are invalid.
2002-09-24 Jeff Johnston <jjohnstn@redhat.com>
* libc/time/ctime.c: Fix prototype documentation.

View File

@ -40,47 +40,104 @@ wint_t
/* NOTE(review): this listing appears to be a rendered diff in which removed
   and added lines are interleaved without +/- markers (see the two
   declarations of byte1/byte2 and the two `case JP_SJIS:` labels below);
   it is not compilable exactly as shown.

   __jp2uc takes a two-byte character code `c` and an encoding selector
   `type` (JP_JIS, JP_EUCJP or JP_SJIS).  The newer lines split `c` into
   byte1 (high byte) and byte2 (low byte), rewrite that pair into EUCJP
   form, and then look the pair up in the per-range tables a1, a2, a3, a6,
   a7, a8, b02cf and d02f4.  WEOF is returned for an unrecognised `type`
   and for codes that have no table entry. */
_DEFUN (__jp2uc, (c, type), wint_t c _AND int type)
{
int index, adj;
unsigned char byte1, byte2, adj_byte1, adj_byte2;
unsigned char byte1, byte2;
wint_t ret;
/* we actually use a table of JIS to Unicode. For SJIS, we simply
note that SJIS is essentially JIS with the top bits on in each
byte. For EUCJP, we essentially do a translation to JIS before
accessing the table. */
/* we actually use tables of EUCJP to Unicode. For JIS, we simply
note that EUCJP is essentially JIS with the top bits on in each
byte and translate to EUCJP. For SJIS, we do a translation to EUCJP before
accessing the tables. */
switch (type)
{
case JP_JIS:
index = ((c >> 8) - 0x21) * 0xfe + ((c & 0xff) - 0x21);
break;
case JP_SJIS:
index = ((c >> 8) - 0xa1) * 0xfe + ((c & 0xff) - 0xa1);
/* JIS -> EUCJP: add 0x80 to each byte (turns the top bit on) */
byte1 = (c >> 8) + 0x80;
byte2 = (c & 0xff) + 0x80;
break;
case JP_EUCJP:
/* already EUCJP: just split into high and low byte */
byte1 = (c >> 8);
byte2 = (c & 0xff);
break;
case JP_SJIS:
/* SJIS -> EUCJP: byte2 is rewritten first and selects `adj`, which is
   then folded into the rewritten byte1 */
byte1 = c >> 8;
byte2 = c & 0xff;
if (byte2 <= 0x7e || (byte2 & 0x1))
if (byte2 <= 0x9e)
{
adj = -0x22;
adj_byte2 = (byte2 & 0xfe) - 31;
adj = 0xa1 - 0x22;
byte2 = (byte2 - 31) + 0xa1;
}
else
{
adj = -0x21;
adj_byte2 = byte2 - (0x7e + 0x21);
adj = 0xa1 - 0x21;
byte2 = (byte2 - 126) + 0xa1;
}
if (byte1 <= 0x9f)
adj_byte1 = ((byte1 - 112) >> 1) + adj;
byte1 = ((byte1 - 112) << 1) + adj;
else
adj_byte1 = ((byte1 - 112) >> 1) + adj;
index = adj_byte1 * 0xfe + adj_byte2;
byte1 = ((byte1 - 176) << 1) + adj;
break;
default:
return WEOF;
}
if (index < 0 || index > (sizeof(jp2uc) / sizeof(unsigned short)))
return WEOF;
return (wint_t)jp2uc[index];
/* find conversion in jp2uc arrays */
/* handle larger ranges first */
/* NOTE(review): the `c <= 0xcfd3` and `c <= 0xf4a6` tests below use the
   caller's original `c`, not the converted byte1/byte2 pair, so for
   JP_JIS and JP_SJIS input the upper bound is not checked in EUCJP
   terms -- confirm this is intended.
   NOTE(review): the row stride used for `index` is 0xfe; whether that
   matches the layout of b02cf/d02f4 cannot be confirmed from this view.
   byte2 is also not checked against 0xa1 before it is used in `index`. */
if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= 0xcfd3)
{
index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1);
return b02cf[index];
}
else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= 0xf4a6)
{
index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1);
return d02f4[index];
}
/* handle smaller ranges here */
/* NOTE(review): in the cases below the tables are indexed with
   (byte2 - 0xa1); only some cases bound byte2 from above and none bound
   it from below, so a byte2 under 0xa1 would yield a negative index --
   confirm callers never pass such values. */
switch (byte1)
{
case 0xA1:
return (wint_t)a1[byte2 - 0xa1];
case 0xA2:
/* a zero entry in a2 means no conversion: fall out to the final WEOF */
ret = a2[byte2 - 0xa1];
if (ret != 0)
return (wint_t)ret;
break;
case 0xA3:
/* a3 is used only as a non-zero/zero flag; the result is computed */
if (a3[byte2 - 0xa1])
return (wint_t)(0xff00 + (byte2 - 0xa0));
break;
case 0xA4:
/* no table: result is 0x3000 + (byte2 - 0x60) for byte2 up to 0xf3 */
if (byte2 <= 0xf3)
return (wint_t)(0x3000 + (byte2 - 0x60));
break;
case 0xA5:
/* no table: result is 0x3000 + byte2 for byte2 up to 0xf6 */
if (byte2 <= 0xf6)
return (wint_t)(0x3000 + byte2);
break;
case 0xA6:
/* table lookup only up to 0xd8; a zero entry means no conversion */
ret = 0;
if (byte2 <= 0xd8)
ret = (wint_t)a6[byte2 - 0xa1];
if (ret != 0)
return ret;
break;
case 0xA7:
/* table lookup only up to 0xf1; a zero entry means no conversion */
ret = 0;
if (byte2 <= 0xf1)
ret = (wint_t)a7[byte2 - 0xa1];
if (ret != 0)
return ret;
break;
case 0xA8:
if (byte2 <= 0xc0)
return (wint_t)a8[byte2 - 0xa1];
break;
default:
return WEOF;
}
/* reached when a case above found no conversion for byte2 */
return WEOF;
}
#endif /* MB_CAPABLE */

View File

@ -1,6 +1,7 @@
/* based on eucjp-208A.txt */
static unsigned short jp2uc[] = {
/* a1 is contiguous from a1a1 to a1fe */
static unsigned short a1[] = {
0x3000,
0x3001,
0x3002,
@ -94,7 +95,11 @@ static unsigned short jp2uc[] = {
0x25CB,
0x25CF,
0x25CE,
0x25C7,
0x25C7
};
/* a2 has a number of holes between a2a1 and a2fe which we fill with 0x0000 */
static unsigned short a2[] = {
0x25C6,
0x25A1,
0x25A0,
@ -109,6 +114,17 @@ static unsigned short jp2uc[] = {
0x2191,
0x2193,
0x3013,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x2208,
0x220B,
0x2286,
@ -117,6 +133,14 @@ static unsigned short jp2uc[] = {
0x2283,
0x222A,
0x2229,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x2227,
0x2228,
0x00AC,
@ -124,6 +148,17 @@ static unsigned short jp2uc[] = {
0x21D4,
0x2200,
0x2203,
0x2229,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x2220,
0x22A5,
0x2312,
@ -139,6 +174,13 @@ static unsigned short jp2uc[] = {
0x2235,
0x222B,
0x222C,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x212B,
0x2030,
0x266F,
@ -147,238 +189,118 @@ static unsigned short jp2uc[] = {
0x2020,
0x2021,
0x00B6,
0x25EF,
0xFF10,
0xFF11,
0xFF12,
0xFF13,
0xFF14,
0xFF15,
0xFF16,
0xFF17,
0xFF18,
0xFF19,
0xFF21,
0xFF22,
0xFF23,
0xFF24,
0xFF25,
0xFF26,
0xFF27,
0xFF28,
0xFF29,
0xFF2A,
0xFF2B,
0xFF2C,
0xFF2D,
0xFF2E,
0xFF2F,
0xFF30,
0xFF31,
0xFF32,
0xFF33,
0xFF34,
0xFF35,
0xFF36,
0xFF37,
0xFF38,
0xFF39,
0xFF3A,
0xFF41,
0xFF42,
0xFF43,
0xFF44,
0xFF45,
0xFF46,
0xFF47,
0xFF48,
0xFF49,
0xFF4A,
0xFF4B,
0xFF4C,
0xFF4D,
0xFF4E,
0xFF4F,
0xFF50,
0xFF51,
0xFF52,
0xFF53,
0xFF54,
0xFF55,
0xFF56,
0xFF57,
0xFF58,
0xFF59,
0xFF5A,
0x3041,
0x3042,
0x3043,
0x3044,
0x3045,
0x3046,
0x3047,
0x3048,
0x3049,
0x304A,
0x304B,
0x304C,
0x304D,
0x304E,
0x304F,
0x3050,
0x3051,
0x3052,
0x3053,
0x3054,
0x3055,
0x3056,
0x3057,
0x3058,
0x3059,
0x305A,
0x305B,
0x305C,
0x305D,
0x305E,
0x305F,
0x3060,
0x3061,
0x3062,
0x3063,
0x3064,
0x3065,
0x3066,
0x3067,
0x3068,
0x3069,
0x306A,
0x306B,
0x306C,
0x306D,
0x306E,
0x306F,
0x3070,
0x3071,
0x3072,
0x3073,
0x3074,
0x3075,
0x3076,
0x3077,
0x3078,
0x3079,
0x307A,
0x307B,
0x307C,
0x307D,
0x307E,
0x307F,
0x3080,
0x3081,
0x3082,
0x3083,
0x3084,
0x3085,
0x3086,
0x3087,
0x3088,
0x3089,
0x308A,
0x308B,
0x308C,
0x308D,
0x308E,
0x308F,
0x3090,
0x3091,
0x3092,
0x3093,
0x30A1,
0x30A2,
0x30A3,
0x30A4,
0x30A5,
0x30A6,
0x30A7,
0x30A8,
0x30A9,
0x30AA,
0x30AB,
0x30AC,
0x30AD,
0x30AE,
0x30AF,
0x30B0,
0x30B1,
0x30B2,
0x30B3,
0x30B4,
0x30B5,
0x30B6,
0x30B7,
0x30B8,
0x30B9,
0x30BA,
0x30BB,
0x30BC,
0x30BD,
0x30BE,
0x30BF,
0x30C0,
0x30C1,
0x30C2,
0x30C3,
0x30C4,
0x30C5,
0x30C6,
0x30C7,
0x30C8,
0x30C9,
0x30CA,
0x30CB,
0x30CC,
0x30CD,
0x30CE,
0x30CF,
0x30D0,
0x30D1,
0x30D2,
0x30D3,
0x30D4,
0x30D5,
0x30D6,
0x30D7,
0x30D8,
0x30D9,
0x30DA,
0x30DB,
0x30DC,
0x30DD,
0x30DE,
0x30DF,
0x30E0,
0x30E1,
0x30E2,
0x30E3,
0x30E4,
0x30E5,
0x30E6,
0x30E7,
0x30E8,
0x30E9,
0x30EA,
0x30EB,
0x30EC,
0x30ED,
0x30EE,
0x30EF,
0x30F0,
0x30F1,
0x30F2,
0x30F3,
0x30F4,
0x30F5,
0x30F6,
0x222C,
0x0000,
0x0000,
0x0000,
0x25EF
};
/* a3a1 to a3fe is mostly contiguous.  Conversion output values are
   of the form 0xFFxx where xx is (yy - 0xA0) where the input is 0xA3yy.

   Each entry is a validity flag for one second byte, indexed by
   (yy - 0xA1): 1 means the formula above applies, 0 means there is no
   conversion.  The table spans the whole a1..fe row (94 entries) so that
   any second byte in that range indexes inside the array.  The gap between
   the upper-case and lower-case runs is six cells (a3db..a3e0), which puts
   the first lower-case flag at a3e1 -> 0xFF41. */
static unsigned char a3[] = {
  /* a3a1 - a3af: no conversion (15 cells) */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* a3b0 - a3b9: 0xFF10 - 0xFF19 (10 cells) */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* a3ba - a3c0: no conversion (7 cells) */
  0, 0, 0, 0, 0, 0, 0,
  /* a3c1 - a3da: 0xFF21 - 0xFF3A (26 cells) */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* a3db - a3e0: no conversion (6 cells) */
  0, 0, 0, 0, 0, 0,
  /* a3e1 - a3fa: 0xFF41 - 0xFF5A (26 cells) */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* a3fb - a3fe: no conversion (4 cells) */
  0, 0, 0, 0
};
/* a4 is contiguous from a4a1 to a4f3 */
/* transform = 0x30xx where xx = last byte - 0x60 */
/* a5 is contiguous from a5a1 to a5f6 */
/* transform = 0x30xx where xx = last byte */
/* a6 is mostly contiguous from a6a1 to a6d8 */
static unsigned short a6[] = {
0x0391,
0x0392,
0x0393,
@ -403,6 +325,14 @@ static unsigned short jp2uc[] = {
0x03A7,
0x03A8,
0x03A9,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x03B1,
0x03B2,
0x03B3,
@ -426,7 +356,11 @@ static unsigned short jp2uc[] = {
0x03C6,
0x03C7,
0x03C8,
0x03C9,
0x03C9
};
/* a7 is mostly contiguous from a7a1 to a7f1 */
static unsigned short a7[] = {
0x0410,
0x0411,
0x0412,
@ -460,6 +394,21 @@ static unsigned short jp2uc[] = {
0x042D,
0x042E,
0x042F,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0000,
0x0430,
0x0431,
0x0432,
@ -492,7 +441,11 @@ static unsigned short jp2uc[] = {
0x044C,
0x044D,
0x044E,
0x044F,
0x044F
};
/* a8 is contiguous from a8a1 to a8c0 */
static unsigned short a8[] = {
0x2500,
0x2502,
0x250C,
@ -524,7 +477,13 @@ static unsigned short jp2uc[] = {
0x2530,
0x2525,
0x2538,
0x2542,
0x2542
};
/* no conversion a9 to af */
/* b0a1 to cfd3 is contiguous except for illegal sequences with 0xfe */
static unsigned short b02cf[] = {
0x4E9C,
0x5516,
0x5A03,
@ -3489,7 +3448,12 @@ static unsigned short jp2uc[] = {
0x6900,
0x6E7E,
0x7897,
0x8155,
0x8155
};
/* d0a1 to f4a6 is contiguous */
static unsigned short d02f4[] = {
0x5F0C,
0x4E10,
0x4E15,
@ -6879,5 +6843,7 @@ static unsigned short jp2uc[] = {
0x9059,
0x7464,
0x51DC,
0x7199
0x7199
};
/* f5 to fe is non-existent */