From d16a56306d63b4d94412b479a8ea83463a3514ab Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Wed, 20 Jul 2016 22:05:59 +0200 Subject: [PATCH] Consolidate wctomb/mbtowc calls for POSIX-1.2008 - Remove charset parameter from low-level __foo_wctomb/__foo_mbtowc calls. - Instead, create arrays of functions for ISO and Windows codepages, so that each entry points to a function which does not need to evaluate the charset string on each call. Create matching helper functions. I.e., __iso_wctomb, __iso_mbtowc, __cp_wctomb and __cp_mbtowc are now functions returning the right function pointer. - Create __WCTOMB/__MBTOWC macros utilizing per-reent locale and replace calls to __wctomb/__mbtowc with calls to __WCTOMB/__MBTOWC. - Drop global __wctomb/__mbtowc vars. - Utilize the aforementioned changes in Cygwin to get rid of charset in other calling functions and simplify the code. - In Cygwin restrict global cygheap locale info to the job performed by internal_setlocale. Use UTF-8 instead of ASCII on the fly in internal conversion functions. - In Cygwin dll_entry, make sure to initialize a TLS area with a NULL _REENT->_locale pointer. Add comment to explain why. 
Signed-off-by: Corinna Vinschen --- newlib/libc/ctype/isprint.c | 2 +- newlib/libc/ctype/local.h | 3 +- newlib/libc/locale/locale.c | 79 +++--- newlib/libc/locale/nl_langinfo.c | 2 - newlib/libc/locale/setlocale.h | 44 +++- newlib/libc/stdio/vfprintf.c | 7 +- newlib/libc/stdio/vfscanf.c | 3 +- newlib/libc/stdlib/btowc.c | 3 +- newlib/libc/stdlib/local.h | 71 +++--- newlib/libc/stdlib/mblen.c | 2 +- newlib/libc/stdlib/mblen_r.c | 2 +- newlib/libc/stdlib/mbrtowc.c | 8 +- newlib/libc/stdlib/mbstowcs_r.c | 2 +- newlib/libc/stdlib/mbtowc.c | 2 +- newlib/libc/stdlib/mbtowc_r.c | 402 +++++++++++++++++++++++++++--- newlib/libc/stdlib/sb_charsets.c | 34 ++- newlib/libc/stdlib/wcrtomb.c | 8 +- newlib/libc/stdlib/wcsnrtombs.c | 4 +- newlib/libc/stdlib/wcstombs_r.c | 4 +- newlib/libc/stdlib/wctob.c | 3 +- newlib/libc/stdlib/wctomb.c | 3 +- newlib/libc/stdlib/wctomb_r.c | 359 +++++++++++++++++++++++--- newlib/libc/time/strptime.c | 2 +- winsup/cygwin/cygheap.cc | 4 +- winsup/cygwin/cygheap.h | 2 - winsup/cygwin/fhandler.h | 2 +- winsup/cygwin/fhandler_console.cc | 35 +-- winsup/cygwin/init.cc | 9 + winsup/cygwin/nlsfuncs.cc | 95 +++---- winsup/cygwin/strfuncs.cc | 60 +++-- winsup/cygwin/wchar.h | 40 ++- 31 files changed, 941 insertions(+), 355 deletions(-) diff --git a/newlib/libc/ctype/isprint.c b/newlib/libc/ctype/isprint.c index 87254f4bc..172a2333b 100644 --- a/newlib/libc/ctype/isprint.c +++ b/newlib/libc/ctype/isprint.c @@ -56,6 +56,6 @@ _DEFUN(isgraph,(c),int c) int _DEFUN(isprint,(c),int c) { - return(__ctype_ptr__[c+1] & (_P|_U|_L|_N|_B)); + return(__CTYPE_PTR[c+1] & (_P|_U|_L|_N|_B)); } diff --git a/newlib/libc/ctype/local.h b/newlib/libc/ctype/local.h index 1a16520a7..b3788e218 100644 --- a/newlib/libc/ctype/local.h +++ b/newlib/libc/ctype/local.h @@ -1,6 +1,7 @@ /* wctrans constants */ #include <_ansi.h> +#include "../locale/setlocale.h" /* valid values for wctrans_t */ #define WCT_TOLOWER 1 @@ -20,8 +21,6 @@ #define WC_UPPER 11 #define WC_XDIGIT 12 -extern char 
*__locale_charset(_NOARGS); - /* internal function to translate JP to Unicode */ #ifdef __CYGWIN__ /* Under Cygwin, the incoming wide character is already given in UTF due diff --git a/newlib/libc/locale/locale.c b/newlib/libc/locale/locale.c index eb08dade5..0430f1f53 100644 --- a/newlib/libc/locale/locale.c +++ b/newlib/libc/locale/locale.c @@ -459,12 +459,11 @@ loadlocale(struct _reent *p, int category) dependent on the cateogry. */ char *locale = NULL; char charset[ENCODING_LEN + 1]; - unsigned long val; + long val = 0; char *end, *c = NULL; int mbc_max; - int (*l_wctomb) (struct _reent *, char *, wchar_t, const char *, mbstate_t *); - int (*l_mbtowc) (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); + wctomb_p l_wctomb; + mbtowc_p l_mbtowc; int cjknarrow = 0; /* Avoid doing everything twice if nothing has changed. */ @@ -674,8 +673,8 @@ restart: *c = '\0'; mbc_max = 1; #ifdef _MB_EXTENDED_CHARSETS_ISO - l_wctomb = __iso_wctomb; - l_mbtowc = __iso_mbtowc; + l_wctomb = __iso_wctomb (val); + l_mbtowc = __iso_mbtowc (val); #else /* !_MB_EXTENDED_CHARSETS_ISO */ l_wctomb = __ascii_wctomb; l_mbtowc = __ascii_mbtowc; @@ -715,8 +714,8 @@ restart: case 1258: mbc_max = 1; #ifdef _MB_EXTENDED_CHARSETS_WINDOWS - l_wctomb = __cp_wctomb; - l_mbtowc = __cp_mbtowc; + l_wctomb = __cp_wctomb (val); + l_mbtowc = __cp_mbtowc (val); #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ l_wctomb = __ascii_wctomb; l_mbtowc = __ascii_mbtowc; @@ -740,15 +739,21 @@ restart: if (*c == '-') ++c; if (*c == 'R' || *c == 'r') - strcpy (charset, "CP20866"); + { + val = 20866; + strcpy (charset, "CP20866"); + } else if (*c == 'U' || *c == 'u') - strcpy (charset, "CP21866"); + { + val = 21866; + strcpy (charset, "CP21866"); + } else FAIL; mbc_max = 1; #ifdef _MB_EXTENDED_CHARSETS_WINDOWS - l_wctomb = __cp_wctomb; - l_mbtowc = __cp_mbtowc; + l_wctomb = __cp_wctomb (val); + l_mbtowc = __cp_mbtowc (val); #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ l_wctomb = __ascii_wctomb; 
l_mbtowc = __ascii_mbtowc; @@ -786,11 +791,12 @@ restart: ++c; if (strcasecmp (c, "PS")) FAIL; + val = 101; strcpy (charset, "CP101"); mbc_max = 1; #ifdef _MB_EXTENDED_CHARSETS_WINDOWS - l_wctomb = __cp_wctomb; - l_mbtowc = __cp_mbtowc; + l_wctomb = __cp_wctomb (val); + l_mbtowc = __cp_mbtowc (val); #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ l_wctomb = __ascii_wctomb; l_mbtowc = __ascii_mbtowc; @@ -804,11 +810,12 @@ restart: /* PT154 */ if (strcasecmp (charset, "PT154")) FAIL; + val = 102; strcpy (charset, "CP102"); mbc_max = 1; #ifdef _MB_EXTENDED_CHARSETS_WINDOWS - l_wctomb = __cp_wctomb; - l_mbtowc = __cp_mbtowc; + l_wctomb = __cp_wctomb (val); + l_mbtowc = __cp_mbtowc (val); #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ l_wctomb = __ascii_wctomb; l_mbtowc = __ascii_mbtowc; @@ -826,8 +833,8 @@ restart: strcpy (charset, "CP874"); mbc_max = 1; #ifdef _MB_EXTENDED_CHARSETS_WINDOWS - l_wctomb = __cp_wctomb; - l_mbtowc = __cp_mbtowc; + l_wctomb = __cp_wctomb (val); + l_mbtowc = __cp_mbtowc (val); #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */ l_wctomb = __ascii_wctomb; l_mbtowc = __ascii_mbtowc; @@ -859,8 +866,8 @@ restart: #ifdef __CYGWIN__ __mb_cur_max = mbc_max; /* Only for backward compat */ #endif - __wctomb = l_wctomb; - __mbtowc = l_mbtowc; + __global_locale.wctomb = l_wctomb; + __global_locale.mbtowc = l_mbtowc; __set_ctype (NULL, charset); /* Determine the width for the "CJK Ambiguous Width" category of characters. This is used in wcwidth(). 
Assume single width for @@ -943,16 +950,6 @@ __get_locale_env(struct _reent *p, int category) } #endif /* _MB_CAPABLE */ -char * -_DEFUN_VOID(__locale_charset) -{ -#ifdef __HAVE_LOCALE_INFO__ - return __get_current_ctype_locale ()->codeset; -#else - return __global_locale.ctype_codeset; -#endif -} - int _DEFUN_VOID(__locale_mb_cur_max) { @@ -963,36 +960,16 @@ _DEFUN_VOID(__locale_mb_cur_max) #endif } -char * -_DEFUN_VOID(__locale_msgcharset) -{ -#ifdef __HAVE_LOCALE_INFO__ - return (char *) __get_current_messages_locale ()->codeset; -#else - return (char *) __global_locale.message_codeset; -#endif -} - -int -_DEFUN_VOID(__locale_cjk_lang) -{ -#ifdef __HAVE_LOCALE_INFO__ - return __get_current_locale ()->cjk_lang; -#else - return __global_locale.cjk_lang; -#endif -} - #ifdef __HAVE_LOCALE_INFO__ char * _DEFUN_VOID(__locale_ctype_ptr) { /* Only check if the current thread/reent has a locale. ctype_ptr is unused in __global_locale, rather the global variable __ctype_ptr__ is used. */ + extern char *__ctype_ptr__; return __get_locale_r (_REENT) ? 
__get_locale_r (_REENT)->ctype_ptr : __ctype_ptr__; } - #endif struct lconv * diff --git a/newlib/libc/locale/nl_langinfo.c b/newlib/libc/locale/nl_langinfo.c index ce6d8d8c2..46e8b0e92 100644 --- a/newlib/libc/locale/nl_langinfo.c +++ b/newlib/libc/locale/nl_langinfo.c @@ -172,8 +172,6 @@ static struct _nl_item_t #define _REL(BASE) ((int)item-BASE) -extern char *__locale_charset (); - char * _DEFUN(nl_langinfo, (item), nl_item item) { diff --git a/newlib/libc/locale/setlocale.h b/newlib/libc/locale/setlocale.h index ffb1cf13e..50797db0c 100644 --- a/newlib/libc/locale/setlocale.h +++ b/newlib/libc/locale/setlocale.h @@ -159,8 +159,7 @@ typedef __uint32_t LCID; struct lc_collate_T { LCID lcid; - int (*mbtowc) (struct _reent *, wchar_t *, const char *, size_t, const char *, - mbstate_t *); + int (*mbtowc) (struct _reent *, wchar_t *, const char *, size_t, mbstate_t *); char codeset[ENCODING_LEN + 1]; }; extern const struct lc_collate_T _C_collate_locale; @@ -169,13 +168,12 @@ extern const struct lc_collate_T _C_collate_locale; struct _thr_locale_t { char categories[_LC_LAST][ENCODING_LEN + 1]; - int (*__wctomb) (struct _reent *, char *, wchar_t, - const char *, mbstate_t *); - int (*__mbtowc) (struct _reent *, wchar_t *, - const char *, size_t, const char *, - mbstate_t *); - char *ctype_ptr; /* Unused in __global_locale */ + int (*wctomb) (struct _reent *, char *, wchar_t, + mbstate_t *); + int (*mbtowc) (struct _reent *, wchar_t *, + const char *, size_t, mbstate_t *); int cjk_lang; + char *ctype_ptr; /* Unused in __global_locale */ #ifndef __HAVE_LOCALE_INFO__ char mb_cur_max[2]; char ctype_codeset[ENCODING_LEN + 1]; @@ -264,6 +262,36 @@ __get_current_collate_locale (void) } #endif +_ELIDABLE_INLINE const char * +__locale_charset (void) +{ +#ifdef __HAVE_LOCALE_INFO__ + return __get_current_ctype_locale ()->codeset; +#else + return __global_locale.ctype_codeset; +#endif +} + +_ELIDABLE_INLINE const char * +__locale_msgcharset (void) +{ +#ifdef 
__HAVE_LOCALE_INFO__ + return (char *) __get_current_messages_locale ()->codeset; +#else + return (char *) __global_locale.message_codeset; +#endif +} + +_ELIDABLE_INLINE int +__locale_cjk_lang (void) +{ +#ifdef __HAVE_LOCALE_INFO__ + return __get_current_locale ()->cjk_lang; +#else + return __global_locale.cjk_lang; +#endif +} + int __ctype_load_locale (struct _thr_locale_t *, const char *, void *, const char *, int); int __monetary_load_locale (struct _thr_locale_t *, const char *, void *, diff --git a/newlib/libc/stdio/vfprintf.c b/newlib/libc/stdio/vfprintf.c index 6430edf2c..ed92bb2f9 100644 --- a/newlib/libc/stdio/vfprintf.c +++ b/newlib/libc/stdio/vfprintf.c @@ -910,8 +910,8 @@ _DEFUN(_VFPRINTF_R, (data, fp, fmt0, ap), for (;;) { cp = fmt; #ifdef _MB_CAPABLE - while ((n = __mbtowc (data, &wc, fmt, MB_CUR_MAX, - __locale_charset (), &state)) != 0) { + while ((n = __MBTOWC (data, &wc, fmt, MB_CUR_MAX, + &state)) != 0) { if (n < 0) { /* Wave invalid chars through. */ memset (&state, 0, sizeof state); @@ -2079,8 +2079,7 @@ _DEFUN(get_arg, (data, n, fmt, ap, numargs_p, args, arg_type, last_fmt), while (*fmt && n >= numargs) { # ifdef _MB_CAPABLE - while ((nbytes = __mbtowc (data, &wc, fmt, MB_CUR_MAX, - __locale_charset (), &wc_state)) > 0) + while ((nbytes = __MBTOWC (data, &wc, fmt, MB_CUR_MAX, &wc_state)) > 0) { fmt += nbytes; if (wc == '%') diff --git a/newlib/libc/stdio/vfscanf.c b/newlib/libc/stdio/vfscanf.c index e2e08db0e..544d8db7f 100644 --- a/newlib/libc/stdio/vfscanf.c +++ b/newlib/libc/stdio/vfscanf.c @@ -508,8 +508,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), #ifndef _MB_CAPABLE wc = *fmt; #else - nbytes = __mbtowc (rptr, &wc, (char *) fmt, MB_CUR_MAX, - __locale_charset (), &state); + nbytes = __MBTOWC (rptr, &wc, (char *) fmt, MB_CUR_MAX, &state); if (nbytes < 0) { wc = 0xFFFD; /* Unicode replacement character */ nbytes = 1; diff --git a/newlib/libc/stdlib/btowc.c b/newlib/libc/stdlib/btowc.c index 354a1d6e1..7605ce6b6 100644 --- 
a/newlib/libc/stdlib/btowc.c +++ b/newlib/libc/stdlib/btowc.c @@ -23,8 +23,7 @@ btowc (int c) _REENT_CHECK_MISC(_REENT); - retval = __mbtowc (_REENT, &pwc, (const char *) &b, 1, - __locale_charset (), &mbs); + retval = __MBTOWC (_REENT, &pwc, (const char *) &b, 1, &mbs); if (retval != 0 && retval != 1) return WEOF; diff --git a/newlib/libc/stdlib/local.h b/newlib/libc/stdlib/local.h index aaa32dff5..0b950d2f4 100644 --- a/newlib/libc/stdlib/local.h +++ b/newlib/libc/stdlib/local.h @@ -5,62 +5,59 @@ char * _EXFUN(_gcvt,(struct _reent *, double , int , char *, char, int)); -char *__locale_charset(_NOARGS); +#include "../locale/setlocale.h" #ifndef __machine_mbstate_t_defined #include #endif -extern int (*__wctomb) (struct _reent *, char *, wchar_t, const char *, - mbstate_t *); -int __ascii_wctomb (struct _reent *, char *, wchar_t, const char *, - mbstate_t *); +typedef int wctomb_f (struct _reent *, char *, wchar_t, mbstate_t *); +typedef wctomb_f *wctomb_p; + +wctomb_f __ascii_wctomb; #ifdef _MB_CAPABLE -int __utf8_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); -int __sjis_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); -int __eucjp_wctomb (struct _reent *, char *, wchar_t, const char *, - mbstate_t *); -int __jis_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); -int __iso_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); -int __cp_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +wctomb_f __utf8_wctomb; +wctomb_f __sjis_wctomb; +wctomb_f __eucjp_wctomb; +wctomb_f __jis_wctomb; +wctomb_p __iso_wctomb (int val); +wctomb_p __cp_wctomb (int val); #ifdef __CYGWIN__ -int __gbk_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); -int __kr_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); -int __big5_wctomb (struct _reent *, char *, wchar_t, const char *, mbstate_t *); +wctomb_f __gbk_wctomb; +wctomb_f __kr_wctomb; +wctomb_f 
__big5_wctomb; #endif #endif -extern int (*__mbtowc) (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __ascii_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); +#define __WCTOMB (__get_current_locale ()->wctomb) + +typedef int mbtowc_f (struct _reent *, wchar_t *, const char *, size_t, + mbstate_t *); +typedef mbtowc_f *mbtowc_p; + +mbtowc_f __ascii_mbtowc; #ifdef _MB_CAPABLE -int __utf8_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __sjis_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __eucjp_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __jis_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __iso_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __cp_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); +mbtowc_f __utf8_mbtowc; +mbtowc_f __sjis_mbtowc; +mbtowc_f __eucjp_mbtowc; +mbtowc_f __jis_mbtowc; +mbtowc_p __iso_mbtowc (int val); +mbtowc_p __cp_mbtowc (int val); #ifdef __CYGWIN__ -int __gbk_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __kr_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); -int __big5_mbtowc (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); +mbtowc_f __gbk_mbtowc; +mbtowc_f __kr_mbtowc; +mbtowc_f __big5_mbtowc; #endif #endif +#define __MBTOWC (__get_current_locale ()->mbtowc) + extern wchar_t __iso_8859_conv[14][0x60]; +int __iso_8859_val_index (int); int __iso_8859_index (const char *); extern wchar_t __cp_conv[][0x80]; +int __cp_val_index (int); int __cp_index (const char *); #endif diff --git a/newlib/libc/stdlib/mblen.c b/newlib/libc/stdlib/mblen.c index 4d9ac38bd..f9fb46466 100644 --- 
a/newlib/libc/stdlib/mblen.c +++ b/newlib/libc/stdlib/mblen.c @@ -60,7 +60,7 @@ _DEFUN (mblen, (s, n), _REENT_CHECK_MISC(reent); state = &(_REENT_MBLEN_STATE(reent)); - retval = __mbtowc (reent, NULL, s, n, __locale_charset (), state); + retval = __MBTOWC (reent, NULL, s, n, state); if (retval < 0) { state->__count = 0; diff --git a/newlib/libc/stdlib/mblen_r.c b/newlib/libc/stdlib/mblen_r.c index c3b596443..9c1533ec4 100644 --- a/newlib/libc/stdlib/mblen_r.c +++ b/newlib/libc/stdlib/mblen_r.c @@ -57,7 +57,7 @@ _DEFUN (_mblen_r, (r, s, n, state), { #ifdef _MB_CAPABLE int retval; - retval = __mbtowc (r, NULL, s, n, __locale_charset (), state); + retval = __MBTOWC (r, NULL, s, n, state); if (retval < 0) { diff --git a/newlib/libc/stdlib/mbrtowc.c b/newlib/libc/stdlib/mbrtowc.c index 550aaff99..69e3acb62 100644 --- a/newlib/libc/stdlib/mbrtowc.c +++ b/newlib/libc/stdlib/mbrtowc.c @@ -26,9 +26,9 @@ _DEFUN (_mbrtowc_r, (ptr, pwc, s, n, ps), #endif if (s == NULL) - retval = __mbtowc (ptr, NULL, "", 1, __locale_charset (), ps); + retval = __MBTOWC (ptr, NULL, "", 1, ps); else - retval = __mbtowc (ptr, pwc, s, n, __locale_charset (), ps); + retval = __MBTOWC (ptr, pwc, s, n, ps); if (retval == -1) { @@ -63,9 +63,9 @@ _DEFUN (mbrtowc, (pwc, s, n, ps), #endif if (s == NULL) - retval = __mbtowc (reent, NULL, "", 1, __locale_charset (), ps); + retval = __MBTOWC (reent, NULL, "", 1, ps); else - retval = __mbtowc (reent, pwc, s, n, __locale_charset (), ps); + retval = __MBTOWC (reent, pwc, s, n, ps); if (retval == -1) { diff --git a/newlib/libc/stdlib/mbstowcs_r.c b/newlib/libc/stdlib/mbstowcs_r.c index cae1f0693..495049972 100644 --- a/newlib/libc/stdlib/mbstowcs_r.c +++ b/newlib/libc/stdlib/mbstowcs_r.c @@ -18,7 +18,7 @@ _DEFUN (_mbstowcs_r, (reent, pwcs, s, n, state), n = (size_t) 1; /* Value doesn't matter as long as it's not 0. 
*/ while (n > 0) { - bytes = __mbtowc (r, pwcs, t, MB_CUR_MAX, __locale_charset (), state); + bytes = __MBTOWC (r, pwcs, t, MB_CUR_MAX, state); if (bytes < 0) { state->__count = 0; diff --git a/newlib/libc/stdlib/mbtowc.c b/newlib/libc/stdlib/mbtowc.c index 3c7f84b9e..7b8be771c 100644 --- a/newlib/libc/stdlib/mbtowc.c +++ b/newlib/libc/stdlib/mbtowc.c @@ -70,7 +70,7 @@ _DEFUN (mbtowc, (pwc, s, n), _REENT_CHECK_MISC(reent); ps = &(_REENT_MBTOWC_STATE(reent)); - retval = __mbtowc (reent, pwc, s, n, __locale_charset (), ps); + retval = __MBTOWC (reent, pwc, s, n, ps); if (retval < 0) { diff --git a/newlib/libc/stdlib/mbtowc_r.c b/newlib/libc/stdlib/mbtowc_r.c index 986595cfd..116b4d82f 100644 --- a/newlib/libc/stdlib/mbtowc_r.c +++ b/newlib/libc/stdlib/mbtowc_r.c @@ -7,15 +7,6 @@ #include #include "local.h" -int (*__mbtowc) (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *) -#ifdef __CYGWIN__ - /* Cygwin starts up in UTF-8 mode. */ - = __utf8_mbtowc; -#else - = __ascii_mbtowc; -#endif - int _DEFUN (_mbtowc_r, (r, pwc, s, n, state), struct _reent *r _AND @@ -24,16 +15,15 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state), size_t n _AND mbstate_t *state) { - return __mbtowc (r, pwc, s, n, __locale_charset (), state); + return __MBTOWC (r, pwc, s, n, state); } int -_DEFUN (__ascii_mbtowc, (r, pwc, s, n, charset, state), +_DEFUN (__ascii_mbtowc, (r, pwc, s, n, state), struct _reent *r _AND wchar_t *pwc _AND const char *s _AND size_t n _AND - const char *charset _AND mbstate_t *state) { wchar_t dummy; @@ -106,14 +96,9 @@ static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = { #define __state __count #ifdef _MB_EXTENDED_CHARSETS_ISO -int -_DEFUN (__iso_mbtowc, (r, pwc, s, n, charset, state), - struct _reent *r _AND - wchar_t *pwc _AND - const char *s _AND - size_t n _AND - const char *charset _AND - mbstate_t *state) +static int +___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + int iso_idx, mbstate_t *state) { wchar_t 
dummy; unsigned char *t = (unsigned char *)s; @@ -129,7 +114,6 @@ _DEFUN (__iso_mbtowc, (r, pwc, s, n, charset, state), if (*t >= 0xa0) { - int iso_idx = __iso_8859_index (charset + 9); if (iso_idx >= 0) { *pwc = __iso_8859_conv[iso_idx][*t - 0xa0]; @@ -149,17 +133,145 @@ _DEFUN (__iso_mbtowc, (r, pwc, s, n, charset, state), return 1; } + +static int +__iso_8859_1_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, -1, state); +} + +static int +__iso_8859_2_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 0, state); +} + +static int +__iso_8859_3_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 1, state); +} + +static int +__iso_8859_4_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 2, state); +} + +static int +__iso_8859_5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 3, state); +} + +static int +__iso_8859_6_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 4, state); +} + +static int +__iso_8859_7_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 5, state); +} + +static int +__iso_8859_8_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 6, state); +} + +static int +__iso_8859_9_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 7, state); +} + +static int +__iso_8859_10_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, 
pwc, s, n, 8, state); +} + +static int +__iso_8859_11_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 9, state); +} + +static int +__iso_8859_13_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 10, state); +} + +static int +__iso_8859_14_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 11, state); +} + +static int +__iso_8859_15_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 12, state); +} + +static int +__iso_8859_16_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___iso_mbtowc (r, pwc, s, n, 13, state); +} + +static mbtowc_p __iso_8859_mbtowc[17] = { + NULL, + __iso_8859_1_mbtowc, + __iso_8859_2_mbtowc, + __iso_8859_3_mbtowc, + __iso_8859_4_mbtowc, + __iso_8859_5_mbtowc, + __iso_8859_6_mbtowc, + __iso_8859_7_mbtowc, + __iso_8859_8_mbtowc, + __iso_8859_9_mbtowc, + __iso_8859_10_mbtowc, + __iso_8859_11_mbtowc, + NULL, /* No ISO 8859-12 */ + __iso_8859_13_mbtowc, + __iso_8859_14_mbtowc, + __iso_8859_15_mbtowc, + __iso_8859_16_mbtowc +}; + +/* val *MUST* be valid! All checks for validity are supposed to be + performed before calling this function. 
*/ +mbtowc_p +__iso_mbtowc (int val) +{ + return __iso_8859_mbtowc[val]; +} #endif /* _MB_EXTENDED_CHARSETS_ISO */ #ifdef _MB_EXTENDED_CHARSETS_WINDOWS -int -_DEFUN (__cp_mbtowc, (r, pwc, s, n, charset, state), - struct _reent *r _AND - wchar_t *pwc _AND - const char *s _AND - size_t n _AND - const char *charset _AND - mbstate_t *state) +static int +___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + int cp_idx, mbstate_t *state) { wchar_t dummy; unsigned char *t = (unsigned char *)s; @@ -175,7 +287,6 @@ _DEFUN (__cp_mbtowc, (r, pwc, s, n, charset, state), if (*t >= 0x80) { - int cp_idx = __cp_index (charset + 2); if (cp_idx >= 0) { *pwc = __cp_conv[cp_idx][*t - 0x80]; @@ -195,15 +306,233 @@ _DEFUN (__cp_mbtowc, (r, pwc, s, n, charset, state), return 1; } + +static int +__cp_437_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 0, state); +} + +static int +__cp_720_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 1, state); +} + +static int +__cp_737_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 2, state); +} + +static int +__cp_775_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 3, state); +} + +static int +__cp_850_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 4, state); +} + +static int +__cp_852_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 5, state); +} + +static int +__cp_855_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 6, state); +} + +static int +__cp_857_mbtowc (struct _reent *r, wchar_t *pwc, const 
char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 7, state); +} + +static int +__cp_858_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 8, state); +} + +static int +__cp_862_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 9, state); +} + +static int +__cp_866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 10, state); +} + +static int +__cp_874_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 11, state); +} + +static int +__cp_1125_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 12, state); +} + +static int +__cp_1250_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 13, state); +} + +static int +__cp_1251_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 14, state); +} + +static int +__cp_1252_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 15, state); +} + +static int +__cp_1253_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 16, state); +} + +static int +__cp_1254_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 17, state); +} + +static int +__cp_1255_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 18, state); +} + +static int +__cp_1256_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + 
mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 19, state); +} + +static int +__cp_1257_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 20, state); +} + +static int +__cp_1258_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 21, state); +} + +static int +__cp_20866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 22, state); +} + +static int +__cp_21866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 23, state); +} + +static int +__cp_101_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 24, state); +} + +static int +__cp_102_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, + mbstate_t *state) +{ + return ___cp_mbtowc (r, pwc, s, n, 25, state); +} + +static mbtowc_p __cp_xxx_mbtowc[26] = { + __cp_437_mbtowc, + __cp_720_mbtowc, + __cp_737_mbtowc, + __cp_775_mbtowc, + __cp_850_mbtowc, + __cp_852_mbtowc, + __cp_855_mbtowc, + __cp_857_mbtowc, + __cp_858_mbtowc, + __cp_862_mbtowc, + __cp_866_mbtowc, + __cp_874_mbtowc, + __cp_1125_mbtowc, + __cp_1250_mbtowc, + __cp_1251_mbtowc, + __cp_1252_mbtowc, + __cp_1253_mbtowc, + __cp_1254_mbtowc, + __cp_1255_mbtowc, + __cp_1256_mbtowc, + __cp_1257_mbtowc, + __cp_1258_mbtowc, + __cp_20866_mbtowc, + __cp_21866_mbtowc, + __cp_101_mbtowc, + __cp_102_mbtowc +}; + +/* val *MUST* be valid! All checks for validity are supposed to be + performed before calling this function. 
*/ +mbtowc_p +__cp_mbtowc (int val) +{ + return __cp_xxx_mbtowc[__cp_val_index (val)]; +} #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ int -_DEFUN (__utf8_mbtowc, (r, pwc, s, n, charset, state), +_DEFUN (__utf8_mbtowc, (r, pwc, s, n, state), struct _reent *r _AND wchar_t *pwc _AND const char *s _AND size_t n _AND - const char *charset _AND mbstate_t *state) { wchar_t dummy; @@ -401,12 +730,11 @@ _DEFUN (__utf8_mbtowc, (r, pwc, s, n, charset, state), because the underlying OS requires wchar_t == UTF-16. */ #ifndef __CYGWIN__ int -_DEFUN (__sjis_mbtowc, (r, pwc, s, n, charset, state), +_DEFUN (__sjis_mbtowc, (r, pwc, s, n, state), struct _reent *r _AND wchar_t *pwc _AND const char *s _AND size_t n _AND - const char *charset _AND mbstate_t *state) { wchar_t dummy; @@ -459,12 +787,11 @@ _DEFUN (__sjis_mbtowc, (r, pwc, s, n, charset, state), } int -_DEFUN (__eucjp_mbtowc, (r, pwc, s, n, charset, state), +_DEFUN (__eucjp_mbtowc, (r, pwc, s, n, state), struct _reent *r _AND wchar_t *pwc _AND const char *s _AND size_t n _AND - const char *charset _AND mbstate_t *state) { wchar_t dummy; @@ -543,12 +870,11 @@ _DEFUN (__eucjp_mbtowc, (r, pwc, s, n, charset, state), } int -_DEFUN (__jis_mbtowc, (r, pwc, s, n, charset, state), +_DEFUN (__jis_mbtowc, (r, pwc, s, n, state), struct _reent *r _AND wchar_t *pwc _AND const char *s _AND size_t n _AND - const char *charset _AND mbstate_t *state) { wchar_t dummy; diff --git a/newlib/libc/stdlib/sb_charsets.c b/newlib/libc/stdlib/sb_charsets.c index e668c4b83..38583e572 100644 --- a/newlib/libc/stdlib/sb_charsets.c +++ b/newlib/libc/stdlib/sb_charsets.c @@ -2,8 +2,6 @@ #include #ifdef _MB_CAPABLE -extern char *__locale_charset (); - #ifdef _MB_EXTENDED_CHARSETS_ISO /* Tables for the ISO-8859-x to UTF conversion. 
The first index into the table is a value computed from the value x (function __iso_8859_index), @@ -674,26 +672,31 @@ __micro_atoi (const char *s) #ifdef _MB_EXTENDED_CHARSETS_ISO int -__iso_8859_index (const char *charset_ext) +__iso_8859_val_index (int val) { - int iso_idx = __micro_atoi (charset_ext); - if (iso_idx >= 2 && iso_idx <= 16) + if (val >= 2 && val <= 16) { - iso_idx -= 2; - if (iso_idx > 10) - --iso_idx; - return iso_idx; + val -= 2; + if (val > 10) + --val; + return (int) val; } return -1; } + +int +__iso_8859_index (const char *charset_ext) +{ + return __iso_8859_val_index (__micro_atoi (charset_ext)); +} #endif /* _MB_EXTENDED_CHARSETS_ISO */ #ifdef _MB_EXTENDED_CHARSETS_WINDOWS int -__cp_index (const char *charset_ext) +__cp_val_index (int val) { - int cp_idx = __micro_atoi (charset_ext); - switch (cp_idx) + int cp_idx; + switch (val) { case 437: cp_idx = 0; @@ -779,5 +782,12 @@ __cp_index (const char *charset_ext) } return cp_idx; } + +int +__cp_index (const char *charset_ext) +{ + return __cp_val_index (__micro_atoi (charset_ext)); +} + #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ #endif /* _MB_CAPABLE */ diff --git a/newlib/libc/stdlib/wcrtomb.c b/newlib/libc/stdlib/wcrtomb.c index 4f3006814..df7741a8c 100644 --- a/newlib/libc/stdlib/wcrtomb.c +++ b/newlib/libc/stdlib/wcrtomb.c @@ -25,9 +25,9 @@ _DEFUN (_wcrtomb_r, (ptr, s, wc, ps), #endif if (s == NULL) - retval = __wctomb (ptr, buf, L'\0', __locale_charset (), ps); + retval = __WCTOMB (ptr, buf, L'\0', ps); else - retval = __wctomb (ptr, s, wc, __locale_charset (), ps); + retval = __WCTOMB (ptr, s, wc, ps); if (retval == -1) { @@ -62,9 +62,9 @@ _DEFUN (wcrtomb, (s, wc, ps), #endif if (s == NULL) - retval = __wctomb (reent, buf, L'\0', __locale_charset (), ps); + retval = __WCTOMB (reent, buf, L'\0', ps); else - retval = __wctomb (reent, s, wc, __locale_charset (), ps); + retval = __WCTOMB (reent, s, wc, ps); if (retval == -1) { diff --git a/newlib/libc/stdlib/wcsnrtombs.c 
b/newlib/libc/stdlib/wcsnrtombs.c index 99c8bd175..b09f7e19e 100644 --- a/newlib/libc/stdlib/wcsnrtombs.c +++ b/newlib/libc/stdlib/wcsnrtombs.c @@ -138,7 +138,7 @@ _DEFUN (_wcsnrtombs_r, (r, dst, src, nwc, len, ps), { int count = ps->__count; wint_t wch = ps->__value.__wch; - int bytes = __wctomb (r, buff, *pwcs, __locale_charset (), ps); + int bytes = __WCTOMB (r, buff, *pwcs, ps); if (bytes == -1) { r->_errno = EILSEQ; @@ -164,7 +164,7 @@ _DEFUN (_wcsnrtombs_r, (r, dst, src, nwc, len, ps), } else { - /* not enough room, we must back up state to before __wctomb call */ + /* not enough room, we must back up state to before __WCTOMB call */ ps->__count = count; ps->__value.__wch = wch; len = 0; diff --git a/newlib/libc/stdlib/wcstombs_r.c b/newlib/libc/stdlib/wcstombs_r.c index 3bac8a337..0680cd783 100644 --- a/newlib/libc/stdlib/wcstombs_r.c +++ b/newlib/libc/stdlib/wcstombs_r.c @@ -20,7 +20,7 @@ _DEFUN (_wcstombs_r, (reent, s, pwcs, n, state), size_t num_bytes = 0; while (*pwcs != 0) { - bytes = __wctomb (r, buff, *pwcs++, __locale_charset (), state); + bytes = __WCTOMB (r, buff, *pwcs++, state); if (bytes == -1) return -1; num_bytes += bytes; @@ -31,7 +31,7 @@ _DEFUN (_wcstombs_r, (reent, s, pwcs, n, state), { while (n > 0) { - bytes = __wctomb (r, buff, *pwcs, __locale_charset (), state); + bytes = __WCTOMB (r, buff, *pwcs, state); if (bytes == -1) return -1; num_to_copy = (n > bytes ? bytes : (int)n); diff --git a/newlib/libc/stdlib/wctob.c b/newlib/libc/stdlib/wctob.c index eebaec84f..61365a178 100644 --- a/newlib/libc/stdlib/wctob.c +++ b/newlib/libc/stdlib/wctob.c @@ -21,6 +21,5 @@ wctob (wint_t wc) reent = _REENT; _REENT_CHECK_MISC(reent); - return __wctomb (reent, (char *) pmb, wc, __locale_charset (), &mbs) == 1 - ? (int) pmb[0] : EOF; + return __WCTOMB (reent, (char *) pmb, wc, &mbs) == 1 ? 
(int) pmb[0] : EOF; } diff --git a/newlib/libc/stdlib/wctomb.c b/newlib/libc/stdlib/wctomb.c index 8d4ceb8ad..13892ffa4 100644 --- a/newlib/libc/stdlib/wctomb.c +++ b/newlib/libc/stdlib/wctomb.c @@ -61,8 +61,7 @@ _DEFUN (wctomb, (s, wchar), _REENT_CHECK_MISC(reent); - return __wctomb (reent, s, wchar, __locale_charset (), - &(_REENT_WCTOMB_STATE(reent))); + return __WCTOMB (reent, s, wchar, &(_REENT_WCTOMB_STATE(reent))); #else /* not _MB_CAPABLE */ if (s == NULL) return 0; diff --git a/newlib/libc/stdlib/wctomb_r.c b/newlib/libc/stdlib/wctomb_r.c index c93962fa4..02b85e685 100644 --- a/newlib/libc/stdlib/wctomb_r.c +++ b/newlib/libc/stdlib/wctomb_r.c @@ -6,15 +6,6 @@ #include "mbctype.h" #include "local.h" -int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset, - mbstate_t *) -#ifdef __CYGWIN__ - /* Cygwin starts up in UTF-8 mode. */ - = __utf8_wctomb; -#else - = __ascii_wctomb; -#endif - int _DEFUN (_wctomb_r, (r, s, wchar, state), struct _reent *r _AND @@ -22,15 +13,14 @@ _DEFUN (_wctomb_r, (r, s, wchar, state), wchar_t _wchar _AND mbstate_t *state) { - return __wctomb (r, s, _wchar, __locale_charset (), state); + return __WCTOMB (r, s, _wchar, state); } int -_DEFUN (__ascii_wctomb, (r, s, wchar, charset, state), +_DEFUN (__ascii_wctomb, (r, s, wchar, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND - const char *charset _AND mbstate_t *state) { /* Avoids compiler warnings about comparisons that are always false @@ -60,11 +50,10 @@ _DEFUN (__ascii_wctomb, (r, s, wchar, charset, state), #define __state __count int -_DEFUN (__utf8_wctomb, (r, s, wchar, charset, state), +_DEFUN (__utf8_wctomb, (r, s, wchar, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND - const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; @@ -155,11 +144,10 @@ _DEFUN (__utf8_wctomb, (r, s, wchar, charset, state), because the underlying OS requires wchar_t == UTF-16. 
*/ #ifndef __CYGWIN__ int -_DEFUN (__sjis_wctomb, (r, s, wchar, charset, state), +_DEFUN (__sjis_wctomb, (r, s, wchar, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND - const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; @@ -190,11 +178,10 @@ _DEFUN (__sjis_wctomb, (r, s, wchar, charset, state), } int -_DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state), +_DEFUN (__eucjp_wctomb, (r, s, wchar, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND - const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; @@ -231,11 +218,10 @@ _DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state), } int -_DEFUN (__jis_wctomb, (r, s, wchar, charset, state), +_DEFUN (__jis_wctomb, (r, s, wchar, state), struct _reent *r _AND char *s _AND wchar_t _wchar _AND - const char *charset _AND mbstate_t *state) { wint_t wchar = _wchar; @@ -282,13 +268,9 @@ _DEFUN (__jis_wctomb, (r, s, wchar, charset, state), #endif /* !__CYGWIN__ */ #ifdef _MB_EXTENDED_CHARSETS_ISO -int -_DEFUN (__iso_wctomb, (r, s, wchar, charset, state), - struct _reent *r _AND - char *s _AND - wchar_t _wchar _AND - const char *charset _AND - mbstate_t *state) +static int +___iso_wctomb (struct _reent *r, char *s, wchar_t _wchar, int iso_idx, + mbstate_t *state) { wint_t wchar = _wchar; @@ -298,7 +280,6 @@ _DEFUN (__iso_wctomb, (r, s, wchar, charset, state), /* wchars <= 0x9f translate to all ISO charsets directly. 
*/ if (wchar >= 0xa0) { - int iso_idx = __iso_8859_index (charset + 9); if (iso_idx >= 0) { unsigned char mb; @@ -326,16 +307,130 @@ _DEFUN (__iso_wctomb, (r, s, wchar, charset, state), *s = (char) wchar; return 1; } + +int __iso_8859_1_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, -1, state); +} + +int __iso_8859_2_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 0, state); +} + +int __iso_8859_3_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 1, state); +} + +int __iso_8859_4_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 2, state); +} + +int __iso_8859_5_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 3, state); +} + +int __iso_8859_6_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 4, state); +} + +int __iso_8859_7_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 5, state); +} + +int __iso_8859_8_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 6, state); +} + +int __iso_8859_9_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 7, state); +} + +int __iso_8859_10_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 8, state); +} + +int __iso_8859_11_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 9, state); +} + +int __iso_8859_13_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 10, state); +} + +int 
__iso_8859_14_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 11, state); +} + +int __iso_8859_15_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 12, state); +} + +int __iso_8859_16_wctomb (struct _reent *r, char *s, wchar_t _wchar, + mbstate_t *state) +{ + return ___iso_wctomb (r, s, _wchar, 13, state); +} + +static wctomb_p __iso_8859_wctomb[17] = { + NULL, + __iso_8859_1_wctomb, + __iso_8859_2_wctomb, + __iso_8859_3_wctomb, + __iso_8859_4_wctomb, + __iso_8859_5_wctomb, + __iso_8859_6_wctomb, + __iso_8859_7_wctomb, + __iso_8859_8_wctomb, + __iso_8859_9_wctomb, + __iso_8859_10_wctomb, + __iso_8859_11_wctomb, + NULL, /* No ISO 8859-12 */ + __iso_8859_13_wctomb, + __iso_8859_14_wctomb, + __iso_8859_15_wctomb, + __iso_8859_16_wctomb +}; + +/* val *MUST* be valid! All checks for validity are supposed to be + performed before calling this function. */ +wctomb_p +__iso_wctomb (int val) +{ + return __iso_8859_wctomb[val]; +} #endif /* _MB_EXTENDED_CHARSETS_ISO */ #ifdef _MB_EXTENDED_CHARSETS_WINDOWS -int -_DEFUN (__cp_wctomb, (r, s, wchar, charset, state), - struct _reent *r _AND - char *s _AND - wchar_t _wchar _AND - const char *charset _AND - mbstate_t *state) +static int +___cp_wctomb (struct _reent *r, char *s, wchar_t _wchar, int cp_idx, + mbstate_t *state) { wint_t wchar = _wchar; @@ -344,7 +439,6 @@ _DEFUN (__cp_wctomb, (r, s, wchar, charset, state), if (wchar >= 0x80) { - int cp_idx = __cp_index (charset + 2); if (cp_idx >= 0) { unsigned char mb; @@ -372,5 +466,198 @@ _DEFUN (__cp_wctomb, (r, s, wchar, charset, state), *s = (char) wchar; return 1; } + +static int +__cp_437_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 0, state); +} + +static int +__cp_720_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 1, state); +} 
+ +static int +__cp_737_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 2, state); +} + +static int +__cp_775_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 3, state); +} + +static int +__cp_850_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 4, state); +} + +static int +__cp_852_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 5, state); +} + +static int +__cp_855_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 6, state); +} + +static int +__cp_857_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 7, state); +} + +static int +__cp_858_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 8, state); +} + +static int +__cp_862_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 9, state); +} + +static int +__cp_866_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 10, state); +} + +static int +__cp_874_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 11, state); +} + +static int +__cp_1125_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 12, state); +} + +static int +__cp_1250_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 13, state); +} + +static int +__cp_1251_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 14, state); +} + +static int +__cp_1252_wctomb (struct _reent *r, char *s, wchar_t _wchar, 
mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 15, state); +} + +static int +__cp_1253_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 16, state); +} + +static int +__cp_1254_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 17, state); +} + +static int +__cp_1255_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 18, state); +} + +static int +__cp_1256_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 19, state); +} + +static int +__cp_1257_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 20, state); +} + +static int +__cp_1258_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 21, state); +} + +static int +__cp_20866_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 22, state); +} + +static int +__cp_21866_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 23, state); +} + +static int +__cp_101_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 24, state); +} + +static int +__cp_102_wctomb (struct _reent *r, char *s, wchar_t _wchar, mbstate_t *state) +{ + return ___cp_wctomb (r, s, _wchar, 25, state); +} + +static wctomb_p __cp_xxx_wctomb[26] = { + __cp_437_wctomb, + __cp_720_wctomb, + __cp_737_wctomb, + __cp_775_wctomb, + __cp_850_wctomb, + __cp_852_wctomb, + __cp_855_wctomb, + __cp_857_wctomb, + __cp_858_wctomb, + __cp_862_wctomb, + __cp_866_wctomb, + __cp_874_wctomb, + __cp_1125_wctomb, + __cp_1250_wctomb, + __cp_1251_wctomb, + __cp_1252_wctomb, + __cp_1253_wctomb, + __cp_1254_wctomb, + __cp_1255_wctomb, + __cp_1256_wctomb, + 
__cp_1257_wctomb, + __cp_1258_wctomb, + __cp_20866_wctomb, + __cp_21866_wctomb, + __cp_101_wctomb, + __cp_102_wctomb +}; + +/* val *MUST* be valid! All checks for validity are supposed to be + performed before calling this function. */ +wctomb_p +__cp_wctomb (int val) +{ + return __cp_xxx_wctomb[__cp_val_index (val)]; +} #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ #endif /* _MB_CAPABLE */ diff --git a/newlib/libc/time/strptime.c b/newlib/libc/time/strptime.c index 88b897834..a057450fc 100644 --- a/newlib/libc/time/strptime.c +++ b/newlib/libc/time/strptime.c @@ -68,7 +68,7 @@ is_leap_year (int year) /* Needed for strptime. */ static int -match_string (const char *__restrict *buf, const char **strs) +match_string (const char *__restrict *buf, const char * const*strs) { int i = 0; diff --git a/winsup/cygwin/cygheap.cc b/winsup/cygwin/cygheap.cc index 11f868f01..87a5eb964 100644 --- a/winsup/cygwin/cygheap.cc +++ b/winsup/cygwin/cygheap.cc @@ -28,7 +28,7 @@ static mini_cygheap NO_COPY cygheap_dummy = { - {__utf8_mbtowc, __utf8_wctomb} + {__utf8_mbtowc} }; init_cygheap NO_COPY *cygheap = (init_cygheap *) &cygheap_dummy; @@ -245,8 +245,6 @@ cygheap_init () cygheap->bucket_val[b] = sz[b & 1]; /* Default locale settings. */ cygheap->locale.mbtowc = __utf8_mbtowc; - cygheap->locale.wctomb = __utf8_wctomb; - strcpy (cygheap->locale.charset, "UTF-8"); /* Set umask to a sane default. 
*/ cygheap->umask = 022; cygheap->rlim_core = RLIM_INFINITY; diff --git a/winsup/cygwin/cygheap.h b/winsup/cygwin/cygheap.h index e2807e216..abbf9ec07 100644 --- a/winsup/cygwin/cygheap.h +++ b/winsup/cygwin/cygheap.h @@ -346,8 +346,6 @@ struct cygheap_debug struct cygheap_locale { mbtowc_p mbtowc; - wctomb_p wctomb; - char charset[ENCODING_LEN + 1]; }; struct user_heap_info diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h index 3321523ef..c7db8f8e3 100644 --- a/winsup/cygwin/fhandler.h +++ b/winsup/cygwin/fhandler.h @@ -1355,7 +1355,7 @@ class dev_console inline UINT get_console_cp (); DWORD con_to_str (char *d, int dlen, WCHAR w); - DWORD str_to_con (mbtowc_p, const char *, PWCHAR d, const char *s, DWORD sz); + DWORD str_to_con (mbtowc_p, PWCHAR d, const char *s, DWORD sz); void set_color (HANDLE); void set_default_attr (); int set_cl_x (cltype); diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc index 76aff0f58..45fe8828b 100644 --- a/winsup/cygwin/fhandler_console.cc +++ b/winsup/cygwin/fhandler_console.cc @@ -225,10 +225,9 @@ dev_console::get_console_cp () } inline DWORD -dev_console::str_to_con (mbtowc_p f_mbtowc, const char *charset, - PWCHAR d, const char *s, DWORD sz) +dev_console::str_to_con (mbtowc_p f_mbtowc, PWCHAR d, const char *s, DWORD sz) { - return sys_cp_mbstowcs (f_mbtowc, charset, d, CONVERT_LIMIT, s, sz); + return sys_cp_mbstowcs (f_mbtowc, d, CONVERT_LIMIT, s, sz); } bool @@ -2002,21 +2001,10 @@ fhandler_console::write_normal (const unsigned char *src, const unsigned char *found = src; size_t ret; mbstate_t ps; - UINT cp = con.get_console_cp (); - const char *charset; mbtowc_p f_mbtowc; - if (cp) - { - /* The alternate charset is always 437, just as in the Linux console. */ - f_mbtowc = __cp_mbtowc; - charset = "CP437"; - } - else - { - f_mbtowc = cygheap->locale.mbtowc; - charset = cygheap->locale.charset; - } + /* The alternate charset is always 437, just as in the Linux console. 
*/ + f_mbtowc = con.get_console_cp () ? __cp_mbtowc (437) : __MBTOWC; /* First check if we have cached lead bytes of a former try to write a truncated multibyte sequence. If so, process it. */ @@ -2027,7 +2015,7 @@ fhandler_console::write_normal (const unsigned char *src, memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len); memset (&ps, 0, sizeof ps); switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf, - trunc_buf.len + cp_len, charset, &ps)) + trunc_buf.len + cp_len, &ps)) { case -2: /* Still truncated multibyte sequence? Keep in trunc_buf. */ @@ -2052,9 +2040,9 @@ fhandler_console::write_normal (const unsigned char *src, /* Valid multibyte sequence? Process. */ if (nfound) { - buf_len = con.str_to_con (f_mbtowc, charset, write_buf, - (const char *) trunc_buf.buf, - nfound - trunc_buf.buf); + buf_len = con.str_to_con (f_mbtowc, write_buf, + (const char *) trunc_buf.buf, + nfound - trunc_buf.buf); if (!write_console (write_buf, buf_len, done)) { debug_printf ("multibyte sequence write failed, handle %p", get_output_handle ()); @@ -2075,7 +2063,7 @@ fhandler_console::write_normal (const unsigned char *src, && base_chars[*found] == NOR) { switch (ret = f_mbtowc (_REENT, NULL, (const char *) found, - end - found, charset, &ps)) + end - found, &ps)) { case -2: /* Truncated multibyte sequence. Store for next write. 
*/ trunc_buf.len = end - found; @@ -2098,8 +2086,7 @@ do_print: if (found != src) { DWORD len = found - src; - buf_len = con.str_to_con (f_mbtowc, charset, write_buf, - (const char *) src, len); + buf_len = con.str_to_con (f_mbtowc, write_buf, (const char *) src, len); if (!buf_len) { debug_printf ("conversion error, handle %p", @@ -2178,7 +2165,7 @@ do_print: if (found + 1 < end) { ret = __utf8_mbtowc (_REENT, NULL, (const char *) found + 1, - end - found - 1, NULL, &ps); + end - found - 1, &ps); if (ret != (size_t) -1) while (ret-- > 0) { diff --git a/winsup/cygwin/init.cc b/winsup/cygwin/init.cc index 7285e3db6..1728105ef 100644 --- a/winsup/cygwin/init.cc +++ b/winsup/cygwin/init.cc @@ -83,6 +83,15 @@ dll_entry (HANDLE h, DWORD reason, void *static_load) cygwin_hmodule = (HMODULE) h; dynamically_loaded = (static_load == NULL); + /* Starting with adding the POSIX-1.2008 per-thread locale functionality, + we need an initialized _REENT area even for the functions called from + dll_crt0_0. In fact, we only need the _REENT->_locale pointer + initialized to NULL, so subsequent calls to locale-specific functions + will always fall back to __global_locale, rather than crash due to + _REENT->_locale having an arbitrary value. */ + (void) alloca (CYGTLS_PADSIZE); + _REENT->_locale = NULL; + dll_crt0_0 (); _my_oldfunc = TlsAlloc (); dll_finished_loading = true; diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc index 6dde25111..2ba9f3244 100644 --- a/winsup/cygwin/nlsfuncs.cc +++ b/winsup/cygwin/nlsfuncs.cc @@ -16,8 +16,6 @@ details. */ #include "dtable.h" #include "cygheap.h" #include "tls_pbuf.h" -/* Internal headers from newlib */ -#include "../locale/setlocale.h" #include "lc_msg.h" #include "lc_era.h" @@ -31,8 +29,7 @@ details. 
*/ __eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\ lc_time_end-lc_time_ptr) #define charfromwchar(category,in) \ - __charfromwchar (_##category##_locale->in,_LC(category),\ - f_wctomb,charset) + __charfromwchar (_##category##_locale->in,_LC(category),f_wctomb) #define has_modifier(x) ((x)[0] && !strcmp (modifier, (x))) @@ -159,8 +156,7 @@ __get_lcid_from_locale (const char *name) is set, s==NULL returns -1 since then it's used to recognize invalid strings in the used charset. */ static size_t -lc_wcstombs (wctomb_p f_wctomb, const char *charset, - char *s, const wchar_t *pwcs, size_t n, +lc_wcstombs (wctomb_p f_wctomb, char *s, const wchar_t *pwcs, size_t n, bool return_invalid = false) { char *ptr = s; @@ -175,7 +171,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset, size_t num_bytes = 0; while (*pwcs != 0) { - bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state); + bytes = f_wctomb (_REENT, buf, *pwcs++, &state); if (bytes != (size_t) -1) num_bytes += bytes; else if (return_invalid) @@ -185,7 +181,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset, } while (n > 0) { - bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state); + bytes = f_wctomb (_REENT, buf, *pwcs, &state); if (bytes == (size_t) -1) { memset (&state, 0, sizeof state); @@ -207,8 +203,7 @@ lc_wcstombs (wctomb_p f_wctomb, const char *charset, /* Never returns -1. Invalid sequences are translated to replacement wide-chars. 
*/ static size_t -lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset, - wchar_t *pwcs, const char *s, size_t n) +lc_mbstowcs (mbtowc_p f_mbtowc, wchar_t *pwcs, const char *s, size_t n) { size_t ret = 0; char *t = (char *) s; @@ -220,8 +215,7 @@ lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset, n = 1; while (n > 0) { - bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, - charset, &state); + bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, &state); if (bytes == (size_t) -1) { state.__count = 0; @@ -294,13 +288,12 @@ __setlocaleinfo (char **ptr, size_t size, wchar_t val) } static char * -__charfromwchar (const wchar_t *in, char **ptr, size_t size, - wctomb_p f_wctomb, const char *charset) +__charfromwchar (const wchar_t *in, char **ptr, size_t size, wctomb_p f_wctomb) { size_t num; char *ret; - num = lc_wcstombs (f_wctomb, charset, ret = *ptr, in, size); + num = lc_wcstombs (f_wctomb, ret = *ptr, in, size); *ptr += num + 1; return ret; } @@ -600,11 +593,11 @@ __set_lc_time_from_win (const char *name, /* Evaluate string length in target charset. Characters invalid in the target charset are simply ignored, as on Linux. 
*/ len = 0; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_fmt, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_t_fmt, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->era_t_fmt, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, era->alt_digits, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era_d_fmt, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era_d_t_fmt, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->era_t_fmt, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, era->alt_digits, 0) + 1; len += (wcslen (era->era) + 1) * sizeof (wchar_t); len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t); len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t); @@ -742,8 +735,7 @@ __set_lc_ctype_from_win (const char *name, lc_ctype_ptr = (char *) woutdig; _ctype_locale->outdigits[i] = lc_ctype_ptr; memset (&state, 0, sizeof state); - lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], charset, - &state); + lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], &state); *lc_ctype_ptr++ = '\0'; } } @@ -885,8 +877,7 @@ __set_lc_monetary_from_win (const char *name, LOCALE_SCURRENCY); /* As on Linux: If the currency_symbol can't be represented in the given charset, use int_curr_symbol. 
*/ - if (lc_wcstombs (f_wctomb, charset, NULL, - _monetary_locale->wcurrency_symbol, + if (lc_wcstombs (f_wctomb, NULL, _monetary_locale->wcurrency_symbol, 0, true) == (size_t) -1) _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol; else @@ -1026,10 +1017,10 @@ __set_lc_messages_from_win (const char *name, len += (strlen (charset) + 1); if (lcid) { - len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesexpr, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, msg->noexpr, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesstr, 0) + 1; - len += lc_wcstombs (f_wctomb, charset, NULL, msg->nostr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->yesstr, 0) + 1; + len += lc_wcstombs (f_wctomb, NULL, msg->nostr, 0) + 1; len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t); len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t); len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t); @@ -1051,13 +1042,13 @@ __set_lc_messages_from_win (const char *name, if (lcid) { _messages_locale->yesexpr = (const char *) c; - len = lc_wcstombs (f_wctomb, charset, c, msg->yesexpr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c); _messages_locale->noexpr = (const char *) (c += len + 1); - len = lc_wcstombs (f_wctomb, charset, c, msg->noexpr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->noexpr, lc_messages_end - c); _messages_locale->yesstr = (const char *) (c += len + 1); - len = lc_wcstombs (f_wctomb, charset, c, msg->yesstr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->yesstr, lc_messages_end - c); _messages_locale->nostr = (const char *) (c += len + 1); - len = lc_wcstombs (f_wctomb, charset, c, msg->nostr, lc_messages_end - c); + len = lc_wcstombs (f_wctomb, c, msg->nostr, lc_messages_end - c); c += len + 1; if ((uintptr_t) c % 1) ++c; @@ -1149,15 +1140,14 @@ 
strcoll (const char *__restrict s1, const char *__restrict s2) /* The ANSI version of CompareString uses the default charset of the lcid, so we must use the Unicode version. */ mbtowc_p collate_mbtowc = __get_current_collate_locale ()->mbtowc; - const char *collate_charset = __get_current_collate_locale ()->codeset; - n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1; + n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1; ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t)) : tp.w_get ()); - lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1); - n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1; + lc_mbstowcs (collate_mbtowc, ws1, s1, n1); + n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1; ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) : tp.w_get ()); - lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2); + lc_mbstowcs (collate_mbtowc, ws2, s2, n2); ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1); if (n1 > NT_MAX_PATH) free (ws1); @@ -1226,13 +1216,12 @@ strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn) /* The ANSI version of LCMapString uses the default charset of the lcid, so we must use the Unicode version. */ mbtowc_p collate_mbtowc = __get_current_collate_locale ()->mbtowc; - const char *collate_charset = __get_current_collate_locale ()->codeset; - n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1; + n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1; ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t)) : tp.w_get ()); if (ws2) { - lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2); + lc_mbstowcs (collate_mbtowc, ws2, s2, n2); /* The sort key is a NUL-terminated byte string. 
*/ ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn); @@ -1474,7 +1463,7 @@ __set_locale_from_locale_alias (const char *locale, char *new_locale) if (strlen (replace) > ENCODING_LEN) continue; /* The file is latin1 encoded */ - lc_mbstowcs (__iso_mbtowc, "ISO-8859-1", walias, alias, ENCODING_LEN + 1); + lc_mbstowcs (__iso_mbtowc (1), walias, alias, ENCODING_LEN + 1); walias[ENCODING_LEN] = L'\0'; if (!wcscmp (wlocale, walias)) { @@ -1503,33 +1492,25 @@ internal_setlocale () wchar_t *w_path = NULL, *w_cwd; /* Don't do anything if the charset hasn't actually changed. */ - if (strcmp (cygheap->locale.charset, __locale_charset ()) == 0) + if (cygheap->locale.mbtowc == __global_locale.mbtowc) return; - debug_printf ("Cygwin charset changed from %s to %s", - cygheap->locale.charset, __locale_charset ()); + debug_printf ("Cygwin charset changed to %s", __locale_charset ()); /* Fetch PATH and CWD and convert to wchar_t in previous charset. */ path = getenv ("PATH"); if (path && *path) /* $PATH can be potentially unset. */ { w_path = tp.w_get (); - sys_mbstowcs (w_path, 32768, path); + sys_cp_mbstowcs (cygheap->locale.mbtowc, w_path, 32768, path); } w_cwd = tp.w_get (); cwdstuff::cwd_lock.acquire (); - sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ()); + sys_cp_mbstowcs (cygheap->locale.mbtowc, w_cwd, 32768, + cygheap->cwd.get_posix ()); /* Set charset for internal conversion functions. */ - if (*__locale_charset () == 'A'/*SCII*/) - { - cygheap->locale.mbtowc = __utf8_mbtowc; - cygheap->locale.wctomb = __utf8_wctomb; - } - else - { - cygheap->locale.mbtowc = __mbtowc; - cygheap->locale.wctomb = __wctomb; - } - strcpy (cygheap->locale.charset, __locale_charset ()); + cygheap->locale.mbtowc = __global_locale.mbtowc; + if (cygheap->locale.mbtowc == __ascii_mbtowc) + cygheap->locale.mbtowc = __utf8_mbtowc; /* Restore CWD and PATH in new charset. 
*/ cygheap->cwd.reset_posix (w_cwd); cwdstuff::cwd_lock.release (); diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 40f2c2945..c962f7cf8 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -140,15 +140,13 @@ __db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp) } extern "C" int -__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 932); } extern "C" int -__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { /* Unfortunately, the Windows eucJP codepage 20932 is not really 100% compatible to eucJP. It's a cute approximation which makes it a @@ -192,22 +190,19 @@ __eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, } extern "C" int -__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 936); } extern "C" int -__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 949); } extern "C" int -__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset, - mbstate_t *state) +__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { return __db_wctomb (r,s, wchar, 950); } @@ -268,14 +263,14 @@ __db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, UINT cp, extern "C" int __sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 932, state); } extern "C" int 
__eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { /* See comment in __eucjp_wctomb above. */ wchar_t dummy; @@ -352,21 +347,21 @@ jis_x_0212: extern "C" int __gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 936, state); } extern "C" int __kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 949, state); } extern "C" int __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, - const char *charset, mbstate_t *state) + mbstate_t *state) { return __db_mbtowc (r, pwc, s, n, 950, state); } @@ -408,7 +403,7 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, */ static size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, - bool is_path) + bool is_path) { char buf[10]; char *ptr = dst; @@ -416,9 +411,10 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, size_t n = 0; mbstate_t ps; save_errno save; - wctomb_p f_wctomb = cygheap->locale.wctomb; - const char *charset = cygheap->locale.charset; + wctomb_p f_wctomb = __WCTOMB; + if (f_wctomb == __ascii_wctomb) + f_wctomb = __utf8_wctomb; memset (&ps, 0, sizeof ps); if (dst == NULL) len = (size_t) -1; @@ -441,13 +437,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, } else { - bytes = f_wctomb (_REENT, buf, pw, charset, &ps); - if (bytes == -1 && *charset != 'U'/*TF-8*/) + bytes = f_wctomb (_REENT, buf, pw, &ps); + if (bytes == -1 && f_wctomb != __utf8_wctomb) { /* Convert chars invalid in the current codepage to a sequence ASCII CAN; UTF-8 representation of invalid char. 
*/ buf[0] = 0x18; /* ASCII CAN */ - bytes = __utf8_wctomb (_REENT, buf + 1, pw, charset, &ps); + bytes = __utf8_wctomb (_REENT, buf + 1, pw, &ps); if (bytes == -1) { ++pwcs; @@ -465,8 +461,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc, ps.__count = 0; continue; } - bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, charset, - &ps); + bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs, &ps); nwc--; } } @@ -557,8 +552,8 @@ sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src, charset, which is the charset returned by GetConsoleCP (). Most of the time this is used for box and line drawing characters. */ size_t __reg3 -sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, - size_t dlen, const char *src, size_t nms) +sys_cp_mbstowcs (mbtowc_p f_mbtowc, wchar_t *dst, size_t dlen, + const char *src, size_t nms) { wchar_t *ptr = dst; unsigned const char *pmbs = (unsigned const char *) src; @@ -581,10 +576,11 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, next byte must be a valid UTF-8 start byte. If the charset isn't UTF-8 anyway, try to convert the following bytes as UTF-8 sequence. */ - if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4 && *charset != 'U'/*TF-8*/) + if (nms > 2 && pmbs[1] >= 0xc2 && pmbs[1] <= 0xf4 + && f_mbtowc != __utf8_mbtowc) { bytes = __utf8_mbtowc (_REENT, ptr, (const char *) pmbs + 1, - nms - 1, charset, &ps); + nms - 1, &ps); if (bytes < 0) { /* Invalid UTF-8 sequence? Treat the ASCII CAN character as @@ -603,7 +599,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, wchar_t *ptr2 = dst ? 
ptr + 1 : NULL; int bytes2 = __utf8_mbtowc (_REENT, ptr2, (const char *) pmbs + bytes, - nms - bytes, charset, &ps); + nms - bytes, &ps); if (bytes2 < 0) memset (&ps, 0, sizeof ps); else @@ -625,7 +621,7 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, } } else if ((bytes = f_mbtowc (_REENT, ptr, (const char *) pmbs, nms, - charset, &ps)) < 0) + &ps)) < 0) { /* The technique is based on a discussion here: http://www.mail-archive.com/linux-utf8@nl.linux.org/msg00080.html @@ -668,8 +664,10 @@ sys_cp_mbstowcs (mbtowc_p f_mbtowc, const char *charset, wchar_t *dst, size_t __reg3 sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, size_t nms) { - return sys_cp_mbstowcs (cygheap->locale.mbtowc, cygheap->locale.charset, - dst, dlen, src, nms); + mbtowc_p f_mbtowc = __MBTOWC; + if (f_mbtowc == __ascii_mbtowc) + f_mbtowc = __utf8_mbtowc; + return sys_cp_mbstowcs (f_mbtowc, dst, dlen, src, nms); } /* Same as sys_wcstombs_alloc, just backwards. */ diff --git a/winsup/cygwin/wchar.h b/winsup/cygwin/wchar.h index 1bffd6377..b3dacf3b5 100644 --- a/winsup/cygwin/wchar.h +++ b/winsup/cygwin/wchar.h @@ -11,6 +11,9 @@ details. 
*/ #include_next <wchar.h> +/* Internal headers from newlib */ +#include "../locale/setlocale.h" + #define ENCODING_LEN 31 #ifdef __cplusplus @@ -18,29 +21,23 @@ extern "C" { #endif typedef int mbtowc_f (struct _reent *, wchar_t *, const char *, size_t, - const char *, mbstate_t *); + mbstate_t *); typedef mbtowc_f *mbtowc_p; -extern mbtowc_p __mbtowc; extern mbtowc_f __ascii_mbtowc; extern mbtowc_f __utf8_mbtowc; -extern mbtowc_f __iso_mbtowc; -extern mbtowc_f __cp_mbtowc; -extern mbtowc_f __sjis_mbtowc; -extern mbtowc_f __eucjp_mbtowc; -extern mbtowc_f __gbk_mbtowc; -extern mbtowc_f __kr_mbtowc; -extern mbtowc_f __big5_mbtowc; +extern mbtowc_p __iso_mbtowc (int); +extern mbtowc_p __cp_mbtowc (int); -typedef int wctomb_f (struct _reent *, char *, wchar_t, const char *, - mbstate_t *); +#define __MBTOWC (__get_current_locale ()->mbtowc) + +typedef int wctomb_f (struct _reent *, char *, wchar_t, mbstate_t *); typedef wctomb_f *wctomb_p; -extern wctomb_p __wctomb; extern wctomb_f __ascii_wctomb; extern wctomb_f __utf8_wctomb; -extern char *__locale_charset (); +#define __WCTOMB (__get_current_locale ()->wctomb) #ifdef __cplusplus } @@ -49,20 +46,21 @@ extern char *__locale_charset (); #ifdef __INSIDE_CYGWIN__ #ifdef __cplusplus size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t * src, - size_t nwc = (size_t) -1); + size_t nwc = (size_t) -1); size_t __reg3 sys_wcstombs_no_path (char *dst, size_t len, - const wchar_t * src, size_t nwc = (size_t) -1); + const wchar_t * src, + size_t nwc = (size_t) -1); size_t __reg3 sys_wcstombs_alloc (char **, int, const wchar_t *, - size_t = (size_t) -1); + size_t = (size_t) -1); size_t __reg3 sys_wcstombs_alloc_no_path (char **, int, const wchar_t *, - size_t = (size_t) -1); + size_t = (size_t) -1); -size_t __reg3 sys_cp_mbstowcs (mbtowc_p, const char *, wchar_t *, size_t, - const char *, size_t = (size_t) -1); +size_t __reg3 sys_cp_mbstowcs (mbtowc_p, wchar_t *, size_t, const char *, + size_t = (size_t) -1); size_t __reg3 
sys_mbstowcs (wchar_t * dst, size_t dlen, const char *src, - size_t nms = (size_t) -1); + size_t nms = (size_t) -1); size_t __reg3 sys_mbstowcs_alloc (wchar_t **, int, const char *, - size_t = (size_t) -1); + size_t = (size_t) -1); #endif /* __cplusplus */ #endif /* __INSIDE_CYGWIN__ */