* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.

(__ctype_default): New character class array for default ASCII
	character set.
	(__ctype_iso): New array of character class array for ISO charsets.
	(__ctype_cp): Ditto for singlebyte Windows codepages.
	(tolower): Implement as distinct function to support any singlebyte
	charset.
	(toupper): Ditto.
	(__set_ctype): New function to copy singlebyte character classes
	corresponding to current charset to ctype_b array.
	Align copyright text to upstream.
	* dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX.
	* environ.cc (set_file_api_mode): Remove.
	(codepage_init): Remove.
	(parse_thing): Remove "codepage" setting.
	(environ_init): Set locale according to environment settings, or
	to current codepage, before converting environment to multibyte.
	* fhandler.h (fhandler_console::write_replacement_char): Drop argument.
	* fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs
	rather than MultiByteToWideChar.
	(fhandler_console::write_replacement_char): Always print a funny
	half filled square if a character isn't in the current charset.
	(fhandler_console::write_normal): Convert to using __mbtowc
	rather than next_char.
	* fork.cc (frok::child): Drop call to set_file_api_mode.
	* globals.cc (enum codepage_type): Remove.
	(current_codepage): Remove.
	* miscfuncs.cc (cygwin_wcslwr): Unused, dangerous.  Remove.
	(cygwin_wcsupr): Ditto.
	(is_cp_multibyte): Remove.
	(next_char): Remove.
	* miscfuncs.h (is_cp_multibyte): Drop declaration.
	(next_char): Ditto.
	* strfuncs.cc (get_cp): Remove.
	(__db_wctomb): New function to implement _wctomb_r functionality for
	doublebyte charsets using WideCharToMultiByte.
	(__sjis_wctomb): New function to replace unusable newlib function.
	(__jis_wctomb): Ditto.
	(__eucjp_wctomb): Ditto.
	(__gbk_wctomb): New function.
	(__kr_wctomb): Ditto.
	(__big5_wctomb): Ditto.
	(__db_mbtowc): New function to implement _mbtowc_r functionality for
	doublebyte charsets using MultiByteToWideChar.
	(__sjis_mbtowc): New function to replace unusable newlib function.
	(__jis_mbtowc): Ditto.
	(__eucjp_mbtowc): Ditto.
	(__gbk_mbtowc): New function.
	(__kr_mbtowc): New function.
	(__big5_mbtowc): New function.
	(__set_charset_from_codepage): New function.
	(sys_wcstombs): Reimplement, basically using same wide char to multibyte
	conversion as newlib's application level functions.  Plus extras.
	Add lengthy comment to explain.  Change return type to size_t.
	(sys_wcstombs_alloc): Just use sys_wcstombs.  Change return type to
	size_t.
	(sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage
	argument.  Explain why.  Change return type to size_t.
	(sys_mbstowcs_alloc): Just use sys_mbstowcs.  Change return type to
	size_t.
	* wchar.h: Declare internal functions implemented in strfuncs.cc.
	(wcscasecmp): Remove.
	(wcsncasecmp): Remove.
	(wcslwr): Remove.
	(wcsupr): Remove.
	* winsup.h (codepage_init): Remove declaration.
	(get_cp): Ditto.
	(sys_wcstombs): Align declaration to new implementation.
	(sys_wcstombs_alloc): Ditto.
	(sys_cp_mbstowcs): Add declaration.
	(sys_mbstowcs): Define as inline function.
	(sys_mbstowcs_alloc): Align declaration to new implementation.
	(set_file_api_mode): Remove declaration.
	* include/ctype.h (isblank): Redefine to use _B character class.
	(toupper): Remove ASCII-only definition.
	(tolower): Ditto.
This commit is contained in:
Corinna Vinschen 2009-03-24 12:18:34 +00:00
parent 6a32d500a9
commit 161211d186
14 changed files with 1337 additions and 316 deletions

View File

@ -1,4 +1,83 @@
2009-03-23 Corinna Vinschen <corinna@vinschen.de>
2009-03-24 Corinna Vinschen <corinna@vinschen.de>
* ctype.cc (_CTYPE_DATA_0_127): Add _B class to TAB character.
(__ctype_default): New character class array for default ASCII
character set.
(__ctype_iso): New array of character class array for ISO charsets.
(__ctype_cp): Ditto for singlebyte Windows codepages.
(tolower): Implement as distinct function to support any singlebyte
charset.
(toupper): Ditto.
(__set_ctype): New function to copy singlebyte character classes
corresponding to current charset to ctype_b array.
Align copyright text to upstream.
* dcrt0.cc (dll_crt0_1): Reset current locale to "C" per POSIX.
* environ.cc (set_file_api_mode): Remove.
(codepage_init): Remove.
(parse_thing): Remove "codepage" setting.
(environ_init): Set locale according to environment settings, or
to current codepage, before converting environment to multibyte.
* fhandler.h (fhandler_console::write_replacement_char): Drop argument.
* fhandler_console.cc (dev_console::str_to_con): Call sys_cp_mbstowcs
rather than MultiByteToWideChar.
(fhandler_console::write_replacement_char): Always print a funny
half filled square if a character isn't in the current charset.
(fhandler_console::write_normal): Convert to using __mbtowc
rather than next_char.
* fork.cc (frok::child): Drop call to set_file_api_mode.
* globals.cc (enum codepage_type): Remove.
(current_codepage): Remove.
* miscfuncs.cc (cygwin_wcslwr): Unused, dangerous. Remove.
(cygwin_wcsupr): Ditto.
(is_cp_multibyte): Remove.
(next_char): Remove.
* miscfuncs.h (is_cp_multibyte): Drop declaration.
(next_char): Ditto.
* strfuncs.cc (get_cp): Remove.
(__db_wctomb): New function to implement _wctomb_r functionality for
doublebyte charsets using WideCharToMultiByte.
(__sjis_wctomb): New function to replace unusable newlib function.
(__jis_wctomb): Ditto.
(__eucjp_wctomb): Ditto.
(__gbk_wctomb): New function.
(__kr_wctomb): Ditto.
(__big5_wctomb): Ditto.
(__db_mbtowc): New function to implement _mbtowc_r functionality for
doublebyte charsets using MultiByteToWideChar.
(__sjis_mbtowc): New function to replace unusable newlib function.
(__jis_mbtowc): Ditto.
(__eucjp_mbtowc): Ditto.
(__gbk_mbtowc): New function.
(__kr_mbtowc): New function.
(__big5_mbtowc): New function.
(__set_charset_from_codepage): New function.
(sys_wcstombs): Reimplement, basically using same wide char to multibyte
conversion as newlib's application level functions. Plus extras.
Add lengthy comment to explain. Change return type to size_t.
(sys_wcstombs_alloc): Just use sys_wcstombs. Change return type to
size_t.
(sys_cp_mbstowcs): Replace sys_mbstowcs, take additional codepage
argument. Explain why. Change return type to size_t.
(sys_mbstowcs_alloc): Just use sys_mbstowcs. Change return type to
size_t.
* wchar.h: Declare internal functions implemented in strfuncs.cc.
(wcscasecmp): Remove.
(wcsncasecmp): Remove.
(wcslwr): Remove.
(wcsupr): Remove.
* winsup.h (codepage_init): Remove declaration.
(get_cp): Ditto.
(sys_wcstombs): Align declaration to new implementation.
(sys_wcstombs_alloc): Ditto.
(sys_cp_mbstowcs): Add declaration.
(sys_mbstowcs): Define as inline function.
(sys_mbstowcs_alloc): Align declaration to new implementation.
(set_file_api_mode): Remove declaration.
* include/ctype.h (isblank): Redefine to use _B character class.
(toupper): Remove ASCII-only definition.
(tolower): Ditto.
2009-03-24 Corinna Vinschen <corinna@vinschen.de>
* sec_auth.cc (str2buf2uni): Remove.
* security.h (str2buf2uni): Remove declaration.

View File

@ -1,10 +1,12 @@
#include "winsup.h"
extern "C" {
#include <ctype.h>
#include <stdlib.h>
#include <wctype.h>
#define _CTYPE_DATA_0_127 \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, \
_C, _B|_C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_S|_B, _P, _P, _P, _P, _P, _P, _P, \
@ -36,7 +38,645 @@ extern "C" {
0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0
0, 0, 0, 0, 0, 0, 0, 0
/* FIXME: These tables should rather be defined in newlib and we should
switch to the newer __ctype_ptr method from newlib for new applications. */
static char __ctype_default[128] = { _CTYPE_DATA_128_256 };
static char __ctype_iso[15][128] = {
/* ISO-8859-1 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _L },
/* ISO-8859-2 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _U, _P, _U, _P, _U, _U, _P,
_P, _U, _U, _U, _U, _P, _U, _U,
_P, _L, _P, _L, _P, _L, _L, _P,
_P, _L, _L, _L, _L, _P, _L, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _L },
/* ISO-8859-3 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _U, _P, _P, _P, 0, _U, _P,
_P, _U, _U, _U, _U, _P, 0, _U,
_P, _L, _P, _P, _P, _L, _L, _P,
_P, _L, _L, _L, _L, _P, 0, _L,
_U, _U, _U, 0, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
0, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, 0, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
0, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _P },
/* ISO-8859-4 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _U, _L, _U, _P, _U, _U, _P,
_P, _U, _U, _U, _U, _P, _U, _P,
_P, _L, _P, _L, _P, _L, _L, _P,
_P, _L, _L, _L, _L, _P, _L, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _L },
/* ISO-8859-5 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _P, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_P, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _P, _L, _L },
/* ISO-8859-6 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, 0, 0, 0, _P, 0, 0, 0,
0, 0, 0, 0, _P, _P, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, _P, 0, 0, 0, _P,
0, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
0, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, 0, 0, 0, 0, 0,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _P, _P, _P, _P, _P,
_P, _P, _P, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 },
/* ISO-8859-7 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _U, _P,
_U, _U, _U, _P, _U, _P, _U, _U,
_L, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P },
/* ISO-8859-8 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, 0, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, 0, 0, _P, _P, 0 },
/* ISO-8859-9 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _L },
/* ISO-8859-10 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _P, _U, _U,
_P, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _P, _L, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L },
/* ISO-8859-11 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _P, _L, _L, _P, _P, _P, _P,
_P, _P, _P, 0, 0, 0, 0, _P,
_L, _L, _L, _L, _L, _L, _L, _P,
_P, _P, _P, _P, _P, _P, _P, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, 0, 0, 0, 0 },
/* ISO-8859-13 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _U, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _L, _P, _P, _P, _P, _P,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _P },
/* ISO-8859-14 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _U, _L, _P, _U, _L, _U, _P,
_U, _P, _U, _L, _U, _P, _P, _U,
_U, _L, _U, _L, _U, _L, _P, _U,
_L, _L, _L, _U, _L, _U, _L, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L },
/* ISO-8859-15 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _L },
/* ISO-8859-16 */
{ _C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_C, _C, _C, _C, _C, _C, _C, _C,
_S|_B, _U, _L, _U, _P, _P, _U, _P,
_L, _P, _U, _P, _U, _P, _L, _U,
_P, _P, _U, _U, _U, _P, _P, _P,
_L, _L, _L, _P, _U, _L, _U, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L }
};
static char __ctype_cp[22][128] = {
/* CP437 */
{ _U, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _U, _U,
_U, _L, _U, _L, _L, _L, _L, _L,
_L, _U, _U, _P, _P, _P, _P, _P,
_L, _L, _L, _L, _L, _L, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _L, _U, _L, _U, _L, _P, _L,
_U, _U, _U, _L, _P, _L, _L, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP720 */
{ 0, 0, _L, _L, 0, _L, 0, _L,
_L, _L, _L, _L, _L, 0, 0, 0,
0, _P, _P, _L, _P, _P, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
0, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP737 */
{ _U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _U, _U, _U, _U, _U, _U,
_U, _P, _P, _P, _P, _U, _U, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP775 */
{ _U, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _U, _L, _L, _U, _U, _U,
_U, _L, _U, _L, _L, _U, _P, _U,
_L, _U, _U, _P, _P, _P, _P, _P,
_U, _U, _L, _U, _L, _L, _P, _P,
_P, _P, _P, _P, _P, _U, _P, _P,
_P, _P, _P, _P, _P, _U, _U, _U,
_U, _P, _P, _P, _P, _U, _U, _P,
_P, _P, _P, _P, _P, _P, _U, _U,
_P, _P, _P, _P, _P, _P, _P, _U,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _P, _P, _P, _P, _P, _P, _P,
_U, _L, _U, _U, _L, _U, _L, _L,
_U, _L, _U, _L, _L, _U, _U, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP850 */
{ _U, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _U, _U,
_U, _L, _U, _L, _L, _L, _L, _L,
_L, _U, _U, _L, _P, _U, _P, _P,
_L, _L, _L, _L, _L, _U, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _U, _U, _U,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _L, _U,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _U, _U, _U, _U, _L, _U, _U,
_U, _P, _P, _P, _P, _P, _U, _P,
_U, _L, _U, _U, _L, _U, _L, _U,
_L, _U, _U, _U, _L, _U, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP852 */
{ _U, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _U, _L, _L, _U, _U, _U,
_U, _U, _L, _L, _L, _U, _L, _U,
_L, _U, _U, _U, _L, _U, _P, _L,
_L, _L, _L, _L, _U, _L, _U, _L,
_U, _L, _P, _L, _U, _L, _P, _P,
_P, _P, _P, _P, _P, _U, _U, _U,
_U, _P, _P, _P, _P, _U, _L, _P,
_P, _P, _P, _P, _P, _P, _U, _L,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _U, _U, _U, _L, _U, _U, _U,
_L, _P, _P, _P, _P, _U, _U, _P,
_U, _L, _U, _U, _L, _L, _U, _L,
_U, _U, _L, _U, _L, _U, _L, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _L, _U, _L, _P, _S|_B },
/* CP855 */
{ _L, _U, _L, _U, _L, _U, _L, _U,
_L, _U, _L, _U, _L, _U, _L, _U,
_L, _U, _L, _U, _L, _U, _L, _U,
_L, _U, _L, _U, _L, _U, _L, _U,
_L, _U, _L, _U, _L, _U, _L, _U,
_L, _U, _L, _U, _L, _U, _P, _P,
_P, _P, _P, _P, _P, _L, _U, _L,
_U, _P, _P, _P, _P, _L, _U, _P,
_P, _P, _P, _P, _P, _P, _L, _U,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _U, _L, _U, _L, _U, _L, _U,
_L, _P, _P, _P, _P, _U, _L, _P,
_U, _L, _U, _L, _U, _L, _U, _L,
_U, _L, _U, _L, _U, _L, _U, _P,
_P, _L, _U, _L, _U, _L, _U, _L,
_U, _L, _U, _L, _U, _P, _P, _S|_B },
/* CP857 */
{ _U, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _U, _U,
_U, _L, _U, _L, _L, _L, _L, _L,
_U, _U, _U, _L, _P, _U, _U, _L,
_L, _L, _L, _L, _L, _U, _U, _L,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _U, _U, _U, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _L, _U,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _U, _U, _U, _L, _U, _U,
_U, _P, _P, _P, _P, _P, _U, _P,
_U, _L, _U, _U, _L, _U, _L, _L,
_P, _U, _U, _U, _L, _L, _P, _P,
_P, _P, _L, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP858 */
{ _U, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _U, _U,
_U, _L, _U, _L, _L, _L, _L, _L,
_L, _U, _U, _L, _P, _U, _P, _P,
_L, _L, _L, _L, _L, _U, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _U, _U, _U,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _L, _U,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _U, _U, _U, _U, _P, _U, _U,
_U, _P, _P, _P, _P, _P, _U, _P,
_U, _L, _U, _U, _L, _U, _L, _U,
_L, _U, _U, _U, _L, _U, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP862 */
{ _L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _P, _P, _P, _P, _P,
_L, _L, _L, _L, _L, _U, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _L, _U, _L, _U, _L, _P, _L,
_U, _U, _U, _L, _P, _L, _L, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP866 */
{ _U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_U, _L, _U, _L, _U, _L, _U, _L,
_P, _P, _P, _P, _P, _P, _P, _S|_B },
/* CP874 */
{ _P, 0, 0, 0, 0, _P, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, _P, _P, _P, _P, _P, _P, _P,
0, 0, 0, 0, 0, 0, 0, 0,
_S|_B, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, 0, 0, 0, 0, _P,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _L, _L, 0, 0, 0, 0 },
/* CP1125 */
{ _U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_U, _L, _U, _L, _U, _L, _U, _L,
_U, _L, _P, _P, _P, _P, _P, _S|_B },
/* CP1250 */
{ _P, 0, _P, 0, _P, _P, _P, _P,
0, _P, _U, _P, _U, _U, _U, _U,
0, _P, _P, _P, _P, _P, _P, _P,
0, _P, _L, _P, _L, _L, _L, _L,
_S|_B, _P, _P, _U, _P, _U, _P, _P,
_P, _P, _U, _P, _P, _P, _P, _U,
_P, _P, _P, _L, _P, _P, _P, _P,
_P, _L, _L, _P, _U, _P, _L, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _P },
/* CP1251 */
{ _U, _U, _P, _L, _P, _P, _P, _P,
_P, _P, _U, _P, _U, _U, _U, _U,
_L, _P, _P, _P, _P, _P, _P, _P,
_L, _P, _L, _L, _L, _L, _P, _U,
_S|_B, _U, _L, _U, _P, _U, _P, _P,
_U, _P, _U, _P, _P, _P, _P, _U,
_P, _P, _U, _L, _L, _P, _P, _P,
_L, _P, _L, _P, _L, _U, _L, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L },
/* CP1252 */
{ _P, 0, _P, _L, _P, _P, _P, _P,
_P, _P, _U, _P, _U, _U, 0, 0,
0, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _L, _P, _L, 0, _L, _U,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _L },
/* CP1253 */
{ _P, 0, _P, _L, _P, _P, _P, _P,
0, _P, 0, _P, 0, 0, 0, 0,
0, _P, _P, _P, _P, _P, _P, _P,
0, _P, _P, 0, 0, 0, 0, 0,
_S|_B, _P, _U, _P, _P, _P, _P, _P,
_P, _P, 0, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_U, _U, _U, _P, _U, _P, _U, _U,
_L, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L },
/* CP1254 */
{ _P, 0, _P, _L, _P, _P, _P, _P,
_P, _P, _U, _P, _U, 0, 0, 0,
0, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _L, _P, _L, 0, 0, _U,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _L },
/* CP1255 */
{ _P, 0, _P, _L, _P, _P, _P, _P,
_P, _P, 0, _P, 0, 0, 0, 0,
0, _P, _P, _P, _P, _P, _P, _P,
_P, _P, 0, _P, 0, 0, 0, 0,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, 0, 0, 0, 0, 0, 0, 0,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, 0, 0, _P, _P, 0 },
/* CP1256 */
{ _P, _L, _P, _L, _P, _P, _P, _P,
_P, _P, _L, _P, _U, _L, _L, _L,
_L, _P, _P, _P, _P, _P, _P, _P,
_L, _P, _L, _P, _L, _P, _P, _L,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _L, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _P, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_P, _P, _P, _P, _L, _P, _P, _P,
_P, _L, _P, _L, _L, _P, _P, _L },
/* CP1257 */
{ _P, 0, _P, 0, _P, _P, _P, _P,
0, _P, 0, _P, 0, _P, _P, _P,
0, _P, _P, _P, _P, _P, _P, _P,
0, _P, 0, _P, 0, _P, _P, 0,
_S|_B, 0, _P, _P, _P, 0, _P, _P,
_U, _P, _U, _P, _P, _P, _P, _U,
_P, _P, _P, _P, _P, _P, _P, _P,
_L, _P, _L, _P, _P, _P, _P, _L,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _U, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _L, _P },
/* CP1258 */
{ _P, 0, _P, _L, _P, _P, _P, _P,
_P, _P, 0, _P, _U, 0, 0, 0,
0, _P, _P, _P, _P, _P, _P, _P,
_P, _P, 0, _P, _L, 0, 0, _U,
_S|_B, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_P, _P, _P, _P, _P, _P, _P, _P,
_U, _U, _U, _U, _U, _U, _U, _U,
_U, _U, _U, _U, _P, _U, _U, _U,
_U, _U, _P, _U, _U, _U, _U, _P,
_U, _U, _U, _U, _U, _U, _P, _L,
_L, _L, _L, _L, _L, _L, _L, _L,
_L, _L, _L, _L, _P, _L, _L, _L,
_L, _L, _P, _L, _L, _L, _L, _P,
_L, _L, _L, _L, _L, _L, _P, _L }
};
char ctype_b[128 + 256] = {
_CTYPE_DATA_128_256,
@ -70,12 +710,81 @@ makefunc(ispunct)
makefunc(isspace)
makefunc(isupper)
makefunc(isxdigit)
makefunc(tolower)
makefunc(toupper)
makefunc(isblank)
makefunc(isascii)
makefunc(toascii)
/* Convert C to lowercase.

   Values whose low byte is <= 0x7f are handled with the plain ASCII
   rule.  Any other value is treated as one byte of the current
   multibyte charset: it is converted to a wide character with mbtowc,
   lowercased with towlower, and converted back with wctomb.  The
   result is only used if it is again exactly one byte long; otherwise
   C is returned unchanged.

   The return value is in the unsigned char range (or is the unchanged
   argument), as ISO C requires of tolower.  */
static int __cdecl
c_tolower (int c)
{
  /* EOF (-1) must come back untouched.  Without this guard it would be
     truncated to the byte 0xff and converted like a real character.  */
  if (c == -1)
    return c;
  if ((unsigned char) c <= 0x7f)
    return isupper (c) ? c + 0x20 : c;

  /* Explicit cast: int -> char in a braced initializer is a narrowing
     conversion in C++.  */
  char s[8] = { (char) c, '\0' };
  wchar_t wc;
  if (mbtowc (&wc, s, 1) >= 0
      && wctomb (s, (wchar_t) towlower ((wint_t) wc)) == 1)
    /* Read the byte as unsigned; plain char may be signed, which would
       turn every result >= 0x80 into a negative int.  */
    c = (unsigned char) s[0];
  return c;
}
EXPORT_ALIAS(c_tolower, tolower)
/* Convert C to uppercase.

   Values whose low byte is <= 0x7f are handled with the plain ASCII
   rule.  Any other value is treated as one byte of the current
   multibyte charset: it is converted to a wide character with mbtowc,
   uppercased with towupper, and converted back with wctomb.  The
   result is only used if it is again exactly one byte long; otherwise
   C is returned unchanged.

   The return value is in the unsigned char range (or is the unchanged
   argument), as ISO C requires of toupper.  */
static int __cdecl
c_toupper (int c)
{
  /* EOF (-1) must come back untouched.  Without this guard it would be
     truncated to the byte 0xff and converted like a real character.  */
  if (c == -1)
    return c;
  if ((unsigned char) c <= 0x7f)
    return islower (c) ? c - 0x20 : c;

  /* Explicit cast: int -> char in a braced initializer is a narrowing
     conversion in C++.  */
  char s[8] = { (char) c, '\0' };
  wchar_t wc;
  if (mbtowc (&wc, s, 1) >= 0
      && wctomb (s, (wchar_t) towupper ((wint_t) wc)) == 1)
    /* Read the byte as unsigned; plain char may be signed, which would
       turn every result >= 0x80 into a negative int.  */
    c = (unsigned char) s[0];
  return c;
}
EXPORT_ALIAS(c_toupper, toupper)
/* Called from newlib's setlocale(). What we do here is to copy the
128 bytes of charset specific ctype data into the array at _ctype_b.
Given that the functionality is usually implemented locally in the
application, that's the only backward compatible way to do it.
Setlocale is usually only called once in an application, so this isn't
time-critical anyway. */
int __iso_8859_index (const char *charset_ext); /* Newlib */
int __cp_index (const char *charset_ext); /* Newlib */
/* Pick the 128-entry character class table that belongs to CHARSET and
   copy it into ctype_b twice: once at the start of the array and once
   at offset 256.  Charsets that match no table get __ctype_default.  */
void
__set_ctype (const char *charset)
{
  /* Used unless one of the branches below finds a better match.  */
  const char *classes = __ctype_default;

  if (*charset == 'I')
    {
      /* NOTE(review): the offset 9 presumably skips an "ISO-8859-"
	 prefix -- confirm against newlib's __iso_8859_index.  */
      int iso = __iso_8859_index (charset + 9);
      /* Our ctype table has a leading ISO-8859-1 element, which also
	 serves as the fallback for a negative index.  */
      classes = __ctype_iso[iso < 0 ? 0 : iso + 1];
    }
  else if (*charset == 'C')
    {
      /* NOTE(review): the offset 2 presumably skips a "CP" prefix --
	 confirm against newlib's __cp_index.  */
      int cp = __cp_index (charset + 2);
      if (cp >= 0)
	classes = __ctype_cp[cp];
    }

  memcpy (ctype_b, classes, 128);
  memcpy (ctype_b + 256, classes, 128);
}
} /* extern "C" */
/*
* Copyright (c) 1989 The Regents of the University of California.
@ -89,13 +798,6 @@ makefunc(toascii)
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

View File

@ -16,6 +16,7 @@ details. */
#include <stdlib.h>
#include "glob.h"
#include <ctype.h>
#include <locale.h>
#include "environ.h"
#include "sigproc.h"
#include "pinfo.h"
@ -921,6 +922,8 @@ dll_crt0_1 (void *)
do this for noncygwin case since the signal thread is blocked due to
LoadLibrary serialization. */
ld_preload ();
/* Reset current locale to "C" per POSIX */
_setlocale_r (_GLOBAL_REENT, LC_CTYPE, "C");
if (user_data->main)
cygwin_exit (user_data->main (__argc, __argv, *user_data->envptr));
__asm__ (" \n\

View File

@ -13,6 +13,7 @@ details. */
#include <wchar.h>
#include <wctype.h>
#include <ctype.h>
#include <locale.h>
#include <assert.h>
#include <cygwin/version.h>
#include <winnls.h>
@ -552,48 +553,6 @@ glob_init (const char *buf)
}
}
/* Switch the Windows file APIs to the OEM codepage when CP is oem_cp,
   and to the ANSI codepage for every other value.  The chosen mode is
   reported through debug_printf.  */
void
set_file_api_mode (codepage_type cp)
{
  if (cp != oem_cp)
    {
      SetFileApisToANSI ();
      debug_printf ("File APIs set to ANSI");
      return;
    }

  SetFileApisToOEM ();
  debug_printf ("File APIs set to OEM");
}
/* Handler for the "codepage" setting.  BUF names the codepage to use:
   "oem", "utf8" or "ansi" (matched with ascii_strcasematch).  A NULL
   BUF means "ansi"; any unrecognized name is reported via debug_printf
   and then treated as "ansi" as well.  Sets current_codepage and
   active_codepage and finally applies the matching file API mode.  */
void
codepage_init (const char *buf)
{
  const char *name = buf ? buf : "ansi";

  if (ascii_strcasematch (name, "oem"))
    {
      current_codepage = oem_cp;
      active_codepage = GetOEMCP ();
    }
  else if (ascii_strcasematch (name, "utf8"))
    {
      current_codepage = utf8_cp;
      active_codepage = CP_UTF8;
    }
  else
    {
      bool known_name = ascii_strcasematch (name, "ansi");
      if (!known_name)
	debug_printf ("Wrong codepage name: %s", name);
      /* Fallback to ANSI */
      current_codepage = ansi_cp;
      active_codepage = GetACP ();
    }

  set_file_api_mode (current_codepage);
}
static void
set_chunksize (const char *buf)
{
@ -629,7 +588,6 @@ static struct parse_thing
} values[2];
} known[] NO_COPY =
{
{"codepage", {func: &codepage_init}, isfunc, NULL, {{0}, {0}}},
{"dosfilewarning", {&dos_file_warning}, justset, NULL, {{false}, {true}}},
{"envcache", {&envcache}, justset, NULL, {{true}, {false}}},
{"error_start", {func: &error_start_init}, isfunc, NULL, {{0}, {0}}},
@ -774,6 +732,8 @@ environ_init (char **envp, int envc)
static char NO_COPY cygterm[] = "TERM=cygwin";
myfault efault;
tmp_pathbuf tp;
bool got_lc = false;
static const char *lc_arr[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
if (efault.faulted ())
api_fatal ("internal error reading the windows environment - too many environment variables?");
@ -818,10 +778,27 @@ environ_init (char **envp, int envc)
/* Allocate space for environment + trailing NULL + CYGWIN env. */
lastenviron = envp = (char **) malloc ((4 + (envc = 100)) * sizeof (char *));
/* We need the CYGWIN variable content before we can loop through
/* We need the locale variables' content before we can loop through
the whole environment, so that the wide-char to multibyte conversion
can be done according to the "codepage" setting, as well as the
uppercasing according to the "upcaseenv" setting. */
can be done according to the $LC_ALL/$LC_CTYPE/$LANG/current_codepage
setting, as well as the uppercasing according to the "upcaseenv"
setting. Note that we have to reset the LC_CTYPE setting to "C"
before calling main() for POSIX compatibility. */
for (int lc = 0; lc_arr[lc]; ++lc)
{
if ((i = GetEnvironmentVariableA (lc_arr[lc], NULL, 0)))
{
char *buf = (char *) alloca (i);
GetEnvironmentVariableA (lc_arr[lc], buf, i);
if (_setlocale_r (_GLOBAL_REENT, LC_CTYPE, buf))
got_lc = true;
}
}
/* No matching POSIX environment variable, use current codepage. */
if (!got_lc)
_setlocale_r (_GLOBAL_REENT, LC_CTYPE, "en_US");
/* We also need the CYGWIN variable early to know the value of the
CYGWIN=upcaseenv setting for the below loop. */
if ((i = GetEnvironmentVariableA ("CYGWIN", NULL, 0)))
{
char *buf = (char *) alloca (i);

View File

@ -927,7 +927,7 @@ class fhandler_console: public fhandler_termios
void cursor_set (bool, int, int);
void cursor_get (int *, int *);
void cursor_rel (int, int);
void write_replacement_char (const unsigned char *);
void write_replacement_char ();
const unsigned char *write_normal (unsigned const char*, unsigned const char *);
void char_command (char);
bool set_raw_win32_keyboard_mode (bool);

View File

@ -13,6 +13,7 @@ details. */
#include "miscfuncs.h"
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wingdi.h>
#include <winuser.h>
#include <winnls.h>
@ -133,13 +134,13 @@ dev_console::con_to_str (char *d, int dlen, WCHAR w)
inline UINT
dev_console::get_console_cp ()
{
return alternate_charset_active ? GetConsoleOutputCP () : get_cp ();
return alternate_charset_active ? GetConsoleOutputCP () : 0;
}
inline DWORD
dev_console::str_to_con (PWCHAR d, const char *s, DWORD sz)
{
return MultiByteToWideChar (get_console_cp (), 0, s, sz, d, CONVERT_LIMIT);
return sys_cp_mbstowcs (get_console_cp (), d, CONVERT_LIMIT, s, sz);
}
bool
@ -1400,22 +1401,15 @@ beep ()
MessageBeep (MB_OK);
}
/* This gets called when we found an invalid UTF-8 character. We try with
the default ANSI codepage. If that fails we just print a question mark.
Looks ugly but is a neat and almost sane fallback for many languages. */
/* This gets called when we found an invalid input character. We just
print a half filled square (UTF 0x2592). We have no chance to figure
out the "meaning" of the input char anyway. */
void
fhandler_console::write_replacement_char (const unsigned char *char_p)
fhandler_console::write_replacement_char ()
{
int n;
WCHAR def_cp_chars[2];
static const wchar_t replacement_char = 0x2592; /* Half filled square */
DWORD done;
n = MultiByteToWideChar (GetACP (), 0, (const CHAR *) char_p, 1,
def_cp_chars, 2);
if (n)
WriteConsoleW (get_output_handle (), def_cp_chars, n, &done, 0);
else
WriteConsoleW (get_output_handle (), L"?", 1, &done, 0);
WriteConsoleW (get_output_handle (), &replacement_char, 1, &done, 0);
}
const unsigned char *
@ -1426,22 +1420,46 @@ fhandler_console::write_normal (const unsigned char *src,
DWORD done;
DWORD buf_len;
const unsigned char *found = src;
const unsigned char *nfound;
size_t ret;
mbstate_t ps;
UINT cp = dev_state->get_console_cp ();
char charsetbuf[32];
char *charset = __locale_charset ();
mbtowc_p f_mbtowc = __mbtowc;
if (cp)
f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf);
/* First check if we have cached lead bytes of a former try to write
a truncated multibyte sequence. If so, process it. */
if (trunc_buf.len)
{
const unsigned char *nfound;
int cp_len = min (end - src, 4 - trunc_buf.len);
memcpy (trunc_buf.buf + trunc_buf.len, src, cp_len);
nfound = next_char (cp, trunc_buf.buf,
trunc_buf.buf + trunc_buf.len + cp_len);
/* Still truncated multibyte sequence? Keep in trunc_buf. */
if (nfound == trunc_buf.buf)
memset (&ps, 0, sizeof ps);
switch (ret = f_mbtowc (_REENT, NULL, (const char *) trunc_buf.buf,
trunc_buf.len + cp_len, charset, &ps))
{
case -2:
/* Still truncated multibyte sequence? Keep in trunc_buf. */
trunc_buf.len += cp_len;
return end;
case -1:
/* Give up, print replacement chars for trunc_buf... */
for (int i = 0; i < trunc_buf.len; ++i)
write_replacement_char ();
/* ... mark trunc_buf as unused... */
trunc_buf.len = 0;
/* ... and proceed. */
nfound = NULL;
break;
case 0:
nfound = trunc_buf.buf + 1;
break;
default:
nfound = trunc_buf.buf + ret;
break;
}
/* Valid multibyte sequence? Process. */
if (nfound)
@ -1454,28 +1472,32 @@ fhandler_console::write_normal (const unsigned char *src,
trunc_buf.len = 0;
return found;
}
/* Give up, print replacement chars for trunc_buf... */
for (int i = 0; i < trunc_buf.len; ++i)
write_replacement_char (trunc_buf.buf + i);
/* ... mark trunc_buf as unused... */
trunc_buf.len = 0;
/* ... and proceed. */
}
memset (&ps, 0, sizeof ps);
while (found < end
&& found - src < CONVERT_LIMIT
&& base_chars[*found] == NOR)
{
nfound = next_char (cp, found, end);
if (!nfound) /* Invalid multibyte sequence. */
break;
if (nfound == found) /* Truncated multibyte sequence. */
{ /* Stick to it until the next write. */
switch (ret = f_mbtowc (_REENT, NULL, (const char *) found,
end - found, charset, &ps))
{
case -2:
/* Truncated multibyte sequence. Stick to it until the next write. */
trunc_buf.len = end - found;
memcpy (trunc_buf.buf, found, trunc_buf.len);
return end;
case -1:
break;
case 0:
found++;
break;
default:
found += ret;
break;
}
found = nfound;
if (ret == (size_t) -1) /* Invalid multibyte sequence. */
break;
}
/* Print all the base ones out */
@ -1558,7 +1580,7 @@ fhandler_console::write_normal (const unsigned char *src,
cursor_set (false, 8 * (x / 8 + 1), y);
break;
case NOR:
write_replacement_char (found);
write_replacement_char ();
break;
}
found++;

View File

@ -205,8 +205,6 @@ frok::child (volatile char * volatile here)
}
#endif
set_file_api_mode (current_codepage);
MALLOC_CHECK;
/* Incredible but true: If we use sockets and SYSV IPC shared memory,

View File

@ -25,8 +25,6 @@ HMODULE NO_COPY cygwin_hmodule;
HANDLE hExeced;
/* Codepage and multibyte string specific stuff. */
enum codepage_type {ansi_cp, oem_cp, utf8_cp};
codepage_type current_codepage = ansi_cp;
UINT active_codepage;
/* program exit the program */

View File

@ -49,7 +49,7 @@ extern const __declspec(dllimport) char _ctype_[];
#if !defined(__cplusplus) || defined(__INSIDE_CYGWIN__)
#define isalpha(c) ((_ctype_+1)[(unsigned)(c)]&(_U|_L))
#define isblank(c) ((c) == ' ' || (c) == '\t')
#define isblank(c) ((_ctype_+1)[(unsigned)(c)]&_B)
#define isupper(c) ((_ctype_+1)[(unsigned)(c)]&_U)
#define islower(c) ((_ctype_+1)[(unsigned)(c)]&_L)
#define isdigit(c) ((_ctype_+1)[(unsigned)(c)]&_N)
@ -60,12 +60,6 @@ extern const __declspec(dllimport) char _ctype_[];
#define isprint(c) ((_ctype_+1)[(unsigned)(c)]&(_P|_U|_L|_N|_B))
#define isgraph(c) ((_ctype_+1)[(unsigned)(c)]&(_P|_U|_L|_N))
#define iscntrl(c) ((_ctype_+1)[(unsigned)(c)]&_C)
/* Non-gcc versions will get the library versions, and will be
slightly slower */
# define toupper(c) \
__extension__ ({ int __x = (c); islower(__x) ? (__x - 'a' + 'A') : __x;})
# define tolower(c) \
__extension__ ({ int __x = (c); isupper(__x) ? (__x - 'A' + 'a') : __x;})
#endif /* !__cplusplus */
#if !defined(__STRICT_ANSI__) || defined(__INSIDE_CYGWIN__)

View File

@ -141,26 +141,6 @@ cygwin_strncasecmp (const char *cs, const char *ct, size_t n)
return RtlCompareUnicodeString (&us, &ut, TRUE);
}
extern "C" wchar_t * __stdcall
cygwin_wcslwr (wchar_t *string)
{
UNICODE_STRING us;
RtlInitUnicodeString (&us, string);
RtlDowncaseUnicodeString (&us, &us, FALSE);
return string;
}
extern "C" wchar_t * __stdcall
cygwin_wcsupr (wchar_t *string)
{
UNICODE_STRING us;
RtlInitUnicodeString (&us, string);
RtlUpcaseUnicodeString (&us, &us, FALSE);
return string;
}
extern "C" char * __stdcall
cygwin_strlwr (char *string)
{
@ -189,118 +169,6 @@ cygwin_strupr (char *string)
return string;
}
/* FIXME? We only support standard ANSI/OEM codepages according to
http://www.microsoft.com/globaldev/reference/cphome.mspx as well
as UTF-8 and codepage 1361, which is also mentioned as valid
doublebyte codepage in MSDN man pages (e.g. IsDBCSLeadByteEx).
Everything else will be hosed. */
bool
is_cp_multibyte (UINT cp)
{
switch (cp)
{
case 932:
case 936:
case 949:
case 950:
case 1361:
case 65001:
return true;
}
return false;
}
/* OMYGOD! CharNextExA is not UTF-8 aware! It only works fine with
double byte charsets. So we have to do it ourselves for UTF-8.
While being at it, we do more. If a double-byte or multibyte
sequence is truncated due to an early end, we need a way to recognize
it. The reason is that multiple buffered write statements might
accidentally stop and start in the middle of a single character byte
sequence. If we have to interpret the byte sequences (as in
fhandler_console), we would print wrong output in these cases.
So we have four possible return values here:
ret = end if str >= end
ret = NULL if we encounter an invalid byte sequence
ret = str if we encounter the start byte of a truncated byte sequence
ret = str + n if we encounter a valid byte sequence
*/
const unsigned char *
next_char (UINT cp, const unsigned char *str, const unsigned char *end)
{
const unsigned char *ret = NULL;
if (str >= end)
return end;
switch (cp)
{
case 932:
case 936:
case 949:
case 950:
case 1361:
if (*str <= 0x7f)
ret = str + 1;
else if (str == end - 1 && IsDBCSLeadByteEx (cp, *str))
ret = str;
else
ret = (const unsigned char *) CharNextExA (cp, (const CHAR *) str, 0);
break;
case CP_UTF8:
switch (str[0] >> 4)
{
case 0x0 ... 0x7: /* One byte character. */
ret = str + 1;
break;
case 0x8 ... 0xb: /* Followup byte. Invalid as first byte. */
ret = NULL;
break;
case 0xc ... 0xd: /* Two byte character. */
/* Check followup bytes for validity. */
if (str >= end - 1)
ret = str;
else if (str[1] <= 0xbf)
ret = str + 2;
else
ret = NULL;
break;
case 0xe: /* Three byte character. */
if (str >= end - 2)
ret = str;
else if ((str[1] & 0xc0) == 0x80 && (str[2] & 0xc0) == 0x80
&& (str[0] != 0xe0 || str[1] >= 0xa0)
&& (str[0] != 0xed || str[1] <= 0x9f))
ret = str + 3;
else
ret = NULL;
break;
case 0xf: /* Four byte character. */
if (str[0] >= 0xf8)
ret = NULL;
else if (str >= end - 3)
ret = str;
else if ((str[1] & 0xc0) == 0x80 && (str[2] & 0xc0) == 0x80
&& (str[3] & 0xc0) == 0x80
&& (str[0] == 0xf0 || str[1] >= 0x90)
&& (str[0] == 0xf4 || str[1] <= 0x8f))
ret = str + 4;
else
ret = NULL;
break;
}
break;
default:
ret = str + 1;
break;
}
return ret;
}
int __stdcall
check_invalid_virtual_addr (const void *s, unsigned sz)
{

View File

@ -25,11 +25,6 @@ void backslashify (const char *, char *, bool);
void slashify (const char *, char *, bool);
#define isslash(c) ((c) == '/')
/* multibyte stuff */
bool is_cp_multibyte (UINT cp);
const unsigned char *next_char (UINT cp, const unsigned char *str,
const unsigned char *end);
/* Memory checking */
int __stdcall check_invalid_virtual_addr (const void *s, unsigned sz) __attribute__ ((regparm(2)));

View File

@ -20,45 +20,356 @@ details. */
#include "fhandler.h"
#include "dtable.h"
#include "cygheap.h"
#include "tls_pbuf.h"
UINT
get_cp ()
/* The SJIS, JIS and EUCJP conversion in newlib does not use UTF as
wchar_t character representation. That's unfortunate for us since
we require UTF for the OS. What we do here is to have our own
implementation of the base functions for the conversion using
the MultiByteToWideChar/WideCharToMultiByte functions. */
/* GBK, CP949, and Big5 conversions are not available so far in newlib. */
static int
__db_wctomb (struct _reent *r, char *s, wchar_t wchar, UINT cp)
{
if (!active_codepage)
codepage_init ("ansi");
return active_codepage;
if (s == NULL)
return 0;
if (wchar < 0x80)
{
*s = (char) wchar;
return 1;
}
BOOL def_used = false;
int ret = WideCharToMultiByte (cp, cp > 50000 ? 0 : WC_NO_BEST_FIT_CHARS,
&wchar, 1, s, MB_CUR_MAX, NULL, &def_used);
if (ret > 0 && !def_used)
return ret;
r->_errno = EILSEQ;
return -1;
}
/* tlen is always treated as the maximum buffer size, including the '\0'
character. sys_wcstombs will always return a 0-terminated result, no
matter what. */
int __stdcall
sys_wcstombs (char *tgt, int tlen, const PWCHAR src, int slen)
extern "C" int
__sjis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
mbstate_t *state)
{
int ret;
return __db_wctomb (r,s, wchar, 932);
}
/* Convert UNICODE private use area. Reverse functionality (only for
path names) is transform_chars in path.cc. */
if (slen < 0)
slen = wcslen (src) + 1;
WCHAR sbuf[slen];
memcpy (sbuf, src, slen * sizeof (WCHAR));
const unsigned char *end = (unsigned char *) (sbuf + slen);
for (unsigned char *s = ((unsigned char *) sbuf) + 1; s < end;
s += sizeof (WCHAR))
if (*s == 0xf0)
*s = 0;
ret = WideCharToMultiByte (get_cp (), 0, sbuf, slen, tgt, tlen, NULL, NULL);
if (ret && tgt)
/* Wide-char to multibyte conversion for the JIS charset.  Forwards to
   __db_wctomb with Windows codepage 50220 and returns its result.  The
   charset and state arguments are not used here. */
extern "C" int
__jis_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 50220);
}
/* Wide-char to multibyte conversion for the EUCJP charset.  Forwards to
   __db_wctomb with Windows codepage 51932 and returns its result.  The
   charset and state arguments are not used here. */
extern "C" int
__eucjp_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 51932);
}
/* Wide-char to multibyte conversion for the GBK charset.  Forwards to
   __db_wctomb with Windows codepage 936 and returns its result.  The
   charset and state arguments are not used here. */
extern "C" int
__gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 936);
}
/* Wide-char to multibyte conversion for the CP949 (Korean) charset.
   Forwards to __db_wctomb with Windows codepage 949 and returns its
   result.  The charset and state arguments are not used here. */
extern "C" int
__kr_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 949);
}
/* Wide-char to multibyte conversion for the BIG5 charset.  Forwards to
   __db_wctomb with Windows codepage 950 and returns its result.  The
   charset and state arguments are not used here. */
extern "C" int
__big5_wctomb (struct _reent *r, char *s, wchar_t wchar, const char *charset,
mbstate_t *state)
{
return __db_wctomb (r,s, wchar, 950);
}
static int
__db_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
UINT cp, mbstate_t *state)
{
wchar_t dummy;
char buf[2];
int ret;
if (pwc == NULL)
pwc = &dummy;
if (s == NULL)
return 0; /* not state-dependent */
if (n == 0)
return -2;
if (state->__count == 0)
{
ret = (ret < tlen) ? ret : tlen - 1;
tgt[ret] = '\0';
if (*(unsigned char *) s < 0x80)
{
*pwc = *(unsigned char *) s;
return *s ? 1 : 0;
}
ret = MultiByteToWideChar (cp, cp > 50000 ? 0 : MB_ERR_INVALID_CHARS,
s, 2, pwc, 1);
if (ret)
return *s ? 2 : 0;
if (n == 1)
{
state->__count = 1;
state->__value.__wchb[0] = *s;
return -2;
}
else
{
/* These Win32 functions are really crappy. Assuming n is 2
but the first byte is a singlebyte charcode, the function
does not convert that byte and return 1, rather it just
returns 0. So, what we do here is to check if the first
byte returns a valid value... */
ret = MultiByteToWideChar (cp,
cp > 50000 ? 0 : MB_ERR_INVALID_CHARS,
s, 1, pwc, 1);
if (ret)
return *s ? 1 : 0;
}
r->_errno = EILSEQ;
return -1;
}
if (!*s)
return -2;
buf[0] = state->__value.__wchb[0];
buf[1] = *s;
ret = MultiByteToWideChar (cp, cp > 50000 ? 0 : MB_ERR_INVALID_CHARS,
buf, 2, pwc, 1);
if (!ret)
{
r->_errno = EILSEQ;
return -1;
}
return ret;
}
/* Multibyte to wide-char conversion for the SJIS charset.  Forwards to
   __db_mbtowc with Windows codepage 932, handing the caller's conversion
   state through unchanged.  The charset argument is not used here. */
extern "C" int
__sjis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
const char *charset, mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 932, state);
}
/* Multibyte to wide-char conversion for the JIS charset.  Forwards to
   __db_mbtowc with Windows codepage 50220, handing the caller's
   conversion state through unchanged.  The charset argument is not used
   here. */
extern "C" int
__jis_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
const char *charset, mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 50220, state);
}
/* Multibyte to wide-char conversion for the EUCJP charset.  Forwards to
   __db_mbtowc with Windows codepage 51932, handing the caller's
   conversion state through unchanged.  The charset argument is not used
   here. */
extern "C" int
__eucjp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
const char *charset, mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 51932, state);
}
/* Multibyte to wide-char conversion for the GBK charset.  Forwards to
   __db_mbtowc with Windows codepage 936, handing the caller's conversion
   state through unchanged.  The charset argument is not used here. */
extern "C" int
__gbk_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
const char *charset, mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 936, state);
}
/* Multibyte to wide-char conversion for the CP949 (Korean) charset.
   Forwards to __db_mbtowc with Windows codepage 949, handing the caller's
   conversion state through unchanged.  The charset argument is not used
   here. */
extern "C" int
__kr_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
const char *charset, mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 949, state);
}
/* Multibyte to wide-char conversion for the BIG5 charset.  Forwards to
   __db_mbtowc with Windows codepage 950, handing the caller's conversion
   state through unchanged.  The charset argument is not used here. */
extern "C" int
__big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
const char *charset, mbstate_t *state)
{
return __db_mbtowc (r, pwc, s, n, 950, state);
}
/* Convert Windows codepage to a setlocale compatible character set code.
Called from newlib's setlocale() with the current ANSI codepage, if the
charset isn't given explicitly in the POSIX compatible locale specifier.
The function also returns a pointer to the corresponding _mbtowc_r
function. This is used below in the sys_cp_mbstowcs function which
is called directly from fhandler_console if the "Alternate Charset" has
been switched on by an escape sequence. */
extern "C" mbtowc_p
__set_charset_from_codepage (UINT cp, char *charset)
{
  /* Result for any codepage not listed below. */
  const char *name = "ASCII";
  mbtowc_p conv = __ascii_mbtowc;

  switch (cp)
    {
    /* Singlebyte Windows/OEM codepages: the charset name embeds the
       codepage number, so it is formatted rather than looked up. */
    case 437: case 720: case 737: case 775:
    case 850: case 852: case 855: case 857: case 858:
    case 862: case 866: case 874:
    case 1125:
    case 1250: case 1251: case 1252: case 1253: case 1254:
    case 1255: case 1256: case 1257: case 1258:
      __small_sprintf (charset, "CP%u", cp);
      return __cp_mbtowc;

    /* ISO-8859-x codepages: the part number is the offset from 28590. */
    case 28591: case 28592: case 28593: case 28594: case 28595:
    case 28596: case 28597: case 28598: case 28599:
    case 28603: case 28605:
      __small_sprintf (charset, "ISO-8859-%u", cp - 28590);
      return __iso_mbtowc;

    /* Multibyte codepages with a fixed charset name. */
    case 932:
      name = "SJIS";
      conv = __sjis_mbtowc;
      break;
    case 936:
      name = "GBK";
      conv = __gbk_mbtowc;
      break;
    case 949:
      name = "CP949";
      conv = __kr_mbtowc;
      break;
    case 950:
      name = "BIG5";
      conv = __big5_mbtowc;
      break;
    case 50220:
      name = "JIS";
      conv = __jis_mbtowc;
      break;
    case 51932:
      name = "EUCJP";
      conv = __eucjp_mbtowc;
      break;
    case 65001:
      name = "UTF-8";
      conv = __utf8_mbtowc;
      break;

    default:
      break;
    }

  /* Store the selected fixed name and hand back its converter. */
  strcpy (charset, name);
  return conv;
}
/* Our own sys_wcstombs/sys_mbstowcs functions differ from the
wcstombs/mbstowcs API in three ways:
- The UNICODE private use area is used in filenames to specify
characters not allowed in Windows filenames ('*', '?', etc).
The sys_wcstombs converts characters in the private use area
back to the corresponding ASCII chars.
- If a wide character in a filename has no representation in the current
multibyte charset, then usually you wouldn't be able to access the
file. To fix this problem, sys_wcstombs creates a replacement multibyte
sequence for the non-representable wide-char. The sequence starts with
an ASCII SO (0x0e, Ctrl-N), followed by the UTF-8 representation of the
character. The sys_(cp_)mbstowcs function detects ASCII SO characters
in the input multibyte string and converts the following multibyte
sequence by treating it as a UTF-8 char. If that fails, the ASCII
SO was probably standalone and it gets just copied over as ASCII SO.
- The functions always create 0-terminated results, no matter what.
If the result is truncated due to buffer size, it's a bug in Cygwin
and the buffer in the calling function should be raised. */
/* Convert up to NWC wide characters from SRC to the multibyte charset of
   the current locale.

   dst  Output buffer, or NULL to only compute the required size; in that
	case LEN is ignored and treated as unlimited.
   len  Size of DST in bytes, including room for the terminating '\0'.
   src  Wide-char input.  Conversion stops after a L'\0' has been
	converted or after NWC input characters, whichever comes first.
   nwc  Maximum number of wide characters to read from SRC.

   Returns the number of bytes produced; this count includes the byte(s)
   emitted for the input's L'\0' if it was reached.  If DST is non-NULL
   and anything was produced, the result is always '\0'-terminated,
   truncating to LEN - 1 bytes if necessary. */
size_t __stdcall
sys_wcstombs (char *dst, size_t len, const PWCHAR src, size_t nwc)
{
  char buf[10];
  char *ptr = dst;
  wchar_t *pwcs = (wchar_t *) src;
  size_t n = 0;
  mbstate_t ps;

  memset (&ps, 0, sizeof ps);
  if (dst == NULL)
    len = (size_t) -1;
  while (n < len && nwc-- > 0)
    {
      wchar_t pw = *pwcs;
      /* Convert UNICODE private use area.  Reverse functionality (only for
	 path names) is transform_chars in path.cc. */
      if ((pw & 0xff00) == 0xf000)
	pw &= 0xff;

      int bytes = _wctomb_r (_REENT, buf, pw, &ps);
      /* Convert chars invalid in the current codepage to a sequence
	 ASCII SO; UTF-8 representation of invalid char.
	 Do the same for ASCII SO itself. */
      if ((bytes == -1 || pw == 0x0e) && *__locale_charset () != 'U'/*TF-8*/)
	{
	  buf[0] = 0x0e; /* ASCII SO */
	  bytes = __utf8_wctomb (_REENT, buf + 1, pw, __locale_charset (), &ps);
	  if (bytes == -1)
	    {
	      ++pwcs;
	      ps.__count = 0;
	      continue;
	    }
	  ++bytes; /* Add the ASCII SO to the byte count. */
	  if (ps.__count == -4) /* First half of a surrogate pair. */
	    {
	      /* The second half must lie inside the NWC window and must
		 be a low surrogate.  Otherwise drop only the dangling
		 first half; the following character (possibly the
		 terminating L'\0') is left for the next iteration, so we
		 never step over the end of the input. */
	      if (nwc == 0 || (pwcs[1] & 0xfc00) != 0xdc00)
		{
		  ++pwcs;
		  ps.__count = 0;
		  continue;
		}
	      ++pwcs;
	      --nwc; /* The second half counts against NWC, too. */
	      bytes += __utf8_wctomb (_REENT, buf + bytes, *pwcs,
				      __locale_charset (), &ps);
	    }
	}
      /* Still failing means the charset is UTF-8 and the character cannot
	 be encoded.  Skip it the same way as above instead of adding a
	 negative count to the unsigned total. */
      if (bytes < 0)
	{
	  ++pwcs;
	  ps.__count = 0;
	  continue;
	}
      /* Stop as soon as the next sequence doesn't fit anymore. */
      if (n + bytes > len)
	break;
      n += bytes;
      if (dst)
	{
	  for (int i = 0; i < bytes; ++i)
	    *ptr++ = buf[i];
	}
      if (*pwcs++ == 0x00)
	break;
    }
  if (n && dst)
    {
      /* Always terminate, giving up the last byte if the buffer is full. */
      n = (n < len) ? n : len - 1;
      dst[n] = '\0';
    }
  return n;
}
/* Allocate a buffer big enough for the string, always including the
terminating '\0'. The buffer pointer is returned in *tgt_p, the return
terminating '\0'. The buffer pointer is returned in *dst_p, the return
value is the number of bytes written to the buffer, as usual.
The "type" argument determines where the resulting buffer is stored.
It's either one of the cygheap_types values, or it's "HEAP_NOTHEAP".
@ -67,57 +378,129 @@ sys_wcstombs (char *tgt, int tlen, const PWCHAR src, int slen)
Note that this code is shared by cygserver (which requires it via
__small_vsprintf) and so when built there plain calloc is the
only choice. */
int __stdcall
sys_wcstombs_alloc (char **tgt_p, int type, const PWCHAR src, int slen)
size_t __stdcall
sys_wcstombs_alloc (char **dst_p, int type, const PWCHAR src, size_t nwc)
{
int ret;
size_t ret;
ret = WideCharToMultiByte (get_cp (), 0, src, slen, NULL, 0 ,NULL, NULL);
if (ret)
ret = sys_wcstombs (NULL, (size_t) -1, src, nwc);
if (ret > 0)
{
size_t tlen = (slen == -1) ? ret : ret + 1;
size_t dlen = ret + 1;
if (type == HEAP_NOTHEAP)
*tgt_p = (char *) calloc (tlen, sizeof (char));
*dst_p = (char *) calloc (dlen, sizeof (char));
else
*tgt_p = (char *) ccalloc ((cygheap_types) type, tlen, sizeof (char));
if (!*tgt_p)
*dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
if (!*dst_p)
return 0;
ret = sys_wcstombs (*tgt_p, tlen, src, slen);
ret = sys_wcstombs (*dst_p, dlen, src, nwc);
}
return ret;
}
int __stdcall
sys_mbstowcs (PWCHAR tgt, int tlen, const char *src, int slen)
/* sys_cp_mbstowcs is actually most of the time called as sys_mbstowcs with
a 0 codepage. If cp is not 0, the codepage is evaluated and used for the
conversion. This is so that fhandler_console can switch to an alternate
charset, which is the charset returned by GetConsoleOutputCP (). Most of the
time this is used for box and line drawing characters. */
size_t __stdcall
sys_cp_mbstowcs (UINT cp, PWCHAR dst, size_t dlen, const char *src, size_t nms)
{
int ret = MultiByteToWideChar (get_cp (), 0, src, slen, tgt, tlen);
if (ret && tgt)
wchar_t *ptr = dst;
char *pmbs = (char *) src;
size_t count = 0;
size_t len = dlen;
int bytes;
mbstate_t ps;
char charsetbuf[32];
char *charset = __locale_charset ();
mbtowc_p f_mbtowc = __mbtowc;
if (cp)
f_mbtowc = __set_charset_from_codepage (cp, charset = charsetbuf);
memset (&ps, 0, sizeof ps);
if (dst == NULL)
len = (size_t)-1;
while (len > 0)
{
ret = (ret < tlen) ? ret : tlen - 1;
tgt[ret] = L'\0';
/* ASCII SO. Convert following UTF-8 sequence (if not UTF-8 anyway). */
if (*pmbs == 0x0e && *charset != 'U'/*TF-8*/)
{
pmbs++;
bytes = __utf8_mbtowc (_REENT, ptr, pmbs, nms, charset, &ps);
if (bytes < 0)
{
/* Invalid UTF-8 sequence? Treat the ASCII SO character as
stand-alone ASCII SO char. */
bytes = 1;
if (dst)
*ptr = 0x0e;
memset (&ps, 0, sizeof ps);
break;
}
if (bytes == 0)
break;
if (ps.__count == 4) /* First half of a surrogate. */
{
wchar_t *ptr2 = dst ? ptr + 1 : NULL;
int bytes2 = __utf8_mbtowc (_REENT, ptr2, pmbs + bytes,
nms - bytes, charset, &ps);
if (bytes2 < 0)
break;
pmbs += bytes2;
nms -= bytes2;
++count;
ptr = dst ? ptr + 1 : NULL;
--len;
}
}
else
bytes = f_mbtowc (_REENT, ptr, pmbs, nms, charset, &ps);
if (bytes > 0)
{
pmbs += bytes;
nms -= bytes;
++count;
ptr = dst ? ptr + 1 : NULL;
--len;
}
else
{
if (bytes == 0)
++count;
break;
}
}
return ret;
if (count && dst)
{
count = (count < dlen) ? count : dlen - 1;
dst[count] = L'\0';
}
return count;
}
/* Same as sys_wcstombs_alloc, just backwards. */
int __stdcall
sys_mbstowcs_alloc (PWCHAR *tgt_p, int type, const char *src, int slen)
size_t __stdcall
sys_mbstowcs_alloc (PWCHAR *dst_p, int type, const char *src, size_t nms)
{
int ret;
size_t ret;
ret = MultiByteToWideChar (get_cp (), 0, src, slen, NULL, 0);
if (ret)
ret = sys_mbstowcs (NULL, (size_t) -1, src, nms);
if (ret > 0)
{
size_t tlen = (slen == -1 ? ret : ret + 1);
size_t dlen = ret + 1;
if (type == HEAP_NOTHEAP)
*tgt_p = (PWCHAR) calloc (tlen, sizeof (WCHAR));
*dst_p = (PWCHAR) calloc (dlen, sizeof (WCHAR));
else
*tgt_p = (PWCHAR) ccalloc ((cygheap_types) type, tlen, sizeof (WCHAR));
if (!*tgt_p)
*dst_p = (PWCHAR) ccalloc ((cygheap_types) type, dlen, sizeof (WCHAR));
if (!*dst_p)
return 0;
ret = sys_mbstowcs (*tgt_p, tlen, src, slen);
ret = sys_mbstowcs (*dst_p, dlen, src, nms);
}
return ret;
}

View File

@ -1,6 +1,6 @@
/* wchar.h: Extra wchar defs
Copyright 2007 Red Hat, Inc.
Copyright 2007, 2009 Red Hat, Inc.
This file is part of Cygwin.
@ -17,21 +17,22 @@ details. */
extern "C" {
#endif
#undef wcscasecmp
#define wcscasecmp cygwin_wcscasecmp
int __stdcall cygwin_wcscasecmp (const wchar_t *, const wchar_t *);
extern "C" int __utf8_wctomb (struct _reent *, char *, wchar_t,
const char *, mbstate_t *);
#undef wcsncasecmp
#define wcsncasecmp cygwin_wcsncasecmp
int __stdcall cygwin_wcsncasecmp (const wchar_t *, const wchar_t *, size_t);
typedef int mbtowc_f (struct _reent *, wchar_t *, const char *, size_t,
const char *, mbstate_t *);
typedef mbtowc_f *mbtowc_p;
#undef wcslwr
#define wcslwr cygwin_wcslwr
wchar_t * __stdcall cygwin_wcslwr (wchar_t *);
extern "C" mbtowc_p __mbtowc;
extern "C" mbtowc_f __ascii_mbtowc;
extern "C" mbtowc_f __utf8_mbtowc;
extern "C" mbtowc_f __iso_mbtowc;
extern "C" mbtowc_f __cp_mbtowc;
#undef wcsupr
#define wcsupr cygwin_wcsupr
wchar_t * __stdcall cygwin_wcsupr (wchar_t *);
extern "C" char *__locale_charset ();
extern "C" mbtowc_p __set_charset_from_codepage (UINT cp, char *charset);
#ifdef __cplusplus
}

View File

@ -104,21 +104,24 @@ extern const char case_folded_upper[];
/* The one function we use from winuser.h most of the time */
extern "C" DWORD WINAPI GetLastError (void);
void codepage_init (const char *buf);
UINT get_cp ();
/* Used as type by sys_wcstombs_alloc and sys_mbstowcs_alloc. For a
description see there. */
#define HEAP_NOTHEAP -1
int __stdcall sys_wcstombs (char *, int, const PWCHAR, int = -1)
size_t __stdcall sys_wcstombs (char *, size_t, const PWCHAR, size_t = (size_t) -1)
__attribute__ ((regparm(3)));
int __stdcall sys_wcstombs_alloc (char **, int, const PWCHAR, int = -1)
size_t __stdcall sys_wcstombs_alloc (char **, int, const PWCHAR, size_t = (size_t) -1)
__attribute__ ((regparm(3)));
int __stdcall sys_mbstowcs (PWCHAR, int, const char *, int = -1)
size_t __stdcall sys_cp_mbstowcs (UINT, PWCHAR, size_t, const char *, size_t = (size_t) -1)
__attribute__ ((regparm(3)));
int __stdcall sys_mbstowcs_alloc (PWCHAR *, int, const char *, int = -1)
/* Multibyte to wide-char string conversion using the charset of the
   current locale.  This is sys_cp_mbstowcs called with codepage 0, which
   selects the locale's own conversion function instead of one derived
   from a Windows codepage.  Arguments and return value are passed
   through unchanged; nms defaults to (size_t) -1. */
inline size_t
sys_mbstowcs (PWCHAR dst, size_t dlen, const char *src,
size_t nms = (size_t) -1)
{
return sys_cp_mbstowcs (0, dst, dlen, src, nms);
}
size_t __stdcall sys_mbstowcs_alloc (PWCHAR *, int, const char *, size_t = (size_t) -1)
__attribute__ ((regparm(3)));
/* Used to check if Cygwin DLL is dynamically loaded. */
@ -333,8 +336,6 @@ inline void clear_procimptoken ()
CloseHandle (old_procimp);
}
}
void set_file_api_mode (codepage_type);
#endif
#endif /* defined __cplusplus */