From e6ff6a18d4949dd49acc9be04bd522ebdd207900 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Sun, 21 Jul 2013 19:28:45 +0000 Subject: [PATCH] * regex/regcomp.c (wgetnext): Add a kludge to be more glibc compatible. Add comment to explain. --- winsup/cygwin/ChangeLog | 5 +++++ winsup/cygwin/regex/regcomp.c | 12 ++++++++++++ winsup/cygwin/release/1.7.22 | 5 +++++ 3 files changed, 22 insertions(+) diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index 6a7d285c3..64bb27178 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,8 @@ +2013-07-21 Corinna Vinschen + + * regex/regcomp.c (wgetnext): Add a kludge to be more glibc compatible. + Add comment to explain. + 2013-07-20 Christopher Faylor * exceptions.cc (signal_exit): Remove comment. Use diff --git a/winsup/cygwin/regex/regcomp.c b/winsup/cygwin/regex/regcomp.c index 881c16099..b68db99c7 100644 --- a/winsup/cygwin/regex/regcomp.c +++ b/winsup/cygwin/regex/regcomp.c @@ -1128,6 +1128,18 @@ wgetnext(struct parse *p) wint_t ret; size_t n; +#ifdef __CYGWIN__ + /* Kludge for more glibc compatibility. On Cygwin as well as on + Linux, mbrtowc returns -1 if the current locale's codeset is ASCII + and the character is >= 0x80. Nevertheless, glibc's regcomp allows + any char value, even stuff like [\xc0-\xff], if the locale's codeset + is ASCII, so in regcomp it ignores the fact that chars >= 0x80 are + invalid ASCII chars. To be more Linux-compatible, we align the + behaviour to glibc here. Allow any character value if the current + locale's codeset is ASCII. 
*/ + if (*__locale_charset () == 'A') /* SCII */ + return (wint_t) (unsigned char) *p->next++; +#endif memset(&mbs, 0, sizeof(mbs)); n = mbrtowc(&wc, p->next, p->end - p->next, &mbs); if (n == (size_t)-1 || n == (size_t)-2) { diff --git a/winsup/cygwin/release/1.7.22 b/winsup/cygwin/release/1.7.22 index d5b7e8355..edb942548 100644 --- a/winsup/cygwin/release/1.7.22 +++ b/winsup/cygwin/release/1.7.22 @@ -11,6 +11,11 @@ What changed: - Added cygwin GetCommandLine wrappers which will allow Cygwin programs to (appear to) use the Windows command line functions. +- regcomp(3) now allows character values >= 0x80 if the current codeset is + ASCII (default codeset in the "C"/"POSIX" locale). This allows patterns + containing arbitrary byte values, as glibc's regcomp does. + Fixes: http://cygwin.com/ml/cygwin/2013-07/msg00405.html + Bug fixes: ----------