newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t

* vfscanf: per POSIX, if the target type is wchar_t, the width is
  counted in (multibyte) characters, not in bytes.

* vfscanf: Handle UTF-8 multibyte sequences converted to surrogate
  pairs on UTF-16 systems.

* vfwscanf: Don't count high surrogates in the input against the field
  width.  Per POSIX, the field width is counted in characters, and a
  surrogate pair represents a single character.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2017-12-01 17:18:26 +01:00
parent 9638c07527
commit a49209d2bc
2 changed files with 53 additions and 25 deletions

View File

@ -488,10 +488,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
_p = _p0; \
_w; \
})
/* For systems with sizeof (wchar_t) == 2 (UTF-16), check if there's room
for at least 2 wchar_t's (a surrogate pair). */
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
({ \
size_t _nw = (_w); \
if (_p_p && _p - _p0 == _nw) \
ptrdiff_t _dif = _p - _p0; \
if (_p_p && \
((sizeof (_type) == 2 && _dif >= _nw - 1) \
|| _dif >= _nw)) \
{ \
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
if (!_p0) \
@ -499,7 +504,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
nassigned = EOF; \
goto match_failure; \
} \
_p = _p0 + _nw; \
_p = _p0 + _dif; \
*_p_p = _p0; \
_nw <<= 1; \
} \
@ -948,7 +953,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
size_t wcp_siz = 0;
#endif
mbstate_t state;
memset (&state, 0, sizeof (mbstate_t));
if (flags & SUPPRESS)
wcp = NULL;
#ifdef _WANT_IO_POSIX_EXTENSIONS
@ -958,13 +962,17 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
else
wcp = GET_ARG (N, ap, wchar_t *);
n = 0;
while (width-- != 0)
while (width != 0)
{
if (n == MB_CUR_MAX)
goto input_failure;
buf[n++] = *fp->_p;
fp->_r -= 1;
fp->_p += 1;
/* Got a high surrogate, allow low surrogate to slip
through */
if (mbslen != 3 || state.__count != 4)
memset (&state, 0, sizeof (mbstate_t));
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
== (size_t)-1)
goto input_failure; /* Invalid sequence */
@ -973,6 +981,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
if (mbslen != (size_t)-2) /* Incomplete sequence */
{
nread += n;
/* Handle high surrogate */
if (mbslen != 3 || state.__count != 4)
width -= 1;
if (!(flags & SUPPRESS))
{
#ifdef _WANT_IO_POSIX_EXTENSIONS
@ -1122,7 +1133,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
#endif
/* Process %S and %ls placeholders */
mbstate_t state;
memset (&state, 0, sizeof (mbstate_t));
if (flags & SUPPRESS)
wcp = &wc;
#ifdef _WANT_IO_POSIX_EXTENSIONS
@ -1139,7 +1149,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
buf[n++] = *fp->_p;
fp->_r -= 1;
fp->_p += 1;
width--;
/* Got a high surrogate, allow low surrogate to slip
through */
if (mbslen != 3 || state.__count != 4)
memset (&state, 0, sizeof (mbstate_t));
if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
== (size_t)-1)
goto input_failure;
@ -1154,6 +1167,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
break;
}
nread += n;
/* Handle high surrogate */
if (mbslen != 3 || state.__count != 4)
width -= 1;
if ((flags & SUPPRESS) == 0)
{
wcp += 1;

View File

@ -376,6 +376,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
wint_t wi; /* handy wint_t */
char *mbp = NULL; /* multibyte string pointer for %c %s %[ */
size_t nconv; /* number of bytes in mb. conversion */
char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
char *cp;
short *sp;
@ -458,13 +459,15 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
_p = _p0; \
_w; \
})
/* For char output, check if there's room for at least MB_CUR_MAX
characters. */
#define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \
({ \
size_t _nw = (_w); \
ptrdiff_t _dif = _p - _p0; \
if (_p_p && \
((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX) \
|| (sizeof (_type) != 1 && _dif == _nw))) \
|| _dif >= _nw)) \
{ \
_p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \
if (!_p0) \
@ -925,7 +928,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
#endif
if (flags & SUPPRESS)
;
mbp = mbbuf;
#ifdef _WANT_IO_POSIX_EXTENSIONS
else if (flags & MALLOC)
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@ -934,16 +937,19 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
mbp = GET_ARG(N, ap, char *);
n = 0;
memset ((_PTR)&mbs, '\0', sizeof (mbstate_t));
while (width-- != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
while (width != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
{
#ifdef _WANT_IO_POSIX_EXTENSIONS
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
#endif
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
if (nconv == (size_t) -1)
goto input_failure;
/* Ignore high surrogate in width counting */
if (nconv != 0 || mbs.__count != -4)
width--;
if (!(flags & SUPPRESS))
{
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
if (nconv == (size_t) -1)
goto input_failure;
#ifdef _WANT_IO_POSIX_EXTENSIONS
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
#endif
mbp += nconv;
}
n++;
@ -1014,7 +1020,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
#endif
if (flags & SUPPRESS)
;
mbp = mbbuf;
#ifdef _WANT_IO_POSIX_EXTENSIONS
else if (flags & MALLOC)
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@ -1024,13 +1030,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
n = 0;
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
&& width-- != 0 && INCCL (wi))
&& width != 0 && INCCL (wi))
{
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
if (nconv == (size_t) -1)
goto input_failure;
/* Ignore high surrogate in width counting */
if (nconv != 0 || mbs.__count != -4)
width--;
if (!(flags & SUPPRESS))
{
nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
if (nconv == (size_t) -1)
goto input_failure;
mbp += nconv;
#ifdef _WANT_IO_POSIX_EXTENSIONS
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
@ -1101,7 +1110,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
#endif
if (flags & SUPPRESS)
;
mbp = mbbuf;
#ifdef _WANT_IO_POSIX_EXTENSIONS
else if (flags & MALLOC)
mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@ -1110,13 +1119,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
mbp = GET_ARG(N, ap, char *);
memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
while ((wi = _fgetwc_r (rptr, fp)) != WEOF
&& width-- != 0 && !iswspace (wi))
&& width != 0 && !iswspace (wi))
{
nconv = wcrtomb(mbp, wi, &mbs);
if (nconv == (size_t)-1)
goto input_failure;
/* Ignore high surrogate in width counting */
if (nconv != 0 || mbs.__count != -4)
width--;
if (!(flags & SUPPRESS))
{
nconv = wcrtomb(mbp, wi, &mbs);
if (nconv == (size_t)-1)
goto input_failure;
mbp += nconv;
#ifdef _WANT_IO_POSIX_EXTENSIONS
mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);