py/objstr: Always validate utf-8 for mp_obj_new_str.

All uses of this are either tiny strings or not-known-to-be-safe.

Update comments for mp_obj_new_str_copy and mp_obj_new_str_of_type.

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
This commit is contained in:
Jim Mussared 2022-08-24 12:22:57 +10:00 committed by Damien George
parent 3a910b1565
commit 6c3d8d38bf
3 changed files with 12 additions and 11 deletions

View File

@ -789,11 +789,11 @@ mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value);
mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base);
mp_obj_t mp_obj_new_int_from_ll(long long val); // this must return a multi-precision integer object (or raise an overflow exception)
mp_obj_t mp_obj_new_int_from_ull(unsigned long long val); // this must return a multi-precision integer object (or raise an overflow exception)
mp_obj_t mp_obj_new_str(const char *data, size_t len);
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len);
mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr);
mp_obj_t mp_obj_new_str(const char *data, size_t len); // will check utf-8 (raises UnicodeError)
mp_obj_t mp_obj_new_str_via_qstr(const char *data, size_t len); // input data must be valid utf-8
mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr); // will check utf-8 (raises UnicodeError)
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr); // only use when vstr is already known to be utf-8 encoded
mp_obj_t mp_obj_new_str_from_utf8_vstr(vstr_t *vstr); // input data must be valid utf-8
#else
#define mp_obj_new_str_from_utf8_vstr mp_obj_new_str_from_vstr
#endif

View File

@ -202,11 +202,7 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_
} else {
mp_buffer_info_t bufinfo;
mp_get_buffer_raise(args[0], &bufinfo, MP_BUFFER_READ);
#if MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
if (!utf8_check(bufinfo.buf, bufinfo.len)) {
mp_raise_msg(&mp_type_UnicodeError, NULL);
}
#endif
// This will utf-8 check the input.
return mp_obj_new_str(bufinfo.buf, bufinfo.len);
}
}
@ -2268,6 +2264,11 @@ mp_obj_t mp_obj_new_bytes_from_vstr(vstr_t *vstr) {
}
mp_obj_t mp_obj_new_str(const char *data, size_t len) {
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
if (!utf8_check((byte *)data, len)) {
mp_raise_msg(&mp_type_UnicodeError, NULL);
}
#endif
qstr q = qstr_find_strn(data, len);
if (q != MP_QSTRnull) {
// qstr with this data already exists

View File

@ -88,8 +88,8 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t
void mp_str_print_json(const mp_print_t *print, const byte *str_data, size_t str_len);
mp_obj_t mp_obj_str_format(size_t n_args, const mp_obj_t *args, mp_map_t *kwargs);
mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args);
mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t len);
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len);
mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, input data must be valid utf-8
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte *data, size_t len); // for type=str, will check utf-8 (raises UnicodeError)
mp_obj_t mp_obj_str_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_uint_t flags);