extmod/modure: Add match.span(), start() and end() methods, and tests.

This feature is controlled at compile time by
MICROPY_PY_URE_MATCH_SPAN_START_END, disabled by default.

Thanks to @dmazzella for the original patch for this feature; see #3770.
This commit is contained in:
Damien George 2018-05-24 13:08:15 +10:00
parent 1f86460910
commit 1e9b871d29
3 changed files with 99 additions and 0 deletions

View File

@ -94,11 +94,66 @@ MP_DEFINE_CONST_FUN_OBJ_1(match_groups_obj, match_groups);
#endif
#if MICROPY_PY_URE_MATCH_SPAN_START_END
// Shared worker for match.span(), match.start() and match.end().
//
// args[0] is the match object.  When n_args == 2, args[1] is the group number;
// otherwise group 0 is used.  On return span[0] and span[1] hold new int
// objects with the start and end of the group, computed as pointer differences
// from the beginning of the subject string's data.  Both are -1 when the
// group has no recorded capture.
//
// Raises IndexError (carrying the group argument) if the group number is
// negative or not below self->num_matches.
STATIC void match_span_helper(size_t n_args, const mp_obj_t *args, mp_obj_t span[2]) {
    mp_obj_match_t *self = MP_OBJ_TO_PTR(args[0]);

    // Group number is optional and defaults to 0.
    mp_int_t group = 0;
    if (n_args == 2) {
        group = mp_obj_get_int(args[1]);
        if (!(group >= 0 && group < self->num_matches)) {
            nlr_raise(mp_obj_new_exception_arg1(&mp_type_IndexError, args[1]));
        }
    }

    mp_int_t start_off = -1;
    mp_int_t end_off = -1;

    // Each group owns two consecutive slots in caps: start then end.
    const char *group_start = self->caps[group * 2];
    if (group_start == NULL) {
        // nothing captured for this group: report (-1, -1)
        span[0] = mp_obj_new_int(start_off);
        span[1] = mp_obj_new_int(end_off);
        return;
    }

    // Convert the capture pointers into offsets from the subject's first char.
    const char *subject = mp_obj_str_get_str(self->str);
    start_off = group_start - subject;
    end_off = self->caps[group * 2 + 1] - subject;

    span[0] = mp_obj_new_int(start_off);
    span[1] = mp_obj_new_int(end_off);
}
// match.span([group]): return the 2-tuple (start, end) that
// match_span_helper produces for the requested group.
STATIC mp_obj_t match_span(size_t n_args, const mp_obj_t *args) {
    mp_obj_t bounds[2];
    match_span_helper(n_args, args, bounds);
    return mp_obj_new_tuple(2, bounds);
}
// Accepts the match object plus an optional group number (1 to 2 arguments).
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_span_obj, 1, 2, match_span);
// match.start([group]): return only the first element of the pair that
// match_span_helper produces for the requested group.
STATIC mp_obj_t match_start(size_t n_args, const mp_obj_t *args) {
    mp_obj_t bounds[2];
    match_span_helper(n_args, args, bounds);
    return bounds[0];
}
// Accepts the match object plus an optional group number (1 to 2 arguments).
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_start_obj, 1, 2, match_start);
// match.end([group]): return only the second element of the pair that
// match_span_helper produces for the requested group.
STATIC mp_obj_t match_end(size_t n_args, const mp_obj_t *args) {
    mp_obj_t bounds[2];
    match_span_helper(n_args, args, bounds);
    return bounds[1];
}
// Accepts the match object plus an optional group number (1 to 2 arguments).
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(match_end_obj, 1, 2, match_end);
#endif
// Method table for match objects: each entry maps a method name (qstr) to its
// function object.  "group" is always present; the remaining entries are
// compiled in only when the corresponding MICROPY_PY_URE_* option is enabled.
STATIC const mp_rom_map_elem_t match_locals_dict_table[] = {
{ MP_ROM_QSTR(MP_QSTR_group), MP_ROM_PTR(&match_group_obj) },
#if MICROPY_PY_URE_MATCH_GROUPS
{ MP_ROM_QSTR(MP_QSTR_groups), MP_ROM_PTR(&match_groups_obj) },
#endif
#if MICROPY_PY_URE_MATCH_SPAN_START_END
// span(), start() and end() are enabled together by a single option
{ MP_ROM_QSTR(MP_QSTR_span), MP_ROM_PTR(&match_span_obj) },
{ MP_ROM_QSTR(MP_QSTR_start), MP_ROM_PTR(&match_start_obj) },
{ MP_ROM_QSTR(MP_QSTR_end), MP_ROM_PTR(&match_end_obj) },
#endif
};
// Expose the table above as the constant dict match_locals_dict.
STATIC MP_DEFINE_CONST_DICT(match_locals_dict, match_locals_dict_table);

View File

@ -1146,6 +1146,10 @@ typedef double mp_float_t;
#define MICROPY_PY_URE_MATCH_GROUPS (0)
#endif
// Whether to provide the span(), start() and end() methods on ure match
// objects.  Disabled (0) unless already defined before this point.
#ifndef MICROPY_PY_URE_MATCH_SPAN_START_END
#define MICROPY_PY_URE_MATCH_SPAN_START_END (0)
#endif
// Defaults to 0 unless already defined before this point.
// NOTE(review): presumably gates the uheapq module; confirm against its users.
#ifndef MICROPY_PY_UHEAPQ
#define MICROPY_PY_UHEAPQ (0)
#endif

40
tests/extmod/ure_span.py Normal file
View File

@ -0,0 +1,40 @@
# test match.span(), match.start() and match.end(), including nested and optional groups
# Prefer the "ure" module and fall back to "re" when "ure" cannot be imported.
# If neither is importable, print SKIP and exit.
try:
    import ure as re
except ImportError:
    try:
        import re
    except ImportError:
        print("SKIP")
        raise SystemExit
# Probe for the span attribute on a match object; when it is missing,
# print SKIP and exit instead of failing.
try:
    m = re.match(".", "a")
    m.span
except AttributeError:
    print('SKIP')
    raise SystemExit
def print_spans(match):
    """Print span(i), start(i) and end(i) for every group of ``match``.

    Output starts with a '----' separator line, then one line per group
    number counting up from 0.  The loop ends when a call raises
    IndexError, which is swallowed.
    """
    print('----')
    try:
        i = 0
        while True:
            print(match.span(i), match.start(i), match.end(i))
            i += 1
    except IndexError:
        # ran past the last group
        pass
# Each case is (pattern, subject); the spans of every group are printed in turn.
for pattern, subject in (
    # nested groups
    (r'(([0-9]*)([a-z]*)[0-9]*)', '1234hello567'),
    (r'([0-9]*)(([a-z]*)([0-9]*))', '1234hello567'),
    # optional span that matches
    (r'(a)?b(c)', 'abc'),
    # optional span that doesn't match
    (r'(a)?b(c)', 'bc'),
):
    print_spans(re.match(pattern, subject))