py, lexer: Fix parsing of raw strings (allow escaping of quote).

This commit is contained in:
Damien George 2014-04-10 11:30:55 +01:00
parent f22626ee4f
commit a91f41407b
1 changed file with 44 additions and 39 deletions

View File

@ -455,50 +455,55 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
} else {
n_closing = 0;
if (!is_raw && is_char(lex, '\\')) {
if (is_char(lex, '\\')) {
next_char(lex);
unichar c = CUR_CHAR(lex);
switch (c) {
case MP_LEXER_CHAR_EOF: break; // TODO a proper error message?
case '\n': c = MP_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore it
case '\\': break;
case '\'': break;
case '"': break;
case 'a': c = 0x07; break;
case 'b': c = 0x08; break;
case 't': c = 0x09; break;
case 'n': c = 0x0a; break;
case 'v': c = 0x0b; break;
case 'f': c = 0x0c; break;
case 'r': c = 0x0d; break;
case 'x':
{
uint num = 0;
if (!get_hex(lex, 2, &num)) {
// TODO error message
assert(0);
}
c = num;
break;
}
case 'N': break; // TODO \N{name} only in strings
case 'u': break; // TODO \uxxxx only in strings
case 'U': break; // TODO \Uxxxxxxxx only in strings
default:
if (c >= '0' && c <= '7') {
// Octal sequence, 1-3 chars
int digits = 3;
int num = c - '0';
while (is_following_odigit(lex) && --digits != 0) {
next_char(lex);
num = num * 8 + (CUR_CHAR(lex) - '0');
if (is_raw) {
// raw strings allow escaping of quotes, but the backslash is also emitted
vstr_add_char(&lex->vstr, '\\');
} else {
switch (c) {
case MP_LEXER_CHAR_EOF: break; // TODO a proper error message?
case '\n': c = MP_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore it
case '\\': break;
case '\'': break;
case '"': break;
case 'a': c = 0x07; break;
case 'b': c = 0x08; break;
case 't': c = 0x09; break;
case 'n': c = 0x0a; break;
case 'v': c = 0x0b; break;
case 'f': c = 0x0c; break;
case 'r': c = 0x0d; break;
case 'x':
{
uint num = 0;
if (!get_hex(lex, 2, &num)) {
// TODO error message
assert(0);
}
c = num;
} else {
// unrecognised escape character; CPython lets this through verbatim as '\' and then the character
vstr_add_char(&lex->vstr, '\\');
break;
}
break;
case 'N': break; // TODO \N{name} only in strings
case 'u': break; // TODO \uxxxx only in strings
case 'U': break; // TODO \Uxxxxxxxx only in strings
default:
if (c >= '0' && c <= '7') {
// Octal sequence, 1-3 chars
int digits = 3;
int num = c - '0';
while (is_following_odigit(lex) && --digits != 0) {
next_char(lex);
num = num * 8 + (CUR_CHAR(lex) - '0');
}
c = num;
} else {
// unrecognised escape character; CPython lets this through verbatim as '\' and then the character
vstr_add_char(&lex->vstr, '\\');
}
break;
}
}
if (c != MP_LEXER_CHAR_EOF) {
vstr_add_char(&lex->vstr, c);