Implement extended named capture group identifiers (bnoordhuis)
This commit is contained in:
parent
4949d75daf
commit
20a57f90d7
2 changed files with 14 additions and 11 deletions
23
libregexp.c
23
libregexp.c
|
@ -1071,11 +1071,10 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
|
|||
}
|
||||
|
||||
/* '*pp' is the first char after '<' */
|
||||
static int re_parse_group_name(char *buf, int buf_size,
|
||||
const uint8_t **pp, BOOL is_utf16)
|
||||
static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
|
||||
{
|
||||
const uint8_t *p;
|
||||
uint32_t c;
|
||||
const uint8_t *p, *p1;
|
||||
uint32_t c, d;
|
||||
char *q;
|
||||
|
||||
p = *pp;
|
||||
|
@ -1086,11 +1085,18 @@ static int re_parse_group_name(char *buf, int buf_size,
|
|||
p++;
|
||||
if (*p != 'u')
|
||||
return -1;
|
||||
c = lre_parse_escape(&p, is_utf16 * 2);
|
||||
c = lre_parse_escape(&p, 2); // accept surrogate pairs
|
||||
} else if (c == '>') {
|
||||
break;
|
||||
} else if (c >= 128) {
|
||||
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
|
||||
if (c >= 0xD800 && c <= 0xDBFF) {
|
||||
d = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p1);
|
||||
if (d >= 0xDC00 && d <= 0xDFFF) {
|
||||
c = 0x10000 + 0x400 * (c - 0xD800) + (d - 0xDC00);
|
||||
p = p1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
p++;
|
||||
}
|
||||
|
@ -1140,8 +1146,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
|||
/* potential named capture */
|
||||
if (capture_name) {
|
||||
p += 3;
|
||||
if (re_parse_group_name(name, sizeof(name), &p,
|
||||
s->is_utf16) == 0) {
|
||||
if (re_parse_group_name(name, sizeof(name), &p) == 0) {
|
||||
if (!strcmp(name, capture_name))
|
||||
return capture_index;
|
||||
}
|
||||
|
@ -1314,7 +1319,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
} else if (p[2] == '<') {
|
||||
p += 3;
|
||||
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
|
||||
&p, s->is_utf16)) {
|
||||
&p)) {
|
||||
return re_parse_error(s, "invalid group name");
|
||||
}
|
||||
if (find_group_name(s, s->u.tmp_buf) > 0) {
|
||||
|
@ -1378,7 +1383,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
|||
}
|
||||
p1 += 3;
|
||||
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
|
||||
&p1, s->is_utf16)) {
|
||||
&p1)) {
|
||||
if (s->is_utf16 || re_has_named_captures(s))
|
||||
return re_parse_error(s, "invalid group name");
|
||||
else
|
||||
|
|
|
@ -9,8 +9,6 @@ test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-r
|
|||
test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-realm.js:20: strict mode: Test262Error: Expected a ReferenceError but got a different error constructor with the same name
|
||||
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
||||
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
|
||||
test262/test/built-ins/RegExp/named-groups/non-unicode-property-names-valid.js:46: SyntaxError: invalid group name
|
||||
test262/test/built-ins/RegExp/named-groups/non-unicode-property-names-valid.js:46: strict mode: SyntaxError: invalid group name
|
||||
test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: Test262Error: \u0390 does not match \u1fd3
|
||||
test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: strict mode: Test262Error: \u0390 does not match \u1fd3
|
||||
test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
|
||||
|
|
Loading…
Reference in a new issue