Add RegExp.escape (#687)

This commit is contained in:
Ben Noordhuis 2024-11-15 12:17:38 +01:00 committed by GitHub
parent b5d41818e2
commit 554907e88f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 70 additions and 8 deletions

View file

@ -572,6 +572,16 @@ static const uint8_t unicode_prop_ID_Continue1_index[66] = {
0x01, 0x0e, 0x01, 0x0e,
}; };
/* Packed data for the White_Space property. It is passed, together with
   unicode_prop_White_Space_index, to lre_is_in_table() by
   lre_is_white_space(). The table generator emits it from
   build_flags_tables() via build_prop_table(f, PROP_White_Space, TRUE),
   so regenerate it rather than editing the bytes by hand.
   NOTE(review): the byte encoding is defined by the generator and by
   lre_is_in_table(); it is not documented here. */
static const uint8_t unicode_prop_White_Space_table[22] = {
0x88, 0x84, 0x91, 0x80, 0xe3, 0x80, 0x99, 0x80,
0x55, 0xde, 0x80, 0x49, 0x7e, 0x8a, 0x9c, 0x0c,
0x80, 0xae, 0x80, 0x4f, 0x9f, 0x80,
};
/* Index companion of unicode_prop_White_Space_table. lre_is_white_space()
   passes it to lre_is_in_table() with a length of
   sizeof(unicode_prop_White_Space_index) / 3, i.e. one entry.
   NOTE(review): presumably each 3-byte entry lets the lookup skip ahead in
   the packed table -- confirm against lre_is_in_table(). */
static const uint8_t unicode_prop_White_Space_index[3] = {
0x01, 0x30, 0x00,
};
static const uint8_t unicode_cc_table[916] = { static const uint8_t unicode_cc_table[916] = {
0xb2, 0xcf, 0xd4, 0x00, 0xe8, 0x03, 0xdc, 0x00, 0xb2, 0xcf, 0xd4, 0x00, 0xe8, 0x03, 0xdc, 0x00,
0xe8, 0x00, 0xd8, 0x04, 0xdc, 0x01, 0xca, 0x03, 0xe8, 0x00, 0xd8, 0x04, 0xdc, 0x01, 0xca, 0x03,
@ -4262,12 +4272,6 @@ static const uint8_t unicode_prop_Variation_Selector_table[13] = {
0x6d, 0x02, 0xef, 0x40, 0xef, 0x6d, 0x02, 0xef, 0x40, 0xef,
}; };
static const uint8_t unicode_prop_White_Space_table[22] = {
0x88, 0x84, 0x91, 0x80, 0xe3, 0x80, 0x99, 0x80,
0x55, 0xde, 0x80, 0x49, 0x7e, 0x8a, 0x9c, 0x0c,
0x80, 0xae, 0x80, 0x4f, 0x9f, 0x80,
};
static const uint8_t unicode_prop_Bidi_Mirrored_table[173] = { static const uint8_t unicode_prop_Bidi_Mirrored_table[173] = {
0xa7, 0x81, 0x91, 0x00, 0x80, 0x9b, 0x00, 0x80, 0xa7, 0x81, 0x91, 0x00, 0x80, 0x9b, 0x00, 0x80,
0x9c, 0x00, 0x80, 0xac, 0x80, 0x8e, 0x80, 0x4e, 0x9c, 0x00, 0x80, 0xac, 0x80, 0x8e, 0x80, 0x4e,

View file

@ -545,6 +545,13 @@ BOOL lre_is_id_continue(uint32_t c)
sizeof(unicode_prop_ID_Continue1_index) / 3); sizeof(unicode_prop_ID_Continue1_index) / 3);
} }
/* Return TRUE when code point 'c' is found in the generated White_Space
   property table, FALSE otherwise. */
BOOL lre_is_white_space(uint32_t c)
{
    /* the index table length is expressed in 3-byte entries */
    const int index_len = sizeof(unicode_prop_White_Space_index) / 3;

    return lre_is_in_table(c, unicode_prop_White_Space_table,
                           unicode_prop_White_Space_index, index_len);
}
#define UNICODE_DECOMP_LEN_MAX 18 #define UNICODE_DECOMP_LEN_MAX 18
typedef enum { typedef enum {

View file

@ -107,6 +107,7 @@ int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode);
LRE_BOOL lre_is_id_start(uint32_t c); LRE_BOOL lre_is_id_start(uint32_t c);
LRE_BOOL lre_is_id_continue(uint32_t c); LRE_BOOL lre_is_id_continue(uint32_t c);
LRE_BOOL lre_is_white_space(uint32_t c);
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len, int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
UnicodeNormalizationEnum n_type, UnicodeNormalizationEnum n_type,

View file

@ -43836,6 +43836,53 @@ void *lre_realloc(void *opaque, void *ptr, size_t size)
return js_realloc_rt(ctx->rt, ptr, size); return js_realloc_rt(ctx->rt, ptr, size);
} }
/* RegExp.escape(string)

   Return a copy of argv[0] in which every character that could be read as
   regular expression syntax is escaped, so that the result can be embedded
   in a pattern and match the input literally. Throws a TypeError when
   argv[0] is not a string.

   Encoding rules (ECMAScript EncodeForRegExpEscape):
   - a leading ASCII letter or digit        -> \xNN
   - TAB, LF, VT, FF, CR                    -> \t \n \v \f \r
   - syntax characters and '/'              -> prefixed with a backslash
   - other ASCII punctuators ,-=<>#&!%:;@~'`" -> \xNN
   - white space and line terminators       -> \xNN or \uNNNN
   - lone surrogates                        -> \uNNNN
   - everything else is copied unchanged, including the remaining C0
     controls, DEL, Latin-1 characters other than U+00A0 and well-formed
     surrogate pairs. */
static JSValue js_regexp_escape(JSContext *ctx, JSValue this_val,
                                int argc, JSValue *argv)
{
    StringBuffer b_s, *b = &b_s;
    JSString *p;
    uint32_t c, c1, i;
    char s[16];

    if (!JS_IsString(argv[0]))
        return JS_ThrowTypeError(ctx, "not a string");
    p = JS_VALUE_GET_STRING(argv[0]);
    string_buffer_init2(ctx, b, 0, p->is_wide_char);
    for (i = 0; i < p->len; i++) {
        c = p->is_wide_char ? (uint32_t)p->u.str16[i] : (uint32_t)p->u.str8[i];
        if (c >= 9 && c <= 13) {
            /* control escapes */
            string_buffer_putc8(b, '\\');
            string_buffer_putc8(b, "tnvfr"[c - 9]);
        } else if (c == ' ' || c == 0xA0) {
            /* the only remaining white space below U+0100 */
            goto hex2;
        } else if (c < 33 || c == 127) {
            /* other C0 controls and DEL are copied as is; a backslash
               followed by DEL would be an invalid identity escape in
               unicode mode */
            string_buffer_putc8(b, c);
        } else if (c < 128) {
            if ((c >= '0' && c <= '9')
             || (c >= 'A' && c <= 'Z')
             || (c >= 'a' && c <= 'z')) {
                /* a leading alphanumeric could extend a preceding escape
                   such as \0 or \c when the result is concatenated */
                if (i == 0)
                    goto hex2;
            } else if (strchr(",-=<>#&!%:;@~'`\"", c)) {
                goto hex2;
            } else if (c != '_') {
                /* what is left are the syntax characters and '/' */
                string_buffer_putc8(b, '\\');
            }
            string_buffer_putc8(b, c);
        } else if (c < 256) {
            /* Latin-1 other than U+00A0 needs no escaping. The Unicode
               White_Space property also contains U+0085, which is not
               ECMAScript white space, so the property table is
               deliberately not consulted here. */
            string_buffer_putc8(b, c);
        } else if ((c & 0xFC00) == 0xD800 && i + 1 < p->len
               &&  (p->u.str16[i + 1] & 0xFC00) == 0xDC00) {
            /* well-formed surrogate pair: one code point, copied as is */
            c1 = p->u.str16[++i];
            string_buffer_putc16(b, c);
            string_buffer_putc16(b, c1);
        } else if (is_surrogate(c) || lre_is_white_space(c) || c == 0xFEFF) {
            /* lone surrogate, white space or line terminator */
            snprintf(s, sizeof(s), "\\u%04x", c);
            string_buffer_puts8(b, s);
        } else {
            string_buffer_putc16(b, c);
        }
        continue;
    hex2:
        snprintf(s, sizeof(s), "\\x%02x", c);
        string_buffer_puts8(b, s);
    }
    return string_buffer_end(b);
}
static JSValue js_regexp_exec(JSContext *ctx, JSValue this_val, static JSValue js_regexp_exec(JSContext *ctx, JSValue this_val,
int argc, JSValue *argv) int argc, JSValue *argv)
{ {
@ -44864,6 +44911,7 @@ done:
} }
static const JSCFunctionListEntry js_regexp_funcs[] = { static const JSCFunctionListEntry js_regexp_funcs[] = {
JS_CFUNC_DEF("escape", 1, js_regexp_escape ),
JS_CGETSET_DEF("[Symbol.species]", js_get_this, NULL ), JS_CGETSET_DEF("[Symbol.species]", js_get_this, NULL ),
}; };

View file

@ -174,7 +174,7 @@ regexp-modifiers=skip
regexp-named-groups regexp-named-groups
regexp-unicode-property-escapes regexp-unicode-property-escapes
regexp-v-flag regexp-v-flag
RegExp.escape=skip RegExp.escape
resizable-arraybuffer resizable-arraybuffer
rest-parameters rest-parameters
Set Set

View file

@ -1574,6 +1574,7 @@ void build_flags_tables(FILE *f)
build_prop_table(f, PROP_Case_Ignorable, TRUE); build_prop_table(f, PROP_Case_Ignorable, TRUE);
build_prop_table(f, PROP_ID_Start, TRUE); build_prop_table(f, PROP_ID_Start, TRUE);
build_prop_table(f, PROP_ID_Continue1, TRUE); build_prop_table(f, PROP_ID_Continue1, TRUE);
build_prop_table(f, PROP_White_Space, TRUE);
} }
void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len, void dump_name_table(FILE *f, const char *cname, const char **tab_name, int len,
@ -1813,7 +1814,8 @@ void build_prop_list_table(FILE *f)
for(i = 0; i < PROP_TABLE_COUNT; i++) { for(i = 0; i < PROP_TABLE_COUNT; i++) {
if (i == PROP_ID_Start || if (i == PROP_ID_Start ||
i == PROP_Case_Ignorable || i == PROP_Case_Ignorable ||
i == PROP_ID_Continue1) { i == PROP_ID_Continue1 ||
i == PROP_White_Space) {
/* already generated */ /* already generated */
} else { } else {
build_prop_table(f, i, FALSE); build_prop_table(f, i, FALSE);