From 8cd59bf7c439e3370143924813ad6684918795c5 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Sun, 20 Oct 2024 12:42:21 +0200 Subject: [PATCH] Improve JS_DetectModule (#610) It's still not infallible (I don't think it can ever be, the whole premise is wrong) but hopefully it's a little less fallible now. Fixes: https://github.com/quickjs-ng/quickjs/issues/606 --- gen/function_source.c | Bin 3214 -> 2896 bytes gen/hello.c | Bin 1353 -> 1429 bytes quickjs.c | 61 +++++++++++++++++++----------------- quickjs.h | 6 ++++ tests/detect_module/0.js | 1 + tests/detect_module/1.js | 2 ++ tests/detect_module/2.js | 1 + tests/detect_module/3.js | 8 +++++ tests/detect_module/4.js | 3 ++ tests/test_module_detect.js | 5 --- 10 files changed, 54 insertions(+), 33 deletions(-) create mode 100644 tests/detect_module/0.js create mode 100644 tests/detect_module/1.js create mode 100644 tests/detect_module/2.js create mode 100644 tests/detect_module/3.js create mode 100644 tests/detect_module/4.js delete mode 100644 tests/test_module_detect.js diff --git a/gen/function_source.c b/gen/function_source.c index e041b1805d02e0897f50c2e974e5a3987babd2c1..d4394f64559a0e106cb7840b10edf866cf0dbe38 100644 GIT binary patch literal 2896 zcmai0O>f&q5WVwPY}1P)C<#Ao$%fHW03QrDKr8fA1O!E{EIJhFkjvO^kpJG9_m;vc zQj!Zn?#z4h=It!8?`GzvZmhYtt?dhED^rZF+ZC=Zi>7%o>lfp;*8DZr<)_=#oA%z6 z-R@Bk{aZ787e&9KHLpHv%ER!csn_M(?W-s%yLND9tXr3*xig<1hBDubZRzT+ z&4+I6OPde%f7bkAvh4CY1YG_g|6fV`DEVI_6F(>MT%Pd-o{2s)tSryBl&Pywtl5h1w1PtEs$Gc9$r z!qpKgVT*}%ufnov|5s&;L6Yw(Xx&%HP%aeXD}=|DUGc9`LF+N#3g;nXg{cl)3Jy)l zv7@jlNqX3u`@OBKbHZH678?Kv8$Htfow*9&F8;#YiBtjNt~|ZD9Dyk;>kF<-{Xi1` z*qEGyBO3UVf+F$Lia@&5Itd0l;m&XZ;9CNV6E>ZqLWXt|!a$Kg3=bBGvL#$PX9Xsq z0{~@uZuut#DL2fpF z@$7c>_t?66XU(j4uOqopgox5T%QeH}NOLA>*OFCl%-L@ee=YCE^wy1iYd-vQcc(%B zDdFqrTNKIN$Lxx_J=2$>znAXIZPAf&q5WVwPY}1P(C<#Ag%7W2T03QrDKr8fA6a+=BEIJhFkjvO^;s4&*_mRbT7Q_T>$1}7PS?84jozrdH_bjbWsz6avsynZv(xI2zAQf9tlrgk zs%Z8Pf@t5X>4zZrwXBP(-)Qybi*>p0-c{wgc)xoS1VvMKM)hTFk~lN!%Y9d5+rBPL z+0;uYADeH~jw=$A85Czd(ggQ8<%tnBp7ZTi9=kb(+}qVvg^{ z%mSPNP9Xs?0A7d$K?MfD1<-yZtmEwgwt!En3GiIgC7kH0?fL@i1$@#i07NfHg9gNl z0Z8*hej#irg)EW8E@1}alr+5j4iF6rST+0%EYt%C3lvS>o`G5=2$VvrpC~#eJ;35i zWQdW0hQ->};MSCZwGFf_C8rNGp)1KG066f*1`>Io5|VBqAKB+{7qSB|yHUs5uR))oM8szkUerC!VaVBu2T?QCk~2L&EOsrj`7lMnosdfOUir<3w3Xa*phm zr2~9=jM#61SpW(A;6z!#s}qN8-+rHK$a3Ctr3ZXNZ1%ve8vGq4a@+R`n0>S7C4n5hfjM0JVuhJ30ihU?2v&s ztUsX%R5~Jd0L+nxPJSM)V~&rnQo&P(14+gysd`mOIGf>!<)}#x-82K?`f-5ekZyD( zjnfR8op!$iOzj;eCxfS<%a9=5X`vM0i10xrX^K!f5^*HC#Lbtu6X)qb7_$|~>?#?E z*$h9}0cNv+g6HA11D*h5Jzfpoke}^@ANuP!VqhA(fElvJ@ZUy>3c#n|Gt6|t4*46n8beTeug8bcpJOKKYU%gtY%h=D*` zE>c@V1X+`X1PGtTXoQr*2js zo7(6nqoy~j?2~@{*mv?Sbeof#)t`NB%Dq<8*1QemJ?0|{^CXWnhhvzDxLt`?xmA;2 z#Qs*+O10YbZLL22b$e@#?oTma1usD$FGsb{%X(rLd3z^H6uZ1t)2aONb;#-wu`xX2 zh$B(WNXGe2K;^#6Iy12zyP-nC zthD*P+4PmZZSsw7ujSDz)*0%YDapHsHLGCeKH|O-?v>z9KF2xVY_grM9&B2{%$z!q z#fsqE^w+$~)@7Zy&vZNY2A_TVWiGSmms>T>gN=hdDf7D3dKidd+hyL0>6|haGBm-J MV@+=UsdL!2V#F%v8HOqD@gow&pQv9rbLP}vlHA9Qa zyi{*OP<$)jOd!juRmEJ2%|rDSnWqZJQy?P4IZY*N;iNH6N;CI@eX233aog8}7m7UP qRn4CZ0BMX}IzmE@geWxnh4V(Vjw*k_iU*(YT!a#*z3F=Q*4`W5LwZ^O delta 363 zcmZXQF$%&k6ozSp4i#JqBD4fuCBtjdHhKgv;9eR#Iys04UO@@2Uck`T|_+f)Anbooxc0mB{}^X z@ul-q;r&vAszl-rdWHiAweh_sC}DL41(%Q=fI0vexO^K&octmJK!Cf(W1(-57LtQP zg`F}kOL!36e0wayijNh&LZmVF8Ktqel+vzQJ|tiUpX2-=*xp$-Xz>&YuqFFqBSzvj H>225>=)_mK diff --git a/quickjs.c b/quickjs.c index 2587a64..32ecd80 100644 --- a/quickjs.c +++ b/quickjs.c @@ -20250,34 +20250,6 @@ static void skip_shebang(const uint8_t **pp, const uint8_t *buf_end) } } -/* return true if 'input' contains the source of a module - (heuristic). 'input' must be a zero terminated. - - Heuristic: - - Skip comments - - Expect 'import' keyword not followed by '(' or '.' - - Expect 'export' keyword - - Expect 'await' keyword -*/ -/* input is pure ASCII or UTF-8 encoded source code */ -BOOL JS_DetectModule(const char *input, size_t input_len) -{ - const uint8_t *p = (const uint8_t *)input; - int tok; - - skip_shebang(&p, p + input_len); - switch(simple_next_token(&p, FALSE)) { - case TOK_IMPORT: - tok = simple_next_token(&p, FALSE); - return (tok != '.' && tok != '('); - case TOK_AWAIT: - case TOK_EXPORT: - return TRUE; - default: - return FALSE; - } -} - static inline int get_prev_opcode(JSFunctionDef *fd) { if (fd->last_opcode_pos < 0) return OP_invalid; @@ -26380,6 +26352,7 @@ static JSModuleDef *js_host_resolve_imported_module(JSContext *ctx, /* load the module */ if (!rt->module_loader_func) { /* XXX: use a syntax error ? */ + // XXX: update JS_DetectModule when you change this JS_ThrowReferenceError(ctx, "could not load module '%s'", cname); js_free(ctx, cname); @@ -54702,6 +54675,38 @@ static void _JS_AddIntrinsicCallSite(JSContext *ctx) countof(js_callsite_proto_funcs)); } +BOOL JS_DetectModule(const char *input, size_t input_len) +{ + JSRuntime *rt; + JSContext *ctx; + JSValue val; + BOOL is_module; + + is_module = TRUE; + rt = JS_NewRuntime(); + if (!rt) + return FALSE; + ctx = JS_NewContextRaw(rt); + if (!ctx) { + JS_FreeRuntime(rt); + return FALSE; + } + JS_AddIntrinsicRegExp(ctx); // otherwise regexp literals don't parse + val = __JS_EvalInternal(ctx, JS_UNDEFINED, input, input_len, "", + JS_EVAL_TYPE_MODULE|JS_EVAL_FLAG_COMPILE_ONLY, -1); + if (JS_IsException(val)) { + const char *msg = JS_ToCString(ctx, rt->current_exception); + // gruesome hack to recognize exceptions from import statements; + // necessary because we don't pass in a module loader + is_module = !!strstr(msg, "ReferenceError: could not load module"); + JS_FreeCString(ctx, msg); + } + JS_FreeValue(ctx, val); + JS_FreeContext(ctx); + JS_FreeRuntime(rt); + return is_module; +} + #undef malloc #undef free #undef realloc diff --git a/quickjs.h b/quickjs.h index e95d224..1e33150 100644 --- a/quickjs.h +++ b/quickjs.h @@ -693,6 +693,12 @@ JS_EXTERN JSValue JS_CallConstructor(JSContext *ctx, JSValue func_obj, JS_EXTERN JSValue JS_CallConstructor2(JSContext *ctx, JSValue func_obj, JSValue new_target, int argc, JSValue *argv); +/* Try to detect if the input is a module. Returns TRUE if parsing the input + * as a module produces no syntax errors. It's a naive approach that is not + * wholly infallible: non-strict classic scripts may _parse_ okay as a module + * but not _execute_ as one (different runtime semantics.) Use with caution. + * |input| can be either ASCII or UTF-8 encoded source code. + */ JS_EXTERN JS_BOOL JS_DetectModule(const char *input, size_t input_len); /* 'input' must be zero terminated i.e. input[input_len] = '\0'. */ JS_EXTERN JSValue JS_Eval(JSContext *ctx, const char *input, size_t input_len, diff --git a/tests/detect_module/0.js b/tests/detect_module/0.js new file mode 100644 index 0000000..18a9109 --- /dev/null +++ b/tests/detect_module/0.js @@ -0,0 +1 @@ +await undefined diff --git a/tests/detect_module/1.js b/tests/detect_module/1.js new file mode 100644 index 0000000..0212282 --- /dev/null +++ b/tests/detect_module/1.js @@ -0,0 +1,2 @@ +const p = Promise.resolve(42) +await p diff --git a/tests/detect_module/2.js b/tests/detect_module/2.js new file mode 100644 index 0000000..0b1d1f7 --- /dev/null +++ b/tests/detect_module/2.js @@ -0,0 +1 @@ +await = 42 // parsed as classic script diff --git a/tests/detect_module/3.js b/tests/detect_module/3.js new file mode 100644 index 0000000..5b175e9 --- /dev/null +++ b/tests/detect_module/3.js @@ -0,0 +1,8 @@ +/*--- +negative: + phase: parse + type: SyntaxError +---*/ +// the import statement makes it a module but `await = 42` is a SyntaxError +import * as _ from "dummy" +await = 42 diff --git a/tests/detect_module/4.js b/tests/detect_module/4.js new file mode 100644 index 0000000..3ee1df7 --- /dev/null +++ b/tests/detect_module/4.js @@ -0,0 +1,3 @@ +// imports should classify it as a module, even when not at the top +os.now() +import * as os from "os" diff --git a/tests/test_module_detect.js b/tests/test_module_detect.js deleted file mode 100644 index 7ce9d22..0000000 --- a/tests/test_module_detect.js +++ /dev/null @@ -1,5 +0,0 @@ -// This needs to be parsed as a module or will throw SyntaxError. -// - -await 0; -