Improve Date.parse

- rewrite Date.parse() with separate parsers
- return `NaN` for out of bounds field values as specified
- accept up to 9 decimals for millisecond fraction but truncate at 3
- accept many more alternative date/time formats
- add test cases in tests/test_builtin.js
This commit is contained in:
Charlie Gordon 2024-02-25 23:47:26 +01:00
parent 6428ce0c8b
commit 78db49cf95
2 changed files with 367 additions and 249 deletions

541
quickjs.c
View file

@ -49765,7 +49765,7 @@ static JSValue js_Date_UTC(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
// UTC(y, mon, d, h, m, s, ms)
double fields[] = { 0, 0, 1, 0, 0, 0, 0 };
double fields[9] = { 0, 0, 1, 0, 0, 0, 0, 0, 0 };
int i, n;
double a;
@ -49786,145 +49786,338 @@ static JSValue js_Date_UTC(JSContext *ctx, JSValueConst this_val,
return JS_NewFloat64(ctx, set_date_fields(fields, 0));
}
static void string_skip_spaces(JSString *sp, int *pp) {
while (*pp < sp->len && string_get(sp, *pp) == ' ')
/* Date string parsing */
static BOOL string_skip_char(const uint8_t *sp, int *pp, int c) {
if (sp[*pp] == c) {
*pp += 1;
return TRUE;
} else {
return FALSE;
}
}
/* skip spaces, update offset */
static void string_skip_spaces(const uint8_t *sp, int *pp) {
while (sp[*pp] == ' ')
*pp += 1;
}
static void string_skip_non_spaces(JSString *sp, int *pp) {
while (*pp < sp->len && string_get(sp, *pp) != ' ')
/* skip dashes dots and commas */
static void string_skip_separators(const uint8_t *sp, int *pp) {
int c;
while ((c = sp[*pp]) == '-' || c == '.' || c == ',')
*pp += 1;
}
/* parse a numeric field with an optional sign if accept_sign is TRUE */
static int string_get_digits(JSString *sp, int *pp, int64_t *pval) {
int64_t v = 0;
/* skip non spaces, update offset */
static void string_skip_non_spaces(const uint8_t *sp, int *pp) {
while (sp[*pp] != '\0' && sp[*pp] != ' ')
*pp += 1;
}
/* parse a numeric field (max_digits = 0 -> no maximum) */
static BOOL string_get_digits(const uint8_t *sp, int *pp, int *pval,
int min_digits, int max_digits)
{
int v = 0;
int c, p = *pp, p_start;
if (p >= sp->len)
return -1;
p_start = p;
while (p < sp->len) {
c = string_get(sp, p);
if (!(c >= '0' && c <= '9')) {
if (p == p_start)
return -1;
else
break;
}
while ((c = sp[p]) >= '0' && c <= '9') {
v = v * 10 + c - '0';
p++;
}
*pval = v;
*pp = p;
return 0;
}
static int string_get_signed_digits(JSString *sp, int *pp, int64_t *pval) {
int res, sgn, p = *pp;
if (p >= sp->len)
return -1;
sgn = string_get(sp, p);
if (sgn == '-' || sgn == '+')
p++;
res = string_get_digits(sp, &p, pval);
if (res == 0 && sgn == '-') {
if (*pval == 0)
return -1; // reject negative zero
*pval = -*pval;
}
*pp = p;
return res;
}
/* parse a fixed width numeric field */
static int string_get_fixed_width_digits(JSString *sp, int *pp, int n, int64_t *pval) {
int64_t v = 0;
int i, c, p = *pp;
for(i = 0; i < n; i++) {
if (p >= sp->len)
return -1;
c = string_get(sp, p);
if (!(c >= '0' && c <= '9'))
return -1;
v = v * 10 + c - '0';
p++;
}
*pval = v;
*pp = p;
return 0;
}
static int string_get_milliseconds(JSString *sp, int *pp, int64_t *pval) {
/* parse milliseconds as a fractional part, round to nearest */
/* XXX: the spec does not indicate which rounding should be used */
int mul = 1000, ms = 0, p = *pp, c, p_start;
if (p >= sp->len)
return -1;
p_start = p;
while (p < sp->len) {
c = string_get(sp, p);
if (!(c >= '0' && c <= '9')) {
if (p == p_start)
return -1;
else
if (p - p_start == max_digits)
break;
}
if (mul == 1 && c >= '5')
ms += 1;
if (p - p_start < min_digits)
return FALSE;
*pval = v;
*pp = p;
return TRUE;
}
static BOOL string_get_milliseconds(const uint8_t *sp, int *pp, int *pval) {
/* parse optional fractional part as milliseconds and truncate. */
/* spec does not indicate which rounding should be used */
int mul = 1000, ms = 0, c, p_start, p = *pp;
c = sp[p];
if (c == '.' || c == ',') {
p++;
p_start = p;
while ((c = sp[p]) >= '0' && c <= '9') {
ms += (c - '0') * (mul /= 10);
p++;
if (p - p_start == 9)
break;
}
if (p > p_start) {
/* only consume the separator if digits are present */
*pval = ms;
*pp = p;
return 0;
}
}
return TRUE;
}
static BOOL string_get_timezone(const uint8_t *sp, int *pp, int *tzp) {
int tz = 0, sgn, hh, mm, p = *pp;
static int find_abbrev(JSString *sp, int p, const char *list, int count) {
sgn = sp[p];
if (sgn == '+' || sgn == '-') {
p++;
if (!string_get_digits(sp, &p, &hh, 2, 2))
return FALSE;
string_skip_char(sp, &p, ':'); /* optional separator */
if (!string_get_digits(sp, &p, &mm, 2, 2))
return FALSE;
if (hh > 23 || mm > 59)
return FALSE;
tz = hh * 60 + mm;
if (sgn != '+')
tz = -tz;
} else
if (sgn == 'Z') {
p++;
} else {
return FALSE;
}
*pp = p;
*tzp = tz;
return TRUE;
}
static BOOL string_match(const uint8_t *sp, int *pp, const char *s) {
int p = *pp;
while (*s != '\0') {
if (sp[p] != (uint8_t)*s++)
return FALSE;
p++;
}
*pp = p;
return TRUE;
}
static int find_abbrev(const uint8_t *sp, int p, const char *list, int count) {
int n, i;
if (p + 3 <= sp->len) {
for (n = 0; n < count; n++) {
for (i = 0; i < 3; i++) {
if (string_get(sp, p + i) != month_names[n * 3 + i])
goto next;
}
for (i = 0;; i++) {
if (sp[p + i] != (uint8_t)month_names[n * 3 + i])
break;
if (i == 2)
return n;
next:;
}
}
return -1;
}
static int string_get_month(JSString *sp, int *pp, int64_t *pval) {
static BOOL string_get_month(const uint8_t *sp, int *pp, int *pval) {
int n;
string_skip_spaces(sp, pp);
n = find_abbrev(sp, *pp, month_names, 12);
if (n < 0)
return -1;
return FALSE;
*pval = n;
*pval = n + 1;
*pp += 3;
return 0;
return TRUE;
}
/* parse toISOString format */
static BOOL js_date_parse_isostring(const uint8_t *sp, int fields[9], BOOL *is_local) {
int sgn, i, p = 0;
/* initialize fields to the beginning of the Epoch */
for (i = 0; i < 9; i++) {
fields[i] = (i == 2);
}
*is_local = FALSE;
/* year is either yyyy digits or [+-]yyyyyy */
sgn = sp[p];
if (sgn == '-' || sgn == '+') {
p++;
if (!string_get_digits(sp, &p, &fields[0], 6, 6))
return FALSE;
if (sgn == '-') {
if (fields[0] == 0)
return FALSE; // reject -000000
fields[0] = -fields[0];
}
} else {
if (!string_get_digits(sp, &p, &fields[0], 4, 4))
return FALSE;
}
if (string_skip_char(sp, &p, '-')) {
if (!string_get_digits(sp, &p, &fields[1], 2, 2)) /* month */
return FALSE;
if (fields[1] < 1)
return FALSE;
fields[1] -= 1;
if (string_skip_char(sp, &p, '-')) {
if (!string_get_digits(sp, &p, &fields[2], 2, 2)) /* day */
return FALSE;
if (fields[2] < 1)
return FALSE;
}
}
if (string_skip_char(sp, &p, 'T')) {
*is_local = TRUE;
if (!string_get_digits(sp, &p, &fields[3], 2, 2) /* hour */
|| !string_skip_char(sp, &p, ':')
|| !string_get_digits(sp, &p, &fields[4], 2, 2)) /* minute */
return FALSE;
if (string_skip_char(sp, &p, ':')) {
if (!string_get_digits(sp, &p, &fields[5], 2, 2)) /* second */
return FALSE;
string_get_milliseconds(sp, &p, &fields[6]);
}
}
/* parse the time zone offset if present: [+-]HH:mm or [+-]HHmm */
if (sp[p]) {
*is_local = FALSE;
if (!string_get_timezone(sp, &p, &fields[8]))
return FALSE;
}
/* error if extraneous characters */
return sp[p] == '\0';
}
/* parse toString, toUTCString and other formats */
static BOOL js_date_parse_otherstring(const uint8_t *sp, int fields[9], BOOL *is_local) {
int c, i, val, p = 0, p_start;
int num[3];
BOOL has_year = FALSE;
BOOL has_mon = FALSE;
BOOL has_time = FALSE;
int num_index = 0;
/* initialize fields to the beginning of 2001-01-01 */
fields[0] = 2001;
fields[1] = 1;
fields[2] = 1;
for (i = 3; i < 9; i++) {
fields[i] = 0;
}
*is_local = TRUE;
while (sp[p] != '\0') {
string_skip_spaces(sp, &p);
p_start = p;
if ((c = sp[p]) == '+' || c == '-') {
if (has_time && string_get_timezone(sp, &p, &fields[8])) {
*is_local = FALSE;
} else {
p++;
if (string_get_digits(sp, &p, &val, 1, 9)) {
if (c == '-') {
if (val == 0)
return FALSE;
val = -val;
}
fields[0] = val;
has_year = TRUE;
}
}
} else
if (string_get_digits(sp, &p, &val, 1, 9)) {
if (string_skip_char(sp, &p, ':')) {
/* time part */
fields[3] = val;
if (!string_get_digits(sp, &p, &fields[4], 1, 2))
return FALSE;
if (string_skip_char(sp, &p, ':')) {
if (!string_get_digits(sp, &p, &fields[5], 1, 2))
return FALSE;
string_get_milliseconds(sp, &p, &fields[6]);
}
has_time = TRUE;
} else {
if (p - p_start > 2) {
fields[0] = val;
has_year = TRUE;
} else
if (val < 1 || val > 31) {
fields[0] = val + (val < 100) * 1900;
has_year = TRUE;
} else {
if (num_index == 3)
return FALSE;
num[num_index++] = val;
}
}
} else
if (string_get_month(sp, &p, &fields[1])) {
has_mon = TRUE;
} else
if (c == 'Z') {
*is_local = FALSE;
p++;
continue;
} else
if (string_match(sp, &p, "GMT") || string_match(sp, &p, "UTC")) {
*is_local = FALSE;
continue;
} else {
/* skip a word */
string_skip_non_spaces(sp, &p);
continue;
}
string_skip_separators(sp, &p);
}
if (num_index + has_year + has_mon > 3)
return FALSE;
switch (num_index) {
case 0:
if (!has_year)
return FALSE;
break;
case 1:
if (has_mon)
fields[2] = num[0];
else
fields[1] = num[0];
break;
case 2:
if (has_year) {
fields[1] = num[0];
fields[2] = num[1];
} else
if (has_mon) {
fields[0] = num[1] + (num[1] < 100) * 1900;
fields[2] = num[0];
} else {
fields[1] = num[0];
fields[2] = num[1];
}
break;
case 3:
fields[0] = num[2] + (num[2] < 100) * 1900;
fields[1] = num[0];
fields[2] = num[1];
break;
default:
return FALSE;
}
if (fields[1] < 1 || fields[2] < 1)
return FALSE;
fields[1] -= 1;
return TRUE;
}
static JSValue js_Date_parse(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
// parse(s)
JSValue s, rv;
int64_t fields[] = { 0, 1, 1, 0, 0, 0, 0 };
double fields1[7];
int64_t tz, hh, mm;
int fields[9];
double fields1[9];
double d;
int p, i, c, sgn, l;
int i, c;
JSString *sp;
uint8_t buf[128];
BOOL is_local;
rv = JS_NAN;
@ -49934,145 +50127,33 @@ static JSValue js_Date_parse(JSContext *ctx, JSValueConst this_val,
return JS_EXCEPTION;
sp = JS_VALUE_GET_STRING(s);
p = 0;
if (p < sp->len && (((c = string_get(sp, p)) >= '0' && c <= '9') || c == '+' || c == '-')) {
/* ISO format */
/* year field can be negative */
if (string_get_signed_digits(sp, &p, &fields[0]))
goto done;
for (i = 1; i < 7; i++) {
if (p >= sp->len)
break;
switch(i) {
case 1:
case 2:
c = '-';
break;
case 3:
c = 'T';
break;
case 4:
case 5:
c = ':';
break;
case 6:
c = '.';
break;
}
if (string_get(sp, p) != c)
break;
p++;
if (i == 6) {
if (string_get_milliseconds(sp, &p, &fields[i]))
goto done;
} else {
if (string_get_digits(sp, &p, &fields[i]))
goto done;
}
}
/* no time: UTC by default */
is_local = (i > 3);
fields[1] -= 1;
/* parse the time zone offset if present: [+-]HH:mm or [+-]HHmm */
tz = 0;
if (p < sp->len) {
sgn = string_get(sp, p);
if (sgn == '+' || sgn == '-') {
p++;
l = sp->len - p;
if (l != 4 && l != 5)
goto done;
if (string_get_fixed_width_digits(sp, &p, 2, &hh))
goto done;
if (l == 5) {
if (string_get(sp, p) != ':')
goto done;
p++;
}
if (string_get_fixed_width_digits(sp, &p, 2, &mm))
goto done;
tz = hh * 60 + mm;
if (sgn == '-')
tz = -tz;
is_local = FALSE;
} else if (sgn == 'Z') {
p++;
is_local = FALSE;
} else {
goto done;
}
/* error if extraneous characters */
if (p != sp->len)
goto done;
}
} else {
/* toString or toUTCString format */
/* skip the day of the week */
string_skip_non_spaces(sp, &p);
string_skip_spaces(sp, &p);
if (p >= sp->len)
goto done;
c = string_get(sp, p);
if (c >= '0' && c <= '9') {
/* day of month first */
if (string_get_digits(sp, &p, &fields[2]))
goto done;
if (string_get_month(sp, &p, &fields[1]))
goto done;
} else {
/* month first */
if (string_get_month(sp, &p, &fields[1]))
goto done;
string_skip_spaces(sp, &p);
if (string_get_digits(sp, &p, &fields[2]))
goto done;
}
/* year */
string_skip_spaces(sp, &p);
if (string_get_signed_digits(sp, &p, &fields[0]))
goto done;
/* hour, min, seconds */
string_skip_spaces(sp, &p);
for(i = 0; i < 3; i++) {
if (i == 1 || i == 2) {
if (p >= sp->len)
goto done;
if (string_get(sp, p) != ':')
goto done;
p++;
}
if (string_get_digits(sp, &p, &fields[3 + i]))
goto done;
}
// XXX: parse optional milliseconds?
/* parse the time zone offset if present: [+-]HHmm */
is_local = FALSE;
tz = 0;
for (tz = 0; p < sp->len; p++) {
sgn = string_get(sp, p);
if (sgn == '+' || sgn == '-') {
p++;
if (string_get_fixed_width_digits(sp, &p, 2, &hh))
goto done;
if (string_get_fixed_width_digits(sp, &p, 2, &mm))
goto done;
tz = hh * 60 + mm;
if (sgn == '-')
tz = -tz;
break;
}
/* convert the string as a byte array */
for (i = 0; i < sp->len && i < (int)countof(buf) - 1; i++) {
c = string_get(sp, i);
if (c > 255)
c = (c == 0x2212) ? '-' : 'x';
buf[i] = c;
}
buf[i] = '\0';
if (js_date_parse_isostring(buf, fields, &is_local)
|| js_date_parse_otherstring(buf, fields, &is_local)) {
static int const field_max[6] = { 0, 11, 31, 24, 59, 59 };
BOOL valid = TRUE;
/* check field maximum values */
for (i = 1; i < 6; i++) {
if (fields[i] > field_max[i])
valid = FALSE;
}
/* special case 24:00:00.000 */
if (fields[3] == 24 && (fields[4] | fields[5] | fields[6]))
valid = FALSE;
if (valid) {
for(i = 0; i < 7; i++)
fields1[i] = fields[i];
d = set_date_fields(fields1, is_local) - tz * 60000;
d = set_date_fields(fields1, is_local) - fields[8] * 60000;
rv = JS_NewFloat64(ctx, d);
done:
}
}
JS_FreeValue(ctx, s);
return rv;
}

View file

@ -540,28 +540,65 @@ function test_date()
// is not a valid instance of this format.
// Hence the fractional part after . should have 3 digits and how
// a different number of digits is handled is implementation defined.
var d = new Date(1506098258091), a, s;
assert(Date.parse(""), NaN);
assert(Date.parse("2000"), 946684800000);
assert(Date.parse("2000-01"), 946684800000);
assert(Date.parse("2000-01-01"), 946684800000);
//assert(Date.parse("2000-01-01T"), NaN);
//assert(Date.parse("2000-01-01T00Z"), NaN);
assert(Date.parse("2000-01-01T00:00Z"), 946684800000);
assert(Date.parse("2000-01-01T00:00:00Z"), 946684800000);
assert(Date.parse("2000-01-01T00:00:00.1Z"), 946684800100);
assert(Date.parse("2000-01-01T00:00:00.10Z"), 946684800100);
assert(Date.parse("2000-01-01T00:00:00.100Z"), 946684800100);
assert(Date.parse("2000-01-01T00:00:00.1000Z"), 946684800100);
assert(Date.parse("2000-01-01T00:00:00+00:00"), 946684800000);
//assert(Date.parse("2000-01-01T00:00:00+00:30"), 946686600000);
var d = new Date("2000T00:00"); // Jan 1st 2000, 0:00:00 local time
assert(typeof d === 'object' && d.toString() != 'Invalid Date');
assert((new Date('Jan 1 2000')).toISOString(),
d.toISOString());
assert((new Date('Jan 1 2000 00:00')).toISOString(),
d.toISOString());
assert((new Date('Jan 1 2000 00:00:00')).toISOString(),
d.toISOString());
assert((new Date('Jan 1 2000 00:00:00 GMT+0100')).toISOString(),
'1999-12-31T23:00:00.000Z');
assert((new Date('Jan 1 2000 00:00:00 GMT+0200')).toISOString(),
'1999-12-31T22:00:00.000Z');
assert((new Date('Sat Jan 1 2000')).toISOString(),
d.toISOString());
assert((new Date('Sat Jan 1 2000 00:00')).toISOString(),
d.toISOString());
assert((new Date('Sat Jan 1 2000 00:00:00')).toISOString(),
d.toISOString());
assert((new Date('Sat Jan 1 2000 00:00:00 GMT+0100')).toISOString(),
'1999-12-31T23:00:00.000Z');
assert((new Date('Sat Jan 1 2000 00:00:00 GMT+0200')).toISOString(),
'1999-12-31T22:00:00.000Z');
var d = new Date(1506098258091);
assert(d.toISOString(), "2017-09-22T16:37:38.091Z");
d.setUTCHours(18, 10, 11);
assert(d.toISOString(), "2017-09-22T18:10:11.091Z");
a = Date.parse(d.toISOString());
var a = Date.parse(d.toISOString());
assert((new Date(a)).toISOString(), d.toISOString());
s = new Date("2020-01-01T01:01:01.1Z").toISOString();
assert(s, "2020-01-01T01:01:01.100Z");
s = new Date("2020-01-01T01:01:01.12Z").toISOString();
assert(s, "2020-01-01T01:01:01.120Z");
s = new Date("2020-01-01T01:01:01.123Z").toISOString();
assert(s, "2020-01-01T01:01:01.123Z");
s = new Date("2020-01-01T01:01:01.1234Z").toISOString();
assert(s, "2020-01-01T01:01:01.123Z");
s = new Date("2020-01-01T01:01:01.12345Z").toISOString();
assert(s, "2020-01-01T01:01:01.123Z");
s = new Date("2020-01-01T01:01:01.1235Z").toISOString();
assert(s == "2020-01-01T01:01:01.124Z" || // QuickJS
s == "2020-01-01T01:01:01.123Z"); // nodeJS
s = new Date("2020-01-01T01:01:01.9999Z").toISOString();
assert(s == "2020-01-01T01:01:02.000Z" || // QuickJS
s == "2020-01-01T01:01:01.999Z"); // nodeJS
assert((new Date("2020-01-01T01:01:01.123Z")).toISOString(),
"2020-01-01T01:01:01.123Z");
/* implementation defined behavior */
assert((new Date("2020-01-01T01:01:01.1Z")).toISOString(),
"2020-01-01T01:01:01.100Z");
assert((new Date("2020-01-01T01:01:01.12Z")).toISOString(),
"2020-01-01T01:01:01.120Z");
assert((new Date("2020-01-01T01:01:01.1234Z")).toISOString(),
"2020-01-01T01:01:01.123Z");
assert((new Date("2020-01-01T01:01:01.12345Z")).toISOString(),
"2020-01-01T01:01:01.123Z");
assert((new Date("2020-01-01T01:01:01.1235Z")).toISOString(),
"2020-01-01T01:01:01.123Z");
assert((new Date("2020-01-01T01:01:01.9999Z")).toISOString(),
"2020-01-01T01:01:01.999Z");
assert(Date.UTC(NaN), NaN);
assert(Date.UTC(2017, NaN), NaN);