From 1dfaa616801a8f559eb7abf232f008a27ff5958a Mon Sep 17 00:00:00 2001 From: Fabrice Bellard Date: Sat, 24 May 2025 10:16:10 +0200 Subject: [PATCH] improved compatibility of std.parseExtJSON() with JSON5 --- doc/quickjs.texi | 11 ++++++---- quickjs.c | 55 +++++++++++++++++++++++++++++++++++++---------- tests/test_std.js | 14 +++++++++++- 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/doc/quickjs.texi b/doc/quickjs.texi index eef00b7..9130b47 100644 --- a/doc/quickjs.texi +++ b/doc/quickjs.texi @@ -449,17 +449,20 @@ optional properties: @item parseExtJSON(str) - Parse @code{str} using a superset of @code{JSON.parse}. The - following extensions are accepted: + Parse @code{str} using a superset of @code{JSON.parse}. The superset + is very close to the JSON5 specification. The following extensions + are accepted: @itemize @item Single line and multiline comments @item unquoted properties (ASCII-only Javascript identifiers) @item trailing comma in array and object definitions @item single quoted strings + @item @code{\v} escape and multi-line strings with trailing @code{\} @item @code{\f} and @code{\v} are accepted as space characters - @item leading plus in numbers - @item octal (@code{0o} prefix) and hexadecimal (@code{0x} prefix) numbers + @item leading plus or decimal point in numbers + @item hexadecimal (@code{0x} prefix), octal (@code{0o} prefix) and binary (@code{0b} prefix) integers + @item @code{NaN} and @code{Infinity} are accepted as numbers @end itemize @end table diff --git a/quickjs.c b/quickjs.c index 2c545d9..e56adb8 100644 --- a/quickjs.c +++ b/quickjs.c @@ -21824,6 +21824,7 @@ static __exception int next_token(JSParseState *s) } /* 'c' is the first character. Return JS_ATOM_NULL in case of error */ +/* XXX: accept unicode identifiers as JSON5 ? */ static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c) { const uint8_t *p; @@ -21899,11 +21900,22 @@ static int json_parse_string(JSParseState *s, const uint8_t **pp, int sep) c = (c << 4) | h; } break; + case '\n': + if (s->ext_json) + continue; + goto bad_escape; + case 'v': + if (s->ext_json) { + c = '\v'; + break; + } + goto bad_escape; default: if (c == sep) break; if (p > s->buf_end) goto end_of_input; + bad_escape: js_parse_error_pos(s, p - 1, "Bad escaped character"); goto fail; } @@ -21943,8 +21955,23 @@ static int json_parse_number(JSParseState *s, const uint8_t **pp) if (*p == '+' || *p == '-') p++; - if (!is_digit(*p)) - return js_parse_error_pos(s, p, "Unexpected token '%c'", *p_start); + if (!is_digit(*p)) { + if (s->ext_json) { + if (strstart((const char *)p, "Infinity", (const char **)&p)) { + d = 1.0 / 0.0; + if (*p_start == '-') + d = -d; + goto done; + } else if (strstart((const char *)p, "NaN", (const char **)&p)) { + d = NAN; + goto done; + } else if (*p != '.') { + goto unexpected_token; + } + } else { + goto unexpected_token; + } + } if (p[0] == '0') { if (s->ext_json) { @@ -21962,8 +21989,10 @@ static int json_parse_number(JSParseState *s, const uint8_t **pp) } if (radix != 10) { /* prefix is present */ - if (to_digit(*p) >= radix) + if (to_digit(*p) >= radix) { + unexpected_token: return js_parse_error_pos(s, p, "Unexpected token '%c'", *p); + } d = js_atod((const char *)p_start, (const char **)&p, 0, JS_ATOD_INT_ONLY | JS_ATOD_ACCEPT_BIN_OCT, &atod_mem); goto done; @@ -22122,7 +22151,6 @@ static __exception int json_next_token(JSParseState *s) case 'Y': case 'Z': case '_': case '$': - /* identifier : only pure ascii characters are accepted */ p++; atom = json_parse_ident(s, &p, c); if (atom == JS_ATOM_NULL) @@ -22133,17 +22161,16 @@ static __exception int json_next_token(JSParseState *s) s->token.val = TOK_IDENT; break; case '+': - if (!s->ext_json || !is_digit(p[1])) + if (!s->ext_json) goto def_token; goto parse_number; - case '0': - if (is_digit(p[1])) + case '.': + if (s->ext_json && is_digit(p[1])) + goto parse_number; + else goto def_token; - goto parse_number; case '-': - if (!is_digit(p[1])) - goto def_token; - goto parse_number; + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -46187,6 +46214,12 @@ static JSValue json_parse_value(JSParseState *s) val = JS_NewBool(ctx, s->token.u.ident.atom == JS_ATOM_true); } else if (s->token.u.ident.atom == JS_ATOM_null) { val = JS_NULL; + } else if (s->token.u.ident.atom == JS_ATOM_NaN && s->ext_json) { + /* Note: json5 identifier handling is ambiguous e.g. is + '{ NaN: 1 }' a valid JSON5 production ? */ + val = JS_NewFloat64(s->ctx, NAN); + } else if (s->token.u.ident.atom == JS_ATOM_Infinity && s->ext_json) { + val = JS_NewFloat64(s->ctx, INFINITY); } else { goto def_token; } diff --git a/tests/test_std.js b/tests/test_std.js index 0bbd9e7..3debe40 100644 --- a/tests/test_std.js +++ b/tests/test_std.js @@ -129,15 +129,27 @@ function test_popen() function test_ext_json() { var expected, input, obj; - expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"s":"str"}'; + expected = '{"x":false,"y":true,"z2":null,"a":[1,8,160],"b":"abc\\u000bd","s":"str"}'; input = `{ "x":false, /*comments are allowed */ "y":true, // also a comment z2:null, // unquoted property names "a":[+1,0o10,0xa0,], // plus prefix, octal, hexadecimal + "b": "ab\ +c\\vd", // multi-line strings, '\v' escape "s":'str',} // trailing comma in objects and arrays, single quoted string `; obj = std.parseExtJSON(input); assert(JSON.stringify(obj), expected); + + obj = std.parseExtJSON('[Infinity, +Infinity, -Infinity, NaN, +NaN, -NaN, .1, -.2]'); + assert(obj[0], Infinity); + assert(obj[1], Infinity); + assert(obj[2], -Infinity); + assert(obj[3], NaN); + assert(obj[4], NaN); + assert(obj[5], NaN); + assert(obj[6], 0.1); + assert(obj[7], -0.2); } function test_os()