From 25ffdb418ea221eaab7605f24aa3087323cd501d Mon Sep 17 00:00:00 2001
From: Fabrice Bellard
Date: Thu, 10 Apr 2025 10:34:40 +0200
Subject: [PATCH] fixed the handling of unicode identifiers

JS_NewAtomLen() looked the UTF-8 bytes up directly with __JS_FindAtom(),
which does not work when the string contains non-ASCII bytes. Only take
that lookup when the string is empty, or is pure ASCII and does not start
with a digit. The ASCII prefix scan of JS_NewStringLen() is moved into
the new count_ascii() helper so that both functions share it.

The new test declares a variable whose name contains a non-ASCII
character and checks that reading it back does not yield "undefined".
---
 quickjs.c              | 24 +++++++++++++++++-------
 tests/test_language.js |  7 +++++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/quickjs.c b/quickjs.c
index db5f04c..90d8fe3 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -2861,14 +2861,26 @@ static JSAtom JS_NewAtomStr(JSContext *ctx, JSString *p)
     return __JS_NewAtom(rt, p, JS_ATOM_TYPE_STRING);
 }
 
+/* XXX: optimize */
+static size_t count_ascii(const uint8_t *buf, size_t len)
+{
+    const uint8_t *p, *p_end;
+    p = buf;
+    p_end = buf + len;
+    while (p < p_end && *p < 128)
+        p++;
+    return p - buf;
+}
+
 /* str is UTF-8 encoded */
 JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len)
 {
     JSValue val;
 
-    if (len == 0 || !is_digit(*str)) {
-        // XXX: this will not work if UTF-8 encoded str contains non ASCII bytes
-        JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
+    if (len == 0 ||
+        (!is_digit(*str) &&
+         count_ascii((const uint8_t *)str, len) == len)) {
+        JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
         if (atom)
             return atom;
     }
@@ -3810,10 +3822,8 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
 
     p_start = (const uint8_t *)buf;
     p_end = p_start + buf_len;
-    p = p_start;
-    while (p < p_end && *p < 128)
-        p++;
-    len1 = p - p_start;
+    len1 = count_ascii(p_start, buf_len);
+    p = p_start + len1;
     if (len1 > JS_STRING_LEN_MAX)
         return JS_ThrowInternalError(ctx, "string too long");
     if (p == p_end) {
diff --git a/tests/test_language.js b/tests/test_language.js
index 11a45de..cda782b 100644
--- a/tests/test_language.js
+++ b/tests/test_language.js
@@ -622,6 +622,12 @@ function test_optional_chaining()
     assert((a?.["b"])().c, 42);
 }
 
+function test_unicode_ident()
+{
+    var õ = 3;
+    assert(typeof õ !== "undefined");
+}
+
 test_op1();
 test_cvt();
 test_eq();
@@ -645,3 +651,4 @@ test_function_expr_name();
 test_parse_semicolon();
 test_optional_chaining();
 test_parse_arrow_function();
+test_unicode_ident();