fixed the handling of unicode identifiers

2025-05-29 01:49:18 +08:00 · 2025-04-10 10:34:40 +02:00 · 2025-04-10 10:34:40 +02:00 · 25ffdb418e
commit 25ffdb418e
parent 9d3776d0d4
2 changed files with 24 additions and 7 deletions
--- a/quickjs.c
+++ b/quickjs.c
@@ -2861,14 +2861,26 @@ static JSAtom JS_NewAtomStr(JSContext *ctx, JSString *p)
    return __JS_NewAtom(rt, p, JS_ATOM_TYPE_STRING);
 }

+/* Return the number of leading bytes of buf[0..len) that are ASCII (< 128); the result equals len when every byte is ASCII. XXX: optimize */
+static size_t count_ascii(const uint8_t *buf, size_t len)
+{
+    const uint8_t *p, *p_end;
+    p = buf;
+    p_end = buf + len; /* one past the last byte to examine */
+    while (p < p_end && *p < 128) /* stop at the first byte >= 128 or at the end */
+        p++;
+    return p - buf; /* length of the ASCII-only prefix */
+}
+
 /* str is UTF-8 encoded */
 JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len)
 {
    JSValue val;

-    if (len == 0 || !is_digit(*str)) {
-        // XXX: this will not work if UTF-8 encoded str contains non ASCII bytes
-        JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
+    if (len == 0 ||
+        (!is_digit(*str) &&
+         count_ascii((const uint8_t *)str, len) == len)) {
+            JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
        if (atom)
            return atom;
    }
@@ -3810,10 +3822,8 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)

    p_start = (const uint8_t *)buf;
    p_end = p_start + buf_len;
-    p = p_start;
-    while (p < p_end && *p < 128)
-        p++;
-    len1 = p - p_start;
+    len1 = count_ascii(p_start, buf_len);
+    p = p_start + len1;
    if (len1 > JS_STRING_LEN_MAX)
        return JS_ThrowInternalError(ctx, "string too long");
    if (p == p_end) {
--- a/tests/test_language.js
+++ b/tests/test_language.js
@@ -622,6 +622,12 @@ function test_optional_chaining()
    assert((a?.["b"])().c, 42);
 }

+function test_unicode_ident()
+{
+    var Ãµ = 3; // two characters, U+00C3 U+00B5: the same byte values as the UTF-8 encoding of "õ" (C3 B5); must stay distinct from "õ" below
+    assert(typeof õ, "undefined"); // "õ" (U+00F5) is a different identifier that was never declared
+}
+
 test_op1();
 test_cvt();
 test_eq();
@@ -645,3 +651,4 @@ test_function_expr_name();
 test_parse_semicolon();
 test_optional_chaining();
 test_parse_arrow_function();
+test_unicode_ident();