From 07ff4749926ee71d69f3e196264d9f192f1e308f Mon Sep 17 00:00:00 2001 From: Fabrice Bellard Date: Fri, 22 Dec 2023 11:05:49 +0100 Subject: [PATCH] use Unicode normalization in String.prototype.localeCompare --- quickjs.c | 131 ++++++++++++++++++++++++++++++++++----------- test262_errors.txt | 2 - 2 files changed, 100 insertions(+), 33 deletions(-) diff --git a/quickjs.c b/quickjs.c index c80a300..dfcacda 100644 --- a/quickjs.c +++ b/quickjs.c @@ -41109,26 +41109,6 @@ static BOOL test_final_sigma(JSString *p, int sigma_pos) return !lre_is_cased(c1); } -static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val, - int argc, JSValueConst *argv) -{ - JSValue a, b; - int cmp; - - a = JS_ToStringCheckObject(ctx, this_val); - if (JS_IsException(a)) - return JS_EXCEPTION; - b = JS_ToString(ctx, argv[0]); - if (JS_IsException(b)) { - JS_FreeValue(ctx, a); - return JS_EXCEPTION; - } - cmp = js_string_compare(ctx, JS_VALUE_GET_STRING(a), JS_VALUE_GET_STRING(b)); - JS_FreeValue(ctx, a); - JS_FreeValue(ctx, b); - return JS_NewInt32(ctx, cmp); -} - static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int to_lower) { @@ -41214,23 +41194,38 @@ static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len) return JS_EXCEPTION; } +static int js_string_normalize1(JSContext *ctx, uint32_t **pout_buf, + JSValueConst val, + UnicodeNormalizationEnum n_type) +{ + int buf_len, out_len; + uint32_t *buf, *out_buf; + + buf_len = JS_ToUTF32String(ctx, &buf, val); + if (buf_len < 0) + return -1; + out_len = unicode_normalize(&out_buf, buf, buf_len, n_type, + ctx->rt, (DynBufReallocFunc *)js_realloc_rt); + js_free(ctx, buf); + if (out_len < 0) + return -1; + *pout_buf = out_buf; + return out_len; +} + static JSValue js_string_normalize(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { const char *form, *p; size_t form_len; - int is_compat, buf_len, out_len; + int is_compat, out_len; UnicodeNormalizationEnum n_type; JSValue val; - uint32_t *buf, *out_buf; + uint32_t *out_buf; val = JS_ToStringCheckObject(ctx, this_val); if (JS_IsException(val)) return val; - buf_len = JS_ToUTF32String(ctx, &buf, val); - JS_FreeValue(ctx, val); - if (buf_len < 0) - return JS_EXCEPTION; if (argc == 0 || JS_IsUndefined(argv[0])) { n_type = UNICODE_NFC; @@ -41256,22 +41251,96 @@ static JSValue js_string_normalize(JSContext *ctx, JSValueConst this_val, JS_FreeCString(ctx, form); JS_ThrowRangeError(ctx, "bad normalization form"); fail1: - js_free(ctx, buf); + JS_FreeValue(ctx, val); return JS_EXCEPTION; } JS_FreeCString(ctx, form); } - out_len = unicode_normalize(&out_buf, buf, buf_len, n_type, - ctx->rt, (DynBufReallocFunc *)js_realloc_rt); - js_free(ctx, buf); + out_len = js_string_normalize1(ctx, &out_buf, val, n_type); + JS_FreeValue(ctx, val); if (out_len < 0) return JS_EXCEPTION; val = JS_NewUTF32String(ctx, out_buf, out_len); js_free(ctx, out_buf); return val; } -#endif /* CONFIG_ALL_UNICODE */ + +/* return < 0, 0 or > 0 */ +static int js_UTF32_compare(const uint32_t *buf1, int buf1_len, + const uint32_t *buf2, int buf2_len) +{ + int i, len, c, res; + len = min_int(buf1_len, buf2_len); + for(i = 0; i < len; i++) { + /* Note: range is limited so a subtraction is valid */ + c = buf1[i] - buf2[i]; + if (c != 0) + return c; + } + if (buf1_len == buf2_len) + res = 0; + else if (buf1_len < buf2_len) + res = -1; + else + res = 1; + return res; +} + +static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JSValue a, b; + int cmp, a_len, b_len; + uint32_t *a_buf, *b_buf; + + a = JS_ToStringCheckObject(ctx, this_val); + if (JS_IsException(a)) + return JS_EXCEPTION; + b = JS_ToString(ctx, argv[0]); + if (JS_IsException(b)) { + JS_FreeValue(ctx, a); + return JS_EXCEPTION; + } + a_len = js_string_normalize1(ctx, &a_buf, a, UNICODE_NFC); + JS_FreeValue(ctx, a); + if (a_len < 0) { + JS_FreeValue(ctx, b); + return JS_EXCEPTION; + } + + b_len = js_string_normalize1(ctx, &b_buf, b, UNICODE_NFC); + JS_FreeValue(ctx, b); + if (b_len < 0) { + js_free(ctx, a_buf); + return JS_EXCEPTION; + } + cmp = js_UTF32_compare(a_buf, a_len, b_buf, b_len); + js_free(ctx, a_buf); + js_free(ctx, b_buf); + return JS_NewInt32(ctx, cmp); +} +#else /* CONFIG_ALL_UNICODE */ +static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv) +{ + JSValue a, b; + int cmp; + + a = JS_ToStringCheckObject(ctx, this_val); + if (JS_IsException(a)) + return JS_EXCEPTION; + b = JS_ToString(ctx, argv[0]); + if (JS_IsException(b)) { + JS_FreeValue(ctx, a); + return JS_EXCEPTION; + } + cmp = js_string_compare(ctx, JS_VALUE_GET_STRING(a), JS_VALUE_GET_STRING(b)); + JS_FreeValue(ctx, a); + JS_FreeValue(ctx, b); + return JS_NewInt32(ctx, cmp); +} +#endif /* !CONFIG_ALL_UNICODE */ /* also used for String.prototype.valueOf */ static JSValue js_string_toString(JSContext *ctx, JSValueConst this_val, diff --git a/test262_errors.txt b/test262_errors.txt index 77ee6cc..31eeb6e 100644 --- a/test262_errors.txt +++ b/test262_errors.txt @@ -5,8 +5,6 @@ test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Er test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: Test262Error: \u0390 does not match \u1fd3 test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: strict mode: Test262Error: \u0390 does not match \u1fd3 -test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6). -test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: strict mode: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6). test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.) test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: strict mode: TypeError: ArrayBuffer is detached (Testing with Float64Array.) test262/test/language/expressions/assignment/target-member-computed-reference-null.js:32: Test262Error: Expected a DummyError but got a TypeError