From c06af876f6d44488bad3c3e1e5f479edb36f8bf5 Mon Sep 17 00:00:00 2001 From: Charlie Gordon Date: Thu, 15 Feb 2024 10:30:04 +0100 Subject: [PATCH] Improve string concatenation hack - add more cases of in place string concatenation this temporary hack improves the microbench timing by 30% but has little impact on the test262 timings. --- Makefile | 3 +- VERSION | 2 +- quickjs.c | 174 ++++++++++++++++++++++++++++++------------------------ 3 files changed, 99 insertions(+), 80 deletions(-) diff --git a/Makefile b/Makefile index ead045f..fc59a4f 100644 --- a/Makefile +++ b/Makefile @@ -48,9 +48,10 @@ PREFIX?=/usr/local #CONFIG_ASAN=y # use memory sanitizer #CONFIG_MSAN=y -# include the code for BigFloat/BigDecimal, math mode and faster large integers # use UB sanitizer #CONFIG_UBSAN=y + +# include the code for BigFloat/BigDecimal and math mode CONFIG_BIGNUM=y OBJDIR=.obj diff --git a/VERSION b/VERSION index e89de35..e32e065 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2024-01-13 +2024-02-14 diff --git a/quickjs.c b/quickjs.c index ec24a56..fccf340 100644 --- a/quickjs.c +++ b/quickjs.c @@ -1454,6 +1454,10 @@ static inline int is_digit(int c) { return c >= '0' && c <= '9'; } +static inline int string_get(const JSString *p, int idx) { + return p->is_wide_char ? p->u.str16[idx] : p->u.str8[idx]; +} + typedef struct JSClassShortDef { JSAtom class_name; JSClassFinalizer *finalizer; @@ -2427,10 +2431,7 @@ static inline BOOL is_num_string(uint32_t *pval, const JSString *p) len = p->len; if (len == 0 || len > 10) return FALSE; - if (p->is_wide_char) - c = p->u.str16[0]; - else - c = p->u.str8[0]; + c = string_get(p, 0); if (is_num(c)) { if (c == '0') { if (len != 1) @@ -2439,10 +2440,7 @@ static inline BOOL is_num_string(uint32_t *pval, const JSString *p) } else { n = c - '0'; for(i = 1; i < len; i++) { - if (p->is_wide_char) - c = p->u.str16[i]; - else - c = p->u.str8[i]; + c = string_get(p, i); if (!is_num(c)) return FALSE; n64 = (uint64_t)n * 10 + (c - '0'); @@ -2487,10 +2485,24 @@ static uint32_t hash_string(const JSString *str, uint32_t h) return h; } -static __maybe_unused void JS_DumpString(JSRuntime *rt, - const JSString *p) +static __maybe_unused void JS_DumpChar(JSRuntime *rt, int c, int sep) { - int i, c, sep; + if (c == sep || c == '\\') { + putchar('\\'); + putchar(c); + } else if (c >= ' ' && c <= 126) { + putchar(c); + } else if (c == '\n') { + putchar('\\'); + putchar('n'); + } else { + printf("\\u%04x", c); + } +} + +static __maybe_unused void JS_DumpString(JSRuntime *rt, const JSString *p) +{ + int i, sep; if (p == NULL) { printf(""); @@ -2500,21 +2512,7 @@ static __maybe_unused void JS_DumpString(JSRuntime *rt, sep = (p->header.ref_count == 1) ? '\"' : '\''; putchar(sep); for(i = 0; i < p->len; i++) { - if (p->is_wide_char) - c = p->u.str16[i]; - else - c = p->u.str8[i]; - if (c == sep || c == '\\') { - putchar('\\'); - putchar(c); - } else if (c >= ' ' && c <= 126) { - putchar(c); - } else if (c == '\n') { - putchar('\\'); - putchar('n'); - } else { - printf("\\u%04x", c); - } + JS_DumpChar(rt, string_get(p, i), sep); } putchar(sep); } @@ -2857,6 +2855,7 @@ static JSAtom __JS_NewAtomInit(JSRuntime *rt, const char *str, int len, return __JS_NewAtom(rt, p, atom_type); } +/* Warning: str must be ASCII only */ static JSAtom __JS_FindAtom(JSRuntime *rt, const char *str, size_t len, int atom_type) { @@ -2951,11 +2950,13 @@ static JSAtom JS_NewAtomStr(JSContext *ctx, JSString *p) return __JS_NewAtom(rt, p, JS_ATOM_TYPE_STRING); } +/* str is UTF-8 encoded */ JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len) { JSValue val; if (len == 0 || !is_digit(*str)) { + // XXX: this will not work if UTF-8 encoded str contains non ASCII bytes JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING); if (atom) return atom; @@ -3061,10 +3062,7 @@ static const char *JS_AtomGetStrRT(JSRuntime *rt, char *buf, int buf_size, return (const char *)str->u.str8; } for(i = 0; i < str->len; i++) { - if (str->is_wide_char) - c = str->u.str16[i]; - else - c = str->u.str8[i]; + c = string_get(str, i); if ((q - buf) >= buf_size - UTF8_CHAR_LEN_MAX) break; if (c < 128) { @@ -3695,10 +3693,6 @@ static int string_buffer_putc(StringBuffer *s, uint32_t c) return string_buffer_putc16(s, c); } -static int string_get(const JSString *p, int idx) { - return p->is_wide_char ? p->u.str16[idx] : p->u.str8[idx]; -} - static int string_getc(const JSString *p, int *pidx) { int idx, c, c1; @@ -4185,6 +4179,42 @@ static JSValue JS_ConcatString1(JSContext *ctx, return JS_MKPTR(JS_TAG_STRING, p); } +static BOOL JS_ConcatStringInPlace(JSContext *ctx, JSString *p1, JSValueConst op2) { + if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) { + JSString *p2 = JS_VALUE_GET_STRING(op2); + size_t size1; + + if (p2->len == 0) + return TRUE; + if (p1->header.ref_count != 1) + return FALSE; + size1 = js_malloc_usable_size(ctx, p1); + if (p1->is_wide_char) { + if (size1 >= sizeof(*p1) + ((p1->len + p2->len) << 1)) { + if (p2->is_wide_char) { + memcpy(p1->u.str16 + p1->len, p2->u.str16, p2->len << 1); + p1->len += p2->len; + return TRUE; + } else { + size_t i; + for (i = 0; i < p2->len; i++) { + p1->u.str16[p1->len++] = p2->u.str8[i]; + } + return TRUE; + } + } + } else if (!p2->is_wide_char) { + if (size1 >= sizeof(*p1) + p1->len + p2->len + 1) { + memcpy(p1->u.str8 + p1->len, p2->u.str8, p2->len); + p1->len += p2->len; + p1->u.str8[p1->len] = '\0'; + return TRUE; + } + } + } + return FALSE; +} + /* op1 and op2 are converted to strings. For convenience, op1 or op2 = JS_EXCEPTION are accepted and return JS_EXCEPTION. */ static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2) @@ -4207,27 +4237,11 @@ static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2) } } p1 = JS_VALUE_GET_STRING(op1); - p2 = JS_VALUE_GET_STRING(op2); - - /* XXX: could also check if p1 is empty */ - if (p2->len == 0) { - goto ret_op1; - } - if (p1->header.ref_count == 1 && p1->is_wide_char == p2->is_wide_char - && js_malloc_usable_size(ctx, p1) >= sizeof(*p1) + ((p1->len + p2->len) << p2->is_wide_char) + 1 - p1->is_wide_char) { - /* Concatenate in place in available space at the end of p1 */ - if (p1->is_wide_char) { - memcpy(p1->u.str16 + p1->len, p2->u.str16, p2->len << 1); - p1->len += p2->len; - } else { - memcpy(p1->u.str8 + p1->len, p2->u.str8, p2->len); - p1->len += p2->len; - p1->u.str8[p1->len] = '\0'; - } - ret_op1: + if (JS_ConcatStringInPlace(ctx, p1, op2)) { JS_FreeValue(ctx, op2); return op1; } + p2 = JS_VALUE_GET_STRING(op2); ret = JS_ConcatString1(ctx, p1, p2); JS_FreeValue(ctx, op1); JS_FreeValue(ctx, op2); @@ -17778,6 +17792,11 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, sp[-2] = __JS_NewFloat64(ctx, JS_VALUE_GET_FLOAT64(op1) + JS_VALUE_GET_FLOAT64(op2)); sp--; + } else if (JS_IsString(op1) && JS_IsString(op2)) { + sp[-2] = JS_ConcatString(ctx, op1, op2); + sp--; + if (JS_IsException(sp[-1])) + goto exception; } else { add_slow: if (js_add_slow(ctx, sp)) @@ -17788,38 +17807,45 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, BREAK; CASE(OP_add_loc): { + JSValue op2; JSValue *pv; int idx; idx = *pc; pc += 1; + op2 = sp[-1]; pv = &var_buf[idx]; - if (likely(JS_VALUE_IS_BOTH_INT(*pv, sp[-1]))) { + if (likely(JS_VALUE_IS_BOTH_INT(*pv, op2))) { int64_t r; - r = (int64_t)JS_VALUE_GET_INT(*pv) + - JS_VALUE_GET_INT(sp[-1]); + r = (int64_t)JS_VALUE_GET_INT(*pv) + JS_VALUE_GET_INT(op2); if (unlikely((int)r != r)) goto add_loc_slow; *pv = JS_NewInt32(ctx, r); sp--; - } else if (JS_VALUE_GET_TAG(*pv) == JS_TAG_STRING) { - JSValue op1; - op1 = sp[-1]; + } else if (JS_VALUE_IS_BOTH_FLOAT(*pv, op2)) { + *pv = __JS_NewFloat64(ctx, JS_VALUE_GET_FLOAT64(*pv) + + JS_VALUE_GET_FLOAT64(op2)); sp--; - op1 = JS_ToPrimitiveFree(ctx, op1, HINT_NONE); - if (JS_IsException(op1)) + } else if (JS_VALUE_GET_TAG(*pv) == JS_TAG_STRING) { + sp--; + op2 = JS_ToPrimitiveFree(ctx, op2, HINT_NONE); + if (JS_IsException(op2)) goto exception; - op1 = JS_ConcatString(ctx, JS_DupValue(ctx, *pv), op1); - if (JS_IsException(op1)) - goto exception; - set_value(ctx, pv, op1); + if (JS_ConcatStringInPlace(ctx, JS_VALUE_GET_STRING(*pv), op2)) { + JS_FreeValue(ctx, op2); + } else { + op2 = JS_ConcatString(ctx, JS_DupValue(ctx, *pv), op2); + if (JS_IsException(op2)) + goto exception; + set_value(ctx, pv, op2); + } } else { JSValue ops[2]; add_loc_slow: /* In case of exception, js_add_slow frees ops[0] and ops[1], so we must duplicate *pv */ ops[0] = JS_DupValue(ctx, *pv); - ops[1] = sp[-1]; + ops[1] = op2; sp--; if (js_add_slow(ctx, ops + 2)) goto exception; @@ -35685,6 +35711,7 @@ static JSString *JS_ReadString(BCReaderState *s) js_free_string(s->ctx->rt, p); return NULL; } + // XXX: potential endianness issue memcpy(p->u.str8, s->ptr, size); s->ptr += size; if (!is_wide_char) { @@ -41142,10 +41169,7 @@ static int js_string_get_own_property(JSContext *ctx, idx = __JS_AtomToUInt32(prop); if (idx < p1->len) { if (desc) { - if (p1->is_wide_char) - ch = p1->u.str16[idx]; - else - ch = p1->u.str8[idx]; + ch = string_get(p1, idx); desc->flags = JS_PROP_ENUMERABLE; desc->value = js_new_string_char(ctx, ch); desc->getter = JS_UNDEFINED; @@ -41411,10 +41435,7 @@ static JSValue js_string_charCodeAt(JSContext *ctx, JSValueConst this_val, if (idx < 0 || idx >= p->len) { ret = JS_NAN; } else { - if (p->is_wide_char) - c = p->u.str16[idx]; - else - c = p->u.str8[idx]; + c = string_get(p, idx); ret = JS_NewInt32(ctx, c); } JS_FreeValue(ctx, val); @@ -41444,10 +41465,7 @@ static JSValue js_string_charAt(JSContext *ctx, JSValueConst this_val, else ret = js_new_string8(ctx, NULL, 0); } else { - if (p->is_wide_char) - c = p->u.str16[idx]; - else - c = p->u.str8[idx]; + c = string_get(p, idx); ret = js_new_string_char(ctx, c); } JS_FreeValue(ctx, val); @@ -45324,7 +45342,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc, has_content = TRUE; } } - if (has_content && JS_VALUE_GET_STRING(jsc->gap)->len != 0) { + if (has_content && !JS_IsEmptyString(jsc->gap)) { string_buffer_putc8(jsc->b, '\n'); string_buffer_concat_value(jsc->b, indent); }