From 156d981afeb07d7520ad75685e15b8dabd95f35a Mon Sep 17 00:00:00 2001 From: Fabrice Bellard Date: Tue, 25 Mar 2025 16:01:40 +0100 Subject: [PATCH] added string ropes for faster concatenation of long strings (issue #67) --- quickjs.c | 636 +++++++++++++++++++++++++++++++++++++----- quickjs.h | 4 +- tests/microbench.js | 28 ++ tests/test_builtin.js | 27 ++ 4 files changed, 624 insertions(+), 71 deletions(-) diff --git a/quickjs.c b/quickjs.c index 0bcb9fe..f2d62ce 100644 --- a/quickjs.c +++ b/quickjs.c @@ -105,6 +105,7 @@ //#define DUMP_MODULE_RESOLVE //#define DUMP_PROMISE //#define DUMP_READ_OBJECT +//#define DUMP_ROPE_REBALANCE /* test the GC by forcing it before each object allocation */ //#define FORCE_GC_AT_MALLOC @@ -196,11 +197,24 @@ typedef enum JSErrorEnum { #define JS_STACK_SIZE_MAX 65534 #define JS_STRING_LEN_MAX ((1 << 30) - 1) +/* strings <= this length are not concatenated using ropes. if too + small, the rope memory overhead becomes high. */ +#define JS_STRING_ROPE_SHORT_LEN 512 +/* specific threshold for initial rope use */ +#define JS_STRING_ROPE_SHORT2_LEN 8192 +/* rope depth at which we rebalance */ +#define JS_STRING_ROPE_MAX_DEPTH 60 + #define __exception __attribute__((warn_unused_result)) typedef struct JSShape JSShape; typedef struct JSString JSString; typedef struct JSString JSAtomStruct; +typedef struct JSObject JSObject; + +#define JS_VALUE_GET_OBJ(v) ((JSObject *)JS_VALUE_GET_PTR(v)) +#define JS_VALUE_GET_STRING(v) ((JSString *)JS_VALUE_GET_PTR(v)) +#define JS_VALUE_GET_STRING_ROPE(v) ((JSStringRope *)JS_VALUE_GET_PTR(v)) typedef enum { JS_GC_PHASE_NONE, @@ -483,6 +497,17 @@ struct JSString { } u; }; +typedef struct JSStringRope { + JSRefCountHeader header; /* must come first, 32-bit */ + uint32_t len; + uint8_t is_wide_char; /* 0 = 8 bits, 1 = 16 bits characters */ + uint8_t depth; /* max depth of the rope tree */ + /* XXX: could reduce memory usage by using a direct pointer with + bit 0 to select rope or string */ + JSValue left; + JSValue right; /* might be the empty string */ +} JSStringRope; + typedef struct JSClosureVar { uint8_t is_local : 1; uint8_t is_arg : 1; @@ -1123,7 +1148,8 @@ static int JS_CreateProperty(JSContext *ctx, JSObject *p, JSAtom prop, JSValueConst val, JSValueConst getter, JSValueConst setter, int flags); -static int js_string_memcmp(const JSString *p1, const JSString *p2, int len); +static int js_string_memcmp(const JSString *p1, int pos1, const JSString *p2, + int pos2, int len); static void reset_weak_ref(JSRuntime *rt, JSObject *p); static JSValue js_array_buffer_constructor3(JSContext *ctx, JSValueConst new_target, @@ -2337,6 +2363,17 @@ static uint32_t hash_string(const JSString *str, uint32_t h) return h; } +static uint32_t hash_string_rope(JSValueConst val, uint32_t h) +{ + if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) { + return hash_string(JS_VALUE_GET_STRING(val), h); + } else { + JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val); + h = hash_string_rope(r->left, h); + return hash_string_rope(r->right, h); + } +} + static __maybe_unused void JS_DumpChar(JSRuntime *rt, int c, int sep) { if (c == sep || c == '\\') { @@ -2569,7 +2606,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type) if (p->hash == h && p->atom_type == atom_type && p->len == len && - js_string_memcmp(p, str, len) == 0) { + js_string_memcmp(p, 0, str, 0, len) == 0) { if (!__JS_AtomIsConst(i)) p->header.ref_count++; goto done; @@ -3645,13 +3682,21 @@ static int string_buffer_concat_value(StringBuffer *s, JSValueConst v) return -1; } if (unlikely(JS_VALUE_GET_TAG(v) != JS_TAG_STRING)) { - v1 = JS_ToString(s->ctx, v); - if (JS_IsException(v1)) - return string_buffer_set_error(s); - p = JS_VALUE_GET_STRING(v1); - res = string_buffer_concat(s, p, 0, p->len); - JS_FreeValue(s->ctx, v1); - return res; + if (JS_VALUE_GET_TAG(v) == JS_TAG_STRING_ROPE) { + JSStringRope *r = JS_VALUE_GET_STRING_ROPE(v); + /* recursion is acceptable because the rope depth is bounded */ + if (string_buffer_concat_value(s, r->left)) + return -1; + return string_buffer_concat_value(s, r->right); + } else { + v1 = JS_ToString(s->ctx, v); + if (JS_IsException(v1)) + return string_buffer_set_error(s); + p = JS_VALUE_GET_STRING(v1); + res = string_buffer_concat(s, p, 0, p->len); + JS_FreeValue(s->ctx, v1); + return res; + } } p = JS_VALUE_GET_STRING(v); return string_buffer_concat(s, p, 0, p->len); @@ -3961,20 +4006,21 @@ static int memcmp16(const uint16_t *src1, const uint16_t *src2, int len) return 0; } -static int js_string_memcmp(const JSString *p1, const JSString *p2, int len) +static int js_string_memcmp(const JSString *p1, int pos1, const JSString *p2, + int pos2, int len) { int res; if (likely(!p1->is_wide_char)) { if (likely(!p2->is_wide_char)) - res = memcmp(p1->u.str8, p2->u.str8, len); + res = memcmp(p1->u.str8 + pos1, p2->u.str8 + pos2, len); else - res = -memcmp16_8(p2->u.str16, p1->u.str8, len); + res = -memcmp16_8(p2->u.str16 + pos2, p1->u.str8 + pos1, len); } else { if (!p2->is_wide_char) - res = memcmp16_8(p1->u.str16, p2->u.str8, len); + res = memcmp16_8(p1->u.str16 + pos1, p2->u.str8 + pos2, len); else - res = memcmp16(p1->u.str16, p2->u.str16, len); + res = memcmp16(p1->u.str16 + pos1, p2->u.str16 + pos2, len); } return res; } @@ -3985,7 +4031,7 @@ static int js_string_compare(JSContext *ctx, { int res, len; len = min_int(p1->len, p2->len); - res = js_string_memcmp(p1, p2, len); + res = js_string_memcmp(p1, 0, p2, 0, len); if (res == 0) { if (p1->len == p2->len) res = 0; @@ -4071,27 +4117,10 @@ static BOOL JS_ConcatStringInPlace(JSContext *ctx, JSString *p1, JSValueConst op return FALSE; } -/* op1 and op2 are converted to strings. For convenience, op1 or op2 = - JS_EXCEPTION are accepted and return JS_EXCEPTION. */ -static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2) +static JSValue JS_ConcatString2(JSContext *ctx, JSValue op1, JSValue op2) { JSValue ret; JSString *p1, *p2; - - if (unlikely(JS_VALUE_GET_TAG(op1) != JS_TAG_STRING)) { - op1 = JS_ToStringFree(ctx, op1); - if (JS_IsException(op1)) { - JS_FreeValue(ctx, op2); - return JS_EXCEPTION; - } - } - if (unlikely(JS_VALUE_GET_TAG(op2) != JS_TAG_STRING)) { - op2 = JS_ToStringFree(ctx, op2); - if (JS_IsException(op2)) { - JS_FreeValue(ctx, op1); - return JS_EXCEPTION; - } - } p1 = JS_VALUE_GET_STRING(op1); if (JS_ConcatStringInPlace(ctx, p1, op2)) { JS_FreeValue(ctx, op2); @@ -4104,6 +4133,402 @@ static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2) return ret; } +/* Return the character at position 'idx'. 'val' must be a string or rope */ +static int string_rope_get(JSValueConst val, uint32_t idx) +{ + if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) { + return string_get(JS_VALUE_GET_STRING(val), idx); + } else { + JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val); + uint32_t len; + if (JS_VALUE_GET_TAG(r->left) == JS_TAG_STRING) + len = JS_VALUE_GET_STRING(r->left)->len; + else + len = JS_VALUE_GET_STRING_ROPE(r->left)->len; + if (idx < len) + return string_rope_get(r->left, idx); + else + return string_rope_get(r->right, idx - len); + } +} + +typedef struct { + JSValueConst stack[JS_STRING_ROPE_MAX_DEPTH]; + int stack_len; +} JSStringRopeIter; + +static void string_rope_iter_init(JSStringRopeIter *s, JSValueConst val) +{ + s->stack_len = 0; + s->stack[s->stack_len++] = val; +} + +/* iterate thru a rope and return the strings in order */ +static JSString *string_rope_iter_next(JSStringRopeIter *s) +{ + JSValueConst val; + JSStringRope *r; + + if (s->stack_len == 0) + return NULL; + val = s->stack[--s->stack_len]; + for(;;) { + if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) + return JS_VALUE_GET_STRING(val); + r = JS_VALUE_GET_STRING_ROPE(val); + assert(s->stack_len < JS_STRING_ROPE_MAX_DEPTH); + s->stack[s->stack_len++] = r->right; + val = r->left; + } +} + +static uint32_t string_rope_get_len(JSValueConst val) +{ + if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) + return JS_VALUE_GET_STRING(val)->len; + else + return JS_VALUE_GET_STRING_ROPE(val)->len; +} + +static int js_string_rope_compare(JSContext *ctx, JSValueConst op1, + JSValueConst op2, BOOL eq_only) +{ + uint32_t len1, len2, len, pos1, pos2, l; + int res; + JSStringRopeIter it1, it2; + JSString *p1, *p2; + + len1 = string_rope_get_len(op1); + len2 = string_rope_get_len(op2); + /* no need to go further for equality test if + different length */ + if (eq_only && len1 != len2) + return 1; + len = min_uint32(len1, len2); + string_rope_iter_init(&it1, op1); + string_rope_iter_init(&it2, op2); + p1 = string_rope_iter_next(&it1); + p2 = string_rope_iter_next(&it2); + pos1 = 0; + pos2 = 0; + while (len != 0) { + l = min_uint32(p1->len - pos1, p2->len - pos2); + l = min_uint32(l, len); + res = js_string_memcmp(p1, pos1, p2, pos2, l); + if (res != 0) + return res; + len -= l; + pos1 += l; + if (pos1 >= p1->len) { + p1 = string_rope_iter_next(&it1); + pos1 = 0; + } + pos2 += l; + if (pos2 >= p2->len) { + p2 = string_rope_iter_next(&it2); + pos2 = 0; + } + } + + if (len1 == len2) + res = 0; + else if (len1 < len2) + res = -1; + else + res = 1; + return res; +} + +/* 'rope' must be a rope. return a string and modify the rope so that + it won't need to be linearized again. */ +static JSValue js_linearize_string_rope(JSContext *ctx, JSValue rope) +{ + StringBuffer b_s, *b = &b_s; + JSStringRope *r; + JSValue ret; + + r = JS_VALUE_GET_STRING_ROPE(rope); + + /* check whether it is already linearized */ + if (JS_VALUE_GET_TAG(r->right) == JS_TAG_STRING && + JS_VALUE_GET_STRING(r->right)->len == 0) { + ret = JS_DupValue(ctx, r->left); + JS_FreeValue(ctx, rope); + return ret; + } + if (string_buffer_init2(ctx, b, r->len, r->is_wide_char)) + goto fail; + if (string_buffer_concat_value(b, rope)) + goto fail; + ret = string_buffer_end(b); + if (r->header.ref_count > 1) { + /* update the rope so that it won't need to be linearized again */ + JS_FreeValue(ctx, r->left); + JS_FreeValue(ctx, r->right); + r->left = JS_DupValue(ctx, ret); + r->right = JS_AtomToString(ctx, JS_ATOM_empty_string); + } + JS_FreeValue(ctx, rope); + return ret; + fail: + JS_FreeValue(ctx, rope); + return JS_EXCEPTION; +} + +static JSValue js_rebalancee_string_rope(JSContext *ctx, JSValueConst rope); + +/* op1 and op2 must be strings or string ropes */ +static JSValue js_new_string_rope(JSContext *ctx, JSValue op1, JSValue op2) +{ + uint32_t len; + int is_wide_char, depth; + JSStringRope *r; + JSValue res; + + if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) { + JSString *p1 = JS_VALUE_GET_STRING(op1); + len = p1->len; + is_wide_char = p1->is_wide_char; + depth = 0; + } else { + JSStringRope *r1 = JS_VALUE_GET_STRING_ROPE(op1); + len = r1->len; + is_wide_char = r1->is_wide_char; + depth = r1->depth; + } + + if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) { + JSString *p2 = JS_VALUE_GET_STRING(op2); + len += p2->len; + is_wide_char |= p2->is_wide_char; + } else { + JSStringRope *r2 = JS_VALUE_GET_STRING_ROPE(op2); + len += r2->len; + is_wide_char |= r2->is_wide_char; + depth = max_int(depth, r2->depth); + } + if (len > JS_STRING_LEN_MAX) { + JS_ThrowInternalError(ctx, "string too long"); + goto fail; + } + r = js_malloc(ctx, sizeof(*r)); + if (!r) + goto fail; + r->header.ref_count = 1; + r->len = len; + r->is_wide_char = is_wide_char; + r->depth = depth + 1; + r->left = op1; + r->right = op2; + res = JS_MKPTR(JS_TAG_STRING_ROPE, r); + if (r->depth > JS_STRING_ROPE_MAX_DEPTH) { + JSValue res2; +#ifdef DUMP_ROPE_REBALANCE + printf("rebalance: initial depth=%d\n", r->depth); +#endif + res2 = js_rebalancee_string_rope(ctx, res); +#ifdef DUMP_ROPE_REBALANCE + if (JS_VALUE_GET_TAG(res2) == JS_TAG_STRING_ROPE) + printf("rebalance: final depth=%d\n", JS_VALUE_GET_STRING_ROPE(res2)->depth); +#endif + JS_FreeValue(ctx, res); + return res2; + } else { + return res; + } + fail: + JS_FreeValue(ctx, op1); + JS_FreeValue(ctx, op2); + return JS_EXCEPTION; +} + +#define ROPE_N_BUCKETS 44 + +/* Fibonacii numbers starting from F_2 */ +static const uint32_t rope_bucket_len[ROPE_N_BUCKETS] = { + 1, 2, 3, 5, + 8, 13, 21, 34, + 55, 89, 144, 233, + 377, 610, 987, 1597, + 2584, 4181, 6765, 10946, + 17711, 28657, 46368, 75025, + 121393, 196418, 317811, 514229, + 832040, 1346269, 2178309, 3524578, + 5702887, 9227465, 14930352, 24157817, + 39088169, 63245986, 102334155, 165580141, + 267914296, 433494437, 701408733, 1134903170, /* > JS_STRING_LEN_MAX */ +}; + +static int js_rebalancee_string_rope_rec(JSContext *ctx, JSValue *buckets, + JSValueConst val) +{ + if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) { + JSString *p = JS_VALUE_GET_STRING(val); + uint32_t len, i; + JSValue a, b; + + len = p->len; + if (len == 0) + return 0; /* nothing to do */ + /* find the bucket i so that rope_bucket_len[i] <= len < + rope_bucket_len[i + 1] and concatenate the ropes in the + buckets before */ + a = JS_NULL; + i = 0; + while (len >= rope_bucket_len[i + 1]) { + b = buckets[i]; + if (!JS_IsNull(b)) { + buckets[i] = JS_NULL; + if (JS_IsNull(a)) { + a = b; + } else { + a = js_new_string_rope(ctx, b, a); + if (JS_IsException(a)) + return -1; + } + } + i++; + } + if (!JS_IsNull(a)) { + a = js_new_string_rope(ctx, a, JS_DupValue(ctx, val)); + if (JS_IsException(a)) + return -1; + } else { + a = JS_DupValue(ctx, val); + } + while (!JS_IsNull(buckets[i])) { + a = js_new_string_rope(ctx, buckets[i], a); + buckets[i] = JS_NULL; + if (JS_IsException(a)) + return -1; + i++; + } + buckets[i] = a; + } else { + JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val); + js_rebalancee_string_rope_rec(ctx, buckets, r->left); + js_rebalancee_string_rope_rec(ctx, buckets, r->right); + } + return 0; +} + +/* Return a new rope which is balanced. Algorithm from "Ropes: an + Alternative to Strings", Hans-J. Boehm, Russ Atkinson and Michael + Plass. */ +static JSValue js_rebalancee_string_rope(JSContext *ctx, JSValueConst rope) +{ + JSValue buckets[ROPE_N_BUCKETS], a, b; + int i; + + for(i = 0; i < ROPE_N_BUCKETS; i++) + buckets[i] = JS_NULL; + if (js_rebalancee_string_rope_rec(ctx, buckets, rope)) + goto fail; + a = JS_NULL; + for(i = 0; i < ROPE_N_BUCKETS; i++) { + b = buckets[i]; + if (!JS_IsNull(b)) { + buckets[i] = JS_NULL; + if (JS_IsNull(a)) { + a = b; + } else { + a = js_new_string_rope(ctx, b, a); + if (JS_IsException(a)) + goto fail; + } + } + } + /* fail safe */ + if (JS_IsNull(a)) + return JS_AtomToString(ctx, JS_ATOM_empty_string); + else + return a; + fail: + for(i = 0; i < ROPE_N_BUCKETS; i++) { + JS_FreeValue(ctx, buckets[i]); + } + return JS_EXCEPTION; +} + +/* op1 and op2 are converted to strings. For convenience, op1 or op2 = + JS_EXCEPTION are accepted and return JS_EXCEPTION. */ +static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2) +{ + JSString *p1, *p2; + + if (unlikely(JS_VALUE_GET_TAG(op1) != JS_TAG_STRING && + JS_VALUE_GET_TAG(op1) != JS_TAG_STRING_ROPE)) { + op1 = JS_ToStringFree(ctx, op1); + if (JS_IsException(op1)) { + JS_FreeValue(ctx, op2); + return JS_EXCEPTION; + } + } + if (unlikely(JS_VALUE_GET_TAG(op2) != JS_TAG_STRING && + JS_VALUE_GET_TAG(op2) != JS_TAG_STRING_ROPE)) { + op2 = JS_ToStringFree(ctx, op2); + if (JS_IsException(op2)) { + JS_FreeValue(ctx, op1); + return JS_EXCEPTION; + } + } + + /* normal concatenation for short strings */ + if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) { + p2 = JS_VALUE_GET_STRING(op2); + if (p2->len == 0) { + JS_FreeValue(ctx, op2); + return op1; + } + if (p2->len <= JS_STRING_ROPE_SHORT_LEN) { + if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) { + p1 = JS_VALUE_GET_STRING(op1); + if (p1->len <= JS_STRING_ROPE_SHORT2_LEN) { + return JS_ConcatString2(ctx, op1, op2); + } else { + return js_new_string_rope(ctx, op1, op2); + } + } else { + JSStringRope *r1; + r1 = JS_VALUE_GET_STRING_ROPE(op1); + if (JS_VALUE_GET_TAG(r1->right) == JS_TAG_STRING && + JS_VALUE_GET_STRING(r1->right)->len <= JS_STRING_ROPE_SHORT_LEN) { + JSValue val, ret; + val = JS_ConcatString2(ctx, JS_DupValue(ctx, r1->right), op2); + if (JS_IsException(val)) { + JS_FreeValue(ctx, op1); + return JS_EXCEPTION; + } + ret = js_new_string_rope(ctx, JS_DupValue(ctx, r1->left), val); + JS_FreeValue(ctx, op1); + return ret; + } + } + } + } else if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) { + JSStringRope *r2; + p1 = JS_VALUE_GET_STRING(op1); + if (p1->len == 0) { + JS_FreeValue(ctx, op1); + return op2; + } + r2 = JS_VALUE_GET_STRING_ROPE(op2); + if (JS_VALUE_GET_TAG(r2->left) == JS_TAG_STRING && + JS_VALUE_GET_STRING(r2->left)->len <= JS_STRING_ROPE_SHORT_LEN) { + JSValue val, ret; + val = JS_ConcatString2(ctx, op1, JS_DupValue(ctx, r2->left)); + if (JS_IsException(val)) { + JS_FreeValue(ctx, op2); + return JS_EXCEPTION; + } + ret = js_new_string_rope(ctx, val, JS_DupValue(ctx, r2->right)); + JS_FreeValue(ctx, op2); + return ret; + } + } + return js_new_string_rope(ctx, op1, op2); +} + /* Shape support */ static inline size_t get_shape_size(size_t hash_size, size_t prop_size) @@ -4757,7 +5182,9 @@ static int JS_SetObjectData(JSContext *ctx, JSValueConst obj, JSValue val) case JS_CLASS_DATE: case JS_CLASS_BIG_INT: JS_FreeValue(ctx, p->u.object_data); - p->u.object_data = val; + p->u.object_data = val; /* for JS_CLASS_STRING, 'val' must + be JS_TAG_STRING (and not a + rope) */ return 0; } } @@ -5376,6 +5803,15 @@ void __JS_FreeValueRT(JSRuntime *rt, JSValue v) } } break; + case JS_TAG_STRING_ROPE: + /* Note: recursion is acceptable because the rope depth is bounded */ + { + JSStringRope *p = JS_VALUE_GET_STRING_ROPE(v); + JS_FreeValueRT(rt, p->left); + JS_FreeValueRT(rt, p->right); + js_free_rt(rt, p); + } + break; case JS_TAG_OBJECT: case JS_TAG_FUNCTION_BYTECODE: { @@ -6750,6 +7186,7 @@ static JSValueConst JS_GetPrototypePrimitive(JSContext *ctx, JSValueConst val) val = ctx->class_proto[JS_CLASS_BOOLEAN]; break; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: val = ctx->class_proto[JS_CLASS_STRING]; break; case JS_TAG_SYMBOL: @@ -6949,14 +7386,24 @@ JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj, { JSString *p1 = JS_VALUE_GET_STRING(obj); if (__JS_AtomIsTaggedInt(prop)) { - uint32_t idx, ch; + uint32_t idx; idx = __JS_AtomToUInt32(prop); if (idx < p1->len) { - if (p1->is_wide_char) - ch = p1->u.str16[idx]; - else - ch = p1->u.str8[idx]; - return js_new_string_char(ctx, ch); + return js_new_string_char(ctx, string_get(p1, idx)); + } + } else if (prop == JS_ATOM_length) { + return JS_NewInt32(ctx, p1->len); + } + } + break; + case JS_TAG_STRING_ROPE: + { + JSStringRope *p1 = JS_VALUE_GET_STRING_ROPE(obj); + if (__JS_AtomIsTaggedInt(prop)) { + uint32_t idx; + idx = __JS_AtomToUInt32(prop); + if (idx < p1->len) { + return js_new_string_char(ctx, string_rope_get(obj, idx)); } } else if (prop == JS_ATOM_length) { return JS_NewInt32(ctx, p1->len); @@ -7263,13 +7710,12 @@ static uint32_t js_string_obj_get_length(JSContext *ctx, JSValueConst obj) { JSObject *p; - JSString *p1; uint32_t len = 0; /* This is a class exotic method: obj class_id is JS_CLASS_STRING */ p = JS_VALUE_GET_OBJ(obj); if (JS_VALUE_GET_TAG(p->u.object_data) == JS_TAG_STRING) { - p1 = JS_VALUE_GET_STRING(p->u.object_data); + JSString *p1 = JS_VALUE_GET_STRING(p->u.object_data); len = p1->len; } return len; @@ -9768,6 +10214,12 @@ static int JS_ToBoolFree(JSContext *ctx, JSValue val) JS_FreeValue(ctx, val); return ret; } + case JS_TAG_STRING_ROPE: + { + BOOL ret = JS_VALUE_GET_STRING_ROPE(val)->len != 0; + JS_FreeValue(ctx, val); + return ret; + } case JS_TAG_SHORT_BIG_INT: return JS_VALUE_GET_SHORT_BIG_INT(val) != 0; case JS_TAG_BIG_INT: @@ -11558,6 +12010,7 @@ static JSValue JS_ToNumberHintFree(JSContext *ctx, JSValue val, return JS_EXCEPTION; goto redo; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: { const char *str; const char *p; @@ -12164,7 +12617,7 @@ static JSValue js_dtoa2(JSContext *ctx, return res; } -JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToPropertyKey) +static JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToPropertyKey) { uint32_t tag; const char *str; @@ -12174,6 +12627,8 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToProperty switch(tag) { case JS_TAG_STRING: return JS_DupValue(ctx, val); + case JS_TAG_STRING_ROPE: + return js_linearize_string_rope(ctx, JS_DupValue(ctx, val)); case JS_TAG_INT: { int len; @@ -12532,6 +12987,12 @@ static __maybe_unused void JS_DumpValueShort(JSRuntime *rt, JS_DumpString(rt, p); } break; + case JS_TAG_STRING_ROPE: + { + JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val); + printf("[rope len=%d depth=%d]", r->len, r->depth); + } + break; case JS_TAG_FUNCTION_BYTECODE: { JSFunctionBytecode *b = JS_VALUE_GET_PTR(val); @@ -12692,6 +13153,7 @@ static JSValue JS_ToBigIntFree(JSContext *ctx, JSValue val) val = __JS_NewShortBigInt(ctx, JS_VALUE_GET_INT(val)); break; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: val = JS_StringToBigIntErr(ctx, val); if (JS_IsException(val)) return val; @@ -13117,6 +13579,11 @@ static no_inline __exception int js_binary_arith_slow(JSContext *ctx, JSValue *s return -1; } +static inline BOOL tag_is_string(uint32_t tag) +{ + return tag == JS_TAG_STRING || tag == JS_TAG_STRING_ROPE; +} + static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp) { JSValue op1, op2; @@ -13169,7 +13636,7 @@ static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp) tag2 = JS_VALUE_GET_NORM_TAG(op2); } - if (tag1 == JS_TAG_STRING || tag2 == JS_TAG_STRING) { + if (tag_is_string(tag1) || tag_is_string(tag2)) { sp[-2] = JS_ConcatString(ctx, op1, op2); if (JS_IsException(sp[-2])) goto exception; @@ -13511,11 +13978,13 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp, tag1 = JS_VALUE_GET_NORM_TAG(op1); tag2 = JS_VALUE_GET_NORM_TAG(op2); - if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) { - JSString *p1, *p2; - p1 = JS_VALUE_GET_STRING(op1); - p2 = JS_VALUE_GET_STRING(op2); - res = js_string_compare(ctx, p1, p2); + if (tag_is_string(tag1) && tag_is_string(tag2)) { + if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) { + res = js_string_compare(ctx, JS_VALUE_GET_STRING(op1), + JS_VALUE_GET_STRING(op2)); + } else { + res = js_string_rope_compare(ctx, op1, op2, FALSE); + } switch(op) { case OP_lt: res = (res < 0); @@ -13539,16 +14008,16 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp, goto float64_compare; } else { if ((((tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT) && - tag2 == JS_TAG_STRING) || + tag_is_string(tag2)) || ((tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) && - tag1 == JS_TAG_STRING))) { - if (tag1 == JS_TAG_STRING) { + tag_is_string(tag1)))) { + if (tag_is_string(tag1)) { op1 = JS_StringToBigInt(ctx, op1); if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT && JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT) goto invalid_bigint_string; } - if (tag2 == JS_TAG_STRING) { + if (tag_is_string(tag2)) { op2 = JS_StringToBigInt(ctx, op2); if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT && JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT) { @@ -13665,18 +14134,21 @@ static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp, } else if ((tag1 == JS_TAG_NULL && tag2 == JS_TAG_UNDEFINED) || (tag2 == JS_TAG_NULL && tag1 == JS_TAG_UNDEFINED)) { res = TRUE; - } else if ((tag1 == JS_TAG_STRING && tag_is_number(tag2)) || - (tag2 == JS_TAG_STRING && tag_is_number(tag1))) { + } else if (tag_is_string(tag1) && tag_is_string(tag2)) { + /* needed when comparing strings and ropes */ + res = js_strict_eq2(ctx, op1, op2, JS_EQ_STRICT); + } else if ((tag_is_string(tag1) && tag_is_number(tag2)) || + (tag_is_string(tag2) && tag_is_number(tag1))) { if (tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT || tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) { - if (tag1 == JS_TAG_STRING) { + if (tag_is_string(tag1)) { op1 = JS_StringToBigInt(ctx, op1); if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT && JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT) goto invalid_bigint_string; } - if (tag2 == JS_TAG_STRING) { + if (tag_is_string(tag2)) { op2 = JS_StringToBigInt(ctx, op2); if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT && JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT ) { @@ -13707,9 +14179,9 @@ static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp, op2 = JS_NewInt32(ctx, JS_VALUE_GET_INT(op2)); goto redo; } else if ((tag1 == JS_TAG_OBJECT && - (tag_is_number(tag2) || tag2 == JS_TAG_STRING || tag2 == JS_TAG_SYMBOL)) || + (tag_is_number(tag2) || tag_is_string(tag2) || tag2 == JS_TAG_SYMBOL)) || (tag2 == JS_TAG_OBJECT && - (tag_is_number(tag1) || tag1 == JS_TAG_STRING || tag1 == JS_TAG_SYMBOL))) { + (tag_is_number(tag1) || tag_is_string(tag1) || tag1 == JS_TAG_SYMBOL))) { op1 = JS_ToPrimitiveFree(ctx, op1, HINT_NONE); if (JS_IsException(op1)) { JS_FreeValue(ctx, op2); @@ -13805,14 +14277,15 @@ static BOOL js_strict_eq2(JSContext *ctx, JSValue op1, JSValue op2, res = (tag1 == tag2); break; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: { - JSString *p1, *p2; - if (tag1 != tag2) { + if (!tag_is_string(tag2)) { res = FALSE; + } else if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) { + res = (js_string_compare(ctx, JS_VALUE_GET_STRING(op1), + JS_VALUE_GET_STRING(op2)) == 0); } else { - p1 = JS_VALUE_GET_STRING(op1); - p2 = JS_VALUE_GET_STRING(op2); - res = (js_string_compare(ctx, p1, p2) == 0); + res = (js_string_rope_compare(ctx, op1, op2, TRUE) == 0); } } break; @@ -14070,6 +14543,7 @@ static __exception int js_operator_typeof(JSContext *ctx, JSValueConst op1) atom = JS_ATOM_boolean; break; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: atom = JS_ATOM_string; break; case JS_TAG_OBJECT: @@ -34537,6 +35011,16 @@ static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj) JS_WriteString(s, p); } break; + case JS_TAG_STRING_ROPE: + { + JSValue str; + str = JS_ToString(s->ctx, obj); + if (JS_IsException(str)) + goto fail; + JS_WriteObjectRec(s, str); + JS_FreeValue(s->ctx, str); + } + break; case JS_TAG_FUNCTION_BYTECODE: if (!s->allow_bytecode) goto invalid_tag; @@ -36141,13 +36625,22 @@ static JSValue JS_ToObject(JSContext *ctx, JSValueConst val) obj = JS_NewObjectClass(ctx, JS_CLASS_NUMBER); goto set_value; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: /* XXX: should call the string constructor */ { - JSString *p1 = JS_VALUE_GET_STRING(val); + JSValue str; + str = JS_ToString(ctx, val); /* ensure that we never store a rope */ + if (JS_IsException(str)) + return JS_EXCEPTION; obj = JS_NewObjectClass(ctx, JS_CLASS_STRING); - JS_DefinePropertyValue(ctx, obj, JS_ATOM_length, JS_NewInt32(ctx, p1->len), 0); + if (!JS_IsException(obj)) { + JS_DefinePropertyValue(ctx, obj, JS_ATOM_length, + JS_NewInt32(ctx, JS_VALUE_GET_STRING(str)->len), 0); + JS_SetObjectData(ctx, obj, JS_DupValue(ctx, str)); + } + JS_FreeValue(ctx, str); + return obj; } - goto set_value; case JS_TAG_BOOL: obj = JS_NewObjectClass(ctx, JS_CLASS_BOOLEAN); goto set_value; @@ -40415,7 +40908,8 @@ static JSValue js_string_constructor(JSContext *ctx, JSValueConst new_target, static JSValue js_thisStringValue(JSContext *ctx, JSValueConst this_val) { - if (JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING) + if (JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING || + JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING_ROPE) return JS_DupValue(ctx, this_val); if (JS_VALUE_GET_TAG(this_val) == JS_TAG_OBJECT) { @@ -44334,6 +44828,7 @@ static JSValue js_json_check(JSContext *ctx, JSONStringifyContext *jsc, if (JS_IsFunction(ctx, val)) break; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: case JS_TAG_INT: case JS_TAG_FLOAT64: case JS_TAG_BOOL: @@ -44508,6 +45003,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc, concat_primitive: switch (JS_VALUE_GET_NORM_TAG(val)) { case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: val = JS_ToQuotedStringFree(ctx, val); if (JS_IsException(val)) goto exception; @@ -46115,6 +46611,9 @@ static uint32_t map_hash_key(JSValueConst key) case JS_TAG_STRING: h = hash_string(JS_VALUE_GET_STRING(key), 0); break; + case JS_TAG_STRING_ROPE: + h = hash_string_rope(key, 0); + break; case JS_TAG_OBJECT: case JS_TAG_SYMBOL: h = (uintptr_t)JS_VALUE_GET_PTR(key) * 3163; @@ -49698,6 +50197,7 @@ static JSValue JS_ToBigIntCtorFree(JSContext *ctx, JSValue val) } break; case JS_TAG_STRING: + case JS_TAG_STRING_ROPE: val = JS_StringToBigIntErr(ctx, val); break; case JS_TAG_OBJECT: diff --git a/quickjs.h b/quickjs.h index 2463364..849594b 100644 --- a/quickjs.h +++ b/quickjs.h @@ -48,7 +48,6 @@ extern "C" { typedef struct JSRuntime JSRuntime; typedef struct JSContext JSContext; -typedef struct JSObject JSObject; typedef struct JSClass JSClass; typedef uint32_t JSClassID; typedef uint32_t JSAtom; @@ -78,6 +77,7 @@ enum { JS_TAG_BIG_INT = -9, JS_TAG_SYMBOL = -8, JS_TAG_STRING = -7, + JS_TAG_STRING_ROPE = -6, JS_TAG_MODULE = -3, /* used internally */ JS_TAG_FUNCTION_BYTECODE = -2, /* used internally */ JS_TAG_OBJECT = -1, @@ -280,8 +280,6 @@ static inline JSValue __JS_NewShortBigInt(JSContext *ctx, int64_t d) #define JS_VALUE_IS_BOTH_INT(v1, v2) ((JS_VALUE_GET_TAG(v1) | JS_VALUE_GET_TAG(v2)) == 0) #define JS_VALUE_IS_BOTH_FLOAT(v1, v2) (JS_TAG_IS_FLOAT64(JS_VALUE_GET_TAG(v1)) && JS_TAG_IS_FLOAT64(JS_VALUE_GET_TAG(v2))) -#define JS_VALUE_GET_OBJ(v) ((JSObject *)JS_VALUE_GET_PTR(v)) -#define JS_VALUE_GET_STRING(v) ((JSString *)JS_VALUE_GET_PTR(v)) #define JS_VALUE_HAS_REF_COUNT(v) ((unsigned)JS_VALUE_GET_TAG(v) >= (unsigned)JS_TAG_FIRST) /* special values */ diff --git a/tests/microbench.js b/tests/microbench.js index 4f69c40..7397ad9 100644 --- a/tests/microbench.js +++ b/tests/microbench.js @@ -957,6 +957,32 @@ function string_build4(n) return n * 1000; } +/* append */ +function string_build_large1(n) +{ + var i, j, r, len = 20000; + for(j = 0; j < n; j++) { + r = ""; + for(i = 0; i < len; i++) + r += "abcdef"; + global_res = r; + } + return n * len; +} + +/* prepend */ +function string_build_large2(n) +{ + var i, j, r, len = 20000; + for(j = 0; j < n; j++) { + r = ""; + for(i = 0; i < len; i++) + r = "abcdef" + r; + global_res = r; + } + return n * len; +} + /* sort bench */ function sort_bench(text) { @@ -1336,6 +1362,8 @@ function main(argc, argv, g) string_build2, string_build3, string_build4, + string_build_large1, + string_build_large2, int_to_string, int_toString, float_to_string, diff --git a/tests/test_builtin.js b/tests/test_builtin.js index 1de89ed..383d577 100644 --- a/tests/test_builtin.js +++ b/tests/test_builtin.js @@ -864,6 +864,32 @@ function test_generator() assert(v.value === 6 && v.done === true); } +function rope_concat(n, dir) +{ + var i, s; + s = ""; + if (dir > 0) { + for(i = 0; i < n; i++) + s += String.fromCharCode(i & 0xffff); + } else { + for(i = n - 1; i >= 0; i--) + s = String.fromCharCode(i & 0xffff) + s; + } + + for(i = 0; i < n; i++) { + /* test before the assert to go faster */ + if (s.charCodeAt(i) != (i & 0xffff)) { + assert(s.charCodeAt(i), i & 0xffff); + } + } +} + +function test_rope() +{ + rope_concat(100000, 1); + rope_concat(100000, -1); +} + test(); test_function(); test_enum(); @@ -880,3 +906,4 @@ test_symbol(); test_map(); test_weak_map(); test_generator(); +test_rope();