added string ropes for faster concatenation of long strings (issue #67)

This commit is contained in:
Fabrice Bellard 2025-03-25 16:01:40 +01:00
parent 372ad84e9a
commit 156d981afe
4 changed files with 624 additions and 71 deletions

636
quickjs.c
View File

@ -105,6 +105,7 @@
//#define DUMP_MODULE_RESOLVE
//#define DUMP_PROMISE
//#define DUMP_READ_OBJECT
//#define DUMP_ROPE_REBALANCE
/* test the GC by forcing it before each object allocation */
//#define FORCE_GC_AT_MALLOC
@ -196,11 +197,24 @@ typedef enum JSErrorEnum {
#define JS_STACK_SIZE_MAX 65534
#define JS_STRING_LEN_MAX ((1 << 30) - 1)
/* strings <= this length are not concatenated using ropes. if too
small, the rope memory overhead becomes high. */
#define JS_STRING_ROPE_SHORT_LEN 512
/* specific threshold for initial rope use */
#define JS_STRING_ROPE_SHORT2_LEN 8192
/* rope depth at which we rebalance */
#define JS_STRING_ROPE_MAX_DEPTH 60
#define __exception __attribute__((warn_unused_result))
typedef struct JSShape JSShape;
typedef struct JSString JSString;
typedef struct JSString JSAtomStruct;
typedef struct JSObject JSObject;
#define JS_VALUE_GET_OBJ(v) ((JSObject *)JS_VALUE_GET_PTR(v))
#define JS_VALUE_GET_STRING(v) ((JSString *)JS_VALUE_GET_PTR(v))
#define JS_VALUE_GET_STRING_ROPE(v) ((JSStringRope *)JS_VALUE_GET_PTR(v))
typedef enum {
JS_GC_PHASE_NONE,
@ -483,6 +497,17 @@ struct JSString {
} u;
};
typedef struct JSStringRope {
JSRefCountHeader header; /* must come first, 32-bit */
uint32_t len;
uint8_t is_wide_char; /* 0 = 8 bits, 1 = 16 bits characters */
uint8_t depth; /* max depth of the rope tree */
/* XXX: could reduce memory usage by using a direct pointer with
bit 0 to select rope or string */
JSValue left;
JSValue right; /* might be the empty string */
} JSStringRope;
typedef struct JSClosureVar {
uint8_t is_local : 1;
uint8_t is_arg : 1;
@ -1123,7 +1148,8 @@ static int JS_CreateProperty(JSContext *ctx, JSObject *p,
JSAtom prop, JSValueConst val,
JSValueConst getter, JSValueConst setter,
int flags);
static int js_string_memcmp(const JSString *p1, const JSString *p2, int len);
static int js_string_memcmp(const JSString *p1, int pos1, const JSString *p2,
int pos2, int len);
static void reset_weak_ref(JSRuntime *rt, JSObject *p);
static JSValue js_array_buffer_constructor3(JSContext *ctx,
JSValueConst new_target,
@ -2337,6 +2363,17 @@ static uint32_t hash_string(const JSString *str, uint32_t h)
return h;
}
static uint32_t hash_string_rope(JSValueConst val, uint32_t h)
{
if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) {
return hash_string(JS_VALUE_GET_STRING(val), h);
} else {
JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
h = hash_string_rope(r->left, h);
return hash_string_rope(r->right, h);
}
}
static __maybe_unused void JS_DumpChar(JSRuntime *rt, int c, int sep)
{
if (c == sep || c == '\\') {
@ -2569,7 +2606,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type)
if (p->hash == h &&
p->atom_type == atom_type &&
p->len == len &&
js_string_memcmp(p, str, len) == 0) {
js_string_memcmp(p, 0, str, 0, len) == 0) {
if (!__JS_AtomIsConst(i))
p->header.ref_count++;
goto done;
@ -3645,13 +3682,21 @@ static int string_buffer_concat_value(StringBuffer *s, JSValueConst v)
return -1;
}
if (unlikely(JS_VALUE_GET_TAG(v) != JS_TAG_STRING)) {
v1 = JS_ToString(s->ctx, v);
if (JS_IsException(v1))
return string_buffer_set_error(s);
p = JS_VALUE_GET_STRING(v1);
res = string_buffer_concat(s, p, 0, p->len);
JS_FreeValue(s->ctx, v1);
return res;
if (JS_VALUE_GET_TAG(v) == JS_TAG_STRING_ROPE) {
JSStringRope *r = JS_VALUE_GET_STRING_ROPE(v);
/* recursion is acceptable because the rope depth is bounded */
if (string_buffer_concat_value(s, r->left))
return -1;
return string_buffer_concat_value(s, r->right);
} else {
v1 = JS_ToString(s->ctx, v);
if (JS_IsException(v1))
return string_buffer_set_error(s);
p = JS_VALUE_GET_STRING(v1);
res = string_buffer_concat(s, p, 0, p->len);
JS_FreeValue(s->ctx, v1);
return res;
}
}
p = JS_VALUE_GET_STRING(v);
return string_buffer_concat(s, p, 0, p->len);
@ -3961,20 +4006,21 @@ static int memcmp16(const uint16_t *src1, const uint16_t *src2, int len)
return 0;
}
static int js_string_memcmp(const JSString *p1, const JSString *p2, int len)
static int js_string_memcmp(const JSString *p1, int pos1, const JSString *p2,
int pos2, int len)
{
int res;
if (likely(!p1->is_wide_char)) {
if (likely(!p2->is_wide_char))
res = memcmp(p1->u.str8, p2->u.str8, len);
res = memcmp(p1->u.str8 + pos1, p2->u.str8 + pos2, len);
else
res = -memcmp16_8(p2->u.str16, p1->u.str8, len);
res = -memcmp16_8(p2->u.str16 + pos2, p1->u.str8 + pos1, len);
} else {
if (!p2->is_wide_char)
res = memcmp16_8(p1->u.str16, p2->u.str8, len);
res = memcmp16_8(p1->u.str16 + pos1, p2->u.str8 + pos2, len);
else
res = memcmp16(p1->u.str16, p2->u.str16, len);
res = memcmp16(p1->u.str16 + pos1, p2->u.str16 + pos2, len);
}
return res;
}
@ -3985,7 +4031,7 @@ static int js_string_compare(JSContext *ctx,
{
int res, len;
len = min_int(p1->len, p2->len);
res = js_string_memcmp(p1, p2, len);
res = js_string_memcmp(p1, 0, p2, 0, len);
if (res == 0) {
if (p1->len == p2->len)
res = 0;
@ -4071,27 +4117,10 @@ static BOOL JS_ConcatStringInPlace(JSContext *ctx, JSString *p1, JSValueConst op
return FALSE;
}
/* op1 and op2 are converted to strings. For convenience, op1 or op2 =
JS_EXCEPTION are accepted and return JS_EXCEPTION. */
static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2)
static JSValue JS_ConcatString2(JSContext *ctx, JSValue op1, JSValue op2)
{
JSValue ret;
JSString *p1, *p2;
if (unlikely(JS_VALUE_GET_TAG(op1) != JS_TAG_STRING)) {
op1 = JS_ToStringFree(ctx, op1);
if (JS_IsException(op1)) {
JS_FreeValue(ctx, op2);
return JS_EXCEPTION;
}
}
if (unlikely(JS_VALUE_GET_TAG(op2) != JS_TAG_STRING)) {
op2 = JS_ToStringFree(ctx, op2);
if (JS_IsException(op2)) {
JS_FreeValue(ctx, op1);
return JS_EXCEPTION;
}
}
p1 = JS_VALUE_GET_STRING(op1);
if (JS_ConcatStringInPlace(ctx, p1, op2)) {
JS_FreeValue(ctx, op2);
@ -4104,6 +4133,402 @@ static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2)
return ret;
}
/* Return the character at position 'idx'. 'val' must be a string or rope */
static int string_rope_get(JSValueConst val, uint32_t idx)
{
if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) {
return string_get(JS_VALUE_GET_STRING(val), idx);
} else {
JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
uint32_t len;
if (JS_VALUE_GET_TAG(r->left) == JS_TAG_STRING)
len = JS_VALUE_GET_STRING(r->left)->len;
else
len = JS_VALUE_GET_STRING_ROPE(r->left)->len;
if (idx < len)
return string_rope_get(r->left, idx);
else
return string_rope_get(r->right, idx - len);
}
}
typedef struct {
JSValueConst stack[JS_STRING_ROPE_MAX_DEPTH];
int stack_len;
} JSStringRopeIter;
static void string_rope_iter_init(JSStringRopeIter *s, JSValueConst val)
{
s->stack_len = 0;
s->stack[s->stack_len++] = val;
}
/* iterate thru a rope and return the strings in order */
static JSString *string_rope_iter_next(JSStringRopeIter *s)
{
JSValueConst val;
JSStringRope *r;
if (s->stack_len == 0)
return NULL;
val = s->stack[--s->stack_len];
for(;;) {
if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING)
return JS_VALUE_GET_STRING(val);
r = JS_VALUE_GET_STRING_ROPE(val);
assert(s->stack_len < JS_STRING_ROPE_MAX_DEPTH);
s->stack[s->stack_len++] = r->right;
val = r->left;
}
}
static uint32_t string_rope_get_len(JSValueConst val)
{
if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING)
return JS_VALUE_GET_STRING(val)->len;
else
return JS_VALUE_GET_STRING_ROPE(val)->len;
}
static int js_string_rope_compare(JSContext *ctx, JSValueConst op1,
JSValueConst op2, BOOL eq_only)
{
uint32_t len1, len2, len, pos1, pos2, l;
int res;
JSStringRopeIter it1, it2;
JSString *p1, *p2;
len1 = string_rope_get_len(op1);
len2 = string_rope_get_len(op2);
/* no need to go further for equality test if
different length */
if (eq_only && len1 != len2)
return 1;
len = min_uint32(len1, len2);
string_rope_iter_init(&it1, op1);
string_rope_iter_init(&it2, op2);
p1 = string_rope_iter_next(&it1);
p2 = string_rope_iter_next(&it2);
pos1 = 0;
pos2 = 0;
while (len != 0) {
l = min_uint32(p1->len - pos1, p2->len - pos2);
l = min_uint32(l, len);
res = js_string_memcmp(p1, pos1, p2, pos2, l);
if (res != 0)
return res;
len -= l;
pos1 += l;
if (pos1 >= p1->len) {
p1 = string_rope_iter_next(&it1);
pos1 = 0;
}
pos2 += l;
if (pos2 >= p2->len) {
p2 = string_rope_iter_next(&it2);
pos2 = 0;
}
}
if (len1 == len2)
res = 0;
else if (len1 < len2)
res = -1;
else
res = 1;
return res;
}
/* 'rope' must be a rope. return a string and modify the rope so that
it won't need to be linearized again. */
static JSValue js_linearize_string_rope(JSContext *ctx, JSValue rope)
{
StringBuffer b_s, *b = &b_s;
JSStringRope *r;
JSValue ret;
r = JS_VALUE_GET_STRING_ROPE(rope);
/* check whether it is already linearized */
if (JS_VALUE_GET_TAG(r->right) == JS_TAG_STRING &&
JS_VALUE_GET_STRING(r->right)->len == 0) {
ret = JS_DupValue(ctx, r->left);
JS_FreeValue(ctx, rope);
return ret;
}
if (string_buffer_init2(ctx, b, r->len, r->is_wide_char))
goto fail;
if (string_buffer_concat_value(b, rope))
goto fail;
ret = string_buffer_end(b);
if (r->header.ref_count > 1) {
/* update the rope so that it won't need to be linearized again */
JS_FreeValue(ctx, r->left);
JS_FreeValue(ctx, r->right);
r->left = JS_DupValue(ctx, ret);
r->right = JS_AtomToString(ctx, JS_ATOM_empty_string);
}
JS_FreeValue(ctx, rope);
return ret;
fail:
JS_FreeValue(ctx, rope);
return JS_EXCEPTION;
}
static JSValue js_rebalancee_string_rope(JSContext *ctx, JSValueConst rope);
/* op1 and op2 must be strings or string ropes */
static JSValue js_new_string_rope(JSContext *ctx, JSValue op1, JSValue op2)
{
uint32_t len;
int is_wide_char, depth;
JSStringRope *r;
JSValue res;
if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) {
JSString *p1 = JS_VALUE_GET_STRING(op1);
len = p1->len;
is_wide_char = p1->is_wide_char;
depth = 0;
} else {
JSStringRope *r1 = JS_VALUE_GET_STRING_ROPE(op1);
len = r1->len;
is_wide_char = r1->is_wide_char;
depth = r1->depth;
}
if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) {
JSString *p2 = JS_VALUE_GET_STRING(op2);
len += p2->len;
is_wide_char |= p2->is_wide_char;
} else {
JSStringRope *r2 = JS_VALUE_GET_STRING_ROPE(op2);
len += r2->len;
is_wide_char |= r2->is_wide_char;
depth = max_int(depth, r2->depth);
}
if (len > JS_STRING_LEN_MAX) {
JS_ThrowInternalError(ctx, "string too long");
goto fail;
}
r = js_malloc(ctx, sizeof(*r));
if (!r)
goto fail;
r->header.ref_count = 1;
r->len = len;
r->is_wide_char = is_wide_char;
r->depth = depth + 1;
r->left = op1;
r->right = op2;
res = JS_MKPTR(JS_TAG_STRING_ROPE, r);
if (r->depth > JS_STRING_ROPE_MAX_DEPTH) {
JSValue res2;
#ifdef DUMP_ROPE_REBALANCE
printf("rebalance: initial depth=%d\n", r->depth);
#endif
res2 = js_rebalancee_string_rope(ctx, res);
#ifdef DUMP_ROPE_REBALANCE
if (JS_VALUE_GET_TAG(res2) == JS_TAG_STRING_ROPE)
printf("rebalance: final depth=%d\n", JS_VALUE_GET_STRING_ROPE(res2)->depth);
#endif
JS_FreeValue(ctx, res);
return res2;
} else {
return res;
}
fail:
JS_FreeValue(ctx, op1);
JS_FreeValue(ctx, op2);
return JS_EXCEPTION;
}
#define ROPE_N_BUCKETS 44
/* Fibonacii numbers starting from F_2 */
static const uint32_t rope_bucket_len[ROPE_N_BUCKETS] = {
1, 2, 3, 5,
8, 13, 21, 34,
55, 89, 144, 233,
377, 610, 987, 1597,
2584, 4181, 6765, 10946,
17711, 28657, 46368, 75025,
121393, 196418, 317811, 514229,
832040, 1346269, 2178309, 3524578,
5702887, 9227465, 14930352, 24157817,
39088169, 63245986, 102334155, 165580141,
267914296, 433494437, 701408733, 1134903170, /* > JS_STRING_LEN_MAX */
};
static int js_rebalancee_string_rope_rec(JSContext *ctx, JSValue *buckets,
JSValueConst val)
{
if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) {
JSString *p = JS_VALUE_GET_STRING(val);
uint32_t len, i;
JSValue a, b;
len = p->len;
if (len == 0)
return 0; /* nothing to do */
/* find the bucket i so that rope_bucket_len[i] <= len <
rope_bucket_len[i + 1] and concatenate the ropes in the
buckets before */
a = JS_NULL;
i = 0;
while (len >= rope_bucket_len[i + 1]) {
b = buckets[i];
if (!JS_IsNull(b)) {
buckets[i] = JS_NULL;
if (JS_IsNull(a)) {
a = b;
} else {
a = js_new_string_rope(ctx, b, a);
if (JS_IsException(a))
return -1;
}
}
i++;
}
if (!JS_IsNull(a)) {
a = js_new_string_rope(ctx, a, JS_DupValue(ctx, val));
if (JS_IsException(a))
return -1;
} else {
a = JS_DupValue(ctx, val);
}
while (!JS_IsNull(buckets[i])) {
a = js_new_string_rope(ctx, buckets[i], a);
buckets[i] = JS_NULL;
if (JS_IsException(a))
return -1;
i++;
}
buckets[i] = a;
} else {
JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
js_rebalancee_string_rope_rec(ctx, buckets, r->left);
js_rebalancee_string_rope_rec(ctx, buckets, r->right);
}
return 0;
}
/* Return a new rope which is balanced. Algorithm from "Ropes: an
Alternative to Strings", Hans-J. Boehm, Russ Atkinson and Michael
Plass. */
static JSValue js_rebalancee_string_rope(JSContext *ctx, JSValueConst rope)
{
JSValue buckets[ROPE_N_BUCKETS], a, b;
int i;
for(i = 0; i < ROPE_N_BUCKETS; i++)
buckets[i] = JS_NULL;
if (js_rebalancee_string_rope_rec(ctx, buckets, rope))
goto fail;
a = JS_NULL;
for(i = 0; i < ROPE_N_BUCKETS; i++) {
b = buckets[i];
if (!JS_IsNull(b)) {
buckets[i] = JS_NULL;
if (JS_IsNull(a)) {
a = b;
} else {
a = js_new_string_rope(ctx, b, a);
if (JS_IsException(a))
goto fail;
}
}
}
/* fail safe */
if (JS_IsNull(a))
return JS_AtomToString(ctx, JS_ATOM_empty_string);
else
return a;
fail:
for(i = 0; i < ROPE_N_BUCKETS; i++) {
JS_FreeValue(ctx, buckets[i]);
}
return JS_EXCEPTION;
}
/* op1 and op2 are converted to strings. For convenience, op1 or op2 =
JS_EXCEPTION are accepted and return JS_EXCEPTION. */
static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2)
{
JSString *p1, *p2;
if (unlikely(JS_VALUE_GET_TAG(op1) != JS_TAG_STRING &&
JS_VALUE_GET_TAG(op1) != JS_TAG_STRING_ROPE)) {
op1 = JS_ToStringFree(ctx, op1);
if (JS_IsException(op1)) {
JS_FreeValue(ctx, op2);
return JS_EXCEPTION;
}
}
if (unlikely(JS_VALUE_GET_TAG(op2) != JS_TAG_STRING &&
JS_VALUE_GET_TAG(op2) != JS_TAG_STRING_ROPE)) {
op2 = JS_ToStringFree(ctx, op2);
if (JS_IsException(op2)) {
JS_FreeValue(ctx, op1);
return JS_EXCEPTION;
}
}
/* normal concatenation for short strings */
if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) {
p2 = JS_VALUE_GET_STRING(op2);
if (p2->len == 0) {
JS_FreeValue(ctx, op2);
return op1;
}
if (p2->len <= JS_STRING_ROPE_SHORT_LEN) {
if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) {
p1 = JS_VALUE_GET_STRING(op1);
if (p1->len <= JS_STRING_ROPE_SHORT2_LEN) {
return JS_ConcatString2(ctx, op1, op2);
} else {
return js_new_string_rope(ctx, op1, op2);
}
} else {
JSStringRope *r1;
r1 = JS_VALUE_GET_STRING_ROPE(op1);
if (JS_VALUE_GET_TAG(r1->right) == JS_TAG_STRING &&
JS_VALUE_GET_STRING(r1->right)->len <= JS_STRING_ROPE_SHORT_LEN) {
JSValue val, ret;
val = JS_ConcatString2(ctx, JS_DupValue(ctx, r1->right), op2);
if (JS_IsException(val)) {
JS_FreeValue(ctx, op1);
return JS_EXCEPTION;
}
ret = js_new_string_rope(ctx, JS_DupValue(ctx, r1->left), val);
JS_FreeValue(ctx, op1);
return ret;
}
}
}
} else if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) {
JSStringRope *r2;
p1 = JS_VALUE_GET_STRING(op1);
if (p1->len == 0) {
JS_FreeValue(ctx, op1);
return op2;
}
r2 = JS_VALUE_GET_STRING_ROPE(op2);
if (JS_VALUE_GET_TAG(r2->left) == JS_TAG_STRING &&
JS_VALUE_GET_STRING(r2->left)->len <= JS_STRING_ROPE_SHORT_LEN) {
JSValue val, ret;
val = JS_ConcatString2(ctx, op1, JS_DupValue(ctx, r2->left));
if (JS_IsException(val)) {
JS_FreeValue(ctx, op2);
return JS_EXCEPTION;
}
ret = js_new_string_rope(ctx, val, JS_DupValue(ctx, r2->right));
JS_FreeValue(ctx, op2);
return ret;
}
}
return js_new_string_rope(ctx, op1, op2);
}
/* Shape support */
static inline size_t get_shape_size(size_t hash_size, size_t prop_size)
@ -4757,7 +5182,9 @@ static int JS_SetObjectData(JSContext *ctx, JSValueConst obj, JSValue val)
case JS_CLASS_DATE:
case JS_CLASS_BIG_INT:
JS_FreeValue(ctx, p->u.object_data);
p->u.object_data = val;
p->u.object_data = val; /* for JS_CLASS_STRING, 'val' must
be JS_TAG_STRING (and not a
rope) */
return 0;
}
}
@ -5376,6 +5803,15 @@ void __JS_FreeValueRT(JSRuntime *rt, JSValue v)
}
}
break;
case JS_TAG_STRING_ROPE:
/* Note: recursion is acceptable because the rope depth is bounded */
{
JSStringRope *p = JS_VALUE_GET_STRING_ROPE(v);
JS_FreeValueRT(rt, p->left);
JS_FreeValueRT(rt, p->right);
js_free_rt(rt, p);
}
break;
case JS_TAG_OBJECT:
case JS_TAG_FUNCTION_BYTECODE:
{
@ -6750,6 +7186,7 @@ static JSValueConst JS_GetPrototypePrimitive(JSContext *ctx, JSValueConst val)
val = ctx->class_proto[JS_CLASS_BOOLEAN];
break;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
val = ctx->class_proto[JS_CLASS_STRING];
break;
case JS_TAG_SYMBOL:
@ -6949,14 +7386,24 @@ JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj,
{
JSString *p1 = JS_VALUE_GET_STRING(obj);
if (__JS_AtomIsTaggedInt(prop)) {
uint32_t idx, ch;
uint32_t idx;
idx = __JS_AtomToUInt32(prop);
if (idx < p1->len) {
if (p1->is_wide_char)
ch = p1->u.str16[idx];
else
ch = p1->u.str8[idx];
return js_new_string_char(ctx, ch);
return js_new_string_char(ctx, string_get(p1, idx));
}
} else if (prop == JS_ATOM_length) {
return JS_NewInt32(ctx, p1->len);
}
}
break;
case JS_TAG_STRING_ROPE:
{
JSStringRope *p1 = JS_VALUE_GET_STRING_ROPE(obj);
if (__JS_AtomIsTaggedInt(prop)) {
uint32_t idx;
idx = __JS_AtomToUInt32(prop);
if (idx < p1->len) {
return js_new_string_char(ctx, string_rope_get(obj, idx));
}
} else if (prop == JS_ATOM_length) {
return JS_NewInt32(ctx, p1->len);
@ -7263,13 +7710,12 @@ static uint32_t js_string_obj_get_length(JSContext *ctx,
JSValueConst obj)
{
JSObject *p;
JSString *p1;
uint32_t len = 0;
/* This is a class exotic method: obj class_id is JS_CLASS_STRING */
p = JS_VALUE_GET_OBJ(obj);
if (JS_VALUE_GET_TAG(p->u.object_data) == JS_TAG_STRING) {
p1 = JS_VALUE_GET_STRING(p->u.object_data);
JSString *p1 = JS_VALUE_GET_STRING(p->u.object_data);
len = p1->len;
}
return len;
@ -9768,6 +10214,12 @@ static int JS_ToBoolFree(JSContext *ctx, JSValue val)
JS_FreeValue(ctx, val);
return ret;
}
case JS_TAG_STRING_ROPE:
{
BOOL ret = JS_VALUE_GET_STRING_ROPE(val)->len != 0;
JS_FreeValue(ctx, val);
return ret;
}
case JS_TAG_SHORT_BIG_INT:
return JS_VALUE_GET_SHORT_BIG_INT(val) != 0;
case JS_TAG_BIG_INT:
@ -11558,6 +12010,7 @@ static JSValue JS_ToNumberHintFree(JSContext *ctx, JSValue val,
return JS_EXCEPTION;
goto redo;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
{
const char *str;
const char *p;
@ -12164,7 +12617,7 @@ static JSValue js_dtoa2(JSContext *ctx,
return res;
}
JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToPropertyKey)
static JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToPropertyKey)
{
uint32_t tag;
const char *str;
@ -12174,6 +12627,8 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToProperty
switch(tag) {
case JS_TAG_STRING:
return JS_DupValue(ctx, val);
case JS_TAG_STRING_ROPE:
return js_linearize_string_rope(ctx, JS_DupValue(ctx, val));
case JS_TAG_INT:
{
int len;
@ -12532,6 +12987,12 @@ static __maybe_unused void JS_DumpValueShort(JSRuntime *rt,
JS_DumpString(rt, p);
}
break;
case JS_TAG_STRING_ROPE:
{
JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
printf("[rope len=%d depth=%d]", r->len, r->depth);
}
break;
case JS_TAG_FUNCTION_BYTECODE:
{
JSFunctionBytecode *b = JS_VALUE_GET_PTR(val);
@ -12692,6 +13153,7 @@ static JSValue JS_ToBigIntFree(JSContext *ctx, JSValue val)
val = __JS_NewShortBigInt(ctx, JS_VALUE_GET_INT(val));
break;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
val = JS_StringToBigIntErr(ctx, val);
if (JS_IsException(val))
return val;
@ -13117,6 +13579,11 @@ static no_inline __exception int js_binary_arith_slow(JSContext *ctx, JSValue *s
return -1;
}
static inline BOOL tag_is_string(uint32_t tag)
{
return tag == JS_TAG_STRING || tag == JS_TAG_STRING_ROPE;
}
static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp)
{
JSValue op1, op2;
@ -13169,7 +13636,7 @@ static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp)
tag2 = JS_VALUE_GET_NORM_TAG(op2);
}
if (tag1 == JS_TAG_STRING || tag2 == JS_TAG_STRING) {
if (tag_is_string(tag1) || tag_is_string(tag2)) {
sp[-2] = JS_ConcatString(ctx, op1, op2);
if (JS_IsException(sp[-2]))
goto exception;
@ -13511,11 +13978,13 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp,
tag1 = JS_VALUE_GET_NORM_TAG(op1);
tag2 = JS_VALUE_GET_NORM_TAG(op2);
if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) {
JSString *p1, *p2;
p1 = JS_VALUE_GET_STRING(op1);
p2 = JS_VALUE_GET_STRING(op2);
res = js_string_compare(ctx, p1, p2);
if (tag_is_string(tag1) && tag_is_string(tag2)) {
if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) {
res = js_string_compare(ctx, JS_VALUE_GET_STRING(op1),
JS_VALUE_GET_STRING(op2));
} else {
res = js_string_rope_compare(ctx, op1, op2, FALSE);
}
switch(op) {
case OP_lt:
res = (res < 0);
@ -13539,16 +14008,16 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp,
goto float64_compare;
} else {
if ((((tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT) &&
tag2 == JS_TAG_STRING) ||
tag_is_string(tag2)) ||
((tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) &&
tag1 == JS_TAG_STRING))) {
if (tag1 == JS_TAG_STRING) {
tag_is_string(tag1)))) {
if (tag_is_string(tag1)) {
op1 = JS_StringToBigInt(ctx, op1);
if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT)
goto invalid_bigint_string;
}
if (tag2 == JS_TAG_STRING) {
if (tag_is_string(tag2)) {
op2 = JS_StringToBigInt(ctx, op2);
if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT) {
@ -13665,18 +14134,21 @@ static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp,
} else if ((tag1 == JS_TAG_NULL && tag2 == JS_TAG_UNDEFINED) ||
(tag2 == JS_TAG_NULL && tag1 == JS_TAG_UNDEFINED)) {
res = TRUE;
} else if ((tag1 == JS_TAG_STRING && tag_is_number(tag2)) ||
(tag2 == JS_TAG_STRING && tag_is_number(tag1))) {
} else if (tag_is_string(tag1) && tag_is_string(tag2)) {
/* needed when comparing strings and ropes */
res = js_strict_eq2(ctx, op1, op2, JS_EQ_STRICT);
} else if ((tag_is_string(tag1) && tag_is_number(tag2)) ||
(tag_is_string(tag2) && tag_is_number(tag1))) {
if (tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT ||
tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) {
if (tag1 == JS_TAG_STRING) {
if (tag_is_string(tag1)) {
op1 = JS_StringToBigInt(ctx, op1);
if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT)
goto invalid_bigint_string;
}
if (tag2 == JS_TAG_STRING) {
if (tag_is_string(tag2)) {
op2 = JS_StringToBigInt(ctx, op2);
if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT ) {
@ -13707,9 +14179,9 @@ static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp,
op2 = JS_NewInt32(ctx, JS_VALUE_GET_INT(op2));
goto redo;
} else if ((tag1 == JS_TAG_OBJECT &&
(tag_is_number(tag2) || tag2 == JS_TAG_STRING || tag2 == JS_TAG_SYMBOL)) ||
(tag_is_number(tag2) || tag_is_string(tag2) || tag2 == JS_TAG_SYMBOL)) ||
(tag2 == JS_TAG_OBJECT &&
(tag_is_number(tag1) || tag1 == JS_TAG_STRING || tag1 == JS_TAG_SYMBOL))) {
(tag_is_number(tag1) || tag_is_string(tag1) || tag1 == JS_TAG_SYMBOL))) {
op1 = JS_ToPrimitiveFree(ctx, op1, HINT_NONE);
if (JS_IsException(op1)) {
JS_FreeValue(ctx, op2);
@ -13805,14 +14277,15 @@ static BOOL js_strict_eq2(JSContext *ctx, JSValue op1, JSValue op2,
res = (tag1 == tag2);
break;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
{
JSString *p1, *p2;
if (tag1 != tag2) {
if (!tag_is_string(tag2)) {
res = FALSE;
} else if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) {
res = (js_string_compare(ctx, JS_VALUE_GET_STRING(op1),
JS_VALUE_GET_STRING(op2)) == 0);
} else {
p1 = JS_VALUE_GET_STRING(op1);
p2 = JS_VALUE_GET_STRING(op2);
res = (js_string_compare(ctx, p1, p2) == 0);
res = (js_string_rope_compare(ctx, op1, op2, TRUE) == 0);
}
}
break;
@ -14070,6 +14543,7 @@ static __exception int js_operator_typeof(JSContext *ctx, JSValueConst op1)
atom = JS_ATOM_boolean;
break;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
atom = JS_ATOM_string;
break;
case JS_TAG_OBJECT:
@ -34537,6 +35011,16 @@ static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
JS_WriteString(s, p);
}
break;
case JS_TAG_STRING_ROPE:
{
JSValue str;
str = JS_ToString(s->ctx, obj);
if (JS_IsException(str))
goto fail;
JS_WriteObjectRec(s, str);
JS_FreeValue(s->ctx, str);
}
break;
case JS_TAG_FUNCTION_BYTECODE:
if (!s->allow_bytecode)
goto invalid_tag;
@ -36141,13 +36625,22 @@ static JSValue JS_ToObject(JSContext *ctx, JSValueConst val)
obj = JS_NewObjectClass(ctx, JS_CLASS_NUMBER);
goto set_value;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
/* XXX: should call the string constructor */
{
JSString *p1 = JS_VALUE_GET_STRING(val);
JSValue str;
str = JS_ToString(ctx, val); /* ensure that we never store a rope */
if (JS_IsException(str))
return JS_EXCEPTION;
obj = JS_NewObjectClass(ctx, JS_CLASS_STRING);
JS_DefinePropertyValue(ctx, obj, JS_ATOM_length, JS_NewInt32(ctx, p1->len), 0);
if (!JS_IsException(obj)) {
JS_DefinePropertyValue(ctx, obj, JS_ATOM_length,
JS_NewInt32(ctx, JS_VALUE_GET_STRING(str)->len), 0);
JS_SetObjectData(ctx, obj, JS_DupValue(ctx, str));
}
JS_FreeValue(ctx, str);
return obj;
}
goto set_value;
case JS_TAG_BOOL:
obj = JS_NewObjectClass(ctx, JS_CLASS_BOOLEAN);
goto set_value;
@ -40415,7 +40908,8 @@ static JSValue js_string_constructor(JSContext *ctx, JSValueConst new_target,
static JSValue js_thisStringValue(JSContext *ctx, JSValueConst this_val)
{
if (JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING)
if (JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING ||
JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING_ROPE)
return JS_DupValue(ctx, this_val);
if (JS_VALUE_GET_TAG(this_val) == JS_TAG_OBJECT) {
@ -44334,6 +44828,7 @@ static JSValue js_json_check(JSContext *ctx, JSONStringifyContext *jsc,
if (JS_IsFunction(ctx, val))
break;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
case JS_TAG_INT:
case JS_TAG_FLOAT64:
case JS_TAG_BOOL:
@ -44508,6 +45003,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
concat_primitive:
switch (JS_VALUE_GET_NORM_TAG(val)) {
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
val = JS_ToQuotedStringFree(ctx, val);
if (JS_IsException(val))
goto exception;
@ -46115,6 +46611,9 @@ static uint32_t map_hash_key(JSValueConst key)
case JS_TAG_STRING:
h = hash_string(JS_VALUE_GET_STRING(key), 0);
break;
case JS_TAG_STRING_ROPE:
h = hash_string_rope(key, 0);
break;
case JS_TAG_OBJECT:
case JS_TAG_SYMBOL:
h = (uintptr_t)JS_VALUE_GET_PTR(key) * 3163;
@ -49698,6 +50197,7 @@ static JSValue JS_ToBigIntCtorFree(JSContext *ctx, JSValue val)
}
break;
case JS_TAG_STRING:
case JS_TAG_STRING_ROPE:
val = JS_StringToBigIntErr(ctx, val);
break;
case JS_TAG_OBJECT:

View File

@ -48,7 +48,6 @@ extern "C" {
typedef struct JSRuntime JSRuntime;
typedef struct JSContext JSContext;
typedef struct JSObject JSObject;
typedef struct JSClass JSClass;
typedef uint32_t JSClassID;
typedef uint32_t JSAtom;
@ -78,6 +77,7 @@ enum {
JS_TAG_BIG_INT = -9,
JS_TAG_SYMBOL = -8,
JS_TAG_STRING = -7,
JS_TAG_STRING_ROPE = -6,
JS_TAG_MODULE = -3, /* used internally */
JS_TAG_FUNCTION_BYTECODE = -2, /* used internally */
JS_TAG_OBJECT = -1,
@ -280,8 +280,6 @@ static inline JSValue __JS_NewShortBigInt(JSContext *ctx, int64_t d)
#define JS_VALUE_IS_BOTH_INT(v1, v2) ((JS_VALUE_GET_TAG(v1) | JS_VALUE_GET_TAG(v2)) == 0)
#define JS_VALUE_IS_BOTH_FLOAT(v1, v2) (JS_TAG_IS_FLOAT64(JS_VALUE_GET_TAG(v1)) && JS_TAG_IS_FLOAT64(JS_VALUE_GET_TAG(v2)))
#define JS_VALUE_GET_OBJ(v) ((JSObject *)JS_VALUE_GET_PTR(v))
#define JS_VALUE_GET_STRING(v) ((JSString *)JS_VALUE_GET_PTR(v))
#define JS_VALUE_HAS_REF_COUNT(v) ((unsigned)JS_VALUE_GET_TAG(v) >= (unsigned)JS_TAG_FIRST)
/* special values */

View File

@ -957,6 +957,32 @@ function string_build4(n)
return n * 1000;
}
/* append */
function string_build_large1(n)
{
var i, j, r, len = 20000;
for(j = 0; j < n; j++) {
r = "";
for(i = 0; i < len; i++)
r += "abcdef";
global_res = r;
}
return n * len;
}
/* prepend */
function string_build_large2(n)
{
var i, j, r, len = 20000;
for(j = 0; j < n; j++) {
r = "";
for(i = 0; i < len; i++)
r = "abcdef" + r;
global_res = r;
}
return n * len;
}
/* sort bench */
function sort_bench(text) {
@ -1336,6 +1362,8 @@ function main(argc, argv, g)
string_build2,
string_build3,
string_build4,
string_build_large1,
string_build_large2,
int_to_string,
int_toString,
float_to_string,

View File

@ -864,6 +864,32 @@ function test_generator()
assert(v.value === 6 && v.done === true);
}
function rope_concat(n, dir)
{
var i, s;
s = "";
if (dir > 0) {
for(i = 0; i < n; i++)
s += String.fromCharCode(i & 0xffff);
} else {
for(i = n - 1; i >= 0; i--)
s = String.fromCharCode(i & 0xffff) + s;
}
for(i = 0; i < n; i++) {
/* test before the assert to go faster */
if (s.charCodeAt(i) != (i & 0xffff)) {
assert(s.charCodeAt(i), i & 0xffff);
}
}
}
function test_rope()
{
rope_concat(100000, 1);
rope_concat(100000, -1);
}
test();
test_function();
test_enum();
@ -880,3 +906,4 @@ test_symbol();
test_map();
test_weak_map();
test_generator();
test_rope();