mirror of
https://github.com/bellard/quickjs.git
synced 2025-05-09 18:15:43 +08:00
fixed CONFIG_ALL_UNICODE compilation
This commit is contained in:
parent
9bd10d8eb9
commit
1be68b3345
402
libunicode.c
402
libunicode.c
@ -537,6 +537,207 @@ int cr_invert(CharRange *cr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define CASE_U (1 << 0)
|
||||
#define CASE_L (1 << 1)
|
||||
#define CASE_F (1 << 2)
|
||||
|
||||
/* use the case conversion table to generate range of characters.
|
||||
CASE_U: set char if modified by uppercasing,
|
||||
CASE_L: set char if modified by lowercasing,
|
||||
CASE_F: set char if modified by case folding,
|
||||
*/
|
||||
static int unicode_case1(CharRange *cr, int case_mask)
|
||||
{
|
||||
#define MR(x) (1 << RUN_TYPE_ ## x)
|
||||
const uint32_t tab_run_mask[3] = {
|
||||
MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
|
||||
MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
|
||||
|
||||
MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
|
||||
|
||||
MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
|
||||
};
|
||||
#undef MR
|
||||
uint32_t mask, v, code, type, len, i, idx;
|
||||
|
||||
if (case_mask == 0)
|
||||
return 0;
|
||||
mask = 0;
|
||||
for(i = 0; i < 3; i++) {
|
||||
if ((case_mask >> i) & 1)
|
||||
mask |= tab_run_mask[i];
|
||||
}
|
||||
for(idx = 0; idx < countof(case_conv_table1); idx++) {
|
||||
v = case_conv_table1[idx];
|
||||
type = (v >> (32 - 17 - 7 - 4)) & 0xf;
|
||||
code = v >> (32 - 17);
|
||||
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||
if ((mask >> type) & 1) {
|
||||
// printf("%d: type=%d %04x %04x\n", idx, type, code, code + len - 1);
|
||||
switch(type) {
|
||||
case RUN_TYPE_UL:
|
||||
if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
|
||||
goto def_case;
|
||||
code += ((case_mask & CASE_U) != 0);
|
||||
for(i = 0; i < len; i += 2) {
|
||||
if (cr_add_interval(cr, code + i, code + i + 1))
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case RUN_TYPE_LSU:
|
||||
if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
|
||||
goto def_case;
|
||||
if (!(case_mask & CASE_U)) {
|
||||
if (cr_add_interval(cr, code, code + 1))
|
||||
return -1;
|
||||
}
|
||||
if (cr_add_interval(cr, code + 1, code + 2))
|
||||
return -1;
|
||||
if (case_mask & CASE_U) {
|
||||
if (cr_add_interval(cr, code + 2, code + 3))
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
def_case:
|
||||
if (cr_add_interval(cr, code, code + len))
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int point_cmp(const void *p1, const void *p2, void *arg)
|
||||
{
|
||||
uint32_t v1 = *(uint32_t *)p1;
|
||||
uint32_t v2 = *(uint32_t *)p2;
|
||||
return (v1 > v2) - (v1 < v2);
|
||||
}
|
||||
|
||||
static void cr_sort_and_remove_overlap(CharRange *cr)
|
||||
{
|
||||
uint32_t start, end, start1, end1, i, j;
|
||||
|
||||
/* the resulting ranges are not necessarily sorted and may overlap */
|
||||
rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);
|
||||
j = 0;
|
||||
for(i = 0; i < cr->len; ) {
|
||||
start = cr->points[i];
|
||||
end = cr->points[i + 1];
|
||||
i += 2;
|
||||
while (i < cr->len) {
|
||||
start1 = cr->points[i];
|
||||
end1 = cr->points[i + 1];
|
||||
if (start1 > end) {
|
||||
/* |------|
|
||||
* |-------| */
|
||||
break;
|
||||
} else if (end1 <= end) {
|
||||
/* |------|
|
||||
* |--| */
|
||||
i += 2;
|
||||
} else {
|
||||
/* |------|
|
||||
* |-------| */
|
||||
end = end1;
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
cr->points[j] = start;
|
||||
cr->points[j + 1] = end;
|
||||
j += 2;
|
||||
}
|
||||
cr->len = j;
|
||||
}
|
||||
|
||||
/* canonicalize a character set using the JS regex case folding rules
|
||||
(see lre_canonicalize()) */
|
||||
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
|
||||
{
|
||||
CharRange cr_inter, cr_mask, cr_result, cr_sub;
|
||||
uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
|
||||
|
||||
cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
|
||||
cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
|
||||
cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
|
||||
cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
|
||||
|
||||
if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
|
||||
goto fail;
|
||||
if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
|
||||
goto fail;
|
||||
|
||||
if (cr_invert(&cr_mask))
|
||||
goto fail;
|
||||
if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
|
||||
goto fail;
|
||||
|
||||
/* cr_inter = cr & cr_mask */
|
||||
/* cr_sub = cr & ~cr_mask */
|
||||
|
||||
/* use the case conversion table to compute the result */
|
||||
d_start = -1;
|
||||
d_end = -1;
|
||||
idx = 0;
|
||||
v = case_conv_table1[idx];
|
||||
code = v >> (32 - 17);
|
||||
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||
for(i = 0; i < cr_inter.len; i += 2) {
|
||||
start = cr_inter.points[i];
|
||||
end = cr_inter.points[i + 1];
|
||||
|
||||
for(c = start; c < end; c++) {
|
||||
for(;;) {
|
||||
if (c >= code && c < code + len)
|
||||
break;
|
||||
idx++;
|
||||
assert(idx < countof(case_conv_table1));
|
||||
v = case_conv_table1[idx];
|
||||
code = v >> (32 - 17);
|
||||
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||
}
|
||||
d = lre_case_folding_entry(c, idx, v, is_unicode);
|
||||
/* try to merge with the current interval */
|
||||
if (d_start == -1) {
|
||||
d_start = d;
|
||||
d_end = d + 1;
|
||||
} else if (d_end == d) {
|
||||
d_end++;
|
||||
} else {
|
||||
cr_add_interval(&cr_result, d_start, d_end);
|
||||
d_start = d;
|
||||
d_end = d + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (d_start != -1) {
|
||||
if (cr_add_interval(&cr_result, d_start, d_end))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* the resulting ranges are not necessarily sorted and may overlap */
|
||||
cr_sort_and_remove_overlap(&cr_result);
|
||||
|
||||
/* or with the character not affected by the case folding */
|
||||
cr->len = 0;
|
||||
if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
|
||||
goto fail;
|
||||
|
||||
cr_free(&cr_inter);
|
||||
cr_free(&cr_mask);
|
||||
cr_free(&cr_result);
|
||||
cr_free(&cr_sub);
|
||||
return 0;
|
||||
fail:
|
||||
cr_free(&cr_inter);
|
||||
cr_free(&cr_mask);
|
||||
cr_free(&cr_result);
|
||||
cr_free(&cr_sub);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ALL_UNICODE
|
||||
|
||||
BOOL lre_is_id_start(uint32_t c)
|
||||
@ -1296,207 +1497,6 @@ static int unicode_prop1(CharRange *cr, int prop_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define CASE_U (1 << 0)
|
||||
#define CASE_L (1 << 1)
|
||||
#define CASE_F (1 << 2)
|
||||
|
||||
/* use the case conversion table to generate range of characters.
|
||||
CASE_U: set char if modified by uppercasing,
|
||||
CASE_L: set char if modified by lowercasing,
|
||||
CASE_F: set char if modified by case folding,
|
||||
*/
|
||||
static int unicode_case1(CharRange *cr, int case_mask)
|
||||
{
|
||||
#define MR(x) (1 << RUN_TYPE_ ## x)
|
||||
const uint32_t tab_run_mask[3] = {
|
||||
MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
|
||||
MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
|
||||
|
||||
MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
|
||||
|
||||
MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
|
||||
};
|
||||
#undef MR
|
||||
uint32_t mask, v, code, type, len, i, idx;
|
||||
|
||||
if (case_mask == 0)
|
||||
return 0;
|
||||
mask = 0;
|
||||
for(i = 0; i < 3; i++) {
|
||||
if ((case_mask >> i) & 1)
|
||||
mask |= tab_run_mask[i];
|
||||
}
|
||||
for(idx = 0; idx < countof(case_conv_table1); idx++) {
|
||||
v = case_conv_table1[idx];
|
||||
type = (v >> (32 - 17 - 7 - 4)) & 0xf;
|
||||
code = v >> (32 - 17);
|
||||
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||
if ((mask >> type) & 1) {
|
||||
// printf("%d: type=%d %04x %04x\n", idx, type, code, code + len - 1);
|
||||
switch(type) {
|
||||
case RUN_TYPE_UL:
|
||||
if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
|
||||
goto def_case;
|
||||
code += ((case_mask & CASE_U) != 0);
|
||||
for(i = 0; i < len; i += 2) {
|
||||
if (cr_add_interval(cr, code + i, code + i + 1))
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case RUN_TYPE_LSU:
|
||||
if ((case_mask & CASE_U) && (case_mask & (CASE_L | CASE_F)))
|
||||
goto def_case;
|
||||
if (!(case_mask & CASE_U)) {
|
||||
if (cr_add_interval(cr, code, code + 1))
|
||||
return -1;
|
||||
}
|
||||
if (cr_add_interval(cr, code + 1, code + 2))
|
||||
return -1;
|
||||
if (case_mask & CASE_U) {
|
||||
if (cr_add_interval(cr, code + 2, code + 3))
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
def_case:
|
||||
if (cr_add_interval(cr, code, code + len))
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int point_cmp(const void *p1, const void *p2, void *arg)
|
||||
{
|
||||
uint32_t v1 = *(uint32_t *)p1;
|
||||
uint32_t v2 = *(uint32_t *)p2;
|
||||
return (v1 > v2) - (v1 < v2);
|
||||
}
|
||||
|
||||
static void cr_sort_and_remove_overlap(CharRange *cr)
|
||||
{
|
||||
uint32_t start, end, start1, end1, i, j;
|
||||
|
||||
/* the resulting ranges are not necessarily sorted and may overlap */
|
||||
rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);
|
||||
j = 0;
|
||||
for(i = 0; i < cr->len; ) {
|
||||
start = cr->points[i];
|
||||
end = cr->points[i + 1];
|
||||
i += 2;
|
||||
while (i < cr->len) {
|
||||
start1 = cr->points[i];
|
||||
end1 = cr->points[i + 1];
|
||||
if (start1 > end) {
|
||||
/* |------|
|
||||
* |-------| */
|
||||
break;
|
||||
} else if (end1 <= end) {
|
||||
/* |------|
|
||||
* |--| */
|
||||
i += 2;
|
||||
} else {
|
||||
/* |------|
|
||||
* |-------| */
|
||||
end = end1;
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
cr->points[j] = start;
|
||||
cr->points[j + 1] = end;
|
||||
j += 2;
|
||||
}
|
||||
cr->len = j;
|
||||
}
|
||||
|
||||
/* canonicalize a character set using the JS regex case folding rules
|
||||
(see lre_canonicalize()) */
|
||||
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
|
||||
{
|
||||
CharRange cr_inter, cr_mask, cr_result, cr_sub;
|
||||
uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
|
||||
|
||||
cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
|
||||
cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
|
||||
cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
|
||||
cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
|
||||
|
||||
if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
|
||||
goto fail;
|
||||
if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
|
||||
goto fail;
|
||||
|
||||
if (cr_invert(&cr_mask))
|
||||
goto fail;
|
||||
if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
|
||||
goto fail;
|
||||
|
||||
/* cr_inter = cr & cr_mask */
|
||||
/* cr_sub = cr & ~cr_mask */
|
||||
|
||||
/* use the case conversion table to compute the result */
|
||||
d_start = -1;
|
||||
d_end = -1;
|
||||
idx = 0;
|
||||
v = case_conv_table1[idx];
|
||||
code = v >> (32 - 17);
|
||||
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||
for(i = 0; i < cr_inter.len; i += 2) {
|
||||
start = cr_inter.points[i];
|
||||
end = cr_inter.points[i + 1];
|
||||
|
||||
for(c = start; c < end; c++) {
|
||||
for(;;) {
|
||||
if (c >= code && c < code + len)
|
||||
break;
|
||||
idx++;
|
||||
assert(idx < countof(case_conv_table1));
|
||||
v = case_conv_table1[idx];
|
||||
code = v >> (32 - 17);
|
||||
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||
}
|
||||
d = lre_case_folding_entry(c, idx, v, is_unicode);
|
||||
/* try to merge with the current interval */
|
||||
if (d_start == -1) {
|
||||
d_start = d;
|
||||
d_end = d + 1;
|
||||
} else if (d_end == d) {
|
||||
d_end++;
|
||||
} else {
|
||||
cr_add_interval(&cr_result, d_start, d_end);
|
||||
d_start = d;
|
||||
d_end = d + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (d_start != -1) {
|
||||
if (cr_add_interval(&cr_result, d_start, d_end))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* the resulting ranges are not necessarily sorted and may overlap */
|
||||
cr_sort_and_remove_overlap(&cr_result);
|
||||
|
||||
/* or with the character not affected by the case folding */
|
||||
cr->len = 0;
|
||||
if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
|
||||
goto fail;
|
||||
|
||||
cr_free(&cr_inter);
|
||||
cr_free(&cr_mask);
|
||||
cr_free(&cr_result);
|
||||
cr_free(&cr_sub);
|
||||
return 0;
|
||||
fail:
|
||||
cr_free(&cr_inter);
|
||||
cr_free(&cr_mask);
|
||||
cr_free(&cr_result);
|
||||
cr_free(&cr_sub);
|
||||
return -1;
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
POP_GC,
|
||||
POP_PROP,
|
||||
|
Loading…
x
Reference in New Issue
Block a user