mirror of
				https://github.com/bellard/quickjs.git
				synced 2025-05-29 01:49:18 +08:00 
			
		
		
		
	added regexp modifiers
This commit is contained in:
		
							parent
							
								
									9c973a8923
								
							
						
					
					
						commit
						f95b8ba1bb
					
				@ -26,11 +26,15 @@
 | 
			
		||||
 | 
			
		||||
DEF(invalid, 1) /* never used */
 | 
			
		||||
DEF(char, 3)
 | 
			
		||||
DEF(char_i, 3)
 | 
			
		||||
DEF(char32, 5)
 | 
			
		||||
DEF(char32_i, 5)
 | 
			
		||||
DEF(dot, 1)
 | 
			
		||||
DEF(any, 1) /* same as dot but match any character including line terminator */
 | 
			
		||||
DEF(line_start, 1)
 | 
			
		||||
DEF(line_start_m, 1)
 | 
			
		||||
DEF(line_end, 1)
 | 
			
		||||
DEF(line_end_m, 1)
 | 
			
		||||
DEF(goto, 5)
 | 
			
		||||
DEF(split_goto_first, 5)
 | 
			
		||||
DEF(split_next_first, 5)
 | 
			
		||||
@ -42,11 +46,17 @@ DEF(loop, 5) /* decrement the top of the stack and goto if != 0 */
 | 
			
		||||
DEF(push_i32, 5) /* push integer on the stack */
 | 
			
		||||
DEF(drop, 1)
 | 
			
		||||
DEF(word_boundary, 1)
 | 
			
		||||
DEF(word_boundary_i, 1)
 | 
			
		||||
DEF(not_word_boundary, 1)
 | 
			
		||||
DEF(not_word_boundary_i, 1)
 | 
			
		||||
DEF(back_reference, 2)
 | 
			
		||||
DEF(backward_back_reference, 2) /* must come after back_reference */
 | 
			
		||||
DEF(back_reference_i, 2) /* must come after */
 | 
			
		||||
DEF(backward_back_reference, 2) /* must come after */
 | 
			
		||||
DEF(backward_back_reference_i, 2) /* must come after */
 | 
			
		||||
DEF(range, 3) /* variable length */
 | 
			
		||||
DEF(range_i, 3) /* variable length */
 | 
			
		||||
DEF(range32, 3) /* variable length */
 | 
			
		||||
DEF(range32_i, 3) /* variable length */
 | 
			
		||||
DEF(lookahead, 5)
 | 
			
		||||
DEF(negative_lookahead, 5)
 | 
			
		||||
DEF(push_char_pos, 1) /* push the character position on the stack */
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										178
									
								
								libregexp.c
									
									
									
									
									
								
							
							
						
						
									
										178
									
								
								libregexp.c
									
									
									
									
									
								
							@ -73,6 +73,7 @@ typedef struct {
 | 
			
		||||
    BOOL is_unicode;
 | 
			
		||||
    BOOL unicode_sets; /* if set, is_unicode is also set */
 | 
			
		||||
    BOOL ignore_case;
 | 
			
		||||
    BOOL multi_line;
 | 
			
		||||
    BOOL dotall;
 | 
			
		||||
    int capture_count;
 | 
			
		||||
    int total_capture_count; /* -1 = not computed yet */
 | 
			
		||||
@ -499,6 +500,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
 | 
			
		||||
        printf("%s", reopcode_info[opcode].name);
 | 
			
		||||
        switch(opcode) {
 | 
			
		||||
        case REOP_char:
 | 
			
		||||
        case REOP_char_i:
 | 
			
		||||
            val = get_u16(buf + pos + 1);
 | 
			
		||||
            if (val >= ' ' && val <= 126)
 | 
			
		||||
                printf(" '%c'", val);
 | 
			
		||||
@ -506,6 +508,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
 | 
			
		||||
                printf(" 0x%04x", val);
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_char32:
 | 
			
		||||
        case REOP_char32_i:
 | 
			
		||||
            val = get_u32(buf + pos + 1);
 | 
			
		||||
            if (val >= ' ' && val <= 126)
 | 
			
		||||
                printf(" '%c'", val);
 | 
			
		||||
@ -532,7 +535,9 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
 | 
			
		||||
        case REOP_save_start:
 | 
			
		||||
        case REOP_save_end:
 | 
			
		||||
        case REOP_back_reference:
 | 
			
		||||
        case REOP_back_reference_i:
 | 
			
		||||
        case REOP_backward_back_reference:
 | 
			
		||||
        case REOP_backward_back_reference_i:
 | 
			
		||||
            printf(" %u", buf[pos + 1]);
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_save_reset:
 | 
			
		||||
@ -543,6 +548,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
 | 
			
		||||
            printf(" %d", val);
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_range:
 | 
			
		||||
        case REOP_range_i:
 | 
			
		||||
            {
 | 
			
		||||
                int n, i;
 | 
			
		||||
                n = get_u16(buf + pos + 1);
 | 
			
		||||
@ -554,6 +560,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_range32:
 | 
			
		||||
        case REOP_range32_i:
 | 
			
		||||
            {
 | 
			
		||||
                int n, i;
 | 
			
		||||
                n = get_u16(buf + pos + 1);
 | 
			
		||||
@ -1172,7 +1179,7 @@ static int re_emit_range(REParseState *s, const CharRange *cr)
 | 
			
		||||
        if (high <= 0xffff) {
 | 
			
		||||
            /* can use 16 bit ranges with the convention that 0xffff =
 | 
			
		||||
               infinity */
 | 
			
		||||
            re_emit_op_u16(s, REOP_range, len);
 | 
			
		||||
            re_emit_op_u16(s, s->ignore_case ? REOP_range_i : REOP_range, len);
 | 
			
		||||
            for(i = 0; i < cr->len; i += 2) {
 | 
			
		||||
                dbuf_put_u16(&s->byte_code, cr->points[i]);
 | 
			
		||||
                high = cr->points[i + 1] - 1;
 | 
			
		||||
@ -1181,7 +1188,7 @@ static int re_emit_range(REParseState *s, const CharRange *cr)
 | 
			
		||||
                dbuf_put_u16(&s->byte_code, high);
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            re_emit_op_u16(s, REOP_range32, len);
 | 
			
		||||
            re_emit_op_u16(s, s->ignore_case ? REOP_range32_i : REOP_range32, len);
 | 
			
		||||
            for(i = 0; i < cr->len; i += 2) {
 | 
			
		||||
                dbuf_put_u32(&s->byte_code, cr->points[i]);
 | 
			
		||||
                dbuf_put_u32(&s->byte_code, cr->points[i + 1] - 1);
 | 
			
		||||
@ -1198,10 +1205,18 @@ static int re_string_cmp_len(const void *a, const void *b, void *arg)
 | 
			
		||||
    return (p1->len < p2->len) - (p1->len > p2->len);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Emit the opcode matching the single code point 'c'.
   Code points above 0xffff use the 32 bit operand form, all others
   the 16 bit form. The '_i' opcode variant is selected when
   s->ignore_case is set at the time of emission. */
static void re_emit_char(REParseState *s, int c)
{
    if (c > 0xffff) {
        /* does not fit in 16 bits: 32 bit operand */
        re_emit_op_u32(s, s->ignore_case ? REOP_char32_i : REOP_char32, c);
    } else {
        re_emit_op_u16(s, s->ignore_case ? REOP_char_i : REOP_char, c);
    }
}
 | 
			
		||||
 | 
			
		||||
static int re_emit_string_list(REParseState *s, const REStringList *sl)
 | 
			
		||||
{
 | 
			
		||||
    REString **tab, *p;
 | 
			
		||||
    int i, j, c, split_pos, last_match_pos, n;
 | 
			
		||||
    int i, j, split_pos, last_match_pos, n;
 | 
			
		||||
    BOOL has_empty_string, is_last;
 | 
			
		||||
    
 | 
			
		||||
    //    re_string_list_dump("sl", sl);
 | 
			
		||||
@ -1241,11 +1256,7 @@ static int re_emit_string_list(REParseState *s, const REStringList *sl)
 | 
			
		||||
            else
 | 
			
		||||
                split_pos = 0;
 | 
			
		||||
            for(j = 0; j < p->len; j++) {
 | 
			
		||||
                c = p->buf[j];
 | 
			
		||||
                if (c <= 0xffff)
 | 
			
		||||
                    re_emit_op_u16(s, REOP_char, c);
 | 
			
		||||
                else
 | 
			
		||||
                    re_emit_op_u32(s, REOP_char32, c);
 | 
			
		||||
                re_emit_char(s, p->buf[j]);
 | 
			
		||||
            }
 | 
			
		||||
            if (!is_last) {
 | 
			
		||||
                last_match_pos = re_emit_op_u32(s, REOP_goto, last_match_pos);
 | 
			
		||||
@ -1497,27 +1508,35 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
 | 
			
		||||
        len = reopcode_info[opcode].size;
 | 
			
		||||
        switch(opcode) {
 | 
			
		||||
        case REOP_range:
 | 
			
		||||
        case REOP_range_i:
 | 
			
		||||
            val = get_u16(bc_buf + pos + 1);
 | 
			
		||||
            len += val * 4;
 | 
			
		||||
            goto simple_char;
 | 
			
		||||
        case REOP_range32:
 | 
			
		||||
        case REOP_range32_i:
 | 
			
		||||
            val = get_u16(bc_buf + pos + 1);
 | 
			
		||||
            len += val * 8;
 | 
			
		||||
            goto simple_char;
 | 
			
		||||
        case REOP_char:
 | 
			
		||||
        case REOP_char_i:
 | 
			
		||||
        case REOP_char32:
 | 
			
		||||
        case REOP_char32_i:
 | 
			
		||||
        case REOP_dot:
 | 
			
		||||
        case REOP_any:
 | 
			
		||||
        simple_char:
 | 
			
		||||
            ret = FALSE;
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_line_start:
 | 
			
		||||
        case REOP_line_start_m:
 | 
			
		||||
        case REOP_line_end:
 | 
			
		||||
        case REOP_line_end_m:
 | 
			
		||||
        case REOP_push_i32:
 | 
			
		||||
        case REOP_push_char_pos:
 | 
			
		||||
        case REOP_drop:
 | 
			
		||||
        case REOP_word_boundary:
 | 
			
		||||
        case REOP_word_boundary_i:
 | 
			
		||||
        case REOP_not_word_boundary:
 | 
			
		||||
        case REOP_not_word_boundary_i:
 | 
			
		||||
        case REOP_prev:
 | 
			
		||||
            /* no effect */
 | 
			
		||||
            break;
 | 
			
		||||
@ -1525,7 +1544,9 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
 | 
			
		||||
        case REOP_save_end:
 | 
			
		||||
        case REOP_save_reset:
 | 
			
		||||
        case REOP_back_reference:
 | 
			
		||||
        case REOP_back_reference_i:
 | 
			
		||||
        case REOP_backward_back_reference:
 | 
			
		||||
        case REOP_backward_back_reference_i:
 | 
			
		||||
            break;
 | 
			
		||||
        default:
 | 
			
		||||
            /* safe behavior: we cannot predict the outcome */
 | 
			
		||||
@ -1550,24 +1571,32 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
 | 
			
		||||
        len = reopcode_info[opcode].size;
 | 
			
		||||
        switch(opcode) {
 | 
			
		||||
        case REOP_range:
 | 
			
		||||
        case REOP_range_i:
 | 
			
		||||
            val = get_u16(bc_buf + pos + 1);
 | 
			
		||||
            len += val * 4;
 | 
			
		||||
            goto simple_char;
 | 
			
		||||
        case REOP_range32:
 | 
			
		||||
        case REOP_range32_i:
 | 
			
		||||
            val = get_u16(bc_buf + pos + 1);
 | 
			
		||||
            len += val * 8;
 | 
			
		||||
            goto simple_char;
 | 
			
		||||
        case REOP_char:
 | 
			
		||||
        case REOP_char_i:
 | 
			
		||||
        case REOP_char32:
 | 
			
		||||
        case REOP_char32_i:
 | 
			
		||||
        case REOP_dot:
 | 
			
		||||
        case REOP_any:
 | 
			
		||||
        simple_char:
 | 
			
		||||
            count++;
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_line_start:
 | 
			
		||||
        case REOP_line_start_m:
 | 
			
		||||
        case REOP_line_end:
 | 
			
		||||
        case REOP_line_end_m:
 | 
			
		||||
        case REOP_word_boundary:
 | 
			
		||||
        case REOP_word_boundary_i:
 | 
			
		||||
        case REOP_not_word_boundary:
 | 
			
		||||
        case REOP_not_word_boundary_i:
 | 
			
		||||
            break;
 | 
			
		||||
        default:
 | 
			
		||||
            return -1;
 | 
			
		||||
@ -1725,6 +1754,41 @@ static int find_group_name(REParseState *s, const char *name)
 | 
			
		||||
 | 
			
		||||
static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir);
 | 
			
		||||
 | 
			
		||||
/* Parse a run of modifier letters ('i', 'm', 's') starting at *pp.
   Parsing stops at the first byte which is not one of these letters.
   On success, *pp is advanced to that byte and the OR of the
   corresponding LRE_FLAG_x values is returned (0 if no letter was
   present). If a letter appears twice, the result of
   re_parse_error() is returned and *pp is left unchanged. */
static int re_parse_modifiers(REParseState *s, const uint8_t **pp)
{
    const uint8_t *cur;
    int seen = 0;

    for(cur = *pp; ; cur++) {
        int flag;

        switch(*cur) {
        case 'i':
            flag = LRE_FLAG_IGNORECASE;
            break;
        case 'm':
            flag = LRE_FLAG_MULTILINE;
            break;
        case 's':
            flag = LRE_FLAG_DOTALL;
            break;
        default:
            /* end of the modifier list */
            *pp = cur;
            return seen;
        }
        /* each modifier may appear at most once in a list */
        if (seen & flag)
            return re_parse_error(s, "duplicate modifier: '%c'", *cur);
        seen |= flag;
    }
}
 | 
			
		||||
 | 
			
		||||
/* Return the new value of a flag whose current value is 'val'.
   'mask' selects the flag bit to look at in 'add_mask' and
   'remove_mask'. If the bit is set in 'remove_mask' the result is
   FALSE (removal takes precedence), else if it is set in 'add_mask'
   the result is TRUE, otherwise 'val' is returned unchanged. */
static BOOL update_modifier(BOOL val, int add_mask, int remove_mask,
                            int mask)
{
    if ((remove_mask & mask) != 0)
        return FALSE;
    if ((add_mask & mask) != 0)
        return TRUE;
    return val;
}
 | 
			
		||||
 | 
			
		||||
static int re_parse_term(REParseState *s, BOOL is_backward_dir)
 | 
			
		||||
{
 | 
			
		||||
    const uint8_t *p;
 | 
			
		||||
@ -1739,11 +1803,11 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
 | 
			
		||||
    switch(c) {
 | 
			
		||||
    case '^':
 | 
			
		||||
        p++;
 | 
			
		||||
        re_emit_op(s, REOP_line_start);
 | 
			
		||||
        re_emit_op(s, s->multi_line ? REOP_line_start_m : REOP_line_start);
 | 
			
		||||
        break;
 | 
			
		||||
    case '$':
 | 
			
		||||
        p++;
 | 
			
		||||
        re_emit_op(s, REOP_line_end);
 | 
			
		||||
        re_emit_op(s, s->multi_line ? REOP_line_end_m : REOP_line_end);
 | 
			
		||||
        break;
 | 
			
		||||
    case '.':
 | 
			
		||||
        p++;
 | 
			
		||||
@ -1793,6 +1857,44 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
 | 
			
		||||
                p = s->buf_ptr;
 | 
			
		||||
                if (re_parse_expect(s, &p, ')'))
 | 
			
		||||
                    return -1;
 | 
			
		||||
            } else if (p[2] == 'i' || p[2] == 'm' || p[2] == 's' || p[2] == '-') {
 | 
			
		||||
                BOOL saved_ignore_case, saved_multi_line, saved_dotall;
 | 
			
		||||
                int add_mask, remove_mask;
 | 
			
		||||
                p += 2;
 | 
			
		||||
                remove_mask = 0;
 | 
			
		||||
                add_mask = re_parse_modifiers(s, &p);
 | 
			
		||||
                if (add_mask < 0)
 | 
			
		||||
                    return -1;
 | 
			
		||||
                if (*p == '-') {
 | 
			
		||||
                    p++;
 | 
			
		||||
                    remove_mask = re_parse_modifiers(s, &p);
 | 
			
		||||
                    if (remove_mask < 0)
 | 
			
		||||
                        return -1;
 | 
			
		||||
                }
 | 
			
		||||
                if ((add_mask == 0 && remove_mask == 0) ||
 | 
			
		||||
                    (add_mask & remove_mask) != 0) {
 | 
			
		||||
                    return re_parse_error(s, "invalid modifiers");
 | 
			
		||||
                }
 | 
			
		||||
                if (re_parse_expect(s, &p, ':'))
 | 
			
		||||
                    return -1;
 | 
			
		||||
                saved_ignore_case = s->ignore_case;
 | 
			
		||||
                saved_multi_line = s->multi_line;
 | 
			
		||||
                saved_dotall = s->dotall;
 | 
			
		||||
                s->ignore_case = update_modifier(s->ignore_case, add_mask, remove_mask, LRE_FLAG_IGNORECASE);
 | 
			
		||||
                s->multi_line = update_modifier(s->multi_line, add_mask, remove_mask, LRE_FLAG_MULTILINE);
 | 
			
		||||
                s->dotall = update_modifier(s->dotall, add_mask, remove_mask, LRE_FLAG_DOTALL);
 | 
			
		||||
 | 
			
		||||
                last_atom_start = s->byte_code.size;
 | 
			
		||||
                last_capture_count = s->capture_count;
 | 
			
		||||
                s->buf_ptr = p;
 | 
			
		||||
                if (re_parse_disjunction(s, is_backward_dir))
 | 
			
		||||
                    return -1;
 | 
			
		||||
                p = s->buf_ptr;
 | 
			
		||||
                if (re_parse_expect(s, &p, ')'))
 | 
			
		||||
                    return -1;
 | 
			
		||||
                s->ignore_case = saved_ignore_case;
 | 
			
		||||
                s->multi_line = saved_multi_line;
 | 
			
		||||
                s->dotall = saved_dotall;
 | 
			
		||||
            } else if ((p[2] == '=' || p[2] == '!')) {
 | 
			
		||||
                is_neg = (p[2] == '!');
 | 
			
		||||
                is_backward_lookahead = FALSE;
 | 
			
		||||
@ -1871,7 +1973,11 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
 | 
			
		||||
        switch(p[1]) {
 | 
			
		||||
        case 'b':
 | 
			
		||||
        case 'B':
 | 
			
		||||
            re_emit_op(s, REOP_word_boundary + (p[1] != 'b'));
 | 
			
		||||
            if (p[1] != 'b') {
 | 
			
		||||
                re_emit_op(s, s->ignore_case ? REOP_not_word_boundary_i : REOP_not_word_boundary);
 | 
			
		||||
            } else {
 | 
			
		||||
                re_emit_op(s, s->ignore_case ? REOP_word_boundary_i : REOP_word_boundary);
 | 
			
		||||
            }
 | 
			
		||||
            p += 2;
 | 
			
		||||
            break;
 | 
			
		||||
        case 'k':
 | 
			
		||||
@ -1960,7 +2066,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
 | 
			
		||||
            emit_back_reference:
 | 
			
		||||
                last_atom_start = s->byte_code.size;
 | 
			
		||||
                last_capture_count = s->capture_count;
 | 
			
		||||
                re_emit_op_u8(s, REOP_back_reference + is_backward_dir, c);
 | 
			
		||||
                
 | 
			
		||||
                re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, c);
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        default:
 | 
			
		||||
@ -2001,10 +2108,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
 | 
			
		||||
        } else {
 | 
			
		||||
            if (s->ignore_case)
 | 
			
		||||
                c = lre_canonicalize(c, s->is_unicode);
 | 
			
		||||
            if (c <= 0xffff)
 | 
			
		||||
                re_emit_op_u16(s, REOP_char, c);
 | 
			
		||||
            else
 | 
			
		||||
                re_emit_op_u32(s, REOP_char32, c);
 | 
			
		||||
            re_emit_char(s, c);
 | 
			
		||||
        }
 | 
			
		||||
        if (is_backward_dir)
 | 
			
		||||
            re_emit_op(s, REOP_prev);
 | 
			
		||||
@ -2314,10 +2418,12 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
 | 
			
		||||
            stack_size--;
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_range:
 | 
			
		||||
        case REOP_range_i:
 | 
			
		||||
            val = get_u16(bc_buf + pos + 1);
 | 
			
		||||
            len += val * 4;
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_range32:
 | 
			
		||||
        case REOP_range32_i:
 | 
			
		||||
            val = get_u16(bc_buf + pos + 1);
 | 
			
		||||
            len += val * 8;
 | 
			
		||||
            break;
 | 
			
		||||
@ -2348,6 +2454,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
 | 
			
		||||
    s->is_unicode = ((re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0);
 | 
			
		||||
    is_sticky = ((re_flags & LRE_FLAG_STICKY) != 0);
 | 
			
		||||
    s->ignore_case = ((re_flags & LRE_FLAG_IGNORECASE) != 0);
 | 
			
		||||
    s->multi_line = ((re_flags & LRE_FLAG_MULTILINE) != 0);
 | 
			
		||||
    s->dotall = ((re_flags & LRE_FLAG_DOTALL) != 0);
 | 
			
		||||
    s->unicode_sets = ((re_flags & LRE_FLAG_UNICODE_SETS) != 0);
 | 
			
		||||
    s->capture_count = 1;
 | 
			
		||||
@ -2545,8 +2652,6 @@ typedef struct {
 | 
			
		||||
    int cbuf_type;
 | 
			
		||||
    int capture_count;
 | 
			
		||||
    int stack_size_max;
 | 
			
		||||
    BOOL multi_line;
 | 
			
		||||
    BOOL ignore_case;
 | 
			
		||||
    BOOL is_unicode;
 | 
			
		||||
    int interrupt_counter;
 | 
			
		||||
    void *opaque; /* used for stack overflow check */
 | 
			
		||||
@ -2695,17 +2800,19 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_char32:
 | 
			
		||||
        case REOP_char32_i:
 | 
			
		||||
            val = get_u32(pc);
 | 
			
		||||
            pc += 4;
 | 
			
		||||
            goto test_char;
 | 
			
		||||
        case REOP_char:
 | 
			
		||||
        case REOP_char_i:
 | 
			
		||||
            val = get_u16(pc);
 | 
			
		||||
            pc += 2;
 | 
			
		||||
        test_char:
 | 
			
		||||
            if (cptr >= cbuf_end)
 | 
			
		||||
                goto no_match;
 | 
			
		||||
            GET_CHAR(c, cptr, cbuf_end, cbuf_type);
 | 
			
		||||
            if (s->ignore_case) {
 | 
			
		||||
            if (opcode == REOP_char_i || opcode == REOP_char32_i) {
 | 
			
		||||
                c = lre_canonicalize(c, s->is_unicode);
 | 
			
		||||
            }
 | 
			
		||||
            if (val != c)
 | 
			
		||||
@ -2749,18 +2856,20 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
                return LRE_RET_TIMEOUT;
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_line_start:
 | 
			
		||||
        case REOP_line_start_m:
 | 
			
		||||
            if (cptr == s->cbuf)
 | 
			
		||||
                break;
 | 
			
		||||
            if (!s->multi_line)
 | 
			
		||||
            if (opcode == REOP_line_start)
 | 
			
		||||
                goto no_match;
 | 
			
		||||
            PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
 | 
			
		||||
            if (!is_line_terminator(c))
 | 
			
		||||
                goto no_match;
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_line_end:
 | 
			
		||||
        case REOP_line_end_m:
 | 
			
		||||
            if (cptr == cbuf_end)
 | 
			
		||||
                break;
 | 
			
		||||
            if (!s->multi_line)
 | 
			
		||||
            if (opcode == REOP_line_end)
 | 
			
		||||
                goto no_match;
 | 
			
		||||
            PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
 | 
			
		||||
            if (!is_line_terminator(c))
 | 
			
		||||
@ -2823,15 +2932,19 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
                goto no_match;
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_word_boundary:
 | 
			
		||||
        case REOP_word_boundary_i:
 | 
			
		||||
        case REOP_not_word_boundary:
 | 
			
		||||
        case REOP_not_word_boundary_i:
 | 
			
		||||
            {
 | 
			
		||||
                BOOL v1, v2;
 | 
			
		||||
                int ignore_case = (opcode == REOP_word_boundary_i || opcode == REOP_not_word_boundary_i);
 | 
			
		||||
                BOOL is_boundary = (opcode == REOP_word_boundary || opcode == REOP_word_boundary_i);
 | 
			
		||||
                /* char before */
 | 
			
		||||
                if (cptr == s->cbuf) {
 | 
			
		||||
                    v1 = FALSE;
 | 
			
		||||
                } else {
 | 
			
		||||
                    PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
 | 
			
		||||
                    if (s->ignore_case)
 | 
			
		||||
                    if (ignore_case)
 | 
			
		||||
                        c = lre_canonicalize(c, s->is_unicode);
 | 
			
		||||
                    v1 = is_word_char(c);
 | 
			
		||||
                }
 | 
			
		||||
@ -2840,16 +2953,18 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
                    v2 = FALSE;
 | 
			
		||||
                } else {
 | 
			
		||||
                    PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
 | 
			
		||||
                    if (s->ignore_case)
 | 
			
		||||
                    if (ignore_case)
 | 
			
		||||
                        c = lre_canonicalize(c, s->is_unicode);
 | 
			
		||||
                    v2 = is_word_char(c);
 | 
			
		||||
                }
 | 
			
		||||
                if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode))
 | 
			
		||||
                if (v1 ^ v2 ^ is_boundary)
 | 
			
		||||
                    goto no_match;
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_back_reference:
 | 
			
		||||
        case REOP_back_reference_i:
 | 
			
		||||
        case REOP_backward_back_reference:
 | 
			
		||||
        case REOP_backward_back_reference_i:
 | 
			
		||||
            {
 | 
			
		||||
                const uint8_t *cptr1, *cptr1_end, *cptr1_start;
 | 
			
		||||
                uint32_t c1, c2;
 | 
			
		||||
@ -2861,14 +2976,15 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
                cptr1_end = capture[2 * val + 1];
 | 
			
		||||
                if (!cptr1_start || !cptr1_end)
 | 
			
		||||
                    break;
 | 
			
		||||
                if (opcode == REOP_back_reference) {
 | 
			
		||||
                if (opcode == REOP_back_reference ||
 | 
			
		||||
                    opcode == REOP_back_reference_i) {
 | 
			
		||||
                    cptr1 = cptr1_start;
 | 
			
		||||
                    while (cptr1 < cptr1_end) {
 | 
			
		||||
                        if (cptr >= cbuf_end)
 | 
			
		||||
                            goto no_match;
 | 
			
		||||
                        GET_CHAR(c1, cptr1, cptr1_end, cbuf_type);
 | 
			
		||||
                        GET_CHAR(c2, cptr, cbuf_end, cbuf_type);
 | 
			
		||||
                        if (s->ignore_case) {
 | 
			
		||||
                        if (opcode == REOP_back_reference_i) {
 | 
			
		||||
                            c1 = lre_canonicalize(c1, s->is_unicode);
 | 
			
		||||
                            c2 = lre_canonicalize(c2, s->is_unicode);
 | 
			
		||||
                        }
 | 
			
		||||
@ -2882,7 +2998,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
                            goto no_match;
 | 
			
		||||
                        GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type);
 | 
			
		||||
                        GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type);
 | 
			
		||||
                        if (s->ignore_case) {
 | 
			
		||||
                        if (opcode == REOP_backward_back_reference_i) {
 | 
			
		||||
                            c1 = lre_canonicalize(c1, s->is_unicode);
 | 
			
		||||
                            c2 = lre_canonicalize(c2, s->is_unicode);
 | 
			
		||||
                        }
 | 
			
		||||
@ -2893,6 +3009,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_range:
 | 
			
		||||
        case REOP_range_i:
 | 
			
		||||
            {
 | 
			
		||||
                int n;
 | 
			
		||||
                uint32_t low, high, idx_min, idx_max, idx;
 | 
			
		||||
@ -2902,7 +3019,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
                if (cptr >= cbuf_end)
 | 
			
		||||
                    goto no_match;
 | 
			
		||||
                GET_CHAR(c, cptr, cbuf_end, cbuf_type);
 | 
			
		||||
                if (s->ignore_case) {
 | 
			
		||||
                if (opcode == REOP_range_i) {
 | 
			
		||||
                    c = lre_canonicalize(c, s->is_unicode);
 | 
			
		||||
                }
 | 
			
		||||
                idx_min = 0;
 | 
			
		||||
@ -2933,6 +3050,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
            }
 | 
			
		||||
            break;
 | 
			
		||||
        case REOP_range32:
 | 
			
		||||
        case REOP_range32_i:
 | 
			
		||||
            {
 | 
			
		||||
                int n;
 | 
			
		||||
                uint32_t low, high, idx_min, idx_max, idx;
 | 
			
		||||
@ -2942,7 +3060,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
 | 
			
		||||
                if (cptr >= cbuf_end)
 | 
			
		||||
                    goto no_match;
 | 
			
		||||
                GET_CHAR(c, cptr, cbuf_end, cbuf_type);
 | 
			
		||||
                if (s->ignore_case) {
 | 
			
		||||
                if (opcode == REOP_range32_i) {
 | 
			
		||||
                    c = lre_canonicalize(c, s->is_unicode);
 | 
			
		||||
                }
 | 
			
		||||
                idx_min = 0;
 | 
			
		||||
@ -3036,8 +3154,6 @@ int lre_exec(uint8_t **capture,
 | 
			
		||||
    StackInt *stack_buf;
 | 
			
		||||
 | 
			
		||||
    re_flags = lre_get_flags(bc_buf);
 | 
			
		||||
    s->multi_line = (re_flags & LRE_FLAG_MULTILINE) != 0;
 | 
			
		||||
    s->ignore_case = (re_flags & LRE_FLAG_IGNORECASE) != 0;
 | 
			
		||||
    s->is_unicode = (re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0;
 | 
			
		||||
    s->capture_count = bc_buf[RE_HEADER_CAPTURE_COUNT];
 | 
			
		||||
    s->stack_size_max = bc_buf[RE_HEADER_STACK_SIZE];
 | 
			
		||||
 | 
			
		||||
@ -177,7 +177,7 @@ regexp-dotall
 | 
			
		||||
regexp-duplicate-named-groups=skip
 | 
			
		||||
regexp-lookbehind
 | 
			
		||||
regexp-match-indices
 | 
			
		||||
regexp-modifiers=skip
 | 
			
		||||
regexp-modifiers
 | 
			
		||||
regexp-named-groups
 | 
			
		||||
regexp-unicode-property-escapes
 | 
			
		||||
regexp-v-flag
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user