allow regexp interruption (e.g. with Ctrl-C in the REPL)

This commit is contained in:
Fabrice Bellard 2025-03-13 17:17:51 +01:00
parent 027f3cb5e4
commit 25aaa77370
3 changed files with 61 additions and 12 deletions

View File

@ -54,6 +54,9 @@ typedef enum {
#define CAPTURE_COUNT_MAX 255 #define CAPTURE_COUNT_MAX 255
#define STACK_SIZE_MAX 255 #define STACK_SIZE_MAX 255
/* must be large enough to have a negligible runtime cost and small
enough to call the interrupt callback often. */
#define INTERRUPT_COUNTER_INIT 10000
/* unicode code points */ /* unicode code points */
#define CP_LS 0x2028 #define CP_LS 0x2028
@ -1931,6 +1934,7 @@ typedef struct {
BOOL multi_line; BOOL multi_line;
BOOL ignore_case; BOOL ignore_case;
BOOL is_unicode; BOOL is_unicode;
int interrupt_counter;
void *opaque; /* used for stack overflow check */ void *opaque; /* used for stack overflow check */
size_t state_size; size_t state_size;
@ -1977,7 +1981,17 @@ static int push_state(REExecContext *s,
return 0; return 0;
} }
/* Rate-limited interruption check used by the matcher.
   Each call decrements s->interrupt_counter. Only when the counter
   has dropped to zero (or below) is it reloaded with
   INTERRUPT_COUNTER_INIT and the user supplied lre_check_timeout()
   callback invoked with s->opaque.
   Return LRE_RET_TIMEOUT if the callback returned non zero, 0
   otherwise. */
static int lre_poll_timeout(REExecContext *s)
{
    int expired;

    s->interrupt_counter--;
    expired = (s->interrupt_counter <= 0);
    if (unlikely(expired)) {
        /* re-arm the countdown before calling the callback */
        s->interrupt_counter = INTERRUPT_COUNTER_INIT;
        return lre_check_timeout(s->opaque) ? LRE_RET_TIMEOUT : 0;
    }
    return 0;
}
/* return 1 if match, 0 if not match or < 0 if error. */
static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
StackInt *stack, int stack_len, StackInt *stack, int stack_len,
const uint8_t *pc, const uint8_t *cptr, const uint8_t *pc, const uint8_t *cptr,
@ -2008,6 +2022,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
ret = 0; ret = 0;
recurse: recurse:
for(;;) { for(;;) {
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
if (s->state_stack_len == 0) if (s->state_stack_len == 0)
return ret; return ret;
rs = (REExecState *)(s->state_stack + rs = (REExecState *)(s->state_stack +
@ -2097,7 +2113,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
ret = push_state(s, capture, stack, stack_len, ret = push_state(s, capture, stack, stack_len,
pc1, cptr, RE_EXEC_STATE_SPLIT, 0); pc1, cptr, RE_EXEC_STATE_SPLIT, 0);
if (ret < 0) if (ret < 0)
return -1; return LRE_RET_MEMORY_ERROR;
break; break;
} }
case REOP_lookahead: case REOP_lookahead:
@ -2109,12 +2125,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead, RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead,
0); 0);
if (ret < 0) if (ret < 0)
return -1; return LRE_RET_MEMORY_ERROR;
break; break;
case REOP_goto: case REOP_goto:
val = get_u32(pc); val = get_u32(pc);
pc += 4 + (int)val; pc += 4 + (int)val;
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
break; break;
case REOP_line_start: case REOP_line_start:
if (cptr == s->cbuf) if (cptr == s->cbuf)
@ -2179,6 +2197,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
pc += 4; pc += 4;
if (--stack[stack_len - 1] != 0) { if (--stack[stack_len - 1] != 0) {
pc += (int)val; pc += (int)val;
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
} }
break; break;
case REOP_push_char_pos: case REOP_push_char_pos:
@ -2353,9 +2373,12 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
q = 0; q = 0;
for(;;) { for(;;) {
if (lre_poll_timeout(s))
return LRE_RET_TIMEOUT;
res = lre_exec_backtrack(s, capture, stack, stack_len, res = lre_exec_backtrack(s, capture, stack, stack_len,
pc1, cptr, TRUE); pc1, cptr, TRUE);
if (res == -1) if (res == LRE_RET_MEMORY_ERROR ||
res == LRE_RET_TIMEOUT)
return res; return res;
if (!res) if (!res)
break; break;
@ -2373,7 +2396,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
RE_EXEC_STATE_GREEDY_QUANT, RE_EXEC_STATE_GREEDY_QUANT,
q - quant_min); q - quant_min);
if (ret < 0) if (ret < 0)
return -1; return LRE_RET_MEMORY_ERROR;
} }
} }
break; break;
@ -2383,7 +2406,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
} }
} }
/* Return 1 if match, 0 if not match or -1 if error. cindex is the /* Return 1 if match, 0 if not match or < 0 if error (see LRE_RET_x). cindex is the
starting position of the match and must be such as 0 <= cindex <= starting position of the match and must be such as 0 <= cindex <=
clen. */ clen. */
int lre_exec(uint8_t **capture, int lre_exec(uint8_t **capture,
@ -2405,6 +2428,7 @@ int lre_exec(uint8_t **capture,
s->cbuf_type = cbuf_type; s->cbuf_type = cbuf_type;
if (s->cbuf_type == 1 && s->is_unicode) if (s->cbuf_type == 1 && s->is_unicode)
s->cbuf_type = 2; s->cbuf_type = 2;
s->interrupt_counter = INTERRUPT_COUNTER_INIT;
s->opaque = opaque; s->opaque = opaque;
s->state_size = sizeof(REExecState) + s->state_size = sizeof(REExecState) +

View File

@ -36,6 +36,9 @@
#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */ #define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */ #define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
#define LRE_RET_MEMORY_ERROR (-1)
#define LRE_RET_TIMEOUT (-2)
uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
const char *buf, size_t buf_len, int re_flags, const char *buf, size_t buf_len, int re_flags,
void *opaque); void *opaque);
@ -50,6 +53,8 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16);
/* must be provided by the user, return non zero if overflow */ /* must be provided by the user, return non zero if overflow */
int lre_check_stack_overflow(void *opaque, size_t alloca_size); int lre_check_stack_overflow(void *opaque, size_t alloca_size);
/* must be provided by the user, return non-zero if the execution must be interrupted (timeout) */
int lre_check_timeout(void *opaque);
void *lre_realloc(void *opaque, void *ptr, size_t size); void *lre_realloc(void *opaque, void *ptr, size_t size);
#endif /* LIBREGEXP_H */ #endif /* LIBREGEXP_H */

View File

@ -6836,15 +6836,19 @@ static JSValue JS_ThrowTypeErrorInvalidClass(JSContext *ctx, int class_id)
return JS_ThrowTypeErrorAtom(ctx, "%s object expected", name); return JS_ThrowTypeErrorAtom(ctx, "%s object expected", name);
} }
/* Raise the "interrupted" internal error on 'ctx' and then pass the
   runtime's current exception to JS_SetUncatchableError() with TRUE
   (presumably so that script code cannot catch the interruption). */
static void JS_ThrowInterrupted(JSContext *ctx)
{
    JSRuntime *rt = ctx->rt;

    JS_ThrowInternalError(ctx, "interrupted");
    /* current_exception is read only after the throw above */
    JS_SetUncatchableError(ctx, rt->current_exception, TRUE);
}
static no_inline __exception int __js_poll_interrupts(JSContext *ctx) static no_inline __exception int __js_poll_interrupts(JSContext *ctx)
{ {
JSRuntime *rt = ctx->rt; JSRuntime *rt = ctx->rt;
ctx->interrupt_counter = JS_INTERRUPT_COUNTER_INIT; ctx->interrupt_counter = JS_INTERRUPT_COUNTER_INIT;
if (rt->interrupt_handler) { if (rt->interrupt_handler) {
if (rt->interrupt_handler(rt, rt->interrupt_opaque)) { if (rt->interrupt_handler(rt, rt->interrupt_opaque)) {
/* XXX: should set a specific flag to avoid catching */ JS_ThrowInterrupted(ctx);
JS_ThrowInternalError(ctx, "interrupted");
JS_SetUncatchableError(ctx, ctx->rt->current_exception, TRUE);
return -1; return -1;
} }
} }
@ -43914,12 +43918,20 @@ fail:
return JS_EXCEPTION; return JS_EXCEPTION;
} }
BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size) int lre_check_stack_overflow(void *opaque, size_t alloca_size)
{ {
JSContext *ctx = opaque; JSContext *ctx = opaque;
return js_check_stack_overflow(ctx->rt, alloca_size); return js_check_stack_overflow(ctx->rt, alloca_size);
} }
/* Timeout hook called by the regexp engine. 'opaque' is the
   JSContext. Return 0 when the runtime has no interrupt handler;
   otherwise call the handler with the runtime and its
   interrupt_opaque value and return 1 if it returned non zero, 0 if
   not. */
int lre_check_timeout(void *opaque)
{
    JSContext *ctx = opaque;
    JSRuntime *rt = ctx->rt;

    if (!rt->interrupt_handler)
        return 0;
    return rt->interrupt_handler(rt, rt->interrupt_opaque) != 0;
}
void *lre_realloc(void *opaque, void *ptr, size_t size) void *lre_realloc(void *opaque, void *ptr, size_t size)
{ {
JSContext *ctx = opaque; JSContext *ctx = opaque;
@ -43987,7 +43999,11 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
goto fail; goto fail;
} }
} else { } else {
JS_ThrowInternalError(ctx, "out of memory in regexp execution"); if (rc == LRE_RET_TIMEOUT) {
JS_ThrowInterrupted(ctx);
} else {
JS_ThrowInternalError(ctx, "out of memory in regexp execution");
}
goto fail; goto fail;
} }
} else { } else {
@ -44183,7 +44199,11 @@ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValueCon
goto fail; goto fail;
} }
} else { } else {
JS_ThrowInternalError(ctx, "out of memory in regexp execution"); if (ret == LRE_RET_TIMEOUT) {
JS_ThrowInterrupted(ctx);
} else {
JS_ThrowInternalError(ctx, "out of memory in regexp execution");
}
goto fail; goto fail;
} }
break; break;