patch 9.2.0339: regexp: nfa_regmatch() allocates and frees too often

Problem:  nfa_regmatch() allocates and frees two list buffers on every
          call, causing unnecessary memory allocation overhead for
          frequently used patterns.
Solution: Cache the list buffers in the nfa_regprog_T struct and reuse
          them on subsequent top-level calls. Recursive calls still
          allocate their own buffers. Free cached buffers in
          nfa_regfree() (Yasuhiro Matsumoto).

Benchmark: 10K lines, `:%s` x50 iterations

| Pattern | Before | After | Change in time |
|---|---|---|---|
| `\<\(\w\+\%(ing\|tion\|ed\|ly\)\|\w\{3,}\)\>` (many matches) | 4.384s | 4.299s | -2% |
| `\(foo\|bar\|baz\)\{3,}\(qux\|quux\|corge\)\{2,}...` (no match, high nstate) | 16.927s | 3.015s | -82% |

closes: #19956

Signed-off-by: Yasuhiro Matsumoto <mattn.jp@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
Yasuhiro Matsumoto
2026-04-12 16:12:22 +00:00
committed by Christian Brabandt
parent 86ae6858ab
commit 105d65e29b
3 changed files with 34 additions and 5 deletions
+2
View File
@@ -123,6 +123,8 @@ typedef struct
char_u *pattern;
int nsubexp; // number of ()
int nstate;
void *listbuf[2]; // cached list buffers for
// nfa_regmatch()
nfa_state_T state[1]; // actually longer..
} nfa_regprog_T;
+30 -5
View File
@@ -5797,9 +5797,21 @@ nfa_regmatch(
// Allocate memory for the lists of nodes.
size = (prog->nstate + 1) * sizeof(nfa_thread_T);
list[0].t = alloc(size);
// Reuse cached list buffers from prog when available (top-level call).
// Recursive calls must allocate their own buffers.
if (toplevel && prog->listbuf[0] != NULL)
{
list[0].t = (nfa_thread_T *)prog->listbuf[0];
list[1].t = (nfa_thread_T *)prog->listbuf[1];
prog->listbuf[0] = NULL;
prog->listbuf[1] = NULL;
}
else
{
list[0].t = alloc(size);
list[1].t = alloc(size);
}
list[0].len = prog->nstate + 1;
list[1].t = alloc(size);
list[1].len = prog->nstate + 1;
if (list[0].t == NULL || list[1].t == NULL)
goto theend;
@@ -7287,9 +7299,18 @@ nextchar:
#endif
theend:
// Free memory
vim_free(list[0].t);
vim_free(list[1].t);
// Cache list buffers in prog for reuse, or free if prog already has
// cached buffers (recursive call case).
if (prog->listbuf[0] == NULL && list[0].t != NULL && list[1].t != NULL)
{
prog->listbuf[0] = list[0].t;
prog->listbuf[1] = list[1].t;
}
else
{
vim_free(list[0].t);
vim_free(list[1].t);
}
vim_free(listids);
#undef ADD_STATE_IF_MATCH
#ifdef NFA_REGEXP_DEBUG_LOG
@@ -7644,6 +7665,8 @@ nfa_regcomp(char_u *expr, int re_flags)
goto fail;
state_ptr = prog->state;
prog->re_in_use = FALSE;
prog->listbuf[0] = NULL;
prog->listbuf[1] = NULL;
/*
* PASS 2
@@ -7707,6 +7730,8 @@ nfa_regfree(regprog_T *prog)
vim_free(((nfa_regprog_T *)prog)->match_text);
vim_free(((nfa_regprog_T *)prog)->pattern);
vim_free(((nfa_regprog_T *)prog)->listbuf[0]);
vim_free(((nfa_regprog_T *)prog)->listbuf[1]);
vim_free(prog);
}
+2
View File
@@ -734,6 +734,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
339,
/**/
338,
/**/