From 105d65e29b636981b2a92cd0205b19f85951d770 Mon Sep 17 00:00:00 2001 From: Yasuhiro Matsumoto Date: Sun, 12 Apr 2026 16:12:22 +0000 Subject: [PATCH] patch 9.2.0339: regexp: nfa_regmatch() allocates and frees too often Problem: nfa_regmatch() allocates and frees two list buffers on every call, causing unnecessary memory allocation overhead for frequently used patterns. Solution: Cache the list buffers in the regprog struct and reuse them on subsequent top-level calls. Recursive calls still allocate their own buffers. Free cached buffers in nfa_regfree() (Yasuhiro Matsumoto). Benchmark: 10K lines, `:%s` x50 iterations | Pattern | Before | After | Improvement | |---|---|---|---| | `\<\(\w\+\%(ing\|tion\|ed\|ly\)\|\w\{3,}\)\>` (many matches) | 4.384s | 4.299s | -2% | | `\(foo\|bar\|baz\)\{3,}\(qux\|quux\|corge\)\{2,}...` (no match, high nstate) | 16.927s | 3.015s | -82% | closes: #19956 Signed-off-by: Yasuhiro Matsumoto Signed-off-by: Christian Brabandt --- src/regexp.h | 2 ++ src/regexp_nfa.c | 35 ++++++++++++++++++++++++++++++----- src/version.c | 2 ++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/regexp.h b/src/regexp.h index 7a0c3abac6..aa685a54d0 100644 --- a/src/regexp.h +++ b/src/regexp.h @@ -123,6 +123,8 @@ typedef struct char_u *pattern; int nsubexp; // number of () int nstate; + void *listbuf[2]; // cached list buffers for + // nfa_regmatch() nfa_state_T state[1]; // actually longer.. } nfa_regprog_T; diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 610c6bd678..4bd588b4e4 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -5797,9 +5797,21 @@ nfa_regmatch( // Allocate memory for the lists of nodes. size = (prog->nstate + 1) * sizeof(nfa_thread_T); - list[0].t = alloc(size); + // Reuse cached list buffers from prog when available (top-level call). + // Recursive calls must allocate their own buffers. + if (toplevel && prog->listbuf[0] != NULL) + { + list[0].t = (nfa_thread_T *)prog->listbuf[0]; + list[1].t = (nfa_thread_T *)prog->listbuf[1]; + prog->listbuf[0] = NULL; + prog->listbuf[1] = NULL; + } + else + { + list[0].t = alloc(size); + list[1].t = alloc(size); + } list[0].len = prog->nstate + 1; - list[1].t = alloc(size); list[1].len = prog->nstate + 1; if (list[0].t == NULL || list[1].t == NULL) goto theend; @@ -7287,9 +7299,18 @@ nextchar: #endif theend: - // Free memory - vim_free(list[0].t); - vim_free(list[1].t); + // Cache list buffers in prog for reuse, or free if prog already has + // cached buffers (recursive call case). + if (prog->listbuf[0] == NULL && list[0].t != NULL && list[1].t != NULL) + { + prog->listbuf[0] = list[0].t; + prog->listbuf[1] = list[1].t; + } + else + { + vim_free(list[0].t); + vim_free(list[1].t); + } vim_free(listids); #undef ADD_STATE_IF_MATCH #ifdef NFA_REGEXP_DEBUG_LOG @@ -7644,6 +7665,8 @@ nfa_regcomp(char_u *expr, int re_flags) goto fail; state_ptr = prog->state; prog->re_in_use = FALSE; + prog->listbuf[0] = NULL; + prog->listbuf[1] = NULL; /* * PASS 2 @@ -7707,6 +7730,8 @@ nfa_regfree(regprog_T *prog) vim_free(((nfa_regprog_T *)prog)->match_text); vim_free(((nfa_regprog_T *)prog)->pattern); + vim_free(((nfa_regprog_T *)prog)->listbuf[0]); + vim_free(((nfa_regprog_T *)prog)->listbuf[1]); vim_free(prog); } diff --git a/src/version.c b/src/version.c index 15f52077eb..fc48763819 100644 --- a/src/version.c +++ b/src/version.c @@ -734,6 +734,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 339, /**/ 338, /**/