From 397b9da29d29a96d92ebdfd9d6bf379249519fa1 Mon Sep 17 00:00:00 2001 From: Damien Lejay Date: Tue, 12 Aug 2025 20:46:50 +0200 Subject: [PATCH 01/11] patch 9.1.1624: Cscope not enabled on MacOS Problem: Cscope not enabled on MacOS Solution: Remove #ifdef test for MacOS and always enable FEAT_CSCOPE on Unix (Damien Lejay). Remove the !MACOS_X guard from FEAT_CSCOPE in feature.h. macOS has been a certified UNIX since 10.5 and supports cscope without issues. This guard originated in the pre-OS X era when Classic MacOS lacked the required POSIX APIs. MacVim already ships with +cscope successfully. closes: #17976 Signed-off-by: Damien Lejay Signed-off-by: Christian Brabandt --- src/feature.h | 2 +- src/version.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/feature.h b/src/feature.h index a87b9c2bbe..e5df7fab7c 100644 --- a/src/feature.h +++ b/src/feature.h @@ -236,7 +236,7 @@ /* * +cscope Unix only: Cscope support. */ -#if defined(UNIX) && defined(FEAT_HUGE) && defined(ENABLE_CSCOPE) && !defined(MACOS_X) +#if defined(UNIX) && defined(FEAT_HUGE) && defined(ENABLE_CSCOPE) # define FEAT_CSCOPE #endif diff --git a/src/version.c b/src/version.c index 78cf50877d..22db56962e 100644 --- a/src/version.c +++ b/src/version.c @@ -719,6 +719,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1624, /**/ 1623, /**/ From 487dd1716a028efd0cc1bc1b4bfdc53b82688641 Mon Sep 17 00:00:00 2001 From: Shane-XB-Qian Date: Tue, 12 Aug 2025 21:14:58 +0200 Subject: [PATCH 02/11] runtime(vim): cannot jump to :colorscheme files Problem: cannot jump to :colorscheme files Solution: Let runtime/autoload/vim.vim handle :colorscheme lines closes: #17971 Signed-off-by: Shane-XB-Qian Signed-off-by: Christian Brabandt --- runtime/autoload/vim.vim | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/runtime/autoload/vim.vim b/runtime/autoload/vim.vim index 8dc14c4760..c517b756fe 100644 --- a/runtime/autoload/vim.vim +++ b/runtime/autoload/vim.vim @@ -1,5 +1,16 @@ vim9script +# Language: Vim9 script +# Contributers: @lacygoill +# Shane-XB-Qian +# Last Change: 2025 Aug 12 +# +# Vim Script to handle +# :import, :packadd and :colorscheme +# lines and allows to easily jump to it using gf +# +# see runtime/ftplugin/vim.vim + # Interface {{{1 export def Find(editcmd: string) #{{{2 var curline: string = getline('.') @@ -9,6 +20,11 @@ export def Find(editcmd: string) #{{{2 return endif + if curline =~ '^\s*\%(:\s*\)\=colo\%[rscheme]\s' + HandleColoLine(editcmd, curline) + return + endif + if curline =~ '^\s*\%(:\s*\)\=import\s' HandleImportLine(editcmd, curline) return @@ -51,6 +67,30 @@ def HandlePackaddLine(editcmd: string, curline: string) #{{{2 endif enddef +def HandleColoLine(editcmd: string, curline: string) #{{{2 + var pat: string = '^\s*colo\%[rscheme]\s\+\zs\S\+$' + var colo: string = curline->matchstr(pat) + + if colo == '' + try + execute 'normal! ' .. editcmd .. 'zv' + catch + Error(v:exception) + return + endtry + else + var split: string = editcmd[0] == 'g' ? 'edit' : editcmd[1] == 'g' ? 'tabedit' : 'split' + var files: list = getcompletion($'colors/{colo}', 'runtime') + ->map((_, fname: string) => fname->findfile(&rtp)->fnamemodify(':p')) + ->filter((_, path: string): bool => filereadable(path)) + if empty(files) + echo 'Could not find any colorscheme file for ' .. string(colo) + return + endif + files->Open(split) + endif +enddef + def HandleImportLine(editcmd: string, curline: string) #{{{2 var fname: string var import_cmd: string = '^\s*import\s\+\%(autoload\s\+\)\=' @@ -132,3 +172,5 @@ def Error(msg: string) #{{{2 echomsg msg echohl NONE enddef + +# vim: sw=4 et From 639d93fc0efe925ec87f31db7f3ccdc0add4e247 Mon Sep 17 00:00:00 2001 From: Shane-XB-Qian Date: Tue, 12 Aug 2025 21:28:28 +0200 Subject: [PATCH 03/11] runetime(vim): gf maybe hang and fail * searching'**' maybe terrible slow * searching '**/plugin/' maybe fail * the pattern for the :packadd/:import/:colorscheme did not consistently check for a colon, so always use '\%(:\s*\)\=' before the actual ex command While at it rename the generic name vim.vim to vimgoto.vim as this more clearly states what this script is for. Signed-off-by: Shane-XB-Qian Signed-off-by: Christian Brabandt --- runtime/autoload/{vim.vim => vimgoto.vim} | 13 +++++-------- runtime/ftplugin/vim.vim | 10 ++++++---- 2 files changed, 11 insertions(+), 12 deletions(-) rename runtime/autoload/{vim.vim => vimgoto.vim} (89%) diff --git a/runtime/autoload/vim.vim b/runtime/autoload/vimgoto.vim similarity index 89% rename from runtime/autoload/vim.vim rename to runtime/autoload/vimgoto.vim index c517b756fe..b34dcc1a13 100644 --- a/runtime/autoload/vim.vim +++ b/runtime/autoload/vimgoto.vim @@ -39,7 +39,7 @@ enddef #}}}1 # Core {{{1 def HandlePackaddLine(editcmd: string, curline: string) #{{{2 - var pat: string = '^\s*packadd!\=\s\+\zs\S\+$' + var pat: string = '^\s*\%(:\s*\)\=packadd!\=\s\+\zs\S\+$' var plugin: string = curline ->matchstr(pat) ->substitute('^vim-\|\.vim$', '', 'g') @@ -53,12 +53,9 @@ def HandlePackaddLine(editcmd: string, curline: string) #{{{2 endtry else var split: string = editcmd[0] == 'g' ? 'edit' : editcmd[1] == 'g' ? 'tabedit' : 'split' - # In the past, we passed `runtime` to `getcompletion()`, instead of - # `cmdline`. But the output was tricky to use, because it contained - # paths relative to inconsistent root directories. - var files: list = getcompletion($'edit **/plugin/{plugin}.vim', 'cmdline') + var files: list = getcompletion($'plugin/{plugin}', 'runtime') + ->map((_, fname: string) => fname->findfile(&rtp)->fnamemodify(':p')) ->filter((_, path: string): bool => filereadable(path)) - ->map((_, fname: string) => fname->fnamemodify(':p')) if empty(files) echo 'Could not find any plugin file for ' .. string(plugin) return @@ -68,7 +65,7 @@ def HandlePackaddLine(editcmd: string, curline: string) #{{{2 enddef def HandleColoLine(editcmd: string, curline: string) #{{{2 - var pat: string = '^\s*colo\%[rscheme]\s\+\zs\S\+$' + var pat: string = '^\s*\%(:\s*\)\=colo\%[rscheme]\s\+\zs\S\+$' var colo: string = curline->matchstr(pat) if colo == '' @@ -93,7 +90,7 @@ enddef def HandleImportLine(editcmd: string, curline: string) #{{{2 var fname: string - var import_cmd: string = '^\s*import\s\+\%(autoload\s\+\)\=' + var import_cmd: string = '^\s*\%(:\s*\)\=import\s\+\%(autoload\s\+\)\=' var import_alias: string = '\%(\s\+as\s\+\w\+\)\=$' var import_string: string = import_cmd .. '\([''"]\)\zs.*\ze\1' .. import_alias var import_expr: string = import_cmd .. '\zs.*\ze' .. import_alias diff --git a/runtime/ftplugin/vim.vim b/runtime/ftplugin/vim.vim index 9b0f89dbef..22b2614658 100644 --- a/runtime/ftplugin/vim.vim +++ b/runtime/ftplugin/vim.vim @@ -9,6 +9,7 @@ " Last Change: 2025 Aug 07 " 2025 Aug 06 by Vim Project (add gf maps #17881) " 2025 Aug 08 by Vim Project (add Vim script complete function #17871) +" 2025 Aug 12 by Vim Project (improve vimgoto script #17970)) " Only do this when not done yet for this buffer if exists("b:did_ftplugin") @@ -152,7 +153,7 @@ if !exists("no_plugin_maps") && !exists("no_vim_maps") nnoremap [" :call search('\%(^\s*".*\n\)\%(^\s*"\)\@!', "bW") xnoremap [" :exe "normal! gv"call search('\%(^\s*".*\n\)\%(^\s*"\)\@!', "bW") - " Purpose: Handle `:import` and `:packadd` lines in a smarter way. {{{ + " Purpose: Handle :import, :colorscheme and :packadd lines in a smarter way. {{{ " " `:import` is followed by a filename or filepath. Find it. " @@ -171,9 +172,10 @@ if !exists("no_plugin_maps") && !exists("no_vim_maps") " buffer. " }}} " We use the `F` variants, instead of the `f` ones, because they're smarter. - nnoremap gf :call vim#Find('gF') - nnoremap f :call vim#Find("\C-W>F") - nnoremap gf :call vim#Find("\C-W>gF") + " See $VIMRUNTIME/autoload/vimgoto.vim + nnoremap gf :call vimgoto#Find('gF') + nnoremap f :call vimgoto#Find("\C-W>F") + nnoremap gf :call vimgoto#Find("\C-W>gF") endif " Let the matchit plugin know what items can be matched. From 59e1d7f353993e93be97dbec30d3218742960f62 Mon Sep 17 00:00:00 2001 From: Girish Palya Date: Tue, 12 Aug 2025 21:38:56 +0200 Subject: [PATCH 04/11] patch 9.1.1625: Autocompletion slow with include- and tag-completion Problem: Autocompletion slow with include- and tag-completion Solution: Refactor ins_compl_interrupted() to also check for timeout, further refactor code to skip outputting message when performing autocompletion (Girish Palya). Running `vim *` in `vim/src` was slower than expected when 'autocomplete' was enabled. Include-file and tag-file completion sources were not subject to the timeout check, causing unnecessary delays. So apply the timeout check to these sources as well, improving autocompletion responsiveness, refactor find_pattern_in_path() to take an additional "silent" argument, to suppress any messages. closes: #17966 Signed-off-by: Girish Palya Signed-off-by: Christian Brabandt --- src/ex_docmd.c | 10 +++++----- src/insexpand.c | 14 +++++--------- src/normal.c | 2 +- src/proto/search.pro | 2 +- src/search.c | 12 +++++++----- src/version.c | 2 ++ src/window.c | 3 ++- 7 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/ex_docmd.c b/src/ex_docmd.c index 977329601c..87b390aa87 100644 --- a/src/ex_docmd.c +++ b/src/ex_docmd.c @@ -9432,8 +9432,8 @@ exec_normal(int was_typed, int use_vpeekc, int may_use_terminal_loop UNUSED) ex_checkpath(exarg_T *eap) { find_pattern_in_path(NULL, 0, 0, FALSE, FALSE, CHECK_PATH, 1L, - eap->forceit ? ACTION_SHOW_ALL : ACTION_SHOW, - (linenr_T)1, (linenr_T)MAXLNUM, eap->forceit); + eap->forceit ? ACTION_SHOW_ALL : ACTION_SHOW, + (linenr_T)1, (linenr_T)MAXLNUM, eap->forceit, FALSE); } #if defined(FEAT_QUICKFIX) @@ -9501,9 +9501,9 @@ ex_findpat(exarg_T *eap) } if (!eap->skip) find_pattern_in_path(eap->arg, 0, (int)STRLEN(eap->arg), - whole, !eap->forceit, - *eap->cmd == 'd' ? FIND_DEFINE : FIND_ANY, - n, action, eap->line1, eap->line2, eap->forceit); + whole, !eap->forceit, + *eap->cmd == 'd' ? FIND_DEFINE : FIND_ANY, n, action, + eap->line1, eap->line2, eap->forceit, FALSE); } #endif diff --git a/src/insexpand.c b/src/insexpand.c index 991c9d23d0..bc77b2d02f 100644 --- a/src/insexpand.c +++ b/src/insexpand.c @@ -2042,8 +2042,7 @@ ins_compl_files( leader_len = (int)ins_compl_leader_len(); } - for (i = 0; i < count && !got_int && !compl_interrupted - && !compl_time_slice_expired; i++) + for (i = 0; i < count && !got_int && !ins_compl_interrupted(); i++) { fp = mch_fopen((char *)files[i], "r"); // open dictionary file if (flags != DICT_EXACT && !shortmess(SHM_COMPLETIONSCAN) @@ -2060,7 +2059,7 @@ ins_compl_files( // Read dictionary file line by line. // Check each line for a match. - while (!got_int && !compl_interrupted && !compl_time_slice_expired + while (!got_int && !ins_compl_interrupted() && !vim_fgets(buf, LSIZE, fp)) { ptr = buf; @@ -2297,7 +2296,7 @@ ins_compl_init_get_longest(void) int ins_compl_interrupted(void) { - return compl_interrupted; + return compl_interrupted || compl_time_slice_expired; } /* @@ -3841,10 +3840,7 @@ f_complete_check(typval_T *argvars UNUSED, typval_T *rettv) RedrawingDisabled = 0; ins_compl_check_keys(0, TRUE); - if (compl_autocomplete && compl_time_slice_expired) - rettv->vval.v_number = TRUE; - else - rettv->vval.v_number = ins_compl_interrupted(); + rettv->vval.v_number = ins_compl_interrupted(); RedrawingDisabled = save_RedrawingDisabled; } @@ -4462,7 +4458,7 @@ get_next_include_file_completion(int compl_type) (compl_type == CTRL_X_PATH_DEFINES && !(compl_cont_status & CONT_SOL)) ? FIND_DEFINE : FIND_ANY, 1L, ACTION_EXPAND, - (linenr_T)1, (linenr_T)MAXLNUM, FALSE); + (linenr_T)1, (linenr_T)MAXLNUM, FALSE, compl_autocomplete); } #endif diff --git a/src/normal.c b/src/normal.c index 8758d4b75d..64b338edc8 100644 --- a/src/normal.c +++ b/src/normal.c @@ -4478,7 +4478,7 @@ nv_brackets(cmdarg_T *cap) SAFE_islower(cap->nchar) ? ACTION_SHOW : ACTION_GOTO, cap->cmdchar == ']' ? curwin->w_cursor.lnum + 1 : (linenr_T)1, (linenr_T)MAXLNUM, - FALSE); + FALSE, FALSE); vim_free(ptr); curwin->w_set_curswant = TRUE; } diff --git a/src/proto/search.pro b/src/proto/search.pro index 6f1cf9285c..4fcce1bd30 100644 --- a/src/proto/search.pro +++ b/src/proto/search.pro @@ -33,7 +33,7 @@ int check_linecomment(char_u *line); void showmatch(int c); int current_search(long count, int forward); int linewhite(linenr_T lnum); -void find_pattern_in_path(char_u *ptr, int dir, int len, int whole, int skip_comments, int type, long count, int action, linenr_T start_lnum, linenr_T end_lnum, int forceit); +void find_pattern_in_path(char_u *ptr, int dir, int len, int whole, int skip_comments, int type, long count, int action, linenr_T start_lnum, linenr_T end_lnum, int forceit, int silent); spat_T *get_spat(int idx); int get_spat_last_idx(void); void f_searchcount(typval_T *argvars, typval_T *rettv); diff --git a/src/search.c b/src/search.c index cdd171e954..68f17e25f3 100644 --- a/src/search.c +++ b/src/search.c @@ -3406,7 +3406,8 @@ find_pattern_in_path( int action, // What to do when we find it linenr_T start_lnum, // first line to start searching linenr_T end_lnum, // last line for searching - int forceit) // If true, always switch to the found path + int forceit, // If true, always switch to the found path + int silent) // Do not print messages when ACTION_EXPAND { SearchedFile *files; // Stack of included files SearchedFile *bigger; // When we need more space @@ -3680,7 +3681,8 @@ find_pattern_in_path( files[depth].name = curr_fname = new_fname; files[depth].lnum = 0; files[depth].matched = FALSE; - if (action == ACTION_EXPAND) + if (action == ACTION_EXPAND + && !shortmess(SHM_COMPLETIONSCAN) && !silent) { msg_hist_off = TRUE; // reset in msg_trunc_attr() vim_snprintf((char*)IObuff, IOSIZE, @@ -3909,8 +3911,8 @@ search_line: else if (action == ACTION_SHOW) { show_pat_in_path(line, type, did_show, action, - (depth == -1) ? NULL : files[depth].fp, - (depth == -1) ? &lnum : &files[depth].lnum, 1L); + (depth == -1) ? NULL : files[depth].fp, + (depth == -1) ? &lnum : &files[depth].lnum, 1L); did_show = TRUE; } else @@ -4060,7 +4062,7 @@ exit_matched: msg(_("No included files")); } } - else if (!found && action != ACTION_EXPAND) + else if (!found && action != ACTION_EXPAND && !silent) { if (got_int || ins_compl_interrupted()) emsg(_(e_interrupted)); diff --git a/src/version.c b/src/version.c index 22db56962e..d68e63d119 100644 --- a/src/version.c +++ b/src/version.c @@ -719,6 +719,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1625, /**/ 1624, /**/ diff --git a/src/window.c b/src/window.c index 484fdf8e10..85971164a2 100644 --- a/src/window.c +++ b/src/window.c @@ -698,7 +698,8 @@ wingotofile: find_pattern_in_path(ptr, 0, len, TRUE, Prenum == 0 ? TRUE : FALSE, type, - Prenum1, ACTION_SPLIT, (linenr_T)1, (linenr_T)MAXLNUM, FALSE); + Prenum1, ACTION_SPLIT, (linenr_T)1, (linenr_T)MAXLNUM, + FALSE, FALSE); vim_free(ptr); curwin->w_set_curswant = TRUE; break; From 5ba6e41d37ec29fc8360a4f06d4e95c63eb013ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anttoni=20Erkkil=C3=A4?= Date: Tue, 12 Aug 2025 21:59:06 +0200 Subject: [PATCH 05/11] patch 9.1.1626: cindent: does not handle compound literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: C-indent does not handle compound literals (@44100hertz, @Jorenar) Solution: Detect and handle compound literal and structure initialization (Anttoni Erkkilä) match '=' or "return" optionally followed by &, (typecast), { Fixes also initialization which begins with multiple opening braces. fixes: #2090 fixes: #12491 closes: #17865 Signed-off-by: Anttoni Erkkilä Signed-off-by: Christian Brabandt --- runtime/doc/version9.txt | 3 +- src/cindent.c | 102 +++++++++++++++++++++++++++++------- src/testdir/test_indent.vim | 25 +++++++++ src/version.c | 2 + 4 files changed, 111 insertions(+), 21 deletions(-) diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt index 1052e7e515..fb278f0ab0 100644 --- a/runtime/doc/version9.txt +++ b/runtime/doc/version9.txt @@ -1,4 +1,4 @@ -*version9.txt* For Vim version 9.1. Last change: 2025 Aug 08 +*version9.txt* For Vim version 9.1. Last change: 2025 Aug 12 VIM REFERENCE MANUAL by Bram Moolenaar @@ -41736,6 +41736,7 @@ Others: ~ - |gv| works in operator pending mode and does not abort - The close button shown in the non-GUI 'tabline' will only be visible if the 'mouse' option contains either "a" or any of the flags "n", "v", or "i". +- |C-indenting| handles compound literals. *added-9.2* Added ~ diff --git a/src/cindent.c b/src/cindent.c index e1f7446768..3dea1079ee 100644 --- a/src/cindent.c +++ b/src/cindent.c @@ -690,10 +690,9 @@ cin_islabel(void) // XXX /* * Return TRUE if string "s" ends with the string "find", possibly followed by * white space and comments. Skip strings and comments. - * Ignore "ignore" after "find" if it's not NULL. */ static int -cin_ends_in(char_u *s, char_u *find, char_u *ignore) +cin_ends_in(char_u *s, char_u *find) { char_u *p = s; char_u *r; @@ -705,8 +704,6 @@ cin_ends_in(char_u *s, char_u *find, char_u *ignore) if (STRNCMP(p, find, len) == 0) { r = skipwhite(p + len); - if (ignore != NULL && STRNCMP(r, ignore, STRLEN(ignore)) == 0) - r = skipwhite(r + STRLEN(ignore)); if (cin_nocode(r)) return TRUE; } @@ -717,9 +714,77 @@ cin_ends_in(char_u *s, char_u *find, char_u *ignore) } /* - * Recognize structure initialization and enumerations: + * Strings can be concatenated with comments between: + * "string0" |*comment*| "string1" + */ + static char_u * +cin_skip_comment_and_string(char_u *s) +{ + char_u *r = NULL, *p = s; + do + { + r = p; + p = cin_skipcomment(p); + if (*p) + p = skip_string(p); + } while (p != r); + return p; +} + +/* + * Recognize structure or compound literal initialization: + * =|return [&][(typecast)] [{] + * The number of opening braces is arbitrary. + */ + static int +cin_is_compound_init(char_u *s) +{ + char_u *p = s, *r = NULL; + + while (*p) + { + if (*p == '=') + p = r = cin_skipcomment(p + 1); + else if (!STRNCMP(p, "return", 6) && !vim_isIDc(p[6]) + && (p == s || (p > s && !vim_isIDc(p[-1])))) + p = r = cin_skipcomment(p + 6); + else + p = cin_skip_comment_and_string(p + 1); + } + if (!r) + return FALSE; + p = r; // p points now after '=' or "return" + + if (cin_nocode(p)) + return TRUE; + + if (*p == '&') + p = cin_skipcomment(p + 1); + + if (*p == '(') // skip a typecast + { + int open_count = 1; + do + { + p = cin_skip_comment_and_string(p + 1); + if (cin_nocode(p)) + return TRUE; + open_count += (*p == '(') - (*p == ')'); + } while (open_count); + p = cin_skipcomment(p + 1); + if (cin_nocode(p)) + return TRUE; + } + + while (*p == '{') + p = cin_skipcomment(p + 1); + return cin_nocode(p); +} + +/* + * Recognize enumerations: * "[typedef] [static|public|protected|private] enum" - * "[typedef] [static|public|protected|private] = {" + * Call another function to recognize structure initialization. */ static int cin_isinit(void) @@ -753,10 +818,7 @@ cin_isinit(void) if (cin_starts_with(s, "enum")) return TRUE; - if (cin_ends_in(s, (char_u *)"=", (char_u *)"{")) - return TRUE; - - return FALSE; + return cin_is_compound_init(s); } // Maximum number of lines to search back for a "namespace" line. @@ -1634,7 +1696,7 @@ get_baseclass_amount(int col) if (find_last_paren(ml_get_curline(), '(', ')') && (trypos = find_match_paren(curbuf->b_ind_maxparen)) != NULL) amount = get_indent_lnum(trypos->lnum); // XXX - if (!cin_ends_in(ml_get_curline(), (char_u *)",", NULL)) + if (!cin_ends_in(ml_get_curline(), (char_u *)",")) amount += curbuf->b_ind_cpp_baseclass; } else @@ -2505,7 +2567,7 @@ get_c_indent(void) cur_amount = MAXCOL; l = ml_get(our_paren_pos.lnum); if (curbuf->b_ind_unclosed_wrapped - && cin_ends_in(l, (char_u *)"(", NULL)) + && cin_ends_in(l, (char_u *)"(")) { // look for opening unmatched paren, indent one level // for each additional level @@ -3702,8 +3764,8 @@ term_again: && !cin_nocode(theline) && vim_strchr(theline, '{') == NULL && vim_strchr(theline, '}') == NULL - && !cin_ends_in(theline, (char_u *)":", NULL) - && !cin_ends_in(theline, (char_u *)",", NULL) + && !cin_ends_in(theline, (char_u *)":") + && !cin_ends_in(theline, (char_u *)",") && cin_isfuncdecl(NULL, cur_curpos.lnum + 1, cur_curpos.lnum + 1) && !cin_isterminated(theline, FALSE, TRUE)) @@ -3764,7 +3826,7 @@ term_again: // } foo, // bar; n = 0; - if (cin_ends_in(l, (char_u *)",", NULL) + if (cin_ends_in(l, (char_u *)",") || (*l != NUL && (n = l[STRLEN(l) - 1]) == '\\')) { // take us back to opening paren @@ -3812,14 +3874,14 @@ term_again: // comments) align at column 0. For example: // char *string_array[] = { "foo", // / * x * / "b};ar" }; / * foobar * / - if (cin_ends_in(l, (char_u *)"};", NULL)) + if (cin_ends_in(l, (char_u *)"};")) break; // If the previous line ends on '[' we are probably in an // array constant: // something = [ // 234, <- extra indent - if (cin_ends_in(l, (char_u *)"[", NULL)) + if (cin_ends_in(l, (char_u *)"[")) { amount = get_indent() + ind_continuation; break; @@ -3840,7 +3902,7 @@ term_again: break; } if (curwin->w_cursor.lnum > 0 - && cin_ends_in(look, (char_u *)"}", NULL)) + && cin_ends_in(look, (char_u *)"}")) break; curwin->w_cursor = curpos_save; @@ -3860,10 +3922,10 @@ term_again: // int foo, // bar; // indent_to_0 here; - if (cin_ends_in(l, (char_u *)";", NULL)) + if (cin_ends_in(l, (char_u *)";")) { l = ml_get(curwin->w_cursor.lnum - 1); - if (cin_ends_in(l, (char_u *)",", NULL) + if (cin_ends_in(l, (char_u *)",") || (*l != NUL && l[STRLEN(l) - 1] == '\\')) break; l = ml_get_curline(); diff --git a/src/testdir/test_indent.vim b/src/testdir/test_indent.vim index e20694792c..52d7be1390 100644 --- a/src/testdir/test_indent.vim +++ b/src/testdir/test_indent.vim @@ -119,6 +119,31 @@ func Test_userlabel_indent() close! endfunc +" Test that struct members are aligned +func Test_struct_indent() + new + call setline(1, ['struct a a = {', '1,', '1,']) + normal gg=G + call assert_equal(getline(2), getline(3)) + + call setline(1, 'a = (struct a) {') + normal gg=G + call assert_equal(getline(2), getline(3)) + + call setline(1, 'void *ptr = &(static struct a) {{') + normal gg=G + call assert_equal(getline(2), getline(3)) + + call setline(1, 'a = (macro(arg1, "str)))")) {') + normal gg=G + call assert_equal(getline(2), getline(3)) + + call setline(1, 'return (struct a) {') + normal gg=G + call assert_equal(getline(2), getline(3)) + close! +endfunc + " Test for 'copyindent' func Test_copyindent() new diff --git a/src/version.c b/src/version.c index d68e63d119..071eca3f04 100644 --- a/src/version.c +++ b/src/version.c @@ -719,6 +719,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1626, /**/ 1625, /**/ From 7e0df5eee9eab872261fd5eb0068cec967a2ba77 Mon Sep 17 00:00:00 2001 From: Girish Palya Date: Tue, 12 Aug 2025 22:22:52 +0200 Subject: [PATCH 06/11] patch 9.1.1627: fuzzy matching can be improved Problem: fuzzy-matching can be improved Solution: Implement a better fuzzy matching algorithm (Girish Palya) Replace fuzzy matching algorithm with improved fzy-based implementation The [current](https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/) fuzzy matching algorithm has several accuracy issues: * It struggles with CamelCase * It fails to prioritize matches at the beginning of strings, often ranking middle matches higher. After evaluating alternatives (see my comments [here](https://github.com/vim/vim/issues/17531#issuecomment-3112046897) and [here](https://github.com/vim/vim/issues/17531#issuecomment-3121593900)), I chose to adopt the [fzy](https://github.com/jhawthorn/fzy) algorithm, which: * Resolves the aforementioned issues. * Performs better. Implementation details This version is based on the original fzy [algorithm](https://github.com/jhawthorn/fzy/blob/master/src/match.c), with one key enhancement: **multibyte character support**. * The original implementation supports only ASCII. * This patch replaces ascii lookup tables with function calls, making it compatible with multibyte character sets. * Core logic (`match_row()` and `match_positions()`) remains faithful to the original, but now operates on codepoints rather than single-byte characters. Performance Tested against a dataset of **90,000 Linux kernel filenames**. Results (in milliseconds) show a **\~2x performance improvement** over the current fuzzy matching algorithm. ``` Search String Current Algo FZY Algo ------------------------------------------------- init 131.759 66.916 main 83.688 40.861 sig 98.348 39.699 index 109.222 30.738 ab 72.222 44.357 cd 83.036 54.739 a 58.94 62.242 b 43.612 43.442 c 64.39 67.442 k 40.585 36.371 z 34.708 22.781 w 38.033 30.109 cpa 82.596 38.116 arz 84.251 23.964 zzzz 35.823 22.75 dimag 110.686 29.646 xa 43.188 29.199 nha 73.953 31.001 nedax 94.775 29.568 dbue 79.846 25.902 fp 46.826 31.641 tr 90.951 55.883 kw 38.875 23.194 rp 101.575 55.775 kkkkkkkkkkkkkkkkkkkkkkkkkkkkk 48.519 30.921 ``` ```vim vim9script var haystack = readfile('/Users/gp/linux.files') var needles = ['init', 'main', 'sig', 'index', 'ab', 'cd', 'a', 'b', 'c', 'k', 'z', 'w', 'cpa', 'arz', 'zzzz', 'dimag', 'xa', 'nha', 'nedax', 'dbue', 'fp', 'tr', 'kw', 'rp', 'kkkkkkkkkkkkkkkkkkkkkkkkkkkkk'] for needle in needles var start = reltime() var tmp = matchfuzzy(haystack, needle) echom $'{needle}' (start->reltime()->reltimefloat() * 1000) endfor ``` Additional changes * Removed the "camelcase" option from both matchfuzzy() and matchfuzzypos(), as it's now obsolete with the improved algorithm. related: neovim/neovim#34101 fixes #17531 closes: #17900 Signed-off-by: Girish Palya Signed-off-by: Christian Brabandt --- Filelist | 2 + runtime/doc/builtin.txt | 5 +- runtime/doc/pattern.txt | 5 +- runtime/doc/version9.txt | 2 + src/Make_ami.mak | 1 + src/Make_cyg_ming.mak | 1 + src/Make_mvc.mak | 4 + src/Make_vms.mms | 6 + src/Makefile | 11 + src/README.md | 1 + src/buffer.c | 6 +- src/cmdexpand.c | 4 +- src/fuzzy.c | 1141 +++++++++++++++++++++++++++++ src/insexpand.c | 34 +- src/map.c | 4 +- src/option.c | 2 +- src/proto.h | 1 + src/proto/fuzzy.pro | 11 + src/proto/search.pro | 9 - src/quickfix.c | 8 +- src/search.c | 1198 ------------------------------- src/testdir/test_cmdline.vim | 6 +- src/testdir/test_matchfuzzy.vim | 131 ++-- src/version.c | 2 + src/vim.h | 5 +- 25 files changed, 1289 insertions(+), 1311 deletions(-) create mode 100644 src/fuzzy.c create mode 100644 src/proto/fuzzy.pro diff --git a/Filelist b/Filelist index b52506d756..413a88b65c 100644 --- a/Filelist +++ b/Filelist @@ -82,6 +82,7 @@ SRC_ALL = \ src/findfile.c \ src/float.c \ src/fold.c \ + src/fuzzy.c \ src/getchar.c \ src/gc.c \ src/globals.h \ @@ -291,6 +292,7 @@ SRC_ALL = \ src/proto/findfile.pro \ src/proto/float.pro \ src/proto/fold.pro \ + src/proto/fuzzy.pro \ src/proto/getchar.pro \ src/proto/gc.pro \ src/proto/gui.pro \ diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 9e2205af8e..752d0515d8 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -1,4 +1,4 @@ -*builtin.txt* For Vim version 9.1. Last change: 2025 Aug 10 +*builtin.txt* For Vim version 9.1. Last change: 2025 Aug 12 VIM REFERENCE MANUAL by Bram Moolenaar @@ -7421,9 +7421,6 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* given sequence. limit Maximum number of matches in {list} to be returned. Zero means no limit. - camelcase Use enhanced camel case scoring making results - better suited for completion related to - programming languages. Defaults to v:true. If {list} is a list of dictionaries, then the optional {dict} argument supports the following additional items: diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index 8da707f1b2..02668a0473 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1,4 +1,4 @@ -*pattern.txt* For Vim version 9.1. Last change: 2025 Aug 06 +*pattern.txt* For Vim version 9.1. Last change: 2025 Aug 12 VIM REFERENCE MANUAL by Bram Moolenaar @@ -1509,6 +1509,9 @@ characters in the search string. If the search string has multiple words, then each word is matched separately. So the words in the search string can be present in any order in a string. +Vim uses the same improved algorithm as the fzy project: +https://github.com/jhawthorn/fzy + Fuzzy matching assigns a score for each matched string based on the following criteria: - The number of sequentially matching characters. diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt index fb278f0ab0..281ae2d513 100644 --- a/runtime/doc/version9.txt +++ b/runtime/doc/version9.txt @@ -41723,6 +41723,8 @@ Functions: ~ - Add the optional {opts} |Dict| argument to |getchar()| to control: cursor behaviour, return type and whether or not to simplify the returned key - |chdir()| allows to optionally specify a scope argument +- |matchfuzzy()| and |matchfuzzypos()| use an improved fuzzy matching + algorithm (same as fzy). Others: ~ - the regex engines match correctly case-insensitive multi-byte characters diff --git a/src/Make_ami.mak b/src/Make_ami.mak index 7c96467357..839c6faa49 100644 --- a/src/Make_ami.mak +++ b/src/Make_ami.mak @@ -113,6 +113,7 @@ SRC += \ findfile.c \ float.c \ fold.c \ + fuzzy.c \ getchar.c \ gc.c \ hardcopy.c \ diff --git a/src/Make_cyg_ming.mak b/src/Make_cyg_ming.mak index 6ae1c0a054..3ffa8600d7 100644 --- a/src/Make_cyg_ming.mak +++ b/src/Make_cyg_ming.mak @@ -823,6 +823,7 @@ OBJ = \ $(OUTDIR)/findfile.o \ $(OUTDIR)/float.o \ $(OUTDIR)/fold.o \ + $(OUTDIR)/fuzzy.o \ $(OUTDIR)/getchar.o \ $(OUTDIR)/gc.o \ $(OUTDIR)/gui_xim.o \ diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak index f4899d5661..383344d46d 100644 --- a/src/Make_mvc.mak +++ b/src/Make_mvc.mak @@ -732,6 +732,7 @@ OBJ = \ $(OUTDIR)\findfile.obj \ $(OUTDIR)\float.obj \ $(OUTDIR)\fold.obj \ + $(OUTDIR)\fuzzy.obj \ $(OUTDIR)\getchar.obj \ $(OUTDIR)\gc.obj \ $(OUTDIR)\gui_xim.obj \ @@ -1616,6 +1617,8 @@ $(OUTDIR)/float.obj: $(OUTDIR) float.c $(INCL) $(OUTDIR)/fold.obj: $(OUTDIR) fold.c $(INCL) +$(OUTDIR)/fuzzy.obj: $(OUTDIR) fuzzy.c $(INCL) + $(OUTDIR)/getchar.obj: $(OUTDIR) getchar.c $(INCL) $(OUTDIR)/gc.obj: $(OUTDIR) gc.c $(INCL) @@ -1961,6 +1964,7 @@ proto.h: \ proto/filepath.pro \ proto/findfile.pro \ proto/float.pro \ + proto/fuzzy.pro \ proto/getchar.pro \ proto/gc.pro \ proto/gui_xim.pro \ diff --git a/src/Make_vms.mms b/src/Make_vms.mms index 8eb2455a88..d452291af0 100644 --- a/src/Make_vms.mms +++ b/src/Make_vms.mms @@ -529,6 +529,7 @@ SRC = \ findfile.c \ float.c \ fold.c \ + fuzzy.c \ getchar.c \ gc.c \ gui_xim.c \ @@ -665,6 +666,7 @@ OBJ = \ [.$(DEST)]findfile.obj \ [.$(DEST)]float.obj \ [.$(DEST)]fold.obj \ + [.$(DEST)]fuzzy.obj \ [.$(DEST)]getchar.obj \ [.$(DEST)]gc.obj \ [.$(DEST)]gui_xim.obj \ @@ -1141,6 +1143,10 @@ lua_env : [.$(DEST)]fold.obj : fold.c vim.h [.$(DEST)]config.h feature.h os_unix.h \ ascii.h keymap.h termdefs.h macros.h structs.h regexp.h gui.h beval.h \ [.proto]gui_beval.pro option.h ex_cmds.h proto.h errors.h globals.h +[.$(DEST)]fuzzy.obj : fuzzy.c vim.h [.$(DEST)]config.h feature.h os_unix.h \ + ascii.h keymap.h termdefs.h macros.h structs.h regexp.h \ + gui.h beval.h [.proto]gui_beval.pro option.h ex_cmds.h proto.h \ + errors.h globals.h [.$(DEST)]getchar.obj : getchar.c vim.h [.$(DEST)]config.h feature.h os_unix.h \ ascii.h keymap.h termdefs.h macros.h structs.h regexp.h \ gui.h beval.h [.proto]gui_beval.pro option.h ex_cmds.h proto.h \ diff --git a/src/Makefile b/src/Makefile index e3a8600422..32c0d97d18 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1523,6 +1523,7 @@ BASIC_SRC = \ findfile.c \ float.c \ fold.c \ + fuzzy.c \ getchar.c \ gc.c \ gui_xim.c \ @@ -1701,6 +1702,7 @@ OBJ_COMMON = \ objects/findfile.o \ objects/float.o \ objects/fold.o \ + objects/fuzzy.o \ objects/getchar.o \ objects/gc.o \ objects/gui_xim.o \ @@ -1886,6 +1888,7 @@ PRO_AUTO = \ findfile.pro \ float.pro \ fold.pro \ + fuzzy.pro \ getchar.pro \ gc.pro \ gui_xim.pro \ @@ -3309,6 +3312,9 @@ objects/float.o: float.c objects/fold.o: fold.c $(CCC) -o $@ fold.c +objects/fuzzy.o: fuzzy.c + $(CCC) -o $@ fuzzy.c + objects/getchar.o: getchar.c $(CCC) -o $@ getchar.c @@ -3988,6 +3994,11 @@ objects/fold.o: fold.c vim.h protodef.h auto/config.h feature.h os_unix.h \ proto/gui_beval.pro structs.h regexp.h gui.h libvterm/include/vterm.h \ libvterm/include/vterm_keycodes.h alloc.h ex_cmds.h spell.h proto.h \ globals.h errors.h +objects/fuzzy.o: fuzzy.c vim.h protodef.h auto/config.h feature.h os_unix.h \ + auto/osdef.h ascii.h keymap.h termdefs.h macros.h option.h beval.h \ + proto/gui_beval.pro structs.h regexp.h gui.h libvterm/include/vterm.h \ + libvterm/include/vterm_keycodes.h alloc.h ex_cmds.h spell.h proto.h \ + globals.h errors.h objects/getchar.o: getchar.c vim.h protodef.h auto/config.h feature.h os_unix.h \ auto/osdef.h ascii.h keymap.h termdefs.h macros.h option.h beval.h \ proto/gui_beval.pro structs.h regexp.h gui.h libvterm/include/vterm.h \ diff --git a/src/README.md b/src/README.md index a01f695d93..6c67199345 100644 --- a/src/README.md +++ b/src/README.md @@ -48,6 +48,7 @@ fileio.c | reading and writing files filepath.c | dealing with file names and paths findfile.c | search for files in 'path' fold.c | folding +fuzzy.c | fuzzy matching getchar.c | getting characters and key mapping gc.c | garbage collection help.c | vim help related functions diff --git a/src/buffer.c b/src/buffer.c index 5c8cd259b2..1ca561fce2 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -2945,12 +2945,14 @@ ExpandBufnames( { p = NULL; // first try matching with the short file name - if ((score = fuzzy_match_str(buf->b_sfname, pat)) != 0) + if ((score = fuzzy_match_str(buf->b_sfname, pat)) + != FUZZY_SCORE_NONE) p = buf->b_sfname; if (p == NULL) { // next try matching with the full path file name - if ((score = fuzzy_match_str(buf->b_ffname, pat)) != 0) + if ((score = fuzzy_match_str(buf->b_ffname, pat)) + != FUZZY_SCORE_NONE) p = buf->b_ffname; } } diff --git a/src/cmdexpand.c b/src/cmdexpand.c index 4ef1879117..dc65ae8eee 100644 --- a/src/cmdexpand.c +++ b/src/cmdexpand.c @@ -3612,7 +3612,7 @@ ExpandGenericExt( else { score = fuzzy_match_str(str, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } } else @@ -4022,7 +4022,7 @@ ExpandUserDefined( else { score = fuzzy_match_str(s, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } } else diff --git a/src/fuzzy.c b/src/fuzzy.c new file mode 100644 index 0000000000..6ebfa5b217 --- /dev/null +++ b/src/fuzzy.c @@ -0,0 +1,1141 @@ +/* vi:set ts=8 sts=4 sw=4 noet: + * + * VIM - Vi IMproved by Bram Moolenaar + * + * Do ":help uganda" in Vim to read copying and usage conditions. + * Do ":help credits" in Vim to see a list of people who contributed. + * See README.txt for an overview of the Vim source code. + * + * fuzzy.c: fuzzy matching algorithm and related functions + * + * Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy) + * Original code: + * Copyright (c) 2014 John Hawthorn + * Licensed under the MIT License. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "vim.h" + +#if defined(FEAT_EVAL) || defined(FEAT_PROTO) +static int fuzzy_match_item_compare(const void *s1, const void *s2); +static void fuzzy_match_in_list(list_T *l, char_u *str, int matchseq, char_u *key, callback_T *item_cb, int retmatchpos, list_T *fmatchlist, long max_matches); +static void do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos); +#endif +static int fuzzy_match_str_compare(const void *s1, const void *s2); +static void fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz); +static int fuzzy_match_func_compare(const void *s1, const void *s2); +static void fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz); + +typedef double score_t; +static score_t match_positions(char_u *needle, char_u *haystack, int_u *positions); +static int has_match(char_u *needle, char_u *haystack); + +#define SCORE_MAX INFINITY +#define SCORE_MIN -INFINITY +#define SCORE_SCALE 1000 + +typedef struct +{ + int idx; // used for stable sort + listitem_T *item; + int score; + list_T *lmatchpos; + char_u *pat; + char_u *itemstr; + int itemstr_allocated; + int startpos; +} fuzzyItem_T; + +/* + * fuzzy_match() + * + * Returns TRUE if "pat_arg" matches "str". Also returns the match score in + * "outScore" and the matching character positions in "matches". + */ + int +fuzzy_match( + char_u *str, + char_u *pat_arg, + int matchseq, + int *outScore, + int_u *matches, + int maxMatches) +{ + char_u *save_pat; + char_u *pat; + char_u *p; + int complete = FALSE; + int score = 0; + int numMatches = 0; + score_t fzy_score; + + *outScore = 0; + + save_pat = vim_strsave(pat_arg); + if (save_pat == NULL) + return FALSE; + pat = save_pat; + p = pat; + + // Try matching each word in 'pat_arg' in 'str' + while (TRUE) + { + if (matchseq) + complete = TRUE; + else + { + // Extract one word from the pattern (separated by space) + p = skipwhite(p); + if (*p == NUL) + break; + pat = p; + while (*p != NUL && !VIM_ISWHITE(PTR2CHAR(p))) + { + if (has_mbyte) + MB_PTR_ADV(p); + else + ++p; + } + if (*p == NUL) // processed all the words + complete = TRUE; + *p = NUL; + } + + score = FUZZY_SCORE_NONE; + if (has_match(pat, str)) + { + fzy_score = match_positions(pat, str, matches + numMatches); + score = (fzy_score == SCORE_MIN) ? INT_MIN + 1 + : (fzy_score == SCORE_MAX) ? INT_MAX + : (fzy_score < 0) ? (int)ceil(fzy_score * SCORE_SCALE - 0.5) + : (int)floor(fzy_score * SCORE_SCALE + 0.5); + } + + if (score == FUZZY_SCORE_NONE) + { + numMatches = 0; + *outScore = FUZZY_SCORE_NONE; + break; + } + + if (score > 0 && *outScore > INT_MAX - score) + *outScore = INT_MAX; + else if (score < 0 && *outScore < INT_MIN + 1 - score) + *outScore = INT_MIN + 1; + else + *outScore += score; + + numMatches += MB_CHARLEN(pat); + + if (complete || numMatches >= maxMatches) + break; + + // try matching the next word + ++p; + } + + vim_free(save_pat); + return numMatches != 0; +} + +#if defined(FEAT_EVAL) || defined(FEAT_PROTO) +/* + * Sort the fuzzy matches in the descending order of the match score. + * For items with same score, retain the order using the index (stable sort) + */ + static int +fuzzy_match_item_compare(const void *s1, const void *s2) +{ + int v1 = ((fuzzyItem_T *)s1)->score; + int v2 = ((fuzzyItem_T *)s2)->score; + + if (v1 == v2) + { + int exact_match1 = FALSE, exact_match2 = FALSE; + char_u *pat = ((fuzzyItem_T *)s1)->pat; + int patlen = (int)STRLEN(pat); + int startpos = ((fuzzyItem_T *)s1)->startpos; + exact_match1 = (startpos >= 0) && STRNCMP(pat, + ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0; + startpos = ((fuzzyItem_T *)s2)->startpos; + exact_match2 = (startpos >= 0) && STRNCMP(pat, + ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0; + + if (exact_match1 == exact_match2) + { + int idx1 = ((fuzzyItem_T *)s1)->idx; + int idx2 = ((fuzzyItem_T *)s2)->idx; + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + } + else if (exact_match2) + return 1; + return -1; + } + else + return v1 > v2 ? -1 : 1; +} + +/* + * Fuzzy search the string 'str' in a list of 'items' and return the matching + * strings in 'fmatchlist'. + * If 'matchseq' is TRUE, then for multi-word search strings, match all the + * words in sequence. + * If 'items' is a list of strings, then search for 'str' in the list. + * If 'items' is a list of dicts, then either use 'key' to lookup the string + * for each item or use 'item_cb' Funcref function to get the string. + * If 'retmatchpos' is TRUE, then return a list of positions where 'str' + * matches for each item. + */ + static void +fuzzy_match_in_list( + list_T *l, + char_u *str, + int matchseq, + char_u *key, + callback_T *item_cb, + int retmatchpos, + list_T *fmatchlist, + long max_matches) +{ + long len; + fuzzyItem_T *items; + listitem_T *li; + int i = 0; + long match_count = 0; + int_u matches[FUZZY_MATCH_MAX_LEN]; + + len = list_len(l); + if (len == 0) + return; + if (max_matches > 0 && len > max_matches) + len = max_matches; + + items = ALLOC_CLEAR_MULT(fuzzyItem_T, len); + if (items == NULL) + return; + + // For all the string items in items, get the fuzzy matching score + FOR_ALL_LIST_ITEMS(l, li) + { + int score; + char_u *itemstr; + typval_T rettv; + int itemstr_allocate = FALSE; + + if (max_matches > 0 && match_count >= max_matches) + break; + + itemstr = NULL; + rettv.v_type = VAR_UNKNOWN; + if (li->li_tv.v_type == VAR_STRING) // list of strings + itemstr = li->li_tv.vval.v_string; + else if (li->li_tv.v_type == VAR_DICT + && (key != NULL || item_cb->cb_name != NULL)) + { + // For a dict, either use the specified key to lookup the string or + // use the specified callback function to get the string. + if (key != NULL) + itemstr = dict_get_string(li->li_tv.vval.v_dict, + (char *)key, FALSE); + else + { + typval_T argv[2]; + + // Invoke the supplied callback (if any) to get the dict item + li->li_tv.vval.v_dict->dv_refcount++; + argv[0].v_type = VAR_DICT; + argv[0].vval.v_dict = li->li_tv.vval.v_dict; + argv[1].v_type = VAR_UNKNOWN; + if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL) + { + if (rettv.v_type == VAR_STRING) + { + itemstr = rettv.vval.v_string; + itemstr_allocate = TRUE; + } + } + dict_unref(li->li_tv.vval.v_dict); + } + } + + if (itemstr != NULL + && fuzzy_match(itemstr, str, matchseq, &score, matches, + FUZZY_MATCH_MAX_LEN)) + { + items[match_count].idx = match_count; + items[match_count].item = li; + items[match_count].score = score; + items[match_count].pat = str; + items[match_count].startpos = matches[0]; + if (itemstr_allocate) + items[match_count].itemstr = vim_strsave(itemstr); + else + items[match_count].itemstr = itemstr; + items[match_count].itemstr_allocated = itemstr_allocate; + + // Copy the list of matching positions in itemstr to a list + { + int j = 0; + char_u *p; + + items[match_count].lmatchpos = list_alloc(); + if (items[match_count].lmatchpos == NULL) + goto done; + + p = str; + while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) + { + if (!VIM_ISWHITE(PTR2CHAR(p)) || matchseq) + { + if (list_append_number(items[match_count].lmatchpos, + matches[j]) == FAIL) + goto done; + j++; + } + if (has_mbyte) + MB_PTR_ADV(p); + else + ++p; + } + } + ++match_count; + } + clear_tv(&rettv); + } + + if (match_count > 0) + { + list_T *retlist; + + // Sort the list by the descending order of the match score + qsort((void *)items, (size_t)match_count, sizeof(fuzzyItem_T), + fuzzy_match_item_compare); + + // For matchfuzzy(), return a list of matched strings. + // ['str1', 'str2', 'str3'] + // For matchfuzzypos(), return a list with three items. + // The first item is a list of matched strings. The second item + // is a list of lists where each list item is a list of matched + // character positions. The third item is a list of matching scores. + // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] + if (retmatchpos) + { + li = list_find(fmatchlist, 0); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + retlist = li->li_tv.vval.v_list; + } + else + retlist = fmatchlist; + + // Copy the matching strings to the return list + for (i = 0; i < match_count; i++) + list_append_tv(retlist, &items[i].item->li_tv); + + // next copy the list of matching positions + if (retmatchpos) + { + li = list_find(fmatchlist, -2); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + retlist = li->li_tv.vval.v_list; + + for (i = 0; i < match_count; i++) + if (items[i].lmatchpos != NULL + && list_append_list(retlist, items[i].lmatchpos) == FAIL) + goto done; + + // copy the matching scores + li = list_find(fmatchlist, -1); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + retlist = li->li_tv.vval.v_list; + for (i = 0; i < match_count; i++) + if (list_append_number(retlist, items[i].score) == FAIL) + goto done; + } + } + +done: + for (i = 0; i < match_count; i++) + if (items[i].itemstr_allocated) + vim_free(items[i].itemstr); + vim_free(items); +} + +/* + * Do fuzzy matching. Returns the list of matched strings in 'rettv'. + * If 'retmatchpos' is TRUE, also returns the matching character positions. + */ + static void +do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) +{ + callback_T cb; + char_u *key = NULL; + int ret; + int matchseq = FALSE; + long max_matches = 0; + + if (in_vim9script() + && (check_for_list_arg(argvars, 0) == FAIL + || check_for_string_arg(argvars, 1) == FAIL + || check_for_opt_dict_arg(argvars, 2) == FAIL)) + return; + + CLEAR_POINTER(&cb); + + // validate and get the arguments + if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) + { + semsg(_(e_argument_of_str_must_be_list), + retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); + return; + } + if (argvars[1].v_type != VAR_STRING + || argvars[1].vval.v_string == NULL) + { + semsg(_(e_invalid_argument_str), tv_get_string(&argvars[1])); + return; + } + + if (argvars[2].v_type != VAR_UNKNOWN) + { + dict_T *d; + dictitem_T *di; + + if (check_for_nonnull_dict_arg(argvars, 2) == FAIL) + return; + + // To search a dict, either a callback function or a key can be + // specified. + d = argvars[2].vval.v_dict; + if ((di = dict_find(d, (char_u *)"key", -1)) != NULL) + { + if (di->di_tv.v_type != VAR_STRING + || di->di_tv.vval.v_string == NULL + || *di->di_tv.vval.v_string == NUL) + { + semsg(_(e_invalid_value_for_argument_str_str), "key", + tv_get_string(&di->di_tv)); + return; + } + key = tv_get_string(&di->di_tv); + } + else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL) + { + cb = get_callback(&di->di_tv); + if (cb.cb_name == NULL) + { + semsg(_(e_invalid_value_for_argument_str), "text_cb"); + return; + } + } + + if ((di = dict_find(d, (char_u *)"limit", -1)) != NULL) + { + if (di->di_tv.v_type != VAR_NUMBER) + { + semsg(_(e_invalid_value_for_argument_str), "limit"); + return; + } + max_matches = (long)tv_get_number_chk(&di->di_tv, NULL); + } + + if (dict_has_key(d, "matchseq")) + matchseq = TRUE; + } + + // get the fuzzy matches + ret = rettv_list_alloc(rettv); + if (ret == FAIL) + goto done; + if (retmatchpos) + { + list_T *l; + + // For matchfuzzypos(), a list with three items are returned. First + // item is a list of matching strings, the second item is a list of + // lists with matching positions within each string and the third item + // is the list of scores of the matches. + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + { + vim_free(l); + goto done; + } + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + { + vim_free(l); + goto done; + } + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + { + vim_free(l); + goto done; + } + } + + fuzzy_match_in_list(argvars[0].vval.v_list, tv_get_string(&argvars[1]), + matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches); + +done: + free_callback(&cb); +} + +/* + * "matchfuzzy()" function + */ + void +f_matchfuzzy(typval_T *argvars, typval_T *rettv) +{ + do_fuzzymatch(argvars, rettv, FALSE); +} + +/* + * "matchfuzzypos()" function + */ + void +f_matchfuzzypos(typval_T *argvars, typval_T *rettv) +{ + do_fuzzymatch(argvars, rettv, TRUE); +} +#endif + +/* + * Same as fuzzy_match_item_compare() except for use with a string match + */ + static int +fuzzy_match_str_compare(const void *s1, const void *s2) +{ + int v1 = ((fuzmatch_str_T *)s1)->score; + int v2 = ((fuzmatch_str_T *)s2)->score; + int idx1 = ((fuzmatch_str_T *)s1)->idx; + int idx2 = ((fuzmatch_str_T *)s2)->idx; + + if (v1 == v2) + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + else + return v1 > v2 ? -1 : 1; +} + +/* + * Sort fuzzy matches by score + */ + static void +fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz) +{ + // Sort the list by the descending order of the match score + qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), + fuzzy_match_str_compare); +} + +/* + * Same as fuzzy_match_item_compare() except for use with a function name + * string match. functions should be sorted to the end. + */ + static int +fuzzy_match_func_compare(const void *s1, const void *s2) +{ + int v1 = ((fuzmatch_str_T *)s1)->score; + int v2 = ((fuzmatch_str_T *)s2)->score; + int idx1 = ((fuzmatch_str_T *)s1)->idx; + int idx2 = ((fuzmatch_str_T *)s2)->idx; + char_u *str1 = ((fuzmatch_str_T *)s1)->str; + char_u *str2 = ((fuzmatch_str_T *)s2)->str; + + if (*str1 != '<' && *str2 == '<') + return -1; + if (*str1 == '<' && *str2 != '<') + return 1; + if (v1 == v2) + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + else + return v1 > v2 ? -1 : 1; +} + +/* + * Sort fuzzy matches of function names by score. + * functions should be sorted to the end. + */ + static void +fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz) +{ + // Sort the list by the descending order of the match score + qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), + fuzzy_match_func_compare); +} + +/* + * Fuzzy match 'pat' in 'str'. Returns 0 if there is no match. Otherwise, + * returns the match score. + */ + int +fuzzy_match_str(char_u *str, char_u *pat) +{ + int score = FUZZY_SCORE_NONE; + int_u matchpos[FUZZY_MATCH_MAX_LEN]; + + if (str == NULL || pat == NULL) + return score; + + fuzzy_match(str, pat, TRUE, &score, matchpos, + sizeof(matchpos) / sizeof(matchpos[0])); + + return score; +} + +/* + * Fuzzy match the position of string 'pat' in string 'str'. + * Returns a dynamic array of matching positions. If there is no match, + * returns NULL. + */ + garray_T * +fuzzy_match_str_with_pos(char_u *str UNUSED, char_u *pat UNUSED) +{ +#ifdef FEAT_SEARCH_EXTRA + int score = FUZZY_SCORE_NONE; + garray_T *match_positions = NULL; + int_u matches[FUZZY_MATCH_MAX_LEN]; + int j = 0; + + if (str == NULL || pat == NULL) + return NULL; + + match_positions = ALLOC_ONE(garray_T); + if (match_positions == NULL) + return NULL; + ga_init2(match_positions, sizeof(int_u), 10); + + if (!fuzzy_match(str, pat, FALSE, &score, matches, FUZZY_MATCH_MAX_LEN) + || score == FUZZY_SCORE_NONE) + { + ga_clear(match_positions); + vim_free(match_positions); + return NULL; + } + + for (char_u *p = pat; *p != NUL; MB_PTR_ADV(p)) + { + if (!VIM_ISWHITE(PTR2CHAR(p))) + { + ga_grow(match_positions, 1); + ((int_u *)match_positions->ga_data)[match_positions->ga_len] = + matches[j]; + match_positions->ga_len++; + j++; + } + } + + return match_positions; +#else + return NULL; +#endif +} + +/* + * This function splits the line pointed to by `*ptr` into words and performs + * a fuzzy match for the pattern `pat` on each word. It iterates through the + * line, moving `*ptr` to the start of each word during the process. + * + * If a match is found: + * - `*ptr` points to the start of the matched word. + * - `*len` is set to the length of the matched word. + * - `*score` contains the match score. + * + * If no match is found, `*ptr` is updated to the end of the line. + */ + int +fuzzy_match_str_in_line( + char_u **ptr, + char_u *pat, + int *len, + pos_T *current_pos, + int *score) +{ + char_u *str = *ptr; + char_u *strBegin = str; + char_u *end = NULL; + char_u *start = NULL; + int found = FALSE; + char save_end; + char_u *line_end = NULL; + + if (str == NULL || pat == NULL) + return found; + line_end = find_line_end(str); + + while (str < line_end) + { + // Skip non-word characters + start = find_word_start(str); + if (*start == NUL) + break; + end = find_word_end(start); + + // Extract the word from start to end + save_end = *end; + *end = NUL; + + // Perform fuzzy match + *score = fuzzy_match_str(start, pat); + *end = save_end; + + if (*score != FUZZY_SCORE_NONE) + { + *len = (int)(end - start); + found = TRUE; + *ptr = start; + if (current_pos) + current_pos->col += (int)(end - strBegin); + break; + } + + // Move to the end of the current word for the next iteration + str = end; + // Ensure we continue searching after the current word + while (*str != NUL && !vim_iswordp(str)) + MB_PTR_ADV(str); + } + + if (!found) + *ptr = line_end; + + return found; +} + +/* + * Search for the next fuzzy match in the specified buffer. + * This function attempts to find the next occurrence of the given pattern + * in the buffer, starting from the current position. It handles line wrapping + * and direction of search. + * + * Return TRUE if a match is found, otherwise FALSE. + */ + int +search_for_fuzzy_match( + buf_T *buf, + pos_T *pos, + char_u *pattern, + int dir, + pos_T *start_pos, + int *len, + char_u **ptr, + int *score) +{ + pos_T current_pos = *pos; + pos_T circly_end; + int found_new_match = FALSE; + int looped_around = FALSE; + int whole_line = ctrl_x_mode_whole_line(); + + if (buf == curbuf) + circly_end = *start_pos; + else + { + circly_end.lnum = buf->b_ml.ml_line_count; + circly_end.col = 0; + circly_end.coladd = 0; + } + + if (whole_line && start_pos->lnum != pos->lnum) + current_pos.lnum += dir; + + do + { + + // Check if looped around and back to start position + if (looped_around && EQUAL_POS(current_pos, circly_end)) + break; + + // Ensure current_pos is valid + if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) + { + // Get the current line buffer + *ptr = ml_get_buf(buf, current_pos.lnum, FALSE); + if (!whole_line) + *ptr += current_pos.col; + + // If ptr is end of line is reached, move to next line + // or previous line based on direction + if (*ptr != NULL && **ptr != NUL) + { + if (!whole_line) + { + // Try to find a fuzzy match in the current line starting + // from current position + found_new_match = fuzzy_match_str_in_line(ptr, pattern, + len, ¤t_pos, score); + if (found_new_match) + { + *pos = current_pos; + break; + } + else if (looped_around && current_pos.lnum == circly_end.lnum) + break; + } + else + { + if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) + { + found_new_match = TRUE; + *pos = current_pos; + *len = (int)ml_get_buf_len(buf, current_pos.lnum); + break; + } + } + } + } + + // Move to the next line or previous line based on direction + if (dir == FORWARD) + { + if (++current_pos.lnum > buf->b_ml.ml_line_count) + { + if (p_ws) + { + current_pos.lnum = 1; + looped_around = TRUE; + } + else + break; + } + } + else + { + if (--current_pos.lnum < 1) + { + if (p_ws) + { + current_pos.lnum = buf->b_ml.ml_line_count; + looped_around = TRUE; + } + else + break; + + } + } + current_pos.col = 0; + } while (TRUE); + + return found_new_match; +} + +/* + * Free an array of fuzzy string matches "fuzmatch[count]". + */ + void +fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count) +{ + int i; + + if (fuzmatch == NULL) + return; + for (i = 0; i < count; ++i) + vim_free(fuzmatch[i].str); + vim_free(fuzmatch); +} + +/* + * Copy a list of fuzzy matches into a string list after sorting the matches by + * the fuzzy score. Frees the memory allocated for 'fuzmatch'. + * Returns OK on success and FAIL on memory allocation failure. + */ + int +fuzzymatches_to_strmatches( + fuzmatch_str_T *fuzmatch, + char_u ***matches, + int count, + int funcsort) +{ + int i; + + if (count <= 0) + return OK; + + *matches = ALLOC_MULT(char_u *, count); + if (*matches == NULL) + { + fuzmatch_str_free(fuzmatch, count); + return FAIL; + } + + // Sort the list by the descending order of the match score + if (funcsort) + fuzzy_match_func_sort((void *)fuzmatch, (size_t)count); + else + fuzzy_match_str_sort((void *)fuzmatch, (size_t)count); + + for (i = 0; i < count; i++) + (*matches)[i] = fuzmatch[i].str; + vim_free(fuzmatch); + + return OK; +} + +/* + * Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy. + * This implementation extends the original by supporting multibyte characters. + */ + +#define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN + +#define SCORE_GAP_LEADING -0.005 +#define SCORE_GAP_TRAILING -0.005 +#define SCORE_GAP_INNER -0.01 +#define SCORE_MATCH_CONSECUTIVE 1.0 +#define SCORE_MATCH_SLASH 0.9 +#define SCORE_MATCH_WORD 0.8 +#define SCORE_MATCH_CAPITAL 0.7 +#define SCORE_MATCH_DOT 0.6 + + static int +has_match(char_u *needle, char_u *haystack) +{ + while (*needle != NUL) + { + int n_char = mb_ptr2char(needle); + char_u *p = haystack; + int h_char; + int matched = FALSE; + + while (*p != NUL) + { + h_char = mb_ptr2char(p); + + if (n_char == h_char + || MB_TOUPPER(n_char) == h_char) + { + matched = TRUE; + break; + } + p += mb_ptr2len(p); + } + + if (!matched) + return 0; + + needle += mb_ptr2len(needle); + haystack = p + mb_ptr2len(p); + } + return 1; +} + +typedef struct match_struct +{ + int needle_len; + int haystack_len; + int lower_needle[MATCH_MAX_LEN]; // stores codepoints + int lower_haystack[MATCH_MAX_LEN]; // stores codepoints + score_t match_bonus[MATCH_MAX_LEN]; +} match_struct; + +#define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ') +#define IS_PATH_SEP(c) ((c) == '/') +#define IS_DOT(c) ((c) == '.') + + static score_t +compute_bonus_codepoint(int last_c, int c) +{ + if (ASCII_ISALNUM(c) || vim_iswordc(c)) + { + if (IS_PATH_SEP(last_c)) + return SCORE_MATCH_SLASH; + if (IS_WORD_SEP(last_c)) + return SCORE_MATCH_WORD; + if (IS_DOT(last_c)) + return SCORE_MATCH_DOT; + if (MB_ISUPPER(c) && MB_ISLOWER(last_c)) + return SCORE_MATCH_CAPITAL; + } + return 0; +} + + static void +setup_match_struct(match_struct *match, char_u *needle, + char_u *haystack) +{ + int i = 0; + char_u *p = needle; + while (*p != NUL && i < MATCH_MAX_LEN) + { + int c = mb_ptr2char(p); + match->lower_needle[i++] = MB_TOLOWER(c); + MB_PTR_ADV(p); + } + match->needle_len = i; + + i = 0; + p = haystack; + int prev_c = '/'; + while (*p != NUL && i < MATCH_MAX_LEN) + { + int c = mb_ptr2char(p); + match->lower_haystack[i] = MB_TOLOWER(c); + match->match_bonus[i] = compute_bonus_codepoint(prev_c, c); + prev_c = c; + MB_PTR_ADV(p); + i++; + } + match->haystack_len = i; +} + + static inline void +match_row(const match_struct *match, int row, score_t *curr_D, + score_t *curr_M, const score_t *last_D, const score_t *last_M) +{ + int n = match->needle_len; + int m = match->haystack_len; + int i = row; + + const int *lower_needle = match->lower_needle; + const int *lower_haystack = match->lower_haystack; + const score_t *match_bonus = match->match_bonus; + + score_t prev_score = SCORE_MIN; + score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; + + /* These will not be used with this value, but not all compilers see it */ + score_t prev_M = SCORE_MIN, prev_D = SCORE_MIN; + + for (int j = 0; j < m; j++) + { + if (lower_needle[i] == lower_haystack[j]) + { + score_t score = SCORE_MIN; + if (!i) + { + score = (j * SCORE_GAP_LEADING) + match_bonus[j]; + } + else if (j) + { /* i > 0 && j > 0*/ + score = MAX( + prev_M + match_bonus[j], + /* consecutive match, doesn't stack with match_bonus */ + prev_D + SCORE_MATCH_CONSECUTIVE); + } + prev_D = last_D[j]; + prev_M = last_M[j]; + curr_D[j] = score; + curr_M[j] = prev_score = MAX(score, prev_score + gap_score); + } + else + { + prev_D = last_D[j]; + prev_M = last_M[j]; + curr_D[j] = SCORE_MIN; + curr_M[j] = prev_score = prev_score + gap_score; + } + } +} + + static score_t +match_positions(char_u *needle, char_u *haystack, int_u *positions) +{ + if (!*needle) + return SCORE_MIN; + + match_struct match; + setup_match_struct(&match, needle, haystack); + + int n = match.needle_len; + int m = match.haystack_len; + + if (m > MATCH_MAX_LEN || n > m) + { + /* + * Unreasonably large candidate: return no score + * If it is a valid match it will still be returned, it will + * just be ranked below any reasonably sized candidates + */ + return SCORE_MIN; + } + else if (n == m) + { + /* Since this method can only be called with a haystack which + * matches needle. If the lengths of the strings are equal the + * strings themselves must also be equal (ignoring case). + */ + if (positions) + for (int i = 0; i < n; i++) + positions[i] = i; + return SCORE_MAX; + } + + /* + * D[][] Stores the best score for this position ending with a match. + * M[][] Stores the best possible score at this position. + */ + score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN]; + M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); + D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); + + match_row(&match, 0, D[0], M[0], D[0], M[0]); + for (int i = 1; i < n; i++) + match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]); + + /* backtrace to find the positions of optimal matching */ + if (positions) + { + int match_required = 0; + for (int i = n - 1, j = m - 1; i >= 0; i--) + { + for (; j >= 0; j--) + { + /* + * There may be multiple paths which result in + * the optimal weight. + * + * For simplicity, we will pick the first one + * we encounter, the latest in the candidate + * string. + */ + if (D[i][j] != SCORE_MIN && + (match_required || D[i][j] == M[i][j])) + { + /* If this score was determined using + * SCORE_MATCH_CONSECUTIVE, the + * previous character MUST be a match + */ + match_required = + i && j && + M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; + positions[i] = j--; + break; + } + } + } + } + + score_t result = M[n - 1][m - 1]; + + vim_free(M); + vim_free(D); + + return result; +} diff --git a/src/insexpand.c b/src/insexpand.c index bc77b2d02f..454466b30c 100644 --- a/src/insexpand.c +++ b/src/insexpand.c @@ -966,7 +966,7 @@ ins_compl_add( // current match in the list of matches . if (compl_first_match == NULL) match->cp_next = match->cp_prev = NULL; - else if (cfc_has_mode() && score > 0 && compl_get_longest) + else if (cfc_has_mode() && score != FUZZY_SCORE_NONE && compl_get_longest) { current = compl_first_match->cp_next; prev = compl_first_match; @@ -1193,7 +1193,8 @@ ins_compl_add_matches( for (int i = 0; i < num_matches && add_r != FAIL; i++) { add_r = ins_compl_add(matches[i], -1, NULL, NULL, NULL, dir, - CP_FAST | (icase ? CP_ICASE : 0), FALSE, NULL, 0); + CP_FAST | (icase ? CP_ICASE : 0), FALSE, NULL, + FUZZY_SCORE_NONE); if (add_r == OK) // if dir was BACKWARD then honor it just once dir = FORWARD; @@ -1430,7 +1431,7 @@ cp_compare_nearest(const void* a, const void* b) { int score_a = ((compl_T*)a)->cp_score; int score_b = ((compl_T*)b)->cp_score; - if (score_a == 0 || score_b == 0) + if (score_a == FUZZY_SCORE_NONE || score_b == FUZZY_SCORE_NONE) return 0; return (score_a > score_b) ? 1 : (score_a < score_b) ? -1 : 0; } @@ -1627,7 +1628,7 @@ ins_compl_build_pum(void) && (leader->string == NULL || ins_compl_equal(compl, leader->string, (int)leader->length) - || (fuzzy_filter && compl->cp_score > 0))) + || (fuzzy_filter && compl->cp_score != FUZZY_SCORE_NONE))) { // Limit number of items from each source if max_items is set. int match_limit_exceeded = FALSE; @@ -2001,7 +2002,7 @@ thesaurus_add_words_in_line( if (wstart != skip_word) { status = ins_compl_add_infercase(wstart, (int)(ptr - wstart), p_ic, - fname, dir, FALSE, 0); + fname, dir, FALSE, FUZZY_SCORE_NONE); if (status == FAIL) break; } @@ -2072,7 +2073,7 @@ ins_compl_files( : find_word_end(ptr); add_r = ins_compl_add_infercase(regmatch->startp[0], (int)(ptr - regmatch->startp[0]), - p_ic, files[i], *dir, FALSE, 0); + p_ic, files[i], *dir, FALSE, FUZZY_SCORE_NONE); if (thesaurus) { // For a thesaurus, add all the words in the line @@ -3662,7 +3663,7 @@ ins_compl_add_tv(typval_T *tv, int dir, int fast) return FAIL; } status = ins_compl_add(word, -1, NULL, cptext, - &user_data, dir, flags, dup, user_hl, 0); + &user_data, dir, flags, dup, user_hl, FUZZY_SCORE_NONE); if (status != OK) clear_tv(&user_data); return status; @@ -3757,7 +3758,7 @@ set_completion(colnr_T startcol, list_T *list) compl_orig_text.length = (size_t)compl_length; if (ins_compl_add(compl_orig_text.string, (int)compl_orig_text.length, NULL, NULL, NULL, 0, - flags | CP_FAST, FALSE, NULL, 0) != OK) + flags | CP_FAST, FALSE, NULL, FUZZY_SCORE_NONE) != OK) return; ctrl_x_mode = CTRL_X_EVAL; @@ -4747,7 +4748,7 @@ get_next_filename_completion(void) { ptr = matches[i]; score = fuzzy_match_str(ptr, leader); - if (score > 0) + if (score != FUZZY_SCORE_NONE) { if (ga_grow(&fuzzy_indices, 1) == OK) { @@ -4959,7 +4960,7 @@ get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_pos) int in_fuzzy_collect = (cfc_has_mode() && compl_length > 0) || ((get_cot_flags() & COT_FUZZY) && compl_autocomplete); char_u *leader = ins_compl_leader(); - int score = 0; + int score = FUZZY_SCORE_NONE; int in_curbuf = st->ins_buf == curbuf; // If 'infercase' is set, don't use 'smartcase' here @@ -5053,7 +5054,6 @@ get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_pos) score = st->cur_match_pos->lnum - curwin->w_cursor.lnum; if (score < 0) score = -score; - score++; } if (ins_compl_add_infercase(ptr, len, p_ic, @@ -5159,7 +5159,7 @@ get_register_completion(void) compl_orig_text.length) == 0)) { if (ins_compl_add_infercase(str, str_len, p_ic, NULL, - dir, FALSE, 0) == OK) + dir, FALSE, FUZZY_SCORE_NONE) == OK) dir = FORWARD; } } @@ -5198,7 +5198,7 @@ get_register_completion(void) compl_orig_text.length) == 0))) { if (ins_compl_add_infercase(p, len, p_ic, NULL, - dir, FALSE, 0) == OK) + dir, FALSE, FUZZY_SCORE_NONE) == OK) dir = FORWARD; } @@ -5568,7 +5568,8 @@ ins_compl_get_exp(pos_T *ini) // For `^P` completion, reset `compl_curr_match` to the head to avoid // mixing matches from different sources. if (!compl_dir_forward()) - while (compl_curr_match->cp_prev) + while (compl_curr_match->cp_prev + && !match_at_original_text(compl_curr_match->cp_prev)) compl_curr_match = compl_curr_match->cp_prev; } cpt_sources_index = -1; @@ -5966,7 +5967,8 @@ find_next_completion_match( && leader->string != NULL && !ins_compl_equal(compl_shown_match, leader->string, (int)leader->length) - && !(compl_fuzzy_match && compl_shown_match->cp_score > 0)) + && !(compl_fuzzy_match + && compl_shown_match->cp_score != FUZZY_SCORE_NONE)) ++todo; else // Remember a matching item. @@ -6947,7 +6949,7 @@ ins_compl_start(void) if (compl_orig_text.string == NULL || ins_compl_add(compl_orig_text.string, (int)compl_orig_text.length, - NULL, NULL, NULL, 0, flags, FALSE, NULL, 0) != OK) + NULL, NULL, NULL, 0, flags, FALSE, NULL, FUZZY_SCORE_NONE) != OK) { VIM_CLEAR_STRING(compl_pattern); VIM_CLEAR_STRING(compl_orig_text); diff --git a/src/map.c b/src/map.c index 2d378b55c2..fbecf4aced 100644 --- a/src/map.c +++ b/src/map.c @@ -1433,7 +1433,7 @@ ExpandMappings( else { score = fuzzy_match_str(p, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } if (!match) @@ -1480,7 +1480,7 @@ ExpandMappings( else { score = fuzzy_match_str(p, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } if (!match) diff --git a/src/option.c b/src/option.c index 46a38d5005..2ec959c726 100644 --- a/src/option.c +++ b/src/option.c @@ -7956,7 +7956,7 @@ match_str( int score; score = fuzzy_match_str(str, fuzzystr); - if (score != 0) + if (score != FUZZY_SCORE_NONE) { if (!test_only) { diff --git a/src/proto.h b/src/proto.h index 8c345b90f0..e9e5efcbc5 100644 --- a/src/proto.h +++ b/src/proto.h @@ -188,6 +188,7 @@ void mbyte_im_set_active(int active_arg); # if defined(FEAT_CRYPT) || defined(FEAT_PERSISTENT_UNDO) # include "sha256.pro" # endif +# include "fuzzy.pro" # include "search.pro" # ifdef FEAT_SIGNS # include "sign.pro" diff --git a/src/proto/fuzzy.pro b/src/proto/fuzzy.pro new file mode 100644 index 0000000000..dc78af52d1 --- /dev/null +++ b/src/proto/fuzzy.pro @@ -0,0 +1,11 @@ +/* fuzzy.c */ +int fuzzy_match(char_u *str, char_u *pat_arg, int matchseq, int *outScore, int_u *matches, int maxMatches); +void f_matchfuzzy(typval_T *argvars, typval_T *rettv); +void f_matchfuzzypos(typval_T *argvars, typval_T *rettv); +int fuzzy_match_str(char_u *str, char_u *pat); +garray_T *fuzzy_match_str_with_pos(char_u *str, char_u *pat); +int fuzzy_match_str_in_line(char_u **ptr, char_u *pat, int *len, pos_T *current_pos, int *score); +int search_for_fuzzy_match(buf_T *buf, pos_T *pos, char_u *pattern, int dir, pos_T *start_pos, int *len, char_u **ptr, int *score); +void fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count); +int fuzzymatches_to_strmatches(fuzmatch_str_T *fuzmatch, char_u ***matches, int count, int funcsort); +/* vim: set ft=c : */ diff --git a/src/proto/search.pro b/src/proto/search.pro index 4fcce1bd30..445817762d 100644 --- a/src/proto/search.pro +++ b/src/proto/search.pro @@ -37,13 +37,4 @@ void find_pattern_in_path(char_u *ptr, int dir, int len, int whole, int skip_com spat_T *get_spat(int idx); int get_spat_last_idx(void); void f_searchcount(typval_T *argvars, typval_T *rettv); -int fuzzy_match(char_u *str, char_u *pat_arg, int matchseq, int *outScore, int_u *matches, int maxMatches, int camelcase); -void f_matchfuzzy(typval_T *argvars, typval_T *rettv); -void f_matchfuzzypos(typval_T *argvars, typval_T *rettv); -int fuzzy_match_str(char_u *str, char_u *pat); -garray_T *fuzzy_match_str_with_pos(char_u *str, char_u *pat); -int fuzzy_match_str_in_line(char_u **ptr, char_u *pat, int *len, pos_T *current_pos, int *score); -int search_for_fuzzy_match(buf_T *buf, pos_T *pos, char_u *pattern, int dir, pos_T *start_pos, int *len, char_u **ptr, int *score); -void fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count); -int fuzzymatches_to_strmatches(fuzmatch_str_T *fuzmatch, char_u ***matches, int count, int funcsort); /* vim: set ft=c : */ diff --git a/src/quickfix.c b/src/quickfix.c index ab595d7bbc..9cd2fe245e 100644 --- a/src/quickfix.c +++ b/src/quickfix.c @@ -6429,8 +6429,8 @@ vgr_match_buflines( long lnum; colnr_T col; int pat_len = (int)STRLEN(spat); - if (pat_len > MAX_FUZZY_MATCHES) - pat_len = MAX_FUZZY_MATCHES; + if (pat_len > FUZZY_MATCH_MAX_LEN) + pat_len = FUZZY_MATCH_MAX_LEN; for (lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; ++lnum) { @@ -6483,13 +6483,13 @@ vgr_match_buflines( char_u *str = ml_get_buf(buf, lnum, FALSE); colnr_T linelen = ml_get_buf_len(buf, lnum); int score; - int_u matches[MAX_FUZZY_MATCHES]; + int_u matches[FUZZY_MATCH_MAX_LEN]; int_u sz = ARRAY_LENGTH(matches); // Fuzzy string match CLEAR_FIELD(matches); while (fuzzy_match(str + col, spat, FALSE, &score, - matches, sz, TRUE) > 0) + matches, sz) > 0) { // Pass the buffer number so that it gets used even for a // dummy buffer, unless duplicate_name is set, then the diff --git a/src/search.c b/src/search.c index 68f17e25f3..9a6bb3fbdd 100644 --- a/src/search.c +++ b/src/search.c @@ -42,17 +42,6 @@ static void find_mps_values(int *initc, int *findc, int *backwards, int switchit static int is_zero_width(char_u *pattern, size_t patternlen, int move, pos_T *cur, int direction); static void cmdline_search_stat(int dirc, pos_T *pos, pos_T *cursor_pos, int show_top_bot_msg, char_u *msgbuf, size_t msgbuflen, int recompute, int maxcount, long timeout); static void update_search_stat(int dirc, pos_T *pos, pos_T *cursor_pos, searchstat_T *stat, int recompute, int maxcount, long timeout); -static int fuzzy_match_compute_score(char_u *fuzpat, char_u *str, int strSz, int_u *matches, int numMatches, int camelcase); -static int fuzzy_match_recursive(char_u *fuzpat, char_u *str, int_u strIdx, int *outScore, char_u *strBegin, int strLen, int_u *srcMatches, int_u *matches, int maxMatches, int nextMatch, int *recursionCount, int camelcase); -#if defined(FEAT_EVAL) || defined(FEAT_PROTO) -static int fuzzy_match_item_compare(const void *s1, const void *s2); -static void fuzzy_match_in_list(list_T *l, char_u *str, int matchseq, char_u *key, callback_T *item_cb, int retmatchpos, list_T *fmatchlist, long max_matches, int camelcase); -static void do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos); -#endif -static int fuzzy_match_str_compare(const void *s1, const void *s2); -static void fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz); -static int fuzzy_match_func_compare(const void *s1, const void *s2); -static void fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz); #define SEARCH_STAT_DEF_TIMEOUT 40L // 'W ': 2 + @@ -4292,1190 +4281,3 @@ the_end: #endif } #endif - -/* - * Fuzzy string matching - * - * Ported from the lib_fts library authored by Forrest Smith. - * https://github.com/forrestthewoods/lib_fts/tree/master/code - * - * The following blog describes the fuzzy matching algorithm: - * https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ - * - * Each matching string is assigned a score. The following factors are checked: - * - Matched letter - * - Unmatched letter - * - Consecutively matched letters - * - Proximity to start - * - Letter following a separator (space, underscore) - * - Uppercase letter following lowercase (aka CamelCase) - * - * Matched letters are good. Unmatched letters are bad. Matching near the start - * is good. Matching the first letter in the middle of a phrase is good. - * Matching the uppercase letters in camel case entries is good. - * - * The score assigned for each factor is explained below. - * File paths are different from file names. File extensions may be ignorable. - * Single words care about consecutive matches but not separators or camel - * case. - * Score starts at 100 - * Matched letter: +0 points - * Unmatched letter: -1 point - * Consecutive match bonus: +15 points - * First letter bonus: +15 points - * Separator bonus: +30 points - * Camel case bonus: +30 points - * Unmatched leading letter: -5 points (max: -15) - * - * There is some nuance to this. Scores don’t have an intrinsic meaning. The - * score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a - * lower minimum score due to unmatched letter penalty. Longer search patterns - * have a higher maximum score due to match bonuses. - * - * Separator and camel case bonus is worth a LOT. Consecutive matches are worth - * quite a bit. - * - * There is a penalty if you DON’T match the first three letters. Which - * effectively rewards matching near the start. However there’s no difference - * in matching between the middle and end. - * - * There is not an explicit bonus for an exact match. Unmatched letters receive - * a penalty. So shorter strings and closer matches are worth more. - */ -typedef struct -{ - int idx; // used for stable sort - listitem_T *item; - int score; - list_T *lmatchpos; -} fuzzyItem_T; - -// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that -// matching a whole word is preferred. -#define SEQUENTIAL_BONUS 40 -// bonus if match occurs after a path separator -#define PATH_SEPARATOR_BONUS 30 -// bonus if match occurs after a word separator -#define WORD_SEPARATOR_BONUS 25 -// bonus if match is uppercase and prev is lower -#define CAMEL_BONUS 30 -// bonus if the first letter is matched -#define FIRST_LETTER_BONUS 15 -// bonus if exact match -#define EXACT_MATCH_BONUS 100 -// bonus if case match when no ignorecase -#define CASE_MATCH_BONUS 25 -// penalty applied for every letter in str before the first match -#define LEADING_LETTER_PENALTY (-5) -// maximum penalty for leading letters -#define MAX_LEADING_LETTER_PENALTY (-15) -// penalty for every letter that doesn't match -#define UNMATCHED_LETTER_PENALTY (-1) -// penalty for gap in matching positions (-2 * k) -#define GAP_PENALTY (-2) -// Score for a string that doesn't fuzzy match the pattern -#define SCORE_NONE (-9999) - -#define FUZZY_MATCH_RECURSION_LIMIT 10 - -/* - * Compute a score for a fuzzy matched string. The matching character locations - * are in 'matches'. - */ - static int -fuzzy_match_compute_score( - char_u *fuzpat, - char_u *str, - int strSz, - int_u *matches, - int numMatches, - int camelcase) -{ - int score; - int penalty; - int unmatched; - int i; - char_u *p = str; - int_u sidx = 0; - int is_exact_match = TRUE; - char_u *orig_fuzpat = fuzpat - numMatches; - char_u *curpat = orig_fuzpat; - int pat_idx = 0; - // Track consecutive camel case matches - int consecutive_camel = 0; - - // Initialize score - score = 100; - - // Apply leading letter penalty - penalty = LEADING_LETTER_PENALTY * matches[0]; - if (penalty < MAX_LEADING_LETTER_PENALTY) - penalty = MAX_LEADING_LETTER_PENALTY; - score += penalty; - - // Apply unmatched penalty - unmatched = strSz - numMatches; - score += UNMATCHED_LETTER_PENALTY * unmatched; - // In a long string, not all matches may be found due to the recursion limit. - // If at least one match is found, reset the score to a non-negative value. - if (score < 0 && numMatches > 0) - score = 0; - - // Apply ordering bonuses - for (i = 0; i < numMatches; ++i) - { - int_u currIdx = matches[i]; - int curr; - int is_camel = FALSE; - - if (i > 0) - { - int_u prevIdx = matches[i - 1]; - - // Sequential - if (currIdx == (prevIdx + 1)) - score += SEQUENTIAL_BONUS; - else - { - score += GAP_PENALTY * (currIdx - prevIdx); - // Reset consecutive camel count on gap - consecutive_camel = 0; - } - } - - // Check for bonuses based on neighbor character value - if (currIdx > 0) - { - // Camel case - int neighbor = ' '; - - if (has_mbyte) - { - while (sidx < currIdx) - { - neighbor = (*mb_ptr2char)(p); - MB_PTR_ADV(p); - sidx++; - } - curr = (*mb_ptr2char)(p); - } - else - { - neighbor = str[currIdx - 1]; - curr = str[currIdx]; - } - - // Enhanced camel case scoring - if (camelcase && vim_islower(neighbor) && vim_isupper(curr)) - { - score += CAMEL_BONUS * 2; // Double the camel case bonus - is_camel = TRUE; - consecutive_camel++; - // Additional bonus for consecutive camel - if (consecutive_camel > 1) - score += CAMEL_BONUS; - } - else - consecutive_camel = 0; - - // Bonus if the match follows a separator character - if (neighbor == '/' || neighbor == '\\') - score += PATH_SEPARATOR_BONUS; - else if (neighbor == ' ' || neighbor == '_') - score += WORD_SEPARATOR_BONUS; - } - else - { - // First letter - score += FIRST_LETTER_BONUS; - curr = has_mbyte ? (*mb_ptr2char)(p) : str[currIdx]; - } - - // Case matching bonus - if (vim_isalpha(curr)) - { - while (pat_idx < i && *curpat) - { - if (has_mbyte) - MB_PTR_ADV(curpat); - else - curpat++; - pat_idx++; - } - - if (has_mbyte) - { - if (curr == (*mb_ptr2char)(curpat)) - { - score += CASE_MATCH_BONUS; - // Extra bonus for exact case match in camel - if (is_camel) - score += CASE_MATCH_BONUS / 2; - } - } - else if (curr == *curpat) - { - score += CASE_MATCH_BONUS; - if (is_camel) - score += CASE_MATCH_BONUS / 2; - } - } - - // Check exact match condition - if (currIdx != (int_u)i) - is_exact_match = FALSE; - } - - // Boost score for exact matches - if (is_exact_match && numMatches == strSz) - score += EXACT_MATCH_BONUS; - - return score; -} - -/* - * Perform a recursive search for fuzzy matching 'fuzpat' in 'str'. - * Return the number of matching characters. - */ - static int -fuzzy_match_recursive( - char_u *fuzpat, - char_u *str, - int_u strIdx, - int *outScore, - char_u *strBegin, - int strLen, - int_u *srcMatches, - int_u *matches, - int maxMatches, - int nextMatch, - int *recursionCount, - int camelcase) -{ - // Recursion params - int recursiveMatch = FALSE; - int_u bestRecursiveMatches[MAX_FUZZY_MATCHES]; - int bestRecursiveScore = 0; - int first_match; - int matched; - - // Count recursions - ++*recursionCount; - if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) - return 0; - - // Detect end of strings - if (*fuzpat == NUL || *str == NUL) - return 0; - - // Loop through fuzpat and str looking for a match - first_match = TRUE; - while (*fuzpat != NUL && *str != NUL) - { - int c1; - int c2; - - c1 = PTR2CHAR(fuzpat); - c2 = PTR2CHAR(str); - - // Found match - if (vim_tolower(c1) == vim_tolower(c2)) - { - // Supplied matches buffer was too short - if (nextMatch >= maxMatches) - return 0; - - int recursiveScore = 0; - int_u recursiveMatches[MAX_FUZZY_MATCHES]; - CLEAR_FIELD(recursiveMatches); - - // "Copy-on-Write" srcMatches into matches - if (first_match && srcMatches) - { - memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0])); - first_match = FALSE; - } - - // Recursive call that "skips" this match - char_u *next_char = str + (has_mbyte ? (*mb_ptr2len)(str) : 1); - if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, - &recursiveScore, strBegin, strLen, matches, - recursiveMatches, - ARRAY_LENGTH(recursiveMatches), - nextMatch, recursionCount, camelcase)) - { - // Pick best recursive score - if (!recursiveMatch || recursiveScore > bestRecursiveScore) - { - memcpy(bestRecursiveMatches, recursiveMatches, - MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0])); - bestRecursiveScore = recursiveScore; - } - recursiveMatch = TRUE; - } - - // Advance - matches[nextMatch++] = strIdx; - if (has_mbyte) - MB_PTR_ADV(fuzpat); - else - ++fuzpat; - } - if (has_mbyte) - MB_PTR_ADV(str); - else - ++str; - strIdx++; - } - - // Determine if full fuzpat was matched - matched = *fuzpat == NUL ? TRUE : FALSE; - - // Calculate score - if (matched) - *outScore = fuzzy_match_compute_score(fuzpat, strBegin, strLen, matches, - nextMatch, camelcase); - - // Return best result - if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) - { - // Recursive score is better than "this" - memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); - *outScore = bestRecursiveScore; - return nextMatch; - } - else if (matched) - return nextMatch; // "this" score is better than recursive - - return 0; // no match -} - -/* - * fuzzy_match() - * - * Performs exhaustive search via recursion to find all possible matches and - * match with highest score. - * Scores values have no intrinsic meaning. Possible score range is not - * normalized and varies with pattern. - * Recursion is limited internally (default=10) to prevent degenerate cases - * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). - * Uses char_u for match indices. Therefore patterns are limited to - * MAX_FUZZY_MATCHES characters. - * - * Returns TRUE if "pat_arg" matches "str". Also returns the match score in - * "outScore" and the matching character positions in "matches". - */ - int -fuzzy_match( - char_u *str, - char_u *pat_arg, - int matchseq, - int *outScore, - int_u *matches, - int maxMatches, - int camelcase) -{ - int recursionCount = 0; - int len = MB_CHARLEN(str); - char_u *save_pat; - char_u *pat; - char_u *p; - int complete = FALSE; - int score = 0; - int numMatches = 0; - int matchCount; - - *outScore = 0; - - save_pat = vim_strsave(pat_arg); - if (save_pat == NULL) - return FALSE; - pat = save_pat; - p = pat; - - // Try matching each word in 'pat_arg' in 'str' - while (TRUE) - { - if (matchseq) - complete = TRUE; - else - { - // Extract one word from the pattern (separated by space) - p = skipwhite(p); - if (*p == NUL) - break; - pat = p; - while (*p != NUL && !VIM_ISWHITE(PTR2CHAR(p))) - { - if (has_mbyte) - MB_PTR_ADV(p); - else - ++p; - } - if (*p == NUL) // processed all the words - complete = TRUE; - *p = NUL; - } - - score = 0; - recursionCount = 0; - matchCount = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL, - matches + numMatches, maxMatches - numMatches, - 0, &recursionCount, camelcase); - if (matchCount == 0) - { - numMatches = 0; - break; - } - - // Accumulate the match score and the number of matches - *outScore += score; - numMatches += matchCount; - - if (complete) - break; - - // try matching the next word - ++p; - } - - vim_free(save_pat); - return numMatches != 0; -} - -#if defined(FEAT_EVAL) || defined(FEAT_PROTO) -/* - * Sort the fuzzy matches in the descending order of the match score. - * For items with same score, retain the order using the index (stable sort) - */ - static int -fuzzy_match_item_compare(const void *s1, const void *s2) -{ - int v1 = ((fuzzyItem_T *)s1)->score; - int v2 = ((fuzzyItem_T *)s2)->score; - int idx1 = ((fuzzyItem_T *)s1)->idx; - int idx2 = ((fuzzyItem_T *)s2)->idx; - - if (v1 == v2) - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - else - return v1 > v2 ? -1 : 1; -} - -/* - * Fuzzy search the string 'str' in a list of 'items' and return the matching - * strings in 'fmatchlist'. - * If 'matchseq' is TRUE, then for multi-word search strings, match all the - * words in sequence. - * If 'items' is a list of strings, then search for 'str' in the list. - * If 'items' is a list of dicts, then either use 'key' to lookup the string - * for each item or use 'item_cb' Funcref function to get the string. - * If 'retmatchpos' is TRUE, then return a list of positions where 'str' - * matches for each item. - */ - static void -fuzzy_match_in_list( - list_T *l, - char_u *str, - int matchseq, - char_u *key, - callback_T *item_cb, - int retmatchpos, - list_T *fmatchlist, - long max_matches, - int camelcase) -{ - long len; - fuzzyItem_T *items; - listitem_T *li; - long i = 0; - long match_count = 0; - int_u matches[MAX_FUZZY_MATCHES]; - - len = list_len(l); - if (len == 0) - return; - if (max_matches > 0 && len > max_matches) - len = max_matches; - - items = ALLOC_CLEAR_MULT(fuzzyItem_T, len); - if (items == NULL) - return; - - // For all the string items in items, get the fuzzy matching score - FOR_ALL_LIST_ITEMS(l, li) - { - int score; - char_u *itemstr; - typval_T rettv; - - if (max_matches > 0 && match_count >= max_matches) - break; - - itemstr = NULL; - rettv.v_type = VAR_UNKNOWN; - if (li->li_tv.v_type == VAR_STRING) // list of strings - itemstr = li->li_tv.vval.v_string; - else if (li->li_tv.v_type == VAR_DICT - && (key != NULL || item_cb->cb_name != NULL)) - { - // For a dict, either use the specified key to lookup the string or - // use the specified callback function to get the string. - if (key != NULL) - itemstr = dict_get_string(li->li_tv.vval.v_dict, - (char *)key, FALSE); - else - { - typval_T argv[2]; - - // Invoke the supplied callback (if any) to get the dict item - li->li_tv.vval.v_dict->dv_refcount++; - argv[0].v_type = VAR_DICT; - argv[0].vval.v_dict = li->li_tv.vval.v_dict; - argv[1].v_type = VAR_UNKNOWN; - if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL) - { - if (rettv.v_type == VAR_STRING) - itemstr = rettv.vval.v_string; - } - dict_unref(li->li_tv.vval.v_dict); - } - } - - if (itemstr != NULL - && fuzzy_match(itemstr, str, matchseq, &score, matches, - MAX_FUZZY_MATCHES, camelcase)) - { - items[match_count].idx = match_count; - items[match_count].item = li; - items[match_count].score = score; - - // Copy the list of matching positions in itemstr to a list, if - // 'retmatchpos' is set. - if (retmatchpos) - { - int j = 0; - char_u *p; - - items[match_count].lmatchpos = list_alloc(); - if (items[match_count].lmatchpos == NULL) - goto done; - - p = str; - while (*p != NUL) - { - if (!VIM_ISWHITE(PTR2CHAR(p)) || matchseq) - { - if (list_append_number(items[match_count].lmatchpos, - matches[j]) == FAIL) - goto done; - j++; - } - if (has_mbyte) - MB_PTR_ADV(p); - else - ++p; - } - } - ++match_count; - } - clear_tv(&rettv); - } - - if (match_count > 0) - { - list_T *retlist; - - // Sort the list by the descending order of the match score - qsort((void *)items, (size_t)match_count, sizeof(fuzzyItem_T), - fuzzy_match_item_compare); - - // For matchfuzzy(), return a list of matched strings. - // ['str1', 'str2', 'str3'] - // For matchfuzzypos(), return a list with three items. - // The first item is a list of matched strings. The second item - // is a list of lists where each list item is a list of matched - // character positions. The third item is a list of matching scores. - // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] - if (retmatchpos) - { - li = list_find(fmatchlist, 0); - if (li == NULL || li->li_tv.vval.v_list == NULL) - goto done; - retlist = li->li_tv.vval.v_list; - } - else - retlist = fmatchlist; - - // Copy the matching strings with a valid score to the return list - for (i = 0; i < match_count; i++) - { - if (items[i].score == SCORE_NONE) - break; - list_append_tv(retlist, &items[i].item->li_tv); - } - - // next copy the list of matching positions - if (retmatchpos) - { - li = list_find(fmatchlist, -2); - if (li == NULL || li->li_tv.vval.v_list == NULL) - goto done; - retlist = li->li_tv.vval.v_list; - - for (i = 0; i < match_count; i++) - { - if (items[i].score == SCORE_NONE) - break; - if (items[i].lmatchpos != NULL - && list_append_list(retlist, items[i].lmatchpos) == FAIL) - goto done; - } - - // copy the matching scores - li = list_find(fmatchlist, -1); - if (li == NULL || li->li_tv.vval.v_list == NULL) - goto done; - retlist = li->li_tv.vval.v_list; - for (i = 0; i < match_count; i++) - { - if (items[i].score == SCORE_NONE) - break; - if (list_append_number(retlist, items[i].score) == FAIL) - goto done; - } - } - } - -done: - vim_free(items); -} - -/* - * Do fuzzy matching. Returns the list of matched strings in 'rettv'. - * If 'retmatchpos' is TRUE, also returns the matching character positions. - */ - static void -do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) -{ - callback_T cb; - char_u *key = NULL; - int ret; - int matchseq = FALSE; - long max_matches = 0; - int camelcase = TRUE; - - if (in_vim9script() - && (check_for_list_arg(argvars, 0) == FAIL - || check_for_string_arg(argvars, 1) == FAIL - || check_for_opt_dict_arg(argvars, 2) == FAIL)) - return; - - CLEAR_POINTER(&cb); - - // validate and get the arguments - if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) - { - semsg(_(e_argument_of_str_must_be_list), - retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); - return; - } - if (argvars[1].v_type != VAR_STRING - || argvars[1].vval.v_string == NULL) - { - semsg(_(e_invalid_argument_str), tv_get_string(&argvars[1])); - return; - } - - if (argvars[2].v_type != VAR_UNKNOWN) - { - dict_T *d; - dictitem_T *di; - - if (check_for_nonnull_dict_arg(argvars, 2) == FAIL) - return; - - // To search a dict, either a callback function or a key can be - // specified. - d = argvars[2].vval.v_dict; - if ((di = dict_find(d, (char_u *)"key", -1)) != NULL) - { - if (di->di_tv.v_type != VAR_STRING - || di->di_tv.vval.v_string == NULL - || *di->di_tv.vval.v_string == NUL) - { - semsg(_(e_invalid_value_for_argument_str_str), "key", - tv_get_string(&di->di_tv)); - return; - } - key = tv_get_string(&di->di_tv); - } - else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL) - { - cb = get_callback(&di->di_tv); - if (cb.cb_name == NULL) - { - semsg(_(e_invalid_value_for_argument_str), "text_cb"); - return; - } - } - - if ((di = dict_find(d, (char_u *)"limit", -1)) != NULL) - { - if (di->di_tv.v_type != VAR_NUMBER) - { - semsg(_(e_invalid_value_for_argument_str), "limit"); - return; - } - max_matches = (long)tv_get_number_chk(&di->di_tv, NULL); - } - - if ((di = dict_find(d, (char_u *)"camelcase", -1)) != NULL) - { - if (di->di_tv.v_type != VAR_BOOL) - { - semsg(_(e_invalid_value_for_argument_str), "camelcase"); - return; - } - camelcase = tv_get_bool_chk(&di->di_tv, NULL); - } - - if (dict_has_key(d, "matchseq")) - matchseq = TRUE; - } - - // get the fuzzy matches - ret = rettv_list_alloc(rettv); - if (ret == FAIL) - goto done; - if (retmatchpos) - { - list_T *l; - - // For matchfuzzypos(), a list with three items are returned. First - // item is a list of matching strings, the second item is a list of - // lists with matching positions within each string and the third item - // is the list of scores of the matches. - l = list_alloc(); - if (l == NULL) - goto done; - if (list_append_list(rettv->vval.v_list, l) == FAIL) - { - vim_free(l); - goto done; - } - l = list_alloc(); - if (l == NULL) - goto done; - if (list_append_list(rettv->vval.v_list, l) == FAIL) - { - vim_free(l); - goto done; - } - l = list_alloc(); - if (l == NULL) - goto done; - if (list_append_list(rettv->vval.v_list, l) == FAIL) - { - vim_free(l); - goto done; - } - } - - fuzzy_match_in_list(argvars[0].vval.v_list, tv_get_string(&argvars[1]), - matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches, - camelcase); - -done: - free_callback(&cb); -} - -/* - * "matchfuzzy()" function - */ - void -f_matchfuzzy(typval_T *argvars, typval_T *rettv) -{ - do_fuzzymatch(argvars, rettv, FALSE); -} - -/* - * "matchfuzzypos()" function - */ - void -f_matchfuzzypos(typval_T *argvars, typval_T *rettv) -{ - do_fuzzymatch(argvars, rettv, TRUE); -} -#endif - -/* - * Same as fuzzy_match_item_compare() except for use with a string match - */ - static int -fuzzy_match_str_compare(const void *s1, const void *s2) -{ - int v1 = ((fuzmatch_str_T *)s1)->score; - int v2 = ((fuzmatch_str_T *)s2)->score; - int idx1 = ((fuzmatch_str_T *)s1)->idx; - int idx2 = ((fuzmatch_str_T *)s2)->idx; - - if (v1 == v2) - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - else - return v1 > v2 ? -1 : 1; -} - -/* - * Sort fuzzy matches by score - */ - static void -fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz) -{ - // Sort the list by the descending order of the match score - qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), - fuzzy_match_str_compare); -} - -/* - * Same as fuzzy_match_item_compare() except for use with a function name - * string match. functions should be sorted to the end. - */ - static int -fuzzy_match_func_compare(const void *s1, const void *s2) -{ - int v1 = ((fuzmatch_str_T *)s1)->score; - int v2 = ((fuzmatch_str_T *)s2)->score; - int idx1 = ((fuzmatch_str_T *)s1)->idx; - int idx2 = ((fuzmatch_str_T *)s2)->idx; - char_u *str1 = ((fuzmatch_str_T *)s1)->str; - char_u *str2 = ((fuzmatch_str_T *)s2)->str; - - if (*str1 != '<' && *str2 == '<') - return -1; - if (*str1 == '<' && *str2 != '<') - return 1; - if (v1 == v2) - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - else - return v1 > v2 ? -1 : 1; -} - -/* - * Sort fuzzy matches of function names by score. - * functions should be sorted to the end. - */ - static void -fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz) -{ - // Sort the list by the descending order of the match score - qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), - fuzzy_match_func_compare); -} - -/* - * Fuzzy match 'pat' in 'str'. Returns 0 if there is no match. Otherwise, - * returns the match score. - */ - int -fuzzy_match_str(char_u *str, char_u *pat) -{ - int score = 0; - int_u matchpos[MAX_FUZZY_MATCHES]; - - if (str == NULL || pat == NULL) - return 0; - - fuzzy_match(str, pat, TRUE, &score, matchpos, - sizeof(matchpos) / sizeof(matchpos[0]), TRUE); - - return score; -} - -/* - * Fuzzy match the position of string 'pat' in string 'str'. - * Returns a dynamic array of matching positions. If there is no match, - * returns NULL. - */ - garray_T * -fuzzy_match_str_with_pos(char_u *str UNUSED, char_u *pat UNUSED) -{ -#ifdef FEAT_SEARCH_EXTRA - int score = 0; - garray_T *match_positions = NULL; - int_u matches[MAX_FUZZY_MATCHES]; - int j = 0; - - if (str == NULL || pat == NULL) - return NULL; - - match_positions = ALLOC_ONE(garray_T); - if (match_positions == NULL) - return NULL; - ga_init2(match_positions, sizeof(int_u), 10); - - if (!fuzzy_match(str, pat, FALSE, &score, matches, MAX_FUZZY_MATCHES, TRUE) - || score == 0) - { - ga_clear(match_positions); - vim_free(match_positions); - return NULL; - } - - for (char_u *p = pat; *p != NUL; MB_PTR_ADV(p)) - { - if (!VIM_ISWHITE(PTR2CHAR(p))) - { - ga_grow(match_positions, 1); - ((int_u *)match_positions->ga_data)[match_positions->ga_len] = - matches[j]; - match_positions->ga_len++; - j++; - } - } - - return match_positions; -#else - return NULL; -#endif -} - -/* - * This function splits the line pointed to by `*ptr` into words and performs - * a fuzzy match for the pattern `pat` on each word. It iterates through the - * line, moving `*ptr` to the start of each word during the process. - * - * If a match is found: - * - `*ptr` points to the start of the matched word. - * - `*len` is set to the length of the matched word. - * - `*score` contains the match score. - * - * If no match is found, `*ptr` is updated to the end of the line. - */ - int -fuzzy_match_str_in_line( - char_u **ptr, - char_u *pat, - int *len, - pos_T *current_pos, - int *score) -{ - char_u *str = *ptr; - char_u *strBegin = str; - char_u *end = NULL; - char_u *start = NULL; - int found = FALSE; - char save_end; - char_u *line_end = NULL; - - if (str == NULL || pat == NULL) - return found; - line_end = find_line_end(str); - - while (str < line_end) - { - // Skip non-word characters - start = find_word_start(str); - if (*start == NUL) - break; - end = find_word_end(start); - - // Extract the word from start to end - save_end = *end; - *end = NUL; - - // Perform fuzzy match - *score = fuzzy_match_str(start, pat); - *end = save_end; - - if (*score > 0) - { - *len = (int)(end - start); - found = TRUE; - *ptr = start; - if (current_pos) - current_pos->col += (int)(end - strBegin); - break; - } - - // Move to the end of the current word for the next iteration - str = end; - // Ensure we continue searching after the current word - while (*str != NUL && !vim_iswordp(str)) - MB_PTR_ADV(str); - } - - if (!found) - *ptr = line_end; - - return found; -} - -/* - * Search for the next fuzzy match in the specified buffer. - * This function attempts to find the next occurrence of the given pattern - * in the buffer, starting from the current position. It handles line wrapping - * and direction of search. - * - * Return TRUE if a match is found, otherwise FALSE. - */ - int -search_for_fuzzy_match( - buf_T *buf, - pos_T *pos, - char_u *pattern, - int dir, - pos_T *start_pos, - int *len, - char_u **ptr, - int *score) -{ - pos_T current_pos = *pos; - pos_T circly_end; - int found_new_match = FALSE; - int looped_around = FALSE; - int whole_line = ctrl_x_mode_whole_line(); - - if (buf == curbuf) - circly_end = *start_pos; - else - { - circly_end.lnum = buf->b_ml.ml_line_count; - circly_end.col = 0; - circly_end.coladd = 0; - } - - if (whole_line && start_pos->lnum != pos->lnum) - current_pos.lnum += dir; - - do - { - - // Check if looped around and back to start position - if (looped_around && EQUAL_POS(current_pos, circly_end)) - break; - - // Ensure current_pos is valid - if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) - { - // Get the current line buffer - *ptr = ml_get_buf(buf, current_pos.lnum, FALSE); - if (!whole_line) - *ptr += current_pos.col; - - // If ptr is end of line is reached, move to next line - // or previous line based on direction - if (*ptr != NULL && **ptr != NUL) - { - if (!whole_line) - { - // Try to find a fuzzy match in the current line starting - // from current position - found_new_match = fuzzy_match_str_in_line(ptr, pattern, - len, ¤t_pos, score); - if (found_new_match) - { - *pos = current_pos; - break; - } - else if (looped_around && current_pos.lnum == circly_end.lnum) - break; - } - else - { - if (fuzzy_match_str(*ptr, pattern) > 0) - { - found_new_match = TRUE; - *pos = current_pos; - *len = (int)ml_get_buf_len(buf, current_pos.lnum); - break; - } - } - } - } - - // Move to the next line or previous line based on direction - if (dir == FORWARD) - { - if (++current_pos.lnum > buf->b_ml.ml_line_count) - { - if (p_ws) - { - current_pos.lnum = 1; - looped_around = TRUE; - } - else - break; - } - } - else - { - if (--current_pos.lnum < 1) - { - if (p_ws) - { - current_pos.lnum = buf->b_ml.ml_line_count; - looped_around = TRUE; - } - else - break; - - } - } - current_pos.col = 0; - } while (TRUE); - - return found_new_match; -} - -/* - * Free an array of fuzzy string matches "fuzmatch[count]". - */ - void -fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count) -{ - int i; - - if (fuzmatch == NULL) - return; - for (i = 0; i < count; ++i) - vim_free(fuzmatch[i].str); - vim_free(fuzmatch); -} - -/* - * Copy a list of fuzzy matches into a string list after sorting the matches by - * the fuzzy score. Frees the memory allocated for 'fuzmatch'. - * Returns OK on success and FAIL on memory allocation failure. - */ - int -fuzzymatches_to_strmatches( - fuzmatch_str_T *fuzmatch, - char_u ***matches, - int count, - int funcsort) -{ - int i; - - if (count <= 0) - return OK; - - *matches = ALLOC_MULT(char_u *, count); - if (*matches == NULL) - { - fuzmatch_str_free(fuzmatch, count); - return FAIL; - } - - // Sort the list by the descending order of the match score - if (funcsort) - fuzzy_match_func_sort((void *)fuzmatch, (size_t)count); - else - fuzzy_match_str_sort((void *)fuzmatch, (size_t)count); - - for (i = 0; i < count; i++) - (*matches)[i] = fuzmatch[i].str; - vim_free(fuzmatch); - - return OK; -} diff --git a/src/testdir/test_cmdline.vim b/src/testdir/test_cmdline.vim index e6dcfd3998..ef311ed739 100644 --- a/src/testdir/test_cmdline.vim +++ b/src/testdir/test_cmdline.vim @@ -3224,7 +3224,7 @@ func Test_fuzzy_completion_bufname_fullpath() call assert_equal('"b CmdStateFile', @:) set wildoptions=fuzzy call feedkeys(":b CmdStateFile\\\"\", 'tx') - call assert_match('Xcmd/Xstate/Xfile.js$', @:) + call assert_equal('"b CmdStateFile', @:) cd - set wildoptions& endfunc @@ -3502,7 +3502,7 @@ func Test_fuzzy_completion_mapname() nmap state : nmap FendingOff : call feedkeys(":nmap fo\\\"\", 'tx') - call assert_equal("\"nmap format TestFOrmat FendingOff goformat fendoff", @:) + call assert_equal("\"nmap format TestFOrmat FendingOff fendoff goformat", @:) nunmap format nunmap goformat nunmap TestFOrmat @@ -3674,7 +3674,7 @@ func Test_fuzzy_completion_cmd_sort_results() command T123FendingOff : set wildoptions=fuzzy call feedkeys(":T123fo\\\"\", 'tx') - call assert_equal('"T123format T123TestFOrmat T123FendingOff T123goformat T123fendoff', @:) + call assert_equal('"T123format T123TestFOrmat T123FendingOff T123fendoff T123goformat', @:) delcommand T123format delcommand T123goformat delcommand T123TestFOrmat diff --git a/src/testdir/test_matchfuzzy.vim b/src/testdir/test_matchfuzzy.vim index 8aa7f32187..0ee1b7ba67 100644 --- a/src/testdir/test_matchfuzzy.vim +++ b/src/testdir/test_matchfuzzy.vim @@ -14,11 +14,11 @@ func Test_matchfuzzy() call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) - call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) - call assert_equal(['onetwo', 'one_two'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['oneTwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) + call assert_equal([], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) - call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + call assert_equal([repeat('a', 300)], matchfuzzy([repeat('a', 300)], repeat('a', 257))) " full match has highest score call assert_equal(['Cursor', 'lCursor'], matchfuzzy(["hello", "lCursor", "Cursor"], "Cursor")) " matches with same score should not be reordered @@ -26,8 +26,7 @@ func Test_matchfuzzy() call assert_equal(l, l->matchfuzzy('abc')) " Tests for match preferences - " preference for camel case match - call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) + call assert_equal(['onetwo', 'oneTwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) " preference for match after a separator (_ or space) call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) " preference for leading letter match @@ -43,7 +42,7 @@ func Test_matchfuzzy() " gap penalty call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab')) " path separator vs word separator - call assert_equal(['color/setup.vim', 'color\\setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim')) + call assert_equal(['color/setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim', 'color\\setup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim')) " match multiple words (separated by space) call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo')) @@ -85,15 +84,16 @@ func Test_matchfuzzy() call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') " camelcase - call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) - call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true})) call assert_equal(['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false})) + \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) call assert_equal(['things', 'sThings', 'thisThings'], - \ matchfuzzy(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false})) - call assert_fails("let x = matchfuzzy([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase') + \ matchfuzzy(['things','sThings', 'thisThings'], 'thin')) + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc')) + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc')) + call assert_equal(['MyTest'], matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT')) + call assert_equal(['CamelCaseMatchIngAlg'], + \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], 'CamelCase')) + call assert_equal(['SomeWord', 'PrefixSomeWord'], matchfuzzy(['PrefixSomeWord', 'SomeWord'], 'somewor')) " Test in latin1 encoding let save_enc = &encoding @@ -104,50 +104,57 @@ endfunc " Test for the matchfuzzypos() function func Test_matchfuzzypos() - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'curl'], 'rl')) - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) call assert_equal([['hello', 'hello world hello world'], - \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [500, 382]], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [2147483647, 4810]], \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) - call assert_equal([['aaaaaaa'], [[0, 1, 2]], [266]], matchfuzzypos(['aaaaaaa'], 'aaa')) - call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0]], [137]], matchfuzzypos(['a b'], ' a ')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]], [2880]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0]], [885]], matchfuzzypos(['a b'], ' a ')) call assert_equal([[], [], []], matchfuzzypos(['a b'], ' ')) call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab')) let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], [], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) + + " fuzzy matches limited to 1024 chars + let matches = matchfuzzypos([repeat('a', 1030)], repeat('a', 1025)) + call assert_equal([repeat('a', 1030)], matches[0]) + call assert_equal(1024, len(matches[1][0])) + call assert_equal(1023, matches[1][0][1023]) + call assert_equal([[], [], []], matchfuzzypos([], 'abc')) + call assert_equal([[], [], []], matchfuzzypos(['abc'], '')) " match in a long string - call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [155]], + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [500]], \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) " preference for camel case match - call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [269]], matchfuzzypos(['xabcxxaBc'], 'abc')) + call assert_equal([['xabcxxaBc'], [[7, 8]], [1665]], matchfuzzypos(['xabcxxaBc'], 'bc')) " preference for match after a separator (_ or space) - call assert_equal([['xabx_ab'], [[5, 6]], [195]], matchfuzzypos(['xabx_ab'], 'ab')) + call assert_equal([['xabx_ab'], [[5, 6]], [1775]], matchfuzzypos(['xabx_ab'], 'ab')) " preference for leading letter match - call assert_equal([['abcxabc'], [[0, 1]], [200]], matchfuzzypos(['abcxabc'], 'ab')) + call assert_equal([['abcxabc'], [[0, 1]], [1875]], matchfuzzypos(['abcxabc'], 'ab')) " preference for sequential match - call assert_equal([['aobncedone'], [[7, 8, 9]], [233]], matchfuzzypos(['aobncedone'], 'one')) + call assert_equal([['aobncedone'], [[7, 8, 9]], [1965]], matchfuzzypos(['aobncedone'], 'one')) " best recursive match - call assert_equal([['xoone'], [[2, 3, 4]], [243]], matchfuzzypos(['xoone'], 'one')) + call assert_equal([['xoone'], [[2, 3, 4]], [1990]], matchfuzzypos(['xoone'], 'one')) " match multiple words (separated by space) - call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) + call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo', {'matchseq': 1})) - call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) - call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [476]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) + call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) + call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 9, 10]], [6660]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" \t ")) - call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [1057]], ['grace']->matchfuzzypos('race ace grace')) + call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [2147483647]], ['grace']->matchfuzzypos('race ace grace')) let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]], \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]], \ matchfuzzypos(l, 'cam', {'key' : 'val'})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'})) @@ -164,30 +171,17 @@ func Test_matchfuzzypos() call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') " case match - call assert_equal([['Match', 'match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['match', 'Match'], 'Ma')) - call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['Match', 'match'], 'ma')) - " CamelCase has high weight even case match - call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc')) - call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc')) - call assert_equal(['MyTest', 'Mytest', 'mytest', ],matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT')) - call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], - \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelCase')) - call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], - \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelcaseM')) + call assert_equal([['Match'], [[0, 1]], [1885]], matchfuzzypos(['match', 'Match'], 'Ma')) + call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [1885, 1885]], matchfuzzypos(['Match', 'match'], 'ma')) let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') " camelcase - call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]], + call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [2885, 2870, 2865, 2680, 2670, 2655, 2655]], \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) - call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]], - \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true})) - call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [267, 264, 263, 246, 239, 236, 231]], - \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false})) - call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 2, 7]], [333, 287, 279]], - \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false})) - call assert_fails("let x = matchfuzzypos([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase') + call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 6, 7]], [3890, 3685, 3670]], + \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin')) endfunc " Test for matchfuzzy() with multibyte characters @@ -205,9 +199,14 @@ func Test_matchfuzzy_mbyte() call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의')) call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘')) - " preference for camel case match + " camel case match + call assert_equal(['oneąwo', 'oneĄwo'], + \ ['oneĄwo', 'oneąwo']->matchfuzzy('oneąwo')) + call assert_equal(['oneĄwo'], + \ ['oneĄwo', 'oneąwo']->matchfuzzy('Ąwo')) + " prefer camelcase when searched first character matches upper case call assert_equal(['oneĄwo', 'oneąwo'], - \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) + \ ['oneĄwo', 'oneąwo']->matchfuzzy('ąw')) " preference for complete match then match after separator (_ or space) call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']), \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) @@ -231,41 +230,39 @@ endfunc " Test for matchfuzzypos() with multibyte characters func Test_matchfuzzypos_mbyte() CheckFeature multi_lang - call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [273]], + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [4890]], \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) - call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [-20]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) " reverse the order of characters call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) - call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [252, 143]], + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [2885, 670]], \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], - \ [[0, 1], [0, 1], [0, 1], [0, 2]], [176, 173, 170, 135]], + \ [[0, 1], [0, 1], [0, 1], [0, 2]], [1880, 1865, 1850, 890]], \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) - call assert_equal([['ααααααα'], [[0, 1, 2]], [216]], + call assert_equal([['ααααααα'], [[0, 1, 2]], [2880]], \ matchfuzzypos(['ααααααα'], 'ααα')) call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], [], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + call assert_equal([repeat('✓', 300)], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))[0]) " match multiple words (separated by space) - call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [328]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) + call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [4515]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) " match in a long string - call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [105]], + call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [500]], \ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ')) - " preference for camel case match - call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [219]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) " preference for match after a separator (_ or space) - call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) + call assert_equal([['xちだx_ちだ'], [[5, 6]], [1775]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) " preference for leading letter match - call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [150]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [1875]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) " preference for sequential match - call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [158]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [1965]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) " best recursive match - call assert_equal([['xффйд'], [[2, 3, 4]], [168]], matchfuzzypos(['xффйд'], 'фйд')) + call assert_equal([['xффйд'], [[2, 3, 4]], [1990]], matchfuzzypos(['xффйд'], 'фйд')) endfunc " Test for matchfuzzy() with limit diff --git a/src/version.c b/src/version.c index 071eca3f04..87550542e0 100644 --- a/src/version.c +++ b/src/version.c @@ -719,6 +719,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1627, /**/ 1626, /**/ diff --git a/src/vim.h b/src/vim.h index 2c42c2cdde..79d3add8ac 100644 --- a/src/vim.h +++ b/src/vim.h @@ -3036,8 +3036,9 @@ long elapsed(DWORD start_tick); #define EVAL_VAR_IMPORT 4 // may return special variable for import #define EVAL_VAR_NO_FUNC 8 // do not look for a function -// Maximum number of characters that can be fuzzy matched -#define MAX_FUZZY_MATCHES 256 +// Fuzzy matching +#define FUZZY_MATCH_MAX_LEN 1024 // max characters that can be matched +#define FUZZY_SCORE_NONE INT_MIN // invalid fuzzy score // flags for equal_type() #define ETYPE_ARG_UNKNOWN 1 From c93a2b332734086ef94c4abf407f584e1e41bd55 Mon Sep 17 00:00:00 2001 From: Girish Palya Date: Wed, 13 Aug 2025 21:53:53 +0200 Subject: [PATCH 07/11] runtime(doc): Adapt fuzzy doc to reflect 'fzy' algorithm closes: #17988 Signed-off-by: Girish Palya Signed-off-by: Christian Brabandt --- runtime/doc/pattern.txt | 68 +++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index 02668a0473..414b0f97dc 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1,4 +1,4 @@ -*pattern.txt* For Vim version 9.1. Last change: 2025 Aug 12 +*pattern.txt* For Vim version 9.1. Last change: 2025 Aug 13 VIM REFERENCE MANUAL by Bram Moolenaar @@ -1501,37 +1501,51 @@ Finally, these constructs are unique to Perl: ============================================================================== 11. Fuzzy matching *fuzzy-matching* -Fuzzy matching refers to matching strings using a non-exact search string. -Fuzzy matching will match a string, if all the characters in the search string -are present anywhere in the string in the same order. Case is ignored. In a -matched string, other characters can be present between two consecutive -characters in the search string. If the search string has multiple words, then -each word is matched separately. So the words in the search string can be -present in any order in a string. +Fuzzy matching scores how well a string matches a pattern when the pattern +characters appear in order but not necessarily contiguously. -Vim uses the same improved algorithm as the fzy project: +Example: > + Pattern: "vim" + Candidates: "vim" -> perfect + "vimeo" -> good (v i m) + "voice mail" -> weaker (v _ i _ _ _ m) + "vintage" -> no match (no "m") +< +If the search string has multiple words, each word is matched separately and +may appear in any order in the candidate. For example "get pat" matches +"GetPattern", "PatternGet", "getPattern", "patGetter", "getSomePattern", +"MatchpatternGet", etc. + +The 'ignorecase' and 'smartcase' options do not apply, case is ignored if the +pattern is all lower case. + +Vim's implementation is based on the algorithm from the fzy project: https://github.com/jhawthorn/fzy -Fuzzy matching assigns a score for each matched string based on the following -criteria: - - The number of sequentially matching characters. - - The number of characters (distance) between two consecutive matching - characters. - - Matches at the beginning of a word - - Matches at a camel case character (e.g. Case in CamelCase) - - Matches after a path separator or a hyphen. - - The number of unmatched characters in a string. - - A full/exact match is preferred. -The matching string with the highest score is returned first. +It uses dynamic programming to compute an optimal score for a given pattern +and candidate. -For example, when you search for the "get pat" string using fuzzy matching, it -will match the strings "GetPattern", "PatternGet", "getPattern", "patGetter", -"getSomePattern", "MatchpatternGet" etc. +The algorithm works in two stages: -The functions |matchfuzzy()| and |matchfuzzypos()| can be used to fuzzy search -a string in a List of strings. The matchfuzzy() function returns a List of -matching strings. The matchfuzzypos() functions returns the List of matches, -the matching positions and the fuzzy match scores. +1. Forward pass + Scan the candidate left to right, tracking the best score for each + pattern position. Matches score higher when they occur at the start + of the candidate, the start of a word (space, underscore, dash, + camelCase), or directly after the previous match. + +2. Backward pass + Start from the best-scoring end position and step back to find match + positions, ensuring the alignment is optimal. + +Vim extends the original algorithm to support multibyte codepoints, allowing +correct matching for UTF-8 and other encodings. + +Time complexity is O(pattern * candidate). Memory usage is proportional +to the same. + +The |matchfuzzy()| and |matchfuzzypos()| functions perform fuzzy searching in +a List of strings. |matchfuzzy()| returns the matching strings, while +|matchfuzzypos()| returns the matches along with their positions and scores. The "f" flag of `:vimgrep` enables fuzzy matching. From 17a6d696bd2f9526f2c4beae95208626d0e8d922 Mon Sep 17 00:00:00 2001 From: glepnir Date: Wed, 13 Aug 2025 22:02:30 +0200 Subject: [PATCH 08/11] patch 9.1.1628: fuzzy.c has a few issues Problem: fuzzy.c has a few issues Solution: Use Vims memory management, update style (glepnir) Problem: - Missing cleanup of lmatchpos lists causing memory leaks - Missing error handling for list operations - Use of malloc() instead of Vim's alloc() functions - Inconsistent C-style comments - Missing null pointer checks for memory allocation - Incorrect use of vim_free() for list objects Solution: - Add proper cleanup of lmatchpos in done section using list_free() - Set lmatchpos to NULL after successful transfer to avoid confusion - Add error handling for list_append_tv() failures - Replace malloc() with alloc() and add null pointer checks - Convert C-style comments to C++ style for consistency - Fix vim_free() calls to use list_free() for list objects closes: #17984 Signed-off-by: glepnir Signed-off-by: Christian Brabandt --- src/fuzzy.c | 149 +++++++++++++++++++++++++++----------------------- src/version.c | 2 + 2 files changed, 82 insertions(+), 69 deletions(-) diff --git a/src/fuzzy.c b/src/fuzzy.c index 6ebfa5b217..c1b216ce59 100644 --- a/src/fuzzy.c +++ b/src/fuzzy.c @@ -49,7 +49,7 @@ static score_t match_positions(char_u *needle, char_u *haystack, int_u *position static int has_match(char_u *needle, char_u *haystack); #define SCORE_MAX INFINITY -#define SCORE_MIN -INFINITY +#define SCORE_MIN (-INFINITY) #define SCORE_SCALE 1000 typedef struct @@ -72,12 +72,12 @@ typedef struct */ int fuzzy_match( - char_u *str, - char_u *pat_arg, - int matchseq, - int *outScore, - int_u *matches, - int maxMatches) + char_u *str, + char_u *pat_arg, + int matchseq, + int *outScore, + int_u *matches, + int maxMatches) { char_u *save_pat; char_u *pat; @@ -206,21 +206,20 @@ fuzzy_match_item_compare(const void *s1, const void *s2) */ static void fuzzy_match_in_list( - list_T *l, - char_u *str, - int matchseq, - char_u *key, - callback_T *item_cb, - int retmatchpos, - list_T *fmatchlist, - long max_matches) + list_T *l, + char_u *str, + int matchseq, + char_u *key, + callback_T *item_cb, + int retmatchpos, + list_T *fmatchlist, + long max_matches) { - long len; - fuzzyItem_T *items; - listitem_T *li; - int i = 0; - long match_count = 0; - int_u matches[FUZZY_MATCH_MAX_LEN]; + long len; + fuzzyItem_T *items; + listitem_T *li; + long match_count = 0; + int_u matches[FUZZY_MATCH_MAX_LEN]; len = list_len(l); if (len == 0) @@ -285,13 +284,13 @@ fuzzy_match_in_list( items[match_count].score = score; items[match_count].pat = str; items[match_count].startpos = matches[0]; - if (itemstr_allocate) - items[match_count].itemstr = vim_strsave(itemstr); - else - items[match_count].itemstr = itemstr; + items[match_count].itemstr = itemstr_allocate + ? vim_strsave(itemstr) : itemstr; items[match_count].itemstr_allocated = itemstr_allocate; - // Copy the list of matching positions in itemstr to a list + // Copy the list of matching positions in itemstr to a list, if + // "retmatchpos" is set. + if (retmatchpos) { int j = 0; char_u *p; @@ -347,8 +346,11 @@ fuzzy_match_in_list( retlist = fmatchlist; // Copy the matching strings to the return list - for (i = 0; i < match_count; i++) - list_append_tv(retlist, &items[i].item->li_tv); + for (int i = 0; i < match_count; i++) + { + if (list_append_tv(retlist, &items[i].item->li_tv) == FAIL) + goto done; + } // next copy the list of matching positions if (retmatchpos) @@ -358,26 +360,40 @@ fuzzy_match_in_list( goto done; retlist = li->li_tv.vval.v_list; - for (i = 0; i < match_count; i++) - if (items[i].lmatchpos != NULL - && list_append_list(retlist, items[i].lmatchpos) == FAIL) - goto done; + for (int i = 0; i < match_count; i++) + { + if (items[i].lmatchpos != NULL) + { + if (list_append_list(retlist, items[i].lmatchpos) == OK) + items[i].lmatchpos = NULL; + else + goto done; + + } + } // copy the matching scores li = list_find(fmatchlist, -1); if (li == NULL || li->li_tv.vval.v_list == NULL) goto done; retlist = li->li_tv.vval.v_list; - for (i = 0; i < match_count; i++) + for (int i = 0; i < match_count; i++) + { if (list_append_number(retlist, items[i].score) == FAIL) goto done; + } } } done: - for (i = 0; i < match_count; i++) + for (int i = 0; i < match_count; i++) + { if (items[i].itemstr_allocated) vim_free(items[i].itemstr); + + if (items[i].lmatchpos) + list_free(items[i].lmatchpos); + } vim_free(items); } @@ -480,7 +496,7 @@ do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) goto done; if (list_append_list(rettv->vval.v_list, l) == FAIL) { - vim_free(l); + list_free(l); goto done; } l = list_alloc(); @@ -488,7 +504,7 @@ do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) goto done; if (list_append_list(rettv->vval.v_list, l) == FAIL) { - vim_free(l); + list_free(l); goto done; } l = list_alloc(); @@ -496,7 +512,7 @@ do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) goto done; if (list_append_list(rettv->vval.v_list, l) == FAIL) { - vim_free(l); + list_free(l); goto done; } } @@ -1020,7 +1036,7 @@ match_row(const match_struct *match, int row, score_t *curr_D, score_t prev_score = SCORE_MIN; score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; - /* These will not be used with this value, but not all compilers see it */ + // These will not be used with this value, but not all compilers see it score_t prev_M = SCORE_MIN, prev_D = SCORE_MIN; for (int j = 0; j < m; j++) @@ -1036,7 +1052,7 @@ match_row(const match_struct *match, int row, score_t *curr_D, { /* i > 0 && j > 0*/ score = MAX( prev_M + match_bonus[j], - /* consecutive match, doesn't stack with match_bonus */ + // consecutive match, doesn't stack with match_bonus prev_D + SCORE_MATCH_CONSECUTIVE); } prev_D = last_D[j]; @@ -1068,38 +1084,37 @@ match_positions(char_u *needle, char_u *haystack, int_u *positions) if (m > MATCH_MAX_LEN || n > m) { - /* - * Unreasonably large candidate: return no score - * If it is a valid match it will still be returned, it will - * just be ranked below any reasonably sized candidates - */ + // Unreasonably large candidate: return no score + // If it is a valid match it will still be returned, it will + // just be ranked below any reasonably sized candidates return SCORE_MIN; } else if (n == m) { - /* Since this method can only be called with a haystack which - * matches needle. If the lengths of the strings are equal the - * strings themselves must also be equal (ignoring case). - */ + // Since this method can only be called with a haystack which + // matches needle. If the lengths of the strings are equal the + // strings themselves must also be equal (ignoring case). if (positions) for (int i = 0; i < n; i++) positions[i] = i; return SCORE_MAX; } - /* - * D[][] Stores the best score for this position ending with a match. - * M[][] Stores the best possible score at this position. - */ + // D[][] Stores the best score for this position ending with a match. + // M[][] Stores the best possible score at this position. score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN]; - M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); - D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); + M = alloc(sizeof(score_t) * MATCH_MAX_LEN * n); + if (!M) + return SCORE_MIN; + D = alloc(sizeof(score_t) * MATCH_MAX_LEN * n); + if (!D) + return SCORE_MIN; match_row(&match, 0, D[0], M[0], D[0], M[0]); for (int i = 1; i < n; i++) match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]); - /* backtrace to find the positions of optimal matching */ + // backtrace to find the positions of optimal matching if (positions) { int match_required = 0; @@ -1107,23 +1122,19 @@ match_positions(char_u *needle, char_u *haystack, int_u *positions) { for (; j >= 0; j--) { - /* - * There may be multiple paths which result in - * the optimal weight. - * - * For simplicity, we will pick the first one - * we encounter, the latest in the candidate - * string. - */ + // There may be multiple paths which result in + // the optimal weight. + // + // For simplicity, we will pick the first one + // we encounter, the latest in the candidate + // string. if (D[i][j] != SCORE_MIN && (match_required || D[i][j] == M[i][j])) { - /* If this score was determined using - * SCORE_MATCH_CONSECUTIVE, the - * previous character MUST be a match - */ - match_required = - i && j && + // If this score was determined using + // SCORE_MATCH_CONSECUTIVE, the + // previous character MUST be a match + match_required = i && j && M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; positions[i] = j--; break; diff --git a/src/version.c b/src/version.c index 87550542e0..01eadc2f00 100644 --- a/src/version.c +++ b/src/version.c @@ -719,6 +719,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1628, /**/ 1627, /**/ From dd9ed09d8eb54fe3328bfdf385a0c978f308eccd Mon Sep 17 00:00:00 2001 From: Shane-XB-Qian Date: Wed, 13 Aug 2025 22:31:09 +0200 Subject: [PATCH 09/11] runtime(vimgoto): make gf extract packadd/colo name more flexible closes: #17983 Signed-off-by: Shane-XB-Qian Signed-off-by: Christian Brabandt --- runtime/autoload/vimgoto.vim | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/runtime/autoload/vimgoto.vim b/runtime/autoload/vimgoto.vim index b34dcc1a13..ffeab0af4b 100644 --- a/runtime/autoload/vimgoto.vim +++ b/runtime/autoload/vimgoto.vim @@ -3,7 +3,7 @@ vim9script # Language: Vim9 script # Contributers: @lacygoill # Shane-XB-Qian -# Last Change: 2025 Aug 12 +# Last Change: 2025 Aug 13 # # Vim Script to handle # :import, :packadd and :colorscheme @@ -39,7 +39,7 @@ enddef #}}}1 # Core {{{1 def HandlePackaddLine(editcmd: string, curline: string) #{{{2 - var pat: string = '^\s*\%(:\s*\)\=packadd!\=\s\+\zs\S\+$' + var pat: string = '\s*\%(:\s*\)\=packadd!\=\s\+\zs\S\+\>\ze' var plugin: string = curline ->matchstr(pat) ->substitute('^vim-\|\.vim$', '', 'g') @@ -65,7 +65,7 @@ def HandlePackaddLine(editcmd: string, curline: string) #{{{2 enddef def HandleColoLine(editcmd: string, curline: string) #{{{2 - var pat: string = '^\s*\%(:\s*\)\=colo\%[rscheme]\s\+\zs\S\+$' + var pat: string = '\s*\%(:\s*\)\=colo\%[rscheme]\s\+\zs\S\+\>\ze' var colo: string = curline->matchstr(pat) if colo == '' From 1ee1d9b43da37b157a91662a312415e4db400ba9 Mon Sep 17 00:00:00 2001 From: Jon Parise Date: Wed, 13 Aug 2025 22:34:51 +0200 Subject: [PATCH 10/11] runtime(python): highlight "self" and "cls" in syntax script These are special names by convention, and giving them distinct highlighting is a nice visual clue (using Identifier by default). This group is named "pythonClassVar" to match the name used by python-syntax. Some third-party color schemes are aware of this name and customized their colors accordingly. closes: #17968 Signed-off-by: Jon Parise Signed-off-by: Zvezdan Petkovic Signed-off-by: Christian Brabandt --- runtime/syntax/python.vim | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/runtime/syntax/python.vim b/runtime/syntax/python.vim index cc62babbfa..5c437b511f 100644 --- a/runtime/syntax/python.vim +++ b/runtime/syntax/python.vim @@ -1,7 +1,7 @@ " Vim syntax file " Language: Python " Maintainer: Zvezdan Petkovic -" Last Change: 2025 Aug 11 +" Last Change: 2025 Aug 13 " Credits: Neil Schemenauer " Dmitry Vasiliev " Rob B @@ -113,6 +113,10 @@ syn keyword pythonAsync async await syn match pythonConditional "^\s*\zscase\%(\s\+.*:.*$\)\@=" syn match pythonConditional "^\s*\zsmatch\%(\s\+.*:\s*\%(#.*\)\=$\)\@=" +" These names are special by convention. While they aren't real keywords, +" giving them distinct highlighting provides a nice visual cue. +syn keyword pythonClassVar self cls + " Decorators " A dot must be allowed because of @MyClass.myfunc decorators. syn match pythonDecorator "@" display contained @@ -378,6 +382,7 @@ hi def link pythonOperator Operator hi def link pythonException Exception hi def link pythonInclude Include hi def link pythonAsync Statement +hi def link pythonClassVar Identifier hi def link pythonDecorator Define hi def link pythonDecoratorName Function hi def link pythonClass Structure From 706b6f5867cb0fe65c4fefac8382fe2995eedde7 Mon Sep 17 00:00:00 2001 From: Yegappan Lakshmanan Date: Wed, 13 Aug 2025 22:39:37 +0200 Subject: [PATCH 11/11] patch 9.1.1629: Vim9: Not able to use more than 10 type arguments in a generic function Problem: Vim9: Not able to use more than 10 type arguments in a generic function Solution: Initialize the types after reading all the type arg variable names (Yegappan Lakshmanan) closes: #17981 Signed-off-by: Yegappan Lakshmanan Signed-off-by: Christian Brabandt --- src/testdir/test_vim9_generics.vim | 17 +++++++++++++++ src/version.c | 2 ++ src/vim9generics.c | 35 +++++++++++++++++++----------- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/src/testdir/test_vim9_generics.vim b/src/testdir/test_vim9_generics.vim index be279821d6..07648a3326 100644 --- a/src/testdir/test_vim9_generics.vim +++ b/src/testdir/test_vim9_generics.vim @@ -3553,4 +3553,21 @@ def Test_generic_enum_constructor_error() v9.CheckSourceFailure(lines, "E1010: Type not recognized: A", 4) enddef +" Test for using more than 10 type arguments +def Test_generic_max_type_args() + var lines =<< trim END + vim9script + + def Fn(a1: A1): A1 + var x: A1 = a1 + return x + enddef + + assert_equal(10, Fn(10)) + + assert_equal('abc', Fn('abc')) + END + v9.CheckSourceSuccess(lines) +enddef + " vim: ts=8 sw=2 sts=2 expandtab tw=80 fdm=marker diff --git a/src/version.c b/src/version.c index 01eadc2f00..14178370ae 100644 --- a/src/version.c +++ b/src/version.c @@ -719,6 +719,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1629, /**/ 1628, /**/ diff --git a/src/vim9generics.c b/src/vim9generics.c index e74e3436ea..54400903e4 100644 --- a/src/vim9generics.c +++ b/src/vim9generics.c @@ -526,16 +526,6 @@ parse_generic_func_type_params( if (name_exists) return NULL; - if (ga_grow(&gfatab->gfat_param_types, 1) == FAIL) - return NULL; - type_T *gt = - &((type_T *)gfatab->gfat_param_types.ga_data)[gfatab->gfat_param_types.ga_len]; - gfatab->gfat_param_types.ga_len++; - - CLEAR_POINTER(gt); - gt->tt_type = VAR_ANY; - gt->tt_flags = TTFLAG_GENERIC; - if (ga_grow(&gfatab->gfat_args, 1) == FAIL) return NULL; generic_T *generic = @@ -546,7 +536,7 @@ parse_generic_func_type_params( if (generic->gt_name == NULL) return NULL; vim_strncpy(generic->gt_name, name_start, name_len); - generic->gt_type = gt; + generic->gt_type = NULL; if (VIM_ISWHITE(*p)) { @@ -572,13 +562,32 @@ parse_generic_func_type_params( } if (*p != '>') return NULL; + p++; - if (generic_func_args_table_size(gfatab) == 0) + int gfat_sz = generic_func_args_table_size(gfatab); + + if (gfat_sz == 0) { emsg_funcname(e_empty_type_list_for_generic_function_str, func_name); return NULL; } - p++; + + // set the generic parms to VAR_ANY type + if (ga_grow(&gfatab->gfat_param_types, gfat_sz) == FAIL) + return NULL; + + gfatab->gfat_param_types.ga_len = gfat_sz; + for (int i = 0; i < generic_func_args_table_size(gfatab); i++) + { + type_T *gt = &((type_T *)gfatab->gfat_param_types.ga_data)[i]; + + CLEAR_POINTER(gt); + gt->tt_type = VAR_ANY; + gt->tt_flags = TTFLAG_GENERIC; + + generic_T *generic = &((generic_T *)gfatab->gfat_args.ga_data)[i]; + generic->gt_type = gt; + } return p; }