diff --git a/.hgtags b/.hgtags index 5a826ae348..4436e90573 100644 --- a/.hgtags +++ b/.hgtags @@ -2485,3 +2485,11 @@ a30e3762957d61966152040315d160f860433576 v7-3-1145 e3f9e33fb28c367aac7b2270b6fd304fd755b6d4 v7-3-1146 3cd3cc1e91193a0b45e6aff1373cd4802e595a16 v7-3-1147 220bdea4f57919c290d92b1f6c83a7252cd660b6 v7-3-1148 +66803af09906a828b9f41d7ae0f7a379137eaf99 v7-3-1149 +3dbd251777de232c168d87650acda5fec408146c v7-3-1150 +4d7e3df04256790855f7a6dc289f32ffc04da133 v7-3-1151 +9909e44879b9f80679fd05af19a423b2673b2301 v7-3-1152 +bc3f4804cf470cec5773d8842743efb760f69102 v7-3-1153 +6419ee8098c80f0418081c73b790b09c61f13c3a v7-3-1154 +bf1e6326df1104cabc04b8490f9456dbda901fd2 v7-3-1155 +8c4324e6f4779ee316361511ff783f6344750be9 v7-3-1156 diff --git a/src/buffer.c b/src/buffer.c index 93643a0eb0..80c91b3f7c 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -1902,7 +1902,7 @@ free_buf_options(buf, free_p_ff) #ifdef FEAT_SPELL clear_string_option(&buf->b_s.b_p_spc); clear_string_option(&buf->b_s.b_p_spf); - vim_free(buf->b_s.b_cap_prog); + vim_regfree(buf->b_s.b_cap_prog); buf->b_s.b_cap_prog = NULL; clear_string_option(&buf->b_s.b_p_spl); #endif @@ -2250,7 +2250,7 @@ buflist_findpat(pattern, pattern_end, unlisted, diffmode, curtab_only) match = buf->b_fnum; /* remember first match */ } - vim_free(prog); + vim_regfree(prog); if (match >= 0) /* found one match */ break; } @@ -2359,14 +2359,14 @@ ExpandBufnames(pat, num_file, file, options) *file = (char_u **)alloc((unsigned)(count * sizeof(char_u *))); if (*file == NULL) { - vim_free(prog); + vim_regfree(prog); if (patc != pat) vim_free(patc); return FAIL; } } } - vim_free(prog); + vim_regfree(prog); if (count) /* match(es) found, break here */ break; } diff --git a/src/edit.c b/src/edit.c index c2e5f7bb39..0ba3404d34 100644 --- a/src/edit.c +++ b/src/edit.c @@ -3144,7 +3144,7 @@ ins_compl_dictionaries(dict_start, pat, flags, thesaurus) theend: p_scs = save_p_scs; - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); 
vim_free(buf); } diff --git a/src/eval.c b/src/eval.c index d5b7ec45b4..dc454cb800 100644 --- a/src/eval.c +++ b/src/eval.c @@ -2734,6 +2734,8 @@ get_lval(name, rettv, lp, unlet, skip, quiet, fne_flags) prevval = key[len]; key[len] = NUL; } + else + prevval = 0; /* avoid compiler warning */ wrong = (lp->ll_dict->dv_scope == VAR_DEF_SCOPE && rettv->v_type == VAR_FUNC && var_check_func_name(key, lp->ll_di == NULL)) @@ -4560,7 +4562,7 @@ eval4(arg, rettv, evaluate) if (regmatch.regprog != NULL) { n1 = vim_regexec_nl(&regmatch, s1, (colnr_T)0); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); if (type == TYPE_NOMATCH) n1 = !n1; } @@ -14001,7 +14003,7 @@ find_some_match(argvars, rettv, type) rettv->vval.v_number += (varnumber_T)(str - expr); } } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } theend: @@ -17242,7 +17244,7 @@ f_split(argvars, rettv) str = regmatch.endp[0]; } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } p_cpo = save_cpo; @@ -21094,7 +21096,7 @@ ex_function(eap) list_func_head(fp, FALSE); } } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } } if (*p == '/') @@ -24248,7 +24250,7 @@ do_string_sub(str, pat, sub, flags) if (ga.ga_data != NULL) STRCPY((char *)ga.ga_data + ga.ga_len, tail); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } ret = vim_strsave(ga.ga_data == NULL ? 
str : (char_u *)ga.ga_data); diff --git a/src/ex_cmds.c b/src/ex_cmds.c index cd6772ed52..a61bcdd39e 100644 --- a/src/ex_cmds.c +++ b/src/ex_cmds.c @@ -571,7 +571,7 @@ sortend: vim_free(nrs); vim_free(sortbuf1); vim_free(sortbuf2); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); if (got_int) EMSG(_(e_interr)); } @@ -5261,7 +5261,7 @@ outofmem: changed_window_setting(); #endif - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } /* @@ -5436,7 +5436,7 @@ ex_global(eap) global_exe(cmd); ml_clearmarked(); /* clear rest of the marks */ - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } /* diff --git a/src/ex_cmds2.c b/src/ex_cmds2.c index 0908660eb0..dee5b7542b 100644 --- a/src/ex_cmds2.c +++ b/src/ex_cmds2.c @@ -652,7 +652,7 @@ ex_breakdel(eap) while (gap->ga_len > 0) { vim_free(DEBUGGY(gap, todel).dbg_name); - vim_free(DEBUGGY(gap, todel).dbg_prog); + vim_regfree(DEBUGGY(gap, todel).dbg_prog); --gap->ga_len; if (todel < gap->ga_len) mch_memmove(&DEBUGGY(gap, todel), &DEBUGGY(gap, todel + 1), @@ -2058,7 +2058,7 @@ do_arglist(str, what, after) --match; } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); vim_free(p); if (!didone) EMSG2(_(e_nomatch2), ((char_u **)new_ga.ga_data)[i]); diff --git a/src/ex_docmd.c b/src/ex_docmd.c index 045fdbecdb..42f023d6ab 100644 --- a/src/ex_docmd.c +++ b/src/ex_docmd.c @@ -7797,7 +7797,7 @@ ex_open(eap) curwin->w_cursor.col = (colnr_T)(regmatch.startp[0] - p); else EMSG(_(e_nomatch)); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } /* Move to the NUL, ignore any other arguments. 
*/ eap->arg += STRLEN(eap->arg); diff --git a/src/ex_eval.c b/src/ex_eval.c index 5b849696e0..1ad696f497 100644 --- a/src/ex_eval.c +++ b/src/ex_eval.c @@ -1576,7 +1576,7 @@ ex_catch(eap) caught = vim_regexec_nl(&regmatch, current_exception->value, (colnr_T)0); got_int |= prev_got_int; - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } } } diff --git a/src/ex_getln.c b/src/ex_getln.c index a29cc9e253..6cc34b9c19 100644 --- a/src/ex_getln.c +++ b/src/ex_getln.c @@ -4731,7 +4731,7 @@ ExpandFromContext(xp, pat, num_file, file, options) } } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); return ret; #endif /* FEAT_CMDL_COMPL */ @@ -5799,7 +5799,7 @@ del_history_entry(histype, str) if (history[histype][idx].hisstr == NULL) hisidx[histype] = -1; } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); return found; } diff --git a/src/fileio.c b/src/fileio.c index 850f24753e..82a8e820c6 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -7959,7 +7959,7 @@ au_cleanup() if (ap->pat == NULL) { *prev_ap = ap->next; - vim_free(ap->reg_prog); + vim_regfree(ap->reg_prog); vim_free(ap); } else @@ -10108,7 +10108,7 @@ match_file_pat(pattern, prog, fname, sfname, tail, allow_dirs) result = TRUE; if (prog == NULL) - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); return result; } #endif diff --git a/src/gui.c b/src/gui.c index a91b011b14..600be66411 100644 --- a/src/gui.c +++ b/src/gui.c @@ -5377,7 +5377,7 @@ gui_do_findrepl(flags, find_text, repl_text, down) } else MSG(_("No match at cursor, finding next")); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } } diff --git a/src/macros.h b/src/macros.h index 9e3ba44b4e..1737cfa548 100644 --- a/src/macros.h +++ b/src/macros.h @@ -272,6 +272,7 @@ # define MB_COPY_CHAR(f, t) if (has_mbyte) mb_copy_char(&f, &t); else *t++ = *f++ # define MB_CHARLEN(p) (has_mbyte ? mb_charlen(p) : (int)STRLEN(p)) +# define MB_CHAR2LEN(c) (has_mbyte ? 
mb_char2len(c) : 1) # define PTR2CHAR(p) (has_mbyte ? mb_ptr2char(p) : (int)*(p)) #else # define MB_PTR2LEN(p) 1 @@ -280,6 +281,7 @@ # define mb_ptr_back(s, p) --p # define MB_COPY_CHAR(f, t) *t++ = *f++ # define MB_CHARLEN(p) STRLEN(p) +# define MB_CHAR2LEN(c) 1 # define PTR2CHAR(p) ((int)*(p)) #endif diff --git a/src/misc1.c b/src/misc1.c index 250e79442d..5875059cd4 100644 --- a/src/misc1.c +++ b/src/misc1.c @@ -456,8 +456,8 @@ get_number_indent(lnum) pos.coladd = 0; #endif } + vim_regfree(regmatch.regprog); } - vim_free(regmatch.regprog); if (pos.lnum == 0 || *ml_get_pos(&pos) == NUL) return -1; @@ -9757,7 +9757,7 @@ dos_expandpath( # endif #endif vim_free(buf); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); vim_free(matchname); matches = gap->ga_len - start_len; @@ -9999,7 +9999,7 @@ unix_expandpath(gap, path, wildoff, flags, didstar) } vim_free(buf); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); matches = gap->ga_len - start_len; if (matches > 0) @@ -10364,7 +10364,7 @@ theend: vim_free(in_curdir); } ga_clear_strings(&path_ga); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); if (sort_again) remove_duplicates(gap); diff --git a/src/misc2.c b/src/misc2.c index c0e8263c82..97956cca4a 100644 --- a/src/misc2.c +++ b/src/misc2.c @@ -1134,7 +1134,7 @@ free_all_mem() /* Free some global vars. */ vim_free(username); # ifdef FEAT_CLIPBOARD - vim_free(clip_exclude_prog); + vim_regfree(clip_exclude_prog); # endif vim_free(last_cmdline); # ifdef FEAT_CMDHIST @@ -5014,8 +5014,8 @@ vim_findfile(search_ctx_arg) #endif { /* - * we don't have further wildcards to expand, so we have to - * check for the final file now + * We don't have further wildcards to expand, so we have to + * check for the final file now. 
*/ for (i = stackp->ffs_filearray_cur; i < stackp->ffs_filearray_size; ++i) diff --git a/src/option.c b/src/option.c index 904686567f..bfdaef7980 100644 --- a/src/option.c +++ b/src/option.c @@ -7578,7 +7578,7 @@ check_clipboard_option() clip_autoselect_plus = new_autoselect_plus; clip_autoselectml = new_autoselectml; clip_html = new_html; - vim_free(clip_exclude_prog); + vim_regfree(clip_exclude_prog); clip_exclude_prog = new_exclude_prog; #ifdef FEAT_GUI_GTK if (gui.in_use) @@ -7589,7 +7589,7 @@ check_clipboard_option() #endif } else - vim_free(new_exclude_prog); + vim_regfree(new_exclude_prog); return errmsg; } @@ -7616,16 +7616,16 @@ compile_cap_prog(synblock) if (re != NULL) { synblock->b_cap_prog = vim_regcomp(re, RE_MAGIC); + vim_free(re); if (synblock->b_cap_prog == NULL) { synblock->b_cap_prog = rp; /* restore the previous program */ return e_invarg; } - vim_free(re); } } - vim_free(rp); + vim_regfree(rp); return NULL; } #endif diff --git a/src/proto/regexp.pro b/src/proto/regexp.pro index 5cd1731c33..38c9c33c4e 100644 --- a/src/proto/regexp.pro +++ b/src/proto/regexp.pro @@ -2,16 +2,17 @@ int re_multiline __ARGS((regprog_T *prog)); int re_lookbehind __ARGS((regprog_T *prog)); char_u *skip_regexp __ARGS((char_u *startp, int dirc, int magic, char_u **newp)); -regprog_T *vim_regcomp __ARGS((char_u *expr, int re_flags)); int vim_regcomp_had_eol __ARGS((void)); void free_regexp_stuff __ARGS((void)); -int vim_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); -int vim_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); -long vim_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); reg_extmatch_T *ref_extmatch __ARGS((reg_extmatch_T *em)); void unref_extmatch __ARGS((reg_extmatch_T *em)); char_u *regtilde __ARGS((char_u *source, int magic)); int vim_regsub __ARGS((regmatch_T *rmp, char_u *source, char_u *dest, int copy, int magic, int backslash)); int vim_regsub_multi 
__ARGS((regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int copy, int magic, int backslash)); char_u *reg_submatch __ARGS((int no)); +regprog_T *vim_regcomp __ARGS((char_u *expr_arg, int re_flags)); +void vim_regfree __ARGS((regprog_T *prog)); +int vim_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); +int vim_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); +long vim_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); /* vim: set ft=c : */ diff --git a/src/quickfix.c b/src/quickfix.c index a8fc010139..3ac534dc14 100644 --- a/src/quickfix.c +++ b/src/quickfix.c @@ -863,7 +863,7 @@ qf_init_ok: for (fmt_ptr = fmt_first; fmt_ptr != NULL; fmt_ptr = fmt_first) { fmt_first = fmt_ptr->next; - vim_free(fmt_ptr->prog); + vim_regfree(fmt_ptr->prog); vim_free(fmt_ptr); } qf_clean_dir_stack(&dir_stack); @@ -3487,7 +3487,7 @@ theend: vim_free(dirname_now); vim_free(dirname_start); vim_free(target_dir); - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } /* @@ -4178,7 +4178,7 @@ ex_helpgrep(eap) } } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); #ifdef FEAT_MBYTE if (vc.vc_type != CONV_NONE) convert_setup(&vc, NULL, NULL); diff --git a/src/regexp.c b/src/regexp.c index ae29ef53b9..ef8c78db68 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1297,7 +1297,8 @@ skip_regexp(startp, dirc, magic, newp) return p; } -static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags)); +static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags)); +static void bt_regfree __ARGS((regprog_T *prog)); /* * bt_regcomp() - compile a regular expression into internal code for the @@ -1454,6 +1455,16 @@ bt_regcomp(expr, re_flags) return (regprog_T *)r; } +/* + * Free a compiled regexp program, returned by bt_regcomp(). + */ + static void +bt_regfree(prog) + regprog_T *prog; +{ + vim_free(prog); +} + /* * Setup to parse the regexp. 
Used once to get the length and once to do it. */ @@ -7876,6 +7887,7 @@ reg_submatch(no) static regengine_T bt_regengine = { bt_regcomp, + bt_regfree, bt_regexec, #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) @@ -7893,6 +7905,7 @@ static regengine_T bt_regengine = static regengine_T nfa_regengine = { nfa_regcomp, + nfa_regfree, nfa_regexec, #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) @@ -7920,7 +7933,9 @@ static char_u regname[][30] = { /* * Compile a regular expression into internal code. - * Returns the program in allocated memory. Returns NULL for an error. + * Returns the program in allocated memory. + * Use vim_regfree() to free the memory. + * Returns NULL for an error. */ regprog_T * vim_regcomp(expr_arg, re_flags) @@ -7996,6 +8011,17 @@ vim_regcomp(expr_arg, re_flags) return prog; } +/* + * Free a compiled regexp program, returned by vim_regcomp(). + */ + void +vim_regfree(prog) + regprog_T *prog; +{ + if (prog != NULL) + prog->engine->regfree(prog); +} + /* * Match a regexp against a string. * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). diff --git a/src/regexp.h b/src/regexp.h index 184228213d..976927e04a 100644 --- a/src/regexp.h +++ b/src/regexp.h @@ -89,6 +89,7 @@ typedef struct int reganch; /* pattern starts with ^ */ int regstart; /* char at start of pattern */ + char_u *match_text; /* plain text to match with */ int has_zend; /* pattern contains \ze */ int has_backref; /* pattern contains \1 .. 
\9 */ @@ -147,6 +148,7 @@ typedef struct struct regengine { regprog_T *(*regcomp)(char_u*, int); + void (*regfree)(regprog_T *); int (*regexec)(regmatch_T*, char_u*, colnr_T); #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ || defined(FIND_REPLACE_DIALOG) || defined(PROTO) diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index b03d09aeb7..9a89b49ecf 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -38,19 +38,19 @@ enum NFA_START_COLL, /* [abc] start */ NFA_END_COLL, /* [abc] end */ NFA_START_NEG_COLL, /* [^abc] start */ - NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */ - NFA_RANGE, /* range of the two previous items (only - * used in postfix) */ + NFA_END_NEG_COLL, /* [^abc] end (postfix only) */ + NFA_RANGE, /* range of the two previous items + * (postfix only) */ NFA_RANGE_MIN, /* low end of a range */ NFA_RANGE_MAX, /* high end of a range */ - NFA_CONCAT, /* concatenate two previous items (only - * used in postfix) */ - NFA_OR, - NFA_STAR, /* greedy * */ - NFA_STAR_NONGREEDY, /* non-greedy * */ - NFA_QUEST, /* greedy \? */ - NFA_QUEST_NONGREEDY, /* non-greedy \? */ + NFA_CONCAT, /* concatenate two previous items (postfix + * only) */ + NFA_OR, /* \| (postfix only) */ + NFA_STAR, /* greedy * (postfix only) */ + NFA_STAR_NONGREEDY, /* non-greedy * (postfix only) */ + NFA_QUEST, /* greedy \? (postfix only) */ + NFA_QUEST_NONGREEDY, /* non-greedy \? (postfix only) */ NFA_BOL, /* ^ Begin line */ NFA_EOL, /* $ End line */ @@ -153,8 +153,6 @@ enum /* NFA_FIRST_NL */ NFA_ANY, /* Match any one character. */ - NFA_ANYOF, /* Match any character in this string. */ - NFA_ANYBUT, /* Match any character not in this string. 
*/ NFA_IDENT, /* Match identifier char */ NFA_SIDENT, /* Match identifier char but no digit */ NFA_KWORD, /* Match keyword char */ @@ -270,6 +268,7 @@ static int nfa_ll_index = 0; static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags)); static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth)); static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth)); +static char_u *nfa_get_match_text __ARGS((nfa_state_T *start)); static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); static int nfa_emit_equi_class __ARGS((int c)); static int nfa_regatom __ARGS((void)); @@ -295,6 +294,7 @@ static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos)); static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col)); static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags)); +static void nfa_regfree __ARGS((regprog_T *prog)); static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); @@ -492,6 +492,52 @@ nfa_get_regstart(start, depth) return 0; } +/* + * Figure out if the NFA state list contains just literal text and nothing + * else. If so return a string in allocated memory with what must match after + * regstart. Otherwise return NULL. 
+ */ + static char_u * +nfa_get_match_text(start) + nfa_state_T *start; +{ + nfa_state_T *p = start; + int len = 0; + char_u *ret; + char_u *s; + + if (p->c != NFA_MOPEN) + return NULL; /* just in case */ + p = p->out; + while (p->c > 0) + { + len += MB_CHAR2LEN(p->c); + p = p->out; + } + if (p->c != NFA_MCLOSE || p->out->c != NFA_MATCH) + return NULL; + + ret = alloc(len); + if (ret != NULL) + { + len = 0; + p = start->out->out; /* skip first char, it goes into regstart */ + s = ret; + while (p->c > 0) + { +#ifdef FEAT_MBYTE + if (has_mbyte) + s += (*mb_char2bytes)(p->c, s); + else +#endif + *s++ = p->c; + p = p->out; + } + *s = NUL; + } + return ret; +} + /* * Allocate more space for post_start. Called when * running above the estimated number of states. @@ -2280,8 +2326,13 @@ nfa_dump(prog) { nfa_print_state(debugf, prog->start); - fprintf(debugf, "reganch: %d\n", prog->reganch); - fprintf(debugf, "regstart: %d\n", prog->regstart); + if (prog->reganch) + fprintf(debugf, "reganch: %d\n", prog->reganch); + if (prog->regstart != NUL) + fprintf(debugf, "regstart: %c (decimal: %d)\n", + prog->regstart, prog->regstart); + if (prog->match_text != NULL) + fprintf(debugf, "match_text: \"%s\"\n", prog->match_text); fclose(debugf); } @@ -2524,6 +2575,226 @@ st_pop(p, stack) return **p; } +/* + * Estimate the maximum byte length of anything matching "state". + * When unknown or unlimited return -1. + */ + static int +nfa_max_width(startstate, depth) + nfa_state_T *startstate; + int depth; +{ + int l, r; + nfa_state_T *state = startstate; + int len = 0; + + /* detect looping in a NFA_SPLIT */ + if (depth > 4) + return -1; + + for (;;) + { + switch (state->c) + { + case NFA_END_INVISIBLE: + case NFA_END_INVISIBLE_NEG: + /* the end, return what we have */ + return len; + + case NFA_SPLIT: + /* two alternatives, use the maximum */ + l = nfa_max_width(state->out, depth + 1); + r = nfa_max_width(state->out1, depth + 1); + if (l < 0 || r < 0) + return -1; + return len + (l > r ? 
l : r); + + case NFA_ANY: + case NFA_START_COLL: + case NFA_START_NEG_COLL: + /* matches some character, including composing chars */ +#ifdef FEAT_MBYTE + if (enc_utf8) + len += MB_MAXBYTES; + else if (has_mbyte) + len += 2; + else +#endif + ++len; + if (state->c != NFA_ANY) + { + /* skip over the characters */ + state = state->out1->out; + continue; + } + break; + + case NFA_DIGIT: + case NFA_WHITE: + case NFA_HEX: + case NFA_OCTAL: + /* ascii */ + ++len; + break; + + case NFA_IDENT: + case NFA_SIDENT: + case NFA_KWORD: + case NFA_SKWORD: + case NFA_FNAME: + case NFA_SFNAME: + case NFA_PRINT: + case NFA_SPRINT: + case NFA_NWHITE: + case NFA_NDIGIT: + case NFA_NHEX: + case NFA_NOCTAL: + case NFA_WORD: + case NFA_NWORD: + case NFA_HEAD: + case NFA_NHEAD: + case NFA_ALPHA: + case NFA_NALPHA: + case NFA_LOWER: + case NFA_NLOWER: + case NFA_UPPER: + case NFA_NUPPER: + /* possibly non-ascii */ +#ifdef FEAT_MBYTE + if (has_mbyte) + len += 3; + else +#endif + ++len; + break; + + case NFA_START_INVISIBLE: + case NFA_START_INVISIBLE_NEG: + case NFA_START_INVISIBLE_BEFORE: + case NFA_START_INVISIBLE_BEFORE_NEG: + /* zero-width, out1 points to the END state */ + state = state->out1->out; + continue; + + case NFA_BACKREF1: + case NFA_BACKREF2: + case NFA_BACKREF3: + case NFA_BACKREF4: + case NFA_BACKREF5: + case NFA_BACKREF6: + case NFA_BACKREF7: + case NFA_BACKREF8: + case NFA_BACKREF9: +#ifdef FEAT_SYN_HL + case NFA_ZREF1: + case NFA_ZREF2: + case NFA_ZREF3: + case NFA_ZREF4: + case NFA_ZREF5: + case NFA_ZREF6: + case NFA_ZREF7: + case NFA_ZREF8: + case NFA_ZREF9: +#endif + case NFA_NEWL: + case NFA_SKIP: + /* unknown width */ + return -1; + + case NFA_BOL: + case NFA_EOL: + case NFA_BOF: + case NFA_EOF: + case NFA_BOW: + case NFA_EOW: + case NFA_MOPEN: + case NFA_MOPEN1: + case NFA_MOPEN2: + case NFA_MOPEN3: + case NFA_MOPEN4: + case NFA_MOPEN5: + case NFA_MOPEN6: + case NFA_MOPEN7: + case NFA_MOPEN8: + case NFA_MOPEN9: +#ifdef FEAT_SYN_HL + case NFA_ZOPEN: + case 
NFA_ZOPEN1: + case NFA_ZOPEN2: + case NFA_ZOPEN3: + case NFA_ZOPEN4: + case NFA_ZOPEN5: + case NFA_ZOPEN6: + case NFA_ZOPEN7: + case NFA_ZOPEN8: + case NFA_ZOPEN9: + case NFA_ZCLOSE: + case NFA_ZCLOSE1: + case NFA_ZCLOSE2: + case NFA_ZCLOSE3: + case NFA_ZCLOSE4: + case NFA_ZCLOSE5: + case NFA_ZCLOSE6: + case NFA_ZCLOSE7: + case NFA_ZCLOSE8: + case NFA_ZCLOSE9: +#endif + case NFA_MCLOSE: + case NFA_MCLOSE1: + case NFA_MCLOSE2: + case NFA_MCLOSE3: + case NFA_MCLOSE4: + case NFA_MCLOSE5: + case NFA_MCLOSE6: + case NFA_MCLOSE7: + case NFA_MCLOSE8: + case NFA_MCLOSE9: + case NFA_NOPEN: + case NFA_NCLOSE: + + case NFA_LNUM_GT: + case NFA_LNUM_LT: + case NFA_COL_GT: + case NFA_COL_LT: + case NFA_VCOL_GT: + case NFA_VCOL_LT: + case NFA_MARK_GT: + case NFA_MARK_LT: + case NFA_VISUAL: + case NFA_LNUM: + case NFA_CURSOR: + case NFA_COL: + case NFA_VCOL: + case NFA_MARK: + + case NFA_ZSTART: + case NFA_ZEND: + case NFA_OPT_CHARS: + case NFA_SKIP_CHAR: + case NFA_START_PATTERN: + case NFA_END_PATTERN: + case NFA_COMPOSING: + case NFA_END_COMPOSING: + /* zero-width */ + break; + + default: + if (state->c < 0) + /* don't know what this is */ + return -1; + /* normal character */ + len += MB_CHAR2LEN(state->c); + break; + } + + /* normal way to continue */ + state = state->out; + } + + /* unrecognized */ + return -1; +} + /* * Convert a postfix form into its equivalent NFA. * Return the NFA start state on success, NULL otherwise. @@ -2774,7 +3045,7 @@ post2nfa(postfix, end, nfa_calc_size) start_state = NFA_START_INVISIBLE_BEFORE_NEG; end_state = NFA_END_INVISIBLE_NEG; break; - case NFA_PREV_ATOM_LIKE_PATTERN: + default: /* NFA_PREV_ATOM_LIKE_PATTERN: */ start_state = NFA_START_PATTERN; end_state = NFA_END_PATTERN; break; @@ -2803,8 +3074,6 @@ post2nfa(postfix, end, nfa_calc_size) s = alloc_state(start_state, e.start, s1); if (s == NULL) goto theend; - if (before) - s->val = n; /* store the count */ if (pattern) { /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. 
*/ @@ -2818,6 +3087,14 @@ post2nfa(postfix, end, nfa_calc_size) { patch(e.out, s1); PUSH(frag(s, list1(&s1->out))); + if (before) + { + if (n <= 0) + /* See if we can guess the maximum width, it avoids a + * lot of pointless tries. */ + n = nfa_max_width(e.start, 0); + s->val = n; /* store the count */ + } } break; } @@ -3077,16 +3354,21 @@ typedef struct typedef struct nfa_pim_S nfa_pim_T; struct nfa_pim_S { - nfa_state_T *state; - int result; /* NFA_PIM_TODO, NFA_PIM_[NO]MATCH */ - nfa_pim_T *pim; /* another PIM at the same position */ + int result; /* NFA_PIM_*, see below */ + nfa_state_T *state; /* the invisible match start state */ regsubs_T subs; /* submatch info, only party used */ + union + { + lpos_T pos; + char_u *ptr; + } end; /* where the match must end */ }; /* Values for done in nfa_pim_T. */ -#define NFA_PIM_TODO 0 -#define NFA_PIM_MATCH 1 -#define NFA_PIM_NOMATCH -1 +#define NFA_PIM_UNUSED 0 /* pim not used */ +#define NFA_PIM_TODO 1 /* pim not done yet */ +#define NFA_PIM_MATCH 2 /* pim executed, matches */ +#define NFA_PIM_NOMATCH 3 /* pim executed, no match */ /* nfa_thread_T contains execution information of a NFA state */ @@ -3094,7 +3376,8 @@ typedef struct { nfa_state_T *state; int count; - nfa_pim_T *pim; /* if not NULL: postponed invisible match */ + nfa_pim_T pim; /* if pim.result != NFA_PIM_UNUSED: postponed + * invisible match */ regsubs_T subs; /* submatch info, only party used */ } nfa_thread_T; @@ -3147,20 +3430,56 @@ log_subexpr(sub) e == NULL ? "NULL" : e); } } + + static char * +pim_info(nfa_pim_T *pim) +{ + static char buf[30]; + + if (pim == NULL || pim->result == NFA_PIM_UNUSED) + buf[0] = NUL; + else + { + sprintf(buf, " PIM col %d", REG_MULTI ? (int)pim->end.pos.col + : (int)(pim->end.ptr - reginput)); + } + return buf; +} + #endif /* Used during execution: whether a match has been found. 
*/ static int nfa_match; +static void copy_pim __ARGS((nfa_pim_T *to, nfa_pim_T *from)); static void clear_sub __ARGS((regsub_T *sub)); static void copy_sub __ARGS((regsub_T *to, regsub_T *from)); static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from)); static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2)); static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs)); +static int match_follows __ARGS((nfa_state_T *startstate, int depth)); static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs)); -static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off)); +static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int off)); static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip)); +/* + * Copy postponed invisible match info from "from" to "to". + */ + static void +copy_pim(to, from) + nfa_pim_T *to; + nfa_pim_T *from; +{ + to->result = from->result; + to->state = from->state; + copy_sub(&to->subs.norm, &from->subs.norm); +#ifdef FEAT_SYN_HL + if (nfa_has_zsubexpr) + copy_sub(&to->subs.synt, &from->subs.synt); +#endif + to->end = from->end; +} + static void clear_sub(sub) regsub_T *sub; @@ -3305,7 +3624,11 @@ sub_equal(sub1, sub2) #ifdef ENABLE_LOG static void -report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid) +report_state(char *action, + regsub_T *sub, + nfa_state_T *state, + int lid, + nfa_pim_T *pim) { int col; @@ -3316,8 +3639,9 @@ report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid) else col = (int)(sub->list.line[0].start - regline); nfa_set_code(state->c); - fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)\n", - action, abs(state->id), lid, state->c, code, col); + fprintf(log_fd, "> %s state %d to list %d. 
char %d: %s (start col %d)%s\n", + action, abs(state->id), lid, state->c, code, col, + pim_info(pim)); } #endif @@ -3349,6 +3673,92 @@ has_state_with_pos(l, state, subs) return FALSE; } +/* + * Return TRUE if "state" leads to a NFA_MATCH without advancing the input. + */ + static int +match_follows(startstate, depth) + nfa_state_T *startstate; + int depth; +{ + nfa_state_T *state = startstate; + + /* avoid too much recursion */ + if (depth > 10) + return FALSE; + + for (;;) + { + switch (state->c) + { + case NFA_MATCH: + case NFA_MCLOSE: + case NFA_END_INVISIBLE: + case NFA_END_INVISIBLE_NEG: + case NFA_END_PATTERN: + return TRUE; + + case NFA_SPLIT: + return match_follows(state->out, depth + 1) + || match_follows(state->out1, depth + 1); + + case NFA_START_INVISIBLE: + case NFA_START_INVISIBLE_BEFORE: + case NFA_START_INVISIBLE_NEG: + case NFA_START_INVISIBLE_BEFORE_NEG: + case NFA_COMPOSING: + /* skip ahead to next state */ + state = state->out1->out; + break; + + case NFA_ANY: + case NFA_IDENT: + case NFA_SIDENT: + case NFA_KWORD: + case NFA_SKWORD: + case NFA_FNAME: + case NFA_SFNAME: + case NFA_PRINT: + case NFA_SPRINT: + case NFA_WHITE: + case NFA_NWHITE: + case NFA_DIGIT: + case NFA_NDIGIT: + case NFA_HEX: + case NFA_NHEX: + case NFA_OCTAL: + case NFA_NOCTAL: + case NFA_WORD: + case NFA_NWORD: + case NFA_HEAD: + case NFA_NHEAD: + case NFA_ALPHA: + case NFA_NALPHA: + case NFA_LOWER: + case NFA_NLOWER: + case NFA_UPPER: + case NFA_NUPPER: + case NFA_START_COLL: + case NFA_START_NEG_COLL: + case NFA_NEWL: + /* state will advance input */ + return FALSE; + + default: + if (state->c > 0) + /* state will advance input */ + return FALSE; + + /* Others: zero-width or possibly zero-width, might still find + * a match at the same position, keep looking. */ + break; + } + state = state->out; + } + return FALSE; +} + + /* * Return TRUE if "state" is already in list "l". 
*/ @@ -3367,10 +3777,11 @@ state_in_list(l, state, subs) } static void -addstate(l, state, subs, off) +addstate(l, state, subs, pim, off) nfa_list_T *l; /* runtime state list */ nfa_state_T *state; /* state to update */ regsubs_T *subs; /* pointers to subexpressions */ + nfa_pim_T *pim; /* postponed look-behind match */ int off; /* byte offset, when -1 go to next line */ { int subidx; @@ -3384,14 +3795,8 @@ addstate(l, state, subs, off) int did_print = FALSE; #endif - if (l == NULL || state == NULL) - return; - switch (state->c) { - case NFA_SPLIT: - case NFA_NOPEN: - case NFA_SKIP_CHAR: case NFA_NCLOSE: case NFA_MCLOSE: case NFA_MCLOSE1: @@ -3416,6 +3821,9 @@ addstate(l, state, subs, off) case NFA_ZCLOSE9: #endif case NFA_ZEND: + case NFA_SPLIT: + case NFA_NOPEN: + case NFA_SKIP_CHAR: /* These nodes are not added themselves but their "out" and/or * "out1" may be added below. */ break; @@ -3478,12 +3886,14 @@ skip_add: return; } + /* Do not add the state again when it exists with the same + * positions. */ if (has_state_with_pos(l, state, subs)) goto skip_add; } - /* when there are backreferences or look-behind matches the number - * of states may be (a lot) bigger */ + /* When there are backreferences the number of states may be (a + * lot) bigger than anticipated. 
*/ if (nfa_has_backref && l->n == l->len) { int newlen = l->len * 3 / 2 + 50; @@ -3496,21 +3906,24 @@ skip_add: state->lastlist[nfa_ll_index] = l->id; thread = &l->t[l->n++]; thread->state = state; - thread->pim = NULL; + if (pim == NULL) + thread->pim.result = NFA_PIM_UNUSED; + else + copy_pim(&thread->pim, pim); copy_sub(&thread->subs.norm, &subs->norm); #ifdef FEAT_SYN_HL if (nfa_has_zsubexpr) copy_sub(&thread->subs.synt, &subs->synt); #endif #ifdef ENABLE_LOG - report_state("Adding", &thread->subs.norm, state, l->id); + report_state("Adding", &thread->subs.norm, state, l->id, pim); did_print = TRUE; #endif } #ifdef ENABLE_LOG if (!did_print) - report_state("Processing", &subs->norm, state, l->id); + report_state("Processing", &subs->norm, state, l->id, pim); #endif switch (state->c) { @@ -3520,14 +3933,14 @@ skip_add: case NFA_SPLIT: /* order matters here */ - addstate(l, state->out, subs, off); - addstate(l, state->out1, subs, off); + addstate(l, state->out, subs, pim, off); + addstate(l, state->out1, subs, pim, off); break; case NFA_SKIP_CHAR: case NFA_NOPEN: case NFA_NCLOSE: - addstate(l, state->out, subs, off); + addstate(l, state->out, subs, pim, off); break; case NFA_MOPEN: @@ -3571,8 +3984,8 @@ skip_add: sub = &subs->norm; } - /* Set the position (with "off") in the subexpression. Save and - * restore it when it was in use. Otherwise fill any gap. */ + /* Set the position (with "off" added) in the subexpression. Save + * and restore it when it was in use. Otherwise fill any gap. */ save_ptr = NULL; if (REG_MULTI) { @@ -3623,7 +4036,7 @@ skip_add: sub->list.line[subidx].start = reginput + off; } - addstate(l, state->out, subs, off); + addstate(l, state->out, subs, pim, off); if (save_in_use == -1) { @@ -3641,7 +4054,7 @@ skip_add: { /* Do not overwrite the position set by \ze. If no \ze * encountered end will be set in nfa_regtry(). 
*/ - addstate(l, state->out, subs, off); + addstate(l, state->out, subs, pim, off); break; } case NFA_MCLOSE1: @@ -3710,7 +4123,7 @@ skip_add: sub->list.line[subidx].end = reginput + off; } - addstate(l, state->out, subs, off); + addstate(l, state->out, subs, pim, off); if (REG_MULTI) sub->list.multi[subidx].end = save_lpos; @@ -3738,15 +4151,9 @@ addstate_here(l, state, subs, pim, ip) int tlen = l->n; int count; int listidx = *ip; - int i; /* first add the state(s) at the end, so that we know how many there are */ - addstate(l, state, subs, 0); - - /* fill in the "pim" field in the new states */ - if (pim != NULL) - for (i = tlen; i < l->n; ++i) - l->t[i].pim = pim; + addstate(l, state, subs, pim, 0); /* when "*ip" was at the end of the list, nothing to do */ if (listidx + 1 == tlen) @@ -3995,15 +4402,18 @@ nfa_re_num_cmp(val, op, pos) return val == pos; } -static int recursive_regmatch __ARGS((nfa_state_T *state, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids)); +static int recursive_regmatch __ARGS((nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids)); static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); /* * Recursively call nfa_regmatch() + * "pim" is NULL or contains info about a Postponed Invisible Match (start + * position). 
*/ static int -recursive_regmatch(state, prog, submatch, m, listids) +recursive_regmatch(state, pim, prog, submatch, m, listids) nfa_state_T *state; + nfa_pim_T *pim; nfa_regprog_T *prog; regsubs_T *submatch; regsubs_T *m; @@ -4020,24 +4430,43 @@ recursive_regmatch(state, prog, submatch, m, listids) int result; int need_restore = FALSE; + if (pim != NULL) + { + /* start at the position where the postponed match was */ + if (REG_MULTI) + reginput = regline + pim->end.pos.col; + else + reginput = pim->end.ptr; + } + if (state->c == NFA_START_INVISIBLE_BEFORE || state->c == NFA_START_INVISIBLE_BEFORE_NEG) { - /* The recursive match must end at the current position. */ + /* The recursive match must end at the current position. When "pim" is + * not NULL it specifies the current position. */ endposp = &endpos; if (REG_MULTI) { - endpos.se_u.pos.col = (int)(reginput - regline); - endpos.se_u.pos.lnum = reglnum; + if (pim == NULL) + { + endpos.se_u.pos.col = (int)(reginput - regline); + endpos.se_u.pos.lnum = reglnum; + } + else + endpos.se_u.pos = pim->end.pos; } else - endpos.se_u.ptr = reginput; + { + if (pim == NULL) + endpos.se_u.ptr = reginput; + else + endpos.se_u.ptr = pim->end.ptr; + } /* Go back the specified number of bytes, or as far as the * start of the previous line, to try matching "\@<=" or - * not matching "\@val <= 0) { if (REG_MULTI) @@ -4154,6 +4583,7 @@ recursive_regmatch(state, prog, submatch, m, listids) static int failure_chance __ARGS((nfa_state_T *state, int depth)); static int skip_to_start __ARGS((int c, colnr_T *colp)); +static long find_match_text __ARGS((colnr_T startcol, int regstart, char_u *match_text)); /* * Estimate the chance of a match with "state" failing. @@ -4330,6 +4760,69 @@ skip_to_start(c, colp) return OK; } +/* + * Check for a match with match_text. + * Called after skip_to_start() has found regstart. + * Returns zero for no match, 1 for a match. 
+ */ + static long +find_match_text(startcol, regstart, match_text) + colnr_T startcol; + int regstart; + char_u *match_text; +{ + colnr_T col = startcol; + int c1, c2; + int len1, len2; + int match; + + for (;;) + { + match = TRUE; + len2 = MB_CHAR2LEN(regstart); /* skip regstart */ + for (len1 = 0; match_text[len1] != NUL; len1 += MB_CHAR2LEN(c1)) + { + c1 = PTR2CHAR(match_text + len1); + c2 = PTR2CHAR(regline + col + len2); + if (c1 != c2 && (!ireg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2))) + { + match = FALSE; + break; + } + len2 += MB_CHAR2LEN(c2); + } + if (match +#ifdef FEAT_MBYTE + /* check that no composing char follows */ + && !(enc_utf8 + && utf_iscomposing(PTR2CHAR(regline + col + len2))) +#endif + ) + { + cleanup_subexpr(); + if (REG_MULTI) + { + reg_startpos[0].lnum = reglnum; + reg_startpos[0].col = col; + reg_endpos[0].lnum = reglnum; + reg_endpos[0].col = col + len2; + } + else + { + reg_startp[0] = regline + col; + reg_endp[0] = regline + col + len2; + } + return 1L; + } + + /* Try finding regstart after the current match. */ + col += MB_CHAR2LEN(regstart); /* skip regstart */ + if (skip_to_start(regstart, &col) == FAIL) + break; + } + return 0L; +} + /* * Main matching routine. * @@ -4360,8 +4853,7 @@ nfa_regmatch(prog, start, submatch, m) nfa_state_T *add_state; int add_here; int add_count; - int add_off; - garray_T pimlist; + int add_off = 0; int toplevel = start->c == NFA_MOPEN; #ifdef NFA_REGEXP_DEBUG_LOG FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); @@ -4373,7 +4865,6 @@ nfa_regmatch(prog, start, submatch, m) } #endif nfa_match = FALSE; - ga_init2(&pimlist, sizeof(nfa_pim_T), 5); /* Allocate memory for the lists of nodes. 
*/ size = (nstate + 1) * sizeof(nfa_thread_T); @@ -4422,10 +4913,10 @@ nfa_regmatch(prog, start, submatch, m) else m->norm.list.line[0].start = reginput; m->norm.in_use = 1; - addstate(thislist, start->out, m, 0); + addstate(thislist, start->out, m, NULL, 0); } else - addstate(thislist, start, m, 0); + addstate(thislist, start, m, NULL, 0); #define ADD_STATE_IF_MATCH(state) \ if (result) { \ @@ -4467,8 +4958,6 @@ nfa_regmatch(prog, start, submatch, m) thislist->id = nfa_listid; nextlist->id = nfa_listid + 1; - pimlist.ga_len = 0; - #ifdef ENABLE_LOG fprintf(log_fd, "------------------------------------------\n"); fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); @@ -4512,8 +5001,9 @@ nfa_regmatch(prog, start, submatch, m) else col = (int)(t->subs.norm.list.line[0].start - regline); nfa_set_code(t->state->c); - fprintf(log_fd, "(%d) char %d %s (start col %d) ... \n", - abs(t->state->id), (int)t->state->c, code, col); + fprintf(log_fd, "(%d) char %d %s (start col %d)%s ... \n", + abs(t->state->id), (int)t->state->c, code, col, + pim_info(&t->pim)); } #endif @@ -4605,22 +5095,19 @@ nfa_regmatch(prog, start, submatch, m) case NFA_START_INVISIBLE_BEFORE: case NFA_START_INVISIBLE_BEFORE_NEG: { - nfa_pim_T *pim; int cout = t->state->out1->out->c; /* Do it directly when what follows is possibly end of * match (closing paren). + * Do it directly if there already is a PIM. * Postpone when it is \@<= or \@pim and check multiple - * where it's used? * Otherwise first do the one that has the highest chance * of failing. 
*/ if ((cout >= NFA_MCLOSE && cout <= NFA_MCLOSE9) #ifdef FEAT_SYN_HL || (cout >= NFA_ZCLOSE && cout <= NFA_ZCLOSE9) #endif - || cout == NFA_NCLOSE - || t->pim != NULL + || t->pim.result != NFA_PIM_UNUSED || (t->state->c != NFA_START_INVISIBLE_BEFORE && t->state->c != NFA_START_INVISIBLE_BEFORE_NEG && failure_chance(t->state->out1->out, 0) @@ -4630,7 +5117,7 @@ nfa_regmatch(prog, start, submatch, m) * First try matching the invisible match, then what * follows. */ - result = recursive_regmatch(t->state, prog, + result = recursive_regmatch(t->state, NULL, prog, submatch, m, &listids); /* for \@! and \@state = t->state; - pim->pim = NULL; - pim->result = NFA_PIM_TODO; + pim.state = t->state; + pim.result = NFA_PIM_TODO; + pim.subs.norm.in_use = 0; +#ifdef FEAT_SYN_HL + pim.subs.synt.in_use = 0; +#endif + if (REG_MULTI) + { + pim.end.pos.col = (int)(reginput - regline); + pim.end.pos.lnum = reglnum; + } + else + pim.end.ptr = reginput; /* t->state->out1 is the corresponding END_INVISIBLE * node; Add its out to the current list (zero-width * match). */ addstate_here(thislist, t->state->out1->out, &t->subs, - pim, &listidx); + &pim, &listidx); } } break; @@ -4722,7 +5216,7 @@ nfa_regmatch(prog, start, submatch, m) } /* First try matching the pattern. */ - result = recursive_regmatch(t->state, prog, + result = recursive_regmatch(t->state, NULL, prog, submatch, m, &listids); if (result) { @@ -5376,10 +5870,18 @@ nfa_regmatch(prog, start, submatch, m) if (add_state != NULL) { - if (t->pim != NULL) + nfa_pim_T *pim; + + if (t->pim.result == NFA_PIM_UNUSED) + pim = NULL; + else + pim = &t->pim; + + /* Handle the postponed invisible match if the match might end + * without advancing and before the end of the line. 
*/ + if (pim != NULL && (clen == 0 || match_follows(add_state, 0))) { - /* postponed invisible match */ - if (t->pim->result == NFA_PIM_TODO) + if (pim->result == NFA_PIM_TODO) { #ifdef ENABLE_LOG fprintf(log_fd, "\n"); @@ -5387,58 +5889,60 @@ nfa_regmatch(prog, start, submatch, m) fprintf(log_fd, "Postponed recursive nfa_regmatch()\n"); fprintf(log_fd, "\n"); #endif - result = recursive_regmatch(t->pim->state, + result = recursive_regmatch(pim->state, pim, prog, submatch, m, &listids); - t->pim->result = result ? NFA_PIM_MATCH - : NFA_PIM_NOMATCH; + pim->result = result ? NFA_PIM_MATCH : NFA_PIM_NOMATCH; /* for \@! and \@pim->state->c - == NFA_START_INVISIBLE_NEG - || t->pim->state->c + if (result != (pim->state->c == NFA_START_INVISIBLE_NEG + || pim->state->c == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ - copy_sub_off(&t->pim->subs.norm, &m->norm); + copy_sub_off(&pim->subs.norm, &m->norm); #ifdef FEAT_SYN_HL if (nfa_has_zsubexpr) - copy_sub_off(&t->pim->subs.synt, &m->synt); + copy_sub_off(&pim->subs.synt, &m->synt); #endif } } else { - result = (t->pim->result == NFA_PIM_MATCH); + result = (pim->result == NFA_PIM_MATCH); #ifdef ENABLE_LOG fprintf(log_fd, "\n"); - fprintf(log_fd, "Using previous recursive nfa_regmatch() result, result == %d\n", t->pim->result); + fprintf(log_fd, "Using previous recursive nfa_regmatch() result, result == %d\n", pim->result); fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE"); fprintf(log_fd, "\n"); #endif } /* for \@! 
and \@pim->state->c == NFA_START_INVISIBLE_NEG - || t->pim->state->c + if (result != (pim->state->c == NFA_START_INVISIBLE_NEG + || pim->state->c == NFA_START_INVISIBLE_BEFORE_NEG)) { /* Copy submatch info from the recursive call */ - copy_sub_off(&t->subs.norm, &t->pim->subs.norm); + copy_sub_off(&t->subs.norm, &pim->subs.norm); #ifdef FEAT_SYN_HL if (nfa_has_zsubexpr) - copy_sub_off(&t->subs.synt, &t->pim->subs.synt); + copy_sub_off(&t->subs.synt, &pim->subs.synt); #endif } else /* look-behind match failed, don't add the state */ continue; + + /* Postponed invisible match was handled, don't add it to + * following states. */ + pim = NULL; } if (add_here) - addstate_here(thislist, add_state, &t->subs, NULL, &listidx); + addstate_here(thislist, add_state, &t->subs, pim, &listidx); else { - addstate(nextlist, add_state, &t->subs, add_off); + addstate(nextlist, add_state, &t->subs, pim, add_off); if (add_count > 0) nextlist->t[nextlist->n - 1].count = add_count; } @@ -5517,11 +6021,11 @@ nfa_regmatch(prog, start, submatch, m) (colnr_T)(reginput - regline) + clen; else m->norm.list.line[0].start = reginput + clen; - addstate(nextlist, start->out, m, clen); + addstate(nextlist, start->out, m, NULL, clen); } } else - addstate(nextlist, start, m, clen); + addstate(nextlist, start, m, NULL, clen); } #ifdef ENABLE_LOG @@ -5558,7 +6062,6 @@ theend: vim_free(list[0].t); vim_free(list[1].t); vim_free(listids); - ga_clear(&pimlist); #undef ADD_STATE_IF_MATCH #ifdef NFA_REGEXP_DEBUG_LOG fclose(debug); @@ -5584,17 +6087,6 @@ nfa_regtry(prog, col) #endif reginput = regline + col; - need_clear_subexpr = TRUE; -#ifdef FEAT_SYN_HL - /* Clear the external match subpointers if necessary. 
*/ - if (prog->reghasz == REX_SET) - { - nfa_has_zsubexpr = TRUE; - need_clear_zsubexpr = TRUE; - } - else - nfa_has_zsubexpr = FALSE; -#endif #ifdef ENABLE_LOG f = fopen(NFA_REGEXP_RUN_LOG, "a"); @@ -5764,12 +6256,35 @@ nfa_regexec_both(line, startcol) if (prog->reganch && col > 0) return 0L; + need_clear_subexpr = TRUE; +#ifdef FEAT_SYN_HL + /* Clear the external match subpointers if necessary. */ + if (prog->reghasz == REX_SET) + { + nfa_has_zsubexpr = TRUE; + need_clear_zsubexpr = TRUE; + } + else + nfa_has_zsubexpr = FALSE; +#endif + if (prog->regstart != NUL) + { /* Skip ahead until a character we know the match must start with. * When there is none there is no match. */ if (skip_to_start(prog->regstart, &col) == FAIL) return 0L; + /* If match_text is set it contains the full text that must match. + * Nothing else to try. Doesn't handle combining chars well. */ + if (prog->match_text != NULL +#ifdef FEAT_MBYTE + && !ireg_icombine +#endif + ) + return find_match_text(col, prog->regstart, prog->match_text); + } + /* If the start column is past the maximum column: no need to try. */ if (ireg_maxcol > 0 && col >= ireg_maxcol) goto theend; @@ -5876,6 +6391,8 @@ nfa_regcomp(expr, re_flags) prog->reganch = nfa_get_reganch(prog->start, 0); prog->regstart = nfa_get_regstart(prog->start, 0); + prog->match_text = nfa_get_match_text(prog->start); + #ifdef ENABLE_LOG nfa_postfix_dump(expr, OK); nfa_dump(prog); @@ -5885,7 +6402,7 @@ nfa_regcomp(expr, re_flags) prog->reghasz = re_has_z; #endif #ifdef DEBUG - prog->pattern = vim_strsave(expr); /* memory will leak */ + prog->pattern = vim_strsave(expr); nfa_regengine.expr = NULL; #endif @@ -5907,6 +6424,22 @@ fail: goto out; } +/* + * Free a compiled regexp program, returned by nfa_regcomp(). 
+ */ + static void +nfa_regfree(prog) + regprog_T *prog; +{ + if (prog != NULL) + { + vim_free(((nfa_regprog_T *)prog)->match_text); +#ifdef DEBUG + vim_free(((nfa_regprog_T *)prog)->pattern); +#endif + vim_free(prog); + } +} /* * Match a regexp against a string. diff --git a/src/screen.c b/src/screen.c index c5ba7177db..7ddee269f2 100644 --- a/src/screen.c +++ b/src/screen.c @@ -7095,7 +7095,7 @@ end_search_hl() { if (search_hl.rm.regprog != NULL) { - vim_free(search_hl.rm.regprog); + vim_regfree(search_hl.rm.regprog); search_hl.rm.regprog = NULL; } } @@ -7297,7 +7297,7 @@ next_search_hl(win, shl, lnum, mincol) if (shl == &search_hl) { /* don't free regprog in the match list, it's a copy */ - vim_free(shl->rm.regprog); + vim_regfree(shl->rm.regprog); no_hlsearch = TRUE; } shl->rm.regprog = NULL; diff --git a/src/search.c b/src/search.c index 863b0005b2..c262c8435a 100644 --- a/src/search.c +++ b/src/search.c @@ -981,7 +981,7 @@ searchit(win, buf, pos, dir, pat, count, options, pat_use, stop_lnum, tm) } while (--count > 0 && found); /* stop after count matches or no match */ - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); called_emsg |= save_called_emsg; @@ -4689,7 +4689,7 @@ is_zerowidth(pattern) } called_emsg |= save_called_emsg; - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); return result; } #endif /* FEAT_VISUAL */ @@ -5417,9 +5417,9 @@ exit_matched: fpip_end: vim_free(file_line); - vim_free(regmatch.regprog); - vim_free(incl_regmatch.regprog); - vim_free(def_regmatch.regprog); + vim_regfree(regmatch.regprog); + vim_regfree(incl_regmatch.regprog); + vim_regfree(def_regmatch.regprog); } static void diff --git a/src/spell.c b/src/spell.c index 29356bf9e9..e558a99e4a 100644 --- a/src/spell.c +++ b/src/spell.c @@ -2658,7 +2658,7 @@ slang_clear(lp) ga_clear(gap); for (i = 0; i < lp->sl_prefixcnt; ++i) - vim_free(lp->sl_prefprog[i]); + vim_regfree(lp->sl_prefprog[i]); lp->sl_prefixcnt = 0; vim_free(lp->sl_prefprog); lp->sl_prefprog = 
NULL; @@ -2669,7 +2669,7 @@ slang_clear(lp) vim_free(lp->sl_midword); lp->sl_midword = NULL; - vim_free(lp->sl_compprog); + vim_regfree(lp->sl_compprog); vim_free(lp->sl_comprules); vim_free(lp->sl_compstartflags); vim_free(lp->sl_compallflags); @@ -5802,7 +5802,7 @@ spell_read_aff(spin, fname) { sprintf((char *)buf, "^%s", aff_entry->ae_cond); - vim_free(aff_entry->ae_prog); + vim_regfree(aff_entry->ae_prog); aff_entry->ae_prog = vim_regcomp( buf, RE_MAGIC + RE_STRING); } @@ -6507,7 +6507,7 @@ spell_free_aff(aff) --todo; ah = HI2AH(hi); for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) - vim_free(ae->ae_prog); + vim_regfree(ae->ae_prog); } } if (ht == &aff->af_suff) diff --git a/src/syntax.c b/src/syntax.c index 08a908e4c1..a715bdc2fb 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -3495,7 +3495,7 @@ syntax_clear(block) block->b_syn_sync_maxlines = 0; block->b_syn_sync_linebreaks = 0; - vim_free(block->b_syn_linecont_prog); + vim_regfree(block->b_syn_linecont_prog); block->b_syn_linecont_prog = NULL; vim_free(block->b_syn_linecont_pat); block->b_syn_linecont_pat = NULL; @@ -3544,7 +3544,7 @@ syntax_sync_clear() curwin->w_s->b_syn_sync_maxlines = 0; curwin->w_s->b_syn_sync_linebreaks = 0; - vim_free(curwin->w_s->b_syn_linecont_prog); + vim_regfree(curwin->w_s->b_syn_linecont_prog); curwin->w_s->b_syn_linecont_prog = NULL; vim_free(curwin->w_s->b_syn_linecont_pat); curwin->w_s->b_syn_linecont_pat = NULL; @@ -3583,7 +3583,7 @@ syn_clear_pattern(block, i) int i; { vim_free(SYN_ITEMS(block)[i].sp_pattern); - vim_free(SYN_ITEMS(block)[i].sp_prog); + vim_regfree(SYN_ITEMS(block)[i].sp_prog); /* Only free sp_cont_list and sp_next_list of first start pattern */ if (i == 0 || SYN_ITEMS(block)[i - 1].sp_type != SPTYPE_START) { @@ -4991,7 +4991,7 @@ syn_cmd_match(eap, syncing) /* * Something failed, free the allocated memory. 
*/ - vim_free(item.sp_prog); + vim_regfree(item.sp_prog); vim_free(item.sp_pattern); vim_free(syn_opt_arg.cont_list); vim_free(syn_opt_arg.cont_in_list); @@ -5248,7 +5248,7 @@ syn_cmd_region(eap, syncing) { if (!success) { - vim_free(ppp->pp_synp->sp_prog); + vim_regfree(ppp->pp_synp->sp_prog); vim_free(ppp->pp_synp->sp_pattern); } vim_free(ppp->pp_synp); @@ -6022,7 +6022,7 @@ get_id_list(arg, keylen, list) id = -1; /* remember that we found one */ } } - vim_free(regmatch.regprog); + vim_regfree(regmatch.regprog); } } vim_free(name); @@ -6295,7 +6295,7 @@ ex_ownsyntax(eap) curwin->w_p_spell = FALSE; /* No spell checking */ clear_string_option(&curwin->w_s->b_p_spc); clear_string_option(&curwin->w_s->b_p_spf); - vim_free(curwin->w_s->b_cap_prog); + vim_regfree(curwin->w_s->b_cap_prog); curwin->w_s->b_cap_prog = NULL; clear_string_option(&curwin->w_s->b_p_spl); #endif diff --git a/src/tag.c b/src/tag.c index 34e9c4ff27..248280c7c9 100644 --- a/src/tag.c +++ b/src/tag.c @@ -2491,7 +2491,7 @@ line_read_in: findtag_end: vim_free(lbuf); - vim_free(orgpat.regmatch.regprog); + vim_regfree(orgpat.regmatch.regprog); vim_free(tag_fname); #ifdef FEAT_EMACS_TAGS vim_free(ebuf); diff --git a/src/testdir/Make_dos.mak b/src/testdir/Make_dos.mak index 679f16ce5d..fbb67cc29e 100644 --- a/src/testdir/Make_dos.mak +++ b/src/testdir/Make_dos.mak @@ -77,5 +77,5 @@ clean: -del X* -del X*.* -del test.ok - -rmdir /s /q Xfind + -rd /s /q Xfind -if exist viminfo del viminfo diff --git a/src/testdir/test64.in b/src/testdir/test64.in index 0ba833fed4..42703c2e2e 100644 --- a/src/testdir/test64.in +++ b/src/testdir/test64.in @@ -260,6 +260,8 @@ STARTTEST :call add(tl, [2, '[^[:alpha:]]\+','abcccadfoij7787ysf287yrnccdu','7787']) :call add(tl, [2, '[-a]', '-', '-']) :call add(tl, [2, '[a-]', '-', '-']) +:call add(tl, [2, '[a-f]*\c','ABCDEFGH','ABCDEF']) +:call add(tl, [2, '[abc][xyz]\c','-af-AF-BY--','BY']) :" filename regexp :call add(tl, [2, '[-./[:alnum:]_~]\+', 'log13.file', 'log13.file']) :" 
special chars @@ -385,6 +387,12 @@ STARTTEST :call add(tl, [2, '\(<<\)\@2<=span.', 'xxspanxxxx', 'foobar']) +:call add(tl, [2, '\(foo\)\@<=\>', 'barfoo', '', 'foo']) +:" :""""" \@> :call add(tl, [2, '\(a*\)\@>a', 'aaaa']) :call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa']) diff --git a/src/testdir/test64.ok b/src/testdir/test64.ok index 49a570ae25..0e25737dd0 100644 --- a/src/testdir/test64.ok +++ b/src/testdir/test64.ok @@ -584,6 +584,12 @@ OK 2 - [-a] OK 0 - [a-] OK 1 - [a-] OK 2 - [a-] +OK 0 - [a-f]*\c +OK 1 - [a-f]*\c +OK 2 - [a-f]*\c +OK 0 - [abc][xyz]\c +OK 1 - [abc][xyz]\c +OK 2 - [abc][xyz]\c OK 0 - [-./[:alnum:]_~]\+ OK 1 - [-./[:alnum:]_~]\+ OK 2 - [-./[:alnum:]_~]\+ @@ -872,6 +878,18 @@ OK 2 - \(<<\)\@2<=span. OK 0 - \(foo\)\@ +OK 1 - \(foo\)\@<=\> +OK 2 - \(foo\)\@<=\> +OK 0 - \(foo\)\@<=\> +OK 1 - \(foo\)\@<=\> +OK 2 - \(foo\)\@<=\> OK 0 - \(a*\)\@>a OK 1 - \(a*\)\@>a OK 2 - \(a*\)\@>a diff --git a/src/version.c b/src/version.c index a8c7edd52f..49b8002330 100644 --- a/src/version.c +++ b/src/version.c @@ -743,6 +743,22 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1156, +/**/ + 1155, +/**/ + 1154, +/**/ + 1153, +/**/ + 1152, +/**/ + 1151, +/**/ + 1150, +/**/ + 1149, /**/ 1148, /**/ diff --git a/src/window.c b/src/window.c index fa4c36e141..087c3bb6a6 100644 --- a/src/window.c +++ b/src/window.c @@ -6828,7 +6828,7 @@ match_delete(wp, id, perr) wp->w_match_head = cur->next; else prev->next = cur->next; - vim_free(cur->match.regprog); + vim_regfree(cur->match.regprog); vim_free(cur->pattern); vim_free(cur); redraw_later(SOME_VALID); @@ -6847,7 +6847,7 @@ clear_matches(wp) while (wp->w_match_head != NULL) { m = wp->w_match_head->next; - vim_free(wp->w_match_head->match.regprog); + vim_regfree(wp->w_match_head->match.regprog); vim_free(wp->w_match_head->pattern); vim_free(wp->w_match_head); wp->w_match_head = m;