runtime(sed): Update syntax, support more GNU address extensions

- Support all GNU address extensions.
- Fix some bugs related to erroneous matching of pattern delimiters in
  bracket expressions.

closes: #19587

Signed-off-by: Doug Kearns <dougkearns@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
This commit is contained in:
Doug Kearns
2026-03-07 10:11:39 +00:00
committed by Christian Brabandt
parent 78ae2ff094
commit e948fea640
6 changed files with 209 additions and 14 deletions
+64 -14
View File
@@ -3,7 +3,7 @@
" Maintainer: Doug Kearns <dougkearns@gmail.com>
" Previous Maintainer: Haakon Riiser <hakonrk@fys.uio.no>
" Contributor: Jack Haden-Enneking
" Last Change: 2022 Oct 15
" Last Change: 2026 Mar 06
" quit when a syntax file was already loaded
if exists("b:current_syntax")
@@ -16,10 +16,31 @@ syn match sedError "\S"
syn match sedWhitespace "\s\+" contained
syn match sedSemicolon ";"
syn match sedAddress "[[:digit:]$]"
" Addresses {{{1
syn match sedAddress "\d\+\|\$"
" GNU extensions
syn match sedAddress "\d\+\~\d\+"
syn region sedAddress matchgroup=Special start="[{,;]\s*/\%(\\/\)\="lc=1 skip="[^\\]\%(\\\\\)*\\/" end="/I\=" contains=sedTab,sedRegexpMeta
syn region sedAddress matchgroup=Special start="^\s*/\%(\\/\)\=" skip="[^\\]\%(\\\\\)*\\/" end="/I\=" contains=sedTab,sedRegexpMeta
syn match sedAddress "\~\d\+"
syn match sedAddress "[-+]\d\+"
" Regexp address delimited by '/' that follows '{', ',' or ';' (with optional
" whitespace in between).  The delimiters are highlighted as Delimiter.
syn region sedAddress
\ matchgroup=Delimiter
\ start="[{,;]\s*/\%(\\/\)\="lc=1
"\ skip an escaped delimiter: a non-backslash, any number of backslash
"\ pairs, then \/
\ skip="[^\\]\%(\\\\\)*\\/"
"\ GNU extensions
\ end="/\%(IM\|MI\|[IM]\)\="
\ contains=sedTab,sedRegexpMeta
" Regexp address delimited by '/' at the start of a line (optionally
" indented).  The delimiters are highlighted as Delimiter.
syn region sedAddress
\ matchgroup=Delimiter
\ start="^\s*/\%(\\/\)\="
"\ skip an escaped delimiter: a non-backslash, any number of backslash
"\ pairs, then \/
\ skip="[^\\]\%(\\\\\)*\\/"
"\ GNU extensions
\ end="/\%(IM\|MI\|[IM]\)\="
\ contains=sedTab,sedRegexpMeta
" }}}
syn match sedFunction "[dDgGhHlnNpPqQx=]\s*\%($\|;\)" contains=sedSemicolon,sedWhitespace
if exists("g:sed_dialect") && g:sed_dialect ==? "bsd"
syn match sedComment "^\s*#.*$" contains=sedTodo
@@ -50,7 +71,7 @@ syn region sedFlagWrite matchgroup=sedFlag start="w" matchgroup=sedSemicolon
syn match sedFlag "[[:digit:]gpI]*w\=" contains=sedFlagWrite contained
syn match sedRegexpMeta "[.*^$]" contained
syn match sedRegexpMeta "\\." contains=sedTab contained
syn match sedRegexpMeta "\[.\{-}\]" contains=sedTab contained
syn match sedRegexpMeta "\[\^\=\]\=\%(\[:.\{-}:\]\|\[\..\{-}\.\]\|\[=.\{-}=\]\|[^]]\)*\]" contains=sedTab contained
" Interval expressions such as \{n\}, \{n,\} and \{n,m\}.  The counts are
" runs of digits and the comma part is optional.  (The previous pattern used
" \d\* which only matched a single digit followed by a literal '*'.)
syn match sedRegexpMeta "\\{\d*\%(,\d*\)\=\\}" contained
syn match sedRegexpMeta "\\%(.\{-}\\)" contains=sedTab contained
syn match sedReplaceMeta "&\|\\\%($\|.\)" contains=sedTab contained
@@ -68,15 +89,44 @@ let s:metacharacters = '$*.\^[~'
while s:i <= s:last
let s:delimiter = escape(nr2char(s:i), s:metacharacters)
if s:i != s:at
exe 'syn region sedAddress matchgroup=Special start=@\\'.s:delimiter.'\%(\\'.s:delimiter.'\)\=@ skip=@[^\\]\%(\\\\\)*\\'.s:delimiter.'@ end=@'.s:delimiter.'[IM]\=@ contains=sedTab'
exe 'syn region sedRegexp'.s:i 'matchgroup=Special start=@'.s:delimiter.'\%(\\\\\|\\'.s:delimiter.'\)*@ skip=@[^\\'.s:delimiter.']\%(\\\\\)*\\'.s:delimiter.'@ end=@'.s:delimiter.'@me=e-1 contains=sedTab,sedRegexpMeta keepend contained nextgroup=sedReplacement'.s:i
exe 'syn region sedReplacement'.s:i 'matchgroup=Special start=@'.s:delimiter.'\%(\\\\\|\\'.s:delimiter.'\)*@ skip=@[^\\'.s:delimiter.']\%(\\\\\)*\\'.s:delimiter.'@ end=@'.s:delimiter.'@ contains=sedTab,sedReplaceMeta keepend contained nextgroup=@sedFlags'
" Address using a custom delimiter: a backslash followed by the delimiter
" starts the region and the next unskipped delimiter ends it, optionally
" followed by the I and/or M flags (the same flag alternatives that are
" marked as GNU extensions on the '/' address regions).
" The skip pattern covers an escaped delimiter as well as a '[' followed by
" the shortest run of text up to a delimiter.
" The patterns below use '@' as the :syn pattern delimiter.
exe 'syn region sedAddress'
\ 'matchgroup=Delimiter'
\ 'start=@\\' .. s:delimiter .. '\%(\\' .. s:delimiter .. '\)\=@'
\ 'skip=@[^\\]\%(\\\\\)*\\' .. s:delimiter .. '\|\[.\{-}' .. s:delimiter .. '@'
\ 'end=@' .. s:delimiter .. '\%(IM\|MI\|[IM]\)\=@'
\ 'contains=sedTab,sedRegexpMeta'
" Regexp part of a command using this delimiter.  One region is defined per
" character code (the group name carries s:i) and it hands over to the
" matching sedReplacement group via nextgroup.
" NOTE(review): me=e-1 presumably leaves the closing delimiter available as
" the start of the replacement region - confirm.
exe 'syn region sedRegexp' .. s:i 'contained'
\ 'matchgroup=Delimiter'
\ 'start=@' .. s:delimiter .. '\%(\\\\\|\\' .. s:delimiter .. '\)*@'
\ 'end=@' .. s:delimiter .. '@me=e-1'
\ 'nextgroup=sedReplacement' .. s:i
\ 'contains=sedTab,sedRegexpMeta'
" Replacement part for the same delimiter; ends at the next delimiter and is
" followed by the sedFlags cluster.
exe 'syn region sedReplacement' .. s:i 'contained'
\ 'matchgroup=Delimiter'
\ 'start=@' .. s:delimiter .. '\%(\\\\\|\\' .. s:delimiter .. '\)*@'
\ 'end=@' .. s:delimiter .. '@'
\ 'nextgroup=@sedFlags'
\ 'contains=sedTab,sedReplaceMeta'
endif
let s:i = s:i + 1
endwhile
syn region sedAddress matchgroup=Special start=+\\@\%(\\@\)\=+ skip=+[^\\]\%(\\\\\)*\\@+ end=+@I\=+ contains=sedTab,sedRegexpMeta
syn region sedRegexp64 matchgroup=Special start=+@\%(\\\\\|\\@\)*+ skip=+[^\\@]\%(\\\\\)*\\@+ end=+@+me=e-1 contains=sedTab,sedRegexpMeta keepend contained nextgroup=sedReplacement64
syn region sedReplacement64 matchgroup=Special start=+@\%(\\\\\|\\@\)*+ skip=+[^\\@]\%(\\\\\)*\\@+ end=+@+ contains=sedTab,sedReplaceMeta keepend contained nextgroup=sedFlag
" Regions for the '@' delimiter, written out by hand with '+' as the :syn
" pattern delimiter.

" Address of the form \@regexp@, optionally followed by the I and/or M flags.
" The delimiter is captured with \z(...\) in the start pattern and reused as
" \z1 in the end pattern.
syn region sedAddress
\ matchgroup=Delimiter
\ start=+\\\z(@\)+
\ end=+\z1\%(IM\|MI\|[IM]\)\=+
\ contains=sedTab,sedRegexpMeta
" Regexp part of a command delimited by '@'; hands over to sedReplacement64.
" NOTE(review): me=e-1 presumably leaves the closing '@' available as the
" start of sedReplacement64 - confirm.
syn region sedRegexp64 contained
\ matchgroup=Delimiter
\ start=+@\%(\\\\\|\\@\)*+
\ end=+@+me=e-1
\ nextgroup=sedReplacement64
\ contains=sedTab,sedRegexpMeta
" Replacement part delimited by '@'; followed by sedFlag.
syn region sedReplacement64 contained
\ matchgroup=Delimiter
\ start=+@\%(\\\\\|\\@\)*+
\ end=+@+
\ nextgroup=sedFlag
\ contains=sedTab,sedReplaceMeta
" Since the syntax for the substitution command is very similar to the
" syntax for the transform command, I use the same pattern matching
@@ -110,8 +160,8 @@ if s:highlight_tabs
endif
let s:i = char2nr(" ") " ASCII: 32, EBCDIC: 64
while s:i <= s:last
exe "hi def link sedRegexp".s:i "Macro"
exe "hi def link sedReplacement".s:i "NONE"
exe "hi def link sedRegexp" .. s:i "Macro"
exe "hi def link sedReplacement" .. s:i "NONE"
let s:i = s:i + 1
endwhile
@@ -120,4 +170,4 @@ unlet s:highlight_tabs
let b:current_syntax = "sed"
" vim: nowrap sw=2 sts=2 ts=8 noet:
" vim: nowrap sw=2 sts=2 ts=8 noet fdm=marker:
+20
View File
@@ -0,0 +1,20 @@
>#+0#0000e05#ffffff0| |s|e|d|(|1|)| +0#0000000&@66
@75
@75
|#+0#0000e05&| |A|d@1|r|e|s@1|e|s| +0#0000000&@63
@75
@75
|#+0#0000e05&| |l|i|n|e| |n|u|m|b|e|r| +0#0000000&@61
@75
|4+0#e000e06&|2|p+0#00e0e07&| +0#0000000&@71
|$+0#e000e06&|p+0#00e0e07&| +0#0000000&@72
@75
|/+0#e000e06&|f|o@1|b|a|r|/|p+0#00e0e07&| +0#0000000&@65
|/+0#e000e06&|f|o@1|[|/|]|b|a|r|/|p+0#00e0e07&| +0#0000000&@62
|/+0#e000e06&|f|o@1|\|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@63
@75
|\+0#e000e06&|x|f|o@1|b|a|r|x|p+0#00e0e07&| +0#0000000&@64
|\+0#e000e06&|x|f|o@1|\|x|b|a|r|x|p+0#00e0e07&| +0#0000000&@62
|\+0#e000e06&|x|f|o@1|[|x|]|b|a|r|x|p+0#00e0e07&| +0#0000000&@61
@75
@57|1|,|1| @10|T|o|p|
+20
View File
@@ -0,0 +1,20 @@
|/+0#e000e06#ffffff0|f|o@1|\|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@63
@75
|\+0#e000e06&|x|f|o@1|b|a|r|x|p+0#00e0e07&| +0#0000000&@64
|\+0#e000e06&|x|f|o@1|\|x|b|a|r|x|p+0#00e0e07&| +0#0000000&@62
|\+0#e000e06&|x|f|o@1|[|x|]|b|a|r|x|p+0#00e0e07&| +0#0000000&@61
> @74
|#+0#0000e05&| |s|k|i|p| |b|r|a|c|k|e|t| |e|x|p|r|e|s@1|i|o|n|s| +0#0000000&@48
|\+0#e000e06&|a|_|\|a|_|[|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@40
|\+0#e000e06&|a|_|\|a|_|[|^|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@39
|\+0#e000e06&|a|_|\|a|_|[|]|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@39
|\+0#e000e06&|a|_|\|a|_|[|^|]|a|[|:|a|s|c|i@1|:|]|a|[|.|a|.|]|a|[|=|a|=|]|a|]|_|a|p+0#00e0e07&| +0#0000000&@38
@75
@75
|#+0#0000e05&| |r|a|n|g|e| +0#0000000&@67
@75
|4+0#e000e06&|2|,|8|4|p+0#00e0e07&| +0#0000000&@68
|/+0#e000e06&|f|o@1|/|,|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@62
@75
|/+0#e000e06&|f|o@1|/|,|4|2|p+0#00e0e07&| +0#0000000&@65
@57|1|9|,|0|-|1| @7|2|8|%|
+20
View File
@@ -0,0 +1,20 @@
|/+0#e000e06#ffffff0|f|o@1|/|,|4|2|p+0#00e0e07&| +0#0000000&@65
|4+0#e000e06&|2|,|/|b|a|r|/|p+0#00e0e07&| +0#0000000&@65
@75
@75
|#+0#0000e05&| |G|N|U| |e|x|t|e|n|s|i|o|n|s| +0#0000000&@58
> @74
@75
|#+0#0000e05&| |s|t|e|p| +0#0000000&@68
@75
|1+0#e000e06&|~|2|p+0#00e0e07&| +0#0000000&@70
@75
@75
|#+0#0000e05&| |i|g|n|o|r|e| |c|a|s|e|,| |m|u|l|t|i|l|i|n|e| +0#0000000&@50
@75
|/+0#e000e06&|f|o@1|b|a|r|/|I|p+0#00e0e07&| +0#0000000&@64
|/+0#e000e06&|f|o@1|b|a|r|/|M|p+0#00e0e07&| +0#0000000&@64
|/+0#e000e06&|f|o@1|b|a|r|/|I|M|p+0#00e0e07&| +0#0000000&@63
|/+0#e000e06&|f|o@1|b|a|r|/|M|I|p+0#00e0e07&| +0#0000000&@63
@75
@57|3|7|,|0|-|1| @7|6|7|%|
+20
View File
@@ -0,0 +1,20 @@
| +0&#ffffff0@74
|\+0#e000e06&|a|f|o@1|b|\|a|r|a|I|p+0#00e0e07&| +0#0000000&@62
|\+0#e000e06&|a|f|o@1|b|\|a|r|a|M|p+0#00e0e07&| +0#0000000&@62
|\+0#e000e06&|a|f|o@1|b|\|a|r|a|I|M|p+0#00e0e07&| +0#0000000&@61
|\+0#e000e06&|a|f|o@1|b|\|a|r|a|M|I|p+0#00e0e07&| +0#0000000&@61
> @74
@75
|#+0#0000e05&| |i|n|c|r|e|m|e|n|t| +0#0000000&@63
@75
|4+0#e000e06&|2|,|+|4|2|p+0#00e0e07&| +0#0000000&@67
@75
@75
|#+0#0000e05&| |s|t|e|p| +0#0000000&@68
@75
|4+0#e000e06&|2|,|~|2|p+0#00e0e07&| +0#0000000&@68
@75
|~+0#4040ff13&| @73
|~| @73
|~| @73
| +0#0000000&@56|5@1|,|0|-|1| @7|B|o|t|
+65
View File
@@ -0,0 +1,65 @@
# sed(1)
# Addresses
# line number
42p
$p
/foobar/p
/foo[/]bar/p
/foo\/bar/p
\xxfoobarxp
\xxfoo\xbarxp
\xxfoo[x]barxp
# skip bracket expressions
\aa_\a_[a[:ascii:]a[.a.]a[=a=]a]_ap
\aa_\a_[^a[:ascii:]a[.a.]a[=a=]a]_ap
\aa_\a_[]a[:ascii:]a[.a.]a[=a=]a]_ap
\aa_\a_[^]a[:ascii:]a[.a.]a[=a=]a]_ap
# range
42,84p
/foo/,/bar/p
/foo/,42p
42,/bar/p
# GNU extensions
# step
1~2p
# ignore case, multiline
/foobar/Ip
/foobar/Mp
/foobar/IMp
/foobar/MIp
\aafoob\araIp
\aafoob\araMp
\aafoob\araIMp
\aafoob\araMIp
# increment
42,+42p
# step
42,~2p