Fix slow server startup of very big content trees

As in content trees with 10 thousand directories or more.

A benchmark of the bottleneck code in `helpers.ExtractAndGroupRootPaths`:

```
                            │ cmp20251125.bench │ fix-extractandgrouproot-14211.bench │
                            │      sec/op       │    sec/op     vs base               │
ExtractAndGroupRootPaths-10     1282818.8µ ± 8%   493.8µ ± 38%  -99.96% (p=0.002 n=6)

                            │ cmp20251125.bench │ fix-extractandgrouproot-14211.bench │
                            │       B/op        │     B/op      vs base               │
ExtractAndGroupRootPaths-10       3343.8Ki ± 0%   146.3Ki ± 0%  -95.63% (p=0.002 n=6)

                            │ cmp20251125.bench │ fix-extractandgrouproot-14211.bench │
                            │     allocs/op     │  allocs/op    vs base               │
ExtractAndGroupRootPaths-10        20.043k ± 0%    2.979k ± 0%  -85.14% (p=0.002 n=6)
```

For the test project that started this (a 60k directory content tree), the server startup with no rendering went from 1.5 minutes to less than 4 seconds:

```
hugop server --renderSegments none                        main ✚ ✖ ✱ ◼
Watching for changes in /Users/bep/dev/sites/hugotestsites/60k/content/{section0,section1,section10,section100,section101,section102,section103,section104,section105,section106,...}
Watching for changes in /Users/bep/dev/sites/hugotestsites/60k/layouts/_default
Watching for config changes in /Users/bep/dev/sites/hugotestsites/60k/config.toml
Start building sites …
hugo v0.153.0-DEV-7e27c303904ed8b221d6a5a4fc9a764bb7b2935b darwin/arm64 BuildDate=2025-11-25T15:02:58Z

                  │ EN
──────────────────┼────
 Pages            │  0
 Paginator pages  │  0
 Non-page files   │  0
 Static files     │  0
 Processed images │  0
 Aliases          │  0
 Cleaned          │  0

Built in 3884 ms
Environment: "development"
Serving pages from disk
Running in Fast Render Mode. For full rebuilds on change: hugo server --disableFastRender
Web Server is available at //localhost:1313/ (bind address 127.0.0.1)
Press Ctrl+C to stop
```

Note that the output may be a little different and a little more verbose than before, but the information is correct and this implementation is significantly faster and simpler.

Fixes #14211
This commit is contained in:
Bjørn Erik Pedersen
2025-11-25 15:37:24 +01:00
parent 555dfa207a
commit 7a43b928a6
4 changed files with 60 additions and 126 deletions

View File

@@ -401,9 +401,7 @@ func (r *rootCommand) Run(ctx context.Context, cd *simplecobra.Commandeer, args
watchGroups := helpers.ExtractAndGroupRootPaths(watchDirs)
for _, group := range watchGroups {
r.Printf("Watching for changes in %s\n", group)
}
r.Printf("Watching for changes in %s\n", strings.Join(watchGroups, ", "))
watcher, err := b.newWatcher(r.poll, watchDirs...)
if err != nil {
return err

View File

@@ -492,9 +492,7 @@ func (c *serverCommand) Run(ctx context.Context, cd *simplecobra.Commandeer, arg
watchGroups := helpers.ExtractAndGroupRootPaths(watchDirs)
for _, group := range watchGroups {
c.r.Printf("Watching for changes in %s\n", group)
}
c.r.Printf("Watching for changes in %s\n", strings.Join(watchGroups, ", "))
watcher, err := c.newWatcher(c.r.poll, watchDirs...)
if err != nil {
return err

View File

@@ -21,11 +21,12 @@ import (
"path"
"path/filepath"
"regexp"
"slices"
"sort"
"strings"
"github.com/gohugoio/go-radix"
"github.com/gohugoio/hugo/common/herrors"
"github.com/gohugoio/hugo/common/hstrings"
"github.com/gohugoio/hugo/common/text"
"github.com/gohugoio/hugo/htesting"
@@ -129,113 +130,63 @@ func (n NamedSlice) String() string {
return fmt.Sprintf("%s%s{%s}", n.Name, FilePathSeparator, strings.Join(n.Slice, ","))
}
func ExtractAndGroupRootPaths(paths []string) []NamedSlice {
if len(paths) == 0 {
// ExtractAndGroupRootPaths extracts and groups root paths from the supplied list of paths.
// Note that the in slice will be sorted in place.
func ExtractAndGroupRootPaths(in []string) []string {
if len(in) == 0 {
return nil
}
const maxGroups = 5
sort.Strings(in)
var groups []string
tree := radix.New[[]string]()
pathsCopy := make([]string, len(paths))
hadSlashPrefix := strings.HasPrefix(paths[0], FilePathSeparator)
for i, p := range paths {
pathsCopy[i] = strings.Trim(filepath.ToSlash(p), "/")
}
sort.Strings(pathsCopy)
pathsParts := make([][]string, len(pathsCopy))
for i, p := range pathsCopy {
pathsParts[i] = strings.Split(p, "/")
}
var groups [][]string
for i, p1 := range pathsParts {
c1 := -1
for j, p2 := range pathsParts {
if i == j {
continue
LOOP:
for _, s := range in {
s = filepath.ToSlash(s)
if ss, g, found := tree.LongestPrefix(s); found {
if len(g) > maxGroups {
continue LOOP
}
parts := strings.Split(strings.TrimPrefix(strings.TrimPrefix(s, ss), "/"), "/")
if len(parts) > 0 && parts[0] != "" && !slices.Contains(g, parts[0]) {
g = append(g, parts[0])
tree.Insert(ss, g)
}
c2 := -1
for i, v := range p1 {
if i >= len(p2) {
break
}
if v != p2[i] {
break
}
c2 = i
}
if c1 == -1 || (c2 != -1 && c2 < c1) {
c1 = c2
}
}
if c1 != -1 {
groups = append(groups, p1[:c1+1])
} else {
groups = append(groups, p1)
tree.Insert(s, []string{})
}
}
groupsStr := make([]string, len(groups))
for i, g := range groups {
groupsStr[i] = strings.Join(g, "/")
var collect radix.WalkFn[[]string] = func(s string, g []string) (radix.WalkFlag, []string, error) {
if len(g) == 0 {
groups = append(groups, s)
return radix.WalkContinue, nil, nil
}
if len(g) == 1 {
groups = append(groups, path.Join(s, g[0]))
return radix.WalkContinue, nil, nil
}
var sb strings.Builder
sb.WriteString(s)
// This is used to print "Watching for changes in /Users/bep/dev/sites/hugotestsites/60k/content/{section0,section1,section10..."
// Having too many groups here is not helpful.
if len(g) > maxGroups {
// This will modify the slice in the tree, but that is OK since we are done with it.
g = g[:maxGroups]
g = append(g, "...")
}
sb.WriteString("/{")
sb.WriteString(strings.Join(g, ","))
sb.WriteString("}")
groups = append(groups, sb.String())
return radix.WalkContinue, nil, nil
}
groupsStr = hstrings.UniqueStringsSorted(groupsStr)
tree.Walk(collect)
var result []NamedSlice
for _, g := range groupsStr {
name := filepath.FromSlash(g)
if hadSlashPrefix {
name = FilePathSeparator + name
}
ns := NamedSlice{Name: name}
for _, p := range pathsCopy {
if !strings.HasPrefix(p, g) {
continue
}
p = strings.TrimPrefix(p, g)
if p != "" {
ns.Slice = append(ns.Slice, p)
}
}
ns.Slice = hstrings.UniqueStrings(ExtractRootPaths(ns.Slice))
result = append(result, ns)
}
return result
}
// ExtractRootPaths extracts the root paths from the supplied list of paths.
// The resulting root path will not contain any file separators, but there
// may be duplicates.
// So "/content/section/" becomes "content"
func ExtractRootPaths(paths []string) []string {
r := make([]string, len(paths))
for i, p := range paths {
root := filepath.ToSlash(p)
sections := strings.SplitSeq(root, "/")
for section := range sections {
if section != "" {
root = section
break
}
}
r[i] = root
}
return r
return groups
}
// FindCWD returns the current working directory from where the Hugo

View File

@@ -17,7 +17,6 @@ import (
"fmt"
"os"
"path/filepath"
"reflect"
"runtime"
"strconv"
"strings"
@@ -355,36 +354,24 @@ func TestExtractAndGroupRootPaths(t *testing.T) {
filepath.FromSlash("/c/d/e"),
}
inCopy := make([]string, len(in))
copy(inCopy, in)
result := helpers.ExtractAndGroupRootPaths(in)
c := qt.New(t)
c.Assert(fmt.Sprint(result), qt.Equals, filepath.FromSlash("[/a/b/{c,e} /c/d/e]"))
// Make sure the original is preserved
c.Assert(in, qt.DeepEquals, inCopy)
c.Assert(result, qt.DeepEquals, []string{"/a/b/{c,e}", "/c/d/e"})
}
func TestExtractRootPaths(t *testing.T) {
tests := []struct {
input []string
expected []string
}{{
[]string{
filepath.FromSlash("a/b"), filepath.FromSlash("a/b/c/"), "b",
filepath.FromSlash("/c/d"), filepath.FromSlash("d/"), filepath.FromSlash("//e//"),
},
[]string{"a", "a", "b", "c", "d", "e"},
}}
for _, test := range tests {
output := helpers.ExtractRootPaths(test.input)
if !reflect.DeepEqual(output, test.expected) {
t.Errorf("Expected %#v, got %#v\n", test.expected, output)
func BenchmarkExtractAndGroupRootPaths(b *testing.B) {
in := []string{}
for i := 0; i < 10; i++ {
for j := 0; j < 1000; j++ {
in = append(in, fmt.Sprintf("/a/b/c/s%d/p%d", i, j))
}
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
helpers.ExtractAndGroupRootPaths(in)
}
}
func TestFindCWD(t *testing.T) {