Files
hugo-mirror/cache/filecache/filecache.go
Bjørn Erik Pedersen dfece5b674 Add Page.GitInfo support for content from Git modules
Use blobless git clone (--filter=blob:none --no-checkout) to fetch
commit history for content from Git modules, then map it using the
existing gitmap library.  Cloned repos are cached in the modulegitinfo file cache.

Closes #14431
Fixes #5533
2026-02-23 17:43:17 +01:00

580 lines
12 KiB
Go

// Copyright 2026 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package filecache
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/gohugoio/httpcache"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/helpers"
"github.com/BurntSushi/locker"
"github.com/spf13/afero"
)
// ErrFatal is a sentinel error that signals an unrecoverable cache error.
// ReadOrCreate returns it to the caller instead of re-creating the entry.
var ErrFatal = errors.New("fatal filecache error")

// FilecacheRootDirname is the directory name "filecache".
// NOTE(review): presumably the root directory name under which the file
// caches live; confirm against the cache configuration code.
const FilecacheRootDirname = "filecache"
// Cache caches a set of files in a directory. This is usually a directory on
// disk, but since this is backed by an Afero file system, it can be anything.
type Cache struct {
	// Fs is the file system holding the cache entries. Entry ids are used as
	// file names relative to it.
	Fs afero.Fs
	// cfg is the configuration of this cache (max age, compiled directory, ...).
	cfg FileCacheConfig
	// entryLocker provides per-id locking and records which ids have been locked.
	entryLocker *lockTracker
	// initOnce guards the one-time setup performed by init.
	initOnce sync.Once
	// isInited is set to true once the one-time setup has started.
	isInited bool
	// initErr holds the error from the one-time setup, if any.
	initErr error
}
// lockTracker is a named locker that also remembers every id that has been
// locked through it.
type lockTracker struct {
	// seenMu protects seen.
	seenMu sync.RWMutex
	// seen is the set of ids that have been passed to Lock.
	seen map[string]struct{}
	// Locker provides the actual per-id locking.
	*locker.Locker
}
// Lock records id as seen and then acquires the named lock for it.
// The set of seen ids is used for garbage collection after a Hugo build.
func (l *lockTracker) Lock(id string) {
	// Fast path: most ids have been seen before, so a read lock suffices.
	l.seenMu.RLock()
	_, known := l.seen[id]
	l.seenMu.RUnlock()

	if !known {
		// Inserting is idempotent, so a concurrent insert of the same id
		// between the two critical sections is harmless.
		l.seenMu.Lock()
		l.seen[id] = struct{}{}
		l.seenMu.Unlock()
	}

	l.Locker.Lock(id)
}
// ItemInfo contains info about a cached file.
type ItemInfo struct {
	// Name is the file's name relative to the cache's filesystem.
	// The Cache methods in this file set it to the cleaned id of the entry.
	Name string
}
// NewCache creates a new file cache on top of the given filesystem,
// configured by cfg. It panics if cfg fails to initialize.
func NewCache(fs afero.Fs, cfg FileCacheConfig) *Cache {
	err := cfg.init()
	if err != nil {
		panic(fmt.Sprintf("invalid cache config: %s", err))
	}

	tracker := &lockTracker{
		Locker: locker.NewLocker(),
		seen:   make(map[string]struct{}),
	}

	return &Cache{
		Fs:          fs,
		cfg:         cfg,
		entryLocker: tracker,
	}
}
// lockedFile is a file with a lock that is released on Close.
type lockedFile struct {
	// File is the underlying file; all file operations are delegated to it.
	afero.File
	// unlock releases the lock held for this file. It is called by Close.
	unlock func()
}
// Close closes the underlying file and then releases the lock, returning
// the error (if any) from closing the file.
func (l *lockedFile) Close() error {
	// Deferred so the lock is released even if closing the file panics.
	defer l.unlock()

	closeErr := l.File.Close()
	return closeErr
}
// init lazily prepares the cache for use by making sure the base directory
// of c.Fs exists. The work is done at most once; every call returns the
// error (if any) from that first attempt. It panics if c is nil.
func (c *Cache) init() error {
	if c == nil {
		panic("cache is nil")
	}

	c.initOnce.Do(func() {
		c.isInited = true
		// Create the base dir if it does not exist.
		if err := c.Fs.MkdirAll("", 0o777); err != nil && !os.IsExist(err) {
			// Wrap with %w so callers can still inspect the underlying error.
			c.initErr = fmt.Errorf("failed to create base cache directory: %w", err)
		}
	})

	return c.initErr
}
// WriteCloser opens the cache entry with the given id for writing and
// returns it together with its ItemInfo.
// The named lock for the entry is held until the returned writer is closed,
// so it's important that it's closed when done.
func (c *Cache) WriteCloser(id string) (ItemInfo, io.WriteCloser, error) {
	if initErr := c.init(); initErr != nil {
		return ItemInfo{}, nil, initErr
	}

	id = cleanID(id)
	info := ItemInfo{Name: id}
	release := func() { c.entryLocker.Unlock(id) }

	c.entryLocker.Lock(id)

	file, err := helpers.OpenFileForWriting(c.Fs, id)
	if err != nil {
		// Nothing to hand out, so release the lock right away.
		release()
		return info, nil, err
	}

	return info, &lockedFile{File: file, unlock: release}, nil
}
// ReadOrCreate tries to lookup the file in cache.
// If found, it is passed to read and then closed.
// If not found, or if read fails with anything but ErrFatal, a new file is
// created and passed to create, which should close it when done.
//
// The returned ItemInfo has the cleaned id as Name. The returned error is
// the init error, an error wrapping ErrFatal from read, the error from
// opening the entry for writing, or the error from create.
// The whole operation is protected by a named lock on the id.
func (c *Cache) ReadOrCreate(id string,
	read func(info ItemInfo, r io.ReadSeeker) error,
	create func(info ItemInfo, w io.WriteCloser) error,
) (info ItemInfo, err error) {
	if err := c.init(); err != nil {
		return ItemInfo{}, err
	}

	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	info = ItemInfo{Name: id}

	if r := c.getOrRemove(id); r != nil {
		err = read(info, r)
		// Close the read handle before the entry is possibly reopened for
		// writing below. The entry was only read, so a close error carries
		// no useful information.
		_ = r.Close()
		if err == nil || errors.Is(err, ErrFatal) {
			return info, err
		}
		// See https://github.com/gohugoio/hugo/issues/6401
		// To recover from file corruption we handle read errors
		// as the cache item was not found.
		// Any file permission issue will also fail in the next step.
	}

	f, err := helpers.OpenFileForWriting(c.Fs, id)
	if err != nil {
		return info, err
	}

	return info, create(info, f)
}
// NamedLock acquires the named lock for the cleaned id and returns the
// function that releases it.
func (c *Cache) NamedLock(id string) func() {
	key := cleanID(id)
	c.entryLocker.Lock(key)

	unlock := func() {
		c.entryLocker.Unlock(key)
	}
	return unlock
}
// GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will
// be invoked and the result cached.
// This method is protected by a named lock using the given id as identifier.
//
// On a cache hit the returned reader is the open cache file. When caching is
// disabled (MaxAge == 0) the reader returned by create is handed to the
// caller as is. Otherwise the created content is written to the cache and
// the caller gets a reader over an in-memory copy; in that case a non-nil
// error from writing the cache entry is returned alongside that reader.
// The caller is responsible for closing the returned reader.
func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) {
	if err := c.init(); err != nil {
		return ItemInfo{}, nil, err
	}

	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	info := ItemInfo{Name: id}

	if r := c.getOrRemove(id); r != nil {
		return info, r, nil
	}

	r, err := create()
	if err != nil {
		return info, nil, err
	}

	if c.cfg.MaxAge == 0 {
		// No caching.
		return info, hugio.ToReadCloser(r), nil
	}

	// The created reader is consumed here and never handed to the caller,
	// so it must be closed here to not leak the underlying resource.
	defer r.Close()

	var buff bytes.Buffer
	err = c.writeReader(id, io.TeeReader(r, &buff))

	return info, hugio.ToReadCloser(&buff), err
}
// AbsFilenameFromID returns the filename for the given id in the cache,
// built by joining the cache's compiled directory with the cleaned id.
// NOTE(review): the result is absolute only if cfg.DirCompiled is; confirm.
func (c *Cache) AbsFilenameFromID(id string) string {
	name := cleanID(id)
	return filepath.Join(c.cfg.DirCompiled, name)
}
// GetOrCreateInfo returns the item info for the entry with the given id.
// If the entry needs to be (re-)created, create is invoked with the cleaned
// id and is expected to create the cache item itself. If create fails, the
// entry is removed and the error is returned.
// The returned ItemInfo has the cleaned id as Name.
// This method is protected by a named lock using the given id as identifier.
func (c *Cache) GetOrCreateInfo(id string, create func(id string) error) (ItemInfo, error) {
	if initErr := c.init(); initErr != nil {
		return ItemInfo{}, initErr
	}

	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	info := ItemInfo{Name: id}

	needsCreate := c.removeIfNeeded(id)
	if needsCreate {
		if err := create(id); err != nil {
			// Don't leave a partially created entry behind.
			c.remove(id)
			return info, err
		}
	}

	// Either freshly created or an existing entry considered a cache hit.
	return info, nil
}
// writeReader writes everything read from r to the cache entry with the
// given id, creating parent directories as needed.
//
// It returns the first error from creating, writing or closing the entry.
// When writing or closing fails, the partially written entry is removed so
// that a truncated file is never served as a cache hit later.
// Callers are expected to hold the named lock for id where that matters.
func (c *Cache) writeReader(id string, r io.Reader) (err error) {
	// filepath.Dir never returns an empty string ("." at the least).
	// A failure here is deliberately ignored: it will surface as an error
	// from Create below.
	_ = c.Fs.MkdirAll(filepath.Dir(id), 0o777)

	f, err := c.Fs.Create(id)
	if err != nil {
		return err
	}

	defer func() {
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best effort cleanup; the original error is what matters.
			_ = c.Fs.Remove(id)
		}
	}()

	_, err = io.Copy(f, r)

	return err
}
// GetOrCreateBytes works like GetOrCreate, but deals in byte slices:
// on a cache hit the entry's content is read and returned, otherwise create
// is invoked and its result is written to the cache (unless caching is
// disabled) and returned.
// This method is protected by a named lock using the given id as identifier.
func (c *Cache) GetOrCreateBytes(id string, create func() ([]byte, error)) (ItemInfo, []byte, error) {
	if initErr := c.init(); initErr != nil {
		return ItemInfo{}, nil, initErr
	}

	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	info := ItemInfo{Name: id}

	if cached := c.getOrRemove(id); cached != nil {
		defer cached.Close()
		data, err := io.ReadAll(cached)
		return info, data, err
	}

	data, err := create()
	if err != nil {
		return info, nil, err
	}

	// A max age of zero means no caching, so there is nothing to store.
	if c.cfg.MaxAge != 0 {
		if err := c.writeReader(id, bytes.NewReader(data)); err != nil {
			return info, nil, err
		}
	}

	return info, data, nil
}
// SetBytes stores data as the content of the cache entry with the given id.
// It is a no-op when caching is disabled (MaxAge == 0).
// This method is protected by a named lock using the given id as identifier.
func (c *Cache) SetBytes(id string, data []byte) error {
	if initErr := c.init(); initErr != nil {
		return initErr
	}

	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	if c.cfg.MaxAge != 0 {
		return c.writeReader(id, bytes.NewReader(data))
	}

	// No caching.
	return nil
}
// GetBytes returns the content of the cache entry with the given id,
// or nil if there is no such entry. It delegates to GetItemBytes and drops
// the ItemInfo.
func (c *Cache) GetBytes(id string) ([]byte, error) {
	_, data, err := c.GetItemBytes(id)
	return data, err
}
// GetItemBytes returns the ItemInfo and the content of the cache entry with
// the given id. The content is nil if the entry is missing or expired.
// This method is protected by a named lock using the given id as identifier.
func (c *Cache) GetItemBytes(id string) (ItemInfo, []byte, error) {
	if initErr := c.init(); initErr != nil {
		return ItemInfo{}, nil, initErr
	}

	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	info := ItemInfo{Name: id}

	cached := c.getOrRemove(id)
	if cached == nil {
		return info, nil, nil
	}
	defer cached.Close()

	data, err := io.ReadAll(cached)
	return info, data, err
}
// Get returns the ItemInfo and an open reader for the cache entry with the
// given id. The reader is nil if the entry is missing or expired; otherwise
// the caller is responsible for closing it.
// The named lock for the id is only held while the entry is looked up.
func (c *Cache) Get(id string) (ItemInfo, io.ReadCloser, error) {
	if initErr := c.init(); initErr != nil {
		return ItemInfo{}, nil, initErr
	}

	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	return ItemInfo{Name: id}, c.getOrRemove(id), nil
}
// removeIfNeeded reports whether the entry with the given id needs to be
// (re-)created, removing any stale entry as a side effect.
//
//   - MaxAge == 0 (no caching): the entry is removed and true is returned.
//   - MaxAge < 0 (entries never expire): true is returned only if the entry
//     cannot be stat'ed, e.g. because it does not exist yet.
//   - MaxAge > 0: the entry is removed if it has expired; true is returned
//     if it was removed or could not be stat'ed.
func (c *Cache) removeIfNeeded(id string) bool {
	switch {
	case c.cfg.MaxAge == 0:
		// No caching, remove.
		c.remove(id)
		return true
	case c.cfg.MaxAge < 0:
		// removeIfExpired returns early for negative max ages without
		// touching the file system, so check for existence here. Otherwise
		// a missing entry would be reported as a cache hit.
		_, err := c.Fs.Stat(id)
		return err != nil
	}

	removed, err := c.removeIfExpired(id)
	return err != nil || removed
}
// getOrRemove opens the entry with the given id for reading.
// It returns nil if caching is disabled, if the entry has expired (in which
// case it is removed), or if the entry cannot be stat'ed or opened.
func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
	// No caching.
	if c.cfg.MaxAge == 0 {
		return nil
	}

	removed, err := c.removeIfExpired(id)
	if removed || err != nil {
		return nil
	}

	if f, err := c.Fs.Open(id); err == nil {
		return f
	}

	return nil
}
// getBytesAndRemoveIfExpired reads the content of the entry with the given
// id and then removes the entry if it has expired.
// It returns the content and whether the entry was removed. The result is
// (nil, false) when caching is disabled or when the entry cannot be opened,
// read or stat'ed.
func (c *Cache) getBytesAndRemoveIfExpired(id string) ([]byte, bool) {
	// No caching.
	if c.cfg.MaxAge == 0 {
		return nil, false
	}

	f, openErr := c.Fs.Open(id)
	if openErr != nil {
		return nil, false
	}
	defer f.Close()

	data, readErr := io.ReadAll(f)
	if readErr != nil {
		return nil, false
	}

	expired, statErr := c.removeIfExpired(id)
	if statErr != nil {
		return nil, false
	}

	return data, expired
}
// removeIfExpired removes the entry with the given id if it is older than
// the configured max age and reports whether it was removed.
// For a max age of zero or less it returns (false, nil) without looking at
// the entry. An error from stat'ing the entry is returned as is.
func (c *Cache) removeIfExpired(id string) (bool, error) {
	if c.cfg.MaxAge <= 0 {
		return false, nil
	}

	fi, err := c.Fs.Stat(id)
	if err != nil {
		return false, err
	}

	expired := c.isExpired(fi.ModTime())
	if expired {
		c.remove(id)
	}

	return expired, nil
}
// remove deletes the entry with the given id from the cache's filesystem.
// Entries that are directories (cfg.entryIsDir) are removed recursively.
// Any error from the filesystem is ignored.
func (c *Cache) remove(id string) {
	if !c.cfg.entryIsDir {
		c.Fs.Remove(id)
		return
	}

	c.Fs.RemoveAll(id)
}
// isExpired reports whether an entry last modified at modTime has outlived
// the configured max age. A negative max age means entries never expire,
// a max age of zero means they are always expired.
func (c *Cache) isExpired(modTime time.Time) bool {
	switch maxAge := c.cfg.MaxAge; {
	case maxAge < 0:
		return false
	case maxAge == 0:
		return true
	default:
		// Note the use of time.Since here.
		// We cannot use Hugo's global Clock for this.
		return time.Since(modTime) > maxAge
	}
}
// GetString returns the content of the entry with the given id as a string,
// or the empty string if the entry cannot be opened. Read errors are
// ignored; whatever was read up to that point is returned.
// No expiry check is performed.
// For testing.
func (c *Cache) GetString(id string) string {
	id = cleanID(id)

	c.entryLocker.Lock(id)
	defer c.entryLocker.Unlock(id)

	f, err := c.Fs.Open(id)
	if err != nil {
		return ""
	}
	defer f.Close()

	var sb strings.Builder
	_, _ = io.Copy(&sb, f)

	return sb.String()
}
// Caches is a named set of caches, keyed by cache name.
// NOTE(review): Get lower-cases the name before the lookup, so the keys are
// presumably stored in lower case; confirm where Configs is built.
type Caches map[string]*Cache
// SetResourceFs sets the filesystem of every cache configured as a resource
// dir cache to fs, rooted at that cache's compiled directory.
// It panics if such a cache has already been initialized.
func (f Caches) SetResourceFs(fs afero.Fs) {
	for _, c := range f {
		if !c.cfg.IsResourceDir {
			continue
		}

		if c.isInited {
			panic("cannot set resource fs after init")
		}

		c.Fs = hugofs.NewBasePathFs(fs, c.cfg.DirCompiled)
	}
}
// Get returns the cache with the given name, or nil if there is none.
// The lookup is done on the lower-cased name.
func (f Caches) Get(name string) *Cache {
	key := strings.ToLower(name)
	return f[key]
}
// NewCaches creates one file cache per entry in the given configuration,
// keyed by the configuration's name. Caches not configured as resource dir
// caches are rooted at their compiled directory in sourceFs.
// The returned error is currently always nil.
func NewCaches(dcfg Configs, sourceFs afero.Fs) (Caches, error) {
	caches := make(Caches)

	for name, cfg := range dcfg {
		// Resource dir caches are created with a nil filesystem, which is
		// set later. TODO(bep) this needs to be cleaned up.
		var cacheFs afero.Fs
		if !cfg.IsResourceDir {
			cacheFs = hugofs.NewBasePathFs(sourceFs, cfg.DirCompiled)
		}

		caches[name] = NewCache(cacheFs, cfg)
	}

	return caches, nil
}
// cleanID normalizes a cache id: the name is cleaned with filepath.Clean
// and a single leading path separator, if any, is stripped.
func cleanID(name string) string {
	cleaned := filepath.Clean(name)
	return strings.TrimPrefix(cleaned, helpers.FilePathSeparator)
}
// AsHTTPCache wraps this file cache in an httpcache.Cache implementation.
// Note that none of the methods of the returned value are protected by
// named locks, so you need to make sure to do that in your own code.
func (c *Cache) AsHTTPCache() httpcache.Cache {
	var hc httpcache.Cache = &httpCache{c: c}
	return hc
}
// httpCache adapts a Cache to the Get/Set/Delete methods returned by
// AsHTTPCache. Its methods do not take the cache's named locks.
type httpCache struct {
	// c is the file cache that stores the entries.
	c *Cache
}
// Get returns the cached bytes for the given id and whether they represent
// a usable cache entry.
// ok is false if the entry has expired (it is then removed; the stale bytes
// are still returned) and if nothing could be read at all, i.e. the entry is
// missing or unreadable or caching is disabled.
func (h *httpCache) Get(id string) (resp []byte, ok bool) {
	id = cleanID(id)
	b, removed := h.c.getBytesAndRemoveIfExpired(id)

	// A nil slice means there was nothing to read; don't report that as a hit.
	return b, b != nil && !removed
}
// Set stores resp as the cache entry for the given id.
// It does nothing when caching is disabled (MaxAge == 0) and panics if the
// entry cannot be written.
func (h *httpCache) Set(id string, resp []byte) {
	if h.c.cfg.MaxAge == 0 {
		return
	}

	err := h.c.writeReader(cleanID(id), bytes.NewReader(resp))
	if err != nil {
		panic(err)
	}
}
// Delete removes the cache entry for the given key.
// The key is normalized the same way as in Get and Set, so that an entry
// stored with Set is also the one removed here.
// Removal is best effort: there is no way to report an error to the caller,
// so any error from the filesystem is ignored.
func (h *httpCache) Delete(key string) {
	_ = h.c.Fs.Remove(cleanID(key))
}