Replace gopkg.in/yaml with github.com/goccy/go-yaml (note)

This commit also adds validation to prevent the "Billion Laughs" attack (see https://github.com/goccy/go-yaml/issues/461). The limit of non-scalar aliases to the same node scales with the size of the input: 100 for inputs under 2 KB, 5,000 for inputs under 10 KB, and 10,000 otherwise. See benchmarks below.

```                                                            │        sec/op         │
UnmarshalBillionLaughs/Billion_Laughs_no_validation-10                 125.2µ ± ∞ ¹
UnmarshalBillionLaughs/Billion_Laughs_with_validation-10               655.8µ ± ∞ ¹
UnmarshalBillionLaughs/YAML_Front_Matter_no_validation-10              9.223µ ± ∞ ¹
UnmarshalBillionLaughs/YAML_Front_Matter_with_validation-10            9.443µ ± ∞ ¹
geomean                                                                51.71µ
¹ need >= 6 samples for confidence interval at level 0.95

                                                            │ fix-goyaml-8822.bench │
                                                            │         B/op          │
UnmarshalBillionLaughs/Billion_Laughs_no_validation-10                177.0Ki ± ∞ ¹
UnmarshalBillionLaughs/Billion_Laughs_with_validation-10              177.0Ki ± ∞ ¹
UnmarshalBillionLaughs/YAML_Front_Matter_no_validation-10             11.67Ki ± ∞ ¹
UnmarshalBillionLaughs/YAML_Front_Matter_with_validation-10           11.67Ki ± ∞ ¹
geomean                                                               45.45Ki
¹ need >= 6 samples for confidence interval at level 0.95

                                                            │ fix-goyaml-8822.bench │
                                                            │       allocs/op       │
UnmarshalBillionLaughs/Billion_Laughs_no_validation-10                 3.302k ± ∞ ¹
UnmarshalBillionLaughs/Billion_Laughs_with_validation-10               3.305k ± ∞ ¹
UnmarshalBillionLaughs/YAML_Front_Matter_no_validation-10               253.0 ± ∞ ¹
UnmarshalBillionLaughs/YAML_Front_Matter_with_validation-10             253.0 ± ∞ ¹
```

Fixes #8822
Fixes #13043
Fixes #14053
Fixes #8427
This commit is contained in:
Bjørn Erik Pedersen
2025-10-17 12:49:54 +02:00
parent 9e344bbe49
commit a3d9548469
21 changed files with 448 additions and 250 deletions

View File

@@ -28,6 +28,7 @@ import (
"github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/styles"
"github.com/bep/simplecobra"
"github.com/goccy/go-yaml"
"github.com/gohugoio/hugo/common/hugo"
"github.com/gohugoio/hugo/docshelper"
"github.com/gohugoio/hugo/helpers"
@@ -36,7 +37,6 @@ import (
"github.com/gohugoio/hugo/parser"
"github.com/spf13/cobra"
"github.com/spf13/cobra/doc"
"gopkg.in/yaml.v2"
)
func newGenCommand() *genCommand {

View File

@@ -1166,7 +1166,6 @@ func chmodFilter(dst, src os.FileInfo) bool {
}
func cleanErrorLog(content string) string {
content = strings.ReplaceAll(content, "\n", " ")
content = logReplacer.Replace(content)
content = logDuplicateTemplateExecuteRe.ReplaceAllString(content, "")
content = logDuplicateTemplateParseRe.ReplaceAllString(content, "")

View File

@@ -110,11 +110,11 @@ func (fe *fileError) UpdateContent(r io.Reader, linematcher LineMatcherFn) FileE
fe.errorContext = ectx
if ectx.Position.LineNumber > 0 {
if ectx.Position.LineNumber > 0 && ectx.Position.LineNumber > fe.position.LineNumber {
fe.position.LineNumber = ectx.Position.LineNumber
}
if ectx.Position.ColumnNumber > 0 {
if ectx.Position.ColumnNumber > 0 && ectx.Position.ColumnNumber > fe.position.ColumnNumber {
fe.position.ColumnNumber = ectx.Position.ColumnNumber
}
@@ -177,6 +177,7 @@ func NewFileErrorFromName(err error, name string) FileError {
// Filetype is used to determine the Chroma lexer to use.
fileType, pos := extractFileTypePos(err)
pos.Filename = name
if fileType == "" {
_, fileType = paths.FileAndExtNoDelimiter(filepath.Clean(name))
}
@@ -234,7 +235,9 @@ func NewFileErrorFromFile(err error, filename string, fs afero.Fs, linematcher L
return NewFileErrorFromName(err, realFilename)
}
defer f.Close()
return NewFileErrorFromName(err, realFilename).UpdateContent(f, linematcher)
fe := NewFileErrorFromName(err, realFilename)
fe = fe.UpdateContent(f, linematcher)
return fe
}
func openFile(filename string, fs afero.Fs) (afero.File, string, error) {
@@ -321,13 +324,9 @@ func extractFileTypePos(err error) (string, text.Position) {
}
// Look in the error message for the line number.
for _, handle := range lineNumberExtractors {
lno, col := handle(err)
if lno > 0 {
pos.ColumnNumber = col
pos.LineNumber = lno
break
}
if lno, col := commonLineNumberExtractor(err); lno > 0 {
pos.ColumnNumber = col
pos.LineNumber = lno
}
if fileType == "" && pos.Filename != "" {

View File

@@ -19,17 +19,27 @@ import (
)
var lineNumberExtractors = []lineNumberExtractor{
// YAML parse errors.
newLineNumberErrHandlerFromRegexp(`\[(\d+):(\d+)\]`),
// Template/shortcode parse errors
newLineNumberErrHandlerFromRegexp(`:(\d+):(\d*):`),
newLineNumberErrHandlerFromRegexp(`:(\d+):`),
// YAML parse errors
newLineNumberErrHandlerFromRegexp(`line (\d+):`),
// i18n bundle errors
newLineNumberErrHandlerFromRegexp(`\((\d+),\s(\d*)`),
}
// commonLineNumberExtractor tries each entry of lineNumberExtractors in order
// and returns the line and column reported by the first one that yields a
// positive line number. It returns 0, 0 when none of them does.
func commonLineNumberExtractor(e error) (int, int) {
	for i := range lineNumberExtractors {
		if line, column := lineNumberExtractors[i](e); line > 0 {
			return line, column
		}
	}
	return 0, 0
}
type lineNumberExtractor func(e error) (int, int)
func newLineNumberErrHandlerFromRegexp(expression string) lineNumberExtractor {

View File

@@ -0,0 +1,31 @@
// Copyright 2025 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package herrors
import (
"errors"
"testing"
qt "github.com/frankban/quicktest"
)
// TestCommonLineNumberExtractor checks that a "[line:col]" prefix in an error
// message is picked up as the error position.
func TestCommonLineNumberExtractor(t *testing.T) {
	t.Parallel()
	c := qt.New(t)

	input := errors.New("[4:9] value is not allowed in this context")
	gotLine, gotColumn := commonLineNumberExtractor(input)

	c.Assert(gotLine, qt.Equals, 4)
	c.Assert(gotColumn, qt.Equals, 9)
}

4
go.mod
View File

@@ -33,9 +33,9 @@ require (
github.com/frankban/quicktest v1.14.6
github.com/fsnotify/fsnotify v1.9.0
github.com/getkin/kin-openapi v0.133.0
github.com/ghodss/yaml v1.0.0
github.com/gobuffalo/flect v1.0.3
github.com/gobwas/glob v0.2.3
github.com/goccy/go-yaml v1.18.0
github.com/gohugoio/go-i18n/v2 v2.1.3-0.20230805085216-e63c13218d0e
github.com/gohugoio/hashstructure v0.6.0
github.com/gohugoio/httpcache v0.8.0
@@ -83,7 +83,6 @@ require (
golang.org/x/text v0.30.0
golang.org/x/tools v0.38.0
google.golang.org/api v0.251.0
gopkg.in/yaml.v2 v2.4.0
rsc.io/qr v0.2.0
)
@@ -188,6 +187,7 @@ require (
google.golang.org/genproto/googleapis/rpc v0.0.0-20250929231259-57b25ae835d4 // indirect
google.golang.org/grpc v1.75.1 // indirect
google.golang.org/protobuf v1.36.9 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
howett.net/plist v1.0.0 // indirect
software.sslmate.com/src/go-pkcs12 v0.2.0 // indirect

4
go.sum
View File

@@ -242,8 +242,6 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/getkin/kin-openapi v0.133.0 h1:pJdmNohVIJ97r4AUFtEXRXwESr8b0bD721u/Tz6k8PQ=
github.com/getkin/kin-openapi v0.133.0/go.mod h1:boAciF6cXk5FhPqe/NQeBTeenbjqU4LhWBf09ILVvWE=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
@@ -266,6 +264,8 @@ github.com/gobuffalo/flect v1.0.3 h1:xeWBM2nui+qnVvNM4S3foBhCAL2XgPU+a7FdpelbTq4
github.com/gobuffalo/flect v1.0.3/go.mod h1:A5msMlrHtLqh9umBSnvabjsMrCcCpAyzglnDvkbYKHs=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
github.com/gohugoio/go-i18n/v2 v2.1.3-0.20230805085216-e63c13218d0e h1:QArsSubW7eDh8APMXkByjQWvuljwPGAGQpJEFn0F0wY=
github.com/gohugoio/go-i18n/v2 v2.1.3-0.20230805085216-e63c13218d0e/go.mod h1:3Ltoo9Banwq0gOtcOwxuHG6omk+AwsQPADyw2vQYOJQ=
github.com/gohugoio/hashstructure v0.6.0 h1:7wMB/2CfXoThFYhdWRGv3u3rUM761Cq29CxUW+NltUg=

View File

@@ -172,3 +172,26 @@ func TestTargetPathHTMLRedirectAlias(t *testing.T) {
}
}
}
// TestAliasNIssue14053 builds a page whose front matter lists the aliases
// n, y, no and yes, and checks that each of them is published as its own
// alias page.
func TestAliasNIssue14053(t *testing.T) {
	t.Parallel()

	files := `
-- hugo.toml --
baseURL = "http://example.com"
-- layouts/all.html --
All.
-- content/page.md --
---
title: "Page"
aliases:
- n
- y
- no
- yes
---
`
	b := Test(t, files)

	// One alias page is expected per entry in the aliases list above.
	b.AssertPublishDir("n/index.html", "y/index.html", "no/index.html", "yes/index.html")
}

View File

@@ -40,7 +40,7 @@ Strings: {{ printf "%T" .Params.strings }} {{ range .Params.strings }}Strings: {
b.Build()
b.AssertFileContent("public/post/one/index.html", "Ints: []interface {} Int: 1 (int)|Int: 2 (int)|Int: 3 (int)|")
b.AssertFileContent("public/post/one/index.html", "Mixed: []interface {} Mixed: 1 (string)|Mixed: 2 (int)|Mixed: 3 (int)|")
b.AssertFileContent("public/post/one/index.html", "Ints: []interface {} Int: 1 (uint64)|Int: 2 (uint64)|Int: 3 (uint64)|")
b.AssertFileContent("public/post/one/index.html", "Mixed: []interface {} Mixed: 1 (string)|Mixed: 2 (uint64)|Mixed: 3 (uint64)|")
b.AssertFileContent("public/post/one/index.html", "Strings: []string Strings: 1 (string)|Strings: 2 (string)|Strings: 3 (string)|")
}

View File

@@ -476,7 +476,7 @@ line 5
errors := herrors.UnwrapFileErrorsWithErrorContext(err)
b.Assert(errors, qt.HasLen, 3)
b.Assert(errors[0].Error(), qt.Contains, filepath.FromSlash(`"/content/_index.md:1:1": "/layouts/_default/_markup/render-heading.html:2:5": execute of template failed`))
b.Assert(errors[0].Error(), qt.Contains, filepath.FromSlash(`"/content/_index.md:2:5": "/layouts/_default/_markup/render-heading.html:2:5": execute of template failed`))
}
func TestErrorRenderHookCodeblock(t *testing.T) {
@@ -642,3 +642,35 @@ Home.
b.Assert(err.Error(), qt.Contains, filepath.FromSlash(`/layouts/index.html:2:3`))
b.Assert(err.Error(), qt.Contains, `can't evaluate field ThisDoesNotExist`)
}
// TestErrorFrontmatterYAMLSyntax builds a site whose content/_index.md has a
// stray "x" line in its YAML front matter and checks that the build error
// carries the decoder message, the content filename, an error context and a
// line/column position.
func TestErrorFrontmatterYAMLSyntax(t *testing.T) {
t.Parallel()
files := `
-- hugo.toml --
-- content/_index.md --
---
line1: 'value1'
x
line2: 'value2'
line3: 'value3'
---
`
b, err := TestE(t, files)
b.Assert(err, qt.Not(qt.IsNil))
// The bracketed position is presumably relative to the front matter body,
// where "x" is the second line — confirm.
b.Assert(err.Error(), qt.Contains, "[2:1] non-map value is specified")
fe := herrors.UnwrapFileError(err)
b.Assert(fe, qt.Not(qt.IsNil))
pos := fe.Position()
b.Assert(pos.Filename, qt.Contains, filepath.FromSlash("content/_index.md"))
b.Assert(fe.ErrorContext(), qt.Not(qt.IsNil))
// NOTE(review): with the fixture as displayed here, "x" is not on line 8 of
// content/_index.md; blank lines inside the fixture may have been lost in
// this view — confirm against the real file before changing either side.
b.Assert(pos.LineNumber, qt.Equals, 8)
b.Assert(pos.ColumnNumber, qt.Equals, 1)
}

View File

@@ -285,23 +285,20 @@ func (c *contentParseInfo) parseFrontMatter(it pageparser.Item, iter *pageparser
var err error
c.frontMatter, err = metadecoders.Default.UnmarshalToMap(it.Val(source), f)
if err != nil {
if fe, ok := err.(herrors.FileError); ok {
pos := fe.Position()
// Offset the starting position of front matter.
offset := iter.LineNumber(source) - 1
if f == metadecoders.YAML {
offset -= 1
}
pos.LineNumber += offset
fe.UpdatePosition(pos)
fe.SetFilename("") // It will be set later.
return fe
} else {
return err
fe := herrors.UnwrapFileError(err)
if fe == nil {
fe = herrors.NewFileError(err)
}
pos := fe.Position()
// Offset the starting position of front matter.
offset := iter.LineNumber(source) - 1
pos.LineNumber += offset
fe.UpdatePosition(pos)
fe.SetFilename("") // It will be set later.
return fe
}
return nil

View File

@@ -123,7 +123,7 @@ func (c *pagesCollector) Collect() (collectErr error) {
Handle: func(ctx context.Context, fi hugofs.FileMetaInfo) error {
numPages, numResources, err := c.m.AddFi(fi, c.buildConfig)
if err != nil {
return hugofs.AddFileInfoToError(err, fi, c.fs)
return hugofs.AddFileInfoToError(err, fi, c.h.SourceFs)
}
numFilesProcessedTotal.Add(1)
numPagesProcessedTotal.Add(numPages)

View File

@@ -19,10 +19,10 @@ import (
"strings"
"github.com/gohugoio/hugo/common/paths"
"github.com/gohugoio/hugo/parser/metadecoders"
"github.com/gohugoio/hugo/common/herrors"
"golang.org/x/text/language"
yaml "gopkg.in/yaml.v2"
"github.com/gohugoio/go-i18n/v2/i18n"
"github.com/gohugoio/hugo/helpers"
@@ -53,8 +53,8 @@ func (tp *TranslationProvider) NewResource(dst *deps.Deps) error {
bundle := i18n.NewBundle(defaultLangTag)
bundle.RegisterUnmarshalFunc("toml", toml.Unmarshal)
bundle.RegisterUnmarshalFunc("yaml", yaml.Unmarshal)
bundle.RegisterUnmarshalFunc("yml", yaml.Unmarshal)
bundle.RegisterUnmarshalFunc("yaml", metadecoders.UnmarshalYaml)
bundle.RegisterUnmarshalFunc("yml", metadecoders.UnmarshalYaml)
bundle.RegisterUnmarshalFunc("json", json.Unmarshal)
w := hugofs.NewWalkway(

View File

@@ -22,8 +22,6 @@ import (
toml "github.com/pelletier/go-toml/v2"
yaml "gopkg.in/yaml.v2"
xml "github.com/clbanning/mxj/v2"
)
@@ -39,7 +37,7 @@ func InterfaceToConfig(in any, format metadecoders.Format, w io.Writer) error {
switch format {
case metadecoders.YAML:
b, err := yaml.Marshal(in)
b, err := metadecoders.MarshalYAML(in)
if err != nil {
return err
}

View File

@@ -1,4 +1,4 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
// Copyright 2025 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -22,16 +22,17 @@ import (
"regexp"
"strconv"
"strings"
"unsafe"
"github.com/gohugoio/hugo/common/herrors"
"github.com/gohugoio/hugo/common/maps"
"github.com/niklasfasching/go-org/org"
xml "github.com/clbanning/mxj/v2"
yaml "github.com/goccy/go-yaml"
toml "github.com/pelletier/go-toml/v2"
"github.com/spf13/afero"
"github.com/spf13/cast"
yaml "gopkg.in/yaml.v2"
)
// Decoder provides some configuration options for the decoders.
@@ -153,6 +154,117 @@ func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
return v, err
}
// UnmarshalYaml decodes the YAML document in data into v.
//
// After a successful decode the result is checked by
// validateAliasLimitForCollections, using a limit derived from the size of
// data; a decode error or a validation error is returned as-is.
func UnmarshalYaml(data []byte, v any) error {
	decodeErr := yaml.Unmarshal(data, v)
	if decodeErr != nil {
		return decodeErr
	}

	aliasLimit := calculateCollectionAliasLimit(len(data))
	return validateAliasLimitForCollections(v, aliasLimit)
}
// calculateCollectionAliasLimit picks the alias limit used when validating a
// decoded YAML document, based on the size of that document in bytes.
//
// The Billion Laughs example is only about 500 bytes, so small inputs get a
// tight limit; the primary goal is to catch small but malicious files. Large
// data files, where every row may share a common map via aliases, can have a
// legitimate need for many aliases and get more headroom. The thresholds are
// somewhat arbitrary and aim for a reasonable trade-off between safety and
// usability:
//
//	under 2 KB  -> 100 (at most "thousand laughs")
//	under 10 KB -> 5000
//	otherwise   -> 10000
func calculateCollectionAliasLimit(sizeInBytes int) int {
	const bytesPerKB = 1024

	switch kb := sizeInBytes / bytesPerKB; {
	case kb < 2:
		return 100
	case kb < 10:
		return 5000
	default:
		return 10000
	}
}
// unmarshalYamlNoValidation decodes data into v without running the alias
// limit check. Used in benchmarks.
func unmarshalYamlNoValidation(data []byte, v any) error {
	return yaml.Unmarshal(data, v)
}
// validateAliasLimitForCollections walks the decoded YAML value v and returns
// an error as soon as one of its internal collection counters exceeds limit.
// A limit <= 0 falls back to 1000.
//
// See https://github.com/goccy/go-yaml/issues/461
// While it's true that yaml.Unmarshal isn't vulnerable to the Billion Laughs attack,
// we can easily get a delayed laughter when we try to render this very big structure later,
// e.g. via RenderString.
//
// NOTE(review): every call to checkCollectionRef passes &v, the address of
// validate's own parameter, so the counters are keyed on where that parameter
// lives and not on the identity of the map or slice it holds. Presumably this
// ends up counting collection visits per recursion depth — confirm, and
// re-run the alias limit tests before restructuring the recursion.
func validateAliasLimitForCollections(v any, limit int) error {
if limit <= 0 {
limit = 1000
}
// Number of times each key has been seen by checkCollectionRef.
collectionRefCounts := make(map[uintptr]int)
// checkCollectionRef bumps the counter for the given pointer and fails once
// that counter goes above limit.
checkCollectionRef := func(v *any) error {
// Conversion of a Pointer to a uintptr (but not back to Pointer) is considered safe.
// See https://pkg.go.dev/unsafe#pkg-functions
ptr := uintptr(unsafe.Pointer(v))
// NOTE(review): all callers pass the address of a local variable, so this
// guard looks unreachable — confirm.
if ptr == 0 {
return nil
}
collectionRefCounts[ptr]++
if collectionRefCounts[ptr] > limit {
return fmt.Errorf("too many YAML aliases for non-scalar nodes")
}
return nil
}
// validate recurses into every child of a collection; there is no visited
// set, so a collection that is referenced several times is walked once per
// reference.
var validate func(v any) error
validate = func(v any) error {
switch vv := v.(type) {
case *map[string]any:
// Pointer to a map: count it, then descend into its values.
if err := checkCollectionRef(&v); err != nil {
return err
}
for _, vvv := range *vv {
if err := validate(vvv); err != nil {
return err
}
}
case map[string]any:
// Map: count it, then descend into its values.
if err := checkCollectionRef(&v); err != nil {
return err
}
for _, vvv := range vv {
if err := validate(vvv); err != nil {
return err
}
}
case []any:
// Slice: count it, then descend into its elements.
if err := checkCollectionRef(&v); err != nil {
return err
}
for _, vvv := range vv {
if err := validate(vvv); err != nil {
return err
}
}
case *any:
// Pointer to an interface value: validate what it points at, without counting.
return validate(*vv)
default:
// ok
// Any other type is accepted without being counted or traversed.
}
return nil
}
return validate(v)
}
// UnmarshalTo unmarshals data in format f into v.
func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
var err error
@@ -196,35 +308,7 @@ func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
case TOML:
err = toml.Unmarshal(data, v)
case YAML:
err = yaml.Unmarshal(data, v)
if err != nil {
return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err))
}
// To support boolean keys, the YAML package unmarshals maps to
// map[interface{}]interface{}. Here we recurse through the result
// and change all maps to map[string]interface{} like we would've
// gotten from `json`.
var ptr any
switch vv := v.(type) {
case *map[string]any:
ptr = *vv
case *any:
ptr = *vv
default:
// Not a map.
}
if ptr != nil {
if mm, changed := stringifyMapKeys(ptr); changed {
switch vv := v.(type) {
case *map[string]any:
*vv = mm.(map[string]any)
case *any:
*vv = mm
}
}
}
return UnmarshalYaml(data, v)
case CSV:
return d.unmarshalCSV(data, v)
@@ -331,50 +415,3 @@ func (d Decoder) unmarshalORG(data []byte, v any) error {
// toFileError wraps err in a file error named "_stream.<format>" and attaches
// data as the content used for the error context.
func toFileError(f Format, data []byte, err error) error {
	streamName := fmt.Sprintf("_stream.%s", f)
	fileErr := herrors.NewFileErrorFromName(err, streamName)
	return fileErr.UpdateContent(bytes.NewReader(data), nil)
}
// stringifyMapKeys recurses into in and changes all instances of
// map[interface{}]interface{} to map[string]interface{}. This is useful to
// work around the impedance mismatch between JSON and YAML unmarshaling that's
// described here: https://github.com/go-yaml/yaml/issues/139
//
// Inspired by https://github.com/stripe/stripe-mock, MIT licensed
func stringifyMapKeys(in any) (any, bool) {
switch in := in.(type) {
case []any:
for i, v := range in {
if vv, replaced := stringifyMapKeys(v); replaced {
in[i] = vv
}
}
case map[string]any:
for k, v := range in {
if vv, changed := stringifyMapKeys(v); changed {
in[k] = vv
}
}
case map[any]any:
res := make(map[string]any)
var (
ok bool
err error
)
for k, v := range in {
var ks string
if ks, ok = k.(string); !ok {
ks, err = cast.ToStringE(k)
if err != nil {
ks = fmt.Sprintf("%v", k)
}
}
if vv, replaced := stringifyMapKeys(v); replaced {
res[ks] = vv
} else {
res[ks] = v
}
}
return res, true
}
return nil, false
}

View File

@@ -14,7 +14,6 @@
package metadecoders
import (
"reflect"
"testing"
qt "github.com/frankban/quicktest"
@@ -91,8 +90,8 @@ func TestUnmarshalToMap(t *testing.T) {
{`a = "b"`, TOML, expect},
{`a: "b"`, YAML, expect},
// Make sure we get all string keys, even for YAML
{"a: Easy!\nb:\n c: 2\n d: [3, 4]", YAML, map[string]any{"a": "Easy!", "b": map[string]any{"c": 2, "d": []any{3, 4}}}},
{"a:\n true: 1\n false: 2", YAML, map[string]any{"a": map[string]any{"true": 1, "false": 2}}},
{"a: Easy!\nb:\n c: 2\n d: [3, 4]", YAML, map[string]any{"a": "Easy!", "b": map[string]any{"c": uint64(2), "d": []any{uint64(3), uint64(4)}}}},
{"a:\n true: 1\n false: 2", YAML, map[string]any{"a": map[string]any{"true": uint64(1), "false": uint64(2)}}},
{`{ "a": "b" }`, JSON, expect},
{`<root><a>b</a></root>`, XML, expect},
{`#+a: b`, ORG, expect},
@@ -140,7 +139,7 @@ func TestUnmarshalToInterface(t *testing.T) {
{[]byte(`a: "b"`), YAML, expect},
{[]byte(`<root><a>b</a></root>`), XML, expect},
{[]byte(`a,b,c`), CSV, [][]string{{"a", "b", "c"}}},
{[]byte("a: Easy!\nb:\n c: 2\n d: [3, 4]"), YAML, map[string]any{"a": "Easy!", "b": map[string]any{"c": 2, "d": []any{3, 4}}}},
{[]byte("a: Easy!\nb:\n c: 2\n d: [3, 4]"), YAML, map[string]any{"a": "Easy!", "b": map[string]any{"c": uint64(2), "d": []any{uint64(3), uint64(4)}}}},
// errors
{[]byte(`a = "`), TOML, false},
} {
@@ -173,7 +172,7 @@ func TestUnmarshalStringTo(t *testing.T) {
{"32", int64(1234), int64(32)},
{"32", int(1234), int(32)},
{"3.14159", float64(1), float64(3.14159)},
{"[3,7,9]", []any{}, []any{3, 7, 9}},
{"[3,7,9]", []any{}, []any{uint64(3), uint64(7), uint64(9)}},
{"[3.1,7.2,9.3]", []any{}, []any{3.1, 7.2, 9.3}},
} {
msg := qt.Commentf("%d: %T", i, test.to)
@@ -188,126 +187,18 @@ func TestUnmarshalStringTo(t *testing.T) {
}
}
func TestStringifyYAMLMapKeys(t *testing.T) {
cases := []struct {
input any
want any
replaced bool
}{
{
map[any]any{"a": 1, "b": 2},
map[string]any{"a": 1, "b": 2},
true,
},
{
map[any]any{"a": []any{1, map[any]any{"b": 2}}},
map[string]any{"a": []any{1, map[string]any{"b": 2}}},
true,
},
{
map[any]any{true: 1, "b": false},
map[string]any{"true": 1, "b": false},
true,
},
{
map[any]any{1: "a", 2: "b"},
map[string]any{"1": "a", "2": "b"},
true,
},
{
map[any]any{"a": map[any]any{"b": 1}},
map[string]any{"a": map[string]any{"b": 1}},
true,
},
{
map[string]any{"a": map[string]any{"b": 1}},
map[string]any{"a": map[string]any{"b": 1}},
false,
},
{
[]any{map[any]any{1: "a", 2: "b"}},
[]any{map[string]any{"1": "a", "2": "b"}},
false,
},
}
func TestCalculateAliasLimit(t *testing.T) {
c := qt.New(t)
for i, c := range cases {
res, replaced := stringifyMapKeys(c.input)
const kb = 1024
if c.replaced != replaced {
t.Fatalf("[%d] Replaced mismatch: %t", i, replaced)
}
if !c.replaced {
res = c.input
}
if !reflect.DeepEqual(res, c.want) {
t.Errorf("[%d] given %q\nwant: %q\n got: %q", i, c.input, c.want, res)
}
}
}
func BenchmarkStringifyMapKeysStringsOnlyInterfaceMaps(b *testing.B) {
maps := make([]map[any]any, b.N)
for i := 0; i < b.N; i++ {
maps[i] = map[any]any{
"a": map[any]any{
"b": 32,
"c": 43,
"d": map[any]any{
"b": 32,
"c": 43,
},
},
"b": []any{"a", "b"},
"c": "d",
}
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
stringifyMapKeys(maps[i])
}
}
func BenchmarkStringifyMapKeysStringsOnlyStringMaps(b *testing.B) {
m := map[string]any{
"a": map[string]any{
"b": 32,
"c": 43,
"d": map[string]any{
"b": 32,
"c": 43,
},
},
"b": []any{"a", "b"},
"c": "d",
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
stringifyMapKeys(m)
}
}
func BenchmarkStringifyMapKeysIntegers(b *testing.B) {
maps := make([]map[any]any, b.N)
for i := 0; i < b.N; i++ {
maps[i] = map[any]any{
1: map[any]any{
4: 32,
5: 43,
6: map[any]any{
7: 32,
8: 43,
},
},
2: []any{"a", "b"},
3: "d",
}
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
stringifyMapKeys(maps[i])
}
c.Assert(calculateCollectionAliasLimit(0), qt.Equals, 100)
c.Assert(calculateCollectionAliasLimit(500), qt.Equals, 100)
c.Assert(calculateCollectionAliasLimit(1*kb), qt.Equals, 100)
c.Assert(calculateCollectionAliasLimit(2*kb), qt.Equals, 5000)
c.Assert(calculateCollectionAliasLimit(8*kb), qt.Equals, 5000)
c.Assert(calculateCollectionAliasLimit(12*kb), qt.Equals, 10000)
c.Assert(calculateCollectionAliasLimit(10000*kb), qt.Equals, 10000)
}
func BenchmarkDecodeYAMLToMap(b *testing.B) {
@@ -332,3 +223,61 @@ c: "d"
}
}
}
func BenchmarkUnmarshalBillionLaughs(b *testing.B) {
yamlBillionLaughs := []byte(`
a: &a [_, _, _, _, _, _, _, _, _, _, _, _, _, _, _]
b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b, *b]
d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c, *c]
e: &e [*d, *d, *d, *d, *d, *d, *d, *d, *d, *d]
f: &f [*e, *e, *e, *e, *e, *e, *e, *e, *e, *e]
g: &g [*f, *f, *f, *f, *f, *f, *f, *f, *f, *f]
h: &h [*g, *g, *g, *g, *g, *g, *g, *g, *g, *g]
i: &i [*h, *h, *h, *h, *h, *h, *h, *h, *h, *h]
`)
yamlFrontMatter := []byte(`
title: mysect
tags: [tag1, tag2]
params:
color: blue
`)
yamlTests := []struct {
Title string
Content []byte
IsExpectedToFailValidation bool
}{
{"Billion Laughs", yamlBillionLaughs, true},
{"YAML Front Matter", yamlFrontMatter, false},
}
for _, tt := range yamlTests {
b.Run(tt.Title+" no validation", func(b *testing.B) {
for range b.N {
var v any
if err := unmarshalYamlNoValidation(tt.Content, &v); err != nil {
b.Fatal(err)
}
}
})
b.Run(tt.Title+" with validation", func(b *testing.B) {
for range b.N {
var v any
err := UnmarshalYaml(tt.Content, &v)
if tt.IsExpectedToFailValidation {
if err == nil {
b.Fatal("expected to fail validation but did not")
}
} else {
if err != nil {
b.Fatal(err)
}
}
}
})
}
}

View File

@@ -0,0 +1,27 @@
// Copyright 2025 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metadecoders
import yaml "github.com/goccy/go-yaml"
// yamlEncodeOptions are the encoder options passed to the YAML library on
// every MarshalYAML call.
var yamlEncodeOptions = []yaml.EncodeOption{
// NOTE(review): presumably makes the encoder prefer single-quoted strings —
// confirm against the go-yaml documentation.
yaml.UseSingleQuote(true),
// This prevents excessively large objects, see https://github.com/goccy/go-yaml/issues/461
yaml.WithSmartAnchor(),
}
// MarshalYAML marshals the given value to YAML using yamlEncodeOptions.
// It is declared as a package-level function variable, not a plain function.
var MarshalYAML = func(v any) ([]byte, error) {
return yaml.MarshalWithOptions(v, yamlEncodeOptions...)
}

View File

@@ -8,7 +8,7 @@ waitServer
httpget ${HUGOTEST_BASEURL_0}p1/ 'Title: P1'
replace $WORK/content/p1/index.md 'title:' 'titlecolon'
httpget ${HUGOTEST_BASEURL_0}p1/ 'failed'
httpget ${HUGOTEST_BASEURL_0}p1/ 'Error'
replace $WORK/content/p1/index.md 'titlecolon' 'title:'
httpget ${HUGOTEST_BASEURL_0}p1/ 'Title: P1'

View File

@@ -19,8 +19,6 @@ import (
"fmt"
"io"
gyaml "github.com/ghodss/yaml"
kopenapi3 "github.com/getkin/kin-openapi/openapi3"
"github.com/gohugoio/hugo/cache/dynacache"
"github.com/gohugoio/hugo/deps"
@@ -80,7 +78,7 @@ func (ns *Namespace) Unmarshal(r resource.UnmarshableResource) (*OpenAPIDocument
s := &kopenapi3.T{}
switch f {
case metadecoders.YAML:
err = gyaml.Unmarshal(b, s)
err = metadecoders.UnmarshalYaml(b, s)
default:
err = metadecoders.Default.UnmarshalTo(b, f, s)
}

View File

@@ -201,3 +201,73 @@ a = "b"
c.Assert(err, qt.Not(qt.IsNil))
})
}
// TestRemarshaBillionLaughs remarshals nested-alias YAML documents of
// decreasing depth to JSON. The three deepest variants are expected to return
// an error, while the shallowest ("thousand laughs") is expected to succeed.
// NOTE(review): the name looks like a typo for TestRemarshalBillionLaughs.
func TestRemarshaBillionLaughs(t *testing.T) {
t.Parallel()
// Nine levels of nesting.
yamlBillionLaughs := `
a: &a [_, _, _, _, _, _, _, _, _, _, _, _, _, _, _]
b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b, *b]
d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c, *c]
e: &e [*d, *d, *d, *d, *d, *d, *d, *d, *d, *d]
f: &f [*e, *e, *e, *e, *e, *e, *e, *e, *e, *e]
g: &g [*f, *f, *f, *f, *f, *f, *f, *f, *f, *f]
h: &h [*g, *g, *g, *g, *g, *g, *g, *g, *g, *g]
i: &i [*h, *h, *h, *h, *h, *h, *h, *h, *h, *h]
`
// Six levels of nesting.
yamlMillionLaughs := `
a: &a [_, _, _, _, _, _, _, _, _, _, _, _, _, _, _]
b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b, *b]
d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c, *c]
e: &e [*d, *d, *d, *d, *d, *d, *d, *d, *d, *d]
f: &f [*e, *e, *e, *e, *e, *e, *e, *e, *e, *e]
`
// Four levels of nesting.
yamlTenThousandLaughs := `
a: &a [_, _, _, _, _, _, _, _, _, _, _, _, _, _, _]
b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b, *b]
d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c, *c]
`
// Three levels of nesting.
yamlThousandLaughs := `
a: &a [_, _, _, _, _, _, _, _, _, _, _, _, _, _, _]
b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b, *b]
`
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{T: t},
).Build()
ns := transform.New(b.H.Deps)
// Each of these documents must be rejected.
for _, test := range []struct {
name string
data string
}{
{"10k", yamlTenThousandLaughs},
{"1M", yamlMillionLaughs},
{"1B", yamlBillionLaughs},
} {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
c := qt.New(t)
_, err := ns.Remarshal("json", test.data)
c.Assert(err, qt.Not(qt.IsNil))
})
}
// Thousand laughs should be ok.
// It produces about 29KB of JSON,
// which is still a large output for such a small input,
// but there may be use cases for this.
_, err := ns.Remarshal("json", yamlThousandLaughs)
c := qt.New(t)
c.Assert(err, qt.IsNil)
}

View File

@@ -591,3 +591,31 @@ outputs: ["html", "markdown"]
// There are some white space differences, so we cannot do an exact match.
b.AssertFileContent("public/p1/index.md", markdown)
}
// TestUnmarshalExcessiveYAMLStructureShouldFail checks that unmarshaling a
// Billion Laughs style YAML asset from a template fails the build with the
// alias limit error.
//
// See https://github.com/goccy/go-yaml/issues/461
func TestUnmarshalExcessiveYAMLStructureShouldFail(t *testing.T) {
	t.Parallel()

	const siteFiles = `
-- hugo.toml --
disableKinds = ['page','rss','section','sitemap','taxonomy','term']
-- assets/ddos.yaml --
a: &a [_, _, _, _, _, _, _, _, _, _, _, _, _, _, _]
b: &b [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
c: &c [*b, *b, *b, *b, *b, *b, *b, *b, *b, *b]
d: &d [*c, *c, *c, *c, *c, *c, *c, *c, *c, *c]
e: &e [*d, *d, *d, *d, *d, *d, *d, *d, *d, *d]
f: &f [*e, *e, *e, *e, *e, *e, *e, *e, *e, *e]
g: &g [*f, *f, *f, *f, *f, *f, *f, *f, *f, *f]
h: &h [*g, *g, *g, *g, *g, *g, *g, *g, *g, *g]
i: &i [*h, *h, *h, *h, *h, *h, *h, *h, *h, *h]
-- layouts/home.html --
{{ $m := resources.Get "ddos.yaml" | transform.Unmarshal }}
{{ printf "Length: %d" (len $m) }}
`

	builder, buildErr := hugolib.TestE(t, siteFiles)

	builder.Assert(buildErr, qt.IsNotNil)
	builder.Assert(buildErr.Error(), qt.Contains, "too many YAML aliases for non-scalar nodes")
}