hugo/parser/metadecoders/decoder.go

312 lines
7.7 KiB
Go
Raw Normal View History

// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metadecoders
import (
"bytes"
"encoding/csv"
"encoding/json"
"fmt"
"regexp"
"strings"
"github.com/gohugoio/hugo/common/herrors"
"github.com/niklasfasching/go-org/org"
xml "github.com/clbanning/mxj/v2"
toml "github.com/pelletier/go-toml/v2"
Add /config dir support This commit adds support for a configuration directory (default `config`). The different pieces in this puzzle are: * A new `--environment` (or `-e`) flag. This can also be set with the `HUGO_ENVIRONMENT` OS environment variable. The value for `environment` defaults to `production` when running `hugo` and `development` when running `hugo server`. You can set it to any value you want (e.g. `hugo server -e "Sensible Environment"`), but as it is used to load configuration from the file system, the letter case may be important. You can get this value in your templates with `{{ hugo.Environment }}`. * A new `--configDir` flag (defaults to `config` below your project). This can also be set with `HUGO_CONFIGDIR` OS environment variable. If the `configDir` exists, the configuration files will be read and merged on top of each other from left to right; the right-most value will win on duplicates. Given the example tree below: If `environment` is `production`, the left-most `config.toml` would be the one directly below the project (this can now be omitted if you want), and then `_default/config.toml` and finally `production/config.toml`. And since these will be merged, you can just provide the environment specific configuration setting in you production config, e.g. `enableGitInfo = true`. The order within the directories will be lexical (`config.toml` and then `params.toml`). ```bash config ├── _default │   ├── config.toml │   ├── languages.toml │   ├── menus │   │   ├── menus.en.toml │   │   └── menus.zh.toml │   └── params.toml ├── development │   └── params.toml └── production ├── config.toml └── params.toml ``` Some configuration maps support the language code in the filename (e.g. `menus.en.toml`): `menus` (`menu` also works) and `params`. Also note that the only folders with "a meaning" in the above listing is the top level directories below `config`. The `menus` sub folder is just added for better organization. 
We use `TOML` in the example above, but Hugo also supports `JSON` and `YAML` as configuration formats. These can be mixed. Fixes #5422
2018-11-15 03:28:02 -05:00
"github.com/spf13/afero"
"github.com/spf13/cast"
jww "github.com/spf13/jwalterweatherman"
yaml "gopkg.in/yaml.v2"
)
// Decoder provides some configuration options for the decoders.
// Both fields are handed to the CSV reader and are also folded into the
// string returned by OptionsKey.
type Decoder struct {
// Delimiter is the field delimiter used in the CSV decoder. The package-level
// Default sets it to ','.
Delimiter rune
// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
// Comment character without preceding whitespace are ignored.
Comment rune
}
// OptionsKey returns a string built from the decoder's options, suitable for
// use as part of a cache key. It is the Delimiter rune followed by the
// Comment rune, each UTF-8 encoded (an unset rune contributes a NUL byte).
func (d Decoder) OptionsKey() string {
	var key strings.Builder
	for _, r := range [...]rune{d.Delimiter, d.Comment} {
		key.WriteRune(r)
	}
	return key.String()
}
// Default is a Decoder in its default configuration: ',' as the CSV field
// delimiter and no comment character (Comment is left at its zero value).
var Default = Decoder{Delimiter: ','}
// UnmarshalToMap will unmarshall data in format f into a new map. This is
// what's needed for Hugo's front matter decoding.
func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) {
m := make(map[string]any)
if data == nil {
return m, nil
}
err := d.UnmarshalTo(data, f, &m)
return m, err
Add /config dir support This commit adds support for a configuration directory (default `config`). The different pieces in this puzzle are: * A new `--environment` (or `-e`) flag. This can also be set with the `HUGO_ENVIRONMENT` OS environment variable. The value for `environment` defaults to `production` when running `hugo` and `development` when running `hugo server`. You can set it to any value you want (e.g. `hugo server -e "Sensible Environment"`), but as it is used to load configuration from the file system, the letter case may be important. You can get this value in your templates with `{{ hugo.Environment }}`. * A new `--configDir` flag (defaults to `config` below your project). This can also be set with `HUGO_CONFIGDIR` OS environment variable. If the `configDir` exists, the configuration files will be read and merged on top of each other from left to right; the right-most value will win on duplicates. Given the example tree below: If `environment` is `production`, the left-most `config.toml` would be the one directly below the project (this can now be omitted if you want), and then `_default/config.toml` and finally `production/config.toml`. And since these will be merged, you can just provide the environment specific configuration setting in you production config, e.g. `enableGitInfo = true`. The order within the directories will be lexical (`config.toml` and then `params.toml`). ```bash config ├── _default │   ├── config.toml │   ├── languages.toml │   ├── menus │   │   ├── menus.en.toml │   │   └── menus.zh.toml │   └── params.toml ├── development │   └── params.toml └── production ├── config.toml └── params.toml ``` Some configuration maps support the language code in the filename (e.g. `menus.en.toml`): `menus` (`menu` also works) and `params`. Also note that the only folders with "a meaning" in the above listing is the top level directories below `config`. The `menus` sub folder is just added for better organization. 
We use `TOML` in the example above, but Hugo also supports `JSON` and `YAML` as configuration formats. These can be mixed. Fixes #5422
2018-11-15 03:28:02 -05:00
}
// UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
// the given filename.
func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) {
Add /config dir support This commit adds support for a configuration directory (default `config`). The different pieces in this puzzle are: * A new `--environment` (or `-e`) flag. This can also be set with the `HUGO_ENVIRONMENT` OS environment variable. The value for `environment` defaults to `production` when running `hugo` and `development` when running `hugo server`. You can set it to any value you want (e.g. `hugo server -e "Sensible Environment"`), but as it is used to load configuration from the file system, the letter case may be important. You can get this value in your templates with `{{ hugo.Environment }}`. * A new `--configDir` flag (defaults to `config` below your project). This can also be set with `HUGO_CONFIGDIR` OS environment variable. If the `configDir` exists, the configuration files will be read and merged on top of each other from left to right; the right-most value will win on duplicates. Given the example tree below: If `environment` is `production`, the left-most `config.toml` would be the one directly below the project (this can now be omitted if you want), and then `_default/config.toml` and finally `production/config.toml`. And since these will be merged, you can just provide the environment specific configuration setting in you production config, e.g. `enableGitInfo = true`. The order within the directories will be lexical (`config.toml` and then `params.toml`). ```bash config ├── _default │   ├── config.toml │   ├── languages.toml │   ├── menus │   │   ├── menus.en.toml │   │   └── menus.zh.toml │   └── params.toml ├── development │   └── params.toml └── production ├── config.toml └── params.toml ``` Some configuration maps support the language code in the filename (e.g. `menus.en.toml`): `menus` (`menu` also works) and `params`. Also note that the only folders with "a meaning" in the above listing is the top level directories below `config`. The `menus` sub folder is just added for better organization. 
We use `TOML` in the example above, but Hugo also supports `JSON` and `YAML` as configuration formats. These can be mixed. Fixes #5422
2018-11-15 03:28:02 -05:00
format := FormatFromString(filename)
if format == "" {
return nil, fmt.Errorf("%q is not a valid configuration format", filename)
Add /config dir support This commit adds support for a configuration directory (default `config`). The different pieces in this puzzle are: * A new `--environment` (or `-e`) flag. This can also be set with the `HUGO_ENVIRONMENT` OS environment variable. The value for `environment` defaults to `production` when running `hugo` and `development` when running `hugo server`. You can set it to any value you want (e.g. `hugo server -e "Sensible Environment"`), but as it is used to load configuration from the file system, the letter case may be important. You can get this value in your templates with `{{ hugo.Environment }}`. * A new `--configDir` flag (defaults to `config` below your project). This can also be set with `HUGO_CONFIGDIR` OS environment variable. If the `configDir` exists, the configuration files will be read and merged on top of each other from left to right; the right-most value will win on duplicates. Given the example tree below: If `environment` is `production`, the left-most `config.toml` would be the one directly below the project (this can now be omitted if you want), and then `_default/config.toml` and finally `production/config.toml`. And since these will be merged, you can just provide the environment specific configuration setting in you production config, e.g. `enableGitInfo = true`. The order within the directories will be lexical (`config.toml` and then `params.toml`). ```bash config ├── _default │   ├── config.toml │   ├── languages.toml │   ├── menus │   │   ├── menus.en.toml │   │   └── menus.zh.toml │   └── params.toml ├── development │   └── params.toml └── production ├── config.toml └── params.toml ``` Some configuration maps support the language code in the filename (e.g. `menus.en.toml`): `menus` (`menu` also works) and `params`. Also note that the only folders with "a meaning" in the above listing is the top level directories below `config`. The `menus` sub folder is just added for better organization. 
We use `TOML` in the example above, but Hugo also supports `JSON` and `YAML` as configuration formats. These can be mixed. Fixes #5422
2018-11-15 03:28:02 -05:00
}
Add /config dir support This commit adds support for a configuration directory (default `config`). The different pieces in this puzzle are: * A new `--environment` (or `-e`) flag. This can also be set with the `HUGO_ENVIRONMENT` OS environment variable. The value for `environment` defaults to `production` when running `hugo` and `development` when running `hugo server`. You can set it to any value you want (e.g. `hugo server -e "Sensible Environment"`), but as it is used to load configuration from the file system, the letter case may be important. You can get this value in your templates with `{{ hugo.Environment }}`. * A new `--configDir` flag (defaults to `config` below your project). This can also be set with `HUGO_CONFIGDIR` OS environment variable. If the `configDir` exists, the configuration files will be read and merged on top of each other from left to right; the right-most value will win on duplicates. Given the example tree below: If `environment` is `production`, the left-most `config.toml` would be the one directly below the project (this can now be omitted if you want), and then `_default/config.toml` and finally `production/config.toml`. And since these will be merged, you can just provide the environment specific configuration setting in you production config, e.g. `enableGitInfo = true`. The order within the directories will be lexical (`config.toml` and then `params.toml`). ```bash config ├── _default │   ├── config.toml │   ├── languages.toml │   ├── menus │   │   ├── menus.en.toml │   │   └── menus.zh.toml │   └── params.toml ├── development │   └── params.toml └── production ├── config.toml └── params.toml ``` Some configuration maps support the language code in the filename (e.g. `menus.en.toml`): `menus` (`menu` also works) and `params`. Also note that the only folders with "a meaning" in the above listing is the top level directories below `config`. The `menus` sub folder is just added for better organization. 
We use `TOML` in the example above, but Hugo also supports `JSON` and `YAML` as configuration formats. These can be mixed. Fixes #5422
2018-11-15 03:28:02 -05:00
data, err := afero.ReadFile(fs, filename)
if err != nil {
return nil, err
}
return d.UnmarshalToMap(data, format)
}
// UnmarshalStringTo tries to unmarshal data to a new instance of the same
// type as typ. Only the dynamic type of typ matters; its value is ignored.
// Surrounding whitespace is trimmed from data before conversion.
//
// An error is returned for any type of typ not listed in the switch below.
func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) {
	trimmed := strings.TrimSpace(data)

	// Only the types that can come out of YAML, JSON and TOML are handled.
	switch typ.(type) {
	case string:
		return trimmed, nil
	case bool:
		return cast.ToBoolE(trimmed)
	case int:
		return cast.ToIntE(trimmed)
	case int64:
		return cast.ToInt64E(trimmed)
	case float64:
		return cast.ToFloat64E(trimmed)
	case map[string]any:
		// The format is detected from the content itself.
		return d.UnmarshalToMap([]byte(trimmed), d.FormatFromContentString(trimmed))
	case []any:
		// A standalone slice. Let YAML handle it.
		return d.Unmarshal([]byte(trimmed), YAML)
	}

	return nil, fmt.Errorf("unmarshal: %T not supported", typ)
}
// Unmarshal unmarshals data in format f into a value of whatever type the
// decoder for f produces. This is what's needed for Hugo's /data handling.
//
// For nil data no decoding is attempted: CSV yields an empty [][]string and
// every other format an empty map[string]any.
func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
	if data == nil {
		if f == CSV {
			return [][]string{}, nil
		}
		return map[string]any{}, nil
	}

	var out any
	err := d.UnmarshalTo(data, f, &out)
	return out, err
}
// UnmarshalTo unmarshals data in format f into v, which must be a pointer.
//
// JSON and TOML hand v straight to their decoders. For XML the content of
// the root element is assigned to v when v is a *map[string]any or *any;
// other target types are left untouched. For YAML, v is decoded as-is and,
// for *map[string]any and *any targets, map keys are then normalized to
// strings. ORG and CSV are delegated to unmarshalORG and unmarshalCSV.
//
// Decoding errors are wrapped by toFileError, except errors from the CSV
// decoder and the "format not supported" error, which are returned as-is.
func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
	var err error

	switch f {
	case ORG:
		err = d.unmarshalORG(data, v)
	case JSON:
		err = json.Unmarshal(data, v)
	case XML:
		var xmlRoot xml.Map
		xmlRoot, err = xml.NewMapXml(data)

		var xmlValue map[string]any
		if err == nil {
			xmlRootName, rootErr := xmlRoot.Root()
			if rootErr != nil {
				return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: %w", rootErr))
			}

			// The root value is not guaranteed to be a map (NOTE(review):
			// presumably the case for a root element without children or
			// attributes; confirm against mxj), so use a checked assertion
			// instead of panicking on unexpected input.
			rootValue, ok := xmlRoot[xmlRootName].(map[string]any)
			if !ok {
				return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: root element %q is not a map (got %T)", xmlRootName, xmlRoot[xmlRootName]))
			}
			xmlValue = rootValue
		}

		// On a parse error xmlValue is still nil here; it is assigned anyway
		// and the error is reported at the end of the function.
		switch target := v.(type) {
		case *map[string]any:
			*target = xmlValue
		case *any:
			*target = xmlValue
		}
	case TOML:
		err = toml.Unmarshal(data, v)
	case YAML:
		if err = yaml.Unmarshal(data, v); err != nil {
			return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err))
		}

		// To support boolean keys, the YAML package unmarshals maps to
		// map[any]any. Here we recurse through the result and change all
		// maps to map[string]any like we would've gotten from `json`.
		switch target := v.(type) {
		case *map[string]any:
			if mm, changed := stringifyMapKeys(*target); changed {
				*target = mm.(map[string]any)
			}
		case *any:
			if *target != nil {
				if mm, changed := stringifyMapKeys(*target); changed {
					*target = mm
				}
			}
		default:
			// Not a map.
		}
	case CSV:
		return d.unmarshalCSV(data, v)
	default:
		return fmt.Errorf("unmarshal of format %q is not supported", f)
	}

	if err == nil {
		return nil
	}

	return toFileError(f, data, fmt.Errorf("unmarshal failed: %w", err))
}
// unmarshalCSV parses data as CSV and stores the records, as a [][]string,
// in v. v must be a *any; any other target type results in an error.
//
// d.Delimiter is used as the field separator when it is set. A zero
// Delimiter keeps the ',' that csv.NewReader configures, so a zero-value
// Decoder behaves like Default instead of being rejected by the reader.
// d.Comment is passed through unchanged (0 disables comment handling).
// Parse errors from the CSV reader are returned as-is.
func (d Decoder) unmarshalCSV(data []byte, v any) error {
	r := csv.NewReader(bytes.NewReader(data))
	if d.Delimiter != 0 {
		r.Comma = d.Delimiter
	}
	r.Comment = d.Comment

	records, err := r.ReadAll()
	if err != nil {
		return err
	}

	target, ok := v.(*any)
	if !ok {
		return fmt.Errorf("CSV cannot be unmarshaled into %T", v)
	}
	*target = records

	return nil
}
// orgDateRe matches a bracketed timestamp such as <2006-01-02 Mon> or
// [2006-01-02 Mon 15:04] and captures the YYYY-MM-DD part. It is compiled
// once at package initialization rather than on every call.
var orgDateRe = regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`)

// parseORGDate extracts the YYYY-MM-DD date from a bracketed timestamp in s.
// If s contains no such timestamp it is returned unchanged; note that the
// pattern requires a space after the date, so "<2006-01-02>" is not matched.
func parseORGDate(s string) string {
	if m := orgDateRe.FindStringSubmatch(s); m != nil {
		return m[1]
	}
	return s
}
// unmarshalORG parses data as an Org document and stores its buffer
// settings, as a map[string]any keyed by lower-cased setting name, in v.
//
// v must be a *map[string]any or a *any; any other target type results in
// an error rather than a panic. A parse error reported by the Org parser is
// returned as-is.
//
// Values are converted as follows:
//   - keys ending in "[]" have the suffix removed and their value split on
//     whitespace into a []string;
//   - "tags", "categories" and "aliases" are split the same way, with a
//     deprecation warning logged;
//   - "date" is passed through parseORGDate;
//   - everything else is stored as the raw string.
func (d Decoder) unmarshalORG(data []byte, v any) error {
	config := org.New()
	config.Log = jww.WARN
	document := config.Parse(bytes.NewReader(data), "")
	if document.Error != nil {
		return document.Error
	}

	frontMatter := make(map[string]any, len(document.BufferSettings))
	for key, value := range document.BufferSettings {
		key = strings.ToLower(key)
		switch {
		case strings.HasSuffix(key, "[]"):
			frontMatter[key[:len(key)-2]] = strings.Fields(value)
		case key == "tags" || key == "categories" || key == "aliases":
			jww.WARN.Printf("Please use '#+%s[]:' notation, automatic conversion is deprecated.", key)
			frontMatter[key] = strings.Fields(value)
		case key == "date":
			frontMatter[key] = parseORGDate(value)
		default:
			frontMatter[key] = value
		}
	}

	switch target := v.(type) {
	case *map[string]any:
		*target = frontMatter
	case *any:
		*target = frontMatter
	default:
		return fmt.Errorf("ORG cannot be unmarshaled into %T", v)
	}

	return nil
}
func toFileError(f Format, data []byte, err error) error {
2022-05-15 05:40:34 -04:00
return herrors.NewFileErrorFromName(err, fmt.Sprintf("_stream.%s", f)).UpdateContent(bytes.NewReader(data), nil)
}
// stringifyMapKeys recurses into in and changes all instances of
// map[interface{}]interface{} to map[string]interface{}. This is useful to
2018-11-13 12:28:40 -05:00
// work around the impedance mismatch between JSON and YAML unmarshaling that's
// described here: https://github.com/go-yaml/yaml/issues/139
//
// Inspired by https://github.com/stripe/stripe-mock, MIT licensed
func stringifyMapKeys(in any) (any, bool) {
switch in := in.(type) {
case []any:
for i, v := range in {
if vv, replaced := stringifyMapKeys(v); replaced {
in[i] = vv
}
}
case map[string]any:
for k, v := range in {
if vv, changed := stringifyMapKeys(v); changed {
in[k] = vv
}
}
case map[any]any:
res := make(map[string]any)
var (
ok bool
err error
)
for k, v := range in {
var ks string
if ks, ok = k.(string); !ok {
ks, err = cast.ToStringE(k)
if err != nil {
ks = fmt.Sprintf("%v", k)
}
}
if vv, replaced := stringifyMapKeys(v); replaced {
res[ks] = vv
} else {
res[ks] = v
}
}
return res, true
}
return nil, false
}