URI: 
       decoder.go - hugo - [fork] hugo port for 9front
  HTML git clone https://git.drkhsh.at/hugo.git
   DIR Log
   DIR Files
   DIR Refs
   DIR Submodules
   DIR README
   DIR LICENSE
       ---
       decoder.go (9839B)
       ---
            1 // Copyright 2018 The Hugo Authors. All rights reserved.
            2 //
            3 // Licensed under the Apache License, Version 2.0 (the "License");
            4 // you may not use this file except in compliance with the License.
            5 // You may obtain a copy of the License at
            6 // http://www.apache.org/licenses/LICENSE-2.0
            7 //
            8 // Unless required by applicable law or agreed to in writing, software
            9 // distributed under the License is distributed on an "AS IS" BASIS,
           10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
           11 // See the License for the specific language governing permissions and
           12 // limitations under the License.
           13 
           14 package metadecoders
           15 
           16 import (
           17         "bytes"
           18         "encoding/csv"
           19         "encoding/json"
           20         "fmt"
           21         "log"
           22         "regexp"
           23         "strconv"
           24         "strings"
           25 
           26         "github.com/gohugoio/hugo/common/herrors"
           27         "github.com/gohugoio/hugo/common/maps"
           28         "github.com/niklasfasching/go-org/org"
           29 
           30         xml "github.com/clbanning/mxj/v2"
           31         toml "github.com/pelletier/go-toml/v2"
           32         "github.com/spf13/afero"
           33         "github.com/spf13/cast"
           34         yaml "gopkg.in/yaml.v2"
           35 )
           36 
// Decoder provides some configuration options for the decoders.
// All fields are currently consumed by the CSV decoder only.
type Decoder struct {
	// Delimiter is the field delimiter. Used in the CSV decoder. Default is
	// ','.
	Delimiter rune

	// Comment, if not 0, is the comment character. Lines beginning with the
	// Comment character without preceding whitespace are ignored. Used in the
	// CSV decoder.
	Comment rune

	// LazyQuotes, if true, allows a quote to appear in an unquoted field and
	// a non-doubled quote to appear in a quoted field. Used in the CSV
	// decoder. Default is false.
	LazyQuotes bool

	// TargetType is the target data type, either "slice" or "map". Used in
	// the CSV decoder; any other value makes CSV decoding return an error.
	// Default is "slice".
	TargetType string
}
           56 
           57 // OptionsKey is used in cache keys.
           58 func (d Decoder) OptionsKey() string {
           59         var sb strings.Builder
           60         sb.WriteRune(d.Delimiter)
           61         sb.WriteRune(d.Comment)
           62         sb.WriteString(strconv.FormatBool(d.LazyQuotes))
           63         sb.WriteString(d.TargetType)
           64         return sb.String()
           65 }
           66 
// Default is a Decoder in its default configuration: ',' as the CSV field
// delimiter and "slice" as the CSV target type. Comment and LazyQuotes keep
// their zero values.
var Default = Decoder{
	Delimiter:  ',',
	TargetType: "slice",
}
           72 
           73 // UnmarshalToMap will unmarshall data in format f into a new map. This is
           74 // what's needed for Hugo's front matter decoding.
           75 func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) {
           76         m := make(map[string]any)
           77         if data == nil {
           78                 return m, nil
           79         }
           80 
           81         err := d.UnmarshalTo(data, f, &m)
           82 
           83         return m, err
           84 }
           85 
           86 // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
           87 // the given filename.
           88 func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) {
           89         format := FormatFromString(filename)
           90         if format == "" {
           91                 return nil, fmt.Errorf("%q is not a valid configuration format", filename)
           92         }
           93 
           94         data, err := afero.ReadFile(fs, filename)
           95         if err != nil {
           96                 return nil, err
           97         }
           98         return d.UnmarshalToMap(data, format)
           99 }
          100 
          101 // UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
          102 func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) {
          103         data = strings.TrimSpace(data)
          104         // We only check for the possible types in YAML, JSON and TOML.
          105         switch typ.(type) {
          106         case string:
          107                 return data, nil
          108         case map[string]any, maps.Params:
          109                 format := d.FormatFromContentString(data)
          110                 return d.UnmarshalToMap([]byte(data), format)
          111         case []any:
          112                 // A standalone slice. Let YAML handle it.
          113                 return d.Unmarshal([]byte(data), YAML)
          114         case bool:
          115                 return cast.ToBoolE(data)
          116         case int:
          117                 return cast.ToIntE(data)
          118         case int64:
          119                 return cast.ToInt64E(data)
          120         case float64:
          121                 return cast.ToFloat64E(data)
          122         default:
          123                 return nil, fmt.Errorf("unmarshal: %T not supported", typ)
          124         }
          125 }
          126 
          127 // Unmarshal will unmarshall data in format f into an interface{}.
          128 // This is what's needed for Hugo's /data handling.
          129 func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
          130         if len(data) == 0 {
          131                 switch f {
          132                 case CSV:
          133                         switch d.TargetType {
          134                         case "map":
          135                                 return make(map[string]any), nil
          136                         case "slice":
          137                                 return make([][]string, 0), nil
          138                         default:
          139                                 return nil, fmt.Errorf("invalid targetType: expected either slice or map, received %s", d.TargetType)
          140                         }
          141                 default:
          142                         return make(map[string]any), nil
          143                 }
          144         }
          145         var v any
          146         err := d.UnmarshalTo(data, f, &v)
          147 
          148         return v, err
          149 }
          150 
          151 // UnmarshalTo unmarshals data in format f into v.
          152 func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
          153         var err error
          154 
          155         switch f {
          156         case ORG:
          157                 err = d.unmarshalORG(data, v)
          158         case JSON:
          159                 err = json.Unmarshal(data, v)
          160         case XML:
          161                 var xmlRoot xml.Map
          162                 xmlRoot, err = xml.NewMapXml(data)
          163 
          164                 var xmlValue map[string]any
          165                 if err == nil {
          166                         xmlRootName, err := xmlRoot.Root()
          167                         if err != nil {
          168                                 return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: %w", err))
          169                         }
          170 
          171                         // Get the root value and verify it's a map
          172                         rootValue := xmlRoot[xmlRootName]
          173                         if rootValue == nil {
          174                                 return toFileError(f, data, fmt.Errorf("XML root element '%s' has no value", xmlRootName))
          175                         }
          176 
          177                         // Type check before conversion
          178                         mapValue, ok := rootValue.(map[string]any)
          179                         if !ok {
          180                                 return toFileError(f, data, fmt.Errorf("XML root element '%s' must be a map/object, got %T", xmlRootName, rootValue))
          181                         }
          182                         xmlValue = mapValue
          183                 }
          184 
          185                 switch v := v.(type) {
          186                 case *map[string]any:
          187                         *v = xmlValue
          188                 case *any:
          189                         *v = xmlValue
          190                 }
          191         case TOML:
          192                 err = toml.Unmarshal(data, v)
          193         case YAML:
          194                 err = yaml.Unmarshal(data, v)
          195                 if err != nil {
          196                         return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err))
          197                 }
          198 
          199                 // To support boolean keys, the YAML package unmarshals maps to
          200                 // map[interface{}]interface{}. Here we recurse through the result
          201                 // and change all maps to map[string]interface{} like we would've
          202                 // gotten from `json`.
          203                 var ptr any
          204                 switch vv := v.(type) {
          205                 case *map[string]any:
          206                         ptr = *vv
          207                 case *any:
          208                         ptr = *vv
          209                 default:
          210                         // Not a map.
          211                 }
          212 
          213                 if ptr != nil {
          214                         if mm, changed := stringifyMapKeys(ptr); changed {
          215                                 switch vv := v.(type) {
          216                                 case *map[string]any:
          217                                         *vv = mm.(map[string]any)
          218                                 case *any:
          219                                         *vv = mm
          220                                 }
          221                         }
          222                 }
          223         case CSV:
          224                 return d.unmarshalCSV(data, v)
          225 
          226         default:
          227                 return fmt.Errorf("unmarshal of format %q is not supported", f)
          228         }
          229 
          230         if err == nil {
          231                 return nil
          232         }
          233 
          234         return toFileError(f, data, fmt.Errorf("unmarshal failed: %w", err))
          235 }
          236 
          237 func (d Decoder) unmarshalCSV(data []byte, v any) error {
          238         r := csv.NewReader(bytes.NewReader(data))
          239         r.Comma = d.Delimiter
          240         r.Comment = d.Comment
          241         r.LazyQuotes = d.LazyQuotes
          242 
          243         records, err := r.ReadAll()
          244         if err != nil {
          245                 return err
          246         }
          247 
          248         switch vv := v.(type) {
          249         case *any:
          250                 switch d.TargetType {
          251                 case "map":
          252                         if len(records) < 2 {
          253                                 return fmt.Errorf("cannot unmarshal CSV into %T: expected at least a header row and one data row", v)
          254                         }
          255 
          256                         seen := make(map[string]bool, len(records[0]))
          257                         for _, fieldName := range records[0] {
          258                                 if seen[fieldName] {
          259                                         return fmt.Errorf("cannot unmarshal CSV into %T: header row contains duplicate field names", v)
          260                                 }
          261                                 seen[fieldName] = true
          262                         }
          263 
          264                         sm := make([]map[string]string, len(records)-1)
          265                         for i, record := range records[1:] {
          266                                 m := make(map[string]string, len(records[0]))
          267                                 for j, col := range record {
          268                                         m[records[0][j]] = col
          269                                 }
          270                                 sm[i] = m
          271                         }
          272                         *vv = sm
          273                 case "slice":
          274                         *vv = records
          275                 default:
          276                         return fmt.Errorf("cannot unmarshal CSV into %T: invalid targetType: expected either slice or map, received %s", v, d.TargetType)
          277                 }
          278         default:
          279                 return fmt.Errorf("cannot unmarshal CSV into %T", v)
          280         }
          281 
          282         return nil
          283 }
          284 
          285 func parseORGDate(s string) string {
          286         r := regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2}) .*[>\]]`)
          287         if m := r.FindStringSubmatch(s); m != nil {
          288                 return m[1]
          289         }
          290         return s
          291 }
          292 
          293 func (d Decoder) unmarshalORG(data []byte, v any) error {
          294         config := org.New()
          295         config.Log = log.Default() // TODO(bep)
          296         document := config.Parse(bytes.NewReader(data), "")
          297         if document.Error != nil {
          298                 return document.Error
          299         }
          300         frontMatter := make(map[string]any, len(document.BufferSettings))
          301         for k, v := range document.BufferSettings {
          302                 k = strings.ToLower(k)
          303                 if strings.HasSuffix(k, "[]") {
          304                         frontMatter[k[:len(k)-2]] = strings.Fields(v)
          305                 } else if strings.Contains(v, "\n") {
          306                         frontMatter[k] = strings.Split(v, "\n")
          307                 } else if k == "filetags" {
          308                         trimmed := strings.TrimPrefix(v, ":")
          309                         trimmed = strings.TrimSuffix(trimmed, ":")
          310                         frontMatter[k] = strings.Split(trimmed, ":")
          311                 } else if k == "date" || k == "lastmod" || k == "publishdate" || k == "expirydate" {
          312                         frontMatter[k] = parseORGDate(v)
          313                 } else {
          314                         frontMatter[k] = v
          315                 }
          316         }
          317         switch vv := v.(type) {
          318         case *map[string]any:
          319                 *vv = frontMatter
          320         case *any:
          321                 *vv = frontMatter
          322         }
          323         return nil
          324 }
          325 
          326 func toFileError(f Format, data []byte, err error) error {
          327         return herrors.NewFileErrorFromName(err, fmt.Sprintf("_stream.%s", f)).UpdateContent(bytes.NewReader(data), nil)
          328 }
          329 
          330 // stringifyMapKeys recurses into in and changes all instances of
          331 // map[interface{}]interface{} to map[string]interface{}. This is useful to
          332 // work around the impedance mismatch between JSON and YAML unmarshaling that's
          333 // described here: https://github.com/go-yaml/yaml/issues/139
          334 //
          335 // Inspired by https://github.com/stripe/stripe-mock, MIT licensed
          336 func stringifyMapKeys(in any) (any, bool) {
          337         switch in := in.(type) {
          338         case []any:
          339                 for i, v := range in {
          340                         if vv, replaced := stringifyMapKeys(v); replaced {
          341                                 in[i] = vv
          342                         }
          343                 }
          344         case map[string]any:
          345                 for k, v := range in {
          346                         if vv, changed := stringifyMapKeys(v); changed {
          347                                 in[k] = vv
          348                         }
          349                 }
          350         case map[any]any:
          351                 res := make(map[string]any)
          352                 var (
          353                         ok  bool
          354                         err error
          355                 )
          356                 for k, v := range in {
          357                         var ks string
          358 
          359                         if ks, ok = k.(string); !ok {
          360                                 ks, err = cast.ToStringE(k)
          361                                 if err != nil {
          362                                         ks = fmt.Sprintf("%v", k)
          363                                 }
          364                         }
          365                         if vv, replaced := stringifyMapKeys(v); replaced {
          366                                 res[ks] = vv
          367                         } else {
          368                                 res[ks] = v
          369                         }
          370                 }
          371                 return res, true
          372         }
          373 
          374         return nil, false
          375 }