decoder.go - hugo - [fork] hugo port for 9front
HTML git clone https://git.drkhsh.at/hugo.git
DIR Log
DIR Files
DIR Refs
DIR Submodules
DIR README
DIR LICENSE
---
decoder.go (9839B)
---
1 // Copyright 2018 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package metadecoders
15
16 import (
17 "bytes"
18 "encoding/csv"
19 "encoding/json"
20 "fmt"
21 "log"
22 "regexp"
23 "strconv"
24 "strings"
25
26 "github.com/gohugoio/hugo/common/herrors"
27 "github.com/gohugoio/hugo/common/maps"
28 "github.com/niklasfasching/go-org/org"
29
30 xml "github.com/clbanning/mxj/v2"
31 toml "github.com/pelletier/go-toml/v2"
32 "github.com/spf13/afero"
33 "github.com/spf13/cast"
34 yaml "gopkg.in/yaml.v2"
35 )
36
// Decoder carries the options that tune how data is decoded. All of the
// options below are consulted by the CSV decoder.
type Decoder struct {
	// Delimiter separates the fields of a CSV record. The Default decoder
	// uses ','.
	Delimiter rune

	// Comment is the CSV comment character; 0 disables comments. A line that
	// starts with this character, with no whitespace in front of it, is
	// skipped.
	Comment rune

	// LazyQuotes relaxes CSV quote handling: a quote may show up inside an
	// unquoted field, and a non-doubled quote may show up inside a quoted
	// field. Off by default.
	LazyQuotes bool

	// TargetType selects the shape CSV data is decoded into: "slice" or
	// "map". The Default decoder uses "slice".
	TargetType string
}
56
57 // OptionsKey is used in cache keys.
58 func (d Decoder) OptionsKey() string {
59 var sb strings.Builder
60 sb.WriteRune(d.Delimiter)
61 sb.WriteRune(d.Comment)
62 sb.WriteString(strconv.FormatBool(d.LazyQuotes))
63 sb.WriteString(d.TargetType)
64 return sb.String()
65 }
66
67 // Default is a Decoder in its default configuration.
68 var Default = Decoder{
69 Delimiter: ',',
70 TargetType: "slice",
71 }
72
73 // UnmarshalToMap will unmarshall data in format f into a new map. This is
74 // what's needed for Hugo's front matter decoding.
75 func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) {
76 m := make(map[string]any)
77 if data == nil {
78 return m, nil
79 }
80
81 err := d.UnmarshalTo(data, f, &m)
82
83 return m, err
84 }
85
86 // UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
87 // the given filename.
88 func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) {
89 format := FormatFromString(filename)
90 if format == "" {
91 return nil, fmt.Errorf("%q is not a valid configuration format", filename)
92 }
93
94 data, err := afero.ReadFile(fs, filename)
95 if err != nil {
96 return nil, err
97 }
98 return d.UnmarshalToMap(data, format)
99 }
100
101 // UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
102 func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) {
103 data = strings.TrimSpace(data)
104 // We only check for the possible types in YAML, JSON and TOML.
105 switch typ.(type) {
106 case string:
107 return data, nil
108 case map[string]any, maps.Params:
109 format := d.FormatFromContentString(data)
110 return d.UnmarshalToMap([]byte(data), format)
111 case []any:
112 // A standalone slice. Let YAML handle it.
113 return d.Unmarshal([]byte(data), YAML)
114 case bool:
115 return cast.ToBoolE(data)
116 case int:
117 return cast.ToIntE(data)
118 case int64:
119 return cast.ToInt64E(data)
120 case float64:
121 return cast.ToFloat64E(data)
122 default:
123 return nil, fmt.Errorf("unmarshal: %T not supported", typ)
124 }
125 }
126
127 // Unmarshal will unmarshall data in format f into an interface{}.
128 // This is what's needed for Hugo's /data handling.
129 func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
130 if len(data) == 0 {
131 switch f {
132 case CSV:
133 switch d.TargetType {
134 case "map":
135 return make(map[string]any), nil
136 case "slice":
137 return make([][]string, 0), nil
138 default:
139 return nil, fmt.Errorf("invalid targetType: expected either slice or map, received %s", d.TargetType)
140 }
141 default:
142 return make(map[string]any), nil
143 }
144 }
145 var v any
146 err := d.UnmarshalTo(data, f, &v)
147
148 return v, err
149 }
150
151 // UnmarshalTo unmarshals data in format f into v.
152 func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
153 var err error
154
155 switch f {
156 case ORG:
157 err = d.unmarshalORG(data, v)
158 case JSON:
159 err = json.Unmarshal(data, v)
160 case XML:
161 var xmlRoot xml.Map
162 xmlRoot, err = xml.NewMapXml(data)
163
164 var xmlValue map[string]any
165 if err == nil {
166 xmlRootName, err := xmlRoot.Root()
167 if err != nil {
168 return toFileError(f, data, fmt.Errorf("failed to unmarshal XML: %w", err))
169 }
170
171 // Get the root value and verify it's a map
172 rootValue := xmlRoot[xmlRootName]
173 if rootValue == nil {
174 return toFileError(f, data, fmt.Errorf("XML root element '%s' has no value", xmlRootName))
175 }
176
177 // Type check before conversion
178 mapValue, ok := rootValue.(map[string]any)
179 if !ok {
180 return toFileError(f, data, fmt.Errorf("XML root element '%s' must be a map/object, got %T", xmlRootName, rootValue))
181 }
182 xmlValue = mapValue
183 }
184
185 switch v := v.(type) {
186 case *map[string]any:
187 *v = xmlValue
188 case *any:
189 *v = xmlValue
190 }
191 case TOML:
192 err = toml.Unmarshal(data, v)
193 case YAML:
194 err = yaml.Unmarshal(data, v)
195 if err != nil {
196 return toFileError(f, data, fmt.Errorf("failed to unmarshal YAML: %w", err))
197 }
198
199 // To support boolean keys, the YAML package unmarshals maps to
200 // map[interface{}]interface{}. Here we recurse through the result
201 // and change all maps to map[string]interface{} like we would've
202 // gotten from `json`.
203 var ptr any
204 switch vv := v.(type) {
205 case *map[string]any:
206 ptr = *vv
207 case *any:
208 ptr = *vv
209 default:
210 // Not a map.
211 }
212
213 if ptr != nil {
214 if mm, changed := stringifyMapKeys(ptr); changed {
215 switch vv := v.(type) {
216 case *map[string]any:
217 *vv = mm.(map[string]any)
218 case *any:
219 *vv = mm
220 }
221 }
222 }
223 case CSV:
224 return d.unmarshalCSV(data, v)
225
226 default:
227 return fmt.Errorf("unmarshal of format %q is not supported", f)
228 }
229
230 if err == nil {
231 return nil
232 }
233
234 return toFileError(f, data, fmt.Errorf("unmarshal failed: %w", err))
235 }
236
237 func (d Decoder) unmarshalCSV(data []byte, v any) error {
238 r := csv.NewReader(bytes.NewReader(data))
239 r.Comma = d.Delimiter
240 r.Comment = d.Comment
241 r.LazyQuotes = d.LazyQuotes
242
243 records, err := r.ReadAll()
244 if err != nil {
245 return err
246 }
247
248 switch vv := v.(type) {
249 case *any:
250 switch d.TargetType {
251 case "map":
252 if len(records) < 2 {
253 return fmt.Errorf("cannot unmarshal CSV into %T: expected at least a header row and one data row", v)
254 }
255
256 seen := make(map[string]bool, len(records[0]))
257 for _, fieldName := range records[0] {
258 if seen[fieldName] {
259 return fmt.Errorf("cannot unmarshal CSV into %T: header row contains duplicate field names", v)
260 }
261 seen[fieldName] = true
262 }
263
264 sm := make([]map[string]string, len(records)-1)
265 for i, record := range records[1:] {
266 m := make(map[string]string, len(records[0]))
267 for j, col := range record {
268 m[records[0][j]] = col
269 }
270 sm[i] = m
271 }
272 *vv = sm
273 case "slice":
274 *vv = records
275 default:
276 return fmt.Errorf("cannot unmarshal CSV into %T: invalid targetType: expected either slice or map, received %s", v, d.TargetType)
277 }
278 default:
279 return fmt.Errorf("cannot unmarshal CSV into %T", v)
280 }
281
282 return nil
283 }
284
// orgDateRe matches an Org timestamp such as <2020-06-26 Fri> or
// [2020-06-26 Fri 10:00] and captures its date part. Whatever follows the
// date (day name, time, repeater) is optional, so a bare <2020-06-26> matches
// as well. It is compiled once at package scope instead of on every call.
var orgDateRe = regexp.MustCompile(`[<\[](\d{4}-\d{2}-\d{2})(?: .*)?[>\]]`)

// parseORGDate extracts the YYYY-MM-DD date from an Org timestamp. If s does
// not contain a bracketed timestamp, s is returned unchanged.
func parseORGDate(s string) string {
	if m := orgDateRe.FindStringSubmatch(s); m != nil {
		return m[1]
	}
	return s
}
292
293 func (d Decoder) unmarshalORG(data []byte, v any) error {
294 config := org.New()
295 config.Log = log.Default() // TODO(bep)
296 document := config.Parse(bytes.NewReader(data), "")
297 if document.Error != nil {
298 return document.Error
299 }
300 frontMatter := make(map[string]any, len(document.BufferSettings))
301 for k, v := range document.BufferSettings {
302 k = strings.ToLower(k)
303 if strings.HasSuffix(k, "[]") {
304 frontMatter[k[:len(k)-2]] = strings.Fields(v)
305 } else if strings.Contains(v, "\n") {
306 frontMatter[k] = strings.Split(v, "\n")
307 } else if k == "filetags" {
308 trimmed := strings.TrimPrefix(v, ":")
309 trimmed = strings.TrimSuffix(trimmed, ":")
310 frontMatter[k] = strings.Split(trimmed, ":")
311 } else if k == "date" || k == "lastmod" || k == "publishdate" || k == "expirydate" {
312 frontMatter[k] = parseORGDate(v)
313 } else {
314 frontMatter[k] = v
315 }
316 }
317 switch vv := v.(type) {
318 case *map[string]any:
319 *vv = frontMatter
320 case *any:
321 *vv = frontMatter
322 }
323 return nil
324 }
325
326 func toFileError(f Format, data []byte, err error) error {
327 return herrors.NewFileErrorFromName(err, fmt.Sprintf("_stream.%s", f)).UpdateContent(bytes.NewReader(data), nil)
328 }
329
330 // stringifyMapKeys recurses into in and changes all instances of
331 // map[interface{}]interface{} to map[string]interface{}. This is useful to
332 // work around the impedance mismatch between JSON and YAML unmarshaling that's
333 // described here: https://github.com/go-yaml/yaml/issues/139
334 //
335 // Inspired by https://github.com/stripe/stripe-mock, MIT licensed
336 func stringifyMapKeys(in any) (any, bool) {
337 switch in := in.(type) {
338 case []any:
339 for i, v := range in {
340 if vv, replaced := stringifyMapKeys(v); replaced {
341 in[i] = vv
342 }
343 }
344 case map[string]any:
345 for k, v := range in {
346 if vv, changed := stringifyMapKeys(v); changed {
347 in[k] = vv
348 }
349 }
350 case map[any]any:
351 res := make(map[string]any)
352 var (
353 ok bool
354 err error
355 )
356 for k, v := range in {
357 var ks string
358
359 if ks, ok = k.(string); !ok {
360 ks, err = cast.ToStringE(k)
361 if err != nil {
362 ks = fmt.Sprintf("%v", k)
363 }
364 }
365 if vv, replaced := stringifyMapKeys(v); replaced {
366 res[ks] = vv
367 } else {
368 res[ks] = v
369 }
370 }
371 return res, true
372 }
373
374 return nil, false
375 }