pageparser.go - hugo - [fork] hugo port for 9front
HTML git clone https://git.drkhsh.at/hugo.git
DIR Log
DIR Files
DIR Refs
DIR Submodules
DIR README
DIR LICENSE
---
pageparser.go (6359B)
---
1 // Copyright 2019 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package pageparser
15
16 import (
17 "bytes"
18 "errors"
19 "fmt"
20 "io"
21 "regexp"
22 "strings"
23
24 "github.com/gohugoio/hugo/parser/metadecoders"
25 )
26
// Result holds the parse result.
type Result interface {
	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
	Iterator() *Iterator
	// Input returns the input to Parse.
	Input() []byte
}

// Compile-time assertion that *pageLexer implements Result.
var _ Result = (*pageLexer)(nil)
36
37 // ParseBytes parses the page in b according to the given Config.
38 func ParseBytes(b []byte, cfg Config) (Items, error) {
39 startLexer := lexIntroSection
40 if cfg.NoFrontMatter {
41 startLexer = lexMainSection
42 }
43 l, err := parseBytes(b, cfg, startLexer)
44 if err != nil {
45 return nil, err
46 }
47 return l.items, l.err
48 }
49
// ContentFrontMatter holds the decoded front matter of a page together with
// the content that follows it.
type ContentFrontMatter struct {
	// Content is the raw page content after the front matter block.
	Content []byte
	// FrontMatter holds the front matter decoded into a map.
	FrontMatter map[string]any
	// FrontMatterFormat is the format the front matter was written in
	// (JSON, ORG, TOML or YAML).
	FrontMatterFormat metadecoders.Format
}
55
56 // ParseFrontMatterAndContent is a convenience method to extract front matter
57 // and content from a content page.
58 func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
59 var cf ContentFrontMatter
60
61 input, err := io.ReadAll(r)
62 if err != nil {
63 return cf, fmt.Errorf("failed to read page content: %w", err)
64 }
65
66 psr, err := ParseBytes(input, Config{})
67 if err != nil {
68 return cf, err
69 }
70
71 var frontMatterSource []byte
72
73 iter := NewIterator(psr)
74
75 walkFn := func(item Item) bool {
76 if frontMatterSource != nil {
77 // The rest is content.
78 cf.Content = input[item.low:]
79 // Done
80 return false
81 } else if item.IsFrontMatter() {
82 cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
83 frontMatterSource = item.Val(input)
84 }
85 return true
86 }
87
88 iter.PeekWalk(walkFn)
89
90 cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat)
91 return cf, err
92 }
93
94 func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
95 switch typ {
96 case TypeFrontMatterJSON:
97 return metadecoders.JSON
98 case TypeFrontMatterORG:
99 return metadecoders.ORG
100 case TypeFrontMatterTOML:
101 return metadecoders.TOML
102 case TypeFrontMatterYAML:
103 return metadecoders.YAML
104 default:
105 return ""
106 }
107 }
108
// ParseMain parses starting with the main section, skipping the front matter
// lexing state entirely. Used in tests.
func ParseMain(r io.Reader, cfg Config) (Result, error) {
	return parseSection(r, cfg, lexMainSection)
}
113
114 func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) {
115 b, err := io.ReadAll(r)
116 if err != nil {
117 return nil, fmt.Errorf("failed to read page content: %w", err)
118 }
119 return parseBytes(b, cfg, start)
120 }
121
// parseBytes runs the lexer over b starting in the given state and returns
// the lexer itself, which implements Result. The returned error is currently
// always nil; lexing errors are recorded on the lexer (see ParseBytes).
func parseBytes(b []byte, cfg Config, start stateFunc) (*pageLexer, error) {
	lexer := newPageLexer(b, start, cfg)
	lexer.run()
	return lexer, nil
}
127
// NewIterator creates a new Iterator positioned before the first item
// (lastPos == -1), so the first call to Next returns items[0].
func NewIterator(items Items) *Iterator {
	return &Iterator{items: items, lastPos: -1}
}
132
// An Iterator has methods to iterate a parsed page with support going back
// if needed.
type Iterator struct {
	items Items
	lastPos int // index of the last item returned by Next; -1 before the first call
}
139
140 // consumes and returns the next item
141 func (t *Iterator) Next() Item {
142 t.lastPos++
143 return t.Current()
144 }
145
// errIndexOutOfBounds is the sentinel error item returned by Current when the
// iterator position is out of bounds.
var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")}
147
148 // Current will repeatably return the current item.
149 func (t *Iterator) Current() Item {
150 if t.lastPos >= len(t.items) {
151 return errIndexOutOfBounds
152 }
153 return t.items[t.lastPos]
154 }
155
156 // backs up one token.
157 func (t *Iterator) Backup() {
158 if t.lastPos < 0 {
159 panic("need to go forward before going back")
160 }
161 t.lastPos--
162 }
163
// Pos returns the index of the last item returned by Next (-1 before the
// first call). Note: this is an item index, not a byte offset into the input.
func (t *Iterator) Pos() int {
	return t.lastPos
}
168
169 // check for non-error and non-EOF types coming next
170 func (t *Iterator) IsValueNext() bool {
171 i := t.Peek()
172 return i.Type != tError && i.Type != tEOF
173 }
174
// look at, but do not consume, the next item
// repeated, sequential calls will return the same item
// NOTE(review): unlike Current, this indexes without a bounds check and will
// panic if the iterator is already at or past the last item — callers appear
// to rely on a terminating EOF item being present; confirm before reuse.
func (t *Iterator) Peek() Item {
	return t.items[t.lastPos+1]
}
180
181 // PeekWalk will feed the next items in the iterator to walkFn
182 // until it returns false.
183 func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
184 for i := t.lastPos + 1; i < len(t.items); i++ {
185 item := t.items[i]
186 if !walkFn(item) {
187 break
188 }
189 }
190 }
191
192 // Consume is a convenience method to consume the next n tokens,
193 // but back off Errors and EOF.
194 func (t *Iterator) Consume(cnt int) {
195 for range cnt {
196 token := t.Next()
197 if token.Type == tError || token.Type == tEOF {
198 t.Backup()
199 break
200 }
201 }
202 }
203
// LineNumber returns the 1-based line number of the current item, computed by
// counting line feeds in source before the item's start offset. Used for
// logging; source must be the input the items were parsed from.
func (t *Iterator) LineNumber(source []byte) int {
	return bytes.Count(source[:t.Current().low], lf) + 1
}
208
209 // IsProbablySourceOfItems returns true if the given source looks like original
210 // source of the items.
211 // There may be some false positives, but that is highly unlikely and good enough
212 // for the planned purpose.
213 // It will also return false if the last item is not EOF (error situations) and
214 // true if both source and items are empty.
215 func IsProbablySourceOfItems(source []byte, items Items) bool {
216 if len(source) == 0 && len(items) == 0 {
217 return false
218 }
219 if len(items) == 0 {
220 return false
221 }
222
223 last := items[len(items)-1]
224 if last.Type != tEOF {
225 return false
226 }
227
228 if last.Pos() != len(source) {
229 return false
230 }
231
232 for _, item := range items {
233 if item.Type == tError {
234 return false
235 }
236 if item.Type == tEOF {
237 return true
238 }
239
240 if item.Pos() >= len(source) {
241 return false
242 }
243
244 if item.firstByte != source[item.Pos()] {
245 return false
246 }
247 }
248
249 return true
250 }
251
// hasShortcodeRe matches an opening shortcode delimiter ("{{%" or "{{<")
// whose following character is not a slash, i.e. not a closing tag.
// NOTE(review): the comma inside [%,<] also makes a literal "{{," match;
// presumably unintended, but the pattern is kept byte-for-byte for
// compatibility.
var hasShortcodeRe = regexp.MustCompile(`{{[%,<][^\/]`)

// HasShortcode returns true if the given string contains a shortcode.
func HasShortcode(s string) bool {
	// Cheap substring check first; the regexp only runs when "{{" is present.
	if strings.Contains(s, "{{") {
		return hasShortcodeRe.MatchString(s)
	}
	return false
}