pageparser.go - hugo - [fork] hugo port for 9front
HTML git clone https://git.drkhsh.at/hugo.git
DIR Log
DIR Files
DIR Refs
DIR Submodules
DIR README
DIR LICENSE
---
pageparser.go (6359B)
---
1 // Copyright 2019 The Hugo Authors. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13
14 package pageparser
15
16 import (
17 "bytes"
18 "errors"
19 "fmt"
20 "io"
21 "regexp"
22 "strings"
23
24 "github.com/gohugoio/hugo/parser/metadecoders"
25 )
26
// Result holds the parse result.
type Result interface {
	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
	Iterator() *Iterator
	// Input returns the input to Parse.
	Input() []byte
}

// Compile-time assertion that *pageLexer implements Result.
var _ Result = (*pageLexer)(nil)
36
37 // ParseBytes parses the page in b according to the given Config.
38 func ParseBytes(b []byte, cfg Config) (Items, error) {
39 startLexer := lexIntroSection
40 if cfg.NoFrontMatter {
41 startLexer = lexMainSection
42 }
43 l, err := parseBytes(b, cfg, startLexer)
44 if err != nil {
45 return nil, err
46 }
47 return l.items, l.err
48 }
49
// ContentFrontMatter holds the decoded front matter of a page together with
// the content that follows it.
type ContentFrontMatter struct {
	// Content is the raw page content after the front matter block.
	Content []byte
	// FrontMatter holds the front matter decoded into a map.
	FrontMatter map[string]any
	// FrontMatterFormat is the format the front matter was written in
	// (JSON, ORG, TOML or YAML).
	FrontMatterFormat metadecoders.Format
}
55
56 // ParseFrontMatterAndContent is a convenience method to extract front matter
57 // and content from a content page.
58 func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
59 var cf ContentFrontMatter
60
61 input, err := io.ReadAll(r)
62 if err != nil {
63 return cf, fmt.Errorf("failed to read page content: %w", err)
64 }
65
66 psr, err := ParseBytes(input, Config{})
67 if err != nil {
68 return cf, err
69 }
70
71 var frontMatterSource []byte
72
73 iter := NewIterator(psr)
74
75 walkFn := func(item Item) bool {
76 if frontMatterSource != nil {
77 // The rest is content.
78 cf.Content = input[item.low:]
79 // Done
80 return false
81 } else if item.IsFrontMatter() {
82 cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
83 frontMatterSource = item.Val(input)
84 }
85 return true
86 }
87
88 iter.PeekWalk(walkFn)
89
90 cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat)
91 return cf, err
92 }
93
94 func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
95 switch typ {
96 case TypeFrontMatterJSON:
97 return metadecoders.JSON
98 case TypeFrontMatterORG:
99 return metadecoders.ORG
100 case TypeFrontMatterTOML:
101 return metadecoders.TOML
102 case TypeFrontMatterYAML:
103 return metadecoders.YAML
104 default:
105 return ""
106 }
107 }
108
// ParseMain parses starting with the main section, skipping the front matter
// lexing state entirely. Used in tests.
func ParseMain(r io.Reader, cfg Config) (Result, error) {
	return parseSection(r, cfg, lexMainSection)
}
113
114 func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) {
115 b, err := io.ReadAll(r)
116 if err != nil {
117 return nil, fmt.Errorf("failed to read page content: %w", err)
118 }
119 return parseBytes(b, cfg, start)
120 }
121
// parseBytes runs the lexer over b starting in the given state and returns
// the lexer itself, which implements Result. The returned error is currently
// always nil; lexing errors are recorded on the lexer (see ParseBytes).
func parseBytes(b []byte, cfg Config, start stateFunc) (*pageLexer, error) {
	lexer := newPageLexer(b, start, cfg)
	lexer.run()
	return lexer, nil
}
127
// NewIterator creates a new Iterator positioned before the first item
// (lastPos == -1), so the first call to Next returns items[0].
func NewIterator(items Items) *Iterator {
	return &Iterator{items: items, lastPos: -1}
}
132
// An Iterator has methods to iterate a parsed page with support going back
// if needed.
type Iterator struct {
	items Items
	lastPos int // index of the last item returned by Next; -1 before the first call
}
139
140 // consumes and returns the next item
141 func (t *Iterator) Next() Item {
142 t.lastPos++
143 return t.Current()
144 }
145
// errIndexOutOfBounds is the sentinel error item returned by Current when the
// iterator position is out of bounds.
var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")}
147
148 // Current will repeatably return the current item.
149 func (t *Iterator) Current() Item {
150 if t.lastPos >= len(t.items) {
151 return errIndexOutOfBounds
152 }
153 return t.items[t.lastPos]
154 }
155
156 // backs up one token.
157 func (t *Iterator) Backup() {
158 if t.lastPos < 0 {
159 panic("need to go forward before going back")
160 }
161 t.lastPos--
162 }
163
// Pos returns the index of the last item returned by Next (-1 before the
// first call). Note: this is an item index, not a byte offset into the input.
func (t *Iterator) Pos() int {
	return t.lastPos
}
168
169 // check for non-error and non-EOF types coming next
170 func (t *Iterator) IsValueNext() bool {
171 i := t.Peek()
172 return i.Type != tError && i.Type != tEOF
173 }
174
// look at, but do not consume, the next item
// repeated, sequential calls will return the same item
// NOTE(review): unlike Current, this indexes without a bounds check and will
// panic if the iterator is already at or past the last item — callers appear
// to rely on a terminating EOF item being present; confirm before reuse.
func (t *Iterator) Peek() Item {
	return t.items[t.lastPos+1]
}
180
181 // PeekWalk will feed the next items in the iterator to walkFn
182 // until it returns false.
183 func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
184 for i := t.lastPos + 1; i < len(t.items); i++ {
185 item := t.items[i]
186 if !walkFn(item) {
187 break
188 }
189 }
190 }
191
192 // Consume is a convenience method to consume the next n tokens,
193 // but back off Errors and EOF.
194 func (t *Iterator) Consume(cnt int) {
195 for range cnt {
196 token := t.Next()
197 if token.Type == tError || token.Type == tEOF {
198 t.Backup()
199 break
200 }
201 }
202 }
203
// LineNumber returns the 1-based line number of the current item, computed by
// counting line feeds in source before the item's start offset. Used for
// logging; source must be the input the items were parsed from.
func (t *Iterator) LineNumber(source []byte) int {
	return bytes.Count(source[:t.Current().low], lf) + 1
}
208
209 // IsProbablySourceOfItems returns true if the given source looks like original
210 // source of the items.
211 // There may be some false positives, but that is highly unlikely and good enough
212 // for the planned purpose.
213 // It will also return false if the last item is not EOF (error situations) and
214 // true if both source and items are empty.
215 func IsProbablySourceOfItems(source []byte, items Items) bool {
216 if len(source) == 0 && len(items) == 0 {
217 return false
218 }
219 if len(items) == 0 {
220 return false
221 }
222
223 last := items[len(items)-1]
224 if last.Type != tEOF {
225 return false
226 }
227
228 if last.Pos() != len(source) {
229 return false
230 }
231
232 for _, item := range items {
233 if item.Type == tError {
234 return false
235 }
236 if item.Type == tEOF {
237 return true
238 }
239
240 if item.Pos() >= len(source) {
241 return false
242 }
243
244 if item.firstByte != source[item.Pos()] {
245 return false
246 }
247 }
248
249 return true
250 }
251
// hasShortcodeRe matches an opening shortcode delimiter ("{{%" or "{{<")
// whose following character is not a slash, i.e. not a closing tag.
// NOTE(review): the comma inside [%,<] also makes a literal "{{," match;
// presumably unintended, but the pattern is kept byte-for-byte for
// compatibility.
var hasShortcodeRe = regexp.MustCompile(`{{[%,<][^\/]`)

// HasShortcode returns true if the given string contains a shortcode.
func HasShortcode(s string) bool {
	// Cheap substring check first; the regexp only runs when "{{" is present.
	if strings.Contains(s, "{{") {
		return hasShortcodeRe.MatchString(s)
	}
	return false
}