URI: 
       Fix slow HTML elements collector for the pre case - hugo - [fork] hugo port for 9front
  HTML git clone git@git.drkhsh.at/hugo.git
   DIR Log
   DIR Files
   DIR Refs
   DIR Submodules
   DIR README
   DIR LICENSE
       ---
   DIR commit f9fc0e045bc1f72ba61fdf4a79b10a75a240394e
   DIR parent 4f4a1c00bfdc385c5afda9dcc1f259b1f9956991
  HTML Author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
       Date:   Sun,  5 Feb 2023 15:14:30 +0100
       
       Fix slow HTML elements collector for the pre case
       
       ```
       name                           old time/op    new time/op    delta
       ElementsCollectorWriterPre-10    25.2µs ± 1%     3.4µs ± 0%  -86.54%  (p=0.029 n=4+4)
       
       name                           old alloc/op   new alloc/op   delta
       ElementsCollectorWriterPre-10      624B ± 0%      142B ± 0%  -77.18%  (p=0.029 n=4+4)
       
       name                           old allocs/op  new allocs/op  delta
       ElementsCollectorWriterPre-10      16.0 ± 0%       6.0 ± 0%  -62.50%  (p=0.029 n=4+4)
       ```
       
       Fixes #10698
       
       Diffstat:
         M publisher/htmlElementsCollector.go  |      73 +++++++++++++++++++++++++++----
         M publisher/htmlElementsCollector_test.go |      28 ++++++++++++++++++++++++++++
       
       2 files changed, 93 insertions(+), 8 deletions(-)
       ---
   DIR diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go
       @@ -36,7 +36,6 @@ var (
        
                skipInnerElementRe = regexp.MustCompile(`(?i)^(pre|textarea|script|style)`)
                skipAllElementRe   = regexp.MustCompile(`(?i)^!DOCTYPE`)
       -        endTagRe           = regexp.MustCompile(`(?i)<\/\s*([a-zA-Z]+)\s*>$`)
        
                exceptionList = map[string]bool{
                        "thead": true,
       @@ -312,11 +311,7 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc 
                                                        if w.r != '>' {
                                                                return false
                                                        }
       -                                                m := endTagRe.FindSubmatch(w.buff.Bytes())
       -                                                if m == nil {
       -                                                        return false
       -                                                }
       -                                                return bytes.EqualFold(m[1], tagNameCopy)
       +                                                return isClosedByTag(w.buff.Bytes(), tagNameCopy)
                                                },
                                                htmlLexStart,
                                        ))
       @@ -428,8 +423,9 @@ func parseHTMLElement(elStr string) (el htmlElement, err error) {
        }
        
        // Variants of s
       -//    <body class="b a">
       -//    <div>
       +//
       +//        <body class="b a">
       +//        <div>
        func parseStartTag(s string) string {
                spaceIndex := strings.IndexFunc(s, func(r rune) bool {
                        return unicode.IsSpace(r)
       @@ -441,3 +437,64 @@ func parseStartTag(s string) string {
        
                return s[1:spaceIndex]
        }
       +
       +// isClosedByTag reports whether b ends with a closing tag for tagName.
       +func isClosedByTag(b, tagName []byte) bool {
       +        if len(b) == 0 {
       +                return false
       +        }
       +
       +        if b[len(b)-1] != '>' {
       +                return false
       +        }
       +
       +        var (
       +                lo int
       +                hi int
       +
       +                state  int
       +                inWord bool
       +        )
       +
       +LOOP:
       +        for i := len(b) - 2; i >= 0; i-- {
       +                switch {
       +                case b[i] == '<':
       +                        if state != 1 {
       +                                return false
       +                        }
       +                        state = 2
       +                        break LOOP
       +                case b[i] == '/':
       +                        if state != 0 {
       +                                return false
       +                        }
       +                        state++
       +                        if inWord {
       +                                lo = i + 1
       +                                inWord = false
       +                        }
       +                case isSpace(b[i]):
       +                        if inWord {
       +                                lo = i + 1
       +                                inWord = false
       +                        }
       +                default:
       +                        if !inWord {
       +                                hi = i + 1
       +                                inWord = true
       +                        }
       +                }
       +        }
       +
       +        if state != 2 {
       +                return false
       +        }
       +
       +        return bytes.EqualFold(tagName, b[lo:hi])
       +
       +}
       +
       +func isSpace(b byte) bool {
       +        return b == ' ' || b == '\t' || b == '\n'
       +}
   DIR diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go
       @@ -155,6 +155,34 @@ func TestClassCollector(t *testing.T) {
        
        }
        
       +func TestEndsWithTag(t *testing.T) {
       +        c := qt.New((t))
       +
       +        for _, test := range []struct {
       +                name    string
       +                s       string
       +                tagName string
       +                expect  bool
       +        }{
       +                {"empty", "", "div", false},
       +                {"no match", "foo", "div", false},
       +                {"no close", "foo<div>", "div", false},
       +                {"no close 2", "foo/div>", "div", false},
       +                {"no close 2", "foo//div>", "div", false},
       +                {"no tag", "foo</>", "div", false},
       +                {"match", "foo</div>", "div", true},
       +                {"match space", "foo<  / div>", "div", true},
       +                {"match space 2", "foo<  / div   \n>", "div", true},
       +                {"match case", "foo</DIV>", "div", true},
       +        } {
       +                c.Run(test.name, func(c *qt.C) {
       +                        got := isClosedByTag([]byte(test.s), []byte(test.tagName))
       +                        c.Assert(got, qt.Equals, test.expect)
       +                })
       +        }
       +
       +}
       +
        func BenchmarkElementsCollectorWriter(b *testing.B) {
                const benchHTML = `
        <!DOCTYPE html>