URI: 
       Make the HTML collector parsing more robust - hugo - [fork] hugo port for 9front
  HTML git clone git@git.drkhsh.at/hugo.git
   DIR Log
   DIR Files
   DIR Refs
   DIR Submodules
   DIR README
   DIR LICENSE
       ---
   DIR commit d33a7ebcc16e804f1db0dc1f1edad4d9f9e816ef
   DIR parent 2a364cca6487e7ecc1982c8ade2927d18c592cee
  HTML Author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
       Date:   Mon,  6 Feb 2023 17:29:12 +0100
       
       Make the HTML collector parsing more robust
       
       Most notably, better handling of self-closing elements
       
       Closes #10698
       
       Diffstat:
         M publisher/htmlElementsCollector.go  |      17 +++++++++++++----
         M publisher/htmlElementsCollector_te… |       4 ++++
       
       2 files changed, 17 insertions(+), 4 deletions(-)
       ---
   DIR diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go
       @@ -294,9 +294,10 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc 
                        }
        
                        tagName := w.buff.Bytes()[1:]
       +                isSelfClosing := tagName[len(tagName)-1] == '/'
        
                        switch {
       -                case skipInnerElementRe.Match(tagName):
       +                case !isSelfClosing && skipInnerElementRe.Match(tagName):
                                // pre, script etc. We collect classes etc. on the surrounding
                                // element, but skip the inner content.
                                w.backup()
       @@ -432,10 +433,18 @@ func parseStartTag(s string) string {
                })
        
                if spaceIndex == -1 {
       -                return s[1 : len(s)-1]
       +                s = s[1 : len(s)-1]
       +        } else {
       +                s = s[1:spaceIndex]
                }
        
       -        return s[1:spaceIndex]
       +        if s[len(s)-1] == '/' {
       +                // Self closing.
       +                s = s[:len(s)-1]
       +        }
       +
       +        return s
       +
        }
        
        // isClosedByTag reports whether b ends with a closing tag for tagName.
       @@ -487,7 +496,7 @@ LOOP:
                        }
                }
        
       -        if state != 2 {
       +        if state != 2 || lo >= hi {
                        return false
                }
        
   DIR diff --git a/publisher/htmlElementsCollector_test.go b/publisher/htmlElementsCollector_test.go
       @@ -110,6 +110,9 @@ func TestClassCollector(t *testing.T) {
                        {"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
                        {"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
                        {"Comments with elements before and after", `<div></div><!-- example comment --><span><span>`, f("div span", "", "")},
       +                {"Self closing tag", `<div><hr/></div>`, f("div hr", "", "")},
       +                // svg with self closing style tag.
       +                {"SVG with self closing style tag", `<svg><style/><g><path class="foo"/></g></svg>`, f("g path style svg", "foo", "")},
                        // Issue #8530
                        {"Comment with single quote", `<!-- Hero Area Image d'accueil --><i class="foo">`, f("i", "foo", "")},
                        {"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
       @@ -174,6 +177,7 @@ func TestEndsWithTag(t *testing.T) {
                        {"match space", "foo<  / div>", "div", true},
                        {"match space 2", "foo<  / div   \n>", "div", true},
                        {"match case", "foo</DIV>", "div", true},
       +                {"self closing", `</defs><g><g><path fill="#010101" d=asdf"/>`, "div", false},
                } {
                        c.Run(test.name, func(c *qt.C) {
                                got := isClosedByTag([]byte(test.s), []byte(test.tagName))