URI: 
       Lazy calculate  WordCount, ReadingTime and FuzzyWordCount - hugo - [fork] hugo port for 9front
  HTML git clone git@git.drkhsh.at/hugo.git
   DIR Log
   DIR Files
   DIR Refs
   DIR Submodules
   DIR README
   DIR LICENSE
       ---
   DIR commit dd45e6d7e5406991d8df3a2f9ba4c7e5ae039c34
   DIR parent 4abaec5c045e92ae5f8b3a2dc66606b080ef6ea5
  HTML Author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
       Date:   Wed, 17 Aug 2016 13:41:48 +0200
       
       Lazy calculate  WordCount, ReadingTime and FuzzyWordCount
       
       This avoids having to execute these expensive operations for sites not using these values.
       
       This commit sums up a set of word-counting and auto-summary-related performance improvements.
       
       The effect of these improvements depends on which features your site uses, but a benchmark of 4 Hugo sites in the wild shows promise:
       
       ```
       benchmark           old ns/op       new ns/op       delta
       BenchmarkHugo-4     21293005843     20032857342     -5.92%
       
       benchmark           old allocs     new allocs     delta
       BenchmarkHugo-4     65290922       65186032       -0.16%
       
       benchmark           old bytes      new bytes      delta
       BenchmarkHugo-4     9771213416     9681866464     -0.91%
       ```
       
       Closes #2378
       
       Diffstat:
         M helpers/content.go                  |      21 +++++++++++++++------
         M helpers/content_test.go             |      13 ++++++++++++-
         M hugolib/page.go                     |      61 +++++++++++++++++++++----------
         M hugolib/pageSort_test.go            |      10 +++++-----
         M hugolib/page_test.go                |      40 ++++++++++++++++----------------
         M hugolib/pagination_test.go          |      10 +++++-----
         M hugolib/site_test.go                |       5 +++++
       
       7 files changed, 103 insertions(+), 57 deletions(-)
       ---
   DIR diff --git a/helpers/content.go b/helpers/content.go
       @@ -138,19 +138,28 @@ func StripHTML(s string) string {
                // Walk through the string removing all tags
                b := bp.GetBuffer()
                defer bp.PutBuffer(b)
       -
       -        inTag := false
       +        var inTag, isSpace, wasSpace bool
                for _, r := range s {
       -                switch r {
       -                case '<':
       +                if !inTag {
       +                        isSpace = false
       +                }
       +
       +                switch {
       +                case r == '<':
                                inTag = true
       -                case '>':
       +                case r == '>':
                                inTag = false
       +                case unicode.IsSpace(r):
       +                        isSpace = true
       +                        fallthrough
                        default:
       -                        if !inTag {
       +                        if !inTag && (!isSpace || (isSpace && !wasSpace)) {
                                        b.WriteRune(r)
                                }
                        }
       +
       +                wasSpace = isSpace
       +
                }
                return b.String()
        }
   DIR diff --git a/helpers/content_test.go b/helpers/content_test.go
       @@ -34,11 +34,22 @@ func TestStripHTML(t *testing.T) {
                }
                data := []test{
                        {"<h1>strip h1 tag <h1>", "strip h1 tag "},
       -                {"<p> strip p tag </p>", " strip p tag \n"},
       +                {"<p> strip p tag </p>", " strip p tag "},
                        {"</br> strip br<br>", " strip br\n"},
                        {"</br> strip br2<br />", " strip br2\n"},
                        {"This <strong>is</strong> a\nnewline", "This is a newline"},
                        {"No Tags", "No Tags"},
       +                {`<p>Summary Next Line. 
       +<figure >
       +    
       +        <img src="/not/real" />
       +    
       +    
       +</figure>
       +.
       +More text here.</p>
       +
       +<p>Some more text</p>`, "Summary Next Line.  . More text here.\nSome more text\n"},
                }
                for i, d := range data {
                        output := StripHTML(d.input)
   DIR diff --git a/hugolib/page.go b/hugolib/page.go
       @@ -107,9 +107,10 @@ type Source struct {
                source.File
        }
        type PageMeta struct {
       -        WordCount      int
       -        FuzzyWordCount int
       -        ReadingTime    int
       +        wordCount      int
       +        fuzzyWordCount int
       +        readingTime    int
       +        pageMetaInit   sync.Once
                Weight         int
        }
        
       @@ -485,28 +486,48 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
                return int64(len(p.rawContent)), nil
        }
        
       +func (p *Page) WordCount() int {
       +        p.analyzePage()
       +        return p.wordCount
       +}
       +
       +func (p *Page) ReadingTime() int {
       +        p.analyzePage()
       +        return p.readingTime
       +}
       +
       +func (p *Page) FuzzyWordCount() int {
       +        p.analyzePage()
       +        return p.fuzzyWordCount
       +}
       +
        func (p *Page) analyzePage() {
       -        if p.isCJKLanguage {
       -                p.WordCount = 0
       -                for _, word := range p.PlainWords() {
       -                        runeCount := utf8.RuneCountInString(word)
       -                        if len(word) == runeCount {
       -                                p.WordCount++
       -                        } else {
       -                                p.WordCount += runeCount
       +        p.pageMetaInit.Do(func() {
       +                if p.isCJKLanguage {
       +                        p.wordCount = 0
       +                        for _, word := range p.PlainWords() {
       +                                runeCount := utf8.RuneCountInString(word)
       +                                if len(word) == runeCount {
       +                                        p.wordCount++
       +                                } else {
       +                                        p.wordCount += runeCount
       +                                }
                                }
       +                } else {
       +                        p.wordCount = helpers.TotalWords(p.Plain())
                        }
       -        } else {
       -                p.WordCount = len(p.PlainWords())
       -        }
        
       -        p.FuzzyWordCount = (p.WordCount + 100) / 100 * 100
       +                // TODO(bep) is set in a test. Fix that.
       +                if p.fuzzyWordCount == 0 {
       +                        p.fuzzyWordCount = (p.wordCount + 100) / 100 * 100
       +                }
        
       -        if p.isCJKLanguage {
       -                p.ReadingTime = (p.WordCount + 500) / 501
       -        } else {
       -                p.ReadingTime = (p.WordCount + 212) / 213
       -        }
       +                if p.isCJKLanguage {
       +                        p.readingTime = (p.wordCount + 500) / 501
       +                } else {
       +                        p.readingTime = (p.wordCount + 212) / 213
       +                }
       +        })
        }
        
        func (p *Page) permalink() (*url.URL, error) {
   DIR diff --git a/hugolib/pageSort_test.go b/hugolib/pageSort_test.go
       @@ -95,11 +95,11 @@ func TestLimit(t *testing.T) {
        
        func TestPageSortReverse(t *testing.T) {
                p1 := createSortTestPages(10)
       -        assert.Equal(t, 0, p1[0].FuzzyWordCount)
       -        assert.Equal(t, 9, p1[9].FuzzyWordCount)
       +        assert.Equal(t, 0, p1[0].fuzzyWordCount)
       +        assert.Equal(t, 9, p1[9].fuzzyWordCount)
                p2 := p1.Reverse()
       -        assert.Equal(t, 9, p2[0].FuzzyWordCount)
       -        assert.Equal(t, 0, p2[9].FuzzyWordCount)
       +        assert.Equal(t, 9, p2[0].fuzzyWordCount)
       +        assert.Equal(t, 0, p2[9].fuzzyWordCount)
                // cached
                assert.True(t, probablyEqualPages(p2, p1.Reverse()))
        }
       @@ -149,7 +149,7 @@ func createSortTestPages(num int) Pages {
                        if i%2 == 0 {
                                w = 10
                        }
       -                pages[i].FuzzyWordCount = i
       +                pages[i].fuzzyWordCount = i
                        pages[i].Weight = w
                        pages[i].Description = "initial"
                }
   DIR diff --git a/hugolib/page_test.go b/hugolib/page_test.go
       @@ -504,10 +504,13 @@ func checkPageContent(t *testing.T, page *Page, content string, msg ...interface
        }
        
        func normalizeContent(c string) string {
       -        norm := strings.Replace(c, "\n", "", -1)
       +        norm := c
       +        norm = strings.Replace(norm, "\n", " ", -1)
                norm = strings.Replace(norm, "    ", " ", -1)
                norm = strings.Replace(norm, "   ", " ", -1)
                norm = strings.Replace(norm, "  ", " ", -1)
       +        norm = strings.Replace(norm, "p> ", "p>", -1)
       +        norm = strings.Replace(norm, ">  <", "> <", -1)
                return strings.TrimSpace(norm)
        }
        
       @@ -710,8 +713,8 @@ func TestPageWithShortCodeInSummary(t *testing.T) {
        
                assertFunc := func(t *testing.T, ext string, p *Page) {
                        checkPageTitle(t, p, "Simple")
       -                checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. <figure > <img src=\"/not/real\" /> </figure>.\nMore text here.</p><p>Some more text</p>"), ext)
       -                checkPageSummary(t, p, "Summary Next Line. . More text here. Some more text", ext)
       +                checkPageContent(t, p, normalizeExpected(ext, "<p>Summary Next Line. \n<figure >\n    \n        <img src=\"/not/real\" />\n    \n    \n</figure>\n.\nMore text here.</p>\n\n<p>Some more text</p>\n"))
       +                checkPageSummary(t, p, "Summary Next Line.  . More text here. Some more text")
                        checkPageType(t, p, "page")
                        checkPageLayout(t, p, "page/single.html", "_default/single.html", "theme/page/single.html", "theme/_default/single.html")
                }
       @@ -793,8 +796,8 @@ func TestWordCountWithAllCJKRunesWithoutHasCJKLanguage(t *testing.T) {
                testCommonResetState()
        
                assertFunc := func(t *testing.T, ext string, p *Page) {
       -                if p.WordCount != 8 {
       -                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount)
       +                if p.WordCount() != 8 {
       +                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 8, p.WordCount())
                        }
                }
        
       @@ -806,11 +809,10 @@ func TestWordCountWithAllCJKRunesHasCJKLanguage(t *testing.T) {
                viper.Set("HasCJKLanguage", true)
        
                assertFunc := func(t *testing.T, ext string, p *Page) {
       -                if p.WordCount != 15 {
       -                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount)
       +                if p.WordCount() != 15 {
       +                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 15, p.WordCount())
                        }
                }
       -
                testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithAllCJKRunes)
        }
        
       @@ -820,15 +822,14 @@ func TestWordCountWithMainEnglishWithCJKRunes(t *testing.T) {
                viper.Set("HasCJKLanguage", true)
        
                assertFunc := func(t *testing.T, ext string, p *Page) {
       -                if p.WordCount != 74 {
       -                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)
       +                if p.WordCount() != 74 {
       +                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())
                        }
        
                        if p.Summary != simplePageWithMainEnglishWithCJKRunesSummary {
                                t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,
                                        simplePageWithMainEnglishWithCJKRunesSummary, p.Summary)
                        }
       -
                }
        
                testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithMainEnglishWithCJKRunes)
       @@ -839,15 +840,14 @@ func TestWordCountWithIsCJKLanguageFalse(t *testing.T) {
                viper.Set("HasCJKLanguage", true)
        
                assertFunc := func(t *testing.T, ext string, p *Page) {
       -                if p.WordCount != 75 {
       -                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount)
       +                if p.WordCount() != 75 {
       +                        t.Fatalf("[%s] incorrect word count for content '%s'. expected %v, got %v", ext, p.plain, 74, p.WordCount())
                        }
        
                        if p.Summary != simplePageWithIsCJKLanguageFalseSummary {
                                t.Fatalf("[%s] incorrect Summary for content '%s'. expected %v, got %v", ext, p.plain,
                                        simplePageWithIsCJKLanguageFalseSummary, p.Summary)
                        }
       -
                }
        
                testAllMarkdownEnginesForPage(t, assertFunc, "simple", simplePageWithIsCJKLanguageFalse)
       @@ -857,16 +857,16 @@ func TestWordCountWithIsCJKLanguageFalse(t *testing.T) {
        func TestWordCount(t *testing.T) {
        
                assertFunc := func(t *testing.T, ext string, p *Page) {
       -                if p.WordCount != 483 {
       -                        t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount)
       +                if p.WordCount() != 483 {
       +                        t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 483, p.WordCount())
                        }
        
       -                if p.FuzzyWordCount != 500 {
       -                        t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount)
       +                if p.FuzzyWordCount() != 500 {
       +                        t.Fatalf("[%s] incorrect word count. expected %v, got %v", ext, 500, p.WordCount())
                        }
        
       -                if p.ReadingTime != 3 {
       -                        t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime)
       +                if p.ReadingTime() != 3 {
       +                        t.Fatalf("[%s] incorrect min read. expected %v, got %v", ext, 3, p.ReadingTime())
                        }
        
                        checkTruncation(t, p, true, "long page")
   DIR diff --git a/hugolib/pagination_test.go b/hugolib/pagination_test.go
       @@ -55,7 +55,7 @@ func TestSplitPageGroups(t *testing.T) {
                                // first group 10 in weight
                                assert.Equal(t, 10, pg.Key)
                                for _, p := range pg.Pages {
       -                                assert.True(t, p.FuzzyWordCount%2 == 0) // magic test
       +                                assert.True(t, p.fuzzyWordCount%2 == 0) // magic test
                                }
                        }
                } else {
       @@ -70,7 +70,7 @@ func TestSplitPageGroups(t *testing.T) {
                                // last should have 5 in weight
                                assert.Equal(t, 5, pg.Key)
                                for _, p := range pg.Pages {
       -                                assert.True(t, p.FuzzyWordCount%2 != 0) // magic test
       +                                assert.True(t, p.fuzzyWordCount%2 != 0) // magic test
                                }
                        }
                } else {
       @@ -443,10 +443,10 @@ func TestPage(t *testing.T) {
                page21, _ := f2.page(1)
                page2Nil, _ := f2.page(3)
        
       -        assert.Equal(t, 1, page11.FuzzyWordCount)
       +        assert.Equal(t, 3, page11.fuzzyWordCount)
                assert.Nil(t, page1Nil)
        
       -        assert.Equal(t, 1, page21.FuzzyWordCount)
       +        assert.Equal(t, 3, page21.fuzzyWordCount)
                assert.Nil(t, page2Nil)
        }
        
       @@ -468,7 +468,7 @@ func createTestPages(num int) Pages {
                        if i%2 == 0 {
                                w = 10
                        }
       -                pages[i].FuzzyWordCount = i
       +                pages[i].fuzzyWordCount = i + 2
                        pages[i].Weight = w
                }
        
   DIR diff --git a/hugolib/site_test.go b/hugolib/site_test.go
       @@ -33,6 +33,11 @@ import (
                "github.com/stretchr/testify/require"
        )
        
       +func init() {
       +        //There are expected ERROR logging in tests that produces a lot of noise.
       +        jww.SetStdoutThreshold(jww.LevelCritical)
       +}
       +
        const (
                pageSimpleTitle = `---
        title: simple template