tpl: Fix countwords to handle special chars - hugo - [fork] hugo port for 9front
HTML git clone git@git.drkhsh.at/hugo.git
DIR Log
DIR Files
DIR Refs
DIR Submodules
DIR README
DIR LICENSE
---
DIR commit 7a2c10ae60f096dacee4b44e0c8ae0a1b66ae033
DIR parent e1c328df2590164becc150de842f69292abe557a
HTML Author: Julien Midedji <Julien.Midedji@gmail.com>
Date: Mon, 3 May 2021 09:10:06 +0200
tpl: Fix countwords to handle special chars
Fixes #8479
Diffstat:
M tpl/strings/strings.go | 10 ++++++++++
M tpl/strings/strings_test.go | 3 +++
2 files changed, 13 insertions(+), 0 deletions(-)
---
DIR diff --git a/tpl/strings/strings.go b/tpl/strings/strings.go
@@ -17,6 +17,7 @@ package strings
import (
"errors"
"html/template"
+ "regexp"
"strings"
"unicode/utf8"
@@ -75,6 +76,15 @@ func (ns *Namespace) CountWords(s interface{}) (int, error) {
return 0, _errors.Wrap(err, "Failed to convert content to string")
}
+ isCJKLanguage, err := regexp.MatchString(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`, ss)
+ if err != nil {
+ return 0, _errors.Wrap(err, "Failed to match regex pattern against string")
+ }
+
+ if !isCJKLanguage {
+ return len(strings.Fields(helpers.StripHTML((ss)))), nil
+ }
+
counter := 0
for _, word := range strings.Fields(helpers.StripHTML(ss)) {
runeCount := utf8.RuneCountInString(word)
DIR diff --git a/tpl/strings/strings_test.go b/tpl/strings/strings_test.go
@@ -210,6 +210,9 @@ func TestCountWords(t *testing.T) {
{"Do Be Do Be Do", 5},
{"旁边", 2},
{`<div class="test">旁边</div>`, 2},
+ {"Here's to you...", 3},
+ {"Here’s to you...", 3},
+ {"Here’s to you…", 3},
// errors
{tstNoStringer{}, false},
} {