From 20cd023c0767ae803df2f06a9a7fdc6a60b7007b Mon Sep 17 00:00:00 2001 From: Jan-Lukas Else Date: Mon, 31 May 2021 20:42:59 +0200 Subject: [PATCH] Use runes instead of bytes to truncate JSON feed titles This fix avoids breaking Unicode strings. It solves this error: pq: invalid byte sequence for encoding "UTF8": 0xf0 0x9f 0x9a 0x2e --- reader/json/json.go | 7 +++++-- reader/json/parser_test.go | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/reader/json/json.go b/reader/json/json.go index 18bbcaee..65b44e58 100644 --- a/reader/json/json.go +++ b/reader/json/json.go @@ -182,8 +182,11 @@ func getAuthor(author jsonAuthor) string { func truncate(str string) string { max := 100 str = strings.TrimSpace(str) - if len(str) > max { - return str[:max] + "..." + + // Convert to runes to be safe with unicode + runes := []rune(str) + if len(runes) > max { + return string(runes[:max]) + "…" } return str diff --git a/reader/json/parser_test.go b/reader/json/parser_test.go index 46bfba40..81eaf497 100644 --- a/reader/json/parser_test.go +++ b/reader/json/parser_test.go @@ -407,6 +407,41 @@ func TestParseTruncateItemTitle(t *testing.T) { if len(feed.Entries[0].Title) != 103 { t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) } + + if len([]rune(feed.Entries[0].Title)) != 101 { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + +func TestParseTruncateItemTitleUnicode(t *testing.T) { + data := `{ + "version": "https://jsonfeed.org/version/1", + "title": "My Example Feed", + "home_page_url": "https://example.org/", + "feed_url": "https://example.org/feed.json", + "items": [ + { + "title": "I’m riding my electric bike and came across this castle. It’s called “Schloss Richmond”. 
🚴‍♂️" + } + ] + }` + + feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if len(feed.Entries[0].Title) != 110 { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } + + if len([]rune(feed.Entries[0].Title)) != 93 { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } } func TestParseItemTitleWithXMLTags(t *testing.T) {