Use runes instead of bytes to truncate JSON feed titles

This fix avoids breaking Unicode strings.

It solves this error:

pq: invalid byte sequence for encoding "UTF8": 0xf0 0x9f 0x9a 0x2e
This commit is contained in:
Jan-Lukas Else 2021-05-31 20:42:59 +02:00 committed by GitHub
parent 1655ca235d
commit 20cd023c07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 2 deletions

View File

@ -182,8 +182,11 @@ func getAuthor(author jsonAuthor) string {
// truncate trims leading and trailing whitespace from str and, when the
// result is longer than 100 runes, shortens it to its first 100 runes
// followed by a single "…" (U+2026). Shorter strings are returned trimmed
// but otherwise unchanged.
func truncate(str string) string {
	// Limit is expressed in runes (Unicode code points), not bytes.
	const maxRunes = 100

	str = strings.TrimSpace(str)

	// Slice on rune boundaries: cutting the raw bytes (str[:n]) can split a
	// multi-byte UTF-8 sequence in half and yield an invalid string, which
	// PostgreSQL rejects with "invalid byte sequence for encoding UTF8".
	runes := []rune(str)
	if len(runes) > maxRunes {
		return string(runes[:maxRunes]) + "…"
	}
	return str
}

View File

@ -407,6 +407,41 @@ func TestParseTruncateItemTitle(t *testing.T) {
if len(feed.Entries[0].Title) != 103 {
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
}
if len([]rune(feed.Entries[0].Title)) != 101 {
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
}
}
// TestParseTruncateItemTitleUnicode feeds Parse a JSON feed whose single item
// title contains multi-byte characters (typographic quotes and apostrophes
// plus a ZWJ emoji sequence) and checks the byte and rune lengths of the
// resulting entry title.
func TestParseTruncateItemTitleUnicode(t *testing.T) {
	// The title is 93 runes / 110 bytes long, so the assertions below expect
	// it to come back with exactly those lengths. The apostrophes are U+2019;
	// dropping them changes both counts.
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"title": "My Example Feed",
		"home_page_url": "https://example.org/",
		"feed_url": "https://example.org/feed.json",
		"items": [
			{
				"title": "I’m riding my electric bike and came across this castle. It’s called “Schloss Richmond”. 🚴‍♂️"
			}
		]
	}`

	feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Fatal rather than Error: the checks below index Entries[0] and would
	// panic on an empty slice.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	title := feed.Entries[0].Title

	// Byte length.
	if len(title) != 110 {
		t.Errorf("Incorrect entry title byte length, got: %d (%s)", len(title), title)
	}

	// Rune (code point) length.
	if n := len([]rune(title)); n != 93 {
		t.Errorf("Incorrect entry title rune length, got: %d (%s)", n, title)
	}
}
func TestParseItemTitleWithXMLTags(t *testing.T) {