Handle atom feed with space around CDATA

Trim whitespace around the inner XML before checking for the CDATA prefix, so that the CharData is still used when a CDATA section is surrounded by whitespace.

This problem was discovered when reading https://www.sethvargo.com/feed.xml.
In that feed, the title and summary elements have newlines and spaces between
the element's tags and the CDATA section, for example:

  <title>
    <![CDATA[Entry title here]]>
  </title>

This meant the entry title was coming into Miniflux with the raw CDATA markup, as:
  <![CDATA[Entry title here]]>
This commit is contained in:
Adrian Smith 2022-01-17 21:31:11 +00:00 committed by Frédéric Guillot
parent 7b0a4a7803
commit cc3e65dd3c
2 changed files with 15 additions and 2 deletions

View File

@ -229,10 +229,9 @@ type atom10Text struct {
func (a *atom10Text) String() string {
var content string
switch {
case a.Type == "", a.Type == "text", a.Type == "text/plain":
if strings.HasPrefix(a.InnerXML, `<![CDATA[`) {
if strings.HasPrefix(strings.TrimSpace(a.InnerXML), `<![CDATA[`) {
content = html.EscapeString(a.CharData)
} else {
content = a.InnerXML

View File

@ -303,6 +303,16 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
<summary>Some text.</summary>
</entry>
<entry>
<title>
<![CDATA[Entry title with space around CDATA]]>
</title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
@ -317,6 +327,10 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
if feed.Entries[1].Title != "Test “Test”" {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title)
}
if feed.Entries[2].Title != "Entry title with space around CDATA" {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[2].Title)
}
}
func TestParseEntryWithXHTMLTitle(t *testing.T) {