mirror of https://github.com/miniflux/v2.git
Disable strict XML parsing
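This change should improve parsing of broken XML feeds: with `Decoder.Strict` set to false, the decoder tolerates common mistakes such as an unescaped `&` inside a URL. See https://golang.org/pkg/encoding/xml/#Decoder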
This commit is contained in:
parent
ca48f7612a
commit
36d7732234
|
@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||||
atomFeed := new(atomFeed)
|
atomFeed := new(atomFeed)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
decoder.Entity = xml.HTMLEntity
|
decoder.Entity = xml.HTMLEntity
|
||||||
|
decoder.Strict = false
|
||||||
decoder.CharsetReader = encoding.CharsetReader
|
decoder.CharsetReader = encoding.CharsetReader
|
||||||
|
|
||||||
err := decoder.Decode(atomFeed)
|
err := decoder.Decode(atomFeed)
|
||||||
|
|
|
@ -577,3 +577,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
|
||||||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseWithInvalidCharacterEntity(t *testing.T) {
|
||||||
|
data := `
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/a&b"/>
|
||||||
|
</feed>
|
||||||
|
`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.SiteURL != "http://example.org/a&b" {
|
||||||
|
t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -17,6 +17,7 @@ func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) {
|
||||||
feeds := new(opml)
|
feeds := new(opml)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
decoder.Entity = xml.HTMLEntity
|
decoder.Entity = xml.HTMLEntity
|
||||||
|
decoder.Strict = false
|
||||||
decoder.CharsetReader = encoding.CharsetReader
|
decoder.CharsetReader = encoding.CharsetReader
|
||||||
|
|
||||||
err := decoder.Decode(feeds)
|
err := decoder.Decode(feeds)
|
||||||
|
|
|
@ -193,6 +193,40 @@ func TestParseOpmlVersion1WithoutOuterOutline(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseOpmlWithInvalidCharacterEntity(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0"?>
|
||||||
|
<opml version="1.0">
|
||||||
|
<head>
|
||||||
|
<title>mySubscriptions.opml</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<outline title="Feed 1">
|
||||||
|
<outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/a&b" htmlUrl="http://example.org/c&d"></outline>
|
||||||
|
</outline>
|
||||||
|
</body>
|
||||||
|
</opml>
|
||||||
|
`
|
||||||
|
|
||||||
|
var expected SubcriptionList
|
||||||
|
expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/a&b", SiteURL: "http://example.org/c&d", CategoryName: ""})
|
||||||
|
|
||||||
|
subscriptions, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(subscriptions) != 1 {
|
||||||
|
t.Errorf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(subscriptions); i++ {
|
||||||
|
if !subscriptions[i].Equals(expected[i]) {
|
||||||
|
t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], expected[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseInvalidXML(t *testing.T) {
|
func TestParseInvalidXML(t *testing.T) {
|
||||||
data := `garbage`
|
data := `garbage`
|
||||||
_, err := Parse(bytes.NewBufferString(data))
|
_, err := Parse(bytes.NewBufferString(data))
|
||||||
|
|
|
@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||||
feed := new(rdfFeed)
|
feed := new(rdfFeed)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
decoder.Entity = xml.HTMLEntity
|
decoder.Entity = xml.HTMLEntity
|
||||||
|
decoder.Strict = false
|
||||||
decoder.CharsetReader = encoding.CharsetReader
|
decoder.CharsetReader = encoding.CharsetReader
|
||||||
|
|
||||||
err := decoder.Decode(feed)
|
err := decoder.Decode(feed)
|
||||||
|
|
|
@ -403,3 +403,22 @@ func TestParseFeedWithHTMLEntity(t *testing.T) {
|
||||||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||||
|
<channel>
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link>http://example.org/a&b</link>
|
||||||
|
</channel>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.SiteURL != "http://example.org/a&b" {
|
||||||
|
t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||||
feed := new(rssFeed)
|
feed := new(rssFeed)
|
||||||
decoder := xml.NewDecoder(data)
|
decoder := xml.NewDecoder(data)
|
||||||
decoder.Entity = xml.HTMLEntity
|
decoder.Entity = xml.HTMLEntity
|
||||||
|
decoder.Strict = false
|
||||||
decoder.CharsetReader = encoding.CharsetReader
|
decoder.CharsetReader = encoding.CharsetReader
|
||||||
|
|
||||||
err := decoder.Decode(feed)
|
err := decoder.Decode(feed)
|
||||||
|
|
|
@ -633,3 +633,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
|
||||||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseWithInvalidCharacterEntity(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
||||||
|
<channel>
|
||||||
|
<link>https://example.org/a&b</link>
|
||||||
|
<title>Example Feed</title>
|
||||||
|
</channel>
|
||||||
|
</rss>`
|
||||||
|
|
||||||
|
feed, err := Parse(bytes.NewBufferString(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.SiteURL != "https://example.org/a&b" {
|
||||||
|
t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue