miniflux-v2/internal/reader/opml/parser.go
jvoisin c544dadd55 Fix categories import from Thunderbird's OPML
Thunderbird OPML exports are looking like this:

```xml
<opml version="1.0" xmlns:fz="urn:forumzilla:">
<head>
	<title>Thunderbird OPML Export - RSS</title>
    	<dateCreated>Sat, 24 Feb 2024 11:31:13 GMT</dateCreated>
</head>
<body>
	<outline title="News">
		<outline type="rss" ...>
		<outline type="rss" ...>
		...
	</outline>
	<outline title="Blogs">
		<outline type="rss" ...>
		<outline type="rss" ...>
		...
	</outline>
</body>
```

This commit make it so that categories are now correctly imported.
2024-02-24 19:43:33 -08:00

45 lines
1.3 KiB
Go

// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package opml // import "miniflux.app/v2/internal/reader/opml"
import (
"encoding/xml"
"fmt"
"io"
"miniflux.app/v2/internal/reader/encoding"
)
// Parse reads an OPML file and returns a SubcriptionList.
func Parse(data io.Reader) (SubcriptionList, error) {
opmlDocument := NewOPMLDocument()
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(opmlDocument)
if err != nil {
return nil, fmt.Errorf("opml: unable to parse document: %w", err)
}
return getSubscriptionsFromOutlines(opmlDocument.Outlines, ""), nil
}
func getSubscriptionsFromOutlines(outlines opmlOutlineCollection, category string) (subscriptions SubcriptionList) {
for _, outline := range outlines {
if outline.IsSubscription() {
subscriptions = append(subscriptions, &Subcription{
Title: outline.GetTitle(),
FeedURL: outline.FeedURL,
SiteURL: outline.GetSiteURL(),
CategoryName: category,
})
} else if outline.Outlines.HasChildren() {
subscriptions = append(subscriptions, getSubscriptionsFromOutlines(outline.Outlines, outline.GetTitle())...)
}
}
return subscriptions
}