miniflux-v2/internal/storage/feed.go

457 lines
12 KiB
Go
Raw Normal View History

// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
2017-11-20 06:10:04 +01:00
package storage // import "miniflux.app/v2/internal/storage"
2017-11-20 06:10:04 +01:00
import (
"database/sql"
"errors"
"fmt"
"log/slog"
"sort"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
2017-11-20 06:10:04 +01:00
)
// byStateAndName implements sort.Interface over a list of feeds.
// Enabled feeds come before disabled ones; ties are broken by the highest
// parsing error count, then the highest unread count, then the title in
// ascending order.
type byStateAndName struct{ f model.Feeds }

// Len returns the number of feeds being sorted.
func (by byStateAndName) Len() int { return len(by.f) }

// Swap exchanges the feeds at positions i and j.
func (by byStateAndName) Swap(i, j int) { by.f[i], by.f[j] = by.f[j], by.f[i] }

// Less reports whether the feed at position i must be ordered before the feed at position j.
func (by byStateAndName) Less(i, j int) bool {
	left, right := by.f[i], by.f[j]
	switch {
	case left.Disabled != right.Disabled:
		// The disabled flag is compared first: errors do not matter for a disabled feed.
		return right.Disabled
	case left.ParsingErrorCount != right.ParsingErrorCount:
		return left.ParsingErrorCount > right.ParsingErrorCount
	case left.UnreadCount != right.UnreadCount:
		return left.UnreadCount > right.UnreadCount
	default:
		return left.Title < right.Title
	}
}
2017-11-28 06:30:04 +01:00
// FeedExists checks if the given feed exists.
2017-11-20 06:10:04 +01:00
func (s *Storage) FeedExists(userID, feedID int64) bool {
2019-10-30 06:48:07 +01:00
var result bool
query := `SELECT true FROM feeds WHERE user_id=$1 AND id=$2`
2017-11-20 06:10:04 +01:00
s.db.QueryRow(query, userID, feedID).Scan(&result)
2019-10-30 06:48:07 +01:00
return result
2017-11-20 06:10:04 +01:00
}
2017-11-28 06:30:04 +01:00
// FeedURLExists checks if feed URL already exists.
2017-11-20 06:10:04 +01:00
func (s *Storage) FeedURLExists(userID int64, feedURL string) bool {
2019-10-30 06:48:07 +01:00
var result bool
query := `SELECT true FROM feeds WHERE user_id=$1 AND feed_url=$2`
2017-11-20 06:10:04 +01:00
s.db.QueryRow(query, userID, feedURL).Scan(&result)
2019-10-30 06:48:07 +01:00
return result
2017-11-20 06:10:04 +01:00
}
// AnotherFeedURLExists reports whether the given user has a feed, other than
// feedID, that uses the given feed URL (i.e. a duplicate).
// The error returned by Scan is discarded, so the result stays false when no row matches.
func (s *Storage) AnotherFeedURLExists(userID, feedID int64, feedURL string) bool {
	var duplicated bool
	row := s.db.QueryRow(
		`SELECT true FROM feeds WHERE id <> $1 AND user_id=$2 AND feed_url=$3`,
		feedID,
		userID,
		feedURL,
	)
	_ = row.Scan(&duplicated)
	return duplicated
}
2020-09-28 01:01:06 +02:00
// CountAllFeeds returns the number of feeds in the database, stored under the
// keys "enabled", "disabled" and "total". It returns nil when the query fails.
func (s *Storage) CountAllFeeds() map[string]int64 {
	rows, err := s.db.Query(`SELECT disabled, count(*) FROM feeds GROUP BY disabled`)
	if err != nil {
		return nil
	}
	defer rows.Close()

	counters := map[string]int64{
		"enabled":  0,
		"disabled": 0,
	}

	for rows.Next() {
		var (
			isDisabled bool
			feedCount  int64
		)

		// A row that cannot be scanned is skipped instead of aborting the count.
		if scanErr := rows.Scan(&isDisabled, &feedCount); scanErr != nil {
			continue
		}

		key := "enabled"
		if isDisabled {
			key = "disabled"
		}
		counters[key] = feedCount
	}

	counters["total"] = counters["disabled"] + counters["enabled"]
	return counters
}
// CountFeeds returns the number of feeds that belong to the given user.
// It returns 0 when the count cannot be read.
func (s *Storage) CountFeeds(userID int64) int {
	var count int
	if err := s.db.QueryRow(`SELECT count(*) FROM feeds WHERE user_id=$1`, userID).Scan(&count); err != nil {
		return 0
	}
	return count
}
2020-09-28 01:01:06 +02:00
// CountUserFeedsWithErrors returns the number of feeds with parsing errors that belong to the given user.
func (s *Storage) CountUserFeedsWithErrors(userID int64) int {
pollingParsingErrorLimit := config.Opts.PollingParsingErrorLimit()
if pollingParsingErrorLimit <= 0 {
pollingParsingErrorLimit = 1
}
2020-09-28 01:01:06 +02:00
query := `SELECT count(*) FROM feeds WHERE user_id=$1 AND parsing_error_count >= $2`
var result int
err := s.db.QueryRow(query, userID, pollingParsingErrorLimit).Scan(&result)
if err != nil {
return 0
}
return result
}
2020-09-28 01:01:06 +02:00
// CountAllFeedsWithErrors returns the number of feeds with parsing errors.
func (s *Storage) CountAllFeedsWithErrors() int {
pollingParsingErrorLimit := config.Opts.PollingParsingErrorLimit()
if pollingParsingErrorLimit <= 0 {
pollingParsingErrorLimit = 1
}
2020-09-28 01:01:06 +02:00
query := `SELECT count(*) FROM feeds WHERE parsing_error_count >= $1`
var result int
err := s.db.QueryRow(query, pollingParsingErrorLimit).Scan(&result)
2020-09-28 01:01:06 +02:00
if err != nil {
return 0
}
return result
}
2020-05-23 02:48:53 +02:00
// Feeds returns all feeds that belongs to the given user.
2017-11-28 06:30:04 +01:00
func (s *Storage) Feeds(userID int64) (model.Feeds, error) {
2021-01-18 22:22:09 +01:00
builder := NewFeedQueryBuilder(s, userID)
builder.WithSorting(model.DefaultFeedSorting, model.DefaultFeedSortingDirection)
2021-01-18 22:22:09 +01:00
return builder.GetFeeds()
2020-05-23 02:48:53 +02:00
}
// getFeedsSorted runs the builder query and, when it succeeds, sorts the
// resulting feeds in place with byStateAndName. On failure the builder's
// result and error are returned unchanged.
func getFeedsSorted(builder *FeedQueryBuilder) (model.Feeds, error) {
	feeds, err := builder.GetFeeds()
	if err != nil {
		return feeds, err
	}

	sort.Sort(byStateAndName{feeds})
	return feeds, nil
}
2020-05-23 02:48:53 +02:00
// FeedsWithCounters returns all feeds of the given user with counters of read and unread entries.
func (s *Storage) FeedsWithCounters(userID int64) (model.Feeds, error) {
2021-01-18 22:22:09 +01:00
builder := NewFeedQueryBuilder(s, userID)
builder.WithCounters()
builder.WithSorting(model.DefaultFeedSorting, model.DefaultFeedSortingDirection)
return getFeedsSorted(builder)
2020-05-23 02:48:53 +02:00
}
// FetchCounters returns the read and unread counters obtained from the feed
// query builder for the given user. The counters are returned alongside any
// error reported by the builder.
func (s *Storage) FetchCounters(userID int64) (model.FeedCounters, error) {
	feedQuery := NewFeedQueryBuilder(s, userID)
	feedQuery.WithCounters()

	readCounters, unreadCounters, err := feedQuery.fetchFeedCounter()
	counters := model.FeedCounters{
		ReadCounters:   readCounters,
		UnreadCounters: unreadCounters,
	}
	return counters, err
}
2020-05-23 02:48:53 +02:00
// FeedsByCategoryWithCounters returns all feeds of the given user/category with counters of read and unread entries.
func (s *Storage) FeedsByCategoryWithCounters(userID, categoryID int64) (model.Feeds, error) {
2021-01-18 22:22:09 +01:00
builder := NewFeedQueryBuilder(s, userID)
builder.WithCategoryID(categoryID)
builder.WithCounters()
builder.WithSorting(model.DefaultFeedSorting, model.DefaultFeedSortingDirection)
return getFeedsSorted(builder)
2019-10-30 05:44:35 +01:00
}
// WeeklyFeedEntryCount returns the weekly entry count for a feed.
func (s *Storage) WeeklyFeedEntryCount(userID, feedID int64) (int, error) {
query := `
SELECT
count(*)
FROM
entries
WHERE
entries.user_id=$1 AND
entries.feed_id=$2 AND
entries.published_at BETWEEN (now() - interval '1 week') AND now();
`
var weeklyCount int
err := s.db.QueryRow(query, userID, feedID).Scan(&weeklyCount)
switch {
2021-01-18 22:22:09 +01:00
case errors.Is(err, sql.ErrNoRows):
return 0, nil
case err != nil:
return 0, fmt.Errorf(`store: unable to fetch weekly count for feed #%d: %v`, feedID, err)
}
return weeklyCount, nil
}
2017-11-28 06:30:04 +01:00
// FeedByID returns a feed by the ID.
func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
2021-01-18 22:22:09 +01:00
builder := NewFeedQueryBuilder(s, userID)
builder.WithFeedID(feedID)
feed, err := builder.GetFeed()
2017-11-20 06:10:04 +01:00
switch {
2021-01-18 22:22:09 +01:00
case errors.Is(err, sql.ErrNoRows):
2017-11-20 06:10:04 +01:00
return nil, nil
case err != nil:
2019-10-30 06:48:07 +01:00
return nil, fmt.Errorf(`store: unable to fetch feed #%d: %v`, feedID, err)
2017-11-20 06:10:04 +01:00
}
2021-01-18 22:22:09 +01:00
return feed, nil
2017-11-20 06:10:04 +01:00
}
2017-11-28 06:30:04 +01:00
// CreateFeed creates a new feed.
2017-11-20 06:10:04 +01:00
func (s *Storage) CreateFeed(feed *model.Feed) error {
sql := `
2019-10-30 06:48:07 +01:00
INSERT INTO feeds (
feed_url,
site_url,
title,
category_id,
user_id,
etag_header,
last_modified_header,
crawler,
user_agent,
2021-03-23 04:27:58 +01:00
cookie,
2019-10-30 06:48:07 +01:00
username,
password,
disabled,
scraper_rules,
rewrite_rules,
blocklist_rules,
keeplist_rules,
ignore_http_cache,
allow_self_signed_certificates,
fetch_via_proxy,
hide_globally,
Add Media Player and resume to last playback position In order to ease podcast listening, the player can be put on top of the feed entry as main content. Use the `Use podcast player` option to enable that. It works on audio and video. Also, when playing audio or video, progression will be saved in order to be able to resume listening later. This position saving is done using the original attachement/enclosures player AND podcast player and do not rely on the podcast player option ti be enabled. Additionally, I made the player fill the width with the entry container to ease seeking and have a bigger video. updateEnclosures now keep existing enclosures based on URL When feeds get updated, enclosures entries are always wiped and re-created. This cause two issue - enclosure progression get lost in the process - enclosure ID changes I used the URL as identifier of an enclosure. Not perfect but hopefully should work. When an enclosure already exist, I simply do nothing and leave the entry as is in the database. If anyone is listening/watching to this enclosure during the refresh, the id stay coherent and progression saving still works. The updateEnclosures function got a bit more complex. I tried to make it the more clear I could. Some optimisation are possible but would make the function harder to read in my opinion. I'm not sure if this is often the case, but some feeds may include tracking or simply change the url each time we update the feed. In those situation, enclosures ids and progression will be lost. I have no idea how to handle this last situation. Use the size instead/alongside url to define the identity of an enclosure ? Translation: english as placeholder for every language except French Aside, I tested a video feed and fixed a few things for it. In fact, the MimeType was not working at all on my side, and found a pretty old stackoverflow discussion that suggest to use an Apple non-standard MimeType for m4v video format. 
I only did one substitution because I only have one feed to test. Any new video feed can make this go away or evolve depending on the situation. Real video feeds does not tend to be easy to find and test extensively this. Co-authored-by: toastal
2023-04-13 11:46:43 +02:00
url_rewrite_rules,
2023-08-26 09:16:41 +02:00
no_media_player,
apprise_service_urls
2019-10-30 06:48:07 +01:00
)
VALUES
2023-08-26 09:16:41 +02:00
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24)
2019-10-30 06:48:07 +01:00
RETURNING
id
2017-11-20 06:10:04 +01:00
`
err := s.db.QueryRow(
sql,
feed.FeedURL,
feed.SiteURL,
feed.Title,
feed.Category.ID,
feed.UserID,
feed.EtagHeader,
feed.LastModifiedHeader,
feed.Crawler,
feed.UserAgent,
2021-03-23 04:27:58 +01:00
feed.Cookie,
feed.Username,
feed.Password,
2019-07-27 06:13:06 +02:00
feed.Disabled,
feed.ScraperRules,
feed.RewriteRules,
feed.BlocklistRules,
feed.KeeplistRules,
feed.IgnoreHTTPCache,
feed.AllowSelfSignedCertificates,
feed.FetchViaProxy,
feed.HideGlobally,
feed.UrlRewriteRules,
Add Media Player and resume to last playback position In order to ease podcast listening, the player can be put on top of the feed entry as main content. Use the `Use podcast player` option to enable that. It works on audio and video. Also, when playing audio or video, progression will be saved in order to be able to resume listening later. This position saving is done using the original attachement/enclosures player AND podcast player and do not rely on the podcast player option ti be enabled. Additionally, I made the player fill the width with the entry container to ease seeking and have a bigger video. updateEnclosures now keep existing enclosures based on URL When feeds get updated, enclosures entries are always wiped and re-created. This cause two issue - enclosure progression get lost in the process - enclosure ID changes I used the URL as identifier of an enclosure. Not perfect but hopefully should work. When an enclosure already exist, I simply do nothing and leave the entry as is in the database. If anyone is listening/watching to this enclosure during the refresh, the id stay coherent and progression saving still works. The updateEnclosures function got a bit more complex. I tried to make it the more clear I could. Some optimisation are possible but would make the function harder to read in my opinion. I'm not sure if this is often the case, but some feeds may include tracking or simply change the url each time we update the feed. In those situation, enclosures ids and progression will be lost. I have no idea how to handle this last situation. Use the size instead/alongside url to define the identity of an enclosure ? Translation: english as placeholder for every language except French Aside, I tested a video feed and fixed a few things for it. In fact, the MimeType was not working at all on my side, and found a pretty old stackoverflow discussion that suggest to use an Apple non-standard MimeType for m4v video format. 
I only did one substitution because I only have one feed to test. Any new video feed can make this go away or evolve depending on the situation. Real video feeds does not tend to be easy to find and test extensively this. Co-authored-by: toastal
2023-04-13 11:46:43 +02:00
feed.NoMediaPlayer,
2023-08-26 09:16:41 +02:00
feed.AppriseServiceURLs,
2017-11-20 06:10:04 +01:00
).Scan(&feed.ID)
if err != nil {
2019-10-30 06:48:07 +01:00
return fmt.Errorf(`store: unable to create feed %q: %v`, feed.FeedURL, err)
2017-11-20 06:10:04 +01:00
}
for i := 0; i < len(feed.Entries); i++ {
feed.Entries[i].FeedID = feed.ID
feed.Entries[i].UserID = feed.UserID
tx, err := s.db.Begin()
if err != nil {
return fmt.Errorf(`store: unable to start transaction: %v`, err)
}
entryExists, err := s.entryExists(tx, feed.Entries[i])
if err != nil {
if rollbackErr := tx.Rollback(); rollbackErr != nil {
return fmt.Errorf(`store: unable to rollback transaction: %v (rolled back due to: %v)`, rollbackErr, err)
}
return err
}
if !entryExists {
if err := s.createEntry(tx, feed.Entries[i]); err != nil {
if rollbackErr := tx.Rollback(); rollbackErr != nil {
return fmt.Errorf(`store: unable to rollback transaction: %v (rolled back due to: %v)`, rollbackErr, err)
}
return err
}
2017-11-20 06:10:04 +01:00
}
if err := tx.Commit(); err != nil {
return fmt.Errorf(`store: unable to commit transaction: %v`, err)
}
2017-11-20 06:10:04 +01:00
}
return nil
}
2017-11-28 06:30:04 +01:00
// UpdateFeed updates an existing feed.
2017-11-20 06:10:04 +01:00
func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
2019-07-27 06:13:06 +02:00
query := `
2019-10-30 06:48:07 +01:00
UPDATE
feeds
SET
2019-07-27 06:13:06 +02:00
feed_url=$1,
site_url=$2,
title=$3,
category_id=$4,
etag_header=$5,
last_modified_header=$6,
checked_at=$7,
parsing_error_msg=$8,
parsing_error_count=$9,
scraper_rules=$10,
rewrite_rules=$11,
blocklist_rules=$12,
keeplist_rules=$13,
crawler=$14,
user_agent=$15,
2021-03-23 04:27:58 +01:00
cookie=$16,
username=$17,
password=$18,
disabled=$19,
next_check_at=$20,
ignore_http_cache=$21,
allow_self_signed_certificates=$22,
fetch_via_proxy=$23,
hide_globally=$24,
Add Media Player and resume to last playback position In order to ease podcast listening, the player can be put on top of the feed entry as main content. Use the `Use podcast player` option to enable that. It works on audio and video. Also, when playing audio or video, progression will be saved in order to be able to resume listening later. This position saving is done using the original attachement/enclosures player AND podcast player and do not rely on the podcast player option ti be enabled. Additionally, I made the player fill the width with the entry container to ease seeking and have a bigger video. updateEnclosures now keep existing enclosures based on URL When feeds get updated, enclosures entries are always wiped and re-created. This cause two issue - enclosure progression get lost in the process - enclosure ID changes I used the URL as identifier of an enclosure. Not perfect but hopefully should work. When an enclosure already exist, I simply do nothing and leave the entry as is in the database. If anyone is listening/watching to this enclosure during the refresh, the id stay coherent and progression saving still works. The updateEnclosures function got a bit more complex. I tried to make it the more clear I could. Some optimisation are possible but would make the function harder to read in my opinion. I'm not sure if this is often the case, but some feeds may include tracking or simply change the url each time we update the feed. In those situation, enclosures ids and progression will be lost. I have no idea how to handle this last situation. Use the size instead/alongside url to define the identity of an enclosure ? Translation: english as placeholder for every language except French Aside, I tested a video feed and fixed a few things for it. In fact, the MimeType was not working at all on my side, and found a pretty old stackoverflow discussion that suggest to use an Apple non-standard MimeType for m4v video format. 
I only did one substitution because I only have one feed to test. Any new video feed can make this go away or evolve depending on the situation. Real video feeds does not tend to be easy to find and test extensively this. Co-authored-by: toastal
2023-04-13 11:46:43 +02:00
url_rewrite_rules=$25,
2023-08-26 09:16:41 +02:00
no_media_player=$26,
2023-09-09 07:45:17 +02:00
apprise_service_urls=$27
2019-07-27 06:13:06 +02:00
WHERE
2023-09-09 07:45:17 +02:00
id=$28 AND user_id=$29
2019-07-27 06:13:06 +02:00
`
2017-11-20 06:10:04 +01:00
_, err = s.db.Exec(query,
feed.FeedURL,
feed.SiteURL,
feed.Title,
feed.Category.ID,
feed.EtagHeader,
feed.LastModifiedHeader,
feed.CheckedAt,
feed.ParsingErrorMsg,
feed.ParsingErrorCount,
2017-12-11 05:51:04 +01:00
feed.ScraperRules,
feed.RewriteRules,
feed.BlocklistRules,
feed.KeeplistRules,
feed.Crawler,
feed.UserAgent,
2021-03-23 04:27:58 +01:00
feed.Cookie,
feed.Username,
feed.Password,
2019-07-27 06:13:06 +02:00
feed.Disabled,
feed.NextCheckAt,
2020-06-06 06:50:59 +02:00
feed.IgnoreHTTPCache,
feed.AllowSelfSignedCertificates,
feed.FetchViaProxy,
feed.HideGlobally,
feed.UrlRewriteRules,
Add Media Player and resume to last playback position In order to ease podcast listening, the player can be put on top of the feed entry as main content. Use the `Use podcast player` option to enable that. It works on audio and video. Also, when playing audio or video, progression will be saved in order to be able to resume listening later. This position saving is done using the original attachement/enclosures player AND podcast player and do not rely on the podcast player option ti be enabled. Additionally, I made the player fill the width with the entry container to ease seeking and have a bigger video. updateEnclosures now keep existing enclosures based on URL When feeds get updated, enclosures entries are always wiped and re-created. This cause two issue - enclosure progression get lost in the process - enclosure ID changes I used the URL as identifier of an enclosure. Not perfect but hopefully should work. When an enclosure already exist, I simply do nothing and leave the entry as is in the database. If anyone is listening/watching to this enclosure during the refresh, the id stay coherent and progression saving still works. The updateEnclosures function got a bit more complex. I tried to make it the more clear I could. Some optimisation are possible but would make the function harder to read in my opinion. I'm not sure if this is often the case, but some feeds may include tracking or simply change the url each time we update the feed. In those situation, enclosures ids and progression will be lost. I have no idea how to handle this last situation. Use the size instead/alongside url to define the identity of an enclosure ? Translation: english as placeholder for every language except French Aside, I tested a video feed and fixed a few things for it. In fact, the MimeType was not working at all on my side, and found a pretty old stackoverflow discussion that suggest to use an Apple non-standard MimeType for m4v video format. 
I only did one substitution because I only have one feed to test. Any new video feed can make this go away or evolve depending on the situation. Real video feeds does not tend to be easy to find and test extensively this. Co-authored-by: toastal
2023-04-13 11:46:43 +02:00
feed.NoMediaPlayer,
2023-09-09 07:45:17 +02:00
feed.AppriseServiceURLs,
2017-11-20 06:10:04 +01:00
feed.ID,
feed.UserID,
)
if err != nil {
2019-10-30 06:48:07 +01:00
return fmt.Errorf(`store: unable to update feed #%d (%s): %v`, feed.ID, feed.FeedURL, err)
2017-11-20 06:10:04 +01:00
}
return nil
}
2018-12-15 22:04:38 +01:00
// UpdateFeedError updates feed errors.
func (s *Storage) UpdateFeedError(feed *model.Feed) (err error) {
query := `
2019-10-30 06:48:07 +01:00
UPDATE
feeds
2018-12-15 22:04:38 +01:00
SET
parsing_error_msg=$1,
parsing_error_count=$2,
checked_at=$3,
next_check_at=$4
2019-10-30 06:48:07 +01:00
WHERE
id=$5 AND user_id=$6
2019-10-30 06:48:07 +01:00
`
2018-12-15 22:04:38 +01:00
_, err = s.db.Exec(query,
feed.ParsingErrorMsg,
feed.ParsingErrorCount,
feed.CheckedAt,
feed.NextCheckAt,
2018-12-15 22:04:38 +01:00
feed.ID,
feed.UserID,
)
if err != nil {
2019-10-30 06:48:07 +01:00
return fmt.Errorf(`store: unable to update feed error #%d (%s): %v`, feed.ID, feed.FeedURL, err)
2018-12-15 22:04:38 +01:00
}
return nil
}
// RemoveFeed removes a feed and all entries.
// This operation can takes time if the feed has lot of entries.
2017-11-20 06:10:04 +01:00
func (s *Storage) RemoveFeed(userID, feedID int64) error {
rows, err := s.db.Query(`SELECT id FROM entries WHERE user_id=$1 AND feed_id=$2`, userID, feedID)
2017-11-20 06:10:04 +01:00
if err != nil {
return fmt.Errorf(`store: unable to get user feed entries: %v`, err)
2017-11-20 06:10:04 +01:00
}
defer rows.Close()
2017-11-20 06:10:04 +01:00
for rows.Next() {
var entryID int64
if err := rows.Scan(&entryID); err != nil {
return fmt.Errorf(`store: unable to read user feed entry ID: %v`, err)
}
slog.Debug("Deleting entry",
slog.Int64("user_id", userID),
slog.Int64("feed_id", feedID),
slog.Int64("entry_id", entryID),
)
if _, err := s.db.Exec(`DELETE FROM entries WHERE id=$1 AND user_id=$2`, entryID, userID); err != nil {
return fmt.Errorf(`store: unable to delete user feed entries #%d: %v`, entryID, err)
}
2017-11-20 06:10:04 +01:00
}
if _, err := s.db.Exec(`DELETE FROM feeds WHERE id=$1 AND user_id=$2`, feedID, userID); err != nil {
return fmt.Errorf(`store: unable to delete feed #%d: %v`, feedID, err)
2017-11-20 06:10:04 +01:00
}
return nil
}
2018-06-30 23:22:45 +02:00
// ResetFeedErrors clears the parsing error count and the parsing error
// message of every feed. It returns the error reported by the UPDATE
// statement, if any.
func (s *Storage) ResetFeedErrors() error {
	if _, err := s.db.Exec(`UPDATE feeds SET parsing_error_count=0, parsing_error_msg=''`); err != nil {
		return err
	}
	return nil
}