From 36868e648c2fece6a9d75c904368f68136b64b1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Sun, 23 May 2021 20:45:37 -0700
Subject: [PATCH] Add new config option CLEANUP_ARCHIVE_BATCH_SIZE

---
 config/options.go              | 9 +++++++++
 config/parser.go               | 2 ++
 miniflux.1                     | 5 +++++
 service/scheduler/scheduler.go | 7 ++++---
 storage/entry.go               | 8 ++++----
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/config/options.go b/config/options.go
index 2d62fe9d..041c0222 100644
--- a/config/options.go
+++ b/config/options.go
@@ -43,6 +43,7 @@ const (
 	defaultCleanupFrequencyHours     = 24
 	defaultCleanupArchiveReadDays    = 60
 	defaultCleanupArchiveUnreadDays  = 180
+	defaultCleanupArchiveBatchSize   = 10000
 	defaultCleanupRemoveSessionsDays = 30
 	defaultProxyImages               = "http-only"
 	defaultFetchYouTubeWatchTime     = false
@@ -101,6 +102,7 @@ type Options struct {
 	cleanupFrequencyHours     int
 	cleanupArchiveReadDays    int
 	cleanupArchiveUnreadDays  int
+	cleanupArchiveBatchSize   int
 	cleanupRemoveSessionsDays int
 	pollingFrequency          int
 	batchSize                 int
@@ -160,6 +162,7 @@ func NewOptions() *Options {
 		cleanupFrequencyHours:     defaultCleanupFrequencyHours,
 		cleanupArchiveReadDays:    defaultCleanupArchiveReadDays,
 		cleanupArchiveUnreadDays:  defaultCleanupArchiveUnreadDays,
+		cleanupArchiveBatchSize:   defaultCleanupArchiveBatchSize,
 		cleanupRemoveSessionsDays: defaultCleanupRemoveSessionsDays,
 		pollingFrequency:          defaultPollingFrequency,
 		batchSize:                 defaultBatchSize,
@@ -293,6 +296,11 @@ func (o *Options) CleanupArchiveUnreadDays() int {
 	return o.cleanupArchiveUnreadDays
 }
 
+// CleanupArchiveBatchSize returns the number of entries to archive for each interval.
+func (o *Options) CleanupArchiveBatchSize() int {
+	return o.cleanupArchiveBatchSize
+}
+
 // CleanupRemoveSessionsDays returns the number of days after which to remove sessions.
 func (o *Options) CleanupRemoveSessionsDays() int {
 	return o.cleanupRemoveSessionsDays
@@ -488,6 +496,7 @@ func (o *Options) SortedOptions() []*Option {
 		"CERT_FILE":                    o.certFile,
 		"CLEANUP_ARCHIVE_READ_DAYS":    o.cleanupArchiveReadDays,
 		"CLEANUP_ARCHIVE_UNREAD_DAYS":  o.cleanupArchiveUnreadDays,
+		"CLEANUP_ARCHIVE_BATCH_SIZE":   o.cleanupArchiveBatchSize,
 		"CLEANUP_FREQUENCY_HOURS":      o.cleanupFrequencyHours,
 		"CLEANUP_REMOVE_SESSIONS_DAYS": o.cleanupRemoveSessionsDays,
 		"CREATE_ADMIN":                 o.createAdmin,
diff --git a/config/parser.go b/config/parser.go
index cff06de7..5e62b73a 100644
--- a/config/parser.go
+++ b/config/parser.go
@@ -119,6 +119,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
 			p.opts.cleanupArchiveReadDays = parseInt(value, defaultCleanupArchiveReadDays)
 		case "CLEANUP_ARCHIVE_UNREAD_DAYS":
 			p.opts.cleanupArchiveUnreadDays = parseInt(value, defaultCleanupArchiveUnreadDays)
+		case "CLEANUP_ARCHIVE_BATCH_SIZE":
+			p.opts.cleanupArchiveBatchSize = parseInt(value, defaultCleanupArchiveBatchSize)
 		case "CLEANUP_REMOVE_SESSIONS_DAYS":
 			p.opts.cleanupRemoveSessionsDays = parseInt(value, defaultCleanupRemoveSessionsDays)
 		case "WORKER_POOL_SIZE":
diff --git a/miniflux.1 b/miniflux.1
index ab1c2696..eb7db1c2 100644
--- a/miniflux.1
+++ b/miniflux.1
@@ -223,6 +223,11 @@ Set to -1 to keep all unread entries.
 .br
 Default is 180 days\&.
 .TP
+.B CLEANUP_ARCHIVE_BATCH_SIZE
+Number of entries to archive for each job interval\&.
+.br
+Default is 10000 entries\&.
+.TP
 .B CLEANUP_REMOVE_SESSIONS_DAYS
 Number of days after removing old sessions from the database\&.
 .br
diff --git a/service/scheduler/scheduler.go b/service/scheduler/scheduler.go
index 6ec55279..c8dd1322 100644
--- a/service/scheduler/scheduler.go
+++ b/service/scheduler/scheduler.go
@@ -31,6 +31,7 @@ func Serve(store *storage.Storage, pool *worker.Pool) {
 		config.Opts.CleanupFrequencyHours(),
 		config.Opts.CleanupArchiveReadDays(),
 		config.Opts.CleanupArchiveUnreadDays(),
+		config.Opts.CleanupArchiveBatchSize(),
 		config.Opts.CleanupRemoveSessionsDays(),
 	)
 }
@@ -47,14 +48,14 @@ func feedScheduler(store *storage.Storage, pool *worker.Pool, frequency, batchSi
 	}
 }
 
-func cleanupScheduler(store *storage.Storage, frequency, archiveReadDays, archiveUnreadDays, sessionsDays int) {
+func cleanupScheduler(store *storage.Storage, frequency, archiveReadDays, archiveUnreadDays, archiveBatchSize, sessionsDays int) {
 	for range time.Tick(time.Duration(frequency) * time.Hour) {
 		nbSessions := store.CleanOldSessions(sessionsDays)
 		nbUserSessions := store.CleanOldUserSessions(sessionsDays)
 		logger.Info("[Scheduler:Cleanup] Cleaned %d sessions and %d user sessions", nbSessions, nbUserSessions)
 
 		startTime := time.Now()
-		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusRead, archiveReadDays); err != nil {
+		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusRead, archiveReadDays, archiveBatchSize); err != nil {
 			logger.Error("[Scheduler:ArchiveReadEntries] %v", err)
 		} else {
 			logger.Info("[Scheduler:ArchiveReadEntries] %d entries changed", rowsAffected)
@@ -65,7 +66,7 @@ func cleanupScheduler(store *storage.Storage, frequency, archiveReadDays, archiv
 		}
 
 		startTime = time.Now()
-		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusUnread, archiveUnreadDays); err != nil {
+		if rowsAffected, err := store.ArchiveEntries(model.EntryStatusUnread, archiveUnreadDays, archiveBatchSize); err != nil {
 			logger.Error("[Scheduler:ArchiveUnreadEntries] %v", err)
 		} else {
 			logger.Info("[Scheduler:ArchiveUnreadEntries] %d entries changed", rowsAffected)
diff --git a/storage/entry.go b/storage/entry.go
index 6e50aee7..968877f2 100644
--- a/storage/entry.go
+++ b/storage/entry.go
@@ -299,8 +299,8 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries
 }
 
 // ArchiveEntries changes the status of entries to "removed" after the given number of days.
-func (s *Storage) ArchiveEntries(status string, days int) (int64, error) {
-	if days < 0 {
+func (s *Storage) ArchiveEntries(status string, days, limit int) (int64, error) {
+	if days < 0 || limit <= 0 {
 		return 0, nil
 	}
 
@@ -310,10 +310,10 @@ func (s *Storage) ArchiveEntries(status string, days int) (int64, error) {
 		SET
 			status='removed'
 		WHERE
-			id=ANY(SELECT id FROM entries WHERE status=$1 AND starred is false AND share_code='' AND created_at < now () - '%d days'::interval ORDER BY created_at ASC LIMIT 5000)
+			id=ANY(SELECT id FROM entries WHERE status=$1 AND starred is false AND share_code='' AND created_at < now () - '%d days'::interval ORDER BY created_at ASC LIMIT %d)
 	`
 
-	result, err := s.db.Exec(fmt.Sprintf(query, days), status)
+	result, err := s.db.Exec(fmt.Sprintf(query, days, limit), status)
 	if err != nil {
 		return 0, fmt.Errorf(`store: unable to archive %s entries: %v`, status, err)
 	}
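
---

Usage sketch (not part of the patch): the new option is parsed by parseLines
like every other setting, so it can be set either in the config file or as an
environment variable. The value below is illustrative, not a recommendation;
the default is 10000, and because ArchiveEntries returns early when the limit
is <= 0, a zero or negative value effectively disables archiving for that run.

    # /etc/miniflux.conf
    # Archive up to 50000 entries per cleanup job interval.
    CLEANUP_ARCHIVE_BATCH_SIZE=50000

    # Or as an environment variable:
    $ export CLEANUP_ARCHIVE_BATCH_SIZE=50000
    $ miniflux -config-file /etc/miniflux.conf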