Compile block/keep regex only once per feed

No need to compile them once for matching on the url,
once per tag, once per title, once per author, … one time is enough.
It also simplifies error handling: while regexp compilation can fail,
matching can't.
This commit is contained in:
jvoisin 2024-03-17 14:38:13 +01:00 committed by Frédéric Guillot
parent 00dabc1d3c
commit 02a074ed26
1 changed file with 46 additions and 39 deletions

View File

@ -116,12 +116,24 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
} }
func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool { func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool {
if feed.BlocklistRules != "" { if feed.BlocklistRules == "" {
return false
}
compiledBlocklist, err := regexp.Compile(feed.BlocklistRules)
if err != nil {
slog.Debug("Failed on regexp compilation",
slog.String("pattern", feed.BlocklistRules),
slog.Any("error", err),
)
return false
}
containsBlockedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool { containsBlockedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
return matchField(feed.BlocklistRules, tag) return compiledBlocklist.MatchString(tag)
}) })
if matchField(feed.BlocklistRules, entry.URL) || matchField(feed.BlocklistRules, entry.Title) || matchField(feed.BlocklistRules, entry.Author) || containsBlockedTag { if compiledBlocklist.MatchString(entry.URL) || compiledBlocklist.MatchString(entry.Title) || compiledBlocklist.MatchString(entry.Author) || containsBlockedTag {
slog.Debug("Blocking entry based on rule", slog.Debug("Blocking entry based on rule",
slog.Int64("entry_id", entry.ID), slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL), slog.String("entry_url", entry.URL),
@ -131,18 +143,28 @@ func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool {
) )
return true return true
} }
}
return false return false
} }
func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool { func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
if feed.KeeplistRules != "" { if feed.KeeplistRules == "" {
return true
}
compiledKeeplist, err := regexp.Compile(feed.KeeplistRules)
if err != nil {
slog.Debug("Failed on regexp compilation",
slog.String("pattern", feed.KeeplistRules),
slog.Any("error", err),
)
return false
}
containsAllowedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool { containsAllowedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
return matchField(feed.KeeplistRules, tag) return compiledKeeplist.MatchString(tag)
}) })
if matchField(feed.KeeplistRules, entry.URL) || matchField(feed.KeeplistRules, entry.Title) || matchField(feed.KeeplistRules, entry.Author) || containsAllowedTag { if compiledKeeplist.MatchString(entry.URL) || compiledKeeplist.MatchString(entry.Title) || compiledKeeplist.MatchString(entry.Author) || containsAllowedTag {
slog.Debug("Allow entry based on rule", slog.Debug("Allow entry based on rule",
slog.Int64("entry_id", entry.ID), slog.Int64("entry_id", entry.ID),
slog.String("entry_url", entry.URL), slog.String("entry_url", entry.URL),
@ -153,21 +175,6 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
return true return true
} }
return false return false
}
return true
}
// matchField reports whether value matches the regular expression pattern.
// A pattern that fails to compile is logged at debug level and treated as a
// non-match (MatchString returns false alongside a compilation error).
func matchField(pattern, value string) bool {
	matched, err := regexp.MatchString(pattern, value)
	if err == nil {
		return matched
	}
	slog.Debug("Failed on regexp match",
		slog.String("pattern", pattern),
		slog.String("value", value),
		slog.Bool("match", matched),
		slog.Any("error", err),
	)
	return matched
}
// ProcessEntryWebPage downloads the entry web page and apply rewrite rules. // ProcessEntryWebPage downloads the entry web page and apply rewrite rules.