mirror of https://github.com/miniflux/v2.git
Minor internal/reader/readability/readability.go speedup
- Don't use a capturing group in `divToPElementsRegexp`
- Remove a duplicate condition
- Replace the `sentenceRegexp` match with a last-byte comparison and a `strings.Contains` call
This commit is contained in:
parent
f12d5131b0
commit
4db138d4b8
|
@ -21,8 +21,7 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
|
divToPElementsRegexp = regexp.MustCompile(`(?i)<(?:a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
|
||||||
sentenceRegexp = regexp.MustCompile(`\.( |$)`)
|
|
||||||
|
|
||||||
blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`)
|
blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`)
|
||||||
okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
|
okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
|
||||||
|
@ -114,9 +113,11 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
|
||||||
content := s.Text()
|
content := s.Text()
|
||||||
contentLength := len(content)
|
contentLength := len(content)
|
||||||
|
|
||||||
if contentLength >= 80 && linkDensity < .25 {
|
if contentLength >= 80 {
|
||||||
|
if linkDensity < .25 {
|
||||||
append = true
|
append = true
|
||||||
} else if contentLength < 80 && linkDensity == 0 && sentenceRegexp.MatchString(content) {
|
}
|
||||||
|
} else if linkDensity == 0 && (content[len(content)-1] == '.' || strings.Contains(content, ". ")) {
|
||||||
append = true
|
append = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue