2023-06-19 23:42:47 +02:00
|
|
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0
|
2017-11-20 06:10:04 +01:00
|
|
|
|
2023-08-11 04:46:45 +02:00
|
|
|
package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
|
2017-11-20 06:10:04 +01:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"io"
|
|
|
|
|
|
|
|
"golang.org/x/net/html"
|
|
|
|
)
|
|
|
|
|
|
|
|
// StripTags removes all HTML/XML tags from the input string.
|
2024-02-27 13:23:47 +01:00
|
|
|
// This function must *only* be used for cosmetic purposes, not to prevent code injections like XSS.
|
2017-11-20 06:10:04 +01:00
|
|
|
func StripTags(input string) string {
|
|
|
|
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
|
|
|
|
var buffer bytes.Buffer
|
|
|
|
|
|
|
|
for {
|
|
|
|
if tokenizer.Next() == html.ErrorToken {
|
|
|
|
err := tokenizer.Err()
|
|
|
|
if err == io.EOF {
|
|
|
|
return buffer.String()
|
|
|
|
}
|
|
|
|
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
token := tokenizer.Token()
|
2024-03-17 21:26:51 +01:00
|
|
|
if token.Type == html.TextToken {
|
2017-11-20 06:10:04 +01:00
|
|
|
buffer.WriteString(token.Data)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|