reddit plumb: workaround reddit stricter bot checks - randomcrap - random crap programs of varying quality
HTML git clone git://git.codemadness.org/randomcrap
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit c130fa316c56c228630e8d167c63075e11bd84c8
DIR parent 9c9828b1d7d2e7f385f10e90025847ba70a69cbf
HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 29 May 2026 13:59:59 +0200
reddit plumb: workaround reddit stricter bot checks
It requires a more common User-Agent and also a cookie.
In particular the "loid" cookie is important.
The loid cookie is only set when loading an HTML page. After that
the regular ".json" suffix can be fetched.
https://www.reddit.com/dev/api/
Diffstat:
M config/scripts/plumb/reddit.sh | 47 ++++++++++++++++++++++++++++---
1 file changed, 43 insertions(+), 4 deletions(-)
---
DIR diff --git a/config/scripts/plumb/reddit.sh b/config/scripts/plumb/reddit.sh
@@ -1,10 +1,26 @@
#!/bin/sh
# extract .mpd playlist urls (video) for mpv from reddit posts.
-url="$1.json"
+# user-agent is important: tested that "Firefox" works and "007" doesn't, so there are some basic checks.
+ua="Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0"
-hurl -H 'User-Agent: 007' "$url" | \
-jaq '
+arg="$1"
+cookiejar="/tmp/reddit_plumb_cookies.txt"
+
+# "loid" cookie is important, else reddit blocks.
+# getcookie(url)
+getcookie() {
+ curl -s -f -H "User-Agent: $ua" -b "$cookiejar" -c "$cookiejar" "$1" >/dev/null
+}
+
+# getjson(url)
+getjson() {
+ curl -s -f -H "User-Agent: $ua" -b "$cookiejar" -c "$cookiejar" "$1"
+}
+
+# parse JSON from stdin.
+parsejson() {
+ jaq '
$1 == "[].data.children[].data.url" && $2 == "s" { data_url = $3; }
$1 ~ /\.dash_url$/ && $2 == "s" { dash_url = $3; }
$1 ~ /\.hls_url$/ && $2 == "s" { hls_url = $3; }
@@ -15,4 +31,27 @@ END {
print data_url;
}' | \
-LC_ALL=C awk 'length($0) && !x[$0]++'
+ LC_ALL=C awk 'length($0) && !x[$0]++'
+}
+
+# get cookie (by loading the HTML page) if the cookie jar does not exist or is empty.
+if ! test -s "$cookiejar"; then
+ getcookie "$arg"
+fi
+
+tmp=$(mktemp)
+url="${arg}.json"
+if ! getjson "$url" > "$tmp"; then
+ # on failure, retry, cookie might be expired.
+ getcookie "$arg"
+ # retry once
+ if ! getjson "$url" > "$tmp"; then
+ exit 1
+ fi
+fi
+
+if test -s "$tmp"; then
+ parsejson < "$tmp"
+fi
+
+rm -f "$tmp"