Add archive.org URI retrieval. Add first support for URI shortening. - annna - Annna the nice friendly bot. HTML git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/annna/ DIR Log DIR Files DIR Refs DIR Tags DIR README --- DIR commit b9b579a607ee73661922628578156f44e2fc23da DIR parent a8df83437f63bfe2a68eafedcdb86502927aafe8 HTML Author: Annna Robert-Houdin <annna@bitreich.org> Date: Sun, 12 Jan 2025 20:56:28 +0100 Add archive.org URI retrieval. Add first support for URI shortening. Diffstat: M annna-message-common | 25 +++++++++++++++---------- A archiveorg-uri | 32 +++++++++++++++++++++++++++++++ M fetch-uri | 14 -------------- M modules/tor/whitelist-from-tor | 1 + 4 files changed, 48 insertions(+), 24 deletions(-) --- DIR diff --git a/annna-message-common b/annna-message-common @@ -83,6 +83,8 @@ case "${text}" in esac # Set below and annna will concatenate at the end. + # HTML title. + urititle="" # Subtitle URI. sturi="" # Replacement URI. @@ -113,7 +115,17 @@ case "${text}" in tmpf=$(mktemp) fetch-uri "${uri}" > "${tmpf}" - urititle="$(grabtitle < "${tmpf}")" + if [ ! -s "${tmpf}" ]; + then + archiveorguri="$(archiveorg-uri "${uri}")" + if [ -n "${archiveorguri}" ]; + then + sarchiveorguri="$(bitreich-uri-shortener "${archiveorguri}")" + nuris="archive: ${sarchiveorguri}" + fetch-uri "${archiveorguri}" > "${tmpf}" + fi + fi + [ -s "${tmpf}" ] && urititle="$(grabtitle < "${tmpf}")" case "${urititle}" in "") @@ -181,10 +193,6 @@ case "${text}" in nuris="$nuris metadata: gophers://codemadness.org/1/idiotbox.cgi?v=$ytid" fi ;; - *www.wsj.com/*|*www.ft.com/*|*www.nytimes.com/*) - archvuri=$(fetch-uri -h "https://archive.is/newest/$uri" | awk 'NR == 1 && !/302/ { exit } /^location: / { print substr($2, 1, length($2)-1) }') - [ -n "$archvuri" ] && nuris="archive: $archvuri" - ;; *www.reddit.com*) nuri="$(printf '%s\n' "${uri}" | sed "s;www.reddit.com;old.reddit.com;")" nuris="old.reddit: ${nuri}" @@ -267,7 +275,7 @@ case "${text}" in *) mimetype="$(file -b --mime-type "${tmpf}")" case "${mimetype}" in - text/*) + text/*|application/javascript) nocuri=0 ;; esac @@ -278,10 +286,7 @@ case "${text}" in then if [ $nocuri -eq 0 ]; then - if [ -z "${curi}" ]; - then - curi="$(html2text < "${tmpf}" | /br/bin/bitreich-paste)" - fi + [ -z "${curi}" ] && curi="$(html2text < "${tmpf}" | /br/bin/bitreich-paste)" outputstr="${outputstr} content: ${curi} ;" fi DIR diff --git a/archiveorg-uri b/archiveorg-uri @@ -0,0 +1,32 @@ +#!/bin/sh + +export PATH="$HOME/bin:$PATH" + +usage() { + printf "usage: %s [-h] URI\n" "$(basename "$0")" >&2 + exit 1 +} + +if [ $# -lt 1 ] || [ $# -gt 2 ] +then + usage +fi + +if [ $# -eq 2 ] +then + [ $1 = -h ] || usage + opth=-I + shift +fi + +uri="$1" +aiapiuri="http://archive.org/wayback/available?url=${uri}" +usetor=0 +apiai="$(fetch-uri "$aiapiuri")" +available="$(echo "$apiai" | grep 'available')" +if [ -n "${available}" ]; +then + aiuri="$(echo "$apiai" | awk -F 'url": "' '{print $3}' 2>/dev/null | awk -F '", "' '{print $1}' | awk -F '"' '{print $1}')" + printf "%s\n" "${aiuri}" +fi + DIR diff --git a/fetch-uri b/fetch-uri @@ -31,17 +31,3 @@ esac grep -qx "$host" "/home/annna/bin/modules/tor/whitelist-from-tor" || usetor=1 curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:+--preproxy socks5h://127.0.0.1:9050 }"$uri" -# Taken from: https://github.com/uriel1998/muna/blob/master/muna.sh -if [ $? -eq 22 ]; -then - aiapiuri="http://archive.org/wayback/available?url=${uri}" - usetor=0 - apiai="$(curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:+--preproxy socks5h://127.0.0.1:9050 }"$aiapiuri")" - available="$(echo "$apiai" | grep 'available')" - if [ -n "${available}" ]; - then - aiuri="$(echo "$apiai" | awk -F 'url": "' '{print $3}' 2>/dev/null | awk -F '", "' '{print $1}' | awk -F '"' '{print $1}')" - curl -qgsm 5 --fail -L --max-redirs 3 -A "$ua" $opth ${usetor:+--preproxy socks5h://127.0.0.1:9050 }"$aiuri" - fi -fi - DIR diff --git a/modules/tor/whitelist-from-tor b/modules/tor/whitelist-from-tor @@ -1,3 +1,4 @@ www.forgottenweapons.com forgottenweapons.com archive.is +archive.org