Fix title parsing for imdb. - annna - Annna the nice friendly bot. HTML git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/annna/ DIR Log DIR Files DIR Refs DIR Tags DIR README --- DIR commit 0925485876164dc16cd6dd149ec600ba690b117a DIR parent 81fd3449a3013908f44bfad761da51b99430101a HTML Author: Annna Robert-Houdin <annna@bitreich.org> Date: Sun, 5 Dec 2021 19:56:33 +0100 Fix title parsing for imdb. Thanks Bob! Diffstat: M imdb2gopherbay | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) --- DIR diff --git a/imdb2gopherbay b/imdb2gopherbay @@ -6,16 +6,27 @@ then exit 1 fi +extractjson() { +awk ' +/<script id="__NEXT_DATA__"/ { + match($0, "<script id=\"__NEXT_DATA__\"[^>]*>"); + s = substr($0, RSTART + RLENGTH);OB + match(s, "</script>"); + s = substr(s, 1, RSTART - 1); + print s; +}' +} + imdburi="$1" title="$(curl -s "${imdburi}" \ - | xml2tsv 2>/dev/null \ - | grep __NEXT_ \ - | cut -f 4- \ - | sed 's,\\\\,\\,g' \ - | jshon -e head -e 9 -e 1 -e children -u \ - | sed 's, - IMDb,,')" + | extractjson \ + | json2tsv \ + | grep associatedTitle.originalTitleText.text \ + | head -n 1 \ + | cut -f 3)" [ -z "${title}" ] && exit 1 printf "%s\n" "${title}" +