Add a cleaner for subtitles. - annna - Annna the nice friendly bot.
HTML git clone git://bitreich.org/annna/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/annna/
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR README
---
DIR commit 30eb06f31d2a42e280ea01521f6baa1bca2fec33
DIR parent a622486a185d90ca0225311dbc9e88a9fbab5994
HTML Author: Annna Robert-Houdin <annna@bitreich.org>
Date: Sun, 10 May 2020 18:57:47 +0200

Add a cleaner for subtitles.

Be careful, it is under Mafia Domain.

Thanks leot!

Diffstat:
  M subtitle-paste       |  3 +++
  A ytautosubcleaner.awk | 51 +++++++++++++++++++++++++++++++

2 files changed, 54 insertions(+), 0 deletions(-)
---
DIR diff --git a/subtitle-paste b/subtitle-paste
@@ -22,6 +22,9 @@ if [ $(stat -c%s "${ofile}") -eq 0 ];
 then
 	rm "${ofile}"
 else
+	# Make it more human readable.
+	awk -f /home/annna/bin/ytautosubcleaner.awk < "${ofile}" > "${ofile}.bak"
+	mv "${ofile}.bak" "${ofile}"
 	printf "gopher://bitreich.org/0/p/%s\n" "${ofile}"
 fi
 
DIR diff --git a/ytautosubcleaner.awk b/ytautosubcleaner.awk
@@ -0,0 +1,51 @@
+#!/usr/bin/awk -f
+
+# This file is licensed under Mafia Domain. So be careful.
+
+#
+# Make YouTube automatic subtitles more human readable
+#
+
+/^[0-9]+:[0-9]+:[0-9]+\.[0-9]+ -->/ {
+	sub(/ align:start position:0%$/, "")
+
+	if (caption && split(caption, lines, "\n") > 2) {
+		print caption
+	}
+
+	ignore = 0
+	caption = $0
+	next
+}
+
+ignore {
+	next
+}
+
+# skip extra empty lines
+/^ +$/ {
+	next
+}
+
+# current caption contains <c> garbage and will be repeated in the next
+# caption, ignore it
+caption && /><c>/ {
+	ignore = 1
+	caption = ""
+	next
+}
+
+caption {
+	caption = caption "\n" $0
+	next
+}
+
+{
+	print
+}
+
+END {
+	if (caption && split(caption, lines, "\n") > 2) {
+		print caption
+	}
+}