"""bookdarts - extract KOReader generated highlights and publish them.

The script reads a JSON export (from the files named on the command line,
or from standard input when none are given), expects a top-level
``"documents"`` list, and prints every document's highlights as plain
text: a ``# title`` heading followed by one block per highlight holding
the date, the wrapped highlight text prefixed with ``| ``, and the chapter.
"""
# Provenance (from the commit that introduced this file):
#   commit a2eec07ead104ba756d1abb0b6f727cd4e369632
#   parent 75968d58e15a9c4c2fb15036ac2d33547641964f
#   Author: Scarlett McAllister <no+reply@roygbyte.com>
#   Date:   Thu, 28 Sep 2023 20:05:40 -0300
#
#   Add python file
#
#   Decided to implement this in Python. It's been much easier and
#   enjoyable than shell.

#############################################
# Dependencies
import sys
import fileinput
import json
from json import JSONDecodeError
import subprocess
from subprocess import CalledProcessError
from datetime import datetime
import re
import textwrap

#############################################
# Configuration variables
EXPORT = {
    # Maximum line width handed to the text wrapper.
    'DOC_WIDTH': 70,
}

# Characters that are NOT letters, digits, '?', '=', '&', '-' or space are
# removed from a title before it is turned into a filename.
_FILENAME_STRIP = re.compile(r"[^a-zA-Z0-9?=&\- ]")


#############################################
# Function definitions
def _wrap_text(text, width):
    """Return *text* re-flowed to at most *width* columns, as a list of lines.

    The external ``fmt`` utility is used when available so the output keeps
    its line-breaking behaviour. When ``fmt`` is not installed the text is
    wrapped with ``textwrap`` instead, one blank-line separated paragraph
    at a time (an approximation of what ``fmt`` does).

    Raises:
        CalledProcessError: ``fmt`` ran but exited with a non-zero status.
    """
    try:
        completed_proc = subprocess.run(["fmt", "-w", str(width)],
                                        check=True,
                                        input=text.encode('UTF-8'),
                                        capture_output=True)
    except FileNotFoundError:
        lines = []
        paragraphs = re.split(r"\n\s*\n", text.strip("\n"))
        for index, paragraph in enumerate(paragraphs):
            if index:
                # Keep one empty line between paragraphs.
                lines.append("")
            lines.extend(textwrap.wrap(paragraph, width))
        return lines

    lines = completed_proc.stdout.decode('UTF-8').split("\n")
    # fmt terminates its output with a newline; drop the empty tail it leaves.
    if lines and lines[-1] == "":
        lines.pop()
    return lines


def format_entry(entry):
    """Format one highlight entry as a text block.

    Args:
        entry: mapping with the keys ``'text'`` (highlight string),
            ``'chapter'`` (string) and ``'time'`` (POSIX timestamp).

    Returns:
        ``"Date: YYYY-MM-DD\\n| wrapped text...\\nChapter: ...\\n"``, where
        the date is the timestamp rendered in local time and every line of
        the wrapped highlight is prefixed with ``"| "``. Returns ``None``
        when the entry cannot be formatted; the reason is reported on
        standard error so it never ends up inside the published text.
    """
    try:
        chapter = "Chapter: " + entry['chapter']
        date = "Date: " + datetime.fromtimestamp(entry['time'])\
                                  .strftime('%Y-%m-%d')
        # Prefix each line of highlight with a pipe and space, eg.:
        # | example of highlight
        highlight = "\n".join(
            "| " + line
            for line in _wrap_text(entry['text'], EXPORT['DOC_WIDTH']))
    except CalledProcessError as e:
        print("CalledProcessError:", e, file=sys.stderr)
        return None
    except (KeyError, TypeError, AttributeError, ValueError,
            OverflowError, OSError) as e:
        # Malformed entry (missing key, wrong type, timestamp out of range).
        print("Exception", repr(e), file=sys.stderr)
        return None
    return f"{date}\n{highlight}\n{chapter}\n"


def format_document(document):
    """Add the derived publishing fields to *document* and return it.

    The mapping is modified in place:

    * ``'filename'``   - the title with every character other than letters,
      digits, ``?``, ``=``, ``&``, ``-`` and space removed, lower-cased,
      with spaces turned into underscores.
    * ``'link_label'`` - a copy of the title.
    * ``'entries'``    - list of formatted entry strings (see
      ``format_entry``); entries that could not be formatted are left out
      so the list can always be joined.

    Args:
        document: mapping with the keys ``'title'`` and ``'entries'``.

    Returns:
        The same ``document`` object.
    """
    title = document['title']
    # Done in-process: the former `sed` call treated the backslashes in its
    # bracket expression literally, which stripped '-' and kept '\'.
    document['filename'] = _FILENAME_STRIP.sub("", title)\
                                          .lower()\
                                          .replace(' ', '_')
    document['link_label'] = title
    formatted_entries = (format_entry(entry) for entry in document['entries'])
    document['entries'] = [text for text in formatted_entries
                           if text is not None]
    return document


#############################################
# Main control flow
def main():
    """Read the JSON export, format every document and print the result.

    Exits with status 1 when the input is not valid JSON or does not hold
    a ``"documents"`` entry.
    """
    # Collect the whole input (stdin or the files named in argv) at once.
    strin = "".join(fileinput.input(encoding="utf-8"))

    try:
        documents = json.loads(strin)['documents']
    except JSONDecodeError as e:
        print("Error decoding JSON. Exiting.",
              f'{e.msg=} {e.pos=} {e.lineno=}',
              file=sys.stderr)
        sys.exit(1)
    except (KeyError, TypeError):
        print('No "documents" entry found in the input. Exiting.',
              file=sys.stderr)
        sys.exit(1)

    # Format everything first, then print, so output order is unaffected
    # by any diagnostics emitted while formatting.
    for document in [format_document(doc) for doc in documents]:
        print("\n#", document['title'])
        print("\n\n".join(document['entries']))


# The program starts here.
if __name__ == "__main__":
    main()