#! /usr/bin/env python from __future__ import print_function import glob import os import socket import sys import time #FIXME: join lines that end with = and were split due to excessive length if socket.gethostname().startswith("ash2"): Root = "/sw/www" # only on ash2 elif socket.gethostname().startswith("websites"): Root = os.path.join(os.path.expanduser("~"), "websites") # on VM else: Root = "/nfs/www" # every other host Text_Path = Root + "/netlib/na-digest" HTML_Path = Root + "/netlib/na-digest-html" def html_filename(out_path, b_fname): return os.path.join(out_path, b_fname + ".html") def check_tag(close_tag, outl): if close_tag: outl.append("" % close_tag) return "" return close_tag def make_single(tag, txt, suffix="", attributes={}): atr = "" if attributes: for key in attributes: atr += " " atr += "%s=%s" % (key, attributes[key]) return "<%s%s>" % (tag, atr) + txt + "" % tag + suffix def generate_html(year2d, number): """ year2d - two-digit year number - NA Digest issue number """ if 0: txt_path = os.path.join(dst_path, "../na-digest") out_path = os.path.join(dst_path, "%02d" % year2d) base_name = "v%02dn%02d" % (year2d, number) if year2d >= 17: txt_full_path = os.path.join(Text_Path, "%02d" % year2d) else: txt_full_path = Text_Path txt_path = os.path.join(txt_full_path, base_name) out_path = os.path.join(os.path.join(HTML_Path, "%02d" % year2d), base_name + ".html") outl = generate_html_list(txt_path) fobj = open(out_path, "w") for line in outl: fobj.write(line) fobj.write("\n") fobj.close() os.chmod(out_path, 0o644) print("Wrote", out_path) def empty(): l_fname = [] for fname in glob.glob(os.path.join(txt_path, "v%02d*" % year2d)): b_fname = os.path.split(fname)[1] if os.path.exists(html_filename(out_path, b_fname)): continue l_fname.append(fname) def extract_pairs_from_year_path(glob_pttrn, year_path, to_remove=["v", ".html"]): """ year2d - two digit year Returns a list of tuples: year,digest_number """ l = list() for fname in glob.glob(os.path.join(year_path, glob_pttrn)): fn = os.path.split(fname)[1] # file name without full path for r in to_remove: # remove prefixes and suffixes fn = fn.replace(r, "") l.append(list(map(int, fn.split("n")))) # split on "n" and convert to int-only tuple for easy lookups return l def find_missing_html(year=None): """ year - the full or two digit year for which to find missing HTML Returns a list of tuples: year,digest_number missing in HTML directory but present in TXT directory """ if year is None: year = time.localtime()[0] orig_year = year if year > 99: year %= 2000 if year > 99: raise ValueError("Year out of range {}".format(org_year)) html_l = extract_pairs_from_year_path("v%02dn??.html" % year, os.path.join(HTML_Path, str(year))) print(html_l) l = list() for yi in extract_pairs_from_year_path("v%02dn??" % year, os.path.join(Text_Path, str(year))): if yi not in html_l: l.append(yi) return l def split_link_punct(link_with_punct): if link_with_punct[-1] == "," or link_with_punct[-1] == ".": link_text, punct = link_with_punct[:-1], link_with_punct[-1] else: link_text, punct = link_with_punct, "" return link_text, punct def generate_html_list(fname): outl = ["""""", ""] title_found = False subject_no = 1 mode = "outside" prev_line = "" fobj = open(fname) print("Reading", fname) while 1: line = fobj.readline() if not line: break line = line[:-1] # remove EOL if prev_line: # merge with previous line if it exists line = prev_line + line if line and "=" == line[-1]: # must merge lines with = at end prev_line = line[:-1] continue else: prev_line = "" for seq, repl in ( # Windows code page 1252 ("\xc3\x82\xc2\xa3", "£"), # pound ("\xc3\x84\xe2\x80\xba", "ě"), # e with v on top ("\xc3\x85\xc2\xa1", "š"), # e with v on top ("\xc3\x85\xc2\xaf", "ů"), # u with o on top ("\xc3\x82\xc2\xae", "®"), # registered sign ("\xc3\x83\xe2\x80\xb0", "\xc3\x89"), # E with up accent ("\xc3\x83\xe2\x80\x93", "\xc3\x96"), # O umlaut ("\xc3\x83\xc2\xa3", "\xc3\xa3"), # a tilde ("\xc3\x83\xc2\xa4", "\xc3\xa4"), # a umlaut ("\xc3\x83\xc2\xa5", "å"), # a with o accent ("\xc3\x83\xc2\xa9", "\xc3\xa9"), # e with up accent ("\xc3\x83\xc2\xa8", "\xc3\xa8"), # e with down accent ("\xc3\x83\xc2\xab", "\xc3\xab"), # e umlaut ("\xc3\x83\xc2\xad", "\xc3\xad"), # e umlaut ("\xc3\x83\xc2\xb6", "\xc3\xb6"), # o umlaut ("\xc3\x83\xc2\xba", "\xc3\xba"), # u with up accent ("\xc3\x83\xc2\xbc", "\xc3\xbc"), # u umlaut ("\\337", "ß"), # sharfes s ): line = line.replace(seq,repl) if line.startswith("Subject: "): if title_found: mode = "item" outl.append(make_single("b", """%s""" % (subject_no, line), "
")) outl.append("
") subject_no += 1 else: mode = "title" outl.append(make_single("title", line.replace("Subject: ", ""))) outl.append("") title_found = True elif line.startswith("NA Digest "): mode = "na-digest" outl.append(make_single("h2", line)) outl.append("") elif line.startswith("Today's Topics:"): mode = "topics" subject_no = 1 outl.append(make_single("b", line)) outl.append("