#! /usr/bin/env python
from __future__ import print_function
import glob
import os
import socket
import sys
import time
# FIXME: join lines that end with "=" and were split due to excessive length
if socket.gethostname().startswith("ash2"):
    Root = "/sw/www"  # only on ash2
elif socket.gethostname().startswith("websites"):
    Root = os.path.join(os.path.expanduser("~"), "websites")  # on VM
else:
    Root = "/nfs/www"  # every other host
Text_Path = Root + "/netlib/na-digest"
HTML_Path = Root + "/netlib/na-digest-html"
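# Layout assumed by the functions below: Text_Path/<YY>/v<YY>n<NN> holds a plain-text
# issue (for 2017 onward; earlier issues sit directly in Text_Path), and
# HTML_Path/<YY>/v<YY>n<NN>.html receives the generated page.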
def html_filename(out_path, b_fname):
    return os.path.join(out_path, b_fname + ".html")
def check_tag(close_tag, outl):
    if close_tag:
        outl.append("</%s>" % close_tag)
        return ""
    return close_tag
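# Example: check_tag("pre", outl) appends "</pre>" to outl and returns "" so the caller
# can clear its pending close tag; check_tag("", outl) leaves outl alone and returns "".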
def make_single(tag, txt, suffix="", attributes={}):
    atr = ""
    if attributes:
        for key in attributes:
            atr += " "
            atr += "%s=%s" % (key, attributes[key])
    return "<%s%s>" % (tag, atr) + txt + "</%s>" % tag + suffix
def generate_html(year2d, number):
"""
year2d - two-digit year
number - NA Digest issue number
"""
if 0:
txt_path = os.path.join(dst_path, "../na-digest")
out_path = os.path.join(dst_path, "%02d" % year2d)
base_name = "v%02dn%02d" % (year2d, number)
if year2d >= 17:
txt_full_path = os.path.join(Text_Path, "%02d" % year2d)
else:
txt_full_path = Text_Path
txt_path = os.path.join(txt_full_path, base_name)
out_path = os.path.join(os.path.join(HTML_Path, "%02d" % year2d), base_name + ".html")
outl = generate_html_list(txt_path)
fobj = open(out_path, "w")
for line in outl:
fobj.write(line)
fobj.write("\n")
fobj.close()
os.chmod(out_path, 0o644)
print("Wrote", out_path)
    def empty():
        # unused leftover: would collect this year's text digests that still lack an HTML page
        l_fname = []
        for fname in glob.glob(os.path.join(txt_path, "v%02d*" % year2d)):
            b_fname = os.path.split(fname)[1]
            if os.path.exists(html_filename(out_path, b_fname)):
                continue
            l_fname.append(fname)
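# Example: generate_html(17, 1) would read Text_Path/17/v17n01 and write
# HTML_Path/17/v17n01.html (assuming both directories exist on this host).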
def extract_pairs_from_year_path(glob_pttrn, year_path, to_remove=["v", ".html"]):
"""
year2d - two digit year
Returns a list of tuples: year,digest_number
"""
l = list()
for fname in glob.glob(os.path.join(year_path, glob_pttrn)):
fn = os.path.split(fname)[1] # file name without full path
for r in to_remove: # remove prefixes and suffixes
fn = fn.replace(r, "")
l.append(list(map(int, fn.split("n")))) # split on "n" and convert to int-only tuple for easy lookups
return l
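# Example: with v17n01.html and v17n02.html present in HTML_Path/17,
# extract_pairs_from_year_path("v17n??.html", os.path.join(HTML_Path, "17"))
# returns [[17, 1], [17, 2]] (in whatever order glob lists the files).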
def find_missing_html(year=None):
"""
year - the full or two digit year for which to find missing HTML
Returns a list of tuples: year,digest_number missing in HTML directory but present in TXT directory
"""
if year is None:
year = time.localtime()[0]
orig_year = year
if year > 99:
year %= 2000
if year > 99:
raise ValueError("Year out of range {}".format(org_year))
html_l = extract_pairs_from_year_path("v%02dn??.html" % year, os.path.join(HTML_Path, str(year)))
print(html_l)
l = list()
for yi in extract_pairs_from_year_path("v%02dn??" % year, os.path.join(Text_Path, str(year))):
if yi not in html_l:
l.append(yi)
return l
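# Example: find_missing_html(2017) (or find_missing_html(17)) returns the [year, issue]
# pairs that exist under Text_Path/17 but have no page under HTML_Path/17 yet.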
def split_link_punct(link_with_punct):
    if link_with_punct[-1] == "," or link_with_punct[-1] == ".":
        link_text, punct = link_with_punct[:-1], link_with_punct[-1]
    else:
        link_text, punct = link_with_punct, ""
    return link_text, punct
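# Example: split_link_punct("http://netlib.org/na-digest-html/,") returns
# ("http://netlib.org/na-digest-html/", ","); a link without trailing punctuation
# comes back unchanged together with an empty punctuation string.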
def generate_html_list(fname):
    outl = ["""""", ""]
    title_found = False
    subject_no = 1
    mode = "outside"
    prev_line = ""
    fobj = open(fname)
    print("Reading", fname)
    while 1:
        line = fobj.readline()
        if not line:
            break
        line = line[:-1]  # remove EOL
        if prev_line:  # merge with the previous line if one is pending
            line = prev_line + line
        if line and "=" == line[-1]:  # a trailing "=" marks a soft line break: keep and merge with the next line
            prev_line = line[:-1]
            continue
        else:
            prev_line = ""
        for seq, repl in (
            # UTF-8 bytes mis-decoded as Windows code page 1252 and re-encoded (mojibake)
            ("\xc3\x82\xc2\xa3", "£"),  # pound sign
            ("\xc3\x84\xe2\x80\xba", "ě"),  # e with caron
            ("\xc3\x85\xc2\xa1", "š"),  # s with caron
            ("\xc3\x85\xc2\xaf", "ů"),  # u with ring above
            ("\xc3\x82\xc2\xae", "®"),  # registered sign
            ("\xc3\x83\xe2\x80\xb0", "\xc3\x89"),  # E with acute accent
            ("\xc3\x83\xe2\x80\x93", "\xc3\x96"),  # O umlaut
            ("\xc3\x83\xc2\xa3", "\xc3\xa3"),  # a tilde
            ("\xc3\x83\xc2\xa4", "\xc3\xa4"),  # a umlaut
            ("\xc3\x83\xc2\xa5", "å"),  # a with ring above
            ("\xc3\x83\xc2\xa9", "\xc3\xa9"),  # e with acute accent
            ("\xc3\x83\xc2\xa8", "\xc3\xa8"),  # e with grave accent
            ("\xc3\x83\xc2\xab", "\xc3\xab"),  # e umlaut
            ("\xc3\x83\xc2\xad", "\xc3\xad"),  # i with acute accent
            ("\xc3\x83\xc2\xb6", "\xc3\xb6"),  # o umlaut
            ("\xc3\x83\xc2\xba", "\xc3\xba"),  # u with acute accent
            ("\xc3\x83\xc2\xbc", "\xc3\xbc"),  # u umlaut
            ("\\337", "ß"),  # sharp s (escaped octal in the source text)
        ):
            line = line.replace(seq, repl)
if line.startswith("Subject: "):
if title_found:
mode = "item"
outl.append(make_single("b", """%s""" % (subject_no, line), "
"))
outl.append("
")
subject_no += 1
            else:
                mode = "title"
                outl.append(make_single("title", line.replace("Subject: ", "")))
                outl.append("")
                title_found = True
elif line.startswith("NA Digest "):
mode = "na-digest"
outl.append(make_single("h2", line))
outl.append("")
elif line.startswith("Today's Topics:"):
mode = "topics"
subject_no = 1
outl.append(make_single("b", line))
outl.append("