#! /usr/bin/env python
from __future__ import print_function
import glob
import os
import socket
import sys
import time
# FIXME: join lines that end with "=" and were split due to excessive length
if socket.gethostname().startswith("ash2"):
    Root = "/sw/www"  # only on ash2
elif socket.gethostname().startswith("websites"):
    Root = os.path.join(os.path.expanduser("~"), "websites")  # on VM
else:
    Root = "/nfs/www"  # every other host
Text_Path = Root + "/netlib/na-digest"
HTML_Path = Root + "/netlib/na-digest-html"
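# Layout assumed by the functions below: Text_Path/<YY>/v<YY>n<NN> holds a plain-text
# issue (for 2017 onward; earlier issues sit directly in Text_Path), and
# HTML_Path/<YY>/v<YY>n<NN>.html receives the generated page.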
def html_filename(out_path, b_fname):
    return os.path.join(out_path, b_fname + ".html")
def check_tag(close_tag, outl):
    if close_tag:
        outl.append("</%s>" % close_tag)
        return ""
    return close_tag
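# Example: check_tag("pre", outl) appends "</pre>" to outl and returns "" so the caller
# can clear its pending close tag; check_tag("", outl) leaves outl alone and returns "".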
def make_single(tag, txt, suffix="", attributes={}):
    atr = ""
    if attributes:
        for key in attributes:
            atr += " "
            atr += "%s=%s" % (key, attributes[key])
    return "<%s%s>" % (tag, atr) + txt + "</%s>" % tag + suffix
def generate_html(year2d, number):
"""
year2d - two-digit year
number - NA Digest issue number
"""
if 0:
txt_path = os.path.join(dst_path, "../na-digest")
out_path = os.path.join(dst_path, "%02d" % year2d)
base_name = "v%02dn%02d" % (year2d, number)
if year2d >= 17:
txt_full_path = os.path.join(Text_Path, "%02d" % year2d)
else:
txt_full_path = Text_Path
txt_path = os.path.join(txt_full_path, base_name)
out_path = os.path.join(os.path.join(HTML_Path, "%02d" % year2d), base_name + ".html")
outl = generate_html_list(txt_path)
fobj = open(out_path, "w")
for line in outl:
fobj.write(line)
fobj.write("\n")
fobj.close()
os.chmod(out_path, 0o644)
print("Wrote", out_path)
    def empty():
        # unused leftover: would collect this year's text digests that still lack an HTML page
        l_fname = []
        for fname in glob.glob(os.path.join(txt_path, "v%02d*" % year2d)):
            b_fname = os.path.split(fname)[1]
            if os.path.exists(html_filename(out_path, b_fname)):
                continue
            l_fname.append(fname)
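# Example: generate_html(17, 1) would read Text_Path/17/v17n01 and write
# HTML_Path/17/v17n01.html (assuming both directories exist on this host).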
def extract_pairs_from_year_path(glob_pttrn, year_path, to_remove=["v", ".html"]):
"""
year2d - two digit year
Returns a list of tuples: year,digest_number
"""
l = list()
for fname in glob.glob(os.path.join(year_path, glob_pttrn)):
fn = os.path.split(fname)[1] # file name without full path
for r in to_remove: # remove prefixes and suffixes
fn = fn.replace(r, "")
l.append(list(map(int, fn.split("n")))) # split on "n" and convert to int-only tuple for easy lookups
return l
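# Example: with v17n01.html and v17n02.html present in HTML_Path/17,
# extract_pairs_from_year_path("v17n??.html", os.path.join(HTML_Path, "17"))
# returns [[17, 1], [17, 2]] (in whatever order glob lists the files).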
def find_missing_html(year=None):
"""
year - the full or two digit year for which to find missing HTML
Returns a list of tuples: year,digest_number missing in HTML directory but present in TXT directory
"""
if year is None:
year = time.localtime()[0]
orig_year = year
if year > 99:
year %= 2000
if year > 99:
raise ValueError("Year out of range {}".format(org_year))
html_l = extract_pairs_from_year_path("v%02dn??.html" % year, os.path.join(HTML_Path, str(year)))
print(html_l)
l = list()
for yi in extract_pairs_from_year_path("v%02dn??" % year, os.path.join(Text_Path, str(year))):
if yi not in html_l:
l.append(yi)
return l
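# Example: find_missing_html(2017) (or find_missing_html(17)) returns the [year, issue]
# pairs that exist under Text_Path/17 but have no page under HTML_Path/17 yet.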
def split_link_punct(link_with_punct):
    if link_with_punct[-1] == "," or link_with_punct[-1] == ".":
        link_text, punct = link_with_punct[:-1], link_with_punct[-1]
    else:
        link_text, punct = link_with_punct, ""
    return link_text, punct
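# Example: split_link_punct("http://netlib.org/na-digest-html/,") returns
# ("http://netlib.org/na-digest-html/", ","); a link without trailing punctuation
# comes back unchanged together with an empty punctuation string.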
def generate_html_list(fname):
    outl = ["""""", ""]
    title_found = False
    subject_no = 1
    mode = "outside"
    prev_line = ""
    fobj = open(fname)
    print("Reading", fname)
    while 1:
        line = fobj.readline()
        if not line:
            break
        line = line[:-1]  # remove EOL
        if prev_line:  # merge with the previous line if one is pending
            line = prev_line + line
        if line and "=" == line[-1]:  # a trailing "=" marks a soft line break: keep and merge with the next line
            prev_line = line[:-1]
            continue
        else:
            prev_line = ""
        for seq, repl in (
            # UTF-8 bytes mis-decoded as Windows code page 1252 and re-encoded (mojibake)
            ("\xc3\x82\xc2\xa3", "£"),  # pound sign
            ("\xc3\x84\xe2\x80\xba", "ě"),  # e with caron
            ("\xc3\x85\xc2\xa1", "š"),  # s with caron
            ("\xc3\x85\xc2\xaf", "ů"),  # u with ring above
            ("\xc3\x82\xc2\xae", "®"),  # registered sign
            ("\xc3\x83\xe2\x80\xb0", "\xc3\x89"),  # E with acute accent
            ("\xc3\x83\xe2\x80\x93", "\xc3\x96"),  # O umlaut
            ("\xc3\x83\xc2\xa3", "\xc3\xa3"),  # a tilde
            ("\xc3\x83\xc2\xa4", "\xc3\xa4"),  # a umlaut
            ("\xc3\x83\xc2\xa5", "å"),  # a with ring above
            ("\xc3\x83\xc2\xa9", "\xc3\xa9"),  # e with acute accent
            ("\xc3\x83\xc2\xa8", "\xc3\xa8"),  # e with grave accent
            ("\xc3\x83\xc2\xab", "\xc3\xab"),  # e umlaut
            ("\xc3\x83\xc2\xad", "\xc3\xad"),  # i with acute accent
            ("\xc3\x83\xc2\xb6", "\xc3\xb6"),  # o umlaut
            ("\xc3\x83\xc2\xba", "\xc3\xba"),  # u with acute accent
            ("\xc3\x83\xc2\xbc", "\xc3\xbc"),  # u umlaut
            ("\\337", "ß"),  # sharp s (escaped octal in the source text)
        ):
            line = line.replace(seq, repl)
if line.startswith("Subject: "):
if title_found:
mode = "item"
outl.append(make_single("b", """%s""" % (subject_no, line), "
"))
outl.append("
")
subject_no += 1
            else:
                mode = "title"
                outl.append(make_single("title", line.replace("Subject: ", "")))
                outl.append("")
                title_found = True
elif line.startswith("NA Digest "):
mode = "na-digest"
outl.append(make_single("h2", line))
outl.append("")
elif line.startswith("Today's Topics:"):
mode = "topics"
subject_no = 1
outl.append(make_single("b", line))
outl.append("