some improvements - webdump - [FORK] git://git.codemadness.org/webdump
HTML git clone git://git.z3bra.org/webdump.git
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit b708236e10ae2b6af6e62514f2ca159fd6eeeabd
DIR parent 69314d208de2a232366a14a9c9fef7400e4e0647
HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 15 Sep 2019 20:03:21 +0200
some improvements
- improve table rendering (in a hacky way for now).
- print some characters safe (disallow control-characters except TAB and
newline for now).
- print "Link references" before the links at the bottom.
- update TODO.
Diffstat:
M TODO | 2 ++
M main.c | 72 ++++++++++++++++++++++---------
2 files changed, 54 insertions(+), 20 deletions(-)
---
DIR diff --git a/TODO b/TODO
@@ -1,3 +1,5 @@
+- print safe (not certain control chars, except newline, TAB etc).
+
- improve/remove duplicate white-space/newlines?
- cleanup code.
DIR diff --git a/main.c b/main.c
@@ -47,6 +47,7 @@ static char src[4096]; /* src or href attribute */
#define MAX_DEPTH 256
static struct node nodes[MAX_DEPTH];
static int curnode;
+static int ignoredata;
static char *pretags[] = {
"pre",
@@ -152,32 +153,46 @@ string_append(String *s, const char *data, size_t len)
s->data[s->len] = '\0';
}
-#if 0
-static void
-safeprint(const char *s, size_t len)
+char *
+xstrdup(const char *s)
{
- size_t i;
+ char *p;
+
+ if (!(p = strdup(s)))
+ err(1, "strdup");
+ return p;
+}
+
+void *
+xcalloc(size_t nmemb, size_t size)
+{
+ void *p;
+
+ if (!(p = calloc(nmemb, size)))
+ err(1, "calloc");
+ return p;
+}
- for (i = 0; i < len && *s; i++) {
+static void
+printsafe(const char *s)
+{
+ for (; *s; s++) {
switch (*s) {
case '\t':
case '\n':
putchar(*s);
break;
default:
- if (iscntrl(*s))
- putchar(' ');
- else
+ if (!iscntrl((unsigned char)*s))
putchar(*s);
}
}
}
-#endif
static void
xmlcdata(XMLParser *p, const char *data, size_t datalen)
{
- fputs(data, stdout);
+ printsafe(data);
}
static void
@@ -215,7 +230,8 @@ xmldataend(XMLParser *p)
if (s != start && !isspace((unsigned char)s[-1]))
putchar(' ');
} else {
- putchar(*s);
+ if (!iscntrl((unsigned char)*s))
+ putchar(*s);
}
}
if (s != start && e != start && !isspace((unsigned char)s[-1]) &&
@@ -231,6 +247,8 @@ xmldataend(XMLParser *p)
static void
xmldata(XMLParser *p, const char *data, size_t datalen)
{
+ if (ignoredata)
+ return;
string_append(&htmldata, data, datalen);
}
@@ -267,6 +285,11 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
src[0] = '\0'; /* src, href */
strlcpy(cur->tag, t, sizeof(cur->tag));
+ if (!strcasecmp(t, "table"))
+ ignoredata = 1;
+ else if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
+ ignoredata = 0;
+
for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
if (!strcasecmp(pretags[i], t)) {
cur->ispre = 1;
@@ -295,18 +318,25 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
cur = &nodes[curnode];
- if (!strcasecmp(t, "tr"))
- fputs(" | ", stdout); /* HACK */
+ if (!strcasecmp(t, "tr")) {
+ fputs(" | ", stdout); /* HACK: last cell */
+ return;
+ } else if (!strcasecmp(t, "td") || !strcasecmp(t, "th")) {
+ ignoredata = 1;
+ return;
+ } else if (!strcasecmp(t, "table")) {
+ ignoredata = 0;
+ }
if (cur->isblock)
fputs("\n", stdout);
if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
if (t[1] >= '3')
- for (i = 0; i < 36; i++)
+ for (i = 0; i < 72; i++)
putchar('-');
else if (t[1] >= '1')
- for (i = 0; i < 36; i++)
+ for (i = 0; i < 72; i++)
putchar('=');
putchar('\n');
}
@@ -345,11 +375,11 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
printf("[%s]", t);
/* TODO: check allocation */
if (!links_head)
- links_cur = links_head = calloc(1, sizeof(*links_head));
+ links_cur = links_head = xcalloc(1, sizeof(*links_head));
else
- links_cur = links_cur->next = calloc(1, sizeof(*links_head));
- links_cur->type = strdup(t);
- links_cur->url = strdup(src);
+ links_cur = links_cur->next = xcalloc(1, sizeof(*links_head));
+ links_cur->type = xstrdup(t);
+ links_cur->url = xstrdup(src);
}
src[0] = '\0';
#endif
@@ -373,7 +403,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
support ordered number type only */
fputs("* ", stdout);
} else if (!strcasecmp(t, "hr")) {
- for (i = 0; i < 36; i++)
+ for (i = 0; i < 72; i++)
putchar('-');
}
}
@@ -396,6 +426,8 @@ printlinkrefs(void)
{
size_t i;
+ printf("\n\nLink references:\n");
+
/* TODO: add title attribute or some basic description? */
for (i = 1, links_cur = links_head; links_cur; links_cur = links_cur->next, i++)
printf("[%zu] - %s (%s)\n", i, links_cur->url, links_cur->type);