URI: 
       twip - webdump - [FORK] git://git.codemadness.org/webdump
  HTML git clone git://git.z3bra.org/webdump.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 114efd43e79a417abbda2e8c427d9dd57b482bce
   DIR parent ea14e82082be78917aaa6e380879c0e230330b47
  HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Mon, 24 Jul 2017 10:06:48 +0200
       
       wip
       
       Diffstat:
         A TODO                                |       1 +
         M main.c                              |     110 +++++++++++++++++++++++++++----
       
       2 files changed, 97 insertions(+), 14 deletions(-)
       ---
   DIR diff --git a/TODO b/TODO
       t@@ -0,0 +1 @@
       +? xml.c: make sure to always call xmldata handler even if datalen == 0 ?
   DIR diff --git a/main.c b/main.c
       t@@ -1,3 +1,6 @@
       +/* TODO: escape control characters */
       +/* TODO: specify and parse relative url */
       +
        #include <ctype.h>
        #include <err.h>
        #include <stdint.h>
       t@@ -76,9 +79,12 @@ static char *blocktags[] = {
                "title",
                "tr",
                "table",
       +        "code",
       +        "blockquote",
        };
        
        static String htmldata;
       +static String preprocess;
        
        /* Clear string only; don't free, prevents unnecessary reallocation. */
        static void
       t@@ -115,6 +121,28 @@ string_append(String *s, const char *data, size_t len)
                s->data[s->len] = '\0';
        }
        
       +#if 0
       +/* Print at most len bytes of s to stdout, stopping at a NUL byte. Tab and
       + * newline are kept; any other control character is printed as a space. */
       +static void
       +safeprint(const char *s, size_t len)
       +{
       +        size_t i;
       +
       +        for (i = 0; i < len && s[i]; i++) {
       +                switch (s[i]) {
       +                case '\t':
       +                case '\n':
       +                        putchar(s[i]);
       +                        break;
       +                default:
       +                        /* cast: ctype functions need unsigned char values */
       +                        putchar(iscntrl((unsigned char)s[i]) ? ' ' : s[i]);
       +                }
       +        }
       +}
       +#endif
       +
        static void
        xmlcdata(XMLParser *p, const char *data, size_t datalen)
        {
       t@@ -128,31 +156,41 @@ xmldataend(XMLParser *p)
                char *start, *s, *e;
        
                cur = &nodes[curnode];
       +        if (!htmldata.data || !htmldata.len)
       +                return;
        
                start = htmldata.data;
       -        for (s = start; *s; s++)
       +#if 1
       +        s = start;
       +        e = s + strlen(s);
       +#else
       +        for (s = start; *s; s++) {
                        if (*s != '\r' && *s != '\n')
                                break;
       +        }
        
       -        e = s + strlen(s);
       -        for (; e > s; e--)
       +        for (e = s + strlen(s); e > s; e--) {
                        if (*e != '\r' && *e != '\n')
                                break;
       +        }
       +#endif
        
                if (cur->ispre) {
                        fwrite(s, 1, e - s, stdout);
                } else {
       +#if 0
                        for (; s < e; s++) {
       -                        if (!isspace(*s))
       -                                break;
       -                }
       -                for (; s < e; s++) {
       -                        if (!isspace(*s)) {
       -                                if (s != start && isspace(s[-1]))
       +                        if (isspace(*s)) {
       +                                if (s != start && !isspace(s[-1]))
                                                putchar(' ');
       +                        } else {
                                        putchar(*s);
                                }
                        }
       +                if (s != start && e != start && !isspace(s[-1]) && isspace(e[-1]))
       +                        putchar(' ');
       +#endif
       +                printf("DEBUG: |%s|\n", start);
                }
        
                string_clear(&htmldata);
       t@@ -164,10 +202,9 @@ xmldata(XMLParser *p, const char *data, size_t datalen)
                struct node *cur;
        
                cur = &nodes[curnode];
       +        string_append(&htmldata, data, datalen);
                if (cur->isignore)
                        return;
       -
       -        string_append(&htmldata, data, datalen);
        }
        
        static void
       t@@ -239,6 +276,9 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
                src[0] = '\0';
        #endif
        
       +        if (!strcmp(tag, "tr"))
       +                fputs(" | ", stdout); /* HACK */
       +
                if (cur->isblock)
                        fputs("\n", stdout);
        
       t@@ -266,6 +306,9 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
                if (cur->isblock)
                        fputs("\n", stdout);
        
       +        if (!strcmp(tag, "td"))
       +                fputs(" | ", stdout); /* HACK */
       +
                if (!strcmp(cur->tag, "li")) {
                        /* indent nested list items */
                        for (i = curnode; i; i--) {
       t@@ -295,18 +338,57 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
                if (!strcmp(tag, "a") && !strcmp(name, "href") && valuelen)
                        strlcpy(src, value, sizeof(src));
        
       -        /* TODO: check alt and title attr also? */
       -        if (!strcmp(tag, "img") && !strcmp(name, "src") && valuelen)
       +        if ((!strcmp(tag, "img") || !strcmp(tag, "video") || !strcmp(tag, "audio")) &&
       +            !strcmp(name, "src") && valuelen)
                        strlcpy(src, value, sizeof(src));
        }
        
       +static size_t read_offset;
       +
       +/* Return the next byte of preprocess (as unsigned char, like getchar) or EOF at end. */
       +int
       +readchar(void)
       +{
       +        const char *p;
       +        size_t j, n;
       +
       +        for (; read_offset < preprocess.len; ) {
       +                p = &preprocess.data[read_offset];
       +                if (*p != '<')
       +                        return (unsigned char)preprocess.data[read_offset++];
       +                for (j = 0; j < sizeof(ignoretags) / sizeof(*ignoretags); j++) {
       +                        n = strlen(ignoretags[j]); /* name length; assumes C-string entries */
       +                        if (!strncmp(p + 1, ignoretags[j], n) && strchr(" \t>", p[1 + n])) {
       +                                /* TODO: search until end of this tag */
       +                        }
       +                }
       +                /* TODO: if no match just return char */
       +                return (unsigned char)preprocess.data[read_offset++];
       +        }
       +        return EOF;
       +}
       +
        /* TODO: preprocess data, strip <script>, <style> etc */
        int
        main(void)
        {
       +
       +        char buf[BUFSIZ];
       +        int n;
       +
                if (pledge("stdio", NULL) < 0)
                        err(1, "pledge");
        
       +        /* TODO: optimize later */
       +        while (1) {
       +                /* TODO: check read error */
       +                n = read(0, buf, sizeof(buf) - 1);
       +                if (n <= 0)
       +                        break;
       +                buf[n] = '\0';
       +                string_append(&preprocess, buf, n);
       +        }
       +        
                parser.xmlattr = xmlattr;
                parser.xmlcdata = xmlcdata;
                parser.xmldata = xmldata;
       t@@ -316,7 +398,7 @@ main(void)
                parser.xmltagend = xmltagend;
                parser.xmltagstartparsed = xmltagstartparsed;
        
       -        parser.getnext = getchar;
       +        parser.getnext = readchar;
                xml_parse(&parser);
                putchar('\n');