URI: 
       add work-in-progress code, listing of goals/scope and TODO - webdump - [FORK] git://git.codemadness.org/webdump
  HTML git clone git://git.z3bra.org/webdump.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 26361ccd0ab0f19276d7727b8f589b1109cfbfd1
   DIR parent d22cedcf1a4d6a4066489e029ee2888d76308318
  HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Wed, 12 Sep 2018 22:29:07 +0200
       
       add work-in-progress code, listing of goals/scope and TODO
       
       Diffstat:
         A LICENSE                             |      15 +++++++++++++++
         A README                              |      15 +++++++++++++++
         M TODO                                |       7 +++++++
         M main.c                              |      54 +++++++++++++++++++++++++------
       
       4 files changed, 81 insertions(+), 10 deletions(-)
       ---
   DIR diff --git a/LICENSE b/LICENSE
       @@ -0,0 +1,15 @@
       +ISC License
       +
       +Copyright (c) 2017-2018 Hiltjo Posthuma <hiltjo@codemadness.org>
       +
       +Permission to use, copy, modify, and/or distribute this software for any
       +purpose with or without fee is hereby granted, provided that the above
       +copyright notice and this permission notice appear in all copies.
       +
       +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   DIR diff --git a/README b/README
       @@ -0,0 +1,15 @@
       +Work in progress:
       +
       +
       +text-based webpage viewer
       +
       +
       +Goals:
       +
       +- The tool will render a webpage to stdout only, similar to links -dump or
       +  lynx -dump.
       +- It will be usable and secure for rendering HTML mails.
       +- Without the user's consent, no remote resources will be loaded and no JavaScript code will be executed.
       +- No filesystem access will be required.
       +
       +Will use OpenBSD pledge(2).
   DIR diff --git a/TODO b/TODO
       @@ -1,3 +1,10 @@
       +- improve/remove duplicate white-space/newlines?
       +- cleanup code.
       +
       +===
       +
        - <code> should not be treated as a block (<pre> does?)
        
        ? xml.c: make sure to always call xmldata handler even if datalen == 0 ?
       +
       +- add links as references, for example the MP3 URLs on the page http://absmagazin.de/2018.
   DIR diff --git a/main.c b/main.c
       @@ -1,5 +1,5 @@
        /* TODO: escape control characters */
       -/* TODO: specify and parse relative url */
       +/* TODO: specify and parse relative url, allow to specify base and also parse <base href=""> ? */
        
        #include <ctype.h>
        #include <err.h>
       @@ -16,6 +16,16 @@
        
        static XMLParser parser;
        
       +struct linkref {
       +        char *type;
       +        char *url;
       +        struct linkref *next;
       +};
       +
       +static struct linkref *links_head;
       +static struct linkref *links_cur;
       +static int linkcount;
       +
        struct node {
                char tag[256];
                int ispre;
       @@ -173,14 +183,15 @@ xmldataend(XMLParser *p)
                } else {
        #if 1
                        for (; s < e; s++) {
       -                        if (isspace(*s)) {
       -                                if (s != start && !isspace(s[-1]))
       +                        if (isspace((unsigned char)*s)) {
       +                                if (s != start && !isspace((unsigned char)s[-1]))
                                                putchar(' ');
                                } else {
                                        putchar(*s);
                                }
                        }
       -                if (s != start && e != start && !isspace(s[-1]) && isspace(e[-1]))
       +                if (s != start && e != start && !isspace((unsigned char)s[-1]) &&
       +                    isspace((unsigned char)e[-1]))
                                putchar(' ');
        #endif
        /*                printf("DEBUG: |%s|\n", start);*/
       @@ -254,12 +265,6 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
                        curnode--;
                cur = &nodes[curnode];
        
       -#if 0
       -        if (src[0])
       -                printf(" [%s]", src);
       -        src[0] = '\0';
       -#endif
       -
                if (!strcasecmp(t, "tr"))
                        fputs(" | ", stdout); /* HACK */
        
       @@ -307,6 +312,23 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
        
                cur = &nodes[curnode];
        
       +#if 1
       +        if (src[0]) {
       +                printf(" [%d]", ++linkcount);
       +                if (!strcasecmp(t, "img") || !strcasecmp(t, "video") ||
       +                    !strcasecmp(t, "audio"))
       +                        printf("[%s]", t);
       +                /* TODO: check allocation */
       +                if (!links_head)
       +                        links_cur = links_head = calloc(1, sizeof(*links_head));
       +                else
       +                        links_cur = links_cur->next = calloc(1, sizeof(*links_head));
       +                links_cur->type = strdup(t);
       +                links_cur->url = strdup(src);
       +        }
       +        src[0] = '\0';
       +#endif
       +
                if (cur->isblock)
                        fputs("\n", stdout);
        
       @@ -348,6 +370,16 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
                        strlcpy(src, value, sizeof(src));
        }
        
       +void
       +printlinkrefs(void)
       +{
       +        size_t i;
       +
       +        /* TODO: add title attribute or some basic description? */
       +        for (i = 1, links_cur = links_head; links_cur; links_cur = links_cur->next, i++)
       +                printf("[%zu] - %s (%s)\n", i, links_cur->url, links_cur->type);
       +}
       +
        int
        main(void)
        {
       @@ -365,6 +397,8 @@ main(void)
        
                parser.getnext = getchar;
                xml_parse(&parser);
       +
       +        printlinkrefs();
                putchar('\n');
        
                return 0;