URI: 
       initial patch to ignore <script> and <style> - webdump - [FORK] git://git.codemadness.org/webdump
  HTML git clone git://git.z3bra.org/webdump.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit ac91a742d386618a025609433e3e43b303272b3e
   DIR parent 54f38abd3722c07e900820343e7c5288c6b0fdce
  HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Thu, 24 Aug 2017 17:17:59 +0200
       
       initial patch to ignore <script> and <style>
       
       Diffstat:
         M main.c                              |      43 +++++++++++++++++--------------
       
       1 file changed, 23 insertions(+), 20 deletions(-)
       ---
   DIR diff --git a/main.c b/main.c
       @@ -40,7 +40,7 @@ static char src[4096]; /* src or href attribute */
        static struct node nodes[MAX_DEPTH];
        static int curnode;
        
       -/* TODO: support literal text in script somehow? < > */
       +/* ignored tag, all text between this is interpreted literally and ignored */
        static char *ignoretags[] = {
                "style",
                "script",
       @@ -232,14 +232,12 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen)
                src[0] = '\0'; /* src, href */
                strlcpy(cur->tag, tag, sizeof(cur->tag));
        
       -#if 1
                for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) {
                        if (!strcasecmp(ignoretags[i], tag)) {
                                cur->isignore = 1;
                                break;
                        }
                }
       -#endif
                for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
                        if (!strcasecmp(pretags[i], tag)) {
                                cur->ispre = 1;
       @@ -299,25 +297,36 @@ static void
        xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
        {
                struct node *cur;
       +        char tmp[64];
                int c, i;
        
                cur = &nodes[curnode];
                if (cur->isignore) {
       -#if 0
                        /* HACK: ignored tag is parsed, hook into reader and read raw data
       -                   until literal end tag (without using the normal parser). */
       -                   
       -                /* TODO: process (buffered) as xml[c]data (no entity) */
       -                while ((c = getchar()) != EOF) {
       -                        if (c == '<') {
       -                                /* TODO: check /endtag */
       +                   until literal end tag (without using the normal parser).
       +                   process (buffered) as xml[c]data (no entity) */
       +startignore:
       +                while ((c = p->getnext()) != EOF) {
       +                        if (c == '<')
                                        break;
       -                        }
                        }
       -                if (c == EOF) {
       +                if (c == EOF)
       +                        return;
       +                if ((c = p->getnext()) != '/')
       +                        goto startignore;
       +                i = 0;
       +                while ((c = p->getnext()) != EOF) {
       +                        if (c == '>')
       +                                break;
       +                        if (i + 1 >= sizeof(tmp))
       +                                goto startignore;
       +                        tmp[i++] = c;
                        }
       +                tmp[i] = '\0';
        
       -#endif
       +                /* compare against current ignored tag */
       +                if (!strcasecmp(cur->tag, tmp))
       +                        cur->isignore = 0;
                        return;
                }
        
       @@ -363,12 +372,6 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
        }
        
        int
       -readchar(void)
       -{
       -        return getchar();
       -}
       -
       -int
        main(void)
        {
                if (pledge("stdio", NULL) < 0)
       @@ -383,7 +386,7 @@ main(void)
                parser.xmltagend = xmltagend;
                parser.xmltagstartparsed = xmltagstartparsed;
        
       -        parser.getnext = readchar;
       +        parser.getnext = getchar;
                xml_parse(&parser);
                putchar('\n');