URI: 
       simplify ignore tags parsing - webdump - [FORK] git://git.codemadness.org/webdump
  HTML git clone git://git.z3bra.org/webdump.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 0a87ef4d7cdee5b3b0fc5b5430edd21eb9dba8d4
   DIR parent de7e902fa925618e4cfb24b044a18b5db2118b03
  HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 26 Aug 2017 12:49:21 +0200
       
       simplify ignore tags parsing
       
       Diffstat:
         M main.c                              |      94 +++++++++++++------------------
       
       1 file changed, 38 insertions(+), 56 deletions(-)
       ---
   DIR diff --git a/main.c b/main.c
       @@ -19,7 +19,6 @@ static XMLParser parser;
        
        struct node {
                char tag[256];
       -        int isignore;
                int ispre;
                int isinline;
                int isblock;
       @@ -40,12 +39,6 @@ static char src[4096]; /* src or href attribute */
        static struct node nodes[MAX_DEPTH];
        static int curnode;
        
       -/* ignored tag, all text between this is interpreted literally and ignored */
       -static char *ignoretags[] = {
       -        "style",
       -        "script",
       -};
       -
        static char *pretags[] = {
                "pre",
                "code",
       @@ -200,12 +193,7 @@ xmldataend(XMLParser *p)
        static void
        xmldata(XMLParser *p, const char *data, size_t datalen)
        {
       -        struct node *cur;
       -
       -        cur = &nodes[curnode];
                string_append(&htmldata, data, datalen);
       -        if (cur->isignore)
       -                return;
        }
        
        static void
       @@ -227,39 +215,63 @@ xmldataentity(XMLParser *p, const char *data, size_t datalen)
        }
        
        static void
       -xmltagstart(XMLParser *p, const char *tag, size_t taglen)
       +xmltagstart(XMLParser *x, const char *t, size_t tl)
        {
       -        struct node *cur = &nodes[curnode];
       -        int i;
       +        char tmp[64];
       +        struct node *cur;
       +        int c, i;
        
       +        cur = &nodes[curnode];
                memset(cur, 0, sizeof(*cur));
                src[0] = '\0'; /* src, href */
       -        strlcpy(cur->tag, tag, sizeof(cur->tag));
       +        strlcpy(cur->tag, t, sizeof(cur->tag));
        
       -        for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) {
       -                if (!strcasecmp(ignoretags[i], tag)) {
       -                        cur->isignore = 1;
       -                        break;
       -                }
       -        }
                for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
       -                if (!strcasecmp(pretags[i], tag)) {
       +                if (!strcasecmp(pretags[i], t)) {
                                cur->ispre = 1;
                                break;
                        }
                }
                for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
       -                if (!strcasecmp(blocktags[i], tag)) {
       +                if (!strcasecmp(blocktags[i], t)) {
                                cur->isblock = 1;
                                break;
                        }
                }
                for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
       -                if (!strcasecmp(inlinetags[i], tag)) {
       +                if (!strcasecmp(inlinetags[i], t)) {
                                cur->isinline = 1;
                                break;
                        }
                }
       +
       +        /* HACK: ignored tag is parsed, hook into reader and read raw data
       +           until literal end tag (without using the normal parser).
       +           process (buffered) as xml[c]data (no entity) */
       +        if (strcasecmp(t, "script") && strcasecmp(t, "style"))
       +                return;
       +
       +startignore:
       +        while ((c = x->getnext()) != EOF) {
       +                if (c == '<')
       +                        break;
       +        }
       +        if (c == EOF)
       +                return;
       +        if ((c = x->getnext()) != '/')
       +                goto startignore;
       +        for (i = 0; (c = x->getnext()) != EOF; i++) {
       +                if (c == '>')
       +                        break;
       +                if (i + 1 >= sizeof(tmp))
       +                        goto startignore;
       +                tmp[i] = c;
       +        }
       +        tmp[i] = '\0';
       +
       +        /* compare against current ignored tag */
       +        if (strcasecmp(t, tmp))
       +                goto startignore;
        }
        
        static void
       @@ -271,8 +283,6 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
                if (curnode)
                        curnode--;
                cur = &nodes[curnode];
       -        if (cur->isignore)
       -                return;
        
        #if 0
                if (src[0])
       @@ -301,37 +311,9 @@ static void
        xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
        {
                struct node *cur;
       -        char tmp[64];
       -        int c, i;
       +        int i;
        
                cur = &nodes[curnode];
       -        if (cur->isignore) {
       -                /* HACK: ignored tag is parsed, hook into reader and read raw data
       -                   until literal end tag (without using the normal parser).
       -                   process (buffered) as xml[c]data (no entity) */
       -startignore:
       -                while ((c = p->getnext()) != EOF) {
       -                        if (c == '<')
       -                                break;
       -                }
       -                if (c == EOF)
       -                        return;
       -                if ((c = p->getnext()) != '/')
       -                        goto startignore;
       -                for (i = 0; (c = p->getnext()) != EOF; i++) {
       -                        if (c == '>')
       -                                break;
       -                        if (i + 1 >= sizeof(tmp))
       -                                goto startignore;
       -                        tmp[i] = c;
       -                }
       -                tmp[i] = '\0';
       -
       -                /* compare against current ignored tag */
       -                if (!strcasecmp(tag, tmp))
       -                        cur->isignore = 0;
       -                return;
       -        }
        
                if (cur->isblock)
                        fputs("\n", stdout);