improve ignore tag handling, cleanup a bit - webdump - [FORK] git://git.codemadness.org/webdump
HTML git clone git://git.z3bra.org/webdump.git
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit b0fd3fce528a98b283ee135d2a09da04191223c3
DIR parent 0a87ef4d7cdee5b3b0fc5b5430edd21eb9dba8d4
HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 26 Aug 2017 15:33:55 +0200
improve ignore tag handling, cleanup a bit
Diffstat:
M main.c | 71 ++++++++++++++-----------------
1 file changed, 32 insertions(+), 39 deletions(-)
---
DIR diff --git a/main.c b/main.c
@@ -217,9 +217,8 @@ xmldataentity(XMLParser *p, const char *data, size_t datalen)
static void
xmltagstart(XMLParser *x, const char *t, size_t tl)
{
- char tmp[64];
struct node *cur;
- int c, i;
+ int i;
cur = &nodes[curnode];
memset(cur, 0, sizeof(*cur));
@@ -244,38 +243,10 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
break;
}
}
-
- /* HACK: ignored tag is parsed, hook into reader and read raw data
- until literal end tag (without using the normal parser).
- process (buffered) as xml[c]data (no entity) */
- if (strcasecmp(t, "script") && strcasecmp(t, "style"))
- return;
-
-startignore:
- while ((c = x->getnext()) != EOF) {
- if (c == '<')
- break;
- }
- if (c == EOF)
- return;
- if ((c = x->getnext()) != '/')
- goto startignore;
- for (i = 0; (c = x->getnext()) != EOF; i++) {
- if (c == '>')
- break;
- if (i + 1 >= sizeof(tmp))
- goto startignore;
- tmp[i] = c;
- }
- tmp[i] = '\0';
-
- /* compare against current ignored tag */
- if (strcasecmp(t, tmp))
- goto startignore;
}
static void
-xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
+xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
{
struct node *cur;
int i;
@@ -290,38 +261,60 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
src[0] = '\0';
#endif
- if (!strcasecmp(tag, "tr"))
+ if (!strcasecmp(t, "tr"))
fputs(" | ", stdout); /* HACK */
if (cur->isblock)
fputs("\n", stdout);
- if (taglen == 2 && tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6') {
- if (tag[1] >= '3')
+ if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
+ if (t[1] >= '3')
for (i = 0; i < 36; i++)
putchar('-');
- else if (tag[1] >= '1')
+ else if (t[1] >= '1')
for (i = 0; i < 36; i++)
putchar('=');
putchar('\n');
}
}
+static char ignoretag[8];
+static XMLParser xo; /* old context */
+
static void
-xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
+xmlignoretagend(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+ if (!strcasecmp(t, ignoretag))
+ memcpy(p, &xo, sizeof(*p)); /* restore context */
+}
+
+static void
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
{
struct node *cur;
int i;
+ /* temporary replace the callback except the reader and end of tag
+ restore the context once we receive the same ignored tag in the
+ end tag handler */
+ if (!strcasecmp(t, "script") || !strcasecmp(t, "style")) {
+ strlcpy(ignoretag, t, sizeof(ignoretag));
+ memcpy(&xo, p, sizeof(xo)); /* store old context */
+ memset(p, 0, sizeof(*p));
+ p->xmltagend = xmlignoretagend;
+ p->getnext = xo.getnext;
+ return;
+ }
+
cur = &nodes[curnode];
if (cur->isblock)
fputs("\n", stdout);
- if (!strcasecmp(tag, "td") || !strcasecmp(tag, "th"))
+ if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
fputs(" | ", stdout); /* HACK */
- if (!strcasecmp(cur->tag, "li")) {
+ if (!strcasecmp(t, "li")) {
/* indent nested list items */
for (i = curnode; i; i--) {
if (!strcasecmp(nodes[i].tag, "li"))
@@ -333,7 +326,7 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
/* TODO: for <ol>, keep list counter on ol element (parent),
support ordered number type only */
fputs("* ", stdout);
- } else if (!strcasecmp(cur->tag, "hr")) {
+ } else if (!strcasecmp(t, "hr")) {
for (i = 0; i < 36; i++)
putchar('-');
}