simplify ignore tags parsing - webdump - [FORK] git://git.codemadness.org/webdump
HTML git clone git://git.z3bra.org/webdump.git
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit 0a87ef4d7cdee5b3b0fc5b5430edd21eb9dba8d4
DIR parent de7e902fa925618e4cfb24b044a18b5db2118b03
HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 26 Aug 2017 12:49:21 +0200
simplify ignore tags parsing
Diffstat:
M main.c | 94 +++++++++++++------------------
1 file changed, 38 insertions(+), 56 deletions(-)
---
DIR diff --git a/main.c b/main.c
@@ -19,7 +19,6 @@ static XMLParser parser;
struct node {
char tag[256];
- int isignore;
int ispre;
int isinline;
int isblock;
@@ -40,12 +39,6 @@ static char src[4096]; /* src or href attribute */
static struct node nodes[MAX_DEPTH];
static int curnode;
-/* ignored tag, all text between this is interpreted literally and ignored */
-static char *ignoretags[] = {
- "style",
- "script",
-};
-
static char *pretags[] = {
"pre",
"code",
@@ -200,12 +193,7 @@ xmldataend(XMLParser *p)
static void
xmldata(XMLParser *p, const char *data, size_t datalen)
{
- struct node *cur;
-
- cur = &nodes[curnode];
string_append(&htmldata, data, datalen);
- if (cur->isignore)
- return;
}
static void
@@ -227,39 +215,63 @@ xmldataentity(XMLParser *p, const char *data, size_t datalen)
}
static void
-xmltagstart(XMLParser *p, const char *tag, size_t taglen)
+xmltagstart(XMLParser *x, const char *t, size_t tl)
{
- struct node *cur = &nodes[curnode];
- int i;
+ char tmp[64];
+ struct node *cur;
+ int c, i;
+ cur = &nodes[curnode];
memset(cur, 0, sizeof(*cur));
src[0] = '\0'; /* src, href */
- strlcpy(cur->tag, tag, sizeof(cur->tag));
+ strlcpy(cur->tag, t, sizeof(cur->tag));
- for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) {
- if (!strcasecmp(ignoretags[i], tag)) {
- cur->isignore = 1;
- break;
- }
- }
for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
- if (!strcasecmp(pretags[i], tag)) {
+ if (!strcasecmp(pretags[i], t)) {
cur->ispre = 1;
break;
}
}
for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
- if (!strcasecmp(blocktags[i], tag)) {
+ if (!strcasecmp(blocktags[i], t)) {
cur->isblock = 1;
break;
}
}
for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
- if (!strcasecmp(inlinetags[i], tag)) {
+ if (!strcasecmp(inlinetags[i], t)) {
cur->isinline = 1;
break;
}
}
+
+ /* HACK: ignored tag is parsed, hook into reader and read raw data
+ until literal end tag (without using the normal parser).
+ process (buffered) as xml[c]data (no entity) */
+ if (strcasecmp(t, "script") && strcasecmp(t, "style"))
+ return;
+
+startignore:
+ while ((c = x->getnext()) != EOF) {
+ if (c == '<')
+ break;
+ }
+ if (c == EOF)
+ return;
+ if ((c = x->getnext()) != '/')
+ goto startignore;
+ for (i = 0; (c = x->getnext()) != EOF; i++) {
+ if (c == '>')
+ break;
+ if (i + 1 >= sizeof(tmp))
+ goto startignore;
+ tmp[i] = c;
+ }
+ tmp[i] = '\0';
+
+ /* compare against current ignored tag */
+ if (strcasecmp(t, tmp))
+ goto startignore;
}
static void
@@ -271,8 +283,6 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
if (curnode)
curnode--;
cur = &nodes[curnode];
- if (cur->isignore)
- return;
#if 0
if (src[0])
@@ -301,37 +311,9 @@ static void
xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
{
struct node *cur;
- char tmp[64];
- int c, i;
+ int i;
cur = &nodes[curnode];
- if (cur->isignore) {
- /* HACK: ignored tag is parsed, hook into reader and read raw data
- until literal end tag (without using the normal parser).
- process (buffered) as xml[c]data (no entity) */
-startignore:
- while ((c = p->getnext()) != EOF) {
- if (c == '<')
- break;
- }
- if (c == EOF)
- return;
- if ((c = p->getnext()) != '/')
- goto startignore;
- for (i = 0; (c = p->getnext()) != EOF; i++) {
- if (c == '>')
- break;
- if (i + 1 >= sizeof(tmp))
- goto startignore;
- tmp[i] = c;
- }
- tmp[i] = '\0';
-
- /* compare against current ignored tag */
- if (!strcasecmp(tag, tmp))
- cur->isignore = 0;
- return;
- }
if (cur->isblock)
fputs("\n", stdout);