initial patch to ignore <script> and <style> - webdump - [FORK] git://git.codemadness.org/webdump
HTML git clone git://git.z3bra.org/webdump.git
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit ac91a742d386618a025609433e3e43b303272b3e
DIR parent 54f38abd3722c07e900820343e7c5288c6b0fdce
HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 24 Aug 2017 17:17:59 +0200
initial patch to ignore <script> and <style>
Diffstat:
M main.c | 43 +++++++++++++++++--------------
1 file changed, 23 insertions(+), 20 deletions(-)
---
DIR diff --git a/main.c b/main.c
@@ -40,7 +40,7 @@ static char src[4096]; /* src or href attribute */
static struct node nodes[MAX_DEPTH];
static int curnode;
-/* TODO: support literal text in script somehow? < > */
+/* ignored tag, all text between this is interpreted literally and ignored */
static char *ignoretags[] = {
"style",
"script",
@@ -232,14 +232,12 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen)
src[0] = '\0'; /* src, href */
strlcpy(cur->tag, tag, sizeof(cur->tag));
-#if 1
for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) {
if (!strcasecmp(ignoretags[i], tag)) {
cur->isignore = 1;
break;
}
}
-#endif
for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
if (!strcasecmp(pretags[i], tag)) {
cur->ispre = 1;
@@ -299,25 +297,36 @@ static void
xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
{
struct node *cur;
+ char tmp[64];
int c, i;
cur = &nodes[curnode];
if (cur->isignore) {
-#if 0
/* HACK: ignored tag is parsed, hook into reader and read raw data
- until literal end tag (without using the normal parser). */
-
- /* TODO: process (buffered) as xml[c]data (no entity) */
- while ((c = getchar()) != EOF) {
- if (c == '<') {
- /* TODO: check /endtag */
+ until literal end tag (without using the normal parser).
+ process (buffered) as xml[c]data (no entity) */
+startignore:
+ while ((c = p->getnext()) != EOF) {
+ if (c == '<')
break;
- }
}
- if (c == EOF) {
+ if (c == EOF)
+ return;
+ if ((c = p->getnext()) != '/')
+ goto startignore;
+ i = 0;
+ while ((c = p->getnext()) != EOF) {
+ if (c == '>')
+ break;
+ if (i + 1 >= sizeof(tmp))
+ goto startignore;
+ tmp[i++] = c;
}
+ tmp[i] = '\0';
-#endif
+ /* compare against current ignored tag */
+ if (!strcasecmp(cur->tag, tmp))
+ cur->isignore = 0;
return;
}
@@ -363,12 +372,6 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
}
int
-readchar(void)
-{
- return getchar();
-}
-
-int
main(void)
{
if (pledge("stdio", NULL) < 0)
@@ -383,7 +386,7 @@ main(void)
parser.xmltagend = xmltagend;
parser.xmltagstartparsed = xmltagstartparsed;
- parser.getnext = readchar;
+ parser.getnext = getchar;
xml_parse(&parser);
putchar('\n');