feed.c - frontends - front-ends for some sites (experiment)
HTML git clone git://git.codemadness.org/frontends
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
feed.c (31612B)
---
1 #include <err.h>
2 #include <errno.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <strings.h>
8 #include <time.h>
9 #include <unistd.h>
10
11 #include "https.h"
12 #include "util.h"
13 #include "youtube.h"
14 #include "xml.h"
15
/* True when ctx.iscontent is set and ctx.iscontenttag is clear: the parser is
 * inside the data of a content field. */
#define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag))
/* True when ctx.iscontenttag is set and ctx.iscontent is clear: a content tag
 * was started but has not been promoted to "in content" yet. */
#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)

/* string and byte-length: expands to TWO comma-separated expressions, the
 * argument itself and sizeof(argument) - 1. Only meaningful for string
 * literals / char arrays, not for pointers. */
#define STRP(s) s,sizeof(s)-1

/* suffix printed after the title of items whose link contains "/shorts/" */
#define YT_SHORTS_TITLE " [Short]"
23
/* Type of the feed item currently being parsed. FeedTypeNone means the parser
 * is not inside an item; FeedTypeAtom is set when an <entry> tag starts. */
enum FeedType {
	FeedTypeNone = 0,
	FeedTypeAtom = 2
};
28
/* String data / memory pool.
 * Growable, NUL-terminated byte buffer: string_append() grows it and
 * string_clear() resets the length without freeing the allocation. */
typedef struct string {
	char *data; /* data */
	size_t len; /* string length */
	size_t bufsiz; /* allocated size */
} String;
35
/* NOTE: the order of these fields (content, date, author) indicate the
 * priority to use them, from least important to high.
 * xmltagstartparsed() compares these values numerically: a tag only replaces
 * a field when its id is greater than the id stored for that field. */
enum TagId {
	TagUnknown = 0,
	/* Atom */
	/* creation date has higher priority */
	AtomTagPublished,
	AtomTagTitle,
	AtomTagMediaDescription,
	AtomTagId,
	AtomTagLink, /* <link> before its rel attribute has been inspected */
	AtomTagLinkAlternate, /* <link> with empty rel or rel="alternate" */
	AtomTagAuthor, AtomTagAuthorName, /* <author> and nested <author><name> */
	TagYoutubeVideoId,
	TagLast /* number of tag ids, used to size fieldmap */
};
52
/* A known tag name and the id it maps to; also used (in FeedContext) to hold
 * the tag that is currently open. */
typedef struct feedtag {
	char *name; /* name of tag to match */
	size_t len; /* len of `name` */
	enum TagId id; /* unique ID */
} FeedTag;
58
/* Value of one output field of the current item, together with the id of the
 * tag that supplied it. */
typedef struct field {
	String str;
	enum TagId tagid; /* tagid set previously, used for tag priority */
} FeedField;
63
/* Indices into FeedContext.fields; FeedFieldLast is the number of fields. */
enum {
	/* sfeed fields */
	FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
	FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
	FeedFieldYoutubeId, /* yt:videoId */
	FeedFieldLast
};
71
/* Parser state shared by the XML callbacks while one feed item is parsed. */
typedef struct feedcontext {
	String *field; /* current FeedItem field String */
	FeedField fields[FeedFieldLast]; /* data for current item */
	FeedTag tag; /* unique current parsed tag */
	int iscontent; /* in content data */
	int iscontenttag; /* in content tag */
	enum FeedType feedtype; /* FeedTypeNone while outside of an item */
} FeedContext;
80
/* time parsing and tag / attribute matching helpers */
static long long datetounix(long long, int, int, int, int, int);
static FeedTag * gettag(enum FeedType, const char *, size_t);
static long gettzoffset(const char *);
static int isattr(const char *, size_t, const char *, size_t);
static int istag(const char *, size_t, const char *, size_t);
static int parsetime(const char *, long long *);

/* output: header / item / footer writers per format */
static void atom_header(void);
static void atom_item(void);
static void atom_footer(void);
static void gph_header(void);
static void gph_footer(void);
static void html_header(void);
static void html_footer(void);
static void json_header(void);
static void json_item(void);
static void json_footer(void);
static void sfeed_item(void); /* TSV / sfeed */
static void twtxt_item(void);

/* String helpers */
static void string_append(String *, const char *, size_t);
static void string_buffer_realloc(String *, size_t);
static void string_clear(String *);
static void string_print_encoded(String *);
static void string_print_timestamp(String *);
static void string_print(String *);
/* XML parser callbacks (installed on `parser` in main) */
static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t,
                    const char *, size_t);
static void xmlattrentity(XMLParser *, const char *, size_t, const char *,
                          size_t, const char *, size_t);
static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
                         size_t);
static void xmldata(XMLParser *, const char *, size_t);
static void xmldataentity(XMLParser *, const char *, size_t);
static void xmltagend(XMLParser *, const char *, size_t, int);
static void xmltagstart(XMLParser *, const char *, size_t);
static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
118
/* Atom, must be alphabetical order: gettag() looks names up with bsearch(3)
 * using the case-insensitive comparator tagcmp(). */
static const FeedTag atomtags[] = {
	{ STRP("author"), AtomTagAuthor },
	{ STRP("id"), AtomTagId },
	/* Atom: <link href="" />, RSS has <link></link> */
	{ STRP("link"), AtomTagLink },
	{ STRP("media:description"), AtomTagMediaDescription },
	{ STRP("published"), AtomTagPublished },
	{ STRP("title"), AtomTagTitle },
	{ STRP("yt:videoId"), TagYoutubeVideoId }
};
130
/* special case: nested <author><name>.
 * xmltagstart() switches to atomtagauthorname when <name> opens inside
 * <author>; xmltagend() switches back to atomtagauthor when it closes. */
static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };

/* reference to no / unknown tag */
static const FeedTag notag = { STRP(""), TagUnknown };
137
/* map TagId type to RSS/Atom field, all tags must be defined.
 * -1 means the tag itself does not store data into any field. */
static const int fieldmap[TagLast] = {
	[TagUnknown] = -1,
	/* Atom */
	[AtomTagPublished] = FeedFieldTime,
	[AtomTagTitle] = FeedFieldTitle,
	[AtomTagMediaDescription] = FeedFieldContent,
	[AtomTagId] = FeedFieldId,
	[AtomTagLink] = -1,
	[AtomTagLinkAlternate] = FeedFieldLink,
	[AtomTagAuthor] = -1,
	[AtomTagAuthorName] = FeedFieldAuthor,
	[TagYoutubeVideoId] = FeedFieldYoutubeId
};
152
/* separator between the fields of a TSV (sfeed) line */
static const int FieldSeparator = '\t';

static FeedContext ctx; /* state of the item being parsed */
static XMLParser parser; /* XML parser state */
/* attrrel: value of the rel attribute of the current tag.
 * tmpstr: value of the href attribute, copied into the field at tag end. */
static String attrrel, tmpstr;

/* result of youtube_channel_videos(), searched by video id per item */
static struct search_response *search_res = NULL;
static void (*printfields)(void) = sfeed_item; /* default: sfeed(5) format */
/* cgimode: set when REQUEST_URI or REQUEST is in the environment.
 * godmode: set when SERVER_PROTOCOL contains "gopher". */
static int cgimode = 0, godmode = 0;
/* only show items found/matched on the channel with the feed. */
static int showfound = 0;
/* show shorts ("/shorts/" in the URL) or not. */
static int showshorts = 0;
/* defaults, overridden by SERVER_NAME / SERVER_PORT in CGI mode */
static const char *server_name = "127.0.0.1", *server_port = "70";
167
168 static int
169 tagcmp(const void *v1, const void *v2)
170 {
171 return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name);
172 }
173
174 /* Unique tagid for parsed tag name. */
175 static FeedTag *
176 gettag(enum FeedType feedtype, const char *name, size_t namelen)
177 {
178 FeedTag f, *r = NULL;
179
180 f.name = (char *)name;
181
182 switch (feedtype) {
183 case FeedTypeAtom:
184 r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
185 sizeof(atomtags[0]), tagcmp);
186 break;
187 default:
188 break;
189 }
190
191 return r;
192 }
193
194 /* Clear string only; don't free, prevents unnecessary reallocation. */
195 static void
196 string_clear(String *s)
197 {
198 if (s->data)
199 s->data[0] = '\0';
200 s->len = 0;
201 }
202
203 static void
204 string_buffer_realloc(String *s, size_t newlen)
205 {
206 size_t alloclen;
207
208 if (newlen > SIZE_MAX / 2) {
209 alloclen = SIZE_MAX;
210 } else {
211 for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
212 ;
213 }
214 if (!(s->data = realloc(s->data, alloclen)))
215 err(1, "realloc");
216 s->bufsiz = alloclen;
217 }
218
219 /* Append data to String, s->data and data may not overlap. */
220 static void
221 string_append(String *s, const char *data, size_t len)
222 {
223 if (!len)
224 return;
225
226 if (s->len >= SIZE_MAX - len) {
227 errno = ENOMEM;
228 err(1, "realloc");
229 }
230
231 /* check if allocation is necessary, never shrink the buffer. */
232 if (s->len + len >= s->bufsiz)
233 string_buffer_realloc(s, s->len + len + 1);
234 memcpy(s->data + s->len, data, len);
235 s->len += len;
236 s->data[s->len] = '\0';
237 }
238
239 /* Print text, encode TABs, newlines and '\', remove other whitespace.
240 * Remove leading and trailing whitespace. */
241 static void
242 string_print_encoded(String *s)
243 {
244 const char *p, *e;
245
246 if (!s->data || !s->len)
247 return;
248
249 p = s->data;
250 e = p + strlen(p);
251
252 for (; *p && p != e; p++) {
253 switch (*p) {
254 case '\n': putchar('\\'); putchar('n'); break;
255 case '\\': putchar('\\'); putchar('\\'); break;
256 case '\t': putchar('\\'); putchar('t'); break;
257 default:
258 /* ignore control chars */
259 if (!ISCNTRL((unsigned char)*p))
260 putchar(*p);
261 break;
262 }
263 }
264 }
265
266 /* Print text, replace TABs, carriage return and other whitespace with ' '.
267 * Other control chars are removed. Remove leading and trailing whitespace. */
268 static void
269 string_print(String *s)
270 {
271 char *p, *e;
272
273 if (!s->data || !s->len)
274 return;
275
276 p = s->data;
277 e = p + s->len;
278 for (; *p && p != e; p++) {
279 if (ISSPACE((unsigned char)*p))
280 putchar(' '); /* any whitespace to space */
281 else if (!ISCNTRL((unsigned char)*p))
282 /* ignore other control chars */
283 putchar(*p);
284 }
285 }
286
287 /* Print as UNIX timestamp, print nothing if the time is empty or invalid. */
288 static void
289 string_print_timestamp(String *s)
290 {
291 long long t;
292
293 if (!s->data || !s->len)
294 return;
295
296 if (parsetime(s->data, &t) != -1)
297 printf("%lld", t);
298 }
299
/* Convert broken-down time fields to a signed (at least) 64-bit UNIX
 * timestamp. The arguments follow struct tm conventions and must already be
 * in a valid range: year = calendar year - 1900, mon = month - 1 (0..11),
 * day = day of the month (1..31). No range checking is done here. */
static long long
datetounix(long long year, int mon, int day, int hour, int min, int sec)
{
	/* seconds elapsed before the start of each month in a non-leap year */
	static const long secs_through_month[] = {
		0, 31 * 86400, 59 * 86400, 90 * 86400,
		120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
		243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
	int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
	long long t;

	if (year - 2ULL <= 136) {
		/* fast path for 1902 up to and including 2038: in this range
		   every 4th year is a leap year. 17 is the number of leap
		   years counted up to 1970. */
		leaps = (year / 4) - 17;
		is_leap = !(year & 3);
		if (is_leap)
			leaps--; /* leap day of this year is added per month below */
		t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
	} else {
		/* general case, relative to the year 2000: split the distance
		   into 400-year cycles, centuries and remaining years. Every
		   4th year is a leap year, except each 100th, unless it is
		   also a 400th. */
		cycles = (year - 100) / 400;
		rem = (year - 100) % 400;
		if (rem < 0) {
			cycles--;
			rem += 400;
		}
		if (rem == 0) {
			is_leap = 1;
		} else {
			/* rem is 1..399 here */
			centuries = rem / 100;
			rem -= centuries * 100;
			if (rem != 0) {
				leaps = rem / 4U;
				rem %= 4U;
				is_leap = !rem;
			}
		}
		leaps += (97 * cycles) + (24 * centuries) - is_leap;

		/* adjust 8 leap days from 1970 up to and including 2000:
		   ((30 * 365) + 8) * 86400 = 946771200 */
		t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
	}

	t += secs_through_month[mon];
	if (is_leap && mon >= 2)
		t += 86400; /* past February in a leap year */
	t += 86400LL * (day - 1);
	t += 3600LL * hour;
	t += 60LL * min;
	t += sec;

	return t;
}
366
/* Parse a numeric timezone offset such as "+02:00", "-0530" or "+01" at the
 * start of `s` and return it in seconds from UTC. Up to two digits are read
 * for the hours and up to two for the minutes, with an optional ':' between.
 * Anything that does not start with '+' or '-' yields 0. */
static long
gettzoffset(const char *s)
{
	const char *p = s;
	long hours = 0, mins = 0, sign;
	size_t n;

	if (*p != '+' && *p != '-')
		return 0; /* timezone name or no offset */

	sign = (*p == '-') ? -1 : 1;
	p++;

	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
		hours = (hours * 10) + (*p - '0');
	if (*p == ':')
		p++;
	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
		mins = (mins * 10) + (*p - '0');

	return ((hours * 3600) + (mins * 60)) * sign;
}
390
/* Parse the time string `s` and store the UNIX timestamp in `tp`.
 * Accepted formats: "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or
 * "%Y%m%d%H%M%S"; whatever follows the seconds is passed to gettzoffset().
 * Returns 0 on success or -1 on failure (then `tp` is not written). */
static int
parsetime(const char *s, long long *tp)
{
	/* year, month, day, hour, minute, second */
	int part[6] = { 0 }, idx, ndigits, maxdigits, val;

	/* must start with a 4-digit year */
	if (!ISDIGIT((unsigned char)s[0]) ||
	    !ISDIGIT((unsigned char)s[1]) ||
	    !ISDIGIT((unsigned char)s[2]) ||
	    !ISDIGIT((unsigned char)s[3]))
		return -1;

	for (idx = 0; *s && idx < 6; idx++) {
		maxdigits = (idx == 0) ? 4 : 2;
		val = 0;
		for (ndigits = 0; ndigits < maxdigits &&
		     ISDIGIT((unsigned char)*s); ndigits++, s++) {
			val = (val * 10) + (*s - '0');
		}
		part[idx] = val;

		/* skip one optional separator: '-' inside the date, 'T' or
		   whitespace between date and time, ':' inside the time */
		if (idx < 2) {
			if (*s == '-')
				s++;
		} else if (idx == 2) {
			if (*s == 'T' || ISSPACE((unsigned char)*s))
				s++;
		} else if (*s == ':') {
			s++;
		}
	}

	/* invalid range */
	if (part[0] < 0 || part[0] > 9999 ||
	    part[1] < 1 || part[1] > 12 ||
	    part[2] < 1 || part[2] > 31 ||
	    part[3] < 0 || part[3] > 23 ||
	    part[4] < 0 || part[4] > 59 ||
	    part[5] < 0 || part[5] > 60) /* allow leap second */
		return -1;

	*tp = datetounix(part[0] - 1900, part[1] - 1, part[2],
	                 part[3], part[4], part[5]) - gettzoffset(s);

	return 0;
}
433
/* Write the XML declaration and the opening of the Atom feed to stdout. */
static void
atom_header(void)
{
	static const char head[] =
		"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
		"<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"
		"\t<title>Newsfeed</title>\n";

	fwrite(head, 1, sizeof(head) - 1, stdout);
}
441
/* Close the Atom feed element opened by atom_header(). */
static void
atom_footer(void)
{
	puts("</feed>");
}
447
448 static int
449 iscurrentitemshort(void)
450 {
451 return ctx.fields[FeedFieldLink].str.len &&
452 strstr(ctx.fields[FeedFieldLink].str.data, "/shorts/");
453 }
454
455 static int
456 iscurrentitemallowed(struct item *found)
457 {
458 /* Only print the video if it was found in the feed aswell. */
459 if (showfound && !found)
460 return 0;
461
462 /* Show shorts or not. */
463 if (!showshorts && iscurrentitemshort())
464 return 0;
465
466 return 1;
467 }
468
469 static void
470 atom_item(void)
471 {
472 struct item *v, *found = NULL;
473 size_t i;
474
475 /* must have a video id */
476 if (!ctx.fields[FeedFieldYoutubeId].str.len)
477 return;
478
479 for (i = 0; i < search_res->nitems; i++) {
480 v = &(search_res->items[i]);
481 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
482 found = v;
483 }
484
485 if (!iscurrentitemallowed(found))
486 return;
487
488 fputs("<entry>\n\t<title>", stdout);
489 if (found && found->membersonly)
490 xmlencode(MEMBERS_ONLY);
491 xmlencode(ctx.fields[FeedFieldTitle].str.data);
492
493 if (iscurrentitemshort())
494 fputs(YT_SHORTS_TITLE, stdout);
495 if (found && found->duration[0]) {
496 fputs(" [", stdout);
497 xmlencode(found->duration);
498 fputs("]", stdout);
499 }
500 fputs("</title>\n", stdout);
501 if (ctx.fields[FeedFieldLink].str.len) {
502 fputs("\t<link rel=\"alternate\" href=\"", stdout);
503 xmlencode(ctx.fields[FeedFieldLink].str.data);
504 fputs("\" />\n", stdout);
505 }
506 /* prefer link over id for Atom <id>. */
507 fputs("\t<id>", stdout);
508 if (ctx.fields[FeedFieldLink].str.len)
509 xmlencode(ctx.fields[FeedFieldLink].str.data);
510 else if (ctx.fields[FeedFieldId].str.len)
511 xmlencode(ctx.fields[FeedFieldId].str.data);
512 fputs("</id>\n", stdout);
513
514 /* just print the original timestamp, it should conform */
515 fputs("\t<updated>", stdout);
516 string_print(&ctx.fields[FeedFieldTime].str);
517 fputs("</updated>\n", stdout);
518
519 if (ctx.fields[FeedFieldAuthor].str.len) {
520 fputs("\t<author><name>", stdout);
521 xmlencode(ctx.fields[FeedFieldAuthor].str.data);
522 fputs("</name></author>\n", stdout);
523 }
524 if (ctx.fields[FeedFieldContent].str.len) {
525 fputs("\t<content>", stdout);
526 xmlencode(ctx.fields[FeedFieldContent].str.data);
527 fputs("</content>\n", stdout);
528 }
529 fputs("</entry>\n", stdout);
530 }
531
532
/* Write the start of the HTML page, up to and including the opening <pre>. */
static void
html_header(void)
{
	static const char head[] =
		"<!DOCTYPE HTML>\n"
		"<html>\n"
		"<head>\n"
		"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
		"</head>\n"
		"<body><pre>\n";

	fwrite(head, 1, sizeof(head) - 1, stdout);
}
543
/* Close the elements opened by html_header(). */
static void
html_footer(void)
{
	puts("</pre></body>\n</html>");
}
549
550 static void
551 html_item(void)
552 {
553 struct item *v, *found = NULL;
554 size_t i;
555
556 /* must have a video id */
557 if (!ctx.fields[FeedFieldYoutubeId].str.len)
558 return;
559
560 for (i = 0; i < search_res->nitems; i++) {
561 v = &(search_res->items[i]);
562 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
563 found = v;
564 }
565
566 if (!iscurrentitemallowed(found))
567 return;
568
569 /* just print the original timestamp, it should conform */
570 xmlencode(ctx.fields[FeedFieldTime].str.data);
571 fputs(" ", stdout);
572
573 if (ctx.fields[FeedFieldLink].str.len) {
574 fputs("<a href=\"", stdout);
575 xmlencode(ctx.fields[FeedFieldLink].str.data);
576 fputs("\">", stdout);
577 }
578
579 if (found && found->membersonly)
580 xmlencode(MEMBERS_ONLY);
581 xmlencode(ctx.fields[FeedFieldTitle].str.data);
582 if (iscurrentitemshort())
583 fputs(YT_SHORTS_TITLE, stdout);
584 if (found && found->duration[0]) {
585 fputs(" [", stdout);
586 xmlencode(found->duration);
587 fputs("]", stdout);
588 }
589 if (ctx.fields[FeedFieldLink].str.len) {
590 fputs("</a>", stdout);
591 }
592 fputs("\n", stdout);
593 }
594
/* Write the NUL-terminated string `s` to stdout through gophertext(). */
static void
gphencode(const char *s)
{
	size_t n = strlen(s);

	gophertext(stdout, s, n);
}
600
/* Gopher output has no header: intentionally empty, it exists so that every
 * format has a header / footer pair. */
static void
gph_header(void)
{
}
605
/* End the Gopher listing with a line holding a single '.'. */
static void
gph_footer(void)
{
	static const char lastline[] = ".\r\n";

	fputs(lastline, stdout);
}
611
612 static void
613 gph_item(void)
614 {
615 struct item *v, *found = NULL;
616 size_t i;
617
618 /* must have a video id */
619 if (!ctx.fields[FeedFieldYoutubeId].str.len)
620 return;
621
622 for (i = 0; i < search_res->nitems; i++) {
623 v = &(search_res->items[i]);
624 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
625 found = v;
626 }
627
628 if (!iscurrentitemallowed(found))
629 return;
630
631 fputs("h", stdout);
632 /* just print the original timestamp, it should conform */
633 gphencode(ctx.fields[FeedFieldTime].str.data);
634 fputs(" ", stdout);
635 if (found && found->membersonly)
636 gphencode(MEMBERS_ONLY);
637 gphencode(ctx.fields[FeedFieldTitle].str.data);
638 if (iscurrentitemshort())
639 gphencode(YT_SHORTS_TITLE);
640 if (found && found->duration[0]) {
641 fputs(" [", stdout);
642 gphencode(found->duration);
643 fputs("]", stdout);
644 }
645 fputs("\t", stdout);
646 if (ctx.fields[FeedFieldLink].str.len) {
647 fputs("URL:", stdout);
648 gphencode(ctx.fields[FeedFieldLink].str.data);
649 }
650 printf("\t%s\t%s\r\n", server_name, server_port);
651 }
652
/* Write the start of the JSON Feed object, up to the opening of "items". */
static void
json_header(void)
{
	static const char head[] =
		"{\n"
		"\"version\": \"https://jsonfeed.org/version/1.1\",\n"
		"\"title\": \"Newsfeed\",\n"
		"\"items\": [\n";

	fwrite(head, 1, sizeof(head) - 1, stdout);
}
661
/* Close the "items" array and the JSON Feed object opened by json_header(). */
static void
json_footer(void)
{
	puts("]\n}");
}
667
/* Print `s` to stdout as the inside of a JSON string: backslash and double
 * quote are escaped with a backslash, control characters are written as
 * \u00XX, everything else is copied as-is. */
static void
json_printfield(const char *s)
{
	for (; *s != '\0'; s++) {
		switch (*s) {
		case '\\':
			fputs("\\\\", stdout);
			break;
		case '"':
			fputs("\\\"", stdout);
			break;
		default:
			if (ISCNTRL((unsigned char)*s))
				printf("\\u00%02x", (unsigned char)*s);
			else
				putchar(*s);
			break;
		}
	}
}
682
683 static void
684 json_item(void)
685 {
686 static int json_firstitem = 1;
687 struct item *v, *found = NULL;
688 size_t i;
689
690 /* must have a video id */
691 if (!ctx.fields[FeedFieldYoutubeId].str.len)
692 return;
693
694 for (i = 0; i < search_res->nitems; i++) {
695 v = &(search_res->items[i]);
696 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
697 found = v;
698 }
699
700 if (!iscurrentitemallowed(found))
701 return;
702
703 if (!json_firstitem)
704 fputs(",\n", stdout);
705 json_firstitem = 0;
706
707 fputs("{\n\t\"id\": \"", stdout);
708 json_printfield(ctx.fields[FeedFieldId].str.data);
709 fputs("\"", stdout);
710
711 /* just print the original timestamp, it should conform */
712 fputs(",\n\t\"date_published\": \"", stdout);
713 string_print(&ctx.fields[FeedFieldTime].str);
714 fputs("\"", stdout);
715
716 fputs(",\n\t\"title\": \"", stdout);
717 if (found && found->membersonly)
718 json_printfield(MEMBERS_ONLY);
719 json_printfield(ctx.fields[FeedFieldTitle].str.data);
720 if (iscurrentitemshort())
721 json_printfield(YT_SHORTS_TITLE);
722 if (found && found->duration[0]) {
723 fputs(" [", stdout);
724 json_printfield(found->duration);
725 fputs("]", stdout);
726 }
727 fputs("\"", stdout);
728
729 if (ctx.fields[FeedFieldLink].str.len) {
730 fputs(",\n\t\"url\": \"", stdout);
731 json_printfield(ctx.fields[FeedFieldLink].str.data);
732 fputs("\"", stdout);
733 }
734
735 if (ctx.fields[FeedFieldAuthor].str.len) {
736 fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
737 json_printfield(ctx.fields[FeedFieldAuthor].str.data);
738 fputs("\"}]", stdout);
739 }
740
741 fputs(",\n\t\"content_text\": \"", stdout);
742 json_printfield(ctx.fields[FeedFieldContent].str.data);
743 fputs("\"\n}", stdout);
744 }
745
746 static void
747 sfeed_item(void)
748 {
749 struct item *v, *found = NULL;
750 size_t i;
751
752 /* must have a video id */
753 if (!ctx.fields[FeedFieldYoutubeId].str.len)
754 return;
755
756 for (i = 0; i < search_res->nitems; i++) {
757 v = &(search_res->items[i]);
758 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
759 found = v;
760 }
761
762 if (!iscurrentitemallowed(found))
763 return;
764
765 string_print_timestamp(&ctx.fields[FeedFieldTime].str);
766 putchar(FieldSeparator);
767 if (found && found->membersonly)
768 fputs(MEMBERS_ONLY, stdout);
769 string_print(&ctx.fields[FeedFieldTitle].str);
770 if (iscurrentitemshort())
771 fputs(YT_SHORTS_TITLE, stdout);
772 if (found && found->duration[0]) {
773 fputs(" [", stdout);
774 fputs(found->duration, stdout);
775 fputs("]", stdout);
776 }
777 putchar(FieldSeparator);
778 string_print(&ctx.fields[FeedFieldLink].str);
779 putchar(FieldSeparator);
780 string_print_encoded(&ctx.fields[FeedFieldContent].str);
781 putchar(FieldSeparator);
782 fputs("plain", stdout);
783 putchar(FieldSeparator);
784 string_print(&ctx.fields[FeedFieldId].str);
785 putchar(FieldSeparator);
786 string_print(&ctx.fields[FeedFieldAuthor].str);
787 putchar(FieldSeparator);
788 /* no/empty enclosure */
789 putchar(FieldSeparator);
790 /* empty category */
791 putchar('\n');
792 }
793
794 static void
795 twtxt_item(void)
796 {
797 struct item *v, *found = NULL;
798 size_t i;
799
800 /* must have a video id */
801 if (!ctx.fields[FeedFieldYoutubeId].str.len)
802 return;
803
804 for (i = 0; i < search_res->nitems; i++) {
805 v = &(search_res->items[i]);
806 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
807 found = v;
808 }
809
810 if (!iscurrentitemallowed(found))
811 return;
812
813 string_print(&ctx.fields[FeedFieldTime].str);
814 putchar(FieldSeparator);
815 if (found && found->membersonly)
816 fputs(MEMBERS_ONLY, stdout);
817 string_print(&ctx.fields[FeedFieldTitle].str);
818 if (iscurrentitemshort())
819 fputs(YT_SHORTS_TITLE, stdout);
820 if (found && found->duration[0]) {
821 fputs(" [", stdout);
822 fputs(found->duration, stdout);
823 fputs("]", stdout);
824 }
825 fputs(": ", stdout);
826 string_print(&ctx.fields[FeedFieldLink].str);
827 putchar('\n');
828 }
829
/* Returns 1 when both tag names have the same length and are equal ignoring
 * case, else 0. Both names must be NUL-terminated. */
static int
istag(const char *name, size_t len, const char *name2, size_t len2)
{
	if (len != len2)
		return 0;

	return strcasecmp(name, name2) == 0;
}
835
/* Returns 1 when both attribute names have the same length and are equal
 * ignoring case, else 0. Both names must be NUL-terminated. */
static int
isattr(const char *name, size_t len, const char *name2, size_t len2)
{
	if (len != len2)
		return 0;

	return strcasecmp(name, name2) == 0;
}
841
842 static void
843 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
844 const char *v, size_t vl)
845 {
846 if (ISINCONTENT(ctx))
847 return;
848
849 if (!ctx.tag.id)
850 return;
851
852 if (ISCONTENTTAG(ctx))
853 return;
854
855 if (ctx.tag.id == AtomTagLink) {
856 if (isattr(n, nl, STRP("rel"))) {
857 string_append(&attrrel, v, vl);
858 } else if (isattr(n, nl, STRP("href"))) {
859 string_append(&tmpstr, v, vl);
860 }
861 }
862 }
863
864 static void
865 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
866 const char *data, size_t datalen)
867 {
868 char buf[8];
869 int len;
870
871 if (ISINCONTENT(ctx))
872 return;
873
874 if (!ctx.tag.id)
875 return;
876
877 /* try to translate entity, else just pass as data to
878 * xmlattr handler. */
879 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
880 xmlattr(p, t, tl, n, nl, buf, (size_t)len);
881 else
882 xmlattr(p, t, tl, n, nl, data, datalen);
883 }
884
885 static void
886 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
887 {
888 if (ISINCONTENT(ctx))
889 return;
890
891 if (attrrel.len && isattr(n, nl, STRP("rel")))
892 string_clear(&attrrel);
893 else if (tmpstr.len &&
894 (isattr(n, nl, STRP("href")) ||
895 isattr(n, nl, STRP("url"))))
896 string_clear(&tmpstr); /* use the last value for multiple attribute values */
897 }
898
899 static void
900 xmldata(XMLParser *p, const char *s, size_t len)
901 {
902 if (!ctx.field)
903 return;
904
905 string_append(ctx.field, s, len);
906 }
907
908 static void
909 xmldataentity(XMLParser *p, const char *data, size_t datalen)
910 {
911 char buf[8];
912 int len;
913
914 if (!ctx.field)
915 return;
916
917 /* try to translate entity, else just pass as data to
918 * xmldata handler. */
919 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
920 xmldata(p, buf, (size_t)len);
921 else
922 xmldata(p, data, datalen);
923 }
924
925 static void
926 xmltagstart(XMLParser *p, const char *t, size_t tl)
927 {
928 const FeedTag *f;
929
930 if (ISINCONTENT(ctx))
931 return;
932
933 /* start of RSS or Atom item / entry */
934 if (ctx.feedtype == FeedTypeNone) {
935 if (istag(t, tl, STRP("entry")))
936 ctx.feedtype = FeedTypeAtom;
937 return;
938 }
939
940 /* field tagid already set or nested tags. */
941 if (ctx.tag.id) {
942 /* nested <author><name> for Atom */
943 if (ctx.tag.id == AtomTagAuthor &&
944 istag(t, tl, STRP("name"))) {
945 memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
946 } else {
947 return; /* other nested tags are not allowed: return */
948 }
949 }
950
951 /* in item */
952 if (ctx.tag.id == TagUnknown) {
953 if (!(f = gettag(ctx.feedtype, t, tl)))
954 f = ¬ag;
955 memcpy(&(ctx.tag), f, sizeof(ctx.tag));
956 }
957
958 ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
959 string_clear(&attrrel);
960 }
961
962 static void
963 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
964 {
965 enum TagId tagid;
966
967 if (ISINCONTENT(ctx))
968 return;
969
970 /* set tag type based on its attribute value */
971 if (ctx.tag.id == AtomTagLink) {
972 /* empty or "alternate": other types could be
973 "enclosure", "related", "self" or "via" */
974 if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate")))
975 ctx.tag.id = AtomTagLinkAlternate;
976 else
977 ctx.tag.id = AtomTagLink; /* unknown */
978 }
979
980 tagid = ctx.tag.id;
981
982 /* map tag type to field: unknown or lesser priority is ignored,
983 when tags of the same type are repeated only the first is used. */
984 if (fieldmap[tagid] == -1 ||
985 tagid <= ctx.fields[fieldmap[tagid]].tagid) {
986 return;
987 }
988
989 if (ctx.iscontenttag) {
990 ctx.iscontent = 1;
991 ctx.iscontenttag = 0;
992 }
993
994 ctx.field = &(ctx.fields[fieldmap[tagid]].str);
995 ctx.fields[fieldmap[tagid]].tagid = tagid;
996
997 /* clear field if it is overwritten (with a priority order) for the new
998 value, if the field can have multiple values then do not clear it. */
999 string_clear(ctx.field);
1000 }
1001
1002 static void
1003 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
1004 {
1005 size_t i;
1006
1007 if (ctx.feedtype == FeedTypeNone)
1008 return;
1009
1010 if (ISINCONTENT(ctx)) {
1011 /* not a closed content field */
1012 if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
1013 return;
1014 } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
1015 /* matched tag end: close it */
1016 } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
1017 istag(t, tl, STRP("entry"))))) /* Atom */
1018 {
1019 /* end of Atom entry */
1020 printfields();
1021
1022 /* clear strings */
1023 for (i = 0; i < FeedFieldLast; i++) {
1024 string_clear(&ctx.fields[i].str);
1025 ctx.fields[i].tagid = TagUnknown;
1026 }
1027 /* allow parsing of Atom and RSS concatenated in one XML stream. */
1028 ctx.feedtype = FeedTypeNone;
1029 } else {
1030 return; /* not end of field */
1031 }
1032
1033 /* temporary string: for fields that cannot be processed
1034 directly and need more context, for example by its tag
1035 attributes, like the Atom link rel="alternate|enclosure". */
1036 if (tmpstr.len && ctx.field) {
1037 string_clear(ctx.field);
1038 string_append(ctx.field, tmpstr.data, tmpstr.len);
1039 }
1040
1041 /* close field */
1042 string_clear(&tmpstr); /* reuse and clear temporary string */
1043
1044 if (ctx.tag.id == AtomTagAuthorName)
1045 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer tag */
1046 else
1047 memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag));
1048
1049 ctx.iscontent = 0;
1050 ctx.field = NULL;
1051 }
1052
/* Request the videos feed of channel `channelid` from www.youtube.com.
 * Returns what request() returns, or NULL when the request path does not fit
 * in the local buffer. */
static char *
request_channel_feed(const char *channelid)
{
	char path[2048];
	int n;

	n = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid);
	if (n < 0)
		return NULL;
	/* request is too long (truncation) */
	if ((size_t)n >= sizeof(path))
		return NULL;

	return request("www.youtube.com", path, "");
}
1066
/* Returns 1 when `s` is exactly 24 characters long and consists only of
 * letters, digits, '-' and '_'; else 0. */
int
isvalidchannel(const char *s)
{
	size_t n;

	for (n = 0; s[n] != '\0'; n++) {
		if (!ISALPHA((unsigned char)s[n]) &&
		    !ISDIGIT((unsigned char)s[n]) &&
		    s[n] != '-' && s[n] != '_')
			return 0;
	}

	return n == 24;
}
1082
1083 void
1084 usage(void)
1085 {
1086 const char *line1 = "Bad Request, path should be the channel id + file extension, for example: UCrbvoMC0zUvPL8vjswhLOSw.json";
1087 const char *line2 = "Supported extensions are: [atom|gph|html|json|tsv|txt][[+-]found|shorts]";
1088
1089 if (cgimode) {
1090 if (godmode) {
1091 printf("3%s\tErr\t%s\t%s\r\n", line1, server_name, server_port);
1092 printf("3%s\tErr\t%s\t%s\r\n", line2, server_name, server_port);
1093 } else {
1094 fputs("Status: 400 Bad Request\r\n", stdout);
1095 fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
1096 printf("400 %s\n", line1);
1097 printf("\n%s", line2);
1098 }
1099 exit(0);
1100 } else {
1101 fputs("usage: feed <channelid> [atom|gph|html|json|tsv|txt][[+-found|shorts]\n", stderr);
1102 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n", stderr);
1103 exit(1);
1104 }
1105 }
1106
/* Check whether the format name at the start of `input` is exactly `check`.
 * The name ends at the first '+' or '-', so modifiers such as "+found" or
 * "-shorts" are ignored. Returns 1 on a match, else 0; an empty name never
 * matches.
 * The length of `check` is compared too: comparing only the first bytes of
 * `input` would accept any prefix of a format name ("a" for "atom"). */
int
isformat(const char *input, const char *check)
{
	size_t len;

	len = strcspn(input, "+-");
	if (!len)
		return 0;

	return len == strlen(check) && !strncmp(input, check, len);
}
1122
1123 void
1124 parseformatmodifier(const char *input)
1125 {
1126 /* only show items found/matched on the channel with the feed. */
1127 if (strstr(input, "+found"))
1128 showfound = 1;
1129 if (strstr(input, "-found"))
1130 showfound = 0;
1131 /* show shorts ("/shorts/" in the URL) or not. */
1132 if (strstr(input, "+shorts"))
1133 showshorts = 1;
1134 if (strstr(input, "-shorts"))
1135 showshorts = 0;
1136 }
1137
1138 int
1139 main(int argc, char *argv[])
1140 {
1141 char buf[256];
1142 const char *channelid = NULL;
1143 char *data, *format = "tsv", *p, *path = NULL, *tmp;
1144 size_t i;
1145
1146 if (pledge("stdio dns inet rpath unveil", NULL) == -1)
1147 err(1, "pledge");
1148
1149 if ((tmp = getenv("REQUEST_URI")))
1150 path = tmp;
1151 else if ((tmp = getenv("REQUEST")))
1152 path = tmp;
1153
1154 if (path) {
1155 cgimode = 1;
1156
1157 if ((tmp = getenv("SERVER_NAME")))
1158 server_name = tmp;
1159 if ((tmp = getenv("SERVER_PORT")))
1160 server_port = tmp;
1161 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "gopher"))
1162 godmode = 1;
1163
1164 strlcpy(buf, path, sizeof(buf));
1165 path = buf;
1166
1167 if (!(p = strrchr(path, '/')))
1168 usage();
1169
1170 channelid = p + 1;
1171 if ((p = strrchr(channelid, '.'))) {
1172 *p = '\0'; /* NULL terminate */
1173 format = p + 1;
1174 }
1175 } else {
1176 if (argc <= 1)
1177 usage();
1178
1179 channelid = argv[1];
1180 if (argc > 2)
1181 format = argv[2];
1182 }
1183 if (!channelid || !isvalidchannel(channelid))
1184 usage();
1185
1186 /* formats: if invalid use the default */
1187 if (isformat(format, "atom") || isformat(format, "xml"))
1188 printfields = atom_item;
1189 else if (isformat(format, "gph"))
1190 printfields = gph_item;
1191 else if (isformat(format, "html"))
1192 printfields = html_item;
1193 else if (isformat(format, "json"))
1194 printfields = json_item;
1195 else if (isformat(format, "tsv") || isformat(format, "sfeed"))
1196 printfields = sfeed_item;
1197 else if (isformat(format, "txt") || isformat(format, "twtxt"))
1198 printfields = twtxt_item;
1199 else
1200 usage();
1201
1202 parseformatmodifier(format);
1203
1204 search_res = youtube_channel_videos(channelid);
1205 if (!search_res || search_res->nitems == 0) {
1206 /* error or no videos found */
1207 return 0;
1208 }
1209
1210 if (!(data = request_channel_feed(channelid)))
1211 return 1; /* error, no data at all */
1212
1213 if (pledge("stdio", NULL) == -1)
1214 err(1, "pledge");
1215
1216 setxmldata(data, strlen(data));
1217
1218 memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag));
1219
1220 parser.xmlattr = xmlattr;
1221 parser.xmlattrentity = xmlattrentity;
1222 parser.xmlattrstart = xmlattrstart;
1223 parser.xmlcdata = xmldata;
1224 parser.xmldata = xmldata;
1225 parser.xmldataentity = xmldataentity;
1226 parser.xmltagend = xmltagend;
1227 parser.xmltagstart = xmltagstart;
1228 parser.xmltagstartparsed = xmltagstartparsed;
1229
1230 /* init all fields, make sure it has a value */
1231 for (i = 0; i < FeedFieldLast; i++) {
1232 string_append(&(ctx.fields[i].str), " ", 1);
1233 string_clear(&(ctx.fields[i].str));
1234 }
1235
1236 if (cgimode && !godmode) {
1237 fputs("Status: 200 OK\r\n", stdout);
1238 if (isformat(format, "atom") || isformat(format, "xml"))
1239 fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout);
1240 else if (isformat(format, "html"))
1241 fputs("Content-Type: text/html; charset=utf-8\r\n\r\n", stdout);
1242 else if (isformat(format, "json"))
1243 fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout);
1244 else
1245 fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
1246 }
1247
1248 if (isformat(format, "atom") || isformat(format, "xml"))
1249 atom_header();
1250 else if (isformat(format, "gph"))
1251 gph_header();
1252 else if (isformat(format, "html"))
1253 html_header();
1254 else if (isformat(format, "json"))
1255 json_header();
1256
1257 /* NOTE: getnext is defined in xml.h for inline optimization */
1258 xml_parse(&parser);
1259
1260 if (isformat(format, "atom") || isformat(format, "xml"))
1261 atom_footer();
1262 else if (isformat(format, "gph"))
1263 gph_footer();
1264 else if (isformat(format, "html"))
1265 html_footer();
1266 else if (isformat(format, "json"))
1267 json_footer();
1268
1269 return 0;
1270 }