codemadness.org/1/git/frontends/file/youtube/feed.c.gph

  URI:

       feed.c - frontends - front-ends for some sites (experiment)
  HTML git clone git://git.codemadness.org/frontends
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       feed.c (31612B)
       ---
            1 #include <err.h>
            2 #include <errno.h>
            3 #include <stdint.h>
            4 #include <stdio.h>
            5 #include <stdlib.h>
            6 #include <string.h>
            7 #include <strings.h>
            8 #include <time.h>
            9 #include <unistd.h>
           10 
           11 #include "https.h"
           12 #include "util.h"
           13 #include "youtube.h"
           14 #include "xml.h"
           15 
           16 #define ISINCONTENT(ctx)  ((ctx).iscontent && !((ctx).iscontenttag))
           17 #define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)
           18 
           19 /* expands to two arguments: the string literal and its length without NUL */
           20 #define STRP(s)           s,sizeof(s)-1
           21 /* suffix printed after the title of items whose link contains "/shorts/" */
           22 #define YT_SHORTS_TITLE " [Short]"
           23 
           24 enum FeedType {
           25         FeedTypeNone = 0, /* no (known) feed type */
           26         FeedTypeAtom = 2  /* Atom, the only type gettag() has a tag table for */
           27 };
           28 
           29 /* Growable string buffer; `data` is NULL as long as nothing was allocated */
           30 typedef struct string {
           31         char   *data;   /* NUL-terminated data, or NULL */
           32         size_t  len;    /* string length, without the NUL byte */
           33         size_t  bufsiz; /* allocated size of `data` in bytes */
           34 } String;
           35 
           36 /* NOTE: the order of these values (content, date, author) indicates the
           37  *       priority to use them, from least to most important. */
           38 enum TagId {
           39         TagUnknown = 0,
           40         /* Atom */
           41         /* creation date has higher priority */
           42         AtomTagPublished,
           43         AtomTagTitle,
           44         AtomTagMediaDescription,
           45         AtomTagId,
           46         AtomTagLink,
           47         AtomTagLinkAlternate,
           48         AtomTagAuthor, AtomTagAuthorName,
           49         TagYoutubeVideoId,
           50         TagLast /* number of tag ids, sizes fieldmap[] */
           51 };
           52 
           53 typedef struct feedtag {
           54         char       *name; /* name of tag to match, NUL-terminated */
           55         size_t      len;  /* length of `name` in bytes, without NUL */
           56         enum TagId  id;   /* unique ID */
           57 } FeedTag;
           58 
           59 typedef struct field {
           60         String     str;   /* collected value of the field */
           61         enum TagId tagid; /* tagid set previously, used for tag priority */
           62 } FeedField;
           63 
           64 enum {
           65         /* sfeed fields, used as index into FeedContext.fields */
           66         FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
           67         FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
           68         FeedFieldYoutubeId, /* yt:videoId */
           69         FeedFieldLast       /* number of fields, sizes FeedContext.fields */
           70 };
           71 
           72 typedef struct feedcontext {
           73         String          *field;        /* current FeedItem field String */
           74         FeedField        fields[FeedFieldLast]; /* data for current item */
           75         FeedTag          tag;          /* unique current parsed tag */
           76         int              iscontent;    /* in content data */
           77         int              iscontenttag; /* in content tag */
           78         enum FeedType    feedtype;     /* FeedTypeNone or FeedTypeAtom */
           79 } FeedContext;
           80 
           81 static long long datetounix(long long, int, int, int, int, int);
           82 static FeedTag * gettag(enum FeedType, const char *, size_t);
           83 static long gettzoffset(const char *);
           84 static int  isattr(const char *, size_t, const char *, size_t);
           85 static int  istag(const char *, size_t, const char *, size_t);
           86 static int  parsetime(const char *, long long *);
           87 
           88 static void atom_header(void);
           89 static void atom_item(void);
           90 static void atom_footer(void);
           91 static void gph_header(void);
           92 static void gph_footer(void);
           93 static void html_header(void);
           94 static void html_footer(void);
           95 static void json_header(void);
           96 static void json_item(void);
           97 static void json_footer(void);
           98 static void sfeed_item(void); /* TSV / sfeed */
           99 static void twtxt_item(void);
          100 
          101 static void string_append(String *, const char *, size_t);
          102 static void string_buffer_realloc(String *, size_t);
          103 static void string_clear(String *);
          104 static void string_print_encoded(String *);
          105 static void string_print_timestamp(String *);
          106 static void string_print(String *);
          107 static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t,
          108                     const char *, size_t);
          109 static void xmlattrentity(XMLParser *, const char *, size_t, const char *,
          110                           size_t, const char *, size_t);
          111 static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
          112                          size_t);
          113 static void xmldata(XMLParser *, const char *, size_t);
          114 static void xmldataentity(XMLParser *, const char *, size_t);
          115 static void xmltagend(XMLParser *, const char *, size_t, int);
          116 static void xmltagstart(XMLParser *, const char *, size_t);
          117 static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
          118 
          119 /* Atom tags; must stay sorted by name because gettag() uses bsearch() */
          120 static const FeedTag atomtags[] = {
          121         { STRP("author"),            AtomTagAuthor           },
          122         { STRP("id"),                AtomTagId               },
          123         /* Atom: <link href="" />, RSS has <link></link> */
          124         { STRP("link"),              AtomTagLink             },
          125         { STRP("media:description"), AtomTagMediaDescription },
          126         { STRP("published"),         AtomTagPublished        },
          127         { STRP("title"),             AtomTagTitle            },
          128         { STRP("yt:videoId"),        TagYoutubeVideoId       }
          129 };
          130 
          131 /* special case: nested <author><name>, "name" has no entry in atomtags[] */
          132 static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
          133 static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
          134 
          135 /* reference to no / unknown tag: empty name and TagUnknown */
          136 static const FeedTag notag = { STRP(""), TagUnknown };
          137 
          138 /* map TagId type to FeedField index (-1: presumably none), all tags must be defined */
          139 static const int fieldmap[TagLast] = {
          140         [TagUnknown]               = -1,
          141         /* Atom */
          142         [AtomTagPublished]         = FeedFieldTime,
          143         [AtomTagTitle]             = FeedFieldTitle,
          144         [AtomTagMediaDescription]  = FeedFieldContent,
          145         [AtomTagId]                = FeedFieldId,
          146         [AtomTagLink]              = -1,
          147         [AtomTagLinkAlternate]     = FeedFieldLink,
          148         [AtomTagAuthor]            = -1,
          149         [AtomTagAuthorName]        = FeedFieldAuthor,
          150         [TagYoutubeVideoId]        = FeedFieldYoutubeId
          151 };
          152 
          153 static const int FieldSeparator = '\t';
          154 
          155 static FeedContext ctx; /* fields of the current item, read by the *_item() writers */
          156 static XMLParser parser; /* XML parser state */
          157 static String attrrel, tmpstr;
          158 
          159 static struct search_response *search_res = NULL; /* items matched by video id */
          160 static void (*printfields)(void) = sfeed_item; /* default: sfeed(5) format */
          161 static int cgimode = 0, godmode = 0;
          162 /* only show items found/matched on the channel with the feed. */
          163 static int showfound = 0;
          164 /* show shorts ("/shorts/" in the URL) or not. */
          165 static int showshorts = 0;
          166 static const char *server_name = "127.0.0.1", *server_port = "70"; /* used by gph_item() */
          167 /* bsearch() comparator: order FeedTag entries by name, ignoring case. */
          168 static int
          169 tagcmp(const void *v1, const void *v2)
          170 {
          171         return strcasecmp(((const FeedTag *)v1)->name, ((const FeedTag *)v2)->name);
          172 }
          173 
          174 /* Look up the FeedTag entry for the tag `name` (case-insensitive) in the tag
          175  * table of `feedtype`. Returns NULL when the tag or the feed type is unknown.
          176  * `namelen` is unused: `name` must be NUL-terminated. */
          177 static FeedTag *
          178 gettag(enum FeedType feedtype, const char *name, size_t namelen)
          179 {
          180         FeedTag key;
          181 
          182         /* only Atom feeds have a tag table */
          183         if (feedtype != FeedTypeAtom)
          184                 return NULL;
          185 
          186         /* tagcmp() only reads the name of the search key */
          187         key.name = (char *)name;
          188 
          189         /* atomtags is sorted by name, so it can be binary-searched */
          190         return bsearch(&key, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
          191                        sizeof(atomtags[0]), tagcmp);
          192 }
          193 
          194 /* Reset `s` to the empty string. The buffer is kept so it can be reused. */
          195 static void
          196 string_clear(String *s)
          197 {
          198         s->len = 0;
          199         if (s->data != NULL)
          200                 *s->data = '\0'; /* data is NULL if nothing was allocated yet */
          201 }
          202 /* Grow the buffer of `s` to a power of two > `newlen` (>= 64); exits on failure. */
          203 static void
          204 string_buffer_realloc(String *s, size_t newlen)
          205 {
          206         size_t alloclen = 64;
          207 
          208         if (newlen > SIZE_MAX / 2) {
          209                 alloclen = SIZE_MAX; /* doubling could overflow size_t */
          210         } else {
          211                 while (alloclen <= newlen)
          212                         alloclen *= 2;
          213         }
          214         if ((s->data = realloc(s->data, alloclen)) == NULL)
          215                 err(1, "realloc");
          216         s->bufsiz = alloclen;
          217 }
          218 
          219 /* Append `len` bytes of `data` to `s` and keep the result NUL-terminated.
          220  * `data` must not overlap the buffer of `s`. Exits on allocation failure. */
          221 static void
          222 string_append(String *s, const char *data, size_t len)
          223 {
          224         if (len == 0)
          225                 return;
          226         /* s->len + len + 1 (for the NUL byte) must not overflow size_t */
          227         if (s->len >= SIZE_MAX - len) {
          228                 errno = ENOMEM;
          229                 err(1, "realloc");
          230         }
          231         /* grow the buffer if needed; it is never shrunk */
          232         if (s->bufsiz <= s->len + len)
          233                 string_buffer_realloc(s, s->len + len + 1);
          234         memcpy(&s->data[s->len], data, len);
          235         s->len += len;
          236         s->data[s->len] = '\0';
          237 }
          238 
          239 /* Print text: TAB, newline and '\' are written as the escape sequences
          240  * "\t", "\n" and "\\", other control characters are dropped.
          241  * Nothing is printed for an empty String; whitespace is not trimmed. */
          242 static void
          243 string_print_encoded(String *s)
          244 {
          245         const char *p, *esc;
          246 
          247         if (!s->data || !s->len)
          248                 return;
          249 
          250         /* data is NUL-terminated: stop at the first NUL byte */
          251         for (p = s->data; *p; p++) {
          252                 esc = NULL;
          253                 if (*p == '\n')
          254                         esc = "\\n";
          255                 else if (*p == '\\')
          256                         esc = "\\\\";
          257                 else if (*p == '\t')
          258                         esc = "\\t";
          259                 if (esc)
          260                         fputs(esc, stdout);
          261                 else if (!ISCNTRL((unsigned char)*p))
          262                         putchar(*p); /* other control chars are dropped */
          263         }
          264 }
          265 
          266 /* Print text: each whitespace character is replaced by ' ' and other control
          267  * characters are dropped. Leading/trailing whitespace is not trimmed. */
          268 static void
          269 string_print(String *s)
          270 {
          271         size_t i;
          272         unsigned char c;
          273 
          274         if (!s->data || !s->len)
          275                 return;
          276 
          277         /* stop at the string length or at the first NUL byte */
          278         for (i = 0; i < s->len && s->data[i]; i++) {
          279                 c = (unsigned char)s->data[i];
          280                 if (ISSPACE(c))
          281                         putchar(' ');
          282                 else if (!ISCNTRL(c))
          283                         putchar(c);
          284         }
          285 }
          286 
          287 /* Parse `s` as a time string and print it as a decimal UNIX timestamp.
          288  * Prints nothing when `s` is empty or parsetime() fails. */
          289 static void
          290 string_print_timestamp(String *s)
          291 {
          292         long long t;
          293 
          294         if (s->data == NULL || s->len == 0)
          295                 return; /* empty */
          296         if (parsetime(s->data, &t) != -1)
          297                 printf("%lld", t);
          298 }
          299 
          300 /* Convert time fields to a signed (at least) 64-bit UNIX timestamp.
          301    Parameters should be passed as they are in a struct tm and in a valid range:
          302    that is: year = year - 1900, month = month - 1. Nothing is range-checked. */
          303 static long long
          304 datetounix(long long year, int mon, int day, int hour, int min, int sec)
          305 {
          306         /* seconds from the start of a regular (non-leap) year up to each month */
          307         static const long secs_through_month[] = {
          308                 0, 31 * 86400, 59 * 86400, 90 * 86400,
          309                 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
          310                 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
          311         int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
          312         long long t;
          313 
          314         /* optimization: years 1902 up to and including 2038 (`year` is 2..138) */
          315         if (year - 2ULL <= 136) {
          316                 /* amount of leap days relative to 1970: every 4 years */
          317                 leaps = (year / 4) - 17; /* 17 leap years offset for 1902 - 1970 */
          318                 if (!(year & 3)) {
          319                         leaps--; /* this year's leap day is added below, if passed */
          320                         is_leap = 1;
          321                 } else {
          322                         is_leap = 0;
          323                 }
          324                 t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
          325         } else {
          326                 /* general leap year calculation:
          327                    leap years occur mostly every 4 years but every 100 years
          328                    a leap year is skipped unless the year is divisible by 400 */
          329                 cycles = (year - 100) / 400; /* 400-year cycles relative to 2000 */
          330                 rem = (year - 100) % 400;
          331                 if (rem < 0) { /* make the remainder non-negative */
          332                         cycles--;
          333                         rem += 400;
          334                 }
          335                 if (!rem) {
          336                         is_leap = 1;
          337                 } else {
          338                         if (rem >= 300)
          339                                 centuries = 3, rem -= 300;
          340                         else if (rem >= 200)
          341                                 centuries = 2, rem -= 200;
          342                         else if (rem >= 100)
          343                                 centuries = 1, rem -= 100;
          344                         if (rem) {
          345                                 leaps = rem / 4U;
          346                                 rem %= 4U;
          347                                 is_leap = !rem;
          348                         }
          349                 }
          350                 leaps += (97 * cycles) + (24 * centuries) - is_leap; /* 97 per 400y, 24 per 100y */
          351 
          352                 /* adjust 8 leap days from 1970 up to and including 2000:
          353                    ((30 * 365) + 8) * 86400 = 946771200 */
          354                 t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
          355         }
          356         t += secs_through_month[mon];
          357         if (is_leap && mon >= 2)
          358                 t += 86400; /* March or later: this year's leap day has passed */
          359         t += 86400LL * (day - 1);
          360         t += 3600LL * hour;
          361         t += 60LL * min;
          362         t += sec;
          363 
          364         return t;
          365 }
          366 
          367 /* Parse a numeric UTC offset at the start of `s`: '+' or '-', up to two hour
          368  * digits, an optional ':' and up to two minute digits. Returns the offset in
          369  * seconds (negative for '-') or 0 when `s` starts with anything else. */
          370 static long
          371 gettzoffset(const char *s)
          372 {
          373         const char *p;
          374         long hours = 0, mins = 0, secs;
          375         int n;
          376 
          377         if (*s != '+' && *s != '-')
          378                 return 0; /* no numeric offset, e.g. a timezone name */
          379 
          380         for (n = 0, p = s + 1; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
          381                 hours = (hours * 10) + (*p - '0');
          382         if (*p == ':')
          383                 p++;
          384         for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
          385                 mins = (mins * 10) + (*p - '0');
          386 
          387         secs = (hours * 3600) + (mins * 60);
          388         return (*s == '-') ? -secs : secs;
          389 }
          390 
          391 /* Parse the time string `s` into the UNIX timestamp `*tp`.
          392  * Accepted formats: "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or
          393  * "%Y%m%d%H%M%S"; what follows the time is passed to gettzoffset().
          394  * Returns 0 on success or -1 on failure (`*tp` is then not written). */
          395 static int
          396 parsetime(const char *s, long long *tp)
          397 {
          398         static const int lo[6] = {    0,  1,  1,  0,  0,  0 }; /* minimum */
          399         static const int hi[6] = { 9999, 12, 31, 23, 59, 60 }; /* 60: leap second */
          400         int part[6] = { 0 }, idx, ndigits, maxdigits, val;
          401 
          402         /* the string must start with a 4-digit year */
          403         for (idx = 0; idx < 4; idx++) {
          404                 if (!ISDIGIT((unsigned char)s[idx]))
          405                         return -1;
          406         }
          407 
          408         /* read year, month, day, hour, minute and second */
          409         for (idx = 0; *s && idx < 6; idx++) {
          410                 maxdigits = (idx == 0) ? 4 : 2;
          411                 for (ndigits = 0, val = 0; ndigits < maxdigits &&
          412                      ISDIGIT((unsigned char)*s); ndigits++, s++)
          413                         val = (val * 10) + (*s - '0');
          414                 part[idx] = val;
          415 
          416                 /* skip one separator: '-' after year and month, 'T' or
          417                  * whitespace after the day, ':' after the time parts */
          418                 if ((idx < 2 && *s == '-') ||
          419                     (idx == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
          420                     (idx > 2 && *s == ':'))
          421                         s++;
          422         }
          423         /* reject values outside of the valid range */
          424         for (idx = 0; idx < 6; idx++) {
          425                 if (part[idx] < lo[idx] || part[idx] > hi[idx])
          426                         return -1;
          427         }
          428 
          429         *tp = datetounix(part[0] - 1900, part[1] - 1, part[2], part[3], part[4],
          430                          part[5]) - gettzoffset(s);
          431         return 0;
          432 }
          433 /* Print the XML declaration and the opening of the Atom <feed> element. */
          434 static void
          435 atom_header(void)
          436 {
          437         fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", stdout);
          438         fputs("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n", stdout);
          439         fputs("\t<title>Newsfeed</title>\n", stdout);
          440 }
          441 /* Close the <feed> element opened by atom_header(). */
          442 static void
          443 atom_footer(void)
          444 {
          445         puts("</feed>"); /* puts() appends the newline */
          446 }
          447 /* Non-zero if the link of the current item contains "/shorts/". */
          448 static int
          449 iscurrentitemshort(void)
          450 {
          451         return ctx.fields[FeedFieldLink].str.len != 0 &&
          452                strstr(ctx.fields[FeedFieldLink].str.data, "/shorts/") != NULL;
          453 }
          454 /* Returns 1 if the current item should be printed, 0 if it is filtered out. */
          455 static int
          456 iscurrentitemallowed(struct item *found)
          457 {
          458         /* with showfound set, items without a matching search result
          459          * (`found` is NULL) are hidden */
          460         if (showfound && found == NULL)
          461                 return 0;
          462 
          463         /* shorts are hidden unless showshorts is set */
          464         if (iscurrentitemshort() && !showshorts)
          465                 return 0;
          466         return 1;
          467 }
          468 /* Print the current item as an Atom <entry>, unless it is filtered out. */
          469 static void
          470 atom_item(void)
          471 {
          472         FeedField *f = ctx.fields;
          473         struct item *found = NULL;
          474         size_t i;
          475 
          476         /* must have a video id */
          477         if (!f[FeedFieldYoutubeId].str.len)
          478                 return;
          479 
          480         /* look for the video in the search results, the last match is used */
          481         for (i = 0; i < search_res->nitems; i++) {
          482                 if (!strcmp(f[FeedFieldYoutubeId].str.data, search_res->items[i].id))
          483                         found = &search_res->items[i];
          484         }
          485         if (!iscurrentitemallowed(found))
          486                 return;
          487 
          488         fputs("<entry>\n\t<title>", stdout);
          489         if (found && found->membersonly)
          490                 xmlencode(MEMBERS_ONLY);
          491         if (f[FeedFieldTitle].str.len) /* data may be NULL when never set */
          492                 xmlencode(f[FeedFieldTitle].str.data);
          493         if (iscurrentitemshort())
          494                 fputs(YT_SHORTS_TITLE, stdout);
          495         if (found && found->duration[0]) {
          496                 fputs(" [", stdout);
          497                 xmlencode(found->duration);
          498                 fputs("]", stdout);
          499         }
          500         fputs("</title>\n", stdout);
          501         if (f[FeedFieldLink].str.len) {
          502                 fputs("\t<link rel=\"alternate\" href=\"", stdout);
          503                 xmlencode(f[FeedFieldLink].str.data);
          504                 fputs("\" />\n", stdout);
          505         }
          506         /* prefer link over id for Atom <id>. */
          507         fputs("\t<id>", stdout);
          508         if (f[FeedFieldLink].str.len)
          509                 xmlencode(f[FeedFieldLink].str.data);
          510         else if (f[FeedFieldId].str.len)
          511                 xmlencode(f[FeedFieldId].str.data);
          512         fputs("</id>\n", stdout);
          513 
          514         /* just print the original timestamp, it should conform */
          515         fputs("\t<updated>", stdout);
          516         string_print(&f[FeedFieldTime].str);
          517         fputs("</updated>\n", stdout);
          518 
          519         if (f[FeedFieldAuthor].str.len) {
          520                 fputs("\t<author><name>", stdout);
          521                 xmlencode(f[FeedFieldAuthor].str.data);
          522                 fputs("</name></author>\n", stdout);
          523         }
          524         if (f[FeedFieldContent].str.len) {
          525                 fputs("\t<content>", stdout);
          526                 xmlencode(f[FeedFieldContent].str.data);
          527                 fputs("</content>\n", stdout);
          528         }
          529         fputs("</entry>\n", stdout);
          530 }
          531 
          532 /* Print the start of the HTML page: doctype, charset and the opening <pre>. */
          533 static void
          534 html_header(void)
          535 {
          536         static const char head[] =
          537                 "<!DOCTYPE HTML>\n<html>\n<head>\n"
          538                 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
          539                 "</head>\n<body><pre>\n";
          540 
          541         fputs(head, stdout);
          542 }
          543 /* Close the <pre>, <body> and <html> elements opened by html_header(). */
          544 static void
          545 html_footer(void)
          546 {
          547         puts("</pre></body>\n</html>"); /* puts() appends the newline */
          548 }
          549 /* Print the current item as one line of preformatted HTML, unless filtered. */
          550 static void
          551 html_item(void)
          552 {
          553         FeedField *f = ctx.fields;
          554         struct item *found = NULL;
          555         size_t i;
          556 
          557         /* must have a video id */
          558         if (!f[FeedFieldYoutubeId].str.len)
          559                 return;
          560 
          561         /* look for the video in the search results, the last match is used */
          562         for (i = 0; i < search_res->nitems; i++) {
          563                 if (!strcmp(f[FeedFieldYoutubeId].str.data, search_res->items[i].id))
          564                         found = &search_res->items[i];
          565         }
          566         if (!iscurrentitemallowed(found))
          567                 return;
          568 
          569         /* just print the original timestamp, it should conform */
          570         if (f[FeedFieldTime].str.len) /* data may be NULL when never set */
          571                 xmlencode(f[FeedFieldTime].str.data);
          572         fputs("&nbsp;", stdout);
          573 
          574         if (f[FeedFieldLink].str.len) {
          575                 fputs("<a href=\"", stdout);
          576                 xmlencode(f[FeedFieldLink].str.data);
          577                 fputs("\">", stdout);
          578         }
          579         if (found && found->membersonly)
          580                 xmlencode(MEMBERS_ONLY);
          581         if (f[FeedFieldTitle].str.len) /* same for the title */
          582                 xmlencode(f[FeedFieldTitle].str.data);
          583         if (iscurrentitemshort())
          584                 fputs(YT_SHORTS_TITLE, stdout);
          585         if (found && found->duration[0]) {
          586                 fputs(" [", stdout);
          587                 xmlencode(found->duration);
          588                 fputs("]", stdout);
          589         }
          590         if (f[FeedFieldLink].str.len)
          591                 fputs("</a>", stdout);
          592         fputs("\n", stdout);
          593 }
          594 /* Pass `s` to gophertext() for printing to stdout; NULL prints nothing. */
          595 static void
          596 gphencode(const char *s)
          597 {
          598         if (s != NULL)
          599                 gophertext(stdout, s, strlen(s));
          600 }
          601 static void
          602 gph_header(void)
          603 { /* empty: no output is written */
          604 }
          605 /* Print the "." line followed by CRLF. */
          606 static void
          607 gph_footer(void)
          608 {
          609         printf(".\r\n");
          610 }
          611 /* Print the current item as a gopher menu line of type 'h', unless filtered. */
          612 static void
          613 gph_item(void)
          614 {
          615         FeedField *f = ctx.fields;
          616         struct item *found = NULL;
          617         size_t i;
          618 
          619         /* must have a video id */
          620         if (!f[FeedFieldYoutubeId].str.len)
          621                 return;
          622 
          623         for (i = 0; i < search_res->nitems; i++) {
          624                 if (!strcmp(f[FeedFieldYoutubeId].str.data, search_res->items[i].id))
          625                         found = &search_res->items[i];
          626         }
          627         if (!iscurrentitemallowed(found))
          628                 return;
          629 
          630         fputs("h", stdout);
          631         if (f[FeedFieldTime].str.len) /* original timestamp; data may be NULL */
          632                 gphencode(f[FeedFieldTime].str.data);
          633         fputs(" ", stdout);
          634         if (found && found->membersonly)
          635                 gphencode(MEMBERS_ONLY);
          636         if (f[FeedFieldTitle].str.len) /* data may be NULL when never set */
          637                 gphencode(f[FeedFieldTitle].str.data);
          638         if (iscurrentitemshort())
          639                 gphencode(YT_SHORTS_TITLE);
          640         if (found && found->duration[0]) {
          641                 fputs(" [", stdout);
          642                 gphencode(found->duration);
          643                 fputs("]", stdout);
          644         }
          645         fputs("\t", stdout);
          646         if (f[FeedFieldLink].str.len) {
          647                 fputs("URL:", stdout);
          648                 gphencode(f[FeedFieldLink].str.data);
          649         }
          650         printf("\t%s\t%s\r\n", server_name, server_port);
          651 }
          652 /* Print the start of the JSON Feed object, up to the opening of "items". */
          653 static void
          654 json_header(void)
          655 {
          656         fputs("{\n", stdout);
          657         fputs("\"version\": \"https://jsonfeed.org/version/1.1\",\n", stdout);
          658         fputs("\"title\": \"Newsfeed\",\n", stdout);
          659         fputs("\"items\": [\n", stdout);
          660 }
          661 /* Close the "items" array and the JSON Feed object. */
          662 static void
          663 json_footer(void)
          664 {
          665         puts("]\n}"); /* puts() appends the newline */
          666 }
          667 /* Print `s` escaped for a JSON string literal; NULL prints nothing. */
          668 static void
          669 json_printfield(const char *s)
          670 {
          671         if (s == NULL) /* a field that was never set has no data */
          672                 return;
          673         for (; *s; s++) {
          674                 if (*s == '\\' || *s == '"')
          675                         printf("\\%c", *s);
          676                 else if (ISCNTRL((unsigned char)*s))
          677                         printf("\\u00%02x", (unsigned char)*s);
          678                 else
          679                         putchar(*s);
          680         }
          681 }
          682 
          683 static void
          684 json_item(void)
          685 {
          686         static int json_firstitem = 1;
          687         struct item *v, *found = NULL;
          688         size_t i;
          689 
          690         /* must have a video id */
          691         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          692                 return;
          693 
          694         for (i = 0; i < search_res->nitems; i++) {
          695                 v = &(search_res->items[i]);
          696                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          697                         found = v;
          698         }
          699 
          700         if (!iscurrentitemallowed(found))
          701                 return;
          702 
          703         if (!json_firstitem)
          704                 fputs(",\n", stdout);
          705         json_firstitem = 0;
          706 
          707         fputs("{\n\t\"id\": \"", stdout);
          708         json_printfield(ctx.fields[FeedFieldId].str.data);
          709         fputs("\"", stdout);
          710 
          711         /* just print the original timestamp, it should conform */
          712         fputs(",\n\t\"date_published\": \"", stdout);
          713         string_print(&ctx.fields[FeedFieldTime].str);
          714         fputs("\"", stdout);
          715 
          716         fputs(",\n\t\"title\": \"", stdout);
          717         if (found && found->membersonly)
          718                 json_printfield(MEMBERS_ONLY);
          719         json_printfield(ctx.fields[FeedFieldTitle].str.data);
          720         if (iscurrentitemshort())
          721                 json_printfield(YT_SHORTS_TITLE);
          722         if (found && found->duration[0]) {
          723                 fputs(" [", stdout);
          724                 json_printfield(found->duration);
          725                 fputs("]", stdout);
          726         }
          727         fputs("\"", stdout);
          728 
          729         if (ctx.fields[FeedFieldLink].str.len) {
          730                 fputs(",\n\t\"url\": \"", stdout);
          731                 json_printfield(ctx.fields[FeedFieldLink].str.data);
          732                 fputs("\"", stdout);
          733         }
          734 
          735         if (ctx.fields[FeedFieldAuthor].str.len) {
          736                 fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
          737                 json_printfield(ctx.fields[FeedFieldAuthor].str.data);
          738                 fputs("\"}]", stdout);
          739         }
          740 
          741         fputs(",\n\t\"content_text\": \"", stdout);
          742         json_printfield(ctx.fields[FeedFieldContent].str.data);
          743         fputs("\"\n}", stdout);
          744 }
          745 
          746 static void
          747 sfeed_item(void)
          748 {
          749         struct item *v, *found = NULL;
          750         size_t i;
          751 
          752         /* must have a video id */
          753         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          754                 return;
          755 
          756         for (i = 0; i < search_res->nitems; i++) {
          757                 v = &(search_res->items[i]);
          758                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          759                         found = v;
          760         }
          761 
          762         if (!iscurrentitemallowed(found))
          763                 return;
          764 
          765         string_print_timestamp(&ctx.fields[FeedFieldTime].str);
          766         putchar(FieldSeparator);
          767         if (found && found->membersonly)
          768                 fputs(MEMBERS_ONLY, stdout);
          769         string_print(&ctx.fields[FeedFieldTitle].str);
          770         if (iscurrentitemshort())
          771                 fputs(YT_SHORTS_TITLE, stdout);
          772         if (found && found->duration[0]) {
          773                 fputs(" [", stdout);
          774                 fputs(found->duration, stdout);
          775                 fputs("]", stdout);
          776         }
          777         putchar(FieldSeparator);
          778         string_print(&ctx.fields[FeedFieldLink].str);
          779         putchar(FieldSeparator);
          780         string_print_encoded(&ctx.fields[FeedFieldContent].str);
          781         putchar(FieldSeparator);
          782         fputs("plain", stdout);
          783         putchar(FieldSeparator);
          784         string_print(&ctx.fields[FeedFieldId].str);
          785         putchar(FieldSeparator);
          786         string_print(&ctx.fields[FeedFieldAuthor].str);
          787         putchar(FieldSeparator);
          788         /* no/empty enclosure */
          789         putchar(FieldSeparator);
          790         /* empty category */
          791         putchar('\n');
          792 }
          793 
          794 static void
          795 twtxt_item(void)
          796 {
          797         struct item *v, *found = NULL;
          798         size_t i;
          799 
          800         /* must have a video id */
          801         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          802                 return;
          803 
          804         for (i = 0; i < search_res->nitems; i++) {
          805                 v = &(search_res->items[i]);
          806                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          807                         found = v;
          808         }
          809 
          810         if (!iscurrentitemallowed(found))
          811                 return;
          812 
          813         string_print(&ctx.fields[FeedFieldTime].str);
          814         putchar(FieldSeparator);
          815         if (found && found->membersonly)
          816                 fputs(MEMBERS_ONLY, stdout);
          817         string_print(&ctx.fields[FeedFieldTitle].str);
          818         if (iscurrentitemshort())
          819                 fputs(YT_SHORTS_TITLE, stdout);
          820         if (found && found->duration[0]) {
          821                 fputs(" [", stdout);
          822                 fputs(found->duration, stdout);
          823                 fputs("]", stdout);
          824         }
          825         fputs(": ", stdout);
          826         string_print(&ctx.fields[FeedFieldLink].str);
          827         putchar('\n');
          828 }
          829 
          830 static int
          831 istag(const char *name, size_t len, const char *name2, size_t len2)
          832 {
          833         return (len == len2 && !strcasecmp(name, name2));
          834 }
          835 
          836 static int
          837 isattr(const char *name, size_t len, const char *name2, size_t len2)
          838 {
          839         return (len == len2 && !strcasecmp(name, name2));
          840 }
          841 
          842 static void
          843 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
          844         const char *v, size_t vl)
          845 {
          846         if (ISINCONTENT(ctx))
          847                 return;
          848 
          849         if (!ctx.tag.id)
          850                 return;
          851 
          852         if (ISCONTENTTAG(ctx))
          853                 return;
          854 
          855         if (ctx.tag.id == AtomTagLink) {
          856                 if (isattr(n, nl, STRP("rel"))) {
          857                         string_append(&attrrel, v, vl);
          858                 } else if (isattr(n, nl, STRP("href"))) {
          859                         string_append(&tmpstr, v, vl);
          860                 }
          861         }
          862 }
          863 
          864 static void
          865 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
          866               const char *data, size_t datalen)
          867 {
          868         char buf[8];
          869         int len;
          870 
          871         if (ISINCONTENT(ctx))
          872                 return;
          873 
          874         if (!ctx.tag.id)
          875                 return;
          876 
          877         /* try to translate entity, else just pass as data to
          878          * xmlattr handler. */
          879         if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
          880                 xmlattr(p, t, tl, n, nl, buf, (size_t)len);
          881         else
          882                 xmlattr(p, t, tl, n, nl, data, datalen);
          883 }
          884 
          885 static void
          886 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
          887 {
          888         if (ISINCONTENT(ctx))
          889                 return;
          890 
          891         if (attrrel.len && isattr(n, nl, STRP("rel")))
          892                 string_clear(&attrrel);
          893         else if (tmpstr.len &&
          894             (isattr(n, nl, STRP("href")) ||
          895              isattr(n, nl, STRP("url"))))
          896                 string_clear(&tmpstr); /* use the last value for multiple attribute values */
          897 }
          898 
          899 static void
          900 xmldata(XMLParser *p, const char *s, size_t len)
          901 {
          902         if (!ctx.field)
          903                 return;
          904 
          905         string_append(ctx.field, s, len);
          906 }
          907 
          908 static void
          909 xmldataentity(XMLParser *p, const char *data, size_t datalen)
          910 {
          911         char buf[8];
          912         int len;
          913 
          914         if (!ctx.field)
          915                 return;
          916 
          917         /* try to translate entity, else just pass as data to
          918          * xmldata handler. */
          919         if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
          920                 xmldata(p, buf, (size_t)len);
          921         else
          922                 xmldata(p, data, datalen);
          923 }
          924 
          925 static void
          926 xmltagstart(XMLParser *p, const char *t, size_t tl)
          927 {
          928         const FeedTag *f;
          929 
          930         if (ISINCONTENT(ctx))
          931                 return;
          932 
          933         /* start of RSS or Atom item / entry */
          934         if (ctx.feedtype == FeedTypeNone) {
          935                 if (istag(t, tl, STRP("entry")))
          936                         ctx.feedtype = FeedTypeAtom;
          937                 return;
          938         }
          939 
          940         /* field tagid already set or nested tags. */
          941         if (ctx.tag.id) {
          942                 /* nested <author><name> for Atom */
          943                 if (ctx.tag.id == AtomTagAuthor &&
          944                     istag(t, tl, STRP("name"))) {
          945                         memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
          946                 } else {
          947                         return; /* other nested tags are not allowed: return */
          948                 }
          949         }
          950 
          951         /* in item */
          952         if (ctx.tag.id == TagUnknown) {
          953                 if (!(f = gettag(ctx.feedtype, t, tl)))
          954                         f = &notag;
          955                 memcpy(&(ctx.tag), f, sizeof(ctx.tag));
          956         }
          957 
          958         ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
          959         string_clear(&attrrel);
          960 }
          961 
          962 static void
          963 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
          964 {
          965         enum TagId tagid;
          966 
          967         if (ISINCONTENT(ctx))
          968                 return;
          969 
          970         /* set tag type based on its attribute value */
          971         if (ctx.tag.id == AtomTagLink) {
          972                 /* empty or "alternate": other types could be
          973                    "enclosure", "related", "self" or "via" */
          974                 if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate")))
          975                         ctx.tag.id = AtomTagLinkAlternate;
          976                 else
          977                         ctx.tag.id = AtomTagLink; /* unknown */
          978         }
          979 
          980         tagid = ctx.tag.id;
          981 
          982         /* map tag type to field: unknown or lesser priority is ignored,
          983            when tags of the same type are repeated only the first is used. */
          984         if (fieldmap[tagid] == -1 ||
          985             tagid <= ctx.fields[fieldmap[tagid]].tagid) {
          986                 return;
          987         }
          988 
          989         if (ctx.iscontenttag) {
          990                 ctx.iscontent = 1;
          991                 ctx.iscontenttag = 0;
          992         }
          993 
          994         ctx.field = &(ctx.fields[fieldmap[tagid]].str);
          995         ctx.fields[fieldmap[tagid]].tagid = tagid;
          996 
          997         /* clear field if it is overwritten (with a priority order) for the new
          998            value, if the field can have multiple values then do not clear it. */
          999         string_clear(ctx.field);
         1000 }
         1001 
         1002 static void
         1003 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
         1004 {
         1005         size_t i;
         1006 
         1007         if (ctx.feedtype == FeedTypeNone)
         1008                 return;
         1009 
         1010         if (ISINCONTENT(ctx)) {
         1011                 /* not a closed content field */
         1012                 if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
         1013                         return;
         1014         } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
         1015                 /* matched tag end: close it */
         1016         } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
         1017            istag(t, tl, STRP("entry"))))) /* Atom */
         1018         {
         1019                 /* end of Atom entry */
         1020                 printfields();
         1021 
         1022                 /* clear strings */
         1023                 for (i = 0; i < FeedFieldLast; i++) {
         1024                         string_clear(&ctx.fields[i].str);
         1025                         ctx.fields[i].tagid = TagUnknown;
         1026                 }
         1027                 /* allow parsing of Atom and RSS concatenated in one XML stream. */
         1028                 ctx.feedtype = FeedTypeNone;
         1029         } else {
         1030                 return; /* not end of field */
         1031         }
         1032 
         1033         /* temporary string: for fields that cannot be processed
         1034            directly and need more context, for example by its tag
         1035            attributes, like the Atom link rel="alternate|enclosure". */
         1036         if (tmpstr.len && ctx.field) {
         1037                 string_clear(ctx.field);
         1038                 string_append(ctx.field, tmpstr.data, tmpstr.len);
         1039         }
         1040 
         1041         /* close field */
         1042         string_clear(&tmpstr); /* reuse and clear temporary string */
         1043 
         1044         if (ctx.tag.id == AtomTagAuthorName)
         1045                 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer tag */
         1046         else
         1047                 memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
         1048 
         1049         ctx.iscontent = 0;
         1050         ctx.field = NULL;
         1051 }
         1052 
         1053 static char *
         1054 request_channel_feed(const char *channelid)
         1055 {
         1056         char path[2048];
         1057         int r;
         1058 
         1059         r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid);
         1060         /* check if request is too long (truncation) */
         1061         if (r < 0 || (size_t)r >= sizeof(path))
         1062                 return NULL;
         1063 
         1064         return request("www.youtube.com", path, "");
         1065 }
         1066 
         1067 int
         1068 isvalidchannel(const char *s)
         1069 {
         1070         size_t len;
         1071 
         1072         for (len = 0; *s; s++, len++) {
         1073                 if (ISALPHA((unsigned char)*s) ||
         1074                         ISDIGIT((unsigned char)*s) ||
         1075                         *s == '-' || *s == '_')
         1076                         continue;
         1077                 return 0;
         1078         }
         1079 
         1080         return *s == '\0' && len == 24;
         1081 }
         1082 
         1083 void
         1084 usage(void)
         1085 {
         1086         const char *line1 = "Bad Request, path should be the channel id + file extension, for example: UCrbvoMC0zUvPL8vjswhLOSw.json";
         1087         const char *line2 = "Supported extensions are: [atom|gph|html|json|tsv|txt][[+-]found|shorts]";
         1088 
         1089         if (cgimode) {
         1090                 if (godmode) {
         1091                         printf("3%s\tErr\t%s\t%s\r\n", line1, server_name, server_port);
         1092                         printf("3%s\tErr\t%s\t%s\r\n", line2, server_name, server_port);
         1093                 } else {
         1094                         fputs("Status: 400 Bad Request\r\n", stdout);
         1095                         fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
         1096                         printf("400 %s\n", line1);
         1097                         printf("\n%s", line2);
         1098                 }
         1099                 exit(0);
         1100         } else {
         1101                 fputs("usage: feed <channelid> [atom|gph|html|json|tsv|txt][[+-found|shorts]\n", stderr);
         1102                 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n", stderr);
         1103                 exit(1);
         1104         }
         1105 }
         1106 
         1107 /* check format, ignore modifier, like "+notfound" */
         1108 int
         1109 isformat(const char *input, const char *check)
         1110 {
         1111         size_t len;
         1112 
         1113         len = strcspn(input, "+-");
         1114         if (!len)
         1115                 return 0;
         1116 
         1117         if (!strncmp(input, check, len))
         1118                 return 1;
         1119 
         1120         return 0;
         1121 }
         1122 
         1123 void
         1124 parseformatmodifier(const char *input)
         1125 {
         1126         /* only show items found/matched on the channel with the feed. */
         1127         if (strstr(input, "+found"))
         1128                 showfound = 1;
         1129         if (strstr(input, "-found"))
         1130                 showfound = 0;
         1131         /* show shorts ("/shorts/" in the URL) or not. */
         1132         if (strstr(input, "+shorts"))
         1133                 showshorts = 1;
         1134         if (strstr(input, "-shorts"))
         1135                 showshorts = 0;
         1136 }
         1137 
         1138 int
         1139 main(int argc, char *argv[])
         1140 {
         1141         char buf[256];
         1142         const char *channelid = NULL;
         1143         char *data, *format = "tsv", *p, *path = NULL, *tmp;
         1144         size_t i;
         1145 
         1146         if (pledge("stdio dns inet rpath unveil", NULL) == -1)
         1147                 err(1, "pledge");
         1148 
         1149         if ((tmp = getenv("REQUEST_URI")))
         1150                 path = tmp;
         1151         else if ((tmp = getenv("REQUEST")))
         1152                 path = tmp;
         1153 
         1154         if (path) {
         1155                 cgimode = 1;
         1156 
         1157                 if ((tmp = getenv("SERVER_NAME")))
         1158                         server_name = tmp;
         1159                 if ((tmp = getenv("SERVER_PORT")))
         1160                         server_port = tmp;
         1161                 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "gopher"))
         1162                         godmode = 1;
         1163 
         1164                 strlcpy(buf, path, sizeof(buf));
         1165                 path = buf;
         1166 
         1167                 if (!(p = strrchr(path, '/')))
         1168                         usage();
         1169 
         1170                 channelid = p + 1;
         1171                 if ((p = strrchr(channelid, '.'))) {
         1172                         *p = '\0'; /* NULL terminate */
         1173                         format = p + 1;
         1174                 }
         1175         } else {
         1176                 if (argc <= 1)
         1177                         usage();
         1178 
         1179                 channelid = argv[1];
         1180                 if (argc > 2)
         1181                         format = argv[2];
         1182         }
         1183         if (!channelid || !isvalidchannel(channelid))
         1184                 usage();
         1185 
         1186         /* formats: if invalid use the default */
         1187         if (isformat(format, "atom") || isformat(format, "xml"))
         1188                 printfields = atom_item;
         1189         else if (isformat(format, "gph"))
         1190                 printfields = gph_item;
         1191         else if (isformat(format, "html"))
         1192                 printfields = html_item;
         1193         else if (isformat(format, "json"))
         1194                 printfields = json_item;
         1195         else if (isformat(format, "tsv") || isformat(format, "sfeed"))
         1196                 printfields = sfeed_item;
         1197         else if (isformat(format, "txt") || isformat(format, "twtxt"))
         1198                 printfields = twtxt_item;
         1199         else
         1200                 usage();
         1201 
         1202         parseformatmodifier(format);
         1203 
         1204         search_res = youtube_channel_videos(channelid);
         1205         if (!search_res || search_res->nitems == 0) {
         1206                 /* error or no videos found */
         1207                 return 0;
         1208         }
         1209 
         1210         if (!(data = request_channel_feed(channelid)))
         1211                 return 1; /* error, no data at all */
         1212 
         1213         if (pledge("stdio", NULL) == -1)
         1214                 err(1, "pledge");
         1215 
         1216         setxmldata(data, strlen(data));
         1217 
         1218         memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
         1219 
         1220         parser.xmlattr = xmlattr;
         1221         parser.xmlattrentity = xmlattrentity;
         1222         parser.xmlattrstart = xmlattrstart;
         1223         parser.xmlcdata = xmldata;
         1224         parser.xmldata = xmldata;
         1225         parser.xmldataentity = xmldataentity;
         1226         parser.xmltagend = xmltagend;
         1227         parser.xmltagstart = xmltagstart;
         1228         parser.xmltagstartparsed = xmltagstartparsed;
         1229 
         1230         /* init all fields, make sure it has a value */
         1231         for (i = 0; i < FeedFieldLast; i++) {
         1232                 string_append(&(ctx.fields[i].str), " ", 1);
         1233                 string_clear(&(ctx.fields[i].str));
         1234         }
         1235 
         1236         if (cgimode && !godmode) {
         1237                 fputs("Status: 200 OK\r\n", stdout);
         1238                 if (isformat(format, "atom") || isformat(format, "xml"))
         1239                         fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout);
         1240                 else if (isformat(format, "html"))
         1241                         fputs("Content-Type: text/html; charset=utf-8\r\n\r\n", stdout);
         1242                 else if (isformat(format, "json"))
         1243                         fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout);
         1244                 else
         1245                         fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
         1246         }
         1247 
         1248         if (isformat(format, "atom") || isformat(format, "xml"))
         1249                 atom_header();
         1250         else if (isformat(format, "gph"))
         1251                 gph_header();
         1252         else if (isformat(format, "html"))
         1253                 html_header();
         1254         else if (isformat(format, "json"))
         1255                 json_header();
         1256 
         1257         /* NOTE: getnext is defined in xml.h for inline optimization */
         1258         xml_parse(&parser);
         1259 
         1260         if (isformat(format, "atom") || isformat(format, "xml"))
         1261                 atom_footer();
         1262         else if (isformat(format, "gph"))
         1263                 gph_footer();
         1264         else if (isformat(format, "html"))
         1265                 html_footer();
         1266         else if (isformat(format, "json"))
         1267                 json_footer();
         1268 
         1269         return 0;
         1270 }