URI: 
       gopher-validator.c - gopher-validator - Simple gopher menu validator.
  HTML git clone git://bitreich.org/gopher-validator git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/gopher-validator
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
       gopher-validator.c (12543B)
       ---
            1 #include <sys/socket.h>
            2 #include <sys/time.h>
            3 #include <sys/types.h>
            4 
            5 #include <ctype.h>
            6 #include <errno.h>
            7 #include <locale.h>
            8 #include <limits.h>
            9 #include <netdb.h>
           10 #include <stdarg.h>
           11 #include <stdio.h>
           12 #include <stdlib.h>
           13 #include <string.h>
           14 #include <unistd.h>
           15 #include <wchar.h>
           16 #include <wctype.h>
           17 
/* Timeout in seconds; edial() applies it to the socket as both the send
   (SO_SNDTIMEO) and the receive (SO_RCVTIMEO) timeout. */
#define MAX_RESPONSETIMEOUT 10      /* timeout in seconds */
           19 
           20 #ifndef __OpenBSD__
           21 #define pledge(a,b) 0
           22 #endif
           23 
/* Components of a URI as split by parseuri(). All fields are
   NUL-terminated strings; a component that is absent stays empty. */
struct uri {
        char host[256];  /* hostname or address; an IPv6 literal is stored without its [] */
        char port[8];    /* text between ':' and the path, empty if no port was given */
        char path[1024]; /* remainder of the URI starting at the first '/', or empty */
};
           29 
/* The fields of one gopher menu line as parsed by checkdir(). */
struct visited {
        int _type;           /* item type: the first character of the line */
        char username[1024]; /* first tab-separated field: the display string */
        char path[1024];     /* second field: the selector */
        char host[256];      /* third field: the host */
        char port[8];        /* fourth field: the port, kept as text */
};
           37 
/* check valid types with extension in path: one entry maps a filename
   extension to the item types that checkdir() accepts for a selector
   ending in that extension */
struct gophertype {
        const char *ext; /* filename extension, without the leading dot */
        const char *allow; /* allowed types for this extension, one character per type */
};
           43 
           44 /* must be sorted alphabetically by extension */
           45 struct gophertype types[] = {
           46         { .ext = "asc",     "0" },
           47         { .ext = "avi",     "9" },
           48         { .ext = "bz2",     "9" },
           49         { .ext = "c",       "0" },
           50         { .ext = "dcgi",    "17" },
           51         { .ext = "doc",     "9" },
           52         { .ext = "exe",     "9" },
           53         { .ext = "gif",     "gI" },
           54         { .ext = "go",      "0" },
           55         { .ext = "gph",     "1" },
           56         { .ext = "gz",      "9" },
           57         { .ext = "h",       "0" },
           58         { .ext = "htm",     "0h" },
           59         { .ext = "html",    "0h" },
           60         { .ext = "iso",     "9" },
           61         { .ext = "jpeg",    "I" },
           62         { .ext = "jpg",     "I" },
           63         { .ext = "json",    "0" },
           64         { .ext = "lzma",    "9" },
           65         { .ext = "m3u",     "0" },
           66         { .ext = "md",      "0" },
           67         { .ext = "md5",     "0" },
           68         { .ext = "md5sum",  "0" },
           69         { .ext = "mkv",     "9" },
           70         { .ext = "mp3",     "9" },
           71         { .ext = "mp4",     "9" },
           72         { .ext = "ogg",     "9" },
           73         { .ext = "ogv",     "9" },
           74         { .ext = "pdf",     "9" },
           75         { .ext = "png",     "I" },
           76         { .ext = "rss",     "0" },
           77         { .ext = "sh",      "0" },
           78         { .ext = "sha1",    "0" },
           79         { .ext = "sha1sum", "0" },
           80         { .ext = "sha256",    "0" },
           81         { .ext = "sha256sum", "0" },
           82         { .ext = "sha512",    "0" },
           83         { .ext = "sha512sum", "0" },
           84         { .ext = "srt",     "0" },
           85         { .ext = "tgz",     "9" },
           86         { .ext = "txt",     "0" },
           87         { .ext = "wav",     "9" },
           88         { .ext = "xml",     "0" },
           89         { .ext = "xz",      "9" },
           90 };
           91 
           92 int exitcode = 0;
           93 FILE *errfp, *outfp;
           94 
           95 void
           96 die(const char *fmt, ...)
           97 {
           98         va_list ap;
           99 
          100         fputs("fatal: ", errfp);
          101 
          102         va_start(ap, fmt);
          103         vfprintf(errfp, fmt, ap);
          104         va_end(ap);
          105 
          106         exit(2);
          107 }
          108 
          109 void
          110 error(const char *fmt, ...)
          111 {
          112         va_list ap;
          113 
          114         fputs("error: ", outfp);
          115 
          116         va_start(ap, fmt);
          117         vfprintf(outfp, fmt, ap);
          118         va_end(ap);
          119 
          120         exitcode = 1;
          121 }
          122 
          123 void
          124 warning(const char *fmt, ...)
          125 {
          126         va_list ap;
          127 
          128         fputs("warning: ", outfp);
          129 
          130         va_start(ap, fmt);
          131         vfprintf(outfp, fmt, ap);
          132         va_end(ap);
          133 }
          134 
          135 int
          136 gophertypecmp(const void *v1, const void *v2)
          137 {
          138         return strcasecmp(((struct gophertype *)v1)->ext,
          139                ((struct gophertype *)v2)->ext);
          140 }
          141 
          142 int
          143 isvalidhost(const char *s)
          144 {
          145         int colons;
          146 
          147         /* IPv6 */
          148         if (*s == '[') {
          149                 colons = 0;
          150                 s++;
          151                 for (; *s; s++) {
          152                         if (*s == ':')
          153                                 colons++;
          154                         else if (*s == ']')
          155                                 break;
          156                         else if (isxdigit((unsigned char)*s) || *s == '.')
          157                                 ;
          158                         else
          159                                 return 0;
          160                 }
          161                 if (colons < 2 || *s != ']')
          162                         return 0;
          163         } else {
          164                 if (!*s)
          165                         return 0;
          166                 for (; *s; s++) {
          167                         if (!isalpha((unsigned char)*s) &&
          168                             !isdigit((unsigned char)*s) &&
          169                             *s != '-' && *s != '.')
          170                                 return 0;
          171                 }
          172         }
          173 
          174         return 1;
          175 }
          176 
          177 int
          178 edial(const char *host, const char *port)
          179 {
          180         struct addrinfo hints, *res, *res0;
          181         int error, save_errno, s;
          182         const char *cause = NULL;
          183         struct timeval timeout;
          184 
          185         memset(&hints, 0, sizeof(hints));
          186         hints.ai_family = AF_UNSPEC;
          187         hints.ai_socktype = SOCK_STREAM;
          188         hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
          189         if ((error = getaddrinfo(host, port, &hints, &res0)))
          190                 die("%s: %s: %s:%s\n", __func__, gai_strerror(error), host, port);
          191         s = -1;
          192         for (res = res0; res; res = res->ai_next) {
          193                 s = socket(res->ai_family, res->ai_socktype,
          194                            res->ai_protocol);
          195                 if (s == -1) {
          196                         cause = "socket";
          197                         continue;
          198                 }
          199 
          200                 timeout.tv_sec = MAX_RESPONSETIMEOUT;
          201                 timeout.tv_usec = 0;
          202                 if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) == -1)
          203                         die("%s: setsockopt: %s\n", __func__, strerror(errno));
          204 
          205                 timeout.tv_sec = MAX_RESPONSETIMEOUT;
          206                 timeout.tv_usec = 0;
          207                 if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1)
          208                         die("%s: setsockopt: %s\n", __func__, strerror(errno));
          209 
          210                 if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
          211                         cause = "connect";
          212                         save_errno = errno;
          213                         close(s);
          214                         errno = save_errno;
          215                         s = -1;
          216                         continue;
          217                 }
          218                 break;
          219         }
          220         if (s == -1)
          221                 die("%s: %s: %s:%s\n", __func__, cause, host, port);
          222         freeaddrinfo(res0);
          223 
          224         return s;
          225 }
          226 
          227 void
          228 checkdir(FILE *fp)
          229 {
          230         struct gophertype gt, *rgt;
          231         struct visited v;
          232         char line[1024], *end, *s;
          233         size_t linenr;
          234         ssize_t n;
          235         long long l;
          236         int i, r, len, hasdotend = 0, c, primarytype = 0, wc, col;
          237         wchar_t w;
          238 
          239         if (pledge("stdio", NULL) == -1)
          240                 die("pledge: %s\n", strerror(errno));
          241 
          242         for (linenr = 1; fgets(line, sizeof(line), fp); linenr++) {
          243                 n = strcspn(line, "\n");
          244                 if (line[n] != '\n')
          245                         die("%zu: line too long\n", linenr); /* fatal */
          246                 if (n && line[n] == '\n')
          247                         line[n] = '\0';
          248                 if (n && line[n - 1] == '\r')
          249                         line[--n] = '\0';
          250                 else
          251                         error("%zu: invalid line-ending, not CRLF (\\r\\n)\n", linenr);
          252                 if (n == 1 && line[0] == '.') {
          253                         hasdotend = 1;
          254                         break;
          255                 }
          256 
          257                 memset(&v, 0, sizeof(v));
          258 
          259                 v._type = line[0];
          260 
          261                 /* "username" */
          262                 i = 1;
          263                 len = strcspn(line + i, "\t");
          264                 if (len + 1 < sizeof(v.username)) {
          265                         memcpy(v.username, line + i, len);
          266                         v.username[len] = '\0';
          267                 } else  {
          268                         error("%zu: username field too long\n", linenr);
          269                         continue;
          270                 }
          271                 if (line[i + len] == '\t') {
          272                         i += len + 1;
          273                 } else {
          274                         error("%zu: invalid line / field count\n", linenr);
          275                         continue;
          276                 }
          277 
          278                 /* selector / path */
          279                 len = strcspn(line + i, "\t");
          280                 if (len + 1 < sizeof(v.path)) {
          281                         memcpy(v.path, line + i, len);
          282                         v.path[len] = '\0';
          283                 } else {
          284                         error("%zu: path field too long\n", linenr);
          285                         continue;
          286                 }
          287                 if (line[i + len] == '\t') {
          288                         i += len + 1;
          289                 } else {
          290                         error("%zu: invalid line / field count\n", linenr);
          291                         continue;
          292                 }
          293 
          294                 /* host */
          295                 len = strcspn(line + i, "\t");
          296                 if (len + 1 < sizeof(v.host)) {
          297                         memcpy(v.host, line + i, len);
          298                         v.host[len] = '\0';
          299                 } else {
          300                         error("%zu: host field too long\n", linenr);
          301                         continue;
          302                 }
          303                 if (line[i + len] == '\t') {
          304                         i += len + 1;
          305                 } else {
          306                         error("%zu: invalid line / field count\n", linenr);
          307                         continue;
          308                 }
          309 
          310                 /* port */
          311                 len = strcspn(line + i, "\t");
          312                 if (len + 1 < sizeof(v.port)) {
          313                         memcpy(v.port, line + i, len);
          314                         v.port[len] = '\0';
          315                 } else {
          316                         error("%zu: port field too long\n", linenr);
          317                         continue;
          318                 }
          319 
          320                 /* check non-standard types */
          321                 c = v._type;
          322                 if (v._type == '+' && !primarytype)
          323                         error("%zu: mirror type used, but no previous type set\n", linenr);
          324                 if (v._type != '+')
          325                         primarytype = v._type;
          326 
          327                 if (!(isdigit(c) || c == 'g' || c == 'I' || c == 'T' || c == '+')) {
          328                         /* common-used */
          329                         if (c == 'i' || c == 'h') {
          330 #if 0
          331                                 warning("%zu: non-standard, but common-used type: %c\n",
          332                                         linenr, c);
          333 #endif
          334                         } else {
          335                                 /* 3.8: "Characters '0' through 'Z' are reserved. Local
          336                                    experiments should use other characters.
          337                                    Machine-specific extensions are not encouraged." */
          338                                 if (c >= '0' && c <= 'Z')
          339                                         error("%zu: unknown / non-standard type: %c\n",
          340                                               linenr, c);
          341                         }
          342                 }
          343 
          344                 /* check type with file extension, unless it is the HTML 'h'
          345                    type with a "URL:" prefix */
          346                 if ((s = strrchr(v.path, '.')) && !strchr(s, '/') &&
          347                     !(primarytype == 'h' && !strncmp(v.path, "URL:", sizeof("URL:") - 1))) {
          348                         gt.ext = ++s;
          349                         if (!(rgt = bsearch(&gt, &types, sizeof(types) / sizeof(types[0]),
          350                                 sizeof(types[0]), &gophertypecmp)))
          351                                 continue;
          352 
          353                         if (!strchr(rgt->allow, primarytype))
          354                                 warning("%zu: invalid type '%c' for extension '%s', valid types: '%s'\n",
          355                                         linenr, primarytype, rgt->ext, rgt->allow);
          356                 }
          357 
          358                 if (!isvalidhost(v.host))
          359                         error("%zu: invalid host: %s\n", linenr, v.host);
          360 
          361                 /* check port, must be numeric and in range, port 0 is allowed:
          362                 "Appendix:
          363                 Note: Port corresponds the the TCP Port Number, its value should
          364                       be in the range [0..65535]; port 70 is officially assigned
          365                       to gopher." */
          366 
          367                 errno = 0;
          368                 l = strtoll(v.port, &end, 10);
          369                 if (errno || v.port == end || *end || l < 0 || l > 65535) {
          370                         error("%zu: invalid port: %s\n", linenr, v.port);
          371                 } else {
          372 #if 0
          373                         if (l != 70)
          374                                 warning("%zu: non-standard gopher port: %lld, not 70\n",
          375                                         linenr, l);
          376 #endif
          377                 }
          378 
          379                 /* RFC "Notes": "The Selector string should be no longer than
          380                    255 characters." */
          381                 if ((len = strlen(v.path)) > 255)
          382                         error("%zu: selector should not be longer than 255 characters: %d bytes\n",
          383                               linenr, len);
          384 
          385                 /* decode UTF-8 (text-encoding is ASCII/Latin1 in the RFC, but
          386                    Latin1 sucks, recommend UTF-8 instead.
          387                    Check column length as recommended as described in the RFC
          388                    in section 3.9. */
          389                 s = v.username;
          390                 len = strlen(s);
          391                 col = 0;
          392                 for (i = 0; i < len; i += r) {
          393                         r = mbtowc(&w, &s[i], len - i < 4 ? len - i : 4);
          394                         if (r == 0)
          395                                 break;
          396                         if (r == -1) {
          397                                 mbtowc(NULL, NULL, 0); /* reset state */
          398                                 warning("%zu:%d: username: first invalid byte, not UTF-8\n",
          399                                         linenr, i + 1);
          400                                 break;
          401                         }
          402                         if ((wc = wcwidth(w)) == -1)
          403                                 wc = 1;
          404                         col += (size_t)wc;
          405 
          406                         /* RFC "Notes": "It is *highly* recommended that the
          407                            User_Name field contain only printable characters". */
          408                         if (!iswprint(w)) {
          409                                 error("%zu:%d: first non-printable character in username field\n",
          410                                       linenr, i + 1);
          411                                 break;
          412                         }
          413                 }
          414 #if 0
          415                 /* instead of 70 check 79 */
          416                 if (col > 79)
          417                         warning("%zu: username column length is > 79 (%d), see section 3.9 of the RFC\n",
          418                                 linenr, col);
          419 #endif
          420 
          421                 if (!strcmp(v.path, "..") || strstr(v.path, "../"))
          422                         warning("%zu: found ../ in path: don't use relative paths\n", linenr);
          423         }
          424         if (ferror(fp))
          425                 die("fgets: %s\n", strerror(errno));
          426 
          427         if (!hasdotend)
          428                 error("no .\\r\\n end\n");
          429 }
          430 
          431 void
          432 checkremote(const char *host, const char *port, const char *path, const char *param)
          433 {
          434         FILE *fp;
          435         int fd, r;
          436 
          437         fd = edial(host, port);
          438 
          439         if (param[0])
          440                 r = dprintf(fd, "%s\t%s\r\n", path, param);
          441         else
          442                 r = dprintf(fd, "%s\r\n", path);
          443         if (r == -1)
          444                 die("write: %s\n", strerror(errno));
          445 
          446         if (!(fp = fdopen(fd, "rb+")))
          447                 die("fdopen: %s\n", strerror(errno));
          448         checkdir(fp);
          449         fclose(fp);
          450 }
          451 
          452 int
          453 parseuri(const char *str, struct uri *u)
          454 {
          455         const char *s, *e;
          456 
          457         memset(u, 0, sizeof(struct uri));
          458 
          459         s = str;
          460 
          461         /* IPv6 */
          462         if (*s == '[') {
          463                 s++;
          464                 e = strchr(s, ']');
          465                 if (!e || e - s + 1 >= sizeof(u->host))
          466                         return 0;
          467                 memcpy(u->host, s, e - s);
          468                 u->host[e - s] = '\0';
          469                 e++;
          470         } else {
          471                 e = &s[strcspn(s, ":/")];
          472                 if (e - s + 1 >= sizeof(u->host))
          473                         return 0;
          474                 memcpy(u->host, s, e - s);
          475                 u->host[e - s] = '\0';
          476         }
          477 
          478         if (*e == ':') {
          479                 s = e + 1;
          480                 e = &s[strcspn(s, "/")];
          481 
          482                 if (e - s + 1 >= sizeof(u->port))
          483                         return 0;
          484                 memcpy(u->port, s, e - s);
          485                 u->port[e - s] = '\0';
          486         }
          487         if (*e && *e != '/')
          488                 return 0; /* invalid path */
          489 
          490         s = e;
          491         e = s + strlen(s);
          492 
          493         if (e - s + 1 >= sizeof(u->path))
          494                 return 0;
          495         memcpy(u->path, s, e - s);
          496         u->path[e - s] = '\0';
          497 
          498         return 1;
          499 }
          500 
          501 int
          502 main(int argc, char **argv)
          503 {
          504         struct uri u;
          505         const char *path, *uri = "", *param = "", *s;
          506         int _type = '1';
          507 
          508         setlocale(LC_CTYPE, "");
          509 
          510         outfp = stdout;
          511         errfp = stderr;
          512 
          513         /* CGI-mode or stand-alone */
          514         if ((s = getenv("QUERY_STRING"))) {
          515                 uri = s;
          516                 param = "";
          517                 errfp = stdout; /* output errors to stdout also in CGI mode */
          518         } else {
          519                 switch (argc) {
          520                 case 3:
          521                         param = argv[2];
          522                 case 2:
          523                         uri = argv[1];
          524                         break;
          525                 case 1:
          526                         checkdir(stdin);
          527                         return exitcode;
          528                 default:
          529                         fprintf(errfp, "usage: %s [uri] [param]\n", argv[0]);
          530                         return 1;
          531                 }
          532         }
          533 
          534         if (pledge("stdio inet dns", NULL) == -1)
          535                 die("pledge: %s\n", strerror(errno));
          536 
          537         if (!strncmp(uri, "gopher://", sizeof("gopher://") - 1))
          538                 uri += sizeof("gopher://") - 1;
          539 
          540         if (!parseuri(uri, &u))
          541                 die("Invalid uri\n");
          542         if (u.host[0] == '\0')
          543                 die("Invalid hostname\n");
          544 
          545         if (u.path[0] == '\0')
          546                 memcpy(u.path, "/", 2);
          547         if (u.port[0] == '\0')
          548                 memcpy(u.port, "70", 3);
          549 
          550         path = u.path;
          551         if (path[0] == '/') {
          552                 path++;
          553                 if (*path) {
          554                         _type = *path;
          555                         path++;
          556                 }
          557         } else {
          558                 path = "";
          559         }
          560 
          561         switch (_type) {
          562         case '1':
          563         case '7':
          564                 break; /* handled below */
          565         default: /* these types are not validated */
          566                 fprintf(errfp, "only types 1 (dir) and 7 (search) are validated\n");
          567                 return 1;
          568         }
          569 
          570         if (_type != '7')
          571                 param = "";
          572 
          573         checkremote(u.host, u.port, path, param);
          574 
          575         return exitcode;
          576 }