/*
 * gopher-validator - Simple gopher menu validator.
 *
 * Source: git clone git://bitreich.org/gopher-validator
 *
 * Reads a gopher menu either from stdin or from a remote gopher server
 * (types '1' directory and '7' search) and reports lines that do not follow
 * the conventions checked below (mostly RFC 1436).
 *
 * Exit status: 0 when no error was reported, 1 when at least one error() was
 * reported (or on usage / unsupported type), 2 on a fatal error (die()).
 */
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <locale.h>
#include <limits.h>
#include <netdb.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h> /* strcasecmp() */
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>

#define MAX_RESPONSETIMEOUT 10 /* timeout in seconds */

/* pledge(2) only exists on OpenBSD; elsewhere it always "succeeds" */
#ifndef __OpenBSD__
#define pledge(a,b) 0
#endif

/* components of a parsed "host[:port][/path]" string, see parseuri() */
struct uri {
	char host[256];
	char port[8];
	char path[1024];
};

/* the fields of one parsed gopher menu line */
struct visited {
	int _type;           /* first byte of the line (as unsigned char) */
	char username[1024]; /* display string */
	char path[1024];     /* selector */
	char host[256];
	char port[8];
};

/* check valid types with extension in path */
struct gophertype {
	const char *ext;   /* filename extension */
	const char *allow; /* allowed types for this extension */
};

/* must be sorted alphabetically by extension: it is searched with bsearch() */
struct gophertype types[] = {
	{ .ext = "asc",       .allow = "0"  },
	{ .ext = "avi",       .allow = "9"  },
	{ .ext = "bz2",       .allow = "9"  },
	{ .ext = "c",         .allow = "0"  },
	{ .ext = "dcgi",      .allow = "17" },
	{ .ext = "doc",       .allow = "9"  },
	{ .ext = "exe",       .allow = "9"  },
	{ .ext = "gif",       .allow = "gI" },
	{ .ext = "go",        .allow = "0"  },
	{ .ext = "gph",       .allow = "1"  },
	{ .ext = "gz",        .allow = "9"  },
	{ .ext = "h",         .allow = "0"  },
	{ .ext = "htm",       .allow = "0h" },
	{ .ext = "html",      .allow = "0h" },
	{ .ext = "iso",       .allow = "9"  },
	{ .ext = "jpeg",      .allow = "I"  },
	{ .ext = "jpg",       .allow = "I"  },
	{ .ext = "json",      .allow = "0"  },
	{ .ext = "lzma",      .allow = "9"  },
	{ .ext = "m3u",       .allow = "0"  },
	{ .ext = "md",        .allow = "0"  },
	{ .ext = "md5",       .allow = "0"  },
	{ .ext = "md5sum",    .allow = "0"  },
	{ .ext = "mkv",       .allow = "9"  },
	{ .ext = "mp3",       .allow = "9"  },
	{ .ext = "mp4",       .allow = "9"  },
	{ .ext = "ogg",       .allow = "9"  },
	{ .ext = "ogv",       .allow = "9"  },
	{ .ext = "pdf",       .allow = "9"  },
	{ .ext = "png",       .allow = "I"  },
	{ .ext = "rss",       .allow = "0"  },
	{ .ext = "sh",        .allow = "0"  },
	{ .ext = "sha1",      .allow = "0"  },
	{ .ext = "sha1sum",   .allow = "0"  },
	{ .ext = "sha256",    .allow = "0"  },
	{ .ext = "sha256sum", .allow = "0"  },
	{ .ext = "sha512",    .allow = "0"  },
	{ .ext = "sha512sum", .allow = "0"  },
	{ .ext = "srt",       .allow = "0"  },
	{ .ext = "tgz",       .allow = "9"  },
	{ .ext = "txt",       .allow = "0"  },
	{ .ext = "wav",       .allow = "9"  },
	{ .ext = "xml",       .allow = "0"  },
	{ .ext = "xz",        .allow = "9"  },
};

int exitcode = 0;    /* set to 1 by error(), returned by main() */
FILE *errfp, *outfp; /* streams for fatal messages / validation output */

/* Print "fatal: " plus the formatted message to errfp and exit with 2. */
void
die(const char *fmt, ...)
{
	va_list ap;

	fputs("fatal: ", errfp);

	va_start(ap, fmt);
	vfprintf(errfp, fmt, ap);
	va_end(ap);

	exit(2);
}

/* Print "error: " plus the formatted message to outfp and make the program
   exit status 1. */
void
error(const char *fmt, ...)
{
	va_list ap;

	fputs("error: ", outfp);

	va_start(ap, fmt);
	vfprintf(outfp, fmt, ap);
	va_end(ap);

	exitcode = 1;
}

/* Print "warning: " plus the formatted message to outfp; the exit status is
   not affected. */
void
warning(const char *fmt, ...)
{
	va_list ap;

	fputs("warning: ", outfp);

	va_start(ap, fmt);
	vfprintf(outfp, fmt, ap);
	va_end(ap);
}

/* bsearch() comparison callback: case-insensitive compare of the `ext`
   members of two struct gophertype. */
int
gophertypecmp(const void *v1, const void *v2)
{
	const struct gophertype *a = v1, *b = v2;

	return strcasecmp(a->ext, b->ext);
}

/*
 * Check the syntax of a host field. Returns 1 when it looks valid, else 0.
 *
 * Two forms are accepted:
 *  - "[...]" (IPv6): only hex digits, ':' and '.' up to the closing ']',
 *    with at least two colons. Text after the ']' is not inspected.
 *  - otherwise: a non-empty string of alphanumerics, '-' and '.'.
 */
int
isvalidhost(const char *s)
{
	int colons;

	/* IPv6 */
	if (*s == '[') {
		colons = 0;
		for (s++; *s && *s != ']'; s++) {
			if (*s == ':')
				colons++;
			else if (!isxdigit((unsigned char)*s) && *s != '.')
				return 0;
		}
		if (colons < 2 || *s != ']')
			return 0;
		return 1;
	}

	if (!*s)
		return 0;
	for (; *s; s++) {
		if (!isalnum((unsigned char)*s) && *s != '-' && *s != '.')
			return 0;
	}

	return 1;
}

/*
 * Connect a TCP stream socket to host:port (port must be numeric).
 * Send and receive timeouts of MAX_RESPONSETIMEOUT seconds are set on the
 * socket. Every address returned by getaddrinfo() is tried in order.
 * Returns the connected socket; any failure is fatal (die()).
 */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *res, *res0;
	int gaierr, save_errno, s;
	const char *cause = NULL;
	struct timeval timeout;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
	if ((gaierr = getaddrinfo(host, port, &hints, &res0)))
		die("%s: %s: %s:%s\n", __func__, gai_strerror(gaierr), host, port);

	/* the same timeout is used for sending and receiving */
	timeout.tv_sec = MAX_RESPONSETIMEOUT;
	timeout.tv_usec = 0;

	s = -1;
	for (res = res0; res; res = res->ai_next) {
		s = socket(res->ai_family, res->ai_socktype,
		           res->ai_protocol);
		if (s == -1) {
			cause = "socket";
			continue;
		}

		if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));
		if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));

		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
			cause = "connect";
			save_errno = errno; /* close() may overwrite errno */
			close(s);
			errno = save_errno;
			s = -1;
			continue;
		}
		break;
	}
	if (s == -1)
		die("%s: %s: %s:%s\n", __func__, cause, host, port);
	freeaddrinfo(res0);

	return s;
}

/*
 * Copy the tab-delimited field starting at s into buf and NUL-terminate it.
 * Returns the length of the field, or -1 (buf untouched) when the field
 * length + 1 is not smaller than bufsiz.
 */
static int
copyfield(char *buf, size_t bufsiz, const char *s)
{
	size_t len = strcspn(s, "\t");

	if (len + 1 >= bufsiz)
		return -1;
	memcpy(buf, s, len);
	buf[len] = '\0';

	return (int)len;
}

/*
 * Validate the gopher menu read from fp line by line.
 *
 * Problems are reported with error() / warning() prefixed by the line number.
 * Reading stops at the terminating "." line; if it is missing an error is
 * reported. A line that does not end in '\n' within the line buffer, or a
 * read error, is fatal.
 */
void
checkdir(FILE *fp)
{
	struct gophertype gt, *rgt;
	struct visited v;
	char line[1024], *end, *s;
	size_t linenr, n;
	long long l;
	int i, r, len, hasdotend = 0, c, primarytype = 0, wc, col;
	wchar_t w;

	if (pledge("stdio", NULL) == -1)
		die("pledge: %s\n", strerror(errno));

	for (linenr = 1; fgets(line, sizeof(line), fp); linenr++) {
		n = strcspn(line, "\n");
		if (line[n] != '\n')
			die("%zu: line too long\n", linenr); /* fatal */
		line[n] = '\0';
		if (n && line[n - 1] == '\r')
			line[--n] = '\0';
		else
			error("%zu: invalid line-ending, not CRLF (\\r\\n)\n", linenr);
		if (n == 1 && line[0] == '.') {
			hasdotend = 1;
			break;
		}

		memset(&v, 0, sizeof(v));

		/* convert via unsigned char: the value is passed to isdigit(),
		   which must not receive a negative value */
		v._type = (unsigned char)line[0];

		/* "username" */
		i = 1;
		if ((len = copyfield(v.username, sizeof(v.username), line + i)) == -1) {
			error("%zu: username field too long\n", linenr);
			continue;
		}
		if (line[i + len] != '\t') {
			error("%zu: invalid line / field count\n", linenr);
			continue;
		}
		i += len + 1;

		/* selector / path */
		if ((len = copyfield(v.path, sizeof(v.path), line + i)) == -1) {
			error("%zu: path field too long\n", linenr);
			continue;
		}
		if (line[i + len] != '\t') {
			error("%zu: invalid line / field count\n", linenr);
			continue;
		}
		i += len + 1;

		/* host */
		if ((len = copyfield(v.host, sizeof(v.host), line + i)) == -1) {
			error("%zu: host field too long\n", linenr);
			continue;
		}
		if (line[i + len] != '\t') {
			error("%zu: invalid line / field count\n", linenr);
			continue;
		}
		i += len + 1;

		/* port: last checked field, anything after a further tab is
		   ignored */
		if (copyfield(v.port, sizeof(v.port), line + i) == -1) {
			error("%zu: port field too long\n", linenr);
			continue;
		}

		/* check non-standard types */
		c = v._type;
		if (v._type == '+' && !primarytype)
			error("%zu: mirror type used, but no previous type set\n", linenr);
		if (v._type != '+')
			primarytype = v._type;

		if (!(isdigit(c) || c == 'g' || c == 'I' || c == 'T' || c == '+')) {
			/* common-used */
			if (c == 'i' || c == 'h') {
#if 0
				warning("%zu: non-standard, but common-used type: %c\n",
				        linenr, c);
#endif
			} else {
				/* 3.8: "Characters '0' through 'Z' are reserved. Local
				   experiments should use other characters.
				   Machine-specific extensions are not encouraged." */
				if (c >= '0' && c <= 'Z')
					error("%zu: unknown / non-standard type: %c\n",
					      linenr, c);
			}
		}

		/* check type with file extension, unless it is the HTML 'h'
		   type with a "URL:" prefix.
		   An extension that is not in the table only skips this check:
		   the remaining checks of the line still have to run. */
		if ((s = strrchr(v.path, '.')) && !strchr(s, '/') &&
		    !(primarytype == 'h' && !strncmp(v.path, "URL:", sizeof("URL:") - 1))) {
			gt.ext = s + 1;
			rgt = bsearch(&gt, types, sizeof(types) / sizeof(types[0]),
			              sizeof(types[0]), gophertypecmp);
			if (rgt && !strchr(rgt->allow, primarytype))
				warning("%zu: invalid type '%c' for extension '%s', valid types: '%s'\n",
				        linenr, primarytype, rgt->ext, rgt->allow);
		}

		if (!isvalidhost(v.host))
			error("%zu: invalid host: %s\n", linenr, v.host);

		/* check port, must be numeric and in range, port 0 is allowed:
		   "Appendix:
		   Note: Port corresponds the the TCP Port Number, its value should
		   be in the range [0..65535]; port 70 is officially assigned
		   to gopher." */

		/* strtoll() skips leading white-space and accepts a sign, so
		   require the first character to be a digit */
		errno = 0;
		l = strtoll(v.port, &end, 10);
		if (!isdigit((unsigned char)v.port[0]) ||
		    errno || v.port == end || *end || l < 0 || l > 65535) {
			error("%zu: invalid port: %s\n", linenr, v.port);
		} else {
#if 0
			if (l != 70)
				warning("%zu: non-standard gopher port: %lld, not 70\n",
				        linenr, l);
#endif
		}

		/* RFC "Notes": "The Selector string should be no longer than
		   255 characters." */
		if ((len = strlen(v.path)) > 255)
			error("%zu: selector should not be longer than 255 characters: %d bytes\n",
			      linenr, len);

		/* decode UTF-8 (text-encoding is ASCII/Latin1 in the RFC, but
		   UTF-8 is recommended instead).
		   Check column length as recommended in section 3.9 of the
		   RFC. */
		s = v.username;
		len = strlen(s);
		col = 0;
		for (i = 0; i < len; i += r) {
			r = mbtowc(&w, &s[i], len - i < 4 ? len - i : 4);
			if (r == 0)
				break;
			if (r == -1) {
				mbtowc(NULL, NULL, 0); /* reset state */
				warning("%zu:%d: username: first invalid byte, not UTF-8\n",
				        linenr, i + 1);
				break;
			}
			if ((wc = wcwidth(w)) == -1)
				wc = 1;
			col += wc;

			/* RFC "Notes": "It is *highly* recommended that the
			   User_Name field contain only printable characters". */
			if (!iswprint(w)) {
				error("%zu:%d: first non-printable character in username field\n",
				      linenr, i + 1);
				break;
			}
		}
#if 0
		/* instead of 70 check 79 */
		if (col > 79)
			warning("%zu: username column length is > 79 (%d), see section 3.9 of the RFC\n",
			        linenr, col);
#endif

		if (!strcmp(v.path, "..") || strstr(v.path, "../"))
			warning("%zu: found ../ in path: don't use relative paths\n", linenr);
	}
	if (ferror(fp))
		die("fgets: %s\n", strerror(errno));

	if (!hasdotend)
		error("no .\\r\\n end\n");
}

/*
 * Connect to host:port, send the selector `path` (followed by a tab and
 * `param` when param is not empty) terminated by CRLF and validate the
 * response with checkdir(). Connection and write failures are fatal.
 */
void
checkremote(const char *host, const char *port, const char *path, const char *param)
{
	FILE *fp;
	int fd, r;

	fd = edial(host, port);

	if (param[0])
		r = dprintf(fd, "%s\t%s\r\n", path, param);
	else
		r = dprintf(fd, "%s\r\n", path);
	if (r == -1)
		die("write: %s\n", strerror(errno));

	if (!(fp = fdopen(fd, "rb+")))
		die("fdopen: %s\n", strerror(errno));
	checkdir(fp);
	fclose(fp);
}

/*
 * Split "host[:port][/path]" (without a scheme prefix) into *u.
 * The host may be a bracketed IPv6 address, it is stored without the
 * brackets. Missing parts are left as empty strings.
 * Returns 1 on success, 0 when a part does not fit its buffer or the host /
 * port is followed by something that is not a path.
 */
int
parseuri(const char *str, struct uri *u)
{
	const char *s, *e;

	memset(u, 0, sizeof(struct uri));

	s = str;

	/* IPv6 */
	if (*s == '[') {
		s++;
		e = strchr(s, ']');
		if (!e || e - s + 1 >= sizeof(u->host))
			return 0;
		memcpy(u->host, s, e - s);
		u->host[e - s] = '\0';
		e++;
	} else {
		e = &s[strcspn(s, ":/")];
		if (e - s + 1 >= sizeof(u->host))
			return 0;
		memcpy(u->host, s, e - s);
		u->host[e - s] = '\0';
	}

	if (*e == ':') {
		s = e + 1;
		e = &s[strcspn(s, "/")];

		if (e - s + 1 >= sizeof(u->port))
			return 0;
		memcpy(u->port, s, e - s);
		u->port[e - s] = '\0';
	}
	if (*e && *e != '/')
		return 0; /* invalid path */

	s = e;
	e = s + strlen(s);

	if (e - s + 1 >= sizeof(u->path))
		return 0;
	memcpy(u->path, s, e - s);
	u->path[e - s] = '\0';

	return 1;
}

/*
 * usage: gopher-validator [uri] [param]
 *
 * Without arguments the menu is read from stdin. With a uri the menu is
 * fetched from the server; the first character of the path is the gopher
 * type and only '1' and '7' are validated. `param` is only sent for type '7'.
 * When the environment variable QUERY_STRING is set (CGI-mode) it is used as
 * the uri and fatal messages also go to stdout.
 */
int
main(int argc, char **argv)
{
	struct uri u;
	const char *path, *uri = "", *param = "", *s;
	int _type = '1';

	setlocale(LC_CTYPE, "");

	outfp = stdout;
	errfp = stderr;

	/* CGI-mode or stand-alone */
	if ((s = getenv("QUERY_STRING"))) {
		uri = s;
		param = "";
		errfp = stdout; /* output errors to stdout also in CGI mode */
	} else {
		switch (argc) {
		case 3:
			param = argv[2];
			/* fallthrough */
		case 2:
			uri = argv[1];
			break;
		case 1:
			checkdir(stdin);
			return exitcode;
		default:
			fprintf(errfp, "usage: %s [uri] [param]\n", argv[0]);
			return 1;
		}
	}

	if (pledge("stdio inet dns", NULL) == -1)
		die("pledge: %s\n", strerror(errno));

	if (!strncmp(uri, "gopher://", sizeof("gopher://") - 1))
		uri += sizeof("gopher://") - 1;

	if (!parseuri(uri, &u))
		die("Invalid uri\n");
	if (u.host[0] == '\0')
		die("Invalid hostname\n");

	/* defaults: root selector and the standard gopher port */
	if (u.path[0] == '\0')
		memcpy(u.path, "/", 2);
	if (u.port[0] == '\0')
		memcpy(u.port, "70", 3);

	/* the path has the form "/<type><selector>" */
	path = u.path;
	if (path[0] == '/') {
		path++;
		if (*path) {
			_type = *path;
			path++;
		}
	} else {
		path = "";
	}

	switch (_type) {
	case '1':
	case '7':
		break; /* handled below */
	default: /* these types are not validated */
		fprintf(errfp, "only types 1 (dir) and 7 (search) are validated\n");
		return 1;
	}

	if (_type != '7')
		param = "";

	checkremote(u.host, u.port, path, param);

	return exitcode;
}