gopher-validator.c - gopher-validator - Simple gopher menu validator.
HTML git clone git://bitreich.org/gopher-validator git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/gopher-validator
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR README
DIR LICENSE
---
gopher-validator.c (12543B)
---
1 #include <sys/socket.h>
2 #include <sys/time.h>
3 #include <sys/types.h>
4
5 #include <ctype.h>
6 #include <errno.h>
7 #include <locale.h>
8 #include <limits.h>
9 #include <netdb.h>
10 #include <stdarg.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <wchar.h>
16 #include <wctype.h>
17
18 #define MAX_RESPONSETIMEOUT 10 /* timeout in seconds */
19
20 #ifndef __OpenBSD__
21 #define pledge(a,b) 0
22 #endif
23
/* Components of a parsed gopher URI, each stored as a NUL-terminated
   string. parseuri() fills this in; a component that is absent from the
   input is left as the empty string. */
struct uri {
	char host[256];  /* host name or address (IPv6 brackets stripped) */
	char port[8];    /* port as text */
	char path[1024]; /* everything from the first '/' after host/port */
};
29
/* One parsed line of a gopher menu: the item type character followed by
   the tab-separated fields, each kept as a NUL-terminated string. */
struct visited {
	int _type;           /* first character of the line */
	char username[1024]; /* display string ("User_Name" in the RFC) */
	char path[1024];     /* selector */
	char host[256];
	char port[8];
};
37
/* Maps a filename extension found in a selector to the gopher item types
   that are acceptable for it. */
struct gophertype {
	const char *ext;   /* filename extension, without the leading dot */
	const char *allow; /* string of item type characters allowed for ext */
};
43
44 /* must be sorted alphabetically by extension */
45 struct gophertype types[] = {
46 { .ext = "asc", "0" },
47 { .ext = "avi", "9" },
48 { .ext = "bz2", "9" },
49 { .ext = "c", "0" },
50 { .ext = "dcgi", "17" },
51 { .ext = "doc", "9" },
52 { .ext = "exe", "9" },
53 { .ext = "gif", "gI" },
54 { .ext = "go", "0" },
55 { .ext = "gph", "1" },
56 { .ext = "gz", "9" },
57 { .ext = "h", "0" },
58 { .ext = "htm", "0h" },
59 { .ext = "html", "0h" },
60 { .ext = "iso", "9" },
61 { .ext = "jpeg", "I" },
62 { .ext = "jpg", "I" },
63 { .ext = "json", "0" },
64 { .ext = "lzma", "9" },
65 { .ext = "m3u", "0" },
66 { .ext = "md", "0" },
67 { .ext = "md5", "0" },
68 { .ext = "md5sum", "0" },
69 { .ext = "mkv", "9" },
70 { .ext = "mp3", "9" },
71 { .ext = "mp4", "9" },
72 { .ext = "ogg", "9" },
73 { .ext = "ogv", "9" },
74 { .ext = "pdf", "9" },
75 { .ext = "png", "I" },
76 { .ext = "rss", "0" },
77 { .ext = "sh", "0" },
78 { .ext = "sha1", "0" },
79 { .ext = "sha1sum", "0" },
80 { .ext = "sha256", "0" },
81 { .ext = "sha256sum", "0" },
82 { .ext = "sha512", "0" },
83 { .ext = "sha512sum", "0" },
84 { .ext = "srt", "0" },
85 { .ext = "tgz", "9" },
86 { .ext = "txt", "0" },
87 { .ext = "wav", "9" },
88 { .ext = "xml", "0" },
89 { .ext = "xz", "9" },
90 };
91
/* exit status returned by main(); error() raises it to 1 */
int exitcode = 0;
/* stream for fatal messages written by die() */
FILE *errfp;
/* stream for validation reports written by error() and warning() */
FILE *outfp;
94
95 void
96 die(const char *fmt, ...)
97 {
98 va_list ap;
99
100 fputs("fatal: ", errfp);
101
102 va_start(ap, fmt);
103 vfprintf(errfp, fmt, ap);
104 va_end(ap);
105
106 exit(2);
107 }
108
109 void
110 error(const char *fmt, ...)
111 {
112 va_list ap;
113
114 fputs("error: ", outfp);
115
116 va_start(ap, fmt);
117 vfprintf(outfp, fmt, ap);
118 va_end(ap);
119
120 exitcode = 1;
121 }
122
123 void
124 warning(const char *fmt, ...)
125 {
126 va_list ap;
127
128 fputs("warning: ", outfp);
129
130 va_start(ap, fmt);
131 vfprintf(outfp, fmt, ap);
132 va_end(ap);
133 }
134
135 int
136 gophertypecmp(const void *v1, const void *v2)
137 {
138 return strcasecmp(((struct gophertype *)v1)->ext,
139 ((struct gophertype *)v2)->ext);
140 }
141
/*
 * Check whether s is an acceptable host field.
 *
 * Two forms are accepted:
 *  - a bracketed IPv6 literal: '[', then only hex digits, ':' and '.',
 *    containing at least two colons, then ']' with nothing after it;
 *  - a non-empty name consisting only of alphanumeric characters
 *    (as classified by isalnum()), '-' and '.'.
 *
 * Returns 1 when s is valid, 0 otherwise.
 */
int
isvalidhost(const char *s)
{
	int colons = 0;

	/* IPv6 */
	if (*s == '[') {
		for (s++; *s && *s != ']'; s++) {
			if (*s == ':')
				colons++;
			else if (!isxdigit((unsigned char)*s) && *s != '.')
				return 0;
		}
		/* the literal must be closed and the closing bracket must be
		   the last character: "[::1]junk" is not a host */
		return colons >= 2 && s[0] == ']' && s[1] == '\0';
	}

	if (!*s)
		return 0;
	for (; *s; s++) {
		if (!isalnum((unsigned char)*s) && *s != '-' && *s != '.')
			return 0;
	}

	return 1;
}
176
/*
 * Resolve host:port (numeric port only) and connect a TCP socket to the
 * first address that accepts the connection. Send and receive timeouts of
 * MAX_RESPONSETIMEOUT seconds are set on the socket before connecting.
 *
 * Returns the connected socket descriptor. Any failure is fatal: die() is
 * called with the failing step and, for socket()/connect(), the errno
 * description of the last attempt.
 */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *res, *res0;
	struct timeval timeout;
	const char *cause = "no address";
	int gaierr, save_errno = 0, s = -1;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
	if ((gaierr = getaddrinfo(host, port, &hints, &res0)))
		die("%s: %s: %s:%s\n", __func__, gai_strerror(gaierr), host, port);

	timeout.tv_sec = MAX_RESPONSETIMEOUT;
	timeout.tv_usec = 0;

	for (res = res0; res; res = res->ai_next) {
		s = socket(res->ai_family, res->ai_socktype,
		           res->ai_protocol);
		if (s == -1) {
			cause = "socket";
			save_errno = errno;
			continue;
		}

		if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));
		if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1)
			die("%s: setsockopt: %s\n", __func__, strerror(errno));

		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
			cause = "connect";
			save_errno = errno; /* close() may overwrite errno */
			close(s);
			s = -1;
			continue;
		}
		break;
	}
	freeaddrinfo(res0);

	if (s == -1) {
		/* report why the last attempt failed, not only which call */
		if (save_errno)
			die("%s: %s: %s:%s: %s\n", __func__, cause, host, port,
			    strerror(save_errno));
		die("%s: %s: %s:%s\n", __func__, cause, host, port);
	}

	return s;
}
226
227 void
228 checkdir(FILE *fp)
229 {
230 struct gophertype gt, *rgt;
231 struct visited v;
232 char line[1024], *end, *s;
233 size_t linenr;
234 ssize_t n;
235 long long l;
236 int i, r, len, hasdotend = 0, c, primarytype = 0, wc, col;
237 wchar_t w;
238
239 if (pledge("stdio", NULL) == -1)
240 die("pledge: %s\n", strerror(errno));
241
242 for (linenr = 1; fgets(line, sizeof(line), fp); linenr++) {
243 n = strcspn(line, "\n");
244 if (line[n] != '\n')
245 die("%zu: line too long\n", linenr); /* fatal */
246 if (n && line[n] == '\n')
247 line[n] = '\0';
248 if (n && line[n - 1] == '\r')
249 line[--n] = '\0';
250 else
251 error("%zu: invalid line-ending, not CRLF (\\r\\n)\n", linenr);
252 if (n == 1 && line[0] == '.') {
253 hasdotend = 1;
254 break;
255 }
256
257 memset(&v, 0, sizeof(v));
258
259 v._type = line[0];
260
261 /* "username" */
262 i = 1;
263 len = strcspn(line + i, "\t");
264 if (len + 1 < sizeof(v.username)) {
265 memcpy(v.username, line + i, len);
266 v.username[len] = '\0';
267 } else {
268 error("%zu: username field too long\n", linenr);
269 continue;
270 }
271 if (line[i + len] == '\t') {
272 i += len + 1;
273 } else {
274 error("%zu: invalid line / field count\n", linenr);
275 continue;
276 }
277
278 /* selector / path */
279 len = strcspn(line + i, "\t");
280 if (len + 1 < sizeof(v.path)) {
281 memcpy(v.path, line + i, len);
282 v.path[len] = '\0';
283 } else {
284 error("%zu: path field too long\n", linenr);
285 continue;
286 }
287 if (line[i + len] == '\t') {
288 i += len + 1;
289 } else {
290 error("%zu: invalid line / field count\n", linenr);
291 continue;
292 }
293
294 /* host */
295 len = strcspn(line + i, "\t");
296 if (len + 1 < sizeof(v.host)) {
297 memcpy(v.host, line + i, len);
298 v.host[len] = '\0';
299 } else {
300 error("%zu: host field too long\n", linenr);
301 continue;
302 }
303 if (line[i + len] == '\t') {
304 i += len + 1;
305 } else {
306 error("%zu: invalid line / field count\n", linenr);
307 continue;
308 }
309
310 /* port */
311 len = strcspn(line + i, "\t");
312 if (len + 1 < sizeof(v.port)) {
313 memcpy(v.port, line + i, len);
314 v.port[len] = '\0';
315 } else {
316 error("%zu: port field too long\n", linenr);
317 continue;
318 }
319
320 /* check non-standard types */
321 c = v._type;
322 if (v._type == '+' && !primarytype)
323 error("%zu: mirror type used, but no previous type set\n", linenr);
324 if (v._type != '+')
325 primarytype = v._type;
326
327 if (!(isdigit(c) || c == 'g' || c == 'I' || c == 'T' || c == '+')) {
328 /* common-used */
329 if (c == 'i' || c == 'h') {
330 #if 0
331 warning("%zu: non-standard, but common-used type: %c\n",
332 linenr, c);
333 #endif
334 } else {
335 /* 3.8: "Characters '0' through 'Z' are reserved. Local
336 experiments should use other characters.
337 Machine-specific extensions are not encouraged." */
338 if (c >= '0' && c <= 'Z')
339 error("%zu: unknown / non-standard type: %c\n",
340 linenr, c);
341 }
342 }
343
344 /* check type with file extension, unless it is the HTML 'h'
345 type with a "URL:" prefix */
346 if ((s = strrchr(v.path, '.')) && !strchr(s, '/') &&
347 !(primarytype == 'h' && !strncmp(v.path, "URL:", sizeof("URL:") - 1))) {
348 gt.ext = ++s;
349 if (!(rgt = bsearch(>, &types, sizeof(types) / sizeof(types[0]),
350 sizeof(types[0]), &gophertypecmp)))
351 continue;
352
353 if (!strchr(rgt->allow, primarytype))
354 warning("%zu: invalid type '%c' for extension '%s', valid types: '%s'\n",
355 linenr, primarytype, rgt->ext, rgt->allow);
356 }
357
358 if (!isvalidhost(v.host))
359 error("%zu: invalid host: %s\n", linenr, v.host);
360
361 /* check port, must be numeric and in range, port 0 is allowed:
362 "Appendix:
363 Note: Port corresponds the the TCP Port Number, its value should
364 be in the range [0..65535]; port 70 is officially assigned
365 to gopher." */
366
367 errno = 0;
368 l = strtoll(v.port, &end, 10);
369 if (errno || v.port == end || *end || l < 0 || l > 65535) {
370 error("%zu: invalid port: %s\n", linenr, v.port);
371 } else {
372 #if 0
373 if (l != 70)
374 warning("%zu: non-standard gopher port: %lld, not 70\n",
375 linenr, l);
376 #endif
377 }
378
379 /* RFC "Notes": "The Selector string should be no longer than
380 255 characters." */
381 if ((len = strlen(v.path)) > 255)
382 error("%zu: selector should not be longer than 255 characters: %d bytes\n",
383 linenr, len);
384
385 /* decode UTF-8 (text-encoding is ASCII/Latin1 in the RFC, but
386 Latin1 sucks, recommend UTF-8 instead.
387 Check column length as recommended as described in the RFC
388 in section 3.9. */
389 s = v.username;
390 len = strlen(s);
391 col = 0;
392 for (i = 0; i < len; i += r) {
393 r = mbtowc(&w, &s[i], len - i < 4 ? len - i : 4);
394 if (r == 0)
395 break;
396 if (r == -1) {
397 mbtowc(NULL, NULL, 0); /* reset state */
398 warning("%zu:%d: username: first invalid byte, not UTF-8\n",
399 linenr, i + 1);
400 break;
401 }
402 if ((wc = wcwidth(w)) == -1)
403 wc = 1;
404 col += (size_t)wc;
405
406 /* RFC "Notes": "It is *highly* recommended that the
407 User_Name field contain only printable characters". */
408 if (!iswprint(w)) {
409 error("%zu:%d: first non-printable character in username field\n",
410 linenr, i + 1);
411 break;
412 }
413 }
414 #if 0
415 /* instead of 70 check 79 */
416 if (col > 79)
417 warning("%zu: username column length is > 79 (%d), see section 3.9 of the RFC\n",
418 linenr, col);
419 #endif
420
421 if (!strcmp(v.path, "..") || strstr(v.path, "../"))
422 warning("%zu: found ../ in path: don't use relative paths\n", linenr);
423 }
424 if (ferror(fp))
425 die("fgets: %s\n", strerror(errno));
426
427 if (!hasdotend)
428 error("no .\\r\\n end\n");
429 }
430
/* Connect to host:port, send the selector path (followed by a tab and
   param when param is non-empty) terminated by CRLF, and validate the
   menu the server sends back with checkdir(). Failures to write or to
   wrap the socket in a stream are fatal. */
void
checkremote(const char *host, const char *port, const char *path, const char *param)
{
	FILE *stream;
	int sock, written;

	sock = edial(host, port);

	written = param[0]
	    ? dprintf(sock, "%s\t%s\r\n", path, param)
	    : dprintf(sock, "%s\r\n", path);
	if (written == -1)
		die("write: %s\n", strerror(errno));

	stream = fdopen(sock, "rb+");
	if (!stream)
		die("fdopen: %s\n", strerror(errno));

	checkdir(stream);
	fclose(stream);
}
451
452 int
453 parseuri(const char *str, struct uri *u)
454 {
455 const char *s, *e;
456
457 memset(u, 0, sizeof(struct uri));
458
459 s = str;
460
461 /* IPv6 */
462 if (*s == '[') {
463 s++;
464 e = strchr(s, ']');
465 if (!e || e - s + 1 >= sizeof(u->host))
466 return 0;
467 memcpy(u->host, s, e - s);
468 u->host[e - s] = '\0';
469 e++;
470 } else {
471 e = &s[strcspn(s, ":/")];
472 if (e - s + 1 >= sizeof(u->host))
473 return 0;
474 memcpy(u->host, s, e - s);
475 u->host[e - s] = '\0';
476 }
477
478 if (*e == ':') {
479 s = e + 1;
480 e = &s[strcspn(s, "/")];
481
482 if (e - s + 1 >= sizeof(u->port))
483 return 0;
484 memcpy(u->port, s, e - s);
485 u->port[e - s] = '\0';
486 }
487 if (*e && *e != '/')
488 return 0; /* invalid path */
489
490 s = e;
491 e = s + strlen(s);
492
493 if (e - s + 1 >= sizeof(u->path))
494 return 0;
495 memcpy(u->path, s, e - s);
496 u->path[e - s] = '\0';
497
498 return 1;
499 }
500
501 int
502 main(int argc, char **argv)
503 {
504 struct uri u;
505 const char *path, *uri = "", *param = "", *s;
506 int _type = '1';
507
508 setlocale(LC_CTYPE, "");
509
510 outfp = stdout;
511 errfp = stderr;
512
513 /* CGI-mode or stand-alone */
514 if ((s = getenv("QUERY_STRING"))) {
515 uri = s;
516 param = "";
517 errfp = stdout; /* output errors to stdout also in CGI mode */
518 } else {
519 switch (argc) {
520 case 3:
521 param = argv[2];
522 case 2:
523 uri = argv[1];
524 break;
525 case 1:
526 checkdir(stdin);
527 return exitcode;
528 default:
529 fprintf(errfp, "usage: %s [uri] [param]\n", argv[0]);
530 return 1;
531 }
532 }
533
534 if (pledge("stdio inet dns", NULL) == -1)
535 die("pledge: %s\n", strerror(errno));
536
537 if (!strncmp(uri, "gopher://", sizeof("gopher://") - 1))
538 uri += sizeof("gopher://") - 1;
539
540 if (!parseuri(uri, &u))
541 die("Invalid uri\n");
542 if (u.host[0] == '\0')
543 die("Invalid hostname\n");
544
545 if (u.path[0] == '\0')
546 memcpy(u.path, "/", 2);
547 if (u.port[0] == '\0')
548 memcpy(u.port, "70", 3);
549
550 path = u.path;
551 if (path[0] == '/') {
552 path++;
553 if (*path) {
554 _type = *path;
555 path++;
556 }
557 } else {
558 path = "";
559 }
560
561 switch (_type) {
562 case '1':
563 case '7':
564 break; /* handled below */
565 default: /* these types are not validated */
566 fprintf(errfp, "only types 1 (dir) and 7 (search) are validated\n");
567 return 1;
568 }
569
570 if (_type != '7')
571 param = "";
572
573 checkremote(u.host, u.port, path, param);
574
575 return exitcode;
576 }