Add gopher hackathon. - brcon2024-hackathons - Bitreichcon 2024 Hackathons HTML git clone git://bitreich.org/brcon2024-hackathons git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/brcon2024-hackathons DIR Log DIR Files DIR Refs DIR Tags DIR Submodules --- DIR commit ead4dcfa2976dac07e8fe03d23fa19424922b771 DIR parent 14f1a3a9baed42894753c5378cd2b3eb7ead1d4d HTML Author: Christoph Lohmann <20h@r-36.net> Date: Fri, 2 Aug 2024 15:45:14 +0200 Add gopher hackathon. Diffstat: A gopher/README.md | 28 ++++++++++++++++++++++++++++ A gopher/gopher-clustering/ga.c | 206 +++++++++++++++++++++++++++++++ A gopher/gopher-clustering/gopher-cl… | 2219 +++++++++++++++++++++++++++++++ A gopher/gopher-clustering/gopher-cl… | 1165 +++++++++++++++++++++++++++++++ A gopher/gopher-clustering/hint.md | 9 +++++++++ 5 files changed, 3627 insertions(+), 0 deletions(-) --- DIR diff --git a/gopher/README.md b/gopher/README.md @@ -0,0 +1,28 @@ +# Gopher Hackathon + +## GopherVR + +Recreate GopherVR with modern methods: + + https://github.com/michael-lazar/gopherVR + https://github.com/michael-lazar/gopherVR-models + +Here is more modern exports: + + https://www.khronos.org/gltf/ + +Just plumb gltf files into some usable game engine? + + https://godotengine.org/article/we-should-all-use-gltf-20-export-3d-assets-game-engines/ + +## Gopher Clustering + +In gopher-clustering/ you find ideas by pazz0 for how to scan gopher +menus and allow some clustering to topics. + +How can this be optimized and made a common algorithm? + +This will help further gopher automated development and new UIs, like +GopherVR. + + DIR diff --git a/gopher/gopher-clustering/ga.c b/gopher/gopher-clustering/ga.c @@ -0,0 +1,206 @@ +#include <stdint.h> +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +struct machine { + uint8_t a; + + uint8_t loads; + uint8_t adds; + uint8_t subs; + uint8_t times; + uint8_t noops; + + uint8_t instr; +}; + +enum { + LOAD, + ADD, + SUB, + TIMES, + END, +}; + +#define CODE_LENGTH 16 +#define POOL_LENGTH 16 +struct genome { + uint8_t code[CODE_LENGTH + 1]; /* +1 sentinal (0) */ + uint16_t fitness; +} pool[POOL_LENGTH]; + +uint8_t newcode[CODE_LENGTH]; + +void +execute(struct machine *m, size_t l, uint8_t code[]) +{ + size_t ip; + int16_t c; + + for (ip = 0; ip < l;) { + switch (code[ip++] & 0x7) { + case LOAD: + m->loads++; + m->a = code[ip++]; + break; + case ADD: + m->adds++; + c = (int16_t)m->a + code[ip++]; + if (c > 255) + m->a = 0; + else + m->a = c; + break; + case SUB: + m->subs++; + c = (int16_t)m->a - code[ip++]; + if (c < 0) + m->a = 0; + else + m->a = c; + break; + case TIMES: + m->times++; + m->a = (uint16_t)m->a * code[ip++] / 255; + break; + case END: + m->instr++; + return; + break; + default: + m->noops++; + break; + } + m->instr++; + } +} + +void +mutate(uint8_t code[], size_t l) +{ + uint8_t i, bi, j; + + for (j = 0; j < 4; j++) { + if (rand() % 100 >= 50) + continue; + + i = rand() % CODE_LENGTH; + bi = rand() % 8; + + code[i] ^= 1 << bi; + } +} + +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +void +cross(uint8_t a[], size_t al, uint8_t b[], size_t bl, uint8_t c[], size_t cl) +{ + uint8_t i, si, sl; + uint8_t *one, *two; + + if (rand() % 2) { + one = a; + two = b; + } else { + one = b; + two = a; + } + + si = rand() % (min(al, bl) + 1); + sl = rand() % (min(al, bl) - si + 1); + + for (i = 0; i < si; i++) + c[i] = one[i]; + + for (; i < si + sl; i++) + c[i] = two[i]; + + for (; i < cl; i++) + c[i] = one[i]; +} + +void +cross2(uint8_t a[], size_t al, uint8_t b[], size_t bl, uint8_t c[], size_t cl) +{ + uint8_t i; + + for (i = 0; i < al; i++) + c[i] = a[i] + b[i]; +} + +void +printcode(uint8_t code[], size_t l) +{ + uint8_t i; + + for (i = 0; i < l;) { + switch (code[i++] & 0x7) { + case LOAD: + printf("LOAD %d ", code[i++]); + break; + case ADD: + printf("ADD %d ", code[i++]); + break; + case SUB: + printf("SUB %d ", code[i++]); + break; + case TIMES: + printf("TIMES %d ", code[i++]); + break; + case END: + printf("END "); + return; + break; + default: + printf("NOOP "); + break; + } + } +} + +int +main(void) +{ + struct machine m; + struct genome *g1, *g2, *w1; + size_t i, j; + + for (i = 0; i < POOL_LENGTH; i++) { + for (j = 0; j < CODE_LENGTH; j++) + pool[i].code[j] = rand(); + pool[i].code[j] = 0; + } + + do { + g1 = g2 = w1 = NULL; + + for (i = 0; i < POOL_LENGTH; i++) { + memset(&m, 0, sizeof(m)); + + execute(&m, CODE_LENGTH, pool[i].code); + pool[i].fitness = m.a * 255 / (m.instr + m.loads); + + if (!g1 || g1->fitness < pool[i].fitness) { + g2 = g1; + g1 = &pool[i]; + } else if (!g2 || g2->fitness < pool[i].fitness) { + w1 = g2; + g2 = &pool[i]; + } else if (!w1 || w1->fitness > pool[i].fitness) { + w1 = &pool[i]; + } + } + + printf("g1->fitness = %d, g2->fitness = %d, w1->fitness = %d\n", g1->fitness, g2->fitness, w1->fitness); + cross(g1->code, CODE_LENGTH, g2->code, CODE_LENGTH, newcode, CODE_LENGTH); + mutate(newcode, CODE_LENGTH); + + printcode(newcode, CODE_LENGTH); + puts(""); + + memcpy(w1->code, newcode, CODE_LENGTH); + } while (1); +} DIR diff --git a/gopher/gopher-clustering/gopher-clustering.c b/gopher/gopher-clustering/gopher-clustering.c @@ -0,0 +1,2219 @@ +#define _POSIX_C_SOURCE 200112L + +#include <assert.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <math.h> + +#include <netdb.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <termios.h> +#include <unistd.h> + +uint32_t +fnv1a(int n,...) +{ + int i; + char *s; + va_list l; + uint32_t h; + + h = 0x811c9dc5; + + va_start(l, n); + for (i = 0; i < n; i++) { + for (s = va_arg(l, char*); *s; s++) { + h ^= *s; + h *= 0x01000193; + } + } + va_end(l); + + return h; +} + +uint32_t +xorshift(uint32_t *s) +{ + *s ^= *s << 13; + *s ^= *s >> 17; + *s ^= *s << 5; + return *s; +} + +/* + https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785 +*/ +uint32_t +mix(uint32_t x) +{ + x ^= x >> 16; + x *= 0x21f0aaad; + x ^= x >> 15; + x *= 0x735a2d97; + x ^= x >> 16; + return x; +} + +struct cell { + void *data; + char c; +}; + +#define MAPHEIGHT (50) +#define MAPWIDTH (160) +struct cell map[MAPHEIGHT][MAPWIDTH]; + +struct gopherentry { + struct gopherentry *next; + char type; + char *fields[4]; + size_t i, s; +}; + +char * +gopher_fieldend(char *s) +{ + for (; *s; s++) { + if (*s == '\t') + break; + if (*s == '\r' && *(s+1) == '\n') + break; + if (*s == '\n') + break; + } + return s; +} + +char * +gopher_nextentry(char *s) +{ + char *c; + + if (c = strchr(s, '\n')) + return c + 1; + return NULL; +} + +void * +xmalloc(size_t s) +{ + void *p; + + if (!(p = malloc(s))) { + perror(NULL); + exit(1); + } + return p; +} + +void * +xrealloc(void *p, size_t s) +{ + if (!(p = realloc(p, s))) { + perror(NULL); + exit(1); + } + return p; +} + +/* Allow a lot of ugly things... */ +size_t +gopher_parsemenu(char *d, struct gopherentry **g) +{ + ssize_t gl; + size_t fl, i, s; + char *c, *p, pt; + struct gopherentry *cg; + + gl = 0; + cg = NULL; + + s = 0; + pt = 0; + for (c = d; c && *c; c = gopher_nextentry(c)) { + if (*c == '.') + break; + if (*c == '\n') + continue; + + gl++; + cg = xrealloc(cg, gl * sizeof(struct gopherentry)); + memset(&cg[gl-1], 0, sizeof(struct gopherentry)); + + if (*c != 'i' && pt == 'i') + s++; + pt = *c; + + cg[gl-1].type = *c; + cg[gl-1].i = gl-1; + cg[gl-1].s = s; + c++; + + for (i = 0; i < 4; i++) { + p = gopher_fieldend(c); + fl = p - c; + cg[gl-1].fields[i] = xmalloc(fl + 1); + memcpy(cg[gl-1].fields[i], c, fl); + cg[gl-1].fields[i][fl] = '\0'; + if (*p != '\t') + break; + c = p + 1; + } + } + s++; + *g = cg; + return gl; +} + +ssize_t +gopher_removeentries(struct gopherentry *g, size_t l) +{ + size_t i, j; + + for (i = 0; i < l;) { + if ((g[i].type == 'i' && g[i].type == '3') || !g[i].fields[1] || !g[i].fields[2] || !g[i].fields[3]) { + l--; + memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); + continue; + } + for (j = 0; j < i; j++) { + if (!strcmp(g[i].fields[1], g[j].fields[1]) && + !strcmp(g[i].fields[2], g[j].fields[2]) && + !strcmp(g[i].fields[3], g[j].fields[3])) + break; + } + if (j < i) { + l--; + memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); + continue; + } + i++; + } + + return l; +} + +ssize_t +gopher_request(char *host, char *port, char *selector, char *buffer, size_t l) +{ + int fd; + struct addrinfo hints; + struct addrinfo *r, *rp; + ssize_t n, result, rn; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + if (getaddrinfo(host, port, &hints, &r) != 0) + return -1; + + for (rp = r; rp; rp = rp->ai_next) { + if ((fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol)) == -1) + continue; + if (connect(fd, rp->ai_addr, rp->ai_addrlen) != -1) + break; + close(fd); + } + freeaddrinfo(r); + if (!rp) + return -1; + + result = -1; + if (write(fd, selector, strlen(selector)) != strlen(selector)) + goto cleanup; + + if (write(fd, "\r\n", 2) != 2) + goto cleanup; + + n = 0; + while (1) { + if (n + 512 > l - 1) + goto cleanup; + if ((rn = read(fd, &buffer[n], 512)) == -1) + goto cleanup; + else if (!rn) + break; + n += rn; + } + + buffer[n] = '\0'; + result = n; + +cleanup: + close(fd); + + return result; +} + +size_t +triangularnumber(size_t n) +{ + return (n * n + n) / 2; +} + +size_t +distanceindex(size_t i, size_t j) +{ + size_t t; + + if (i < j) { + t = i; + i = j; + j = t; + } + + return triangularnumber(i) + j; +} + +/* + https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows + Which references https://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm-2 + The code there is using the CPOL: https://www.codeproject.com/info/cpol10.aspx +*/ +size_t +levenshteindistance(char *a, char *b) +{ + size_t i, j, dc, ic, sc, r; + size_t *v0, *v1, *t; + + v0 = xmalloc((strlen(b) + 1) * sizeof(*v0)); + v1 = xmalloc((strlen(b) + 1) * sizeof(*v1)); + + for (i = 0; i <= strlen(b); i++) + v0[i] = i; + + for (i = 0; i < strlen(a); i++) { + v1[0] = i + 1; + for (j = 0; j < strlen(b); j++) { + dc = v0[j + 1] + 1; + ic = v1[j] + 1; + if (a[i] == b[j]) + sc = v0[j]; + else + sc = v0[j] + 1; + if (dc < ic && dc < sc) + v1[j + 1] = dc; + else if (ic < dc && ic < sc) + v1[j + 1] = ic; + else + v1[j + 1] = sc; + } + t = v0; + v0 = v1; + v1 = t; + } + + r = v0[strlen(b)]; + + free(v1); + free(v0); + + return r; +} + +/* + Remove common prefix and suffix. The pazz0distance is the sum of the length of the leftover strings. +*/ +size_t +pazz0distance(char *a, char *b) +{ + size_t pl, sl; + size_t al, bl; + + al = strlen(a); + bl = strlen(b); + + for (pl = 0; pl < al && pl < bl && a[pl] == b[pl]; pl++); + for (sl = 0; al - sl > pl && bl - sl > pl && a[al - 1 - sl] == b[bl - 1 - sl]; sl++); + + return al + bl - 2 * (pl + sl); +} + +struct point { + double dims[2]; +}; + +void +sammon(double *distances, size_t l, uint32_t prng, struct point *points) +{ + size_t i, j, k, t, tmax; + double *gnarf; + double c, lr; + double e1, e2, sum, lasterror; + struct point *newpoints; + + gnarf = calloc(triangularnumber(l), sizeof(*gnarf)); + newpoints = calloc(l, sizeof(*newpoints)); + + for (i = 0; i < l; i++) { + points[i].dims[0] = (double)xorshift(&prng) / UINT32_MAX - 0.5; + points[i].dims[1] = (double)xorshift(&prng) / UINT32_MAX - 0.5; + } + + c = 0; + for (i = 0; i < l; i++) { + gnarf[distanceindex(i, i)] = 0; + for (j = 0; j < i; j++) { + gnarf[distanceindex(i, j)] = sqrt(pow(points[i].dims[0] - points[j].dims[0], 2) + pow(points[i].dims[1] - points[j].dims[1], 2)); + c += distances[distanceindex(i, j)]; + } + } + + lasterror = 0; + for (t = 0, tmax = 1 << 12; t < tmax; t++) { + lr = 0.01; + for (i = 0; i < l; i++) { + for (j = 0; j < 2; j++) { + e1 = -2. / c; + sum = 0; + for (k = 0; k < l; k++) { + if (i == k) + continue; + sum += (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * (points[i].dims[j] - points[k].dims[j]); + } + e1 *= sum; + + e2 = -2. / c; + sum = 0; + for (k = 0; k < l; k++) { + if (i == k) + continue; + sum += 1. / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * ((distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) - pow(points[i].dims[j] - points[k].dims[j], 2) / gnarf[distanceindex(i, k)] * (1 + (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / gnarf[distanceindex(i, k)])); + } + e2 *= sum; + + newpoints[i].dims[j] = points[i].dims[j] - lr * (e1 / fabs(e2)); + } + } + + sum = 0; + for (i = 0; i < l; i++) { + for (j = 0; j < i; j++) { + gnarf[distanceindex(i, j)] = sqrt(pow(newpoints[i].dims[0] - newpoints[j].dims[0], 2) + pow(newpoints[i].dims[1] - newpoints[j].dims[1], 2)); + sum += pow(distances[distanceindex(i, j)] - gnarf[distanceindex(i, j)], 2) / distances[distanceindex(i, j)]; + } + } +printf("%lf\n", sum / c); +if (t > 0 && fabs(sum / c - lasterror) < 0.000000000001) + break; +lasterror = sum / c; + + memcpy(points, newpoints, l * sizeof(*points)); + } + + free(newpoints); + free(gnarf); +} + +int +dynmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng) +{ + size_t i, j, n, ck; + ssize_t m1, m, m2; + size_t ti, th, huh; + size_t iter; + double dsum, d, d2, gnarf, tdsum; + struct { + size_t medoid; + size_t n; + double dsum; + } *clusters; + struct { + size_t n1, n2, n3; + double d1, d2, d3; + } *cache; + double *dS, *dS2; + double cdS, cdS2; + size_t cm, cx; + ssize_t lx; + int flag, result; + uint32_t r; + double *C; + + if (!l) + return 0; + + if (l < k) + k = l; + + clusters = calloc(k, sizeof(*clusters)); + if (!clusters) + return -1; + + result = -1; + +/* + Seed the medoids randomly +*/ + for (i = n = 0; i < l; i++) { + if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) + clusters[n++].medoid = i; + if (n == k) + break; + } + + cache = calloc(l, sizeof(*cache)); + if (!cache) + goto cleanup; + + dS = calloc(k, sizeof(*dS)); + if (!dS) + goto cleanup; + + dS2 = calloc(k, sizeof(*dS2)); + if (!dS2) + goto cleanup; + +while (k > 3) { + for (i = 0; i < l; i++) { + cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; + for (j = 0; j < k; j++) { + d = distances[distanceindex(i, clusters[j].medoid)]; + if (d < cache[i].d1) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = cache[i].d1; + cache[i].d1 = d; + cache[i].n3 = cache[i].n2; + cache[i].n2 = cache[i].n1; + cache[i].n1 = j; + } else if (d < cache[i].d2) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = d; + cache[i].n3 = cache[i].n2; + cache[i].n2 = j; + } else if (d < cache[i].d3) { + cache[i].d3 = d; + cache[i].n3 = j; + } + } + } + + for (i = 0; i < k; i++) { + dS[i] = 0; + for (j = 0; j < l; j++) { + if (cache[j].n1 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3; + } + for (j = 0; j < l; j++) { + if (cache[j].n2 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3; + } + } + + lx = -1; + for (iter = 0; iter < 100; iter++) { + cdS = 0; + for (i = 0; i < l; i++) { + if (i == lx) + goto fastermsc_finished; + + for (j = 0; j < k; j++) { + if (clusters[j].medoid == i) + goto fastmsc_nexti; + } + + memcpy(dS2, dS, k * sizeof(*dS2)); + cdS2 = 0; + for (j = 0; j < l; j++) { + d = distances[distanceindex(i, j)]; + if (d < cache[j].d1) { + cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1; + dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d2) { + cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d; + dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d3) { + dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; + } + } + d = -INFINITY; + for (j = 0; j < k; j++) { + if (dS2[j] > d) { + d = dS2[j]; + m = j; + } + } + dS2[m] += cdS2; + if (dS2[m] <= 0) + continue; +//printf("%lf\t%lf\n", dS2[m], cdS2); +printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i); + lx = i; + clusters[m].medoid = i; + for (j = 0; j < l; j++) { + if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m) + continue; + cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY; + for (n = 0; n < k; n++) { + d = distances[distanceindex(j, clusters[n].medoid)]; + if (d < cache[j].d1) { + cache[j].d3 = cache[j].d2; + cache[j].d2 = cache[j].d1; + cache[j].d1 = d; + cache[j].n3 = cache[j].n2; + cache[j].n2 = cache[j].n1; + cache[j].n1 = n; + } else if (d < cache[j].d2) { + cache[j].d3 = cache[j].d2; + cache[j].d2 = d; + cache[j].n3 = cache[j].n2; + cache[j].n2 = n; + } else if (d < cache[j].d3) { + cache[j].d3 = d; + cache[j].n3 = n; + } + } + } + for (j = 0; j < k; j++) { + dS[j] = 0; + for (n = 0; n < l; n++) { + if (cache[n].n1 != j) + continue; + dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3; + } + for (n = 0; n < l; n++) { + if (cache[n].n2 != j) + continue; + dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3; + } + } +printf("lx = %lu\n", i); +fastmsc_nexti:; + } + if (lx == -1) + break; + } +fastermsc_finished:; + d = INFINITY; + for (j = 0; j < k; j++) { + if (dS2[j] < d) { + d = dS2[j]; + m = j; + } + } + if (m < k - 1) + memmove(&clusters[m], &clusters[m + 1], k - m - 1); + k--; +printf("k = %lu\n", k); +} + + for (i = 0; i < k; i++) + clusters[i].n = 0; + + for (i = 0; i < l; i++) { + d = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + assoc[i] = j; + d = distances[distanceindex(clusters[j].medoid, i)]; + } + } + clusters[assoc[i]].n++; + } + + dsum = 0; + for (i = 0; i < l; i++) { + d = INFINITY; + d2 = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + d2 = d; + d = distances[distanceindex(clusters[j].medoid, i)]; + } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) { + d2 = distances[distanceindex(clusters[j].medoid, i)]; + } + } + if (clusters[assoc[i]].medoid != i) + dsum += 1 - d / (d2 + 0.000000000001); + printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]); + } + printf("Silhouette = %lf\n", dsum / (l - k)); + + result = k; +cleanup: + free(clusters); + return result; +} + +int +fastermsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng) +{ + size_t i, j, n, ck; + ssize_t m1, m, m2; + size_t ti, th, huh; + double dsum, d, d2, gnarf, tdsum; + struct { + size_t medoid; + size_t n; + double dsum; + } *clusters; + struct { + size_t n1, n2, n3; + double d1, d2, d3; + } *cache; + double *dS, *dS2; + double cdS, cdS2; + size_t cm, cx; + ssize_t lx; + int flag, result; + uint32_t r; + double *C; + + if (!l) + return 0; + + if (l < k) + k = l; + + clusters = calloc(k, sizeof(*clusters)); + if (!clusters) + return -1; + + result = -1; + +/* + Seed the medoids randomly +*/ + for (i = n = 0; i < l; i++) { + if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) + clusters[n++].medoid = i; + if (n == k) + break; + } + + cache = calloc(l, sizeof(*cache)); + if (!cache) + goto cleanup; + + dS = calloc(k, sizeof(*dS)); + if (!dS) + goto cleanup; + + dS2 = calloc(k, sizeof(*dS2)); + if (!dS2) + goto cleanup; + + for (i = 0; i < l; i++) { + cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; + for (j = 0; j < k; j++) { + d = distances[distanceindex(i, clusters[j].medoid)]; + if (d < cache[i].d1) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = cache[i].d1; + cache[i].d1 = d; + cache[i].n3 = cache[i].n2; + cache[i].n2 = cache[i].n1; + cache[i].n1 = j; + } else if (d < cache[i].d2) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = d; + cache[i].n3 = cache[i].n2; + cache[i].n2 = j; + } else if (d < cache[i].d3) { + cache[i].d3 = d; + cache[i].n3 = j; + } + } + } + + for (i = 0; i < k; i++) { + dS[i] = 0; + for (j = 0; j < l; j++) { + if (cache[j].n1 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3; + } + for (j = 0; j < l; j++) { + if (cache[j].n2 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3; + } + } + + lx = -1; + for (;;) { + cdS = 0; + for (i = 0; i < l; i++) { + if (i == lx) + goto fastermsc_finished; + + for (j = 0; j < k; j++) { + if (clusters[j].medoid == i) + goto fastmsc_nexti; + } + + memcpy(dS2, dS, k * sizeof(*dS2)); + cdS2 = 0; + for (j = 0; j < l; j++) { + d = distances[distanceindex(i, j)]; + if (d < cache[j].d1) { + cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1; + dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d2) { + cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d; + dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d3) { + dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; + } + } + d = -INFINITY; + for (j = 0; j < k; j++) { + if (dS2[j] > d) { + d = dS2[j]; + m = j; + } + } + dS2[m] += cdS2; + if (dS2[m] <= 0) + continue; +//printf("%lf\t%lf\n", dS2[m], cdS2); +printf("%lu (%lu) <-> %lu\n", clusters[m].medoid, (size_t)m, i); + lx = i; + clusters[m].medoid = i; + for (j = 0; j < l; j++) { + if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m) + continue; + cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY; + for (n = 0; n < k; n++) { + d = distances[distanceindex(j, clusters[n].medoid)]; + if (d < cache[j].d1) { + cache[j].d3 = cache[j].d2; + cache[j].d2 = cache[j].d1; + cache[j].d1 = d; + cache[j].n3 = cache[j].n2; + cache[j].n2 = cache[j].n1; + cache[j].n1 = n; + } else if (d < cache[j].d2) { + cache[j].d3 = cache[j].d2; + cache[j].d2 = d; + cache[j].n3 = cache[j].n2; + cache[j].n2 = n; + } else if (d < cache[j].d3) { + cache[j].d3 = d; + cache[j].n3 = n; + } + } + } + for (j = 0; j < k; j++) { + dS[j] = 0; + for (n = 0; n < l; n++) { + if (cache[n].n1 != j) + continue; + dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3; + } + for (n = 0; n < l; n++) { + if (cache[n].n2 != j) + continue; + dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3; + } + } +printf("lx = %lu\n", i); +fastmsc_nexti:; + } + if (lx == -1) + break; + } +fastermsc_finished:; + + for (i = 0; i < k; i++) + clusters[i].n = 0; + + for (i = 0; i < l; i++) { + d = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + assoc[i] = j; + d = distances[distanceindex(clusters[j].medoid, i)]; + } + } + clusters[assoc[i]].n++; + } + + dsum = 0; + for (i = 0; i < l; i++) { + d = INFINITY; + d2 = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + d2 = d; + d = distances[distanceindex(clusters[j].medoid, i)]; + } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) { + d2 = distances[distanceindex(clusters[j].medoid, i)]; + } + } + if (clusters[assoc[i]].medoid != i) + dsum += 1 - d / (d2 + 0.000000000001); + printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]); + } + printf("Silhouette = %lf\n", dsum / (l - k)); + + result = k; +cleanup: + free(clusters); + return result; +} + +int +fastmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng) +{ + size_t i, j, n, ck; + ssize_t m1, m, m2; + size_t ti, th; + double dsum, d, d2, gnarf, tdsum; + struct { + size_t medoid; + size_t n; + double dsum; + } *clusters; + struct { + size_t n1, n2; + double d1, d2, d3; + } *cache; + double *dS, *dS2; + double cdS, cdS2; + size_t cm, cx; + int flag, result; + uint32_t r; + double *C; + + if (!l) + return 0; + + if (l < k) + k = l; + + clusters = calloc(k, sizeof(*clusters)); + if (!clusters) + return -1; + + result = -1; + +/* + seed the medoids using PAM BUILD +*/ + d = INFINITY; + m = -1; + for (i = 0; i < l; i++) { + dsum = 0; + for (j = 0; j < l; j++) + dsum += distances[distanceindex(i, j)]; + if (dsum < d) { + m = i; + d = dsum; + } + } + if (m == -1) + goto cleanup; + ck = 0; + clusters[ck].medoid = m; + ck++; + + C = xmalloc(l * l * sizeof(*C)); + + for (; ck < k; ck++) { + memset(C, 0, l * l * sizeof(*C)); + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti; + } + for (j = 0; j < l; j++) { + if (i == j) + continue; + for (n = 0; n < ck; n++) { + if (clusters[n].medoid == j) + goto build_nextj; + } + d = INFINITY; + for (n = 0; n < ck; n++) { + if (distances[distanceindex(j, clusters[n].medoid)] < d) + d = distances[distanceindex(j, clusters[n].medoid)]; + } + C[j * l + i] = d - distances[distanceindex(j, i)]; + if (C[j * l + i] < 0) + C[j * l + i] = 0; +build_nextj:; + } +build_nexti:; + } + d = -1; + m = -1; + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti2; + } + dsum = 0; + for (j = 0; j < l; j++) + dsum += C[j * l + i]; + if (dsum > d) { + d = dsum; + m = i; + } +build_nexti2:; + } + clusters[ck].medoid = m; + } + free(C); + +/* + Seed the medoids randomly + for (i = n = 0; i < l; i++) { + if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) + clusters[n++].medoid = i; + if (n == k) + break; + } +*/ + + cache = calloc(l, sizeof(*cache)); + if (!cache) + goto cleanup; + + dS = calloc(k, sizeof(*dS)); + if (!dS) + goto cleanup; + + dS2 = calloc(k, sizeof(*dS2)); + if (!dS2) + goto cleanup; + + for (;;) { + for (i = 0; i < l; i++) { + cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; + for (j = 0; j < k; j++) { + d = distances[distanceindex(i, clusters[j].medoid)]; + if (d < cache[i].d1) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = cache[i].d1; + cache[i].d1 = d; + cache[i].n2 = cache[i].n1; + cache[i].n1 = j; + } else if (d < cache[i].d2) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = d; + cache[i].n2 = j; + } else if (d < cache[i].d3) { + cache[i].d3 = d; + } + } + } + + for (i = 0; i < k; i++) { + dS[i] = 0; + for (j = 0; j < l; j++) { + if (cache[j].n1 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3; + } + for (j = 0; j < l; j++) { + if (cache[j].n2 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3; + } + } + + cdS = 0; + for (i = 0; i < l; i++) { + for (j = 0; j < k; j++) { + if (clusters[j].medoid == i) + goto fastmsc_nexti; + } + memcpy(dS2, dS, k * sizeof(*dS2)); + cdS2 = 0; + for (j = 0; j < l; j++) { + d = distances[distanceindex(i, j)]; + if (d < cache[j].d1) { + cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1; + dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d2) { + cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d; + dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d3) { + dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; + } + } + d = -INFINITY; + d2 = INFINITY; + for (j = 0; j < k; j++) { + if (dS2[j] > d) { + d = dS2[j]; + m = j; + } + if (dS2[j] < d2) { + d2 = dS2[j]; + m2 = j; + } + } +printf("%lf (%ld)\t%lf (%ld)\n", d, m, d2, m2); + dS2[m] += cdS2; + if (dS2[m] > cdS) { + cdS = dS2[m]; + cm = m; + cx = i; + } +fastmsc_nexti:; + } + if (cdS <= 0) + break; + clusters[cm].medoid = cx; + } + + for (i = 0; i < k; i++) + clusters[i].n = 0; + + for (i = 0; i < l; i++) { + d = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + assoc[i] = j; + d = distances[distanceindex(clusters[j].medoid, i)]; + } + } + clusters[assoc[i]].n++; + } + + dsum = 0; + for (i = 0; i < l; i++) { + d = INFINITY; + d2 = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + d2 = d; + d = distances[distanceindex(clusters[j].medoid, i)]; + } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) { + d2 = distances[distanceindex(clusters[j].medoid, i)]; + } + } + if (clusters[assoc[i]].medoid != i) + dsum += 1 - d / (d2 + 0.000000000001); + printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]); + } + printf("Silhouette = %lf\n", dsum / (l - k)); + + result = k; +cleanup: + free(clusters); + return result; +} + +/* + Seeding of medoids based on the PAM BUILD description in https://www.cs.umb.edu/cs738/pam1.pdf + I was too stupid to understand other descriptions. +*/ +int +kmedoids(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng) +{ + size_t i, j, n, ck; + ssize_t m1, m; + size_t ti, th; + double dsum, d, d2, gnarf, tdsum; + struct { + size_t medoid; + size_t n; + double dsum; + } *clusters; + int flag, result; + uint32_t r; + double *C; + + if (!l) + return 0; + + if (l < k) + k = l; + + clusters = calloc(k, sizeof(*clusters)); + if (!clusters) + return -1; + + result = -1; + + /* seed the medoids using PAM BUILD */ + d = INFINITY; + m = -1; + for (i = 0; i < l; i++) { + dsum = 0; + for (j = 0; j < l; j++) + dsum += distances[distanceindex(i, j)]; + if (dsum < d) { + m = i; + d = dsum; + } + } + if (m == -1) + goto cleanup; + ck = 0; + clusters[ck].medoid = m; + ck++; + + C = xmalloc(l * l * sizeof(*C)); + + for (; ck < k; ck++) { + memset(C, 0, l * l * sizeof(*C)); + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti; + } + for (j = 0; j < l; j++) { + if (i == j) + continue; + for (n = 0; n < ck; n++) { + if (clusters[n].medoid == j) + goto build_nextj; + } + d = INFINITY; + for (n = 0; n < ck; n++) { + if (distances[distanceindex(j, clusters[n].medoid)] < d) + d = distances[distanceindex(j, clusters[n].medoid)]; + } + C[j * l + i] = d - distances[distanceindex(j, i)]; + if (C[j * l + i] < 0) + C[j * l + i] = 0; +build_nextj:; + } +build_nexti:; + } + d = -1; + m = -1; + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti2; + } + dsum = 0; + for (j = 0; j < l; j++) + dsum += C[j * l + i]; + if (dsum > d) { + d = dsum; + m = i; + } +build_nexti2:; + } + clusters[ck].medoid = m; + } + free(C); + +/* + Seed the medoids randomly + for (i = n = 0; i < l; i++) { + if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) + clusters[n++].medoid = i; + if (n == k) + break; + } +*/ + + for (;;) { + tdsum = INFINITY; + for (i = 0; i < k; i++) { + for (j = 0; j < l; j++) { + for (n = 0; n < k; n++) { + if (j == clusters[n].medoid) + goto swap_nextj; + } + dsum = 0; + for (n = 0; n < l; n++) { + if (n == j) + continue; + for (m = 0; m < k; m++) { + if (clusters[m].medoid == n) + goto swap_nextn; + } + d = INFINITY; + d2 = INFINITY; + for (m = 0; m < k; m++) { + if (distances[distanceindex(clusters[m].medoid, n)] < d) { + d2 = d; + d = distances[distanceindex(clusters[m].medoid, n)]; + } else if (distances[distanceindex(clusters[m].medoid, n)] < d2) { + d2 = distances[distanceindex(clusters[m].medoid, n)]; + } + } +/* +printf("%lf\t%lf\t%lf\t%lf\n", distances[distanceindex(clusters[i].medoid, n)], distances[distanceindex(j, n)], d, d2); +*/ + if (distances[distanceindex(clusters[i].medoid, n)] > d) { + if (distances[distanceindex(j, n)] >= d) + gnarf = 0; + else + gnarf = distances[distanceindex(j, n)] - d; + } else if (distances[distanceindex(clusters[i].medoid, n)] == d) { + if (distances[distanceindex(j, n)] < d2) + gnarf = distances[distanceindex(j, n)] - d; + else + gnarf = d2 - d; + } else { + continue; + } + dsum += gnarf; +swap_nextn:; + } + if (dsum < tdsum) { + ti = i; + th = j; + tdsum = dsum; +/* +printf("%lu\t%lu\t%lf\n", clusters[ti].medoid, th, tdsum); +*/ + } +swap_nextj:; + } + } + if (tdsum >= 0) + break; + clusters[ti].medoid = th; + } + + for (i = 0; i < k; i++) + clusters[i].n = 0; + + for (i = 0; i < l; i++) { + d = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + assoc[i] = j; + d = distances[distanceindex(clusters[j].medoid, i)]; + } + } + clusters[assoc[i]].n++; + } + + dsum = 0; + for (i = 0; i < l; i++) { + d = INFINITY; + d2 = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + d2 = d; + d = distances[distanceindex(clusters[j].medoid, i)]; + } else if (distances[distanceindex(clusters[j].medoid, i)] < d2) { + d2 = distances[distanceindex(clusters[j].medoid, i)]; + } + } + if (clusters[assoc[i]].medoid != i) + dsum += 1 - d / (d2 + 0.000000000001); + printf("%lu\t%lu\t%lf\t%lf\t%lf\t%lu\t%s\n", i, assoc[i], 1 - d / (d2 + 0.000000000001), d, d2, g[i].s, g[i].fields[1]); + if (clusters[assoc[i]].medoid == i) + assoc[i] |= 128; + } + printf("Silhouette = %lf\n", dsum / (l - k)); + +/* + while (1) { + flag = 0; + for (i = 0; i < l; i++) { + d = INFINITY; + m = assoc[i]; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + d = distances[distanceindex(clusters[j].medoid, i)]; + assoc[i] = j; + } + } + if (assoc[i] != m) + flag = 1; + } + if (!flag) + break; + + for (i = 0; i < k; i++) + clusters[i].dsum = INFINITY; + + for (i = 0; i < l; i++) { + dsum = 0; + for (j = 0; j < l; j++) { + if (assoc[i] != assoc[j]) + continue; + dsum += distanceindex(i, j); + } + if (dsum < clusters[assoc[i]].dsum) { + clusters[assoc[i]].dsum = dsum; + clusters[assoc[i]].medoid = i; + } + } + } +*/ + result = k; +cleanup: + free(clusters); + return result; +} + +struct room { + struct room *p; + void *d; + size_t x, y; + size_t w, h; +}; + +int +generaterooms_collides(size_t x1, size_t y1, size_t w1, size_t h1, + size_t x2, size_t y2, size_t w2, size_t h2, + size_t margin) +{ + return !((y1 >= y2 + h2 + margin || y2 >= y1 + h1 + margin) || (x1 >= x2 + w2 + margin || x2 >= x1 + w1 + margin)); +} + +int +generaterooms_isfree(struct room *rs, size_t l, size_t x, size_t y, size_t w, size_t h, size_t margin) +{ + size_t i; + + for (i = 0; i < l; i++) { + if (generaterooms_collides(rs[i].x, rs[i].y, rs[i].w, rs[i].h, + x, y, w, h, margin)) + return 0; + } + return 1; +} + +struct rect { + struct rect *next, *next2; + struct room *room; + size_t x1, y1; + size_t x2, y2; + size_t d; +}; + +struct rect * +generaterooms_gnarf_randomneighbor(struct rect *x, struct rect *rs, uint32_t *prng) +{ + struct rect *r, *result; + size_t n; + + n = 0; + result = NULL; + for (r = rs; r; r = r->next) { + if (r->y2 < x->y1 || r->y1 > x->y2 || r->x2 < x->x1 || r->x1 > x->x2) + continue; + if ((r->y2 == x->y1 || r->y1 == x->y2) && (r->x2 == x->x1 || r->x1 == x->x2)) + continue; + n++; + if (xorshift(prng) / (1. + UINT32_MAX) < 1. / n) + result = r; + } + + return result; +} + +#define ROOM_HEIGHT_MIN 3 +#define ROOM_WIDTH_MIN 5 +#define ROOM_MARGIN_MIN 1 +#define CELL_HEIGHT_MIN (ROOM_HEIGHT_MIN + ROOM_MARGIN_MIN + 3) +#define CELL_WIDTH_MIN (ROOM_WIDTH_MIN + ROOM_MARGIN_MIN + 3) +size_t +generaterooms_gnarf(char *host, char *port, char *selector, struct room *rs, size_t l) +{ + struct rect *queuehead, *queuetail; + struct rect *r, *t; + struct rect *rects, *walk; + size_t w, h, i, j, rl, n; + uint32_t prng; + int vertical; + struct room *room; + + prng = fnv1a(4, host, port, selector, "seed"); + + r = xmalloc(sizeof(*r)); + r->x1 = r->y1 = ROOM_MARGIN_MIN; + r->x2 = MAPWIDTH; + r->y2 = MAPHEIGHT; + r->d = 0; + + queuetail = r; + queuetail->next = NULL; + queuehead = r; + + rects = NULL; + rl = 0; + + while (queuehead) { + r = queuehead; + if (queuetail == queuehead) + queuetail = NULL; + queuehead = queuehead->next; + +/* + if (r->d >= 8) { + r->next = rects; + rects = r; + rl++; + continue; + } +*/ + + if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2 && r->y2 - r->y1 >= CELL_HEIGHT_MIN * 2) { + vertical = xorshift(&prng) & 1; + } else if (r->x2 - r->x1 >= CELL_WIDTH_MIN * 2) { + vertical = 0; + } else if (r->y2 - r->y1 >= CELL_HEIGHT_MIN * 2) { + vertical = 1; + } else { + r->next = rects; + rects = r; + rl++; + continue; + } + + if (vertical) { + w = r->x2 - r->x1; + h = CELL_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r->y1 - CELL_HEIGHT_MIN * 2); + } else { + w = CELL_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r->x1 - CELL_WIDTH_MIN * 2); + h = r->y2 - r->y1; + } + + t = xmalloc(sizeof(*t)); + t->x1 = r->x1; + t->y1 = r->y1; + t->x2 = r->x1 + w; + t->y2 = r->y1 + h; + t->d = r->d + 1; + t->next = NULL; + t->room = NULL; + + if (!queuetail) { + queuehead = t; + queuetail = t; + } else { + queuetail->next = t; + queuetail = t; + } + + t = xmalloc(sizeof(*t)); + if (vertical) { + t->x1 = r->x1; + t->y1 = r->y1 + h; + } else { + t->x1 = r->x1 + w; + t->y1 = r->y1; + } + t->x2 = r->x2; + t->y2 = r->y2; + t->d = r->d + 1; + t->next = NULL; + t->room = NULL; + + queuetail->next = t; + queuetail = t; + + free(r); + } + + if (l > rl) + l = rl; + + for (r = rects; r; r = r->next) { + if (MAPHEIGHT / 2 >= r->y1 && MAPHEIGHT / 2 < r->y2 && + MAPWIDTH / 2 >= r->x1 && MAPWIDTH / 2 < r->x2) + break; + } + + i = 0; + rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM_MARGIN_MIN - ROOM_WIDTH_MIN); + rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + r->y2 - r->y1 - ROOM_MARGIN_MIN - ROOM_HEIGHT_MIN); + rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - ROOM_MARGIN_MIN - rs[i].w); + rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - ROOM_MARGIN_MIN - rs[i].h); + rs[i].p = NULL; + r->room = &rs[i]; + + walk = r; + walk->next2 = NULL; + + i++; + for (; i < l;) { + t = generaterooms_gnarf_randomneighbor(r, rects, &prng); + if (!t || t->room) { + n = 0; + for (t = walk; t; t = t->next2) { + n++; + if (xorshift(&prng) / (1. + UINT32_MAX) < 1. / n) + r = t; + + } + continue; + } + rs[i].w = ROOM_WIDTH_MIN + xorshift(&prng) % (1 + t->x2 - t->x1 - ROOM_MARGIN_MIN - ROOM_WIDTH_MIN); + rs[i].h = ROOM_HEIGHT_MIN + xorshift(&prng) % (1 + t->y2 - t->y1 - ROOM_MARGIN_MIN - ROOM_HEIGHT_MIN); + rs[i].x = t->x1 + xorshift(&prng) % (1 + t->x2 - t->x1 - ROOM_MARGIN_MIN - rs[i].w); + rs[i].y = t->y1 + xorshift(&prng) % (1 + t->y2 - t->y1 - ROOM_MARGIN_MIN - rs[i].h); + rs[i].p = r->room; + t->room = &rs[i]; + i++; + r = t; + r->next2 = walk; + walk = r; + } + +/* + for (r = rects, i = 0; i < l; i++, r = r->next) { + rs[i].w = 3 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - 3); + rs[i].h = 2 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - 2); + rs[i].x = r->x1 + xorshift(&prng) % (1 + r->x2 - r->x1 - 1 - rs[i].w); + rs[i].y = r->y1 + xorshift(&prng) % (1 + r->y2 - r->y1 - 1 - rs[i].h); + rs[i].p = NULL; + } +*/ + + return l; +} + +#define ROOMS_MARGIN_MIN 1 +#define ROOMS_MARGIN_MAX 3 +#define MAP_MARGIN 1 +size_t +generaterooms_stupid(char *host, char *port, char *selector, struct room *rs, size_t l) +{ + size_t i, j, ri, m, n; + ssize_t y, x; + ssize_t w, h; + char buffer[3]; + struct room *r; + uint32_t prng; + + rs[0].h = 4 + fnv1a(4, host, port, selector, "h") % 5; + rs[0].w = 4 + fnv1a(4, host, port, selector, "w") % 5; + rs[0].y = MAPHEIGHT / 2 - rs[0].h / 2; + rs[0].x = MAPWIDTH / 2 - rs[0].w / 2; + for (i = 1; i < l; i++) { + snprintf(buffer, sizeof(buffer), "%u", i); + prng = fnv1a(5, host, port, selector, buffer, "seed"); + n = 0; + do { + do { + switch (xorshift(&prng) % 3) { + case 0: + h = 3 + xorshift(&prng) % 3; + w = 3 + xorshift(&prng) % 2; + break; + case 1: + w = 3 + xorshift(&prng) % 3; + h = 4 + xorshift(&prng) % 2; + break; + case 2: + h = w = 4 + xorshift(&prng) % 5; + break; + } + m = ROOMS_MARGIN_MIN + xorshift(&prng) % (1 + ROOMS_MARGIN_MAX - ROOMS_MARGIN_MIN); + j = xorshift(&prng) % i; + switch (xorshift(&prng) % 4) { + case 0: + x = rs[j].x; + if (w < rs[j].w) + x += xorshift(&prng) % (rs[j].w - w + 1); + else + x -= xorshift(&prng) % (w - rs[j].w + 1); + y = rs[j].y - h - m; + break; + case 1: + x = rs[j].x + rs[j].w + m; + y = rs[j].y; + if (h < rs[j].h) + y += xorshift(&prng) % (rs[j].h - h + 1); + else + y -= xorshift(&prng) % (h - rs[j].h + 1); + break; + case 2: + x = rs[j].x; + if (w < rs[j].w) + x += xorshift(&prng) % (rs[j].w - w + 1); + else + x -= xorshift(&prng) % (w - rs[j].w + 1); + y = rs[j].y + rs[j].h + m; + break; + case 3: + x = rs[j].x - w - m; + y = rs[j].y; + if (h < rs[j].h) + y += xorshift(&prng) % (rs[j].h - h + 1); + else + y -= xorshift(&prng) % (h - rs[j].h + 1); + break; + } + } while (x < MAP_MARGIN || x + w + MAP_MARGIN > MAPWIDTH || y < MAP_MARGIN || y + h + MAP_MARGIN > MAPHEIGHT); + n++; + } while (n <= 100 && !generaterooms_isfree(rs, i, x, y, w, h, ROOMS_MARGIN_MIN)); + if (n > 100) + break; + rs[i].h = h; + rs[i].w = w; + rs[i].x = x; + rs[i].y = y; + rs[i].p = &rs[j]; + } + return i; +} + +#define ROOMS_GRID_ROWS 5 +#define ROOMS_GRID_COLS 10 +size_t +generaterooms_grid(char *host, char *port, char *selector, struct room *rs, size_t l) +{ + size_t i, j, k, x, y; + unsigned char cells[ROOMS_GRID_ROWS * ROOMS_GRID_COLS]; + char buffer[3]; + uint32_t prng; + + memset(cells, 0, sizeof(cells)); + + if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS) + l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS; + + for (i = 0; i < l; i++) { + snprintf(buffer, sizeof(buffer), "%u", i); + prng = fnv1a(5, host, port, selector, buffer, "seed"); + j = 0; + do { + k = xorshift(&prng) % (ROOMS_GRID_ROWS * ROOMS_GRID_COLS); + j++; + } while (j <= 100 && cells[k]); + if (j > 100) + break; + y = k / ROOMS_GRID_COLS; + x = k % ROOMS_GRID_COLS; + cells[k] = 1; + rs[i].w = 3 + xorshift(&prng) % (1 + 8 - 2 - 3); + rs[i].h = 3 + xorshift(&prng) % (1 + 5 - 2 - 3); + rs[i].x = x * 8 + 1 + xorshift(&prng) % (1 + 8 - 2 - rs[i].w); + rs[i].y = y * 5 + 1 + xorshift(&prng) % (1 + 5 - 2 - rs[i].h); +/* +printf("%u\t%u\t%u\t%u\n", rs[i].w, rs[i].h, rs[i].x, rs[i].y); +*/ + rs[i].p = NULL; + } + + return i; +} + +void +generaterooms_grid2_(struct room *r, uint32_t *prng, size_t x, size_t y) +{ + r->w = 3 + xorshift(prng) % (1 + 8 - 2 - 3); + r->h = 3 + xorshift(prng) % (1 + 5 - 2 - 3); + r->x = x * 8 + 1 + xorshift(prng) % (1 + 8 - 2 - r->w); + r->y = y * 5 + 1 + xorshift(prng) % (1 + 5 - 2 - r->h); + r->p = NULL; +/* +printf("%u\t%u\t%u\t%u\n", r->w, r->h, r->x, r->y); +*/ +} + +size_t +generaterooms_grid2(char *host, char *port, char *selector, struct room *rs, size_t l) +{ + const struct { + ssize_t y, x; + } gnarf[8] = { + { -1, 0 }, + { 0, 1 }, + { 1, 0 }, + { 0, -1 }, + { -1, 1 }, + { 1, 1 }, + { 1, -1 }, + { -1, -1 } + }; + size_t i, j, k, m, n; + ssize_t x, y; + struct room *cells[ROOMS_GRID_ROWS][ROOMS_GRID_COLS]; + char buffer[3]; + uint32_t prng; + struct room *r; + + memset(cells, 0, sizeof(cells)); + + if (l > ROOMS_GRID_ROWS * ROOMS_GRID_COLS) + l = ROOMS_GRID_ROWS * ROOMS_GRID_COLS; + + prng = fnv1a(5, host, port, selector, "seed"); + x = ROOMS_GRID_COLS / 2; + y = ROOMS_GRID_ROWS / 2; + + r = &rs[0]; + generaterooms_grid2_(r, &prng, x, y); + cells[y][x] = r; + + for (i = 1; i < l; i++) { + for (n = 0; n < 100; n++) { + m = xorshift(&prng) % 4; + x += gnarf[m].x; + y += gnarf[m].y; + if (x < 0 || x >= ROOMS_GRID_COLS || y < 0 || y >= ROOMS_GRID_ROWS || cells[y][x]) { + m = xorshift(&prng) % i; + r = &rs[m]; + x = r->x / 8; + y = r->y / 5; + } else { + generaterooms_grid2_(&rs[i], &prng, x, y); + rs[i].p = r; + cells[y][x] = r = &rs[i]; + break; + } + } + if (n >= 100) + break; + } + + return i; +} + +size_t +distance(size_t x1, size_t y1, size_t x2, size_t y2) +{ + size_t d; + + if (y1 < y2) + d = y2 - y1; + else + d = y1 - y2; + if (x1 < x2) + d += x2 - x1; + else + d += x1 - x2; + + return d; +} + +void +nearestpoints(struct room *a, struct room *b, size_t *ax, size_t *ay, size_t *bx, size_t *by) +{ + if (a->y >= b->y && a->y < b->y + b->h) { + *ay = *by = a->y; + } else if (b->y >= a->y && b->y < a->y + a->h) { + *ay = *by = b->y; + } else if (a->y >= b->y) { + *ay = a->y; + *by = b->y + b->h - 1; + } else if (b->y >= a->y) { + *ay = a->y + a->h - 1; + *by = b->y; + } + + if (a->x >= b->x && a->x < b->x + b->w) { + *ax = *bx = a->x; + } else if (b->x >= a->x && b->x < a->x + a->w) { + *ax = *bx = b->x; + } else if (a->x >= b->x) { + *ax = a->x; + *bx = b->x + b->w - 1; + } else if (b->x >= a->x) { + *ax = a->x + a->w - 1; + *bx = b->x; + } +} + +size_t +roomdistance(struct room *a, struct room *b) +{ + size_t x1, y1; + size_t x2, y2; + + nearestpoints(a, b, &x1, &y1, &x2, &y2); + + return distance(x1, y1, x2, y2); +} + +struct room * +nearestroom(struct room *rs, size_t l, struct room *x, uint32_t prng) +{ + size_t i, d, cd; + ssize_t j; + double n; + + j = -1; + d = UINT32_MAX; + for (i = 0; i < l; i++) { + if (&rs[i] == x) + continue; + if ((cd = roomdistance(&rs[i], x)) < d) { + j = i; + d = cd; + n = 1; + } else if (cd == d) { + n++; + if (xorshift(&prng) / (UINT32_MAX + 1.) < 1. / n) + j = i; + } + } + + if (j == -1) + return NULL; + + return &rs[j]; +} + +void +connectrooms(struct room *a, struct room *b) +{ + size_t i, j; + ssize_t ii; + size_t x1, y1; + size_t x2, y2; + + nearestpoints(a, b, &x1, &y1, &x2, &y2); + + if (y1 > y2) { + ii = -1; + } else if (y2 > y1) { + ii = 1; + } else { + ii = 0; + } + +/* +printf("%lu\t%lu\t%d\n", y1, y2, ii); +*/ + for (i = y1; i != y2; i += ii) + map[i][x1].c = '.'; + + if (x1 > x2) { + ii = -1; + } else if (x2 > x1) { + ii = 1; + } else { + ii = 0; + } + + for (i = x1; i != x2; i += ii) + map[y2][i].c = '.'; +} + +void +rendermap(void) +{ + size_t i, j; + + for (i = 0; i < MAPHEIGHT; i++) { + for (j = 0; j < MAPWIDTH; j++) + putchar(map[i][j].c); + putchar('\n'); + } +} + +size_t +diff(size_t a, size_t b) +{ + if (a < b) + return b - a; + return a - b; +} + +/* + https://en.wikipedia.org/wiki/Levenshtein_distance#Recursive +*/ +size_t +levenshteindistance_slow(char *a, char *b) +{ + size_t k, l, m; + + if (!*a) + return strlen(b); + else if (!*b) + return strlen(a); + else if (*a == *b) + return levenshteindistance_slow(a + 1, b + 1); + + k = levenshteindistance_slow(a + 1, b); + l = levenshteindistance_slow(a, b + 1); + m = levenshteindistance_slow(a + 1, b + 1); + + if (k < l && k < m) + return 1 + k; + else if (l < k && l < m) + return 1 + l; + else + return 1 + m; +} + +ssize_t +placeitems_kmedoid(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k) +{ + size_t i, j; + ssize_t r; + double *distances, d; + double *mdists; + struct { + double selector; + double segment; + double index; + } *distanceparts, maxdistanceparts; + uint32_t prng; + struct point *points; + + distanceparts = xmalloc(triangularnumber(l) * sizeof(*distanceparts)); + + memset(&maxdistanceparts, 0, sizeof(maxdistanceparts)); + for (i = 0; i < l; i++) { + memset(&distanceparts[distanceindex(i, i)], 0, sizeof(*distanceparts)); + for (j = 0; j < i; j++) { + distanceparts[distanceindex(i, j)].selector = levenshteindistance(g[i].fields[1], g[j].fields[1]) + levenshteindistance(g[i].fields[2], g[j].fields[2]) + levenshteindistance(g[i].fields[3], g[j].fields[3]); + distanceparts[distanceindex(i, j)].segment = diff(g[i].s, g[j].s); + distanceparts[distanceindex(i, j)].index = diff(g[i].i, g[j].i); + + if (distanceparts[distanceindex(i, j)].selector > maxdistanceparts.selector) + maxdistanceparts.selector = distanceparts[distanceindex(i, j)].selector; + if (distanceparts[distanceindex(i, j)].segment > maxdistanceparts.segment) + maxdistanceparts.segment = distanceparts[distanceindex(i, j)].segment; + if (distanceparts[distanceindex(i, j)].index > maxdistanceparts.index) + maxdistanceparts.index = distanceparts[distanceindex(i, j)].index; + } + } + for (i = 0; i < l; i++) { + for (j = 0; j < i; j++) { + distanceparts[distanceindex(i, j)].selector /= maxdistanceparts.selector; + distanceparts[distanceindex(i, j)].segment /= maxdistanceparts.segment; + distanceparts[distanceindex(i, j)].index /= maxdistanceparts.index; + } + } + + distances = xmalloc(triangularnumber(l) * sizeof(*distances)); + prng = fnv1a(4, host, port, selector, "wiggle"); + for (i = 0; i < l; i++) { + distances[distanceindex(i, i)] = 0; + for (j = 0; j < i; j++) + distances[distanceindex(i, j)] = + distanceparts[distanceindex(i, j)].selector; + } + free(distanceparts); + + r = kmedoids(g, l, k, distances, assocs, fnv1a(4, host, port, selector, "seed")); + + points = calloc(l, sizeof(*points)); + sammon(distances, l, fnv1a(4, host, port, selector, "sammonseed"), points); + + for (i = 0; i < l; i++) + printf("%lf\t%lf\t%ld\t%lu\t%s\n", points[i].dims[0], points[i].dims[1], assocs[i] & 127, assocs[i] >> 7, g[i].fields[1]); + + free(points); + free(distances); + + return r; +} + +size_t +placeitems_hash(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k) +{ + size_t i; + + for (i = 0; i < l; i++) + assocs[i] = fnv1a(6, host, port, selector, g[i].fields[1], g[i].fields[2], g[i].fields[3]) % k; + + return k; +} + +size_t py, px; + +enum { + Portal, + StaircaseDown, + Bookshelf +}; + +void +generatemap(char *host, char *port, char *selector, struct gopherentry *g, size_t l) +{ + size_t i, j, k; + size_t x, y; + ssize_t n, m; + size_t *cassocs; + struct room *rooms, *r; + struct { + unsigned char x, y; + } positions[3]; + uint32_t prng; + char buffer[3]; + + memset(map, 0, sizeof(map)); + + for (i = 0; i < MAPHEIGHT; i++) { + for (j = 0; j < MAPWIDTH; j++) { + map[i][j].c = '#'; + } + } + k = 7; + rooms = calloc(k, sizeof(*rooms)); + if (!rooms) + return; + k = generaterooms_gnarf(host, port, selector, rooms, k); + + cassocs = calloc(l, sizeof(*cassocs)); + if (!cassocs) + goto cleanup; + + k = placeitems_kmedoid(host, port, selector, g, l, cassocs, k); + + /* Insert rooms */ + for (i = 0; i < k; i++) { + for (y = rooms[i].y; y < rooms[i].y + rooms[i].h; y++) { + for (x = rooms[i].x; x < rooms[i].x + rooms[i].w; x++) + map[y][x].c = '.'; + } + } + + /* Insert connections */ + for (i = 0; i < k; i++) { + if (rooms[i].p) + connectrooms(&rooms[i], rooms[i].p); +/* + if ((r = nearestroom(rooms, i, &rooms[i], fnv1a(4, host, port, selector, "gnarf"))) && r != rooms[i].p && r->p != &rooms[i]) + connectrooms(&rooms[i], r); +*/ + } + + /* Insert items */ + for (i = 0; i < k; i++) { + snprintf(buffer, sizeof(buffer), "%d", i); + prng = fnv1a(4, host, port, selector, buffer); + for (j = 0; j < 3; j++) { + /* Ugly brute force strategy... I'm sure there is a math-way of doing this. */ + do { + positions[j].x = rooms[i].x + xorshift(&prng) % rooms[i].w; + positions[j].y = rooms[i].y + xorshift(&prng) % rooms[i].h; + for (m = 0; m < j && positions[m].x != positions[j].x || + positions[m].y != positions[j].y; m++); + } while (m < j); + } + for (j = 0; j < l; j++) { + if (cassocs[j] != i) + continue; +/* + printf("%s\t%lu\n", g[j].fields[1], cassocs[j]); +*/ + switch (g[j].type) { + case '0': + x = positions[Bookshelf].x; + y = positions[Bookshelf].y; + map[y][x].c = 'E'; + break; + case '1': + if (strcmp(g[j].fields[2], host) || strcmp(g[j].fields[3], port)) { + x = positions[Portal].x; + y = positions[Portal].y; + map[y][x].c = 'O'; + } else { + x = positions[StaircaseDown].x; + y = positions[StaircaseDown].y; + if (map[y][x].data) + map[y][x].c = 'L'; + else + map[y][x].c = '>'; + } + break; + } + g[j].next = map[y][x].data; + map[y][x].data = &g[j]; + } + } + + free(cassocs); +cleanup: + free(rooms); +} + +char gopherbuffer[16 * 1024 * 1024]; + +void +interact(void) +{ +} + +void +loop(void) +{ + char c; + + do { + switch (c) { + case 'h': + break; + case 'j': + break; + case 'k': + break; + case 'l': + break; + case ' ': + interact(); + break; + } + } while (1); +} + +int +main(int argc, char *argv[]) +{ + ssize_t l; + struct gopherentry *g; + char *host, *port, *selector; + struct termios termios, ntermios; + + if (argc < 2) { + printf("%s host [port [selector]]\n", argv[0]); + return 1; + } + +/* + memset(&termios, 0, sizeof(termios)); + if (tcgetattr(0, &termios) == -1) + return 1; + + memcpy(&ntermios, &termios, sizeof(termios)); + ntermios.c_lflag &= ~(ICANON | ECHO); + ntermios.c_cc[VMIN] = 1; + ntermios.c_cc[VTIME] = 0; + + if (tcsetattr(0, TCSANOW, &ntermios) == -1) + return 1; +*/ + + host = argv[1]; + port = (argc > 2) ? argv[2] : "70"; + selector = (argc > 3) ? argv[3] : ""; + + do { + l = gopher_request(host, port, selector, gopherbuffer, sizeof(gopherbuffer)); + if (l == -1) + return 1; + + l = gopher_parsemenu(gopherbuffer, &g); + if (l == -1) + return 1; + + l = gopher_removeentries(g, l); + + generatemap(host, port, selector, g, l); +/* + rendermap(); +*/ + +/* + loop(); +*/ + } while (0); + +/* + if (tcsetattr(0, TCSANOW, &termios) == -1) + return 1; +*/ + + return 0; +} DIR diff --git a/gopher/gopher-clustering/gopher-clustering2.c b/gopher/gopher-clustering/gopher-clustering2.c @@ -0,0 +1,1165 @@ +#define _POSIX_C_SOURCE 200112L + +#include <assert.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <math.h> + +#include <netdb.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +uint32_t +fnv1a(int n,...) +{ + int i; + char *s; + va_list l; + uint32_t h; + + h = 0x811c9dc5; + + va_start(l, n); + for (i = 0; i < n; i++) { + for (s = va_arg(l, char*); *s; s++) { + h ^= *s; + h *= 0x01000193; + } + } + va_end(l); + + return h; +} + +uint32_t +xorshift(uint32_t *s) +{ + *s ^= *s << 13; + *s ^= *s >> 17; + *s ^= *s << 5; + return *s; +} + +/* + https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785 +*/ +uint32_t +mix(uint32_t x) +{ + x ^= x >> 16; + x *= 0x21f0aaad; + x ^= x >> 15; + x *= 0x735a2d97; + x ^= x >> 16; + return x; +} + +struct gopherentry { + struct gopherentry *next; + char type; + char *fields[4]; + size_t i, s; +}; + +char * +gopher_fieldend(char *s) +{ + for (; *s; s++) { + if (*s == '\t') + break; + if (*s == '\r' && *(s+1) == '\n') + break; + if (*s == '\n') + break; + } + return s; +} + +char * +gopher_nextentry(char *s) +{ + char *c; + + if (c = strchr(s, '\n')) + return c + 1; + return NULL; +} + +void * +xmalloc(size_t s) +{ + void *p; + + if (!(p = malloc(s))) { + perror(NULL); + exit(1); + } + return p; +} + +void * +xrealloc(void *p, size_t s) +{ + if (!(p = realloc(p, s))) { + perror(NULL); + exit(1); + } + return p; +} + +/* Allow a lot of ugly things... */ +size_t +gopher_parsemenu(char *d, struct gopherentry **g) +{ + ssize_t gl; + size_t fl, i, s; + char *c, *p, pt; + struct gopherentry *cg; + + gl = 0; + cg = NULL; + + s = 0; + pt = 0; + for (c = d; c && *c; c = gopher_nextentry(c)) { + if (*c == '.') + break; + if (*c == '\n') + continue; + + gl++; + cg = xrealloc(cg, gl * sizeof(struct gopherentry)); + memset(&cg[gl-1], 0, sizeof(struct gopherentry)); + + if (*c != 'i' && pt == 'i') + s++; + pt = *c; + + cg[gl-1].type = *c; + cg[gl-1].i = gl-1; + cg[gl-1].s = s; + c++; + + for (i = 0; i < 4; i++) { + p = gopher_fieldend(c); + fl = p - c; + cg[gl-1].fields[i] = xmalloc(fl + 1); + memcpy(cg[gl-1].fields[i], c, fl); + cg[gl-1].fields[i][fl] = '\0'; + if (*p != '\t') + break; + c = p + 1; + } + } + s++; + *g = cg; + return gl; +} + +ssize_t +gopher_removeentries(struct gopherentry *g, size_t l) +{ + size_t i, j; + + for (i = 0; i < l;) { + if (g[i].type == 'i' || g[i].type == '3' || !g[i].fields[2] || !g[i].fields[3]) { + l--; + memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); + continue; + } + for (j = 0; j < i; j++) { + if (!strcmp(g[i].fields[1], g[j].fields[1]) && + !strcmp(g[i].fields[2], g[j].fields[2]) && + !strcmp(g[i].fields[3], g[j].fields[3])) + break; + } + if (j < i) { + l--; + memmove(&g[i], &g[i+1], (l - i) * sizeof(*g)); + continue; + } + i++; + } + + return l; +} + +ssize_t +gopher_request(char *host, char *port, char *selector, char *buffer, size_t l) +{ + int fd; + struct addrinfo hints; + struct addrinfo *r, *rp; + ssize_t n, result, rn; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + if (getaddrinfo(host, port, &hints, &r) != 0) + return -1; + + for (rp = r; rp; rp = rp->ai_next) { + if ((fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol)) == -1) + continue; + if (connect(fd, rp->ai_addr, rp->ai_addrlen) != -1) + break; + close(fd); + } + freeaddrinfo(r); + if (!rp) + return -1; + + result = -1; + if (write(fd, selector, strlen(selector)) != strlen(selector)) + goto cleanup; + + if (write(fd, "\r\n", 2) != 2) + goto cleanup; + + n = 0; + while (1) { + if (n + 512 > l - 1) + goto cleanup; + if ((rn = read(fd, &buffer[n], 512)) == -1) + goto cleanup; + else if (!rn) + break; + n += rn; + } + + buffer[n] = '\0'; + result = n; + +cleanup: + close(fd); + + return result; +} + +size_t +triangularnumber(size_t n) +{ + return (n * n + n) / 2; +} + +size_t +distanceindex(size_t i, size_t j) +{ + size_t t; + + if (i < j) { + t = i; + i = j; + j = t; + } + + return triangularnumber(i) + j; +} + +struct point { + double dims[2]; +}; + +#define SAMMON_MAPPING_MAX_ITER (1 << 13) +#define SAMMON_MAPPING_LR (0.01) +void +sammon(double *distances, size_t l, uint32_t prng, struct point *points) +{ + size_t i, j, k, t, tmax; + double *gnarf; + double c, lr; + double e1, e2, sum, lasterror; + struct point *newpoints; + + gnarf = calloc(triangularnumber(l), sizeof(*gnarf)); + newpoints = calloc(l, sizeof(*newpoints)); + + for (i = 0; i < l; i++) { + points[i].dims[0] = (double)xorshift(&prng) / UINT32_MAX - 0.5; + points[i].dims[1] = (double)xorshift(&prng) / UINT32_MAX - 0.5; + } + + c = 0; + for (i = 0; i < l; i++) { + gnarf[distanceindex(i, i)] = 0; + for (j = 0; j < i; j++) { + gnarf[distanceindex(i, j)] = sqrt(pow(points[i].dims[0] - points[j].dims[0], 2) + pow(points[i].dims[1] - points[j].dims[1], 2)); + c += distances[distanceindex(i, j)]; + } + } + + lasterror = 0; + for (t = 0, tmax = SAMMON_MAPPING_MAX_ITER; t < tmax; t++) { + lr = SAMMON_MAPPING_LR; + for (i = 0; i < l; i++) { + for (j = 0; j < 2; j++) { + e1 = -2. / c; + sum = 0; + for (k = 0; k < l; k++) { + if (i == k) + continue; + sum += (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * (points[i].dims[j] - points[k].dims[j]); + } + e1 *= sum; + + e2 = -2. / c; + sum = 0; + for (k = 0; k < l; k++) { + if (i == k) + continue; + sum += 1. / (distances[distanceindex(i, k)] * gnarf[distanceindex(i, k)]) * ((distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) - pow(points[i].dims[j] - points[k].dims[j], 2) / gnarf[distanceindex(i, k)] * (1 + (distances[distanceindex(i, k)] - gnarf[distanceindex(i, k)]) / gnarf[distanceindex(i, k)])); + } + e2 *= sum; + + newpoints[i].dims[j] = points[i].dims[j] - lr * (e1 / fabs(e2)); + } + } + + sum = 0; + for (i = 0; i < l; i++) { + for (j = 0; j < i; j++) { + gnarf[distanceindex(i, j)] = sqrt(pow(newpoints[i].dims[0] - newpoints[j].dims[0], 2) + pow(newpoints[i].dims[1] - newpoints[j].dims[1], 2)); + sum += pow(distances[distanceindex(i, j)] - gnarf[distanceindex(i, j)], 2) / distances[distanceindex(i, j)]; + } + } + + fprintf(stderr, "sammon mapping error: %lf\n", sum / c); + if (t > 0 && fabs(sum / c - lasterror) < 0.000000000001) + break; + lasterror = sum / c; + + memcpy(points, newpoints, l * sizeof(*points)); + } + + free(newpoints); + free(gnarf); +} + +int +fastermsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng) +{ + size_t i, j, n, ck, grimmel; + ssize_t m1, m, m2; + size_t ti, th, huh; + double dsum, d, d2, gnarf, tdsum; + struct { + size_t medoid; + size_t n; + double dsum; + } *clusters; + struct { + size_t n1, n2, n3; + double d1, d2, d3; + } *cache; + double *dS, *dS2; + double cdS, cdS2; + size_t cm, cx; + ssize_t lx; + int flag, result; + uint32_t r; + double *C; + + if (!l) + return 0; + + if (l < k) + k = l; + + clusters = calloc(k, sizeof(*clusters)); + if (!clusters) + return -1; + + result = -1; + +/* + Seed the medoids randomly +*/ + for (i = n = 0; i < l; i++) { + if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) + clusters[n++].medoid = i; + if (n == k) + break; + } + + cache = calloc(l, sizeof(*cache)); + if (!cache) + goto cleanup; + + dS = calloc(k, sizeof(*dS)); + if (!dS) + goto cleanup; + + dS2 = calloc(k, sizeof(*dS2)); + if (!dS2) + goto cleanup; + + for (i = 0; i < l; i++) { + cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; + for (j = 0; j < k; j++) { + d = distances[distanceindex(i, clusters[j].medoid)]; + if (d < cache[i].d1) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = cache[i].d1; + cache[i].d1 = d; + cache[i].n3 = cache[i].n2; + cache[i].n2 = cache[i].n1; + cache[i].n1 = j; + } else if (d < cache[i].d2) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = d; + cache[i].n3 = cache[i].n2; + cache[i].n2 = j; + } else if (d < cache[i].d3) { + cache[i].d3 = d; + cache[i].n3 = j; + } + } + } + + for (i = 0; i < k; i++) { + dS[i] = 0; + for (j = 0; j < l; j++) { + if (cache[j].n1 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3; + } + for (j = 0; j < l; j++) { + if (cache[j].n2 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3; + } + } + + lx = -1; + for (grimmel = 0;grimmel < 32;grimmel++) { + cdS = 0; + for (i = 0; i < l; i++) { + if (i == lx) + goto fastermsc_finished; + + for (j = 0; j < k; j++) { + if (clusters[j].medoid == i) + goto fastmsc_nexti; + } + + memcpy(dS2, dS, k * sizeof(*dS2)); + cdS2 = 0; + for (j = 0; j < l; j++) { + d = distances[distanceindex(i, j)]; + if (d < cache[j].d1) { + cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1; + dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d2) { + cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d; + dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d3) { + dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; + } + } + d = -INFINITY; + for (j = 0; j < k; j++) { + if (dS2[j] > d) { + d = dS2[j]; + m = j; + } + } + dS2[m] += cdS2; + if (dS2[m] <= 0) + continue; + lx = i; + clusters[m].medoid = i; + for (j = 0; j < l; j++) { + if (cache[j].n1 != m && cache[j].n2 != m && cache[j].n3 != m) + continue; + cache[j].d1 = cache[j].d2 = cache[j].d3 = INFINITY; + for (n = 0; n < k; n++) { + d = distances[distanceindex(j, clusters[n].medoid)]; + if (d < cache[j].d1) { + cache[j].d3 = cache[j].d2; + cache[j].d2 = cache[j].d1; + cache[j].d1 = d; + cache[j].n3 = cache[j].n2; + cache[j].n2 = cache[j].n1; + cache[j].n1 = n; + } else if (d < cache[j].d2) { + cache[j].d3 = cache[j].d2; + cache[j].d2 = d; + cache[j].n3 = cache[j].n2; + cache[j].n2 = n; + } else if (d < cache[j].d3) { + cache[j].d3 = d; + cache[j].n3 = n; + } + } + } + for (j = 0; j < k; j++) { + dS[j] = 0; + for (n = 0; n < l; n++) { + if (cache[n].n1 != j) + continue; + dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d2 / cache[n].d3; + } + for (n = 0; n < l; n++) { + if (cache[n].n2 != j) + continue; + dS[j] += cache[n].d1 / cache[n].d2 - cache[n].d1 / cache[n].d3; + } + } +fastmsc_nexti:; + } +/* + if (lx == -1) + break; +*/ + } +fastermsc_finished:; + + for (i = 0; i < k; i++) + clusters[i].n = 0; + + for (i = 0; i < l; i++) { + d = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + assoc[i] = j; + d = distances[distanceindex(clusters[j].medoid, i)]; + } + } + clusters[assoc[i]].n++; + } + + for (i = 0; i < l; i++) { + if (clusters[assoc[i]].medoid == i) + assoc[i] |= 128; + } + + result = k; +cleanup: + free(clusters); + return result; +} + +int +fastmsc(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng) +{ + size_t i, j, n, ck; + ssize_t m1, m, m2; + size_t ti, th; + double dsum, d, d2, gnarf, tdsum; + struct { + size_t medoid; + size_t n; + double dsum; + } *clusters; + struct { + size_t n1, n2; + double d1, d2, d3; + } *cache; + double *dS, *dS2; + double cdS, cdS2; + double _e, _y, _z, _s; + size_t cm, cx; + int flag, result; + uint32_t r; + double *C; + + if (!l) + return 0; + + if (l < k) + k = l; + + clusters = calloc(k, sizeof(*clusters)); + if (!clusters) + return -1; + + result = -1; + +/* + seed the medoids using PAM BUILD + d = INFINITY; + m = -1; + for (i = 0; i < l; i++) { + dsum = 0; + for (j = 0; j < l; j++) + dsum += distances[distanceindex(i, j)]; + if (dsum < d) { + m = i; + d = dsum; + } + } + if (m == -1) + goto cleanup; + ck = 0; + clusters[ck].medoid = m; + ck++; + + C = xmalloc(l * l * sizeof(*C)); + + for (; ck < k; ck++) { + memset(C, 0, l * l * sizeof(*C)); + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti; + } + for (j = 0; j < l; j++) { + if (i == j) + continue; + for (n = 0; n < ck; n++) { + if (clusters[n].medoid == j) + goto build_nextj; + } + d = INFINITY; + for (n = 0; n < ck; n++) { + if (distances[distanceindex(j, clusters[n].medoid)] < d) + d = distances[distanceindex(j, clusters[n].medoid)]; + } + C[j * l + i] = d - distances[distanceindex(j, i)]; + if (C[j * l + i] < 0) + C[j * l + i] = 0; +build_nextj:; + } +build_nexti:; + } + d = -1; + m = -1; + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti2; + } + dsum = 0; + for (j = 0; j < l; j++) + dsum += C[j * l + i]; + if (dsum > d) { + d = dsum; + m = i; + } +build_nexti2:; + } + clusters[ck].medoid = m; + } + free(C); +*/ + +/* + Seed the medoids randomly +*/ + for (i = n = 0; i < l; i++) { + if ((l - i) * (xorshift(&prng) / (double)UINT32_MAX) < k - n) + clusters[n++].medoid = i; + if (n == k) + break; + } + + cache = calloc(l, sizeof(*cache)); + if (!cache) + goto cleanup; + + dS = calloc(k, sizeof(*dS)); + if (!dS) + goto cleanup; + + dS2 = calloc(k, sizeof(*dS2)); + if (!dS2) + goto cleanup; + + for (;;) { + for (i = 0; i < l; i++) { + cache[i].d1 = cache[i].d2 = cache[i].d3 = INFINITY; + for (j = 0; j < k; j++) { + d = distances[distanceindex(i, clusters[j].medoid)]; + if (d < cache[i].d1) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = cache[i].d1; + cache[i].d1 = d; + cache[i].n2 = cache[i].n1; + cache[i].n1 = j; + } else if (d < cache[i].d2) { + cache[i].d3 = cache[i].d2; + cache[i].d2 = d; + cache[i].n2 = j; + } else if (d < cache[i].d3) { + cache[i].d3 = d; + } + } + } + + for (i = 0; i < k; i++) { + dS[i] = 0; + for (j = 0; j < l; j++) { + if (cache[j].n1 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d2 / cache[j].d3; + } + for (j = 0; j < l; j++) { + if (cache[j].n2 != i) + continue; + dS[i] += cache[j].d1 / cache[j].d2 - cache[j].d1 / cache[j].d3; + } + } + + cdS = 0; + for (i = 0; i < l; i++) { + for (j = 0; j < k; j++) { + if (clusters[j].medoid == i) + goto fastmsc_nexti; + } + memcpy(dS2, dS, k * sizeof(*dS2)); + cdS2 = _e = 0; + for (j = 0; j < l; j++) { + d = distances[distanceindex(i, j)]; + if (d < cache[j].d1) { + cdS2 += cache[j].d1 / cache[j].d2 - d / cache[j].d1; + dS2[cache[j].n1] += d / cache[j].d1 + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d2) { + cdS2 += cache[j].d1 / cache[j].d2 - cache[j].d1 / d; + dS2[cache[j].n1] += cache[j].d1 / d + cache[j].d2 / cache[j].d3 - (cache[j].d1 + d) / cache[j].d2; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / cache[j].d2; + } else if (d < cache[j].d3) { + dS2[cache[j].n1] += cache[j].d2 / cache[j].d3 - cache[j].d2 / d; + dS2[cache[j].n2] += cache[j].d1 / cache[j].d3 - cache[j].d1 / d; + } + } + d = -INFINITY; + d2 = INFINITY; + for (j = 0; j < k; j++) { + if (dS2[j] > d) { + d = dS2[j]; + m = j; + } + if (dS2[j] < d2) { + d2 = dS2[j]; + m2 = j; + } + } + dS2[m] += cdS2; + if (dS2[m] > cdS) { + cdS = dS2[m]; + cm = m; + cx = i; + } +fastmsc_nexti:; + } + /* Should be 0. I guess some floating point problems? */ + if (cdS <= 0.00000000000001) + break; + clusters[cm].medoid = cx; + } + + for (i = 0; i < k; i++) + clusters[i].n = 0; + + for (i = 0; i < l; i++) { + d = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + assoc[i] = j; + d = distances[distanceindex(clusters[j].medoid, i)]; + } + } + clusters[assoc[i]].n++; + } + + for (i = 0; i < l; i++) { + if (clusters[assoc[i]].medoid == i) + assoc[i] |= 128; + } + + result = k; +cleanup: + free(clusters); + return result; +} + +/* + Seeding of medoids based on the PAM BUILD description in https://www.cs.umb.edu/cs738/pam1.pdf + I was too stupid to understand other descriptions. +*/ +int +kmedoids(struct gopherentry *g, size_t l, size_t k, double *distances, size_t *assoc, uint32_t prng) +{ + size_t i, j, n, ck; + ssize_t m1, m; + size_t ti, th; + double dsum, d, d2, gnarf, tdsum; + struct { + size_t medoid; + size_t n; + double dsum; + } *clusters; + int flag, result; + uint32_t r; + double *C; + + if (!l) + return 0; + + if (l < k) + k = l; + + clusters = calloc(k, sizeof(*clusters)); + if (!clusters) + return -1; + + result = -1; + + /* seed the medoids using PAM BUILD */ + d = INFINITY; + m = -1; + for (i = 0; i < l; i++) { + dsum = 0; + for (j = 0; j < l; j++) + dsum += distances[distanceindex(i, j)]; + if (dsum < d) { + m = i; + d = dsum; + } + } + if (m == -1) + goto cleanup; + ck = 0; + clusters[ck].medoid = m; + ck++; + + C = xmalloc(l * l * sizeof(*C)); + + for (; ck < k; ck++) { + memset(C, 0, l * l * sizeof(*C)); + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti; + } + for (j = 0; j < l; j++) { + if (i == j) + continue; + for (n = 0; n < ck; n++) { + if (clusters[n].medoid == j) + goto build_nextj; + } + d = INFINITY; + for (n = 0; n < ck; n++) { + if (distances[distanceindex(j, clusters[n].medoid)] < d) + d = distances[distanceindex(j, clusters[n].medoid)]; + } + C[j * l + i] = d - distances[distanceindex(j, i)]; + if (C[j * l + i] < 0) + C[j * l + i] = 0; +build_nextj:; + } +build_nexti:; + } + d = -1; + m = -1; + for (i = 0; i < l; i++) { + for (j = 0; j < ck; j++) { + if (clusters[j].medoid == i) + goto build_nexti2; + } + dsum = 0; + for (j = 0; j < l; j++) + dsum += C[j * l + i]; + if (dsum > d) { + d = dsum; + m = i; + } +build_nexti2:; + } + clusters[ck].medoid = m; + } + free(C); + + for (;;) { + tdsum = INFINITY; + for (i = 0; i < k; i++) { + for (j = 0; j < l; j++) { + for (n = 0; n < k; n++) { + if (j == clusters[n].medoid) + goto swap_nextj; + } + dsum = 0; + for (n = 0; n < l; n++) { + if (n == j) + continue; + for (m = 0; m < k; m++) { + if (clusters[m].medoid == n) + goto swap_nextn; + } + d = INFINITY; + d2 = INFINITY; + for (m = 0; m < k; m++) { + if (distances[distanceindex(clusters[m].medoid, n)] < d) { + d2 = d; + d = distances[distanceindex(clusters[m].medoid, n)]; + } else if (distances[distanceindex(clusters[m].medoid, n)] < d2) { + d2 = distances[distanceindex(clusters[m].medoid, n)]; + } + } + if (distances[distanceindex(clusters[i].medoid, n)] > d) { + if (distances[distanceindex(j, n)] >= d) + gnarf = 0; + else + gnarf = distances[distanceindex(j, n)] - d; + } else if (distances[distanceindex(clusters[i].medoid, n)] == d) { + if (distances[distanceindex(j, n)] < d2) + gnarf = distances[distanceindex(j, n)] - d; + else + gnarf = d2 - d; + } else { + continue; + } + dsum += gnarf; +swap_nextn:; + } + if (dsum < tdsum) { + ti = i; + th = j; + tdsum = dsum; + } +swap_nextj:; + } + } + if (tdsum >= 0) + break; + clusters[ti].medoid = th; + } + + for (i = 0; i < k; i++) + clusters[i].n = 0; + + for (i = 0; i < l; i++) { + d = INFINITY; + for (j = 0; j < k; j++) { + if (distances[distanceindex(clusters[j].medoid, i)] < d) { + assoc[i] = j; + d = distances[distanceindex(clusters[j].medoid, i)]; + } + } + clusters[assoc[i]].n++; + } + + for (i = 0; i < l; i++) { + if (clusters[assoc[i]].medoid == i) + assoc[i] |= 128; + } + + result = k; +cleanup: + free(clusters); + return result; +} + +size_t +diff(size_t a, size_t b) +{ + if (a < b) + return b - a; + return a - b; +} + +/* + https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows + Which references https://www.codeproject.com/Articles/13525/Fast-memory-efficient-Levenshtein-algorithm-2 + The code there is using the CPOL: https://www.codeproject.com/info/cpol10.aspx +*/ +size_t +levenshteindistance(char *a, char *b) +{ + size_t i, j, dc, ic, sc, r; + size_t *v0, *v1, *t; + + v0 = xmalloc((strlen(b) + 1) * sizeof(*v0)); + v1 = xmalloc((strlen(b) + 1) * sizeof(*v1)); + + for (i = 0; i <= strlen(b); i++) + v0[i] = i; + + for (i = 0; i < strlen(a); i++) { + v1[0] = i + 1; + for (j = 0; j < strlen(b); j++) { + dc = v0[j + 1] + 1; + ic = v1[j] + 1; + if (a[i] == b[j]) + sc = v0[j]; + else + sc = v0[j] + 1; + if (dc < ic && dc < sc) + v1[j + 1] = dc; + else if (ic < dc && ic < sc) + v1[j + 1] = ic; + else + v1[j + 1] = sc; + } + t = v0; + v0 = v1; + v1 = t; + } + + r = v0[strlen(b)]; + + free(v1); + free(v0); + + return r; +} + +/* + Remove common prefix and suffix. The pazz0distance is the sum of the length of the leftover strings. +*/ +size_t +pazz0distance(char *a, char *b) +{ + size_t pl, sl; + size_t al, bl; + + al = strlen(a); + bl = strlen(b); + + for (pl = 0; pl < al && pl < bl && a[pl] == b[pl]; pl++); + for (sl = 0; al - sl > pl && bl - sl > pl && a[al - 1 - sl] == b[bl - 1 - sl]; sl++); + + return al + bl - 2 * (pl + sl); +} + +/* + https://en.wikipedia.org/wiki/Levenshtein_distance#Recursive +*/ +size_t +levenshteindistance_slow(char *a, char *b) +{ + size_t k, l, m; + + if (!*a) + return strlen(b); + else if (!*b) + return strlen(a); + else if (*a == *b) + return levenshteindistance_slow(a + 1, b + 1); + + k = levenshteindistance_slow(a + 1, b); + l = levenshteindistance_slow(a, b + 1); + m = levenshteindistance_slow(a + 1, b + 1); + + if (k < l && k < m) + return 1 + k; + else if (l < k && l < m) + return 1 + l; + else + return 1 + m; +} + +ssize_t +clustering(char *host, char *port, char *selector, struct gopherentry *g, size_t l, size_t *assocs, size_t k) +{ + size_t i, j; + ssize_t r; + double *distances, d; + double *mdists; + struct { + double selector; + double segment; + double index; + } *distanceparts, maxdistanceparts; + uint32_t prng; + struct point *points; + + distanceparts = xmalloc(triangularnumber(l) * sizeof(*distanceparts)); + + /* So the sammon mapping can be happy. */ + prng = fnv1a(4, host, port, selector, "distancewiggle"); + memset(&maxdistanceparts, 0, sizeof(maxdistanceparts)); + for (i = 0; i < l; i++) { + memset(&distanceparts[distanceindex(i, i)], 0, sizeof(*distanceparts)); + for (j = 0; j < i; j++) { + distanceparts[distanceindex(i, j)].selector = levenshteindistance(g[i].fields[1], g[j].fields[1]) + levenshteindistance(g[i].fields[2], g[j].fields[2]) + levenshteindistance(g[i].fields[3], g[j].fields[3]) + ((double)xorshift(&prng) / UINT32_MAX) * 0.000000000000000000000000000000000000001; + distanceparts[distanceindex(i, j)].segment = diff(g[i].s, g[j].s); + distanceparts[distanceindex(i, j)].index = diff(g[i].i, g[j].i); + + if (distanceparts[distanceindex(i, j)].selector > maxdistanceparts.selector) + maxdistanceparts.selector = distanceparts[distanceindex(i, j)].selector; + if (distanceparts[distanceindex(i, j)].segment > maxdistanceparts.segment) + maxdistanceparts.segment = distanceparts[distanceindex(i, j)].segment; + if (distanceparts[distanceindex(i, j)].index > maxdistanceparts.index) + maxdistanceparts.index = distanceparts[distanceindex(i, j)].index; + } + } + for (i = 0; i < l; i++) { + for (j = 0; j < i; j++) { + distanceparts[distanceindex(i, j)].selector /= maxdistanceparts.selector; + distanceparts[distanceindex(i, j)].segment /= maxdistanceparts.segment; + distanceparts[distanceindex(i, j)].index /= maxdistanceparts.index; + } + } + + distances = xmalloc(triangularnumber(l) * sizeof(*distances)); + for (i = 0; i < l; i++) { + distances[distanceindex(i, i)] = 0; + for (j = 0; j < i; j++) + distances[distanceindex(i, j)] = 0.75 * distanceparts[distanceindex(i, j)].selector + 0.125 * distanceparts[distanceindex(i, j)].segment + 0.125 * distanceparts[distanceindex(i, j)].index; //distanceparts[distanceindex(i, j)].selector; + } + free(distanceparts); + + r = kmedoids(g, l, k, distances, assocs, fnv1a(4, host, port, selector, "seed")); + + points = calloc(l, sizeof(*points)); + sammon(distances, l, fnv1a(4, host, port, selector, "sammonseed"), points); + + for (i = 0; i < l; i++) + printf("%lf\t%lf\t%ld\t%lu\t%s\n", points[i].dims[0], points[i].dims[1], assocs[i] & 127, assocs[i] >> 7, g[i].fields[1]); + + free(points); + free(distances); + + return r; +} + +char gopherbuffer[16 * 1024 * 1024]; + +int +main(int argc, char *argv[]) +{ + ssize_t l; + size_t k; + struct gopherentry *g; + char *host, *port, *selector; + size_t *cassocs; + + if (argc < 2) { + printf("%s host [port [selector]]\n", argv[0]); + return 1; + } + + host = argv[1]; + port = (argc > 2) ? argv[2] : "70"; + selector = (argc > 3) ? argv[3] : ""; + + l = gopher_request(host, port, selector, gopherbuffer, sizeof(gopherbuffer)); + if (l == -1) + return 1; + + l = gopher_parsemenu(gopherbuffer, &g); + if (l == -1) + return 1; + + l = gopher_removeentries(g, l); + + cassocs = calloc(l, sizeof(*cassocs)); + if (!cassocs) + return 1; + k = 5; + clustering(host, port, selector, g, l, cassocs, k); + + return 0; +} DIR diff --git a/gopher/gopher-clustering/hint.md b/gopher/gopher-clustering/hint.md @@ -0,0 +1,9 @@ + +14:28:09 pazz0 | `egcc -Ofast -o gopher-clustering +gopher-clustering.c -lm` +14:28:19 pazz0 | `./gopher-clustering bitreich.org 70 /index.gph +| sed '1,/^Silh/d' > clustering-output` +14:28:34 pazz0 | And in gnuplot: `plot 'clustering-output' using +1:2:5:(4+$4):3 with labels point pt var lc + | var` +