URI: 
       treturn of venti - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit a0d146edd7a7de6236a0d60baafeeb59f8452aae
   DIR parent 88bb285e3d87ec2508840af33f7e0af53ec3c13c
  HTML Author: rsc <devnull@localhost>
       Date:   Tue, 12 Jul 2005 15:23:36 +0000
       
       return of venti
       
       Diffstat:
         A src/cmd/venti/copy.c                |     170 +++++++++++++++++++++++++++++++
         A src/cmd/venti/devnull.c             |      80 +++++++++++++++++++++++++++++++
         M src/cmd/venti/mkfile                |      14 ++++++++++++--
         A src/cmd/venti/mkroot.c              |      59 +++++++++++++++++++++++++++++++
         A src/cmd/venti/randtest.c            |     334 +++++++++++++++++++++++++++++++
         A src/cmd/venti/read.c                |      75 +++++++++++++++++++++++++++++++
         A src/cmd/venti/readlist.c            |     112 +++++++++++++++++++++++++++++++
         A src/cmd/venti/ro.c                  |     112 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/arena.c           |     737 ++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/arenas.c          |     414 ++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/bloom.c           |     210 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/buildbuck.c       |     132 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/buildindex.c      |     160 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/checkarenas.c     |     135 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/checkindex.c      |     293 ++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/clump.c           |     222 ++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/clumpstats.c      |     127 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/config.c          |     245 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/conv.c            |     632 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/dat.h             |     718 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/dcache.c          |     816 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/dump.c            |      47 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/findscore.c       |     121 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/fmtarenas.c       |     135 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/fmtbloom.c        |     115 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/fmtindex.c        |     120 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/fmtisect.c        |      83 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/fns.h             |     206 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/graph.c           |     202 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/httpd.c           |     988 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/icache.c          |     348 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/icachewrite.c     |     318 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/ifile.c           |      93 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/index.c           |     819 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/lump.c            |     249 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/lumpcache.c       |     417 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/lumpqueue.c       |     187 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/mkfile            |     146 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/part.c            |     383 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/png.c             |     241 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/printarena.c      |     130 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/printarenas.c     |     113 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/printindex.c      |      99 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/printmap.c        |      42 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/rdarena.c         |      91 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/round.c           |     102 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/score.c           |      43 ++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/sortientry.c      |     376 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/stats.c           |     212 ++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/stdinc.h          |       9 +++++++++
         A src/cmd/venti/srv/syncarena.c       |     174 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/syncindex.c       |      73 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/syncindex0.c      |     167 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/trace.c           |      38 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/unittoull.c       |      30 ++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/unwhack.c         |     179 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/utils.c           |     252 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/venti.c           |     266 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/verifyarena.c     |     127 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/whack.c           |     331 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/whack.h           |      40 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/wrarena.c         |     217 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/xml.c             |      68 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/xml.h             |      11 +++++++++++
         A src/cmd/venti/srv/zblock.c          |      93 +++++++++++++++++++++++++++++++
         A src/cmd/venti/srv/zeropart.c        |      31 +++++++++++++++++++++++++++++++
         A src/cmd/venti/sync.c                |      54 +++++++++++++++++++++++++++++++
         A src/cmd/venti/write.c               |      62 +++++++++++++++++++++++++++++++
       
       68 files changed, 14443 insertions(+), 2 deletions(-)
       ---
   DIR diff --git a/src/cmd/venti/copy.c b/src/cmd/venti/copy.c
       t@@ -0,0 +1,170 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <venti.h>
       +#include <libsec.h>
       +#include <thread.h>
       +
       +int changes;
       +int rewrite;
       +int ignoreerrors;
       +int fast;
       +int verbose;
       +VtConn *zsrc, *zdst;
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: copy [-fir] [-t type] srchost dsthost score\n");
       +        threadexitsall("usage");
       +}
       +
       +void
       +walk(uchar score[VtScoreSize], uint type, int base)
       +{
       +        int i, n;
       +        uchar *buf;
       +        VtEntry e;
       +        VtRoot root;
       +
       +        if(memcmp(score, vtzeroscore, VtScoreSize) == 0)
       +                return;
       +
       +        buf = vtmallocz(VtMaxLumpSize);
       +        if(fast && vtread(zdst, score, type, buf, VtMaxLumpSize) >= 0){
       +                if(verbose)
       +                        fprint(2, "skip %V\n", score);
       +                free(buf);
       +                return;
       +        }
       +
       +        n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
       +        if(n < 0){
       +                if(rewrite){
       +                        changes++;
       +                        memmove(score, vtzeroscore, VtScoreSize);
       +                }else if(!ignoreerrors)
       +                        sysfatal("reading block %V (type %d): %r", type, score);
       +                return;
       +        }
       +
       +        switch(type){
       +        case VtRootType:
       +                if(vtrootunpack(&root, buf) < 0){
       +                        fprint(2, "warning: could not unpack root in %V %d\n", score, type);
       +                        break;
       +                }
       +                walk(root.score, VtDirType, 0);
       +                walk(root.prev, VtRootType, 0);
       +                vtrootpack(&root, buf);        /* walk might have changed score */
       +                break;
       +
       +        case VtDirType:
       +                for(i=0; i<n/VtEntrySize; i++){
       +                        if(vtentryunpack(&e, buf, i) < 0){
       +                                fprint(2, "warning: could not unpack entry #%d in %V %d\n", i, score, type);
       +                                continue;
       +                        }
       +                        if(!(e.flags & VtEntryActive))
       +                                continue;
       +                        walk(e.score, e.type, e.type&VtTypeBaseMask);
       +                        vtentrypack(&e, buf, i);
       +                }
       +                break;
       +
       +        case VtDataType:
       +                break;
       +
       +        default:        /* pointers */
       +                for(i=0; i<n; i+=VtScoreSize)
       +                        if(memcmp(buf+i, vtzeroscore, VtScoreSize) != 0)
       +                                walk(buf+i, type-1, base);
       +                break;
       +        }
       +
       +        if(vtwrite(zdst, score, type, buf, n) < 0){
       +                /* figure out score for better error message */
       +                /* can't use input argument - might have changed contents */
       +                n = vtzerotruncate(type, buf, n);
       +                sha1(buf, n, score, nil);
       +                sysfatal("writing block %V (type %d): %r", score, type);
       +        }
       +        free(buf);
       +}
       +
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int type, n;
       +        uchar score[VtScoreSize];
       +        uchar *buf;
       +        char *prefix;
       +
       +        fmtinstall('F', vtfcallfmt);
       +        fmtinstall('V', vtscorefmt);
       +
       +        type = -1;
       +        ARGBEGIN{
       +        case 'f':
       +                fast = 1;
       +                break;
       +        case 'i':
       +                if(rewrite)
       +                        usage();
       +                ignoreerrors = 1;
       +                break;
       +        case 'r':
       +                if(ignoreerrors)
       +                        usage();
       +                rewrite = 1;
       +                break;
       +        case 't':
       +                type = atoi(EARGF(usage()));
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 3)
       +                usage();
       +
       +        if(vtparsescore(argv[2], &prefix, score) < 0)
       +                sysfatal("could not parse score: %r");
       +
       +        buf = vtmallocz(VtMaxLumpSize);
       +
       +        zsrc = vtdial(argv[0]);
       +        if(zsrc == nil)
       +                sysfatal("could not dial src server: %r");
       +        if(vtconnect(zsrc) < 0)
       +                sysfatal("vtconnect src: %r");
       +
       +        zdst = vtdial(argv[1]);
       +        if(zdst == nil)
       +                sysfatal("could not dial dst server: %r");
       +        if(vtconnect(zdst) < 0)
       +                sysfatal("vtconnect dst: %r");
       +
       +        if(type != -1){
       +                n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
       +                if(n < 0)
       +                        sysfatal("could not read block: %r");
       +        }else{
       +                for(type=0; type<VtMaxType; type++){
       +                        n = vtread(zsrc, score, type, buf, VtMaxLumpSize);
       +                        if(n >= 0)
       +                                break;
       +                }
       +                if(type == VtMaxType)
       +                        sysfatal("could not find block %V of any type", score);
       +        }
       +
       +        walk(score, type, VtDirType);
       +        if(changes)
       +                print("%s:%V (%d pointers rewritten)\n", prefix, score, changes);
       +
       +        if(vtsync(zdst) < 0)
       +                sysfatal("could not sync dst server: %r");
       +
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/devnull.c b/src/cmd/venti/devnull.c
       t@@ -0,0 +1,80 @@
       +/* Copyright (c) 2004 Russ Cox */
       +#include <u.h>
       +#include <libc.h>
       +#include <venti.h>
       +#include <thread.h>
       +#include <libsec.h>
       +
       +#ifndef _UNISTD_H_
       +#pragma varargck type "F" VtFcall*
       +#pragma varargck type "T" void
       +#endif
       +
       +int verbose;
       +
       +enum
       +{
       +        STACK = 8192,
       +};
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: venti/devnull [-v] [-a address]\n");
       +        threadexitsall("usage");
       +}
       +
       +void
       +threadmain(int argc, char **argv)
       +{
       +        VtReq *r;
       +        VtSrv *srv;
       +        char *address;
       +        Packet *p;
       +
       +        fmtinstall('V', vtscorefmt);
       +        fmtinstall('F', vtfcallfmt);
       +        
       +        address = "tcp!*!venti";
       +
       +        ARGBEGIN{
       +        case 'v':
       +                verbose++;
       +                break;
       +        case 'a':
       +                address = EARGF(usage());
       +                break;
       +        default:
       +                usage();
       +        }ARGEND
       +
       +        srv = vtlisten(address);
       +        if(srv == nil)
       +                sysfatal("vtlisten %s: %r", argv[1]);
       +
       +        while((r = vtgetreq(srv)) != nil){
       +                r->rx.msgtype = r->tx.msgtype+1;
       +                if(verbose)
       +                        fprint(2, "<- %F\n", &r->tx);
       +                switch(r->tx.msgtype){
       +                case VtTping:
       +                        break;
       +                case VtTgoodbye:
       +                        break;
       +                case VtTread:
       +                        r->rx.error = vtstrdup("no such block");
       +                        r->rx.msgtype = VtRerror;
       +                        break;
       +                case VtTwrite:
       +                        packetsha1(r->tx.data, r->rx.score);
       +                        break;
       +                case VtTsync:
       +                        break;
       +                }
       +                if(verbose)
       +                        fprint(2, "-> %F\n", &r->rx);
       +                vtrespond(r);
       +        }
       +        threadexitsall(nil);
       +}
       +
   DIR diff --git a/src/cmd/venti/mkfile b/src/cmd/venti/mkfile
       t@@ -1,3 +1,13 @@
       -%:VQ:
       -        echo venti will return once it is debugged.
       +<$PLAN9/src/mkhdr
       +
       +DIRS=srv
       +
       +TARG=\
       +        copy\
       +        read\
       +        sync\
       +        write\
       +
       +<$PLAN9/src/mkmany
       +<$PLAN9/src/mkdirs
        
   DIR diff --git a/src/cmd/venti/mkroot.c b/src/cmd/venti/mkroot.c
       t@@ -0,0 +1,59 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +char *host;
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: mkroot [-h host] name type score blocksize prev\n");
       +        threadexitsall("usage");
       +}
       +
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        uchar score[VtScoreSize];
       +        uchar buf[VtRootSize];
       +        VtConn *z;
       +        VtRoot root;
       +
       +        ARGBEGIN{
       +        case 'h':
       +                host = EARGF(usage());
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 5)
       +                usage();
       +
       +        ventifmtinstall();
       +
       +        strecpy(root.name, root.name+sizeof root.name, argv[0]);
       +        strecpy(root.type, root.type+sizeof root.type, argv[1]);
       +        if(vtparsescore(argv[2], strlen(argv[2]), nil, root.score) < 0)
       +                sysfatal("bad score '%s'", argv[2]);
       +        root.blocksize = atoi(argv[3]);
       +        if(vtparsescore(argv[4], strlen(argv[4]), nil, root.prev) < 0)
       +                sysfatal("bad score '%s'", argv[4]);
       +        vtrootpack(&root, buf);
       +
       +        z = vtdial(host);
       +        if(z == nil)
       +                sysfatal("could not connect to server: %r");
       +
       +        if(vtconnect(z) < 0)
       +                sysfatal("vtconnect: %r");
       +
       +        if(vtwrite(z, score, VtRootType, buf, VtRootSize) < 0)
       +                sysfatal("vtwrite: %r");
       +        if(vtsync(z) < 0)
       +                sysfatal("vtsync: %r");
       +        vthangup(z);
       +        print("%V\n", score);
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/randtest.c b/src/cmd/venti/randtest.c
       t@@ -0,0 +1,334 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <venti.h>
       +#include <libsec.h>
       +#include <thread.h>
       +
       +
       +enum { STACK = 32768 };
       +void xxxsrand(long);
       +long xxxlrand(void);
       +
       +Channel *cw;
       +Channel *cr;
       +char *host;
       +int blocksize, seed, randpct;
       +int doread, dowrite, packets, permute;
       +vlong totalbytes, cur;
       +VtConn *z;
       +int multi;
       +int maxpackets;
       +int sequence;
       +int doublecheck = 1;
       +uint *order;
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: randtest [-q] [-h host] [-s seed] [-b blocksize] [-p randpct] [-n totalbytes] [-M maxblocks] [-P] [-r] [-w]\n");
       +        threadexitsall("usage");
       +}
       +
       +void
       +wr(char *buf, char *buf2)
       +{
       +        uchar score[VtScoreSize], score2[VtScoreSize];
       +        DigestState ds;
       +
       +        memset(&ds, 0, sizeof ds);
       +        if(doublecheck)
       +                sha1((uchar*)buf, blocksize, score, &ds);
       +        if(vtwrite(z, score2, VtDataType, (uchar*)buf, blocksize) < 0)
       +                sysfatal("vtwrite %V at %,lld: %r", score, cur);
       +        if(doublecheck && memcmp(score, score2, VtScoreSize) != 0)
       +                sysfatal("score mismatch! %V %V", score, score2);
       +}
       +
       +void
       +wrthread(void *v)
       +{
       +        char *p;
       +
       +        USED(v);
       +        while((p = recvp(cw)) != nil){
       +                wr(p, nil);
       +                free(p);
       +        }
       +}
       +
       +void
       +rd(char *buf, char *buf2)
       +{
       +        uchar score[VtScoreSize];
       +        DigestState ds;
       +
       +        memset(&ds, 0, sizeof ds);
       +        sha1((uchar*)buf, blocksize, score, &ds);
       +        if(vtread(z, score, VtDataType, (uchar*)buf2, blocksize) < 0)
       +                sysfatal("vtread %V at %,lld: %r", score, cur);
       +        if(memcmp(buf, buf2, blocksize) != 0)
       +                sysfatal("bad data read! %V", score);
       +}
       +
       +void
       +rdthread(void *v)
       +{
       +        char *p, *buf2;
       +
       +        buf2 = vtmalloc(blocksize);
       +        USED(v);
       +        while((p = recvp(cr)) != nil){
       +                rd(p, buf2);
       +                free(p);
       +        }
       +}
       +
       +char *template;
       +
       +void
       +run(void (*fn)(char*, char*), Channel *c)
       +{
       +        int i, t, j, packets;
       +        char *buf2, *buf;
       +
       +        buf2 = vtmalloc(blocksize);
       +        buf = vtmalloc(blocksize);
       +        cur = 0;
       +        packets = totalbytes/blocksize;
       +        if(maxpackets == 0)
       +                maxpackets = packets;
       +        order = vtmalloc(packets*sizeof order[0]);
       +        for(i=0; i<packets; i++)
       +                order[i] = i;
       +        if(permute){
       +                for(i=1; i<packets; i++){
       +                        j = nrand(i+1);
       +                        t = order[i];
       +                        order[i] = order[j];
       +                        order[j] = t;
       +                }
       +        }
       +        for(i=0; i<packets && i<maxpackets; i++){
       +                memmove(buf, template, blocksize);
       +                *(uint*)buf = order[i];
       +                if(c){
       +                        sendp(c, buf);
       +                        buf = vtmalloc(blocksize);
       +                }else
       +                        (*fn)(buf, buf2);
       +                cur += blocksize;
       +        }
       +        free(order);
       +}
       +
       +#define TWID64        ((u64int)~(u64int)0)
       +
       +u64int
       +unittoull(char *s)
       +{
       +        char *es;
       +        u64int n;
       +
       +        if(s == nil)
       +                return TWID64;
       +        n = strtoul(s, &es, 0);
       +        if(*es == 'k' || *es == 'K'){
       +                n *= 1024;
       +                es++;
       +        }else if(*es == 'm' || *es == 'M'){
       +                n *= 1024*1024;
       +                es++;
       +        }else if(*es == 'g' || *es == 'G'){
       +                n *= 1024*1024*1024;
       +                es++;
       +        }else if(*es == 't' || *es == 'T'){
       +                n *= 1024*1024;
       +                n *= 1024*1024;
       +        }
       +        if(*es != '\0')
       +                return TWID64;
       +        return n;
       +}
       +
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int i, max;
       +        vlong t0;
       +        double t;
       +
       +        blocksize = 8192;
       +        seed = 0;
       +        randpct = 50;
       +        host = nil;
       +        doread = 0;
       +        dowrite = 0;
       +        totalbytes = 1*1024*1024*1024;
       +        fmtinstall('V', vtscorefmt);
       +        fmtinstall('F', vtfcallfmt);
       +
       +        ARGBEGIN{
       +        case 'b':
       +                blocksize = unittoull(EARGF(usage()));
       +                break;
       +        case 'h':
       +                host = EARGF(usage());
       +                break;
       +        case 'M':
       +                maxpackets = unittoull(EARGF(usage()));
       +                break;
       +        case 'm':
       +                multi = atoi(EARGF(usage()));
       +                break;
       +        case 'n':
       +                totalbytes = unittoull(EARGF(usage()));
       +                break;
       +        case 'p':
       +                randpct = atoi(EARGF(usage()));
       +                break;
       +        case 'P':
       +                permute = 1;
       +                break;
       +        case 'S':
       +                doublecheck = 0;
       +                ventidoublechecksha1 = 0;
       +                break;
       +        case 's':
       +                seed = atoi(EARGF(usage()));
       +                break;
       +        case 'r':
       +                doread = 1;
       +                break;
       +        case 'w':
       +                dowrite = 1;
       +                break;
       +        case 'V':
       +                chattyventi++;
       +                break;
       +        default:
       +                usage();
       +        }ARGEND
       +
       +        if(doread==0 && dowrite==0){
       +                doread = 1;
       +                dowrite = 1;
       +        }
       +
       +        z = vtdial(host);
       +        if(z == nil)
       +                sysfatal("could not connect to server: %r");
       +        if(vtconnect(z) < 0)
       +                sysfatal("vtconnect: %r");
       +
       +        if(multi){
       +                cr = chancreate(sizeof(void*), 0);
       +                cw = chancreate(sizeof(void*), 0);
       +                for(i=0; i<multi; i++){
       +                        proccreate(wrthread, nil, STACK);
       +                        proccreate(rdthread, nil, STACK);
       +                }
       +        }
       +
       +        template = vtmalloc(blocksize);
       +        xxxsrand(seed);
       +        max = (256*randpct)/100;
       +        if(max == 0)
       +                max = 1;
       +        for(i=0; i<blocksize; i++)
       +                template[i] = xxxlrand()%max;
       +        if(dowrite){
       +                t0 = nsec();
       +                run(wr, cw);
       +                for(i=0; i<multi; i++)
       +                        sendp(cw, nil);
       +                t = (nsec() - t0)/1.e9;
       +                print("write: %lld bytes / %.3f seconds = %.6f MB/s\n",
       +                        totalbytes, t, (double)totalbytes/1e6/t);
       +        }
       +        if(doread){
       +                t0 = nsec();
       +                run(rd, cr);
       +                for(i=0; i<multi; i++)
       +                        sendp(cr, nil);
       +                t = (nsec() - t0)/1.e9;
       +                print("read: %lld bytes / %.3f seconds = %.6f MB/s\n",
       +                        totalbytes, t, (double)totalbytes/1e6/t);
       +        }
       +        threadexitsall(nil);
       +}
       +
       +
       +/*
       + *        algorithm by
       + *        D. P. Mitchell & J. A. Reeds
       + */
       +
       +#define        LEN        607
       +#define        TAP        273
       +#define        MASK        0x7fffffffL
       +#define        A        48271
       +#define        M        2147483647
       +#define        Q        44488
       +#define        R        3399
       +#define        NORM        (1.0/(1.0+MASK))
       +
       +static        ulong        rng_vec[LEN];
       +static        ulong*        rng_tap = rng_vec;
       +static        ulong*        rng_feed = 0;
       +
       +static void
       +isrand(long seed)
       +{
       +        long lo, hi, x;
       +        int i;
       +
       +        rng_tap = rng_vec;
       +        rng_feed = rng_vec+LEN-TAP;
       +        seed = seed%M;
       +        if(seed < 0)
       +                seed += M;
       +        if(seed == 0)
       +                seed = 89482311;
       +        x = seed;
       +        /*
       +         *        Initialize by x[n+1] = 48271 * x[n] mod (2**31 - 1)
       +         */
       +        for(i = -20; i < LEN; i++) {
       +                hi = x / Q;
       +                lo = x % Q;
       +                x = A*lo - R*hi;
       +                if(x < 0)
       +                        x += M;
       +                if(i >= 0)
       +                        rng_vec[i] = x;
       +        }
       +}
       +
       +void
       +xxxsrand(long seed)
       +{
       +        isrand(seed);
       +}
       +
       +long
       +xxxlrand(void)
       +{
       +        ulong x;
       +
       +        rng_tap--;
       +        if(rng_tap < rng_vec) {
       +                if(rng_feed == 0) {
       +                        isrand(1);
       +                        rng_tap--;
       +                }
       +                rng_tap += LEN;
       +        }
       +        rng_feed--;
       +        if(rng_feed < rng_vec)
       +                rng_feed += LEN;
       +        x = (*rng_feed + *rng_tap) & MASK;
       +        *rng_feed = x;
       +
       +        return x;
       +}
       +
   DIR diff --git a/src/cmd/venti/read.c b/src/cmd/venti/read.c
       t@@ -0,0 +1,75 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <venti.h>
       +#include <libsec.h>
       +#include <thread.h>
       +
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: read [-h host] [-t type] score\n");
       +        threadexitsall("usage");
       +}
       +
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int type, n;
       +        uchar score[VtScoreSize];
       +        uchar *buf;
       +        VtConn *z;
       +        char *host;
       +
       +        fmtinstall('F', vtfcallfmt);
       +        fmtinstall('V', vtscorefmt);
       +
       +        host = nil;
       +        type = -1;
       +        ARGBEGIN{
       +        case 'h':
       +                host = EARGF(usage());
       +                break;
       +        case 't':
       +                type = atoi(argv[1]);
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 1)
       +                usage();
       +
       +        if(vtparsescore(argv[0], nil, score) < 0)
       +                sysfatal("could not parse score '%s': %r", argv[0]);
       +
       +        buf = vtmallocz(VtMaxLumpSize);
       +
       +        z = vtdial(host);
       +        if(z == nil)
       +                sysfatal("could not connect to server: %r");
       +
       +        if(vtconnect(z) < 0)
       +                sysfatal("vtconnect: %r");
       +
       +        if(type == -1){
       +                n = -1;
       +                for(type=0; type<VtMaxType; type++){
       +                        n = vtread(z, score, type, buf, VtMaxLumpSize);
       +                        if(n >= 0){
       +                                fprint(2, "venti/read%s%s %V %d\n", host ? " -h" : "", host ? host : "",
       +                                        score, type);
       +                                break;
       +                        }
       +                }
       +        }else{
       +                type = atoi(argv[1]);
       +                n = vtread(z, score, type, buf, VtMaxLumpSize);
       +        }
       +        vthangup(z);
       +        if(n < 0)
       +                sysfatal("could not read block: %r");
       +        if(write(1, buf, n) != n)
       +                sysfatal("write: %r");
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/readlist.c b/src/cmd/venti/readlist.c
       t@@ -0,0 +1,112 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <thread.h>
       +#include <venti.h>
       +#include <bio.h>
       +
       +char *host;
       +Biobuf b;
       +VtConn *z;
       +uchar *buf;
       +void run(Biobuf*);
       +int nn;
       +
       +/*
       + * print the command synopsis on file descriptor 2 and
       + * exit all threads with the status "usage".
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: readlist [-h host] list\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * convert the n-character hexadecimal string buf into the
       + * VtScoreSize-byte binary score.
       + * returns 0 on success.  if n is not exactly VtScoreSize*2 or a
       + * character is not a hex digit, sets the error string and returns -1.
       + * score is zeroed first, so it may be partly filled on failure.
       + */
       +int
       +parsescore(uchar *score, char *buf, int n)
       +{
       +        int pos, ch, nib;
       +
       +        memset(score, 0, VtScoreSize);
       +        if(n != VtScoreSize*2){
       +                werrstr("score wrong length %d", n);
       +                return -1;
       +        }
       +
       +        for(pos=0; pos<VtScoreSize*2; pos++){
       +                ch = buf[pos];
       +                if('0' <= ch && ch <= '9')
       +                        nib = ch - '0';
       +                else if('a' <= ch && ch <= 'f')
       +                        nib = ch - 'a' + 10;
       +                else if('A' <= ch && ch <= 'F')
       +                        nib = ch - 'A' + 10;
       +                else{
       +                        werrstr("bad score char %d '%c'", ch, ch);
       +                        return -1;
       +                }
       +
       +                /* even positions carry the high nibble of each byte */
       +                if(pos%2 == 0)
       +                        nib <<= 4;
       +                score[pos/2] |= nib;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * entry point of readlist: connect to the venti server (host, or the
       + * default chosen by vtdial when host is nil) and read every block
       + * named in the work lists.  with no file arguments the list is taken
       + * from file descriptor 0; otherwise each argument is opened and
       + * processed in order.  any failure is fatal.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int fd, i;
       +
       +        ARGBEGIN{
       +        case 'h':
       +                host = EARGF(usage());
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        fmtinstall('V', vtscorefmt);
       +        buf = vtmallocz(VtMaxLumpSize);
       +        z = vtdial(host);
       +        if(z == nil)
       +                sysfatal("could not connect to server: %r");
       +        if(vtconnect(z) < 0)
       +                sysfatal("vtconnect: %r");
       +
       +        if(argc == 0){
       +                Binit(&b, 0, OREAD);
       +                run(&b);
       +        }else{
       +                for(i=0; i<argc; i++){
       +                        if((fd = open(argv[i], OREAD)) < 0)
       +                                sysfatal("open %s: %r", argv[i]);
       +                        Binit(&b, fd, OREAD);
       +                        run(&b);
       +                        /*
       +                         * a Biobuf set up with Binit does not own its
       +                         * descriptor, so release both before reusing b;
       +                         * otherwise one fd leaks per work list.
       +                         */
       +                        Bterm(&b);
       +                        close(fd);
       +                }
       +        }
       +        threadexitsall(nil);
       +}
       +
       +/*
       + * read every block named in the work list b from the venti server z.
       + * each line must hold exactly two fields: a hexadecimal score and a
       + * block type parsed with atoi.  the data lands in buf and is then
       + * discarded; the running count is printed after every 1000 blocks.
       + * a malformed line or a failed read is fatal.
       + */
       +void
       +run(Biobuf *b)
       +{
       +        char *p, *f[10];
       +        int nf;
       +        uchar score[VtScoreSize];        /* parsescore fills VtScoreSize bytes */
       +        int type, n;
       +
       +        while((p = Brdline(b, '\n')) != nil){
       +                /* overwrite the delimiter so the line is a C string */
       +                p[Blinelen(b)-1] = 0;
       +                nf = tokenize(p, f, nelem(f));
       +                if(nf != 2)
       +                        sysfatal("syntax error in work list");
       +                if(parsescore(score, f[0], strlen(f[0])) < 0)
       +                        sysfatal("bad score %s in work list", f[0]);
       +                type = atoi(f[1]);
       +                n = vtread(z, score, type, buf, VtMaxLumpSize);
       +                if(n < 0)
       +                        sysfatal("could not read %s %s: %r", f[0], f[1]);
       +                /* the block is only fetched; its contents are not written anywhere */
       +                if(++nn%1000 == 0)
       +                        print("%d...", nn);
       +        }
       +}
   DIR diff --git a/src/cmd/venti/ro.c b/src/cmd/venti/ro.c
       t@@ -0,0 +1,112 @@
       +/* Copyright (c) 2004 Russ Cox */
       +#include <u.h>
       +#include <libc.h>
       +#include <venti.h>
       +#include <thread.h>
       +#include <libsec.h>
       +
       +/*
       + * declare the argument types of the %F and %T print verbs for the
       + * compiler's format checker.
       + * NOTE(review): presumably skipped when unistd.h has been included
       + * because the host compiler does not know this pragma - confirm.
       + */
       +#ifndef _UNISTD_H_
       +#pragma varargck type "F" VtFcall*
       +#pragma varargck type "T" void
       +#endif
       +
       +/* connection to the venti server that reads are forwarded to; set in threadmain */
       +VtConn *z;
       +/* incremented by each -v; when nonzero, requests and replies are printed on fd 2 */
       +int verbose;
       +
       +enum
       +{
       +        /* NOTE(review): not referenced in the visible code; threadmain passes 16384 to threadcreate */
       +        STACK = 8192,
       +};
       +
       +/*
       + * print the command synopsis on file descriptor 2 and
       + * exit all threads with the status "usage".
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: venti/ro [-v] [-a address] [-h address]\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * thread body that services one read request (v is the VtReq).
       + * the block is fetched from the upstream server z into a freshly
       + * allocated buffer.  on success the buffer is handed to the reply
       + * packet, with free registered as the routine that releases it; on
       + * failure the reply becomes a VtRerror carrying the current error
       + * string and the buffer is freed here.
       + */
       +void
       +readthread(void *v)
       +{
       +        VtReq *req;
       +        uchar *data;
       +        char e[ERRMAX];
       +        int nread;
       +
       +        req = v;
       +        data = vtmalloc(req->tx.count);
       +        nread = vtread(z, req->tx.score, req->tx.blocktype, data, req->tx.count);
       +        if(nread >= 0)
       +                req->rx.data = packetforeign(data, nread, free, data);
       +        else{
       +                req->rx.msgtype = VtRerror;
       +                rerrstr(e, sizeof e);
       +                req->rx.error = vtstrdup(e);
       +                free(data);
       +        }
       +        if(verbose)
       +                fprint(2, "-> %F\n", &req->rx);
       +        vtrespond(req);
       +}
       +
       +/*
       + * entry point of venti/ro: a read-only front end to a venti server.
       + * -a sets the address to listen on (default "tcp!*!venti"), -h the
       + * address of the upstream server passed to vtdial, -v turns on tracing.
       + * reads are handed to a new readthread each; writes are answered
       + * with the error "read-only server"; every other request is
       + * answered with the reply type tx.msgtype+1 and no further work.
       + */
       +void
       +threadmain(int argc, char **argv)
       +{
       +        char *laddr, *vaddr;
       +        VtSrv *s;
       +        VtReq *req;
       +
       +        fmtinstall('F', vtfcallfmt);
       +        fmtinstall('V', vtscorefmt);
       +
       +        laddr = "tcp!*!venti";
       +        vaddr = nil;
       +
       +        ARGBEGIN{
       +        case 'a':
       +                laddr = EARGF(usage());
       +                break;
       +        case 'h':
       +                vaddr = EARGF(usage());
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        }ARGEND
       +
       +        z = vtdial(vaddr);
       +        if(z == nil)
       +                sysfatal("vtdial %s: %r", vaddr);
       +        if(vtconnect(z) < 0)
       +                sysfatal("vtconnect: %r");
       +
       +        s = vtlisten(laddr);
       +        if(s == nil)
       +                sysfatal("vtlisten %s: %r", laddr);
       +
       +        for(;;){
       +                req = vtgetreq(s);
       +                if(req == nil)
       +                        break;
       +
       +                /* default reply type; replaced by VtRerror on failure */
       +                req->rx.msgtype = req->tx.msgtype+1;
       +                if(verbose)
       +                        fprint(2, "<- %F\n", &req->tx);
       +
       +                if(req->tx.msgtype == VtTread){
       +                        /* readthread traces and responds by itself */
       +                        threadcreate(readthread, req, 16384);
       +                        continue;
       +                }
       +                if(req->tx.msgtype == VtTwrite){
       +                        req->rx.error = vtstrdup("read-only server");
       +                        req->rx.msgtype = VtRerror;
       +                }
       +
       +                if(verbose)
       +                        fprint(2, "-> %F\n", &req->rx);
       +                vtrespond(req);
       +        }
       +        threadexitsall(nil);
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/arena.c b/src/cmd/venti/srv/arena.c
       t@@ -0,0 +1,737 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +typedef struct ASum ASum;
       +
       +/*
       + * one entry in the list of arenas waiting to be checksummed
       + */
       +struct ASum
       +{
       +        Arena        *arena;
       +        ASum        *next;
       +};
       +
       +static void        sealarena(Arena *arena);
       +static int        okarena(Arena *arena);
       +static int        loadarena(Arena *arena);
       +static CIBlock        *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
       +static void        putcib(Arena *arena, CIBlock *cib);
       +static void        sumproc(void *);
       +
       +/* sumlock protects sumq; sumwait is the rendezvous sumproc sleeps on */
       +static QLock        sumlock;
       +static Rendez        sumwait;
       +/* head of the list of pending ASum entries */
       +static ASum        *sumq;
       +/* block of zeros that newarena writes at the start of a new arena's data */
       +static uchar zero[8192];
       +
       +/* argument passed to sleep before each read in sumarena */
       +int        arenasumsleeptime;
       +
       +/*
       + * start the background proc that checksums arenas.
       + * sumwait.l is pointed at sumlock first; backsumarena treats a nil
       + * sumwait.l as "not started" and does nothing in that case.
       + * returns 0 on success, -1 with the error set if the proc can't start.
       + */
       +int
       +initarenasum(void)
       +{
       +        sumwait.l = &sumlock;
       +
       +        if(vtproc(sumproc, nil) >= 0)
       +                return 0;
       +
       +        seterr(EOk, "can't start arena checksum slave: %r");
       +        return -1;
       +}
       +
       +/*
       + * build the in-memory Arena for the region [base, base+size) of part
       + * from what is stored on disk.  the first and last blocksize bytes of
       + * the region hold the header and trailer, so the arena's own base and
       + * size exclude them.
       + * returns nil if the trailer can't be loaded or fails okarena.
       + * a sealed arena whose score is still zeroscore is queued for
       + * checksumming.
       + */
       +Arena*
       +initarena(Part *part, u64int base, u64int size, u32int blocksize)
       +{
       +        Arena *a;
       +
       +        a = MKZ(Arena);
       +        a->part = part;
       +        a->blocksize = blocksize;
       +        a->clumpmax = a->blocksize / ClumpInfoSize;
       +        a->base = base + blocksize;
       +        a->size = size - 2 * blocksize;
       +
       +        if(loadarena(a) < 0){
       +                seterr(ECorrupt, "arena header or trailer corrupted");
       +                goto Bad;
       +        }
       +        if(okarena(a) < 0)
       +                goto Bad;
       +
       +        if(a->diskstats.sealed && scorecmp(zeroscore, a->score)==0)
       +                backsumarena(a);
       +        return a;
       +
       +Bad:
       +        freearena(a);
       +        return nil;
       +}
       +
       +/*
       + * release the memory of an Arena structure; a nil arena is ignored.
       + */
       +void
       +freearena(Arena *arena)
       +{
       +        if(arena != nil)
       +                free(arena);
       +}
       +
       +/*
       + * create a new, empty arena called name in the region
       + * [base, base+size) of part and write it to disk.
       + * version 4 arenas use the fixed clump magic _ClumpMagic; other
       + * versions get a random magic that is neither _ClumpMagic nor 0.
       + * the trailer and header are written, and the start of the data
       + * area is zeroed (at most sizeof zero bytes, at most one block).
       + * returns nil if the name is illegal or any of the writes fails.
       + */
       +Arena*
       +newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
       +{
       +        int bsize;
       +        Arena *arena;
       +
       +        if(nameok(name) < 0){
       +                /* the name was passed before but the format had no verb for it */
       +                seterr(EOk, "illegal arena name %s", name);
       +                return nil;
       +        }
       +        arena = MKZ(Arena);
       +        arena->part = part;
       +        arena->version = vers;
       +        if(vers == ArenaVersion4)
       +                arena->clumpmagic = _ClumpMagic;
       +        else{
       +                do
       +                        arena->clumpmagic = fastrand();
       +                while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
       +        }
       +        arena->blocksize = blocksize;
       +        arena->clumpmax = arena->blocksize / ClumpInfoSize;
       +        /* the header and trailer blocks are not part of the arena proper */
       +        arena->base = base + blocksize;
       +        arena->size = size - 2 * blocksize;
       +
       +        namecp(arena->name, name);
       +
       +        bsize = sizeof zero;
       +        if(bsize > arena->blocksize)
       +                bsize = arena->blocksize;
       +
       +        if(wbarena(arena)<0 || wbarenahead(arena)<0
       +        || writepart(arena->part, arena->base, zero, bsize)<0){
       +                freearena(arena);
       +                return nil;
       +        }
       +
       +        return arena;
       +}
       +
       +/*
       + * fetch the directory entry of clump number clump into ci.
       + * returns 0 on success, -1 if the directory block can't be obtained.
       + */
       +int
       +readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
       +{
       +        CIBlock rock, *cib;
       +
       +        cib = getcib(arena, clump, 0, &rock);
       +        if(cib != nil){
       +                unpackclumpinfo(ci, &cib->data->data[cib->offset]);
       +                putcib(arena, cib);
       +                return 0;
       +        }
       +        return -1;
       +}
       +
       +/*
       + * fetch up to n consecutive directory entries, starting at clump
       + * number clump, into cis.  stops at the first entry that can't be
       + * obtained and returns the number of entries actually read.
       + */
       +int
       +readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
       +{
       +        CIBlock rock, *cib;
       +        int got;
       +
       +        got = 0;
       +        while(got < n){
       +                cib = getcib(arena, clump + got, 0, &rock);
       +                if(cib == nil)
       +                        break;
       +                unpackclumpinfo(&cis[got], &cib->data->data[cib->offset]);
       +                putcib(arena, cib);
       +                got++;
       +        }
       +        return got;
       +}
       +
       +/*
       + * store the directory entry ci for clump number clump.
       + * must be called with the arena locked.
       + * the directory block is marked dirty (DirtyArenaCib) before the
       + * entry is packed into it.
       + * returns 0 on success, -1 if the directory block can't be obtained.
       + */
       +int
       +writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
       +{
       +        CIBlock rock, *cib;
       +
       +        cib = getcib(arena, clump, 1, &rock);
       +        if(cib != nil){
       +                dirtydblock(cib->data, DirtyArenaCib);
       +                packclumpinfo(ci, &cib->data->data[cib->offset]);
       +                putcib(arena, cib);
       +                return 0;
       +        }
       +        return -1;
       +}
       +
       +/*
       + * number of bytes taken by a clump directory holding the given
       + * number of clumps: one more than clumps/clumpmax blocks,
       + * each of arena->blocksize bytes.
       + * NOTE(review): the product is computed in the operands' own
       + * types before widening to u64int - confirm it cannot overflow.
       + */
       +u64int
       +arenadirsize(Arena *arena, u32int clumps)
       +{
       +        return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
       +}
       +
       +/*
       + * read a clump of data
       + * n is a hint of the size of the data, not including the header
       + * make sure it won't run off the end, then return the number of bytes actually read
       + *
       + * aa is an offset relative to arena->base.  the request is trimmed
       + * so that it ends before the clump directory, whose size is taken
       + * from memstats.clumps under the arena lock.
       + * failure is reported as -1 converted to u32int: when n is 0, when
       + * aa lies at or beyond the directory, or when a block can't be read.
       + */
       +u32int
       +readarena(Arena *arena, u64int aa, u8int *buf, long n)
       +{
       +        DBlock *b;
       +        u64int a;
       +        u32int blocksize, off, m;
       +        long nn;
       +
       +        if(n == 0)
       +                return -1;
       +
       +        /* a = first offset that belongs to the clump directory */
       +        qlock(&arena->lock);
       +        a = arena->size - arenadirsize(arena, arena->memstats.clumps);
       +        qunlock(&arena->lock);
       +        if(aa >= a){
       +                seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
       +                return -1;
       +        }
       +        if(aa + n > a)
       +                n = a - aa;
       +
       +        /*
       +         * split the partition address into a block-aligned address a
       +         * and an offset off within that block.
       +         * the mask assumes blocksize is a power of two - TODO confirm.
       +         */
       +        blocksize = arena->blocksize;
       +        a = arena->base + aa;
       +        off = a & (blocksize - 1);
       +        a -= off;
       +        nn = 0;
       +        /* copy block by block; only the first block can start at a nonzero offset */
       +        for(;;){
       +                b = getdblock(arena->part, a, OREAD);
       +                if(b == nil)
       +                        return -1;
       +                m = blocksize - off;
       +                if(m > n - nn)
       +                        m = n - nn;
       +                memmove(&buf[nn], &b->data[off], m);
       +                putdblock(b);
       +                nn += m;
       +                if(nn == n)
       +                        break;
       +                off = 0;
       +                a += blocksize;
       +        }
       +        return n;
       +}
       +
       +/*
       + * write some data to the clump section at a given offset
       + * used to fix up corrupted arenas.
       + *
       + * aa is an offset relative to arena->base; the n bytes of clbuf must
       + * fit entirely before the clump directory.  the arena lock is held
       + * for the whole operation.  the blocks are only marked dirty
       + * (DirtyArena); nothing is written to the partition directly here.
       + * returns n on success and -1 (as a u32int) when n is 0, when the
       + * range is out of bounds, or when a block can't be obtained.
       + */
       +u32int
       +writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
       +{
       +        DBlock *b;
       +        u64int a;
       +        u32int blocksize, off, m;
       +        long nn;
       +
       +        if(n == 0)
       +                return -1;
       +
       +        qlock(&arena->lock);
       +        a = arena->size - arenadirsize(arena, arena->memstats.clumps);
       +        if(aa >= a || aa + n > a){
       +                qunlock(&arena->lock);
       +                seterr(EOk, "writing beyond arena clump storage");
       +                return -1;
       +        }
       +
       +        blocksize = arena->blocksize;
       +        a = arena->base + aa;
       +        off = a & (blocksize - 1);
       +        a -= off;
       +        nn = 0;
       +        for(;;){
       +                /* m = number of bytes that go into this block */
       +                m = blocksize - off;
       +                if(m > n - nn)
       +                        m = n - nn;
       +                /*
       +                 * a block that is only partly overwritten must be opened
       +                 * ORDWR so that the bytes around the written range keep
       +                 * their contents (presumably OWRITE lets the cache skip
       +                 * reading the old block - confirm against getdblock).
       +                 * the old test compared the total n, not the remaining
       +                 * bytes, so a partial final block was opened OWRITE.
       +                 */
       +                b = getdblock(arena->part, a, m < blocksize ? ORDWR : OWRITE);
       +                if(b == nil){
       +                        qunlock(&arena->lock);
       +                        return -1;
       +                }
       +                dirtydblock(b, DirtyArena);
       +                memmove(&b->data[off], &clbuf[nn], m);
       +                putdblock(b);
       +                nn += m;
       +                if(nn == n)
       +                        break;
       +                off = 0;
       +                a += blocksize;
       +        }
       +        qunlock(&arena->lock);
       +        return n;
       +}
       +
       +/*
       + * allocate space for the clump and write it,
       + * updating the arena directory
       + *
       + * start is added to arena-relative offsets to form the addresses
       + * stored in *pa and handed to setdcachestate.
       + * returns the arena-relative offset at which the clump was placed
       + * and sets *pa to start plus that offset.
       + * returns TWID64, leaving *pa untouched, when the arena is sealed,
       + * when the clump does not fit (the arena is then marked sealed in
       + * memstats), or when packing the clump or getting a block fails.
       + *
       + * ZZZ question: should this distinguish between an arena
       + * filling up and real errors writing the clump?
       + */
       +u64int
       +writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64int start, u64int *pa)
       +{
       +        DBlock *b;
       +        u64int a, aa;
       +        u32int clump, n, nn, m, off, blocksize;
       +        int ok;
       +        AState as;
       +
       +        /*
       +         * n counts U32Size bytes more than memstats.used advances by
       +         * below, so the next clump overwrites that tail.
       +         * NOTE(review): presumably an end marker produced by packclump - confirm.
       +         */
       +        n = c->info.size + ClumpSize + U32Size;
       +        qlock(&arena->lock);
       +        aa = arena->memstats.used;
       +        /* the clump, a further U32Size and the grown directory must all fit */
       +        if(arena->memstats.sealed
       +        || aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
       +                if(!arena->memstats.sealed){
       +                        trace(0, "seal memstats %s", arena->name);
       +                        arena->memstats.sealed = 1;
       +                        as.arena = arena;
       +                        as.aa = start+aa;
       +                        as.stats = arena->memstats;
       +                        setdcachestate(&as);
       +                }
       +                qunlock(&arena->lock);
       +                return TWID64;
       +        }
       +        if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
       +                qunlock(&arena->lock);
       +                return TWID64;
       +        }
       +
       +        /*
       +         * write the data out one block at a time
       +         */
       +        blocksize = arena->blocksize;
       +        a = arena->base + aa;
       +        off = a & (blocksize - 1);
       +        a -= off;
       +        nn = 0;
       +        for(;;){
       +                /* only a block entered part way through is opened ORDWR */
       +                b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
       +                if(b == nil){
       +                        qunlock(&arena->lock);
       +                        return TWID64;
       +                }
       +                dirtydblock(b, DirtyArena);
       +                m = blocksize - off;
       +                if(m > n - nn)
       +                        m = n - nn;
       +                memmove(&b->data[off], &clbuf[nn], m);
       +        //        ok = writepart(arena->part, a, b->data, blocksize);
       +                /* with the direct write disabled, ok is always 0 and the check below never fires */
       +                ok = 0;
       +                putdblock(b);
       +                if(ok < 0){
       +                        qunlock(&arena->lock);
       +                        return TWID64;
       +                }
       +                nn += m;
       +                if(nn == n)
       +                        break;
       +                off = 0;
       +                a += blocksize;
       +        }
       +
       +        arena->memstats.used += c->info.size + ClumpSize;
       +        arena->memstats.uncsize += c->info.uncsize;
       +        /* a clump stored smaller than its uncompressed size counts as compressed */
       +        if(c->info.size < c->info.uncsize)
       +                arena->memstats.cclumps++;
       +
       +        clump = arena->memstats.clumps++;
       +        if(arena->memstats.clumps == 0)
       +                sysfatal("clumps wrapped");
       +        arena->wtime = now();
       +        if(arena->ctime == 0)
       +                arena->ctime = arena->wtime;
       +
       +        /* NOTE(review): the result of writeclumpinfo is not checked */
       +        writeclumpinfo(arena, clump, &c->info);
       +
       +        /* set up for call to setdcachestate */
       +        as.arena = arena;
       +        as.aa = start+arena->memstats.used;
       +        as.stats = arena->memstats;
       +
       +        /* update this before calling setdcachestate so it cannot be behind dcache.diskstate */
       +        *pa = start+aa;
       +        setdcachestate(&as);
       +        qunlock(&arena->lock);
       +
       +        return aa;
       +}
       +
       +/*
       + * order two sets of arena tail statistics.
       + * returns -1, 0 or 1 as a sorts before, equal to or after b,
       + * comparing used, clumps, cclumps, uncsize and sealed in that order.
       + */
       +int
       +atailcmp(ATailStats *a, ATailStats *b)
       +{
       +        /* good test */
       +        if(a->used != b->used)
       +                return a->used < b->used ? -1 : 1;
       +
       +        /* suspect tests - why order this way? (no one cares) */
       +        if(a->clumps != b->clumps)
       +                return a->clumps < b->clumps ? -1 : 1;
       +        if(a->cclumps != b->cclumps)
       +                return a->cclumps < b->cclumps ? -1 : 1;
       +        if(a->uncsize != b->uncsize)
       +                return a->uncsize < b->uncsize ? -1 : 1;
       +        if(a->sealed != b->sealed)
       +                return a->sealed < b->sealed ? -1 : 1;
       +
       +        /* everything matches */
       +        return 0;
       +}
       +
       +/*
       + * record as->stats as the disk statistics of as->arena and write
       + * its trailer.  arenas that precede it in mainindex, back to (but
       + * not including) the nearest one whose diskstats already equal its
       + * memstats, get their diskstats set from their memstats and their
       + * trailers written as well.
       + * an arena whose sealed flag changes, and that is not already
       + * queued, is passed to sealarena.
       + * the call is ignored, with a message on fd 2, if the arena is not
       + * in mainindex or as->aa lies outside its address range.
       + */
       +void
       +setatailstate(AState *as)
       +{
       +        int i, j, osealed;
       +        Arena *a;
       +        Index *ix;
       +
       +        trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);
       +
       +        /* find the index slot i that holds as->arena */
       +        ix = mainindex;
       +        for(i=0; i<ix->narenas; i++)
       +                if(ix->arenas[i] == as->arena)
       +                        break;
       +        if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
       +                fprint(2, "funny settailstate 0x%llux\n", as->aa);
       +                return;
       +        }
       +
       +        /* walk back to the nearest earlier arena whose disk and memory stats agree */
       +        for(j=i; --j>=0; ){
       +                a = ix->arenas[j];
       +                if(atailcmp(&a->diskstats, &a->memstats) == 0)
       +                        break;
       +        }
       +        /* bring every arena after it, up to and including i, up to date */
       +        for(j++; j<=i; j++){
       +                a = ix->arenas[j];
       +                qlock(&a->lock);
       +                osealed = a->diskstats.sealed;
       +                if(j == i)
       +                        a->diskstats = as->stats;
       +                else
       +                        a->diskstats = a->memstats;
       +                /* NOTE(review): the result of wbarena is not checked */
       +                wbarena(a);
       +                if(a->diskstats.sealed != osealed && !a->inqueue)
       +                        sealarena(a);
       +                qunlock(&a->lock);
       +        }
       +}
       +
       +/*
       + * once sealed, an arena never has any data added to it.
       + * it should only be changed to fix errors.
       + * marks the arena as queued (inqueue) and hands it to backsumarena
       + * so that its checksum is computed by the summing proc.
       + * NOTE(review): the original comment said this also syncs the clump
       + * directory, but no sync is visible in this function - confirm.
       + */
       +static void
       +sealarena(Arena *arena)
       +{
       +        arena->inqueue = 1;
       +        backsumarena(arena);
       +}
       +
       +/*
       + * queue an arena to be checksummed by sumproc.
       + * does nothing if initarenasum has not set up sumwait, or if the
       + * queue entry can't be allocated.
       + * new entries go on the front of sumq.
       + */
       +void
       +backsumarena(Arena *arena)
       +{
       +        ASum *item;
       +
       +        if(sumwait.l == nil)
       +                return;
       +
       +        item = MK(ASum);
       +        if(item == nil)
       +                return;
       +        /* item is private until it is linked in, so fill it before locking */
       +        item->arena = arena;
       +
       +        qlock(&sumlock);
       +        item->next = sumq;
       +        sumq = item;
       +        rwakeup(&sumwait);
       +        qunlock(&sumlock);
       +}
       +
       +/*
       + * body of the checksum proc: forever take the first entry off sumq,
       + * sleeping on sumwait while the queue is empty, and checksum its
       + * arena with sumarena.  the lock is dropped before the summing starts.
       + */
       +static void
       +sumproc(void *unused)
       +{
       +        ASum *head;
       +        Arena *todo;
       +
       +        USED(unused);
       +
       +        for(;;){
       +                qlock(&sumlock);
       +                for(;;){
       +                        head = sumq;
       +                        if(head != nil)
       +                                break;
       +                        rsleep(&sumwait);
       +                }
       +                sumq = head->next;
       +                qunlock(&sumlock);
       +
       +                todo = head->arena;
       +                free(head);
       +                sumarena(todo);
       +        }
       +}
       +
       +/*
       + * compute the sha1 checksum of a whole arena - header block, data
       + * and trailer block - and record it in the last VtScoreSize bytes
       + * of the trailer block on disk and in arena->score.
       + * the trailer's own score field is summed as if it held zeroscore.
       + * sleeps arenasumsleeptime before each read of the main loop.
       + * allocation and read failures are logged and leave the arena unsummed.
       + */
       +void
       +sumarena(Arena *arena)
       +{
       +        ZBlock *b;
       +        DigestState s;
       +        u64int a, e;
       +        u32int bs;
       +        u8int score[VtScoreSize];
       +
       +        /* read in units of MaxIoSize, or of one arena block if that is larger */
       +        bs = MaxIoSize;
       +        if(bs < arena->blocksize)
       +                bs = arena->blocksize;
       +
       +        /*
       +         * read & sum all blocks except the last one
       +         */
       +        memset(&s, 0, sizeof s);
       +        b = alloczblock(bs, 0, arena->part->blocksize);
       +        if(b == nil){
       +                /* wbarenahead and loadarena check this too; b->data is used below */
       +                logerr(EOk, "sumarena can't sum %s: can't allocate buffer: %r", arena->name);
       +                return;
       +        }
       +        e = arena->base + arena->size;
       +        for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
       +                sleep(arenasumsleeptime);
       +                /* near the end, fall back to single blocks so as not to read past e */
       +                if(a + bs > e)
       +                        bs = arena->blocksize;
       +                if(readpart(arena->part, a, b->data, bs) < 0)
       +                        goto ReadErr;
       +                addstat(StatSumRead, 1);
       +                addstat(StatSumReadBytes, bs);
       +                sha1(b->data, bs, nil, &s);
       +        }
       +
       +        /*
       +         * the last one is special, since it may already have the checksum included
       +         */
       +        bs = arena->blocksize;
       +        if(readpart(arena->part, e, b->data, bs) < 0){
       +ReadErr:
       +                logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
       +                freezblock(b);
       +                return;
       +        }
       +        addstat(StatSumRead, 1);
       +        addstat(StatSumReadBytes, bs);
       +
       +        /* sum the trailer with zeroscore in place of its stored score */
       +        sha1(b->data, bs-VtScoreSize, nil, &s);
       +        sha1(zeroscore, VtScoreSize, nil, &s);
       +        sha1(nil, 0, score, &s);
       +
       +        /*
       +         * check for no checksum or the same
       +         *
       +         * the writepart is okay because we flushed the dcache in sealarena
       +         */
       +        if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0){
       +                if(scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
       +                        logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
       +                                arena->name, &b->data[bs - VtScoreSize], score);
       +                scorecp(&b->data[bs - VtScoreSize], score);
       +                if(writepart(arena->part, e, b->data, bs) < 0)
       +                        logerr(EOk, "sumarena can't write sum for %s: %r", arena->name);
       +        }
       +        freezblock(b);
       +
       +        qlock(&arena->lock);
       +        scorecp(arena->score, score);
       +        qunlock(&arena->lock);
       +}
       +
       +/*
       + * pack the arena trailer into its block, located at
       + * arena->base + arena->size, and mark that block dirty
       + * (DirtyArenaTrailer).
       + * returns 0 on success; -1 if the block can't be obtained, if the
       + * arena fails okarena, or if packing fails.
       + */
       +int
       +wbarena(Arena *arena)
       +{
       +        DBlock *tail;
       +        int rv;
       +
       +        tail = getdblock(arena->part, arena->base + arena->size, OWRITE);
       +        if(tail == nil){
       +                logerr(EAdmin, "can't write arena trailer: %r");
       +                return -1;
       +        }
       +        dirtydblock(tail, DirtyArenaTrailer);
       +
       +        rv = 0;
       +        if(okarena(arena) < 0 || packarena(arena, tail->data) < 0)
       +                rv = -1;
       +        putdblock(tail);
       +        return rv;
       +}
       +
       +/*
       + * build the arena header from the in-memory arena and write it
       + * straight to the partition, in the block just before arena->base.
       + * the size recorded in the header includes the header and trailer
       + * blocks.
       + * returns 0 on success; -1 if the buffer can't be allocated or the
       + * header can't be packed or written.
       + */
       +int
       +wbarenahead(Arena *arena)
       +{
       +        ArenaHead head;
       +        ZBlock *zb;
       +        int rv;
       +
       +        namecp(head.name, arena->name);
       +        head.version = arena->version;
       +        head.size = arena->size + 2 * arena->blocksize;
       +        head.blocksize = arena->blocksize;
       +        head.clumpmagic = arena->clumpmagic;
       +
       +        zb = alloczblock(arena->blocksize, 1, arena->part->blocksize);
       +        if(zb == nil){
       +                logerr(EAdmin, "can't write arena header: %r");
       +                return -1;
       +        }
       +
       +        /*
       +         * this writepart is okay because it only happens
       +         * during initialization.
       +         */
       +        rv = -1;
       +        if(packarenahead(&head, zb->data) >= 0
       +        && writepart(arena->part, arena->base - arena->blocksize, zb->data, arena->blocksize) >= 0)
       +                rv = 0;
       +        freezblock(zb);
       +        return rv;
       +}
       +
       +/*
       + * read the arena header and trailer blocks from disk
       + *
       + * the trailer is authoritative: it is unpacked into arena, and the
       + * last VtScoreSize bytes of its block become arena->score.
       + * returns -1 if the buffer can't be allocated, the trailer can't be
       + * read or unpacked, or the version is neither ArenaVersion4 nor
       + * ArenaVersion5.
       + * problems with the header (unreadable, corrupt, or disagreeing
       + * with the trailer) are only logged; the function still returns 0.
       + */
       +static int
       +loadarena(Arena *arena)
       +{
       +        ArenaHead head;
       +        ZBlock *b;
       +
       +        b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
       +        if(b == nil)
       +                return -1;
       +        /* the trailer block sits right after the arena proper */
       +        if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
       +                freezblock(b);
       +                return -1;
       +        }
       +        if(unpackarena(arena, b->data) < 0){
       +                freezblock(b);
       +                return -1;
       +        }
       +        if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
       +                seterr(EAdmin, "unknown arena version %d", arena->version);
       +                freezblock(b);
       +                return -1;
       +        }
       +        scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);
       +
       +        /* the header block sits right before the arena proper; the buffer is reused */
       +        if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
       +                logerr(EAdmin, "can't read arena header: %r");
       +                freezblock(b);
       +                return 0;
       +        }
       +        if(unpackarenahead(&head, b->data) < 0)
       +                logerr(ECorrupt, "corrupted arena header: %r");
       +        else if(namecmp(arena->name, head.name)!=0
       +             || arena->clumpmagic != head.clumpmagic
       +             || arena->version != head.version
       +             || arena->blocksize != head.blocksize
       +             || arena->size + 2 * arena->blocksize != head.size){
       +                /* report only the first field that differs */
       +                if(namecmp(arena->name, head.name)!=0)
       +                        logerr(ECorrupt, "arena tail name %s head %s", 
       +                                arena->name, head.name);
       +                else if(arena->clumpmagic != head.clumpmagic)
       +                        logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
       +                                (ulong)arena->clumpmagic, (ulong)head.clumpmagic);
       +                else if(arena->version != head.version)
       +                        logerr(ECorrupt, "arena tail version %d head version %d",
       +                                arena->version, head.version);
       +                else if(arena->blocksize != head.blocksize)
       +                        logerr(ECorrupt, "arena tail block size %d head %d",
       +                                arena->blocksize, head.blocksize);
       +                else if(arena->size+2*arena->blocksize != head.size)
       +                        /* NOTE(review): head.size is passed to %lud without a cast, unlike the first argument - confirm its type */
       +                        logerr(ECorrupt, "arena tail size %lud head %lud",
       +                                (ulong)arena->size+2*arena->blocksize, head.size);
       +                else
       +                        logerr(ECorrupt, "arena header inconsistent with arena data");
       +        }
       +        freezblock(b);
       +
       +        return 0;
       +}
       +
       +/*
       + * Cross-check the on-disk statistics of an arena.
       + * Returns -1 (after calling seterr) only when the used space plus
       + * the clump directory no longer fits inside the arena; every other
       + * inconsistency is reported through logerr and tolerated.
       + */
       +static int
       +okarena(Arena *arena)
       +{
       +        int status;
       +        u64int dirbytes;
       +
       +        status = 0;
       +
       +        /* data plus the directory for that many clumps must fit */
       +        dirbytes = arenadirsize(arena, arena->diskstats.clumps);
       +        if(arena->size < arena->diskstats.used + dirbytes){
       +                seterr(ECorrupt, "arena used > size");
       +                status = -1;
       +        }
       +
       +        if(arena->diskstats.clumps < arena->diskstats.cclumps)
       +                logerr(ECorrupt, "arena has more compressed clumps than total clumps");
       +
       +        if(arena->diskstats.used > arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize)
       +                logerr(ECorrupt, "arena uncompressed size inconsistent with used space %lld %d %lld", arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
       +
       +        if(arena->wtime < arena->ctime)
       +                logerr(ECorrupt, "arena creation time after last write time");
       +
       +        return status;
       +}
       +
       +/*
       + * Fetch the clump directory block that holds the entry for clump
       + * and describe it in rock (block number, byte offset of the entry,
       + * and the disk block itself).  Directory block k is read from
       + * base + size - (k+1)*blocksize, i.e. the directory is laid out
       + * backward from the end of the arena.
       + * Returns rock on success, nil if clump is out of range or the
       + * block cannot be obtained.
       + */
       +static CIBlock*
       +getcib(Arena *arena, int clump, int writing, CIBlock *rock)
       +{
       +        int mode;
       +        u32int block, off;
       +
       +        if(clump >= arena->memstats.clumps){
       +                seterr(EOk, "clump directory access out of range");
       +                return nil;
       +        }
       +
       +        block = clump / arena->clumpmax;
       +        off = (clump - block * arena->clumpmax) * ClumpInfoSize;
       +        rock->block = block;
       +        rock->offset = off;
       +
       +        /*
       +         * Readers use OREAD.  Writers use ORDWR, except when writing
       +         * the first entry of a block for the most recent clump, which
       +         * uses OWRITE (presumably because the block holds nothing
       +         * worth reading yet -- confirm against getdblock).
       +         */
       +        mode = OREAD;
       +        if(writing){
       +                mode = ORDWR;
       +                if(off == 0 && clump == arena->memstats.clumps-1)
       +                        mode = OWRITE;
       +        }
       +
       +        rock->data = getdblock(arena->part,
       +                arena->base + arena->size - (block + 1) * arena->blocksize, mode);
       +        if(rock->data == nil)
       +                return nil;
       +        return rock;
       +}
       +
       +/*
       + * Release the directory block held in cib->data (as set up by
       + * getcib) and clear the pointer so it cannot be reused by mistake.
       + * The arena argument is not used.
       + */
       +static void
       +putcib(Arena *arena, CIBlock *cib)
       +{
       +        putdblock(cib->data);
       +        cib->data = nil;
       +}
   DIR diff --git a/src/cmd/venti/srv/arenas.c b/src/cmd/venti/srv/arenas.c
       t@@ -0,0 +1,414 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +typedef struct AHash        AHash;
       +
       +/*
       + * Hash table for finding arenas by name.
       + * Each bucket is a singly linked chain of AHash entries.
       + */
       +struct AHash
       +{
       +        AHash        *next;                /* next entry in the same bucket */
       +        Arena        *arena;                /* arena registered under arena->name */
       +};
       +
       +enum
       +{
       +        /* bucket indices are computed with & (AHashSize - 1), so keep this a power of 2 */
       +        AHashSize        = 512
       +};
       +
       +/* bucket heads, indexed by hashstr(name) & (AHashSize - 1) */
       +static AHash        *ahash[AHashSize];
       +
       +/*
       + * Hash a NUL-terminated string to 32 bits by mixing each
       + * character into the running value twice: once in a scrambled
       + * form and once as a sum of shifted copies.
       + */
       +static u32int
       +hashstr(char *s)
       +{
       +        u32int h;
       +        int c, mixed;
       +
       +        h = 0;
       +        while((c = *s) != 0){
       +                mixed = c ^ (c << 6);
       +                h += (mixed << 11) ^ (mixed >> 1);
       +                h ^= (c << 14) + (c << 7) + (c << 4) + c;
       +                s++;
       +        }
       +        return h;
       +}
       +
       +/*
       + * Register arena in the name hash table.  The new entry is pushed
       + * on the front of its bucket; no check for an existing entry with
       + * the same name is made here.
       + * Returns 0 on success, -1 if the entry cannot be allocated.
       + */
       +int
       +addarena(Arena *arena)
       +{
       +        AHash *ent;
       +        u32int slot;
       +
       +        slot = hashstr(arena->name) & (AHashSize - 1);
       +        ent = MK(AHash);
       +        if(ent == nil)
       +                return -1;
       +
       +        ent->arena = arena;
       +        ent->next = ahash[slot];
       +        ahash[slot] = ent;
       +        return 0;
       +}
       +
       +/*
       + * Look up a registered arena by name.
       + * Returns the arena, or nil if no arena with that name is in the table.
       + */
       +Arena*
       +findarena(char *name)
       +{
       +        AHash *ent;
       +
       +        ent = ahash[hashstr(name) & (AHashSize - 1)];
       +        while(ent != nil){
       +                if(strcmp(ent->arena->name, name) == 0)
       +                        return ent->arena;
       +                ent = ent->next;
       +        }
       +        return nil;
       +}
       +
       +/*
       + * Remove arena from the name hash table.  Entries are matched by
       + * arena pointer, not by name.
       + * Returns 0 if an entry was removed, -1 if the arena was not registered.
       + */
       +int
       +delarena(Arena *arena)
       +{
       +        AHash **link, *ent;
       +
       +        /* walk the chain through the link that points at each entry */
       +        link = &ahash[hashstr(arena->name) & (AHashSize - 1)];
       +        while((ent = *link) != nil){
       +                if(ent->arena == arena){
       +                        *link = ent->next;
       +                        free(ent);
       +                        return 0;
       +                }
       +                link = &ent->next;
       +        }
       +        return -1;
       +}
       +
       +/*
       + * Load an existing arena partition: read and validate the partition
       + * header, read the arena table that follows it, initialize every
       + * arena listed in the table, and register the arenas by name.
       + * Returns the new ArenaPart, or nil (after seterr) on any failure;
       + * everything allocated along the way is released on failure.
       + */
       +ArenaPart*
       +initarenapart(Part *part)
       +{
       +        AMapN amn;
       +        ArenaPart *ap;
       +        ZBlock *b;
       +        u32int i;
       +        int ok;
       +
       +        b = alloczblock(HeadSize, 0, 0);
       +        if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
       +                seterr(EAdmin, "can't read arena partition header: %r");
       +                /* the buffer was allocated but the read failed: don't leak it */
       +                if(b != nil)
       +                        freezblock(b);
       +                return nil;
       +        }
       +
       +        ap = MKZ(ArenaPart);
       +        if(ap == nil){
       +                freezblock(b);
       +                return nil;
       +        }
       +        ap->part = part;
       +        ok = unpackarenapart(ap, b->data);
       +        freezblock(b);
       +        if(ok < 0){
       +                freearenapart(ap, 0);
       +                return nil;
       +        }
       +
       +        /* the arena table starts at the first block boundary after the header */
       +        ap->tabbase = (PartBlank + HeadSize + ap->blocksize - 1) & ~(ap->blocksize - 1);
       +        if(ap->version != ArenaPartVersion){
       +                seterr(ECorrupt, "unknown arena partition version %d", ap->version);
       +                freearenapart(ap, 0);
       +                return nil;
       +        }
       +        if(ap->blocksize & (ap->blocksize - 1)){
       +                seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", ap->blocksize);
       +                freearenapart(ap, 0);
       +                return nil;
       +        }
       +        if(ap->tabbase >= ap->arenabase){
       +                seterr(ECorrupt, "arena partition table overlaps with arena storage");
       +                freearenapart(ap, 0);
       +                return nil;
       +        }
       +        ap->tabsize = ap->arenabase - ap->tabbase;
       +        partblocksize(part, ap->blocksize);
       +        /* usable size is the partition size rounded down to a whole block */
       +        ap->size = ap->part->size & ~(u64int)(ap->blocksize - 1);
       +
       +        if(readarenamap(&amn, part, ap->tabbase, ap->tabsize) < 0){
       +                freearenapart(ap, 0);
       +                return nil;
       +        }
       +        ap->narenas = amn.n;
       +        ap->map = amn.map;
       +        if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0){
       +                freearenapart(ap, 0);
       +                return nil;
       +        }
       +
       +        ap->arenas = MKNZ(Arena*, ap->narenas);
       +        for(i = 0; i < ap->narenas; i++){
       +                ap->arenas[i] = initarena(part, ap->map[i].start, ap->map[i].stop - ap->map[i].start, ap->blocksize);
       +                if(ap->arenas[i] == nil){
       +                        seterr(ECorrupt, "%s: %r", ap->map[i].name);
       +                        freearenapart(ap, 1);
       +                        return nil;
       +                }
       +                if(namecmp(ap->map[i].name, ap->arenas[i]->name) != 0){
       +                        seterr(ECorrupt, "arena name mismatches with expected name: %s vs. %s",
       +                                ap->map[i].name, ap->arenas[i]->name);
       +                        freearenapart(ap, 1);
       +                        return nil;
       +                }
       +                /* checks against arenas registered before this call */
       +                if(findarena(ap->arenas[i]->name)){
       +                        seterr(ECorrupt, "duplicate arena name %s in %s",
       +                                ap->map[i].name, ap->part->name);
       +                        freearenapart(ap, 1);
       +                        return nil;
       +                }
       +        }
       +
       +        /* register only after every arena has been validated */
       +        for(i = 0; i < ap->narenas; i++)
       +                addarena(ap->arenas[i]);
       +
       +        return ap;
       +}
       +
       +/*
       + * Create a fresh, empty arena partition description on part and
       + * write its header and (empty) arena table to disk.
       + * blocksize must be a power of 2; tabsize is the space wanted for
       + * the arena table and is rounded up to a block boundary.
       + * Returns the new ArenaPart, or nil on failure.
       + */
       +ArenaPart*
       +newarenapart(Part *part, u32int blocksize, u32int tabsize)
       +{
       +        ArenaPart *ap;
       +
       +        if(blocksize & (blocksize - 1)){
       +                seterr(ECorrupt, "illegal non-power-of-2 block size %d\n", blocksize);
       +                return nil;
       +        }
       +
       +        ap = MKZ(ArenaPart);
       +        if(ap == nil)
       +                return nil;
       +
       +        ap->narenas = 0;
       +        ap->part = part;
       +        ap->version = ArenaPartVersion;
       +        ap->blocksize = blocksize;
       +
       +        /* layout: blank area, header, table, then arenas, each block aligned */
       +        partblocksize(part, blocksize);
       +        ap->size = part->size & ~(u64int)(blocksize - 1);
       +        ap->tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
       +        ap->arenabase = (ap->tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
       +        ap->tabsize = ap->arenabase - ap->tabbase;
       +
       +        if(wbarenapart(ap) >= 0)
       +                return ap;
       +
       +        freearenapart(ap, 0);
       +        return nil;
       +}
       +
       +/*
       + * Write the arena partition header and then the arena table back
       + * to disk.  The table is validated with okamap first.
       + * Returns 0 on success, -1 on failure.
       + */
       +int
       +wbarenapart(ArenaPart *ap)
       +{
       +        ZBlock *hdr;
       +        int failed;
       +
       +        if(okamap(ap->map, ap->narenas, ap->arenabase, ap->size, "arena table") < 0)
       +                return -1;
       +
       +        hdr = alloczblock(HeadSize, 1, 0);
       +        if(hdr == nil){
       +                /* ZZZ no error message is set on this path */
       +                return -1;
       +        }
       +
       +        failed = 0;
       +        if(packarenapart(ap, hdr->data) < 0){
       +                seterr(ECorrupt, "can't make arena partition header: %r");
       +                failed = 1;
       +        }else if(writepart(ap->part, PartBlank, hdr->data, HeadSize) < 0){
       +                seterr(EAdmin, "can't write arena partition header: %r");
       +                failed = 1;
       +        }
       +        freezblock(hdr);
       +        if(failed)
       +                return -1;
       +
       +        return wbarenamap(ap->map, ap->narenas, ap->part, ap->tabbase, ap->tabsize);
       +}
       +
       +/*
       + * Free an ArenaPart along with its map and arena pointer array.
       + * When freearenas is nonzero, each non-nil arena is also removed
       + * from the name hash table and freed.  A nil ap is ignored.
       + */
       +void
       +freearenapart(ArenaPart *ap, int freearenas)
       +{
       +        int i;
       +        Arena *a;
       +
       +        if(ap == nil)
       +                return;
       +
       +        if(freearenas){
       +                for(i = 0; i < ap->narenas; i++){
       +                        a = ap->arenas[i];
       +                        if(a != nil){
       +                                delarena(a);
       +                                freearena(a);
       +                        }
       +                }
       +        }
       +
       +        free(ap->map);
       +        free(ap->arenas);
       +        free(ap);
       +}
       +
       +/*
       + * Validate an address map of n entries: the ranges must appear in
       + * increasing order without overlapping, each range must satisfy
       + * start <= stop, the first may not begin before start, and the
       + * last may not end after stop.  what names the map in error text.
       + * Returns 0 if the map is acceptable, -1 (after seterr) otherwise.
       + */
       +int
       +okamap(AMap *am, int n, u64int start, u64int stop, char *what)
       +{
       +        u64int prev;
       +        u32int i;
       +
       +        prev = start;
       +        for(i = 0; i < n; i++){
       +                if(am[i].start < prev){
       +                        /* the first entry is compared against the caller's lower bound */
       +                        seterr(ECorrupt, i == 0 ? "invalid start address in %s" : "overlapping ranges in %s", what);
       +                        return -1;
       +                }
       +                if(am[i].stop < am[i].start){
       +                        seterr(ECorrupt, "invalid range in %s", what);
       +                        return -1;
       +                }
       +                prev = am[i].stop;
       +        }
       +
       +        if(prev <= stop)
       +                return 0;
       +
       +        seterr(ECorrupt, "invalid ending address in %s", what);
       +        return -1;
       +}
       +
       +/*
       + * Resolve each of the n map entries to its registered arena by
       + * name, storing the results in arenas[].  what names the requester
       + * in the error text.
       + * Returns 0 on success; on the first unknown name, stores nil in
       + * that slot and returns -1.
       + */
       +int
       +maparenas(AMap *am, Arena **arenas, int n, char *what)
       +{
       +        u32int i;
       +        Arena *found;
       +
       +        for(i = 0; i < n; i++){
       +                found = findarena(am[i].name);
       +                arenas[i] = found;
       +                if(found != nil)
       +                        continue;
       +                seterr(EAdmin, "can't find arena '%s' for '%s'\n", am[i].name, what);
       +                return -1;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Read and parse the arena map stored in the size bytes at offset
       + * base of part.  On success the parsed map is left in *amn.
       + * Returns 0 on success, -1 on failure.
       + */
       +int
       +readarenamap(AMapN *amn, Part *part, u64int base, u32int size)
       +{
       +        IFile f;
       +        int ok;                /* signed: carries parseamap's -1 unchanged */
       +
       +        if(partifile(&f, part, base, size) < 0)
       +                return -1;
       +        ok = parseamap(&f, amn);
       +        freeifile(&f);
       +        return ok;
       +}
       +
       +/*
       + * Format the n-entry map am as text into a zeroed buffer of size
       + * bytes and write that buffer to part at offset base.
       + * Returns 0 on success, -1 if the buffer cannot be allocated, the
       + * formatted map does not fit, or the write fails.
       + */
       +int
       +wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size)
       +{
       +        Fmt f;
       +        ZBlock *buf;
       +        int rc;
       +
       +        buf = alloczblock(size, 1, part->blocksize);
       +        if(buf == nil)
       +                return -1;
       +        fmtzbinit(&f, buf);
       +
       +        rc = 0;
       +        if(outputamap(&f, am, n) < 0){
       +                seterr(ECorrupt, "arena set size too small");
       +                rc = -1;
       +        }else if(writepart(part, base, buf->data, size) < 0){
       +                seterr(EAdmin, "can't write arena set: %r");
       +                rc = -1;
       +        }
       +        freezblock(buf);
       +        return rc;
       +}
       +
       +/*
       + * amap: n '\n' amapelem * n
       + * n: u32int
       + * amapelem: name '\t' astart '\t' astop '\n'
       + * astart, astop: u64int
       + *
       + * Parse an address map in the format above from f.  On success
       + * the newly allocated array and its length are stored in *amn and
       + * 0 is returned.  On failure -1 is returned, *amn is untouched,
       + * and the partially built array is freed.
       + */
       +int
       +parseamap(IFile *f, AMapN *amn)
       +{
       +        AMap *am;
       +        u64int v64;
       +        u32int v;
       +        char *s, *t, *flds[4];
       +        int i, n;
       +
       +        /*
       +         * arenas
       +         */
       +        if(ifileu32int(f, &v) < 0){
       +                seterr(ECorrupt, "syntax error: bad number of elements in %s", f->name);
       +                return -1;
       +        }
       +        /* range-check while still unsigned so a huge count cannot turn negative */
       +        if(v > MaxAMap){
       +                seterr(ECorrupt, "illegal number of elements in %s", f->name);
       +                return -1;
       +        }
       +        n = v;
       +        am = MKNZ(AMap, n);
       +        if(am == nil){
       +                fprint(2, "out of memory\n");
       +                return -1;
       +        }
       +        for(i = 0; i < n; i++){
       +                s = ifileline(f);
       +                /* getfields splits s in place, so keep a copy for the diagnostic */
       +                if(s)
       +                        t = estrdup(s);
       +                else
       +                        t = nil;
       +                if(s == nil || getfields(s, flds, 4, 0, "\t") != 3){
       +                        fprint(2, "early eof after %d of %d, %s:#%d: %s\n", i, n, f->name, f->pos, t);
       +                        free(t);
       +                        free(am);
       +                        return -1;
       +                }
       +                free(t);
       +                if(nameok(flds[0]) < 0){
       +                        free(am);
       +                        return -1;
       +                }
       +                namecp(am[i].name, flds[0]);
       +                if(stru64int(flds[1], &v64) < 0){
       +                        seterr(ECorrupt, "syntax error: bad arena base address in %s", f->name);
       +                        free(am);
       +                        return -1;
       +                }
       +                am[i].start = v64;
       +                if(stru64int(flds[2], &v64) < 0){
       +                        seterr(ECorrupt, "syntax error: bad arena size in %s", f->name);
       +                        free(am);
       +                        return -1;
       +                }
       +                am[i].stop = v64;
       +        }
       +
       +        amn->map = am;
       +        amn->n = n;
       +        return 0;
       +}
       +
       +/*
       + * Print an address map to f: the entry count on its own line,
       + * then one "name<tab>start<tab>stop" line per entry.
       + * Returns 0 on success, -1 as soon as fmtprint reports failure.
       + */
       +int
       +outputamap(Fmt *f, AMap *am, int n)
       +{
       +        int i;
       +
       +        if(fmtprint(f, "%ud\n", n) < 0)
       +                return -1;
       +
       +        i = 0;
       +        while(i < n){
       +                if(fmtprint(f, "%s\t%llud\t%llud\n", am[i].name, am[i].start, am[i].stop) < 0)
       +                        return -1;
       +                i++;
       +        }
       +        return 0;
       +}
   DIR diff --git a/src/cmd/venti/srv/bloom.c b/src/cmd/venti/srv/bloom.c
       t@@ -0,0 +1,210 @@
       +/*
       + * Bloom filter tracking which scores are present in our arenas
       + * and (more importantly) which are not.  
       + */
       +
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Initialize b to describe a filter of vsize bytes backed by data.
       + * If data is non-nil the header is unpacked from it, which may
       + * replace the size and hash count set here.  The mask is derived
       + * as size-1.
       + * Returns 0 on success, -1 if vsize does not fit in a ulong or the
       + * header cannot be unpacked.
       + */
       +int
       +bloominit(Bloom *b, vlong vsize, u8int *data)
       +{
       +        ulong nbytes;
       +
       +        nbytes = vsize;
       +        if(nbytes != vsize){        /* value changed when narrowed */
       +                werrstr("bloom data too big");
       +                return -1;
       +        }
       +
       +        b->size = nbytes;
       +        b->nhash = 32;        /* placeholder; the caller fixes it on initialization */
       +        if(data != nil && unpackbloomhead(b, data) < 0)
       +                return -1;
       +
       +        fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
       +        b->mask = b->size-1;
       +        b->data = data;
       +        return 0;
       +}
       +
       +/*
       + * Pack the filter's header fields into the start of its own data
       + * buffer.  Nothing is written to disk here.
       + */
       +void
       +wbbloomhead(Bloom *b)
       +{
       +        packbloomhead(b, b->data);
       +}
       +
       +/*
       + * Load the bloom filter stored at the start of partition p.
       + * The first 512 bytes are read to unpack the header (which yields
       + * the filter size), then the whole filter is read into a freshly
       + * allocated buffer.  The number of set bits and the total number
       + * of bits are added to the statistics.
       + * Returns the filter, or nil on failure with nothing leaked.
       + */
       +Bloom*
       +readbloom(Part *p)
       +{
       +        int i, n;
       +        uint ones;
       +        uchar buf[512];
       +        uchar *data;
       +        u32int *a;
       +        Bloom *b;
       +
       +        b = vtmallocz(sizeof *b);
       +        if(readpart(p, 0, buf, sizeof buf) < 0){
       +                vtfree(b);        /* previously leaked on this path */
       +                return nil;
       +        }
       +fprint(2, "header %.16H\n", buf);
       +        if(bloominit(b, 0, buf) < 0){
       +                vtfree(b);
       +                return nil;
       +        }
       +        data = vtmallocz(b->size);
       +        if(readpart(p, 0, data, b->size) < 0){
       +                vtfree(b);
       +                vtfree(data);
       +                return nil;
       +        }
       +        /* bloominit left b->data pointing at the stack buffer; replace it */
       +        b->data = data;
       +        b->part = p;
       +
       +        /* count the bits already set, one 32-bit word at a time */
       +        a = (u32int*)b->data;
       +        n = b->size/4;
       +        ones = 0;
       +        for(i=0; i<n; i++)
       +                ones += countbits(a[i]);
       +        addstat(StatBloomOnes, ones);
       +
       +        if(b->size == MaxBloomSize)        /* 2^32 overflows ulong */
       +                addstat(StatBloomBits, b->size*8-1);
       +        else
       +                addstat(StatBloomBits, b->size*8);
       +
       +        return b;
       +}
       +
       +/*
       + * Write the whole filter to offset 0 of its partition, after
       + * refreshing the packed header at the start of the buffer.
       + * Returns whatever writepart returns.
       + */
       +int
       +writebloom(Bloom *b)
       +{
       +        wbbloomhead(b);
       +        return writepart(b->part, 0, b->data, b->size);
       +}
       +
       +/*
       + * Fold the score into two 32-bit quantities a and b, then emit
       + * the sequence a, a+b, a+2b, ... as BloomMaxHash bloom filter
       + * indices in h.  (Michael Mitzenmacher has a 2005 paper saying
       + * that deriving the indices this way is okay.)
       + * The lowest BloomHeadSize*8 bit positions belong to the header,
       + * so any index that falls below that bound is replaced by it.
       + */
       +static void
       +gethashes(u8int *score, ulong *h)
       +{
       +        int i, k;
       +        u32int a, b;
       +
       +        a = 0;
       +        b = 0;
       +
       +        /* consume the score in 8-byte pairs, starting at byte 4 */
       +        i = 4;
       +        while(i+8 <= VtScoreSize){
       +                a ^= *(u32int*)(score+i);
       +                b ^= *(u32int*)(score+i+4);
       +                i += 8;
       +        }
       +        /* pick up one more 4-byte word if a whole one is left over */
       +        if(i+4 <= VtScoreSize)
       +                a ^= *(u32int*)(score+i);
       +
       +        for(k=0; k<BloomMaxHash; k++){
       +                if(a < BloomHeadSize*8)
       +                        h[k] = BloomHeadSize*8;
       +                else
       +                        h[k] = a;
       +                a += b;
       +        }
       +}
       +
       +/*
       + * Set the filter bits selected by score's first b->nhash hash
       + * values and add the number of newly set bits to StatBloomOnes.
       + * No locking is done here; see the callers.
       + */
       +static void
       +_markbloomfilter(Bloom *b, u8int *score)
       +{
       +        int i, nnew;
       +        ulong h[BloomMaxHash];
       +        u32int x, *y, z, *tab;
       +
       +        trace("markbloomfilter", "markbloomfilter %V", score);
       +        gethashes(score, h);
       +        nnew = 0;
       +        tab = (u32int*)b->data;
       +        for(i=0; i<b->nhash; i++){
       +                x = h[i];
       +                /* word index from the masked hash, bit index from its low 5 bits */
       +                y = &tab[(x&b->mask)>>5];
       +                /* shift an unsigned 1: shifting a signed int into bit 31 is undefined */
       +                z = (u32int)1<<(x&31);
       +                if(!(*y&z)){
       +                        nnew++;
       +                        *y |= z;
       +                }
       +        }
       +        if(nnew)
       +                addstat(StatBloomOnes, nnew);
       +
       +        trace("markbloomfilter", "markbloomfilter exit");
       +}
       +
       +/*
       + * Test whether every filter bit selected by score's first
       + * b->nhash hash values is set.
       + * Returns 1 if all are set, 0 as soon as one is found clear.
       + * No locking is done here; see the callers.
       + */
       +static int
       +_inbloomfilter(Bloom *b, u8int *score)
       +{
       +        int i;
       +        ulong h[BloomMaxHash], x;
       +        u32int *tab;
       +
       +        gethashes(score, h);
       +        tab = (u32int*)b->data;
       +        for(i=0; i<b->nhash; i++){
       +                x = h[i];
       +                /* shift an unsigned 1: shifting a signed int into bit 31 is undefined */
       +                if(!(tab[(x&b->mask)>>5] & ((u32int)1<<(x&31))))
       +                        return 0;
       +        }
       +        return 1;
       +}
       +
       +/*
       + * Report whether score may be present according to the filter.
       + * Returns 1 if all of its bits are set (or if there is no filter
       + * at all), 0 if the filter rules the score out.
       + * The lookup runs under the read lock and its elapsed time is
       + * recorded.  Note the naming of the counters: a positive answer
       + * is counted as StatBloomMiss and a negative one as StatBloomHit
       + * (presumably because only a negative answer saves work -- confirm).
       + */
       +int
       +inbloomfilter(Bloom *b, u8int *score)
       +{
       +        int r;
       +        uint ms;
       +
       +        if(b == nil)
       +                return 1;
       +
       +        ms = msec();
       +        rlock(&b->lk);
       +        r = _inbloomfilter(b, score);
       +        runlock(&b->lk);
       +        /* elapsed = now - start; the operands used to be reversed */
       +        ms = msec() - ms;
       +        addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
       +        if(r)
       +                addstat(StatBloomMiss, 1);
       +        else
       +                addstat(StatBloomHit, 1);
       +        return r;
       +}
       +
       +/*
       + * Record score in the filter.  A nil filter is ignored.
       + * The update holds the read lock b->lk and, inside it, the
       + * b->mod lock; they are released in the reverse order.
       + */
       +void
       +markbloomfilter(Bloom *b, u8int *score)
       +{
       +        if(b != nil){
       +                rlock(&b->lk);
       +                qlock(&b->mod);
       +
       +                _markbloomfilter(b, score);
       +
       +                qunlock(&b->mod);
       +                runlock(&b->lk);
       +        }
       +}
       +
       +/*
       + * Writer loop for one filter (v is the Bloom*): wait for a
       + * message on writechan, write the filter out, then signal
       + * completion on writedonechan.  A failed write is only reported
       + * on standard error.  Never returns.
       + */
       +static void
       +bloomwriteproc(void *v)
       +{
       +        Bloom *bloom;
       +
       +        bloom = v;
       +        while(1){
       +                recv(bloom->writechan, 0);
       +                if(writebloom(bloom) < 0)
       +                        fprint(2, "oops! writing bloom: %r\n");
       +                send(bloom->writedonechan, 0);
       +        }
       +}
       +
       +/*
       + * Create the request and completion channels for b and start
       + * bloomwriteproc on it via vtproc.
       + */
       +void
       +startbloomproc(Bloom *b)
       +{
       +        b->writechan = chancreate(sizeof(void*), 0);
       +        b->writedonechan = chancreate(sizeof(void*), 0);
       +        vtproc(bloomwriteproc, b);        
       +}
   DIR diff --git a/src/cmd/venti/srv/buildbuck.c b/src/cmd/venti/srv/buildbuck.c
       t@@ -0,0 +1,132 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * An IEStream is a sorted list of index entries, read
       + * incrementally from a partition through a fixed-size buffer.
       + * The unread entries in the buffer lie between pos and epos.
       + */
       +struct IEStream
       +{
       +        Part        *part;
       +        u64int        off;                /* read position within part */
       +        u64int        n;                /* number of valid ientries left to read */
       +        u32int        size;                /* allocated space in buffer */
       +        u8int        *buf;
       +        u8int        *pos;                /* current place in buffer */
       +        u8int        *epos;                /* end of valid buffer contents */
       +};
       +
       +/*
       + * Create a stream over clumps index entries stored in part
       + * starting at offset off, buffered size bytes at a time.
       + * The buffer starts out empty.
       + * ZZZ the results of the two allocations are not checked here.
       + */
       +IEStream*
       +initiestream(Part *part, u64int off, u64int clumps, u32int size)
       +{
       +        IEStream *s;
       +
       +        s = MKZ(IEStream);
       +        s->buf = MKN(u8int, size);
       +        s->size = size;
       +
       +        /* nothing buffered yet: both cursors sit at the start */
       +        s->pos = s->buf;
       +        s->epos = s->buf;
       +
       +        s->part = part;
       +        s->off = off;
       +        s->n = clumps;
       +        return s;
       +}
       +
       +/*
       + * Free a stream and its buffer.  A nil stream is ignored.
       + */
       +void
       +freeiestream(IEStream *ies)
       +{
       +        if(ies != nil){
       +                free(ies->buf);
       +                free(ies);
       +        }
       +}
       +
       +/*
       + * Return a pointer to the next IEntry (still packed) in the
       + * stream without consuming it, refilling the buffer from the
       + * partition when less than one whole entry is buffered.
       + * Returns nil if there is nothing more to read or the read fails.
       + */
       +static u8int*
       +peekientry(IEStream *ies)
       +{
       +        u32int have, want;
       +
       +        have = ies->epos - ies->pos;
       +        if(have >= IEntrySize)
       +                return ies->pos;
       +
       +        /* slide the partial entry to the front of the buffer */
       +        memmove(ies->buf, ies->pos, have);
       +        ies->epos = &ies->buf[have];
       +        ies->pos = ies->buf;
       +
       +        /* fill the buffer, but never past the last valid entry */
       +        want = ies->size;
       +        if(want > ies->n * IEntrySize)
       +                want = ies->n * IEntrySize;
       +        want -= have;
       +        if(want == 0)
       +                return nil;
       +
       +        if(readpart(ies->part, ies->off, ies->epos, want) < 0){
       +                seterr(EOk, "can't read sorted index entries: %r");
       +                return nil;
       +        }
       +        ies->epos += want;
       +        ies->off += want;
       +        return ies->pos;
       +}
       +
       +/*
       + * Map a packed IEntry to its bucket number: the top 32 hash bits
       + * of the entry divided by ix->div.  This relies on the score
       + * being the first thing in the packed representation.
       + * ib and ies are accepted but not used.
       + */
       +static u32int
       +iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies)
       +{
       +        u32int bits;
       +
       +        USED(ies);
       +        USED(ib);
       +        bits = hashbits(b, 32);
       +        return bits / ix->div;
       +}
       +
       +/*
       + * Fill ib with the next bucket in the stream.
       + * Consecutive entries are consumed from ies for as long as they
       + * map to the same bucket; maxdata is the capacity of ib->data in
       + * bytes.
       + * Returns the bucket number.  TWID32 is returned when the stream
       + * is already empty, when the next entry cannot be read, or when
       + * the bucket would overflow maxdata.
       + */
       +u32int
       +buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata)
       +{
       +        IEntry ie1, ie2;
       +        u8int *b;
       +        u32int buck;
       +
       +        buck = TWID32;
       +        ib->n = 0;
       +        while(ies->n){
       +                b = peekientry(ies);
       +                if(b == nil)
       +                        return TWID32;
       +//fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b);
       +                if(ib->n == 0)
       +                        buck = iebuck(ix, b, ib, ies);        /* first entry decides the bucket */
       +                else{
       +                        /* an entry for another bucket ends this one; it stays in the stream */
       +                        if(buck != iebuck(ix, b, ib, ies))
       +                                break;
       +                        if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){
       +                                /*
       +                                 * guess that the larger address is the correct one to use
       +                                 */
       +                                unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]);
       +                                unpackientry(&ie2, b);
       +                                seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type);
       +                                /* drop the stored copy; the store below puts back whichever one wins */
       +                                ib->n--;
       +                                if(ie1.ia.addr > ie2.ia.addr)
       +                                        memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize);
       +                        }
       +                }
       +                if((ib->n+1)*IEntrySize > maxdata){
       +                        seterr(EOk, "bucket overflow");
       +                        return TWID32;
       +                }
       +                /* store the entry and consume it from the stream */
       +                memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize);
       +                ib->n++;
       +                ies->n--;
       +                ies->pos += IEntrySize;
       +        }
       +        return buck;
       +}
   DIR diff --git a/src/cmd/venti/srv/buildindex.c b/src/cmd/venti/srv/buildindex.c
       t@@ -0,0 +1,160 @@
       +/*
       + * Rebuild the Venti index from scratch.
       + */
       +
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Write a single bucket.  Could profit from a big buffer here
       + * so that we can absorb sporadic runs of blocks into one write,
       + * avoiding disk seeks.
       + */
       +/*
       + * Pack bucket ib into block b and write it to the index section
       + * that holds bucket number buck.
       + * Returns the result of writepart, or -1 if the section chosen by
       + * indexsect0 does not actually cover buck.
       + */
       +static int
       +writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
       +{
       +        ISect *sect;
       +        u64int addr;
       +
       +        sect = ix->sects[indexsect0(ix, buck)];
       +        if(buck < sect->start || buck >= sect->stop){
       +                seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
       +                return -1;
       +        }
       +
       +        /* make buck relative to the section, then turn it into a byte address */
       +        buck -= sect->start;
       +        addr = sect->blockbase + ((u64int)buck << sect->blocklog);
       +
       +        packibucket(ib, b->data, sect->bucketmagic);
       +        return writepart(sect->part, addr, b->data, sect->blocksize);
       +}
       +
       +/*
       + * Build the index ix from the clumps sorted index entries stored
       + * in part at offset off.  Buckets are assembled one at a time with
       + * buildbucket and written with writebucket.  When zero is set,
       + * every bucket number skipped between written buckets (and after
       + * the last one, up to ix->buckets) is written out as an empty
       + * bucket.
       + * Returns 0 on success, -1 if a buffer cannot be allocated, a
       + * bucket number is out of range, or any write fails.
       + */
       +static int
       +buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
       +{
       +        IEStream *ies;
       +        IBucket ib, zib;
       +        ZBlock *z, *b;
       +        u32int next, buck;
       +        int ok;
       +        uint nbuck;
       +        u64int found = 0;
       +
       +//ZZZ make buffer size configurable
       +        b = alloczblock(ix->blocksize, 0, ix->blocksize);
       +        z = alloczblock(ix->blocksize, 1, ix->blocksize);
       +        ies = initiestream(part, off, clumps, 64*1024);
       +        if(b == nil || z == nil || ies == nil){
       +                /* report the failure; this used to fall out with ok = 0 */
       +                ok = -1;
       +                goto breakout;
       +        }
       +        ok = 0;
       +        next = 0;
       +        memset(&ib, 0, sizeof ib);
       +        ib.data = b->data + IBucketSize;
       +        /* clear zib too so no uninitialized field is ever packed */
       +        memset(&zib, 0, sizeof zib);
       +        zib.data = z->data + IBucketSize;
       +        zib.n = 0;
       +        nbuck = 0;
       +        for(;;){
       +                buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
       +                found += ib.n;
       +                if(zero){
       +                        /* write empty buckets for every number skipped since the last write */
       +                        for(; next != buck; next++){
       +                                if(next == ix->buckets){
       +                                        if(buck != TWID32){
       +                                                fprint(2, "bucket out of range\n");
       +                                                ok = -1;
       +                                        }
       +                                        goto breakout;
       +                                }
       +                                if(writebucket(ix, next, &zib, z) < 0){
       +                                        fprint(2, "can't write zero bucket to buck=%d: %r\n", next);
       +                                        ok = -1;
       +                                }
       +                        }
       +                }
       +                if(buck >= ix->buckets){
       +                        /* TWID32 is buildbucket's "no more buckets" value */
       +                        if(buck == TWID32)
       +                                break;
       +                        fprint(2, "bucket out of range\n");
       +                        ok = -1;
       +                        goto breakout;
       +                }
       +                if(writebucket(ix, buck, &ib, b) < 0){
       +                        fprint(2, "bad bucket found=%lld: %r\n", found);
       +                        ok = -1;
       +                }
       +                next = buck + 1;
       +                if(++nbuck%10000 == 0)
       +                        fprint(2, "\t%,d buckets written...\n", nbuck);
       +        }
       +breakout:;
       +        fprint(2, "wrote index with %lld entries\n", found);
       +        freeiestream(ies);
       +        freezblock(z);
       +        freezblock(b);
       +        return ok;
       +}
       +
       +/*
       + * Print the buildindex synopsis on standard error and exit
       + * with a non-empty status so callers see the failure.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
       +        threadexitsall("usage");
       +}
       +
       +Config conf;
       +
       +/*
       + * buildindex [-Z] [-B blockcachesize] config tmppart
       + *
       + * Initialize venti from config, sort the raw index entries into the
       + * temporary partition tmppart with sortrawientries, and write the
       + * index with buildindex.
       + *        -B  requested disk block cache size; raised to a minimum
       + *            derived from the number of arenas and index sections
       + *        -Z  do not overwrite skipped buckets with empty ones
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        Part *part;
       +        u64int clumps, base;
       +        u32int bcmem;
       +        int zero;
       +
       +        zero = 1;
       +        bcmem = 0;
       +        ARGBEGIN{
       +        case 'B':
       +                /* EARGF calls usage() when -B has no argument rather than yielding nil */
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        case 'Z':
       +                zero = 0;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 2)
       +                usage();
       +
       +        if(initventi(argv[0], &conf) < 0)
       +                sysfatal("can't init venti: %r");
       +
       +        if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
       +                bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
       +        if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
       +        initdcache(bcmem);
       +
       +        fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);
       +
       +        part = initpart(argv[1], ORDWR|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't initialize temporary partition: %r");
       +
       +        /* TWID64 is the failure value; otherwise clumps entries start at base in part */
       +        clumps = sortrawientries(mainindex, part, &base, mainindex->bloom);
       +        if(clumps == TWID64)
       +                sysfatal("can't build sorted index: %r");
       +        fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
       +
       +        if(buildindex(mainindex, part, base, clumps, zero) < 0)
       +                sysfatal("can't build new index: %r");
       +
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/checkarenas.c b/src/cmd/venti/srv/checkarenas.c
       t@@ -0,0 +1,135 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +static int        verbose;
       +
       +/*
       + * Check one arena by running syncarena until its result no longer has
       + * SyncHeader set, then compare the resulting in-memory statistics with
       + * the values they had on entry.  With scan set, the used, clumps,
       + * cclumps and uncsize statistics are zeroed before syncing.  If any
       + * problem was found and fix is set, the in-memory statistics are
       + * copied to diskstats and the arena header is written back.
       + */
       +static void
       +checkarena(Arena *arena, int scan, int fix)
       +{
       +        ATailStats saved;
       +        int status, pass;
       +
       +        if(verbose && arena->memstats.clumps)
       +                printarena(2, arena);
       +
       +        saved = arena->memstats;
       +
       +        if(scan){
       +                arena->memstats.used = 0;
       +                arena->memstats.clumps = 0;
       +                arena->memstats.cclumps = 0;
       +                arena->memstats.uncsize = 0;
       +        }
       +
       +        /* keep syncing while syncarena reports SyncHeader; one dot per extra pass */
       +        status = 0;
       +        do{
       +                pass = syncarena(arena, 0, 1000, 0, fix);
       +                status |= pass;
       +                if((pass & SyncHeader) && verbose && arena->memstats.clumps)
       +                        fprint(2, ".");
       +        }while(pass & SyncHeader);
       +        if(verbose && arena->memstats.clumps)
       +                fprint(2, "\n");
       +
       +        /* SyncHeader is re-derived below from the before/after comparison */
       +        status &= ~SyncHeader;
       +        if(!(arena->memstats.used == saved.used
       +        && arena->memstats.clumps == saved.clumps
       +        && arena->memstats.cclumps == saved.cclumps
       +        && arena->memstats.uncsize == saved.uncsize)){
       +                fprint(2, "%s: incorrect arena header fields\n", arena->name);
       +                printarena(2, arena);
       +                status |= SyncHeader;
       +        }
       +
       +        if(status != 0 && fix){
       +                fprint(2, "%s: writing fixed arena header fields\n", arena->name);
       +                arena->diskstats = arena->memstats;
       +                if(wbarena(arena) < 0)
       +                        fprint(2, "arena header write failed: %r\n");
       +                flushdcache();
       +        }
       +}
       +
       +/*
       + * Print the checkarenas synopsis on standard error and exit
       + * with a non-empty status so callers see the failure.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: checkarenas [-afv] file [arenaname...]\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * Report whether name is selected: returns 1 when the list is empty
       + * (argc == 0) or when name equals one of the argc strings in argv,
       + * and 0 otherwise.
       + */
       +int
       +should(char *name, int argc, char **argv)
       +{
       +        if(argc == 0)
       +                return 1;
       +        while(argc-- > 0)
       +                if(strcmp(name, *argv++) == 0)
       +                        return 1;
       +        return 0;
       +}
       +
       +/*
       + * checkarenas [-afv] file [arenaname...]
       + *
       + * Open the arena partition in file and run checkarena on each arena,
       + * or only on the arenas named after file when any are given.
       + *        -a  passed to checkarena as scan
       + *        -f  passed to checkarena as fix; without it readonly is set
       + *        -v  increase verbosity (may be repeated)
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        ArenaPart *ap;
       +        Part *part;
       +        char *file;
       +        int i, fix, scan;
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        fix = 0;
       +        scan = 0;
       +        ARGBEGIN{
       +        case 'f':
       +                fix++;
       +                break;
       +        case 'a':
       +                scan = 1;
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(!fix)
       +                readonly = 1;
       +
       +        if(argc < 1)
       +                usage();
       +
       +        /*
       +         * Consume the partition name so that argc/argv hold only the
       +         * arena names; otherwise should() would never see an empty list
       +         * and would compare arena names against the file name.
       +         */
       +        file = argv[0];
       +        argc--;
       +        argv++;
       +
       +        part = initpart(file, ORDWR|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open partition %s: %r", file);
       +
       +        ap = initarenapart(part);
       +        if(ap == nil)
       +                sysfatal("can't initialize arena partition in %s: %r", file);
       +
       +        if(verbose > 1){
       +                printarenapart(2, ap);
       +                fprint(2, "\n");
       +        }
       +
       +        initdcache(8 * MaxDiskBlock);
       +
       +        for(i = 0; i < ap->narenas; i++)
       +                if(should(ap->arenas[i]->name, argc, argv))
       +                        checkarena(ap->arenas[i], scan, fix);
       +
       +        if(verbose > 1)
       +                printstats();
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/checkindex.c b/src/cmd/venti/srv/checkindex.c
       t@@ -0,0 +1,293 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/* running totals of index entry discrepancies, reported in checkindex's error string */
       +static int extra, missing, wrong;
       +
       +/*
       + * Print the heading for the differences found in block eb.
       + * The column legend is printed only on the first call.
       + */
       +static void
       +phdr(DBlock *eb)
       +{
       +        static int legend;
       +
       +        if(legend == 0){
       +                print("# diff actual correct\n");
       +                legend = 1;
       +        }
       +        print("%s block 0x%llux\n", eb->part->name, eb->addr);
       +}
       +
       +/*
       + * Print one index entry on a line of its own, tagged with the
       + * diff marker c, showing its score and index address fields.
       + */
       +static void
       +pie(IEntry *ie, char c)
       +{
       +        IAddr *ia;
       +
       +        ia = &ie->ia;
       +        print("%c %V %22lld %3d %5d %3d\n",
       +                c, ie->score, ia->addr, ia->type, ia->size, ia->blocks);
       +}
       +
       +/*
       + * Compare the expected contents ib of bucket buck with the bucket
       + * currently stored in index ix.  Both entry lists are walked in
       + * ientrycmp order: entries only in the stored bucket are printed with
       + * '<' and counted in extra, entries only in ib are printed with '>'
       + * and counted in missing, and entries that compare equal but carry
       + * different index addresses are printed both ways and counted in wrong.
       + *
       + * Returns 0 if no extra or missing entries were seen, -1 otherwise or
       + * if the bucket could not be located or read.
       + */
       +static int
       +checkbucket(Index *ix, u32int buck, IBucket *ib)
       +{
       +        ISect *is;
       +        DBlock *eb;
       +        IBucket eib;
       +        IEntry ie, eie;
       +        int i, ei, ok, c, hdr;
       +
       +        is = ix->sects[indexsect0(ix, buck)];
       +        if(buck < is->start || buck >= is->stop){
       +                seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
       +                return -1;
       +        }
       +        buck -= is->start;
       +        eb = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), OREAD);
       +        if(eb == nil)
       +                return -1;
       +        unpackibucket(&eib, eb->data, is->bucketmagic);
       +
       +        ok = 0;
       +        ei = 0;
       +        hdr = 0;
       +        for(i = 0; i < ib->n; i++){
       +                while(ei < eib.n){
       +                        c = ientrycmp(&ib->data[i * IEntrySize], &eib.data[ei * IEntrySize]);
       +                        if(c == 0){
       +                                unpackientry(&ie, &ib->data[i * IEntrySize]);
       +                                unpackientry(&eie, &eib.data[ei * IEntrySize]);
       +                                /*
       +                                 * NOTE(review): an address mismatch is counted
       +                                 * in wrong but does not set ok; confirm that is
       +                                 * intended.
       +                                 */
       +                                if(iaddrcmp(&ie.ia, &eie.ia) != 0){
       +                                        if(!hdr){
       +                                                phdr(eb);
       +                                                hdr = 1;
       +                                        }
       +                                        wrong++;
       +                                        pie(&eie, '<');
       +                                        pie(&ie, '>');
       +                                }
       +                                ei++;
       +                                goto cont;
       +                        }
       +                        /* expected entry sorts first: it is absent from the stored bucket */
       +                        if(c < 0)
       +                                break;
       +                        if(!hdr){
       +                                phdr(eb);
       +                                hdr = 1;
       +                        }
       +                        unpackientry(&eie, &eib.data[ei*IEntrySize]);
       +                        extra++;
       +                        pie(&eie, '<');
       +                        ei++;
       +                        ok = -1;
       +                }
       +                if(!hdr){
       +                        phdr(eb);
       +                        hdr = 1;
       +                }
       +                unpackientry(&ie, &ib->data[i*IEntrySize]);
       +                missing++;
       +                pie(&ie, '>');
       +                ok = -1;
       +        cont:;
       +        }
       +        /* anything left in the stored bucket is surplus; count it like the loop above does */
       +        for(; ei < eib.n; ei++){
       +                if(!hdr){
       +                        phdr(eb);
       +                        hdr = 1;
       +                }
       +                unpackientry(&eie, &eib.data[ei*IEntrySize]);
       +                extra++;
       +                pie(&eie, '<');
       +                ok = -1;
       +        }
       +        putdblock(eb);
       +        return ok;
       +}
       +
       +/*
       + * Check the stored index ix against the sorted entry stream held in
       + * part: clumps entries starting at offset off.  Each bucket produced
       + * by buildbucket is compared with the stored bucket by checkbucket.
       + * When zero is set, every bucket number the stream skips over is
       + * checked against an empty bucket.  A bucket number of TWID32 from
       + * buildbucket is treated as the normal end of the stream.
       + *
       + * Returns 0 if every bucket matched, -1 otherwise with the error
       + * string set: a summary of the discrepancy counts after a complete
       + * pass, or the specific reason when the pass had to be abandoned.
       + */
       +int
       +checkindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
       +{
       +        IEStream *ies;
       +        IBucket ib, zib;
       +        ZBlock *z, *b;
       +        u32int next, buck;
       +        int ok;
       +
       +//ZZZ make buffer size configurable
       +        b = alloczblock(ix->blocksize, 0, ix->blocksize);
       +        z = alloczblock(ix->blocksize, 1, ix->blocksize);
       +        ies = initiestream(part, off, clumps, 64*1024);
       +        if(b == nil || z == nil || ies == nil){
       +                werrstr("allocating: %r");
       +                ok = -1;
       +                goto breakout;
       +        }
       +        ok = 0;
       +        next = 0;
       +        memset(&ib, 0, sizeof ib);
       +        ib.data = b->data;
       +        zib.data = z->data;
       +        zib.n = 0;
       +        zib.buck = 0;
       +        for(;;){
       +                buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
       +                if(zero){
       +                        /* buckets in [next, buck) should be empty on disk */
       +                        for(; next != buck; next++){
       +                                if(next == ix->buckets){
       +                                        if(buck != TWID32){
       +                                                /* keep this message: skip the summary below */
       +                                                werrstr("internal error: bucket out of range");
       +                                                ok = -1;
       +                                                goto breakout;
       +                                        }
       +                                        goto done;
       +                                }
       +                                if(checkbucket(ix, next, &zib) < 0)
       +                                        ok = -1;
       +                        }
       +                }
       +                if(buck >= ix->buckets){
       +                        if(buck == TWID32)
       +                                break;
       +                        werrstr("internal error: bucket out of range");
       +                        ok = -1;
       +                        goto breakout;
       +                }
       +                if(checkbucket(ix, buck, &ib) < 0)
       +                        ok = -1;
       +                next = buck + 1;
       +        }
       +done:
       +        /* summarize on every normal exit, including runs with zero unset */
       +        if(ok < 0)
       +                werrstr("%d spurious entries, %d missing, %d wrong", extra, missing, wrong);
       +breakout:
       +        freeiestream(ies);
       +        freezblock(z);
       +        freezblock(b);
       +        return ok;
       +}
       +
       +/*
       + * Compare bloom filter b1 with reference filter b2 word by word,
       + * after wbbloomhead has been called on both and their first
       + * BloomHeadSize bytes have been found equal.  Bits set only in b1 are
       + * reported as spurious, bits set only in b2 as missing.
       + *
       + * Without fix, missing bits are an error and spurious bits are only
       + * reported.  With fix, any difference causes b2's data to be copied
       + * over b1's and b1 to be written with writebloom.
       + *
       + * Returns 0 on success (including both filters being nil), -1 with
       + * the error string set on a nil/non-nil pair, a header mismatch, or
       + * unfixed missing bits; with fix, the result of writebloom.
       + */
       +int
       +checkbloom(Bloom *b1, Bloom *b2, int fix)
       +{
       +        u32int *a1, *a2;
       +        int i, n, nextra, nmissing;
       +
       +        if(b1==nil && b2==nil)
       +                return 0;
       +        if(b1==nil || b2==nil){
       +                werrstr("nil/non-nil");
       +                return -1;
       +        }
       +        wbbloomhead(b1);
       +        wbbloomhead(b2);
       +        if(memcmp(b1->data, b2->data, BloomHeadSize) != 0){
       +                werrstr("bloom header mismatch");
       +                return -1;
       +        }
       +        a1 = (u32int*)b1->data;
       +        a2 = (u32int*)b2->data;
       +        n = b1->size/4;
       +        /* local tallies, named apart from the file-level entry counters */
       +        nextra = 0;
       +        nmissing = 0;
       +        for(i=BloomHeadSize/4; i<n; i++){
       +                if(a1[i] != a2[i]){
       +                        nextra += countbits(a1[i] & ~a2[i]);
       +                        nmissing += countbits(a2[i] & ~a1[i]);
       +                }
       +        }
       +        if(nextra || nmissing)
       +                fprint(2, "bloom filter: %d spurious bits, %d missing bits\n", nextra, nmissing);
       +        else
       +                fprint(2, "bloom filter: correct\n");
       +        if(!fix && nmissing){
       +                werrstr("missing bits");
       +                return -1;
       +        }
       +        if(fix && (nmissing || nextra)){
       +                memmove(b1->data, b2->data, b1->size);
       +                return writebloom(b1);
       +        }
       +        return 0;
       +}
       +
       +
       +/*
       + * Print the checkindex synopsis on standard error and exit
       + * with a non-empty status so callers see the failure.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: checkindex [-f] [-B blockcachesize] config tmp\n");
       +        threadexitsall("usage");
       +}
       +
       +Config conf;
       +
       +/*
       + * checkindex [-f] [-B blockcachesize] config tmp
       + *
       + * Initialize venti from config, sort the raw index entries into the
       + * temporary partition tmp with sortrawientries (building a fresh bloom
       + * filter alongside when the index has one), then compare the result
       + * with the stored index and bloom filter.
       + *        -B  requested disk block cache size; raised to a minimum
       + *            derived from the number of arenas and index sections
       + *        -f  passed to checkbloom as fix; without it readonly is set
       + *        -Z  do not check skipped buckets against empty ones
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        Bloom *oldbloom, *newbloom;
       +        Part *part;
       +        u64int clumps, base;
       +        u32int bcmem;
       +        int fix, skipz, ok;
       +
       +        fix = 0;
       +        bcmem = 0;
       +        skipz = 0;
       +        ARGBEGIN{
       +        case 'B':
       +                /* EARGF calls usage() when -B has no argument rather than yielding nil */
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        case 'f':
       +                fix++;
       +                break;
       +        case 'Z':
       +                skipz = 1;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 2)
       +                usage();
       +
       +        ventifmtinstall();
       +
       +        part = initpart(argv[1], ORDWR|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't initialize temporary partition: %r");
       +
       +        if(!fix)
       +                readonly = 1;
       +
       +        if(initventi(argv[0], &conf) < 0)
       +                sysfatal("can't init venti: %r");
       +        oldbloom = mainindex->bloom;
       +        newbloom = nil;
       +        if(oldbloom){
       +                /* scratch filter of the same size, filled in by sortrawientries */
       +                newbloom = vtmallocz(sizeof *newbloom);
       +                bloominit(newbloom, oldbloom->size, nil);
       +                newbloom->data = vtmallocz(oldbloom->size);
       +        }
       +        if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
       +                bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
       +        if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
       +        initdcache(bcmem);
       +
       +        fprint(2, "checkindex: building entry list\n");
       +        clumps = sortrawientries(mainindex, part, &base, newbloom);
       +        if(clumps == TWID64)
       +                sysfatal("can't build sorted index: %r");
       +        fprint(2, "checkindex: checking %lld entries at %lld\n", clumps, base);
       +        ok = 0;
       +        if(checkindex(mainindex, part, base, clumps, !skipz) < 0){
       +                fprint(2, "checkindex: %r\n");
       +                ok = -1;
       +        }
       +        if(checkbloom(oldbloom, newbloom, fix) < 0){
       +                fprint(2, "checkbloom: %r\n");
       +                ok = -1;
       +        }
       +        if(ok < 0)
       +                sysfatal("errors found");
       +        fprint(2, "checkindex: index is correct\n");
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/clump.c b/src/cmd/venti/srv/clump.c
       t@@ -0,0 +1,222 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "whack.h"
       +
       +/*
       + * Write a lump to disk.  Updates ia with an index address
       + * for the newly-written lump.  Upon return, the lump will
       + * have been placed in the disk cache but will likely not be on disk yet.
       + *
       + * zb holds the lump data, sc its score, type its venti type and
       + * creator the value recorded in the clump header.  The data is
       + * stored compressed when whackblock makes it smaller, and as is
       + * otherwise.
       + *
       + * Returns 0 on success, -1 if the lump is larger than VtMaxLumpSize,
       + * the type is invalid, the buffer cannot be allocated, or
       + * writeiclump fails.
       + */
       +int
       +storeclump(Index *ix, ZBlock *zb, u8int *sc, int type, u32int creator, IAddr *ia)
       +{
       +        ZBlock *cb;
       +        Clump cl;
       +        u64int a;
       +        u8int bh[VtScoreSize];
       +        int size, dsize;
       +
       +        trace(TraceLump, "storeclump enter", sc, type);
       +        size = zb->len;
       +        if(size > VtMaxLumpSize){
       +                seterr(EStrange, "lump too large");
       +                return -1;
       +        }
       +        if(vttypevalid(type) < 0){
       +                seterr(EStrange, "invalid lump type");
       +                return -1;
       +        }
       +
       +        /* disabled check that sc really is the score of the data */
       +        if(0){
       +                scoremem(bh, zb->data, size);
       +                if(scorecmp(sc, bh) != 0){
       +                        seterr(ECorrupt, "storing clump: corrupted; expected=%V got=%V, size=%d", sc, bh, size);
       +                        return -1;
       +                }
       +        }
       +
       +        /* room for the clump header, the data, and the 4 zero bytes written after it */
       +        cb = alloczblock(size + ClumpSize + U32Size, 0, 0);
       +        if(cb == nil)
       +                return -1;
       +
       +        cl.info.type = type;
       +        cl.info.uncsize = size;
       +        cl.creator = creator;
       +        cl.time = now();
       +        scorecp(cl.info.score, sc);
       +
       +        trace(TraceLump, "storeclump whackblock");
       +        /* compress straight into the space after the header */
       +        dsize = whackblock(&cb->data[ClumpSize], zb->data, size);
       +        if(dsize > 0 && dsize < size){
       +                cl.encoding = ClumpECompress;
       +        }else{
       +                /* no gain from compression: store the data uncompressed */
       +                if(dsize > size){
       +                        fprint(2, "whack error: dsize=%d size=%d\n", dsize, size);
       +                        abort();
       +                }
       +                cl.encoding = ClumpENone;
       +                dsize = size;
       +                memmove(&cb->data[ClumpSize], zb->data, size);
       +        }
       +        memset(cb->data+ClumpSize+dsize, 0, 4);
       +        cl.info.size = dsize;
       +
       +        ia->addr = 0;
       +        ia->type = type;
       +        ia->size = size;
       +        /* header plus stored data, rounded up to whole blocks of 1<<ABlockLog bytes */
       +        ia->blocks = (dsize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
       +
       +        a = writeiclump(ix, &cl, cb->data, &ia->addr);
       +
       +        trace(TraceLump, "storeclump exit %lld", a);
       +
       +        freezblock(cb);
       +        if(a == TWID64)
       +                return -1;
       +
       +/*
       +        qlock(&stats.lock);
       +        stats.clumpwrites++;
       +        stats.clumpbwrites += size;
       +        stats.clumpbcomp += dsize;
       +        qunlock(&stats.lock);
       +*/
       +
       +        return 0;
       +}
       +
       +/*
       + * Read U32Size bytes at address aa of arena and return them decoded
       + * by unpackmagic, or TWID32 if the read is reported as failed.
       + */
       +u32int
       +clumpmagic(Arena *arena, u64int aa)
       +{
       +        u8int hdr[U32Size];
       +
       +        if(readarena(arena, aa, hdr, U32Size) >= 0)
       +                return unpackmagic(hdr);
       +        return TWID32;
       +}
       +
       +/*
       + * fetch a block based at addr.
       + * score is filled in with the block's score.
       + * blocks is roughly the length of the clump on disk;
       + * if zero, the length is unknown.
       + *
       + * The clump header is unpacked into cl.  The returned block holds
       + * the uncompressed data (cl->info.uncsize bytes).  With verify set,
       + * the score of the result is recomputed and compared with the header
       + * and the lump type is validated.  Returns nil on any failure.
       + */
       +ZBlock*
       +loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify)
       +{
       +        Unwhack uw;
       +        ZBlock *zb, *cb;
       +        u8int bh[VtScoreSize], *buf;
       +        u32int n;
       +        int nunc;
       +
       +/*
       +        qlock(&stats.lock);
       +        stats.clumpreads++;
       +        qunlock(&stats.lock);
       +*/
       +
       +        /* unknown length: start with a single block */
       +        if(blocks <= 0)
       +                blocks = 1;
       +
       +        trace(TraceLump, "loadclump enter");
       +
       +        cb = alloczblock(blocks << ABlockLog, 0, 0);
       +        if(cb == nil)
       +                return nil;
       +        /*
       +         * NOTE(review): n is unsigned, so a negative result from readarena
       +         * would not be caught by the test below; clumpmagic tests the same
       +         * call with < 0.  Confirm readarena's return convention.
       +         */
       +        n = readarena(arena, aa, cb->data, blocks << ABlockLog);
       +        if(n < ClumpSize){
       +                /* presumably readarena has already set the error when it returns 0 -- TODO confirm */
       +                if(n != 0)
       +                        seterr(ECorrupt, "loadclump read less than a header");
       +                freezblock(cb);
       +                return nil;
       +        }
       +        trace(TraceLump, "loadclump unpack");
       +        if(unpackclump(cl, cb->data, arena->clumpmagic) < 0){
       +                seterr(ECorrupt, "loadclump %s %llud: %r", arena->name, aa);
       +                freezblock(cb);
       +                return nil;
       +        }
       +        n -= ClumpSize;
       +        if(n < cl->info.size){
       +                /* first read was too short: reread just the data, past the header */
       +                freezblock(cb);
       +                n = cl->info.size;
       +                cb = alloczblock(n, 0, 0);
       +                if(cb == nil)
       +                        return nil;
       +                if(readarena(arena, aa + ClumpSize, cb->data, n) != n){
       +                        seterr(ECorrupt, "loadclump read too little data");
       +                        freezblock(cb);
       +                        return nil;
       +                }
       +                buf = cb->data;
       +        }else
       +                buf = cb->data + ClumpSize;
       +
       +        scorecp(score, cl->info.score);
       +
       +        zb = alloczblock(cl->info.uncsize, 0, 0);
       +        if(zb == nil){
       +                freezblock(cb);
       +                return nil;
       +        }
       +        switch(cl->encoding){
       +        case ClumpECompress:
       +                trace(TraceLump, "loadclump decompress");
       +                unwhackinit(&uw);
       +                nunc = unwhack(&uw, zb->data, cl->info.uncsize, buf, cl->info.size);
       +                if(nunc != cl->info.uncsize){
       +                        if(nunc < 0)
       +                                seterr(ECorrupt, "decompression of %llud failed: %s", aa, uw.err);
       +                        else
       +                                seterr(ECorrupt, "decompression of %llud gave partial block: %d/%d\n", aa, nunc, cl->info.uncsize);
       +                        freezblock(cb);
       +                        freezblock(zb);
       +                        return nil;
       +                }
       +                break;
       +        case ClumpENone:
       +                if(cl->info.size != cl->info.uncsize){
       +                        seterr(ECorrupt, "loading clump: bad uncompressed size for uncompressed block %llud", aa);
       +                        freezblock(cb);
       +                        freezblock(zb);
       +                        return nil;
       +                }
       +                /* a mismatch here is only recorded with seterr; the copy still proceeds */
       +                scoremem(bh, buf, cl->info.uncsize);
       +                if(scorecmp(cl->info.score, bh) != 0)
       +                        seterr(ECorrupt, "pre-copy sha1 wrong at %s %llud: expected=%V got=%V", arena->name, aa, cl->info.score, bh);
       +                memmove(zb->data, buf, cl->info.uncsize);
       +                break;
       +        default:
       +                seterr(ECorrupt, "unknown encoding in loadlump %llud", aa);
       +                freezblock(cb);
       +                freezblock(zb);
       +                return nil;
       +        }
       +        freezblock(cb);
       +
       +        if(verify){
       +                trace(TraceLump, "loadclump verify");
       +                scoremem(bh, zb->data, cl->info.uncsize);
       +                if(scorecmp(cl->info.score, bh) != 0){
       +                        seterr(ECorrupt, "loading clump: corrupted at %s %llud; expected=%V got=%V", arena->name, aa, cl->info.score, bh);
       +                        freezblock(zb);
       +                        return nil;
       +                }
       +                if(vttypevalid(cl->info.type) < 0){
       +                        seterr(ECorrupt, "loading lump at %s %llud: invalid lump type %d", arena->name, aa, cl->info.type);
       +                        freezblock(zb);
       +                        return nil;
       +                }
       +        }
       +
       +        trace(TraceLump, "loadclump exit");
       +/*
       +        qlock(&stats.lock);
       +        stats.clumpbreads += cl->info.size;
       +        stats.clumpbuncomp += cl->info.uncsize;
       +        qunlock(&stats.lock);
       +*/
       +        return zb;
       +}
   DIR diff --git a/src/cmd/venti/srv/clumpstats.c b/src/cmd/venti/srv/clumpstats.c
       t@@ -0,0 +1,127 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/* count[size][type]: number of clumps seen with that uncompressed size and type */
       +int        count[VtMaxLumpSize][VtMaxType];
       +Config conf;
       +
       +enum
       +{
       +        /* number of ClumpInfo entries requested from an arena directory per read */
       +        ClumpChunks        = 32*1024
       +};
       +
       +/*
       + * Read the clump directory of arena in groups of at most ClumpChunks
       + * entries and tally each entry in count[uncsize][type].  Entries whose
       + * type or size is out of range are reported and left out of the tally.
       + * Returns the number of directory entries read, or TWID32 if a
       + * directory read came up short.
       + */
       +static int
       +readarenainfo(Arena *arena)
       +{
       +        ClumpInfo *info, *infos;
       +        u32int clump;
       +        int k, n, got, failed;
       +
       +        if(arena->memstats.clumps)
       +                fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);
       +
       +        infos = MKN(ClumpInfo, ClumpChunks);
       +        failed = 0;
       +        clump = 0;
       +        while(clump < arena->memstats.clumps){
       +                /* a full group, or whatever is left at the end */
       +                n = ClumpChunks;
       +                if(n > arena->memstats.clumps - clump)
       +                        n = arena->memstats.clumps - clump;
       +
       +                got = readclumpinfos(arena, clump, infos, n);
       +                if(got != n){
       +                        seterr(EOk, "arena directory read failed %d not %d: %r", got, n);
       +                        failed = 1;
       +                        break;
       +                }
       +
       +                for(k = 0; k < n; k++){
       +                        info = &infos[k];
       +                        if(info->type < VtMaxType && info->uncsize < VtMaxLumpSize)
       +                                count[info->uncsize][info->type]++;
       +                        else
       +                                fprint(2, "bad clump: %d: type = %d: size = %d\n", clump+k, info->type, info->uncsize);
       +                }
       +                clump += n;
       +        }
       +        free(infos);
       +        if(failed)
       +                return TWID32;
       +        return clump;
       +}
       +
       +/*
       + * Tally the clumps of every arena in index ix with readarenainfo and
       + * print the total followed by one line per uncompressed size that
       + * occurred: the size, the number of clumps of that size, and the
       + * per-type counts.  Prints nothing if any arena directory could not
       + * be read.
       + */
       +static void
       +clumpstats(Index *ix)
       +{
       +        int ok;
       +        ulong clumps;
       +        u32int n;
       +        int i, j, t;
       +
       +        ok = 0;
       +        clumps = 0;
       +        for(i = 0; i < ix->narenas; i++){
       +                /*
       +                 * n is 32 bits wide so that the failure value returned
       +                 * through readarenainfo's int result still compares equal
       +                 * to TWID32 when ulong is wider than 32 bits.
       +                 */
       +                n = readarenainfo(ix->arenas[i]);
       +                if(n == TWID32){
       +                        ok = -1;
       +                        break;
       +                }
       +                clumps += n;
       +        }
       +
       +        if(ok < 0)
       +                return;
       +
       +        print("clumps = %ld\n", clumps);
       +        for(i=0; i<VtMaxLumpSize; i++) {
       +                t = 0;
       +                for(j=0; j<VtMaxType; j++)
       +                        t += count[i][j];
       +                /* skip sizes that never occurred */
       +                if(t == 0)
       +                        continue;
       +                print("%d\t%d", i, t);
       +                for(j=0; j<VtMaxType; j++)
       +                        print("\t%d", count[i][j]);
       +                print("\n");
       +        }
       +}
       +
       +
       +/*
       + * Print the clumpstats synopsis on standard error and exit
       + * with a non-empty status so callers see the failure.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: clumpstats [-B blockcachesize] config\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * clumpstats [-B blockcachesize] config
       + *
       + * Initialize venti read-only from config and print clump size
       + * statistics for the main index.
       + *        -B  requested disk block cache size; raised to a minimum
       + *            derived from the number of arenas and index sections
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        u32int bcmem;
       +
       +        bcmem = 0;
       +
       +        ARGBEGIN{
       +        case 'B':
       +                /* EARGF calls usage() when -B has no argument rather than yielding nil */
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        readonly = 1;
       +
       +        if(argc != 1)
       +                usage();
       +
       +        if(initventi(argv[0], &conf) < 0)
       +                sysfatal("can't init venti: %r");
       +
       +        if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
       +                bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
       +        if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
       +        initdcache(bcmem);
       +
       +        clumpstats(mainindex);
       +
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/config.c b/src/cmd/venti/srv/config.c
       t@@ -0,0 +1,245 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +Index                        *mainindex;
       +int                        paranoid = 1;                /* should verify hashes on disk read */
       +
       +static ArenaPart        *configarenas(char *file);
       +static ISect                *configisect(char *file);
       +static Bloom                *configbloom(char *file);
       +
       +/*
       + * Start the statistics code, parse the configuration file into conf,
       + * and build the global mainindex from the index name and sections
       + * found there, attaching conf's bloom filter to it.
       + * Returns 0 on success and -1 on failure.
       + */
       +int
       +initventi(char *file, Config *conf)
       +{
       +        statsinit();
       +
       +        /* a configuration file is mandatory */
       +        if(file == nil){
       +                seterr(EOk, "no configuration file");
       +                return -1;
       +        }
       +
       +        if(runconfig(file, conf) < 0){
       +                seterr(EOk, "can't initialize venti: %r");
       +                return -1;
       +        }
       +
       +        if((mainindex = initindex(conf->index, conf->sects, conf->nsects)) == nil)
       +                return -1;
       +
       +        mainindex->bloom = conf->bloom;
       +        return 0;
       +}
       +
       +/*
       + * Check that s is a number (any base strtoull accepts with base 0),
       + * optionally followed by exactly one K, M, or G suffix in either case.
       + * Returns 0 if s is well formed and -1 otherwise.
       + */
       +static int
       +numok(char *s)
       +{
       +        char *p;
       +
       +        strtoull(s, &p, 0);
       +        if(p == s)                /* no digits were consumed */
       +                return -1;
       +        if(*p == 0)                /* plain number */
       +                return 0;
       +        if(p[1] == 0 && strchr("MmGgKk", *p))
       +                return 0;
       +        return -1;                /* unknown suffix or trailing junk */
       +}
       +
       +/*
       + * configs        :
       + *                | configs config
       + * config        : "isect" filename
       + *                | "arenas" filename
       + *                | "bloom" filename
       + *                | "index" name
       + *                | "bcmem" num
       + *                | "mem" num
       + *                | "icmem" num
       + *                | "queuewrites"
       + *                | "httpaddr" address
       + *                | "webroot" path
       + *                | "addr" address
       + *
       + * a comment starts at '#' and runs to the end of the line
       + */
       +/*
       + * Largest number of fields on a valid configuration line.
       + * runconfig asks getfields for one field more than this, so a line
       + * with too many fields matches no directive and is rejected.
       + */
       +enum
       +{
       +        MaxArgs        = 2
       +};
       +/*
       + * Parse the configuration file into *config, which is zeroed first.
       + * Each line is split into at most MaxArgs+1 fields and matched
       + * against the known directives; index sections, arena partitions
       + * and the bloom filter are opened as they are encountered.
       + * Returns 0 once every line has been accepted.  On the first bad line
       + * the error string is set, the sects and aparts arrays are freed
       + * (the array storage only), and -1 is returned.
       + */
       +int
       +runconfig(char *file, Config *config)
       +{
       +        ArenaPart **av;
       +        ISect **sv;
       +        IFile f;
       +        char *s, *line, *flds[MaxArgs + 1];
       +        int i, ok;
       +
       +        if(readifile(&f, file) < 0)
       +                return -1;
       +        memset(config, 0, sizeof *config);
       +        /* 0xFFFFFFFF marks "mem not set yet"; see the duplicate check below */
       +        config->mem = 0xFFFFFFFFUL;
       +        /* ok stays -1 unless the loop ends by running out of lines */
       +        ok = -1;
       +        line = nil;
       +        for(;;){
       +                s = ifileline(&f);
       +                if(s == nil){
       +                        ok = 0;
       +                        break;
       +                }
       +                /* keep an unsplit copy of the line for the "illegal line" message */
       +                line = estrdup(s);
       +                i = getfields(s, flds, MaxArgs + 1, 1, " \t\r");
       +                if(i == 2 && strcmp(flds[0], "isect") == 0){
       +                        /* grow the section array by one: allocate, copy, swap */
       +                        sv = MKN(ISect*, config->nsects + 1);
       +                        for(i = 0; i < config->nsects; i++)
       +                                sv[i] = config->sects[i];
       +                        free(config->sects);
       +                        config->sects = sv;
       +                        config->sects[config->nsects] = configisect(flds[1]);
       +                        if(config->sects[config->nsects] == nil)
       +                                break;
       +                        config->nsects++;
       +                }else if(i == 2 && strcmp(flds[0], "arenas") == 0){
       +                        /* same grow-by-one scheme for the arena partitions */
       +                        av = MKN(ArenaPart*, config->naparts + 1);
       +                        for(i = 0; i < config->naparts; i++)
       +                                av[i] = config->aparts[i];
       +                        free(config->aparts);
       +                        config->aparts = av;
       +                        config->aparts[config->naparts] = configarenas(flds[1]);
       +                        if(config->aparts[config->naparts] == nil)
       +                                break;
       +                        config->naparts++;
       +                }else if(i == 2 && strcmp(flds[0], "bloom") == 0){
       +                        if(config->bloom){
       +                                seterr(EAdmin, "duplicate bloom lines in configuration file %s", file);
       +                                break;
       +                        }
       +                        if((config->bloom = configbloom(flds[1])) == nil)
       +                                break;
       +                }else if(i == 2 && strcmp(flds[0], "index") == 0){
       +                        if(nameok(flds[1]) < 0){
       +                                seterr(EAdmin, "illegal index name %s in config file %s", flds[1], file);
       +                                break;
       +                        }
       +                        if(config->index != nil){
       +                                seterr(EAdmin, "duplicate indices in config file %s", file);
       +                                break;
       +                        }
       +                        config->index = estrdup(flds[1]);
       +                }else if(i == 2 && strcmp(flds[0], "bcmem") == 0){
       +                        if(numok(flds[1]) < 0){
       +                                seterr(EAdmin, "illegal size %s in config file %s",
       +                                        flds[1], file);
       +                                break;
       +                        }
       +                        /* a nonzero value means an earlier bcmem line was seen */
       +                        if(config->bcmem != 0){
       +                                seterr(EAdmin, "duplicate bcmem lines in config file %s", file);
       +                                break;
       +                        }
       +                        config->bcmem = unittoull(flds[1]);
       +                }else if(i == 2 && strcmp(flds[0], "mem") == 0){
       +                        if(numok(flds[1]) < 0){
       +                                seterr(EAdmin, "illegal size %s in config file %s",
       +                                        flds[1], file);
       +                                break;
       +                        }
       +                        if(config->mem != 0xFFFFFFFFUL){
       +                                seterr(EAdmin, "duplicate mem lines in config file %s", file);
       +                                break;
       +                        }
       +                        config->mem = unittoull(flds[1]);
       +                }else if(i == 2 && strcmp(flds[0], "icmem") == 0){
       +                        if(numok(flds[1]) < 0){
       +                                seterr(EAdmin, "illegal size %s in config file %s",
       +                                        flds[1], file);
       +                                break;
       +                        }
       +                        if(config->icmem != 0){
       +                                seterr(EAdmin, "duplicate icmem lines in config file %s", file);
       +                                break;
       +                        }
       +                        config->icmem = unittoull(flds[1]);
       +                }else if(i == 1 && strcmp(flds[0], "queuewrites") == 0){
       +                        config->queuewrites = 1;
       +                }else if(i == 2 && strcmp(flds[0], "httpaddr") == 0){
       +                        if(config->haddr){
       +                                seterr(EAdmin, "duplicate httpaddr lines in configuration file %s", file);
       +                                break;
       +                        }
       +                        config->haddr = estrdup(flds[1]);
       +                }else if(i == 2 && strcmp(flds[0], "webroot") == 0){
       +                        if(config->webroot){
       +                                seterr(EAdmin, "duplicate webroot lines in configuration file %s", file);
       +                                break;
       +                        }
       +                        config->webroot = estrdup(flds[1]);
       +                }else if(i == 2 && strcmp(flds[0], "addr") == 0){
       +                        if(config->vaddr){
       +                                seterr(EAdmin, "duplicate addr lines in configuration file %s", file);
       +                                break;
       +                        }
       +                        config->vaddr = estrdup(flds[1]);
       +                }else{
       +                        /* unknown keyword or wrong number of fields */
       +                        seterr(EAdmin, "illegal line '%s' in configuration file %s", line, file);
       +                        break;
       +                }
       +                free(line);
       +                line = nil;
       +        }
       +        /* line is still set here only when the loop was left through an error break */
       +        free(line);
       +        freeifile(&f);
       +        if(ok < 0){
       +                free(config->sects);
       +                config->sects = nil;
       +                free(config->aparts);
       +                config->aparts = nil;
       +        }
       +        return ok;
       +}
       +
       +static ISect*
       +configisect(char *file)
       +{
       +        Part *part;
       +        ISect *is;
       +        
       +        if(0) fprint(2, "configure index section in %s\n", file);
       +
       +        part = initpart(file, ORDWR|ODIRECT);
       +        if(part == nil)
       +                return nil;
       +        is = initisect(part);
       +        if(is == nil)
       +                werrstr("%s: %r", file);
       +        return is;
       +}
       +
       +static ArenaPart*
       +configarenas(char *file)
       +{
       +        ArenaPart *ap;
       +        Part *part;
       +
       +        if(0) fprint(2, "configure arenas in %s\n", file);
       +        part = initpart(file, ORDWR|ODIRECT);
       +        if(part == nil)
       +                return nil;
       +        ap = initarenapart(part);
       +        if(ap == nil)
       +                werrstr("%s: %r", file);
       +        return ap;
       +}
       +
       +static Bloom*
       +configbloom(char *file)
       +{
       +        Bloom *b;
       +        Part *part;
       +
       +        if(0) fprint(2, "configure bloom in %s\n", file);
       +        part = initpart(file, ORDWR|ODIRECT);
       +        if(part == nil)
       +                return nil;
       +        b = readbloom(part);
       +        if(b == nil)
       +                werrstr("%s: %r", file);
       +        return b;
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/conv.c b/src/cmd/venti/srv/conv.c
       t@@ -0,0 +1,632 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * disk structure conversion routines
       + */
       +#define        U8GET(p)        ((p)[0])
       +#define        U16GET(p)        (((p)[0]<<8)|(p)[1])
       +#define        U32GET(p)        ((u32int)(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3]))
       +#define        U64GET(p)        (((u64int)U32GET(p)<<32)|(u64int)U32GET((p)+4))
       +
       +#define        U8PUT(p,v)        (p)[0]=(v)&0xFF
       +#define        U16PUT(p,v)        (p)[0]=((v)>>8)&0xFF;(p)[1]=(v)&0xFF
       +#define        U32PUT(p,v)        (p)[0]=((v)>>24)&0xFF;(p)[1]=((v)>>16)&0xFF;(p)[2]=((v)>>8)&0xFF;(p)[3]=(v)&0xFF
       +#define        U64PUT(p,v,t32)        t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
       +
       +/*
       + * Known on-disk magic numbers paired with their symbolic names;
       + * fmtmagic searches this table to name a magic in error messages.
       + */
       +static struct {
       +        u32int m;
       +        char *s;
       +} magics[] = {
       +        ArenaPartMagic, "ArenaPartMagic",
       +        ArenaHeadMagic, "ArenaHeadMagic",
       +        ArenaMagic, "ArenaMagic",
       +        ISectMagic, "ISectMagic",
       +        BloomMagic, "BloomMagic",
       +};
       +
       +/*
       + * Return a printable form of the magic number m: its symbolic name
       + * when m appears in magics[], otherwise its hexadecimal value
       + * formatted into the caller's buffer s, which is then returned.
       + */
       +static char*
       +fmtmagic(char *s, u32int m)
       +{
       +        int k;
       +
       +        k = 0;
       +        while(k < nelem(magics)){
       +                if(m == magics[k].m)
       +                        return magics[k].s;
       +                k++;
       +        }
       +
       +        /* not a magic we know: show the raw value */
       +        sprint(s, "0x%08ux", m);
       +        return s;
       +}
       +
       +/*
       + * Return the 32-bit big-endian magic number stored at buf.
       + */
       +u32int
       +unpackmagic(u8int *buf)
       +{
       +        u32int magic;
       +
       +        magic = U32GET(buf);
       +        return magic;
       +}
       +
       +/*
       + * Store magic at buf as a 32-bit big-endian value.
       + */
       +void
       +packmagic(u32int magic, u8int *buf)
       +{
       +        u8int *dst;
       +
       +        dst = buf;
       +        U32PUT(dst, magic);
       +}
       +
       +/*
       + * Decode the arena partition header at buf into ap: magic, version,
       + * blocksize, and arenabase, each a 32-bit big-endian field.
       + * Returns 0 on success, or -1 with the error set if the magic is wrong.
       + */
       +int
       +unpackarenapart(ArenaPart *ap, u8int *buf)
       +{
       +        u8int *p;
       +        u32int m;
       +        char fbuf[20];
       +
       +        p = buf;
       +
       +        m = U32GET(p);
       +        if(m != ArenaPartMagic){
       +                /* %lux consumes a ulong, so hand it one explicitly */
       +                seterr(ECorrupt, "arena set has wrong magic number: %s expected ArenaPartMagic (%lux)", fmtmagic(fbuf, m), (ulong)ArenaPartMagic);
       +                return -1;
       +        }
       +        p += U32Size;
       +        ap->version = U32GET(p);
       +        p += U32Size;
       +        ap->blocksize = U32GET(p);
       +        p += U32Size;
       +        ap->arenabase = U32GET(p);
       +        p += U32Size;
       +
       +        /* sanity check: the fields read must add up to the declared header size */
       +        if(buf + ArenaPartSize != p)
       +                sysfatal("unpackarenapart unpacked wrong amount");
       +
       +        return 0;
       +}
       +
       +/*
       + * Encode ap as an arena partition header at buf: the ArenaPartMagic
       + * followed by version, blocksize, and arenabase.  Always returns 0.
       + */
       +int
       +packarenapart(ArenaPart *ap, u8int *buf)
       +{
       +        u8int *cur;
       +
       +        cur = buf;
       +
       +        U32PUT(cur, ArenaPartMagic);
       +        cur += U32Size;
       +
       +        U32PUT(cur, ap->version);
       +        cur += U32Size;
       +
       +        U32PUT(cur, ap->blocksize);
       +        cur += U32Size;
       +
       +        U32PUT(cur, ap->arenabase);
       +        cur += U32Size;
       +
       +        /* the bytes written must match the declared header size */
       +        if(cur != buf + ArenaPartSize)
       +                sysfatal("packarenapart packed wrong amount");
       +
       +        return 0;
       +}
       +
       +/*
       + * Decode the arena trailer at buf into arena and copy the decoded
       + * disk statistics into memstats.  Version 5 trailers carry a clump
       + * magic on disk; version 4 trailers do not and get _ClumpMagic.
       + * Returns 0 on success, or -1 with the error set if the magic or
       + * the version is not recognized.
       + */
       +int
       +unpackarena(Arena *arena, u8int *buf)
       +{
       +        int sz;
       +        u8int *p;
       +        u32int m;
       +        char fbuf[20];
       +
       +        p = buf;
       +
       +        m = U32GET(p);
       +        if(m != ArenaMagic){
       +                /* %lux consumes a ulong, so hand it one explicitly */
       +                seterr(ECorrupt, "arena has wrong magic number: %s expected ArenaMagic (%lux)", fmtmagic(fbuf, m), (ulong)ArenaMagic);
       +                return -1;
       +        }
       +        p += U32Size;
       +        arena->version = U32GET(p);
       +        p += U32Size;
       +        namecp(arena->name, (char*)p);
       +        p += ANameSize;
       +        arena->diskstats.clumps = U32GET(p);
       +        p += U32Size;
       +        arena->diskstats.cclumps = U32GET(p);
       +        p += U32Size;
       +        arena->ctime = U32GET(p);
       +        p += U32Size;
       +        arena->wtime = U32GET(p);
       +        p += U32Size;
       +        /* only the version 5 layout has the clump magic field here */
       +        if(arena->version == ArenaVersion5){
       +                arena->clumpmagic = U32GET(p);
       +                p += U32Size;
       +        }
       +        arena->diskstats.used = U64GET(p);
       +        p += U64Size;
       +        arena->diskstats.uncsize = U64GET(p);
       +        p += U64Size;
       +        arena->diskstats.sealed = U8GET(p);
       +        p += U8Size;
       +
       +        arena->memstats = arena->diskstats;
       +
       +        switch(arena->version){
       +        case ArenaVersion4:
       +                sz = ArenaSize4;
       +                arena->clumpmagic = _ClumpMagic;
       +                break;
       +        case ArenaVersion5:
       +                sz = ArenaSize5;
       +                break;
       +        default:
       +                seterr(ECorrupt, "arena has bad version number %d", arena->version);
       +                return -1;
       +        }
       +        /* the fields read must add up to the size for this version */
       +        if(buf + sz != p)
       +                sysfatal("unpackarena unpacked wrong amount");
       +
       +        return 0;
       +}
       +
       +/*
       + * Encode arena as an arena trailer at buf, in the layout selected by
       + * arena->version.  A version 4 trailer has no room for the clump
       + * magic, so a warning is printed when it differs from _ClumpMagic.
       + * Returns 0; an unknown version is fatal.
       + */
       +int
       +packarena(Arena *arena, u8int *buf)
       +{
       +        int want;
       +        u8int *cur;
       +        u32int t32;
       +
       +        if(arena->version == ArenaVersion4){
       +                want = ArenaSize4;
       +                if(arena->clumpmagic != _ClumpMagic)
       +                        fprint(2, "warning: writing old arena tail loses clump magic 0x%lux != 0x%lux\n",
       +                                (ulong)arena->clumpmagic, (ulong)_ClumpMagic);
       +        }else if(arena->version == ArenaVersion5){
       +                want = ArenaSize5;
       +        }else{
       +                sysfatal("packarena unknown version %d", arena->version);
       +                return -1;
       +        }
       +
       +        cur = buf;
       +
       +        U32PUT(cur, ArenaMagic);
       +        cur += U32Size;
       +        U32PUT(cur, arena->version);
       +        cur += U32Size;
       +        namecp((char*)cur, arena->name);
       +        cur += ANameSize;
       +        U32PUT(cur, arena->diskstats.clumps);
       +        cur += U32Size;
       +        U32PUT(cur, arena->diskstats.cclumps);
       +        cur += U32Size;
       +        U32PUT(cur, arena->ctime);
       +        cur += U32Size;
       +        U32PUT(cur, arena->wtime);
       +        cur += U32Size;
       +        /* the clump magic field exists only in the version 5 layout */
       +        if(arena->version == ArenaVersion5){
       +                U32PUT(cur, arena->clumpmagic);
       +                cur += U32Size;
       +        }
       +        U64PUT(cur, arena->diskstats.used, t32);
       +        cur += U64Size;
       +        U64PUT(cur, arena->diskstats.uncsize, t32);
       +        cur += U64Size;
       +        U8PUT(cur, arena->diskstats.sealed);
       +        cur += U8Size;
       +
       +        if(cur != buf + want)
       +                sysfatal("packarena packed wrong amount");
       +
       +        return 0;
       +}
       +
       +/*
       + * Decode the arena header at buf into head.  Version 5 headers carry
       + * a clump magic on disk; version 4 headers do not and get _ClumpMagic.
       + * Returns 0 on success, or -1 with the error set if the magic is not
       + * ArenaHeadMagic or the version is not recognized.
       + */
       +int
       +unpackarenahead(ArenaHead *head, u8int *buf)
       +{
       +        u8int *p;
       +        u32int m;
       +        int sz;
       +        char fbuf[20];
       +
       +        p = buf;
       +
       +        /* reject blocks that are not arena headers before trusting any field */
       +        m = U32GET(p);
       +        if(m != ArenaHeadMagic){
       +                seterr(ECorrupt, "arena head has wrong magic number: %s expected ArenaHeadMagic (%lux)", fmtmagic(fbuf, m), (ulong)ArenaHeadMagic);
       +                return -1;
       +        }
       +
       +        p += U32Size;
       +        head->version = U32GET(p);
       +        p += U32Size;
       +        namecp(head->name, (char*)p);
       +        p += ANameSize;
       +        head->blocksize = U32GET(p);
       +        p += U32Size;
       +        head->size = U64GET(p);
       +        p += U64Size;
       +        /* only the version 5 layout has the clump magic field */
       +        if(head->version == ArenaVersion5){
       +                head->clumpmagic = U32GET(p);
       +                p += U32Size;
       +        }
       +
       +        switch(head->version){
       +        case ArenaVersion4:
       +                sz = ArenaHeadSize4;
       +                head->clumpmagic = _ClumpMagic;
       +                break;
       +        case ArenaVersion5:
       +                sz = ArenaHeadSize5;
       +                break;
       +        default:
       +                seterr(ECorrupt, "arena head has unexpected version %d", head->version);
       +                return -1;
       +        }
       +
       +        /* the fields read must add up to the size for this version */
       +        if(buf + sz != p)
       +                sysfatal("unpackarenahead unpacked wrong amount");
       +
       +        return 0;
       +}
       +
       +/*
       + * Encode head as an arena header at buf, in the layout selected by
       + * head->version.  A version 4 header has no room for the clump
       + * magic, so a warning is printed when it differs from _ClumpMagic.
       + * Returns 0; an unknown version is fatal.
       + */
       +int
       +packarenahead(ArenaHead *head, u8int *buf)
       +{
       +        u8int *cur;
       +        int want;
       +        u32int t32;
       +
       +        if(head->version == ArenaVersion4){
       +                want = ArenaHeadSize4;
       +                if(head->clumpmagic != _ClumpMagic)
       +                        fprint(2, "warning: writing old arena header loses clump magic 0x%lux != 0x%lux\n",
       +                                (ulong)head->clumpmagic, (ulong)_ClumpMagic);
       +        }else if(head->version == ArenaVersion5){
       +                want = ArenaHeadSize5;
       +        }else{
       +                sysfatal("packarenahead unknown version %d", head->version);
       +                return -1;
       +        }
       +
       +        cur = buf;
       +
       +        U32PUT(cur, ArenaHeadMagic);
       +        cur += U32Size;
       +        U32PUT(cur, head->version);
       +        cur += U32Size;
       +        namecp((char*)cur, head->name);
       +        cur += ANameSize;
       +        U32PUT(cur, head->blocksize);
       +        cur += U32Size;
       +        U64PUT(cur, head->size, t32);
       +        cur += U64Size;
       +        /* the clump magic field exists only in the version 5 layout */
       +        if(head->version == ArenaVersion5){
       +                U32PUT(cur, head->clumpmagic);
       +                cur += U32Size;
       +        }
       +
       +        if(cur != buf + want)
       +                sysfatal("packarenahead packed wrong amount");
       +
       +        return 0;
       +}
       +
       +/*
       + * Check that a clump's encoding agrees with its sizes: an unencoded
       + * clump must have size equal to uncsize, and a compressed clump must
       + * have size strictly smaller than uncsize.
       + * Returns 0 if consistent, or -1 with the error set.
       + */
       +static int
       +checkclump(Clump *w)
       +{
       +        if(w->encoding != ClumpENone && w->encoding != ClumpECompress){
       +                seterr(ECorrupt, "clump has illegal encoding");
       +                return -1;
       +        }
       +
       +        if(w->encoding == ClumpENone){
       +                if(w->info.size != w->info.uncsize){
       +                        seterr(ECorrupt, "uncompressed wad size mismatch");
       +                        return -1;
       +                }
       +                return 0;
       +        }
       +
       +        /* compressed: the stored form has to be the smaller one */
       +        if(w->info.size >= w->info.uncsize){
       +                seterr(ECorrupt, "compressed lump has inconsistent block sizes %d %d", w->info.size, w->info.uncsize);
       +                return -1;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Decode the clump header at buf into c.  The header must begin with
       + * cmagic.  Returns -1 with the error set on a magic mismatch;
       + * otherwise returns the result of checkclump on the decoded header.
       + */
       +int
       +unpackclump(Clump *c, u8int *buf, u32int cmagic)
       +{
       +        u8int *cur;
       +        u32int magic;
       +
       +        cur = buf;
       +        magic = U32GET(cur);
       +        if(magic != cmagic){
       +                seterr(ECorrupt, "clump has bad magic number=%#8.8ux != %#8.8ux", magic, cmagic);
       +                return -1;
       +        }
       +        cur += U32Size;
       +
       +        /* clump info: type, stored size, uncompressed size, score */
       +        c->info.type = vtfromdisktype(U8GET(cur));
       +        cur += U8Size;
       +        c->info.size = U16GET(cur);
       +        cur += U16Size;
       +        c->info.uncsize = U16GET(cur);
       +        cur += U16Size;
       +        scorecp(c->info.score, cur);
       +        cur += VtScoreSize;
       +
       +        /* the remaining header fields */
       +        c->encoding = U8GET(cur);
       +        cur += U8Size;
       +        c->creator = U32GET(cur);
       +        cur += U32Size;
       +        c->time = U32GET(cur);
       +        cur += U32Size;
       +
       +        if(cur != buf + ClumpSize)
       +                sysfatal("unpackclump unpacked wrong amount");
       +
       +        return checkclump(c);
       +}
       +
       +/*
       + * Encode c as a clump header at buf, starting with the given magic.
       + * Returns the result of checkclump on c; the header is written
       + * whatever that result is.
       + */
       +int
       +packclump(Clump *c, u8int *buf, u32int magic)
       +{
       +        u8int *cur;
       +
       +        cur = buf;
       +        U32PUT(cur, magic);
       +        cur += U32Size;
       +
       +        /* clump info: type, stored size, uncompressed size, score */
       +        U8PUT(cur, vttodisktype(c->info.type));
       +        cur += U8Size;
       +        U16PUT(cur, c->info.size);
       +        cur += U16Size;
       +        U16PUT(cur, c->info.uncsize);
       +        cur += U16Size;
       +        scorecp(cur, c->info.score);
       +        cur += VtScoreSize;
       +
       +        /* the remaining header fields */
       +        U8PUT(cur, c->encoding);
       +        cur += U8Size;
       +        U32PUT(cur, c->creator);
       +        cur += U32Size;
       +        U32PUT(cur, c->time);
       +        cur += U32Size;
       +
       +        if(cur != buf + ClumpSize)
       +                sysfatal("packclump packed wrong amount");
       +
       +        return checkclump(c);
       +}
       +
       +/*
       + * Decode the clump directory entry at buf into ci:
       + * type, stored size, uncompressed size, and score.
       + */
       +void
       +unpackclumpinfo(ClumpInfo *ci, u8int *buf)
       +{
       +        u8int *cur;
       +
       +        cur = buf;
       +
       +        ci->type = vtfromdisktype(U8GET(cur));
       +        cur += U8Size;
       +
       +        ci->size = U16GET(cur);
       +        cur += U16Size;
       +
       +        ci->uncsize = U16GET(cur);
       +        cur += U16Size;
       +
       +        scorecp(ci->score, cur);
       +        cur += VtScoreSize;
       +
       +        if(cur != buf + ClumpInfoSize)
       +                sysfatal("unpackclumpinfo unpacked wrong amount");
       +}
       +
       +/*
       + * Encode ci as a clump directory entry at buf:
       + * type, stored size, uncompressed size, and score.
       + */
       +void
       +packclumpinfo(ClumpInfo *ci, u8int *buf)
       +{
       +        u8int *cur;
       +
       +        cur = buf;
       +
       +        U8PUT(cur, vttodisktype(ci->type));
       +        cur += U8Size;
       +
       +        U16PUT(cur, ci->size);
       +        cur += U16Size;
       +
       +        U16PUT(cur, ci->uncsize);
       +        cur += U16Size;
       +
       +        scorecp(cur, ci->score);
       +        cur += VtScoreSize;
       +
       +        if(cur != buf + ClumpInfoSize)
       +                sysfatal("packclumpinfo packed wrong amount");
       +}
       +
       +/*
       + * Decode the index section header at buf into is.  The bucket magic
       + * is read only from version 2 headers; otherwise it is set to 0.
       + * Returns 0 on success, or -1 with the error set if the magic is wrong.
       + */
       +int
       +unpackisect(ISect *is, u8int *buf)
       +{
       +        u8int *p;
       +        u32int m;
       +        char fbuf[20];
       +
       +        p = buf;
       +
       +
       +        m = U32GET(p);
       +        if(m != ISectMagic){
       +                /* %lux consumes a ulong, so hand it one explicitly */
       +                seterr(ECorrupt, "index section has wrong magic number: %s expected ISectMagic (%lux)",
       +                        fmtmagic(fbuf, m), (ulong)ISectMagic);
       +                return -1;
       +        }
       +        p += U32Size;
       +        is->version = U32GET(p);
       +        p += U32Size;
       +        namecp(is->name, (char*)p);
       +        p += ANameSize;
       +        namecp(is->index, (char*)p);
       +        p += ANameSize;
       +        is->blocksize = U32GET(p);
       +        p += U32Size;
       +        is->blockbase = U32GET(p);
       +        p += U32Size;
       +        is->blocks = U32GET(p);
       +        p += U32Size;
       +        is->start = U32GET(p);
       +        p += U32Size;
       +        is->stop = U32GET(p);
       +        p += U32Size;
       +        /* everything up to here is the version 1 layout */
       +        if(buf + ISectSize1 != p)
       +                sysfatal("unpackisect unpacked wrong amount");
       +        is->bucketmagic = 0;
       +        if(is->version == ISectVersion2){
       +                is->bucketmagic = U32GET(p);
       +                p += U32Size;
       +                if(buf + ISectSize2 != p)
       +                        sysfatal("unpackisect unpacked wrong amount");
       +        }
       +
       +        return 0;
       +}
       +
       +/*
       + * Encode is as an index section header at buf.  The bucket magic is
       + * appended only for version 2 headers.  Always returns 0.
       + */
       +int
       +packisect(ISect *is, u8int *buf)
       +{
       +        u8int *cur;
       +
       +        cur = buf;
       +
       +        U32PUT(cur, ISectMagic);
       +        cur += U32Size;
       +        U32PUT(cur, is->version);
       +        cur += U32Size;
       +
       +        /* section name, then the name of the index it belongs to */
       +        namecp((char*)cur, is->name);
       +        cur += ANameSize;
       +        namecp((char*)cur, is->index);
       +        cur += ANameSize;
       +
       +        U32PUT(cur, is->blocksize);
       +        cur += U32Size;
       +        U32PUT(cur, is->blockbase);
       +        cur += U32Size;
       +        U32PUT(cur, is->blocks);
       +        cur += U32Size;
       +        U32PUT(cur, is->start);
       +        cur += U32Size;
       +        U32PUT(cur, is->stop);
       +        cur += U32Size;
       +
       +        /* everything up to here is the version 1 layout */
       +        if(cur != buf + ISectSize1)
       +                sysfatal("packisect packed wrong amount");
       +
       +        if(is->version == ISectVersion2){
       +                U32PUT(cur, is->bucketmagic);
       +                cur += U32Size;
       +                if(cur != buf + ISectSize2)
       +                        sysfatal("packisect packed wrong amount");
       +        }
       +
       +        return 0;
       +}
       +
       +/*
       + * Decode the index entry at buf into ie: score, wtime, train, and the
       + * clump address record (addr, size, type, blocks).
       + */
       +void
       +unpackientry(IEntry *ie, u8int *buf)
       +{
       +        u8int *cur;
       +
       +        cur = buf;
       +
       +        scorecp(ie->score, cur);
       +        cur += VtScoreSize;
       +        ie->wtime = U32GET(cur);
       +        cur += U32Size;
       +        ie->train = U16GET(cur);
       +        cur += U16Size;
       +        ie->ia.addr = U64GET(cur);
       +        /*
       +         * Report any address with one of its top 8 bits set.
       +         * NOTE(review): looks like leftover debugging output; confirm.
       +         */
       +        if(ie->ia.addr>>56)
       +                print("%.8H => %llux\n", cur, ie->ia.addr);
       +        cur += U64Size;
       +        ie->ia.size = U16GET(cur);
       +        cur += U16Size;
       +
       +        /* the type byte must sit at the offset other code expects */
       +        if(cur - buf != IEntryTypeOff)
       +                sysfatal("unpackientry bad IEntryTypeOff amount");
       +        ie->ia.type = vtfromdisktype(U8GET(cur));
       +        cur += U8Size;
       +        ie->ia.blocks = U8GET(cur);
       +        cur += U8Size;
       +
       +        if(cur - buf != IEntrySize)
       +                sysfatal("unpackientry unpacked wrong amount");
       +}
       +
       +/*
       + * Encode ie as an index entry at buf: score, wtime, train, and the
       + * clump address record (addr, size, type, blocks).
       + */
       +void
       +packientry(IEntry *ie, u8int *buf)
       +{
       +        u8int *cur;
       +        u32int t32;
       +
       +        cur = buf;
       +
       +        scorecp(cur, ie->score);
       +        cur += VtScoreSize;
       +
       +        U32PUT(cur, ie->wtime);
       +        cur += U32Size;
       +
       +        U16PUT(cur, ie->train);
       +        cur += U16Size;
       +
       +        U64PUT(cur, ie->ia.addr, t32);
       +        cur += U64Size;
       +
       +        U16PUT(cur, ie->ia.size);
       +        cur += U16Size;
       +
       +        U8PUT(cur, vttodisktype(ie->ia.type));
       +        cur += U8Size;
       +
       +        U8PUT(cur, ie->ia.blocks);
       +        cur += U8Size;
       +
       +        if(cur - buf != IEntrySize)
       +                sysfatal("packientry packed wrong amount");
       +}
       +
       +/*
       + * Decode the index bucket header at buf into b and point b->data just
       + * past the header.  When magic is nonzero and differs from the magic
       + * stored after the count, the bucket is treated as empty (n = 0).
       + */
       +void
       +unpackibucket(IBucket *b, u8int *buf, u32int magic)
       +{
       +        u32int found;
       +
       +        b->n = U16GET(buf);
       +        b->data = buf + IBucketSize;
       +
       +        /* a zero magic disables the check */
       +        if(magic == 0)
       +                return;
       +
       +        found = U32GET(buf+U16Size);
       +        if(found != magic)
       +                b->n = 0;
       +}
       +
       +/*
       + * Encode the index bucket header at buf: the 16-bit entry count
       + * from b followed by the 32-bit magic.
       + */
       +void
       +packibucket(IBucket *b, u8int *buf, u32int magic)
       +{
       +        u8int *countp, *magicp;
       +
       +        countp = buf;
       +        magicp = buf+U16Size;
       +
       +        U16PUT(countp, b->n);
       +        U32PUT(magicp, magic);
       +}
       +
       +/*
       + * Encode the bloom filter header at buf as four consecutive 32-bit
       + * fields: BloomMagic, BloomVersion, b->nhash, and b->size.
       + */
       +void
       +packbloomhead(Bloom *b, u8int *buf)
       +{
       +        U32PUT(buf, BloomMagic);
       +        U32PUT(buf+4, BloomVersion);
       +        U32PUT(buf+8, b->nhash);
       +        U32PUT(buf+12, b->size);
       +}
       +
       +/*
       + * Decode the bloom filter header at buf into b (nhash and size).
       + * Returns 0 on success, or -1 with the error set if the magic or
       + * the version does not match BloomMagic / BloomVersion.
       + */
       +int
       +unpackbloomhead(Bloom *b, u8int *buf)
       +{
       +        u8int *p;
       +        u32int m;
       +        char fbuf[20];
       +
       +        p = buf;
       +
       +        m = U32GET(p);
       +        if(m != BloomMagic){
       +                seterr(ECorrupt, "bloom filter has wrong magic number: %s expected BloomMagic (%lux)", fmtmagic(fbuf, m), (ulong)BloomMagic);
       +                return -1;
       +        }
       +        p += U32Size;
       +        
       +        /* m is reused to hold the version field */
       +        m = U32GET(p);
       +        if(m != BloomVersion){
       +                seterr(ECorrupt, "bloom filter has wrong version %ud expected %ud", (uint)m, (uint)BloomVersion);
       +                return -1;
       +        }
       +        p += U32Size;
       +
       +        b->nhash = U32GET(p);
       +        p += U32Size;
       +
       +        b->size = U32GET(p);
       +        p += U32Size;
       +
       +        /* name this function, not unpackarena, in the consistency failure */
       +        if(buf + BloomHeadSize != p)
       +                sysfatal("unpackbloomhead unpacked wrong amount");
       +
       +        return 0;
       +}
   DIR diff --git a/src/cmd/venti/srv/dat.h b/src/cmd/venti/srv/dat.h
       t@@ -0,0 +1,718 @@
       +/*
       + * forward typedefs for the struct types used by the server,
       + * so the declarations below can refer to them by bare name
       + * regardless of definition order.
       + */
       +typedef struct Config                Config;
       +typedef struct AMap                AMap;
       +typedef struct AMapN                AMapN;
       +typedef struct Arena                Arena;
       +typedef struct AState        AState;
       +typedef struct ArenaHead        ArenaHead;
       +typedef struct ArenaPart        ArenaPart;
       +typedef struct ArenaTail        ArenaTail;
       +typedef struct ATailStats        ATailStats;
       +typedef struct CIBlock                CIBlock;
       +typedef struct Clump                Clump;
       +typedef struct ClumpInfo        ClumpInfo;
       +typedef struct Graph Graph;
       +typedef struct IAddr                IAddr;
       +typedef struct IBucket                IBucket;
       +typedef struct IEStream                IEStream;
       +typedef struct IEntry                IEntry;
       +typedef struct IFile                IFile;
       +typedef struct ISect                ISect;
       +typedef struct Index                Index;
       +typedef struct Lump                Lump;
       +typedef struct DBlock                DBlock;
       +typedef struct Part                Part;
       +typedef struct Statbin Statbin;
       +typedef struct Statdesc        Statdesc;
       +typedef struct Stats                Stats;
       +typedef struct ZBlock                ZBlock;
       +typedef struct Round        Round;
       +typedef struct Bloom        Bloom;
       +
       +/* all-ones (every bit set) values of 32, 64 and 8 bits */
       +#define TWID32        ((u32int)~(u32int)0)
       +#define TWID64        ((u64int)~(u64int)0)
       +#define        TWID8        ((u8int)~(u8int)0)
       +
       +/*
       + * global constants: size limits, syncarena return bits, error
       + * severities, on-disk magic numbers and versions, packed structure
       + * sizes, and dirty flags.
       + */
       +enum
       +{
       +        ABlockLog                = 9,                /* log2(512), the quantum for reading arenas */
       +        ANameSize                = 64,
       +        MaxDiskBlock                = 64*1024,        /* max. allowed size for a disk block */
       +        /* NOTE(review): MaxIoSize and MaxIo below have the same value and nearly the same comment -- confirm both are needed */
       +        MaxIoSize                = 64*1024,        /* max. allowed size for a disk io operation */
       +        PartBlank                = 256*1024,        /* untouched section at beginning of partition */
       +        HeadSize                = 512,                /* size of a header after PartBlank */
       +        MinArenaSize                = 1*1024*1024,        /* smallest reasonable arena size */
       +        IndexBase                = 1024*1024,        /* initial address to use in an index */
       +        MaxIo                        = 64*1024,        /* max size of a single read or write operation */
       +        ICacheBits                = 16,                /* default bits for indexing icache */
       +        ICacheDepth                = 4,                /* default depth of an icache hash chain */
       +        MaxAMap                        = 2*1024,        /* max. allowed arenas in an address mapping; must be < 32*1024 */
       +
       +        /*
       +         * return codes from syncarena
       +         */
       +        SyncDataErr        = 1 << 0,                /* problem reading the clump data */
       +        SyncCIErr        = 1 << 1,                /* found erroneous clump directory entries */
       +        SyncCIZero        = 1 << 2,                /* found unwritten clump directory entries */
       +        SyncFixErr        = 1 << 3,                /* error writing fixed data */
       +        SyncHeader        = 1 << 4,                /* altered header fields */
       +
       +        /*
       +         * error severity
       +         */
       +        EOk                        = 0,                /* error expected in normal operation */
       +        EStrange,                                /* strange error that should be logged */
       +        ECorrupt,                                /* corrupted data found in arenas */
       +        EICorrupt,                                /* corrupted data found in index */
       +        EAdmin,                                        /* should be brought to administrators' attention */
       +        ECrash,                                        /* really bad internal error */
       +        EBug,                                        /* a limitation which should be fixed */
       +        EInconsist,                                /* inconsistencies between index and arena */
       +        EMax,
       +
       +        /*
       +         * internal disk formats for the venti archival storage system
       +         */
       +        /*
       +         * magic numbers on disk
       +         */
       +        _ClumpMagic                = 0xd15cb10c,        /* clump header, deprecated */
       +        ClumpFreeMagic                = 0,                /* free clump; terminates active clump log */
       +
       +        ArenaPartMagic                = 0xa9e4a5e7,        /* arena partition header */
       +        ArenaMagic                = 0xf2a14ead,        /* arena trailer */
       +        ArenaHeadMagic                = 0xd15c4ead,        /* arena header */
       +        
       +        BloomMagic                = 0xb1004ead,        /* bloom filter header */
       +        /* NOTE(review): BloomMaxHash here and MaxBloomHash further down are both 32 -- looks like a duplicate; confirm which is used */
       +        BloomMaxHash        = 32,
       +
       +        ISectMagic                = 0xd15c5ec7,        /* index header */
       +
       +        ArenaPartVersion        = 3,
       +        ArenaVersion4                = 4,
       +        ArenaVersion5                = 5,
       +        BloomVersion                = 1,
       +        IndexVersion                = 1,
       +        ISectVersion1                = 1,
       +        ISectVersion2                = 2,
       +
       +        /*
       +         * encodings of clumps on disk
       +         */
       +        ClumpEErr                = 0,                /* can't happen */
       +        ClumpENone,                                /* plain */
       +        ClumpECompress,                                /* compressed */
       +        ClumpEMax,
       +
       +        /*
       +         * sizes in bytes on disk
       +         */
       +        U8Size                        = 1,
       +        U16Size                        = 2,
       +        U32Size                        = 4,
       +        U64Size                        = 8,
       +
       +        ArenaPartSize                = 4 * U32Size,
       +        ArenaSize4                = 2 * U64Size + 6 * U32Size + ANameSize + U8Size,
       +        ArenaSize5                        = ArenaSize4 + U32Size,
       +        ArenaHeadSize4                = U64Size + 3 * U32Size + ANameSize,
       +        ArenaHeadSize5                = ArenaHeadSize4 + U32Size,
       +        /* magic, version, nhash, size -- the four fields written by packbloomhead */
       +        BloomHeadSize        = 4 * U32Size,
       +        ISectSize1                = 7 * U32Size + 2 * ANameSize,
       +        ISectSize2                = ISectSize1 + U32Size,
       +        ClumpInfoSize                = U8Size + 2 * U16Size + VtScoreSize,
       +        ClumpSize                = ClumpInfoSize + U8Size + 3 * U32Size,
       +        MaxBloomSize                = 1<<(32-3),        /* 2^32 bits */
       +        MaxBloomHash        = 32,                /* bits per score */
       +        /*
       +         * BUG - The various block copies that manipulate entry buckets
       +         * would be faster if we bumped IBucketSize up to 8 and IEntrySize up to 40,
       +         * so that everything is word-aligned.  Buildindex is actually cpu-bound
       +         * by the (byte at a time) copying in qsort.
       +         */
       +        /* bucket header: 16-bit entry count then 32-bit magic, as written by packibucket */
       +        IBucketSize                = U32Size + U16Size,
       +        /* packed entry: score, wtime (U32), train (U16), addr (U64), size (U16), type (U8), blocks (U8), as written by packientry */
       +        IEntrySize                = U64Size + U32Size + 2*U16Size + 2*U8Size + VtScoreSize,
       +        /* byte offset of the type field in a packed entry: everything packientry writes before it */
       +        IEntryTypeOff                = VtScoreSize + U64Size + U32Size + 2 * U16Size,
       +
       +        /* number of (1<<ABlockLog)-byte blocks needed for a maximum-size lump plus its clump header, rounded up */
       +        MaxClumpBlocks                =  (VtMaxLumpSize + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog,
       +
       +        /*
       +         * dirty flags - order controls disk write order
       +         */
       +        DirtyArena                = 1,
       +        DirtyArenaCib,
       +        DirtyArenaTrailer,
       +        DirtyMax,
       +
       +        /* NOTE(review): presumably a dummy last enumerator so every real entry can end with a comma -- confirm */
       +        VentiZZZZZZZZ
       +};
       +
       +/*
       + * trace category strings, defined elsewhere.
       + * NOTE(review): presumably passed to the tracing routines to tag
       + * messages by subsystem -- confirm against their definition.
       + */
       +extern char TraceDisk[];
       +extern char TraceLump[];
       +extern char TraceBlock[];
       +extern char TraceProc[];
       +extern char TraceWork[];
       +extern char TraceQuiet[];
       +extern char TraceRpc[];
       +
       +/*
       + * results of parsing and initializing a config file
       + */
       +struct Config
       +{
       +        char                *index;                        /* name of the index to initialize */
       +        int                naparts;                /* arena partitions initialized */
       +        ArenaPart        **aparts;                /* NOTE(review): presumably naparts entries -- confirm */
       +        int                nsects;                        /* index sections initialized */
       +        ISect                **sects;                /* NOTE(review): presumably nsects entries -- confirm */
       +        Bloom        *bloom;                /* bloom filter */
       +        /*
       +         * NOTE(review): the remaining fields are undocumented in the original.
       +         * The names suggest memory budgets (bcmem, mem, icmem), a counterpart
       +         * of the global queuewrites flag, and listen addresses / web root
       +         * (haddr, vaddr, webroot) -- confirm against the config parser.
       +         */
       +        u32int        bcmem;
       +        u32int        mem;
       +        u32int        icmem;
       +        int                queuewrites;
       +        char*        haddr;
       +        char*        vaddr;
       +        char*        webroot;
       +};
       +
       +/*
       + * a Part is the low level interface to files or disks.
       + * there are two main types of partitions
       + *        arena partitions, which hold some number of arenas, each in a sub-partition.
       + *        index partitions, which only have one subpartition.
       + */
       +struct Part
       +{
       +        int                fd;                        /* rock for accessing the disk */
       +        int                mode;
       +        u64int                offset;
       +        u64int                size;                        /* size of the partition */
       +        u32int                blocksize;                /* block size for reads and writes */
       +        u32int                fsblocksize;        /* minimum file system block size */
       +        char                *name;
       +        char                *filename;
       +        Channel                *writechan;                /* chan[dcache.nblock](DBlock*) */
       +};
       +
       +/*
       + * a cached block from the partition
       + * yuck -- most of this is internal structure for the cache
       + * all other routines should only use data
       + */
       +struct DBlock
       +{
       +        u8int        *data;
       +
       +        Part        *part;                        /* partition in which cached */
       +        u64int        addr;                        /* base address on the partition */
       +        u32int        size;                        /* amount of data available, not amount allocated; should go away */
       +        u32int        mode;
       +        u32int        dirty;                        /* NOTE(review): presumably one of the Dirty* values from the enum above -- confirm */
       +        u32int        dirtying;
       +        DBlock        *next;                        /* doubly linked hash chains */
       +        DBlock        *prev;
       +        u32int        heap;                        /* index in heap table */
       +        u32int        used;                        /* last reference times */
       +        u32int        used2;
       +        u32int        ref;                        /* reference count */
       +        RWLock        lock;                        /* for access to data only */
       +        Channel        *writedonechan;        
       +        void*        chanbuf[1];                /* buffer for the chan! */
       +};
       +
       +/*
       + * a cached block from the partition
       + * yuck -- most of this is internal structure for the cache
       + * all other routines should only use data
       + * double yuck -- this is mostly the same as a DBlock
       + *
       + * unlike a DBlock, the payload here is a Packet labelled with
       + * its score and type rather than raw bytes at a disk address.
       + */
       +struct Lump
       +{
       +        Packet        *data;
       +
       +        Part        *part;                        /* partition in which cached */
       +        u8int        score[VtScoreSize];        /* score of packet */
       +        u8int        type;                        /* type of packet */
       +        u32int        size;                        /* amount of data allocated to hold packet */
       +        Lump        *next;                        /* doubly linked hash chains */
       +        Lump        *prev;
       +        u32int        heap;                        /* index in heap table */
       +        u32int        used;                        /* last reference times */
       +        u32int        used2;
       +        u32int        ref;                        /* reference count */
       +        QLock        lock;                        /* for access to data only */
       +};
       +
       +/*
       + * mapping between names and address ranges
       + */
       +struct AMap
       +{
       +        u64int                start;                        /* beginning of the address range */
       +        u64int                stop;                        /* end of the range; NOTE(review): inclusive or exclusive is not visible here -- confirm */
       +        char                name[ANameSize];        /* name the range is mapped to */
       +};
       +
       +/*
       + * an AMap along with a length
       + */
       +struct AMapN
       +{
       +        int                n;                        /* the length; presumably the number of entries in map -- confirm */
       +        AMap                *map;
       +};
       +
       +/*
       + * an ArenaPart is a partition made up of Arenas
       + * it exists because most os's don't support many partitions,
       + * and we want to have many different Arenas
       + */
       +struct ArenaPart
       +{
       +        Part                *part;                        /* the underlying partition */
       +        u64int                size;                        /* size of underlying partition, rounded down to blocks */
       +        Arena                **arenas;                /* NOTE(review): presumably narenas entries -- confirm */
       +        u32int                tabbase;                /* base address of arena table on disk */
       +        u32int                tabsize;                /* max. bytes in arena table */
       +
       +        /*
       +         * fields stored on disk
       +         */
       +        u32int                version;
       +        u32int                blocksize;                /* "optimal" block size for reads and writes */
       +        u32int                arenabase;                /* base address of first arena */
       +
       +        /*
       +         * stored in the arena mapping table on disk
       +         */
       +        AMap                *map;
       +        int                narenas;
       +};
       +
       +/*
       + * info about one block in the clump info cache
       + */
       +struct CIBlock
       +{
       +        u32int                block;                        /* blocks in the directory */
       +        int                offset;                        /* offsets of one clump in the data */
       +        DBlock                *data;                        /* cached disk block this entry refers to */
       +};
       +
       +/*
       + * Statistics kept in the tail. 
       + */
       +struct ATailStats
       +{
       +        u32int                clumps;                /* number of clumps */
       +        u32int                cclumps;                /* number of compressed clumps */
       +        /*
       +         * NOTE(review): the Arena description in this header reports
       +         * "compresseddata" as used - clumps*ClumpSize and "data" as uncsize,
       +         * so used presumably counts clump headers plus stored bytes and
       +         * uncsize the total uncompressed bytes -- confirm.
       +         */
       +        u64int                used;
       +        u64int                uncsize;
       +        u8int                sealed;
       +};
       +
       +/*
       + * Arena state - represents a point in the data log
       + */
       +struct AState
       +{
       +        Arena                *arena;                        /* arena this state refers to */
       +        u64int                aa;                        /* index address */
       +        ATailStats                stats;                /* NOTE(review): presumably the tail statistics as of this point in the log -- confirm */
       +};
       +
       +/*
       + * an Arena is a log of Clumps, preceded by an ArenaHeader,
       + * and followed by an Arena, each in one disk block.
       + * struct on disk is not always up to date, but should be self-consistent.
       + * to sync after reboot, follow clumps starting at used until ClumpFreeMagic is found.
       + * NOTE(review): the val expressions below reference s->sealed, s->clumps,
       + * s->cclumps, s->used and s->uncsize, none of which is a direct member of
       + * struct Arena (they are members of ATailStats) -- confirm against whatever
       + * consumes this description.
       + * <struct name="Arena" type="Arena *">
       + *        <field name="name" val="s->name" type="AName"/>
       + *        <field name="version" val="s->version" type="U32int"/>
       + *        <field name="partition" val="s->part->name" type="AName"/>
       + *        <field name="blocksize" val="s->blocksize" type="U32int"/>
       + *        <field name="start" val="s->base" type="U64int"/>
       + *        <field name="stop" val="s->base+2*s->blocksize" type="U64int"/>
       + *        <field name="created" val="s->ctime" type="U32int"/>
       + *        <field name="modified" val="s->wtime" type="U32int"/>
       + *        <field name="sealed" val="s->sealed" type="Sealed"/>
       + *        <field name="score" val="s->score" type="Score"/>
       + *        <field name="clumps" val="s->clumps" type="U32int"/>
       + *        <field name="compressedclumps" val="s->cclumps" type="U32int"/>
       + *        <field name="data" val="s->uncsize" type="U64int"/>
       + *        <field name="compresseddata" val="s->used - s->clumps * ClumpSize" type="U64int"/>
       + *        <field name="storage" val="s->used + s->clumps * ClumpInfoSize" type="U64int"/>
       + * </struct>
       + */
       +struct Arena
       +{
       +        QLock                lock;                        /* lock for arena fields, writing to disk */
       +        Part                *part;                        /* partition in which arena lives */
       +        int                blocksize;                /* size of block to read or write */
       +        u64int                base;                        /* base address on disk */
       +        u64int                size;                        /* total space in the arena */
       +        u64int                limit;                        /* storage limit for clumps */
       +        u8int                score[VtScoreSize];        /* score of the entire sealed & summed arena */
       +
       +        int                clumpmax;                /* ClumpInfos per block */
       +        AState                mem;
       +        int                inqueue;
       +        DigestState        sha1;
       +
       +        /*
       +         * fields stored on disk
       +         */
       +        u32int                version;
       +        char                name[ANameSize];        /* text label */
       +        /* NOTE(review): presumably the in-memory and last-written copies of the tail statistics -- confirm */
       +        ATailStats                memstats;
       +        ATailStats                diskstats;
       +        u32int                ctime;                        /* first time a block was written */
       +        u32int                wtime;                        /* last time a block was written */
       +        u32int                clumpmagic;
       +};
       +
       +/*
       + * redundant storage of some fields at the beginning of each arena
       + * (each member here has a same-named member in struct Arena)
       + */
       +struct ArenaHead
       +{
       +        u32int                version;
       +        char                name[ANameSize];
       +        u32int                blocksize;
       +        u64int                size;
       +        u32int                clumpmagic;
       +};
       +
       +/*
       + * most interesting meta information for a clump.
       + * stored in each clump's header and in the Arena's directory,
       + * stored in reverse order just prior to the arena trailer
       + *
       + * the packed size, ClumpInfoSize, has one term per member below:
       + * U8Size + 2*U16Size + VtScoreSize.
       + */
       +struct ClumpInfo
       +{
       +        u8int                type;
       +        u16int                size;                        /* size of disk data, not including header */
       +        u16int                uncsize;                /* size of uncompressed data */
       +        u8int                score[VtScoreSize];        /* score of the uncompressed data only */
       +};
       +
       +/*
       + * header for an immutable clump of data
       + *
       + * NOTE(review): ClumpSize is ClumpInfoSize + U8Size + 3*U32Size, but only
       + * two u32int members appear here; the third 32-bit word is presumably the
       + * clump magic -- confirm against the clump pack routine.
       + */
       +struct Clump
       +{
       +        ClumpInfo        info;
       +        u8int                encoding;                /* NOTE(review): presumably one of the ClumpE* encodings -- confirm */
       +        u32int                creator;                /* initial client which wrote the block */
       +        u32int                time;                        /* creation at gmt seconds since 1/1/1970 */
       +};
       +
       +/*
       + * index of all clumps according to their score
       + * this is just a wrapper to tie together the index sections
       + * NOTE(review): bitblocks, maxdepth, bitkeylog and bitkeymask below all
       + * use val="s->div", the same as buckdiv; this looks like a copy-paste
       + * slip -- confirm before relying on the values reported for them.
       + * <struct name="Index" type="Index *">
       + *        <field name="name" val="s->name" type="AName"/>
       + *        <field name="version" val="s->version" type="U32int"/>
       + *        <field name="blocksize" val="s->blocksize" type="U32int"/>
       + *        <field name="tabsize" val="s->tabsize" type="U32int"/>
       + *        <field name="buckets" val="s->buckets" type="U32int"/>
       + *        <field name="buckdiv" val="s->div" type="U32int"/>
       + *        <field name="bitblocks" val="s->div" type="U32int"/>
       + *        <field name="maxdepth" val="s->div" type="U32int"/>
       + *        <field name="bitkeylog" val="s->div" type="U32int"/>
       + *        <field name="bitkeymask" val="s->div" type="U32int"/>
       + *        <array name="sect" val="&s->smap[i]" elems="s->nsects" type="Amap"/>
       + *        <array name="amap" val="&s->amap[i]" elems="s->narenas" type="Amap"/>
       + *        <array name="arena" val="s->arenas[i]" elems="s->narenas" type="Arena"/>
       + * </struct>
       + * <struct name="Amap" type="AMap *">
       + *        <field name="name" val="s->name" type="AName"/>
       + *        <field name="start" val="s->start" type="U64int"/>
       + *        <field name="stop" val="s->stop" type="U64int"/>
       + * </struct>
       + */
       +struct Index
       +{
       +        u32int                div;                        /* divisor for mapping score to bucket */
       +        u32int                buckets;                /* last bucket used in disk hash table */
       +        u32int                blocksize;
       +        u32int                tabsize;                /* max. bytes in index config */
       +        u32int                bitblocks;        //XXX remove these fields
       +        u32int                maxdepth;
       +        u32int                bitkeylog;
       +        u32int                bitkeymask;
       +
       +        int                mapalloc;                /* first arena to check when adding a lump */
       +        Arena                **arenas;                /* arenas in the mapping */
       +        ISect                **sects;                /* sections which hold the buckets */
       +        Bloom                *bloom;        /* bloom filter */
       +
       +        /*
       +         * fields stored in config file 
       +         */
       +        u32int                version;
       +        char                name[ANameSize];        /* text label */
       +        int                nsects;
       +        AMap                *smap;                        /* mapping of buckets to index sections */
       +        int                narenas;
       +        AMap                *amap;                        /* mapping from index addresses to arenas */
       +};
       +
       +/*
       + * one part of the bucket storage for an index.
       + * the index blocks are sequentially allocated
       + * across all of the sections.
       + */
       +struct ISect
       +{
       +        Part                *part;                        /* NOTE(review): presumably the partition holding this section -- confirm */
       +        int                blocklog;                /* log2(blocksize) */
       +        int                buckmax;                /* max. entries in a index bucket */
       +        u32int                tabbase;                /* base address of index config table on disk */
       +        u32int                tabsize;                /* max. bytes in index config */
       +        Channel        *writechan;
       +        Channel        *writedonechan;
       +
       +        /*
       +         * fields stored on disk
       +         */
       +        u32int                version;
       +        /* NOTE(review): ISectSize2 is ISectSize1 + U32Size; this is presumably the word added in version 2 -- confirm */
       +        u32int                bucketmagic;
       +        char                name[ANameSize];        /* text label */
       +        char                index[ANameSize];        /* index owning the section */
       +        u32int                blocksize;                /* size of hash buckets in index */
       +        u32int                blockbase;                /* address of start of on disk index table */
       +        u32int                blocks;                        /* total blocks on disk; some may be unused */
       +        u32int                start;                        /* first bucket in this section */
       +        u32int                stop;                        /* limit of buckets in this section */
       +};
       +
       +/*
       + * externally interesting part of an IEntry
       + */
       +struct IAddr
       +{
       +        u64int                addr;                        /* NOTE(review): presumably an index address, as mapped to arenas by Index.amap -- confirm */
       +        u16int                size;                        /* uncompressed size */
       +        u8int                type;                        /* type of block */
       +        u8int                blocks;                        /* arena io quanta for Clump + data */
       +};
       +
       +/*
       + * entries in the index
       + * kept in IBuckets in the disk index table,
       + * cached in the memory ICache.
       + *
       + * packientry writes score, wtime, train and the four members of ia;
       + * next, nextdirty, rac and dirty are not written by it.
       + */
       +struct IEntry
       +{
       +        u8int                score[VtScoreSize];
       +        IEntry                *next;                        /* next in hash chain */
       +        IEntry                *nextdirty;                 /* next in dirty chain */
       +        u32int                wtime;                        /* last write time */
       +        u16int                train;                        /* relative train containing the most recent ref; 0 if no ref, 1 if in same car */
       +        u8int                rac;                        /* read ahead count */
       +        u8int                dirty;                /* is dirty */
       +        IAddr                ia;
       +};
       +
       +/*
       + * buckets in the on disk index table
       + */
       +struct IBucket
       +{
       +        u16int                n;                        /* number of active indices */
       +        u32int                buck;                /* used by buildindex/checkindex only */
       +        u8int                *data;                        /* bytes following the IBucketSize-byte header (set by unpackibucket) */
       +};
       +
       +/*
       + * temporary buffers used by individual threads
       + *
       + * NOTE(review): the members are undocumented in the original.  len is
       + * presumably the number of usable bytes at data, and free the pointer
       + * that must eventually be released when data is not the start of the
       + * allocation -- confirm against the ZBlock allocation routines.
       + */
       +struct ZBlock
       +{
       +        u32int                len;
       +        u32int                _size;
       +        u8int                *data;
       +        u8int                *free;
       +};
       +
       +/*
       + * simple input buffer for a '\0' terminated text file
       + */
       +struct IFile
       +{
       +        char                *name;                                /* name of the file */
       +        ZBlock                *b;                                /* entire contents of file */
       +        u32int                pos;                                /* current position in the file (presumably a byte offset into b's data -- confirm) */
       +};
       +
       +/*
       + * description of one statistic; the statdesc table declared in this
       + * header holds one of these per Stat* index.
       + * NOTE(review): max is presumably an upper bound used when displaying
       + * the statistic -- confirm against stats.c.
       + */
       +struct Statdesc
       +{
       +        char *name;
       +        ulong max;
       +};
       +
       +/* keep in sync with stats.c:/statdesc and httpd.c:/graphname*/
       +/*
       + * indices of the individual statistics.  NStat, the last enumerator,
       + * is their count and sizes both statdesc[] and Stats.n[].
       + */
       +enum
       +{
       +        StatRpcTotal,
       +        StatRpcRead,
       +        StatRpcReadOk,
       +        StatRpcReadFail,
       +        StatRpcReadBytes,
       +        StatRpcReadTime,
       +        StatRpcReadCached,
       +        StatRpcReadCachedTime,
       +        StatRpcReadUncached,
       +        StatRpcReadUncachedTime,
       +        StatRpcWrite,
       +        StatRpcWriteNew,
       +        StatRpcWriteOld,
       +        StatRpcWriteFail,
       +        StatRpcWriteBytes,
       +        StatRpcWriteTime,
       +        StatRpcWriteNewTime,
       +        StatRpcWriteOldTime,
       +
       +        StatLcacheHit,
       +        StatLcacheMiss,
       +        StatLcacheRead,
       +        StatLcacheWrite,
       +        StatLcacheSize,
       +        StatLcacheStall,
       +        StatLcacheReadTime,
       +
       +        StatDcacheHit,
       +        StatDcacheMiss,
       +        StatDcacheLookup,
       +        StatDcacheRead,
       +        StatDcacheWrite,
       +        StatDcacheDirty,
       +        StatDcacheSize,
       +        StatDcacheFlush,
       +        StatDcacheStall,
       +        StatDcacheLookupTime,
       +
       +        StatDblockStall,
       +        StatLumpStall,
       +
       +        StatIcacheHit,
       +        StatIcacheMiss,
       +        StatIcacheRead,
       +        StatIcacheWrite,
       +        StatIcacheFill,
       +        StatIcachePrefetch,
       +        StatIcacheDirty,
       +        StatIcacheSize,
       +        StatIcacheFlush,
       +        StatIcacheStall,
       +        StatIcacheReadTime,
       +
       +        StatBloomHit,
       +        StatBloomMiss,
       +        StatBloomFalseMiss,
       +        StatBloomLookup,
       +        StatBloomOnes,
       +        StatBloomBits,
       +        StatBloomLookupTime,
       +
       +        StatApartRead,
       +        StatApartReadBytes,
       +        StatApartWrite,
       +        StatApartWriteBytes,
       +
       +        StatIsectRead,
       +        StatIsectReadBytes,
       +        StatIsectWrite,
       +        StatIsectWriteBytes,
       +
       +        StatSumRead,
       +        StatSumReadBytes,
       +
       +        NStat
       +};
       +
       +/* one descriptor per Stat* index above; defined elsewhere */
       +extern Statdesc statdesc[NStat];
       +
       +/*
       + * statistics about the operation of the server
       + * mainly for performance monitoring and profiling.
       + */
       +struct Stats
       +{
       +        ulong                now;                        /* NOTE(review): presumably the time this sample was taken -- confirm */
       +        ulong                n[NStat];                /* one value per Stat* index */
       +};
       +
       +/*
       + * NOTE(review): undocumented in the original; judging by the member
       + * names this summarizes a set of samples (count, minimum, maximum,
       + * average) -- confirm against its users.
       + */
       +struct Statbin
       +{
       +        uint nsamp;
       +        uint min;
       +        uint max;
       +        uint avg;
       +};
       +
       +/*
       + * parameters for one graph.
       + * fn takes two Stats pointers and an opaque argument and returns a long.
       + * NOTE(review): presumably fn is called with arg to compute each plotted
       + * value, t0..t1 is the time range, min..max the value range, and wid/ht
       + * the image dimensions -- confirm against httpd.c.
       + */
       +struct Graph
       +{
       +        long (*fn)(Stats*, Stats*, void*);
       +        void *arg;
       +        long t0;
       +        long t1;
       +        long min;
       +        long max;
       +        long wid;
       +        long ht;
       +        int fill;
       +};
       +
       +/*
       + * for kicking background processes that run one round after another after another
       + *
       + * NOTE(review): the members are undocumented in the original; lock
       + * presumably guards the counters and is the lock associated with the
       + * three Rendez -- confirm against the round implementation.
       + */
       +struct Round
       +{
       +        QLock        lock;
       +        Rendez        start;
       +        Rendez        finish;
       +        Rendez        delaywait;
       +        int                delaytime;
       +        int                delaykick;
       +        char*        name;
       +        int                last;
       +        int                current;
       +        int                next;
       +        int                doanother;
       +};
       +
       +/*
       + * Bloom filter of stored block hashes
       + *
       + * NOTE(review): the lock comments below mention nbits, tab, mb and nb,
       + * none of which are fields of this struct; they presumably refer to
       + * size/mask/data (or to an earlier layout) -- confirm in bloom.c.
       + */
       +struct Bloom
       +{
       +        RWLock lk;                /* protects nhash, nbits, tab, mb */
       +        QLock mod;                /* one marker at a time, protects nb */
       +        int nhash;
       +        ulong size;                /* bytes in tab (presumably the table that data points to) */
       +        ulong mask;                /* to produce index */
       +        u8int *data;
       +        Part *part;
       +        Channel *writechan;
       +        Channel *writedonechan;
       +};
       +
       +/*
       + * Server-wide globals; declared here, defined in other source files.
       + */
       +extern        Index                *mainindex;
       +extern        u32int                maxblocksize;                /* max. block size used by any partition */
       +extern        int                paranoid;                /* should verify hashes on disk read */
       +extern        int                queuewrites;                /* put all lump writes on a queue and finish later */
       +extern        int                readonly;                /* only allowed to read the disk data */
       +extern        Stats                stats;
       +extern        u8int                zeroscore[VtScoreSize];
       +extern        int                compressblocks;
       +extern        int                writestodevnull;        /* dangerous - for performance debugging */
       +extern        int                collectstats;
       +extern        QLock        memdrawlock;
       +extern        int                icachesleeptime;
       +extern        int                arenasumsleeptime;
       +
       +#ifndef PLAN9PORT
       +#pragma varargck type "V" uchar*
       +#define ODIRECT 0
       +#endif
   DIR diff --git a/src/cmd/venti/srv/dcache.c b/src/cmd/venti/srv/dcache.c
       t@@ -0,0 +1,816 @@
       +/*
       + * Disk cache.
       + * 
       + * Caches raw disk blocks.  Getdblock() gets a block, putdblock puts it back.
       + * Getdblock has a mode parameter that determines i/o and access to a block:
       + * if mode is OREAD or ORDWR, it is read from disk if not already in memory.
       + * If mode is ORDWR or OWRITE, it is locked for exclusive use before being returned.
       + * It is *not* marked dirty -- once changes have been made, they should be noted
       + * by using dirtydblock() before putdblock().  
       + *
       + * There is a global cache lock as well as a lock on each block. 
       + * Within a thread, the cache lock can be acquired while holding a block lock,
       + * but not vice versa; and a block cannot be locked if you already hold the lock
       + * on another block.
       + * 
       + * The flush proc writes out dirty blocks in batches, one batch per dirty tag.
       + * For example, the DirtyArena blocks are all written to disk before any of the
       + * DirtyArenaCib blocks.
       + *
       + * This code used to be in charge of flushing the dirty index blocks out to 
       + * disk, but updating the index turned out to benefit from extra care.
       + * Now cached index blocks are never marked dirty.  The index.c code takes
       + * care of updating them behind our back, and uses _getdblock to update any
       + * cached copies of the blocks as it changes them on disk.
       + */
       +
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +typedef struct DCache        DCache;
       +
       +/*
       + * Geometry of the address hash table (dcache.heads).
       + */
       +enum
       +{
       +        HashLog                = 9,                        /* log2 of the number of hash chains */
       +        HashSize        = 1<<HashLog,                /* number of hash chains */
       +        HashMask        = HashSize - 1,                /* reduces a hash value to a chain index */
       +};
       +
       +/*
       + * State of the (single) disk block cache.
       + * Unless noted otherwise the fields are manipulated with lock held.
       + */
       +struct DCache
       +{
       +        QLock                lock;
       +        RWLock                dirtylock;                /* must be held to inspect or set b->dirty; NOTE(review): never taken anywhere in this file */
       +        Rendez                full;                        /* slept on (under lock) when no block can be evicted */
       +        Round                round;                        /* kicks flushproc */
       +        DBlock                *free;                        /* list of never-used blocks, linked through next */
       +        u32int                now;                        /* ticks for usage timestamps */
       +        int                size;                        /* max. size of any block; allocated to each block */
       +        DBlock                **heads;                /* hash table for finding address */
       +        int                nheap;                        /* number of available victims */
       +        DBlock                **heap;                        /* heap for locating victims */
       +        int                nblocks;                /* number of blocks allocated */
       +        DBlock                *blocks;                /* array of block descriptors */
       +        DBlock                **write;                /* array of block pointers to be written */
       +        u8int                *mem;                        /* backing memory for all block data and the readahead buffer */
       +        int                ndirty;                        /* number of dirty blocks */
       +        int                maxdirty;                /* max. number of dirty blocks */
       +        Channel        *ra;                        /* readahead requests (Ra), consumed by raproc */
       +        u8int                *rabuf;                        /* readahead buffer, placed after the block data in mem */
       +        u32int                ramax;                        /* capacity of rabuf in bytes */
       +        u32int                rasize;                        /* number of valid bytes currently in rabuf */
       +        u64int                raaddr;                        /* partition address of rabuf[0] */
       +        Part                *rapart;                /* partition rabuf was read from, nil if none */
       +
       +        AState        diskstate;                /* copy of state taken before the last completed flush */
       +        AState        state;                        /* most recent state given to setdcachestate */
       +};
       +
       +/*
       + * A readahead request: sent on dcache.ra by dreadahead and
       + * serviced by raproc.
       + */
       +typedef struct Ra Ra;
       +struct Ra
       +{
       +        Part *part;        /* partition to read from */
       +        u64int addr;        /* address within part of the block to prefetch */
       +};
       +
       +/* the one and only disk cache */
       +static DCache        dcache;
       +
       +/* victim heap maintenance */
       +static int        downheap(int i, DBlock *b);
       +static int        upheap(int i, DBlock *b);
       +static DBlock        *bumpdblock(void);
       +static void        delheap(DBlock *db);
       +static void        fixheap(int i, DBlock *b);
       +/* background procs started by initdcache and dirtydblock */
       +static void        flushproc(void*);
       +static void        writeproc(void*);
       +static void raproc(void*);
       +
       +/*
       + * Set up the disk cache with roughly mem bytes of block storage:
       + * mem/maxblocksize blocks of maxblocksize bytes each, all initially
       + * on the free list, plus a readahead buffer of 128 blocks.
       + * Starts the flush, delayed-kick and readahead procs.
       + * Calls sysfatal if mem holds fewer than two blocks or if
       + * maxblocksize has not been set.
       + */
       +void
       +initdcache(u32int mem)
       +{
       +        DBlock *b, *last;
       +        u32int nblocks, blocksize;
       +        int i;
       +        u8int *p;
       +
       +        if(mem < maxblocksize * 2)
       +                sysfatal("need at least %d bytes for the disk cache", maxblocksize * 2);
       +        if(maxblocksize == 0)
       +                sysfatal("no max. block size given for disk cache");
       +        blocksize = maxblocksize;
       +        nblocks = mem / blocksize;
       +        dcache.full.l = &dcache.lock;        /* the Rendez sleeps and wakes under the cache lock */
       +        dcache.nblocks = nblocks;
       +        dcache.maxdirty = (nblocks * 2) / 3;
       +        trace(TraceProc, "initialize disk cache with %d blocks of %d bytes, maximum %d dirty blocks\n",
       +                        nblocks, blocksize, dcache.maxdirty);
       +        dcache.size = blocksize;
       +        dcache.heads = MKNZ(DBlock*, HashSize);
       +        dcache.heap = MKNZ(DBlock*, nblocks);
       +        dcache.blocks = MKNZ(DBlock, nblocks);
       +        dcache.write = MKNZ(DBlock*, nblocks);
       +        /* nblocks cache blocks + 1 block of slack for alignment + 128 blocks of readahead buffer */
       +        dcache.mem = MKNZ(u8int, (nblocks+1+128) * blocksize);
       +        dcache.ra = chancreate(sizeof(Ra), 0);
       +
       +        last = nil;
       +        /*
       +         * round the start of the data area up to a multiple of blocksize.
       +         * the mask arithmetic is only correct when blocksize is a power of two.
       +         */
       +        p = (u8int*)(((ulong)dcache.mem+blocksize-1)&~(ulong)(blocksize-1));
       +        for(i = 0; i < nblocks; i++){
       +                b = &dcache.blocks[i];
       +                b->data = &p[i * blocksize];
       +                b->heap = TWID32;        /* TWID32 marks "not in the heap" */
       +                b->writedonechan = chancreate(sizeof(void*), 1);
       +                b->next = last;                /* push onto the free list being built */
       +                last = b;
       +        }
       +        /* the readahead buffer follows the last cache block */
       +        dcache.rabuf = &p[i*blocksize];
       +        dcache.ramax = 128*blocksize;
       +        dcache.raaddr = 0;
       +        dcache.rapart = nil;
       +
       +        dcache.free = last;
       +        dcache.nheap = 0;
       +        setstat(StatDcacheSize, nblocks);
       +        initround(&dcache.round, "dcache", 120*1000);
       +
       +        vtproc(flushproc, nil);
       +        vtproc(delaykickroundproc, &dcache.round);
       +        vtproc(raproc, nil);
       +}
       +
       +/*
       + * Remember *a as the current arena state.  flushproc copies it
       + * before starting a flush and publishes that copy as the disk
       + * state once the flush is done.
       + */
       +void
       +setdcachestate(AState *a)
       +{
       +        char *arenaname;
       +
       +        arenaname = nil;
       +        if(a->arena)
       +                arenaname = a->arena->name;
       +        trace(TraceBlock, "setdcachestate %s 0x%llux clumps %d", arenaname, a->aa, a->stats.clumps);
       +
       +        qlock(&dcache.lock);
       +        dcache.state = *a;
       +        qunlock(&dcache.lock);
       +}
       +
       +/*
       + * Return a copy, taken under the cache lock, of the arena state
       + * recorded by the most recently completed flush.
       + */
       +AState
       +diskstate(void)
       +{
       +        AState snapshot;
       +
       +        qlock(&dcache.lock);
       +        snapshot = dcache.diskstate;
       +        qunlock(&dcache.lock);
       +
       +        return snapshot;
       +}
       +
       +/*
       + * Readahead proc: for every request received on dcache.ra, pull the
       + * block into the cache (load mode 2) and release it again right away.
       + * Requests at or beyond the end of the partition are ignored.
       + * Runs until recv stops returning 1.
       + */
       +static void
       +raproc(void *v)
       +{
       +        Ra req;
       +
       +        USED(v);
       +        for(;;){
       +                if(recv(dcache.ra, &req) != 1)
       +                        break;
       +                if(req.addr >= req.part->size)
       +                        continue;
       +                /* putdblock accepts nil, so a failed fetch needs no special case */
       +                putdblock(_getdblock(req.part, req.addr, OREAD, 2));
       +        }
       +}
       +
       +/*
       + * Readahead heuristic, called by _getdblock on every hit (miss=0)
       + * and miss (miss=1) that is not itself a readahead or a pure write.
       + *
       + * NOTE: the unconditional return at the top of the body currently
       + * disables the whole function; everything after it is dead code and
       + * no request is ever sent on dcache.ra from here.
       + *
       + * The disabled logic: two consecutive misses on adjacent blocks of the
       + * same partition (in either direction) start a readahead in that
       + * direction; a hit on the block last read ahead extends it by one more
       + * block.  Requests are sent with nbsend, so they are dropped when
       + * raproc is busy.  The static state is not protected by any lock here.
       + */
       +void
       +dreadahead(Part *part, u64int addr, int miss)
       +{
       +        Ra ra;
       +        static struct {
       +                Part *part;
       +                u64int addr;
       +        } lastmiss;        /* most recent miss seen */
       +        static struct {
       +                Part *part;
       +                u64int addr;
       +                int dir;        /* signed step: +dcache.size or -dcache.size */
       +        } lastra;        /* most recent readahead requested */
       +
       +return;        /* readahead disabled */
       +        if(miss){
       +                if(lastmiss.part==part && lastmiss.addr==addr-dcache.size){
       +                        /* ascending: previous miss was the block just below */
       +                XRa:
       +                        lastra.part = part;
       +                        lastra.dir = addr-lastmiss.addr;
       +                        lastra.addr = addr+lastra.dir;
       +                        ra.part = part;
       +                        ra.addr = lastra.addr;
       +                        nbsend(dcache.ra, &ra);
       +                }else if(lastmiss.part==part && lastmiss.addr==addr+dcache.size){
       +                        /* descending: previous miss was the block just above */
       +                        addr -= dcache.size;
       +                        goto XRa;
       +                }
       +        }else{
       +                if(lastra.part==part && lastra.addr==addr){
       +                        /* the prefetched block was used: fetch the next one */
       +                        lastra.addr += lastra.dir;
       +                        ra.part = part;
       +                        ra.addr = lastra.addr;
       +                        nbsend(dcache.ra, &ra);
       +                }
       +        }
       +
       +        if(miss){
       +                lastmiss.part = part;
       +                lastmiss.addr = addr;
       +        }
       +
       +//        fprint(2, "%s %llx %s\n", part->name, addr, miss ? "miss" : "hit");
       +}
       +
       +/*
       + * Read n bytes at addr in part into buf, going through the readahead
       + * buffer when possible.
       + *
       + * If the request lies entirely inside the current readahead window it
       + * is served from memory.  Otherwise, ordinary callers (load != 2) read
       + * straight from the partition.  The readahead proc (load == 2) instead
       + * refills the window with up to dcache.ramax bytes starting at addr and
       + * copies the first n bytes out of it.
       + *
       + * Returns 0 on success (or whatever readpart returns on the direct
       + * path) and -1 if refilling the window fails.
       + */
       +int
       +rareadpart(Part *part, u64int addr, u8int *buf, uint n, int load)
       +{
       +        uint nn;
       +        static RWLock ralock;
       +
       +        rlock(&ralock);
       +        if(dcache.rapart==part && dcache.raaddr <= addr && addr+n <= dcache.raaddr+dcache.rasize){
       +                memmove(buf, dcache.rabuf+(addr-dcache.raaddr), n);
       +                runlock(&ralock);
       +                return 0;
       +        }
       +        runlock(&ralock);
       +
       +        /*
       +         * Read directly when this is not a readahead, and also when the
       +         * request cannot be satisfied from a window starting at addr:
       +         * addr past the end, or n larger than the buffer or than what is
       +         * left of the partition.  readpart reports any error.
       +         */
       +        if(load != 2 || addr >= part->size || n > dcache.ramax || n > part->size - addr)
       +                return readpart(part, addr, buf, n);
       +
       +        wlock(&ralock);
       +        trace(TraceBlock, "raread %s 0x%llux", part->name, addr);
       +        nn = dcache.ramax;
       +        if(addr+nn > part->size)
       +                nn = part->size - addr;
       +
       +        /*
       +         * The buffer is about to be overwritten.  Invalidate the window
       +         * first so that a failed (possibly partial) read cannot leave
       +         * stale metadata describing clobbered data.
       +         */
       +        dcache.rapart = nil;
       +        if(readpart(part, addr, dcache.rabuf, nn) < 0){
       +                wunlock(&ralock);
       +                return -1;
       +        }
       +        memmove(buf, dcache.rabuf, n);        /* n <= nn is guaranteed by the check above */
       +        dcache.rapart = part;
       +        dcache.rasize = nn;
       +        dcache.raaddr = addr;
       +        wunlock(&ralock);
       +
       +        /* the caller accounts for n bytes itself; add the extra bytes read ahead */
       +        addstat(StatApartReadBytes, nn-n);
       +        return 0;
       +}
       +
       +/*
       + * Hash a partition address to a chain index in dcache.heads.
       + * The partition itself does not take part in the hash.
       + */
       +static u32int
       +pbhash(u64int addr)
       +{
       +        u32int folded;
       +
       +#define hashit(c)        ((((c) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
       +        /* fold the upper 32 bits into the lower 32, then scramble */
       +        folded = addr ^ (addr >> 32);
       +        return hashit(folded);
       +}
       +
       +/*
       + * Public entry point: fetch and lock the cached block for part/addr
       + * (see _getdblock, called here with load = 1) and account the lookup,
       + * including its duration in milliseconds, in the statistics.
       + * Returns whatever _getdblock returns, nil included.
       + */
       +DBlock*
       +getdblock(Part *part, u64int addr, int mode)
       +{
       +        DBlock *blk;
       +        uint t0, elapsed;
       +        int reads, writes;
       +
       +        reads = (mode == OREAD || mode == ORDWR);
       +        writes = (mode == OWRITE || mode == ORDWR);
       +
       +        t0 = msec();
       +        blk = _getdblock(part, addr, mode, 1);
       +        if(reads)
       +                addstat(StatDcacheRead, 1);
       +        if(writes)
       +                addstat(StatDcacheWrite, 1);
       +        elapsed = msec() - t0;
       +        addstat2(StatDcacheLookup, 1, StatDcacheLookupTime, elapsed);
       +        return blk;
       +}
       +
       +/*
       + * Find, or bring into the cache, the block at addr in part and return
       + * it locked: read-locked for OREAD, write-locked for ORDWR and OWRITE.
       + * The caller releases it with putdblock.
       + *
       + * load selects what happens on a miss:
       + *        0        only look in the cache; return nil if the block is absent
       + *        1        ordinary access; evict a victim and load the block
       + *        2        access on behalf of the readahead proc; like 1, but never
       + *                triggers further readahead and lets rareadpart refill the
       + *                readahead buffer
       + *
       + * Returns nil if the partition's block size exceeds the cache block
       + * size (after seterr), on a miss with load == 0, or if reading the
       + * block from disk fails.
       + */
       +DBlock*
       +_getdblock(Part *part, u64int addr, int mode, int load)
       +{
       +        DBlock *b;
       +        u32int h, size;
       +
       +        trace(TraceBlock, "getdblock enter %s 0x%llux", part->name, addr);
       +        size = part->blocksize;
       +        if(size > dcache.size){
       +                seterr(EAdmin, "block size %d too big for cache with size %d", size, dcache.size);
       +                return nil;
       +        }
       +        h = pbhash(addr);
       +
       +        /*
       +         * look for the block in the cache
       +         */
       +//checkdcache();
       +        qlock(&dcache.lock);
       +again:
       +        for(b = dcache.heads[h]; b != nil; b = b->next){
       +                if(b->part == part && b->addr == addr){
       +                        /*
       +                        qlock(&stats.lock);
       +                        stats.pchit++;
       +                        qunlock(&stats.lock);
       +                        */
       +                        if(load){
       +                                addstat(StatDcacheHit, 1);
       +                                if(load != 2 && mode != OWRITE)
       +                                        dreadahead(part, b->addr, 0);
       +                        }
       +                        goto found;
       +                }
       +        }
       +
       +        /*
       +         * missed: locate the block with the oldest second to last use.
       +         * remove it from the heap, and fix up the heap.
       +         */
       +        if(!load){
       +                qunlock(&dcache.lock);
       +                return nil;
       +        }
       +
       +        addstat(StatDcacheMiss, 1);
       +
       +        b = bumpdblock();
       +        if(b == nil){
       +                /*
       +                 * nothing can be evicted right now: sleep until putdblock or
       +                 * flushproc frees a block, then search again from the top,
       +                 * since the block may have been loaded by someone else meanwhile.
       +                 * (the miss counter is bumped again on each retry.)
       +                 */
       +                trace(TraceBlock, "all disk cache blocks in use");
       +                addstat(StatDcacheStall, 1);
       +                rsleep(&dcache.full);
       +                addstat(StatDcacheStall, -1);
       +                goto again;
       +        }
       +
       +        assert(!b->dirty);
       +
       +        /*
       +         * the new block has no last use, so assume it happens sometime in the middle
       +ZZZ this is not reasonable
       +         */
       +        b->used = (b->used2 + dcache.now) / 2;
       +
       +        /*
       +         * rechain the block on the correct hash chain
       +         */
       +        b->next = dcache.heads[h];
       +        dcache.heads[h] = b;
       +        if(b->next != nil)
       +                b->next->prev = b;
       +        b->prev = nil;
       +
       +        b->addr = addr;
       +        b->part = part;
       +        b->size = 0;        /* nothing valid yet; the data is filled in below, under the block lock */
       +        if(load != 2 && mode != OWRITE)
       +                dreadahead(part, b->addr, 1);
       +
       +found:
       +        /* take a reference and update the usage stamps while still holding the cache lock */
       +        b->ref++;
       +        b->used2 = b->used;
       +        b->used = dcache.now++;
       +        if(b->heap != TWID32)
       +                fixheap(b->heap, b);
       +
       +        qunlock(&dcache.lock);
       +//checkdcache();
       +
       +        trace(TraceBlock, "getdblock lock");
       +        addstat(StatDblockStall, 1);
       +        if(mode == OREAD)
       +                rlock(&b->lock);
       +        else
       +                wlock(&b->lock);
       +        addstat(StatDblockStall, -1);
       +        trace(TraceBlock, "getdblock locked");
       +
       +        if(b->size != size){
       +                /*
       +                 * the cached data does not cover the block: fill it in.
       +                 * readers must trade their read lock for the write lock first;
       +                 * the lock is dropped in between, so b->size is re-examined below.
       +                 */
       +                if(mode == OREAD){
       +                        addstat(StatDblockStall, 1);
       +                        runlock(&b->lock);
       +                        wlock(&b->lock);
       +                        addstat(StatDblockStall, -1);
       +                }
       +                if(b->size < size){
       +                        if(mode == OWRITE)
       +                                /* the caller will overwrite the block: no need to read it */
       +                                memset(&b->data[b->size], 0, size - b->size);
       +                        else{
       +                                trace(TraceBlock, "getdblock readpart %s 0x%llux", part->name, addr);
       +                                if(rareadpart(part, addr + b->size, &b->data[b->size], size - b->size, load) < 0){
       +                                        b->mode = ORDWR;        /* so putdblock wunlocks */
       +                                        putdblock(b);
       +                                        return nil;
       +                                }
       +                                trace(TraceBlock, "getdblock readpartdone");
       +                                addstat(StatApartRead, 1);
       +                                addstat(StatApartReadBytes, size-b->size);
       +                        }
       +                }
       +                b->size = size;
       +                if(mode == OREAD){
       +                        /* back to the read lock the caller asked for */
       +                        addstat(StatDblockStall, 1);
       +                        wunlock(&b->lock);
       +                        rlock(&b->lock);
       +                        addstat(StatDblockStall, -1);
       +                }
       +        }
       +
       +        b->mode = mode;        /* tells putdblock which kind of lock to release */
       +        trace(TraceBlock, "getdblock exit");
       +        return b;
       +}
       +
       +/*
       + * Release a block obtained from getdblock/_getdblock: drop the block
       + * lock (read or write, according to b->mode) and the reference.
       + * When the last reference goes away and the block is clean it becomes
       + * an eviction candidate, and anyone waiting for a free block is woken.
       + * A nil b is ignored.
       + */
       +void
       +putdblock(DBlock *b)
       +{
       +        int slot;
       +
       +        if(b == nil)
       +                return;
       +
       +        trace(TraceBlock, "putdblock %s 0x%llux", b->part->name, b->addr);
       +
       +        if(b->mode != OREAD)
       +                wunlock(&b->lock);
       +        else
       +                runlock(&b->lock);
       +
       +        qlock(&dcache.lock);
       +        b->ref--;
       +        if(b->ref == 0 && !b->dirty){
       +                if(b->heap == TWID32){
       +                        /* not in the victim heap yet: append and sift up */
       +                        slot = dcache.nheap++;
       +                        upheap(slot, b);
       +                }
       +                rwakeupall(&dcache.full);
       +        }
       +        qunlock(&dcache.lock);
       +}
       +
       +/*
       + * Mark b, which the caller must hold referenced and write-locked
       + * (mode ORDWR or OWRITE), as dirty with the given dirty tag.
       + * A block that is already dirty must be re-dirtied with the same tag.
       + * The first time a block of a partition is dirtied, the partition's
       + * write channel and write proc are created.
       + * A newly dirty block bumps the dirty count; reaching maxdirty kicks
       + * the flush proc at once, otherwise a delayed kick is scheduled.
       + */
       +void
       +dirtydblock(DBlock *b, int dirty)
       +{
       +        int odirty;
       +        Part *p;
       +
       +
       +        trace(TraceBlock, "dirtydblock enter %s 0x%llux %d from 0x%lux", b->part->name, b->addr, dirty, getcallerpc(&b));
       +        assert(b->ref != 0);
       +        assert(b->mode==ORDWR || b->mode==OWRITE);
       +
       +        odirty = b->dirty;
       +        if(b->dirty)
       +                assert(b->dirty == dirty);
       +        else
       +                b->dirty = dirty;
       +
       +        p = b->part;
       +        /*
       +         * NOTE(review): this check-then-create runs without any lock held
       +         * other than b's own; two procs dirtying different blocks of the
       +         * same partition for the first time could both get here -- confirm
       +         * whether callers serialize this.
       +         */
       +        if(p->writechan == nil){
       +                trace(TraceBlock, "dirtydblock allocwriteproc %s", p->name);
       +                /* XXX hope this doesn't fail! */
       +                p->writechan = chancreate(sizeof(DBlock*), dcache.nblocks);
       +                vtproc(writeproc, p);
       +        }
       +        qlock(&dcache.lock);
       +        if(!odirty){
       +                /* only a clean-to-dirty transition changes the count */
       +                dcache.ndirty++;
       +                setstat(StatDcacheDirty, dcache.ndirty);
       +                if(dcache.ndirty >= dcache.maxdirty)
       +                        kickround(&dcache.round, 0);
       +                else
       +                        delaykickround(&dcache.round);
       +        }
       +        qunlock(&dcache.lock);
       +}
       +
       +/*
       + * remove some block from use and update the free list and counters
       + *
       + * Called with dcache.lock held.  Returns a block that is on no hash
       + * chain and not in the heap: either a never-used block from the free
       + * list or the best clean, unreferenced victim from the heap.
       + * Returns nil, after kicking the flush proc, if the heap runs out of
       + * candidates.
       + */
       +static DBlock*
       +bumpdblock(void)
       +{
       +        DBlock *b;
       +        ulong h;
       +
       +        trace(TraceBlock, "bumpdblock enter");
       +        b = dcache.free;
       +        if(b != nil){
       +                dcache.free = b->next;
       +                return b;
       +        }
       +
       +        /* too many dirty blocks: get a flush going */
       +        if(dcache.ndirty >= dcache.maxdirty)
       +                kickdcache();
       +
       +        /*
       +         * remove blocks until we find one that is unused
       +         * referenced blocks are left in the heap even though
       +         * they can't be scavenged; this is simply a speed optimization
       +         */
       +        for(;;){
       +                if(dcache.nheap == 0){
       +                        kickdcache();
       +                        trace(TraceBlock, "bumpdblock gotnothing");
       +                        return nil;
       +                }
       +                b = dcache.heap[0];
       +                delheap(b);
       +                /* referenced or dirty blocks are dropped from the heap and skipped */
       +                if(!b->ref && !b->dirty)
       +                        break;
       +        }
       +
       +        trace(TraceBlock, "bumpdblock bumping %s 0x%llux", b->part->name, b->addr);
       +
       +        /*
       +         * unchain the block
       +         */
       +        if(b->prev == nil){
       +                /* b claims to be first on its chain: verify before unlinking */
       +                h = pbhash(b->addr);
       +                if(dcache.heads[h] != b)
       +                        sysfatal("bad hash chains in disk cache");
       +                dcache.heads[h] = b->next;
       +        }else
       +                b->prev->next = b->next;
       +        if(b->next != nil)
       +                b->next->prev = b->prev;
       +
       +        return b;
       +}
       +
       +/*
       + * Remove db from the victim heap, wherever it sits: the last heap
       + * element is moved into db's slot and sifted into place.
       + * Does nothing if db is not in the heap.
       + */
       +static void
       +delheap(DBlock *db)
       +{
       +        DBlock *tail;
       +
       +        if(db->heap != TWID32){
       +                tail = dcache.heap[--dcache.nheap];
       +                fixheap(db->heap, tail);
       +                db->heap = TWID32;
       +        }
       +}
       +
       +/*
       + * Place b at heap position i and move it up or down to its
       + * correct new location.
       + */
       +static void
       +fixheap(int i, DBlock *b)
       +{
       +        int where;
       +
       +        where = upheap(i, b);
       +        if(where != i)
       +                return;                /* it moved up, so it cannot need to move down */
       +        downheap(i, b);
       +}
       +
       +/*
       + * Sift b up from heap position i, shifting parents down while b is
       + * older than they are, and store b in the slot where it ends up.
       + * Ages are compared as used2 - now, in unsigned arithmetic, so the
       + * ordering survives wraparound of the tick counter.
       + * Returns b's final position.
       + */
       +static int
       +upheap(int i, DBlock *b)
       +{
       +        DBlock *parent;
       +        u32int now;
       +        int pi;
       +
       +        now = dcache.now;
       +        while(i != 0){
       +                pi = (i - 1) >> 1;
       +                parent = dcache.heap[pi];
       +                if(parent->used2 - now <= b->used2 - now)
       +                        break;
       +                dcache.heap[i] = parent;
       +                parent->heap = i;
       +                i = pi;
       +        }
       +
       +        dcache.heap[i] = b;
       +        b->heap = i;
       +        return i;
       +}
       +
       +/*
       + * Sift b down from heap position i, pulling up the older of the two
       + * children while that child is older than b, and store b in the slot
       + * where it ends up.  Ages are compared as used2 - now, in unsigned
       + * arithmetic.  Returns b's final position.
       + */
       +static int
       +downheap(int i, DBlock *b)
       +{
       +        DBlock *child;
       +        u32int now;
       +        int ci;
       +
       +        now = dcache.now;
       +        for(;;){
       +                ci = 2*i + 1;
       +                if(ci >= dcache.nheap)
       +                        break;
       +                /* prefer the right child when it is strictly older than the left */
       +                if(ci + 1 < dcache.nheap && dcache.heap[ci + 1]->used2 - now < dcache.heap[ci]->used2 - now)
       +                        ci++;
       +                child = dcache.heap[ci];
       +                if(child->used2 - now >= b->used2 - now)
       +                        break;
       +                dcache.heap[i] = child;
       +                child->heap = i;
       +                i = ci;
       +        }
       +
       +        dcache.heap[i] = b;
       +        b->heap = i;
       +        return i;
       +}
       +
       +/*
       + * Consistency check helper: walk the hash chain bb should be on,
       + * verifying the prev links on the way, and sysfatal if a link is
       + * wrong or bb is not found.
       + */
       +static void
       +findblock(DBlock *bb)
       +{
       +        DBlock *cur, *before;
       +
       +        before = nil;
       +        cur = dcache.heads[pbhash(bb->addr)];
       +        while(cur != nil){
       +                if(cur->prev != before)
       +                        sysfatal("bad prev link");
       +                if(cur == bb)
       +                        return;
       +                before = cur;
       +                cur = cur->next;
       +        }
       +        sysfatal("block missing from hash table");
       +}
       +
       +/*
       + * Debugging aid: verify the cache's internal invariants under the
       + * cache lock and sysfatal on the first violation found.  Checks
       + *        - heap back-pointers and heap ordering,
       + *        - that block data areas are laid out contiguously, one
       + *          dcache.size stride apart, starting at the first block's data,
       + *        - that every block with a non-zero address is on its hash chain,
       + *        - that the free list only holds untouched blocks,
       + *        - that heap, free list and referenced blocks account for all blocks.
       + *
       + * NOTE(review): the final accounting does not count unreferenced dirty
       + * blocks, which putdblock keeps out of the heap; it looks like it can
       + * fire spuriously while such blocks exist -- confirm before enabling
       + * the check on a cache with dirty blocks.
       + */
       +void
       +checkdcache(void)
       +{
       +        DBlock *b;
       +        u32int size, now;
       +        int i, k, refed, nfree;
       +
       +        qlock(&dcache.lock);
       +        size = dcache.size;
       +        now = dcache.now;
       +        for(i = 0; i < dcache.nheap; i++){
       +                if(dcache.heap[i]->heap != i)
       +                        sysfatal("dc: mis-heaped at %d: %d", i, dcache.heap[i]->heap);
       +                /* parent must not be younger than this entry */
       +                if(i > 0 && dcache.heap[(i - 1) >> 1]->used2 - now > dcache.heap[i]->used2 - now)
       +                        sysfatal("dc: bad heap ordering");
       +                /* nor this entry younger than either child */
       +                k = (i << 1) + 1;
       +                if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
       +                        sysfatal("dc: bad heap ordering");
       +                k++;
       +                if(k < dcache.nheap && dcache.heap[i]->used2 - now > dcache.heap[k]->used2 - now)
       +                        sysfatal("dc: bad heap ordering");
       +        }
       +
       +        refed = 0;
       +        for(i = 0; i < dcache.nblocks; i++){
       +                b = &dcache.blocks[i];
       +                /*
       +                 * initdcache lays the data out from an aligned pointer inside
       +                 * dcache.mem, not from dcache.mem itself, so measure from the
       +                 * first block's data rather than from the raw allocation.
       +                 */
       +                if(b->data != &dcache.blocks[0].data[i * size])
       +                        sysfatal("dc: mis-blocked at %d", i);
       +                if(b->ref && b->heap == TWID32)
       +                        refed++;
       +                if(b->addr)
       +                        findblock(b);
       +                if(b->heap != TWID32
       +                && dcache.heap[b->heap] != b)
       +                        sysfatal("dc: spurious heap value");
       +        }
       +
       +        nfree = 0;
       +        for(b = dcache.free; b != nil; b = b->next){
       +                if(b->addr != 0 || b->heap != TWID32)
       +                        sysfatal("dc: bad free list");
       +                nfree++;
       +        }
       +
       +        if(dcache.nheap + nfree + refed != dcache.nblocks)
       +                sysfatal("dc: missing blocks: %d %d %d", dcache.nheap, refed, dcache.nblocks);
       +        qunlock(&dcache.lock);
       +}
       +
       +/*
       + * Kick the flush proc, passing 1 as kickround's second argument
       + * (kickdcache passes 0).
       + * NOTE(review): presumably 1 means "wait for the round to finish",
       + * making this a synchronous flush -- confirm in round.c.
       + */
       +void
       +flushdcache(void)
       +{
       +        trace(TraceProc, "flushdcache enter");
       +        kickround(&dcache.round, 1);
       +        trace(TraceProc, "flushdcache exit");
       +}
       +
       +/*
       + * Kick the flush proc, passing 0 as kickround's second argument.
       + * NOTE(review): presumably 0 means "do not wait" -- confirm in round.c.
       + */
       +void
       +kickdcache(void)
       +{
       +        kickround(&dcache.round, 0);
       +}
       +
       +/*
       + * Write out the leading run of blocks in [b, eb) whose dirty tag equals
       + * dirty: hand each one to its partition's write proc, then wait for
       + * every one of them to be reported done.
       + * Returns the number of blocks in that run.
       + */
       +static int
       +parallelwrites(DBlock **b, DBlock **eb, int dirty)
       +{
       +        int i, nsent, total;
       +
       +        total = eb - b;
       +
       +        /* queue the whole run first so the partitions can work in parallel */
       +        nsent = 0;
       +        while(nsent < total && b[nsent]->dirty == dirty){
       +                sendp(b[nsent]->part->writechan, b[nsent]);
       +                nsent++;
       +        }
       +
       +        /* then collect one completion per block sent */
       +        for(i = 0; i < nsent; i++)
       +                recvp(b[i]->writedonechan);
       +
       +        return nsent;
       +}
       +
       +/*
       + * Sort first by dirty flag, then by partition, then by address in partition.
       + *
       + * qsort comparison function over an array of DBlock pointers.
       + * Returns a negative, zero or positive value; zero is returned when
       + * all three keys are equal (in particular when an element is compared
       + * with itself), as qsort requires of a consistent comparator.
       + */
       +static int
       +writeblockcmp(const void *va, const void *vb)
       +{
       +        DBlock *a, *b;
       +
       +        a = *(DBlock**)va;
       +        b = *(DBlock**)vb;
       +
       +        if(a->dirty != b->dirty)
       +                return a->dirty - b->dirty;
       +        if(a->part != b->part)
       +                return a->part < b->part ? -1 : 1;
       +        if(a->addr != b->addr)
       +                return a->addr < b->addr ? -1 : 1;
       +        return 0;
       +}
       +
       +/*
       + * Flush proc: each time the round is kicked, write every dirty block
       + * to disk, one batch per dirty tag in increasing tag order, and then
       + * publish the arena state that was current when the flush began as
       + * the new disk state.  Never returns.
       + */
       +static void
       +flushproc(void *v)
       +{
       +        int i, j, n;
       +        ulong t0;
       +        DBlock *b, **write;
       +        AState as;
       +
       +        USED(v);
       +        threadsetname("flushproc");
       +        for(;;){
       +                waitforkick(&dcache.round);
       +
       +                trace(TraceWork, "start");
       +                /* snapshot the state before looking at any block */
       +                qlock(&dcache.lock);
       +                as = dcache.state;
       +                qunlock(&dcache.lock);
       +
       +                t0 = nsec()/1000;
       +
       +                /* collect the dirty blocks; note that b->dirty is read without a lock here */
       +                trace(TraceProc, "build t=%lud", (ulong)(nsec()/1000)-t0);
       +                write = dcache.write;
       +                n = 0;
       +                for(i=0; i<dcache.nblocks; i++){
       +                        b = &dcache.blocks[i];
       +                        if(b->dirty)
       +                                write[n++] = b;
       +                }
       +
       +                qsort(write, n, sizeof(write[0]), writeblockcmp);
       +
       +                /* Write each stage of blocks out. */
       +                trace(TraceProc, "writeblocks t=%lud", (ulong)(nsec()/1000)-t0);
       +                i = 0;
       +                for(j=1; j<DirtyMax; j++){
       +                        trace(TraceProc, "writeblocks.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
       +                        i += parallelwrites(write+i, write+n, j);
       +                }
       +                /* every collected block must have been consumed by some stage */
       +                if(i != n){
       +                        fprint(2, "in flushproc i=%d n=%d\n", i, n);
       +                        for(i=0; i<n; i++)
       +                                fprint(2, "\tblock %d: dirty=%d\n", i, write[i]->dirty);
       +                        abort();
       +                }
       +
       +/* XXX
       +* the locking here is suspect.  what if a block is redirtied
       +* after the write happens?  we'll still decrement dcache.ndirty here.
       +*/
       +                trace(TraceProc, "undirty.%d t=%lud", j, (ulong)(nsec()/1000)-t0);
       +                qlock(&dcache.lock);
       +                dcache.diskstate = as;
       +                for(i=0; i<n; i++){
       +                        b = write[i];
       +                        --dcache.ndirty;
       +                        /* unreferenced blocks were kept out of the heap while dirty; make them victims again */
       +                        if(b->ref == 0 && b->heap == TWID32){
       +                                upheap(dcache.nheap++, b);
       +                                rwakeupall(&dcache.full);
       +                        }
       +                }
       +                setstat(StatDcacheDirty, dcache.ndirty);
       +                qunlock(&dcache.lock);
       +                addstat(StatDcacheFlush, 1);
       +                trace(TraceWork, "finish");
       +        }
       +}
       +
       +/*
       + * Writer proc for one partition.  v is the Part whose writechan
       + * this proc services.  Loops forever: receives a DBlock from
       + * p->writechan, writes the block's data to the partition while
       + * holding the block's write lock, clears the block's dirty flag,
       + * and finally sends the block back on its own writedonechan.
       + */
       +static void
       +writeproc(void *v)
       +{
       +        DBlock *b;
       +        Part *p;
       +
       +        p = v;
       +
       +        threadsetname("writeproc:%s", p->name);
       +        for(;;){
       +                /* wait for the next block queued for this partition */
       +                b = recvp(p->writechan);
       +                trace(TraceWork, "start");
       +                assert(b->part == p);
       +                trace(TraceProc, "wlock %s 0x%llux", p->name, b->addr);
       +                wlock(&b->lock);
       +                trace(TraceProc, "writepart %s 0x%llux", p->name, b->addr);
       +                /*
       +                 * NOTE(review): a failed write is only reported on fd 2;
       +                 * the statistics are still bumped and the block is still
       +                 * marked clean below.
       +                 */
       +                if(writepart(p, b->addr, b->data, b->size) < 0)
       +                        fprint(2, "write error: %r\n"); /* XXX details! */
       +                addstat(StatApartWrite, 1);
       +                addstat(StatApartWriteBytes, b->size);
       +                b->dirty = 0;
       +                wunlock(&b->lock);
       +                trace(TraceProc, "finish %s 0x%llux", p->name, b->addr);
       +                trace(TraceWork, "finish");
       +                /* report completion on the block's own done channel */
       +                sendp(b->writedonechan, b);
       +        }
       +}
   DIR diff --git a/src/cmd/venti/srv/dump.c b/src/cmd/venti/srv/dump.c
       t@@ -0,0 +1,47 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Write a textual description of index ix to fd: one header line
       + * with name, version, block size and table size, one line with the
       + * bucket count and divisor, then one line per index section and
       + * one line per arena giving its name and [start,stop) range.
       + */
       +void
       +printindex(int fd, Index *ix)
       +{
       +        int k;
       +
       +        fprint(fd, "index=%s version=%d blocksize=%d tabsize=%d\n",
       +                ix->name, ix->version, ix->blocksize, ix->tabsize);
       +        fprint(fd, "\tbuckets=%d div=%d\n", ix->buckets, ix->div);
       +
       +        /* index sections and the bucket ranges they hold */
       +        k = 0;
       +        while(k < ix->nsects){
       +                fprint(fd, "\tsect=%s for buckets [%lld,%lld)\n",
       +                        ix->smap[k].name, ix->smap[k].start, ix->smap[k].stop);
       +                k++;
       +        }
       +
       +        /* arenas and the address ranges they occupy */
       +        k = 0;
       +        while(k < ix->narenas){
       +                fprint(fd, "\tarena=%s at [%lld,%lld)\n",
       +                        ix->amap[k].name, ix->amap[k].start, ix->amap[k].stop);
       +                k++;
       +        }
       +}
       +
       +/*
       + * Write a textual description of arena partition ap to fd: the
       + * partition name, version, block size, arena count, table base and
       + * table size, followed by one line per arena map entry.
       + */
       +void
       +printarenapart(int fd, ArenaPart *ap)
       +{
       +        int k;
       +
       +        fprint(fd, "arena partition=%s\n\tversion=%d blocksize=%d arenas=%d\n\tsetbase=%d setsize=%d\n",
       +                ap->part->name, ap->version, ap->blocksize, ap->narenas, ap->tabbase, ap->tabsize);
       +
       +        k = 0;
       +        while(k < ap->narenas){
       +                fprint(fd, "\tarena=%s at [%lld,%lld)\n",
       +                        ap->map[k].name, ap->map[k].start, ap->map[k].stop);
       +                k++;
       +        }
       +}
       +
       +/*
       + * Write a textual description of arena to fd: its name, address
       + * range, version and create/modify times; a " sealed" marker when
       + * arena->memstats.sealed is set; the arena score when it differs
       + * from zeroscore; and finally the clump and storage counters taken
       + * from arena->memstats.
       + *
       + * All output goes to fd.  (The sealed marker, the line terminator
       + * and the score line used to be written to file descriptor 2
       + * regardless of fd, which split the report across two streams.)
       + */
       +void
       +printarena(int fd, Arena *arena)
       +{
       +        /* header line is left unterminated; it is finished just below */
       +        fprint(fd, "arena='%s' [%lld,%lld)\n\tversion=%d created=%d modified=%d",
       +                arena->name, arena->base, arena->base + arena->size + 2 * arena->blocksize,
       +                arena->version, arena->ctime, arena->wtime);
       +        if(arena->memstats.sealed)
       +                fprint(fd, " sealed\n");
       +        else
       +                fprint(fd, "\n");
       +        if(scorecmp(zeroscore, arena->score) != 0)
       +                fprint(fd, "\tscore=%V\n", arena->score);
       +
       +        fprint(fd, "\tclumps=%,d compressed clumps=%,d data=%,lld compressed data=%,lld disk storage=%,lld\n",
       +                arena->memstats.clumps, arena->memstats.cclumps, arena->memstats.uncsize,
       +                arena->memstats.used - arena->memstats.clumps * ClumpSize,
       +                arena->memstats.used + arena->memstats.clumps * ClumpInfoSize);
       +}
   DIR diff --git a/src/cmd/venti/srv/findscore.c b/src/cmd/venti/srv/findscore.c
       t@@ -0,0 +1,121 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +enum
       +{
       +        /* number of ClumpInfo entries findscore allocates and reads per batch */
       +        ClumpChunks        = 32*1024
       +};
       +
       +/* incremented once per -v flag; values above 1 enable the extra printouts in threadmain */
       +static int        verbose;
       +
       +/*
       + * Report whether two clump directory entries describe the same
       + * clump: returns 1 when type, size, uncsize and score all match,
       + * 0 otherwise.  The score is compared last, and only when the
       + * three scalar fields already agree.
       + */
       +int
       +clumpinfoeq(ClumpInfo *c, ClumpInfo *d)
       +{
       +        if(c->type != d->type)
       +                return 0;
       +        if(c->size != d->size)
       +                return 0;
       +        if(c->uncsize != d->uncsize)
       +                return 0;
       +        return scorecmp(c->score, d->score)==0;
       +}
       +
       +/*
       + * Scan the clump directory of arena for entries whose score equals
       + * score, reading the directory in batches of at most ClumpChunks
       + * entries.  Each match is reported on file descriptor 2 together
       + * with its clump number, type, sizes and position, where position
       + * is the running sum of (size + ClumpSize) over all entries that
       + * precede it.
       + *
       + * Returns the number of matching entries.  If a directory read
       + * fails, the error is recorded with seterr, scanning stops, and
       + * the count found so far is returned.
       + */
       +int
       +findscore(Arena *arena, uchar *score)
       +{
       +        ClumpInfo *ci, *cis;
       +        u64int a;
       +        u32int clump;
       +        int i, n, found;
       +
       +//ZZZ remove fprint?
       +        if(arena->memstats.clumps)
       +                fprint(2, "reading directory for arena=%s with %d entries\n", arena->name, arena->memstats.clumps);
       +
       +        cis = MKN(ClumpInfo, ClumpChunks);
       +        found = 0;
       +        a = 0;
       +        for(clump = 0; clump < arena->memstats.clumps; clump += n){
       +                /* the last batch may be shorter than ClumpChunks */
       +                n = ClumpChunks;
       +                if(n > arena->memstats.clumps - clump)
       +                        n = arena->memstats.clumps - clump;
       +                if(readclumpinfos(arena, clump, cis, n) != n){
       +                        seterr(EOk, "arena directory read failed: %r");
       +                        break;
       +                }
       +
       +                for(i = 0; i < n; i++){
       +                        ci = &cis[i];
       +                        if(scorecmp(score, ci->score)==0){
       +                                fprint(2, "found at clump=%d with type=%d size=%d csize=%d position=%lld\n",
       +                                        clump + i, ci->type, ci->uncsize, ci->size, a);
       +                                found++;
       +                        }
       +                        /* advance past this clump's data plus its ClumpSize header */
       +                        a += ci->size + ClumpSize;
       +                }
       +        }
       +        free(cis);
       +        return found;
       +}
       +
       +/*
       + * Print the findscore synopsis on file descriptor 2 and exit.
       + * The exit status is the non-nil string "usage" so that a bad
       + * invocation is reported to the caller as a failure.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: findscore [-v] arenafile score\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * findscore entry point.  Parses the -v flag, then expects two
       + * arguments: the arena partition file and a score.  Opens the
       + * partition read-only, initializes the arena partition and the
       + * disk cache, runs findscore over every arena, and prints the
       + * total number of occurrences.  With -v given more than once it
       + * also prints the arena partition description and the statistics.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        ArenaPart *ap;
       +        Part *part;
       +        char *file;
       +        u8int score[VtScoreSize];
       +        int i, found;
       +
       +        ventifmtinstall();
       +
       +        ARGBEGIN{
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        readonly = 1;
       +
       +        if(argc != 2)
       +                usage();
       +
       +        file = argv[0];
       +        /* sysfatal terminates the message itself; no trailing newline */
       +        if(strscore(argv[1], score) < 0)
       +                sysfatal("bad score %s", argv[1]);
       +
       +        part = initpart(file, OREAD|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open partition %s: %r", file);
       +
       +        ap = initarenapart(part);
       +        if(ap == nil)
       +                sysfatal("can't initialize arena partition in %s: %r", file);
       +
       +        if(verbose > 1){
       +                printarenapart(2, ap);
       +                fprint(2, "\n");
       +        }
       +
       +        initdcache(8 * MaxDiskBlock);
       +
       +        /* accumulate matches across every arena in the partition */
       +        found = 0;
       +        for(i = 0; i < ap->narenas; i++)
       +                found += findscore(ap->arenas[i], score);
       +
       +        print("found %d occurrences of %V\n", found, score);
       +
       +        if(verbose > 1)
       +                printstats();
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/fmtarenas.c b/src/cmd/venti/srv/fmtarenas.c
       t@@ -0,0 +1,135 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#ifndef ODIRECT
       +#define ODIRECT 0
       +#endif
       +
       +/*
       + * Print the fmtarenas synopsis on file descriptor 2 and exit.
       + * The exit status is the non-nil string "usage", matching the
       + * status threadmain uses when it rejects an oversized block size.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: fmtarenas [-Z] [-b blocksize] [-a arenasize] name file\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * fmtarenas entry point.  Formats the partition in file as an
       + * arena partition whose arenas are named name0, name1, ...
       + *
       + * Flags handled below:
       + *        -D type        passed to settrace
       + *        -a size        arena size (default 512MB); must be non-zero
       + *        -b size        block size (default 8KB), at most MaxDiskBlock
       + *        -4        create ArenaVersion4 arenas instead of ArenaVersion5
       + *        -Z        do not zero the partition first
       + *
       + * When -Z is not given, the partition is zeroed only for
       + * ArenaVersion4.  The space after ap->arenabase is divided into
       + * arenas of the requested size; a remainder of at least
       + * MinArenaSize becomes one more arena, and a smaller remainder is
       + * absorbed into the last arena.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int vers;
       +        ArenaPart *ap;
       +        Part *part;
       +        Arena *arena;
       +        u64int addr, limit, asize, apsize;
       +        char *file, *name, aname[ANameSize];
       +        int i, n, blocksize, tabsize, zero;
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        blocksize = 8 * 1024;
       +        asize = 512 * 1024 *1024;
       +        tabsize = 512 * 1024;                /* BUG: should be determined from number of arenas */
       +        zero = -1;
       +        vers = ArenaVersion5;
       +        ARGBEGIN{
       +        case 'D':
       +                settrace(EARGF(usage()));
       +                break;
       +        case 'a':
       +                asize = unittoull(ARGF());
       +                /* a zero arena size would divide by zero when counting arenas */
       +                if(asize == TWID64 || asize == 0)
       +                        usage();
       +                break;
       +        case 'b':
       +                blocksize = unittoull(ARGF());
       +                if(blocksize == ~0)
       +                        usage();
       +                if(blocksize > MaxDiskBlock){
       +                        fprint(2, "block size too large, max %d\n", MaxDiskBlock);
       +                        threadexitsall("usage");
       +                }
       +                break;
       +        case '4':
       +                vers = ArenaVersion4;
       +                break;
       +        case 'Z':
       +                zero = 0;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        /* no explicit choice: zero the partition only for version 4 arenas */
       +        if(zero == -1){
       +                if(vers == ArenaVersion4)
       +                        zero = 1;
       +                else
       +                        zero = 0;
       +        }
       +
       +        if(argc != 2)
       +                usage();
       +
       +        name = argv[0];
       +        file = argv[1];
       +
       +        if(nameok(name) < 0)
       +                sysfatal("illegal name template %s", name);
       +
       +        part = initpart(file, ORDWR|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open partition %s: %r", file);
       +
       +        if(zero)
       +                zeropart(part, blocksize);
       +
       +        maxblocksize = blocksize;
       +        initdcache(20*blocksize);
       +
       +        ap = newarenapart(part, blocksize, tabsize);
       +        if(ap == nil)
       +                sysfatal("can't initialize arena: %r");
       +
       +        /* whole arenas that fit, plus one more if the remainder is big enough */
       +        apsize = ap->size - ap->arenabase;
       +        n = apsize / asize;
       +        if(apsize - (n * asize) >= MinArenaSize)
       +                n++;
       +
       +        fprint(2, "fmtarenas %s: %,d arenas, %,lld bytes storage, %,d bytes for index map\n",
       +                file, n, apsize, ap->tabsize);
       +
       +        ap->narenas = n;
       +        ap->map = MKNZ(AMap, n);
       +        ap->arenas = MKNZ(Arena*, n);
       +
       +        addr = ap->arenabase;
       +        for(i = 0; i < n; i++){
       +                limit = addr + asize;
       +                /* stretch the last arena to the end rather than leave a runt behind it */
       +                if(limit >= ap->size || ap->size - limit < MinArenaSize){
       +                        limit = ap->size;
       +                        if(limit - addr < MinArenaSize)
       +                                sysfatal("bad arena set math: runt arena at %lld,%lld %lld", addr, limit, ap->size);
       +                }
       +
       +                snprint(aname, ANameSize, "%s%d", name, i);
       +
       +                if(0) fprint(2, "adding arena %s at [%lld,%lld)\n", aname, addr, limit);
       +
       +                /* a failure is reported but formatting continues with the next arena */
       +                arena = newarena(part, vers, aname, addr, limit - addr, blocksize);
       +                if(!arena)
       +                        fprint(2, "can't make new arena %s: %r\n", aname);
       +                freearena(arena);
       +
       +                ap->map[i].start = addr;
       +                ap->map[i].stop = limit;
       +                namecp(ap->map[i].name, aname);
       +
       +                addr = limit;
       +        }
       +
       +        if(wbarenapart(ap) < 0)
       +                fprint(2, "can't write back arena partition header for %s: %r\n", file);
       +
       +        flushdcache();
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/fmtbloom.c b/src/cmd/venti/srv/fmtbloom.c
       t@@ -0,0 +1,115 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/* bloom filter state that threadmain initializes with bloominit and writes out with writebloom */
       +Bloom b;
       +
       +/*
       + * Print the fmtbloom synopsis on file descriptor 2 and exit.
       + * The exit status is the non-nil string "usage" so that a bad
       + * invocation is reported to the caller as a failure.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: fmtbloom [-s size] [-n nblocks | -N nhash] file\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * fmtbloom entry point.  Formats the partition in file as a bloom
       + * filter.
       + *
       + * Flags handled below:
       + *        -n nblocks        expected number of blocks; used to pick both the
       + *                        filter size and the number of hash functions
       + *        -N nhash        number of hash functions, at most BloomMaxHash
       + *        -s size                filter size in bytes (default: the partition size)
       + * -n and -N are mutually exclusive.
       + *
       + * The size must be at least 1MB, is capped at MaxBloomSize, and is
       + * rounded down to a power of two.  The global b is then initialized,
       + * given a zeroed data buffer of that size, and written to the
       + * partition.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        Part *part;
       +        char *file;
       +        vlong bits, size, size2;
       +        int nhash;
       +        vlong nblocks;
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        size = 0;
       +        nhash = nblocks = 0;
       +        ARGBEGIN{
       +        case 'n':
       +                if(nhash || nblocks)
       +                        usage();
       +                nblocks = unittoull(EARGF(usage()));
       +                break;
       +        case 'N':
       +                if(nhash || nblocks)
       +                        usage();
       +                nhash = unittoull(EARGF(usage()));
       +                if(nhash > BloomMaxHash){
       +                        /* end the line so the usage text that follows starts on its own line */
       +                        fprint(2, "maximum possible is -N %d\n", BloomMaxHash);
       +                        usage();
       +                }
       +                break;
       +        case 's':
       +                size = unittoull(ARGF());
       +                if(size == ~0)
       +                        usage();
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 1)
       +                usage();
       +
       +        file = argv[0];
       +
       +        part = initpart(file, ORDWR|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open partition %s: %r", file);
       +
       +        if(size == 0)
       +                size = part->size;
       +
       +        if(size < 1024*1024)
       +                sysfatal("bloom filter too small");
       +
       +        if(size > MaxBloomSize){
       +                fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
       +                        size, MaxBloomSize);
       +                size = MaxBloomSize;
       +        }
       +        /* not a power of two: round down to the largest power of two below size */
       +        if(size&(size-1)){
       +                for(size2=1; size2<size; size2*=2)
       +                        ;
       +                size = size2/2;
       +                fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
       +        }
       +
       +        if(nblocks){
       +                /*
       +                 * no use for more than 32 bits per block
       +                 * shoot for less than 64 bits per block
       +                 */
       +                size2 = size;
       +                while(size2*8 >= nblocks*64)
       +                        size2 >>= 1;
       +                if(size2 != size){
       +                        size = size2;
       +                        fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
       +                                size/1024/1024);
       +                }
       +
       +                /*
       +                 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
       +                 */
       +                bits = (8*size)/nblocks;
       +                nhash = bits*7/10;
       +                if(nhash > BloomMaxHash)
       +                        nhash = BloomMaxHash;
       +        }
       +        if(!nhash)
       +                nhash = BloomMaxHash;
       +        if(bloominit(&b, size, nil) < 0)
       +                sysfatal("bloominit: %r");
       +        b.nhash = nhash;
       +        /* invert the nhash = bits*7/10 estimate to report the block count this filter suits */
       +        bits = nhash*10/7;
       +        nblocks = (8*size)/bits;
       +        /* size is in bytes; scale it to match the MB label, as the warnings above do */
       +        fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
       +        b.data = vtmallocz(size);
       +        b.part = part;
       +        if(writebloom(&b) < 0)
       +                sysfatal("writing %s: %r", file);
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/fmtindex.c b/src/cmd/venti/srv/fmtindex.c
       t@@ -0,0 +1,120 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Print the fmtindex synopsis on file descriptor 2 and exit.
       + * The exit status is the non-nil string "usage" so that a bad
       + * invocation is reported to the caller as a failure.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: fmtindex [-a] config\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * fmtindex entry point.  Reads the configuration in config and
       + * builds the index's arena map from every arena of every arena
       + * partition listed there, then writes the index back with wbindex.
       + *
       + * Without -a a new index is created with newindex.  With -a the
       + * existing index is loaded with initindex; arenas already present
       + * in it must match the configured arenas slot for slot and keep
       + * their addresses, and any further arenas are appended after them.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        Config conf;
       +        Index *ix;
       +        ArenaPart *ap;
       +        Arena **arenas;
       +        AMap *amap;
       +        u64int addr;
       +        char *file;
       +        u32int i, j, n, narenas;
       +        int add;
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        add = 0;
       +        ARGBEGIN{
       +        case 'a':
       +                add = 1;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 1)
       +                usage();
       +
       +        file = argv[0];
       +
       +        if(runconfig(file, &conf) < 0)
       +                sysfatal("can't initialize config %s: %r", file);
       +        if(conf.index == nil)
       +                sysfatal("no index specified in %s", file);
       +        if(nameok(conf.index) < 0)
       +                sysfatal("illegal index name %s", conf.index);
       +
       +        /* total number of arenas across all configured arena partitions */
       +        narenas = 0;
       +        for(i = 0; i < conf.naparts; i++){
       +                ap = conf.aparts[i];
       +                narenas += ap->narenas;
       +        }
       +
       +        if(add){
       +                ix = initindex(conf.index, conf.sects, conf.nsects);
       +                if(ix == nil)
       +                        sysfatal("can't initialize index %s: %r", conf.index);
       +        }else{
       +                ix = newindex(conf.index, conf.sects, conf.nsects);
       +                if(ix == nil)
       +                        sysfatal("can't create new index %s: %r", conf.index);
       +
       +                n = 0;
       +                for(i = 0; i < ix->nsects; i++)
       +                        n += ix->sects[i]->blocks;
       +
       +                if(0) fprint(2, "using %ud buckets of %ud; div=%d\n", ix->buckets, n, ix->div);
       +        }
       +        amap = MKNZ(AMap, narenas);
       +        arenas = MKNZ(Arena*, narenas);
       +
       +        /* lay the arenas out back to back in index address space, starting at IndexBase */
       +        addr = IndexBase;
       +        n = 0;
       +        for(i = 0; i < conf.naparts; i++){
       +                ap = conf.aparts[i];
       +                for(j = 0; j < ap->narenas; j++){
       +                        if(n >= narenas)
       +                                sysfatal("too few slots in index's arena set");
       +
       +                        arenas[n] = ap->arenas[j];
       +                        if(n < ix->narenas){
       +                                /* slot already in the index: it must be the same arena at the same address */
       +                                if(arenas[n] != ix->arenas[n])
       +                                        sysfatal("mismatched arenas %s and %s at slot %d",
       +                                                arenas[n]->name, ix->arenas[n]->name, n);
       +                                amap[n] = ix->amap[n];
       +                                if(amap[n].start != addr)
       +                                        sysfatal("mis-located arena %s in index %s", arenas[n]->name, ix->name);
       +                                addr = amap[n].stop;
       +                        }else{
       +                                /* new slot: append the arena right after the previous one */
       +                                amap[n].start = addr;
       +                                addr += ap->arenas[j]->size;
       +                                amap[n].stop = addr;
       +                                namecp(amap[n].name, ap->arenas[j]->name);
       +                                if(0) fprint(2, "add arena %s at [%lld,%lld)\n",
       +                                        amap[n].name, amap[n].start, amap[n].stop);
       +                        }
       +
       +                        n++;
       +                }
       +        }
       +        if(0){
       +                fprint(2, "configured index=%s with arenas=%d and storage=%lld\n",
       +                        ix->name, n, addr - IndexBase);
       +                fprint(2, "\tbitblocks=%d maxdepth=%d buckets=%d\n",
       +                        ix->bitblocks, ix->maxdepth, ix->buckets);
       +        }
       +        fprint(2, "fmtindex: %,d arenas, %,d index buckets, %,lld bytes storage\n",
       +                n, ix->buckets, addr-IndexBase);
       +
       +        ix->amap = amap;
       +        ix->arenas = arenas;
       +        ix->narenas = narenas;
       +
       +        /* what failed here is the index write-back, so name the index in the message */
       +        if(wbindex(ix) < 0)
       +                fprint(2, "can't write back index %s: %r\n", ix->name);
       +
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/fmtisect.c b/src/cmd/venti/srv/fmtisect.c
       t@@ -0,0 +1,83 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Print the fmtisect synopsis on file descriptor 2 and exit.
       + * The exit status is the non-nil string "usage", matching the
       + * status threadmain uses when it rejects an oversized block size.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: fmtisect [-Z] [-b blocksize] name file\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * fmtisect entry point.  Formats the partition in file as an index
       + * section called name.
       + *
       + * Flags handled below:
       + *        -b size        block size (default 8KB), at most MaxDiskBlock
       + *        -1        create an ISectVersion1 section instead of ISectVersion2
       + *        -Z        do not zero the partition first
       + *
       + * When -Z is not given, the partition is zeroed only for
       + * ISectVersion1.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        ISect *is;
       +        Part *part;
       +        char *name, *file;
       +        int vers, zero, blocksize, setsize;
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        /* defaults, possibly overridden by the flags below */
       +        vers = ISectVersion2;
       +        zero = -1;
       +        setsize = 512 * 1024;
       +        blocksize = 8 * 1024;
       +        ARGBEGIN{
       +        case 'Z':
       +                zero = 0;
       +                break;
       +        case '1':
       +                vers = ISectVersion1;
       +                break;
       +        case 'b':
       +                blocksize = unittoull(ARGF());
       +                if(blocksize == ~0)
       +                        usage();
       +                if(blocksize > MaxDiskBlock){
       +                        fprint(2, "block size too large, max %d\n", MaxDiskBlock);
       +                        threadexitsall("usage");
       +                }
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        /* no explicit choice: zero the partition only for version 1 sections */
       +        if(zero == -1)
       +                zero = (vers == ISectVersion1);
       +
       +        if(argc != 2)
       +                usage();
       +        name = argv[0];
       +        file = argv[1];
       +
       +        if(nameok(name) < 0)
       +                sysfatal("illegal name %s", name);
       +
       +        part = initpart(file, ORDWR|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open partition %s: %r", file);
       +
       +        if(zero)
       +                zeropart(part, blocksize);
       +
       +        is = newisect(part, vers, name, blocksize, setsize);
       +        if(is == nil)
       +                sysfatal("can't initialize new index: %r");
       +
       +        fprint(2, "fmtisect %s: %,d buckets of %,d entries, %,d bytes for index map\n",
       +                file, is->blocks, is->buckmax, setsize);
       +
       +        if(wbisect(is) < 0)
       +                fprint(2, "can't write back index section header for %s: %r\n", file);
       +
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/fns.h b/src/cmd/venti/srv/fns.h
       t@@ -0,0 +1,206 @@
       +/*
       + * sorted by 4,/^$/|sort -bd +1
       + */
       +int                addarena(Arena *name);
       +void                addstat(int, int);
       +void                addstat2(int, int, int, int);
       +ZBlock                *alloczblock(u32int size, int zeroed, uint alignment);
       +Arena                *amapitoa(Index *index, u64int a, u64int *aa);
       +u64int                arenadirsize(Arena *arena, u32int clumps);
       +void                arenaupdate(Arena *arena, u32int size, u8int *score);
       +void                backsumarena(Arena *arena);
       +void        binstats(long (*fn)(Stats *s0, Stats *s1, void*), void *arg, long t0, long t1, Statbin *bin, int nbin);
       +int                bloominit(Bloom*, vlong, uchar*);
       +int                bucklook(u8int*, int, u8int*, int);
       +u32int                buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint);
       +void                checkdcache(void);
       +void                checklumpcache(void);
       +int                clumpinfoeq(ClumpInfo *c, ClumpInfo *d);
       +int                clumpinfoeq(ClumpInfo *c, ClumpInfo *d);
       +u32int                clumpmagic(Arena *arena, u64int aa);
       +uint                countbits(uint n);
       +int                delarena(Arena *arena);
       +void                delaykickicache(void);
       +void                delaykickround(Round*);
       +void                delaykickroundproc(void*);
       +void                dirtydblock(DBlock*, int);
       +AState        diskstate(void);
       +void                *emalloc(ulong);
       +void                *erealloc(void *, ulong);
       +char                *estrdup(char*);
       +void                *ezmalloc(ulong);
       +Arena                *findarena(char *name);
       +int                flushciblocks(Arena *arena);
       +void                flushdcache(void);
       +void                flushicache(void);
       +void                flushqueue(void);
       +void                fmtzbinit(Fmt *f, ZBlock *b);
       +void                freearena(Arena *arena);
       +void                freearenapart(ArenaPart *ap, int freearenas);
       +void                freeiestream(IEStream *ies);
       +void                freeifile(IFile *f);
       +void                freeisect(ISect *is);
       +void                freeindex(Index *index);
       +void                freepart(Part *part);
       +void                freezblock(ZBlock *b);
       +DBlock                *_getdblock(Part *part, u64int addr, int mode, int load);
       +DBlock                *getdblock(Part *part, u64int addr, int mode);
       +u32int                hashbits(u8int *score, int nbits);
       +int                httpdinit(char *address, char *webroot);
       +int                iaddrcmp(IAddr *ia1, IAddr *ia2);
       +IEntry*        icachedirty(u32int, u32int, u64int);
       +void                icacheclean(IEntry*);
       +int                ientrycmp(const void *vie1, const void *vie2);
       +char                *ifileline(IFile *f);
       +int                ifilename(IFile *f, char *dst);
       +int                ifileu32int(IFile *f, u32int *r);
       +int                inbloomfilter(Bloom*, u8int*);
       +int                indexsect(Index *ix, u8int *score);
       +int                indexsect0(Index *ix, u32int buck);
       +Arena                *initarena(Part *part, u64int base, u64int size, u32int blocksize);
       +ArenaPart        *initarenapart(Part *part);
       +int                initarenasum(void);
       +void                initbloomfilter(Index*);
       +void                initdcache(u32int mem);
       +void                initicache(int bits, int depth);
       +void                initicachewrite(void);
       +IEStream        *initiestream(Part *part, u64int off, u64int clumps, u32int size);
       +ISect                *initisect(Part *part);
       +Index                *initindex(char *name, ISect **sects, int n);
       +void                initlumpcache(u32int size, u32int nblocks);
       +int                initlumpqueues(int nq);
       +Part*                initpart(char *name, int mode);
       +void                initround(Round*, char*, int);
       +int                initventi(char *config, Config *conf);
       +void                insertlump(Lump *lump, Packet *p);
       +int                insertscore(u8int *score, IAddr *ia, int write);
       +void                kickdcache(void);
       +void                kickicache(void);
       +void                kickround(Round*, int wait);
       +ZBlock                *loadclump(Arena *arena, u64int aa, int blocks, Clump *cl, u8int *score, int verify);
       +DBlock        *loadibucket(Index *index, u8int *score, ISect **is, u32int *buck, IBucket *ib);
       +int                loadientry(Index *index, u8int *score, int type, IEntry *ie);
       +void                logerr(int severity, char *fmt, ...);
       +Lump                *lookuplump(u8int *score, int type);
       +int                lookupscore(u8int *score, int type, IAddr *ia, int *rac);
       +int                maparenas(AMap *am, Arena **arenas, int n, char *what);
       +void                markbloomfilter(Bloom*, u8int*);
       +uint                msec(void);
       +int                namecmp(char *s, char *t);
       +void                namecp(char *dst, char *src);
       +int                nameok(char *name);
       +Arena                *newarena(Part *part, u32int, char *name, u64int base, u64int size, u32int blocksize);
       +ArenaPart        *newarenapart(Part *part, u32int blocksize, u32int tabsize);
       +ISect                *newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize);
       +Index                *newindex(char *name, ISect **sects, int n);
       +u32int                now(void);
       +int                okamap(AMap *am, int n, u64int start, u64int stop, char *what);
       +int                okibucket(IBucket*, ISect*);
       +int                outputamap(Fmt *f, AMap *am, int n);
       +int                outputindex(Fmt *f, Index *ix);
       +int                packarena(Arena *arena, u8int *buf);
       +int                packarenahead(ArenaHead *head, u8int *buf);
       +int                packarenapart(ArenaPart *as, u8int *buf);
       +void                packbloomhead(Bloom*, u8int*);
       +int                packclump(Clump *c, u8int *buf, u32int);
       +void                packclumpinfo(ClumpInfo *ci, u8int *buf);
       +void                packibucket(IBucket *b, u8int *buf, u32int magic);
       +void                packientry(IEntry *i, u8int *buf);
       +int                packisect(ISect *is, u8int *buf);
       +void                packmagic(u32int magic, u8int *buf);
       +ZBlock                *packet2zblock(Packet *p, u32int size);
       +int                parseamap(IFile *f, AMapN *amn);
       +int                parseindex(IFile *f, Index *ix);
       +void                partblocksize(Part *part, u32int blocksize);
       +int                partifile(IFile *f, Part *part, u64int start, u32int size);
       +void                printarenapart(int fd, ArenaPart *ap);
       +void                printarena(int fd, Arena *arena);
       +void                printindex(int fd, Index *ix);
       +void                printstats(void);
       +void                putdblock(DBlock *b);
       +void                putlump(Lump *b);
       +int                queuewrite(Lump *b, Packet *p, int creator, uint ms);
       +u32int                readarena(Arena *arena, u64int aa, u8int *buf, long n);
       +int                readarenamap(AMapN *amn, Part *part, u64int base, u32int size);
       +Bloom        *readbloom(Part*);
       +int                readclumpinfo(Arena *arena, int clump, ClumpInfo *ci);
       +int                readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n);
       +ZBlock                *readfile(char *name);
       +int                readifile(IFile *f, char *name);
       +Packet                *readlump(u8int *score, int type, u32int size, int *cached);
       +int                readpart(Part *part, u64int addr, u8int *buf, u32int n);
       +int                runconfig(char *config, Config*);
       +int                scorecmp(u8int *, u8int *);
       +void                scoremem(u8int *score, u8int *buf, int size);
       +void                setatailstate(AState*);
       +void                setdcachestate(AState*);
       +void                seterr(int severity, char *fmt, ...);
       +void                setstat(int, long);
       +void                settrace(char *type);
       +u64int                sortrawientries(Index *ix, Part *tmp, u64int *tmpoff, Bloom *bloom);
       +void                startbloomproc(Bloom*);
       +Memimage*        statgraph(Graph *g);
       +void                statsinit(void);
       +int                storeclump(Index *index, ZBlock *b, u8int *score, int type, u32int creator, IAddr *ia);
       +int                storeientry(Index *index, IEntry *m);
       +int                strscore(char *s, u8int *score);
       +int                stru32int(char *s, u32int *r);
       +int                stru64int(char *s, u64int *r);
       +void                sumarena(Arena *arena);
       +int                syncarena(Arena *arena, u64int start, u32int n, int zok, int fix);
       +int                syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pflush, int check);
       +int                syncindex(Index *ix, int fix, int mustflushicache, int check);
       +void                trace(char *type, char*, ...);
       +void                traceinit(void);
       +int                u64log2(u64int v);
       +u64int                unittoull(char *s);
       +int                unpackarena(Arena *arena, u8int *buf);
       +int                unpackarenahead(ArenaHead *head, u8int *buf);
       +int                unpackarenapart(ArenaPart *as, u8int *buf);
       +int                unpackbloomhead(Bloom*, u8int*);
       +int                unpackclump(Clump *c, u8int *buf, u32int);
       +void                unpackclumpinfo(ClumpInfo *ci, u8int *buf);
       +void                unpackibucket(IBucket *b, u8int *buf, u32int magic);
       +void                unpackientry(IEntry *i, u8int *buf);
       +int                unpackisect(ISect *is, u8int *buf);
       +u32int                unpackmagic(u8int *buf);
       +void                ventifmtinstall(void);
       +void                vtloghdump(Hio*, VtLog*);
       +void                vtloghlist(Hio*);
       +int                vtproc(void(*)(void*), void*);
       +int                vttypevalid(int type);
       +void                waitforkick(Round*);
       +int                wbarena(Arena *arena);
       +int                wbarenahead(Arena *arena);
       +int                wbarenamap(AMap *am, int n, Part *part, u64int base, u64int size);
       +int                wbarenapart(ArenaPart *ap);
       +void                wbbloomhead(Bloom*);
       +int                wbisect(ISect *is);
       +int                wbindex(Index *ix);
       +int                whackblock(u8int *dst, u8int *src, int ssize);
       +u64int                writeaclump(Arena *a, Clump *c, u8int *clbuf, u64int, u64int*);
       +u32int                writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n);
       +int                writebloom(Bloom*);
       +int                writeclumpinfo(Arena *arean, int clump, ClumpInfo *ci);
       +int                writepng(Hio*, Memimage*);
       +u64int                writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int*);
       +int                writelump(Packet *p, u8int *score, int type, u32int creator, uint ms);
       +int                writepart(Part *part, u64int addr, u8int *buf, u32int n);
       +int                writeqlump(Lump *u, Packet *p, int creator, uint ms);
       +Packet                *zblock2packet(ZBlock *zb, u32int size);
       +void                zeropart(Part *part, int blocksize);
       +
       +/*
       +#pragma        varargck        argpos        sysfatal                1
       +#pragma        varargck        argpos        logerr                2
       +#pragma        varargck        argpos        SetErr                2
       +*/
       +
       +/*
       + * Score helpers: compare or copy exactly VtScoreSize bytes.
       + * scorecmp has memcmp's return convention (0 means equal).
       + */
       +#define scorecmp(h1,h2)                memcmp((h1),(h2),VtScoreSize)
       +#define scorecp(h1,h2)                memmove((h1),(h2),VtScoreSize)
       +
       +/*
       + * Typed allocation shorthands.  Each expands to a cast call of
       + * emalloc (MK, MKN, MKNA) or ezmalloc (MKZ, MKNZ):
       + *        MK(t), MKZ(t)           room for one t
       + *        MKN(t,n), MKNZ(t,n)     room for n objects of type t
       + *        MKNA(t,at,n)            one t followed by n objects of type at
       + * NOTE(review): the Z variants presumably return zeroed memory; confirm
       + * against ezmalloc.  The n*sizeof products are not checked for overflow.
       + */
       +#define MK(t)                        ((t*)emalloc(sizeof(t)))
       +#define MKZ(t)                        ((t*)ezmalloc(sizeof(t)))
       +#define MKN(t,n)                ((t*)emalloc((n)*sizeof(t)))
       +#define MKNZ(t,n)                ((t*)ezmalloc((n)*sizeof(t)))
       +#define MKNA(t,at,n)                ((t*)emalloc(sizeof(t) + (n)*sizeof(at)))
   DIR diff --git a/src/cmd/venti/srv/graph.c b/src/cmd/venti/srv/graph.c
       t@@ -0,0 +1,202 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Layout constants, in pixels.  statgraph plots inside
       + * Rect(Left, Top, wid-Right, ht-Bottom); a requested width or height
       + * that is not larger than MinWidth/MinHeight is replaced by the default.
       + */
       +enum
       +{
       +        Top = 1,
       +        Bottom = 1,
       +        Left = 40,
       +        Right = 0,
       +        MinWidth = Left+Right+2,
       +        MinHeight = Top+Bottom+2,
       +        DefaultWidth = Left+Right+500,
       +        DefaultHeight = Top+Bottom+40
       +};
       +
       +/*
       + * memdrawlock is held by statgraph around ginit and all drawing.
       + * The remaining objects are created once by ginit.
       + */
       +QLock memdrawlock;
       +static Memsubfont *smallfont;
       +static Memimage *black;
       +static Memimage *blue;
       +static Memimage *red;
       +static Memimage *lofill[6];
       +static Memimage *hifill[6];
       +static Memimage *grid;
       +
       +/*
       + * Colour pairs: ginit turns entry 2*i into lofill[i] and
       + * entry 2*i+1 into hifill[i].
       + */
       +static ulong fill[] = {
       +        0xFFAAAAFF,        0xBB5D5DFF,        /* peach */
       +        DPalegreygreen, DPurpleblue,        /* aqua */
       +        DDarkyellow, DYellowgreen,        /* yellow */
       +        DMedgreen, DDarkgreen,                /* green */
       +        0x00AAFFFF, 0x0088CCFF,        /* blue */
       +        0xCCCCCCFF, 0x888888FF,        /* grey */
       +};
       +
       +/*
       + * Allocate a 1x1 RGB24 image filled with color, marked Frepl and given
       + * a very large clip rectangle so it can be used as a solid source.
       + * Returns nil if the image cannot be allocated; the original
       + * dereferenced the result unconditionally.
       + */
       +Memimage*
       +allocrepl(ulong color)
       +{
       +        Memimage *m;
       +
       +        m = allocmemimage(Rect(0,0,1,1), RGB24);
       +        if(m == nil)
       +                return nil;
       +        memfillcolor(m, color);
       +        m->flags |= Frepl;
       +        m->clipr = Rect(-1000000, -1000000, 1000000, 1000000);
       +        return m;
       +}
       +
       +/*
       + * One-time setup of the memdraw library, the label font and the
       + * solid-colour source images.  Calls after the first return at once.
       + */
       +static void
       +ginit(void)
       +{
       +        static int done;
       +        int k, npair;
       +
       +        if(done)
       +                return;
       +        done = 1;
       +
       +        memimageinit();
       +        smallfont = openmemsubfont(unsharp("#9/font/lucidasans/lstr.10"));
       +        black = memblack;
       +        blue = allocrepl(DBlue);
       +        red = allocrepl(DRed);
       +        grid = allocrepl(0x77777777);
       +
       +        /* use as many colour pairs as both the table and the arrays hold */
       +        npair = nelem(fill)/2;
       +        if(npair > nelem(lofill))
       +                npair = nelem(lofill);
       +        if(npair > nelem(hifill))
       +                npair = nelem(hifill);
       +        for(k=0; k<npair; k++){
       +                lofill[k] = allocrepl(fill[2*k]);
       +                hifill[k] = allocrepl(fill[2*k+1]);
       +        }
       +}
       +
       +/*
       + * Format v into str as a short axis label: plain digits below 10000,
       + * thousands with a "k" suffix below 10000000, otherwise millions with
       + * an "M" suffix.  A leading '-' is written for negative values.
       + * The caller supplies a buffer large enough for the result.
       + */
       +static void
       +mklabel(char *str, int v)
       +{
       +        char *fmt;
       +        int shown;
       +
       +        if(v < 0){
       +                *str++ = '-';
       +                v = -v;
       +        }
       +
       +        if(v < 10000){
       +                fmt = "%d";
       +                shown = v;
       +        }else if(v < 10000000){
       +                fmt = "%dk";
       +                shown = v/1000;
       +        }else{
       +                fmt = "%dM";
       +                shown = v/1000000;
       +        }
       +        sprint(str, fmt, shown);
       +}
       +
       +/*
       + * Draw the label for n in black so that its right edge ends
       + * 5 pixels to the left of p.
       + */
       +static void
       +drawlabel(Memimage *m, Point p, int n)
       +{
       +        char text[30];
       +        Point size, at;
       +
       +        mklabel(text, n);
       +        size = memsubfontwidth(smallfont, text);
       +        at = Pt(p.x-5-size.x, p.y);
       +        memimagestring(m, at, memblack, ZP, smallfont, text);
       +}
       +
       +/*
       + * Map val linearly from [valmin, valmax] onto [ptmin, ptmax].
       + * val is clamped to the value range first; an empty range is treated
       + * as having width 1 so the division is always defined.
       + */
       +static int
       +scalept(int val, int valmin, int valmax, int ptmin, int ptmax)
       +{
       +        int span;
       +        vlong scaled;
       +
       +        if(val < valmin)
       +                val = valmin;
       +        if(val > valmax)
       +                val = valmax;
       +
       +        span = valmax - valmin;
       +        if(span == 0)
       +                span = 1;
       +
       +        /* widen before multiplying so the product cannot overflow int */
       +        scaled = (vlong)(val-valmin)*(ptmax-ptmin);
       +        return ptmin + scaled/span;
       +}
       +
       +/*
       + * Render the statistic described by g as a new ABGR32 image.
       + *
       + * g->wid, g->ht and g->fill are normalised in place: sizes not larger
       + * than the minimum become the defaults, the width is capped at the
       + * number of bins available, and fill is reduced to a valid index into
       + * lofill/hifill.  If g->min/g->max do not describe a usable range
       + * (min < 0 or max <= min) the bounds are taken from the sampled data.
       + *
       + * Returns the image, or nil if graphics setup or allocation fails.
       + * memdrawlock is held while initialising and drawing.
       + */
       +Memimage*
       +statgraph(Graph *g)
       +{
       +        int i, lastlo, nbin, x, lo, hi, min, max, first;
       +        Memimage *m;
       +        Rectangle r;
       +        Statbin *b, bin[2000];        /* 32 kB, but whack is worse */
       +
       +        needstack(8192);        /* double check that bin didn't kill us */
       +
       +        if(g->wid <= MinWidth)
       +                g->wid = DefaultWidth;
       +        if(g->ht <= MinHeight)
       +                g->ht = DefaultHeight;
       +        if(g->wid > nelem(bin))
       +                g->wid = nelem(bin);
       +        if(g->fill < 0)
       +                g->fill = ((uint)g->arg>>8)%nelem(lofill);
       +        /* was '>': fill == nelem(lofill) slipped through and indexed past the arrays */
       +        if(g->fill >= nelem(lofill))
       +                g->fill %= nelem(lofill);
       +
       +        nbin = g->wid - (Left+Right);
       +        binstats(g->fn, g->arg, g->t0, g->t1, bin, nbin);
       +
       +        /*
       +         * compute bounds
       +         */
       +        min = g->min;
       +        max = g->max;
       +        if(min < 0 || max <= min){
       +                min = max = 0;
       +                first = 1;
       +                for(i=0; i<nbin; i++){
       +                        b = &bin[i];
       +                        if(b->nsamp == 0)
       +                                continue;
       +                        if(first || b->min < min)
       +                                min = b->min;
       +                        if(first || b->max > max)
       +                                max = b->max;
       +                        first = 0;
       +                }
       +        }
       +
       +        qlock(&memdrawlock);
       +        ginit();
       +        /*
       +         * The original compared the hifill/lofill arrays themselves with
       +         * nil, which can never be true; check the images actually used.
       +         */
       +        if(smallfont==nil || black==nil || blue==nil || red==nil
       +        || hifill[g->fill]==nil || lofill[g->fill]==nil){
       +                werrstr("graphics initialization failed");
       +                qunlock(&memdrawlock);
       +                return nil;
       +        }
       +
       +        /* fresh image */
       +        m = allocmemimage(Rect(0,0,g->wid,g->ht), ABGR32);
       +        if(m == nil){
       +                qunlock(&memdrawlock);
       +                return nil;
       +        }
       +        r = Rect(Left, Top, g->wid-Right, g->ht-Bottom);
       +        memfillcolor(m, DTransparent);
       +
       +        /* x axis */
       +        memimagedraw(m, Rect(r.min.x, r.max.y, r.max.x, r.max.y+1), black, ZP, memopaque, ZP, S);
       +
       +        /* y labels */
       +        drawlabel(m, r.min, max);
       +        if(min != 0)
       +                drawlabel(m, Pt(r.min.x, r.max.y-smallfont->height), min);
       +
       +        /* actual data: one pixel column per bin that has samples */
       +        lastlo = -1;
       +        for(i=0; i<nbin; i++){
       +                b = &bin[i];
       +                if(b->nsamp == 0)
       +                        continue;
       +                lo = scalept(b->min, min, max, r.max.y, r.min.y);
       +                hi = scalept(b->max, min, max, r.max.y, r.min.y);
       +                x = r.min.x+i;
       +                hi-=2;
       +                /* disabled: join this column to the previous one */
       +                if(0)
       +                if(lastlo != -1){
       +                        if(lastlo < lo)
       +                                memimagedraw(m, Rect(x-1, lastlo, x, lo), hifill[g->fill], ZP, memopaque, ZP, S);
       +                        else if(lastlo > lo)
       +                                memimagedraw(m, Rect(x-1, lo, x, lastlo), hifill[g->fill], ZP, memopaque, ZP, S);
       +                }
       +                memimagedraw(m, Rect(x, hi, x+1,lo), hifill[g->fill], ZP, memopaque, ZP, S);
       +                memimagedraw(m, Rect(x, lo, x+1, r.max.y), lofill[g->fill], ZP, memopaque, ZP, S);
       +                lastlo = lo;
       +        }
       +
       +        /* label the right edge with the last bin's average, if it has samples */
       +        if(bin[nbin-1].nsamp)
       +                drawlabel(m, Pt(r.max.x, r.min.y+(Dy(r)-smallfont->height)/2), bin[nbin-1].avg);
       +        qunlock(&memdrawlock);
       +        return m;
       +}
   DIR diff --git a/src/cmd/venti/srv/httpd.c b/src/cmd/venti/srv/httpd.c
       t@@ -0,0 +1,988 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "xml.h"
       +
       +typedef struct HttpObj        HttpObj;
       +extern QLock memdrawlock;
       +
       +/* Limits of the handler table below. */
       +enum
       +{
       +        ObjNameSize        = 64,
       +        MaxObjs                = 16
       +};
       +
       +/*
       + * One registered URI handler.  httpproc treats a name ending in '/'
       + * as a prefix of the request URI and any other name as an exact match.
       + */
       +struct HttpObj
       +{
       +        char        name[ObjNameSize];
       +        int        (*f)(HConnect*);
       +};
       +
       +/* Handler table filled by httpdobj; the first empty name ends the used part. */
       +static HttpObj        objs[MaxObjs];
       +
       +/* Directory served by fromwebdir; nil disables static files. */
       +static char *webroot;
       +
       +static        void                listenproc(void*);
       +static        int                estats(HConnect *c);
       +static        int                dindex(HConnect *c);
       +static        int                xindex(HConnect *c);
       +static        int                xlog(HConnect *c);
       +static        int                sindex(HConnect *c);
       +static        int                hicacheflush(HConnect *c);
       +static        int                hdcacheflush(HConnect *c);
       +static        int                notfound(HConnect *c);
       +static        int                httpdobj(char *name, int (*f)(HConnect*));
       +static        int                xgraph(HConnect *c);
       +static        int                xset(HConnect *c);
       +static        int                fromwebdir(HConnect *c);
       +
       +/*
       + * Install the print formats used by the server, register the built-in
       + * URI handlers, remember dir as the web root and start the listener
       + * proc on address (default "tcp!*!http").
       + * Returns 0, or -1 if the listener proc could not be started.
       + */
       +int
       +httpdinit(char *address, char *dir)
       +{
       +        static struct {
       +                char        *path;
       +                int        (*handler)(HConnect*);
       +        } builtin[] = {
       +                { "/stats",                estats },
       +                { "/index",                dindex },
       +                { "/storage",                sindex },
       +                { "/xindex",                xindex },
       +                { "/flushicache",        hicacheflush },
       +                { "/flushdcache",        hdcacheflush },
       +                { "/graph/",                xgraph },
       +                { "/set/",                xset },
       +                { "/log",                xlog },
       +                { "/log/",                xlog },
       +        };
       +        int k;
       +
       +        fmtinstall('D', hdatefmt);
       +/*        fmtinstall('H', httpfmt); */
       +        fmtinstall('U', hurlfmt);
       +
       +        if(address == nil)
       +                address = "tcp!*!http";
       +        webroot = dir;
       +
       +        /* registration order is the order httpproc searches in */
       +        for(k = 0; k < nelem(builtin); k++)
       +                httpdobj(builtin[k].path, builtin[k].handler);
       +
       +        return vtproc(listenproc, address) < 0 ? -1 : 0;
       +}
       +
       +/*
       + * Register f as the handler for name in the first free slot of objs.
       + * Returns 0 on success; -1 if name is nil or too long, is already
       + * registered, or the table is full.
       + */
       +static int
       +httpdobj(char *name, int (*f)(HConnect*))
       +{
       +        HttpObj *o;
       +
       +        if(name == nil)
       +                return -1;
       +        if(strlen(name) >= ObjNameSize)
       +                return -1;
       +
       +        for(o = objs; o < objs+MaxObjs; o++){
       +                if(o->name[0] != '\0'){
       +                        /* occupied: refuse duplicates, otherwise keep looking */
       +                        if(strcmp(o->name, name) == 0)
       +                                return -1;
       +                        continue;
       +                }
       +                strcpy(o->name, name);        /* fits: length checked above */
       +                o->f = f;
       +                return 0;
       +        }
       +        return -1;
       +}
       +
       +/*
       + * Allocate a zeroed HConnect with an empty header buffer.
       + * Exits via sysfatal if memory is exhausted.
       + */
       +static HConnect*
       +mkconnect(void)
       +{
       +        HConnect *conn;
       +
       +        conn = mallocz(sizeof *conn, 1);
       +        if(conn == nil)
       +                sysfatal("out of memory");
       +
       +        conn->replog = nil;
       +        /* both cursors start at the beginning of the header buffer */
       +        conn->hpos = conn->header;
       +        conn->hstop = conn->header;
       +        return conn;
       +}
       +
       +void httpproc(void*);
       +
       +/*
       + * Listener proc: announce on the address passed in vaddress, then
       + * accept calls forever, handing each connection to its own httpproc.
       + * Returns (ending the proc) if announce or listen fails.
       + *
       + * Changes from the original: the announce fd is closed when listen
       + * fails, and a connection whose proc cannot be started is closed and
       + * freed instead of being leaked.
       + */
       +static void
       +listenproc(void *vaddress)
       +{
       +        HConnect *c;
       +        char *address, ndir[NETPATHLEN], dir[NETPATHLEN];
       +        int ctl, nctl, data;
       +
       +/* sleep(1000); */        /* let strace find us */
       +
       +        address = vaddress;
       +        ctl = announce(address, dir);
       +        if(ctl < 0){
       +                fprint(2, "venti: httpd can't announce on %s: %r\n", address);
       +                return;
       +        }
       +
       +        if(0) print("announce ctl %d dir %s\n", ctl, dir);
       +        for(;;){
       +                /*
       +                 *  wait for a call (or an error)
       +                 */
       +                nctl = listen(dir, ndir);
       +                if(0) print("httpd listen %d %s...\n", nctl, ndir);
       +                if(nctl < 0){
       +                        fprint(2, "venti: httpd can't listen on %s: %r\n", address);
       +                        close(ctl);
       +                        return;
       +                }
       +
       +                data = accept(ctl, ndir);
       +                if(0) print("httpd accept %d...\n", data);
       +                if(data < 0){
       +                        fprint(2, "venti: httpd accept: %r\n");
       +                        close(nctl);
       +                        continue;
       +                }
       +                if(0) print("httpd close nctl %d\n", nctl);
       +                close(nctl);
       +                c = mkconnect();
       +                hinit(&c->hin, data, Hread);
       +                hinit(&c->hout, data, Hwrite);
       +                if(vtproc(httpproc, c) < 0){
       +                        /* nobody will ever serve or free this connection */
       +                        fprint(2, "venti: httpd can't start connection proc\n");
       +                        close(data);
       +                        free(c);
       +                }
       +        }
       +}
       +
       +/*
       + * Per-connection proc.  Parses requests from the HConnect passed in v
       + * and dispatches each to the first matching entry of objs, falling
       + * back to fromwebdir.  A table name ending in '/' matches as a URI
       + * prefix; any other name must match the URI exactly.  The loop ends
       + * when parsing fails, the handler returns a negative value, or the
       + * request asked for the connection to be closed; the connection is
       + * then closed and freed.
       + */
       +void
       +httpproc(void *v)
       +{
       +        HConnect *c;
       +        int (*handler)(HConnect*);
       +        char *name;
       +        int status, k, len, matched;
       +
       +/* sleep(1000); */        /* let strace find us */
       +        c = v;
       +
       +        /*
       +         * No timeout because the signal appears to hit every
       +         * proc, not just us.
       +         */
       +        while(hparsereq(c, 0) >= 0){
       +                handler = fromwebdir;
       +                for(k = 0; k < MaxObjs && objs[k].name[0] != '\0'; k++){
       +                        name = objs[k].name;
       +                        len = strlen(name);
       +                        if(name[len-1] == '/')
       +                                matched = strncmp(c->req.uri, name, len) == 0;
       +                        else
       +                                matched = strcmp(c->req.uri, name) == 0;
       +                        if(matched){
       +                                handler = objs[k].f;
       +                                break;
       +                        }
       +                }
       +
       +                status = (*handler)(c);
       +                if(c->head.closeit)
       +                        status = -1;
       +                hreqcleanup(c);
       +
       +                if(status < 0)
       +                        break;
       +        }
       +        hreqcleanup(c);
       +        close(c->hin.fd);
       +        free(c);
       +}
       +
       +/*
       + * Return v as an integer percentage of total.  A zero total is
       + * treated as 1.  For v of a million or more the divisor is scaled
       + * down by 100 instead of scaling v up, so v*100 is never formed.
       + */
       +static int
       +percent(long v, long total)
       +{
       +        long denom;
       +
       +        denom = total != 0 ? total : 1;
       +        if(v >= 1000*1000){
       +                denom /= 100;
       +                if(denom == 0)
       +                        denom = 1;
       +                return v / denom;
       +        }
       +        return (v * 100) / denom;
       +}
       +
       +/*
       + * Parse the request headers and reject anything this server does not
       + * handle: methods other than GET and HEAD, and Expect headers.
       + * Returns 0 if the request may proceed, -1 if the headers cannot be
       + * parsed, otherwise the result of hunallowed or hfail.
       + */
       +static int
       +preq(HConnect *c)
       +{
       +        int isget, ishead;
       +
       +        if(hparseheaders(c, 0) < 0)
       +                return -1;
       +
       +        isget = strcmp(c->req.meth, "GET") == 0;
       +        ishead = strcmp(c->req.meth, "HEAD") == 0;
       +        if(!isget && !ishead)
       +                return hunallowed(c, "GET, HEAD");
       +
       +        if(c->head.expectother || c->head.expectcont)
       +                return hfail(c, HExpectFail, nil);
       +        return 0;
       +}
       +
       +/*
       + * Validate the request with preq and start a reply of the given
       + * content type.  Headers are written only when the request carries a
       + * major version.  HTTP/1.1 replies use chunked transfer encoding;
       + * for anything else the connection is marked to close afterwards.
       + * Returns 0 on success or preq's negative result.
       + */
       +static int
       +preqtype(HConnect *c, char *type)
       +{
       +        Hio *out;
       +        int status;
       +
       +        status = preq(c);
       +        if(status < 0)
       +                return status;
       +
       +        out = &c->hout;
       +        if(c->req.vermaj != 0){
       +                hokheaders(c);
       +                hprint(out, "Content-type: %s\r\n", type);
       +                if(http11(c))
       +                        hprint(out, "Transfer-Encoding: chunked\r\n");
       +                hprint(out, "\r\n");
       +        }
       +
       +        if(!http11(c))
       +                c->head.closeit = 1;
       +        else
       +                hxferenc(out, 1);
       +        return 0;
       +}
       +
       +/* Start a "text/plain" reply; see preqtype for the return value. */
       +static int
       +preqtext(HConnect *c)
       +{
       +        int status;
       +
       +        status = preqtype(c, "text/plain");
       +        return status;
       +}
       +
       +/*
       + * Answer the request with HNotFound for its URI, unless preq already
       + * rejected it, in which case preq's negative result is returned.
       + */
       +static int
       +notfound(HConnect *c)
       +{
       +        int status;
       +
       +        status = preq(c);
       +        return status < 0 ? status : hfail(c, HNotFound, c->req.uri);
       +}
       +
       +struct {
       +        char *ext;
       +        char *type;
       +} exttab[] = {
       +        ".html",        "text/html",
       +        ".txt",        "text/plain",
       +        ".xml",        "text/xml",
       +        ".png",        "image/png",
       +        ".gif",        "image/gif",
       +        0
       +};
       +
       +/*
       + * Serve the request URI as a file below webroot.  URIs containing
       + * ".." are refused.  A directory is retried once as its index.html.
       + * The content type is chosen from exttab by file-name suffix and
       + * defaults to "application/octet-stream".
       + * Returns notfound's result when nothing can be served, otherwise 0.
       + *
       + * Fix: the file descriptor was leaked when the index.html fallback
       + * turned out to be a directory as well.
       + */
       +static int
       +fromwebdir(HConnect *c)
       +{
       +        char buf[4096], *p, *ext, *type;
       +        int i, fd, n, defaulted;
       +        Dir *d;
       +
       +        if(webroot == nil || strstr(c->req.uri, ".."))
       +                return notfound(c);
       +        /* the 20 spare bytes leave room for the "/index.html" appended below */
       +        snprint(buf, sizeof buf-20, "%s/%s", webroot, c->req.uri+1);
       +        defaulted = 0;
       +reopen:
       +        if((fd = open(buf, OREAD)) < 0)
       +                return notfound(c);
       +        d = dirfstat(fd);
       +        if(d == nil){
       +                close(fd);
       +                return notfound(c);
       +        }
       +        if(d->mode&DMDIR){
       +                /* a directory is never served itself: release it on both paths */
       +                free(d);
       +                close(fd);
       +                if(defaulted)
       +                        return notfound(c);
       +                defaulted = 1;
       +                strcat(buf, "/index.html");
       +                goto reopen;
       +        }
       +        free(d);
       +
       +        /* pick the content type from the file-name suffix */
       +        p = buf+strlen(buf);
       +        type = "application/octet-stream";
       +        for(i=0; exttab[i].ext; i++){
       +                ext = exttab[i].ext;
       +                if(p-strlen(ext) >= buf && strcmp(p-strlen(ext), ext) == 0){
       +                        type = exttab[i].type;
       +                        break;
       +                }
       +        }
       +        if(preqtype(c, type) < 0){
       +                close(fd);
       +                return 0;
       +        }
       +
       +        /* buf is free again: reuse it to copy the file to the client */
       +        while((n = read(fd, buf, sizeof buf)) > 0)
       +                if(hwrite(&c->hout, buf, n) < 0)
       +                        break;
       +        close(fd);
       +        hflush(&c->hout);
       +        return 0;
       +}
       +
       +/*
       + * Integer settings that xset can report and change by name.
       + * The entry with a nil name ends the table.
       + */
       +static struct
       +{
       +        char *name;
       +        int *p;
       +} namedints[] =
       +{
       +        { "compress",                &compressblocks },
       +        { "devnull",                &writestodevnull },
       +        { "logging",                &ventilogging },
       +        { "stats",                &collectstats },
       +        { "icachesleeptime",        &icachesleeptime },
       +        { "arenasumsleeptime",        &arenasumsleeptime },
       +        { nil,                        nil }
       +};
       +
       +/*
       + * Handler for /set/name[/value].  Looks name up in namedints, stores
       + * atoi(value) when a value is given, and replies with "name = value"
       + * as plain text.  Unknown or missing names get notfound.
       + * Returns 0 on success or the negative result of preqtext/notfound.
       + *
       + * Fix: the working copy of the URI is now freed on every path; the
       + * original leaked it on each request.
       + */
       +static int
       +xset(HConnect *c)
       +{
       +        int i, nf, r;
       +        char *f[10], *s;
       +
       +        /* getfields splits in place, so work on a private copy of the URI */
       +        s = estrdup(c->req.uri);
       +        nf = getfields(s+strlen("/set/"), f, nelem(f), 1, "/");
       +
       +        if(nf < 1){
       +                free(s);
       +                return notfound(c);
       +        }
       +        for(i=0; namedints[i].name; i++){
       +                if(strcmp(f[0], namedints[i].name) != 0)
       +                        continue;
       +                if(nf >= 2)
       +                        *namedints[i].p = atoi(f[1]);
       +                r = preqtext(c);
       +                if(r >= 0){
       +                        /* f[0] points into s, so print before freeing */
       +                        hprint(&c->hout, "%s = %d\n", f[0], *namedints[i].p);
       +                        hflush(&c->hout);
       +                        r = 0;
       +                }
       +                free(s);
       +                return r;
       +        }
       +        free(s);
       +        return notfound(c);
       +}
       +
       +/*
       + * Handler for /stats.  Starts a plain-text reply and flushes it.
       + * All of the statistics output below is commented out, so the reply
       + * body is currently empty.
       + * Returns 0, or the negative result of preqtext.
       + */
       +static int
       +estats(HConnect *c)
       +{
       +        Hio *hout;
       +        int r;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +
       +
       +        hout = &c->hout;
       +        /* disabled report of the counters in the global stats structure */
       +/*
       +        hprint(hout, "lump writes=%,ld\n", stats.lumpwrites);
       +        hprint(hout, "lump reads=%,ld\n", stats.lumpreads);
       +        hprint(hout, "lump cache read hits=%,ld\n", stats.lumphit);
       +        hprint(hout, "lump cache read misses=%,ld\n", stats.lumpmiss);
       +
       +        hprint(hout, "clump disk writes=%,ld\n", stats.clumpwrites);
       +        hprint(hout, "clump disk bytes written=%,lld\n", stats.clumpbwrites);
       +        hprint(hout, "clump disk bytes compressed=%,lld\n", stats.clumpbcomp);
       +        hprint(hout, "clump disk reads=%,ld\n", stats.clumpreads);
       +        hprint(hout, "clump disk bytes read=%,lld\n", stats.clumpbreads);
       +        hprint(hout, "clump disk bytes uncompressed=%,lld\n", stats.clumpbuncomp);
       +
       +        hprint(hout, "clump directory disk writes=%,ld\n", stats.ciwrites);
       +        hprint(hout, "clump directory disk reads=%,ld\n", stats.cireads);
       +
       +        hprint(hout, "index disk writes=%,ld\n", stats.indexwrites);
       +        hprint(hout, "index disk reads=%,ld\n", stats.indexreads);
       +        hprint(hout, "index disk bloom filter hits=%,ld %d%% falsemisses=%,ld %d%%\n",
       +                stats.indexbloomhits,
       +                percent(stats.indexbloomhits, stats.indexreads),
       +                stats.indexbloomfalsemisses,
       +                percent(stats.indexbloomfalsemisses, stats.indexreads));
       +        hprint(hout, "bloom filter bits=%,ld of %,ld %d%%\n",
       +                stats.bloomones, stats.bloombits, percent(stats.bloomones, stats.bloombits));
       +        hprint(hout, "index disk reads for modify=%,ld\n", stats.indexwreads);
       +        hprint(hout, "index disk reads for allocation=%,ld\n", stats.indexareads);
       +        hprint(hout, "index block splits=%,ld\n", stats.indexsplits);
       +
       +        hprint(hout, "index cache lookups=%,ld\n", stats.iclookups);
       +        hprint(hout, "index cache hits=%,ld %d%%\n", stats.ichits,
       +                percent(stats.ichits, stats.iclookups));
       +        hprint(hout, "index cache fills=%,ld %d%%\n", stats.icfills,
       +                percent(stats.icfills, stats.iclookups));
       +        hprint(hout, "index cache inserts=%,ld\n", stats.icinserts);
       +
       +        hprint(hout, "disk cache hits=%,ld\n", stats.pchit);
       +        hprint(hout, "disk cache misses=%,ld\n", stats.pcmiss);
       +        hprint(hout, "disk cache reads=%,ld\n", stats.pcreads);
       +        hprint(hout, "disk cache bytes read=%,lld\n", stats.pcbreads);
       +
       +        hprint(hout, "disk cache writes=%,ld\n", stats.dirtydblocks);
       +        hprint(hout, "disk cache writes absorbed=%,ld %d%%\n", stats.absorbedwrites,
       +                percent(stats.absorbedwrites, stats.dirtydblocks));
       +
       +        hprint(hout, "disk cache flushes=%,ld\n", stats.dcacheflushes);
       +        hprint(hout, "disk cache flush writes=%,ld (%,ld per flush)\n", 
       +                stats.dcacheflushwrites,
       +                stats.dcacheflushwrites/(stats.dcacheflushes ? stats.dcacheflushes : 1));
       +
       +        hprint(hout, "disk writes=%,ld\n", stats.diskwrites);
       +        hprint(hout, "disk bytes written=%,lld\n", stats.diskbwrites);
       +        hprint(hout, "disk reads=%,ld\n", stats.diskreads);
       +        hprint(hout, "disk bytes read=%,lld\n", stats.diskbreads);
       +*/
       +
       +        hflush(hout);
       +        return 0;
       +}
       +
       +/*
       + * Handler for /storage.  Prints a plain-text summary of mainindex:
       + * its name, the number of arenas and how many hold clumps, total and
       + * used space, and clump counts and sizes summed over the arenas.
       + * Returns 0, or the negative result of preqtext.
       + *
       + * Fix: arena->size was read outside the nil check, so a nil arena
       + * slot crashed the server; nil slots are now skipped entirely.
       + */
       +static int
       +sindex(HConnect *c)
       +{
       +        Hio *hout;
       +        Index *ix;
       +        Arena *arena;
       +        vlong clumps, cclumps, uncsize, used, size;
       +        int i, r, active;
       +
       +        r = preqtext(c);
       +        if(r < 0)
       +                return r;
       +        hout = &c->hout;
       +
       +        ix = mainindex;
       +
       +        hprint(hout, "index=%s\n", ix->name);
       +
       +        active = 0;
       +        clumps = 0;
       +        cclumps = 0;
       +        uncsize = 0;
       +        used = 0;
       +        size = 0;
       +        for(i = 0; i < ix->narenas; i++){
       +                arena = ix->arenas[i];
       +                if(arena == nil)
       +                        continue;
       +                size += arena->size;
       +                /* only arenas that hold clumps count as active */
       +                if(arena->memstats.clumps != 0){
       +                        active++;
       +                        clumps += arena->memstats.clumps;
       +                        cclumps += arena->memstats.cclumps;
       +                        uncsize += arena->memstats.uncsize;
       +                        used += arena->memstats.used;
       +                }
       +        }
       +        hprint(hout, "total arenas=%,d active=%,d\n", ix->narenas, active);
       +        hprint(hout, "total space=%,lld used=%,lld\n", size, used + clumps * ClumpInfoSize);
       +        hprint(hout, "clumps=%,lld compressed clumps=%,lld data=%,lld compressed data=%,lld\n",
       +                clumps, cclumps, uncsize, used - clumps * ClumpSize);
       +        hflush(hout);
       +        return 0;
       +}
       +
       +/*
       + * Print a human-readable description of arena to hout: name, partition
       + * and address range, version and times, sealed flags, the score if it
       + * differs from zeroscore, and the clump statistics from both memstats
       + * and diskstats.
       + */
       +static void
       +darena(Hio *hout, Arena *arena)
       +{
       +        /* the printed range extends 2*blocksize past base+size */
       +        hprint(hout, "arena='%s' on %s at [%lld,%lld)\n\tversion=%d created=%d modified=%d",
       +                arena->name, arena->part->name, arena->base, arena->base + arena->size + 2 * arena->blocksize,
       +                arena->version, arena->ctime, arena->wtime);
       +        if(arena->memstats.sealed)
       +                hprint(hout, " mem=sealed");
       +        if(arena->diskstats.sealed)
       +                hprint(hout, " disk=sealed");
       +        hprint(hout, "\n");
       +        if(scorecmp(zeroscore, arena->score) != 0)
       +                hprint(hout, "\tscore=%V\n", arena->score);
       +
       +        /* "compressed data" is used less ClumpSize per clump; "storage" is used plus ClumpInfoSize per clump */
       +        hprint(hout, "\tmem: clumps=%d compressed clumps=%d data=%,lld compressed data=%,lld storage=%,lld\n",
       +                arena->memstats.clumps, arena->memstats.cclumps, arena->memstats.uncsize,
       +                arena->memstats.used - arena->memstats.clumps * ClumpSize,
       +                arena->memstats.used + arena->memstats.clumps * ClumpInfoSize);
       +        hprint(hout, "\tdisk: clumps=%d compressed clumps=%d data=%,lld compressed data=%,lld storage=%,lld\n",
       +                arena->diskstats.clumps, arena->diskstats.cclumps, arena->diskstats.uncsize,
       +                arena->diskstats.used - arena->diskstats.clumps * ClumpSize,
       +                arena->diskstats.used + arena->diskstats.clumps * ClumpInfoSize);
       +}
       +
       +/*
       + * HTTP handler: flush the index cache and acknowledge in plain text.
       + * Returns preqtext's negative result on failure, 0 on success.
       + */
       +static int
       +hicacheflush(HConnect *c)
       +{
       +        int status;
       +
       +        status = preqtext(c);
       +        if(status >= 0){
       +                flushicache();
       +                hprint(&c->hout, "flushed icache\n");
       +                hflush(&c->hout);
       +                status = 0;
       +        }
       +        return status;
       +}
       +
       +/*
       + * HTTP handler: flush the disk block cache and acknowledge in plain text.
       + * Returns preqtext's negative result on failure, 0 on success.
       + */
       +static int
       +hdcacheflush(HConnect *c)
       +{
       +        int status;
       +
       +        status = preqtext(c);
       +        if(status >= 0){
       +                flushdcache();
       +                hprint(&c->hout, "flushed dcache\n");
       +                hflush(&c->hout);
       +                status = 0;
       +        }
       +        return status;
       +}
       +
       +/*
       + * HTTP handler: dump the main index in plain text -- its header
       + * fields, every index section, and a darena() description of each
       + * arena that holds at least one clump in memory.
       + */
       +static int
       +dindex(HConnect *c)
       +{
       +        Hio *out;
       +        Index *index;
       +        Arena *a;
       +        int n, status;
       +
       +        status = preqtext(c);
       +        if(status < 0)
       +                return status;
       +        out = &c->hout;
       +        index = mainindex;
       +
       +        hprint(out, "index=%s version=%d blocksize=%d tabsize=%d\n",
       +                index->name, index->version, index->blocksize, index->tabsize);
       +        hprint(out, "\tbuckets=%d div=%d\n", index->buckets, index->div);
       +
       +        n = 0;
       +        while(n < index->nsects){
       +                hprint(out, "\tsect=%s for buckets [%lld,%lld) buckmax=%d\n",
       +                        index->smap[n].name, index->smap[n].start, index->smap[n].stop,
       +                        index->sects[n]->buckmax);
       +                n++;
       +        }
       +
       +        for(n = 0; n < index->narenas; n++){
       +                a = index->arenas[n];
       +                /* skip missing and empty arenas */
       +                if(a == nil || a->memstats.clumps == 0)
       +                        continue;
       +                hprint(out, "arena=%s at index [%lld,%lld)\n\t",
       +                        index->amap[n].name, index->amap[n].start, index->amap[n].stop);
       +                darena(out, a);
       +        }
       +        hflush(out);
       +        return 0;
       +}
       +
       +/*
       + * Argument block handed to the graph sampling functions through
       + * Graph.arg: selects which counters of Stats.n[] to plot.
       + */
       +typedef struct Arg Arg;
       +struct Arg
       +{
       +        int index;        /* primary counter: index into Stats.n[] */
       +        int index2;        /* second counter, the denominator for the pct graphs */
       +};
       +
       +/*
       + * Graph sample: the current value of the selected counter.
       + */
       +static long
       +rawgraph(Stats *s, Stats *t, void *va)
       +{
       +        int which;
       +
       +        which = ((Arg*)va)->index;
       +        return t->n[which];
       +}
       +
       +/*
       + * Graph sample: change of the selected counter between samples s and t.
       + */
       +static long
       +diffgraph(Stats *s, Stats *t, void *va)
       +{
       +        int which;
       +
       +        which = ((Arg*)va)->index;
       +        return t->n[which] - s->n[which];
       +}
       +
       +/*
       + * Graph sample: counter index as a percentage of counter index2,
       + * both taken from sample t.
       + */
       +static long
       +pctgraph(Stats *s, Stats *t, void *va)
       +{
       +        Arg *sel;
       +
       +        sel = (Arg*)va;
       +        return percent(t->n[sel->index], t->n[sel->index2]);
       +}
       +
       +/*
       + * Graph sample: percentage of the change in counter index relative
       + * to the change in counter index2 between samples s and t.
       + */
       +static long
       +pctdiffgraph(Stats *s, Stats *t, void *va)
       +{
       +        Arg *sel;
       +
       +        sel = (Arg*)va;
       +        return percent(t->n[sel->index] - s->n[sel->index],
       +                t->n[sel->index2] - s->n[sel->index2]);
       +}
       +
       +/*
       + * Total bytes moved over the network according to sample s:
       + * RPC read bytes plus RPC write bytes.
       + */
       +static long
       +netbw(Stats *s)
       +{
       +        ulong rd, wr;
       +
       +        rd = s->n[StatRpcReadBytes];
       +        wr = s->n[StatRpcWriteBytes];
       +        return rd + wr;        /* not exactly right */
       +}
       +
       +/*
       + * Total bytes moved to and from disk according to sample s:
       + * arena partition and index section traffic plus checksum reads.
       + */
       +static long
       +diskbw(Stats *s)
       +{
       +        ulong total;
       +
       +        total = s->n[StatApartReadBytes];
       +        total += s->n[StatApartWriteBytes];
       +        total += s->n[StatIsectReadBytes];
       +        total += s->n[StatIsectWriteBytes];
       +        total += s->n[StatSumReadBytes];
       +        return total;
       +}
       +
       +/*
       + * Combined network and disk byte count for sample s.
       + */
       +static long
       +iobw(Stats *s)
       +{
       +        long net, disk;
       +
       +        net = netbw(s);
       +        disk = diskbw(s);
       +        return net + disk;
       +}
       +
       +/*
       + * Graph sample: disk bytes transferred between samples s and t.
       + */
       +static long
       +diskgraph(Stats *s, Stats *t, void *va)
       +{
       +        long before, after;
       +
       +        USED(va);
       +        after = diskbw(t);
       +        before = diskbw(s);
       +        return after - before;
       +}
       +
       +/*
       + * Graph sample: network bytes transferred between samples s and t.
       + */
       +static long
       +netgraph(Stats *s, Stats *t, void *va)
       +{
       +        long before, after;
       +
       +        USED(va);
       +        after = netbw(t);
       +        before = netbw(s);
       +        return after - before;
       +}
       +
       +/*
       + * Graph sample: network plus disk bytes transferred between
       + * samples s and t.
       + */
       +static long
       +iograph(Stats *s, Stats *t, void *va)
       +{
       +        long before, after;
       +
       +        USED(va);
       +        after = iobw(t);
       +        before = iobw(s);
       +        return after - before;
       +}
       +
       +
       +/*
       + * URL names of the statistics that can be graphed.  findname()
       + * returns the position of a name in this table and that position
       + * is used directly as an index into Stats.n[].
       + * NOTE(review): the order therefore presumably has to match the
       + * Stat* enumeration exactly -- confirm against its declaration
       + * before adding or reordering entries.
       + */
       +static char* graphname[] =
       +{
       +        "rpctotal",
       +        "rpcread",
       +        "rpcreadok",
       +        "rpcreadfail",
       +        "rpcreadbyte",
       +        "rpcreadtime",
       +        "rpcreadcached",
       +        "rpcreadcachedtime",
       +        "rpcreaduncached",
       +        "rpcreaduncachedtime",
       +        "rpcwrite",
       +        "rpcwritenew",
       +        "rpcwriteold",
       +        "rpcwritefail",
       +        "rpcwritebyte",
       +        "rpcwritetime",
       +        "rpcwritenewtime",
       +        "rpcwriteoldtime",
       +
       +        "lcachehit",
       +        "lcachemiss",
       +        "lcachelookup",
       +        "lcachewrite",
       +        "lcachesize",
       +        "lcachestall",
       +        "lcachelookuptime",
       +        
       +        "dcachehit",
       +        "dcachemiss",
       +        "dcachelookup",
       +        "dcacheread",
       +        "dcachewrite",
       +        "dcachedirty",
       +        "dcachesize",
       +        "dcacheflush",
       +        "dcachestall",
       +        "dcachelookuptime",
       +
       +        "dblockstall",
       +        "lumpstall",
       +
       +        "icachehit",
       +        "icachemiss",
       +        "icachelookup",
       +        "icachewrite",
       +        "icachefill",
       +        "icacheprefetch",
       +        "icachedirty",
       +        "icachesize",
       +        "icacheflush",
       +        "icachestall",
       +        "icachelookuptime",
       +
       +        "bloomhit",
       +        "bloommiss",
       +        "bloomfalsemiss",
       +        "bloomlookup",
       +        "bloomones",
       +        "bloombits",
       +        "bloomlookuptime",
       +
       +        "apartread",
       +        "apartreadbyte",
       +        "apartwrite",
       +        "apartwritebyte",
       +
       +        "isectread",
       +        "isectreadbyte",
       +        "isectwrite",
       +        "isectwritebyte",
       +
       +        "sumread",
       +        "sumreadbyte",
       +};
       +
       +/*
       + * Return the position of s in graphname[], or -1 (after logging
       + * the unknown name on fd 2) when it is not listed.
       + */
       +static int
       +findname(char *s)
       +{
       +        int k;
       +
       +        k = 0;
       +        while(k < nelem(graphname)){
       +                if(strcmp(graphname[k], s) == 0)
       +                        return k;
       +                k++;
       +        }
       +        fprint(2, "no name '%s'\n", s);
       +        return -1;
       +}
       +
       +/*
       + * Text rendering of a graph: bin the samples selected by g into
       + * 100 bins with binstats and print one line per bin.
       + */
       +static void
       +dotextbin(Hio *io, Graph *g)
       +{
       +        Statbin bins[2000], *bp;        /* 32 kB, but whack is worse */
       +        int k, count;
       +
       +        needstack(8192);        /* double check that bins didn't kill us */
       +        count = 100;
       +        binstats(g->fn, g->arg, g->t0, g->t1, bins, count);
       +
       +        hprint(io, "stats\n\n");
       +        k = 0;
       +        for(bp = bins; bp < bins+count; bp++){
       +                hprint(io, "%d: nsamp=%d min=%d max=%d avg=%d\n",
       +                        k, bp->nsamp, bp->min, bp->max, bp->avg);
       +                k++;
       +        }
       +}
       +
       +/*
       + * HTTP handler for /graph/NAME[/OPTION...].
       + *
       + * NAME is a graphname[] entry, or "*" when one of the bandwidth
       + * options (diskbw, iobw, netbw) supplies the data.  Options:
       + *        t0=N t1=N        time range passed to the grapher
       + *        min=N max=N        value range
       + *        wid=N ht=N        image size
       + *        fill=N                fill setting
       + *        diff                plot the change between samples
       + *        pct=NAME        plot NAME2 percentage of counters (range 0..100)
       + *        pctdiff=NAME        same, on the per-sample changes
       + *        text                emit the binned values as text/plain
       + * Unknown options are ignored.  The reply is a PNG image unless
       + * "text" was given.  Returns 0 on success, -1 when the response
       + * header could not be sent, or notfound()'s result for bad requests.
       + *
       + * The duplicated URI is released on every exit path, and the image
       + * is released when the PNG header cannot be sent.
       + */
       +static int
       +xgraph(HConnect *c)
       +{
       +        char *f[20], *s;
       +        Hio *hout;
       +        Memimage *m;
       +        int i, nf, dotext;
       +        Graph g;
       +        Arg arg;
       +
       +        s = estrdup(c->req.uri);
       +        memset(&g, 0, sizeof g);
       +        memset(&arg, 0, sizeof arg);
       +        nf = getfields(s+strlen("/graph/"), f, nelem(f), 1, "/");
       +        if(nf < 1)
       +                goto notfound;
       +        if((arg.index = findname(f[0])) == -1 && strcmp(f[0], "*") != 0)
       +                goto notfound;
       +        g.arg = &arg;
       +        g.t0 = -120;
       +        g.t1 = 0;
       +        g.min = -1;
       +        g.max = -1;
       +        g.fn = rawgraph;
       +        g.wid = -1;
       +        g.ht = -1;
       +        dotext = 0;
       +        g.fill = -1;
       +        for(i=1; i<nf; i++){
       +                if(strncmp(f[i], "t0=", 3) == 0)
       +                        g.t0 = atoi(f[i]+3);
       +                else if(strncmp(f[i], "t1=", 3) == 0)
       +                        g.t1 = atoi(f[i]+3);
       +                else if(strncmp(f[i], "min=", 4) == 0)
       +                        g.min = atoi(f[i]+4);
       +                else if(strncmp(f[i], "max=", 4) == 0)
       +                        g.max = atoi(f[i]+4);
       +                else if(strncmp(f[i], "pct=", 4) == 0){
       +                        if((arg.index2 = findname(f[i]+4)) == -1)
       +                                goto notfound;
       +                        g.fn = pctgraph;
       +                        g.min = 0;
       +                        g.max = 100;
       +                }else if(strncmp(f[i], "pctdiff=", 8) == 0){
       +                        if((arg.index2 = findname(f[i]+8)) == -1)
       +                                goto notfound;
       +                        g.fn = pctdiffgraph;
       +                        g.min = 0;
       +                        g.max = 100;
       +                }else if(strcmp(f[i], "diff") == 0)
       +                        g.fn = diffgraph;
       +                else if(strcmp(f[i], "text") == 0)
       +                        dotext = 1;
       +                else if(strncmp(f[i], "wid=", 4) == 0)
       +                        g.wid = atoi(f[i]+4);
       +                else if(strncmp(f[i], "ht=", 3) == 0)
       +                        g.ht = atoi(f[i]+3);
       +                else if(strncmp(f[i], "fill=", 5) == 0)
       +                        g.fill = atoi(f[i]+5);
       +                else if(strcmp(f[i], "diskbw") == 0)
       +                        g.fn = diskgraph;
       +                else if(strcmp(f[i], "iobw") == 0)
       +                        g.fn = iograph;
       +                else if(strcmp(f[i], "netbw") == 0)
       +                        g.fn = netgraph;
       +        }
       +
       +        /*
       +         * "*" leaves arg.index at -1.  Only the bandwidth graphs
       +         * ignore the index; every other function would read n[-1].
       +         */
       +        if(arg.index < 0 && g.fn != diskgraph && g.fn != iograph && g.fn != netgraph)
       +                goto notfound;
       +
       +        if(dotext){
       +                if(preqtype(c, "text/plain") < 0){
       +                        free(s);
       +                        return -1;
       +                }
       +                dotextbin(&c->hout, &g);
       +                hflush(&c->hout);
       +                free(s);
       +                return 0;
       +        }
       +
       +        m = statgraph(&g);
       +        if(m == nil)
       +                goto notfound;
       +
       +        if(preqtype(c, "image/png") < 0){
       +                qlock(&memdrawlock);
       +                freememimage(m);
       +                qunlock(&memdrawlock);
       +                free(s);
       +                return -1;
       +        }
       +        hout = &c->hout;
       +        writepng(hout, m);
       +        qlock(&memdrawlock);
       +        freememimage(m);
       +        qunlock(&memdrawlock);
       +        hflush(hout);
       +        free(s);
       +        return 0;
       +
       +notfound:
       +        free(s);
       +        return notfound(c);
       +}
       +
       +/*
       + * HTTP handler: send the HTML list of available logs.
       + * Returns -1 if the response header cannot be sent, 0 otherwise.
       + */
       +static int
       +xloglist(HConnect *c)
       +{
       +        Hio *out;
       +
       +        if(preqtype(c, "text/html") >= 0){
       +                out = &c->hout;
       +                vtloghlist(out);
       +                hflush(out);
       +                return 0;
       +        }
       +        return -1;
       +}
       +
       +/*
       + * HTTP handler for /log and /log/NAME.  The bare forms list all
       + * logs; /log/NAME dumps the named log as HTML.  Anything else, or
       + * a log that cannot be opened, yields notfound().
       + */
       +static int
       +xlog(HConnect *c)
       +{
       +        char *uri;
       +        VtLog *log;
       +
       +        uri = c->req.uri;
       +        if(strcmp(uri, "/log") == 0 || strcmp(uri, "/log/") == 0)
       +                return xloglist(c);
       +        if(strncmp(uri, "/log/", 5) != 0)
       +                return notfound(c);
       +
       +        log = vtlogopen(uri + strlen("/log/"), 0);
       +        if(log == nil)
       +                return notfound(c);
       +
       +        if(preqtype(c, "text/html") >= 0){
       +                vtloghdump(&c->hout, log);
       +                vtlogclose(log);
       +                hflush(&c->hout);
       +                return 0;
       +        }
       +        /* header could not be sent: still release the log */
       +        vtlogclose(log);
       +        return -1;
       +}
       +
       +/*
       + * HTTP handler: send the main index as an XML document.
       + * Returns -1 if the response header cannot be sent, 0 otherwise.
       + */
       +static int
       +xindex(HConnect *c)
       +{
       +        Hio *out;
       +
       +        if(preqtype(c, "text/xml") >= 0){
       +                out = &c->hout;
       +                xmlindex(out, mainindex, "index", 0);
       +                hflush(out);
       +                return 0;
       +        }
       +        return -1;
       +}
       +
       +/*
       + * Write indent tab characters to hout (nothing when indent <= 0).
       + */
       +void
       +xmlindent(Hio *hout, int indent)
       +{
       +        while(indent-- > 0)
       +                hputc(hout, '\t');
       +}
       +
       +/*
       + * Emit the XML attribute  tag="v"  (with a leading space).
       + * The value is written as given; no XML escaping is applied here.
       + */
       +void
       +xmlaname(Hio *hout, char *v, char *tag)
       +{
       +        hprint(hout, " %s=\"%s\"", tag, v);
       +}
       +
       +/*
       + * Emit the XML attribute  tag="<score>"  unless v equals zeroscore,
       + * in which case nothing is written.
       + */
       +void
       +xmlscore(Hio *hout, u8int *v, char *tag)
       +{
       +        if(scorecmp(zeroscore, v) != 0)
       +                hprint(hout, " %s=\"%V\"", tag, v);
       +}
       +
       +/*
       + * Emit the XML attribute  tag="yes"  when v is non-zero;
       + * write nothing otherwise.
       + */
       +void
       +xmlsealed(Hio *hout, int v, char *tag)
       +{
       +        if(v)
       +                hprint(hout, " %s=\"yes\"", tag);
       +}
       +
       +/*
       + * Emit the XML attribute  tag="v"  with v printed as an unsigned
       + * decimal 32-bit value.
       + */
       +void
       +xmlu32int(Hio *hout, u32int v, char *tag)
       +{
       +        hprint(hout, " %s=\"%ud\"", tag, v);
       +}
       +
       +/*
       + * Emit the XML attribute  tag="v"  with v printed as an unsigned
       + * decimal 64-bit value.
       + */
       +void
       +xmlu64int(Hio *hout, u64int v, char *tag)
       +{
       +        hprint(hout, " %s=\"%llud\"", tag, v);
       +}
       +
       +/*
       + * Write log l to h as an HTML page.  The chunks are emitted in
       + * ring order starting with the one after l->w, so the chunk at
       + * l->w itself comes last.  A nil log produces a page titled
       + * "<nil>" with an empty body.
       + */
       +void
       +vtloghdump(Hio *h, VtLog *l)
       +{
       +        VtLogChunk *chunk;
       +        char *title;
       +        int k;
       +
       +        if(l != nil)
       +                title = l->name;
       +        else
       +                title = "&lt;nil&gt;";
       +
       +fprint(2, "hdump xfer %d\n", h->xferenc);
       +        hprint(h, "<html><head>\n");
       +        hprint(h, "<title>Venti Server Log: %s</title>\n", title);
       +        hprint(h, "</head><body>\n");
       +        hprint(h, "<b>Venti Server Log: %s</b>\n<p>\n", title);
       +
       +        if(l != nil){
       +                chunk = l->w;
       +                k = 0;
       +                while(k < l->nchunk){
       +                        /* advance around the ring, wrapping at the end */
       +                        chunk++;
       +                        if(chunk == l->chunk + l->nchunk)
       +                                chunk = l->chunk;
       +                        hwrite(h, chunk->p, chunk->wp - chunk->p);
       +                        k++;
       +                }
       +        }
       +        hprint(h, "</body></html>\n");
       +}
       +
       +/*
       + * qsort comparison function for an array of char*: orders the
       + * elements by strcmp of the strings they point to.
       + */
       +static int
       +strpcmp(const void *va, const void *vb)
       +{
       +        char *a, *b;
       +
       +        a = *(char**)va;
       +        b = *(char**)vb;
       +        return strcmp(a, b);
       +}
       +
       +/*
       + * Write an HTML page to h listing every log name returned by
       + * vtlognames, sorted, each linking to /log/NAME.
       + */
       +void
       +vtloghlist(Hio *h)
       +{
       +        char **names;
       +        int k, count;
       +
       +        hprint(h, "<html><head>\n");
       +        hprint(h, "<title>Venti Server Logs</title>\n");
       +        hprint(h, "</head><body>\n");
       +        hprint(h, "<b>Venti Server Logs</b>\n<p>\n");
       +
       +        names = vtlognames(&count);
       +        qsort(names, count, sizeof(names[0]), strpcmp);
       +        k = 0;
       +        while(k < count){
       +                hprint(h, "<a href=\"/log/%s\">%s</a><br>\n", names[k], names[k]);
       +                k++;
       +        }
       +        vtfree(names);
       +        hprint(h, "</body></html>\n");
       +}
   DIR diff --git a/src/cmd/venti/srv/icache.c b/src/cmd/venti/srv/icache.c
       t@@ -0,0 +1,348 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * The in-memory index cache: a chained hash table of IEntry,
       + * keyed by the leading bits of the score, with all entries taken
       + * from one preallocated array.
       + */
       +typedef struct ICache ICache;
       +struct ICache
       +{
       +        QLock        lock;                        /* locks hash table & all associated data */
       +        Rendez        full;                        /* slept on (under lock) when no clean entry can be stolen */
       +        IEntry        **heads;                /* heads of all the hash chains */
       +        int        bits;                        /* bits to use for indexing heads */
       +        u32int        size;                        /* number of heads; == 1 << bits, should be < entries */
       +        IEntry        *base;                        /* all allocated hash table entries */
       +        u32int        entries;                /* elements in base */
       +        IEntry        *dirty;                /* chain of dirty elements */
       +        u32int        ndirty;                        /* count of entries marked dirty */
       +        u32int        maxdirty;                /* reaching this many dirty entries kicks the writer */
       +        u32int        unused;                        /* index of first unused element in base */
       +        u32int        stolen;                        /* last head from which an element was stolen */
       +
       +        Arena        *last[4];                /* arenas of the most recent index loads, used as a ring */
       +        Arena        *lastload;                /* arena whose clump directory was preloaded last */
       +        int                nlast;                /* running count of loads; position in last[] is nlast modulo its size */
       +};
       +
       +/* the single cache instance used by every function in this file */
       +static ICache icache;
       +
       +static IEntry        *icachealloc(IAddr *ia, u8int *score);
       +
       +/*
       + * Set up the index cache.
       + * bits is the number of score bits used to pick a hash chain, so
       + * there are 1<<bits chains; depth is the average chain length.
       + * Memory use is roughly
       + *        (1<<bits) * depth * sizeof(IEntry) + (1<<bits) * sizeof(IEntry*)
       + * At most half of the entries may be dirty before the writer is kicked.
       + */
       +void
       +initicache(int bits, int depth)
       +{
       +        u32int nheads, nentries;
       +
       +        nheads = 1 << bits;
       +        nentries = depth * nheads;
       +
       +        icache.bits = bits;
       +        icache.size = nheads;
       +        icache.entries = nentries;
       +        icache.maxdirty = nentries/2;
       +        icache.base = MKNZ(IEntry, nentries);
       +        icache.heads = MKNZ(IEntry*, nheads);
       +        icache.full.l = &icache.lock;
       +        setstat(StatIcacheSize, nentries);
       +}
       +
       +/*
       + * Return the top `bits' bits of the score, taken big-endian from
       + * its first four bytes, as a value in [0, 1<<bits).
       + * bits >= 32 yields the full 32-bit prefix; bits <= 0 yields 0.
       + *
       + * Each byte is widened to u32int before shifting: shifting the
       + * int-promoted byte left by 24 would overflow a signed int for
       + * bytes >= 0x80.  The bits <= 0 case is handled separately because
       + * a shift by 32 or more is undefined.
       + */
       +u32int
       +hashbits(u8int *sc, int bits)
       +{
       +        u32int v;
       +
       +        if(bits <= 0)
       +                return 0;
       +        v = ((u32int)sc[0] << 24) | ((u32int)sc[1] << 16) | ((u32int)sc[2] << 8) | (u32int)sc[3];
       +        if(bits < 32)
       +                 v >>= (32 - bits);
       +        return v;
       +}
       +
       +/*
       + * Seed the index cache from an arena's clump directory.
       + * aa is the index address of the arena's first clump; it advances
       + * by the stored size of each clump plus its header.  Reading stops
       + * at the first clump info that cannot be read.  Entries of type
       + * VtCorruptType are skipped but still advance the address.
       + */
       +static void
       +loadarenaclumps(Arena *arena, u64int aa)
       +{
       +        ClumpInfo info;
       +        IAddr addr;
       +        ulong n;
       +
       +fprint(2, "seed index cache with arena @%llud, (map %llud), %d clumps\n", arena->base, aa, arena->memstats.clumps);
       +        n = 0;
       +        while(n < arena->memstats.clumps && readclumpinfo(arena, n, &info) >= 0){
       +                addr.type = info.type;
       +                addr.size = info.uncsize;
       +                /* clump plus header, rounded up to whole arena blocks */
       +                addr.blocks = (info.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
       +                addr.addr = aa;
       +                aa += ClumpSize + info.size;
       +                if(addr.type != VtCorruptType)
       +                        insertscore(info.score, &addr, 0);
       +                n++;
       +        }
       +}
       +
       +/*
       + * Look up the index address for (score, type).
       + *
       + * The in-memory cache is searched first; on a hit the entry is
       + * moved to the front of its hash chain and its rac flag is set.
       + * On a miss the entry is read from mainindex with loadientry and
       + * placed in the cache with icachealloc.  When the most recent
       + * nelem(icache.last) loads all resolved to the same arena, and
       + * that arena is not the one preloaded last, the arena's clump
       + * directory is loaded into the cache after the lock is released.
       + *
       + * On success *ia and *rac are filled in and 0 is returned;
       + * -1 is returned when loadientry fails.
       + *
       + * must be called with the lump for this score locked
       + *
       + * TODO (original author's "ZZZ" note): need to think about
       + * evicting the correct IEntry, and writing back the wtime.
       + */
       +int
       +lookupscore(u8int *score, int type, IAddr *ia, int *rac)
       +{
       +        IEntry d, *ie, *last;
       +        u32int h;
       +        u64int aa;
       +        Arena *load;
       +        int i;
       +        uint ms;
       +
       +        load = nil;
       +        aa = 0;
       +        ms = msec();
       +        
       +        trace(TraceLump, "lookupscore %V.%d", score, type);
       +
       +        qlock(&icache.lock);
       +        h = hashbits(score, icache.bits);
       +        last = nil;
       +        for(ie = icache.heads[h]; ie != nil; ie = ie->next){
       +                if(ie->ia.type == type && scorecmp(ie->score, score)==0){
       +                        /* unlink; the entry is reinserted at the head at found: */
       +                        if(last != nil)
       +                                last->next = ie->next;
       +                        else
       +                                icache.heads[h] = ie->next;
       +                        addstat(StatIcacheHit, 1);
       +                        ie->rac = 1;
       +                        trace(TraceLump, "lookupscore incache");
       +                        goto found;
       +                }
       +                last = ie;
       +        }
       +        addstat(StatIcacheMiss, 1);
       +        /* drop the lock for the duration of the disk index read */
       +        qunlock(&icache.lock);
       +
       +        if(loadientry(mainindex, score, type, &d) < 0){
       +                ms = msec() - ms;
       +                addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
       +                return -1;
       +        }
       +
       +        addstat(StatIcacheFill, 1);
       +
       +        trace(TraceLump, "lookupscore loaded");
       +
       +        /*
       +         * no one else can load an entry for this score,
       +         * since we have the overall score lock.
       +         */
       +        qlock(&icache.lock);
       +
       +        /*
       +         * If we notice that all the hits are coming from one arena,
       +         * load the table of contents for that arena into the cache.
       +         */
       +        ie = icachealloc(&d.ia, score);
       +        icache.last[icache.nlast++%nelem(icache.last)] = amapitoa(mainindex, ie->ia.addr, &aa);
       +        aa = ie->ia.addr - aa;        /* compute base addr of arena */
       +        for(i=0; i<nelem(icache.last); i++)
       +                if(icache.last[i] != icache.last[0])
       +                        break;
       +        if(i==nelem(icache.last) && icache.lastload != icache.last[0]){
       +                load = icache.last[0];
       +                icache.lastload = load;
       +        }
       +
       +found:
       +        /* ie is unlinked at this point on both paths: put it at the chain head */
       +        ie->next = icache.heads[h];
       +        icache.heads[h] = ie;
       +
       +        *ia = ie->ia;
       +        *rac = ie->rac;
       +
       +        qunlock(&icache.lock);
       +
       +        /* the preload runs without icache.lock; insertscore takes it per entry */
       +        if(load){
       +                trace(TraceProc, "preload 0x%llux", aa);
       +                loadarenaclumps(load, aa);
       +        }
       +        ms = msec() - ms;
       +        addstat2(StatIcacheRead, 1, StatIcacheReadTime, ms);
       +
       +        return 0;
       +}
       +
       +/*
       + * Insert (score, ia) at the head of its hash chain.
       + * write is non-zero for a freshly written clump, which marks the
       + * entry dirty and schedules the index writer; zero means the
       + * entry is a prefetch and stays clean.  Always returns 0.
       + *
       + * NOTE(review): icachealloc can hand back an existing entry for
       + * this score.  If that entry is already dirty, ndirty is still
       + * incremented here -- confirm callers cannot reach that case.
       + */
       +int
       +insertscore(u8int *score, IAddr *ia, int write)
       +{
       +        IEntry *ie;
       +        u32int h;
       +        int kick;
       +
       +        trace(TraceLump, "insertscore enter");
       +        if(write)
       +                addstat(StatIcacheWrite, 1);
       +        else
       +                addstat(StatIcachePrefetch, 1);
       +
       +        qlock(&icache.lock);
       +        h = hashbits(score, icache.bits);
       +
       +        ie = icachealloc(ia, score);
       +        if(write){
       +                icache.ndirty++;
       +                setstat(StatIcacheDirty, icache.ndirty);
       +                delaykickicache();
       +                ie->dirty = 1;
       +        }
       +        ie->next = icache.heads[h];
       +        icache.heads[h] = ie;
       +
       +        /* decide whether to kick while the counters are still protected */
       +        kick = write && icache.ndirty >= icache.maxdirty;
       +        qunlock(&icache.lock);
       +
       +        if(kick)
       +                kickicache();
       +
       +        /*
       +         * It's okay not to do this under icache.lock.
       +         * Calling insertscore only happens when we hold
       +         * the lump, meaning any searches for this block
       +         * will hit in the lump cache until after we return.
       +         */
       +        markbloomfilter(mainindex->bloom, score);
       +
       +        return 0;
       +}
       +
       +/*
       + * Obtain an index cache entry for (ia->type, score).
       + * must be called with icache.lock locked
       + *
       + * The returned entry is never linked into a hash chain; the
       + * caller inserts it.  Three sources are tried in order:
       + *   1. an existing entry for the same score and type: it is
       + *      unlinked, its rac flag is set, and it is returned as it
       + *      is (its ia is not overwritten);
       + *   2. the next never-used element of icache.base;
       + *   3. a clean entry stolen from another chain, scanning chains
       + *      round-robin from the one after icache.stolen and taking
       + *      the last clean entry of the first chain that has one.
       + * For cases 2 and 3 the entry is filled with *ia and score and
       + * its rac flag is cleared.  If a full pass over the chains finds
       + * no clean entry, the writer is kicked and the caller sleeps on
       + * icache.full while every entry is dirty.
       + */
       +static IEntry *
       +icachealloc(IAddr *ia, u8int *score)
       +{
       +        int i;
       +        IEntry *ie, *last, *clean, *lastclean;
       +        u32int h;
       +
       +        h = hashbits(score, icache.bits);
       +        last = nil;
       +        for(ie = icache.heads[h]; ie != nil; ie = ie->next){
       +                if(ie->ia.type == ia->type && scorecmp(ie->score, score)==0){
       +                        if(last != nil)
       +                                last->next = ie->next;
       +                        else
       +                                icache.heads[h] = ie->next;
       +                        trace(TraceLump, "icachealloc hit");
       +                        ie->rac = 1;
       +                        return ie;
       +                }
       +                last = ie;
       +        }
       +
       +        /* from here on h is reused as an index, first into base, then into heads */
       +        h = icache.unused;
       +        if(h < icache.entries){
       +                ie = &icache.base[h++];
       +                icache.unused = h;
       +                trace(TraceLump, "icachealloc unused");
       +                goto Found;
       +        }
       +
       +        h = icache.stolen;
       +        for(i=0;; i++){
       +                h++;
       +                if(h >= icache.size)
       +                        h = 0;
       +                /* a whole pass found nothing clean */
       +                if(i == icache.size){
       +                        trace(TraceLump, "icachealloc sleep");
       +                        addstat(StatIcacheStall, 1);
       +                        while(icache.ndirty == icache.entries){
       +                                /*
       +                                 * This is a bit suspect.  Kickicache will wake up the
       +                                 * icachewritecoord, but if all the index entries are for
       +                                 * unflushed disk blocks, icachewritecoord won't be
       +                                 * able to do much.  It always rewakes everyone when
       +                                 * it thinks it is done, though, so at least we'll go around
       +                                 * the while loop again.  Also, if icachewritecoord sees
       +                                 * that the disk state hasn't change at all since the last
       +                                 * time around, it kicks the disk.  This needs to be
       +                                 * rethought, but it shouldn't deadlock anymore.
       +                                 */
       +                                kickicache();
       +                                rsleep(&icache.full);
       +                        }
       +                        addstat(StatIcacheStall, -1);
       +                        i = 0;
       +                }
       +                /* remember the last clean entry of chain h and its predecessor */
       +                lastclean = nil;
       +                clean = nil;
       +                last = nil;
       +                for(ie=icache.heads[h]; ie; last=ie, ie=ie->next){
       +                        if(!ie->dirty){
       +                                clean = ie;
       +                                lastclean = last;
       +                        }
       +                }
       +                if(clean){
       +                        if(lastclean)
       +                                lastclean->next = clean->next;
       +                        else
       +                                icache.heads[h] = clean->next;
       +                        clean->next = nil;
       +                        icache.stolen = h;
       +                        ie = clean;
       +                        trace(TraceLump, "icachealloc steal");
       +                        goto Found;
       +                }
       +        }
       +
       +Found:
       +        ie->ia = *ia;
       +        scorecp(ie->score, score);
       +        ie->rac = 0;        
       +        return ie;
       +}
       +
       +/*
       + * Collect the dirty entries whose 32-bit score prefix lies in
       + * [lo, hi] and whose address is non-zero and below limit.
       + * The entries are chained through nextdirty and the head of that
       + * chain is returned.  When nothing qualifies, the disk cache is
       + * flushed and nil is returned.
       + */
       +IEntry*
       +icachedirty(u32int lo, u32int hi, u64int limit)
       +{
       +        IEntry *ie, *chain;
       +        u32int prefix;
       +        int slot;
       +
       +        chain = nil;
       +        trace(TraceProc, "icachedirty enter");
       +        qlock(&icache.lock);
       +        for(slot=0; slot<icache.size; slot++){
       +                for(ie = icache.heads[slot]; ie; ie=ie->next){
       +                        if(!ie->dirty || ie->ia.addr == 0 || ie->ia.addr >= limit)
       +                                continue;
       +                        prefix = hashbits(ie->score, 32);
       +                        if(prefix < lo || prefix > hi)
       +                                continue;
       +                        ie->nextdirty = chain;
       +                        chain = ie;
       +                }
       +        }
       +        qunlock(&icache.lock);
       +        trace(TraceProc, "icachedirty exit");
       +        if(chain == nil)
       +                flushdcache();
       +        return chain;
       +}
       +
       +/*
       + * Mark every entry on the nextdirty chain starting at ie as clean,
       + * lower the dirty count accordingly, and wake everyone sleeping on
       + * icache.full so that stalled allocations can retry.
       + *
       + * The trace messages name this function; they previously said
       + * "icachedirty", which made traces of the two calls indistinguishable.
       + */
       +void
       +icacheclean(IEntry *ie)
       +{
       +        trace(TraceProc, "icacheclean enter");
       +        qlock(&icache.lock);
       +        for(; ie; ie=ie->nextdirty){
       +                icache.ndirty--;
       +                ie->dirty = 0;
       +        }
       +        setstat(StatIcacheDirty, icache.ndirty);
       +        rwakeupall(&icache.full);
       +        qunlock(&icache.lock);
       +        trace(TraceProc, "icacheclean exit");
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/icachewrite.c b/src/cmd/venti/srv/icachewrite.c
       t@@ -0,0 +1,318 @@
       +/*
       + * Write the dirty icache entries to disk.  Random seeks are
       + * so expensive that it makes sense to wait until we have
       + * a lot and then just make a sequential pass over the disk.
       + */
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +static void icachewriteproc(void*);
       +static void icachewritecoord(void*);
       +static IEntry *iesort(IEntry*);
       +
       +int icachesleeptime = 1000;        /* milliseconds */
       +
       +enum
       +{
       +        /* largest span of index section bytes that nextchunk groups into one chunk */
       +        Bufsize = 8*1024*1024
       +};
       +
       +/*
       + * State of the index cache writer.
       + */
       +typedef struct IWrite IWrite;
       +struct IWrite
       +{
       +        Round round;        /* the "icache" round set up by initicachewrite */
       +        AState as;        /* as.aa bounds the entry addresses collected by icachedirty */
       +};
       +
       +static IWrite iwrite;
       +
       +/*
       + * Start the index cache write machinery: one writer proc per index
       + * section, each with its own request and completion channel, plus
       + * the coordinator proc and the delayed-kick proc for the round.
       + */
       +void
       +initicachewrite(void)
       +{
       +        Index *index;
       +        ISect *sect;
       +        int n;
       +
       +        initround(&iwrite.round, "icache", 120*60*1000);
       +        index = mainindex;
       +        n = 0;
       +        while(n < index->nsects){
       +                sect = index->sects[n];
       +                sect->writechan = chancreate(sizeof(ulong), 1);
       +                sect->writedonechan = chancreate(sizeof(ulong), 1);
       +                vtproc(icachewriteproc, sect);
       +                n++;
       +        }
       +        vtproc(icachewritecoord, nil);
       +        vtproc(delaykickroundproc, &iwrite.round);
       +}
       +
       +/*
       + * Disk address, within section is, of the bucket block that holds ie.
       + */
       +static u64int
       +chunkaddr(Index *ix, ISect *is, IEntry *ie)
       +{
       +        return is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
       +}
       +
       +/*
       + * Detach from the list *pie the longest leading run of entries whose
       + * bucket blocks all start less than Bufsize bytes after the first
       + * entry's block.  The run is nil-terminated and returned; *pie is
       + * advanced to the first entry left over (nil if none), *paddr gets
       + * the disk address of the first block and *pnbuf the number of bytes
       + * from there through the end of the last block in the run.
       + */
       +static IEntry*
       +nextchunk(Index *ix, ISect *is, IEntry **pie, u64int *paddr, uint *pnbuf)
       +{
       +        IEntry *first, *cur, **link;
       +        u64int base, a;
       +        uint span;
       +
       +        first = *pie;
       +        base = chunkaddr(ix, is, first);
       +        span = 0;
       +        link = &first->nextdirty;
       +        while((cur = *link) != nil){
       +                a = chunkaddr(ix, is, cur);
       +                if(a - base >= Bufsize)
       +                        break;
       +                span = a - base;
       +                link = &cur->nextdirty;
       +        }
       +        /* span is the offset of the last block; include the block itself */
       +        span += 1<<is->blocklog;
       +
       +        *link = nil;
       +        *pie = cur;
       +        *paddr = base;
       +        *pnbuf = span;
       +        return first;
       +}
       +        
       +/*
       + * Write the dirty icache entries belonging to index section is.
       + *
       + * The dirty entries whose hash falls in this section's range are
       + * fetched with icachedirty, sorted by score, and processed in chunks
       + * (see nextchunk).  For each chunk the covered disk range is read
       + * into buf, every entry is merged into its bucket in the buffer, the
       + * matching dcache block (if _getdblock returns one) is refreshed,
       + * and the range is written back.  Entries written successfully are
       + * handed to icacheclean.
       + *
       + * buf must have room for Bufsize plus one block.
       + * Returns 0 on success, -1 if any read, write or bucket update failed.
       + */
       +static int
       +icachewritesect(Index *ix, ISect *is, u8int *buf)
       +{
       +        int err, h, bsize;
       +        u32int lo, hi;
       +        u64int addr, naddr;
       +        uint nbuf, off;
       +        DBlock *b;
       +        IBucket ib;
       +        IEntry *ie, *iedirty, **l, *chunk;
       +
       +        /* hash range [lo, hi] covered by this section */
       +        lo = is->start * ix->div;
       +        if(TWID32/ix->div < is->stop)
       +                hi = TWID32;
       +        else
       +                hi = is->stop * ix->div - 1;
       +
       +        trace(TraceProc, "icachewritesect enter %ud %ud %llud", lo, hi, iwrite.as.aa);
       +
       +        iedirty = icachedirty(lo, hi, iwrite.as.aa);
       +        iedirty = iesort(iedirty);
       +        bsize = 1<<is->blocklog;
       +        err = 0;
       +
       +        while(iedirty){
       +                /* pause before every chunk */
       +                sleep(icachesleeptime);
       +                trace(TraceProc, "icachewritesect nextchunk");
       +                chunk = nextchunk(ix, is, &iedirty, &addr, &nbuf);
       +
       +                trace(TraceProc, "icachewritesect readpart 0x%llux+0x%ux", addr, nbuf);
       +                if(readpart(is->part, addr, buf, nbuf) < 0){
       +                        // XXX
       +                        fprint(2, "icachewriteproc readpart: %r\n");
       +                        err  = -1;
       +                        /* chunk is not passed to icacheclean, so its entries stay dirty */
       +                        continue;
       +                }
       +                trace(TraceProc, "icachewritesect updatebuf");
       +                addstat(StatIsectReadBytes, nbuf);
       +                addstat(StatIsectRead, 1);
       +
       +                /*
       +                 * l points at the link leading to the current entry so
       +                 * that a failed entry can be unlinked from chunk (skipit)
       +                 * and thereby excluded from the icacheclean call below.
       +                 */
       +                for(l=&chunk; (ie=*l)!=nil; l=&ie->nextdirty){
       +                again:
       +                        naddr = is->blockbase + ((u64int)(hashbits(ie->score, 32) / ix->div - is->start) << is->blocklog);
       +                        off = naddr - addr;
       +                        if(off+bsize > nbuf){
       +                                fprint(2, "whoops! addr=0x%llux nbuf=%ud addr+nbuf=0x%llux naddr=0x%llux\n",
       +                                        addr, nbuf, addr+nbuf, naddr);
       +                                assert(off+bsize <= nbuf);
       +                        }
       +                        unpackibucket(&ib, buf+off, is->bucketmagic);
       +                        if(okibucket(&ib, is) < 0){
       +                                fprint(2, "bad bucket XXX\n");
       +                                goto skipit;
       +                        }
       +                        trace(TraceProc, "icachewritesect add %V at 0x%llux", ie->score, naddr);
       +                        h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
       +                        if(h & 1){
       +                                /* low bit set: overwrite the entry at offset h^1 */
       +                                h ^= 1;
       +                                packientry(ie, &ib.data[h]);
       +                        }else if(ib.n < is->buckmax){
       +                                /* open a slot at byte offset h and insert */
       +                                memmove(&ib.data[h+IEntrySize], &ib.data[h], ib.n*IEntrySize - h);
       +                                ib.n++;
       +                                packientry(ie, &ib.data[h]);
       +                        }else{
       +                                fprint(2, "bucket overflow XXX\n");
       +                        skipit:
       +                                err = -1;
       +                                *l = ie->nextdirty;
       +                                ie = *l;
       +                                if(ie)
       +                                        goto again;
       +                                else
       +                                        break;
       +                        }
       +                        packibucket(&ib, buf+off, is->bucketmagic);
       +                        /* copy the updated block into the dcache block when one is returned */
       +                        if((b = _getdblock(is->part, naddr, ORDWR, 0)) != nil){
       +                                memmove(b->data, buf+off, bsize);
       +                                putdblock(b);
       +                        }
       +                }
       +
       +                /* the format now carries verbs for addr and nbuf, like the readpart trace */
       +                trace(TraceProc, "icachewritesect writepart 0x%llux+0x%ux", addr, nbuf);
       +                if(writepart(is->part, addr, buf, nbuf) < 0){
       +                        // XXX
       +                        fprint(2, "icachewriteproc writepart: %r\n");
       +                        err = -1;
       +                        continue;
       +                }
       +                addstat(StatIsectWriteBytes, nbuf);
       +                addstat(StatIsectWrite, 1);
       +                icacheclean(chunk);
       +        }
       +
       +        trace(TraceProc, "icachewritesect done");
       +        return err;
       +}
       +
       +/*
       + * Per-section writer proc; v is the ISect to serve.
       + * Allocates a buffer of Bufsize plus one block, rounds its address up
       + * to a block-size boundary, then loops forever: wait for a request on
       + * the section's writechan, run icachewritesect, and report completion
       + * on writedonechan.
       + */
       +static void
       +icachewriteproc(void *v)
       +{
       +        ISect *is;
       +        Index *ix;
       +        u8int *buf;
       +        uint bsize;
       +        ulong mask;
       +
       +        is = v;
       +        ix = mainindex;
       +        threadsetname("icachewriteproc:%s", is->part->name);
       +
       +        bsize = 1<<is->blocklog;
       +        mask = (ulong)(bsize-1);
       +        buf = emalloc(Bufsize+bsize);
       +        buf = (u8int*)(((ulong)buf + mask) & ~mask);
       +
       +        while(1){
       +                trace(TraceProc, "icachewriteproc recv");
       +                recv(is->writechan, 0);
       +                trace(TraceWork, "start");
       +                icachewritesect(ix, is, buf);
       +                trace(TraceProc, "icachewriteproc send");
       +                trace(TraceWork, "finish");
       +                send(is->writedonechan, 0);
       +        }
       +}
       +
       +/*
       + * Coordinator proc.  Each time the round is kicked it takes a fresh
       + * diskstate() snapshot; if that matches the previous one it kicks the
       + * dcache.  When the snapshot names an arena it signals every section
       + * writer (and the bloom filter writer, if present), waits for all of
       + * them to finish, and records the state with setatailstate.
       + * Every pass ends with icacheclean(nil) to wake any waiters.
       + */
       +static void
       +icachewritecoord(void *v)
       +{
       +        Index *ix;
       +        AState now;
       +        int k;
       +
       +        USED(v);
       +
       +        threadsetname("icachewritecoord");
       +
       +        ix = mainindex;
       +        iwrite.as = diskstate();
       +
       +        while(1){
       +                trace(TraceProc, "icachewritecoord sleep");
       +                waitforkick(&iwrite.round);
       +                trace(TraceWork, "start");
       +                now = diskstate();
       +                if(now.aa==iwrite.as.aa && now.arena==iwrite.as.arena){
       +                        /* nothing new on disk since the last flush - kick disk */
       +                        trace(TraceProc, "icachewritecoord flush dcache");
       +                        kickdcache();
       +                        trace(TraceProc, "icachewritecoord flushed dcache");
       +                }
       +                iwrite.as = now;
       +
       +                trace(TraceProc, "icachewritecoord start flush");
       +                if(iwrite.as.arena){
       +                        /* start all the writers ... */
       +                        k = 0;
       +                        while(k < ix->nsects)
       +                                send(ix->sects[k++]->writechan, 0);
       +                        if(ix->bloom)
       +                                send(ix->bloom->writechan, 0);
       +
       +                        /* ... then wait for each of them */
       +                        k = 0;
       +                        while(k < ix->nsects)
       +                                recv(ix->sects[k++]->writedonechan, 0);
       +                        if(ix->bloom)
       +                                recv(ix->bloom->writedonechan, 0);
       +
       +                        trace(TraceProc, "icachewritecoord donewrite");
       +                        setatailstate(&iwrite.as);
       +                }
       +                icacheclean(nil);        /* wake up anyone waiting */
       +                trace(TraceWork, "finish");
       +                addstat(StatIcacheFlush, 1);
       +        }
       +}
       +
       +void
       +flushicache(void)        /* kick the icache write round with second argument 1 (presumably "wait" - confirm in round.c) */
       +{
       +        trace(TraceProc, "flushicache enter");
       +        kickround(&iwrite.round, 1);
       +        trace(TraceProc, "flushicache exit");
       +}
       +
       +void
       +kickicache(void)        /* kick the icache write round with second argument 0 (presumably "don't wait" - confirm in round.c) */
       +{
       +        kickround(&iwrite.round, 0);
       +}
       +
       +void
       +delaykickicache(void)        /* forward to delaykickround for the icache write round */
       +{
       +        delaykickround(&iwrite.round);
       +}
       +
       +/*
       + * Merge sort of a nextdirty-linked list by score (scorecmp order).
       + * Returns the head of the sorted list.  When two scores compare
       + * equal, the entry from the front half of the split is placed first.
       + */
       +static IEntry*
       +iesort(IEntry *ie)
       +{
       +        IEntry *slow, *fast, *front, *back, *sorted, **tail;
       +
       +        /* lists of zero or one element are already sorted */
       +        if(ie == nil || ie->nextdirty == nil)
       +                return ie;
       +
       +        /*
       +         * Split: fast starts two links ahead and moves two links per
       +         * step while slow moves one; cut the list just after slow.
       +         */
       +        slow = ie;
       +        fast = ie->nextdirty->nextdirty;
       +        while(fast != nil){
       +                slow = slow->nextdirty;
       +                fast = fast->nextdirty;
       +                if(fast != nil)
       +                        fast = fast->nextdirty;
       +        }
       +        back = slow->nextdirty;
       +        slow->nextdirty = nil;
       +
       +        /* sort each half */
       +        front = iesort(ie);
       +        back = iesort(back);
       +
       +        /* merge, taking from back only when it is strictly smaller */
       +        sorted = nil;
       +        tail = &sorted;
       +        while(front != nil && back != nil){
       +                if(scorecmp(front->score, back->score) > 0){
       +                        *tail = back;
       +                        tail = &back->nextdirty;
       +                        back = back->nextdirty;
       +                }else{
       +                        *tail = front;
       +                        tail = &front->nextdirty;
       +                        front = front->nextdirty;
       +                }
       +        }
       +        /* whichever half remains is already sorted and nil-terminated */
       +        if(front != nil)
       +                *tail = front;
       +        else
       +                *tail = back;
       +        return sorted;
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/ifile.c b/src/cmd/venti/srv/ifile.c
       t@@ -0,0 +1,93 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Load the file called name with readfile and attach it to f,
       + * positioned at the start.  f->name aliases the caller's string.
       + * Returns 0 on success, -1 if readfile fails.
       + */
       +int
       +readifile(IFile *f, char *name)
       +{
       +        ZBlock *contents;
       +
       +        if((contents = readfile(name)) == nil)
       +                return -1;
       +        f->pos = 0;
       +        f->b = contents;
       +        f->name = name;
       +        return 0;
       +}
       +
       +/*
       + * Release the block held by f and reset f to an empty state.
       + */
       +void
       +freeifile(IFile *f)
       +{
       +        ZBlock *old;
       +
       +        old = f->b;
       +        f->pos = 0;
       +        f->b = nil;
       +        freezblock(old);
       +}
       +
       +/*
       + * Fill f with size bytes read from part starting at offset start.
       + * f->name aliases part->name.
       + * Returns 0 on success; -1 if the buffer can't be allocated or the
       + * read fails (the read failure is reported through seterr).
       + */
       +int
       +partifile(IFile *f, Part *part, u64int start, u32int size)
       +{
       +        ZBlock *zb;
       +
       +        if((zb = alloczblock(size, 0, part->blocksize)) == nil)
       +                return -1;
       +        if(readpart(part, start, zb->data, size) >= 0){
       +                f->name = part->name;
       +                f->b = zb;
       +                f->pos = 0;
       +                return 0;
       +        }
       +        seterr(EAdmin, "can't read %s: %r", part->name);
       +        freezblock(zb);
       +        return -1;
       +}
       +
       +/*
       + * Return the next line of f that still has text after removing any
       + * # comment and leading blanks, tabs and carriage returns.
       + * The buffer is edited in place: the newline and the # are replaced
       + * by NULs.  Returns nil when no newline-terminated line remains.
       + */
       +char*
       +ifileline(IFile *f)
       +{
       +        char *line, *nl, *hash;
       +
       +        for(;;){
       +                line = (char*)f->b->data + f->pos;
       +                nl = memchr(line, '\n', f->b->len - f->pos);
       +                if(nl == nil)
       +                        return nil;
       +                *nl = '\0';
       +                f->pos = (nl + 1) - (char*)f->b->data;
       +                hash = strchr(line, '#');
       +                if(hash != nil)
       +                        *hash = '\0';
       +                while(*line == ' ' || *line == '\t' || *line == '\r')
       +                        line++;
       +                if(*line != '\0')
       +                        return line;
       +        }
       +}
       +
       +/*
       + * Read the next line of f as a name and copy it to dst with namecp.
       + * Returns -1 at end of input or if the line is ANameSize characters
       + * or longer; 0 otherwise.
       + */
       +int
       +ifilename(IFile *f, char *dst)
       +{
       +        char *line;
       +
       +        line = ifileline(f);
       +        if(line == nil)
       +                return -1;
       +        if(strlen(line) >= ANameSize)
       +                return -1;
       +        namecp(dst, line);
       +        return 0;
       +}
       +
       +/*
       + * Parse the next line of f as a u32int into *r using stru32int.
       + * Returns -1 at end of input, otherwise stru32int's result.
       + */
       +int
       +ifileu32int(IFile *f, u32int *r)
       +{
       +        char *line;
       +
       +        if((line = ifileline(f)) != nil)
       +                return stru32int(line, r);
       +        return -1;
       +}
   DIR diff --git a/src/cmd/venti/srv/index.c b/src/cmd/venti/srv/index.c
       t@@ -0,0 +1,819 @@
       +/*
       + * Index, mapping scores to log positions. 
       + *
       + * The index is made up of some number of index sections, each of
       + * which is typically stored on a different disk.  The blocks in all the 
       + * index sections are logically numbered, with each index section 
       + * responsible for a range of blocks.  Blocks are typically 8kB.
       + *
       + * The N index blocks are treated as a giant hash table.  The top 32 bits
       + * of score are used as the key for a lookup.  Each index block holds
       + * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
       + * 
       + * The index is sized so that a particular bucket is extraordinarily 
       + * unlikely to overflow: assuming compressed data blocks are 4kB 
       + * on disk, and assuming each block has a 40 byte index entry,
       + * the index data will be 1% of the total data.  Since scores are essentially
       + * random, all buckets should be about the same fullness.
       + * A factor of 5 gives us a wide comfort boundary to account for 
       + * random variation.  So the index disk space should be 5% of the arena disk space.
       + */
       +
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +//static int        bucklook(u8int *score, int type, u8int *data, int n);
       +//static int        writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
       +//static int        okibucket(IBucket *ib, ISect *is);
       +static int        initindex1(Index*);
       +static ISect        *initisect1(ISect *is);
       +//static int        splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib);
       +
       +#define KEY(k,d)        ((d) ? (k)>>(32-(d)) : 0)
       +
       +//static QLock        indexlock;        //ZZZ
       +
       +static char IndexMagic[] = "venti index configuration";
       +
       +/*
       + * Build the in-memory Index called name from the n index sections
       + * in sects.  The index configuration is read from the table stored
       + * in sects[0] and checked against name, n, and the parameters of
       + * every section (block size, table size, names, and a contiguous
       + * start/stop numbering beginning at 0).
       + *
       + * Returns the new index, or nil with the error set.  Once the index
       + * has adopted sects, failure paths call freeindex, which also frees
       + * the sects array and the sections in it.
       + */
       +Index*
       +initindex(char *name, ISect **sects, int n)
       +{
       +        IFile f;
       +        Index *ix;
       +        ISect *is;
       +        u32int last, blocksize, tabsize;
       +        int i;
       +
       +        if(n <= 0){
       +fprint(2, "bad n\n");
       +                seterr(EOk, "no index sections to initialize index");
       +                return nil;
       +        }
       +        ix = MKZ(Index);
       +        if(ix == nil){
       +fprint(2, "no mem\n");
       +                seterr(EOk, "can't initialize index: out of memory");
       +                return nil;
       +        }
       +
       +        tabsize = sects[0]->tabsize;
       +        if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0){
       +                /* ix owns nothing yet, but must not be leaked */
       +                freeindex(ix);
       +                return nil;
       +        }
       +        if(parseindex(&f, ix) < 0){
       +                freeifile(&f);
       +                freeindex(ix);
       +                return nil;
       +        }
       +        freeifile(&f);
       +        if(namecmp(ix->name, name) != 0){
       +                seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
       +                /* ix->sects is still nil here, so only ix and its maps are freed */
       +                freeindex(ix);
       +                return nil;
       +        }
       +        if(ix->nsects != n){
       +                /* found = count in the stored configuration, expected = caller's count */
       +                seterr(ECorrupt, "mismatched number index sections: found %d expected %d", ix->nsects, n);
       +                freeindex(ix);
       +                return nil;
       +        }
       +        ix->sects = sects;
       +        last = 0;
       +        blocksize = ix->blocksize;
       +        for(i = 0; i < ix->nsects; i++){
       +                is = sects[i];
       +                if(namecmp(ix->name, is->index) != 0
       +                || is->blocksize != blocksize
       +                || is->tabsize != tabsize
       +                || namecmp(is->name, ix->smap[i].name) != 0
       +                || is->start != ix->smap[i].start
       +                || is->stop != ix->smap[i].stop
       +                || last != is->start
       +                || is->start > is->stop){
       +                        seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
       +                        freeindex(ix);
       +                        return nil;
       +                }
       +                last = is->stop;
       +        }
       +        ix->tabsize = tabsize;
       +        ix->buckets = last;
       +
       +        if(initindex1(ix) < 0){
       +                freeindex(ix);
       +                return nil;
       +        }
       +
       +        ix->arenas = MKNZ(Arena*, ix->narenas);
       +        if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
       +                freeindex(ix);
       +                return nil;
       +        }
       +
       +        return ix;
       +}
       +
       +/*
       + * Compute ix->div, the width of the hash range served by each
       + * bucket, from ix->buckets, and verify that dividing the 32-bit key
       + * space by it gives back exactly ix->buckets.
       + * Returns 0 on success, -1 with the error set otherwise.
       + */
       +static int
       +initindex1(Index *ix)
       +{
       +        u32int buckets;
       +
       +        /* an empty index would divide by zero below */
       +        if(ix->buckets == 0){
       +                seterr(ECorrupt, "no buckets in %s", ix->name);
       +                return -1;
       +        }
       +
       +        ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
       +        /* a divisor that did not fit in ix->div would also divide by zero */
       +        if(ix->div == 0){
       +                seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
       +                return -1;
       +        }
       +        buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
       +        if(buckets != ix->buckets){
       +                seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
       +                return -1;
       +        }
       +
       +        return 0;
       +}
       +
       +/*
       + * Write the index configuration back to disk: format it into a
       + * tabsize-byte block, store that block in the table area of every
       + * section, then rewrite each section header with wbisect.
       + * Returns 0 on success, -1 with the error set on failure.
       + */
       +int
       +wbindex(Index *ix)
       +{
       +        Fmt fmt;
       +        ZBlock *zb;
       +        ISect *is;
       +        int k;
       +
       +        if(ix->nsects == 0){
       +                seterr(EOk, "no sections in index %s", ix->name);
       +                return -1;
       +        }
       +        if((zb = alloczblock(ix->tabsize, 1, ix->blocksize)) == nil){
       +                seterr(EOk, "can't write index configuration: out of memory");
       +                return -1;
       +        }
       +        fmtzbinit(&fmt, zb);
       +        if(outputindex(&fmt, ix) < 0){
       +                seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
       +                freezblock(zb);
       +                return -1;
       +        }
       +
       +        /* every section carries its own copy of the table */
       +        k = 0;
       +        while(k < ix->nsects){
       +                is = ix->sects[k++];
       +                if(writepart(is->part, is->tabbase, zb->data, ix->tabsize) < 0){
       +                        seterr(EOk, "can't write index: %r");
       +                        freezblock(zb);
       +                        return -1;
       +                }
       +        }
       +        freezblock(zb);
       +
       +        k = 0;
       +        while(k < ix->nsects)
       +                if(wbisect(ix->sects[k++]) < 0)
       +                        return -1;
       +
       +        return 0;
       +}
       +
       +/*
       + * Format the index configuration onto f:
       + *        IndexMagic '\n' version '\n' name '\n' blocksize '\n' sections arenas
       + * version, blocksize: u32int
       + * name: max. ANameSize string
       + * sections, arenas: AMap, written by outputamap
       + * Returns 0 on success, -1 if any piece fails to print.
       + */
       +int
       +outputindex(Fmt *f, Index *ix)
       +{
       +        if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0)
       +                return -1;
       +        if(outputamap(f, ix->smap, ix->nsects) < 0)
       +                return -1;
       +        if(outputamap(f, ix->amap, ix->narenas) < 0)
       +                return -1;
       +        return 0;
       +}
       +
       +/*
       + * Parse an index configuration from f into ix, in the order written
       + * by outputindex: magic, version, name, block size, section map,
       + * arena map.  Fields are stored into ix as they are parsed, so ix
       + * may be partly filled in when -1 is returned.
       + * Returns 0 on success, -1 with the error set on failure.
       + */
       +int
       +parseindex(IFile *f, Index *ix)
       +{
       +        AMapN maps;
       +        u32int num;
       +        char *magic;
       +
       +        /* magic */
       +        magic = ifileline(f);
       +        if(magic == nil || strcmp(magic, IndexMagic) != 0){
       +                seterr(ECorrupt, "bad index magic for %s", f->name);
       +                return -1;
       +        }
       +
       +        /* version: must parse and must be IndexVersion */
       +        if(ifileu32int(f, &num) < 0){
       +                seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
       +                return -1;
       +        }
       +        ix->version = num;
       +        if(ix->version != IndexVersion){
       +                seterr(ECorrupt, "bad version number in %s", f->name);
       +                return -1;
       +        }
       +
       +        /* name */
       +        if(ifilename(f, ix->name) < 0){
       +                seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
       +                return -1;
       +        }
       +
       +        /* block size */
       +        if(ifileu32int(f, &num) < 0){
       +                seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
       +                return -1;
       +        }
       +        ix->blocksize = num;
       +
       +        /* section map */
       +        if(parseamap(f, &maps) < 0)
       +                return -1;
       +        ix->smap = maps.map;
       +        ix->nsects = maps.n;
       +
       +        /* arena map */
       +        if(parseamap(f, &maps) < 0)
       +                return -1;
       +        ix->amap = maps.map;
       +        ix->narenas = maps.n;
       +
       +        return 0;
       +}
       +
       +/*
       + * initialize an entirely new index
       + *
       + * name is the index name; sects holds the n empty index sections to
       + * use.  The sections must share one block size and one config table
       + * size, and have distinct names.  On success each section's start,
       + * stop and index name are filled in and the new Index, which keeps
       + * the caller's sects array, is returned.  Returns nil with the error
       + * set on failure.
       + */
       +Index *
       +newindex(char *name, ISect **sects, int n)
       +{
       +        Index *ix;
       +        AMap *smap;
       +        u64int nb;
       +        u32int div, ub, xb, fb, start, stop, blocksize, tabsize;
       +        int i, j;
       +
       +        if(n < 1){
       +                seterr(EOk, "creating index with no index sections");
       +                return nil;
       +        }
       +
       +        /*
       +         * compute the total buckets available in the index,
       +         * and the total buckets which are used.
       +         */
       +        nb = 0;
       +        blocksize = sects[0]->blocksize;
       +        tabsize = sects[0]->tabsize;
       +        for(i = 0; i < n; i++){
       +                if(sects[i]->start != 0 || sects[i]->stop != 0
       +                || sects[i]->index[0] != '\0'){
       +                        seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
       +                        return nil;
       +                }
       +                if(blocksize != sects[i]->blocksize){
       +                        seterr(EOk, "mismatched block sizes in index sections");
       +                        return nil;
       +                }
       +                if(tabsize != sects[i]->tabsize){
       +                        seterr(EOk, "mismatched config table sizes in index sections");
       +                        return nil;
       +                }
       +                nb += sects[i]->blocks;
       +        }
       +
       +        /*
       +         * check for duplicate names
       +         */
       +        for(i = 0; i < n; i++){
       +                for(j = i + 1; j < n; j++){
       +                        if(namecmp(sects[i]->name, sects[j]->name) == 0){
       +                                seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
       +                                return nil;
       +                        }
       +                }
       +        }
       +
       +        if(nb >= ((u64int)1 << 32)){
       +                seterr(EBug, "index too large");
       +                return nil;
       +        }
       +        /*
       +         * nb == 0 would divide by zero below; nb == 1 gives a divisor
       +         * of 2^32, which does not fit in the 32-bit div.
       +         */
       +        if(nb < 2){
       +                seterr(EBug, "index too small");
       +                return nil;
       +        }
       +
       +        fb = 0;
       +        div = (((u64int)1 << 32) + nb - 1) / nb;
       +        ub = (((u64int)1 << 32) - 1) / div + 1;
       +        if(div < 100){
       +                seterr(EBug, "index divisor too coarse");
       +                return nil;
       +        }
       +        if(ub > nb){
       +                seterr(EBug, "index initialization math wrong");
       +                return nil;
       +        }
       +        /* xb: blocks available but not needed as buckets */
       +        xb = nb - ub;
       +
       +        /*
       +         * initialize each of the index sections
       +         * and the section map table
       +         */
       +        smap = MKNZ(AMap, n);
       +        if(smap == nil){
       +                seterr(EOk, "can't create new index: out of memory");
       +                return nil;
       +        }
       +        start = 0;
       +        for(i = 0; i < n; i++){
       +                stop = start + sects[i]->blocks - xb / n;
       +                if(i == n - 1)
       +                        stop = ub;
       +                sects[i]->start = start;
       +                sects[i]->stop = stop;
       +                namecp(sects[i]->index, name);
       +
       +                smap[i].start = start;
       +                smap[i].stop = stop;
       +                namecp(smap[i].name, sects[i]->name);
       +                start = stop;
       +        }
       +
       +        /*
       +         * initialize the index itself
       +         */
       +        ix = MKZ(Index);
       +        if(ix == nil){
       +                seterr(EOk, "can't create new index: out of memory");
       +                free(smap);
       +                return nil;
       +        }
       +        ix->version = IndexVersion;
       +        namecp(ix->name, name);
       +        ix->sects = sects;
       +        ix->smap = smap;
       +        ix->nsects = n;
       +        ix->blocksize = blocksize;
       +        ix->buckets = ub;
       +        ix->tabsize = tabsize;
       +        ix->div = div;
       +        ix->bitblocks = fb;
       +
       +        if(initindex1(ix) < 0){
       +                /*
       +                 * free only what was allocated here; freeindex would
       +                 * also free the caller's sects.
       +                 */
       +                free(smap);
       +                free(ix);
       +                return nil;
       +        }
       +
       +        return ix;
       +}
       +
       +/*
       + * Read the index section header stored at offset PartBlank of part,
       + * unpack it into a new ISect, check its version, and finish
       + * initialization with initisect1.
       + * Returns the section, or nil with the error set.
       + */
       +ISect*
       +initisect(Part *part)
       +{
       +        ISect *is;
       +        ZBlock *b;
       +        int ok;
       +
       +        b = alloczblock(HeadSize, 0, 0);
       +        if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
       +                seterr(EAdmin, "can't read index section header: %r");
       +                /* don't leak the header buffer when only the read failed */
       +                if(b != nil)
       +                        freezblock(b);
       +                return nil;
       +        }
       +
       +        is = MKZ(ISect);
       +        if(is == nil){
       +                freezblock(b);
       +                return nil;
       +        }
       +        is->part = part;
       +        ok = unpackisect(is, b->data);
       +        freezblock(b);
       +        if(ok < 0){
       +                seterr(ECorrupt, "corrupted index section header: %r");
       +                freeisect(is);
       +                return nil;
       +        }
       +
       +        if(is->version != ISectVersion1 && is->version != ISectVersion2){
       +                seterr(EAdmin, "unknown index section version %d", is->version);
       +                freeisect(is);
       +                return nil;
       +        }
       +
       +        return initisect1(is);
       +}
       +
       +/*
       + * Create a fresh, empty index section on part.
       + * The config table starts at the first blocksize boundary at or
       + * after PartBlank+HeadSize, and bucket storage at the first boundary
       + * at or after the end of the table.  Version 2 sections get a random
       + * nonzero bucketmagic; others get 0.
       + * Returns the section, or nil if allocation or initisect1 fails.
       + */
       +ISect*
       +newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
       +{
       +        ISect *is;
       +        u32int mask, tabbase;
       +
       +        if((is = MKZ(ISect)) == nil)
       +                return nil;
       +
       +        mask = blocksize - 1;
       +        namecp(is->name, name);
       +        is->version = vers;
       +        is->part = part;
       +        is->blocksize = blocksize;
       +        is->start = 0;
       +        is->stop = 0;
       +        tabbase = (PartBlank + HeadSize + mask) & ~mask;
       +        is->blockbase = (tabbase + tabsize + mask) & ~mask;
       +        is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
       +        is->bucketmagic = 0;
       +        if(is->version == ISectVersion2)
       +                while((is->bucketmagic = fastrand()) == 0)
       +                        ;
       +
       +        /* initisect1 frees is and returns nil on failure */
       +        return initisect1(is);
       +}
       +
       +/*
       + * initialize the computed parameters for an index
       + *
       + * Derives buckmax, blocklog, tabbase and tabsize from the stored
       + * fields of is and checks them for consistency.  Returns is on
       + * success; on failure frees is, sets the error and returns nil.
       + */
       +static ISect*
       +initisect1(ISect *is)
       +{
       +        u64int v;
       +
       +        is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
       +        is->blocklog = u64log2(is->blocksize);
       +        if(is->blocksize != (1 << is->blocklog)){
       +                seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
       +                freeisect(is);
       +                return nil;
       +        }
       +        partblocksize(is->part, is->blocksize);
       +        is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
       +        if(is->tabbase >= is->blockbase){
       +                seterr(ECorrupt, "index section config table overlaps bucket storage");
       +                freeisect(is);
       +                return nil;
       +        }
       +        is->tabsize = is->blockbase - is->tabbase;
       +        v = is->part->size & ~(u64int)(is->blocksize - 1);
       +        if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
       +                /* the error is recorded but, for now, not treated as fatal */
       +                seterr(ECorrupt, "invalid blocks in index section %s", is->name);
       +//ZZZZZZZZZ
       +//                freeisect(is);
       +//                return nil;
       +        }
       +
       +        /*
       +         * Check the ordering first: with start > stop the unsigned
       +         * difference below wraps around and the space check would
       +         * report the wrong problem.
       +         */
       +        if(is->start > is->stop){
       +                seterr(ECorrupt, "invalid index section range");
       +                freeisect(is);
       +                return nil;
       +        }
       +        if(is->stop - is->start > is->blocks){
       +                seterr(ECorrupt, "index section overflows available space");
       +                freeisect(is);
       +                return nil;
       +        }
       +
       +        return is;
       +}
       +
       +/*
       + * pack the header of index section is and write it to the
       + * section's partition at offset PartBlank.
       + * returns 0 on success, -1 on failure.
       + */
       +int
       +wbisect(ISect *is)
       +{
       +        ZBlock *hdr;
       +        int ret;
       +
       +        hdr = alloczblock(HeadSize, 1, 0);
       +        if(hdr == nil)        /* ZZZ set error? */
       +                return -1;
       +
       +        ret = 0;
       +        if(packisect(is, hdr->data) < 0){
       +                seterr(ECorrupt, "can't make index section header: %r");
       +                ret = -1;
       +        }else if(writepart(is->part, PartBlank, hdr->data, HeadSize) < 0){
       +                seterr(EAdmin, "can't write index section header: %r");
       +                ret = -1;
       +        }
       +
       +        /* the scratch block is released on every path past allocation */
       +        freezblock(hdr);
       +        return ret;
       +}
       +
       +/*
       + * release an index section descriptor; nil is ignored.
       + */
       +void
       +freeisect(ISect *is)
       +{
       +        if(is != nil)
       +                free(is);
       +}
       +
       +/*
       + * release an index together with its arena map, arena table,
       + * section map and every index section it holds; nil is ignored.
       + */
       +void
       +freeindex(Index *ix)
       +{
       +        int n;
       +
       +        if(ix == nil)
       +                return;
       +
       +        free(ix->amap);
       +        free(ix->arenas);
       +        if(ix->sects){
       +                n = 0;
       +                while(n < ix->nsects){
       +                        freeisect(ix->sects[n]);
       +                        n++;
       +                }
       +        }
       +        free(ix->sects);
       +        free(ix->smap);
       +        free(ix);
       +}
       +
       +/*
       + * write a clump to the first arena, starting at ix->mapalloc,
       + * that accepts it, and return the address writeaclump reports.
       + * returns TWID64 and sets the error string if every arena
       + * from mapalloc onward refuses the clump.
       + *
       + * ZZZ question: should this distinguish between an arena
       + * filling up and real errors writing the clump?
       + */
       +u64int
       +writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int *pa)
       +{
       +        u64int addr;
       +        int n;
       +
       +        trace(TraceLump, "writeiclump enter");
       +        n = ix->mapalloc;
       +        while(n < ix->narenas){
       +                addr = writeaclump(ix->arenas[n], c, clbuf, ix->amap[n].start, pa);
       +                if(addr == TWID64){
       +                        /* this arena refused the clump; try the next */
       +                        n++;
       +                        continue;
       +                }
       +                /* remember where to start next time; assuming write is atomic, race is okay */
       +                ix->mapalloc = n;
       +                trace(TraceLump, "writeiclump exit");
       +                return addr;
       +        }
       +
       +        seterr(EAdmin, "no space left in arenas");
       +        trace(TraceLump, "writeiclump failed");
       +        return TWID64;
       +}
       +
       +/*
       + * convert an index address to a relative arena address.
       + *
       + * binary-searches ix->amap for the last entry whose start is <= a,
       + * stores a minus that entry's start in *aa, and returns the
       + * matching arena.  returns nil with the error string set if a lies
       + * beyond the chosen entry's stop or the arena slot is nil.
       + */
       +Arena*
       +amapitoa(Index *ix, u64int a, u64int *aa)
       +{
       +        int i, r, l, m;
       +
       +        /*
       +         * the search starts at 1 so that l-1 is always a valid
       +         * candidate; entry 0 is the result when no later start is <= a.
       +         * NOTE(review): with narenas == 0 this still reads amap[0] --
       +         * presumably callers guarantee at least one arena; confirm.
       +         */
       +        l = 1;
       +        r = ix->narenas - 1;
       +        while(l <= r){
       +                m = (r + l) / 2;
       +                if(ix->amap[m].start <= a)
       +                        l = m + 1;
       +                else
       +                        r = m - 1;
       +        }
       +        l--;
       +
       +        /* note: a == stop is accepted by this test */
       +        if(a > ix->amap[l].stop){
       +/* debugging output: dump the whole arena map before failing */
       +for(i=0; i<ix->narenas; i++)
       +        print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
       +print("want arena %d for %llux\n", l, a);
       +                seterr(ECrash, "unmapped address passed to amapitoa");
       +                return nil;
       +        }
       +
       +        if(ix->arenas[l] == nil){
       +                seterr(ECrash, "unmapped arena selected in amapitoa");
       +                return nil;
       +        }
       +        *aa = a - ix->amap[l].start;
       +        return ix->arenas[l];
       +}
       +
       +/*
       + * compare two index addresses field by field.
       + * returns 0 if type, size, blocks and addr all match, 1 otherwise.
       + */
       +int
       +iaddrcmp(IAddr *ia1, IAddr *ia2)
       +{
       +        if(ia1->type != ia2->type)
       +                return 1;
       +        if(ia1->size != ia2->size)
       +                return 1;
       +        if(ia1->blocks != ia2->blocks)
       +                return 1;
       +        if(ia1->addr != ia2->addr)
       +                return 1;
       +        return 0;
       +}
       +
       +/*
       + * lookup the score in the partition.
       + * returns 0 and fills in *ie if an entry with the given score
       + * and type is found; returns -1 otherwise.
       + *
       + * nothing needs to be explicitly locked:
       + * only static parts of ix are used, and
       + * the bucket is locked by the DBlock lock.
       + */
       +int
       +loadientry(Index *ix, u8int *score, int type, IEntry *ie)
       +{
       +        ISect *is;
       +        DBlock *b;
       +        IBucket ib;
       +        u32int buck;
       +        int h, ok;
       +
       +        ok = -1;
       +
       +        trace(TraceLump, "loadientry enter");
       +
       +        /*
       +        qlock(&stats.lock);
       +        stats.indexreads++;
       +        qunlock(&stats.lock);
       +        */
       +
       +        /*
       +         * the bloom filter lets us skip the disk read for scores
       +         * it reports as absent.  note that the filter consulted is
       +         * mainindex->bloom, not a field of ix, and that the trace
       +         * label reads "bloomhit" on this not-present path.
       +         */
       +        if(!inbloomfilter(mainindex->bloom, score)){
       +                trace(TraceLump, "loadientry bloomhit");
       +                return -1;
       +        }
       +
       +        trace(TraceLump, "loadientry loadibucket");
       +        b = loadibucket(ix, score, &is, &buck, &ib);
       +        trace(TraceLump, "loadientry loadedibucket");
       +        if(b == nil)
       +                return -1;
       +
       +        /* a corrupt bucket is reported as not found (ok is still -1) */
       +        if(okibucket(&ib, is) < 0){
       +                trace(TraceLump, "loadientry badbucket");
       +                goto out;
       +        }
       +
       +        /* bucklook sets the low bit of its result on an exact match */
       +        h = bucklook(score, type, ib.data, ib.n);
       +        if(h & 1){
       +                h ^= 1;
       +                trace(TraceLump, "loadientry found");
       +                unpackientry(ie, &ib.data[h]);
       +                ok = 0;
       +                goto out;
       +        }
       +        /* the filter passed the score but the bucket does not hold it */
       +        trace(TraceLump, "loadientry notfound");
       +        addstat(StatBloomFalseMiss, 1);
       +out:
       +        putdblock(b);
       +        trace(TraceLump, "loadientry exit");
       +        return ok;
       +}
       +
       +/*
       + * sanity check an unpacked bucket against its section:
       + * the entry count may not exceed the section's buckmax.
       + * returns 0 if plausible, else sets the error string and returns -1.
       + */
       +int
       +okibucket(IBucket *ib, ISect *is)
       +{
       +        if(ib->n > is->buckmax){
       +                seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
       +                        ib->n, is->buckmax, is->start, is->stop);
       +                return -1;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * binary search the n packed entries in data for score/otype.
       + * entries are ordered by score bytes, then by disk type.
       + * returns 1 | byte offset of the matching entry,
       + * or 0 | byte offset of the least entry > score
       + * (the offset is always a multiple of IEntrySize).
       + */
       +int
       +bucklook(u8int *score, int otype, u8int *data, int n)
       +{
       +        int lo, hi, mid, off, k, want, have, d, type;
       +
       +        type = vttodisktype(otype);
       +        lo = 0;
       +        hi = n - 1;
       +        while(lo <= hi){
       +                mid = (hi + lo) >> 1;
       +                off = mid * IEntrySize;
       +
       +                /* d > 0: key sorts after entry; d < 0: before; 0: equal so far */
       +                d = 0;
       +                for(k = 0; k < VtScoreSize && d == 0; k++){
       +                        want = score[k];
       +                        have = data[off + k];
       +                        d = (want > have) - (want < have);
       +                }
       +                /* scores are equal: the type byte breaks the tie */
       +                if(d == 0){
       +                        have = data[off + IEntryTypeOff];
       +                        d = (type > have) - (type < have);
       +                }
       +
       +                if(d == 0)
       +                        return off | 1;
       +                if(d > 0)
       +                        lo = mid + 1;
       +                else
       +                        hi = mid - 1;
       +        }
       +
       +        return lo * IEntrySize;
       +}
       +
       +/*
       + * compare two packed IEntries; consistent with bucklook:
       + * score bytes are compared first, then the type byte.
       + * returns -1, 0 or 1.
       + */
       +int
       +ientrycmp(const void *vie1, const void *vie2)
       +{
       +        u8int *a, *b;
       +        int k;
       +
       +        a = (u8int*)vie1;
       +        b = (u8int*)vie2;
       +        for(k = 0; k < VtScoreSize; k++)
       +                if(a[k] != b[k])
       +                        return a[k] < b[k] ? -1 : 1;
       +
       +        if(a[IEntryTypeOff] != b[IEntryTypeOff])
       +                return a[IEntryTypeOff] < b[IEntryTypeOff] ? -1 : 1;
       +        return 0;
       +}
       +
       +/*
       + * find the number of the index section holding bucket #buck:
       + * the last section whose start is <= buck, or section 0 if
       + * no later section qualifies.
       + */
       +int
       +indexsect0(Index *ix, u32int buck)
       +{
       +        int lo, hi, mid;
       +
       +        lo = 1;
       +        hi = ix->nsects - 1;
       +        while(lo <= hi){
       +                mid = (hi + lo) >> 1;
       +                if(ix->sects[mid]->start > buck)
       +                        hi = mid - 1;
       +                else
       +                        lo = mid + 1;
       +        }
       +        return lo - 1;
       +}
       +
       +/*
       + * load the index block at bucket #buck.
       + * on success returns the disk block and, for each non-nil out
       + * parameter, stores the owning section (*pis), the bucket number
       + * relative to that section (*pbuck) and the unpacked bucket (*ib).
       + * returns nil if buck is outside its section or the block
       + * cannot be fetched.
       + */
       +static DBlock*
       +loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
       +{
       +        ISect *sect;
       +        DBlock *blk;
       +
       +        sect = ix->sects[indexsect0(ix, buck)];
       +        if(buck < sect->start || sect->stop <= buck){
       +                seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
       +                return nil;
       +        }
       +
       +        /* from here on buck is relative to the section's first bucket */
       +        buck -= sect->start;
       +        blk = getdblock(sect->part, sect->blockbase + ((u64int)buck << sect->blocklog), mode);
       +        if(blk == nil)
       +                return nil;
       +
       +        if(pis != nil)
       +                *pis = sect;
       +        if(pbuck != nil)
       +                *pbuck = buck;
       +        if(ib != nil)
       +                unpackibucket(ib, blk->data, sect->bucketmagic);
       +        return blk;
       +}
       +
       +/*
       + * find the number of the index section holding score.
       + * the bucket number is the top 32 hash bits of the score
       + * divided by ix->div.
       + */
       +static int
       +indexsect1(Index *ix, u8int *score)
       +{
       +        return indexsect0(ix, hashbits(score, 32) / ix->div);
       +}
       +
       +/*
       + * load the index block responsible for score.
       + * uses the same score-to-bucket mapping as indexsect1 and
       + * fetches the block in OREAD mode via loadibucket0.
       + */
       +static DBlock*
       +loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
       +{
       +        return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
       +}
       +
       +/*
       + * public entry point: number of the index section holding score.
       + * simply forwards to indexsect1.
       + */
       +int
       +indexsect(Index *ix, u8int *score)
       +{
       +        return indexsect1(ix, score);
       +}
       +
       +/*
       + * public entry point: load the index block responsible for score.
       + * simply forwards to loadibucket1; see loadibucket0 for the
       + * meaning of the out parameters.
       + */
       +DBlock*
       +loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
       +{
       +        return loadibucket1(ix, score, pis, pbuck, ib);
       +}
       +
       +
   DIR diff --git a/src/cmd/venti/srv/lump.c b/src/cmd/venti/srv/lump.c
       t@@ -0,0 +1,249 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/* if non-zero, writelump hands new lumps to queuewrite instead of writing them itself */
       +int                        queuewrites = 0;
       +/* 1: writelump discards every packet up front; 2: discards packets that miss the lump cache */
       +int                        writestodevnull = 0;
       +
       +static Packet                *readilump(Lump *u, IAddr *ia, u8int *score, int rac);
       +
       +/*
       + * read the lump with the given score and type.
       + * returns a packet holding the data, or nil with the error
       + * string set if the lump does not exist or is larger than size.
       + * if cached is non-nil, *cached is set to 1 when the data came
       + * from the lump cache and 0 when the index had to be consulted;
       + * it is left untouched for the zero score.
       + */
       +Packet*
       +readlump(u8int *score, int type, u32int size, int *cached)
       +{
       +        Lump *u;
       +        Packet *p;
       +        IAddr ia;
       +        u32int n;
       +        int rac;
       +
       +        trace(TraceLump, "readlump enter");
       +/*
       +        qlock(&stats.lock);
       +        stats.lumpreads++;
       +        qunlock(&stats.lock);
       +*/
       +        /* the zero score yields a freshly allocated packet without any lookup */
       +        if(scorecmp(score, zeroscore) == 0)
       +                return packetalloc();
       +        /* lookuplump returns the cache entry locked; every path below must putlump it */
       +        u = lookuplump(score, type);
       +        if(u->data != nil){
       +                trace(TraceLump, "readlump lookuplump hit");
       +                if(cached)
       +                        *cached = 1;
       +                n = packetsize(u->data);
       +                if(n > size){
       +                        seterr(EOk, "read too small: asked for %d need at least %d", size, n);
       +                        putlump(u);
       +
       +                        return nil;
       +                }
       +                /* hand back a duplicate; the cache keeps its own packet */
       +                p = packetdup(u->data, 0, n);
       +                putlump(u);
       +                return p;
       +        }
       +
       +        if(cached)
       +                *cached = 0;
       +
       +        if(lookupscore(score, type, &ia, &rac) < 0){
       +                //ZZZ place to check for someone trying to guess scores
       +                seterr(EOk, "no block with score %V/%d exists", score, type);
       +
       +                putlump(u);
       +                return nil;
       +        }
       +        if(ia.size > size){
       +                seterr(EOk, "read too small 1: asked for %d need at least %d", size, ia.size);
       +
       +                putlump(u);
       +                return nil;
       +        }
       +
       +        /* readilump also inserts a copy of the data into the cache entry u */
       +        trace(TraceLump, "readlump readilump");
       +        p = readilump(u, &ia, score, rac);
       +        putlump(u);
       +
       +        trace(TraceLump, "readlump exit");
       +        return p;
       +}
       +
       +/*
       + * save away a lump, and return its score in score.
       + * doesn't store duplicates, but checks that the data is really the same.
       + *
       + * takes ownership of p on every path.  ms is the start time
       + * passed through to the write path for timing statistics.
       + * returns 0 on success and -1 on failure.
       + */
       +int
       +writelump(Packet *p, u8int *score, int type, u32int creator, uint ms)
       +{
       +        Lump *u;
       +        int ok;
       +
       +/*
       +        qlock(&stats.lock);
       +        stats.lumpwrites++;
       +        qunlock(&stats.lock);
       +*/
       +
       +        packetsha1(p, score);
       +        /* empty packets, and everything when writestodevnull==1, are dropped */
       +        if(packetsize(p) == 0 || writestodevnull==1){
       +                packetfree(p);
       +                return 0;
       +        }
       +
       +        /* u comes back referenced and locked; it must be released with putlump */
       +        u = lookuplump(score, type);
       +        if(u->data != nil){
       +                /* already cached: only verify that the contents agree */
       +                ok = 0;
       +                if(packetcmp(p, u->data) != 0){
       +                        seterr(EStrange, "score collision");
       +                        ok = -1;
       +                }
       +                packetfree(p);
       +                putlump(u);
       +                return ok;
       +        }
       +
       +        if(writestodevnull==2){
       +                packetfree(p);
       +                /* release the cache entry; returning without this leaves it locked forever */
       +                putlump(u);
       +                return 0;
       +        }
       +
       +        /* queuewrite is handed u along with p, so u is not released here */
       +        if(queuewrites)
       +                return queuewrite(u, p, creator, ms);
       +
       +        ok = writeqlump(u, p, creator, ms);
       +
       +        putlump(u);
       +        return ok;
       +}
       +
       +/*
       + * store the packet p for cache entry u: write a new clump,
       + * record its address in the index and attach p to the cache entry.
       + * if the score is already in the index the data is assumed to be
       + * present and p is simply freed.
       + *
       + * takes ownership of p.  ms is the request start time, used only
       + * for the timing statistics.  returns 0 on success, -1 on failure.
       + * the caller remains responsible for releasing u.
       + */
       +int
       +writeqlump(Lump *u, Packet *p, int creator, uint ms)
       +{
       +        ZBlock *flat;
       +        IAddr ia;
       +        int ok;
       +        int rac;
       +
       +        if(lookupscore(u->score, u->type, &ia, &rac) == 0){
       +                /*
       +                 * assume the data is here! XXX
       +                 * the stored block is not read back and compared with p.
       +                 * (code that did so used to follow the return below but
       +                 * could never execute, so it has been removed.)
       +                 */
       +                packetfree(p);
       +                ms = msec() - ms;
       +                addstat2(StatRpcWriteOld, 1, StatRpcWriteOldTime, ms);
       +                return 0;
       +        }
       +
       +        flat = packet2zblock(p, packetsize(p));
       +        ok = storeclump(mainindex, flat, u->score, u->type, creator, &ia);
       +        freezblock(flat);
       +        if(ok == 0)
       +                ok = insertscore(u->score, &ia, 1);
       +        /* on success the cache entry takes over p; otherwise it is freed */
       +        if(ok == 0)
       +                insertlump(u, p);
       +        else
       +                packetfree(p);
       +
       +        ms = msec() - ms;
       +        addstat2(StatRpcWriteNew, 1, StatRpcWriteNewTime, ms);
       +        return ok;
       +}
       +
       +/*
       + * read ahead: walk up to n clump headers starting at arena
       + * offset aa (index address a) and insert each clump's score
       + * and address with insertscore.  stops early at the end of the
       + * used part of the arena or at the first header that cannot be
       + * read or unpacked.
       + */
       +static void
       +lreadahead(u64int a, Arena *arena, u64int aa, int n)
       +{
       +        u8int hdr[ClumpSize];
       +        Clump c;
       +        IAddr ia;
       +
       +        for(; n > 0; n--){
       +                if(aa >= arena->memstats.used)
       +                        return;
       +                if(readarena(arena, aa, hdr, ClumpSize) < ClumpSize)
       +                        return;
       +                if(unpackclump(&c, hdr, arena->clumpmagic) < 0)
       +                        return;
       +
       +                ia.addr = a;
       +                ia.type = c.info.type;
       +                ia.size = c.info.uncsize;
       +                /* header plus data, rounded up to whole arena blocks */
       +                ia.blocks = (c.info.size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
       +                insertscore(c.info.score, &ia, 0);
       +
       +                /* step both addresses past this clump */
       +                a += ClumpSize + c.info.size;
       +                aa += ClumpSize + c.info.size;
       +        }
       +}
       +
       +/*
       + * read the lump at index address ia from its arena, check it
       + * against the index entry and the expected score, and return it
       + * as a packet.  a duplicate of the packet is inserted into the
       + * cache entry u.  returns nil on any failure.
       + *
       + * when rac is 0, the scores of up to 20 following clumps are
       + * preloaded with lreadahead.
       + * NOTE(review): rac is produced by lookupscore; its exact
       + * meaning is not visible here.
       + */
       +static Packet*
       +readilump(Lump *u, IAddr *ia, u8int *score, int rac)
       +{
       +        Arena *arena;
       +        ZBlock *zb;
       +        Packet *p, *pp;
       +        Clump cl;
       +        u64int a, aa;
       +        u8int sc[VtScoreSize];
       +
       +        trace(TraceLump, "readilump enter");
       +        /* translate the index address into an arena and an offset within it */
       +        arena = amapitoa(mainindex, ia->addr, &aa);
       +        if(arena == nil){
       +                trace(TraceLump, "readilump amapitoa failed");
       +                return nil;
       +        }
       +
       +        trace(TraceLump, "readilump loadclump");
       +        zb = loadclump(arena, aa, ia->blocks, &cl, sc, paranoid);
       +        if(zb == nil){
       +                trace(TraceLump, "readilump loadclump failed");
       +                return nil;
       +        }
       +
       +        /* the clump header must agree with the index entry and the score */
       +        if(ia->size != cl.info.uncsize){
       +                seterr(EInconsist, "index and clump size mismatch");
       +                freezblock(zb);
       +                return nil;
       +        }
       +        if(ia->type != cl.info.type){
       +                seterr(EInconsist, "index and clump type mismatch");
       +                freezblock(zb);
       +                return nil;
       +        }
       +        if(scorecmp(score, sc) != 0){
       +                seterr(ECrash, "score mismatch");
       +                freezblock(zb);
       +                return nil;
       +        }
       +
       +        if(rac == 0) {
       +                trace(TraceLump, "readilump readahead");
       +                /* read-ahead starts at the clump right after this one */
       +                a = ia->addr + ClumpSize + cl.info.size;
       +                aa += ClumpSize + cl.info.size;
       +                lreadahead(a, arena, aa, 20);
       +        }
       +
       +        trace(TraceLump, "readilump success");
       +        p = zblock2packet(zb, cl.info.uncsize);
       +        freezblock(zb);
       +        /* the cache gets its own duplicate; the caller owns p */
       +        pp = packetdup(p, 0, packetsize(p));
       +        trace(TraceLump, "readilump insertlump");
       +        insertlump(u, pp);
       +        trace(TraceLump, "readilump exit");
       +        return p;
       +}
   DIR diff --git a/src/cmd/venti/srv/lumpcache.c b/src/cmd/venti/srv/lumpcache.c
       t@@ -0,0 +1,417 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/* #define CHECK(x)        x */
       +#define CHECK(x)
       +
       +typedef struct LumpCache        LumpCache;
       +
       +/*
       + * geometry of the lump cache hash table
       + */
       +enum
       +{
       +        HashLog                = 9,                /* score bits used to pick a hash chain */
       +        HashSize        = 1<<HashLog,        /* number of hash chains */
       +        HashMask        = HashSize - 1,
       +};
       +
       +/*
       + * the lump cache: a fixed array of Lump descriptors found
       + * through a hash table keyed by score, with a heap of eviction
       + * candidates.  all fields are protected by lock.
       + */
       +struct LumpCache
       +{
       +        QLock                lock;
       +        Rendez                full;                        /* slept on when no lump can be evicted */
       +        Lump                *free;                        /* list of available lumps */
       +        u32int                allowed;                /* total allowable space for packets */
       +        u32int                avail;                        /* remaining space for packets */
       +        u32int                now;                        /* ticks for usage timestamps */
       +        Lump                **heads;                /* hash table for finding address */
       +        int                nheap;                        /* number of available victims */
       +        Lump                **heap;                        /* heap for locating victims */
       +        int                nblocks;                /* number of blocks allocated */
       +        Lump                *blocks;                /* array of block descriptors */
       +};
       +
       +static LumpCache        lumpcache;
       +
       +static void        delheap(Lump *db);
       +static int        downheap(int i, Lump *b);
       +static void        fixheap(int i, Lump *b);
       +static int        upheap(int i, Lump *b);
       +static Lump        *bumplump(void);
       +
       +/*
       + * set up the lump cache with room for size bytes of packet
       + * data spread over nblocks lump descriptors.  every descriptor
       + * starts out on the free list and the victim heap is empty.
       + */
       +void
       +initlumpcache(u32int size, u32int nblocks)
       +{
       +        Lump *blk;
       +        int n;
       +
       +        lumpcache.full.l = &lumpcache.lock;
       +        lumpcache.nblocks = nblocks;
       +        lumpcache.allowed = size;
       +        lumpcache.avail = size;
       +        lumpcache.heads = MKNZ(Lump*, HashSize);
       +        lumpcache.heap = MKNZ(Lump*, nblocks);
       +        lumpcache.blocks = MKNZ(Lump, nblocks);
       +        setstat(StatLcacheSize, lumpcache.nblocks);
       +
       +        /* push each descriptor onto the free list; the last one ends up at the head */
       +        lumpcache.free = nil;
       +        for(n = 0; n < nblocks; n++){
       +                blk = &lumpcache.blocks[n];
       +                blk->type = TWID8;
       +                blk->heap = TWID32;        /* not in the heap */
       +                blk->next = lumpcache.free;
       +                lumpcache.free = blk;
       +        }
       +        lumpcache.nheap = 0;
       +}
       +
       +/*
       + * find or create the cache entry for score/type.
       + * the entry is returned with its reference count incremented
       + * and its own lock (b->lock) held; the caller must release it
       + * with putlump.  a newly created entry has data == nil.
       + * sleeps if every descriptor is in use.
       + */
       +Lump*
       +lookuplump(u8int *score, int type)
       +{
       +        uint ms;
       +        Lump *b;
       +        u32int h;
       +
       +        ms = msec();
       +        trace(TraceLump, "lookuplump enter");
       +        
       +        h = hashbits(score, HashLog);
       +
       +        /*
       +         * look for the block in the cache
       +         */
       +        qlock(&lumpcache.lock);
       +        CHECK(checklumpcache());
       +again:
       +        for(b = lumpcache.heads[h]; b != nil; b = b->next){
       +                if(scorecmp(score, b->score)==0 && type == b->type){
       +                        addstat(StatLcacheHit, 1);
       +                        trace(TraceLump, "lookuplump hit");
       +                        goto found;
       +                }
       +        }
       +
       +        trace(TraceLump, "lookuplump miss");
       +
       +        /*
       +         * missed: locate the block with the oldest second to last use.
       +         * remove it from the heap, and fix up the heap.
       +         * if nothing can be evicted, sleep until a lump is released
       +         * and search the chain again, since another thread may have
       +         * added the entry in the meantime.
       +         */
       +        while(lumpcache.free == nil){
       +                trace(TraceLump, "lookuplump bump");
       +                CHECK(checklumpcache());
       +                if(bumplump() == nil){
       +                        CHECK(checklumpcache());
       +                        logerr(EAdmin, "all lump cache blocks in use");
       +                        addstat(StatLcacheStall, 1);
       +                        CHECK(checklumpcache());
       +                        rsleep(&lumpcache.full);
       +                        CHECK(checklumpcache());
       +                        addstat(StatLcacheStall, -1);
       +                        goto again;
       +                }
       +                CHECK(checklumpcache());
       +        }
       +
       +        addstat(StatLcacheMiss, 1);
       +        b = lumpcache.free;
       +        lumpcache.free = b->next;
       +
       +        /*
       +         * the new block has no last use, so assume it happens sometime in the middle
       +         * ZZZ this is not reasonable
       +         */
       +        b->used = (b->used2 + lumpcache.now) / 2;
       +
       +        /*
       +         * rechain the block on the correct hash chain
       +         */
       +        b->next = lumpcache.heads[h];
       +        lumpcache.heads[h] = b;
       +        if(b->next != nil)
       +                b->next->prev = b;
       +        b->prev = nil;
       +
       +        scorecp(b->score, score);
       +        b->type = type;
       +        b->size = 0;
       +        b->data = nil;
       +
       +found:
       +        /*
       +         * take a reference and age the usage stamps; if the lump
       +         * sits in the victim heap its position must be corrected.
       +         */
       +        b->ref++;
       +        b->used2 = b->used;
       +        b->used = lumpcache.now++;
       +        if(b->heap != TWID32)
       +                fixheap(b->heap, b);
       +        CHECK(checklumpcache());
       +        qunlock(&lumpcache.lock);
       +
       +
       +        /* the per-lump lock is taken only after the cache lock is dropped */
       +        addstat(StatLumpStall, 1);
       +        qlock(&b->lock);
       +        addstat(StatLumpStall, -1);
       +
       +        trace(TraceLump, "lookuplump exit");
       +        addstat2(StatLcacheRead, 1, StatLcacheReadTime, msec()-ms);
       +        return b;
       +}
       +
       +/*
       + * attach packet p to cache entry b and charge its size
       + * (packetasize) against the cache's space budget.
       + * evicts other lumps, or sleeps until one is released,
       + * while there is not enough room.
       + */
       +void
       +insertlump(Lump *b, Packet *p)
       +{
       +        u32int size;
       +
       +        trace(TraceLump, "insertlump enter");
       +        qlock(&lumpcache.lock);
       +        CHECK(checklumpcache());
       +again:
       +
       +        /*
       +         * NOTE(review): this sits after the again label, so the
       +         * counter is bumped once more on every retry after a stall.
       +         */
       +        addstat(StatLcacheWrite, 1);
       +
       +        /*
       +         * make room: evict lumps until the packet fits in the
       +         * available space.
       +         */
       +        size = packetasize(p);
       +//ZZZ
       +        while(lumpcache.avail < size){
       +                trace(TraceLump, "insertlump bump");
       +                CHECK(checklumpcache());
       +                if(bumplump() == nil){
       +                        logerr(EAdmin, "all lump cache blocks in use");
       +                        addstat(StatLcacheStall, 1);
       +                        CHECK(checklumpcache());
       +                        rsleep(&lumpcache.full);
       +                        CHECK(checklumpcache());
       +                        addstat(StatLcacheStall, -1);
       +                        goto again;
       +                }
       +                CHECK(checklumpcache());
       +        }
       +        b->data = p;
       +        b->size = size;
       +        lumpcache.avail -= size;
       +        CHECK(checklumpcache());
       +        qunlock(&lumpcache.lock);
       +        trace(TraceLump, "insertlump exit");
       +}
       +
       +/*
       + * release a cache entry obtained from lookuplump: drop the
       + * per-lump lock and the reference.  when the last reference
       + * goes away the lump becomes an eviction candidate and any
       + * thread waiting for space is woken.  nil is ignored.
       + */
       +void
       +putlump(Lump *b)
       +{
       +        if(b == nil)
       +                return;
       +
       +        trace(TraceLump, "putlump");
       +        qunlock(&b->lock);
       +        qlock(&lumpcache.lock);
       +        CHECK(checklumpcache());
       +        if(--b->ref == 0){
       +                /* TWID32 marks a lump that is not in the heap yet */
       +                if(b->heap == TWID32)
       +                        upheap(lumpcache.nheap++, b);
       +                trace(TraceLump, "putlump wakeup");
       +                rwakeupall(&lumpcache.full);
       +        }
       +        CHECK(checklumpcache());
       +        qunlock(&lumpcache.lock);
       +}
       +
       +/*
       + * remove some lump from use and update the free list and counters.
       + * returns the evicted lump, now at the head of the free list,
       + * or nil if the heap holds no unreferenced lump.
       + * called with lumpcache.lock held.
       + */
       +static Lump*
       +bumplump(void)
       +{
       +        Lump *b;
       +        u32int h;
       +
       +        /*
       +         * remove blocks until we find one that is unused.
       +         * referenced blocks are left in the heap even though
       +         * they can't be scavenged; this is simply a speed optimization
       +         */
       +        CHECK(checklumpcache());
       +        for(;;){
       +                if(lumpcache.nheap == 0){
       +                        trace(TraceLump, "bumplump emptyheap");
       +                        return nil;
       +                }
       +                b = lumpcache.heap[0];
       +                delheap(b);
       +                if(!b->ref){
       +                        trace(TraceLump, "bumplump wakeup");
       +                        rwakeupall(&lumpcache.full);
       +                        break;
       +                }
       +        }
       +
       +        /*
       +         * unchain the block
       +         */
       +        trace(TraceLump, "bumplump unchain");
       +        if(b->prev == nil){
       +                /* no predecessor: b must be the head of its hash chain */
       +                h = hashbits(b->score, HashLog);
       +                if(lumpcache.heads[h] != b)
       +                        sysfatal("bad hash chains in lump cache");
       +                lumpcache.heads[h] = b->next;
       +        }else
       +                b->prev->next = b->next;
       +        if(b->next != nil)
       +                b->next->prev = b->prev;
       +
       +        /* give the packet's space back to the cache budget */
       +        if(b->data != nil){
       +                packetfree(b->data);
       +                b->data = nil;
       +                lumpcache.avail += b->size;
       +                b->size = 0;
       +        }
       +        b->type = TWID8;
       +
       +        b->next = lumpcache.free;
       +        lumpcache.free = b;
       +
       +        CHECK(checklumpcache());
       +        trace(TraceLump, "bumplump exit");
       +        return b;
       +}
       +
       +/*
       + * delete an arbitrary block from the heap:
       + * the last heap element is moved into db's slot and sifted
       + * into place, and db is marked as being outside the heap.
       + */
       +static void
       +delheap(Lump *db)
       +{
       +        fixheap(db->heap, lumpcache.heap[--lumpcache.nheap]);
       +        db->heap = TWID32;
       +}
       +
       +/*
       + * push an element up or down to its correct new location:
       + * place b at heap slot i, try sifting it up first, and sift
       + * it down only if it did not move.
       + */
       +static void
       +fixheap(int i, Lump *b)
       +{
       +        if(upheap(i, b) == i)
       +                downheap(i, b);
       +}
       +
       +/*
       + * sift b up from heap slot i toward the root and return the
       + * slot where it ends up.  parents that sort after b are moved
       + * down one level as b rises.
       + *
       + * ordering is by used2 - now; now is a u32int, so presumably the
       + * subtraction is meant to wrap and order stamps relative to the
       + * current tick -- confirm the type of used2.
       + */
       +static int
       +upheap(int i, Lump *b)
       +{
       +        Lump *bb;
       +        u32int now;
       +        int p;
       +
       +        now = lumpcache.now;
       +        for(; i != 0; i = p){
       +                p = (i - 1) >> 1;        /* parent slot */
       +                bb = lumpcache.heap[p];
       +                if(b->used2 - now >= bb->used2 - now)
       +                        break;
       +                lumpcache.heap[i] = bb;
       +                bb->heap = i;
       +        }
       +
       +        lumpcache.heap[i] = b;
       +        b->heap = i;
       +        return i;
       +}
       +
       +/*
       + * Move b from slot i toward the leaves while its key (used2 - now)
       + * is larger than the smaller-keyed child's, shifting children up.
       + * Stores b in its final slot and returns that slot's index.
       + */
       +static int
       +downheap(int i, Lump *b)
       +{
       +        Lump *child;
       +        u32int now;
       +        int c;
       +
       +        now = lumpcache.now;
       +        for(;;){
       +                c = (i << 1) + 1;
       +                if(c >= lumpcache.nheap)
       +                        break;
       +                /* choose the child with the smaller key */
       +                if(c + 1 < lumpcache.nheap
       +                && lumpcache.heap[c]->used2 - now > lumpcache.heap[c + 1]->used2 - now)
       +                        c++;
       +                child = lumpcache.heap[c];
       +                if(b->used2 - now <= child->used2 - now)
       +                        break;
       +                lumpcache.heap[i] = child;
       +                child->heap = i;
       +                i = c;
       +        }
       +
       +        lumpcache.heap[i] = b;
       +        b->heap = i;
       +        return i;
       +}
       +
       +/*
       + * Consistency check: walk the hash chain that bb's score maps to,
       + * verifying the prev links, and die unless bb is found on it.
       + */
       +static void
       +findblock(Lump *bb)
       +{
       +        Lump *cur, *prev;
       +        int h;
       +
       +        h = hashbits(bb->score, HashLog);
       +        prev = nil;
       +        cur = lumpcache.heads[h];
       +        while(cur != nil){
       +                if(cur->prev != prev)
       +                        sysfatal("bad prev link");
       +                if(cur == bb)
       +                        return;
       +                prev = cur;
       +                cur = cur->next;
       +        }
       +        sysfatal("block score=%V type=%#x missing from hash table", bb->score, bb->type);
       +}
       +
       +/*
       + * Exhaustive consistency check of the lump cache: heap back-pointers
       + * and ordering, per-block size/hash-table membership, space accounting,
       + * free list contents, and the total block count.  Any violation is fatal.
       + */
       +void
       +checklumpcache(void)
       +{
       +        Lump *b, *hb;
       +        u32int size, now, nfree;
       +        int i, k, refed;
       +
       +        /* every heap entry must point back at its slot and respect heap order */
       +        now = lumpcache.now;
       +        for(i = 0; i < lumpcache.nheap; i++){
       +                hb = lumpcache.heap[i];
       +                if(hb->heap != i)
       +                        sysfatal("lc: mis-heaped at %d: %d", i, hb->heap);
       +                if(i > 0 && lumpcache.heap[(i - 1) >> 1]->used2 - now > hb->used2 - now)
       +                        sysfatal("lc: bad heap ordering");
       +                /* compare against the left child, then the right child */
       +                for(k = (i << 1) + 1; k <= (i << 1) + 2; k++)
       +                        if(k < lumpcache.nheap && hb->used2 - now > lumpcache.heap[k]->used2 - now)
       +                                sysfatal("lc: bad heap ordering");
       +        }
       +
       +        /* scan every block: sizes, hash membership, heap index sanity */
       +        refed = 0;
       +        size = 0;
       +        for(i = 0; i < lumpcache.nblocks; i++){
       +                b = &lumpcache.blocks[i];
       +                if(b->size != 0 && b->data == nil)
       +                        sysfatal("bad size: %d data=%p", b->size, b->data);
       +                /* referenced blocks that are off the heap are counted separately */
       +                if(b->heap == TWID32 && b->ref)
       +                        refed++;
       +                if(b->type != TWID8){
       +                        findblock(b);
       +                        size += b->size;
       +                }
       +                if(b->heap != TWID32 && lumpcache.heap[b->heap] != b)
       +                        sysfatal("lc: spurious heap value");
       +        }
       +        if(lumpcache.allowed - size != lumpcache.avail){
       +                fprint(2, "mismatched available=%d and allowed=%d - used=%d space", lumpcache.avail, lumpcache.allowed, size);
       +                /* deliberate null store: crash here rather than exit cleanly */
       +                *(int*)0=0;
       +        }
       +
       +        /* free-list entries must be untyped and off the heap */
       +        nfree = 0;
       +        b = lumpcache.free;
       +        while(b != nil){
       +                if(b->heap != TWID32 || b->type != TWID8)
       +                        sysfatal("lc: bad free list");
       +                nfree++;
       +                b = b->next;
       +        }
       +
       +        if(lumpcache.nheap + nfree + refed != lumpcache.nblocks)
       +                sysfatal("lc: missing blocks: %d %d %d %d", lumpcache.nheap, refed, nfree, lumpcache.nblocks);
       +}
   DIR diff --git a/src/cmd/venti/srv/lumpqueue.c b/src/cmd/venti/srv/lumpqueue.c
       t@@ -0,0 +1,187 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +typedef struct LumpQueue        LumpQueue;
       +typedef struct WLump                WLump;
       +
       +enum
       +{
       +        MaxLumpQ        = 1 << 3        /* max. lumps on a single write queue, must be pow 2 */
       +};
       +
       +/*
       + * One pending write request.  Entries are filled in by queuewrite
       + * and consumed by queueproc.
       + */
       +struct WLump
       +{
       +        Lump        *u;                /* lump to write; handed to writeqlump, then putlump */
       +        Packet        *p;                /* packet data handed to writeqlump */
       +        int        creator;        /* creator argument forwarded to writeqlump */
       +        int        gen;                /* value of the global gen counter when the entry was queued */
       +        uint        ms;                /* ms argument forwarded to writeqlump */
       +};
       +
       +/*
       + * A fixed-size ring of pending writes.  The ring is empty when w == r
       + * and full when advancing w would make it equal r, so at most
       + * MaxLumpQ-1 entries are queued at once.
       + */
       +struct LumpQueue
       +{
       +        QLock        lock;                /* protects every field below; all three Rendez use it */
       +        Rendez         flush;                /* flushqueue sleeps here; queueproc wakes all after each dequeue */
       +        Rendez        full;                /* queuewrite sleeps here while the ring is full */
       +        Rendez        empty;                /* queueproc sleeps here while the ring is empty */
       +        WLump        q[MaxLumpQ];        /* the ring itself */
       +        int        w;                /* index of the next slot to fill */
       +        int        r;                /* index of the next slot to drain */
       +};
       +
       +static LumpQueue        *lumpqs;
       +static int                nqs;
       +
       +static QLock                glk;
       +static int                gen;
       +
       +static void        queueproc(void *vq);
       +
       +/*
       + * Allocate nq write queues and start the writer procs for each.
       + * Returns 0 on success, -1 (with seterr set) if a proc cannot be started.
       + */
       +int
       +initlumpqueues(int nq)
       +{
       +        enum { NWriterPerQueue = 5 };        /* queueproc instances servicing each queue */
       +        LumpQueue *q;
       +        int i, j;
       +
       +        nqs = nq;
       +        lumpqs = MKNZ(LumpQueue, nq);
       +
       +        for(i = 0; i < nq; i++){
       +                q = lumpqs + i;
       +
       +                /* all three rendezvous points share the queue lock */
       +                q->full.l = &q->lock;
       +                q->empty.l = &q->lock;
       +                q->flush.l = &q->lock;
       +
       +                for(j = 0; j < NWriterPerQueue; j++){
       +                        if(vtproc(queueproc, q) < 0){
       +                                seterr(EOk, "can't start write queue slave: %r");
       +                                return -1;
       +                        }
       +                }
       +        }
       +
       +        return 0;
       +}
       +
       +/*
       + * queue a lump & its packet data for writing
       + */
       +int
       +queuewrite(Lump *u, Packet *p, int creator, uint ms)
       +{
       +        LumpQueue *q;
       +        WLump *slot;
       +        int sect, next;
       +
       +        trace(TraceProc, "queuewrite");
       +
       +        /* the index section of the score selects the queue */
       +        sect = indexsect(mainindex, u->score);
       +        if(sect < 0 || sect >= nqs){
       +                seterr(EBug, "internal error: illegal index section in queuewrite");
       +                return -1;
       +        }
       +        q = lumpqs + sect;
       +
       +        qlock(&q->lock);
       +
       +        /* the ring is full while advancing w would collide with r */
       +        for(;;){
       +                next = (q->w + 1) & (MaxLumpQ - 1);
       +                if(q->r != next)
       +                        break;
       +                trace(TraceProc, "queuewrite sleep");
       +                rsleep(&q->full);
       +        }
       +
       +        slot = &q->q[q->w];
       +        slot->u = u;
       +        slot->p = p;
       +        slot->creator = creator;
       +        slot->ms = ms;
       +        slot->gen = gen;
       +        q->w = next;
       +
       +        trace(TraceProc, "queuewrite wakeup");
       +        rwakeup(&q->empty);
       +
       +        qunlock(&q->lock);
       +
       +        return 0;
       +}
       +
       +/*
       + * Bump the global generation, then for each queue wait until it is
       + * empty or the entry at its read position was queued in the new generation.
       + * Does nothing if the queues were never initialized.
       + */
       +void
       +flushqueue(void)
       +{
       +        LumpQueue *q;
       +        int i;
       +
       +        if(lumpqs == nil)
       +                return;
       +
       +        trace(TraceProc, "flushqueue");
       +
       +        qlock(&glk);
       +        gen++;
       +        qunlock(&glk);
       +
       +        for(i = 0; i < mainindex->nsects; i++){
       +                q = lumpqs + i;
       +                qlock(&q->lock);
       +                for(;;){
       +                        if(q->w == q->r)
       +                                break;
       +                        /* head entry already belongs to the current generation */
       +                        if(gen - q->q[q->r].gen <= 0)
       +                                break;
       +                        trace(TraceProc, "flushqueue sleep q%d", i);
       +                        rsleep(&q->flush);
       +                }
       +                qunlock(&q->lock);
       +        }
       +}
       +        
       +/*
       + * Writer proc: loop forever taking the next entry off the queue vq,
       + * writing it with writeqlump, and releasing the lump with putlump.
       + */
       +static void
       +queueproc(void *vq)
       +{
       +        LumpQueue *q;
       +        WLump w;
       +
       +        threadsetname("queueproc");
       +
       +        q = vq;
       +        for(;;){
       +                qlock(&q->lock);
       +                while(q->r == q->w){
       +                        trace(TraceProc, "queueproc sleep empty");
       +                        rsleep(&q->empty);
       +                }
       +
       +                /* copy the entry out so the slot can be reused once the lock is dropped */
       +                w = q->q[q->r];
       +                q->r = (q->r + 1) & (MaxLumpQ - 1);
       +
       +                trace(TraceProc, "queueproc wakeup flush");
       +                rwakeupall(&q->flush);
       +
       +                trace(TraceProc, "queueproc wakeup full");
       +                rwakeup(&q->full);
       +
       +                qunlock(&q->lock);
       +
       +                /* the write itself happens outside the queue lock */
       +                trace(TraceProc, "queueproc writelump %V", w.u->score);
       +                if(writeqlump(w.u, w.p, w.creator, w.ms) < 0)
       +                        fprint(2, "failed to write lump for %V: %r", w.u->score);
       +                trace(TraceProc, "queueproc wrotelump %V", w.u->score);
       +
       +                putlump(w.u);
       +        }
       +}
   DIR diff --git a/src/cmd/venti/srv/mkfile b/src/cmd/venti/srv/mkfile
       t@@ -0,0 +1,146 @@
       +<$PLAN9/src/mkhdr
       +CC=9c
       +
       +AR=ar
       +
       +LIBOFILES=\
       +        arena.$O\
       +        arenas.$O\
       +        bloom.$O\
       +        buildbuck.$O\
       +        clump.$O\
       +        config.$O\
       +        conv.$O\
       +        dcache.$O\
       +        dump.$O\
       +        graph.$O\
       +        httpd.$O\
       +        icache.$O\
       +        icachewrite.$O\
       +        ifile.$O\
       +        index.$O\
       +        lump.$O\
       +        lumpcache.$O\
       +        lumpqueue.$O\
       +        part.$O\
       +        png.$O\
       +        round.$O\
       +        score.$O\
       +        sortientry.$O\
       +        stats.$O\
       +        syncarena.$O\
       +        syncindex0.$O\
       +        trace.$O\
       +        unwhack.$O\
       +        utils.$O\
       +        unittoull.$O\
       +        whack.$O\
       +        xml.$O\
       +        zblock.$O\
       +        zeropart.$O\
       +
       +SLIB=libvs.a
       +
       +LIB=$SLIB
       +
       +HFILES=        dat.h\
       +        fns.h\
       +        stdinc.h\
       +
       +TARG=\
       +        venti\
       +        fmtarenas\
       +        fmtbloom\
       +        fmtisect\
       +        fmtindex\
       +        buildindex\
       +        checkarenas\
       +        checkindex\
       +        clumpstats\
       +        findscore\
       +        rdarena\
       +        wrarena\
       +        syncindex\
       +        printarena\
       +        verifyarena\
       +
       +OFILES=
       +
       +BIN=$BIN/venti
       +
       +it:V: $O.venti
       +
       +$O.venti: # debugmalloc2.$O # debugmalloc.$O #_p9dir.$O debugmalloc.$O
       +
       +CLEANFILES=$CLEANFILES $SLIB
       +
       +<$PLAN9/src/mkmany
       +
       +$SLIB: $LIBOFILES
       +        $AR rvc $SLIB $LIBOFILES
       +
       +# xml.c:D:        mkxml dat.h
       +#         ./mkxml dat.h > xml.c
       +
       +# ainstall: copy every built program in $TARG to the host "amsterdam"
       +# with scp, one virtual %.ainstall target per program.
       +ainstall:V: ${TARG:%=%.ainstall}
       +
       +%.ainstall:V:        $O.%
       +        scp $prereq amsterdam:/usr/local/bin/venti/$stem
       +
       +# test: end-to-end smoke test using scratch files under /home/tmp.
       +# Formats arenas, a bloom filter and an index section, writes vtmp.conf,
       +# formats the index, starts o.venti in the background, and archives
       +# /usr/local/plan9 into it with vac.  With websync=yes in the environment
       +# the caches are flushed through the HTTP interface before venti is
       +# killed; otherwise (the default) venti is killed first and syncindex is
       +# run.  Finally checkindex runs and the size of its output is printed.
       +test:VQ: ${TARG:%=o.%}
       +        slay o.venti|rc
       +        vtmp=/home/tmp
       +        echo '**********' FMTARENAS
       +        ./o.fmtarenas -a 40M -b 8k arenas $vtmp/arena
       +        echo '**********' FMTBLOOM
       +        ./o.fmtbloom -s 10M $vtmp/bloom
       +        echo '**********' FMTISECT
       +        ./o.fmtisect -b 8k isect $vtmp/isect
       +        (
       +                echo index main
       +                echo isect $vtmp/isect
       +                echo arenas $vtmp/arena
       +                echo bloom $vtmp/bloom
       +                echo webroot $HOME/src/venti/www
       +                echo mem 64M
       +                echo icmem 64M
       +                echo bcmem 64M
       +        ) >vtmp.conf
       +        echo '**********' FMTINDEX
       +        ./o.fmtindex vtmp.conf
       +        echo '**********' VENTI
       +        # ./o.venti -c vtmp.conf -B 64M -I 64M -C 64M -a 'tcp!*!17034' -h 'tcp!*!8001'  >a 2>&1 &
       +        ./o.venti -c vtmp.conf -a 'tcp!*!17034' -h 'tcp!*!8001'  >a 2>&1 &
       +        sleep 5
       +        echo '**********' VAC
       +        venti='tcp!127.0.0.1!17034' export venti
       +        9 time vac /usr/local/plan9 >a.vac
       +        case ${websync:-no} in
       +        yes)
       +                echo '**********' SYNC VIA WEB
       +                hget http://127.0.0.1:8001/flushdcache
       +                hget http://127.0.0.1:8001/flushicache
       +                hget http://127.0.0.1:8001/flushdcache
       +                echo '**********' KILL VENTI
       +                killall -9 o.venti
       +                ;;
       +        no)
       +                echo '**********' KILL VENTI
       +                killall -9 o.venti
       +                echo '**********' SYNCINDEX
       +                ./o.syncindex -B64M -I64M -f vtmp.conf
       +                ;;
       +        esac
       +        echo '**********' CHECKINDEX
       +        ./o.checkindex -B64M vtmp.conf /home/tmp/check >check.out
       +        wc check.out
       +
       +luadisk.o: luadisk.c
       +        gcc -c -ggdb -Wall -I/usr/include/lua50 luadisk.c
       +
       +libluadisk.so: luadisk.o
       +        gcc -shared -o $target luadisk.o -llua50 -llualib50
       +
       +$O.xwrarena: xwrarena.$O
       +        $LD -o $target xwrarena.$O 
       +
   DIR diff --git a/src/cmd/venti/srv/part.c b/src/cmd/venti/srv/part.c
       t@@ -0,0 +1,383 @@
       +#ifdef PLAN9PORT        /* SORRY! */
       +#include <u.h>
       +#include <sys/types.h>
       +#include <sys/vfs.h>
       +#endif
       +#include "stdinc.h"
       +#include <ctype.h>
       +#include "dat.h"
       +#include "fns.h"
       +
       +u32int        maxblocksize;
       +int        readonly;
       +
       +/*
       + * Parse an unsigned number at p in radix rad, followed by an optional
       + * binary size suffix: k/K (2^10), m/M (2^20), g/G (2^30) or t/T (2^40).
       + * On success stores the scaled value in *u, the position just past the
       + * number and suffix in *pp, and returns 0.  Returns -1, leaving *pp
       + * and *u untouched, if p does not start with a digit.
       + */
       +static int
       +strtoullsuf(char *p, char **pp, int rad, u64int *u)
       +{
       +        u64int v, scale;
       +
       +        if(isdigit(*p) == 0)
       +                return -1;
       +        v = strtoull(p, &p, rad);
       +
       +        scale = 1;
       +        switch(*p){
       +        case 'k':
       +        case 'K':
       +                scale = 1024;
       +                break;
       +        case 'm':
       +        case 'M':
       +                scale = 1024*1024;
       +                break;
       +        case 'g':
       +        case 'G':
       +                scale = 1024*1024*1024;
       +                break;
       +        case 't':
       +        case 'T':
       +                scale = (u64int)1024*1024*1024*1024;
       +                break;
       +        }
       +        if(scale != 1){
       +                /* consume the suffix character */
       +                v *= scale;
       +                p++;
       +        }
       +
       +        *pp = p;
       +        *u = v;
       +        return 0;
       +}
       +        
       +/*
       + * Split a partition name of the form "file", "file:lo", "file:lo-",
       + * "file:-hi" or "file:lo-hi" at its last ':'.
       + * *file receives a freshly allocated copy of the file part; a missing
       + * bound is reported as 0.  Returns 0 on success.  On a malformed range
       + * the copy is freed and -1 is returned (*file is then left dangling).
       + */
       +static int
       +parsepart(char *name, char **file, u64int *lo, u64int *hi)
       +{
       +        char *p, *f;
       +
       +        f = estrdup(name);
       +        *file = f;
       +        p = strrchr(f, ':');
       +        if(p == nil){
       +                *lo = 0;
       +                *hi = 0;
       +                return 0;
       +        }
       +
       +        /* cut the range off the file name */
       +        *p++ = 0;
       +        if(*p != '-'){
       +                if(strtoullsuf(p, &p, 0, lo) < 0)
       +                        goto Bad;
       +        }else
       +                *lo = 0;
       +
       +        if(*p == '-')
       +                p++;
       +        if(*p == 0){
       +                *hi = 0;
       +                return 0;
       +        }
       +        /* the upper bound must consume the rest of the string */
       +        if(strtoullsuf(p, &p, 0, hi) < 0 || *p != 0)
       +                goto Bad;
       +        return 0;
       +
       +Bad:
       +        free(f);
       +        return -1;
       +}
       +
       +/*
       + * Open the partition described by name ("file" or "file:lo-hi", see
       + * parsepart) with open mode `mode' and return a new Part describing it,
       + * or nil with seterr set on failure.
       + *
       + * If the global readonly flag is set, the file is always opened OREAD.
       + * If an ORDWR open fails, a read-only open is tried and a warning printed.
       + * An upper bound of 0 means "up to the end of the file".
       + */
       +Part*
       +initpart(char *name, int mode)
       +{
       +        Part *part;
       +        Dir *dir;
       +        char *file;
       +        u64int lo, hi;
       +
       +        if(parsepart(name, &file, &lo, &hi) < 0)
       +                return nil;
       +        trace(TraceDisk, "initpart %s file %s lo 0x%llx hi 0x%llx", name, file, lo, hi);
       +        part = MKZ(Part);
       +        part->name = estrdup(name);
       +        part->filename = estrdup(file);
       +        if(readonly){
       +                /*
       +                 * Clear the requested access mode before selecting OREAD.
       +                 * Masking *with* the access bits would keep OWRITE/ORDWR,
       +                 * and OR-ing in OREAD cannot clear them.
       +                 */
       +                mode &= ~(OREAD|OWRITE|ORDWR);
       +                mode |= OREAD;
       +        }
       +        part->fd = open(file, mode);
       +        if(part->fd < 0){
       +                /* fall back to read-only access if read-write was refused */
       +                if((mode&(OREAD|OWRITE|ORDWR)) == ORDWR)
       +                        part->fd = open(file, (mode&~ORDWR)|OREAD);
       +                if(part->fd < 0){
       +                        freepart(part);
       +                        fprint(2, "can't open partition='%s': %r\n", file);
       +                        seterr(EOk, "can't open partition='%s': %r", file);
       +                        fprint(2, "%r\n");
       +                        free(file);
       +                        return nil;
       +                }
       +                fprint(2, "warning: %s opened for reading only\n", name);
       +        }
       +        part->offset = lo;
       +        dir = dirfstat(part->fd);
       +        if(dir == nil){
       +                freepart(part);
       +                seterr(EOk, "can't stat partition='%s': %r", file);
       +                free(file);
       +                return nil;
       +        }
       +        if(dir->length == 0){
       +                free(dir);
       +                freepart(part);
       +                seterr(EOk, "can't determine size of partition %s", file);
       +                free(file);
       +                return nil;
       +        }
       +        /* also reject hi < lo, which would make the unsigned size below wrap */
       +        if(dir->length < hi || dir->length < lo || (hi != 0 && hi < lo)){
       +                freepart(part);
       +                seterr(EOk, "partition '%s': bounds out of range (max %lld)", name, dir->length);
       +                free(dir);
       +                free(file);
       +                return nil;
       +        }
       +        if(hi == 0)
       +                hi = dir->length;
       +        part->size = hi - part->offset;
       +#ifdef _LIBC_H_
       +        {
       +                struct statfs sfs;
       +                if(fstatfs(part->fd, &sfs) >= 0)
       +                        part->fsblocksize = sfs.f_bsize;
       +        }
       +#endif
       +        free(dir);
       +        /* part->filename holds its own copy; release parsepart's */
       +        free(file);
       +        return part;
       +}
       +
       +/*
       + * Release a Part: close its file descriptor if open and free the
       + * name and filename strings along with the structure itself.
       + * A nil part is ignored.
       + */
       +void
       +freepart(Part *part)
       +{
       +        if(part == nil)
       +                return;
       +        if(part->fd >= 0)
       +                close(part->fd);
       +        free(part->name);
       +        /* filename is a separate estrdup made by initpart; it was leaked before */
       +        free(part->filename);
       +        free(part);
       +}
       +
       +/*
       + * Record the block size of a partition (allowed only once per partition)
       + * and keep the global maxblocksize at the largest size seen.
       + */
       +void
       +partblocksize(Part *part, u32int blocksize)
       +{
       +        if(part->blocksize != 0)
       +                sysfatal("resetting partition=%s's block size", part->name);
       +
       +        part->blocksize = blocksize;
       +        if(maxblocksize < blocksize)
       +                maxblocksize = blocksize;
       +}
       +
       +/*
       + * Read/write some amount of data between a block device or file and a memory buffer.
       + *
       + * Most Unix systems require that when accessing a block device directly,
       + * the buffer, offset, and count are all multiples of the device block size,
       + * making this a lot more complicated than it otherwise would be.
       + * 
       + * Most of our callers will make things easy on us, but for some callers it's best
       + * if we just do the work here, with only one place to get it right (hopefully).
       + * 
       + * If everything is aligned properly, prwb will try to do big transfers in the main 
       + * body of the loop: up to MaxIo bytes at a time.  If everything isn't aligned properly,
       + * we work one block at a time.
       + */
       +#undef min
       +#define min(a, b) ((a) < (b) ? (a) : (b))
       +/*
       + * Transfer count bytes between vbuf and fd at the given offset,
       + * reading if isread is non-zero and writing otherwise, issuing only
       + * blocksize-aligned I/O.  name is used only in error messages.
       + *
       + * Unaligned leading and trailing pieces are handled by reading the
       + * enclosing block into an aligned temporary buffer and, for writes,
       + * writing the modified block back.
       + *
       + * Returns the original count on success, 0 if count is 0, and -1
       + * on failure (with seterr set, except when the temporary buffer
       + * cannot be allocated).
       + */
       +int
       +prwb(char *name, int fd, int isread, u64int offset, void *vbuf, u32int count, u32int blocksize)
       +{
       +        char *op;
       +        u8int *buf, *tmp, *freetmp, *dst;
       +        u32int c, delta, icount, opsize;
       +        int r;
       +
       +        buf = vbuf;
       +        tmp = nil;
       +        freetmp = nil;
       +        icount = count;
       +        opsize = blocksize;
       +
       +        if(count == 0){
       +                logerr(EStrange, "pwrb %s called to %s 0 bytes", name, isread ? "read" : "write");
       +                return 0;
       +        }
       +
       +        assert(blocksize > 0);
       +
       +        /* allocate blocksize-aligned temp buffer if needed */
       +        if((ulong)offset%blocksize || (ulong)buf%blocksize || count%blocksize){
       +                if((freetmp = malloc(blocksize*2)) == nil)
       +                        return -1;
       +                tmp = freetmp;
       +                tmp += blocksize - (ulong)tmp%blocksize;
       +        }
       +
       +        /* handle beginning fringe */
       +        if((delta = (ulong)offset%blocksize) != 0){
       +                assert(tmp != nil);
       +                if((r=pread(fd, tmp, blocksize, offset-delta)) != blocksize){
       +                        dst = tmp;
       +                        offset = offset-delta;
       +                        op = "read";
       +                        goto Error;
       +                }
       +                c = min(count, blocksize-delta);
       +                assert(c > 0 && c < blocksize);
       +                if(isread)
       +                        memmove(buf, tmp+delta, c);
       +                else{
       +                        memmove(tmp+delta, buf, c);
       +                        if((r=pwrite(fd, tmp, blocksize, offset-delta)) != blocksize){
       +                                dst = tmp;
       +                                offset = offset-delta;
       +                                /* this is the write-back that failed, not the read */
       +                                op = "write";
       +                                goto Error;
       +                        }
       +                }
       +                assert(c > 0);
       +                offset += c;
       +                buf += c;
       +                count -= c;
       +        }
       +
       +        /* handle full blocks */
       +        while(count >= blocksize){
       +                assert((ulong)offset%blocksize == 0);
       +                if((ulong)buf%blocksize){
       +                        /* caller's buffer is unaligned: bounce one block at a time */
       +                        assert(tmp != nil);
       +                        dst = tmp;
       +                        opsize = blocksize;
       +                }else{
       +                        /* aligned: transfer as many whole blocks as allowed at once */
       +                        dst = buf;
       +                        opsize = count - count%blocksize;
       +                        if(opsize > MaxIo)
       +                                opsize = MaxIo;
       +                }
       +                if(isread){
       +                        if((r=pread(fd, dst, opsize, offset))<=0 || r%blocksize){
       +                                op = "read";
       +                                goto Error;
       +                        }
       +                        if(dst == tmp){
       +                                assert(r == blocksize);
       +                                memmove(buf, tmp, blocksize);
       +                        }
       +                }else{
       +                        if(dst == tmp){
       +                                assert(opsize == blocksize);
       +                                memmove(dst, buf, blocksize);
       +                        }
       +                        if((r=pwrite(fd, dst, opsize, offset))<=0 || r%blocksize){
       +                                op = "write";
       +                                goto Error;
       +                        }
       +                        if(dst == tmp)
       +                                assert(r == blocksize);
       +                }
       +                assert(r > 0);
       +                offset += r;
       +                buf += r;
       +                count -= r;
       +        }
       +
       +        /* handle ending fringe */
       +        if(count > 0){
       +                assert((ulong)offset%blocksize == 0);
       +                assert(tmp != nil);
       +                /*
       +                 * The fringe always moves exactly one block through tmp.
       +                 * opsize may still hold a multi-block size from the loop
       +                 * above, which would run past the end of tmp.
       +                 */
       +                opsize = blocksize;
       +                /*
       +                 * Complicated condition: if we're reading it's okay to get less than
       +                 * a block as long as it's enough to satisfy the read - maybe this is
       +                 * a normal file.  (We never write to normal files, or else things would
       +                 * be even more complicated.)
       +                 *
       +                 * r is signed and count is not: test r < 0 explicitly, otherwise
       +                 * a failed pread (-1) converts to a huge unsigned value and
       +                 * slips through the "r < count" test.
       +                 */
       +                r = pread(fd, tmp, blocksize, offset);
       +                if((isread && (r < 0 || (u32int)r < count)) || (!isread && r != blocksize)){
       +print("FAILED isread=%d r=%d count=%d blocksize=%d\n", isread, r, count, blocksize);
       +                        dst = tmp;
       +                        op = "read";
       +                        goto Error;
       +                }
       +                if(isread)
       +                        memmove(buf, tmp, count);
       +                else{
       +                        memmove(tmp, buf, count);
       +                        if((r=pwrite(fd, tmp, blocksize, offset)) != blocksize){
       +                                dst = tmp;
       +                                op = "write";
       +                                goto Error;
       +                        }
       +                }
       +        }
       +        if(freetmp)
       +                free(freetmp);
       +        return icount;
       +
       +Error:
       +        seterr(EAdmin, "%s %s offset 0x%llux count %ud buf %p returned %d: %r",
       +                op, name, offset, opsize, dst, r);
       +        if(freetmp)
       +                free(freetmp);
       +        return -1;
       +}
       +
       +/*
       + * Bounds-checked read (isread != 0) or write of count bytes at
       + * partition-relative offset.  The transfer is done by prwb using the
       + * file system block size if known, else the partition block size,
       + * else 4096.  Returns prwb's result, or -1 if the range is out of bounds.
       + */
       +int
       +rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
       +{
       +        u32int blocksize;
       +        char *what;
       +
       +        what = isread ? "read" : "write";
       +        trace(TraceDisk, "%s %s %ud at 0x%llx", 
       +                what, part->name, count, offset);
       +        if(offset >= part->size || offset+count > part->size){
       +                seterr(EStrange, "out of bounds %s offset 0x%llux count %ud to partition %s size 0x%llux",
       +                        what, offset, count, part->name, part->size);
       +                return -1;
       +        }
       +
       +        blocksize = part->fsblocksize;
       +        if(blocksize == 0){
       +                blocksize = part->blocksize;
       +                if(blocksize == 0)
       +                        blocksize = 4096;
       +        }
       +
       +        /* translate to an absolute file offset */
       +        return prwb(part->filename, part->fd, isread, part->offset+offset, buf, count, blocksize);
       +}
       +
       +/*
       + * Read count bytes at partition-relative offset into buf.
       + * Thin wrapper around rwpart with isread set; returns rwpart's result.
       + */
       +int
       +readpart(Part *part, u64int offset, u8int *buf, u32int count)
       +{
       +        return rwpart(part, 1, offset, buf, count);
       +}
       +
       +/*
       + * Write count bytes from buf at partition-relative offset.
       + * Thin wrapper around rwpart with isread clear; returns rwpart's result.
       + */
       +int
       +writepart(Part *part, u64int offset, u8int *buf, u32int count)
       +{
       +        return rwpart(part, 0, offset, buf, count);
       +}
       +
       +/*
       + * Read the whole of the partition/file called name into a newly
       + * allocated ZBlock.  Returns nil (with seterr set) if the partition
       + * cannot be opened, the block cannot be allocated, or the read fails.
       + */
       +ZBlock*
       +readfile(char *name)
       +{
       +        Part *p;
       +        ZBlock *b;
       +
       +        if((p = initpart(name, OREAD)) == nil)
       +                return nil;
       +
       +        b = alloczblock(p->size, 0, p->blocksize);
       +        if(b == nil)
       +                seterr(EOk, "can't alloc %s: %r", name);
       +        else if(readpart(p, 0, b->data, p->size) < 0){
       +                seterr(EOk, "can't read %s: %r", name);
       +                freezblock(b);
       +                b = nil;
       +        }
       +
       +        /* the partition is only needed while reading */
       +        freepart(p);
       +        return b;
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/png.c b/src/cmd/venti/srv/png.c
       t@@ -0,0 +1,241 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +enum
       +{
       +        IDATSIZE        = 20000,
       +        FilterNone = 0
       +};
       +
       +typedef struct ZlibR ZlibR;
       +typedef struct ZlibW ZlibW;
       +
       +/*
       + * Read-side state for zread: a cursor over raw image data that is
       + * handed out one scan line at a time.
       + */
       +struct ZlibR
       +{
       +        uchar *data;        /* start of the pixel data */
       +        int width;        /* bytes from one row of data to the next */
       +        int dx;                /* pixels per row */
       +        int dy;                /* number of rows */
       +        int x;                /* next pixel to emit within the current row */
       +        int y;                /* current row */
       +        int pixwid;        /* bytes per pixel */
       +};
       +
       +/*
       + * Write-side state.  NOTE(review): the code using these fields is not
       + * in this part of the file; the field notes below are inferred from
       + * the names only and should be confirmed against that code.
       + */
       +struct ZlibW
       +{
       +        Hio *io;        /* presumably the HTTP connection output is sent to */
       +        uchar *buf;        /* presumably the start of a staging buffer */
       +        uchar *b;        /* presumably the current position in buf */
       +        uchar *e;        /* presumably the end of buf */
       +};
       +
       +static ulong *crctab;
       +static uchar PNGmagic[] = { 137, 'P', 'N', 'G', '\r', '\n', 26, '\n'};
       +
       +/*
       + * Store the low 32 bits of v into a[0..3], most significant byte first.
       + */
       +static void
       +put4(uchar *a, ulong v)
       +{
       +        int i;
       +
       +        /* fill from the least significant byte backwards */
       +        for(i = 3; i >= 0; i--){
       +                a[i] = v;
       +                v >>= 8;
       +        }
       +}
       +
       +/*
       + * Write one chunk to io: 4-byte length n, the 4-character type,
       + * n bytes of data d, and a 4-byte CRC computed over type and data.
       + * Silently does nothing if type is not exactly 4 characters long.
       + */
       +static void
       +chunk(Hio *io, char *type, uchar *d, int n)
       +{
       +        uchar word[4];
       +        ulong crc;
       +
       +        if(strlen(type) != 4)
       +                return;
       +
       +        /* length, type, data */
       +        put4(word, n);
       +        hwrite(io, word, 4);
       +        hwrite(io, type, 4);
       +        hwrite(io, d, n);
       +
       +        /* the CRC covers the type and the data, not the length */
       +        crc = blockcrc(crctab, 0, type, 4);
       +        crc = blockcrc(crctab, crc, d, n);
       +        put4(word, crc);
       +        hwrite(io, word, 4);
       +}
       +
       +/*
       + * Fill buf (n bytes) with the next stretch of image data from the
       + * ZlibR cursor va.  Each row is emitted as a FilterNone byte followed
       + * by dx pixels; 4-byte pixels are converted from premultiplied to
       + * non-premultiplied alpha on the way.  Only whole pixels are emitted.
       + * Returns the number of bytes stored, which is 0 once all dy rows
       + * have been produced.
       + */
       +static int
       +zread(void *va, void *buf, int n)
       +{
       +        int a, i, pixels, pixwid;
       +        uchar *b, *e, *img;
       +        ZlibR *z;
       +
       +        z = va;
       +        pixwid = z->pixwid;
       +        b = buf;
       +        e = b+n;
       +        while(b+pixwid <= e){
       +                if(z->y >= z->dy)
       +                        break;
       +                if(z->x == 0){
       +                        /*
       +                         * Start a row only if the filter byte AND at least one
       +                         * pixel fit.  Otherwise the filter byte would be written
       +                         * with no pixel after it, x would stay 0, and the next
       +                         * call would emit a second filter byte for the same row.
       +                         */
       +                        if(b+1+pixwid > e)
       +                                break;
       +                        *b++ = FilterNone;
       +                }
       +                pixels = (e-b)/pixwid;
       +                if(pixels > z->dx - z->x)
       +                        pixels = z->dx - z->x;
       +                img = z->data + z->width*z->y + pixwid*z->x;
       +                memmove(b, img, pixwid*pixels);
       +                if(pixwid == 4){
       +                        /*
       +                         * Convert to non-premultiplied alpha.
       +                         */
       +                        for(i=0; i<pixels; i++, b+=4){
       +                                a = b[3];
       +                                if(a == 255 || a == 0)
       +                                        ;
       +                                else{
       +                                        /* clamp each channel to alpha so the result stays <= 255 */
       +                                        if(b[0] >= a)
       +                                                b[0] = a;
       +                                        b[0] = (b[0]*255)/a;
       +                                        if(b[1] >= a)
       +                                                b[1] = a;
       +                                        b[1] = (b[1]*255)/a;
       +                                        if(b[2] >= a)
       +                                                b[2] = a;
       +                                        b[2] = (b[2]*255)/a;
       +                                }
       +                        }
       +                }else        
       +                        b += pixwid*pixels;
       +
       +                /* advance the cursor, wrapping to the next row when this one is done */
       +                z->x += pixels;
       +                if(z->x >= z->dx){
       +                        z->x = 0;
       +                        z->y++;
       +                }
       +        }
       +        return b - (uchar*)buf;
       +}
       +
       +/*
       + * Write the bytes accumulated in z->buf as one IDAT chunk
       + * and reset the buffer to empty.
       + */
       +static void
       +IDAT(ZlibW *z)
       +{
       +        chunk(z->io, "IDAT", z->buf, z->b - z->buf);
       +        z->b = z->buf;
       +}
       +
       +/*
       + * Output callback for the compressor: append the n bytes at buf
       + * to the ZlibW buffer passed as va, writing an IDAT chunk each
       + * time the buffer fills.  Always returns n.
       + */
       +static int
       +zwrite(void *va, void *buf, int n)
       +{
       +        ZlibW *w;
       +        uchar *src, *end;
       +        int room, take;
       +
       +        w = va;
       +        src = buf;
       +        end = src+n;
       +        while(src < end){
       +                /* copy as much as both the input and the buffer allow */
       +                room = w->e - w->b;
       +                take = end - src;
       +                if(take > room)
       +                        take = room;
       +                memmove(w->b, src, take);
       +                w->b += take;
       +                src += take;
       +                if(w->b >= w->e)
       +                        IDAT(w);
       +        }
       +        return n;
       +}
       +
       +/*
       + * Return an image holding the pixels of i in the channel layout
       + * the PNG writer wants: ABGR32 if i has an alpha channel,
       + * otherwise BGR24.  ([A]BGR so that the bytes come out as
       + * R,G,B,[A] in big-endian order.)
       + * If i already has that layout, i itself is returned; otherwise
       + * a new image is allocated and drawn under memdrawlock.
       + * Returns nil if the allocation fails.
       + */
       +static Memimage*
       +memRGBA(Memimage *i)
       +{
       +        Memimage *conv;
       +        char chanstr[32];
       +        ulong want;
       +
       +        chantostr(chanstr, i->chan);
       +        want = strchr(chanstr, 'a') ? ABGR32 : BGR24;
       +        if(want == i->chan)
       +                return i;
       +
       +        qlock(&memdrawlock);
       +        conv = allocmemimage(i->r, want);
       +        if(conv)
       +                memimagedraw(conv, conv->r, i, i->r.min, nil, i->r.min, S);
       +        qunlock(&memdrawlock);
       +        return conv;
       +}
       +
       +/*
       + * Write image m to io as a PNG: signature, IHDR, one or more
       + * IDAT chunks holding the deflate-compressed scanlines, IEND.
       + * Returns 0 on success, -1 if the image cannot be converted
       + * or compression fails.  On a compression failure the output
       + * already written to io is left incomplete.
       + */
       +int
       +writepng(Hio *io, Memimage *m)
       +{
       +        static int first = 1;
       +        static QLock lk;
       +        uchar buf[200], *h;
       +        Memimage *rgb;
       +        ZlibR zr;
       +        ZlibW zw;
       +        int ret;
       +
       +        /* one-time setup of the compressor and the CRC table */
       +        if(first){
       +                qlock(&lk);
       +                if(first){
       +                        deflateinit();
       +                        crctab = mkcrctab(0xedb88320);
       +                        first = 0;
       +                }
       +                qunlock(&lk);
       +        }
       +
       +        rgb = memRGBA(m);
       +        if(rgb == nil)
       +                return -1;
       +
       +        hwrite(io, PNGmagic, sizeof PNGmagic);
       +
       +        /* IHDR chunk */
       +        h = buf;
       +        put4(h, Dx(m->r)); h += 4;
       +        put4(h, Dy(m->r)); h += 4;
       +        *h++ = 8;        /* 8 bits per channel */
       +        if(rgb->chan == BGR24)
       +                *h++ = 2;                /* RGB */
       +        else
       +                *h++ = 6;                /* RGBA */
       +        *h++ = 0;        /* compression - deflate */
       +        *h++ = 0;        /* filter - none */
       +        *h++ = 0;        /* interlace - none */
       +        chunk(io, "IHDR", buf, h-buf);
       +
       +        /* image data */
       +        zr.dx = Dx(m->r);
       +        zr.dy = Dy(m->r);
       +        zr.width = rgb->width * sizeof(ulong);
       +        zr.data = rgb->data->bdata;
       +        zr.x = 0;
       +        zr.y = 0;
       +        zr.pixwid = chantodepth(rgb->chan)/8;
       +        zw.io = io;
       +        zw.buf = vtmalloc(IDATSIZE);
       +        zw.b = zw.buf;
       +        zw.e = zw.b + IDATSIZE;
       +        ret = 0;
       +        if(deflatezlib(&zw, zwrite, &zr, zread, 6, 0) < 0)
       +                ret = -1;
       +        else{
       +                /* flush whatever is left in the buffer, then terminate */
       +                if(zw.b > zw.buf)
       +                        IDAT(&zw);
       +                chunk(io, "IEND", nil, 0);
       +        }
       +        free(zw.buf);
       +
       +        /*
       +         * Release the converted copy on every path, including
       +         * the compression-failure path.
       +         */
       +        if(m != rgb){
       +                qlock(&memdrawlock);
       +                freememimage(rgb);
       +                qunlock(&memdrawlock);
       +        }
       +        return ret;
       +}
   DIR diff --git a/src/cmd/venti/srv/printarena.c b/src/cmd/venti/srv/printarena.c
       t@@ -0,0 +1,130 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Print the command synopsis on standard error and exit
       + * with status "usage".
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: printarena arenafile [offset]\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * Print a summary of arena on standard error, then walk its
       + * clumps starting at offset (or at 0 when offset is ~0),
       + * printing one line per clump on standard output:
       + * address, score, type, uncompressed size.
       + * The walk stops at the free-clump magic, at a bad magic
       + * number, at a clump that cannot be loaded, or at a clump
       + * whose score or type fails verification.  The address
       + * reached is printed as "end offset".
       + */
       +static void
       +rdarena(Arena *arena, u64int offset)
       +{
       +        u64int a, aa, e;
       +        u32int magic;
       +        Clump cl;
       +        uchar score[VtScoreSize];
       +        ZBlock *lump;
       +
       +        printarena(2, arena);
       +
       +        a = arena->base;
       +        e = arena->base + arena->size;
       +        if(offset != ~(u64int)0) {
       +                if(offset >= e-a)
       +                        sysfatal("bad offset %llud >= %llud\n",
       +                                offset, e-a);
       +                aa = offset;
       +        } else
       +                aa = 0;
       +
       +        /*
       +         * NOTE(review): offset is validated against e-a above but
       +         * aa is compared against e here; confirm which bound is meant.
       +         */
       +        for(; aa < e; aa += ClumpSize+cl.info.size) {
       +                magic = clumpmagic(arena, aa);
       +                if(magic == ClumpFreeMagic)
       +                        break;
       +                if(magic != arena->clumpmagic) {
       +                        fprint(2, "illegal clump magic number %#8.8ux offset %llud\n",
       +                                magic, aa);
       +                        break;
       +                }
       +                lump = loadclump(arena, aa, 0, &cl, score, 0);
       +                if(lump == nil) {
       +                        fprint(2, "clump %llud failed to read: %r\n", aa);
       +                        break;
       +                }
       +                if(cl.info.type != VtCorruptType) {
       +                        /* recompute the score from the data and compare */
       +                        scoremem(score, lump->data, cl.info.uncsize);
       +                        if(scorecmp(cl.info.score, score) != 0) {
       +                                fprint(2, "clump %llud has mismatched score\n", aa);
       +                                freezblock(lump);        /* don't leak the block on the error path */
       +                                break;
       +                        }
       +                        if(vttypevalid(cl.info.type) < 0) {
       +                                fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
       +                                freezblock(lump);
       +                                break;
       +                        }
       +                }
       +                print("%22llud %V %3d %5d\n", aa, score, cl.info.type, cl.info.uncsize);
       +                freezblock(lump);
       +        }
       +        print("end offset %llud\n", aa);
       +}
       +
       +/*
       + * printarena entry point.
       + * Options: -o aoffset gives the byte offset of the arena within
       + * the file (default 0).  Arguments: the arena file and an optional
       + * clump offset at which rdarena starts.
       + * Reads and unpacks the arena header, prints it, checks that the
       + * file is long enough to hold the arena, then dumps the clumps.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        char *file;
       +        Arena *arena;
       +        u64int offset, aoffset;
       +        Part *part;
       +        Dir *d;
       +        uchar buf[8192];
       +        ArenaHead head;
       +
       +        readonly = 1;        /* for part.c */
       +        aoffset = 0;
       +        ARGBEGIN{
       +        case 'o':
       +                aoffset = strtoull(EARGF(usage()), 0, 0);
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        /* ~0 tells rdarena that no starting offset was given */
       +        offset = ~(u64int)0;
       +        switch(argc) {
       +        default:
       +                usage();
       +        case 2:
       +                offset = strtoull(argv[1], 0, 0);
       +                /* fall through */
       +        case 1:
       +                file = argv[0];
       +        }
       +
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        /* the stat is used below for the truncation check */
       +        if((d = dirstat(file)) == nil)
       +                sysfatal("can't stat file %s: %r", file);
       +
       +        part = initpart(file, OREAD|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open file %s: %r", file);
       +        if(readpart(part, aoffset, buf, sizeof buf) < 0)
       +                sysfatal("can't read file %s: %r", file);
       +
       +        if(unpackarenahead(&head, buf) < 0)
       +                sysfatal("corrupted arena header: %r");
       +
       +        print("# arena head version=%d name=%.*s blocksize=%d size=%lld clumpmagic=0x%.8ux\n",
       +                head.version, ANameSize, head.name, head.blocksize,
       +                head.size, head.clumpmagic);
       +
       +        if(aoffset+head.size > d->length)
       +                sysfatal("arena is truncated: want %llud bytes have %llud\n",
       +                        head.size, d->length);
       +
       +        partblocksize(part, head.blocksize);
       +        initdcache(8 * MaxDiskBlock);
       +
       +        arena = initarena(part, aoffset, head.size, head.blocksize);
       +        if(arena == nil)
       +                sysfatal("initarena: %r");
       +
       +        rdarena(arena, offset);
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/printarenas.c b/src/cmd/venti/srv/printarenas.c
       t@@ -0,0 +1,113 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include <bio.h>
       +
       +Biobuf bout;
       +
       +/*
       + * Print one index entry on bout: address, score, type, size.
       + */
       +static void
       +pie(IEntry *ie)
       +{
       +        Bprint(&bout, "%22lld %V %3d %5d\n",
       +                ie->ia.addr, ie->score, ie->ia.type, ie->ia.size);
       +}
       +
       +/*
       + * Print the command synopsis on standard error and exit.
       + * A usage error is a failure, so exit with status "usage"
       + * rather than a success status.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: printarenas [-B blockcachesize] config [arenaname...]\n");
       +        threadexitsall("usage");
       +}
       +
       +Config conf;
       +
       +/*
       + * Report whether name was selected on the command line.
       + * With no names given (argc == 0) everything is selected.
       + * Returns 1 if selected, 0 otherwise.
       + */
       +int
       +shoulddump(char *name, int argc, char **argv)
       +{
       +        if(argc == 0)
       +                return 1;
       +        while(argc-- > 0)
       +                if(strcmp(name, *argv++) == 0)
       +                        return 1;
       +        return 0;
       +}
       +
       +enum
       +{
       +        ClumpChunks = 32*1024,        /* ClumpInfo entries read per readclumpinfos call */
       +};
       +
       +/*
       + * Print an index-entry line for every clump recorded in the
       + * directory of arena.  a is the address assigned to the first
       + * clump; each following clump's address is the previous one
       + * plus that clump's stored size and ClumpSize.
       + * Stops early, with a message on standard error, if the
       + * directory cannot be read.
       + */
       +void
       +dumparena(Arena *arena, u64int a)
       +{
       +        IEntry ie;
       +        ClumpInfo *ci, *cis;
       +        u32int clump;
       +        int i, n;
       +
       +        cis = MKN(ClumpInfo, ClumpChunks);
       +        memset(&ie, 0, sizeof(IEntry));
       +        for(clump = 0; clump < arena->memstats.clumps; clump += n){
       +                /* read the directory in batches of at most ClumpChunks */
       +                n = ClumpChunks;
       +                if(n > arena->memstats.clumps - clump)
       +                        n = arena->memstats.clumps - clump;
       +                if(readclumpinfos(arena, clump, cis, n) != n){
       +                        fprint(2, "arena directory read failed: %r\n");
       +                        break;
       +                }
       +
       +                for(i = 0; i < n; i++){
       +                        ci = &cis[i];
       +                        ie.ia.type = ci->type;
       +                        ie.ia.size = ci->uncsize;
       +                        ie.ia.addr = a;
       +                        a += ci->size + ClumpSize;
       +                        /* size on disk, rounded up to whole 1<<ABlockLog units */
       +                        ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
       +                        scorecp(ie.score, ci->score);
       +                        pie(&ie);
       +                }
       +        }
       +        free(cis);
       +}
       +
       +/*
       + * printarenas entry point.
       + * Options: -B size sets the disk block cache size; it is raised
       + * to a minimum derived from the index if too small.
       + * Arguments: the venti configuration file, then optional arena
       + * names; with no names every arena in the index is dumped.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int i;
       +        Index *ix;
       +        u32int bcmem;
       +
       +        bcmem = 0;
       +        ARGBEGIN{
       +        case 'B':
       +                /* EARGF: a missing argument is a usage error, not a nil pointer */
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc < 1)
       +                usage();
       +
       +        ventifmtinstall();
       +
       +        if(initventi(argv[0], &conf) < 0)
       +                sysfatal("can't init venti: %r");
       +
       +        if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
       +                bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
       +        if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
       +        initdcache(bcmem);
       +
       +        Binit(&bout, 1, OWRITE);
       +        ix = mainindex;
       +        for(i=0; i<ix->narenas; i++)
       +                if(shoulddump(ix->arenas[i]->name, argc-1, argv+1))
       +                        dumparena(ix->arenas[i], ix->amap[i].start);
       +        Bterm(&bout);
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/printindex.c b/src/cmd/venti/srv/printindex.c
       t@@ -0,0 +1,99 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include <bio.h>
       +
       +Biobuf bout;
       +
       +/*
       + * Print one index entry on bout: address, score, type, size.
       + */
       +static void
       +pie(IEntry *ie)
       +{
       +        Bprint(&bout, "%22lld %V %3d %5d\n",
       +                ie->ia.addr, ie->score, ie->ia.type, ie->ia.size);
       +}
       +
       +/*
       + * Print the command synopsis on standard error and exit.
       + * A usage error is a failure, so exit with status "usage"
       + * rather than a success status.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: printindex [-B blockcachesize] config [isectname...]\n");
       +        threadexitsall("usage");
       +}
       +
       +Config conf;
       +
       +/*
       + * Report whether name was selected on the command line.
       + * With no names given (argc == 0) everything is selected.
       + * Returns 1 if selected, 0 otherwise.
       + */
       +int
       +shoulddump(char *name, int argc, char **argv)
       +{
       +        if(argc == 0)
       +                return 1;
       +        while(argc-- > 0)
       +                if(strcmp(name, *argv++) == 0)
       +                        return 1;
       +        return 0;
       +}
       +
       +/*
       + * Print every index entry stored in index section is.
       + * Each block is read, unpacked as a bucket, and its entries
       + * printed with pie.  A block that cannot be read is reported
       + * on standard error and skipped.
       + */
       +void
       +dumpisect(ISect *is)
       +{
       +        int j;
       +        uchar *buf;
       +        u32int i;
       +        u64int off;
       +        IBucket ib;
       +        IEntry ie;
       +
       +        buf = emalloc(is->blocksize);
       +        for(i=0; i<is->blocks; i++){
       +                off = is->blockbase+(u64int)is->blocksize*i;
       +                if(readpart(is->part, off, buf, is->blocksize) < 0)
       +                        fprint(2, "read %s at 0x%llux: %r\n", is->part->name, off);
       +                else{
       +                        unpackibucket(&ib, buf, is->bucketmagic);
       +                        for(j=0; j<ib.n; j++){
       +                                unpackientry(&ie, &ib.data[j*IEntrySize]);
       +                                pie(&ie);
       +                        }
       +                }
       +        }
       +        /* release the block buffer; it was previously leaked once per section */
       +        free(buf);
       +}
       +
       +/*
       + * printindex entry point.
       + * Options: -B size sets the disk block cache size; it is raised
       + * to a minimum derived from the index if too small.
       + * Arguments: the venti configuration file, then optional index
       + * section names; with no names every section is dumped.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int i;
       +        Index *ix;
       +        u32int bcmem;
       +
       +        bcmem = 0;
       +        ARGBEGIN{
       +        case 'B':
       +                /* EARGF: a missing argument is a usage error, not a nil pointer */
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc < 1)
       +                usage();
       +
       +        /* pie prints scores with %V, so install the venti formats as printarenas does */
       +        ventifmtinstall();
       +        fmtinstall('H', encodefmt);
       +
       +        if(initventi(argv[0], &conf) < 0)
       +                sysfatal("can't init venti: %r");
       +
       +        if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
       +                bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
       +        if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
       +        initdcache(bcmem);
       +
       +        ix = mainindex;
       +        Binit(&bout, 1, OWRITE);
       +        for(i=0; i<ix->nsects; i++)
       +                if(shoulddump(ix->sects[i]->name, argc-1, argv+1))
       +                        dumpisect(ix->sects[i]);
       +        Bterm(&bout);
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/printmap.c b/src/cmd/venti/srv/printmap.c
       t@@ -0,0 +1,42 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Print the command synopsis on standard error and exit
       + * with status "usage".
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: printmap [-B blockcachesize] config\n");
       +        threadexitsall("usage");
       +}
       +
       +Config conf;
       +
       +/*
       + * printmap entry point: load the venti configuration named by
       + * the single argument, read-only, and print the main index
       + * on standard output with printindex.
       + * The -B option is parsed for compatibility with the other
       + * tools; its value is not used here.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        u32int bcmem;
       +
       +        bcmem = 0;
       +        ARGBEGIN{
       +        case 'B':
       +                /* EARGF: a missing argument is a usage error, not a nil pointer */
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        /* this tool never modifies anything */
       +        readonly = 1;
       +
       +        if(argc != 1)
       +                usage();
       +
       +        if(initventi(argv[0], &conf) < 0)
       +                sysfatal("can't init venti: %r");
       +
       +        printindex(1, mainindex);
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/rdarena.c b/src/cmd/venti/srv/rdarena.c
       t@@ -0,0 +1,91 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +static int        verbose;
       +
       +/*
       + * Print the command synopsis on standard error and exit.
       + * A usage error is a failure, so exit with status "usage"
       + * rather than a success status.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: rdarena [-v] arenapart arena\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * Copy arena to standard output, starting one block before
       + * arena->base and ending one block after arena->base+arena->size.
       + * A summary of the arena is printed on standard error first.
       + * A failed read is reported and copying continues; a failed
       + * write is fatal.
       + */
       +static void
       +rdarena(Arena *arena)
       +{
       +        ZBlock *b;
       +        u64int a, e;
       +        u32int bs;
       +
       +        fprint(2, "copying %s to standard output\n", arena->name);
       +        printarena(2, arena);
       +
       +        /* transfer size: MaxIoSize, but never less than one arena block */
       +        bs = MaxIoSize;
       +        if(bs < arena->blocksize)
       +                bs = arena->blocksize;
       +
       +        b = alloczblock(bs, 0, arena->blocksize);
       +        e = arena->base + arena->size + arena->blocksize;
       +        for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
       +                /* near the end, drop to single blocks so the copy stops at e */
       +                if(a + bs > e)
       +                        bs = arena->blocksize;
       +                /* NOTE(review): after a failed read the buffer is still written below; confirm this is intended */
       +                if(readpart(arena->part, a, b->data, bs) < 0)
       +                        fprint(2, "can't copy %s, read at %lld failed: %r\n", arena->name, a);        
       +                if(write(1, b->data, bs) != bs)
       +                        sysfatal("can't copy %s, write at %lld failed: %r", arena->name, a);
       +        }
       +
       +        freezblock(b);
       +}
       +
       +/*
       + * rdarena entry point.
       + * Options: -v also prints the arena partition on standard error.
       + * Arguments: the arena partition file and the name of the arena
       + * to copy.  Exits after copying the first arena whose name
       + * matches; it is a fatal error if none does.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        ArenaPart *ap;
       +        Part *part;
       +        char *file, *aname;
       +        int i;
       +
       +        ventifmtinstall();
       +        statsinit();
       +
       +        ARGBEGIN{
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        readonly = 1;
       +
       +        if(argc != 2)
       +                usage();
       +
       +        file = argv[0];
       +        aname = argv[1];
       +
       +        part = initpart(file, OREAD|ODIRECT);
       +        if(part == nil)
       +                sysfatal("can't open partition %s: %r", file);
       +
       +        ap = initarenapart(part);
       +        if(ap == nil)
       +                sysfatal("can't initialize arena partition in %s: %r", file);
       +
       +        if(verbose)
       +                printarenapart(2, ap);
       +
       +        initdcache(8 * MaxDiskBlock);
       +
       +        /* look the arena up by name */
       +        for(i = 0; i < ap->narenas; i++){
       +                if(strcmp(ap->arenas[i]->name, aname) == 0){
       +                        rdarena(ap->arenas[i]);
       +                        threadexitsall(0);
       +                }
       +        }
       +
       +        sysfatal("couldn't find arena %s\n", aname);
       +}
   DIR diff --git a/src/cmd/venti/srv/round.c b/src/cmd/venti/srv/round.c
       t@@ -0,0 +1,102 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Called by the worker that performs rounds for r: record the
       + * current round as finished, wake everyone sleeping on
       + * r->finish, then sleep on r->start until another round has
       + * been requested (r->doanother).  On return the round number
       + * has been advanced and the request flag cleared.
       + */
       +void
       +waitforkick(Round *r)
       +{
       +        int n;
       +
       +        qlock(&r->lock);
       +        r->last = r->current;
       +        assert(r->current+1 == r->next);
       +        rwakeupall(&r->finish);
       +        while(!r->doanother)
       +                rsleep(&r->start);
       +        /* begin the next round */
       +        n = r->next++;
       +        r->current = n;
       +        r->doanother = 0;
       +        qunlock(&r->lock);
       +}
       +
       +/*
       + * Request another round of r and wake one sleeper on r->start.
       + * If wait is set, keep requesting and sleeping on r->finish
       + * until r->last has reached the value r->next had on entry.
       + * The callers in this file hold r->lock around the call.
       + */
       +static void
       +_kickround(Round *r, int wait)
       +{
       +        int n;
       +
       +        if(!r->doanother)
       +                trace(TraceProc, "kick %s", r->name);
       +        r->doanother = 1;
       +        rwakeup(&r->start);
       +        if(wait){
       +                n = r->next;
       +                /* the difference is compared as a signed int */
       +                while((int)(n - r->last) > 0){
       +                        r->doanother = 1;
       +                        rwakeup(&r->start);
       +                        rsleep(&r->finish);
       +                }
       +        }
       +}
       +
       +/*
       + * Locked wrapper around _kickround: request another round of r,
       + * and if wait is set, block as _kickround does.
       + */
       +void
       +kickround(Round *r, int wait)
       +{
       +        qlock(&r->lock);
       +        _kickround(r, wait);
       +        qunlock(&r->lock);
       +}
       +
       +/*
       + * Initialize r: clear it, record its name and delay, point its
       + * three rendezvous structures at r->lock, and set the round
       + * counters so that round 1 is the next one to run.
       + */
       +void
       +initround(Round *r, char *name, int delay)
       +{
       +        memset(r, 0, sizeof *r);
       +
       +        /* start, finish and delaywait all sleep under the same lock */
       +        r->start.l = &r->lock;
       +        r->finish.l = &r->lock;
       +        r->delaywait.l = &r->lock;
       +
       +        r->name = name;
       +        r->delaytime = delay;
       +
       +        /* no round has run and none is requested */
       +        r->doanother = 0;
       +        r->last = 0;
       +        r->current = 0;
       +        r->next = 1;
       +}
       +
       +/*
       + * Set r->delaykick and wake one sleeper on r->delaywait
       + * (delaykickroundproc sleeps there).
       + */
       +void
       +delaykickround(Round *r)
       +{
       +        qlock(&r->lock);
       +        r->delaykick = 1;
       +        rwakeup(&r->delaywait);
       +        qunlock(&r->lock);
       +}
       +
       +/*
       + * Thread body; v is the Round to serve.  Never returns.
       + * Waits until a delayed kick has been requested, sleeps for
       + * r->delaytime with the lock released, and then, if no new round
       + * was started in the meantime (r->next unchanged), kicks a
       + * round and waits for it to finish.
       + */
       +void
       +delaykickroundproc(void *v)
       +{
       +        Round *r = v;
       +        int n;
       +
       +        threadsetname("delaykickproc %s", r->name);
       +        qlock(&r->lock);
       +        for(;;){
       +                /*
       +                 * NOTE(review): r->delaykick is not cleared anywhere in
       +                 * this function; confirm whether it is reset elsewhere,
       +                 * otherwise this wait is skipped after the first kick.
       +                 */
       +                while(r->delaykick == 0){
       +                        trace(TraceProc, "sleep");
       +                        rsleep(&r->delaywait);
       +                }
       +
       +                /* remember which round was pending before the delay */
       +                n = r->next;
       +                qunlock(&r->lock);
       +
       +                trace(TraceProc, "waitround 0x%ux", (uint)n);
       +                sleep(r->delaytime);
       +
       +                qlock(&r->lock);
       +                if(n == r->next){
       +                        trace(TraceProc, "kickround 0x%ux", (uint)n);
       +                        _kickround(r, 1);
       +                }
       +
       +                trace(TraceProc, "finishround 0x%ux", (uint)n);
       +        }
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/score.c b/src/cmd/venti/srv/score.c
       t@@ -0,0 +1,43 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +u8int zeroscore[VtScoreSize];
       +
       +/*
       + * Compute the SHA-1 digest of the n bytes at buf into score.
       + */
       +void
       +scoremem(u8int *score, u8int *buf, int n)
       +{
       +        DigestState s;
       +
       +        /* start from a zeroed digest state */
       +        memset(&s, 0, sizeof s);
       +        sha1(buf, n, score, &s);
       +}
       +
       +/*
       + * Return the value (0-15) of hexadecimal digit c,
       + * accepting both cases, or -1 if c is not a hex digit.
       + */
       +static int
       +hexv(int c)
       +{
       +        /* fold upper-case letters onto lower case */
       +        if(c >= 'A' && c <= 'F')
       +                c += 'a' - 'A';
       +        if(c >= 'a' && c <= 'f')
       +                return 10 + (c - 'a');
       +        if(c < '0' || c > '9')
       +                return -1;
       +        return c - '0';
       +}
       +
       +/*
       + * Parse 2*VtScoreSize hexadecimal digits from s into score.
       + * Returns -1 if a non-hex character (including an early end of
       + * string) is met; otherwise returns 1 if the string ends right
       + * after the digits and 0 if more characters follow.
       + */
       +int
       +strscore(char *s, u8int *score)
       +{
       +        int hi, lo;
       +        u8int *out, *end;
       +
       +        end = score + VtScoreSize;
       +        for(out = score; out < end; out++){
       +                /* the high digit is checked first so s[1] is never read past a NUL */
       +                hi = hexv(s[0]);
       +                if(hi < 0)
       +                        return -1;
       +                lo = hexv(s[1]);
       +                if(lo < 0)
       +                        return -1;
       +                *out = (hi << 4) + lo;
       +                s += 2;
       +        }
       +        return *s == '\0';
       +}
   DIR diff --git a/src/cmd/venti/srv/sortientry.c b/src/cmd/venti/srv/sortientry.c
       t@@ -0,0 +1,376 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include <bio.h>
       +
       +typedef struct IEBuck        IEBuck;
       +typedef struct IEBucks        IEBucks;
       +
       +enum
       +{
       +        ClumpChunks        = 32*1024        /* ClumpInfo entries read per readclumpinfos call */
       +};
       +
       +/*
       + * One bucket of the initial bucket sort.
       + */
       +struct IEBuck
       +{
       +        u32int        head;                /* head of chain of chunks on the disk */
       +        u32int        used;                /* usage of the last chunk */
       +        u64int        total;                /* total number of bytes in this bucket */
       +        u8int        *buf;                /* chunk of entries for this bucket */
       +};
       +
       +/*
       + * The whole set of sorting buckets, plus the partition that
       + * holds the chunks written out for them.
       + */
       +struct IEBucks
       +{
       +        Part        *part;
       +        u64int        off;                /* offset for writing data in the partition */
       +        u32int        chunks;                /* total chunks written to fd */
       +        u64int        max;                /* max bytes entered in any one bucket */
       +        int        bits;                /* number of bits in initial bucket sort */
       +        int        nbucks;                /* 1 << bits, the number of buckets */
       +        u32int        size;                /* bytes in each of the buckets chunks */
       +        u32int        usable;                /* amount usable for IEntry data */
       +        u8int        *buf;                /* buffer for all chunks */
       +        u8int        *xbuf;                /* NOTE(review): purpose not visible in this section */
       +        IEBuck        *bucks;
       +};
       +
       +/*
       + * Big-endian 32-bit load and store on a byte pointer p.
       + * U32PUT is a single comma expression so that it behaves as
       + * one statement, e.g. as the body of an unbraced if/else;
       + * it still evaluates p and v more than once.
       + */
       +#define        U32GET(p)        (((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
       +#define        U32PUT(p,v)        ((p)[0]=(v)>>24,(p)[1]=(v)>>16,(p)[2]=(v)>>8,(p)[3]=(v))
       +
       +static IEBucks        *initiebucks(Part *part, int bits, u32int size);
       +static int        flushiebuck(IEBucks *ib, int b, int reset);
       +static int        flushiebucks(IEBucks *ib);
       +static u32int        sortiebuck(IEBucks *ib, int b);
       +static u64int        sortiebucks(IEBucks *ib);
       +static int        sprayientry(IEBucks *ib, IEntry *ie);
       +static u32int        readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b);
       +static u32int        readiebuck(IEBucks *ib, int b);
       +static void        freeiebucks(IEBucks *ib);
       +
       +/*
       + * Build, in partition tmp, a sorted file of every IEntry that
       + * belongs in index ix.  The arenas' clump directories are
       + * assumed to be up to date: each one is read, its entries are
       + * converted to index entries, and the result is sorted.
       + * On success *base is set to chunks*size of the sorting
       + * buckets and the number of entries is returned.
       + * Returns TWID64 on any failure.
       + */
       +u64int
       +sortrawientries(Index *ix, Part *tmp, u64int *base, Bloom *bloom)
       +{
       +        IEBucks *ib;
       +        u64int total, nsorted;
       +        u32int got;
       +        int ai, failed;
       +
       +//ZZZ should allow configuration of bits, bucket size
       +        ib = initiebucks(tmp, 8, 64*1024);
       +        if(ib == nil){
       +                seterr(EOk, "can't create sorting buckets: %r");
       +                return TWID64;
       +        }
       +
       +        /* gather the entries of every arena, stopping at the first failure */
       +        failed = 0;
       +        total = 0;
       +        fprint(2, "constructing entry list\n");
       +        for(ai = 0; ai < ix->narenas && !failed; ai++){
       +                got = readarenainfo(ib, ix->arenas[ai], ix->amap[ai].start, bloom);
       +                if(got == TWID32)
       +                        failed = 1;
       +                else
       +                        total += got;
       +        }
       +
       +        fprint(2, "sorting %lld entries\n", total);
       +        if(!failed){
       +                nsorted = sortiebucks(ib);
       +                *base = (u64int)ib->chunks * ib->size;
       +                if(nsorted != total){
       +                        fprint(2, "sorting messed up: clumps=%lld sorted=%lld\n", total, nsorted);
       +                        failed = 1;
       +                }
       +        }
       +        freeiebucks(ib);
       +        return failed ? TWID64 : total;
       +}
       +
       +/*
       + * Debugging aid: crash if the ulong four words before cis is
       + * not 0xA110C09.
       + * NOTE(review): presumably an allocator header magic; confirm.
       + */
       +#define CHECK(cis)        if(((ulong*)cis)[-4] != 0xA110C09) xabort();
       +
       +/*
       + * Crash deliberately by storing through a nil pointer.
       + */
       +void
       +xabort(void)
       +{
       +        int *x;
       +
       +        x = 0;
       +        *x = 0;
       +}
       +
       +/*
       + * read in all of the arena's clump directory,
       + * convert to IEntry format, and bucket sort based
       + * on the first few bits.
       + *
       + * a is the address given to the first clump's entry; it is
       + * advanced by each clump's stored size plus ClumpSize.
       + * every score is marked in the bloom filter b, but clumps of
       + * type VtCorruptType are not added to the buckets.
       + *
       + * returns the number of non-corrupt clumps in the arena, or TWID32
       + * if the directory could not be read or an entry could not be
       + * placed in its bucket.
       + */
       +static u32int
       +readarenainfo(IEBucks *ib, Arena *arena, u64int a, Bloom *b)
       +{
       +        IEntry ie;
       +        ClumpInfo *ci, *cis;
       +        u32int clump;
       +        int i, n, ok, nskip;
       +
       +        if(arena->memstats.clumps)
       +                fprint(2, "\tarena %s: %d entries\n", arena->name, arena->memstats.clumps);
       +        else
       +                fprint(2, "[%s] ", arena->name);
       +
       +        cis = MKN(ClumpInfo, ClumpChunks);
       +        ok = 0;
       +        nskip = 0;
       +        memset(&ie, 0, sizeof(IEntry));
       +        for(clump = 0; clump < arena->memstats.clumps; clump += n){
       +                /* read the directory at most ClumpChunks entries at a time */
       +                n = ClumpChunks;
       +                if(n > arena->memstats.clumps - clump)
       +                        n = arena->memstats.clumps - clump;
       +                if(readclumpinfos(arena, clump, cis, n) != n){
       +                        seterr(EOk, "arena directory read failed: %r");
       +                        ok = -1;
       +                        break;
       +                }
       +
       +                for(i = 0; i < n; i++){
       +                        ci = &cis[i];
       +                        ie.ia.type = ci->type;
       +                        ie.ia.size = ci->uncsize;
       +                        ie.ia.addr = a;
       +                        a += ci->size + ClumpSize;
       +                        /* clump length rounded up to whole 1<<ABlockLog byte blocks */
       +                        ie.ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
       +                        scorecp(ie.score, ci->score);
       +                        if(ci->type == VtCorruptType)
       +                                nskip++;
       +                        else if(sprayientry(ib, &ie) < 0){
       +                                /* sprayientry has already set the error string */
       +                                ok = -1;
       +                                break;
       +                        }
       +                        markbloomfilter(b, ie.score);
       +                }
       +                if(ok < 0)
       +                        break;
       +        }
       +        free(cis);
       +        if(ok < 0)
       +                return TWID32;
       +        return clump - nskip;
       +}
       +
       +/*
       + * initialize the external bucket sorting data structures
       + *
       + * allocates 1<<bits buckets, each with an in-memory buffer of
       + * size bytes, to be spilled to part.  usable is the portion of a
       + * buffer that holds whole entries once U32Size bytes are set
       + * aside for the chain link written by flushiebuck.
       + *
       + * returns nil, with the error string set, if an allocation fails.
       + */
       +static IEBucks*
       +initiebucks(Part *part, int bits, u32int size)
       +{
       +        IEBucks *ib;
       +        int i;
       +
       +        ib = MKZ(IEBucks);
       +        if(ib == nil){
       +                seterr(EOk, "out of memory");
       +                return nil;
       +        }
       +        ib->bits = bits;
       +        ib->nbucks = 1 << bits;
       +        ib->size = size;
       +        ib->usable = (size - U32Size) / IEntrySize * IEntrySize;
       +        ib->bucks = MKNZ(IEBuck, ib->nbucks);
       +        if(ib->bucks == nil){
       +                seterr(EOk, "out of memory allocation sorting buckets");
       +                freeiebucks(ib);
       +                return nil;
       +        }
       +        /* one extra buffer's worth of space leaves room to align buf */
       +        ib->xbuf = MKN(u8int, size * ((1 << bits)+1));
       +        if(ib->xbuf == nil){
       +                /* test the allocation itself, not the pointer derived from it */
       +                seterr(EOk, "out of memory allocating sorting buckets' buffers");
       +                freeiebucks(ib);
       +                return nil;
       +        }
       +        /* round xbuf up to a multiple of size; the mask assumes size is a power of two */
       +        ib->buf = (u8int*)(((ulong)ib->xbuf+size-1)&~(ulong)(size-1));
       +        for(i = 0; i < ib->nbucks; i++){
       +                ib->bucks[i].head = TWID32;
       +                ib->bucks[i].buf = &ib->buf[i * size];
       +        }
       +        ib->part = part;
       +        return ib;
       +}
       +
       +/*
       + * release ib and its buffers.
       + * while ib->xbuf is set, ib->buf points into that allocation and
       + * must not be passed to free itself; once sortiebucks has released
       + * xbuf and cleared it, ib->buf is an allocation of its own.
       + */
       +static void
       +freeiebucks(IEBucks *ib)
       +{
       +        if(ib == nil)
       +                return;
       +        free(ib->bucks);
       +        if(ib->xbuf != nil)
       +                free(ib->xbuf);
       +        else
       +                free(ib->buf);
       +        free(ib);
       +}
       +
       +/*
       + * initial sort: put the entry into the correct bucket
       + *
       + * the bucket is chosen by hashbits on the score.  when the bucket
       + * has no room for another entry it is written out with flushiebuck.
       + * returns 0 on success, -1 with the error string set otherwise.
       + */
       +static int
       +sprayientry(IEBucks *ib, IEntry *ie)
       +{
       +        u32int n;
       +        int b;
       +
       +        b = hashbits(ie->score, ib->bits);
       +        n = ib->bucks[b].used;
       +        if(n + IEntrySize > ib->usable){
       +                /* should be flushed below, but if flush fails, this can happen */
       +                seterr(EOk, "out of space in bucket");
       +                return -1;
       +        }
       +        packientry(ie, &ib->bucks[b].buf[n]);
       +        n += IEntrySize;
       +        ib->bucks[b].used = n;
       +        if(n + IEntrySize <= ib->usable)
       +                return 0;
       +        return flushiebuck(ib, b, 1);
       +}
       +
       +/*
       + * finish sorting:
       + * for each bucket, read it in and sort it
       + * write out the final file
       + *
       + * returns the total number of entries written, or TWID64 on failure.
       + */
       +static u64int
       +sortiebucks(IEBucks *ib)
       +{
       +        u64int tot;
       +        u32int n;
       +        int i;
       +
       +        if(flushiebucks(ib) < 0)
       +                return TWID64;
       +        for(i = 0; i < ib->nbucks; i++)
       +                ib->bucks[i].buf = nil;
       +        /* sorted output starts right after the last chunk written so far */
       +        ib->off = (u64int)ib->chunks * ib->size;
       +
       +        /*
       +         * trade the per-bucket buffers for one buffer big enough for
       +         * the largest bucket.  xbuf is cleared so that freeiebucks
       +         * can tell which pointer owns the memory.
       +         */
       +        free(ib->xbuf);
       +        ib->xbuf = nil;
       +        ib->buf = MKN(u8int, ib->max + U32Size);
       +        if(ib->buf == nil){
       +                seterr(EOk, "out of memory allocating final sorting buffer; try more buckets");
       +                return TWID64;
       +        }
       +        tot = 0;
       +        for(i = 0; i < ib->nbucks; i++){
       +                n = sortiebuck(ib, i);
       +                if(n == TWID32)
       +                        return TWID64;
       +                if(n != ib->bucks[i].total/IEntrySize)
       +                        fprint(2, "bucket %d changed count %d => %d\n", 
       +                                i, (int)(ib->bucks[i].total/IEntrySize), n);
       +                tot += n;
       +        }
       +        return tot;
       +}
       +
       +/*
       + * load bucket b of ib, order its entries with ientrycmp,
       + * and append them to the output at ib->off.
       + * returns the number of entries written, or TWID32 on failure.
       + */
       +static u32int
       +sortiebuck(IEBucks *ib, int b)
       +{
       +        u32int nent, nbytes;
       +
       +        nent = readiebuck(ib, b);
       +        if(nent != TWID32){
       +                qsort(ib->buf, nent, IEntrySize, ientrycmp);
       +                nbytes = nent * IEntrySize;
       +                if(writepart(ib->part, ib->off, ib->buf, nbytes) >= 0){
       +                        ib->off += nbytes;
       +                        return nent;
       +                }
       +                seterr(EOk, "can't write sorted bucket: %r");
       +        }
       +        return TWID32;
       +}
       +
       +/*
       + * write out a single bucket
       + *
       + * the bucket's buffer becomes chunk number ib->chunks of the
       + * partition.  the number of the bucket's previously written chunk
       + * (TWID32 if none) is stored just after the entries, so the chunks
       + * of a bucket form a chain starting at bucks[b].head.
       + * if reset is set the buffer is emptied after the write.
       + * an empty bucket is not written.
       + *
       + * returns 0 on success, -1 with the error string set if the
       + * write fails.
       + */
       +static int
       +flushiebuck(IEBucks *ib, int b, int reset)
       +{
       +        u32int n;
       +
       +        n = ib->bucks[b].used;
       +        if(n == 0)
       +                return 0;
       +        U32PUT(&ib->bucks[b].buf[n], ib->bucks[b].head);
       +        if(writepart(ib->part, (u64int)ib->chunks * ib->size, ib->bucks[b].buf, ib->size) < 0){
       +                /* report the failure; callers check the return value */
       +                seterr(EOk, "can't write sorting bucket to file: %r");
       +                return -1;
       +        }
       +        ib->bucks[b].head = ib->chunks++;
       +        ib->bucks[b].total += ib->bucks[b].used;
       +        if(reset)
       +                ib->bucks[b].used = 0;
       +        return 0;
       +}
       +
       +/*
       + * flush every bucket without resetting it, and
       + * leave in ib->max the largest byte total of any bucket.
       + * returns -1 as soon as one flush fails, 0 otherwise.
       + */
       +static int
       +flushiebucks(IEBucks *ib)
       +{
       +        IEBuck *bk;
       +        int b;
       +
       +        b = 0;
       +        while(b < ib->nbucks){
       +                if(flushiebuck(ib, b, 0) < 0)
       +                        return -1;
       +                bk = &ib->bucks[b];
       +                if(ib->max < bk->total)
       +                        ib->max = bk->total;
       +                b++;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * follow bucket b's chain of chunks, newest first, copying
       + * each chunk's entries into ib->buf one after another.
       + * returns the bucket's total number of IEntries, or TWID32
       + * if a chunk cannot be read.
       + */
       +static u32int
       +readiebuck(IEBucks *ib, int b)
       +{
       +        IEBuck *bk;
       +        u32int chunk, len, off;
       +
       +        bk = &ib->bucks[b];
       +
       +        /* only the chunk at the head of the chain may be partly full */
       +        len = bk->used;
       +        if(len == 0)
       +                len = ib->usable;
       +
       +        off = 0;
       +        for(chunk = bk->head; chunk != TWID32; chunk = U32GET(&ib->buf[off])){
       +                /* the extra U32Size bytes hold the link to the next chunk */
       +                if(readpart(ib->part, (u64int)chunk * ib->size, &ib->buf[off], len + U32Size) < 0){
       +                        seterr(EOk, "can't read index sort bucket: %r");
       +                        return TWID32;
       +                }
       +                off += len;
       +                len = ib->usable;
       +        }
       +        if(off != bk->total)
       +                fprint(2, "\tbucket %d: expected %d entries, got %d\n",
       +                        b, (int)bk->total/IEntrySize, off/IEntrySize);
       +        return off / IEntrySize;
       +}
   DIR diff --git a/src/cmd/venti/srv/stats.c b/src/cmd/venti/srv/stats.c
       t@@ -0,0 +1,212 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/* when zero, addstat and addstat2 return without counting */
       +int collectstats = 1;
       +
       +/*
       + * names of the statistics, one per slot.
       + * keep in sync with dat.h:/NStat
       + */
       +Statdesc statdesc[NStat] =
       +{
       +        { "rpc total", },
       +        { "rpc reads", },
       +        { "rpc reads ok", },
       +        { "rpc reads failed", },
       +        { "rpc read bytes", },
       +        { "rpc read time", },
       +        { "rpc read cached", },
       +        { "rpc read cached time", },
       +        { "rpc read uncached", },
       +        { "rpc read uncached time", },
       +
       +        { "rpc writes", },
       +        { "rpc writes new", },
       +        { "rpc writes old", },
       +        { "rpc writes failed", },
       +        { "rpc write bytes", },
       +        { "rpc write time", },
       +        { "rpc write new time", },
       +        { "rpc write old time", },
       +
       +        { "lump cache hits", },
       +        { "lump cache misses", },
       +        { "lump cache reads", },
       +        { "lump cache writes", },
       +        { "lump cache size", },
       +        { "lump cache stall", },
       +        { "lump cache read time", },
       +
       +        { "disk cache hits", },
       +        { "disk cache misses", },
       +        { "disk cache lookups", },
       +        { "disk cache reads", },
       +        { "disk cache writes", },
       +        { "disk cache dirty", },
       +        { "disk cache size", },
       +        { "disk cache flushes", },
       +        { "disk cache stalls", },
       +        { "disk cache lookup time", },
       +
       +        { "disk block stalls", },
       +        { "lump stalls", },
       +
       +        { "index cache hits", },
       +        { "index cache misses", },
       +        { "index cache reads", },
       +        { "index cache writes", },
       +        { "index cache fills", },
       +        { "index cache prefetches", },
       +        { "index cache dirty", },
       +        { "index cache size", },
       +        { "index cache flushes", },
       +        { "index cache stalls", },
       +        { "index cache read time", },
       +
       +        { "bloom filter hits", },
       +        { "bloom filter misses", },
       +        { "bloom filter false misses", },
       +        { "bloom filter lookups", },
       +        { "bloom filter ones", },
       +        { "bloom filter bits", },
       +        { "bloom filter lookup time", },
       +
       +        { "arena block reads", },
       +        { "arena block read bytes", },
       +        { "arena block writes", },
       +        { "arena block write bytes", },
       +
       +        { "isect block reads", },
       +        { "isect block read bytes", },
       +        { "isect block writes", },
       +        { "isect block write bytes", },
       +
       +        { "sum reads", },
       +        { "sum read bytes", },
       +};
       +
       +/* held by setstat, addstat and addstat2 while they update stats.n[] */
       +QLock statslock;
       +/* the current counters */
       +Stats stats;
       +/* ring of nstathist snapshots of stats, filled in by statsproc */
       +Stats *stathist;
       +int nstathist;
       +/* NOTE(review): statind is not referenced in the code visible here */
       +ulong statind;
       +/* number of snapshots taken so far; stattime%nstathist is the next slot written */
       +ulong stattime;
       +
       +/*
       + * sampling loop: stamp stats with the current time, copy it
       + * into the next slot of the stathist ring, and sleep(1000)
       + * before taking the next sample.  never returns.
       + */
       +void
       +statsproc(void *v)
       +{
       +        Stats *slot;
       +
       +        USED(v);
       +
       +        while(1){
       +                stats.now = time(0);
       +                slot = &stathist[stattime%nstathist];
       +                *slot = stats;
       +                stattime += 1;
       +                sleep(1000);
       +        }
       +}
       +
       +/*
       + * allocate the zeroed history ring and start
       + * statsproc to fill it in.
       + */
       +void
       +statsinit(void)
       +{
       +        enum
       +        {
       +                NHist        = 90000                /* snapshots kept in the ring */
       +        };
       +
       +        nstathist = NHist;
       +        stathist = MKNZ(Stats, nstathist);
       +        vtproc(statsproc, nil);
       +}
       +
       +/*
       + * set statistic index to val, under statslock.
       + * unlike addstat, this does not look at collectstats.
       + */
       +void
       +setstat(int index, long val)
       +{
       +        qlock(&statslock);
       +        stats.n[index] = val;
       +        qunlock(&statslock);
       +}
       +
       +/*
       + * add inc to statistic index, under statslock.
       + * does nothing when collectstats is zero.
       + */
       +void
       +addstat(int index, int inc)
       +{
       +        if(collectstats){
       +                qlock(&statslock);
       +                stats.n[index] += inc;
       +                qunlock(&statslock);
       +        }
       +}
       +
       +/*
       + * add inc to statistic index and inc1 to statistic index1
       + * while holding statslock once.
       + * does nothing when collectstats is zero.
       + */
       +void
       +addstat2(int index, int inc, int index1, int inc1)
       +{
       +        if(collectstats){
       +                qlock(&statslock);
       +                stats.n[index] += inc;
       +                stats.n[index1] += inc1;
       +                qunlock(&statslock);
       +        }
       +}
       +
       +/*
       + * placeholder: currently does nothing.
       + */
       +void
       +printstats(void)
       +{
       +}
       +
       +/*
       + * summarize the recorded history between times t0 and t1 into
       + * nbin bins.  fn is called with each pair of adjacent snapshots
       + * (and arg) to produce one sample value; each bin records the
       + * minimum, maximum, average and count of the samples that fall
       + * in it.  a bin with no samples, other than the first, is given
       + * a copy of the bin before it.
       + *
       + * NOTE(review): stats.now, stattime and stathist are read here
       + * without any lock while statsproc updates them -- confirm that
       + * this is acceptable.
       + */
       +void
       +binstats(long (*fn)(Stats *s0, Stats *s1, void *arg), void *arg,
       +        long t0, long t1, Statbin *bin, int nbin)
       +{
       +        long t, xt0, te, v;
       +        int i, j, lo, hi, m, oj;
       +        vlong tot;
       +        Statbin *b;
       +        
       +        t = stats.now;
       +        
       +        /* negative times mean relative to now. */
       +        if(t0 <= 0)
       +                t0 += t;
       +        if(t1 <= 0)
       +                t1 += t;
       +        /* ten minute range if none given */
       +        if(t1 <= t0)
       +                t0 = t1 - 60*10;
       +        if(0) fprint(2, "stats %ld-%ld\n", t0, t1);
       +        
       +        /* binary search to find t0-1 or close */
       +        /*
       +         * positions stattime .. stattime+nstathist-1, taken mod nstathist,
       +         * cover every slot of the ring starting with the one statsproc
       +         * will overwrite next.
       +         */
       +        lo = stattime;
       +        hi = stattime+nstathist;
       +        while(lo+1 < hi){
       +                m = (lo+hi)/2;
       +                if(stathist[m%nstathist].now >= t0)
       +                        hi = m;
       +                else
       +                        lo = m;
       +        }
       +        xt0 = stathist[lo%nstathist].now;
       +        if(0) fprint(2, "bsearch found %ld\n", xt0);
       +        if(xt0 >= t1){
       +                /* no samples */
       +                memset(bin, 0, nbin*sizeof bin[0]);
       +                return;
       +        }
       +
       +        hi = stattime+nstathist;
       +        te = t0;
       +        j = lo+1;
       +        for(i=0; i<nbin; i++){
       +                /* t (start of this bin) and oj are used only by the disabled print below */
       +                t = te;
       +                /*
       +                 * NOTE(review): with i starting at 0 the first bin ends at t0
       +                 * and the last bin ends before t1 -- confirm whether i+1
       +                 * was intended.
       +                 */
       +                te = t0 + (t1-t0)*i/nbin;
       +                b = &bin[i];
       +                memset(b, 0, sizeof *b);
       +                tot = 0;
       +                oj = j;
       +                /* j carries over from bin to bin, so each snapshot is visited once */
       +                for(; j<hi && stathist[j%nstathist].now<te; j++){
       +                        v = fn(&stathist[(j-1)%nstathist], &stathist[j%nstathist], arg);
       +                        if(b->nsamp==0 || v < b->min)
       +                                b->min = v;
       +                        if(b->nsamp==0 || v > b->max)
       +                                b->max = v;
       +                        tot += v;
       +                        b->nsamp++;
       +                }
       +                if(0) fprint(2, "bin%d: %ld to %ld; %d to %d - %d samples\n", i, t, te, oj, j, b->nsamp);
       +                if(b->nsamp)
       +                        b->avg = tot / b->nsamp;
       +                /* an empty bin repeats the previous bin's values */
       +                if(b->nsamp==0 && i>0)
       +                        *b = bin[i-1];
       +        }        
       +}
   DIR diff --git a/src/cmd/venti/srv/stdinc.h b/src/cmd/venti/srv/stdinc.h
       t@@ -0,0 +1,9 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <venti.h>
       +#include <flate.h>
       +#include <libsec.h>
       +#include <thread.h>
       +#include <httpd.h>
       +#include <draw.h>
       +#include <memdraw.h>
   DIR diff --git a/src/cmd/venti/srv/syncarena.c b/src/cmd/venti/srv/syncarena.c
       t@@ -0,0 +1,174 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +static int        writeclumphead(Arena *arena, u64int aa, Clump *cl);
       +static int        writeclumpmagic(Arena *arena, u64int aa, u32int magic);
       +
       +/*
       + * returns 0 if c and d agree in type, size, uncsize
       + * and score, and 1 otherwise.
       + */
       +int
       +clumpinfocmp(ClumpInfo *c, ClumpInfo *d)
       +{
       +        if(c->type != d->type)
       +                return 1;
       +        if(c->size != d->size)
       +                return 1;
       +        if(c->uncsize != d->uncsize)
       +                return 1;
       +        return scorecmp(c->score, d->score) != 0;
       +}
       +
       +/*
       + * synchronize the clump info directory
       + * with the clumps actually stored in the arena.
       + * the directory should be at least as up to date
       + * as the arena's trailer.
       + *
       + * checks/updates at most n clumps.
       + *
       + * if zok is set, directory entries that were never written
       + * are not reported (they still set SyncCIZero).
       + * if fix is set, bad clump headers and directory entries
       + * are rewritten.
       + * if start is nonzero and the arena's statistics changed,
       + * the new state is handed to setdcachestate.
       + *
       + * returns 0 if ok, flags if error occurred
       + */
       +int
       +syncarena(Arena *arena, u64int start, u32int n, int zok, int fix)
       +{
       +        ZBlock *lump;
       +        Clump cl;
       +        ClumpInfo ci;
       +        static ClumpInfo zci = { .type = -1 };
       +        u8int score[VtScoreSize];
       +        u64int uncsize, used, aa;
       +        u32int clump, clumps, cclumps, magic;
       +        int err, flush, broken;
       +        AState as;
       +
       +        /* remember the statistics on entry to detect changes at the end */
       +        used = arena->memstats.used;
       +        clumps = arena->memstats.clumps;
       +        cclumps = arena->memstats.cclumps;
       +        uncsize = arena->memstats.uncsize;
       +        trace(TraceProc, "syncarena start");
       +        flush = 0;
       +        err = 0;
       +        for(; n; n--){
       +                aa = arena->memstats.used;
       +                clump = arena->memstats.clumps;
       +                magic = clumpmagic(arena, aa);
       +                if(magic == ClumpFreeMagic)
       +                        break;
       +                if(magic != arena->clumpmagic){
       +                        /* reported, but not counted as a data error */
       +                        fprint(2, "%s: illegal clump magic number=%#8.8ux at clump=%d\n", arena->name, magic, clump);
       +                        if(fix && writeclumpmagic(arena, aa, ClumpFreeMagic) < 0){
       +                                fprint(2, "can't write corrected clump free magic: %r\n");
       +                                err |= SyncFixErr;
       +                        }
       +                        break;
       +                }
       +
       +                broken = 0;
       +                lump = loadclump(arena, aa, 0, &cl, score, 0);
       +                if(lump == nil){
       +                        /* an unreadable clump ends the scan without setting an error flag */
       +                        fprint(2, "%s: clump=%d failed to read correctly: %r\n", arena->name, clump);
       +                        break;
       +                }else if(cl.info.type != VtCorruptType){
       +                        scoremem(score, lump->data, cl.info.uncsize);
       +                        if(scorecmp(cl.info.score, score) != 0){
       +                                /* ignore partially written block */
       +                                if(cl.encoding == ClumpENone)
       +                                        break;
       +                                fprint(2, "%s: clump=%d has mismatched score\n", arena->name, clump);
       +                                err |= SyncDataErr;
       +                                broken = 1;
       +                        }else if(vttypevalid(cl.info.type) < 0){
       +                                fprint(2, "%s: clump=%d has invalid type %d\n", arena->name, clump, cl.info.type);
       +                                err |= SyncDataErr;
       +                                broken = 1;
       +                        }
       +                        if(broken && fix){
       +                                cl.info.type = VtCorruptType;
       +                                if(writeclumphead(arena, aa, &cl) < 0){
       +                                        fprint(2, "%s: can't write corrected clump header: %r\n", arena->name);
       +                                        err |= SyncFixErr;
       +                                }
       +                        }
       +                }
       +                freezblock(lump);
       +                arena->memstats.used += ClumpSize + cl.info.size;
       +
       +                arena->memstats.clumps++;
       +                /* compare the directory entry with the clump's own header */
       +                if(!broken && readclumpinfo(arena, clump, &ci)<0){
       +                        fprint(2, "%s: arena directory read failed\n", arena->name);
       +                        broken = 1;
       +                }else if(!broken && clumpinfocmp(&ci, &cl.info)!=0){
       +                        if(clumpinfocmp(&ci, &zci) == 0){
       +                                err |= SyncCIZero;
       +                                if(!zok)
       +                                        fprint(2, "%s: unwritten clump info for clump=%d\n", arena->name, clump);
       +                        }else{
       +                                err |= SyncCIErr;
       +                                fprint(2, "%s: bad clump info for clump=%d\n", arena->name, clump);
       +                                fprint(2, "\texpected score=%V type=%d size=%d uncsize=%d\n",
       +                                        cl.info.score, cl.info.type, cl.info.size, cl.info.uncsize);
       +                                fprint(2, "\tfound score=%V type=%d size=%d uncsize=%d\n",
       +                                        ci.score, ci.type, ci.size, ci.uncsize);
       +                        }
       +                        broken = 1;
       +                }
       +                if(broken && fix){
       +                        flush = 1;
       +                        ci = cl.info;
       +                        if(writeclumpinfo(arena, clump, &ci) < 0){
       +                                fprint(2, "%s: can't write correct clump directory: %r\n", arena->name);
       +                                err |= SyncFixErr;
       +                        }
       +                }
       +                trace(TraceProc, "syncarena unindexed clump %V %d", cl.info.score, arena->memstats.clumps);
       +
       +                arena->memstats.uncsize += cl.info.uncsize;
       +                if(cl.info.size < cl.info.uncsize)
       +                        arena->memstats.cclumps++;
       +        }
       +
       +        if(flush){
       +                trace(TraceProc, "syncarena flush");
       +                arena->wtime = now();
       +                if(arena->ctime == 0 && arena->memstats.clumps)
       +                        arena->ctime = arena->wtime;
       +                flushdcache();
       +        }
       +
       +        if(used != arena->memstats.used
       +        || clumps != arena->memstats.clumps
       +        || cclumps != arena->memstats.cclumps
       +        || uncsize != arena->memstats.uncsize)
       +                err |= SyncHeader;
       +        if(start && (err&SyncHeader)){
       +                trace(TraceProc, "syncarena setdcachestate");
       +                as.arena = arena;
       +                as.aa = start+arena->memstats.used;
       +                as.stats = arena->memstats;
       +                setdcachestate(&as);
       +        }
       +
       +        return err;
       +}
       +
       +/*
       + * pack cl's header with the arena's clump magic and
       + * write the ClumpSize bytes at arena address aa.
       + * returns 0 on success, -1 on failure.
       + */
       +static int
       +writeclumphead(Arena *arena, u64int aa, Clump *cl)
       +{
       +        ZBlock *zb;
       +        int rv;
       +
       +        zb = alloczblock(ClumpSize, 0, arena->blocksize);
       +        if(zb == nil)
       +                return -1;
       +        rv = -1;
       +        if(packclump(cl, zb->data, arena->clumpmagic) >= 0
       +        && writearena(arena, aa, zb->data, ClumpSize) == ClumpSize)
       +                rv = 0;
       +        freezblock(zb);
       +        return rv;
       +}
       +
       +/*
       + * write magic as the clump magic number at arena address aa.
       + * returns 0 on success, -1 if the write fails, matching
       + * writeclumphead and the < 0 test made by syncarena.
       + */
       +static int
       +writeclumpmagic(Arena *arena, u64int aa, u32int magic)
       +{
       +        u8int buf[U32Size];
       +
       +        packmagic(magic, buf);
       +        if(writearena(arena, aa, buf, U32Size) != U32Size)
       +                return -1;
       +        return 0;
       +}
   DIR diff --git a/src/cmd/venti/srv/syncindex.c b/src/cmd/venti/srv/syncindex.c
       t@@ -0,0 +1,73 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +static        int        verbose;
       +/*
       + * print the command line summary, including every option
       + * accepted by threadmain, and exit.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: syncindex [-fv] [-B blockcachesize] [-I indexcachesize] config\n");
       +        threadexitsall("usage");
       +}
       +
       +Config conf;
       +
       +/*
       + * syncindex: initialize venti from the named configuration file,
       + * set up the caches, and run syncindex on the main index.
       + *
       + * options:
       + *        -B size        memory for the disk block cache
       + *        -I size        memory for the index cache
       + *        -f        fix problems; without it readonly is set
       + *        -v        print the index before syncing
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        u32int bcmem, icmem;
       +        int fix;
       +
       +        fix = 0;
       +        bcmem = 0;
       +        icmem = 0;
       +        ARGBEGIN{
       +        case 'B':
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        case 'I':
       +                icmem = unittoull(EARGF(usage()));
       +                break;
       +        case 'f':
       +                fix++;
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(!fix)
       +                readonly = 1;
       +
       +        if(argc != 1)
       +                usage();
       +
       +        if(initventi(argv[0], &conf) < 0)
       +                sysfatal("can't init venti: %r");
       +
       +        /* raise the block cache to the minimum computed from the index layout */
       +        if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
       +                bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
       +        if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
       +        initdcache(bcmem);
       +        initlumpcache(1*1024*1024, 1024/8);
       +        /* from here on icmem is a log2 value (at least 4), not a byte count */
       +        icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);
       +        if(icmem < 4)
       +                icmem = 4;
       +        /* the byte count has the type of sizeof; convert it to match %d */
       +        if(1) fprint(2, "initialize %d bytes of index cache for %d index entries\n",
       +                (int)((sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth),
       +                (1 << icmem) * ICacheDepth);
       +        initicache(icmem, ICacheDepth);
       +        initicachewrite();
       +        if(mainindex->bloom)
       +                startbloomproc(mainindex->bloom);
       +
       +        if(verbose)
       +                printindex(2, mainindex);
       +        if(syncindex(mainindex, fix, 1, 0) < 0)
       +                sysfatal("failed to sync index=%s: %r\n", mainindex->name);
       +
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/syncindex0.c b/src/cmd/venti/srv/syncindex0.c
       t@@ -0,0 +1,167 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +enum
       +{
       +        /* number of clump directory entries syncarenaindex handles per pass */
       +        ClumpChunks        = 32*1024
       +};
       +
       +/*
       + * running counts kept by syncarenaindex: clumps with no index entry,
       + * and clumps whose index entry disagrees with the clump.
       + */
       +static int missing, wrong;
       +
       +/*
       + * order the index array s so that ci[s[0]], ci[s[1]], ...
       + * ascend by score; ci itself is not moved.
       + * a shell sort is enough here, since the disk i/o
       + * that follows dominates the cost.
       + */
       +static void
       +sortclumpinfo(ClumpInfo *ci, int *s, int n)
       +{
       +        int gap, first, hi, lo, tmp;
       +
       +        for(gap = (n + 3) / 5; gap > 0; gap = (gap + 1) / 3){
       +                for(first = n - gap - 1; first >= 0; first--){
       +                        /* carry s[first] up its gap-spaced chain until it is in order */
       +                        for(hi = first + gap; hi < n; hi += gap){
       +                                lo = hi - gap;
       +                                if(memcmp(ci[s[lo]].score, ci[s[hi]].score, VtScoreSize) <= 0)
       +                                        break;
       +                                tmp = s[hi];
       +                                s[hi] = s[lo];
       +                                s[lo] = tmp;
       +                        }
       +                }
       +        }
       +}
       +
       +/*
       + * make sure the index ix has an entry for each clump of arena,
       + * starting with clump number clump, whose entry address is a.
       + * the directory is processed ClumpChunks entries at a time, each
       + * batch visited in score order.
       + *
       + * if check is zero every clump is treated as needing an entry;
       + * otherwise only clumps whose entry is missing or mismatched are.
       + * if fix is set those entries are inserted, and *pflush is set to 1
       + * once anything has been inserted.
       + *
       + * returns 0 if ok, -1 if the directory could not be read or,
       + * without fix, if some clump needs an index entry.
       + */
       +int
       +syncarenaindex(Index *ix, Arena *arena, u32int clump, u64int a, int fix, int *pflush, int check)
       +{
       +        Packet *pack;
       +        IEntry ie;
       +        IAddr ia;
       +        ClumpInfo *ci, *cis;
       +        u64int *addrs;
       +        int i, n, ok, *s, flush;
       +
       +        trace(TraceProc, "syncarenaindex enter");
       +
       +        flush = 0;
       +        cis = MKN(ClumpInfo, ClumpChunks);
       +        addrs = MKN(u64int, ClumpChunks);
       +        s = MKN(int, ClumpChunks);
       +        ok = 0;
       +        for(; clump < arena->memstats.clumps; clump += n){
       +                n = ClumpChunks;
       +                if(n > arena->memstats.clumps - clump)
       +                        n = arena->memstats.clumps - clump;
       +                n = readclumpinfos(arena, clump, cis, n);
       +                if(n <= 0){
       +                        fprint(2, "arena directory read failed\n");
       +                        ok = -1;
       +                        break;
       +                }
       +
       +                /* addresses must be computed in directory order, before sorting */
       +                for(i = 0; i < n; i++){
       +                        addrs[i] = a;
       +                        a += cis[i].size + ClumpSize;
       +                        s[i] = i;
       +                }
       +
       +                sortclumpinfo(cis, s, n);
       +
       +                for(i = 0; i < n; i++){
       +                        /* s[i] is the entry's position in the directory batch */
       +                        ci = &cis[s[i]];
       +                        ia.type = ci->type;
       +                        ia.size = ci->uncsize;
       +                        ia.addr = addrs[s[i]];
       +                        ia.blocks = (ci->size + ClumpSize + (1 << ABlockLog) - 1) >> ABlockLog;
       +
       +                        if(!check)
       +                                goto Add;
       +                        if(loadientry(ix, ci->score, ci->type, &ie) < 0){
       +                                trace(TraceProc, "syncarenaindex missing block %V.%d", ci->score, ci->type);
       +                                missing++;
       +                        if(0)        fprint(2, "missing block type=%d score=%V\n", ci->type, ci->score);
       +                        }else if(iaddrcmp(&ia, &ie.ia) != 0){
       +                                trace(TraceProc, "syncarenaindex mismatched entry");
       +                                /* report the clump's real number, not its place in the sorted order */
       +                                fprint(2, "\nmismatched index entry and clump at %d\n", clump + s[i]);
       +                                fprint(2, "\tclump: type=%d size=%d blocks=%d addr=%lld\n", ia.type, ia.size, ia.blocks, ia.addr);
       +                                fprint(2, "\tindex: type=%d size=%d block=%d addr=%lld\n", ie.ia.type, ie.ia.size, ie.ia.blocks, ie.ia.addr);
       +                                /* if the indexed copy can be read, this clump is a duplicate */
       +                                pack = readlump(ie.score, ie.ia.type, ie.ia.size, nil);
       +                                if(pack != nil){
       +                                        packetfree(pack);
       +                                        fprint(2, "duplicated lump\n");
       +                                        continue;
       +                                }
       +                                wrong++;
       +                        }else
       +                                continue;
       +                Add:
       +                        if(!fix){
       +                                ok = -1;
       +                                continue;
       +                        }
       +                        flush = 1;
       +                        trace(TraceProc, "syncarenaindex insert %V", ci->score);
       +                        insertscore(ci->score, &ia, 1);
       +                }
       +
       +                if(0 && clump / 1000 != (clump + n) / 1000)
       +                        fprint(2, ".");
       +        }
       +        free(cis);
       +        free(addrs);
       +        free(s);
       +        if(flush){
       +                flushdcache();
       +                *pflush = 1;
       +        }
       +        return ok;
       +}
       +
       +/*
       + * Walk every arena of index ix, sync the arena itself with syncarena
       + * and then, if that left no outstanding errors, sync the arena's
       + * clumps against the index with syncarenaindex.
       + *
       + * fix is handed to syncarena and syncarenaindex; when it is set the
       + * SyncHeader, SyncCIZero and SyncCIErr results of syncarena are not
       + * counted as errors, the arena and index headers are written back,
       + * and the caches are flushed if syncarenaindex asked for it.
       + * mustflush selects a synchronous index cache flush (flushicache)
       + * over kickicache.  check is passed through to syncarenaindex.
       + *
       + * Returns 0 on success, a negative value if any arena failed,
       + * and -1 if the index header could not be written back.
       + */
       +int
       +syncindex(Index *ix, int fix, int mustflush, int check)
       +{
       +        Arena *arena;
       +        u64int a;
       +        u32int clump;
       +        int i, e, e1, ok, ok1, flush;
       +
       +        ok = 0;
       +        flush = 0;
       +        for(i = 0; i < ix->narenas; i++){
       +                trace(TraceProc, "syncindex start %d", i);
       +                arena = ix->arenas[i];
       +                /*
       +                 * remember the clump count and used size before calling
       +                 * syncarena; syncarenaindex starts from these values.
       +                 * (presumably syncarena can advance them - confirm)
       +                 */
       +                clump = arena->memstats.clumps;
       +                a = arena->memstats.used;
       +                e = syncarena(arena, ix->amap[i].start, TWID32, fix, fix);
       +                /* e1 is e with the conditions that fix mode tolerates masked off */
       +                e1 = e;
       +                if(fix)
       +                        e1 &= ~(SyncHeader|SyncCIZero|SyncCIErr);
       +                if(e1 == SyncHeader)
       +                        fprint(2, "arena %s: header is out-of-date\n", arena->name);
       +                if(e1)
       +                        ok = -1;
       +                else{
       +                        ok1 = syncarenaindex(ix, arena, clump, a + ix->amap[i].start, fix, &flush, check);
       +                        if(ok1 < 0)
       +                                fprint(2, "syncarenaindex: %r\n");
       +                        /* write the arena header only if syncarena flagged it and the index sync succeeded */
       +                        if(fix && ok1==0 && (e & SyncHeader) && wbarena(arena) < 0)
       +                                fprint(2, "arena=%s header write failed: %r\n", arena->name);
       +                        ok |= ok1;
       +                }
       +        }
       +        /* missing and wrong are counters declared outside this function */
       +        if(missing || wrong)
       +                fprint(2, "syncindex: %d missing entries, %d wrong entries (flush=%d)\n", missing, wrong, flush);
       +        if(fix && wbindex(ix) < 0){
       +                fprint(2, "can't write back index header for %s: %r\n", ix->name);
       +                return -1;
       +        }
       +        if(fix && flush){
       +                flushdcache();
       +                if(mustflush){
       +                        /* flush the index cache now, then the disk cache again */
       +                        flushicache();
       +                        flushdcache();
       +                }else
       +                        kickicache();
       +        }
       +        return ok;
       +}
   DIR diff --git a/src/cmd/venti/srv/trace.c b/src/cmd/venti/srv/trace.c
       t@@ -0,0 +1,38 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Trace level names.  One of these is passed as the first argument
       + * of trace(), which hands it to vtlog as the name of the log.
       + */
       +char TraceDisk[] = "disk";
       +char TraceLump[] = "lump";
       +char TraceBlock[] = "block";
       +char TraceProc[] = "proc";
       +char TraceWork[] = "work";
       +char TraceQuiet[] = "quiet";
       +char TraceRpc[] = "rpc";
       +
       +/*
       + * Format a trace message and record it twice with vtlog: once in the
       + * log named by level and once in the log named "all".
       + * Nothing happens when level is nil or ventilogging is off.
       + * The formatted message is truncated to fit a 512-byte buffer.
       + */
       +void
       +trace(char *level, char *fmt, ...)
       +{
       +        va_list ap;
       +        char msg[512];
       +
       +        if(level != nil && ventilogging){
       +                va_start(ap, fmt);
       +                vsnprint(msg, sizeof msg, fmt, ap);
       +                va_end(ap);
       +
       +                /* per-level log entry */
       +                vtlog(level, "<font size=-1>%T %s:</font> %s<br>\n",
       +                        threadgetname(), msg);
       +
       +                /* combined log entry, tagged with the level name */
       +                vtlog("all", "<font size=-1>%T <font color=#777777>%s</font> %s:</font> %s<br>\n",
       +                        level, threadgetname(), msg);
       +        }
       +}
       +
       +/*
       + * Tracing setup hook; this implementation does nothing.
       + */
       +void
       +traceinit(void)
       +{
       +}
       +
       +/*
       + * Trace selection hook; this implementation ignores its argument.
       + */
       +void
       +settrace(char *trace)
       +{
       +}
   DIR diff --git a/src/cmd/venti/srv/unittoull.c b/src/cmd/venti/srv/unittoull.c
       t@@ -0,0 +1,30 @@
       +#include "stdinc.h"
       +
       +#define TWID64        ((u64int)~(u64int)0)
       +
       +/*
       + * Convert a size string such as "4096", "64k", "8M", "2g" or "1T"
       + * to a byte count.  The number is parsed with base 0, and may be
       + * followed by a single k, m, g or t suffix (either case) that
       + * scales it by 2^10, 2^20, 2^30 or 2^40.
       + *
       + * Returns TWID64 if s is nil or if anything follows the number
       + * and its optional suffix.
       + */
       +u64int
       +unittoull(char *s)
       +{
       +        char *es;
       +        u64int n;
       +
       +        if(s == nil)
       +                return TWID64;
       +        /* strtoull rather than strtoul: ulong may be only 32 bits wide */
       +        n = strtoull(s, &es, 0);
       +        if(*es == 'k' || *es == 'K'){
       +                n *= 1024;
       +                es++;
       +        }else if(*es == 'm' || *es == 'M'){
       +                n *= 1024*1024;
       +                es++;
       +        }else if(*es == 'g' || *es == 'G'){
       +                n *= 1024*1024*1024;
       +                es++;
       +        }else if(*es == 't' || *es == 'T'){
       +                /* two steps: 1024*1024*1024*1024 would overflow int arithmetic */
       +                n *= 1024*1024;
       +                n *= 1024*1024;
       +                /* consume the suffix, or the check below rejects every "t" value */
       +                es++;
       +        }
       +        if(*es != '\0')
       +                return TWID64;
       +        return n;
       +}
   DIR diff --git a/src/cmd/venti/srv/unwhack.c b/src/cmd/venti/srv/unwhack.c
       t@@ -0,0 +1,179 @@
       +#include "stdinc.h"
       +#include "whack.h"
       +
       +/*
       + * Constants and tables used by unwhack to decode match lengths and offsets.
       + */
       +enum
       +{
       +        DMaxFastLen        = 7,
       +        DBigLenCode        = 0x3c,                /* minimum code for large length encoding */
       +        DBigLenBits        = 6,
       +        DBigLenBase        = 1                /* starting items to encode for big lens */
       +};
       +
       +/*
       + * indexed by the next 5 bits of input:
       + * 0 means a literal follows, 255 means a big length code follows,
       + * anything else is the match length itself.
       + */
       +static uchar lenval[1 << (DBigLenBits - 1)] =
       +{
       +        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       +        3, 3, 3, 3, 3, 3, 3, 3,
       +        4, 4, 4, 4,
       +        5,
       +        6,
       +        255,
       +        255
       +};
       +
       +/*
       + * indexed by a match length taken from lenval:
       + * the number of input bits that length code occupies.
       + */
       +static uchar lenbits[] =
       +{
       +        0, 0, 0,
       +        2, 3, 5, 5,
       +};
       +
       +/*
       + * indexed by the 4-bit offset code:
       + * the number of additional offset bits that follow the code.
       + */
       +static uchar offbits[16] =
       +{
       +        5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 13
       +};
       +
       +/*
       + * indexed by the 4-bit offset code:
       + * the base value that the additional offset bits are or'ed into.
       + */
       +static ushort offbase[16] =
       +{
       +        0, 0x20,
       +        0x40, 0x60,
       +        0x80, 0xc0,
       +        0x100, 0x180,
       +        0x200, 0x300,
       +        0x400, 0x600,
       +        0x800, 0xc00,
       +        0x1000,
       +        0x2000
       +};
       +
       +/*
       + * Prepare uw for use by clearing its error string.
       + */
       +void
       +unwhackinit(Unwhack *uw)
       +{
       +        char *e;
       +
       +        e = uw->err;
       +        *e = '\0';
       +}
       +
       +/*
       + * Decompress the nsrc bytes at src into dst, which has room for
       + * ndst bytes.  The input is consumed as a big-endian bit stream of
       + * literals and (length, offset) back references into the output
       + * already produced.
       + *
       + * Returns the number of bytes written to dst, or -1 with a message
       + * in uw->err if the output would not fit, a length or offset is
       + * out of range, or more bits were consumed than the input held.
       + */
       +int
       +unwhack(Unwhack *uw, uchar *dst, int ndst, uchar *src, int nsrc)
       +{
       +        uchar *s, *d, *dmax, *smax, lit;
       +        ulong uwbits, lithist;
       +        int i, off, len, bits, use, code, uwnbits, overbits;
       +
       +        d = dst;
       +        dmax = d + ndst;
       +
       +        smax = src + nsrc;
       +        /* uwbits holds the unread bits, uwnbits how many of them are valid */
       +        uwnbits = 0;
       +        uwbits = 0;
       +        /* overbits counts the zero bits shifted in after the input ran out */
       +        overbits = 0;
       +        /* one bit per recent literal: set when it was < 32 or > 127 */
       +        lithist = ~0;
       +        while(src < smax || uwnbits - overbits >= MinDecode){
       +                /* refill the bit buffer a byte at a time until it holds more than 24 bits */
       +                while(uwnbits <= 24){
       +                        uwbits <<= 8;
       +                        if(src < smax)
       +                                uwbits |= *src++;
       +                        else
       +                                overbits += 8;
       +                        uwnbits += 8;
       +                }
       +
       +                /*
       +                 * literal
       +                 */
       +                len = lenval[(uwbits >> (uwnbits - 5)) & 0x1f];
       +                if(len == 0){
       +                        if(lithist & 0xf){
       +                                /* one of the last four literals was flagged: 9 bits, the low 8 are the byte */
       +                                uwnbits -= 9;
       +                                lit = (uwbits >> uwnbits) & 0xff;
       +                                lit &= 255;
       +                        }else{
       +                                /* 8 bits carrying a 7-bit value; values below 32 take 2 or 3 more bits */
       +                                uwnbits -= 8;
       +                                lit = (uwbits >> uwnbits) & 0x7f;
       +                                if(lit < 32){
       +                                        if(lit < 24){
       +                                                uwnbits -= 2;
       +                                                lit = (lit << 2) | ((uwbits >> uwnbits) & 3);
       +                                        }else{
       +                                                uwnbits -= 3;
       +                                                lit = (lit << 3) | ((uwbits >> uwnbits) & 7);
       +                                        }
       +                                        lit = (lit - 64) & 0xff;
       +                                }
       +                        }
       +                        if(d >= dmax){
       +                                snprint(uw->err, WhackErrLen, "too much output");
       +                                return -1;
       +                        }
       +                        *d++ = lit;
       +                        lithist = (lithist << 1) | (lit < 32) | (lit > 127);
       +                        continue;
       +                }
       +
       +                /*
       +                 * length
       +                 */
       +                if(len < 255)
       +                        uwnbits -= lenbits[len];
       +                else{
       +                        /*
       +                         * big length: a DBigLenBits-bit code, then one more bit
       +                         * for each step in which code is still >= use.
       +                         */
       +                        uwnbits -= DBigLenBits;
       +                        code = ((uwbits >> uwnbits) & ((1 << DBigLenBits) - 1)) - DBigLenCode;
       +                        len = DMaxFastLen;
       +                        use = DBigLenBase;
       +                        bits = (DBigLenBits & 1) ^ 1;
       +                        while(code >= use){
       +                                len += use;
       +                                code -= use;
       +                                code <<= 1;
       +                                uwnbits--;
       +                                if(uwnbits < 0){
       +                                        snprint(uw->err, WhackErrLen, "len out of range");
       +                                        return -1;
       +                                }
       +                                code |= (uwbits >> uwnbits) & 1;
       +                                /* use doubles on every other step, as bits alternates between 0 and 1 */
       +                                use <<= bits;
       +                                bits ^= 1;
       +                        }
       +                        len += code;
       +
       +                        /* refill the bit buffer before reading the offset */
       +                        while(uwnbits <= 24){
       +                                uwbits <<= 8;
       +                                if(src < smax)
       +                                        uwbits |= *src++;
       +                                else
       +                                        overbits += 8;
       +                                uwnbits += 8;
       +                        }
       +                }
       +
       +                /*
       +                 * offset
       +                 */
       +                /* a 4-bit code selects the base and the number of extra bits */
       +                uwnbits -= 4;
       +                bits = (uwbits >> uwnbits) & 0xf;
       +                off = offbase[bits];
       +                bits = offbits[bits];
       +
       +                uwnbits -= bits;
       +                off |= (uwbits >> uwnbits) & ((1 << bits) - 1);
       +                off++;
       +
       +                /* the match must start inside the output produced so far */
       +                if(off > d - dst){
       +                        snprint(uw->err, WhackErrLen, "offset out of range: off=%d d=%ld len=%d nbits=%d", off, d - dst, len, uwnbits);
       +                        return -1;
       +                }
       +                if(d + len > dmax){
       +                        snprint(uw->err, WhackErrLen, "len out of range");
       +                        return -1;
       +                }
       +                /* copy forward a byte at a time: source and destination may overlap */
       +                s = d - off;
       +                for(i = 0; i < len; i++)
       +                        d[i] = s[i];
       +                d += len;
       +        }
       +        /* fewer bits left than were padded in: the decoder read past the real input */
       +        if(uwnbits < overbits){
       +                snprint(uw->err, WhackErrLen, "compressed data overrun");
       +                return -1;
       +        }
       +
       +        len = d - dst;
       +
       +        return len;
       +}
   DIR diff --git a/src/cmd/venti/srv/utils.c b/src/cmd/venti/srv/utils.c
       t@@ -0,0 +1,252 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Compare two names, looking at no more than ANameSize bytes.
       + * The result has the same sign convention as strncmp.
       + */
       +int
       +namecmp(char *s, char *t)
       +{
       +        int order;
       +
       +        order = strncmp(s, t, ANameSize);
       +        return order;
       +}
       +
       +/*
       + * Copy the name src into dst, which must have room for ANameSize
       + * bytes.  The copy is truncated if necessary and always terminated.
       + */
       +void
       +namecp(char *dst, char *src)
       +{
       +        int last;
       +
       +        last = ANameSize - 1;
       +        strncpy(dst, src, last);
       +        dst[last] = '\0';
       +}
       +
       +/*
       + * Check that name is usable: not nil, not running on past
       + * ANameSize characters, and made only of characters from
       + * ' ' up to but not including 0x7f.
       + * Returns 0 if the name is acceptable, -1 if not.
       + */
       +int
       +nameok(char *name)
       +{
       +        int i, c;
       +
       +        if(name == nil)
       +                return -1;
       +        for(i = 0; (c = name[i]) != '\0'; i++){
       +                if(i >= ANameSize)
       +                        return -1;
       +                if(c < ' ' || c >= 0x7f)
       +                        return -1;
       +        }
       +        return 0;
       +}
       +
       +/*
       + * Parse the leading decimal digits of s as a u32int and store the
       + * value in *r.
       + * Returns -1 (leaving *r alone) if the number overflows; otherwise
       + * returns 1 if s was entirely digits and not empty, 0 if it was not.
       + */
       +int
       +stru32int(char *s, u32int *r)
       +{
       +        char *p;
       +        u32int acc, next, limit;
       +        int d;
       +
       +        limit = TWID32 / 10;
       +        acc = 0;
       +        p = s;
       +        while((d = *p) >= '0' && d <= '9'){
       +                /* acc*10 cannot be represented */
       +                if(acc > limit)
       +                        return -1;
       +                next = acc * 10 + d - '0';
       +                /* adding the digit wrapped around */
       +                if(next < acc)
       +                        return -1;
       +                acc = next;
       +                p++;
       +        }
       +        *r = acc;
       +        if(p == s)
       +                return 0;
       +        return *p == '\0';
       +}
       +
       +/*
       + * Parse the leading decimal digits of s as a u64int and store the
       + * value in *r.
       + * Returns -1 (leaving *r alone) if the number overflows; otherwise
       + * returns 1 if s was entirely digits and not empty, 0 if it was not.
       + */
       +int
       +stru64int(char *s, u64int *r)
       +{
       +        char *p;
       +        u64int acc, next, limit;
       +        int d;
       +
       +        limit = TWID64 / 10;
       +        acc = 0;
       +        p = s;
       +        while((d = *p) >= '0' && d <= '9'){
       +                /* acc*10 cannot be represented */
       +                if(acc > limit)
       +                        return -1;
       +                next = acc * 10 + d - '0';
       +                /* adding the digit wrapped around */
       +                if(next < acc)
       +                        return -1;
       +                acc = next;
       +                p++;
       +        }
       +        *r = acc;
       +        if(p == s)
       +                return 0;
       +        return *p == '\0';
       +}
       +
       +/*
       + * Report whether type is below VtMaxType: 1 if so, 0 if not.
       + */
       +int
       +vttypevalid(int type)
       +{
       +        if(type < VtMaxType)
       +                return 1;
       +        return 0;
       +}
       +
       +/*
       + * Format the message described by fmt and args, print it on file
       + * descriptor 2 together with the severity (and the program name,
       + * when argv0 is set), and return the formatted string.
       + * The callers in this file free the returned string.
       + * Returns nil, printing nothing, if vsmprint fails.
       + */
       +static char*
       +logit(int severity, char *fmt, va_list args)
       +{
       +        char *s;
       +
       +        s = vsmprint(fmt, args);
       +        if(s == nil)
       +                return nil;
       +        /* prefix the program name only when there is one to print */
       +        if(argv0 != nil)
       +                fprint(2, "%s: err %d: %s\n", argv0, severity, s);
       +        else
       +                fprint(2, "err %d: %s\n", severity, s);
       +        return s;
       +}
       +
       +/*
       + * Log a formatted message with logit and also install it as the
       + * error string with werrstr.  If the message cannot be formatted,
       + * the error string becomes "error setting error".
       + */
       +void
       +seterr(int severity, char *fmt, ...)
       +{
       +        va_list ap;
       +        char *msg;
       +
       +        va_start(ap, fmt);
       +        msg = logit(severity, fmt, ap);
       +        va_end(ap);
       +
       +        if(msg == nil){
       +                werrstr("error setting error");
       +                return;
       +        }
       +        werrstr("%s", msg);
       +        free(msg);
       +}
       +
       +/*
       + * Log a formatted message with logit; the error string is left alone.
       + */
       +void
       +logerr(int severity, char *fmt, ...)
       +{
       +        va_list ap;
       +        char *msg;
       +
       +        va_start(ap, fmt);
       +        msg = logit(severity, fmt, ap);
       +        va_end(ap);
       +
       +        /* logit hands back the formatted text (or nil); release it */
       +        free(msg);
       +}
       +
       +/*
       + * Return the current time, as reported by time(nil), as a u32int.
       + */
       +u32int
       +now(void)
       +{
       +        u32int t;
       +
       +        t = time(nil);
       +        return t;
       +}
       +
       +int abortonmem = 1;
       +
       +/*
       + * Allocate n bytes and fill them with 0xa5.
       + * Never returns nil: on failure it calls abort if abortonmem is
       + * set, and sysfatal otherwise.
       + */
       +void *
       +emalloc(ulong n)
       +{
       +        void *mem;
       +
       +        if((mem = malloc(n)) == nil){
       +                if(abortonmem)
       +                        abort();
       +                sysfatal("out of memory allocating %lud", n);
       +        }
       +        /* fill with a recognizable non-zero pattern */
       +        memset(mem, 0xa5, n);
       +        if(0)
       +                print("emalloc %p-%p by %lux\n", mem, (char*)mem+n, getcallerpc(&n));
       +        return mem;
       +}
       +
       +/*
       + * Allocate n bytes and clear them to zero.
       + * Never returns nil: on failure it calls abort if abortonmem is
       + * set, and sysfatal otherwise.
       + */
       +void *
       +ezmalloc(ulong n)
       +{
       +        void *mem;
       +
       +        if((mem = malloc(n)) == nil){
       +                if(abortonmem)
       +                        abort();
       +                sysfatal("out of memory allocating %lud", n);
       +        }
       +        memset(mem, 0, n);
       +        if(0)
       +                print("ezmalloc %p-%p by %lux\n", mem, (char*)mem+n, getcallerpc(&n));
       +        return mem;
       +}
       +
       +/*
       + * Resize the allocation p to n bytes with realloc.
       + * Never returns nil: if realloc returns nil it calls abort when
       + * abortonmem is set, and sysfatal otherwise.
       + */
       +void *
       +erealloc(void *p, ulong n)
       +{
       +        if((p = realloc(p, n)) == nil){
       +                if(abortonmem)
       +                        abort();
       +                sysfatal("out of memory allocating %lud", n);
       +        }
       +        if(0)
       +                print("erealloc %p-%p by %lux\n", p, (char*)p+n, getcallerpc(&p));
       +        return p;
       +}
       +
       +/*
       + * Return a copy of the string s in memory obtained from emalloc.
       + */
       +char *
       +estrdup(char *s)
       +{
       +        int len;
       +        char *copy;
       +
       +        /* length including the terminating NUL */
       +        len = strlen(s) + 1;
       +        copy = emalloc(len);
       +        memmove(copy, s, len);
       +        if(0)
       +                print("estrdup %p-%p by %lux\n", copy, (char*)copy+len, getcallerpc(&s));
       +        return copy;
       +}
       +
       +/*
       + * return floor(log2(v)); both 0 and 1 yield 0
       + */
       +int
       +u64log2(u64int v)
       +{
       +        int shifts;
       +
       +        /* count how many halvings it takes to get v down to 0 or 1 */
       +        shifts = 0;
       +        while(v > 1){
       +                v >>= 1;
       +                shifts++;
       +        }
       +        return shifts;
       +}
       +
       +/*
       + * Start fn(arg) with proccreate, giving it a 256K stack.
       + * Always returns 0; the result of proccreate is not examined.
       + */
       +int
       +vtproc(void (*fn)(void*), void *arg)
       +{
       +        enum { StackSize = 256*1024 };
       +
       +        proccreate(fn, arg, StackSize);
       +        return 0;
       +}
       +
       +/*
       + * Print formatter for an IEntry* argument (installed as 'I' by
       + * ventifmtinstall): prints the score followed by the address,
       + * type, size and block count of the entry.
       + */
       +int
       +ientryfmt(Fmt *fmt)
       +{
       +        IEntry *ent;
       +        int r;
       +
       +        ent = va_arg(fmt->args, IEntry*);
       +        r = fmtprint(fmt, "%V %22lld %3d %5d %3d",
       +                ent->score, ent->ia.addr, ent->ia.type, ent->ia.size, ent->ia.blocks);
       +        return r;
       +}
       +
       +/*
       + * Install the print verbs used by venti:
       + * F (vtfcallfmt), H (encodefmt), I (ientryfmt) and V (vtscorefmt).
       + */
       +void
       +ventifmtinstall(void)
       +{
       +        fmtinstall('F', vtfcallfmt);
       +        fmtinstall('H', encodefmt);
       +        fmtinstall('I', ientryfmt);
       +        fmtinstall('V', vtscorefmt);
       +}
       +
       +/*
       + * Return the value of nsec() scaled from nanoseconds down to
       + * milliseconds and converted to uint.
       + */
       +uint
       +msec(void)
       +{
       +        uint ms;
       +
       +        ms = nsec()/1000000;
       +        return ms;
       +}
       +
       +/*
       + * Return the number of 1 bits in the low 32 bits of n, by adding
       + * up neighbouring fields of 1, 2, 4, 8 and then 16 bits.
       + */
       +uint
       +countbits(uint n)
       +{
       +        static uint mask[] = {
       +                0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF
       +        };
       +        int i;
       +
       +        /* step i adds pairs of fields that are 1<<i bits wide */
       +        for(i = 0; i < sizeof mask / sizeof mask[0]; i++)
       +                n = (n & mask[i]) + ((n >> (1 << i)) & mask[i]);
       +        return n;
       +}
   DIR diff --git a/src/cmd/venti/srv/venti.c b/src/cmd/venti/srv/venti.c
       t@@ -0,0 +1,266 @@
       +#ifdef PLAN9PORT
       +#include <u.h>
       +#include <signal.h>
       +#endif
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +#include "whack.h"
       +
       +int debug;                /* set by the -d flag */
       +int nofork;                /* set by -d and -s: serve in the calling proc instead of a new one */
       +int mainstacksize = 256*1024;        /* presumably read by the thread library for threadmain's stack - confirm */
       +VtSrv *ventisrv;        /* listener created by vtlisten in threadmain */
       +
       +static void        ventiserver(void*);
       +
       +/*
       + * Print the command line synopsis and exit with status "usage".
       + * The synopsis lists the flags that threadmain's argument parsing
       + * accepts.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: venti [-dLs] [-a ventiaddress] [-B blockcachesize] [-c config] [-C cachesize] [-D trace] [-h httpaddress] [-I icachesize] [-W webroot]\n");
       +        threadexitsall("usage");
       +}
       +/*
       + * Entry point of the venti server.
       + * Parses the command line, reads the configuration with initventi,
       + * fills in any setting not given on the command line from the
       + * configuration, optionally starts the http server, sizes and
       + * initializes the lump, index and disk block caches, syncs the
       + * main index, optionally sets up write queues, announces the venti
       + * address, and finally runs ventiserver.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        char *configfile, *haddr, *vaddr, *webroot;
       +        u32int mem, icmem, bcmem, minbcmem;
       +        Config config;
       +
       +        traceinit();
       +        threadsetname("main");
       +        vaddr = nil;
       +        haddr = nil;
       +        configfile = nil;
       +        webroot = nil;
       +        mem = 0;
       +        icmem = 0;
       +        bcmem = 0;
       +        /*
       +         * NOTE(review): unittoull returns a u64int (TWID64 on a parse
       +         * error) but mem, bcmem and icmem are u32int, so the values
       +         * assigned below are truncated to 32 bits.
       +         */
       +        ARGBEGIN{
       +        case 'a':
       +                vaddr = EARGF(usage());
       +                break;
       +        case 'B':
       +                bcmem = unittoull(EARGF(usage()));
       +                break;
       +        case 'c':
       +                configfile = EARGF(usage());
       +                break;
       +        case 'C':
       +                mem = unittoull(EARGF(usage()));
       +                break;
       +        case 'D':
       +                settrace(EARGF(usage()));
       +                break;
       +        case 'd':
       +                debug = 1;
       +                nofork = 1;
       +                break;
       +        case 'h':
       +                haddr = EARGF(usage());
       +                break;
       +        case 'I':
       +                icmem = unittoull(EARGF(usage()));
       +                break;
       +        case 'L':
       +                ventilogging = 1;
       +                break;
       +        case 's':
       +                nofork = 1;
       +                break;
       +        case 'W':
       +                webroot = EARGF(usage());
       +                break;
       +        default:
       +                usage();
       +        }ARGEND
       +
       +        /* no positional arguments are accepted */
       +        if(argc)
       +                usage();
       +
       +        if(!nofork)
       +                rfork(RFNOTEG);
       +
       +#ifdef PLAN9PORT
       +        {
       +                /* sigh - needed to avoid signals when writing to hungup networks */
       +                struct sigaction sa;
       +                memset(&sa, 0, sizeof sa);
       +                sa.sa_handler = SIG_IGN;
       +                sigaction(SIGPIPE, &sa, nil);
       +        }
       +#endif
       +
       +        trace(TraceQuiet, "venti started");
       +        /* the progress messages below build up one line on standard error */
       +        fprint(2, "venti: ");
       +
       +        ventifmtinstall();
       +        if(configfile == nil)
       +                configfile = "venti.conf";
       +
       +        if(initarenasum() < 0)
       +                fprint(2, "warning: can't initialize arena summing process: %r");
       +
       +        fprint(2, "conf...");
       +        if(initventi(configfile, &config) < 0)
       +                sysfatal("can't init server: %r");
       +
       +        /* command line settings take precedence over the configuration file */
       +        if(mem == 0)
       +                mem = config.mem;
       +        if(bcmem == 0)
       +                bcmem = config.bcmem;
       +        if(icmem == 0)
       +                icmem = config.icmem;
       +        if(haddr == nil)
       +                haddr = config.haddr;
       +        if(vaddr == nil)
       +                vaddr = config.vaddr;
       +        if(vaddr == nil)
       +                vaddr = "tcp!*!venti";
       +        if(webroot == nil)
       +                webroot = config.webroot;
       +        if(queuewrites == 0)
       +                queuewrites = config.queuewrites;
       +
       +        /* the http server is optional; failure to start it is only a warning */
       +        if(haddr){
       +                fprint(2, "httpd %s...", haddr);
       +                if(httpdinit(haddr, webroot) < 0)
       +                        fprint(2, "warning: can't start http server: %r");
       +        }
       +
       +        fprint(2, "init...");
       +
       +        /*
       +         * 0xffffffff is replaced by a 1MB lump cache
       +         * (presumably it stands for an unset or unparsable size - confirm)
       +         */
       +        if(mem == 0xffffffffUL)
       +                mem = 1 * 1024 * 1024;
       +        if(0) fprint(2, "initialize %d bytes of lump cache for %d lumps\n",
       +                mem, mem / (8 * 1024));
       +        initlumpcache(mem, mem / (8 * 1024));
       +
       +        /*
       +         * from here on icmem is no longer a byte count but the log2
       +         * of the number of ICacheDepth-deep groups of index entries
       +         * that fit in the requested memory, with a minimum of 4
       +         */
       +        icmem = u64log2(icmem / (sizeof(IEntry)+sizeof(IEntry*)) / ICacheDepth);
       +        if(icmem < 4)
       +                icmem = 4;
       +        if(0) fprint(2, "initialize %d bytes of index cache for %d index entries\n",
       +                (sizeof(IEntry)+sizeof(IEntry*)) * (1 << icmem) * ICacheDepth,
       +                (1 << icmem) * ICacheDepth);
       +        initicache(icmem, ICacheDepth);
       +        initicachewrite();
       +
       +        /*
       +         * need a block for every arena and every process
       +         */
       +        minbcmem = maxblocksize * 
       +                (mainindex->narenas + mainindex->nsects*4 + 16);
       +        if(bcmem < minbcmem)
       +                bcmem = minbcmem;
       +
       +        if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
       +        initdcache(bcmem);
       +
       +        if(mainindex->bloom)
       +                startbloomproc(mainindex->bloom);
       +
       +        /* syncindex(ix, fix=1, mustflush=0, check=0) */
       +        fprint(2, "sync...");
       +        if(syncindex(mainindex, 1, 0, 0) < 0)
       +                sysfatal("can't sync server: %r");
       +
       +        /* write queueing is optional; fall back to unqueued writes on failure */
       +        if(queuewrites){
       +                fprint(2, "queue...");
       +                if(initlumpqueues(mainindex->nsects) < 0){
       +                        fprint(2, "can't initialize lump queues,"
       +                                " disabling write queueing: %r");
       +                        queuewrites = 0;
       +                }
       +        }
       +
       +        fprint(2, "announce %s...", vaddr);
       +        ventisrv = vtlisten(vaddr);
       +        if(ventisrv == nil)
       +                sysfatal("can't announce %s: %r", vaddr);
       +
       +        fprint(2, "serving.\n");
       +        /* with nofork, serve in this proc; otherwise start a new proc for the server */
       +        if(nofork)
       +                ventiserver(nil);
       +        else
       +                vtproc(ventiserver, nil);
       +}
       +
       +/*
       + * Turn the reply for request r into a VtRerror carrying a copy
       + * (made with estrdup) of the string error.
       + */
       +static void
       +vtrerror(VtReq *r, char *error)
       +{
       +        r->rx.msgtype = VtRerror;
       +        r->rx.error = estrdup(error);
       +}
       +
       +/*
       + * Main request loop of the server.
       + * Takes requests from ventisrv with vtgetreq one at a time, handles
       + * VtTread, VtTwrite and VtTsync, answers anything else with an
       + * "unknown request" error, and sends each reply with vtrespond.
       + * When vtgetreq returns nil the caches are flushed and every
       + * thread exits.  The argument v is unused.
       + */
       +static void
       +ventiserver(void *v)
       +{
       +        Packet *p;
       +        VtReq *r;
       +        char err[ERRMAX];
       +        uint ms;
       +        int cached, ok;
       +
       +        USED(v);
       +        threadsetname("ventiserver");
       +        trace(TraceWork, "start");
       +        while((r = vtgetreq(ventisrv)) != nil){
       +                trace(TraceWork, "finish");
       +                trace(TraceWork, "start request %F", &r->tx);
       +                trace(TraceRpc, "<- %F", &r->tx);
       +                /* the reply type defaults to the request type plus one */
       +                r->rx.msgtype = r->tx.msgtype+1;
       +                addstat(StatRpcTotal, 1);
       +        //        print("req (arenas[0]=%p sects[0]=%p) %F\n",
       +        //                mainindex->arenas[0], mainindex->sects[0], &r->tx);
       +                switch(r->tx.msgtype){
       +                default:
       +                        vtrerror(r, "unknown request");
       +                        break;
       +                case VtTread:
       +                        /* time the read and record it in the statistics */
       +                        ms = msec();
       +                        r->rx.data = readlump(r->tx.score, r->tx.blocktype, r->tx.count, &cached);
       +                        ms = msec() - ms;
       +                        addstat2(StatRpcRead, 1, StatRpcReadTime, ms);
       +                        if(r->rx.data == nil){
       +                                addstat(StatRpcReadFail, 1);
       +                                rerrstr(err, sizeof err);
       +                                vtrerror(r, err);
       +                        }else{
       +                                addstat(StatRpcReadBytes, packetsize(r->rx.data));
       +                                addstat(StatRpcReadOk, 1);
       +                                if(cached)
       +                                        addstat2(StatRpcReadCached, 1, StatRpcReadCachedTime, ms);
       +                                else
       +                                        addstat2(StatRpcReadUncached, 1, StatRpcReadUncachedTime, ms);
       +                        }
       +                        break;
       +                case VtTwrite:
       +                        /* detach the packet from the request before handing it to writelump */
       +                        p = r->tx.data;
       +                        r->tx.data = nil;
       +                        addstat(StatRpcWriteBytes, packetsize(p));
       +                        ms = msec();
       +                        ok = writelump(p, r->rx.score, r->tx.blocktype, 0, ms);
       +                        ms = msec() - ms;
       +                        addstat2(StatRpcWrite, 1, StatRpcWriteTime, ms);
       +
       +                        if(ok < 0){
       +                                addstat(StatRpcWriteFail, 1);
       +                                rerrstr(err, sizeof err);
       +                                vtrerror(r, err);
       +                        }
       +                        break;
       +                case VtTsync:
       +                        flushqueue();
       +                        flushdcache();
       +                        break;
       +                }
       +                trace(TraceRpc, "-> %F", &r->rx);
       +                vtrespond(r);
       +                trace(TraceWork, "start");
       +        }
       +        /* no more requests: flush the caches before exiting */
       +        flushdcache();
       +        flushicache();
       +        threadexitsall(0);
       +}
       +
       +
   DIR diff --git a/src/cmd/venti/srv/verifyarena.c b/src/cmd/venti/srv/verifyarena.c
       t@@ -0,0 +1,127 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +static int        verbose;
       +
       +/*
       + * Print the command line synopsis and exit.
       + * A usage error is a failure, so exit with the status "usage"
       + * rather than with a nil (success) status.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: verifyarena [-v]\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * Read exactly n bytes from file descriptor 0 into buf, repeating
       + * the read until the buffer is full.
       + * Calls sysfatal if a read fails or hits end of file first.
       + */
       +static void
       +readblock(uchar *buf, int n)
       +{
       +        int left, got;
       +
       +        left = n;
       +        while(left > 0){
       +                got = read(0, buf, left);
       +                if(got <= 0)
       +                        sysfatal("can't read arena from standard input: %r");
       +                buf += got;
       +                left -= got;
       +        }
       +}
       +
       +static void
       +verifyarena(void)
       +{
       +        Arena arena;
       +        ArenaHead head;
       +        ZBlock *b;
       +        DigestState s;
       +        u64int n, e;
       +        u32int bs;
       +        u8int score[VtScoreSize];
       +
       +        fprint(2, "verify arena from standard input\n");
       +
       +        memset(&arena, 0, sizeof arena);
       +        memset(&s, 0, sizeof s);
       +
       +        /*
       +         * read the little bit, which will include the header
       +         */
       +        bs = MaxIoSize;
       +        b = alloczblock(bs, 0, 0);
       +        readblock(b->data, HeadSize);
       +        sha1(b->data, HeadSize, nil, &s);
       +        if(unpackarenahead(&head, b->data) < 0)
       +                sysfatal("corrupted arena header: %r");
       +        if(head.version != ArenaVersion4 && head.version != ArenaVersion5)
       +                fprint(2, "warning: unknown arena version %d\n", head.version);
       +
       +        /*
       +         * now we know how much to read
       +         * read everything but the last block, which is special
       +         */
       +        e = head.size - head.blocksize;
       +        for(n = HeadSize; n < e; n += bs){
       +                if(n + bs > e)
       +                        bs = e - n;
       +                readblock(b->data, bs);
       +                sha1(b->data, bs, nil, &s);
       +        }
       +
       +        /*
       +         * read the last block update the sum.
       +         * the sum is calculated assuming the slot for the sum is zero.
       +         */
       +        bs = head.blocksize;
       +        readblock(b->data, bs);
       +        sha1(b->data, bs-VtScoreSize, nil, &s);
       +        sha1(zeroscore, VtScoreSize, nil, &s);
       +        sha1(nil, 0, score, &s);
       +
       +        /*
       +         * validity check on the trailer
       +         */
       +        arena.blocksize = head.blocksize;
       +        if(unpackarena(&arena, b->data) < 0)
       +                sysfatal("corrupted arena trailer: %r");
       +        scorecp(arena.score, &b->data[arena.blocksize - VtScoreSize]);
       +
       +        if(namecmp(arena.name, head.name) != 0)
       +                sysfatal("arena header and trailer names clash: %s vs. %s\n", head.name, arena.name);
       +        if(arena.version != head.version)
       +                sysfatal("arena header and trailer versions clash: %d vs. %d\n", head.version, arena.version);
       +        arena.size = head.size - 2 * head.blocksize;
       +
       +        /*
       +         * check for no checksum or the same
       +         */
       +        if(scorecmp(score, arena.score) != 0){
       +                if(scorecmp(zeroscore, arena.score) != 0)
       +                        fprint(2, "warning: mismatched checksums for arena=%s, found=%V calculated=%V",
       +                                arena.name, arena.score, score);
       +                scorecp(arena.score, score);
       +        }else
       +                fprint(2, "matched score\n");
       +
       +        printarena(2, &arena);
       +}
       +
       +/*
       + * Entry point: install the venti print formats, initialise statistics,
       + * parse flags (-v may be repeated), reject stray arguments, then
       + * verify the arena arriving on standard input.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        ventifmtinstall();
       +        statsinit();
       +
       +        ARGBEGIN{
       +        case 'v':
       +                verbose += 1;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        /* no positional arguments are accepted */
       +        if(argc > 0)
       +                usage();
       +
       +        readonly = 1;
       +        verifyarena();
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/whack.c b/src/cmd/venti/srv/whack.c
       t@@ -0,0 +1,331 @@
       +#include "stdinc.h"
       +#include "whack.h"
       +
       +typedef struct Huff        Huff;
       +int compressblocks = 1;
       +
       +/*
       + * Parameters of the match length and offset encodings used by whack().
       + */
       +enum
       +{
       +        MaxFastLen        = 9,                /* number of entries in lentab */
       +        BigLenCode        = 0x1f4,        /* minimum code for large length encoding */
       +        BigLenBits        = 9,                /* initial bit count of a large length code */
       +        BigLenBase        = 4,                /* starting items to encode for big lens */
       +
       +        MinOffBits        = 6,                /* smallest offset width tried by whack() */
       +        MaxOffBits        = MinOffBits + 8,
       +
       +        MaxLen                = 2051                /* max. length encodable in 24 bits */
       +};
       +
       +/*
       + * Indices into the stats array that whack() accumulates into.
       + */
       +enum
       +{
       +        StatBytes,                /* input bytes consumed */
       +        StatOutBytes,                /* output bytes produced */
       +        StatLits,                /* literals emitted */
       +        StatMatches,                /* matches emitted */
       +        StatLitBits,                /* output bits not attributed to offsets or lengths */
       +        StatOffBits,                /* bits spent on match offsets */
       +        StatLenBits,                /* bits spent on match lengths */
       +
       +        MaxStat                        /* number of statistics above */
       +};
       +
       +/*
       + * One prefix code: `encode' holds the code value in its low `bits' bits.
       + */
       +struct Huff
       +{
       +        short        bits;                                /* length of the code */
       +        ulong        encode;                                /* the code */
       +};
       +
       +/*
       + * Codes for short match lengths; whack() indexes this table with
       + * (match length - MinMatch) whenever that value is below MaxFastLen.
       + */
       +static        Huff        lentab[MaxFastLen] =
       +{
       +        {2,        0x2},                /* 10 */
       +        {3,        0x6},                /* 110 */
       +        {5,        0x1c},                /* 11100 */
       +        {5,        0x1d},                /* 11101 */
       +        {6,        0x3c},                /* 111100 */
       +        {7,        0x7a},                /* 1111010 */
       +        {7,        0x7b},                /* 1111011 */
       +        {8,        0xf8},                /* 11111000 */
       +        {8,        0xf9},                /* 11111001 */
       +};
       +
       +static int        thwmaxcheck;
       +
       +/*
       + * Reset the compressor state *tw and set the file-wide search limit
       + * thwmaxcheck from level: 2^level less a quarter, clamped to [2, 1024].
       + */
       +void
       +whackinit(Whack *tw, int level)
       +{
       +        int limit;
       +
       +        limit = 1 << level;
       +        limit -= limit >> 2;
       +        if(limit < 2)
       +                limit = 2;
       +        if(limit > 1024)
       +                limit = 1024;
       +        thwmaxcheck = limit;
       +
       +        memset(tw, 0, sizeof *tw);
       +        tw->begin = 2 * WhackMaxOff;
       +}
       +
       +/*
       + * find a string in the dictionary
       + *
       + * *ss points at the current input position, esrc at the end of input,
       + * h is the hash bucket for the next bytes and now is the time stamp of
       + * the current position.  Returns -1 when fewer than MinMatch bytes
       + * remain.  Otherwise returns the offset of the longest match found
       + * (0 when there is none) and advances *ss by that match's length.
       + */
       +static int
       +whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
       +{
       +        ushort then, off, last;
       +        int bestoff, bestlen, check;
       +        uchar *s, *t;
       +
       +        s = *ss;
       +        if(esrc < s + MinMatch)
       +                return -1;
       +        /* never look further than the longest encodable match */
       +        if(s + MaxLen < esrc)
       +                esrc = s + MaxLen;
       +
       +        bestoff = 0;
       +        bestlen = 0;
       +        check = thwmaxcheck;
       +        last = 0;
       +        /* walk the hash chain, examining at most thwmaxcheck candidates */
       +        for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
       +                /* off is a ushort, so the time difference wraps modulo 2^16 */
       +                off = now - then;
       +                /* stop when offsets stop growing or exceed the window */
       +                if(off <= last || off > WhackMaxOff)
       +                        break;
       +
       +                /*
       +                 * don't need to check for the end because
       +                 * 1) s too close check above
       +                 */
       +                t = s - off;
       +                if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
       +                        /* only extend a candidate that could beat the best so far */
       +                        if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
       +                                t += 3;
       +                                for(s += 3; s < esrc; s++){
       +                                        if(*s != *t)
       +                                                break;
       +                                        t++;
       +                                }
       +                                if(s - *ss > bestlen){
       +                                        bestlen = s - *ss;
       +                                        bestoff = off;
       +                                        /* good enough; stop searching */
       +                                        if(bestlen > thwmaxcheck)
       +                                                break;
       +                                }
       +                        }
       +                }
       +                /* rewind s for the next candidate */
       +                s = *ss;
       +                last = off;
       +        }
       +        *ss += bestlen;
       +        return bestoff;
       +}
       +
       +/*
       + * knuth vol. 3 multiplicative hashing
       + * each byte x chosen according to rules
       + * 1/4 < x < 3/10, 1/3 < x < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
       + * with reasonable spread between the bytes & their complements
       + *
       + * the 3 byte value appears to be almost as good as the 4 byte value,
       + * and might be faster on some machines
       + */
       +/*
       + * disabled alternative using the 3 byte multiplier:
       +#define hashit(c)        ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
       +*/
       +/*
       + * hash the low 24 bits of c: multiply by the 4 byte constant and
       + * keep HashLog bits of the product, starting at bit 32 - HashLog.
       + */
       +#define hashit(c)        (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
       +
       +/*
       + * lz77 compression with single lookup in a hash table for each block
       + *
       + * Compresses the n bytes at src into dst, which is treated as holding
       + * at most n bytes.  Returns the number of bytes written, or -1 when
       + * compression is disabled, n < MinMatch, the output would not fit in
       + * n bytes, or the progress check below gives up.  On a successful
       + * return the counters in stats have been incremented; stats must
       + * therefore be initialised by the caller.
       + */
       +int
       +whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
       +{
       +        uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
       +        ulong cont, code, wbits;
       +        ushort now;
       +        int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
       +
       +        if(!compressblocks || n < MinMatch)
       +                return -1;
       +
       +        /* output window: never emit more bytes than the input had */
       +        wdst = dst;
       +        wdmax = dst + n;
       +
       +        now = w->begin;
       +        s = src;
       +        w->data = s;
       +
       +        /* cont carries the next bytes to hash, newest in the low byte */
       +        cont = (s[0] << 16) | (s[1] << 8) | s[2];
       +
       +        esrc = s + n;
       +        half = s + (n >> 1);
       +        /* wbits accumulates pending output bits; wnbits counts them */
       +        wnbits = 0;
       +        wbits = 0;
       +        lits = 0;
       +        matches = 0;
       +        offbits = 0;
       +        lenbits = 0;
       +        /* one bit per literal, newest in bit 0: set if it was < 32 or > 127 */
       +        lithist = ~0;
       +        while(s < esrc){
       +                h = hashit(cont);
       +
       +                sss = s;
       +                toff = whackmatch(w, &sss, esrc, h, now);
       +                ss = sss;
       +
       +                len = ss - s;
       +                /* flush whole bytes of pending output */
       +                for(; wnbits >= 8; wnbits -= 8){
       +                        if(wdst >= wdmax){
       +                                w->begin = now;
       +                                return -1;
       +                        }
       +                        *wdst++ = wbits >> (wnbits - 8);
       +                }
       +                if(len < MinMatch){
       +                        /* no usable match: emit the byte at s as a literal */
       +                        toff = *s;
       +                        lithist = (lithist << 1) | toff < 32 | toff > 127;
       +                        if(lithist & 0x1e){
       +                                /* one of the previous four literals was outside 32..127 */
       +                                wbits = (wbits << 9) | toff;
       +                                wnbits += 9;
       +                        }else if(lithist & 1){
       +                                /* only this literal is outside 32..127 */
       +                                toff = (toff + 64) & 0xff;
       +                                if(toff < 96){
       +                                        wbits = (wbits << 10) | toff;
       +                                        wnbits += 10;
       +                                }else{
       +                                        wbits = (wbits << 11) | toff;
       +                                        wnbits += 11;
       +                                }
       +                        }else{
       +                                wbits = (wbits << 8) | toff;
       +                                wnbits += 8;
       +                        }
       +                        lits++;
       +
       +                        /*
       +                         * speed hack
       +                         * check for compression progress, bail if none achieved
       +                         */
       +                        if(s > half){
       +                                if(4 * (s - src) < 5 * lits){
       +                                        w->begin = now;
       +                                        return -1;
       +                                }
       +                                /* the check runs once: move half to the end */
       +                                half = esrc;
       +                        }
       +
       +                        /* enter this position into the hash chain */
       +                        if(s + MinMatch <= esrc){
       +                                w->next[now & (WhackMaxOff - 1)] = w->hash[h];
       +                                w->hash[h] = now;
       +                                if(s + MinMatch < esrc)
       +                                        cont = (cont << 8) | s[MinMatch];
       +                        }
       +                        now++;
       +                        s++;
       +                        continue;
       +                }
       +
       +                matches++;
       +
       +                /*
       +                 * length of match
       +                 */
       +                if(len > MaxLen){
       +                        len = MaxLen;
       +                        ss = s + len;
       +                }
       +                len -= MinMatch;
       +                if(len < MaxFastLen){
       +                        /* short lengths come straight from lentab */
       +                        bits = lentab[len].bits;
       +                        wbits = (wbits << bits) | lentab[len].encode;
       +                        wnbits += bits;
       +                        lenbits += bits;
       +                }else{
       +                        /* long lengths: grow the code until len fits in the current range */
       +                        code = BigLenCode;
       +                        bits = BigLenBits;
       +                        use = BigLenBase;
       +                        len -= MaxFastLen;
       +                        while(len >= use){
       +                                len -= use;
       +                                code = (code + use) << 1;
       +                                /* use doubles only when bits is even */
       +                                use <<= (bits & 1) ^ 1;
       +                                bits++;
       +                        }
       +
       +                        wbits = (wbits << bits) | (code + len);
       +                        wnbits += bits;
       +                        lenbits += bits;
       +
       +                        /* flush whole bytes before the offset bits are appended */
       +                        for(; wnbits >= 8; wnbits -= 8){
       +                                if(wdst >= wdmax){
       +                                        w->begin = now;
       +                                        return -1;
       +                                }
       +                                *wdst++ = wbits >> (wnbits - 8);
       +                        }
       +                }
       +
       +                /*
       +                 * offset in history
       +                 */
       +                toff--;
       +                /* find the smallest width, from MinOffBits up, that holds toff */
       +                for(bits = MinOffBits; toff >= (1 << bits); bits++)
       +                        ;
       +                if(bits < MaxOffBits-1){
       +                        /* 3 bit width selector */
       +                        wbits = (wbits << 3) | (bits - MinOffBits);
       +                        if(bits != MinOffBits)
       +                                bits--;
       +                        wnbits += bits + 3;
       +                        offbits += bits + 3;
       +                }else{
       +                        /* 4 bit width selector for the two largest widths */
       +                        wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
       +                        bits--;
       +                        wnbits += bits + 4;
       +                        offbits += bits + 4;
       +                }
       +                /* & binds tighter than |: only the low `bits' bits of toff are appended */
       +                wbits = (wbits << bits) | toff & ((1 << bits) - 1);
       +
       +                /* enter every position covered by the match into the hash chains */
       +                for(; s != ss; s++){
       +                        if(s + MinMatch <= esrc){
       +                                h = hashit(cont);
       +                                w->next[now & (WhackMaxOff - 1)] = w->hash[h];
       +                                w->hash[h] = now;
       +                                if(s + MinMatch < esrc)
       +                                        cont = (cont << 8) | s[MinMatch];
       +                        }
       +                        now++;
       +                }
       +        }
       +
       +        w->begin = now;
       +
       +        stats[StatBytes] += esrc - src;
       +        stats[StatLits] += lits;
       +        stats[StatMatches] += matches;
       +        stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
       +        stats[StatOffBits] += offbits;
       +        stats[StatLenBits] += lenbits;
       +
       +        /* pad the pending bits with zeros up to a byte boundary */
       +        if(wnbits & 7){
       +                wbits <<= 8 - (wnbits & 7);
       +                wnbits += 8 - (wnbits & 7);
       +        }
       +        for(; wnbits >= 8; wnbits -= 8){
       +                if(wdst >= wdmax)
       +                        return -1;
       +                *wdst++ = wbits >> (wnbits - 8);
       +        }
       +
       +        stats[StatOutBytes] += wdst - dst;
       +
       +        return wdst - dst;
       +}
       +
       +/*
       + * Compress the ssize bytes at src into dst with a fresh compressor
       + * state at level 6.  Returns whatever whack() returns: the compressed
       + * size, or -1 if the block was not compressed.  The statistics
       + * gathered by whack() are discarded.
       + */
       +int
       +whackblock(uchar *dst, uchar *src, int ssize)
       +{
       +        Whack w;
       +        ulong stats[WhackStats];
       +
       +        /*
       +         * whack() accumulates into stats with +=, so it must start
       +         * zeroed; size it to match whack()'s declared parameter.
       +         */
       +        memset(stats, 0, sizeof stats);
       +        whackinit(&w, 6);
       +        return whack(&w, dst, src, ssize, stats);
       +}
   DIR diff --git a/src/cmd/venti/srv/whack.h b/src/cmd/venti/srv/whack.h
       t@@ -0,0 +1,40 @@
       +typedef struct Whack                Whack;
       +typedef struct Unwhack                Unwhack;
       +
       +/*
       + * Sizes and limits shared by the compressor and decompressor.
       + */
       +enum
       +{
       +        WhackStats        = 8,                /* length of the stats array passed to whack */
       +        WhackErrLen        = 64,                /* max length of error message from thwack or unthwack */
       +        WhackMaxOff        = 16*1024,        /* max allowed offset */
       +
       +        HashLog                = 14,                /* log2 of the hash table size */
       +        HashSize        = 1<<HashLog,
       +        HashMask        = HashSize - 1,
       +
       +        MinMatch        = 3,                /* shortest match possible */
       +
       +        MinDecode        = 8,                /* minimum bits to decode a match or lit; >= 8 */
       +
       +        MaxSeqMask        = 8,                /* number of bits in coding block mask */
       +        MaxSeqStart        = 256                /* max offset of initial coding block */
       +};
       +
       +/*
       + * Compressor state; reset with whackinit before use.
       + */
       +struct Whack
       +{
       +        ushort                begin;                        /* time of first byte in hash */
       +        ushort                hash[HashSize];                /* most recent time stamp per hash bucket */
       +        ushort                next[WhackMaxOff];        /* previous time stamp in the same bucket, indexed by time mod WhackMaxOff */
       +        uchar                *data;                        /* set by whack to the start of its source buffer */
       +};
       +
       +/*
       + * Decompressor state: only an error message buffer.
       + */
       +struct Unwhack
       +{
       +        char                err[WhackErrLen];        /* NOTE(review): presumably filled by unwhack on failure; confirm in unwhack.c */
       +};
       +
       +void        whackinit(Whack*, int level);
       +void        unwhackinit(Unwhack*);
       +int        whack(Whack*, uchar *dst, uchar *src, int nsrc, ulong stats[WhackStats]);
       +int        unwhack(Unwhack*, uchar *dst, int ndst, uchar *src, int nsrc);
       +
       +int        whackblock(uchar *dst, uchar *src, int ssize);
   DIR diff --git a/src/cmd/venti/srv/wrarena.c b/src/cmd/venti/srv/wrarena.c
       t@@ -0,0 +1,217 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +QLock godot;
       +char *host;
       +int readonly = 1;        /* for part.c */
       +int mainstacksize = 256*1024;
       +Channel *c;
       +VtConn *z;
       +int fast;        /* and a bit unsafe; only for benchmarking */
       +int haveaoffset;
       +int maxwrites = -1;
       +
       +/*
       + * Work item handed from rdarena to the vtsendthread procs over channel c.
       + * A ZClump whose lump is nil tells a sender to stop.
       + */
       +typedef struct ZClump ZClump;
       +struct ZClump
       +{
       +        ZBlock *lump;                /* clump data as loaded by loadclump; freed by the receiver */
       +        Clump cl;                /* clump header; cl.info carries score, type and uncsize */
       +        u64int aa;                /* address the clump was read from; used in error messages */
       +};
       +
       +/*
       + * Print the command synopsis on standard error and exit with status "usage".
       + * The synopsis lists every flag that threadmain's ARGBEGIN accepts.
       + */
       +void
       +usage(void)
       +{
       +        fprint(2, "usage: wrarena [-f] [-h host] [-o arenaoffset] [-M maxwrites] arenafile [offset]\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * Sender proc: take clumps from channel c and write each one to the
       + * venti connection z, freeing the lump afterwards.  A failed write is
       + * fatal.  The loop ends when recv fails or a clump with a nil lump
       + * arrives.
       + */
       +void
       +vtsendthread(void *v)
       +{
       +        ZClump zcl;
       +
       +        USED(v);
       +        for(;;){
       +                if(recv(c, &zcl) != 1)
       +                        break;
       +                if(zcl.lump == nil)
       +                        break;
       +                if(vtwrite(z, zcl.cl.info.score, zcl.cl.info.type, zcl.lump->data, zcl.cl.info.uncsize) < 0)
       +                        sysfatal("failed writing clump %llud: %r", zcl.aa);
       +                freezblock(zcl.lump);
       +        }
       +
       +        /*
       +         * Do not return.  Every sender would otherwise exit just as
       +         * threadmain calls threadexitsall, and that combination seg
       +         * faults in exit (libthread or the Linux NPTL pthreads
       +         * library; NPTL is suspected but unconfirmed, and a day of
       +         * debugging did not find the cause).  As a workaround, block
       +         * on godot, which threadmain holds, until threadexitsall
       +         * takes effect.
       +         */
       +        qlock(&godot);
       +}
       +
       +/*
       + * Walk the clumps of arena starting at offset (or at 0 when offset is
       + * ~0), optionally verify each one, and hand it to the sender procs
       + * through channel c when a venti connection z exists.  The walk stops
       + * at the first free or unrecognised clump magic, on a read or
       + * verification failure, or once maxwrites clumps have been processed.
       + * With -o (haveaoffset) the final address is printed on standard output.
       + */
       +static void
       +rdarena(Arena *arena, u64int offset)
       +{
       +        u64int a, aa, e;
       +        u32int magic;
       +        Clump cl;
       +        uchar score[VtScoreSize];
       +        ZBlock *lump;
       +        ZClump zcl;
       +
       +        fprint(2, "wrarena: copying %s to venti\n", arena->name);
       +        printarena(2, arena);
       +
       +        a = arena->base;
       +        e = arena->base + arena->size;
       +        if(offset != ~(u64int)0) {
       +                if(offset >= e-a)
       +                        sysfatal("bad offset %llud >= %llud\n",
       +                                offset, e-a);
       +                aa = offset;
       +        } else
       +                aa = 0;
       +
       +        /*
       +         * NOTE(review): offset is validated against e-a (the arena size) but
       +         * the loop bound below is e (base + size); confirm which is intended.
       +         * cl is only read in the increment after loadclump has filled it in.
       +         */
       +        if(maxwrites != 0)
       +        for(; aa < e; aa += ClumpSize+cl.info.size) {
       +                magic = clumpmagic(arena, aa);
       +                if(magic == ClumpFreeMagic)
       +                        break;
       +                if(magic != arena->clumpmagic) {
       +                //        fprint(2, "illegal clump magic number %#8.8ux offset %llud\n",
       +                //                magic, aa);
       +                        break;
       +                }
       +                lump = loadclump(arena, aa, 0, &cl, score, 0);
       +                if(lump == nil) {
       +                        fprint(2, "clump %llud failed to read: %r\n", aa);
       +                        break;
       +                }
       +                /* unless -f was given, recompute the score and check the type */
       +                if(!fast && cl.info.type != VtCorruptType) {
       +                        scoremem(score, lump->data, cl.info.uncsize);
       +                        if(scorecmp(cl.info.score, score) != 0) {
       +                                fprint(2, "clump %llud has mismatched score\n", aa);
       +                                break;
       +                        }
       +                        if(vttypevalid(cl.info.type) < 0) {
       +                                fprint(2, "clump %llud has bad type %d\n", aa, cl.info.type);
       +                                break;
       +                        }
       +                }
       +                /* the receiving sender frees the lump; otherwise free it here */
       +                if(z && cl.info.type != VtCorruptType){
       +                        zcl.cl = cl;
       +                        zcl.lump = lump;
       +                        zcl.aa = aa;
       +                        send(c, &zcl);
       +                }else
       +                        freezblock(lump);
       +                /* a negative maxwrites (the default) means no limit */
       +                if(maxwrites>0 && --maxwrites == 0)
       +                        break;
       +        }
       +        if(haveaoffset)
       +                print("end offset %llud\n", aa);
       +}
       +
       +/*
       + * Entry point for wrarena: open the arena stored in arenafile (at byte
       + * offset -o within the file), optionally connect to the venti server
       + * named by -h, and copy the arena's clumps to it.
       + *
       + * Flags: -f skips per-clump verification, -h host selects the server
       + * (no host, or "/dev/null", means read and verify only), -o gives the
       + * arena's offset within the file, -M limits the number of clumps.
       + * The optional second argument is the clump address to start from.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        int i;
       +        char *file;
       +        Arena *arena;
       +        u64int offset, aoffset;
       +        Part *part;
       +        Dir *d;
       +        uchar buf[8192];
       +        ArenaHead head;
       +        ZClump zerocl;
       +
       +        /* held until exit so finished sender procs park in vtsendthread */
       +        qlock(&godot);
       +        aoffset = 0;
       +        ARGBEGIN{
       +        case 'f':
       +                fast = 1;
       +                ventidoublechecksha1 = 0;
       +                break;
       +        case 'h':
       +                host = EARGF(usage());
       +                break;
       +        case 'o':
       +                haveaoffset = 1;
       +                aoffset = strtoull(EARGF(usage()), 0, 0);
       +                break;
       +        case 'M':
       +                maxwrites = atoi(EARGF(usage()));
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        /* ~0 tells rdarena that no starting address was given */
       +        offset = ~(u64int)0;
       +        switch(argc) {
       +        default:
       +                usage();
       +        case 2:
       +                offset = strtoull(argv[1], 0, 0);
       +                /* fall through */
       +        case 1:
       +                file = argv[0];
       +        }
       +
       +        fmtinstall('V', vtscorefmt);
       +
       +        statsinit();
       +
       +        if((d = dirstat(file)) == nil)
       +                sysfatal("can't stat file %s: %r", file);
       +
       +        part = initpart(file, OREAD);
       +        if(part == nil)
       +                sysfatal("can't open file %s: %r", file);
       +        if(readpart(part, aoffset, buf, sizeof buf) < 0)
       +                sysfatal("can't read file %s: %r", file);
       +
       +        if(unpackarenahead(&head, buf) < 0)
       +                sysfatal("corrupted arena header: %r");
       +
       +        if(aoffset+head.size > d->length)
       +                sysfatal("arena is truncated: want %llud bytes have %llud\n",
       +                        head.size, d->length);
       +        /* the Dir was only needed for the length check */
       +        free(d);
       +
       +        partblocksize(part, head.blocksize);
       +        initdcache(8 * MaxDiskBlock);
       +
       +        arena = initarena(part, aoffset, head.size, head.blocksize);
       +        if(arena == nil)
       +                sysfatal("initarena: %r");
       +
       +        if(host && strcmp(host, "/dev/null") != 0){
       +                z = vtdial(host);
       +                if(z == nil)
       +                        sysfatal("could not connect to server: %r");
       +                if(vtconnect(z) < 0)
       +                        sysfatal("vtconnect: %r");
       +        }else
       +                z = nil;
       +
       +        c = chancreate(sizeof(ZClump), 0);
       +        for(i=0; i<12; i++)
       +                vtproc(vtsendthread, nil);
       +
       +        rdarena(arena, offset);
       +
       +        /*
       +         * Stop the senders before syncing.  The channel is unbuffered
       +         * and each sender leaves its loop on the first nil lump, so
       +         * once all 12 sends have completed every sender has finished
       +         * its last vtwrite.
       +         */
       +        memset(&zerocl, 0, sizeof zerocl);
       +        for(i=0; i<12; i++)
       +                send(c, &zerocl);
       +
       +        /* z is nil when no server was named; there is nothing to sync then */
       +        if(z){
       +                if(vtsync(z) < 0)
       +                        sysfatal("executing sync: %r");
       +                vthangup(z);
       +        }
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/srv/xml.c b/src/cmd/venti/srv/xml.c
       t@@ -0,0 +1,68 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +#include "xml.h"
       +
       +/*
       + * Write arena s to hout as a single self-closing XML element named tag,
       + * indented by indent, with one attribute per call below.
       + */
       +void xmlarena(Hio *hout, Arena *s, char *tag, int indent){
       +        xmlindent(hout, indent);
       +        hprint(hout, "<%s", tag);
       +        xmlaname(hout, s->name, "name");
       +        xmlu32int(hout, s->version, "version");
       +        xmlaname(hout, s->part->name, "partition");
       +        xmlu32int(hout, s->blocksize, "blocksize");
       +        xmlu64int(hout, s->base, "start");
       +        /* NOTE(review): "stop" is base + 2 blocks, not base + size; confirm this is intended */
       +        xmlu64int(hout, s->base+2*s->blocksize, "stop");
       +        xmlu32int(hout, s->ctime, "created");
       +        xmlu32int(hout, s->wtime, "modified");
       +        xmlsealed(hout, s->memstats.sealed, "sealed");
       +        xmlscore(hout, s->score, "score");
       +        xmlu32int(hout, s->memstats.clumps, "clumps");
       +        xmlu32int(hout, s->memstats.cclumps, "compressedclumps");
       +        xmlu64int(hout, s->memstats.uncsize, "data");
       +        /* used space less one ClumpSize header per clump */
       +        xmlu64int(hout, s->memstats.used - s->memstats.clumps * ClumpSize, "compresseddata");
       +        /* used space plus one ClumpInfoSize entry per clump */
       +        xmlu64int(hout, s->memstats.used + s->memstats.clumps * ClumpInfoSize, "storage");
       +        hprint(hout, "/>\n");
       +}
       +
       +/*
       + * Write index s to hout as an XML element named tag, indented by indent.
       + * The element's attributes describe the index; its children are three
       + * lists: <sects> (section map), <amaps> (arena map) and <arenas>.
       + */
       +void
       +xmlindex(Hio *hout, Index *s, char *tag, int indent)
       +{
       +        int i;
       +
       +        /* opening tag with attributes */
       +        xmlindent(hout, indent);
       +        hprint(hout, "<%s", tag);
       +        xmlaname(hout, s->name, "name");
       +        xmlu32int(hout, s->version, "version");
       +        xmlu32int(hout, s->blocksize, "blocksize");
       +        xmlu32int(hout, s->tabsize, "tabsize");
       +        xmlu32int(hout, s->buckets, "buckets");
       +        xmlu32int(hout, s->div, "buckdiv");
       +        hprint(hout, ">\n");
       +
       +        /* index sections */
       +        xmlindent(hout, indent + 1);
       +        hprint(hout, "<sects>\n");
       +        for(i = 0; i < s->nsects; i++){
       +                xmlamap(hout, &s->smap[i], "sect", indent + 2);
       +        }
       +        xmlindent(hout, indent + 1);
       +        hprint(hout, "</sects>\n");
       +
       +        /* arena address map */
       +        xmlindent(hout, indent + 1);
       +        hprint(hout, "<amaps>\n");
       +        for(i = 0; i < s->narenas; i++){
       +                xmlamap(hout, &s->amap[i], "amap", indent + 2);
       +        }
       +        xmlindent(hout, indent + 1);
       +        hprint(hout, "</amaps>\n");
       +
       +        /* the arenas themselves */
       +        xmlindent(hout, indent + 1);
       +        hprint(hout, "<arenas>\n");
       +        for(i = 0; i < s->narenas; i++){
       +                xmlarena(hout, s->arenas[i], "arena", indent + 2);
       +        }
       +        xmlindent(hout, indent + 1);
       +        hprint(hout, "</arenas>\n");
       +
       +        /* closing tag */
       +        xmlindent(hout, indent);
       +        hprint(hout, "</%s>\n", tag);
       +}
       +
       +/*
       + * Write address map entry s to hout as a self-closing XML element
       + * named tag, indented by indent, with name, start and stop attributes.
       + */
       +void
       +xmlamap(Hio *hout, AMap *s, char *tag, int indent)
       +{
       +        xmlindent(hout, indent);
       +        hprint(hout, "<%s", tag);
       +
       +        xmlaname(hout, s->name, "name");
       +        xmlu64int(hout, s->start, "start");
       +        xmlu64int(hout, s->stop, "stop");
       +
       +        hprint(hout, "/>\n");
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/xml.h b/src/cmd/venti/srv/xml.h
       t@@ -0,0 +1,11 @@
       +void        xmlamap(Hio *hout, AMap *v, char *tag, int indent);
       +void        xmlarena(Hio *hout, Arena *v, char *tag, int indent);
       +void        xmlindex(Hio *hout, Index *v, char *tag, int indent);
       +
       +void        xmlaname(Hio *hout, char *v, char *tag);
       +void        xmlscore(Hio *hout, u8int *v, char *tag);
       +void        xmlsealed(Hio *hout, int v, char *tag);
       +void        xmlu32int(Hio *hout, u32int v, char *tag);
       +void        xmlu64int(Hio *hout, u64int v, char *tag);
       +
       +void        xmlindent(Hio *hout, int indent);
   DIR diff --git a/src/cmd/venti/srv/zblock.c b/src/cmd/venti/srv/zblock.c
       t@@ -0,0 +1,93 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Point the formatter f at the data area of block b so that
       + * formatted output lands in b->data, bounded by b->len.
       + */
       +void
       +fmtzbinit(Fmt *f, ZBlock *b)
       +{
       +        /* output window: begins empty at the start of the block's data */
       +        f->start = b->data;
       +        f->to = b->data;
       +        f->stop = (char*)b->data + b->len;
       +
       +        /* no flush routine or argument */
       +        f->flush = nil;
       +        f->farg = nil;
       +
       +        f->runes = 0;
       +        f->nfmt = 0;
       +}
       +
       +#define ROUNDUP(p, n) ((void*)(((ulong)(p)+(n)-1)&~(ulong)((n)-1)))
       +
       +/*
       + * Guard pattern stored just past the data area of every ZBlock and
       + * checked again by freezblock.  The array is declared with the full
       + * guard length of 32 bytes: the string initializer alone occupies only
       + * 23 bytes, so an unsized array would be read past its end by the
       + * 32-byte copy and compare.  The bytes after the string are zero.
       + */
       +static char zmagic[32] = "1234567890abcdefghijkl";
       +
       +/*
       + * Allocate a ZBlock whose data area holds size bytes.
       + *
       + * One malloc supplies everything, laid out in this order:
       + *        slack so that data can start on a blocksize boundary,
       + *        size bytes of data,
       + *        the zmagic guard,
       + *        the ZBlock header itself, on an 8-byte boundary.
       + * b->free keeps the pointer returned by malloc for freezblock.
       + *
       + * blocksize == 0 selects 32-byte alignment.  ROUNDUP masks with n-1,
       + * so blocksize has to be a power of two.  The data area is cleared
       + * only when zeroed is nonzero.
       + *
       + * Returns nil, after calling seterr, if malloc fails.
       + */
       +ZBlock *
       +alloczblock(u32int size, int zeroed, uint blocksize)
       +{
       +        uchar *p, *data;
       +        ZBlock *b;
       +        static ZBlock z;
       +        int n;
       +
       +        if(blocksize == 0)
       +                blocksize = 32;        /* try for cache line alignment */
       +
       +        /* data + guard + header, plus worst-case padding for both alignments */
       +        n = size+sizeof zmagic+sizeof(ZBlock)+blocksize+8;
       +        p = malloc(n);
       +        if(p == nil){
       +                seterr(EOk, "out of memory");
       +                return nil;
       +        }
       +
       +        data = ROUNDUP(p, blocksize);
       +        b = ROUNDUP(data+size+sizeof zmagic, 8);
       +        if(0) fprint(2, "alloc %p-%p data %p-%p b %p-%p\n",
       +                p, p+n, data, data+size, b, b+1);
       +        *b = z;        /* start from an all-zero header */
       +        b->data = data;
       +        b->free = p;
       +        b->len = size;
       +        b->_size = size;
       +        if(zeroed)
       +                memset(b->data, 0, size);
       +        /* plant the guard right after the data; freezblock verifies it */
       +        memmove(b->data+size, zmagic, sizeof zmagic);
       +        return b;
       +}
       +
       +/*
       + * Release a block obtained from alloczblock.  A nil b is ignored.
       + *
       + * The guard bytes that follow the data area (located with b->_size)
       + * are compared with zmagic first; a mismatch means something wrote
       + * past the end of the data, and the program aborts.
       + */
       +void
       +freezblock(ZBlock *b)
       +{
       +        uchar *guard;
       +
       +        if(b == nil)
       +                return;
       +
       +        guard = b->data + b->_size;
       +        /* compare only as many bytes as zmagic really holds, never past its end */
       +        if(memcmp(guard, zmagic, sizeof zmagic) != 0)
       +                abort();
       +        /*
       +         * Wipe the whole 32-byte guard area before freeing; presumably so
       +         * that a second freezblock of the same block fails the check above.
       +         */
       +        memset(guard, 0, 32);
       +        free(b->free);
       +}
       +
       +/*
       + * Copy the first size bytes of packet p into a freshly allocated
       + * ZBlock (not zeroed, default alignment).
       + * Returns nil if p is nil, if the allocation fails, or if packetcopy
       + * reports an error; in the last case the new block is freed first.
       + */
       +ZBlock*
       +packet2zblock(Packet *p, u32int size)
       +{
       +        ZBlock *zb;
       +
       +        if(p == nil)
       +                return nil;
       +
       +        zb = alloczblock(size, 0, 0);
       +        if(zb != nil && packetcopy(p, zb->data, 0, size) < 0){
       +                freezblock(zb);
       +                zb = nil;
       +        }
       +        return zb;
       +}
       +
       +/*
       + * Build a new packet holding the first size bytes of zb's data.
       + * Returns nil when zb is nil.  zb itself is left untouched.
       + */
       +Packet*
       +zblock2packet(ZBlock *zb, u32int size)
       +{
       +        Packet *pkt;
       +
       +        pkt = nil;
       +        if(zb != nil){
       +                pkt = packetalloc();
       +                packetappend(pkt, zb->data, size);
       +        }
       +        return pkt;
       +}
       +
   DIR diff --git a/src/cmd/venti/srv/zeropart.c b/src/cmd/venti/srv/zeropart.c
       t@@ -0,0 +1,31 @@
       +#include "stdinc.h"
       +#include "dat.h"
       +#include "fns.h"
       +
       +/*
       + * Overwrite part with zeros, starting at offset PartBlank.
       + *
       + * The bulk is written MaxIoSize bytes at a time from one zeroed
       + * buffer; whatever is left is then written blocksize bytes at a
       + * time from the same buffer.  A final remainder shorter than
       + * blocksize is not written.  blocksize must be positive: with 0
       + * the second loop would never advance.
       + *
       + * Any failure, including failure to allocate the buffer, is fatal.
       + */
       +void
       +zeropart(Part *part, int blocksize)
       +{
       +        ZBlock *b;
       +        u64int addr;
       +        int w;
       +
       +        fprint(2, "clearing the partition\n");
       +        b = alloczblock(MaxIoSize, 1, blocksize);
       +        /* alloczblock returns nil when malloc fails; b->data is used below */
       +        if(b == nil)
       +                sysfatal("can't initialize %s: can't allocate buffer: %r", part->name);
       +
       +        /* full-size writes; w counts them for the error message */
       +        w = 0;
       +        for(addr = PartBlank; addr + MaxIoSize <= part->size; addr += MaxIoSize){
       +                if(writepart(part, addr, b->data, MaxIoSize) < 0)
       +                        sysfatal("can't initialize %s, writing block %d failed: %r", part->name, w);
       +                w++;
       +        }
       +
       +        /* finish the tail one blocksize at a time */
       +        for(; addr + blocksize <= part->size; addr += blocksize)
       +                if(writepart(part, addr, b->data, blocksize) < 0)
       +                        sysfatal("can't initialize %s: %r", part->name);
       +
       +        freezblock(b);
       +}
   DIR diff --git a/src/cmd/venti/sync.c b/src/cmd/venti/sync.c
       t@@ -0,0 +1,54 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <thread.h>
       +#include <venti.h>
       +
       +char *host;        /* server address from -h; stays nil when -h is not given and is passed as-is to vtdial */
       +int donothing;        /* set by -x: connect and hang up without calling vtsync */
       +
       +void
       +usage(void)        /* print the synopsis on fd 2 and exit all threads with status "usage"; the -x flag handled by threadmain is not listed */
       +{
       +        fprint(2, "usage: sync [-h host]\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * sync: connect to a venti server and ask it to sync.
       + *
       + * -h host names the server; without it vtdial is handed nil.
       + * -x      connects and hangs up without calling vtsync.
       + *
       + * No other arguments are accepted.  A failure to dial, connect or
       + * sync is fatal; otherwise all threads exit with status 0.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        VtConn *z;
       +
       +        fmtinstall('V', vtscorefmt);
       +        fmtinstall('F', vtfcallfmt);
       +
       +        ARGBEGIN{
       +        case 'h':
       +                /*
       +                 * EARGF runs usage(), which does not return, when the
       +                 * option argument is missing, so host needs no nil check.
       +                 */
       +                host = EARGF(usage());
       +                break;
       +        case 'x':
       +                donothing = 1;
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 0)
       +                usage();
       +
       +        z = vtdial(host);
       +        if(z == nil)
       +                sysfatal("could not connect to server: %r");
       +
       +        if(vtconnect(z) < 0)
       +                sysfatal("vtconnect: %r");
       +
       +        if(!donothing && vtsync(z) < 0)
       +                sysfatal("vtsync: %r");
       +
       +        vthangup(z);
       +        threadexitsall(0);
       +}
   DIR diff --git a/src/cmd/venti/write.c b/src/cmd/venti/write.c
       t@@ -0,0 +1,62 @@
       +#include <u.h>
       +#include <libc.h>
       +#include <venti.h>
       +#include <libsec.h>
       +#include <thread.h>
       +
       +void
       +usage(void)        /* print the synopsis on fd 2 and exit all threads with status "usage" */
       +{
       +        fprint(2, "usage: write [-z] [-h host] [-t type] <datablock\n");
       +        threadexitsall("usage");
       +}
       +
       +/*
       + * write: read one block from standard input, store it on a venti
       + * server and print the resulting score.
       + *
       + * -h host names the server; without it vtdial is handed nil.
       + * -t type sets the block type (default VtDataType), parsed with atoi.
       + * -z      passes the block through vtzerotruncate before writing.
       + *
       + * Input longer than VtMaxLumpSize bytes, a read error, or any
       + * failure talking to the server is fatal.
       + */
       +void
       +threadmain(int argc, char *argv[])
       +{
       +        char *host;
       +        int dotrunc, n, type;
       +        uchar *p, score[VtScoreSize];
       +        VtConn *z;
       +
       +        fmtinstall('F', vtfcallfmt);
       +        fmtinstall('V', vtscorefmt);
       +
       +        host = nil;
       +        dotrunc = 0;
       +        type = VtDataType;
       +        ARGBEGIN{
       +        case 'z':
       +                dotrunc = 1;
       +                break;
       +        case 'h':
       +                host = EARGF(usage());
       +                break;
       +        case 't':
       +                type = atoi(EARGF(usage()));
       +                break;
       +        default:
       +                usage();
       +                break;
       +        }ARGEND
       +
       +        if(argc != 0)
       +                usage();
       +
       +        /* ask for one byte more than the limit so oversized input is detectable */
       +        p = vtmallocz(VtMaxLumpSize+1);
       +        n = readn(0, p, VtMaxLumpSize+1);
       +        /* a failed read must not reach vtzerotruncate/vtwrite as a negative length */
       +        if(n < 0)
       +                sysfatal("read error: %r");
       +        if(n > VtMaxLumpSize)
       +                sysfatal("input too big: max block size is %d", VtMaxLumpSize);
       +        z = vtdial(host);
       +        if(z == nil)
       +                sysfatal("could not connect to server: %r");
       +        if(vtconnect(z) < 0)
       +                sysfatal("vtconnect: %r");
       +        if(dotrunc)
       +                n = vtzerotruncate(type, p, n);
       +        if(vtwrite(z, score, type, p, n) < 0)
       +                sysfatal("vtwrite: %r");
       +        vthangup(z);
       +        print("%V\n", score);
       +        threadexitsall(0);
       +}