URI: 
       tfixarenas.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tfixarenas.c (40556B)
       ---
            1 /*
            2  * Check and fix an arena partition.
            3  *
            4  * This is a lot grittier than the rest of Venti because
            5  * it can't just give up if a byte here or there is wrong.
            6  *
            7  * The rule here (hopefully followed!) is that block corruption
            8  * only ever has a local effect -- there are no blocks that you
            9  * can wipe out that will cause large portions of
           10  * uncorrupted data blocks to be useless.
           11  */
           12 
           13 #include "stdinc.h"
           14 #include "dat.h"
           15 #include "fns.h"
           16 #include "whack.h"
           17 
           18 #define ROUNDUP(x,n)                (((x)+(n)-1)&~((n)-1))
           19 
           20 #pragma varargck type "z" uvlong
           21 #pragma varargck type "z" vlong
           22 #pragma varargck type "t" uint
           23 
           /*
            * Byte-size multipliers (K, M, G), used for buffer sizes and by the
            * %z formatter, plus Block, the length isonearena pages in when it
            * probes offset 0 of the partition.
            */
           24 enum
           25 {
           26         K = 1024,
           27         M = 1024*1024,
           28         G = 1024*1024*1024,
           29 
           30         Block = 4096,
           31 };
           32 
           /*
            * Program-wide state.  Roles that are visible in this part of the file:
            *	verbose		enables extra diagnostics (guessgeometry; showdiffs
            *			dumps raw bytes when verbose > 1).
            *	part		partition handle handed to readpart and writepart.
            *	fix		when zero, pageout never writes anything to disk.
            *	badreads	incremented for every sector readdisk fails to read.
            *	zero		compared against in showdiffs to detect non-zero
            *			trailing data; never written in the code visible here.
            *	ap, arenasize	partition geometry; guessgeometry fills in whatever
            *			was left as zero.
            *	partend		offset at and beyond which readdisk and pagein
            *			return 0xFB filler instead of reading.
            * The remaining variables are used outside this section.
            */
           33 int debugsha1;
           34 
           35 int verbose;
           36 Part *part;
           37 char *file;
           38 char *basename;
           39 char *dumpbase;
           40 int fix;
           41 int badreads;
           42 int unseal;
           43 uchar zero[MaxDiskBlock];
           44 
           45 Arena lastarena;
           46 ArenaPart ap;
           47 uvlong arenasize;
           48 int nbadread;
           49 int nbad;
           50 uvlong partend;
           51 void checkarena(vlong, int);
           52 
           /*
            * Print the command-line synopsis on standard error and exit.
            * The exit status is the non-empty string "usage" so that a
            * malformed command line is reported to the caller as a failure
            * rather than as success.
            */
           void
           usage(void)
           {
                   fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
                   threadexitsall("usage");
           }
           59 
           60 /*
           61  * Format number in simplest way that is okay with unittoull.
           62  */
           63 static int
           64 zfmt(Fmt *fmt)
           65 {
           66         vlong x;
           67 
           68         x = va_arg(fmt->args, vlong);
           69         if(x == 0)
           70                 return fmtstrcpy(fmt, "0");
           71         if(x%G == 0)
           72                 return fmtprint(fmt, "%lldG", x/G);
           73         if(x%M == 0)
           74                 return fmtprint(fmt, "%lldM", x/M);
           75         if(x%K == 0)
           76                 return fmtprint(fmt, "%lldK", x/K);
           77         return fmtprint(fmt, "%lld", x);
           78 }
           79 
           80 /*
           81  * Format time like ctime without newline.
           82  */
           83 static int
           84 tfmt(Fmt *fmt)
           85 {
           86         uint t;
           87         char buf[30];
           88 
           89         t = va_arg(fmt->args, uint);
           90         strcpy(buf, ctime(t));
           91         buf[28] = 0;
           92         return fmtstrcpy(fmt, buf);
           93 }
           94 
           95 /*
           96  * Coalesce messages about unreadable sectors into larger ranges.
           97  * bad(0, 0) flushes the buffer.
           98  */
           99 static void
          100 bad(char *msg, vlong o, int len)
          101 {
          102         static vlong lb0, lb1;
          103         static char *lmsg;
          104 
          105         if(msg == nil)
          106                 msg = lmsg;
          107         if(o == -1){
          108                 lmsg = nil;
          109                 lb0 = 0;
          110                 lb1 = 0;
          111                 return;
          112         }
          113         if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
          114                 if(lb0 != lb1)
          115                         print("%s %#llux+%#llux (%,lld+%,lld)\n",
          116                                 lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
          117                 lb0 = o;
          118         }
          119         lmsg = msg;
          120         lb1 = o+len;
          121 }
          122 
          123 /*
          124  * Read in the len bytes of data at the offset.  If can't for whatever reason,
          125  * fill it with garbage but print an error.
          126  */
          127 static uchar*
          128 readdisk(uchar *buf, vlong offset, int len)
          129 {
          130         int i, j, k, n;
          131 
          132         if(offset >= partend){
          133                 memset(buf, 0xFB, len);
          134                 return buf;
          135         }
          136 
          137         if(offset+len > partend){
          138                 memset(buf, 0xFB, len);
          139                 len = partend - offset;
          140         }
          141 
          142         if(readpart(part, offset, buf, len) >= 0)
          143                 return buf;
          144 
          145         /*
          146          * The read failed.  Clear the buffer to nonsense, and
          147          * then try reading in smaller pieces.  If that fails,
          148          * read in even smaller pieces.  And so on down to sectors.
          149          */
          150         memset(buf, 0xFD, len);
          151         for(i=0; i<len; i+=64*K){
          152                 n = 64*K;
          153                 if(i+n > len)
          154                         n = len-i;
          155                 if(readpart(part, offset+i, buf+i, n) >= 0)
          156                         continue;
          157                 for(j=i; j<len && j<i+64*K; j+=4*K){
          158                         n = 4*K;
          159                         if(j+n > len)
          160                                 n = len-j;
          161                         if(readpart(part, offset+j, buf+j, n) >= 0)
          162                                 continue;
          163                         for(k=j; k<len && k<j+4*K; k+=512){
          164                                 if(readpart(part, offset+k, buf+k, 512) >= 0)
          165                                         continue;
          166                                 bad("disk read failed at", k, 512);
          167                                 badreads++;
          168                         }
          169                 }
          170         }
          171         bad(nil, 0, 0);
          172         return buf;
          173 }
          174 
          175 /*
          176  * Buffer to support running SHA1 hash of the disk.
          177  */
          /*
           * State for hashing a stretch of disk incrementally.
           *	fd		when > 0, sbupdate also writes the hashed bytes to this
           *			file (opened by sbdebug), at offsets relative to r0.
           *	offset		disk offset of the next byte to be hashed.
           *	state		running SHA1 digest state.
           *	rollback	when set, sbupdate saves the digest state every 4M
           *			so that sbrollback can rewind.
           *	r0		disk offset of the first update; 0 means not yet set.
           *	hist, nhist	saved digest states (index i is the state at r0 + i*4M)
           *			and the number of slots allocated.
           */
          178 typedef struct Shabuf Shabuf;
          179 struct Shabuf
          180 {
          181         int fd;
          182         vlong offset;
          183         DigestState state;
          184         int rollback;
          185         vlong r0;
          186         DigestState *hist;
          187         int nhist;
          188 };
          189 
          190 void
          191 sbdebug(Shabuf *sb, char *file)
          192 {
          193         int fd;
          194 
          195         if(sb->fd > 0){
          196                 close(sb->fd);
          197                 sb->fd = 0;
          198         }
          199         if((fd = create(file, OWRITE, 0666)) < 0)
          200                 return;
          201         if(fd == 0){
          202                 fd = dup(fd, -1);
          203                 close(0);
          204         }
          205         sb->fd = fd;
          206 }
          207 
          /*
           * Add the len bytes at p, which came from disk offset `offset`,
           * to the running hash in sb.
           *
           * The data is used only if it covers sb->offset, the next byte the
           * hash is waiting for: bytes before sb->offset are skipped, and a
           * buffer that does not contain sb->offset is ignored entirely.
           * When sb->fd > 0 the accepted bytes are also written to that file.
           * When sb->rollback is set the digest state is saved at every 4M
           * boundary (measured from sb->r0) for use by sbrollback.
           */
          208 void
          209 sbupdate(Shabuf *sb, uchar *p, vlong offset, int len)
          210 {
          211         int n, x;
          212         vlong o;
          213 
                      /* first call with rollback on: slot 0 is the zeroed state at r0 */
          214         if(sb->rollback && !sb->hist){
          215                 sb->r0 = offset;
          216                 sb->nhist = 1;
          217                 sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
          218                 memset(sb->hist, 0, sizeof sb->hist[0]);
          219         }
                      /* r0 == 0 doubles as "not set yet" */
          220         if(sb->r0 == 0)
          221                 sb->r0 = offset;
          222 
                      /* ignore buffers that do not contain the next byte to hash */
          223         if(sb->offset < offset || sb->offset >= offset+len){
          224                 if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
          225                         p, offset, len, sb->offset);
          226                 return;
          227         }
                      /* skip the x leading bytes that were already hashed */
          228         x = sb->offset - offset;
          229         if(0) print("sbupdate %p %#llux+%d skip %d\n",
          230                 sb, offset, len, x);
          231         if(x){
          232                 p += x;
          233                 offset += x;
          234                 len -= x;
          235         }
          236         assert(sb->offset == offset);
          237 
                      /* debug copy; the write's return value is not checked */
          238         if(sb->fd > 0)
          239                 pwrite(sb->fd, p, len, offset - sb->r0);
          240 
          241         if(!sb->rollback){
          242                 sha1(p, len, nil, &sb->state);
          243                 sb->offset += len;
          244                 return;
          245         }
          246 
          247         /* save state every 4M so we can roll back quickly */
          248         o = offset - sb->r0;
          249         while(len > 0){
                              /* hash no further than the next 4M boundary */
          250                 n = 4*M - o%(4*M);
          251                 if(n > len)
          252                         n = len;
          253                 sha1(p, n, nil, &sb->state);
          254                 sb->offset += n;
          255                 o += n;
          256                 p += n;
          257                 len -= n;
          258                 if(o%(4*M) == 0){
          259                         x = o/(4*M);
                                      /* boundaries should arrive in order, so x is expected to equal nhist when growing */
          260                         if(x >= sb->nhist){
          261                                 if(x != sb->nhist)
          262                                         print("oops! x=%d nhist=%d\n", x, sb->nhist);
          263                                 sb->nhist += 32;
          264                                 sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist);
          265                         }
          266                         sb->hist[x] = sb->state;
          267                 }
          268         }
          269 }
          270 
          271 void
          272 sbdiskhash(Shabuf *sb, vlong eoffset)
          273 {
          274         static uchar dbuf[4*M];
          275         int n;
          276 
          277         while(sb->offset < eoffset){
          278                 n = sizeof dbuf;
          279                 if(sb->offset+n > eoffset)
          280                         n = eoffset - sb->offset;
          281                 readdisk(dbuf, sb->offset, n);
          282                 sbupdate(sb, dbuf, sb->offset, n);
          283         }
          284 }
          285 
          286 void
          287 sbrollback(Shabuf *sb, vlong offset)
          288 {
          289         int x;
          290         vlong o;
          291         Dir d;
          292 
          293         if(!sb->rollback || !sb->r0){
          294                 print("cannot rollback sha\n");
          295                 return;
          296         }
          297         if(offset >= sb->offset)
          298                 return;
          299         o = offset - sb->r0;
          300         x = o/(4*M);
          301         if(x >= sb->nhist){
          302                 print("cannot rollback sha\n");
          303                 return;
          304         }
          305         sb->state = sb->hist[x];
          306         sb->offset = sb->r0 + x*4*M;
          307         assert(sb->offset <= offset);
          308 
          309         if(sb->fd > 0){
          310                 nulldir(&d);
          311                 d.length = sb->offset - sb->r0;
          312                 dirfwstat(sb->fd, &d);
          313         }
          314 }
          315 
          316 void
          317 sbscore(Shabuf *sb, uchar *score)
          318 {
          319         if(sb->hist){
          320                 free(sb->hist);
          321                 sb->hist = nil;
          322         }
          323         sha1(nil, 0, score, &sb->state);
          324 }
          325 
          326 /*
          327  * If we're fixing arenas, then editing this memory edits the disk!
          328  * It will be written back out as new data is paged in.
          329  */
          /* buf: the window of disk currently paged in by pagein; callers edit it in place */
          330 uchar buf[4*M];
          /* sbuf: copy of buf as read from disk; pageout compares against it to detect edits */
          331 uchar sbuf[4*M];
          /* bufoffset: disk offset of buf[0] */
          332 vlong bufoffset;
          /* buflen: number of valid bytes in buf; 0 means nothing is paged in */
          333 int buflen;
          334 
          335 static void pageout(void);
          336 static uchar*
          337 pagein(vlong offset, int len)
          338 {
          339         pageout();
          340         if(offset >= partend){
          341                 memset(buf, 0xFB, sizeof buf);
          342                 return buf;
          343         }
          344 
          345         if(offset+len > partend){
          346                 memset(buf, 0xFB, sizeof buf);
          347                 len = partend - offset;
          348         }
          349         bufoffset = offset;
          350         buflen = len;
          351         readdisk(buf, offset, len);
          352         memmove(sbuf, buf, len);
          353         return buf;
          354 }
          355 
          356 static void
          357 pageout(void)
          358 {
          359         if(buflen==0 || !fix || memcmp(buf, sbuf, buflen) == 0){
          360                 buflen = 0;
          361                 return;
          362         }
          363         if(writepart(part, bufoffset, buf, buflen) < 0)
          364                 print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
          365                         bufoffset, buflen, bufoffset, buflen);
          366         buflen = 0;
          367 }
          368 
          369 static void
          370 zerorange(vlong offset, int len)
          371 {
          372         int i;
          373         vlong ooff;
          374         int olen;
          375         enum { MinBlock = 4*K, MaxBlock = 8*K };
          376 
          377         if(0)
          378         if(bufoffset <= offset && offset+len <= bufoffset+buflen){
          379                 memset(buf+(offset-bufoffset), 0, len);
          380                 return;
          381         }
          382 
          383         ooff = bufoffset;
          384         olen = buflen;
          385 
          386         i = offset%MinBlock;
          387         if(i+len < MaxBlock){
          388                 pagein(offset-i, (len+MinBlock-1)&~(MinBlock-1));
          389                 memset(buf+i, 0, len);
          390         }else{
          391                 pagein(offset-i, MaxBlock);
          392                 memset(buf+i, 0, MaxBlock-i);
          393                 offset += MaxBlock-i;
          394                 len -= MaxBlock-i;
          395                 while(len >= MaxBlock){
          396                         pagein(offset, MaxBlock);
          397                         memset(buf, 0, MaxBlock);
          398                         offset += MaxBlock;
          399                         len -= MaxBlock;
          400                 }
          401                 pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
          402                 memset(buf, 0, len);
          403         }
          404         pagein(ooff, olen);
          405 }
          406 
          407 /*
          408  * read/write integers
          409  *
          410 static void
          411 p16(uchar *p, u16int u)
          412 {
          413         p[0] = (u>>8) & 0xFF;
          414         p[1] = u & 0xFF;
          415 }
          416 */
          417 
          418 static u16int
          419 u16(uchar *p)
          420 {
          421         return (p[0]<<8)|p[1];
          422 }
          423 
          424 static void
          425 p32(uchar *p, u32int u)
          426 {
          427         p[0] = (u>>24) & 0xFF;
          428         p[1] = (u>>16) & 0xFF;
          429         p[2] = (u>>8) & 0xFF;
          430         p[3] = u & 0xFF;
          431 }
          432 
          /*
           * Decode the 4-byte big-endian integer at p.
           * Each byte is widened to u32int before shifting: shifting the
           * promoted int left by 24 would overflow into the sign bit
           * whenever p[0] >= 0x80.
           */
          static u32int
          u32(uchar *p)
          {
                  return ((u32int)p[0]<<24)|((u32int)p[1]<<16)|((u32int)p[2]<<8)|(u32int)p[3];
          }
          438 
          439 /*
          440 static void
          441 p64(uchar *p, u64int u)
          442 {
          443         p32(p, u>>32);
          444         p32(p, u);
          445 }
          446 */
          447 
          448 static u64int
          449 u64(uchar *p)
          450 {
          451         return ((u64int)u32(p)<<32) | u32(p+4);
          452 }
          453 
          /*
           * qsort comparison function for arrays of vlong, ascending.
           * Returns -1, 0 or 1 as *va is less than, equal to or greater
           * than *vb.
           */
          static int
          vlongcmp(const void *va, const void *vb)
          {
                  vlong a, b;

                  a = *(const vlong*)va;
                  b = *(const vlong*)vb;
                  if(a < b)
                          return -1;
                  if(a > b)
                          return 1;
                  return 0;
          }
          467 
          468 /* D and S are in draw.h */
          469 #define D VD
          470 #define S VS
          471 
          /*
           * Display formats or'ed into Info.len; showdiffs switches on them.
           *	D	decimal, printed with the ',' format flag
           *	Z	size, printed with %z
           *	S	string (used with ANameSize for names)
           *	T	time, printed with %t
           * N masks off the format bits, leaving the field width in bytes.
           * A width with no format bits prints as a plain number (1 byte),
           * hex (4 bytes) or a hex dump (any other width).
           */
          472 enum
          473 {
          474         D = 0x10000,
          475         Z = 0x20000,
          476         S = 0x30000,
          477         T = 0x40000,
          478         N = 0xFFFF
          479 };
          /*
           * One field of an on-disk structure, as walked by showdiffs.
           *	len	width in bytes (len&N) combined with a display format
           *		(D, Z, S or T); an entry whose width is 0 ends a table.
           *	name	label printed when the field differs.
           */
          480 typedef struct Info Info;
          481 struct Info
          482 {
          483         int len;
          484         char *name;
          485 };
          486 
          487 Info partinfo[] = {
          488         4,        "magic",
          489         D|4,        "version",
          490         Z|4,        "blocksize",
          491         4,        "arenabase",
          492         0
          493 };
          494 
          495 Info headinfo4[] = {
          496         4,        "magic",
          497         D|4,        "version",
          498         S|ANameSize,        "name",
          499         Z|4,        "blocksize",
          500         Z|8,        "size",
          501         0
          502 };
          503 
          504 Info headinfo5[] = {
          505         4,        "magic",
          506         D|4,        "version",
          507         S|ANameSize,        "name",
          508         Z|4,        "blocksize",
          509         Z|8,        "size",
          510         4,        "clumpmagic",
          511         0
          512 };
          513 
          514 Info tailinfo4[] = {
          515         4,        "magic",
          516         D|4,        "version",
          517         S|ANameSize,        "name",
          518         D|4,        "clumps",
          519         D|4,        "cclumps",
          520         T|4,        "ctime",
          521         T|4,        "wtime",
          522         D|8,        "used",
          523         D|8,        "uncsize",
          524         1,        "sealed",
          525         0
          526 };
          527 
          528 Info tailinfo4a[] = {
          529         /* tailinfo 4 */
          530         4,        "magic",
          531         D|4,        "version",
          532         S|ANameSize,        "name",
          533         D|4,        "clumps",
          534         D|4,        "cclumps",
          535         T|4,        "ctime",
          536         T|4,        "wtime",
          537         D|8,        "used",
          538         D|8,        "uncsize",
          539         1,        "sealed",
          540 
          541         /* mem stats */
          542         1,        "extension",
          543         D|4,        "mem.clumps",
          544         D|4,        "mem.cclumps",
          545         D|8,        "mem.used",
          546         D|8,        "mem.uncsize",
          547         1,        "mem.sealed",
          548         0
          549 };
          550 
          551 Info tailinfo5[] = {
          552         4,        "magic",
          553         D|4,        "version",
          554         S|ANameSize,        "name",
          555         D|4,        "clumps",
          556         D|4,        "cclumps",
          557         T|4,        "ctime",
          558         T|4,        "wtime",
          559         4,        "clumpmagic",
          560         D|8,        "used",
          561         D|8,        "uncsize",
          562         1,        "sealed",
          563         0
          564 };
          565 
          566 Info tailinfo5a[] = {
          567         /* tailinfo 5 */
          568         4,        "magic",
          569         D|4,        "version",
          570         S|ANameSize,        "name",
          571         D|4,        "clumps",
          572         D|4,        "cclumps",
          573         T|4,        "ctime",
          574         T|4,        "wtime",
          575         4,        "clumpmagic",
          576         D|8,        "used",
          577         D|8,        "uncsize",
          578         1,        "sealed",
          579 
          580         /* mem stats */
          581         1,        "extension",
          582         D|4,        "mem.clumps",
          583         D|4,        "mem.cclumps",
          584         D|8,        "mem.used",
          585         D|8,        "mem.uncsize",
          586         1,        "mem.sealed",
          587         0
          588 };
          589 
          590 void
          591 showdiffs(uchar *want, uchar *have, int len, Info *info)
          592 {
          593         int n;
          594 
          595         while(len > 0 && (n=info->len&N) > 0){
          596                 if(memcmp(have, want, n) != 0){
          597                         switch(info->len){
          598                         case 1:
          599                                 print("\t%s: correct=%d disk=%d\n",
          600                                         info->name, *want, *have);
          601                                 break;
          602                         case 4:
          603                                 print("\t%s: correct=%#ux disk=%#ux\n",
          604                                         info->name, u32(want), u32(have));
          605                                 break;
          606                         case D|4:
          607                                 print("\t%s: correct=%,ud disk=%,ud\n",
          608                                         info->name, u32(want), u32(have));
          609                                 break;
          610                         case T|4:
          611                                 print("\t%s: correct=%t\n\t\tdisk=%t\n",
          612                                         info->name, u32(want), u32(have));
          613                                 break;
          614                         case Z|4:
          615                                 print("\t%s: correct=%z disk=%z\n",
          616                                         info->name, (uvlong)u32(want), (uvlong)u32(have));
          617                                 break;
          618                         case D|8:
          619                                 print("\t%s: correct=%,lld disk=%,lld\n",
          620                                         info->name, u64(want), u64(have));
          621                                 break;
          622                         case Z|8:
          623                                 print("\t%s: correct=%z disk=%z\n",
          624                                         info->name, u64(want), u64(have));
          625                                 break;
          626                         case S|ANameSize:
          627                                 print("\t%s: correct=%s disk=%.*s\n",
          628                                         info->name, (char*)want,
          629                                         utfnlen((char*)have, ANameSize-1),
          630                                         (char*)have);
          631                                 break;
          632                         default:
          633                                 print("\t%s: correct=%.*H disk=%.*H\n",
          634                                         info->name, n, want, n, have);
          635                                 break;
          636                         }
          637                 }
          638                 have += n;
          639                 want += n;
          640                 len -= n;
          641                 info++;
          642         }
          643         if(len > 0 && memcmp(have, want, len) != 0){
          644                 if(memcmp(want, zero, len) != 0)
          645                         print("!!\textra want data in showdiffs (bug in fixarenas)\n");
          646                 else
          647                         print("\tnon-zero data on disk after structure\n");
          648                 if(verbose > 1){
          649                         print("want: %.*H\n", len, want);
          650                         print("have: %.*H\n", len, have);
          651                 }
          652         }
          653 }
          654 
          655 /*
          656  * Does part begin with an arena?
          657  */
          658 int
          659 isonearena(void)
          660 {
          661         return u32(pagein(0, Block)) == ArenaHeadMagic;
          662 }
          663 
          /* Table of sizes in bytes: 16K, 64K, 512K, 768K.  Its user is outside this section. */
          664 static int tabsizes[] = { 16*1024, 64*1024, 512*1024, 768*1024, };
          665 /*
          666  * Poke around on the disk to guess what the ArenaPart numbers are.
          667  */
          668 void
          669 guessgeometry(void)
          670 {
          671         int i, j, n, bestn, ndiff, nhead, ntail;
          672         uchar *p, *ep, *sp;
          673         u64int diff[100], head[20], tail[20];
          674         u64int offset, bestdiff;
          675 
          676         ap.version = ArenaPartVersion;
          677 
          678         if(arenasize == 0 || ap.blocksize == 0){
          679                 /*
          680                  * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
          681                  * Instead, look for the individual arena headers and tails, which there
          682                  * are many of, and once we've seen enough, infer the spacing.
          683                  *
          684                  * Of course, nothing in the file format requires that arenas be evenly
          685                  * spaced, but fmtarenas always does that for us.
          686                  */
          687                 nhead = 0;
          688                 ntail = 0;
          689                 for(offset=PartBlank; offset<partend; offset+=4*M){
          690                         p = pagein(offset, 4*M);
          691                         for(sp=p, ep=p+4*M; p<ep; p+=K){
          692                                 if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
          693                                         if(verbose)
          694                                                 print("arena head at %#llx\n", offset+(p-sp));
          695                                         head[nhead++] = offset+(p-sp);
          696                                 }
          697                                 if(u32(p) == ArenaMagic && ntail < nelem(tail)){
          698                                         tail[ntail++] = offset+(p-sp);
          699                                         if(verbose)
          700                                                 print("arena tail at %#llx\n", offset+(p-sp));
          701                                 }
          702                         }
          703                         if(nhead == nelem(head) && ntail == nelem(tail))
          704                                 break;
          705                 }
          706                 if(nhead < 3 && ntail < 3)
          707                         sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);
          708 
          709                 /*
          710                  * Arena size is likely the most common
          711                  * inter-head or inter-tail spacing.
          712                  */
          713                 ndiff = 0;
          714                 for(i=1; i<nhead; i++)
          715                         diff[ndiff++] = head[i] - head[i-1];
          716                 for(i=1; i<ntail; i++)
          717                         diff[ndiff++] = tail[i] - tail[i-1];
          718                 qsort(diff, ndiff, sizeof diff[0], vlongcmp);
          719                 bestn = 0;
          720                 bestdiff = 0;
          721                 for(i=1, n=1; i<=ndiff; i++, n++){
          722                         if(i==ndiff || diff[i] != diff[i-1]){
          723                                 if(n > bestn){
          724                                         bestn = n;
          725                                         bestdiff = diff[i-1];
          726                                 }
          727                                 n = 0;
          728                         }
          729                 }
          730                 print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
          731                 if(arenasize != 0 && arenasize != bestdiff)
          732                         print("using user-specified size %z instead\n", arenasize);
          733                 else
          734                         arenasize = bestdiff;
          735 
          736                 /*
          737                  * The arena tail for an arena is arenasize-blocksize from the head.
          738                  */
          739                 ndiff = 0;
          740                 for(i=j=0; i<nhead && j<ntail; ){
          741                         if(tail[j] < head[i]){
          742                                 j++;
          743                                 continue;
          744                         }
          745                         if(tail[j] < head[i]+arenasize){
          746                                 diff[ndiff++] = head[i]+arenasize - tail[j];
          747                                 j++;
          748                                 continue;
          749                         }
          750                         i++;
          751                 }
          752                 if(ndiff < 3)
          753                         sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
          754                 qsort(diff, ndiff, sizeof diff[0], vlongcmp);
          755                 bestn = 0;
          756                 bestdiff = 0;
          757                 for(i=1, n=1; i<=ndiff; i++, n++){
          758                         if(i==ndiff || diff[i] != diff[i-1]){
          759                                 if(n > bestn){
          760                                         bestn = n;
          761                                         bestdiff = diff[i-1];
          762                                 }
          763                                 n = 0;
          764                         }
          765                 }
          766                 print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
          767                 if(ap.blocksize != 0 && ap.blocksize != bestdiff)
          768                         print("using user-specified size %z instead\n", (vlong)ap.blocksize);
          769                 else
          770                         ap.blocksize = bestdiff;
          771                 if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
          772                         sysfatal("block size not a power of two");
          773                 if(ap.blocksize > MaxDiskBlock)
          774                         sysfatal("block size too big (max=%d)", MaxDiskBlock);
          775 
          776                 /*
          777                  * Use head/tail information to deduce arena base.
          778                  */
          779                 ndiff = 0;
          780                 for(i=0; i<nhead; i++)
          781                         diff[ndiff++] = head[i]%arenasize;
          782                 for(i=0; i<ntail; i++)
          783                         diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
          784                 qsort(diff, ndiff, sizeof diff[0], vlongcmp);
          785                 bestn = 0;
          786                 bestdiff = 0;
          787                 for(i=1, n=1; i<=ndiff; i++, n++){
          788                         if(i==ndiff || diff[i] != diff[i-1]){
          789                                 if(n > bestn){
          790                                         bestn = n;
          791                                         bestdiff = diff[i-1];
          792                                 }
          793                                 n = 0;
          794                         }
          795                 }
          796                 ap.arenabase = bestdiff;
          797         }
          798 
          799         ap.tabbase = ROUNDUP(PartBlank+HeadSize, ap.blocksize);
          800         /*
          801          * XXX pick up table, check arenabase.
          802          * XXX pick up table, record base name.
          803          */
          804 
          805         /*
          806          * Somewhat standard computation.
          807          * Fmtarenas used to use 64k tab, now uses 512k tab.
          808          */
          809         if(ap.arenabase == 0){
          810                 print("trying standard arena bases...\n");
          811                 for(i=0; i<nelem(tabsizes); i++){
          812                         ap.arenabase = ROUNDUP(PartBlank+HeadSize+tabsizes[i], ap.blocksize);
          813                         p = pagein(ap.arenabase, Block);
          814                         if(u32(p) == ArenaHeadMagic)
          815                                 break;
          816                 }
          817         }
          818         p = pagein(ap.arenabase, Block);
          819         print("arena base likely %z%s\n", (vlong)ap.arenabase,
          820                 u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");
          821 
          822         ap.tabsize = ap.arenabase - ap.tabbase;
          823 }
          824 
          825 /*
          826  * Check the arena partition blocks and then the arenas listed in range.
          827  */
              /*
               * range is nil (check every arena), "none" (check no arenas), or a
               * comma-separated list of entries of the form N, N-M, -M or N-
               * (e.g. -4,8-9,10-).  A missing lower bound means 0 and a missing
               * upper bound means the last arena (narena-1).  The string is split
               * in place: each comma is overwritten with a NUL.
               */
          828 void
          829 checkarenas(char *range)
          830 {
          831         char *s, *t;
          832         int i, lo, hi, narena;
          833         uchar dbuf[HeadSize];
          834         uchar *p;
          835 
                      /* settle arenasize and the ap geometry fields before using them */
          836         guessgeometry();
          837 
                      /* drop any partial block at the end of the partition */
          838         partend -= partend%ap.blocksize;
          839 
                      /*
                       * Pack ap into dbuf and compare it with the HeadSize bytes
                       * paged in from PartBlank.  A mismatch is only reported; the
                       * packed image is then copied over the paged-in bytes.
                       * NOTE(review): whether that copy reaches the disk depends on
                       * pagein/pageout, which are defined elsewhere -- confirm.
                       */
          840         memset(dbuf, 0, sizeof dbuf);
          841         packarenapart(&ap, dbuf);
          842         p = pagein(PartBlank, Block);
          843         if(memcmp(p, dbuf, HeadSize) != 0){
          844                 print("on-disk arena part superblock incorrect\n");
          845                 showdiffs(dbuf, p, HeadSize, partinfo);
          846         }
          847         memmove(p, dbuf, HeadSize);
          848 
                      /* arena count, rounded up so a short final arena is included */
          849         narena = (partend-ap.arenabase + arenasize-1)/arenasize;
          850         if(range == nil){
          851                 for(i=0; i<narena; i++)
          852                         checkarena(ap.arenabase+(vlong)i*arenasize, i);
          853         }else if(strcmp(range, "none") == 0){
          854                 /* nothing */
          855         }else{
          856                 /* parse, e.g., -4,8-9,10- */
          857                 for(s=range; *s; s=t){
                                      /* cut off this entry; t is where the next one starts */
          858                         t = strchr(s, ',');
          859                         if(t)
          860                                 *t++ = 0;
          861                         else
          862                                 t = s+strlen(s);
          863                         if(*s == '-')
          864                                 lo = 0;
          865                         else
          866                                 lo = strtol(s, &s, 0);
          867                         hi = lo;
          868                         if(*s == '-'){
          869                                 s++;
          870                                 if(*s == 0)
          871                                         hi = narena-1;
          872                                 else
          873                                         hi = strtol(s, &s, 0);
          874                         }
                                      /* leftover characters make the entry malformed: report and skip it */
          875                         if(*s != 0){
          876                                 print("bad arena range: %s\n", s);
          877                                 continue;
          878                         }
                                      /*
                                       * NOTE(review): lo and hi are not compared with narena
                                       * here -- confirm that checkarena copes with arena
                                       * numbers past the end of the partition.
                                       */
          879                         for(i=lo; i<=hi; i++)
          880                                 checkarena(ap.arenabase+(vlong)i*arenasize, i);
          881                 }
          882         }
          883 }
          884 
          885 /*
          886  * Is there a clump here at p?
          887  */
              /*
               * Decodes a clump header at p into cl, reading in this order:
               * magic (u32), type (1 byte, mapped by vtfromdisktype), size (u16),
               * uncsize (u16), score (VtScoreSize bytes), encoding (1 byte),
               * creator (u32), time (u32).  The cl->info.size bytes that follow
               * are taken as the clump data.  The clump is accepted only if the
               * score recomputed by scoremem over the data (after unwhack for
               * ClumpECompress) equals the stored score.
               *
               * Returns the number of bytes the clump occupies (header plus data)
               * and stores the magic in *pmagic.  Returns 0, without touching
               * *pmagic, if the bytes at p do not form a valid clump; cl may be
               * partly filled in by then.
               *
               * No length is passed in: the caller must make sure that enough
               * bytes are readable at p.
               */
          888 static int
          889 isclump(uchar *p, Clump *cl, u32int *pmagic)
          890 {
          891         int n;
          892         u32int magic;
          893         uchar score[VtScoreSize], *bp;
          894         Unwhack uw;
                      /* scratch for decompressed data; presumably always >= uncsize, which is read with u16() */
          895         uchar ubuf[70*1024];
          896 
          897         bp = p;
          898         magic = u32(p);
                      /* a zero magic is never accepted as a clump */
          899         if(magic == 0)
          900                 return 0;
          901         p += U32Size;
          902 
          903         cl->info.type = vtfromdisktype(*p);
          904         if(cl->info.type == 0xFF)
          905                 return 0;
          906         p++;
          907         cl->info.size = u16(p);
          908         p += U16Size;
          909         cl->info.uncsize = u16(p);
                      /* stored size may not exceed the uncompressed size */
          910         if(cl->info.size > cl->info.uncsize)
          911                 return 0;
          912         p += U16Size;
          913         scorecp(cl->info.score, p);
          914         p += VtScoreSize;
          915         cl->encoding = *p;
          916         p++;
          917         cl->creator = u32(p);
          918         p += U32Size;
          919         cl->time = u32(p);
          920         p += U32Size;
          921 
                      /* p now points at the data; verify it against the stored score */
          922         switch(cl->encoding){
          923         case ClumpENone:
                              /* stored as-is: both sizes must agree */
          924                 if(cl->info.size != cl->info.uncsize)
          925                         return 0;
          926                 scoremem(score, p, cl->info.size);
          927                 if(scorecmp(score, cl->info.score) != 0)
          928                         return 0;
          929                 break;
          930         case ClumpECompress:
                              /* compressed data must be strictly smaller than the original */
          931                 if(cl->info.size >= cl->info.uncsize)
          932                         return 0;
          933                 unwhackinit(&uw);
          934                 n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
          935                 if(n != cl->info.uncsize)
          936                         return 0;
          937                 scoremem(score, ubuf, cl->info.uncsize);
          938                 if(scorecmp(score, cl->info.score) != 0)
          939                         return 0;
          940                 break;
          941         default:
          942                 return 0;
          943         }
          944         p += cl->info.size;
          945 
          946         /* it all worked out in the end */
          947         *pmagic = magic;
          948         return p - bp;
          949 }
          950 
          951 /*
          952  * All ClumpInfos seen in this arena.
          953  * Kept in binary tree so we can look up by score.
          954  */
              /*
               * Entries live in the growable array cibuf in the order they were
               * added; the tree is threaded through the array with indices rather
               * than pointers, and -1 stands for "no child" / "empty tree".
               */
          955 typedef struct Cit Cit;
          956 struct Cit
          957 {
          958         int left;        /* cibuf index of left child, or -1 */
          959         int right;       /* cibuf index of right child, or -1 */
          960         vlong corrupt;   /* value passed to addcibuf; nonzero entries are not linked into the tree */
          961         ClumpInfo ci;
          962 };
          963 Cit *cibuf;              /* the entries */
          964 int ciroot;              /* cibuf index of the tree root, or -1 */
          965 int ncibuf, mcibuf;      /* entries in use, entries allocated */
          966 
              /*
               * Forget all recorded entries: the list becomes empty and the tree
               * has no root.  cibuf and mcibuf are left alone, so the allocation
               * is reused by later addcibuf calls.
               */
          967 void
          968 resetcibuf(void)
          969 {
          970         ncibuf = 0;
          971         ciroot = -1;
          972 }
          973 
              /*
               * Search the score tree starting from the link *p (a cibuf index, or
               * -1 for an empty subtree).  Returns a pointer to the link that either
               * holds the index of the entry whose score equals score, or holds -1
               * at the spot where such an entry would be attached.
               *
               * The returned pointer is p itself or the address of a left/right
               * field inside cibuf, so presumably it goes stale once addcibuf
               * reallocates cibuf.
               */
          974 int*
          975 ltreewalk(int *p, uchar *score)
          976 {
          977         int i;
          978 
          979         for(;;){
          980                 if(*p == -1)
          981                         return p;
          982                 i = scorecmp(cibuf[*p].ci.score, score);
          983                 if(i == 0)
          984                         return p;
                              /* node compares below the target: continue in the right subtree */
          985                 if(i < 0)
          986                         p = &cibuf[*p].right;
          987                 else
          988                         p = &cibuf[*p].left;
          989         }
          990 }
          991 
              /*
               * Append ci to cibuf, growing the buffer by 131072 entries when it
               * is full.  corrupt is stored with the entry.  Entries with
               * corrupt == 0 are also linked into the score tree so that haveclump
               * can find them; entries with corrupt != 0 (see addcicorrupt) are
               * kept in the list only.
               *
               * If the tree already holds an entry with the same score, the new
               * entry takes its place in the tree and inherits its children, so
               * the rest of the tree stays reachable.  The older entry remains in
               * the cibuf list.
               */
          992 void
          993 addcibuf(ClumpInfo *ci, vlong corrupt)
          994 {
          995         Cit *cit;
                      int *link;
          996 
          997         if(ncibuf == mcibuf){
          998                 mcibuf += 131072;
          999                 cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
         1000         }
         1001         cit = &cibuf[ncibuf];
         1002         cit->ci = *ci;
         1003         cit->left = -1;
         1004         cit->right = -1;
         1005         cit->corrupt = corrupt;
         1006         if(!corrupt){
                              /* cibuf is not reallocated past this point, so link stays valid */
                              link = ltreewalk(&ciroot, ci->score);
                              if(*link != -1){
                                      /* duplicate score: keep the old node's subtrees attached */
                                      cit->left = cibuf[*link].left;
                                      cit->right = cibuf[*link].right;
                              }
         1007                 *link = ncibuf;
                      }
         1008         ncibuf++;
         1009 }
         1010 
              /*
               * Record a corrupt region in cibuf: an entry with an all-zero
               * ClumpInfo whose corrupt field is len.
               * NOTE(review): a len of 0 would be treated by addcibuf as a regular
               * entry and linked into the tree under the zero score -- confirm
               * that callers never pass 0.
               */
         1011 void
         1012 addcicorrupt(vlong len)
         1013 {
         1014         static ClumpInfo zci;
         1015 
         1016         addcibuf(&zci, len);
         1017 }
         1018 
              /*
               * Report whether the score tree rooted at ciroot holds an entry with
               * this score: returns 1 if so, 0 otherwise.  Descends in the same
               * direction as ltreewalk.
               */
         1019 int
         1020 haveclump(uchar *score)
         1021 {
         1022         int i;
         1023         int p;
         1024 
         1025         p = ciroot;
         1026         for(;;){
         1027                 if(p == -1)
         1028                         return 0;
         1029                 i = scorecmp(cibuf[p].ci.score, score);
         1030                 if(i == 0)
         1031                         return 1;
         1032                 if(i < 0)
         1033                         p = cibuf[p].right;
         1034                 else
         1035                         p = cibuf[p].left;
         1036         }
         1037 }
         1038 
              /*
               * Compare ci with the packed bytes at p: type in p[0] (mapped by
               * vtfromdisktype), size as a u16 at p+1, uncsize as a u16 at p+3 and
               * the score at p+5.  This is the same field order isclump reads
               * after the clump magic.  Returns 1 if every field matches, else 0.
               */
         1039 int
         1040 matchci(ClumpInfo *ci, uchar *p)
         1041 {
         1042         if(ci->type != vtfromdisktype(p[0]))
         1043                 return 0;
         1044         if(ci->size != u16(p+1))
         1045                 return 0;
         1046         if(ci->uncsize != u16(p+3))
         1047                 return 0;
         1048         if(scorecmp(ci->score, p+5) != 0)
         1049                 return 0;
         1050         return 1;
         1051 }
         1052 
              /*
               * Decide whether the blocksize-byte block at p looks like the tail of
               * a sealed arena.  Returns 1 only if all of these hold:
               *   - the u32 at p+4 is ArenaVersion4 or ArenaVersion5, which selects
               *     the length n (ArenaSize4 or ArenaSize5);
               *   - the byte at p[n-1] is 1;
               *   - every byte from p+n up to the final VtScoreSize bytes is zero;
               *   - the final VtScoreSize bytes are not all zero.
               * Otherwise returns 0; except for an unknown version, the reason is
               * printed.
               *
               * The comparisons use the global zero[] buffer, so blocksize is
               * presumably never larger than that buffer -- confirm against callers.
               */
         1053 int
         1054 sealedarena(uchar *p, int blocksize)
         1055 {
         1056         int v, n;
         1057 
         1058         v = u32(p+4);
         1059         switch(v){
         1060         default:
         1061                 return 0;
         1062         case ArenaVersion4:
         1063                 n = ArenaSize4;
         1064                 break;
         1065         case ArenaVersion5:
         1066                 n = ArenaSize5;
         1067                 break;
         1068         }
         1069         if(p[n-1] != 1){
         1070                 print("arena tail says not sealed\n");
         1071                 return 0;
         1072         }
         1073         if(memcmp(p+n, zero, blocksize-VtScoreSize-n) != 0){
         1074                 print("arena tail followed by non-zero data\n");
         1075                 return 0;
         1076         }
         1077         if(memcmp(p+blocksize-VtScoreSize, zero, VtScoreSize) == 0){
         1078                 print("arena score zero\n");
         1079                 return 0;
         1080         }
         1081         return 1;
         1082 }
         1083 
              /*
               * Returns 1 if name passes nameok and ends with the decimal digits
               * of n, else 0.  For n == 0 the required suffix is emptied, so any
               * name that passes nameok is accepted.
               */
         1084 int
         1085 okayname(char *name, int n)
         1086 {
         1087         char buf[20];
         1088 
         1089         if(nameok(name) < 0)
         1090                 return 0;
         1091         sprint(buf, "%d", n);
                      /* arena 0: do not insist on a trailing "0" */
         1092         if(n == 0)
         1093                 buf[0] = 0;
                      /* buf must be a suffix of name */
         1094         if(strlen(name) < strlen(buf)
         1095         || strcmp(name+strlen(name)-strlen(buf), buf) != 0)
         1096                 return 0;
         1097         return 1;
         1098 }
         1099 
              /*
               * Three-way comparison of two ClumpInfos, ordering by type, then
               * size, then uncsize, then score (via scorecmp).  Returns a value
               * below, equal to, or above zero.
               */
         1100 int
         1101 clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
         1102 {
         1103         if(a->type != b->type)
         1104                 return a->type - b->type;
         1105         if(a->size != b->size)
         1106                 return a->size - b->size;
         1107         if(a->uncsize != b->uncsize)
         1108                 return a->uncsize - b->uncsize;
         1109         return scorecmp(a->score, b->score);
         1110 }
         1111 
              /*
               * Read nci ClumpInfo entries for the arena that starts at offset and
               * return them in a newly vtmalloc'ed array (the caller presumably
               * frees it).
               *
               * Entries are read one block at a time, starting with the block just
               * before the arena's final block and moving toward lower offsets;
               * inside a block the entries are read in ascending order, per =
               * blocksize/ClumpInfoSize to a block.
               */
         1112 ClumpInfo*
         1113 loadci(vlong offset, Arena *arena, int nci)
         1114 {
         1115         int i, j, per;
         1116         uchar *p, *sp;
         1117         ClumpInfo *bci, *ci;
         1118 
         1119         per = arena->blocksize/ClumpInfoSize;
         1120         bci = vtmalloc(nci*sizeof bci[0]);
         1121         ci = bci;
                      /* offset now marks the start of the arena's final block */
         1122         offset += arena->size - arena->blocksize;
         1123         p = sp = nil;
         1124         for(i=0; i<nci; i+=per){
                              /* window used up (or first pass): page in the 4M that end at offset */
         1125                 if(p == sp){
         1126                         sp = pagein(offset-4*M, 4*M);
         1127                         p = sp+4*M;
         1128                 }
         1129                 p -= arena->blocksize;
         1130                 offset -= arena->blocksize;
         1131                 for(j=0; j<per && i+j<nci; j++)
         1132                         unpackclumpinfo(ci++, p+j*ClumpInfoSize);
         1133         }
         1134         return bci;
         1135 }
         1136 
              /*
               * Counterpart of loadci: pack nci ClumpInfo entries from ci into the
               * blocks that precede the final block of the arena starting at
               * offset, working toward lower offsets.  Each block is zeroed before
               * its entries are packed, so unused space in it ends up as zeros.
               * Calls pageout() when done and returns the disk offset of the
               * lowest block it filled (or of the arena's final block when nci is
               * not positive).
               */
         1137 vlong
         1138 writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
         1139 {
         1140         int i, j, per;
         1141         uchar *p, *sp;
         1142 
         1143         per = arena->blocksize/ClumpInfoSize;
         1144         offset += arena->size - arena->blocksize;
         1145         p = sp = nil;
         1146         for(i=0; i<nci; i+=per){
                              /* window used up (or first pass): page in the 4M that end at offset */
         1147                 if(p == sp){
         1148                         sp = pagein(offset-4*M, 4*M);
         1149                         p = sp+4*M;
         1150                 }
         1151                 p -= arena->blocksize;
         1152                 offset -= arena->blocksize;
         1153                 memset(p, 0, arena->blocksize);
         1154                 for(j=0; j<per && i+j<nci; j++)
         1155                         packclumpinfo(ci++, p+j*ClumpInfoSize);
         1156         }
         1157         pageout();
         1158         return offset;
         1159 }
         1160 
              /*
               * Fill in head and arena for arena number anum, which starts at disk
               * offset offset0.  Size and block size come from the global geometry
               * (arenasize, ap.blocksize, partend).  Version, clump magic, name and,
               * from the tail only, the clump count and sealed flag are salvaged
               * from the on-disk head and tail blocks when those unpack cleanly.
               * Calls sysfatal if no arena name can be determined.
               *
               * The static lastbase carries the name prefix from one call to the
               * next, so the result depends on the calls made before this one.
               */
         1161 void
         1162 loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
         1163 {
         1164         char dname[ANameSize];
         1165         static char lastbase[ANameSize];
         1166         uchar *p;
         1167         Arena oarena;
         1168         ArenaHead ohead;
         1169 
         1170         /*
         1171          * Fmtarenas makes all arenas the same size
         1172          * except the last, which may be smaller.
         1173          * It uses the same block size for arenas as for
         1174          * the arena partition blocks.
         1175          */
         1176         arena->size = arenasize;
         1177         if(offset0+arena->size > partend)
         1178                 arena->size = partend - offset0;
         1179         head->size = arena->size;
         1180 
         1181         arena->blocksize = ap.blocksize;
         1182         head->blocksize = arena->blocksize;
         1183 
         1184         /*
         1185          * Look for clump magic and name in head/tail blocks.
         1186          * All the other info we will reconstruct just in case.
         1187          */
         1188         p = pagein(offset0, arena->blocksize);
         1189         memset(&ohead, 0, sizeof ohead);
         1190         if(unpackarenahead(&ohead, p) >= 0){
         1191                 head->version = ohead.version;
         1192                 head->clumpmagic = ohead.clumpmagic;
                              /* trust the on-disk name only if it fits this arena number */
         1193                 if(okayname(ohead.name, anum))
         1194                         strcpy(head->name, ohead.name);
         1195         }
         1196 
                      /* same salvage for the tail block, the last block of the arena */
         1197         p = pagein(offset0+arena->size-arena->blocksize,
         1198                 arena->blocksize);
         1199         memset(&oarena, 0, sizeof oarena);
         1200         if(unpackarena(&oarena, p) >= 0){
         1201                 arena->version = oarena.version;
         1202                 arena->clumpmagic = oarena.clumpmagic;
         1203                 if(okayname(oarena.name, anum))
         1204                         strcpy(arena->name, oarena.name);
         1205                 arena->diskstats.clumps = oarena.diskstats.clumps;
         1206 print("old arena: sealed=%d\n", oarena.diskstats.sealed);
         1207                 arena->diskstats.sealed = oarena.diskstats.sealed;
         1208         }
         1209 
         1210         /* Head trumps arena. */
         1211         if(head->version){
         1212                 arena->version = head->version;
         1213                 arena->clumpmagic = head->clumpmagic;
         1214         }
         1215         if(arena->version == 0)
         1216                 arena->version = ArenaVersion5;
                      /*
                       * Choose the name, in order of preference: the global basename
                       * (presumably set by the -n option named in the error below),
                       * the prefix remembered from the previous call, the name from
                       * the on-disk head, and finally the name from the on-disk tail,
                       * which is already in arena->name.
                       */
         1217         if(basename){
         1218                 if(anum == -1)
         1219                         snprint(arena->name, ANameSize, "%s", basename);
         1220                 else
         1221                         snprint(arena->name, ANameSize, "%s%d", basename, anum);
         1222         }else if(lastbase[0])
         1223                 snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
         1224         else if(head->name[0])
         1225                 strcpy(arena->name, head->name);
         1226         else if(arena->name[0] == 0)
         1227                 sysfatal("cannot determine base name for arena; use -n");
                      /*
                       * Remember the name minus its numeric suffix for the next call.
                       * NOTE(review): this cuts strlen("%d" of anum) characters off
                       * the end without checking that they are the suffix.  The
                       * anum == -1 branch above stores the bare basename, and
                       * okayname accepts any valid name for anum == 0, so both cases
                       * can chop real name characters (or index before the buffer
                       * for a very short name) -- confirm those paths.
                       */
         1228         strcpy(lastbase, arena->name);
         1229         sprint(dname, "%d", anum);
         1230         lastbase[strlen(lastbase)-strlen(dname)] = 0;
         1231 
         1232         /* Was working in arena, now copy to head. */
         1233         head->version = arena->version;
         1234         memmove(head->name, arena->name, sizeof head->name);
         1235         head->blocksize = arena->blocksize;
         1236         head->size = arena->size;
         1237 }
         1238 
              /*
               * Start the hash state sb at offset0 with a head block rebuilt from
               * head: head is packed into a zeroed buffer and the first
               * head->blocksize bytes of it are passed to sbupdate as the data at
               * offset0.
               */
         1239 void
         1240 shahead(Shabuf *sb, vlong offset0, ArenaHead *head)
         1241 {
         1242         uchar headbuf[MaxDiskBlock];
         1243 
         1244         sb->offset = offset0;
         1245         memset(headbuf, 0, sizeof headbuf);
         1246         packarenahead(head, headbuf);
         1247         sbupdate(sb, headbuf, offset0, head->blocksize);
         1248 }
         1249 
              /*
               * Pick a clump magic for an arena of the given version: always
               * _ClumpMagic for ArenaVersion4, otherwise a value from fastrand()
               * that is neither 0 nor _ClumpMagic.
               */
         1250 u32int
         1251 newclumpmagic(int version)
         1252 {
         1253         u32int m;
         1254 
         1255         if(version == ArenaVersion4)
         1256                 return _ClumpMagic;
                      /* retry until the value cannot be mistaken for "no magic" or the version 4 magic */
         1257         do{
         1258                 m = fastrand();
         1259         }while(m==0 || m == _ClumpMagic);
         1260         return m;
         1261 }
         1262 
         1263 /*
         1264  * Poke around in the arena to find the clump data
         1265  * and compute the relevant statistics.
         1266  */
         1267 void
         1268 guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
         1269         uchar *oldscore, uchar *score)
         1270 {
         1271         uchar dbuf[MaxDiskBlock];
         1272         int needtozero, clumps, nb1, nb2, minclumps;
         1273         int inbad, n, ncib, printed, sealing, smart;
         1274         u32int magic;
         1275         uchar *sp, *ep, *p;
         1276         vlong boffset, eoffset, lastclumpend, leaked;
         1277         vlong offset, toffset, totalcorrupt, v;
         1278         Clump cl;
         1279         ClumpInfo *bci, *ci, *eci, *xci;
         1280         Cit *bcit, *cit, *ecit;
         1281         Shabuf oldsha, newsha;
         1282 
         1283         /*
         1284          * We expect to find an arena, with data, between offset
         1285          * and offset+arenasize.  With any luck, the data starts at
         1286          * offset+ap.blocksize.  The blocks have variable size and
         1287          * aren't padded at all, which doesn't give us any alignment
         1288          * constraints.  The blocks are compressed or high entropy,
         1289          * but the headers are pretty low entropy (except the score):
         1290          *
         1291          *        type[1] (range 0 thru 9, 13)
         1292          *        size[2]
         1293          *        uncsize[2] (>= size)
         1294          *
         1295          * so we can look for these.  We check the scores as we go,
         1296          * so we can't make any wrong turns.  If we find ourselves
         1297          * in a dead end, scan forward looking for a new start.
         1298          */
         1299 
         1300         resetcibuf();
         1301         memset(head, 0, sizeof *head);
         1302         memset(arena, 0, sizeof *arena);
         1303         memset(oldscore, 0, VtScoreSize);
         1304         memset(score, 0, VtScoreSize);
         1305         memset(&oldsha, 0, sizeof oldsha);
         1306         memset(&newsha, 0, sizeof newsha);
         1307         newsha.rollback = 1;
         1308 
         1309         if(0){
         1310                 sbdebug(&oldsha, "old.sha");
         1311                 sbdebug(&newsha, "new.sha");
         1312         }
         1313 
         1314         loadarenabasics(offset0, anum, head, arena);
         1315 
         1316         /* start the clump hunt */
         1317 
         1318         clumps = 0;
         1319         totalcorrupt = 0;
         1320         sealing = 1;
         1321         boffset = offset0 + arena->blocksize;
         1322         offset = boffset;
         1323         eoffset = offset0+arena->size - arena->blocksize;
         1324         toffset = eoffset;
         1325         sp = pagein(offset0, 4*M);
         1326 
         1327         if(arena->diskstats.sealed){
         1328                 oldsha.offset = offset0;
         1329                 sbupdate(&oldsha, sp, offset0, 4*M);
         1330         }
         1331         ep = sp+4*M;
         1332         p = sp + (boffset - offset0);
         1333         ncib = arena->blocksize / ClumpInfoSize;        /* ci per block in index */
         1334         lastclumpend = offset;
         1335         nbad = 0;
         1336         inbad = 0;
         1337         needtozero = 0;
         1338         minclumps = 0;
         1339         while(offset < eoffset){
         1340                 /*
         1341                  * Shift buffer if we're running out of room.
         1342                  */
         1343                 if(p+70*K >= ep){
         1344                         /*
         1345                          * Start the post SHA1 buffer.   By now we should know the
         1346                          * clumpmagic and arena version, so we can create a
         1347                          * correct head block to get things going.
         1348                          */
         1349                         if(sealing && fix && newsha.offset == 0){
         1350                                 newsha.offset = offset0;
         1351                                 if(arena->clumpmagic == 0){
         1352                                         if(arena->version == 0)
         1353                                                 arena->version = ArenaVersion5;
         1354                                         arena->clumpmagic = newclumpmagic(arena->version);
         1355                                 }
         1356                                 head->clumpmagic = arena->clumpmagic;
         1357                                 shahead(&newsha, offset0, head);
         1358                         }
         1359                         n = 4*M-256*K;
         1360                         if(sealing && fix){
         1361                                 sbdiskhash(&newsha, bufoffset);
         1362                                 sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
         1363                         }
         1364                         pagein(bufoffset+n, 4*M);
         1365                         p -= n;
         1366                         if(arena->diskstats.sealed)
         1367                                 sbupdate(&oldsha, buf, bufoffset, 4*M);
         1368                 }
         1369 
         1370                 /*
         1371                  * Check for a clump at p, which is at offset in the disk.
         1372                  * Duplicate clumps happen in corrupted disks
         1373                  * (the same pattern gets written many times in a row)
         1374                  * and should never happen during regular use.
         1375                  */
         1376                 magic = 0;
         1377                 if((n = isclump(p, &cl, &magic)) > 0){
         1378                         /*
         1379                          * If we were in the middle of some corrupted data,
         1380                          * flush a warning about it and then add any clump
         1381                          * info blocks as necessary.
         1382                          */
         1383                         if(inbad){
         1384                                 inbad = 0;
         1385                                 v = offset-lastclumpend;
         1386                                 if(needtozero){
         1387                                         zerorange(lastclumpend, v);
         1388                                         sbrollback(&newsha, lastclumpend);
         1389                                         print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
         1390                                                 lastclumpend, v, v);
         1391                                 }
         1392                                 addcicorrupt(v);
         1393                                 totalcorrupt += v;
         1394                                 nb1 = (minclumps+ncib-1)/ncib;
         1395                                 minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
         1396                                 nb2 = (minclumps+ncib-1)/ncib;
         1397                                 eoffset -= (nb2-nb1)*arena->blocksize;
         1398                         }
         1399 
         1400                         if(haveclump(cl.info.score))
         1401                                 print("warning: duplicate clump %d %V at %#llux+%#d\n", cl.info.type, cl.info.score, offset, n);
         1402 
         1403                         /*
         1404                          * If clumps use different magic numbers, we don't care.
         1405                          * We'll just use the first one we find and make the others
         1406                          * follow suit.
         1407                          */
         1408                         if(arena->clumpmagic == 0){
         1409                                 print("clump type %d size %d score %V magic %x\n",
         1410                                         cl.info.type, cl.info.size, cl.info.score, magic);
         1411                                 arena->clumpmagic = magic;
         1412                                 if(magic == _ClumpMagic)
         1413                                         arena->version = ArenaVersion4;
         1414                                 else
         1415                                         arena->version = ArenaVersion5;
         1416                         }
         1417                         if(magic != arena->clumpmagic)
         1418                                 p32(p, arena->clumpmagic);
         1419                         if(clumps == 0)
         1420                                 arena->ctime = cl.time;
         1421 
         1422                         /*
         1423                          * Record the clump, update arena stats,
         1424                          * grow clump info blocks if needed.
         1425                          */
         1426                         if(verbose > 1)
         1427                                 print("\tclump %d: %d %V at %#llux+%#ux (%d)\n",
         1428                                         clumps, cl.info.type, cl.info.score, offset, n, n);
         1429                         addcibuf(&cl.info, 0);
         1430                         if(minclumps%ncib == 0)
         1431                                 eoffset -= arena->blocksize;
         1432                         minclumps++;
         1433                         clumps++;
         1434                         if(cl.encoding != ClumpENone)
         1435                                 arena->diskstats.cclumps++;
         1436                         arena->diskstats.uncsize += cl.info.uncsize;
         1437                         arena->wtime = cl.time;
         1438 
         1439                         /*
         1440                          * Move to next clump.
         1441                          */
         1442                         offset += n;
         1443                         p += n;
         1444                         lastclumpend = offset;
         1445                 }else{
         1446                         /*
         1447                          * Overwrite malformed clump data with zeros later.
         1448                          * For now, just record whether it needs to be overwritten.
         1449                          * Bad regions must be of size at least ClumpSize.
         1450                          * Postponing the overwriting keeps us from writing past
         1451                          * the end of the arena data (which might be directory data)
         1452                          * with zeros.
         1453                          */
         1454                         if(!inbad){
         1455                                 inbad = 1;
         1456                                 needtozero = 0;
         1457                                 if(memcmp(p, zero, ClumpSize) != 0)
         1458                                         needtozero = 1;
         1459                                 p += ClumpSize;
         1460                                 offset += ClumpSize;
         1461                                 nbad++;
         1462                         }else{
         1463                                 if(*p != 0)
         1464                                         needtozero = 1;
         1465                                 p++;
         1466                                 offset++;
         1467                         }
         1468                 }
         1469         }
         1470         pageout();
         1471 
         1472         if(verbose)
         1473                 print("readable clumps: %d; min. directory entries: %d\n",
         1474                         clumps, minclumps);
         1475         arena->diskstats.used = lastclumpend - boffset;
         1476         leaked = eoffset - lastclumpend;
         1477         if(verbose)
         1478                 print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
         1479                         boffset, lastclumpend, arena->diskstats.used, leaked);
         1480 
         1481         /*
         1482          * Finish the SHA1 of the old data.
         1483          */
         1484         if(arena->diskstats.sealed){
         1485                 sbdiskhash(&oldsha, toffset);
         1486                 readdisk(dbuf, toffset, arena->blocksize);
         1487                 scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
         1488                 sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
         1489                 sbscore(&oldsha, oldscore);
         1490         }
         1491 
         1492         /*
         1493          * If we still don't know the clump magic, the arena
         1494          * must be empty.  It still needs a value, so make
         1495          * something up.
         1496          */
         1497         if(arena->version == 0)
         1498                 arena->version = ArenaVersion5;
         1499         if(arena->clumpmagic == 0){
         1500                 if(arena->version == ArenaVersion4)
         1501                         arena->clumpmagic = _ClumpMagic;
         1502                 else{
         1503                         do
         1504                                 arena->clumpmagic = fastrand();
         1505                         while(arena->clumpmagic==_ClumpMagic
         1506                                 ||arena->clumpmagic==0);
         1507                 }
         1508                 head->clumpmagic = arena->clumpmagic;
         1509         }
         1510 
         1511         /*
         1512          * Guess at number of clumpinfo blocks to load.
         1513          * If we guess high, it's no big deal.  If we guess low,
         1514          * we'll be forced into rewriting the whole directory.
         1515          * Still not such a big deal.
         1516          */
         1517         if(clumps == 0 || arena->diskstats.used == totalcorrupt)
         1518                 goto Nocib;
         1519         if(clumps < arena->diskstats.clumps)
         1520                 clumps = arena->diskstats.clumps;
         1521         if(clumps < ncibuf)
         1522                 clumps = ncibuf;
         1523         clumps += totalcorrupt/
         1524                 ((arena->diskstats.used - totalcorrupt)/clumps);
         1525         clumps += totalcorrupt/2000;
         1526         if(clumps < minclumps)
         1527                 clumps = minclumps;
         1528         clumps += ncib-1;
         1529         clumps -= clumps%ncib;
         1530 
         1531         /*
         1532          * Can't write into the actual data.
         1533          */
         1534         v = offset0 + arena->size - arena->blocksize;
         1535         v -= (clumps+ncib-1)/ncib * arena->blocksize;
         1536         if(v < lastclumpend){
         1537                 v = offset0 + arena->size - arena->blocksize;
         1538                 clumps = (v-lastclumpend)/arena->blocksize * ncib;
         1539         }
         1540 
         1541         if(clumps < minclumps)
         1542                 print("cannot happen?\n");
         1543 
         1544         /*
         1545          * Check clumpinfo blocks against directory we created.
         1546          * The tricky part is handling the corrupt sections of arena.
         1547          * If possible, we remark just the affected directory entries
         1548          * rather than slide everything down.
         1549          *
         1550          * Allocate clumps+1 blocks and check that we don't need
         1551          * the last one at the end.
         1552          */
         1553         bci = loadci(offset0, arena, clumps+1);
         1554         eci = bci+clumps+1;
         1555         bcit = cibuf;
         1556         ecit = cibuf+ncibuf;
         1557 
         1558         smart = 0;        /* Somehow the smart code doesn't do corrupt clumps right. */
         1559 Again:
         1560         nbad = 0;
         1561         ci = bci;
         1562         for(cit=bcit; cit<ecit && ci<eci; cit++){
         1563                 if(cit->corrupt){
         1564                         vlong n, m;
         1565                         if(smart){
         1566                                 /*
         1567                                  * If we can, just mark existing entries as corrupt.
         1568                                  */
         1569                                 n = cit->corrupt;
         1570                                 for(xci=ci; n>0 && xci<eci; xci++)
         1571                                         n -= ClumpSize+xci->size;
         1572                                 if(n > 0 || xci >= eci)
         1573                                         goto Dumb;
         1574                                 printed = 0;
         1575                                 for(; ci<xci; ci++){
         1576                                         if(verbose && ci->type != VtCorruptType){
         1577                                                 if(!printed){
         1578                                                         print("marking directory %d-%d as corrupt\n",
         1579                                                                 (int)(ci-bci), (int)(xci-bci));
         1580                                                         printed = 1;
         1581                                                 }
         1582                                                 print("\ttype=%d size=%d uncsize=%d score=%V\n",
         1583                                                         ci->type, ci->size, ci->uncsize, ci->score);
         1584                                         }
         1585                                         ci->type = VtCorruptType;
         1586                                 }
         1587                         }else{
         1588                         Dumb:
         1589                                 print("\trewriting clump directory\n");
         1590                                 /*
         1591                                  * Otherwise, blaze a new trail.
         1592                                  */
         1593                                 n = cit->corrupt;
         1594                                 while(n > 0 && ci < eci){
         1595                                         if(n < ClumpSize)
         1596                                                 sysfatal("bad math in clump corrupt");
         1597                                         if(n <= VtMaxLumpSize+ClumpSize)
         1598                                                 m = n;
         1599                                         else{
         1600                                                 m = VtMaxLumpSize+ClumpSize;
         1601                                                 if(n-m < ClumpSize)
         1602                                                         m -= ClumpSize;
         1603                                         }
         1604                                         ci->type = VtCorruptType;
         1605                                         ci->size = m-ClumpSize;
         1606                                         ci->uncsize = m-ClumpSize;
         1607                                         memset(ci->score, 0, VtScoreSize);
         1608                                         ci++;
         1609                                         n -= m;
         1610                                 }
         1611                         }
         1612                         continue;
         1613                 }
         1614                 if(clumpinfocmp(&cit->ci, ci) != 0){
         1615                         if(verbose && (smart || verbose>1)){
         1616                                 print("clumpinfo %d\n", (int)(ci-bci));
         1617                                 print("\twant: %d %d %d %V\n",
         1618                                         cit->ci.type, cit->ci.size,
         1619                                         cit->ci.uncsize, cit->ci.score);
         1620                                 print("\thave: %d %d %d %V\n",
         1621                                         ci->type, ci->size,
         1622                                         ci->uncsize, ci->score);
         1623                         }
         1624                         *ci = cit->ci;
         1625                         nbad++;
         1626                 }
         1627                 ci++;
         1628         }
         1629         if(ci >= eci || cit < ecit){
         1630                 print("ran out of space editing existing directory; rewriting\n");
         1631                 print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
         1632                 assert(smart);        /* can't happen second time thru */
         1633                 smart = 0;
         1634                 goto Again;
         1635         }
         1636 
         1637         assert(ci <= eci);
         1638         arena->diskstats.clumps = ci-bci;
         1639         eoffset = writeci(offset0, arena, bci, ci-bci);
         1640         if(sealing && fix)
         1641                 sbrollback(&newsha, v);
         1642 print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal);
         1643         if(lastclumpend > eoffset)
         1644                 print("arena directory overwrote blocks!  cannot happen!\n");
         1645         free(bci);
         1646         if(smart && nbad)
         1647                 print("arena directory has %d bad or missing entries\n", nbad);
         1648 Nocib:
         1649         if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){
         1650                 if(arena->diskstats.sealed)
         1651                         print("unsealing arena\n");
         1652                 sealing = 0;
         1653                 memset(oldscore, 0, VtScoreSize);
         1654         }
         1655 
         1656         /*
         1657          * Finish the SHA1 of the new data - only meaningful
         1658          * if we've been writing to disk (`fix').
         1659          */
         1660         arena->diskstats.sealed = sealing;
         1661         arena->memstats = arena->diskstats;
         1662         if(sealing && fix){
         1663                 uchar tbuf[MaxDiskBlock];
         1664 
         1665                 sbdiskhash(&newsha, toffset);
         1666                 memset(tbuf, 0, sizeof tbuf);
         1667                 packarena(arena, tbuf);
         1668                 sbupdate(&newsha, tbuf, toffset, arena->blocksize);
         1669                 sbscore(&newsha, score);
         1670         }
         1671 }
         1672 
         1673 void
         1674 dumparena(vlong offset, int anum, Arena *arena)
         1675 {
         1676         char buf[1000];
         1677         vlong o, e;
         1678         int fd, n;
         1679 
         1680         snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
         1681         if((fd = create(buf, OWRITE, 0666)) < 0){
         1682                 fprint(2, "create %s: %r\n", buf);
         1683                 return;
         1684         }
         1685         e = offset+arena->size;
         1686         for(o=offset; o<e; o+=n){
         1687                 n = 4*M;
         1688                 if(o+n > e)
         1689                         n = e-o;
         1690                 if(pwrite(fd, pagein(o, n), n, o-offset) != n){
         1691                         fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
         1692                         return;
         1693                 }
         1694         }
         1695 }
         1696 
         1697 void
         1698 checkarena(vlong offset, int anum)
         1699 {
         1700         uchar dbuf[MaxDiskBlock];
         1701         uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
         1702         Arena arena, oarena;
         1703         ArenaHead head;
         1704         Info *fmt, *fmta;
         1705         int sz;
         1706 
         1707         print("# arena %d: offset %#llux\n", anum, offset);
         1708 
         1709         if(offset >= partend){
         1710                 print("arena offset out of bounds\n");
         1711                 return;
         1712         }
         1713 
         1714         guessarena(offset, anum, &head, &arena, oldscore, score);
         1715 
         1716         if(verbose){
         1717                 print("#\tversion=%d name=%s blocksize=%d size=%z",
         1718                         head.version, head.name, head.blocksize, head.size);
         1719                 if(head.clumpmagic)
         1720                         print(" clumpmagic=%#.8ux", head.clumpmagic);
         1721                 print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
         1722                         arena.diskstats.clumps, arena.diskstats.cclumps,
         1723                         arena.diskstats.used, arena.diskstats.uncsize);
         1724                 print("#\tctime=%t\n", arena.ctime);
         1725                 print("#\twtime=%t\n", arena.wtime);
         1726                 if(arena.diskstats.sealed)
         1727                         print("#\tsealed score=%V\n", score);
         1728         }
         1729 
         1730         if(dumpbase){
         1731                 dumparena(offset, anum, &arena);
         1732                 return;
         1733         }
         1734 
         1735         memset(dbuf, 0, sizeof dbuf);
         1736         packarenahead(&head, dbuf);
         1737         p = pagein(offset, arena.blocksize);
         1738         if(memcmp(dbuf, p, arena.blocksize) != 0){
         1739                 print("on-disk arena header incorrect\n");
         1740                 showdiffs(dbuf, p, arena.blocksize,
         1741                         arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
         1742         }
         1743         memmove(p, dbuf, arena.blocksize);
         1744 
         1745         memset(dbuf, 0, sizeof dbuf);
         1746         packarena(&arena, dbuf);
         1747         if(arena.diskstats.sealed)
         1748                 scorecp(dbuf+arena.blocksize-VtScoreSize, score);
         1749         p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
         1750         memset(&oarena, 0, sizeof oarena);
         1751         unpackarena(&oarena, p);
         1752         if(arena.version == ArenaVersion4){
         1753                 sz = ArenaSize4;
         1754                 fmt = tailinfo4;
         1755                 fmta = tailinfo4a;
         1756         }else{
         1757                 sz = ArenaSize5;
         1758                 fmt = tailinfo5;
         1759                 fmta = tailinfo5a;
         1760         }
         1761         if(p[sz] == 1){
         1762                 fmt = fmta;
         1763                 if(oarena.diskstats.sealed){
         1764                         /*
         1765                          * some arenas were sealed with the extension
         1766                          * before we adopted the convention that if it didn't
         1767                          * add new information it gets dropped.
         1768                          */
         1769                         _packarena(&arena, dbuf, 1);
         1770                 }
         1771         }
         1772         if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
         1773                 print("on-disk arena tail incorrect\n");
         1774                 showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
         1775         }
         1776         if(arena.diskstats.sealed){
         1777                 if(oarena.diskstats.sealed)
         1778                 if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
         1779                         print("on-disk arena seal score incorrect\n");
         1780                         print("\tcorrect=%V\n", oldscore);
         1781                         print("\t   disk=%V\n", p+arena.blocksize-VtScoreSize);
         1782                 }
         1783                 if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
         1784                         print("%ssealing arena%s: %V\n",
         1785                                 oarena.diskstats.sealed ? "re" : "",
         1786                                 scorecmp(oldscore, score) == 0 ?
         1787                                         "" : " after changes", score);
         1788                 }
         1789         }
         1790         memmove(p, dbuf, arena.blocksize);
         1791 
         1792         pageout();
         1793 }
         1794 
         1795 AMapN*
         1796 buildamap(void)
         1797 {
         1798         uchar *p;
         1799         vlong o;
         1800         ArenaHead h;
         1801         AMapN *an;
         1802         AMap *m;
         1803 
         1804         an = vtmallocz(sizeof *an);
         1805         for(o=ap.arenabase; o<partend; o+=arenasize){
         1806                 p = pagein(o, Block);
         1807                 if(unpackarenahead(&h, p) >= 0){
         1808                         an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[0]);
         1809                         m = &an->map[an->n++];
         1810                         m->start = o;
         1811                         m->stop = o+h.size;
         1812                         strcpy(m->name, h.name);
         1813                 }
         1814         }
         1815         return an;
         1816 }
         1817 
         1818 void
         1819 checkmap(void)
         1820 {
         1821         char *s;
         1822         uchar *p;
         1823         int i, len;
         1824         AMapN *an;
         1825         Fmt fmt;
         1826 
         1827         an = buildamap();
         1828         fmtstrinit(&fmt);
         1829         fmtprint(&fmt, "%ud\n", an->n);
         1830         for(i=0; i<an->n; i++)
         1831                 fmtprint(&fmt, "%s\t%lld\t%lld\n",
         1832                         an->map[i].name, an->map[i].start, an->map[i].stop);
         1833         s = fmtstrflush(&fmt);
         1834         len = strlen(s);
         1835         if(len > ap.tabsize){
         1836                 print("arena partition map too long: need %z bytes have %z\n",
         1837                         (vlong)len, (vlong)ap.tabsize);
         1838                 len = ap.tabsize;
         1839         }
         1840 
         1841         if(ap.tabsize >= 4*M){        /* can't happen - max arenas is 2000 */
         1842                 print("arena partition map *way* too long\n");
         1843                 return;
         1844         }
         1845 
         1846         p = pagein(ap.tabbase, ap.tabsize);
         1847         if(memcmp(p, s, len) != 0){
         1848                 print("arena partition map incorrect; rewriting.\n");
         1849                 memmove(p, s, len);
         1850         }
         1851         pageout();
         1852 }
         1853 
         1854 int mainstacksize = 512*1024;
         1855 
         1856 void
         1857 threadmain(int argc, char **argv)
         1858 {
         1859         int mode;
         1860 
         1861         mode = OREAD;
         1862         readonly = 1;
         1863         ARGBEGIN{
         1864         case 'U':
         1865                 unseal = 1;
         1866                 break;
         1867         case 'a':
         1868                 arenasize = unittoull(EARGF(usage()));
         1869                 break;
         1870         case 'b':
         1871                 ap.blocksize = unittoull(EARGF(usage()));
         1872                 break;
         1873         case 'f':
         1874                 fix = 1;
         1875                 mode = ORDWR;
         1876                 readonly = 0;
         1877                 break;
         1878         case 'n':
         1879                 basename = EARGF(usage());
         1880                 break;
         1881         case 'v':
         1882                 verbose++;
         1883                 break;
         1884         case 'x':
         1885                 dumpbase = EARGF(usage());
         1886                 break;
         1887         default:
         1888                 usage();
         1889         }ARGEND
         1890 
         1891         if(argc != 1 && argc != 2)
         1892                 usage();
         1893 
         1894         file = argv[0];
         1895 
         1896         ventifmtinstall();
         1897         fmtinstall('z', zfmt);
         1898         fmtinstall('t', tfmt);
         1899         quotefmtinstall();
         1900 
         1901         part = initpart(file, mode|ODIRECT);
         1902         if(part == nil)
         1903                 sysfatal("can't open %s: %r", file);
         1904         partend = part->size;
         1905 
         1906         if(isonearena()){
         1907                 checkarena(0, -1);
         1908                 threadexitsall(nil);
         1909         }
         1910         checkarenas(argc > 1 ? argv[1] : nil);
         1911         checkmap();
         1912         threadexitsall(nil);
         1913 }