URI: 
       Refactor dedup into multiple programs - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit 1dbc2650dde7d194f7b87e51abc5efeab4b61eb0
   DIR parent 6c3de1be215d063566c45bb412301088abf18add
  HTML Author: sin <sin@2f30.org>
       Date:   Wed, 17 Apr 2019 16:49:05 +0100
       
       Refactor dedup into multiple programs
       
       dinit(1)   - Initialize a repository
       dlist(1)   - List snapshots
       dinfo(1)   - Print information about the repository
       dpack(1)   - Create snapshot from stdin
       dunpack(1) - Extract snapshot to stdout
       dcheck(1)  - Check if repository is consistent
       
       Needs more work to share the code properly.
       
       Diffstat:
         M Makefile                            |      61 ++++++++++++++++++++++++-------
         M README                              |      19 ++++++++++---------
         A dcheck.1                            |      25 +++++++++++++++++++++++++
         A dcheck.c                            |     316 +++++++++++++++++++++++++++++++
         D dedup.1                             |      62 -------------------------------
         D dedup.c                             |     700 -------------------------------
         A dinfo.1                             |      25 +++++++++++++++++++++++++
         A dinfo.c                             |     170 +++++++++++++++++++++++++++++++
         A dinit.1                             |      39 +++++++++++++++++++++++++++++++
         A dinit.c                             |     155 +++++++++++++++++++++++++++++++
         A dlist.1                             |      25 +++++++++++++++++++++++++
         A dlist.c                             |     232 ++++++++++++++++++++++++++++++
         A dpack.1                             |      35 +++++++++++++++++++++++++++++++
         A dpack.c                             |     422 +++++++++++++++++++++++++++++++
         A dunpack.1                           |      28 ++++++++++++++++++++++++++++
         A dunpack.c                           |     288 +++++++++++++++++++++++++++++++
       
       16 files changed, 1818 insertions(+), 784 deletions(-)
       ---
   DIR diff --git a/Makefile b/Makefile
       @@ -3,7 +3,16 @@ include config.mk
        VERSION = 1.0
        PREFIX = /usr/local
        MANPREFIX = $(PREFIX)/man
       -TARGET = dedup
       +TARGET = dcheck dinfo dinit dlist dpack dunpack
       +
       +MAN = \
       +        dcheck.1 \
       +        dinfo.1 \
       +        dinit.1 \
       +        dlist.1 \
       +        dpack.1 \
       +        dunpack.1 \
       +
        HDR = \
                arg.h \
                blake2-impl.h \
       @@ -23,7 +32,12 @@ SRC = \
                compress-none.c \
                compress-snappy.c \
                compress.c \
       -        dedup.c \
       +        dcheck.c \
       +        dinfo.c \
       +        dinit.c \
       +        dlist.c \
       +        dpack.c \
       +        dunpack.c \
                hash-blake2b.c \
                hash-blake2bp.c \
                hash-blake2s.c \
       @@ -35,7 +49,7 @@ SRC = \
                unpack.c \
                utils.c \
        
       -OBJ = \
       +COMMOBJ = \
                blake2b-ref.o \
                blake2bp-ref.o \
                blake2s-ref.o \
       @@ -45,7 +59,6 @@ OBJ = \
                compress-none.o \
                compress-snappy.o \
                compress.o \
       -        dedup.o \
                hash-blake2b.o \
                hash-blake2bp.o \
                hash-blake2s.o \
       @@ -57,14 +70,21 @@ OBJ = \
                unpack.o \
                utils.o \
        
       +DCHECKOBJ = $(COMMOBJ) dcheck.o
       +DINFOOBJ = $(COMMOBJ) dinfo.o
       +DINITOBJ = $(COMMOBJ) dinit.o
       +DLISTOBJ = $(COMMOBJ) dlist.o
       +DPACKOBJ = $(COMMOBJ) dpack.o
       +DUNPACKOBJ = $(COMMOBJ) dunpack.o
       +
        DISTFILES = \
       +        $(MAN) \
                $(SRC) \
                CHANGELOG \
                LICENSE \
                Makefile \
                README \
                config.mk \
       -        dedup.1 \
        
        CFLAGS = -g -O2 -Wall $(OPENMPCFLAGS)
        CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64
       @@ -73,20 +93,20 @@ LDLIBS = -llz4 -lsnappy $(OPENMPLDLIBS)
        
        all: $(TARGET)
        
       -$(OBJ): $(HDR)
       +$(DCHECKOBJ) $(DINFOOBJ) $(DINITOBJ) $(DLISTOBJ) $(DPACKOBJ) $(DUNPACKOBJ): $(HDR)
        
        clean:
       -        rm -f $(OBJ) dedup dedup-$(VERSION).tar.gz
       +        rm -f $(DCHECKOBJ) $(DINFOOBJ) $(DINITOBJ) $(DLISTOBJ) $(DPACKOBJ) $(DUNPACKOBJ) $(TARGET) dedup-$(VERSION).tar.gz
        
        install: all
                mkdir -p $(DESTDIR)$(PREFIX)/bin
       -        cp -f dedup $(DESTDIR)$(PREFIX)/bin
       +        cp -f $(TARGET) $(DESTDIR)$(PREFIX)/bin
                mkdir -p $(DESTDIR)$(MANPREFIX)/man1
       -        cp -f dedup.1 $(DESTDIR)$(MANPREFIX)/man1
       +        cp -f $(MAN) $(DESTDIR)$(MANPREFIX)/man1
        
        uninstall:
       -        rm -f $(DESTDIR)$(PREFIX)/bin/dedup
       -        rm -f $(DESTDIR)$(MANPREFIX)/man1/dedup.1
       +        cd $(DESTDIR)$(PREFIX)/bin && rm -f $(TARGET)
       +        cd $(DESTDIR)$(MANPREFIX)/man1 && rm -f $(MAN)
        
        dist:
                mkdir -p dedup-$(VERSION)
       @@ -102,5 +122,20 @@ dist:
        .c.o:
                $(CC) $(CPPFLAGS) $(CFLAGS) -c $<
        
       -dedup: $(OBJ)
       -        $(CC) -o $@ $(OBJ) $(LDFLAGS) $(LDLIBS)
       +dcheck: $(DCHECKOBJ)
       +        $(CC) -o $@ $(DCHECKOBJ) $(LDFLAGS) $(LDLIBS)
       +
       +dinfo: $(DINFOOBJ)
       +        $(CC) -o $@ $(DINFOOBJ) $(LDFLAGS) $(LDLIBS)
       +
       +dinit: $(DINITOBJ)
       +        $(CC) -o $@ $(DINITOBJ) $(LDFLAGS) $(LDLIBS)
       +
       +dlist: $(DLISTOBJ)
       +        $(CC) -o $@ $(DLISTOBJ) $(LDFLAGS) $(LDLIBS)
       +
       +dpack: $(DPACKOBJ)
       +        $(CC) -o $@ $(DPACKOBJ) $(LDFLAGS) $(LDLIBS)
       +
       +dunpack: $(DUNPACKOBJ)
       +        $(CC) -o $@ $(DUNPACKOBJ) $(LDFLAGS) $(LDLIBS)
   DIR diff --git a/README b/README
       @@ -8,29 +8,30 @@ Getting started
        
        To use dedup you have to first initialize the repository.
        
       -    dedup -r ~/repo -i
       +    dinit repo
       +
       +This will create .{snapshots,store} files in the repo directory.  The
       +store file contains all the unique blocks.  The snapshots file
       +contains all the revisions of files that have been deduplicated.
        
        dedup only handles a single file at a time, so using tar is advised.
       -For example, to dedup a directory tree you can invoke dedup as
       +For example, to dedup a directory tree you can invoke dpack as
        follows:
        
       -    tar -c ~/dir | dedup -r ~/repo -m "$(date)"
       +    tar -c ~/dir | dpack -m "$(date)" repo
        
       -This will create .{snapshots,store} files in the ~/repo directory.
       -The store file contains all the unique blocks.  The snapshots file
       -contains all the revisions of files that have been deduplicated.  The
       --m flag is used to attach an arbitrary message to the snapshot.
       +The -m flag is used to attach an arbitrary message to the snapshot.
        
        To list all known revisions run:
        
       -    dedup -r ~/repo -l
       +    dlist repo
        
        You will get a list of hashes.  Each hash corresponds to a single file
        (in this case, a tar archive).
        
        To extract a file from the deduplicated store run:
        
       -    dedup -r ~/repo -e <hash> > dir.tar
       +    dunpack -e <hash> repo > snapshot.tar
        
        Portability
        ===========
   DIR diff --git a/dcheck.1 b/dcheck.1
       @@ -0,0 +1,25 @@
       +.Dd April 17, 2019
       +.Dt DCHECK 1
       +.Os
       +.Sh NAME
       +.Nm dcheck
       +.Nd perform consistency checks on a dedup repository
       +.Sh SYNOPSIS
       +.Nm dcheck
       +.Op Fl v
       +.Op Ar repo
       +.Sh DESCRIPTION
       +.Nm
       +performs consistency checks on a dedup repo.
       +If no
       +.Ar repo
       +is specified, then the current directory
       +is assumed to be the repository.
       +.Sh OPTIONS
       +.Bl -tag -width "-v"
       +.It Fl v
       +Enable verbose mode.
       +.El
       +.Sh AUTHORS
       +.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       +.An z3bra Aq Mt contactatz3bradotorg .
   DIR diff --git a/dcheck.c b/dcheck.c
       @@ -0,0 +1,316 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +#include <sys/file.h>
       +
       +#include <err.h>
       +#include <fcntl.h>
       +#include <stdio.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +
       +#include "arg.h"
       +#include "blake2.h"
       +#include "dedup.h"
       +
       +#define SNAPSF ".snapshots"
       +#define STOREF ".store"
       +
       +enum {
       +        WALK_CONTINUE,
       +        WALK_STOP
       +};
       +
       +static struct snap_hdr snap_hdr;
       +static struct blk_hdr blk_hdr;
       +static int ifd;
       +static int sfd;
       +static int hash_algo = HASH_BLAKE2B;
       +static int compr_algo = COMPR_LZ4;
       +
       +int verbose;
       +char *argv0;
       +
       +static void
       +print_md(FILE *fp, uint8_t *md, size_t size)
       +{
       +        size_t i;
       +
       +        for (i = 0; i < size; i++)
       +                fprintf(fp, "%02x", md[i]);
       +}
       +
       +static struct snap *
       +alloc_snap(void)
       +{
       +        struct snap *snap;
       +
       +        snap = calloc(1, sizeof(*snap));
       +        if (snap == NULL)
       +                err(1, "%s", __func__);
       +        return snap;
       +}
       +
       +static void
       +free_snap(struct snap *snap)
       +{
       +        free(snap);
       +}
       +
       +static struct snap *
       +grow_snap(struct snap *snap, uint64_t nr_blk_descs)
       +{
       +        size_t size;
       +
       +        if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0]))
       +                errx(1, "%s: overflow", __func__);
       +        size = nr_blk_descs * sizeof(snap->blk_desc[0]);
       +
       +        if (size > SIZE_MAX - sizeof(*snap))
       +                errx(1, "%s: overflow", __func__);
       +        size += sizeof(*snap);
       +
       +        snap = realloc(snap, size);
       +        if (snap == NULL)
       +                err(1, "%s", __func__);
       +        return snap;
       +}
       +
       +static uint8_t *
       +alloc_buf(size_t size)
       +{
       +        void *p;
       +
       +        p = calloc(1, size);
       +        if (p == NULL)
       +                err(1, "%s", __func__);
       +        return p;
       +}
       +
       +static void
       +free_buf(uint8_t *buf)
       +{
       +        free(buf);
       +}
       +
       +static void
       +hash_blk(uint8_t *buf, size_t size, uint8_t *md)
       +{
       +        struct hash_ctx ctx;
       +
       +        if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
       +                errx(1, "hash_init failed");
       +        hash_update(&ctx, buf, size);
       +        hash_final(&ctx, md, MD_SIZE);
       +}
       +
       +static void
       +read_blk(uint8_t *buf, struct blk_desc *blk_desc)
       +{
       +        ssize_t n;
       +
       +        xlseek(sfd, blk_desc->offset, SEEK_SET);
       +        n = xread(sfd, buf, blk_desc->size);
       +        if (n == 0)
       +                errx(1, "%s: unexpected EOF", __func__);
       +        if (n != blk_desc->size)
       +                errx(1, "%s: short read", __func__);
       +}
       +
       +/*
       + * Hash every block referenced by the given snapshot
       + * and compare its hash with the one stored in the corresponding
       + * block descriptor.
       + */
       +static int
       +check_snap(struct snap *snap, void *arg)
       +{
       +        struct compr_ctx ctx;
       +        uint8_t *buf;
       +        int *ret = arg;
       +        uint64_t i;
       +
       +        if (verbose > 0) {
       +                fprintf(stderr, "Checking snapshot: ");
       +                print_md(stderr, snap->md, sizeof(snap->md));
       +                fputc('\n', stderr);
       +        }
       +
       +        if (compr_init(&ctx, compr_algo) < 0)
       +                errx(1, "compr_init failed");
       +        buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
       +        for (i = 0; i < snap->nr_blk_descs; i++) {
       +                uint8_t md[MD_SIZE];
       +                struct blk_desc *blk_desc;
       +
       +                blk_desc = &snap->blk_desc[i];
       +                read_blk(buf, blk_desc);
       +                hash_blk(buf, blk_desc->size, md);
       +
       +                if (memcmp(blk_desc->md, md, sizeof(blk_desc->md)) == 0)
       +                        continue;
       +
       +                fprintf(stderr, "Block hash mismatch\n");
       +                fprintf(stderr, "  Expected hash: ");
       +                print_md(stderr, blk_desc->md, sizeof(blk_desc->md));
       +                fputc('\n', stderr);
       +                fprintf(stderr, "  Actual hash: ");
       +                print_md(stderr, md, sizeof(md));
       +                fputc('\n', stderr);
       +                fprintf(stderr, "  Offset: %llu\n",
       +                        (unsigned long long)blk_desc->offset);
       +                fprintf(stderr, "  Size: %llu\n",
       +                        (unsigned long long)blk_desc->size);
       +                *ret = -1;
       +        }
       +        free_buf(buf);
       +        compr_final(&ctx);
       +        return WALK_CONTINUE;
       +}
       +
       +/* Walk through all snapshots and call fn() on each one */
       +static void
       +walk_snap(int (*fn)(struct snap *, void *), void *arg)
       +{
       +        uint64_t i;
       +
       +        xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
       +        for (i = 0; i < snap_hdr.nr_snaps; i++) {
       +                struct snap *snap;
       +                int ret;
       +
       +                snap = alloc_snap();
       +                read_snap(ifd, snap);
       +                snap = grow_snap(snap, snap->nr_blk_descs);
       +                read_snap_descs(ifd, snap);
       +
       +                ret = (*fn)(snap, arg);
       +                free_snap(snap);
       +                if (ret == WALK_STOP)
       +                        break;
       +        }
       +}
       +
       +static void
       +match_ver(uint64_t v)
       +{
       +        uint8_t maj, min;
       +
       +        min = v & VER_MIN_MASK;
       +        maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
       +        if (maj == VER_MAJ && min == VER_MIN)
       +                return;
       +        errx(1, "format version mismatch: expected %u.%u but got %u.%u",
       +             VER_MAJ, VER_MIN, maj, min);
       +}
       +
       +static void
       +load_blk_hdr(void)
       +{
       +        uint64_t v;
       +
       +        xlseek(sfd, 0, SEEK_SET);
       +        read_blk_hdr(sfd, &blk_hdr);
       +        match_ver(blk_hdr.flags);
       +
       +        v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
       +        v &= COMPR_ALGO_MASK;
       +        compr_algo = v;
       +
       +        if (compr_algo < 0 || compr_algo >= NR_COMPRS)
       +                errx(1, "unsupported compression algorithm: %d", compr_algo);
       +
       +        if (verbose > 0)
       +                fprintf(stderr, "Compression algorithm: %s\n",
       +                        compr_type2name(compr_algo));
       +
       +        v = blk_hdr.flags >> HASH_ALGO_SHIFT;
       +        v &= HASH_ALGO_MASK;
       +        hash_algo = v;
       +
       +        if (hash_algo < 0 || hash_algo >= NR_HASHES)
       +                errx(1, "unsupported hash algorithm: %d", hash_algo);
       +
       +        if (verbose > 0)
       +                fprintf(stderr, "Hash algorithm: %s\n",
       +                        hash_type2name(hash_algo));
       +}
       +
       +static void
       +load_snap_hdr(void)
       +{
       +        xlseek(ifd, 0, SEEK_SET);
       +        read_snap_hdr(ifd, &snap_hdr);
       +        match_ver(snap_hdr.flags);
       +}
       +
       +static void
       +init(void)
       +{
       +        ifd = open(SNAPSF, O_RDONLY, 0600);
       +        if (ifd < 0)
       +                err(1, "open %s", SNAPSF);
       +
       +        sfd = open(STOREF, O_RDONLY, 0600);
       +        if (sfd < 0)
       +                err(1, "open %s", STOREF);
       +
       +        if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
       +            flock(sfd, LOCK_NB | LOCK_EX) < 0)
       +                err(1, "flock");
       +
       +        load_snap_hdr();
       +        load_blk_hdr();
       +}
       +
       +static void
       +term(void)
       +{
       +        close(sfd);
       +        close(ifd);
       +}
       +
       +static void
       +usage(void)
       +{
       +        fprintf(stderr, "usage: %s [-v] [repo]\n", argv0);
       +        exit(1);
       +}
       +
       +int
       +main(int argc, char *argv[])
       +{
       +        char *repo = NULL;
       +        int ret;
       +
       +        ARGBEGIN {
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        switch (argc) {
       +        case 0:
       +                repo = ".";
       +                break;
       +        case 1:
       +                repo = argv[0];
       +                break;
       +        default:
       +                usage();
       +        };
       +
       +        if (chdir(repo) < 0)
       +                err(1, "chdir: %s", repo);
       +
       +        init();
       +        ret = 0;
       +        walk_snap(check_snap, &ret);
       +        if (ret != 0)
       +                errx(1, "%s or %s is corrupted", SNAPSF, STOREF);
       +        term();
       +        return 0;
       +}
   DIR diff --git a/dedup.1 b/dedup.1
       @@ -1,62 +0,0 @@
       -.Dd April 10, 2019
       -.Dt DEDUP 1
       -.Os
       -.Sh NAME
       -.Nm dedup
       -.Nd data deduplication program
       -.Sh SYNOPSIS
       -.Nm dedup
       -.Op Fl cilv
       -.Op Fl H Ar hash
       -.Op Fl Z Ar compressor
       -.Op Fl e Ar id
       -.Op Fl r Ar root
       -.Op Fl m Ar message
       -.Op file
       -.Sh DESCRIPTION
       -.Nm
       -deduplicates data from the specified
       -.Ar file .
       -If no
       -.Ar file
       -is specified then it reads from standard input.
       -.Pp
       -.Nm
       -does not track any file metadata so to deduplicate
       -directory trees, an archival tool like
       -.Xr tar 1
       -should be used and piped into
       -.Nm .
       -.Sh OPTIONS
       -.Bl -tag -width "-Z compressor"
       -.It Fl H Ar hash
       -The cryptographic hash function used to identify
       -unique blocks in the store.
       -The supported hash functions are blake2b, blake2bp, blake2s and blake2sp.
       -This flag only has an effect when initializing the repository.
       -By default blake2b is used.
       -.It Fl Z Ar compressor
       -The compressor function used to compress the blocks
       -in the store.
       -The supported compressor functions are none, lz4 and snappy.
       -This flag only has an effect when initializing the repository.
       -By default lz4 is used.
       -.It Fl c
       -Perform a consistency check on the repository.
       -.It Fl i
       -Initialize the repository.
       -.It Fl l
       -List snapshots.
       -.It Fl v
       -Enable verbose mode.
       -.It Fl e Ar id
       -Extract snapshot with the specified id.
       -.It Fl r Ar root
       -Set the directory where the repository will be created.
       -By default the repository is created in the current directory.
       -.It Fl m Ar message
       -Attach a descriptive message to the snapshot.
       -.El
       -.Sh AUTHORS
       -.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       -.An z3bra Aq Mt contactatz3bradotorg .
   DIR diff --git a/dedup.c b/dedup.c
       @@ -1,700 +0,0 @@
       -#include <sys/types.h>
       -#include <sys/stat.h>
       -#include <sys/file.h>
       -
       -#include <err.h>
       -#include <fcntl.h>
       -#include <stdio.h>
       -#include <stdint.h>
       -#include <stdlib.h>
       -#include <string.h>
       -#include <unistd.h>
       -
       -#include "arg.h"
       -#include "blake2.h"
       -#include "dedup.h"
       -
       -#define SNAPSF ".snapshots"
       -#define STOREF ".store"
       -
       -enum {
       -        WALK_CONTINUE,
       -        WALK_STOP
       -};
       -
       -struct extract_args {
       -        uint8_t *md;
       -        int fd;
       -        int ret;
       -};
       -
       -static struct snap_hdr snap_hdr;
       -static struct blk_hdr blk_hdr;
       -static struct icache *icache;
       -static int ifd;
       -static int sfd;
       -static int hash_algo = HASH_BLAKE2B;
       -static int compr_algo = COMPR_LZ4;
       -
       -int verbose;
       -char *argv0;
       -
       -static void
       -print_md(FILE *fp, uint8_t *md, size_t size)
       -{
       -        size_t i;
       -
       -        for (i = 0; i < size; i++)
       -                fprintf(fp, "%02x", md[i]);
       -}
       -
       -static void
       -print_stats(struct stats *st)
       -{
       -        unsigned long long hits, misses;
       -        double hitratio;
       -
       -        if (st->nr_blks == 0)
       -                return;
       -
       -        fprintf(stderr, "Original size: %llu bytes\n",
       -                (unsigned long long)st->orig_size);
       -        fprintf(stderr, "Compressed size: %llu bytes\n",
       -                (unsigned long long)st->compr_size);
       -        fprintf(stderr, "Deduplicated size: %llu bytes\n",
       -                (unsigned long long)st->dedup_size);
       -        fprintf(stderr, "Deduplication ratio: %.2f\n",
       -                (double)st->orig_size / st->dedup_size);
       -        fprintf(stderr, "Min/avg/max block size: %llu/%llu/%llu bytes\n",
       -                (unsigned long long)st->min_blk_size,
       -                (unsigned long long)st->dedup_size / st->nr_blks,
       -                (unsigned long long)st->max_blk_size);
       -        fprintf(stderr, "Number of unique blocks: %llu\n",
       -                (unsigned long long)st->nr_blks);
       -
       -        icache_stats(icache, &hits, &misses);
       -        if (hits == 0 && misses == 0)
       -                hitratio = 0;
       -        else
       -                hitratio = (double)hits / (hits + misses);
       -
       -        fprintf(stderr, "Index cache hit percentage: %.2f%%\n",
       -                100 * hitratio);
       -}
       -
       -static struct snap *
       -alloc_snap(void)
       -{
       -        struct snap *snap;
       -
       -        snap = calloc(1, sizeof(*snap));
       -        if (snap == NULL)
       -                err(1, "%s", __func__);
       -        return snap;
       -}
       -
       -static void
       -free_snap(struct snap *snap)
       -{
       -        free(snap);
       -}
       -
       -/*
       - * The snapshot hash is calculated over the
       - * hash of its block descriptors.
       - */
       -static void
       -hash_snap(struct snap *snap, uint8_t *md)
       -{
       -        struct hash_ctx ctx;
       -        uint64_t i;
       -
       -        if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
       -                errx(1, "hash_init failed");
       -        for (i = 0; i < snap->nr_blk_descs; i++) {
       -                struct blk_desc *blk_desc;
       -
       -                blk_desc = &snap->blk_desc[i];
       -                hash_update(&ctx, blk_desc->md, sizeof(blk_desc->md));
       -        }
       -        hash_final(&ctx, md, MD_SIZE);
       -}
       -
       -static struct snap *
       -grow_snap(struct snap *snap, uint64_t nr_blk_descs)
       -{
       -        size_t size;
       -
       -        if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0]))
       -                errx(1, "%s: overflow", __func__);
       -        size = nr_blk_descs * sizeof(snap->blk_desc[0]);
       -
       -        if (size > SIZE_MAX - sizeof(*snap))
       -                errx(1, "%s: overflow", __func__);
       -        size += sizeof(*snap);
       -
       -        snap = realloc(snap, size);
       -        if (snap == NULL)
       -                err(1, "%s", __func__);
       -        return snap;
       -}
       -
       -static void
       -append_snap(struct snap *snap)
       -{
       -        if (snap->nr_blk_descs > UINT64_MAX / BLK_DESC_SIZE)
       -                errx(1, "%s: overflow", __func__);
       -        snap->size = snap->nr_blk_descs * BLK_DESC_SIZE;
       -
       -        if (snap->size > UINT64_MAX - SNAPSHOT_SIZE)
       -                errx(1, "%s: overflow", __func__);
       -        snap->size += SNAPSHOT_SIZE;
       -
       -        xlseek(ifd, snap_hdr.size, SEEK_SET);
       -        write_snap(ifd, snap);
       -        write_snap_blk_descs(ifd, snap);
       -
       -        if (snap_hdr.size > UINT64_MAX - snap->size)
       -                errx(1, "%s: overflow", __func__);
       -        snap_hdr.size += snap->size;
       -
       -        if (snap_hdr.nr_snaps > UINT64_MAX - 1)
       -                errx(1, "%s: overflow", __func__);
       -        snap_hdr.nr_snaps++;
       -}
       -
       -static uint8_t *
       -alloc_buf(size_t size)
       -{
       -        void *p;
       -
       -        p = calloc(1, size);
       -        if (p == NULL)
       -                err(1, "%s", __func__);
       -        return p;
       -}
       -
       -static void
       -free_buf(uint8_t *buf)
       -{
       -        free(buf);
       -}
       -
       -static void
       -hash_blk(uint8_t *buf, size_t size, uint8_t *md)
       -{
       -        struct hash_ctx ctx;
       -
       -        if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
       -                errx(1, "hash_init failed");
       -        hash_update(&ctx, buf, size);
       -        hash_final(&ctx, md, MD_SIZE);
       -}
       -
       -static void
       -read_blk(uint8_t *buf, struct blk_desc *blk_desc)
       -{
       -        ssize_t n;
       -
       -        xlseek(sfd, blk_desc->offset, SEEK_SET);
       -        n = xread(sfd, buf, blk_desc->size);
       -        if (n == 0)
       -                errx(1, "%s: unexpected EOF", __func__);
       -        if (n != blk_desc->size)
       -                errx(1, "%s: short read", __func__);
       -}
       -
       -static void
       -append_blk(uint8_t *buf, struct blk_desc *blk_desc)
       -{
       -        xlseek(sfd, blk_hdr.size, SEEK_SET);
       -        xwrite(sfd, buf, blk_desc->size);
       -
       -        if (blk_hdr.size > UINT64_MAX - blk_desc->size)
       -                errx(1, "%s: overflow", __func__);
       -        blk_hdr.size += blk_desc->size;
       -}
       -
       -static void
       -dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
       -{
       -        uint8_t md[MD_SIZE];
       -        struct blk_desc blk_desc;
       -        struct compr_ctx ctx;
       -        uint8_t *compr_buf;
       -        size_t n, csize;
       -
       -        if (compr_init(&ctx, compr_algo) < 0)
       -                errx(1, "compr_init failed");
       -        csize = compr_size(&ctx, BLKSIZE_MAX);
       -        compr_buf = alloc_buf(csize);
       -
       -        n = compr(&ctx, chunkp, compr_buf, chunk_size, csize);
       -        hash_blk(compr_buf, n, md);
       -
       -        snap_hdr.st.orig_size += chunk_size;
       -        snap_hdr.st.compr_size += n;
       -
       -        memcpy(blk_desc.md, md, sizeof(blk_desc.md));
       -        if (lookup_icache(icache, &blk_desc) < 0) {
       -                blk_desc.offset = blk_hdr.size;
       -                blk_desc.size = n;
       -
       -                snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
       -                append_blk(compr_buf, &blk_desc);
       -
       -                insert_icache(icache, &blk_desc);
       -
       -                snap_hdr.st.dedup_size += blk_desc.size;
       -                snap_hdr.st.nr_blks++;
       -
       -                if (blk_desc.size > snap_hdr.st.max_blk_size)
       -                        snap_hdr.st.max_blk_size = blk_desc.size;
       -                if (blk_desc.size < snap_hdr.st.min_blk_size)
       -                        snap_hdr.st.min_blk_size = blk_desc.size;
       -        } else {
       -                snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
       -        }
       -
       -        free(compr_buf);
       -        compr_final(&ctx);
       -}
       -
       -static void
       -dedup(int fd, char *msg)
       -{
       -        struct snap *snap;
       -        struct chunker *chunker;
       -
       -        snap = alloc_snap();
       -        chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
       -                                HASHMASK_BITS, WINSIZE);
       -
       -        while (fill_chunker(chunker) > 0) {
       -                uint8_t *chunkp;
       -                size_t chunk_size;
       -
       -                chunkp = get_chunk(chunker, &chunk_size);
       -                snap = grow_snap(snap, snap->nr_blk_descs + 1);
       -                dedup_chunk(snap, chunkp, chunk_size);
       -                drain_chunker(chunker);
       -        }
       -
       -        if (snap->nr_blk_descs > 0) {
       -                if (msg != NULL) {
       -                        size_t size;
       -
       -                        size = strlen(msg) + 1;
       -                        if (size > sizeof(snap->msg))
       -                                size = sizeof(snap->msg);
       -                        memcpy(snap->msg, msg, size);
       -                        snap->msg[size - 1] = '\0';
       -                }
       -                hash_snap(snap, snap->md);
       -                append_snap(snap);
       -        }
       -
       -        free_chunker(chunker);
       -        free_snap(snap);
       -}
       -
       -static int
       -extract(struct snap *snap, void *arg)
       -{
       -        uint8_t *buf[2];
       -        struct extract_args *args = arg;
       -        struct compr_ctx ctx;
       -        uint64_t i;
       -
       -        if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
       -                return WALK_CONTINUE;
       -
       -        if (compr_init(&ctx, compr_algo) < 0)
       -                errx(1, "compr_init failed");
       -        buf[0] = alloc_buf(BLKSIZE_MAX);
       -        buf[1] = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
       -        for (i = 0; i < snap->nr_blk_descs; i++) {
       -                struct blk_desc *blk_desc;
       -                size_t blksize;
       -
       -                blk_desc = &snap->blk_desc[i];
       -                read_blk(buf[1], blk_desc);
       -                blksize = decompr(&ctx, buf[1], buf[0], blk_desc->size, BLKSIZE_MAX);
       -                xwrite(args->fd, buf[0], blksize);
       -        }
       -        free_buf(buf[1]);
       -        free_buf(buf[0]);
       -        compr_final(&ctx);
       -        args->ret = 0;
       -        return WALK_STOP;
       -}
       -
       -/*
       - * Hash every block referenced by the given snapshot
       - * and compare its hash with the one stored in the corresponding
       - * block descriptor.
       - */
       -static int
       -check_snap(struct snap *snap, void *arg)
       -{
       -        struct compr_ctx ctx;
       -        uint8_t *buf;
       -        int *ret = arg;
       -        uint64_t i;
       -
       -        if (verbose > 0) {
       -                fprintf(stderr, "Checking snapshot: ");
       -                print_md(stderr, snap->md, sizeof(snap->md));
       -                fputc('\n', stderr);
       -        }
       -
       -        if (compr_init(&ctx, compr_algo) < 0)
       -                errx(1, "compr_init failed");
       -        buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
       -        for (i = 0; i < snap->nr_blk_descs; i++) {
       -                uint8_t md[MD_SIZE];
       -                struct blk_desc *blk_desc;
       -
       -                blk_desc = &snap->blk_desc[i];
       -                read_blk(buf, blk_desc);
       -                hash_blk(buf, blk_desc->size, md);
       -
       -                if (memcmp(blk_desc->md, md, sizeof(blk_desc->md)) == 0)
       -                        continue;
       -
       -                fprintf(stderr, "Block hash mismatch\n");
       -                fprintf(stderr, "  Expected hash: ");
       -                print_md(stderr, blk_desc->md, sizeof(blk_desc->md));
       -                fputc('\n', stderr);
       -                fprintf(stderr, "  Actual hash: ");
       -                print_md(stderr, md, sizeof(md));
       -                fputc('\n', stderr);
       -                fprintf(stderr, "  Offset: %llu\n",
       -                        (unsigned long long)blk_desc->offset);
       -                fprintf(stderr, "  Size: %llu\n",
       -                        (unsigned long long)blk_desc->size);
       -                *ret = -1;
       -        }
       -        free_buf(buf);
       -        compr_final(&ctx);
       -        return WALK_CONTINUE;
       -}
       -
       -static int
       -build_icache(struct snap *snap, void *arg)
       -{
       -        struct compr_ctx ctx;
       -        uint8_t *buf;
       -        uint64_t i;
       -
       -        if (compr_init(&ctx, compr_algo) < 0)
       -                errx(1, "compr_init failed");
       -        buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
       -        for (i = 0; i < snap->nr_blk_descs; i++) {
       -                struct blk_desc *blk_desc;
       -
       -                blk_desc = &snap->blk_desc[i];
       -                insert_icache(icache, blk_desc);
       -        }
       -        free(buf);
       -        compr_final(&ctx);
       -        return WALK_CONTINUE;
       -}
       -
       -static int
       -list(struct snap *snap, void *arg)
       -{
       -        print_md(stdout, snap->md, sizeof(snap->md));
       -        if (snap->msg[0] != '\0')
       -                printf("\t%s\n", snap->msg);
       -        else
       -                putchar('\n');
       -        return WALK_CONTINUE;
       -}
       -
       -/* Walk through all snapshots and call fn() on each one */
       -static void
       -walk_snap(int (*fn)(struct snap *, void *), void *arg)
       -{
       -        uint64_t i;
       -
       -        xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
       -        for (i = 0; i < snap_hdr.nr_snaps; i++) {
       -                struct snap *snap;
       -                int ret;
       -
       -                snap = alloc_snap();
       -                read_snap(ifd, snap);
       -                snap = grow_snap(snap, snap->nr_blk_descs);
       -                read_snap_descs(ifd, snap);
       -
       -                ret = (*fn)(snap, arg);
       -                free_snap(snap);
       -                if (ret == WALK_STOP)
       -                        break;
       -        }
       -}
       -
       -static void
       -match_ver(uint64_t v)
       -{
       -        uint8_t maj, min;
       -
       -        min = v & VER_MIN_MASK;
       -        maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
       -        if (maj == VER_MAJ && min == VER_MIN)
       -                return;
       -        errx(1, "format version mismatch: expected %u.%u but got %u.%u",
       -             VER_MAJ, VER_MIN, maj, min);
       -}
       -
       -static void
       -init_blk_hdr(void)
       -{
       -        blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
       -        blk_hdr.flags |= compr_algo << COMPR_ALGO_SHIFT;
       -        blk_hdr.flags |= hash_algo << HASH_ALGO_SHIFT;
       -        blk_hdr.size = BLK_HDR_SIZE;
       -}
       -
       -static void
       -load_blk_hdr(void)
       -{
       -        uint64_t v;
       -
       -        xlseek(sfd, 0, SEEK_SET);
       -        read_blk_hdr(sfd, &blk_hdr);
       -        match_ver(blk_hdr.flags);
       -
       -        v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
       -        v &= COMPR_ALGO_MASK;
       -        compr_algo = v;
       -
       -        if (compr_algo < 0 || compr_algo >= NR_COMPRS)
       -                errx(1, "unsupported compression algorithm: %d", compr_algo);
       -
       -        if (verbose > 0)
       -                fprintf(stderr, "Compression algorithm: %s\n",
       -                        compr_type2name(compr_algo));
       -
       -        v = blk_hdr.flags >> HASH_ALGO_SHIFT;
       -        v &= HASH_ALGO_MASK;
       -        hash_algo = v;
       -
       -        if (hash_algo < 0 || hash_algo >= NR_HASHES)
       -                errx(1, "unsupported hash algorithm: %d", hash_algo);
       -
       -        if (verbose > 0)
       -                fprintf(stderr, "Hash algorithm: %s\n",
       -                        hash_type2name(hash_algo));
       -}
       -
       -static void
       -save_blk_hdr(void)
       -{
       -        xlseek(sfd, 0, SEEK_SET);
       -        write_blk_hdr(sfd, &blk_hdr);
       -}
       -
       -static void
       -init_snap_hdr(void)
       -{
       -        snap_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
       -        snap_hdr.size = SNAP_HDR_SIZE;
       -        snap_hdr.st.min_blk_size = UINT64_MAX;
       -}
       -
       -static void
       -load_snap_hdr(void)
       -{
       -        xlseek(ifd, 0, SEEK_SET);
       -        read_snap_hdr(ifd, &snap_hdr);
       -        match_ver(snap_hdr.flags);
       -}
       -
       -static void
       -save_snap_hdr(void)
       -{
       -        xlseek(ifd, 0, SEEK_SET);
       -        write_snap_hdr(ifd, &snap_hdr);
       -}
       -
       -static void
       -init(int iflag)
       -{
       -        int flags;
       -
       -        flags = O_RDWR;
       -        if (iflag)
       -                flags |= O_CREAT | O_EXCL;
       -
       -        ifd = open(SNAPSF, flags, 0600);
       -        if (ifd < 0)
       -                err(1, "open %s", SNAPSF);
       -
       -        sfd = open(STOREF, flags, 0600);
       -        if (sfd < 0)
       -                err(1, "open %s", STOREF);
       -
       -        if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
       -            flock(sfd, LOCK_NB | LOCK_EX) < 0)
       -                err(1, "flock");
       -
       -        if (iflag) {
       -                init_snap_hdr();
       -                init_blk_hdr();
       -        } else {
       -                load_snap_hdr();
       -                load_blk_hdr();
       -        }
       -
       -        icache = alloc_icache();
       -        walk_snap(build_icache, NULL);
       -}
       -
       -static void
       -term(void)
       -{
       -        if (verbose > 0)
       -                print_stats(&snap_hdr.st);
       -
       -        free_icache(icache);
       -
       -        save_blk_hdr();
       -        save_snap_hdr();
       -
       -        fsync(sfd);
       -        fsync(ifd);
       -
       -        close(sfd);
       -        close(ifd);
       -}
       -
       -static void
       -usage(void)
       -{
       -        fprintf(stderr, "usage: %s [-cilv] [-H hash] [-Z compressor] [-e id] [-r root] [-m message] [file]\n", argv0);
       -        exit(1);
       -}
       -
       -int
       -main(int argc, char *argv[])
       -{
       -        uint8_t md[MD_SIZE];
       -        char *id = NULL, *root = NULL, *msg = NULL, *hash_name, *compr_name;
       -        int iflag = 0, lflag = 0, cflag = 0;
       -        int fd = -1;
       -
       -        ARGBEGIN {
       -        case 'H':
       -                hash_name = EARGF(usage());
       -                if (strcmp(hash_name, "?") == 0) {
       -                        hash_list(STDERR_FILENO);
       -                        return 0;
       -                }
       -                hash_algo = hash_name2type(hash_name);
       -                if (hash_algo < 0)
       -                        errx(1, "unknown hash: %s", hash_name);
       -                break;
       -        case 'Z':
       -                compr_name = EARGF(usage());
       -                if (strcmp(compr_name, "?") == 0) {
       -                        compr_list(STDERR_FILENO);
       -                        return 0;
       -                }
       -                compr_algo = compr_name2type(compr_name);
       -                if (compr_algo < 0)
       -                        errx(1, "unknown compressor: %s", compr_name);
       -                break;
       -        case 'c':
       -                cflag = 1;
       -                break;
       -        case 'e':
       -                id = EARGF(usage());
       -                break;
       -        case 'i':
       -                iflag = 1;
       -                break;
       -        case 'l':
       -                lflag = 1;
       -                break;
       -        case 'r':
       -                root = EARGF(usage());
       -                break;
       -        case 'm':
       -                msg = EARGF(usage());
       -                break;
       -        case 'v':
       -                verbose++;
       -                break;
       -        default:
       -                usage();
       -        } ARGEND
       -
       -        if (argc > 1) {
       -                usage();
       -        } else if (argc == 1) {
       -                if (id) {
       -                        fd = open(argv[0], O_RDWR | O_CREAT, 0600);
       -                        if (fd < 0)
       -                                err(1, "open %s", argv[0]);
       -                } else {
       -                        fd = open(argv[0], O_RDONLY);
       -                        if (fd < 0)
       -                                err(1, "open %s", argv[0]);
       -                }
       -        } else {
       -                if (id)
       -                        fd = STDOUT_FILENO;
       -                else
       -                        fd = STDIN_FILENO;
       -        }
       -
       -        if (root != NULL) {
       -                mkdir(root, 0700);
       -                if (chdir(root) < 0)
       -                        err(1, "chdir: %s", root);
       -        }
       -
       -        init(iflag);
       -
       -        if (iflag) {
       -                term();
       -                return 0;
       -        }
       -
       -        if (cflag) {
       -                int ret;
       -
       -                ret = 0;
       -                walk_snap(check_snap, &ret);
       -                if (ret != 0)
       -                        errx(1, "%s or %s is corrupted", SNAPSF, STOREF);
       -
       -                term();
       -                return 0;
       -        }
       -
       -        if (lflag) {
       -                walk_snap(list, NULL);
       -                term();
       -                return 0;
       -        }
       -
       -        if (id) {
       -                struct extract_args args;
       -
       -                str2bin(id, md);
       -                args.md = md;
       -                args.fd = fd;
       -                args.ret = -1;
       -                walk_snap(extract, &args);
       -                if (args.ret != 0)
       -                        errx(1, "unknown snapshot: %s", id);
       -        } else {
       -                dedup(fd, msg);
       -        }
       -
       -        term();
       -        return 0;
       -}
   DIR diff --git a/dinfo.1 b/dinfo.1
       @@ -0,0 +1,25 @@
       +.Dd April 17, 2019
       +.Dt DINFO 1
       +.Os
       +.Sh NAME
       +.Nm dinfo
       +.Nd Print information about a dedup repository
       +.Sh SYNOPSIS
       +.Nm dinfo
       +.Op Fl v
       +.Op Ar repo
       +.Sh DESCRIPTION
       +.Nm
       +prints information about a dedup repository.
       +If no
       +.Ar repo
       +is specified, then the current directory
       +is assumed to be the repository.
       +.Sh OPTIONS
       +.Bl -tag -width "-v"
       +.It Fl v
       +Enable verbose mode.
       +.El
       +.Sh AUTHORS
       +.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       +.An z3bra Aq Mt contactatz3bradotorg .
   DIR diff --git a/dinfo.c b/dinfo.c
       @@ -0,0 +1,170 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +#include <sys/file.h>
       +
       +#include <err.h>
       +#include <fcntl.h>
       +#include <stdio.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +
       +#include "arg.h"
       +#include "blake2.h"
       +#include "dedup.h"
       +
       +#define SNAPSF ".snapshots"
       +#define STOREF ".store"
       +
       +/* In-memory copies of the two on-disk headers, filled in by init() */
       +static struct snap_hdr snap_hdr;
       +static struct blk_hdr blk_hdr;
       +/* Descriptors for SNAPSF (ifd) and STOREF (sfd), opened by init() */
       +static int ifd;
       +static int sfd;
       +/* Defaults only; load_blk_hdr() overwrites both from the store header */
       +static int hash_algo = HASH_BLAKE2B;
       +static int compr_algo = COMPR_LZ4;
       +
       +/* Verbosity level, bumped once per -v on the command line */
       +int verbose;
       +/* Program name used by usage(); NOTE(review): presumably set by ARGBEGIN in arg.h - confirm */
       +char *argv0;
       +
       +/*
       + * Write the repository statistics held in st to stderr.
       + * Nothing is printed when the repository holds no blocks.
       + */
       +static void
       +print_info(struct stats *st)
       +{
       +        if (st->nr_blks != 0) {
       +                /* Derived figures first, raw counters are printed as-is */
       +                double ratio = (double)st->orig_size / st->dedup_size;
       +                unsigned long long avg_blk_size =
       +                        (unsigned long long)st->dedup_size / st->nr_blks;
       +
       +                fprintf(stderr, "Original size: %llu bytes\n",
       +                        (unsigned long long)st->orig_size);
       +                fprintf(stderr, "Compressed size: %llu bytes\n",
       +                        (unsigned long long)st->compr_size);
       +                fprintf(stderr, "Deduplicated size: %llu bytes\n",
       +                        (unsigned long long)st->dedup_size);
       +                fprintf(stderr, "Deduplication ratio: %.2f\n", ratio);
       +                fprintf(stderr, "Min/avg/max block size: %llu/%llu/%llu bytes\n",
       +                        (unsigned long long)st->min_blk_size,
       +                        avg_blk_size,
       +                        (unsigned long long)st->max_blk_size);
       +                fprintf(stderr, "Number of unique blocks: %llu\n",
       +                        (unsigned long long)st->nr_blks);
       +        }
       +}
       +
       +/*
       + * Decode the major/minor format version packed into v and exit
       + * with an error unless it equals VER_MAJ.VER_MIN.
       + */
       +static void
       +match_ver(uint64_t v)
       +{
       +        uint8_t major = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
       +        uint8_t minor = v & VER_MIN_MASK;
       +
       +        if (major != VER_MAJ || minor != VER_MIN)
       +                errx(1, "format version mismatch: expected %u.%u but got %u.%u",
       +                     VER_MAJ, VER_MIN, major, minor);
       +}
       +
       +/*
       + * Read the store header from the start of sfd, verify its format
       + * version and extract the compression and hash algorithm ids from
       + * its flags word into compr_algo and hash_algo.  Exits if either
       + * id is out of range; in verbose mode the algorithm names are
       + * reported on stderr.
       + */
       +static void
       +load_blk_hdr(void)
       +{
       +        uint64_t compr_bits, hash_bits;
       +
       +        xlseek(sfd, 0, SEEK_SET);
       +        read_blk_hdr(sfd, &blk_hdr);
       +        match_ver(blk_hdr.flags);
       +
       +        /* Compression algorithm field */
       +        compr_bits = blk_hdr.flags >> COMPR_ALGO_SHIFT;
       +        compr_algo = compr_bits & COMPR_ALGO_MASK;
       +        if (compr_algo < 0 || compr_algo >= NR_COMPRS)
       +                errx(1, "unsupported compression algorithm: %d", compr_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Compression algorithm: %s\n",
       +                        compr_type2name(compr_algo));
       +
       +        /* Hash algorithm field */
       +        hash_bits = blk_hdr.flags >> HASH_ALGO_SHIFT;
       +        hash_algo = hash_bits & HASH_ALGO_MASK;
       +        if (hash_algo < 0 || hash_algo >= NR_HASHES)
       +                errx(1, "unsupported hash algorithm: %d", hash_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Hash algorithm: %s\n",
       +                        hash_type2name(hash_algo));
       +}
       +
       +/*
       + * Read the snapshot header from the start of ifd into snap_hdr
       + * and check its format version; match_ver() exits on a mismatch.
       + */
       +static void
       +load_snap_hdr(void)
       +{
       +        xlseek(ifd, 0, SEEK_SET);
       +        read_snap_hdr(ifd, &snap_hdr);
       +        match_ver(snap_hdr.flags);
       +}
       +
       +/*
       + * Open the snapshot index and the block store read-only, lock both
       + * without blocking and load their headers.  Any failure is fatal.
       + *
       + * This program only reads the repository, so a shared lock is taken:
       + * several readers may run at the same time while a writer holding an
       + * exclusive lock still keeps them out.  An exclusive lock on a
       + * descriptor opened O_RDONLY is also rejected on some file systems
       + * (see the NFS notes in Linux flock(2)).
       + */
       +static void
       +init(void)
       +{
       +        /* No mode argument: it is only consulted together with O_CREAT */
       +        ifd = open(SNAPSF, O_RDONLY);
       +        if (ifd < 0)
       +                err(1, "open %s", SNAPSF);
       +
       +        sfd = open(STOREF, O_RDONLY);
       +        if (sfd < 0)
       +                err(1, "open %s", STOREF);
       +
       +        if (flock(ifd, LOCK_NB | LOCK_SH) < 0 ||
       +            flock(sfd, LOCK_NB | LOCK_SH) < 0)
       +                err(1, "flock");
       +
       +        load_snap_hdr();
       +        load_blk_hdr();
       +}
       +
       +/* Close the repository descriptors, store first and then the index */
       +static void
       +term(void)
       +{
       +        int fds[] = { sfd, ifd };
       +        size_t i;
       +
       +        for (i = 0; i < sizeof(fds) / sizeof(fds[0]); i++)
       +                close(fds[i]);
       +}
       +
       +/* Print the command-line synopsis to stderr and exit with status 1 */
       +static void
       +usage(void)
       +{
       +        fprintf(stderr, "usage: %s [-v] [repo]\n", argv0);
       +        exit(1);
       +}
       +
       +/*
       + * dinfo entry point: parse options, enter the repository directory
       + * (the current directory when no operand is given), then load the
       + * headers and print the statistics stored in the snapshot header.
       + */
       +int
       +main(int argc, char *argv[])
       +{
       +        char *repo = NULL;
       +
       +        ARGBEGIN {
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        /* At most one operand: the repository path */
       +        if (argc == 0)
       +                repo = ".";
       +        else if (argc == 1)
       +                repo = argv[0];
       +        else
       +                usage();
       +
       +        if (chdir(repo) < 0)
       +                err(1, "chdir: %s", repo);
       +
       +        init();
       +        print_info(&snap_hdr.st);
       +        term();
       +        return 0;
       +}
   DIR diff --git a/dinit.1 b/dinit.1
       @@ -0,0 +1,39 @@
       +.Dd April 17, 2019
       +.Dt DINIT 1
       +.Os
       +.Sh NAME
       +.Nm dinit
       +.Nd Initialize a dedup repository
       +.Sh SYNOPSIS
       +.Nm dinit
       +.Op Fl v
       +.Op Fl H Ar hash
       +.Op Fl Z Ar compressor
       +.Op Ar repo
       +.Sh DESCRIPTION
       +.Nm
       +initializes a dedup repository.
       +If no
       +.Ar repo
       +is specified, then the current directory
       +is assumed to be the repository.
       +.Sh OPTIONS
       +.Bl -tag -width "-Z compressor"
       +.It Fl v
       +Enable verbose mode.
       +.It Fl H Ar hash
       +The cryptographic hash function used to identify
       +unique blocks in the store.
       +The supported hash functions are blake2b, blake2bp, blake2s and blake2sp.
       +This flag only has an effect when initializing the repository.
       +By default blake2b is used.
       +.It Fl Z Ar compressor
       +The compressor function used to compress the blocks
       +in the store.
       +The supported compressor functions are none, lz4 and snappy.
       +This flag only has an effect when initializing the repository.
       +By default lz4 is used.
       +.El
       +.Sh AUTHORS
       +.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       +.An z3bra Aq Mt contactatz3bradotorg .
   DIR diff --git a/dinit.c b/dinit.c
       @@ -0,0 +1,155 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +#include <sys/file.h>
       +
       +#include <err.h>
       +#include <fcntl.h>
       +#include <stdio.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +
       +#include "arg.h"
       +#include "blake2.h"
       +#include "dedup.h"
       +
       +#define SNAPSF ".snapshots"
       +#define STOREF ".store"
       +
       +/* Headers built by init() and written out by term() */
       +static struct snap_hdr snap_hdr;
       +static struct blk_hdr blk_hdr;
       +/* Descriptors for SNAPSF (ifd) and STOREF (sfd), created by init() */
       +static int ifd;
       +static int sfd;
       +/* Algorithms recorded in the new store header; -H and -Z override them */
       +static int hash_algo = HASH_BLAKE2B;
       +static int compr_algo = COMPR_LZ4;
       +
       +/* Verbosity level, bumped once per -v on the command line */
       +int verbose;
       +/* Program name used by usage(); NOTE(review): presumably set by ARGBEGIN in arg.h - confirm */
       +char *argv0;
       +
       +/*
       + * Fill in the store header for a new repository: record its size
       + * and pack the format version together with the selected
       + * compression and hash algorithm ids into the flags word.
       + */
       +static void
       +init_blk_hdr(void)
       +{
       +        struct blk_hdr *hdr = &blk_hdr;
       +
       +        hdr->size = BLK_HDR_SIZE;
       +        hdr->flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
       +        hdr->flags |= compr_algo << COMPR_ALGO_SHIFT;
       +        hdr->flags |= hash_algo << HASH_ALGO_SHIFT;
       +}
       +
       +/* Rewind sfd to offset 0 and write blk_hdr there via write_blk_hdr() */
       +static void
       +save_blk_hdr(void)
       +{
       +        xlseek(sfd, 0, SEEK_SET);
       +        write_blk_hdr(sfd, &blk_hdr);
       +}
       +
       +/*
       + * Fill in the snapshot header for a new repository: format
       + * version, header size and the initial minimum block size.
       + */
       +static void
       +init_snap_hdr(void)
       +{
       +        struct snap_hdr *hdr = &snap_hdr;
       +
       +        /*
       +         * NOTE(review): presumably UINT64_MAX so that the first block
       +         * stored lowers the minimum - confirm against the stats code.
       +         */
       +        hdr->st.min_blk_size = UINT64_MAX;
       +        hdr->size = SNAP_HDR_SIZE;
       +        hdr->flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
       +}
       +
       +/* Rewind ifd to offset 0 and write snap_hdr there via write_snap_hdr() */
       +static void
       +save_snap_hdr(void)
       +{
       +        xlseek(ifd, 0, SEEK_SET);
       +        write_snap_hdr(ifd, &snap_hdr);
       +}
       +
       +/*
       + * Create the snapshot index and the block store in the current
       + * directory, lock both exclusively without blocking and prepare
       + * fresh headers in memory.  Both files must not exist yet (O_EXCL).
       + * Any failure is fatal.
       + */
       +static void
       +init(void)
       +{
       +        int flags;
       +
       +        flags = O_RDWR | O_CREAT | O_EXCL;
       +        ifd = open(SNAPSF, flags, 0600);
       +        if (ifd < 0)
       +                err(1, "open %s", SNAPSF);
       +
       +        sfd = open(STOREF, flags, 0600);
       +        if (sfd < 0) {
       +                /*
       +                 * SNAPSF was created by the open() above.  Remove it
       +                 * again, otherwise the empty file stays behind and
       +                 * every later dinit in this directory fails with
       +                 * EEXIST.  warn() runs first so errno from the failed
       +                 * open() is what gets reported.
       +                 */
       +                warn("open %s", STOREF);
       +                unlink(SNAPSF);
       +                exit(1);
       +        }
       +
       +        if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
       +            flock(sfd, LOCK_NB | LOCK_EX) < 0)
       +                err(1, "flock");
       +
       +        init_snap_hdr();
       +        init_blk_hdr();
       +}
       +
       +/*
       + * Write both headers, flush them to stable storage and close the
       + * repository files.  A failed fsync() is fatal: reporting success
       + * while the headers never reached the disk would leave behind a
       + * repository that the other tools cannot read.
       + */
       +static void
       +term(void)
       +{
       +        save_blk_hdr();
       +        save_snap_hdr();
       +
       +        if (fsync(sfd) < 0)
       +                err(1, "fsync %s", STOREF);
       +        if (fsync(ifd) < 0)
       +                err(1, "fsync %s", SNAPSF);
       +
       +        close(sfd);
       +        close(ifd);
       +}
       +
       +/* Print the command-line synopsis to stderr and exit with status 1 */
       +static void
       +usage(void)
       +{
       +        fprintf(stderr, "usage: %s [-v] [-H hash] [-Z compressor] [repo]\n", argv0);
       +        exit(1);
       +}
       +
       +/*
       + * dinit entry point: parse options, create and enter the repository
       + * directory (the current directory when no operand is given) and
       + * write the initial headers.  Passing "?" to -H or -Z lists the
       + * available names on stderr and exits successfully.
       + */
       +int
       +main(int argc, char *argv[])
       +{
       +        char *hash_name = NULL, *compr_name = NULL;
       +        char *repo;
       +
       +        ARGBEGIN {
       +        case 'H':
       +                hash_name = EARGF(usage());
       +                if (strcmp(hash_name, "?") == 0) {
       +                        hash_list(STDERR_FILENO);
       +                        return 0;
       +                }
       +                hash_algo = hash_name2type(hash_name);
       +                if (hash_algo < 0)
       +                        errx(1, "unknown hash: %s", hash_name);
       +                break;
       +        case 'Z':
       +                compr_name = EARGF(usage());
       +                if (strcmp(compr_name, "?") == 0) {
       +                        compr_list(STDERR_FILENO);
       +                        return 0;
       +                }
       +                compr_algo = compr_name2type(compr_name);
       +                if (compr_algo < 0)
       +                        errx(1, "unknown compressor: %s", compr_name);
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        /* At most one operand: the repository path */
       +        if (argc == 0)
       +                repo = ".";
       +        else if (argc == 1)
       +                repo = argv[0];
       +        else
       +                usage();
       +
       +        /* Result ignored: a missing directory is caught by chdir() below */
       +        mkdir(repo, 0700);
       +        if (chdir(repo) < 0)
       +                err(1, "chdir: %s", repo);
       +
       +        init();
       +        term();
       +        return 0;
       +}
   DIR diff --git a/dlist.1 b/dlist.1
       @@ -0,0 +1,25 @@
       +.Dd April 17, 2019
       +.Dt DLIST 1
       +.Os
       +.Sh NAME
       +.Nm dlist
       +.Nd List snapshots from a dedup repository
       +.Sh SYNOPSIS
       +.Nm dlist
       +.Op Fl v
       +.Op Ar repo
       +.Sh DESCRIPTION
       +.Nm
       +lists snapshots from a dedup repository.
       +If no
       +.Ar repo
       +is specified, then the current directory
       +is assumed to be the repository.
       +.Sh OPTIONS
       +.Bl -tag -width "-v"
       +.It Fl v
       +Enable verbose mode.
       +.El
       +.Sh AUTHORS
       +.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       +.An z3bra Aq Mt contactatz3bradotorg .
   DIR diff --git a/dlist.c b/dlist.c
       @@ -0,0 +1,232 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +#include <sys/file.h>
       +
       +#include <err.h>
       +#include <fcntl.h>
       +#include <stdio.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +
       +#include "arg.h"
       +#include "blake2.h"
       +#include "dedup.h"
       +
       +#define SNAPSF ".snapshots"
       +#define STOREF ".store"
       +
       +/* Return values of a walk_snap() callback: keep iterating or stop */
       +enum {
       +        WALK_CONTINUE,
       +        WALK_STOP
       +};
       +
       +/* In-memory copies of the two on-disk headers, filled in by init() */
       +static struct snap_hdr snap_hdr;
       +static struct blk_hdr blk_hdr;
       +/* Descriptors for SNAPSF (ifd) and STOREF (sfd), opened by init() */
       +static int ifd;
       +static int sfd;
       +/* Defaults only; load_blk_hdr() overwrites both from the store header */
       +static int hash_algo = HASH_BLAKE2B;
       +static int compr_algo = COMPR_LZ4;
       +
       +/* Verbosity level, bumped once per -v on the command line */
       +int verbose;
       +/* Program name used by usage(); NOTE(review): presumably set by ARGBEGIN in arg.h - confirm */
       +char *argv0;
       +
       +/* Write the size bytes at md to fp as lowercase two-digit hex */
       +static void
       +print_md(FILE *fp, uint8_t *md, size_t size)
       +{
       +        for (; size > 0; size--, md++)
       +                fprintf(fp, "%02x", *md);
       +}
       +
       +/* Allocate a zero-filled snapshot; exits if memory runs out */
       +static struct snap *
       +alloc_snap(void)
       +{
       +        struct snap *fresh = calloc(1, sizeof(*fresh));
       +
       +        if (!fresh)
       +                err(1, "%s", __func__);
       +        return fresh;
       +}
       +
       +/* Release a snapshot obtained from alloc_snap() or grow_snap() */
       +static void
       +free_snap(struct snap *snap)
       +{
       +        free(snap);
       +}
       +
       +/*
       + * Resize snap so that it has room for nr_blk_descs block descriptors
       + * after the fixed part of the structure.  Returns the (possibly
       + * moved) snapshot.  Exits if the requested size does not fit in a
       + * size_t or if the allocation fails.
       + */
       +static struct snap *
       +grow_snap(struct snap *snap, uint64_t nr_blk_descs)
       +{
       +        const size_t desc_size = sizeof(snap->blk_desc[0]);
       +        const size_t fixed_size = sizeof(*snap);
       +        size_t descs_bytes;
       +        struct snap *grown;
       +
       +        /* Guard the multiplication, then the addition */
       +        if (nr_blk_descs > SIZE_MAX / desc_size)
       +                errx(1, "%s: overflow", __func__);
       +        descs_bytes = nr_blk_descs * desc_size;
       +        if (descs_bytes > SIZE_MAX - fixed_size)
       +                errx(1, "%s: overflow", __func__);
       +
       +        grown = realloc(snap, descs_bytes + fixed_size);
       +        if (grown == NULL)
       +                err(1, "%s", __func__);
       +        return grown;
       +}
       +
       +/*
       + * walk_snap() callback: print one line per snapshot on stdout, the
       + * hex digest followed by a tab and the message when one is set.
       + * arg is unused.  Always asks the walk to continue.
       + */
       +static int
       +list(struct snap *snap, void *arg)
       +{
       +        print_md(stdout, snap->md, sizeof(snap->md));
       +        if (snap->msg[0] == '\0') {
       +                putchar('\n');
       +                return WALK_CONTINUE;
       +        }
       +        printf("\t%s\n", snap->msg);
       +        return WALK_CONTINUE;
       +}
       +
       +/*
       + * Visit the snap_hdr.nr_snaps snapshots stored after the snapshot
       + * header in ifd, in file order, passing each one to fn() together
       + * with arg.  The walk ends early when fn() returns WALK_STOP.
       + */
       +static void
       +walk_snap(int (*fn)(struct snap *, void *), void *arg)
       +{
       +        uint64_t idx = 0;
       +        int verdict = WALK_CONTINUE;
       +
       +        xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
       +        while (verdict != WALK_STOP && idx < snap_hdr.nr_snaps) {
       +                struct snap *cur = alloc_snap();
       +
       +                /* Fixed part first; it tells how many descriptors follow */
       +                read_snap(ifd, cur);
       +                cur = grow_snap(cur, cur->nr_blk_descs);
       +                read_snap_descs(ifd, cur);
       +
       +                verdict = fn(cur, arg);
       +                free_snap(cur);
       +                idx++;
       +        }
       +}
       +
       +/*
       + * Decode the major/minor format version packed into v and exit
       + * with an error unless it equals VER_MAJ.VER_MIN.
       + */
       +static void
       +match_ver(uint64_t v)
       +{
       +        uint8_t major = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
       +        uint8_t minor = v & VER_MIN_MASK;
       +
       +        if (major != VER_MAJ || minor != VER_MIN)
       +                errx(1, "format version mismatch: expected %u.%u but got %u.%u",
       +                     VER_MAJ, VER_MIN, major, minor);
       +}
       +
       +/*
       + * Read the store header from the start of sfd, verify its format
       + * version and extract the compression and hash algorithm ids from
       + * its flags word into compr_algo and hash_algo.  Exits if either
       + * id is out of range; in verbose mode the algorithm names are
       + * reported on stderr.
       + */
       +static void
       +load_blk_hdr(void)
       +{
       +        uint64_t compr_bits, hash_bits;
       +
       +        xlseek(sfd, 0, SEEK_SET);
       +        read_blk_hdr(sfd, &blk_hdr);
       +        match_ver(blk_hdr.flags);
       +
       +        /* Compression algorithm field */
       +        compr_bits = blk_hdr.flags >> COMPR_ALGO_SHIFT;
       +        compr_algo = compr_bits & COMPR_ALGO_MASK;
       +        if (compr_algo < 0 || compr_algo >= NR_COMPRS)
       +                errx(1, "unsupported compression algorithm: %d", compr_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Compression algorithm: %s\n",
       +                        compr_type2name(compr_algo));
       +
       +        /* Hash algorithm field */
       +        hash_bits = blk_hdr.flags >> HASH_ALGO_SHIFT;
       +        hash_algo = hash_bits & HASH_ALGO_MASK;
       +        if (hash_algo < 0 || hash_algo >= NR_HASHES)
       +                errx(1, "unsupported hash algorithm: %d", hash_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Hash algorithm: %s\n",
       +                        hash_type2name(hash_algo));
       +}
       +
       +/*
       + * Read the snapshot header from the start of ifd into snap_hdr
       + * and check its format version; match_ver() exits on a mismatch.
       + */
       +static void
       +load_snap_hdr(void)
       +{
       +        xlseek(ifd, 0, SEEK_SET);
       +        read_snap_hdr(ifd, &snap_hdr);
       +        match_ver(snap_hdr.flags);
       +}
       +
       +/*
       + * Open the snapshot index and the block store read-only, lock both
       + * without blocking and load their headers.  Any failure is fatal.
       + *
       + * This program only reads the repository, so a shared lock is taken:
       + * several readers may run at the same time while a writer holding an
       + * exclusive lock still keeps them out.  An exclusive lock on a
       + * descriptor opened O_RDONLY is also rejected on some file systems
       + * (see the NFS notes in Linux flock(2)).
       + */
       +static void
       +init(void)
       +{
       +        /* No mode argument: it is only consulted together with O_CREAT */
       +        ifd = open(SNAPSF, O_RDONLY);
       +        if (ifd < 0)
       +                err(1, "open %s", SNAPSF);
       +
       +        sfd = open(STOREF, O_RDONLY);
       +        if (sfd < 0)
       +                err(1, "open %s", STOREF);
       +
       +        if (flock(ifd, LOCK_NB | LOCK_SH) < 0 ||
       +            flock(sfd, LOCK_NB | LOCK_SH) < 0)
       +                err(1, "flock");
       +
       +        load_snap_hdr();
       +        load_blk_hdr();
       +}
       +
       +/* Close the repository descriptors, store first and then the index */
       +static void
       +term(void)
       +{
       +        int fds[] = { sfd, ifd };
       +        size_t i;
       +
       +        for (i = 0; i < sizeof(fds) / sizeof(fds[0]); i++)
       +                close(fds[i]);
       +}
       +
       +/* Print the command-line synopsis to stderr and exit with status 1 */
       +static void
       +usage(void)
       +{
       +        fprintf(stderr, "usage: %s [-v] [repo]\n", argv0);
       +        exit(1);
       +}
       +
       +/*
       + * dlist entry point: parse options, enter the repository directory
       + * (the current directory when no operand is given), then load the
       + * headers and print every snapshot via list().
       + */
       +int
       +main(int argc, char *argv[])
       +{
       +        char *repo = NULL;
       +
       +        ARGBEGIN {
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        /* At most one operand: the repository path */
       +        if (argc == 0)
       +                repo = ".";
       +        else if (argc == 1)
       +                repo = argv[0];
       +        else
       +                usage();
       +
       +        if (chdir(repo) < 0)
       +                err(1, "chdir: %s", repo);
       +
       +        init();
       +        walk_snap(list, NULL);
       +        term();
       +        return 0;
       +}
   DIR diff --git a/dpack.1 b/dpack.1
       @@ -0,0 +1,35 @@
       +.Dd April 17, 2019
       +.Dt DPACK 1
       +.Os
       +.Sh NAME
       +.Nm dpack
       +.Nd Deduplicate data from stdin
       +.Sh SYNOPSIS
       +.Nm dpack
       +.Op Fl v
       +.Op Fl m Ar message
       +.Op Ar repo
       +.Sh DESCRIPTION
       +.Nm
       +deduplicates data from stdin.
       +If no
       +.Ar repo
       +is specified, then the current directory
       +is assumed to be the repository.
       +.Pp
       +.Nm
       +does not track any file metadata so to deduplicate
       +directory trees, an archival tool like
       +.Xr tar 1
       +should be used and piped into
       +.Nm .
       +.Sh OPTIONS
       +.Bl -tag -width "-m message"
       +.It Fl m Ar message
       +Attach a descriptive message to the snapshot.
       +.It Fl v
       +Enable verbose mode.
       +.El
       +.Sh AUTHORS
       +.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       +.An z3bra Aq Mt contactatz3bradotorg .
   DIR diff --git a/dpack.c b/dpack.c
       @@ -0,0 +1,422 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +#include <sys/file.h>
       +
       +#include <err.h>
       +#include <fcntl.h>
       +#include <stdio.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +
       +#include "arg.h"
       +#include "blake2.h"
       +#include "dedup.h"
       +
       +#define SNAPSF ".snapshots"
       +#define STOREF ".store"
       +
       +/* Values a walk_snap() callback returns to continue or stop the walk */
       +enum {
       +        WALK_CONTINUE,
       +        WALK_STOP
       +};
       +
       +/* In-memory copies of the two on-disk headers, loaded by init() */
       +static struct snap_hdr snap_hdr;
       +static struct blk_hdr blk_hdr;
       +/* Block descriptor cache, filled by init() from the existing snapshots */
       +static struct icache *icache;
       +/* File descriptors of SNAPSF (ifd) and STOREF (sfd) */
       +static int ifd;
       +static int sfd;
       +/* Defaults; load_blk_hdr() overwrites both from the store header flags */
       +static int hash_algo = HASH_BLAKE2B;
       +static int compr_algo = COMPR_LZ4;
       +
       +/* Verbosity level, incremented once per -v option */
       +int verbose;
       +/* Program name printed by usage() */
       +char *argv0;
       +
       +/* Allocate a zeroed snapshot without block descriptors; exit on failure */
       +static struct snap *
       +alloc_snap(void)
       +{
       +        struct snap *fresh = calloc(1, sizeof(struct snap));
       +
       +        if (fresh == NULL)
       +                err(1, "%s", __func__);
       +        return fresh;
       +}
       +
       +/* Release a snapshot obtained from alloc_snap() or grow_snap() */
       +static void
       +free_snap(struct snap *snap)
       +{
       +        free(snap);
       +}
       +
       +/*
       + * Compute the snapshot hash into md.  The digest covers the
       + * hashes of the snapshot's block descriptors, taken in order.
       + */
       +static void
       +hash_snap(struct snap *snap, uint8_t *md)
       +{
       +        struct hash_ctx hctx;
       +        struct blk_desc *bd, *end;
       +
       +        if (hash_init(&hctx, hash_algo, MD_SIZE) < 0)
       +                errx(1, "hash_init failed");
       +
       +        end = snap->blk_desc + snap->nr_blk_descs;
       +        for (bd = snap->blk_desc; bd < end; bd++)
       +                hash_update(&hctx, bd->md, sizeof(bd->md));
       +
       +        hash_final(&hctx, md, MD_SIZE);
       +}
       +
       +/*
       + * Resize snap so that it can hold nr_blk_descs block descriptors
       + * and return the (possibly moved) snapshot.  Exits if the size
       + * computation would overflow or the allocation fails.
       + */
       +static struct snap *
       +grow_snap(struct snap *snap, uint64_t nr_blk_descs)
       +{
       +        const size_t desc_size = sizeof(snap->blk_desc[0]);
       +        const size_t base_size = sizeof(*snap);
       +        struct snap *grown;
       +        size_t total;
       +
       +        /* total = nr_blk_descs * desc_size + base_size, checked */
       +        if (nr_blk_descs > SIZE_MAX / desc_size)
       +                errx(1, "%s: overflow", __func__);
       +        total = nr_blk_descs * desc_size;
       +        if (total > SIZE_MAX - base_size)
       +                errx(1, "%s: overflow", __func__);
       +        total += base_size;
       +
       +        grown = realloc(snap, total);
       +        if (grown == NULL)
       +                err(1, "%s", __func__);
       +        return grown;
       +}
       +
       +/*
       + * Append the snapshot and its block descriptors to the snapshot
       + * file at offset snap_hdr.size and account for it in the
       + * in-memory snapshot header.  Every size computation is checked
       + * for overflow; a failed check exits.
       + */
       +static void
       +append_snap(struct snap *snap)
       +{
       +        /* snap->size = nr_blk_descs * BLK_DESC_SIZE + SNAPSHOT_SIZE */
       +        if (snap->nr_blk_descs > UINT64_MAX / BLK_DESC_SIZE)
       +                errx(1, "%s: overflow", __func__);
       +        snap->size = snap->nr_blk_descs * BLK_DESC_SIZE;
       +
       +        if (snap->size > UINT64_MAX - SNAPSHOT_SIZE)
       +                errx(1, "%s: overflow", __func__);
       +        snap->size += SNAPSHOT_SIZE;
       +
       +        /* snap_hdr.size is the offset at which the new snapshot starts */
       +        xlseek(ifd, snap_hdr.size, SEEK_SET);
       +        write_snap(ifd, snap);
       +        write_snap_blk_descs(ifd, snap);
       +
       +        if (snap_hdr.size > UINT64_MAX - snap->size)
       +                errx(1, "%s: overflow", __func__);
       +        snap_hdr.size += snap->size;
       +
       +        if (snap_hdr.nr_snaps > UINT64_MAX - 1)
       +                errx(1, "%s: overflow", __func__);
       +        snap_hdr.nr_snaps++;
       +}
       +
       +/* Allocate a zero-filled buffer of size bytes; exit on failure */
       +static uint8_t *
       +alloc_buf(size_t size)
       +{
       +        uint8_t *buf = calloc(1, size);
       +
       +        if (buf == NULL)
       +                err(1, "%s", __func__);
       +        return buf;
       +}
       +
       +/*
       + * Hash the size bytes at buf with the configured hash algorithm
       + * and store the MD_SIZE-byte digest in md.
       + */
       +static void
       +hash_blk(uint8_t *buf, size_t size, uint8_t *md)
       +{
       +        struct hash_ctx hctx;
       +
       +        if (hash_init(&hctx, hash_algo, MD_SIZE) < 0)
       +                errx(1, "hash_init failed");
       +
       +        hash_update(&hctx, buf, size);
       +        hash_final(&hctx, md, MD_SIZE);
       +}
       +
       +/*
       + * Write blk_desc->size bytes from buf at offset blk_hdr.size of
       + * the store file and grow blk_hdr.size by the same amount.
       + * Exits if the new store size would overflow.
       + */
       +static void
       +append_blk(uint8_t *buf, struct blk_desc *blk_desc)
       +{
       +        xlseek(sfd, blk_hdr.size, SEEK_SET);
       +        xwrite(sfd, buf, blk_desc->size);
       +
       +        if (blk_hdr.size > UINT64_MAX - blk_desc->size)
       +                errx(1, "%s: overflow", __func__);
       +        blk_hdr.size += blk_desc->size;
       +}
       +
       +/*
       + * Compress and hash one chunk, then record its block descriptor
       + * in the snapshot.  The compressed block is appended to the
       + * store only when lookup_icache() does not find its hash.
       + * The caller must have made room for one more descriptor.
       + */
       +static void
       +dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
       +{
       +        struct compr_ctx cctx;
       +        struct blk_desc desc;
       +        uint8_t digest[MD_SIZE];
       +        uint8_t *out;
       +        size_t out_cap, out_len;
       +
       +        if (compr_init(&cctx, compr_algo) < 0)
       +                errx(1, "compr_init failed");
       +        out_cap = compr_size(&cctx, BLKSIZE_MAX);
       +        out = alloc_buf(out_cap);
       +
       +        /* The hash is taken over the compressed data */
       +        out_len = compr(&cctx, chunkp, out, chunk_size, out_cap);
       +        hash_blk(out, out_len, digest);
       +
       +        snap_hdr.st.orig_size += chunk_size;
       +        snap_hdr.st.compr_size += out_len;
       +
       +        memcpy(desc.md, digest, sizeof(desc.md));
       +        if (lookup_icache(icache, &desc) >= 0) {
       +                /* Known block: only reference it from the snapshot */
       +                snap->blk_desc[snap->nr_blk_descs++] = desc;
       +        } else {
       +                /* New block: it is stored at the current end of the store */
       +                desc.offset = blk_hdr.size;
       +                desc.size = out_len;
       +
       +                snap->blk_desc[snap->nr_blk_descs++] = desc;
       +                append_blk(out, &desc);
       +
       +                insert_icache(icache, &desc);
       +
       +                snap_hdr.st.dedup_size += desc.size;
       +                snap_hdr.st.nr_blks++;
       +
       +                if (desc.size > snap_hdr.st.max_blk_size)
       +                        snap_hdr.st.max_blk_size = desc.size;
       +                if (desc.size < snap_hdr.st.min_blk_size)
       +                        snap_hdr.st.min_blk_size = desc.size;
       +        }
       +
       +        free(out);
       +        compr_final(&cctx);
       +}
       +
       +/*
       + * Feed the data from fd through the chunker and deduplicate
       + * every chunk it yields.  If at least one chunk was recorded,
       + * the snapshot (with the optional message msg) is hashed and
       + * appended to the snapshot file.
       + */
       +static void
       +dedup(int fd, char *msg)
       +{
       +        struct snap *snap;
       +        struct chunker *chunker;
       +
       +        snap = alloc_snap();
       +        chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
       +                                HASHMASK_BITS, WINSIZE);
       +
       +        for (;;) {
       +                uint8_t *chunk;
       +                size_t len;
       +
       +                if (fill_chunker(chunker) <= 0)
       +                        break;
       +
       +                chunk = get_chunk(chunker, &len);
       +                /* Make room for the descriptor dedup_chunk() adds */
       +                snap = grow_snap(snap, snap->nr_blk_descs + 1);
       +                dedup_chunk(snap, chunk, len);
       +                drain_chunker(chunker);
       +        }
       +
       +        if (snap->nr_blk_descs > 0) {
       +                if (msg != NULL) {
       +                        size_t len = strlen(msg) + 1;
       +
       +                        /* Truncate to fit; always NUL-terminated */
       +                        if (len > sizeof(snap->msg))
       +                                len = sizeof(snap->msg);
       +                        memcpy(snap->msg, msg, len);
       +                        snap->msg[len - 1] = '\0';
       +                }
       +                hash_snap(snap, snap->md);
       +                append_snap(snap);
       +        }
       +
       +        free_chunker(chunker);
       +        free_snap(snap);
       +}
       +
       +/*
       + * walk_snap() callback: insert every block descriptor of the
       + * snapshot into the cache.  arg is unused.  Always returns
       + * WALK_CONTINUE so that all snapshots are visited.
       + *
       + * No block data is read here, so neither a compression context
       + * nor a scratch buffer is needed.
       + */
       +static int
       +build_icache(struct snap *snap, void *arg)
       +{
       +        uint64_t i;
       +
       +        (void)arg;
       +        for (i = 0; i < snap->nr_blk_descs; i++)
       +                insert_icache(icache, &snap->blk_desc[i]);
       +        return WALK_CONTINUE;
       +}
       +
       +/*
       + * Visit the snapshots stored after the snapshot header.  Each
       + * one is loaded together with its block descriptors and passed
       + * to fn() along with arg.  The walk ends early when fn()
       + * returns WALK_STOP.
       + */
       +static void
       +walk_snap(int (*fn)(struct snap *, void *), void *arg)
       +{
       +        uint64_t i;
       +        int verdict = WALK_CONTINUE;
       +
       +        xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
       +        for (i = 0; verdict != WALK_STOP && i < snap_hdr.nr_snaps; i++) {
       +                struct snap *cur;
       +
       +                cur = alloc_snap();
       +                read_snap(ifd, cur);
       +                /* Make room for the descriptors before loading them */
       +                cur = grow_snap(cur, cur->nr_blk_descs);
       +                read_snap_descs(ifd, cur);
       +
       +                verdict = fn(cur, arg);
       +                free_snap(cur);
       +        }
       +}
       +
       +/*
       + * Exit with an error unless the major and minor version encoded
       + * in v are VER_MAJ and VER_MIN.
       + */
       +static void
       +match_ver(uint64_t v)
       +{
       +        uint8_t major = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
       +        uint8_t minor = v & VER_MIN_MASK;
       +
       +        if (major != VER_MAJ || minor != VER_MIN)
       +                errx(1, "format version mismatch: expected %u.%u but got %u.%u",
       +                     VER_MAJ, VER_MIN, major, minor);
       +}
       +
       +/*
       + * Load the store header, check its format version and take the
       + * compression and hash algorithms from its flags.  Exits when
       + * an algorithm id is outside the supported range.  In verbose
       + * mode the selected algorithms are reported on stderr.
       + */
       +static void
       +load_blk_hdr(void)
       +{
       +        uint64_t field;
       +
       +        xlseek(sfd, 0, SEEK_SET);
       +        read_blk_hdr(sfd, &blk_hdr);
       +        match_ver(blk_hdr.flags);
       +
       +        field = (blk_hdr.flags >> COMPR_ALGO_SHIFT) & COMPR_ALGO_MASK;
       +        compr_algo = field;
       +        if (!(compr_algo >= 0 && compr_algo < NR_COMPRS))
       +                errx(1, "unsupported compression algorithm: %d", compr_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Compression algorithm: %s\n",
       +                        compr_type2name(compr_algo));
       +
       +        field = (blk_hdr.flags >> HASH_ALGO_SHIFT) & HASH_ALGO_MASK;
       +        hash_algo = field;
       +        if (!(hash_algo >= 0 && hash_algo < NR_HASHES))
       +                errx(1, "unsupported hash algorithm: %d", hash_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Hash algorithm: %s\n",
       +                        hash_type2name(hash_algo));
       +}
       +
       +/* Write the in-memory store header back to offset 0 of the store file */
       +static void
       +save_blk_hdr(void)
       +{
       +        struct blk_hdr *hdr = &blk_hdr;
       +
       +        xlseek(sfd, 0, SEEK_SET);
       +        write_blk_hdr(sfd, hdr);
       +}
       +
       +/*
       + * Read the snapshot header from offset 0 of the snapshot file
       + * and check its format version.
       + */
       +static void
       +load_snap_hdr(void)
       +{
       +        struct snap_hdr *hdr = &snap_hdr;
       +
       +        xlseek(ifd, 0, SEEK_SET);
       +        read_snap_hdr(ifd, hdr);
       +        match_ver(hdr->flags);
       +}
       +
       +/* Write the in-memory snapshot header back to offset 0 of the snapshot file */
       +static void
       +save_snap_hdr(void)
       +{
       +        struct snap_hdr *hdr = &snap_hdr;
       +
       +        xlseek(ifd, 0, SEEK_SET);
       +        write_snap_hdr(ifd, hdr);
       +}
       +
       +/*
       + * Open the snapshot and store files read-write, take a
       + * non-blocking exclusive lock on each of them, load both
       + * headers and fill the cache from the existing snapshots.
       + * A failed open or lock terminates the program.
       + */
       +static void
       +init(void)
       +{
       +        if ((ifd = open(SNAPSF, O_RDWR, 0600)) < 0)
       +                err(1, "open %s", SNAPSF);
       +        if ((sfd = open(STOREF, O_RDWR, 0600)) < 0)
       +                err(1, "open %s", STOREF);
       +
       +        /* LOCK_NB: fail instead of waiting when the lock is held */
       +        if (flock(ifd, LOCK_NB | LOCK_EX) < 0)
       +                err(1, "flock");
       +        if (flock(sfd, LOCK_NB | LOCK_EX) < 0)
       +                err(1, "flock");
       +
       +        load_snap_hdr();
       +        load_blk_hdr();
       +
       +        icache = alloc_icache();
       +        walk_snap(build_icache, NULL);
       +}
       +
       +/*
       + * Release the cache, write both headers back and flush the
       + * repository files before closing them.
       + *
       + * A failed fsync(2) is fatal: it means the new snapshot may not
       + * have reached stable storage, and exiting with status 0 would
       + * report a backup that is not guaranteed to exist.
       + */
       +static void
       +term(void)
       +{
       +        free_icache(icache);
       +
       +        save_blk_hdr();
       +        save_snap_hdr();
       +
       +        if (fsync(sfd) < 0)
       +                err(1, "fsync %s", STOREF);
       +        if (fsync(ifd) < 0)
       +                err(1, "fsync %s", SNAPSF);
       +
       +        /* The data has been flushed above; close results are not checked */
       +        close(sfd);
       +        close(ifd);
       +}
       +
       +/* Print the usage line to stderr and exit with status 1 */
       +static void
       +usage(void)
       +{
       +        fprintf(stderr, "usage: %s [-v] [-m message] [repo]\n", argv0);
       +        exit(1);
       +}
       +
       +/*
       + * Parse the command line, change into the repository directory
       + * and store the data read from standard input as a snapshot.
       + */
       +int
       +main(int argc, char *argv[])
       +{
       +        char *repo, *msg = NULL;
       +
       +        ARGBEGIN {
       +        case 'm':
       +                msg = EARGF(usage());
       +                break;
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        /* At most one operand: the repository, "." when omitted */
       +        if (argc == 0)
       +                repo = ".";
       +        else if (argc == 1)
       +                repo = argv[0];
       +        else
       +                usage();
       +
       +        if (chdir(repo) < 0)
       +                err(1, "chdir: %s", repo);
       +
       +        init();
       +        dedup(STDIN_FILENO, msg);
       +        term();
       +        return 0;
       +}
   DIR diff --git a/dunpack.1 b/dunpack.1
       @@ -0,0 +1,28 @@
       +.Dd April 17, 2019
       +.Dt DUNPACK 1
       +.Os
       +.Sh NAME
       +.Nm dunpack
       +.Nd Extract snapshot from a dedup repository
       +.Sh SYNOPSIS
       +.Nm dunpack
       +.Op Fl v
       +.Ar id
       +.Op Ar repo
       +.Sh DESCRIPTION
       +.Nm
       +extracts the snapshot specified by
       +.Ar id
       +from the dedup repository.
       +If no
       +.Ar repo
       +is specified, then the current directory
       +is assumed to be the repository.
       +.Sh OPTIONS
       +.Bl -tag -width "-v"
       +.It Fl v
       +Enable verbose mode.
       +.El
       +.Sh AUTHORS
       +.An Dimitris Papastamos Aq Mt sin@2f30.org ,
       +.An z3bra Aq Mt contactatz3bradotorg .
   DIR diff --git a/dunpack.c b/dunpack.c
       @@ -0,0 +1,288 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +#include <sys/file.h>
       +
       +#include <err.h>
       +#include <fcntl.h>
       +#include <stdio.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +
       +#include "arg.h"
       +#include "blake2.h"
       +#include "dedup.h"
       +
       +#define SNAPSF ".snapshots"
       +#define STOREF ".store"
       +
       +/* Values a walk_snap() callback returns to continue or stop the walk */
       +enum {
       +        WALK_CONTINUE,
       +        WALK_STOP
       +};
       +
       +/* Argument block handed to extract() through walk_snap() */
       +struct extract_args {
       +        uint8_t *md;    /* hash of the snapshot to extract */
       +        int fd;         /* descriptor the extracted data is written to */
       +        int ret;        /* set to 0 by extract() once the snapshot is written */
       +};
       +
       +/* In-memory copies of the two on-disk headers, loaded by init() */
       +static struct snap_hdr snap_hdr;
       +static struct blk_hdr blk_hdr;
       +/* File descriptors of SNAPSF (ifd) and STOREF (sfd) */
       +static int ifd;
       +static int sfd;
       +/* Defaults; load_blk_hdr() overwrites both from the store header flags */
       +static int hash_algo = HASH_BLAKE2B;
       +static int compr_algo = COMPR_LZ4;
       +
       +/* Verbosity level, incremented once per -v option */
       +int verbose;
       +/* Program name printed by usage() */
       +char *argv0;
       +
       +/* Allocate a zeroed snapshot without block descriptors; exit on failure */
       +static struct snap *
       +alloc_snap(void)
       +{
       +        struct snap *fresh = calloc(1, sizeof(struct snap));
       +
       +        if (fresh == NULL)
       +                err(1, "%s", __func__);
       +        return fresh;
       +}
       +
       +/* Release a snapshot obtained from alloc_snap() or grow_snap() */
       +static void
       +free_snap(struct snap *snap)
       +{
       +        free(snap);
       +}
       +
       +/*
       + * Resize snap so that it can hold nr_blk_descs block descriptors
       + * and return the (possibly moved) snapshot.  Exits if the size
       + * computation would overflow or the allocation fails.
       + */
       +static struct snap *
       +grow_snap(struct snap *snap, uint64_t nr_blk_descs)
       +{
       +        const size_t desc_size = sizeof(snap->blk_desc[0]);
       +        const size_t base_size = sizeof(*snap);
       +        struct snap *grown;
       +        size_t total;
       +
       +        /* total = nr_blk_descs * desc_size + base_size, checked */
       +        if (nr_blk_descs > SIZE_MAX / desc_size)
       +                errx(1, "%s: overflow", __func__);
       +        total = nr_blk_descs * desc_size;
       +        if (total > SIZE_MAX - base_size)
       +                errx(1, "%s: overflow", __func__);
       +        total += base_size;
       +
       +        grown = realloc(snap, total);
       +        if (grown == NULL)
       +                err(1, "%s", __func__);
       +        return grown;
       +}
       +
       +/* Allocate a zero-filled buffer of size bytes; exit on failure */
       +static uint8_t *
       +alloc_buf(size_t size)
       +{
       +        uint8_t *buf = calloc(1, size);
       +
       +        if (buf == NULL)
       +                err(1, "%s", __func__);
       +        return buf;
       +}
       +
       +/* Release a buffer obtained from alloc_buf() */
       +static void
       +free_buf(uint8_t *buf)
       +{
       +        free(buf);
       +}
       +
       +/*
       + * Read the block described by blk_desc from the store file into
       + * buf.  Exits on EOF or when the number of bytes read differs
       + * from blk_desc->size.
       + */
       +static void
       +read_blk(uint8_t *buf, struct blk_desc *blk_desc)
       +{
       +        ssize_t got;
       +
       +        xlseek(sfd, blk_desc->offset, SEEK_SET);
       +        got = xread(sfd, buf, blk_desc->size);
       +
       +        if (got == 0)
       +                errx(1, "%s: unexpected EOF", __func__);
       +        else if (got != blk_desc->size)
       +                errx(1, "%s: short read", __func__);
       +}
       +
       +/*
       + * walk_snap() callback: when the snapshot hash equals args->md,
       + * decompress each of its blocks in order and write them to
       + * args->fd.  Once the snapshot has been written, args->ret is
       + * set to 0 and WALK_STOP is returned; snapshots with another
       + * hash return WALK_CONTINUE.
       + */
       +static int
       +extract(struct snap *snap, void *arg)
       +{
       +        uint8_t *buf[2];
       +        struct extract_args *args = arg;
       +        struct compr_ctx ctx;
       +        size_t csize;
       +        uint64_t i;
       +
       +        if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
       +                return WALK_CONTINUE;
       +
       +        if (compr_init(&ctx, compr_algo) < 0)
       +                errx(1, "compr_init failed");
       +        csize = compr_size(&ctx, BLKSIZE_MAX);
       +        buf[0] = alloc_buf(BLKSIZE_MAX);        /* decompressed block */
       +        buf[1] = alloc_buf(csize);              /* compressed block */
       +        for (i = 0; i < snap->nr_blk_descs; i++) {
       +                struct blk_desc *blk_desc;
       +                size_t blksize;
       +
       +                blk_desc = &snap->blk_desc[i];
       +                /*
       +                 * The size comes from the snapshot file; refuse a
       +                 * value that would make read_blk() overrun buf[1].
       +                 */
       +                if (blk_desc->size > csize)
       +                        errx(1, "%s: block size too large", __func__);
       +                read_blk(buf[1], blk_desc);
       +                blksize = decompr(&ctx, buf[1], buf[0], blk_desc->size, BLKSIZE_MAX);
       +                xwrite(args->fd, buf[0], blksize);
       +        }
       +        free_buf(buf[1]);
       +        free_buf(buf[0]);
       +        compr_final(&ctx);
       +        args->ret = 0;
       +        return WALK_STOP;
       +}
       +
       +/*
       + * Visit the snapshots stored after the snapshot header.  Each
       + * one is loaded together with its block descriptors and passed
       + * to fn() along with arg.  The walk ends early when fn()
       + * returns WALK_STOP.
       + */
       +static void
       +walk_snap(int (*fn)(struct snap *, void *), void *arg)
       +{
       +        uint64_t i;
       +        int verdict = WALK_CONTINUE;
       +
       +        xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
       +        for (i = 0; verdict != WALK_STOP && i < snap_hdr.nr_snaps; i++) {
       +                struct snap *cur;
       +
       +                cur = alloc_snap();
       +                read_snap(ifd, cur);
       +                /* Make room for the descriptors before loading them */
       +                cur = grow_snap(cur, cur->nr_blk_descs);
       +                read_snap_descs(ifd, cur);
       +
       +                verdict = fn(cur, arg);
       +                free_snap(cur);
       +        }
       +}
       +
       +/*
       + * Exit with an error unless the major and minor version encoded
       + * in v are VER_MAJ and VER_MIN.
       + */
       +static void
       +match_ver(uint64_t v)
       +{
       +        uint8_t major = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
       +        uint8_t minor = v & VER_MIN_MASK;
       +
       +        if (major != VER_MAJ || minor != VER_MIN)
       +                errx(1, "format version mismatch: expected %u.%u but got %u.%u",
       +                     VER_MAJ, VER_MIN, major, minor);
       +}
       +
       +/*
       + * Load the store header, check its format version and take the
       + * compression and hash algorithms from its flags.  Exits when
       + * an algorithm id is outside the supported range.  In verbose
       + * mode the selected algorithms are reported on stderr.
       + */
       +static void
       +load_blk_hdr(void)
       +{
       +        uint64_t field;
       +
       +        xlseek(sfd, 0, SEEK_SET);
       +        read_blk_hdr(sfd, &blk_hdr);
       +        match_ver(blk_hdr.flags);
       +
       +        field = (blk_hdr.flags >> COMPR_ALGO_SHIFT) & COMPR_ALGO_MASK;
       +        compr_algo = field;
       +        if (!(compr_algo >= 0 && compr_algo < NR_COMPRS))
       +                errx(1, "unsupported compression algorithm: %d", compr_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Compression algorithm: %s\n",
       +                        compr_type2name(compr_algo));
       +
       +        field = (blk_hdr.flags >> HASH_ALGO_SHIFT) & HASH_ALGO_MASK;
       +        hash_algo = field;
       +        if (!(hash_algo >= 0 && hash_algo < NR_HASHES))
       +                errx(1, "unsupported hash algorithm: %d", hash_algo);
       +        if (verbose > 0)
       +                fprintf(stderr, "Hash algorithm: %s\n",
       +                        hash_type2name(hash_algo));
       +}
       +
       +/*
       + * Read the snapshot header from offset 0 of the snapshot file
       + * and check its format version.
       + */
       +static void
       +load_snap_hdr(void)
       +{
       +        struct snap_hdr *hdr = &snap_hdr;
       +
       +        xlseek(ifd, 0, SEEK_SET);
       +        read_snap_hdr(ifd, hdr);
       +        match_ver(hdr->flags);
       +}
       +
       +/*
       + * Open the snapshot and store files read-only, take a
       + * non-blocking exclusive lock on each of them and load both
       + * headers.  A failed open or lock terminates the program.
       + */
       +static void
       +init(void)
       +{
       +        if ((ifd = open(SNAPSF, O_RDONLY, 0600)) < 0)
       +                err(1, "open %s", SNAPSF);
       +        if ((sfd = open(STOREF, O_RDONLY, 0600)) < 0)
       +                err(1, "open %s", STOREF);
       +
       +        /* LOCK_NB: fail instead of waiting when the lock is held */
       +        if (flock(ifd, LOCK_NB | LOCK_EX) < 0)
       +                err(1, "flock");
       +        if (flock(sfd, LOCK_NB | LOCK_EX) < 0)
       +                err(1, "flock");
       +
       +        load_snap_hdr();
       +        load_blk_hdr();
       +}
       +
       +/* Close the store and snapshot file descriptors opened by init() */
       +static void
       +term(void)
       +{
       +        close(sfd);
       +        close(ifd);
       +}
       +
       +/* Print the usage line to stderr and exit with status 1 */
       +static void
       +usage(void)
       +{
       +        fprintf(stderr, "usage: %s [-v] id [repo]\n", argv0);
       +        exit(1);
       +}
       +
       +/*
       + * Parse the command line, change into the repository directory
       + * and write the snapshot named by id to standard output.
       + * Exits with an error when no snapshot has that id.
       + */
       +int
       +main(int argc, char *argv[])
       +{
       +        uint8_t md[MD_SIZE];
       +        char *repo, *id = NULL;
       +        struct extract_args args;
       +
       +        ARGBEGIN {
       +        case 'v':
       +                verbose++;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        /* The id operand is mandatory, the repository defaults to "." */
       +        switch (argc) {
       +        case 1:
       +                id = argv[0];
       +                repo = ".";
       +                break;
       +        case 2:
       +                id = argv[0];
       +                repo = argv[1];
       +                break;
       +        default:
       +                usage();
       +        }
       +
       +        if (chdir(repo) < 0)
       +                err(1, "chdir: %s", repo);
       +
       +        init();
       +        str2bin(id, md);
       +        args.md = md;
       +        /* The snapshot data is written to standard output */
       +        args.fd = STDOUT_FILENO;
       +        /* extract() sets ret to 0 once the snapshot has been written */
       +        args.ret = -1;
       +        walk_snap(extract, &args);
       +        if (args.ret != 0)
       +                errx(1, "unknown snapshot: %s", id);
       +        term();
       +        return 0;
       +}