URI: 
       Some more comments in bstorage.c - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit 891041b03c66d2c37f4aef65de717e0f3740d1d1
   DIR parent bb81bcb6bd4b4bf7dbe36d2a47087dec0c58f812
  HTML Author: sin <sin@2f30.org>
       Date:   Wed,  1 May 2019 20:54:49 +0100
       
       Some more comments in bstorage.c
       
       Diffstat:
         M bstorage.c                          |     111 +++++++++++++++++++++----------
       
       1 file changed, 75 insertions(+), 36 deletions(-)
       ---
   DIR diff --git a/bstorage.c b/bstorage.c
       @@ -2,10 +2,10 @@
         * Storage layer implementation using a single backing file.
         * The file format is as follows:
         *
       - * [storage header]
       - * [storage descriptor 0]
       + * [block header]
       + * [block descriptor 0]
         * [data]
       - * [storage descriptor 1]
       + * [block descriptor 1]
         * [data]
         * ...
         */
       @@ -26,31 +26,30 @@
        #include "queue.h"
        #include "tree.h"
        
       +/* block header flags */
       +#define BHDRMAGIC        "DEDUPDIDUPDIDUP"
       +#define NBHDRMAGIC        sizeof(BHDRMAGIC)
        #define VMIN                0
        #define VMAJ                1
       -
        #define VMINMASK        0xff
        #define VMAJSHIFT        8
        #define VMAJMASK        0xff
       -
        #define HALGOSHIFT        19
        #define HALGOMASK        0x7
        #define CALGOSHIFT        16
        #define CALGOMASK        0x7
       -
       -#define BHDRMAGIC        "DEDUPDIDUPDIDUP"
       -#define NBHDRMAGIC        sizeof(BHDRMAGIC)
       -#define BD2BTYPE        0x100
       -#define BD2STYPE        0x101
       -
       -#define BHDRSIZE        (NBHDRMAGIC + 8 + 8)
       -#define BDSIZE                (8 + 8 + 8 + 8 + (MDSIZE))
       -
        #define CNONETYPE        0
        #define CSNAPPYTYPE        1
        #define HBLAKE2BTYPE        0
        #define HBLAKE2STYPE        1
       +#define BHDRSIZE        (NBHDRMAGIC + 8 + 8)
       +
       +/* block descriptor flags */
       +#define BD2BTYPE        0x100
       +#define BD2STYPE        0x101
       +#define BDSIZE                (8 + 8 + 8 + 8 + (MDSIZE))
        
       +/* misc helpers */
        extern ssize_t xread(int, void *, size_t);
        extern ssize_t xwrite(int, void *, size_t);
        extern int pack(unsigned char *, char *, ...);
       @@ -80,30 +79,30 @@ static struct bops bops = {
        
        /* Block header structure */
        struct bhdr {
       -        char magic[NBHDRMAGIC];
       -        uint64_t flags;
       -        uint64_t nbd;
       +        char magic[NBHDRMAGIC]; /* magic number for file(1) */
       +        uint64_t flags;        /* version number, compression/hashing configuration */
       +        uint64_t nbd;        /* number of block descriptors */
        };
        
        /* Block descriptor */
        struct bd {
       -        uint16_t type;
       -        uint8_t reserved[6];
       -        uint64_t offset;        /* offset of block */
       -        uint64_t size;                /* size of block */
       -        uint64_t refcnt;
       -        unsigned char md[MDSIZE];
       -        RB_ENTRY(bd) rbe;
       -        SLIST_ENTRY(bd) sle;
       +        uint16_t type;                /* type of hashing algorithm used */
       +        uint8_t reserved[6];        /* should be set to 0 when writing */
       +        uint64_t offset;        /* block offset */
       +        uint64_t size;                /* block size */
       +        uint64_t refcnt;        /* reference count of block, 0 if block is removed */
       +        unsigned char md[MDSIZE];        /* hash of block */
       +        RB_ENTRY(bd) rbe;        /* bdcache link node */
       +        SLIST_ENTRY(bd) sle;        /* gchead link node */
        };
        RB_HEAD(bdcache, bd);
                
        /* Storage layer context */
        struct sctx {
       -        struct bdcache bdcache;
       -        SLIST_HEAD(gchead, bd) gchead;
       -        struct bhdr bhdr;
       -        int fd;
       +        struct bdcache bdcache;        /* cache of block descriptors */
       +        SLIST_HEAD(gchead, bd) gchead;        /* list of all blocks with a zero refcount */
       +        struct bhdr bhdr;        /* block header entry */
       +        int fd;                /* underlying storage file descriptor */
                int rdonly;        /* when set to 1, the bssync() operation is a no-op */
                int type;        /* hash algorithm for new blocks */
        };
       @@ -231,7 +230,7 @@ packbd(int fd, struct bd *bd)
                return n;
        }
        
       -/* Insert block descriptor to cache */
       +/* Load block descriptor from file */
        static int
        loadbd(struct sctx *sctx)
        {
       @@ -257,6 +256,16 @@ loadbd(struct sctx *sctx)
                        return -1;
                }
        
       +        /*
       +         * When refcount is 0 the block has been removed.
       +         * In that case, the block descriptor is still present
       +         * in the file as it is used to locate the next block
       +         * descriptor which could be live.
       +         *
       +         * The garbage collection list links together all block
       +         * descriptors that have a reference count of 0.
       +         * This is needed to implement the gc operation.
       +         */
                if (bd->refcnt > 0)
                        RB_INSERT(bdcache, &sctx->bdcache, bd);
                else
       @@ -295,7 +304,7 @@ initbdcache(struct sctx *sctx)
                return 0;
        }
        
       -/* Create storage */
       +/* Create storage file */
        static int
        bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
        {
       @@ -346,6 +355,7 @@ bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
                bhdr->nbd = 0;
                sctx->fd = fd;
        
       +        /* Write the block header entry to the file */
                if (packbhdr(fd, bhdr) < 0) {
                        free(sctx);
                        close(fd);
       @@ -354,7 +364,7 @@ bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
                return 0;
        }
        
       -/* Open storage */
       +/* Open storage file */
        static int
        bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
        {
       @@ -387,6 +397,8 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
                RB_INIT(&sctx->bdcache);
                SLIST_INIT(&sctx->gchead);
                bhdr = &sctx->bhdr;
       +
       +        /* Read block header entry from file */
                if (unpackbhdr(fd, bhdr) < 0) {
                        free(sctx);
                        close(fd);
       @@ -442,6 +454,10 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
                sctx->fd = fd;
                sctx->rdonly = flags == O_RDONLY;
        
       +        /*
       +         * Initialize block descriptor cache
       +         * and garbage collection list.
       +         */
                if (initbdcache(sctx) < 0) {
                        free(sctx);
                        close(fd);
       @@ -450,7 +466,7 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
                return 0;
        }
        
       -/* Write a block */
       +/* Write a block to the storage file */
        static int
        bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
        {
       @@ -460,7 +476,6 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
                off_t offs;
        
                sctx = bctx->sctx;
       -
                switch (sctx->type) {
                case BD2BTYPE:
                        if (b2bhash(buf, n, key.md) < 0)
       @@ -474,6 +489,11 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
                        return -1;
                }
        
       +        /*
       +         * If the block is already present in the cache
       +         * just increment the reference count and write back
       +         * the block descriptor associated with that block.
       +         */
                bd = RB_FIND(bdcache, &sctx->bdcache, &key);
                if (bd != NULL) {
                        off_t bdoffs;
       @@ -482,7 +502,6 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
                        if (lseek(sctx->fd, bdoffs, SEEK_SET) < 0)
                                return -1;
        
       -                /* Block already present, increment the reference count */
                        bd->refcnt++;
                        if (packbd(sctx->fd, bd) < 0) {
                                bd->refcnt--;
       @@ -493,10 +512,12 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
                        return 0;
                }
        
       +        /* New blocks are always appended to the storage file */
                offs = lseek(sctx->fd, 0, SEEK_END);
                if (offs < 0)
                        return -1;
        
       +        /* Allocate a new block descriptor */
                bd = calloc(1, sizeof(*bd));
                if (bd == NULL)
                        return -1;
       @@ -506,25 +527,34 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
                bd->refcnt = 1;
                memcpy(bd->md, key.md, MDSIZE);
        
       +        /* Write block descriptor to storage file */
                if (packbd(sctx->fd, bd) < 0) {
                        free(bd);
                        return -1;
                }
        
       +        /* Append block payload to block descriptor */
                if (xwrite(sctx->fd, buf, n) != n) {
       +                /* Shouldn't fail, but if it does, rewind storage file state */
                        ftruncate(sctx->fd, offs);
                        free(bd);
                        return -1;
                }
        
       +        /*
       +         * Update the block header entry.
       +         * The header will be written to the storage file
       +         * when bsclose() or bssync() is called.
       +         */
                bhdr = &sctx->bhdr;
                bhdr->nbd++;
       +
                RB_INSERT(bdcache, &sctx->bdcache, bd);
                memcpy(md, bd->md, MDSIZE);
                return bd->size;
        }
        
       -/* Read a block */
       +/* Read a block from the storage file */
        static int
        bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
        {
       @@ -576,6 +606,7 @@ bsrm(struct bctx *bctx, unsigned char *md)
                        return -1;
                }
        
       +        /* This block is still referenced so just return */
                if (bd->refcnt > 0)
                        return 0;
        
       @@ -600,6 +631,14 @@ bsrm(struct bctx *bctx, unsigned char *md)
                return 0;
        }
        
       +/*
       + * Re-punch all holes in the storage file.
       + * This is needed when the storage file is copied from
       + * one system to another and back.  The target system
       + * may not support hole punching so the holes will be
       + * filled with literal zeroes, negating the space saving
       + * effects.
       + */
        static int
        bsgc(struct bctx *bctx)
        {