URI: 
       Change cache consistency algorithm - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit df98cd4221d5e54c88d5ce172690c6cc7cb08235
   DIR parent 13ea526261fd0d7821dbf271bd9ccd509ae4e6fd
  HTML Author: sin <sin@2f30.org>
       Date:   Tue,  5 Mar 2019 22:39:08 +0000
       
       Change cache consistency algorithm
       
       Instead of doing complicated comparisons between snapshot blocks and
       cache entries, just store the hash of all cache entries in the
       snapshot header.  When the cache is loaded, the hash is calculated and
       compared against the one in the snapshot header.  If they are
       different, a cache rebuild is triggered.
       
       Diffstat:
         M cache.c                             |       5 +++--
         M dedup.c                             |     127 +++++++++----------------------
         M dedup.h                             |       8 +++++---
         M types.c                             |      14 ++++++++++----
       
       4 files changed, 53 insertions(+), 101 deletions(-)
       ---
   DIR diff --git a/cache.c b/cache.c
       @@ -102,10 +102,11 @@ lookup_cache_entry(struct cache *cache, struct cache_entry *ent)
        }
        
        void
       -walk_cache(struct cache *cache, int (*fn)(struct cache_entry *))
       +walk_cache(struct cache *cache,
       +           int (*fn)(struct cache_entry *, void *), void *arg)
        {
                struct cache_node *node;
        
                RB_FOREACH(node, cache_head, &cache->nodes)
       -                (*fn)(&node->ent);
       +                (*fn)(&node->ent, arg);
        }
   DIR diff --git a/dedup.c b/dedup.c
       @@ -37,7 +37,6 @@ static struct cache *cache;
        static int ifd;
        static int sfd;
        static int cfd;
       -static int cache_dirty;
        static unsigned long long cache_hits;
        static unsigned long long cache_misses;
        
       @@ -253,7 +252,6 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
        
                        cache_entry.offset = blk_desc.offset;
                        cache_entry.size = blk_desc.size;
       -                cache_dirty = 1;
                        add_cache_entry(cache, &cache_entry);
                        cache_misses++;
        
       @@ -396,40 +394,10 @@ check_snap(struct snapshot *snap, void *arg)
                return WALK_CONTINUE;
        }
        
       -/*
       - * For each block descriptor within each snapshot, do a lookup
       - * of the block descriptor hash in the cache.  If the lookup fails
       - * the cache is corrupted.  The caller will rebuild the cache in
       - * that case.
       - */
       -static int
       -check_cache_entry(struct snapshot *snap, void *arg)
       -{
       -        int *ret = arg;
       -        uint64_t i;
       -
       -        for (i = 0; i < snap->nr_blk_descs; i++) {
       -                struct blk_desc *blk_desc;
       -                struct cache_entry cache_entry;
       -
       -                blk_desc = &snap->blk_desc[i];
       -                memcpy(&cache_entry.md, blk_desc->md, sizeof(cache_entry.md));
       -                if (lookup_cache_entry(cache, &cache_entry) < 0 ||
       -                    cache_entry.offset != blk_desc->offset ||
       -                    cache_entry.size != blk_desc->size) {
       -                        *ret = -1;
       -                        return WALK_STOP;
       -                }
       -        }
       -        return WALK_CONTINUE;
       -}
       -
        static int
        reload_cache(struct snapshot *snap, void *arg)
        {
       -        uint8_t md[MDSIZE];
                uint8_t *buf;
       -        SHA256_CTX ctx;
                uint64_t i;
        
                buf = alloc_buf(compr_size(BLKSIZE_MAX));
       @@ -438,19 +406,12 @@ reload_cache(struct snapshot *snap, void *arg)
                        struct blk_desc *blk_desc;
        
                        blk_desc = &snap->blk_desc[i];
       -                read_blk(buf, blk_desc);
       -
       -                SHA256_Init(&ctx);
       -                SHA256_Update(&ctx, buf, blk_desc->size);
       -                SHA256_Final(md, &ctx);
       -
                        memcpy(cache_entry.md, blk_desc->md, sizeof(cache_entry.md));
                        cache_entry.offset = blk_desc->offset;
                        cache_entry.size = blk_desc->size;
                        add_cache_entry(cache, &cache_entry);
                }
                free(buf);
       -        cache_dirty = 1;
                return WALK_CONTINUE;
        }
        
       @@ -500,77 +461,59 @@ match_ver(uint64_t v)
                     VER_MAJ, VER_MIN, maj, min);
        }
        
       -static uint64_t
       -cache_nr_entries(void)
       +static void
       +hash_cache_entry_update(struct cache_entry *cache_entry, SHA256_CTX *ctx)
        {
       -        struct stat sb;
       +        uint8_t buf[CACHE_ENTRY_SIZE];
       +        char fmt[BUFSIZ];
       +        int n;
        
       -        if (fstat(cfd, &sb) < 0)
       -                err(1, "fstat");
       -        return sb.st_size / CACHE_ENTRY_SIZE;
       +        snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE);
       +        n = pack(buf, fmt, cache_entry->md, cache_entry->offset,
       +                 cache_entry->size);
       +        SHA256_Update(ctx, buf, n);
        }
        
        static void
       -check_cache(void)
       +load_cache(void)
        {
       -        int ret;
       +        uint8_t md[MDSIZE];
       +        struct stat sb;
       +        SHA256_CTX ctx;
       +        uint64_t nr_entries, i;
        
       -        if (verbose > 0)
       -                fprintf(stderr, "Checking cache\n");
       +        if (fstat(cfd, &sb) < 0)
       +                err(1, "fstat");
       +        nr_entries = sb.st_size / CACHE_ENTRY_SIZE;
        
       -        if (cache_nr_entries() != snap_hdr.st.nr_blks) {
       -                ret = -1;
       -        } else {
       -                xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
       -                ret = 0;
       -                walk_snap(check_cache_entry, &ret);
       +        xlseek(cfd, 0, SEEK_SET);
       +        SHA256_Init(&ctx);
       +        for (i = 0; i < nr_entries; i++) {
       +                struct cache_entry cache_entry;
       +
       +                read_cache_entry(cfd, &cache_entry);
       +                hash_cache_entry_update(&cache_entry, &ctx);
       +                add_cache_entry(cache, &cache_entry);
                }
       +        SHA256_Final(md, &ctx);
        
       -        if (ret != 0) {
       +        if (memcmp(snap_hdr.cache_md, md, sizeof(snap_hdr.cache_md)) != 0) {
                        if (verbose > 0)
                                fprintf(stderr, "Rebuilding cache\n");
       -
                        free_cache(cache);
                        cache = alloc_cache();
       -
                        if (ftruncate(cfd, 0) < 0)
                                err(1, "ftruncate");
       -
                        xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
                        xlseek(cfd, 0, SEEK_SET);
                        walk_snap(reload_cache, NULL);
                }
        }
        
       -static void
       -load_cache(void)
       -{
       -        uint64_t nr_entries;
       -        uint64_t i;
       -
       -        xlseek(cfd, 0, SEEK_SET);
       -        nr_entries = cache_nr_entries();
       -        if (nr_entries != snap_hdr.st.nr_blks) {
       -                if (verbose > 0)
       -                        fprintf(stderr, "Rebuilding cache\n");
       -                if (ftruncate(cfd, 0) < 0)
       -                        err(1, "ftruncate");
       -                xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
       -                walk_snap(reload_cache, NULL);
       -                return;
       -        }
       -
       -        for (i = 0; i < nr_entries; i++) {
       -                struct cache_entry cache_entry;
       -
       -                read_cache_entry(cfd, &cache_entry);
       -                add_cache_entry(cache, &cache_entry);
       -        }
       -}
       -
        static int
       -flush_cache(struct cache_entry *cache_entry)
       +flush_cache(struct cache_entry *cache_entry, void *arg)
        {
       +        hash_cache_entry_update(cache_entry, arg);
                write_cache_entry(cfd, cache_entry);
                return 0;
        }
       @@ -578,10 +521,12 @@ flush_cache(struct cache_entry *cache_entry)
        static void
        save_cache(void)
        {
       -        if (cache_dirty) {
       -                xlseek(cfd, 0, SEEK_SET);
       -                walk_cache(cache, flush_cache);
       -        }
       +        SHA256_CTX ctx;
       +
       +        SHA256_Init(&ctx);
       +        xlseek(cfd, 0, SEEK_SET);
       +        walk_cache(cache, flush_cache, &ctx);
       +        SHA256_Final(snap_hdr.cache_md, &ctx);
        }
        
        static void
       @@ -784,8 +729,6 @@ main(int argc, char *argv[])
                        if (ret != 0)
                                errx(1, "%s or %s is corrupted", SNAPSF, STOREF);
        
       -                check_cache();
       -
                        term();
                        return 0;
                }
   DIR diff --git a/dedup.h b/dedup.h
       @@ -6,7 +6,7 @@
         * using the helpers from types.c.  Any modification made to
         * the structs below will need to be reflected here and in types.c.
         */
       -#define SNAP_HDR_SIZE 104
       +#define SNAP_HDR_SIZE 136
        #define BLK_HDR_SIZE 16
        #define BLK_DESC_SIZE 48
        #define SNAPSHOT_SIZE 304
       @@ -16,7 +16,7 @@
        #define MDSIZE        32
        
        /* file format version */
       -#define VER_MIN        2
       +#define VER_MIN        3
        #define VER_MAJ        0
        
        #define VER_MIN_MASK        0xff
       @@ -43,6 +43,7 @@ struct snapshot_hdr {
                uint64_t flags;                /* bottom 16 bits are maj/min version */
                uint64_t size;                /* size of snapshots file */
                uint64_t nr_snapshots;
       +        uint8_t cache_md[MDSIZE];
                struct stats st;
        };
        
       @@ -82,7 +83,8 @@ struct cache *alloc_cache(void);
        void free_cache(struct cache *cache);
        void add_cache_entry(struct cache *cache, struct cache_entry *ent);
        int lookup_cache_entry(struct cache *cache, struct cache_entry *ent);
       -void walk_cache(struct cache *cache, int (*fn)(struct cache_entry *));
       +void walk_cache(struct cache *cache,
       +                int (*fn)(struct cache_entry *, void *), void *arg);
        
        /* chunker.c */
        struct chunker *alloc_chunker(int fd, size_t cap);
   DIR diff --git a/types.c b/types.c
       @@ -10,15 +10,18 @@ void
        read_snap_hdr(int fd, struct snapshot_hdr *hdr)
        {
                uint8_t buf[SNAP_HDR_SIZE];
       +        char fmt[BUFSIZ];
                int n;
        
                if (xread(fd, buf, sizeof(buf)) == 0)
                        errx(1, "read_snap_hdr: unexpected EOF");
        
       -        n = unpack(buf, "qqq",
       +        snprintf(fmt, sizeof(fmt), "qqq'%d", MDSIZE);
       +        n = unpack(buf, fmt,
                           &hdr->flags,
                           &hdr->size,
       -                   &hdr->nr_snapshots);
       +                   &hdr->nr_snapshots,
       +                   hdr->cache_md);
        
                n += unpack(&buf[n], "qqqqqq",
                            &hdr->st.orig_size,
       @@ -41,12 +44,15 @@ void
        write_snap_hdr(int fd, struct snapshot_hdr *hdr)
        {
                uint8_t buf[SNAP_HDR_SIZE];
       +        char fmt[BUFSIZ];
                int n;
        
       -        n = pack(buf, "qqq",
       +        snprintf(fmt, sizeof(fmt), "qqq'%d", MDSIZE);
       +        n = pack(buf, fmt,
                         hdr->flags,
                         hdr->size,
       -                 hdr->nr_snapshots);
       +                 hdr->nr_snapshots,
       +                 hdr->cache_md);
        
                n += pack(&buf[n], "qqqqqq",
                          hdr->st.orig_size,