URI: 
       Rework cache code - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit 5ae463d1c2cb6c77d735d53ee7e00c3a00b70090
   DIR parent 4a9c691eb97725f224362ee65dc088f0260bc8b6
  HTML Author: sin <sin@2f30.org>
       Date:   Wed, 21 Mar 2018 13:45:58 +0000
       
       Rework cache code
       
       Diffstat:
         M dedup.c                             |      99 ++++++++++++++++++++-----------
       
       1 file changed, 64 insertions(+), 35 deletions(-)
       ---
   DIR diff --git a/dedup.c b/dedup.c
       @@ -35,17 +35,18 @@ struct blk {
                unsigned char data[BLKSIZ];
        } __attribute__((packed));
        
       -struct cent {
       +struct cache_data {
                unsigned char md[SHA256_DIGEST_LENGTH];
                uint64_t blkidx;
       -} __attribute__((packed));
       +};
        
       -struct hash_ent {
       -        struct cent cent;
       -        RB_ENTRY(hash_ent) e;
       +struct cache_ent {
       +        struct cache_data data;
       +        int dirty;
       +        RB_ENTRY(cache_ent) e;
        };
        
       -RB_HEAD(hash_tree, hash_ent) hash_tree_head;
       +RB_HEAD(cache, cache_ent) cache_head;
        struct enthdr enthdr;
        int ifd;
        int sfd;
       @@ -122,33 +123,53 @@ xwrite(int fd, const void *buf, size_t nbytes)
        }
        
        int
       -hash_ent_cmp(struct hash_ent *e1, struct hash_ent *e2)
       +cache_ent_cmp(struct cache_ent *e1, struct cache_ent *e2)
        {
                int r;
        
       -        r = memcmp(e1->cent.md, e2->cent.md, sizeof(e1->cent.md));
       +        r = memcmp(e1->data.md, e2->data.md, sizeof(e1->data.md));
                if (r > 0)
                        return 1;
                else if (r < 0)
                        return -1;
                return 0;
        }
       -RB_PROTOTYPE(hash_tree, hash_ent, e, hash_ent_cmp);
       -RB_GENERATE(hash_tree, hash_ent, e, hash_ent_cmp);
       +RB_PROTOTYPE(cache, cache_ent, e, cache_ent_cmp);
       +RB_GENERATE(cache, cache_ent, e, cache_ent_cmp);
        
       -struct hash_ent *
       -hash_ent_add(unsigned char *md, uint64_t blkidx)
       +struct cache_ent *
       +alloc_cache_ent(unsigned char *md, uint64_t blkidx)
        {
       -        struct hash_ent *hash_ent;
       +        struct cache_ent *ent;
        
       -        hash_ent = malloc(sizeof(*hash_ent));
       -        if (hash_ent == NULL)
       +        ent = calloc(1, sizeof(*ent));
       +        if (ent == NULL)
                        err(1, "malloc");
       +        memcpy(&ent->data.md, md, sizeof(ent->data.md));
       +        ent->data.blkidx = blkidx;
       +        return ent;
       +}
       +
       +void
       +add_cache_ent(struct cache_ent *ent)
       +{
       +        RB_INSERT(cache, &cache_head, ent);
       +}
        
       -        memcpy(&hash_ent->cent.md, md, sizeof(hash_ent->cent.md));
       -        hash_ent->cent.blkidx = blkidx;
       -        RB_INSERT(hash_tree, &hash_tree_head, hash_ent);
       -        return hash_ent;
       +void
       +flush_cache(void)
       +{
       +        struct cache_ent *ent;
       +
       +        if (verbose)
       +                fprintf(stderr, "flushing cache...\n");
       +        RB_FOREACH(ent, cache, &cache_head) {
       +                if (!ent->dirty)
       +                        continue;
       +                lseek(cfd, ent->data.blkidx * sizeof(ent->data), SEEK_SET);
       +                xwrite(cfd, &ent->data, sizeof(ent->data));
       +                ent->dirty = 0;
       +        }
        }
        
        void
       @@ -200,7 +221,7 @@ storefile_nblks(void)
        uint64_t
        cachefile_nblks(void)
        {
       -        return lseek(cfd, 0, SEEK_END) / sizeof(struct cent);
       +        return lseek(cfd, 0, SEEK_END) / sizeof(struct cache_data);
        }
        
        void
       @@ -231,12 +252,12 @@ append_blk(struct blk *blk)
        int
        lookup_blk(struct blk *blk, uint64_t *blkidx)
        {
       -        struct hash_ent *hash_ent, key;
       +        struct cache_ent *ent, key;
        
       -        memcpy(key.cent.md, blk->md, sizeof(key.cent.md));
       -        hash_ent = RB_FIND(hash_tree, &hash_tree_head, &key);
       -        if (hash_ent != NULL) {
       -                *blkidx = hash_ent->cent.blkidx;
       +        memcpy(key.data.md, blk->md, sizeof(key.data.md));
       +        ent = RB_FIND(cache, &cache_head, &key);
       +        if (ent != NULL) {
       +                *blkidx = ent->data.blkidx;
                        return 0;
                }
                return -1;
       @@ -264,13 +285,15 @@ dedup(int fd)
                        ent = grow_ent(ent, ent->nblks + 1);
        
                        if (lookup_blk(&blk, &blkidx) == -1) {
       -                        struct hash_ent *hash_ent;
       +                        struct cache_ent *cache_ent;
                                uint64_t nblks = storefile_nblks();
        
       +                        /* Create a cache entry for this block */
       +                        cache_ent = alloc_cache_ent(blk.md, nblks);
       +                        add_cache_ent(cache_ent);
       +                        cache_ent->dirty = 1;
       +
                                ent->blks[ent->nblks++] = nblks;
       -                        hash_ent = hash_ent_add(blk.md, nblks);
       -                        lseek(cfd, 0, SEEK_END);
       -                        xwrite(cfd, &hash_ent->cent, sizeof(hash_ent->cent));
                                append_blk(&blk);
                        } else {
                                ent->blks[ent->nblks++] = blkidx;
       @@ -281,6 +304,7 @@ dedup(int fd)
                SHA256_Final(ent->md, &ctx);
                append_ent(ent);
                free(ent);
       +        flush_cache();
        }
        
        void
       @@ -339,13 +363,15 @@ rebuild_cache(void)
                nblks = storefile_nblks();
                lseek(cfd, 0, SEEK_SET);
                for (i = 0; i < nblks; i++) {
       -                struct hash_ent *hash_ent;
       +                struct cache_ent *ent;
                        struct blk blk;
        
                        read_blk(&blk, i);
       -                hash_ent = hash_ent_add(blk.md, i);
       -                xwrite(cfd, &hash_ent->cent, sizeof(hash_ent->cent));
       +                ent = alloc_cache_ent(blk.md, i);
       +                add_cache_ent(ent);
       +                ent->dirty = 1;
                }
       +        flush_cache();
        }
        
        void
       @@ -359,11 +385,13 @@ init_cache(void)
                nblks = cachefile_nblks();
                lseek(cfd, 0, SEEK_SET);
                for (i = 0; i < nblks; i++) {
       -                struct cent cent;
       +                struct blk blk;
       +                struct cache_ent *ent;
        
       -                if (xread(cfd, &cent, sizeof(cent)) == 0)
       +                ent = alloc_cache_ent(blk.md, i);
       +                if (xread(cfd, &ent->data, sizeof(ent->data)) == 0)
                                errx(1, "unexpected EOF");
       -                hash_ent_add(cent.md, cent.blkidx);
       +                add_cache_ent(ent);
                }
        }
        
       @@ -401,6 +429,7 @@ term(void)
                fsync(ifd);
                fsync(sfd);
                fsync(cfd);
       +
                close(ifd);
                close(sfd);
                close(cfd);