URI: 
       Move rbtree cache to cache.c - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit 9adb9c6c1a308e2845d822987948afbf12fc943b
   DIR parent 19420eb96fb72d9d5f7940678d1b957aa26f1fce
  HTML Author: sin <sin@2f30.org>
       Date:   Fri, 22 Feb 2019 23:24:03 +0000
       
       Move rbtree cache to cache.c
       
       Diffstat:
         M Makefile                            |       5 +++--
         A cache.c                             |      94 +++++++++++++++++++++++++++++++
         M dedup.c                             |     222 +++++++++----------------------
         M dedup.h                             |       7 +++++++
         A types.h                             |      44 +++++++++++++++++++++++++++++++
       
       5 files changed, 210 insertions(+), 162 deletions(-)
       ---
   DIR diff --git a/Makefile b/Makefile
       @@ -2,8 +2,8 @@ VERSION = 0.4
        PREFIX = /usr/local
        MANPREFIX = $(PREFIX)/man
        BIN = dedup
       -SRC = $(BIN).c chunker.c hash.c pack.c unpack.c utils.c
       -OBJ = $(BIN).o chunker.o hash.o pack.o unpack.o utils.o
       +SRC = $(BIN).c cache.c chunker.c hash.c pack.c unpack.c utils.c
       +OBJ = $(BIN).o cache.o chunker.o hash.o pack.o unpack.o utils.o
        DISTFILES = \
                $(SRC) \
                LICENSE \
       @@ -14,6 +14,7 @@ DISTFILES = \
                $(BIN).1 \
                dedup.h \
                tree.h \
       +        types.h \
        
        CFLAGS = -g -Wall
        CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64
   DIR diff --git a/cache.c b/cache.c
       @@ -0,0 +1,94 @@
       +#include <sys/types.h>
       +#include <sys/stat.h>
       +
       +#include <err.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +
       +#include "dedup.h"
       +#include "tree.h"
       +
       +struct cache_node {
       +        struct cache_entry ent;
       +        RB_ENTRY(cache_node) e;
       +};
       +
       +static RB_HEAD(cache, cache_node) cache_head;
       +
       +static int
       +cache_node_cmp(struct cache_node *e1, struct cache_node *e2)
       +{
       +        int r;
       +
       +         r = memcmp(e1->ent.md, e2->ent.md, sizeof(e1->ent.md));
       +        if (r > 0)
       +                return 1;
       +        else if (r < 0)
       +                return -1;
       +        return 0;
       +}
       +static RB_PROTOTYPE(cache, cache_node, e, cache_node_cmp);
       +static RB_GENERATE(cache, cache_node, e, cache_node_cmp);
       +
       +static struct cache_node *
       +alloc_cache_node(struct cache_entry *ent)
       +{
       +        struct cache_node *node;
       +
       +        node = calloc(1, sizeof(*node));
       +        if (node == NULL)
       +                err(1, "calloc");
       +        node->ent = *ent;
       +        return node;
       +}
       +
       +static void
       +free_cache_node(struct cache_node *node)
       +{
       +        free(node);
       +}
       +
       +void
       +add_cache_entry(struct cache_entry *ent)
       +{
       +        struct cache_node *node;
       +
       +        node = alloc_cache_node(ent);
       +        RB_INSERT(cache, &cache_head, node);
       +}
       +
       +int
       +lookup_cache_entry(struct cache_entry *ent)
       +{
       +        struct cache_node *node, key;
       +
       +        key.ent = *ent;
       +        node = RB_FIND(cache, &cache_head, &key);
       +        if (node != NULL) {
       +                *ent = node->ent;
       +                return 0;
       +        }
       +        return -1;
       +}
       +
       +void
       +walk_cache(int (*fn)(struct cache_entry *))
       +{
       +        struct cache_node *node;
       +
       +        RB_FOREACH(node, cache, &cache_head)
       +                (*fn)(&node->ent);
       +}
       +
       +void
       +free_cache(void)
       +{
       +        struct cache_node *node, *tmp;
       +
       +        RB_FOREACH_SAFE(node, cache, &cache_head, tmp) {
       +                RB_REMOVE(cache, &cache_head, node);
       +                free_cache_node(node);
       +        }
       +}
   DIR diff --git a/dedup.c b/dedup.c
       @@ -1,3 +1,4 @@
       +#include <sys/types.h>
        #include <sys/stat.h>
        #include <sys/file.h>
        
       @@ -14,67 +15,21 @@
        
        #include "arg.h"
        #include "dedup.h"
       -#include "tree.h"
        
        #define SNAPSF ".snapshots"
        #define STOREF ".store"
        #define CACHEF ".cache"
        
       -#define MSGSIZE 256
       -#define MDSIZE SHA256_DIGEST_LENGTH
       -
       -/* file format version */
       -#define VER_MIN 1
       -#define VER_MAJ 0
       -
        enum {
                WALK_CONTINUE,
                WALK_STOP
        };
        
       -struct stats {
       -        uint64_t orig_size;
       -        uint64_t comp_size;
       -        uint64_t dedup_size;
       -        uint64_t min_blk_size;
       -        uint64_t max_blk_size;
       -        uint64_t nr_blks;
       -        uint64_t reserved[6];
       -};
       -
       -struct snapshot_hdr {
       -        uint64_t flags;
       -        uint64_t nr_snapshots;
       -        uint64_t store_size;
       -        uint64_t reserved[4];
       -        struct stats st;
       -};
       -
       -struct blk_desc {
       -        uint8_t md[MDSIZE];
       -        uint64_t offset;
       -        uint64_t size;
       -};
       -
       -struct snapshot {
       -        uint64_t size;
       -        uint8_t msg[MSGSIZE];
       -        uint8_t md[MDSIZE];        /* hash of file */
       -        uint64_t nr_blk_descs;
       -        struct blk_desc blk_desc[];
       -};
       -
       -struct cache_entry {
       -        struct blk_desc blk_desc;
       -        RB_ENTRY(cache_entry) e;
       -};
       -
        struct extract_args {
                uint8_t *md;
                int fd;
        };
        
       -static RB_HEAD(cache, cache_entry) cache_head;
        static struct snapshot_hdr snaphdr;
        static int ifd;
        static int sfd;
       @@ -144,78 +99,6 @@ print_stats(struct stats *st)
                fprintf(stderr, "cache misses: %llu\n", cache_misses);
        }
        
       -static int
       -cache_entry_cmp(struct cache_entry *e1, struct cache_entry *e2)
       -{
       -        int r;
       -
       -        r = memcmp(e1->blk_desc.md, e2->blk_desc.md, sizeof(e1->blk_desc.md));
       -        if (r > 0)
       -                return 1;
       -        else if (r < 0)
       -                return -1;
       -        return 0;
       -}
       -static RB_PROTOTYPE(cache, cache_entry, e, cache_entry_cmp);
       -static RB_GENERATE(cache, cache_entry, e, cache_entry_cmp);
       -
       -static struct cache_entry *
       -alloc_cache_entry(void)
       -{
       -        struct cache_entry *ent;
       -
       -        ent = calloc(1, sizeof(*ent));
       -        if (ent == NULL)
       -                err(1, "calloc");
       -        return ent;
       -}
       -
       -static void
       -free_cache_entry(struct cache_entry *ent)
       -{
       -        free(ent);
       -}
       -
       -static void
       -add_cache_entry(struct cache_entry *ent)
       -{
       -        RB_INSERT(cache, &cache_head, ent);
       -}
       -
       -static void
       -flush_cache(void)
       -{
       -        struct cache_entry *ent;
       -
       -        if (!cache_dirty)
       -                return;
       -
       -        xlseek(cfd, 0, SEEK_SET);
       -        RB_FOREACH(ent, cache, &cache_head)
       -                xwrite(cfd, &ent->blk_desc, sizeof(ent->blk_desc));
       -}
       -
       -static void
       -free_cache(void)
       -{
       -        struct cache_entry *ent, *tmp;
       -
       -        RB_FOREACH_SAFE(ent, cache, &cache_head, tmp) {
       -                RB_REMOVE(cache, &cache_head, ent);
       -                free_cache_entry(ent);
       -        }
       -}
       -
       -static uint64_t
       -cache_nr_entries(void)
       -{
       -        struct stat sb;
       -
       -        if (fstat(cfd, &sb) < 0)
       -                err(1, "fstat");
       -        return sb.st_size / sizeof(struct blk_desc);
       -}
       -
        static void
        append_snap(struct snapshot *snap)
        {
       @@ -304,26 +187,12 @@ append_blk(uint8_t *buf, struct blk_desc *blk_desc)
                snaphdr.store_size += blk_desc->size;
        }
        
       -static int
       -lookup_blk_desc(uint8_t *md, struct blk_desc *blk_desc)
       -{
       -        struct cache_entry *ent, key;
       -
       -        memcpy(key.blk_desc.md, md, sizeof(key.blk_desc.md));
       -        ent = RB_FIND(cache, &cache_head, &key);
       -        if (ent != NULL) {
       -                *blk_desc = ent->blk_desc;
       -                return 0;
       -        }
       -        return -1;
       -}
       -
        static void
        dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
        {
                uint8_t md[MDSIZE];
       +        struct cache_entry cache_entry;
                uint8_t *comp_buf;
       -        struct blk_desc blk_desc;
                size_t n;
        
                comp_buf = alloc_buf(comp_size(BLKSIZE_MAX));
       @@ -334,21 +203,21 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
                snaphdr.st.orig_size += chunk_size;
                snaphdr.st.comp_size += n;
        
       -        if (lookup_blk_desc(md, &blk_desc) < 0) {
       -                struct cache_entry *ent;
       +        memcpy(cache_entry.md, md, sizeof(cache_entry.md));
       +        if (lookup_cache_entry(&cache_entry) < 0) {
       +                struct blk_desc blk_desc;
        
       -                memcpy(blk_desc.md, md, sizeof(blk_desc.md));
       +                memcpy(&blk_desc.md, md, sizeof(blk_desc.md));
                        blk_desc.offset = snaphdr.store_size;
                        blk_desc.size = n;
        
                        snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
       -
                        append_blk(comp_buf, &blk_desc);
        
       -                ent = alloc_cache_entry();
       -                ent->blk_desc = blk_desc;
       -                add_cache_entry(ent);
       +                cache_entry.offset = blk_desc.offset;
       +                cache_entry.size = blk_desc.size;
                        cache_dirty = 1;
       +                add_cache_entry(&cache_entry);
                        cache_misses++;
        
                        snaphdr.st.dedup_size += blk_desc.size;
       @@ -359,6 +228,11 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
                        if (blk_desc.size < snaphdr.st.min_blk_size)
                                snaphdr.st.min_blk_size = blk_desc.size;
                } else {
       +                struct blk_desc blk_desc;
       +
       +                memcpy(&blk_desc.md, cache_entry.md, sizeof(blk_desc.md));
       +                blk_desc.offset = cache_entry.offset;
       +                blk_desc.size = cache_entry.size;
                        snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
                        cache_hits++;
                }
       @@ -494,19 +368,21 @@ rebuild_cache(struct snapshot *snap, void *arg)
        
                buf = alloc_buf(comp_size(BLKSIZE_MAX));
                for (i = 0; i < snap->nr_blk_descs; i++) {
       -                struct cache_entry *ent;
       +                struct cache_entry cache_entry;
       +                struct blk_desc *blk_desc;
        
       -                read_blk(buf, &snap->blk_desc[i]);
       +                blk_desc = &snap->blk_desc[i];
       +                read_blk(buf, blk_desc);
        
                        SHA256_Init(&ctx);
       -                SHA256_Update(&ctx, buf, snap->blk_desc[i].size);
       +                SHA256_Update(&ctx, buf, blk_desc->size);
                        SHA256_Final(md, &ctx);
        
       -                ent = alloc_cache_entry();
       -                memcpy(ent->blk_desc.md, md, sizeof(ent->blk_desc.md));
       -                ent->blk_desc = snap->blk_desc[i];
       -                add_cache_entry(ent);
       +                memcpy(cache_entry.md, blk_desc->md, sizeof(cache_entry.md));
       +                cache_entry.offset = blk_desc->offset;
       +                cache_entry.size = blk_desc->size;
                        cache_dirty = 1;
       +                add_cache_entry(&cache_entry);
                }
                free(buf);
                return WALK_CONTINUE;
       @@ -514,7 +390,7 @@ rebuild_cache(struct snapshot *snap, void *arg)
        
        /* Walk through all snapshots and call fn() on each one */
        static void
       -walk(int (*fn)(struct snapshot *, void *), void *arg)
       +walk_snap(int (*fn)(struct snapshot *, void *), void *arg)
        {
                uint64_t i;
        
       @@ -539,19 +415,37 @@ walk(int (*fn)(struct snapshot *, void *), void *arg)
                }
        }
        
       +static int
       +flush_cache(struct cache_entry *cache_entry)
       +{
       +        xwrite(cfd, cache_entry, sizeof(*cache_entry));
       +        return 0;
       +}
       +
       +static uint64_t
       +cache_nr_entries(void)
       +{
       +        struct stat sb;
       +
       +        if (fstat(cfd, &sb) < 0)
       +                err(1, "fstat");
       +        return sb.st_size / sizeof(struct cache_entry);
       +}
       +
        static void
       -init_cache(void)
       +load_cache(void)
        {
       +        uint64_t nr_entries;
                uint64_t i;
        
                xlseek(cfd, 0, SEEK_SET);
       -        for (i = 0; i < cache_nr_entries(); i++) {
       -                struct cache_entry *ent;
       +        nr_entries = cache_nr_entries();
       +        for (i = 0; i < nr_entries; i++) {
       +                struct cache_entry cache_entry;
        
       -                ent = alloc_cache_entry();
       -                if (xread(cfd, &ent->blk_desc, sizeof(ent->blk_desc)) == 0)
       +                if (xread(cfd, &cache_entry, sizeof(cache_entry)) == 0)
                                errx(1, "read: unexpected EOF");
       -                add_cache_entry(ent);
       +                add_cache_entry(&cache_entry);
                }
        }
        
       @@ -596,9 +490,9 @@ init(void)
                }
        
                if (cache_nr_entries() != 0)
       -                init_cache();
       +                load_cache();
                else
       -                walk(rebuild_cache, NULL);
       +                walk_snap(rebuild_cache, NULL);
        }
        
        static void
       @@ -606,7 +500,11 @@ term(void)
        {
                if (verbose)
                        print_stats(&snaphdr.st);
       -        flush_cache();
       +
       +        if (cache_dirty) {
       +                xlseek(cfd, 0, SEEK_SET);
       +                walk_cache(flush_cache);
       +        }
                free_cache();
        
                fsync(ifd);
       @@ -683,20 +581,24 @@ main(int argc, char *argv[])
                init();
        
                if (cflag) {
       -                walk(check, NULL);
       +                walk_snap(check, NULL);
                        term();
                        return 0;
                }
        
                if (lflag) {
       -                walk(list, NULL);
       +                walk_snap(list, NULL);
                        term();
                        return 0;
                }
        
                if (id) {
       +                struct extract_args args;
       +
                        str2bin(id, md);
       -                walk(extract, &(struct extract_args){ .md = md, .fd = fd });
       +                args.md = md;
       +                args.fd = fd;
       +                walk_snap(extract, &args);
                } else {
                        dedup(fd, msg);
                }
   DIR diff --git a/dedup.h b/dedup.h
       @@ -1,7 +1,14 @@
        #include "config.h"
       +#include "types.h"
        
        struct chunker;
        
       +/* cache.c */
       +void add_cache_entry(struct cache_entry *ent);
       +int lookup_cache_entry(struct cache_entry *ent);
       +void walk_cache(int (*fn)(struct cache_entry *));
       +void free_cache(void);
       +
        /* chunker.c */
        struct chunker *alloc_chunker(size_t cap, int fd);
        void free_chunker(struct chunker *chunker);
   DIR diff --git a/types.h b/types.h
       @@ -0,0 +1,44 @@
       +#define MSGSIZE 256
       +#define MDSIZE 32
       +
       +/* snapshot file format version */
       +#define VER_MIN 1
       +#define VER_MAJ 0
       +
       +struct stats {
       +        uint64_t orig_size;
       +        uint64_t comp_size;
       +        uint64_t dedup_size;
       +        uint64_t min_blk_size;
       +        uint64_t max_blk_size;
       +        uint64_t nr_blks;
       +        uint64_t reserved[6];
       +};
       +
       +struct snapshot_hdr {
       +        uint64_t flags;
       +        uint64_t nr_snapshots;
       +        uint64_t store_size;
       +        uint64_t reserved[4];
       +        struct stats st;
       +};
       +
       +struct blk_desc {
       +        uint8_t md[MDSIZE];
       +        uint64_t offset;
       +        uint64_t size;
       +};
       +
       +struct snapshot {
       +        uint64_t size;
       +        uint8_t msg[MSGSIZE];
       +        uint8_t md[MDSIZE];        /* hash of snapshot */
       +        uint64_t nr_blk_descs;
       +        struct blk_desc blk_desc[];
       +};
       +
       +struct cache_entry {
       +        uint8_t md[MDSIZE];
       +        uint64_t offset;
       +        uint64_t size;
       +};