Increase dedup throughput by a factor of 2 - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR commit 7f8b5b3e7b72b0d64437a87c6b412743f2ab6187
   DIR parent ba61c65bb274657b7ea643de789db2a24ea836f8
  HTML Author: sin <sin@2f30.org>
       Date:   Sun, 10 Mar 2019 09:36:05 +0000
       Increase dedup throughput by a factor of 2
       Calculating the hash of the entire snapshot inside the loop slows the
       process down by 2x.  This is because we hash the block twice.  We hash
       first the raw uncompressed stream (which will become the snapshot
       hash) and then we hash the compressed block which is stored in the
       block descriptor.
       Change the calculation so we only hash the compressed block inside
       dedup_chunk().  The hash of the snapshot is the hash of its block descriptor hashes.
         M dedup.c                             |      24 ++++++++++++++++++------
       1 file changed, 18 insertions(+), 6 deletions(-)
   DIR diff --git a/dedup.c b/dedup.c
       @@ -229,27 +229,39 @@ dedup(int fd, char *msg)
                struct snapshot *snap;
                struct chunker *chunker;
       -        SHA256_CTX ctx;
       -        ssize_t n;
                snap = alloc_snap();
                chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
                                        HASHMASK_BITS, WINSIZE);
       -        SHA256_Init(&ctx);
       -        while ((n = fill_chunker(chunker)) > 0) {
       +        while (fill_chunker(chunker) > 0) {
                        uint8_t *chunkp;
                        size_t chunk_size;
                        chunkp = get_chunk(chunker, &chunk_size);
       -                SHA256_Update(&ctx, chunkp, chunk_size);
                        snap = grow_snap(snap, snap->nr_blk_descs + 1);
                        dedup_chunk(snap, chunkp, chunk_size);
       -        SHA256_Final(snap->md, &ctx);
                if (snap->nr_blk_descs > 0) {
       +                SHA256_CTX ctx;
       +                uint64_t i;
       +                /*
       +                 * The snapshot hash is calculated over the
       +                 * hash of its block descriptors.
       +                 */
       +                SHA256_Init(&ctx);
       +                for (i = 0; i < snap->nr_blk_descs; i++) {
       +                        struct blk_desc *blk_desc;
       +                        blk_desc = &snap->blk_desc[i];
       +                        SHA256_Update(&ctx, blk_desc->md,
       +                                      sizeof(blk_desc->md));
       +                }
       +                SHA256_Final(snap->md, &ctx);
                        if (msg != NULL) {
                                size_t size;