URI: 
       Hook in rolling hash - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit 9bee83eb425e6424e1cb6c9e5fbe6dced9d005ce
   DIR parent c0597760335a6dfcfeb37a023f6362567079ba7f
  HTML Author: sin <sin@2f30.org>
       Date:   Sat, 16 Feb 2019 23:49:36 +0000
       
       Hook in rolling hash
       
       Diffstat:
         M dedup.c                             |      69 ++++++++++++++++++-------------
       
       1 file changed, 41 insertions(+), 28 deletions(-)
       ---
   DIR diff --git a/dedup.c b/dedup.c
       @@ -16,8 +16,8 @@
        #define STOREF        ".store"
        #define CACHEF        ".cache"
        
       -#define BLKSIZ        65536
       -#define WINSIZ        4095
       +#define BLKSIZ        (65536*4)
       +#define WINSIZ        127
        #define MDSIZ        SHA256_DIGEST_LENGTH
        
        #define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
       @@ -140,7 +140,7 @@ chunk_blk(uint8_t *buf, size_t size)
                 */
                fp = buzh_init(buf, WINSIZ);
                for (i = 1; i < size - WINSIZ; i++) {
       -                fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ], WINSIZ);
       +                fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ - 1], WINSIZ);
                        if ((fp & 0x00001fff) == 0)
                                return i + WINSIZ;
                }
       @@ -366,10 +366,9 @@ lookup_blk(uint8_t *md, struct bdescr *bdescr)
        void
        dedup(int fd)
        {
       -        uint8_t md[MDSIZ];
                uint8_t *buf;
       -        SHA256_CTX ctx;
                struct ent *ent;
       +        SHA256_CTX ctx;
                ssize_t n;
        
                buf = alloc_buf(BLKSIZ);
       @@ -377,36 +376,50 @@ dedup(int fd)
        
                SHA256_Init(&ctx);
                while ((n = xread(fd, buf, BLKSIZ)) > 0) {
       -                struct bdescr bdescr;
       +                uint8_t *bp = buf;
        
       -                hash_blk(buf, n, md);
       +                while (n > 0) {
       +                        uint8_t md[MDSIZ];
       +                        struct bdescr bdescr;
       +                        size_t blksiz;
        
       -                /* Calculate file hash one block at a time */
       -                SHA256_Update(&ctx, buf, n);
       +                        if (n > WINSIZ)
       +                                blksiz = chunk_blk(bp, n);
       +                        else
       +                                blksiz = n;
        
       -                ent = grow_ent(ent, ent->nblks + 1);
       -                if (lookup_blk(md, &bdescr) < 0) {
       -                        struct bdescr bdescr;
       -                        struct cent *cent;
       +                        hash_blk(bp, blksiz, md);
        
       -                        /* Block not found, create new block descriptor */
       -                        memcpy(bdescr.md, md, sizeof(bdescr));
       -                        bdescr.offset = store_size();
       -                        bdescr.size = n;
       +                        /* Calculate file hash one block at a time */
       +                        SHA256_Update(&ctx, bp, blksiz);
        
       -                        /* Update index entry */
       -                        ent->bdescr[ent->nblks++] = bdescr;
       +                        ent = grow_ent(ent, ent->nblks + 1);
       +                        if (lookup_blk(md, &bdescr) < 0) {
       +                                struct bdescr bdescr;
       +                                struct cent *cent;
        
       -                        /* Store block */
       -                        append_blk(buf, n);
       +                                /* Block not found, create new block descriptor */
       +                                memcpy(bdescr.md, md, sizeof(bdescr));
       +                                bdescr.offset = store_size();
       +                                bdescr.size = blksiz;
        
       -                        /* Create a cache entry for this block */
       -                        cent = alloc_cent();
       -                        cent->bdescr = bdescr;
       -                        add_cent(cent);
       -                } else {
       -                        /* Found block with the same hash, update index entry */
       -                        ent->bdescr[ent->nblks++] = bdescr;
       +                                /* Update index entry */
       +                                ent->bdescr[ent->nblks++] = bdescr;
       +
       +                                /* Store block */
       +                                append_blk(bp, blksiz);
       +
       +                                /* Create a cache entry for this block */
       +                                cent = alloc_cent();
       +                                cent->bdescr = bdescr;
       +                                add_cent(cent);
       +                        } else {
       +                                /* Found block with the same hash, update index entry */
       +                                ent->bdescr[ent->nblks++] = bdescr;
       +                        }
       +
       +                        bp += blksiz;
       +                        n -= blksiz;
                        }
                }