URI: 
       Store params in chunker struct - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit a41b0b187818a9659dda1fee972b8a9946889020
   DIR parent 3d32364aa8deb41341b21c25b726239c6e9c295c
  HTML Author: sin <sin@2f30.org>
       Date:   Fri,  8 Mar 2019 11:35:36 +0000
       
       Store params in chunker struct
       
       Diffstat:
         M chunker.c                           |      54 +++++++++++++++++++------------
       
       1 file changed, 33 insertions(+), 21 deletions(-)
       ---
   DIR diff --git a/chunker.c b/chunker.c
       @@ -15,6 +15,10 @@ struct chunker {
                size_t cap;
                size_t rpos;
                size_t wpos;
       +        size_t min_size;
       +        size_t max_size;
       +        size_t mask;
       +        size_t win_size;
        };
        
        /*
       @@ -65,40 +69,41 @@ static uint32_t buz[] = {
        static inline uint32_t
        buzh_init(uint8_t *buf, size_t size)
        {
       -        uint32_t fp;
       +        uint32_t sum;
                size_t i;
        
       -        for (i = 1, fp = 0; i < size; i++, buf++)
       -                fp ^= ROTL(buz[*buf], (size - i) % 32);
       +        for (i = 1, sum = 0; i < size; i++, buf++)
       +                sum ^= ROTL(buz[*buf], (size - i) % 32);
        
       -        return fp ^ buz[*buf];
       +        return sum ^ buz[*buf];
        }
        
        static inline uint32_t
       -buzh_update(uint32_t fp, uint8_t out, uint8_t in, size_t size)
       +buzh_update(uint32_t sum, uint8_t out, uint8_t in, size_t size)
        {
       -        return ROTL(fp, 1) ^ ROTL(buz[out], size % 32) ^ buz[in];
       +        return ROTL(sum, 1) ^ ROTL(buz[out], size % 32) ^ buz[in];
        }
        
        static inline int
       -match_pattern(size_t chunk_size, uint32_t fp)
       +match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t sum)
        {
       -        if (chunk_size >= BLKSIZE_MAX)
       +        if (chunk_size >= chunker->max_size)
                        return 1;
       -        if (chunk_size < BLKSIZE_MIN)
       +        if (chunk_size < chunker->min_size)
                        return 0;
       -        return (fp & HASHMASK_BITS) == 0;
       +        return (sum & chunker->mask) == 0;
        }
        
        static size_t
        get_chunk_size(struct chunker *chunker)
        {
       +        size_t chunk_size, win_size, i;
       +        uint32_t sum;
                uint8_t *bp;
       -        uint32_t fp;
       -        size_t i, chunk_size;
        
                chunk_size = chunker->wpos - chunker->rpos;
       -        if (chunk_size < WINSIZE)
       +        win_size = chunker->win_size;
       +        if (chunk_size < win_size)
                        return chunk_size;
        
                /*
       @@ -110,14 +115,17 @@ get_chunk_size(struct chunker *chunker)
                 * at the end of that window.
                 */
                bp = &chunker->buf[chunker->rpos];
       -        fp = buzh_init(bp, WINSIZE);
       -        for (i = 0; i < chunk_size - WINSIZE; i++) {
       -                if (i > 0)
       -                        fp = buzh_update(fp, bp[i - 1], bp[i + WINSIZE - 1],
       -                                         WINSIZE);
       -                if (match_pattern(i + WINSIZE, fp))
       -                        return i + WINSIZE;
       -                        
       +        sum = buzh_init(bp, win_size);
       +        for (i = 0; i < chunk_size - win_size; i++) {
       +                if (i > 0) {
       +                        uint8_t out = bp[i - 1];
       +                        uint8_t in = bp[i + win_size - 1];
       +
       +                        sum = buzh_update(sum, out, in, win_size);
       +                }
       +
       +                if (match_pattern(chunker, i + win_size, sum))
       +                        return i + win_size;
                }
                return chunk_size;
        }
       @@ -136,6 +144,10 @@ alloc_chunker(int fd, size_t cap)
                        err(1, "calloc");
                chunker->fd = fd;
                chunker->cap = cap;
       +        chunker->min_size = BLKSIZE_MIN;
       +        chunker->max_size = BLKSIZE_MAX;
       +        chunker->mask = HASHMASK_BITS;
       +        chunker->win_size = WINSIZE;
        
                return chunker;
        }