Tweaks - dedup - deduplicating backup program
HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR README
DIR LICENSE
---
DIR commit 9a790f78b6623fd5340369f71e8649cefbe2d4ec
DIR parent 22ec348a2fff0a5f80a3bdb4fc1bc0ffd4aec838
HTML Author: sin <sin@2f30.org>
Date: Sun, 17 Feb 2019 00:14:18 +0000

Tweaks

Diffstat:
  M dedup.c | 9 +++++----
  1 file changed, 5 insertions(+), 4 deletions(-)
---
DIR diff --git a/dedup.c b/dedup.c
@@ -16,8 +16,9 @@
 #define STOREF ".store"
 #define CACHEF ".cache"
 
-#define BLKSIZ 65536
-#define WINSIZ 32
+#define BLKSIZ (8*1024*1024)
+#define WINSIZ 4096
+#define HASHMSK ((1ul << 21) - 1)
 #define MDSIZ SHA256_DIGEST_LENGTH
 
 #define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
@@ -134,14 +135,14 @@ chunk_blk(uint8_t *buf, size_t size)
 	/*
 	 * Chunking blocks is decided using a rolling hash + binary pattern.
 	 * The buzhash algorithm is used to "fingerprint" a fixed size window.
-	 * Once the lower 13 bits of this fingerprint are all zeros,
+	 * Once the lower bits of this fingerprint are all zeros,
 	 * the block is chunked.
 	 * If the pattern can't be matched, then we return the buffer size.
 	 */
 	fp = buzh_init(buf, WINSIZ);
 	for (i = 1; i < size - WINSIZ; i++) {
 		fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ - 1], WINSIZ);
-		if ((fp & 0x00001fff) == 0)
+		if ((fp & HASHMSK) == 0)
 			return i + WINSIZ;
 	}
 	return size;