Check buffer size in chunk_blk() - dedup - deduplicating backup program HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/ DIR Log DIR Files DIR Refs DIR Tags DIR README DIR LICENSE --- DIR commit 07c41115923df14d48ec16279ed14dcb0df598e1 DIR parent 32c20d64995844daaaed9c9a11afc03ae68c7753 HTML Author: z3bra <contactatz3bradotorg> Date: Sun, 17 Feb 2019 14:42:59 +0100 Check buffer size in chunk_blk() It also changes the for loop so we can chunk a block if the pattern matches right at the beginning of the block, thus making the smallest chunk size WINSIZ instead of WINSIZ + 1. Diffstat: M dedup.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) --- DIR diff --git a/dedup.c b/dedup.c @@ -140,6 +140,10 @@ chunk_blk(uint8_t *buf, size_t size) size_t i; uint32_t fp; + /* buzhash should be at least WINSIZ */ + if (size < WINSIZ) + return size; + /* * To achieve better deduplication, we chunk blocks based on a * recurring pattern occuring on the data stream. A fixed window @@ -150,8 +154,9 @@ chunk_blk(uint8_t *buf, size_t size) * WINSIZ the smallest possible block size. */ fp = buzh_init(buf, WINSIZ); - for (i = 1; i < size - WINSIZ; i++) { - fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ - 1], WINSIZ); + for (i = 0; i < size - WINSIZ; i++) { + if (i > 0) + fp = buzh_update(fp, buf[i - 1], buf[WINSIZ + i - 1], WINSIZ); if ((fp & HASHMSK) == 0) return i + WINSIZ; } @@ -387,10 +392,7 @@ dedup(int fd, char *msg) struct bdescr bdescr; size_t blksiz; - if (n > WINSIZ) - blksiz = chunk_blk(bp, n); - else - blksiz = n; + blksiz = chunk_blk(bp, n); memcpy(bdescr.md, md, sizeof(bdescr)); bdescr.offset = enthdr.store_size;