Rework get_chunk_size() to return the size rather than the offset - dedup - deduplicating backup program HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/ DIR Log DIR Files DIR Refs DIR Tags DIR README DIR LICENSE --- DIR commit b1c7996c39784e81f73e4b1189f9b5786caad974 DIR parent 66a94e3ce6d7c193d13d0bad4b1b8f7ecd51f5d2 HTML Author: sin <sin@2f30.org> Date: Fri, 22 Feb 2019 19:32:12 +0000 Rework get_chunk_size() to return the size rather than the offset Diffstat: M chunker.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) --- DIR diff --git a/chunker.c b/chunker.c @@ -43,8 +43,6 @@ get_chunk_size(struct chunker *chunker) if (chunk_size < WINSIZE) return chunk_size; - bp = chunker->buf; - /* * To achieve better deduplication, we chunk blocks based on a * recurring pattern occurring on the data stream. A fixed window @@ -53,14 +51,15 @@ get_chunk_size(struct chunker *chunker) * When the rolling hash matches a given pattern the block is chunked * at the end of that window. */ + bp = &chunker->buf[chunker->rpos]; fp = buzh_init(bp, WINSIZE); - for (i = chunker->rpos; i < chunker->wpos - WINSIZE; i++) { - chunk_size = i + WINSIZE; + for (i = 0; i < chunk_size - WINSIZE; i++) { if (i > 0) - fp = buzh_update(fp, bp[i - 1], bp[chunk_size - 1], + fp = buzh_update(fp, bp[i - 1], bp[i + WINSIZE - 1], WINSIZE); - if (match_pattern(chunker, chunk_size, fp) == 1) - return chunk_size; + if (match_pattern(chunker, i + WINSIZE, fp) == 1) + return i + WINSIZE; + } return chunk_size; }