tdpack.c - dedup - data deduplication program HTML git clone git://bitreich.org/dedup/ git://hg6vgqziawt5s4dj.onion/dedup/ DIR Log DIR Files DIR Refs DIR Tags DIR README DIR LICENSE --- tdpack.c (4115B) --- 1 #include <sys/types.h> 2 #include <sys/stat.h> 3 #include <sys/file.h> 4 5 #include <err.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <stdint.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <unistd.h> 12 13 #include "arg.h" 14 #include "blake2.h" 15 #include "dedup.h" 16 17 static struct snap_hdr snap_hdr; 18 static struct blk_hdr blk_hdr; 19 static struct icache *icache; 20 static int ifd; 21 static int sfd; 22 static int hash_algo = HASH_BLAKE2B; 23 static int compr_algo = COMPR_LZ4; 24 25 int verbose; 26 char *argv0; 27 28 static void 29 dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size) 30 { 31 uint8_t md[MD_SIZE]; 32 struct blk_desc blk_desc; 33 struct compr_ctx ctx; 34 uint8_t *compr_buf; 35 size_t n, csize; 36 37 if (compr_init(&ctx, compr_algo) < 0) 38 errx(1, "compr_init failed"); 39 csize = compr_size(&ctx, BLKSIZE_MAX); 40 compr_buf = alloc_buf(csize); 41 42 n = compr(&ctx, chunkp, compr_buf, chunk_size, csize); 43 hash_blk(compr_buf, n, md, hash_algo); 44 45 snap_hdr.st.orig_size += chunk_size; 46 snap_hdr.st.compr_size += n; 47 48 memcpy(blk_desc.md, md, sizeof(blk_desc.md)); 49 if (lookup_icache(icache, &blk_desc) < 0) { 50 blk_desc.offset = blk_hdr.size; 51 blk_desc.size = n; 52 53 snap->blk_desc[snap->nr_blk_descs++] = blk_desc; 54 append_blk(sfd, &blk_hdr, compr_buf, &blk_desc); 55 56 insert_icache(icache, &blk_desc); 57 58 snap_hdr.st.dedup_size += blk_desc.size; 59 snap_hdr.st.nr_blks++; 60 61 if (blk_desc.size > snap_hdr.st.max_blk_size) 62 snap_hdr.st.max_blk_size = blk_desc.size; 63 if (blk_desc.size < snap_hdr.st.min_blk_size) 64 snap_hdr.st.min_blk_size = blk_desc.size; 65 } else { 66 snap->blk_desc[snap->nr_blk_descs++] = blk_desc; 67 } 68 69 free(compr_buf); 70 compr_final(&ctx); 71 } 72 73 static void 74 dedup(int fd, char *msg) 75 { 76 struct snap *snap; 77 struct chunker *chunker; 78 79 snap = alloc_snap(); 80 chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX, 81 HASHMASK_BITS, WINSIZE); 82 83 while (fill_chunker(chunker) > 0) { 84 uint8_t *chunkp; 85 size_t chunk_size; 86 87 chunkp = get_chunk(chunker, &chunk_size); 88 snap = grow_snap(snap, snap->nr_blk_descs + 1); 89 dedup_chunk(snap, chunkp, chunk_size); 90 drain_chunker(chunker); 91 } 92 93 if (snap->nr_blk_descs > 0) { 94 if (msg != NULL) { 95 size_t size; 96 97 size = strlen(msg) + 1; 98 if (size > sizeof(snap->msg)) 99 size = sizeof(snap->msg); 100 memcpy(snap->msg, msg, size); 101 snap->msg[size - 1] = '\0'; 102 } 103 hash_snap(snap, snap->md, hash_algo); 104 append_snap(ifd, &snap_hdr, snap); 105 } 106 107 free_chunker(chunker); 108 free_snap(snap); 109 } 110 111 static int 112 build_icache(struct snap *snap, void *arg) 113 { 114 struct compr_ctx ctx; 115 uint8_t *buf; 116 uint64_t i; 117 118 if (compr_init(&ctx, compr_algo) < 0) 119 errx(1, "compr_init failed"); 120 buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX)); 121 for (i = 0; i < snap->nr_blk_descs; i++) { 122 struct blk_desc *blk_desc; 123 124 blk_desc = &snap->blk_desc[i]; 125 insert_icache(icache, blk_desc); 126 } 127 free(buf); 128 compr_final(&ctx); 129 return WALK_CONTINUE; 130 } 131 132 static void 133 init(void) 134 { 135 ifd = open(SNAPSF, O_RDWR, 0600); 136 if (ifd < 0) 137 err(1, "open %s", SNAPSF); 138 139 sfd = open(STOREF, O_RDWR, 0600); 140 if (sfd < 0) 141 err(1, "open %s", STOREF); 142 143 if (flock(ifd, LOCK_NB | LOCK_EX) < 0 || 144 flock(sfd, LOCK_NB | LOCK_EX) < 0) 145 err(1, "flock"); 146 147 148 xlseek(ifd, 0, SEEK_SET); 149 load_snap_hdr(ifd, &snap_hdr); 150 xlseek(sfd, 0, SEEK_SET); 151 load_blk_hdr(sfd, &blk_hdr, &compr_algo, &hash_algo); 152 153 icache = alloc_icache(); 154 walk_snap(ifd, &snap_hdr, build_icache, NULL); 155 } 156 157 static void 158 term(void) 159 { 160 xlseek(ifd, 0, SEEK_SET); 161 write_snap_hdr(ifd, &snap_hdr); 162 xlseek(sfd, 0, SEEK_SET); 163 write_blk_hdr(sfd, &blk_hdr); 164 165 fsync(ifd); 166 fsync(sfd); 167 168 close(ifd); 169 close(sfd); 170 171 free_icache(icache); 172 } 173 174 static void 175 usage(void) 176 { 177 fprintf(stderr, "usage: %s [-v] [-m message] [repo]\n", argv0); 178 exit(1); 179 } 180 181 int 182 main(int argc, char *argv[]) 183 { 184 char *repo, *msg = NULL; 185 186 ARGBEGIN { 187 case 'm': 188 msg = EARGF(usage()); 189 break; 190 case 'v': 191 verbose++; 192 break; 193 default: 194 usage(); 195 } ARGEND 196 197 switch (argc) { 198 case 0: 199 repo = "."; 200 break; 201 case 1: 202 repo = argv[0]; 203 break; 204 default: 205 usage(); 206 }; 207 208 if (chdir(repo) < 0) 209 err(1, "chdir: %s", repo); 210 211 init(); 212 dedup(STDIN_FILENO, msg); 213 term(); 214 return 0; 215 }