tdpack.c - dedup - data deduplication program
HTML git clone git://bitreich.org/dedup/ git://hg6vgqziawt5s4dj.onion/dedup/
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR README
DIR LICENSE
---
tdpack.c (4115B)
---
1 #include <sys/types.h>
2 #include <sys/stat.h>
3 #include <sys/file.h>
4
5 #include <err.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12
13 #include "arg.h"
14 #include "blake2.h"
15 #include "dedup.h"
16
17 static struct snap_hdr snap_hdr;
18 static struct blk_hdr blk_hdr;
19 static struct icache *icache;
20 static int ifd;
21 static int sfd;
22 static int hash_algo = HASH_BLAKE2B;
23 static int compr_algo = COMPR_LZ4;
24
25 int verbose;
26 char *argv0;
27
28 static void
29 dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
30 {
31 uint8_t md[MD_SIZE];
32 struct blk_desc blk_desc;
33 struct compr_ctx ctx;
34 uint8_t *compr_buf;
35 size_t n, csize;
36
37 if (compr_init(&ctx, compr_algo) < 0)
38 errx(1, "compr_init failed");
39 csize = compr_size(&ctx, BLKSIZE_MAX);
40 compr_buf = alloc_buf(csize);
41
42 n = compr(&ctx, chunkp, compr_buf, chunk_size, csize);
43 hash_blk(compr_buf, n, md, hash_algo);
44
45 snap_hdr.st.orig_size += chunk_size;
46 snap_hdr.st.compr_size += n;
47
48 memcpy(blk_desc.md, md, sizeof(blk_desc.md));
49 if (lookup_icache(icache, &blk_desc) < 0) {
50 blk_desc.offset = blk_hdr.size;
51 blk_desc.size = n;
52
53 snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
54 append_blk(sfd, &blk_hdr, compr_buf, &blk_desc);
55
56 insert_icache(icache, &blk_desc);
57
58 snap_hdr.st.dedup_size += blk_desc.size;
59 snap_hdr.st.nr_blks++;
60
61 if (blk_desc.size > snap_hdr.st.max_blk_size)
62 snap_hdr.st.max_blk_size = blk_desc.size;
63 if (blk_desc.size < snap_hdr.st.min_blk_size)
64 snap_hdr.st.min_blk_size = blk_desc.size;
65 } else {
66 snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
67 }
68
69 free(compr_buf);
70 compr_final(&ctx);
71 }
72
73 static void
74 dedup(int fd, char *msg)
75 {
76 struct snap *snap;
77 struct chunker *chunker;
78
79 snap = alloc_snap();
80 chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
81 HASHMASK_BITS, WINSIZE);
82
83 while (fill_chunker(chunker) > 0) {
84 uint8_t *chunkp;
85 size_t chunk_size;
86
87 chunkp = get_chunk(chunker, &chunk_size);
88 snap = grow_snap(snap, snap->nr_blk_descs + 1);
89 dedup_chunk(snap, chunkp, chunk_size);
90 drain_chunker(chunker);
91 }
92
93 if (snap->nr_blk_descs > 0) {
94 if (msg != NULL) {
95 size_t size;
96
97 size = strlen(msg) + 1;
98 if (size > sizeof(snap->msg))
99 size = sizeof(snap->msg);
100 memcpy(snap->msg, msg, size);
101 snap->msg[size - 1] = '\0';
102 }
103 hash_snap(snap, snap->md, hash_algo);
104 append_snap(ifd, &snap_hdr, snap);
105 }
106
107 free_chunker(chunker);
108 free_snap(snap);
109 }
110
111 static int
112 build_icache(struct snap *snap, void *arg)
113 {
114 struct compr_ctx ctx;
115 uint8_t *buf;
116 uint64_t i;
117
118 if (compr_init(&ctx, compr_algo) < 0)
119 errx(1, "compr_init failed");
120 buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
121 for (i = 0; i < snap->nr_blk_descs; i++) {
122 struct blk_desc *blk_desc;
123
124 blk_desc = &snap->blk_desc[i];
125 insert_icache(icache, blk_desc);
126 }
127 free(buf);
128 compr_final(&ctx);
129 return WALK_CONTINUE;
130 }
131
132 static void
133 init(void)
134 {
135 ifd = open(SNAPSF, O_RDWR, 0600);
136 if (ifd < 0)
137 err(1, "open %s", SNAPSF);
138
139 sfd = open(STOREF, O_RDWR, 0600);
140 if (sfd < 0)
141 err(1, "open %s", STOREF);
142
143 if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
144 flock(sfd, LOCK_NB | LOCK_EX) < 0)
145 err(1, "flock");
146
147
148 xlseek(ifd, 0, SEEK_SET);
149 load_snap_hdr(ifd, &snap_hdr);
150 xlseek(sfd, 0, SEEK_SET);
151 load_blk_hdr(sfd, &blk_hdr, &compr_algo, &hash_algo);
152
153 icache = alloc_icache();
154 walk_snap(ifd, &snap_hdr, build_icache, NULL);
155 }
156
157 static void
158 term(void)
159 {
160 xlseek(ifd, 0, SEEK_SET);
161 write_snap_hdr(ifd, &snap_hdr);
162 xlseek(sfd, 0, SEEK_SET);
163 write_blk_hdr(sfd, &blk_hdr);
164
165 fsync(ifd);
166 fsync(sfd);
167
168 close(ifd);
169 close(sfd);
170
171 free_icache(icache);
172 }
173
174 static void
175 usage(void)
176 {
177 fprintf(stderr, "usage: %s [-v] [-m message] [repo]\n", argv0);
178 exit(1);
179 }
180
181 int
182 main(int argc, char *argv[])
183 {
184 char *repo, *msg = NULL;
185
186 ARGBEGIN {
187 case 'm':
188 msg = EARGF(usage());
189 break;
190 case 'v':
191 verbose++;
192 break;
193 default:
194 usage();
195 } ARGEND
196
197 switch (argc) {
198 case 0:
199 repo = ".";
200 break;
201 case 1:
202 repo = argv[0];
203 break;
204 default:
205 usage();
206 };
207
208 if (chdir(repo) < 0)
209 err(1, "chdir: %s", repo);
210
211 init();
212 dedup(STDIN_FILENO, msg);
213 term();
214 return 0;
215 }