URI: 
       tdpack.c - dedup - data deduplication program
  HTML git clone git://bitreich.org/dedup/ git://hg6vgqziawt5s4dj.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
       tdpack.c (4115B)
       ---
            1 #include <sys/types.h>
            2 #include <sys/stat.h>
            3 #include <sys/file.h>
            4 
            5 #include <err.h>
            6 #include <fcntl.h>
            7 #include <stdio.h>
            8 #include <stdint.h>
            9 #include <stdlib.h>
           10 #include <string.h>
           11 #include <unistd.h>
           12 
           13 #include "arg.h"
           14 #include "blake2.h"
           15 #include "dedup.h"
           16 
           17 static struct snap_hdr snap_hdr;
           18 static struct blk_hdr blk_hdr;
           19 static struct icache *icache;
           20 static int ifd;
           21 static int sfd;
           22 static int hash_algo = HASH_BLAKE2B;
           23 static int compr_algo = COMPR_LZ4;
           24 
           25 int verbose;
           26 char *argv0;
           27 
           28 static void
           29 dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
           30 {
           31         uint8_t md[MD_SIZE];
           32         struct blk_desc blk_desc;
           33         struct compr_ctx ctx;
           34         uint8_t *compr_buf;
           35         size_t n, csize;
           36 
           37         if (compr_init(&ctx, compr_algo) < 0)
           38                 errx(1, "compr_init failed");
           39         csize = compr_size(&ctx, BLKSIZE_MAX);
           40         compr_buf = alloc_buf(csize);
           41 
           42         n = compr(&ctx, chunkp, compr_buf, chunk_size, csize);
           43         hash_blk(compr_buf, n, md, hash_algo);
           44 
           45         snap_hdr.st.orig_size += chunk_size;
           46         snap_hdr.st.compr_size += n;
           47 
           48         memcpy(blk_desc.md, md, sizeof(blk_desc.md));
           49         if (lookup_icache(icache, &blk_desc) < 0) {
           50                 blk_desc.offset = blk_hdr.size;
           51                 blk_desc.size = n;
           52 
           53                 snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
           54                 append_blk(sfd, &blk_hdr, compr_buf, &blk_desc);
           55 
           56                 insert_icache(icache, &blk_desc);
           57 
           58                 snap_hdr.st.dedup_size += blk_desc.size;
           59                 snap_hdr.st.nr_blks++;
           60 
           61                 if (blk_desc.size > snap_hdr.st.max_blk_size)
           62                         snap_hdr.st.max_blk_size = blk_desc.size;
           63                 if (blk_desc.size < snap_hdr.st.min_blk_size)
           64                         snap_hdr.st.min_blk_size = blk_desc.size;
           65         } else {
           66                 snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
           67         }
           68 
           69         free(compr_buf);
           70         compr_final(&ctx);
           71 }
           72 
           73 static void
           74 dedup(int fd, char *msg)
           75 {
           76         struct snap *snap;
           77         struct chunker *chunker;
           78 
           79         snap = alloc_snap();
           80         chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
           81                                 HASHMASK_BITS, WINSIZE);
           82 
           83         while (fill_chunker(chunker) > 0) {
           84                 uint8_t *chunkp;
           85                 size_t chunk_size;
           86 
           87                 chunkp = get_chunk(chunker, &chunk_size);
           88                 snap = grow_snap(snap, snap->nr_blk_descs + 1);
           89                 dedup_chunk(snap, chunkp, chunk_size);
           90                 drain_chunker(chunker);
           91         }
           92 
           93         if (snap->nr_blk_descs > 0) {
           94                 if (msg != NULL) {
           95                         size_t size;
           96 
           97                         size = strlen(msg) + 1;
           98                         if (size > sizeof(snap->msg))
           99                                 size = sizeof(snap->msg);
          100                         memcpy(snap->msg, msg, size);
          101                         snap->msg[size - 1] = '\0';
          102                 }
          103                 hash_snap(snap, snap->md, hash_algo);
          104                 append_snap(ifd, &snap_hdr, snap);
          105         }
          106 
          107         free_chunker(chunker);
          108         free_snap(snap);
          109 }
          110 
          111 static int
          112 build_icache(struct snap *snap, void *arg)
          113 {
          114         struct compr_ctx ctx;
          115         uint8_t *buf;
          116         uint64_t i;
          117 
          118         if (compr_init(&ctx, compr_algo) < 0)
          119                 errx(1, "compr_init failed");
          120         buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
          121         for (i = 0; i < snap->nr_blk_descs; i++) {
          122                 struct blk_desc *blk_desc;
          123 
          124                 blk_desc = &snap->blk_desc[i];
          125                 insert_icache(icache, blk_desc);
          126         }
          127         free(buf);
          128         compr_final(&ctx);
          129         return WALK_CONTINUE;
          130 }
          131 
          132 static void
          133 init(void)
          134 {
          135         ifd = open(SNAPSF, O_RDWR, 0600);
          136         if (ifd < 0)
          137                 err(1, "open %s", SNAPSF);
          138 
          139         sfd = open(STOREF, O_RDWR, 0600);
          140         if (sfd < 0)
          141                 err(1, "open %s", STOREF);
          142 
          143         if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
          144             flock(sfd, LOCK_NB | LOCK_EX) < 0)
          145                 err(1, "flock");
          146 
          147 
          148         xlseek(ifd, 0, SEEK_SET);
          149         load_snap_hdr(ifd, &snap_hdr);
          150         xlseek(sfd, 0, SEEK_SET);
          151         load_blk_hdr(sfd, &blk_hdr, &compr_algo, &hash_algo);
          152 
          153         icache = alloc_icache();
          154         walk_snap(ifd, &snap_hdr, build_icache, NULL);
          155 }
          156 
          157 static void
          158 term(void)
          159 {
          160         xlseek(ifd, 0, SEEK_SET);
          161         write_snap_hdr(ifd, &snap_hdr);
          162         xlseek(sfd, 0, SEEK_SET);
          163         write_blk_hdr(sfd, &blk_hdr);
          164 
          165         fsync(ifd);
          166         fsync(sfd);
          167 
          168         close(ifd);
          169         close(sfd);
          170 
          171         free_icache(icache);
          172 }
          173 
          174 static void
          175 usage(void)
          176 {
          177         fprintf(stderr, "usage: %s [-v] [-m message] [repo]\n", argv0);
          178         exit(1);
          179 }
          180 
          181 int
          182 main(int argc, char *argv[])
          183 {
          184         char *repo, *msg = NULL;
          185 
          186         ARGBEGIN {
          187         case 'm':
          188                 msg = EARGF(usage());
          189                 break;
          190         case 'v':
          191                 verbose++;
          192                 break;
          193         default:
          194                 usage();
          195         } ARGEND
          196 
          197         switch (argc) {
          198         case 0:
          199                 repo = ".";
          200                 break;
          201         case 1:
          202                 repo = argv[0];
          203                 break;
          204         default:
          205                 usage();
          206         };
          207 
          208         if (chdir(repo) < 0)
          209                 err(1, "chdir: %s", repo);
          210 
          211         init();
          212         dedup(STDIN_FILENO, msg);
          213         term();
          214         return 0;
          215 }