URI: 
       Initial commit - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit 72a5c2c2269c959061bfb34a05f29875653f3e92
  HTML Author: sin <sin@2f30.org>
       Date:   Tue, 20 Mar 2018 16:02:29 +0000
       
       Initial commit
       
       Diffstat:
         A LICENSE                             |      13 +++++++++++++
         A arg.h                               |      65 +++++++++++++++++++++++++++++++
         A dedup.c                             |     320 +++++++++++++++++++++++++++++++
       
       3 files changed, 398 insertions(+), 0 deletions(-)
       ---
   DIR diff --git a/LICENSE b/LICENSE
       @@ -0,0 +1,13 @@
       +© 2018 Dimitris Papastamos <sin@2f30.org>
       +
       +Permission to use, copy, modify, and distribute this software for any
       +purpose with or without fee is hereby granted, provided that the above
       +copyright notice and this permission notice appear in all copies.
       +
       +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   DIR diff --git a/arg.h b/arg.h
       @@ -0,0 +1,65 @@
       +/*
       + * Copy me if you can.
       + * by 20h
       + *
       + * Macro-based command line option parser. The macros below expand
       + * inside main() and operate directly on its argc and argv.
       + */
       +
       +#ifndef ARG_H__
       +#define ARG_H__
       +/* Receives the original argv[0]; only declared here, so the program must define it. */
       +extern char *argv0;
       +
       +/*
       + * use main(int argc, char *argv[])
       + *
       + * ARGBEGIN stores argv[0] in argv0, skips it, and then loops over every
       + * following argument that starts with '-' and has at least one more
       + * character. An argument that is exactly "--" is consumed and ends the
       + * loop. For each remaining option word it walks the characters after
       + * the '-' and opens a switch on the current one, so the code that
       + * follows ARGBEGIN is a braced list of case labels closed by ARGEND.
       + * The inner walk stops early once brk_ is set (by ARGF, EARGF or
       + * ARGNUMF) or once argv itself has been advanced.
       + */
       +#define ARGBEGIN        for (argv0 = *argv, argv++, argc--;\
       +                                        argv[0] && argv[0][0] == '-'\
       +                                        && argv[0][1];\
       +                                        argc--, argv++) {\
       +                                char argc_;\
       +                                char **argv_;\
       +                                int brk_;\
       +                                if (argv[0][1] == '-' && argv[0][2] == '\0') {\
       +                                        argv++;\
       +                                        argc--;\
       +                                        break;\
       +                                }\
       +                                for (brk_ = 0, argv[0]++, argv_ = argv;\
       +                                                argv[0][0] && !brk_;\
       +                                                argv[0]++) {\
       +                                        if (argv_ != argv)\
       +                                                break;\
       +                                        argc_ = argv[0][0];\
       +                                        switch (argc_)
       +
       +/*
       + * Handles obsolete -NUM syntax
       + *
       + * ARGNUM expands to the case labels '0' through '9' (the last one
       + * without a colon, which the user supplies).
       + */
       +#define ARGNUM                                case '0':\
       +                                        case '1':\
       +                                        case '2':\
       +                                        case '3':\
       +                                        case '4':\
       +                                        case '5':\
       +                                        case '6':\
       +                                        case '7':\
       +                                        case '8':\
       +                                        case '9'
       +/* ARGEND closes the two for loops opened by ARGBEGIN. */
       +#define ARGEND                        }\
       +                        }
       +/* ARGC() yields the option character currently being handled. */
       +#define ARGC()                argc_
       +/*
       + * ARGNUMF() ends the walk over the current word and converts it with
       + * estrtonum(); neither estrtonum nor INT_MAX is provided by this
       + * header, so a user of this macro has to supply both.
       + */
       +#define ARGNUMF()        (brk_ = 1, estrtonum(argv[0], 0, INT_MAX))
       +/*
       + * EARGF(x) yields the argument of the current option: the rest of the
       + * current word if there is any, otherwise the next argv element (which
       + * is then consumed). If neither exists it evaluates x and calls abort().
       + */
       +#define EARGF(x)        ((argv[0][1] == '\0' && argv[1] == NULL)?\
       +                                ((x), abort(), (char *)0) :\
       +                                (brk_ = 1, (argv[0][1] != '\0')?\
       +                                        (&argv[0][1]) :\
       +                                        (argc--, argv++, argv[0])))
       +/* ARGF() is like EARGF() but yields a null pointer when no argument is present. */
       +#define ARGF()                ((argv[0][1] == '\0' && argv[1] == NULL)?\
       +                                (char *)0 :\
       +                                (brk_ = 1, (argv[0][1] != '\0')?\
       +                                        (&argv[0][1]) :\
       +                                        (argc--, argv++, argv[0])))
       +/* LNGARG() yields a pointer to the current position inside the current word. */
       +#define LNGARG()        &argv[0][0]
       +
       +#endif
   DIR diff --git a/dedup.c b/dedup.c
       @@ -0,0 +1,320 @@
       +#include <sys/stat.h>
       +#include <err.h>
       +#include <fcntl.h>
       +#include <stdio.h>
       +#include <stdint.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <unistd.h>
       +#include <openssl/sha.h>
       +#include "arg.h"
       +
       +#define BLKSIZ        32768 /* capacity in bytes of struct blk's data[]; also the read size used by dedup() */
       +
       +struct enthdr { /* header stored at offset 0 of the index file */
       +        uint64_t flags; /* only read back, written out and dumped by the code in this file */
       +        uint64_t nents; /* number of entries that follow the header; bumped by append_ent() */
       +} __attribute__((packed));
       +
       +struct ent { /* one index entry: describes a single deduplicated input stream */
       +        uint64_t sz; /* size in bytes of this entry including blks[]; set by append_ent() */
       +        unsigned char md[SHA256_DIGEST_LENGTH]; /* SHA-256 over the whole input stream; used as its id */
       +        uint64_t nblks; /* number of elements in blks[] */
       +        uint64_t blks[]; /* store positions of the stream's blocks, in units of sizeof(struct blk) */
       +} __attribute__((packed));
       +
       +struct blk { /* one fixed-size record of the store file */
       +        unsigned char md[SHA256_DIGEST_LENGTH]; /* SHA-256 of the first sz bytes of data, see hash_blk() */
       +        uint64_t sz; /* number of bytes of data that are in use */
       +        unsigned char data[BLKSIZ]; /* block payload */
       +} __attribute__((packed));
       +
       +struct enthdr enthdr; /* in-memory copy of the index header; loaded by init() */
       +int ifd; /* descriptor of the "index" file, opened by init() */
       +int sfd; /* descriptor of the "store" file, opened by init() */
       +int verbose; /* set by -v; makes dump_ent() list the block indices as well */
       +char *argv0; /* program name, assigned by ARGBEGIN in main() */
       +
       +/*
       + * Write the first len bytes of md to stderr as lowercase hexadecimal,
       + * two digits per byte, without separators or a trailing newline.
       + */
       +void
       +dump_md(const unsigned char *md, size_t len)
       +{
       +        const unsigned char *end = md + len;
       +
       +        while (md < end)
       +                fprintf(stderr, "%02x", *md++);
       +}
       +
       +/*
       + * Print both fields of the index header to stderr in hexadecimal.
       + *
       + * hdr: header to print; it is only read.
       + */
       +void
       +dump_enthdr(struct enthdr *hdr)
       +{
       +        fprintf(stderr, "hdr->flags = %llx\n",
       +                (unsigned long long)hdr->flags);
       +        fprintf(stderr, "hdr->nents = %llx\n",
       +                (unsigned long long)hdr->nents);
       +}
       +
       +/*
       + * Print an index entry to stderr: its size and its stream digest and,
       + * when the global verbose flag is set, the block count followed by
       + * every block index.
       + *
       + * ent: entry to print; blks[] must hold ent->nblks elements.
       + */
       +void
       +dump_ent(struct ent *ent)
       +{
       +        uint64_t i;
       +
       +        /* Every field is unsigned, so the conversions are %llu, not %lld. */
       +        fprintf(stderr, "ent->sz: %llu\n", (unsigned long long)ent->sz);
       +        fprintf(stderr, "ent->md: ");
       +        dump_md(ent->md, sizeof(ent->md));
       +        fputc('\n', stderr);
       +        if (!verbose)
       +                return;
       +
       +        fprintf(stderr, "ent->nblks: %llu\n",
       +                (unsigned long long)ent->nblks);
       +        for (i = 0; i < ent->nblks; i++)
       +                fprintf(stderr, "ent->blks[%llu]: %llu\n",
       +                        (unsigned long long)i,
       +                        (unsigned long long)ent->blks[i]);
       +}
       +
       +/*
       + * Print a block's digest and payload size to stderr.
       + *
       + * blk: block to print; it is only read.
       + */
       +void
       +dump_blk(struct blk *blk)
       +{
       +        fprintf(stderr, "blk->md: ");
       +        dump_md(blk->md, sizeof(blk->md));
       +        /*
       +         * The newline has to go to stderr like the rest of the dump;
       +         * stdout carries the extracted data stream.
       +         */
       +        fputc('\n', stderr);
       +        fprintf(stderr, "blk->sz: %llu\n", (unsigned long long)blk->sz);
       +}
       +
       +/*
       + * Append ent to the index file and account for it in the header.
       + *
       + * Increments enthdr.nents and rewrites the header at offset 0, then
       + * sets ent->sz to the on-disk size of the entry (fixed part plus
       + * ent->nblks block indices) and writes the entry at the end of the
       + * file. The header goes first so that on a still-empty index the entry
       + * lands behind it. Any seek or write failure, including a short
       + * write, terminates the program.
       + */
       +void
       +append_ent(struct ent *ent)
       +{
       +        ssize_t n;
       +
       +        enthdr.nents++;
       +        if (lseek(ifd, 0, SEEK_SET) == -1)
       +                err(1, "lseek index");
       +        n = write(ifd, &enthdr, sizeof(enthdr));
       +        if (n == -1)
       +                err(1, "write index");
       +        if ((size_t)n != sizeof(enthdr))
       +                errx(1, "short write to index");
       +
       +        if (lseek(ifd, 0, SEEK_END) == -1)
       +                err(1, "lseek index");
       +        ent->sz = sizeof(*ent);
       +        ent->sz += ent->nblks * sizeof(ent->blks[0]);
       +        n = write(ifd, ent, ent->sz);
       +        if (n == -1)
       +                err(1, "write index");
       +        if ((uint64_t)n != ent->sz)
       +                errx(1, "short write to index");
       +}
       +
       +/*
       + * Allocate an index entry with room for no block indices.
       + *
       + * The entry is zero-filled, so nblks starts at 0; dedup() reads nblks
       + * before ever assigning it. Terminates the program if memory cannot be
       + * obtained. The caller owns the result and releases it with free().
       + */
       +struct ent *
       +alloc_ent(void)
       +{
       +        struct ent *ent;
       +
       +        ent = calloc(1, sizeof(*ent));
       +        if (ent == NULL)
       +                err(1, "calloc");
       +        return ent;
       +}
       +
       +/*
       + * Resize ent so that blks[] can hold nblks elements.
       + *
       + * ent:   entry previously obtained from alloc_ent() or grow_ent()
       + * nblks: requested capacity of blks[]
       + *
       + * Returns the possibly moved entry; the pointer passed in must not be
       + * used afterwards. Existing contents are preserved and ent->nblks is
       + * not modified. Terminates the program if the size does not fit in a
       + * size_t or if memory cannot be obtained.
       + */
       +struct ent *
       +grow_ent(struct ent *ent, uint64_t nblks)
       +{
       +        size_t sz;
       +
       +        /* nblks may come straight from the index file; refuse to wrap. */
       +        if (nblks > (SIZE_MAX - sizeof(*ent)) / sizeof(ent->blks[0]))
       +                errx(1, "entry too large");
       +        sz = sizeof(*ent);
       +        sz += nblks * sizeof(ent->blks[0]);
       +        ent = realloc(ent, sz);
       +        if (ent == NULL)
       +                err(1, "realloc");
       +        return ent;
       +}
       +
       +/*
       + * Store the SHA-256 digest of the first blk->sz bytes of blk->data
       + * in blk->md.
       + */
       +void
       +hash_blk(struct blk *blk)
       +{
       +        /* One-shot form of the Init/Update/Final sequence. */
       +        SHA256(blk->data, blk->sz, blk->md);
       +}
       +
       +/*
       + * Read the store record with index blkidx into blk.
       + *
       + * blk:    destination for the whole record (digest, size and data)
       + * blkidx: record number; the byte offset is blkidx * sizeof(*blk)
       + *
       + * Terminates the program if the seek or read fails, if the record is
       + * incomplete, or if its stored size exceeds BLKSIZ, so callers never
       + * see a partially filled or inconsistent block.
       + */
       +void
       +read_blk(struct blk *blk, off_t blkidx)
       +{
       +        ssize_t n;
       +
       +        if (lseek(sfd, blkidx * sizeof(*blk), SEEK_SET) == -1)
       +                err(1, "lseek store");
       +        n = read(sfd, blk, sizeof(*blk));
       +        if (n == -1)
       +                err(1, "read store");
       +        if ((size_t)n != sizeof(*blk))
       +                errx(1, "short read from store");
       +        /* sz is used as a length into data[] by the callers. */
       +        if (blk->sz > BLKSIZ)
       +                errx(1, "corrupt block in store");
       +}
       +
       +/*
       + * Append the complete record *blk to the end of the store file.
       + *
       + * Terminates the program if the seek or write fails or if the record
       + * is only partially written.
       + */
       +void
       +append_blk(struct blk *blk)
       +{
       +        ssize_t n;
       +
       +        if (lseek(sfd, 0, SEEK_END) == -1)
       +                err(1, "lseek store");
       +        n = write(sfd, blk, sizeof(*blk));
       +        if (n == -1)
       +                err(1, "write store");
       +        if ((size_t)n != sizeof(*blk))
       +                errx(1, "short write to store");
       +}
       +
       +/*
       + * Search the store for a record whose digest equals b1->md.
       + *
       + * b1:     block whose md field has already been computed
       + * blkidx: receives the record index of the first match
       + *
       + * Returns 0 and sets *blkidx on a match, -1 if no record matches.
       + * The store is scanned record by record from the beginning, and the
       + * file offset of sfd is moved as a side effect. Terminates the program
       + * if the store size cannot be determined.
       + */
       +int
       +lookup_blk(struct blk *b1, uint64_t *blkidx)
       +{
       +        struct blk b2;
       +        uint64_t nblks;
       +        uint64_t i;
       +        off_t end;
       +
       +        /* A failed lseek must not be mistaken for a huge record count. */
       +        end = lseek(sfd, 0, SEEK_END);
       +        if (end == -1)
       +                err(1, "lseek store");
       +        nblks = (uint64_t)end / sizeof(struct blk);
       +
       +        for (i = 0; i < nblks; i++) {
       +                read_blk(&b2, i);
       +                if (memcmp(b1->md, b2.md, sizeof(b1->md)) == 0) {
       +                        *blkidx = i;
       +                        return 0;
       +                }
       +        }
       +        return -1;
       +}
       +
       +/*
       + * Read fd until end of file, store its contents block by block and
       + * record the stream in the index.
       + *
       + * Each read of up to BLKSIZ bytes becomes one block. The block is
       + * hashed; if a record with the same digest already exists in the store
       + * its index is reused, otherwise the block is appended to the store.
       + * The list of block indices, together with the SHA-256 of the whole
       + * stream, is appended to the index as a new entry.
       + *
       + * Terminates the program on a read error.
       + */
       +void
       +dedup(int fd)
       +{
       +        struct blk blk;
       +        struct ent *ent;
       +        SHA256_CTX ctx;
       +        ssize_t n;
       +
       +        ent = alloc_ent();
       +        /* Start from an empty block list no matter how ent was allocated. */
       +        ent->nblks = 0;
       +        SHA256_Init(&ctx);
       +        while ((n = read(fd, blk.data, BLKSIZ)) > 0) {
       +                uint64_t blkidx;
       +
       +                blk.sz = n;
       +                /*
       +                 * The whole record is written to the store, so clear
       +                 * the unused tail instead of storing stack garbage.
       +                 * The digest only covers the first sz bytes.
       +                 */
       +                memset(blk.data + n, 0, BLKSIZ - (size_t)n);
       +                hash_blk(&blk);
       +                SHA256_Update(&ctx, blk.data, blk.sz);
       +                ent = grow_ent(ent, ent->nblks + 1);
       +
       +                if (lookup_blk(&blk, &blkidx) == -1) {
       +                        off_t offs;
       +
       +                        /* New block: its index is the current record count. */
       +                        offs = lseek(sfd, 0, SEEK_END);
       +                        if (offs == -1)
       +                                err(1, "lseek store");
       +                        blkidx = (uint64_t)offs / sizeof(blk);
       +                        append_blk(&blk);
       +                }
       +                ent->blks[ent->nblks++] = blkidx;
       +        }
       +        if (n < 0)
       +                err(1, "read");
       +
       +        SHA256_Final(ent->md, &ctx);
       +        append_ent(ent);
       +        free(ent);
       +}
       +
       +/*
       + * Decode the hexadecimal string idstr into the digest buffer id.
       + *
       + * idstr: NUL-terminated string of hex digits
       + * id:    output buffer with room for SHA256_DIGEST_LENGTH bytes
       + *
       + * The buffer is zero-filled first and then each pair of characters is
       + * converted to one byte. At most SHA256_DIGEST_LENGTH bytes are
       + * decoded, so an over-long string cannot overrun id; a trailing odd
       + * character is ignored and a pair that is not valid hex leaves its
       + * byte at zero.
       + */
       +void
       +str2id(unsigned char *idstr, uint8_t *id)
       +{
       +        const char *p = (const char *)idstr;
       +        size_t i, len = strlen(p) / 2;
       +
       +        memset(id, 0, SHA256_DIGEST_LENGTH);
       +        if (len > SHA256_DIGEST_LENGTH)
       +                len = SHA256_DIGEST_LENGTH;
       +        for (i = 0; i < len; i++, p += 2)
       +                sscanf(p, "%2hhx", &id[i]);
       +}
       +
       +/*
       + * Write the stream whose digest is given by the hex string id to
       + * standard output.
       + *
       + * id: NUL-terminated hex digest, exactly 2 * SHA256_DIGEST_LENGTH
       + *     characters long
       + * fd: not used; output always goes to descriptor 1
       + *
       + * The index entries are read in order until one with a matching digest
       + * is found; its blocks are then fetched from the store and written out
       + * in sequence. If no entry matches, nothing is written. Terminates the
       + * program on a malformed id, on any I/O error and on a truncated or
       + * inconsistent index or store.
       + */
       +void
       +extract(unsigned char *id, int fd)
       +{
       +        unsigned char md[SHA256_DIGEST_LENGTH] = { 0 };
       +        struct ent *ent;
       +        uint64_t i, j;
       +        ssize_t n;
       +
       +        (void)fd;
       +
       +        /* md holds exactly one digest; reject anything that cannot fill it. */
       +        if (strlen((char *)id) != 2 * sizeof(md))
       +                errx(1, "invalid id length");
       +        str2id(id, md);
       +
       +        if (lseek(ifd, sizeof(enthdr), SEEK_SET) == -1)
       +                err(1, "lseek index");
       +        for (i = 0; i < enthdr.nents; i++) {
       +                size_t blkssz;
       +
       +                ent = alloc_ent();
       +                n = read(ifd, ent, sizeof(*ent));
       +                if (n == -1)
       +                        err(1, "read index");
       +                if ((size_t)n != sizeof(*ent))
       +                        errx(1, "short read from index");
       +
       +                ent = grow_ent(ent, ent->nblks);
       +                blkssz = ent->nblks * sizeof(ent->blks[0]);
       +                n = read(ifd, ent->blks, blkssz);
       +                if (n == -1)
       +                        err(1, "read index");
       +                if ((size_t)n != blkssz)
       +                        errx(1, "short read from index");
       +
       +                if (memcmp(ent->md, md, sizeof(ent->md)) != 0) {
       +                        free(ent);
       +                        continue;
       +                }
       +
       +                for (j = 0; j < ent->nblks; j++) {
       +                        struct blk blk;
       +                        size_t off = 0;
       +
       +                        read_blk(&blk, ent->blks[j]);
       +                        if (blk.sz > BLKSIZ)
       +                                errx(1, "corrupt block in store");
       +                        /* write() may accept fewer bytes than asked for. */
       +                        while (off < blk.sz) {
       +                                n = write(1, blk.data + off, blk.sz - off);
       +                                if (n == -1)
       +                                        err(1, "write");
       +                                off += (size_t)n;
       +                        }
       +                }
       +                free(ent);
       +                break;
       +        }
       +}
       +
       +/*
       + * Open (creating if necessary) the files "index" and "store" in the
       + * current directory and load the index header into enthdr.
       + *
       + * The descriptors are kept in the globals ifd and sfd. An empty index
       + * leaves enthdr untouched. Terminates the program if a file cannot be
       + * opened or if a non-empty index does not contain a complete header.
       + */
       +void
       +init(void)
       +{
       +        struct stat sb;
       +        ssize_t n;
       +
       +        ifd = open("index", O_RDWR | O_CREAT, 0600);
       +        if (ifd == -1)
       +                err(1, "open index");
       +
       +        sfd = open("store", O_RDWR | O_CREAT, 0600);
       +        if (sfd == -1)
       +                err(1, "open store");
       +
       +        if (fstat(ifd, &sb) == -1)
       +                err(1, "stat index");
       +        if (sb.st_size == 0)
       +                return;
       +
       +        n = read(ifd, &enthdr, sizeof(enthdr));
       +        if (n == -1)
       +                err(1, "read index");
       +        if ((size_t)n != sizeof(enthdr))
       +                errx(1, "index header truncated");
       +}
       +
       +/*
       + * Print the index header and every index entry to stderr.
       + *
       + * Entries are read sequentially starting right behind the header.
       + * Terminates the program on a seek or read error or if the index ends
       + * in the middle of an entry.
       + */
       +void
       +dump_index(void)
       +{
       +        struct ent *ent;
       +        uint64_t i;
       +        ssize_t n;
       +
       +        dump_enthdr(&enthdr);
       +        if (lseek(ifd, sizeof(enthdr), SEEK_SET) == -1)
       +                err(1, "lseek index");
       +        for (i = 0; i < enthdr.nents; i++) {
       +                size_t blkssz;
       +
       +                /* Fixed part first; it tells how many indices follow. */
       +                ent = alloc_ent();
       +                n = read(ifd, ent, sizeof(*ent));
       +                if (n == -1)
       +                        err(1, "read index");
       +                if ((size_t)n != sizeof(*ent))
       +                        errx(1, "short read from index");
       +
       +                ent = grow_ent(ent, ent->nblks);
       +                blkssz = ent->nblks * sizeof(ent->blks[0]);
       +                n = read(ifd, ent->blks, blkssz);
       +                if (n == -1)
       +                        err(1, "read index");
       +                if ((size_t)n != blkssz)
       +                        errx(1, "short read from index");
       +
       +                dump_ent(ent);
       +                free(ent);
       +        }
       +}
       +
       +/*
       + * Print the command synopsis to stderr and exit with status 1.
       + */
       +void
       +usage(void)
       +{
       +        static const char synopsis[] = "usage: %s [-lv] [-e id]\n";
       +
       +        fprintf(stderr, synopsis, argv0);
       +        exit(1);
       +}
       +
       +/*
       + * Entry point.
       + *
       + *   -l     print the index to stderr and exit
       + *   -v     verbose; -l then also lists the block indices of each entry
       + *   -e id  write the stream with digest id to standard output
       + *
       + * Without -l or -e, standard input is deduplicated into the store and
       + * a new index entry is added. Any other option prints the usage text
       + * and exits with status 1.
       + */
       +int
       +main(int argc, char *argv[])
       +{
       +        unsigned char *id = NULL;
       +        int lflag = 0;
       +
       +        ARGBEGIN {
       +        case 'e':
       +                /* EARGF yields a char *; convert explicitly. */
       +                id = (unsigned char *)EARGF(usage());
       +                break;
       +        case 'l':
       +                lflag = 1;
       +                break;
       +        case 'v':
       +                verbose = 1;
       +                break;
       +        default:
       +                usage();
       +        } ARGEND
       +
       +        init();
       +
       +        if (lflag) {
       +                dump_index();
       +                return 0;
       +        }
       +
       +        if (id)
       +                extract(id, 0);
       +        else
       +                dedup(0);
       +        return 0;
       +}