URI: 
       Add support for blake2s and blake2sp - dedup - deduplicating backup program
  HTML git clone git://bitreich.org/dedup/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/dedup/
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
   DIR LICENSE
       ---
   DIR commit 5691fed6f168e40f36f6369d43ea6984f5bfdeae
   DIR parent def0f589424b21a20077cd350fa57efcc8078d4b
  HTML Author: sin <sin@2f30.org>
       Date:   Sun,  7 Apr 2019 14:29:14 +0100
       
       Add support for blake2s and blake2sp
       
       Useful for 32-bit platforms.
       
       Diffstat:
         M CHANGELOG                           |       3 ++-
         M Makefile                            |       4 ++++
         A blake2s-ref.c                       |     367 ++++++++++++++++++++++++++++++
         A blake2sp-ref.c                      |     359 +++++++++++++++++++++++++++++++
         M dedup.1                             |       2 +-
         M dedup.h                             |       4 ++++
         M hash.c                              |      56 +++++++++++++++++++++++++++++++
       
       7 files changed, 793 insertions(+), 2 deletions(-)
       ---
   DIR diff --git a/CHANGELOG b/CHANGELOG
       @@ -2,7 +2,8 @@
        
        ## [Unreleased]
        ### Added
       -- Add support blake2bp, which is parallel variant of blake2b.
       +- Support for blake2bp, which is a parallel variant of blake2b.
       +- Support for blake2s and blake2sp.
        
        ## [0.9] - 2019-03-26
        ### Added
   DIR diff --git a/Makefile b/Makefile
       @@ -17,6 +17,8 @@ SRC = \
                $(HDR) \
                blake2b-ref.c \
                blake2bp-ref.c \
       +        blake2s-ref.c \
       +        blake2sp-ref.c \
                chunker.c \
                compress.c \
                hash.c \
       @@ -30,6 +32,8 @@ OBJ = \
                $(BIN).o \
                blake2b-ref.o \
                blake2bp-ref.o \
       +        blake2s-ref.o \
       +        blake2sp-ref.o \
                chunker.o \
                compress.o \
                hash.o \
   DIR diff --git a/blake2s-ref.c b/blake2s-ref.c
       @@ -0,0 +1,367 @@
       +/*
       +   BLAKE2 reference source code package - reference C implementations
       +
       +   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
       +   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
       +   your option.  The terms of these licenses can be found at:
       +
       +   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
       +   - OpenSSL license   : https://www.openssl.org/source/license.html
       +   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
       +
       +   More information about the BLAKE2 hash function can be found at
       +   https://blake2.net.
       +*/
       +
       +#include <stdint.h>
       +#include <string.h>
       +#include <stdio.h>
       +
       +#include "blake2.h"
       +#include "blake2-impl.h"
       +
       +static const uint32_t blake2s_IV[8] = /* seeds S->h in blake2s_init0 and v[8..15] in blake2s_compress */
       +{
       +  0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
       +  0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
       +};
       +
       +static const uint8_t blake2s_sigma[10][16] = /* row r: which m[] words the G macro reads in ROUND(r) */
       +{
       +  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
       +  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
       +  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
       +  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
       +  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
       +  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
       +  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
       +  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
       +  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
       +  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
       +};
       +
       +static void blake2s_set_lastnode( blake2s_state *S )
       +{
       +  S->f[1] = UINT32_MAX; /* set every bit of the second finalization word */
       +}
       +
       +/* Returns 1 when f[0] is nonzero (blake2s_set_lastblock sets it), else 0 */
       +static int blake2s_is_lastblock( const blake2s_state *S )
       +{
       +  return ( S->f[0] == 0 ) ? 0 : 1;
       +}
       +
       +static void blake2s_set_lastblock( blake2s_state *S )
       +{
       +  S->f[0] = UINT32_MAX; /* set every bit of the first finalization word */
       +
       +  if( S->last_node ) blake2s_set_lastnode( S ); /* also flag f[1] for a last node */
       +}
       +
       +static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
       +{
       +  S->t[0] += inc;
       +  if( S->t[0] < inc ) S->t[1] += 1; /* t[0] wrapped around: carry into t[1] */
       +}
       +
       +static void blake2s_init0( blake2s_state *S )
       +{
       +  size_t word = 8;
       +  memset( S, 0, sizeof *S ); /* clear the whole state before seeding h */
       +
       +  while( word-- > 0 ) S->h[word] = blake2s_IV[word];
       +}
       +
       +/* Reset S, XOR the parameter block P into the IV-seeded h, and record the digest length */
       +int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
       +{
       +  const unsigned char *raw = ( const unsigned char * )( P );
       +  size_t word;
       +
       +  blake2s_init0( S );
       +
       +  /* Read P as eight values at a 4-byte stride and fold each into h */
       +  for( word = 0; word < 8; ++word )
       +    S->h[word] ^= load32( raw + word * 4 );
       +
       +  S->outlen = P->digest_length;
       +  return 0;
       +}
       +
       +
       +/* Unkeyed sequential setup: fanout = depth = 1, every other parameter zero */
       +int blake2s_init( blake2s_state *S, size_t outlen )
       +{
       +  blake2s_param param;
       +
       +  /* Reject a zero or oversized digest length before building the block */
       +  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES ) return -1;
       +
       +  param.digest_length = (uint8_t)outlen;
       +  param.key_length    = 0;
       +  param.fanout        = 1;
       +  param.depth         = 1;
       +  param.node_depth    = 0;
       +  param.inner_length  = 0;
       +  store32( &param.leaf_length, 0 );
       +  store32( &param.node_offset, 0 );
       +  store16( &param.xof_length, 0 );
       +  /* salt and personalization are not used by this entry point */
       +  memset( param.salt,     0, sizeof( param.salt ) );
       +  memset( param.personal, 0, sizeof( param.personal ) );
       +  return blake2s_init_param( S, &param );
       +}
       +
       +int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
       +{
       +  blake2s_param param;
       +  uint8_t keyblock[BLAKE2S_BLOCKBYTES];
       +
       +  /* Validate the digest length first, then the key pointer and key length */
       +  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES ) return -1;
       +  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES ) return -1;
       +
       +  param.digest_length = (uint8_t)outlen;
       +  param.key_length    = (uint8_t)keylen;
       +  param.fanout        = 1;
       +  param.depth         = 1;
       +  param.node_depth    = 0;
       +  param.inner_length  = 0;
       +  store32( &param.leaf_length, 0 );
       +  store32( &param.node_offset, 0 );
       +  store16( &param.xof_length, 0 );
       +  memset( param.salt,     0, sizeof( param.salt ) );
       +  memset( param.personal, 0, sizeof( param.personal ) );
       +
       +  if( blake2s_init_param( S, &param ) < 0 ) return -1;
       +
       +  /* The key is absorbed as one zero-padded block ahead of any message data */
       +  memset( keyblock, 0, sizeof( keyblock ) );
       +  memcpy( keyblock, key, keylen );
       +  blake2s_update( S, keyblock, sizeof( keyblock ) );
       +
       +  /* Wipe the stack copy of the key */
       +  secure_zero_memory( keyblock, sizeof( keyblock ) );
       +  return 0;
       +}
       +
       +#define G(r,i,a,b,c,d)                      \
       +  do {                                      \
       +    a = a + b + m[blake2s_sigma[r][2*i+0]]; \
       +    d = rotr32(d ^ a, 16);                  \
       +    c = c + d;                              \
       +    b = rotr32(b ^ c, 12);                  \
       +    a = a + b + m[blake2s_sigma[r][2*i+1]]; \
       +    d = rotr32(d ^ a, 8);                   \
       +    c = c + d;                              \
       +    b = rotr32(b ^ c, 7);                   \
       +  } while(0)
       +/* ROUND(r): G on the four columns, then the four diagonals, of v read as a 4x4 matrix */
       +#define ROUND(r)                    \
       +  do {                              \
       +    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
       +    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
       +    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
       +    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
       +    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
       +    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
       +    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
       +    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
       +  } while(0)
       +/* Mix one BLAKE2S_BLOCKBYTES-byte block `in` into S->h, using the current t and f words */
       +static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] )
       +{
       +  uint32_t m[16]; /* the input block, loaded as sixteen values via load32 */
       +  uint32_t v[16]; /* working vector that the rounds operate on */
       +  size_t i;
       +
       +  for( i = 0; i < 16; ++i ) {
       +    m[i] = load32( in + i * sizeof( m[i] ) );
       +  }
       +
       +  for( i = 0; i < 8; ++i ) {
       +    v[i] = S->h[i]; /* lower half starts as the current chaining value */
       +  }
       +
       +  v[ 8] = blake2s_IV[0];
       +  v[ 9] = blake2s_IV[1];
       +  v[10] = blake2s_IV[2];
       +  v[11] = blake2s_IV[3];
       +  v[12] = S->t[0] ^ blake2s_IV[4]; /* counter words t[0], t[1] */
       +  v[13] = S->t[1] ^ blake2s_IV[5];
       +  v[14] = S->f[0] ^ blake2s_IV[6]; /* finalization words f[0], f[1] */
       +  v[15] = S->f[1] ^ blake2s_IV[7];
       +
       +  ROUND( 0 );
       +  ROUND( 1 );
       +  ROUND( 2 );
       +  ROUND( 3 );
       +  ROUND( 4 );
       +  ROUND( 5 );
       +  ROUND( 6 );
       +  ROUND( 7 );
       +  ROUND( 8 );
       +  ROUND( 9 );
       +
       +  for( i = 0; i < 8; ++i ) {
       +    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; /* fold both halves of v back into h */
       +  }
       +}
       +
       +#undef G
       +#undef ROUND
       +
       +int blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) /* absorb inlen bytes; always returns 0 */
       +{
       +  const unsigned char * in = (const unsigned char *)pin;
       +  if( inlen > 0 )
       +  {
       +    size_t left = S->buflen; /* bytes already buffered */
       +    size_t fill = BLAKE2S_BLOCKBYTES - left; /* bytes needed to complete the buffered block */
       +    if( inlen > fill ) /* strict: input that exactly fills the buffer stays buffered */
       +    {
       +      S->buflen = 0;
       +      memcpy( S->buf + left, in, fill ); /* Complete the buffered block */
       +      blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
       +      blake2s_compress( S, S->buf ); /* Compress the buffered block */
       +      in += fill; inlen -= fill;
       +      while(inlen > BLAKE2S_BLOCKBYTES) { /* strict again: at least one byte is left for buffering */
       +        blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES);
       +        blake2s_compress( S, in ); /* compress straight from the caller's memory */
       +        in += BLAKE2S_BLOCKBYTES;
       +        inlen -= BLAKE2S_BLOCKBYTES;
       +      }
       +    }
       +    memcpy( S->buf + S->buflen, in, inlen ); /* keep the remainder for a later call */
       +    S->buflen += inlen;
       +  }
       +  return 0;
       +}
       +
       +int blake2s_final( blake2s_state *S, void *out, size_t outlen )
       +{
       +  uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
       +  size_t i;
       +  /* Pads and compresses the buffered tail, then writes S->outlen digest bytes to out */
       +  if( out == NULL || outlen < S->outlen )
       +    return -1;
       +  /* A state that was already finalized cannot be finalized again */
       +  if( blake2s_is_lastblock( S ) )
       +    return -1;
       +
       +  blake2s_increment_counter( S, ( uint32_t )S->buflen );
       +  blake2s_set_lastblock( S );
       +  memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
       +  blake2s_compress( S, S->buf );
       +
       +  for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
       +    store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
       +  /* Copy S->outlen, not outlen: a larger caller buffer must not read past `buffer` */
       +  memcpy( out, buffer, S->outlen );
       +  secure_zero_memory(buffer, sizeof(buffer));
       +  return 0;
       +}
       +
       +int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
       +{
       +  blake2s_state S[1];
       +  /* One-shot hash of in[0..inlen) into out; keyed when keylen > 0. Returns 0, or -1 on error */
       +  /* Verify parameters */
       +  if ( NULL == in && inlen > 0 ) return -1;
       +
       +  if ( NULL == out ) return -1;
       +
       +  if ( NULL == key && keylen > 0) return -1;
       +
       +  if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
       +
       +  if( keylen > BLAKE2S_KEYBYTES ) return -1;
       +
       +  if( keylen > 0 )
       +  {
       +    if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
       +  }
       +  else
       +  {
       +    if( blake2s_init( S, outlen ) < 0 ) return -1;
       +  }
       +
       +  if( blake2s_update( S, ( const uint8_t * )in, inlen ) < 0 ) return -1;
       +  /* Report the finalization status rather than unconditionally succeeding */
       +  return blake2s_final( S, out, outlen );
       +}
       +
       +#ifdef SUPERCOP
       +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
       +{ /* unkeyed one-shot hash producing BLAKE2S_OUTBYTES bytes */
       +  return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
       +}
       +#endif /* SUPERCOP */
       +
       +#if defined(BLAKE2S_SELFTEST)
       +#include <string.h>
       +#include "blake2-kat.h"
       +int main( void )
       +{
       +  uint8_t key[BLAKE2S_KEYBYTES];
       +  uint8_t buf[BLAKE2_KAT_LENGTH];
       +  size_t i, step;
       +  /* Key and message are the byte sequences 0, 1, 2, ... */
       +  for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
       +    key[i] = ( uint8_t )i;
       +
       +  for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
       +    buf[i] = ( uint8_t )i;
       +
       +  /* One-shot API: the keyed hash of the first i bytes of buf must equal blake2s_keyed_kat[i] */
       +  for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
       +  {
       +    uint8_t hash[BLAKE2S_OUTBYTES];
       +    blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
       +
       +    if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
       +    {
       +      goto fail;
       +    }
       +  }
       +
       +  /* Streaming API: same vectors, fed in step-byte pieces for every step below the block size */
       +  for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
       +    for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
       +      uint8_t hash[BLAKE2S_OUTBYTES];
       +      blake2s_state S;
       +      uint8_t * p = buf;
       +      size_t mlen = i;
       +      int err = 0;
       +
       +      if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
       +        goto fail;
       +      }
       +
       +      while (mlen >= step) { /* full step-sized pieces */
       +        if ( (err = blake2s_update(&S, p, step)) < 0 ) {
       +          goto fail;
       +        }
       +        mlen -= step;
       +        p += step;
       +      }
       +      if ( (err = blake2s_update(&S, p, mlen)) < 0) { /* remaining tail, possibly empty */
       +        goto fail;
       +      }
       +      if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
       +        goto fail;
       +      }
       +
       +      if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
       +        goto fail;
       +      }
       +    }
       +  }
       +
       +  puts( "ok" );
       +  return 0;
       +fail:
       +  puts("error");
       +  return -1;
       +}
       +#endif
   DIR diff --git a/blake2sp-ref.c b/blake2sp-ref.c
       @@ -0,0 +1,359 @@
       +/*
       +   BLAKE2 reference source code package - reference C implementations
       +
       +   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
       +   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
       +   your option.  The terms of these licenses can be found at:
       +
       +   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
       +   - OpenSSL license   : https://www.openssl.org/source/license.html
       +   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
       +
       +   More information about the BLAKE2 hash function can be found at
       +   https://blake2.net.
       +*/
       +
       +#include <stdlib.h>
       +#include <string.h>
       +#include <stdio.h>
       +
       +#if defined(_OPENMP)
       +#include <omp.h>
       +#endif
       +
       +#include "blake2.h"
       +#include "blake2-impl.h"
       +
       +#define PARALLELISM_DEGREE 8
       +
       +/*
       +  blake2s_init_param sets the expected output length from the
       +  digest_length field of the parameter block.
       +
       +  For these instances the output length is given by inner_length
       +  instead, so S->outlen is overridden after the regular setup.
       +*/
       +static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P )
       +{
       +  const int status = blake2s_init_param(S, P);
       +  S->outlen = P->inner_length;
       +  return status;
       +}
       +
       +static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset )
       +{
       +  blake2s_param param;
       +  param.digest_length = (uint8_t)outlen;
       +  param.key_length = (uint8_t)keylen;
       +  param.fanout = PARALLELISM_DEGREE;
       +  param.depth = 2;
       +  param.node_depth = 0; /* the root uses node_depth 1 */
       +  param.inner_length = BLAKE2S_OUTBYTES;
       +  store32( &param.leaf_length, 0 );
       +  store32( &param.node_offset, offset ); /* callers pass the leaf index here */
       +  store16( &param.xof_length, 0 );
       +  memset( param.salt, 0, sizeof( param.salt ) );
       +  memset( param.personal, 0, sizeof( param.personal ) );
       +  return blake2sp_init_leaf_param( S, &param );
       +}
       +
       +static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen )
       +{
       +  blake2s_param param;
       +  param.digest_length = (uint8_t)outlen;
       +  param.key_length = (uint8_t)keylen;
       +  param.fanout = PARALLELISM_DEGREE;
       +  param.depth = 2;
       +  param.node_depth = 1; /* the leaves use node_depth 0 */
       +  param.inner_length = BLAKE2S_OUTBYTES;
       +  store32( &param.leaf_length, 0 );
       +  store32( &param.node_offset, 0 );
       +  store16( &param.xof_length, 0 );
       +  memset( param.salt, 0, sizeof( param.salt ) );
       +  memset( param.personal, 0, sizeof( param.personal ) );
       +  return blake2s_init_param( S, &param ); /* outlen stays digest_length for the root */
       +}
       +
       +
       +int blake2sp_init( blake2sp_state *S, size_t outlen )
       +{
       +  size_t leaf;
       +
       +  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES ) return -1;
       +
       +  /* Start with an empty staging buffer and remember the digest size */
       +  S->outlen = outlen;
       +  S->buflen = 0;
       +  memset( S->buf, 0, sizeof( S->buf ) );
       +  if( blake2sp_init_root( S->R, outlen, 0 ) < 0 ) return -1;
       +
       +  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
       +  {
       +    if( blake2sp_init_leaf( S->S[leaf], outlen, 0, leaf ) < 0 ) return -1;
       +  }
       +  S->S[PARALLELISM_DEGREE - 1]->last_node = 1; /* highest-index leaf */
       +  S->R->last_node = 1;
       +  return 0;
       +}
       +
       +int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
       +{
       +  uint8_t keyblock[BLAKE2S_BLOCKBYTES];
       +  size_t leaf;
       +
       +  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES ) return -1;
       +
       +  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES ) return -1;
       +
       +  /* Start with an empty staging buffer and remember the digest size */
       +  S->outlen = outlen;
       +  S->buflen = 0;
       +  memset( S->buf, 0, sizeof( S->buf ) );
       +  if( blake2sp_init_root( S->R, outlen, keylen ) < 0 ) return -1;
       +
       +  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
       +  {
       +    if( blake2sp_init_leaf( S->S[leaf], outlen, keylen, leaf ) < 0 ) return -1;
       +  }
       +  S->S[PARALLELISM_DEGREE - 1]->last_node = 1; /* highest-index leaf */
       +  S->R->last_node = 1;
       +
       +  /* Every leaf absorbs the same zero-padded key block before any message data */
       +  memset( keyblock, 0, sizeof( keyblock ) );
       +  memcpy( keyblock, key, keylen );
       +
       +  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
       +    blake2s_update( S->S[leaf], keyblock, sizeof( keyblock ) );
       +
       +  secure_zero_memory( keyblock, sizeof( keyblock ) ); /* wipe the stack copy of the key */
       +  return 0;
       +}
       +
       +
       +int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen )
       +{
       +  const unsigned char * in = (const unsigned char *)pin;
       +  size_t left = S->buflen; /* bytes already staged in S->buf */
       +  size_t fill = sizeof( S->buf ) - left; /* room left in S->buf */
       +  size_t i;
       +  /* If staged bytes plus the new input complete S->buf, give leaf i the i-th block of it */
       +  if( left && inlen >= fill )
       +  {
       +    memcpy( S->buf + left, in, fill );
       +
       +    for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
       +
       +    in += fill;
       +    inlen -= fill;
       +    left = 0;
       +  }
       +  /* Leaf i (one OpenMP thread each when _OPENMP is defined) reads blocks i, i+PARALLELISM_DEGREE, ... from the input */
       +#if defined(_OPENMP)
       +  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
       +#else
       +  for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +#endif
       +  {
       +#if defined(_OPENMP)
       +    size_t      i = omp_get_thread_num();
       +#endif
       +    size_t inlen__ = inlen;
       +    const unsigned char *in__ = ( const unsigned char * )in;
       +    in__ += i * BLAKE2S_BLOCKBYTES; /* this leaf's first block */
       +
       +    while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) /* only complete groups of PARALLELISM_DEGREE blocks */
       +    {
       +      blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES );
       +      in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
       +      inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
       +    }
       +  }
       +  /* Step past the complete groups handled above; what remains is staged in S->buf */
       +  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
       +  inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
       +
       +  if( inlen > 0 )
       +    memcpy( S->buf + left, in, inlen );
       +
       +  S->buflen = left + inlen;
       +  return 0;
       +}
       +
       +
       +int blake2sp_final( blake2sp_state *S, void *out, size_t outlen )
       +{
       +  uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; /* one digest per leaf */
       +  size_t i;
       +  /* Fails with -1 when out is NULL or the caller's buffer is smaller than S->outlen */
       +  if(out == NULL || outlen < S->outlen) {
       +    return -1;
       +  }
       +  /* Give each leaf its share of the staged bytes (at most one block), then finalize it */
       +  for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +  {
       +    if( S->buflen > i * BLAKE2S_BLOCKBYTES )
       +    {
       +      size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;
       +
       +      if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;
       +
       +      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
       +    }
       +
       +    blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
       +  }
       +  /* The root hashes the leaf digests in index order */
       +  for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +    blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );
       +
       +  return blake2s_final( S->R, out, S->outlen );
       +}
       +
       +
       +int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
       +{
       +  uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; /* one digest per leaf */
       +  blake2s_state S[PARALLELISM_DEGREE][1]; /* leaf states */
       +  blake2s_state FS[1]; /* root state */
       +  size_t i;
       +
       +  /* Verify parameters */
       +  if ( NULL == in && inlen > 0 ) return -1;
       +
       +  if ( NULL == out ) return -1;
       +
       +  if ( NULL == key && keylen > 0) return -1;
       +
       +  if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
       +
       +  if( keylen > BLAKE2S_KEYBYTES ) return -1;
       +
       +  for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +    if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
       +
       +  S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
       +  /* With a key, every leaf first absorbs the same zero-padded key block */
       +  if( keylen > 0 )
       +  {
       +    uint8_t block[BLAKE2S_BLOCKBYTES];
       +    memset( block, 0, BLAKE2S_BLOCKBYTES );
       +    memcpy( block, key, keylen );
       +
       +    for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +      blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
       +
       +    secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
       +  }
       +
       +#if defined(_OPENMP)
       +  #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
       +#else
       +
       +  for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +#endif
       +  {
       +#if defined(_OPENMP)
       +    size_t      i = omp_get_thread_num();
       +#endif
       +    size_t inlen__ = inlen;
       +    const unsigned char *in__ = ( const unsigned char * )in;
       +    in__ += i * BLAKE2S_BLOCKBYTES; /* leaf i starts at block i */
       +
       +    while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) /* complete groups: leaf i takes every PARALLELISM_DEGREE-th block */
       +    {
       +      blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES );
       +      in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
       +      inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
       +    }
       +
       +    if( inlen__ > i * BLAKE2S_BLOCKBYTES ) /* the tail reaches into this leaf's block */
       +    {
       +      const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES;
       +      const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
       +      blake2s_update( S[i], in__, len );
       +    }
       +
       +    blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES );
       +  }
       +
       +  if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
       +    return -1;
       +
       +  FS->last_node = 1;
       +  /* The root hashes the leaf digests in index order */
       +  for( i = 0; i < PARALLELISM_DEGREE; ++i )
       +    blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
       +
       +  return blake2s_final( FS, out, outlen );
       +}
       +
       +
       +
       +#if defined(BLAKE2SP_SELFTEST)
       +#include <string.h>
       +#include "blake2-kat.h"
       +int main( void )
       +{
       +  uint8_t key[BLAKE2S_KEYBYTES];
       +  uint8_t buf[BLAKE2_KAT_LENGTH];
       +  size_t i, step;
       +  /* Key and message are the byte sequences 0, 1, 2, ... */
       +  for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
       +    key[i] = ( uint8_t )i;
       +
       +  for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
       +    buf[i] = ( uint8_t )i;
       +
       +  /* One-shot API: the keyed hash of the first i bytes of buf must equal blake2sp_keyed_kat[i] */
       +  for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
       +  {
       +    uint8_t hash[BLAKE2S_OUTBYTES];
       +    blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
       +
       +    if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
       +    {
       +      goto fail;
       +    }
       +  }
       +
       +  /* Streaming API: same vectors, fed in step-byte pieces for every step below the block size */
       +  for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
       +    for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
       +      uint8_t hash[BLAKE2S_OUTBYTES];
       +      blake2sp_state S;
       +      uint8_t * p = buf;
       +      size_t mlen = i;
       +      int err = 0;
       +
       +      if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
       +        goto fail;
       +      }
       +
       +      while (mlen >= step) { /* full step-sized pieces */
       +        if ( (err = blake2sp_update(&S, p, step)) < 0 ) {
       +          goto fail;
       +        }
       +        mlen -= step;
       +        p += step;
       +      }
       +      if ( (err = blake2sp_update(&S, p, mlen)) < 0) { /* remaining tail, possibly empty */
       +        goto fail;
       +      }
       +      if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
       +        goto fail;
       +      }
       +
       +      if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) {
       +        goto fail;
       +      }
       +    }
       +  }
       +
       +  puts( "ok" );
       +  return 0;
       +fail:
       +  puts("error");
       +  return -1;
       +}
       +#endif
   DIR diff --git a/dedup.1 b/dedup.1
       @@ -32,7 +32,7 @@ should be used and piped into
        .It Fl H Ar hash
        The cryptographic hash function used to identify
        unique blocks in the store.
       -The supported hash functions are blake2b and blake2bp.
       +The supported hash functions are blake2b, blake2bp, blake2s and blake2sp.
        This flag only has an effect when initializing the repository.
        By default blake2b is used.
        .It Fl Z Ar compressor
   DIR diff --git a/dedup.h b/dedup.h
       @@ -36,6 +36,8 @@ enum compr_algo {
        enum hash_algo {
                HASH_BLAKE2B,
                HASH_BLAKE2BP,
       +        HASH_BLAKE2S,
       +        HASH_BLAKE2SP,
                NR_HASHES,
        };
        
       @@ -86,6 +88,8 @@ struct hash_ctx {
                union {
                        blake2b_state blake2b_ctx;
                        blake2bp_state blake2bp_ctx;
       +                blake2s_state blake2s_ctx;
       +                blake2sp_state blake2sp_ctx;
                } u;
                struct hash_ops *ops;
        };
   DIR diff --git a/hash.c b/hash.c
       @@ -14,6 +14,14 @@ static int blake2bpi(struct hash_ctx *ctx, size_t n);
        static int blake2bpu(struct hash_ctx *ctx, const void *buf, size_t n);
        static int blake2bpf(struct hash_ctx *ctx, void *buf, size_t n);
        
       +static int blake2si(struct hash_ctx *ctx, size_t n);
       +static int blake2su(struct hash_ctx *ctx, const void *buf, size_t n);
       +static int blake2sf(struct hash_ctx *ctx, void *buf, size_t n);
       +
       +static int blake2spi(struct hash_ctx *ctx, size_t n);
       +static int blake2spu(struct hash_ctx *ctx, const void *buf, size_t n);
       +static int blake2spf(struct hash_ctx *ctx, void *buf, size_t n);
       +
        static struct hash_ops {
                int (*init)(struct hash_ctx *ctx, size_t n);
                int (*update)(struct hash_ctx *ctx, const void *buf, size_t n);
       @@ -29,6 +37,16 @@ static struct hash_ops {
                        .update = blake2bpu,
                        .final = blake2bpf,
                },
       +        {
       +                .init = blake2si,
       +                .update = blake2su,
       +                .final = blake2sf,
       +        },
       +        {
       +                .init = blake2spi,
       +                .update = blake2spu,
       +                .final = blake2spf,
       +        },
        };
        
        static struct algomap {
       @@ -37,6 +55,8 @@ static struct algomap {
        } algomap[] = {
                { .name = "blake2b", .type = HASH_BLAKE2B },
                { .name = "blake2bp", .type = HASH_BLAKE2BP },
       +        { .name = "blake2s", .type = HASH_BLAKE2S },
       +        { .name = "blake2sp", .type = HASH_BLAKE2SP },
                { .name = NULL },
        };
        
       @@ -76,6 +96,42 @@ blake2bpf(struct hash_ctx *ctx, void *buf, size_t n)
                return blake2bp_final(&ctx->u.blake2bp_ctx, buf, n);
        }
        
       +static int
       +blake2si(struct hash_ctx *ctx, size_t n)
       +{
       +        return blake2s_init(&ctx->u.blake2s_ctx, n); /* n is forwarded as the digest length */
       +}
       +
       +static int
       +blake2su(struct hash_ctx *ctx, const void *buf, size_t n)
       +{
       +        return blake2s_update(&ctx->u.blake2s_ctx, buf, n); /* absorb n bytes from buf */
       +}
       +
       +static int
       +blake2sf(struct hash_ctx *ctx, void *buf, size_t n)
       +{
       +        return blake2s_final(&ctx->u.blake2s_ctx, buf, n); /* n is the size of the output buffer buf */
       +}
       +
       +static int
       +blake2spi(struct hash_ctx *ctx, size_t n)
       +{
       +        return blake2sp_init(&ctx->u.blake2sp_ctx, n); /* n is forwarded as the digest length */
       +}
       +
       +static int
       +blake2spu(struct hash_ctx *ctx, const void *buf, size_t n)
       +{
       +        return blake2sp_update(&ctx->u.blake2sp_ctx, buf, n); /* absorb n bytes from buf */
       +}
       +
       +static int
       +blake2spf(struct hash_ctx *ctx, void *buf, size_t n)
       +{
       +        return blake2sp_final(&ctx->u.blake2sp_ctx, buf, n); /* n is the size of the output buffer buf */
       +}
       +
        int
        hash_init(struct hash_ctx *ctx, int type, size_t n)
        {