// Denom.org
// bouncycastle.org

package org.denom.crypt.blockcipher;

import org.denom.Binary;

import static java.lang.Long.rotateRight;
import static java.lang.Long.rotateLeft;
import static org.denom.Binary.getLongLE;
import static org.denom.Binary.setLongLE;
import static org.denom.Binary.Bin;
import static org.denom.Ex.MUST;

/**
 * Threefish, version 1.3.
 * BlockSize = 128 bytes.
 * KeySize = 128 bytes.
 *
 * Threefish was designed by Niels Ferguson - Stefan Lucks - Bruce Schneier - Doug Whiting - Mihir Bellare
 * - Tadayoshi Kohno - Jon Callas - Jesse Walker.
 *
 * This implementation inlines all round functions, unrolls 8 rounds per loop iteration, and uses
 * two small static lookup tables (MOD3, MOD17) to speed up key schedule injection.
 */
public class Threefish1024 extends BlockCipher
{
	public final static int BLOCK_SIZE = 128;
	public final static int KEY_SIZE = 128;

	private static final int BLOCK_SIZE_WORDS = BLOCK_SIZE / 8;
	private static final int ROUNDS_NUM = 80;

	private static int[] MOD3  = new int[ ROUNDS_NUM ];
	private static int[] MOD17 = new int[ ROUNDS_NUM ];

	static
	{
		for( int i = 0; i < MOD3.length; i++ )
		{
			MOD3[ i ] = i % 3;
			MOD17[ i ] = i % 17;
		}
	}

	private long[] tw = new long[ 5 ];
	private long[] kw = new long[ 2 * BLOCK_SIZE_WORDS + 1 ];

	// -----------------------------------------------------------------------------------------------------------------
	public Threefish1024()
	{
		this( Bin( KEY_SIZE ) );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * @param key [128 bytes]
	 */
	public Threefish1024( final Binary key )
	{
		super.initialize( BLOCK_SIZE );
		setKey( Bin( KEY_SIZE) );
	}
	
	// -----------------------------------------------------------------------------------------------------------------
	@Override
	public Threefish1024 clone()
	{
		return new Threefish1024( this.key );
	}

	// -----------------------------------------------------------------------------------------------------------------
	@Override
	public String getAlgName()
	{
		return "Threefish-1024";
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * @param key [128 bytes].
	 */
	@Override
	public void setKey( final Binary key )
	{
		MUST( key.size() == KEY_SIZE, "Invalid key size" );
		this.key = key.clone();

		byte[] arr = key.getDataRef();

		long knw = 0x1BD11BDAA9FC1A22L;
		for( int i = 0; i < BLOCK_SIZE_WORDS; i++ )
		{
			kw[ i ] = getLongLE( arr, i << 3 );
			knw ^= kw[ i ];
		}
		kw[ BLOCK_SIZE_WORDS ] = knw;
		System.arraycopy( kw, 0, kw, BLOCK_SIZE_WORDS + 1, BLOCK_SIZE_WORDS );
	}

	// -----------------------------------------------------------------------------------------------------------------
	@Override
	public Binary generateKey()
	{
		Binary akey = new Binary().randomSecure( KEY_SIZE );
		setKey( akey );
		return akey;
	}

	// -----------------------------------------------------------------------------------------------------------------
	public void setTweak( final Binary tweak )
	{
		MUST( tweak.size() == 16, "Incorrect tweak size" );
		byte[] arr = tweak.getDataRef();
		tw[ 0 ] = getLongLE( arr, 0 );
		tw[ 1 ] = getLongLE( arr, 8 );
		tw[ 2 ] = tw[ 0 ] ^ tw[ 1 ];
		tw[ 3 ] = tw[ 0 ];
		tw[ 4 ] = tw[ 1 ];
	}

	// -----------------------------------------------------------------------------------------------------------------
	@Override
	public void encryptBlock( Binary block )
	{
		MUST( block.size() == BLOCK_SIZE, "Incorrect block size" );

		byte[] arr = block.getDataRef();
		long b0  = getLongLE( arr,   0 ) + kw[  0 ];
		long b1  = getLongLE( arr,   8 ) + kw[  1 ];
		long b2  = getLongLE( arr,  16 ) + kw[  2 ];
		long b3  = getLongLE( arr,  24 ) + kw[  3 ];
		long b4  = getLongLE( arr,  32 ) + kw[  4 ];
		long b5  = getLongLE( arr,  40 ) + kw[  5 ];
		long b6  = getLongLE( arr,  48 ) + kw[  6 ];
		long b7  = getLongLE( arr,  56 ) + kw[  7 ];
		long b8  = getLongLE( arr,  64 ) + kw[  8 ];
		long b9  = getLongLE( arr,  72 ) + kw[  9 ];
		long b10 = getLongLE( arr,  80 ) + kw[ 10 ];
		long b11 = getLongLE( arr,  88 ) + kw[ 11 ];
		long b12 = getLongLE( arr,  96 ) + kw[ 12 ];
		long b13 = getLongLE( arr, 104 ) + kw[ 13 ] + tw[ 0 ];
		long b14 = getLongLE( arr, 112 ) + kw[ 14 ] + tw[ 1 ];
		long b15 = getLongLE( arr, 120 ) + kw[ 15 ];

		// Rounds loop, unrolled to 8 rounds per iteration. Unrolling to multiples of 4 avoids
		// the mod 4 check for key injection, and allows inlining of the permutations, which
		// cycle every of 4 rounds (avoiding array index/lookup). Unrolling to multiples of 8
		// avoids the mod 8 rotation constant lookup, and allows inlining constant rotation
		// values (avoiding array index/lookup).

		for( int d = 1; d < (ROUNDS_NUM / 4); d += 2 )
		{
			final int dm17 = MOD17[ d ];
			final int dm3 = MOD3[ d ];

			// 4 rounds of mix and permute. Permute schedule has a 4 round cycle, so permutes
			// are inlined in the mix operations in each 4 round block.
			b0  += b1 ;  b1  = rotateLeft( b1 , 24 ) ^ b0 ;
			b2  += b3 ;  b3  = rotateLeft( b3 , 13 ) ^ b2 ;
			b4  += b5 ;  b5  = rotateLeft( b5 ,  8 ) ^ b4 ;
			b6  += b7 ;  b7  = rotateLeft( b7 , 47 ) ^ b6 ;
			b8  += b9 ;  b9  = rotateLeft( b9 ,  8 ) ^ b8 ;
			b10 += b11;  b11 = rotateLeft( b11, 17 ) ^ b10;
			b12 += b13;  b13 = rotateLeft( b13, 22 ) ^ b12;
			b14 += b15;  b15 = rotateLeft( b15, 37 ) ^ b14;

			b0  += b9 ;  b9  = rotateLeft( b9 , 38 ) ^ b0 ;
			b2  += b13;  b13 = rotateLeft( b13, 19 ) ^ b2 ;
			b6  += b11;  b11 = rotateLeft( b11, 10 ) ^ b6 ;
			b4  += b15;  b15 = rotateLeft( b15, 55 ) ^ b4 ;
			b10 += b7 ;  b7  = rotateLeft( b7 , 49 ) ^ b10;
			b12 += b3 ;  b3  = rotateLeft( b3 , 18 ) ^ b12;
			b14 += b5 ;  b5  = rotateLeft( b5 , 23 ) ^ b14;
			b8  += b1 ;  b1  = rotateLeft( b1 , 52 ) ^ b8 ;

			b0  += b7 ;  b7  = rotateLeft( b7 , 33 ) ^ b0 ;
			b2  += b5 ;  b5  = rotateLeft( b5 ,  4 ) ^ b2 ;
			b4  += b3 ;  b3  = rotateLeft( b3 , 51 ) ^ b4 ;
			b6  += b1 ;  b1  = rotateLeft( b1 , 13 ) ^ b6 ;
			b12 += b15;  b15 = rotateLeft( b15, 34 ) ^ b12;
			b14 += b13;  b13 = rotateLeft( b13, 41 ) ^ b14;
			b8  += b11;  b11 = rotateLeft( b11, 59 ) ^ b8 ;
			b10 += b9 ;  b9  = rotateLeft( b9 , 17 ) ^ b10;

			b0  += b15;  b15 = rotateLeft( b15,  5 ) ^ b0 ;
			b2  += b11;  b11 = rotateLeft( b11, 20 ) ^ b2 ;
			b6  += b13;  b13 = rotateLeft( b13, 48 ) ^ b6 ;
			b4  += b9 ;  b9  = rotateLeft( b9 , 41 ) ^ b4 ;
			b14 += b1 ;  b1  = rotateLeft( b1 , 47 ) ^ b14;
			b8  += b5 ;  b5  = rotateLeft( b5 , 28 ) ^ b8 ;
			b10 += b3 ;  b3  = rotateLeft( b3 , 16 ) ^ b10;
			b12 += b7 ;  b7  = rotateLeft( b7 , 25 ) ^ b12;

			// Subkey injection for first 4 rounds.
			b0  += kw[ dm17 ];
			b1  += kw[ dm17 + 1 ];
			b2  += kw[ dm17 + 2 ];
			b3  += kw[ dm17 + 3 ];
			b4  += kw[ dm17 + 4 ];
			b5  += kw[ dm17 + 5 ];
			b6  += kw[ dm17 + 6 ];
			b7  += kw[ dm17 + 7 ];
			b8  += kw[ dm17 + 8 ];
			b9  += kw[ dm17 + 9 ];
			b10 += kw[ dm17 + 10 ];
			b11 += kw[ dm17 + 11 ];
			b12 += kw[ dm17 + 12 ];
			b13 += kw[ dm17 + 13 ] + tw[ dm3 ];
			b14 += kw[ dm17 + 14 ] + tw[ dm3 + 1 ];
			b15 += kw[ dm17 + 15 ] + d;

			// 4 more rounds of mix/permute
			b0  += b1 ;  b1  = rotateLeft( b1 , 41 ) ^ b0 ;
			b2  += b3 ;  b3  = rotateLeft( b3 ,  9 ) ^ b2 ;
			b4  += b5 ;  b5  = rotateLeft( b5 , 37 ) ^ b4 ;
			b6  += b7 ;  b7  = rotateLeft( b7 , 31 ) ^ b6 ;
			b8  += b9 ;  b9  = rotateLeft( b9 , 12 ) ^ b8 ;
			b10 += b11;  b11 = rotateLeft( b11, 47 ) ^ b10;
			b12 += b13;  b13 = rotateLeft( b13, 44 ) ^ b12;
			b14 += b15;  b15 = rotateLeft( b15, 30 ) ^ b14;

			b0  += b9 ;  b9  = rotateLeft( b9 , 16 ) ^ b0 ;
			b2  += b13;  b13 = rotateLeft( b13, 34 ) ^ b2 ;
			b6  += b11;  b11 = rotateLeft( b11, 56 ) ^ b6 ;
			b4  += b15;  b15 = rotateLeft( b15, 51 ) ^ b4 ;
			b10 += b7 ;  b7  = rotateLeft( b7 ,  4 ) ^ b10;
			b12 += b3 ;  b3  = rotateLeft( b3 , 53 ) ^ b12;
			b14 += b5 ;  b5  = rotateLeft( b5 , 42 ) ^ b14;
			b8  += b1 ;  b1  = rotateLeft( b1 , 41 ) ^ b8 ;

			b0  += b7 ;  b7  = rotateLeft( b7 , 31 ) ^ b0 ;
			b2  += b5 ;  b5  = rotateLeft( b5 , 44 ) ^ b2 ;
			b4  += b3 ;  b3  = rotateLeft( b3 , 47 ) ^ b4 ;
			b6  += b1 ;  b1  = rotateLeft( b1 , 46 ) ^ b6 ;
			b12 += b15;  b15 = rotateLeft( b15, 19 ) ^ b12;
			b14 += b13;  b13 = rotateLeft( b13, 42 ) ^ b14;
			b8  += b11;  b11 = rotateLeft( b11, 44 ) ^ b8 ;
			b10 += b9 ;  b9  = rotateLeft( b9 , 25 ) ^ b10;

			b0  += b15;  b15 = rotateLeft( b15,  9 ) ^ b0 ;
			b2  += b11;  b11 = rotateLeft( b11, 48 ) ^ b2 ;
			b6  += b13;  b13 = rotateLeft( b13, 35 ) ^ b6 ;
			b4  += b9 ;  b9  = rotateLeft( b9 , 52 ) ^ b4 ;
			b14 += b1 ;  b1  = rotateLeft( b1 , 23 ) ^ b14;
			b8  += b5 ;  b5  = rotateLeft( b5 , 31 ) ^ b8 ;
			b10 += b3 ;  b3  = rotateLeft( b3 , 37 ) ^ b10;
			b12 += b7 ;  b7  = rotateLeft( b7 , 20 ) ^ b12;

			// Subkey injection for next 4 rounds.
			b0  += kw[ dm17 +  1 ];
			b1  += kw[ dm17 +  2 ];
			b2  += kw[ dm17 +  3 ];
			b3  += kw[ dm17 +  4 ];
			b4  += kw[ dm17 +  5 ];
			b5  += kw[ dm17 +  6 ];
			b6  += kw[ dm17 +  7 ];
			b7  += kw[ dm17 +  8 ];
			b8  += kw[ dm17 +  9 ];
			b9  += kw[ dm17 + 10 ];
			b10 += kw[ dm17 + 11 ];
			b11 += kw[ dm17 + 12 ];
			b12 += kw[ dm17 + 13 ];
			b13 += kw[ dm17 + 14 ] + tw[ dm3 + 1 ];
			b14 += kw[ dm17 + 15 ] + tw[ dm3 + 2 ];
			b15 += kw[ dm17 + 16 ] + d + 1;
		}

		setLongLE( arr,   0, b0  );
		setLongLE( arr,   8, b1  );
		setLongLE( arr,  16, b2  );
		setLongLE( arr,  24, b3  );
		setLongLE( arr,  32, b4  );
		setLongLE( arr,  40, b5  );
		setLongLE( arr,  48, b6  );
		setLongLE( arr,  56, b7  );
		setLongLE( arr,  64, b8  );
		setLongLE( arr,  72, b9  );
		setLongLE( arr,  80, b10 );
		setLongLE( arr,  88, b11 );
		setLongLE( arr,  96, b12 );
		setLongLE( arr, 104, b13 );
		setLongLE( arr, 112, b14 );
		setLongLE( arr, 120, b15 );
	}

	// -----------------------------------------------------------------------------------------------------------------
	@Override
	public void decryptBlock( Binary block )
	{
		MUST( block.size() == BLOCK_SIZE, "Incorrect block size" );

		byte[] arr = block.getDataRef();

		long b0  = getLongLE( arr,   0 );
		long b1  = getLongLE( arr,   8 );
		long b2  = getLongLE( arr,  16 );
		long b3  = getLongLE( arr,  24 );
		long b4  = getLongLE( arr,  32 );
		long b5  = getLongLE( arr,  40 );
		long b6  = getLongLE( arr,  48 );
		long b7  = getLongLE( arr,  56 );
		long b8  = getLongLE( arr,  64 );
		long b9  = getLongLE( arr,  72 );
		long b10 = getLongLE( arr,  80 );
		long b11 = getLongLE( arr,  88 );
		long b12 = getLongLE( arr,  96 );
		long b13 = getLongLE( arr, 104 );
		long b14 = getLongLE( arr, 112 );
		long b15 = getLongLE( arr, 120 );

		for( int d = (ROUNDS_NUM / 4) - 1; d >= 1; d -= 2 )
		{
			final int dm17 = MOD17[ d ];
			final int dm3 = MOD3[ d ];

			// Reverse key injection for second 4 rounds
			b0  -= kw[ dm17 + 1  ];
			b1  -= kw[ dm17 + 2  ];
			b2  -= kw[ dm17 + 3  ];
			b3  -= kw[ dm17 + 4  ];
			b4  -= kw[ dm17 + 5  ];
			b5  -= kw[ dm17 + 6  ];
			b6  -= kw[ dm17 + 7  ];
			b7  -= kw[ dm17 + 8  ];
			b8  -= kw[ dm17 + 9  ];
			b9  -= kw[ dm17 + 10 ];
			b10 -= kw[ dm17 + 11 ];
			b11 -= kw[ dm17 + 12 ];
			b12 -= kw[ dm17 + 13 ];
			b13 -= kw[ dm17 + 14 ] + tw[ dm3 + 1 ];
			b14 -= kw[ dm17 + 15 ] + tw[ dm3 + 2 ];
			b15 -= kw[ dm17 + 16 ] + d + 1;

			// Reverse second 4 mix/permute rounds
			b15 = rotateRight( b15 ^ b0 ,  9 );  b0  -= b15;
			b11 = rotateRight( b11 ^ b2 , 48 );  b2  -= b11;
			b13 = rotateRight( b13 ^ b6 , 35 );  b6  -= b13;
			b9  = rotateRight( b9  ^ b4 , 52 );  b4  -= b9 ;
			b1  = rotateRight( b1  ^ b14, 23 );  b14 -= b1 ;
			b5  = rotateRight( b5  ^ b8 , 31 );  b8  -= b5 ;
			b3  = rotateRight( b3  ^ b10, 37 );  b10 -= b3 ;
			b7  = rotateRight( b7  ^ b12, 20 );  b12 -= b7 ;
			
			b7  = rotateRight( b7  ^ b0 , 31 );  b0  -= b7 ;
			b5  = rotateRight( b5  ^ b2 , 44 );  b2  -= b5 ;
			b3  = rotateRight( b3  ^ b4 , 47 );  b4  -= b3 ;
			b1  = rotateRight( b1  ^ b6 , 46 );  b6  -= b1 ;
			b15 = rotateRight( b15 ^ b12, 19 );  b12 -= b15;
			b13 = rotateRight( b13 ^ b14, 42 );  b14 -= b13;
			b11 = rotateRight( b11 ^ b8 , 44 );  b8  -= b11;
			b9  = rotateRight( b9  ^ b10, 25 );  b10 -= b9 ;
			
			b9  = rotateRight( b9  ^ b0 , 16 );  b0  -= b9 ;
			b13 = rotateRight( b13 ^ b2 , 34 );  b2  -= b13;
			b11 = rotateRight( b11 ^ b6 , 56 );  b6  -= b11;
			b15 = rotateRight( b15 ^ b4 , 51 );  b4  -= b15;
			b7  = rotateRight( b7  ^ b10,  4 );  b10 -= b7 ;
			b3  = rotateRight( b3  ^ b12, 53 );  b12 -= b3 ;
			b5  = rotateRight( b5  ^ b14, 42 );  b14 -= b5 ;
			b1  = rotateRight( b1  ^ b8 , 41 );  b8  -= b1 ;
			
			b1  = rotateRight( b1  ^ b0 , 41 );  b0  -= b1 ;
			b3  = rotateRight( b3  ^ b2 ,  9 );  b2  -= b3 ;
			b5  = rotateRight( b5  ^ b4 , 37 );  b4  -= b5 ;
			b7  = rotateRight( b7  ^ b6 , 31 );  b6  -= b7 ;
			b9  = rotateRight( b9  ^ b8 , 12 );  b8  -= b9 ;
			b11 = rotateRight( b11 ^ b10, 47 );  b10 -= b11;
			b13 = rotateRight( b13 ^ b12, 44 );  b12 -= b13;
			b15 = rotateRight( b15 ^ b14, 30 );  b14 -= b15;

			// Reverse key injection for first 4 rounds
			b0  -= kw[ dm17      ];
			b1  -= kw[ dm17 + 1  ];
			b2  -= kw[ dm17 + 2  ];
			b3  -= kw[ dm17 + 3  ];
			b4  -= kw[ dm17 + 4  ];
			b5  -= kw[ dm17 + 5  ];
			b6  -= kw[ dm17 + 6  ];
			b7  -= kw[ dm17 + 7  ];
			b8  -= kw[ dm17 + 8  ];
			b9  -= kw[ dm17 + 9  ];
			b10 -= kw[ dm17 + 10 ];
			b11 -= kw[ dm17 + 11 ];
			b12 -= kw[ dm17 + 12 ];
			b13 -= kw[ dm17 + 13 ] + tw[ dm3 ];
			b14 -= kw[ dm17 + 14 ] + tw[ dm3 + 1 ];
			b15 -= kw[ dm17 + 15 ] + d;

			// Reverse first 4 mix/permute rounds
			b15 = rotateRight( b15 ^ b0 ,  5 );  b0  -= b15;
			b11 = rotateRight( b11 ^ b2 , 20 );  b2  -= b11;
			b13 = rotateRight( b13 ^ b6 , 48 );  b6  -= b13;
			b9  = rotateRight( b9  ^ b4 , 41 );  b4  -= b9 ;
			b1  = rotateRight( b1  ^ b14, 47 );  b14 -= b1 ;
			b5  = rotateRight( b5  ^ b8 , 28 );  b8  -= b5 ;
			b3  = rotateRight( b3  ^ b10, 16 );  b10 -= b3 ;
			b7  = rotateRight( b7  ^ b12, 25 );  b12 -= b7 ;

			b7  = rotateRight( b7  ^ b0 , 33 );  b0  -= b7 ;
			b5  = rotateRight( b5  ^ b2 ,  4 );  b2  -= b5 ;
			b3  = rotateRight( b3  ^ b4 , 51 );  b4  -= b3 ;
			b1  = rotateRight( b1  ^ b6 , 13 );  b6  -= b1 ;
			b15 = rotateRight( b15 ^ b12, 34 );  b12 -= b15;
			b13 = rotateRight( b13 ^ b14, 41 );  b14 -= b13;
			b11 = rotateRight( b11 ^ b8 , 59 );  b8  -= b11;
			b9  = rotateRight( b9  ^ b10, 17 );  b10 -= b9 ;

			b9  = rotateRight( b9  ^ b0 , 38 );  b0  -= b9 ;
			b13 = rotateRight( b13 ^ b2 , 19 );  b2  -= b13;
			b11 = rotateRight( b11 ^ b6 , 10 );  b6  -= b11;
			b15 = rotateRight( b15 ^ b4 , 55 );  b4  -= b15;
			b7  = rotateRight( b7  ^ b10, 49 );  b10 -= b7 ;
			b3  = rotateRight( b3  ^ b12, 18 );  b12 -= b3 ;
			b5  = rotateRight( b5  ^ b14, 23 );  b14 -= b5 ;
			b1  = rotateRight( b1  ^ b8 , 52 );  b8  -= b1 ;

			b1  = rotateRight( b1  ^ b0 , 24 );  b0  -= b1 ;
			b3  = rotateRight( b3  ^ b2 , 13 );  b2  -= b3 ;
			b5  = rotateRight( b5  ^ b4 ,  8 );  b4  -= b5 ;
			b7  = rotateRight( b7  ^ b6 , 47 );  b6  -= b7 ;
			b9  = rotateRight( b9  ^ b8 ,  8 );  b8  -= b9 ;
			b11 = rotateRight( b11 ^ b10, 17 );  b10 -= b11;
			b13 = rotateRight( b13 ^ b12, 22 );  b12 -= b13;
			b15 = rotateRight( b15 ^ b14, 37 );  b14 -= b15;
		}

		setLongLE( arr,   0, b0  - kw[  0 ] );
		setLongLE( arr,   8, b1  - kw[  1 ] );
		setLongLE( arr,  16, b2  - kw[  2 ] );
		setLongLE( arr,  24, b3  - kw[  3 ] );
		setLongLE( arr,  32, b4  - kw[  4 ] );
		setLongLE( arr,  40, b5  - kw[  5 ] );
		setLongLE( arr,  48, b6  - kw[  6 ] );
		setLongLE( arr,  56, b7  - kw[  7 ] );
		setLongLE( arr,  64, b8  - kw[  8 ] );
		setLongLE( arr,  72, b9  - kw[  9 ] );
		setLongLE( arr,  80, b10 - kw[ 10 ] );
		setLongLE( arr,  88, b11 - kw[ 11 ] );
		setLongLE( arr,  96, b12 - kw[ 12 ] );
		setLongLE( arr, 104, b13 - kw[ 13 ] - tw[ 0 ] );
		setLongLE( arr, 112, b14 - kw[ 14 ] - tw[ 1 ] );
		setLongLE( arr, 120, b15 - kw[ 15 ] );
	}
}
