// Denom.org
// bouncycastle.org

package org.denom.crypt.blockcipher;

import org.denom.Binary;

import static java.lang.Long.rotateRight;
import static java.lang.Long.rotateLeft;
import static org.denom.Binary.getLongLE;
import static org.denom.Binary.setLongLE;
import static org.denom.Binary.Bin;
import static org.denom.Ex.MUST;

/**
 * Threefish, version 1.3.
 * BlockSize = 64 bytes.
 * KeySize = 64 bytes.
 *
 * Threefish was designed by Niels Ferguson - Stefan Lucks - Bruce Schneier - Doug Whiting - Mihir Bellare
 * - Tadayoshi Kohno - Jon Callas - Jesse Walker.
 *
 * This implementation inlines all round functions, unrolls 8 rounds per loop iteration, and uses
 * small static lookup tables (round-group index mod 3 and mod 9) to speed up key schedule injection.
 */
public class Threefish512 extends BlockCipher
{
	public final static int BLOCK_SIZE = 64;
	public final static int KEY_SIZE = 64;

	private static final int BLOCK_SIZE_WORDS = BLOCK_SIZE / 8;
	private static final int ROUNDS_NUM = 72;

	// Precomputed (i % 3) and (i % 9) for subkey indices, so the round loops avoid the modulo operation.
	private static final int[] MOD3 = new int[ ROUNDS_NUM ];
	private static final int[] MOD9 = new int[ ROUNDS_NUM ];

	static
	{
		for( int i = 0; i < MOD3.length; i++ )
		{
			MOD3[ i ] = i % 3;
			MOD9[ i ] = i % 9;
		}
	}

	// Tweak schedule: tw[0], tw[1] - tweak words, tw[2] = tw[0] ^ tw[1],
	// tw[3], tw[4] - copies of tw[0], tw[1], so that tw[ dm3 + 2 ] never needs a modulo.
	// All zero until setTweak() is called.
	private long[] tw = new long[ 5 ];

	// Key schedule: kw[0..7] - key words, kw[8] - parity word,
	// kw[9..16] - copy of kw[0..7], so that kw[ dm9 + 8 ] never needs a modulo.
	private long[] kw = new long[ 2 * BLOCK_SIZE_WORDS + 1 ];

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Creates a cipher initialized with the key Bin( KEY_SIZE ).
	 */
	public Threefish512()
	{
		this( Bin( KEY_SIZE ) );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Creates a cipher initialized with the given key.
	 * @param key [64 bytes]
	 */
	public Threefish512( final Binary key )
	{
		super.initialize( BLOCK_SIZE );
		// Use the caller's key. Previously a fresh Bin( KEY_SIZE ) was installed here and 'key' was ignored,
		// which also made clone() lose the key.
		setKey( key );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Creates a new cipher instance with the same key.
	 * NOTE(review): the tweak is not passed to the new instance, so it starts with an all-zero tweak
	 * - confirm this is intended.
	 */
	@Override
	public Threefish512 clone()
	{
		return new Threefish512( this.key );
	}

	// -----------------------------------------------------------------------------------------------------------------
	@Override
	public String getAlgName()
	{
		return "Threefish-512";
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Sets the key and rebuilds the key schedule. The tweak is left unchanged.
	 * Fails (via MUST) if the key size is not KEY_SIZE.
	 * @param key [64 bytes].
	 */
	@Override
	public void setKey( final Binary key )
	{
		MUST( key.size() == KEY_SIZE, "Invalid key size" );
		this.key = key.clone();

		byte[] arr = key.getDataRef();

		// Parity word: the constant XORed with all key words.
		long knw = 0x1BD11BDAA9FC1A22L;
		for( int i = 0; i < BLOCK_SIZE_WORDS; i++ )
		{
			kw[ i ] = getLongLE( arr, i << 3 );
			knw ^= kw[ i ];
		}
		kw[ BLOCK_SIZE_WORDS ] = knw;
		// Duplicate the key words after the parity word, so that indices dm9 .. dm9 + 8 stay inside the array.
		System.arraycopy( kw, 0, kw, BLOCK_SIZE_WORDS + 1, BLOCK_SIZE_WORDS );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Generates a new key of KEY_SIZE bytes with Binary.randomSecure and sets it as the current key.
	 * @return generated key.
	 */
	@Override
	public Binary generateKey()
	{
		Binary akey = new Binary().randomSecure( KEY_SIZE );
		setKey( akey );
		return akey;
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Sets the tweak and rebuilds the tweak schedule. The key is left unchanged.
	 * Fails (via MUST) if the tweak size is not 16 bytes.
	 * @param tweak [16 bytes], read as two little-endian 64-bit words.
	 */
	public void setTweak( final Binary tweak )
	{
		MUST( tweak.size() == 16, "Incorrect tweak size" );
		byte[] arr = tweak.getDataRef();
		tw[ 0 ] = getLongLE( arr, 0 );
		tw[ 1 ] = getLongLE( arr, 8 );
		tw[ 2 ] = tw[ 0 ] ^ tw[ 1 ];
		// Copies of the first two words, so that indices dm3 .. dm3 + 2 stay inside the array.
		tw[ 3 ] = tw[ 0 ];
		tw[ 4 ] = tw[ 1 ];
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Encrypts one block in place using the current key and tweak.
	 * Fails (via MUST) if the block size is not BLOCK_SIZE.
	 * @param block [64 bytes], overwritten with the ciphertext.
	 */
	@Override
	public void encryptBlock( Binary block )
	{
		MUST( block.size() == BLOCK_SIZE, "Incorrect block size" );

		// Load the block as 8 little-endian words and inject subkey 0.
		byte[] arr = block.getDataRef();
		long b0 = getLongLE( arr, 0 )  + kw[ 0 ];
		long b1 = getLongLE( arr, 8 )  + kw[ 1 ];
		long b2 = getLongLE( arr, 16 ) + kw[ 2 ];
		long b3 = getLongLE( arr, 24 ) + kw[ 3 ];
		long b4 = getLongLE( arr, 32 ) + kw[ 4 ];
		long b5 = getLongLE( arr, 40 ) + kw[ 5 ] + tw[ 0 ];
		long b6 = getLongLE( arr, 48 ) + kw[ 6 ] + tw[ 1 ];
		long b7 = getLongLE( arr, 56 ) + kw[ 7 ];

		// Rounds loop, unrolled to 8 rounds per iteration. Unrolling to multiples of 4 avoids
		// the mod 4 check for key injection, and allows inlining of the permutations, which
		// cycle every 4 rounds (avoiding array index/lookup). Unrolling to multiples of 8
		// avoids the mod 8 rotation constant lookup, and allows inlining constant rotation
		// values (avoiding array index/lookup).

		// d is the index of the subkey injected after the first 4 rounds of the iteration,
		// d + 1 - after the next 4 rounds. d = 1, 3, ..., 17.
		for( int d = 1; d < (ROUNDS_NUM / 4); d += 2 )
		{
			final int dm9 = MOD9[ d ];
			final int dm3 = MOD3[ d ];

			// 4 rounds of mix and permute. Permute schedule has a 4 round cycle, so permutes
			// are inlined in the mix operations in each 4 round block.
			b0 += b1;  b1 = rotateLeft( b1, 46 ) ^ b0;
			b2 += b3;  b3 = rotateLeft( b3, 36 ) ^ b2;
			b4 += b5;  b5 = rotateLeft( b5, 19 ) ^ b4;
			b6 += b7;  b7 = rotateLeft( b7, 37 ) ^ b6;

			b2 += b1;  b1 = rotateLeft( b1, 33 ) ^ b2;
			b4 += b7;  b7 = rotateLeft( b7, 27 ) ^ b4;
			b6 += b5;  b5 = rotateLeft( b5, 14 ) ^ b6;
			b0 += b3;  b3 = rotateLeft( b3, 42 ) ^ b0;

			b4 += b1;  b1 = rotateLeft( b1, 17 ) ^ b4;
			b6 += b3;  b3 = rotateLeft( b3, 49 ) ^ b6;
			b0 += b5;  b5 = rotateLeft( b5, 36 ) ^ b0;
			b2 += b7;  b7 = rotateLeft( b7, 39 ) ^ b2;

			b6 += b1;  b1 = rotateLeft( b1, 44 ) ^ b6;
			b0 += b7;  b7 = rotateLeft( b7,  9 ) ^ b0;
			b2 += b5;  b5 = rotateLeft( b5, 54 ) ^ b2;
			b4 += b3;  b3 = rotateLeft( b3, 56 ) ^ b4;

			// Subkey injection for first 4 rounds.
			b0 += kw[ dm9 ];
			b1 += kw[ dm9 + 1 ];
			b2 += kw[ dm9 + 2 ];
			b3 += kw[ dm9 + 3 ];
			b4 += kw[ dm9 + 4 ];
			b5 += kw[ dm9 + 5 ] + tw[ dm3 ];
			b6 += kw[ dm9 + 6 ] + tw[ dm3 + 1 ];
			b7 += kw[ dm9 + 7 ] + d;

			// 4 more rounds of mix/permute
			b0 += b1;  b1 = rotateLeft( b1, 39 ) ^ b0;
			b2 += b3;  b3 = rotateLeft( b3, 30 ) ^ b2;
			b4 += b5;  b5 = rotateLeft( b5, 34 ) ^ b4;
			b6 += b7;  b7 = rotateLeft( b7, 24 ) ^ b6;

			b2 += b1;  b1 = rotateLeft( b1, 13 ) ^ b2;
			b4 += b7;  b7 = rotateLeft( b7, 50 ) ^ b4;
			b6 += b5;  b5 = rotateLeft( b5, 10 ) ^ b6;
			b0 += b3;  b3 = rotateLeft( b3, 17 ) ^ b0;

			b4 += b1;  b1 = rotateLeft( b1, 25 ) ^ b4;
			b6 += b3;  b3 = rotateLeft( b3, 29 ) ^ b6;
			b0 += b5;  b5 = rotateLeft( b5, 39 ) ^ b0;
			b2 += b7;  b7 = rotateLeft( b7, 43 ) ^ b2;

			b6 += b1;  b1 = rotateLeft( b1,  8 ) ^ b6;
			b0 += b7;  b7 = rotateLeft( b7, 35 ) ^ b0;
			b2 += b5;  b5 = rotateLeft( b5, 56 ) ^ b2;
			b4 += b3;  b3 = rotateLeft( b3, 22 ) ^ b4;

			// Subkey injection for next 4 rounds.
			b0 += kw[ dm9 + 1 ];
			b1 += kw[ dm9 + 2 ];
			b2 += kw[ dm9 + 3 ];
			b3 += kw[ dm9 + 4 ];
			b4 += kw[ dm9 + 5 ];
			b5 += kw[ dm9 + 6 ] + tw[ dm3 + 1 ];
			b6 += kw[ dm9 + 7 ] + tw[ dm3 + 2 ];
			b7 += kw[ dm9 + 8 ] + d + 1;
		}

		// Store the result back into the same buffer.
		setLongLE( arr,  0, b0 );
		setLongLE( arr,  8, b1 );
		setLongLE( arr, 16, b2 );
		setLongLE( arr, 24, b3 );
		setLongLE( arr, 32, b4 );
		setLongLE( arr, 40, b5 );
		setLongLE( arr, 48, b6 );
		setLongLE( arr, 56, b7 );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Decrypts one block in place using the current key and tweak.
	 * Performs the steps of encryptBlock in reverse order.
	 * Fails (via MUST) if the block size is not BLOCK_SIZE.
	 * @param block [64 bytes], overwritten with the plaintext.
	 */
	@Override
	public void decryptBlock( Binary block )
	{
		MUST( block.size() == BLOCK_SIZE, "Incorrect block size" );

		byte[] arr = block.getDataRef();
		long b0 = getLongLE( arr, 0 );
		long b1 = getLongLE( arr, 8 );
		long b2 = getLongLE( arr, 16 );
		long b3 = getLongLE( arr, 24 );
		long b4 = getLongLE( arr, 32 );
		long b5 = getLongLE( arr, 40 );
		long b6 = getLongLE( arr, 48 );
		long b7 = getLongLE( arr, 56 );

		// Same 8-round unrolling as in encryptBlock, walking subkeys backwards: d = 17, 15, ..., 1.
		for( int d = (ROUNDS_NUM / 4) - 1; d >= 1; d -= 2 )
		{
			final int dm9 = MOD9[ d ];
			final int dm3 = MOD3[ d ];

			// Reverse key injection for second 4 rounds
			b0 -= kw[ dm9 + 1 ];
			b1 -= kw[ dm9 + 2 ];
			b2 -= kw[ dm9 + 3 ];
			b3 -= kw[ dm9 + 4 ];
			b4 -= kw[ dm9 + 5 ];
			b5 -= kw[ dm9 + 6 ] + tw[ dm3 + 1 ];
			b6 -= kw[ dm9 + 7 ] + tw[ dm3 + 2 ];
			b7 -= kw[ dm9 + 8 ] + d + 1;

			// Reverse second 4 mix/permute rounds
			b1 = rotateRight( b1 ^ b6,  8 );  b6 -= b1;
			b7 = rotateRight( b7 ^ b0, 35 );  b0 -= b7;
			b5 = rotateRight( b5 ^ b2, 56 );  b2 -= b5;
			b3 = rotateRight( b3 ^ b4, 22 );  b4 -= b3;

			b1 = rotateRight( b1 ^ b4, 25 );  b4 -= b1;
			b3 = rotateRight( b3 ^ b6, 29 );  b6 -= b3;
			b5 = rotateRight( b5 ^ b0, 39 );  b0 -= b5;
			b7 = rotateRight( b7 ^ b2, 43 );  b2 -= b7;

			b1 = rotateRight( b1 ^ b2, 13 );  b2 -= b1;
			b7 = rotateRight( b7 ^ b4, 50 );  b4 -= b7;
			b5 = rotateRight( b5 ^ b6, 10 );  b6 -= b5;
			b3 = rotateRight( b3 ^ b0, 17 );  b0 -= b3;

			b1 = rotateRight( b1 ^ b0, 39 );  b0 -= b1;
			b3 = rotateRight( b3 ^ b2, 30 );  b2 -= b3;
			b5 = rotateRight( b5 ^ b4, 34 );  b4 -= b5;
			b7 = rotateRight( b7 ^ b6, 24 );  b6 -= b7;

			// Reverse key injection for first 4 rounds
			b0 -= kw[ dm9 ];
			b1 -= kw[ dm9 + 1 ];
			b2 -= kw[ dm9 + 2 ];
			b3 -= kw[ dm9 + 3 ];
			b4 -= kw[ dm9 + 4 ];
			b5 -= kw[ dm9 + 5 ] + tw[ dm3 ];
			b6 -= kw[ dm9 + 6 ] + tw[ dm3 + 1 ];
			b7 -= kw[ dm9 + 7 ] + d;

			// Reverse first 4 mix/permute rounds
			b1 = rotateRight( b1 ^ b6, 44 );  b6 -= b1;
			b7 = rotateRight( b7 ^ b0,  9 );  b0 -= b7;
			b5 = rotateRight( b5 ^ b2, 54 );  b2 -= b5;
			b3 = rotateRight( b3 ^ b4, 56 );  b4 -= b3;

			b1 = rotateRight( b1 ^ b4, 17 );  b4 -= b1;
			b3 = rotateRight( b3 ^ b6, 49 );  b6 -= b3;
			b5 = rotateRight( b5 ^ b0, 36 );  b0 -= b5;
			b7 = rotateRight( b7 ^ b2, 39 );  b2 -= b7;

			b1 = rotateRight( b1 ^ b2, 33 );  b2 -= b1;
			b7 = rotateRight( b7 ^ b4, 27 );  b4 -= b7;
			b5 = rotateRight( b5 ^ b6, 14 );  b6 -= b5;
			b3 = rotateRight( b3 ^ b0, 42 );  b0 -= b3;

			b1 = rotateRight( b1 ^ b0, 46 );  b0 -= b1;
			b3 = rotateRight( b3 ^ b2, 36 );  b2 -= b3;
			b5 = rotateRight( b5 ^ b4, 19 );  b4 -= b5;
			b7 = rotateRight( b7 ^ b6, 37 );  b6 -= b7;
		}

		// Remove subkey 0 and store the result back into the same buffer.
		setLongLE( arr, 0, b0 - kw[ 0 ] );
		setLongLE( arr, 8, b1 - kw[ 1 ] );
		setLongLE( arr, 16, b2 - kw[ 2 ] );
		setLongLE( arr, 24, b3 - kw[ 3 ] );
		setLongLE( arr, 32, b4 - kw[ 4 ] );
		setLongLE( arr, 40, b5 - kw[ 5 ] - tw[ 0 ] );
		setLongLE( arr, 48, b6 - kw[ 6 ] - tw[ 1 ] );
		setLongLE( arr, 56, b7 - kw[ 7 ] );
	}
}
