// Denom.org
// bouncycastle.org

package org.denom.crypt.blockcipher;

import org.denom.Binary;

import static java.lang.Long.rotateRight;
import static java.lang.Long.rotateLeft;
import static org.denom.Binary.getLongLE;
import static org.denom.Binary.setLongLE;
import static org.denom.Binary.Bin;
import static org.denom.Ex.MUST;

/**
 * Threefish, version 1.3.
 * BlockSize = 32 bytes.
 * KeySize = 32 bytes.
 *
 * Threefish was designed by Niels Ferguson - Stefan Lucks - Bruce Schneier - Doug Whiting - Mihir Bellare
 * - Tadayoshi Kohno - Jon Callas - Jesse Walker.
 *
 * This implementation inlines all round functions, unrolls 8 rounds per loop iteration, and uses two small
 * static lookup tables (precomputed i % 5 and i % 3) to speed up subkey injection.
 */
public class Threefish256 extends BlockCipher
{
	/** Cipher block size in bytes. */
	public final static int BLOCK_SIZE = 32;
	/** Cipher key size in bytes. */
	public final static int KEY_SIZE = 32;

	/** Number of 64-bit words in one block. */
	private static final int BLOCK_SIZE_WORDS = BLOCK_SIZE / 8;
	/** Total number of Threefish-256 rounds. */
	private static final int ROUNDS_NUM = 72;

	// Precomputed i % 5 and i % 3, indexed by subkey number, so the round loops avoid the modulo operation.
	private static final int[] MOD5 = new int[ ROUNDS_NUM ];
	private static final int[] MOD3 = new int[ ROUNDS_NUM ];

	static
	{
		for( int i = 0; i < MOD5.length; i++ )
		{
			MOD5[ i ] = i % 5;
			MOD3[ i ] = i % 3;
		}
	}

	// Tweak words: t0, t1, t0 ^ t1, followed by a copy of t0, t1,
	// so that tw[ dm3 + 2 ] never needs index wrap-around. All zero until setTweak is called.
	private final long[] tw = new long[ 5 ];

	// Key words: k0..k3, the parity word, followed by a copy of k0..k3,
	// so that kw[ dm5 + 4 ] never needs index wrap-around.
	private final long[] kw = new long[ 2 * BLOCK_SIZE_WORDS + 1 ];

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Create a cipher with the default key Bin( KEY_SIZE ).
	 */
	public Threefish256()
	{
		this( Bin( KEY_SIZE ) );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Create a cipher that uses the given key.
	 * @param key [32 bytes]
	 */
	public Threefish256( final Binary key )
	{
		super.initialize( BLOCK_SIZE );
		// Install the caller's key; its size is validated inside setKey.
		setKey( key );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Create a new instance initialized with the key of this one.
	 * The tweak is not carried over: the new instance starts with an all-zero tweak.
	 */
	@Override
	public Threefish256 clone()
	{
		return new Threefish256( this.key );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * @return algorithm name, "Threefish-256".
	 */
	@Override
	public String getAlgName()
	{
		return "Threefish-256";
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Set the cipher key and rebuild the extended key word table.
	 * A key of any other size is rejected by the MUST check.
	 * @param key [32 bytes].
	 */
	@Override
	public void setKey( final Binary key )
	{
		MUST( key.size() == KEY_SIZE, "Invalid key size" );
		this.key = key.clone();

		// Key schedule parity word: the Threefish constant C240 XOR-ed with all key words.
		long knw = 0x1BD11BDAA9FC1A22L;
		for( int i = 0; i < BLOCK_SIZE_WORDS; i++ )
		{
			kw[ i ] = key.getLongLE( i << 3 );
			knw ^= kw[ i ];
		}
		kw[ BLOCK_SIZE_WORDS ] = knw;
		// Duplicate k0..k3 after the parity word to avoid index wrap-around in the round loops.
		System.arraycopy( kw, 0, kw, BLOCK_SIZE_WORDS + 1, BLOCK_SIZE_WORDS );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Generate a random key of KEY_SIZE bytes and set it as the current key.
	 * @return the generated key.
	 */
	@Override
	public Binary generateKey()
	{
		Binary akey = new Binary().randomSecure( KEY_SIZE );
		setKey( akey );
		return akey;
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Set the tweak used by subsequent encryptBlock / decryptBlock calls.
	 * @param tweak [16 bytes], read as two little-endian 64-bit words.
	 */
	public void setTweak( final Binary tweak )
	{
		MUST( tweak.size() == 16, "Incorrect tweak size" );
		byte[] arr = tweak.getDataRef();
		tw[ 0 ] = getLongLE( arr, 0 );
		tw[ 1 ] = getLongLE( arr, 8 );
		tw[ 2 ] = tw[ 0 ] ^ tw[ 1 ];
		// Duplicate t0, t1 to avoid index wrap-around in the round loops.
		tw[ 3 ] = tw[ 0 ];
		tw[ 4 ] = tw[ 1 ];
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Encrypt one block with the current key and tweak.
	 * The result is written back into the array obtained from block.getDataRef().
	 * @param block [32 bytes].
	 */
	@Override
	public void encryptBlock( Binary block )
	{
		MUST( block.size() == BLOCK_SIZE, "Incorrect block size" );

		byte[] arr = block.getDataRef();
		long b0 = getLongLE( arr, 0 );
		long b1 = getLongLE( arr, 8 );
		long b2 = getLongLE( arr, 16 );
		long b3 = getLongLE( arr, 24 );

		// First subkey injection.
		b0 += kw[ 0 ];
		b1 += kw[ 1 ] + tw[ 0 ];
		b2 += kw[ 2 ] + tw[ 1 ];
		b3 += kw[ 3 ];

		// Rounds loop, unrolled to 8 rounds per iteration. Unrolling to multiples of 4 avoids
		// the mod 4 check for key injection, and allows inlining of the permutations, which
		// cycle every 2 rounds (avoiding array index/lookup). Unrolling to multiples of 8
		// avoids the mod 8 rotation constant lookup, and allows inlining constant rotation
		// values (avoiding array index/lookup).

		// d is the number of the subkey injected after the first 4 rounds of the iteration.
		for( int d = 1; d < (ROUNDS_NUM / 4); d += 2 )
		{
			final int dm5 = MOD5[ d ];
			final int dm3 = MOD3[ d ];

			// 4 rounds of mix and permute. Permute schedule has a 2 round cycle,
			// so permutes are inlined in the mix operations in each 4 round block.
			b0 += b1;  b1 = rotateLeft( b1, 14 ) ^ b0;
			b2 += b3;  b3 = rotateLeft( b3, 16 ) ^ b2;
			b0 += b3;  b3 = rotateLeft( b3, 52 ) ^ b0;
			b2 += b1;  b1 = rotateLeft( b1, 57 ) ^ b2;
			b0 += b1;  b1 = rotateLeft( b1, 23 ) ^ b0;
			b2 += b3;  b3 = rotateLeft( b3, 40 ) ^ b2;
			b0 += b3;  b3 = rotateLeft( b3,  5 ) ^ b0;
			b2 += b1;  b1 = rotateLeft( b1, 37 ) ^ b2;

			// Subkey injection for first 4 rounds.
			b0 += kw[ dm5 ];
			b1 += kw[ dm5 + 1 ] + tw[ dm3 ];
			b2 += kw[ dm5 + 2 ] + tw[ dm3 + 1 ];
			b3 += kw[ dm5 + 3 ] + d;

			// 4 more rounds of mix/permute
			b0 += b1;  b1 = rotateLeft( b1, 25 ) ^ b0;
			b2 += b3;  b3 = rotateLeft( b3, 33 ) ^ b2;
			b0 += b3;  b3 = rotateLeft( b3, 46 ) ^ b0;
			b2 += b1;  b1 = rotateLeft( b1, 12 ) ^ b2;
			b0 += b1;  b1 = rotateLeft( b1, 58 ) ^ b0;
			b2 += b3;  b3 = rotateLeft( b3, 22 ) ^ b2;
			b0 += b3;  b3 = rotateLeft( b3, 32 ) ^ b0;
			b2 += b1;  b1 = rotateLeft( b1, 32 ) ^ b2;

			// Subkey injection for next 4 rounds.
			b0 += kw[ dm5 + 1 ];
			b1 += kw[ dm5 + 2 ] + tw[ dm3 + 1 ];
			b2 += kw[ dm5 + 3 ] + tw[ dm3 + 2 ];
			b3 += kw[ dm5 + 4 ] + d + 1;
		}

		setLongLE( arr,  0, b0 );
		setLongLE( arr,  8, b1 );
		setLongLE( arr, 16, b2 );
		setLongLE( arr, 24, b3 );
	}

	// -----------------------------------------------------------------------------------------------------------------
	/**
	 * Decrypt one block with the current key and tweak; exact inverse of encryptBlock.
	 * The result is written back into the array obtained from block.getDataRef().
	 * @param block [32 bytes].
	 */
	@Override
	public void decryptBlock( Binary block )
	{
		MUST( block.size() == BLOCK_SIZE, "Incorrect block size" );

		byte[] arr = block.getDataRef();
		long b0 = getLongLE( arr, 0 );
		long b1 = getLongLE( arr, 8 );
		long b2 = getLongLE( arr, 16 );
		long b3 = getLongLE( arr, 24 );

		// Walk the subkeys in the reverse order of encryptBlock, undoing 8 rounds per iteration.
		for( int d = (ROUNDS_NUM / 4) - 1; d >= 1; d -= 2 )
		{
			final int dm5 = MOD5[ d ];
			final int dm3 = MOD3[ d ];

			// Reverse key injection for second 4 rounds
			b0 -= kw[ dm5 + 1 ];
			b1 -= kw[ dm5 + 2 ] + tw[ dm3 + 1 ];
			b2 -= kw[ dm5 + 3 ] + tw[ dm3 + 2 ];
			b3 -= kw[ dm5 + 4 ] + d + 1;

			// Reverse second 4 mix/permute rounds
			b3 = rotateRight( b3 ^ b0, 32 );  b0 -= b3;
			b1 = rotateRight( b1 ^ b2, 32 );  b2 -= b1;
			b1 = rotateRight( b1 ^ b0, 58 );  b0 -= b1;
			b3 = rotateRight( b3 ^ b2, 22 );  b2 -= b3;
			b3 = rotateRight( b3 ^ b0, 46 );  b0 -= b3;
			b1 = rotateRight( b1 ^ b2, 12 );  b2 -= b1;
			b1 = rotateRight( b1 ^ b0, 25 );  b0 -= b1;
			b3 = rotateRight( b3 ^ b2, 33 );  b2 -= b3;

			// Reverse key injection for first 4 rounds
			b0 -= kw[ dm5 ];
			b1 -= kw[ dm5 + 1 ] + tw[ dm3 ];
			b2 -= kw[ dm5 + 2 ] + tw[ dm3 + 1 ];
			b3 -= kw[ dm5 + 3 ] + d;

			// Reverse first 4 mix/permute rounds
			b3 = rotateRight( b3 ^ b0,  5 );  b0 -= b3;
			b1 = rotateRight( b1 ^ b2, 37 );  b2 -= b1;
			b1 = rotateRight( b1 ^ b0, 23 );  b0 -= b1;
			b3 = rotateRight( b3 ^ b2, 40 );  b2 -= b3;
			b3 = rotateRight( b3 ^ b0, 52 );  b0 -= b3;
			b1 = rotateRight( b1 ^ b2, 57 );  b2 -= b1;
			b1 = rotateRight( b1 ^ b0, 14 );  b0 -= b1;
			b3 = rotateRight( b3 ^ b2, 16 );  b2 -= b3;
		}

		// Undo the first subkey injection while storing the result.
		setLongLE( arr, 0,  b0 - kw[ 0 ] );
		setLongLE( arr, 8,  b1 - kw[ 1 ] - tw[ 0 ] );
		setLongLE( arr, 16, b2 - kw[ 2 ] - tw[ 1 ] );
		setLongLE( arr, 24, b3 - kw[ 3 ] );
	}

}
