--- truecrypt/crypto/serpent.c 2018/04/24 16:41:45 1.1 +++ truecrypt/crypto/serpent.c 2018/04/24 17:07:16 1.1.1.6 @@ -1,643 +1,943 @@ -// Except as noted below copyright in this code is held by Dr B.R. Gladman -// but free direct or derivative use is permitted subject to acknowledgement -// of its origin and subject to any constraints placed on the use of the -// algorithm by its designers (if such constraints may exist, this will be -// indicated below). -// -// Dr. B. R. Gladman ( ). 25th January 2000. -// -// This is an implementation of Serpent, an encryption algorithm designed -// by Anderson, Biham and Knudsen and submitted as a candidate for the -// Advanced Encryption Standard programme of the US National Institute of -// Standards and Technology. -// -// The designers of Serpent have not placed any constraints on the use of -// this algorithm. -// -// The S box expressions used below are Copyright (C) 2000 Dag Arne Osvik. - -/* Support for multithreaded operation added by TrueCrypt Foundation */ - -#include "serpent.h" - -//static u4byte serpent_l_key[140]; - -#define sb0(r0,r1,r2,r3,r4) \ - __asm { \ - __asm xor r3,r0 \ - __asm mov r4,r1 \ - __asm and r1,r3 \ - __asm xor r4,r2 \ - __asm xor r1,r0 \ - __asm or r0,r3 \ - __asm xor r0,r4 \ - __asm xor r4,r3 \ - __asm xor r3,r2 \ - __asm or r2,r1 \ - __asm xor r2,r4 \ - __asm not r4 \ - __asm or r4,r1 \ - __asm xor r1,r3 \ - __asm xor r1,r4 \ - __asm or r3,r0 \ - __asm xor r1,r3 \ - __asm xor r4,r3 \ - } - -#define ib0(r0,r1,r2,r3,r4) \ - __asm { \ - __asm not r2 \ - __asm mov r4,r1 \ - __asm or r1,r0 \ - __asm not r4 \ - __asm xor r1,r2 \ - __asm or r2,r4 \ - __asm xor r1,r3 \ - __asm xor r0,r4 \ - __asm xor r2,r0 \ - __asm and r0,r3 \ - __asm xor r4,r0 \ - __asm or r0,r1 \ - __asm xor r0,r2 \ - __asm xor r3,r4 \ - __asm xor r2,r1 \ - __asm xor r3,r0 \ - __asm xor r3,r1 \ - __asm and r2,r3 \ - __asm xor r4,r2 \ - } - -#define sb1(r0,r1,r2,r3,r4) \ - __asm { \ - __asm not r0 \ - __asm not r2 \ - __asm mov r4,r0 \ - __asm and r0,r1 \ - __asm xor r2,r0 \ - __asm or r0,r3 \ - __asm xor r3,r2 \ - __asm xor r1,r0 \ - __asm xor r0,r4 \ - __asm or r4,r1 \ - __asm xor r1,r3 \ - __asm or r2,r0 \ - __asm and r2,r4 \ - __asm xor r0,r1 \ - __asm and r1,r2 \ - __asm xor r1,r0 \ - __asm and r0,r2 \ - __asm xor r0,r4 \ - } - -#define ib1(r0,r1,r2,r3,r4) \ - __asm { \ - __asm mov r4,r1 \ - __asm xor r1,r3 \ - __asm and r3,r1 \ - __asm xor r4,r2 \ - __asm xor r3,r0 \ - __asm or r0,r1 \ - __asm xor r2,r3 \ - __asm xor r0,r4 \ - __asm or r0,r2 \ - __asm xor r1,r3 \ - __asm xor r0,r1 \ - __asm or r1,r3 \ - __asm xor r1,r0 \ - __asm not r4 \ - __asm xor r4,r1 \ - __asm or r1,r0 \ - __asm xor r1,r0 \ - __asm or r1,r4 \ - __asm xor r3,r1 \ - } - -#define sb2(r0,r1,r2,r3,r4) \ - __asm { \ - __asm mov r4,r0 \ - __asm and r0,r2 \ - __asm xor r0,r3 \ - __asm xor r2,r1 \ - __asm xor r2,r0 \ - __asm or r3,r4 \ - __asm xor r3,r1 \ - __asm xor r4,r2 \ - __asm mov r1,r3 \ - __asm or r3,r4 \ - __asm xor r3,r0 \ - __asm and r0,r1 \ - __asm xor r4,r0 \ - __asm xor r1,r3 \ - __asm xor r1,r4 \ - __asm not r4 \ - } - -#define ib2(r0,r1,r2,r3,r4) \ - __asm { \ - __asm xor r2,r3 \ - __asm xor r3,r0 \ - __asm mov r4,r3 \ - __asm and r3,r2 \ - __asm xor r3,r1 \ - __asm or r1,r2 \ - __asm xor r1,r4 \ - __asm and r4,r3 \ - __asm xor r2,r3 \ - __asm and r4,r0 \ - __asm xor r4,r2 \ - __asm and r2,r1 \ - __asm or r2,r0 \ - __asm not r3 \ - __asm xor r2,r3 \ - __asm xor r0,r3 \ - __asm and r0,r1 \ - __asm xor r3,r4 \ - __asm xor r3,r0 \ - } - -#define sb3(r0,r1,r2,r3,r4) \ - __asm { \ - __asm mov r4,r0 \ - __asm or r0,r3 \ - __asm xor r3,r1 \ - __asm and r1,r4 \ - __asm xor r4,r2 \ - __asm xor r2,r3 \ - __asm and r3,r0 \ - __asm or r4,r1 \ - __asm xor r3,r4 \ - __asm xor r0,r1 \ - __asm and r4,r0 \ - __asm xor r1,r3 \ - __asm xor r4,r2 \ - __asm or r1,r0 \ - __asm xor r1,r2 \ - __asm xor r0,r3 \ - __asm mov r2,r1 \ - __asm or r1,r3 \ - __asm xor r1,r0 \ - } - -#define ib3(r0,r1,r2,r3,r4) \ - __asm { \ - __asm mov r4,r2 \ - __asm xor r2,r1 \ - __asm and r1,r2 \ - __asm xor r1,r0 \ - __asm and r0,r4 \ - __asm xor r4,r3 \ - __asm or r3,r1 \ - __asm xor r3,r2 \ - __asm xor r0,r4 \ - __asm xor r2,r0 \ - __asm or r0,r3 \ - __asm xor r0,r1 \ - __asm xor r4,r2 \ - __asm and r2,r3 \ - __asm or r1,r3 \ - __asm xor r1,r2 \ - __asm xor r4,r0 \ - __asm xor r2,r4 \ - } - -#define sb4(r0,r1,r2,r3,r4) \ - __asm { \ - __asm xor r1,r3 \ - __asm not r3 \ - __asm xor r2,r3 \ - __asm xor r3,r0 \ - __asm mov r4,r1 \ - __asm and r1,r3 \ - __asm xor r1,r2 \ - __asm xor r4,r3 \ - __asm xor r0,r4 \ - __asm and r2,r4 \ - __asm xor r2,r0 \ - __asm and r0,r1 \ - __asm xor r3,r0 \ - __asm or r4,r1 \ - __asm xor r4,r0 \ - __asm or r0,r3 \ - __asm xor r0,r2 \ - __asm and r2,r3 \ - __asm not r0 \ - __asm xor r4,r2 \ - } - -#define ib4(r0,r1,r2,r3,r4) \ - __asm { \ - __asm mov r4,r2 \ - __asm and r2,r3 \ - __asm xor r2,r1 \ - __asm or r1,r3 \ - __asm and r1,r0 \ - __asm xor r4,r2 \ - __asm xor r4,r1 \ - __asm and r1,r2 \ - __asm not r0 \ - __asm xor r3,r4 \ - __asm xor r1,r3 \ - __asm and r3,r0 \ - __asm xor r3,r2 \ - __asm xor r0,r1 \ - __asm and r2,r0 \ - __asm xor r3,r0 \ - __asm xor r2,r4 \ - __asm or r2,r3 \ - __asm xor r3,r0 \ - __asm xor r2,r1 \ - } - -#define sb5(r0,r1,r2,r3,r4) \ - __asm { \ - __asm xor r0,r1 \ - __asm xor r1,r3 \ - __asm not r3 \ - __asm mov r4,r1 \ - __asm and r1,r0 \ - __asm xor r2,r3 \ - __asm xor r1,r2 \ - __asm or r2,r4 \ - __asm xor r4,r3 \ - __asm and r3,r1 \ - __asm xor r3,r0 \ - __asm xor r4,r1 \ - __asm xor r4,r2 \ - __asm xor r2,r0 \ - __asm and r0,r3 \ - __asm not r2 \ - __asm xor r0,r4 \ - __asm or r4,r3 \ - __asm xor r2,r4 \ - } - -#define ib5(r0,r1,r2,r3,r4) \ - __asm { \ - __asm not r1 \ - __asm mov r4,r3 \ - __asm xor r2,r1 \ - __asm or r3,r0 \ - __asm xor r3,r2 \ - __asm or r2,r1 \ - __asm and r2,r0 \ - __asm xor r4,r3 \ - __asm xor r2,r4 \ - __asm or r4,r0 \ - __asm xor r4,r1 \ - __asm and r1,r2 \ - __asm xor r1,r3 \ - __asm xor r4,r2 \ - __asm and r3,r4 \ - __asm xor r4,r1 \ - __asm xor r3,r0 \ - __asm xor r3,r4 \ - __asm not r4 \ - } - -#define sb6(r0,r1,r2,r3,r4) \ - __asm { \ - __asm not r2 \ - __asm mov r4,r3 \ - __asm and r3,r0 \ - __asm xor r0,r4 \ - __asm xor r3,r2 \ - __asm or r2,r4 \ - __asm xor r1,r3 \ - __asm xor r2,r0 \ - __asm or r0,r1 \ - __asm xor r2,r1 \ - __asm xor r4,r0 \ - __asm or r0,r3 \ - __asm xor r0,r2 \ - __asm xor r4,r3 \ - __asm xor r4,r0 \ - __asm not r3 \ - __asm and r2,r4 \ - __asm xor r2,r3 \ - } - -#define ib6(r0,r1,r2,r3,r4) \ - __asm { \ - __asm xor r0,r2 \ - __asm mov r4,r2 \ - __asm and r2,r0 \ - __asm xor r4,r3 \ - __asm not r2 \ - __asm xor r3,r1 \ - __asm xor r2,r3 \ - __asm or r4,r0 \ - __asm xor r0,r2 \ - __asm xor r3,r4 \ - __asm xor r4,r1 \ - __asm and r1,r3 \ - __asm xor r1,r0 \ - __asm xor r0,r3 \ - __asm or r0,r2 \ - __asm xor r3,r1 \ - __asm xor r4,r0 \ - } - -#define sb7(r0,r1,r2,r3,r4) \ - __asm { \ - __asm mov r4,r2 \ - __asm and r2,r1 \ - __asm xor r2,r3 \ - __asm and r3,r1 \ - __asm xor r4,r2 \ - __asm xor r2,r1 \ - __asm xor r1,r0 \ - __asm or r0,r4 \ - __asm xor r0,r2 \ - __asm xor r3,r1 \ - __asm xor r2,r3 \ - __asm and r3,r0 \ - __asm xor r3,r4 \ - __asm xor r4,r2 \ - __asm and r2,r0 \ - __asm not r4 \ - __asm xor r2,r4 \ - __asm and r4,r0 \ - __asm xor r1,r3 \ - __asm xor r4,r1 \ - } - -#define ib7(r0,r1,r2,r3,r4) \ - __asm { \ - __asm mov r4,r2 \ - __asm xor r2,r0 \ - __asm and r0,r3 \ - __asm not r2 \ - __asm or r4,r3 \ - __asm xor r3,r1 \ - __asm or r1,r0 \ - __asm xor r0,r2 \ - __asm and r2,r4 \ - __asm xor r1,r2 \ - __asm xor r2,r0 \ - __asm or r0,r2 \ - __asm and r3,r4 \ - __asm xor r0,r3 \ - __asm xor r4,r1 \ - __asm xor r3,r4 \ - __asm or r4,r0 \ - __asm xor r3,r2 \ - __asm xor r4,r2 \ - } - -#define f_key(r0,r1,r2,r3) \ - __asm { \ - __asm mov r0,[esi] \ - __asm mov r1,[esi+4] \ - __asm mov r2,[esi+8] \ - __asm mov r3,[esi+12] \ - } - -#define t_key(r0,r1,r2,r3) \ - __asm { \ - __asm mov [esi ],r0 \ - __asm mov [esi+4],r1 \ - __asm mov [esi+8],r2 \ - __asm mov [esi+12],r3 \ - __asm add esi,16 \ - } - -#define f_xor(r0,r1,r2,r3) \ - __asm { \ - __asm xor r0,[esi] \ - __asm xor r1,[esi+4] \ - __asm xor r2,[esi+8] \ - __asm xor r3,[esi+12] \ - __asm add esi,16 \ - } - -#define i_xor(r0,r1,r2,r3) \ - __asm { \ - __asm sub esi,16 \ - __asm xor r0,[esi] \ - __asm xor r1,[esi+4] \ - __asm xor r2,[esi+8] \ - __asm xor r3,[esi+12] \ - } - -// the linear transformation and its inverse - -#define rot(r0,r1,r2,r3,r4) \ - __asm { \ - __asm rol r0,13 \ - __asm rol r2,3 \ - __asm mov r4,r0 \ - __asm sal r4,3 \ - __asm xor r3,r2 \ - __asm xor r3,r4 \ - __asm xor r1,r0 \ - __asm xor r1,r2 \ - __asm rol r1,1 \ - __asm rol r3,7 \ - __asm mov r4,r1 \ - __asm xor r0,r1 \ - __asm xor r0,r3 \ - __asm sal r4,7 \ - __asm xor r2,r3 \ - __asm xor r2,r4 \ - __asm rol r0,5 \ - __asm rol r2,22 \ - } - -#define irot(r0,r1,r2,r3,r4) \ - __asm { \ - __asm ror r2,22 \ - __asm ror r0,5 \ - __asm mov r4,r1 \ - __asm sal r4,7 \ - __asm xor r2,r3 \ - __asm xor r2,r4 \ - __asm xor r0,r1 \ - __asm xor r0,r3 \ - __asm ror r3,7 \ - __asm mov r4,r0 \ - __asm ror r1,1 \ - __asm sal r4,3 \ - __asm xor r3,r2 \ - __asm xor r3,r4 \ - __asm xor r1,r0 \ - __asm xor r1,r2 \ - __asm ror r2,3 \ - __asm ror r0,13 \ - } - -char* serpent_name(void) -{ - return "serpent"; -} - -// initialise the key schedule from the user supplied key - -#define k_loop \ - f_key(eax,ebx,ecx,edx); sb3(eax,ebx,ecx,edx,edi); t_key(ebx,ecx,edx,edi); \ - f_key(eax,ebx,ecx,edx); sb2(eax,ebx,ecx,edx,edi); t_key(ecx,edx,ebx,edi); \ - f_key(eax,ebx,ecx,edx); sb1(eax,ebx,ecx,edx,edi); t_key(ecx,eax,edx,ebx); \ - f_key(eax,ebx,ecx,edx); sb0(eax,ebx,ecx,edx,edi); t_key(ebx,edi,ecx,eax); \ - f_key(eax,ebx,ecx,edx); sb7(eax,ebx,ecx,edx,edi); t_key(ecx,edi,edx,eax); \ - f_key(eax,ebx,ecx,edx); sb6(eax,ebx,ecx,edx,edi); t_key(eax,ebx,edi,ecx); \ - f_key(eax,ebx,ecx,edx); sb5(eax,ebx,ecx,edx,edi); t_key(ebx,edx,eax,ecx); \ - f_key(eax,ebx,ecx,edx); sb4(eax,ebx,ecx,edx,edi); t_key(ebx,edi,eax,edx); - -void serpent_set_key(const u1byte in_key[], const u4byte key_len, u1byte *serpent_l_key) -{ - __asm mov edx,key_len - __asm cmp edx,256 - __asm ja short l3 - -#ifdef __cplusplus - __asm mov ebx,this - __asm lea ebx,[ebx].l_key -#else - __asm mov ebx,serpent_l_key -#endif - __asm mov esi,in_key - __asm mov ecx,edx - __asm add ecx,31 - __asm sar ecx,5 - __asm je short l0 - __asm mov edi,ebx - __asm mov eax,ecx - __asm rep movsd -l0: __asm cmp edx,256 - __asm je short l1 - __asm mov ecx,8 - __asm sub ecx,eax - __asm xor eax,eax - __asm rep stosd - __asm mov ecx,edx - __asm mov edx,1 - __asm sal edx,cl - __asm sar ecx,5 - __asm mov eax,edx - __asm dec eax - __asm and eax,[ebx+4*ecx] - __asm or eax,edx - __asm mov [ebx+4*ecx],eax -l1: __asm mov esi,ebx - __asm mov eax,0x9e3779b9 - __asm mov ebx,eax - __asm xor eax,[esi+ 8] - __asm xor ebx,[esi+12] - __asm xor eax,[esi+16] - __asm xor ebx,[esi+20] - __asm xor eax,[esi+24] - __asm xor ebx,[esi+28] - __asm mov ecx,0 - __asm push esi -l2: __asm mov edx,[esi] - __asm xor edx,ecx - __asm xor edx,ebx - __asm ror edx,21 - __asm mov [esi+32],edx - __asm xor eax,[esi+8] - __asm xor eax,edx - __asm add esi,4 - __asm inc ecx - __asm mov edx,[esi] - __asm xor edx,ecx - __asm xor edx,eax - __asm ror edx,21 - __asm mov [esi+32],edx - __asm xor ebx,[esi+8] - __asm xor ebx,edx - __asm add esi,4 - __asm inc ecx - __asm cmp ecx,132 - __asm jne l2 - __asm pop esi - __asm add esi,4*8 - - k_loop; - k_loop; - k_loop; - k_loop; - f_key(eax,ebx,ecx,edx); - sb3(eax,ebx,ecx,edx,edi); - t_key(ebx,ecx,edx,edi); - -l3: return; -} - -// encrypt a block of text - -#define f_loop(a,b,c,d,t) \ - f_xor(a,b,c,d); sb0(a,b,c,d,t); rot(b,t,c,a,d); \ - f_xor(b,t,c,a); sb1(b,t,c,a,d); rot(c,b,a,t,d); \ - f_xor(c,b,a,t); sb2(c,b,a,t,d); rot(a,t,b,d,c); \ - f_xor(a,t,b,d); sb3(a,t,b,d,c); rot(t,b,d,c,a); \ - f_xor(t,b,d,c); sb4(t,b,d,c,a); rot(b,a,t,c,d); \ - f_xor(b,a,t,c); sb5(b,a,t,c,d); rot(a,c,b,t,d); \ - f_xor(a,c,b,t); sb6(a,c,b,t,d); rot(a,c,d,b,t); \ - f_xor(a,c,d,b); sb7(a,c,d,b,t); - -void serpent_encrypt(const u1byte in_blk[16], u1byte out_blk[16], u1byte *serpent_l_key) -{ - __asm mov esi,in_blk - __asm mov eax,[esi] - __asm mov ebx,[esi+4] - __asm mov ecx,[esi+8] - __asm mov edx,[esi+12] - -#ifdef __cplusplus - __asm mov esi,this - __asm lea esi,[esi].l_key + 4*8 -#else - __asm mov esi,serpent_l_key - __asm add esi,4*8 +// serpent.cpp - written and placed in the public domain by Wei Dai + +/* Adapted for TrueCrypt */ + +#ifdef TC_WINDOWS_BOOT +#pragma optimize ("t", on) #endif - f_loop(eax,ebx,ecx,edx,edi); rot(edx,edi,ebx,eax,ecx); - f_loop(edx,edi,ebx,eax,ecx); rot(eax,ecx,edi,edx,ebx); - f_loop(eax,ecx,edi,edx,ebx); rot(edx,ebx,ecx,eax,edi); - f_loop(edx,ebx,ecx,eax,edi); f_xor(eax,edi,ebx,edx); - - __asm mov esi,out_blk - __asm mov [esi],eax - __asm mov [esi+4],edi - __asm mov [esi+8],ebx - __asm mov [esi+12],edx -} - -// decrypt a block of text - -#define i_loop(a,b,c,d,t) \ - ib7(a,b,c,d,t); i_xor(d,a,b,t); \ - irot(d,a,b,t,c); ib6(d,a,b,t,c); i_xor(a,b,c,t); \ - irot(a,b,c,t,d); ib5(a,b,c,t,d); i_xor(b,d,t,c); \ - irot(b,d,t,c,a); ib4(b,d,t,c,a); i_xor(b,c,t,a); \ - irot(b,c,t,a,d); ib3(b,c,t,a,d); i_xor(a,b,t,c); \ - irot(a,b,t,c,d); ib2(a,b,t,c,d); i_xor(b,d,t,c); \ - irot(b,d,t,c,a); ib1(b,d,t,c,a); i_xor(a,b,c,t); \ - irot(a,b,c,t,d); ib0(a,b,c,t,d); i_xor(a,d,b,t); - -void serpent_decrypt(const u1byte in_blk[16], u1byte out_blk[16], u1byte *serpent_l_key) -{ - __asm mov esi,in_blk - __asm mov eax,[esi] - __asm mov ebx,[esi+4] - __asm mov ecx,[esi+8] - __asm mov edx,[esi+12] - -#ifdef __cplusplus - __asm mov esi,this - __asm lea esi,[esi].l_key + 4*140 +#include "Serpent.h" +#include "Common/Endian.h" + +#include + +#if defined(_WIN32) && !defined(_DEBUG) +#include +#define rotlFixed _rotl +#define rotrFixed _rotr #else - __asm mov esi,serpent_l_key - __asm add esi,4*140 +#define rotlFixed(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotrFixed(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) #endif - i_xor(eax,ebx,ecx,edx); i_loop(eax,ebx,ecx,edx,edi); - irot(eax,edx,ebx,edi,ecx); i_loop(eax,edx,ebx,edi,ecx); - irot(eax,edi,edx,ecx,ebx); i_loop(eax,edi,edx,ecx,ebx); - irot(eax,ecx,edi,ebx,edx); i_loop(eax,ecx,edi,ebx,edx); - - __asm mov esi,out_blk - __asm mov [esi],eax - __asm mov [esi+4],ebx - __asm mov [esi+8],ecx - __asm mov [esi+12],edx +// linear transformation +#define LT(i,a,b,c,d,e) {\ + a = rotlFixed(a, 13); \ + c = rotlFixed(c, 3); \ + d = rotlFixed(d ^ c ^ (a << 3), 7); \ + b = rotlFixed(b ^ a ^ c, 1); \ + a = rotlFixed(a ^ b ^ d, 5); \ + c = rotlFixed(c ^ d ^ (b << 7), 22);} + +// inverse linear transformation +#define ILT(i,a,b,c,d,e) {\ + c = rotrFixed(c, 22); \ + a = rotrFixed(a, 5); \ + c ^= d ^ (b << 7); \ + a ^= b ^ d; \ + b = rotrFixed(b, 1); \ + d = rotrFixed(d, 7) ^ c ^ (a << 3); \ + b ^= a ^ c; \ + c = rotrFixed(c, 3); \ + a = rotrFixed(a, 13);} + +// order of output from S-box functions +#define beforeS0(f) f(0,a,b,c,d,e) +#define afterS0(f) f(1,b,e,c,a,d) +#define afterS1(f) f(2,c,b,a,e,d) +#define afterS2(f) f(3,a,e,b,d,c) +#define afterS3(f) f(4,e,b,d,c,a) +#define afterS4(f) f(5,b,a,e,c,d) +#define afterS5(f) f(6,a,c,b,e,d) +#define afterS6(f) f(7,a,c,d,b,e) +#define afterS7(f) f(8,d,e,b,a,c) + +// order of output from inverse S-box functions +#define beforeI7(f) f(8,a,b,c,d,e) +#define afterI7(f) f(7,d,a,b,e,c) +#define afterI6(f) f(6,a,b,c,e,d) +#define afterI5(f) f(5,b,d,e,c,a) +#define afterI4(f) f(4,b,c,e,a,d) +#define afterI3(f) f(3,a,b,e,c,d) +#define afterI2(f) f(2,b,d,e,c,a) +#define afterI1(f) f(1,a,b,c,e,d) +#define afterI0(f) f(0,a,d,b,e,c) + +// The instruction sequences for the S-box functions +// come from Dag Arne Osvik's paper "Speeding up Serpent". + +#define S0(i, r0, r1, r2, r3, r4) \ + { \ + r3 ^= r0; \ + r4 = r1; \ + r1 &= r3; \ + r4 ^= r2; \ + r1 ^= r0; \ + r0 |= r3; \ + r0 ^= r4; \ + r4 ^= r3; \ + r3 ^= r2; \ + r2 |= r1; \ + r2 ^= r4; \ + r4 = ~r4; \ + r4 |= r1; \ + r1 ^= r3; \ + r1 ^= r4; \ + r3 |= r0; \ + r1 ^= r3; \ + r4 ^= r3; \ + } + +#define I0(i, r0, r1, r2, r3, r4) \ + { \ + r2 = ~r2; \ + r4 = r1; \ + r1 |= r0; \ + r4 = ~r4; \ + r1 ^= r2; \ + r2 |= r4; \ + r1 ^= r3; \ + r0 ^= r4; \ + r2 ^= r0; \ + r0 &= r3; \ + r4 ^= r0; \ + r0 |= r1; \ + r0 ^= r2; \ + r3 ^= r4; \ + r2 ^= r1; \ + r3 ^= r0; \ + r3 ^= r1; \ + r2 &= r3; \ + r4 ^= r2; \ + } + +#define S1(i, r0, r1, r2, r3, r4) \ + { \ + r0 = ~r0; \ + r2 = ~r2; \ + r4 = r0; \ + r0 &= r1; \ + r2 ^= r0; \ + r0 |= r3; \ + r3 ^= r2; \ + r1 ^= r0; \ + r0 ^= r4; \ + r4 |= r1; \ + r1 ^= r3; \ + r2 |= r0; \ + r2 &= r4; \ + r0 ^= r1; \ + r1 &= r2; \ + r1 ^= r0; \ + r0 &= r2; \ + r0 ^= r4; \ + } + +#define I1(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r1; \ + r1 ^= r3; \ + r3 &= r1; \ + r4 ^= r2; \ + r3 ^= r0; \ + r0 |= r1; \ + r2 ^= r3; \ + r0 ^= r4; \ + r0 |= r2; \ + r1 ^= r3; \ + r0 ^= r1; \ + r1 |= r3; \ + r1 ^= r0; \ + r4 = ~r4; \ + r4 ^= r1; \ + r1 |= r0; \ + r1 ^= r0; \ + r1 |= r4; \ + r3 ^= r1; \ + } + +#define S2(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r0; \ + r0 &= r2; \ + r0 ^= r3; \ + r2 ^= r1; \ + r2 ^= r0; \ + r3 |= r4; \ + r3 ^= r1; \ + r4 ^= r2; \ + r1 = r3; \ + r3 |= r4; \ + r3 ^= r0; \ + r0 &= r1; \ + r4 ^= r0; \ + r1 ^= r3; \ + r1 ^= r4; \ + r4 = ~r4; \ + } + +#define I2(i, r0, r1, r2, r3, r4) \ + { \ + r2 ^= r3; \ + r3 ^= r0; \ + r4 = r3; \ + r3 &= r2; \ + r3 ^= r1; \ + r1 |= r2; \ + r1 ^= r4; \ + r4 &= r3; \ + r2 ^= r3; \ + r4 &= r0; \ + r4 ^= r2; \ + r2 &= r1; \ + r2 |= r0; \ + r3 = ~r3; \ + r2 ^= r3; \ + r0 ^= r3; \ + r0 &= r1; \ + r3 ^= r4; \ + r3 ^= r0; \ + } + +#define S3(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r0; \ + r0 |= r3; \ + r3 ^= r1; \ + r1 &= r4; \ + r4 ^= r2; \ + r2 ^= r3; \ + r3 &= r0; \ + r4 |= r1; \ + r3 ^= r4; \ + r0 ^= r1; \ + r4 &= r0; \ + r1 ^= r3; \ + r4 ^= r2; \ + r1 |= r0; \ + r1 ^= r2; \ + r0 ^= r3; \ + r2 = r1; \ + r1 |= r3; \ + r1 ^= r0; \ + } + +#define I3(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 ^= r1; \ + r1 &= r2; \ + r1 ^= r0; \ + r0 &= r4; \ + r4 ^= r3; \ + r3 |= r1; \ + r3 ^= r2; \ + r0 ^= r4; \ + r2 ^= r0; \ + r0 |= r3; \ + r0 ^= r1; \ + r4 ^= r2; \ + r2 &= r3; \ + r1 |= r3; \ + r1 ^= r2; \ + r4 ^= r0; \ + r2 ^= r4; \ + } + +#define S4(i, r0, r1, r2, r3, r4) \ + { \ + r1 ^= r3; \ + r3 = ~r3; \ + r2 ^= r3; \ + r3 ^= r0; \ + r4 = r1; \ + r1 &= r3; \ + r1 ^= r2; \ + r4 ^= r3; \ + r0 ^= r4; \ + r2 &= r4; \ + r2 ^= r0; \ + r0 &= r1; \ + r3 ^= r0; \ + r4 |= r1; \ + r4 ^= r0; \ + r0 |= r3; \ + r0 ^= r2; \ + r2 &= r3; \ + r0 = ~r0; \ + r4 ^= r2; \ + } + +#define I4(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 &= r3; \ + r2 ^= r1; \ + r1 |= r3; \ + r1 &= r0; \ + r4 ^= r2; \ + r4 ^= r1; \ + r1 &= r2; \ + r0 = ~r0; \ + r3 ^= r4; \ + r1 ^= r3; \ + r3 &= r0; \ + r3 ^= r2; \ + r0 ^= r1; \ + r2 &= r0; \ + r3 ^= r0; \ + r2 ^= r4; \ + r2 |= r3; \ + r3 ^= r0; \ + r2 ^= r1; \ + } + +#define S5(i, r0, r1, r2, r3, r4) \ + { \ + r0 ^= r1; \ + r1 ^= r3; \ + r3 = ~r3; \ + r4 = r1; \ + r1 &= r0; \ + r2 ^= r3; \ + r1 ^= r2; \ + r2 |= r4; \ + r4 ^= r3; \ + r3 &= r1; \ + r3 ^= r0; \ + r4 ^= r1; \ + r4 ^= r2; \ + r2 ^= r0; \ + r0 &= r3; \ + r2 = ~r2; \ + r0 ^= r4; \ + r4 |= r3; \ + r2 ^= r4; \ + } + +#define I5(i, r0, r1, r2, r3, r4) \ + { \ + r1 = ~r1; \ + r4 = r3; \ + r2 ^= r1; \ + r3 |= r0; \ + r3 ^= r2; \ + r2 |= r1; \ + r2 &= r0; \ + r4 ^= r3; \ + r2 ^= r4; \ + r4 |= r0; \ + r4 ^= r1; \ + r1 &= r2; \ + r1 ^= r3; \ + r4 ^= r2; \ + r3 &= r4; \ + r4 ^= r1; \ + r3 ^= r0; \ + r3 ^= r4; \ + r4 = ~r4; \ + } + +#define S6(i, r0, r1, r2, r3, r4) \ + { \ + r2 = ~r2; \ + r4 = r3; \ + r3 &= r0; \ + r0 ^= r4; \ + r3 ^= r2; \ + r2 |= r4; \ + r1 ^= r3; \ + r2 ^= r0; \ + r0 |= r1; \ + r2 ^= r1; \ + r4 ^= r0; \ + r0 |= r3; \ + r0 ^= r2; \ + r4 ^= r3; \ + r4 ^= r0; \ + r3 = ~r3; \ + r2 &= r4; \ + r2 ^= r3; \ + } + +#define I6(i, r0, r1, r2, r3, r4) \ + { \ + r0 ^= r2; \ + r4 = r2; \ + r2 &= r0; \ + r4 ^= r3; \ + r2 = ~r2; \ + r3 ^= r1; \ + r2 ^= r3; \ + r4 |= r0; \ + r0 ^= r2; \ + r3 ^= r4; \ + r4 ^= r1; \ + r1 &= r3; \ + r1 ^= r0; \ + r0 ^= r3; \ + r0 |= r2; \ + r3 ^= r1; \ + r4 ^= r0; \ + } + +#define S7(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 &= r1; \ + r2 ^= r3; \ + r3 &= r1; \ + r4 ^= r2; \ + r2 ^= r1; \ + r1 ^= r0; \ + r0 |= r4; \ + r0 ^= r2; \ + r3 ^= r1; \ + r2 ^= r3; \ + r3 &= r0; \ + r3 ^= r4; \ + r4 ^= r2; \ + r2 &= r0; \ + r4 = ~r4; \ + r2 ^= r4; \ + r4 &= r0; \ + r1 ^= r3; \ + r4 ^= r1; \ + } + +#define I7(i, r0, r1, r2, r3, r4) \ + { \ + r4 = r2; \ + r2 ^= r0; \ + r0 &= r3; \ + r2 = ~r2; \ + r4 |= r3; \ + r3 ^= r1; \ + r1 |= r0; \ + r0 ^= r2; \ + r2 &= r4; \ + r1 ^= r2; \ + r2 ^= r0; \ + r0 |= r2; \ + r3 &= r4; \ + r0 ^= r3; \ + r4 ^= r1; \ + r3 ^= r4; \ + r4 |= r0; \ + r3 ^= r2; \ + r4 ^= r2; \ + } + +// key xor +#define KX(r, a, b, c, d, e) {\ + a ^= k[4 * r + 0]; \ + b ^= k[4 * r + 1]; \ + c ^= k[4 * r + 2]; \ + d ^= k[4 * r + 3];} + + +#ifdef TC_MINIMIZE_CODE_SIZE + +static void S0f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r3 ^= *r0; + *r4 = *r1; + *r1 &= *r3; + *r4 ^= *r2; + *r1 ^= *r0; + *r0 |= *r3; + *r0 ^= *r4; + *r4 ^= *r3; + *r3 ^= *r2; + *r2 |= *r1; + *r2 ^= *r4; + *r4 = ~*r4; + *r4 |= *r1; + *r1 ^= *r3; + *r1 ^= *r4; + *r3 |= *r0; + *r1 ^= *r3; + *r4 ^= *r3; } + +static void S1f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r0 = ~*r0; + *r2 = ~*r2; + *r4 = *r0; + *r0 &= *r1; + *r2 ^= *r0; + *r0 |= *r3; + *r3 ^= *r2; + *r1 ^= *r0; + *r0 ^= *r4; + *r4 |= *r1; + *r1 ^= *r3; + *r2 |= *r0; + *r2 &= *r4; + *r0 ^= *r1; + *r1 &= *r2; + *r1 ^= *r0; + *r0 &= *r2; + *r0 ^= *r4; +} + +static void S2f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r0; + *r0 &= *r2; + *r0 ^= *r3; + *r2 ^= *r1; + *r2 ^= *r0; + *r3 |= *r4; + *r3 ^= *r1; + *r4 ^= *r2; + *r1 = *r3; + *r3 |= *r4; + *r3 ^= *r0; + *r0 &= *r1; + *r4 ^= *r0; + *r1 ^= *r3; + *r1 ^= *r4; + *r4 = ~*r4; +} + +static void S3f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r0; + *r0 |= *r3; + *r3 ^= *r1; + *r1 &= *r4; + *r4 ^= *r2; + *r2 ^= *r3; + *r3 &= *r0; + *r4 |= *r1; + *r3 ^= *r4; + *r0 ^= *r1; + *r4 &= *r0; + *r1 ^= *r3; + *r4 ^= *r2; + *r1 |= *r0; + *r1 ^= *r2; + *r0 ^= *r3; + *r2 = *r1; + *r1 |= *r3; + *r1 ^= *r0; +} + +static void S4f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r1 ^= *r3; + *r3 = ~*r3; + *r2 ^= *r3; + *r3 ^= *r0; + *r4 = *r1; + *r1 &= *r3; + *r1 ^= *r2; + *r4 ^= *r3; + *r0 ^= *r4; + *r2 &= *r4; + *r2 ^= *r0; + *r0 &= *r1; + *r3 ^= *r0; + *r4 |= *r1; + *r4 ^= *r0; + *r0 |= *r3; + *r0 ^= *r2; + *r2 &= *r3; + *r0 = ~*r0; + *r4 ^= *r2; +} + +static void S5f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r0 ^= *r1; + *r1 ^= *r3; + *r3 = ~*r3; + *r4 = *r1; + *r1 &= *r0; + *r2 ^= *r3; + *r1 ^= *r2; + *r2 |= *r4; + *r4 ^= *r3; + *r3 &= *r1; + *r3 ^= *r0; + *r4 ^= *r1; + *r4 ^= *r2; + *r2 ^= *r0; + *r0 &= *r3; + *r2 = ~*r2; + *r0 ^= *r4; + *r4 |= *r3; + *r2 ^= *r4; +} + +static void S6f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r2 = ~*r2; + *r4 = *r3; + *r3 &= *r0; + *r0 ^= *r4; + *r3 ^= *r2; + *r2 |= *r4; + *r1 ^= *r3; + *r2 ^= *r0; + *r0 |= *r1; + *r2 ^= *r1; + *r4 ^= *r0; + *r0 |= *r3; + *r0 ^= *r2; + *r4 ^= *r3; + *r4 ^= *r0; + *r3 = ~*r3; + *r2 &= *r4; + *r2 ^= *r3; +} + +static void S7f (unsigned __int32 *r0, unsigned __int32 *r1, unsigned __int32 *r2, unsigned __int32 *r3, unsigned __int32 *r4) +{ + *r4 = *r2; + *r2 &= *r1; + *r2 ^= *r3; + *r3 &= *r1; + *r4 ^= *r2; + *r2 ^= *r1; + *r1 ^= *r0; + *r0 |= *r4; + *r0 ^= *r2; + *r3 ^= *r1; + *r2 ^= *r3; + *r3 &= *r0; + *r3 ^= *r4; + *r4 ^= *r2; + *r2 &= *r0; + *r4 = ~*r4; + *r2 ^= *r4; + *r4 &= *r0; + *r1 ^= *r3; + *r4 ^= *r1; +} + +static void KXf (const unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + *a ^= k[r]; + *b ^= k[r + 1]; + *c ^= k[r + 2]; + *d ^= k[r + 3]; +} + +#endif // TC_MINIMIZE_CODE_SIZE + +#ifndef TC_MINIMIZE_CODE_SIZE + +void serpent_set_key(const unsigned __int8 userKey[], int keylen, unsigned __int8 *ks) +{ + unsigned __int32 a,b,c,d,e; + unsigned __int32 *k = (unsigned __int32 *)ks; + unsigned __int32 t; + int i; + + for (i = 0; i < keylen / (int)sizeof(__int32); i++) + k[i] = LE32(((unsigned __int32*)userKey)[i]); + + if (keylen < 32) + k[keylen/4] |= (unsigned __int32)1 << ((keylen%4)*8); + + k += 8; + t = k[-1]; + for (i = 0; i < 132; ++i) + k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); + k -= 20; + +#define LK(r, a, b, c, d, e) {\ + a = k[(8-r)*4 + 0]; \ + b = k[(8-r)*4 + 1]; \ + c = k[(8-r)*4 + 2]; \ + d = k[(8-r)*4 + 3];} + +#define SK(r, a, b, c, d, e) {\ + k[(8-r)*4 + 4] = a; \ + k[(8-r)*4 + 5] = b; \ + k[(8-r)*4 + 6] = c; \ + k[(8-r)*4 + 7] = d;} \ + + for (i=0; i<4; i++) + { + afterS2(LK); afterS2(S3); afterS3(SK); + afterS1(LK); afterS1(S2); afterS2(SK); + afterS0(LK); afterS0(S1); afterS1(SK); + beforeS0(LK); beforeS0(S0); afterS0(SK); + k += 8*4; + afterS6(LK); afterS6(S7); afterS7(SK); + afterS5(LK); afterS5(S6); afterS6(SK); + afterS4(LK); afterS4(S5); afterS5(SK); + afterS3(LK); afterS3(S4); afterS4(SK); + } + afterS2(LK); afterS2(S3); afterS3(SK); +} + +#else // TC_MINIMIZE_CODE_SIZE + +static void LKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + *a = k[r]; + *b = k[r + 1]; + *c = k[r + 2]; + *d = k[r + 3]; +} + +static void SKf (unsigned __int32 *k, unsigned int r, unsigned __int32 *a, unsigned __int32 *b, unsigned __int32 *c, unsigned __int32 *d) +{ + k[r + 4] = *a; + k[r + 5] = *b; + k[r + 6] = *c; + k[r + 7] = *d; +} + +void serpent_set_key(const unsigned __int8 userKey[], int keylen, unsigned __int8 *ks) +{ + unsigned __int32 a,b,c,d,e; + unsigned __int32 *k = (unsigned __int32 *)ks; + unsigned __int32 t; + int i; + + for (i = 0; i < keylen / (int)sizeof(__int32); i++) + k[i] = LE32(((unsigned __int32*)userKey)[i]); + + if (keylen < 32) + k[keylen/4] |= (unsigned __int32)1 << ((keylen%4)*8); + + k += 8; + t = k[-1]; + for (i = 0; i < 132; ++i) + k[i] = t = rotlFixed(k[i-8] ^ k[i-5] ^ k[i-3] ^ t ^ 0x9e3779b9 ^ i, 11); + k -= 20; + + for (i=0; i<4; i++) + { + LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); + LKf (k, 24, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); SKf (k, 20, &a, &e, &b, &d); + LKf (k, 28, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); SKf (k, 24, &c, &b, &a, &e); + LKf (k, 32, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); SKf (k, 28, &b, &e, &c, &a); + k += 8*4; + LKf (k, 4, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); SKf (k, 0, &d, &e, &b, &a); + LKf (k, 8, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); SKf (k, 4, &a, &c, &d, &b); + LKf (k, 12, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); SKf (k, 8, &a, &c, &b, &e); + LKf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); SKf (k, 12, &b, &a, &e, &c); + } + LKf (k, 20, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); SKf (k, 16, &e, &b, &d, &c); +} + +#endif // TC_MINIMIZE_CODE_SIZE + + +#ifndef TC_MINIMIZE_CODE_SIZE + +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + unsigned int i=1; + const unsigned __int32 *k = (unsigned __int32 *)ks + 8; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + do + { + beforeS0(KX); beforeS0(S0); afterS0(LT); + afterS0(KX); afterS0(S1); afterS1(LT); + afterS1(KX); afterS1(S2); afterS2(LT); + afterS2(KX); afterS2(S3); afterS3(LT); + afterS3(KX); afterS3(S4); afterS4(LT); + afterS4(KX); afterS4(S5); afterS5(LT); + afterS5(KX); afterS5(S6); afterS6(LT); + afterS6(KX); afterS6(S7); + + if (i == 4) + break; + + ++i; + c = b; + b = e; + e = d; + d = a; + a = e; + k += 32; + beforeS0(LT); + } + while (1); + + afterS7(KX); + + out[0] = LE32(d); + out[1] = LE32(e); + out[2] = LE32(b); + out[3] = LE32(a); +} + +#else // TC_MINIMIZE_CODE_SIZE + +typedef unsigned __int32 uint32; + +static void LTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) +{ + *a = rotlFixed(*a, 13); + *c = rotlFixed(*c, 3); + *d = rotlFixed(*d ^ *c ^ (*a << 3), 7); + *b = rotlFixed(*b ^ *a ^ *c, 1); + *a = rotlFixed(*a ^ *b ^ *d, 5); + *c = rotlFixed(*c ^ *d ^ (*b << 7), 22); +} + +void serpent_encrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + unsigned int i=1; + const unsigned __int32 *k = (unsigned __int32 *)ks + 8; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + do + { + KXf (k, 0, &a, &b, &c, &d); S0f (&a, &b, &c, &d, &e); LTf (&b, &e, &c, &a); + KXf (k, 4, &b, &e, &c, &a); S1f (&b, &e, &c, &a, &d); LTf (&c, &b, &a, &e); + KXf (k, 8, &c, &b, &a, &e); S2f (&c, &b, &a, &e, &d); LTf (&a, &e, &b, &d); + KXf (k, 12, &a, &e, &b, &d); S3f (&a, &e, &b, &d, &c); LTf (&e, &b, &d, &c); + KXf (k, 16, &e, &b, &d, &c); S4f (&e, &b, &d, &c, &a); LTf (&b, &a, &e, &c); + KXf (k, 20, &b, &a, &e, &c); S5f (&b, &a, &e, &c, &d); LTf (&a, &c, &b, &e); + KXf (k, 24, &a, &c, &b, &e); S6f (&a, &c, &b, &e, &d); LTf (&a, &c, &d, &b); + KXf (k, 28, &a, &c, &d, &b); S7f (&a, &c, &d, &b, &e); + + if (i == 4) + break; + + ++i; + c = b; + b = e; + e = d; + d = a; + a = e; + k += 32; + LTf (&a,&b,&c,&d); + } + while (1); + + KXf (k, 32, &d, &e, &b, &a); + + out[0] = LE32(d); + out[1] = LE32(e); + out[2] = LE32(b); + out[3] = LE32(a); +} + +#endif // TC_MINIMIZE_CODE_SIZE + +#if !defined (TC_MINIMIZE_CODE_SIZE) || defined (TC_WINDOWS_BOOT_SERPENT) + +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + const unsigned __int32 *k = (unsigned __int32 *)ks + 104; + unsigned int i=4; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + beforeI7(KX); + goto start; + + do + { + c = b; + b = d; + d = e; + k -= 32; + beforeI7(ILT); +start: + beforeI7(I7); afterI7(KX); + afterI7(ILT); afterI7(I6); afterI6(KX); + afterI6(ILT); afterI6(I5); afterI5(KX); + afterI5(ILT); afterI5(I4); afterI4(KX); + afterI4(ILT); afterI4(I3); afterI3(KX); + afterI3(ILT); afterI3(I2); afterI2(KX); + afterI2(ILT); afterI2(I1); afterI1(KX); + afterI1(ILT); afterI1(I0); afterI0(KX); + } + while (--i != 0); + + out[0] = LE32(a); + out[1] = LE32(d); + out[2] = LE32(b); + out[3] = LE32(e); +} + +#else // TC_MINIMIZE_CODE_SIZE && !TC_WINDOWS_BOOT_SERPENT + +static void ILTf (uint32 *a, uint32 *b, uint32 *c, uint32 *d) +{ + *c = rotrFixed(*c, 22); + *a = rotrFixed(*a, 5); + *c ^= *d ^ (*b << 7); + *a ^= *b ^ *d; + *b = rotrFixed(*b, 1); + *d = rotrFixed(*d, 7) ^ *c ^ (*a << 3); + *b ^= *a ^ *c; + *c = rotrFixed(*c, 3); + *a = rotrFixed(*a, 13); +} + +void serpent_decrypt(const unsigned __int8 *inBlock, unsigned __int8 *outBlock, unsigned __int8 *ks) +{ + unsigned __int32 a, b, c, d, e; + const unsigned __int32 *k = (unsigned __int32 *)ks + 104; + unsigned int i=4; + unsigned __int32 *in = (unsigned __int32 *) inBlock; + unsigned __int32 *out = (unsigned __int32 *) outBlock; + + a = LE32(in[0]); + b = LE32(in[1]); + c = LE32(in[2]); + d = LE32(in[3]); + + KXf (k, 32, &a, &b, &c, &d); + goto start; + + do + { + c = b; + b = d; + d = e; + k -= 32; + beforeI7(ILT); +start: + beforeI7(I7); KXf (k, 28, &d, &a, &b, &e); + ILTf (&d, &a, &b, &e); afterI7(I6); KXf (k, 24, &a, &b, &c, &e); + ILTf (&a, &b, &c, &e); afterI6(I5); KXf (k, 20, &b, &d, &e, &c); + ILTf (&b, &d, &e, &c); afterI5(I4); KXf (k, 16, &b, &c, &e, &a); + ILTf (&b, &c, &e, &a); afterI4(I3); KXf (k, 12, &a, &b, &e, &c); + ILTf (&a, &b, &e, &c); afterI3(I2); KXf (k, 8, &b, &d, &e, &c); + ILTf (&b, &d, &e, &c); afterI2(I1); KXf (k, 4, &a, &b, &c, &e); + ILTf (&a, &b, &c, &e); afterI1(I0); KXf (k, 0, &a, &d, &b, &e); + } + while (--i != 0); + + out[0] = LE32(a); + out[1] = LE32(d); + out[2] = LE32(b); + out[3] = LE32(e); +} + +#endif // TC_MINIMIZE_CODE_SIZE && !TC_WINDOWS_BOOT_SERPENT