|
|
1.1 root 1: /* This is an independent implementation of the encryption algorithm: */
2: /* */
3: /* Twofish by Bruce Schneier and colleagues */
4: /* */
5: /* which is a candidate algorithm in the Advanced Encryption Standard */
6: /* programme of the US National Institute of Standards and Technology. */
7: /* */
8: /* Copyright in this implementation is held by Dr B R Gladman but I */
9: /* hereby give permission for its free direct or derivative use subject */
10: /* to acknowledgment of its origin and compliance with any conditions */
11: /* that the originators of the algorithm place on its exploitation. */
12: /* */
13: /* My thanks to Doug Whiting and Niels Ferguson for comments that led */
14: /* to improvements in this implementation. */
15: /* */
16: /* Dr Brian Gladman ([email protected]) 14th January 1999 */
17:
18: /* Support for multithreaded operation added by TrueCrypt Foundation */
19:
20: #include "twofish.h"
21:
22: #define Q_TABLES
23: #define M_TABLE
24: #define MK_TABLE
25: #define ONE_STEP
26:
/* Identification strings for this implementation, handed out unchanged by
 * cipher_name(). */
static char *alg_name[] = { "twofish", "twofish.c", "twofish" };

/*
 * Return the table of three identification strings for this cipher.
 *
 * Declared with an explicit (void) parameter list so that this is a real
 * prototype; an empty "()" in C leaves the arguments unchecked.
 */
static char **cipher_name(void)
{
    return alg_name;
}
33:
34: //u4byte k_len;
35: //u4byte l_key[40];
36: //u4byte s_key[4];
37:
38: /* finite field arithmetic for GF(2**8) with the modular */
39: /* polynomial x^8 + x^6 + x^5 + x^3 + 1 (0x169) */
40:
41: #define G_M 0x0169
42:
43: u1byte tab_5b[4] = { 0, G_M >> 2, G_M >> 1, (G_M >> 1) ^ (G_M >> 2) };
44: u1byte tab_ef[4] = { 0, (G_M >> 1) ^ (G_M >> 2), G_M >> 1, G_M >> 2 };
45:
46: #define ffm_01(x) (x)
47: #define ffm_5b(x) ((x) ^ ((x) >> 2) ^ tab_5b[(x) & 3])
48: #define ffm_ef(x) ((x) ^ ((x) >> 1) ^ ((x) >> 2) ^ tab_ef[(x) & 3])
49:
50: u1byte ror4[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
51: u1byte ashx[16] = { 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7 };
52:
53: u1byte qt0[2][16] =
54: { { 8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4 },
55: { 2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5 }
56: };
57:
58: u1byte qt1[2][16] =
59: { { 14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13 },
60: { 1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8 }
61: };
62:
63: u1byte qt2[2][16] =
64: { { 11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1 },
65: { 4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15 }
66: };
67:
68: u1byte qt3[2][16] =
69: { { 13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10 },
70: { 11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10 }
71: };
72:
73: static u1byte qp(const u4byte n, const u1byte x)
74: { u1byte a0, a1, a2, a3, a4, b0, b1, b2, b3, b4;
75:
76: a0 = x >> 4; b0 = x & 15;
77: a1 = a0 ^ b0; b1 = ror4[b0] ^ ashx[a0];
78: a2 = qt0[n][a1]; b2 = qt1[n][b1];
79: a3 = a2 ^ b2; b3 = ror4[b2] ^ ashx[a2];
80: a4 = qt2[n][a3]; b4 = qt3[n][b3];
81: return (b4 << 4) | a4;
82: };
83:
#ifdef Q_TABLES

/* Non-zero once q_tab has been filled in by gen_qtab(). */
u4byte qt_gen = 0;

/* Tabulated byte permutations: q_tab[n][x] == qp(n, x). */
u1byte q_tab[2][256];

#define q(n,x) q_tab[n][x]

/* Fill both rows of q_tab by evaluating qp() for every byte value. */
static void gen_qtab(void)
{
    u4byte perm, value;

    for (perm = 0; perm < 2; ++perm) {
        for (value = 0; value < 256; ++value) {
            q_tab[perm][value] = qp(perm, (u1byte)value);
        }
    }
}

#else

/* No lookup table: evaluate the permutation on every use. */
#define q(n,x) qp(n, x)

#endif
106:
#ifdef M_TABLE

/* Non-zero once m_tab has been filled in by gen_mtab(). */
u4byte mt_gen = 0;

/*
 * m_tab[c][x] is the 32-bit contribution of input byte x in column c:
 * x is first passed through a q permutation, then multiplied by the four
 * field constants of that column, one per output byte.
 */
u4byte m_tab[4][256];

/*
 * Build m_tab.  Columns 0 and 2 use q(1, .), columns 1 and 3 use q(0, .).
 * Each product fits in 8 bits, so the four bytes are combined with OR.
 */
static void gen_mtab(void)
{
    u4byte i;

    for (i = 0; i < 256; ++i) {
        u4byte f01, f5b, fef;

        f01 = q(1, i);
        f5b = ffm_5b(f01);
        fef = ffm_ef(f01);
        m_tab[0][i] = f01 | (f5b << 8) | (fef << 16) | (fef << 24);
        m_tab[2][i] = f5b | (fef << 8) | (f01 << 16) | (fef << 24);

        f01 = q(0, i);
        f5b = ffm_5b(f01);
        fef = ffm_ef(f01);
        m_tab[1][i] = fef | (fef << 8) | (f5b << 16) | (f01 << 24);
        m_tab[3][i] = f5b | (f01 << 8) | (fef << 16) | (f5b << 24);
    }
}

#define mds(n,x) m_tab[n][x]

#else

/* fm_<row><col>: field multiplier for output byte <row> of column <col>;
 * q_<col>: the q permutation applied to the input of that column. */
#define fm_00 ffm_01
#define fm_10 ffm_5b
#define fm_20 ffm_ef
#define fm_30 ffm_ef
#define q_0(x) q(1,x)

#define fm_01 ffm_ef
#define fm_11 ffm_ef
#define fm_21 ffm_5b
#define fm_31 ffm_01
#define q_1(x) q(0,x)

#define fm_02 ffm_5b
#define fm_12 ffm_ef
#define fm_22 ffm_01
#define fm_32 ffm_ef
#define q_2(x) q(1,x)

#define fm_03 ffm_5b
#define fm_13 ffm_01
#define fm_23 ffm_ef
#define fm_33 ffm_5b
#define q_3(x) q(0,x)

#define f_0(n,x) ((u4byte)fm_0##n(x))
#define f_1(n,x) ((u4byte)fm_1##n(x) << 8)
#define f_2(n,x) ((u4byte)fm_2##n(x) << 16)
#define f_3(n,x) ((u4byte)fm_3##n(x) << 24)

/* The whole expansion is parenthesised so that mds() behaves as a single
 * operand whatever operators surround it at the point of use. */
#define mds(n,x) (f_0(n,q_##n(x)) ^ f_1(n,q_##n(x)) ^ f_2(n,q_##n(x)) ^ f_3(n,q_##n(x)))

#endif
163:
164: static u4byte h_fun(TwofishInstance *instance, const u4byte x, const u4byte key[])
165: { u4byte b0, b1, b2, b3;
166:
167: #ifndef M_TABLE
168: u4byte m5b_b0, m5b_b1, m5b_b2, m5b_b3;
169: u4byte mef_b0, mef_b1, mef_b2, mef_b3;
170: #endif
171:
172: b0 = extract_byte(x, 0); b1 = extract_byte(x, 1); b2 = extract_byte(x, 2); b3 = extract_byte(x, 3);
173:
174: switch(instance->k_len)
175: {
176: case 4: b0 = q(1, b0) ^ extract_byte(key[3],0);
177: b1 = q(0, b1) ^ extract_byte(key[3],1);
178: b2 = q(0, b2) ^ extract_byte(key[3],2);
179: b3 = q(1, b3) ^ extract_byte(key[3],3);
180: case 3: b0 = q(1, b0) ^ extract_byte(key[2],0);
181: b1 = q(1, b1) ^ extract_byte(key[2],1);
182: b2 = q(0, b2) ^ extract_byte(key[2],2);
183: b3 = q(0, b3) ^ extract_byte(key[2],3);
184: case 2: b0 = q(0,q(0,b0) ^ extract_byte(key[1],0)) ^ extract_byte(key[0],0);
185: b1 = q(0,q(1,b1) ^ extract_byte(key[1],1)) ^ extract_byte(key[0],1);
186: b2 = q(1,q(0,b2) ^ extract_byte(key[1],2)) ^ extract_byte(key[0],2);
187: b3 = q(1,q(1,b3) ^ extract_byte(key[1],3)) ^ extract_byte(key[0],3);
188: }
189: #ifdef M_TABLE
190:
191: return mds(0, b0) ^ mds(1, b1) ^ mds(2, b2) ^ mds(3, b3);
192:
193: #else
194:
195: b0 = q(1, b0); b1 = q(0, b1); b2 = q(1, b2); b3 = q(0, b3);
196: m5b_b0 = ffm_5b(b0); m5b_b1 = ffm_5b(b1); m5b_b2 = ffm_5b(b2); m5b_b3 = ffm_5b(b3);
197: mef_b0 = ffm_ef(b0); mef_b1 = ffm_ef(b1); mef_b2 = ffm_ef(b2); mef_b3 = ffm_ef(b3);
198: b0 ^= mef_b1 ^ m5b_b2 ^ m5b_b3; b3 ^= m5b_b0 ^ mef_b1 ^ mef_b2;
199: b2 ^= mef_b0 ^ m5b_b1 ^ mef_b3; b1 ^= mef_b0 ^ mef_b2 ^ m5b_b3;
200:
201: return b0 | (b3 << 8) | (b2 << 16) | (b1 << 24);
202:
203: #endif
204: };
205:
#ifdef MK_TABLE

#ifdef ONE_STEP
/* The combined table lives in the instance (instance->mk_tab). */
#else
u1byte sb[4][256];
#endif

/*
 * Key-dependent S-box expressions.  qNc(x) is the value of byte lane c
 * (0..3) for an N-word key; they reference a variable named `key` in the
 * enclosing scope.  The 3- and 4-word forms are built by feeding one extra
 * "permute, then XOR a key byte" stage into the next shorter form, the same
 * layering h_fun() gets from its switch fallthrough.  Every expansion is
 * fully parenthesised so it can be used as an ordinary operand.
 */
#define q20(x) (q(0, q(0, x) ^ extract_byte(key[1], 0)) ^ extract_byte(key[0], 0))
#define q21(x) (q(0, q(1, x) ^ extract_byte(key[1], 1)) ^ extract_byte(key[0], 1))
#define q22(x) (q(1, q(0, x) ^ extract_byte(key[1], 2)) ^ extract_byte(key[0], 2))
#define q23(x) (q(1, q(1, x) ^ extract_byte(key[1], 3)) ^ extract_byte(key[0], 3))

#define q30(x) q20(q(1, x) ^ extract_byte(key[2], 0))
#define q31(x) q21(q(1, x) ^ extract_byte(key[2], 1))
#define q32(x) q22(q(0, x) ^ extract_byte(key[2], 2))
#define q33(x) q23(q(0, x) ^ extract_byte(key[2], 3))

#define q40(x) q30(q(1, x) ^ extract_byte(key[3], 0))
#define q41(x) q31(q(0, x) ^ extract_byte(key[3], 1))
#define q42(x) q32(q(0, x) ^ extract_byte(key[3], 2))
#define q43(x) q33(q(1, x) ^ extract_byte(key[3], 3))

/* Store the four lane values for table row i, in whichever form the build
 * configuration uses.  Private to gen_mk_tab(); undefined again below. */
#ifdef ONE_STEP
#define mk_store(i, v0, v1, v2, v3)            \
    do {                                       \
        mk_tab[0 + 4 * (i)] = mds(0, v0);      \
        mk_tab[1 + 4 * (i)] = mds(1, v1);      \
        mk_tab[2 + 4 * (i)] = mds(2, v2);      \
        mk_tab[3 + 4 * (i)] = mds(3, v3);      \
    } while (0)
#else
#define mk_store(i, v0, v1, v2, v3)            \
    do {                                       \
        sb[0][i] = (v0);                       \
        sb[1][i] = (v1);                       \
        sb[2][i] = (v2);                       \
        sb[3][i] = (v3);                       \
    } while (0)
#endif

/*
 * Precompute the key-dependent lookup table used by g0_fun / g1_fun.
 *
 * instance  supplies k_len and, with ONE_STEP, the destination mk_tab
 *           (256 rows of four words, row i at mk_tab[4*i .. 4*i+3])
 * key       the key words mixed into the S-boxes (key[0 .. k_len-1])
 *
 * Declared void: the function never returned a value and its result is not
 * used.  k_len values other than 2, 3 and 4 leave the table untouched.
 */
static void gen_mk_tab(TwofishInstance *instance, u4byte key[])
{
    u4byte i;
    u1byte by;
#ifdef ONE_STEP
    u4byte *mk_tab = instance->mk_tab;
#endif

    switch (instance->k_len) {
    case 2:
        for (i = 0; i < 256; ++i) {
            by = (u1byte)i;
            mk_store(i, q20(by), q21(by), q22(by), q23(by));
        }
        break;

    case 3:
        for (i = 0; i < 256; ++i) {
            by = (u1byte)i;
            mk_store(i, q30(by), q31(by), q32(by), q33(by));
        }
        break;

    case 4:
        for (i = 0; i < 256; ++i) {
            by = (u1byte)i;
            mk_store(i, q40(by), q41(by), q42(by), q43(by));
        }
        break;
    }
}

#undef mk_store

/*
 * g0_fun(x) looks up the four bytes of x in lanes 0..3; g1_fun(x) does the
 * same with the bytes taken in the order 3, 0, 1, 2 (the bytes of x rotated
 * left by 8 bits, matching the non-table g1_fun further down).  Both expect a
 * local `mk_tab` (ONE_STEP) to be in scope.
 */
#  ifdef ONE_STEP
#    define g0_fun(x) ( mk_tab[0 + 4*extract_byte(x,0)] ^ mk_tab[1 + 4*extract_byte(x,1)] \
                      ^ mk_tab[2 + 4*extract_byte(x,2)] ^ mk_tab[3 + 4*extract_byte(x,3)] )
#    define g1_fun(x) ( mk_tab[0 + 4*extract_byte(x,3)] ^ mk_tab[1 + 4*extract_byte(x,0)] \
                      ^ mk_tab[2 + 4*extract_byte(x,1)] ^ mk_tab[3 + 4*extract_byte(x,2)] )
#  else
#    define g0_fun(x) ( mds(0, sb[0][extract_byte(x,0)]) ^ mds(1, sb[1][extract_byte(x,1)]) \
                      ^ mds(2, sb[2][extract_byte(x,2)]) ^ mds(3, sb[3][extract_byte(x,3)]) )
#    define g1_fun(x) ( mds(0, sb[0][extract_byte(x,3)]) ^ mds(1, sb[1][extract_byte(x,0)]) \
                      ^ mds(2, sb[2][extract_byte(x,1)]) ^ mds(3, sb[3][extract_byte(x,2)]) )
#  endif

#else

/* No precomputed table: evaluate h_fun() directly.  These expect locals
 * `instance` and `s_key` to be in scope at the point of use. */
#define g0_fun(x) h_fun(instance, x,s_key)
#define g1_fun(x) h_fun(instance, rotl(x,8),s_key)

#endif
299:
300: /* The (12,8) Reed Solomon code has the generator polynomial
301:
302: g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1
303:
304: where the coefficients are in the finite field GF(2^8) with a
305: modular polynomial a^8 + a^6 + a^3 + a^2 + 1. To generate the
306: remainder we have to start with a 12th order polynomial with our
307: eight input bytes as the coefficients of the 4th to 11th terms.
308: That is:
309:
310: m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0
311:
312: We then multiply the generator polynomial by m[7] * x^7 and subtract
313: it - xor in GF(2^8) - from the above to eliminate the x^11 term (the
314: arithmetic on the coefficients is done in GF(2^8)). We then multiply
315: the generator polynomial by x^6 * coeff(x^10) and use this to remove
316: the x^10 term. We carry on in this way until the x^4 term is removed
317: so that we are left with:
318:
319: r[3] * x^3 + r[2] * x^2 + r[1] * x^1 + r[0]
320:
321: which gives the resulting 4 bytes of the remainder. This is equivalent
322: to the matrix multiplication in the Twofish description but much faster
323: to implement.
324:
325: */
326:
327: #define G_MOD 0x0000014d
328:
329: static u4byte mds_rem(u4byte p0, u4byte p1)
330: { u4byte i, t, u;
331:
332: for(i = 0; i < 8; ++i)
333: {
334: t = p1 >> 24; // get most significant coefficient
335:
336: p1 = (p1 << 8) | (p0 >> 24); p0 <<= 8; // shift others up
337:
338: // multiply t by a (the primitive element - i.e. left shift)
339:
340: u = (t << 1);
341:
342: if(t & 0x80) // subtract modular polynomial on overflow
343:
344: u ^= G_MOD;
345:
346: p1 ^= t ^ (u << 16); // remove t * (a * x^2 + 1)
347:
348: u ^= (t >> 1); // form u = a * t + t / a = t * (a + 1 / a);
349:
350: if(t & 0x01) // add the modular polynomial on underflow
351:
352: u ^= G_MOD >> 1;
353:
354: p1 ^= (u << 24) | (u << 8); // remove t * (a + 1/a) * (x^3 + x)
355: }
356:
357: return p1;
358: };
359:
360: /* initialise the key schedule from the user supplied key */
361:
362: u4byte *twofish_set_key(TwofishInstance *instance, const u4byte in_key[], const u4byte key_len)
363: { u4byte i, a, b, me_key[4], mo_key[4];
364: u4byte *l_key, *s_key;
365:
366: instance->l_key = (u4byte *) ((__int8 *)instance + sizeof (TwofishInstance));
367: instance->s_key = (u4byte *) ((__int8 *)instance + sizeof (TwofishInstance) + TF_L_KEY_SIZE);
368: instance->mk_tab = (u4byte *) ((__int8 *)instance + sizeof (TwofishInstance) + TF_L_KEY_SIZE + TF_S_KEY_SIZE);
369:
370: l_key = instance->l_key;
371: s_key = instance->s_key;
372:
373: #ifdef Q_TABLES
374: if(!qt_gen)
375: {
376: gen_qtab(); qt_gen = 1;
377: }
378: #endif
379:
380: #ifdef M_TABLE
381: if(!mt_gen)
382: {
383: gen_mtab(); mt_gen = 1;
384: }
385: #endif
386:
387: instance->k_len = key_len / 64; /* 2, 3 or 4 */
388:
389: for(i = 0; i < instance->k_len; ++i)
390: {
391: a = in_key[i + i]; me_key[i] = a;
392: b = in_key[i + i + 1]; mo_key[i] = b;
393: s_key[instance->k_len - i - 1] = mds_rem(a, b);
394: }
395:
396: for(i = 0; i < 40; i += 2)
397: {
398: a = 0x01010101 * i; b = a + 0x01010101;
399: a = h_fun(instance, a, me_key);
400: b = rotl(h_fun(instance, b, mo_key), 8);
401: l_key[i] = a + b;
402: l_key[i + 1] = rotl(a + 2 * b, 9);
403: }
404:
405: #ifdef MK_TABLE
406: gen_mk_tab(instance, s_key);
407: #endif
408:
409: return l_key;
410: };
411:
412: /* encrypt a block of text */
413:
414: #define f_rnd(i) \
415: t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \
416: blk[2] = rotr(blk[2] ^ (t0 + t1 + l_key[4 * (i) + 8]), 1); \
417: blk[3] = rotl(blk[3], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]); \
418: t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \
419: blk[0] = rotr(blk[0] ^ (t0 + t1 + l_key[4 * (i) + 10]), 1); \
420: blk[1] = rotl(blk[1], 1) ^ (t0 + 2 * t1 + l_key[4 * (i) + 11])
421:
422: void twofish_encrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[])
423: { u4byte t0, t1, blk[4];
424:
425: u4byte *l_key = instance->l_key;
426: u4byte *s_key = instance->s_key;
427: u4byte *mk_tab = instance->mk_tab;
428:
429: blk[0] = in_blk[0] ^ l_key[0];
430: blk[1] = in_blk[1] ^ l_key[1];
431: blk[2] = in_blk[2] ^ l_key[2];
432: blk[3] = in_blk[3] ^ l_key[3];
433:
434: f_rnd(0); f_rnd(1); f_rnd(2); f_rnd(3);
435: f_rnd(4); f_rnd(5); f_rnd(6); f_rnd(7);
436:
437: out_blk[0] = blk[2] ^ l_key[4];
438: out_blk[1] = blk[3] ^ l_key[5];
439: out_blk[2] = blk[0] ^ l_key[6];
440: out_blk[3] = blk[1] ^ l_key[7];
441: };
442:
443: /* decrypt a block of text */
444:
445: #define i_rnd(i) \
446: t1 = g1_fun(blk[1]); t0 = g0_fun(blk[0]); \
447: blk[2] = rotl(blk[2], 1) ^ (t0 + t1 + l_key[4 * (i) + 10]); \
448: blk[3] = rotr(blk[3] ^ (t0 + 2 * t1 + l_key[4 * (i) + 11]), 1); \
449: t1 = g1_fun(blk[3]); t0 = g0_fun(blk[2]); \
450: blk[0] = rotl(blk[0], 1) ^ (t0 + t1 + l_key[4 * (i) + 8]); \
451: blk[1] = rotr(blk[1] ^ (t0 + 2 * t1 + l_key[4 * (i) + 9]), 1)
452:
453: void twofish_decrypt(TwofishInstance *instance, const u4byte in_blk[4], u4byte out_blk[4])
454: { u4byte t0, t1, blk[4];
455:
456: u4byte *l_key = instance->l_key;
457: u4byte *s_key = instance->s_key;
458: u4byte *mk_tab = instance->mk_tab;
459:
460: blk[0] = in_blk[0] ^ l_key[4];
461: blk[1] = in_blk[1] ^ l_key[5];
462: blk[2] = in_blk[2] ^ l_key[6];
463: blk[3] = in_blk[3] ^ l_key[7];
464:
465: i_rnd(7); i_rnd(6); i_rnd(5); i_rnd(4);
466: i_rnd(3); i_rnd(2); i_rnd(1); i_rnd(0);
467:
468: out_blk[0] = blk[2] ^ l_key[0];
469: out_blk[1] = blk[3] ^ l_key[1];
470: out_blk[2] = blk[0] ^ l_key[2];
471: out_blk[3] = blk[1] ^ l_key[3];
472: };
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.