|
|
1.1 root 1: /*
2: * Copyright(C) 2006 Cameron Rich
3: *
4: * This library is free software; you can redistribute it and/or modify
5: * it under the terms of the GNU Lesser General Public License as published by
6: * the Free Software Foundation; either version 2 of the License, or
7: * (at your option) any later version.
8: *
9: * This library is distributed in the hope that it will be useful,
10: * but WITHOUT ANY WARRANTY; without even the implied warranty of
11: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: * GNU Lesser General Public License for more details.
13: *
14: * You should have received a copy of the GNU Lesser General Public License
15: * along with this library; if not, write to the Free Software
16: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17: */
18:
19: FILE_LICENCE ( GPL2_OR_LATER );
20:
21: /**
22: * AES implementation - this is a small code version. There are much faster
23: * versions around but they are much larger in size (i.e. they use large
24: * submix tables).
25: */
26:
27: #include <string.h>
28: #include "crypto.h"
29:
30: /* all commented out in skeleton mode */
31: #ifndef CONFIG_SSL_SKELETON_MODE
32:
/*
 * Rotations of a 32-bit word by whole bytes.  The argument must have a
 * 32-bit unsigned type for the two shifted halves to recombine correctly.
 *   rot1: rotate right by 8 bits
 *   rot2: rotate by 16 bits (swap the two half-words)
 *   rot3: rotate left by 8 bits
 */
#define rot1(w) (((w) >>  8) | ((w) << 24))
#define rot2(w) (((w) >> 16) | ((w) << 16))
#define rot3(w) (((w) >> 24) | ((w) <<  8))
36:
/*
 * mul2(x,t): multiply each of the four bytes packed in x by two in
 * GF(2^8), all at once.  The trick is credited to Dr B. R. Gladman.
 *
 * Doubling a byte is a left shift, followed by an xor with 0x1b when the
 * byte's top bit was set.  Per byte lane:
 *   1. t = x & mt            keep only bit 7            -> 0x80 or 0x00
 *   2. t >> 7                move bit 7 down to bit 0   -> 0x01 or 0x00
 *   3. t - (t >> 7)          subtract the two           -> 0x7f or 0x00
 *   4. ... & mm              mask with the polynomial   -> 0x1b or 0x00
 * The shifted value is (x + x) & mh, which clears bit 0 of every lane so
 * that no carry from the neighbouring byte leaks in.
 *
 * t is an lvalue used as scratch storage and is overwritten; x is
 * evaluated more than once, so it must not have side effects.
 */
#define mt  0x80808080
#define ml  0x7f7f7f7f
#define mh  0xfefefefe
#define mm  0x1b1b1b1b
#define mul2(x,t)   ((t) = ((x) & mt), \
        ((((t) - ((t) >> 7)) & mm) ^ (((x) + (x)) & mh)))
54:
/*
 * inv_mix_col(col,m2,m4,m8,m9): combine the GF(2^8) multiples of a packed
 * 32-bit column into a single word, using the byte multipliers
 * 0x0e, 0x0b, 0x0d and 0x09.
 *
 * m2, m4, m8 and m9 are caller-supplied scratch lvalues; all four are
 * overwritten.  col is evaluated several times and must be free of side
 * effects.  The value of the macro is the transformed column.
 *
 * Step by step (every product is taken per byte lane):
 *   m2 = 2*col, m4 = 4*col, m8 = 8*col
 *   m9 = 9*col                  (col ^ 8*col)
 *   m8 = 14*col                 (2 ^ 4 ^ 8)
 *   m2 = 11*col, m4 = 13*col    (each xored with 9*col)
 *   result = 14*col ^ rot3(11*col) ^ rot2(13*col) ^ rot1(9*col)
 */
#define inv_mix_col(col,m2,m4,m8,m9) (      \
        (m2) = mul2(col, m2),               \
        (m4) = mul2(m2, m4),                \
        (m8) = mul2(m4, m8),                \
        (m9) = (col) ^ (m8),                \
        (m8) = ((m2) ^ (m4) ^ (m8)),        \
        (m2) ^= (m9),                       \
        (m4) ^= (m9),                       \
        (m8) ^= rot3(m2),                   \
        (m8) ^= rot2(m4),                   \
        (m8) ^ rot1(m9))
66:
/*
 * Endian-independent word access through a moving pointer.
 *
 *   n2l(c,l): load the network-order 32-bit word at *c into l as a
 *             host-order value, then advance c by one word.
 *   l2n(l,c): store host-order l at *c in network order, then advance c
 *             by one word.
 *
 * n2l is wrapped in do { } while (0) so that it behaves as a single
 * statement (e.g. as the body of an unbraced if/for); arguments are
 * parenthesised so that expressions can be passed safely.  c must be a
 * modifiable pointer lvalue and is evaluated more than once in n2l.
 */
#define n2l(c,l)    do { (l) = ntohl(*(c)); (c)++; } while (0)
#define l2n(l,c)    (*(c)++ = htonl(l))
70:
/*
 * AES S-box: forward byte-substitution table, indexed by the input byte.
 * Each row below holds the 16 entries that share the same high nibble.
 */
static const uint8_t aes_sbox[256] =
{
    /* 0x00 */ 0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
    /* 0x10 */ 0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
    /* 0x20 */ 0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
    /* 0x30 */ 0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
    /* 0x40 */ 0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
    /* 0x50 */ 0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
    /* 0x60 */ 0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
    /* 0x70 */ 0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
    /* 0x80 */ 0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
    /* 0x90 */ 0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
    /* 0xA0 */ 0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
    /* 0xB0 */ 0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
    /* 0xC0 */ 0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
    /* 0xD0 */ 0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
    /* 0xE0 */ 0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
    /* 0xF0 */ 0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};
109:
/*
 * AES inverse S-box: byte-substitution table used by decryption, indexed
 * by the input byte.  Each row below holds the 16 entries that share the
 * same high nibble.
 */
static const uint8_t aes_isbox[256] =
{
    /* 0x00 */ 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
    /* 0x10 */ 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
    /* 0x20 */ 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
    /* 0x30 */ 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
    /* 0x40 */ 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
    /* 0x50 */ 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
    /* 0x60 */ 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
    /* 0x70 */ 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
    /* 0x80 */ 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
    /* 0x90 */ 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
    /* 0xa0 */ 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
    /* 0xb0 */ 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
    /* 0xc0 */ 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
    /* 0xd0 */ 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
    /* 0xe0 */ 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
    /* 0xf0 */ 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};
148:
/*
 * Round constants consumed one at a time by the key expansion in
 * AES_set_key().  Each entry is the GF(2^8) double of the previous one,
 * starting from 0x01.
 */
static const unsigned char Rcon[30]=
{
    0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,
    0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,
    0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};
156:
/*
 * Double a value in the Galois Field GF(2^8) defined by the irreducible
 * polynomial x^8+x^4+x^3+x+1: shift left by one and, when bit 7 of the
 * input was set, fold the overflow back in by xoring with 0x1b.
 *
 * Only the low eight bits of the result are returned.
 */
static unsigned char AES_xtime(uint32_t x)
{
    uint32_t doubled = x << 1;

    if (x & 0x80)
    {
        doubled ^= 0x1b;
    }

    return (unsigned char)doubled;
}
163:
164: /**
165: * Set up AES with the key/iv and cipher size.
166: */
167: void AES_set_key(AES_CTX *ctx, const uint8_t *key,
168: const uint8_t *iv, AES_MODE mode)
169: {
170: int i, ii;
171: uint32_t *W, tmp, tmp2;
172: const unsigned char *ip;
173: int words;
174:
175: switch (mode)
176: {
177: case AES_MODE_128:
178: i = 10;
179: words = 4;
180: break;
181:
182: case AES_MODE_256:
183: i = 14;
184: words = 8;
185: break;
186:
187: default: /* fail silently */
188: return;
189: }
190:
191: ctx->rounds = i;
192: ctx->key_size = words;
193: W = ctx->ks;
194: for (i = 0; i < words; i+=2)
195: {
196: W[i+0]= ((uint32_t)key[ 0]<<24)|
197: ((uint32_t)key[ 1]<<16)|
198: ((uint32_t)key[ 2]<< 8)|
199: ((uint32_t)key[ 3] );
200: W[i+1]= ((uint32_t)key[ 4]<<24)|
201: ((uint32_t)key[ 5]<<16)|
202: ((uint32_t)key[ 6]<< 8)|
203: ((uint32_t)key[ 7] );
204: key += 8;
205: }
206:
207: ip = Rcon;
208: ii = 4 * (ctx->rounds+1);
209: for (i = words; i<ii; i++)
210: {
211: tmp = W[i-1];
212:
213: if ((i % words) == 0)
214: {
215: tmp2 =(uint32_t)aes_sbox[(tmp )&0xff]<< 8;
216: tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
217: tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
218: tmp2|=(uint32_t)aes_sbox[(tmp>>24) ];
219: tmp=tmp2^(((unsigned int)*ip)<<24);
220: ip++;
221: }
222:
223: if ((words == 8) && ((i % words) == 4))
224: {
225: tmp2 =(uint32_t)aes_sbox[(tmp )&0xff] ;
226: tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
227: tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
228: tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]<<24;
229: tmp=tmp2;
230: }
231:
232: W[i]=W[i-words]^tmp;
233: }
234:
235: /* copy the iv across */
236: memcpy(ctx->iv, iv, 16);
237: }
238:
239: /**
240: * Change a key for decryption.
241: */
242: void AES_convert_key(AES_CTX *ctx)
243: {
244: int i;
245: uint32_t *k,w,t1,t2,t3,t4;
246:
247: k = ctx->ks;
248: k += 4;
249:
250: for (i=ctx->rounds*4; i>4; i--)
251: {
252: w= *k;
253: w = inv_mix_col(w,t1,t2,t3,t4);
254: *k++ =w;
255: }
256: }
257:
258: #if 0
259: /**
260: * Encrypt a byte sequence (with a block size 16) using the AES cipher.
261: */
262: void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
263: {
264: uint32_t tin0, tin1, tin2, tin3;
265: uint32_t tout0, tout1, tout2, tout3;
266: uint32_t tin[4];
267: uint32_t *iv = (uint32_t *)ctx->iv;
268: uint32_t *msg_32 = (uint32_t *)msg;
269: uint32_t *out_32 = (uint32_t *)out;
270:
271: n2l(iv, tout0);
272: n2l(iv, tout1);
273: n2l(iv, tout2);
274: n2l(iv, tout3);
275: iv -= 4;
276:
277: for (length -= 16; length >= 0; length -= 16)
278: {
279: n2l(msg_32, tin0);
280: n2l(msg_32, tin1);
281: n2l(msg_32, tin2);
282: n2l(msg_32, tin3);
283: tin[0] = tin0^tout0;
284: tin[1] = tin1^tout1;
285: tin[2] = tin2^tout2;
286: tin[3] = tin3^tout3;
287:
288: AES_encrypt(ctx, tin);
289:
290: tout0 = tin[0];
291: l2n(tout0, out_32);
292: tout1 = tin[1];
293: l2n(tout1, out_32);
294: tout2 = tin[2];
295: l2n(tout2, out_32);
296: tout3 = tin[3];
297: l2n(tout3, out_32);
298: }
299:
300: l2n(tout0, iv);
301: l2n(tout1, iv);
302: l2n(tout2, iv);
303: l2n(tout3, iv);
304: }
305:
306: /**
307: * Decrypt a byte sequence (with a block size 16) using the AES cipher.
308: */
309: void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
310: {
311: uint32_t tin0, tin1, tin2, tin3;
312: uint32_t xor0,xor1,xor2,xor3;
313: uint32_t tout0,tout1,tout2,tout3;
314: uint32_t data[4];
315: uint32_t *iv = (uint32_t *)ctx->iv;
316: uint32_t *msg_32 = (uint32_t *)msg;
317: uint32_t *out_32 = (uint32_t *)out;
318:
319: n2l(iv ,xor0);
320: n2l(iv, xor1);
321: n2l(iv, xor2);
322: n2l(iv, xor3);
323: iv -= 4;
324:
325: for (length-=16; length >= 0; length -= 16)
326: {
327: n2l(msg_32, tin0);
328: n2l(msg_32, tin1);
329: n2l(msg_32, tin2);
330: n2l(msg_32, tin3);
331:
332: data[0] = tin0;
333: data[1] = tin1;
334: data[2] = tin2;
335: data[3] = tin3;
336:
337: AES_decrypt(ctx, data);
338:
339: tout0 = data[0]^xor0;
340: tout1 = data[1]^xor1;
341: tout2 = data[2]^xor2;
342: tout3 = data[3]^xor3;
343:
344: xor0 = tin0;
345: xor1 = tin1;
346: xor2 = tin2;
347: xor3 = tin3;
348:
349: l2n(tout0, out_32);
350: l2n(tout1, out_32);
351: l2n(tout2, out_32);
352: l2n(tout3, out_32);
353: }
354:
355: l2n(xor0, iv);
356: l2n(xor1, iv);
357: l2n(xor2, iv);
358: l2n(xor3, iv);
359: }
360: #endif
361:
362: /**
363: * Encrypt a single block (16 bytes) of data
364: */
365: void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
366: {
367: /* To make this code smaller, generate the sbox entries on the fly.
368: * This will have a really heavy effect upon performance.
369: */
370: uint32_t tmp[4];
371: uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
372: int curr_rnd;
373: int rounds = ctx->rounds;
374: const uint32_t *k = ctx->ks;
375:
376: /* Pre-round key addition */
377: for (row = 0; row < 4; row++)
378: {
379: data[row] ^= *(k++);
380: }
381:
382: /* Encrypt one block. */
383: for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
384: {
385: /* Perform ByteSub and ShiftRow operations together */
386: for (row = 0; row < 4; row++)
387: {
388: a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
389: a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
390: a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
391: a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
392:
393: /* Perform MixColumn iff not last round */
394: if (curr_rnd < (rounds - 1))
395: {
396: tmp1 = a0 ^ a1 ^ a2 ^ a3;
397: old_a0 = a0;
398:
399: a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
400: a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
401: a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
402: a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
403:
404: }
405:
406: tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
407: }
408:
409: /* KeyAddition - note that it is vital that this loop is separate from
410: the MixColumn operation, which must be atomic...*/
411: for (row = 0; row < 4; row++)
412: {
413: data[row] = tmp[row] ^ *(k++);
414: }
415: }
416: }
417:
418: /**
419: * Decrypt a single block (16 bytes) of data
420: */
421: void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
422: {
423: uint32_t tmp[4];
424: uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
425: uint32_t a0, a1, a2, a3, row;
426: int curr_rnd;
427: int rounds = ctx->rounds;
428: uint32_t *k = (uint32_t*)ctx->ks + ((rounds+1)*4);
429:
430: /* pre-round key addition */
431: for (row=4; row > 0;row--)
432: {
433: data[row-1] ^= *(--k);
434: }
435:
436: /* Decrypt one block */
437: for (curr_rnd=0; curr_rnd < rounds; curr_rnd++)
438: {
439: /* Perform ByteSub and ShiftRow operations together */
440: for (row = 4; row > 0; row--)
441: {
442: a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
443: a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
444: a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
445: a3 = aes_isbox[(data[row%4])&0xFF];
446:
447: /* Perform MixColumn iff not last round */
448: if (curr_rnd<(rounds-1))
449: {
450: /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
451: are quite large compared to encryption; this
452: operation slows decryption down noticeably. */
453: xt0 = AES_xtime(a0^a1);
454: xt1 = AES_xtime(a1^a2);
455: xt2 = AES_xtime(a2^a3);
456: xt3 = AES_xtime(a3^a0);
457: xt4 = AES_xtime(xt0^xt1);
458: xt5 = AES_xtime(xt1^xt2);
459: xt6 = AES_xtime(xt4^xt5);
460:
461: xt0 ^= a1^a2^a3^xt4^xt6;
462: xt1 ^= a0^a2^a3^xt5^xt6;
463: xt2 ^= a0^a1^a3^xt4^xt6;
464: xt3 ^= a0^a1^a2^xt5^xt6;
465: tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
466: }
467: else
468: tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
469: }
470:
471: for (row = 4; row > 0; row--)
472: {
473: data[row-1] = tmp[row-1] ^ *(--k);
474: }
475: }
476: }
477:
478: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.