|
|
1.1 ! root 1: /* ! 2: * Copyright(C) 2006 Cameron Rich ! 3: * ! 4: * This library is free software; you can redistribute it and/or modify ! 5: * it under the terms of the GNU Lesser General Public License as published by ! 6: * the Free Software Foundation; either version 2 of the License, or ! 7: * (at your option) any later version. ! 8: * ! 9: * This library is distributed in the hope that it will be useful, ! 10: * but WITHOUT ANY WARRANTY; without even the implied warranty of ! 11: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! 12: * GNU Lesser General Public License for more details. ! 13: * ! 14: * You should have received a copy of the GNU Lesser General Public License ! 15: * along with this library; if not, write to the Free Software ! 16: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! 17: */ ! 18: ! 19: FILE_LICENCE ( GPL2_OR_LATER ); ! 20: ! 21: /** ! 22: * AES implementation - this is a small code version. There are much faster ! 23: * versions around but they are much larger in size (i.e. they use large ! 24: * submix tables). ! 25: */ ! 26: ! 27: #include <string.h> ! 28: #include "crypto.h" ! 29: ! 30: /* all commented out in skeleton mode */ ! 31: #ifndef CONFIG_SSL_SKELETON_MODE ! 32: ! 33: #define rot1(x) (((x) << 24) | ((x) >> 8)) ! 34: #define rot2(x) (((x) << 16) | ((x) >> 16)) ! 35: #define rot3(x) (((x) << 8) | ((x) >> 24)) ! 36: ! 37: /* ! 38: * This cute trick does 4 'mul by two' at once. Stolen from ! 39: * Dr B. R. Gladman <[email protected]> but I'm sure the u-(u>>7) is ! 40: * a standard graphics trick ! 41: * The key to this is that we need to xor with 0x1b if the top bit is set. ! 42: * a 1xxx xxxx 0xxx 0xxx First we mask the 7bit, ! 43: * b 1000 0000 0000 0000 then we shift right by 7 putting the 7bit in 0bit, ! 44: * c 0000 0001 0000 0000 we then subtract (c) from (b) ! 45: * d 0111 1111 0000 0000 and now we and with our mask ! 46: * e 0001 1011 0000 0000 ! 47: */ ! 48: #define mt 0x80808080 ! 49: #define ml 0x7f7f7f7f ! 50: #define mh 0xfefefefe ! 51: #define mm 0x1b1b1b1b ! 52: #define mul2(x,t) ((t)=((x)&mt), \ ! 53: ((((x)+(x))&mh)^(((t)-((t)>>7))&mm))) ! 54: ! 55: #define inv_mix_col(x,f2,f4,f8,f9) (\ ! 56: (f2)=mul2(x,f2), \ ! 57: (f4)=mul2(f2,f4), \ ! 58: (f8)=mul2(f4,f8), \ ! 59: (f9)=(x)^(f8), \ ! 60: (f8)=((f2)^(f4)^(f8)), \ ! 61: (f2)^=(f9), \ ! 62: (f4)^=(f9), \ ! 63: (f8)^=rot3(f2), \ ! 64: (f8)^=rot2(f4), \ ! 65: (f8)^rot1(f9)) ! 66: ! 67: /* some macros to do endian independent byte extraction */ ! 68: #define n2l(c,l) l=ntohl(*c); c++ ! 69: #define l2n(l,c) *c++=htonl(l) ! 70: ! 71: /* ! 72: * AES S-box ! 73: */ ! 74: static const uint8_t aes_sbox[256] = ! 75: { ! 76: 0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5, ! 77: 0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76, ! 78: 0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0, ! 79: 0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0, ! 80: 0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC, ! 81: 0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15, ! 82: 0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A, ! 83: 0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75, ! 84: 0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0, ! 85: 0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84, ! 86: 0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B, ! 87: 0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF, ! 88: 0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85, ! 89: 0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8, ! 90: 0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5, ! 91: 0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2, ! 92: 0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17, ! 93: 0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73, ! 94: 0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88, ! 95: 0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB, ! 96: 0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C, ! 97: 0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79, ! 98: 0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9, ! 99: 0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08, ! 100: 0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6, ! 101: 0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A, ! 102: 0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E, ! 103: 0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E, ! 104: 0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94, ! 105: 0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF, ! 106: 0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68, ! 107: 0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16, ! 108: }; ! 109: ! 110: /* ! 111: * AES is-box ! 112: */ ! 113: static const uint8_t aes_isbox[256] = ! 114: { ! 115: 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38, ! 116: 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, ! 117: 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87, ! 118: 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, ! 119: 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d, ! 120: 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, ! 121: 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2, ! 122: 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, ! 123: 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16, ! 124: 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, ! 125: 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda, ! 126: 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, ! 127: 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a, ! 128: 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, ! 129: 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02, ! 130: 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, ! 131: 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea, ! 132: 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, ! 133: 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85, ! 134: 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, ! 135: 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89, ! 136: 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, ! 137: 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20, ! 138: 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, ! 139: 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31, ! 140: 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, ! 141: 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d, ! 142: 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, ! 143: 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0, ! 144: 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, ! 145: 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26, ! 146: 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d ! 147: }; ! 148: ! 149: static const unsigned char Rcon[30]= ! 150: { ! 151: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, ! 152: 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f, ! 153: 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4, ! 154: 0xb3,0x7d,0xfa,0xef,0xc5,0x91, ! 155: }; ! 156: ! 157: /* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial ! 158: x^8+x^4+x^3+x+1 */ ! 159: static unsigned char AES_xtime(uint32_t x) ! 160: { ! 161: return x = (x&0x80) ? (x<<1)^0x1b : x<<1; ! 162: } ! 163: ! 164: /** ! 165: * Set up AES with the key/iv and cipher size. ! 166: */ ! 167: void AES_set_key(AES_CTX *ctx, const uint8_t *key, ! 168: const uint8_t *iv, AES_MODE mode) ! 169: { ! 170: int i, ii; ! 171: uint32_t *W, tmp, tmp2; ! 172: const unsigned char *ip; ! 173: int words; ! 174: ! 175: switch (mode) ! 176: { ! 177: case AES_MODE_128: ! 178: i = 10; ! 179: words = 4; ! 180: break; ! 181: ! 182: case AES_MODE_256: ! 183: i = 14; ! 184: words = 8; ! 185: break; ! 186: ! 187: default: /* fail silently */ ! 188: return; ! 189: } ! 190: ! 191: ctx->rounds = i; ! 192: ctx->key_size = words; ! 193: W = ctx->ks; ! 194: for (i = 0; i < words; i+=2) ! 195: { ! 196: W[i+0]= ((uint32_t)key[ 0]<<24)| ! 197: ((uint32_t)key[ 1]<<16)| ! 198: ((uint32_t)key[ 2]<< 8)| ! 199: ((uint32_t)key[ 3] ); ! 200: W[i+1]= ((uint32_t)key[ 4]<<24)| ! 201: ((uint32_t)key[ 5]<<16)| ! 202: ((uint32_t)key[ 6]<< 8)| ! 203: ((uint32_t)key[ 7] ); ! 204: key += 8; ! 205: } ! 206: ! 207: ip = Rcon; ! 208: ii = 4 * (ctx->rounds+1); ! 209: for (i = words; i<ii; i++) ! 210: { ! 211: tmp = W[i-1]; ! 212: ! 213: if ((i % words) == 0) ! 214: { ! 215: tmp2 =(uint32_t)aes_sbox[(tmp )&0xff]<< 8; ! 216: tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16; ! 217: tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24; ! 218: tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]; ! 219: tmp=tmp2^(((unsigned int)*ip)<<24); ! 220: ip++; ! 221: } ! 222: ! 223: if ((words == 8) && ((i % words) == 4)) ! 224: { ! 225: tmp2 =(uint32_t)aes_sbox[(tmp )&0xff] ; ! 226: tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8; ! 227: tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16; ! 228: tmp2|=(uint32_t)aes_sbox[(tmp>>24) ]<<24; ! 229: tmp=tmp2; ! 230: } ! 231: ! 232: W[i]=W[i-words]^tmp; ! 233: } ! 234: ! 235: /* copy the iv across */ ! 236: memcpy(ctx->iv, iv, 16); ! 237: } ! 238: ! 239: /** ! 240: * Change a key for decryption. ! 241: */ ! 242: void AES_convert_key(AES_CTX *ctx) ! 243: { ! 244: int i; ! 245: uint32_t *k,w,t1,t2,t3,t4; ! 246: ! 247: k = ctx->ks; ! 248: k += 4; ! 249: ! 250: for (i=ctx->rounds*4; i>4; i--) ! 251: { ! 252: w= *k; ! 253: w = inv_mix_col(w,t1,t2,t3,t4); ! 254: *k++ =w; ! 255: } ! 256: } ! 257: ! 258: #if 0 ! 259: /** ! 260: * Encrypt a byte sequence (with a block size 16) using the AES cipher. ! 261: */ ! 262: void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length) ! 263: { ! 264: uint32_t tin0, tin1, tin2, tin3; ! 265: uint32_t tout0, tout1, tout2, tout3; ! 266: uint32_t tin[4]; ! 267: uint32_t *iv = (uint32_t *)ctx->iv; ! 268: uint32_t *msg_32 = (uint32_t *)msg; ! 269: uint32_t *out_32 = (uint32_t *)out; ! 270: ! 271: n2l(iv, tout0); ! 272: n2l(iv, tout1); ! 273: n2l(iv, tout2); ! 274: n2l(iv, tout3); ! 275: iv -= 4; ! 276: ! 277: for (length -= 16; length >= 0; length -= 16) ! 278: { ! 279: n2l(msg_32, tin0); ! 280: n2l(msg_32, tin1); ! 281: n2l(msg_32, tin2); ! 282: n2l(msg_32, tin3); ! 283: tin[0] = tin0^tout0; ! 284: tin[1] = tin1^tout1; ! 285: tin[2] = tin2^tout2; ! 286: tin[3] = tin3^tout3; ! 287: ! 288: AES_encrypt(ctx, tin); ! 289: ! 290: tout0 = tin[0]; ! 291: l2n(tout0, out_32); ! 292: tout1 = tin[1]; ! 293: l2n(tout1, out_32); ! 294: tout2 = tin[2]; ! 295: l2n(tout2, out_32); ! 296: tout3 = tin[3]; ! 297: l2n(tout3, out_32); ! 298: } ! 299: ! 300: l2n(tout0, iv); ! 301: l2n(tout1, iv); ! 302: l2n(tout2, iv); ! 303: l2n(tout3, iv); ! 304: } ! 305: ! 306: /** ! 307: * Decrypt a byte sequence (with a block size 16) using the AES cipher. ! 308: */ ! 309: void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length) ! 310: { ! 311: uint32_t tin0, tin1, tin2, tin3; ! 312: uint32_t xor0,xor1,xor2,xor3; ! 313: uint32_t tout0,tout1,tout2,tout3; ! 314: uint32_t data[4]; ! 315: uint32_t *iv = (uint32_t *)ctx->iv; ! 316: uint32_t *msg_32 = (uint32_t *)msg; ! 317: uint32_t *out_32 = (uint32_t *)out; ! 318: ! 319: n2l(iv ,xor0); ! 320: n2l(iv, xor1); ! 321: n2l(iv, xor2); ! 322: n2l(iv, xor3); ! 323: iv -= 4; ! 324: ! 325: for (length-=16; length >= 0; length -= 16) ! 326: { ! 327: n2l(msg_32, tin0); ! 328: n2l(msg_32, tin1); ! 329: n2l(msg_32, tin2); ! 330: n2l(msg_32, tin3); ! 331: ! 332: data[0] = tin0; ! 333: data[1] = tin1; ! 334: data[2] = tin2; ! 335: data[3] = tin3; ! 336: ! 337: AES_decrypt(ctx, data); ! 338: ! 339: tout0 = data[0]^xor0; ! 340: tout1 = data[1]^xor1; ! 341: tout2 = data[2]^xor2; ! 342: tout3 = data[3]^xor3; ! 343: ! 344: xor0 = tin0; ! 345: xor1 = tin1; ! 346: xor2 = tin2; ! 347: xor3 = tin3; ! 348: ! 349: l2n(tout0, out_32); ! 350: l2n(tout1, out_32); ! 351: l2n(tout2, out_32); ! 352: l2n(tout3, out_32); ! 353: } ! 354: ! 355: l2n(xor0, iv); ! 356: l2n(xor1, iv); ! 357: l2n(xor2, iv); ! 358: l2n(xor3, iv); ! 359: } ! 360: #endif ! 361: ! 362: /** ! 363: * Encrypt a single block (16 bytes) of data ! 364: */ ! 365: void AES_encrypt(const AES_CTX *ctx, uint32_t *data) ! 366: { ! 367: /* To make this code smaller, generate the sbox entries on the fly. ! 368: * This will have a really heavy effect upon performance. ! 369: */ ! 370: uint32_t tmp[4]; ! 371: uint32_t tmp1, old_a0, a0, a1, a2, a3, row; ! 372: int curr_rnd; ! 373: int rounds = ctx->rounds; ! 374: const uint32_t *k = ctx->ks; ! 375: ! 376: /* Pre-round key addition */ ! 377: for (row = 0; row < 4; row++) ! 378: { ! 379: data[row] ^= *(k++); ! 380: } ! 381: ! 382: /* Encrypt one block. */ ! 383: for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++) ! 384: { ! 385: /* Perform ByteSub and ShiftRow operations together */ ! 386: for (row = 0; row < 4; row++) ! 387: { ! 388: a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF]; ! 389: a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF]; ! 390: a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF]; ! 391: a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF]; ! 392: ! 393: /* Perform MixColumn iff not last round */ ! 394: if (curr_rnd < (rounds - 1)) ! 395: { ! 396: tmp1 = a0 ^ a1 ^ a2 ^ a3; ! 397: old_a0 = a0; ! 398: ! 399: a0 ^= tmp1 ^ AES_xtime(a0 ^ a1); ! 400: a1 ^= tmp1 ^ AES_xtime(a1 ^ a2); ! 401: a2 ^= tmp1 ^ AES_xtime(a2 ^ a3); ! 402: a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0); ! 403: ! 404: } ! 405: ! 406: tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3); ! 407: } ! 408: ! 409: /* KeyAddition - note that it is vital that this loop is separate from ! 410: the MixColumn operation, which must be atomic...*/ ! 411: for (row = 0; row < 4; row++) ! 412: { ! 413: data[row] = tmp[row] ^ *(k++); ! 414: } ! 415: } ! 416: } ! 417: ! 418: /** ! 419: * Decrypt a single block (16 bytes) of data ! 420: */ ! 421: void AES_decrypt(const AES_CTX *ctx, uint32_t *data) ! 422: { ! 423: uint32_t tmp[4]; ! 424: uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6; ! 425: uint32_t a0, a1, a2, a3, row; ! 426: int curr_rnd; ! 427: int rounds = ctx->rounds; ! 428: uint32_t *k = (uint32_t*)ctx->ks + ((rounds+1)*4); ! 429: ! 430: /* pre-round key addition */ ! 431: for (row=4; row > 0;row--) ! 432: { ! 433: data[row-1] ^= *(--k); ! 434: } ! 435: ! 436: /* Decrypt one block */ ! 437: for (curr_rnd=0; curr_rnd < rounds; curr_rnd++) ! 438: { ! 439: /* Perform ByteSub and ShiftRow operations together */ ! 440: for (row = 4; row > 0; row--) ! 441: { ! 442: a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF]; ! 443: a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF]; ! 444: a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF]; ! 445: a3 = aes_isbox[(data[row%4])&0xFF]; ! 446: ! 447: /* Perform MixColumn iff not last round */ ! 448: if (curr_rnd<(rounds-1)) ! 449: { ! 450: /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E) ! 451: are quite large compared to encryption; this ! 452: operation slows decryption down noticeably. */ ! 453: xt0 = AES_xtime(a0^a1); ! 454: xt1 = AES_xtime(a1^a2); ! 455: xt2 = AES_xtime(a2^a3); ! 456: xt3 = AES_xtime(a3^a0); ! 457: xt4 = AES_xtime(xt0^xt1); ! 458: xt5 = AES_xtime(xt1^xt2); ! 459: xt6 = AES_xtime(xt4^xt5); ! 460: ! 461: xt0 ^= a1^a2^a3^xt4^xt6; ! 462: xt1 ^= a0^a2^a3^xt5^xt6; ! 463: xt2 ^= a0^a1^a3^xt4^xt6; ! 464: xt3 ^= a0^a1^a2^xt5^xt6; ! 465: tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3); ! 466: } ! 467: else ! 468: tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3); ! 469: } ! 470: ! 471: for (row = 4; row > 0; row--) ! 472: { ! 473: data[row-1] = tmp[row-1] ^ *(--k); ! 474: } ! 475: } ! 476: } ! 477: ! 478: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.