|
|
1.1 ! root 1: /* ! 2: * Optimizations for Tiny Code Generator for QEMU ! 3: * ! 4: * Copyright (c) 2010 Samsung Electronics. ! 5: * Contributed by Kirill Batuzov <[email protected]> ! 6: * ! 7: * Permission is hereby granted, free of charge, to any person obtaining a copy ! 8: * of this software and associated documentation files (the "Software"), to deal ! 9: * in the Software without restriction, including without limitation the rights ! 10: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ! 11: * copies of the Software, and to permit persons to whom the Software is ! 12: * furnished to do so, subject to the following conditions: ! 13: * ! 14: * The above copyright notice and this permission notice shall be included in ! 15: * all copies or substantial portions of the Software. ! 16: * ! 17: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ! 18: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ! 19: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ! 20: * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ! 21: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ! 22: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ! 23: * THE SOFTWARE. ! 24: */ ! 25: ! 26: #include "config.h" ! 27: ! 28: #include <stdlib.h> ! 29: #include <stdio.h> ! 30: ! 31: #include "qemu-common.h" ! 32: #include "tcg-op.h" ! 33: ! 34: #define CASE_OP_32_64(x) \ ! 35: glue(glue(case INDEX_op_, x), _i32): \ ! 36: glue(glue(case INDEX_op_, x), _i64) ! 37: ! 38: typedef enum { ! 39: TCG_TEMP_UNDEF = 0, ! 40: TCG_TEMP_CONST, ! 41: TCG_TEMP_COPY, ! 42: TCG_TEMP_HAS_COPY, ! 43: TCG_TEMP_ANY ! 44: } tcg_temp_state; ! 45: ! 46: struct tcg_temp_info { ! 47: tcg_temp_state state; ! 48: uint16_t prev_copy; ! 49: uint16_t next_copy; ! 50: tcg_target_ulong val; ! 51: }; ! 52: ! 53: static struct tcg_temp_info temps[TCG_MAX_TEMPS]; ! 54: ! 55: /* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some ! 56: class of equivalent temp's, a new representative should be chosen in this ! 57: class. */ ! 58: static void reset_temp(TCGArg temp, int nb_temps, int nb_globals) ! 59: { ! 60: int i; ! 61: TCGArg new_base = (TCGArg)-1; ! 62: if (temps[temp].state == TCG_TEMP_HAS_COPY) { ! 63: for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { ! 64: if (i >= nb_globals) { ! 65: temps[i].state = TCG_TEMP_HAS_COPY; ! 66: new_base = i; ! 67: break; ! 68: } ! 69: } ! 70: for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) { ! 71: if (new_base == (TCGArg)-1) { ! 72: temps[i].state = TCG_TEMP_ANY; ! 73: } else { ! 74: temps[i].val = new_base; ! 75: } ! 76: } ! 77: temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; ! 78: temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; ! 79: } else if (temps[temp].state == TCG_TEMP_COPY) { ! 80: temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; ! 81: temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; ! 82: new_base = temps[temp].val; ! 83: } ! 84: temps[temp].state = TCG_TEMP_ANY; ! 85: if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) { ! 86: temps[new_base].state = TCG_TEMP_ANY; ! 87: } ! 88: } ! 89: ! 90: static int op_bits(TCGOpcode op) ! 91: { ! 92: const TCGOpDef *def = &tcg_op_defs[op]; ! 93: return def->flags & TCG_OPF_64BIT ? 64 : 32; ! 94: } ! 95: ! 96: static TCGOpcode op_to_movi(TCGOpcode op) ! 97: { ! 98: switch (op_bits(op)) { ! 99: case 32: ! 100: return INDEX_op_movi_i32; ! 101: case 64: ! 102: return INDEX_op_movi_i64; ! 103: default: ! 104: fprintf(stderr, "op_to_movi: unexpected return value of " ! 105: "function op_bits.\n"); ! 106: tcg_abort(); ! 107: } ! 108: } ! 109: ! 110: static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst, ! 111: TCGArg src, int nb_temps, int nb_globals) ! 112: { ! 113: reset_temp(dst, nb_temps, nb_globals); ! 114: assert(temps[src].state != TCG_TEMP_COPY); ! 115: /* Don't try to copy if one of temps is a global or either one ! 116: is local and another is register */ ! 117: if (src >= nb_globals && dst >= nb_globals && ! 118: tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) { ! 119: assert(temps[src].state != TCG_TEMP_CONST); ! 120: if (temps[src].state != TCG_TEMP_HAS_COPY) { ! 121: temps[src].state = TCG_TEMP_HAS_COPY; ! 122: temps[src].next_copy = src; ! 123: temps[src].prev_copy = src; ! 124: } ! 125: temps[dst].state = TCG_TEMP_COPY; ! 126: temps[dst].val = src; ! 127: temps[dst].next_copy = temps[src].next_copy; ! 128: temps[dst].prev_copy = src; ! 129: temps[temps[dst].next_copy].prev_copy = dst; ! 130: temps[src].next_copy = dst; ! 131: } ! 132: gen_args[0] = dst; ! 133: gen_args[1] = src; ! 134: } ! 135: ! 136: static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val, ! 137: int nb_temps, int nb_globals) ! 138: { ! 139: reset_temp(dst, nb_temps, nb_globals); ! 140: temps[dst].state = TCG_TEMP_CONST; ! 141: temps[dst].val = val; ! 142: gen_args[0] = dst; ! 143: gen_args[1] = val; ! 144: } ! 145: ! 146: static TCGOpcode op_to_mov(TCGOpcode op) ! 147: { ! 148: switch (op_bits(op)) { ! 149: case 32: ! 150: return INDEX_op_mov_i32; ! 151: case 64: ! 152: return INDEX_op_mov_i64; ! 153: default: ! 154: fprintf(stderr, "op_to_mov: unexpected return value of " ! 155: "function op_bits.\n"); ! 156: tcg_abort(); ! 157: } ! 158: } ! 159: ! 160: static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) ! 161: { ! 162: switch (op) { ! 163: CASE_OP_32_64(add): ! 164: return x + y; ! 165: ! 166: CASE_OP_32_64(sub): ! 167: return x - y; ! 168: ! 169: CASE_OP_32_64(mul): ! 170: return x * y; ! 171: ! 172: CASE_OP_32_64(and): ! 173: return x & y; ! 174: ! 175: CASE_OP_32_64(or): ! 176: return x | y; ! 177: ! 178: CASE_OP_32_64(xor): ! 179: return x ^ y; ! 180: ! 181: case INDEX_op_shl_i32: ! 182: return (uint32_t)x << (uint32_t)y; ! 183: ! 184: case INDEX_op_shl_i64: ! 185: return (uint64_t)x << (uint64_t)y; ! 186: ! 187: case INDEX_op_shr_i32: ! 188: return (uint32_t)x >> (uint32_t)y; ! 189: ! 190: case INDEX_op_shr_i64: ! 191: return (uint64_t)x >> (uint64_t)y; ! 192: ! 193: case INDEX_op_sar_i32: ! 194: return (int32_t)x >> (int32_t)y; ! 195: ! 196: case INDEX_op_sar_i64: ! 197: return (int64_t)x >> (int64_t)y; ! 198: ! 199: case INDEX_op_rotr_i32: ! 200: x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y); ! 201: return x; ! 202: ! 203: case INDEX_op_rotr_i64: ! 204: x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y); ! 205: return x; ! 206: ! 207: case INDEX_op_rotl_i32: ! 208: x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y)); ! 209: return x; ! 210: ! 211: case INDEX_op_rotl_i64: ! 212: x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y)); ! 213: return x; ! 214: ! 215: CASE_OP_32_64(not): ! 216: return ~x; ! 217: ! 218: CASE_OP_32_64(neg): ! 219: return -x; ! 220: ! 221: CASE_OP_32_64(andc): ! 222: return x & ~y; ! 223: ! 224: CASE_OP_32_64(orc): ! 225: return x | ~y; ! 226: ! 227: CASE_OP_32_64(eqv): ! 228: return ~(x ^ y); ! 229: ! 230: CASE_OP_32_64(nand): ! 231: return ~(x & y); ! 232: ! 233: CASE_OP_32_64(nor): ! 234: return ~(x | y); ! 235: ! 236: CASE_OP_32_64(ext8s): ! 237: return (int8_t)x; ! 238: ! 239: CASE_OP_32_64(ext16s): ! 240: return (int16_t)x; ! 241: ! 242: CASE_OP_32_64(ext8u): ! 243: return (uint8_t)x; ! 244: ! 245: CASE_OP_32_64(ext16u): ! 246: return (uint16_t)x; ! 247: ! 248: case INDEX_op_ext32s_i64: ! 249: return (int32_t)x; ! 250: ! 251: case INDEX_op_ext32u_i64: ! 252: return (uint32_t)x; ! 253: ! 254: default: ! 255: fprintf(stderr, ! 256: "Unrecognized operation %d in do_constant_folding.\n", op); ! 257: tcg_abort(); ! 258: } ! 259: } ! 260: ! 261: static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) ! 262: { ! 263: TCGArg res = do_constant_folding_2(op, x, y); ! 264: if (op_bits(op) == 32) { ! 265: res &= 0xffffffff; ! 266: } ! 267: return res; ! 268: } ! 269: ! 270: /* Propagate constants and copies, fold constant expressions. */ ! 271: static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, ! 272: TCGArg *args, TCGOpDef *tcg_op_defs) ! 273: { ! 274: int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args; ! 275: TCGOpcode op; ! 276: const TCGOpDef *def; ! 277: TCGArg *gen_args; ! 278: TCGArg tmp; ! 279: /* Array VALS has an element for each temp. ! 280: If this temp holds a constant then its value is kept in VALS' element. ! 281: If this temp is a copy of other ones then this equivalence class' ! 282: representative is kept in VALS' element. ! 283: If this temp is neither copy nor constant then corresponding VALS' ! 284: element is unused. */ ! 285: ! 286: nb_temps = s->nb_temps; ! 287: nb_globals = s->nb_globals; ! 288: memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); ! 289: ! 290: nb_ops = tcg_opc_ptr - gen_opc_buf; ! 291: gen_args = args; ! 292: for (op_index = 0; op_index < nb_ops; op_index++) { ! 293: op = gen_opc_buf[op_index]; ! 294: def = &tcg_op_defs[op]; ! 295: /* Do copy propagation */ ! 296: if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) { ! 297: assert(op != INDEX_op_call); ! 298: for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) { ! 299: if (temps[args[i]].state == TCG_TEMP_COPY) { ! 300: args[i] = temps[args[i]].val; ! 301: } ! 302: } ! 303: } ! 304: ! 305: /* For commutative operations make constant second argument */ ! 306: switch (op) { ! 307: CASE_OP_32_64(add): ! 308: CASE_OP_32_64(mul): ! 309: CASE_OP_32_64(and): ! 310: CASE_OP_32_64(or): ! 311: CASE_OP_32_64(xor): ! 312: CASE_OP_32_64(eqv): ! 313: CASE_OP_32_64(nand): ! 314: CASE_OP_32_64(nor): ! 315: if (temps[args[1]].state == TCG_TEMP_CONST) { ! 316: tmp = args[1]; ! 317: args[1] = args[2]; ! 318: args[2] = tmp; ! 319: } ! 320: break; ! 321: default: ! 322: break; ! 323: } ! 324: ! 325: /* Simplify expression if possible. */ ! 326: switch (op) { ! 327: CASE_OP_32_64(add): ! 328: CASE_OP_32_64(sub): ! 329: CASE_OP_32_64(shl): ! 330: CASE_OP_32_64(shr): ! 331: CASE_OP_32_64(sar): ! 332: CASE_OP_32_64(rotl): ! 333: CASE_OP_32_64(rotr): ! 334: if (temps[args[1]].state == TCG_TEMP_CONST) { ! 335: /* Proceed with possible constant folding. */ ! 336: break; ! 337: } ! 338: if (temps[args[2]].state == TCG_TEMP_CONST ! 339: && temps[args[2]].val == 0) { ! 340: if ((temps[args[0]].state == TCG_TEMP_COPY ! 341: && temps[args[0]].val == args[1]) ! 342: || args[0] == args[1]) { ! 343: args += 3; ! 344: gen_opc_buf[op_index] = INDEX_op_nop; ! 345: } else { ! 346: gen_opc_buf[op_index] = op_to_mov(op); ! 347: tcg_opt_gen_mov(s, gen_args, args[0], args[1], ! 348: nb_temps, nb_globals); ! 349: gen_args += 2; ! 350: args += 3; ! 351: } ! 352: continue; ! 353: } ! 354: break; ! 355: CASE_OP_32_64(mul): ! 356: if ((temps[args[2]].state == TCG_TEMP_CONST ! 357: && temps[args[2]].val == 0)) { ! 358: gen_opc_buf[op_index] = op_to_movi(op); ! 359: tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals); ! 360: args += 3; ! 361: gen_args += 2; ! 362: continue; ! 363: } ! 364: break; ! 365: CASE_OP_32_64(or): ! 366: CASE_OP_32_64(and): ! 367: if (args[1] == args[2]) { ! 368: if (args[1] == args[0]) { ! 369: args += 3; ! 370: gen_opc_buf[op_index] = INDEX_op_nop; ! 371: } else { ! 372: gen_opc_buf[op_index] = op_to_mov(op); ! 373: tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps, ! 374: nb_globals); ! 375: gen_args += 2; ! 376: args += 3; ! 377: } ! 378: continue; ! 379: } ! 380: break; ! 381: default: ! 382: break; ! 383: } ! 384: ! 385: /* Propagate constants through copy operations and do constant ! 386: folding. Constants will be substituted to arguments by register ! 387: allocator where needed and possible. Also detect copies. */ ! 388: switch (op) { ! 389: CASE_OP_32_64(mov): ! 390: if ((temps[args[1]].state == TCG_TEMP_COPY ! 391: && temps[args[1]].val == args[0]) ! 392: || args[0] == args[1]) { ! 393: args += 2; ! 394: gen_opc_buf[op_index] = INDEX_op_nop; ! 395: break; ! 396: } ! 397: if (temps[args[1]].state != TCG_TEMP_CONST) { ! 398: tcg_opt_gen_mov(s, gen_args, args[0], args[1], ! 399: nb_temps, nb_globals); ! 400: gen_args += 2; ! 401: args += 2; ! 402: break; ! 403: } ! 404: /* Source argument is constant. Rewrite the operation and ! 405: let movi case handle it. */ ! 406: op = op_to_movi(op); ! 407: gen_opc_buf[op_index] = op; ! 408: args[1] = temps[args[1]].val; ! 409: /* fallthrough */ ! 410: CASE_OP_32_64(movi): ! 411: tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals); ! 412: gen_args += 2; ! 413: args += 2; ! 414: break; ! 415: CASE_OP_32_64(not): ! 416: CASE_OP_32_64(neg): ! 417: CASE_OP_32_64(ext8s): ! 418: CASE_OP_32_64(ext8u): ! 419: CASE_OP_32_64(ext16s): ! 420: CASE_OP_32_64(ext16u): ! 421: case INDEX_op_ext32s_i64: ! 422: case INDEX_op_ext32u_i64: ! 423: if (temps[args[1]].state == TCG_TEMP_CONST) { ! 424: gen_opc_buf[op_index] = op_to_movi(op); ! 425: tmp = do_constant_folding(op, temps[args[1]].val, 0); ! 426: tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); ! 427: gen_args += 2; ! 428: args += 2; ! 429: break; ! 430: } else { ! 431: reset_temp(args[0], nb_temps, nb_globals); ! 432: gen_args[0] = args[0]; ! 433: gen_args[1] = args[1]; ! 434: gen_args += 2; ! 435: args += 2; ! 436: break; ! 437: } ! 438: CASE_OP_32_64(add): ! 439: CASE_OP_32_64(sub): ! 440: CASE_OP_32_64(mul): ! 441: CASE_OP_32_64(or): ! 442: CASE_OP_32_64(and): ! 443: CASE_OP_32_64(xor): ! 444: CASE_OP_32_64(shl): ! 445: CASE_OP_32_64(shr): ! 446: CASE_OP_32_64(sar): ! 447: CASE_OP_32_64(rotl): ! 448: CASE_OP_32_64(rotr): ! 449: CASE_OP_32_64(andc): ! 450: CASE_OP_32_64(orc): ! 451: CASE_OP_32_64(eqv): ! 452: CASE_OP_32_64(nand): ! 453: CASE_OP_32_64(nor): ! 454: if (temps[args[1]].state == TCG_TEMP_CONST ! 455: && temps[args[2]].state == TCG_TEMP_CONST) { ! 456: gen_opc_buf[op_index] = op_to_movi(op); ! 457: tmp = do_constant_folding(op, temps[args[1]].val, ! 458: temps[args[2]].val); ! 459: tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals); ! 460: gen_args += 2; ! 461: args += 3; ! 462: break; ! 463: } else { ! 464: reset_temp(args[0], nb_temps, nb_globals); ! 465: gen_args[0] = args[0]; ! 466: gen_args[1] = args[1]; ! 467: gen_args[2] = args[2]; ! 468: gen_args += 3; ! 469: args += 3; ! 470: break; ! 471: } ! 472: case INDEX_op_call: ! 473: nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); ! 474: if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) { ! 475: for (i = 0; i < nb_globals; i++) { ! 476: reset_temp(i, nb_temps, nb_globals); ! 477: } ! 478: } ! 479: for (i = 0; i < (args[0] >> 16); i++) { ! 480: reset_temp(args[i + 1], nb_temps, nb_globals); ! 481: } ! 482: i = nb_call_args + 3; ! 483: while (i) { ! 484: *gen_args = *args; ! 485: args++; ! 486: gen_args++; ! 487: i--; ! 488: } ! 489: break; ! 490: case INDEX_op_set_label: ! 491: case INDEX_op_jmp: ! 492: case INDEX_op_br: ! 493: CASE_OP_32_64(brcond): ! 494: memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info)); ! 495: for (i = 0; i < def->nb_args; i++) { ! 496: *gen_args = *args; ! 497: args++; ! 498: gen_args++; ! 499: } ! 500: break; ! 501: default: ! 502: /* Default case: we do know nothing about operation so no ! 503: propagation is done. We only trash output args. */ ! 504: for (i = 0; i < def->nb_oargs; i++) { ! 505: reset_temp(args[i], nb_temps, nb_globals); ! 506: } ! 507: for (i = 0; i < def->nb_args; i++) { ! 508: gen_args[i] = args[i]; ! 509: } ! 510: args += def->nb_args; ! 511: gen_args += def->nb_args; ! 512: break; ! 513: } ! 514: } ! 515: ! 516: return gen_args; ! 517: } ! 518: ! 519: TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, ! 520: TCGArg *args, TCGOpDef *tcg_op_defs) ! 521: { ! 522: TCGArg *res; ! 523: res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs); ! 524: return res; ! 525: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.