|
|
1.1 ! root 1: /* Try to unroll loops, and split induction variables. ! 2: Copyright (C) 1992, 1993 Free Software Foundation, Inc. ! 3: Contributed by James E. Wilson, Cygnus Support/UC Berkeley. ! 4: ! 5: This file is part of GNU CC. ! 6: ! 7: GNU CC is free software; you can redistribute it and/or modify ! 8: it under the terms of the GNU General Public License as published by ! 9: the Free Software Foundation; either version 2, or (at your option) ! 10: any later version. ! 11: ! 12: GNU CC is distributed in the hope that it will be useful, ! 13: but WITHOUT ANY WARRANTY; without even the implied warranty of ! 14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! 15: GNU General Public License for more details. ! 16: ! 17: You should have received a copy of the GNU General Public License ! 18: along with GNU CC; see the file COPYING. If not, write to ! 19: the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ ! 20: ! 21: /* Try to unroll a loop, and split induction variables. ! 22: ! 23: Loops for which the number of iterations can be calculated exactly are ! 24: handled specially. If the number of iterations times the insn_count is ! 25: less than MAX_UNROLLED_INSNS, then the loop is unrolled completely. ! 26: Otherwise, we try to unroll the loop a number of times modulo the number ! 27: of iterations, so that only one exit test will be needed. It is unrolled ! 28: a number of times approximately equal to MAX_UNROLLED_INSNS divided by ! 29: the insn count. ! 30: ! 31: Otherwise, if the number of iterations can be calculated exactly at ! 32: run time, and the loop is always entered at the top, then we try to ! 33: precondition the loop. That is, at run time, calculate how many times ! 34: the loop will execute, and then execute the loop body a few times so ! 35: that the remaining iterations will be some multiple of 4 (or 2 if the ! 36: loop is large). Then fall through to a loop unrolled 4 (or 2) times, ! 37: with only one exit test needed at the end of the loop. ! 38: ! 39: Otherwise, if the number of iterations can not be calculated exactly, ! 40: not even at run time, then we still unroll the loop a number of times ! 41: approximately equal to MAX_UNROLLED_INSNS divided by the insn count, ! 42: but there must be an exit test after each copy of the loop body. ! 43: ! 44: For each induction variable, which is dead outside the loop (replaceable) ! 45: or for which we can easily calculate the final value, if we can easily ! 46: calculate its value at each place where it is set as a function of the ! 47: current loop unroll count and the variable's value at loop entry, then ! 48: the induction variable is split into `N' different variables, one for ! 49: each copy of the loop body. One variable is live across the backward ! 50: branch, and the others are all calculated as a function of this variable. ! 51: This helps eliminate data dependencies, and leads to further opportunities ! 52: for cse. */ ! 53: ! 54: /* Possible improvements follow: */ ! 55: ! 56: /* ??? Add an extra pass somewhere to determine whether unrolling will ! 57: give any benefit. E.g. after generating all unrolled insns, compute the ! 58: cost of all insns and compare against cost of insns in rolled loop. ! 59: ! 60: - On traditional architectures, unrolling a non-constant bound loop ! 61: is a win if there is a giv whose only use is in memory addresses, the ! 62: memory addresses can be split, and hence giv increments can be ! 63: eliminated. ! 64: - It is also a win if the loop is executed many times, and preconditioning ! 65: can be performed for the loop. ! 66: Add code to check for these and similar cases. */ ! 67: ! 68: /* ??? Improve control of which loops get unrolled. Could use profiling ! 69: info to only unroll the most commonly executed loops. Perhaps have ! 70: a user specifyable option to control the amount of code expansion, ! 71: or the percent of loops to consider for unrolling. Etc. */ ! 72: ! 73: /* ??? Look at the register copies inside the loop to see if they form a ! 74: simple permutation. If so, iterate the permutation until it gets back to ! 75: the start state. This is how many times we should unroll the loop, for ! 76: best results, because then all register copies can be eliminated. ! 77: For example, the lisp nreverse function should be unrolled 3 times ! 78: while (this) ! 79: { ! 80: next = this->cdr; ! 81: this->cdr = prev; ! 82: prev = this; ! 83: this = next; ! 84: } ! 85: ! 86: ??? The number of times to unroll the loop may also be based on data ! 87: references in the loop. For example, if we have a loop that references ! 88: x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */ ! 89: ! 90: /* ??? Add some simple linear equation solving capability so that we can ! 91: determine the number of loop iterations for more complex loops. ! 92: For example, consider this loop from gdb ! 93: #define SWAP_TARGET_AND_HOST(buffer,len) ! 94: { ! 95: char tmp; ! 96: char *p = (char *) buffer; ! 97: char *q = ((char *) buffer) + len - 1; ! 98: int iterations = (len + 1) >> 1; ! 99: int i; ! 100: for (p; p < q; p++, q--;) ! 101: { ! 102: tmp = *q; ! 103: *q = *p; ! 104: *p = tmp; ! 105: } ! 106: } ! 107: Note that: ! 108: start value = p = &buffer + current_iteration ! 109: end value = q = &buffer + len - 1 - current_iteration ! 110: Given the loop exit test of "p < q", then there must be "q - p" iterations, ! 111: set equal to zero and solve for number of iterations: ! 112: q - p = len - 1 - 2*current_iteration = 0 ! 113: current_iteration = (len - 1) / 2 ! 114: Hence, there are (len - 1) / 2 (rounded up to the nearest integer) ! 115: iterations of this loop. */ ! 116: ! 117: /* ??? Currently, no labels are marked as loop invariant when doing loop ! 118: unrolling. This is because an insn inside the loop, that loads the address ! 119: of a label inside the loop into a register, could be moved outside the loop ! 120: by the invariant code motion pass if labels were invariant. If the loop ! 121: is subsequently unrolled, the code will be wrong because each unrolled ! 122: body of the loop will use the same address, whereas each actually needs a ! 123: different address. A case where this happens is when a loop containing ! 124: a switch statement is unrolled. ! 125: ! 126: It would be better to let labels be considered invariant. When we ! 127: unroll loops here, check to see if any insns using a label local to the ! 128: loop were moved before the loop. If so, then correct the problem, by ! 129: moving the insn back into the loop, or perhaps replicate the insn before ! 130: the loop, one copy for each time the loop is unrolled. */ ! 131: ! 132: /* The prime factors looked for when trying to unroll a loop by some ! 133: number which is modulo the total number of iterations. Just checking ! 134: for these 4 prime factors will find at least one factor for 75% of ! 135: all numbers theoretically. Practically speaking, this will succeed ! 136: almost all of the time since loops are generally a multiple of 2 ! 137: and/or 5. */ ! 138: ! 139: #define NUM_FACTORS 4 ! 140: ! 141: struct _factor { int factor, count; } factors[NUM_FACTORS] ! 142: = { {2, 0}, {3, 0}, {5, 0}, {7, 0}}; ! 143: ! 144: /* Describes the different types of loop unrolling performed. */ ! 145: ! 146: enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE }; ! 147: ! 148: #include "config.h" ! 149: #include "rtl.h" ! 150: #include "insn-config.h" ! 151: #include "integrate.h" ! 152: #include "regs.h" ! 153: #include "flags.h" ! 154: #include "expr.h" ! 155: #include <stdio.h> ! 156: #include "loop.h" ! 157: ! 158: /* This controls which loops are unrolled, and by how much we unroll ! 159: them. */ ! 160: ! 161: #ifndef MAX_UNROLLED_INSNS ! 162: #define MAX_UNROLLED_INSNS 100 ! 163: #endif ! 164: ! 165: /* Indexed by register number, if non-zero, then it contains a pointer ! 166: to a struct induction for a DEST_REG giv which has been combined with ! 167: one of more address givs. This is needed because whenever such a DEST_REG ! 168: giv is modified, we must modify the value of all split address givs ! 169: that were combined with this DEST_REG giv. */ ! 170: ! 171: static struct induction **addr_combined_regs; ! 172: ! 173: /* Indexed by register number, if this is a splittable induction variable, ! 174: then this will hold the current value of the register, which depends on the ! 175: iteration number. */ ! 176: ! 177: static rtx *splittable_regs; ! 178: ! 179: /* Indexed by register number, if this is a splittable induction variable, ! 180: then this will hold the number of instructions in the loop that modify ! 181: the induction variable. Used to ensure that only the last insn modifying ! 182: a split iv will update the original iv of the dest. */ ! 183: ! 184: static int *splittable_regs_updates; ! 185: ! 186: /* Values describing the current loop's iteration variable. These are set up ! 187: by loop_iterations, and used by precondition_loop_p. */ ! 188: ! 189: static rtx loop_iteration_var; ! 190: static rtx loop_initial_value; ! 191: static rtx loop_increment; ! 192: static rtx loop_final_value; ! 193: ! 194: /* Forward declarations. */ ! 195: ! 196: static void init_reg_map (); ! 197: static int precondition_loop_p (); ! 198: static void copy_loop_body (); ! 199: static void iteration_info (); ! 200: static rtx approx_final_value (); ! 201: static int find_splittable_regs (); ! 202: static int find_splittable_givs (); ! 203: static rtx fold_rtx_mult_add (); ! 204: ! 205: /* Try to unroll one loop and split induction variables in the loop. ! 206: ! 207: The loop is described by the arguments LOOP_END, INSN_COUNT, and ! 208: LOOP_START. END_INSERT_BEFORE indicates where insns should be added ! 209: which need to be executed when the loop falls through. STRENGTH_REDUCTION_P ! 210: indicates whether information generated in the strength reduction pass ! 211: is available. ! 212: ! 213: This function is intended to be called from within `strength_reduce' ! 214: in loop.c. */ ! 215: ! 216: void ! 217: unroll_loop (loop_end, insn_count, loop_start, end_insert_before, ! 218: strength_reduce_p) ! 219: rtx loop_end; ! 220: int insn_count; ! 221: rtx loop_start; ! 222: rtx end_insert_before; ! 223: int strength_reduce_p; ! 224: { ! 225: int i, j, temp; ! 226: int unroll_number = 1; ! 227: rtx copy_start, copy_end; ! 228: rtx insn, copy, sequence, pattern, tem; ! 229: int max_labelno, max_insnno; ! 230: rtx insert_before; ! 231: struct inline_remap *map; ! 232: char *local_label; ! 233: int maxregnum; ! 234: int new_maxregnum; ! 235: rtx exit_label = 0; ! 236: rtx start_label; ! 237: struct iv_class *bl; ! 238: struct induction *v; ! 239: int splitting_not_safe = 0; ! 240: enum unroll_types unroll_type; ! 241: int loop_preconditioned = 0; ! 242: rtx safety_label; ! 243: /* This points to the last real insn in the loop, which should be either ! 244: a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional ! 245: jumps). */ ! 246: rtx last_loop_insn; ! 247: ! 248: /* Don't bother unrolling huge loops. Since the minimum factor is ! 249: two, loops greater than one half of MAX_UNROLLED_INSNS will never ! 250: be unrolled. */ ! 251: if (insn_count > MAX_UNROLLED_INSNS / 2) ! 252: { ! 253: if (loop_dump_stream) ! 254: fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n"); ! 255: return; ! 256: } ! 257: ! 258: /* When emitting debugger info, we can't unroll loops with unequal numbers ! 259: of block_beg and block_end notes, because that would unbalance the block ! 260: structure of the function. This can happen as a result of the ! 261: "if (foo) bar; else break;" optimization in jump.c. */ ! 262: ! 263: if (write_symbols != NO_DEBUG) ! 264: { ! 265: int block_begins = 0; ! 266: int block_ends = 0; ! 267: ! 268: for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn)) ! 269: { ! 270: if (GET_CODE (insn) == NOTE) ! 271: { ! 272: if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG) ! 273: block_begins++; ! 274: else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END) ! 275: block_ends++; ! 276: } ! 277: } ! 278: ! 279: if (block_begins != block_ends) ! 280: { ! 281: if (loop_dump_stream) ! 282: fprintf (loop_dump_stream, ! 283: "Unrolling failure: Unbalanced block notes.\n"); ! 284: return; ! 285: } ! 286: } ! 287: ! 288: /* Determine type of unroll to perform. Depends on the number of iterations ! 289: and the size of the loop. */ ! 290: ! 291: /* If there is no strength reduce info, then set loop_n_iterations to zero. ! 292: This can happen if strength_reduce can't find any bivs in the loop. ! 293: A value of zero indicates that the number of iterations could not be ! 294: calculated. */ ! 295: ! 296: if (! strength_reduce_p) ! 297: loop_n_iterations = 0; ! 298: ! 299: if (loop_dump_stream && loop_n_iterations > 0) ! 300: fprintf (loop_dump_stream, ! 301: "Loop unrolling: %d iterations.\n", loop_n_iterations); ! 302: ! 303: /* Find and save a pointer to the last nonnote insn in the loop. */ ! 304: ! 305: last_loop_insn = prev_nonnote_insn (loop_end); ! 306: ! 307: /* Calculate how many times to unroll the loop. Indicate whether or ! 308: not the loop is being completely unrolled. */ ! 309: ! 310: if (loop_n_iterations == 1) ! 311: { ! 312: /* If number of iterations is exactly 1, then eliminate the compare and ! 313: branch at the end of the loop since they will never be taken. ! 314: Then return, since no other action is needed here. */ ! 315: ! 316: /* If the last instruction is not a BARRIER or a JUMP_INSN, then ! 317: don't do anything. */ ! 318: ! 319: if (GET_CODE (last_loop_insn) == BARRIER) ! 320: { ! 321: /* Delete the jump insn. This will delete the barrier also. */ ! 322: delete_insn (PREV_INSN (last_loop_insn)); ! 323: } ! 324: else if (GET_CODE (last_loop_insn) == JUMP_INSN) ! 325: { ! 326: #ifdef HAVE_cc0 ! 327: /* The immediately preceding insn is a compare which must be ! 328: deleted. */ ! 329: delete_insn (last_loop_insn); ! 330: delete_insn (PREV_INSN (last_loop_insn)); ! 331: #else ! 332: /* The immediately preceding insn may not be the compare, so don't ! 333: delete it. */ ! 334: delete_insn (last_loop_insn); ! 335: #endif ! 336: } ! 337: return; ! 338: } ! 339: else if (loop_n_iterations > 0 ! 340: && loop_n_iterations * insn_count < MAX_UNROLLED_INSNS) ! 341: { ! 342: unroll_number = loop_n_iterations; ! 343: unroll_type = UNROLL_COMPLETELY; ! 344: } ! 345: else if (loop_n_iterations > 0) ! 346: { ! 347: /* Try to factor the number of iterations. Don't bother with the ! 348: general case, only using 2, 3, 5, and 7 will get 75% of all ! 349: numbers theoretically, and almost all in practice. */ ! 350: ! 351: for (i = 0; i < NUM_FACTORS; i++) ! 352: factors[i].count = 0; ! 353: ! 354: temp = loop_n_iterations; ! 355: for (i = NUM_FACTORS - 1; i >= 0; i--) ! 356: while (temp % factors[i].factor == 0) ! 357: { ! 358: factors[i].count++; ! 359: temp = temp / factors[i].factor; ! 360: } ! 361: ! 362: /* Start with the larger factors first so that we generally ! 363: get lots of unrolling. */ ! 364: ! 365: unroll_number = 1; ! 366: temp = insn_count; ! 367: for (i = 3; i >= 0; i--) ! 368: while (factors[i].count--) ! 369: { ! 370: if (temp * factors[i].factor < MAX_UNROLLED_INSNS) ! 371: { ! 372: unroll_number *= factors[i].factor; ! 373: temp *= factors[i].factor; ! 374: } ! 375: else ! 376: break; ! 377: } ! 378: ! 379: /* If we couldn't find any factors, then unroll as in the normal ! 380: case. */ ! 381: if (unroll_number == 1) ! 382: { ! 383: if (loop_dump_stream) ! 384: fprintf (loop_dump_stream, ! 385: "Loop unrolling: No factors found.\n"); ! 386: } ! 387: else ! 388: unroll_type = UNROLL_MODULO; ! 389: } ! 390: ! 391: ! 392: /* Default case, calculate number of times to unroll loop based on its ! 393: size. */ ! 394: if (unroll_number == 1) ! 395: { ! 396: if (8 * insn_count < MAX_UNROLLED_INSNS) ! 397: unroll_number = 8; ! 398: else if (4 * insn_count < MAX_UNROLLED_INSNS) ! 399: unroll_number = 4; ! 400: else ! 401: unroll_number = 2; ! 402: ! 403: unroll_type = UNROLL_NAIVE; ! 404: } ! 405: ! 406: /* Now we know how many times to unroll the loop. */ ! 407: ! 408: if (loop_dump_stream) ! 409: fprintf (loop_dump_stream, ! 410: "Unrolling loop %d times.\n", unroll_number); ! 411: ! 412: ! 413: if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO) ! 414: { ! 415: /* Loops of these types should never start with a jump down to ! 416: the exit condition test. For now, check for this case just to ! 417: be sure. UNROLL_NAIVE loops can be of this form, this case is ! 418: handled below. */ ! 419: insn = loop_start; ! 420: while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN) ! 421: insn = NEXT_INSN (insn); ! 422: if (GET_CODE (insn) == JUMP_INSN) ! 423: abort (); ! 424: } ! 425: ! 426: if (unroll_type == UNROLL_COMPLETELY) ! 427: { ! 428: /* Completely unrolling the loop: Delete the compare and branch at ! 429: the end (the last two instructions). This delete must done at the ! 430: very end of loop unrolling, to avoid problems with calls to ! 431: back_branch_in_range_p, which is called by find_splittable_regs. ! 432: All increments of splittable bivs/givs are changed to load constant ! 433: instructions. */ ! 434: ! 435: copy_start = loop_start; ! 436: ! 437: /* Set insert_before to the instruction immediately after the JUMP_INSN ! 438: (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of ! 439: the loop will be correctly handled by copy_loop_body. */ ! 440: insert_before = NEXT_INSN (last_loop_insn); ! 441: ! 442: /* Set copy_end to the insn before the jump at the end of the loop. */ ! 443: if (GET_CODE (last_loop_insn) == BARRIER) ! 444: copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); ! 445: else if (GET_CODE (last_loop_insn) == JUMP_INSN) ! 446: { ! 447: #ifdef HAVE_cc0 ! 448: /* The instruction immediately before the JUMP_INSN is a compare ! 449: instruction which we do not want to copy. */ ! 450: copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); ! 451: #else ! 452: /* The instruction immediately before the JUMP_INSN may not be the ! 453: compare, so we must copy it. */ ! 454: copy_end = PREV_INSN (last_loop_insn); ! 455: #endif ! 456: } ! 457: else ! 458: { ! 459: /* We currently can't unroll a loop if it doesn't end with a ! 460: JUMP_INSN. There would need to be a mechanism that recognizes ! 461: this case, and then inserts a jump after each loop body, which ! 462: jumps to after the last loop body. */ ! 463: if (loop_dump_stream) ! 464: fprintf (loop_dump_stream, ! 465: "Unrolling failure: loop does not end with a JUMP_INSN.\n"); ! 466: return; ! 467: } ! 468: } ! 469: else if (unroll_type == UNROLL_MODULO) ! 470: { ! 471: /* Partially unrolling the loop: The compare and branch at the end ! 472: (the last two instructions) must remain. Don't copy the compare ! 473: and branch instructions at the end of the loop. Insert the unrolled ! 474: code immediately before the compare/branch at the end so that the ! 475: code will fall through to them as before. */ ! 476: ! 477: copy_start = loop_start; ! 478: ! 479: /* Set insert_before to the jump insn at the end of the loop. ! 480: Set copy_end to before the jump insn at the end of the loop. */ ! 481: if (GET_CODE (last_loop_insn) == BARRIER) ! 482: { ! 483: insert_before = PREV_INSN (last_loop_insn); ! 484: copy_end = PREV_INSN (insert_before); ! 485: } ! 486: else if (GET_CODE (last_loop_insn) == JUMP_INSN) ! 487: { ! 488: #ifdef HAVE_cc0 ! 489: /* The instruction immediately before the JUMP_INSN is a compare ! 490: instruction which we do not want to copy or delete. */ ! 491: insert_before = PREV_INSN (last_loop_insn); ! 492: copy_end = PREV_INSN (insert_before); ! 493: #else ! 494: /* The instruction immediately before the JUMP_INSN may not be the ! 495: compare, so we must copy it. */ ! 496: insert_before = last_loop_insn; ! 497: copy_end = PREV_INSN (last_loop_insn); ! 498: #endif ! 499: } ! 500: else ! 501: { ! 502: /* We currently can't unroll a loop if it doesn't end with a ! 503: JUMP_INSN. There would need to be a mechanism that recognizes ! 504: this case, and then inserts a jump after each loop body, which ! 505: jumps to after the last loop body. */ ! 506: if (loop_dump_stream) ! 507: fprintf (loop_dump_stream, ! 508: "Unrolling failure: loop does not end with a JUMP_INSN.\n"); ! 509: return; ! 510: } ! 511: } ! 512: else ! 513: { ! 514: /* Normal case: Must copy the compare and branch instructions at the ! 515: end of the loop. */ ! 516: ! 517: if (GET_CODE (last_loop_insn) == BARRIER) ! 518: { ! 519: /* Loop ends with an unconditional jump and a barrier. ! 520: Handle this like above, don't copy jump and barrier. ! 521: This is not strictly necessary, but doing so prevents generating ! 522: unconditional jumps to an immediately following label. ! 523: ! 524: This will be corrected below if the target of this jump is ! 525: not the start_label. */ ! 526: ! 527: insert_before = PREV_INSN (last_loop_insn); ! 528: copy_end = PREV_INSN (insert_before); ! 529: } ! 530: else if (GET_CODE (last_loop_insn) == JUMP_INSN) ! 531: { ! 532: /* Set insert_before to immediately after the JUMP_INSN, so that ! 533: NOTEs at the end of the loop will be correctly handled by ! 534: copy_loop_body. */ ! 535: insert_before = NEXT_INSN (last_loop_insn); ! 536: copy_end = last_loop_insn; ! 537: } ! 538: else ! 539: { ! 540: /* We currently can't unroll a loop if it doesn't end with a ! 541: JUMP_INSN. There would need to be a mechanism that recognizes ! 542: this case, and then inserts a jump after each loop body, which ! 543: jumps to after the last loop body. */ ! 544: if (loop_dump_stream) ! 545: fprintf (loop_dump_stream, ! 546: "Unrolling failure: loop does not end with a JUMP_INSN.\n"); ! 547: return; ! 548: } ! 549: ! 550: /* If copying exit test branches because they can not be eliminated, ! 551: then must convert the fall through case of the branch to a jump past ! 552: the end of the loop. Create a label to emit after the loop and save ! 553: it for later use. Do not use the label after the loop, if any, since ! 554: it might be used by insns outside the loop, or there might be insns ! 555: added before it later by final_[bg]iv_value which must be after ! 556: the real exit label. */ ! 557: exit_label = gen_label_rtx (); ! 558: ! 559: insn = loop_start; ! 560: while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN) ! 561: insn = NEXT_INSN (insn); ! 562: ! 563: if (GET_CODE (insn) == JUMP_INSN) ! 564: { ! 565: /* The loop starts with a jump down to the exit condition test. ! 566: Start copying the loop after the barrier following this ! 567: jump insn. */ ! 568: copy_start = NEXT_INSN (insn); ! 569: ! 570: /* Splitting induction variables doesn't work when the loop is ! 571: entered via a jump to the bottom, because then we end up doing ! 572: a comparison against a new register for a split variable, but ! 573: we did not execute the set insn for the new register because ! 574: it was skipped over. */ ! 575: splitting_not_safe = 1; ! 576: if (loop_dump_stream) ! 577: fprintf (loop_dump_stream, ! 578: "Splitting not safe, because loop not entered at top.\n"); ! 579: } ! 580: else ! 581: copy_start = loop_start; ! 582: } ! 583: ! 584: /* This should always be the first label in the loop. */ ! 585: start_label = NEXT_INSN (copy_start); ! 586: /* There may be a line number note and/or a loop continue note here. */ ! 587: while (GET_CODE (start_label) == NOTE) ! 588: start_label = NEXT_INSN (start_label); ! 589: if (GET_CODE (start_label) != CODE_LABEL) ! 590: { ! 591: /* This can happen as a result of jump threading. If the first insns in ! 592: the loop test the same condition as the loop's backward jump, or the ! 593: opposite condition, then the backward jump will be modified to point ! 594: to elsewhere, and the loop's start label is deleted. ! 595: ! 596: This case currently can not be handled by the loop unrolling code. */ ! 597: ! 598: if (loop_dump_stream) ! 599: fprintf (loop_dump_stream, ! 600: "Unrolling failure: unknown insns between BEG note and loop label.\n"); ! 601: return; ! 602: } ! 603: if (LABEL_NAME (start_label)) ! 604: { ! 605: /* The jump optimization pass must have combined the original start label ! 606: with a named label for a goto. We can't unroll this case because ! 607: jumps which go to the named label must be handled differently than ! 608: jumps to the loop start, and it is impossible to differentiate them ! 609: in this case. */ ! 610: if (loop_dump_stream) ! 611: fprintf (loop_dump_stream, ! 612: "Unrolling failure: loop start label is gone\n"); ! 613: return; ! 614: } ! 615: ! 616: if (unroll_type == UNROLL_NAIVE ! 617: && GET_CODE (last_loop_insn) == BARRIER ! 618: && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn))) ! 619: { ! 620: /* In this case, we must copy the jump and barrier, because they will ! 621: not be converted to jumps to an immediately following label. */ ! 622: ! 623: insert_before = NEXT_INSN (last_loop_insn); ! 624: copy_end = last_loop_insn; ! 625: } ! 626: ! 627: /* Allocate a translation table for the labels and insn numbers. ! 628: They will be filled in as we copy the insns in the loop. */ ! 629: ! 630: max_labelno = max_label_num (); ! 631: max_insnno = get_max_uid (); ! 632: ! 633: map = (struct inline_remap *) alloca (sizeof (struct inline_remap)); ! 634: ! 635: map->integrating = 0; ! 636: ! 637: /* Allocate the label map. */ ! 638: ! 639: if (max_labelno > 0) ! 640: { ! 641: map->label_map = (rtx *) alloca (max_labelno * sizeof (rtx)); ! 642: ! 643: local_label = (char *) alloca (max_labelno); ! 644: bzero (local_label, max_labelno); ! 645: } ! 646: else ! 647: map->label_map = 0; ! 648: ! 649: /* Search the loop and mark all local labels, i.e. the ones which have to ! 650: be distinct labels when copied. For all labels which might be ! 651: non-local, set their label_map entries to point to themselves. ! 652: If they happen to be local their label_map entries will be overwritten ! 653: before the loop body is copied. The label_map entries for local labels ! 654: will be set to a different value each time the loop body is copied. */ ! 655: ! 656: for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn)) ! 657: { ! 658: if (GET_CODE (insn) == CODE_LABEL) ! 659: local_label[CODE_LABEL_NUMBER (insn)] = 1; ! 660: else if (GET_CODE (insn) == JUMP_INSN) ! 661: { ! 662: if (JUMP_LABEL (insn)) ! 663: map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))] ! 664: = JUMP_LABEL (insn); ! 665: else if (GET_CODE (PATTERN (insn)) == ADDR_VEC ! 666: || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) ! 667: { ! 668: rtx pat = PATTERN (insn); ! 669: int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC; ! 670: int len = XVECLEN (pat, diff_vec_p); ! 671: rtx label; ! 672: ! 673: for (i = 0; i < len; i++) ! 674: { ! 675: label = XEXP (XVECEXP (pat, diff_vec_p, i), 0); ! 676: map->label_map[CODE_LABEL_NUMBER (label)] = label; ! 677: } ! 678: } ! 679: } ! 680: } ! 681: ! 682: /* Allocate space for the insn map. */ ! 683: ! 684: map->insn_map = (rtx *) alloca (max_insnno * sizeof (rtx)); ! 685: ! 686: /* Set this to zero, to indicate that we are doing loop unrolling, ! 687: not function inlining. */ ! 688: map->inline_target = 0; ! 689: ! 690: /* The register and constant maps depend on the number of registers ! 691: present, so the final maps can't be created until after ! 692: find_splittable_regs is called. However, they are needed for ! 693: preconditioning, so we create temporary maps when preconditioning ! 694: is performed. */ ! 695: ! 696: /* The preconditioning code may allocate two new pseudo registers. */ ! 697: maxregnum = max_reg_num (); ! 698: ! 699: /* Allocate and zero out the splittable_regs and addr_combined_regs ! 700: arrays. These must be zeroed here because they will be used if ! 701: loop preconditioning is performed, and must be zero for that case. ! 702: ! 703: It is safe to do this here, since the extra registers created by the ! 704: preconditioning code and find_splittable_regs will never be used ! 705: to access the splittable_regs[] and addr_combined_regs[] arrays. */ ! 706: ! 707: splittable_regs = (rtx *) alloca (maxregnum * sizeof (rtx)); ! 708: bzero (splittable_regs, maxregnum * sizeof (rtx)); ! 709: splittable_regs_updates = (int *) alloca (maxregnum * sizeof (int)); ! 710: bzero (splittable_regs_updates, maxregnum * sizeof (int)); ! 711: addr_combined_regs ! 712: = (struct induction **) alloca (maxregnum * sizeof (struct induction *)); ! 713: bzero (addr_combined_regs, maxregnum * sizeof (struct induction *)); ! 714: ! 715: /* If this loop requires exit tests when unrolled, check to see if we ! 716: can precondition the loop so as to make the exit tests unnecessary. ! 717: Just like variable splitting, this is not safe if the loop is entered ! 718: via a jump to the bottom. Also, can not do this if no strength ! 719: reduce info, because precondition_loop_p uses this info. */ ! 720: ! 721: /* Must copy the loop body for preconditioning before the following ! 722: find_splittable_regs call since that will emit insns which need to ! 723: be after the preconditioned loop copies, but immediately before the ! 724: unrolled loop copies. */ ! 725: ! 726: /* Also, it is not safe to split induction variables for the preconditioned ! 727: copies of the loop body. If we split induction variables, then the code ! 728: assumes that each induction variable can be represented as a function ! 729: of its initial value and the loop iteration number. This is not true ! 730: in this case, because the last preconditioned copy of the loop body ! 731: could be any iteration from the first up to the `unroll_number-1'th, ! 732: depending on the initial value of the iteration variable. Therefore ! 733: we can not split induction variables here, because we can not calculate ! 734: their value. Hence, this code must occur before find_splittable_regs ! 735: is called. */ ! 736: ! 737: if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p) ! 738: { ! 739: rtx initial_value, final_value, increment; ! 740: ! 741: if (precondition_loop_p (&initial_value, &final_value, &increment, ! 742: loop_start, loop_end)) ! 743: { ! 744: register rtx diff, temp; ! 745: enum machine_mode mode; ! 746: rtx *labels; ! 747: int abs_inc, neg_inc; ! 748: ! 749: map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx)); ! 750: ! 751: map->const_equiv_map = (rtx *) alloca (maxregnum * sizeof (rtx)); ! 752: map->const_age_map = (unsigned *) alloca (maxregnum ! 753: * sizeof (unsigned)); ! 754: map->const_equiv_map_size = maxregnum; ! 755: global_const_equiv_map = map->const_equiv_map; ! 756: global_const_equiv_map_size = maxregnum; ! 757: ! 758: init_reg_map (map, maxregnum); ! 759: ! 760: /* Limit loop unrolling to 4, since this will make 7 copies of ! 761: the loop body. */ ! 762: if (unroll_number > 4) ! 763: unroll_number = 4; ! 764: ! 765: /* Save the absolute value of the increment, and also whether or ! 766: not it is negative. */ ! 767: neg_inc = 0; ! 768: abs_inc = INTVAL (increment); ! 769: if (abs_inc < 0) ! 770: { ! 771: abs_inc = - abs_inc; ! 772: neg_inc = 1; ! 773: } ! 774: ! 775: start_sequence (); ! 776: ! 777: /* Decide what mode to do these calculations in. Choose the larger ! 778: of final_value's mode and initial_value's mode, or a full-word if ! 779: both are constants. */ ! 780: mode = GET_MODE (final_value); ! 781: if (mode == VOIDmode) ! 782: { ! 783: mode = GET_MODE (initial_value); ! 784: if (mode == VOIDmode) ! 785: mode = word_mode; ! 786: } ! 787: else if (mode != GET_MODE (initial_value) ! 788: && (GET_MODE_SIZE (mode) ! 789: < GET_MODE_SIZE (GET_MODE (initial_value)))) ! 790: mode = GET_MODE (initial_value); ! 791: ! 792: /* Calculate the difference between the final and initial values. ! 793: Final value may be a (plus (reg x) (const_int 1)) rtx. ! 794: Let the following cse pass simplify this if initial value is ! 795: a constant. ! 796: ! 797: We must copy the final and initial values here to avoid ! 798: improperly shared rtl. */ ! 799: ! 800: diff = expand_binop (mode, sub_optab, copy_rtx (final_value), ! 801: copy_rtx (initial_value), NULL_RTX, 0, ! 802: OPTAB_LIB_WIDEN); ! 803: ! 804: /* Now calculate (diff % (unroll * abs (increment))) by using an ! 805: and instruction. */ ! 806: diff = expand_binop (GET_MODE (diff), and_optab, diff, ! 807: GEN_INT (unroll_number * abs_inc - 1), ! 808: NULL_RTX, 0, OPTAB_LIB_WIDEN); ! 809: ! 810: /* Now emit a sequence of branches to jump to the proper precond ! 811: loop entry point. */ ! 812: ! 813: labels = (rtx *) alloca (sizeof (rtx) * unroll_number); ! 814: for (i = 0; i < unroll_number; i++) ! 815: labels[i] = gen_label_rtx (); ! 816: ! 817: /* Assuming the unroll_number is 4, and the increment is 2, then ! 818: for a negative increment: for a positive increment: ! 819: diff = 0,1 precond 0 diff = 0,7 precond 0 ! 820: diff = 2,3 precond 3 diff = 1,2 precond 1 ! 821: diff = 4,5 precond 2 diff = 3,4 precond 2 ! 822: diff = 6,7 precond 1 diff = 5,6 precond 3 */ ! 823: ! 824: /* We only need to emit (unroll_number - 1) branches here, the ! 825: last case just falls through to the following code. */ ! 826: ! 827: /* ??? This would give better code if we emitted a tree of branches ! 828: instead of the current linear list of branches. */ ! 829: ! 830: for (i = 0; i < unroll_number - 1; i++) ! 831: { ! 832: int cmp_const; ! 833: ! 834: /* For negative increments, must invert the constant compared ! 835: against, except when comparing against zero. */ ! 836: if (i == 0) ! 837: cmp_const = 0; ! 838: else if (neg_inc) ! 839: cmp_const = unroll_number - i; ! 840: else ! 841: cmp_const = i; ! 842: ! 843: emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const), ! 844: EQ, NULL_RTX, mode, 0, 0); ! 845: ! 846: if (i == 0) ! 847: emit_jump_insn (gen_beq (labels[i])); ! 848: else if (neg_inc) ! 849: emit_jump_insn (gen_bge (labels[i])); ! 850: else ! 851: emit_jump_insn (gen_ble (labels[i])); ! 852: JUMP_LABEL (get_last_insn ()) = labels[i]; ! 853: LABEL_NUSES (labels[i])++; ! 854: } ! 855: ! 856: /* If the increment is greater than one, then we need another branch, ! 857: to handle other cases equivalent to 0. */ ! 858: ! 859: /* ??? This should be merged into the code above somehow to help ! 860: simplify the code here, and reduce the number of branches emitted. ! 861: For the negative increment case, the branch here could easily ! 862: be merged with the `0' case branch above. For the positive ! 863: increment case, it is not clear how this can be simplified. */ ! 864: ! 865: if (abs_inc != 1) ! 866: { ! 867: int cmp_const; ! 868: ! 869: if (neg_inc) ! 870: cmp_const = abs_inc - 1; ! 871: else ! 872: cmp_const = abs_inc * (unroll_number - 1) + 1; ! 873: ! 874: emit_cmp_insn (diff, GEN_INT (cmp_const), EQ, NULL_RTX, ! 875: mode, 0, 0); ! 876: ! 877: if (neg_inc) ! 878: emit_jump_insn (gen_ble (labels[0])); ! 879: else ! 880: emit_jump_insn (gen_bge (labels[0])); ! 881: JUMP_LABEL (get_last_insn ()) = labels[0]; ! 882: LABEL_NUSES (labels[0])++; ! 883: } ! 884: ! 885: sequence = gen_sequence (); ! 886: end_sequence (); ! 887: emit_insn_before (sequence, loop_start); ! 888: ! 889: /* Only the last copy of the loop body here needs the exit ! 890: test, so set copy_end to exclude the compare/branch here, ! 891: and then reset it inside the loop when get to the last ! 892: copy. */ ! 893: ! 894: if (GET_CODE (last_loop_insn) == BARRIER) ! 895: copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); ! 896: else if (GET_CODE (last_loop_insn) == JUMP_INSN) ! 897: { ! 898: #ifdef HAVE_cc0 ! 899: /* The immediately preceding insn is a compare which we do not ! 900: want to copy. */ ! 901: copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); ! 902: #else ! 903: /* The immediately preceding insn may not be a compare, so we ! 904: must copy it. */ ! 905: copy_end = PREV_INSN (last_loop_insn); ! 906: #endif ! 907: } ! 908: else ! 909: abort (); ! 910: ! 911: for (i = 1; i < unroll_number; i++) ! 912: { ! 913: emit_label_after (labels[unroll_number - i], ! 914: PREV_INSN (loop_start)); ! 915: ! 916: bzero (map->insn_map, max_insnno * sizeof (rtx)); ! 917: bzero (map->const_equiv_map, maxregnum * sizeof (rtx)); ! 918: bzero (map->const_age_map, maxregnum * sizeof (unsigned)); ! 919: map->const_age = 0; ! 920: ! 921: for (j = 0; j < max_labelno; j++) ! 922: if (local_label[j]) ! 923: map->label_map[j] = gen_label_rtx (); ! 924: ! 925: /* The last copy needs the compare/branch insns at the end, ! 926: so reset copy_end here if the loop ends with a conditional ! 927: branch. */ ! 928: ! 929: if (i == unroll_number - 1) ! 930: { ! 931: if (GET_CODE (last_loop_insn) == BARRIER) ! 932: copy_end = PREV_INSN (PREV_INSN (last_loop_insn)); ! 933: else ! 934: copy_end = last_loop_insn; ! 935: } ! 936: ! 937: /* None of the copies are the `last_iteration', so just ! 938: pass zero for that parameter. */ ! 939: copy_loop_body (copy_start, copy_end, map, exit_label, 0, ! 940: unroll_type, start_label, loop_end, ! 941: loop_start, copy_end); ! 942: } ! 943: emit_label_after (labels[0], PREV_INSN (loop_start)); ! 944: ! 945: if (GET_CODE (last_loop_insn) == BARRIER) ! 946: { ! 947: insert_before = PREV_INSN (last_loop_insn); ! 948: copy_end = PREV_INSN (insert_before); ! 949: } ! 950: else ! 951: { ! 952: #ifdef HAVE_cc0 ! 953: /* The immediately preceding insn is a compare which we do not ! 954: want to copy. */ ! 955: insert_before = PREV_INSN (last_loop_insn); ! 956: copy_end = PREV_INSN (insert_before); ! 957: #else ! 958: /* The immediately preceding insn may not be a compare, so we ! 959: must copy it. */ ! 960: insert_before = last_loop_insn; ! 961: copy_end = PREV_INSN (last_loop_insn); ! 962: #endif ! 963: } ! 964: ! 965: /* Set unroll type to MODULO now. */ ! 966: unroll_type = UNROLL_MODULO; ! 967: loop_preconditioned = 1; ! 968: } ! 969: } ! 970: ! 971: /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll ! 972: the loop unless all loops are being unrolled. */ ! 973: if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops) ! 974: { ! 975: if (loop_dump_stream) ! 976: fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n"); ! 977: return; ! 978: } ! 979: ! 980: /* At this point, we are guaranteed to unroll the loop. */ ! 981: ! 982: /* For each biv and giv, determine whether it can be safely split into ! 983: a different variable for each unrolled copy of the loop body. ! 984: We precalculate and save this info here, since computing it is ! 985: expensive. ! 986: ! 987: Do this before deleting any instructions from the loop, so that ! 988: back_branch_in_range_p will work correctly. */ ! 989: ! 990: if (splitting_not_safe) ! 991: temp = 0; ! 992: else ! 993: temp = find_splittable_regs (unroll_type, loop_start, loop_end, ! 994: end_insert_before, unroll_number); ! 995: ! 996: /* find_splittable_regs may have created some new registers, so must ! 997: reallocate the reg_map with the new larger size, and must realloc ! 998: the constant maps also. */ ! 999: ! 1000: maxregnum = max_reg_num (); ! 1001: map->reg_map = (rtx *) alloca (maxregnum * sizeof (rtx)); ! 1002: ! 1003: init_reg_map (map, maxregnum); ! 1004: ! 1005: /* Space is needed in some of the map for new registers, so new_maxregnum ! 1006: is an (over)estimate of how many registers will exist at the end. */ ! 1007: new_maxregnum = maxregnum + (temp * unroll_number * 2); ! 1008: ! 1009: /* Must realloc space for the constant maps, because the number of registers ! 1010: may have changed. */ ! 1011: ! 1012: map->const_equiv_map = (rtx *) alloca (new_maxregnum * sizeof (rtx)); ! 1013: map->const_age_map = (unsigned *) alloca (new_maxregnum * sizeof (unsigned)); ! 1014: ! 1015: map->const_equiv_map_size = new_maxregnum; ! 1016: global_const_equiv_map = map->const_equiv_map; ! 1017: global_const_equiv_map_size = new_maxregnum; ! 1018: ! 1019: /* Search the list of bivs and givs to find ones which need to be remapped ! 1020: when split, and set their reg_map entry appropriately. */ ! 1021: ! 1022: for (bl = loop_iv_list; bl; bl = bl->next) ! 1023: { ! 1024: if (REGNO (bl->biv->src_reg) != bl->regno) ! 1025: map->reg_map[bl->regno] = bl->biv->src_reg; ! 1026: #if 0 ! 1027: /* Currently, non-reduced/final-value givs are never split. */ ! 1028: for (v = bl->giv; v; v = v->next_iv) ! 1029: if (REGNO (v->src_reg) != bl->regno) ! 1030: map->reg_map[REGNO (v->dest_reg)] = v->src_reg; ! 1031: #endif ! 1032: } ! 1033: ! 1034: /* If the loop is being partially unrolled, and the iteration variables ! 1035: are being split, and are being renamed for the split, then must fix up ! 1036: the compare instruction at the end of the loop to refer to the new ! 1037: registers. This compare isn't copied, so the registers used in it ! 1038: will never be replaced if it isn't done here. */ ! 1039: ! 1040: if (unroll_type == UNROLL_MODULO) ! 1041: { ! 1042: insn = NEXT_INSN (copy_end); ! 1043: if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SET) ! 1044: { ! 1045: #if 0 ! 1046: /* If non-reduced/final-value givs were split, then this would also ! 1047: have to remap those givs. */ ! 1048: #endif ! 1049: ! 1050: tem = SET_SRC (PATTERN (insn)); ! 1051: /* The set source is a register. */ ! 1052: if (GET_CODE (tem) == REG) ! 1053: { ! 1054: if (REGNO (tem) < max_reg_before_loop ! 1055: && reg_iv_type[REGNO (tem)] == BASIC_INDUCT) ! 1056: SET_SRC (PATTERN (insn)) ! 1057: = reg_biv_class[REGNO (tem)]->biv->src_reg; ! 1058: } ! 1059: else ! 1060: { ! 1061: /* The set source is a compare of some sort. */ ! 1062: tem = XEXP (SET_SRC (PATTERN (insn)), 0); ! 1063: if (GET_CODE (tem) == REG ! 1064: && REGNO (tem) < max_reg_before_loop ! 1065: && reg_iv_type[REGNO (tem)] == BASIC_INDUCT) ! 1066: XEXP (SET_SRC (PATTERN (insn)), 0) ! 1067: = reg_biv_class[REGNO (tem)]->biv->src_reg; ! 1068: ! 1069: tem = XEXP (SET_SRC (PATTERN (insn)), 1); ! 1070: if (GET_CODE (tem) == REG ! 1071: && REGNO (tem) < max_reg_before_loop ! 1072: && reg_iv_type[REGNO (tem)] == BASIC_INDUCT) ! 1073: XEXP (SET_SRC (PATTERN (insn)), 1) ! 1074: = reg_biv_class[REGNO (tem)]->biv->src_reg; ! 1075: } ! 1076: } ! 1077: } ! 1078: ! 1079: /* For unroll_number - 1 times, make a copy of each instruction ! 1080: between copy_start and copy_end, and insert these new instructions ! 1081: before the end of the loop. */ ! 1082: ! 1083: for (i = 0; i < unroll_number; i++) ! 1084: { ! 1085: bzero (map->insn_map, max_insnno * sizeof (rtx)); ! 1086: bzero (map->const_equiv_map, new_maxregnum * sizeof (rtx)); ! 1087: bzero (map->const_age_map, new_maxregnum * sizeof (unsigned)); ! 1088: map->const_age = 0; ! 1089: ! 1090: for (j = 0; j < max_labelno; j++) ! 1091: if (local_label[j]) ! 1092: map->label_map[j] = gen_label_rtx (); ! 1093: ! 1094: /* If loop starts with a branch to the test, then fix it so that ! 1095: it points to the test of the first unrolled copy of the loop. */ ! 1096: if (i == 0 && loop_start != copy_start) ! 1097: { ! 1098: insn = PREV_INSN (copy_start); ! 1099: pattern = PATTERN (insn); ! 1100: ! 1101: tem = map->label_map[CODE_LABEL_NUMBER ! 1102: (XEXP (SET_SRC (pattern), 0))]; ! 1103: SET_SRC (pattern) = gen_rtx (LABEL_REF, VOIDmode, tem); ! 1104: ! 1105: /* Set the jump label so that it can be used by later loop unrolling ! 1106: passes. */ ! 1107: JUMP_LABEL (insn) = tem; ! 1108: LABEL_NUSES (tem)++; ! 1109: } ! 1110: ! 1111: copy_loop_body (copy_start, copy_end, map, exit_label, ! 1112: i == unroll_number - 1, unroll_type, start_label, ! 1113: loop_end, insert_before, insert_before); ! 1114: } ! 1115: ! 1116: /* Before deleting any insns, emit a CODE_LABEL immediately after the last ! 1117: insn to be deleted. This prevents any runaway delete_insn call from ! 1118: more insns that it should, as it always stops at a CODE_LABEL. */ ! 1119: ! 1120: /* Delete the compare and branch at the end of the loop if completely ! 1121: unrolling the loop. Deleting the backward branch at the end also ! 1122: deletes the code label at the start of the loop. This is done at ! 1123: the very end to avoid problems with back_branch_in_range_p. */ ! 1124: ! 1125: if (unroll_type == UNROLL_COMPLETELY) ! 1126: safety_label = emit_label_after (gen_label_rtx (), last_loop_insn); ! 1127: else ! 1128: safety_label = emit_label_after (gen_label_rtx (), copy_end); ! 1129: ! 1130: /* Delete all of the original loop instructions. Don't delete the ! 1131: LOOP_BEG note, or the first code label in the loop. */ ! 1132: ! 1133: insn = NEXT_INSN (copy_start); ! 1134: while (insn != safety_label) ! 1135: { ! 1136: if (insn != start_label) ! 1137: insn = delete_insn (insn); ! 1138: else ! 1139: insn = NEXT_INSN (insn); ! 1140: } ! 1141: ! 1142: /* Can now delete the 'safety' label emitted to protect us from runaway ! 1143: delete_insn calls. */ ! 1144: if (INSN_DELETED_P (safety_label)) ! 1145: abort (); ! 1146: delete_insn (safety_label); ! 1147: ! 1148: /* If exit_label exists, emit it after the loop. Doing the emit here ! 1149: forces it to have a higher INSN_UID than any insn in the unrolled loop. ! 1150: This is needed so that mostly_true_jump in reorg.c will treat jumps ! 1151: to this loop end label correctly, i.e. predict that they are usually ! 1152: not taken. */ ! 1153: if (exit_label) ! 1154: emit_label_after (exit_label, loop_end); ! 1155: } ! 1156: ! 1157: /* Return true if the loop can be safely, and profitably, preconditioned ! 1158: so that the unrolled copies of the loop body don't need exit tests. ! 1159: ! 1160: This only works if final_value, initial_value and increment can be ! 1161: determined, and if increment is a constant power of 2. ! 1162: If increment is not a power of 2, then the preconditioning modulo ! 1163: operation would require a real modulo instead of a boolean AND, and this ! 1164: is not considered `profitable'. */ ! 1165: ! 1166: /* ??? If the loop is known to be executed very many times, or the machine ! 1167: has a very cheap divide instruction, then preconditioning is a win even ! 1168: when the increment is not a power of 2. Use RTX_COST to compute ! 1169: whether divide is cheap. */ ! 1170: ! 1171: static int ! 1172: precondition_loop_p (initial_value, final_value, increment, loop_start, ! 1173: loop_end) ! 1174: rtx *initial_value, *final_value, *increment; ! 1175: rtx loop_start, loop_end; ! 1176: { ! 1177: int unsigned_compare, compare_dir; ! 1178: ! 1179: if (loop_n_iterations > 0) ! 1180: { ! 1181: *initial_value = const0_rtx; ! 1182: *increment = const1_rtx; ! 1183: *final_value = GEN_INT (loop_n_iterations); ! 1184: ! 1185: if (loop_dump_stream) ! 1186: fprintf (loop_dump_stream, ! 1187: "Preconditioning: Success, number of iterations known, %d.\n", ! 1188: loop_n_iterations); ! 1189: return 1; ! 1190: } ! 1191: ! 1192: if (loop_initial_value == 0) ! 1193: { ! 1194: if (loop_dump_stream) ! 1195: fprintf (loop_dump_stream, ! 1196: "Preconditioning: Could not find initial value.\n"); ! 1197: return 0; ! 1198: } ! 1199: else if (loop_increment == 0) ! 1200: { ! 1201: if (loop_dump_stream) ! 1202: fprintf (loop_dump_stream, ! 1203: "Preconditioning: Could not find increment value.\n"); ! 1204: return 0; ! 1205: } ! 1206: else if (GET_CODE (loop_increment) != CONST_INT) ! 1207: { ! 1208: if (loop_dump_stream) ! 1209: fprintf (loop_dump_stream, ! 1210: "Preconditioning: Increment not a constant.\n"); ! 1211: return 0; ! 1212: } ! 1213: else if ((exact_log2 (INTVAL (loop_increment)) < 0) ! 1214: && (exact_log2 (- INTVAL (loop_increment)) < 0)) ! 1215: { ! 1216: if (loop_dump_stream) ! 1217: fprintf (loop_dump_stream, ! 1218: "Preconditioning: Increment not a constant power of 2.\n"); ! 1219: return 0; ! 1220: } ! 1221: ! 1222: /* Unsigned_compare and compare_dir can be ignored here, since they do ! 1223: not matter for preconditioning. */ ! 1224: ! 1225: if (loop_final_value == 0) ! 1226: { ! 1227: if (loop_dump_stream) ! 1228: fprintf (loop_dump_stream, ! 1229: "Preconditioning: EQ comparison loop.\n"); ! 1230: return 0; ! 1231: } ! 1232: ! 1233: /* Must ensure that final_value is invariant, so call invariant_p to ! 1234: check. Before doing so, must check regno against max_reg_before_loop ! 1235: to make sure that the register is in the range covered by invariant_p. ! 1236: If it isn't, then it is most likely a biv/giv which by definition are ! 1237: not invariant. */ ! 1238: if ((GET_CODE (loop_final_value) == REG ! 1239: && REGNO (loop_final_value) >= max_reg_before_loop) ! 1240: || (GET_CODE (loop_final_value) == PLUS ! 1241: && REGNO (XEXP (loop_final_value, 0)) >= max_reg_before_loop) ! 1242: || ! invariant_p (loop_final_value)) ! 1243: { ! 1244: if (loop_dump_stream) ! 1245: fprintf (loop_dump_stream, ! 1246: "Preconditioning: Final value not invariant.\n"); ! 1247: return 0; ! 1248: } ! 1249: ! 1250: /* Fail for floating point values, since the caller of this function ! 1251: does not have code to deal with them. */ ! 1252: if (GET_MODE_CLASS (GET_MODE (loop_final_value)) == MODE_FLOAT ! 1253: || GET_MODE_CLASS (GET_MODE (loop_initial_value)) == MODE_FLOAT) ! 1254: { ! 1255: if (loop_dump_stream) ! 1256: fprintf (loop_dump_stream, ! 1257: "Preconditioning: Floating point final or initial value.\n"); ! 1258: return 0; ! 1259: } ! 1260: ! 1261: /* Now set initial_value to be the iteration_var, since that may be a ! 1262: simpler expression, and is guaranteed to be correct if all of the ! 1263: above tests succeed. ! 1264: ! 1265: We can not use the initial_value as calculated, because it will be ! 1266: one too small for loops of the form "while (i-- > 0)". We can not ! 1267: emit code before the loop_skip_over insns to fix this problem as this ! 1268: will then give a number one too large for loops of the form ! 1269: "while (--i > 0)". ! 1270: ! 1271: Note that all loops that reach here are entered at the top, because ! 1272: this function is not called if the loop starts with a jump. */ ! 1273: ! 1274: /* Fail if loop_iteration_var is not live before loop_start, since we need ! 1275: to test its value in the preconditioning code. */ ! 1276: ! 1277: if (uid_luid[regno_first_uid[REGNO (loop_iteration_var)]] ! 1278: > INSN_LUID (loop_start)) ! 1279: { ! 1280: if (loop_dump_stream) ! 1281: fprintf (loop_dump_stream, ! 1282: "Preconditioning: Iteration var not live before loop start.\n"); ! 1283: return 0; ! 1284: } ! 1285: ! 1286: *initial_value = loop_iteration_var; ! 1287: *increment = loop_increment; ! 1288: *final_value = loop_final_value; ! 1289: ! 1290: /* Success! */ ! 1291: if (loop_dump_stream) ! 1292: fprintf (loop_dump_stream, "Preconditioning: Successful.\n"); ! 1293: return 1; ! 1294: } ! 1295: ! 1296: ! 1297: /* All pseudo-registers must be mapped to themselves. Two hard registers ! 1298: must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_ ! 1299: REGNUM, to avoid function-inlining specific conversions of these ! 1300: registers. All other hard regs can not be mapped because they may be ! 1301: used with different ! 1302: modes. */ ! 1303: ! 1304: static void ! 1305: init_reg_map (map, maxregnum) ! 1306: struct inline_remap *map; ! 1307: int maxregnum; ! 1308: { ! 1309: int i; ! 1310: ! 1311: for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--) ! 1312: map->reg_map[i] = regno_reg_rtx[i]; ! 1313: /* Just clear the rest of the entries. */ ! 1314: for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--) ! 1315: map->reg_map[i] = 0; ! 1316: ! 1317: map->reg_map[VIRTUAL_STACK_VARS_REGNUM] ! 1318: = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM]; ! 1319: map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM] ! 1320: = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM]; ! 1321: } ! 1322: ! 1323: /* Strength-reduction will often emit code for optimized biv/givs which ! 1324: calculates their value in a temporary register, and then copies the result ! 1325: to the iv. This procedure reconstructs the pattern computing the iv; ! 1326: verifying that all operands are of the proper form. ! 1327: ! 1328: The return value is the amount that the giv is incremented by. */ ! 1329: ! 1330: static rtx ! 1331: calculate_giv_inc (pattern, src_insn, regno) ! 1332: rtx pattern, src_insn; ! 1333: int regno; ! 1334: { ! 1335: rtx increment; ! 1336: rtx increment_total = 0; ! 1337: int tries = 0; ! 1338: ! 1339: retry: ! 1340: /* Verify that we have an increment insn here. First check for a plus ! 1341: as the set source. */ ! 1342: if (GET_CODE (SET_SRC (pattern)) != PLUS) ! 1343: { ! 1344: /* SR sometimes computes the new giv value in a temp, then copies it ! 1345: to the new_reg. */ ! 1346: src_insn = PREV_INSN (src_insn); ! 1347: pattern = PATTERN (src_insn); ! 1348: if (GET_CODE (SET_SRC (pattern)) != PLUS) ! 1349: abort (); ! 1350: ! 1351: /* The last insn emitted is not needed, so delete it to avoid confusing ! 1352: the second cse pass. This insn sets the giv unnecessarily. */ ! 1353: delete_insn (get_last_insn ()); ! 1354: } ! 1355: ! 1356: /* Verify that we have a constant as the second operand of the plus. */ ! 1357: increment = XEXP (SET_SRC (pattern), 1); ! 1358: if (GET_CODE (increment) != CONST_INT) ! 1359: { ! 1360: /* SR sometimes puts the constant in a register, especially if it is ! 1361: too big to be an add immed operand. */ ! 1362: src_insn = PREV_INSN (src_insn); ! 1363: increment = SET_SRC (PATTERN (src_insn)); ! 1364: ! 1365: /* SR may have used LO_SUM to compute the constant if it is too large ! 1366: for a load immed operand. In this case, the constant is in operand ! 1367: one of the LO_SUM rtx. */ ! 1368: if (GET_CODE (increment) == LO_SUM) ! 1369: increment = XEXP (increment, 1); ! 1370: ! 1371: if (GET_CODE (increment) != CONST_INT) ! 1372: abort (); ! 1373: ! 1374: /* The insn loading the constant into a register is not longer needed, ! 1375: so delete it. */ ! 1376: delete_insn (get_last_insn ()); ! 1377: } ! 1378: ! 1379: if (increment_total) ! 1380: increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment)); ! 1381: else ! 1382: increment_total = increment; ! 1383: ! 1384: /* Check that the source register is the same as the register we expected ! 1385: to see as the source. If not, something is seriously wrong. */ ! 1386: if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG ! 1387: || REGNO (XEXP (SET_SRC (pattern), 0)) != regno) ! 1388: { ! 1389: /* Some machines (e.g. the romp), may emit two add instructions for ! 1390: certain constants, so lets try looking for another add immediately ! 1391: before this one if we have only seen one add insn so far. */ ! 1392: ! 1393: if (tries == 0) ! 1394: { ! 1395: tries++; ! 1396: ! 1397: src_insn = PREV_INSN (src_insn); ! 1398: pattern = PATTERN (src_insn); ! 1399: ! 1400: delete_insn (get_last_insn ()); ! 1401: ! 1402: goto retry; ! 1403: } ! 1404: ! 1405: abort (); ! 1406: } ! 1407: ! 1408: return increment_total; ! 1409: } ! 1410: ! 1411: /* Copy REG_NOTES, except for insn references, because not all insn_map ! 1412: entries are valid yet. We do need to copy registers now though, because ! 1413: the reg_map entries can change during copying. */ ! 1414: ! 1415: static rtx ! 1416: initial_reg_note_copy (notes, map) ! 1417: rtx notes; ! 1418: struct inline_remap *map; ! 1419: { ! 1420: rtx copy; ! 1421: ! 1422: if (notes == 0) ! 1423: return 0; ! 1424: ! 1425: copy = rtx_alloc (GET_CODE (notes)); ! 1426: PUT_MODE (copy, GET_MODE (notes)); ! 1427: ! 1428: if (GET_CODE (notes) == EXPR_LIST) ! 1429: XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map); ! 1430: else if (GET_CODE (notes) == INSN_LIST) ! 1431: /* Don't substitute for these yet. */ ! 1432: XEXP (copy, 0) = XEXP (notes, 0); ! 1433: else ! 1434: abort (); ! 1435: ! 1436: XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map); ! 1437: ! 1438: return copy; ! 1439: } ! 1440: ! 1441: /* Fixup insn references in copied REG_NOTES. */ ! 1442: ! 1443: static void ! 1444: final_reg_note_copy (notes, map) ! 1445: rtx notes; ! 1446: struct inline_remap *map; ! 1447: { ! 1448: rtx note; ! 1449: ! 1450: for (note = notes; note; note = XEXP (note, 1)) ! 1451: if (GET_CODE (note) == INSN_LIST) ! 1452: XEXP (note, 0) = map->insn_map[INSN_UID (XEXP (note, 0))]; ! 1453: } ! 1454: ! 1455: /* Copy each instruction in the loop, substituting from map as appropriate. ! 1456: This is very similar to a loop in expand_inline_function. */ ! 1457: ! 1458: static void ! 1459: copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration, ! 1460: unroll_type, start_label, loop_end, insert_before, ! 1461: copy_notes_from) ! 1462: rtx copy_start, copy_end; ! 1463: struct inline_remap *map; ! 1464: rtx exit_label; ! 1465: int last_iteration; ! 1466: enum unroll_types unroll_type; ! 1467: rtx start_label, loop_end, insert_before, copy_notes_from; ! 1468: { ! 1469: rtx insn, pattern; ! 1470: rtx tem, copy; ! 1471: int dest_reg_was_split, i; ! 1472: rtx cc0_insn = 0; ! 1473: rtx final_label = 0; ! 1474: rtx giv_inc, giv_dest_reg, giv_src_reg; ! 1475: ! 1476: /* If this isn't the last iteration, then map any references to the ! 1477: start_label to final_label. Final label will then be emitted immediately ! 1478: after the end of this loop body if it was ever used. ! 1479: ! 1480: If this is the last iteration, then map references to the start_label ! 1481: to itself. */ ! 1482: if (! last_iteration) ! 1483: { ! 1484: final_label = gen_label_rtx (); ! 1485: map->label_map[CODE_LABEL_NUMBER (start_label)] = final_label; ! 1486: } ! 1487: else ! 1488: map->label_map[CODE_LABEL_NUMBER (start_label)] = start_label; ! 1489: ! 1490: start_sequence (); ! 1491: ! 1492: insn = copy_start; ! 1493: do ! 1494: { ! 1495: insn = NEXT_INSN (insn); ! 1496: ! 1497: map->orig_asm_operands_vector = 0; ! 1498: ! 1499: switch (GET_CODE (insn)) ! 1500: { ! 1501: case INSN: ! 1502: pattern = PATTERN (insn); ! 1503: copy = 0; ! 1504: giv_inc = 0; ! 1505: ! 1506: /* Check to see if this is a giv that has been combined with ! 1507: some split address givs. (Combined in the sense that ! 1508: `combine_givs' in loop.c has put two givs in the same register.) ! 1509: In this case, we must search all givs based on the same biv to ! 1510: find the address givs. Then split the address givs. ! 1511: Do this before splitting the giv, since that may map the ! 1512: SET_DEST to a new register. */ ! 1513: ! 1514: if (GET_CODE (pattern) == SET ! 1515: && GET_CODE (SET_DEST (pattern)) == REG ! 1516: && addr_combined_regs[REGNO (SET_DEST (pattern))]) ! 1517: { ! 1518: struct iv_class *bl; ! 1519: struct induction *v, *tv; ! 1520: int regno = REGNO (SET_DEST (pattern)); ! 1521: ! 1522: v = addr_combined_regs[REGNO (SET_DEST (pattern))]; ! 1523: bl = reg_biv_class[REGNO (v->src_reg)]; ! 1524: ! 1525: /* Although the giv_inc amount is not needed here, we must call ! 1526: calculate_giv_inc here since it might try to delete the ! 1527: last insn emitted. If we wait until later to call it, ! 1528: we might accidentally delete insns generated immediately ! 1529: below by emit_unrolled_add. */ ! 1530: ! 1531: giv_inc = calculate_giv_inc (pattern, insn, regno); ! 1532: ! 1533: /* Now find all address giv's that were combined with this ! 1534: giv 'v'. */ ! 1535: for (tv = bl->giv; tv; tv = tv->next_iv) ! 1536: if (tv->giv_type == DEST_ADDR && tv->same == v) ! 1537: { ! 1538: int this_giv_inc = INTVAL (giv_inc); ! 1539: ! 1540: /* Scale this_giv_inc if the multiplicative factors of ! 1541: the two givs are different. */ ! 1542: if (tv->mult_val != v->mult_val) ! 1543: this_giv_inc = (this_giv_inc / INTVAL (v->mult_val) ! 1544: * INTVAL (tv->mult_val)); ! 1545: ! 1546: tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc); ! 1547: *tv->location = tv->dest_reg; ! 1548: ! 1549: if (last_iteration && unroll_type != UNROLL_COMPLETELY) ! 1550: { ! 1551: /* Must emit an insn to increment the split address ! 1552: giv. Add in the const_adjust field in case there ! 1553: was a constant eliminated from the address. */ ! 1554: rtx value, dest_reg; ! 1555: ! 1556: /* tv->dest_reg will be either a bare register, ! 1557: or else a register plus a constant. */ ! 1558: if (GET_CODE (tv->dest_reg) == REG) ! 1559: dest_reg = tv->dest_reg; ! 1560: else ! 1561: dest_reg = XEXP (tv->dest_reg, 0); ! 1562: ! 1563: /* tv->dest_reg may actually be a (PLUS (REG) (CONST)) ! 1564: here, so we must call plus_constant to add ! 1565: the const_adjust amount before calling ! 1566: emit_unrolled_add below. */ ! 1567: value = plus_constant (tv->dest_reg, tv->const_adjust); ! 1568: ! 1569: /* The constant could be too large for an add ! 1570: immediate, so can't directly emit an insn here. */ ! 1571: emit_unrolled_add (dest_reg, XEXP (value, 0), ! 1572: XEXP (value, 1)); ! 1573: ! 1574: /* Reset the giv to be just the register again, in case ! 1575: it is used after the set we have just emitted. ! 1576: We must subtract the const_adjust factor added in ! 1577: above. */ ! 1578: tv->dest_reg = plus_constant (dest_reg, ! 1579: - tv->const_adjust); ! 1580: *tv->location = tv->dest_reg; ! 1581: } ! 1582: } ! 1583: } ! 1584: ! 1585: /* If this is a setting of a splittable variable, then determine ! 1586: how to split the variable, create a new set based on this split, ! 1587: and set up the reg_map so that later uses of the variable will ! 1588: use the new split variable. */ ! 1589: ! 1590: dest_reg_was_split = 0; ! 1591: ! 1592: if (GET_CODE (pattern) == SET ! 1593: && GET_CODE (SET_DEST (pattern)) == REG ! 1594: && splittable_regs[REGNO (SET_DEST (pattern))]) ! 1595: { ! 1596: int regno = REGNO (SET_DEST (pattern)); ! 1597: ! 1598: dest_reg_was_split = 1; ! 1599: ! 1600: /* Compute the increment value for the giv, if it wasn't ! 1601: already computed above. */ ! 1602: ! 1603: if (giv_inc == 0) ! 1604: giv_inc = calculate_giv_inc (pattern, insn, regno); ! 1605: giv_dest_reg = SET_DEST (pattern); ! 1606: giv_src_reg = SET_DEST (pattern); ! 1607: ! 1608: if (unroll_type == UNROLL_COMPLETELY) ! 1609: { ! 1610: /* Completely unrolling the loop. Set the induction ! 1611: variable to a known constant value. */ ! 1612: ! 1613: /* The value in splittable_regs may be an invariant ! 1614: value, so we must use plus_constant here. */ ! 1615: splittable_regs[regno] ! 1616: = plus_constant (splittable_regs[regno], INTVAL (giv_inc)); ! 1617: ! 1618: if (GET_CODE (splittable_regs[regno]) == PLUS) ! 1619: { ! 1620: giv_src_reg = XEXP (splittable_regs[regno], 0); ! 1621: giv_inc = XEXP (splittable_regs[regno], 1); ! 1622: } ! 1623: else ! 1624: { ! 1625: /* The splittable_regs value must be a REG or a ! 1626: CONST_INT, so put the entire value in the giv_src_reg ! 1627: variable. */ ! 1628: giv_src_reg = splittable_regs[regno]; ! 1629: giv_inc = const0_rtx; ! 1630: } ! 1631: } ! 1632: else ! 1633: { ! 1634: /* Partially unrolling loop. Create a new pseudo ! 1635: register for the iteration variable, and set it to ! 1636: be a constant plus the original register. Except ! 1637: on the last iteration, when the result has to ! 1638: go back into the original iteration var register. */ ! 1639: ! 1640: /* Handle bivs which must be mapped to a new register ! 1641: when split. This happens for bivs which need their ! 1642: final value set before loop entry. The new register ! 1643: for the biv was stored in the biv's first struct ! 1644: induction entry by find_splittable_regs. */ ! 1645: ! 1646: if (regno < max_reg_before_loop ! 1647: && reg_iv_type[regno] == BASIC_INDUCT) ! 1648: { ! 1649: giv_src_reg = reg_biv_class[regno]->biv->src_reg; ! 1650: giv_dest_reg = giv_src_reg; ! 1651: } ! 1652: ! 1653: #if 0 ! 1654: /* If non-reduced/final-value givs were split, then ! 1655: this would have to remap those givs also. See ! 1656: find_splittable_regs. */ ! 1657: #endif ! 1658: ! 1659: splittable_regs[regno] ! 1660: = GEN_INT (INTVAL (giv_inc) ! 1661: + INTVAL (splittable_regs[regno])); ! 1662: giv_inc = splittable_regs[regno]; ! 1663: ! 1664: /* Now split the induction variable by changing the dest ! 1665: of this insn to a new register, and setting its ! 1666: reg_map entry to point to this new register. ! 1667: ! 1668: If this is the last iteration, and this is the last insn ! 1669: that will update the iv, then reuse the original dest, ! 1670: to ensure that the iv will have the proper value when ! 1671: the loop exits or repeats. ! 1672: ! 1673: Using splittable_regs_updates here like this is safe, ! 1674: because it can only be greater than one if all ! 1675: instructions modifying the iv are always executed in ! 1676: order. */ ! 1677: ! 1678: if (! last_iteration ! 1679: || (splittable_regs_updates[regno]-- != 1)) ! 1680: { ! 1681: tem = gen_reg_rtx (GET_MODE (giv_src_reg)); ! 1682: giv_dest_reg = tem; ! 1683: map->reg_map[regno] = tem; ! 1684: } ! 1685: else ! 1686: map->reg_map[regno] = giv_src_reg; ! 1687: } ! 1688: ! 1689: /* The constant being added could be too large for an add ! 1690: immediate, so can't directly emit an insn here. */ ! 1691: emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc); ! 1692: copy = get_last_insn (); ! 1693: pattern = PATTERN (copy); ! 1694: } ! 1695: else ! 1696: { ! 1697: pattern = copy_rtx_and_substitute (pattern, map); ! 1698: copy = emit_insn (pattern); ! 1699: } ! 1700: REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); ! 1701: ! 1702: #ifdef HAVE_cc0 ! 1703: /* If this insn is setting CC0, it may need to look at ! 1704: the insn that uses CC0 to see what type of insn it is. ! 1705: In that case, the call to recog via validate_change will ! 1706: fail. So don't substitute constants here. Instead, ! 1707: do it when we emit the following insn. ! 1708: ! 1709: For example, see the pyr.md file. That machine has signed and ! 1710: unsigned compares. The compare patterns must check the ! 1711: following branch insn to see which what kind of compare to ! 1712: emit. ! 1713: ! 1714: If the previous insn set CC0, substitute constants on it as ! 1715: well. */ ! 1716: if (sets_cc0_p (copy) != 0) ! 1717: cc0_insn = copy; ! 1718: else ! 1719: { ! 1720: if (cc0_insn) ! 1721: try_constants (cc0_insn, map); ! 1722: cc0_insn = 0; ! 1723: try_constants (copy, map); ! 1724: } ! 1725: #else ! 1726: try_constants (copy, map); ! 1727: #endif ! 1728: ! 1729: /* Make split induction variable constants `permanent' since we ! 1730: know there are no backward branches across iteration variable ! 1731: settings which would invalidate this. */ ! 1732: if (dest_reg_was_split) ! 1733: { ! 1734: int regno = REGNO (SET_DEST (pattern)); ! 1735: ! 1736: if (regno < map->const_equiv_map_size ! 1737: && map->const_age_map[regno] == map->const_age) ! 1738: map->const_age_map[regno] = -1; ! 1739: } ! 1740: break; ! 1741: ! 1742: case JUMP_INSN: ! 1743: pattern = copy_rtx_and_substitute (PATTERN (insn), map); ! 1744: copy = emit_jump_insn (pattern); ! 1745: REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); ! 1746: ! 1747: if (JUMP_LABEL (insn) == start_label && insn == copy_end ! 1748: && ! last_iteration) ! 1749: { ! 1750: /* This is a branch to the beginning of the loop; this is the ! 1751: last insn being copied; and this is not the last iteration. ! 1752: In this case, we want to change the original fall through ! 1753: case to be a branch past the end of the loop, and the ! 1754: original jump label case to fall_through. */ ! 1755: ! 1756: if (! invert_exp (pattern, copy) ! 1757: || ! redirect_exp (&pattern, ! 1758: map->label_map[CODE_LABEL_NUMBER ! 1759: (JUMP_LABEL (insn))], ! 1760: exit_label, copy)) ! 1761: abort (); ! 1762: } ! 1763: ! 1764: #ifdef HAVE_cc0 ! 1765: if (cc0_insn) ! 1766: try_constants (cc0_insn, map); ! 1767: cc0_insn = 0; ! 1768: #endif ! 1769: try_constants (copy, map); ! 1770: ! 1771: /* Set the jump label of COPY correctly to avoid problems with ! 1772: later passes of unroll_loop, if INSN had jump label set. */ ! 1773: if (JUMP_LABEL (insn)) ! 1774: { ! 1775: rtx label = 0; ! 1776: ! 1777: /* Can't use the label_map for every insn, since this may be ! 1778: the backward branch, and hence the label was not mapped. */ ! 1779: if (GET_CODE (pattern) == SET) ! 1780: { ! 1781: tem = SET_SRC (pattern); ! 1782: if (GET_CODE (tem) == LABEL_REF) ! 1783: label = XEXP (tem, 0); ! 1784: else if (GET_CODE (tem) == IF_THEN_ELSE) ! 1785: { ! 1786: if (XEXP (tem, 1) != pc_rtx) ! 1787: label = XEXP (XEXP (tem, 1), 0); ! 1788: else ! 1789: label = XEXP (XEXP (tem, 2), 0); ! 1790: } ! 1791: } ! 1792: ! 1793: if (label && GET_CODE (label) == CODE_LABEL) ! 1794: JUMP_LABEL (copy) = label; ! 1795: else ! 1796: { ! 1797: /* An unrecognizable jump insn, probably the entry jump ! 1798: for a switch statement. This label must have been mapped, ! 1799: so just use the label_map to get the new jump label. */ ! 1800: JUMP_LABEL (copy) = map->label_map[CODE_LABEL_NUMBER ! 1801: (JUMP_LABEL (insn))]; ! 1802: } ! 1803: ! 1804: /* If this is a non-local jump, then must increase the label ! 1805: use count so that the label will not be deleted when the ! 1806: original jump is deleted. */ ! 1807: LABEL_NUSES (JUMP_LABEL (copy))++; ! 1808: } ! 1809: else if (GET_CODE (PATTERN (copy)) == ADDR_VEC ! 1810: || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC) ! 1811: { ! 1812: rtx pat = PATTERN (copy); ! 1813: int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC; ! 1814: int len = XVECLEN (pat, diff_vec_p); ! 1815: int i; ! 1816: ! 1817: for (i = 0; i < len; i++) ! 1818: LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++; ! 1819: } ! 1820: ! 1821: /* If this used to be a conditional jump insn but whose branch ! 1822: direction is now known, we must do something special. */ ! 1823: if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value) ! 1824: { ! 1825: #ifdef HAVE_cc0 ! 1826: /* The previous insn set cc0 for us. So delete it. */ ! 1827: delete_insn (PREV_INSN (copy)); ! 1828: #endif ! 1829: ! 1830: /* If this is now a no-op, delete it. */ ! 1831: if (map->last_pc_value == pc_rtx) ! 1832: { ! 1833: delete_insn (copy); ! 1834: copy = 0; ! 1835: } ! 1836: else ! 1837: /* Otherwise, this is unconditional jump so we must put a ! 1838: BARRIER after it. We could do some dead code elimination ! 1839: here, but jump.c will do it just as well. */ ! 1840: emit_barrier (); ! 1841: } ! 1842: break; ! 1843: ! 1844: case CALL_INSN: ! 1845: pattern = copy_rtx_and_substitute (PATTERN (insn), map); ! 1846: copy = emit_call_insn (pattern); ! 1847: REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map); ! 1848: ! 1849: #ifdef HAVE_cc0 ! 1850: if (cc0_insn) ! 1851: try_constants (cc0_insn, map); ! 1852: cc0_insn = 0; ! 1853: #endif ! 1854: try_constants (copy, map); ! 1855: ! 1856: /* Be lazy and assume CALL_INSNs clobber all hard registers. */ ! 1857: for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ! 1858: map->const_equiv_map[i] = 0; ! 1859: break; ! 1860: ! 1861: case CODE_LABEL: ! 1862: /* If this is the loop start label, then we don't need to emit a ! 1863: copy of this label since no one will use it. */ ! 1864: ! 1865: if (insn != start_label) ! 1866: { ! 1867: copy = emit_label (map->label_map[CODE_LABEL_NUMBER (insn)]); ! 1868: map->const_age++; ! 1869: } ! 1870: break; ! 1871: ! 1872: case BARRIER: ! 1873: copy = emit_barrier (); ! 1874: break; ! 1875: ! 1876: case NOTE: ! 1877: /* VTOP notes are valid only before the loop exit test. If placed ! 1878: anywhere else, loop may generate bad code. */ ! 1879: ! 1880: if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED ! 1881: && (NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP ! 1882: || (last_iteration && unroll_type != UNROLL_COMPLETELY))) ! 1883: copy = emit_note (NOTE_SOURCE_FILE (insn), ! 1884: NOTE_LINE_NUMBER (insn)); ! 1885: else ! 1886: copy = 0; ! 1887: break; ! 1888: ! 1889: default: ! 1890: abort (); ! 1891: break; ! 1892: } ! 1893: ! 1894: map->insn_map[INSN_UID (insn)] = copy; ! 1895: } ! 1896: while (insn != copy_end); ! 1897: ! 1898: /* Now finish coping the REG_NOTES. */ ! 1899: insn = copy_start; ! 1900: do ! 1901: { ! 1902: insn = NEXT_INSN (insn); ! 1903: if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN ! 1904: || GET_CODE (insn) == CALL_INSN) ! 1905: && map->insn_map[INSN_UID (insn)]) ! 1906: final_reg_note_copy (REG_NOTES (map->insn_map[INSN_UID (insn)]), map); ! 1907: } ! 1908: while (insn != copy_end); ! 1909: ! 1910: /* There may be notes between copy_notes_from and loop_end. Emit a copy of ! 1911: each of these notes here, since there may be some important ones, such as ! 1912: NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last ! 1913: iteration, because the original notes won't be deleted. ! 1914: ! 1915: We can't use insert_before here, because when from preconditioning, ! 1916: insert_before points before the loop. We can't use copy_end, because ! 1917: there may be insns already inserted after it (which we don't want to ! 1918: copy) when not from preconditioning code. */ ! 1919: ! 1920: if (! last_iteration) ! 1921: { ! 1922: for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn)) ! 1923: { ! 1924: if (GET_CODE (insn) == NOTE ! 1925: && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED) ! 1926: emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn)); ! 1927: } ! 1928: } ! 1929: ! 1930: if (final_label && LABEL_NUSES (final_label) > 0) ! 1931: emit_label (final_label); ! 1932: ! 1933: tem = gen_sequence (); ! 1934: end_sequence (); ! 1935: emit_insn_before (tem, insert_before); ! 1936: } ! 1937: ! 1938: /* Emit an insn, using the expand_binop to ensure that a valid insn is ! 1939: emitted. This will correctly handle the case where the increment value ! 1940: won't fit in the immediate field of a PLUS insns. */ ! 1941: ! 1942: void ! 1943: emit_unrolled_add (dest_reg, src_reg, increment) ! 1944: rtx dest_reg, src_reg, increment; ! 1945: { ! 1946: rtx result; ! 1947: ! 1948: result = expand_binop (GET_MODE (dest_reg), add_optab, src_reg, increment, ! 1949: dest_reg, 0, OPTAB_LIB_WIDEN); ! 1950: ! 1951: if (dest_reg != result) ! 1952: emit_move_insn (dest_reg, result); ! 1953: } ! 1954: ! 1955: /* Searches the insns between INSN and LOOP_END. Returns 1 if there ! 1956: is a backward branch in that range that branches to somewhere between ! 1957: LOOP_START and INSN. Returns 0 otherwise. */ ! 1958: ! 1959: /* ??? This is quadratic algorithm. Could be rewritten to be linear. ! 1960: In practice, this is not a problem, because this function is seldom called, ! 1961: and uses a negligible amount of CPU time on average. */ ! 1962: ! 1963: static int ! 1964: back_branch_in_range_p (insn, loop_start, loop_end) ! 1965: rtx insn; ! 1966: rtx loop_start, loop_end; ! 1967: { ! 1968: rtx p, q, target_insn; ! 1969: ! 1970: /* Stop before we get to the backward branch at the end of the loop. */ ! 1971: loop_end = prev_nonnote_insn (loop_end); ! 1972: if (GET_CODE (loop_end) == BARRIER) ! 1973: loop_end = PREV_INSN (loop_end); ! 1974: ! 1975: /* Check in case insn has been deleted, search forward for first non ! 1976: deleted insn following it. */ ! 1977: while (INSN_DELETED_P (insn)) ! 1978: insn = NEXT_INSN (insn); ! 1979: ! 1980: /* Check for the case where insn is the last insn in the loop. */ ! 1981: if (insn == loop_end) ! 1982: return 0; ! 1983: ! 1984: for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p)) ! 1985: { ! 1986: if (GET_CODE (p) == JUMP_INSN) ! 1987: { ! 1988: target_insn = JUMP_LABEL (p); ! 1989: ! 1990: /* Search from loop_start to insn, to see if one of them is ! 1991: the target_insn. We can't use INSN_LUID comparisons here, ! 1992: since insn may not have an LUID entry. */ ! 1993: for (q = loop_start; q != insn; q = NEXT_INSN (q)) ! 1994: if (q == target_insn) ! 1995: return 1; ! 1996: } ! 1997: } ! 1998: ! 1999: return 0; ! 2000: } ! 2001: ! 2002: /* Try to generate the simplest rtx for the expression ! 2003: (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial ! 2004: value of giv's. */ ! 2005: ! 2006: static rtx ! 2007: fold_rtx_mult_add (mult1, mult2, add1, mode) ! 2008: rtx mult1, mult2, add1; ! 2009: enum machine_mode mode; ! 2010: { ! 2011: rtx temp, mult_res; ! 2012: rtx result; ! 2013: ! 2014: /* The modes must all be the same. This should always be true. For now, ! 2015: check to make sure. */ ! 2016: if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode) ! 2017: || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode) ! 2018: || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode)) ! 2019: abort (); ! 2020: ! 2021: /* Ensure that if at least one of mult1/mult2 are constant, then mult2 ! 2022: will be a constant. */ ! 2023: if (GET_CODE (mult1) == CONST_INT) ! 2024: { ! 2025: temp = mult2; ! 2026: mult2 = mult1; ! 2027: mult1 = temp; ! 2028: } ! 2029: ! 2030: mult_res = simplify_binary_operation (MULT, mode, mult1, mult2); ! 2031: if (! mult_res) ! 2032: mult_res = gen_rtx (MULT, mode, mult1, mult2); ! 2033: ! 2034: /* Again, put the constant second. */ ! 2035: if (GET_CODE (add1) == CONST_INT) ! 2036: { ! 2037: temp = add1; ! 2038: add1 = mult_res; ! 2039: mult_res = temp; ! 2040: } ! 2041: ! 2042: result = simplify_binary_operation (PLUS, mode, add1, mult_res); ! 2043: if (! result) ! 2044: result = gen_rtx (PLUS, mode, add1, mult_res); ! 2045: ! 2046: return result; ! 2047: } ! 2048: ! 2049: /* Searches the list of induction struct's for the biv BL, to try to calculate ! 2050: the total increment value for one iteration of the loop as a constant. ! 2051: ! 2052: Returns the increment value as an rtx, simplified as much as possible, ! 2053: if it can be calculated. Otherwise, returns 0. */ ! 2054: ! 2055: rtx ! 2056: biv_total_increment (bl, loop_start, loop_end) ! 2057: struct iv_class *bl; ! 2058: rtx loop_start, loop_end; ! 2059: { ! 2060: struct induction *v; ! 2061: rtx result; ! 2062: ! 2063: /* For increment, must check every instruction that sets it. Each ! 2064: instruction must be executed only once each time through the loop. ! 2065: To verify this, we check that the the insn is always executed, and that ! 2066: there are no backward branches after the insn that branch to before it. ! 2067: Also, the insn must have a mult_val of one (to make sure it really is ! 2068: an increment). */ ! 2069: ! 2070: result = const0_rtx; ! 2071: for (v = bl->biv; v; v = v->next_iv) ! 2072: { ! 2073: if (v->always_computable && v->mult_val == const1_rtx ! 2074: && ! back_branch_in_range_p (v->insn, loop_start, loop_end)) ! 2075: result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode); ! 2076: else ! 2077: return 0; ! 2078: } ! 2079: ! 2080: return result; ! 2081: } ! 2082: ! 2083: /* Determine the initial value of the iteration variable, and the amount ! 2084: that it is incremented each loop. Use the tables constructed by ! 2085: the strength reduction pass to calculate these values. ! 2086: ! 2087: Initial_value and/or increment are set to zero if their values could not ! 2088: be calculated. */ ! 2089: ! 2090: static void ! 2091: iteration_info (iteration_var, initial_value, increment, loop_start, loop_end) ! 2092: rtx iteration_var, *initial_value, *increment; ! 2093: rtx loop_start, loop_end; ! 2094: { ! 2095: struct iv_class *bl; ! 2096: struct induction *v, *b; ! 2097: ! 2098: /* Clear the result values, in case no answer can be found. */ ! 2099: *initial_value = 0; ! 2100: *increment = 0; ! 2101: ! 2102: /* The iteration variable can be either a giv or a biv. Check to see ! 2103: which it is, and compute the variable's initial value, and increment ! 2104: value if possible. */ ! 2105: ! 2106: /* If this is a new register, can't handle it since we don't have any ! 2107: reg_iv_type entry for it. */ ! 2108: if (REGNO (iteration_var) >= max_reg_before_loop) ! 2109: { ! 2110: if (loop_dump_stream) ! 2111: fprintf (loop_dump_stream, ! 2112: "Loop unrolling: No reg_iv_type entry for iteration var.\n"); ! 2113: return; ! 2114: } ! 2115: /* Reject iteration variables larger than the host long size, since they ! 2116: could result in a number of iterations greater than the range of our ! 2117: `unsigned long' variable loop_n_iterations. */ ! 2118: else if (GET_MODE_BITSIZE (GET_MODE (iteration_var)) > HOST_BITS_PER_LONG) ! 2119: { ! 2120: if (loop_dump_stream) ! 2121: fprintf (loop_dump_stream, ! 2122: "Loop unrolling: Iteration var rejected because mode larger than host long.\n"); ! 2123: return; ! 2124: } ! 2125: else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT) ! 2126: { ! 2127: if (loop_dump_stream) ! 2128: fprintf (loop_dump_stream, ! 2129: "Loop unrolling: Iteration var not an integer.\n"); ! 2130: return; ! 2131: } ! 2132: else if (reg_iv_type[REGNO (iteration_var)] == BASIC_INDUCT) ! 2133: { ! 2134: /* Grab initial value, only useful if it is a constant. */ ! 2135: bl = reg_biv_class[REGNO (iteration_var)]; ! 2136: *initial_value = bl->initial_value; ! 2137: ! 2138: *increment = biv_total_increment (bl, loop_start, loop_end); ! 2139: } ! 2140: else if (reg_iv_type[REGNO (iteration_var)] == GENERAL_INDUCT) ! 2141: { ! 2142: #if 1 ! 2143: /* ??? The code below does not work because the incorrect number of ! 2144: iterations is calculated when the biv is incremented after the giv ! 2145: is set (which is the usual case). This can probably be accounted ! 2146: for by biasing the initial_value by subtracting the amount of the ! 2147: increment that occurs between the giv set and the giv test. However, ! 2148: a giv as an iterator is very rare, so it does not seem worthwhile ! 2149: to handle this. */ ! 2150: /* ??? An example failure is: i = 6; do {;} while (i++ < 9). */ ! 2151: if (loop_dump_stream) ! 2152: fprintf (loop_dump_stream, ! 2153: "Loop unrolling: Giv iterators are not handled.\n"); ! 2154: return; ! 2155: #else ! 2156: /* Initial value is mult_val times the biv's initial value plus ! 2157: add_val. Only useful if it is a constant. */ ! 2158: v = reg_iv_info[REGNO (iteration_var)]; ! 2159: bl = reg_biv_class[REGNO (v->src_reg)]; ! 2160: *initial_value = fold_rtx_mult_add (v->mult_val, bl->initial_value, ! 2161: v->add_val, v->mode); ! 2162: ! 2163: /* Increment value is mult_val times the increment value of the biv. */ ! 2164: ! 2165: *increment = biv_total_increment (bl, loop_start, loop_end); ! 2166: if (*increment) ! 2167: *increment = fold_rtx_mult_add (v->mult_val, *increment, const0_rtx, ! 2168: v->mode); ! 2169: #endif ! 2170: } ! 2171: else ! 2172: { ! 2173: if (loop_dump_stream) ! 2174: fprintf (loop_dump_stream, ! 2175: "Loop unrolling: Not basic or general induction var.\n"); ! 2176: return; ! 2177: } ! 2178: } ! 2179: ! 2180: /* Calculate the approximate final value of the iteration variable ! 2181: which has an loop exit test with code COMPARISON_CODE and comparison value ! 2182: of COMPARISON_VALUE. Also returns an indication of whether the comparison ! 2183: was signed or unsigned, and the direction of the comparison. This info is ! 2184: needed to calculate the number of loop iterations. */ ! 2185: ! 2186: static rtx ! 2187: approx_final_value (comparison_code, comparison_value, unsigned_p, compare_dir) ! 2188: enum rtx_code comparison_code; ! 2189: rtx comparison_value; ! 2190: int *unsigned_p; ! 2191: int *compare_dir; ! 2192: { ! 2193: /* Calculate the final value of the induction variable. ! 2194: The exact final value depends on the branch operator, and increment sign. ! 2195: This is only an approximate value. It will be wrong if the iteration ! 2196: variable is not incremented by one each time through the loop, and ! 2197: approx final value - start value % increment != 0. */ ! 2198: ! 2199: *unsigned_p = 0; ! 2200: switch (comparison_code) ! 2201: { ! 2202: case LEU: ! 2203: *unsigned_p = 1; ! 2204: case LE: ! 2205: *compare_dir = 1; ! 2206: return plus_constant (comparison_value, 1); ! 2207: case GEU: ! 2208: *unsigned_p = 1; ! 2209: case GE: ! 2210: *compare_dir = -1; ! 2211: return plus_constant (comparison_value, -1); ! 2212: case EQ: ! 2213: /* Can not calculate a final value for this case. */ ! 2214: *compare_dir = 0; ! 2215: return 0; ! 2216: case LTU: ! 2217: *unsigned_p = 1; ! 2218: case LT: ! 2219: *compare_dir = 1; ! 2220: return comparison_value; ! 2221: break; ! 2222: case GTU: ! 2223: *unsigned_p = 1; ! 2224: case GT: ! 2225: *compare_dir = -1; ! 2226: return comparison_value; ! 2227: case NE: ! 2228: *compare_dir = 0; ! 2229: return comparison_value; ! 2230: default: ! 2231: abort (); ! 2232: } ! 2233: } ! 2234: ! 2235: /* For each biv and giv, determine whether it can be safely split into ! 2236: a different variable for each unrolled copy of the loop body. If it ! 2237: is safe to split, then indicate that by saving some useful info ! 2238: in the splittable_regs array. ! 2239: ! 2240: If the loop is being completely unrolled, then splittable_regs will hold ! 2241: the current value of the induction variable while the loop is unrolled. ! 2242: It must be set to the initial value of the induction variable here. ! 2243: Otherwise, splittable_regs will hold the difference between the current ! 2244: value of the induction variable and the value the induction variable had ! 2245: at the top of the loop. It must be set to the value 0 here. ! 2246: ! 2247: Returns the total number of instructions that set registers that are ! 2248: splittable. */ ! 2249: ! 2250: /* ?? If the loop is only unrolled twice, then most of the restrictions to ! 2251: constant values are unnecessary, since we can easily calculate increment ! 2252: values in this case even if nothing is constant. The increment value ! 2253: should not involve a multiply however. */ ! 2254: ! 2255: /* ?? Even if the biv/giv increment values aren't constant, it may still ! 2256: be beneficial to split the variable if the loop is only unrolled a few ! 2257: times, since multiplies by small integers (1,2,3,4) are very cheap. */ ! 2258: ! 2259: static int ! 2260: find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before, ! 2261: unroll_number) ! 2262: enum unroll_types unroll_type; ! 2263: rtx loop_start, loop_end; ! 2264: rtx end_insert_before; ! 2265: int unroll_number; ! 2266: { ! 2267: struct iv_class *bl; ! 2268: struct induction *v; ! 2269: rtx increment, tem; ! 2270: rtx biv_final_value; ! 2271: int biv_splittable; ! 2272: int result = 0; ! 2273: ! 2274: for (bl = loop_iv_list; bl; bl = bl->next) ! 2275: { ! 2276: /* Biv_total_increment must return a constant value, ! 2277: otherwise we can not calculate the split values. */ ! 2278: ! 2279: increment = biv_total_increment (bl, loop_start, loop_end); ! 2280: if (! increment || GET_CODE (increment) != CONST_INT) ! 2281: continue; ! 2282: ! 2283: /* The loop must be unrolled completely, or else have a known number ! 2284: of iterations and only one exit, or else the biv must be dead ! 2285: outside the loop, or else the final value must be known. Otherwise, ! 2286: it is unsafe to split the biv since it may not have the proper ! 2287: value on loop exit. */ ! 2288: ! 2289: /* loop_number_exit_labels is non-zero if the loop has an exit other than ! 2290: a fall through at the end. */ ! 2291: ! 2292: biv_splittable = 1; ! 2293: biv_final_value = 0; ! 2294: if (unroll_type != UNROLL_COMPLETELY ! 2295: && (loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]] ! 2296: || unroll_type == UNROLL_NAIVE) ! 2297: && (uid_luid[regno_last_uid[bl->regno]] >= INSN_LUID (loop_end) ! 2298: || ! bl->init_insn ! 2299: || INSN_UID (bl->init_insn) >= max_uid_for_loop ! 2300: || (uid_luid[regno_first_uid[bl->regno]] ! 2301: < INSN_LUID (bl->init_insn)) ! 2302: || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set))) ! 2303: && ! (biv_final_value = final_biv_value (bl, loop_start, loop_end))) ! 2304: biv_splittable = 0; ! 2305: ! 2306: /* If any of the insns setting the BIV don't do so with a simple ! 2307: PLUS, we don't know how to split it. */ ! 2308: for (v = bl->biv; biv_splittable && v; v = v->next_iv) ! 2309: if ((tem = single_set (v->insn)) == 0 ! 2310: || GET_CODE (SET_DEST (tem)) != REG ! 2311: || REGNO (SET_DEST (tem)) != bl->regno ! 2312: || GET_CODE (SET_SRC (tem)) != PLUS) ! 2313: biv_splittable = 0; ! 2314: ! 2315: /* If final value is non-zero, then must emit an instruction which sets ! 2316: the value of the biv to the proper value. This is done after ! 2317: handling all of the givs, since some of them may need to use the ! 2318: biv's value in their initialization code. */ ! 2319: ! 2320: /* This biv is splittable. If completely unrolling the loop, save ! 2321: the biv's initial value. Otherwise, save the constant zero. */ ! 2322: ! 2323: if (biv_splittable == 1) ! 2324: { ! 2325: if (unroll_type == UNROLL_COMPLETELY) ! 2326: { ! 2327: /* If the initial value of the biv is itself (i.e. it is too ! 2328: complicated for strength_reduce to compute), or is a hard ! 2329: register, then we must create a new pseudo reg to hold the ! 2330: initial value of the biv. */ ! 2331: ! 2332: if (GET_CODE (bl->initial_value) == REG ! 2333: && (REGNO (bl->initial_value) == bl->regno ! 2334: || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER)) ! 2335: { ! 2336: rtx tem = gen_reg_rtx (bl->biv->mode); ! 2337: ! 2338: emit_insn_before (gen_move_insn (tem, bl->biv->src_reg), ! 2339: loop_start); ! 2340: ! 2341: if (loop_dump_stream) ! 2342: fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n", ! 2343: bl->regno, REGNO (tem)); ! 2344: ! 2345: splittable_regs[bl->regno] = tem; ! 2346: } ! 2347: else ! 2348: splittable_regs[bl->regno] = bl->initial_value; ! 2349: } ! 2350: else ! 2351: splittable_regs[bl->regno] = const0_rtx; ! 2352: ! 2353: /* Save the number of instructions that modify the biv, so that ! 2354: we can treat the last one specially. */ ! 2355: ! 2356: splittable_regs_updates[bl->regno] = bl->biv_count; ! 2357: result += bl->biv_count; ! 2358: ! 2359: if (loop_dump_stream) ! 2360: fprintf (loop_dump_stream, ! 2361: "Biv %d safe to split.\n", bl->regno); ! 2362: } ! 2363: ! 2364: /* Check every giv that depends on this biv to see whether it is ! 2365: splittable also. Even if the biv isn't splittable, givs which ! 2366: depend on it may be splittable if the biv is live outside the ! 2367: loop, and the givs aren't. */ ! 2368: ! 2369: result += find_splittable_givs (bl, unroll_type, loop_start, loop_end, ! 2370: increment, unroll_number); ! 2371: ! 2372: /* If final value is non-zero, then must emit an instruction which sets ! 2373: the value of the biv to the proper value. This is done after ! 2374: handling all of the givs, since some of them may need to use the ! 2375: biv's value in their initialization code. */ ! 2376: if (biv_final_value) ! 2377: { ! 2378: /* If the loop has multiple exits, emit the insns before the ! 2379: loop to ensure that it will always be executed no matter ! 2380: how the loop exits. Otherwise emit the insn after the loop, ! 2381: since this is slightly more efficient. */ ! 2382: if (! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]) ! 2383: emit_insn_before (gen_move_insn (bl->biv->src_reg, ! 2384: biv_final_value), ! 2385: end_insert_before); ! 2386: else ! 2387: { ! 2388: /* Create a new register to hold the value of the biv, and then ! 2389: set the biv to its final value before the loop start. The biv ! 2390: is set to its final value before loop start to ensure that ! 2391: this insn will always be executed, no matter how the loop ! 2392: exits. */ ! 2393: rtx tem = gen_reg_rtx (bl->biv->mode); ! 2394: emit_insn_before (gen_move_insn (tem, bl->biv->src_reg), ! 2395: loop_start); ! 2396: emit_insn_before (gen_move_insn (bl->biv->src_reg, ! 2397: biv_final_value), ! 2398: loop_start); ! 2399: ! 2400: if (loop_dump_stream) ! 2401: fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n", ! 2402: REGNO (bl->biv->src_reg), REGNO (tem)); ! 2403: ! 2404: /* Set up the mapping from the original biv register to the new ! 2405: register. */ ! 2406: bl->biv->src_reg = tem; ! 2407: } ! 2408: } ! 2409: } ! 2410: return result; ! 2411: } ! 2412: ! 2413: /* For every giv based on the biv BL, check to determine whether it is ! 2414: splittable. This is a subroutine to find_splittable_regs (). ! 2415: ! 2416: Return the number of instructions that set splittable registers. */ ! 2417: ! 2418: static int ! 2419: find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment, ! 2420: unroll_number) ! 2421: struct iv_class *bl; ! 2422: enum unroll_types unroll_type; ! 2423: rtx loop_start, loop_end; ! 2424: rtx increment; ! 2425: int unroll_number; ! 2426: { ! 2427: struct induction *v; ! 2428: rtx final_value; ! 2429: rtx tem; ! 2430: int result = 0; ! 2431: ! 2432: for (v = bl->giv; v; v = v->next_iv) ! 2433: { ! 2434: rtx giv_inc, value; ! 2435: ! 2436: /* Only split the giv if it has already been reduced, or if the loop is ! 2437: being completely unrolled. */ ! 2438: if (unroll_type != UNROLL_COMPLETELY && v->ignore) ! 2439: continue; ! 2440: ! 2441: /* The giv can be split if the insn that sets the giv is executed once ! 2442: and only once on every iteration of the loop. */ ! 2443: /* An address giv can always be split. v->insn is just a use not a set, ! 2444: and hence it does not matter whether it is always executed. All that ! 2445: matters is that all the biv increments are always executed, and we ! 2446: won't reach here if they aren't. */ ! 2447: if (v->giv_type != DEST_ADDR ! 2448: && (! v->always_computable ! 2449: || back_branch_in_range_p (v->insn, loop_start, loop_end))) ! 2450: continue; ! 2451: ! 2452: /* The giv increment value must be a constant. */ ! 2453: giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx, ! 2454: v->mode); ! 2455: if (! giv_inc || GET_CODE (giv_inc) != CONST_INT) ! 2456: continue; ! 2457: ! 2458: /* The loop must be unrolled completely, or else have a known number of ! 2459: iterations and only one exit, or else the giv must be dead outside ! 2460: the loop, or else the final value of the giv must be known. ! 2461: Otherwise, it is not safe to split the giv since it may not have the ! 2462: proper value on loop exit. */ ! 2463: ! 2464: /* The used outside loop test will fail for DEST_ADDR givs. They are ! 2465: never used outside the loop anyways, so it is always safe to split a ! 2466: DEST_ADDR giv. */ ! 2467: ! 2468: final_value = 0; ! 2469: if (unroll_type != UNROLL_COMPLETELY ! 2470: && (loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]] ! 2471: || unroll_type == UNROLL_NAIVE) ! 2472: && v->giv_type != DEST_ADDR ! 2473: && ((regno_first_uid[REGNO (v->dest_reg)] != INSN_UID (v->insn) ! 2474: /* Check for the case where the pseudo is set by a shift/add ! 2475: sequence, in which case the first insn setting the pseudo ! 2476: is the first insn of the shift/add sequence. */ ! 2477: && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX)) ! 2478: || (regno_first_uid[REGNO (v->dest_reg)] ! 2479: != INSN_UID (XEXP (tem, 0))))) ! 2480: /* Line above always fails if INSN was moved by loop opt. */ ! 2481: || (uid_luid[regno_last_uid[REGNO (v->dest_reg)]] ! 2482: >= INSN_LUID (loop_end))) ! 2483: && ! (final_value = v->final_value)) ! 2484: continue; ! 2485: ! 2486: #if 0 ! 2487: /* Currently, non-reduced/final-value givs are never split. */ ! 2488: /* Should emit insns after the loop if possible, as the biv final value ! 2489: code below does. */ ! 2490: ! 2491: /* If the final value is non-zero, and the giv has not been reduced, ! 2492: then must emit an instruction to set the final value. */ ! 2493: if (final_value && !v->new_reg) ! 2494: { ! 2495: /* Create a new register to hold the value of the giv, and then set ! 2496: the giv to its final value before the loop start. The giv is set ! 2497: to its final value before loop start to ensure that this insn ! 2498: will always be executed, no matter how we exit. */ ! 2499: tem = gen_reg_rtx (v->mode); ! 2500: emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start); ! 2501: emit_insn_before (gen_move_insn (v->dest_reg, final_value), ! 2502: loop_start); ! 2503: ! 2504: if (loop_dump_stream) ! 2505: fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n", ! 2506: REGNO (v->dest_reg), REGNO (tem)); ! 2507: ! 2508: v->src_reg = tem; ! 2509: } ! 2510: #endif ! 2511: ! 2512: /* This giv is splittable. If completely unrolling the loop, save the ! 2513: giv's initial value. Otherwise, save the constant zero for it. */ ! 2514: ! 2515: if (unroll_type == UNROLL_COMPLETELY) ! 2516: { ! 2517: /* It is not safe to use bl->initial_value here, because it may not ! 2518: be invariant. It is safe to use the initial value stored in ! 2519: the splittable_regs array if it is set. In rare cases, it won't ! 2520: be set, so then we do exactly the same thing as ! 2521: find_splittable_regs does to get a safe value. */ ! 2522: rtx biv_initial_value; ! 2523: ! 2524: if (splittable_regs[bl->regno]) ! 2525: biv_initial_value = splittable_regs[bl->regno]; ! 2526: else if (GET_CODE (bl->initial_value) != REG ! 2527: || (REGNO (bl->initial_value) != bl->regno ! 2528: && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER)) ! 2529: biv_initial_value = bl->initial_value; ! 2530: else ! 2531: { ! 2532: rtx tem = gen_reg_rtx (bl->biv->mode); ! 2533: ! 2534: emit_insn_before (gen_move_insn (tem, bl->biv->src_reg), ! 2535: loop_start); ! 2536: biv_initial_value = tem; ! 2537: } ! 2538: value = fold_rtx_mult_add (v->mult_val, biv_initial_value, ! 2539: v->add_val, v->mode); ! 2540: } ! 2541: else ! 2542: value = const0_rtx; ! 2543: ! 2544: if (v->new_reg) ! 2545: { ! 2546: /* If a giv was combined with another giv, then we can only split ! 2547: this giv if the giv it was combined with was reduced. This ! 2548: is because the value of v->new_reg is meaningless in this ! 2549: case. */ ! 2550: if (v->same && ! v->same->new_reg) ! 2551: { ! 2552: if (loop_dump_stream) ! 2553: fprintf (loop_dump_stream, ! 2554: "giv combined with unreduced giv not split.\n"); ! 2555: continue; ! 2556: } ! 2557: /* If the giv is an address destination, it could be something other ! 2558: than a simple register, these have to be treated differently. */ ! 2559: else if (v->giv_type == DEST_REG) ! 2560: { ! 2561: /* If value is not a constant, register, or register plus ! 2562: constant, then compute its value into a register before ! 2563: loop start. This prevents illegal rtx sharing, and should ! 2564: generate better code. We can use bl->initial_value here ! 2565: instead of splittable_regs[bl->regno] because this code ! 2566: is going before the loop start. */ ! 2567: if (unroll_type == UNROLL_COMPLETELY ! 2568: && GET_CODE (value) != CONST_INT ! 2569: && GET_CODE (value) != REG ! 2570: && (GET_CODE (value) != PLUS ! 2571: || GET_CODE (XEXP (value, 0)) != REG ! 2572: || GET_CODE (XEXP (value, 1)) != CONST_INT)) ! 2573: { ! 2574: rtx tem = gen_reg_rtx (v->mode); ! 2575: emit_iv_add_mult (bl->initial_value, v->mult_val, ! 2576: v->add_val, tem, loop_start); ! 2577: value = tem; ! 2578: } ! 2579: ! 2580: splittable_regs[REGNO (v->new_reg)] = value; ! 2581: } ! 2582: else ! 2583: { ! 2584: /* Splitting address givs is useful since it will often allow us ! 2585: to eliminate some increment insns for the base giv as ! 2586: unnecessary. */ ! 2587: ! 2588: /* If the addr giv is combined with a dest_reg giv, then all ! 2589: references to that dest reg will be remapped, which is NOT ! 2590: what we want for split addr regs. We always create a new ! 2591: register for the split addr giv, just to be safe. */ ! 2592: ! 2593: /* ??? If there are multiple address givs which have been ! 2594: combined with the same dest_reg giv, then we may only need ! 2595: one new register for them. Pulling out constants below will ! 2596: catch some of the common cases of this. Currently, I leave ! 2597: the work of simplifying multiple address givs to the ! 2598: following cse pass. */ ! 2599: ! 2600: v->const_adjust = 0; ! 2601: if (unroll_type != UNROLL_COMPLETELY) ! 2602: { ! 2603: /* If not completely unrolling the loop, then create a new ! 2604: register to hold the split value of the DEST_ADDR giv. ! 2605: Emit insn to initialize its value before loop start. */ ! 2606: tem = gen_reg_rtx (v->mode); ! 2607: ! 2608: /* If the address giv has a constant in its new_reg value, ! 2609: then this constant can be pulled out and put in value, ! 2610: instead of being part of the initialization code. */ ! 2611: ! 2612: if (GET_CODE (v->new_reg) == PLUS ! 2613: && GET_CODE (XEXP (v->new_reg, 1)) == CONST_INT) ! 2614: { ! 2615: v->dest_reg ! 2616: = plus_constant (tem, INTVAL (XEXP (v->new_reg,1))); ! 2617: ! 2618: /* Only succeed if this will give valid addresses. ! 2619: Try to validate both the first and the last ! 2620: address resulting from loop unrolling, if ! 2621: one fails, then can't do const elim here. */ ! 2622: if (memory_address_p (v->mem_mode, v->dest_reg) ! 2623: && memory_address_p (v->mem_mode, ! 2624: plus_constant (v->dest_reg, ! 2625: INTVAL (giv_inc) ! 2626: * (unroll_number - 1)))) ! 2627: { ! 2628: /* Save the negative of the eliminated const, so ! 2629: that we can calculate the dest_reg's increment ! 2630: value later. */ ! 2631: v->const_adjust = - INTVAL (XEXP (v->new_reg, 1)); ! 2632: ! 2633: v->new_reg = XEXP (v->new_reg, 0); ! 2634: if (loop_dump_stream) ! 2635: fprintf (loop_dump_stream, ! 2636: "Eliminating constant from giv %d\n", ! 2637: REGNO (tem)); ! 2638: } ! 2639: else ! 2640: v->dest_reg = tem; ! 2641: } ! 2642: else ! 2643: v->dest_reg = tem; ! 2644: ! 2645: /* If the address hasn't been checked for validity yet, do so ! 2646: now, and fail completely if either the first or the last ! 2647: unrolled copy of the address is not a valid address. */ ! 2648: if (v->dest_reg == tem ! 2649: && (! memory_address_p (v->mem_mode, v->dest_reg) ! 2650: || ! memory_address_p (v->mem_mode, ! 2651: plus_constant (v->dest_reg, ! 2652: INTVAL (giv_inc) ! 2653: * (unroll_number -1))))) ! 2654: { ! 2655: if (loop_dump_stream) ! 2656: fprintf (loop_dump_stream, ! 2657: "Illegal address for giv at insn %d\n", ! 2658: INSN_UID (v->insn)); ! 2659: continue; ! 2660: } ! 2661: ! 2662: /* To initialize the new register, just move the value of ! 2663: new_reg into it. This is not guaranteed to give a valid ! 2664: instruction on machines with complex addressing modes. ! 2665: If we can't recognize it, then delete it and emit insns ! 2666: to calculate the value from scratch. */ ! 2667: emit_insn_before (gen_rtx (SET, VOIDmode, tem, ! 2668: copy_rtx (v->new_reg)), ! 2669: loop_start); ! 2670: if (recog_memoized (PREV_INSN (loop_start)) < 0) ! 2671: { ! 2672: delete_insn (PREV_INSN (loop_start)); ! 2673: emit_iv_add_mult (bl->initial_value, v->mult_val, ! 2674: v->add_val, tem, loop_start); ! 2675: if (loop_dump_stream) ! 2676: fprintf (loop_dump_stream, ! 2677: "Illegal init insn, rewritten.\n"); ! 2678: } ! 2679: } ! 2680: else ! 2681: { ! 2682: v->dest_reg = value; ! 2683: ! 2684: /* Check the resulting address for validity, and fail ! 2685: if the resulting address would be illegal. */ ! 2686: if (! memory_address_p (v->mem_mode, v->dest_reg) ! 2687: || ! memory_address_p (v->mem_mode, ! 2688: plus_constant (v->dest_reg, ! 2689: INTVAL (giv_inc) * ! 2690: (unroll_number -1)))) ! 2691: { ! 2692: if (loop_dump_stream) ! 2693: fprintf (loop_dump_stream, ! 2694: "Illegal address for giv at insn %d\n", ! 2695: INSN_UID (v->insn)); ! 2696: continue; ! 2697: } ! 2698: } ! 2699: ! 2700: /* Store the value of dest_reg into the insn. This sharing ! 2701: will not be a problem as this insn will always be copied ! 2702: later. */ ! 2703: ! 2704: *v->location = v->dest_reg; ! 2705: ! 2706: /* If this address giv is combined with a dest reg giv, then ! 2707: save the base giv's induction pointer so that we will be ! 2708: able to handle this address giv properly. The base giv ! 2709: itself does not have to be splittable. */ ! 2710: ! 2711: if (v->same && v->same->giv_type == DEST_REG) ! 2712: addr_combined_regs[REGNO (v->same->new_reg)] = v->same; ! 2713: ! 2714: if (GET_CODE (v->new_reg) == REG) ! 2715: { ! 2716: /* This giv maybe hasn't been combined with any others. ! 2717: Make sure that it's giv is marked as splittable here. */ ! 2718: ! 2719: splittable_regs[REGNO (v->new_reg)] = value; ! 2720: ! 2721: /* Make it appear to depend upon itself, so that the ! 2722: giv will be properly split in the main loop above. */ ! 2723: if (! v->same) ! 2724: { ! 2725: v->same = v; ! 2726: addr_combined_regs[REGNO (v->new_reg)] = v; ! 2727: } ! 2728: } ! 2729: ! 2730: if (loop_dump_stream) ! 2731: fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n"); ! 2732: } ! 2733: } ! 2734: else ! 2735: { ! 2736: #if 0 ! 2737: /* Currently, unreduced giv's can't be split. This is not too much ! 2738: of a problem since unreduced giv's are not live across loop ! 2739: iterations anyways. When unrolling a loop completely though, ! 2740: it makes sense to reduce&split givs when possible, as this will ! 2741: result in simpler instructions, and will not require that a reg ! 2742: be live across loop iterations. */ ! 2743: ! 2744: splittable_regs[REGNO (v->dest_reg)] = value; ! 2745: fprintf (stderr, "Giv %d at insn %d not reduced\n", ! 2746: REGNO (v->dest_reg), INSN_UID (v->insn)); ! 2747: #else ! 2748: continue; ! 2749: #endif ! 2750: } ! 2751: ! 2752: /* Givs are only updated once by definition. Mark it so if this is ! 2753: a splittable register. Don't need to do anything for address givs ! 2754: where this may not be a register. */ ! 2755: ! 2756: if (GET_CODE (v->new_reg) == REG) ! 2757: splittable_regs_updates[REGNO (v->new_reg)] = 1; ! 2758: ! 2759: result++; ! 2760: ! 2761: if (loop_dump_stream) ! 2762: { ! 2763: int regnum; ! 2764: ! 2765: if (GET_CODE (v->dest_reg) == CONST_INT) ! 2766: regnum = -1; ! 2767: else if (GET_CODE (v->dest_reg) != REG) ! 2768: regnum = REGNO (XEXP (v->dest_reg, 0)); ! 2769: else ! 2770: regnum = REGNO (v->dest_reg); ! 2771: fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n", ! 2772: regnum, INSN_UID (v->insn)); ! 2773: } ! 2774: } ! 2775: ! 2776: return result; ! 2777: } ! 2778: ! 2779: /* Try to prove that the register is dead after the loop exits. Trace every ! 2780: loop exit looking for an insn that will always be executed, which sets ! 2781: the register to some value, and appears before the first use of the register ! 2782: is found. If successful, then return 1, otherwise return 0. */ ! 2783: ! 2784: /* ?? Could be made more intelligent in the handling of jumps, so that ! 2785: it can search past if statements and other similar structures. */ ! 2786: ! 2787: static int ! 2788: reg_dead_after_loop (reg, loop_start, loop_end) ! 2789: rtx reg, loop_start, loop_end; ! 2790: { ! 2791: rtx insn, label; ! 2792: enum rtx_code code; ! 2793: int jump_count = 0; ! 2794: ! 2795: /* HACK: Must also search the loop fall through exit, create a label_ref ! 2796: here which points to the loop_end, and append the loop_number_exit_labels ! 2797: list to it. */ ! 2798: label = gen_rtx (LABEL_REF, VOIDmode, loop_end); ! 2799: LABEL_NEXTREF (label) ! 2800: = loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]; ! 2801: ! 2802: for ( ; label; label = LABEL_NEXTREF (label)) ! 2803: { ! 2804: /* Succeed if find an insn which sets the biv or if reach end of ! 2805: function. Fail if find an insn that uses the biv, or if come to ! 2806: a conditional jump. */ ! 2807: ! 2808: insn = NEXT_INSN (XEXP (label, 0)); ! 2809: while (insn) ! 2810: { ! 2811: code = GET_CODE (insn); ! 2812: if (GET_RTX_CLASS (code) == 'i') ! 2813: { ! 2814: rtx set; ! 2815: ! 2816: if (reg_referenced_p (reg, PATTERN (insn))) ! 2817: return 0; ! 2818: ! 2819: set = single_set (insn); ! 2820: if (set && rtx_equal_p (SET_DEST (set), reg)) ! 2821: break; ! 2822: } ! 2823: ! 2824: if (code == JUMP_INSN) ! 2825: { ! 2826: if (GET_CODE (PATTERN (insn)) == RETURN) ! 2827: break; ! 2828: else if (! simplejump_p (insn) ! 2829: /* Prevent infinite loop following infinite loops. */ ! 2830: || jump_count++ > 20) ! 2831: return 0; ! 2832: else ! 2833: insn = JUMP_LABEL (insn); ! 2834: } ! 2835: ! 2836: insn = NEXT_INSN (insn); ! 2837: } ! 2838: } ! 2839: ! 2840: /* Success, the register is dead on all loop exits. */ ! 2841: return 1; ! 2842: } ! 2843: ! 2844: /* Try to calculate the final value of the biv, the value it will have at ! 2845: the end of the loop. If we can do it, return that value. */ ! 2846: ! 2847: rtx ! 2848: final_biv_value (bl, loop_start, loop_end) ! 2849: struct iv_class *bl; ! 2850: rtx loop_start, loop_end; ! 2851: { ! 2852: rtx increment, tem; ! 2853: ! 2854: /* ??? This only works for MODE_INT biv's. Reject all others for now. */ ! 2855: ! 2856: if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT) ! 2857: return 0; ! 2858: ! 2859: /* The final value for reversed bivs must be calculated differently than ! 2860: for ordinary bivs. In this case, there is already an insn after the ! 2861: loop which sets this biv's final value (if necessary), and there are ! 2862: no other loop exits, so we can return any value. */ ! 2863: if (bl->reversed) ! 2864: { ! 2865: if (loop_dump_stream) ! 2866: fprintf (loop_dump_stream, ! 2867: "Final biv value for %d, reversed biv.\n", bl->regno); ! 2868: ! 2869: return const0_rtx; ! 2870: } ! 2871: ! 2872: /* Try to calculate the final value as initial value + (number of iterations ! 2873: * increment). For this to work, increment must be invariant, the only ! 2874: exit from the loop must be the fall through at the bottom (otherwise ! 2875: it may not have its final value when the loop exits), and the initial ! 2876: value of the biv must be invariant. */ ! 2877: ! 2878: if (loop_n_iterations != 0 ! 2879: && ! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]] ! 2880: && invariant_p (bl->initial_value)) ! 2881: { ! 2882: increment = biv_total_increment (bl, loop_start, loop_end); ! 2883: ! 2884: if (increment && invariant_p (increment)) ! 2885: { ! 2886: /* Can calculate the loop exit value, emit insns after loop ! 2887: end to calculate this value into a temporary register in ! 2888: case it is needed later. */ ! 2889: ! 2890: tem = gen_reg_rtx (bl->biv->mode); ! 2891: /* Make sure loop_end is not the last insn. */ ! 2892: if (NEXT_INSN (loop_end) == 0) ! 2893: emit_note_after (NOTE_INSN_DELETED, loop_end); ! 2894: emit_iv_add_mult (increment, GEN_INT (loop_n_iterations), ! 2895: bl->initial_value, tem, NEXT_INSN (loop_end)); ! 2896: ! 2897: if (loop_dump_stream) ! 2898: fprintf (loop_dump_stream, ! 2899: "Final biv value for %d, calculated.\n", bl->regno); ! 2900: ! 2901: return tem; ! 2902: } ! 2903: } ! 2904: ! 2905: /* Check to see if the biv is dead at all loop exits. */ ! 2906: if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end)) ! 2907: { ! 2908: if (loop_dump_stream) ! 2909: fprintf (loop_dump_stream, ! 2910: "Final biv value for %d, biv dead after loop exit.\n", ! 2911: bl->regno); ! 2912: ! 2913: return const0_rtx; ! 2914: } ! 2915: ! 2916: return 0; ! 2917: } ! 2918: ! 2919: /* Try to calculate the final value of the giv, the value it will have at ! 2920: the end of the loop. If we can do it, return that value. */ ! 2921: ! 2922: rtx ! 2923: final_giv_value (v, loop_start, loop_end) ! 2924: struct induction *v; ! 2925: rtx loop_start, loop_end; ! 2926: { ! 2927: struct iv_class *bl; ! 2928: rtx insn; ! 2929: rtx increment, tem; ! 2930: enum rtx_code code; ! 2931: rtx insert_before, seq; ! 2932: ! 2933: bl = reg_biv_class[REGNO (v->src_reg)]; ! 2934: ! 2935: /* The final value for givs which depend on reversed bivs must be calculated ! 2936: differently than for ordinary givs. In this case, there is already an ! 2937: insn after the loop which sets this giv's final value (if necessary), ! 2938: and there are no other loop exits, so we can return any value. */ ! 2939: if (bl->reversed) ! 2940: { ! 2941: if (loop_dump_stream) ! 2942: fprintf (loop_dump_stream, ! 2943: "Final giv value for %d, depends on reversed biv\n", ! 2944: REGNO (v->dest_reg)); ! 2945: return const0_rtx; ! 2946: } ! 2947: ! 2948: /* Try to calculate the final value as a function of the biv it depends ! 2949: upon. The only exit from the loop must be the fall through at the bottom ! 2950: (otherwise it may not have its final value when the loop exits). */ ! 2951: ! 2952: /* ??? Can calculate the final giv value by subtracting off the ! 2953: extra biv increments times the giv's mult_val. The loop must have ! 2954: only one exit for this to work, but the loop iterations does not need ! 2955: to be known. */ ! 2956: ! 2957: if (loop_n_iterations != 0 ! 2958: && ! loop_number_exit_labels[uid_loop_num[INSN_UID (loop_start)]]) ! 2959: { ! 2960: /* ?? It is tempting to use the biv's value here since these insns will ! 2961: be put after the loop, and hence the biv will have its final value ! 2962: then. However, this fails if the biv is subsequently eliminated. ! 2963: Perhaps determine whether biv's are eliminable before trying to ! 2964: determine whether giv's are replaceable so that we can use the ! 2965: biv value here if it is not eliminable. */ ! 2966: ! 2967: increment = biv_total_increment (bl, loop_start, loop_end); ! 2968: ! 2969: if (increment && invariant_p (increment)) ! 2970: { ! 2971: /* Can calculate the loop exit value of its biv as ! 2972: (loop_n_iterations * increment) + initial_value */ ! 2973: ! 2974: /* The loop exit value of the giv is then ! 2975: (final_biv_value - extra increments) * mult_val + add_val. ! 2976: The extra increments are any increments to the biv which ! 2977: occur in the loop after the giv's value is calculated. ! 2978: We must search from the insn that sets the giv to the end ! 2979: of the loop to calculate this value. */ ! 2980: ! 2981: insert_before = NEXT_INSN (loop_end); ! 2982: ! 2983: /* Put the final biv value in tem. */ ! 2984: tem = gen_reg_rtx (bl->biv->mode); ! 2985: emit_iv_add_mult (increment, GEN_INT (loop_n_iterations), ! 2986: bl->initial_value, tem, insert_before); ! 2987: ! 2988: /* Subtract off extra increments as we find them. */ ! 2989: for (insn = NEXT_INSN (v->insn); insn != loop_end; ! 2990: insn = NEXT_INSN (insn)) ! 2991: { ! 2992: struct induction *biv; ! 2993: ! 2994: for (biv = bl->biv; biv; biv = biv->next_iv) ! 2995: if (biv->insn == insn) ! 2996: { ! 2997: start_sequence (); ! 2998: tem = expand_binop (GET_MODE (tem), sub_optab, tem, ! 2999: biv->add_val, NULL_RTX, 0, ! 3000: OPTAB_LIB_WIDEN); ! 3001: seq = gen_sequence (); ! 3002: end_sequence (); ! 3003: emit_insn_before (seq, insert_before); ! 3004: } ! 3005: } ! 3006: ! 3007: /* Now calculate the giv's final value. */ ! 3008: emit_iv_add_mult (tem, v->mult_val, v->add_val, tem, ! 3009: insert_before); ! 3010: ! 3011: if (loop_dump_stream) ! 3012: fprintf (loop_dump_stream, ! 3013: "Final giv value for %d, calc from biv's value.\n", ! 3014: REGNO (v->dest_reg)); ! 3015: ! 3016: return tem; ! 3017: } ! 3018: } ! 3019: ! 3020: /* Replaceable giv's should never reach here. */ ! 3021: if (v->replaceable) ! 3022: abort (); ! 3023: ! 3024: /* Check to see if the biv is dead at all loop exits. */ ! 3025: if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end)) ! 3026: { ! 3027: if (loop_dump_stream) ! 3028: fprintf (loop_dump_stream, ! 3029: "Final giv value for %d, giv dead after loop exit.\n", ! 3030: REGNO (v->dest_reg)); ! 3031: ! 3032: return const0_rtx; ! 3033: } ! 3034: ! 3035: return 0; ! 3036: } ! 3037: ! 3038: ! 3039: /* Calculate the number of loop iterations. Returns the exact number of loop ! 3040: iterations if it can be calculated, otherwise returns zero. */ ! 3041: ! 3042: unsigned HOST_WIDE_INT ! 3043: loop_iterations (loop_start, loop_end) ! 3044: rtx loop_start, loop_end; ! 3045: { ! 3046: rtx comparison, comparison_value; ! 3047: rtx iteration_var, initial_value, increment, final_value; ! 3048: enum rtx_code comparison_code; ! 3049: HOST_WIDE_INT i; ! 3050: int increment_dir; ! 3051: int unsigned_compare, compare_dir, final_larger; ! 3052: unsigned long tempu; ! 3053: rtx last_loop_insn; ! 3054: ! 3055: /* First find the iteration variable. If the last insn is a conditional ! 3056: branch, and the insn before tests a register value, make that the ! 3057: iteration variable. */ ! 3058: ! 3059: loop_initial_value = 0; ! 3060: loop_increment = 0; ! 3061: loop_final_value = 0; ! 3062: loop_iteration_var = 0; ! 3063: ! 3064: last_loop_insn = prev_nonnote_insn (loop_end); ! 3065: ! 3066: comparison = get_condition_for_loop (last_loop_insn); ! 3067: if (comparison == 0) ! 3068: { ! 3069: if (loop_dump_stream) ! 3070: fprintf (loop_dump_stream, ! 3071: "Loop unrolling: No final conditional branch found.\n"); ! 3072: return 0; ! 3073: } ! 3074: ! 3075: /* ??? Get_condition may switch position of induction variable and ! 3076: invariant register when it canonicalizes the comparison. */ ! 3077: ! 3078: comparison_code = GET_CODE (comparison); ! 3079: iteration_var = XEXP (comparison, 0); ! 3080: comparison_value = XEXP (comparison, 1); ! 3081: ! 3082: if (GET_CODE (iteration_var) != REG) ! 3083: { ! 3084: if (loop_dump_stream) ! 3085: fprintf (loop_dump_stream, ! 3086: "Loop unrolling: Comparison not against register.\n"); ! 3087: return 0; ! 3088: } ! 3089: ! 3090: /* Loop iterations is always called before any new registers are created ! 3091: now, so this should never occur. */ ! 3092: ! 3093: if (REGNO (iteration_var) >= max_reg_before_loop) ! 3094: abort (); ! 3095: ! 3096: iteration_info (iteration_var, &initial_value, &increment, ! 3097: loop_start, loop_end); ! 3098: if (initial_value == 0) ! 3099: /* iteration_info already printed a message. */ ! 3100: return 0; ! 3101: ! 3102: if (increment == 0) ! 3103: { ! 3104: if (loop_dump_stream) ! 3105: fprintf (loop_dump_stream, ! 3106: "Loop unrolling: Increment value can't be calculated.\n"); ! 3107: return 0; ! 3108: } ! 3109: if (GET_CODE (increment) != CONST_INT) ! 3110: { ! 3111: if (loop_dump_stream) ! 3112: fprintf (loop_dump_stream, ! 3113: "Loop unrolling: Increment value not constant.\n"); ! 3114: return 0; ! 3115: } ! 3116: if (GET_CODE (initial_value) != CONST_INT) ! 3117: { ! 3118: if (loop_dump_stream) ! 3119: fprintf (loop_dump_stream, ! 3120: "Loop unrolling: Initial value not constant.\n"); ! 3121: return 0; ! 3122: } ! 3123: ! 3124: /* If the comparison value is an invariant register, then try to find ! 3125: its value from the insns before the start of the loop. */ ! 3126: ! 3127: if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value)) ! 3128: { ! 3129: rtx insn, set; ! 3130: ! 3131: for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn)) ! 3132: { ! 3133: if (GET_CODE (insn) == CODE_LABEL) ! 3134: break; ! 3135: ! 3136: else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' ! 3137: && reg_set_p (comparison_value, insn)) ! 3138: { ! 3139: /* We found the last insn before the loop that sets the register. ! 3140: If it sets the entire register, and has a REG_EQUAL note, ! 3141: then use the value of the REG_EQUAL note. */ ! 3142: if ((set = single_set (insn)) ! 3143: && (SET_DEST (set) == comparison_value)) ! 3144: { ! 3145: rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX); ! 3146: ! 3147: if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST) ! 3148: comparison_value = XEXP (note, 0); ! 3149: } ! 3150: break; ! 3151: } ! 3152: } ! 3153: } ! 3154: ! 3155: final_value = approx_final_value (comparison_code, comparison_value, ! 3156: &unsigned_compare, &compare_dir); ! 3157: ! 3158: /* Save the calculated values describing this loop's bounds, in case ! 3159: precondition_loop_p will need them later. These values can not be ! 3160: recalculated inside precondition_loop_p because strength reduction ! 3161: optimizations may obscure the loop's structure. */ ! 3162: ! 3163: loop_iteration_var = iteration_var; ! 3164: loop_initial_value = initial_value; ! 3165: loop_increment = increment; ! 3166: loop_final_value = final_value; ! 3167: ! 3168: if (final_value == 0) ! 3169: { ! 3170: if (loop_dump_stream) ! 3171: fprintf (loop_dump_stream, ! 3172: "Loop unrolling: EQ comparison loop.\n"); ! 3173: return 0; ! 3174: } ! 3175: else if (GET_CODE (final_value) != CONST_INT) ! 3176: { ! 3177: if (loop_dump_stream) ! 3178: fprintf (loop_dump_stream, ! 3179: "Loop unrolling: Final value not constant.\n"); ! 3180: return 0; ! 3181: } ! 3182: ! 3183: /* ?? Final value and initial value do not have to be constants. ! 3184: Only their difference has to be constant. When the iteration variable ! 3185: is an array address, the final value and initial value might both ! 3186: be addresses with the same base but different constant offsets. ! 3187: Final value must be invariant for this to work. ! 3188: ! 3189: To do this, need some way to find the values of registers which are ! 3190: invariant. */ ! 3191: ! 3192: /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */ ! 3193: if (unsigned_compare) ! 3194: final_larger ! 3195: = ((unsigned HOST_WIDE_INT) INTVAL (final_value) ! 3196: > (unsigned HOST_WIDE_INT) INTVAL (initial_value)) ! 3197: - ((unsigned HOST_WIDE_INT) INTVAL (final_value) ! 3198: < (unsigned HOST_WIDE_INT) INTVAL (initial_value)); ! 3199: else ! 3200: final_larger = (INTVAL (final_value) > INTVAL (initial_value)) ! 3201: - (INTVAL (final_value) < INTVAL (initial_value)); ! 3202: ! 3203: if (INTVAL (increment) > 0) ! 3204: increment_dir = 1; ! 3205: else if (INTVAL (increment) == 0) ! 3206: increment_dir = 0; ! 3207: else ! 3208: increment_dir = -1; ! 3209: ! 3210: /* There are 27 different cases: compare_dir = -1, 0, 1; ! 3211: final_larger = -1, 0, 1; increment_dir = -1, 0, 1. ! 3212: There are 4 normal cases, 4 reverse cases (where the iteration variable ! 3213: will overflow before the loop exits), 4 infinite loop cases, and 15 ! 3214: immediate exit (0 or 1 iteration depending on loop type) cases. ! 3215: Only try to optimize the normal cases. */ ! 3216: ! 3217: /* (compare_dir/final_larger/increment_dir) ! 3218: Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1) ! 3219: Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1) ! 3220: Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0) ! 3221: Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */ ! 3222: ! 3223: /* ?? If the meaning of reverse loops (where the iteration variable ! 3224: will overflow before the loop exits) is undefined, then could ! 3225: eliminate all of these special checks, and just always assume ! 3226: the loops are normal/immediate/infinite. Note that this means ! 3227: the sign of increment_dir does not have to be known. Also, ! 3228: since it does not really hurt if immediate exit loops or infinite loops ! 3229: are optimized, then that case could be ignored also, and hence all ! 3230: loops can be optimized. ! 3231: ! 3232: According to ANSI Spec, the reverse loop case result is undefined, ! 3233: because the action on overflow is undefined. ! 3234: ! 3235: See also the special test for NE loops below. */ ! 3236: ! 3237: if (final_larger == increment_dir && final_larger != 0 ! 3238: && (final_larger == compare_dir || compare_dir == 0)) ! 3239: /* Normal case. */ ! 3240: ; ! 3241: else ! 3242: { ! 3243: if (loop_dump_stream) ! 3244: fprintf (loop_dump_stream, ! 3245: "Loop unrolling: Not normal loop.\n"); ! 3246: return 0; ! 3247: } ! 3248: ! 3249: /* Calculate the number of iterations, final_value is only an approximation, ! 3250: so correct for that. Note that tempu and loop_n_iterations are ! 3251: unsigned, because they can be as large as 2^n - 1. */ ! 3252: ! 3253: i = INTVAL (increment); ! 3254: if (i > 0) ! 3255: tempu = INTVAL (final_value) - INTVAL (initial_value); ! 3256: else if (i < 0) ! 3257: { ! 3258: tempu = INTVAL (initial_value) - INTVAL (final_value); ! 3259: i = -i; ! 3260: } ! 3261: else ! 3262: abort (); ! 3263: ! 3264: /* For NE tests, make sure that the iteration variable won't miss the ! 3265: final value. If tempu mod i is not zero, then the iteration variable ! 3266: will overflow before the loop exits, and we can not calculate the ! 3267: number of iterations. */ ! 3268: if (compare_dir == 0 && (tempu % i) != 0) ! 3269: return 0; ! 3270: ! 3271: return tempu / i + ((tempu % i) != 0); ! 3272: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.