|
|
1.1 root 1: /* Subroutines for insn-output.c for HPPA.
2: Copyright (C) 1992, 1993 Free Software Foundation, Inc.
3: Contributed by Tim Moore ([email protected]), based on sparc.c
4:
5: This file is part of GNU CC.
6:
7: GNU CC is free software; you can redistribute it and/or modify
8: it under the terms of the GNU General Public License as published by
9: the Free Software Foundation; either version 2, or (at your option)
10: any later version.
11:
12: GNU CC is distributed in the hope that it will be useful,
13: but WITHOUT ANY WARRANTY; without even the implied warranty of
14: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15: GNU General Public License for more details.
16:
17: You should have received a copy of the GNU General Public License
18: along with GNU CC; see the file COPYING. If not, write to
19: the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20:
21: #include <stdio.h>
22: #include "config.h"
23: #include "rtl.h"
24: #include "regs.h"
25: #include "hard-reg-set.h"
26: #include "real.h"
27: #include "insn-config.h"
28: #include "conditions.h"
29: #include "insn-flags.h"
30: #include "output.h"
31: #include "insn-attr.h"
32: #include "flags.h"
33: #include "tree.h"
34: #include "c-tree.h"
35: #include "expr.h"
36: #include "obstack.h"
37:
/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */

rtx hppa_compare_op0, hppa_compare_op1;
/* NOTE(review): presumably records the kind of the pending compare
   that goes with the two operands above -- confirm against the users
   of this variable, which are not visible in this part of the file.  */
enum cmp_type hppa_branch_type;

/* When non-zero, finalize_pic emits a copy of the PIC offset table
   register into this rtx and then resets the variable to zero.  */
rtx hppa_save_pic_table_rtx;

/* Set by the FUNCTION_PROFILER macro.  */
int hp_profile_labelno;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Forward declaration only; being static, the definition must live
   elsewhere in this file.  */
static rtx find_addr_reg ();
54:
55: /* Return non-zero only if OP is a register of mode MODE,
56: or CONST0_RTX. */
57: int
58: reg_or_0_operand (op, mode)
59: rtx op;
60: enum machine_mode mode;
61: {
62: return (op == CONST0_RTX (mode) || register_operand (op, mode));
63: }
64:
65: /* Return non-zero if OP is suitable for use in a call to a named
66: function.
67:
68: (???) For 2.5 try to eliminate either call_operand_address or
69: function_label_operand, they perform very similar functions. */
70: int
71: call_operand_address (op, mode)
72: rtx op;
73: enum machine_mode mode;
74: {
75: return (CONSTANT_P (op) && ! TARGET_LONG_CALLS);
76: }
77:
78: /* Return 1 if X contains a symbolic expression. We know these
79: expressions will have one of a few well defined forms, so
80: we need only check those forms. */
81: int
82: symbolic_expression_p (x)
83: register rtx x;
84: {
85:
86: /* Strip off any HIGH. */
87: if (GET_CODE (x) == HIGH)
88: x = XEXP (x, 0);
89:
90: return (symbolic_operand (x, VOIDmode));
91: }
92:
93: int
94: symbolic_operand (op, mode)
95: register rtx op;
96: enum machine_mode mode;
97: {
98: switch (GET_CODE (op))
99: {
100: case SYMBOL_REF:
101: case LABEL_REF:
102: return 1;
103: case CONST:
104: op = XEXP (op, 0);
105: return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
106: || GET_CODE (XEXP (op, 0)) == LABEL_REF)
107: && GET_CODE (XEXP (op, 1)) == CONST_INT);
108: default:
109: return 0;
110: }
111: }
112:
113: /* Return truth value of statement that OP is a symbolic memory
114: operand of mode MODE. */
115:
116: int
117: symbolic_memory_operand (op, mode)
118: rtx op;
119: enum machine_mode mode;
120: {
121: if (GET_CODE (op) == SUBREG)
122: op = SUBREG_REG (op);
123: if (GET_CODE (op) != MEM)
124: return 0;
125: op = XEXP (op, 0);
126: return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
127: || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
128: }
129:
130: /* Return 1 if the operand is either a register or a memory operand that is
131: not symbolic. */
132:
133: int
134: reg_or_nonsymb_mem_operand (op, mode)
135: register rtx op;
136: enum machine_mode mode;
137: {
138: if (register_operand (op, mode))
139: return 1;
140:
141: if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
142: return 1;
143:
144: return 0;
145: }
146:
147: /* Return 1 if the operand is either a register, zero, or a memory operand
148: that is not symbolic. */
149:
150: int
151: reg_or_0_or_nonsymb_mem_operand (op, mode)
152: register rtx op;
153: enum machine_mode mode;
154: {
155: if (register_operand (op, mode))
156: return 1;
157:
158: if (op == CONST0_RTX (mode))
159: return 1;
160:
161: if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
162: return 1;
163:
164: return 0;
165: }
166:
167: /* Accept any constant that can be moved in one instructions into a
168: general register. */
169: int
170: cint_ok_for_move (intval)
171: HOST_WIDE_INT intval;
172: {
173: /* OK if ldo, ldil, or zdepi, can be used. */
174: return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
175: || zdepi_cint_p (intval));
176: }
177:
178: /* Accept anything that can be moved in one instruction into a general
179: register. */
180: int
181: move_operand (op, mode)
182: rtx op;
183: enum machine_mode mode;
184: {
185: if (register_operand (op, mode))
186: return 1;
187:
188: if (GET_CODE (op) == CONST_INT)
189: return cint_ok_for_move (INTVAL (op));
190:
191: if (GET_MODE (op) != mode)
192: return 0;
193: if (GET_CODE (op) == SUBREG)
194: op = SUBREG_REG (op);
195: if (GET_CODE (op) != MEM)
196: return 0;
197:
198: op = XEXP (op, 0);
199: if (GET_CODE (op) == LO_SUM)
200: return (register_operand (XEXP (op, 0), Pmode)
201: && CONSTANT_P (XEXP (op, 1)));
202: return memory_address_p (mode, op);
203: }
204:
205: /* Accept REG and any CONST_INT that can be moved in one instruction into a
206: general register. */
207: int
208: reg_or_cint_move_operand (op, mode)
209: rtx op;
210: enum machine_mode mode;
211: {
212: if (register_operand (op, mode))
213: return 1;
214:
215: if (GET_CODE (op) == CONST_INT)
216: return cint_ok_for_move (INTVAL (op));
217:
218: return 0;
219: }
220:
221: int
222: pic_operand (op, mode)
223: rtx op;
224: enum machine_mode mode;
225: {
226: return flag_pic && GET_CODE (op) == LABEL_REF;
227: }
228:
229: int
230: fp_reg_operand (op, mode)
231: rtx op;
232: enum machine_mode mode;
233: {
234: return reg_renumber && FP_REG_P (op);
235: }
236:
237:
/* Declared by hand here; the definitions are not in the visible part
   of this file.  current_function_uses_pic_offset_table is set by
   legitimize_pic_address below.  */
extern int current_function_uses_pic_offset_table;
extern rtx force_reg (), validize_mem ();

/* The rtx for the global offset table which is a special form
   that *is* a position independent symbolic constant.  */
rtx pic_pc_rtx;
244:
245: /* Ensure that we are not using patterns that are not OK with PIC. */
246:
247: int
248: check_pic (i)
249: int i;
250: {
251: extern rtx recog_operand[];
252: switch (flag_pic)
253: {
254: case 1:
255: if (GET_CODE (recog_operand[i]) == SYMBOL_REF
256: || (GET_CODE (recog_operand[i]) == CONST
257: && ! rtx_equal_p (pic_pc_rtx, recog_operand[i])))
258: abort ();
259: case 2:
260: default:
261: return 1;
262: }
263: }
264:
265: /* Return truth value of whether OP can be used as an operand in a
266: three operand arithmetic insn that accepts registers of mode MODE
267: or 14-bit signed integers. */
268: int
269: arith_operand (op, mode)
270: rtx op;
271: enum machine_mode mode;
272: {
273: return (register_operand (op, mode)
274: || (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
275: }
276:
277: /* Return truth value of whether OP can be used as an operand in a
278: three operand arithmetic insn that accepts registers of mode MODE
279: or 11-bit signed integers. */
280: int
281: arith11_operand (op, mode)
282: rtx op;
283: enum machine_mode mode;
284: {
285: return (register_operand (op, mode)
286: || (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
287: }
288:
289: /* A constant integer suitable for use in a PRE_MODIFY memory
290: reference. */
291: int
292: pre_cint_operand (op, mode)
293: rtx op;
294: enum machine_mode mode;
295: {
296: return (GET_CODE (op) == CONST_INT
297: && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
298: }
299:
300: /* A constant integer suitable for use in a POST_MODIFY memory
301: reference. */
302: int
303: post_cint_operand (op, mode)
304: rtx op;
305: enum machine_mode mode;
306: {
307: return (GET_CODE (op) == CONST_INT
308: && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
309: }
310:
311: int
312: arith_double_operand (op, mode)
313: rtx op;
314: enum machine_mode mode;
315: {
316: return (register_operand (op, mode)
317: || (GET_CODE (op) == CONST_DOUBLE
318: && GET_MODE (op) == mode
319: && VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
320: && (CONST_DOUBLE_HIGH (op) >= 0
321: == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
322: }
323:
324: /* Return truth value of whether OP is a integer which fits the
325: range constraining immediate operands in three-address insns. */
326:
327: int
328: int5_operand (op, mode)
329: rtx op;
330: enum machine_mode mode;
331: {
332: return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
333: }
334:
335: int
336: uint5_operand (op, mode)
337: rtx op;
338: enum machine_mode mode;
339: {
340: return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
341: }
342:
343: int
344: int11_operand (op, mode)
345: rtx op;
346: enum machine_mode mode;
347: {
348: return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
349: }
350:
351: int
352: uint32_operand (op, mode)
353: rtx op;
354: enum machine_mode mode;
355: {
356: #if HOST_BITS_PER_WIDE_INT > 32
357: /* All allowed constants will fit a CONST_INT. */
358: return (GET_CODE (op) == CONST_INT
359: && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
360: #else
361: return (GET_CODE (op) == CONST_INT
362: || (GET_CODE (op) == CONST_DOUBLE
363: && CONST_DOUBLE_HIGH (op) == 0));
364: #endif
365: }
366:
367: int
368: arith5_operand (op, mode)
369: rtx op;
370: enum machine_mode mode;
371: {
372: return register_operand (op, mode) || int5_operand (op, mode);
373: }
374:
375: /* True iff zdepi can be used to generate this CONST_INT. */
376: int
377: zdepi_cint_p (x)
378: unsigned HOST_WIDE_INT x;
379: {
380: unsigned lsb_mask, t;
381:
382: /* This might not be obvious, but it's at least fast.
383: This function is critcal; we don't have the time loops would take. */
384: lsb_mask = x & -x;
385: t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
386: /* Return true iff t is a power of two. */
387: return ((t & (t - 1)) == 0);
388: }
389:
390: /* True iff depi or extru can be used to compute (reg & mask).
391: Accept bit pattern like these:
392: 0....01....1
393: 1....10....0
394: 1..10..01..1 */
395: int
396: and_mask_p (mask)
397: unsigned HOST_WIDE_INT mask;
398: {
399: mask = ~mask;
400: mask += mask & -mask;
401: return (mask & (mask - 1)) == 0;
402: }
403:
404: /* True iff depi or extru can be used to compute (reg & OP). */
405: int
406: and_operand (op, mode)
407: rtx op;
408: enum machine_mode mode;
409: {
410: return (register_operand (op, mode)
411: || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
412: }
413:
414: /* True iff depi can be used to compute (reg | MASK). */
415: int
416: ior_mask_p (mask)
417: unsigned HOST_WIDE_INT mask;
418: {
419: mask += mask & -mask;
420: return (mask & (mask - 1)) == 0;
421: }
422:
423: /* True iff depi can be used to compute (reg | OP). */
424: int
425: ior_operand (op, mode)
426: rtx op;
427: enum machine_mode mode;
428: {
429: return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
430: }
431:
432: int
433: lhs_lshift_operand (op, mode)
434: rtx op;
435: enum machine_mode mode;
436: {
437: return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
438: }
439:
440: /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
441: Such values can be the left hand side x in (x << r), using the zvdepi
442: instruction. */
443: int
444: lhs_lshift_cint_operand (op, mode)
445: rtx op;
446: enum machine_mode mode;
447: {
448: unsigned x;
449: if (GET_CODE (op) != CONST_INT)
450: return 0;
451: x = INTVAL (op) >> 4;
452: return (x & (x + 1)) == 0;
453: }
454:
455: int
456: arith32_operand (op, mode)
457: rtx op;
458: enum machine_mode mode;
459: {
460: return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
461: }
462:
463: int
464: pc_or_label_operand (op, mode)
465: rtx op;
466: enum machine_mode mode;
467: {
468: return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
469: }
470:
471: /* Legitimize PIC addresses. If the address is already
472: position-independent, we return ORIG. Newly generated
473: position-independent addresses go to REG. If we need more
474: than one register, we lose. */
475:
476: rtx
477: legitimize_pic_address (orig, mode, reg)
478: rtx orig, reg;
479: enum machine_mode mode;
480: {
481: rtx pic_ref = orig;
482:
483: if (GET_CODE (orig) == SYMBOL_REF)
484: {
485: if (reg == 0)
486: abort ();
487:
488: if (flag_pic == 2)
489: {
490: emit_insn (gen_rtx (SET, VOIDmode, reg,
491: gen_rtx (HIGH, Pmode, orig)));
492: emit_insn (gen_rtx (SET, VOIDmode, reg,
493: gen_rtx (LO_SUM, Pmode, reg, orig)));
494: orig = reg;
495: }
496: pic_ref = gen_rtx (MEM, Pmode,
497: gen_rtx (PLUS, Pmode,
498: pic_offset_table_rtx, orig));
499: current_function_uses_pic_offset_table = 1;
500: RTX_UNCHANGING_P (pic_ref) = 1;
501: emit_move_insn (reg, pic_ref);
502: return reg;
503: }
504: else if (GET_CODE (orig) == CONST)
505: {
506: rtx base;
507:
508: if (GET_CODE (XEXP (orig, 0)) == PLUS
509: && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
510: return orig;
511:
512: if (reg == 0)
513: abort ();
514:
515: if (GET_CODE (XEXP (orig, 0)) == PLUS)
516: {
517: base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
518: orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
519: base == reg ? 0 : reg);
520: }
521: else abort ();
522: if (GET_CODE (orig) == CONST_INT)
523: {
524: if (INT_14_BITS (orig))
525: return plus_constant_for_output (base, INTVAL (orig));
526: orig = force_reg (Pmode, orig);
527: }
528: pic_ref = gen_rtx (PLUS, Pmode, base, orig);
529: /* Likewise, should we set special REG_NOTEs here? */
530: }
531: return pic_ref;
532: }
533:
534: /* Emit special PIC prologues and epilogues. */
535:
536: void
537: finalize_pic ()
538: {
539: if (hppa_save_pic_table_rtx)
540: {
541: emit_insn_after (gen_rtx (SET, VOIDmode,
542: hppa_save_pic_table_rtx,
543: gen_rtx (REG, Pmode, PIC_OFFSET_TABLE_REGNUM)),
544: get_insns ());
545: /* Need to emit this whether or not we obey regdecls,
546: since setjmp/longjmp can cause life info to screw up. */
547: hppa_save_pic_table_rtx = 0;
548: }
549: emit_insn (gen_rtx (USE, VOIDmode, pic_offset_table_rtx));
550: }
551:
552: /* Try machine-dependent ways of modifying an illegitimate address
553: to be legitimate. If we find one, return the new, valid address.
554: This macro is used in only one place: `memory_address' in explow.c.
555:
556: OLDX is the address as it was before break_out_memory_refs was called.
557: In some cases it is useful to look at this to decide what needs to be done.
558:
559: MODE and WIN are passed so that this macro can use
560: GO_IF_LEGITIMATE_ADDRESS.
561:
562: It is always safe for this macro to do nothing. It exists to recognize
563: opportunities to optimize the output.
564:
565: For the PA, transform:
566:
567: memory(X + <large int>)
568:
569: into:
570:
571: if (<large int> & mask) >= 16
572: Y = (<large int> & ~mask) + mask + 1 Round up.
573: else
574: Y = (<large int> & ~mask) Round down.
575: Z = X + Y
576: memory (Z + (<large int> - Y));
577:
578: This is for CSE to find several similar references, and only use one Z.
579:
580: X can either be a SYMBOL_REF or REG, but because combine can not
581: perform a 4->2 combination we do nothing for SYMBOL_REF + D where
582: D will not fit in 14 bits.
583:
584: MODE_FLOAT references allow displacements which fit in 5 bits, so use
585: 0x1f as the mask.
586:
587: MODE_INT references allow displacements which fit in 14 bits, so use
588: 0x3fff as the mask.
589:
590: This relies on the fact that most mode MODE_FLOAT references will use FP
591: registers and most mode MODE_INT references will use integer registers.
592: (In the rare case of an FP register used in an integer MODE, we depend
593: on secondary reloads to clean things up.)
594:
595:
596: It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
597: manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
598: adressing modes to be used).
599:
600: Put X and Z into registers. Then put the entire expression into
601: a register. */
602:
603: rtx
604: hppa_legitimize_address (x, oldx, mode)
605: rtx x, oldx;
606: enum machine_mode mode;
607: {
608: rtx orig = x;
609:
610: /* Strip off CONST. */
611: if (GET_CODE (x) == CONST)
612: x = XEXP (x, 0);
613:
614: if (GET_CODE (x) == PLUS
615: && GET_CODE (XEXP (x, 1)) == CONST_INT
616: && (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
617: || GET_CODE (XEXP (x, 0)) == REG))
618: {
619: rtx int_part, ptr_reg;
620: int newoffset;
621: int offset = INTVAL (XEXP (x, 1));
622: int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
623:
624: /* Choose which way to round the offset. Round up if we
625: are >= halfway to the next boundary. */
626: if ((offset & mask) >= ((mask + 1) / 2))
627: newoffset = (offset & ~ mask) + mask + 1;
628: else
629: newoffset = (offset & ~ mask);
630:
631: /* If the newoffset will not fit in 14 bits (ldo), then
632: handling this would take 4 or 5 instructions (2 to load
633: the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
634: add the new offset and the SYMBOL_REF.) Combine can
635: not handle 4->2 or 5->2 combinations, so do not create
636: them. */
637: if (! VAL_14_BITS_P (newoffset)
638: && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
639: {
640: rtx const_part = gen_rtx (CONST, VOIDmode,
641: gen_rtx (PLUS, Pmode,
642: XEXP (x, 0),
643: GEN_INT (newoffset)));
644: rtx tmp_reg
645: = force_reg (Pmode,
646: gen_rtx (HIGH, Pmode, const_part));
647: ptr_reg
648: = force_reg (Pmode,
649: gen_rtx (LO_SUM, Pmode,
650: tmp_reg, const_part));
651: }
652: else
653: {
654: if (! VAL_14_BITS_P (newoffset))
655: int_part = force_reg (Pmode, GEN_INT (newoffset));
656: else
657: int_part = GEN_INT (newoffset);
658:
659: ptr_reg = force_reg (Pmode,
660: gen_rtx (PLUS, Pmode,
661: force_reg (Pmode, XEXP (x, 0)),
662: int_part));
663: }
664: return plus_constant (ptr_reg, offset - newoffset);
665: }
666:
667: /* Try to arrange things so that indexing modes can be used, but
668: only do so if indexing is safe.
669:
670: Indexing is safe when the second operand for the outer PLUS
671: is a REG, SUBREG, SYMBOL_REF or the like.
672:
673: For 2.5, indexing is also safe for (plus (symbol_ref) (const_int))
674: if the integer is > 0. */
675: if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
676: && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
677: && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
678: && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
679: || GET_CODE (XEXP (x, 1)) == SUBREG)
680: && GET_CODE (XEXP (x, 1)) != CONST)
681: {
682: int val = INTVAL (XEXP (XEXP (x, 0), 1));
683: rtx reg1, reg2;
684: reg1 = force_reg (Pmode, force_operand (XEXP (x, 1), 0));
685: reg2 = force_reg (Pmode,
686: force_operand (XEXP (XEXP (x, 0), 0), 0));
687: return force_reg (Pmode,
688: gen_rtx (PLUS, Pmode,
689: gen_rtx (MULT, Pmode, reg2,
690: GEN_INT (val)),
691: reg1));
692: }
693:
694: /* Uh-oh. We might have an address for x[n-100000]. This needs
695: special handling. */
696:
697: if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
698: && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
699: && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
700: {
701: /* Ugly. We modify things here so that the address offset specified
702: by the index expression is computed first, then added to x to form
703: the entire address.
704:
705: For 2.5, it might be profitable to set things up so that we
706: compute the raw (unscaled) index first, then use scaled indexing
707: to access memory, or better yet have the MI parts of the compiler
708: handle this. */
709:
710: rtx regx1, regy1, regy2, y;
711:
712: /* Strip off any CONST. */
713: y = XEXP (x, 1);
714: if (GET_CODE (y) == CONST)
715: y = XEXP (y, 0);
716:
717: if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
718: {
719: regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
720: regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
721: regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
722: regx1 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
723: return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
724: }
725: }
726:
727: if (flag_pic)
728: return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
729:
730: return orig;
731: }
732:
733: /* For the HPPA, REG and REG+CONST is cost 0
734: and addresses involving symbolic constants are cost 2.
735:
736: PIC addresses are very expensive.
737:
738: It is no coincidence that this has the same structure
739: as GO_IF_LEGITIMATE_ADDRESS. */
740: int
741: hppa_address_cost (X)
742: rtx X;
743: {
744: if (GET_CODE (X) == PLUS)
745: return 1;
746: else if (GET_CODE (X) == LO_SUM)
747: return 1;
748: else if (GET_CODE (X) == HIGH)
749: return 2;
750: return 4;
751: }
752:
753: /* Emit insns to move operands[1] into operands[0].
754:
755: Return 1 if we have written out everything that needs to be done to
756: do the move. Otherwise, return 0 and the caller will emit the move
757: normally. */
758:
759: int
760: emit_move_sequence (operands, mode, scratch_reg)
761: rtx *operands;
762: enum machine_mode mode;
763: rtx scratch_reg;
764: {
765: register rtx operand0 = operands[0];
766: register rtx operand1 = operands[1];
767:
768: /* Handle secondary reloads for loads/stores of FP registers from
769: REG+D addresses where D does not fit in 5 bits. */
770: if (fp_reg_operand (operand0, mode)
771: && GET_CODE (operand1) == MEM
772: /* Using DFmode forces only short displacements be be
773: recognized as valid in reg+d addressing modes. */
774: && ! memory_address_p (DFmode, XEXP (operand1, 0))
775: && scratch_reg)
776: {
777: emit_move_insn (scratch_reg, XEXP (operand1, 0));
778: emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
779: scratch_reg)));
780: return 1;
781: }
782: else if (fp_reg_operand (operand1, mode)
783: && GET_CODE (operand0) == MEM
784: /* Using DFmode forces only short displacements be be
785: recognized as valid in reg+d addressing modes. */
786: && ! memory_address_p (DFmode, XEXP (operand0, 0))
787: && scratch_reg)
788: {
789: emit_move_insn (scratch_reg, XEXP (operand0, 0));
790: emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
791: operand1));
792: return 1;
793: }
794: /* Handle secondary reloads for loads of FP registers from constant
795: expressions by forcing the constant into memory.
796:
797: use scratch_reg to hold the address of the memory location.
798:
799: ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
800: NO_REGS when presented with a const_int and an register class
801: containing only FP registers. Doing so unfortunately creates
802: more problems than it solves. Fix this for 2.5. */
803: else if (fp_reg_operand (operand0, mode)
804: && CONSTANT_P (operand1)
805: && scratch_reg)
806: {
807: rtx xoperands[2];
808:
809: /* Force the constant into memory and put the address of the
810: memory location into scratch_reg. */
811: xoperands[0] = scratch_reg;
812: xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
813: emit_move_sequence (xoperands, Pmode, 0);
814:
815: /* Now load the destination register. */
816: emit_insn (gen_rtx (SET, mode, operand0,
817: gen_rtx (MEM, mode, scratch_reg)));
818: return 1;
819: }
820: /* Handle secondary reloads for SAR. These occur when trying to load
821: the SAR from memory or from a FP register. */
822: else if (GET_CODE (operand0) == REG
823: && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
824: && (GET_CODE (operand1) == MEM
825: || (GET_CODE (operand1) == REG
826: && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
827: && scratch_reg)
828: {
829: emit_move_insn (scratch_reg, operand1);
830: emit_move_insn (operand0, scratch_reg);
831: return 1;
832: }
833: /* Handle most common case: storing into a register. */
834: else if (register_operand (operand0, mode))
835: {
836: if (register_operand (operand1, mode)
837: || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
838: || (operand1 == CONST0_RTX (mode))
839: || (GET_CODE (operand1) == HIGH
840: && !symbolic_operand (XEXP (operand1, 0)))
841: /* Only `general_operands' can come here, so MEM is ok. */
842: || GET_CODE (operand1) == MEM)
843: {
844: /* Run this case quickly. */
845: emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
846: return 1;
847: }
848: }
849: else if (GET_CODE (operand0) == MEM)
850: {
851: if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
852: {
853: /* Run this case quickly. */
854: emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
855: return 1;
856: }
857: if (! (reload_in_progress || reload_completed))
858: {
859: operands[0] = validize_mem (operand0);
860: operands[1] = operand1 = force_reg (mode, operand1);
861: }
862: }
863:
864: /* Simplify the source if we need to. */
865: if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
866: || (GET_CODE (operand1) == HIGH
867: && symbolic_operand (XEXP (operand1, 0), mode)))
868: {
869: int ishighonly = 0;
870:
871: if (GET_CODE (operand1) == HIGH)
872: {
873: ishighonly = 1;
874: operand1 = XEXP (operand1, 0);
875: }
876: if (symbolic_operand (operand1, mode))
877: {
878: if (flag_pic)
879: {
880: rtx temp;
881:
882: if (reload_in_progress || reload_completed)
883: temp = operand0;
884: else
885: temp = gen_reg_rtx (Pmode);
886:
887: operands[1] = legitimize_pic_address (operand1, mode, temp);
888: emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
889: }
890: /* On the HPPA, references to data space are supposed to */
891: /* use dp, register 27, but showing it in the RTL inhibits various
892: cse and loop optimizations. */
893: else
894: {
895: rtx temp, set;
896:
897: if (reload_in_progress || reload_completed)
898: temp = scratch_reg ? scratch_reg : operand0;
899: else
900: temp = gen_reg_rtx (mode);
901:
902: if (ishighonly)
903: set = gen_rtx (SET, mode, operand0, temp);
904: else
905: set = gen_rtx (SET, VOIDmode,
906: operand0,
907: gen_rtx (LO_SUM, mode, temp, operand1));
908:
909: emit_insn (gen_rtx (SET, VOIDmode,
910: temp,
911: gen_rtx (HIGH, mode, operand1)));
912: emit_insn (set);
913: return 1;
914: }
915: return 1;
916: }
917: else if (GET_CODE (operand1) != CONST_INT
918: || ! cint_ok_for_move (INTVAL (operand1)))
919: {
920: rtx temp;
921:
922: if (reload_in_progress || reload_completed)
923: temp = operand0;
924: else
925: temp = gen_reg_rtx (mode);
926:
927: emit_insn (gen_rtx (SET, VOIDmode, temp,
928: gen_rtx (HIGH, mode, operand1)));
929: operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
930: }
931: }
932: /* Now have insn-emit do whatever it normally does. */
933: return 0;
934: }
935:
936: /* Does operand (which is a symbolic_operand) live in text space? If
937: so SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
938:
939: int
940: read_only_operand (operand)
941: rtx operand;
942: {
943: if (GET_CODE (operand) == CONST)
944: operand = XEXP (XEXP (operand, 0), 0);
945: if (GET_CODE (operand) == SYMBOL_REF)
946: return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
947: return 1;
948: }
949:
950:
951: /* Return the best assembler insn template
952: for moving operands[1] into operands[0] as a fullword. */
953: char *
954: singlemove_string (operands)
955: rtx *operands;
956: {
957: if (GET_CODE (operands[0]) == MEM)
958: return "stw %r1,%0";
959: else if (GET_CODE (operands[1]) == MEM)
960: return "ldw %1,%0";
961: else if (GET_CODE (operands[1]) == CONST_DOUBLE
962: && GET_MODE (operands[1]) == SFmode)
963: {
964: int i;
965: union real_extract u;
966: union float_extract { float f; int i; } v;
967:
968: bcopy (&CONST_DOUBLE_LOW (operands[1]), &u, sizeof u);
969: v.f = REAL_VALUE_TRUNCATE (SFmode, u.d);
970: i = v.i;
971:
972: operands[1] = gen_rtx (CONST_INT, VOIDmode, i);
973:
974: /* See if we can handle this constant in a single instruction. */
975: if (cint_ok_for_move (INTVAL (operands[1])))
976: {
977: HOST_WIDE_INT intval = INTVAL (operands[1]);
978:
979: if (intval == 0)
980: return "copy 0,%0";
981: else if (VAL_14_BITS_P (intval))
982: return "ldi %1,%0";
983: else if ((intval & 0x7ff) == 0)
984: return "ldil L'%1,%0";
985: else if (zdepi_cint_p (intval))
986: return "zdepi %Z1,%0";
987: }
988: else
989: return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
990: }
991:
992: else if (GET_CODE (operands[1]) == CONST_INT)
993: {
994: /* See if we can handle this in a single instruction. */
995: if (cint_ok_for_move (INTVAL (operands[1])))
996: {
997: int intval = INTVAL (operands[1]);
998:
999: if (intval == 0)
1000: return "copy 0,%0";
1001: else if (VAL_14_BITS_P (intval))
1002: return "ldi %1,%0";
1003: else if ((intval & 0x7ff) == 0)
1004: return "ldil L'%1,%0";
1005: else if (zdepi_cint_p (intval))
1006: return "zdepi %Z1,%0";
1007: }
1008: else
1009: return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
1010: }
1011: return "copy %1,%0";
1012: }
1013:
1014:
1015: /* Compute position (in OP[1]) and width (in OP[2])
1016: useful for copying IMM to a register using the zdepi
1017: instructions. Store the immediate value to insert in OP[0]. */
1018: void
1019: compute_zdepi_operands (imm, op)
1020: unsigned HOST_WIDE_INT imm;
1021: unsigned *op;
1022: {
1023: int lsb, len;
1024:
1025: /* Find the least significant set bit in IMM. */
1026: for (lsb = 0; lsb < 32; lsb++)
1027: {
1028: if ((imm & 1) != 0)
1029: break;
1030: imm >>= 1;
1031: }
1032:
1033: /* Choose variants based on *sign* of the 5-bit field. */
1034: if ((imm & 0x10) == 0)
1035: len = (lsb <= 28) ? 4 : 32 - lsb;
1036: else
1037: {
1038: /* Find the width of the bitstring in IMM. */
1039: for (len = 5; len < 32; len++)
1040: {
1041: if ((imm & (1 << len)) == 0)
1042: break;
1043: }
1044:
1045: /* Sign extend IMM as a 5-bit value. */
1046: imm = (imm & 0xf) - 0x10;
1047: }
1048:
1049: op[0] = imm;
1050: op[1] = 31 - lsb;
1051: op[2] = len;
1052: }
1053:
1054: /* Output assembler code to perform a doubleword move insn
1055: with operands OPERANDS. */
1056:
1057: char *
1058: output_move_double (operands)
1059: rtx *operands;
1060: {
1061: enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
1062: rtx latehalf[2];
1063: rtx addreg0 = 0, addreg1 = 0;
1064:
1065: /* First classify both operands. */
1066:
1067: if (REG_P (operands[0]))
1068: optype0 = REGOP;
1069: else if (offsettable_memref_p (operands[0]))
1070: optype0 = OFFSOP;
1071: else if (GET_CODE (operands[0]) == MEM)
1072: optype0 = MEMOP;
1073: else
1074: optype0 = RNDOP;
1075:
1076: if (REG_P (operands[1]))
1077: optype1 = REGOP;
1078: else if (CONSTANT_P (operands[1]))
1079: optype1 = CNSTOP;
1080: else if (offsettable_memref_p (operands[1]))
1081: optype1 = OFFSOP;
1082: else if (GET_CODE (operands[1]) == MEM)
1083: optype1 = MEMOP;
1084: else
1085: optype1 = RNDOP;
1086:
1087: /* Check for the cases that the operand constraints are not
1088: supposed to allow to happen. Abort if we get one,
1089: because generating code for these cases is painful. */
1090:
1091: if (optype0 != REGOP && optype1 != REGOP)
1092: abort ();
1093:
1094: /* Handle auto decrementing and incrementing loads and stores
1095: specifically, since the structure of the function doesn't work
1096: for them without major modification. Do it better when we learn
1097: this port about the general inc/dec addressing of PA.
1098: (This was written by tege. Chide him if it doesn't work.) */
1099:
1100: if (optype0 == MEMOP)
1101: {
1102: /* We have to output the address syntax ourselves, since print_operand
1103: doesn't deal with the addresses we want to use. Fix this later. */
1104:
1105: rtx addr = XEXP (operands[0], 0);
1106: if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1107: {
1108: rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1109:
1110: operands[0] = XEXP (addr, 0);
1111: if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1112: abort ();
1113:
1114: if (!reg_overlap_mentioned_p (high_reg, addr))
1115: {
1116: /* No overlap between high target register and address
1117: register. (We do this in a non-obvious way to
1118: save a register file writeback) */
1119: if (GET_CODE (addr) == POST_INC)
1120: return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
1121: return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
1122: }
1123: else
1124: abort();
1125: }
1126: else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1127: {
1128: rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
1129:
1130: operands[0] = XEXP (addr, 0);
1131: if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
1132: abort ();
1133:
1134: if (!reg_overlap_mentioned_p (high_reg, addr))
1135: {
1136: /* No overlap between high target register and address
1137: register. (We do this in a non-obvious way to
1138: save a register file writeback) */
1139: if (GET_CODE (addr) == PRE_INC)
1140: return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
1141: return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
1142: }
1143: else
1144: abort();
1145: }
1146: }
1147: if (optype1 == MEMOP)
1148: {
1149: /* We have to output the address syntax ourselves, since print_operand
1150: doesn't deal with the addresses we want to use. Fix this later. */
1151:
1152: rtx addr = XEXP (operands[1], 0);
1153: if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
1154: {
1155: rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1156:
1157: operands[1] = XEXP (addr, 0);
1158: if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1159: abort ();
1160:
1161: if (!reg_overlap_mentioned_p (high_reg, addr))
1162: {
1163: /* No overlap between high target register and address
1164: register. (We do this in a non-obvious way to
1165: save a register file writeback) */
1166: if (GET_CODE (addr) == POST_INC)
1167: return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
1168: return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
1169: }
1170: else
1171: {
1172: /* This is an undefined situation. We should load into the
1173: address register *and* update that register. Probably
1174: we don't need to handle this at all. */
1175: if (GET_CODE (addr) == POST_INC)
1176: return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
1177: return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
1178: }
1179: }
1180: else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
1181: {
1182: rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
1183:
1184: operands[1] = XEXP (addr, 0);
1185: if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
1186: abort ();
1187:
1188: if (!reg_overlap_mentioned_p (high_reg, addr))
1189: {
1190: /* No overlap between high target register and address
1191: register. (We do this in a non-obvious way to
1192: save a register file writeback) */
1193: if (GET_CODE (addr) == PRE_INC)
1194: return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
1195: return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
1196: }
1197: else
1198: {
1199: /* This is an undefined situation. We should load into the
1200: address register *and* update that register. Probably
1201: we don't need to handle this at all. */
1202: if (GET_CODE (addr) == PRE_INC)
1203: return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
1204: return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
1205: }
1206: }
1207: }
1208:
1209: /* If an operand is an unoffsettable memory ref, find a register
1210: we can increment temporarily to make it refer to the second word. */
1211:
1212: if (optype0 == MEMOP)
1213: addreg0 = find_addr_reg (XEXP (operands[0], 0));
1214:
1215: if (optype1 == MEMOP)
1216: addreg1 = find_addr_reg (XEXP (operands[1], 0));
1217:
1218: /* Ok, we can do one word at a time.
1219: Normally we do the low-numbered word first.
1220:
1221: In either case, set up in LATEHALF the operands to use
1222: for the high-numbered word and in some cases alter the
1223: operands in OPERANDS to be suitable for the low-numbered word. */
1224:
1225: if (optype0 == REGOP)
1226: latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1227: else if (optype0 == OFFSOP)
1228: latehalf[0] = adj_offsettable_operand (operands[0], 4);
1229: else
1230: latehalf[0] = operands[0];
1231:
1232: if (optype1 == REGOP)
1233: latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
1234: else if (optype1 == OFFSOP)
1235: latehalf[1] = adj_offsettable_operand (operands[1], 4);
1236: else if (optype1 == CNSTOP)
1237: split_double (operands[1], &operands[1], &latehalf[1]);
1238: else
1239: latehalf[1] = operands[1];
1240:
1241: /* If the first move would clobber the source of the second one,
1242: do them in the other order.
1243:
1244: RMS says "This happens only for registers;
1245: such overlap can't happen in memory unless the user explicitly
1246: sets it up, and that is an undefined circumstance."
1247:
1248: but it happens on the HP-PA when loading parameter registers,
1249: so I am going to define that circumstance, and make it work
1250: as expected. */
1251:
1252: if (optype0 == REGOP && (optype1 == MEMOP || optype1 == OFFSOP)
1253: && reg_overlap_mentioned_p (operands[0], XEXP (operands[1], 0)))
1254: {
1255: /* XXX THIS PROBABLY DOESN'T WORK. */
1256: /* Do the late half first. */
1257: if (addreg1)
1258: output_asm_insn ("ldo 4(%0),%0", &addreg1);
1259: output_asm_insn (singlemove_string (latehalf), latehalf);
1260: if (addreg1)
1261: output_asm_insn ("ldo -4(%0),%0", &addreg1);
1262: /* Then clobber. */
1263: return singlemove_string (operands);
1264: }
1265:
1266: if (optype0 == REGOP && optype1 == REGOP
1267: && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1268: {
1269: output_asm_insn (singlemove_string (latehalf), latehalf);
1270: return singlemove_string (operands);
1271: }
1272:
1273: /* Normal case: do the two words, low-numbered first. */
1274:
1275: output_asm_insn (singlemove_string (operands), operands);
1276:
1277: /* Make any unoffsettable addresses point at high-numbered word. */
1278: if (addreg0)
1279: output_asm_insn ("ldo 4(%0),%0", &addreg0);
1280: if (addreg1)
1281: output_asm_insn ("ldo 4(%0),%0", &addreg1);
1282:
1283: /* Do that word. */
1284: output_asm_insn (singlemove_string (latehalf), latehalf);
1285:
1286: /* Undo the adds we just did. */
1287: if (addreg0)
1288: output_asm_insn ("ldo -4(%0),%0", &addreg0);
1289: if (addreg1)
1290: output_asm_insn ("ldo -4(%0),%0", &addreg1);
1291:
1292: return "";
1293: }
1294:
1295: char *
1296: output_fp_move_double (operands)
1297: rtx *operands;
1298: {
1299: if (FP_REG_P (operands[0]))
1300: {
1301: if (FP_REG_P (operands[1])
1302: || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1303: output_asm_insn ("fcpy,dbl %r1,%0", operands);
1304: else
1305: output_asm_insn ("fldds%F1 %1,%0", operands);
1306: }
1307: else if (FP_REG_P (operands[1]))
1308: {
1309: output_asm_insn ("fstds%F0 %1,%0", operands);
1310: }
1311: else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
1312: {
1313: if (GET_CODE (operands[0]) == REG)
1314: {
1315: rtx xoperands[2];
1316: xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
1317: xoperands[0] = operands[0];
1318: output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
1319: }
1320: /* This is a pain. You have to be prepared to deal with an
1321: arbritary address here including pre/post increment/decrement.
1322:
1323: so avoid this in the MD. */
1324: else
1325: abort ();
1326: }
1327: else abort ();
1328: return "";
1329: }
1330:
1331: /* Return a REG that occurs in ADDR with coefficient 1.
1332: ADDR can be effectively incremented by incrementing REG. */
1333:
1334: static rtx
1335: find_addr_reg (addr)
1336: rtx addr;
1337: {
1338: while (GET_CODE (addr) == PLUS)
1339: {
1340: if (GET_CODE (XEXP (addr, 0)) == REG)
1341: addr = XEXP (addr, 0);
1342: else if (GET_CODE (XEXP (addr, 1)) == REG)
1343: addr = XEXP (addr, 1);
1344: else if (CONSTANT_P (XEXP (addr, 0)))
1345: addr = XEXP (addr, 1);
1346: else if (CONSTANT_P (XEXP (addr, 1)))
1347: addr = XEXP (addr, 0);
1348: else
1349: abort ();
1350: }
1351: if (GET_CODE (addr) == REG)
1352: return addr;
1353: abort ();
1354: }
1355:
1356: /* Emit code to perform a block move.
1357:
1358: Restriction: If the length argument is non-constant, alignment
1359: must be 4.
1360:
1361: OPERANDS[0] is the destination pointer as a REG, clobbered.
1362: OPERANDS[1] is the source pointer as a REG, clobbered.
1363: if SIZE_IS_CONSTANT
1364: OPERANDS[2] is a register for temporary storage.
1365: OPERANDS[4] is the size as a CONST_INT
1366: else
1367: OPERANDS[2] is a REG which will contain the size, clobbered.
1368: OPERANDS[3] is a register for temporary storage.
1369: OPERANDS[5] is the alignment safe to use, as a CONST_INT. */
1370:
1371: char *
1372: output_block_move (operands, size_is_constant)
1373: rtx *operands;
1374: int size_is_constant;
1375: {
1376: int align = INTVAL (operands[5]);
1377: unsigned long n_bytes;
1378:
1379: /* We can't move more than four bytes at a time because the PA
1380: has no longer integer move insns. (Could use fp mem ops?) */
1381: if (align > 4)
1382: align = 4;
1383:
1384: if (size_is_constant)
1385: {
1386: unsigned long offset;
1387: rtx temp;
1388:
1389: n_bytes = INTVAL (operands[4]);
1390: if (n_bytes == 0)
1391: return "";
1392:
1393: if (align >= 4)
1394: {
1395: /* Don't unroll too large blocks. */
1396: if (n_bytes > 32)
1397: goto copy_with_loop;
1398:
1399: /* Read and store using two registers, and hide latency
1400: by deferring the stores until three instructions after
1401: the corresponding load. The last load insn will read
1402: the entire word were the last bytes are, possibly past
1403: the end of the source block, but since loads are aligned,
1404: this is harmless. */
1405:
1406: output_asm_insn ("ldws,ma 4(0,%1),%2", operands);
1407:
1408: for (offset = 4; offset < n_bytes; offset += 4)
1409: {
1410: output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1411: output_asm_insn ("stws,ma %2,4(0,%0)", operands);
1412:
1413: temp = operands[2];
1414: operands[2] = operands[3];
1415: operands[3] = temp;
1416: }
1417: if (n_bytes % 4 == 0)
1418: /* Store the last word. */
1419: output_asm_insn ("stw %2,0(0,%0)", operands);
1420: else
1421: {
1422: /* Store the last, partial word. */
1423: operands[4] = gen_rtx (CONST_INT, VOIDmode, n_bytes % 4);
1424: output_asm_insn ("stbys,e %2,%4(0,%0)", operands);
1425: }
1426: return "";
1427: }
1428:
1429: if (align >= 2 && n_bytes >= 2)
1430: {
1431: output_asm_insn ("ldhs,ma 2(0,%1),%2", operands);
1432:
1433: for (offset = 2; offset + 2 <= n_bytes; offset += 2)
1434: {
1435: output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
1436: output_asm_insn ("sths,ma %2,2(0,%0)", operands);
1437:
1438: temp = operands[2];
1439: operands[2] = operands[3];
1440: operands[3] = temp;
1441: }
1442: if (n_bytes % 2 != 0)
1443: output_asm_insn ("ldb 0(0,%1),%3", operands);
1444:
1445: output_asm_insn ("sths,ma %2,2(0,%0)", operands);
1446:
1447: if (n_bytes % 2 != 0)
1448: output_asm_insn ("stb %3,0(0,%0)", operands);
1449:
1450: return "";
1451: }
1452:
1453: output_asm_insn ("ldbs,ma 1(0,%1),%2", operands);
1454:
1455: for (offset = 1; offset + 1 <= n_bytes; offset += 1)
1456: {
1457: output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
1458: output_asm_insn ("stbs,ma %2,1(0,%0)", operands);
1459:
1460: temp = operands[2];
1461: operands[2] = operands[3];
1462: operands[3] = temp;
1463: }
1464: output_asm_insn ("stb %2,0(0,%0)", operands);
1465:
1466: return "";
1467: }
1468:
1469: if (align != 4)
1470: abort();
1471:
1472: copy_with_loop:
1473:
1474: if (size_is_constant)
1475: {
1476: /* Size is compile-time determined, and also not
1477: very small (such small cases are handled above). */
1478: operands[4] = gen_rtx (CONST_INT, VOIDmode, n_bytes - 4);
1479: output_asm_insn ("ldo %4(0),%2", operands);
1480: }
1481: else
1482: {
1483: /* Decrement counter by 4, and if it becomes negative, jump past the
1484: word copying loop. */
1485: output_asm_insn ("addib,<,n -4,%2,.+16", operands);
1486: }
1487:
1488: /* Copying loop. Note that the first load is in the annulled delay slot
1489: of addib. Is it OK on PA to have a load in a delay slot, i.e. is a
1490: possible page fault stopped in time? */
1491: output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
1492: output_asm_insn ("addib,>= -4,%2,.-4", operands);
1493: output_asm_insn ("stws,ma %3,4(0,%0)", operands);
1494:
1495: /* The counter is negative, >= -4. The remaining number of bytes are
1496: determined by the two least significant bits. */
1497:
1498: if (size_is_constant)
1499: {
1500: if (n_bytes % 4 != 0)
1501: {
1502: /* Read the entire word of the source block tail. */
1503: output_asm_insn ("ldw 0(0,%1),%3", operands);
1504: operands[4] = gen_rtx (CONST_INT, VOIDmode, n_bytes % 4);
1505: output_asm_insn ("stbys,e %3,%4(0,%0)", operands);
1506: }
1507: }
1508: else
1509: {
1510: /* Add 4 to counter. If it becomes zero, we're done. */
1511: output_asm_insn ("addib,=,n 4,%2,.+16", operands);
1512:
1513: /* Read the entire word of the source block tail. (Also this
1514: load is in an annulled delay slot.) */
1515: output_asm_insn ("ldw 0(0,%1),%3", operands);
1516:
1517: /* Make %0 point at the first byte after the destination block. */
1518: output_asm_insn ("add %2,%0,%0", operands);
1519: /* Store the leftmost bytes, up to, but not including, the address
1520: in %0. */
1521: output_asm_insn ("stbys,e %3,0(0,%0)", operands);
1522: }
1523: return "";
1524: }
1525:
1526: /* Count the number of insns necessary to handle this block move.
1527:
1528: Basic structure is the same as emit_block_move, except that we
1529: count insns rather than emit them. */
1530:
1531: int
1532: compute_movstrsi_length (insn)
1533: rtx insn;
1534: {
1535: rtx pat = PATTERN (insn);
1536: int size_is_constant;
1537: int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
1538: unsigned long n_bytes;
1539: int insn_count = 0;
1540:
1541: if (GET_CODE (XEXP (XVECEXP (pat, 0, 5), 0)) == CONST_INT)
1542: {
1543: size_is_constant = 1;
1544: n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
1545: }
1546: else
1547: {
1548: size_is_constant = 0;
1549: n_bytes = 0;
1550: }
1551:
1552: /* We can't move more than four bytes at a time because the PA
1553: has no longer integer move insns. (Could use fp mem ops?) */
1554: if (align > 4)
1555: align = 4;
1556:
1557: if (size_is_constant)
1558: {
1559: unsigned long offset;
1560:
1561: if (n_bytes == 0)
1562: return 0;
1563:
1564: if (align >= 4)
1565: {
1566: /* Don't unroll too large blocks. */
1567: if (n_bytes > 32)
1568: goto copy_with_loop;
1569:
1570: /* first load */
1571: insn_count = 1;
1572:
1573: /* Count the unrolled insns. */
1574: for (offset = 4; offset < n_bytes; offset += 4)
1575: insn_count += 2;
1576:
1577: /* Count last store or partial store. */
1578: insn_count += 1;
1579: return insn_count * 4;
1580: }
1581:
1582: if (align >= 2 && n_bytes >= 2)
1583: {
1584: /* initial load. */
1585: insn_count = 1;
1586:
1587: /* Unrolled loop. */
1588: for (offset = 2; offset + 2 <= n_bytes; offset += 2)
1589: insn_count += 2;
1590:
1591: /* ??? odd load/store */
1592: if (n_bytes % 2 != 0)
1593: insn_count += 2;
1594:
1595: /* ??? final store from loop. */
1596: insn_count += 1;
1597:
1598: return insn_count * 4;
1599: }
1600:
1601: /* First load. */
1602: insn_count = 1;
1603:
1604: /* The unrolled loop. */
1605: for (offset = 1; offset + 1 <= n_bytes; offset += 1)
1606: insn_count += 2;
1607:
1608: /* Final store. */
1609: insn_count += 1;
1610:
1611: return insn_count * 4;
1612: }
1613:
1614: if (align != 4)
1615: abort();
1616:
1617: copy_with_loop:
1618:
1619: /* setup for constant and non-constant case. */
1620: insn_count = 1;
1621:
1622: /* The copying loop. */
1623: insn_count += 3;
1624:
1625: /* The counter is negative, >= -4. The remaining number of bytes are
1626: determined by the two least significant bits. */
1627:
1628: if (size_is_constant)
1629: {
1630: if (n_bytes % 4 != 0)
1631: insn_count += 2;
1632: }
1633: else
1634: insn_count += 4;
1635: return insn_count * 4;
1636: }
1637:
1638:
1639: char *
1640: output_and (operands)
1641: rtx *operands;
1642: {
1643: if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
1644: {
1645: unsigned mask = INTVAL (operands[2]);
1646: int ls0, ls1, ms0, p, len;
1647:
1648: for (ls0 = 0; ls0 < 32; ls0++)
1649: if ((mask & (1 << ls0)) == 0)
1650: break;
1651:
1652: for (ls1 = ls0; ls1 < 32; ls1++)
1653: if ((mask & (1 << ls1)) != 0)
1654: break;
1655:
1656: for (ms0 = ls1; ms0 < 32; ms0++)
1657: if ((mask & (1 << ms0)) == 0)
1658: break;
1659:
1660: if (ms0 != 32)
1661: abort();
1662:
1663: if (ls1 == 32)
1664: {
1665: len = ls0;
1666:
1667: if (len == 0)
1668: abort ();
1669:
1670: operands[2] = gen_rtx (CONST_INT, VOIDmode, len);
1671: return "extru %1,31,%2,%0";
1672: }
1673: else
1674: {
1675: /* We could use this `depi' for the case above as well, but `depi'
1676: requires one more register file access than an `extru'. */
1677:
1678: p = 31 - ls0;
1679: len = ls1 - ls0;
1680:
1681: operands[2] = gen_rtx (CONST_INT, VOIDmode, p);
1682: operands[3] = gen_rtx (CONST_INT, VOIDmode, len);
1683: return "depi 0,%2,%3,%0";
1684: }
1685: }
1686: else
1687: return "and %1,%2,%0";
1688: }
1689:
1690: char *
1691: output_ior (operands)
1692: rtx *operands;
1693: {
1694: unsigned mask = INTVAL (operands[2]);
1695: int bs0, bs1, p, len;
1696:
1697: if (INTVAL (operands[2]) == 0)
1698: return "copy %1,%0";
1699:
1700: for (bs0 = 0; bs0 < 32; bs0++)
1701: if ((mask & (1 << bs0)) != 0)
1702: break;
1703:
1704: for (bs1 = bs0; bs1 < 32; bs1++)
1705: if ((mask & (1 << bs1)) == 0)
1706: break;
1707:
1708: if (bs1 != 32 && ((unsigned) 1 << bs1) <= mask)
1709: abort();
1710:
1711: p = 31 - bs0;
1712: len = bs1 - bs0;
1713:
1714: operands[2] = gen_rtx (CONST_INT, VOIDmode, p);
1715: operands[3] = gen_rtx (CONST_INT, VOIDmode, len);
1716: return "depi -1,%2,%3,%0";
1717: }
1718:
/* Output an ascii string.

   Write the SIZE bytes at P to FILE as one or more .STRING
   directives.  Double quotes and backslashes are preceded by a
   backslash; bytes below space or at and above 0177 are written as
   \xNN using two lower case hex digits.  */
void
output_ascii (file, p, size)
     FILE *file;
     unsigned char *p;
     int size;
{
  char *hex_digits = "0123456789abcdef";
  unsigned char chunk[16];	/* Max space 4 chars can occupy.  */
  int line_len = 0;
  int start;

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fprintf (file, "\t.STRING \"");

  /* Work through the input four bytes at a time, so that the escaped
     form of each group can be kept together on one line.  */
  for (start = 0; start < size; start += 4)
    {
      int n_in = size - start;
      int n_out = 0;
      int k;

      if (n_in > 4)
	n_in = 4;

      for (k = 0; k < n_in; k++)
	{
	  unsigned int c = p[start + k];

	  if (c < ' ' || c >= 0177)
	    {
	      /* Not printable: emit a hex escape.  */
	      chunk[n_out++] = '\\';
	      chunk[n_out++] = 'x';
	      chunk[n_out++] = hex_digits[c / 16];
	      chunk[n_out++] = hex_digits[c % 16];
	      continue;
	    }

	  if (c == '\"' || c == '\\')
	    chunk[n_out++] = '\\';
	  chunk[n_out++] = c;
	}

      /* Start a fresh directive before the line gets too long.  */
      if (line_len + n_out > 243)
	{
	  fprintf (file, "\"\n\t.STRING \"");
	  line_len = 0;
	}
      fwrite (chunk, 1, n_out, file);
      line_len += n_out;
    }
  fprintf (file, "\"\n");
}
1777:
1778: /* You may have trouble believing this, but this is the HP-PA stack
1779: layout. Wow.
1780:
1781: Offset Contents
1782:
1783: Variable arguments (optional; any number may be allocated)
1784:
1785: SP-(4*(N+9)) arg word N
1786: : :
1787: SP-56 arg word 5
1788: SP-52 arg word 4
1789:
1790: Fixed arguments (must be allocated; may remain unused)
1791:
1792: SP-48 arg word 3
1793: SP-44 arg word 2
1794: SP-40 arg word 1
1795: SP-36 arg word 0
1796:
1797: Frame Marker
1798:
1799: SP-32 External Data Pointer (DP)
1800: SP-28 External sr4
1801: SP-24 External/stub RP (RP')
1802: SP-20 Current RP
1803: SP-16 Static Link
1804: SP-12 Clean up
1805: SP-8 Calling Stub RP (RP'')
1806: SP-4 Previous SP
1807:
1808: Top of Frame
1809:
1810: SP-0 Stack Pointer (points to next available address)
1811:
1812: */
1813:
1814: /* This function saves registers as follows. Registers marked with ' are
1815: this function's registers (as opposed to the previous function's).
1816: If a frame_pointer isn't needed, r4 is saved as a general register;
1817: the space for the frame pointer is still allocated, though, to keep
1818: things simple.
1819:
1820:
1821: Top of Frame
1822:
1823: SP (FP') Previous FP
1824: SP + 4 Alignment filler (sigh)
1825: SP + 8 Space for locals reserved here.
1826: .
1827: .
1828: .
1829: SP + n All call saved register used.
1830: .
1831: .
1832: .
1833: SP + o All call saved fp registers used.
1834: .
1835: .
1836: .
1837: SP + p (SP') points to next available address.
1838:
1839: */
1840:
1841: /* Emit RTL to store REG at the memory location specified by BASE+DISP.
1842: Handle case where DISP > 8k by using the add_high_const pattern.
1843:
1844: Note in DISP > 8k case, we will leave the high part of the address
1845: in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
1846: static void
1847: store_reg (reg, disp, base)
1848: int reg, disp, base;
1849: {
1850: if (VAL_14_BITS_P (disp))
1851: {
1852: emit_move_insn (gen_rtx (MEM, SImode,
1853: gen_rtx (PLUS, SImode,
1854: gen_rtx (REG, SImode, base),
1855: GEN_INT (disp))),
1856: gen_rtx (REG, SImode, reg));
1857: }
1858: else
1859: {
1860: emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
1861: gen_rtx (REG, SImode, base),
1862: GEN_INT (disp)));
1863: emit_move_insn (gen_rtx (MEM, SImode,
1864: gen_rtx (LO_SUM, SImode,
1865: gen_rtx (REG, SImode, 1),
1866: GEN_INT (disp))),
1867: gen_rtx (REG, SImode, reg));
1868: }
1869: }
1870:
1871: /* Emit RTL to load REG from the memory location specified by BASE+DISP.
1872: Handle case where DISP > 8k by using the add_high_const pattern.
1873:
1874: Note in DISP > 8k case, we will leave the high part of the address
1875: in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
1876: static void
1877: load_reg (reg, disp, base)
1878: int reg, disp, base;
1879: {
1880: if (VAL_14_BITS_P (disp))
1881: {
1882: emit_move_insn (gen_rtx (REG, SImode, reg),
1883: gen_rtx (MEM, SImode,
1884: gen_rtx (PLUS, SImode,
1885: gen_rtx (REG, SImode, base),
1886: GEN_INT (disp))));
1887: }
1888: else
1889: {
1890: emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
1891: gen_rtx (REG, SImode, base),
1892: GEN_INT (disp)));
1893: emit_move_insn (gen_rtx (REG, SImode, reg),
1894: gen_rtx (MEM, SImode,
1895: gen_rtx (LO_SUM, SImode,
1896: gen_rtx (REG, SImode, 1),
1897: GEN_INT (disp))));
1898: }
1899: }
1900:
1901: /* Emit RTL to set REG to the value specified by BASE+DISP.
1902: Handle case where DISP > 8k by using the add_high_const pattern.
1903:
1904: Note in DISP > 8k case, we will leave the high part of the address
1905: in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
1906: static void
1907: set_reg_plus_d(reg, base, disp)
1908: int reg, base, disp;
1909: {
1910: if (VAL_14_BITS_P (disp))
1911: {
1912: emit_move_insn (gen_rtx (REG, SImode, reg),
1913: gen_rtx (PLUS, SImode,
1914: gen_rtx (REG, SImode, base),
1915: GEN_INT (disp)));
1916: }
1917: else
1918: {
1919: emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
1920: gen_rtx (REG, SImode, base),
1921: GEN_INT (disp)));
1922: emit_move_insn (gen_rtx (REG, SImode, reg),
1923: gen_rtx (LO_SUM, SImode,
1924: gen_rtx (REG, SImode, 1),
1925: GEN_INT (disp)));
1926: }
1927: }
1928:
/* Global variables set by FUNCTION_PROLOGUE.  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  hppa_expand_prologue stores the result of
   compute_frame_size here, and output_function_prologue prints it
   in the .CALLINFO FRAME= directive.  */
static int actual_fsize;
/* LOCAL_FSIZE is the size reported by get_frame_size, plus 8 when that
   size is nonzero or a frame pointer is needed; hppa_expand_prologue
   computes it.  SAVE_FREGS is cleared by hppa_expand_prologue and then
   set to 1 by compute_frame_size when a floating point register it
   counts is live.  */
static int local_fsize, save_fregs;
1934:
1935: int
1936: compute_frame_size (size, fregs_live)
1937: int size;
1938: int *fregs_live;
1939: {
1940: extern int current_function_outgoing_args_size;
1941: int i, fsize;
1942:
1943: /* 8 is space for frame pointer + filler. If any frame is allocated
1944: we need to add this in because of STARTING_FRAME_OFFSET. */
1945: fsize = size + (size || frame_pointer_needed ? 8 : 0);
1946:
1947: for (i = 18; i >= 3; i--)
1948: {
1949: /* fp is stored in a special place. */
1950: if (regs_ever_live[i]
1951: && (i != FRAME_POINTER_REGNUM || !frame_pointer_needed))
1952: fsize += 4;
1953: }
1954: fsize = (fsize + 7) & ~7;
1955:
1956: if (!TARGET_SNAKE)
1957: {
1958: for (i = 43; i >= 40; i--)
1959: if (regs_ever_live[i])
1960: {
1961: fsize += 8;
1962: if (fregs_live)
1963: *fregs_live = 1;
1964: }
1965: }
1966: else
1967: {
1968: for (i = 78; i >= 60; i -= 2)
1969: if (regs_ever_live[i] || regs_ever_live[i + 1])
1970: {
1971: fsize += 8;
1972: if (fregs_live)
1973: *fregs_live = 1;
1974: }
1975: }
1976: fsize += current_function_outgoing_args_size;
1977: if (! leaf_function_p () || fsize)
1978: fsize += 32;
1979: return (fsize + 63) & ~63;
1980: }
1981:
1982: rtx hp_profile_label_rtx;
1983: static char hp_profile_label_name[8];
1984: void
1985: output_function_prologue (file, size)
1986: FILE *file;
1987: int size;
1988: {
1989: /* The function's label and associated .PROC must never be
1990: separated and must be output *after* any profiling declarations
1991: to avoid changing spaces/subspaces within a procedure. */
1992: ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
1993: fputs ("\t.PROC\n", file);
1994:
1995: /* hppa_expand_prologue does the dirty work now. We just need
1996: to output the assembler directives which denote the start
1997: of a function. */
1998: fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
1999: if (regs_ever_live[2] || profile_flag)
2000: fprintf (file, ",CALLS,SAVE_RP");
2001: else
2002: fprintf (file, ",NO_CALLS");
2003:
2004: if (frame_pointer_needed)
2005: fprintf (file, ",SAVE_SP");
2006:
2007: /* Pass on information about the number of callee register saves
2008: performed in the prologue.
2009:
2010: The compiler is supposed to pass the highest register number
2011: saved, the assembler then has to adjust that number before
2012: entering it into the unwind descriptor (to account for any
2013: caller saved registers with lower register numbers than the
2014: first callee saved register). */
2015: if (gr_saved)
2016: fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
2017:
2018: if (fr_saved)
2019: fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
2020:
2021: fprintf (file, "\n\t.ENTRY\n");
2022:
2023: /* Horrid hack. emit_function_prologue will modify this RTL in
2024: place to get the expected results. */
2025: if (profile_flag)
2026: ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
2027: hp_profile_labelno);
2028: }
2029:
2030: void
2031: hppa_expand_prologue()
2032: {
2033:
2034: extern char call_used_regs[];
2035: int size = get_frame_size ();
2036: int merge_sp_adjust_with_store = 0;
2037: int i, offset;
2038: rtx tmpreg, size_rtx;
2039:
2040:
2041: gr_saved = 0;
2042: fr_saved = 0;
2043: save_fregs = 0;
2044: local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
2045: actual_fsize = compute_frame_size (size, &save_fregs);
2046:
2047: /* Compute a few things we will use often. */
2048: tmpreg = gen_rtx (REG, SImode, 1);
2049: size_rtx = GEN_INT (actual_fsize);
2050:
2051: /* Save RP first. The calling conventions manual states RP will
2052: always be stored into the caller's frame at sp-20. */
2053: if (regs_ever_live[2] || profile_flag)
2054: store_reg (2, -20, STACK_POINTER_REGNUM);
2055:
2056: /* Allocate the local frame and set up the frame pointer if needed. */
2057: if (actual_fsize)
2058: if (frame_pointer_needed)
2059: {
2060: /* Copy the old frame pointer temporarily into %r1. Set up the
2061: new stack pointer, then store away the saved old frame pointer
2062: into the stack at sp+actual_fsize and at the same time update
2063: the stack pointer by actual_fsize bytes. Two versions, first
2064: handles small (<8k) frames. The second handles large (>8k)
2065: frames. */
2066: emit_move_insn (tmpreg, frame_pointer_rtx);
2067: emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
2068: if (VAL_14_BITS_P (actual_fsize))
2069: emit_insn (gen_post_stwm (stack_pointer_rtx,
2070: stack_pointer_rtx,
2071: size_rtx, tmpreg));
2072: else
2073: {
2074: store_reg (1, 0, FRAME_POINTER_REGNUM);
2075: set_reg_plus_d (STACK_POINTER_REGNUM,
2076: STACK_POINTER_REGNUM,
2077: actual_fsize);
2078: }
2079: }
2080: /* no frame pointer needed. */
2081: else
2082: {
2083: /* In some cases we can perform the first callee register save
2084: and allocating the stack frame at the same time. If so, just
2085: make a note of it and defer allocating the frame until saving
2086: the callee registers. */
2087: if (VAL_14_BITS_P (-actual_fsize)
2088: && local_fsize == 0
2089: && ! profile_flag
2090: && ! flag_pic)
2091: merge_sp_adjust_with_store = 1;
2092: /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
2093: else if (actual_fsize != 0)
2094: set_reg_plus_d (STACK_POINTER_REGNUM,
2095: STACK_POINTER_REGNUM,
2096: actual_fsize);
2097: }
2098: /* The hppa calling conventions say that that %r19, the pic offset
2099: register, is saved at sp - 32 (in this function's frame) when
2100: generating PIC code. */
2101: if (flag_pic)
2102: store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
2103:
2104: /* Profiling code.
2105:
2106: Instead of taking one argument, the counter label, as most normal
2107: mcounts do, _mcount appears to behave differently on the HPPA. It
2108: takes the return address of the caller, the address of this routine,
2109: and the address of the label. Also, it isn't magic, so
2110: argument registre hsave to be preserved. */
2111: if (profile_flag)
2112: {
2113: int pc_offset, i, arg_offset, basereg, offsetadj;
2114:
2115: pc_offset = 4 + (frame_pointer_needed
2116: ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
2117: : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
2118:
2119: /* When the function has a frame pointer, use it as the base
2120: register for saving/restore registers. Else use the stack
2121: pointer. Adjust the offset according to the frame size if
2122: this function does not have a frame pointer. */
2123:
2124: basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
2125: : STACK_POINTER_REGNUM;
2126: offsetadj = frame_pointer_needed ? 0 : actual_fsize;
2127:
2128: /* Horrid hack. emit_function_prologue will modify this RTL in
2129: place to get the expected results. sprintf here is just to
2130: put something in the name. */
2131: sprintf(hp_profile_label_name, "LP$%04d", -1);
2132: hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
2133: hp_profile_label_name);
2134: if (current_function_returns_struct)
2135: store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
2136:
2137: for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2138: if (regs_ever_live [i])
2139: {
2140: store_reg (i, arg_offset, basereg);
2141: /* Deal with arg_offset not fitting in 14 bits. */
2142: pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
2143: }
2144:
2145: emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
2146: emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
2147: emit_move_insn (gen_rtx (REG, SImode, 24),
2148: gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
2149: /* %r25 is set from within the output pattern. */
2150: emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
2151:
2152: /* Restore argument registers. */
2153: for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
2154: if (regs_ever_live [i])
2155: load_reg (i, arg_offset, basereg);
2156:
2157: if (current_function_returns_struct)
2158: load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
2159:
2160: }
2161:
2162: /* Normal register save.
2163:
2164: Do not save the frame pointer in the frame_pointer_needed case. It
2165: was done earlier. */
2166: if (frame_pointer_needed)
2167: {
2168: for (i = 18, offset = local_fsize; i >= 3; i--)
2169: if (regs_ever_live[i] && ! call_used_regs[i]
2170: && i != FRAME_POINTER_REGNUM)
2171: {
2172: store_reg (i, offset, FRAME_POINTER_REGNUM);
2173: offset += 4;
2174: gr_saved++;
2175: }
2176: /* Account for %r4 which is saved in a special place. */
2177: gr_saved++;
2178: }
2179: /* No frame pointer needed. */
2180: else
2181: {
2182: for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2183: if (regs_ever_live[i] && ! call_used_regs[i])
2184: {
2185: /* If merge_sp_adjust_with_store is nonzero, then we can
2186: optimize the first GR save. */
2187: if (merge_sp_adjust_with_store)
2188: {
2189: merge_sp_adjust_with_store = 0;
2190: emit_insn (gen_post_stwm (stack_pointer_rtx,
2191: stack_pointer_rtx,
2192: GEN_INT (-offset),
2193: gen_rtx (REG, SImode, i)));
2194: }
2195: else
2196: store_reg (i, offset, STACK_POINTER_REGNUM);
2197: offset += 4;
2198: gr_saved++;
2199: }
2200:
2201: /* If we wanted to merge the SP adjustment with a GR save, but we never
2202: did any GR saves, then just emit the adjustment here. */
2203: if (merge_sp_adjust_with_store)
2204: set_reg_plus_d (STACK_POINTER_REGNUM,
2205: STACK_POINTER_REGNUM,
2206: actual_fsize);
2207: }
2208:
2209: /* Align pointer properly (doubleword boundary). */
2210: offset = (offset + 7) & ~7;
2211:
2212: /* Floating point register store. */
2213: if (save_fregs)
2214: {
2215:
2216: /* First get the frame or stack pointer to the start of the FP register
2217: save area. */
2218: if (frame_pointer_needed)
2219: set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2220: else
2221: set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2222:
2223: /* Now actually save the FP registers. */
2224: if (! TARGET_SNAKE)
2225: {
2226: for (i = 43; i >= 40; i--)
2227: if (regs_ever_live[i])
2228: {
2229: emit_move_insn (gen_rtx (MEM, DFmode,
2230: gen_rtx (POST_INC, DFmode, tmpreg)),
2231: gen_rtx (REG, DFmode, i));
2232: fr_saved++;
2233: }
2234: }
2235: else
2236: {
2237: for (i = 78; i >= 60; i -= 2)
2238: if (regs_ever_live[i] || regs_ever_live[i + 1])
2239: {
2240: emit_move_insn (gen_rtx (MEM, DFmode,
2241: gen_rtx (POST_INC, DFmode, tmpreg)),
2242: gen_rtx (REG, DFmode, i));
2243: fr_saved++;
2244: }
2245: }
2246: }
2247: }
2248:
2249:
2250: void
2251: output_function_epilogue (file, size)
2252: FILE *file;
2253: int size;
2254: {
2255:
2256: rtx insn = get_last_insn ();
2257:
2258: /* hppa_expand_epilogue does the dirty work now. We just need
2259: to output the assembler directives which denote the end
2260: of a function.
2261:
2262: To make debuggers happy, emit a nop if the epilogue was completely
2263: eliminated due to a volatile call as the last insn in the
2264: current function. That way the return address (in %r2) will
2265: always point to a valid instruction in the current function. */
2266:
2267: /* Get the last real insn. */
2268: if (GET_CODE (insn) == NOTE)
2269: insn = prev_real_insn (insn);
2270:
2271: /* If it is a sequence, then look inside. */
2272: if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
2273: insn = XVECEXP (PATTERN (insn), 0, 0);
2274:
2275: /* If insn is a CALL_INSN, then it must be a call to a volatile
2276: function (otherwise there would be epilogue insns). */
2277: if (insn && GET_CODE (insn) == CALL_INSN)
2278: fprintf (file, "\tnop\n");
2279:
2280: fprintf (file, "\t.EXIT\n\t.PROCEND\n");
2281: }
2282:
2283: void
2284: hppa_expand_epilogue ()
2285: {
2286: rtx tmpreg;
2287: int offset,i;
2288: int merge_sp_adjust_with_load = 0;
2289:
2290: /* We will use this often. */
2291: tmpreg = gen_rtx (REG, SImode, 1);
2292:
2293: /* Try to restore RP early to avoid load/use interlocks when
2294: RP gets used in the return (bv) instruction. This appears to still
2295: be necessary even when we schedule the prologue and epilogue. */
2296: if (frame_pointer_needed
2297: && (regs_ever_live [2] || profile_flag))
2298: load_reg (2, -20, FRAME_POINTER_REGNUM);
2299:
2300: /* No frame pointer, and stack is smaller than 8k. */
2301: else if (! frame_pointer_needed
2302: && VAL_14_BITS_P (actual_fsize + 20)
2303: && (regs_ever_live[2] || profile_flag))
2304: load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
2305:
2306: /* General register restores. */
2307: if (frame_pointer_needed)
2308: {
2309: for (i = 18, offset = local_fsize; i >= 3; i--)
2310: if (regs_ever_live[i] && ! call_used_regs[i]
2311: && i != FRAME_POINTER_REGNUM)
2312: {
2313: load_reg (i, offset, FRAME_POINTER_REGNUM);
2314: offset += 4;
2315: }
2316: }
2317: else
2318: {
2319: for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
2320: if (regs_ever_live[i] && ! call_used_regs[i])
2321: {
2322: /* Only for the first load.
2323: merge_sp_adjust_with_load holds the register load
2324: with which we will merge the sp adjustment. */
2325: if (VAL_14_BITS_P (actual_fsize + 20)
2326: && local_fsize == 0
2327: && ! merge_sp_adjust_with_load)
2328: merge_sp_adjust_with_load = i;
2329: else
2330: load_reg (i, offset, STACK_POINTER_REGNUM);
2331: offset += 4;
2332: }
2333: }
2334:
2335: /* Align pointer properly (doubleword boundary). */
2336: offset = (offset + 7) & ~7;
2337:
2338: /* FP register restores. */
2339: if (save_fregs)
2340: {
2341: /* Adjust the register to index off of. */
2342: if (frame_pointer_needed)
2343: set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
2344: else
2345: set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
2346:
2347: /* Actually do the restores now. */
2348: if (! TARGET_SNAKE)
2349: {
2350: for (i = 43; i >= 40; i--)
2351: if (regs_ever_live[i])
2352: emit_move_insn (gen_rtx (REG, DFmode, i),
2353: gen_rtx (MEM, DFmode,
2354: gen_rtx (POST_INC, DFmode, tmpreg)));
2355: }
2356: else
2357: {
2358: for (i = 78; i >= 60; i -= 2)
2359: if (regs_ever_live[i] || regs_ever_live[i + 1])
2360: emit_move_insn (gen_rtx (REG, DFmode, i),
2361: gen_rtx (MEM, DFmode,
2362: gen_rtx (POST_INC, DFmode, tmpreg)));
2363: }
2364: }
2365:
2366: /* No frame pointer, but we have a stack greater than 8k. We restore
2367: %r2 very late in this case. (All other cases are restored as early
2368: as possible.) */
2369: if (! frame_pointer_needed
2370: && ! VAL_14_BITS_P (actual_fsize + 20)
2371: && (regs_ever_live[2] || profile_flag))
2372: {
2373: set_reg_plus_d (STACK_POINTER_REGNUM,
2374: STACK_POINTER_REGNUM,
2375: - actual_fsize);
2376: /* Uses value left over in %r1 by set_reg_plus_d. */
2377: load_reg (2, - (actual_fsize + 20 + ((- actual_fsize) & ~0x7ff)), 1);
2378: }
2379:
2380: /* Reset stack pointer (and possibly frame pointer). The stack */
2381: /* pointer is initially set to fp + 64 to avoid a race condition.
2382: ??? What race condition?!? */
2383: else if (frame_pointer_needed)
2384: {
2385: /* Emit a blockage insn here to keep these insns from being moved
2386: to the beginning of the prologue or into the main instruction
2387: stream, doing so avoids some very obscure problems. */
2388: emit_insn (gen_blockage ());
2389: set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
2390: emit_insn (gen_pre_ldwm (stack_pointer_rtx, stack_pointer_rtx,
2391: GEN_INT (-64), frame_pointer_rtx));
2392: }
2393: /* If we were deferring a callee register restore, do it now. */
2394: else if (! frame_pointer_needed && merge_sp_adjust_with_load)
2395: emit_insn (gen_pre_ldwm (stack_pointer_rtx,
2396: stack_pointer_rtx,
2397: GEN_INT (- actual_fsize),
2398: gen_rtx (REG, SImode,
2399: merge_sp_adjust_with_load)));
2400: else if (actual_fsize != 0)
2401: set_reg_plus_d (STACK_POINTER_REGNUM,
2402: STACK_POINTER_REGNUM,
2403: - actual_fsize);
2404: }
2405:
2406: /* This is only valid once reload has completed because it depends on
2407: knowing exactly how much (if any) frame there is and...
2408:
2409: It's only valid if there is no frame marker to de-allocate and...
2410:
2411: It's only valid if %r2 hasn't been saved into the caller's frame
2412: (we're not profiling and %r2 isn't live anywhere). */
2413: int
2414: hppa_can_use_return_insn_p ()
2415: {
2416: return (reload_completed
2417: && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
2418: && ! profile_flag
2419: && ! regs_ever_live[2]
2420: && ! frame_pointer_needed);
2421: }
2422:
2423: void
2424: emit_bcond_fp (code, operand0)
2425: enum rtx_code code;
2426: rtx operand0;
2427: {
2428: emit_jump_insn (gen_rtx (SET, VOIDmode, pc_rtx,
2429: gen_rtx (IF_THEN_ELSE, VOIDmode,
2430: gen_rtx (code, VOIDmode,
2431: gen_rtx (REG, CCFPmode, 0),
2432: const0_rtx),
2433: gen_rtx (LABEL_REF, VOIDmode, operand0),
2434: pc_rtx)));
2435:
2436: }
2437:
2438: rtx
2439: gen_cmp_fp (code, operand0, operand1)
2440: enum rtx_code code;
2441: rtx operand0, operand1;
2442: {
2443: return gen_rtx (SET, VOIDmode, gen_rtx (REG, CCFPmode, 0),
2444: gen_rtx (code, CCFPmode, operand0, operand1));
2445: }
2446:
2447: /* Adjust the cost of a scheduling dependency. Return the new cost of
2448: a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
2449:
2450: int
2451: pa_adjust_cost (insn, link, dep_insn, cost)
2452: rtx insn;
2453: rtx link;
2454: rtx dep_insn;
2455: int cost;
2456: {
2457: if (! recog_memoized (insn))
2458: return 0;
2459:
2460: if (REG_NOTE_KIND (link) == 0)
2461: {
2462: /* Data dependency; DEP_INSN writes a register that INSN reads some
2463: cycles later. */
2464:
2465: if (get_attr_type (insn) == TYPE_FPSTORE)
2466: {
2467: rtx pat = PATTERN (insn);
2468: rtx dep_pat = PATTERN (dep_insn);
2469: if (GET_CODE (pat) == PARALLEL)
2470: {
2471: /* This happens for the fstXs,mb patterns. */
2472: pat = XVECEXP (pat, 0, 0);
2473: }
2474: if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
2475: /* If this happens, we have to extend this to schedule
2476: optimally. Return 0 for now. */
2477: return 0;
2478:
2479: if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
2480: {
2481: if (! recog_memoized (dep_insn))
2482: return 0;
2483: /* DEP_INSN is writing its result to the register
2484: being stored in the fpstore INSN. */
2485: switch (get_attr_type (dep_insn))
2486: {
2487: case TYPE_FPLOAD:
2488: /* This cost 3 cycles, not 2 as the md says. */
2489: return cost + 1;
2490:
2491: case TYPE_FPALU:
2492: case TYPE_FPMUL:
2493: case TYPE_FPDIVSGL:
2494: case TYPE_FPDIVDBL:
2495: case TYPE_FPSQRTSGL:
2496: case TYPE_FPSQRTDBL:
2497: /* In these important cases, we save one cycle compared to
2498: when flop instruction feed each other. */
2499: return cost - 1;
2500:
2501: default:
2502: return cost;
2503: }
2504: }
2505: }
2506:
2507: /* For other data dependencies, the default cost specified in the
2508: md is correct. */
2509: return cost;
2510: }
2511: else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
2512: {
2513: /* Anti dependency; DEP_INSN reads a register that INSN writes some
2514: cycles later. */
2515:
2516: if (get_attr_type (insn) == TYPE_FPLOAD)
2517: {
2518: rtx pat = PATTERN (insn);
2519: rtx dep_pat = PATTERN (dep_insn);
2520: if (GET_CODE (pat) == PARALLEL)
2521: {
2522: /* This happens for the fldXs,mb patterns. */
2523: pat = XVECEXP (pat, 0, 0);
2524: }
2525: if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
2526: /* If this happens, we have to extend this to schedule
2527: optimally. Return 0 for now. */
2528: return 0;
2529:
2530: if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
2531: {
2532: if (! recog_memoized (dep_insn))
2533: return 0;
2534: switch (get_attr_type (dep_insn))
2535: {
2536: case TYPE_FPALU:
2537: case TYPE_FPMUL:
2538: case TYPE_FPDIVSGL:
2539: case TYPE_FPDIVDBL:
2540: case TYPE_FPSQRTSGL:
2541: case TYPE_FPSQRTDBL:
2542: /* A fpload can't be issued until one cycle before a
2543: preceeding arithmetic operation has finished, if
2544: the target of the fpload is any of the sources
2545: (or destination) of the arithmetic operation. */
2546: return cost - 1;
2547:
2548: default:
2549: return 0;
2550: }
2551: }
2552: }
2553:
2554: /* For other anti dependencies, the cost is 0. */
2555: return 0;
2556: }
2557:
2558: /* For output dependencies, the cost is often one too high. */
2559: return cost - 1;
2560: }
2561:
2562: /* Return any length adjustment needed by INSN which already has its length
2563: computed as LENGTH. Return zero if no adjustment is necessary.
2564:
2565: For the PA: function calls, millicode calls, and backwards short
2566: conditional branches with unfilled delay slots need an adjustment by +1
2567: (to account for the NOP which will be inserted into the instruction stream).
2568:
2569: Also compute the length of an inline block move here as it is too
2570: complicated to express as a length attribute in pa.md. */
2571: int
2572: pa_adjust_insn_length (insn, length)
2573: rtx insn;
2574: int length;
2575: {
2576: rtx pat = PATTERN (insn);
2577:
2578: /* Call insns which are *not* indirect and have unfilled delay slots. */
2579: if (GET_CODE (insn) == CALL_INSN)
2580: {
2581:
2582: if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL
2583: && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF)
2584: return 4;
2585: else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET
2586: && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0))
2587: == SYMBOL_REF)
2588: return 4;
2589: else
2590: return 0;
2591: }
2592: /* Millicode insn with an unfilled delay slot. */
2593: else if (GET_CODE (insn) == INSN
2594: && GET_CODE (pat) != SEQUENCE
2595: && GET_CODE (pat) != USE
2596: && GET_CODE (pat) != CLOBBER
2597: && get_attr_type (insn) == TYPE_MILLI)
2598: return 4;
2599: /* Block move pattern. */
2600: else if (GET_CODE (insn) == INSN
2601: && GET_CODE (pat) == PARALLEL
2602: && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
2603: && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
2604: && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
2605: && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
2606: return compute_movstrsi_length (insn) - 4;
2607: /* Conditional branch with an unfilled delay slot. */
2608: else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
2609: {
2610: /* Adjust a short backwards conditional with an unfilled delay slot. */
2611: if (GET_CODE (pat) == SET
2612: && length == 4
2613: && ! forward_branch_p (insn))
2614: return 4;
2615: /* Adjust dbra insn with short backwards conditional branch with
2616: unfilled delay slot -- only for case where counter is in a
2617: general register register. */
2618: else if (GET_CODE (pat) == PARALLEL
2619: && GET_CODE (XVECEXP (pat, 0, 1)) == SET
2620: && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
2621: && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
2622: && length == 4
2623: && ! forward_branch_p (insn))
2624: return 4;
2625: else
2626: return 0;
2627: }
2628: else
2629: return 0;
2630: }
2631:
2632: /* Print operand X (an rtx) in assembler syntax to file FILE.
2633: CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
2634: For `%' followed by punctuation, CODE is the punctuation and X is null. */
2635:
2636: void
2637: print_operand (file, x, code)
2638: FILE *file;
2639: rtx x;
2640: int code;
2641: {
2642: switch (code)
2643: {
2644: case '#':
2645: /* Output a 'nop' if there's nothing for the delay slot. */
2646: if (dbr_sequence_length () == 0)
2647: fputs ("\n\tnop", file);
2648: return;
2649: case '*':
2650: /* Output an nullification completer if there's nothing for the */
2651: /* delay slot or nullification is requested. */
2652: if (dbr_sequence_length () == 0 ||
2653: (final_sequence &&
2654: INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
2655: fputs (",n", file);
2656: return;
2657: case 'R':
2658: /* Print out the second register name of a register pair.
2659: I.e., R (6) => 7. */
2660: fputs (reg_names[REGNO (x)+1], file);
2661: return;
2662: case 'r':
2663: /* A register or zero. */
2664: if (x == const0_rtx
2665: || (x == CONST0_RTX (DFmode))
2666: || (x == CONST0_RTX (SFmode)))
2667: {
2668: fputs ("0", file);
2669: return;
2670: }
2671: else
2672: break;
2673: case 'C': /* Plain (C)ondition */
2674: case 'X':
2675: switch (GET_CODE (x))
2676: {
2677: case EQ:
2678: fprintf (file, "="); break;
2679: case NE:
2680: fprintf (file, "<>"); break;
2681: case GT:
2682: fprintf (file, ">"); break;
2683: case GE:
2684: fprintf (file, ">="); break;
2685: case GEU:
2686: fprintf (file, ">>="); break;
2687: case GTU:
2688: fprintf (file, ">>"); break;
2689: case LT:
2690: fprintf (file, "<"); break;
2691: case LE:
2692: fprintf (file, "<="); break;
2693: case LEU:
2694: fprintf (file, "<<="); break;
2695: case LTU:
2696: fprintf (file, "<<"); break;
2697: default:
2698: abort ();
2699: }
2700: return;
2701: case 'N': /* Condition, (N)egated */
2702: switch (GET_CODE (x))
2703: {
2704: case EQ:
2705: fprintf (file, "<>"); break;
2706: case NE:
2707: fprintf (file, "="); break;
2708: case GT:
2709: fprintf (file, "<="); break;
2710: case GE:
2711: fprintf (file, "<"); break;
2712: case GEU:
2713: fprintf (file, "<<"); break;
2714: case GTU:
2715: fprintf (file, "<<="); break;
2716: case LT:
2717: fprintf (file, ">="); break;
2718: case LE:
2719: fprintf (file, ">"); break;
2720: case LEU:
2721: fprintf (file, ">>"); break;
2722: case LTU:
2723: fprintf (file, ">>="); break;
2724: default:
2725: abort ();
2726: }
2727: return;
2728: /* For floating point comparisons. Need special conditions to deal
2729: with NaNs properly. */
2730: case 'Y':
2731: switch (GET_CODE (x))
2732: {
2733: case EQ:
2734: fprintf (file, "!="); break;
2735: case NE:
2736: fprintf (file, "="); break;
2737: case GT:
2738: fprintf (file, "!>"); break;
2739: case GE:
2740: fprintf (file, "!>="); break;
2741: case LT:
2742: fprintf (file, "!<"); break;
2743: case LE:
2744: fprintf (file, "!<="); break;
2745: default:
2746: abort ();
2747: }
2748: return;
2749: case 'S': /* Condition, operands are (S)wapped. */
2750: switch (GET_CODE (x))
2751: {
2752: case EQ:
2753: fprintf (file, "="); break;
2754: case NE:
2755: fprintf (file, "<>"); break;
2756: case GT:
2757: fprintf (file, "<"); break;
2758: case GE:
2759: fprintf (file, "<="); break;
2760: case GEU:
2761: fprintf (file, "<<="); break;
2762: case GTU:
2763: fprintf (file, "<<"); break;
2764: case LT:
2765: fprintf (file, ">"); break;
2766: case LE:
2767: fprintf (file, ">="); break;
2768: case LEU:
2769: fprintf (file, ">>="); break;
2770: case LTU:
2771: fprintf (file, ">>"); break;
2772: default:
2773: abort ();
2774: }
2775: return;
2776: case 'B': /* Condition, (B)oth swapped and negate. */
2777: switch (GET_CODE (x))
2778: {
2779: case EQ:
2780: fprintf (file, "<>"); break;
2781: case NE:
2782: fprintf (file, "="); break;
2783: case GT:
2784: fprintf (file, ">="); break;
2785: case GE:
2786: fprintf (file, ">"); break;
2787: case GEU:
2788: fprintf (file, ">>"); break;
2789: case GTU:
2790: fprintf (file, ">>="); break;
2791: case LT:
2792: fprintf (file, "<="); break;
2793: case LE:
2794: fprintf (file, "<"); break;
2795: case LEU:
2796: fprintf (file, "<<"); break;
2797: case LTU:
2798: fprintf (file, "<<="); break;
2799: default:
2800: abort ();
2801: }
2802: return;
2803: case 'k':
2804: if (GET_CODE (x) == CONST_INT)
2805: {
2806: fprintf (file, "%d", ~INTVAL (x));
2807: return;
2808: }
2809: abort();
2810: case 'L':
2811: if (GET_CODE (x) == CONST_INT)
2812: {
2813: fprintf (file, "%d", 32 - (INTVAL (x) & 31));
2814: return;
2815: }
2816: abort();
2817: case 'O':
2818: if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
2819: {
2820: fprintf (file, "%d", exact_log2 (INTVAL (x)));
2821: return;
2822: }
2823: abort();
2824: case 'P':
2825: if (GET_CODE (x) == CONST_INT)
2826: {
2827: fprintf (file, "%d", 31 - (INTVAL (x) & 31));
2828: return;
2829: }
2830: abort();
2831: case 'I':
2832: if (GET_CODE (x) == CONST_INT)
2833: fputs ("i", file);
2834: return;
2835: case 'M':
2836: switch (GET_CODE (XEXP (x, 0)))
2837: {
2838: case PRE_DEC:
2839: case PRE_INC:
2840: fprintf (file, "s,mb");
2841: break;
2842: case POST_DEC:
2843: case POST_INC:
2844: fprintf (file, "s,ma");
2845: break;
2846: default:
2847: break;
2848: }
2849: return;
2850: case 'F':
2851: switch (GET_CODE (XEXP (x, 0)))
2852: {
2853: case PRE_DEC:
2854: case PRE_INC:
2855: fprintf (file, ",mb");
2856: break;
2857: case POST_DEC:
2858: case POST_INC:
2859: fprintf (file, ",ma");
2860: break;
2861: default:
2862: break;
2863: }
2864: return;
2865: case 'G':
2866: output_global_address (file, x);
2867: return;
2868: case 0: /* Don't do anything special */
2869: break;
2870: case 'Z':
2871: {
2872: unsigned op[3];
2873: compute_zdepi_operands (INTVAL (x), op);
2874: fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
2875: return;
2876: }
2877: default:
2878: abort ();
2879: }
2880: if (GET_CODE (x) == REG)
2881: fprintf (file, "%s", reg_names [REGNO (x)]);
2882: else if (GET_CODE (x) == MEM)
2883: {
2884: int size = GET_MODE_SIZE (GET_MODE (x));
2885: rtx base = XEXP (XEXP (x, 0), 0);
2886: switch (GET_CODE (XEXP (x, 0)))
2887: {
2888: case PRE_DEC:
2889: case POST_DEC:
2890: fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]);
2891: break;
2892: case PRE_INC:
2893: case POST_INC:
2894: fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]);
2895: break;
2896: default:
2897: output_address (XEXP (x, 0));
2898: break;
2899: }
2900: }
2901: else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
2902: {
2903: union { double d; int i[2]; } u;
2904: union { float f; int i; } u1;
2905: u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1);
2906: u1.f = u.d;
2907: if (code == 'f')
2908: fprintf (file, "0r%.9g", u1.f);
2909: else
2910: fprintf (file, "0x%x", u1.i);
2911: }
2912: else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != DImode)
2913: {
2914: union { double d; int i[2]; } u;
2915: u.i[0] = XINT (x, 0); u.i[1] = XINT (x, 1);
2916: fprintf (file, "0r%.20g", u.d);
2917: }
2918: else
2919: output_addr_const (file, x);
2920: }
2921:
2922: /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
2923:
2924: void
2925: output_global_address (file, x)
2926: FILE *file;
2927: rtx x;
2928: {
2929:
2930: /* Imagine (high (const (plus ...))). */
2931: if (GET_CODE (x) == HIGH)
2932: x = XEXP (x, 0);
2933:
2934: if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x))
2935: assemble_name (file, XSTR (x, 0));
2936: else if (GET_CODE (x) == SYMBOL_REF)
2937: {
2938: assemble_name (file, XSTR (x, 0));
2939: fprintf (file, "-$global$");
2940: }
2941: else if (GET_CODE (x) == CONST)
2942: {
2943: char *sep = "";
2944: int offset = 0; /* assembler wants -$global$ at end */
2945: rtx base;
2946:
2947: if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
2948: {
2949: base = XEXP (XEXP (x, 0), 0);
2950: output_addr_const (file, base);
2951: }
2952: else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT)
2953: offset = INTVAL (XEXP (XEXP (x, 0), 0));
2954: else abort ();
2955:
2956: if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF)
2957: {
2958: base = XEXP (XEXP (x, 0), 1);
2959: output_addr_const (file, base);
2960: }
2961: else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2962: offset = INTVAL (XEXP (XEXP (x, 0),1));
2963: else abort ();
2964:
2965: if (GET_CODE (XEXP (x, 0)) == PLUS)
2966: {
2967: if (offset < 0)
2968: {
2969: offset = -offset;
2970: sep = "-";
2971: }
2972: else
2973: sep = "+";
2974: }
2975: else if (GET_CODE (XEXP (x, 0)) == MINUS
2976: && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
2977: sep = "-";
2978: else abort ();
2979:
2980: if (!read_only_operand (base))
2981: fprintf (file, "-$global$");
2982: fprintf (file, "%s", sep);
2983: if (offset) fprintf (file,"%d", offset);
2984: }
2985: else
2986: output_addr_const (file, x);
2987: }
2988:
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes
{
  remI,
  remU,
  divI,
  divU,
  mulI,
  mulU,
  end1000                       /* Number of millicode routines above.  */
};

/* One flag per millicode routine, indexed by enum millicodes; set by
   import_milli.  */
static char imported[(int) end1000];

/* Names of the routines, indexed by enum millicodes.  */
static char *milli_names[] =
{
  "remI",
  "remU",
  "divI",
  "divU",
  "mulI",
  "mulU"
};

/* Template for the import directive; the four dots are overwritten
   with a routine name.  */
static char import_string[] = ".IMPORT $$....,MILLICODE";

/* Index of the first dot in import_string.  */
#define MILLI_START 10
2997:
2998: static void
2999: import_milli (code)
3000: enum millicodes code;
3001: {
3002: char str[sizeof (import_string)];
3003:
3004: if (!imported[(int)code])
3005: {
3006: imported[(int)code] = 1;
3007: strcpy (str, import_string);
3008: strncpy (str + MILLI_START, milli_names[(int)code], 4);
3009: output_asm_insn (str, 0);
3010: }
3011: }
3012:
3013: /* The register constraints have put the operands and return value in
3014: the proper registers. */
3015:
3016: char *
3017: output_mul_insn (unsignedp, insn)
3018: int unsignedp;
3019: rtx insn;
3020: {
3021:
3022: if (unsignedp)
3023: {
3024: import_milli (mulU);
3025: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulU"),
3026: gen_rtx (REG, SImode, 31));
3027: }
3028: else
3029: {
3030: import_milli (mulI);
3031: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$mulI"),
3032: gen_rtx (REG, SImode, 31));
3033: }
3034: }
3035:
3036: /* If operands isn't NULL, then it's a CONST_INT with which we can do
3037: something */
3038:
3039:
3040: /* Emit the rtl for doing a division by a constant. */
3041:
/* Do magic division millicodes exist for this value?  Indexed by the
   divisor, 0 through 15.  */

static int magic_milli[] =
{
  0, 0, 0, 1,                   /*  0 ..  3 */
  0, 1, 1, 1,                   /*  4 ..  7 */
  0, 1, 1, 0,                   /*  8 .. 11 */
  1, 0, 1, 1                    /* 12 .. 15 */
};

/* Keeps track of which magic millicodes have been used already.
   [n][0] is signed, [n][1] is unsigned.  */

static int div_milli[16][2];
3052:
3053: int
3054: div_operand (op, mode)
3055: rtx op;
3056: enum machine_mode mode;
3057: {
3058: return (mode == SImode
3059: && ((GET_CODE (op) == REG && REGNO (op) == 25)
3060: || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0
3061: && INTVAL (op) < 16 && magic_milli[INTVAL (op)])));
3062: }
3063:
3064: int
3065: emit_hpdiv_const (operands, unsignedp)
3066: rtx *operands;
3067: int unsignedp;
3068: {
3069: if (GET_CODE (operands[2]) == CONST_INT
3070: && INTVAL (operands[2]) > 0
3071: && INTVAL (operands[2]) < 16
3072: && magic_milli[INTVAL (operands[2])])
3073: {
3074: emit_move_insn ( gen_rtx (REG, SImode, 26), operands[1]);
3075: emit
3076: (gen_rtx
3077: (PARALLEL, VOIDmode,
3078: gen_rtvec (5, gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 29),
3079: gen_rtx (unsignedp ? UDIV : DIV, SImode,
3080: gen_rtx (REG, SImode, 26),
3081: operands[2])),
3082: gen_rtx (CLOBBER, VOIDmode, operands[3]),
3083: gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 26)),
3084: gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 25)),
3085: gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, SImode, 31)))));
3086: emit_move_insn (operands[0], gen_rtx (REG, SImode, 29));
3087: return 1;
3088: }
3089: return 0;
3090: }
3091:
3092: char *
3093: output_div_insn (operands, unsignedp, insn)
3094: rtx *operands;
3095: int unsignedp;
3096: rtx insn;
3097: {
3098: int divisor;
3099:
3100: /* If the divisor is a constant, try to use one of the special
3101: opcodes .*/
3102: if (GET_CODE (operands[0]) == CONST_INT)
3103: {
3104: static char buf[100];
3105: divisor = INTVAL (operands[0]);
3106: if (!div_milli[divisor][unsignedp])
3107: {
3108: div_milli[divisor][unsignedp] = 1;
3109: if (unsignedp)
3110: output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
3111: else
3112: output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
3113: }
3114: if (unsignedp)
3115: {
3116: sprintf (buf, "$$divU_%d", INTVAL (operands[0]));
3117: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, buf),
3118: gen_rtx (REG, SImode, 31));
3119: }
3120: else
3121: {
3122: sprintf (buf, "$$divI_%d", INTVAL (operands[0]));
3123: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, buf),
3124: gen_rtx (REG, SImode, 31));
3125: }
3126: }
3127: /* Divisor isn't a special constant. */
3128: else
3129: {
3130: if (unsignedp)
3131: {
3132: import_milli (divU);
3133: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$divU"),
3134: gen_rtx (REG, SImode, 31));
3135: }
3136: else
3137: {
3138: import_milli (divI);
3139: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$divI"),
3140: gen_rtx (REG, SImode, 31));
3141: }
3142: }
3143: }
3144:
3145: /* Output a $$rem millicode to do mod. */
3146:
3147: char *
3148: output_mod_insn (unsignedp, insn)
3149: int unsignedp;
3150: rtx insn;
3151: {
3152: if (unsignedp)
3153: {
3154: import_milli (remU);
3155: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$remU"),
3156: gen_rtx (REG, SImode, 31));
3157: }
3158: else
3159: {
3160: import_milli (remI);
3161: return output_call (insn, gen_rtx (SYMBOL_REF, SImode, "$$remI"),
3162: gen_rtx (REG, SImode, 31));
3163: }
3164: }
3165:
3166: void
3167: output_arg_descriptor (insn)
3168: rtx insn;
3169: {
3170: char *arg_regs[4];
3171: enum machine_mode arg_mode;
3172: rtx prev_insn;
3173: int i, output_flag = 0;
3174: int regno;
3175:
3176: for (i = 0; i < 4; i++)
3177: arg_regs[i] = 0;
3178:
3179: /* Specify explicitly that no argument relocations should take place
3180: if using the portable runtime calling conventions. */
3181: if (TARGET_PORTABLE_RUNTIME)
3182: {
3183: fprintf (asm_out_file,
3184: "\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n");
3185: return;
3186: }
3187:
3188: for (prev_insn = PREV_INSN (insn); GET_CODE (prev_insn) == INSN;
3189: prev_insn = PREV_INSN (prev_insn))
3190: {
3191: /* Terminate search for arguments if a non-USE insn is encountered
3192: or a USE insn which does not specify an argument, STATIC_CHAIN,
3193: or STRUCT_VALUE register. */
3194: if (!(GET_CODE (PATTERN (prev_insn)) == USE
3195: && GET_CODE (XEXP (PATTERN (prev_insn), 0)) == REG
3196: && (FUNCTION_ARG_REGNO_P (REGNO (XEXP (PATTERN (prev_insn), 0)))
3197: || REGNO (XEXP (PATTERN (prev_insn), 0)) == STATIC_CHAIN_REGNUM
3198: || REGNO (XEXP (PATTERN (prev_insn), 0))
3199: == STRUCT_VALUE_REGNUM)))
3200: break;
3201:
3202: /* If this is a USE for the STATIC_CHAIN or STRUCT_VALUE register,
3203: then skip it and continue the loop since those are not encoded
3204: in the argument relocation bits. */
3205: if (REGNO (XEXP (PATTERN (prev_insn), 0)) == STATIC_CHAIN_REGNUM
3206: || REGNO (XEXP (PATTERN (prev_insn), 0)) == STRUCT_VALUE_REGNUM)
3207: continue;
3208:
3209: arg_mode = GET_MODE (XEXP (PATTERN (prev_insn), 0));
3210: regno = REGNO (XEXP (PATTERN (prev_insn), 0));
3211: if (regno >= 23 && regno <= 26)
3212: {
3213: arg_regs[26 - regno] = "GR";
3214: if (arg_mode == DImode)
3215: arg_regs[25 - regno] = "GR";
3216: }
3217: else if (!TARGET_SNAKE) /* fp args */
3218: {
3219: if (arg_mode == SFmode)
3220: arg_regs[regno - 32] = "FR";
3221: else
3222: {
3223: #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
3224: arg_regs[regno - 33] = "FR";
3225: arg_regs[regno - 32] = "FU";
3226: #else
3227: arg_regs[regno - 33] = "FU";
3228: arg_regs[regno - 32] = "FR";
3229: #endif
3230: }
3231: }
3232: else
3233: {
3234: if (arg_mode == SFmode)
3235: arg_regs[(regno - 44) / 2] = "FR";
3236: else
3237: {
3238: #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
3239: arg_regs[(regno - 46) / 2] = "FR";
3240: arg_regs[(regno - 46) / 2 + 1] = "FU";
3241: #else
3242: arg_regs[(regno - 46) / 2] = "FU";
3243: arg_regs[(regno - 46) / 2 + 1] = "FR";
3244: #endif
3245: }
3246: }
3247: }
3248: fputs ("\t.CALL ", asm_out_file);
3249: for (i = 0; i < 4; i++)
3250: {
3251: if (arg_regs[i])
3252: {
3253: if (output_flag++)
3254: fputc (',', asm_out_file);
3255: fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
3256: }
3257: }
3258: fputc ('\n', asm_out_file);
3259: }
3260:
3261: /* Memory loads/stores to/from the shift need to go through
3262: the general registers. */
3263:
3264: enum reg_class
3265: secondary_reload_class (class, mode, in)
3266: enum reg_class class;
3267: enum machine_mode mode;
3268: rtx in;
3269: {
3270: int regno = true_regnum (in);
3271:
3272: if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
3273: && GET_MODE_CLASS (mode) == MODE_INT
3274: && FP_REG_CLASS_P (class))
3275: || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
3276: return GENERAL_REGS;
3277:
3278: if (GET_CODE (in) == HIGH)
3279: in = XEXP (in, 0);
3280:
3281: if (class != R1_REGS && symbolic_operand (in, VOIDmode))
3282: return R1_REGS;
3283:
3284: return NO_REGS;
3285: }
3286:
3287: enum direction
3288: function_arg_padding (mode, type)
3289: enum machine_mode mode;
3290: tree type;
3291: {
3292: int size;
3293:
3294: if (mode == BLKmode)
3295: {
3296: if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
3297: size = int_size_in_bytes (type) * BITS_PER_UNIT;
3298: else
3299: return upward; /* Don't know if this is right, but */
3300: /* same as old definition. */
3301: }
3302: else
3303: size = GET_MODE_BITSIZE (mode);
3304: if (size < PARM_BOUNDARY)
3305: return downward;
3306: else if (size % PARM_BOUNDARY)
3307: return upward;
3308: else
3309: return none;
3310: }
3311:
3312:
3313: /* Do what is necessary for `va_start'. The argument is ignored;
3314: We look at the current function to determine if stdargs or varargs
3315: is used and fill in an initial va_list. A pointer to this constructor
3316: is returned. */
3317:
3318: struct rtx_def *
3319: hppa_builtin_saveregs (arglist)
3320: tree arglist;
3321: {
3322: rtx offset;
3323: tree fntype = TREE_TYPE (current_function_decl);
3324: int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
3325: && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3326: != void_type_node)))
3327: ? UNITS_PER_WORD : 0);
3328:
3329: if (argadj)
3330: offset = plus_constant (current_function_arg_offset_rtx, argadj);
3331: else
3332: offset = current_function_arg_offset_rtx;
3333:
3334: /* Store general registers on the stack. */
3335: move_block_from_reg (23,
3336: gen_rtx (MEM, BLKmode,
3337: plus_constant
3338: (current_function_internal_arg_pointer, -16)),
3339: 4, 4 * UNITS_PER_WORD);
3340: return copy_to_reg (expand_binop (Pmode, add_optab,
3341: current_function_internal_arg_pointer,
3342: offset, 0, 0, OPTAB_LIB_WIDEN));
3343: }
3344:
3345: /* This routine handles all the normal conditional branch sequences we
3346: might need to generate. It handles compare immediate vs compare
3347: register, nullification of delay slots, varying length branches,
3348: negated branches, and all combinations of the above. It returns the
3349: output appropriate to emit the branch corresponding to all given
3350: parameters. */
3351:
3352: char *
3353: output_cbranch (operands, nullify, length, negated, insn)
3354: rtx *operands;
3355: int nullify, length, negated;
3356: rtx insn;
3357: {
3358: static char buf[100];
3359: int useskip = 0;
3360:
3361: /* A conditional branch to the following instruction (eg the delay slot) is
3362: asking for a disaster. This can happen when not optimizing.
3363:
3364: In such cases it is safe to emit nothing. */
3365:
3366: if (JUMP_LABEL (insn) == next_nonnote_insn (insn))
3367: return "";
3368:
3369: /* If this is a long branch with its delay slot unfilled, set `nullify'
3370: as it can nullify the delay slot and save a nop. */
3371: if (length == 8 && dbr_sequence_length () == 0)
3372: nullify = 1;
3373:
3374: /* If this is a short forward conditional branch which did not get
3375: its delay slot filled, the delay slot can still be nullified. */
3376: if (! nullify && length == 4 && dbr_sequence_length () == 0)
3377: nullify = forward_branch_p (insn);
3378:
3379: /* A forward branch over a single nullified insn can be done with a
3380: comclr instruction. This avoids a single cycle penalty due to
3381: mis-predicted branch if we fall through (branch not taken). */
3382: if (length == 4
3383: && next_real_insn (insn) != 0
3384: && get_attr_length (next_real_insn (insn)) == 4
3385: && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
3386: && nullify)
3387: useskip = 1;
3388:
3389: switch (length)
3390: {
3391: /* All short conditional branches except backwards with an unfilled
3392: delay slot. */
3393: case 4:
3394: if (useskip)
3395: strcpy (buf, "com%I2clr,");
3396: else
3397: strcpy (buf, "com%I2b,");
3398: if (negated)
3399: strcat (buf, "%B3");
3400: else
3401: strcat (buf, "%S3");
3402: if (useskip)
3403: strcat (buf, " %2,%1,0");
3404: else if (nullify)
3405: strcat (buf, ",n %2,%1,%0");
3406: else
3407: strcat (buf, " %2,%1,%0");
3408: break;
3409:
3410: /* All long conditionals. Note an short backward branch with an
3411: unfilled delay slot is treated just like a long backward branch
3412: with an unfilled delay slot. */
3413: case 8:
3414: /* Handle weird backwards branch with a filled delay slot
3415: with is nullified. */
3416: if (dbr_sequence_length () != 0
3417: && ! forward_branch_p (insn)
3418: && nullify)
3419: {
3420: strcpy (buf, "com%I2b,");
3421: if (negated)
3422: strcat (buf, "%S3");
3423: else
3424: strcat (buf, "%B3");
3425: strcat (buf, ",n %2,%1,.+12\n\tbl %0,0");
3426: }
3427: else
3428: {
3429: strcpy (buf, "com%I2clr,");
3430: if (negated)
3431: strcat (buf, "%S3");
3432: else
3433: strcat (buf, "%B3");
3434: if (nullify)
3435: strcat (buf, " %2,%1,0\n\tbl,n %0,0");
3436: else
3437: strcat (buf, " %2,%1,0\n\tbl %0,0");
3438: }
3439: break;
3440:
3441: default:
3442: abort();
3443: }
3444: return buf;
3445: }
3446:
3447: /* This routine handles all the branch-on-bit conditional branch sequences we
3448: might need to generate. It handles nullification of delay slots,
3449: varying length branches, negated branches and all combinations of the
3450: above. it returns the appropriate output template to emit the branch. */
3451:
3452: char *
3453: output_bb (operands, nullify, length, negated, insn, which)
3454: rtx *operands;
3455: int nullify, length, negated;
3456: rtx insn;
3457: int which;
3458: {
3459: static char buf[100];
3460: int useskip = 0;
3461:
3462: /* A conditional branch to the following instruction (eg the delay slot) is
3463: asking for a disaster. I do not think this can happen as this pattern
3464: is only used when optimizing; jump optimization should eliminate the
3465: jump. But be prepared just in case. */
3466:
3467: if (JUMP_LABEL (insn) == next_nonnote_insn (insn))
3468: return "";
3469:
3470: /* If this is a long branch with its delay slot unfilled, set `nullify'
3471: as it can nullify the delay slot and save a nop. */
3472: if (length == 8 && dbr_sequence_length () == 0)
3473: nullify = 1;
3474:
3475: /* If this is a short forward conditional branch which did not get
3476: its delay slot filled, the delay slot can still be nullified. */
3477: if (! nullify && length == 4 && dbr_sequence_length () == 0)
3478: nullify = forward_branch_p (insn);
3479:
3480: /* A forward branch over a single nullified insn can be done with a
3481: extrs instruction. This avoids a single cycle penalty due to
3482: mis-predicted branch if we fall through (branch not taken). */
3483:
3484: if (length == 4
3485: && next_real_insn (insn) != 0
3486: && get_attr_length (next_real_insn (insn)) == 4
3487: && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
3488: && nullify)
3489: useskip = 1;
3490:
3491: switch (length)
3492: {
3493:
3494: /* All short conditional branches except backwards with an unfilled
3495: delay slot. */
3496: case 4:
3497: if (useskip)
3498: strcpy (buf, "extrs,");
3499: else
3500: strcpy (buf, "bb,");
3501: if ((which == 0 && negated)
3502: || (which == 1 && ! negated))
3503: strcat (buf, ">=");
3504: else
3505: strcat (buf, "<");
3506: if (useskip)
3507: strcat (buf, " %0,%1,1,0");
3508: else if (nullify && negated)
3509: strcat (buf, ",n %0,%1,%3");
3510: else if (nullify && ! negated)
3511: strcat (buf, ",n %0,%1,%2");
3512: else if (! nullify && negated)
3513: strcat (buf, "%0,%1,%3");
3514: else if (! nullify && ! negated)
3515: strcat (buf, " %0,%1,%2");
3516: break;
3517:
3518: /* All long conditionals. Note an short backward branch with an
3519: unfilled delay slot is treated just like a long backward branch
3520: with an unfilled delay slot. */
3521: case 8:
3522: /* Handle weird backwards branch with a filled delay slot
3523: with is nullified. */
3524: if (dbr_sequence_length () != 0
3525: && ! forward_branch_p (insn)
3526: && nullify)
3527: {
3528: strcpy (buf, "bb,");
3529: if ((which == 0 && negated)
3530: || (which == 1 && ! negated))
3531: strcat (buf, "<");
3532: else
3533: strcat (buf, ">=");
3534: if (negated)
3535: strcat (buf, " %0,%1,.+12\n\tbl %3,0");
3536: else
3537: strcat (buf, " %0,%1,.+12\n\tbl %2,0");
3538: }
3539: else
3540: {
3541: strcpy (buf, "extrs,");
3542: if ((which == 0 && negated)
3543: || (which == 1 && ! negated))
3544: strcat (buf, "<");
3545: else
3546: strcat (buf, ">=");
3547: if (nullify && negated)
3548: strcat (buf, " %0,%1,1,0\n\tbl,n %3,0");
3549: else if (nullify && ! negated)
3550: strcat (buf, " %0,%1,1,0\n\tbl,n %2,0");
3551: else if (negated)
3552: strcat (buf, " %0,%1,1,0\n\tbl %3,0");
3553: else
3554: strcat (buf, " %0,%1,1,0\n\tbl %2,0");
3555: }
3556: break;
3557:
3558: default:
3559: abort();
3560: }
3561: return buf;
3562: }
3563:
3564: /* Return the output template for emitting a dbra type insn.
3565:
3566: Note it may perform some output operations on its own before
3567: returning the final output string. */
3568: char *
3569: output_dbra (operands, insn, which_alternative)
3570: rtx *operands;
3571: rtx insn;
3572: int which_alternative;
3573: {
3574:
3575: /* A conditional branch to the following instruction (eg the delay slot) is
3576: asking for a disaster. Be prepared! */
3577:
3578: if (JUMP_LABEL (insn) == next_nonnote_insn (insn))
3579: {
3580: if (which_alternative == 0)
3581: return "ldo %1(%0),%0";
3582: else if (which_alternative == 1)
3583: {
3584: output_asm_insn ("fstws %0,-16(0,%%r30)",operands);
3585: output_asm_insn ("ldw -16(0,%%r30),%4",operands);
3586: output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
3587: return "fldws -16(0,%%r30),%0";
3588: }
3589: else
3590: {
3591: output_asm_insn ("ldw %0,%4", operands);
3592: return "ldo %1(%4),%4\n\tstw %4,%0";
3593: }
3594: }
3595:
3596: if (which_alternative == 0)
3597: {
3598: int nullify = INSN_ANNULLED_BRANCH_P (insn);
3599: int length = get_attr_length (insn);
3600:
3601: /* If this is a long branch with its delay slot unfilled, set `nullify'
3602: as it can nullify the delay slot and save a nop. */
3603: if (length == 8 && dbr_sequence_length () == 0)
3604: nullify = 1;
3605:
3606: /* If this is a short forward conditional branch which did not get
3607: its delay slot filled, the delay slot can still be nullified. */
3608: if (! nullify && length == 4 && dbr_sequence_length () == 0)
3609: nullify = forward_branch_p (insn);
3610:
3611: /* Handle short versions first. */
3612: if (length == 4 && nullify)
3613: return "addib,%C2,n %1,%0,%3";
3614: else if (length == 4 && ! nullify)
3615: return "addib,%C2 %1,%0,%3";
3616: else if (length == 8)
3617: {
3618: /* Handle weird backwards branch with a fulled delay slot
3619: which is nullified. */
3620: if (dbr_sequence_length () != 0
3621: && ! forward_branch_p (insn)
3622: && nullify)
3623: return "addib,%N2,n %1,%0,.+12\n\tbl %3,0";
3624:
3625: /* Handle normal cases. */
3626: if (nullify)
3627: return "addi,%N2 %1,%0,%0\n\tbl,n %3,0";
3628: else
3629: return "addi,%N2 %1,%0,%0\n\tbl %3,0";
3630: }
3631: else
3632: abort();
3633: }
3634: /* Deal with gross reload from FP register case. */
3635: else if (which_alternative == 1)
3636: {
3637: /* Move loop counter from FP register to MEM then into a GR,
3638: increment the GR, store the GR into MEM, and finally reload
3639: the FP register from MEM from within the branch's delay slot. */
3640: output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands);
3641: output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands);
3642: if (get_attr_length (insn) == 24)
3643: return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0";
3644: else
3645: return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
3646: }
3647: /* Deal with gross reload from memory case. */
3648: else
3649: {
3650: /* Reload loop counter from memory, the store back to memory
3651: happens in the branch's delay slot. */
3652: output_asm_insn ("ldw %0,%4", operands);
3653: if (get_attr_length (insn) == 12)
3654: return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
3655: else
3656: return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0";
3657: }
3658: }
3659:
3660: /* Return the output template for emitting a dbra type insn.
3661:
3662: Note it may perform some output operations on its own before
3663: returning the final output string. */
3664: char *
3665: output_movb (operands, insn, which_alternative, reverse_comparison)
3666: rtx *operands;
3667: rtx insn;
3668: int which_alternative;
3669: int reverse_comparison;
3670: {
3671:
3672: /* A conditional branch to the following instruction (eg the delay slot) is
3673: asking for a disaster. Be prepared! */
3674:
3675: if (JUMP_LABEL (insn) == next_nonnote_insn (insn))
3676: {
3677: if (which_alternative == 0)
3678: return "copy %1,%0";
3679: else if (which_alternative == 1)
3680: {
3681: output_asm_insn ("stw %1,-16(0,%%r30)",operands);
3682: return "fldws -16(0,%%r30),%0";
3683: }
3684: else
3685: return "stw %1,%0";
3686: }
3687:
3688: /* Support the second variant. */
3689: if (reverse_comparison)
3690: PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
3691:
3692: if (which_alternative == 0)
3693: {
3694: int nullify = INSN_ANNULLED_BRANCH_P (insn);
3695: int length = get_attr_length (insn);
3696:
3697: /* If this is a long branch with its delay slot unfilled, set `nullify'
3698: as it can nullify the delay slot and save a nop. */
3699: if (length == 8 && dbr_sequence_length () == 0)
3700: nullify = 1;
3701:
3702: /* If this is a short forward conditional branch which did not get
3703: its delay slot filled, the delay slot can still be nullified. */
3704: if (! nullify && length == 4 && dbr_sequence_length () == 0)
3705: nullify = forward_branch_p (insn);
3706:
3707: /* Handle short versions first. */
3708: if (length == 4 && nullify)
3709: return "movb,%C2,n %1,%0,%3";
3710: else if (length == 4 && ! nullify)
3711: return "movb,%C2 %1,%0,%3";
3712: else if (length == 8)
3713: {
3714: /* Handle weird backwards branch with a filled delay slot
3715: which is nullified. */
3716: if (dbr_sequence_length () != 0
3717: && ! forward_branch_p (insn)
3718: && nullify)
3719: return "movb,%N2,n %1,%0,.+12\n\ttbl %3,0";
3720:
3721: /* Handle normal cases. */
3722: if (nullify)
3723: return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0";
3724: else
3725: return "or,%N2 %1,%%r0,%0\n\tbl %3,0";
3726: }
3727: else
3728: abort();
3729: }
3730: /* Deal with gross reload from FP register case. */
3731: else if (which_alternative == 1)
3732: {
3733: /* Move loop counter from FP register to MEM then into a GR,
3734: increment the GR, store the GR into MEM, and finally reload
3735: the FP register from MEM from within the branch's delay slot. */
3736: output_asm_insn ("stw %1,-16(0,%%r30)",operands);
3737: if (get_attr_length (insn) == 12)
3738: return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0";
3739: else
3740: return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0";
3741: }
3742: /* Deal with gross reload from memory case. */
3743: else
3744: {
3745: /* Reload loop counter from memory, the store back to memory
3746: happens in the branch's delay slot. */
3747: if (get_attr_length (insn) == 8)
3748: return "comb,%S2 0,%1,%3\n\tstw %1,%0";
3749: else
3750: return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0";
3751: }
3752: }
3753:
3754:
3755: /* INSN is either a function call or a millicode call. It may have an
3756: unconditional jump in its delay slot.
3757:
3758: CALL_DEST is the routine we are calling.
3759:
3760: RETURN_POINTER is the register which will hold the return address.
3761: %r2 for most calls, %r31 for millicode calls. */
3762: char *
3763: output_call (insn, call_dest, return_pointer)
3764: rtx insn;
3765: rtx call_dest;
3766: rtx return_pointer;
3767:
3768: {
3769: int distance;
3770: rtx xoperands[4];
3771: rtx seq_insn;
3772:
3773: /* Handle common case -- empty delay slot or no jump in the delay slot. */
3774: if (dbr_sequence_length () == 0
3775: || (dbr_sequence_length () != 0
3776: && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN))
3777: {
3778: xoperands[0] = call_dest;
3779: xoperands[1] = return_pointer;
3780: output_asm_insn ("bl %0,%r1%#", xoperands);
3781: return "";
3782: }
3783:
3784: /* This call has an unconditional jump in its delay slot. */
3785:
3786: /* Use the containing sequence insn's address. */
3787: seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
3788:
3789: distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))]
3790: - insn_addresses[INSN_UID (seq_insn)] - 8;
3791:
3792: /* If the branch was too far away, emit a normal call followed
3793: by a nop, followed by the unconditional branch.
3794:
3795: If the branch is close, then adjust %r2 from within the
3796: call's delay slot. */
3797:
3798: xoperands[0] = call_dest;
3799: xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
3800: xoperands[2] = return_pointer;
3801: if (! VAL_14_BITS_P (distance))
3802: output_asm_insn ("bl %0,%r2\n\tnop\n\tbl,n %1,%%r0", xoperands);
3803: else
3804: {
3805: xoperands[3] = gen_label_rtx ();
3806: output_asm_insn ("\n\tbl %0,%r2\n\tldo %1-%3(%r2),%r2", xoperands);
3807: ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3808: CODE_LABEL_NUMBER (xoperands[3]));
3809: }
3810:
3811: /* Delete the jump. */
3812: PUT_CODE (NEXT_INSN (insn), NOTE);
3813: NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
3814: NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
3815: return "";
3816: }
3817:
3818: extern struct obstack *saveable_obstack;
3819:
3820: /* In HPUX 8.0's shared library scheme, special relocations are needed
3821: for function labels if they might be passed to a function
3822: in a shared library (because shared libraries don't live in code
3823: space), and special magic is needed to construct their address. */
3824:
3825: void
3826: hppa_encode_label (sym)
3827: rtx sym;
3828: {
3829: char *str = XSTR (sym, 0);
3830: int len = strlen (str);
3831: char *newstr = obstack_alloc (saveable_obstack, len + 2) ;
3832:
3833: if (str[0] == '*')
3834: *newstr++ = *str++;
3835: strcpy (newstr + 1, str);
3836: *newstr = '@';
3837: XSTR (sym,0) = newstr;
3838: }
3839:
3840: int
3841: function_label_operand (op, mode)
3842: rtx op;
3843: enum machine_mode mode;
3844: {
3845: return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
3846: }
3847:
3848: /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
3849: use in fmpyadd instructions. */
3850: int
3851: fmpyaddoperands(operands)
3852: rtx *operands;
3853: {
3854: enum machine_mode mode = GET_MODE (operands[0]);
3855:
3856: /* All modes must be the same. */
3857: if (! (mode == GET_MODE (operands[1])
3858: && mode == GET_MODE (operands[2])
3859: && mode == GET_MODE (operands[3])
3860: && mode == GET_MODE (operands[4])
3861: && mode == GET_MODE (operands[5])))
3862: return 0;
3863:
3864: /* Both DFmode and SFmode should work. But using SFmode makes the
3865: assembler complain. Just turn it off for now. */
3866: if (mode != DFmode)
3867: return 0;
3868:
3869: /* Only 2 real operands to the addition. One of the input operands must
3870: be the same as the output operand. */
3871: if (! rtx_equal_p (operands[3], operands[4])
3872: && ! rtx_equal_p (operands[3], operands[5]))
3873: return 0;
3874:
3875: /* Inout operand of add can not conflict with any operands from multiply. */
3876: if (rtx_equal_p (operands[3], operands[0])
3877: || rtx_equal_p (operands[3], operands[1])
3878: || rtx_equal_p (operands[3], operands[2]))
3879: return 0;
3880:
3881: /* multiply can not feed into addition operands. */
3882: if (rtx_equal_p (operands[4], operands[0])
3883: || rtx_equal_p (operands[5], operands[0]))
3884: return 0;
3885:
3886: /* Passed. Operands are suitable for fmpyadd. */
3887: return 1;
3888: }
3889:
3890: /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
3891: use in fmpysub instructions. */
3892: int
3893: fmpysuboperands(operands)
3894: rtx *operands;
3895: {
3896: enum machine_mode mode = GET_MODE (operands[0]);
3897:
3898: /* All modes must be the same. */
3899: if (! (mode == GET_MODE (operands[1])
3900: && mode == GET_MODE (operands[2])
3901: && mode == GET_MODE (operands[3])
3902: && mode == GET_MODE (operands[4])
3903: && mode == GET_MODE (operands[5])))
3904: return 0;
3905:
3906: /* Both DFmode and SFmode should work. But using SFmode makes the
3907: assembler complain. Just turn it off for now. */
3908: if (mode != DFmode)
3909: return 0;
3910:
3911: /* Only 2 real operands to the subtraction. Subtraction is not a commutative
3912: operation, so operands[4] must be the same as operand[3]. */
3913: if (! rtx_equal_p (operands[3], operands[4]))
3914: return 0;
3915:
3916: /* multiply can not feed into subtraction. */
3917: if (rtx_equal_p (operands[5], operands[0]))
3918: return 0;
3919:
3920: /* Inout operand of sub can not conflict with any operands from multiply. */
3921: if (rtx_equal_p (operands[3], operands[0])
3922: || rtx_equal_p (operands[3], operands[1])
3923: || rtx_equal_p (operands[3], operands[2]))
3924: return 0;
3925:
3926: /* Passed. Operands are suitable for fmpysub. */
3927: return 1;
3928: }
3929:
3930: int
3931: plus_xor_ior_operator (op, mode)
3932: rtx op;
3933: enum machine_mode mode;
3934: {
3935: return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR
3936: || GET_CODE (op) == IOR);
3937: }
3938:
/* Return 1 if VAL is 2, 4, or 8, else return 0.  These are the valid
   constants for shadd instructions.  */
int
shadd_constant_p (val)
     int val;
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;

    default:
      return 0;
    }
}
3950:
3951: /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are
3952: the valid constant for shadd instructions. */
3953: int
3954: shadd_operand (op, mode)
3955: rtx op;
3956: enum machine_mode mode;
3957: {
3958: return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op)));
3959: }
3960:
3961: /* Return 1 if INSN branches forward. Should be using insn_addresses
3962: to avoid walking through all the insns... */
3963: int
3964: forward_branch_p (insn)
3965: rtx insn;
3966: {
3967: rtx label = JUMP_LABEL (insn);
3968:
3969: while (insn)
3970: {
3971: if (insn == label)
3972: break;
3973: else
3974: insn = NEXT_INSN (insn);
3975: }
3976:
3977: return (insn == label);
3978: }
3979:
3980: /* Return 1 if OP is an equality comparison, else return 0. */
3981: int
3982: eq_neq_comparison_operator (op, mode)
3983: rtx op;
3984: enum machine_mode mode;
3985: {
3986: return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
3987: }
3988:
3989: /* Return 1 if OP is an operator suitable for use in a movb instruction. */
3990: int
3991: movb_comparison_operator (op, mode)
3992: rtx op;
3993: enum machine_mode mode;
3994: {
3995: return (GET_CODE (op) == EQ || GET_CODE (op) == NE
3996: || GET_CODE (op) == LT || GET_CODE (op) == GE);
3997: }
3998:
3999: /* Return 1 if INSN is in the delay slot of a call instruction. */
4000: int
4001: jump_in_call_delay (insn)
4002: rtx insn;
4003: {
4004:
4005: if (GET_CODE (insn) != JUMP_INSN)
4006: return 0;
4007:
4008: if (PREV_INSN (insn)
4009: && PREV_INSN (PREV_INSN (insn))
4010: && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
4011: {
4012: rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
4013:
4014: return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
4015: && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
4016:
4017: }
4018: else
4019: return 0;
4020: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.