|
|
1.1 root 1: \input verbatim
2: \input itemize
3: \chapter{A small assembler for the MIPS}
4: This is part of the code generator for Standard ML of New Jersey.
5: We generate code in several stages.
6: This is nearly the lowest stage; it is like an assembler.
7: The user can call any function in the MIPSCODER signature.
8: Each one corresponds to an assembler pseudo-instruction.
9: Most correspond to single MIPS instructions.
10: The assembler remembers all the instructions that have been
11: requested, and when [[codegen]] is called it generates MIPS
12: code for them.
13:
14: Some other structure will be able to use the MIPS structure to implement
15: a [[CMACHINE]], which is the abstract machine that ML thinks it is running
16: on.
17: (What really happens is a functor maps some structure
18: implementing [[MIPSCODER]] to a different structure implementing
19: [[CMACHINE]].)
20:
21: {\em Any function using a structure of this signature must avoid
22: touching registers 1~and~31.
23: Those registers are reserved for use by the assembler.}
24:
25: @ Here is the signature of the assembler, [[MIPSCODER]].
26: It can be extracted from this file by
27: $$\hbox{\tt notangle mipsinstr.nw -Rsignature}.$$
28: <<signature>>=
29: signature MIPSCODER = sig
30:
31: (* Assembler for the MIPS chip *)
32:
33: eqtype Label
34: datatype Register = Reg of int
35: (* Registers 1 and 31 are reserved for use by this assembler *)
36: datatype EA = Direct of Register | Immed of int | Immedlab of Label
37: (* effective address *)
38:
39: structure M : sig
40:
41: (* Emit various constants into the code *)
42:
43: val emitstring : string -> unit (* put a literal string into the
44: code (null-terminated?) and
45: extend with nulls to 4-byte
46: boundary. Just chars, no
47: descriptor or length *)
48: val realconst : string -> unit (* emit a floating pt literal *)
49: (* NOT RIGHT YET *)
50: val emitlong : int -> unit (* emit a 4-byte integer literal *)
51:
52:
53: (* Label bindings and emissions *)
54:
55: val newlabel : unit -> Label (* new, unbound label *)
56: val define : Label -> unit (* cause the label to be bound to
57: the code about to be generated *)
58: val emitlab : int * Label -> unit (* L3: emitlab(k,L2) is equivalent to
59: L3: emitlong(k+L2-L3) *)
60:
61: (* Control flow instructions *)
62:
63: val slt : Register * EA * Register -> unit
64: (* (operand1, operand2, result) *)
65: (* set less than family *)
66: val beq : bool * Register * Register * Label -> unit
67: (* (beq or bne, operand1, operand2, branch address) *)
68: (* branch equal/not equal family *)
69:
70: val jump : Register -> unit (* jump register instruction *)
71:
72: val slt_double : Register * Register -> unit
73: (* floating pt set less than *)
74: val seq_double : Register * Register -> unit
75: (* floating pt set equal *)
76: val bcop1 : bool * Label -> unit (* floating pt conditional branch *)
77:
78:
79: (* Arithmetic instructions *)
80: (* arguments are (operand1, operand2, result) *)
81:
82: val add : Register * EA * Register -> unit
83: val and' : Register * EA * Register -> unit
84: val or : Register * EA * Register -> unit
85: val xor : Register * EA * Register -> unit
86: val sub : Register * Register * Register -> unit
87: val div : Register * Register * Register -> unit
88: val mult : Register * Register * Register -> unit
89: val mfhi : Register -> unit (* high word of 64-bit multiply *)
90:
91: (* Floating point arithmetic *)
92:
93: val neg_double : Register * Register -> unit
94: val mul_double : Register * Register * Register -> unit
95: val div_double : Register * Register * Register -> unit
96: val add_double : Register * Register * Register -> unit
97: val sub_double : Register * Register * Register -> unit
98:
99: (* Move pseudo-instruction : move(src,dest) *)
100:
101: val move : EA * Register -> unit
102:
103: (* Load and store instructions *)
104: (* arguments are (destination, source address, offset) *)
105:
106: val lbu : Register * EA * int -> unit (* bytes *)
107: val sb : Register * EA * int -> unit
108: val lw : Register * EA * int -> unit (* words *)
109: val sw : Register * EA * int -> unit
110: val lwc1: Register * EA * int -> unit (* floating point coprocessor *)
111: val swc1: Register * EA * int -> unit
112: val lui : Register * int -> unit
113:
114: (* Shift instructions *)
115: (* arguments are (shamt, operand, result) *)
116: (* shamt as Immedlab _ is senseless *)
117:
118: val sll : EA * Register * Register -> unit
119: val sra : EA * Register * Register -> unit
120:
121:
122: (* Miscellany *)
123:
124: val align : unit -> unit (* cause next data to be emitted on
125: a 4-byte boundary *)
126: val mark : unit -> unit (* emit a back pointer,
127: also called mark *)
128:
129: val comment : string -> unit
130:
131: end (* signature of structure M *)
132:
133: val codegen : (int * int -> unit) * (int -> string -> unit)
134: * (string -> unit) -> unit
135:
136: val codestats : outstream -> unit (* write statistics on stream *)
137:
138: end (* signature MIPSCODER *)
139: @ The basic strategy of the implementation is to hold on, via the [[kept]]
140: pointer, to the list of instructions generated so far.
141: We use [[instr]] for the type of an instruction, so
142: [[kept]] has type [[instr list ref]].
143:
144: The instructions will be executed in the following order: the
145: instruction at the head of the [[!kept]] is executed last.
146: This enables us to accept calls in the order of execution but
147: add the new instruction(s) to the list in constant time.
148:
149:
150: @
151: We structure the instruction stream a little bit by factoring
152: out the difference between multiplication and division; these
153: operations are treated identically in that the result has to be
154: fetched out of the MIPS' LO register.
155:
156: We also factor the different of load and store instructions that can
157: occur: we have load byte, load word, and load to coprocessor (floating point).
158: <<types auxiliary to [[instr]]>>=
159: datatype size = Byte | Word | Floating
160: datatype muldiv = MULT | DIV
161: @
162: Here are the instructions that exist.
163: We list them in more or less the order of the MIPSCODER signature.
164: <<definition of [[instr]]>>=
165: <<types auxiliary to [[instr]]>>
166:
167: datatype instr =
168: STRINGCONST of string (* constants *)
169: | REALCONST of string
170: | EMITLONG of int
171:
172: | DEFINE of Label (* labels *)
173: | EMITLAB of int * Label
174:
175: | SLT of Register * EA * Register (* control flow *)
176: | BEQ of bool * Register * Register * Label
177: | JUMP of Register
178: | SLT_D of Register * Register
179: | SEQ_D of Register * Register
180: | BCOP1 of bool * Label
181:
182: | NOP (* no-op for delay slot *)
183:
184: | ADD of Register * EA * Register (* arithmetic *)
185: | AND of Register * EA * Register
186: | OR of Register * EA * Register
187: | XOR of Register * EA * Register
188: | SUB of Register * Register * Register
189: | MULDIV of muldiv * Register * Register
190: | MFLO of Register (* mflo instruction used with
191: 64-bit multiply and divide *)
192: | MFHI of Register
193:
194: | NEG_D of Register * Register
195: | MUL_D of Register * Register * Register
196: | DIV_D of Register * Register * Register
197: | ADD_D of Register * Register * Register
198: | SUB_D of Register * Register * Register
199:
200: | MOVE of EA * Register (* put something into a register *)
201: | LDI_32 of int * Register (* load in a big immediate constant (>16 bits) *)
202: | LUI of Register * int (* Mips lui instruction *)
203:
204: | LOAD of size * Register * EA * int (* load and store *)
205: | STORE of size * Register * EA * int
206:
207: | SLL of EA * Register * Register (* shift *)
208: | SRA of EA * Register * Register
209:
210: | COMMENT of string (* generates nothing *)
211: | MARK (* a backpointer *)
212: @
213: Here is the code that handles the generated stream, [[kept]].
214: It begins life as [[nil]] and returns to [[nil]] every time code is
215: generated.
216: The function [[keep]] is a convenient way of adding a single [[instr]] to
217: the list; it's very terse.
218: Sometimes we have to add multiple [[instr]]s; then we use [[keeplist]].
219: We also define a function [[delay]] that is just like a [[keep]] but
220: it adds a NOP in the delay slot.
221: <<instruction stream and its functions>>=
222: val kept = ref nil : instr list ref
223: fun keep f a = kept := f a :: !kept
224: fun delay f a = kept := NOP :: f a :: !kept
225: fun keeplist l = kept := l @ !kept
226: <<reinitialize [[kept]]>>=
227: kept := nil
228: @
229: \subsection{Exporting functions for [[MIPSCODER]]}
230: We now know enough to implement most of the functions called for in
231: [[MIPSCODER]].
232: We still haven't decided on an implementation of labels,
233: and there is one subtlety in multiplication and division,
234: but the rest is set.
235: <<[[MIPSCODER]] functions>>=
236: val emitstring = keep STRINGCONST (* literals *)
237: val realconst = keep REALCONST
238: val emitlong = keep EMITLONG
239:
240: <<label functions>> (* labels *)
241:
242: val slt = keep SLT (* control flow *)
243: val beq = delay BEQ
244: val jump = delay JUMP
245: val slt_double = delay SLT_D
246: val seq_double = delay SEQ_D
247: val bcop1 = delay BCOP1
248:
249: val add = keep ADD (* arithmetic *)
250: val and' = keep AND
251: val or = keep OR
252: val xor = keep XOR
253: val op sub = keep SUB
254: <<multiplication and division functions>>
255:
256: val neg_double = keep NEG_D
257: val mul_double = keep MUL_D
258: val div_double = keep DIV_D
259: val add_double = keep ADD_D
260: val sub_double = keep SUB_D
261:
262: val move = keep MOVE
263:
264: fun lbu (a,b,c) = delay LOAD (Byte,a,b,c) (* load and store *)
265: fun lw (a,b,c) = delay LOAD (Word,a,b,c)
266: fun lwc1 (a,b,c) = delay LOAD (Floating,a,b,c)
267: fun sb (a,b,c) = keep STORE (Byte,a,b,c)
268: fun sw (a,b,c) = keep STORE (Word,a,b,c)
269: fun swc1 (a,b,c) = delay STORE (Floating,a,b,c)
270: val lui = keep LUI
271:
272: val sll = keep SLL (* shift *)
273: val sra = keep SRA
274:
275: fun align() = () (* never need to align on MIPS *)
276: val mark = keep (fn () => MARK)
277: val comment = keep COMMENT
278: @
279: Multiplication and division have a minor complication; the
280: result has to be fetched from the LO register.
281: <<multiplication and division functions>>=
282: fun muldiv f (a,b,c) = keeplist [MFLO c, MULDIV (f,a,b)]
283: val op div = muldiv DIV
284: val mult = muldiv MULT
285: val mfhi = keep MFHI
286: @
287: For now, labels are just pointers to integers.
288: During code generation, those integers will be set to positions
289: in the instruction stream, and then they'll be useful as addresses
290: relative to the program counter pointer (to be held in [[Reg pcreg]]).
291: <<definition of [[Label]]>>=
292: type Label = int ref
293: <<label functions>>=
294: fun newlabel () = ref 0
295: val define = keep DEFINE
296: val emitlab = keep EMITLAB
297: @
298: Here's the overall plan of this structure:
299: <<*>>=
300: structure MipsCoder : MIPSCODER = struct
301:
302: <<definition of [[Label]]>>
303:
304: datatype Register = Reg of int
305:
306: datatype EA = Direct of Register
307: | Immed of int
308: | Immedlab of Label
309:
310: <<definition of [[instr]]>>
311:
312: <<instruction stream and its functions>>
313:
314: structure M = struct
315: <<[[MIPSCODER]] functions>>
316: end
317:
318: open M
319:
320: <<functions that assemble [[instr]]s into code>>
321:
322: <<statistics>>
323:
324: end (* MipsInstr *)
325: @ \subsection{Sizes of [[instr]]s}
326: Now let's consider the correspondence between our [[instr]] type and the
327: actual MIPS instructions we intend to emit.
328: One important problem to solve is figuring out how big things are,
329: so that we know what addresses to generate for the various labels.
330: We will also want to know what address is currently stored in the program
331: counter regsiter ([[pcreg]]),
332: because we'll need to know when something is close
333: enough that we can use a sixteen-bit address relative to that register.
334: The kind of address we can use will determine how big things are.
335:
336: We'll rearrange the code so that we have a list of [[ref int * instr]] pairs,
337: where the [[ref int]] stores the position in the list.
338: (Positions start at zero.)
339: Since in the MIPS all instructions are the same size, we measure
340: position as number of instructions.
341: While we're at it, we reverse the list so that the head will execute first,
342: then the rest of the list.
343:
344: We begin with each position set to zero, and make a pass over the list
345: trying to set the value of each position.
346: We do this by estimating the size of (number of MIPS instructions
347: generated for) each [[instr]].
348: Since there are forward references, we may not have all the distances right
349: the first time, so we have to make a second pass.
350: But during this second pass we could find that something is further
351: away than we thought, and we have to switch from using a pc-relative mode to
352: something else (or maybe grab the new pc?), which changes the size again,
353: and moves things even further away.
354: Because we can't control this process, we just keep making passes over the
355: list until the process quiesces (we get the same size twice).
356:
357: In order to guarantee termination, we have to make sure later passes only
358: increase the sizes of things.
359: This is sufficient since there is a maximum number of MIPS instructions
360: we can generate for each [[instr]].
361:
362:
363: While we're at it, we might want to complicate things by making the function
364: that does the passes also emit code.
365: For a single pass we hand an optional triple of emitters, the initial position,
366: an [[int option]] for the program counter pointer (if known), and the
367: instructions.
368:
369:
370:
371: I'm not sure what explains the use of the [[ref int]] to track the position,
372: instead of just an [[int]]---it might be a desire to avoid the
373: overhead of creating a bunch of new objects, or it might be really hard
374: to do the passes cheaply.
375: It should think a variation on [[map]] would do the job, but maybe I'm
376: missing something.
377:
378: @ [[emitters]] is actually a triple [[emit,emit_string,emit_real]].
379: [[emit : int * int -> unit]] emits one instruction,
380: and [[emit_string : int -> string -> unit]] emits a string constant.
381: [[emit_string]] could be specified as a function of [[emit]],
382: but the nature of the function would depend on whether the target
383: machine was little-endian or big-endian, and we don't want to have
384: that dependency built in.
385: [[emit_real : string -> unit]] emits two words corresponding to an IEEE
386: floating point constant in string form (e.g. [["3.14159"]]).
387: In principle, it can always be derived from [[emit_word]], but it's
388: easier to pass it in because then we can use John Reppy's code;
389: see the [[MipsReal]] functor for more details, as well as
390: [[mipsglue.sml]].
391:
392:
393: [[instrs]] is the
394: list of instructions (in execute-head-last order).
395:
396: The second argument to [[pass]] indicates for what instructions code
397: is to be generated.
398: It is a record (position of next instruction, program counter pointer if any,
399: remaining instructions to generate [with positions]).
400:
401: \indent [[prepare]] produces two results: the instruction stream with
402: size pointers added, and the total size of code to be generated.
403: We add the total size because that is the only way to find the number
404: of [[bltzal]]s, which are implicit in the instruction stream.
405:
406: <<assembler>>=
407: fun prepare instrs =
408: let fun add_positions(done, inst::rest) =
409: add_positions( (ref 0, inst) :: done, rest)
410: | add_positions(done, nil) = done
411:
412: val instrs' = add_positions(nil, instrs) (* reverse and add [[ref int]]s*)
413:
414: fun passes(oldsize) =
415: (* make passes with no emission until size is stable*)
416: let val size = pass NONE (0,NONE,instrs')
417: in if size=oldsize then size
418: else passes size
419: end
420: in {size = passes 0, stream = instrs'}
421: end
422:
423: fun assemble emitters instrs =
424: pass (SOME emitters) (0,NONE,#stream (prepare instrs))
425:
426: <<functions that assemble [[instr]]s into code>>=
427: fun get (SOME x) = x
428: | get NONE = ErrorMsg.impossible "missing pcptr in mipscoder"
429:
430: <<[[pcptr]] functions>>
431: <<single pass>>
432: <<assembler>>
433:
434: fun codegen emitters = (
435: assemble emitters (!kept);
436: <<reinitialize [[kept]]>>
437: )
438: @
439: The program counter pointer is a device that enables us to to addressing
440: relative to the pcp register, register 31.
441: The need for it arises when we want to access a data element which we know
442: only by its label.
443: The labels give us addresses relative to the beginning of the function,
444: but we can only use addresses relative to some register.
445: The answer is to set register~31 with a [[bltzal]] instruction,
446: then use that for addressing.
447:
448: The function [[needs_a_pcptr]] determines when it is necessary
449: to have a known value in register~31.
450: That is, we need the program counter pointer
451: \itemize
452: \item
453: at [[NOP]] for a reason to be named later?
454: \item
455: at any operation that uses an effective address that refers to a label
456: (since all labels have to be relative to the program counter).
457: \item
458: BEQ's and BCOP1's to very far away,
459: since we have to compute the address for a JUMP
460: knowing the value of the program counter pointer.
461: \enditemize
462: <<[[pcptr]] functions>>=
463: fun needs_a_pcptr(_,SLT(_,Immedlab _,_)) = true
464: | needs_a_pcptr(_,ADD(_,Immedlab _,_)) = true
465: | needs_a_pcptr(_,AND(_,Immedlab _,_)) = true
466: | needs_a_pcptr(_,OR(_,Immedlab _,_)) = true
467: | needs_a_pcptr(_,XOR(_,Immedlab _,_)) = true
468: | needs_a_pcptr(_,MOVE(Immedlab _,_)) = true
469: | needs_a_pcptr(_,LOAD(_,_,Immedlab _,_)) = true
470: | needs_a_pcptr(_,STORE(_,_,Immedlab _,_)) = true
471: | needs_a_pcptr(_,SLL(Immedlab _,_,_)) = true
472: | needs_a_pcptr(_,SRA(Immedlab _,_,_)) = true
473: | needs_a_pcptr(1, BEQ _) = false (* small BEQ's dont need pcptr *)
474: | needs_a_pcptr(_, BEQ _) = true (* but large ones do *)
475: | needs_a_pcptr(1, BCOP1 _) = false (* small BCOP1's dont need pcptr *)
476: | needs_a_pcptr(_, BCOP1 _) = true (* but large ones do *)
477: | needs_a_pcptr _ = false
478: @
479: Creating the program counter pointer once, with a [[bltzal]], is not
480: enough; we have to invalidate the program counter pointer at every
481: label, since control could arrive at the label from God knows where, and
482: therefore we don't know what the program counter pointer is.
483:
484: We use the function [[makepcptr]] to create a new program counter pointer
485: ``on the fly'' while generating code for other [[instrs]].
486: (I chose not to create a special [[instr]] for [[bltzal]], which I
487: could have inserted at appropriate points in the instruction stream.)
488: To try and find an odd bug, I'm adding no-ops after each [[bltzal]].
489: I don't really believe they're necessary.
490:
491: The function [[gen]], which generates the instructions (or computes
492: their size), takes three arguments.
493: Third: the list of instructions to be generated (paired with pointers
494: to their sizes); first: the position (in words) at which to generate
495: those instructions; second: the current value of the program counter
496: pointer (register~31), if known.
497:
498: The mutual recursion between [[gen]] and [[makepcptr]] maintains
499: the program counter pointer.
500: [[gen]] invalidates it at labels, and calls [[makepcptr]] to create a valid
501: one when necessary (as determined by [[needs_a_pcptr]]).
502: <<single pass>>=
503: fun pass emitters =
504: let fun makepcptr(i,x) =
505: (* may need to emit NOP for delay slot if next instr is branch *)
506: let val size = case x of ((_,BEQ _)::rest) => 2
507: | ((_,BCOP1 _)::rest) => 2
508: | _ => 1
509: in case emitters of NONE => ()
510: | SOME (emit, _, _ ) => (
511: emit(Opcodes.bltzal(0,0));
512: if size=2 then emit(Opcodes.add(0,0,0)) else ());
513: gen(i+size, SOME (i+2), x)
514: end
515: and gen(i,_,nil) = i
516: | gen(i, _, (_,DEFINE lab) :: rest) = (lab := i; gen(i,NONE, rest))
517: (* invalidate the pc pointer at labels *)
518: (* may want to do special fiddling with NOPs *)
519: | gen(pos, pcptr, x as ((sizeref as ref size, inst) :: rest)) =
520: if (pcptr=NONE andalso needs_a_pcptr(size, inst)) then makepcptr(pos,x)
521: else case emitters of
522: SOME (emit : int*int -> unit, emit_string : int -> string -> unit,
523: emit_real : string -> unit) =>
524: <<emit MIPS instructions>>
525: | NONE =>
526: <<compute positions>>
527: in gen
528: end
529:
530: @ \subsection{Generating the instructions}
531: Now we need to consider the nitty-gritty details of just what instructions
532: are generated for each [[instr]].
533: In early passes, we'll just need to know how many instructions are
534: required (and that number may change from pass to pass, so it must be
535: recomputed).
536: In the last pass, the sizes are stable (by definition), so we can look
537: at the sizes to see what instructions to generate.
538:
539: We'll consider the [[instrs]] in groups, but first, here's the
540: way we will structure things:
541: <<compute positions>>=
542: let <<functions for computing sizes>>
543: val newsize = case inst of
544: <<cases for sizes to be computed>>
545: in if newsize > size then sizeref := newsize else ();
546: gen(pos+(!sizeref) (* BUGS -- was pos+size*),pcptr,rest)
547: end
548: <<emit MIPS instructions>>=
549: let fun gen1() = gen(pos+size,pcptr,rest)
550: (* generate the rest of the [[instr]]s *)
551: open Bits
552: open Opcodes
553: <<declare reserved registers [[tempreg]] and [[pcreg]]>>
554: <<functions for emitting instructions>>
555: in case inst of
556: <<cases of instructions to be emitted>>
557: end
558: @ When we get around to generating code, we may need to use a temporary
559: register.
560: For example, if we want to load into a register
561: an immediate constant that won't fit
562: into 16~bits, we will have to load the high-order part of the constant
563: with [[lui]], then use [[addi]] to add then the low-order part.
564: The MIPS assembler has a similar problem, and on page D-2 of
565: the MIPS book we notice that register~1 is reserved for the use of the
566: assembler.
567: So we do the same.
568:
569: We need to reserve a second register for use in pointing to the program
570: counter.
571: We will use register 31 because the [[bltzal]] instruction automatically
572: sets register 31 to the PC.
573: <<declare reserved registers [[tempreg]] and [[pcreg]]>>=
574: val tempreg = 1
575: val pcreg = 31
576: @
577: Before showing the code for the actual instructions, we should
578: point out that
579: we have two different ways of emitting a long word.
580: [[emitlong]] just splits the bits into two pieces for those cases
581: when it's desirable to put a word into the memory image.
582: [[split]] gives something that will load correctly
583: when the high-order piece is loaded into a high-order halfword
584: (using [[lui]]),
585: and the low-order piece is sign-extended and then added to the
586: high-order piece.
587: This is the way we load immediate constants of more than sixteen bits.
588: It is also useful for generating load or store instructions with
589: offsets of more than sixteen bits: we [[lui]] the [[hi]] part and
590: add it to the base regsiter, then use the [[lo]] part as an offset.
591: <<functions for emitting instructions>>=
592: fun emitlong i = emit(rshift(i,16), andb(i,65535))
593: (* emit one long word (no sign fiddling) *)
594: fun split i = let val hi = rshift(i,16) and lo = andb(i,65535)
595: in if lo<32768 then (hi,lo) else (hi+1, lo-65536)
596: end
597:
598: @ We begin implementing [[instrs]] by considering those that emit constants.
599: String constants are padded with nulls out to a word boundary, and
600: real constants take up two words.
601: {\bf At the moment real constants seem to be zeros.
602: One day this will have to be fixed.}
603: Integer constants are just emitted with [[emitlong]].
604: <<cases for sizes to be computed>>=
605: STRINGCONST s => Integer.div(String.length(s)+3,4)
606: | REALCONST _ => 2
607: | EMITLONG _ => 1
608: <<cases of instructions to be emitted>>=
609: STRINGCONST s =>
610: let val s' = s ^ "\000\000\000\000"
611: in gen1(emit_string (4*size) s')
612: (* doesn't know Big vs Little-Endian *)
613: end
614: | REALCONST s => gen1(emit_real s) (* floating pt constant *)
615: | EMITLONG i => gen1(emitlong i)
616: @
617: Next consider the labels.
618: A [[DEFINE]] should never reach this far, and [[EMITLAB]] is almost like
619: an [[EMITLONG]].
620: <<cases for sizes to be computed>>=
621: | DEFINE _ => ErrorMsg.impossible "generate code for DEFINE in mipscoder"
622: | EMITLAB _ => 1
623: <<cases of instructions to be emitted>>=
624: | DEFINE _ => gen1(ErrorMsg.impossible "generate code for DEFINE in mipscoder")
625: | EMITLAB(i, ref d) => gen1(emitlong((d-pos)*4+i))
626: @
627: Now we have to start worrying about instructions with [[EA]] in them.
628: The real difficulty these things present is that they may have an
629: immediate operand that won't fit in 16~bits.
630: So we'll need to get this large immediate operand into a register,
631: sixteen bits at a time, and then do the operation on the register.
632:
633: Since all of the arithmetic instructions have this difficulty, and since
634: we can use them to implement the others, we'll start with those and
635: catch up with the control-flow instructions later.
636: @ [[SUB]], [[MULDIV]], and [[MFLO]] all use registers only, so they are easy.
637: The other arithmetic operations get treated exactly the same, so we'll
638: use a function to compute the size.
639: {\bf move this to follow the definition of [[arith]]?}
640: <<cases for sizes to be computed>>=
641: | ADD(_, ea, _) => easize ea
642: | AND(_, ea, _) => easize ea
643: | OR (_, ea, _) => easize ea
644: | XOR(_, ea, _) => easize ea
645: | SUB _ => 1
646: | MULDIV _ => 1
647: | MFLO _ => 1
648: | MFHI _ => 1
649: @ Register operations take one instruction.
650: Immediate operations take one instruction for 16~bit constants,
651: and 3 for larger constants (since it costs two instructions to load
652: a big immediate constant into a register).
653: An immediate instruction with [[Immedlab l]] means that the operand
654: is intended to be the machine address associated with that label.
655: To compute that address, we need to add
656: [[4*(l-pcptr)]] to the contents of
657: register~[[pcreg]] (which holds [[4*pcptr]]),
658: put the results in a register, and operate on that register.
659:
660: This tells us enough to compute the sizes.
661: <<functions for computing sizes>>=
662: fun easize (Direct _) = 1
663: | easize (Immed i) = if abs(i)<32768 then 1 else 3
664: | easize (Immedlab(ref lab)) = 1 + easize(Immed (4*(lab-(get pcptr))))
665: @
666: As we have seen,
667: to implement any arithmetic operation, we need to know the register
668: form and the sixteen-bit immediate form.
669: We will also want the operator from [[instr]], since we do the
670: large immediate via a recursive call.
671: We'll set up a function, [[arith]], that does the job.
672: <<functions for emitting instructions>>=
673: fun arith (opr, rform, iform) =
674: let fun ar (Reg op1, Direct (Reg op2), Reg result) =
675: gen1(emit(rform(result,op1,op2)))
676: | ar (Reg op1, Immed op2, Reg result) =
677: (case size of
678: 1 (* 16 bits *) => gen1(emit(iform(result,op1,op2)))
679: | 3 (* 32 bits *) =>
680: gen(pos,pcptr,
681: (ref 2, LDI_32(op2, Reg tempreg))::
682: (ref 1, opr(Reg op1, Direct(Reg tempreg), Reg result))::
683: rest)
684: | _ => gen(ErrorMsg.impossible
685: "bad size in arith Immed in mipscoder")
686: )
687: | ar (Reg op1, Immedlab (ref op2), Reg result) =
688: gen(pos, pcptr,
689: (ref (size-1),
690: ADD(Reg pcreg,Immed(4*(op2-(get pcptr))), Reg tempreg))::
691: (ref 1, opr(Reg op1, Direct(Reg tempreg), Reg result))::
692: rest)
693: in ar
694: end
695: @
696: The generation itself may be a bit anticlimactic.
697: The MIPS has no ``subtract immediate'' instruction, and [[SUB]] has
698: a different type than the others, so we emit it directly.
699: <<cases of instructions to be emitted>>=
700: | ADD stuff => arith (ADD,add,addi) stuff
701: | AND stuff => arith (AND,and',andi) stuff
702: | OR stuff => arith (OR,or,ori) stuff
703: | XOR stuff => arith (XOR,xor,xori) stuff
704: | SUB (Reg op1, Reg op2, Reg result) => gen1(emit(sub(result,op1,op2)))
705: | MULDIV(DIV, Reg op1, Reg op2) => gen1(emit(div(op1,op2)))
706: | MULDIV(MULT,Reg op1, Reg op2) => gen1(emit(mult(op1,op2)))
707: | MFLO(Reg result) => gen1(emit(mflo(result)))
708: | MFHI(Reg result) => gen1(emit(mfhi(result)))
709: @ Floating point arithmetic is pretty easy because we always do it in
710: registers.
711: We also support only one format, double precision.
712: <<cases for sizes to be computed>>=
713: | NEG_D _ => 1
714: | MUL_D _ => 1
715: | DIV_D _ => 1
716: | ADD_D _ => 1
717: | SUB_D _ => 1
718: @ When emitting instructions we have to remember the Mips instructions
719: use result on the left, but the [[MIPSCODER]] signature requires result
720: on the right.
721: <<cases of instructions to be emitted>>=
722: | NEG_D (Reg op1,Reg result) => gen1(emit(neg_fmt(D_fmt,result,op1)))
723: <<functions for emitting instructions>>=
724: fun float3double instruction (Reg op1,Reg op2,Reg result) =
725: gen1(emit(instruction(D_fmt,result,op1,op2)))
726: <<cases of instructions to be emitted>>=
727: | MUL_D x => float3double mul_fmt x
728: | DIV_D x => float3double div_fmt x
729: | ADD_D x => float3double add_fmt x
730: | SUB_D x => float3double sub_fmt x
731:
732:
733: @ We offer a separate [[MOVE]] instruction because of large immediate
734: constants.
735: It is always possible to do [[move(src,dest)]] by doing
736: [[add(Reg 0,src,dest)]], but the general form [[add(Reg i, Immed c, dest)]]
737: takes three instructions when [[c]] is a large constant (more than 16 bits).
738: Rather than clutter up the code for [[add]] (and [[or]] and [[xor]]) by
739: trying to recognize register~0, we provide [[move]] explicitly.
740:
741: \indent [[LDI_32]] takes care of the particular case in which we are
742: loading a 32-bit immediate constant into a register.
743: It dates from the bad old days before [[MOVE]], and it might be a good idea
744: to remove it sometime.
745: <<functions for emitting instructions>>=
746: fun domove (Direct (Reg src), Reg dest) = gen1(emit(add(dest,src,0)))
747: | domove (Immed src, Reg dest) =
748: (case size of
749: 1 (* 16 bits *) => gen1(emit(addi(dest,0,src)))
750: | 2 (* 32 bits *) =>
751: gen(pos,pcptr,(ref 2, LDI_32(src, Reg dest))::rest)
752: | _ => gen(ErrorMsg.impossible "bad size in domove Immed in mipscoder")
753: )
754: | domove (Immedlab (ref src), Reg dest) =
755: gen(pos, pcptr,
756: (ref size,
757: ADD(Reg pcreg,Immed(4*(src-(get pcptr))), Reg dest))::rest)
758: @ Notice we use [[easize]] and not [[movesize]] in the third clause
759: because when we reach this point the treatment of a [[MOVE]] is the same
760: as that of an [[ADD]].
761: <<functions for computing sizes>>=
762: fun movesize (Direct _) = 1
763: | movesize (Immed i) = if abs(i)<32768 then 1 else 2
764: | movesize (Immedlab(ref lab)) = easize(Immed (4*(lab-(get pcptr))))
765:
766: <<cases for sizes to be computed>>=
767: | MOVE (src,_) => movesize src
768: | LDI_32 _ => 2
769: | LUI _ => 1
770: <<cases of instructions to be emitted>>=
771: | MOVE stuff => domove stuff
772: | LDI_32 (immedconst, Reg dest) =>
773: let val (hi,lo) = split immedconst
774: in gen1(emit(lui(dest,hi));emit(addi(dest,dest,lo)))
775: end
776: | LUI (Reg dest,immed16) => gen1(emit(lui(dest,immed16)))
777:
778: @
779: Now that we've done arithmetic, we can see how to do control flow without
780: too much trouble.
781: [[SLT]] can be treated just like an arithmetic operator.
782: [[BEQ]] is simple if the address to which we branch is close enough.
783: Otherwise we use the following sequence for [[BEQ(Reg op1, Reg op2, ref dest)]]:
784: \verbatim
785: bne op1,op2,L
786: ADD (Reg pcreg, Immed (4*(dest-pcptr)), Reg tempreg)
787: jr tempreg
788: L: ...
789: \endverbatim
790: Notice we don't have to put a [[NOP]] in the delay slot of the [[bne]].
791: We don't need one after the jump unless we needed one after the
792: original [[BEQ]], in which case one will be there.
793: If the branch is taken, we're doing as well as we can.
794: If the branch is not taken, we will have executed an [[add]] or [[lui]] in the
795: delay slot of the [[bne]], but the results just get thrown away.
796: <<cases for sizes to be computed>>=
797: | SLT(_, ea, _) => easize ea
798: | BEQ(_,_,_,ref dest) =>
799: if abs((pos+1)-dest) < 32768 then 1 (* single instruction *)
800: else 2+easize (Immed (4*(dest-(get pcptr))))
801: | JUMP _ => 1
802: | SLT_D _ => 1
803: | SEQ_D _ => 1
804: | BCOP1(_,ref dest) =>
805: if abs((pos+1)-dest) < 32768 then 1 (* single instruction *)
806: else 2+easize (Immed (4*(dest-(get pcptr))))
807: | NOP => 1
808: @ The implementation is as described, except we use a
809: non-standard [[nop]].
810: There are many Mips instructions that have no effect, and the standard
811: one is the word with all zeroes ([[sll 0,0,0]]).
812: We use [[add]], adding 0 to 0 and store the result in 0, because it
813: will be easy to distinguish from a data word that happens to be zero.
814: <<cases of instructions to be emitted>>=
815: | SLT stuff => arith (SLT,slt,slti) stuff
816: | BEQ(b, Reg op1, Reg op2, ref dest) =>
817: if size = 1 then
818: gen1(emit((if b then beq else bne)(op1,op2,dest-(pos+1))))
819: else gen(pos,pcptr,
820: (ref 1, BEQ(not b, Reg op1, Reg op2, ref(pos+size)))
821: ::(ref (size-2),
822: ADD(Reg pcreg, Immed(4*(dest-(get pcptr))), Reg tempreg))
823: ::(ref 1, JUMP(Reg tempreg))
824: ::rest)
825: | JUMP(Reg dest) => gen1(emit(jr(dest)))
826: | SLT_D (Reg op1, Reg op2) =>
827: gen1(emit(c_lt(D_fmt,op1,op2)))
828: | SEQ_D (Reg op1, Reg op2) =>
829: gen1(emit(c_seq(D_fmt,op1,op2)))
830: | BCOP1(b, ref dest) =>
831: let fun bc1f offset = cop1(8,0,offset)
832: fun bc1t offset = cop1(8,1,offset)
833: in if size = 1 then
834: gen1(emit((if b then bc1t else bc1f)(dest-(pos+1))))
835: else gen(pos,pcptr,
836: (ref 1, BCOP1(not b, ref(pos+size)))
837: ::(ref (size-2),
838: ADD(Reg pcreg, Immed(4*(dest-(get pcptr))), Reg tempreg))
839: ::(ref 1, JUMP(Reg tempreg))
840: ::rest)
841: end
842: | NOP => gen1(emit(add(0,0,0))) (* one of the many MIPS no-ops *)
843: @
844: Our next problem is to tackle load and store.
845: The major difficulty is if the offset is too large to fit in
846: sixteen bits; if so, we have to create a new base register.
847: If we have [[Immedlab]], we do it as an offset from [[pcreg]].
848: <<functions for emitting instructions>>=
849: fun memop(rform,Reg dest, Direct (Reg base), offset) =
850: (case size
851: of 1 => gen1(emit(rform(dest,offset,base)))
852: | 3 => let val (hi,lo) = split offset
853: in gen1(emit(lui(tempreg,hi)); (* tempreg = hi @<< 16 *)
854: emit(add(tempreg,base,tempreg));(* tempreg += base *)
855: emit(rform(dest,lo,tempreg)) (* load dest,lo(tempreg) *)
856: )
857: end
858: | _ => gen1(ErrorMsg.impossible "bad size in memop Direct in mipscoder")
859: )
860: | memop(rform,Reg dest, Immed address, offset) =
861: (case size
862: of 1 => gen1(emit(rform(dest,offset+address,0)))
863: | 2 => let val (hi,lo) = split (offset+address)
864: in gen1(emit(lui(tempreg,hi));
865: emit(rform(dest,lo,tempreg))
866: )
867: end
868: | _ => gen1(ErrorMsg.impossible "bad size in memop Immed in mipscoder")
869: )
870: | memop(rform,Reg dest, Immedlab (ref lab), offset) =
871: memop(rform, Reg dest, Direct (Reg pcreg), offset+4*(lab - get pcptr))
872: @ The actual registers don't matter for computing sizes, and in fact
873: the value of [[pcreg]] is not visible here, so we use an arbitrary
874: register ([[Reg 0]]) to compute the size.
875: <<functions for computing sizes>>=
876: fun adrsize(_, Reg _, Direct _, offset) =
877: if abs(offset)<32768 then 1 else 3
878: | adrsize(_, Reg _, Immed address, offset) =
879: if abs(address+offset) < 32768 then 1 else 2
880: | adrsize(x, Reg dest, Immedlab (ref lab), offset) =
881: adrsize(x, Reg dest, Direct (Reg 0 (* pcreg in code *) ),
882: offset+4*(lab-(get pcptr)))
883: <<cases for sizes to be computed>>=
884: | LOAD x => adrsize x
885: | STORE x => adrsize x
886: <<cases of instructions to be emitted>>=
887: | LOAD (Byte,dest,address,offset) => memop(lbu,dest,address,offset)
888: | LOAD (Word,dest,address,offset) => memop(lw,dest,address,offset)
889: | LOAD (Floating,dest,address,offset) => memop(lwc1,dest,address,offset)
890: | STORE (Byte,dest,address,offset) => memop(sb,dest,address,offset)
891: | STORE (Word,dest,address,offset) => memop(sw,dest,address,offset)
892: | STORE (Floating,dest,address,offset) => memop(swc1,dest,address,offset)
893: @
894: For the shift instructions, only register and immediate operands
895: make sense.
896: Immediate operands make sense if and only if they are representable
897: in five bits.
898: If everything is right, these are single instructions.
899: <<cases for sizes to be computed>>=
900: | SLL _ => 1
901: | SRA _ => 1
902: <<cases of instructions to be emitted>>=
903: | SLL (Immed shamt, Reg op1, Reg result) => gen1(
904: if (shamt >= 0 andalso shamt < 32) then emit(sll(result,op1,shamt))
905: else ErrorMsg.impossible ("bad sll shamt "
906: ^ (Integer.makestring shamt) ^ " in mipscoder"))
907: | SLL (Direct(Reg shamt), Reg op1, Reg result) =>
908: gen1(emit(sllv(result,op1,shamt)))
909: | SLL (Immedlab _,_,_) => ErrorMsg.impossible "sll shamt is Immedlab in mipscoder"
910: | SRA (Immed shamt, Reg op1, Reg result) => gen1(
911: if (shamt >= 0 andalso shamt < 32) then emit(sra(result,op1,shamt))
912: else ErrorMsg.impossible ("bad sra shamt "
913: ^ (Integer.makestring shamt) ^ " in mipscoder"))
914: | SRA (Direct(Reg shamt), Reg op1, Reg result) =>
915: gen1(emit(srav(result,op1,shamt)))
916: | SRA (Immedlab _,_,_) => ErrorMsg.impossible "sra shamt is Immedlab in mipscoder"
917: @
918: Finally, comments are ignored, and marks (backpointers) are written into the
919: instruction stream.
920:
921: Comments are used by the front end to give diagnostics.
922: In the bad old days we would have had two different [[MIPSCODER]]s, one
923: which generated machine code (and ignored comments), and one which
924: wrote out assembly code (and copied comments).
925: Today we have just one, which means the rerouting of comments takes place
926: at a much higher level. Look in [[cps/mipsglue.nw]].
927: <<cases for sizes to be computed>>=
928: | COMMENT _ => 0
929: | MARK => 1 (* backpointer takes one word *)
930: @ Just for the record, here's the description of what a mark (backpointer)
931: is.
932: ``Take the byte address at which the mark resides and add 4, giving
933: the byte address of the object following the mark.
934: (That object is the marked object.)
935: Subtract the byte address of the initial word that marks the
936: start of this instruction stream.
937: Now divide by 4, giving the distance in words between the
938: beginning of the block and the marked object.
939: Take that quantity and shift it left by multiplying by [[power_tags]],
940: and indicate the result is a mark by adding the tag bits [[tag_backptr]]
941: into the low order part.''
942: [[pos+1]] is exactly the required distance in words.
943: <<cases of instructions to be emitted>>=
944: | COMMENT _ => gen1()
945: | MARK => gen1(
946: let open System.Tags
947: in emitlong((pos+1) * power_tags + tag_backptr)
948: end)
949: @
950: @ \subsection{Optimization}
951: The first step towards optimization is to take statistics.
952: We will count: [[instrs]], Mips words, [[NOP]]s in load and branch delays,
953: and [[bltzal]]s.
954: In the current implementation the [[bltzal]]s are implicit, so there
955: is no way to count them or optimize them.
956: <<statistics>>=
957: fun printstats stream
958: {inst : int, code : int, data : int,
959: load : int, branch : int, compare : int, size : int} =
960: let val print = output stream
961: val nop = load+branch+compare
962: val bltzal = size - (code + data)
963: val code = code + bltzal
964: <<definition of [[sprintf]]>>
965: fun P x = substring(makestring(100.0 * x),0,4) (* percent *)
966: fun printf f d = print (sprintf f d)
967: in printf ["Counted "," instrs in "," words (",
968: " code, "," data)\n" ^
969: "Used "," NOPs ("," load, "," branch,"," compare) and "," bltzals\n" ^
970: "","% of code words were NOPs; ","% were bltzals\n" ^
971: "","% of all words were code; ","% of all words were NOPs\n"]
972: [I inst, I size, I code, I data,
973: I nop, I load, I branch, I compare, I bltzal,
974: P (real nop / real code), P (real bltzal / real code),
975: P (real code / real size), P (real nop / real size)]
976: handle Overflow => print "[Overflow in computing Mips stats]\n"
977: | Real s => print ("[FPE ("^s^") in computing Mips stats]\n")
978: end
979:
980: <<statistics>>=
981: <<definition of [[iscode]]>>
982: fun addstats (counts as {inst,code,data,load,branch,compare}) =
983: fn nil => counts
984: | (sizeref,first)::(_,NOP)::rest => addstats
985: {inst=inst+2, code=code+(!sizeref)+1, data=data,
986: load=load+ (case first of LOAD _ => 1 | _ => 0),
987: branch=branch +(case first of BEQ _ => 1 | JUMP _ => 1
988: | BCOP1 _ => 1 | _ => 0),
989: compare=compare+(case first of SLT_D _ => 1 | SEQ_D _ => 1
990: | _ => 0)
991: } rest
992: | (sizeref,first)::rest => addstats
993: {inst=inst+1,
994: code = code + if iscode(first) then !sizeref else 0,
995: data = data + if not (iscode first) then !sizeref else 0,
996: load=load,
997: branch=branch,
998: compare=compare
999: } rest
1000:
1001:
1002: fun codestats outfile =
1003: let val {size,stream=instrs} = prepare (!kept)
1004: val zero = {inst=0, code=0, data=0, load=0, branch=0, compare=0}
1005: val counts as {inst,code,data,load,branch,compare} =
1006: addstats zero instrs
1007: in printstats outfile
1008: {inst=inst,code=code,data=data,
1009: load=load,branch=branch,compare=compare,size=size}
1010: end
1011:
1012: <<definition of [[iscode]]>>=
1013: val iscode = fn
1014: STRINGCONST _ => false
1015: | REALCONST _ => false
1016: | EMITLONG _ => false
1017: | DEFINE _ => false
1018: | EMITLAB _ => false
1019:
1020: | SLT _ => true
1021: | BEQ _ => true
1022: | JUMP _ => true
1023: | NOP => true
1024: | SLT_D _ => true
1025: | SEQ_D _ => true
1026: | BCOP1 _ => true
1027:
1028: | ADD _ => true
1029: | AND _ => true
1030: | OR _ => true
1031: | XOR _ => true
1032: | SUB _ => true
1033: | MULDIV _ => true
1034: | MFLO _ => true
1035: | MFHI _ => true
1036:
1037: | NEG_D _ => true
1038: | MUL_D _ => true
1039: | DIV_D _ => true
1040: | ADD_D _ => true
1041: | SUB_D _ => true
1042:
1043: | MOVE _ => true
1044: | LDI_32 _ => true
1045: | LUI _ => true
1046:
1047: | LOAD _ => true
1048: | STORE _ => true
1049:
1050: | SLL _ => true
1051: | SRA _ => true
1052:
1053: | COMMENT _ => false
1054: | MARK => false
1055:
1056: <<definition of [[sprintf]]>>=
1057: val I = Integer.makestring
1058: val R = Real.makestring
1059: exception Printf
1060: fun sprintf format values =
1061: let fun merge([x],nil) = [x]
1062: | merge(nil,nil) = nil
1063: | merge(x::y,z::w) = x::z:: merge(y,w)
1064: | merge _ = raise Printf
1065: in implode(merge(format,values))
1066: end
1067:
1068: @
1069: At the moment these functions are meaningless junk.
1070: <<functions that remove pipeline bubbles>>=
1071: val rec squeeze =
1072:
1073: fn (x as LOAD(_,Reg d, m, i))::NOP::instr::rest =>
1074: if use(instr,d) then ??
1075: else squeeze(x::instr::rest)
1076: | (x as STORE _)::(y as LOAD _)::rest =>
1077: x :: squeeze(y::rest)
1078: | instr::(x as LOAD(_,Reg d, Direct(Reg s), i))::NOP::rest =>
1079: if use(instr, d) orelse gen(instr, s) then ??
1080: else squeeze(x::instr::rest)
1081: | instr::(x as LOAD(_,Reg d, _, i))::NOP::rest =>
1082: if use(instr,d) then ??
1083: else squeeze(x::instr::rest)
1084: | (x as MFLO _):: (y as MULDIV _) :: rest =>
1085: x :: squeeze (y::rest)
1086: | (x as MFLO(Reg d))::instr::rest =>
1087: if (use(instr,d) orelse gen(instr,d) then ??
1088: else squeeze(instr::x::rest)
1089: | instr :: (x as MULDIV(Reg a, Reg b)) :: rest =>
1090: if gen(instr,a) orelse gen(instr,b) then ??
1091: else squeeze(x::instr::rest)
1092:
1093: val rec final =
1094: fn
1095: | instr::(x as LOAD(_,Reg d, Direct(Reg s), i))::NOP::rest =>
1096: if gen(instr, s) then instr::final(x::NOP::rest)
1097: else x::instr::(final rest)
1098: | instr :: (x as JUMP _) :: NOP :: rest =>
1099: x :: instr :: final rest
1100: | instr :: (x as BEQ(_,Reg a, Reg b, _)) :: NOP :: rest =>
1101: if gen(instr,a) orelse gen(instr,b) then instr::x::NOP::(final rest)
1102: else x::instr::(final rest)
1103:
1104:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.