Annotation of researchv10no/cmd/sml/src/mips/mipscoder.nw, revision 1.1.1.1

1.1       root        1: \input verbatim
                      2: \input itemize
                      3: \chapter{A small assembler for the MIPS}
                      4: This is part of the code generator for Standard ML of New Jersey.
                      5: We generate code in several stages.
                      6: This is nearly the lowest stage; it is like an assembler.
                      7: The user can call any function in the MIPSCODER signature.
                      8: Each one corresponds to an assembler pseudo-instruction.
                      9: Most correspond to single MIPS instructions.
                     10: The assembler remembers all the instructions that have been 
                     11: requested, and when [[codegen]] is called it generates MIPS
                     12: code for them.
                     13: 
                     14: Some other structure will be able to use the MIPS structure to implement
                     15: a [[CMACHINE]], which is the abstract machine that ML thinks it is running
                     16: on.
                     17: (What really happens is a functor maps some structure 
                     18: implementing [[MIPSCODER]] to a different structure implementing 
                     19: [[CMACHINE]].)
                     20: 
                     21: {\em Any function using a structure of this signature must avoid
                     22: touching registers 1~and~31.
                     23: Those registers are reserved for use by the assembler.}
                     24: 
                     25: @ Here is the signature of the assembler, [[MIPSCODER]].
                     26: It can be extracted from this file by
                     27: $$\hbox{\tt notangle mipsinstr.nw -Rsignature}.$$
                     28: <<signature>>=
                     29: signature MIPSCODER = sig
                     30: 
                     31: (* Assembler for the MIPS chip *)
                     32: 
                     33: eqtype Label
                     34: datatype Register = Reg of int
                     35:     (* Registers 1 and 31 are reserved for use by this assembler *)
                     36: datatype EA = Direct of Register | Immed of int | Immedlab of Label
                     37:                                 (* effective address *)
                     38: 
                     39: structure M : sig
                     40: 
                     41:     (* Emit various constants into the code *)
                     42: 
                     43:     val emitstring : string -> unit     (* put a literal string into the
                     44:                                            code (null-terminated?) and
                     45:                                            extend with nulls to 4-byte 
                     46:                                            boundary. Just chars, no 
                     47:                                            descriptor or length *)
                     48:     val realconst : string -> unit      (* emit a floating pt literal *)
                     49:                                                 (* NOT RIGHT YET *)
                     50:     val emitlong : int -> unit          (* emit a 4-byte integer literal *)
                     51: 
                     52: 
                     53:     (* Label bindings and emissions *)
                     54: 
                     55:     val newlabel : unit -> Label        (* new, unbound label *)
                     56:     val define : Label -> unit          (* cause the label to be bound to
                     57:                                            the code about to be generated *)
                     58:     val emitlab : int * Label -> unit   (* L3: emitlab(k,L2) is equivalent to
                     59:                                            L3: emitlong(k+L2-L3) *)
                     60: 
                     61:     (* Control flow instructions *)
                     62: 
                     63:     val slt : Register * EA * Register -> unit
                     64:                 (* (operand1, operand2, result) *)
                     65:                                         (* set less than family *)
                     66:     val beq : bool * Register * Register * Label -> unit
                     67:                 (* (beq or bne, operand1, operand2, branch address) *)
                     68:                                         (* branch equal/not equal family *)
                     69:     
                     70:     val jump : Register -> unit         (* jump register instruction *)
                     71: 
                     72:     val slt_double : Register * Register -> unit
                     73:                                        (* floating pt set less than *)
                     74:     val seq_double : Register * Register -> unit
                     75:                                        (* floating pt set equal *)
                     76:     val bcop1 : bool * Label -> unit    (* floating pt conditional branch *)
                     77: 
                     78: 
                     79:     (* Arithmetic instructions *)
                     80:             (* arguments are (operand1, operand2, result) *)
                     81: 
                     82:     val add : Register * EA * Register -> unit
                     83:     val and' : Register * EA * Register -> unit
                     84:     val or : Register * EA * Register -> unit
                     85:     val xor : Register * EA * Register -> unit
                     86:     val sub : Register * Register * Register -> unit
                     87:     val div : Register * Register * Register -> unit
                     88:     val mult : Register * Register * Register -> unit
                     89:     val mfhi : Register -> unit                (* high word of 64-bit multiply *)
                     90:     
                     91:     (* Floating point arithmetic *)
                     92: 
                     93:     val neg_double : Register * Register -> unit
                     94:     val mul_double : Register * Register * Register -> unit
                     95:     val div_double : Register * Register * Register -> unit
                     96:     val add_double : Register * Register * Register -> unit
                     97:     val sub_double : Register * Register * Register -> unit
                     98: 
                     99:     (* Move pseudo-instruction :  move(src,dest) *)
                    100: 
                    101:     val move : EA * Register -> unit
                    102: 
                    103:     (* Load and store instructions *)
                    104:             (* arguments are (destination, source address, offset) *)
                    105:  
                    106:     val lbu  : Register * EA * int -> unit (* bytes *)
                    107:     val sb  : Register * EA * int -> unit
                    108:     val lw  : Register * EA * int -> unit  (* words *)
                    109:     val sw  : Register * EA * int -> unit
                    110:     val lwc1: Register * EA * int -> unit  (* floating point coprocessor *)
                    111:     val swc1: Register * EA * int -> unit
                    112:     val lui : Register * int -> unit
                    113: 
                    114:     (* Shift instructions *)
                    115:             (* arguments are (shamt, operand, result) *)
                    116:             (* shamt as Immedlab _ is senseless *)
                    117: 
                    118:     val sll : EA * Register * Register -> unit
                    119:     val sra : EA * Register * Register -> unit
                    120:     
                    121: 
                    122:     (* Miscellany *)
                    123: 
                    124:     val align : unit -> unit            (* cause next data to be emitted on
                    125:                                            a 4-byte boundary *)
                    126:     val mark : unit -> unit             (* emit a back pointer, 
                    127:                                            also called mark *)
                    128: 
                    129:     val comment : string -> unit
                    130: 
                    131:   end (* signature of structure M *)
                    132: 
                    133:   val codegen : (int * int -> unit) * (int -> string -> unit) 
                    134:                        * (string -> unit) -> unit
                    135: 
                    136:   val codestats : outstream -> unit    (* write statistics on stream *)
                    137: 
                    138: end (* signature MIPSCODER *)
                    139: @ The basic strategy of the implementation is to hold on, via the [[kept]]
                    140: pointer, to the list of instructions generated so far.
                    141: We use [[instr]] for the type of an instruction, so
                    142: [[kept]] has type [[instr list ref]].
                    143: 
                    144: The instructions will be executed in the following order: the 
                    145: instruction at the head of the [[!kept]] is executed last.
                    146: This enables us to accept calls in the order of execution but
                    147: add the new instruction(s) to the list in constant time.
                    148: 
                    149: 
                    150: @
                    151: We structure the instruction stream a little bit by factoring
                    152: out the difference between multiplication and division; these
                    153: operations are treated identically in that the result has to be
                    154: fetched out of the MIPS' LO register.
                    155: 
                    156: We also factor the different of load and store instructions that can
                    157: occur: we have load byte, load word, and load to coprocessor (floating point).
                    158: <<types auxiliary to [[instr]]>>=
                    159: datatype size = Byte | Word | Floating
                    160: datatype muldiv = MULT | DIV
                    161: @
                    162: Here are the instructions that exist.
                    163: We list them in more or less the order of the MIPSCODER signature.
                    164: <<definition of [[instr]]>>=
                    165: <<types auxiliary to [[instr]]>>
                    166: 
                    167: datatype instr = 
                    168:     STRINGCONST of string               (* constants *)
                    169:   | REALCONST of string
                    170:   | EMITLONG of int
                    171: 
                    172:   | DEFINE of Label                     (* labels *)
                    173:   | EMITLAB of int * Label
                    174: 
                    175:   | SLT of Register * EA * Register     (* control flow *)
                    176:   | BEQ of bool * Register * Register * Label
                    177:   | JUMP of Register 
                    178:   | SLT_D of Register * Register
                    179:   | SEQ_D of Register * Register
                    180:   | BCOP1 of bool * Label
                    181: 
                    182:   | NOP (* no-op for delay slot *)
                    183: 
                    184:   | ADD of Register * EA * Register     (* arithmetic *)
                    185:   | AND of Register * EA * Register
                    186:   | OR  of Register * EA * Register
                    187:   | XOR of Register * EA * Register
                    188:   | SUB of Register * Register * Register
                    189:   | MULDIV of muldiv * Register * Register
                    190:   | MFLO of Register    (* mflo instruction used with
                    191:                            64-bit multiply and divide *)
                    192:   | MFHI of Register
                    193: 
                    194:   | NEG_D of Register * Register
                    195:   | MUL_D of Register * Register * Register
                    196:   | DIV_D of Register * Register * Register
                    197:   | ADD_D of Register * Register * Register
                    198:   | SUB_D of Register * Register * Register
                    199: 
                    200:   | MOVE of EA * Register    (* put something into a register *)
                    201:   | LDI_32 of int * Register (* load in a big immediate constant (>16 bits) *)
                    202:   | LUI of Register * int    (* Mips lui instruction *)
                    203: 
                    204:   | LOAD of size * Register * EA * int  (* load and store *)
                    205:   | STORE  of size * Register * EA * int
                    206: 
                    207:   | SLL of EA * Register * Register     (* shift *)
                    208:   | SRA of EA * Register * Register
                    209: 
                    210:   | COMMENT of string                   (* generates nothing *)
                    211:   | MARK                                (* a backpointer *)
                    212: @
                    213: Here is the code that handles the generated stream, [[kept]].
                    214: It begins life as [[nil]] and returns to [[nil]] every time code is
                    215: generated.
                    216: The function [[keep]] is a convenient way of adding a single [[instr]] to
                    217: the list; it's very terse.
                    218: Sometimes we have to add multiple [[instr]]s; then we use [[keeplist]].
                    219: We also define a function [[delay]] that is just like a [[keep]] but
                    220: it adds a NOP in the delay slot.
                    221: <<instruction stream and its functions>>=
                    222:   val kept = ref nil : instr list ref
                    223:   fun keep f a = kept := f a :: !kept
                    224:   fun delay f a = kept := NOP :: f a :: !kept
                    225:   fun keeplist l = kept := l @ !kept
                    226: <<reinitialize [[kept]]>>=
                    227:   kept := nil
                    228: @
                    229: \subsection{Exporting functions for [[MIPSCODER]]}
                    230: We now know enough to implement most of the functions called for in
                    231: [[MIPSCODER]].
                    232: We still haven't decided on an implementation of labels,
                    233: and there is one subtlety in multiplication and division,
                    234: but the rest is set.
                    235: <<[[MIPSCODER]] functions>>=
                    236:   val emitstring = keep STRINGCONST     (* literals *)
                    237:   val realconst = keep REALCONST
                    238:   val emitlong = keep EMITLONG
                    239: 
                    240:   <<label functions>>                   (* labels *)
                    241: 
                    242:   val slt = keep SLT                    (* control flow *)
                    243:   val beq = delay BEQ
                    244:   val jump = delay JUMP
                    245:   val slt_double = delay SLT_D
                    246:   val seq_double = delay SEQ_D
                    247:   val bcop1 = delay BCOP1
                    248: 
                    249:   val add = keep ADD                    (* arithmetic *)
                    250:   val and' = keep AND
                    251:   val or = keep OR
                    252:   val xor = keep XOR
                    253:   val op sub = keep SUB
                    254:   <<multiplication and division functions>>
                    255: 
                    256:   val neg_double = keep NEG_D
                    257:   val mul_double = keep MUL_D
                    258:   val div_double = keep DIV_D
                    259:   val add_double = keep ADD_D
                    260:   val sub_double = keep SUB_D
                    261: 
                    262:   val move = keep MOVE
                    263: 
                    264:   fun lbu (a,b,c) = delay LOAD (Byte,a,b,c) (* load and store *)
                    265:   fun lw (a,b,c)  = delay LOAD (Word,a,b,c)
                    266:   fun lwc1 (a,b,c)  = delay LOAD (Floating,a,b,c)
                    267:   fun sb (a,b,c)  = keep STORE (Byte,a,b,c)
                    268:   fun sw (a,b,c)  = keep STORE (Word,a,b,c)
                    269:   fun swc1 (a,b,c)  = delay STORE (Floating,a,b,c)
                    270:   val lui = keep LUI
                    271: 
                    272:   val sll = keep SLL                    (* shift *)
                    273:   val sra = keep SRA
                    274: 
                    275:   fun align() = ()                      (* never need to align on MIPS *)
                    276:   val mark = keep (fn () => MARK)
                    277:   val comment = keep COMMENT
                    278: @
                    279: Multiplication and division have a minor complication; the
                    280: result has to be fetched from the LO register.
                    281: <<multiplication and division functions>>=
                    282:   fun muldiv f (a,b,c) = keeplist [MFLO c, MULDIV (f,a,b)]
                    283:   val op div = muldiv DIV
                    284:   val mult = muldiv MULT
                    285:   val mfhi = keep MFHI
                    286: @ 
                    287: For now, labels are just pointers to integers.
                    288: During code generation, those integers will be set to positions
                    289: in the instruction stream, and then they'll be useful as addresses
                    290: relative to the program counter pointer (to be held in [[Reg pcreg]]).
                    291: <<definition of [[Label]]>>=
                    292:   type Label = int ref
                    293: <<label functions>>=
                    294:   fun newlabel () = ref 0
                    295:   val define = keep DEFINE
                    296:   val emitlab = keep EMITLAB
                    297: @
                    298: Here's the overall plan of this structure:
                    299: <<*>>=
                    300: structure MipsCoder : MIPSCODER = struct
                    301: 
                    302:   <<definition of [[Label]]>>
                    303: 
                    304:   datatype Register = Reg of int
                    305: 
                    306:   datatype EA = Direct of Register
                    307:               | Immed of int
                    308:               | Immedlab of Label
                    309: 
                    310:   <<definition of [[instr]]>>
                    311: 
                    312:   <<instruction stream and its functions>>
                    313:   
                    314:   structure M = struct
                    315:     <<[[MIPSCODER]] functions>>
                    316:   end
                    317: 
                    318:   open M
                    319: 
                    320:   <<functions that assemble [[instr]]s into code>>
                    321: 
                    322:   <<statistics>>
                    323: 
                    324: end (* MipsInstr *)
                    325: @ \subsection{Sizes of [[instr]]s}
                    326: Now let's consider the correspondence between our [[instr]] type and the
                    327: actual MIPS instructions we intend to emit.
                    328: One important problem to solve is figuring out how big things are, 
                    329: so that we know what addresses to generate for the various labels.
                    330: We will also want to know what address is currently stored in the program
                    331: counter regsiter ([[pcreg]]),
                    332: because we'll need to know when something is close 
                    333: enough that we can use a sixteen-bit address relative to that register.
                    334: The kind of address we can use will determine how big things are.
                    335: 
                    336: We'll rearrange the code so that we have a list of [[ref int * instr]] pairs,
                    337: where the [[ref int]] stores the position in the list. 
                    338: (Positions start at zero.)
                    339: Since in the MIPS all instructions are the same size, we measure
                    340: position as number of instructions.
                    341: While we're at it, we reverse the list so that the head will execute first,
                    342: then the rest of the list.
                    343: 
                    344: We begin with each position set to zero, and make a pass over the list 
                    345: trying to set the value of each position. 
                    346: We do this by estimating the size of (number of MIPS instructions 
                    347: generated for) each [[instr]].
                    348: Since there are forward references, we may not have all the distances right
                    349: the first time, so we have to make a second pass.
                    350: But during this second pass we could find that something is further
                    351: away than we thought, and we have to switch from using a pc-relative mode to
                    352: something else (or maybe grab the new pc?), which changes the size again,
                    353: and moves things even further away.
                    354: Because we can't control this process, we just keep making passes over the
                    355: list until the process quiesces (we get the same size twice).
                    356: 
                    357: In order to guarantee termination, we have to make sure later passes only 
                    358: increase the sizes of things.
                    359: This is sufficient since there is a maximum number of MIPS instructions
                    360: we can generate for each [[instr]].
                    361: 
                    362: 
                    363: While we're at it, we might want to complicate things by making the function
                    364: that does the passes also emit code.
                    365: For a single pass we hand an optional triple of emitters, the initial position,
                    366: an [[int option]] for the program counter pointer (if known), and the
                    367: instructions.
                    368: 
                    369: 
                    370: 
                    371: I'm not sure what explains the use of the [[ref int]] to track the position,
                    372: instead of just an [[int]]---it might be a desire to avoid the
                    373: overhead of creating a bunch of new objects, or it might be really hard
                    374: to do the passes cheaply.
                    375: It should think a variation on [[map]] would do the job, but maybe I'm
                    376: missing something.
                    377: 
                    378: @ [[emitters]] is actually a triple [[emit,emit_string,emit_real]].
                    379: [[emit : int * int -> unit]] emits one instruction, 
                    380: and [[emit_string : int -> string -> unit]] emits a string constant.
                    381: [[emit_string]] could be specified as a function of [[emit]],
                    382: but the nature of the function would depend on whether the target
                    383: machine was little-endian or big-endian, and we don't want to have
                    384: that dependency built in.
                    385: [[emit_real : string -> unit]] emits two words corresponding to an IEEE
                    386: floating point constant in string form (e.g. [["3.14159"]]).
                    387: In principle, it can always be derived from [[emit_word]], but it's
                    388: easier to pass it in because then we can use John Reppy's code;
                    389: see the [[MipsReal]] functor for more details, as well as
                    390: [[mipsglue.sml]].
                    391: 
                    392: 
                    393:  [[instrs]] is the
                    394: list of instructions (in execute-head-last order).
                    395: 
                    396: The second argument to [[pass]] indicates for what instructions code
                    397: is to be generated.
                    398: It is a record (position of next instruction, program counter pointer if any,
                    399: remaining instructions to generate [with positions]).
                    400: 
                    401: \indent [[prepare]] produces two results: the instruction stream with
                    402: size pointers added, and the total size of code to be generated.
                    403: We add the total size because that is the only way to find the number
                    404: of [[bltzal]]s, which are implicit in the instruction stream.
                    405: 
                    406: <<assembler>>=
                    407: fun prepare instrs =
                    408:  let fun add_positions(done, inst::rest) =  
                    409:                  add_positions( (ref 0, inst) :: done, rest)
                    410:        | add_positions(done, nil) = done
                    411: 
                    412:      val instrs' = add_positions(nil, instrs) (* reverse and add [[ref int]]s*)
                    413: 
                    414:      fun passes(oldsize) = 
                    415:                 (* make passes with no emission until size is stable*)
                    416:         let val size = pass NONE (0,NONE,instrs')
                    417:         in  if size=oldsize then size
                    418:             else passes size
                    419:         end
                    420:   in {size = passes 0, stream = instrs'}
                    421:   end
                    422: 
                    423: fun assemble emitters instrs =
                    424:         pass (SOME emitters) (0,NONE,#stream (prepare instrs))
                    425: 
                    426: <<functions that assemble [[instr]]s into code>>=
                    427: fun get (SOME x) = x 
                    428:   | get NONE = ErrorMsg.impossible "missing pcptr in mipscoder"
                    429: 
                    430: <<[[pcptr]] functions>>
                    431: <<single pass>>
                    432: <<assembler>>
                    433: 
                    434: fun codegen emitters = (
                    435:     assemble emitters (!kept);
                    436:     <<reinitialize [[kept]]>>
                    437:     )
                    438: @
                    439: The program counter pointer is a device that enables us to to addressing
                    440: relative to the pcp register, register 31.
                    441: The need for it arises when we want to access a data element which we know
                    442: only by its label.
                    443: The labels give us addresses relative to the beginning of the function,
                    444: but we can only use addresses relative to some register.
                    445: The answer is to set register~31 with a [[bltzal]] instruction,
                    446: then use that for addressing.
                    447: 
                    448: The function [[needs_a_pcptr]] determines when it is necessary
                    449: to have a known value in register~31.
                    450: That is, we need the program counter pointer
                    451: \itemize
                    452: \item 
                    453: at [[NOP]] for a reason to be named later?
                    454: \item
                    455: at any operation that uses an effective address that refers to a label
                    456: (since all labels have to be relative to the program counter).
                    457: \item
                    458: BEQ's and BCOP1's to very far away, 
                    459: since we have to compute the address for a JUMP 
                    460: knowing the value of the program counter pointer.
                    461: \enditemize
                    462: <<[[pcptr]] functions>>=
                    463: fun needs_a_pcptr(_,SLT(_,Immedlab _,_)) = true
                    464:   | needs_a_pcptr(_,ADD(_,Immedlab _,_)) = true
                    465:   | needs_a_pcptr(_,AND(_,Immedlab _,_)) = true
                    466:   | needs_a_pcptr(_,OR(_,Immedlab _,_)) = true
                    467:   | needs_a_pcptr(_,XOR(_,Immedlab _,_)) = true
                    468:   | needs_a_pcptr(_,MOVE(Immedlab _,_)) = true
                    469:   | needs_a_pcptr(_,LOAD(_,_,Immedlab _,_)) = true
                    470:   | needs_a_pcptr(_,STORE(_,_,Immedlab _,_)) = true
                    471:   | needs_a_pcptr(_,SLL(Immedlab _,_,_)) = true
                    472:   | needs_a_pcptr(_,SRA(Immedlab _,_,_)) = true
                    473:   | needs_a_pcptr(1, BEQ _) = false  (* small BEQ's dont need pcptr *)
                    474:   | needs_a_pcptr(_, BEQ _) = true   (* but large ones do *)
                    475:   | needs_a_pcptr(1, BCOP1 _) = false  (* small BCOP1's dont need pcptr *)
                    476:   | needs_a_pcptr(_, BCOP1 _) = true   (* but large ones do *)
                    477:   | needs_a_pcptr _ = false
                    478: @
                    479: Creating the program counter pointer once, with a [[bltzal]], is not
                    480: enough; we have to invalidate the program counter pointer at every
                    481: label, since control could arrive at the label from God knows where, and
                    482: therefore we don't know what the program counter pointer is.
                    483: 
                    484: We use the function [[makepcptr]] to create a new program counter pointer
                    485: ``on the fly'' while generating code for other [[instrs]].
                    486: (I chose not to create a special [[instr]] for [[bltzal]], which I
                    487: could have inserted at appropriate points in the instruction stream.)
                    488: To try and find an odd bug, I'm adding no-ops after each [[bltzal]].
                    489: I don't really believe they're necessary.
                    490: 
                    491: The function [[gen]], which generates the instructions (or computes
                    492: their size), takes three arguments.
                    493: Third: the list of instructions to be generated (paired with pointers
                    494: to their sizes); first: the position (in words) at which to generate 
                    495: those instructions;  second: the current value of the program counter
                    496: pointer (register~31), if known.
                    497: 
                    498: The mutual recursion between [[gen]] and [[makepcptr]] maintains
                    499: the program counter pointer.
                    500: [[gen]] invalidates it at labels, and calls [[makepcptr]] to create a valid
                    501: one when necessary (as determined by [[needs_a_pcptr]]).
                    502: <<single pass>>=
                    503: fun pass emitters =
                    504: let fun makepcptr(i,x) = 
                    505:          (* may need to emit NOP for delay slot if next instr is branch *)
                    506:   let val size = case x of ((_,BEQ _)::rest) => 2 
                    507:                         | ((_,BCOP1 _)::rest) => 2 
                    508:                         | _ => 1
                    509:   in  case emitters of NONE                     => () 
                    510:                      | SOME (emit, _, _ ) => (
                    511:                              emit(Opcodes.bltzal(0,0));
                    512:                              if size=2 then emit(Opcodes.add(0,0,0)) else ());
                    513:       gen(i+size, SOME (i+2), x)
                    514:   end
                    515: and gen(i,_,nil) = i
                    516:   | gen(i, _, (_,DEFINE lab) :: rest) = (lab := i; gen(i,NONE, rest))
                    517:                         (* invalidate the pc pointer at labels *)
                    518:   (* may want to do special fiddling with NOPs *)
                    519:   | gen(pos, pcptr, x as ((sizeref as ref size, inst) :: rest)) =
                    520:        if (pcptr=NONE andalso needs_a_pcptr(size, inst)) then makepcptr(pos,x)
                    521:        else case emitters of
                    522:           SOME (emit : int*int -> unit, emit_string : int -> string -> unit,
                    523:                emit_real : string -> unit) =>
                    524:              <<emit MIPS instructions>>
                    525:         | NONE =>
                    526:              <<compute positions>>
                    527: in  gen
                    528: end
                    529: 
                    530: @ \subsection{Generating the instructions}
                    531: Now we need to consider the nitty-gritty details of just what instructions
                    532: are generated for each [[instr]].
                    533: In  early passes, we'll just need to know how many instructions are
                    534: required (and that number may change from pass to pass, so it must be
                    535: recomputed).
                    536: In the last pass, the sizes are stable (by definition), so we can look
                    537: at the sizes to see what instructions to generate.
                    538: 
                    539: We'll consider the [[instrs]] in groups, but first, here's the
                    540: way we will structure things:
                    541: <<compute positions>>=
                    542: let <<functions for computing sizes>>
                    543:     val newsize = case inst of
                    544:         <<cases for sizes to be computed>>
                    545: in  if newsize > size then sizeref := newsize else ();
                    546:     gen(pos+(!sizeref) (* BUGS -- was pos+size*),pcptr,rest)
                    547: end
                    548: <<emit MIPS instructions>>=
                    549: let fun gen1() = gen(pos+size,pcptr,rest) 
                    550:                                 (* generate the rest of the [[instr]]s *)
                    551:     open Bits
                    552:     open Opcodes
                    553:     <<declare reserved registers [[tempreg]] and [[pcreg]]>>
                    554:     <<functions for emitting instructions>>
                    555: in  case inst of
                    556:     <<cases of instructions to be emitted>>
                    557: end
                    558: @ When we get around to generating code, we may need to use a temporary
                    559: register.
                    560: For example, if we want to load into a register
                    561: an immediate constant that won't fit
                    562: into 16~bits, we will have to load the high-order part of the constant
                    563: with [[lui]], then use [[addi]] to add then the low-order part.
                    564: The MIPS assembler has a similar problem, and on page D-2 of
                    565: the MIPS book we notice that register~1 is reserved for the use of the
                    566: assembler.
                    567: So we do the same.
                    568: 
                    569: We need to reserve a second register for use in pointing to the program 
                    570: counter.
                    571: We will use register 31 because the [[bltzal]] instruction automatically
                    572: sets register 31 to the PC.
                    573: <<declare reserved registers [[tempreg]] and [[pcreg]]>>=
                    574: val tempreg = 1
                    575: val pcreg = 31
                    576: @
                    577: Before showing the code for the actual instructions, we should
                    578: point out that
                    579: we have two different ways of emitting a long word.
                    580: [[emitlong]] just splits the bits into two pieces for those cases
                    581: when it's desirable to put a word into the memory image.
                    582: [[split]] gives something that will load correctly
                    583: when the high-order piece is loaded into a high-order halfword
                    584: (using [[lui]]),
                    585: and the low-order piece is sign-extended and then added to the 
                    586: high-order piece.
                    587: This is the way we load immediate constants of more than sixteen bits.
                    588: It is also useful for generating load or store instructions with
                    589: offsets of more than sixteen bits: we [[lui]] the [[hi]] part and
                    590: add it to the base regsiter, then use the [[lo]] part as an offset.
                    591: <<functions for emitting instructions>>=
                    592: fun emitlong i = emit(rshift(i,16), andb(i,65535))
                    593:                                 (* emit one long word (no sign fiddling) *)
                    594: fun split i = let val hi = rshift(i,16) and lo = andb(i,65535)
                    595:                      in if lo<32768 then (hi,lo) else (hi+1, lo-65536)
                    596:                     end
                    597: 
                    598: @ We begin implementing [[instrs]] by considering those that emit constants.
                    599: String constants are padded with nulls out to a word boundary, and 
                    600: real constants take up two words.
                    601: {\bf At the moment real constants seem to be zeros.
                    602: One day this will have to be fixed.}
                    603: Integer constants are just emitted with [[emitlong]].
                    604: <<cases for sizes to be computed>>=
                    605:   STRINGCONST s => Integer.div(String.length(s)+3,4)
                    606: | REALCONST _ => 2
                    607: | EMITLONG _ => 1
                    608: <<cases of instructions to be emitted>>=
                    609:   STRINGCONST s => 
                    610:         let val s' = s ^ "\000\000\000\000"
                    611:         in  gen1(emit_string (4*size) s')
                    612:                                       (* doesn't know Big vs Little-Endian *)
                    613:         end
                    614: | REALCONST s => gen1(emit_real s)  (* floating pt constant *)
                    615: | EMITLONG i => gen1(emitlong i)
                    616: @
                    617: Next consider the labels.
                    618: A [[DEFINE]] should never reach this far, and [[EMITLAB]] is almost like
                    619: an [[EMITLONG]].
                    620: <<cases for sizes to be computed>>=
                    621: | DEFINE _ => ErrorMsg.impossible "generate code for DEFINE in mipscoder"
                    622: | EMITLAB _ => 1
                    623: <<cases of instructions to be emitted>>=
                    624: | DEFINE _ => gen1(ErrorMsg.impossible "generate code for DEFINE in mipscoder")
                    625: | EMITLAB(i, ref d) => gen1(emitlong((d-pos)*4+i))
                    626: @
                    627: Now we have to start worrying about instructions with [[EA]] in them.
                    628: The real difficulty these things present is that they may have an
                    629: immediate operand that won't fit in 16~bits.
                    630: So we'll need to get this large immediate operand into a register,
                    631: sixteen bits at a time, and then do the operation on the register.
                    632: 
                    633: Since all of the arithmetic instructions have this difficulty, and since
                    634: we can use them to implement the others, we'll start with those and
                    635: catch up with the control-flow instructions later.
                    636: @ [[SUB]], [[MULDIV]], and [[MFLO]] all use registers only, so they are easy.
                    637: The other arithmetic operations get treated exactly the same, so we'll
                    638: use a function to compute the size.
                    639: {\bf move this to follow the definition of [[arith]]?}
                    640: <<cases for sizes to be computed>>=
                    641: | ADD(_, ea, _) => easize ea
                    642: | AND(_, ea, _) => easize ea
                    643: | OR (_, ea, _) => easize ea
                    644: | XOR(_, ea, _) => easize ea
                    645: | SUB _ => 1
                    646: | MULDIV _ => 1 
                    647: | MFLO _ => 1
                    648: | MFHI _ => 1
                    649: @ Register operations take one instruction.
                    650: Immediate operations take one instruction for 16~bit constants,
                    651: and 3 for larger constants (since it costs two instructions to load
                    652: a big immediate constant into a register).
                    653: An immediate instruction with [[Immedlab l]] means that the operand
                    654: is intended to be the machine address associated with that label.
                    655: To compute that address, we need to add 
                    656: [[4*(l-pcptr)]] to the contents of 
                    657: register~[[pcreg]] (which holds [[4*pcptr]]), 
                    658: put the results in a register, and operate on that register.
                    659: 
                    660: This tells us enough to compute the sizes.
                    661: <<functions for computing sizes>>=       
                    662: fun easize (Direct _) = 1
                    663:   | easize (Immed i) = if abs(i)<32768 then 1 else 3
                    664:   | easize (Immedlab(ref lab)) = 1 + easize(Immed (4*(lab-(get pcptr))))
                    665: @ 
                    666: As we have seen, 
                    667: to implement any arithmetic operation, we need to know the register 
                    668: form and the sixteen-bit immediate form.
                    669: We will also want the operator from [[instr]], since we do the
                    670: large immediate via a recursive call.
                    671: We'll set up a function, [[arith]], that does the job.
                    672: <<functions for emitting instructions>>=
                    673: fun arith (opr, rform, iform) =
                    674:    let fun ar (Reg op1, Direct (Reg op2), Reg result) = 
                    675:                 gen1(emit(rform(result,op1,op2)))
                    676:          | ar (Reg op1, Immed op2, Reg result) =
                    677:                 (case size of
                    678:                     1 (* 16 bits *) => gen1(emit(iform(result,op1,op2)))
                    679:                   | 3 (* 32 bits *) => 
                    680:                      gen(pos,pcptr,
                    681:                           (ref 2, LDI_32(op2, Reg tempreg))::
                    682:                           (ref 1, opr(Reg op1, Direct(Reg tempreg), Reg result))::
                    683:                           rest)
                    684:                   | _ => gen(ErrorMsg.impossible 
                    685:                                 "bad size in arith Immed in mipscoder")
                    686:                 )
                    687:          | ar (Reg op1, Immedlab (ref op2), Reg result) =
                    688:                 gen(pos, pcptr, 
                    689:                       (ref (size-1), 
                    690:                             ADD(Reg pcreg,Immed(4*(op2-(get pcptr))), Reg tempreg))::
                    691:                       (ref 1, opr(Reg op1, Direct(Reg tempreg), Reg result))::
                    692:                       rest)
                    693:    in  ar
                    694:    end
                    695: @
                    696: The generation itself may be a bit anticlimactic.
                    697: The MIPS has no ``subtract immediate'' instruction, and [[SUB]] has
                    698: a different type than the others, so we emit it directly.
                    699: <<cases of instructions to be emitted>>=
                    700: | ADD stuff => arith (ADD,add,addi) stuff
                    701: | AND stuff => arith (AND,and',andi) stuff
                    702: | OR  stuff => arith (OR,or,ori) stuff
                    703: | XOR stuff => arith (XOR,xor,xori) stuff
                    704: | SUB (Reg op1, Reg op2, Reg result) => gen1(emit(sub(result,op1,op2)))
                    705: | MULDIV(DIV, Reg op1, Reg op2) => gen1(emit(div(op1,op2)))
                    706: | MULDIV(MULT,Reg op1, Reg op2) => gen1(emit(mult(op1,op2)))
                    707: | MFLO(Reg result) => gen1(emit(mflo(result)))
                    708: | MFHI(Reg result) => gen1(emit(mfhi(result)))
                    709: @ Floating point arithmetic is pretty easy because we always do it in
                    710: registers.  
                    711: We also support only one format, double precision.
                    712: <<cases for sizes to be computed>>=
                    713: | NEG_D _ => 1
                    714: | MUL_D _ => 1
                    715: | DIV_D _ => 1
                    716: | ADD_D _ => 1
                    717: | SUB_D _ => 1
                    718: @ When emitting instructions we have to remember the Mips instructions
                    719: use result on the left, but the [[MIPSCODER]] signature requires result
                    720: on the right.
                    721: <<cases of instructions to be emitted>>=
                    722: | NEG_D (Reg op1,Reg result) => gen1(emit(neg_fmt(D_fmt,result,op1)))
                    723: <<functions for emitting instructions>>=
                    724: fun float3double instruction (Reg op1,Reg op2,Reg result) =
                    725:    gen1(emit(instruction(D_fmt,result,op1,op2)))
                    726: <<cases of instructions to be emitted>>=
                    727: | MUL_D x => float3double mul_fmt x
                    728: | DIV_D x => float3double div_fmt x
                    729: | ADD_D x => float3double add_fmt x
                    730: | SUB_D x => float3double sub_fmt x
                    731: 
                    732: 
                    733: @ We offer a separate [[MOVE]] instruction because of large immediate
                    734: constants.  
                    735: It is always possible to do [[move(src,dest)]] by doing 
                    736: [[add(Reg 0,src,dest)]], but the general form [[add(Reg i, Immed c, dest)]]
                    737: takes three instructions when [[c]] is a large constant (more than 16 bits).
                    738: Rather than clutter up the code for [[add]] (and [[or]] and [[xor]]) by
                    739: trying to recognize register~0, we provide [[move]] explicitly.
                    740: 
                    741: \indent [[LDI_32]] takes care of the particular case in which we are 
                    742: loading a 32-bit immediate constant into a register.  
                    743: It dates from the bad old days before [[MOVE]], and it might be a good idea
                    744: to remove it sometime.
                    745: <<functions for emitting instructions>>=
                    746: fun domove (Direct (Reg src), Reg dest) = gen1(emit(add(dest,src,0)))
                    747:   | domove (Immed src, Reg dest) =
                    748:         (case size of
                    749:             1 (* 16 bits *) => gen1(emit(addi(dest,0,src)))
                    750:           | 2 (* 32 bits *) => 
                    751:                        gen(pos,pcptr,(ref 2, LDI_32(src, Reg dest))::rest)
                    752:           | _ => gen(ErrorMsg.impossible "bad size in domove Immed in mipscoder")
                    753:         )
                    754:   | domove (Immedlab (ref src), Reg dest) =
                    755:         gen(pos, pcptr, 
                    756:               (ref size, 
                    757:                     ADD(Reg pcreg,Immed(4*(src-(get pcptr))), Reg dest))::rest)
                    758: @ Notice we use [[easize]] and not [[movesize]] in the third clause
                    759: because when we reach this point the treatment of a [[MOVE]] is the same
                    760: as that of an [[ADD]].
                    761: <<functions for computing sizes>>=
                    762: fun movesize (Direct _) = 1
                    763:   | movesize (Immed i) = if abs(i)<32768 then 1 else 2
                    764:   | movesize (Immedlab(ref lab)) = easize(Immed (4*(lab-(get pcptr))))
                    765: 
                    766: <<cases for sizes to be computed>>=
                    767: | MOVE (src,_) => movesize src
                    768: | LDI_32 _ => 2
                    769: | LUI _ => 1
                    770: <<cases of instructions to be emitted>>=
                    771: | MOVE stuff => domove stuff
                    772: | LDI_32 (immedconst, Reg dest) =>
                    773:          let val (hi,lo) = split immedconst
                    774:          in  gen1(emit(lui(dest,hi));emit(addi(dest,dest,lo)))
                    775:          end 
                    776: | LUI (Reg dest,immed16) => gen1(emit(lui(dest,immed16)))
                    777: 
                    778: @ 
                    779: Now that we've done arithmetic, we can see how to do control flow without
                    780: too much trouble.
                    781: [[SLT]] can be treated just like an arithmetic operator.
                    782: [[BEQ]] is simple if the address to which we branch is close enough.
                    783: Otherwise we use the following sequence for [[BEQ(Reg op1, Reg op2, ref dest)]]:
                    784: \verbatim
                    785:         bne op1,op2,L
                    786:         ADD (Reg pcreg, Immed (4*(dest-pcptr)), Reg tempreg)
                    787:         jr tempreg
                    788:      L: ...
                    789: \endverbatim
                    790: Notice we don't have to put a [[NOP]] in the delay slot of the [[bne]].
                    791: We don't need one after the jump unless we needed one after the 
                    792: original [[BEQ]], in which case one will be there.
                    793: If the branch is taken, we're doing as well as we can.
                    794: If the branch is not taken, we will have executed an [[add]] or [[lui]] in the 
                    795: delay slot of the [[bne]], but the results just get thrown away.
                    796: <<cases for sizes to be computed>>=
                    797: | SLT(_, ea, _) => easize ea
                    798: | BEQ(_,_,_,ref dest) => 
                    799:         if abs((pos+1)-dest) < 32768 then 1 (* single instruction *)
                    800:         else 2+easize (Immed (4*(dest-(get pcptr))))
                    801: | JUMP _ => 1
                    802: | SLT_D _ => 1
                    803: | SEQ_D _ => 1
                    804: | BCOP1(_,ref dest) => 
                    805:         if abs((pos+1)-dest) < 32768 then 1 (* single instruction *)
                    806:         else 2+easize (Immed (4*(dest-(get pcptr))))
                    807: | NOP => 1
                    808: @ The implementation is as described, except we use a 
                    809: non-standard [[nop]].
                    810: There are many Mips instructions that have no effect, and the standard
                    811: one is the word with all zeroes ([[sll 0,0,0]]).
                    812: We use [[add]],  adding 0 to 0 and store the result in 0, because it
                    813: will be easy to distinguish from a data word that happens to be zero.
                    814: <<cases of instructions to be emitted>>=
                    815: | SLT stuff => arith (SLT,slt,slti) stuff
                    816: | BEQ(b, Reg op1, Reg op2, ref dest) =>
                    817:     if size = 1 then 
                    818:          gen1(emit((if b then beq else bne)(op1,op2,dest-(pos+1))))
                    819:     else gen(pos,pcptr,
                    820:                   (ref 1, BEQ(not b, Reg op1, Reg op2, ref(pos+size)))
                    821:                   ::(ref (size-2), 
                    822:                       ADD(Reg pcreg, Immed(4*(dest-(get pcptr))), Reg tempreg))
                    823:                   ::(ref 1, JUMP(Reg tempreg))
                    824:                   ::rest)
                    825: | JUMP(Reg dest) => gen1(emit(jr(dest)))
                    826: | SLT_D (Reg op1, Reg op2) => 
                    827:      gen1(emit(c_lt(D_fmt,op1,op2)))
                    828: | SEQ_D (Reg op1, Reg op2) => 
                    829:      gen1(emit(c_seq(D_fmt,op1,op2)))
                    830: | BCOP1(b, ref dest) =>
                    831:     let fun bc1f offset = cop1(8,0,offset)
                    832:        fun bc1t offset = cop1(8,1,offset)
                    833:     in  if size = 1 then 
                    834:              gen1(emit((if b then bc1t else bc1f)(dest-(pos+1))))
                    835:         else gen(pos,pcptr,
                    836:                   (ref 1, BCOP1(not b, ref(pos+size)))
                    837:                   ::(ref (size-2), 
                    838:                       ADD(Reg pcreg, Immed(4*(dest-(get pcptr))), Reg tempreg))
                    839:                   ::(ref 1, JUMP(Reg tempreg))
                    840:                   ::rest)
                    841:     end
                    842: | NOP => gen1(emit(add(0,0,0)))         (* one of the many MIPS no-ops *)
                    843: @
                    844: Our next problem is to tackle load and store.
                    845: The major difficulty is if the offset is too large to fit in
                    846: sixteen bits; if so, we have to create a new base register.
                    847: If we have [[Immedlab]], we do it as an offset from [[pcreg]].
                    848: <<functions for emitting instructions>>=
                    849: fun memop(rform,Reg dest, Direct (Reg base), offset) =
                    850:       (case size
                    851:        of 1 => gen1(emit(rform(dest,offset,base)))
                    852:         | 3 => let val (hi,lo) = split offset
                    853:                in  gen1(emit(lui(tempreg,hi));       (* tempreg = hi @<< 16 *)
                    854:                         emit(add(tempreg,base,tempreg));(* tempreg += base *)
                    855:                         emit(rform(dest,lo,tempreg)) (* load dest,lo(tempreg) *)
                    856:                        )
                    857:                end
                    858:         | _ => gen1(ErrorMsg.impossible "bad size in memop Direct in mipscoder")
                    859:        )
                    860:   | memop(rform,Reg dest, Immed address, offset) =
                    861:       (case size
                    862:        of 1 => gen1(emit(rform(dest,offset+address,0)))
                    863:         | 2 => let val (hi,lo) = split (offset+address)
                    864:                in  gen1(emit(lui(tempreg,hi)); 
                    865:                         emit(rform(dest,lo,tempreg))
                    866:                        )
                    867:                end
                    868:         | _ => gen1(ErrorMsg.impossible "bad size in memop Immed in mipscoder")
                    869:        )
                    870:   | memop(rform,Reg dest, Immedlab (ref lab), offset) =
                    871:       memop(rform, Reg dest, Direct (Reg pcreg), offset+4*(lab - get pcptr))
                    872: @ The actual registers don't matter for computing sizes, and in fact
                    873: the value of [[pcreg]] is not visible here, so we use an arbitrary
                    874: register ([[Reg 0]]) to compute the size.
                    875: <<functions for computing sizes>>=       
                    876: fun adrsize(_, Reg _, Direct _, offset) = 
                    877:             if abs(offset)<32768 then 1 else 3
                    878:   | adrsize(_, Reg _, Immed address, offset) = 
                    879:             if abs(address+offset) < 32768 then 1 else 2
                    880:   | adrsize(x, Reg dest, Immedlab (ref lab), offset) =
                    881:             adrsize(x, Reg dest, Direct (Reg 0  (* pcreg in code *) ), 
                    882:                     offset+4*(lab-(get pcptr)))
                    883: <<cases for sizes to be computed>>=
                    884: | LOAD  x => adrsize x
                    885: | STORE x => adrsize x
                    886: <<cases of instructions to be emitted>>=
                    887: | LOAD  (Byte,dest,address,offset) => memop(lbu,dest,address,offset)
                    888: | LOAD  (Word,dest,address,offset) => memop(lw,dest,address,offset)
                    889: | LOAD  (Floating,dest,address,offset) => memop(lwc1,dest,address,offset)
                    890: | STORE (Byte,dest,address,offset) => memop(sb,dest,address,offset)
                    891: | STORE (Word,dest,address,offset) => memop(sw,dest,address,offset)
                    892: | STORE (Floating,dest,address,offset) => memop(swc1,dest,address,offset)
                    893: @  
                    894: For the shift instructions, only register and immediate operands
                    895: make sense.
                    896: Immediate operands make sense if and only if they are representable
                    897: in five bits.
                    898: If everything is right, these are single instructions.
                    899: <<cases for sizes to be computed>>=
                    900: | SLL _ => 1  
                    901: | SRA _ => 1
                    902: <<cases of instructions to be emitted>>=
                    903: | SLL (Immed shamt, Reg op1, Reg result) => gen1(
                    904:         if (shamt >= 0 andalso shamt < 32) then emit(sll(result,op1,shamt))
                    905:         else ErrorMsg.impossible ("bad sll shamt "
                    906:                ^ (Integer.makestring shamt) ^ " in mipscoder"))
                    907: | SLL (Direct(Reg shamt), Reg op1, Reg result) => 
                    908:         gen1(emit(sllv(result,op1,shamt)))
                    909: | SLL (Immedlab _,_,_) => ErrorMsg.impossible "sll shamt is Immedlab in mipscoder"
                    910: | SRA (Immed shamt, Reg op1, Reg result) => gen1(
                    911:         if (shamt >= 0 andalso shamt < 32) then emit(sra(result,op1,shamt))
                    912:         else ErrorMsg.impossible ("bad sra shamt "
                    913:                ^ (Integer.makestring shamt) ^ " in mipscoder"))
                    914: | SRA (Direct(Reg shamt), Reg op1, Reg result) =>
                    915:         gen1(emit(srav(result,op1,shamt)))
                    916: | SRA (Immedlab _,_,_) => ErrorMsg.impossible "sra shamt is Immedlab in mipscoder"
                    917: @
                    918: Finally, comments are ignored, and marks (backpointers) are written into the
                    919: instruction stream.
                    920: 
                    921: Comments are used by the front end to give diagnostics.
                    922: In the bad old days we would have had two different [[MIPSCODER]]s, one
                    923: which generated machine code (and ignored comments), and one which
                    924: wrote out assembly code (and copied comments).
                    925: Today we have just one, which means the rerouting of comments takes place
                    926: at a much higher level.  Look in [[cps/mipsglue.nw]].
                    927: <<cases for sizes to be computed>>=
                    928: | COMMENT _ => 0
                    929: | MARK => 1                     (* backpointer takes one word *)
                    930: @ Just for the record, here's the description of what a mark (backpointer)
                    931: is.
                    932: ``Take the byte address at which the mark resides and add 4, giving
                    933: the byte address of the object following the mark.
                    934: (That object is the marked object.)
                    935: Subtract the byte address of the initial word that marks the
                    936: start of this instruction stream.
                    937: Now divide by 4, giving the distance in words between the
                    938: beginning of the block and the marked object.
                    939: Take that quantity and shift it left by multiplying by [[power_tags]],
                    940: and indicate the result is a mark by adding the tag bits [[tag_backptr]]
                    941: into the low order part.''
                    942:  [[pos+1]] is exactly the required distance in words.
                    943: <<cases of instructions to be emitted>>=
                    944: | COMMENT _ => gen1()
                    945: | MARK => gen1(
                    946:     let open System.Tags
                    947:     in  emitlong((pos+1) * power_tags + tag_backptr)
                    948:     end)
                    949: @
                    950: @ \subsection{Optimization}
                    951: The first step towards optimization is to take statistics.
                    952: We will count: [[instrs]], Mips words, [[NOP]]s in load and branch delays,
                    953: and [[bltzal]]s.
                    954: In the current implementation the [[bltzal]]s are implicit, so there
                    955: is no way to count them or optimize them.
                    956: <<statistics>>=
                    957: fun printstats stream
                    958:  {inst : int, code : int, data : int, 
                    959:   load : int, branch : int, compare : int, size : int} =
                    960:     let val print = output stream
                    961:        val nop = load+branch+compare
                    962:        val bltzal = size - (code + data)
                    963:        val code = code + bltzal
                    964:        <<definition of [[sprintf]]>>
                    965:        fun P x = substring(makestring(100.0 * x),0,4)  (* percent *)
                    966:        fun printf f d = print (sprintf f d)
                    967:     in  printf ["Counted "," instrs in "," words (",
                    968:                                " code, "," data)\n" ^
                    969:                "Used "," NOPs ("," load, "," branch,"," compare) and "," bltzals\n" ^
                    970:                "","% of code words were NOPs; ","% were bltzals\n" ^
                    971:                "","% of all words were code; ","% of all words were NOPs\n"]
                    972:               [I inst, I size, I code, I data, 
                    973:                 I nop, I load, I branch,  I compare, I bltzal,
                    974:                P (real nop / real code), P (real bltzal / real code),
                    975:                P (real code / real size), P (real nop / real size)]
                    976:        handle Overflow => print "[Overflow in computing Mips stats]\n"
                    977:             | Real s => print ("[FPE ("^s^") in computing Mips stats]\n")
                    978:     end
                    979:                
                    980: <<statistics>>=
                    981: <<definition of [[iscode]]>>
                    982: fun addstats (counts as {inst,code,data,load,branch,compare}) =
                    983:   fn nil => counts
                    984:    | (sizeref,first)::(_,NOP)::rest => addstats
                    985:           {inst=inst+2, code=code+(!sizeref)+1, data=data,
                    986:            load=load+ (case first of LOAD _ => 1 | _ => 0),
                    987:            branch=branch +(case first of BEQ _ => 1 | JUMP _ => 1 
                    988:                                       | BCOP1 _ => 1 | _ => 0),
                    989:           compare=compare+(case first of SLT_D _ => 1 | SEQ_D _ => 1 
                    990:                                        | _ => 0)
                    991:           } rest
                    992:    | (sizeref,first)::rest => addstats
                    993:           {inst=inst+1, 
                    994:            code = code + if iscode(first) then !sizeref else 0,
                    995:            data = data + if not (iscode first) then !sizeref else 0,
                    996:            load=load,
                    997:            branch=branch,
                    998:           compare=compare
                    999:           } rest
                   1000: 
                   1001: 
                   1002: fun codestats outfile =
                   1003:     let val {size,stream=instrs} = prepare (!kept)
                   1004:        val zero = {inst=0, code=0, data=0, load=0, branch=0, compare=0}
                   1005:         val counts as {inst,code,data,load,branch,compare} = 
                   1006:                                                addstats zero instrs
                   1007:     in  printstats outfile 
                   1008:            {inst=inst,code=code,data=data,
                   1009:             load=load,branch=branch,compare=compare,size=size}
                   1010:     end
                   1011:        
                   1012: <<definition of [[iscode]]>>=
                   1013: val iscode = fn
                   1014:     STRINGCONST _ => false
                   1015:   | REALCONST _ => false
                   1016:   | EMITLONG _ => false
                   1017:   | DEFINE _ => false
                   1018:   | EMITLAB _ => false
                   1019: 
                   1020:   | SLT _ => true
                   1021:   | BEQ _ => true
                   1022:   | JUMP _ => true
                   1023:   | NOP => true
                   1024:   | SLT_D _ => true
                   1025:   | SEQ_D _ => true
                   1026:   | BCOP1 _ => true
                   1027: 
                   1028:   | ADD _ => true
                   1029:   | AND _ => true
                   1030:   | OR  _ => true
                   1031:   | XOR _ => true
                   1032:   | SUB _ => true
                   1033:   | MULDIV _ => true
                   1034:   | MFLO _ => true
                   1035:   | MFHI _ => true
                   1036: 
                   1037:   | NEG_D _ => true
                   1038:   | MUL_D _ => true
                   1039:   | DIV_D _ => true
                   1040:   | ADD_D _ => true
                   1041:   | SUB_D _ => true
                   1042: 
                   1043:   | MOVE _ => true
                   1044:   | LDI_32 _ => true
                   1045:   | LUI _ => true
                   1046: 
                   1047:   | LOAD _ => true
                   1048:   | STORE  _ => true
                   1049: 
                   1050:   | SLL _ => true
                   1051:   | SRA _ => true
                   1052: 
                   1053:   | COMMENT _ => false
                   1054:   | MARK => false
                   1055: 
                   1056: <<definition of [[sprintf]]>>=
                   1057: val I = Integer.makestring
                   1058: val R = Real.makestring
                   1059: exception Printf
                   1060: fun sprintf format values =
                   1061:     let fun merge([x],nil) = [x]
                   1062:           | merge(nil,nil) = nil
                   1063:           | merge(x::y,z::w) = x::z:: merge(y,w)
                   1064:           | merge _ = raise Printf
                   1065:     in  implode(merge(format,values))
                   1066:     end
                   1067: 
                   1068: @
                   1069: At the moment these functions are meaningless junk.
                   1070: <<functions that remove pipeline bubbles>>=
                   1071: val rec squeeze =
                   1072: 
                   1073:  fn (x as LOAD(_,Reg d, m, i))::NOP::instr::rest =>
                   1074:         if use(instr,d) then ??
                   1075:         else squeeze(x::instr::rest)
                   1076:   | (x as STORE _)::(y as LOAD _)::rest => 
                   1077:         x :: squeeze(y::rest)
                   1078:   | instr::(x as LOAD(_,Reg d, Direct(Reg s), i))::NOP::rest =>
                   1079:         if use(instr, d) orelse gen(instr, s) then ??
                   1080:         else squeeze(x::instr::rest)
                   1081:   | instr::(x as LOAD(_,Reg d, _, i))::NOP::rest =>
                   1082:         if use(instr,d) then ??
                   1083:         else squeeze(x::instr::rest)
                   1084:   | (x as MFLO _):: (y as MULDIV _) :: rest =>
                   1085:         x :: squeeze (y::rest)
                   1086:   | (x as MFLO(Reg d))::instr::rest =>
                   1087:         if (use(instr,d) orelse gen(instr,d) then ??
                   1088:         else squeeze(instr::x::rest)
                   1089:   | instr :: (x as MULDIV(Reg a, Reg b)) :: rest =>
                   1090:         if gen(instr,a) orelse gen(instr,b) then ??
                   1091:         else squeeze(x::instr::rest)
                   1092: 
                   1093: val rec final =
                   1094:  fn
                   1095:   | instr::(x as LOAD(_,Reg d, Direct(Reg s), i))::NOP::rest =>
                   1096:         if gen(instr, s) then instr::final(x::NOP::rest)
                   1097:         else x::instr::(final rest)
                   1098:   | instr :: (x as JUMP _) :: NOP :: rest =>
                   1099:         x :: instr :: final rest
                   1100:   | instr :: (x as BEQ(_,Reg a, Reg b, _)) :: NOP :: rest =>
                   1101:         if gen(instr,a) orelse gen(instr,b) then instr::x::NOP::(final rest)
                   1102:         else x::instr::(final rest)
                   1103: 
                   1104: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.