|
|
typedef long Type;

/*
 * See the comments at the beginning of gbitblt.c
 * for an outline of how this bitblt works.
 *
 * Registers
 * In addition to the registers of the abstract machine,
 * we use RF to hold ~0 always, RX as a scratch register,
 * and AT to hold the address of the table given in Inittab.
 */
#define As	5
#define Ad	6
#define Rs	7
#define Rd	8
#define Rt	9
#define Ru	10
#define Ri	11
#define Ro	12
#define RF	1	/* ~0 */
#define RX	2	/* scratch */
#define AT	3	/* conversion table */

/*
 * Macros for assembling MIPS instructions
 */

/*
 * Generate `a', `size' bits wide, into bit position `shift'.
 * The SMM version is used when there might be extra bits in `a'.
 *
 * The shift is done in unsigned long: opcodes of 040 and above
 * (LW, LBU, ...) land in bit 31, and shifting a signed int into
 * the sign bit is undefined in ISO C (and would sign-extend the
 * instruction word where long is wider than 32 bits).
 * `size' is unused by SM; it only documents the field width.
 */
#define SM(a,size,shift)	((unsigned long)(a)<<(shift))
#define SMM(a,size,shift)	(((unsigned long)(a)&((1UL<<(size))-1))<<(shift))

/*
 * Instruction formats.  Nothing is masked here:
 * make sure im fits in 16 bits and sh fits in 5 bits.
 */
#define Iinst(op,rs,rt,im)	(SM(op,6,26)|SM(rs,5,21)|SM(rt,5,16)|(im))
#define Rinst(op,rs,rt,rd,sh,f)	(SM(op,6,26)|SM(rs,5,21)|SM(rt,5,16)|SM(rd,5,11)|SM(sh,5,6)|(f))

/*
 * Instructions
 * In every macro the last register argument is the one placed in
 * the destination field (rd for Rinst forms, rt for Iinst forms).
 * The load macros always assemble a zero offset.
 */

#define SLL(rs,rd,sh)	Rinst(0,0,rs,rd,sh,0)
#define SRL(rs,rd,sh)	Rinst(0,0,rs,rd,sh,2)
#define ADDU(rs,rt,rd)	Rinst(0,rs,rt,rd,0,041)
#define ADDIU(rs,rd,v)	Iinst(011,rs,rd,v)
#define AND(rs,rt,rd)	Rinst(0,rs,rt,rd,0,044)
#define ANDI(rs,rd,v)	Iinst(014,rs,rd,v)
#define OR(rs,rt,rd)	Rinst(0,rs,rt,rd,0,045)
#define ORI(rs,rd,v)	Iinst(015,rs,rd,v)
#define XOR(rs,rt,rd)	Rinst(0,rs,rt,rd,0,046)
#define XORI(rs,rd,v)	Iinst(016,rs,rd,v)
#define NOR(rs,rt,rd)	Rinst(0,rs,rt,rd,0,047)
#define NOP		Rinst(0,0,0,0,0,047)	/* NOR of register 0 into register 0 */
#define LUI(r,v)	Iinst(017,0,r,v)
#define LW(as,rd)	Iinst(043,as,rd,0)
#define LBU(as,rd)	Iinst(044,as,rd,0)
/* remaining instruction encoders; LHU and SW assemble a zero offset */
#define LHU(base,dst)	Iinst(045, base, dst, 0)
#define SW(base,src)	Iinst(053, base, src, 0)
#define BGTZ(reg,disp)	Iinst(007, reg, 0, disp)
#define JR(reg)		Rinst(0, reg, 0, 0, 0, 010)

/*
 * Macros for assembling the operations of the abstract machine.
 * Each assumes that ulong *p points to the next location where
 * an instruction should be assembled.
 * These macros can use RX as a scratch register, but no others.
 * They can assume RF holds ~0.
 *
 * The macros expand to bare statements (some to more than one),
 * so use them only as complete statements inside a braced block.
 */

/*
 * Emits: Rs ^= Rd; Rs &= c; Rs ^= Rd.
 * A constant with bits above the low 16 cannot be an ANDI
 * immediate, so it is first built in RX with LUI/ORI.
 */
#define Ofield(c) \
	*p++ = XOR(Rs, Rd, Rs); \
	if(((c)&0xFFFF0000) == 0) \
		*p++ = ANDI(Rs, Rs, (c)); \
	else { \
		*p++ = LUI(RX, ((ulong)(c))>>16); \
		*p++ = ORI(RX, RX, (c)&0xFFFF); \
		*p++ = AND(RX, Rs, Rs); \
	} \
	*p++ = XOR(Rs, Rd, Rs)

/*
 * Shifts: O{l,r}sh<amount>_<dst><src>.
 * The `a' and `b' forms take the shift count from the
 * variables sha and shb in the expanding scope.
 */
#define Olsha_RsRt	*p++ = SLL(Rt, Rs, sha)
#define Olshb_RsRt	*p++ = SLL(Rt, Rs, shb)
#define Olsh_RsRd(c)	*p++ = SLL(Rd, Rs, c)
#define Olsh_RtRt(c)	*p++ = SLL(Rt, Rt, c)
#define Olsha_RtRt	*p++ = SLL(Rt, Rt, sha)
#define Olsha_RtRu	*p++ = SLL(Ru, Rt, sha)
#define Olshb_RtRu	*p++ = SLL(Ru, Rt, shb)
#define Orsha_RsRt	*p++ = SRL(Rt, Rs, sha)
#define Orshb_RsRt	*p++ = SRL(Rt, Rs, shb)
#define Orsha_RtRu	*p++ = SRL(Ru, Rt, sha)
#define Orshb_RtRu	*p++ = SRL(Ru, Rt, shb)

/* shift the source into RX, then OR RX into the destination */
#define Oorlsha_RsRt	*p++ = SLL(Rt, RX, sha); *p++ = OR(RX, Rs, Rs)
#define Oorlshb_RsRt	*p++ = SLL(Rt, RX, shb); *p++ = OR(RX, Rs, Rs)
#define Oorlsh_RsRd(c)	*p++ = SLL(Rd, RX, c); *p++ = OR(RX, Rs, Rs)
#define Oorrsha_RsRt	*p++ = SRL(Rt, RX, sha); *p++ = OR(RX, Rs, Rs)
#define Oorrshb_RsRt	*p++ = SRL(Rt, RX, shb); *p++ = OR(RX, Rs, Rs)
#define Oorrsha_RtRu	*p++ = SRL(Ru, RX, sha); *p++ = OR(RX, Rt, Rt)
#define Oorrshb_RtRu	*p++ = SRL(Ru, RX, shb); *p++ = OR(RX, Rt, Rt)
#define Oor_RsRd	*p++ = OR(Rs, Rd, Rs)

/*
 * Try to use smaller instructions when the constant is small.
 * Note that MIPS sign extends immediate operands
 */
102: #define Add_As(c) if((c)&0xFFFF8000) { \ ! 103: if(!((c)&0xFFFF0000)) { \ ! 104: *p++ = ORI(0,RX,(c)&0xFFFF); \ ! 105: *p++ = ADDU(RX,As,As); \ ! 106: } else { \ ! 107: *p++ = LUI(RX,((ulong)(c))>>16); \ ! 108: *p++ = ORI(RX,RX,(c)&0xFFFF); \ ! 109: *p++ = ADDU(RX,As,As); \ ! 110: } \ ! 111: } else \ ! 112: *p++ = ADDIU(As,As,c) ! 113: ! 114: #define Add_Ad(c) if((c)&0xFFFF8000) { \ ! 115: if(!((c)&0xFFFF0000)) { \ ! 116: *p++ = ORI(0,RX,(c)&0xFFFF); \ ! 117: *p++ = ADDU(RX,Ad,Ad); \ ! 118: } else { \ ! 119: *p++ = LUI(RX,((ulong)(c))>>16); \ ! 120: *p++ = ORI(RX,RX,(c)&0xFFFF); \ ! 121: *p++ = ADDU(RX,Ad,Ad); \ ! 122: } \ ! 123: } else \ ! 124: *p++ = ADDIU(Ad,Ad,c) ! 125: ! 126: #define Initsd(s,d) *p++ = LUI(As,((ulong)(s))>>16); \ ! 127: *p++ = ORI(As,As,((ulong)(s))&0xFFFF); \ ! 128: *p++ = LUI(Ad,((ulong)(d))>>16); \ ! 129: *p++ = ORI(Ad,Ad,((ulong)(d))&0xFFFF) ! 130: ! 131: #define Initsh(a,b) ! 132: ! 133: /* Put all ones in RF */ ! 134: #define Extrainit *p++ = ADDIU(0,RF,0xFFFF) ! 135: ! 136: /* ! 137: * We put one less than the loop count into R[io], so that ! 138: * the loop instructions can decrement after the test instead ! 139: * of before ! 140: */ ! 141: #define Ilabel(c) tmp = (c)-1; \ ! 142: if(tmp&0xFFFF8000) { \ ! 143: *p++ = LUI(Ri,((ulong)tmp)>>16); \ ! 144: *p++ = ORI(Ri,Ri,tmp&0xFFFF); \ ! 145: } else \ ! 146: *p++ = ADDIU(0,Ri,tmp) ! 147: ! 148: #define Olabel(c) tmp = (c)-1; \ ! 149: if(tmp&0xFFFF8000) { \ ! 150: *p++ = LUI(Ro,((ulong)tmp)>>16); \ ! 151: *p++ = ORI(Ro,Ro,tmp&0xFFFF); \ ! 152: } else \ ! 153: *p++ = ADDIU(0,Ro,tmp) ! 154: ! 155: /* ! 156: * The decrement is done after the test instead of before ! 157: * so that the required delay slot can be filled with ! 158: * the decrement (counts were adjusted by [IO]label) ! 159: */ ! 160: #define Iloop(lp) *p = BGTZ(Ri,(lp-(p+1))&0xFFFF); p++; \ ! 161: *p++ = ADDIU(Ri,Ri,0xFFFF) ! 162: ! 163: #define Oloop(lp) *p = BGTZ(Ro,(lp-(p+1))&0xFFFF); p++; \ ! 
164: *p++ = ADDIU(Ro,Ro,0xFFFF) ! 165: ! 166: #define Orts *p++ = JR(31); *p++ = NOP ! 167: ! 168: /* ! 169: * Load and Fetch macros: arg should be 1 if following instr might use loaded value ! 170: * In the predecrement versions, it's as easy to do the decrment afterwards in ! 171: * the delay slot ! 172: */ ! 173: ! 174: #define Load_Rs_P *p++ = LW(As,Rs); *p++ = ADDIU(As,As,4) ! 175: #define Load_Rt_P *p++ = LW(As,Rt); *p++ = ADDIU(As,As,4) ! 176: #define Loadzx_Rt_P *p++ = LW(As,Rt); *p++ = ADDIU(As,As,4) ! 177: #define Loador_Rt_P *p++ = LW(As,Rt); *p++ = ADDIU(As,As,4) ! 178: #define Load_Ru_P *p++ = LW(As,Ru); *p++ = ADDIU(As,As,4) ! 179: #define Load_Rd_D(f) *p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rd); \ ! 180: if(f) *p++ = NOP ! 181: #define Load_Rs_D(f) *p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rs); \ ! 182: if(f) *p++ = NOP ! 183: #define Load_Rt_D(f) *p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rt); \ ! 184: if(f) *p++ = NOP ! 185: #define Loadzx_Rt_D(f) *p++ = ADDIU(As,As,-4&0xFFFF); *p++ = LW(As,Rt); \ ! 186: if(f) *p++ = NOP ! 187: #define Load_Rd(f) *p++ = LW(As,Rd); if(f) *p++ = NOP ! 188: #define Load_Rs(f) *p++ = LW(As,Rs); if(f) *p++ = NOP ! 189: #define Load_Rt(f) *p++ = LW(As,Rt); if(f) *p++ = NOP ! 190: #define Loadzx_Rt(f) *p++ = LW(As,Rt); if(f) *p++ = NOP ! 191: #define Fetch_Rd_P(f) *p++ = LW(Ad,Rd); *p++ = ADDIU(Ad,Ad,4) ! 192: #define Fetch_Rd_D(f) *p++ = ADDIU(Ad,Ad,-4&0xFFFF); *p++ = LW(Ad,Rd); \ ! 193: if(f) *p++ = NOP ! 194: #define Fetch_Rd(f) *p++ = LW(Ad,Rd); if(f) *p++ = NOP ! 195: #define Store_Rs_P *p++ = SW(Ad,Rs); *p++ = ADDIU(Ad,Ad,4) ! 196: #define Store_Rs_D *p++ = ADDIU(Ad,Ad,-4&0xFFFF); *p++ = SW(Ad,Rs) ! 197: #define Store_Rs *p++ = SW(Ad,Rs) ! 198: #define Nop *p++ = NOP ! 199: ! 200: #define Inittab(t,s) *p++ = LUI(AT,((ulong)(t))>>16); \ ! 201: *p++ = ORI(AT,AT,((ulong)(t))&0xFFFF) ! 202: ! 203: /* emit code to look up n bits at offset o; table entries are 1<<l bytes long */ ! 204: #define Table_RdRt(o,n,l) \ ! 
205: tmp = 32-((o)+(n))-(l); \ ! 206: if(tmp > 0) \ ! 207: *p++ = SRL(Rt,Rd,tmp); \ ! 208: else if((l) > 0) \ ! 209: *p++ = SLL(Rt,Rd,l); \ ! 210: else \ ! 211: *p++ = ADDU(Rt,0,Rd); \ ! 212: *p++ = ANDI(Rd,Rd,((1<<(n))-1)<<(l)); \ ! 213: *p++ = ADDU(Rd,AT,Rd); \ ! 214: if(osiz==1) *p++ = LBU(Rd,Rd); \ ! 215: else if(osiz==2) *p++ = LHU(Rd,Rd); \ ! 216: else *p++ = LW(Rd,Rd); \ ! 217: *p++ = NOP ! 218: ! 219: #define Table_RsRt(o,n,l) \ ! 220: tmp = 32-((o)+(n))-(l); \ ! 221: if(tmp > 0) \ ! 222: *p++ = SRL(Rt,Rs,tmp); \ ! 223: else if((l) > 0) \ ! 224: *p++ = SLL(Rt,Rs,l); \ ! 225: else \ ! 226: *p++ = ADDU(Rt,0,Rs); \ ! 227: *p++ = ANDI(Rd,Rs,((1<<(n))-1)<<(l)); \ ! 228: *p++ = ADDU(Rs,AT,Rs); \ ! 229: if(osiz==1) *p++ = LBU(Rs,Rs); \ ! 230: else if(osiz==2) *p++ = LHU(Rs,Rs); \ ! 231: else *p++ = LW(Rs,Rs); \ ! 232: *p++ = NOP ! 233: ! 234: /* emit code to assemble low n bits of Rd into offset o in Rs */ ! 235: #define Assemble(o,n) \ ! 236: if((o) == 0) { \ ! 237: Olsh_RsRd(32-(n)); \ ! 238: } else if((o) == 32-(n)) { \ ! 239: *p++ = OR(Rs,Rd,Rs); \ ! 240: } else { \ ! 241: Oorlsh_RsRd(32-((o)+(n))); \ ! 242: } ! 243: ! 244: /* emit code to assemble low n bits of Rd into offset o in Rs. ! 245: this works by shifting Rd as we go, it only works if ! 246: the whole word will eventually be filled */ ! 247: #define Assemblex(o,n) \ ! 248: if((o) == 0) { \ ! 249: *p++ = ADDU(Rd,0,Rs); \ ! 250: } else { \ ! 251: *p++ = SLL(Rs,Rs,n); \ ! 252: *p++ = OR(Rd,Rs,Rs); \ ! 253: } ! 254: ! 255: #define Emitop \ ! 256: p[0] = fi[0]; \ ! 257: p[1] = fi[1]; \ ! 258: p = (Type*)(((char *)p)+fin) ! 259: ! 260: typedef struct Fstr ! 261: { ! 262: char fetchs; ! 263: char fetchd; ! 264: short n; ! 265: Type instr[2]; ! 266: } Fstr; ! 267: ! 268: Fstr fstr[16] = ! 269: { ! 270: [0] 0,0,4, /* Zero */ ! 271: { LUI(Rs,0), 0 }, ! 272: ! 273: [1] 1,1,4, /* DnorS */ ! 274: { NOR(Rs,Rd,Rs), 0 }, ! 275: ! 276: [2] 1,1,8, /* DandnotS */ ! 277: { XOR(Rs,RF,Rs), AND(Rs,Rd,Rs) }, ! 278: ! 
279: [3] 1,0,4, /* notS */ ! 280: { XOR(Rs,RF,Rs), 0 }, ! 281: ! 282: [4] 1,1,8, /* notDandS */ ! 283: { XOR(Rs,RF,Rs), NOR(Rd,Rs,Rs) }, ! 284: ! 285: [5] 0,1,4, /* notD */ ! 286: { XOR(Rd,RF,Rs), 0 }, ! 287: ! 288: [6] 1,1,4, /* DxorS */ ! 289: { XOR(Rd,Rs,Rs), 0 }, ! 290: ! 291: [7] 1,1,8, /* DnandS */ ! 292: { AND(Rd,Rs,Rs), XOR(Rs,RF,Rs) }, ! 293: ! 294: [8] 1,1,4, /* DandS */ ! 295: { AND(Rd,Rs,Rs), 0 }, ! 296: ! 297: [9] 1,1,8, /* DxnorS */ ! 298: { XOR(Rd,Rs,Rs), XOR(Rs,RF,Rs) }, ! 299: ! 300: [10] 0,1,4, /* D */ ! 301: { ADDU(Rd,0,Rs), 0 }, ! 302: ! 303: [11] 1,1,8, /* DornotS */ ! 304: { XOR(Rs,RF,Rs), OR(Rs,Rd,Rs) }, ! 305: ! 306: [12] 1,0,0, /* S */ ! 307: {0, 0}, ! 308: ! 309: [13] 1,1,8, /* notDorS */ ! 310: { XOR(Rd,RF,RX), OR(Rs,RX,Rs) }, ! 311: ! 312: [14] 1,1,4, /* DorS */ ! 313: { OR(Rs,Rd,Rs), 0 }, ! 314: ! 315: [15] 0,0,4, /* F */ ! 316: { OR(RF,0,Rs), 0 }, ! 317: }; ! 318: ! 319: #include "tabs.h" ! 320: static uchar *tabs[4][4] = ! 321: { ! 322: { 0, (uchar*)tab01, (uchar*)tab02, (uchar*)tab03}, ! 323: {(uchar*)tab10, 0, (uchar*)tab12, (uchar*)tab13}, ! 324: {(uchar*)tab20, (uchar*)tab21, 0, (uchar*)tab23}, ! 325: {(uchar*)tab30, (uchar*)tab31, (uchar*)tab32, 0}, ! 326: }; ! 327: ! 328: static uchar tabosiz[4][4] = /* size in bytes of entries */ ! 329: { ! 330: { 0, 2, 4, 4}, ! 331: { 1, 0, 2, 4}, ! 332: { 1, 1, 0, 2}, ! 333: { 1, 1, 1, 0}, ! 334: }; ! 335: ! 336: enum { ! 337: Progmax = 900, /* max number of Type units in a bitblt prog */ ! 338: Progmaxnoconv = 64, /* max number of Type units when no conversion */ ! 339: }; ! 340: ! 341: void ! 342: prprog(void) ! 343: { ! 344: abort(); /* use db */ ! 345: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.