|
|
/* One word of generated code; instruction slots and program sizes are counted in Type units. */
typedef long Type;
2:
3: /*
4: * See the comments at the beginning of gbitblt.c
5: * for an outline of how this bitblt works
6:
7: * Registers
8: * in addition to the registers of the abstract machine,
9: * we use RF to hold ~0 always, RX as a scratch register,
10: * and AT to hold the address of the table given in Inittab.
11: */
/* Registers of the abstract machine, as MIPS general register numbers. */
#define As	5	/* address register loaded from s by Initsd; used by the Load macros */
#define Ad	6	/* address register loaded from d by Initsd; used by Fetch and Store */
#define Rs	7
#define Rd	8
#define Rt	9
#define Ru	10
#define Ri	11	/* loop counter set by Ilabel, tested by Iloop */
#define Ro	12	/* loop counter set by Olabel, tested by Oloop */

/* Registers private to this code generator. */
#define RF	1	/* ~0 */
#define RX	2	/* scratch */
#define AT	3	/* conversion table */
23:
24: /*
25: * Macros for assembling MIPS instructions
26: */
27:
/*
 * Generate `a', `size' bits wide, into bit position `shift'.
 * SM trusts the caller that `a' already fits in `size' bits;
 * the SMM version is used when there might be extra bits in `a'.
 *
 * The value is widened to unsigned long before shifting: opcodes
 * with bit 5 set (LW, LBU, LHU, SW) would otherwise overflow a
 * signed int when moved up to bit 26.
 */
#define SM(a,size,shift)	((unsigned long)(a)<<(shift))
#define SMM(a,size,shift)	(((unsigned long)(a)&((1UL<<(size))-1))<<(shift))

/*
 * Iinst assembles an immediate-format word, Rinst a register-format word.
 * Make sure im fits in 16 bits and sh fits in 5 bits: neither is masked.
 */
#define Iinst(op,rs,rt,im)	(SM(op,6,26)|SM(rs,5,21)|SM(rt,5,16)|(im))
#define Rinst(op,rs,rt,rd,sh,f)	(SM(op,6,26)|SM(rs,5,21)|SM(rt,5,16)|SM(rd,5,11)|SM(sh,5,6)|(f))
36:
37: /*
38: * Instructions
39: */
40:
/*
 * Instruction encoders.
 * Operands are given source(s) first and destination last:
 * SLL(src,dst,sh), ADDU(src1,src2,dst), ADDIU(src,dst,imm).
 * Loads and stores always use a zero displacement.
 */

/* shifts by a constant amount */
#define SLL(rs,rd,sh)	Rinst(0, 0, rs, rd, sh, 0x00)
#define SRL(rs,rd,sh)	Rinst(0, 0, rs, rd, sh, 0x02)

/* arithmetic and logic, register and immediate forms */
#define ADDU(rs,rt,rd)	Rinst(0, rs, rt, rd, 0, 0x21)
#define ADDIU(rs,rd,v)	Iinst(0x09, rs, rd, v)
#define AND(rs,rt,rd)	Rinst(0, rs, rt, rd, 0, 0x24)
#define ANDI(rs,rd,v)	Iinst(0x0C, rs, rd, v)
#define OR(rs,rt,rd)	Rinst(0, rs, rt, rd, 0, 0x25)
#define ORI(rs,rd,v)	Iinst(0x0D, rs, rd, v)
#define XOR(rs,rt,rd)	Rinst(0, rs, rt, rd, 0, 0x26)
#define XORI(rs,rd,v)	Iinst(0x0E, rs, rd, v)
#define NOR(rs,rt,rd)	Rinst(0, rs, rt, rd, 0, 0x27)
#define NOP		Rinst(0, 0, 0, 0, 0, 0x27)	/* NOR of register 0 into register 0 */
#define LUI(r,v)	Iinst(0x0F, 0, r, v)

/* memory access through an address register */
#define LW(as,rd)	Iinst(0x23, as, rd, 0)
#define LBU(as,rd)	Iinst(0x24, as, rd, 0)
#define LHU(as,rd)	Iinst(0x25, as, rd, 0)
#define SW(ad,rs)	Iinst(0x2B, ad, rs, 0)

/* control transfer */
#define BGTZ(r,d)	Iinst(0x07, r, 0, d)
#define JR(r)		Rinst(0, r, 0, 0, 0, 0x08)
60:
61: /*
62: * Macros for assembling the operations of the abstract machine.
63: * Each assumes that ulong *p points to the next location where
64: * an instruction should be assembled.
65: * These macros can use RX as a scratch register, but no others.
66: * They can assume RF holds ~0.
67: */
68:
/*
 * Ofield(c): combine Rs and Rd under the constant mask c, result in Rs.
 * Emits Rs ^= Rd; Rs &= c; Rs ^= Rd, which leaves the bits of Rs where
 * c has ones and the bits of Rd where c has zeros.
 * A mask with any of its upper 16 bits set is first built in RX with
 * LUI/ORI; otherwise a single ANDI is enough.
 */
#define Ofield(c)	*p++ = XOR(Rs,Rd,Rs);				\
			if(((c)&0xFFFF0000) == 0)			\
				*p++ = ANDI(Rs,Rs,(c));			\
			else {						\
				*p++ = LUI(RX,((ulong)(c))>>16);	\
				*p++ = ORI(RX,RX,(c)&0xFFFF);		\
				*p++ = AND(RX,Rs,Rs);			\
			}						\
			*p++ = XOR(Rs,Rd,Rs)
77:
/*
 * Shift operations.  In each name the first register is the
 * destination and the second the source; sha and shb are shift
 * counts taken from the surrounding code.
 */
#define Olsha_RsRt	*p++ = SLL(Rt,Rs,sha)		/* Rs = Rt << sha */
#define Olshb_RsRt	*p++ = SLL(Rt,Rs,shb)		/* Rs = Rt << shb */
#define Olsh_RsRd(c)	*p++ = SLL(Rd,Rs,c)		/* Rs = Rd << c */
#define Olsh_RtRt(c)	*p++ = SLL(Rt,Rt,c)		/* Rt = Rt << c */
#define Olsha_RtRt	*p++ = SLL(Rt,Rt,sha)		/* Rt = Rt << sha */
#define Olsha_RtRu	*p++ = SLL(Ru,Rt,sha)		/* Rt = Ru << sha */
#define Olshb_RtRu	*p++ = SLL(Ru,Rt,shb)		/* Rt = Ru << shb */
#define Orsha_RsRt	*p++ = SRL(Rt,Rs,sha)		/* Rs = Rt >> sha */
#define Orshb_RsRt	*p++ = SRL(Rt,Rs,shb)		/* Rs = Rt >> shb */
#define Orsha_RtRu	*p++ = SRL(Ru,Rt,sha)		/* Rt = Ru >> sha */
#define Orshb_RtRu	*p++ = SRL(Ru,Rt,shb)		/* Rt = Ru >> shb */

/*
 * Shift-and-or operations: the shifted source goes through RX
 * and is then or'ed into the destination.
 */
#define Oorlsha_RsRt	*p++ = SLL(Rt,RX,sha);	\
			*p++ = OR(RX,Rs,Rs)		/* Rs |= Rt << sha */
#define Oorlshb_RsRt	*p++ = SLL(Rt,RX,shb);	\
			*p++ = OR(RX,Rs,Rs)		/* Rs |= Rt << shb */
#define Oorlsh_RsRd(c)	*p++ = SLL(Rd,RX,c);	\
			*p++ = OR(RX,Rs,Rs)		/* Rs |= Rd << c */
#define Oorrsha_RsRt	*p++ = SRL(Rt,RX,sha);	\
			*p++ = OR(RX,Rs,Rs)		/* Rs |= Rt >> sha */
#define Oorrshb_RsRt	*p++ = SRL(Rt,RX,shb);	\
			*p++ = OR(RX,Rs,Rs)		/* Rs |= Rt >> shb */
#define Oorrsha_RtRu	*p++ = SRL(Ru,RX,sha);	\
			*p++ = OR(RX,Rt,Rt)		/* Rt |= Ru >> sha */
#define Oorrshb_RtRu	*p++ = SRL(Ru,RX,shb);	\
			*p++ = OR(RX,Rt,Rt)		/* Rt |= Ru >> shb */

#define Oor_RsRd	*p++ = OR(Rs,Rd,Rs)		/* Rs |= Rd */
97:
98: /*
99: * Try to use smaller instructions when the constant is small.
100: * Note that MIPS sign extends immediate operands
101: */
/*
 * Add_As(c), Add_Ad(c): add the constant c to As or Ad.
 *	c fits in 15 bits:	one ADDIU (no sign-extension surprise)
 *	c fits in 16 bits:	ORI it into RX from register 0, then ADDU
 *	otherwise:		build c in RX with LUI/ORI, then ADDU
 * The last statement is left unterminated so the caller's `;' ends it.
 */
#define Add_As(c)	if(((c)&0xFFFF8000) == 0)			\
				*p++ = ADDIU(As,As,c);			\
			else if(((c)&0xFFFF0000) == 0)			\
				*p++ = ORI(0,RX,(c)&0xFFFF),		\
				*p++ = ADDU(RX,As,As);			\
			else						\
				*p++ = LUI(RX,((ulong)(c))>>16),	\
				*p++ = ORI(RX,RX,(c)&0xFFFF),		\
				*p++ = ADDU(RX,As,As)

#define Add_Ad(c)	if(((c)&0xFFFF8000) == 0)			\
				*p++ = ADDIU(Ad,Ad,c);			\
			else if(((c)&0xFFFF0000) == 0)			\
				*p++ = ORI(0,RX,(c)&0xFFFF),		\
				*p++ = ADDU(RX,Ad,Ad);			\
			else						\
				*p++ = LUI(RX,((ulong)(c))>>16),	\
				*p++ = ORI(RX,RX,(c)&0xFFFF),		\
				*p++ = ADDU(RX,Ad,Ad)
125:
/*
 * Initsd(s,d): load the constants s and d into As and Ad,
 * upper half with LUI and lower half with ORI.
 */
#define Initsd(s,d)	*p++ = LUI(As,((ulong)(s))>>16);		\
			*p++ = ORI(As,As,((ulong)(s))&0xFFFF);		\
			*p++ = LUI(Ad,((ulong)(d))>>16);		\
			*p++ = ORI(Ad,Ad,((ulong)(d))&0xFFFF)

/* Initsh(a,b): emits nothing on this machine. */
#define Initsh(a,b)

/* Put all ones in RF: ADDIU sign-extends the immediate 0xFFFF to ~0. */
#define Extrainit	*p++ = ADDIU(0,RF,0xFFFF)
135:
136: /*
137: * We put one less than the loop count into R[io], so that
138: * the loop instructions can decrement after the test instead
139: * of before
140: */
/*
 * Ilabel(c), Olabel(c): load the loop count c, less one, into Ri or Ro.
 * A value that fits in 15 bits takes a single ADDIU from register 0;
 * anything else is built with LUI/ORI.  Both use tmp as scratch and
 * leave their last statement unterminated for the caller's `;'.
 */
#define Ilabel(c)	tmp = (c)-1;					\
			if((tmp&0xFFFF8000) == 0)			\
				*p++ = ADDIU(0,Ri,tmp);			\
			else						\
				*p++ = LUI(Ri,((ulong)tmp)>>16),	\
				*p++ = ORI(Ri,Ri,tmp&0xFFFF)

#define Olabel(c)	tmp = (c)-1;					\
			if((tmp&0xFFFF8000) == 0)			\
				*p++ = ADDIU(0,Ro,tmp);			\
			else						\
				*p++ = LUI(Ro,((ulong)tmp)>>16),	\
				*p++ = ORI(Ro,Ro,tmp&0xFFFF)
154:
155: /*
156: * The decrement is done after the test instead of before
157: * so that the required delay slot can be filled with
158: * the decrement (counts were adjusted by [IO]label)
159: */
/*
 * Iloop(lp), Oloop(lp): branch back to lp while the counter is positive,
 * with the counter's decrement (ADDIU of 0xFFFF, i.e. -1) placed in the
 * branch delay slot.  The displacement is counted in words from the
 * instruction after the branch.  p is advanced in a separate statement
 * because the branch expression itself reads p.
 */
#define Iloop(lp)	*p = BGTZ(Ri,(lp-(p+1))&0xFFFF);	\
			p++;					\
			*p++ = ADDIU(Ri,Ri,0xFFFF)

#define Oloop(lp)	*p = BGTZ(Ro,(lp-(p+1))&0xFFFF);	\
			p++;					\
			*p++ = ADDIU(Ro,Ro,0xFFFF)

/* Orts: return through register 31, with a NOP in the delay slot. */
#define Orts		*p++ = JR(31);				\
			*p++ = NOP
167:
168: /*
169: * Load and Fetch macros: arg should be 1 if following instr might use loaded value
170: * In the predecrement versions, it's as easy to do the decrement afterwards in
171: * the delay slot
172: */
173:
/* Loads through As followed by As += 4; the ADDIU follows the load directly. */
#define Load_Rs_P	*p++ = LW(As,Rs);	\
			*p++ = ADDIU(As,As,4)
#define Load_Rt_P	*p++ = LW(As,Rt);	\
			*p++ = ADDIU(As,As,4)
#define Loadzx_Rt_P	*p++ = LW(As,Rt);	\
			*p++ = ADDIU(As,As,4)
#define Loador_Rt_P	*p++ = LW(As,Rt);	\
			*p++ = ADDIU(As,As,4)
#define Load_Ru_P	*p++ = LW(As,Ru);	\
			*p++ = ADDIU(As,As,4)

/* As -= 4 (0xFFFC is -4 as a 16-bit immediate), then load; NOP if f is set. */
#define Load_Rd_D(f)	*p++ = ADDIU(As,As,0xFFFC);	\
			*p++ = LW(As,Rd);		\
			if(f) *p++ = NOP
#define Load_Rs_D(f)	*p++ = ADDIU(As,As,0xFFFC);	\
			*p++ = LW(As,Rs);		\
			if(f) *p++ = NOP
#define Load_Rt_D(f)	*p++ = ADDIU(As,As,0xFFFC);	\
			*p++ = LW(As,Rt);		\
			if(f) *p++ = NOP
#define Loadzx_Rt_D(f)	*p++ = ADDIU(As,As,0xFFFC);	\
			*p++ = LW(As,Rt);		\
			if(f) *p++ = NOP

/* Loads through As that leave As alone; NOP if f is set. */
#define Load_Rd(f)	*p++ = LW(As,Rd);	\
			if(f) *p++ = NOP
#define Load_Rs(f)	*p++ = LW(As,Rs);	\
			if(f) *p++ = NOP
#define Load_Rt(f)	*p++ = LW(As,Rt);	\
			if(f) *p++ = NOP
#define Loadzx_Rt(f)	*p++ = LW(As,Rt);	\
			if(f) *p++ = NOP

/* Fetches into Rd through Ad; Fetch_Rd_P ignores f because the ADDIU follows the load. */
#define Fetch_Rd_P(f)	*p++ = LW(Ad,Rd);	\
			*p++ = ADDIU(Ad,Ad,4)
#define Fetch_Rd_D(f)	*p++ = ADDIU(Ad,Ad,0xFFFC);	\
			*p++ = LW(Ad,Rd);		\
			if(f) *p++ = NOP
#define Fetch_Rd(f)	*p++ = LW(Ad,Rd);	\
			if(f) *p++ = NOP

/* Stores of Rs through Ad. */
#define Store_Rs_P	*p++ = SW(Ad,Rs);	\
			*p++ = ADDIU(Ad,Ad,4)
#define Store_Rs_D	*p++ = ADDIU(Ad,Ad,0xFFFC);	\
			*p++ = SW(Ad,Rs)
#define Store_Rs	*p++ = SW(Ad,Rs)

#define Nop		*p++ = NOP
199:
/* Inittab(t,s): load the address t into AT with LUI/ORI; s is not used here. */
#define Inittab(t,s)	*p++ = LUI(AT,((ulong)(t))>>16);		\
			*p++ = ORI(AT,AT,((ulong)(t))&0xFFFF)
202:
/*
 * Table_RdRt(o,n,l): emit code to look up n bits at offset o of Rt,
 * leaving the table entry in Rd; table entries are 1<<l bytes long.
 * The field is moved so that it starts at bit l, masked, and added to
 * the table address in AT; osiz selects a byte, halfword or word load,
 * and a NOP follows the load.  tmp is used as scratch.
 * NOTE(review): when tmp <= 0 and l > 0 the field is shifted left by l,
 * which equals the needed shift of -tmp only if (o)+(n) == 32; confirm
 * that callers never need any other case.
 */
#define Table_RdRt(o,n,l)						\
	tmp = 32-((o)+(n))-(l);						\
	if(tmp > 0)							\
		*p++ = SRL(Rt,Rd,tmp);					\
	else if((l) > 0)						\
		*p++ = SLL(Rt,Rd,l);					\
	else								\
		*p++ = ADDU(Rt,0,Rd);					\
	*p++ = ANDI(Rd,Rd,((1<<(n))-1)<<(l));				\
	*p++ = ADDU(Rd,AT,Rd);						\
	switch(osiz) {							\
	case 1:								\
		*p++ = LBU(Rd,Rd);					\
		break;							\
	case 2:								\
		*p++ = LHU(Rd,Rd);					\
		break;							\
	default:							\
		*p++ = LW(Rd,Rd);					\
		break;							\
	}								\
	*p++ = NOP
218:
/*
 * Table_RsRt(o,n,l): emit code to look up n bits at offset o of Rt,
 * leaving the table entry in Rs; table entries are 1<<l bytes long.
 * The field is moved so that it starts at bit l, masked, and added to
 * the table address in AT; osiz selects a byte, halfword or word load,
 * and a NOP follows the load.  tmp is used as scratch.
 *
 * The mask is applied to Rs itself.  Masking Rd into Rs instead would
 * discard the field just shifted into Rs and index the table with
 * whatever Rd happened to hold.
 * NOTE(review): when tmp <= 0 and l > 0 the field is shifted left by l,
 * which equals the needed shift of -tmp only if (o)+(n) == 32; confirm
 * that callers never need any other case.
 */
#define Table_RsRt(o,n,l)						\
	tmp = 32-((o)+(n))-(l);						\
	if(tmp > 0)							\
		*p++ = SRL(Rt,Rs,tmp);					\
	else if((l) > 0)						\
		*p++ = SLL(Rt,Rs,l);					\
	else								\
		*p++ = ADDU(Rt,0,Rs);					\
	*p++ = ANDI(Rs,Rs,((1<<(n))-1)<<(l));				\
	*p++ = ADDU(Rs,AT,Rs);						\
	switch(osiz) {							\
	case 1:								\
		*p++ = LBU(Rs,Rs);					\
		break;							\
	case 2:								\
		*p++ = LHU(Rs,Rs);					\
		break;							\
	default:							\
		*p++ = LW(Rs,Rs);					\
		break;							\
	}								\
	*p++ = NOP
233:
/*
 * Assemble(o,n): emit code to assemble the low n bits of Rd into
 * offset o in Rs.
 *	o == 0:		Rs = Rd << (32-n), replacing Rs
 *	o == 32-n:	Rs |= Rd, no shift needed
 *	otherwise:	Rs |= Rd << (32-(o+n)), through RX
 */
#define Assemble(o,n)							\
	if((o) == 0) {							\
		Olsh_RsRd(32-(n));					\
	} else if((o) == 32-(n)) {					\
		Oor_RsRd;						\
	} else {							\
		Oorlsh_RsRd(32-((o)+(n)));				\
	}

/*
 * Assemblex(o,n): emit code to assemble the low n bits of Rd into
 * offset o in Rs by shifting Rs left as each field arrives.  The
 * first field (o == 0) is copied; later ones shift Rs by n and or
 * Rd in.  It only works if the whole word will eventually be filled.
 */
#define Assemblex(o,n)							\
	if((o) != 0) {							\
		*p++ = SLL(Rs,Rs,n);					\
		*p++ = OR(Rd,Rs,Rs);					\
	} else {							\
		*p++ = ADDU(Rd,0,Rs);					\
	}
254:
/*
 * Emitop: copy two instruction words from fi to p, then advance p by
 * fin bytes, so a later emit overwrites any word beyond fin.
 * fi and fin come from the surrounding code; presumably they are the
 * instr and n fields of the chosen Fstr entry -- confirm at the caller.
 */
#define Emitop	*p = *fi;					\
		*(p+1) = *(fi+1);				\
		p = (Type*)((char*)p + fin)
259:
260: typedef struct Fstr
261: {
262: char fetchs;
263: char fetchd;
264: short n;
265: Type instr[2];
266: } Fstr;
267:
268: Fstr fstr[16] =
269: {
270: [0] 0,0,4, /* Zero */
271: { LUI(Rs,0), 0 },
272:
273: [1] 1,1,4, /* DnorS */
274: { NOR(Rs,Rd,Rs), 0 },
275:
276: [2] 1,1,8, /* DandnotS */
277: { XOR(Rs,RF,Rs), AND(Rs,Rd,Rs) },
278:
279: [3] 1,0,4, /* notS */
280: { XOR(Rs,RF,Rs), 0 },
281:
282: [4] 1,1,8, /* notDandS */
283: { XOR(Rs,RF,Rs), NOR(Rd,Rs,Rs) },
284:
285: [5] 0,1,4, /* notD */
286: { XOR(Rd,RF,Rs), 0 },
287:
288: [6] 1,1,4, /* DxorS */
289: { XOR(Rd,Rs,Rs), 0 },
290:
291: [7] 1,1,8, /* DnandS */
292: { AND(Rd,Rs,Rs), XOR(Rs,RF,Rs) },
293:
294: [8] 1,1,4, /* DandS */
295: { AND(Rd,Rs,Rs), 0 },
296:
297: [9] 1,1,8, /* DxnorS */
298: { XOR(Rd,Rs,Rs), XOR(Rs,RF,Rs) },
299:
300: [10] 0,1,4, /* D */
301: { ADDU(Rd,0,Rs), 0 },
302:
303: [11] 1,1,8, /* DornotS */
304: { XOR(Rs,RF,Rs), OR(Rs,Rd,Rs) },
305:
306: [12] 1,0,0, /* S */
307: {0, 0},
308:
309: [13] 1,1,8, /* notDorS */
310: { XOR(Rd,RF,RX), OR(Rs,RX,Rs) },
311:
312: [14] 1,1,4, /* DorS */
313: { OR(Rs,Rd,Rs), 0 },
314:
315: [15] 0,0,4, /* F */
316: { OR(RF,0,Rs), 0 },
317: };
318:
319: #include "tabs.h"
320: static uchar *tabs[4][4] =
321: {
322: { 0, (uchar*)tab01, (uchar*)tab02, (uchar*)tab03},
323: {(uchar*)tab10, 0, (uchar*)tab12, (uchar*)tab13},
324: {(uchar*)tab20, (uchar*)tab21, 0, (uchar*)tab23},
325: {(uchar*)tab30, (uchar*)tab31, (uchar*)tab32, 0},
326: };
327:
328: static uchar tabosiz[4][4] = /* size in bytes of entries */
329: {
330: { 0, 2, 4, 4},
331: { 1, 0, 2, 4},
332: { 1, 1, 0, 2},
333: { 1, 1, 1, 0},
334: };
335:
/* Limits on the size of a generated bitblt program, in Type units. */
enum
{
	/* max number of Type units in a bitblt prog */
	Progmax		= 900,
	/* max number of Type units when no conversion */
	Progmaxnoconv	= 64,
};
340:
/*
 * prprog: prints nothing on this machine; it simply aborts.
 * Presumably meant as the hook for dumping a generated program --
 * the original note says to use db instead.
 */
void
prprog(void)
{
	abort();	/* use db */
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.