|
|
1.1 root 1: /*
2: * See the comments at the beginning of bb.c and bbc.h
3: * for an outline of how this bitblt works
4:
5: * The VGA screen on the 386 has an awful bit/byte order:
6: * the high order bit of a byte is leftmost, but the low
7: * order byte of a word is leftmost. This causes havoc
8: * if the source is shifted relative to the destination.
9: * So we make a 'word' be a byte. That changes the converting
10: * bitblt considerably, so that Ru is no longer needed in
11: * the abstract machine.
12: */
/* Word geometry: on this target a "word" is a single byte. */
#define	WBITS	8		/* bits in one word */
#define	LWBITS	3		/* log2 of WBITS */
#define	W2L	4		/* presumably words per 32-bit long (32/WBITS) -- confirm against bb.c */
#define	WMASK	0xFF		/* all WBITS bits set */
#define	BYTEREV			/* defined with no value; its consumers are outside this section */
18: typedef uchar *WType;
19:
20: typedef uchar Type;
21:
/*
 * Register assignments, listed by 386 register number
 * (the value used in ModR/M and opcode+reg encodings).
 * Ro gets no register: it is the top of the stack, if needed.
 */
#define	Rs	0	/* EAX: FIELD works better with this here */
#define	Ri	1	/* ECX: LOOP depends on this here */
#define	Rt	2	/* EDX */
#define	Rd	3	/* EBX */
#define	SP	4	/* ESP */
#define	RX	5	/* EBP */
#define	As	6	/* ESI */
#define	Ad	7	/* EDI */
34:
/*
 * Macros for assembling 386 instructions.
 *
 * MODRM..MOV are constant expressions that build encoded bytes and
 * two-byte instruction words (usable in static initializers).
 *
 * The remaining macros are emitters: they store at uchar *p and
 * advance p.  Multi-byte values are stored through short* / long*
 * casts of p.  Emitters expand to bare statement sequences (no
 * do/while wrapper), so they must be used where several statements
 * are legal, never as the unbraced body of an if/else/loop.
 *
 * Every macro argument is parenthesized where it is used in a
 * comparison or assignment, so expression arguments are safe.
 */

/* ModR/M byte: mod in bits 7-6, reg in bits 5-3, r/m in bits 2-0 */
#define MODRM(mod,rm,reg)	(((mod)<<6)|((reg)<<3)|(rm))
#define RMR(rm,reg)		MODRM(3,rm,reg)		/* mod=3: r/m names a register */
#define IRMR(rm,reg)		MODRM(0,rm,reg)		/* mod=0: r/m names memory via register */
/* pack bytes, first byte lowest */
#define DB(b1,b2,b3,b4)		((b1)|((b2)<<8)|((b3)<<16)|((b4)<<24))
#define SB(b1,b2)		((b1)|((b2)<<8))

/* two-byte register-to-register instructions: dst = dst op src */
#define OR(src,dst)		SB(0x0B, RMR(src,dst))
#define XOR(src,dst)		SB(0x31, RMR(dst,src))
#define AND(src,dst)		SB(0x21, RMR(dst,src))
#define NOT(r)			SB(0xF7, RMR(r,2))
#define MOV(src,dst)		SB(0x89, RMR(dst,src))

/* raw stores: 4-byte advance (Immd*) or 2-byte advance (Imms*) */
#define Immd(v)			*(long*)p = (long)(v); p += 4
#define Immdb(b1,b2,b3,b4)	Immd(DB(b1,b2,b3,b4))
#define Imms(v)			*(short*)p = (v); p += 2
#define Immsb(b1,b2)		Imms(SB(b1,b2))

/* byte load/store through an address register; Loadbzx zero-extends */
#define Loadb(areg,dst)		Immsb(0x8A, IRMR(areg,dst))
/* NB: Loadbzx ends in ';' -- kept because callers may rely on it */
#define Loadbzx(areg,dst)	Immsb(0x0F, 0xB6); *p++ = IRMR(areg,dst);
#define Storeb(src,areg)	Immsb(0x88, IRMR(areg,src))
#define Mov(src,dst)		Imms(MOV(src,dst))
/* load 32-bit immediate v into register dst */
#define Movd(v,dst)		*p++ = (0xB8 + (dst)); Immd(v)
/* zero-extending byte load into r from absolute address a indexed by register i */
#define Imovzx(a,i,r)		Immdb(0x0F, 0xB6, IRMR(4,r), IRMR(5,i)); Immd(a)
#define Or(src,dst)		Imms(OR(src,dst))
#define Xor(src,dst)		Imms(XOR(src,dst))
#define And(src,dst)		Imms(AND(src,dst))
/* AND with 32-bit immediate; Rs (EAX) gets the short one-byte opcode form */
#define Andd(v,dst)		if((dst)==Rs) {\
					*p++ = 0x25;\
				} else {\
					Immsb(0x81, RMR(dst,4));\
				}\
				Immd(v)
/* AND with 8-bit immediate on the low byte; Rs gets the short form */
#define And8(v,dst)		if((dst)==Rs) {\
					*p++ = 0x24;\
				} else {\
					Immsb(0x80, RMR(dst,4));\
				}\
				*p++ = (v)
#define Not(r)			Imms(NOT(r))
#define Inc(r)			*p++ = (0x40 + (r))
#define Dec(r)			*p++ = (0x48 + (r))
/* decrement the 32-bit word at the top of the stack */
#define Decsp			Immsb(0xFF, IRMR(4,1)); *p++ = 0x24
#define Shl(r,sh)		Immsb(0xC1, RMR(r,4)); *p++ = (sh)
#define Shr(r,sh)		Immsb(0xC1, RMR(r,5)); *p++ = (sh)
#define Addd(v,r)		Immsb(0x81, RMR(r,0)); Immd(v)
/* add a one-byte immediate to ESP */
#define Addsp(v)		Immsb(0x83, RMR(4,0)); *p++ = (v)
#define Pushd(v)		*p++ = 0x68; Immd(v)
/* branches: d is the displacement from the end of the instruction */
#define Loop(d)			*p = 0xE2; *(p+1) = (d); p += 2
#define Jmp8(d)			*p = 0xEB; *(p+1) = (d); p += 2
#define Jmp(d)			*p++ = 0xE9; Immd(d)
#define Jg8(d)			*p = 0x7F; *(p+1) = (d); p += 2
#define Jle8(d)			*p = 0x7E; *(p+1) = (d); p += 2
90:
/*
 * Abstract-machine operations, each written as a sequence of 386
 * emitters.  All of them need uchar *p pointing at the next free
 * code byte; some also expect Arg *a (the current bitblt's
 * description, see bb.c).  Iloop and Oloop use a scratch variable
 * tmp, and the sha/shb variants use shift counts named sha and shb,
 * all supplied by the surrounding code.
 * Some operations only care about the low order bytes of the
 * registers; others have to zero the high order bytes because
 * they might get shifted down.
 */

/* Rs = ((Rs ^ Rd) & mask) ^ Rd; the mask is applied to the low byte */
#define Ofield(mask)		Xor(Rd,Rs); And8(mask,Rs); Xor(Rd,Rs)

/* shifts: copy into Rs (or work in place on Rt), then shift */
#define Olsha_RsRt		Mov(Rt,Rs); Shl(Rs,sha)
#define Olshb_RsRt		Mov(Rt,Rs); Shl(Rs,shb)
#define Olsh_RsRd(nbits)	Mov(Rd,Rs); Shl(Rs,nbits)
#define Olsh_RtRt(nbits)	Shl(Rt,nbits)
#define Olsha_RtRt		Shl(Rt,sha)
#define Orsha_RsRt		Mov(Rt,Rs); Shr(Rs,sha)
#define Orshb_RsRt		Mov(Rt,Rs); Shr(Rs,shb)

/* shift a copy in the scratch register RX, then OR it into Rs */
#define Oorlsha_RsRt		Mov(Rt,RX); Shl(RX,sha); Or(RX,Rs)
#define Oorlshb_RsRt		Mov(Rt,RX); Shl(RX,shb); Or(RX,Rs)
#define Oorlsh_RsRd(nbits)	Mov(Rd,RX); Shl(RX,nbits); Or(RX,Rs)
#define Oorrsha_RsRt		Mov(Rt,RX); Shr(RX,sha); Or(RX,Rs)
#define Oorrshb_RsRt		Mov(Rt,RX); Shr(RX,shb); Or(RX,Rs)
#define Oor_RsRd		Or(Rd,Rs)

/* operations on Ru emit nothing: Ru not needed */
#define Olsha_RtRu
#define Olshb_RtRu
#define Orsha_RtRu
#define Orshb_RtRu
#define Oorrsha_RtRu
#define Oorrshb_RtRu
#define Load_Ru_P

/* address registers */
#define Add_As(delta)		Addd(delta,As)
#define Add_Ad(delta)		Addd(delta,Ad)
#define Initsd(src,dst)		Movd(src,As); Movd(dst,Ad)

/* loop counters: inner count goes in Ri, outer count is pushed on the stack */
#define Ilabel(cnt)		Movd(cnt,Ri)
#define Olabel(cnt)		Pushd(cnt)

/*
 * Inner loop back to top using LOOP.  When the backward distance
 * is too large for the short form, emit LOOP over a short JMP to
 * a 5-byte long JMP back to top; the short JMP skips the long one
 * on loop exit.
 */
#define Iloop(top)	tmp = (top)-(p+2); \
			if(tmp <= -128) { \
				Loop(2); \
				Jmp8(5); \
				tmp = (top) - (p+5); \
				Jmp(tmp); \
			} else { \
				Loop(tmp); \
			}

/*
 * Outer loop: decrement the count on top of the stack and branch
 * back to top while it is still positive (short JG, or JLE over a
 * long JMP when top is far away); afterwards pop the count.
 */
#define Oloop(top)	Decsp; \
			tmp = (top)-(p+2); \
			if(tmp <= -128) { \
				Jle8(5); \
				tmp = (top) - (p+5); \
				Jmp(tmp); \
			} else { \
				Jg8(tmp); \
			} \
			Addsp(4)

/* return from the generated program */
#define Orts			*p++ = 0xC3

/* load: byte at As; _P increments As afterwards, _D decrements it first */
#define Load_Rs_P		Loadb(As,Rs); Inc(As)
#define Load_Rt_P		Loadb(As,Rt); Inc(As)
#define Loadzx_Rt_P		Loadbzx(As,Rt); Inc(As)
#define Loador_Rt_P		Loadb(As,Rt); Inc(As)
#define Load_Rd_D(f)		Dec(As); Loadb(As,Rd)
#define Load_Rs_D(f)		Dec(As); Loadb(As,Rs)
#define Load_Rt_D(f)		Dec(As); Loadb(As,Rt)
#define Loadzx_Rt_D(f)		Dec(As); Loadbzx(As,Rt)
#define Load_Rd(f)		Loadb(As,Rd)
#define Load_Rs(f)		Loadb(As,Rs)
#define Load_Rt(f)		Loadb(As,Rt)
#define Loadzx_Rt(f)		Loadbzx(As,Rt)

/* fetch: byte at Ad into Rd */
#define Fetch_Rd_P(f)		Loadb(Ad,Rd); Inc(Ad)
#define Fetch_Rd_D(f)		Dec(Ad); Loadb(Ad,Rd)
#define Fetch_Rd(f)		Loadb(Ad,Rd)

/* store: low byte of Rs to Ad */
#define Store_Rs_P		Storeb(Rs,Ad); Inc(Ad)
#define Store_Rs_D		Dec(Ad); Storeb(Rs,Ad)
#define Store_Rs		Storeb(Rs,Ad)

/* these two emit nothing here */
#define Inittab(t,s)
#define Initsh(a,b)
172:
/*
 * Emit code to look up n bits of Rt at offset o; the answer byte
 * ends up in register r.  The field is moved down to bit 0 with a
 * right shift of 32-(o+n) (omitted when that is not positive),
 * masked to n bits, and used as an index for a zero-extending
 * byte load from tab.  Needs tmp and tab from the surrounding code.
 */
#define Table_RxRt(r,o,n) \
	Mov(Rt,r); \
	tmp = 32-((o)+(n)); \
	if(tmp > 0) { \
		Shr(r,tmp); \
	} \
	Andd(((1<<(n))-1),r); \
	Imovzx(tab,r,r)

/* answer in Rd; l is always 0 when WBITS==8 */
#define Table_RdRt(o,n,l)	Table_RxRt(Rd,o,n)

/* answer in Rs; l is always 0 when WBITS==8 */
#define Table_RsRt(o,n,l)	Table_RxRt(Rs,o,n)
192:
/*
 * Emit code to assemble low nbits bits of Rd into offset off in
 * the low byte in Rs:
 *   off == 0          Rs = Rd << (8-nbits)         (starts a new byte)
 *   off == 8-nbits    Rs |= Rd                     (no shift needed)
 *   otherwise         Rs |= Rd << (8-(off+nbits))  (via scratch RX)
 */
#define Assemble(off,nbits) \
	if((off) == 0) { \
		Olsh_RsRd(8-(nbits)); \
	} else if((off) == 8-(nbits)) { \
		Or(Rd,Rs); \
	} else { \
		Oorlsh_RsRd(8-((off)+(nbits))); \
	}

/*
 * Variant of Assemble: for a non-zero offset, Rs = (Rs << nbits) | Rd.
 * NOTE(review): for off == 0 this emits Mov(Rs,Rd), which copies Rs
 * into Rd; if the intent is to start Rs from Rd the operands look
 * reversed -- confirm against the callers in bb.c before changing.
 */
#define Assemblex(off,nbits) \
	if((off) == 0) { \
		Mov(Rs,Rd); \
	} else { \
		Shl(Rs,nbits); \
		Or(Rd,Rs); \
	}
210:
/* emits nothing */
#define Nop

/*
 * Extrainit: empty in normal builds.  With TEST defined it loads
 * Rs, Rd, Rt, Ri and RX with values from lrand().
 */
#ifndef TEST
#define Extrainit
#else
#define Extrainit \
	Movd(lrand(),Rs); \
	Movd(lrand(),Rd); \
	Movd(lrand(),Rt); \
	Movd(lrand(),Ri); \
	Movd(lrand(),RX)
#endif
223:
/*
 * Call the generated program at memstart as a void(void) function,
 * then, unless onstack is non-zero, hand the buffer to bbfree with
 * its length: the distance from memstart to the current emit
 * pointer p, scaled by sizeof(Type).
 * Arguments are parenthesized so expression arguments parse as
 * intended; memstart is evaluated more than once.  The expansion
 * ends in ';', as before.
 */
#define Execandfree(memstart,onstack) \
	(*(void (*)(void))(memstart))(); \
	if(!(onstack)) \
		bbfree((memstart), (p-(memstart)) * sizeof(Type));
228:
/*
 * Emit the code sequence at fi (at most 3 shorts): one long and
 * one short are always copied from fi to p, but p advances only
 * by fin bytes, so anything copied past fin is overwritten by
 * whatever is emitted next.
 */
#define Emitop \
	((long*)p)[0] = ((long*)fi)[0]; \
	((short*)(p+4))[0] = ((short*)((char*)fi + 4))[0]; \
	p = (Type*)((char*)p + fin)
234:
/*
 * One entry per bitblt function code: what to emit to combine
 * Rs and Rd for that code.
 */
typedef struct Fstr Fstr;
struct Fstr
{
	short	fetchs;		/* 1 in the table for codes whose name involves S */
	short	fetchd;		/* 1 in the table for codes whose name involves D */
	int	n;		/* number of bytes of instr[] in use */
	short	instr[4];	/* encoded two-byte instructions, zero padded */
};
242:
243: Fstr fstr[16] =
244: {
245: [0] 0,0,2, /* Zero */
246: { XOR(Rs,Rs), 0, 0, 0 },
247:
248: [1] 1,1,4, /* DnorS */
249: { OR(Rd,Rs), NOT(Rs), 0, 0},
250:
251: [2] 1,1,4, /* DandnotS */
252: { NOT(Rs), AND(Rd,Rs), 0, 0},
253:
254: [3] 1,0,2, /* notS */
255: { NOT(Rs), 0, 0, 0},
256:
257: [4] 1,1,6, /* notDandS */
258: { NOT(Rs), OR(Rd,Rs), NOT(Rs), 0},
259:
260: [5] 0,1,4, /* notD */
261: { MOV(Rd,Rs), NOT(Rs), 0, 0},
262:
263: [6] 1,1,2, /* DxorS */
264: { XOR(Rd,Rs), 0, 0, 0},
265:
266: [7] 1,1,4, /* DnandS */
267: { AND(Rd,Rs), NOT(Rs), 0, 0},
268:
269: [8] 1,1,2, /* DandS */
270: { AND(Rd,Rs), 0, 0, 0},
271:
272: [9] 1,1,4, /* DxnorS */
273: { XOR(Rd,Rs), NOT(Rs), 0, 0},
274:
275: [10] 0,1,2, /* D */
276: { MOV(Rd,Rs), 0, 0, 0},
277:
278: [11] 1,1,4, /* DornotS */
279: { NOT(Rs), OR(Rd,Rs), 0, 0},
280:
281: [12] 1,0,0, /* S */
282: { 0, 0, 0, 0},
283:
284: [13] 1,1,6, /* notDorS */
285: { NOT(Rs), AND(Rd,Rs), NOT(Rs), 0},
286:
287: [14] 1,1,2, /* DorS */
288: { OR(Rd,Rs), 0, 0, 0},
289:
290: [15] 0,0,4, /* F */
291: { XOR(Rs,Rs), NOT(Rs), 0, 0},
292: };
293:
294: #include "tabs.h"
295:
296: static uchar *tabs[4][4] =
297: {
298: { 0, (uchar*)tab01b, 0, (uchar*)tab03b},
299: {(uchar*)tab10b, 0, 0, 0},
300: { 0, 0, 0, 0},
301: {(uchar*)tab30b, 0, 0, 0},
302: };
303:
304: static uchar tabosiz[4][4] = /* size in bytes of entries */
305: {
306: { 0, 1, 0, 1},
307: { 1, 0, 0, 0},
308: { 0, 0, 0, 0},
309: { 1, 0, 0, 0},
310: };
311:
/* size limits, in bytes, for a generated bitblt program */
enum
{
	Progmaxnoconv	= 168,	/* max number when not converting */
	Progmax		= 1000,	/* max number of bytes in a bitblt prog */
};
316:
#ifdef TEST
/* Not implemented for this target: aborts (use db). */
void
prprog(void)
{
	abort();	/* use db */
}

/*
 * Run the generated program at memstart; unless it lives on the
 * stack, give its len bytes back through bbfree afterwards.
 */
void
bbexec(void (*memstart)(void), int len, int onstack)
{
	(*memstart)();
	if(onstack)
		return;
	bbfree(memstart, len);
}

#endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.