|
|
1.1 root 1: /* BITBLT() Moves bits around on the screen, and does a LOT of it
2: *
3: * WARNING
4: * WARNING: to any future modifier of this code. This is highly
5: * hand optimized code, it pushes the frame pointer and
6: * references locals off the stack pointer,
7: * and it even simulates register
8: * allocation. Be very careful if you edit this.
9: */
10:
11: #include <jerq.h>
12: /* alignment codes for narrow rectangles */
/*
 * Flag bits OR'ed into the function code fc inside bitblt() so that a
 * single switch can dispatch on operation + geometry.
 * S_STRADDLE/D_STRADDLE are only set on the narrow path (label narrow:),
 * LEFTDIR/NOSHIFT only on the wide path, which is why the two pairs can
 * share the values 4 and 8.
 * NOTE(review): this presumably relies on the F_* codes from jerq.h
 * occupying only the low two bits -- confirm against the header.
 */
13: #define S_STRADDLE 0x4 /* source straddles a word boundary */
14: #define D_STRADDLE 0x8 /* dest straddles a word boundary */
/* wide path: copy each row from right to left (set when sm == dm and r.origin.x < p.x) */
15: #define LEFTDIR 8
/* wide path: source and dest have the same bit offset within a word, so no shifting is needed */
16: #define NOSHIFT 4
/* DAMMIT is not referenced anywhere in the visible source */
17: #define DAMMIT 4 /* you'll see why */
/* bitblt() declares a local named sw; presumably jerq.h defines sw as a macro -- confirm */
18: #undef sw
/* DX1 is just another name for bitblt()'s register variable m (width-1 on the wide path) */
19: #define DX1 m
20:
/*
 * bitblt(sm, r, dm, p, fc)
 *
 * Combine the pixels of rectangle r in source bitmap sm into destination
 * bitmap dm, with the rectangle's origin landing at point p.
 *
 *	sm, dm	source and destination bitmaps (may be the same bitmap)
 *	r	source rectangle; clipped below to sm->rect and to dm->rect
 *	p	destination point; adjusted in step with the clipping of r
 *	fc	function code; the cases handled are
 *		F_STORE (dest = src), F_OR (dest |= src),
 *		F_CLR (dest &= ~src) and F_XOR (dest ^= src)
 *
 * Returns no value (bare return).  If the clipped rectangle is empty
 * nothing is written.  Any fc other than the four codes above matches no
 * case in either switch and so nothing is written.
 *
 * Two paths: rectangles more than 32 pixels wide use the "wide" code
 * (first switch); rectangles of 32 pixels or fewer jump to narrow:.
 *
 * The #undef/#define pair around the function name makes the function
 * itself be defined as bitblt while later uses of the name bitblt in this
 * file expand to Sbitblt.
 */
1.1.1.2 ! root 21: #undef bitblt
1.1 root 22: bitblt(sm,r,dm,p,fc)
1.1.1.2 ! root 23: #define bitblt Sbitblt
1.1 root 24: Bitmap *sm,*dm;
25: Rectangle r;
26: Point p;
27: int fc;
28: {
/*
 * The register comments below are load-bearing: the asm() statements
 * further down name these registers directly (e.g. "addw2 %r6,%r8" is
 * documented as source += sw).  Do not reorder these declarations.
 */
29: register Word *source,*dest; /* %r8-%r7 */
30: register Word sw, dw; /* %r6-%r5 */
31: register UWord m; /* %r4 */
32: register int i; /* %r3 */
33:
/*
 * The asm() statements also address these locals by frame offset.  The
 * existing asm comments identify 0x20(%fp)=mask1, 0x24(%fp)=mask2,
 * 0x28(%fp)=mask3 and 0x2c(%fp)=mask4, which is consistent with the
 * locals being laid out in declaration order from 0(%fp), 4 bytes each
 * (a at 0, b at 4, ... h at 0xc, w at 0x10) -- assumption, confirm
 * against the compiler output before touching any declaration here.
 */
34: int a,b,j,h,w,dx1,px31,rx31;
35: unsigned int mask1,mask2,mask3,mask4,ntmask1,ntmask2;
36: int dummy1, dummy2; /* for future use, CANNOT CHANGE */
37: /* NUMBER OF LOCALS DUE TO %fp */
38: /* TRICKS */
1.1.1.2 ! root 39:
! 40:
/* moving r's origin inward moves the destination point p by the same amount */
1.1 root 41: /* clip to the source Bitmap */
1.1.1.2 ! root 42: if(r.origin.x < sm->rect.origin.x){
! 43: p.x+=sm->rect.origin.x-r.origin.x;
1.1 root 44: r.origin.x=sm->rect.origin.x;
1.1.1.2 ! root 45: }
1.1 root 46: if(r.corner.x > sm->rect.corner.x)
47: r.corner.x=sm->rect.corner.x;
1.1.1.2 ! root 48: if(r.origin.y < sm->rect.origin.y){
! 49: p.y+=sm->rect.origin.y-r.origin.y;
1.1 root 50: r.origin.y=sm->rect.origin.y;
1.1.1.2 ! root 51: }
1.1 root 52: if(r.corner.y > sm->rect.corner.y)
53: r.corner.y=sm->rect.corner.y;
54: /*
55: * If the sm->rect and r are disjoint, r is now degenerate,
56: * due to the clipping, and the next clipping code will reject it.
57: * This is safe because we will only increase origin or decrease corner.
58: */
/* likewise, moving p into dm->rect moves r's origin by the same amount */
59: /* clip to the destination Bitmap */
60: if(p.x < dm->rect.origin.x){
61: r.origin.x+=dm->rect.origin.x-p.x;
62: p.x=dm->rect.origin.x;
63: }
64: if(p.y < dm->rect.origin.y){
65: r.origin.y+=dm->rect.origin.y-p.y;
66: p.y=dm->rect.origin.y;
67: }
68: if(r.corner.x-r.origin.x > dm->rect.corner.x-p.x)
69: r.corner.x=r.origin.x+(dm->rect.corner.x-p.x);
70: if(r.corner.y-r.origin.y > dm->rect.corner.y-p.y)
71: r.corner.y=r.origin.y+(dm->rect.corner.y-p.y);
/* i = number of rows, dw = width in pixels minus one; both held in registers for now */
72: i = r.corner.y - r.origin.y; /* going to be h */
73: dw = r.corner.x - r.origin.x - 1; /* going to be dx1 */
/* empty after clipping: nothing to do */
74: if (i <= 0 || dw < 0)
75: return;
/* width-1 < 32, i.e. at most 32 pixels wide: take the narrow path */
76: if (dw < 32)
77: goto narrow;
/* ---- wide path ---- */
78: DX1 = dw;
79: h = i;
/*
 * Edge masks for the partial words at either end of a destination row.
 * mask1 selects the destination bits written in the leftmost word,
 * mask2 those in the rightmost word; ntmask1/ntmask2 are their
 * complements (the bits preserved by F_STORE).
 * topbits[] is defined elsewhere; presumably topbits[n] has the n
 * high-order bits set -- confirm.
 */
80: ntmask1 = topbits[p.x & 0x1f];
81: mask1 = ~ntmask1;
82: mask2 = topbits[((p.x+DX1) & 31) + 1];
83: ntmask2 = ~mask2;
1.1.1.2 ! root 84: /*
! 85: the following code is wrong.
! 86: the moral is, do it right
! 87: */
! 88: /*w = ((p.x+DX1) >> 5) - (p.x >> 5); /* inner loop+1: sub 1 later*/
/* w = index of last destination word minus index of first destination word */
! 89: w = (((p.x+DX1)&~31)/32) - ((p.x&~31)/32); /* inner loop+1: sub 1 later*/
/*
 * sw and dw now become the end-of-row adjustments that the addw2 asm
 * adds to source and dest after each row (the << 2 presumably turns a
 * word count into a byte count -- confirm Bitmap.width units).
 */
1.1 root 90: sw = ((sm->width - w) << 2); /* sleazy hack to avoid shift */
91: dw = ((dm->width - w) << 2); /* in outer, inner loops */
/*
 * Same bitmap: source and destination may overlap, so pick the row order
 * (bottom-up when the destination is lower) and the direction within a
 * row (right-to-left when the destination is further right), and adjust
 * the starting corner and the row adjustments to match.
 */
92: if (sm == dm) { /* may have to mess with loop order */
93: if (r.origin.y < p.y) { /* swap top with bottom */
94: r.origin.y += i-1;
95: p.y += i-1;
96: if (r.origin.x < p.x) { /* swap left with right */
97: fc |= LEFTDIR;
98: r.origin.x = r.origin.x + DX1;
99: p.x = p.x + DX1;
100: sw = -sw;
101: dw = -dw;
102: }
103: else
104: {
105: sw -= (sm->width << 3); /* -(w+n) == (w-n) - 2*w */
106: dw -= (dm->width << 3) ;
107: }
108: }
109: else
110: {
111: if (r.origin.x < p.x) { /* swap left with right */
112: fc |= LEFTDIR;
113: r.origin.x = r.origin.x + DX1;
114: p.x = p.x + DX1;
115: sw = (sm->width + w) << 2;
116: dw = (dm->width + w) << 2;
117: }
118: }
119: }
120: w--; /* subtract the 1 like we promised */
121: px31 = p.x & 0x1f; /* commonly used expression */
122: rx31 = r.origin.x & 0x1f; /* commonly used expression */
123: dest = addr(dm,p);
124: source = addr(sm,r.origin);
/*
 * a = (dest bit offset - source bit offset) mod 32 and b = 32 - a are
 * the two shift counts used to assemble one destination word from two
 * adjacent source words.  When the offsets are equal the NOSHIFT
 * variants are selected instead and b and w are reused as loop counts.
 */
125: a = px31 - rx31;
126: if(a == 0)
127: fc |= NOSHIFT;
128: else if (a < 0)
129: a += 32;
130: /* a == 0 means no shift, remember that */
131: b = 32 - a;
/*
 * Dispatch on operation | NOSHIFT | LEFTDIR.  Every case has the same
 * shape: first (masked) word, w full middle words, last (masked) word,
 * then the asm adds sw/dw to step source/dest to the next row.
 * There is no default case.
 */
132: switch (fc) {
133:
/*
 * NOSHIFT cases: b becomes the number of 4-word unrolled iterations and
 * w the 0..3 leftover words; m (free here) counts rows instead of h.
 */
134: case F_OR | NOSHIFT | LEFTDIR:
135: b = w>>2;
136: w = w&3;
137: m = h; /* m is free => use it */
138: do {
139: *dest-- |= mask2 & *source--;
140: if ((i = b) > 0) do {
141: *dest |= *source;
142: *(dest-1) |= *(source-1);
143: *(dest-2) |= *(source-2);
144: *(dest-3) |= *(source-3);
145: dest -= 4;
146: source -= 4;
147: } while (--i > 0);
148: if ((i = w) > 0) do {
149: *dest-- |= *source--;
150: } while (--i > 0);
151: *dest |= mask1 & *source;
152: asm(" ADDW2 %r6,%r8"); /*source += sw; */
153: asm(" ADDW2 %r5,%r7"); /*dest += dw; */
154: } while (--m != 0);
155: break;
/*
 * Shifted cases: each destination word is built from two neighbouring
 * source words, one shifted by a and the other by b.  The initial
 * source++/source-- picks which source word pair lines up with the
 * first destination word.
 */
156: case F_OR | LEFTDIR:
157: if ((px31) < (rx31))
158: source++; /* adjust for pipeline */
159: do {
160: m = *source--; /* m is a free register */
161: *dest-- |= (((m >> a) | (*source << b)) & mask2);
162: if ((i=w) > 0) do {
163: m = (*source--) >> a;
164: *dest-- |= m | (*source << b);
165: } while (--i > 0);
166: m = *source; /* m is a free register */
167: *dest |= (((m >> a) | (*(source-1) << b)) & mask1);
168: asm(" addw2 %r6,%r8"); /*source += sw; */
169: asm(" addw2 %r5,%r7"); /*dest += dw; */
170: } while (--h > 0);
171: break;
172: case F_OR | NOSHIFT:
173: b = w>>2;
174: w = w&3;
175: m = h; /* m is free => use it */
176: do {
177: *dest++ |= (mask1 & *source++);
178: if ((i = b) > 0) do {
179: *dest |= *source;
180: *(dest+1) |= *(source+1);
181: *(dest+2) |= *(source+2);
182: *(dest+3) |= *(source+3);
183: dest += 4;
184: source += 4;
185: } while (--i > 0);
186: if ((i = w) > 0) do {
187: *dest++ |= *source++;
188: } while (--i > 0);
189: *dest |= (mask2 & *source);
190: asm(" addw2 %r6,%r8"); /*source += sw; */
191: asm(" addw2 %r5,%r7"); /*dest += dw; */
192: } while (--m != 0);
193: break;
194: case F_OR:
195: if ((px31) > (rx31))
196: source--; /* adjust for pipeline */
197: do {
198: m = *source++; /* m is a free register */
199: *dest++ |= (((m << b) | (*source >> a)) & mask1);
200: if ((i=w) > 0) do {
201: m = (*source++) << b;
202: *dest++ |= m | (*source >> a);
203: } while (--i > 0);
204: m = *source; /* m is a free register */
205: *dest |= (((m << b) | (*(source+1) >> a)) & mask2);
206: asm(" addw2 %r6,%r8"); /*source += sw; */
207: asm(" addw2 %r5,%r7"); /*dest += dw; */
208: } while (--h > 0);
209: break;
/* F_CLR: same four loops as F_OR, but clearing the destination bits that are set in the source */
210: case F_CLR | NOSHIFT | LEFTDIR:
211: b = w>>2;
212: w = w&3;
213: m = h; /* m is free => use it */
214: do {
215: *dest-- &= ~(mask2 & *source--);
216: if ((i = b) > 0) do {
217: *dest &= ~(*source);
218: *(dest-1) &= ~(*(source-1));
219: *(dest-2) &= ~(*(source-2));
220: *(dest-3) &= ~(*(source-3));
221: dest -= 4;
222: source -= 4;
223: } while (--i > 0);
224: if ((i = w) > 0) do {
225: *dest-- &= ~(*source--);
226: } while (--i > 0);
227: *dest &= ~(mask1 & *source);
228: asm(" addw2 %r6,%r8"); /*source += sw; */
229: asm(" addw2 %r5,%r7"); /*dest += dw; */
230: } while (--m != 0);
231: break;
232: case F_CLR | LEFTDIR:
233: if ((px31) < (rx31))
234: source++; /* adjust for pipeline */
235: do {
236: m = *source--; /* m is a free register */
237: *dest-- &= ~((((m >> a) | (*source << b)) & mask2));
238: if ((i=w) > 0) do {
239: m = (*source--) >> a;
240: *dest-- &= ~(m | (*source << b));
241: } while (--i > 0);
242: m = *source; /* m is a free register */
243: *dest &= ~((((m >> a) | (*(source-1) << b)) & mask1));
244: asm(" addw2 %r6,%r8"); /*source += sw; */
245: asm(" addw2 %r5,%r7"); /*dest += dw; */
246: } while (--h > 0);
247: break;
248: case F_CLR | NOSHIFT:
249: b = w>>2;
250: w = w&3;
251: m = h; /* m is free => use it */
252: do {
253: *dest++ &= ~((mask1 & *source++));
254: if ((i = b) > 0) do {
255: *dest &= ~(*source);
256: *(dest+1) &= ~(*(source+1));
257: *(dest+2) &= ~(*(source+2));
258: *(dest+3) &= ~(*(source+3));
259: dest += 4;
260: source += 4;
261: } while (--i > 0);
262: if ((i = w) > 0) do {
263: *dest++ &= ~(*source++);
264: } while (--i > 0);
265: *dest &= ~((mask2 & *source));
266: asm(" addw2 %r6,%r8"); /*source += sw; */
267: asm(" addw2 %r5,%r7"); /*dest += dw; */
268: } while (--m != 0);
269: break;
270: case F_CLR:
271: if ((px31) > (rx31))
272: source--; /* adjust for pipeline */
273: do {
274: m = *source++; /* m is a free register */
275: *dest++ &= ~((((m << b) | (*source >> a)) & mask1));
276: if ((i=w) > 0) do {
277: m = (*source++) << b;
278: *dest++ &= ~(m | (*source >> a));
279: } while (--i > 0);
280: m = *source; /* m is a free register */
281: *dest &= ~((((m << b) | (*(source+1) >> a)) & mask2));
282: asm(" addw2 %r6,%r8"); /*source += sw; */
283: asm(" addw2 %r5,%r7"); /*dest += dw; */
284: } while (--h > 0);
285: break;
/* F_XOR: same structure again, toggling the destination bits that are set in the source */
286: case F_XOR | NOSHIFT | LEFTDIR:
287: b = w>>2;
288: w = w&3;
289: m = h; /* m is free => use it */
290: do {
291: *dest-- ^= mask2 & *source--;
292: if ((i = b) > 0) do {
293: *dest ^= *source;
294: *(dest-1) ^= *(source-1);
295: *(dest-2) ^= *(source-2);
296: *(dest-3) ^= *(source-3);
297: dest -= 4;
298: source -= 4;
299: } while (--i > 0);
300: if ((i = w) > 0) do {
301: *dest-- ^= *source--;
302: } while (--i > 0);
303: *dest ^= mask1 & *source;
304: asm(" addw2 %r6,%r8"); /*source += sw; */
305: asm(" addw2 %r5,%r7"); /*dest += dw; */
306: } while (--m != 0);
307: break;
308: case F_XOR | LEFTDIR:
309: if ((px31) < (rx31))
310: source++; /* adjust for pipeline */
311: do {
312: m = *source--; /* m is a free register */
313: *dest-- ^= (((m >> a) | (*source << b)) & mask2);
314: if ((i=w) > 0) do {
315: m = (*source--) >> a;
316: *dest-- ^= m | (*source << b);
317: } while (--i > 0);
318: m = *source; /* m is a free register */
319: *dest ^= (((m >> a) | (*(source-1) << b)) & mask1);
320: asm(" addw2 %r6,%r8"); /*source += sw; */
321: asm(" addw2 %r5,%r7"); /*dest += dw; */
322: } while (--h > 0);
323: break;
324: case F_XOR | NOSHIFT:
325: b = w>>2;
326: w = w&3;
327: m = h; /* m is free => use it */
328: do {
329: *dest++ ^= (mask1 & *source++);
330: if ((i = b) > 0) do {
331: *dest ^= *source;
332: *(dest+1) ^= *(source+1);
333: *(dest+2) ^= *(source+2);
334: *(dest+3) ^= *(source+3);
335: dest += 4;
336: source += 4;
337: } while (--i > 0);
338: if ((i = w) > 0) do {
339: *dest++ ^= *source++;
340: } while (--i > 0);
341: *dest ^= (mask2 & *source);
342: asm(" addw2 %r6,%r8"); /*source += sw; */
343: asm(" addw2 %r5,%r7"); /*dest += dw; */
344: } while (--m != 0);
345: break;
/*
 * Shifted left-to-right XOR is hand-written in assembler; the C
 * equivalent is in the comment that follows the asm.
 * %ap and %fp are saved on the stack and then used as scratch
 * registers, so inside the loop locals are reached through %sp.
 * NOTE(review): with the two pushed words and the layout assumed at the
 * declarations, -0x28(%sp) would be mask1, -0x24(%sp) mask2,
 * -0x38(%sp) w and -0x3c(%sp) h, and 0(%fp) would be a -- this matches
 * the C equivalent below but should be confirmed before any change.
 * %r1/%r2 hold ~topbits[a] and topbits[a], used to select the two
 * halves of each rotated source word.
 */
346: case F_XOR:
347: if ((px31) > (rx31))
348: source--; /* adjust for pipeline */
349: asm(" PUSHW %ap");
350: m = topbits[a];
351: asm(" MCOMW %r4, %r1");
352: asm(" MOVW %r4, %r2");
353: asm(" MOVW 0(%fp),%ap");
354: asm(" PUSHW %fp");
355: asm("BW_XORLOOP:");
356: asm(" ROTW %ap, 0(%r8), %r4");
357: asm(" ANDW2 %r2,%r4");
358: asm(" ADDW2 &4,%r8");
359: asm(" ROTW %ap,0(%r8),%r0");
360: asm(" ANDW3 %r0,%r1,%fp");
361: asm(" ORW2 %r4,%fp");
362: asm(" ANDW2 -0x28(%sp),%fp");
363: asm(" XORW2 %fp,0(%r7)");
364: asm(" ADDW2 &4,%r7");
365: /* if (i = w) */
366: asm(" MOVW -0x38(%sp),%r3");
367: asm(" BEB BW_XORINNER");
368: do{
369: asm(" ANDW3 %r2,%r0,%r4");
370: asm(" ADDW2 &4,%r8");
371: asm(" ROTW %ap, 0(%r8),%r0");
372: asm(" ANDW3 %r0,%r1,%fp");
373: asm(" ORW2 %r4,%fp");
374: asm(" XORW2 %fp,0(%r7)");
375: asm(" ADDW2 &4, %r7");
376: } while (--i > 0);
377: asm("BW_XORINNER:");
378: asm(" ROTW %ap, 0(%r8),%r4");
379: asm(" ANDW2 %r2,%r4");
380: asm(" LRSW3 %ap, 4(%r8),%r0");
381: asm(" ORW2 %r4,%r0");
382: asm(" ANDW2 -0x24(%sp),%r0");
383: asm(" XORW2 %r0,0(%r7)");
384: asm(" ADDW2 %r6,%r8"); /*source += sw; */
385: asm(" ADDW2 %r5,%r7"); /*dest += dw; */
386: /* } while (--h > 0); */
387: asm(" DECW -0x3c(%sp)");
388: asm(" BGB BW_XORLOOP");
389:
390: asm(" POPW %fp");
391: asm(" POPW %ap");
392:
393: /* above is similar to:
394: /* do {
395: /* m = *source++ << b; /* m is a free register */
396: /* *dest++ ^= ((m & LMASK) | ((*source >> a)& RMASK) & mask1);
397: /* if ((i=w) > 0) do {
398: /* m = ((*source++) << b) & LMASK;
399: /* *dest++ ^= m | ((*source >> a)&RMASK);
400: /* } while (--i > 0);
401: /* m = *source; /* m is a free register */
402: /* *dest ^= (((m << b) | (*(source+1) >> a)) & mask2);
403: /* asm(" addw2 %r6,%r8"); /*source += sw; */
404: /* asm(" addw2 %r5,%r7"); /*dest += dw; */
405: /* } while (--h > 0); */
406: break;
/*
 * F_STORE: the edge words keep the destination bits outside the
 * rectangle (ntmask1/ntmask2) and take the source bits inside it
 * (mask1/mask2); the middle words are plain copies.
 */
407: case F_STORE | NOSHIFT | LEFTDIR:
408: b = w>>2;
409: w = w&3;
410: m = h; /* m is free => use it */
411: do {
412: *dest = (ntmask2 & *dest) | (mask2 & *source--);
413: --dest;
414: if ((i = b) > 0) do {
415: *dest = *source;
416: *(dest-1) = *(source-1);
417: *(dest-2) = *(source-2);
418: *(dest-3) = *(source-3);
419: dest -= 4;
420: source -= 4;
421: } while (--i > 0);
422: if ((i = w) > 0) do {
423: *dest-- = *source--;
424: } while (--i > 0);
425: *dest = (ntmask1 & *dest) | (mask1 & *source);
426: asm(" addw2 %r6,%r8"); /*source += sw; */
427: asm(" addw2 %r5,%r7"); /*dest += dw; */
428: } while (--m != 0);
429: break;
430: case F_STORE | LEFTDIR:
431: if ((px31) < (rx31))
432: source++; /* adjust for pipeline */
433: do {
434: m = *source--; /* m is a free register */
435: *dest = (((m >> a) | (*source << b)) & mask2) |
436: (*dest & ntmask2);
437: --dest;
438: if ((i=w) > 0) do {
439: m = (*source--) >> a;
440: *dest-- = m | (*source << b);
441: } while (--i > 0);
442: m = *source; /* m is a free register */
443: *dest = (((m >> a) | (*(source-1) << b)) & mask1) |
444: (*dest & ntmask1);
445: asm(" addw2 %r6,%r8"); /*source += sw; */
446: asm(" addw2 %r5,%r7"); /*dest += dw; */
447: } while (--h > 0);
448: break;
449: case F_STORE | NOSHIFT:
/* the widestore label is not the target of any goto in the visible source */
450: widestore:
451: b = w >> 2;
452: w = w & 3;
453: m = h;
454: do {
455: *dest = (ntmask1 & *dest) | (mask1 & *source++);
456: dest++;
457: if ((i = b) > 0) do {
458: *dest = *source;
459: *(dest+1) = *(source+1);
460: *(dest+2) = *(source+2);
461: *(dest+3) = *(source+3);
462: dest += 4;
463: source += 4;
464: } while (--i > 0);
465: if ((i = w) > 0) do {
466: *dest++ = *source++;
467: } while (--i > 0);
468: *dest = (ntmask2 & *dest) | (mask2 & *source);
469: asm(" addw2 %r6,%r8");
470: asm(" addw2 %r5,%r7");
471: } while (--m != 0);
472: break;
473: case F_STORE:
474: if ((px31) > (rx31))
475: source--; /* adjust for pipeline */
476: do {
477: m = *source++; /* m is a free register */
478: *dest = (((m << b) | (*source >> a)) & mask1) |
479: (*dest & ntmask1);
480: dest++;
481: if ((i=w) > 0) do {
482: m = (*source++) << b;
483: *dest++ = m | (*source >> a);
484: } while (--i > 0);
485: m = *source; /* m is a free register */
486: *dest = (((m << b) | (*(source+1) >> a)) & mask2) |
487: (*dest & ntmask2);
488: asm(" addw2 %r6,%r8"); /*source += sw; */
489: asm(" addw2 %r5,%r7"); /*dest += dw; */
490: } while (--h > 0);
491: break;
492: }
493:
494: return;
495: narrow:
496: /*
497: * width is 32 bits or less. There are four basic cases
498: * (in addition to the function code), which depend on whether
499: * the source and dest straddle word boundaries or not
500: */
501:
/*
 * On entry here i is still the row count and dw is still width-1.
 * m and sw temporarily hold the bit offsets of the destination and
 * source within their first words.
 */
502: m = p.x & 31; /* commonly used expression */
503: sw = r.origin.x & 31; /* commonly used expression */
/* source span runs past bit 31 of its first word: it occupies two source words */
504: if (sw + dw > 31) /* if source is NOT aligned */
505: {
506: fc |= S_STRADDLE;
507: mask1 = ONES >> sw;
508: mask2 = topbits[((sw + dw) & 31) + 1];
509: }
510:
/* destination span occupies two destination words */
511: if (m + dw > 31) /* if dest is NOT aligned */
512: {
513: fc |= D_STRADDLE;
514: mask3 = ONES >> m;
515: mask4 = topbits[((m + dw) & 31) + 1];
516: }
/*
 * px31 keeps the destination bit offset, m (%r4) becomes the
 * destination-minus-source offset used as the ROTW count in the asm,
 * and a keeps width-1 because dw is about to be reused.
 */
517: px31 = m;
518: m = m - sw;
519: a = dw;
520:
/*
 * sw/dw become the per-row adjustments for source and dest; they are
 * negated (and the start moved to the last row) when copying within one
 * bitmap to a lower position, so rows are processed bottom-up.
 */
521: if ((sm == dm) && (r.origin.y < p.y))
522: { /* may have to mess with loop order */
523: r.origin.y += i-1;
524: p.y += i-1;
525: sw = -(sm->width << 2); /* sleazy hack to avoid shift */
526: dw = -(dm->width << 2); /* in outer, inner loops */
527: }
528: else
529: {
530: sw = sm->width << 2;
531: dw = dm->width << 2;
532: }
533:
534: source = addr(sm,r.origin);
535: dest = addr(dm,p);
536:
/*
 * Dispatch on operation | S_STRADDLE | D_STRADDLE.  Each case processes
 * one row per iteration of do { ... } while (--i > 0), with %r8 = source,
 * %r7 = dest, %r6/%r5 = row adjustments and %r4 = rotate count.
 * Cases that need an extra scratch register save %ap (and sometimes %fp)
 * on the stack first and restore it afterwards.  There is no default
 * case.
 */
537: switch(fc)
538: {
/*
 * F_STORE cases merge under a mask with the xor/and/xor idiom:
 * t = (src ^ dest) & mask; dest ^= t.
 */
539: case F_STORE:
540: mask1 = topbits[a+1] >> (px31);
541: asm(" MOVW 0x20(%fp),%r1");
542: do {
543: asm(" ROTW %r4,0(%r8),%r2");
544: asm(" XORW2 0(%r7),%r2");
545: asm(" ANDW2 %r1,%r2");
546: asm(" XORW2 %r2,0(%r7)");
547: asm(" ADDW2 %r6, %r8");
548: asm(" ADDW2 %r5, %r7");
549: } while (--i > 0);
550: break;
551: case F_STORE | S_STRADDLE:
/* mask3/mask4 are not set on this path (no D_STRADDLE), so their slots are reused */
552: mask4 = 32 - m; /* REALLY the other shift count */
553: mask3 = topbits[a+1] >> px31;
554: asm(" PUSHW %ap"); /* store ap on stack */
555: asm(" MOVW 0x28(%fp),%r0"); /* put mask3 in r0 */
556: asm(" MOVW 0x2c(%fp),%ap"); /* put other shift in ap */
557:
558: do {
559: asm(" LLSW3 %ap,0(%r8),%r1"); /* 32-m,lft shft */
560: asm(" LRSW3 %r4,4(%r8),%r2"); /* m, right shift */
561: asm(" ORW2 %r2, %r1");
562: asm(" XORW2 0(%r7),%r1");
563: asm(" ANDW2 %r0,%r1");
564: asm(" XORW2 %r1,0(%r7)");
565: asm(" ADDW2 %r6, %r8");
566: asm(" ADDW2 %r5, %r7");
567: } while (--i > 0);
568: asm(" POPW %ap"); /* restore ap from stack */
569: break;
570: case F_STORE | D_STRADDLE:
571: asm(" PUSHW %ap"); /* store ap on stack */
572: asm(" MOVW 0x28(%fp),%r0"); /* put mask3 in r0 */
573: asm(" MOVW 0x2c(%fp),%ap"); /* put mask4 in ap */
574: do {
575: asm(" ROTW %r4,0(%r8),%r1");
576: asm(" XORW3 0(%r7),%r1,%r2");
577: asm(" ANDW2 %r0,%r2");
578: asm(" XORW2 %r2,0(%r7)");
579: asm(" XORW2 4(%r7),%r1");
580: asm(" ANDW2 %ap,%r1");
581: asm(" XORW2 %r1,4(%r7)");
582: asm(" ADDW2 %r6, %r8");
583: asm(" ADDW2 %r5, %r7");
584: } while (--i > 0);
585: asm(" POPW %ap"); /* restore ap from stack */
586: break;
587: case F_STORE | S_STRADDLE | D_STRADDLE:
588: asm(" PUSHW %ap"); /* store ap on stack */
589: asm(" SUBW3 %r4,&0x20,%ap"); /* right shift distance */
590: asm(" MOVW 0x20(%fp),%r0"); /* r0 <- mask1 */
591: asm(" MOVW 0x24(%fp),%r2"); /* r2 <- mask2 */
592: asm(" PUSHW %fp"); /* store fp on stack */
593: do {
594: asm(" ANDW3 %r0, 0(%r8), %r1");
595: asm(" ANDW3 %r2, 4(%r8),%ap");
596: asm(" ORW2 %ap, %r1");
597: asm(" ROTW %r4, %r1, %r1");
598: asm(" XORW3 0(%r7), %r1, %ap");
599: asm(" ANDW2 0x28(%fp), %ap");
600: asm(" XORW2 %ap, 0(%r7)");
601: asm(" XORW2 4(%r7), %r1");
602: asm(" ANDW2 0x2c(%fp), %r1");
603: asm(" XORW2 %r1, 4(%r7)");
604: asm(" ADDW2 %r6, %r8");
605: asm(" ADDW2 %r5, %r7");
606: } while (--i > 0);
607: asm(" POPW %fp"); /* restore fp from stack */
608: asm(" POPW %ap"); /* restore ap from stack */
609: break;
610: case F_OR:
611: mask1 = topbits[a+1] >> px31;
612: asm(" MOVW 0x20(%fp),%r1"); /* mask1 */
613: do {
614: asm(" ROTW %r4,0(%r8),%r2");
615: asm(" ANDW2 %r1,%r2");
616: asm(" ORW2 %r2,0(%r7)");
617: asm(" ADDW2 %r6, %r8");
618: asm(" ADDW2 %r5, %r7");
619: } while (--i > 0);
620: break;
621: case F_OR | S_STRADDLE:
622: asm(" MOVW 0x20(%fp),%r0"); /* store mask1 in a reg */
623: asm(" PUSHW %ap"); /* store ap on stack */
624: asm(" MOVW 0x24(%fp),%ap"); /* store mask2 in a reg */
625:
626: do {
627: asm(" ANDW3 %r0,0(%r8),%r2"); /* x20(fp)=mask1 */
628: asm(" ANDW3 %ap,4(%r8),%r1"); /* x24(fp)=mask2 */
629: asm(" ORW2 %r2,%r1");
630: asm(" ROTW %r4,%r1,%r1");
631: asm(" ORW2 %r1,0(%r7)");
632: asm(" ADDW2 %r6, %r8");
633: asm(" ADDW2 %r5, %r7");
634: } while (--i > 0);
635: asm(" POPW %ap"); /* restore ap from stack */
636: break;
637: case F_OR | D_STRADDLE:
/* very narrow variant: uses a halfword operation at 2(%r7) plus a word operation at 4(%r7) */
638: if (a <= 16) /* very narrow, 17 bits max */
639: {
640: asm(" MOVW &0xffff0000,%r2");
641: asm(" ORW3 0x28(%fp),0x2c(%fp),%r0"); /* compute mask */
642: do {
643: asm(" ROTW %r4,0(%r8),%r1");
644: asm(" ANDW2 %r0,%r1"); /* mask */
645: asm(" ORH2 %r1,2(%r7)");
646: asm(" ANDW2 %r2, %r1");
647: asm(" ORW2 %r1, 4(%r7)");
648: asm(" ADDW2 %r6, %r8");
649: asm(" ADDW2 %r5, %r7");
650: } while (--i > 0);
651: }
652: else
653: {
654: asm(" MOVW 0x28(%fp),%r0"); /* store mask3 in a reg */
655: asm(" PUSHW %ap"); /* store ap on stack */
656: asm(" MOVW 0x2c(%fp),%ap"); /* store mask4 in a reg */
657: do {
658: asm(" ROTW %r4,0(%r8),%r1");
659: asm(" ANDW3 %r0,%r1,%r2");
660: asm(" ORW2 %r2,0(%r7)");
661: asm(" ANDW2 %ap,%r1");
662: asm(" ORW2 %r1,4(%r7)");
663: asm(" ADDW2 %r6, %r8");
664: asm(" ADDW2 %r5, %r7");
665: } while (--i > 0);
666: asm(" POPW %ap"); /* restore ap from stack */
667: }
668: break;
669: case F_OR | S_STRADDLE | D_STRADDLE:
670: asm(" MOVW 0x20(%fp),%r0"); /* store mask1 in a reg */
671: asm(" PUSHW %ap"); /* store ap on stack */
672: asm(" MOVW 0x24(%fp),%ap"); /* store mask2 in a reg */
673: if (a > 16){ /* not super narrow */
674: do {
675: asm(" ANDW3 %r0,0(%r8),%r1");
676: asm(" ANDW3 %ap,4(%r8),%r2");
677: asm(" ORW2 %r2,%r1");
678: asm(" ROTW %r4,%r1,%r1");
679: asm(" ANDW3 0x28(%fp),%r1,%r2");
680: asm(" ORW2 %r2,0(%r7)");
681: asm(" ANDW2 0x2c(%fp),%r1");
682: asm(" ORW2 %r1,4(%r7)");
683: asm(" ADDW2 %r6, %r8");
684: asm(" ADDW2 %r5, %r7");
685: } while (--i > 0);
686: }
687: else
688: {
689: do {
690: asm(" ANDW3 %r0,0(%r8),%r1");
691: asm(" ANDW3 %ap,4(%r8),%r2");
692: asm(" ORW2 %r2,%r1");
693: asm(" ROTW %r4,%r1,%r1");
694: asm(" ORH2 %r1,2(%r7)");
695: asm(" ANDW2 &0xffff0000,%r1");
696: asm(" ORW2 %r1,4(%r7)");
697: asm(" ADDW2 %r6, %r8");
698: asm(" ADDW2 %r5, %r7");
699: } while (--i > 0);
700: }
701: asm(" POPW %ap"); /* restore ap from stack */
702: break;
/* F_CLR cases: complement the masked, rotated source (MCOMW) and AND it into the destination */
703: case F_CLR:
704: mask1 = topbits[a+1] >> px31;
705: asm(" MOVW 0x20(%fp),%r1"); /* mask1 */
706: do {
707: asm(" ROTW %r4,0(%r8),%r2");
708: asm(" ANDW2 %r1,%r2");
709: asm(" MCOMW %r2,%r2");
710: asm(" ANDW2 %r2,0(%r7)");
711: asm(" ADDW2 %r6, %r8");
712: asm(" ADDW2 %r5, %r7");
713: } while (--i > 0);
714: break;
715: case F_CLR | S_STRADDLE:
716: asm(" MOVW 0x20(%fp),%r0"); /* store mask1 in a reg */
717: asm(" PUSHW %ap"); /* store ap on stack */
718: asm(" MOVW 0x24(%fp),%ap"); /* store mask2 in a reg */
719:
720: do {
721: asm(" ANDW3 %r0,0(%r8),%r2"); /* x20(fp)=mask1 */
722: asm(" ANDW3 %ap,4(%r8),%r1"); /* x24(fp)=mask2 */
723: asm(" ORW2 %r2,%r1");
724: asm(" ROTW %r4,%r1,%r1");
725: asm(" MCOMW %r1,%r1");
726: asm(" ANDW2 %r1,0(%r7)");
727: asm(" ADDW2 %r6, %r8");
728: asm(" ADDW2 %r5, %r7");
729: } while (--i > 0);
730: asm(" POPW %ap"); /* restore ap from stack */
731: break;
732: case F_CLR | D_STRADDLE:
733: if (a <= 16) /* very narrow, 17 bits max */
734: {
735: asm(" MOVW &0xffff,%r2");
736: asm(" ORW3 0x28(%fp),0x2c(%fp),%r0"); /* compute mask */
737: do {
738: asm(" ROTW %r4,0(%r8),%r1");
739: asm(" ANDW2 %r0,%r1"); /* mask */
740: asm(" MCOMW %r1,%r1");
741: asm(" ANDH2 %r1,2(%r7)");
742: asm(" ORW2 %r2,%r1");
743: asm(" ANDW2 %r1,4(%r7)");
744: asm(" ADDW2 %r6, %r8");
745: asm(" ADDW2 %r5, %r7");
746: } while (--i > 0);
747: }
748: else
749: {
750: asm(" MOVW 0x28(%fp),%r0"); /* store mask3 in a reg */
751: asm(" PUSHW %ap"); /* store ap on stack */
752: asm(" MOVW 0x2c(%fp),%ap"); /* store mask4 in a reg */
753: do {
754: asm(" ROTW %r4,0(%r8),%r1");
755: asm(" ANDW3 %r0,%r1,%r2");
756: asm(" MCOMW %r2,%r2");
757: asm(" ANDW2 %r2,0(%r7)");
758: asm(" ANDW2 %ap,%r1");
759: asm(" MCOMW %r1,%r1");
760: asm(" ANDW2 %r1,4(%r7)");
761: asm(" ADDW2 %r6, %r8");
762: asm(" ADDW2 %r5, %r7");
763: } while (--i > 0);
764: asm(" POPW %ap"); /* restore ap from stack */
765: }
766: break;
767: case F_CLR | S_STRADDLE | D_STRADDLE:
768: asm(" MOVW 0x20(%fp),%r0"); /* store mask1 in a reg */
769: asm(" PUSHW %ap"); /* store ap on stack */
770: asm(" MOVW 0x24(%fp),%ap"); /* store mask2 in a reg */
771: if (a > 16){ /* not super narrow */
772: do {
773: asm(" ANDW3 %r0,0(%r8),%r1");
774: asm(" ANDW3 %ap,4(%r8),%r2");
775: asm(" ORW2 %r2,%r1");
776: asm(" ROTW %r4,%r1,%r1");
777: asm(" ANDW3 0x28(%fp),%r1,%r2");
778: asm(" MCOMW %r2,%r2");
779: asm(" ANDW2 %r2,0(%r7)");
780: asm(" ANDW2 0x2c(%fp),%r1");
781: asm(" MCOMW %r1,%r1");
782: asm(" ANDW2 %r1,4(%r7)");
783: asm(" ADDW2 %r6, %r8");
784: asm(" ADDW2 %r5, %r7");
785: } while (--i > 0);
786: }
787: else
788: {
789: do {
790: asm(" ANDW3 %r0,0(%r8),%r1");
791: asm(" ANDW3 %ap,4(%r8),%r2");
792: asm(" ORW2 %r2,%r1");
793: asm(" ROTW %r4,%r1,%r1");
794: asm(" MCOMW %r1,%r1");
795: asm(" ANDH2 %r1,2(%r7)");
796: asm(" ORW2 &0xffff,%r1");
797: asm(" ANDW2 %r1,4(%r7)");
798: asm(" ADDW2 %r6, %r8");
799: asm(" ADDW2 %r5, %r7");
800: } while (--i > 0);
801: }
802: asm(" POPW %ap"); /* restore ap from stack */
803: break;
/* F_XOR cases: same shapes as F_OR with XOR in place of OR */
804: case F_XOR:
805: mask1 = topbits[a+1] >> px31;
806: asm(" MOVW 0x20(%fp),%r1"); /* mask1 */
807: do {
808: asm(" ROTW %r4,0(%r8),%r2");
809: asm(" ANDW2 %r1,%r2");
810: asm(" XORW2 %r2,0(%r7)");
811: asm(" ADDW2 %r6, %r8");
812: asm(" ADDW2 %r5, %r7");
813: } while (--i > 0);
814: break;
815: case F_XOR | S_STRADDLE:
816: asm(" MOVW 0x20(%fp),%r0"); /* store mask1 in a reg */
817: asm(" PUSHW %ap"); /* store ap on stack */
818: asm(" MOVW 0x24(%fp),%ap"); /* store mask2 in a reg */
819:
820: do {
821: asm(" ANDW3 %r0,0(%r8),%r2"); /* x20(fp)=mask1 */
822: asm(" ANDW3 %ap,4(%r8),%r1"); /* x24(fp)=mask2 */
823: asm(" ORW2 %r2,%r1");
824: asm(" ROTW %r4,%r1,%r1");
825: asm(" XORW2 %r1,0(%r7)");
826: asm(" ADDW2 %r6, %r8");
827: asm(" ADDW2 %r5, %r7");
828: } while (--i > 0);
829: asm(" POPW %ap"); /* restore ap from stack */
830: break;
831: case F_XOR | D_STRADDLE:
832: if (a <= 16) /* very narrow, 17 bits max */
833: {
834: asm(" MOVW &0xffff0000,%r2");
835: asm(" ORW3 0x28(%fp),0x2c(%fp),%r0"); /* compute mask */
836: do {
837: asm(" ROTW %r4,0(%r8),%r1");
838: asm(" ANDW2 %r0,%r1"); /* mask */
839: asm(" XORH2 %r1,2(%r7)");
840: asm(" ANDW2 %r2,%r1");
841: asm(" XORW2 %r1,4(%r7)");
842: asm(" ADDW2 %r6, %r8");
843: asm(" ADDW2 %r5, %r7");
844: } while (--i > 0);
845: }
846: else
847: {
848: asm(" MOVW 0x28(%fp),%r0"); /* store mask3 in a reg */
849: asm(" PUSHW %ap"); /* store ap on stack */
850: asm(" MOVW 0x2c(%fp),%ap"); /* store mask4 in a reg */
851: do {
852: asm(" ROTW %r4,0(%r8),%r1");
853: asm(" ANDW3 %r0,%r1,%r2");
854: asm(" XORW2 %r2,0(%r7)");
855: asm(" ANDW2 %ap,%r1");
856: asm(" XORW2 %r1,4(%r7)");
857: asm(" ADDW2 %r6, %r8");
858: asm(" ADDW2 %r5, %r7");
859: } while (--i > 0);
860: asm(" POPW %ap"); /* restore ap from stack */
861: }
862: break;
863: case F_XOR | S_STRADDLE | D_STRADDLE:
864: asm(" MOVW 0x20(%fp),%r0"); /* store mask1 in a reg */
865: asm(" PUSHW %ap"); /* store ap on stack */
866: asm(" MOVW 0x24(%fp),%ap"); /* store mask2 in a reg */
867: if (a > 16){ /* not super narrow */
868: do {
869: asm(" ANDW3 %r0,0(%r8),%r1");
870: asm(" ANDW3 %ap,4(%r8),%r2");
871: asm(" ORW2 %r2,%r1");
872: asm(" ROTW %r4,%r1,%r1");
873: asm(" ANDW3 0x28(%fp),%r1,%r2");
874: asm(" XORW2 %r2,0(%r7)");
875: asm(" ANDW2 0x2c(%fp),%r1");
876: asm(" XORW2 %r1,4(%r7)");
877: asm(" ADDW2 %r6, %r8");
878: asm(" ADDW2 %r5, %r7");
879: } while (--i > 0);
880: }
881: else
882: {
883: do {
884: asm(" ANDW3 %r0,0(%r8),%r1");
885: asm(" ANDW3 %ap,4(%r8),%r2");
886: asm(" ORW2 %r2,%r1");
887: asm(" ROTW %r4,%r1,%r1");
888: asm(" XORH2 %r1,2(%r7)");
889: asm(" ANDW2 &0xffff0000,%r1");
890: asm(" XORW2 %r1,4(%r7)");
891: asm(" ADDW2 %r6, %r8");
892: asm(" ADDW2 %r5, %r7");
893: } while (--i > 0);
894: }
895: asm(" POPW %ap"); /* restore ap from stack */
896: break;
897: }
898: return;
899: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.