|
|
1.1 root 1: ;---------------------------Module-Header------------------------------;
2: ; Module Name: srccopy.asm
3: ;
4: ; Copyright (c) 1993 Microsoft Corporation
5: ;-----------------------------------------------------------------------;
6:
7: ;-----------------------------------------------------------------------;
8: ; VOID vSrcCopy8bpp(ppdev, prclDst, pptlSrc, lSrcDelta, pvSrcStart);
9: ;
10: ; Input:
11: ;
12: ; ppdev - screen pdev
13: ; (no psoSrc argument is passed; the cProc takes the five listed here)
14: ; prclDst - pointer to destination rectangle
15: ; pptlSrc - pointer to source upper left corner
16: ; lSrcDelta - offset from start of one scan to next in source
17: ; pvSrcStart - pointer to start of source bitmap
18: ;
19: ; Performs 8bpp SRCCOPY memory-to-screen blts.
20: ;
21: ;-----------------------------------------------------------------------;
22: ; NOTE: Assumes all rectangles have positive heights and widths. Will
23: ; not work properly if this is not the case.
24: ;-----------------------------------------------------------------------;
25:
26: ;-----------------------------------------------------------------------;
27: ; Set LOOP_UNROLL_SHIFT to the log2 of the number of times you want loops in
28: ; this module unrolled. For example, LOOP_UNROLL_SHIFT of 3 yields 2**3 = 8
29: ; times unrolling. This is the only thing you need to change to control
30: ; unrolling.
; The value is also used as a shift count in the code below: scan counts
; are split into loop passes and a partial-pass remainder with it, and
; LOOP_UNROLL_SHIFT+2 converts an unroll-table index into a byte offset
; (each table holding 1 << LOOP_UNROLL_SHIFT dword entries).
; NOTE(review): LOOP_UNROLL_COUNT, passed to the UNROLL_LOOP macros, is not
; defined in this file - presumably i386\unroll.inc derives it from
; LOOP_UNROLL_SHIFT; confirm.
31:
32: LOOP_UNROLL_SHIFT equ 2 ;2**2 = 4 scans per pass of each unrolled loop
33:
34: ;-----------------------------------------------------------------------;
35:
36: .386 ;enable 80386 (32-bit) instructions
37:
38: .model small,c ;small memory model, C language type
39:
40: assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT ;all four address the flat group
41: assume fs:nothing,gs:nothing ;fs/gs are not referenced in this file
42:
43: .xlist ;keep the include files out of the listing
44: include stdcall.inc ;calling convention cmacros
45: include i386\strucs.inc
46: include i386\driver.inc
47: include i386\unroll.inc
48: include i386\egavga.inc
; NOTE(review): the names used below but not defined in this file (cProc,
; ptrCall, cRet, endProc, PDEV, RECTL, POINTL, JustifyTop, UNROLL_LOOP,
; UNROLL_LOOP_ENTRY_TABLE, LOOP_UNROLL_COUNT) presumably come from these
; includes; which file supplies which is not visible here.
49:
50: .list ;resume listing
51:
52: ;-----------------------------------------------------------------------;
53:
54: .data ;no module data is declared
55:
56: ;-----------------------------------------------------------------------;
57:
58: .code
59:
60: ;-----------------------------------------------------------------------;
61:
62: cProc vSrcCopy8bpp,20,< \
63: uses esi edi ebx, \
64: ppdev: ptr PDEV, \
65: prclDst: ptr RECTL, \
66: pptlSrc: ptr POINTL, \
67: lSrcDelta: dword, \
68: pvSrcStart: ptr >
69:
; Entry code. Makes sure the bank holding the first destination scan is
; mapped, computes the source and destination start addresses and the
; per-scan deltas, then jumps into the unrolled copy loop selected by the
; left/right edge alignment. Destinations that start and end inside one
; dword branch to src8_one_dword instead.
70: local culMiddle: dword ;# of whole dwords per scan (rep movsd count)
71: local cyToGo: dword ;# of scans to copy after the current bank
; (<= 0 when the current bank finishes the blt)
72: local ppfnUnroll: ptr ;unroll entry table chosen for this blt,
; re-used when continuing in the next bank
73: local pvSrc: ptr ;source address matching the destination
; address held in edi
74:
75: mov esi,prclDst ;esi = prclDst
76: mov ebx,ppdev ;ebx = ppdev
77: mov edi,[esi].yTop ;edi = first destination scan
78:
; The first scan must lie in [yTop, yBottom) of rcl1WindowClip; if it does
; not, call ppdev->pfnBankControl(ppdev, scan, JustifyTop) first.
79: cmp edi,[ebx].pdev_rcl1WindowClip.yTop
80: jl short src8_map_init_bank ;scan is above the current window
81:
82: mov edx,[ebx].pdev_rcl1WindowClip.yBottom
83: ;edx = ppdev->rcl1WindowClip.bottom
84: cmp edi,edx
85: jl short src8_init_bank_mapped ;scan is inside the current window
86:
87: src8_map_init_bank:
88: ptrCall <dword ptr [ebx].pdev_pfnBankControl>, \
89: <ebx,edi,JustifyTop>
90:
91: mov edx,[ebx].pdev_rcl1WindowClip.yBottom ;re-read after the bank call
92:
93: src8_init_bank_mapped:
94: mov eax,[esi].yBottom
95: sub eax,edx ;eax = prclDst->bottom - window bottom; CF = borrow
96: mov cyToGo,eax ;eax = # scans to do after this bank
97: sbb ecx,ecx ;ecx = -1 if the sub borrowed, else 0 (mov kept CF)
98: and ecx,eax ;ecx = eax (negative) if the blt ends in this bank, else 0
99: add edx,ecx ;edx = min(prclDst->bottom,
100: ; ppdev->rcl1WindowClip.bottom)
101: sub edx,edi ;edx = # of scans to do in this bank
102:
103: ; ebx = ppdev
104: ; edx = # of scans to do in this bank
105: ; esi = prclDst
106:
107: mov eax,[esi].xLeft
108: add eax,3
109: and eax,not 3 ;eax = xLeft aligned to next dword
110:
111: mov edi,pptlSrc ;edi = pptlSrc
112: mov ecx,lSrcDelta
113: imul ecx,[edi].ptl_y ;ecx = byte offset of the source scan
114: add ecx,[edi].ptl_x
115: add ecx,eax ;these two lines add (aligned left - xLeft): the
116: sub ecx,[esi].xLeft ; leading-byte count needed to align the destination
117: add ecx,pvSrcStart
118: mov pvSrc,ecx ;pvSrc = pptlSrc->y * lSrcDelta +
119: ; pptlSrc->x + dest alignment +
120: ; pvSrcStart
121:
122: mov edi,[ebx].pdev_lNextScan
123: imul edi,[esi].yTop
124: add edi,eax
125: add edi,[ebx].pdev_pvBitmapStart
126: ;edi = prclDst->top * ppdev->lNextScan +
127: ; aligned left + ppdev->pvBitmapStart
128: ; (the aligned destination address)
129:
130: ; eax = prclDst->left aligned to dword
131: ; ebx = ppdev
132: ; edx = # of scans to do in this bank
133: ; esi = prclDst
134: ; edi = destination address
135:
136: mov ecx,[esi].xRight ;ecx = prclDst->right
137: sub ecx,eax ;ecx = length in bytes from first full
138: ; dword to last byte
139: jl short src8_one_dword ;special case if the destination
140: ; starts and ends in the same dword
141:
142: mov eax,ecx
143: and ecx,not 3 ;ecx = length of middle dwords in bytes
144: sub eax,ecx ;eax = (right & 3)
; (the # of trailing bytes after the last whole dword)
145:
146: mov esi,[esi].xLeft ;prclDst is not needed past this point
147: and esi,3 ;esi = (left & 3)
148: shl eax,2
149: or esi,eax ;esi = ((right & 3) << 2) | (left & 3)
150: ; (look-up index for unroll loops)
151:
152: mov ebx,[ebx].pdev_lNextScan ;ppdev is not needed past this point
153: sub ebx,ecx ;ebx = ppdev->lNextScan
154: ; - (culMiddle << 2)
155: ; (destination delta)
156:
157: mov eax,lSrcDelta
158: sub eax,ecx ;eax = lSrcDelta - (culMiddle << 2)
159: ; (source delta)
160:
161: shr ecx,2
162: mov culMiddle,ecx ;culMiddle = number of middle dwords
163:
164: ; eax = source delta
165: ; ebx = destination delta
166: ; ecx = culMiddle (free for re-use as scratch from here on)
167: ; edx = # scans to do
168: ; esi = look-up index computed above
169: ; edi = destination pointer
170:
171: ; Set up for loop unrolling:
172:
173: mov ecx,edx
174: add edx,(1 shl LOOP_UNROLL_SHIFT)-1 ;round count up
175: and ecx,(1 shl LOOP_UNROLL_SHIFT)-1 ;fractional part of loop
176: shr edx,LOOP_UNROLL_SHIFT ;# of loop passes, any partial pass included
177:
178: shl esi,LOOP_UNROLL_SHIFT+2 ;every unroll table is
179: ; (1 << LOOP_UNROLL_SHIFT) dwords long
180: add esi,offset gapfnMasterCopyTable
181: mov ppfnUnroll,esi ;save unroll table pointer for next bank
182:
; NOTE(review): entry [ecx] is presumably the label inside the unrolled
; loop that makes the first pass copy only the fractional scan count; the
; table layout is produced by UNROLL_LOOP_ENTRY_TABLE, not visible here.
183: mov ecx,[esi+ecx*4] ;ecx = address to enter the unrolled loop at
184: mov esi,pvSrc ;esi = source pointer for the copy loops
185: jmp ecx
186:
187: ;-----------------------------------------------------------------------;
188: ; Here we handle cases where copy starts and ends in same dword:
; Reached by the 'jl short src8_one_dword' in the entry code with:
; eax = xLeft rounded up to a dword boundary
; ebx = ppdev
; ecx = xRight - eax (negative)
; edx = # of scans to do in this bank
; esi = prclDst
; edi = destination address of the rounded-up left edge
; pvSrc = source address matching edi
189:
190: public src8_one_dword
191: src8_one_dword:
192: sub eax,[esi].xLeft ;eax = # of bytes from left edge to
193: ; first dword
194: add ecx,eax ;ecx = # of bytes to do
; (= xRight - xLeft)
195:
196: sub edi,eax ;adjust back to start byte
197: sub pvSrc,eax ;adjust accordingly
198:
199: mov ebx,[ebx].pdev_lNextScan;ebx = ppdev->lNextScan
200: ; (destination delta)
201:
202: mov eax,lSrcDelta ;eax = lSrcDelta
203: ; (source delta)
; The thin loops do not advance esi/edi while copying, so both deltas are
; the full scan strides.
204:
205: ; eax = source delta
206: ; ebx = destination delta
207: ; esi = prclDst (no longer needed, re-used as scratch below)
208: ; edx = # scans to do
209: ; ecx = # of bytes to copy per scan (selects the T_n table)
210: ; edi = destination pointer
211:
212: ; Set up for loop unrolling:
213:
214: mov esi,edx
215: add edx,(1 shl LOOP_UNROLL_SHIFT)-1 ;round count up
216: and esi,(1 shl LOOP_UNROLL_SHIFT)-1 ;fractional part of loop
217: shr edx,LOOP_UNROLL_SHIFT ;# of loop passes, any partial pass included
218:
219: dec ecx ;adjust for table (no zero entry)
220: shl ecx,LOOP_UNROLL_SHIFT+2 ;every unroll table is
221: ; (1 << LOOP_UNROLL_SHIFT) dwords long
222: add ecx,offset gapfnOneDwordCopyTable
223: mov ppfnUnroll,ecx ;save unroll table pointer for next bank
224:
225: mov ecx,[ecx+esi*4] ;ecx = address to enter the unrolled loop at
226: mov esi,pvSrc ;esi = source pointer for the copy loops
227: jmp ecx
228:
229: ;-----------------------------------------------------------------------;
230: ; We have following variables set before calling unrolled loops:
231: ;
232: ; eax = source delta (from end of dwords to start of dwords on next scan)
233: ; ebx = destination delta
234: ; edx = # of unrolled loops
235: ; esi = source pointer
236: ; edi = destination pointer
237: ; culMiddle = number of dwords to copy
238:
239: ;-----------------------------------------------------------------------;
240: ; See if done. If not, get next bank.
; Every copy loop jumps here once its pass count (edx) reaches zero.
; At that point esi/edi are the source/destination pointers for the next
; scan and eax/ebx are the source/destination deltas; all four must
; survive into the next bank.
241:
242: align 4
243: public src8_done
244: src8_done:
245: cmp cyToGo,0
246: jg short src8_next_bank ;scans remain below the current bank
247: cRet vSrcCopy8bpp
248:
249: src8_next_bank:
250: push esi ;source pointer
251: push ebx ;save some registers
252: mov ebx,ppdev
253: push eax ;source delta
254:
255: mov esi,[ebx].pdev_rcl1WindowClip.yBottom
; esi = bottom of the current window = first scan wanted in the next one
256: sub edi,[ebx].pdev_pvBitmapStart ;edi = offset relative to pvBitmapStart
257:
258: ptrCall <dword ptr [ebx].pdev_pfnBankControl>, \
259: <ebx,esi,JustifyTop>
; NOTE(review): ebx, esi and edi are used unchanged after this call, so the
; bank routine is relied upon to preserve them - presumably guaranteed by
; its calling convention; confirm.
260:
261: add edi,[ebx].pdev_pvBitmapStart ;rebase on the value read after the call
262: mov edx,[ebx].pdev_rcl1WindowClip.yBottom
263: sub edx,esi ;edx = # scans can do in this bank
264:
265: mov eax,cyToGo
266: sub eax,edx ;CF = borrow when the blt ends inside this bank
267: mov cyToGo,eax ;eax = # scans to do after this bank
268: sbb ecx,ecx ;ecx = -1 if the sub borrowed, else 0 (mov kept CF)
269: and ecx,eax ;ecx = eax (negative) if the blt ends here, else 0
270: add edx,ecx ;edx = min(# of scans can do in bank,
271: ; # of scans to go)
272:
273: mov ecx,edx
274: mov esi,ppfnUnroll ;unroll table
275: add edx,(1 shl LOOP_UNROLL_SHIFT)-1 ;round count up
276: and ecx,(1 shl LOOP_UNROLL_SHIFT)-1 ;fractional part of loop
277: shr edx,LOOP_UNROLL_SHIFT ;# of whole loops
278:
279: mov ecx,[esi+ecx*4] ;ecx = address to re-enter the unrolled loop at
280:
281: pop eax ;restore those registers
282: pop ebx
283: pop esi
284:
285: jmp ecx
286:
287: ;-----------------------------------------------------------------------;
288: ; We organize the unroll tables as follows so that it's easy to index
289: ; into them:
290: ;
291: ; Bits 2 and 3 = # of trailing bytes
292: ; Bits 0 and 1 = # of leading bytes to skip in first dword
; (that is, xLeft & 3)
;
; The index is ((xRight & 3) << 2) | (xLeft & 3). In the W_xy names x is
; the number of leading bytes copied and y the number of trailing bytes.
; Skipping k bytes of the first dword leaves (4 - k) & 3 bytes to copy,
; which is why each group of four runs W_0y, W_3y, W_2y, W_1y.
; The entry code multiplies the index by (1 << LOOP_UNROLL_SHIFT) dwords,
; so each UNROLL_LOOP_ENTRY_TABLE line must emit exactly that many entries.
293:
294: align 4
295: gapfnMasterCopyTable label dword
296: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW00Entry,W_00,LOOP_UNROLL_COUNT
297: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW30Entry,W_30,LOOP_UNROLL_COUNT
298: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW20Entry,W_20,LOOP_UNROLL_COUNT
299: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW10Entry,W_10,LOOP_UNROLL_COUNT
300: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW01Entry,W_01,LOOP_UNROLL_COUNT
301: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW31Entry,W_31,LOOP_UNROLL_COUNT
302: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW21Entry,W_21,LOOP_UNROLL_COUNT
303: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW11Entry,W_11,LOOP_UNROLL_COUNT
304: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW02Entry,W_02,LOOP_UNROLL_COUNT
305: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW32Entry,W_32,LOOP_UNROLL_COUNT
306: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW22Entry,W_22,LOOP_UNROLL_COUNT
307: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW12Entry,W_12,LOOP_UNROLL_COUNT
308: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW03Entry,W_03,LOOP_UNROLL_COUNT
309: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW33Entry,W_33,LOOP_UNROLL_COUNT
310: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW23Entry,W_23,LOOP_UNROLL_COUNT
311: UNROLL_LOOP_ENTRY_TABLE pfnCopyWideW13Entry,W_13,LOOP_UNROLL_COUNT
312:
313: align 4
; Entry tables for destinations that start and end inside one dword,
; indexed by (bytes per scan - 1): src8_one_dword decrements the byte count
; before scaling it because there is no table for zero bytes.
314: gapfnOneDwordCopyTable label dword
315: UNROLL_LOOP_ENTRY_TABLE pfnCopyThin1Entry,T_1,LOOP_UNROLL_COUNT
316: UNROLL_LOOP_ENTRY_TABLE pfnCopyThin2Entry,T_2,LOOP_UNROLL_COUNT
317: UNROLL_LOOP_ENTRY_TABLE pfnCopyThin3Entry,T_3,LOOP_UNROLL_COUNT
318:
319: ;-----------------------------------------------------------------------;
320: ; Copy n dwords, 0 leading bytes, 0 trailing bytes, then advance to next
321: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan, eax/ebx = source/destination delta (scan stride minus
; the middle-dword bytes), edx = # of passes left. ecx is scratch.
; NOTE(review): rep movsd counts upward only if the direction flag is
; clear; nothing in this file sets or clears it - confirm the caller's
; guarantee.
322:
323: COPY_WIDE_W_00 macro ENTRY_LABEL,ENTRY_INDEX
324: &ENTRY_LABEL&ENTRY_INDEX&:
325: mov ecx,culMiddle ;ecx = # of middle dwords
326: rep movsd ;copy them, advancing esi and edi
327: add esi,eax ;esi = aligned start of the next source scan
328: add edi,ebx ;edi = aligned start of the next destination scan
329: endm ;--------------------------------;
330:
331: align 4
332: copy_wide_w_00_loop:
333: UNROLL_LOOP COPY_WIDE_W_00,W_00,LOOP_UNROLL_COUNT
334: dec edx ;one more pass done
335: jnz copy_wide_w_00_loop
336: jmp src8_done ;bank finished: return or map the next bank
337:
338: ;-----------------------------------------------------------------------;
339: ; Copy n dwords, 0 leading bytes, 1 trailing bytes, then advance to next
340: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan, eax/ebx = source/destination delta (scan stride minus
; the middle-dword bytes), edx = # of passes left. ecx is scratch.
341:
342: COPY_WIDE_W_01 macro ENTRY_LABEL,ENTRY_INDEX
343: &ENTRY_LABEL&ENTRY_INDEX&:
344: mov ecx,culMiddle ;ecx = # of middle dwords
345: rep movsd ;copy them, advancing esi and edi
346: mov cl,[esi] ;trailing byte sits right after the middle dwords
347: mov [edi],cl
348: add esi,eax ;pointers were not moved by the trailing copy, so the
349: add edi,ebx ; deltas land on the aligned start of the next scan
350: endm ;--------------------------------;
351:
352: align 4
353: copy_wide_w_01_loop:
354: UNROLL_LOOP COPY_WIDE_W_01,W_01,LOOP_UNROLL_COUNT
355: dec edx ;one more pass done
356: jnz copy_wide_w_01_loop
357: jmp src8_done ;bank finished: return or map the next bank
358:
359: ;-----------------------------------------------------------------------;
360: ; Copy n dwords, 0 leading bytes, 2 trailing bytes, then advance to next
361: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan, eax/ebx = source/destination delta (scan stride minus
; the middle-dword bytes), edx = # of passes left. ecx is scratch.
362:
363: COPY_WIDE_W_02 macro ENTRY_LABEL,ENTRY_INDEX
364: &ENTRY_LABEL&ENTRY_INDEX&:
365: mov ecx,culMiddle ;ecx = # of middle dwords
366: rep movsd ;copy them, advancing esi and edi
367: mov cx,[esi] ;two trailing bytes moved as one word
368: mov [edi],cx
369: add esi,eax ;pointers were not moved by the trailing copy, so the
370: add edi,ebx ; deltas land on the aligned start of the next scan
371: endm ;--------------------------------;
372:
373: align 4
374: copy_wide_w_02_loop:
375: UNROLL_LOOP COPY_WIDE_W_02,W_02,LOOP_UNROLL_COUNT
376: dec edx ;one more pass done
377: jnz copy_wide_w_02_loop
378: jmp src8_done ;bank finished: return or map the next bank
379:
380: ;-----------------------------------------------------------------------;
381: ; Copy n dwords, 0 leading bytes, 3 trailing bytes, then advance to next
382: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan, eax/ebx = source/destination delta (scan stride minus
; the middle-dword bytes), edx = # of passes left. ecx is scratch.
383:
384: COPY_WIDE_W_03 macro ENTRY_LABEL,ENTRY_INDEX
385: &ENTRY_LABEL&ENTRY_INDEX&:
386: mov ecx,culMiddle ;ecx = # of middle dwords
387: rep movsd ;copy them, advancing esi and edi
388: mov cx,[esi] ;trailing bytes 0-1 moved as one word
389: mov [edi],cx
390: mov cl,[esi+2] ;trailing byte 2
391: mov [edi+2],cl
392: add esi,eax ;pointers were not moved by the trailing copy, so the
393: add edi,ebx ; deltas land on the aligned start of the next scan
394: endm ;--------------------------------;
395:
396: align 4
397: copy_wide_w_03_loop:
398: UNROLL_LOOP COPY_WIDE_W_03,W_03,LOOP_UNROLL_COUNT
399: dec edx ;one more pass done
400: jnz copy_wide_w_03_loop
401: jmp src8_done ;bank finished: return or map the next bank
402:
403: ;-----------------------------------------------------------------------;
404: ; Copy n dwords, 1 leading byte, 0 trailing bytes, then advance to next
405: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading byte sits just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
406:
407: COPY_WIDE_W_10 macro ENTRY_LABEL,ENTRY_INDEX
408: &ENTRY_LABEL&ENTRY_INDEX&:
409: mov cl,[esi-1] ;leading byte, just below the aligned address
410: mov [edi-1],cl
411: mov ecx,culMiddle ;ecx = # of middle dwords (cl is free again)
412: rep movsd ;copy them, advancing esi and edi
413: add esi,eax ;esi = aligned start of the next source scan
414: add edi,ebx ;edi = aligned start of the next destination scan
415: endm ;--------------------------------;
416:
417: align 4
418: copy_wide_w_10_loop:
419: UNROLL_LOOP COPY_WIDE_W_10,W_10,LOOP_UNROLL_COUNT
420: dec edx ;one more pass done
421: jnz copy_wide_w_10_loop
422: jmp src8_done ;bank finished: return or map the next bank
423:
424: ;-----------------------------------------------------------------------;
425: ; Copy n dwords, 1 leading byte, 1 trailing bytes, then advance to next
426: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading byte sits just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
427:
428: COPY_WIDE_W_11 macro ENTRY_LABEL,ENTRY_INDEX
429: &ENTRY_LABEL&ENTRY_INDEX&:
430: mov cl,[esi-1] ;leading byte, just below the aligned address
431: mov [edi-1],cl
432: mov ecx,culMiddle ;ecx = # of middle dwords (cl is free again)
433: rep movsd ;copy them, advancing esi and edi
434: mov cl,[esi] ;trailing byte sits right after the middle dwords
435: mov [edi],cl
436: add esi,eax ;pointers were not moved by the edge copies, so the
437: add edi,ebx ; deltas land on the aligned start of the next scan
438: endm ;--------------------------------;
439:
440: align 4
441: copy_wide_w_11_loop:
442: UNROLL_LOOP COPY_WIDE_W_11,W_11,LOOP_UNROLL_COUNT
443: dec edx ;one more pass done
444: jnz copy_wide_w_11_loop
445: jmp src8_done ;bank finished: return or map the next bank
446:
447: ;-----------------------------------------------------------------------;
448: ; Copy n dwords, 1 leading byte, 2 trailing bytes, then advance to next
449: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading byte sits just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
450:
451: COPY_WIDE_W_12 macro ENTRY_LABEL,ENTRY_INDEX
452: &ENTRY_LABEL&ENTRY_INDEX&:
453: mov cl,[esi-1] ;leading byte, just below the aligned address
454: mov [edi-1],cl
455: mov ecx,culMiddle ;ecx = # of middle dwords (cl is free again)
456: rep movsd ;copy them, advancing esi and edi
457: mov cx,[esi] ;two trailing bytes moved as one word
458: mov [edi],cx
459: add esi,eax ;pointers were not moved by the edge copies, so the
460: add edi,ebx ; deltas land on the aligned start of the next scan
461: endm ;--------------------------------;
462:
463: align 4
464: copy_wide_w_12_loop:
465: UNROLL_LOOP COPY_WIDE_W_12,W_12,LOOP_UNROLL_COUNT
466: dec edx ;one more pass done
467: jnz copy_wide_w_12_loop
468: jmp src8_done ;bank finished: return or map the next bank
469:
470: ;-----------------------------------------------------------------------;
471: ; Copy n dwords, 1 leading byte, 3 trailing bytes, then advance to next
472: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading byte sits just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
473:
474: COPY_WIDE_W_13 macro ENTRY_LABEL,ENTRY_INDEX
475: &ENTRY_LABEL&ENTRY_INDEX&:
476: mov cl,[esi-1] ;leading byte, just below the aligned address
477: mov [edi-1],cl
478: mov ecx,culMiddle ;ecx = # of middle dwords (cl is free again)
479: rep movsd ;copy them, advancing esi and edi
480: mov cx,[esi] ;trailing bytes 0-1 moved as one word
481: mov [edi],cx
482: mov cl,[esi+2] ;trailing byte 2
483: mov [edi+2],cl
484: add esi,eax ;pointers were not moved by the edge copies, so the
485: add edi,ebx ; deltas land on the aligned start of the next scan
486: endm ;--------------------------------;
487:
488: align 4
489: copy_wide_w_13_loop:
490: UNROLL_LOOP COPY_WIDE_W_13,W_13,LOOP_UNROLL_COUNT
491: dec edx ;one more pass done
492: jnz copy_wide_w_13_loop
493: jmp src8_done ;bank finished: return or map the next bank
494:
495: ;-----------------------------------------------------------------------;
496: ; Copy n dwords, 2 leading bytes, 0 trailing bytes, then advance to next
497: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
498:
499: COPY_WIDE_W_20 macro ENTRY_LABEL,ENTRY_INDEX
500: &ENTRY_LABEL&ENTRY_INDEX&:
501: mov cx,[esi-2] ;two leading bytes moved as one word
502: mov [edi-2],cx
503: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
504: rep movsd ;copy them, advancing esi and edi
505: add esi,eax ;esi = aligned start of the next source scan
506: add edi,ebx ;edi = aligned start of the next destination scan
507: endm ;--------------------------------;
508:
509: align 4
510: copy_wide_w_20_loop:
511: UNROLL_LOOP COPY_WIDE_W_20,W_20,LOOP_UNROLL_COUNT
512: dec edx ;one more pass done
513: jnz copy_wide_w_20_loop
514: jmp src8_done ;bank finished: return or map the next bank
515:
516: ;-----------------------------------------------------------------------;
517: ; Copy n dwords, 2 leading bytes, 1 trailing bytes, then advance to next
518: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
519:
520: COPY_WIDE_W_21 macro ENTRY_LABEL,ENTRY_INDEX
521: &ENTRY_LABEL&ENTRY_INDEX&:
522: mov cx,[esi-2] ;two leading bytes moved as one word
523: mov [edi-2],cx
524: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
525: rep movsd ;copy them, advancing esi and edi
526: mov cl,[esi] ;trailing byte sits right after the middle dwords
527: mov [edi],cl
528: add esi,eax ;pointers were not moved by the edge copies, so the
529: add edi,ebx ; deltas land on the aligned start of the next scan
530: endm ;--------------------------------;
531:
532: align 4
533: copy_wide_w_21_loop:
534: UNROLL_LOOP COPY_WIDE_W_21,W_21,LOOP_UNROLL_COUNT
535: dec edx ;one more pass done
536: jnz copy_wide_w_21_loop
537: jmp src8_done ;bank finished: return or map the next bank
538:
539: ;-----------------------------------------------------------------------;
540: ; Copy n dwords, 2 leading bytes, 2 trailing bytes, then advance to next
541: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
542:
543: COPY_WIDE_W_22 macro ENTRY_LABEL,ENTRY_INDEX
544: &ENTRY_LABEL&ENTRY_INDEX&:
545: mov cx,[esi-2] ;two leading bytes moved as one word
546: mov [edi-2],cx
547: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
548: rep movsd ;copy them, advancing esi and edi
549: mov cx,[esi] ;two trailing bytes moved as one word
550: mov [edi],cx
551: add esi,eax ;pointers were not moved by the edge copies, so the
552: add edi,ebx ; deltas land on the aligned start of the next scan
553: endm ;--------------------------------;
554:
555: align 4
556: copy_wide_w_22_loop:
557: UNROLL_LOOP COPY_WIDE_W_22,W_22,LOOP_UNROLL_COUNT
558: dec edx ;one more pass done
559: jnz copy_wide_w_22_loop
560: jmp src8_done ;bank finished: return or map the next bank
561:
562: ;-----------------------------------------------------------------------;
563: ; Copy n dwords, 2 leading bytes, 3 trailing bytes, then advance to next
564: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
565:
566: COPY_WIDE_W_23 macro ENTRY_LABEL,ENTRY_INDEX
567: &ENTRY_LABEL&ENTRY_INDEX&:
568: mov cx,[esi-2] ;two leading bytes moved as one word
569: mov [edi-2],cx
570: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
571: rep movsd ;copy them, advancing esi and edi
572: mov cx,[esi] ;trailing bytes 0-1 moved as one word
573: mov [edi],cx
574: mov cl,[esi+2] ;trailing byte 2
575: mov [edi+2],cl
576: add esi,eax ;pointers were not moved by the edge copies, so the
577: add edi,ebx ; deltas land on the aligned start of the next scan
578: endm ;--------------------------------;
579:
580: align 4
581: copy_wide_w_23_loop:
582: UNROLL_LOOP COPY_WIDE_W_23,W_23,LOOP_UNROLL_COUNT
583: dec edx ;one more pass done
584: jnz copy_wide_w_23_loop
585: jmp src8_done ;bank finished: return or map the next bank
586:
587: ;-----------------------------------------------------------------------;
588: ; Copy n dwords, 3 leading bytes, 0 trailing bytes, then advance to next
589: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
590:
591: COPY_WIDE_W_30 macro ENTRY_LABEL,ENTRY_INDEX
592: &ENTRY_LABEL&ENTRY_INDEX&:
593: mov cl,[esi-3] ;first of the three leading bytes
594: mov [edi-3],cl
595: mov cx,[esi-2] ;remaining two leading bytes moved as one word
596: mov [edi-2],cx
597: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
598: rep movsd ;copy them, advancing esi and edi
599: add esi,eax ;esi = aligned start of the next source scan
600: add edi,ebx ;edi = aligned start of the next destination scan
601: endm ;--------------------------------;
602:
603: align 4
604: copy_wide_w_30_loop:
605: UNROLL_LOOP COPY_WIDE_W_30,W_30,LOOP_UNROLL_COUNT
606: dec edx ;one more pass done
607: jnz copy_wide_w_30_loop
608: jmp src8_done ;bank finished: return or map the next bank
609:
610: ;-----------------------------------------------------------------------;
611: ; Copy n dwords, 3 leading bytes, 1 trailing bytes, then advance to next
612: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
613:
614: COPY_WIDE_W_31 macro ENTRY_LABEL,ENTRY_INDEX
615: &ENTRY_LABEL&ENTRY_INDEX&:
616: mov cl,[esi-3] ;first of the three leading bytes
617: mov [edi-3],cl
618: mov cx,[esi-2] ;remaining two leading bytes moved as one word
619: mov [edi-2],cx
620: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
621: rep movsd ;copy them, advancing esi and edi
622: mov cl,[esi] ;trailing byte sits right after the middle dwords
623: mov [edi],cl
624: add esi,eax ;pointers were not moved by the edge copies, so the
625: add edi,ebx ; deltas land on the aligned start of the next scan
626: endm ;--------------------------------;
627:
628: align 4
629: copy_wide_w_31_loop:
630: UNROLL_LOOP COPY_WIDE_W_31,W_31,LOOP_UNROLL_COUNT
631: dec edx ;one more pass done
632: jnz copy_wide_w_31_loop
633: jmp src8_done ;bank finished: return or map the next bank
634:
635: ;-----------------------------------------------------------------------;
636: ; Copy n dwords, 3 leading bytes, 2 trailing bytes, then advance to next
637: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
638:
639: COPY_WIDE_W_32 macro ENTRY_LABEL,ENTRY_INDEX
640: &ENTRY_LABEL&ENTRY_INDEX&:
641: mov cl,[esi-3] ;first of the three leading bytes
642: mov [edi-3],cl
643: mov cx,[esi-2] ;remaining two leading bytes moved as one word
644: mov [edi-2],cx
645: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
646: rep movsd ;copy them, advancing esi and edi
647: mov cx,[esi] ;two trailing bytes moved as one word
648: mov [edi],cx
649: add esi,eax ;pointers were not moved by the edge copies, so the
650: add edi,ebx ; deltas land on the aligned start of the next scan
651: endm ;--------------------------------;
652:
653: align 4
654: copy_wide_w_32_loop:
655: UNROLL_LOOP COPY_WIDE_W_32,W_32,LOOP_UNROLL_COUNT
656: dec edx ;one more pass done
657: jnz copy_wide_w_32_loop
658: jmp src8_done ;bank finished: return or map the next bank
659:
660: ;-----------------------------------------------------------------------;
661: ; Copy n dwords, 3 leading bytes, 3 trailing bytes, then advance to next
662: ; scan line.
; Expects esi/edi = source/destination address of the first dword-aligned
; byte of the scan (the leading bytes sit just below it), eax/ebx =
; source/destination delta (scan stride minus the middle-dword bytes),
; edx = # of passes left. ecx is scratch.
663:
664: COPY_WIDE_W_33 macro ENTRY_LABEL,ENTRY_INDEX
665: &ENTRY_LABEL&ENTRY_INDEX&:
666: mov cl,[esi-3] ;first of the three leading bytes
667: mov [edi-3],cl
668: mov cx,[esi-2] ;remaining two leading bytes moved as one word
669: mov [edi-2],cx
670: mov ecx,culMiddle ;ecx = # of middle dwords (cx is free again)
671: rep movsd ;copy them, advancing esi and edi
672: mov cx,[esi] ;trailing bytes 0-1 moved as one word
673: mov [edi],cx
674: mov cl,[esi+2] ;trailing byte 2
675: mov [edi+2],cl
676: add esi,eax ;pointers were not moved by the edge copies, so the
677: add edi,ebx ; deltas land on the aligned start of the next scan
678: endm ;--------------------------------;
679:
680: align 4
681: copy_wide_w_33_loop:
682: UNROLL_LOOP COPY_WIDE_W_33,W_33,LOOP_UNROLL_COUNT
683: dec edx ;one more pass done
684: jnz copy_wide_w_33_loop
685: jmp src8_done ;bank finished: return or map the next bank
686:
687: ;-----------------------------------------------------------------------;
688: ; Copy 1 byte, then advance to next scan line.
; Used when the destination starts and ends inside one dword. Expects
; esi/edi = source/destination address of the first byte of the scan,
; eax = lSrcDelta, ebx = ppdev->lNextScan (full scan strides, as set up at
; src8_one_dword), edx = # of passes left. ecx is scratch.
689:
690: COPY_THIN_T_1 macro ENTRY_LABEL,ENTRY_INDEX
691: &ENTRY_LABEL&ENTRY_INDEX&:
692: mov cl,[esi] ;the single byte of this scan
693: mov [edi],cl
694: add esi,eax ;esi = same column on the next source scan
695: add edi,ebx ;edi = same column on the next destination scan
696: endm ;--------------------------------;
697:
698: align 4
699: copy_thin_t_1_loop:
700: UNROLL_LOOP COPY_THIN_T_1,T_1,LOOP_UNROLL_COUNT
701: dec edx ;one more pass done
702: jnz copy_thin_t_1_loop
703: jmp src8_done ;bank finished: return or map the next bank
704:
705: ;-----------------------------------------------------------------------;
706: ; Copy 2 bytes, then advance to next scan line.
; Used when the destination starts and ends inside one dword. Expects
; esi/edi = source/destination address of the first byte of the scan,
; eax = lSrcDelta, ebx = ppdev->lNextScan (full scan strides, as set up at
; src8_one_dword), edx = # of passes left. ecx is scratch.
707:
708: COPY_THIN_T_2 macro ENTRY_LABEL,ENTRY_INDEX
709: &ENTRY_LABEL&ENTRY_INDEX&:
710: mov cx,[esi] ;both bytes of this scan moved as one word
711: mov [edi],cx
712: add esi,eax ;esi = same column on the next source scan
713: add edi,ebx ;edi = same column on the next destination scan
714: endm ;--------------------------------;
715:
716: align 4
717: copy_thin_t_2_loop:
718: UNROLL_LOOP COPY_THIN_T_2,T_2,LOOP_UNROLL_COUNT
719: dec edx ;one more pass done
720: jnz copy_thin_t_2_loop
721: jmp src8_done ;bank finished: return or map the next bank
722:
723: ;-----------------------------------------------------------------------;
724: ; Copy 3 bytes, then advance to next scan line.
; Used when the destination starts and ends inside one dword. Expects
; esi/edi = source/destination address of the first byte of the scan,
; eax = lSrcDelta, ebx = ppdev->lNextScan (full scan strides, as set up at
; src8_one_dword), edx = # of passes left. ecx is scratch.
725:
726: COPY_THIN_T_3 macro ENTRY_LABEL,ENTRY_INDEX
727: &ENTRY_LABEL&ENTRY_INDEX&:
728: mov cx,[esi] ;bytes 0-1 of this scan moved as one word
729: mov [edi],cx
730: mov cl,[esi+2] ;byte 2
731: mov [edi+2],cl
732: add esi,eax ;esi = same column on the next source scan
733: add edi,ebx ;edi = same column on the next destination scan
734: endm ;--------------------------------;
735:
736: align 4
737: copy_thin_t_3_loop:
738: UNROLL_LOOP COPY_THIN_T_3,T_3,LOOP_UNROLL_COUNT
739: dec edx ;one more pass done
740: jnz copy_thin_t_3_loop
741: jmp src8_done ;bank finished: return or map the next bank
742:
743: public copy_wide_w_00_loop
744: public copy_wide_w_01_loop
745: public copy_wide_w_02_loop
746: public copy_wide_w_03_loop
747: public copy_wide_w_10_loop
748: public copy_wide_w_11_loop
749: public copy_wide_w_12_loop
750: public copy_wide_w_13_loop
751: public copy_wide_w_20_loop
752: public copy_wide_w_21_loop
753: public copy_wide_w_22_loop
754: public copy_wide_w_23_loop
755: public copy_wide_w_30_loop
756: public copy_wide_w_31_loop
757: public copy_wide_w_32_loop
758: public copy_wide_w_33_loop
759: public copy_thin_t_1_loop
760: public copy_thin_t_2_loop
761: public copy_thin_t_3_loop
; NOTE(review): these loop-top labels are only jumped to from inside this
; procedure; they are presumably made public so they show up as symbols
; for debugging or profiling - confirm before removing any.
762:
763: endProc vSrcCopy8bpp ;closes the cProc opened above
764:
765: end
766:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.