/* src/md-i386-gcc/X86.S */
/* These are optimized x86 assembly versions of pfield_linetoscr.
* Feel free to send me Sparc/PPC/Alpha versions of this... :)
* [it's not necessarily a win to code these in assembler, though - Paul
* Liss says this code is slower than the generic C stuff in custom.c on
* a PPro]
*/
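/* A sketch of the C-side view of DitherLine below, reconstructed from
 * the stack offsets and table strides (the argument names and the
 * exact cidx layout are assumptions, not taken from the headers):
 *
 *   void DitherLine (uae_u8 *dst, uae_u16 *pixels, int x, int y,
 *                    uae_s16 len, int bpp);
 *
 * It converts len 12-bit R4G4B4 pixels to bpp-bit palette indices,
 * dithering through the external cidx table, which appears to be laid
 * out as [4 y-phases][8 x-phases][4096 pixel values] of one byte each.
 */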
/* Define X86_PPRO_OPT to avoid partial-register stalls on the
 * PPro/P6 core: */
/*#define X86_PPRO_OPT*/
#ifdef X86_PPRO_OPT
#define PARTIAL_REG(a,b) a
#define BYTE_MOVE movzbl
#define WORD_MOVE movzwl
#define CLEAR_FOR_BYTE_MOVE(a)
#else
#define PARTIAL_REG(a,b) b
#define BYTE_MOVE movb
#define WORD_MOVE movw
#define CLEAR_FOR_BYTE_MOVE(a) xorl a,a
#endif
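/* For example, "BYTE_MOVE foo,PARTIAL_REG(%eax,%al)" expands to
 * "movzbl foo,%eax" with X86_PPRO_OPT, which rewrites the whole
 * register, and to "movb foo,%al" without it, which is smaller but
 * suffers a partial-register stall on the P6 core.  Note that the
 * DitherLine code below uses movb/movw directly, not these macros. */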
#ifndef USE_UNDERSCORE
#define SYM(NAME) NAME
#define FUNCTION_ALIGN .align 16
#define FUNCTYPE(NAME) .type NAME,@function
#else
#define SYM(NAME) _##NAME
#define FUNCTION_ALIGN .align 4
#define FUNCTYPE(NAME)
#endif
	.text
	.globl SYM(DitherLine)
	FUNCTYPE(DitherLine)
	FUNCTION_ALIGN
SYM(DitherLine):
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl 20(%esp),%edi		/* arg 1: destination pointer */
	xorl %ebx,%ebx
	movw 36(%esp),%bx		/* arg 5: pixel count (16-bit) */
	movl 32(%esp),%edx		/* arg 4: y */
	andl $3,%edx
	sall $15,%edx			/* (y & 3) * 32768: dither row */
	movl 28(%esp),%eax		/* arg 3: x */
	andl $3,%eax
	sall $12,%eax			/* (x & 3) * 4096: dither column */
	leal SYM(cidx)(%edx,%eax),%ebp	/* table base for this (x,y) phase */
	xorb %dl,%dl			/* %dl accumulates output bits */
	movl $8,%ecx			/* %ecx = bits left in the output byte */
	testl %ebx,%ebx
	je .Li_end			/* nothing to do */
	cmpl $8,40(%esp)		/* arg 6: bits per pixel */
	je .Li_fast			/* 8 bpp has a specialized loop */
	movl 24(%esp),%esi		/* arg 2: source pixel pointer */
/* Generic loop: pack successive bpp-bit indices MSB-first into %dl and
 * flush each completed byte.  Four pixels are handled per iteration,
 * each looked up in the next x-phase of the table (4096 bytes apart). */
.Li_loop:
	movzwl (%esi),%eax		/* 12-bit R4G4B4 pixel */
	movzbl (%eax,%ebp),%eax		/* dithered palette index */
	subl 40(%esp),%ecx		/* make room for bpp bits */
	sall %cl,%eax
	orb %al,%dl			/* merge into the output byte */
	testl %ecx,%ecx
	jne .Li_1
	movb %dl,(%edi)			/* byte full: flush it */
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_1:
	movzwl 2(%esi),%eax
	movzbl 4096(%ebp,%eax),%eax	/* next x-phase of the table */
	subl 40(%esp),%ecx
	sall %cl,%eax
	orb %al,%dl
	testl %ecx,%ecx
	jne .Li_2
	movb %dl,(%edi)
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_2:
	movzwl 4(%esi),%eax
	movzbl 8192(%ebp,%eax),%eax
	subl 40(%esp),%ecx
	sall %cl,%eax
	orb %al,%dl
	testl %ecx,%ecx
	jne .Li_3
	movb %dl,(%edi)
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_3:
	movzwl 6(%esi),%eax
	movzbl 12288(%ebp,%eax),%eax
	addl $8,%esi			/* four 16-bit pixels consumed */
	subl 40(%esp),%ecx
	sall %cl,%eax
	orb %al,%dl
	testl %ecx,%ecx
	jne .Li_4
	movb %dl,(%edi)
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_4:
	subl $4,%ebx
	jne .Li_loop
	jmp .Li_end
/* Fast 8-bit version: one table lookup per pixel, four pixels packed
 * into a register and stored with a single 32-bit write.  The loads
 * alternate between %edx and %ecx so they can pair on a Pentium. */
.Li_fast:
	movl 24(%esp),%esi		/* arg 2: source pixel pointer */
	xorl %edx,%edx
	xorl %ecx,%ecx
	FUNCTION_ALIGN
.Li_fast_loop:
	movw (%esi),%dx			/* pixel 0 */
	movw 2(%esi),%cx		/* pixel 1 */
	movb (%edx,%ebp),%al
	movw 4(%esi),%dx		/* pixel 2 */
	movb 4096(%ebp,%ecx),%ah
	movw 6(%esi),%cx		/* pixel 3 */
	sall $16,%eax			/* pixels 0/1 to the high half */
	movb 8192(%ebp,%edx),%al
	movb 12288(%ebp,%ecx),%ah
	roll $16,%eax			/* %eax = p3:p2:p1:p0 */
	movl %eax,(%edi)		/* little-endian store: p0 first */
	addl $4,%edi
	addl $8,%esi
	subl $4,%ebx
	jne .Li_fast_loop
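/* Byte trace of the packing above, assuming the lookups return a..d
 * for pixels 0..3: %eax = ?:?:b:a, then b:a:0:0 after the sall (which
 * also discards the stale high half), then b:a:d:c, then d:c:b:a after
 * the roll; the little-endian store writes a,b,c,d, i.e. pixel order. */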
.Li_end:
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	ret
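/* For reference, a C sketch of the generic path above (a reading of
 * the assembly, not code from the project; len is assumed to be a
 * positive multiple of four, as the loop structure requires):
 *
 *   void DitherLine (uae_u8 *dst, uae_u16 *pixels, int x, int y,
 *                    uae_s16 len, int bpp)
 *   {
 *       uae_u8 *tbl = cidx + ((y & 3) << 15) + ((x & 3) << 12);
 *       int bits = 8;
 *       uae_u8 acc = 0;
 *       while (len != 0) {
 *           int i;
 *           for (i = 0; i < 4; i++) {
 *               bits -= bpp;
 *               acc |= tbl[i * 4096 + *pixels++] << bits;
 *               if (bits == 0) {        |* output byte complete *|
 *                   *dst++ = acc;
 *                   acc = 0;
 *                   bits = 8;
 *               }
 *           }
 *           len -= 4;
 *       }
 *   }
 */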
#if 0
	.globl compiler_do_rts
/* Entry: EDX == regs.regs + 15 */
compiler_do_rts:
	movl (%edx),%esi		/* regs.regs[15] is the 68k stack pointer (A7) */
	addl address_space,%esi		/* translate it to a host address */