|
|
/*
 * strlen.S (c) 1995 David Mosberger ([email protected])
 *
 * Finds the length of a 0-terminated string on the Alpha
 * architecture (C equivalent: size_t strlen(const char *s)).
 *
 * Technique:
 *
 *	- memory is only ever read as aligned quadwords
 *	- cmpbge tests all 8 bytes of a quadword for 0 in parallel
 *	- a binary search over the cmpbge bitmask locates the 0 byte
 *	  inside the final quadword (HAKMEM needed 12 instructions
 *	  for this step; the binary search needs 9)
 *
 * Register use:
 *
 *	$16	in	start of the string; read, never written
 *	$0	out	length; until the final subq it holds the address
 *			of the quadword (later: of the byte) being examined
 *	$1	tmp	quadword under test, then the masked search bit
 *	$2	tmp	cmpbge bitmask: bit i set <==> byte i is 0
 *	$3	tmp	candidate values for the conditional moves
 *	$26	in	return address
 *
 * No stack is used and no other register is written.  The whole
 * aligned quadword containing the terminator is read, so up to 7 bytes
 * in front of the string and up to 7 bytes after the terminator are
 * loaded (but never beyond that quadword).
 */

	.set	noreorder		# instruction order below is hand-scheduled
	.set	noat

	.align	3

	.globl	strlen
	.ent	strlen
strlen:
	.frame	$sp, 0, $26, 0		# frame size 0, return address in $26
	.prologue 0

	/*
	 * First quadword.  ldq_u ignores the low three address bits, so
	 * the load may pick up bytes that lie in front of the string.
	 * insqh turns the all-ones pattern into 0xff in exactly those
	 * leading byte lanes (none when $16 is aligned); or-ing that in
	 * makes them non-zero so cmpbge cannot mistake them for the
	 * terminator.
	 */
	ldq_u	$1, 0($16)		# load first quadword ($16 may be misaligned)
	lda	$2, -1($31)		# $2 = all ones
	insqh	$2, $16, $2		# $2 = 0xff in the low ($16 & 7) bytes
	andnot	$16, 7, $0		# $0 = $16 rounded down to 8 bytes
	or	$2, $1, $1		# hide the bytes in front of the string
	cmpbge	$31, $1, $2		# $2 <- bitmask: bit i == 1 <==> i-th byte == 0
	bne	$2, $found		# terminator is already in this quadword

	/* Main loop: one aligned quadword per pass until a 0 byte shows up. */
$loop:	ldq	$1, 8($0)
	addq	$0, 8, $0		# addr += 8
	nop				# helps dual issue last two insns
	cmpbge	$31, $1, $2
	beq	$2, $loop

	/*
	 * Here $0 is the address of the quadword that holds the terminator
	 * and $2 has one bit set per 0 byte in it; the lowest set bit marks
	 * the terminator.  If that is bit 0, $0 already points at the
	 * terminator and the search is skipped.
	 */
$found:	blbs	$2, $done		# make aligned case fast
	negq	$2, $3
	and	$2, $3, $2		# keep only the lowest set bit

	/* Binary search for the index of that bit, added to $0 step by step. */
	and	$2, 0x0f, $1		# bit among bytes 0-3?
	addq	$0, 4, $3
	cmoveq	$1, $3, $0		# if not: addr += 4

	and	$2, 0x33, $1		# bit among bytes 0, 1, 4, 5?
	addq	$0, 2, $3
	cmoveq	$1, $3, $0		# if not: addr += 2

	and	$2, 0x55, $1		# bit at an even byte index?
	addq	$0, 1, $3
	cmoveq	$1, $3, $0		# if not: addr += 1

$done:	subq	$0, $16, $0		# length = address of terminator - start
	ret	$31, ($26)

	.end	strlen
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.