|
|
1.1 root 1: /*
2: * strlen.S (c) 1995 David Mosberger ([email protected])
3: *
4: * Finds length of a 0-terminated string. Optimized for the
5: * Alpha architecture:
6: *
7: * - memory accessed as aligned quadwords only
8: * - uses cmpbge to compare 8 bytes in parallel
9: * - does binary search to find 0 byte in last
10: * quadword (HAKMEM needed 12 instructions to
11: * do this instead of the 9 instructions that
12: * binary search needs).
13: */
14:
15: .set noreorder # keep instructions in exactly the written order (the code below is hand-scheduled)
16: .set noat # the assembler must not use the $at temporary register on its own
17:
18: .align 3 # align what follows to 2^3 = 8 bytes
19:
20: .globl strlen # make the strlen symbol visible to the linker
21: .ent strlen # open the procedure descriptor for strlen (closed by .end below)
22: strlen: # in: $16 = address of 0-terminated string; out: $0 = length in bytes; scratch: $1, $2, $3; stack untouched
23: .frame $sp, 0, $26, 0 # frame size 0 (no stack frame); return address lives in $26
24: .prologue 0 # NOTE(review): flag 0 presumably means "no $gp setup in the prologue" -- confirm against the gas Alpha docs
25:
26: ldq_u $1, 0($16) # load first quadword ($16 may be misaligned): fetches the aligned quadword containing the first byte
27: lda $2, -1($31) # $2 = -1 (all bits set); $31 always reads as zero
28: insqh $2, $16, $2 # $2 = 0xff in every byte that precedes the string start inside this quadword, 0 elsewhere (all 0 if $16 is aligned)
29: andnot $16, 7, $0 # $0 = $16 rounded down to an 8-byte boundary = address of the quadword held in $1
30: or $2, $1, $1 # force the bytes before the string start to non-zero so they cannot be mistaken for the terminator
31: cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0
32: bne $2, found # terminator is already in the first quadword
33:
34: loop: ldq $1, 8($0) # fetch the next aligned quadword
35: addq $0, 8, $0 # addr += 8 ($0 again names the quadword now in $1)
36: nop # helps dual issue last two insns
37: cmpbge $31, $1, $2 # $2 = per-byte "is zero" bitmask for this quadword
38: beq $2, loop # no zero byte here: keep scanning
39:
40: found: blbs $2, done # make aligned case fast: bit 0 set means byte 0 is the terminator and $0 already points at it
41: negq $2, $3 # $3 = -$2
42: and $2, $3, $2 # $2 & -$2 keeps only the lowest set bit = first zero byte in the quadword
43:
44: and $2, 0x0f, $1 # $1 != 0 iff the bit is in positions 0-3
45: addq $0, 4, $3 # candidate address: $0 + 4
46: cmoveq $1, $3, $0 # bit is in positions 4-7: advance $0 by 4
47:
48: and $2, 0x33, $1 # $1 != 0 iff the bit is in positions 0, 1, 4 or 5
49: addq $0, 2, $3 # candidate address: $0 + 2
50: cmoveq $1, $3, $0 # bit is in positions 2, 3, 6 or 7: advance $0 by 2
51:
52: and $2, 0x55, $1 # $1 != 0 iff the bit is at an even position
53: addq $0, 1, $3 # candidate address: $0 + 1
54: cmoveq $1, $3, $0 # bit is at an odd position: advance $0 by 1
55:
56: done: subq $0, $16, $0 # length = address of the terminating 0 byte - start of string
57: ret $31, ($26) # return to the address in $26; result is in $0
58:
59: .end strlen
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.