-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindexbyte_riscv64.s
139 lines (109 loc) · 2.79 KB
/
indexbyte_riscv64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40
	// Register arguments on entry:
	//   X10 = b_base
	//   X11 = b_len
	//   X12 = b_cap (ignored)
	//   X13 = byte to find
	// Result:
	//   X10 = offset of the first matching byte from b_base, or -1 if
	//         no byte matches.
	AND	$0xff, X13, X12		// X12 = target byte with the upper bits cleared
	SLTI	$24, X11, X14		// X14 = 1 when b_len < 24, otherwise 0
	MOV	X10, X13		// X13 = b_base, kept to turn a match address into an index
	ADD	X10, X11, X11		// X11 = b_base + b_len, i.e. one past the last byte
	BEQZ	X14, large		// 24 bytes or more: hand over to indexByteBig

	// Short input: compare one byte at a time.
	BEQ	X10, X11, miss		// empty range, nothing to compare
scan:
	MOVBU	(X10), X14
	BEQ	X12, X14, hit
	ADD	$1, X10
	BNE	X10, X11, scan

miss:
	MOV	$-1, X10
	RET

hit:
	SUB	X13, X10, X10		// match address - b_base = index
	RET

large:
	// Tail call. At this point X10 = X13 = b_base, X11 = end and
	// X12 = byte to find, which is the state indexByteBig expects.
	JMP	indexByteBig<>(SB)
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32
	// Register arguments on entry:
	//   X10 = b_base
	//   X11 = b_len
	//   X12 = byte to find
	// Result:
	//   X10 = offset of the first matching byte from b_base, or -1 if
	//         no byte matches.
	MOV	X10, X13		// X13 = b_base, kept to turn a match address into an index
	AND	$0xff, X12, X12		// keep only the low 8 bits of the target byte
	SLTI	$24, X11, X14		// X14 = 1 when b_len < 24, otherwise 0
	ADD	X10, X11, X11		// X11 = b_base + b_len, i.e. one past the last byte
	BEQZ	X14, large		// 24 bytes or more: hand over to indexByteBig

	// Short input: compare one byte at a time.
	BEQ	X10, X11, miss		// empty range, nothing to compare
scan:
	MOVBU	(X10), X14
	BEQ	X12, X14, hit
	ADD	$1, X10
	BNE	X10, X11, scan

miss:
	MOV	$-1, X10
	RET

hit:
	SUB	X13, X10, X10		// match address - b_base = index
	RET

large:
	// Tail call. At this point X10 = X13 = b_base, X11 = end and
	// X12 = byte to find, which is the state indexByteBig expects.
	JMP	indexByteBig<>(SB)
TEXT indexByteBig<>(SB),NOSPLIT|NOFRAME,$0
// Word-at-a-time byte search used for longer inputs. It is entered by a
// tail jump from the IndexByte and IndexByteString routines in this file.
//
// On entry
// X10 = b_base (used as the running scan pointer)
// X11 = end (b_base + b_len, one past the last byte)
// X12 = byte to find (already masked to 8 bits by the callers)
// X13 = b_base (left untouched so the index can be computed at the end)
// X11 is at least 24 bytes > X10: both callers in this file only jump
// here when b_len >= 24.
// On exit
// X10 = index of first instance of sought byte, if found, or -1 otherwise
//
// Scratch registers written here: X14-X21.
// Process the first few bytes until we get to an 8 byte boundary.
// At most 7 bytes are examined here, so there is no need to check for
// end as we have at least 24 bytes in the buffer.
unalignedloop:
AND $7, X10, X14
BEQZ X14, aligned
MOVBU (X10), X14
BEQ X12, X14, found
ADD $1, X10
JMP unalignedloop
aligned:
AND $~7, X11, X15 // X15 = end of aligned data (X11 rounded down to a multiple of 8)
// We have at least 17 bytes left (24 minus the at most 7 consumed above),
// so X15 is strictly greater than X10. Both are multiples of 8, hence the
// loop below reads only whole words inside the buffer and its X10 != X15
// exit test is guaranteed to be hit.
// Use 'Determine if a word has a byte equal to n' bit hack from
// https://graphics.stanford.edu/~seander/bithacks.html to determine
// whether the byte is present somewhere in the next 8 bytes of the
// array.
MOV $0x0101010101010101, X16
SLLI $7, X16, X17 // X17 = 0x8080808080808080
MUL X12, X16, X18 // broadcast X12 to every byte in X18
alignedloop:
MOV (X10), X14
XOR X14, X18, X19
// If the least significant byte of X12 is present somewhere in the 8
// bytes we've just loaded into X14 then at least one of the bytes in
// X19 will be 0 after the XOR. If any of the bytes in X19 are zero then
//
// ((X19 - X16) & (~X19) & X17)
//
// will be non-zero. The expression will evaluate to zero if none of
// the bytes in X19 are zero, i.e., X12 is not present in X14.
//
// Below: X20 = X19 - X16, X21 = X17 & ~X19, then X21 = X21 & X20.
SUB X16, X19, X20
ANDN X19, X17, X21
AND X20, X21
BNEZ X21, tailloop // If X21 != 0 X12 is present in X14
ADD $8, X10
BNE X10, X15, alignedloop
// Reached either because the current word contains the byte (X10 still
// points at the start of that word) or because all aligned words have
// been checked (X10 == X15). In both cases scan byte by byte from X10
// up to X11; this pins down the exact position of a match and covers
// the bytes between X15 and X11.
tailloop:
SUB $1, X10 // compensate for the increment at the top of the loop
loop:
ADD $1, X10
BEQ X10, X11, notfound
MOVBU (X10), X14
BNE X12, X14, loop
found:
SUB X13, X10 // remove base: match address - b_base = index
RET
notfound:
MOV $-1, X10
RET