-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintrinsics.go
256 lines (230 loc) · 9.12 KB
/
intrinsics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sys
// Copied from math/bits to avoid dependence.
var deBruijn32tab = [32]byte{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
}
const deBruijn32 = 0x077CB531
var deBruijn64tab = [64]byte{
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
const deBruijn64 = 0x03f79d71b4ca8b09
const ntz8tab = "" +
"\x08\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x07\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x06\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x05\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00" +
"\x04\x00\x01\x00\x02\x00\x01\x00\x03\x00\x01\x00\x02\x00\x01\x00"
// TrailingZeros32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
func TrailingZeros32(x uint32) int {
if x == 0 {
return 32
}
// see comment in TrailingZeros64
return int(deBruijn32tab[(x&-x)*deBruijn32>>(32-5)])
}
// TrailingZeros64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
func TrailingZeros64(x uint64) int {
if x == 0 {
return 64
}
// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
//
// x & -x leaves only the right-most bit set in the word. Let k be the
// index of that bit. Since only a single bit is set, the value is two
// to the power of k. Multiplying by a power of two is equivalent to
// left shifting, in this case by k bits. The de Bruijn (64 bit) constant
// is such that all six bit, consecutive substrings are distinct.
// Therefore, if we have a left shifted version of this constant we can
// find by how many bits it was shifted by looking at which six bit
// substring ended up at the top of the word.
// (Knuth, volume 4, section 7.3.1)
return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)])
}
// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
func TrailingZeros8(x uint8) int {
return int(ntz8tab[x])
}
const len8tab = "" +
"\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04" +
"\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
// Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
//
// nosplit because this is used in src/runtime/histogram.go, which make run in sensitive contexts.
//
//go:nosplit
func Len64(x uint64) (n int) {
if x >= 1<<32 {
x >>= 32
n = 32
}
if x >= 1<<16 {
x >>= 16
n += 16
}
if x >= 1<<8 {
x >>= 8
n += 8
}
return n + int(len8tab[x])
}
// --- OnesCount ---
const m0 = 0x5555555555555555 // 01010101 ...
const m1 = 0x3333333333333333 // 00110011 ...
const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
// OnesCount64 returns the number of one bits ("population count") in x.
func OnesCount64(x uint64) int {
// Implementation: Parallel summing of adjacent bits.
// See "Hacker's Delight", Chap. 5: Counting Bits.
// The following pattern shows the general approach:
//
// x = x>>1&(m0&m) + x&(m0&m)
// x = x>>2&(m1&m) + x&(m1&m)
// x = x>>4&(m2&m) + x&(m2&m)
// x = x>>8&(m3&m) + x&(m3&m)
// x = x>>16&(m4&m) + x&(m4&m)
// x = x>>32&(m5&m) + x&(m5&m)
// return int(x)
//
// Masking (& operations) can be left away when there's no
// danger that a field's sum will carry over into the next
// field: Since the result cannot be > 64, 8 bits is enough
// and we can ignore the masks for the shifts by 8 and up.
// Per "Hacker's Delight", the first line can be simplified
// more, but it saves at best one instruction, so we leave
// it alone for clarity.
const m = 1<<64 - 1
x = x>>1&(m0&m) + x&(m0&m)
x = x>>2&(m1&m) + x&(m1&m)
x = (x>>4 + x) & (m2 & m)
x += x >> 8
x += x >> 16
x += x >> 32
return int(x) & (1<<7 - 1)
}
// LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0.
func LeadingZeros64(x uint64) int { return 64 - Len64(x) }
// LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0.
func LeadingZeros8(x uint8) int { return 8 - Len8(x) }
// Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0.
func Len8(x uint8) int {
return int(len8tab[x])
}
// Bswap64 returns its input with byte order reversed
// 0x0102030405060708 -> 0x0807060504030201
func Bswap64(x uint64) uint64 {
c8 := uint64(0x00ff00ff00ff00ff)
a := x >> 8 & c8
b := (x & c8) << 8
x = a | b
c16 := uint64(0x0000ffff0000ffff)
a = x >> 16 & c16
b = (x & c16) << 16
x = a | b
c32 := uint64(0x00000000ffffffff)
a = x >> 32 & c32
b = (x & c32) << 32
x = a | b
return x
}
// Bswap32 returns its input with byte order reversed
// 0x01020304 -> 0x04030201
func Bswap32(x uint32) uint32 {
c8 := uint32(0x00ff00ff)
a := x >> 8 & c8
b := (x & c8) << 8
x = a | b
c16 := uint32(0x0000ffff)
a = x >> 16 & c16
b = (x & c16) << 16
x = a | b
return x
}
// Prefetch prefetches data from memory addr to cache
//
// AMD64: Produce PREFETCHT0 instruction
//
// ARM64: Produce PRFM instruction with PLDL1KEEP option
func Prefetch(addr uintptr) {}
// PrefetchStreamed prefetches data from memory addr, with a hint that this data is being streamed.
// That is, it is likely to be accessed very soon, but only once. If possible, this will avoid polluting the cache.
//
// AMD64: Produce PREFETCHNTA instruction
//
// ARM64: Produce PRFM instruction with PLDL1STRM option
func PrefetchStreamed(addr uintptr) {}
// GetCallerPC returns the program counter (PC) of its caller's caller.
// GetCallerSP returns the stack pointer (SP) of its caller's caller.
// Both are implemented as intrinsics on every platform.
//
// For example:
//
// func f(arg1, arg2, arg3 int) {
// pc := GetCallerPC()
// sp := GetCallerSP()
// }
//
// These two lines find the PC and SP immediately following
// the call to f (where f will return).
//
// The call to GetCallerPC and GetCallerSP must be done in the
// frame being asked about.
//
// The result of GetCallerSP is correct at the time of the return,
// but it may be invalidated by any subsequent call to a function
// that might relocate the stack in order to grow or shrink it.
// A general rule is that the result of GetCallerSP should be used
// immediately and can only be passed to nosplit functions.
func GetCallerPC() uintptr
func GetCallerSP() uintptr
// GetClosurePtr returns the pointer to the current closure.
// GetClosurePtr can only be used in an assignment statement
// at the entry of a function. Moreover, go:nosplit directive
// must be specified at the declaration of caller function,
// so that the function prolog does not clobber the closure register.
// for example:
//
// //go:nosplit
// func f(arg1, arg2, arg3 int) {
// dx := GetClosurePtr()
// }
//
// The compiler rewrites calls to this function into instructions that fetch the
// pointer from a well-known register (DX on x86 architecture, etc.) directly.
//
// WARNING: PGO-based devirtualization cannot detect that caller of
// GetClosurePtr requires closure context, and thus must maintain a list of
// these functions, which is in
// cmd/compile/internal/devirtualize/pgo.maybeDevirtualizeFunctionCall.
func GetClosurePtr() uintptr