-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathshift.go
135 lines (128 loc) · 4.2 KB
/
shift.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asmgen
// shiftVU emits the assembly routine called name, which must be
// either lshVU or rshVU. The generated routines compute
// z, c = x << s and z, c = x >> s respectively, for 0 < s < _W.
func shiftVU(a *Asm, name string) {
	// Callers such as z.Lsh(z, N) and z.Rsh(z, N) can hand us input and
	// output slices that overlap at different offsets. On a 64-bit system,
	// z.Lsh(z, 65) yields &z[0] == &x[1], and z.Rsh(z, 65) yields
	// &z[1] == &x[0]. To never overwrite a word that has not been read yet,
	// a left shift walks from len(z)-1 down to 0 and a right shift walks
	// from 0 up to len(z)-1. The opposite traversals also keep the two
	// directions uniform: the output always trails the input by one word.

	fn := a.Func("func " + name + "(z, x []Word, s uint) (c Word)")

	// The code below begins by reading one word, so empty input exits early.
	count := fn.Arg("z_len")
	a.JmpZero(count, "ret0")

	// Configure the traversal and fetch the first input word.
	amt := fn.ArgHint("s", HintShiftCount)
	pipe := fn.Pipe()
	if name == "lshVU" {
		pipe.SetBackward()
	}
	var unroll []int
	if a.Arch == Arch386 {
		// Too few registers for anything beyond one word per iteration.
		unroll = []int{1}
		pipe.SetUseIndexCounter()
	} else {
		unroll = []int{1, 4}
	}
	pipe.LoadPtrs(count)
	a.Comment("shift first word into carry")
	pending := pipe.LoadN(1)[0][0]

	// Select the emitters for the requested direction. Architectures with
	// a wide shift (x86) use it directly; the others combine a shift by s
	// with a reverse shift by 64-s or 32-s.
	fwd, fwdWide, rev, revReg := a.Lsh, a.LshWide, a.Rsh, a.RshReg
	if name == "rshVU" {
		fwd, fwdWide, rev, revReg = a.Rsh, a.RshWide, a.Lsh, a.LshReg
	}

	switch {
	case a.Arch.HasShiftWide():
		// Wide-shift strategy: no reverse shift is required.
		// Invariant: pending is the previous input word, completely
		// unshifted, ready to feed the next wide shift. Once the loop is
		// done, pending is turned into the final output word.
		carry := a.Reg()
		fwdWide(amt, pending, a.Imm(0), carry)
		fn.StoreArg(carry, "c")
		a.Free(carry)
		a.Comment("shift remaining words")
		pipe.Start(count, unroll...)
		pipe.Loop(func(in, out [][]Reg) {
			// Outputs live in the input registers, one step behind;
			// pending supplies the first output. After the stores, the
			// last input word is copied into pending for the next round.
			cur := pending
			for i, next := range in[0] {
				fwdWide(amt, next, cur, cur)
				out[0][i] = cur
				cur = next
			}
			pipe.StoreN(out)
			a.Mov(cur, pending)
		})
		a.Comment("store final shifted bits")
		fwd(amt, pending, pending)

	default:
		// Two-shift strategy: build each output from x << s and x >> (64-s).
		// Invariant after the first word: pending is x << s, the high bits
		// of the next output word, waiting for the low bits that come from
		// the next input word. Once the loop is done, pending is the final
		// output word.
		comp := a.Reg()
		a.Mov(a.Imm(a.Arch.WordBits), comp)
		a.Sub(amt, comp, comp, SmashCarry)
		carry := a.Reg()
		rev(comp, pending, carry)
		fwd(amt, pending, pending)
		fn.StoreArg(carry, "c")
		a.Free(carry)
		a.Comment("shift remaining words")
		pipe.Start(count, unroll...)
		pipe.Loop(func(in, out [][]Reg) {
			if !a.HasRegShift() {
				// Outputs reuse the input registers, one step behind;
				// only the first output needs a newly allocated register.
				dst := a.Reg()
				for i, word := range in[0] {
					rev(comp, word, dst)
					a.Or(pending, dst, dst)
					fwd(amt, word, pending)
					out[0][i] = dst
					dst = word
				}
				pipe.StoreN(out)
				return
			}

			// 32-bit ARM accepts a shifted operand in most arithmetic
			// instructions, OR included, so the reverse shift folds into
			// the a.Or. Register management is simplest with one StoreN
			// per output word; multi-register stores are not used on ARM,
			// so nothing is lost by that.
			out[0] = out[0][:1]
			for _, word := range in[0] {
				a.Or(revReg(comp, word), pending, pending)
				out[0][0] = pending
				pipe.StoreN(out)
				fwd(amt, word, pending)
			}
		})
		a.Comment("store final shifted bits")
	}

	// Write the last output word, which both strategies leave in pending.
	pipe.StoreN([][]Reg{{pending}})
	pipe.Done()
	a.Free(amt)
	a.Ret()

	// Empty-input exit, targeted by the JmpZero above: return c = 0.
	a.Label("ret0")
	fn.StoreArg(a.Imm(0), "c")
	a.Ret()
}