Skip to content

Commit a3f126b

Browse files
authored
Merge pull request #1972 from riscv-software-src/fix-vlen-32
Fix mask element accesses under VLEN=32
2 parents d85cd10 + 7988172 commit a3f126b

File tree

9 files changed: +49 additions, -89 deletions.

riscv/insns/vcompress_vm.h

+1-5
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,7 @@ require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), 1);
99
reg_t pos = 0;
1010

1111
VI_GENERAL_LOOP_BASE
12-
const int midx = i / 64;
13-
const int mpos = i % 64;
14-
15-
bool do_mask = (P.VU.elt<uint64_t>(rs1_num, midx) >> mpos) & 0x1;
16-
if (do_mask) {
12+
if (P.VU.mask_elt(rs1_num, i)) {
1713
switch (sew) {
1814
case e8:
1915
P.VU.elt<uint8_t>(rd_num, pos, true) = P.VU.elt<uint8_t>(rs2_num, i);

riscv/insns/vcpop_m.h

+2-10
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,7 @@ reg_t rs2_num = insn.rs2();
66
require(P.VU.vstart->read() == 0);
77
reg_t popcount = 0;
88
for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
9-
const int midx = i / 32;
10-
const int mpos = i % 32;
11-
12-
bool vs2_lsb = ((P.VU.elt<uint32_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
13-
if (insn.v_vm() == 1) {
14-
popcount += vs2_lsb;
15-
} else {
16-
bool do_mask = (P.VU.elt<uint32_t>(0, midx) >> mpos) & 0x1;
17-
popcount += (vs2_lsb && do_mask);
18-
}
9+
bool vs2_bit = P.VU.mask_elt(rs2_num, i);
10+
popcount += vs2_bit && (insn.v_vm() || P.VU.mask_elt(0, i));
1911
}
2012
WRITE_RD(popcount);

riscv/insns/vfirst_m.h

+1-2
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,7 @@ reg_t pos = -1;
88
for (reg_t i=P.VU.vstart->read(); i < vl; ++i) {
99
VI_LOOP_ELEMENT_SKIP()
1010

11-
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
12-
if (vs2_lsb) {
11+
if (P.VU.mask_elt(rs2_num, i)) {
1312
pos = i;
1413
break;
1514
}

riscv/insns/viota_m.h

+2-6
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,11 @@ require_noover(rd_num, P.VU.vflmul, rs2_num, 1);
1212

1313
int cnt = 0;
1414
for (reg_t i = 0; i < vl; ++i) {
15-
const int midx = i / 64;
16-
const int mpos = i % 64;
17-
18-
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
19-
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
15+
bool do_mask = P.VU.mask_elt(0, i);
2016

2117
bool has_one = false;
2218
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
23-
if (vs2_lsb) {
19+
if (P.VU.mask_elt(rs2_num, i)) {
2420
has_one = true;
2521
}
2622
}

riscv/insns/vmsbf_m.h

+6-11
Original file line number | Diff line number | Diff line change
@@ -11,22 +11,17 @@ reg_t rs2_num = insn.rs2();
1111

1212
bool has_one = false;
1313
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
14-
const int midx = i / 64;
15-
const int mpos = i % 64;
16-
const uint64_t mmask = UINT64_C(1) << mpos; \
17-
18-
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
19-
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
20-
14+
bool vs2_lsb = P.VU.mask_elt(rs2_num, i);
15+
bool do_mask = P.VU.mask_elt(0, i);
2116

2217
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
23-
auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
24-
uint64_t res = 0;
18+
bool res = false;
2519
if (!has_one && !vs2_lsb) {
26-
res = 1;
20+
res = true;
2721
} else if (!has_one && vs2_lsb) {
2822
has_one = true;
2923
}
30-
vd = (vd & ~mmask) | ((res << mpos) & mmask);
24+
25+
P.VU.set_mask_elt(rd_num, i, res);
3126
}
3227
}

riscv/insns/vmsif_m.h

+7-11
Original file line number | Diff line number | Diff line change
@@ -11,22 +11,18 @@ reg_t rs2_num = insn.rs2();
1111

1212
bool has_one = false;
1313
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
14-
const int midx = i / 64;
15-
const int mpos = i % 64;
16-
const uint64_t mmask = UINT64_C(1) << mpos; \
17-
18-
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
19-
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
14+
bool vs2_lsb = P.VU.mask_elt(rs2_num, i);
15+
bool do_mask = P.VU.mask_elt(0, i);
2016

2117
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
22-
auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
23-
uint64_t res = 0;
18+
bool res = false;
2419
if (!has_one && !vs2_lsb) {
25-
res = 1;
20+
res = true;
2621
} else if (!has_one && vs2_lsb) {
2722
has_one = true;
28-
res = 1;
23+
res = true;
2924
}
30-
vd = (vd & ~mmask) | ((res << mpos) & mmask);
25+
26+
P.VU.set_mask_elt(rd_num, i, res);
3127
}
3228
}

riscv/insns/vmsof_m.h

+6-10
Original file line number | Diff line number | Diff line change
@@ -11,20 +11,16 @@ reg_t rs2_num = insn.rs2();
1111

1212
bool has_one = false;
1313
for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) {
14-
const int midx = i / 64;
15-
const int mpos = i % 64;
16-
const uint64_t mmask = UINT64_C(1) << mpos; \
17-
18-
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
19-
bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
14+
bool vs2_lsb = P.VU.mask_elt(rs2_num, i);
15+
bool do_mask = P.VU.mask_elt(0, i);
2016

2117
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
22-
uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
23-
uint64_t res = 0;
18+
bool res = false;
2419
if (!has_one && vs2_lsb) {
2520
has_one = true;
26-
res = 1;
21+
res = true;
2722
}
28-
vd = (vd & ~mmask) | ((res << mpos) & mmask);
23+
24+
P.VU.set_mask_elt(rd_num, i, res);
2925
}
3026
}

riscv/v_ext_macros.h

+13-34
Original file line number | Diff line number | Diff line change
@@ -8,16 +8,10 @@
88
//
99
// vector: masking skip helper
1010
//
11-
#define VI_MASK_VARS \
12-
const int midx = i / 64; \
13-
const int mpos = i % 64;
14-
1511
#define VI_LOOP_ELEMENT_SKIP(BODY) \
16-
VI_MASK_VARS \
1712
if (insn.v_vm() == 0) { \
1813
BODY; \
19-
bool skip = ((P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1) == 0; \
20-
if (skip) { \
14+
if (!P.VU.mask_elt(0, i)) { \
2115
continue; \
2216
} \
2317
}
@@ -231,24 +225,18 @@ static inline bool is_overlapped_widen(const int astart, int asize,
231225

232226
#define VI_LOOP_CARRY_BASE \
233227
VI_GENERAL_LOOP_BASE \
234-
VI_MASK_VARS \
235-
auto v0 = P.VU.elt<uint64_t>(0, midx); \
236-
const uint64_t mmask = UINT64_C(1) << mpos; \
237228
const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \
238-
uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; \
239-
uint128_t res = 0; \
240-
auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
229+
uint64_t carry = insn.v_vm() == 0 ? P.VU.mask_elt(0, i) : 0; \
230+
bool res = false;
241231

242232
#define VI_LOOP_CARRY_END \
243-
vd = (vd & ~mmask) | (((res) << mpos) & mmask); \
233+
P.VU.set_mask_elt(insn.rd(), i, res); \
244234
} \
245235
P.VU.vstart->write(0);
246236
#define VI_LOOP_WITH_CARRY_BASE \
247237
VI_GENERAL_LOOP_BASE \
248-
VI_MASK_VARS \
249-
auto &v0 = P.VU.elt<uint64_t>(0, midx); \
250238
const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \
251-
uint64_t carry = (v0 >> mpos) & 0x1;
239+
uint64_t carry = P.VU.mask_elt(0, i);
252240

253241
#define VI_LOOP_CMP_BASE \
254242
require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \
@@ -260,12 +248,10 @@ static inline bool is_overlapped_widen(const int astart, int asize,
260248
reg_t rs2_num = insn.rs2(); \
261249
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
262250
VI_LOOP_ELEMENT_SKIP(); \
263-
uint64_t mmask = UINT64_C(1) << mpos; \
264-
uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx, true); \
265-
uint64_t res = 0;
251+
bool res = false;
266252

267253
#define VI_LOOP_CMP_END \
268-
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
254+
P.VU.set_mask_elt(insn.rd(), i, res); \
269255
} \
270256
P.VU.vstart->write(0);
271257

@@ -274,13 +260,9 @@ static inline bool is_overlapped_widen(const int astart, int asize,
274260
require_vector(true); \
275261
reg_t vl = P.VU.vl->read(); \
276262
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
277-
int midx = i / 64; \
278-
int mpos = i % 64; \
279-
uint64_t mmask = UINT64_C(1) << mpos; \
280-
uint64_t vs2 = P.VU.elt<uint64_t>(insn.rs2(), midx); \
281-
uint64_t vs1 = P.VU.elt<uint64_t>(insn.rs1(), midx); \
282-
uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx, true); \
283-
res = (res & ~mmask) | ((op) & (1ULL << mpos)); \
263+
bool vs2 = P.VU.mask_elt(insn.rs2(), i); \
264+
bool vs1 = P.VU.mask_elt(insn.rs1(), i); \
265+
P.VU.set_mask_elt(insn.rd(), i, (op)); \
284266
} \
285267
P.VU.vstart->write(0);
286268

@@ -523,8 +505,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
523505

524506
// merge and copy loop
525507
#define VI_MERGE_VARS \
526-
VI_MASK_VARS \
527-
bool UNUSED use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
508+
bool UNUSED use_first = P.VU.mask_elt(0, i);
528509

529510
#define VI_MERGE_LOOP_BASE \
530511
VI_GENERAL_LOOP_BASE \
@@ -1482,9 +1463,7 @@ VI_VX_ULOOP({ \
14821463
VI_VFP_COMMON \
14831464
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
14841465
VI_LOOP_ELEMENT_SKIP(); \
1485-
uint64_t mmask = UINT64_C(1) << mpos; \
1486-
uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); \
1487-
uint64_t res = 0;
1466+
bool res = false;
14881467

14891468
#define VI_VFP_LOOP_REDUCTION_BASE(width) \
14901469
float##width##_t vd_0 = P.VU.elt<float##width##_t>(rd_num, 0); \
@@ -1562,7 +1541,7 @@ VI_VX_ULOOP({ \
15621541
case e16: \
15631542
case e32: \
15641543
case e64: { \
1565-
vd = (vd & ~mmask) | (((res) << mpos) & mmask); \
1544+
P.VU.set_mask_elt(insn.rd(), i, res); \
15661545
break; \
15671546
} \
15681547
default: \

riscv/vector_unit.h

+11
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,17 @@ class vectorUnit_t
108108
template<typename EG> EG&
109109
elt_group(reg_t vReg, reg_t n, bool is_write = false);
110110

111+
// Return mask bit `n` of vector register `vReg`.
// The register is read one byte at a time: byte index n / 8, bit position
// n % 8 within that byte (least-significant bit first).
// NOTE(review): byte-granular access presumably keeps the read in bounds
// when a register is narrower than 64 bits (the commit targets VLEN=32);
// the removed call sites indexed uint32_t/uint64_t words instead — confirm.
bool mask_elt(reg_t vReg, reg_t n)
112+
{
113+
// Shift the selected bit down to position 0 and isolate it.
return (elt<uint8_t>(vReg, n / 8) >> (n % 8)) & 1;
114+
}
115+
116+
// Set mask bit `n` of vector register `vReg` to `value`, leaving the other
// bits of the containing byte unchanged.
// Addressing mirrors mask_elt: byte index n / 8, bit position n % 8.
void set_mask_elt(reg_t vReg, reg_t n, bool value)
117+
{
118+
// Third argument `true` presumably requests write access, matching the
// `is_write` parameter on elt_group declared nearby — TODO confirm for elt.
auto& e = elt<uint8_t>(vReg, n / 8, true);
119+
// Clear the target bit, then OR in the new value at the same position.
e = (e & ~(1U << (n % 8))) | (value << (n % 8));
120+
}
121+
111122
public:
112123

113124
void reset();

0 commit comments

Comments
 (0)