-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[RISCV] Support ZVqdot Codegen and C intrinsics #154915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
spec: https://github.com/riscv/riscv-dot-product/tree/main Node: we pack 4 int8/uint8 element in rs1 to a uint32.
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-risc-v Author: Brandon Wu (4vtomat) Changesspec: https://github.com/riscv/riscv-dot-product/tree/main Node: we pack 4 int8/uint8 element in rs1 to a uint32. Patch is 399.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154915.diff 35 Files Affected:
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index c1de2bfe4243d..52ab5252e219d 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -2547,3 +2547,24 @@ let UnMaskedPolicyScheme = HasPolicyOperand, HasMasked = false in {
defm vsm3me : RVVOutOp1BuiltinSet<"vsm3me", "i", [["vv", "Uv", "UvUvUv"]]>;
}
}
+
+multiclass RVVVQDOTQBuiltinSet<list<list<string>> suffixes_prototypes> {
+ let UnMaskedPolicyScheme = HasPolicyOperand,
+ HasMaskedOffOperand = false,
+ OverloadedName = NAME,
+ Log2LMUL = [-1, 0, 1, 2, 3] in {
+ defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "i", suffixes_prototypes>;
+ }
+}
+
+// Only SEW=32 is defined for zvqdotq so far, and since inputs are in fact four
+// 8-bit integer bundles, we use unsigned type to represent all of them
+let RequiredFeatures = ["zvqdotq"] in {
+ defm vqdot : RVVVQDOTQBuiltinSet<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)v"],
+ ["vx", "v", "vv(FixedSEW:8)vUe"]]>;
+ defm vqdotu : RVVVQDOTQBuiltinSet<[["vv", "Uv", "UvUv(FixedSEW:8)Uv(FixedSEW:8)Uv"],
+ ["vx", "Uv", "UvUv(FixedSEW:8)UvUe"]]>;
+ defm vqdotsu : RVVVQDOTQBuiltinSet<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)Uv"],
+ ["vx", "v", "vv(FixedSEW:8)vUe"]]>;
+ defm vqdotus : RVVVQDOTQBuiltinSet<[["vx", "v", "vv(FixedSEW:8)UvUe"]]>;
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vv.c
new file mode 100644
index 0000000000000..b51c62823e60c
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vv.c
@@ -0,0 +1,120 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvqdotq -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vv_i32mf2(
+// CHECK-RV64-SAME: <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], <vscale x 4 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs2,
+ vint8mf2_t vs1, size_t vl) {
+ return __riscv_vqdot_vv_i32mf2(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vv_i32m1(
+// CHECK-RV64-SAME: <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], <vscale x 8 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vv_i32m1(vint32m1_t vd, vint8m1_t vs2,
+ vint8m1_t vs1, size_t vl) {
+ return __riscv_vqdot_vv_i32m1(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vv_i32m2(
+// CHECK-RV64-SAME: <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], <vscale x 16 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vv_i32m2(vint32m2_t vd, vint8m2_t vs2,
+ vint8m2_t vs1, size_t vl) {
+ return __riscv_vqdot_vv_i32m2(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vv_i32m4(
+// CHECK-RV64-SAME: <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], <vscale x 32 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vv_i32m4(vint32m4_t vd, vint8m4_t vs2,
+ vint8m4_t vs1, size_t vl) {
+ return __riscv_vqdot_vv_i32m4(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vv_i32m8(
+// CHECK-RV64-SAME: <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], <vscale x 64 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vv_i32m8(vint32m8_t vd, vint8m8_t vs2,
+ vint8m8_t vs1, size_t vl) {
+ return __riscv_vqdot_vv_i32m8(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vv_i32mf2_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], <vscale x 4 x i8> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vv_i32mf2_m(vbool64_t vm, vint32mf2_t vd,
+ vint8mf2_t vs2, vint8mf2_t vs1,
+ size_t vl) {
+ return __riscv_vqdot_vv_i32mf2_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vv_i32m1_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], <vscale x 8 x i8> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vv_i32m1_m(vbool32_t vm, vint32m1_t vd,
+ vint8m1_t vs2, vint8m1_t vs1,
+ size_t vl) {
+ return __riscv_vqdot_vv_i32m1_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vv_i32m2_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], <vscale x 16 x i8> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vv_i32m2_m(vbool16_t vm, vint32m2_t vd,
+ vint8m2_t vs2, vint8m2_t vs1,
+ size_t vl) {
+ return __riscv_vqdot_vv_i32m2_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vv_i32m4_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], <vscale x 32 x i8> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vv_i32m4_m(vbool8_t vm, vint32m4_t vd, vint8m4_t vs2,
+ vint8m4_t vs1, size_t vl) {
+ return __riscv_vqdot_vv_i32m4_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vv_i32m8_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], <vscale x 64 x i8> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vv_i32m8_m(vbool4_t vm, vint32m8_t vd, vint8m8_t vs2,
+ vint8m8_t vs1, size_t vl) {
+ return __riscv_vqdot_vv_i32m8_m(vm, vd, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vx.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vx.c
new file mode 100644
index 0000000000000..a3f5d9282cf47
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vx.c
@@ -0,0 +1,118 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvqdotq -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vx_i32mf2(
+// CHECK-RV64-SAME: <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.nxv1i32.nxv4i8.i32.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vx_i32mf2(vint32mf2_t vd, vint8mf2_t vs2,
+ uint32_t rs1, size_t vl) {
+ return __riscv_vqdot_vx_i32mf2(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vx_i32m1(
+// CHECK-RV64-SAME: <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.nxv2i32.nxv8i8.i32.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vx_i32m1(vint32m1_t vd, vint8m1_t vs2, uint32_t rs1,
+ size_t vl) {
+ return __riscv_vqdot_vx_i32m1(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vx_i32m2(
+// CHECK-RV64-SAME: <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.nxv4i32.nxv16i8.i32.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vx_i32m2(vint32m2_t vd, vint8m2_t vs2, uint32_t rs1,
+ size_t vl) {
+ return __riscv_vqdot_vx_i32m2(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vx_i32m4(
+// CHECK-RV64-SAME: <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.nxv8i32.nxv32i8.i32.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vx_i32m4(vint32m4_t vd, vint8m4_t vs2, uint32_t rs1,
+ size_t vl) {
+ return __riscv_vqdot_vx_i32m4(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vx_i32m8(
+// CHECK-RV64-SAME: <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.nxv16i32.nxv64i8.i32.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vx_i32m8(vint32m8_t vd, vint8m8_t vs2, uint32_t rs1,
+ size_t vl) {
+ return __riscv_vqdot_vx_i32m8(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vx_i32mf2_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.mask.nxv1i32.nxv4i8.i32.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], i32 [[RS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vx_i32mf2_m(vbool64_t vm, vint32mf2_t vd,
+ vint8mf2_t vs2, uint32_t rs1,
+ size_t vl) {
+ return __riscv_vqdot_vx_i32mf2_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vx_i32m1_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.mask.nxv2i32.nxv8i8.i32.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], i32 [[RS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vx_i32m1_m(vbool32_t vm, vint32m1_t vd,
+ vint8m1_t vs2, uint32_t rs1, size_t vl) {
+ return __riscv_vqdot_vx_i32m1_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vx_i32m2_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.mask.nxv4i32.nxv16i8.i32.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], i32 [[RS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vx_i32m2_m(vbool16_t vm, vint32m2_t vd,
+ vint8m2_t vs2, uint32_t rs1, size_t vl) {
+ return __riscv_vqdot_vx_i32m2_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vx_i32m4_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.mask.nxv8i32.nxv32i8.i32.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], i32 [[RS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vx_i32m4_m(vbool8_t vm, vint32m4_t vd, vint8m4_t vs2,
+ uint32_t rs1, size_t vl) {
+ return __riscv_vqdot_vx_i32m4_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vx_i32m8_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.mask.nxv16i32.nxv64i8.i32.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], i32 [[RS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vx_i32m8_m(vbool4_t vm, vint32m8_t vd, vint8m8_t vs2,
+ uint32_t rs1, size_t vl) {
+ return __riscv_vqdot_vx_i32m8_m(vm, vd, vs2, rs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vv.c
new file mode 100644
index 0000000000000..4246981fbbf05
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vv.c
@@ -0,0 +1,122 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvqdotq -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdotsu_vv_i32mf2(
+// CHECK-RV64-SAME: <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdotsu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], <vscale x 4 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdotsu_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs2,
+ vuint8mf2_t vs1, size_t vl) {
+ return __riscv_vqdotsu_vv_i32mf2(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdotsu_vv_i32m1(
+// CHECK-RV64-SAME: <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdotsu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], <vscale x 8 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdotsu_vv_i32m1(vint32m1_t vd, vint8m1_t vs2,
+ vuint8m1_t vs1, size_t vl) {
+ return __riscv_vqdotsu_vv_i32m1(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_loca...
[truncated]
|
// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]] | ||
// | ||
vint32mf2_t test_vqdot_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs2, | ||
vint8mf2_t vs1, size_t vl) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indented too far
} | ||
|
||
defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8]; | ||
defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>; | ||
defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>; | ||
defm : VPatBinaryVL_VV_VX<riscv_vqdotsu_vl, "PseudoVQDOTSU", AllE32Vectors>; | ||
|
||
// These VPat definitions are for vqdot because they have a different operand | ||
// order with other ternary instructions (i.e. vop.vx vd, vs2, rs1) | ||
multiclass VPatTernaryV_VX_AAXASwapped<string intrinsic, string instruction, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Swapping at this layer seems wrong. Is the assembly instruction defined incorrectly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh. What you really mean is that the last source is the scalar operand?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah that's what I mean, normal ternary instruction has the order "vd, vs1, vs2"
746a82d
to
057d292
Compare
@@ -2547,3 +2547,24 @@ let UnMaskedPolicyScheme = HasPolicyOperand, HasMasked = false in { | |||
defm vsm3me : RVVOutOp1BuiltinSet<"vsm3me", "i", [["vv", "Uv", "UvUvUv"]]>; | |||
} | |||
} | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Zvqdotq |
@@ -0,0 +1,468 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |||
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvqdotq \ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is either zve32x or zve64x sufficient
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we rename it to vqdotsu.ll
to match its assembly name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we put the intrinsic tests for Zvqdot to another separate subdirectory like Zvfbfmin?
} | ||
|
||
defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8]; | ||
defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>; | ||
defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>; | ||
defm : VPatBinaryVL_VV_VX<riscv_vqdotsu_vl, "PseudoVQDOTSU", AllE32Vectors>; | ||
|
||
// These VPat definitions are for vqdot because they have a different operand | ||
// order with other ternary instructions (i.e. vop.vx vd, vs2, rs1) | ||
multiclass VPatTernaryV_VX_AAXASwapped<string intrinsic, string instruction, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't the name be AABX instead of AAXASwapped? The source and destination types are different and you don't need the word "Swapped" if you swap the letters that represent types.
spec: https://github.com/riscv/riscv-dot-product/tree/main
Node: we pack 4 int8/uint8 element in rs1 to a uint32.