Skip to content

Conversation

4vtomat
Copy link
Member

@4vtomat 4vtomat commented Aug 22, 2025

spec: https://github.com/riscv/riscv-dot-product/tree/main

Node: we pack 4 int8/uint8 element in rs1 to a uint32.

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:RISC-V clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:ir labels Aug 22, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 22, 2025

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-risc-v

Author: Brandon Wu (4vtomat)

Changes

spec: https://github.com/riscv/riscv-dot-product/tree/main

Node: we pack 4 int8/uint8 element in rs1 to a uint32.


Patch is 399.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154915.diff

35 Files Affected:

  • (modified) clang/include/clang/Basic/riscv_vector.td (+21)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vv.c (+120)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vx.c (+118)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vv.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vx.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotu_vv.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotu_vx.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotus_vx.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vqdot_vv.c (+120)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vqdot_vx.c (+118)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vqdotsu_vv.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vqdotsu_vx.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vqdotu_vv.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vqdotu_vx.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/vqdotus_vx.c (+122)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vqdot_vv.c (+207)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vqdot_vx.c (+207)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vqdotsu_vv.c (+207)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vqdotsu_vx.c (+207)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vqdotu_vv.c (+207)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vqdotu_vx.c (+207)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/vqdotus_vx.c (+207)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vqdot_vv.c (+242)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vqdot_vx.c (+238)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vqdotsu_vv.c (+242)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vqdotsu_vx.c (+242)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vqdotu_vv.c (+242)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vqdotu_vx.c (+242)
  • (added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/vqdotus_vx.c (+242)
  • (modified) llvm/include/llvm/IR/IntrinsicsRISCV.td (+45)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td (+62)
  • (added) llvm/test/CodeGen/RISCV/rvv/vqdot.ll (+468)
  • (added) llvm/test/CodeGen/RISCV/rvv/vqdot_su.ll (+468)
  • (added) llvm/test/CodeGen/RISCV/rvv/vqdot_u.ll (+468)
  • (added) llvm/test/CodeGen/RISCV/rvv/vqdot_us.ll (+236)
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index c1de2bfe4243d..52ab5252e219d 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -2547,3 +2547,24 @@ let UnMaskedPolicyScheme = HasPolicyOperand, HasMasked = false in {
     defm vsm3me  : RVVOutOp1BuiltinSet<"vsm3me", "i", [["vv", "Uv", "UvUvUv"]]>;
   }
 }
+
+multiclass RVVVQDOTQBuiltinSet<list<list<string>> suffixes_prototypes> {
+  let UnMaskedPolicyScheme = HasPolicyOperand,
+      HasMaskedOffOperand = false,
+      OverloadedName = NAME,
+      Log2LMUL = [-1, 0, 1, 2, 3] in {
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "i", suffixes_prototypes>;
+  }
+}
+
+// Only SEW=32 is defined for zvqdotq so far, and since inputs are in fact four
+// 8-bit integer bundles, we use unsigned type to represent all of them
+let RequiredFeatures = ["zvqdotq"] in {
+  defm vqdot : RVVVQDOTQBuiltinSet<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)v"],
+                                    ["vx", "v", "vv(FixedSEW:8)vUe"]]>;
+  defm vqdotu : RVVVQDOTQBuiltinSet<[["vv", "Uv", "UvUv(FixedSEW:8)Uv(FixedSEW:8)Uv"],
+                                     ["vx", "Uv", "UvUv(FixedSEW:8)UvUe"]]>;
+  defm vqdotsu : RVVVQDOTQBuiltinSet<[["vv", "v", "vv(FixedSEW:8)v(FixedSEW:8)Uv"],
+                                      ["vx", "v", "vv(FixedSEW:8)vUe"]]>;
+  defm vqdotus : RVVVQDOTQBuiltinSet<[["vx", "v", "vv(FixedSEW:8)UvUe"]]>;
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vv.c
new file mode 100644
index 0000000000000..b51c62823e60c
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vv.c
@@ -0,0 +1,120 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvqdotq -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vv_i32mf2(
+// CHECK-RV64-SAME: <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], <vscale x 4 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs2,
+                                    vint8mf2_t vs1, size_t vl) {
+  return __riscv_vqdot_vv_i32mf2(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vv_i32m1(
+// CHECK-RV64-SAME: <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], <vscale x 8 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vv_i32m1(vint32m1_t vd, vint8m1_t vs2,
+                                  vint8m1_t vs1, size_t vl) {
+  return __riscv_vqdot_vv_i32m1(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vv_i32m2(
+// CHECK-RV64-SAME: <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], <vscale x 16 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vv_i32m2(vint32m2_t vd, vint8m2_t vs2,
+                                  vint8m2_t vs1, size_t vl) {
+  return __riscv_vqdot_vv_i32m2(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vv_i32m4(
+// CHECK-RV64-SAME: <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], <vscale x 32 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vv_i32m4(vint32m4_t vd, vint8m4_t vs2,
+                                  vint8m4_t vs1, size_t vl) {
+  return __riscv_vqdot_vv_i32m4(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vv_i32m8(
+// CHECK-RV64-SAME: <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], <vscale x 64 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vv_i32m8(vint32m8_t vd, vint8m8_t vs2,
+                                  vint8m8_t vs1, size_t vl) {
+  return __riscv_vqdot_vv_i32m8(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vv_i32mf2_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.mask.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], <vscale x 4 x i8> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vv_i32mf2_m(vbool64_t vm, vint32mf2_t vd,
+                                      vint8mf2_t vs2, vint8mf2_t vs1,
+                                      size_t vl) {
+  return __riscv_vqdot_vv_i32mf2_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vv_i32m1_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.mask.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], <vscale x 8 x i8> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vv_i32m1_m(vbool32_t vm, vint32m1_t vd,
+                                    vint8m1_t vs2, vint8m1_t vs1,
+                                    size_t vl) {
+  return __riscv_vqdot_vv_i32m1_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vv_i32m2_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], <vscale x 16 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.mask.nxv4i32.nxv16i8.nxv16i8.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], <vscale x 16 x i8> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vv_i32m2_m(vbool16_t vm, vint32m2_t vd,
+                                    vint8m2_t vs2, vint8m2_t vs1,
+                                    size_t vl) {
+  return __riscv_vqdot_vv_i32m2_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vv_i32m4_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], <vscale x 32 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.mask.nxv8i32.nxv32i8.nxv32i8.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], <vscale x 32 x i8> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vv_i32m4_m(vbool8_t vm, vint32m4_t vd, vint8m4_t vs2,
+                                    vint8m4_t vs1, size_t vl) {
+  return __riscv_vqdot_vv_i32m4_m(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vv_i32m8_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], <vscale x 64 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.mask.nxv16i32.nxv64i8.nxv64i8.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], <vscale x 64 x i8> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vv_i32m8_m(vbool4_t vm, vint32m8_t vd, vint8m8_t vs2,
+                                    vint8m8_t vs1, size_t vl) {
+  return __riscv_vqdot_vv_i32m8_m(vm, vd, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vx.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vx.c
new file mode 100644
index 0000000000000..a3f5d9282cf47
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdot_vx.c
@@ -0,0 +1,118 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvqdotq -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vx_i32mf2(
+// CHECK-RV64-SAME: <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.nxv1i32.nxv4i8.i32.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vx_i32mf2(vint32mf2_t vd, vint8mf2_t vs2,
+                                    uint32_t rs1, size_t vl) {
+  return __riscv_vqdot_vx_i32mf2(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vx_i32m1(
+// CHECK-RV64-SAME: <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.nxv2i32.nxv8i8.i32.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vx_i32m1(vint32m1_t vd, vint8m1_t vs2, uint32_t rs1,
+                                  size_t vl) {
+  return __riscv_vqdot_vx_i32m1(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vx_i32m2(
+// CHECK-RV64-SAME: <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.nxv4i32.nxv16i8.i32.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vx_i32m2(vint32m2_t vd, vint8m2_t vs2, uint32_t rs1,
+                                  size_t vl) {
+  return __riscv_vqdot_vx_i32m2(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vx_i32m4(
+// CHECK-RV64-SAME: <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.nxv8i32.nxv32i8.i32.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vx_i32m4(vint32m4_t vd, vint8m4_t vs2, uint32_t rs1,
+                                  size_t vl) {
+  return __riscv_vqdot_vx_i32m4(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vx_i32m8(
+// CHECK-RV64-SAME: <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.nxv16i32.nxv64i8.i32.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], i32 [[RS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vx_i32m8(vint32m8_t vd, vint8m8_t vs2, uint32_t rs1,
+                                  size_t vl) {
+  return __riscv_vqdot_vx_i32m8(vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdot_vx_i32mf2_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdot.mask.nxv1i32.nxv4i8.i32.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], i32 [[RS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdot_vx_i32mf2_m(vbool64_t vm, vint32mf2_t vd,
+                                      vint8mf2_t vs2, uint32_t rs1,
+                                      size_t vl) {
+  return __riscv_vqdot_vx_i32mf2_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdot_vx_i32m1_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdot.mask.nxv2i32.nxv8i8.i32.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], i32 [[RS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdot_vx_i32m1_m(vbool32_t vm, vint32m1_t vd,
+                                    vint8m1_t vs2, uint32_t rs1, size_t vl) {
+  return __riscv_vqdot_vx_i32m1_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x i32> @test_vqdot_vx_i32m2_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x i32> [[VD:%.*]], <vscale x 16 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vqdot.mask.nxv4i32.nxv16i8.i32.i64(<vscale x 4 x i32> [[VD]], <vscale x 16 x i8> [[VS2]], i32 [[RS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+vint32m2_t test_vqdot_vx_i32m2_m(vbool16_t vm, vint32m2_t vd,
+                                    vint8m2_t vs2, uint32_t rs1, size_t vl) {
+  return __riscv_vqdot_vx_i32m2_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x i32> @test_vqdot_vx_i32m4_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x i32> [[VD:%.*]], <vscale x 32 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vqdot.mask.nxv8i32.nxv32i8.i32.i64(<vscale x 8 x i32> [[VD]], <vscale x 32 x i8> [[VS2]], i32 [[RS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
+//
+vint32m4_t test_vqdot_vx_i32m4_m(vbool8_t vm, vint32m4_t vd, vint8m4_t vs2,
+                                    uint32_t rs1, size_t vl) {
+  return __riscv_vqdot_vx_i32m4_m(vm, vd, vs2, rs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x i32> @test_vqdot_vx_i32m8_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x i32> [[VD:%.*]], <vscale x 64 x i8> [[VS2:%.*]], i32 noundef signext [[RS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vqdot.mask.nxv16i32.nxv64i8.i32.i64(<vscale x 16 x i32> [[VD]], <vscale x 64 x i8> [[VS2]], i32 [[RS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP0]]
+//
+vint32m8_t test_vqdot_vx_i32m8_m(vbool4_t vm, vint32m8_t vd, vint8m8_t vs2,
+                                    uint32_t rs1, size_t vl) {
+  return __riscv_vqdot_vx_i32m8_m(vm, vd, vs2, rs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vv.c
new file mode 100644
index 0000000000000..4246981fbbf05
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/vqdotsu_vv.c
@@ -0,0 +1,122 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvqdotq -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x i32> @test_vqdotsu_vv_i32mf2(
+// CHECK-RV64-SAME: <vscale x 1 x i32> [[VD:%.*]], <vscale x 4 x i8> [[VS2:%.*]], <vscale x 4 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vqdotsu.nxv1i32.nxv4i8.nxv4i8.i64(<vscale x 1 x i32> [[VD]], <vscale x 4 x i8> [[VS2]], <vscale x 4 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP0]]
+//
+vint32mf2_t test_vqdotsu_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs2,
+                                      vuint8mf2_t vs1, size_t vl) {
+  return __riscv_vqdotsu_vv_i32mf2(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x i32> @test_vqdotsu_vv_i32m1(
+// CHECK-RV64-SAME: <vscale x 2 x i32> [[VD:%.*]], <vscale x 8 x i8> [[VS2:%.*]], <vscale x 8 x i8> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vqdotsu.nxv2i32.nxv8i8.nxv8i8.i64(<vscale x 2 x i32> [[VD]], <vscale x 8 x i8> [[VS2]], <vscale x 8 x i8> [[VS1]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP0]]
+//
+vint32m1_t test_vqdotsu_vv_i32m1(vint32m1_t vd, vint8m1_t vs2,
+                                    vuint8m1_t vs1, size_t vl) {
+  return __riscv_vqdotsu_vv_i32m1(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_loca...
[truncated]

// CHECK-RV64-NEXT: ret <vscale x 1 x i32> [[TMP0]]
//
vint32mf2_t test_vqdot_vv_i32mf2(vint32mf2_t vd, vint8mf2_t vs2,
vint8mf2_t vs1, size_t vl) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indented too far

}

defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8];
defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>;
defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>;
defm : VPatBinaryVL_VV_VX<riscv_vqdotsu_vl, "PseudoVQDOTSU", AllE32Vectors>;

// These VPat definitions are for vqdot because they have a different operand
// order with other ternary instructions (i.e. vop.vx vd, vs2, rs1)
multiclass VPatTernaryV_VX_AAXASwapped<string intrinsic, string instruction,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Swapping at this layer seems wrong. Is the assembly instruction defined incorrectly?

Copy link
Collaborator

@topperc topperc Aug 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. What you really mean is that the last source is the scalar operand?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's what I mean, normal ternary instruction has the order "vd, vs1, vs2"

@4vtomat 4vtomat force-pushed the support_zvqdot_intrinsic branch from 746a82d to 057d292 Compare August 25, 2025 04:19
@@ -2547,3 +2547,24 @@ let UnMaskedPolicyScheme = HasPolicyOperand, HasMasked = false in {
defm vsm3me : RVVOutOp1BuiltinSet<"vsm3me", "i", [["vv", "Uv", "UvUvUv"]]>;
}
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Zvqdotq

@@ -0,0 +1,468 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvqdotq \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is either zve32x or zve64x sufficient

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we rename it to vqdotsu.ll to match its assembly name?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we put the intrinsic tests for Zvqdot to another separate subdirectory like Zvfbfmin?

}

defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8];
defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>;
defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>;
defm : VPatBinaryVL_VV_VX<riscv_vqdotsu_vl, "PseudoVQDOTSU", AllE32Vectors>;

// These VPat definitions are for vqdot because they have a different operand
// order with other ternary instructions (i.e. vop.vx vd, vs2, rs1)
multiclass VPatTernaryV_VX_AAXASwapped<string intrinsic, string instruction,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the name be AABX instead of AAXASwapped? The source and destination types are different and you don't need the word "Swapped" if you swap the letters that represent types.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:RISC-V clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category llvm:ir
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants