-
Notifications
You must be signed in to change notification settings - Fork 14.9k
webassembly: recognize saturating truncation #155470
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
and turn it into `narrow_TYPE_s`
@llvm/pr-subscribers-backend-webassembly Author: Folkert de Vries (folkertdev) Changesfixes #153838 Recognize a manual saturating truncation and select the corresponding instruction. This is useful in general, but came up specifically in https://github.com/rust-lang/stdarch because it will allow us to drop more target-specific intrinsics in favor of cross-platform ones. Full diff: https://github.com/llvm/llvm-project/pull/155470.diff 2 Files Affected:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 143298b700928..f55e7346ac161 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1442,6 +1442,49 @@ def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))),
def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))),
(NARROW_U_I16x8 $left, $right)>;
+// Recognize a saturating truncation and convert into the corresponding
+// narrow_TYPE_s or narrow_TYPE_u instruction.
+multiclass SignedSaturatingTruncate<ValueType input, ValueType output,
+ Instruction narrow, int minval,
+ int maxval, int mask> {
+ def : Pat<
+ (output (wasm_narrow_u
+ (and (smin (smax (input V128:$a), (splat_vector (i32 minval))),
+ (splat_vector (i32 maxval))), (splat_vector (i32 mask))),
+ (and (smin (smax (input V128:$b), (splat_vector (i32 minval))),
+ (splat_vector (i32 maxval))), (splat_vector (i32 mask)))
+ )),
+ (narrow V128:$a, V128:$b)
+ >;
+
+ def : Pat<
+ (output (wasm_narrow_u
+ (and (smax (smin (input V128:$a), (splat_vector (i32 maxval))),
+ (splat_vector (i32 minval))), (splat_vector (i32 mask))),
+ (and (smax (smin (input V128:$b), (splat_vector (i32 maxval))),
+ (splat_vector (i32 minval))), (splat_vector (i32 mask)))
+ )),
+ (narrow V128:$a, V128:$b)
+ >;
+}
+
+defm : SignedSaturatingTruncate<v8i16, v16i8, NARROW_S_I8x16, -128, 127, 0xFF>;
+defm : SignedSaturatingTruncate<v4i32, v8i16, NARROW_S_I16x8, -32768, 32767, 0xFFFF>;
+
+multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output,
+ Instruction narrow, int maxval> {
+ def : Pat<
+ (output (wasm_narrow_u
+ (umin (input V128:$a), (splat_vector (i32 maxval))),
+ (umin (input V128:$b), (splat_vector (i32 maxval)))
+ )),
+ (narrow V128:$a, V128:$b)
+ >;
+}
+
+defm : UnsignedSaturatingTruncate<v8i16, v16i8, NARROW_U_I8x16, 0xFF>;
+defm : UnsignedSaturatingTruncate<v4i32, v8i16, NARROW_U_I16x8, 0xFFFF>;
+
// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
foreach t1 = AllVecs in
diff --git a/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
new file mode 100644
index 0000000000000..f3f3ba9b268d7
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2
+
+define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_signed:
+; CHECK: .functype i16_signed (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.narrow_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
+ %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
+ %3 = trunc nsw <16 x i16> %2 to <16 x i8>
+ ret <16 x i8> %3
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed:
+; CHECK: .functype i32_signed (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768))
+ %2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767))
+ %3 = trunc nsw <8 x i32> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @i32_signed_flipped(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed_flipped:
+; CHECK: .functype i32_signed_flipped (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> splat (i32 32767), <8 x i32> %0)
+ %2 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> splat (i32 -32768), <8 x i32> %1)
+ %3 = trunc nsw <8 x i32> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_unsigned:
+; CHECK: .functype i16_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255))
+ %2 = trunc nuw <16 x i16> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_unsigned:
+; CHECK: .functype i32_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
+ %2 = trunc nsw <8 x i32> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
|
fixes #153838
using the same approach as #155377
Recognize a manual saturating truncation and select the corresponding instruction. This is useful in general, but came up specifically in https://github.com/rust-lang/stdarch because it will allow us to drop more target-specific intrinsics in favor of cross-platform ones.