Skip to content

Conversation

farzonl
Copy link
Member

@farzonl farzonl commented Sep 4, 2025

fixes #156068

  • We needed to add a new sub arch to the target tripple so we can test that emulation does not happen when targeting SM6.9
  • The HLSL toolchain needed to be updated to handle the conversion of strings to enums for the new sub arch.
  • The emulation is done in DXILIntrinsicExpansion.cpp and needs to be able to convert both llvm.is.fpclass and lvm.dx.isinf to the proper emulation
  • test updates in TargetParser/TripleTest.cpp, isinf.ll, is_fpclass.ll, and DXCModeTest.cpp

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:driver 'clang' and 'clang++' user-facing binaries. Not 'clang-cl' backend:DirectX HLSL HLSL Language Support labels Sep 4, 2025
@llvmbot
Copy link
Member

llvmbot commented Sep 4, 2025

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-directx

@llvm/pr-subscribers-clang-driver

Author: Farzon Lotfi (farzonl)

Changes

fixes #156068

  • We needed to add a new sub arch to the target tripple so we can test that emulation does not happen when targeting SM6.9
  • The HLSL toolchain needed to be updated to handle the conversion of strings to enums for the new sub arch.
  • The emulation is done in DXILIntrinsicExpansion.cpp and needs to be able to convert both llvm.is.fpclass and lvm.dx.isinf to the proper emulation
  • test updates in TargetParser/TripleTest.cpp, isinf.ll, and is_fpclass.ll

Full diff: https://github.com/llvm/llvm-project/pull/156932.diff

7 Files Affected:

  • (modified) clang/lib/Driver/ToolChains/HLSL.cpp (+3)
  • (modified) llvm/include/llvm/TargetParser/Triple.h (+2-1)
  • (modified) llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp (+42-1)
  • (modified) llvm/lib/TargetParser/Triple.cpp (+8-1)
  • (modified) llvm/test/CodeGen/DirectX/is_fpclass.ll (+19-2)
  • (modified) llvm/test/CodeGen/DirectX/isinf.ll (+37-6)
  • (modified) llvm/unittests/TargetParser/TripleTest.cpp (+13)
diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp
index 660661945d62a..559af32dc3808 100644
--- a/clang/lib/Driver/ToolChains/HLSL.cpp
+++ b/clang/lib/Driver/ToolChains/HLSL.cpp
@@ -132,6 +132,9 @@ std::optional<std::string> tryParseProfile(StringRef Profile) {
   case 8:
     SubArch = llvm::Triple::DXILSubArch_v1_8;
     break;
+  case 9:
+    SubArch = llvm::Triple::DXILSubArch_v1_9;
+    break;
   case OfflineLibMinor:
     // Always consider minor version x as the latest supported DXIL version
     SubArch = llvm::Triple::LatestDXILSubArch;
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index f85984ed4f328..8e12c6852075d 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -180,7 +180,8 @@ class Triple {
     DXILSubArch_v1_6,
     DXILSubArch_v1_7,
     DXILSubArch_v1_8,
-    LatestDXILSubArch = DXILSubArch_v1_8,
+    DXILSubArch_v1_9,
+    LatestDXILSubArch = DXILSubArch_v1_9,
   };
   enum VendorType {
     UnknownVendor,
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ee1db54446cb8..7966e3f1d6f3d 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -51,6 +51,42 @@ static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
   return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
 }
 
+  static Value* expand16BitIsInf(CallInst *Orig) {
+    Module* M = Orig->getModule();
+    if(M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
+      return nullptr;
+    
+    Value *Val = Orig->getOperand(0);
+    Type* ValTy = Val->getType();
+    if (!(ValTy->isHalfTy() ||
+      (ValTy->isVectorTy() &&
+       cast<FixedVectorType>(ValTy)->getElementType()->isHalfTy())))
+        return nullptr;
+
+    IRBuilder<> Builder(Orig);
+    Type *IType = Type::getInt16Ty(M->getContext());
+    Constant *PosInf = ValTy->isVectorTy()
+                       ? ConstantVector::getSplat(
+                             ElementCount::getFixed(
+                                 cast<FixedVectorType>(ValTy)->getNumElements()),
+                             ConstantInt::get(IType, 0x7c00))
+                       : ConstantInt::get(IType, 0x7c00);
+
+    Constant *NegInf = ValTy->isVectorTy()
+                       ? ConstantVector::getSplat(
+                             ElementCount::getFixed(
+                                 cast<FixedVectorType>(ValTy)->getNumElements()),
+                             ConstantInt::get(IType, 0xfc00))
+                       : ConstantInt::get(IType, 0xfc00);
+
+
+    Value *IVal = Builder.CreateBitCast(Val, PosInf->getType());
+    Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
+    Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
+    Value *B3 = Builder.CreateOr(B1, B2);
+    return B3;
+  }
+
 static bool isIntrinsicExpansion(Function &F) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::abs:
@@ -68,6 +104,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_sclamp:
   case Intrinsic::dx_nclamp:
   case Intrinsic::dx_degrees:
+  case Intrinsic::dx_isinf:
   case Intrinsic::dx_lerp:
   case Intrinsic::dx_normalize:
   case Intrinsic::dx_fdot:
@@ -301,9 +338,10 @@ static Value *expandIsFPClass(CallInst *Orig) {
   auto *TCI = dyn_cast<ConstantInt>(T);
 
   // These FPClassTest cases have DXIL opcodes, so they will be handled in
-  // DXIL Op Lowering instead.
+  // DXIL Op Lowering instead for all non f16 cases.
   switch (TCI->getZExtValue()) {
   case FPClassTest::fcInf:
+    return expand16BitIsInf(Orig);
   case FPClassTest::fcNan:
   case FPClassTest::fcNormal:
   case FPClassTest::fcFinite:
@@ -873,6 +911,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_degrees:
     Result = expandDegreesIntrinsic(Orig);
     break;
+  case Intrinsic::dx_isinf:
+    Result = expand16BitIsInf(Orig);
+    break;
   case Intrinsic::dx_lerp:
     Result = expandLerpIntrinsic(Orig);
     break;
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ec15f235b8624..2ee27ba4a7de4 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -158,6 +158,8 @@ StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) {
       return "dxilv1.7";
     case Triple::DXILSubArch_v1_8:
       return "dxilv1.8";
+    case Triple::DXILSubArch_v1_9:
+      return "dxilv1.9";
     default:
       break;
     }
@@ -650,6 +652,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
                 .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
                        "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7",
                        "dxilv1.8", Triple::dxil)
+                //Note: Cases has max limit of 10.
+                .Case("dxilv1.9", Triple::dxil)
                 .Case("xtensa", Triple::xtensa)
                 .Default(Triple::UnknownArch);
 
@@ -842,6 +846,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
         .EndsWith("v1.6", Triple::DXILSubArch_v1_6)
         .EndsWith("v1.7", Triple::DXILSubArch_v1_7)
         .EndsWith("v1.8", Triple::DXILSubArch_v1_8)
+        .EndsWith("v1.9", Triple::DXILSubArch_v1_9)
         .Default(Triple::NoSubArch);
 
   StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
@@ -1111,7 +1116,7 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
   VersionTuple Ver =
       parseVersionFromName(ShaderModelStr.drop_front(strlen("shadermodel")));
   // Default DXIL minor version when Shader Model version is anything other
-  // than 6.[0...8] or 6.x (which translates to latest current SM version)
+  // than 6.[0...9] or 6.x (which translates to latest current SM version)
   const unsigned SMMajor = 6;
   if (!Ver.empty()) {
     if (Ver.getMajor() == SMMajor) {
@@ -1135,6 +1140,8 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
           return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_7);
         case 8:
           return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_8);
+        case 9:
+          return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_9);
         default:
           report_fatal_error("Unsupported Shader Model version", false);
         }
diff --git a/llvm/test/CodeGen/DirectX/is_fpclass.ll b/llvm/test/CodeGen/DirectX/is_fpclass.ll
index a628096aacd7d..1796e8bd794d8 100644
--- a/llvm/test/CodeGen/DirectX/is_fpclass.ll
+++ b/llvm/test/CodeGen/DirectX/is_fpclass.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
 
 
 define noundef i1 @isnegzero(float noundef %a) {
@@ -75,6 +75,23 @@ entry:
   ret i1 %0
 }
 
+define noundef i1 @isinfh(half noundef %a) {
+; CHECK-LABEL: define noundef i1 @isinfh(
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; SM69CHECK-NEXT:    [[ISINF:%.*]] = call i1 @dx.op.isSpecialFloat.f16(i32 9, half [[A]]) #[[ATTR0]]
+; SMOLDCHECK-NEXT: [[BITCAST:%.*]] = bitcast half %a to i16
+; SMOLDCHECK-NEXT: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
+; SMOLDCHECK-NEXT: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
+; SMOLDCHECK-NEXT: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
+; SMOLDCHECK-NEXT:   ret i1 [[OR]]
+; SM69CHECK-NEXT:    ret i1 [[ISINF]]
+;
+entry:
+  %0 = call i1 @llvm.is.fpclass.f16(half %a, i32 516)
+  ret i1 %0
+}
+
 define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) {
 ; CHECK-LABEL: define noundef <2 x i1> @isinfv2(
 ; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll
index 461553b533ae1..bf31363ee114c 100644
--- a/llvm/test/CodeGen/DirectX/isinf.ll
+++ b/llvm/test/CodeGen/DirectX/isinf.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
 
 ; Make sure dxil operation function calls for isinf are generated for float and half.
 
@@ -11,17 +12,47 @@ entry:
 
 define noundef i1 @isinf_half(half noundef %a) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
+  ; SMOLDCHECK: [[BITCAST:%.*]] = bitcast half %a to i16
+  ; SMOLDCHECK: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
+  ; SMOLDCHECK: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
+  ; SMOLDCHECK: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
   %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
   ret i1 %dx.isinf
 }
 
 define noundef <4 x i1> @isinf_half4(<4 x half> noundef %p0) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+
+  ; SMOLDCHECK: [[ee0:%.*]] = extractelement <4 x half> %p0, i64 0
+  ; SMOLDCHECK: [[BITCAST0:%.*]] = bitcast half [[ee0]] to i16
+  ; SMOLDCHECK: [[ee1:%.*]] = extractelement <4 x half> %p0, i64 1
+  ; SMOLDCHECK: [[BITCAST1:%.*]] = bitcast half [[ee1]] to i16
+  ; SMOLDCHECK:[[ee2:%.*]] = extractelement <4 x half> %p0, i64 2
+  ; SMOLDCHECK: [[BITCAST2:%.*]] = bitcast half [[ee2]] to i16
+  ; SMOLDCHECK: [[ee3:%.*]] = extractelement <4 x half> %p0, i64 3
+  ; SMOLDCHECK: [[BITCAST3:%.*]] = bitcast half [[ee3]] to i16
+  ; SMOLDCHECK: [[ICMPHIGH0:%.*]] = icmp eq i16 [[BITCAST0]], 31744
+  ; SMOLDCHECK: [[ICMPHIGH1:%.*]] = icmp eq i16 [[BITCAST1]], 31744
+  ; SMOLDCHECK: [[ICMPHIGH2:%.*]] = icmp eq i16 [[BITCAST2]], 31744
+  ; SMOLDCHECK: [[ICMPHIGH3:%.*]] = icmp eq i16 [[BITCAST3]], 31744
+  ; SMOLDCHECK: [[ICMPLOW0:%.*]] = icmp eq i16 [[BITCAST0]], -1024
+  ; SMOLDCHECK: [[ICMPLOW1:%.*]] = icmp eq i16 [[BITCAST1]], -1024
+  ; SMOLDCHECK: [[ICMPLOW2:%.*]] = icmp eq i16 [[BITCAST2]], -1024
+  ; SMOLDCHECK: [[ICMPLOW3:%.*]] = icmp eq i16 [[BITCAST3]], -1024
+  ; SMOLDCHECK: [[OR0:%.*]] = or i1 [[ICMPHIGH0]], [[ICMPLOW0]]
+  ; SMOLDCHECK: [[OR1:%.*]] = or i1 [[ICMPHIGH1]], [[ICMPLOW1]]
+  ; SMOLDCHECK: [[OR2:%.*]] = or i1 [[ICMPHIGH2]], [[ICMPLOW2]]
+  ; SMOLDCHECK: [[OR3:%.*]] = or i1 [[ICMPHIGH3]], [[ICMPLOW3]]
+  ; SMOLDCHECK: %.upto019 = insertelement <4 x i1> poison, i1 [[OR0]], i64 0
+  ; SMOLDCHECK: %.upto120 = insertelement <4 x i1> %.upto019, i1 [[OR1]], i64 1
+  ; SMOLDCHECK: %.upto221 = insertelement <4 x i1> %.upto120, i1 [[OR2]], i64 2
+  ; SMOLDCHECK: %0 = insertelement <4 x i1> %.upto221, i1 [[OR3]], i64 3
+
   %hlsl.isinf = call <4 x i1> @llvm.dx.isinf.v4f16(<4 x half> %p0)
   ret <4 x i1> %hlsl.isinf
 }
diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp
index 7d07615d273d7..942912bcfe0e2 100644
--- a/llvm/unittests/TargetParser/TripleTest.cpp
+++ b/llvm/unittests/TargetParser/TripleTest.cpp
@@ -553,6 +553,13 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::ShaderModel, T.getOS());
   EXPECT_EQ(VersionTuple(1, 8), T.getDXILVersion());
 
+  T = Triple("dxilv1.9-unknown-shadermodel6.15-library");
+  EXPECT_EQ(Triple::dxil, T.getArch());
+  EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::ShaderModel, T.getOS());
+  EXPECT_EQ(VersionTuple(1, 9), T.getDXILVersion());
+
   T = Triple("x86_64-unknown-fuchsia");
   EXPECT_EQ(Triple::x86_64, T.getArch());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
@@ -1270,6 +1277,12 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::UnknownOS, T.getOS());
 
+  T = Triple("dxilv1.9-unknown-unknown");
+  EXPECT_EQ(Triple::dxil, T.getArch());
+  EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
+
   // Check specification of unknown SubArch results in
   // unknown architecture.
   T = Triple("dxilv1.999-unknown-unknown");

@llvmbot
Copy link
Member

llvmbot commented Sep 4, 2025

@llvm/pr-subscribers-hlsl

Author: Farzon Lotfi (farzonl)

Changes

fixes #156068

  • We needed to add a new sub arch to the target tripple so we can test that emulation does not happen when targeting SM6.9
  • The HLSL toolchain needed to be updated to handle the conversion of strings to enums for the new sub arch.
  • The emulation is done in DXILIntrinsicExpansion.cpp and needs to be able to convert both llvm.is.fpclass and lvm.dx.isinf to the proper emulation
  • test updates in TargetParser/TripleTest.cpp, isinf.ll, and is_fpclass.ll

Full diff: https://github.com/llvm/llvm-project/pull/156932.diff

7 Files Affected:

  • (modified) clang/lib/Driver/ToolChains/HLSL.cpp (+3)
  • (modified) llvm/include/llvm/TargetParser/Triple.h (+2-1)
  • (modified) llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp (+42-1)
  • (modified) llvm/lib/TargetParser/Triple.cpp (+8-1)
  • (modified) llvm/test/CodeGen/DirectX/is_fpclass.ll (+19-2)
  • (modified) llvm/test/CodeGen/DirectX/isinf.ll (+37-6)
  • (modified) llvm/unittests/TargetParser/TripleTest.cpp (+13)
diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp
index 660661945d62a..559af32dc3808 100644
--- a/clang/lib/Driver/ToolChains/HLSL.cpp
+++ b/clang/lib/Driver/ToolChains/HLSL.cpp
@@ -132,6 +132,9 @@ std::optional<std::string> tryParseProfile(StringRef Profile) {
   case 8:
     SubArch = llvm::Triple::DXILSubArch_v1_8;
     break;
+  case 9:
+    SubArch = llvm::Triple::DXILSubArch_v1_9;
+    break;
   case OfflineLibMinor:
     // Always consider minor version x as the latest supported DXIL version
     SubArch = llvm::Triple::LatestDXILSubArch;
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index f85984ed4f328..8e12c6852075d 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -180,7 +180,8 @@ class Triple {
     DXILSubArch_v1_6,
     DXILSubArch_v1_7,
     DXILSubArch_v1_8,
-    LatestDXILSubArch = DXILSubArch_v1_8,
+    DXILSubArch_v1_9,
+    LatestDXILSubArch = DXILSubArch_v1_9,
   };
   enum VendorType {
     UnknownVendor,
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ee1db54446cb8..7966e3f1d6f3d 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -51,6 +51,42 @@ static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
   return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
 }
 
+  static Value* expand16BitIsInf(CallInst *Orig) {
+    Module* M = Orig->getModule();
+    if(M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
+      return nullptr;
+    
+    Value *Val = Orig->getOperand(0);
+    Type* ValTy = Val->getType();
+    if (!(ValTy->isHalfTy() ||
+      (ValTy->isVectorTy() &&
+       cast<FixedVectorType>(ValTy)->getElementType()->isHalfTy())))
+        return nullptr;
+
+    IRBuilder<> Builder(Orig);
+    Type *IType = Type::getInt16Ty(M->getContext());
+    Constant *PosInf = ValTy->isVectorTy()
+                       ? ConstantVector::getSplat(
+                             ElementCount::getFixed(
+                                 cast<FixedVectorType>(ValTy)->getNumElements()),
+                             ConstantInt::get(IType, 0x7c00))
+                       : ConstantInt::get(IType, 0x7c00);
+
+    Constant *NegInf = ValTy->isVectorTy()
+                       ? ConstantVector::getSplat(
+                             ElementCount::getFixed(
+                                 cast<FixedVectorType>(ValTy)->getNumElements()),
+                             ConstantInt::get(IType, 0xfc00))
+                       : ConstantInt::get(IType, 0xfc00);
+
+
+    Value *IVal = Builder.CreateBitCast(Val, PosInf->getType());
+    Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
+    Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
+    Value *B3 = Builder.CreateOr(B1, B2);
+    return B3;
+  }
+
 static bool isIntrinsicExpansion(Function &F) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::abs:
@@ -68,6 +104,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_sclamp:
   case Intrinsic::dx_nclamp:
   case Intrinsic::dx_degrees:
+  case Intrinsic::dx_isinf:
   case Intrinsic::dx_lerp:
   case Intrinsic::dx_normalize:
   case Intrinsic::dx_fdot:
@@ -301,9 +338,10 @@ static Value *expandIsFPClass(CallInst *Orig) {
   auto *TCI = dyn_cast<ConstantInt>(T);
 
   // These FPClassTest cases have DXIL opcodes, so they will be handled in
-  // DXIL Op Lowering instead.
+  // DXIL Op Lowering instead for all non f16 cases.
   switch (TCI->getZExtValue()) {
   case FPClassTest::fcInf:
+    return expand16BitIsInf(Orig);
   case FPClassTest::fcNan:
   case FPClassTest::fcNormal:
   case FPClassTest::fcFinite:
@@ -873,6 +911,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_degrees:
     Result = expandDegreesIntrinsic(Orig);
     break;
+  case Intrinsic::dx_isinf:
+    Result = expand16BitIsInf(Orig);
+    break;
   case Intrinsic::dx_lerp:
     Result = expandLerpIntrinsic(Orig);
     break;
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ec15f235b8624..2ee27ba4a7de4 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -158,6 +158,8 @@ StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) {
       return "dxilv1.7";
     case Triple::DXILSubArch_v1_8:
       return "dxilv1.8";
+    case Triple::DXILSubArch_v1_9:
+      return "dxilv1.9";
     default:
       break;
     }
@@ -650,6 +652,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
                 .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
                        "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7",
                        "dxilv1.8", Triple::dxil)
+                //Note: Cases has max limit of 10.
+                .Case("dxilv1.9", Triple::dxil)
                 .Case("xtensa", Triple::xtensa)
                 .Default(Triple::UnknownArch);
 
@@ -842,6 +846,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
         .EndsWith("v1.6", Triple::DXILSubArch_v1_6)
         .EndsWith("v1.7", Triple::DXILSubArch_v1_7)
         .EndsWith("v1.8", Triple::DXILSubArch_v1_8)
+        .EndsWith("v1.9", Triple::DXILSubArch_v1_9)
         .Default(Triple::NoSubArch);
 
   StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
@@ -1111,7 +1116,7 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
   VersionTuple Ver =
       parseVersionFromName(ShaderModelStr.drop_front(strlen("shadermodel")));
   // Default DXIL minor version when Shader Model version is anything other
-  // than 6.[0...8] or 6.x (which translates to latest current SM version)
+  // than 6.[0...9] or 6.x (which translates to latest current SM version)
   const unsigned SMMajor = 6;
   if (!Ver.empty()) {
     if (Ver.getMajor() == SMMajor) {
@@ -1135,6 +1140,8 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
           return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_7);
         case 8:
           return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_8);
+        case 9:
+          return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_9);
         default:
           report_fatal_error("Unsupported Shader Model version", false);
         }
diff --git a/llvm/test/CodeGen/DirectX/is_fpclass.ll b/llvm/test/CodeGen/DirectX/is_fpclass.ll
index a628096aacd7d..1796e8bd794d8 100644
--- a/llvm/test/CodeGen/DirectX/is_fpclass.ll
+++ b/llvm/test/CodeGen/DirectX/is_fpclass.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
 
 
 define noundef i1 @isnegzero(float noundef %a) {
@@ -75,6 +75,23 @@ entry:
   ret i1 %0
 }
 
+define noundef i1 @isinfh(half noundef %a) {
+; CHECK-LABEL: define noundef i1 @isinfh(
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; SM69CHECK-NEXT:    [[ISINF:%.*]] = call i1 @dx.op.isSpecialFloat.f16(i32 9, half [[A]]) #[[ATTR0]]
+; SMOLDCHECK-NEXT: [[BITCAST:%.*]] = bitcast half %a to i16
+; SMOLDCHECK-NEXT: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
+; SMOLDCHECK-NEXT: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
+; SMOLDCHECK-NEXT: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
+; SMOLDCHECK-NEXT:   ret i1 [[OR]]
+; SM69CHECK-NEXT:    ret i1 [[ISINF]]
+;
+entry:
+  %0 = call i1 @llvm.is.fpclass.f16(half %a, i32 516)
+  ret i1 %0
+}
+
 define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) {
 ; CHECK-LABEL: define noundef <2 x i1> @isinfv2(
 ; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll
index 461553b533ae1..bf31363ee114c 100644
--- a/llvm/test/CodeGen/DirectX/isinf.ll
+++ b/llvm/test/CodeGen/DirectX/isinf.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
 
 ; Make sure dxil operation function calls for isinf are generated for float and half.
 
@@ -11,17 +12,47 @@ entry:
 
 define noundef i1 @isinf_half(half noundef %a) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
+  ; SMOLDCHECK: [[BITCAST:%.*]] = bitcast half %a to i16
+  ; SMOLDCHECK: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
+  ; SMOLDCHECK: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
+  ; SMOLDCHECK: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
   %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
   ret i1 %dx.isinf
 }
 
 define noundef <4 x i1> @isinf_half4(<4 x half> noundef %p0) {
 entry:
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
-  ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+  ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+
+  ; SMOLDCHECK: [[ee0:%.*]] = extractelement <4 x half> %p0, i64 0
+  ; SMOLDCHECK: [[BITCAST0:%.*]] = bitcast half [[ee0]] to i16
+  ; SMOLDCHECK: [[ee1:%.*]] = extractelement <4 x half> %p0, i64 1
+  ; SMOLDCHECK: [[BITCAST1:%.*]] = bitcast half [[ee1]] to i16
+  ; SMOLDCHECK:[[ee2:%.*]] = extractelement <4 x half> %p0, i64 2
+  ; SMOLDCHECK: [[BITCAST2:%.*]] = bitcast half [[ee2]] to i16
+  ; SMOLDCHECK: [[ee3:%.*]] = extractelement <4 x half> %p0, i64 3
+  ; SMOLDCHECK: [[BITCAST3:%.*]] = bitcast half [[ee3]] to i16
+  ; SMOLDCHECK: [[ICMPHIGH0:%.*]] = icmp eq i16 [[BITCAST0]], 31744
+  ; SMOLDCHECK: [[ICMPHIGH1:%.*]] = icmp eq i16 [[BITCAST1]], 31744
+  ; SMOLDCHECK: [[ICMPHIGH2:%.*]] = icmp eq i16 [[BITCAST2]], 31744
+  ; SMOLDCHECK: [[ICMPHIGH3:%.*]] = icmp eq i16 [[BITCAST3]], 31744
+  ; SMOLDCHECK: [[ICMPLOW0:%.*]] = icmp eq i16 [[BITCAST0]], -1024
+  ; SMOLDCHECK: [[ICMPLOW1:%.*]] = icmp eq i16 [[BITCAST1]], -1024
+  ; SMOLDCHECK: [[ICMPLOW2:%.*]] = icmp eq i16 [[BITCAST2]], -1024
+  ; SMOLDCHECK: [[ICMPLOW3:%.*]] = icmp eq i16 [[BITCAST3]], -1024
+  ; SMOLDCHECK: [[OR0:%.*]] = or i1 [[ICMPHIGH0]], [[ICMPLOW0]]
+  ; SMOLDCHECK: [[OR1:%.*]] = or i1 [[ICMPHIGH1]], [[ICMPLOW1]]
+  ; SMOLDCHECK: [[OR2:%.*]] = or i1 [[ICMPHIGH2]], [[ICMPLOW2]]
+  ; SMOLDCHECK: [[OR3:%.*]] = or i1 [[ICMPHIGH3]], [[ICMPLOW3]]
+  ; SMOLDCHECK: %.upto019 = insertelement <4 x i1> poison, i1 [[OR0]], i64 0
+  ; SMOLDCHECK: %.upto120 = insertelement <4 x i1> %.upto019, i1 [[OR1]], i64 1
+  ; SMOLDCHECK: %.upto221 = insertelement <4 x i1> %.upto120, i1 [[OR2]], i64 2
+  ; SMOLDCHECK: %0 = insertelement <4 x i1> %.upto221, i1 [[OR3]], i64 3
+
   %hlsl.isinf = call <4 x i1> @llvm.dx.isinf.v4f16(<4 x half> %p0)
   ret <4 x i1> %hlsl.isinf
 }
diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp
index 7d07615d273d7..942912bcfe0e2 100644
--- a/llvm/unittests/TargetParser/TripleTest.cpp
+++ b/llvm/unittests/TargetParser/TripleTest.cpp
@@ -553,6 +553,13 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::ShaderModel, T.getOS());
   EXPECT_EQ(VersionTuple(1, 8), T.getDXILVersion());
 
+  T = Triple("dxilv1.9-unknown-shadermodel6.15-library");
+  EXPECT_EQ(Triple::dxil, T.getArch());
+  EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::ShaderModel, T.getOS());
+  EXPECT_EQ(VersionTuple(1, 9), T.getDXILVersion());
+
   T = Triple("x86_64-unknown-fuchsia");
   EXPECT_EQ(Triple::x86_64, T.getArch());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
@@ -1270,6 +1277,12 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
   EXPECT_EQ(Triple::UnknownOS, T.getOS());
 
+  T = Triple("dxilv1.9-unknown-unknown");
+  EXPECT_EQ(Triple::dxil, T.getArch());
+  EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
+
   // Check specification of unknown SubArch results in
   // unknown architecture.
   T = Triple("dxilv1.999-unknown-unknown");

Copy link

github-actions bot commented Sep 4, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

fixes llvm#156068

- We needed to add a new sub arch to the target tripple so we can test that
emulation does not happen when targeting SM6.9
- The HLSL toolchain needed to be updated to handle the conversion of
  strings to enums for the new sub arch.
- The emulation is done in DXILIntrinsicExpansion.cpp and needs to be able to convert both llvm.is.fpclass and
  lvm.dx.isinf to the proper emulation
- test updates in TargetParser/TripleTest.cpp, isinf.ll, and is_fpclass.ll
@farzonl farzonl force-pushed the feature/isinf_f16_emulation_issue-156068 branch from dcc6ae9 to 8c31865 Compare September 4, 2025 17:24
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:DirectX clang:driver 'clang' and 'clang++' user-facing binaries. Not 'clang-cl' clang Clang issues not falling into any other category HLSL HLSL Language Support
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[HLSL][DXIL] 16 bit overload of isinf should be emulated in sm6.8 and earlier.
2 participants