-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[AMDGPU] Fix hw stage metadata setting for unsigned values #154502
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Ana Mihajlovic (mihajlovicana) ChangesFull diff: https://github.com/llvm/llvm-project/pull/154502.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 845449931b5a9..70bcbe79c52f0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1441,9 +1441,10 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
MD->setComputeRegisters(".dynamic_vgpr_en", true);
}
- MD->setHwStage(CC, ".lds_size",
- (unsigned)(CurrentProgramInfo.LdsSize *
- getLdsDwGranularity(ST) * sizeof(uint32_t)));
+ MD->updateHwStageMaximum(
+ CC, ".lds_size",
+ (unsigned)(CurrentProgramInfo.LdsSize * getLdsDwGranularity(ST) *
+ sizeof(uint32_t)));
}
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index fd6253daa327a..8d1b0e6dac2d6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -1061,6 +1061,17 @@ VersionTuple AMDGPUPALMetadata::getPALVersion() {
return VersionTuple(getPALVersion(0), getPALVersion(1));
}
+// Set the field in a given .hardware_stages entry to a maximum value
+void AMDGPUPALMetadata::updateHwStageMaximum(unsigned CC, StringRef field,
+ unsigned Val) {
+ auto HwStageFieldMapNode = getHwStage(CC);
+ auto It = HwStageFieldMapNode.find(field);
+ if (It == HwStageFieldMapNode.end())
+ HwStageFieldMapNode[field] = Val;
+ else
+ It->second = std::max<unsigned>(It->second.getUInt(), Val);
+}
+
// Set the field in a given .hardware_stages entry
void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, unsigned Val) {
getHwStage(CC)[field] = Val;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 4830db5fda50b..e50150cc8de94 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -156,6 +156,7 @@ class AMDGPUPALMetadata {
unsigned getPALMinorVersion();
VersionTuple getPALVersion();
+ void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val);
void setHwStage(unsigned CC, StringRef field, unsigned Val);
void setHwStage(unsigned CC, StringRef field, bool Val);
void setHwStage(unsigned CC, StringRef field, msgpack::Type Type,
diff --git a/llvm/test/CodeGen/AMDGPU/lds-size-pal-metadata.ll b/llvm/test/CodeGen/AMDGPU/lds-size-pal-metadata.ll
new file mode 100644
index 0000000000000..ff260997f3357
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-size-pal-metadata.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=PAL %s
+
+@x = addrspace(3) global i32 undef
+
+; PAL: .hardware_stages:
+; PAL: .lds_size: 0x200
+; PAL: .shader_functions:
+; PAL: f1:
+; PAL: .lds_size: 0x4
+; PAL: f2:
+; PAL: .lds_size: 0
+
+define amdgpu_gfx void @f1(i32 %val) {
+ store i32 %val, ptr addrspace(3) @x
+ ret void
+}
+
+define amdgpu_gfx void @f2(i32 %a, ptr addrspace(1) %ptr) {
+ store i32 %a, ptr addrspace(1) %ptr
+ ret void
+}
+
+!amdgpu.pal.metadata.msgpack = !{!8}
+!8 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C3\AA.tgid_y_en\C3\AA.tgid_z_en\C3\AF.tidig_comp_cnt\00\B0.hardware_stages\81\A3.cs\8D\AF.checksum_value\00\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93 \01\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\10\00\00\06\CE\FF\FF\FF\FF\00\01\02\03\04\05\06\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\10\00\00\02\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\10\AB.vgpr_limit\CC\80\AF.wavefront_size \AF.wg_round_robin\C2\B7.internal_pipeline_hash\92\CF\F6\B5\A6D\E3\BE\9D\D6\CFF\\=l\09\AB\F0#\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\00\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CD\FF\FF\A5.type\A2Cs\B0.user_data_limit\07\A9.uses_cps\C3\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF-ua\DD\EA7\19\94\CF\80\16\9A\FC\9B\A6\1Dk\AD.llpc_version\A477.4\AEamdpal.version\92\03\00"}
|
auto It = HwStageFieldMapNode.find(field); | ||
if (It == HwStageFieldMapNode.end()) | ||
HwStageFieldMapNode[field] = Val; | ||
else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use insert instead of double map lookup
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use insert instead of double map lookup
I don't see msgpack::MapDocNode having insert or emplace... Should that be added ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably should have an insert
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could perhaps use operator[]
for this purpose, if you can detect whether it has inserted a new empty node.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could perhaps use
operator[]
for this purpose, if you can detect whether it has inserted a new empty node.
Not sure if I'm correct on this, from what I've saw [] does construct a new entry but doesn't initialize since it doesn't have to hold uint value (getKind() method returns Type::Empty), so I assume entry has to be directly initialized
16895ed
to
0c05e3d
Compare
// Set the field in a given .hardware_stages entry to a maximum value | ||
void AMDGPUPALMetadata::updateHwStageMaximum(unsigned CC, StringRef field, | ||
unsigned Val) { | ||
auto HwStageFieldMapNode = getHwStage(CC); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No auto
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=PAL %s | ||
|
||
@x = addrspace(3) global i32 undef |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@x = addrspace(3) global i32 undef | |
@x = addrspace(3) global i32 poison |
1e115d5
to
0c05e3d
Compare
ping |
@@ -0,0 +1,25 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=PAL %s | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you comment what this is testing? I'm not sure I understand what the issue is
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lds_size of f2 is zero so the purpose is for it to not overwrite the overall lds size when f1 sets it
2e04a5b
to
4b15bbf
Compare
auto It = HwStageFieldMapNode.find(field); | ||
if (It == HwStageFieldMapNode.end()) | ||
HwStageFieldMapNode[field] = Val; | ||
else | ||
It->second = std::max<unsigned>(It->second.getUInt(), Val); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you do something like:
auto It = HwStageFieldMapNode.find(field); | |
if (It == HwStageFieldMapNode.end()) | |
HwStageFieldMapNode[field] = Val; | |
else | |
It->second = std::max<unsigned>(It->second.getUInt(), Val); | |
auto &Node = HwStageFieldMapNode[field]; | |
if (Node.isEmpty()) | |
Node = Val; | |
else | |
Node = std::max<unsigned>(Node.getUInt(), Val); |
No description provided.