Skip to content

Commit 5d0ffbe

Browse files
committed
[Matrix] Mark expressions shared between multiple remarks.
This patch adds support for explicitly highlighting sub-expressions shared by multiple leaf nodes. For example, consider the following code:

  %shared.load = tail call <8 x double> @llvm.matrix.columnwise.load.v8f64.p0f64(double* %arg1, i32 %stride, i32 2, i32 4), !dbg !10, !noalias !10
  %trans = tail call <8 x double> @llvm.matrix.transpose.v8f64(<8 x double> %shared.load, i32 2, i32 4), !dbg !10
  tail call void @llvm.matrix.columnwise.store.v8f64.p0f64(<8 x double> %trans, double* %arg3, i32 10, i32 4, i32 2), !dbg !10
  %load.2 = tail call <30 x double> @llvm.matrix.columnwise.load.v30f64.p0f64(double* %arg3, i32 %stride, i32 2, i32 15), !dbg !10, !noalias !10
  %mult = tail call <60 x double> @llvm.matrix.multiply.v60f64.v8f64.v30f64(<8 x double> %trans, <30 x double> %load.2, i32 4, i32 2, i32 15), !dbg !11
  tail call void @llvm.matrix.columnwise.store.v60f64.p0f64(<60 x double> %mult, double* %arg2, i32 10, i32 4, i32 15), !dbg !11

We have two leaf nodes (the 2 stores), and the first store stores %trans, which is also used by the matrix multiply %mult. We generate separate remarks for each leaf (stores). To denote that parts are shared, the shared expressions are prefixed with "shared with remark at line N column M (", i.e. with a reference to the other remark that shares them. The operation summary also denotes the shared operations separately.

Reviewers: anemet, Gerolf, thegameg, hfinkel, andrew.w.kaylor, LuoYuanke
Reviewed By: anemet
Differential Revision: https://reviews.llvm.org/D72526
1 parent 03a6b85 commit 5d0ffbe

File tree

3 files changed

+257
-15
lines changed

3 files changed

+257
-15
lines changed

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -946,13 +946,23 @@ class LowerMatrixIntrinsics {
946946
/// matrix instructions.
947947
const MapVector<Value *, ColumnMatrixTy> &Inst2ColumnMatrix;
948948

949+
/// Mapping from values to the leaves of all expressions that the value is
950+
/// part of.
951+
const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared;
952+
953+
/// Leaf node of the expression to linearize.
954+
Value *Leaf;
955+
949956
/// Used to keep track of sub-expressions that get reused while linearizing
950957
/// the expression. Re-used sub-expressions are marked as (reused).
951958
SmallPtrSet<Value *, 8> ReusedExprs;
952959

953960
ExprLinearizer(const DataLayout &DL,
954-
const MapVector<Value *, ColumnMatrixTy> &Inst2ColumnMatrix)
955-
: Str(), Stream(Str), DL(DL), Inst2ColumnMatrix(Inst2ColumnMatrix) {}
961+
const MapVector<Value *, ColumnMatrixTy> &Inst2ColumnMatrix,
962+
const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared,
963+
Value *Leaf)
964+
: Str(), Stream(Str), DL(DL), Inst2ColumnMatrix(Inst2ColumnMatrix),
965+
Shared(Shared), Leaf(Leaf) {}
956966

957967
void indent(unsigned N) {
958968
LineLength += N;
@@ -1108,11 +1118,30 @@ class LowerMatrixIntrinsics {
11081118
/// Linearize expression \p Expr starting at an indentation of \p Indent.
11091119
/// Expressions that are re-used multiple times are prefixed with (reused)
11101120
/// at the re-used root instruction.
1111-
void linearizeExpr(Value *Expr, unsigned Indent, bool ParentReused) {
1121+
void linearizeExpr(Value *Expr, unsigned Indent, bool ParentReused,
1122+
bool ParentShared) {
11121123
auto *I = cast<Instruction>(Expr);
11131124
maybeIndent(Indent);
11141125
SmallVector<Value *, 8> Ops;
11151126

1127+
// Is Expr shared with other expression leaves?
1128+
bool ExprShared = false;
1129+
1130+
// Deal with shared subtrees. Mark them as shared, if required.
1131+
if (!ParentShared) {
1132+
auto SI = Shared.find(Expr);
1133+
assert(SI != Shared.end() && SI->second.find(Leaf) != SI->second.end());
1134+
1135+
for (Value *S : SI->second) {
1136+
if (S == Leaf)
1137+
continue;
1138+
DebugLoc DL = cast<Instruction>(S)->getDebugLoc();
1139+
write("shared with remark at line " + std::to_string(DL.getLine()) +
1140+
" column " + std::to_string(DL.getCol()) + " (");
1141+
}
1142+
ExprShared = SI->second.size() > 1;
1143+
}
1144+
11161145
bool Reused = !ReusedExprs.insert(Expr).second;
11171146
if (Reused && !ParentReused)
11181147
write("(reused) ");
@@ -1144,7 +1173,7 @@ class LowerMatrixIntrinsics {
11441173

11451174
maybeIndent(Indent + 1);
11461175
if (isMatrix(Op))
1147-
linearizeExpr(Op, Indent + 1, Reused);
1176+
linearizeExpr(Op, Indent + 1, Reused, ExprShared);
11481177
else
11491178
write(Op);
11501179
if (Op != Ops.back())
@@ -1171,7 +1200,6 @@ class LowerMatrixIntrinsics {
11711200
///
11721201
/// TODO:
11731202
/// * Summarize number of vector instructions generated for each expression.
1174-
/// * Account for shared sub-expressions.
11751203
/// * Propagate matrix remarks up the inlining chain.
11761204
struct RemarkGenerator {
11771205
const MapVector<Value *, ColumnMatrixTy> &Inst2ColumnMatrix;
@@ -1194,9 +1222,27 @@ class LowerMatrixIntrinsics {
11941222
return Leaves;
11951223
}
11961224

1225+
/// Recursively traverse expression \p V starting at \p Leaf and add \p Leaf
1226+
/// to all visited expressions in \p Shared.
1227+
void collectSharedInfo(Value *Leaf, Value *V,
1228+
DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared) {
1229+
1230+
if (Inst2ColumnMatrix.find(V) == Inst2ColumnMatrix.end())
1231+
return;
1232+
1233+
auto I = Shared.insert({V, {}});
1234+
I.first->second.insert(Leaf);
1235+
1236+
for (Value *Op : cast<Instruction>(V)->operand_values())
1237+
collectSharedInfo(Leaf, Op, Shared);
1238+
return;
1239+
}
1240+
11971241
/// Calculate the number of exclusive and shared op counts for expression
11981242
/// starting at \p Root. Expressions used multiple times are counted once.
1199-
OpInfoTy sumOpInfos(Value *Root, SmallPtrSetImpl<Value *> &ReusedExprs) {
1243+
std::pair<OpInfoTy, OpInfoTy>
1244+
sumOpInfos(Value *Root, SmallPtrSetImpl<Value *> &ReusedExprs,
1245+
DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared) {
12001246
auto CM = Inst2ColumnMatrix.find(Root);
12011247
if (CM == Inst2ColumnMatrix.end())
12021248
return {};
@@ -1205,10 +1251,21 @@ class LowerMatrixIntrinsics {
12051251
if (!ReusedExprs.insert(Root).second)
12061252
return {};
12071253

1208-
OpInfoTy Count = CM->second.getOpInfo();
1209-
for (Value *Op : cast<Instruction>(Root)->operand_values())
1210-
Count += sumOpInfos(Op, ReusedExprs);
1211-
return Count;
1254+
OpInfoTy SharedCount;
1255+
OpInfoTy Count;
1256+
1257+
auto I = Shared.find(Root);
1258+
if (I->second.size() == 1)
1259+
Count = CM->second.getOpInfo();
1260+
else
1261+
SharedCount = CM->second.getOpInfo();
1262+
1263+
for (Value *Op : cast<Instruction>(Root)->operand_values()) {
1264+
auto C = sumOpInfos(Op, ReusedExprs, Shared);
1265+
Count += C.first;
1266+
SharedCount += C.second;
1267+
}
1268+
return {Count, SharedCount};
12121269
}
12131270

12141271
void emitRemarks() {
@@ -1218,26 +1275,47 @@ class LowerMatrixIntrinsics {
12181275
// Find leaves of matrix expressions.
12191276
auto Leaves = getExpressionLeaves();
12201277

1278+
DenseMap<Value *, SmallPtrSet<Value *, 2>> Shared;
1279+
1280+
for (Value *Leaf : Leaves)
1281+
collectSharedInfo(Leaf, Leaf, Shared);
1282+
12211283
// Generate remarks for each leaf.
12221284
for (auto *L : Leaves) {
12231285
SmallPtrSet<Value *, 8> ReusedExprs;
1224-
auto Counts = sumOpInfos(L, ReusedExprs);
1286+
OpInfoTy Counts, SharedCounts;
1287+
std::tie(Counts, SharedCounts) = sumOpInfos(L, ReusedExprs, Shared);
1288+
12251289
OptimizationRemark Rem(DEBUG_TYPE, "matrix-lowered",
12261290
cast<Instruction>(L)->getDebugLoc(),
12271291
cast<Instruction>(L)->getParent());
1292+
12281293
Rem << "Lowered with ";
12291294
Rem << ore::NV("NumStores", Counts.NumStores) << " stores, "
12301295
<< ore::NV("NumLoads", Counts.NumLoads) << " loads, "
12311296
<< ore::NV("NumComputeOps", Counts.NumComputeOps) << " compute ops";
12321297

1233-
Rem << ("\n" + linearize(L, DL));
1298+
if (SharedCounts.NumStores > 0 || SharedCounts.NumLoads > 0 ||
1299+
SharedCounts.NumComputeOps > 0) {
1300+
Rem << ",\nadditionally "
1301+
<< ore::NV("NumStores", SharedCounts.NumStores) << " stores, "
1302+
<< ore::NV("NumLoads", SharedCounts.NumLoads) << " loads, "
1303+
<< ore::NV("NumFPOps", SharedCounts.NumComputeOps)
1304+
<< " compute ops"
1305+
<< " are shared with other expressions";
1306+
}
1307+
1308+
Rem << ("\n" + linearize(L, Shared, DL));
12341309
ORE.emit(Rem);
12351310
}
12361311
}
12371312

1238-
std::string linearize(Value *L, const DataLayout &DL) {
1239-
ExprLinearizer Lin(DL, Inst2ColumnMatrix);
1240-
Lin.linearizeExpr(L, 0, false);
1313+
std::string
1314+
linearize(Value *L,
1315+
const DenseMap<Value *, SmallPtrSet<Value *, 2>> &Shared,
1316+
const DataLayout &DL) {
1317+
ExprLinearizer Lin(DL, Inst2ColumnMatrix, Shared, L);
1318+
Lin.linearizeExpr(L, 0, false, false);
12411319
return Lin.getResult();
12421320
}
12431321
};
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
; REQUIRES: aarch64-registered-target
2+
3+
; This test needs to be target specific due to the cost estimate in the output.
4+
5+
; RUN: opt -lower-matrix-intrinsics -pass-remarks-output=%t -pass-remarks=lower-matrix-intrinsics -mtriple=arm64-apple-iphoneos %s 2>&1 -disable-output | FileCheck --check-prefix=STDERR %s
6+
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
7+
8+
; YAML-LABEL: --- !Passed
9+
; YAML-NEXT: Pass: lower-matrix-intrinsics
10+
; YAML-NEXT: Name: matrix-lowered
11+
; YAML-NEXT: DebugLoc: { File: test.cpp, Line: 35, Column: 71 }
12+
; YAML-NEXT: Function: test_2leafs
13+
; YAML-NEXT: Args:
14+
; YAML-NEXT: - String: 'Lowered with '
15+
; YAML-NEXT: - NumStores: '4'
16+
; YAML-NEXT: - String: ' stores, '
17+
; YAML-NEXT: - NumLoads: '0'
18+
; YAML-NEXT: - String: ' loads, '
19+
; YAML-NEXT: - NumComputeOps: '0'
20+
; YAML-NEXT: - String: ' compute ops'
21+
; YAML-NEXT: - String: ',
22+
; YAML-NEXT: additionally '
23+
; YAML-NEXT: - NumStores: '0'
24+
; YAML-NEXT: - String: ' stores, '
25+
; YAML-NEXT: - NumLoads: '4'
26+
; YAML-NEXT: - String: ' loads, '
27+
; YAML-NEXT: - NumFPOps: '16'
28+
; YAML-NEXT: - String: ' compute ops'
29+
; YAML-NEXT: - String: ' are shared with other expressions'
30+
; YAML-NEXT: - String: |
31+
; YAML: columnwise.store.4x2.double(
32+
; YAML-NEXT: shared with remark at line 35 column 45 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1,
33+
; YAML-NEXT: scalar)),
34+
; YAML-NEXT: addr %arg3,
35+
; YAML-NEXT: 10)
36+
37+
; YAML-LABEL: --- !Passed
38+
; YAML-NEXT: Pass: lower-matrix-intrinsics
39+
; YAML-NEXT: Name: matrix-lowered
40+
; YAML-NEXT: DebugLoc: { File: test.cpp, Line: 35, Column: 45 }
41+
; YAML-NEXT: Function: test_2leafs
42+
; YAML-NEXT: Args:
43+
; YAML-NEXT: - String: 'Lowered with '
44+
; YAML-NEXT: - NumStores: '30'
45+
; YAML-NEXT: - String: ' stores, '
46+
; YAML-NEXT: - NumLoads: '45'
47+
; YAML-NEXT: - String: ' loads, '
48+
; YAML-NEXT: - NumComputeOps: '120'
49+
; YAML-NEXT: - String: ' compute ops'
50+
; YAML-NEXT: - String: ',
51+
; YAML-NEXT: additionally '
52+
; YAML-NEXT: - NumStores: '0'
53+
; YAML-NEXT: - String: ' stores, '
54+
; YAML-NEXT: - NumLoads: '4'
55+
; YAML-NEXT: - String: ' loads, '
56+
; YAML-NEXT: - NumFPOps: '16'
57+
; YAML-NEXT: - String: ' compute ops'
58+
; YAML-NEXT: - String: ' are shared with other expressions'
59+
; YAML-NEXT: - String: |
60+
; YAML: columnwise.store.4x15.double(
61+
; YAML-NEXT: fsub(
62+
; YAML-NEXT: columnwise.load.4x15.double(addr %arg2, 20),
63+
; YAML-NEXT: multiply.4x2.2x15.double(
64+
; YAML-NEXT: shared with remark at line 35 column 71 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1,
65+
; YAML-NEXT: scalar)),
66+
; YAML-NEXT: columnwise.load.2x15.double(addr %arg3, scalar))),
67+
; YAML-NEXT: addr %arg2,
68+
; YAML-NEXT: 10)
69+
70+
71+
; STDERR-LABEL: remark: test.cpp:35:71: Lowered with 4 stores, 0 loads, 0 compute ops,
72+
; STDERR-NEXT: additionally 0 stores, 4 loads, 16 compute ops are shared with other expressions
73+
; STDERR-NEXT: columnwise.store.4x2.double(
74+
; STDERR-NEXT: shared with remark at line 35 column 45 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1,
75+
; STDERR-NEXT: scalar)),
76+
; STDERR-NEXT: addr %arg3,
77+
; STDERR-NEXT: 10)
78+
79+
; STDERR-LABEL: remark: test.cpp:35:45: Lowered with 30 stores, 45 loads, 120 compute ops,
80+
; STDERR-NEXT: additionally 0 stores, 4 loads, 16 compute ops are shared with other expressions
81+
; STDERR-NEXT: columnwise.store.4x15.double(
82+
; STDERR-NEXT: fsub(
83+
; STDERR-NEXT: columnwise.load.4x15.double(addr %arg2, 20),
84+
; STDERR-NEXT: multiply.4x2.2x15.double(
85+
; STDERR-NEXT: shared with remark at line 35 column 71 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1,
86+
; STDERR-NEXT: scalar)),
87+
; STDERR-NEXT: columnwise.load.2x15.double(addr %arg3, scalar))),
88+
; STDERR-NEXT: addr %arg2,
89+
; STDERR-NEXT: 10)
90+
define void @test_2leafs(double* %arg1, double* %arg2, double* %arg3, i32 %stride, i32 %offset) !dbg !8 {
91+
bb:
92+
%shared.load = tail call <8 x double> @llvm.matrix.columnwise.load.v8f64.p0f64(double* %arg1, i32 %stride, i32 2, i32 4), !dbg !10, !noalias !10
93+
%shared.load.2 = tail call <30 x double> @llvm.matrix.columnwise.load.v30f64.p0f64(double* %arg3, i32 %stride, i32 2, i32 15), !dbg !10, !noalias !10
94+
%tmp17 = tail call <8 x double> @llvm.matrix.transpose.v8f64(<8 x double> %shared.load, i32 2, i32 4), !dbg !10
95+
tail call void @llvm.matrix.columnwise.store.v8f64.p0f64(<8 x double> %tmp17, double* %arg3, i32 10, i32 4, i32 2), !dbg !10
96+
%tmp18 = tail call <60 x double> @llvm.matrix.columnwise.load.v60f64.p0f64(double* %arg2, i32 20, i32 4, i32 15), !dbg !11
97+
%tmp48 = tail call <60 x double> @llvm.matrix.multiply.v60f64.v8f64.v30f64(<8 x double> %tmp17, <30 x double> %shared.load.2, i32 4, i32 2, i32 15), !dbg !11
98+
%tmp49 = fsub <60 x double> %tmp18, %tmp48, !dbg !11
99+
tail call void @llvm.matrix.columnwise.store.v60f64.p0f64(<60 x double> %tmp49, double* %arg2, i32 10, i32 4, i32 15), !dbg !11
100+
ret void
101+
}
102+
103+
declare <8 x double> @llvm.matrix.transpose.v8f64(<8 x double>, i32 immarg, i32 immarg)
104+
declare <8 x double> @llvm.matrix.columnwise.load.v8f64.p0f64(double*, i32, i32 immarg, i32 immarg)
105+
declare <30 x double> @llvm.matrix.columnwise.load.v30f64.p0f64(double*, i32, i32 immarg, i32 immarg)
106+
declare <60 x double> @llvm.matrix.columnwise.load.v60f64.p0f64(double*, i32, i32 immarg, i32 immarg)
107+
declare void @llvm.matrix.columnwise.store.v60f64.p0f64(<60 x double>, double* writeonly, i32, i32 immarg, i32 immarg)
108+
declare void @llvm.matrix.columnwise.store.v8f64.p0f64(<8 x double>, double* writeonly, i32, i32 immarg, i32 immarg)
109+
declare <60 x double> @llvm.matrix.multiply.v60f64.v8f64.v30f64(<8 x double>, <30 x double>, i32 immarg, i32 immarg, i32 immarg)
110+
111+
!llvm.module.flags = !{!0, !1, !2, !3}
112+
!llvm.dbg.cu = !{!4}
113+
!llvm.ident = !{!7}
114+
115+
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 13, i32 0]}
116+
!1 = !{i32 2, !"Debug Info Version", i32 3}
117+
!2 = !{i32 1, !"wchar_size", i32 4}
118+
!3 = !{i32 7, !"PIC Level", i32 2}
119+
!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, nameTableKind: GNU)
120+
!5 = !DIFile(filename: "test.cpp", directory: "")
121+
!6 = !{}
122+
!7 = !{!"clang"}
123+
!8 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 26, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
124+
!9 = !DISubroutineType(types: !6)
125+
!10 = distinct !DILocation(line: 35, column: 71, scope: !8)
126+
!11 = distinct !DILocation(line: 35, column: 45, scope: !8)
127+
!12 = !DILocation(line: 800, column: 17, scope: !13, inlinedAt: !15)
128+
!13 = distinct !DISubprogram(name: "foo", scope: !14, file: !14, line: 789, type: !9, scopeLine: 790, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
129+
!14 = !DIFile(filename: "bar.h", directory: "bar")
130+
!15 = distinct !DILocation(line: 1280, column: 5, scope: !16, inlinedAt: !18)
131+
!16 = distinct !DISubprogram(name: "zar", scope: !17, file: !17, line: 1275, type: !9, scopeLine: 1278, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
132+
!17 = !DIFile(filename: "file1.h", directory: "dir1")
133+
!18 = distinct !DILocation(line: 1278, column: 1, scope: !19, inlinedAt: !20)
134+
!19 = distinct !DISubprogram(name: "yo", scope: !17, file: !17, line: 1275, type: !9, scopeLine: 1278, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
135+
!20 = distinct !DILocation(line: 2514, column: 26, scope: !21, inlinedAt: !22)
136+
!21 = distinct !DISubprogram(name: "zzzz", scope: !14, file: !14, line: 2505, type: !9, scopeLine: 2506, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
137+
!22 = distinct !DILocation(line: 1263, column: 5, scope: !23, inlinedAt: !24)
138+
!23 = distinct !DISubprogram(name: "ppppp", scope: !17, file: !17, line: 1258, type: !9, scopeLine: 1261, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
139+
!24 = distinct !DILocation(line: 1261, column: 1, scope: !25, inlinedAt: !26)
140+
!25 = distinct !DISubprogram(name: "qqqq", scope: !17, file: !17, line: 1258, type: !9, scopeLine: 1261, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
141+
!26 = distinct !DILocation(line: 168, column: 7, scope: !27, inlinedAt: !29)
142+
!27 = distinct !DISubprogram(name: "lll", scope: !28, file: !28, line: 166, type: !9, scopeLine: 169, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
143+
!28 = !DIFile(filename: "file2.h", directory: "dir2")
144+
!29 = distinct !DILocation(line: 169, column: 1, scope: !30, inlinedAt: !31)
145+
!30 = distinct !DISubprogram(name: "Expr1", scope: !28, file: !28, line: 166, type: !9, scopeLine: 169, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
146+
!31 = distinct !DILocation(line: 368, column: 12, scope: !32, inlinedAt: !33)
147+
!32 = distinct !DISubprogram(name: "yyyyy", scope: !14, file: !14, line: 364, type: !9, scopeLine: 365, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
148+
!33 = distinct !DILocation(line: 1297, column: 34, scope: !34, inlinedAt: !35)
149+
!34 = distinct !DISubprogram(name: "eeeee", scope: !14, file: !14, line: 1290, type: !9, scopeLine: 1291, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
150+
!35 = distinct !DILocation(line: 2306, column: 5, scope: !36, inlinedAt: !11)
151+
!36 = distinct !DISubprogram(name: "aaaaa", scope: !37, file: !37, line: 2304, type: !9, scopeLine: 2305, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
152+
!37 = !DIFile(filename: "foo.c", directory: "/")
153+
!38 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
154+
!39 = distinct !DILocation(line: 44, column: 44, scope: !38)
155+
!40 = distinct !DILocation(line: 55, column: 55, scope: !38)
156+
!41 = distinct !DILocation(line: 66, column: 66, scope: !38)
157+
!42 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
158+
!43 = distinct !DILocation(line: 77, column: 77, scope: !42)
159+
!44 = distinct !DILocation(line: 88, column: 88, scope: !42)
160+
!45 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
161+
!46 = distinct !DILocation(line: 99, column: 99, scope: !45)
162+
!47 = distinct !DILocation(line: 111, column: 111, scope: !45)

llvm/test/Transforms/LowerMatrixIntrinsics/remarks.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; REQUIRES: aarch64-registered-target
22

3+
; This test needs to be target specific due to the cost estimate in the output.
4+
35
; RUN: opt -lower-matrix-intrinsics -pass-remarks=lower-matrix-intrinsics -mtriple=arm64-apple-iphoneos < %s 2>&1 | FileCheck %s
46

57
; CHECK-LABEL: remark: test.h:40:20: Lowered with 6 stores, 6 loads, 24 compute ops

0 commit comments

Comments
 (0)