|
| 1 | +; REQUIRES: aarch64-registered-target |
| 2 | + |
| 3 | +; This test needs to be target-specific due to the cost estimate in the output. |
| 4 | + |
| 5 | +; RUN: opt -lower-matrix-intrinsics -pass-remarks-output=%t -pass-remarks=lower-matrix-intrinsics -mtriple=arm64-apple-iphoneos %s 2>&1 -disable-output | FileCheck --check-prefix=STDERR %s |
| 6 | +; RUN: FileCheck --input-file=%t --check-prefix=YAML %s |
| 7 | + |
| 8 | +; YAML-LABEL: --- !Passed |
| 9 | +; YAML-NEXT: Pass: lower-matrix-intrinsics |
| 10 | +; YAML-NEXT: Name: matrix-lowered |
| 11 | +; YAML-NEXT: DebugLoc: { File: test.cpp, Line: 35, Column: 71 } |
| 12 | +; YAML-NEXT: Function: test_2leafs |
| 13 | +; YAML-NEXT: Args: |
| 14 | +; YAML-NEXT: - String: 'Lowered with ' |
| 15 | +; YAML-NEXT: - NumStores: '4' |
| 16 | +; YAML-NEXT: - String: ' stores, ' |
| 17 | +; YAML-NEXT: - NumLoads: '0' |
| 18 | +; YAML-NEXT: - String: ' loads, ' |
| 19 | +; YAML-NEXT: - NumComputeOps: '0' |
| 20 | +; YAML-NEXT: - String: ' compute ops' |
| 21 | +; YAML-NEXT: - String: ', |
| 22 | +; YAML-NEXT: additionally ' |
| 23 | +; YAML-NEXT: - NumStores: '0' |
| 24 | +; YAML-NEXT: - String: ' stores, ' |
| 25 | +; YAML-NEXT: - NumLoads: '4' |
| 26 | +; YAML-NEXT: - String: ' loads, ' |
| 27 | +; YAML-NEXT: - NumFPOps: '16' |
| 28 | +; YAML-NEXT: - String: ' compute ops' |
| 29 | +; YAML-NEXT: - String: ' are shared with other expressions' |
| 30 | +; YAML-NEXT: - String: | |
| 31 | +; YAML: columnwise.store.4x2.double( |
| 32 | +; YAML-NEXT: shared with remark at line 35 column 45 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1, |
| 33 | +; YAML-NEXT: scalar)), |
| 34 | +; YAML-NEXT: addr %arg3, |
| 35 | +; YAML-NEXT: 10) |
| 36 | + |
| 37 | +; YAML-LABEL: --- !Passed |
| 38 | +; YAML-NEXT: Pass: lower-matrix-intrinsics |
| 39 | +; YAML-NEXT: Name: matrix-lowered |
| 40 | +; YAML-NEXT: DebugLoc: { File: test.cpp, Line: 35, Column: 45 } |
| 41 | +; YAML-NEXT: Function: test_2leafs |
| 42 | +; YAML-NEXT: Args: |
| 43 | +; YAML-NEXT: - String: 'Lowered with ' |
| 44 | +; YAML-NEXT: - NumStores: '30' |
| 45 | +; YAML-NEXT: - String: ' stores, ' |
| 46 | +; YAML-NEXT: - NumLoads: '45' |
| 47 | +; YAML-NEXT: - String: ' loads, ' |
| 48 | +; YAML-NEXT: - NumComputeOps: '120' |
| 49 | +; YAML-NEXT: - String: ' compute ops' |
| 50 | +; YAML-NEXT: - String: ', |
| 51 | +; YAML-NEXT: additionally ' |
| 52 | +; YAML-NEXT: - NumStores: '0' |
| 53 | +; YAML-NEXT: - String: ' stores, ' |
| 54 | +; YAML-NEXT: - NumLoads: '4' |
| 55 | +; YAML-NEXT: - String: ' loads, ' |
| 56 | +; YAML-NEXT: - NumFPOps: '16' |
| 57 | +; YAML-NEXT: - String: ' compute ops' |
| 58 | +; YAML-NEXT: - String: ' are shared with other expressions' |
| 59 | +; YAML-NEXT: - String: | |
| 60 | +; YAML: columnwise.store.4x15.double( |
| 61 | +; YAML-NEXT: fsub( |
| 62 | +; YAML-NEXT: columnwise.load.4x15.double(addr %arg2, 20), |
| 63 | +; YAML-NEXT: multiply.4x2.2x15.double( |
| 64 | +; YAML-NEXT: shared with remark at line 35 column 71 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1, |
| 65 | +; YAML-NEXT: scalar)), |
| 66 | +; YAML-NEXT: columnwise.load.2x15.double(addr %arg3, scalar))), |
| 67 | +; YAML-NEXT: addr %arg2, |
| 68 | +; YAML-NEXT: 10) |
| 69 | + |
| 70 | + |
| 71 | +; STDERR-LABEL: remark: test.cpp:35:71: Lowered with 4 stores, 0 loads, 0 compute ops, |
| 72 | +; STDERR-NEXT: additionally 0 stores, 4 loads, 16 compute ops are shared with other expressions |
| 73 | +; STDERR-NEXT: columnwise.store.4x2.double( |
| 74 | +; STDERR-NEXT: shared with remark at line 35 column 45 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1, |
| 75 | +; STDERR-NEXT: scalar)), |
| 76 | +; STDERR-NEXT: addr %arg3, |
| 77 | +; STDERR-NEXT: 10) |
| 78 | + |
| 79 | +; STDERR-LABEL: remark: test.cpp:35:45: Lowered with 30 stores, 45 loads, 120 compute ops, |
| 80 | +; STDERR-NEXT: additionally 0 stores, 4 loads, 16 compute ops are shared with other expressions |
| 81 | +; STDERR-NEXT: columnwise.store.4x15.double( |
| 82 | +; STDERR-NEXT: fsub( |
| 83 | +; STDERR-NEXT: columnwise.load.4x15.double(addr %arg2, 20), |
| 84 | +; STDERR-NEXT: multiply.4x2.2x15.double( |
| 85 | +; STDERR-NEXT: shared with remark at line 35 column 71 (transpose.2x4.double(columnwise.load.2x4.double(addr %arg1, |
| 86 | +; STDERR-NEXT: scalar)), |
| 87 | +; STDERR-NEXT: columnwise.load.2x15.double(addr %arg3, scalar))), |
| 88 | +; STDERR-NEXT: addr %arg2, |
| 89 | +; STDERR-NEXT: 10) |
; Test input: one function whose two matrix stores share a common
; sub-expression (the transpose of the matrix loaded from %arg1). The checks
; earlier in this file expect one remark per store, each reporting the shared
; part separately ("shared with remark at line 35 column ...").
;
; The first four calls carry !dbg !10 (line 35, column 71); the remaining
; instructions carry !dbg !11 (line 35, column 45). These are the two
; locations the expected remarks are keyed on.
;
; %offset is not used anywhere in the body.
| 90 | +define void @test_2leafs(double* %arg1, double* %arg2, double* %arg3, i32 %stride, i32 %offset) !dbg !8 { |
| 91 | +bb: |
; Load from %arg1 with a runtime stride (%stride); the expected remark text
; prints this as columnwise.load.2x4.double(addr %arg1, scalar).
; NOTE(review): !noalias is attached to !10, which is defined further down as
; a DILocation rather than an alias-scope list -- confirm this is intentional.
| 92 | + %shared.load = tail call <8 x double> @llvm.matrix.columnwise.load.v8f64.p0f64(double* %arg1, i32 %stride, i32 2, i32 4), !dbg !10, !noalias !10 |
; Second runtime-stride load, from %arg3; it feeds only the multiply below.
| 93 | + %shared.load.2 = tail call <30 x double> @llvm.matrix.columnwise.load.v30f64.p0f64(double* %arg3, i32 %stride, i32 2, i32 15), !dbg !10, !noalias !10 |
; %tmp17 is the shared value: it is used by the store on the next line and by
; the multiply that feeds the second store.
| 94 | + %tmp17 = tail call <8 x double> @llvm.matrix.transpose.v8f64(<8 x double> %shared.load, i32 2, i32 4), !dbg !10 |
; First store: writes %tmp17 to %arg3 with constant stride 10.
| 95 | + tail call void @llvm.matrix.columnwise.store.v8f64.p0f64(<8 x double> %tmp17, double* %arg3, i32 10, i32 4, i32 2), !dbg !10 |
; Load from %arg2 with constant stride 20 (the minuend of the fsub below).
| 96 | + %tmp18 = tail call <60 x double> @llvm.matrix.columnwise.load.v60f64.p0f64(double* %arg2, i32 20, i32 4, i32 15), !dbg !11 |
; Multiply the shared transpose by %shared.load.2, producing 60 elements.
| 97 | + %tmp48 = tail call <60 x double> @llvm.matrix.multiply.v60f64.v8f64.v30f64(<8 x double> %tmp17, <30 x double> %shared.load.2, i32 4, i32 2, i32 15), !dbg !11 |
| 98 | + %tmp49 = fsub <60 x double> %tmp18, %tmp48, !dbg !11 |
; Second store: writes the difference back to %arg2 with constant stride 10.
| 99 | + tail call void @llvm.matrix.columnwise.store.v60f64.p0f64(<60 x double> %tmp49, double* %arg2, i32 10, i32 4, i32 15), !dbg !11 |
| 100 | + ret void |
| 101 | +} |
| 102 | + |
; Declarations of the matrix intrinsics called by @test_2leafs. The trailing
; i32 parameters are marked immarg; at the call sites they receive the constant
; shape values (e.g. 2, 4), while the i32 stride parameter of the loads and
; stores is not immarg and is passed either %stride or a constant. The pointer
; parameter of the two store intrinsics is marked writeonly.
| 103 | +declare <8 x double> @llvm.matrix.transpose.v8f64(<8 x double>, i32 immarg, i32 immarg) |
| 104 | +declare <8 x double> @llvm.matrix.columnwise.load.v8f64.p0f64(double*, i32, i32 immarg, i32 immarg) |
| 105 | +declare <30 x double> @llvm.matrix.columnwise.load.v30f64.p0f64(double*, i32, i32 immarg, i32 immarg) |
| 106 | +declare <60 x double> @llvm.matrix.columnwise.load.v60f64.p0f64(double*, i32, i32 immarg, i32 immarg) |
| 107 | +declare void @llvm.matrix.columnwise.store.v60f64.p0f64(<60 x double>, double* writeonly, i32, i32 immarg, i32 immarg) |
| 108 | +declare void @llvm.matrix.columnwise.store.v8f64.p0f64(<8 x double>, double* writeonly, i32, i32 immarg, i32 immarg) |
| 109 | +declare <60 x double> @llvm.matrix.multiply.v60f64.v8f64.v30f64(<8 x double>, <30 x double>, i32 immarg, i32 immarg, i32 immarg) |
| 110 | + |
; Named metadata: module flags (!0-!3), the single compile unit (!4) and the
; producer identification string (!7).
| 111 | +!llvm.module.flags = !{!0, !1, !2, !3} |
| 112 | +!llvm.dbg.cu = !{!4} |
| 113 | +!llvm.ident = !{!7} |
| 114 | +|
| 115 | +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 13, i32 0]} |
| 116 | +!1 = !{i32 2, !"Debug Info Version", i32 3} |
| 117 | +!2 = !{i32 1, !"wchar_size", i32 4} |
| 118 | +!3 = !{i32 7, !"PIC Level", i32 2} |
; Compile unit for test.cpp (file node !5); note emissionKind is NoDebug.
| 119 | +!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, nameTableKind: GNU) |
| 120 | +!5 = !DIFile(filename: "test.cpp", directory: "") |
| 121 | +!6 = !{} |
| 122 | +!7 = !{!"clang"} |
; !8 is the subprogram attached to @test_2leafs via "!dbg !8". Its debug-info
; name is "test", whereas the expected remarks print the IR function name
; (test_2leafs).
| 123 | +!8 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 26, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 124 | +!9 = !DISubroutineType(types: !6) |
; The two source locations used by the instructions in @test_2leafs; they
; correspond to the test.cpp:35:71 and test.cpp:35:45 remarks expected above.
| 125 | +!10 = distinct !DILocation(line: 35, column: 71, scope: !8) |
| 126 | +!11 = distinct !DILocation(line: 35, column: 45, scope: !8) |
; Nodes !12 through !47: an inlinedAt chain of locations (!12 -> !15 -> ... ->
; !35 -> !11) with their subprograms and files, followed by three distinct
; subprograms all named "test2" (!38, !42, !45) and locations scoped to them.
; None of these nodes is referenced by an instruction in @test_2leafs or by the
; named metadata of this module.
; NOTE(review): they look like leftovers from reducing a larger test and could
; probably be dropped without changing the expected output -- confirm before
; removing.
| 127 | +!12 = !DILocation(line: 800, column: 17, scope: !13, inlinedAt: !15) |
| 128 | +!13 = distinct !DISubprogram(name: "foo", scope: !14, file: !14, line: 789, type: !9, scopeLine: 790, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 129 | +!14 = !DIFile(filename: "bar.h", directory: "bar") |
| 130 | +!15 = distinct !DILocation(line: 1280, column: 5, scope: !16, inlinedAt: !18) |
| 131 | +!16 = distinct !DISubprogram(name: "zar", scope: !17, file: !17, line: 1275, type: !9, scopeLine: 1278, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 132 | +!17 = !DIFile(filename: "file1.h", directory: "dir1") |
| 133 | +!18 = distinct !DILocation(line: 1278, column: 1, scope: !19, inlinedAt: !20) |
| 134 | +!19 = distinct !DISubprogram(name: "yo", scope: !17, file: !17, line: 1275, type: !9, scopeLine: 1278, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 135 | +!20 = distinct !DILocation(line: 2514, column: 26, scope: !21, inlinedAt: !22) |
| 136 | +!21 = distinct !DISubprogram(name: "zzzz", scope: !14, file: !14, line: 2505, type: !9, scopeLine: 2506, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 137 | +!22 = distinct !DILocation(line: 1263, column: 5, scope: !23, inlinedAt: !24) |
| 138 | +!23 = distinct !DISubprogram(name: "ppppp", scope: !17, file: !17, line: 1258, type: !9, scopeLine: 1261, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 139 | +!24 = distinct !DILocation(line: 1261, column: 1, scope: !25, inlinedAt: !26) |
| 140 | +!25 = distinct !DISubprogram(name: "qqqq", scope: !17, file: !17, line: 1258, type: !9, scopeLine: 1261, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 141 | +!26 = distinct !DILocation(line: 168, column: 7, scope: !27, inlinedAt: !29) |
| 142 | +!27 = distinct !DISubprogram(name: "lll", scope: !28, file: !28, line: 166, type: !9, scopeLine: 169, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 143 | +!28 = !DIFile(filename: "file2.h", directory: "dir2") |
| 144 | +!29 = distinct !DILocation(line: 169, column: 1, scope: !30, inlinedAt: !31) |
| 145 | +!30 = distinct !DISubprogram(name: "Expr1", scope: !28, file: !28, line: 166, type: !9, scopeLine: 169, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 146 | +!31 = distinct !DILocation(line: 368, column: 12, scope: !32, inlinedAt: !33) |
| 147 | +!32 = distinct !DISubprogram(name: "yyyyy", scope: !14, file: !14, line: 364, type: !9, scopeLine: 365, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 148 | +!33 = distinct !DILocation(line: 1297, column: 34, scope: !34, inlinedAt: !35) |
| 149 | +!34 = distinct !DISubprogram(name: "eeeee", scope: !14, file: !14, line: 1290, type: !9, scopeLine: 1291, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 150 | +!35 = distinct !DILocation(line: 2306, column: 5, scope: !36, inlinedAt: !11) |
| 151 | +!36 = distinct !DISubprogram(name: "aaaaa", scope: !37, file: !37, line: 2304, type: !9, scopeLine: 2305, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 152 | +!37 = !DIFile(filename: "foo.c", directory: "/") |
| 153 | +!38 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 154 | +!39 = distinct !DILocation(line: 44, column: 44, scope: !38) |
| 155 | +!40 = distinct !DILocation(line: 55, column: 55, scope: !38) |
| 156 | +!41 = distinct !DILocation(line: 66, column: 66, scope: !38) |
| 157 | +!42 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 158 | +!43 = distinct !DILocation(line: 77, column: 77, scope: !42) |
| 159 | +!44 = distinct !DILocation(line: 88, column: 88, scope: !42) |
| 160 | +!45 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6) |
| 161 | +!46 = distinct !DILocation(line: 99, column: 99, scope: !45) |
| 162 | +!47 = distinct !DILocation(line: 111, column: 111, scope: !45) |
0 commit comments