@@ -841,7 +841,7 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
841
841
; GCN-ISEL-LABEL: name: vuaddo64
842
842
; GCN-ISEL-LABEL: body:
843
843
; GCN-ISEL-LABEL: bb.0
844
- ; GCN-ISEL: V_ADD_U64_PSEUDO
844
+ ; GCN-ISEL: V_ADD_CO_U32_e64
845
845
846
846
define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) #0 {
847
847
; CISI-LABEL: vuaddo64:
@@ -854,9 +854,8 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
854
854
; CISI-NEXT: s_mov_b32 s4, s0
855
855
; CISI-NEXT: v_mov_b32_e32 v1, s9
856
856
; CISI-NEXT: v_add_i32_e32 v0, vcc, s8, v0
857
- ; CISI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
858
- ; CISI-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[0:1]
859
857
; CISI-NEXT: s_mov_b32 s5, s1
858
+ ; CISI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
860
859
; CISI-NEXT: s_mov_b32 s0, s2
861
860
; CISI-NEXT: s_mov_b32 s1, s3
862
861
; CISI-NEXT: s_mov_b32 s2, s6
@@ -876,7 +875,6 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
876
875
; VI-NEXT: v_mov_b32_e32 v6, s5
877
876
; VI-NEXT: v_add_u32_e32 v5, vcc, s4, v0
878
877
; VI-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
879
- ; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[5:6]
880
878
; VI-NEXT: v_mov_b32_e32 v2, s1
881
879
; VI-NEXT: v_mov_b32_e32 v3, s2
882
880
; VI-NEXT: v_mov_b32_e32 v4, s3
@@ -894,7 +892,6 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
894
892
; GFX9-NEXT: v_mov_b32_e32 v1, s7
895
893
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v0
896
894
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
897
- ; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
898
895
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
899
896
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
900
897
; GFX9-NEXT: global_store_byte v2, v0, s[2:3]
@@ -909,8 +906,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
909
906
; GFX1010-NEXT: s_waitcnt lgkmcnt(0)
910
907
; GFX1010-NEXT: v_add_co_u32 v0, s4, s6, v0
911
908
; GFX1010-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4
912
- ; GFX1010-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1]
913
- ; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
909
+ ; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
914
910
; GFX1010-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
915
911
; GFX1010-NEXT: global_store_byte v2, v3, s[2:3]
916
912
; GFX1010-NEXT: s_endpgm
@@ -923,9 +919,8 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
923
919
; GFX1030W32-NEXT: v_mov_b32_e32 v2, 0
924
920
; GFX1030W32-NEXT: s_waitcnt lgkmcnt(0)
925
921
; GFX1030W32-NEXT: v_add_co_u32 v0, s4, s6, v0
926
- ; GFX1030W32-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s4
927
- ; GFX1030W32-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1]
928
- ; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
922
+ ; GFX1030W32-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4
923
+ ; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
929
924
; GFX1030W32-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
930
925
; GFX1030W32-NEXT: global_store_byte v2, v3, s[2:3]
931
926
; GFX1030W32-NEXT: s_endpgm
@@ -938,9 +933,8 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
938
933
; GFX1030W64-NEXT: v_mov_b32_e32 v2, 0
939
934
; GFX1030W64-NEXT: s_waitcnt lgkmcnt(0)
940
935
; GFX1030W64-NEXT: v_add_co_u32 v0, s[4:5], s6, v0
941
- ; GFX1030W64-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s[4:5]
942
- ; GFX1030W64-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
943
- ; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
936
+ ; GFX1030W64-NEXT: v_add_co_ci_u32_e64 v1, s[4:5], s7, 0, s[4:5]
937
+ ; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
944
938
; GFX1030W64-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
945
939
; GFX1030W64-NEXT: global_store_byte v2, v3, s[2:3]
946
940
; GFX1030W64-NEXT: s_endpgm
@@ -955,10 +949,9 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
955
949
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
956
950
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
957
951
; GFX11-NEXT: v_add_co_u32 v0, s4, s6, v0
958
- ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s4
952
+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4
959
953
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
960
- ; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1]
961
- ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
954
+ ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
962
955
; GFX11-NEXT: s_clause 0x1
963
956
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
964
957
; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
@@ -969,16 +962,17 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car
969
962
; GFX1250-NEXT: s_clause 0x1
970
963
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
971
964
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
972
- ; GFX1250-NEXT: v_mov_b32_e32 v1, 0
973
965
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
966
+ ; GFX1250-NEXT: v_mov_b32_e32 v2, 0
974
967
; GFX1250-NEXT: s_wait_kmcnt 0x0
975
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
976
- ; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], s[6:7], v[0:1]
977
- ; GFX1250-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[2:3]
978
- ; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
968
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
969
+ ; GFX1250-NEXT: v_add_co_u32 v0, s4, s6, v0
970
+ ; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4
971
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
972
+ ; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
979
973
; GFX1250-NEXT: s_clause 0x1
980
- ; GFX1250-NEXT: global_store_b64 v1, v[2:3], s[0:1]
981
- ; GFX1250-NEXT: global_store_b8 v1, v0, s[2:3]
974
+ ; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
975
+ ; GFX1250-NEXT: global_store_b8 v2, v3, s[2:3]
982
976
; GFX1250-NEXT: s_endpgm
983
977
%tid = call i32 @llvm.amdgcn.workitem.id.x()
984
978
%tid.ext = sext i32 %tid to i64
@@ -1821,7 +1815,7 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1821
1815
; GCN-ISEL-LABEL: name: vusubo64
1822
1816
; GCN-ISEL-LABEL: body:
1823
1817
; GCN-ISEL-LABEL: bb.0
1824
- ; GCN-ISEL: V_SUB_U64_PSEUDO
1818
+ ; GCN-ISEL: V_SUBB_U32_e64
1825
1819
1826
1820
define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) #0 {
1827
1821
; CISI-LABEL: vusubo64:
@@ -1834,9 +1828,8 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1834
1828
; CISI-NEXT: s_mov_b32 s4, s0
1835
1829
; CISI-NEXT: v_mov_b32_e32 v1, s9
1836
1830
; CISI-NEXT: v_sub_i32_e32 v0, vcc, s8, v0
1837
- ; CISI-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1838
- ; CISI-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1]
1839
1831
; CISI-NEXT: s_mov_b32 s5, s1
1832
+ ; CISI-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1840
1833
; CISI-NEXT: s_mov_b32 s0, s2
1841
1834
; CISI-NEXT: s_mov_b32 s1, s3
1842
1835
; CISI-NEXT: s_mov_b32 s2, s6
@@ -1856,7 +1849,6 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1856
1849
; VI-NEXT: v_mov_b32_e32 v6, s5
1857
1850
; VI-NEXT: v_sub_u32_e32 v5, vcc, s4, v0
1858
1851
; VI-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc
1859
- ; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[5:6]
1860
1852
; VI-NEXT: v_mov_b32_e32 v2, s1
1861
1853
; VI-NEXT: v_mov_b32_e32 v3, s2
1862
1854
; VI-NEXT: v_mov_b32_e32 v4, s3
@@ -1874,7 +1866,6 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1874
1866
; GFX9-NEXT: v_mov_b32_e32 v1, s7
1875
1867
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s6, v0
1876
1868
; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
1877
- ; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
1878
1869
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1879
1870
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1880
1871
; GFX9-NEXT: global_store_byte v2, v0, s[2:3]
@@ -1889,8 +1880,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1889
1880
; GFX1010-NEXT: s_waitcnt lgkmcnt(0)
1890
1881
; GFX1010-NEXT: v_sub_co_u32 v0, s4, s6, v0
1891
1882
; GFX1010-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4
1892
- ; GFX1010-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1]
1893
- ; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1883
+ ; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
1894
1884
; GFX1010-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1895
1885
; GFX1010-NEXT: global_store_byte v2, v3, s[2:3]
1896
1886
; GFX1010-NEXT: s_endpgm
@@ -1903,9 +1893,8 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1903
1893
; GFX1030W32-NEXT: v_mov_b32_e32 v2, 0
1904
1894
; GFX1030W32-NEXT: s_waitcnt lgkmcnt(0)
1905
1895
; GFX1030W32-NEXT: v_sub_co_u32 v0, s4, s6, v0
1906
- ; GFX1030W32-NEXT: v_sub_co_ci_u32_e64 v1, null, s7, 0, s4
1907
- ; GFX1030W32-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1]
1908
- ; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1896
+ ; GFX1030W32-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4
1897
+ ; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
1909
1898
; GFX1030W32-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1910
1899
; GFX1030W32-NEXT: global_store_byte v2, v3, s[2:3]
1911
1900
; GFX1030W32-NEXT: s_endpgm
@@ -1918,9 +1907,8 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1918
1907
; GFX1030W64-NEXT: v_mov_b32_e32 v2, 0
1919
1908
; GFX1030W64-NEXT: s_waitcnt lgkmcnt(0)
1920
1909
; GFX1030W64-NEXT: v_sub_co_u32 v0, s[4:5], s6, v0
1921
- ; GFX1030W64-NEXT: v_sub_co_ci_u32_e64 v1, null, s7, 0, s[4:5]
1922
- ; GFX1030W64-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
1923
- ; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1910
+ ; GFX1030W64-NEXT: v_sub_co_ci_u32_e64 v1, s[4:5], s7, 0, s[4:5]
1911
+ ; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
1924
1912
; GFX1030W64-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1925
1913
; GFX1030W64-NEXT: global_store_byte v2, v3, s[2:3]
1926
1914
; GFX1030W64-NEXT: s_endpgm
@@ -1935,10 +1923,9 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1935
1923
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1936
1924
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1937
1925
; GFX11-NEXT: v_sub_co_u32 v0, s4, s6, v0
1938
- ; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, s7, 0, s4
1926
+ ; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4
1939
1927
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1940
- ; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1]
1941
- ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
1928
+ ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
1942
1929
; GFX11-NEXT: s_clause 0x1
1943
1930
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1944
1931
; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
@@ -1949,16 +1936,17 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car
1949
1936
; GFX1250-NEXT: s_clause 0x1
1950
1937
; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
1951
1938
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1952
- ; GFX1250-NEXT: v_mov_b32_e32 v1, 0
1953
1939
; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1940
+ ; GFX1250-NEXT: v_mov_b32_e32 v2, 0
1954
1941
; GFX1250-NEXT: s_wait_kmcnt 0x0
1955
- ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1956
- ; GFX1250-NEXT: v_sub_nc_u64_e32 v[2:3], s[6:7], v[0:1]
1957
- ; GFX1250-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[2:3]
1958
- ; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1942
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1943
+ ; GFX1250-NEXT: v_sub_co_u32 v0, s4, s6, v0
1944
+ ; GFX1250-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4
1945
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
1946
+ ; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
1959
1947
; GFX1250-NEXT: s_clause 0x1
1960
- ; GFX1250-NEXT: global_store_b64 v1, v[2:3], s[0:1]
1961
- ; GFX1250-NEXT: global_store_b8 v1, v0, s[2:3]
1948
+ ; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1949
+ ; GFX1250-NEXT: global_store_b8 v2, v3, s[2:3]
1962
1950
; GFX1250-NEXT: s_endpgm
1963
1951
%tid = call i32 @llvm.amdgcn.workitem.id.x()
1964
1952
%tid.ext = sext i32 %tid to i64
0 commit comments