[AMDGPU] 4-align SGPR triples
Previously, SGPR triples like s[3:5] were aligned on a 3-SGPR boundary, which has no basis in hardware. Aligning them on a 4-SGPR boundary is at least justified by the architecture reference guide, which says: "Quad-alignment of SGPRs is required for operation on more than 64-bits". Currently there are no instructions that take SGPR triples as operands, so the issue is latent. Differential Revision: https://reviews.llvm.org/D151463
This commit is contained in:
@@ -413,7 +413,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
|
||||
|
||||
// SGPR 96-bit registers. No operations use these; they are defined for symmetry with 96-bit VGPRs.
|
||||
def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 3, 3, "s">;
|
||||
def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 4, 3, "s">;
|
||||
|
||||
// SGPR 128-bit registers
|
||||
def SGPR_128Regs : SIRegisterTuples<getSubRegs<4>.ret, SGPR_32, 105, 4, 4, "s">;
|
||||
|
||||
@@ -189,14 +189,11 @@ body: |
|
||||
; CHECK-LABEL: name: extract_sgpr_s96_from_s128
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]]
|
||||
%0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr(s96) = G_EXTRACT %0, 0
|
||||
%2:sgpr(s96) = G_EXTRACT %0, 32
|
||||
S_ENDPGM 0, implicit %1, implicit %2
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
@@ -240,7 +240,7 @@ body: |
|
||||
; CHECK-LABEL: name: insert_s_s96_s_s64_0
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
|
||||
@@ -252,28 +252,6 @@ body: |
|
||||
|
||||
---
|
||||
|
||||
name: insert_s_s96_s_s64_32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
|
||||
; CHECK-LABEL: name: insert_s_s96_s_s64_32
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
|
||||
%0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
|
||||
%1:sgpr(s64) = COPY $sgpr4_sgpr5
|
||||
%2:sgpr(s96) = G_INSERT %0, %1, 32
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: insert_s_s128_s_s64_0
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
@@ -398,110 +376,44 @@ regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6
|
||||
; CHECK-LABEL: name: insert_s_s128_s_s96_0
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr4_sgpr5_sgpr6
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
|
||||
%0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
|
||||
%1:sgpr(s96) = COPY $sgpr4_sgpr5_sgpr6
|
||||
%2:sgpr(s128) = G_INSERT %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: insert_s_s128_s_s96_32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-LABEL: name: insert_s_s128_s_s96_32
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
|
||||
%0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
|
||||
%2:sgpr(s128) = G_INSERT %0, %1, 32
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: insert_s_s160_s_s96_0
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr8_sgpr9_sgpr10
|
||||
; CHECK-LABEL: name: insert_s_s160_s_s96_0
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr8_sgpr9_sgpr10
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr8_sgpr9_sgpr10
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
|
||||
%0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
|
||||
%1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
|
||||
%1:sgpr(s96) = COPY $sgpr8_sgpr9_sgpr10
|
||||
%2:sgpr(s160) = G_INSERT %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: insert_s_s160_s_s96_32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-LABEL: name: insert_s_s160_s_s96_32
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
|
||||
%0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
|
||||
%1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
|
||||
%2:sgpr(s160) = G_INSERT %0, %1, 32
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: insert_s_s160_s_s96_64
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-LABEL: name: insert_s_s160_s_s96_64
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
|
||||
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:sgpr_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
|
||||
%0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
|
||||
%1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
|
||||
%2:sgpr(s160) = G_INSERT %0, %1, 64
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: insert_s_s256_s_s128_0
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
@@ -125,7 +125,7 @@ body: |
|
||||
; GCN-LABEL: name: trunc_sgpr_s96_to_s64
|
||||
; GCN: liveins: $sgpr0_sgpr1_sgpr2
|
||||
; GCN-NEXT: {{ $}}
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
|
||||
%0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
|
||||
@@ -163,7 +163,7 @@ body: |
|
||||
; GCN-LABEL: name: trunc_sgpr_s128_to_s96
|
||||
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN-NEXT: {{ $}}
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub0_sub1_sub2
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY1]]
|
||||
%0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
@@ -282,34 +282,33 @@ tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11
|
||||
liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5_sgpr6, $sgpr8_sgpr9_sgpr10, $sgpr12_sgpr13_sgpr14
|
||||
|
||||
; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32
|
||||
; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11
|
||||
; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5_sgpr6, $sgpr8_sgpr9_sgpr10, $sgpr12_sgpr13_sgpr14
|
||||
; GCN-NEXT: {{ $}}
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5
|
||||
; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8
|
||||
; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11
|
||||
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_384_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11
|
||||
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2
|
||||
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5
|
||||
; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8
|
||||
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11
|
||||
; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]]
|
||||
; GCN-NEXT: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]]
|
||||
; GCN-NEXT: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]]
|
||||
; GCN-NEXT: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]]
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
|
||||
; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6
|
||||
; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10
|
||||
; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr(<3 x s32>) = COPY $sgpr12_sgpr13_sgpr14
|
||||
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr_384(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<3 x s32>), [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>)
|
||||
; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub0_sub1_sub2(<12 x s32>)
|
||||
; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_96(<3 x s32>) = COPY [[CONCAT_VECTORS]].sub3_sub4_sub5(<12 x s32>)
|
||||
; GCN-NEXT: [[UV:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV1:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV2:%[0-9]+]]:sgpr_96(<3 x s32>), [[UV3:%[0-9]+]]:sgpr_96(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
|
||||
; GCN-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[UV]](<3 x s32>)
|
||||
; GCN-NEXT: $sgpr4_sgpr5_sgpr6 = COPY [[UV1]](<3 x s32>)
|
||||
; GCN-NEXT: $sgpr8_sgpr9_sgpr10 = COPY [[UV2]](<3 x s32>)
|
||||
; GCN-NEXT: $sgpr12_sgpr13_sgpr14 = COPY [[UV3]](<3 x s32>)
|
||||
%0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
|
||||
%1:sgpr(<3 x s32>) = COPY $sgpr3_sgpr4_sgpr5
|
||||
%2:sgpr(<3 x s32>) = COPY $sgpr6_sgpr7_sgpr8
|
||||
%3:sgpr(<3 x s32>) = COPY $sgpr9_sgpr10_sgpr11
|
||||
%1:sgpr(<3 x s32>) = COPY $sgpr4_sgpr5_sgpr6
|
||||
%2:sgpr(<3 x s32>) = COPY $sgpr8_sgpr9_sgpr10
|
||||
%3:sgpr(<3 x s32>) = COPY $sgpr12_sgpr13_sgpr14
|
||||
%4:sgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3
|
||||
%5:sgpr(<3 x s32>), %6:sgpr(<3 x s32>), %7:sgpr(<3 x s32>), %8:sgpr(<3 x s32>) = G_UNMERGE_VALUES %4
|
||||
$sgpr0_sgpr1_sgpr2 = COPY %5
|
||||
$sgpr3_sgpr4_sgpr5 = COPY %6
|
||||
$sgpr6_sgpr7_sgpr8 = COPY %7
|
||||
$sgpr9_sgpr10_sgpr11 = COPY %8
|
||||
$sgpr4_sgpr5_sgpr6 = COPY %6
|
||||
$sgpr8_sgpr9_sgpr10 = COPY %7
|
||||
$sgpr12_sgpr13_sgpr14 = COPY %8
|
||||
|
||||
...
|
||||
|
||||
|
||||
@@ -669,10 +669,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 2, v0
|
||||
; GFX11-NEXT: s_mov_b32 s9, 0x40400000
|
||||
; GFX11-NEXT: s_mov_b32 s8, 0x40400000
|
||||
; GFX11-NEXT: s_mov_b32 s12, 0x40c00000
|
||||
; GFX11-NEXT: s_mov_b32 s11, 0x40a00000
|
||||
; GFX11-NEXT: s_mov_b32 s10, 4.0
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x40a00000
|
||||
; GFX11-NEXT: s_mov_b32 s9, 4.0
|
||||
; GFX11-NEXT: s_mov_b32 s14, 0x41000000
|
||||
; GFX11-NEXT: s_mov_b32 s13, 0x40e00000
|
||||
; GFX11-NEXT: v_mov_b32_e32 v6, s12
|
||||
@@ -691,9 +691,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr,
|
||||
; GFX11-NEXT: flat_load_b32 v9, v[0:1]
|
||||
; GFX11-NEXT: flat_load_b32 v10, v[2:3]
|
||||
; GFX11-NEXT: s_mov_b32 s2, 2.0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s9
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s8
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v4, s10
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v9, v10, v[0:2], v[3:5], v[6:8]], s[4:7]
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
@@ -778,9 +778,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 2, v0
|
||||
; GFX11-NEXT: s_mov_b32 s9, 0x42004600
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x44004700
|
||||
; GFX11-NEXT: s_mov_b32 s11, 0x45004800
|
||||
; GFX11-NEXT: s_mov_b32 s8, 0x42004600
|
||||
; GFX11-NEXT: s_mov_b32 s9, 0x44004700
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x45004800
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GFX11-NEXT: s_mov_b32 s1, 1.0
|
||||
@@ -795,9 +795,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_
|
||||
; GFX11-NEXT: flat_load_b32 v6, v[0:1]
|
||||
; GFX11-NEXT: flat_load_b32 v7, v[2:3]
|
||||
; GFX11-NEXT: s_mov_b32 s2, 2.0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s9
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s8
|
||||
; GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v4, s10
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v4, s9
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v6, v7, v[0:2], v[3:5]], s[4:7] a16
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
@@ -887,31 +887,31 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
|
||||
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x34
|
||||
; GFX11-NEXT: s_mov_b32 s16, 0xb36211c7
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
|
||||
; GFX11-NEXT: s_mov_b32 s8, 2.0
|
||||
; GFX11-NEXT: s_mov_b32 s7, 1.0
|
||||
; GFX11-NEXT: s_mov_b32 s6, 0
|
||||
; GFX11-NEXT: s_mov_b32 s9, 0x40400000
|
||||
; GFX11-NEXT: s_movk_i32 s17, 0x102
|
||||
; GFX11-NEXT: s_mov_b32 s8, 0x40400000
|
||||
; GFX11-NEXT: s_mov_b32 s12, 0x40c00000
|
||||
; GFX11-NEXT: s_mov_b32 s11, 0x40a00000
|
||||
; GFX11-NEXT: s_mov_b32 s10, 4.0
|
||||
; GFX11-NEXT: s_mov_b32 s6, 2.0
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x40a00000
|
||||
; GFX11-NEXT: s_mov_b32 s9, 4.0
|
||||
; GFX11-NEXT: s_mov_b32 s14, 0x41000000
|
||||
; GFX11-NEXT: s_mov_b32 s13, 0x40e00000
|
||||
; GFX11-NEXT: v_mov_b32_e32 v6, s12
|
||||
; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v3, s9
|
||||
; GFX11-NEXT: v_dual_mov_b32 v4, s10 :: v_dual_mov_b32 v7, s13
|
||||
; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v9, s16
|
||||
; GFX11-NEXT: v_dual_mov_b32 v3, s8 :: v_dual_mov_b32 v4, s9
|
||||
; GFX11-NEXT: v_mov_b32_e32 v7, s13
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v0, s4
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v0, s4
|
||||
; GFX11-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX11-NEXT: s_mov_b32 s4, 0xb36211c7
|
||||
; GFX11-NEXT: s_movk_i32 s5, 0x102
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v10, s5 :: v_dual_mov_b32 v9, s4
|
||||
; GFX11-NEXT: s_mov_b32 s4, 0
|
||||
; GFX11-NEXT: s_mov_b32 s5, 1.0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v10, s17
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX11-NEXT: flat_load_b32 v11, v[0:1]
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
|
||||
; GFX11-NEXT: v_mov_b32_e32 v2, s8
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
|
||||
; GFX11-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[9:10], v11, v[0:2], v[3:5], v[6:8]], s[0:3]
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
@@ -993,27 +993,25 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
|
||||
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x34
|
||||
; GFX11-NEXT: s_mov_b32 s12, 0xb36211c6
|
||||
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
|
||||
; GFX11-NEXT: s_mov_b32 s6, 0
|
||||
; GFX11-NEXT: s_mov_b32 s9, 0x42004600
|
||||
; GFX11-NEXT: s_mov_b32 s8, 2.0
|
||||
; GFX11-NEXT: s_mov_b32 s7, 1.0
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x44004700
|
||||
; GFX11-NEXT: s_mov_b32 s11, 0x45004800
|
||||
; GFX11-NEXT: v_dual_mov_b32 v3, s9 :: v_dual_mov_b32 v4, s10
|
||||
; GFX11-NEXT: s_movk_i32 s13, 0x102
|
||||
; GFX11-NEXT: s_mov_b32 s6, 2.0
|
||||
; GFX11-NEXT: s_mov_b32 s8, 0x42004600
|
||||
; GFX11-NEXT: s_mov_b32 s9, 0x44004700
|
||||
; GFX11-NEXT: s_mov_b32 s10, 0x45004800
|
||||
; GFX11-NEXT: v_dual_mov_b32 v3, s8 :: v_dual_mov_b32 v4, s9
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s11 :: v_dual_mov_b32 v0, s4
|
||||
; GFX11-NEXT: v_dual_mov_b32 v5, s10 :: v_dual_mov_b32 v0, s4
|
||||
; GFX11-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX11-NEXT: s_mov_b32 s4, 0xb36211c6
|
||||
; GFX11-NEXT: s_movk_i32 s5, 0x102
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: v_mov_b32_e32 v7, s5
|
||||
; GFX11-NEXT: s_mov_b32 s5, 1.0
|
||||
; GFX11-NEXT: s_mov_b32 s4, 0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v7, s13
|
||||
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
|
||||
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
; GFX11-NEXT: v_mov_b32_e32 v6, s4
|
||||
; GFX11-NEXT: flat_load_b32 v8, v[0:1]
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
|
||||
; GFX11-NEXT: v_mov_b32_e32 v2, s8
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
|
||||
; GFX11-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[6:7], v8, v[0:2], v[3:5]], s[0:3] a16
|
||||
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
||||
|
||||
@@ -10,25 +10,25 @@
|
||||
define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x) {
|
||||
; GFX9-LABEL: store_lds_v3i32:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
|
||||
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
||||
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s13
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s14
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s2
|
||||
; GFX9-NEXT: ds_write_b96 v3, v[0:2]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: store_lds_v3i32:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4
|
||||
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4
|
||||
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
|
||||
; GFX7-NEXT: s_mov_b32 m0, -1
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s13
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s14
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s0
|
||||
; GFX7-NEXT: ds_write_b96 v3, v[0:2]
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@@ -36,12 +36,12 @@ define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x)
|
||||
; GFX10-LABEL: store_lds_v3i32:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_clause 0x1
|
||||
; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
|
||||
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
||||
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s14
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s2
|
||||
; GFX10-NEXT: ds_write_b96 v3, v[0:2]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
@@ -49,11 +49,11 @@ define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x)
|
||||
; GFX11-LABEL: store_lds_v3i32:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: s_load_b128 s[12:15], s[0:1], 0x10
|
||||
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x10
|
||||
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
|
||||
; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
|
||||
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s0
|
||||
; GFX11-NEXT: ds_store_b96 v3, v[0:2]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
store <3 x i32> %x, ptr addrspace(3) %out
|
||||
@@ -447,25 +447,25 @@ define amdgpu_kernel void @store_lds_v3i32_align8(ptr addrspace(3) %out, <3 x i3
|
||||
define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i32> %x) {
|
||||
; GFX9-LABEL: store_lds_v3i32_align16:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
|
||||
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
||||
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s13
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s14
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s2
|
||||
; GFX9-NEXT: ds_write_b96 v3, v[0:2]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: store_lds_v3i32_align16:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4
|
||||
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4
|
||||
; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0
|
||||
; GFX7-NEXT: s_mov_b32 m0, -1
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s13
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s14
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, s0
|
||||
; GFX7-NEXT: ds_write_b96 v3, v[0:2]
|
||||
; GFX7-NEXT: s_endpgm
|
||||
@@ -473,12 +473,12 @@ define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i
|
||||
; GFX10-LABEL: store_lds_v3i32_align16:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_clause 0x1
|
||||
; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10
|
||||
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
||||
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s12
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s13
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s14
|
||||
; GFX10-NEXT: v_mov_b32_e32 v0, s4
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s5
|
||||
; GFX10-NEXT: v_mov_b32_e32 v2, s6
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, s2
|
||||
; GFX10-NEXT: ds_write_b96 v3, v[0:2]
|
||||
; GFX10-NEXT: s_endpgm
|
||||
@@ -486,11 +486,11 @@ define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i
|
||||
; GFX11-LABEL: store_lds_v3i32_align16:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: s_load_b128 s[12:15], s[0:1], 0x10
|
||||
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x10
|
||||
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13
|
||||
; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
|
||||
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s0
|
||||
; GFX11-NEXT: ds_store_b96 v3, v[0:2]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
store <3 x i32> %x, ptr addrspace(3) %out, align 16
|
||||
|
||||
@@ -45,7 +45,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.2:
|
||||
; GFX90A-NEXT: successors: %bb.3(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr24, $sgpr33, $vgpr31, $agpr0, $vgpr26, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr58, $sgpr59, $sgpr21_sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3, $vgpr20, $vgpr22
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr24, $sgpr33, $vgpr31, $agpr0, $vgpr26, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr58, $sgpr59, $sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3, $vgpr20, $vgpr22
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
|
||||
; GFX90A-NEXT: renamable $sgpr20 = IMPLICIT_DEF
|
||||
@@ -578,7 +578,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: bb.44:
|
||||
; GFX90A-NEXT: successors: %bb.45(0x80000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr58, $vgpr57, $vgpr20, $vgpr61, $vgpr31, $vgpr63, $agpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr40, $vgpr62, $vgpr60, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21, $sgpr21_sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr3, $vgpr4, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr14
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr58, $vgpr57, $vgpr20, $vgpr61, $vgpr31, $vgpr63, $agpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr40, $vgpr62, $vgpr60, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr3, $vgpr4, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr14
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
|
||||
; GFX90A-NEXT: renamable $vgpr10_vgpr11 = IMPLICIT_DEF
|
||||
|
||||
@@ -32,17 +32,16 @@ name: nonoverlapping_copy_kill
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
|
||||
liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6
|
||||
|
||||
; CHECK-LABEL: name: nonoverlapping_copy_kill
|
||||
; CHECK: liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
|
||||
; CHECK: liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
|
||||
; CHECK-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
|
||||
; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit killed $sgpr3_sgpr4_sgpr5
|
||||
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6, implicit-def $sgpr0_sgpr1_sgpr2
|
||||
; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr6, implicit killed $sgpr4_sgpr5_sgpr6
|
||||
; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
|
||||
; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
|
||||
renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr3_sgpr4_sgpr5
|
||||
renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr4_sgpr5_sgpr6
|
||||
renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
|
||||
S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
|
||||
|
||||
|
||||
@@ -8,15 +8,15 @@
|
||||
define amdgpu_kernel void @s_input_output_i128() {
|
||||
; GFX908-LABEL: name: s_input_output_i128
|
||||
; GFX908: bb.0 (%ir-block.0):
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7143434 /* regdef:SGPR_128 */, def %4
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7143433 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: s_input_output_i128
|
||||
; GFX90A: bb.0 (%ir-block.0):
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7143434 /* regdef:SGPR_128 */, def %4
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6881290 /* regdef:SGPR_128 */, def %4
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7143433 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6881289 /* reguse:SGPR_128 */, [[COPY]]
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
%val = tail call i128 asm sideeffect "; def $0", "=s"()
|
||||
call void asm sideeffect "; use $0", "s"(i128 %val)
|
||||
@@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
|
||||
define amdgpu_kernel void @v_input_output_i128() {
|
||||
; GFX908-LABEL: name: v_input_output_i128
|
||||
; GFX908: bb.0 (%ir-block.0):
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %4
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:VReg_128 */, def %4
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:VReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: v_input_output_i128
|
||||
; GFX90A: bb.0 (%ir-block.0):
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %4
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128_Align2 */, def %4
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:VReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
%val = tail call i128 asm sideeffect "; def $0", "=v"()
|
||||
call void asm sideeffect "; use $0", "v"(i128 %val)
|
||||
@@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
|
||||
define amdgpu_kernel void @a_input_output_i128() {
|
||||
; GFX908-LABEL: name: a_input_output_i128
|
||||
; GFX908: bb.0 (%ir-block.0):
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128 */, def %4
|
||||
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5701642 /* regdef:AReg_128 */, def %4
|
||||
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5701641 /* reguse:AReg_128 */, [[COPY]]
|
||||
; GFX908-NEXT: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: a_input_output_i128
|
||||
; GFX90A: bb.0 (%ir-block.0):
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:AReg_128_Align2 */, def %4
|
||||
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5963786 /* regdef:AReg_128_Align2 */, def %4
|
||||
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5963785 /* reguse:AReg_128_Align2 */, [[COPY]]
|
||||
; GFX90A-NEXT: S_ENDPGM 0
|
||||
%val = call i128 asm sideeffect "; def $0", "=a"()
|
||||
call void asm sideeffect "; use $0", "a"(i128 %val)
|
||||
|
||||
@@ -11,7 +11,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
|
||||
; REGALLOC-GFX908-NEXT: {{ $}}
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %26
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:VReg_128 */, def %26
|
||||
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26
|
||||
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def %23
|
||||
; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
|
||||
@@ -35,7 +35,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX908-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
|
||||
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3080202 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
|
||||
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
@@ -58,7 +58,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
|
||||
; REGALLOC-GFX90A-NEXT: {{ $}}
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef %5:agpr_32
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %25
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128_Align2 */, def %25
|
||||
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
|
||||
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def %23
|
||||
; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
|
||||
@@ -80,7 +80,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
|
||||
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX90A-NEXT: $sgpr9 = S_ADDC_U32 $sgpr9, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:AGPR_32 */, undef renamable $agpr0
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
|
||||
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3407882 /* regdef:VReg_64_Align2 */, def renamable $vgpr0_vgpr1
|
||||
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, 0, 4, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
|
||||
@@ -57,63 +57,22 @@ body: |
|
||||
; GFX9-LABEL: name: sgpr96_aligned_src_dst
|
||||
; GFX9: liveins: $sgpr0_sgpr1_sgpr2
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: $sgpr8 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr6_sgpr7_sgpr8
|
||||
; GFX9-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2
|
||||
$sgpr6_sgpr7_sgpr8 = COPY $sgpr0_sgpr1_sgpr2
|
||||
...
|
||||
|
||||
---
|
||||
name: sgpr96_aligned_src
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2
|
||||
; GFX9-LABEL: name: sgpr96_aligned_src
|
||||
; GFX9: liveins: $sgpr0_sgpr1_sgpr2
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: $sgpr5 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-NEXT: $sgpr4 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2
|
||||
; GFX9-NEXT: $sgpr3 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2
|
||||
$sgpr3_sgpr4_sgpr5 = COPY $sgpr0_sgpr1_sgpr2
|
||||
...
|
||||
|
||||
---
|
||||
name: sgpr96_aligned_dst
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-LABEL: name: sgpr96_aligned_dst
|
||||
; GFX9: liveins: $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
|
||||
; GFX9-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit $sgpr3_sgpr4_sgpr5
|
||||
$sgpr0_sgpr1_sgpr2 = COPY $sgpr3_sgpr4_sgpr5
|
||||
...
|
||||
|
||||
---
|
||||
name: sgpr96_unaligned_src_dst
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-LABEL: name: sgpr96_unaligned_src_dst
|
||||
; GFX9: liveins: $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11
|
||||
; GFX9-NEXT: $sgpr9 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5
|
||||
$sgpr9_sgpr10_sgpr11 = COPY $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-NEXT: $sgpr6 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr4_sgpr5_sgpr6
|
||||
; GFX9-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2
|
||||
$sgpr4_sgpr5_sgpr6 = COPY $sgpr0_sgpr1_sgpr2
|
||||
...
|
||||
|
||||
---
|
||||
name: sgpr96_killed
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr3_sgpr4_sgpr5
|
||||
liveins: $sgpr4_sgpr5_sgpr6
|
||||
; GFX9-LABEL: name: sgpr96_killed
|
||||
; GFX9: liveins: $sgpr3_sgpr4_sgpr5
|
||||
; GFX9: liveins: $sgpr4_sgpr5_sgpr6
|
||||
; GFX9-NEXT: {{ $}}
|
||||
; GFX9-NEXT: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11
|
||||
; GFX9-NEXT: $sgpr9 = S_MOV_B32 $sgpr3, implicit killed $sgpr3_sgpr4_sgpr5
|
||||
$sgpr9_sgpr10_sgpr11 = COPY killed $sgpr3_sgpr4_sgpr5
|
||||
; GFX9-NEXT: $sgpr10 = S_MOV_B32 $sgpr6, implicit $sgpr4_sgpr5_sgpr6, implicit-def $sgpr8_sgpr9_sgpr10
|
||||
; GFX9-NEXT: $sgpr8_sgpr9 = S_MOV_B64 $sgpr4_sgpr5, implicit killed $sgpr4_sgpr5_sgpr6
|
||||
$sgpr8_sgpr9_sgpr10 = COPY killed $sgpr4_sgpr5_sgpr6
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
@@ -218,7 +218,7 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
|
||||
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4)
|
||||
; CHECK-NEXT: KILL %411.sub0, %411.sub1
|
||||
; CHECK-NEXT: KILL undef %488:sreg_64
|
||||
; CHECK-NEXT: KILL [[COPY15]].sub0_sub1, [[COPY15]].sub2_sub3
|
||||
; CHECK-NEXT: KILL [[COPY15]].sub0_sub1_sub2, [[COPY15]].sub3
|
||||
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4)
|
||||
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
|
||||
|
||||
Reference in New Issue
Block a user