We have loads preserving low and high 16 bits of their destinations. However, we always use a whole 32 bit register for these. The same happens with 16 bit stores, we have to use full 32 bit register so if high bits are clobbered the register needs to be copied. One example of such code is added to the load-hi16.ll. The proper solution to the problem is to define 16 bit subregs and use them in the operations which do not read another half of a VGPR or preserve it if the VGPR is written. This patch simply defines subregisters and register classes. At the moment there should be no difference in code generation. A lot more work is needed to actually use these new register classes. Therefore, there are no new tests at this time. Register weight calculation has changed with new subregs so appropriate changes were made to keep all calculations just as they are now, especially calculations of register pressure. Differential Revision: https://reviews.llvm.org/D74873
47 lines
3.9 KiB
LLVM
47 lines
3.9 KiB
LLVM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s
|
|
; Make sure the expected regmask is generated for sub/superregisters.
|
|
|
|
; CHECK-DAG: csr Clobbered Registers: $vgpr0 $vgpr0_hi16 $vgpr0_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr0_vgpr1 $vgpr0_vgpr1_vgpr2 {{$}}
|
|
define void @csr() #0 {
|
|
call void asm sideeffect "", "~{v0},~{v36},~{v37}"() #0
|
|
ret void
|
|
}
|
|
|
|
; CHECK-DAG: subregs_for_super Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
|
|
define void @subregs_for_super() #0 {
|
|
call void asm sideeffect "", "~{v0},~{v1}"() #0
|
|
ret void
|
|
}
|
|
|
|
; CHECK-DAG: Clobbered Registers: $vgpr0 $vgpr1 $vgpr0_hi16 $vgpr1_hi16 $vgpr0_lo16 $vgpr1_lo16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 $vgpr0_vgpr1_vgpr2_vgpr3 $vgpr1_vgpr2_vgpr3_vgpr4 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 $vgpr0_vgpr1 $vgpr1_vgpr2 $vgpr0_vgpr1_vgpr2 $vgpr1_vgpr2_vgpr3 {{$}}
|
|
define void @clobbered_reg_with_sub() #0 {
|
|
call void asm sideeffect "", "~{v[0:1]}"() #0
|
|
ret void
|
|
}
|
|
|
|
; CHECK-DAG: nothing Clobbered Registers: {{$}}
|
|
define void @nothing() #0 {
|
|
ret void
|
|
}
|
|
|
|
; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}}
|
|
define void @special_regs() #0 {
|
|
call void asm sideeffect "", "~{m0},~{scc}"() #0
|
|
ret void
|
|
}
|
|
|
|
; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}}
|
|
define void @vcc() #0 {
|
|
call void asm sideeffect "", "~{vcc}"() #0
|
|
ret void
|
|
}
|
|
|
|
@llvm.used = appending global [6 x i8*] [i8* bitcast (void ()* @csr to i8*),
|
|
i8* bitcast (void ()* @subregs_for_super to i8*),
|
|
i8* bitcast (void ()* @clobbered_reg_with_sub to i8*),
|
|
i8* bitcast (void ()* @nothing to i8*),
|
|
i8* bitcast (void ()* @special_regs to i8*),
|
|
i8* bitcast (void ()* @vcc to i8*)]
|
|
|
|
attributes #0 = { nounwind }
|