Files
clang-p2996/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
Andrew Savonichev 51eefa8164 [NVPTX] Add VRFrame and VRFrameLocal to integer register classes
These registers are used as operands for instructions that expect an
integer register, so they should be added to Int32Regs or Int64Regs
register classes. Otherwise the machine verifier emits an error for
the following LIT tests when LLVM_ENABLE_MACHINE_VERIFIER=1
environment variable is set:

*** Bad machine code: Illegal physical register for instruction ***
- function:    kernel_func
- basic block: %bb.0 entry (0x55c8903d5438)
- instruction: %3:int64regs = LEA_ADDRi64 $vrframelocal, 0
- operand 1:   $vrframelocal
$vrframelocal is not a Int64Regs register.

    CodeGen/NVPTX/call-with-alloca-buffer.ll
    CodeGen/NVPTX/disable-opt.ll
    CodeGen/NVPTX/lower-alloca.ll
    CodeGen/NVPTX/lower-args.ll
    CodeGen/NVPTX/param-align.ll
    CodeGen/NVPTX/reg-types.ll
    DebugInfo/NVPTX/dbg-declare-alloca.ll
    DebugInfo/NVPTX/dbg-value-const-byref.ll

Differential Revision: https://reviews.llvm.org/D110164
2021-10-14 16:19:03 +03:00

83 lines
3.3 KiB
LLVM

; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=PTX64
; Ensure we access the local stack properly
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX32: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
define void @foo(i32 %a) {
%local = alloca i32, align 4
store volatile i32 %a, i32* %local
ret void
}
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
define void @foo2(i32 %a) {
%local = alloca i32, align 4
store i32 %a, i32* %local
call void @bar(i32* %local)
ret void
}
declare void @bar(i32* %a)
!nvvm.annotations = !{!0}
!0 = !{void (i32)* @foo2, !"kernel", i32 1}
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32-NOT: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX32: add.u32 %r{{[0-9]+}}, %SPL, 0;
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64-NOT: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX64: add.u64 %rd{{[0-9]+}}, %SPL, 0;
; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}};
define void @foo3(i32 %a) {
%local = alloca [3 x i32], align 4
%1 = bitcast [3 x i32]* %local to i32*
%2 = getelementptr inbounds i32, i32* %1, i32 %a
store i32 %a, i32* %2
ret void
}
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: add.u32 {{%r[0-9]+}}, %SP, 0;
; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 0;
; PTX32: add.u32 {{%r[0-9]+}}, %SP, 4;
; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 4;
; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}}
; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}}
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 0;
; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 0;
; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 4;
; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 4;
; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
define void @foo4() {
%A = alloca i32
%B = alloca i32
store i32 0, i32* %A
store i32 0, i32* %B
call void @bar(i32* %A)
call void @bar(i32* %B)
ret void
}