This change consolidates and cleans up various NVPTXISD target-specific nodes in order to simplify SDAG ISel. While there are some whitespace changes in the emitted PTX, it is otherwise a non-functional change. NVPTXISD::Wrapper - This node was used to wrap external-symbol and global-address nodes. It is redundant and has been removed. Instead, we use the non-target versions of these nodes and convert them appropriately during ISel. NVPTXISD::CALL - Most of the family of nodes used to represent a PTX call instruction has been replaced by this single new node. It corresponds to a single instruction and is therefore much simpler to create and lower.
95 lines
3.8 KiB
LLVM
95 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx72 -mcpu=sm_52 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
|
|
; RUN: not llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_50 2>&1 | FileCheck %s --check-prefixes=CHECK-FAILS
|
|
|
|
; RUN: llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-32
|
|
; RUN: llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | FileCheck %s --check-prefixes=CHECK-64
|
|
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
|
|
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mattr=+ptx73 -mcpu=sm_52 | %ptxas-verify %}
|
|
|
|
; CHECK-FAILS: in function test_dynamic_stackalloc{{.*}}: Support for dynamic alloca introduced in PTX ISA version 7.3 and requires target sm_52.
|
|
|
|
; Dynamic alloca with an explicit 16-byte alignment whose address is handed to a call.
; The IR allocates %n bytes (alloca i8, i64 %n, align 16), passes the pointer to @bar,
; and returns @bar's i32 result.
; Expected PTX, for both the 32-bit and the 64-bit expectations:
;   - the size is rounded up with an add of 7 followed by an and with -8;
;   - the rounded size feeds an alloca instruction whose trailing operand is 16,
;     matching the alignment written in the IR;
;   - the alloca result goes through cvta.local before being stored to param0 of the call.
; In the 32-bit expectations the i64 size parameter is read with a single b32 ld.param.
define i32 @test_dynamic_stackalloc(i64 %n) {
|
|
; CHECK-32-LABEL: test_dynamic_stackalloc(
|
|
; CHECK-32: {
|
|
; CHECK-32-NEXT: .reg .b32 %r<8>;
|
|
; CHECK-32-EMPTY:
|
|
; CHECK-32-NEXT: // %bb.0:
|
|
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_param_0];
|
|
; CHECK-32-NEXT: add.s32 %r2, %r1, 7;
|
|
; CHECK-32-NEXT: and.b32 %r3, %r2, -8;
|
|
; CHECK-32-NEXT: alloca.u32 %r4, %r3, 16;
|
|
; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
|
|
; CHECK-32-NEXT: { // callseq 0, 0
|
|
; CHECK-32-NEXT: .param .b32 param0;
|
|
; CHECK-32-NEXT: st.param.b32 [param0], %r5;
|
|
; CHECK-32-NEXT: .param .b32 retval0;
|
|
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
|
|
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
|
|
; CHECK-32-NEXT: } // callseq 0
|
|
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
|
|
; CHECK-32-NEXT: ret;
|
|
;
|
|
; CHECK-64-LABEL: test_dynamic_stackalloc(
|
|
; CHECK-64: {
|
|
; CHECK-64-NEXT: .reg .b32 %r<3>;
|
|
; CHECK-64-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-64-EMPTY:
|
|
; CHECK-64-NEXT: // %bb.0:
|
|
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_param_0];
|
|
; CHECK-64-NEXT: add.s64 %rd2, %rd1, 7;
|
|
; CHECK-64-NEXT: and.b64 %rd3, %rd2, -8;
|
|
; CHECK-64-NEXT: alloca.u64 %rd4, %rd3, 16;
|
|
; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
|
|
; CHECK-64-NEXT: { // callseq 0, 0
|
|
; CHECK-64-NEXT: .param .b64 param0;
|
|
; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
|
|
; CHECK-64-NEXT: .param .b32 retval0;
|
|
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
|
|
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
|
|
; CHECK-64-NEXT: } // callseq 0
|
|
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
|
|
; CHECK-64-NEXT: ret;
|
|
; Test input: the three IR instructions below are what llc compiles; the
; assertion lines above describe the PTX expected for them.
%alloca = alloca i8, i64 %n, align 16
|
|
%call = call i32 @bar(ptr %alloca)
|
|
ret i32 %call
|
|
}
|
|
|
|
; Dynamic alloca of %0 floats with only 4-byte alignment requested, read back directly.
; The IR allocates the array (alloca float, i64 %0, align 4), forms a zero-index
; getelementptr to it, loads one float through that pointer and returns it.
; No call is made in this function.
; Expected PTX, for both the 32-bit and the 64-bit expectations:
;   - the element count is scaled by a left shift of 2;
;   - the byte size is rounded up with an add of 7 followed by an and with -8;
;   - the alloca instruction's trailing operand is 8, not the 4 written in the IR
;     (NOTE(review): presumably a minimum alignment applied by the backend; confirm);
;   - the value is read with ld.local straight from the alloca result, with no
;     cvta.local in between.
define float @test_dynamic_stackalloc_unaligned(i64 %0) {
|
|
; CHECK-32-LABEL: test_dynamic_stackalloc_unaligned(
|
|
; CHECK-32: {
|
|
; CHECK-32-NEXT: .reg .b32 %r<7>;
|
|
; CHECK-32-EMPTY:
|
|
; CHECK-32-NEXT: // %bb.0:
|
|
; CHECK-32-NEXT: ld.param.b32 %r1, [test_dynamic_stackalloc_unaligned_param_0];
|
|
; CHECK-32-NEXT: shl.b32 %r2, %r1, 2;
|
|
; CHECK-32-NEXT: add.s32 %r3, %r2, 7;
|
|
; CHECK-32-NEXT: and.b32 %r4, %r3, -8;
|
|
; CHECK-32-NEXT: alloca.u32 %r5, %r4, 8;
|
|
; CHECK-32-NEXT: ld.local.b32 %r6, [%r5];
|
|
; CHECK-32-NEXT: st.param.b32 [func_retval0], %r6;
|
|
; CHECK-32-NEXT: ret;
|
|
;
|
|
; CHECK-64-LABEL: test_dynamic_stackalloc_unaligned(
|
|
; CHECK-64: {
|
|
; CHECK-64-NEXT: .reg .b32 %r<2>;
|
|
; CHECK-64-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-64-EMPTY:
|
|
; CHECK-64-NEXT: // %bb.0:
|
|
; CHECK-64-NEXT: ld.param.b64 %rd1, [test_dynamic_stackalloc_unaligned_param_0];
|
|
; CHECK-64-NEXT: shl.b64 %rd2, %rd1, 2;
|
|
; CHECK-64-NEXT: add.s64 %rd3, %rd2, 7;
|
|
; CHECK-64-NEXT: and.b64 %rd4, %rd3, -8;
|
|
; CHECK-64-NEXT: alloca.u64 %rd5, %rd4, 8;
|
|
; CHECK-64-NEXT: ld.local.b32 %r1, [%rd5];
|
|
; CHECK-64-NEXT: st.param.b32 [func_retval0], %r1;
|
|
; CHECK-64-NEXT: ret;
|
|
; Test input: the value numbers below (%4, %5, %6) are kept exactly as written.
%4 = alloca float, i64 %0, align 4
|
|
%5 = getelementptr float, ptr %4, i64 0
|
|
%6 = load float, ptr %5, align 4
|
|
ret float %6
|
|
}
|
|
|
|
; External function, declared but not defined in this file. It takes one pointer
; and returns i32; test_dynamic_stackalloc passes it the dynamically allocated
; buffer and returns its result.
declare i32 @bar(ptr)
|
|
|