; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 | FileCheck %s

; Exercises how load addresses are printed in the generated PTX: a small
; constant offset is expected inside the address operand as [reg+imm], and an
; address without an immediate offset is expected as plain [reg], with no
; extraneous '+0' part.

target triple = "nvptx64-nvidia-cuda"

; Loads the i32 element one slot before %x, using an i64 index of -1.
; The expected PTX carries the resulting -4 byte offset directly in the load's
; address operand ([%rd1+-4]) rather than computing it separately.
define i32 @test_addr_mode_i64(ptr %x) {
; CHECK-LABEL: test_addr_mode_i64(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %addr = getelementptr i32, ptr %x, i64 -1
  %res = load i32, ptr %addr
  ret i32 %res
}
; Loads the i32 element one slot before %x, using an i32 index of -1 (an index
; type narrower than the 64-bit pointer).
; The expected PTX still carries a -4 byte offset in the load's address
; operand ([%rd1+-4]).
define i32 @test_addr_mode_i32(ptr %x) {
; CHECK-LABEL: test_addr_mode_i32(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i32_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %addr = getelementptr i32, ptr %x, i32 -1
  %res = load i32, ptr %addr
  ret i32 %res
}
; Loads the i32 element one slot before %x, using an i16 index of -1.
; The expected PTX still carries a -4 byte offset in the load's address
; operand ([%rd1+-4]), i.e. the narrow negative index is not turned into a
; large positive offset.
define i32 @test_addr_mode_i16(ptr %x) {
; CHECK-LABEL: test_addr_mode_i16(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i16_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %addr = getelementptr i32, ptr %x, i16 -1
  %res = load i32, ptr %addr
  ret i32 %res
}
; Loads the i32 element one slot before %x, using an i8 index of -1 (the
; narrowest index type covered in this file).
; The expected PTX still carries a -4 byte offset in the load's address
; operand ([%rd1+-4]).
define i32 @test_addr_mode_i8(ptr %x) {
; CHECK-LABEL: test_addr_mode_i8(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i8_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %addr = getelementptr i32, ptr %x, i8 -1
  %res = load i32, ptr %addr
  ret i32 %res
}
; Loads an i32 at element index 4294967293 (2^32 - 3), i.e. a byte offset of
; 4294967293 * 4 = 17179869172.
; The expected PTX adds that offset with a separate add.s64 and then loads
; from the bare register [%rd2], with no '+0' immediate part.
; NOTE(review): presumably the offset is not folded into the address operand
; because it does not fit a 32-bit immediate, and the index must not be
; truncated to 32 bits (which would give -3) - confirm against the backend.
define i32 @test_addr_mode_i64_large(ptr %x) {
; CHECK-LABEL: test_addr_mode_i64_large(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0];
; CHECK-NEXT: add.s64 %rd2, %rd1, 17179869172;
; CHECK-NEXT: ld.u32 %r1, [%rd2];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
  %addr = getelementptr i32, ptr %x, i64 4294967293
  %res = load i32, ptr %addr
  ret i32 %res
}