- Process i128 array with custom ComputePTXValueVTs. The i128 elements should be handled and split into i64 types in the recursion.
- Add corresponding tests.
69 lines
2.3 KiB
LLVM
69 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
|
|
|
; Returns a [2 x i128] whose two elements are sign-extended from the i64 and
; i32 arguments. The assertions below expect each i128 element to be written
; to the return slot as a pair of 64-bit values: one st.param.v2.b64 per
; element, at return-value offsets 0 and 16.
define [2 x i128] @foo(i64 %a, i32 %b) {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [foo_param_1];
; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
; CHECK-NEXT: shr.s64 %rd2, %rd1, 63;
; CHECK-NEXT: cvt.s64.s32 %rd3, %r1;
; CHECK-NEXT: shr.s64 %rd4, %rd3, 63;
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
; CHECK-NEXT: ret;
  %1 = sext i64 %a to i128
  %2 = sext i32 %b to i128
  ; Both elements are overwritten below, so the initial aggregate is only a
  ; placeholder; poison is used instead of the deprecated undef.
  %3 = insertvalue [2 x i128] poison, i128 %1, 0
  %4 = insertvalue [2 x i128] %3, i128 %2, 1

  ret [2 x i128] %4
}
|
|
|
|
; Loads both i128 elements out of a byval [2 x i128] parameter and returns
; them as a fresh array. The assertions below expect the parameter to be read
; with four scalar 64-bit ld.param instructions (offsets 0, 8, 16, 24) and the
; result to be written with one st.param.v2.b64 per i128 element.
define [2 x i128] @foo2(ptr byval([2 x i128]) %a) {
; CHECK-LABEL: foo2(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.b64 %rd1, foo2_param_0;
; CHECK-NEXT: ld.param.u64 %rd2, [foo2_param_0+8];
; CHECK-NEXT: ld.param.u64 %rd3, [foo2_param_0];
; CHECK-NEXT: ld.param.u64 %rd4, [foo2_param_0+24];
; CHECK-NEXT: ld.param.u64 %rd5, [foo2_param_0+16];
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd5, %rd4};
; CHECK-NEXT: ret;
  ; The loads use the opaque `ptr` type, matching the signature and the GEPs
  ; (the legacy `i128*` spelling denotes the same type).
  %ptr0 = getelementptr [2 x i128], ptr %a, i64 0, i32 0
  %1 = load i128, ptr %ptr0
  %ptr1 = getelementptr [2 x i128], ptr %a, i64 0, i32 1
  %2 = load i128, ptr %ptr1
  ; Both elements are overwritten below, so the initial aggregate is only a
  ; placeholder; poison is used instead of the deprecated undef.
  %3 = insertvalue [2 x i128] poison, i128 %1, 0
  %4 = insertvalue [2 x i128] %3, i128 %2, 1

  ret [2 x i128] %4
}
|
|
|
|
; Takes a [2 x i128] directly as an argument, takes it apart with
; extractvalue and rebuilds it with insertvalue before returning it. The
; assertions below expect each i128 element to be read with one
; ld.param.v2.u64 and written back with one st.param.v2.b64.
define [2 x i128] @foo3([2 x i128] %a) {
; CHECK-LABEL: foo3(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [foo3_param_0+16];
; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [foo3_param_0];
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
; CHECK-NEXT: ret;
  %1 = extractvalue [2 x i128] %a, 0
  %2 = extractvalue [2 x i128] %a, 1
  ; Both elements are overwritten below, so the initial aggregate is only a
  ; placeholder; poison is used instead of the deprecated undef.
  %3 = insertvalue [2 x i128] poison, i128 %1, 0
  %4 = insertvalue [2 x i128] %3, i128 %2, 1

  ret [2 x i128] %4
}
|