[NVPTX] Fix and refine prefetch.* intrinsics (#126899)
This is a follow-up PR to #125887 that fixes the intrinsic failures. --------- Co-authored-by: abmajumder <abmajumder@nvidia.com>
This commit is contained in:
committed by
GitHub
parent
a663e78a6e
commit
55f3df875d
@@ -598,18 +598,18 @@ Syntax:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
|
||||
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
|
||||
declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
|
||||
|
||||
declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
|
||||
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
|
||||
|
||||
declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
|
||||
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
|
||||
|
||||
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
|
||||
|
||||
declare void @llvm.nvvm.prefetchu.L1(ptr %ptr)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
@@ -5001,22 +5001,26 @@ foreach dim = [1, 2, 3, 4, 5] in {
|
||||
}
|
||||
|
||||
// Intrinsics for Prefetch and Prefetchu
|
||||
foreach level = ["L1", "L2"] in {
|
||||
foreach addr = ["global", "local", ""] in {
|
||||
foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
|
||||
defvar suffix = "" # !if(!eq(addr, ""), "", addr # "_") # level # "_" # evict;
|
||||
def int_nvvm_prefetch_ # suffix : Intrinsic<[],
|
||||
!cond(
|
||||
!eq(addr, "global") : [llvm_global_ptr_ty],
|
||||
!eq(addr, "local") : [llvm_local_ptr_ty],
|
||||
!eq(addr, "") : [llvm_ptr_ty]),
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
|
||||
NoCapture<ArgIndex<0>>]>;
|
||||
}
|
||||
}
|
||||
}
|
||||
def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_prefetch_global_L1 : Intrinsic<[], [llvm_global_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_prefetch_global_L2 : Intrinsic<[], [llvm_global_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
def int_nvvm_prefetch_global_L2_evict_normal: Intrinsic<[], [llvm_global_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_prefetch_global_L2_evict_last: Intrinsic<[], [llvm_global_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
def int_nvvm_prefetchu_L1_evictnormal : Intrinsic<[], [llvm_ptr_ty],
|
||||
|
||||
def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty],
|
||||
[IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
|
||||
|
||||
@@ -741,9 +741,6 @@ foreach dim = [1, 2, 3, 4, 5] in {
|
||||
}
|
||||
|
||||
//Prefetch and Prefetchu
|
||||
class Join<string sep, list<string> lst> {
|
||||
string ret = !foldl("", lst, a, b, !if(!eq(a, ""), b, !if(!eq(b,""), a, !strconcat(a, sep, b))));
|
||||
}
|
||||
|
||||
class PREFETCH_INTRS<string InstName> :
|
||||
NVPTXInst<(outs), (ins Int64Regs:$addr),
|
||||
@@ -753,19 +750,25 @@ class PREFETCH_INTRS<string InstName> :
|
||||
Requires<[hasPTX<80>, hasSM<90>]>;
|
||||
|
||||
|
||||
// Only global supports evictlast and evictnormal.
|
||||
// Other variants (local and default) only support evictnormal
|
||||
foreach level = ["L1", "L2"] in {
|
||||
foreach addr = ["global", "local", ""] in {
|
||||
foreach evict = !if(!eq(addr, "global"), ["evictlast", "evictnormal"], ["evictnormal"]) in {
|
||||
defvar suffix = Join<"_", [addr, level, evict]>.ret;
|
||||
defvar inst_name = "prefetch." # !subst("_", ".", suffix);
|
||||
def PREFETCH_# suffix : PREFETCH_INTRS<inst_name>;
|
||||
}
|
||||
}
|
||||
}
|
||||
def PREFETCH_L1 : PREFETCH_INTRS<"prefetch.L1">;
|
||||
def PREFETCH_L2 : PREFETCH_INTRS<"prefetch.L2">;
|
||||
def PREFETCH_GLOBAL_L1 : PREFETCH_INTRS<"prefetch.global.L1">;
|
||||
def PREFETCH_LOCAL_L1 : PREFETCH_INTRS<"prefetch.local.L1">;
|
||||
def PREFETCH_GLOBAL_L2 : PREFETCH_INTRS<"prefetch.global.L2">;
|
||||
def PREFETCH_LOCAL_L2 : PREFETCH_INTRS<"prefetch.local.L2">;
|
||||
|
||||
def PREFETCHU_L1_EVICTNORMAL : PREFETCH_INTRS<"prefetchu.L1.evictnormal">;
|
||||
def PREFETCH_GLOBAL_L2_EVICT_NORMAL : NVPTXInst<(outs), (ins Int64Regs:$addr),
|
||||
"prefetch.global.L2::evict_normal" # " [$addr];",
|
||||
[(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_normal") i64:$addr)]>,
|
||||
Requires<[hasPTX<80>, hasSM<90>]>;
|
||||
|
||||
def PREFETCH_GLOBAL_L2_EVICT_LAST : NVPTXInst<(outs), (ins Int64Regs:$addr),
|
||||
"prefetch.global.L2::evict_last" # " [$addr];",
|
||||
[(!cast<Intrinsic>("int_nvvm_prefetch_global_L2_evict_last") i64:$addr)]>,
|
||||
Requires<[hasPTX<80>, hasSM<90>]>;
|
||||
|
||||
|
||||
def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">;
|
||||
|
||||
//-----------------------------------
|
||||
// MBarrier Functions
|
||||
|
||||
@@ -4,18 +4,18 @@
|
||||
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
declare void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
|
||||
declare void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
|
||||
declare void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
|
||||
|
||||
declare void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.L1(ptr %ptr)
|
||||
declare void @llvm.nvvm.prefetch.L2(ptr %ptr)
|
||||
|
||||
declare void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
|
||||
declare void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
|
||||
declare void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
|
||||
|
||||
declare void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
|
||||
declare void @llvm.nvvm.prefetchu.L1(ptr %ptr)
|
||||
|
||||
define void @prefetch_local(ptr addrspace(5) %local_ptr) {
|
||||
; CHECK-PTX64-LABEL: prefetch_local(
|
||||
@@ -24,11 +24,11 @@ define void @prefetch_local(ptr addrspace(5) %local_ptr) {
|
||||
; CHECK-PTX64-EMPTY:
|
||||
; CHECK-PTX64-NEXT: // %bb.0:
|
||||
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_local_param_0];
|
||||
; CHECK-PTX64-NEXT: prefetch.local.L1.evictnormal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.local.L2.evictnormal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.local.L1 [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.local.L2 [%rd1];
|
||||
; CHECK-PTX64-NEXT: ret;
|
||||
tail call void @llvm.nvvm.prefetch.local.L1.evictnormal(ptr addrspace(5) %local_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.local.L2.evictnormal(ptr addrspace(5) %local_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.local.L1(ptr addrspace(5) %local_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.local.L2(ptr addrspace(5) %local_ptr)
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -39,15 +39,15 @@ define void @prefetch_global(ptr addrspace(1) %global_ptr) {
|
||||
; CHECK-PTX64-EMPTY:
|
||||
; CHECK-PTX64-NEXT: // %bb.0:
|
||||
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch_global_param_0];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L1.evictnormal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L2.evictnormal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L1.evictlast [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L2.evictlast [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L1 [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L2 [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L2::evict_normal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.global.L2::evict_last [%rd1];
|
||||
; CHECK-PTX64-NEXT: ret;
|
||||
tail call void @llvm.nvvm.prefetch.global.L1.evictnormal(ptr addrspace(1) %global_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.global.L2.evictnormal(ptr addrspace(1) %global_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.global.L1.evictlast(ptr addrspace(1) %global_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.global.L2.evictlast(ptr addrspace(1) %global_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.global.L1(ptr addrspace(1) %global_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.global.L2(ptr addrspace(1) %global_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.global.L2.evict.normal(ptr addrspace(1) %global_ptr)
|
||||
tail call void @llvm.nvvm.prefetch.global.L2.evict.last(ptr addrspace(1) %global_ptr)
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -59,11 +59,11 @@ define void @prefetch_(ptr %ptr) {
|
||||
; CHECK-PTX64-EMPTY:
|
||||
; CHECK-PTX64-NEXT: // %bb.0:
|
||||
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetch__param_0];
|
||||
; CHECK-PTX64-NEXT: prefetch.L1.evictnormal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.L2.evictnormal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.L1 [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetch.L2 [%rd1];
|
||||
; CHECK-PTX64-NEXT: ret;
|
||||
tail call void @llvm.nvvm.prefetch.L1.evictnormal(ptr %ptr)
|
||||
tail call void @llvm.nvvm.prefetch.L2.evictnormal(ptr %ptr)
|
||||
tail call void @llvm.nvvm.prefetch.L1(ptr %ptr)
|
||||
tail call void @llvm.nvvm.prefetch.L2(ptr %ptr)
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -74,8 +74,8 @@ define void @prefetchu_l1(ptr %ptr) {
|
||||
; CHECK-PTX64-EMPTY:
|
||||
; CHECK-PTX64-NEXT: // %bb.0:
|
||||
; CHECK-PTX64-NEXT: ld.param.u64 %rd1, [prefetchu_l1_param_0];
|
||||
; CHECK-PTX64-NEXT: prefetchu.L1.evictnormal [%rd1];
|
||||
; CHECK-PTX64-NEXT: prefetchu.L1 [%rd1];
|
||||
; CHECK-PTX64-NEXT: ret;
|
||||
tail call void @llvm.nvvm.prefetchu.L1.evictnormal(ptr %ptr)
|
||||
tail call void @llvm.nvvm.prefetchu.L1(ptr %ptr)
|
||||
ret void
|
||||
}
|
||||
Reference in New Issue
Block a user