Adds "initial" support for `syncscope` to the NVPTX backend `load`/`store`/`fence` instructions. Atomic Read-Modify-Write operations intentionally not supported as part of this initial PR.
1615 lines
76 KiB
LLVM
1615 lines
76 KiB
LLVM
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
|
|
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
|
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
|
|
; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
|
|
|
|
; TODO: add i1, <8 x i8>, and <6 x i8> vector tests.
|
|
|
|
; TODO: add test for vectors that exceed 128-bit length
|
|
; Per https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#vectors
|
|
; vectors cannot exceed 128-bit in length, i.e., .v4.u64 is not allowed.
|
|
|
|
; TODO: generate PTX that preserves Concurrent Forward Progress
|
|
; for atomic operations to local statespace
|
|
; by generating atomic or volatile operations.
|
|
|
|
; TODO: design exposure for atomic operations on vector types.
|
|
|
|
; TODO: add weak,atomic,volatile,atomic volatile tests
|
|
; for .const and .param statespaces.
|
|
|
|
; TODO: optimize .sys.shared into .cta.shared or .cluster.shared .
|
|
|
|
; generic statespace
|
|
|
|
; CHECK-LABEL: generic_weak
|
|
define void @generic_weak(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
|
|
; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load i8, ptr %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i8 %a.add, ptr %a
|
|
|
|
; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load i16, ptr %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i16 %b.add, ptr %b
|
|
|
|
; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load i32, ptr %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store i32 %c.add, ptr %c
|
|
|
|
; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load i64, ptr %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store i64 %d.add, ptr %d
|
|
|
|
; CHECK: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load float, ptr %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store float %e.add, ptr %c
|
|
|
|
; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load double, ptr %d
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store double %f.add, ptr %d
|
|
|
|
; TODO: make the lowering of this weak vector ops consistent with
|
|
; the ones of the next tests. This test lowers to a weak PTX
|
|
; vector op, but next test lowers to a vector PTX op.
|
|
; CHECK: ld.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load <2 x i8>, ptr %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <2 x i8> %h.add, ptr %b
|
|
|
|
; TODO: make the lowering of this weak vector ops consistent with
|
|
; the ones of the previous test. This test lowers to a weak
|
|
; PTX scalar op, but prior test lowers to a vector PTX op.
|
|
; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load <4 x i8>, ptr %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <4 x i8> %i.add, ptr %c
|
|
|
|
; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load <2 x i16>, ptr %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <2 x i16> %j.add, ptr %c
|
|
|
|
; CHECK: ld.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load <4 x i16>, ptr %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <4 x i16> %k.add, ptr %d
|
|
|
|
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load <2 x i32>, ptr %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <2 x i32> %l.add, ptr %d
|
|
|
|
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load <4 x i32>, ptr %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <4 x i32> %m.add, ptr %d
|
|
|
|
; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load <2 x i64>, ptr %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store <2 x i64> %n.add, ptr %d
|
|
|
|
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load <2 x float>, ptr %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <2 x float> %o.add, ptr %d
|
|
|
|
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load <4 x float>, ptr %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <4 x float> %p.add, ptr %d
|
|
|
|
; CHECK: ld.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load <2 x double>, ptr %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store <2 x double> %q.add, ptr %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: generic_volatile
|
|
define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
|
|
; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load volatile i8, ptr %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i8 %a.add, ptr %a
|
|
|
|
; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load volatile i16, ptr %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i16 %b.add, ptr %b
|
|
|
|
; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load volatile i32, ptr %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile i32 %c.add, ptr %c
|
|
|
|
; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load volatile i64, ptr %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store volatile i64 %d.add, ptr %d
|
|
|
|
; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load volatile float, ptr %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store volatile float %e.add, ptr %c
|
|
|
|
; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load volatile double, ptr %c
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store volatile double %f.add, ptr %c
|
|
|
|
; TODO: volatile, atomic, and volatile atomic memory operations on vector types.
|
|
; Currently, LLVM:
|
|
; - does not allow atomic operations on vectors.
|
|
; - it allows volatile operations but not clear what that means.
|
|
; Following both semantics make sense in general and PTX supports both:
|
|
; - volatile/atomic/volatile atomic applies to the whole vector
|
|
; - volatile/atomic/volatile atomic applies elementwise
|
|
; Actions required:
|
|
; - clarify LLVM semantics for volatile on vectors and align the NVPTX backend with those
|
|
; Below tests show that the current implementation picks the semantics in an inconsistent way
|
|
; * volatile <2 x i8> lowers to "elementwise volatile"
|
|
; * <4 x i8> lowers to "full vector volatile"
|
|
; - provide support for vector atomics, e.g., by extending LLVM IR or via intrinsics
|
|
; - update tests in load-store-sm70.ll as well.
|
|
|
|
; TODO: make this operation consistent with the one for <4 x i8>
|
|
; This operation lowers to a "element wise volatile PTX operation".
|
|
; CHECK: ld.volatile.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load volatile <2 x i8>, ptr %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.volatile.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile <2 x i8> %h.add, ptr %b
|
|
|
|
; TODO: make this operation consistent with the one for <2 x i8>
|
|
; This operation lowers to a "full vector volatile PTX operation".
|
|
; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load volatile <4 x i8>, ptr %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile <4 x i8> %i.add, ptr %c
|
|
|
|
; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load volatile <2 x i16>, ptr %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile <2 x i16> %j.add, ptr %c
|
|
|
|
; CHECK: ld.volatile.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load volatile <4 x i16>, ptr %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.volatile.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile <4 x i16> %k.add, ptr %d
|
|
|
|
; CHECK: ld.volatile.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load volatile <2 x i32>, ptr %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.volatile.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile <2 x i32> %l.add, ptr %d
|
|
|
|
; CHECK: ld.volatile.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load volatile <4 x i32>, ptr %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.volatile.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile <4 x i32> %m.add, ptr %d
|
|
|
|
; CHECK: ld.volatile.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load volatile <2 x i64>, ptr %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.volatile.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store volatile <2 x i64> %n.add, ptr %d
|
|
|
|
; CHECK: ld.volatile.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load volatile <2 x float>, ptr %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.volatile.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile <2 x float> %o.add, ptr %d
|
|
|
|
; CHECK: ld.volatile.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load volatile <4 x float>, ptr %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.volatile.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile <4 x float> %p.add, ptr %d
|
|
|
|
; CHECK: ld.volatile.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load volatile <2 x double>, ptr %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.volatile.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store volatile <2 x double> %q.add, ptr %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: generic_unordered_sys
|
|
define void @generic_unordered_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr %a unordered, align 1
|
|
|
|
; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr %b unordered, align 2
|
|
|
|
; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr %c unordered, align 4
|
|
|
|
; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr %d unordered, align 8
|
|
|
|
; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr %e unordered, align 4
|
|
|
|
; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: generic_unordered_volatile_sys
|
|
define void @generic_unordered_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
|
|
; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr %a unordered, align 1
|
|
|
|
; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr %b unordered, align 2
|
|
|
|
; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr %c unordered, align 4
|
|
|
|
; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr %d unordered, align 8
|
|
|
|
; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr %e unordered, align 4
|
|
|
|
; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: generic_monotonic_sys
|
|
define void @generic_monotonic_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr %a monotonic, align 1
|
|
|
|
; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr %b monotonic, align 2
|
|
|
|
; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr %c monotonic, align 4
|
|
|
|
; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr %d monotonic, align 8
|
|
|
|
; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr %e monotonic, align 4
|
|
|
|
; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: generic_monotonic_volatile_sys
|
|
define void @generic_monotonic_volatile_sys(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
|
|
; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr %a monotonic, align 1
|
|
|
|
; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr %b monotonic, align 2
|
|
|
|
; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr %c monotonic, align 4
|
|
|
|
; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr %d monotonic, align 8
|
|
|
|
; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr %e monotonic, align 4
|
|
|
|
; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
;; global statespace
|
|
|
|
; CHECK-LABEL: global_weak
|
|
define void @global_weak(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
|
|
; CHECK: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load i8, ptr addrspace(1) %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i8 %a.add, ptr addrspace(1) %a
|
|
|
|
; CHECK: ld.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load i16, ptr addrspace(1) %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i16 %b.add, ptr addrspace(1) %b
|
|
|
|
; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load i32, ptr addrspace(1) %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store i32 %c.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load i64, ptr addrspace(1) %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store i64 %d.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load float, ptr addrspace(1) %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store float %e.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load double, ptr addrspace(1) %c
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store double %f.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load <2 x i8>, ptr addrspace(1) %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <2 x i8> %h.add, ptr addrspace(1) %b
|
|
|
|
; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load <4 x i8>, ptr addrspace(1) %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <4 x i8> %i.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load <2 x i16>, ptr addrspace(1) %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <2 x i16> %j.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load <4 x i16>, ptr addrspace(1) %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <4 x i16> %k.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load <2 x i32>, ptr addrspace(1) %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <2 x i32> %l.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load <4 x i32>, ptr addrspace(1) %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <4 x i32> %m.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load <2 x i64>, ptr addrspace(1) %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store <2 x i64> %n.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load <2 x float>, ptr addrspace(1) %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <2 x float> %o.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load <4 x float>, ptr addrspace(1) %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <4 x float> %p.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load <2 x double>, ptr addrspace(1) %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store <2 x double> %q.add, ptr addrspace(1) %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: global_volatile
|
|
define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
|
|
; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load volatile i8, ptr addrspace(1) %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i8 %a.add, ptr addrspace(1) %a
|
|
|
|
; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load volatile i16, ptr addrspace(1) %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i16 %b.add, ptr addrspace(1) %b
|
|
|
|
; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load volatile i32, ptr addrspace(1) %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile i32 %c.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load volatile i64, ptr addrspace(1) %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store volatile i64 %d.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load volatile float, ptr addrspace(1) %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store volatile float %e.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load volatile double, ptr addrspace(1) %c
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store volatile double %f.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.volatile.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load volatile <2 x i8>, ptr addrspace(1) %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.volatile.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile<2 x i8> %h.add, ptr addrspace(1) %b
|
|
|
|
; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load volatile <4 x i8>, ptr addrspace(1) %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile<4 x i8> %i.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load volatile <2 x i16>, ptr addrspace(1) %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile<2 x i16> %j.add, ptr addrspace(1) %c
|
|
|
|
; CHECK: ld.volatile.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load volatile <4 x i16>, ptr addrspace(1) %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.volatile.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile<4 x i16> %k.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.volatile.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load volatile <2 x i32>, ptr addrspace(1) %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.volatile.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile<2 x i32> %l.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.volatile.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load volatile <4 x i32>, ptr addrspace(1) %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.volatile.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile<4 x i32> %m.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.volatile.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load volatile <2 x i64>, ptr addrspace(1) %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.volatile.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store volatile<2 x i64> %n.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.volatile.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load volatile <2 x float>, ptr addrspace(1) %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.volatile.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile<2 x float> %o.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.volatile.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load volatile <4 x float>, ptr addrspace(1) %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.volatile.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile<4 x float> %p.add, ptr addrspace(1) %d
|
|
|
|
; CHECK: ld.volatile.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load volatile <2 x double>, ptr addrspace(1) %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.volatile.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store volatile<2 x double> %q.add, ptr addrspace(1) %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: global_unordered_sys
|
|
define void @global_unordered_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
|
|
|
|
; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr addrspace(1) %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr addrspace(1) %b unordered, align 2
|
|
|
|
; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr addrspace(1) %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr addrspace(1) %c unordered, align 4
|
|
|
|
; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr addrspace(1) %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr addrspace(1) %d unordered, align 8
|
|
|
|
; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr addrspace(1) %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr addrspace(1) %e unordered, align 4
|
|
|
|
; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr addrspace(1) %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr addrspace(1) %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: global_unordered_volatile_sys
|
|
define void @global_unordered_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
|
|
|
|
; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr addrspace(1) %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr addrspace(1) %b unordered, align 2
|
|
|
|
; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr addrspace(1) %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr addrspace(1) %c unordered, align 4
|
|
|
|
; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr addrspace(1) %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr addrspace(1) %d unordered, align 8
|
|
|
|
; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr addrspace(1) %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr addrspace(1) %e unordered, align 4
|
|
|
|
; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr addrspace(1) %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr addrspace(1) %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: global_monotonic_sys
|
|
define void @global_monotonic_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
|
|
|
|
; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
|
|
|
|
; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
|
|
|
|
; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
|
|
|
|
; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
|
|
|
|
; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: global_monotonic_volatile_sys
|
|
define void @global_monotonic_volatile_sys(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
|
|
|
|
; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
|
|
|
|
; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
|
|
|
|
; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
|
|
|
|
; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
|
|
|
|
; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
;; shared statespace
|
|
|
|
; CHECK-LABEL: shared_weak
|
|
define void @shared_weak(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
|
|
; CHECK: ld.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load i8, ptr addrspace(3) %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i8 %a.add, ptr addrspace(3) %a
|
|
|
|
; CHECK: ld.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load i16, ptr addrspace(3) %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i16 %b.add, ptr addrspace(3) %b
|
|
|
|
; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load i32, ptr addrspace(3) %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store i32 %c.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load i64, ptr addrspace(3) %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store i64 %d.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load float, ptr addrspace(3) %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store float %e.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load double, ptr addrspace(3) %c
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store double %f.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load <2 x i8>, ptr addrspace(3) %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <2 x i8> %h.add, ptr addrspace(3) %b
|
|
|
|
; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load <4 x i8>, ptr addrspace(3) %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <4 x i8> %i.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load <2 x i16>, ptr addrspace(3) %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <2 x i16> %j.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load <4 x i16>, ptr addrspace(3) %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <4 x i16> %k.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load <2 x i32>, ptr addrspace(3) %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <2 x i32> %l.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load <4 x i32>, ptr addrspace(3) %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <4 x i32> %m.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load <2 x i64>, ptr addrspace(3) %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store <2 x i64> %n.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load <2 x float>, ptr addrspace(3) %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <2 x float> %o.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load <4 x float>, ptr addrspace(3) %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <4 x float> %p.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load <2 x double>, ptr addrspace(3) %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store <2 x double> %q.add, ptr addrspace(3) %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: shared_volatile
|
|
define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
|
|
; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load volatile i8, ptr addrspace(3) %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i8 %a.add, ptr addrspace(3) %a
|
|
|
|
; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load volatile i16, ptr addrspace(3) %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i16 %b.add, ptr addrspace(3) %b
|
|
|
|
; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load volatile i32, ptr addrspace(3) %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile i32 %c.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load volatile i64, ptr addrspace(3) %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store volatile i64 %d.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load volatile float, ptr addrspace(3) %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store volatile float %e.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load volatile double, ptr addrspace(3) %c
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store volatile double %f.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.volatile.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load volatile <2 x i8>, ptr addrspace(3) %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.volatile.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile <2 x i8> %h.add, ptr addrspace(3) %b
|
|
|
|
; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load volatile <4 x i8>, ptr addrspace(3) %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile <4 x i8> %i.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load volatile <2 x i16>, ptr addrspace(3) %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile <2 x i16> %j.add, ptr addrspace(3) %c
|
|
|
|
; CHECK: ld.volatile.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load volatile <4 x i16>, ptr addrspace(3) %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.volatile.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile <4 x i16> %k.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.volatile.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load volatile <2 x i32>, ptr addrspace(3) %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.volatile.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile <2 x i32> %l.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.volatile.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load volatile <4 x i32>, ptr addrspace(3) %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.volatile.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile <4 x i32> %m.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.volatile.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load volatile <2 x i64>, ptr addrspace(3) %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.volatile.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store volatile <2 x i64> %n.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.volatile.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load volatile <2 x float>, ptr addrspace(3) %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.volatile.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile <2 x float> %o.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.volatile.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load volatile <4 x float>, ptr addrspace(3) %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.volatile.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile <4 x float> %p.add, ptr addrspace(3) %d
|
|
|
|
; CHECK: ld.volatile.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load volatile <2 x double>, ptr addrspace(3) %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.volatile.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store volatile <2 x double> %q.add, ptr addrspace(3) %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: shared_unordered_sys
|
|
define void @shared_unordered_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
|
|
|
|
; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr addrspace(3) %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr addrspace(3) %b unordered, align 2
|
|
|
|
; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr addrspace(3) %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr addrspace(3) %c unordered, align 4
|
|
|
|
; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr addrspace(3) %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr addrspace(3) %d unordered, align 8
|
|
|
|
; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr addrspace(3) %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr addrspace(3) %e unordered, align 4
|
|
|
|
; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr addrspace(3) %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr addrspace(3) %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: shared_unordered_volatile_sys
|
|
define void @shared_unordered_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
|
|
; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1
|
|
|
|
; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr addrspace(3) %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr addrspace(3) %b unordered, align 2
|
|
|
|
; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr addrspace(3) %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr addrspace(3) %c unordered, align 4
|
|
|
|
; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr addrspace(3) %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr addrspace(3) %d unordered, align 8
|
|
|
|
; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr addrspace(3) %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr addrspace(3) %e unordered, align 4
|
|
|
|
; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr addrspace(3) %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr addrspace(3) %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: shared_monotonic_sys
|
|
define void @shared_monotonic_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
|
|
; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
|
|
|
|
; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
|
|
|
|
; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
|
|
|
|
; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
|
|
|
|
; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
|
|
|
|
; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: shared_monotonic_volatile_sys
|
|
define void @shared_monotonic_volatile_sys(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
|
|
; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
|
|
|
|
; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr addrspace(3) %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr addrspace(3) %b monotonic, align 2
|
|
|
|
; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr addrspace(3) %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr addrspace(3) %c monotonic, align 4
|
|
|
|
; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr addrspace(3) %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr addrspace(3) %d monotonic, align 8
|
|
|
|
; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr addrspace(3) %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr addrspace(3) %e monotonic, align 4
|
|
|
|
; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr addrspace(3) %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr addrspace(3) %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
;; local statespace
|
|
|
|
; CHECK-LABEL: local_weak
|
|
define void @local_weak(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
|
|
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load i8, ptr addrspace(5) %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i8 %a.add, ptr addrspace(5) %a
|
|
|
|
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load i16, ptr addrspace(5) %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store i16 %b.add, ptr addrspace(5) %b
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load i32, ptr addrspace(5) %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store i32 %c.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load i64, ptr addrspace(5) %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store i64 %d.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load float, ptr addrspace(5) %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store float %e.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load double, ptr addrspace(5) %c
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store double %f.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load <2 x i8>, ptr addrspace(5) %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <2 x i8> %h.add, ptr addrspace(5) %b
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load <4 x i8>, ptr addrspace(5) %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <4 x i8> %i.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load <2 x i16>, ptr addrspace(5) %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store <2 x i16> %j.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load <4 x i16>, ptr addrspace(5) %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store <4 x i16> %k.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load <2 x i32>, ptr addrspace(5) %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <2 x i32> %l.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load <4 x i32>, ptr addrspace(5) %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store <4 x i32> %m.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load <2 x i64>, ptr addrspace(5) %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store <2 x i64> %n.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load <2 x float>, ptr addrspace(5) %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <2 x float> %o.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load <4 x float>, ptr addrspace(5) %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store <4 x float> %p.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load <2 x double>, ptr addrspace(5) %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store <2 x double> %q.add, ptr addrspace(5) %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: local_volatile
|
|
define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
|
|
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load volatile i8, ptr addrspace(5) %a
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i8 %a.add, ptr addrspace(5) %a
|
|
|
|
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load volatile i16, ptr addrspace(5) %b
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store volatile i16 %b.add, ptr addrspace(5) %b
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load volatile i32, ptr addrspace(5) %c
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile i32 %c.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load volatile i64, ptr addrspace(5) %d
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store volatile i64 %d.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load volatile float, ptr addrspace(5) %c
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store volatile float %e.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load volatile double, ptr addrspace(5) %c
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store volatile double %f.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%h.load = load volatile <2 x i8>, ptr addrspace(5) %b
|
|
%h.add = add <2 x i8> %h.load, <i8 1, i8 1>
|
|
; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile <2 x i8> %h.add, ptr addrspace(5) %b
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%i.load = load volatile <4 x i8>, ptr addrspace(5) %c
|
|
%i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile <4 x i8> %i.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%j.load = load volatile <2 x i16>, ptr addrspace(5) %c
|
|
%j.add = add <2 x i16> %j.load, <i16 1, i16 1>
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store volatile <2 x i16> %j.add, ptr addrspace(5) %c
|
|
|
|
; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%k.load = load volatile <4 x i16>, ptr addrspace(5) %d
|
|
%k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
|
|
; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
|
|
store volatile <4 x i16> %k.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%l.load = load volatile <2 x i32>, ptr addrspace(5) %d
|
|
%l.add = add <2 x i32> %l.load, <i32 1, i32 1>
|
|
; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile <2 x i32> %l.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%m.load = load volatile <4 x i32>, ptr addrspace(5) %d
|
|
%m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
|
|
; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
|
|
store volatile <4 x i32> %m.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%n.load = load volatile <2 x i64>, ptr addrspace(5) %d
|
|
%n.add = add <2 x i64> %n.load, <i64 1, i64 1>
|
|
; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
|
|
store volatile <2 x i64> %n.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%o.load = load volatile <2 x float>, ptr addrspace(5) %d
|
|
%o.add = fadd <2 x float> %o.load, <float 1., float 1.>
|
|
; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile <2 x float> %o.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%p.load = load volatile <4 x float>, ptr addrspace(5) %d
|
|
%p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
|
|
; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
|
|
store volatile <4 x float> %p.add, ptr addrspace(5) %d
|
|
|
|
; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
|
|
%q.load = load volatile <2 x double>, ptr addrspace(5) %d
|
|
%q.add = fadd <2 x double> %q.load, <double 1., double 1.>
|
|
; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
|
|
store volatile <2 x double> %q.add, ptr addrspace(5) %d
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: local_unordered_sys
|
|
define void @local_unordered_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
|
|
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1
|
|
|
|
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr addrspace(5) %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr addrspace(5) %b unordered, align 2
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr addrspace(5) %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr addrspace(5) %c unordered, align 4
|
|
|
|
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr addrspace(5) %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr addrspace(5) %d unordered, align 8
|
|
|
|
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr addrspace(5) %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr addrspace(5) %e unordered, align 4
|
|
|
|
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr addrspace(5) %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr addrspace(5) %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: local_unordered_volatile_sys
|
|
define void @local_unordered_volatile_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
|
|
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1
|
|
|
|
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr addrspace(5) %b unordered, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr addrspace(5) %b unordered, align 2
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr addrspace(5) %c unordered, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr addrspace(5) %c unordered, align 4
|
|
|
|
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr addrspace(5) %d unordered, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr addrspace(5) %d unordered, align 8
|
|
|
|
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr addrspace(5) %e unordered, align 4
|
|
%e.add = fadd float %e.load, 1.0
|
|
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr addrspace(5) %e unordered, align 4
|
|
|
|
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr addrspace(5) %e unordered, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr addrspace(5) %e unordered, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: local_monotonic_sys
|
|
define void @local_monotonic_sys(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
|
|
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
|
|
|
|
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic i16, ptr addrspace(5) %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic i16 %b.add, ptr addrspace(5) %b monotonic, align 2
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic i32, ptr addrspace(5) %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic i32 %c.add, ptr addrspace(5) %c monotonic, align 4
|
|
|
|
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic i64, ptr addrspace(5) %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic i64 %d.add, ptr addrspace(5) %d monotonic, align 8
|
|
|
|
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic float, ptr addrspace(5) %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic float %e.add, ptr addrspace(5) %e monotonic, align 4
|
|
|
|
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic double, ptr addrspace(5) %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic double %f.add, ptr addrspace(5) %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|
|
|
|
; CHECK-LABEL: local_monotonic_volatile
|
|
define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
|
|
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
|
|
%a.add = add i8 %a.load, 1
|
|
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
|
|
|
|
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%b.load = load atomic volatile i16, ptr addrspace(5) %b monotonic, align 2
|
|
%b.add = add i16 %b.load, 1
|
|
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
|
|
store atomic volatile i16 %b.add, ptr addrspace(5) %b monotonic, align 2
|
|
|
|
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%c.load = load atomic volatile i32, ptr addrspace(5) %c monotonic, align 4
|
|
%c.add = add i32 %c.load, 1
|
|
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
|
|
store atomic volatile i32 %c.add, ptr addrspace(5) %c monotonic, align 4
|
|
|
|
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%d.load = load atomic volatile i64, ptr addrspace(5) %d monotonic, align 8
|
|
%d.add = add i64 %d.load, 1
|
|
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
|
|
store atomic volatile i64 %d.add, ptr addrspace(5) %d monotonic, align 8
|
|
|
|
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%e.load = load atomic volatile float, ptr addrspace(5) %e monotonic, align 4
|
|
%e.add = fadd float %e.load, 1.
|
|
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
|
|
store atomic volatile float %e.add, ptr addrspace(5) %e monotonic, align 4
|
|
|
|
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
|
|
%f.load = load atomic volatile double, ptr addrspace(5) %e monotonic, align 8
|
|
%f.add = fadd double %f.load, 1.
|
|
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
|
|
store atomic volatile double %f.add, ptr addrspace(5) %e monotonic, align 8
|
|
|
|
ret void
|
|
}
|