; Generate a predicated variant of the last load/store in a group to avoid
; accessing OOB memory. Disable vector alignment on HVX prior to v62, since
; versions before v62 do not have predicated vector loads.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s
; Function Attrs: nounwind
|
|
define <32 x i32> @f0(ptr %a0, i32 %a1) #0 {
|
|
; CHECK-LABEL: f0:
|
|
; CHECK: // %bb.0: // %b0
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: r0 = add(r1,r0)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: r7 = #8
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: r4 = ##.LCPI0_0
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: r2 = #-1
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v0 = vmem(r0+#1)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v1 = vmem(r0+#2)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: r0 = add(r0,#128)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v1 = valign(v1,v0,r7)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v2 = vmem(r4+#0)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: q0 = vand(v2,r2)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v0.w = vadd(v0.w,v1.w)
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: if (q0) vmem(r0+#0) = v0
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: jumpr r31
|
|
; CHECK-NEXT: }
|
|
b0:
|
|
%v0 = add i32 %a1, 128
|
|
%v1 = getelementptr i8, ptr %a0, i32 %v0
|
|
%v3 = tail call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %v1, i32 128, <32 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i32> undef)
|
|
%v4 = add i32 %a1, 136
|
|
%v5 = getelementptr i8, ptr %a0, i32 %v4
|
|
%v7 = tail call <32 x i32> @llvm.masked.load.v32i32.p0(ptr %v5, i32 8, <32 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i32> undef)
|
|
%v8 = add <32 x i32> %v3, %v7
|
|
tail call void @llvm.masked.store.v32i32.p0(<32 x i32> %v8, ptr %v1, i32 128, <32 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
|
|
ret <32 x i32> %v8
|
|
}
; Function Attrs: argmemonly nounwind readonly willreturn
|
|
declare <32 x i32> @llvm.masked.load.v32i32.p0(ptr, i32 immarg, <32 x i1>, <32 x i32>) #1
|
|
|
|
; Function Attrs: argmemonly nounwind willreturn
|
|
declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32 immarg, <32 x i1>) #2
|
|
|
|
attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length128b,-packets" }
|
|
attributes #1 = { argmemonly nounwind readonly willreturn }
|
|
attributes #2 = { argmemonly nounwind willreturn }
|