NEON has non-IEEE-compliant denormal flushing, so the compiler should check whether it is safe to vectorize instructions for NEON in non-fast-math mode. Fixes https://github.com/llvm/llvm-project/issues/106909
61 lines
2.8 KiB
LLVM
61 lines
2.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S --passes=slp-vectorizer -mtriple=arm-- -mattr=+neon < %s | FileCheck %s

; Four adjacent float elements are loaded from %a and %b, added pairwise and
; stored to %r. None of the fadd instructions carries fast-math flags, so the
; SLP vectorizer is expected to leave all of them scalar on this NEON target:
; NEON flushes denormals in a non-IEEE-compliant way, which means a vector
; fadd could produce different results than the scalar ones.
; See https://github.com/llvm/llvm-project/issues/106909.

define void @test(ptr %a, ptr %b, ptr %r) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[R:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[V_A_0:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    [[A_1:%.*]] = getelementptr i8, ptr [[A]], i64 4
; CHECK-NEXT:    [[V_A_1:%.*]] = load float, ptr [[A_1]], align 4
; CHECK-NEXT:    [[A_2:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT:    [[V_A_2:%.*]] = load float, ptr [[A_2]], align 4
; CHECK-NEXT:    [[A_3:%.*]] = getelementptr i8, ptr [[A]], i64 12
; CHECK-NEXT:    [[V_A_3:%.*]] = load float, ptr [[A_3]], align 4
; CHECK-NEXT:    [[V_B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT:    [[B_1:%.*]] = getelementptr i8, ptr [[B]], i64 4
; CHECK-NEXT:    [[V_B_1:%.*]] = load float, ptr [[B_1]], align 4
; CHECK-NEXT:    [[B_2:%.*]] = getelementptr i8, ptr [[B]], i64 8
; CHECK-NEXT:    [[V_B_2:%.*]] = load float, ptr [[B_2]], align 4
; CHECK-NEXT:    [[B_3:%.*]] = getelementptr i8, ptr [[B]], i64 12
; CHECK-NEXT:    [[V_B_3:%.*]] = load float, ptr [[B_3]], align 4
; CHECK-NEXT:    [[V_R_0:%.*]] = fadd float [[V_A_0]], [[V_B_0]]
; CHECK-NEXT:    [[V_R_1:%.*]] = fadd float [[V_A_1]], [[V_B_1]]
; CHECK-NEXT:    [[V_R_2:%.*]] = fadd float [[V_A_2]], [[V_B_2]]
; CHECK-NEXT:    [[V_R_3:%.*]] = fadd float [[V_A_3]], [[V_B_3]]
; CHECK-NEXT:    store float [[V_R_0]], ptr [[R]], align 4
; CHECK-NEXT:    [[R_1:%.*]] = getelementptr i8, ptr [[R]], i64 4
; CHECK-NEXT:    store float [[V_R_1]], ptr [[R_1]], align 4
; CHECK-NEXT:    [[R_2:%.*]] = getelementptr i8, ptr [[R]], i64 8
; CHECK-NEXT:    store float [[V_R_2]], ptr [[R_2]], align 4
; CHECK-NEXT:    [[R_3:%.*]] = getelementptr i8, ptr [[R]], i64 12
; CHECK-NEXT:    store float [[V_R_3]], ptr [[R_3]], align 4
; CHECK-NEXT:    ret void
;
  %v.a.0 = load float, ptr %a
  %a.1 = getelementptr i8, ptr %a, i64 4
  %v.a.1 = load float, ptr %a.1
  %a.2 = getelementptr i8, ptr %a, i64 8
  %v.a.2 = load float, ptr %a.2
  %a.3 = getelementptr i8, ptr %a, i64 12
  %v.a.3 = load float, ptr %a.3
  %v.b.0 = load float, ptr %b
  %b.1 = getelementptr i8, ptr %b, i64 4
  %v.b.1 = load float, ptr %b.1
  %b.2 = getelementptr i8, ptr %b, i64 8
  %v.b.2 = load float, ptr %b.2
  %b.3 = getelementptr i8, ptr %b, i64 12
  %v.b.3 = load float, ptr %b.3
  %v.r.0 = fadd float %v.a.0, %v.b.0
  %v.r.1 = fadd float %v.a.1, %v.b.1
  %v.r.2 = fadd float %v.a.2, %v.b.2
  %v.r.3 = fadd float %v.a.3, %v.b.3
  store float %v.r.0, ptr %r
  %r.1 = getelementptr i8, ptr %r, i64 4
  store float %v.r.1, ptr %r.1
  %r.2 = getelementptr i8, ptr %r, i64 8
  store float %v.r.2, ptr %r.2
  %r.3 = getelementptr i8, ptr %r, i64 12
  store float %v.r.3, ptr %r.3
  ret void
}