Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/AArch64/fpsat.ll
David Green 6f81903e89 [LV][SLP] Mark fptosi_sat as vectorizable
This adds fptosi_sat and fptoui_sat to the list of trivially
vectorizable functions, mainly so that the loop vectorizer can vectorize
the instruction. Marking them as trivially vectorizable also allows them
to be SLP vectorized, and Scalarized.

The signature of a fptosi_sat requires two type overrides
(@llvm.fptosi.sat.v2i32.v2f32), unlike other intrinsics, which often take
only a single one. This patch alters hasVectorInstrinsicOverloadedScalarOpd
to isVectorIntrinsicWithOverloadTypeAtArg, so that it can mark the first
operand of the intrinsic as an overloaded (but not scalar) operand.

Differential Revision: https://reviews.llvm.org/D124358
2022-05-03 09:32:34 +01:00

68 lines
2.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64-none-eabi < %s | FileCheck %s
;
; Each function below converts four adjacent float elements with a scalar
; llvm.fptosi.sat / llvm.fptoui.sat call and stores the four i32 results to
; adjacent slots. The assertions expect the SLP vectorizer to replace them with
; one <4 x float> load, one <4 x i32> intrinsic call and one <4 x i32> store.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; Signed case: four adjacent floats are read from %x, each is converted with
; the scalar llvm.fptosi.sat.i32.f32 intrinsic, and the four i32 results are
; written to adjacent slots of %y. %n is not used by the body.
define void @signed(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: @signed(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; Load input lanes 0..3 from consecutive float slots of %x.
  %in0 = load float, ptr %x, align 4
  %src.1 = getelementptr inbounds float, ptr %x, i64 1
  %in1 = load float, ptr %src.1, align 4
  %src.2 = getelementptr inbounds float, ptr %x, i64 2
  %in2 = load float, ptr %src.2, align 4
  %src.3 = getelementptr inbounds float, ptr %x, i64 3
  %in3 = load float, ptr %src.3, align 4

  ; Convert every lane with the scalar signed saturating intrinsic.
  %cvt0 = tail call i32 @llvm.fptosi.sat.i32.f32(float %in0)
  %cvt1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %in1)
  %cvt2 = tail call i32 @llvm.fptosi.sat.i32.f32(float %in2)
  %cvt3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %in3)

  ; Store result lanes 0..3 to consecutive i32 slots of %y.
  store i32 %cvt0, ptr %y, align 4
  %dst.1 = getelementptr inbounds i32, ptr %y, i64 1
  store i32 %cvt1, ptr %dst.1, align 4
  %dst.2 = getelementptr inbounds i32, ptr %y, i64 2
  store i32 %cvt2, ptr %dst.2, align 4
  %dst.3 = getelementptr inbounds i32, ptr %y, i64 3
  store i32 %cvt3, ptr %dst.3, align 4
  ret void
}
; Unsigned case: four adjacent floats are read from %x, each is converted with
; the scalar llvm.fptoui.sat.i32.f32 intrinsic, and the four i32 results are
; written to adjacent slots of %y. %n is not used by the body.
define void @unsigned(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: @unsigned(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
  ; Load input lanes 0..3 from consecutive float slots of %x.
  %in0 = load float, ptr %x, align 4
  %src.1 = getelementptr inbounds float, ptr %x, i64 1
  %in1 = load float, ptr %src.1, align 4
  %src.2 = getelementptr inbounds float, ptr %x, i64 2
  %in2 = load float, ptr %src.2, align 4
  %src.3 = getelementptr inbounds float, ptr %x, i64 3
  %in3 = load float, ptr %src.3, align 4

  ; Convert every lane with the scalar unsigned saturating intrinsic.
  %cvt0 = tail call i32 @llvm.fptoui.sat.i32.f32(float %in0)
  %cvt1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %in1)
  %cvt2 = tail call i32 @llvm.fptoui.sat.i32.f32(float %in2)
  %cvt3 = tail call i32 @llvm.fptoui.sat.i32.f32(float %in3)

  ; Store result lanes 0..3 to consecutive i32 slots of %y.
  store i32 %cvt0, ptr %y, align 4
  %dst.1 = getelementptr inbounds i32, ptr %y, i64 1
  store i32 %cvt1, ptr %dst.1, align 4
  %dst.2 = getelementptr inbounds i32, ptr %y, i64 2
  store i32 %cvt2, ptr %dst.2, align 4
  %dst.3 = getelementptr inbounds i32, ptr %y, i64 3
  store i32 %cvt3, ptr %dst.3, align 4
  ret void
}
; Scalar float -> i32 saturating conversion intrinsics called by @signed and
; @unsigned. The vector forms named in the assertions are not declared here.
declare i32 @llvm.fptosi.sat.i32.f32(float)
declare i32 @llvm.fptoui.sat.i32.f32(float)