This adds fptosi_sat and fptoui_sat to the list of trivially vectorizable functions, mainly so that the loop vectorizer can vectorize the instruction. Marking them as trivially vectorizable also allows them to be SLP vectorized and scalarized. The signature of a fptosi_sat intrinsic requires two type overloads (@llvm.fptosi.sat.v2i32.v2f32), unlike other intrinsics that often take only a single one. This patch alters hasVectorInstrinsicOverloadedScalarOpd to isVectorIntrinsicWithOverloadTypeAtArg, so that it can mark the first operand of the intrinsic as an overloaded (but not scalar) operand. Differential Revision: https://reviews.llvm.org/D124358
68 lines
2.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -S -slp-vectorizer -mtriple=aarch64-none-eabi < %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
|
|
; SLP-vectorization test for the signed saturating fp-to-int intrinsic:
; four scalar @llvm.fptosi.sat.i32.f32 calls on adjacent floats loaded from
; %x should be combined into one <4 x float> load, a single
; @llvm.fptosi.sat.v4i32.v4f32 call, and one <4 x i32> store to %y.
; This relies on the intrinsic being marked trivially vectorizable with two
; overloaded types (result and first operand). CHECK lines were
; autogenerated by utils/update_test_checks.py.
define void @signed(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: @signed(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
; Four consecutive scalar float loads from x[0..3].
%l0 = load float, ptr %x, align 4
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
%l2 = load float, ptr %arrayidx.1, align 4
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
%l4 = load float, ptr %arrayidx.2, align 4
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
%l6 = load float, ptr %arrayidx.3, align 4
; Scalar saturating conversions — the SLP vectorizer should merge these
; into a single v4i32/v4f32 intrinsic call.
%l1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l0)
%l3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l2)
%l5 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l4)
%l7 = tail call i32 @llvm.fptosi.sat.i32.f32(float %l6)
; Four consecutive scalar stores to y[0..3].
store i32 %l1, ptr %y, align 4
%arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
store i32 %l3, ptr %arrayidx2.1, align 4
%arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
store i32 %l5, ptr %arrayidx2.2, align 4
%arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
store i32 %l7, ptr %arrayidx2.3, align 4
ret void
}
|
|
|
|
; SLP-vectorization test for the unsigned saturating fp-to-int intrinsic:
; same shape as @signed, but using @llvm.fptoui.sat. Four scalar calls on
; adjacent floats from %x should become one <4 x float> load, a single
; @llvm.fptoui.sat.v4i32.v4f32 call, and one <4 x i32> store to %y.
; CHECK lines were autogenerated by utils/update_test_checks.py.
define void @unsigned(ptr %x, ptr %y, i32 %n) {
; CHECK-LABEL: @unsigned(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[Y:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
; Four consecutive scalar float loads from x[0..3].
%l0 = load float, ptr %x, align 4
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
%l2 = load float, ptr %arrayidx.1, align 4
%arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
%l4 = load float, ptr %arrayidx.2, align 4
%arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
%l6 = load float, ptr %arrayidx.3, align 4
; Scalar saturating conversions — the SLP vectorizer should merge these
; into a single v4i32/v4f32 intrinsic call.
%l1 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l0)
%l3 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l2)
%l5 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l4)
%l7 = tail call i32 @llvm.fptoui.sat.i32.f32(float %l6)
; Four consecutive scalar stores to y[0..3].
store i32 %l1, ptr %y, align 4
%arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
store i32 %l3, ptr %arrayidx2.1, align 4
%arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
store i32 %l5, ptr %arrayidx2.2, align 4
%arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
store i32 %l7, ptr %arrayidx2.3, align 4
ret void
}
|
|
|
|
; Scalar declarations of the saturating fp-to-int conversion intrinsics
; exercised by the functions above.
declare i32 @llvm.fptosi.sat.i32.f32(float)
declare i32 @llvm.fptoui.sat.i32.f32(float)