Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
Alexey Bataev 4d7f3d9e0f [SLP]Fix final analysis for unsigned nodes.
Need to check that at least a single bit is cleared for unsigned nodes
before reducing their size. Otherwise, they might be treated as signed in
signed nodes.
2024-04-19 03:03:56 -07:00

80 lines
4.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s
; Input for the SLP vectorizer (see the RUN line: riscv64 with +v).
; @test runs the same scalar sequence twice on adjacent i16 elements:
;   lane 0 loads from %p and %p + 16 and stores its result to %p;
;   lane 1 loads from %p + 2 and %p + 18 and stores its result to %p + 2.
; Both lanes share one multiplier, %load794 zero-extended to i32.
; The autogenerated assertions below expect the two lanes to be merged into
; <2 x ...> operations in which the sub/add/mul chain stays in <2 x i32>, the
; middle step runs in <2 x i64>, and the narrowing to <2 x i16> happens only at
; the trunc that feeds the signed i16 users (add, icmp slt, select, smax, or).
; NOTE(review): 5039 == floor(2^24 / 3329) and 62207 == 2^16 - 3329, so this
; looks like a reduction modulo 3329 -- inferred from the constants only,
; nothing in this file states it.
define void @test(ptr %p, i16 %load794) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[P:%.*]], i16 [[LOAD794:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ZEXT795:%.*]] = zext i16 [[LOAD794]] to i32
; CHECK-NEXT: [[GEP799:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[GEP799]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], <i32 3329, i32 3329>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ZEXT795]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = mul <2 x i32> [[TMP8]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[TMP12]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], <i64 5039, i64 5039>
; CHECK-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], <i64 24, i64 24>
; CHECK-NEXT: [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i32>
; CHECK-NEXT: [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], <i32 62207, i32 62207>
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = trunc <2 x i32> [[TMP21]] to <2 x i16>
; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i16> [[TMP14]], <i16 -3329, i16 -3329>
; CHECK-NEXT: [[TMP16:%.*]] = icmp slt <2 x i16> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP15]], <2 x i16> zeroinitializer)
; CHECK-NEXT: [[TMP19:%.*]] = or <2 x i16> [[TMP17]], [[TMP18]]
; CHECK-NEXT: store <2 x i16> [[TMP19]], ptr [[P]], align 2
; CHECK-NEXT: ret void
;
; Multiplier shared by both lanes, widened as an unsigned value.
%zext795 = zext i16 %load794 to i32
; ---- Lane 0: element at %p, paired with the element at %p + 16 ----
%load798 = load i16, ptr %p, align 2
%gep799 = getelementptr inbounds i8, ptr %p, i64 16
%load800 = load i16, ptr %gep799, align 2
%zext801 = zext i16 %load798 to i32
%zext802 = zext i16 %load800 to i32
; (second element - first element) + 3329, then scaled by the shared multiplier.
%sub809 = sub nsw i32 %zext802, %zext801
%add810 = add nsw i32 %sub809, 3329
%mul811 = mul i32 %add810, %zext795
; The i32 product is reinterpreted as unsigned (zext), multiplied by 5039 and
; shifted right by 24 in i64 before being narrowed back to i32.
%zext812 = zext i32 %mul811 to i64
%mul813 = mul nuw nsw i64 %zext812, 5039
%lshr814 = lshr i64 %mul813, 24
%trunc815 = trunc nuw nsw i64 %lshr814 to i32
; quotient * 62207 + product; only the low 16 bits survive the trunc below.
%mul816 = mul i32 %trunc815, 62207
%add817 = add i32 %mul816, %mul811
%trunc818 = trunc i32 %add817 to i16
; Signed i16 users of the truncated value. The select yields %trunc818 only when
; %add819 is negative and the smax yields %add819 only when it is non-negative,
; so the or stores %trunc818 if %add819 < 0 (signed) and %add819 otherwise.
%add819 = add i16 %trunc818, -3329
%icmp820 = icmp slt i16 %add819, 0
%select821 = select i1 %icmp820, i16 %trunc818, i16 0
%call822 = call i16 @llvm.smax.i16(i16 %add819, i16 0)
%or823 = or i16 %select821, %call822
store i16 %or823, ptr %p, align 2
; ---- Lane 1: same sequence for the element at %p + 2, paired with %p + 18 ----
%gep826 = getelementptr inbounds i8, ptr %p, i64 2
%load827 = load i16, ptr %gep826, align 2
%gep828 = getelementptr inbounds i8, ptr %p, i64 18
%load829 = load i16, ptr %gep828, align 2
%zext830 = zext i16 %load827 to i32
%zext831 = zext i16 %load829 to i32
%sub838 = sub nsw i32 %zext831, %zext830
%add839 = add nsw i32 %sub838, 3329
%mul840 = mul i32 %add839, %zext795
%zext841 = zext i32 %mul840 to i64
%mul842 = mul nuw nsw i64 %zext841, 5039
%lshr843 = lshr i64 %mul842, 24
%trunc844 = trunc nuw nsw i64 %lshr843 to i32
%mul845 = mul i32 %trunc844, 62207
%add846 = add i32 %mul845, %mul840
%trunc847 = trunc i32 %add846 to i16
%add848 = add i16 %trunc847, -3329
%icmp849 = icmp slt i16 %add848, 0
%select850 = select i1 %icmp849, i16 %trunc847, i16 0
%call851 = call i16 @llvm.smax.i16(i16 %add848, i16 0)
%or852 = or i16 %select850, %call851
; Lane 1 result goes back to the address it was loaded from (%p + 2).
store i16 %or852, ptr %gep826, align 2
ret void
}