Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
Alexey Bataev cb5046da26 [SLP]Do not ignore undefs when trying to replace with "poisonous" shuffles
Need to consider undefs correctly, when trying to replace them with
potentially poisonous values in shuffles. Such elements should not be
silently replaced by poison values, instead complex analysis should be
implemented to see if it is safe to do it.

Fixes #113425
2024-10-24 07:47:23 -07:00

80 lines
5.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-20 -pass-remarks-output=%t %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; YAML: --- !Passed
; YAML: Pass: slp-vectorizer
; YAML: Name: StoresVectorized
; YAML: Function: test1
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
; YAML: - Cost: '4'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <4 x float> [[LOAD6:%.*]], <4 x float> [[LOAD7:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[VEXT165_I]], i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP0]], <4 x float> [[VEXT309_I]], i64 4)
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[LOAD17:%.*]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[FMULADD7:%.*]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP5]], <4 x float> [[FMULADD16:%.*]], i64 4)
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[TMP6]])
; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
%vext165.i = shufflevector <4 x float> %load6, <4 x float> %load7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
%vext309.i = shufflevector <4 x float> %load7, <4 x float> %load8, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
%fmuladd8 = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> %vext165.i, <4 x float> %load17, <4 x float> %fmuladd7)
%fmuladd17 = tail call noundef <4 x float> @llvm.fmuladd.v4f32(<4 x float> %vext309.i, <4 x float> %load17, <4 x float> %fmuladd16)
%add.ptr.i.i = getelementptr inbounds i8, ptr %out_ptr, i64 16
store <4 x float> %fmuladd8, ptr %out_ptr, align 4
store <4 x float> %fmuladd17, ptr %add.ptr.i.i, align 4
ret void
}
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
; YAML: --- !Passed
; YAML: Pass: slp-vectorizer
; YAML: Name: StoresVectorized
; YAML: Function: test2
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
; YAML: - Cost: '12'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, <8 x float> %load17, <8 x float> %fmuladd7, <8 x float> %fmuladd16, ptr %out_ptr) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <8 x float> [[LOAD6:%.*]], <8 x float> [[LOAD7:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8)
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[LOAD17:%.*]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8)
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP4]], <16 x float> [[TMP6]])
; CHECK-NEXT: store <16 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
%vext165.i = shufflevector <8 x float> %load6, <8 x float> %load7, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%vext309.i = shufflevector <8 x float> %load7, <8 x float> %load8, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%fmuladd8 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext165.i, <8 x float> %load17, <8 x float> %fmuladd7)
%fmuladd17 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext309.i, <8 x float> %load17, <8 x float> %fmuladd16)
%add.ptr.i.i = getelementptr inbounds i8, ptr %out_ptr, i64 32
store <8 x float> %fmuladd8, ptr %out_ptr, align 4
store <8 x float> %fmuladd17, ptr %add.ptr.i.i, align 4
ret void
}
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)