Files
clang-p2996/llvm/test/Transforms/VectorCombine/X86/pr67803.ll
Simon Pilgrim bddfbe748b [VectorCombine] foldShuffleOfShuffles - fold "shuffle (shuffle x, undef), (shuffle y, undef)" -> "shuffle x, y" (#88743)
Another step towards cleaning up shuffles that have been split, often across bitcasts between SSE intrinsics.

Strip shuffles entirely if we fold to an identity shuffle.
2024-04-22 15:57:59 +01:00

30 lines
1.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx | FileCheck %s
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
; Regression test named after PR67803 (presumably the upstream issue number;
; confirm against the tracker).
;
; Input pattern: an <8 x i1> compare result is split into low and high
; <4 x i1> halves, each half is sign-extended and bitcast separately, and the
; two halves are then concatenated back into a single 256-bit vector that is
; used as the mask operand of the AVX blendv intrinsic.
;
; Expected output (see the autogenerated assertions below): the split/concat
; shuffles are gone and a single full-width sext of the <8 x i1> compare is
; bitcast to <4 x i64>. Both opt invocations at the top of the file share the
; same assertion prefix, so the avx and avx2 results are expected to match.
define <4 x i64> @PR67803(<8 x i32> %x, <8 x i32> %y, <8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @PR67803(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
; CHECK-NEXT: [[CONCAT:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i64> [[CONCAT]] to <8 x float>
; CHECK-NEXT: [[SEL:%.*]] = tail call noundef <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[MASK]])
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x float> [[SEL]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[RES]]
;
entry:
; Signed greater-than compare across all eight i32 lanes.
%cmp = icmp sgt <8 x i32> %x, %y
; Split the compare result: lanes 0-3 form the low half, lanes 4-7 the high half.
%cmp.lo = shufflevector <8 x i1> %cmp, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%cmp.hi = shufflevector <8 x i1> %cmp, <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; Sign-extend each half from i1 to i32 lanes.
%sext.lo = sext <4 x i1> %cmp.lo to <4 x i32>
%sext.hi = sext <4 x i1> %cmp.hi to <4 x i32>
; Reinterpret each 128-bit half as <2 x i64> before recombining.
%bitcast.lo = bitcast <4 x i32> %sext.lo to <2 x i64>
%bitcast.hi = bitcast <4 x i32> %sext.hi to <2 x i64>
; Concatenate low and high halves back into one 256-bit vector (indices 0-3
; select both elements of the first operand, then both of the second).
%concat = shufflevector <2 x i64> %bitcast.lo, <2 x i64> %bitcast.hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Reinterpret as <8 x float> and pass it as the third (mask) operand of the intrinsic.
%mask = bitcast <4 x i64> %concat to <8 x float>
%sel = tail call noundef <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %mask)
; Return the blended result reinterpreted as <4 x i64>.
%res = bitcast <8 x float> %sel to <4 x i64>
ret <4 x i64> %res
}