Fixes #81136 - we might be loading from a constant pool entry that is wider than the destination register bitwidth, which affects the vextload scale calculation. ConvertToBroadcastAVX512 doesn't yet set an explicit bitwidth (it defaults to the constant pool bitwidth) because it is difficult to look up the original register width through the fold tables; since we only use rebuildSplatCst there, this shouldn't cause any miscompilations, although it might prevent folding to a broadcast if only the lower bits match a splatable pattern.
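For illustration, here is a minimal, hypothetical sketch of the kind of scale calculation the fix is concerned with: if the number of splat elements to rebuild is derived from the (possibly wider) constant pool entry width instead of the destination register width, the rebuilt constant ends up wrong. The helper name numSplatElts and its parameters are illustrative only and do not mirror the actual X86FixupVectorConstants.cpp code.

// Hypothetical sketch (not the real LLVM API): rebuild scale for an
// extending load of a splat constant.
#include <cassert>
#include <cstdio>

static unsigned numSplatElts(unsigned RegBitWidth, unsigned EltBitWidth) {
  assert(RegBitWidth % EltBitWidth == 0 && "elements must tile the register");
  // Derive the element count from the destination register width; deriving it
  // from a constant pool entry that is wider than the register is the kind of
  // mismatch described above.
  return RegBitWidth / EltBitWidth;
}

int main() {
  // e.g. a 128-bit destination fed from a constant pool entry that happens to
  // be 256 bits wide: the splat must still be rebuilt as 128/8 = 16 elements.
  unsigned CstPoolBits = 256; // wider than the destination register
  unsigned RegBits = 128;
  std::printf("elements from register width: %u (pool width %u not used)\n",
              numSplatElts(RegBits, /*EltBitWidth=*/8), CstPoolBits);
  return 0;
}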
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s

define i64 @PR81136(i32 %a0, i32 %a1, ptr %a2) {
; CHECK-LABEL: PR81136:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vmovdqa (%rdx), %ymm2
; CHECK-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm4 = [128,1]
; CHECK-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm4
; CHECK-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm2
; CHECK-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vandnpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vmovmskpd %ymm0, %eax
; CHECK-NEXT:    popcntl %eax, %eax
; CHECK-NEXT:    negq %rax
; CHECK-NEXT:    retq
  %v0 = bitcast i32 %a0 to <2 x i16>
  %v1 = bitcast i32 %a1 to <2 x i16>
  %cmp15 = icmp sle <2 x i16> %v1, %v0
  %conv16 = sext <2 x i1> %cmp15 to <2 x i64>
  %shuffle29 = shufflevector <2 x i64> %conv16, <2 x i64> <i64 128, i64 1>, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
  %data = load volatile <4 x i64>, ptr %a2, align 32
  %cmp65 = icmp ne <4 x i64> %data, <i64 -2071602529, i64 -1537047284, i64 717942021, i64 597457239>
  %cmp67 = icmp ne <4 x i64> %shuffle29, zeroinitializer
  %and = and <4 x i1> %cmp65, %cmp67
  %mask = bitcast <4 x i1> %and to i4
  %cnt = tail call i4 @llvm.ctpop.i4(i4 %mask)
  %cntz = zext i4 %cnt to i64
  %res = sub nsw i64 0, %cntz
  ret i64 %res
}
declare i4 @llvm.ctpop.i4(i4)