clang-p2996/llvm/test/CodeGen/X86/pr81136.ll
Commit bef25ae297 by Simon Pilgrim (2024-02-08 17:39:19 +00:00)
[X86] X86FixupVectorConstants - use explicit register bitwidth for the loaded vector instead of using constant pool bitwidth

Fixes #81136 - we might be loading from a constant pool entry wider than the destination register bitwidth, which affects the extload scale calculation.

ConvertToBroadcastAVX512 doesn't yet set an explicit bitwidth (it defaults to the constant pool bitwidth) because of difficulties in looking up the original register width through the fold tables. Since it only uses rebuildSplatCst, this shouldn't cause any miscompilations, although it might prevent folding to broadcast when only the lower bits match a splatable pattern.
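
For context, a minimal sketch of the kind of constant the fix concerns (the function name and the add are hypothetical and not part of the test below): a vector constant whose elements fit in a narrower integer type can be stored shrunk in the constant pool and rematerialized with a zero-extending load, so the extload scale has to be derived from the destination register width rather than from the pool entry width.

; Minimal sketch, not part of pr81136.ll: the elements of <i64 128, i64 1>
; fit in 8 bits, so the backend may shrink the constant pool entry and
; reload it with a zero-extending load such as vpmovzxbq (as seen in the
; CHECK lines of the test below); the scale must come from the 128-bit
; destination register, not a possibly wider pool entry.
define <2 x i64> @shrunk_constant_sketch(<2 x i64> %x) {
  %r = add <2 x i64> %x, <i64 128, i64 1>
  ret <2 x i64> %r
}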


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s
define i64 @PR81136(i32 %a0, i32 %a1, ptr %a2) {
; CHECK-LABEL: PR81136:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vmovd %esi, %xmm1
; CHECK-NEXT: vmovdqa (%rdx), %ymm2
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm4 = [128,1]
; CHECK-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm4
; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
; CHECK-NEXT: vpalignr {{.*#+}} xmm0 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm2
; CHECK-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT: vandnpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vmovmskpd %ymm0, %eax
; CHECK-NEXT: popcntl %eax, %eax
; CHECK-NEXT: negq %rax
; CHECK-NEXT: retq
%v0 = bitcast i32 %a0 to <2 x i16>
%v1 = bitcast i32 %a1 to <2 x i16>
%cmp15 = icmp sle <2 x i16> %v1, %v0
%conv16 = sext <2 x i1> %cmp15 to <2 x i64>
%shuffle29 = shufflevector <2 x i64> %conv16, <2 x i64> <i64 128, i64 1>, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
%data = load volatile <4 x i64>, ptr %a2, align 32
%cmp65 = icmp ne <4 x i64> %data, <i64 -2071602529, i64 -1537047284, i64 717942021, i64 597457239>
%cmp67 = icmp ne <4 x i64> %shuffle29, zeroinitializer
%and = and <4 x i1> %cmp65, %cmp67
%mask = bitcast <4 x i1> %and to i4
%cnt = tail call i4 @llvm.ctpop.i4(i4 %mask)
%cntz = zext i4 %cnt to i64
%res = sub nsw i64 0, %cntz
ret i64 %res
}
declare i4 @llvm.ctpop.i4(i4)