Reduce the size of the vector constant by storing it in the constant pool in a truncated form and sign-extending it as part of the load. I've extended the existing FixupConstant functionality to support these sext constant rebuilds - we still select the smallest stored constant entry, and prefer vzload/broadcast/vextload at the same bitwidth to avoid domain flips. I intend to add the matching load+zero-extend handling in a future PR, but that requires some alterations to the existing MC shuffle comment handling first.
18 lines
851 B
LLVM
18 lines
851 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s
|
|
|
|
; @PR63507 takes no arguments and returns a <4 x i32>.
; NOTE(review): the name suggests a regression test for bug report 63507 -
; confirm against the issue tracker.
;
; The IR shifts a zero vector by a zero count, compares the result with zero,
; sign-extends the resulting mask, interleaves it with zero lanes and multiplies
; the interleaved vector by itself.
;
; The autogenerated assertions below expect the operand of the multiply to be
; emitted as one vpmovsxbd load whose printed value is
; [4294967295,0,4294967295,0] (4294967295 is i32 -1 printed as unsigned),
; followed by a vpmulld of xmm0 with itself and a retq. The {{.*#+}} pattern
; skips any text up to and including a run of '#' characters before the
; printed constant.
define <4 x i32> @PR63507() {
|
|
; CHECK-LABEL: PR63507:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4294967295,0,4294967295,0]
|
|
; CHECK-NEXT: vpmulld %xmm0, %xmm0, %xmm0
|
|
; CHECK-NEXT: retq
|
|
; Call the psll.d intrinsic with an all-zero value and an all-zero shift count.
%psll.i = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer)
|
|
; Lane-wise equality compare of the shift result against zero.
%cmp.i = icmp eq <4 x i32> %psll.i, zeroinitializer
|
|
; Widen the <4 x i1> compare mask to <4 x i32> by sign extension.
%sext.i = sext <4 x i1> %cmp.i to <4 x i32>
|
|
; Mask <0,4,1,5> interleaves lanes 0 and 1 of %sext.i with lanes 0 and 1 of
; the zero vector, giving [sext0, 0, sext1, 0].
%shuffle.i101 = shufflevector <4 x i32> %sext.i, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
|
|
; Multiply the interleaved vector by itself (both operands are the same value).
%mul.i = mul <4 x i32> %shuffle.i101, %shuffle.i101
|
|
ret <4 x i32> %mul.i
|
|
}
|
|
; Declaration of the intrinsic called by @PR63507: takes two <4 x i32>
; operands and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>)
|