; lowerBuildVectorAsBroadcast will not broadcast splat constants in all cases,
; resulting in a lot of situations where a full width vector load that has
; failed to fold but is loading splat constant values could use a broadcast
; load instruction just as cheaply, and save constant pool space. This is an
; updated commit of ab4b924832 after being reverted at 78de45fd4a.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s
; Masked qword->word truncation where the truncated source is undef: only the
; lane selected by mask bit 0 takes a truncated value; the other lanes come
; from the all-ones splat pass-through operand. The splat-of-1 pass-through
; should be materialized with a broadcast (vpbroadcastd) instead of a
; full-width constant-pool load, saving constant pool space.
define <2 x i64> @undef_tval() {
; CHECK-LABEL: undef_tval:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> undef, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i8 1) #3
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}
; Same masked qword->word truncation as above but with a live source vector %x:
; lane 0 (mask = 1) receives the truncated value, the remaining lanes keep the
; splat-of-1 pass-through, which should be emitted as a broadcast rather than a
; constant-pool vector load.
define <2 x i64> @foo(<8 x i64> %x) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i8 1) #3
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}
; Masked dword->word truncation variant (256-bit result): mask = 1 so only
; lane 0 is truncated from %x; the splat-of-1 pass-through fills the rest and
; should likewise be materialized with a ymm broadcast.
define <4 x i64> @goo(<16 x i32> %x) {
; CHECK-LABEL: goo:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; CHECK-NEXT: movw $1, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i16 1) #3
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}
; AVX-512 masked vector-truncation intrinsics used by the tests above
; (truncate qword/dword lanes to words, merging unselected lanes from the
; pass-through operand under the integer mask).
declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)