KMOV is essential for copies between k-registers and GPRs. R16-R31 were added to the GPRs in #70958, so we extend KMOV to support these new registers first. This patch: 1. Promotes the KMOV instructions from VEX space to EVEX space. 2. Emits the {evex} prefix for the EVEX variants. 3. Prefers the EVEX variant over the VEX variant in ISEL and optimizations, for better register allocation. EVEX variants will be compressed to VEX variants by the existing EVEX2VEX pass when no EGPR is used. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4 TAG: llvm-test-suite && CPU2017 can be built with the egpr feature successfully.
104 lines
5.2 KiB
LLVM
104 lines
5.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+egpr --show-mc-encoding | FileCheck --check-prefix=AVX512 %s
; Store of a <16 x i1> sign mask as an i16: vpmovb2m produces the mask in %k0,
; and the kmovw store is compressed EVEX->VEX since no EGPR is involved.
define void @bitcast_16i8_store(ptr %p, <16 x i8> %a0) {
; AVX512-LABEL: bitcast_16i8_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
; AVX512-NEXT:    kmovw %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x91,0x07]
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a1 = icmp slt <16 x i8> %a0, zeroinitializer
  %a2 = bitcast <16 x i1> %a1 to i16
  store i16 %a2, ptr %p
  ret void
}

; Store of a <32 x i1> sign mask as an i32; kmovd is EVEX->VEX compressed.
define void @bitcast_32i8_store(ptr %p, <32 x i8> %a0) {
; AVX512-LABEL: bitcast_32i8_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
; AVX512-NEXT:    kmovd %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x91,0x07]
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a1 = icmp slt <32 x i8> %a0, zeroinitializer
  %a2 = bitcast <32 x i1> %a1 to i32
  store i32 %a2, ptr %p
  ret void
}

; Store of a <64 x i1> sign mask as an i64; kmovq is EVEX->VEX compressed.
; No "kill" line here: the argument already occupies a full zmm register.
define void @bitcast_64i8_store(ptr %p, <64 x i8> %a0) {
; AVX512-LABEL: bitcast_64i8_store:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
; AVX512-NEXT:    kmovq %k0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x91,0x07]
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %a1 = icmp slt <64 x i8> %a0, zeroinitializer
  %a2 = bitcast <64 x i1> %a1 to i64
  store i64 %a2, ptr %p
  ret void
}

; Load an i16 as a <16 x i1> mask (kmovw load, EVEX->VEX compressed) and use
; it to select between two mask vectors.
define <16 x i1> @bitcast_16i8_load(ptr %p, <16 x i1> %a, <16 x i1> %b) {
; AVX512-LABEL: bitcast_16i8_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $7, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x71,0xf1,0x07]
; AVX512-NEXT:    vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
; AVX512-NEXT:    vpsllw $7, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x07]
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; AVX512-NEXT:    kmovw (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x90,0x0f]
; AVX512-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
; AVX512-NEXT:    kandnw %k0, %k1, %k0 # encoding: [0xc5,0xf4,0x42,0xc0]
; AVX512-NEXT:    korw %k0, %k2, %k0 # encoding: [0xc5,0xec,0x45,0xc0]
; AVX512-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %mask = load i16, ptr %p
  %vmask = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %vmask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %res
}

; Load an i32 as a <32 x i1> mask (kmovd load, EVEX->VEX compressed) and use
; it to select between two mask vectors.
define <32 x i1> @bitcast_32i8_load(ptr %p, <32 x i1> %a, <32 x i1> %b) {
; AVX512-LABEL: bitcast_32i8_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $7, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x71,0xf1,0x07]
; AVX512-NEXT:    vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
; AVX512-NEXT:    vpsllw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; AVX512-NEXT:    kmovd (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x90,0x0f]
; AVX512-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
; AVX512-NEXT:    kandnd %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf5,0x42,0xc0]
; AVX512-NEXT:    kord %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x45,0xc0]
; AVX512-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq # encoding: [0xc3]
  %mask = load i32, ptr %p
  %vmask = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %vmask, <32 x i1> %a, <32 x i1> %b
  ret <32 x i1> %res
}

; Load an i64 as a <64 x i1> mask (kmovq load, EVEX->VEX compressed) and use
; it to select between two mask vectors.
define <64 x i1> @bitcast_64i8_load(ptr %p, <64 x i1> %a, <64 x i1> %b) {
; AVX512-LABEL: bitcast_64i8_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsllw $7, %zmm1, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xf1,0x07]
; AVX512-NEXT:    vpmovb2m %zmm1, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc1]
; AVX512-NEXT:    vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; AVX512-NEXT:    kmovq (%rdi), %k1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf8,0x90,0x0f]
; AVX512-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xd0]
; AVX512-NEXT:    kandnq %k0, %k1, %k0 # encoding: [0xc4,0xe1,0xf4,0x42,0xc0]
; AVX512-NEXT:    korq %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xec,0x45,0xc0]
; AVX512-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
; AVX512-NEXT:    retq # encoding: [0xc3]
  %mask = load i64, ptr %p
  %vmask = bitcast i64 %mask to <64 x i1>
  %res = select <64 x i1> %vmask, <64 x i1> %a, <64 x i1> %b
  ret <64 x i1> %res
}