Files
clang-p2996/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
Craig Topper 31cbe75b3b [X86] Rename the autoupgraded of packed fp compare and fpclass intrinsics that don't take a mask as input to exclude '.mask.' from their name.
I think the intrinsics named 'avx512.mask.' should refer to the previous behavior of taking a mask argument in the intrinsic instead of using a 'select' or 'and' instruction in IR to accomplish the masking. This is more consistent with the goal that eventually we will have no intrinsics that have masking builtin. When we reach that goal, we should have no intrinsics named "avx512.mask".

llvm-svn: 335744
2018-06-27 15:57:53 +00:00

53 lines
2.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
; This test checks that values are passed optimally between the "cmp" and "kor" intrinsics.
; PR28839
; Module-level target description. The llc invocation at the top of this file
; passes no explicit triple option, so the target is taken from these lines.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind readnone uwtable
; Compares each of %a, %b, %c and %d against the shared operand %x using the
; 512-bit packed-float compare intrinsic, ORs the four 16-lane mask results
; together as a two-level tree, and returns the combined mask as a
; zero-extended i16.
;
; The expected assembly keeps every intermediate mask in a k-register
; (vcmpgeps -> korw) and moves the final mask to a general-purpose register
; only once (kmovw) before returning.
define zeroext i16 @cmp_kor_seq_16(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, <16 x float> %x) local_unnamed_addr #0 {
; CHECK-LABEL: cmp_kor_seq_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vcmpgeps %zmm4, %zmm0, %k0
; CHECK-NEXT: vcmpgeps %zmm4, %zmm1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: vcmpgeps %zmm4, %zmm2, %k1
; CHECK-NEXT: vcmpgeps %zmm4, %zmm3, %k2
; CHECK-NEXT: korw %k2, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:
; Four compares against %x. The predicate immediate 13 is the one the expected
; assembly above renders as vcmpgeps. The trailing i32 4 is presumably the
; rounding/SAE operand -- TODO confirm against the intrinsic definition.
; Each <16 x i1> result is immediately bitcast to an i16 scalar mask.
%0 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i32 4)
%1 = bitcast <16 x i1> %0 to i16
%2 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i32 4)
%3 = bitcast <16 x i1> %2 to i16
%4 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i32 4)
%5 = bitcast <16 x i1> %4 to i16
%6 = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i32 4)
%7 = bitcast <16 x i1> %6 to i16
; First OR: masks for %a and %b. The i16 -> <16 x i1> bitcasts undo the
; bitcasts above; this round-trip is part of the IR shape under test.
%8 = bitcast i16 %1 to <16 x i1>
%9 = bitcast i16 %3 to <16 x i1>
%10 = or <16 x i1> %8, %9
%11 = bitcast <16 x i1> %10 to i16
; Second OR: masks for %c and %d, with the same round-trip pattern.
%12 = bitcast i16 %5 to <16 x i1>
%13 = bitcast i16 %7 to <16 x i1>
%14 = or <16 x i1> %12, %13
%15 = bitcast <16 x i1> %14 to i16
; Final OR: combine the two partial results into the returned mask.
%16 = bitcast i16 %11 to <16 x i1>
%17 = bitcast i16 %15 to <16 x i1>
%18 = or <16 x i1> %16, %17
%19 = bitcast <16 x i1> %18 to i16
ret i16 %19
}
; Function Attrs: nounwind readnone
; Compare intrinsic used by the test: takes two <16 x float> operands and two
; i32 operands, and returns a <16 x i1> mask rather than taking a mask input.
declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) #1
; Attribute group #0 is attached to the test function; it selects the "knl" CPU
; and an explicit feature list that includes +avx512f.
attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
; Attribute group #1 is attached to the intrinsic declaration.
attributes #1 = { nounwind readnone }