; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+avx512f -verify-machineinstrs | FileCheck %s
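;; Check lowering of single-element (<1 x iN>) masked load/store intrinsics to the
;; APX conditional-faulting CFCMOV instructions enabled by +cf.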
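;; Conditional load and store through <1 x i32>/<1 x i64>/<1 x i16> masks; the first
;; load/store pair behaves like the guarded access "if (a == 0) *p = *b;".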
define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: basic:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cfcmovel (%rsi), %eax
; CHECK-NEXT: cfcmovel %eax, (%rdx)
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: cfcmovneq %rax, (%rdx)
; CHECK-NEXT: movw $2, %ax
; CHECK-NEXT: cfcmovnew %ax, (%rcx)
; CHECK-NEXT: retq
entry:
%cond = icmp eq i32 %a, 0
%0 = bitcast i1 %cond to <1 x i1>
%1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i32> poison)
call void @llvm.masked.store.v1i32.p0(<1 x i32> %1, ptr %p, i32 4, <1 x i1> %0)
%2 = xor i1 %cond, true
%3 = bitcast i1 %2 to <1 x i1>
call void @llvm.masked.store.v1i64.p0(<1 x i64> <i64 1>, ptr %p, i32 8, <1 x i1> %3)
call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 2>, ptr %q, i32 8, <1 x i1> %3)
ret void
}
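;; With a zero passthru, the two-operand CFCMOV form suffices: the destination is
;; simply zeroed when the condition is false.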
define i16 @cload_passthru_zero(i16 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_zero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testw %di, %di
; CHECK-NEXT: cfcmovew (%rsi), %ax
; CHECK-NEXT: retq
entry:
%cond = icmp eq i16 %a, 0
%0 = bitcast i1 %cond to <1 x i1>
%1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
%2 = bitcast <1 x i16> %1 to i16
ret i16 %2
}
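;; A non-zero passthru needs the three-operand form, which selects the passthru
;; (%rdi) instead of the loaded value when the condition is false.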
define i64 @cload_passthru_not_zero(i64 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_not_zero:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: cfcmoveq (%rsi), %rdi, %rax
; CHECK-NEXT: retq
entry:
%cond = icmp eq i64 %a, 0
%0 = bitcast i1 %cond to <1 x i1>
%va = bitcast i64 %a to <1 x i64>
%1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> %va)
%2 = bitcast <1 x i64> %1 to i64
ret i64 %2
}
;; CFCMOV can use the flags produced by SUB directly.
define i64 @reduced_data_dependency(i64 %a, i64 %b, ptr %c) {
; CHECK-LABEL: reduced_data_dependency:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rcx
; CHECK-NEXT: subq %rsi, %rcx
; CHECK-NEXT: cfcmovnsq (%rdx), %rdi, %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: retq
entry:
%sub = sub i64 %a, %b
%cond = icmp sge i64 %sub, 0
%0 = bitcast i1 %cond to <1 x i1>
%va = bitcast i64 %a to <1 x i64>
%1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %c, i32 4, <1 x i1> %0, <1 x i64> %va)
%2 = bitcast <1 x i64> %1 to i64
%3 = add i64 %2, %sub
ret i64 %3
}
;; No need to optimize the generated assembly for cond_false/cond_true because it
;; should never be emitted by the middle end. The IR is only here to check that
;; feeding a constant mask to the backend is legal.
define i16 @cond_false(ptr %b) {
; CHECK-LABEL: cond_false:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negb %al
; CHECK-NEXT: cfcmovnew (%rdi), %ax
; CHECK-NEXT: retq
entry:
%0 = bitcast i1 false to <1 x i1>
%1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
%2 = bitcast <1 x i16> %1 to i16
ret i16 %2
}
define i64 @cond_true(ptr %b) {
; CHECK-LABEL: cond_true:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: negb %al
; CHECK-NEXT: cfcmovneq (%rdi), %rax
; CHECK-NEXT: retq
entry:
%0 = bitcast i1 true to <1 x i1>
%1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> <i64 0>)
%2 = bitcast <1 x i64> %1 to i64
ret i64 %2
}
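;; Multi-element masks are not candidates for CFCMOV; they fall back to regular
;; AVX-512 masked moves. Only check that this does not crash.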
define void @no_crash(ptr %p, <4 x i1> %cond1, <4 x i1> %cond2) {
; CHECK-LABEL: no_crash:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k0
; CHECK-NEXT: kshiftrw $12, %k0, %k1
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k0
; CHECK-NEXT: kshiftrw $12, %k0, %k2
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k2} {z}
; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
%0 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr %p, i32 8, <4 x i1> %cond1, <4 x i64> poison)
call void @llvm.masked.store.v4i64.p0(<4 x i64> %0, ptr %p, i32 8, <4 x i1> %cond2)
ret void
}