[X86] Remove x86-experimental-unordered-atomic-isel option and associated code

This option enables an experimental lowering for unordered atomics I worked
on a few years back.  It never reached production quality, and hasn't been
worked on in years.  So let's rip it out.

This wasn't a crazy idea, but I hit some stumbling block which prevented me
from pushing it across the finish line. Judging from commit 027aa27, that
change's description is probably a good summary of the issue; I no longer
remember the details.
This commit is contained in:
Philip Reames
2023-12-04 18:00:17 -08:00
committed by Philip Reames
parent 74c00d4329
commit 943f3e52a0
5 changed files with 201 additions and 567 deletions

View File

@@ -4715,25 +4715,6 @@ public:
return Chain;
}
/// Should SelectionDAG lower an atomic store of the given kind as a normal
/// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
/// eventually migrate all targets to the using StoreSDNodes, but porting is
/// being done target at a time.
virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
assert(SI.isAtomic() && "violated precondition");
return false;
}
/// Should SelectionDAG lower an atomic load of the given kind as a normal
/// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
/// eventually migrate all targets to the using LoadSDNodes, but porting is
/// being done target at a time.
virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
assert(LI.isAtomic() && "violated precondition");
return false;
}
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do

View File

@@ -4857,23 +4857,6 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for loads to prevent future divergence.
SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
SDValue OutChain = L.getValue(1);
if (!I.isUnordered())
DAG.setRoot(OutChain);
else
PendingLoads.push_back(OutChain);
return;
}
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
@@ -4913,14 +4896,6 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for stores to prevent future divergence.
SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
setValue(&I, S);
DAG.setRoot(S);
return;
}
SDValue OutChain =
DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Val, Ptr, MMO);

View File

@@ -83,13 +83,6 @@ static cl::opt<bool> MulConstantOptimization(
"SHIFT, LEA, etc."),
cl::Hidden);
static cl::opt<bool> ExperimentalUnorderedISEL(
"x86-experimental-unordered-atomic-isel", cl::init(false),
cl::desc("Use LoadSDNode and StoreSDNode instead of "
"AtomicSDNode for unordered atomic loads and "
"stores respectively."),
cl::Hidden);
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -30598,18 +30591,6 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
return Loaded;
}
bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
if (!SI.isUnordered())
return false;
return ExperimentalUnorderedISEL;
}
bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
if (!LI.isUnordered())
return false;
return ExperimentalUnorderedISEL;
}
/// Emit a locked operation on a stack location which does not change any
/// memory location, but does involve a lock prefix. Location is chosen to be
/// a) very likely accessed only by a single thread to minimize cache traffic,

View File

@@ -1753,9 +1753,6 @@ namespace llvm {
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
bool needsCmpXchgNb(Type *MemType) const;
void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,

View File

@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O0 %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O3 %s
define i8 @load_i8(ptr %ptr) {
; CHECK-O0-LABEL: load_i8:
@@ -408,33 +406,21 @@ define void @store_i256(ptr %ptr, i256 %v) {
; Legal if wider type is also atomic (TODO)
define void @vec_store(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: vec_store:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi)
; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-CUR-NEXT: retq
; CHECK-O0-LABEL: vec_store:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: vmovd %xmm0, %ecx
; CHECK-O0-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-O0-NEXT: movl %ecx, (%rdi)
; CHECK-O0-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: vec_store:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx
; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: vec_store:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi)
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: vec_store:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: vec_store:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: vmovd %xmm0, %eax
; CHECK-O3-NEXT: vpextrd $1, %xmm0, %ecx
; CHECK-O3-NEXT: movl %eax, (%rdi)
; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
; CHECK-O3-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%v2 = extractelement <2 x i32> %vec, i32 1
%p1 = getelementptr i32, ptr %p0, i64 1
@@ -445,33 +431,21 @@ define void @vec_store(ptr %p0, <2 x i32> %vec) {
; Not legal to widen due to alignment restriction
define void @vec_store_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: vec_store_unaligned:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi)
; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-CUR-NEXT: retq
; CHECK-O0-LABEL: vec_store_unaligned:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: vmovd %xmm0, %ecx
; CHECK-O0-NEXT: vpextrd $1, %xmm0, %eax
; CHECK-O0-NEXT: movl %ecx, (%rdi)
; CHECK-O0-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: vec_store_unaligned:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx
; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: vec_store_unaligned:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi)
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: vec_store_unaligned:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: vec_store_unaligned:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: vmovd %xmm0, %eax
; CHECK-O3-NEXT: vpextrd $1, %xmm0, %ecx
; CHECK-O3-NEXT: movl %eax, (%rdi)
; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
; CHECK-O3-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%v2 = extractelement <2 x i32> %vec, i32 1
%p1 = getelementptr i32, ptr %p0, i64 1
@@ -485,31 +459,12 @@ define void @vec_store_unaligned(ptr %p0, <2 x i32> %vec) {
; Legal if wider type is also atomic (TODO)
; Also, can avoid register move from xmm to eax (TODO)
define void @widen_broadcast2(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: widen_broadcast2:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O0-CUR-NEXT: movl %eax, (%rdi)
; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: widen_broadcast2:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: widen_broadcast2:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi)
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: widen_broadcast2:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: widen_broadcast2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: movl %eax, 4(%rdi)
; CHECK-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%p1 = getelementptr i32, ptr %p0, i64 1
store atomic i32 %v1, ptr %p0 unordered, align 8
@@ -519,31 +474,12 @@ define void @widen_broadcast2(ptr %p0, <2 x i32> %vec) {
; Not legal to widen due to alignment restriction
define void @widen_broadcast2_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: widen_broadcast2_unaligned:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O0-CUR-NEXT: movl %eax, (%rdi)
; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
; CHECK-O0-CUR-NEXT: retq
;
; CHECK-O3-CUR-LABEL: widen_broadcast2_unaligned:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: widen_broadcast2_unaligned:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi)
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: widen_broadcast2_unaligned:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: widen_broadcast2_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: movl %eax, 4(%rdi)
; CHECK-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%p1 = getelementptr i32, ptr %p0, i64 1
store atomic i32 %v1, ptr %p0 unordered, align 4
@@ -610,17 +546,11 @@ define i64 @load_fold_add3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: addq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_add3:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
; CHECK-O3-CUR-NEXT: addq (%rdi), %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_add3:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: addq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_fold_add3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: addq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = add i64 %v, %v2
@@ -704,17 +634,11 @@ define i64 @load_fold_mul3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: imulq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_mul3:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
; CHECK-O3-CUR-NEXT: imulq (%rdi), %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_mul3:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: imulq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_fold_mul3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: imulq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = mul i64 %v, %v2
@@ -823,20 +747,13 @@ define i64 @load_fold_udiv1(ptr %p) {
; CHECK-O0-NEXT: divq %rcx
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_udiv1:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax
; CHECK-O3-CUR-NEXT: shrq $3, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_udiv1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax
; CHECK-O3-EX-NEXT: shrq $3, %rax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_fold_udiv1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rdx
; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-NEXT: mulxq %rax, %rax, %rax
; CHECK-O3-NEXT: shrq $3, %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p unordered, align 8
%ret = udiv i64 %v, 15
ret i64 %ret
@@ -1288,17 +1205,11 @@ define i64 @load_fold_and3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: andq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_and3:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
; CHECK-O3-CUR-NEXT: andq (%rdi), %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_and3:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: andq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_fold_and3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: andq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = and i64 %v, %v2
@@ -1335,17 +1246,11 @@ define i64 @load_fold_or3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: orq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_or3:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
; CHECK-O3-CUR-NEXT: orq (%rdi), %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_or3:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: orq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_fold_or3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: orq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = or i64 %v, %v2
@@ -1382,17 +1287,11 @@ define i64 @load_fold_xor3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: xorq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_xor3:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
; CHECK-O3-CUR-NEXT: xorq (%rdi), %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_xor3:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: xorq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_fold_xor3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: xorq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = xor i64 %v, %v2
@@ -1444,19 +1343,12 @@ define i1 @load_fold_icmp3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: sete %al
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_fold_icmp3:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
; CHECK-O3-CUR-NEXT: cmpq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: sete %al
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_fold_icmp3:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: cmpq (%rsi), %rax
; CHECK-O3-EX-NEXT: sete %al
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_fold_icmp3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: cmpq %rax, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = icmp eq i64 %v, %v2
@@ -1653,31 +1545,14 @@ define void @rmw_fold_sdiv2(ptr %p, i64 %v) {
; Legal, as expected
define void @rmw_fold_udiv1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_udiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdx
; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O0-NEXT: mulxq %rax, %rax, %rax
; CHECK-O0-NEXT: shrq $3, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_udiv1:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax
; CHECK-O3-CUR-NEXT: shrq $3, %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_udiv1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax
; CHECK-O3-EX-NEXT: shrq $3, %rax
; CHECK-O3-EX-NEXT: movq %rax, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: rmw_fold_udiv1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rdx
; CHECK-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-NEXT: mulxq %rax, %rax, %rax
; CHECK-NEXT: shrq $3, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = udiv i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -1868,24 +1743,12 @@ define void @rmw_fold_urem2(ptr %p, i64 %v) {
; Legal to fold (TODO)
define void @rmw_fold_shl1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: shlq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_shl1:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: shlq $15, %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_shl1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: shlq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: rmw_fold_shl1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shlq $15, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = shl i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -1904,18 +1767,11 @@ define void @rmw_fold_shl2(ptr %p, i64 %v) {
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_shl2:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: shlxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_shl2:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rcx
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: shlq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: rmw_fold_shl2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = shl i64 %prev, %v
store atomic i64 %val, ptr %p unordered, align 8
@@ -1924,24 +1780,12 @@ define void @rmw_fold_shl2(ptr %p, i64 %v) {
; Legal to fold (TODO)
define void @rmw_fold_lshr1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: shrq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_lshr1:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: shrq $15, %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_lshr1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: shrq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: rmw_fold_lshr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shrq $15, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = lshr i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -1960,18 +1804,11 @@ define void @rmw_fold_lshr2(ptr %p, i64 %v) {
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_lshr2:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: shrxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_lshr2:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rcx
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: shrq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: rmw_fold_lshr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = lshr i64 %prev, %v
store atomic i64 %val, ptr %p unordered, align 8
@@ -1980,24 +1817,12 @@ define void @rmw_fold_lshr2(ptr %p, i64 %v) {
; Legal to fold (TODO)
define void @rmw_fold_ashr1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: sarq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_ashr1:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: sarq $15, %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_ashr1:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: sarq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: rmw_fold_ashr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: sarq $15, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = ashr i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -2016,18 +1841,11 @@ define void @rmw_fold_ashr2(ptr %p, i64 %v) {
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: rmw_fold_ashr2:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: sarxq %rsi, (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: rmw_fold_ashr2:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rcx
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: sarq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: rmw_fold_ashr2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = ashr i64 %prev, %v
store atomic i64 %val, ptr %p unordered, align 8
@@ -2391,26 +2209,12 @@ define i64 @fold_constant(i64 %arg) {
}
define i64 @fold_constant_clobber(ptr %p, i64 %arg) {
; CHECK-O0-LABEL: fold_constant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq Constant(%rip), %rax
; CHECK-O0-NEXT: movq $5, (%rdi)
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_constant_clobber:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax
; CHECK-O3-CUR-NEXT: movq $5, (%rdi)
; CHECK-O3-CUR-NEXT: addq %rsi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_constant_clobber:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rax
; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT: movq $5, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: fold_constant_clobber:
; CHECK: # %bb.0:
; CHECK-NEXT: movq Constant(%rip), %rax
; CHECK-NEXT: movq $5, (%rdi)
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq
%v = load atomic i64, ptr @Constant unordered, align 8
store i64 5, ptr %p
%ret = add i64 %v, %arg
@@ -2418,26 +2222,12 @@ define i64 @fold_constant_clobber(ptr %p, i64 %arg) {
}
define i64 @fold_constant_fence(i64 %arg) {
; CHECK-O0-LABEL: fold_constant_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq Constant(%rip), %rax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: addq %rdi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_constant_fence:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax
; CHECK-O3-CUR-NEXT: mfence
; CHECK-O3-CUR-NEXT: addq %rdi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_constant_fence:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rdi, %rax
; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: fold_constant_fence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq Constant(%rip), %rax
; CHECK-NEXT: mfence
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: retq
%v = load atomic i64, ptr @Constant unordered, align 8
fence seq_cst
%ret = add i64 %v, %arg
@@ -2445,26 +2235,12 @@ define i64 @fold_constant_fence(i64 %arg) {
}
define i64 @fold_invariant_clobber(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: movq $5, (%rdi)
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_invariant_clobber:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: movq $5, (%rdi)
; CHECK-O3-CUR-NEXT: addq %rsi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_invariant_clobber:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rax
; CHECK-O3-EX-NEXT: addq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq $5, (%rdi)
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: fold_invariant_clobber:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq $5, (%rdi)
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq
%v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
store i64 5, ptr %p
%ret = add i64 %v, %arg
@@ -2473,26 +2249,12 @@ define i64 @fold_invariant_clobber(ptr dereferenceable(8) %p, i64 %arg) {
define i64 @fold_invariant_fence(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: mfence
; CHECK-O0-NEXT: addq %rsi, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_invariant_fence:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
; CHECK-O3-CUR-NEXT: mfence
; CHECK-O3-CUR-NEXT: addq %rsi, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_invariant_fence:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: movq %rsi, %rax
; CHECK-O3-EX-NEXT: addq (%rdi), %rax
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: retq
; CHECK-LABEL: fold_invariant_fence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: mfence
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq
%v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
fence seq_cst
%ret = add i64 %v, %arg
@@ -2503,32 +2265,18 @@ define i64 @fold_invariant_fence(ptr dereferenceable(8) %p, i64 %arg) {
; Exercise a few cases involving any extend idioms
define i16 @load_i8_anyext_i16(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movb (%rdi), %al
; CHECK-O0-CUR-NEXT: movzbl %al, %eax
; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-CUR-NEXT: retq
; CHECK-O0-LABEL: load_i8_anyext_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax
; CHECK-O3-CUR-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i8_anyext_i16:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i8_anyext_i16:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_i8_anyext_i16:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
%v = load atomic i8, ptr %ptr unordered, align 2
%vec = insertelement <2 x i8> undef, i8 %v, i32 0
%res = bitcast <2 x i8> %vec to i16
@@ -2536,28 +2284,16 @@ define i16 @load_i8_anyext_i16(ptr %ptr) {
}
define i32 @load_i8_anyext_i32(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i32:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movb (%rdi), %al
; CHECK-O0-CUR-NEXT: movzbl %al, %eax
; CHECK-O0-CUR-NEXT: retq
; CHECK-O0-LABEL: load_i8_anyext_i32:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O0-NEXT: movzbl %al, %eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i8_anyext_i32:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i8_anyext_i32:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i8_anyext_i32:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_i8_anyext_i32:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: retq
%v = load atomic i8, ptr %ptr unordered, align 4
%vec = insertelement <4 x i8> undef, i8 %v, i32 0
%res = bitcast <4 x i8> %vec to i32
@@ -2565,29 +2301,17 @@ define i32 @load_i8_anyext_i32(ptr %ptr) {
}
define i32 @load_i16_anyext_i32(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
; CHECK-O0-CUR-NEXT: # implicit-def: $eax
; CHECK-O0-CUR-NEXT: movw %cx, %ax
; CHECK-O0-CUR-NEXT: retq
; CHECK-O0-LABEL: load_i16_anyext_i32:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i16_anyext_i32:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i16_anyext_i32:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_i16_anyext_i32:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: retq
%v = load atomic i16, ptr %ptr unordered, align 4
%vec = insertelement <2 x i16> undef, i16 %v, i64 0
%res = bitcast <2 x i16> %vec to i32
@@ -2595,33 +2319,21 @@ define i32 @load_i16_anyext_i32(ptr %ptr) {
}
define i64 @load_i16_anyext_i64(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
; CHECK-O0-CUR-NEXT: # implicit-def: $eax
; CHECK-O0-CUR-NEXT: movw %cx, %ax
; CHECK-O0-CUR-NEXT: vmovd %eax, %xmm0
; CHECK-O0-CUR-NEXT: vmovq %xmm0, %rax
; CHECK-O0-CUR-NEXT: retq
; CHECK-O0-LABEL: load_i16_anyext_i64:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
; CHECK-O0-NEXT: vmovd %eax, %xmm0
; CHECK-O0-NEXT: vmovq %xmm0, %rax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax
; CHECK-O3-CUR-NEXT: vmovd %eax, %xmm0
; CHECK-O3-CUR-NEXT: vmovq %xmm0, %rax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O0-EX-LABEL: load_i16_anyext_i64:
; CHECK-O0-EX: # %bb.0:
; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O0-EX-NEXT: vmovq %xmm0, %rax
; CHECK-O0-EX-NEXT: retq
;
; CHECK-O3-EX-LABEL: load_i16_anyext_i64:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovq %xmm0, %rax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: load_i16_anyext_i64:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: vmovd %eax, %xmm0
; CHECK-O3-NEXT: vmovq %xmm0, %rax
; CHECK-O3-NEXT: retq
%v = load atomic i16, ptr %ptr unordered, align 8
%vec = insertelement <4 x i16> undef, i16 %v, i64 0
%res = bitcast <4 x i16> %vec to i64
@@ -2675,30 +2387,18 @@ define i1 @fold_cmp_over_fence(ptr %p, i32 %v1) {
; CHECK-O0-NEXT: # kill: def $al killed $al killed $eax
; CHECK-O0-NEXT: retq
;
; CHECK-O3-CUR-LABEL: fold_cmp_over_fence:
; CHECK-O3-CUR: # %bb.0:
; CHECK-O3-CUR-NEXT: movl (%rdi), %eax
; CHECK-O3-CUR-NEXT: mfence
; CHECK-O3-CUR-NEXT: cmpl %eax, %esi
; CHECK-O3-CUR-NEXT: jne .LBB116_2
; CHECK-O3-CUR-NEXT: # %bb.1: # %taken
; CHECK-O3-CUR-NEXT: movb $1, %al
; CHECK-O3-CUR-NEXT: retq
; CHECK-O3-CUR-NEXT: .LBB116_2: # %untaken
; CHECK-O3-CUR-NEXT: xorl %eax, %eax
; CHECK-O3-CUR-NEXT: retq
;
; CHECK-O3-EX-LABEL: fold_cmp_over_fence:
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: jne .LBB116_2
; CHECK-O3-EX-NEXT: # %bb.1: # %taken
; CHECK-O3-EX-NEXT: movb $1, %al
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken
; CHECK-O3-EX-NEXT: xorl %eax, %eax
; CHECK-O3-EX-NEXT: retq
; CHECK-O3-LABEL: fold_cmp_over_fence:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movl (%rdi), %eax
; CHECK-O3-NEXT: mfence
; CHECK-O3-NEXT: cmpl %eax, %esi
; CHECK-O3-NEXT: jne .LBB116_2
; CHECK-O3-NEXT: # %bb.1: # %taken
; CHECK-O3-NEXT: movb $1, %al
; CHECK-O3-NEXT: retq
; CHECK-O3-NEXT: .LBB116_2: # %untaken
; CHECK-O3-NEXT: xorl %eax, %eax
; CHECK-O3-NEXT: retq
%v2 = load atomic i32, ptr %p unordered, align 4
fence seq_cst
%cmp = icmp eq i32 %v1, %v2