Files
clang-p2996/llvm/test/CodeGen/PowerPC/ppc-partword-atomic.ll
Jessica Clarke e10958c807 [SelectionDAG][Mips][PowerPC][RISCV][WebAssembly] Teach computeKnownBits/ComputeNumSignBits about atomics
Unlike normal loads these don't have an extension field, but we know
from TargetLowering whether these are sign-extending or zero-extending,
and so can optimise away unnecessary extensions.

This was noticed on RISC-V, where sign extensions in the calling
convention would result in unnecessary explicit extension instructions,
but this also fixes some Mips inefficiencies. PowerPC sees churn in the
tests as all the zero extensions are only for promoting 32-bit to
64-bit, but these zero extensions are still not optimised away as they
should be, likely due to i32 being a legal type.

This also simplifies the WebAssembly code somewhat, which currently
works around the lack of target-independent combines with some ugly
patterns that break once they're optimised away.

Re-landed with correct handling in ComputeNumSignBits for Tmp == VTBits,
where zero-extending atomics were incorrectly returning 0 rather than
the (slightly confusing) required return value of 1.

Re-landed again after D102819 fixed PowerPC to correctly zero-extend all
of its atomics as it claimed to do, since the combination of that bug
and this optimisation caused buildbot regressions.

Reviewed By: RKSimon, atanasyan

Differential Revision: https://reviews.llvm.org/D101342
2021-05-20 20:34:23 +01:00

115 lines
3.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 %s -o - | FileCheck %s --check-prefix=PWR7
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr9 %s -o - | FileCheck %s --check-prefix=PWR9

; The same IR is compiled twice, once with -mcpu=pwr7 and once with -mcpu=pwr9,
; and each output is matched against its own check prefix.
;
; @value8 / @value16 are single-field structs holding an i8 / i16; field 0 of
; each is the target of an atomicrmw in this file.
; @global_int is the i32 destination for the zero-extended result of each
; atomicrmw.
@value8 = dso_local global { i8 } zeroinitializer, align 1
@value16 = dso_local global { i16 } zeroinitializer, align 2
@global_int = dso_local local_unnamed_addr global i32 0, align 4
; testI8: seq_cst atomic exchange of %val with the i8 field of @value8. The
; value returned by the atomicrmw is zero-extended to i32 and stored to
; @global_int; the function itself always returns the constant 55.
;
; Expected -mcpu=pwr7 output: the exchange is a lwarx/stwcx. retry loop on a
; word, with the new byte shifted into position and merged using a shifted 255
; mask (andc/or). After the loop the loaded word is shifted back with srw and
; narrowed with "clrlwi 4, 3, 24" before the stw to global_int.
;
; Expected -mcpu=pwr9 output: the exchange is a lbarx/stbcx. retry loop, and
; the register written by lbarx is stored to global_int directly, with no
; separate clearing instruction in between.
;
; NOTE(review): the check lines below are autogenerated (see the NOTE at the
; top of the file); regenerate them with the script rather than editing by hand.
define dso_local zeroext i32 @testI8(i8 zeroext %val) local_unnamed_addr #0 {
; PWR7-LABEL: testI8:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: addis 4, 2, value8@toc@ha
; PWR7-NEXT: li 6, 255
; PWR7-NEXT: sync
; PWR7-NEXT: addi 5, 4, value8@toc@l
; PWR7-NEXT: rlwinm 4, 5, 3, 27, 28
; PWR7-NEXT: rldicr 5, 5, 0, 61
; PWR7-NEXT: xori 4, 4, 24
; PWR7-NEXT: slw 7, 3, 4
; PWR7-NEXT: slw 3, 6, 4
; PWR7-NEXT: and 6, 7, 3
; PWR7-NEXT: .LBB0_1: # %entry
; PWR7-NEXT: #
; PWR7-NEXT: lwarx 7, 0, 5
; PWR7-NEXT: andc 8, 7, 3
; PWR7-NEXT: or 8, 6, 8
; PWR7-NEXT: stwcx. 8, 0, 5
; PWR7-NEXT: bne 0, .LBB0_1
; PWR7-NEXT: # %bb.2: # %entry
; PWR7-NEXT: srw 3, 7, 4
; PWR7-NEXT: addis 5, 2, global_int@toc@ha
; PWR7-NEXT: lwsync
; PWR7-NEXT: clrlwi 4, 3, 24
; PWR7-NEXT: li 3, 55
; PWR7-NEXT: stw 4, global_int@toc@l(5)
; PWR7-NEXT: blr
;
; PWR9-LABEL: testI8:
; PWR9: # %bb.0: # %entry
; PWR9-NEXT: addis 4, 2, value8@toc@ha
; PWR9-NEXT: sync
; PWR9-NEXT: addi 5, 4, value8@toc@l
; PWR9-NEXT: .LBB0_1: # %entry
; PWR9-NEXT: #
; PWR9-NEXT: lbarx 4, 0, 5
; PWR9-NEXT: stbcx. 3, 0, 5
; PWR9-NEXT: bne 0, .LBB0_1
; PWR9-NEXT: # %bb.2: # %entry
; PWR9-NEXT: addis 3, 2, global_int@toc@ha
; PWR9-NEXT: lwsync
; PWR9-NEXT: stw 4, global_int@toc@l(3)
; PWR9-NEXT: li 3, 55
; PWR9-NEXT: blr
entry:
; %0 is the value the atomicrmw returns for the i8 field of @value8.
%0 = atomicrmw xchg i8* getelementptr inbounds ({ i8 }, { i8 }* @value8, i64 0, i32 0), i8 %val seq_cst, align 1
; Widen that i8 to i32 with zero extension and publish it through @global_int.
%conv = zext i8 %0 to i32
store i32 %conv, i32* @global_int, align 4
; The return value is a fixed constant, independent of %val and of %0.
ret i32 55
}
; testI16: seq_cst atomic exchange of %val with the i16 field of @value16. The
; value returned by the atomicrmw is zero-extended to i32 and stored to
; @global_int; the function itself always returns the constant 55.
;
; Expected -mcpu=pwr7 output: the exchange is a lwarx/stwcx. retry loop on a
; word, with the new halfword shifted into position and merged using a shifted
; 65535 mask (built with li 0 + ori 65535, then applied via andc/or). After the
; loop the loaded word is shifted back with srw and narrowed with
; "clrlwi 4, 3, 16" before the stw to global_int.
;
; Expected -mcpu=pwr9 output: the exchange is a lharx/sthcx. retry loop, and
; the register written by lharx is stored to global_int directly, with no
; separate clearing instruction in between.
;
; NOTE(review): the check lines below are autogenerated (see the NOTE at the
; top of the file); regenerate them with the script rather than editing by hand.
define dso_local zeroext i32 @testI16(i16 zeroext %val) local_unnamed_addr #0 {
; PWR7-LABEL: testI16:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: addis 4, 2, value16@toc@ha
; PWR7-NEXT: li 6, 0
; PWR7-NEXT: sync
; PWR7-NEXT: addi 5, 4, value16@toc@l
; PWR7-NEXT: ori 6, 6, 65535
; PWR7-NEXT: rlwinm 4, 5, 3, 27, 27
; PWR7-NEXT: rldicr 5, 5, 0, 61
; PWR7-NEXT: xori 4, 4, 16
; PWR7-NEXT: slw 7, 3, 4
; PWR7-NEXT: slw 3, 6, 4
; PWR7-NEXT: and 6, 7, 3
; PWR7-NEXT: .LBB1_1: # %entry
; PWR7-NEXT: #
; PWR7-NEXT: lwarx 7, 0, 5
; PWR7-NEXT: andc 8, 7, 3
; PWR7-NEXT: or 8, 6, 8
; PWR7-NEXT: stwcx. 8, 0, 5
; PWR7-NEXT: bne 0, .LBB1_1
; PWR7-NEXT: # %bb.2: # %entry
; PWR7-NEXT: srw 3, 7, 4
; PWR7-NEXT: addis 5, 2, global_int@toc@ha
; PWR7-NEXT: lwsync
; PWR7-NEXT: clrlwi 4, 3, 16
; PWR7-NEXT: li 3, 55
; PWR7-NEXT: stw 4, global_int@toc@l(5)
; PWR7-NEXT: blr
;
; PWR9-LABEL: testI16:
; PWR9: # %bb.0: # %entry
; PWR9-NEXT: addis 4, 2, value16@toc@ha
; PWR9-NEXT: sync
; PWR9-NEXT: addi 5, 4, value16@toc@l
; PWR9-NEXT: .LBB1_1: # %entry
; PWR9-NEXT: #
; PWR9-NEXT: lharx 4, 0, 5
; PWR9-NEXT: sthcx. 3, 0, 5
; PWR9-NEXT: bne 0, .LBB1_1
; PWR9-NEXT: # %bb.2: # %entry
; PWR9-NEXT: addis 3, 2, global_int@toc@ha
; PWR9-NEXT: lwsync
; PWR9-NEXT: stw 4, global_int@toc@l(3)
; PWR9-NEXT: li 3, 55
; PWR9-NEXT: blr
entry:
; %0 is the value the atomicrmw returns for the i16 field of @value16.
%0 = atomicrmw xchg i16* getelementptr inbounds ({ i16 }, { i16 }* @value16, i64 0, i32 0), i16 %val seq_cst, align 2
; Widen that i16 to i32 with zero extension and publish it through @global_int.
%conv = zext i16 %0 to i32
store i32 %conv, i32* @global_int, align 4
; The return value is a fixed constant, independent of %val and of %0.
ret i32 55
}
; Attribute group #0 is referenced by both @testI8 and @testI16; it marks them nounwind.
attributes #0 = { nounwind }