Unlike normal loads, atomics don't have an extension field, but we know from TargetLowering whether they are sign-extending or zero-extending, and so can optimise away unnecessary extensions. This was noticed on RISC-V, where sign extensions in the calling convention would result in unnecessary explicit extension instructions, but this also fixes some Mips inefficiencies. PowerPC sees churn in the tests as all the zero extensions are only for promoting 32-bit to 64-bit, but these zero extensions are still not optimised away as they should be, likely due to i32 being a legal type. This also simplifies the WebAssembly code somewhat, which currently works around the lack of target-independent combines with some ugly patterns that break once the extensions are optimised away. Re-landed with correct handling in ComputeNumSignBits for Tmp == VTBits, where zero-extending atomics were incorrectly returning 0 rather than the (slightly confusing) required return value of 1. Re-landed again after D102819 fixed PowerPC to correctly zero-extend all of its atomics as it claimed to do, since the combination of that bug and this optimisation caused buildbot regressions. Reviewed By: RKSimon, atanasyan Differential Revision: https://reviews.llvm.org/D101342
115 lines
3.7 KiB
LLVM
115 lines
3.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 %s -o - | FileCheck %s --check-prefix=PWR7
|
|
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr9 %s -o - | FileCheck %s --check-prefix=PWR9
|
|
|
|
; Globals used by the test functions in this file:
;  * @value8 / @value16 are single-field structs whose i8 / i16 field is the
;    target of the atomicrmw xchg in testI8 / testI16 respectively.
;  * @global_int receives the zero-extended previous value in both tests.
@value8 = dso_local global { i8 } zeroinitializer, align 1
|
|
@value16 = dso_local global { i16 } zeroinitializer, align 2
|
|
@global_int = dso_local local_unnamed_addr global i32 0, align 4
|
|
|
|
; testI8: atomically exchanges %val into the i8 field of @value8 (seq_cst),
; zero-extends the previous value to i32, stores that to @global_int, and
; returns the constant 55.
;
; What the autogenerated assertions below expect:
;  * With -mcpu=pwr7, a word-sized lwarx/stwcx. retry loop surrounded by
;    shift/mask code (rlwinm/xori/slw/and before the loop, andc/or inside it,
;    srw after it), followed by "clrlwi 4, 3, 24" on the shifted-down old
;    value before the stw to global_int.
;  * With -mcpu=pwr9, a byte-sized lbarx/stbcx. retry loop whose loaded value
;    is stored with stw directly; no clrlwi is expected on this path.
define dso_local zeroext i32 @testI8(i8 zeroext %val) local_unnamed_addr #0 {
|
|
; PWR7-LABEL: testI8:
|
|
; PWR7: # %bb.0: # %entry
|
|
; PWR7-NEXT: addis 4, 2, value8@toc@ha
|
|
; PWR7-NEXT: li 6, 255
|
|
; PWR7-NEXT: sync
|
|
; PWR7-NEXT: addi 5, 4, value8@toc@l
|
|
; PWR7-NEXT: rlwinm 4, 5, 3, 27, 28
|
|
; PWR7-NEXT: rldicr 5, 5, 0, 61
|
|
; PWR7-NEXT: xori 4, 4, 24
|
|
; PWR7-NEXT: slw 7, 3, 4
|
|
; PWR7-NEXT: slw 3, 6, 4
|
|
; PWR7-NEXT: and 6, 7, 3
|
|
; PWR7-NEXT: .LBB0_1: # %entry
|
|
; PWR7-NEXT: #
|
|
; PWR7-NEXT: lwarx 7, 0, 5
|
|
; PWR7-NEXT: andc 8, 7, 3
|
|
; PWR7-NEXT: or 8, 6, 8
|
|
; PWR7-NEXT: stwcx. 8, 0, 5
|
|
; PWR7-NEXT: bne 0, .LBB0_1
|
|
; PWR7-NEXT: # %bb.2: # %entry
|
|
; PWR7-NEXT: srw 3, 7, 4
|
|
; PWR7-NEXT: addis 5, 2, global_int@toc@ha
|
|
; PWR7-NEXT: lwsync
|
|
; PWR7-NEXT: clrlwi 4, 3, 24
|
|
; PWR7-NEXT: li 3, 55
|
|
; PWR7-NEXT: stw 4, global_int@toc@l(5)
|
|
; PWR7-NEXT: blr
|
|
;
|
|
; PWR9-LABEL: testI8:
|
|
; PWR9: # %bb.0: # %entry
|
|
; PWR9-NEXT: addis 4, 2, value8@toc@ha
|
|
; PWR9-NEXT: sync
|
|
; PWR9-NEXT: addi 5, 4, value8@toc@l
|
|
; PWR9-NEXT: .LBB0_1: # %entry
|
|
; PWR9-NEXT: #
|
|
; PWR9-NEXT: lbarx 4, 0, 5
|
|
; PWR9-NEXT: stbcx. 3, 0, 5
|
|
; PWR9-NEXT: bne 0, .LBB0_1
|
|
; PWR9-NEXT: # %bb.2: # %entry
|
|
; PWR9-NEXT: addis 3, 2, global_int@toc@ha
|
|
; PWR9-NEXT: lwsync
|
|
; PWR9-NEXT: stw 4, global_int@toc@l(3)
|
|
; PWR9-NEXT: li 3, 55
|
|
; PWR9-NEXT: blr
|
|
entry:
|
|
; Swap %val into the byte; %0 is the value that was there before.
%0 = atomicrmw xchg i8* getelementptr inbounds ({ i8 }, { i8 }* @value8, i64 0, i32 0), i8 %val seq_cst, align 1
|
|
; Zero-extend the old byte to i32 so it can be stored to the i32 global.
%conv = zext i8 %0 to i32
|
|
store i32 %conv, i32* @global_int, align 4
|
|
; The return value is a constant and does not depend on the exchanged value.
ret i32 55
|
|
}
|
|
|
|
; testI16: atomically exchanges %val into the i16 field of @value16 (seq_cst),
; zero-extends the previous value to i32, stores that to @global_int, and
; returns the constant 55.
;
; What the autogenerated assertions below expect:
;  * With -mcpu=pwr7, a word-sized lwarx/stwcx. retry loop surrounded by
;    shift/mask code (the 65535 mask is built with li/ori, then
;    rlwinm/xori/slw/and before the loop, andc/or inside it, srw after it),
;    followed by "clrlwi 4, 3, 16" on the shifted-down old value before the
;    stw to global_int.
;  * With -mcpu=pwr9, a halfword-sized lharx/sthcx. retry loop whose loaded
;    value is stored with stw directly; no clrlwi is expected on this path.
define dso_local zeroext i32 @testI16(i16 zeroext %val) local_unnamed_addr #0 {
|
|
; PWR7-LABEL: testI16:
|
|
; PWR7: # %bb.0: # %entry
|
|
; PWR7-NEXT: addis 4, 2, value16@toc@ha
|
|
; PWR7-NEXT: li 6, 0
|
|
; PWR7-NEXT: sync
|
|
; PWR7-NEXT: addi 5, 4, value16@toc@l
|
|
; PWR7-NEXT: ori 6, 6, 65535
|
|
; PWR7-NEXT: rlwinm 4, 5, 3, 27, 27
|
|
; PWR7-NEXT: rldicr 5, 5, 0, 61
|
|
; PWR7-NEXT: xori 4, 4, 16
|
|
; PWR7-NEXT: slw 7, 3, 4
|
|
; PWR7-NEXT: slw 3, 6, 4
|
|
; PWR7-NEXT: and 6, 7, 3
|
|
; PWR7-NEXT: .LBB1_1: # %entry
|
|
; PWR7-NEXT: #
|
|
; PWR7-NEXT: lwarx 7, 0, 5
|
|
; PWR7-NEXT: andc 8, 7, 3
|
|
; PWR7-NEXT: or 8, 6, 8
|
|
; PWR7-NEXT: stwcx. 8, 0, 5
|
|
; PWR7-NEXT: bne 0, .LBB1_1
|
|
; PWR7-NEXT: # %bb.2: # %entry
|
|
; PWR7-NEXT: srw 3, 7, 4
|
|
; PWR7-NEXT: addis 5, 2, global_int@toc@ha
|
|
; PWR7-NEXT: lwsync
|
|
; PWR7-NEXT: clrlwi 4, 3, 16
|
|
; PWR7-NEXT: li 3, 55
|
|
; PWR7-NEXT: stw 4, global_int@toc@l(5)
|
|
; PWR7-NEXT: blr
|
|
;
|
|
; PWR9-LABEL: testI16:
|
|
; PWR9: # %bb.0: # %entry
|
|
; PWR9-NEXT: addis 4, 2, value16@toc@ha
|
|
; PWR9-NEXT: sync
|
|
; PWR9-NEXT: addi 5, 4, value16@toc@l
|
|
; PWR9-NEXT: .LBB1_1: # %entry
|
|
; PWR9-NEXT: #
|
|
; PWR9-NEXT: lharx 4, 0, 5
|
|
; PWR9-NEXT: sthcx. 3, 0, 5
|
|
; PWR9-NEXT: bne 0, .LBB1_1
|
|
; PWR9-NEXT: # %bb.2: # %entry
|
|
; PWR9-NEXT: addis 3, 2, global_int@toc@ha
|
|
; PWR9-NEXT: lwsync
|
|
; PWR9-NEXT: stw 4, global_int@toc@l(3)
|
|
; PWR9-NEXT: li 3, 55
|
|
; PWR9-NEXT: blr
|
|
entry:
|
|
; Swap %val into the halfword; %0 is the value that was there before.
%0 = atomicrmw xchg i16* getelementptr inbounds ({ i16 }, { i16 }* @value16, i64 0, i32 0), i16 %val seq_cst, align 2
|
|
; Zero-extend the old halfword to i32 so it can be stored to the i32 global.
%conv = zext i16 %0 to i32
|
|
store i32 %conv, i32* @global_int, align 4
|
|
; The return value is a constant and does not depend on the exchanged value.
ret i32 55
|
|
}
|
|
|
|
; Attribute group #0 (nounwind), referenced by the function definitions in this file.
attributes #0 = { nounwind }
|