The load narrowing part of TargetLowering::SimplifySetCC is updated according to this: 1) The offset calculation (for big endian) did not work properly for non byte-sized types. This is basically solved by an early exit if the memory type isn't byte-sized. But the code is also corrected to use the store size when calculating the offset. 2) To still allow some optimizations for non-byte-sized types the TargetLowering::isPaddedAtMostSignificantBitsWhenStored hook is added. By default it assumes that scalar integer types are padded starting at the most significant bits, if the type needs padding when being stored to memory. 3) Allow optimizing when isPaddedAtMostSignificantBitsWhenStored is true, as that hook makes it possible for TargetLowering to know how the non byte-sized value is aligned in memory. 4) Update the algorithm to always search for a narrowed load with a power-of-2 byte-sized type. In the past the algorithm started with the width of the original load, and then divided it by two for each iteration. But for a type such as i48 that would just end up trying to narrow the load into an i24 or i12 load, and then we would fail sooner or later due to not finding a newVT that fulfilled newVT.isRound(). With this new approach we can narrow the i48 load into either an i8, i16 or i32 load. By checking if such a load is allowed (e.g. alignment wise) for any "multiple of 8 offset", then we can find more opportunities for the optimization to trigger. So even for a byte-sized type such as i32 we may now end up narrowing the load into loading the 16 bits starting at offset 8 (if that is allowed by the target). The old algorithm did not even consider that case. 5) Also start using getObjectPtrOffset instead of getMemBasePlusOffset when creating the new ptr. This way we get "nsw" on the add.
383 lines
9.9 KiB
LLVM
383 lines
9.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: llc -O1 -mtriple ppc32le -o - %s | FileCheck --check-prefix CHECK-LE %s
|
|
; RUN: llc -O1 -mtriple ppc32 -o - %s | FileCheck --check-prefix CHECK-BE %s
|
|
|
|
; A collection of regression tests to verify the load-narrowing part of
|
|
; TargetLowering::SimplifySetCC (and/or other similar rewrites such as
|
|
; combining AND+LOAD into ZEXTLOAD).
|
|
|
|
|
|
;--------------------------------------------------------------------------
|
|
; Test non byte-sized types.
|
|
;
|
|
; LLVM IR does not define where the padding of a non byte-sized type goes, so
; these can only be optimized when a target lowering hook (see
; TargetLowering::isPaddedAtMostSignificantBitsWhenStored) informs ISel about
; which bits are padding.
|
|
; --------------------------------------------------------------------------
|
|
|
|
; i129 load masked with 0x7fff (15 bits starting at bit 0), compared with zero.
; Expected output: a halfword load (lhz) at byte offset 0 on LE and 15 on BE,
; with the remaining 15-bit mask applied to the narrow value.
define i1 @test_129_15_0(ptr %y) {
; CHECK-LE-LABEL: test_129_15_0:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 0(3)
; CHECK-LE-NEXT: clrlwi 3, 3, 17
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_129_15_0:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 15(3)
; CHECK-BE-NEXT: clrlwi 3, 3, 17
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i129, ptr %y
  %b = and i129 %a, u0x7fff
  %cmp = icmp ne i129 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i126 load masked with 0xfffff0 (20 bits starting at bit 4), compared with
; zero. Expected output: a word load (lwz) at byte offset 0 on LE and 12 on
; BE, followed by a mask of the selected bits.
define i1 @test_126_20_4(ptr %y) {
; CHECK-LE-LABEL: test_126_20_4:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lwz 3, 0(3)
; CHECK-LE-NEXT: rlwinm 3, 3, 0, 8, 27
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_126_20_4:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lwz 3, 12(3)
; CHECK-BE-NEXT: rlwinm 3, 3, 0, 8, 27
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i126, ptr %y
  %b = and i126 %a, u0xfffff0
  %cmp = icmp ne i126 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i33 load masked with 0xff (8 bits starting at bit 0), compared with zero.
; Expected output: a byte load (lbz) at byte offset 0 on LE and 4 on BE, with
; no extra masking.
define i1 @test_33_8_0(ptr %y) {
; CHECK-LE-LABEL: test_33_8_0:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lbz 3, 0(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_33_8_0:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lbz 3, 4(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i33, ptr %y
  %b = and i33 %a, u0xff
  %cmp = icmp ne i33 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i33 load masked with 0x100000000 (only bit 32, the top bit of the type),
; compared with zero. Expected output: LE reads the single byte at offset 4;
; BE reads the word at offset 0 and shifts its most significant byte down.
define i1 @test_33_1_32(ptr %y) {
; CHECK-LE-LABEL: test_33_1_32:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lbz 3, 4(3)
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_33_1_32:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lwz 3, 0(3)
; CHECK-BE-NEXT: srwi 3, 3, 24
; CHECK-BE-NEXT: blr
  %a = load i33, ptr %y
  %b = and i33 %a, u0x100000000
  %cmp = icmp ne i33 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i33 load masked with 0x80000000 (only bit 31), compared with zero.
; Expected output: a byte load (lbz) at byte offset 3 on LE and 1 on BE,
; followed by a shift that moves the byte's top bit into bit 0.
define i1 @test_33_1_31(ptr %y) {
; CHECK-LE-LABEL: test_33_1_31:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lbz 3, 3(3)
; CHECK-LE-NEXT: srwi 3, 3, 7
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_33_1_31:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lbz 3, 1(3)
; CHECK-BE-NEXT: srwi 3, 3, 7
; CHECK-BE-NEXT: blr
  %a = load i33, ptr %y
  %b = and i33 %a, u0x80000000
  %cmp = icmp ne i33 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i33 load masked with 0x1 (only bit 0), compared with zero.
; Expected output: a byte load (lbz) at byte offset 0 on LE and 4 on BE,
; followed by a mask that keeps only the lowest bit.
define i1 @test_33_1_0(ptr %y) {
; CHECK-LE-LABEL: test_33_1_0:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lbz 3, 0(3)
; CHECK-LE-NEXT: clrlwi 3, 3, 31
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_33_1_0:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lbz 3, 4(3)
; CHECK-BE-NEXT: clrlwi 3, 3, 31
; CHECK-BE-NEXT: blr
  %a = load i33, ptr %y
  %b = and i33 %a, u0x1
  %cmp = icmp ne i33 %b, 0
  ret i1 %cmp
}
|
|
|
|
;--------------------------------------------------------------------------
|
|
; Test byte-sized types.
|
|
;--------------------------------------------------------------------------
|
|
|
|
|
|
; i128 load masked with 0xfffff0 (20 bits starting at bit 4), compared with
; zero. Expected output: a word load (lwz) at byte offset 0 on LE and 12 on
; BE, followed by a mask of the selected bits.
define i1 @test_128_20_4(ptr %y) {
; CHECK-LE-LABEL: test_128_20_4:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lwz 3, 0(3)
; CHECK-LE-NEXT: rlwinm 3, 3, 0, 8, 27
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_128_20_4:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lwz 3, 12(3)
; CHECK-BE-NEXT: rlwinm 3, 3, 0, 8, 27
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i128, ptr %y
  %b = and i128 %a, u0xfffff0
  %cmp = icmp ne i128 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i48 load masked with 0xffff (16 bits starting at bit 0), compared with zero.
; Expected output: a halfword load (lhz) at byte offset 0 on LE and 4 on BE,
; with no extra masking.
define i1 @test_48_16_0(ptr %y) {
; CHECK-LE-LABEL: test_48_16_0:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 0(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_48_16_0:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 4(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i48, ptr %y
  %b = and i48 %a, u0xffff
  %cmp = icmp ne i48 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i48 load masked with 0xffff00 (16 bits starting at bit 8), compared with
; zero. Expected output: a halfword load (lhz) at an odd byte offset, 1 on LE
; and 3 on BE, with no extra masking.
define i1 @test_48_16_8(ptr %y) {
; CHECK-LE-LABEL: test_48_16_8:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 1(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_48_16_8:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 3(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i48, ptr %y
  %b = and i48 %a, u0xffff00
  %cmp = icmp ne i48 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i48 load masked with 0xffff0000 (16 bits starting at bit 16), compared with
; zero. Expected output: a halfword load (lhz) at byte offset 2 on both LE and
; BE, with no extra masking.
define i1 @test_48_16_16(ptr %y) {
; CHECK-LE-LABEL: test_48_16_16:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 2(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_48_16_16:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 2(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i48, ptr %y
  %b = and i48 %a, u0xffff0000
  %cmp = icmp ne i48 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i48 load masked with 0xffff00000000 (16 bits starting at bit 32), compared
; with zero. Expected output: a halfword load (lhz) at byte offset 4 on LE and
; 0 on BE, with no extra masking.
define i1 @test_48_16_32(ptr %y) {
; CHECK-LE-LABEL: test_48_16_32:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 4(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_48_16_32:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 0(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i48, ptr %y
  %b = and i48 %a, u0xffff00000000
  %cmp = icmp ne i48 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i48 load masked with 0x1ffff (17 bits starting at bit 0), compared with
; zero. The mask does not fit in 16 bits, so the expected output is a word
; load (lwz) at byte offset 0 on LE and 2 on BE, followed by a 17-bit mask.
define i1 @test_48_17_0(ptr %y) {
; CHECK-LE-LABEL: test_48_17_0:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lwz 3, 0(3)
; CHECK-LE-NEXT: clrlwi 3, 3, 15
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_48_17_0:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lwz 3, 2(3)
; CHECK-BE-NEXT: clrlwi 3, 3, 15
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i48, ptr %y
  %b = and i48 %a, u0x1ffff
  %cmp = icmp ne i48 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i40 load masked with 0xffff (16 bits starting at bit 0), compared with zero.
; Expected output: a halfword load (lhz) at byte offset 0 on LE and 3 on BE,
; with no extra masking.
define i1 @test_40_16_0(ptr %y) {
; CHECK-LE-LABEL: test_40_16_0:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 0(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_40_16_0:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 3(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i40, ptr %y
  %b = and i40 %a, u0xffff
  %cmp = icmp ne i40 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i40 load masked with 0x100000000 (only bit 32), compared with zero.
; Expected output: a byte load (lbz) at byte offset 4 on LE and 0 on BE,
; followed by a mask that keeps only the lowest bit of that byte.
define i1 @test_40_1_32(ptr %y) {
; CHECK-LE-LABEL: test_40_1_32:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lbz 3, 4(3)
; CHECK-LE-NEXT: clrlwi 3, 3, 31
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_40_1_32:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lbz 3, 0(3)
; CHECK-BE-NEXT: clrlwi 3, 3, 31
; CHECK-BE-NEXT: blr
  %a = load i40, ptr %y
  %b = and i40 %a, u0x100000000
  %cmp = icmp ne i40 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i24 load masked with 0xffff (16 bits starting at bit 0), compared with zero.
; Expected output: a halfword load (lhz) at byte offset 0 on LE and 1 on BE,
; with no extra masking.
define i1 @test_24_16_0(ptr %y) {
; CHECK-LE-LABEL: test_24_16_0:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 0(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_24_16_0:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 1(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i24, ptr %y
  %b = and i24 %a, u0xffff
  %cmp = icmp ne i24 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i24 load masked with 0xff00 (8 bits starting at bit 8, the middle byte),
; compared with zero. Expected output: a byte load (lbz) at byte offset 1 on
; both LE and BE, with no extra masking.
define i1 @test_24_8_8(ptr %y) {
; CHECK-LE-LABEL: test_24_8_8:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lbz 3, 1(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_24_8_8:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lbz 3, 1(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i24, ptr %y
  %b = and i24 %a, u0xff00
  %cmp = icmp ne i24 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i24 load masked with 0xff000 (8 bits starting at bit 12, straddling two
; bytes), compared with zero. Expected output: a halfword load (lhz) covering
; the upper two bytes (byte offset 1 on LE, 0 on BE), followed by a mask of
; the selected bits.
define i1 @test_24_8_12(ptr %y) {
; CHECK-LE-LABEL: test_24_8_12:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lhz 3, 1(3)
; CHECK-LE-NEXT: rlwinm 3, 3, 0, 20, 27
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_24_8_12:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lhz 3, 0(3)
; CHECK-BE-NEXT: rlwinm 3, 3, 0, 20, 27
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i24, ptr %y
  %b = and i24 %a, u0xff000
  %cmp = icmp ne i24 %b, 0
  ret i1 %cmp
}
|
|
|
|
; i24 load masked with 0xff0000 (8 bits starting at bit 16, the top byte),
; compared with zero. Expected output: a byte load (lbz) at byte offset 2 on
; LE and 0 on BE, with no extra masking.
define i1 @test_24_8_16(ptr %y) {
; CHECK-LE-LABEL: test_24_8_16:
; CHECK-LE: # %bb.0:
; CHECK-LE-NEXT: lbz 3, 2(3)
; CHECK-LE-NEXT: addic 4, 3, -1
; CHECK-LE-NEXT: subfe 3, 4, 3
; CHECK-LE-NEXT: blr
;
; CHECK-BE-LABEL: test_24_8_16:
; CHECK-BE: # %bb.0:
; CHECK-BE-NEXT: lbz 3, 0(3)
; CHECK-BE-NEXT: addic 4, 3, -1
; CHECK-BE-NEXT: subfe 3, 4, 3
; CHECK-BE-NEXT: blr
  %a = load i24, ptr %y
  %b = and i24 %a, u0xff0000
  %cmp = icmp ne i24 %b, 0
  ret i1 %cmp
}
|