Files
clang-p2996/llvm/test/CodeGen/PowerPC/pr59074.ll
futog 3e0a76b1fd [Codegen][LegalizeIntegerTypes] Improve shift through stack (#96151)
Minor improvement on cc39c3b17f.

Use an aligned stack slot to store the shifted value.
Use the native register width as shifting unit, so the load of the
shift result is aligned.

If the shift amount is a multiple of the native register width, there is
no need to do a follow-up shift after the load. I added new tests for
these cases.

Co-authored-by: Gergely Futo <gergely.futo@hightec-rt.com>
2024-09-23 11:45:43 +02:00

128 lines
3.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE64
; RUN: llc -mtriple=powerpcle-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=LE32
; RUN: llc -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE64
; RUN: llc -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s --check-prefix=BE32
; To verify this doesn't crash due to array out of bound.
define void @pr59074(ptr %0) {
; LE64-LABEL: pr59074:
; LE64: # %bb.0: # %entry
; LE64-NEXT: lwz 6, 0(3)
; LE64-NEXT: li 7, 12
; LE64-NEXT: ld 4, 16(3)
; LE64-NEXT: ld 5, 24(3)
; LE64-NEXT: addi 6, 6, -12
; LE64-NEXT: std 4, 16(3)
; LE64-NEXT: std 5, 24(3)
; LE64-NEXT: srd 6, 7, 6
; LE64-NEXT: li 7, 0
; LE64-NEXT: std 7, 8(3)
; LE64-NEXT: std 6, 0(3)
; LE64-NEXT: blr
;
; LE32-LABEL: pr59074:
; LE32: # %bb.0: # %entry
; LE32-NEXT: stwu 1, -80(1)
; LE32-NEXT: .cfi_def_cfa_offset 80
; LE32-NEXT: lwz 4, 0(3)
; LE32-NEXT: xxlxor 0, 0, 0
; LE32-NEXT: li 5, 4
; LE32-NEXT: addi 6, 1, 16
; LE32-NEXT: li 7, 0
; LE32-NEXT: li 8, 12
; LE32-NEXT: xxswapd 0, 0
; LE32-NEXT: rlwimi 5, 6, 0, 30, 28
; LE32-NEXT: addi 4, 4, -12
; LE32-NEXT: rlwinm 9, 4, 29, 28, 29
; LE32-NEXT: stxvd2x 0, 0, 5
; LE32-NEXT: stw 7, 44(1)
; LE32-NEXT: stw 7, 40(1)
; LE32-NEXT: stw 7, 36(1)
; LE32-NEXT: stw 8, 16(1)
; LE32-NEXT: clrlwi 4, 4, 27
; LE32-NEXT: lwzux 5, 9, 6
; LE32-NEXT: lwz 6, 8(9)
; LE32-NEXT: lwz 7, 4(9)
; LE32-NEXT: lwz 8, 12(9)
; LE32-NEXT: xori 9, 4, 31
; LE32-NEXT: subfic 11, 4, 32
; LE32-NEXT: srw 5, 5, 4
; LE32-NEXT: slwi 10, 6, 1
; LE32-NEXT: srw 6, 6, 4
; LE32-NEXT: slw 9, 10, 9
; LE32-NEXT: srw 10, 7, 4
; LE32-NEXT: slw 7, 7, 11
; LE32-NEXT: slw 11, 8, 11
; LE32-NEXT: srw 4, 8, 4
; LE32-NEXT: or 5, 7, 5
; LE32-NEXT: or 6, 11, 6
; LE32-NEXT: or 7, 10, 9
; LE32-NEXT: stw 4, 12(3)
; LE32-NEXT: stw 6, 8(3)
; LE32-NEXT: stw 5, 0(3)
; LE32-NEXT: stw 7, 4(3)
; LE32-NEXT: addi 1, 1, 80
; LE32-NEXT: blr
;
; BE64-LABEL: pr59074:
; BE64: # %bb.0: # %entry
; BE64-NEXT: lwz 6, 12(3)
; BE64-NEXT: li 7, 12
; BE64-NEXT: ld 4, 24(3)
; BE64-NEXT: ld 5, 16(3)
; BE64-NEXT: addi 6, 6, -12
; BE64-NEXT: std 4, 24(3)
; BE64-NEXT: std 5, 16(3)
; BE64-NEXT: srd 6, 7, 6
; BE64-NEXT: li 7, 0
; BE64-NEXT: std 7, 0(3)
; BE64-NEXT: std 6, 8(3)
; BE64-NEXT: blr
;
; BE32-LABEL: pr59074:
; BE32: # %bb.0: # %entry
; BE32-NEXT: lwz 4, 12(3)
; BE32-NEXT: xxlxor 0, 0, 0
; BE32-NEXT: addi 5, 1, -64
; BE32-NEXT: li 6, 12
; BE32-NEXT: li 7, 0
; BE32-NEXT: addi 8, 1, -48
; BE32-NEXT: stxvw4x 0, 0, 5
; BE32-NEXT: stw 6, -36(1)
; BE32-NEXT: addi 4, 4, -12
; BE32-NEXT: stw 7, -40(1)
; BE32-NEXT: stw 7, -44(1)
; BE32-NEXT: stw 7, -48(1)
; BE32-NEXT: rlwinm 9, 4, 29, 28, 29
; BE32-NEXT: clrlwi 4, 4, 27
; BE32-NEXT: sub 5, 8, 9
; BE32-NEXT: lwz 6, 4(5)
; BE32-NEXT: lwz 7, 0(5)
; BE32-NEXT: lwz 8, 12(5)
; BE32-NEXT: lwz 5, 8(5)
; BE32-NEXT: subfic 10, 4, 32
; BE32-NEXT: srw 9, 6, 4
; BE32-NEXT: slw 11, 7, 10
; BE32-NEXT: srw 8, 8, 4
; BE32-NEXT: slw 6, 6, 10
; BE32-NEXT: slw 10, 5, 10
; BE32-NEXT: srw 5, 5, 4
; BE32-NEXT: srw 4, 7, 4
; BE32-NEXT: or 7, 11, 9
; BE32-NEXT: or 8, 10, 8
; BE32-NEXT: or 5, 6, 5
; BE32-NEXT: stw 4, 0(3)
; BE32-NEXT: stw 5, 8(3)
; BE32-NEXT: stw 8, 12(3)
; BE32-NEXT: stw 7, 4(3)
; BE32-NEXT: blr
entry:
; Load both i128 lanes through the (opaque) pointer argument.
%v1 = load <2 x i128>, ptr %0
; Replace lane 0 with the constant 12; lane 1 keeps %v1[1].
%v2 = insertelement <2 x i128> %v1, i128 12, i32 0
; Shift amounts: lane 0 = %v1[0] - 12 (variable), lane 1 = 0.
%v3 = sub <2 x i128> %v1, %v2
; Variable i128 lshr is legalized via a shift-through-stack sequence;
; this input previously caused an out-of-bounds array access there.
%v4 = lshr <2 x i128> %v2, %v3
store <2 x i128> %v4, ptr %0
ret void
}