Files
clang-p2996/llvm/test/CodeGen/PowerPC/xxperm-swap.ll
Maryam Moghadas cf0395f816 [PowerPC] Fix the xxperm swap requirements
This patch fixes the xxperm vector operand swap condition so that the
single-use operand is in V2 to prevent copying. It also fixes the subtarget
condition to exploit xxperm.

Reviewed By: stefanp

Differential Revision: https://reviews.llvm.org/D146632
2023-04-05 20:13:40 -05:00

98 lines
4.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
; test1: a two-input byte shuffle of %a and %b whose result is added to %a.
; %a therefore has two uses (the shuffle and the add) while %b is used only by
; the shuffle.
; In all four configurations the expected code is `xxperm v4, v3, vs0` followed
; by `vaddubm v2, v4, v3`: the permute writes v4, v3 is still read by the add,
; and no register copy appears between the two instructions.
; NOTE(review): the "Function Attrs" list below does not match attribute group
; #0 as defined in this file ({nounwind}); presumably it is left over from the
; original frontend output -- confirm before relying on it.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local noundef <16 x i8> @test1(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 {
; CHECK-LE-P9-LABEL: test1:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: xxperm v4, v3, vs0
; CHECK-LE-P9-NEXT: vaddubm v2, v4, v3
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P9-LABEL: test1:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: xxperm v4, v3, vs0
; CHECK-BE-P9-NEXT: vaddubm v2, v4, v3
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test1:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v4, v3, vs0
; CHECK-AIX-64-P9-NEXT: vaddubm v2, v4, v3
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test1:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: xxperm v4, v3, vs0
; CHECK-AIX-32-P9-NEXT: vaddubm v2, v4, v3
; CHECK-AIX-32-P9-NEXT: blr
entry:
; Mask indices 0-15 select bytes of %a and 16-31 select bytes of %b; this mask
; draws from both inputs, so the shuffle cannot be done from one source alone.
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 18, i32 23, i32 12, i32 22, i32 22, i32 22, i32 22, i32 0, i32 0, i32 0, i32 0, i32 9, i32 9, i32 9, i32 9>
; Second use of %a: this is what makes %a the multi-use shuffle operand.
%add = add <16 x i8> %shuffle, %a
ret <16 x i8> %add
}
; test2: a two-input byte shuffle of %a and %b whose result is added to %b.
; %b therefore has two uses (the shuffle and the add) while %a is used only by
; the shuffle.
; In all four configurations the expected code is `xxperm v3, v4, vs0` followed
; by `vaddubm v2, v3, v4`: the permute writes v3, v4 is still read by the add,
; and no register copy appears between the two instructions.
; NOTE(review): the "Function Attrs" list below does not match attribute group
; #0 as defined in this file ({nounwind}); presumably it is left over from the
; original frontend output -- confirm before relying on it.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local noundef <16 x i8> @test2(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 {
; CHECK-LE-P9-LABEL: test2:
; CHECK-LE-P9: # %bb.0: # %entry
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
; CHECK-LE-P9-NEXT: xxperm v3, v4, vs0
; CHECK-LE-P9-NEXT: vaddubm v2, v3, v4
; CHECK-LE-P9-NEXT: blr
;
; CHECK-BE-P9-LABEL: test2:
; CHECK-BE-P9: # %bb.0: # %entry
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
; CHECK-BE-P9-NEXT: xxperm v3, v4, vs0
; CHECK-BE-P9-NEXT: vaddubm v2, v3, v4
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P9-LABEL: test2:
; CHECK-AIX-64-P9: # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT: xxperm v3, v4, vs0
; CHECK-AIX-64-P9-NEXT: vaddubm v2, v3, v4
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P9-LABEL: test2:
; CHECK-AIX-32-P9: # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0
; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT: xxperm v3, v4, vs0
; CHECK-AIX-32-P9-NEXT: vaddubm v2, v3, v4
; CHECK-AIX-32-P9-NEXT: blr
entry:
; Mask indices 0-15 select bytes of %a and 16-31 select bytes of %b; this mask
; draws from both inputs, so the shuffle cannot be done from one source alone.
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 18, i32 23, i32 12, i32 22, i32 22, i32 22, i32 22, i32 0, i32 0, i32 0, i32 0, i32 9, i32 9, i32 9, i32 9>
; Second use of %b: this is what makes %b the multi-use shuffle operand.
%add = add <16 x i8> %shuffle, %b
ret <16 x i8> %add
}
; Attribute group referenced by both @test1 and @test2.
attributes #0 = {nounwind}