This patch fixes the xxperm vector operand swap condition so that the single-use operand is in V2, to prevent copying. It also fixes the subtarget condition used to exploit xxperm. Reviewed By: stefanp. Differential Revision: https://reviews.llvm.org/D146632
98 lines
4.2 KiB
LLVM
98 lines
4.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
|
|
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
|
|
; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
|
|
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
|
|
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
|
|
; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
|
|
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
|
|
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
|
|
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
|
|
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
|
|
; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
|
|
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
|
|
; test1: the shufflevector result is added back to %a, so %a has two uses
; (the shufflevector and the add) while %b is used only by the shufflevector.
; The mask picks lanes from both inputs; %burn is never referenced in the body.
; In every expected sequence below, xxperm writes v4 and the following vaddubm
; still reads v3, with no register copy between the load of the mask and the
; permute.
; NOTE(review): presumably %burn/%a/%b arrive in v2/v3/v4, which would make v4
; the single-use %b that xxperm is allowed to overwrite - confirm against the
; vector calling convention.
define dso_local noundef <16 x i8> @test1(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 {
|
|
; CHECK-LE-P9-LABEL: test1:
|
|
; CHECK-LE-P9: # %bb.0: # %entry
|
|
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
|
|
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
|
|
; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-LE-P9-NEXT: xxperm v4, v3, vs0
|
|
; CHECK-LE-P9-NEXT: vaddubm v2, v4, v3
|
|
; CHECK-LE-P9-NEXT: blr
|
|
;
|
|
; CHECK-BE-P9-LABEL: test1:
|
|
; CHECK-BE-P9: # %bb.0: # %entry
|
|
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
|
|
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
|
|
; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-BE-P9-NEXT: xxperm v4, v3, vs0
|
|
; CHECK-BE-P9-NEXT: vaddubm v2, v4, v3
|
|
; CHECK-BE-P9-NEXT: blr
|
|
;
|
|
; CHECK-AIX-64-P9-LABEL: test1:
|
|
; CHECK-AIX-64-P9: # %bb.0: # %entry
|
|
; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0
|
|
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-AIX-64-P9-NEXT: xxperm v4, v3, vs0
|
|
; CHECK-AIX-64-P9-NEXT: vaddubm v2, v4, v3
|
|
; CHECK-AIX-64-P9-NEXT: blr
|
|
;
|
|
; CHECK-AIX-32-P9-LABEL: test1:
|
|
; CHECK-AIX-32-P9: # %bb.0: # %entry
|
|
; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0
|
|
; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-AIX-32-P9-NEXT: xxperm v4, v3, vs0
|
|
; CHECK-AIX-32-P9-NEXT: vaddubm v2, v4, v3
|
|
; CHECK-AIX-32-P9-NEXT: blr
|
|
entry:
|
|
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 18, i32 23, i32 12, i32 22, i32 22, i32 22, i32 22, i32 0, i32 0, i32 0, i32 0, i32 9, i32 9, i32 9, i32 9>
|
|
%add = add <16 x i8> %shuffle, %a
|
|
ret <16 x i8> %add
|
|
}
|
|
|
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
|
|
; test2: same shufflevector and mask as test1, but the result is added to %b
; instead of %a. Here %b has two uses (the shufflevector and the add) while %a
; is used only by the shufflevector; %burn is never referenced in the body.
; In every expected sequence below, xxperm writes v3 and the following vaddubm
; still reads v4 - the operand roles are the reverse of test1.
; NOTE(review): presumably %burn/%a/%b arrive in v2/v3/v4, which would make v3
; the single-use %a that xxperm is allowed to overwrite - confirm against the
; vector calling convention.
define dso_local noundef <16 x i8> @test2(<16 x i8> noundef %burn, <16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr #0 {
|
|
; CHECK-LE-P9-LABEL: test2:
|
|
; CHECK-LE-P9: # %bb.0: # %entry
|
|
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
|
|
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
|
|
; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-LE-P9-NEXT: xxperm v3, v4, vs0
|
|
; CHECK-LE-P9-NEXT: vaddubm v2, v3, v4
|
|
; CHECK-LE-P9-NEXT: blr
|
|
;
|
|
; CHECK-BE-P9-LABEL: test2:
|
|
; CHECK-BE-P9: # %bb.0: # %entry
|
|
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
|
|
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
|
|
; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-BE-P9-NEXT: xxperm v3, v4, vs0
|
|
; CHECK-BE-P9-NEXT: vaddubm v2, v3, v4
|
|
; CHECK-BE-P9-NEXT: blr
|
|
;
|
|
; CHECK-AIX-64-P9-LABEL: test2:
|
|
; CHECK-AIX-64-P9: # %bb.0: # %entry
|
|
; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0
|
|
; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-AIX-64-P9-NEXT: xxperm v3, v4, vs0
|
|
; CHECK-AIX-64-P9-NEXT: vaddubm v2, v3, v4
|
|
; CHECK-AIX-64-P9-NEXT: blr
|
|
;
|
|
; CHECK-AIX-32-P9-LABEL: test2:
|
|
; CHECK-AIX-32-P9: # %bb.0: # %entry
|
|
; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0
|
|
; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r3)
|
|
; CHECK-AIX-32-P9-NEXT: xxperm v3, v4, vs0
|
|
; CHECK-AIX-32-P9-NEXT: vaddubm v2, v3, v4
|
|
; CHECK-AIX-32-P9-NEXT: blr
|
|
entry:
|
|
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 18, i32 23, i32 12, i32 22, i32 22, i32 22, i32 22, i32 0, i32 0, i32 0, i32 0, i32 9, i32 9, i32 9, i32 9>
|
|
%add = add <16 x i8> %shuffle, %b
|
|
ret <16 x i8> %add
|
|
}
|
|
|
|
; Attribute group referenced by both @test1 and @test2. It carries only
; nounwind; the longer lists in the "Function Attrs" comments above the
; functions are not reflected here.
attributes #0 = {nounwind}
|