For big-endian targets that need a node such as `v2i8 = bitcast i16:tN` to be legalized by (1) promoting the i16 input type and (2) widening the v2i8 result type, the result will be incorrect, because the legalizer promotes the input type and then produces a scalar_to_vector from that wider type to a vector of N elements of that type. That puts the desired bits into the low-order bytes of element zero, whereas on big-endian systems they need to be in the high-order bytes. This patch changes the legalization to widen to a vector with elements of the original scalar size. Differential revision: https://reviews.llvm.org/D140365
67 lines
2.6 KiB
LLVM
67 lines
2.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s --check-prefix=CHECK-P9-BE

; Regression test for type legalization of `bitcast i16 to <2 x i8>` on
; big-endian PowerPC64, compiled once for pwr7 and once for pwr9
; (see https://reviews.llvm.org/D140365). The i16 operand has to be promoted
; and the <2 x i8> result widened; on a big-endian target the two meaningful
; bytes must land in the high-order bytes of the widened vector rather than in
; the low-order bytes of a promoted element zero.
;
; The function appears to be a reduced reproducer: every pointer is poison,
; the entry branch is on a constant false (so %bb1 is never taken), and %bb9
; branches back to itself unconditionally. The interesting part is the
; bitcast / shufflevector / select chain in %bb2.
define void @test() local_unnamed_addr #0 align 2 {
; CHECK-BE-LABEL: test:
; CHECK-BE:       # %bb.0: # %bb
; CHECK-BE-NEXT:    vspltisw v2, -16
; CHECK-BE-NEXT:    lhz r3, 0(r3)
; CHECK-BE-NEXT:    xxlxor vs1, vs1, vs1
; CHECK-BE-NEXT:    addi r3, r3, 1
; CHECK-BE-NEXT:    vsrw v2, v2, v2
; CHECK-BE-NEXT:    sth r3, -32(r1)
; CHECK-BE-NEXT:    addi r3, r1, -32
; CHECK-BE-NEXT:    lxvw4x vs0, 0, r3
; CHECK-BE-NEXT:    addi r3, r1, -16
; CHECK-BE-NEXT:    xxsel vs0, vs0, vs1, v2
; CHECK-BE-NEXT:    stxvw4x vs0, 0, r3
; CHECK-BE-NEXT:    lwz r3, -16(r1)
; CHECK-BE-NEXT:    stw r3, 0(r3)
; CHECK-BE-NEXT:    .p2align 4
; CHECK-BE-NEXT:  .LBB0_1: # %bb9
; CHECK-BE-NEXT:    #
; CHECK-BE-NEXT:    b .LBB0_1
;
; CHECK-P9-BE-LABEL: test:
; CHECK-P9-BE:       # %bb.0: # %bb
; CHECK-P9-BE-NEXT:    lhz r3, 0(r3)
; CHECK-P9-BE-NEXT:    vspltisw v2, -16
; CHECK-P9-BE-NEXT:    xxlxor vs0, vs0, vs0
; CHECK-P9-BE-NEXT:    addi r3, r3, 1
; CHECK-P9-BE-NEXT:    vsrw v2, v2, v2
; CHECK-P9-BE-NEXT:    sldi r3, r3, 48
; CHECK-P9-BE-NEXT:    mtfprd f1, r3
; CHECK-P9-BE-NEXT:    xxsel v2, vs1, vs0, v2
; CHECK-P9-BE-NEXT:    xxsldwi vs0, v2, v2, 3
; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r3
; CHECK-P9-BE-NEXT:    .p2align 4
; CHECK-P9-BE-NEXT:  .LBB0_1: # %bb9
; CHECK-P9-BE-NEXT:    #
; CHECK-P9-BE-NEXT:    b .LBB0_1
bb:
  ; Constant-false condition: control always continues at %bb2.
  br i1 false, label %bb1, label %bb2

bb1:                                              ; preds = %bb
  unreachable

bb2:                                              ; preds = %bb
  ; Build an i16 value: load i32, truncate to i16, add 1.
  %i = load i32, ptr poison, align 4
  %i3 = trunc i32 %i to i16
  %i4 = add i16 %i3, 1
  ; The bitcast under test: scalar i16 reinterpreted as two i8 lanes.
  %i5 = bitcast i16 %i4 to <2 x i8>
  ; Extend to four lanes; lanes 2 and 3 are left undefined by the mask.
  %i6 = shufflevector <2 x i8> %i5, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ; Keep lanes 0 and 1 from %i6 (lanes 2 and 3 come from undef).
  %i7 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i8> %i6, <4 x i8> undef
  ; Force lanes 2 and 3 to zero while keeping lanes 0 and 1 from %i7.
  %i8 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i8> <i8 undef, i8 undef, i8 0, i8 0>, <4 x i8> %i7
  br label %bb9

bb9:                                              ; preds = %bb9, %bb2
  ; %i8 on entry from %bb2, poison on the back-edge from %bb9 itself.
  %i10 = phi <4 x i8> [ %i8, %bb2 ], [ poison, %bb9 ]
  ; Reinterpret the four lanes as one i32 and store it.
  %i11 = bitcast <4 x i8> %i10 to i32
  store i32 %i11, ptr poison, align 2
  br label %bb9
}