Files
clang-p2996/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll
Krzysztof Parzyszek 95c7dd8810 Revert "[Hexagon] Don't build two halves of HVX vector in parallel"
This reverts commit ba07f300c6.

A build-vector sequence is made of pairs: rotate+insert. When constructing
a single vector, this results in a chain of 2*N instructions. The rotate
operation is a permute operation, but the insert uses a multiplication
resource: insert and rotate can execute in the same cycle, but obviously
they cannot operate on the same vector. The original halving idea is still
beneficial since it does allow for insert/rotate overlap, and for hiding
insert's latency.
2021-12-30 07:57:11 -08:00

77 lines
2.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s
; Build-vector lowering test: @fred returns a <32 x i32> in which only three
; lanes carry defined values (lane 3 = 7, lane 22 = 9, lane 27 = %a0); every
; other lane is undef.
; The expected assembly below zeroes v0, fills two vector registers (v0 and v1)
; through vror/vinsert steps, and merges them with a final vor before the
; return. The assertions are autogenerated, so regenerate them with the update
; script rather than editing them by hand.
define <32 x i32> @fred(i32 %a0) #0 {
; CHECK-LABEL: fred:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = combine(#20,#9)
; CHECK-NEXT: v0 = vxor(v0,v0)
; CHECK-NEXT: r1 = #24
; CHECK-NEXT: r4 = #12
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vror(v0,r1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1.w = vinsert(r2)
; CHECK-NEXT: r4 = #7
; CHECK-NEXT: r2 = #116
; CHECK-NEXT: v0 = vror(v0,r4)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.w = vinsert(r4)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vror(v1,r3)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1.w = vinsert(r0)
; CHECK-NEXT: v0 = vror(v0,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v1 = vror(v1,r3)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0 = vor(v0,v1)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
; The vector is assembled one lane at a time with a chain of insertelement
; instructions; undef lanes are written out explicitly.
%v0 = insertelement <32 x i32> undef, i32 undef, i32 0
%v1 = insertelement <32 x i32> %v0, i32 undef, i32 1
%v2 = insertelement <32 x i32> %v1, i32 undef, i32 2
; Lane 3 receives the constant 7.
%v3 = insertelement <32 x i32> %v2, i32 7, i32 3
%v4 = insertelement <32 x i32> %v3, i32 undef, i32 4
%v5 = insertelement <32 x i32> %v4, i32 undef, i32 5
%v6 = insertelement <32 x i32> %v5, i32 undef, i32 6
%v7 = insertelement <32 x i32> %v6, i32 undef, i32 7
%v8 = insertelement <32 x i32> %v7, i32 undef, i32 8
%v9 = insertelement <32 x i32> %v8, i32 undef, i32 9
%v10 = insertelement <32 x i32> %v9, i32 undef, i32 10
%v11 = insertelement <32 x i32> %v10, i32 undef, i32 11
%v12 = insertelement <32 x i32> %v11, i32 undef, i32 12
%v13 = insertelement <32 x i32> %v12, i32 undef, i32 13
%v14 = insertelement <32 x i32> %v13, i32 undef, i32 14
%v15 = insertelement <32 x i32> %v14, i32 undef, i32 15
%v16 = insertelement <32 x i32> %v15, i32 undef, i32 16
%v17 = insertelement <32 x i32> %v16, i32 undef, i32 17
%v18 = insertelement <32 x i32> %v17, i32 undef, i32 18
%v19 = insertelement <32 x i32> %v18, i32 undef, i32 19
%v20 = insertelement <32 x i32> %v19, i32 undef, i32 20
%v21 = insertelement <32 x i32> %v20, i32 undef, i32 21
; Lane 22 receives the constant 9.
%v22 = insertelement <32 x i32> %v21, i32 9, i32 22
%v23 = insertelement <32 x i32> %v22, i32 undef, i32 23
%v24 = insertelement <32 x i32> %v23, i32 undef, i32 24
%v25 = insertelement <32 x i32> %v24, i32 undef, i32 25
%v26 = insertelement <32 x i32> %v25, i32 undef, i32 26
; Lane 27 receives the function argument, the only non-constant element.
%v27 = insertelement <32 x i32> %v26, i32 %a0, i32 27
%v28 = insertelement <32 x i32> %v27, i32 undef, i32 28
%v29 = insertelement <32 x i32> %v28, i32 undef, i32 29
%v30 = insertelement <32 x i32> %v29, i32 undef, i32 30
%v31 = insertelement <32 x i32> %v30, i32 undef, i32 31
ret <32 x i32> %v31
}
; Attribute group #0 (applied to @fred): targets the hexagonv66 CPU with the
; hvx and hvx-length128b target features enabled.
attributes #0 = { "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }