During legalization we can end up with shuffles that have identity masks, and so act like an extract_subvector, but are not simplified to extract_subvector. This adjusts the profitability heuristic in foldExtractSubvectorFromShuffleVector to allow identity masks that do not start at element 0. Undef mask elements are excluded, as it can be more useful to keep the undef elements. Differential Revision: https://reviews.llvm.org/D153504
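For illustration only (this function is not part of the test file below, and its name is made up for this sketch), the kind of shuffle the adjusted heuristic accepts is an identity mask that starts at a non-zero element, which behaves like an extract_subvector of the upper lanes:

; Hypothetical example: the mask <2, 3> selects elements 2 and 3 of %v in
; order, so this shuffle acts like an extract_subvector starting at element 2
; even though it is not written as one.
define <2 x i64> @identity_shuffle_upper_half(<4 x i64> %v) {
  %s = shufflevector <4 x i64> %v, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
  ret <2 x i64> %s
}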
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s

define i64 @add_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i64_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = add i64 %a, %b
  ret i64 %c
}

define i64 @sub_i64_ext_load(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: sub_i64_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    sub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = sub i64 %a, %b
  ret i64 %c
}

define void @add_i64_ext_load_store(<1 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i64_ext_load_store:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = add i64 %a, %b
  store i64 %c, ptr %B
  ret void
}

define i64 @add_v2i64_ext_load(<2 x i64> %A, ptr %B) nounwind {
; CHECK-LABEL: add_v2i64_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %a = extractelement <2 x i64> %A, i32 0
  %b = load i64, ptr %B
  %c = add i64 %a, %b
  ret i64 %c
}

define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind {
; CHECK-LABEL: add_i64_ext_ext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = extractelement <1 x i64> %B, i32 0
  %c = add i64 %a, %b
  ret i64 %c
}

define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind {
; CHECK-LABEL: add_i32_ext_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    add w0, w9, w8
; CHECK-NEXT:    ret
  %a = extractelement <1 x i32> %A, i32 0
  %b = load i32, ptr %B
  %c = add i32 %a, %b
  ret i32 %c
}

define i64 @add_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: add_i64_ext_ext_test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    add d0, d0, d1
; CHECK-NEXT:    add d0, d0, d2
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = extractelement <2 x i64> %B, i32 0
  %c = extractelement <2 x i64> %B, i32 1
  %d = add i64 %a, %b
  %e = add i64 %d, %c
  ret i64 %e
}

define i64 @sub_i64_ext_ext_test1(<1 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: sub_i64_ext_ext_test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    sub d0, d0, d1
; CHECK-NEXT:    sub d0, d0, d2
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %a = extractelement <1 x i64> %A, i32 0
  %b = extractelement <2 x i64> %B, i32 0
  %c = extractelement <2 x i64> %B, i32 1
  %d = sub i64 %a, %b
  %e = sub i64 %d, %c
  ret i64 %e
}