For a GEP in a pointer chain, if:

1) the pointer chain is unit-strided,
2) the base pointer wasn't folded and is sitting in a register somewhere, and
3) the distance between the GEP and the base pointer is small enough that it
   can be folded into the addressing mode of the using load/store,

then we can exclude that GEP from the total cost of the pointer chain, as it
will likely be folded away.

In order to check whether 3) holds, we need to know the type of memory access
being made by the users of the pointer chain, so a new argument is passed to
getPointersChainCost. (Using the source pointer type of the GEP isn't
accurate; see https://reviews.llvm.org/D149889 for more details.) Also note
that 2) is currently an assumption and could be modelled more accurately.

This prevents some unprofitable cases from being SLP vectorized on RISC-V by
making the scalar costs cheaper and closer to the actual codegen. For now the
getPointersChainCost hook is duplicated for RISC-V to avoid disturbing other
targets, but it could be merged back in and shared with other targets in a
follow-up patch.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D149654
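To make the heuristic concrete, below is a minimal, self-contained C++ sketch
of the costing idea. It is not the actual TargetTransformInfo interface: the
names chainCost and foldsIntoAccess are made up for illustration, and only the
RISC-V case is modelled, where scalar loads/stores take a signed 12-bit byte
offset.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // RV64 scalar loads and stores accept a signed 12-bit byte offset.
    constexpr int64_t kMinImm = -2048, kMaxImm = 2047;

    // Condition 3): the distance from the base pointer must fit the addressing
    // mode of the load/store using the pointer, which is why the size of the
    // access has to be known.
    static bool foldsIntoAccess(int64_t Distance, int64_t AccessSize) {
      return Distance >= kMinImm && Distance + AccessSize - 1 <= kMaxImm;
    }

    // Cost of a unit-strided pointer chain whose base is assumed (condition 2)
    // to already sit in a register: the chain's first GEP costs 1 unless its
    // own constant offset folds into the access, and every later GEP is free
    // when its distance from the base folds.
    static unsigned chainCost(bool BaseOffsetIsConstant, int64_t BaseOffset,
                              const std::vector<int64_t> &Distances,
                              int64_t AccessSize) {
      unsigned Cost =
          (BaseOffsetIsConstant && foldsIntoAccess(BaseOffset, AccessSize)) ? 0
                                                                            : 1;
      for (int64_t D : Distances)
        if (!foldsIntoAccess(D, AccessSize))
          ++Cost; // this GEP needs its own address computation
      return Cost;
    }

    int main() {
      // Mirrors @f below: i32 stores at %dest+0, +4, +8, +12 bytes -> prints 0.
      printf("%u\n", chainCost(true, 0, {4, 8, 12}, 4));
      // Mirrors @g below: the base is %dest+8192, outside [-2048, 2047] -> prints 1.
      printf("%u\n", chainCost(true, 8192, {4, 8, 12}, 4));
      // Mirrors @h below: the base offset depends on %i, not a constant -> prints 1.
      printf("%u\n", chainCost(false, 0, {4, 8, 12}, 4));
      return 0;
    }

Note how the access size has to be threaded into the fold check; that is the
information the new getPointersChainCost argument carries.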
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mtriple=riscv64 -mattr=+v -riscv-v-slp-max-vf=0 -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s

; Because all of these addresses are foldable, the scalar cost should be 0 when
; computing the pointers chain cost.
;
; TODO: These are currently costed as free because the indices are all
; constants, but we should check if the constants are actually foldable.
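;
; (For reference: the stores below are at byte offsets 0, 4, 8 and 12 from
; %dest, all of which fit in the signed 12-bit offset of RISC-V scalar
; loads/stores, so no separate address computation is needed.)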
define void @f(ptr %dest, i64 %i) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 0
; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
entry:
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: f
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
  %p1 = getelementptr i32, ptr %dest, i32 0
  store i32 1, ptr %p1
  %p2 = getelementptr i32, ptr %dest, i32 1
  store i32 1, ptr %p2
  %p3 = getelementptr i32, ptr %dest, i32 2
  store i32 1, ptr %p3
  %p4 = getelementptr i32, ptr %dest, i32 3
  store i32 1, ptr %p4
  ret void
}

; When computing the scalar pointers chain cost here, there is a cost of 1 for
; the base pointer, and the rest can be folded in, so the scalar cost should
; be 1.
;
; TODO: These are currently costed as free because the indices are all
; constants, but we should check if the constants are actually foldable.
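;
; (For reference: 2048 i32 elements is 8192 bytes, which is outside the signed
; 12-bit offset range [-2048, 2047] of RISC-V loads/stores, so the base %p1
; needs its own address computation; the later stores are only 4, 8 and 12
; bytes past %p1 and do fold.)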
define void @g(ptr %dest, i64 %i) {
; CHECK-LABEL: define void @g
; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 2048
; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
entry:
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: g
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
  %p1 = getelementptr i32, ptr %dest, i32 2048
  store i32 1, ptr %p1
  %p2 = getelementptr i32, ptr %dest, i32 2049
  store i32 1, ptr %p2
  %p3 = getelementptr i32, ptr %dest, i32 2050
  store i32 1, ptr %p3
  %p4 = getelementptr i32, ptr %dest, i32 2051
  store i32 1, ptr %p4
  ret void
}

; When computing the scalar pointers chain cost here, there is a cost of
; 1 for the base pointer, and the rest can be folded in, so the scalar cost
; should be 1.
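;
; (For reference: the base address depends on the runtime index %i, so it has
; to be computed into a register; the remaining stores are 4, 8 and 12 bytes
; past that base and fold into the store's immediate offset.)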
define void @h(ptr %dest, i32 %i) {
; CHECK-LABEL: define void @h
; CHECK-SAME: (ptr [[DEST:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr [4 x i32], ptr [[DEST]], i32 [[I]], i32 0
; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
; CHECK-NEXT:    ret void
;
entry:
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: h
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
  %p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0
  store i32 1, ptr %p1
  %p2 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 1
  store i32 1, ptr %p2
  %p3 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 2
  store i32 1, ptr %p3
  %p4 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 3
  store i32 1, ptr %p4
  ret void
}