Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
Luke Lau c27a0b21c5 [SLP][RISCV] Account for offset folding in getPointersChainCost
For a GEP in a pointer chain, if:
1) a pointer chain is unit-strided
2) the base pointer wasn't folded and is sitting in a register somewhere
3) the distance between the GEP and the base pointer is small enough and
   can be folded into the addressing mode of the using load/store

Then we can exclude that GEP from the total cost of the pointer chain,
as it will likely be folded away.

In order to check if 3) holds, we need to know the type of memory access
being made by the users of the pointer chain. For that, we need to pass
along a new argument to getPointersChainCost. (Using the source pointer
type of the GEP isn't accurate, see https://reviews.llvm.org/D149889 for
more details).

Also note that 2) is currently an assumption, and could be modelled more
accurately.

This prevents some unprofitable cases from being SLP vectorized on
RISC-V by making the scalar costs cheaper and closer to the actual
codegen.

For now the getPointersChainCost hook is duplicated for RISC-V to prevent
disturbing other targets, but could be merged back in and shared with
other targets in a following patch.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D149654
2023-05-22 13:55:30 +01:00

102 lines
3.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mtriple=riscv64 -mattr=+v -riscv-v-slp-max-vf=0 -passes=slp-vectorizer -pass-remarks-output=%t < %s | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
; Because all of these addresses are foldable, the scalar cost should be 0 when
; computing the pointers chain cost.
;
; TODO: These are currently costed as free because the indices are all
; constants, but we should check if the constants are actually foldable.
define void @f(ptr %dest, i64 %i) {
; CHECK-LABEL: define void @f
; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 0
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
; CHECK-NEXT: ret void
;
entry:
; The remark directives below are matched against the pass-remarks output
; file by the second FileCheck invocation. They pin the reported cost (-2)
; and tree size (2) of the vectorized store group in this function.
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: f
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
; Test input: four scalar i32 stores of the constant 1 to elements 0, 1, 2
; and 3 of %dest, each through its own constant-index GEP. The expected
; output above replaces them with one <4 x i32> store through the first GEP.
; The %i argument is not used by this function.
%p1 = getelementptr i32, ptr %dest, i32 0
store i32 1, ptr %p1
%p2 = getelementptr i32, ptr %dest, i32 1
store i32 1, ptr %p2
%p3 = getelementptr i32, ptr %dest, i32 2
store i32 1, ptr %p3
%p4 = getelementptr i32, ptr %dest, i32 3
store i32 1, ptr %p4
ret void
}
; When computing the scalar pointers chain cost here, there is a cost of 1 for
; the base pointer, and the rest can be folded in, so the scalar cost should be
; 1.
;
; TODO: These are currently costed as free because the indices are all
; constants, but we should check if the constants are actually foldable.
define void @g(ptr %dest, i64 %i) {
; CHECK-LABEL: define void @g
; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 2048
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
; CHECK-NEXT: ret void
;
entry:
; The remark directives below are matched against the pass-remarks output
; file by the second FileCheck invocation. They pin the reported cost (-2)
; and tree size (2) of the vectorized store group in this function.
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: g
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
; Test input: four scalar i32 stores of the constant 1 to elements 2048,
; 2049, 2050 and 2051 of %dest (byte offsets 8192 through 8204), each
; through its own constant-index GEP. The expected output above replaces
; them with one <4 x i32> store through the first GEP.
; NOTE(review): the starting index of 2048 is presumably chosen so that the
; first offset cannot be folded into a load/store immediate while the
; remaining ones are small relative to it -- confirm against the target's
; addressing-mode limits.
; The %i argument is not used by this function.
%p1 = getelementptr i32, ptr %dest, i32 2048
store i32 1, ptr %p1
%p2 = getelementptr i32, ptr %dest, i32 2049
store i32 1, ptr %p2
%p3 = getelementptr i32, ptr %dest, i32 2050
store i32 1, ptr %p3
%p4 = getelementptr i32, ptr %dest, i32 2051
store i32 1, ptr %p4
ret void
}
; When computing the scalar pointers chain cost here, there is a cost of
; 1 for the base pointer, and the rest can be folded in, so the scalar cost
; should be 1.
define void @h(ptr %dest, i32 %i) {
; CHECK-LABEL: define void @h
; CHECK-SAME: (ptr [[DEST:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = getelementptr [4 x i32], ptr [[DEST]], i32 [[I]], i32 0
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
; CHECK-NEXT: ret void
;
entry:
; The remark directives below are matched against the pass-remarks output
; file by the second FileCheck invocation. They pin the reported cost (-2)
; and tree size (2) of the vectorized store group in this function.
; YAML: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: h
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
; Test input: %dest is indexed as an array of [4 x i32] rows. The row index
; is the runtime value %i and the element index within the row is a constant
; 0, 1, 2 or 3, so the four stores of the constant 1 hit consecutive i32
; elements of one row. The expected output above replaces them with one
; <4 x i32> store through the first GEP.
%p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0
store i32 1, ptr %p1
%p2 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 1
store i32 1, ptr %p2
%p3 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 2
store i32 1, ptr %p3
%p4 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 3
store i32 1, ptr %p4
ret void
}