Files
clang-p2996/llvm/test/Transforms/Scalarizer/vector-gep.ll
Nicolai Hähnle 2cb5c6d124 Scalarizer: limit scalarization for small element types
Scalarization can expose optimization opportunities for the individual
elements of a vector, and can therefore be beneficial on targets like
GPUs that tend to operate on scalars anyway.

However, notably with 16-bit operations it is often beneficial to keep
<2 x i16 / half> vectors around since there are packed instructions for
those.

Refactor the code to operate on "fragments" of split vectors. The
fragments are usually scalars, but may themselves be smaller vectors
when the scalarizer-min-bits option is used. If the split is uneven,
the last fragment is a shorter remainder.

This is almost NFC when the new option is unused, but it happens to
clean up some code in the fully scalarized case as well.

Differential Revision: https://reviews.llvm.org/D149842
2023-06-13 21:14:32 +02:00

153 lines
6.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes='function(scalarizer)' %s | FileCheck %s
; Check that the scalarizer can handle vector GEPs with scalar indices
; Vector of four null pointers; loaded as the vector-of-pointers GEP base in
; test1, test2 and test5.
@vec = global <4 x ptr> <ptr null, ptr null, ptr null, ptr null>
; Scalar i16 holding 1; loaded in test2 to obtain a non-constant GEP index.
@index = global i16 1
; Array of four i16 values; used as the scalar GEP base in the test3_* functions.
@ptr = global [4 x i16] [i16 1, i16 2, i16 3, i16 4]
; Global holding a (null) pointer; loaded in test4 to obtain a non-constant
; scalar GEP base.
@ptrptr = global ptr null
; constant index
; The GEP base is a <4 x ptr> loaded from @vec and the index is the scalar
; constant i16 1. The assertions expect, for each of the four lanes, an
; extractelement of the loaded vector immediately followed by a scalar
; getelementptr on that element with index i16 1. The GEP result is unused,
; and the expected output contains no insertelement before the ret.
define void @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32
; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0
; CHECK-NEXT: [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 1
; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1
; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i16, ptr [[DOTI1]], i16 1
; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2
; CHECK-NEXT: [[DOTI23:%.*]] = getelementptr i16, ptr [[DOTI2]], i16 1
; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3
; CHECK-NEXT: [[DOTI34:%.*]] = getelementptr i16, ptr [[DOTI3]], i16 1
; CHECK-NEXT: ret void
;
bb:
%0 = load <4 x ptr>, ptr @vec
; Instruction under test: vector-of-pointers base, scalar constant index.
%1 = getelementptr i16, <4 x ptr> %0, i16 1
ret void
}
; non-constant index
; Same shape as test1, but the scalar index is loaded from @index instead of
; being a constant. In the expected output the four extractelement
; instructions come right after the vector load, ahead of the index load;
; the four scalar getelementptr instructions follow and all reuse the single
; loaded index value. The GEP result is unused, and no insertelement is
; expected before the ret.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32
; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0
; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1
; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2
; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3
; CHECK-NEXT: [[INDEX:%.*]] = load i16, ptr @index, align 2
; CHECK-NEXT: [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 [[INDEX]]
; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i16, ptr [[DOTI1]], i16 [[INDEX]]
; CHECK-NEXT: [[DOTI23:%.*]] = getelementptr i16, ptr [[DOTI2]], i16 [[INDEX]]
; CHECK-NEXT: [[DOTI34:%.*]] = getelementptr i16, ptr [[DOTI3]], i16 [[INDEX]]
; CHECK-NEXT: ret void
;
bb:
%0 = load <4 x ptr>, ptr @vec
%index = load i16, ptr @index
; Instruction under test: vector-of-pointers base, scalar run-time index.
%1 = getelementptr i16, <4 x ptr> %0, i16 %index
ret void
}
; Check that the scalarizer can handle vector GEPs with scalar pointer
; constant pointer
; Here the vector GEP is a constant expression used directly as the ret
; operand, not a standalone instruction. The expected output is the same ret
; with the same constant expression, i.e. the function body is unchanged.
; NOTE(review): presumably the pass only rewrites instructions and therefore
; leaves constant expressions alone - confirm against the Scalarizer source.
define <4 x ptr> @test3_constexpr() {
; CHECK-LABEL: @test3_constexpr(
; CHECK-NEXT: bb:
; CHECK-NEXT: ret <4 x ptr> getelementptr (i16, ptr @ptr, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
;
bb:
ret <4 x ptr> getelementptr (i16, ptr @ptr, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
}
; Scalar base with a constant vector index. The base %offset is itself a
; scalar GEP into @ptr using the i16 argument %idx; the assertions expect it
; to be kept as is. The vector GEP is expected to become four scalar
; getelementptr instructions on that base with indices 0, 1, 2 and 3.
; Because the result is returned, the expected output then rebuilds the
; <4 x ptr> value with a chain of insertelement instructions starting from
; poison.
define <4 x ptr> @test3_constbase(i16 %idx) {
; CHECK-LABEL: @test3_constbase(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[OFFSET:%.*]] = getelementptr [4 x i16], ptr @ptr, i16 0, i16 [[IDX:%.*]]
; CHECK-NEXT: [[GEP_I0:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 0
; CHECK-NEXT: [[GEP_I1:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 1
; CHECK-NEXT: [[GEP_I2:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 2
; CHECK-NEXT: [[GEP_I3:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 3
; CHECK-NEXT: [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i64 0
; CHECK-NEXT: [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i64 1
; CHECK-NEXT: [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i64 2
; CHECK-NEXT: [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i64 3
; CHECK-NEXT: ret <4 x ptr> [[GEP]]
;
bb:
; Scalar GEP producing the base pointer; not itself a vector operation.
%offset = getelementptr [4 x i16], ptr @ptr, i16 0, i16 %idx
; Instruction under test: scalar base, constant <4 x i16> index vector.
%gep = getelementptr i16, ptr %offset, <4 x i16> <i16 0, i16 1, i16 2, i16 3>
ret <4 x ptr> %gep
}
; Constant pointer with a variable vector offset
; The base is the global @ptr and the index is the <4 x i16> argument
; %offset. The assertions expect, for each lane, an extractelement of the
; offset vector immediately followed by a scalar getelementptr off @ptr that
; uses the extracted value. The returned <4 x ptr> is then rebuilt with a
; chain of insertelement instructions starting from poison.
define <4 x ptr> @test3_varoffset(<4 x i16> %offset) {
; CHECK-LABEL: @test3_varoffset(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[OFFSET_I0:%.*]] = extractelement <4 x i16> [[OFFSET:%.*]], i64 0
; CHECK-NEXT: [[GEP_I0:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I0]]
; CHECK-NEXT: [[OFFSET_I1:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 1
; CHECK-NEXT: [[GEP_I1:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I1]]
; CHECK-NEXT: [[OFFSET_I2:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 2
; CHECK-NEXT: [[GEP_I2:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I2]]
; CHECK-NEXT: [[OFFSET_I3:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 3
; CHECK-NEXT: [[GEP_I3:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I3]]
; CHECK-NEXT: [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i64 0
; CHECK-NEXT: [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i64 1
; CHECK-NEXT: [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i64 2
; CHECK-NEXT: [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i64 3
; CHECK-NEXT: ret <4 x ptr> [[GEP]]
;
bb:
; Instruction under test: global scalar base, run-time <4 x i16> index vector.
%gep = getelementptr i16, ptr @ptr, <4 x i16> %offset
ret <4 x ptr> %gep
}
; non-constant pointer
; The scalar base pointer is loaded from @ptrptr and the index is the
; constant vector <0, 1, 2, 3>. The assertions expect four scalar
; getelementptr instructions on the loaded pointer, one per index value.
; The GEP result is unused, and the expected output contains no
; insertelement before the ret.
define void @test4() {
; CHECK-LABEL: @test4(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @ptrptr, align 8
; CHECK-NEXT: [[DOTI0:%.*]] = getelementptr i16, ptr [[TMP0]], i16 0
; CHECK-NEXT: [[DOTI1:%.*]] = getelementptr i16, ptr [[TMP0]], i16 1
; CHECK-NEXT: [[DOTI2:%.*]] = getelementptr i16, ptr [[TMP0]], i16 2
; CHECK-NEXT: [[DOTI3:%.*]] = getelementptr i16, ptr [[TMP0]], i16 3
; CHECK-NEXT: ret void
;
bb:
%0 = load ptr, ptr @ptrptr
; Instruction under test: run-time scalar base, constant <4 x i16> index vector.
%1 = getelementptr i16, ptr %0, <4 x i16> <i16 0, i16 1, i16 2, i16 3>
ret void
}
; constant index, inbounds
; Same input as test1 except that the vector GEP carries the inbounds
; keyword. The assertions expect the same per-lane extractelement plus scalar
; getelementptr sequence, with inbounds present on every scalar
; getelementptr. The GEP result is unused, and no insertelement is expected
; before the ret.
define void @test5() {
; CHECK-LABEL: @test5(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32
; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0
; CHECK-NEXT: [[DOTI01:%.*]] = getelementptr inbounds i16, ptr [[DOTI0]], i16 1
; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1
; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr inbounds i16, ptr [[DOTI1]], i16 1
; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2
; CHECK-NEXT: [[DOTI23:%.*]] = getelementptr inbounds i16, ptr [[DOTI2]], i16 1
; CHECK-NEXT: [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3
; CHECK-NEXT: [[DOTI34:%.*]] = getelementptr inbounds i16, ptr [[DOTI3]], i16 1
; CHECK-NEXT: ret void
;
bb:
%0 = load <4 x ptr>, ptr @vec
; Instruction under test: as in test1, but with the inbounds keyword.
%1 = getelementptr inbounds i16, <4 x ptr> %0, i16 1
ret void
}