Scalarization can expose optimization opportunities for the individual elements of a vector, and can therefore be beneficial on targets like GPUs that tend to operate on scalars anyway. However, notably with 16-bit operations it is often beneficial to keep <2 x i16 / half> vectors around since there are packed instructions for those. Refactor the code to operate on "fragments" of split vectors. The fragments are usually scalars, but may themselves be smaller vectors when the scalarizer-min-bits option is used. If the split is uneven, the last fragment is a shorter remainder. This is almost NFC when the new option is unused, but it happens to clean up some code in the fully scalarized case as well. Differential Revision: https://reviews.llvm.org/D149842
153 lines
6.4 KiB
LLVM
153 lines
6.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes='function(scalarizer)' %s | FileCheck %s

; Check that the scalarizer can handle vector GEPs with scalar indices

; Globals shared by the test functions in this file:
;   @vec    - <4 x ptr> of null pointers; loaded as the vector base operand
;             in test1, test2 and test5.
;   @index  - i16 scalar; loaded as the non-constant index in test2.
;   @ptr    - [4 x i16] array; used as the scalar base pointer in the
;             test3_* functions.
;   @ptrptr - pointer-sized slot; test4 loads its scalar base pointer from it.
@vec = global <4 x ptr> <ptr null, ptr null, ptr null, ptr null>
@index = global i16 1
@ptr = global [4 x i16] [i16 1, i16 2, i16 3, i16 4]
@ptrptr = global ptr null

; constant index
; The base is a <4 x ptr> vector loaded from @vec and the index is the scalar
; constant i16 1. The expected output extracts each lane of the loaded vector
; and emits one scalar GEP per lane.
define void @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32
; CHECK-NEXT:    [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0
; CHECK-NEXT:    [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 1
; CHECK-NEXT:    [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1
; CHECK-NEXT:    [[DOTI12:%.*]] = getelementptr i16, ptr [[DOTI1]], i16 1
; CHECK-NEXT:    [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2
; CHECK-NEXT:    [[DOTI23:%.*]] = getelementptr i16, ptr [[DOTI2]], i16 1
; CHECK-NEXT:    [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3
; CHECK-NEXT:    [[DOTI34:%.*]] = getelementptr i16, ptr [[DOTI3]], i16 1
; CHECK-NEXT:    ret void
;
bb:
  %0 = load <4 x ptr>, ptr @vec
  %1 = getelementptr i16, <4 x ptr> %0, i16 1

  ret void
}

; non-constant index
; Same shape as test1, but the scalar index is loaded from @index at run time.
; The expected output uses that single loaded i16 as the index of all four
; per-lane scalar GEPs.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32
; CHECK-NEXT:    [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0
; CHECK-NEXT:    [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1
; CHECK-NEXT:    [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2
; CHECK-NEXT:    [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3
; CHECK-NEXT:    [[INDEX:%.*]] = load i16, ptr @index, align 2
; CHECK-NEXT:    [[DOTI01:%.*]] = getelementptr i16, ptr [[DOTI0]], i16 [[INDEX]]
; CHECK-NEXT:    [[DOTI12:%.*]] = getelementptr i16, ptr [[DOTI1]], i16 [[INDEX]]
; CHECK-NEXT:    [[DOTI23:%.*]] = getelementptr i16, ptr [[DOTI2]], i16 [[INDEX]]
; CHECK-NEXT:    [[DOTI34:%.*]] = getelementptr i16, ptr [[DOTI3]], i16 [[INDEX]]
; CHECK-NEXT:    ret void
;
bb:
  %0 = load <4 x ptr>, ptr @vec
  %index = load i16, ptr @index
  %1 = getelementptr i16, <4 x ptr> %0, i16 %index

  ret void
}

; Check that the scalarizer can handle vector GEPs with a scalar base pointer

; constant pointer
; Here the vector GEP is a constant expression used directly as the return
; operand rather than an instruction. The expected output returns the same
; constant expression unchanged.
define <4 x ptr> @test3_constexpr() {
; CHECK-LABEL: @test3_constexpr(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    ret <4 x ptr> getelementptr (i16, ptr @ptr, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
;
bb:
  ret <4 x ptr> getelementptr (i16, ptr @ptr, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
}


; Scalar base derived from a global, with a constant vector index
; The base pointer %offset is a scalar GEP into @ptr using the variable %idx,
; and the vector GEP indexes it with the constant vector <0, 1, 2, 3>. The
; expected output has four scalar GEPs off the same base, reassembled into
; the returned <4 x ptr> with an insertelement chain.
define <4 x ptr> @test3_constbase(i16 %idx) {
; CHECK-LABEL: @test3_constbase(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[OFFSET:%.*]] = getelementptr [4 x i16], ptr @ptr, i16 0, i16 [[IDX:%.*]]
; CHECK-NEXT:    [[GEP_I0:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 0
; CHECK-NEXT:    [[GEP_I1:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 1
; CHECK-NEXT:    [[GEP_I2:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 2
; CHECK-NEXT:    [[GEP_I3:%.*]] = getelementptr i16, ptr [[OFFSET]], i16 3
; CHECK-NEXT:    [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i64 0
; CHECK-NEXT:    [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i64 1
; CHECK-NEXT:    [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i64 2
; CHECK-NEXT:    [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i64 3
; CHECK-NEXT:    ret <4 x ptr> [[GEP]]
;
bb:
  %offset = getelementptr [4 x i16], ptr @ptr, i16 0, i16 %idx
  %gep = getelementptr i16, ptr %offset, <4 x i16> <i16 0, i16 1, i16 2, i16 3>
  ret <4 x ptr> %gep
}

; Constant pointer with a variable vector offset
; The base is the global @ptr and the index is the <4 x i16> argument %offset.
; The expected output extracts each lane of %offset, emits one scalar GEP on
; @ptr per lane, and rebuilds the returned <4 x ptr> with an insertelement
; chain.
define <4 x ptr> @test3_varoffset(<4 x i16> %offset) {
; CHECK-LABEL: @test3_varoffset(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[OFFSET_I0:%.*]] = extractelement <4 x i16> [[OFFSET:%.*]], i64 0
; CHECK-NEXT:    [[GEP_I0:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I0]]
; CHECK-NEXT:    [[OFFSET_I1:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 1
; CHECK-NEXT:    [[GEP_I1:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I1]]
; CHECK-NEXT:    [[OFFSET_I2:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 2
; CHECK-NEXT:    [[GEP_I2:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I2]]
; CHECK-NEXT:    [[OFFSET_I3:%.*]] = extractelement <4 x i16> [[OFFSET]], i64 3
; CHECK-NEXT:    [[GEP_I3:%.*]] = getelementptr i16, ptr @ptr, i16 [[OFFSET_I3]]
; CHECK-NEXT:    [[GEP_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[GEP_I0]], i64 0
; CHECK-NEXT:    [[GEP_UPTO1:%.*]] = insertelement <4 x ptr> [[GEP_UPTO0]], ptr [[GEP_I1]], i64 1
; CHECK-NEXT:    [[GEP_UPTO2:%.*]] = insertelement <4 x ptr> [[GEP_UPTO1]], ptr [[GEP_I2]], i64 2
; CHECK-NEXT:    [[GEP:%.*]] = insertelement <4 x ptr> [[GEP_UPTO2]], ptr [[GEP_I3]], i64 3
; CHECK-NEXT:    ret <4 x ptr> [[GEP]]
;
bb:
  %gep = getelementptr i16, ptr @ptr, <4 x i16> %offset
  ret <4 x ptr> %gep
}

; non-constant pointer
; The scalar base pointer is loaded from @ptrptr and indexed with the constant
; vector <0, 1, 2, 3>. The vector result is unused, and the expected output
; contains only the four scalar GEPs (no insertelement chain).
define void @test4() {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr @ptrptr, align 8
; CHECK-NEXT:    [[DOTI0:%.*]] = getelementptr i16, ptr [[TMP0]], i16 0
; CHECK-NEXT:    [[DOTI1:%.*]] = getelementptr i16, ptr [[TMP0]], i16 1
; CHECK-NEXT:    [[DOTI2:%.*]] = getelementptr i16, ptr [[TMP0]], i16 2
; CHECK-NEXT:    [[DOTI3:%.*]] = getelementptr i16, ptr [[TMP0]], i16 3
; CHECK-NEXT:    ret void
;
bb:
  %0 = load ptr, ptr @ptrptr
  %1 = getelementptr i16, ptr %0, <4 x i16> <i16 0, i16 1, i16 2, i16 3>

  ret void
}

; constant index, inbounds
; Same input shape as test1, but the vector GEP carries the inbounds flag.
; The expected output keeps inbounds on each of the four per-lane scalar GEPs.
define void @test5() {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x ptr>, ptr @vec, align 32
; CHECK-NEXT:    [[DOTI0:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 0
; CHECK-NEXT:    [[DOTI01:%.*]] = getelementptr inbounds i16, ptr [[DOTI0]], i16 1
; CHECK-NEXT:    [[DOTI1:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 1
; CHECK-NEXT:    [[DOTI12:%.*]] = getelementptr inbounds i16, ptr [[DOTI1]], i16 1
; CHECK-NEXT:    [[DOTI2:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 2
; CHECK-NEXT:    [[DOTI23:%.*]] = getelementptr inbounds i16, ptr [[DOTI2]], i16 1
; CHECK-NEXT:    [[DOTI3:%.*]] = extractelement <4 x ptr> [[TMP0]], i64 3
; CHECK-NEXT:    [[DOTI34:%.*]] = getelementptr inbounds i16, ptr [[DOTI3]], i16 1
; CHECK-NEXT:    ret void
;
bb:
  %0 = load <4 x ptr>, ptr @vec
  %1 = getelementptr inbounds i16, <4 x ptr> %0, i16 1

  ret void
}