This addresses the following issue I opened: https://github.com/llvm/llvm-project/issues/118851. This change generalizes the type-legalization mechanism that currently handles `v8[i/f/bf]16` upsizing to include loads _and_ stores of `v8i8` and `v16i8`, allowing all of the mentioned vectors to be lowered to PTX as vectors of `b32`. This extension also allows us to remove the DAG combine that handled only the exact `load v16i8` case, thus centralizing all the upsizing logic in one place. Test changes include adding `v8i8`, `v16i8`, and `v8i16` cases to load-store.ll, and updating the CHECKs for other tests to match the improved codegen.
40 lines
917 B
LLVM
40 lines
917 B
LLVM
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
|
|
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
|
|
|
; foo1: stores the <2 x float> argument %val through %ptr.
; The directives below require the text `st.v2.f32` to appear in the llc
; output after the `.visible .func foo1` label.
; CHECK-LABEL: .visible .func foo1
|
|
; CHECK: st.v2.f32
|
|
define void @foo1(<2 x float> %val, ptr %ptr) {
|
|
store <2 x float> %val, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; foo2: stores the <4 x float> argument %val through %ptr.
; The directives below require the text `st.v4.f32` to appear in the llc
; output after the `.visible .func foo2` label.
; CHECK-LABEL: .visible .func foo2
|
|
; CHECK: st.v4.f32
|
|
define void @foo2(<4 x float> %val, ptr %ptr) {
|
|
store <4 x float> %val, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; foo3: stores the <2 x i32> argument %val through %ptr.
; The directives below require the text `st.v2.u32` to appear in the llc
; output after the `.visible .func foo3` label.
; CHECK-LABEL: .visible .func foo3
|
|
; CHECK: st.v2.u32
|
|
define void @foo3(<2 x i32> %val, ptr %ptr) {
|
|
store <2 x i32> %val, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; foo4: stores the <4 x i32> argument %val through %ptr.
; The directives below require the text `st.v4.u32` to appear in the llc
; output after the `.visible .func foo4` label.
; CHECK-LABEL: .visible .func foo4
|
|
; CHECK: st.v4.u32
|
|
define void @foo4(<4 x i32> %val, ptr %ptr) {
|
|
store <4 x i32> %val, ptr %ptr
|
|
ret void
|
|
}
|
|
|
|
; v16i8: loads a <16 x i8> from %a and stores the same value to %b.
; The directives inside the body require `ld.v4.b32` and then `st.v4.b32`
; to appear, in that order, after the `.visible .func v16i8` label, i.e. the
; 16 bytes are expected to be moved as four 32-bit elements rather than as
; individual bytes.
; CHECK-LABEL: .visible .func v16i8
|
|
define void @v16i8(ptr %a, ptr %b) {
|
|
; CHECK: ld.v4.b32
|
|
; CHECK: st.v4.b32
|
|
%v = load <16 x i8>, ptr %a
|
|
store <16 x i8> %v, ptr %b
|
|
ret void
|
|
}
|