[HLSL][SPIR-V] Add hlsl_private address space for SPIR-V (#133464)

This is an alternative to
https://github.com/llvm/llvm-project/pull/122103

In SPIR-V, private global variables have the Private storage class. This
PR adds a new address space which allows the frontend to emit variables with
this storage class when targeting this backend.

This is covered in this proposal: llvm/wg-hlsl@4c9e11a

This PR will cause addrspacecast to show up in several cases, like class
member functions or assignment. Those will have to be handled in the
backend later on, particularly to fix up pointer storage classes in some
functions.

Before this change, global variables were emitted with the 'Function'
storage class, which was wrong.
This commit is contained in:
Nathan Gauër
2025-04-10 10:55:10 +02:00
committed by GitHub
parent 344a491dad
commit a625bc60e2
26 changed files with 186 additions and 28 deletions

View File

@@ -59,6 +59,7 @@ enum class LangAS : unsigned {
// HLSL specific address spaces.
hlsl_groupshared,
hlsl_constant,
hlsl_private,
// Wasm specific address spaces.
wasm_funcref,

View File

@@ -4328,6 +4328,7 @@ public:
NamedDecl *findLocallyScopedExternCDecl(DeclarationName Name);
void deduceOpenCLAddressSpace(ValueDecl *decl);
void deduceHLSLAddressSpace(VarDecl *decl);
/// Adjust the \c DeclContext for a function or variable that might be a
/// function-local external declaration.

View File

@@ -154,6 +154,8 @@ public:
bool TransformInitList(const InitializedEntity &Entity,
const InitializationKind &Kind, InitListExpr *Init);
void deduceAddressSpace(VarDecl *Decl);
private:
// HLSL resource type attributes need to be processed all at once.
// This is a list to collect them.

View File

@@ -94,6 +94,10 @@ bool Qualifiers::isTargetAddressSpaceSupersetOf(LangAS A, LangAS B,
(A == LangAS::Default &&
(B == LangAS::cuda_constant || B == LangAS::cuda_device ||
B == LangAS::cuda_shared)) ||
// `this` overloading depending on address space is not ready,
// so this is a hack to allow generating addrspacecasts.
// IR legalization will be required when this address space is used.
(A == LangAS::Default && B == LangAS::hlsl_private) ||
// Conversions from target specific address spaces may be legal
// depending on the target information.
Ctx.getTargetInfo().isAddressSpaceSupersetOf(A, B);

View File

@@ -2581,6 +2581,8 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) {
return "groupshared";
case LangAS::hlsl_constant:
return "hlsl_constant";
case LangAS::hlsl_private:
return "hlsl_private";
case LangAS::wasm_funcref:
return "__funcref";
default:

View File

@@ -47,6 +47,8 @@ static const LangASMap FakeAddrSpaceMap = {
11, // ptr32_uptr
12, // ptr64
13, // hlsl_groupshared
14, // hlsl_constant
15, // hlsl_private
20, // wasm_funcref
};

View File

@@ -45,6 +45,7 @@ static const unsigned ARM64AddrSpaceMap[] = {
static_cast<unsigned>(AArch64AddrSpace::ptr64),
0, // hlsl_groupshared
0, // hlsl_constant
0, // hlsl_private
// Wasm address space values for this target are dummy values,
// as it is only enabled for Wasm targets.
20, // wasm_funcref

View File

@@ -60,6 +60,9 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
// FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this
// will break loudly.
llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
};
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
@@ -85,6 +88,7 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
};
} // namespace targets
} // namespace clang

View File

@@ -43,6 +43,7 @@ static const unsigned DirectXAddrSpaceMap[] = {
0, // ptr64
3, // hlsl_groupshared
2, // hlsl_constant
0, // hlsl_private
// Wasm address space values for this target are dummy values,
// as it is only enabled for Wasm targets.
20, // wasm_funcref

View File

@@ -47,6 +47,7 @@ static const unsigned NVPTXAddrSpaceMap[] = {
0, // ptr64
0, // hlsl_groupshared
0, // hlsl_constant
0, // hlsl_private
// Wasm address space values for this target are dummy values,
// as it is only enabled for Wasm targets.
20, // wasm_funcref

View File

@@ -38,16 +38,17 @@ static const unsigned SPIRDefIsPrivMap[] = {
0, // cuda_constant
0, // cuda_shared
// SYCL address space values for this map are dummy
0, // sycl_global
0, // sycl_global_device
0, // sycl_global_host
0, // sycl_local
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0, // ptr64
0, // hlsl_groupshared
2, // hlsl_constant
0, // sycl_global
0, // sycl_global_device
0, // sycl_global_host
0, // sycl_local
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0, // ptr64
0, // hlsl_groupshared
2, // hlsl_constant
10, // hlsl_private
// Wasm address space values for this target are dummy values,
// as it is only enabled for Wasm targets.
20, // wasm_funcref
@@ -70,18 +71,19 @@ static const unsigned SPIRDefIsGenMap[] = {
// cuda_constant pointer can be casted to default/"flat" pointer, but in
// SPIR-V casts between constant and generic pointers are not allowed. For
// this reason cuda_constant is mapped to SPIR-V CrossWorkgroup.
1, // cuda_constant
3, // cuda_shared
1, // sycl_global
5, // sycl_global_device
6, // sycl_global_host
3, // sycl_local
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0, // ptr64
0, // hlsl_groupshared
0, // hlsl_constant
1, // cuda_constant
3, // cuda_shared
1, // sycl_global
5, // sycl_global_device
6, // sycl_global_host
3, // sycl_local
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0, // ptr64
0, // hlsl_groupshared
0, // hlsl_constant
10, // hlsl_private
// Wasm address space values for this target are dummy values,
// as it is only enabled for Wasm targets.
20, // wasm_funcref
@@ -315,7 +317,7 @@ public:
// SPIR-V IDs are represented with a single 32-bit word.
SizeType = TargetInfo::UnsignedInt;
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
"v256:256-v512:512-v1024:1024-n8:16:32:64-G10");
}
llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const override;

View File

@@ -43,6 +43,7 @@ static const unsigned ZOSAddressMap[] = {
0, // ptr64
0, // hlsl_groupshared
0, // hlsl_constant
0, // hlsl_private
0 // wasm_funcref
};

View File

@@ -52,6 +52,7 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = {
0, // ptr64
0, // hlsl_groupshared
0, // hlsl_constant
0, // hlsl_private
// Wasm address space values for this target are dummy values,
// as it is only enabled for Wasm targets.
20, // wasm_funcref

View File

@@ -43,6 +43,7 @@ static const unsigned WebAssemblyAddrSpaceMap[] = {
0, // ptr64
0, // hlsl_groupshared
0, // hlsl_constant
0, // hlsl_private
20, // wasm_funcref
};

View File

@@ -47,6 +47,7 @@ static const unsigned X86AddrSpaceMap[] = {
272, // ptr64
0, // hlsl_groupshared
0, // hlsl_constant
0, // hlsl_private
// Wasm address space values for this target are dummy values,
// as it is only enabled for Wasm targets.
20, // wasm_funcref

View File

@@ -1157,8 +1157,22 @@ void CodeGenFunction::GenerateCXXGlobalCleanUpFunc(
CGM.getCXXABI().useSinitAndSterm() &&
"Arg could not be nullptr unless using sinit and sterm functions.");
CI = Builder.CreateCall(CalleeTy, Callee);
} else
} else {
// If the object lives in a different address space, the `this` pointer
// address space won't match the dtor `this` param. An addrspacecast is
// required.
assert(Arg->getType()->isPointerTy());
assert(CalleeTy->getParamType(0)->isPointerTy());
unsigned ActualAddrSpace = Arg->getType()->getPointerAddressSpace();
unsigned ExpectedAddrSpace =
CalleeTy->getParamType(0)->getPointerAddressSpace();
if (ActualAddrSpace != ExpectedAddrSpace) {
llvm::PointerType *PTy =
llvm::PointerType::get(getLLVMContext(), ExpectedAddrSpace);
Arg = llvm::ConstantExpr::getAddrSpaceCast(Arg, PTy);
}
CI = Builder.CreateCall(CalleeTy, Callee, Arg);
}
// Make sure the call and the callee agree on calling convention.
if (llvm::Function *F = dyn_cast<llvm::Function>(Callee))

View File

@@ -7977,6 +7977,7 @@ NamedDecl *Sema::ActOnVariableDeclarator(
if (getLangOpts().HLSL)
HLSL().ActOnVariableDeclarator(NewVD);
if (getLangOpts().OpenACC)
OpenACC().ActOnVariableDeclarator(NewVD);
@@ -13131,6 +13132,9 @@ bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit,
if (getLangOpts().OpenCL)
deduceOpenCLAddressSpace(VDecl);
if (getLangOpts().HLSL)
HLSL().deduceAddressSpace(VDecl);
// If this is a redeclaration, check that the type we just deduced matches
// the previously declared type.
if (VarDecl *Old = VDecl->getPreviousDecl()) {

View File

@@ -3138,6 +3138,32 @@ static bool IsDefaultBufferConstantDecl(VarDecl *VD) {
!isInvalidConstantBufferLeafElementType(QT.getTypePtr());
}
/// Qualify the type of \p Decl with the hlsl_private address space when the
/// declaration is a static variable or static data member whose type carries
/// no address space yet. Declarations that are already qualified, dependent,
/// sampler, void, or resource-typed are left as they are.
void SemaHLSL::deduceAddressSpace(VarDecl *Decl) {
  QualType DeclTy = Decl->getType();

  // Nothing to deduce if an address space is already present (groupshared
  // for example) or if the type is still dependent.
  if (DeclTy.hasAddressSpace() || DeclTy->isDependentType())
    return;

  // Samplers, void, and resource handles (or arrays of them) never receive
  // an implicit address space.
  const bool IsExemptType =
      DeclTy->isSamplerT() || DeclTy->isVoidType() ||
      isResourceRecordTypeOrArrayOf(DeclTy->getUnqualifiedDesugaredType());
  if (IsExemptType)
    return;

  // Only static globals belong to the Private address space.
  // Non-static globals belong to the cbuffer.
  const bool IsStaticVar =
      Decl->getStorageClass() == SC_Static || Decl->isStaticDataMember();
  if (!IsStaticVar)
    return;

  ASTContext &Ctx = SemaRef.getASTContext();
  Decl->setType(Ctx.getAddrSpaceQualType(DeclTy, LangAS::hlsl_private));
}
void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) {
if (VD->hasGlobalStorage()) {
// make sure the declaration has a complete type
@@ -3146,6 +3172,7 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) {
SemaRef.getASTContext().getBaseElementType(VD->getType()),
diag::err_typecheck_decl_incomplete_type)) {
VD->setInvalidDecl();
deduceAddressSpace(VD);
return;
}
@@ -3177,6 +3204,8 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) {
// process explicit bindings
processExplicitBindingsOnDecl(VD);
}
deduceAddressSpace(VD);
}
// Walks through the global variable declaration, collects all resource binding

View File

@@ -149,7 +149,7 @@ _Static_assert(__builtin_hlsl_is_scalarized_layout_compatible(TwoFloats, __cblay
cbuffer CB {
// CHECK: FunctionDecl {{.*}} f 'void ()'
void f() {}
// CHECK: VarDecl {{.*}} SV 'float' static
// CHECK: VarDecl {{.*}} SV 'hlsl_private float' static
static float SV;
// CHECK: VarDecl {{.*}} s7 'EmptyStruct' callinit
EmptyStruct s7;

View File

@@ -0,0 +1,34 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -ast-dump -o - %s | FileCheck %s
// CHECK: VarDecl {{.*}} global_scalar 'hlsl_private int' static cinit
static int global_scalar = 0;
// CHECK: VarDecl {{.*}} global_buffer 'RWBuffer<float>':'hlsl::RWBuffer<float>' static callinit
RWBuffer<float> global_buffer;
class A {
// CHECK: VarDecl {{.*}} a 'hlsl_private int' static
static int a;
};
class B {
// CHECK: VarDecl {{.*}} b 'hlsl_private int' static
static int b;
};
// CHECK: VarDecl {{.*}} b 'hlsl_private int' cinit
int B::b = 0;
export void foo() {
// CHECK: VarDecl {{.*}} local_buffer 'RWBuffer<float>':'hlsl::RWBuffer<float>' cinit
RWBuffer<float> local_buffer = global_buffer;
// CHECK: VarDecl {{.*}} static_local_buffer 'RWBuffer<float>':'hlsl::RWBuffer<float>' static cinit
static RWBuffer<float> static_local_buffer = global_buffer;
// CHECK: VarDecl {{.*}} local_scalar 'int' cinit
int local_scalar = global_scalar;
// CHECK: VarDecl {{.*}} static_scalar 'hlsl_private int' static cinit
static int static_scalar = 0;
}

View File

@@ -87,7 +87,7 @@ void main(unsigned GI : SV_GroupIndex) {
// NOINLINE-SPIRV: define internal spir_func void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
// NOINLINE-SPIRV-NEXT: entry:
// NOINLINE-SPIRV-NEXT: %0 = call token @llvm.experimental.convergence.entry()
// NOINLINE-SPIRV-NEXT: call spir_func void @_ZN4TailD1Ev(ptr @_ZZ3WagvE1T) [ "convergencectrl"(token %0) ]
// NOINLINE-SPIRV-NEXT: call spir_func void @_ZN4TailD1Ev(ptr addrspacecast (ptr addrspace(10) @_ZZ3WagvE1T to ptr)) [ "convergencectrl"(token %0) ]
// NOINLINE-SPIRV-NEXT: call spir_func void @_ZN6PupperD1Ev(ptr @GlobalPup) [ "convergencectrl"(token %0) ]
// NOINLINE-SPIRV-NEXT: ret void

View File

@@ -20,6 +20,7 @@ cbuffer B {
// CHECK: define {{.*}} float @_Z3foov() #0 {
// CHECK: load float, ptr addrspace(2) @a, align 4
// CHECK: load float, ptr @_ZL1b, align 4
extern float bar() {
return foo();

View File

@@ -0,0 +1,30 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,DXIL
// RUN: %clang_cc1 -triple spirv-pc-vulkan1.3-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,SPIRV
struct S {
static int Value;
};
int S::Value = 1;
// DXIL: @_ZN1S5ValueE = global i32 1, align 4
// SPIRV: @_ZN1S5ValueE = addrspace(10) global i32 1, align 4
[shader("compute")]
[numthreads(1,1,1)]
void main() {
S s;
int value1, value2;
// CHECK: %s = alloca %struct.S, align 1
// CHECK: %value1 = alloca i32, align 4
// CHECK: %value2 = alloca i32, align 4
// DXIL: [[tmp:%.*]] = load i32, ptr @_ZN1S5ValueE, align 4
// SPIRV: [[tmp:%.*]] = load i32, ptr addrspace(10) @_ZN1S5ValueE, align 4
// CHECK: store i32 [[tmp]], ptr %value1, align 4
value1 = S::Value;
// DXIL: [[tmp:%.*]] = load i32, ptr @_ZN1S5ValueE, align 4
// SPIRV: [[tmp:%.*]] = load i32, ptr addrspace(10) @_ZN1S5ValueE, align 4
// CHECK: store i32 [[tmp]], ptr %value2, align 4
value2 = s.Value;
}

View File

@@ -0,0 +1,18 @@
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan1.3-library %s \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=SPIRV
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=DXIL
// DXIL: @_ZL1g = internal global float 0.000000e+00, align 4
// SPIRV: @_ZL1g = internal addrspace(10) global float 0.000000e+00, align 4
static float g = 0;
[numthreads(8,8,1)]
void main() {
// DXIL: {{.*}} = load float, ptr @_ZL1g, align 4
// SPIRV: {{.*}} = load float, ptr addrspace(10) @_ZL1g, align 4
float l = g;
}

View File

@@ -43,7 +43,7 @@ void neg() {
template <long int I>
void tooBig() {
__attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388585)}}
__attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388584)}}
}
template <long int I>
@@ -101,7 +101,7 @@ int main() {
car<1, 2, 3>(); // expected-note {{in instantiation of function template specialization 'car<1, 2, 3>' requested here}}
HasASTemplateFields<1> HASTF;
neg<-1>(); // expected-note {{in instantiation of function template specialization 'neg<-1>' requested here}}
correct<0x7FFFE9>();
correct<0x7FFFE8>();
tooBig<8388650>(); // expected-note {{in instantiation of function template specialization 'tooBig<8388650L>' requested here}}
__attribute__((address_space(1))) char *x;

View File

@@ -70,6 +70,9 @@ static std::string computeDataLayout(const Triple &TT) {
if (Arch == Triple::spirv32)
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1";
if (Arch == Triple::spirv)
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
"v512:512-v1024:1024-n8:16:32:64-G10";
if (TT.getVendor() == Triple::VendorType::AMD &&
TT.getOS() == Triple::OSType::AMDHSA)
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"