[CodeGen] Add TBAA struct path info for array members (#137719)

This enables the LLVM optimizer to treat accesses to distinct struct
members as independent even when one of the members is an array. For
example, the following two stores are no longer considered to alias:

    struct S { int a[10]; int b; };
    void test(S *p, int i) {
      p->a[i] = ...;
      p->b = ...;
    }

Array members were already added to TBAA struct type nodes in commit
57493e29. Here, we also emit a struct path tag for array subscript expressions.
This commit is contained in:
Bruno De Fraine
2025-06-05 13:37:18 +02:00
committed by GitHub
parent 1bf1e6e40e
commit c3b8a15eab
3 changed files with 74 additions and 7 deletions

View File

@@ -4595,7 +4595,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
E->getType(), !getLangOpts().PointerOverflowDefined, SignedIndices,
E->getExprLoc(), &arrayType, E->getBase());
EltBaseInfo = ArrayLV.getBaseInfo();
EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
if (!CGM.getCodeGenOpts().NewStructPathTBAA) {
// Since CodeGenTBAA::getTypeInfoHelper only handles array types for
// new struct path TBAA, we must use a plain access.
EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
} else if (ArrayLV.getTBAAInfo().isMayAlias()) {
EltTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
} else if (ArrayLV.getTBAAInfo().isIncomplete()) {
// The array element is complete, even if the array is not.
EltTBAAInfo = CGM.getTBAAAccessInfo(E->getType());
} else {
// The TBAA access info from the array (base) lvalue is ordinary. We will
// adapt it to create access info for the element.
EltTBAAInfo = ArrayLV.getTBAAInfo();
// We retain the TBAA struct path (BaseType and Offset members) from the
// array. In the TBAA representation, we map any array access to the
// element at index 0, as the index is generally a runtime value. This
// element has the same offset in the base type as the array itself.
// If the array lvalue had no base type, there is no point trying to
// generate one, since an array itself is not a valid base type.
// We also retain the access type from the base lvalue, but the access
// size must be updated to the size of an individual element.
EltTBAAInfo.Size =
getContext().getTypeSizeInChars(E->getType()).getQuantity();
}
} else {
// The base must be a pointer; emit it with an estimate of its alignment.
Address BaseAddr =

View File

@@ -130,6 +130,13 @@ static bool TypeHasMayAlias(QualType QTy) {
return true;
QTy = TT->desugar();
}
// Also consider an array type as may_alias when its element type (at
// any level) is marked as such.
if (auto *ArrayTy = QTy->getAsArrayTypeUnsafe())
if (TypeHasMayAlias(ArrayTy->getElementType()))
return true;
return false;
}

View File

@@ -1,6 +1,6 @@
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
// RUN: -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
// RUN: -new-struct-path-tbaa -emit-llvm -o - | \
// RUN: FileCheck -check-prefix=CHECK-NEW %s
//
@@ -10,6 +10,12 @@
struct A { int i; };
struct B { A a[1]; };
struct C { int i; int x[3]; };
struct D { int n; int arr[]; }; // flexible array member
extern int AA[]; // incomplete array type
typedef int __attribute__((may_alias)) aliasing_int;
typedef int __attribute__((may_alias)) aliasing_array[10];
struct E { aliasing_int x[4]; aliasing_array y; };
int foo(B *b) {
// CHECK-LABEL: _Z3fooP1B
@@ -28,16 +34,42 @@ int bar(C *c) {
int bar2(C *c) {
// CHECK-NEW-LABEL: _Z4bar2P1C
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x:!.*]]
return c->x[2];
}
int bar3(C *c, int j) {
// CHECK-NEW-LABEL: _Z4bar3P1Ci
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x]]
return c->x[j];
}
int bar4(D *d) {
// CHECK-NEW-LABEL: _Z4bar4P1D
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
return d->arr[d->n];
}
int bar5(int j) {
// CHECK-NEW-LABEL: _Z4bar5i
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
return AA[2] + AA[j];
}
// E::x is an array whose element type (aliasing_int) carries may_alias.
// The expected tag for this load (TAG_E_x, defined with the other new-format
// tags below) keeps the struct path into E at offset 0 but uses the char
// type node as the access type, with an access size of 4.
int bar6(E *e, int j) {
// CHECK-NEW-LABEL: _Z4bar6P1Ei
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_E_x:!.*]]
return e->x[j];
}
// E::y has type aliasing_array, a typedef where may_alias is written on the
// array declaration itself. The expected tag for this load (TAG_E_y, defined
// with the other new-format tags below) keeps the struct path into E at
// offset 16 and uses the char type node as the access type, with size 4.
int bar7(E *e, int j) {
// CHECK-NEW-LABEL: _Z4bar7P1Ei
// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_E_y:!.*]]
return e->y[j];
}
// CHECK-DAG: [[TAG_A_i]] = !{[[TYPE_A:!.*]], [[TYPE_int:!.*]], i64 0}
// CHECK-DAG: [[TYPE_A]] = !{!"_ZTS1A", !{{.*}}, i64 0}
// CHECK-DAG: [[TYPE_int]] = !{!"int", !{{.*}}, i64 0}
@@ -45,8 +77,11 @@ int bar3(C *c, int j) {
// CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"}
// CHECK-NEW-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
// CHECK-NEW-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TYPE_pointer:!.*]] = !{[[TYPE_char]], i64 8, !"any pointer"}
// CHECK-NEW-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 4, !"_ZTS1A", [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TAG_A_i]] = !{[[TYPE_A]], [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS1C", [[TYPE_int]], i64 0, i64 4, [[TYPE_int]], i64 4, i64 12}
// CHECK-NEW-DAG: [[TAG_C_i]] = !{[[TYPE_C:!.*]], [[TYPE_int:!.*]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TAG_C_i]] = !{[[TYPE_C]], [[TYPE_int]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TAG_C_x]] = !{[[TYPE_C]], [[TYPE_int]], i64 4, i64 4}
// CHECK-NEW-DAG: [[TYPE_E:!.*]] = !{[[TYPE_char]], i64 56, !"_ZTS1E", [[TYPE_char]], i64 0, i64 16, [[TYPE_char]], i64 16, i64 40}
// CHECK-NEW-DAG: [[TAG_E_x]] = !{[[TYPE_E]], [[TYPE_char]], i64 0, i64 4}
// CHECK-NEW-DAG: [[TAG_E_y]] = !{[[TYPE_E]], [[TYPE_char]], i64 16, i64 4}