Files
clang-p2996/llvm/test/Transforms/Attributor/ArgumentPromotion/array.ll
Johannes Doerfert bf789b1957 [Attributor] Replace AAValueSimplify with AAPotentialValues
For the longest time we used `AAValueSimplify` and
`genericValueTraversal` to determine "potential values". This was
problematic for many reasons:
- We recomputed the result a lot as there was no caching for the 9
  locations calling `genericValueTraversal`.
- We added the idea of "intra" vs. "inter" procedural simplification
  only as an afterthought. `genericValueTraversal` did offer an option
  but `AAValueSimplify` did not. Thus, we might end up with "too much"
  simplification in certain situations and then gave up on it.
- Because `genericValueTraversal` was not a real `AA` we ended up with
  problems like the infinite recursion bug (#54981) as well as code
  duplication.

This patch introduces `AAPotentialValues` and replaces the
`AAValueSimplify` uses with it. `genericValueTraversal` is folded into
`AAPotentialValues` as are the instruction simplifications performed in
`AAValueSimplify` before. We further distinguish "intra" and "inter"
procedural simplification now.

`AAValueSimplify` was not deleted as we haven't ported the
re-materialization of instructions yet. There are other differences over
the former handling, e.g., we may not fold trivially foldable
instructions right now, e.g., `add i32 1, 1` is not folded to `i32 2`
but if an operand would be simplified to `i32 1` we would fold it still.

We are also even more aware of function/SCC boundaries in CGSCC passes,
which is good even if some tests look like they regress.

Fixes: https://github.com/llvm/llvm-project/issues/54981

Note: A previous version was flawed and consequently reverted in
      6555558a80.
2022-07-19 16:24:42 -05:00

70 lines
4.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
;
; FIXME: The GEP + BC + GEP solution we create is not great but correct.
declare void @use(i32* nocapture readonly %arg)
define void @caller() {
; IS________OPM-LABEL: define {{[^@]+}}@caller() {
; IS________OPM-NEXT: entry:
; IS________OPM-NEXT: [[LEFT:%.*]] = alloca [3 x i32], align 4
; IS________OPM-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[LEFT]], i64 0, i64 0
; IS________OPM-NEXT: call void @callee(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[ARRAYDECAY]])
; IS________OPM-NEXT: ret void
;
; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller() {
; IS__TUNIT_NPM-NEXT: entry:
; IS__TUNIT_NPM-NEXT: [[LEFT:%.*]] = alloca [3 x i32], align 4
; IS__TUNIT_NPM-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[LEFT]], i64 0, i64 0
; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYDECAY]] to [3 x i32]*
; IS__TUNIT_NPM-NEXT: [[DOTCAST:%.*]] = bitcast [3 x i32]* [[TMP0]] to i32*
; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAST]], align 4
; IS__TUNIT_NPM-NEXT: [[DOT0_1:%.*]] = getelementptr [3 x i32], [3 x i32]* [[TMP0]], i64 0, i64 1
; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOT0_1]], align 4
; IS__TUNIT_NPM-NEXT: [[DOT0_2:%.*]] = getelementptr [3 x i32], [3 x i32]* [[TMP0]], i64 0, i64 2
; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOT0_2]], align 4
; IS__TUNIT_NPM-NEXT: call void @callee(i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]])
; IS__TUNIT_NPM-NEXT: ret void
;
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller() {
; IS__CGSCC_NPM-NEXT: entry:
; IS__CGSCC_NPM-NEXT: call void @callee(i32 undef, i32 undef, i32 undef)
; IS__CGSCC_NPM-NEXT: ret void
;
entry:
%left = alloca [3 x i32], align 4
%arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %left, i64 0, i64 0
call void @callee(i32* %arraydecay)
ret void
}
define internal void @callee(i32* noalias %arg) {
; IS________OPM-LABEL: define {{[^@]+}}@callee
; IS________OPM-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[ARG:%.*]]) {
; IS________OPM-NEXT: entry:
; IS________OPM-NEXT: call void @use(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[ARG]])
; IS________OPM-NEXT: ret void
;
; IS________NPM-LABEL: define {{[^@]+}}@callee
; IS________NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) {
; IS________NPM-NEXT: entry:
; IS________NPM-NEXT: [[ARG_PRIV:%.*]] = alloca [3 x i32], align 4
; IS________NPM-NEXT: [[ARG_PRIV_CAST:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
; IS________NPM-NEXT: store i32 [[TMP0]], i32* [[ARG_PRIV_CAST]], align 4
; IS________NPM-NEXT: [[ARG_PRIV_0_1:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i64 0, i64 1
; IS________NPM-NEXT: store i32 [[TMP1]], i32* [[ARG_PRIV_0_1]], align 4
; IS________NPM-NEXT: [[ARG_PRIV_0_2:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i64 0, i64 2
; IS________NPM-NEXT: store i32 [[TMP2]], i32* [[ARG_PRIV_0_2]], align 4
; IS________NPM-NEXT: [[TMP3:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
; IS________NPM-NEXT: call void @use(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[TMP3]])
; IS________NPM-NEXT: ret void
;
entry:
call void @use(i32* %arg)
ret void
}