The loop directive is a descriptive construct which allows the compiler flexibility in how it generates code for the directive's associated loop(s). See OpenMP specification 5.2 [257:8-9]. The codegen added in this patch for the combined 'loop' directives is: 'target teams loop' -> 'target teams distribute parallel for'; 'teams loop' -> 'teams distribute parallel for'; 'target parallel loop' -> 'target parallel for'; 'parallel loop' -> 'parallel for'. NOTE: The implementation of the 'loop' directive itself is unchanged. Differential Revision: https://reviews.llvm.org/D145823
118 lines
5.5 KiB
C++
118 lines
5.5 KiB
C++
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]"
|
|
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s
|
|
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
|
|
|
|
// Check same results after serialization round-trip
|
|
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s
|
|
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH
|
|
|
|
// expected-no-diagnostics
|
|
|
|
#ifndef HEADER
|
|
#define HEADER
|
|
|
|
// Test subject: a two-deep loop nest placed under a standalone OpenMP 'loop'
// directive. The autogenerated checks later in this file match an ordinary
// nest of branch-based loops (for.cond / for.body / for.inc blocks) for it.
void foo(int t) {
|
|
|
|
// NOTE(review): 'z' is accumulated into without ever being initialized. The
// checked entry block contains no store to it, so adding an initializer would
// presumably require regenerating the assertions -- confirm before changing.
int i, j, z;
|
|
// collapse(2) spans both loops below; 'z' is the '+' reduction variable and
// 'j' is listed as lastprivate.
#pragma omp loop collapse(2) reduction(+:z) lastprivate(j) bind(thread)
|
|
// This 'i' is declared in the for-init and shadows the 'i' declared above.
for (int i = 0; i<t; ++i)
|
|
// The inner loop reuses the function-scope 'j' as its iteration variable.
for (j = 0; j<t; ++j)
|
|
z += i+j;
|
|
}
|
|
#endif
|
|
// IR-LABEL: define {{[^@]+}}@_Z3fooi
|
|
// IR-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// IR-NEXT: entry:
|
|
// IR-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
|
|
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
|
|
// IR-NEXT: [[Z:%.*]] = alloca i32, align 4
|
|
// IR-NEXT: [[I1:%.*]] = alloca i32, align 4
|
|
// IR-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
|
|
// IR-NEXT: store i32 0, ptr [[I1]], align 4
|
|
// IR-NEXT: br label [[FOR_COND:%.*]]
|
|
// IR: for.cond:
|
|
// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
|
|
// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
|
// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
|
|
// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
|
|
// IR: for.body:
|
|
// IR-NEXT: store i32 0, ptr [[J]], align 4
|
|
// IR-NEXT: br label [[FOR_COND2:%.*]]
|
|
// IR: for.cond2:
|
|
// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
|
|
// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
|
// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
|
|
// IR-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
|
|
// IR: for.body4:
|
|
// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
|
|
// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
|
|
// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
|
|
// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
|
|
// IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
|
|
// IR-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
|
|
// IR-NEXT: br label [[FOR_INC:%.*]]
|
|
// IR: for.inc:
|
|
// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
|
|
// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
|
|
// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4
|
|
// IR-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
// IR: for.end:
|
|
// IR-NEXT: br label [[FOR_INC6:%.*]]
|
|
// IR: for.inc6:
|
|
// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
|
|
// IR-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
|
|
// IR-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
|
|
// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
// IR: for.end8:
|
|
// IR-NEXT: ret void
|
|
//
|
|
//
|
|
// IR-PCH-LABEL: define {{[^@]+}}@_Z3fooi
|
|
// IR-PCH-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// IR-PCH-NEXT: entry:
|
|
// IR-PCH-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
|
|
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
|
|
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
|
|
// IR-PCH-NEXT: [[Z:%.*]] = alloca i32, align 4
|
|
// IR-PCH-NEXT: [[I1:%.*]] = alloca i32, align 4
|
|
// IR-PCH-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
|
|
// IR-PCH-NEXT: store i32 0, ptr [[I1]], align 4
|
|
// IR-PCH-NEXT: br label [[FOR_COND:%.*]]
|
|
// IR-PCH: for.cond:
|
|
// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
|
|
// IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
|
// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
|
|
// IR-PCH-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
|
|
// IR-PCH: for.body:
|
|
// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
|
|
// IR-PCH-NEXT: br label [[FOR_COND2:%.*]]
|
|
// IR-PCH: for.cond2:
|
|
// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
|
|
// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
|
|
// IR-PCH-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
|
|
// IR-PCH-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
|
|
// IR-PCH: for.body4:
|
|
// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
|
|
// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
|
|
// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
|
|
// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
|
|
// IR-PCH-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
|
|
// IR-PCH-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
|
|
// IR-PCH-NEXT: br label [[FOR_INC:%.*]]
|
|
// IR-PCH: for.inc:
|
|
// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
|
|
// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
|
|
// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
|
|
// IR-PCH-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
// IR-PCH: for.end:
|
|
// IR-PCH-NEXT: br label [[FOR_INC6:%.*]]
|
|
// IR-PCH: for.inc6:
|
|
// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
|
|
// IR-PCH-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
|
|
// IR-PCH-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
|
|
// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
// IR-PCH: for.end8:
|
|
// IR-PCH-NEXT: ret void
|
|
//
|