When pragma of loop transformations is specified, follow-up metadata for
loops is generated after each transformation. On the LLVM side,
follow-up metadata is expected to be a list of properties, such as the
following:
```
!followup = !{!"llvm.loop.vectorize.followup_all", !mp, !isvectorized}
!mp = !{!"llvm.loop.mustprogress"}
!isvectorized = !{"llvm.loop.isvectorized"}
```
However, on the clang side, the generated metadata contains an MDNode
that has those properties, as shown below:
```
!followup = !{!"llvm.loop.vectorize.followup_all", !loop_id}
!loop_id = distinct !{!loop_id, !mp, !isvectorized}
!mp = !{!"llvm.loop.mustprogress"}
!isvectorized = !{"llvm.loop.isvectorized"}
```
According to the
[LangRef](https://llvm.org/docs/TransformMetadata.html#transformation-metadata-structure),
the LLVM side is correct. Due to this inconsistency, follow-up metadata
was not interpreted correctly, e.g., only one transformation is applied
when multiple pragmas are used.
This patch fixes clang side to emit followup metadata in correct format.
150 lines
9.0 KiB
LLVM
150 lines
9.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -passes='loop-vectorize,loop-unroll' -force-vector-width=4 -S < %s | FileCheck %s
|
|
|
|
; Test if the follow-up metadata for loops works fine. The original code is
|
|
; something like below. In this case, unrolling should be applied after
|
|
; vectorization.
|
|
;
|
|
; void f(float *a, float x) {
|
|
; #pragma clang loop vectorize(enable) unroll_count(8)
|
|
; for (int i = 0; i < 1024; i++) {
|
|
; a[i] *= x;
|
|
; }
|
|
; }
|
|
;
|
|
define void @f(ptr noundef captures(none) %a, float noundef %x) {
|
|
; CHECK-LABEL: define void @f(
|
|
; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4
|
|
; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP14]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8
|
|
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP16]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
|
|
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP6]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
|
|
; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
|
|
; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP12]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28
|
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
|
|
; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]]
|
|
; CHECK-NEXT: store <4 x float> [[TMP18]], ptr [[TMP17]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]]
|
|
; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
|
|
; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]]
|
|
; CHECK-NEXT: store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]]
|
|
; CHECK-NEXT: [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
|
|
; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]]
|
|
; CHECK-NEXT: store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
|
|
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]]
|
|
; CHECK-NEXT: [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
|
|
; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]]
|
|
; CHECK-NEXT: store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
|
|
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]]
|
|
; CHECK-NEXT: [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
|
|
; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]]
|
|
; CHECK-NEXT: store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
|
|
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]]
|
|
; CHECK-NEXT: [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
|
|
; CHECK-NEXT: [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]]
|
|
; CHECK-NEXT: store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
|
|
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]]
|
|
; CHECK-NEXT: [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
|
|
; CHECK-NEXT: [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]]
|
|
; CHECK-NEXT: store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
|
|
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]]
|
|
; CHECK-NEXT: [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
|
|
; CHECK-NEXT: [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]]
|
|
; CHECK-NEXT: store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
|
|
; CHECK-NEXT: [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024
|
|
; CHECK-NEXT: br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[EXIT_LOOPEXIT]]:
|
|
; CHECK-NEXT: br label %[[EXIT]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds nuw float, ptr %a, i64 %iv
|
|
%load = load float, ptr %arrayidx, align 4
|
|
%mul = fmul float %x, %load
|
|
store float %mul, ptr %arrayidx, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%comp = icmp eq i64 %iv.next, 1024
|
|
br i1 %comp, label %exit, label %for.body, !llvm.loop !0
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
!0 = distinct !{!0, !1, !2}
|
|
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
|
|
!2 = !{!"llvm.loop.vectorize.followup_all", !3, !4}
|
|
!3 = !{!"llvm.loop.isvectorized"}
|
|
!4 = !{!"llvm.loop.unroll.count", i32 8}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
|
|
;.
|