Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
Michael Kruse 978ba61536 Introduce llvm.loop.parallel_accesses and llvm.access.group metadata.
The current llvm.mem.parallel_loop_access metadata has a problem in that
it uses LoopIDs. LoopID unfortunately is not loop identifier. It is
neither unique (there's even a regression test assigning the some LoopID
to multiple loops; can otherwise happen if passes such as LoopVersioning
make copies of entire loops) nor persistent (every time a property is
removed/added from a LoopID's MDNode, it will also receive a new LoopID;
this happens e.g. when calling Loop::setLoopAlreadyUnrolled()).
Since most loop transformation passes change the loop attributes (even
if it just to mark that a loop should not be processed again as
llvm.loop.isvectorized does, for the versioned and unversioned loop),
the parallel access information is lost for any subsequent pass.

This patch unlinks LoopIDs and parallel accesses.
llvm.mem.parallel_loop_access metadata on instruction is replaced by
llvm.access.group metadata. llvm.access.group points to a distinct
MDNode with no operands (avoiding the problem to ever need to add/remove
operands), called "access group". Alternatively, it can point to a list
of access groups. The LoopID then has an attribute
llvm.loop.parallel_accesses with all the access groups that are parallel
(no dependencies carries by this loop).

This intentionally avoid any kind of "ID". Loops that are clones/have
their attributes modifies retain the llvm.loop.parallel_accesses
attribute. Access instructions that a cloned point to the same access
group. It is not necessary for each access to have it's own "ID" MDNode,
but those memory access instructions with the same behavior can be
grouped together.

The behavior of llvm.mem.parallel_loop_access is not changed by this
patch, but should be considered deprecated.

Differential Revision: https://reviews.llvm.org/D52116

llvm-svn: 349725
2018-12-20 04:58:07 +00:00

116 lines
4.7 KiB
LLVM

; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; A tricky loop:
;
; void loop(int *a, int *b) {
; for (int i = 0; i < 512; ++i) {
; a[a[i]] = b[i];
; a[i] = b[i+1];
; }
;}
;CHECK-LABEL: @loop(
;CHECK-NOT: <4 x i32>
define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%1 = load i32, i32* %arrayidx2, align 4
%idxprom3 = sext i32 %1 to i64
%arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
store i32 %0, i32* %arrayidx4, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%2 = load i32, i32* %arrayidx6, align 4
store i32 %2, i32* %arrayidx2, align 4
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 512
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; The same loop with parallel loop metadata added to the loop branch
; and the memory instructions.
;CHECK-LABEL: @parallel_loop(
;CHECK: <4 x i32>
define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !13
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%1 = load i32, i32* %arrayidx2, align 4, !llvm.access.group !13
%idxprom3 = sext i32 %1 to i64
%arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
; This store might have originated from inlining a function with a parallel
; loop. Refers to a list with the "original loop reference" (!4) also included.
store i32 %0, i32* %arrayidx4, align 4, !llvm.access.group !15
%indvars.iv.next = add i64 %indvars.iv, 1
%arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%2 = load i32, i32* %arrayidx6, align 4, !llvm.access.group !13
store i32 %2, i32* %arrayidx2, align 4, !llvm.access.group !13
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 512
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
for.end: ; preds = %for.body
ret void
}
; The same loop with an illegal parallel loop metadata: the memory
; accesses refer to a different loop's identifier.
;CHECK-LABEL: @mixed_metadata(
;CHECK-NOT: <4 x i32>
define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !16
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%1 = load i32, i32* %arrayidx2, align 4, !llvm.access.group !16
%idxprom3 = sext i32 %1 to i64
%arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
; This refers to the loop marked with !7 which we are not in at the moment.
; It should prevent detecting as a parallel loop.
store i32 %0, i32* %arrayidx4, align 4, !llvm.access.group !17
%indvars.iv.next = add i64 %indvars.iv, 1
%arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%2 = load i32, i32* %arrayidx6, align 4, !llvm.access.group !16
store i32 %2, i32* %arrayidx2, align 4, !llvm.access.group !16
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 512
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
for.end: ; preds = %for.body
ret void
}
!3 = !{!3, !{!"llvm.loop.parallel_accesses", !13, !15}}
!4 = !{!4, !{!"llvm.loop.parallel_accesses", !14, !15}}
!6 = !{!6, !{!"llvm.loop.parallel_accesses", !16}}
!7 = !{!7, !{!"llvm.loop.parallel_accesses", !17}}
!13 = distinct !{}
!14 = distinct !{}
!15 = distinct !{}
!16 = distinct !{}
!17 = distinct !{}