Files
clang-p2996/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
Florian Hahn ea83e1c05a [LV] Assign cost to all interleave members when not interleaving.
At the moment, the full cost of all interleave group members is assigned
to the instruction at the group's insert position, even if the decision
was to not form an interleave group.

This can lead to inaccurate cost estimates, e.g. if the instruction at
the insert position is dead. If the decision is to not vectorize but
scalarize or scatter/gather, then the cost will be the total cost for all
members. In those cases, assign the cost individually per member, to more
closely reflect the choice per instruction.

This fixes a divergence between the legacy and VPlan-based cost models.

Fixes https://github.com/llvm/llvm-project/issues/108098.
2024-09-11 21:04:34 +01:00

40 lines
1.3 KiB
LLVM

; Loop-vectorizer test: runs the loop-vectorize pass with a forced vector width
; of 2 and a forced interleave count of 1, and inspects both the cost-model
; debug output and the emitted IR for a loop that loads the two i8 fields of a
; %pair element on every iteration.
;
; The run line below passes --debug-only=loop-vectorize and merges stderr into
; stdout so that the debug messages and the printed IR reach FileCheck in a
; single stream (debug output presumably is why an asserts build is required).
; REQUIRES: asserts
; RUN: opt < %s -force-vector-width=2 -force-vector-interleave=1 -passes=loop-vectorize -S --debug-only=loop-vectorize 2>&1 | FileCheck %s
; Target description: AArch64 Linux.
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
; Two-field struct of i8; the loop below reads both fields of each element.
%pair = type { i8, i8 }
; Expected debug output: each of the two i8 loads is individually reported with
; an estimated cost of 8 at VF 2.
; CHECK-LABEL: test
; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8
; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8
; Expected IR: the vector body contains two scalar i8 loads whose results are
; inserted into lanes 0 and 1 of a <2 x i8> vector, followed by the loop
; back-edge branch to either middle.block or vector.body.
; CHECK-LABEL: entry:
; CHECK-LABEL: vector.body:
; CHECK: [[LOAD1:%.*]] = load i8
; CHECK: [[LOAD2:%.*]] = load i8
; CHECK: [[INSERT:%.*]] = insertelement <2 x i8> poison, i8 [[LOAD1]], i32 0
; CHECK: insertelement <2 x i8> [[INSERT]], i8 [[LOAD2]], i32 1
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
; For each index i starting at 0, loads both i8 fields of the %pair at %p[i],
; adds them, and stores the i8 sum to %q[i]. The loop exits once i + 1 == %n.
;
; The instruction order matters for this test: the two field loads are the
; instructions whose per-instruction cost the expectations in this file inspect.
define void @test(ptr %p, ptr %q, i64 %n) {
entry:
br label %for.body
for.body:
; Induction variable: 0 on entry, %i.next on the back edge.
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
; Load field 0 of %p[i].
%tmp0 = getelementptr %pair, ptr %p, i64 %i, i32 0
%tmp1 = load i8, ptr %tmp0, align 1
; Load field 1 of %p[i].
%tmp2 = getelementptr %pair, ptr %p, i64 %i, i32 1
%tmp3 = load i8, ptr %tmp2, align 1
; Sum the two fields and store the result to %q[i].
%add = add i8 %tmp1, %tmp3
%qi = getelementptr i8, ptr %q, i64 %i
store i8 %add, ptr %qi, align 1
; Advance the index and leave the loop when it equals %n.
%i.next = add nuw nsw i64 %i, 1
%cond = icmp eq i64 %i.next, %n
br i1 %cond, label %for.end, label %for.body
for.end:
ret void
}