Files
clang-p2996/llvm/test/CodeGen/BPF/loops.ll
yonghong-song 7852ebc088 [BPF] Make -mcpu=v3 as the default (#107008)
Before LLVM 20, (void)__sync_fetch_and_add(...) always generated locked
xadd insns. In the upstream Linux kernel discussion [1], it was found that
for the arm64 architecture, the original semantics of
(void)__sync_fetch_and_add(...), i.e., __atomic_fetch_add(...), are
preferred so that the JIT can emit proper native barrier insns.

In llvm commits [2] and [3], (void)__sync_fetch_and_add(...) will
generate the following insns:
  - for cpu v1/v2: locked xadd insns to keep backward compatibility
  - for cpu v3/v4: __atomic_fetch_add() insns

To ensure proper barrier semantics for (void)__sync_fetch_and_add(...),
cpu v3/v4 is recommended.

This patch makes cpu=v3 the default cpu version. Users who want to keep
using cpu v1 need to add -mcpu=v1 explicitly to the clang/llc
command line.

  [1]
https://lore.kernel.org/bpf/ZqqiQQWRnz7H93Hc@google.com/T/#mb68d67bc8f39e35a0c3db52468b9de59b79f021f
  [2] https://github.com/llvm/llvm-project/pull/101428
  [3] https://github.com/llvm/llvm-project/pull/106494
2024-09-03 07:15:18 -07:00

112 lines
5.2 KiB
LLVM

; RUN: llc < %s -march=bpfel -mcpu=v1 | FileCheck %s
; @add: walks %n consecutive i16 elements starting at %a, adding each loaded
; element to a running i16 value that starts at 0, and returns that value.
; When %n is 0 the loop is skipped and 0 is returned.
; The FileCheck pattern embedded in the loop body expects a register-to-register
; "+=" instruction in the assembly llc emits for this function.
define zeroext i16 @add(ptr nocapture %a, i16 zeroext %n) nounwind readonly {
entry:
; Guard: branch straight to the exit block when there is nothing to iterate.
%cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
br i1 %cmp8, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %i.010 is the element index and %sum.09 the running value; both enter as 0.
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
%arrayidx = getelementptr i16, ptr %a, i16 %i.010 ; <ptr> [#uses=1]
; CHECK-LABEL: add:
; CHECK: r{{[0-9]+}} += r{{[0-9]+}}
%tmp4 = load i16, ptr %arrayidx ; <i16> [#uses=1]
; The operation under test: loaded element + running value.
%add = add i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
; Leave the loop once the incremented index equals %n.
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Result is 0 when coming from the guard, otherwise the last value of %add.
%sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
ret i16 %sum.0.lcssa
}
; @sub: walks %n consecutive i16 elements starting at %a; on every iteration the
; running i16 value (initially 0) is replaced by "loaded element - running value".
; When %n is 0 the loop is skipped and 0 is returned.
; The FileCheck pattern embedded in the loop body expects a register-to-register
; "-=" instruction in the assembly llc emits for this function.
define zeroext i16 @sub(ptr nocapture %a, i16 zeroext %n) nounwind readonly {
entry:
; Guard: branch straight to the exit block when there is nothing to iterate.
%cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
br i1 %cmp8, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %i.010 is the element index and %sum.09 the running value; both enter as 0.
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
%arrayidx = getelementptr i16, ptr %a, i16 %i.010 ; <ptr> [#uses=1]
; CHECK-LABEL: sub:
; CHECK: r{{[0-9]+}} -= r{{[0-9]+}}
%tmp4 = load i16, ptr %arrayidx ; <i16> [#uses=1]
; The operation under test. Note the value keeps the name %add even though it is
; a subtraction, and the operand order is element minus running value.
%add = sub i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
; Leave the loop once the incremented index equals %n.
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Result is 0 when coming from the guard, otherwise the last value of %add.
%sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
ret i16 %sum.0.lcssa
}
; @or: walks %n consecutive i16 elements starting at %a, OR-ing each loaded
; element into a running i16 value that starts at 0, and returns that value.
; When %n is 0 the loop is skipped and 0 is returned.
; The FileCheck pattern embedded in the loop body expects a register-to-register
; "|=" instruction in the assembly llc emits for this function.
define zeroext i16 @or(ptr nocapture %a, i16 zeroext %n) nounwind readonly {
entry:
; Guard: branch straight to the exit block when there is nothing to iterate.
%cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
br i1 %cmp8, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %i.010 is the element index and %sum.09 the running value; both enter as 0.
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
%arrayidx = getelementptr i16, ptr %a, i16 %i.010 ; <ptr> [#uses=1]
; CHECK-LABEL: or:
; CHECK: r{{[0-9]+}} |= r{{[0-9]+}}
%tmp4 = load i16, ptr %arrayidx ; <i16> [#uses=1]
; The operation under test (the value keeps the name %add although it is an OR).
%add = or i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
; Leave the loop once the incremented index equals %n.
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Result is 0 when coming from the guard, otherwise the last value of %add.
%sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
ret i16 %sum.0.lcssa
}
; @xor: walks %n consecutive i16 elements starting at %a, XOR-ing each loaded
; element into a running i16 value that starts at 0, and returns that value.
; When %n is 0 the loop is skipped and 0 is returned.
; The FileCheck pattern embedded in the loop body expects a register-to-register
; "^=" instruction in the assembly llc emits for this function.
define zeroext i16 @xor(ptr nocapture %a, i16 zeroext %n) nounwind readonly {
entry:
; Guard: branch straight to the exit block when there is nothing to iterate.
%cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
br i1 %cmp8, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %i.010 is the element index and %sum.09 the running value; both enter as 0.
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
%arrayidx = getelementptr i16, ptr %a, i16 %i.010 ; <ptr> [#uses=1]
; CHECK-LABEL: xor:
; CHECK: r{{[0-9]+}} ^= r{{[0-9]+}}
%tmp4 = load i16, ptr %arrayidx ; <i16> [#uses=1]
; The operation under test (the value keeps the name %add although it is an XOR).
%add = xor i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
; Leave the loop once the incremented index equals %n.
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Result is 0 when coming from the guard, otherwise the last value of %add.
%sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
ret i16 %sum.0.lcssa
}
; @and: walks %n consecutive i16 elements starting at %a, AND-ing each loaded
; element with a running i16 value, and returns that value. The running value
; enters the loop as 0 (see the %sum.09 phi).
; When %n is 0 the loop is skipped and 0 is returned.
; The FileCheck pattern embedded in the loop body expects a register-to-register
; "&=" instruction in the assembly llc emits for this function.
define zeroext i16 @and(ptr nocapture %a, i16 zeroext %n) nounwind readonly {
entry:
; Guard: branch straight to the exit block when there is nothing to iterate.
%cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
br i1 %cmp8, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
; %i.010 is the element index and %sum.09 the running value; both enter as 0.
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
%arrayidx = getelementptr i16, ptr %a, i16 %i.010 ; <ptr> [#uses=1]
; CHECK-LABEL: and:
; CHECK: r{{[0-9]+}} &= r{{[0-9]+}}
%tmp4 = load i16, ptr %arrayidx ; <i16> [#uses=1]
; The operation under test (the value keeps the name %add although it is an AND).
%add = and i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
; Leave the loop once the incremented index equals %n.
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
; Result is 0 when coming from the guard, otherwise the last value of %add.
%sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
ret i16 %sum.0.lcssa
}