This changes the default value used for the mask policy from mask undisturbed (mu) to mask agnostic (ma). In hardware, there may be a minor preference for ta/ma, but since this only applies to instructions which don't use the mask policy bit, it is functionally mostly a nop. The main value is to make future changes toward using MA when legal for masked instructions easier to review, by reducing test churn.
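For instance, the unmasked vmacc in the test below now selects

  vsetvli zero, a0, e8, m8, tu, ma

where, prior to this change, the same instruction would have requested mask undisturbed:

  vsetvli zero, a0, e8, m8, tu, mu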
The prior code was motivated by a desire to minimize state transitions between masked and unmasked code. This patch achieves the same effect using the demanded-field logic (landed in afb45ff), and I spotted no regressions in the test diffs (given the size, I have only been able to skim). I do want to call out that regressions are possible here; the demanded-field analysis currently only works at block-local scope, so e.g. a tight loop mixing masked and unmasked computation might see an extra vsetvli or two.
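As a purely hypothetical sketch of that limitation (not taken from the test diffs; the register choices and loop shape are made up for illustration): a masked op before the loop leaves mask undisturbed in effect, and an unmasked op in the loop body now requests mask agnostic, so without cross-block demanded-field information the pass cannot prove the toggle is dead on the back edge:

    vsetvli zero, a0, e32, m8, ta, mu
    vadd.vv v8, v16, v24, v0.t          # masked, keeps mu in effect
  loop:
    vsetvli zero, a0, e32, m8, ta, ma   # unmasked default is now ma
    vadd.vv v8, v8, v16
    bnez a1, loop

Previously both ops defaulted to mu, so the vsetvli inside the loop would not have been needed.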
Differential Revision: https://reviews.llvm.org/D133803
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64IV

declare <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8(
  <vscale x 64 x i8>,
  <vscale x 64 x i8>,
  <vscale x 64 x i8>,
  i64,
  i64);

define <vscale x 64 x i8> @callee(<vscale x 64 x i8> %arg0, <vscale x 64 x i8> %arg1, <vscale x 64 x i8> %arg2) {
; RV64IV-LABEL: callee:
; RV64IV:       # %bb.0:
; RV64IV-NEXT:    vl8r.v v24, (a0)
; RV64IV-NEXT:    li a0, 1024
; RV64IV-NEXT:    vsetvli zero, a0, e8, m8, tu, ma
; RV64IV-NEXT:    vmacc.vv v8, v16, v24
; RV64IV-NEXT:    ret
  %ret = call <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8(
    <vscale x 64 x i8> %arg0,
    <vscale x 64 x i8> %arg1,
    <vscale x 64 x i8> %arg2, i64 1024, i64 0)
  ret <vscale x 64 x i8> %ret
}

define <vscale x 64 x i8> @caller() {
; RV64IV-LABEL: caller:
; RV64IV:       # %bb.0:
; RV64IV-NEXT:    addi sp, sp, -80
; RV64IV-NEXT:    .cfi_def_cfa_offset 80
; RV64IV-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64IV-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64IV-NEXT:    .cfi_offset ra, -8
; RV64IV-NEXT:    .cfi_offset s0, -16
; RV64IV-NEXT:    addi s0, sp, 80
; RV64IV-NEXT:    .cfi_def_cfa s0, 0
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 5
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    andi sp, sp, -64
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    li a1, 24
; RV64IV-NEXT:    mul a0, a0, a1
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v8, (a0)
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 4
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v16, (a0)
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 3
; RV64IV-NEXT:    add a0, sp, a0
; RV64IV-NEXT:    addi a0, a0, 64
; RV64IV-NEXT:    vl8r.v v24, (a0)
; RV64IV-NEXT:    addi a1, sp, 64
; RV64IV-NEXT:    addi a0, sp, 64
; RV64IV-NEXT:    vs8r.v v24, (a1)
; RV64IV-NEXT:    call callee@plt
; RV64IV-NEXT:    addi sp, s0, -80
; RV64IV-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64IV-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64IV-NEXT:    addi sp, sp, 80
; RV64IV-NEXT:    ret
  %local0 = alloca <vscale x 64 x i8>
  %local1 = alloca <vscale x 64 x i8>
  %local2 = alloca <vscale x 64 x i8>
  %arg0 = load volatile <vscale x 64 x i8>, <vscale x 64 x i8>* %local0
  %arg1 = load volatile <vscale x 64 x i8>, <vscale x 64 x i8>* %local1
  %arg2 = load volatile <vscale x 64 x i8>, <vscale x 64 x i8>* %local2
  %ret = call <vscale x 64 x i8> @callee(<vscale x 64 x i8> %arg0,
                                         <vscale x 64 x i8> %arg1,
                                         <vscale x 64 x i8> %arg2)
  ret <vscale x 64 x i8> %ret
}