//===-- PPCIntrinsicCall.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Helper routines for constructing the FIR dialect of MLIR for PowerPC
// intrinsics. MLIR interfaces and MLIR's coding style
// (https://mlir.llvm.org/getting_started/DeveloperGuide/) are used extensively
// in this module.
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/PPCIntrinsicCall.h"
#include "flang/Evaluate/common.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"

namespace fir {

using PI = PPCIntrinsicLibrary;

// PPC specific intrinsic handlers.
static constexpr IntrinsicHandler ppcHandlers[]{
    {"__ppc_mma_assemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_assemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssemblePair, MMAHandlerOp::SubToFunc>),
     {{{"pair", asAddr}, {"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_build_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc,
                         MMAHandlerOp::SubToFuncReverseArgOnLE>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"acc", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassemblePair, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"pair", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2nn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2np,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2spp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmfacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmfacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmtacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmtacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxsetaccz",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxsetaccz, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mtfsf",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<false>),
     {{{"mask", asValue}, {"r", asValue}}},
     /*isElemental=*/false},
    {"__ppc_mtfsfi",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>),
     {{{"bf", asValue}, {"i", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_abs",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_add",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Add>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_and",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::And>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_any_ge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAnyCompare<VecOp::Anyge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpgt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpgt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmple",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmple>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmplt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmplt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_convert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Convert>),
     {{{"v", asValue}, {"mold", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_ctf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Ctf>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cvf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Cvf>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_extract",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecExtract),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_insert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_ld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ld>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lde",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lde>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ldl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ldl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsr>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxv",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Lxv>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxvp",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lxvp>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_mergeh",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergeh>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mergel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergel>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_msub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Msub>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mul",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Mul>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_nmadd",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Nmadd>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_perm",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Perm>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_permi",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Permi>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sld>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sldw",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sldw>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sll",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sll>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_slo",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Slo>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat_s32_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat_s32>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splats",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splats>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sr>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_srl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Srl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sro",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sro>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_st",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::St>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ste",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Ste>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxv",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Stxv>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxvp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Stxvp>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_sub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Sub>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xl_be",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Xlbe>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xld2_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xld2>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlds",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlw4_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xlw4>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xor",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Xor>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xst",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xst_be",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst_be>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstd2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstd2>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstw4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstw4>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
};
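// Note: findPPCIntrinsicHandler below searches this table with
// llvm::lower_bound, so entries must remain sorted by name.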

static constexpr MathOperation ppcMathOperations[] = {
    // fcfi is just another name for fcfid; there is no llvm.ppc.fcfi.
    {"__ppc_fcfi", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfid", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfud", "llvm.ppc.fcfud", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctid", "llvm.ppc.fctid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctidz", "llvm.ppc.fctidz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiw", "llvm.ppc.fctiw", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiwz", "llvm.ppc.fctiwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctudz", "llvm.ppc.fctudz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctuwz", "llvm.ppc.fctuwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fmadd", "llvm.fma.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmadd", "llvm.fma.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmsub", "llvm.ppc.fmsubs",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fmsub", "llvm.ppc.fmsub",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabss", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabs", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadds",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadd",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fre", "llvm.ppc.fre", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fres", "llvm.ppc.fres", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_frsqrte", "llvm.ppc.frsqrte", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_frsqrtes", "llvm.ppc.frsqrtes",
     genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
    {"__ppc_vec_cvbf16spn", "llvm.ppc.vsx.xvcvbf16spn",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_cvspbf16_", "llvm.ppc.vsx.xvcvspbf16",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvminsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvmindp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
};

const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) {
  auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) {
    return name.compare(ppcHandler.name) > 0;
  };
  auto result = llvm::lower_bound(ppcHandlers, name, compare);
  return result != std::end(ppcHandlers) && result->name == name ? result
                                                                 : nullptr;
}
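// For example, findPPCIntrinsicHandler("__ppc_vec_add") binary-searches
// ppcHandlers and returns the "__ppc_vec_add" entry above, while a name that
// is absent from the table yields nullptr.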

using RtMap = Fortran::common::StaticMultimapView<MathOperation>;
static constexpr RtMap ppcMathOps(ppcMathOperations);
static_assert(ppcMathOps.Verify() && "map must be sorted");

std::pair<const MathOperation *, const MathOperation *>
checkPPCMathOperationsRange(llvm::StringRef name) {
  return ppcMathOps.equal_range(name);
}

// Helper functions for vector element ordering.
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          !converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::changeVecElemOrder() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() !=
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_pair
  // Arguments:
  // quadCnt: number of arguments that have __vector_quad type, followed by
  // pairCnt: number of arguments that have __vector_pair type, followed by
  // vecCnt: number of arguments that have vector(integer) type, followed by
  // intCnt: number of arguments that have integer type
  // vecElemBitSize: specifies the size of vector elements in bits
  // intBitSize: specifies the size of integer arguments in bits
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vpType});
}
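// For example, genMmaVpFuncType(context, /*quadCnt=*/0, /*pairCnt=*/0,
// /*vecCnt=*/2) produces the signature
//   (vector<16xi8>, vector<16xi8>) -> !fir.vector<256:i1>
// since each vector(integer) argument is 128 bits wide (128 / 8 = 16 lanes)
// and the result is always a 256-bit __vector_pair.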

static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_quad
  // Arguments:
  // quadCnt: number of arguments that have __vector_quad type, followed by
  // pairCnt: number of arguments that have __vector_pair type, followed by
  // vecCnt: number of arguments that have vector(integer) type, followed by
  // intCnt: number of arguments that have integer type
  // vecElemBitSize: specifies the size of vector elements in bits
  // intBitSize: specifies the size of integer arguments in bits
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vqType});
}

mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context,
                                             MMAOp mmaOp) {
  auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
  llvm::SmallVector<mlir::Type> members;

  if (mmaOp == MMAOp::DisassembleAcc) {
    auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vqType}, {resType});
  } else if (mmaOp == MMAOp::DisassemblePair) {
    auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vpType}, {resType});
  } else {
    llvm_unreachable(
        "Unsupported intrinsic code for function signature generator");
  }
}
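// For example, MMAOp::DisassembleAcc maps a single 512-bit __vector_quad to a
// literal struct of four vector<16xi8> members, and MMAOp::DisassemblePair
// maps a 256-bit __vector_pair to a struct of two such vectors.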

//===----------------------------------------------------------------------===//
// PowerPC specific intrinsic handlers.
//===----------------------------------------------------------------------===//

// MTFSF, MTFSFI
template <bool isImm>
void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  llvm::SmallVector<mlir::Value> scalarArgs;
  for (const fir::ExtendedValue &arg : args)
    if (arg.getUnboxed())
      scalarArgs.emplace_back(fir::getBase(arg));
    else
      mlir::emitError(loc, "nonscalar intrinsic argument");

  mlir::FunctionType libFuncType;
  mlir::func::FuncOp funcOp;
  if (isImm) {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi", libFuncType);
  } else {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf", libFuncType);
  }
  builder.create<fir::CallOp>(loc, funcOp, scalarArgs);
}
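// For example, genMtfsf<true> (MTFSFI) emits a call to llvm.ppc.mtfsfi with an
// (i32, i32) signature, while genMtfsf<false> (MTFSF) emits a call to
// llvm.ppc.mtfsf with an (i32, f64) signature, as selected by the isImm
// template argument.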

// VEC_ABS
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 1);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};

  mlir::func::FuncOp funcOp{nullptr};
  mlir::FunctionType ftype;
  llvm::StringRef fname{};
  if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.fabs.v4f32";
      ftype =
          genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder);
    } else if (vTypeInfo.isFloat64()) {
      fname = "llvm.fabs.v2f64";
      ftype =
          genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder);
    }

    funcOp = builder.createFunction(loc, fname, ftype);
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])};
    return callOp.getResult(0);
  } else if (auto eleTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
    // vec_abs(arg1) = max(0 - arg1, arg1)

    auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)};
    auto varg1{builder.createConvert(loc, newVecTy, argBases[0])};
    // construct vector(0,..)
    auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)};
    auto vZero{
        builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)};
    auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)};

    mlir::func::FuncOp funcOp{nullptr};
    switch (eleTy.getWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.vmaxsb";
      ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                          Ty::IntegerVector<1>>(context, builder);
      break;
    case 16:
      fname = "llvm.ppc.altivec.vmaxsh";
      ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                          Ty::IntegerVector<2>>(context, builder);
      break;
    case 32:
      fname = "llvm.ppc.altivec.vmaxsw";
      ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                          Ty::IntegerVector<4>>(context, builder);
      break;
    case 64:
      fname = "llvm.ppc.altivec.vmaxsd";
      ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                          Ty::IntegerVector<8>>(context, builder);
      break;
    default:
      llvm_unreachable("invalid integer size");
    }
    funcOp = builder.createFunction(loc, fname, ftype);

    mlir::Value args[] = {zeroSubVarg1, varg1};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)};
    return builder.createConvert(loc, argBases[0].getType(),
                                 callOp.getResult(0));
  }

  llvm_unreachable("unknown vector type");
}
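// For example, a vector(integer(4)) argument takes the integer branch above:
// the 32-bit element width selects llvm.ppc.altivec.vmaxsw, so the result is
// computed as vmaxsw(0 - x, x), matching the |x| = max(0 - x, x) identity.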

// VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argsTy{getTypesForArgs(argBases)};
  assert(mlir::isa<fir::VectorType>(argsTy[0]) &&
         mlir::isa<fir::VectorType>(argsTy[1]));

  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  const auto isInteger{mlir::isa<mlir::IntegerType>(vecTyInfo.eleTy)};
  const auto isFloat{mlir::isa<mlir::FloatType>(vecTyInfo.eleTy)};
  assert((isInteger || isFloat) && "unknown vector type");

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  mlir::Value r{nullptr};
  switch (vop) {
  case VecOp::Add:
    if (isInteger)
      r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Mul:
    if (isInteger)
      r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Sub:
    if (isInteger)
      r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::And:
  case VecOp::Xor: {
    mlir::Value arg1{nullptr};
    mlir::Value arg2{nullptr};
    if (isInteger) {
      arg1 = vargs[0];
      arg2 = vargs[1];
    } else if (isFloat) {
      // bitcast the arguments to integer
      auto wd{mlir::dyn_cast<mlir::FloatType>(vecTyInfo.eleTy).getWidth()};
      auto ftype{builder.getIntegerType(wd)};
      auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)};
      arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]);
      arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]);
    }
    if (vop == VecOp::And)
      r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2);
    else if (vop == VecOp::Xor)
      r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2);

    if (isFloat)
      r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r);

    break;
  }
  }

  return builder.createConvert(loc, argsTy[0], r);
}
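// Note: for And/Xor on real vectors, the operands are first bitcast to integer
// vectors of the same element width (e.g. vector<4xf32> to vector<4xi32>),
// combined with arith.andi/arith.xori, and bitcast back, because the arith
// dialect defines these bitwise operations only on integer types.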

// VEC_ANY_GE
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  assert(vop == VecOp::Anyge && "unknown vector compare operation");
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])};
  [[maybe_unused]] const auto isSupportedTy{
      mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>(
          vTypeInfo.eleTy)};
  assert(isSupportedTy && "unsupported vector type");

  // Constants for mapping CR6 bits to predicate result
  enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 };

  auto context{builder.getContext()};

  static std::map<std::pair<ParamTypeId, unsigned>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      uiBuiltin{
          {std::make_pair(ParamTypeId::IntegerVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsb.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<1>,
                           Ty::IntegerVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsh.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<2>,
                           Ty::IntegerVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsw.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<4>,
                           Ty::IntegerVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsd.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<8>,
                           Ty::IntegerVector<8>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtub.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuh.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuw.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtud.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>(
                   context, builder))},
      };

  mlir::FunctionType ftype{nullptr};
  llvm::StringRef fname;
  const auto i32Ty{mlir::IntegerType::get(context, 32)};
  llvm::SmallVector<mlir::Value> cmpArgs;
  mlir::Value op{nullptr};
  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};

  if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
    std::pair<llvm::StringRef, mlir::FunctionType> bi;
    bi = (elementTy.isUnsignedInteger())
             ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)]
             : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)];

    fname = std::get<0>(bi);
    ftype = std::get<1>(bi);

    op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV);
    cmpArgs.emplace_back(op);
    // reverse the argument order
    cmpArgs.emplace_back(argBases[1]);
    cmpArgs.emplace_back(argBases[0]);
  } else if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.ppc.vsx.xvcmpgesp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>,
                          Ty::RealVector<4>>(context, builder);
    } else {
      fname = "llvm.ppc.vsx.xvcmpgedp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>,
                          Ty::RealVector<8>>(context, builder);
    }
    op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV);
    cmpArgs.emplace_back(op);
    cmpArgs.emplace_back(argBases[0]);
    cmpArgs.emplace_back(argBases[1]);
  }
  assert((!fname.empty() && ftype) && "invalid type");

  mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)};
  auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)};
  return callOp.getResult(0);
}
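// Note: the ".p" predicate builtins return an i32 derived from CR6, selected
// by the leading constant. For integers, vec_any_ge(arg1, arg2) is computed as
// vcmpgt*(CR6_LT_REV, arg2, arg1), i.e. "not all(arg2 > arg1)", which is
// equivalent to "any(arg1 >= arg2)"; hence the reversed argument order.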

static std::pair<llvm::StringRef, mlir::FunctionType>
getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop,
                         fir::FirOpBuilder &builder) {
  auto context{builder.getContext()};
  static std::map<std::pair<ParamTypeId, unsigned>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      iuBuiltinName{
          {std::make_pair(ParamTypeId::IntegerVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsb",
               genFuncType<Ty::UnsignedVector<1>, Ty::IntegerVector<1>,
                           Ty::IntegerVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsh",
               genFuncType<Ty::UnsignedVector<2>, Ty::IntegerVector<2>,
                           Ty::IntegerVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsw",
               genFuncType<Ty::UnsignedVector<4>, Ty::IntegerVector<4>,
                           Ty::IntegerVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsd",
               genFuncType<Ty::UnsignedVector<8>, Ty::IntegerVector<8>,
                           Ty::IntegerVector<8>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtub",
               genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                           Ty::UnsignedVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuh",
               genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                           Ty::UnsignedVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuw",
               genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                           Ty::UnsignedVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtud",
               genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                           Ty::UnsignedVector<8>>(context, builder))}};

  // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with
  // arguments reversed.
  enum class Cmp { gtOrLt, geOrLe };
  static std::map<std::pair<Cmp, int>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      rGBI{{std::make_pair(Cmp::geOrLe, 32),
            std::make_pair("llvm.ppc.vsx.xvcmpgesp",
                           genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
                                       Ty::RealVector<4>>(context, builder))},
           {std::make_pair(Cmp::geOrLe, 64),
            std::make_pair("llvm.ppc.vsx.xvcmpgedp",
                           genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
                                       Ty::RealVector<8>>(context, builder))},
           {std::make_pair(Cmp::gtOrLt, 32),
            std::make_pair("llvm.ppc.vsx.xvcmpgtsp",
                           genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
                                       Ty::RealVector<4>>(context, builder))},
           {std::make_pair(Cmp::gtOrLt, 64),
            std::make_pair("llvm.ppc.vsx.xvcmpgtdp",
                           genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
                                       Ty::RealVector<8>>(context, builder))}};

  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
  std::pair<llvm::StringRef, mlir::FunctionType> specFunc;
  if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy))
    specFunc =
        (elementTy.isUnsignedInteger())
            ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)]
            : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)];
  else if (vTypeInfo.isFloat())
    specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple)
                   ? rGBI[std::make_pair(Cmp::geOrLe, width)]
                   : rGBI[std::make_pair(Cmp::gtOrLt, width)];

  assert(!std::get<0>(specFunc).empty() && "unknown builtin name");
  assert(std::get<1>(specFunc) && "unknown function type");
  return specFunc;
}

// VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{
      getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)};

  mlir::func::FuncOp funcOp = builder.createFunction(
      loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam));

  mlir::Value res{nullptr};

  if (auto eTy = mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)) {
    constexpr int firstArg{0};
    constexpr int secondArg{1};
    std::map<VecOp, std::array<int, 2>> argOrder{
        {VecOp::Cmpge, {secondArg, firstArg}},
        {VecOp::Cmple, {firstArg, secondArg}},
        {VecOp::Cmpgt, {firstArg, secondArg}},
        {VecOp::Cmplt, {secondArg, firstArg}}};

    // Construct the function return type, unsigned vector, for conversion.
    auto itype = mlir::IntegerType::get(context, eTy.getWidth(),
                                        mlir::IntegerType::Unsigned);
    auto returnType = fir::VectorType::get(vecTyInfo.len, itype);

    switch (vop) {
    case VecOp::Cmpgt:
    case VecOp::Cmplt: {
      // arg1 > arg2 --> vcmpgt(arg1, arg2)
      // arg1 < arg2 --> vcmpgt(arg2, arg1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      res = callOp.getResult(0);
      break;
    }
    case VecOp::Cmpge:
    case VecOp::Cmple: {
      // arg1 >= arg2 --> vcmpgt(arg2, arg1) xor vector(-1)
      // arg1 <= arg2 --> vcmpgt(arg1, arg2) xor vector(-1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};

      // Construct a constant vector(-1)
      auto negOneVal{builder.createIntegerConstant(
          loc, getConvertedElementType(context, eTy), -1)};
      auto vNegOne{builder.create<mlir::vector::BroadcastOp>(
          loc, vecTyInfo.toMlirVectorType(context), negOneVal)};

      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      mlir::Value callRes{callOp.getResult(0)};
      auto vargs2{
          convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})};
      auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)};

      res = builder.createConvert(loc, returnType, xorRes);
      break;
    }
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
  } else if (vecTyInfo.isFloat()) {
    mlir::Value vargs[2];
    switch (vop) {
    case VecOp::Cmpge:
    case VecOp::Cmpgt:
      vargs[0] = argBases[0];
      vargs[1] = argBases[1];
      break;
    case VecOp::Cmple:
    case VecOp::Cmplt:
      // Swap the arguments since xvcmpg[et] is used
      vargs[0] = argBases[1];
      vargs[1] = argBases[0];
      break;
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
    res = callOp.getResult(0);
  } else
    llvm_unreachable("invalid vector type");

  return res;
}
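
// Identity used above (illustrative): only "greater than" builtins exist for
// integer vectors, so a >= b is computed as not(b > a), i.e.
//   vec_cmpge(a, b) = vcmpgt(b, a) xor <-1, -1, ...>
// where xor with an all-ones vector is a bitwise complement of the 0/-1
// comparison mask.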

static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder,
                                              mlir::Location loc,
                                              mlir::Value arg) {
  auto ty = arg.getType();
  auto context{builder.getContext()};
  auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};

  if (ty != vtype)
    arg = builder.create<mlir::LLVM::BitcastOp>(loc, vtype, arg).getResult();

  llvm::SmallVector<int64_t, 16> mask{4, 5, 6, 7, 0, 1, 2, 3,
                                      12, 13, 14, 15, 8, 9, 10, 11};
  arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask);
  if (ty != vtype)
    arg = builder.create<mlir::LLVM::BitcastOp>(loc, ty, arg);
  return arg;
}
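
// Example (illustrative): viewed as 16 bytes, the shuffle mask above swaps
// each pair of adjacent 4-byte words, so bytes [0..3][4..7][8..11][12..15]
// become [4..7][0..3][12..15][8..11]. For a <4 x f32> value this exchanges
// elements 0<->1 and 2<->3.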

// VEC_CONVERT, VEC_CTF, VEC_CVF
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
  auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])};
  const auto i32Ty{mlir::IntegerType::get(context, 32)};

  switch (vop) {
  case VecOp::Ctf: {
    assert(args.size() == 2);
    auto convArg{builder.createConvert(loc, i32Ty, argBases[1])};
    auto eTy{mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)};
    assert(eTy && "Unsupported vector type");
    const auto isUnsigned{eTy.isUnsignedInteger()};
    const auto width{eTy.getWidth()};

    if (width == 32) {
      auto ftype{(isUnsigned)
                     ? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>,
                                   Ty::Integer<4>>(context, builder)
                     : genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>,
                                   Ty::Integer<4>>(context, builder)};
      const llvm::StringRef fname{(isUnsigned) ? "llvm.ppc.altivec.vcfux"
                                               : "llvm.ppc.altivec.vcfsx"};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      mlir::Value newArgs[] = {argBases[0], convArg};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

      return callOp.getResult(0);
    } else if (width == 64) {
      auto fTy{mlir::FloatType::getF64(context)};
      auto ty{mlir::VectorType::get(2, fTy)};

      // vec_ctf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1))
      auto convOp{(isUnsigned)
                      ? builder.create<mlir::LLVM::UIToFPOp>(loc, ty, vArg1)
                      : builder.create<mlir::LLVM::SIToFPOp>(loc, ty, vArg1)};

      // Construct the vector <1.0/(1<<arg2), 1.0/(1<<arg2)>
      auto constInt{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
          mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[1].getDefiningOp())
              .getValue())};
      assert(constInt && "expected integer constant argument");
      double f{1.0 / (1 << constInt.getInt())};
      llvm::SmallVector<double> vals{f, f};
      auto constOp{builder.create<mlir::arith::ConstantOp>(
          loc, ty, builder.getF64VectorAttr(vals))};

      auto mulOp{builder.create<mlir::LLVM::FMulOp>(
          loc, ty, convOp->getResult(0), constOp)};

      return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp);
    }
    llvm_unreachable("invalid element integer kind");
  }
  case VecOp::Convert: {
    assert(args.size() == 2);
    // resultType has mold type (if scalar) or element type (if array)
    auto resTyInfo{getVecTypeFromFirType(resultType)};
    auto moldTy{resTyInfo.toMlirVectorType(context)};
    auto firTy{resTyInfo.toFirVectorType()};

    // vec_convert(v, mold) = bitcast v to "type of mold"
    auto conv{builder.create<mlir::LLVM::BitcastOp>(loc, moldTy, vArg1)};

    return builder.createConvert(loc, firTy, conv);
  }
  case VecOp::Cvf: {
    assert(args.size() == 1);

    mlir::Value newArgs[]{vArg1};
    if (vecTyInfo.isFloat32()) {
      if (changeVecElemOrder())
        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

      const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
      auto ftype{
          genFuncType<Ty::RealVector<8>, Ty::RealVector<4>>(context, builder)};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

      return callOp.getResult(0);
    } else if (vecTyInfo.isFloat64()) {
      const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"};
      auto ftype{
          genFuncType<Ty::RealVector<4>, Ty::RealVector<8>>(context, builder)};
      auto funcOp{builder.createFunction(loc, fname, ftype)};
      newArgs[0] =
          builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0);
      auto fvf32Ty{newArgs[0].getType()};
      auto f32type{mlir::FloatType::getF32(context)};
      auto mvf32Ty{mlir::VectorType::get(4, f32type)};
      newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);

      if (changeVecElemOrder())
        newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);

      return builder.createConvert(loc, fvf32Ty, newArgs[0]);
    }
    llvm_unreachable("invalid element integer kind");
  }
  default:
    llvm_unreachable("Invalid vector operation for generator");
  }
}
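
// Worked arithmetic for the 64-bit VEC_CTF path above (illustrative): the
// scale argument selects a power-of-two divisor, so for arg2 = 3 the
// constant vector is <1.0/8, 1.0/8> and the result is sitofp(arg1) * 0.125,
// matching the "divide by 2**arg2" semantics of vec_ctf.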

static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder,
                                             mlir::Location loc,
                                             VecTypeInfo vecInfo,
                                             mlir::Value idx) {
  mlir::Value numSub1{
      builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)};
  return builder.create<mlir::LLVM::SubOp>(loc, idx.getType(), numSub1, idx);
}
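
// Example (illustrative): for a 4-element vector, index i is mapped to
// (len - 1) - i, so 0,1,2,3 become 3,2,1,0 -- the same element addressed
// from the other end of the register.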

// VEC_EXTRACT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto varg0{builder.createConvert(loc, mlirTy, argBases[0])};

  // Take arg2 modulo the number of elements in arg1 to determine the
  // element position.
  auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)};
  mlir::Value uremOp{
      builder.create<mlir::LLVM::URemOp>(loc, argBases[1], numEle)};

  if (!isNativeVecElemOrderOnLE())
    uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);

  return builder.create<mlir::vector::ExtractElementOp>(loc, varg0, uremOp);
}
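
// Example (illustrative): vec_extract(v, 5) on a 4-element vector reduces the
// index with urem, 5 mod 4 = 1, so element 1 is extracted (element 2 after
// the order conversion when BE element ordering is in effect on LE).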

// VEC_INSERT
fir::ExtendedValue
PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[1])};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto varg1{builder.createConvert(loc, mlirTy, argBases[1])};

  auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)};
  mlir::Value uremOp{
      builder.create<mlir::LLVM::URemOp>(loc, argBases[2], numEle)};

  if (!isNativeVecElemOrderOnLE())
    uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);

  auto res{builder.create<mlir::vector::InsertElementOp>(loc, argBases[0],
                                                         varg1, uremOp)};
  return builder.create<fir::ConvertOp>(loc, vecTyInfo.toFirVectorType(), res);
}

// VEC_MERGEH, VEC_MERGEL
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  llvm::SmallVector<int64_t, 16> mMask; // native vector element order mask
  llvm::SmallVector<int64_t, 16> rMask; // non-native vector element order mask

  switch (vop) {
  case VecOp::Mergeh: {
    switch (vecTyInfo.len) {
    case 2: {
      enum { V1 = 0, V2 = 2 };
      mMask = {V1 + 0, V2 + 0};
      rMask = {V2 + 1, V1 + 1};
      break;
    }
    case 4: {
      enum { V1 = 0, V2 = 4 };
      mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1};
      rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3};
      break;
    }
    case 8: {
      enum { V1 = 0, V2 = 8 };
      mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3};
      rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7};
      break;
    }
    case 16:
      mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
               0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
      rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B,
               0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F};
      break;
    default:
      llvm_unreachable("unexpected vector length");
    }
    break;
  }
  case VecOp::Mergel: {
    switch (vecTyInfo.len) {
    case 2: {
      enum { V1 = 0, V2 = 2 };
      mMask = {V1 + 1, V2 + 1};
      rMask = {V2 + 0, V1 + 0};
      break;
    }
    case 4: {
      enum { V1 = 0, V2 = 4 };
      mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3};
      rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1};
      break;
    }
    case 8: {
      enum { V1 = 0, V2 = 8 };
      mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7};
      rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3};
      break;
    }
    case 16:
      mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B,
               0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F};
      rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03,
               0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07};
      break;
    default:
      llvm_unreachable("unexpected vector length");
    }
    break;
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  llvm::SmallVector<int64_t, 16> &mergeMask =
      (isBEVecElemOrderOnLE()) ? rMask : mMask;

  auto callOp{builder.create<mlir::vector::ShuffleOp>(loc, vargs[0], vargs[1],
                                                      mergeMask)};
  return builder.createConvert(loc, resultType, callOp);
}
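
// Example (illustrative): for two 4-element vectors a and b, the native
// mergeh mask {0, 4, 1, 5} produces {a0, b0, a1, b1}, i.e. the "high"
// halves interleaved; mergel's native mask {2, 6, 3, 7} interleaves the
// "low" halves as {a2, b2, a3, b3}.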

static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
                                      mlir::Location loc, mlir::Value baseAddr,
                                      mlir::Value offset) {
  auto typeExtent{fir::SequenceType::getUnknownExtent()};
  // Construct an !fir.ref<!fir.array<?xi8>> type
  auto arrRefTy{builder.getRefType(fir::SequenceType::get(
      {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
  // Convert arg to !fir.ref<!fir.array<?xi8>>
  auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};

  return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
}

static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
                                         mlir::Location loc, mlir::Value v,
                                         int64_t len) {
  assert(mlir::isa<mlir::VectorType>(v.getType()));
  assert(len > 0);
  llvm::SmallVector<int64_t, 16> mask;
  for (int64_t i = 0; i < len; ++i) {
    mask.push_back(len - 1 - i);
  }
  auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
  return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
}
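
// Example (illustrative): len = 4 yields the shuffle mask {3, 2, 1, 0};
// only the first operand's lanes are referenced, so the undef second
// operand never contributes to the result.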

static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
                                             const int val) {
  auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
  auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
  return builder.getNamedAttr("alignment", alignAttr);
}

fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)};
  switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) {
  case 8:
    // vec_xlb1
    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
  case 16:
    // vec_xlh8
    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
  case 32:
    // vec_xlw4
    return genVecLdCallGrp<VecOp::Xlw4>(resultType, args);
  case 64:
    // vec_xld2
    return genVecLdCallGrp<VecOp::Xld2>(resultType, args);
  default:
    llvm_unreachable("invalid kind");
  }
  llvm_unreachable("invalid vector operation for generator");
}

template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};

  const auto triple{fir::getTargetTriple(builder.getModule())};
  // Use alignment 1: the computed address is not guaranteed to be naturally
  // aligned.
  auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr,
                                          getAlignmentAttr(builder, 1))};
  if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) ||
      (vop == VecOp::Xlbe && triple.isLittleEndian()))
    return builder.createConvert(
        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}

// VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  // Prepare the return type in FIR.
  auto vecResTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecResTyInfo.toMlirVectorType(context)};
  auto firTy{vecResTyInfo.toFirVectorType()};

  // llvm.ppc.altivec.lvx* returns <4xi32>.
  // Others, such as "llvm.ppc.altivec.lvewx", do as well when the element
  // type of arg2 is not an integer type.
  const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)};
  const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)};

  // For vec_ld, need to convert arg0 from i64 to i32
  if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64)
    arg0 = builder.createConvert(loc, i32Ty, arg0);

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};

  mlir::Type intrinResTy{nullptr};
  llvm::StringRef fname{};
  switch (vop) {
  case VecOp::Ld:
    fname = "llvm.ppc.altivec.lvx";
    intrinResTy = mVecI32Ty;
    break;
  case VecOp::Lde:
    switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.lvebx";
      intrinResTy = mlirTy;
      break;
    case 16:
      fname = "llvm.ppc.altivec.lvehx";
      intrinResTy = mlirTy;
      break;
    case 32:
      fname = "llvm.ppc.altivec.lvewx";
      if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy))
        intrinResTy = mlirTy;
      else
        intrinResTy = mVecI32Ty;
      break;
    default:
      llvm_unreachable("invalid vector for vec_lde");
    }
    break;
  case VecOp::Ldl:
    fname = "llvm.ppc.altivec.lvxl";
    intrinResTy = mVecI32Ty;
    break;
  case VecOp::Lxvp:
    fname = "llvm.ppc.vsx.lxvp";
    intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    break;
  case VecOp::Xld2: {
    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be"
                                   : "llvm.ppc.vsx.lxvd2x";
    // llvm.ppc.vsx.lxvd2x* returns <2 x double>
    intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context));
  } break;
  case VecOp::Xlw4:
    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be"
                                   : "llvm.ppc.vsx.lxvw4x";
    // llvm.ppc.vsx.lxvw4x* returns <4xi32>
    intrinResTy = mVecI32Ty;
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})};
  auto funcOp{builder.createFunction(loc, fname, funcType)};
  auto result{
      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};

  if (vop == VecOp::Lxvp)
    return result;

  if (intrinResTy != mlirTy)
    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result);

  if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE())
    return builder.createConvert(
        loc, firTy,
        reverseVectorElements(builder, loc, result, vecResTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}

// VEC_LVSL, VEC_LVSR
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Convert arg0 to i64 type if needed
  auto i64ty{mlir::IntegerType::get(context, 64)};
  if (arg0.getType() != i64ty)
    arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0);

  // The offset is used modulo 16: shift left 56 bits and then
  // arithmetic-shift right 56 bits to clear the upper 56 bits while
  // preserving the sign.
  auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)};
  auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)};
  auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)};

  // Add the offsetArg to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, offset2)};
  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};

  llvm::StringRef fname{};
  switch (vop) {
  case VecOp::Lvsl:
    fname = "llvm.ppc.altivec.lvsl";
    break;
  case VecOp::Lvsr:
    fname = "llvm.ppc.altivec.lvsr";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
  auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})};
  auto funcOp{builder.createFunction(loc, fname, funcType)};
  auto result{
      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};

  if (isNativeVecElemOrderOnLE())
    return builder.createConvert(
        loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));

  return builder.createConvert(loc, firTy, result);
}
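
// Worked arithmetic for the shift pair above (illustrative): for i64
// arg0 = 0x1F3, (arg0 << 56) >> 56 (arithmetic) keeps only the low byte
// sign-extended, i.e. 0xF3 becomes -13, so the coordinate op can index
// backwards from arg1's address when the low byte is negative.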

// VEC_NMADD, VEC_MSUB
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};
  auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)};
  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};

  static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{
      {32,
       std::make_pair(
           "llvm.fma.v4f32",
           genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>(
               context, builder))},
      {64,
       std::make_pair(
           "llvm.fma.v2f64",
           genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>(
               context, builder))}};

  auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]),
                                     std::get<1>(fmaMap[width]))};
  if (vop == VecOp::Nmadd) {
    // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3)
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};

    // We need to convert fir.vector to MLIR vector to use fneg and then back
    // to fir.vector to store.
    auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context),
                                     callOp.getResult(0))};
    auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)};
    return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg);
  } else if (vop == VecOp::Msub) {
    // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3)
    newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]);

    auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
    return callOp.getResult(0);
  }
  llvm_unreachable("Invalid vector operation for generator");
}
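
// Identities used above (illustrative): with fma(a, b, c) = a*b + c,
//   vec_nmadd(a, b, c) = -(a*b + c) = -fma(a, b, c)
//   vec_msub(a, b, c)  =   a*b - c  =  fma(a, b, -c)
// so both operations map onto the single llvm.fma.* intrinsic plus one
// negation.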

// VEC_PERM, VEC_PERMI
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};

  auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))};
  auto vf64Ty{mlir::VectorType::get(2, mlir::FloatType::getF64(context))};

  auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])};
  auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])};

  switch (vop) {
  case VecOp::Perm: {
    VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])};
    auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)};
    auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])};

    if (mlirTy != vi32Ty) {
      mArg0 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg0).getResult();
      mArg1 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg1).getResult();
    }

    auto funcOp{builder.createFunction(
        loc, "llvm.ppc.altivec.vperm",
        genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                    Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context,
                                                                builder))};

    llvm::SmallVector<mlir::Value> newArgs;
    if (isNativeVecElemOrderOnLE()) {
      auto i8Ty{mlir::IntegerType::get(context, 8)};
      auto v8Ty{mlir::VectorType::get(16, i8Ty)};
      auto negOne{builder.createMinusOneInteger(loc, i8Ty)};
      auto vNegOne{
          builder.create<mlir::vector::BroadcastOp>(loc, v8Ty, negOne)};

      mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne);
      newArgs = {mArg1, mArg0, mMask};
    } else {
      newArgs = {mArg0, mArg1, mMask};
    }

    auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)};

    if (res.getType() != argTypes[0]) {
      // fir.call llvm.ppc.altivec.vperm returns !fir.vector<4:i32>;
      // convert the result back to the original type.
      res = builder.createConvert(loc, vi32Ty, res);
      if (mlirTy != vi32Ty)
        res =
            builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res).getResult();
    }
    return builder.createConvert(loc, resultType, res);
  }
  case VecOp::Permi: {
    // arg3 is a constant
    auto constIntOp{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue())};
    assert(constIntOp && "expected integer constant argument");
    auto constInt{constIntOp.getInt()};
    // arg1, arg2, and the result type share the same VecTypeInfo.
    if (vecTyInfo.isFloat()) {
      mArg0 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg0).getResult();
      mArg1 =
          builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg1).getResult();
    }

    llvm::SmallVector<int64_t, 2> nMask; // native vector element order mask
    llvm::SmallVector<int64_t, 2> rMask; // non-native vector element order mask
    enum { V1 = 0, V2 = 2 };
    switch (constInt) {
    case 0:
      nMask = {V1 + 0, V2 + 0};
      rMask = {V2 + 1, V1 + 1};
      break;
    case 1:
      nMask = {V1 + 0, V2 + 1};
      rMask = {V2 + 0, V1 + 1};
      break;
    case 2:
      nMask = {V1 + 1, V2 + 0};
      rMask = {V2 + 1, V1 + 0};
      break;
    case 3:
      nMask = {V1 + 1, V2 + 1};
      rMask = {V2 + 0, V1 + 0};
      break;
    default:
      llvm_unreachable("unexpected arg3 value for vec_permi");
    }

    llvm::SmallVector<int64_t, 2> mask =
        (isBEVecElemOrderOnLE()) ? rMask : nMask;
    auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)};
    if (res.getType() != mlirTy) {
      auto cast{builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res)};
      return builder.createConvert(loc, resultType, cast);
    }
    return builder.createConvert(loc, resultType, res);
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
}

// VEC_SEL
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSel(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto argBases{getBasesForArgs(args)};
  llvm::SmallVector<VecTypeInfo, 4> vecTyInfos;
  for (size_t i = 0; i < argBases.size(); i++) {
    vecTyInfos.push_back(getVecTypeFromFir(argBases[i]));
  }
  auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)};

  auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)};
  auto negOne{builder.createMinusOneInteger(loc, i8Ty)};

  // Construct a constant <16 x i8> vector with value -1 for bitcast
  auto bcVecTy{mlir::VectorType::get(16, i8Ty)};
  auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)};

  // Bitcast arguments to bcVecTy
  auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])};
  auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])};
  auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])};

  // vec_sel(arg1, arg2, arg3) =
  //     (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...)))
  auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)};
  auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)};
  auto a2AndA3{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)};
  auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a2AndA3)};

  auto bcRes{
      builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)};

  return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes);
}
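
// Bitwise example (illustrative): per result bit, vec_sel picks from arg2
// where the arg3 mask bit is 1 and from arg1 where it is 0; e.g. with
// arg1 = 0b1100, arg2 = 0b1010, arg3 = 0b0110 the result is
// (0b1010 & 0b0110) | (0b1100 & ~0b0110) = 0b0010 | 0b1000 = 0b1010.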

// VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecShift(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};

  llvm::SmallVector<VecTypeInfo, 2> vecTyInfoArgs;
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0]));
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1]));

  // Convert the first two arguments to MLIR vectors
  llvm::SmallVector<mlir::Type, 2> mlirTyArgs;
  mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context));
  mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context));

  llvm::SmallVector<mlir::Value, 2> mlirVecArgs;
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0]));
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1]));

  mlir::Value shftRes{nullptr};

  if (vop == VecOp::Sl || vop == VecOp::Sr) {
    assert(args.size() == 2);
    // Construct the mask
    auto width{
        mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()};
    auto vecVal{builder.createIntegerConstant(
        loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)};
    auto mask{
        builder.create<mlir::vector::BroadcastOp>(loc, mlirTyArgs[1], vecVal)};
    auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)};

    mlir::Value res{nullptr};
    if (vop == VecOp::Sr)
      res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft);
    else if (vop == VecOp::Sl)
      res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft);

    shftRes = builder.createConvert(loc, argTypes[0], res);
  } else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl ||
             vop == VecOp::Sro) {
    assert(args.size() == 2);

    // Bitcast to vector<4xi32>
    auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))};
    if (mlirTyArgs[0] != bcVecTy)
      mlirVecArgs[0] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]);
    if (mlirTyArgs[1] != bcVecTy)
      mlirVecArgs[1] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]);

    llvm::StringRef funcName;
    switch (vop) {
    case VecOp::Srl:
      funcName = "llvm.ppc.altivec.vsr";
      break;
    case VecOp::Sro:
      funcName = "llvm.ppc.altivec.vsro";
      break;
    case VecOp::Sll:
      funcName = "llvm.ppc.altivec.vsl";
      break;
    case VecOp::Slo:
      funcName = "llvm.ppc.altivec.vslo";
      break;
    default:
      llvm_unreachable("unknown vector shift operation");
    }
    auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                            Ty::IntegerVector<4>>(context, builder)};
    mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)};

    // If the result vector type is different from the original type, need
    // to convert to mlir vector, bitcast and then convert back to fir vector.
    if (callOp.getResult(0).getType() != argTypes[0]) {
      auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0));
      res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res);
      shftRes = builder.createConvert(loc, argTypes[0], res);
    } else {
      shftRes = callOp.getResult(0);
    }
  } else if (vop == VecOp::Sld || vop == VecOp::Sldw) {
    assert(args.size() == 3);
    auto constIntOp = mlir::dyn_cast_or_null<mlir::IntegerAttr>(
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue());
    assert(constIntOp && "expected integer constant argument");

    // Bitcast to vector<16xi8>
    auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))};
    if (mlirTyArgs[0] != vi8Ty) {
      mlirVecArgs[0] =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[0])
              .getResult();
      mlirVecArgs[1] =
          builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[1])
              .getResult();
    }

    // Construct the mask for shuffling
    auto shiftVal{constIntOp.getInt()};
    if (vop == VecOp::Sldw)
      shiftVal = shiftVal << 2;
    shiftVal &= 0xF;
    llvm::SmallVector<int64_t, 16> mask;
    // Shuffle with mask based on the endianness
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      for (int i = 16; i < 32; ++i)
        mask.push_back(i - shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1],
                                                        mlirVecArgs[0], mask);
    } else {
      for (int i = 0; i < 16; ++i)
        mask.push_back(i + shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0],
                                                        mlirVecArgs[1], mask);
    }

    // Bitcast to the original type
    if (shftRes.getType() != mlirTyArgs[0])
      shftRes =
          builder.create<mlir::LLVM::BitcastOp>(loc, mlirTyArgs[0], shftRes);

    return builder.createConvert(loc, resultType, shftRes);
  } else
    llvm_unreachable("Invalid vector operation for generator");

  return shftRes;
}
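
// Worked example for VEC_SLD/VEC_SLDW above (illustrative): vec_sldw with
// arg3 = 1 gives shiftVal = 1 << 2 = 4 bytes; on a big-endian target the
// mask is {4, 5, ..., 19}, i.e. the byte concatenation arg1:arg2 shifted
// left by four bytes, while the little-endian path builds the mirrored mask
// over the swapped operand order.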

// VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};

  mlir::vector::SplatOp splatOp{nullptr};
  mlir::Type retTy{nullptr};
  switch (vop) {
  case VecOp::Splat: {
    assert(args.size() == 2);
    auto vecTyInfo{getVecTypeFromFir(argBases[0])};

    auto extractOp{genVecExtract(resultType, args)};
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splats: {
    assert(args.size() == 1);
    auto vecTyInfo{getVecTypeFromEle(argBases[0])};

    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, argBases[0], vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splat_s32: {
    assert(args.size() == 1);
    auto eleTy{builder.getIntegerType(32)};
    auto intOp{builder.createConvert(loc, eleTy, argBases[0])};

    // The intrinsic always returns vector(integer(4))
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, intOp, mlir::VectorType::get(4, eleTy));
    retTy = fir::VectorType::get(4, eleTy);
    break;
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
  return builder.createConvert(loc, retTy, splatOp);
}

fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  // Prepare the return type in FIR.
  auto vecTyInfo{getVecTypeFromFirType(resultType)};
  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
  auto firTy{vecTyInfo.toFirVectorType()};

  // Add the %val of arg0 to %addr of arg1
  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};

  auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)};
  auto i64VecTy{mlir::VectorType::get(2, i64Ty)};
  auto i64RefTy{builder.getRefType(i64Ty)};
  auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)};

  auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)};
  auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)};

  mlir::Value result{nullptr};
  if (mlirTy != splatRes.getType()) {
    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes);
  } else
    result = splatRes;

  return builder.createConvert(loc, firTy, result);
}
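
// Note (illustrative): vec_xlds loads a single 8-byte element at the
// computed address and splats it into both i64 lanes; the bitcast then
// reinterprets the <2 x i64> splat as the requested result element type
// (e.g. <2 x f64>) without moving any data.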

const char *getMmaIrIntrName(MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
    return "llvm.ppc.mma.assemble.acc";
  case MMAOp::AssemblePair:
    return "llvm.ppc.vsx.assemble.pair";
  case MMAOp::DisassembleAcc:
    return "llvm.ppc.mma.disassemble.acc";
  case MMAOp::DisassemblePair:
    return "llvm.ppc.vsx.disassemble.pair";
  case MMAOp::Xxmfacc:
    return "llvm.ppc.mma.xxmfacc";
  case MMAOp::Xxmtacc:
    return "llvm.ppc.mma.xxmtacc";
  case MMAOp::Xxsetaccz:
    return "llvm.ppc.mma.xxsetaccz";
  case MMAOp::Pmxvbf16ger2:
    return "llvm.ppc.mma.pmxvbf16ger2";
  case MMAOp::Pmxvbf16ger2nn:
    return "llvm.ppc.mma.pmxvbf16ger2nn";
  case MMAOp::Pmxvbf16ger2np:
    return "llvm.ppc.mma.pmxvbf16ger2np";
  case MMAOp::Pmxvbf16ger2pn:
    return "llvm.ppc.mma.pmxvbf16ger2pn";
  case MMAOp::Pmxvbf16ger2pp:
    return "llvm.ppc.mma.pmxvbf16ger2pp";
  case MMAOp::Pmxvf16ger2:
    return "llvm.ppc.mma.pmxvf16ger2";
  case MMAOp::Pmxvf16ger2nn:
    return "llvm.ppc.mma.pmxvf16ger2nn";
  case MMAOp::Pmxvf16ger2np:
    return "llvm.ppc.mma.pmxvf16ger2np";
  case MMAOp::Pmxvf16ger2pn:
    return "llvm.ppc.mma.pmxvf16ger2pn";
  case MMAOp::Pmxvf16ger2pp:
    return "llvm.ppc.mma.pmxvf16ger2pp";
  case MMAOp::Pmxvf32ger:
    return "llvm.ppc.mma.pmxvf32ger";
  case MMAOp::Pmxvf32gernn:
    return "llvm.ppc.mma.pmxvf32gernn";
  case MMAOp::Pmxvf32gernp:
    return "llvm.ppc.mma.pmxvf32gernp";
  case MMAOp::Pmxvf32gerpn:
    return "llvm.ppc.mma.pmxvf32gerpn";
  case MMAOp::Pmxvf32gerpp:
    return "llvm.ppc.mma.pmxvf32gerpp";
  case MMAOp::Pmxvf64ger:
    return "llvm.ppc.mma.pmxvf64ger";
  case MMAOp::Pmxvf64gernn:
    return "llvm.ppc.mma.pmxvf64gernn";
  case MMAOp::Pmxvf64gernp:
    return "llvm.ppc.mma.pmxvf64gernp";
  case MMAOp::Pmxvf64gerpn:
    return "llvm.ppc.mma.pmxvf64gerpn";
  case MMAOp::Pmxvf64gerpp:
    return "llvm.ppc.mma.pmxvf64gerpp";
  case MMAOp::Pmxvi16ger2:
    return "llvm.ppc.mma.pmxvi16ger2";
  case MMAOp::Pmxvi16ger2pp:
    return "llvm.ppc.mma.pmxvi16ger2pp";
  case MMAOp::Pmxvi16ger2s:
    return "llvm.ppc.mma.pmxvi16ger2s";
  case MMAOp::Pmxvi16ger2spp:
    return "llvm.ppc.mma.pmxvi16ger2spp";
  case MMAOp::Pmxvi4ger8:
    return "llvm.ppc.mma.pmxvi4ger8";
  case MMAOp::Pmxvi4ger8pp:
    return "llvm.ppc.mma.pmxvi4ger8pp";
  case MMAOp::Pmxvi8ger4:
    return "llvm.ppc.mma.pmxvi8ger4";
  case MMAOp::Pmxvi8ger4pp:
    return "llvm.ppc.mma.pmxvi8ger4pp";
  case MMAOp::Pmxvi8ger4spp:
    return "llvm.ppc.mma.pmxvi8ger4spp";
  case MMAOp::Xvbf16ger2:
    return "llvm.ppc.mma.xvbf16ger2";
  case MMAOp::Xvbf16ger2nn:
    return "llvm.ppc.mma.xvbf16ger2nn";
  case MMAOp::Xvbf16ger2np:
    return "llvm.ppc.mma.xvbf16ger2np";
  case MMAOp::Xvbf16ger2pn:
    return "llvm.ppc.mma.xvbf16ger2pn";
  case MMAOp::Xvbf16ger2pp:
    return "llvm.ppc.mma.xvbf16ger2pp";
  case MMAOp::Xvf16ger2:
    return "llvm.ppc.mma.xvf16ger2";
  case MMAOp::Xvf16ger2nn:
    return "llvm.ppc.mma.xvf16ger2nn";
  case MMAOp::Xvf16ger2np:
    return "llvm.ppc.mma.xvf16ger2np";
  case MMAOp::Xvf16ger2pn:
    return "llvm.ppc.mma.xvf16ger2pn";
  case MMAOp::Xvf16ger2pp:
    return "llvm.ppc.mma.xvf16ger2pp";
  case MMAOp::Xvf32ger:
    return "llvm.ppc.mma.xvf32ger";
  case MMAOp::Xvf32gernn:
    return "llvm.ppc.mma.xvf32gernn";
  case MMAOp::Xvf32gernp:
    return "llvm.ppc.mma.xvf32gernp";
  case MMAOp::Xvf32gerpn:
    return "llvm.ppc.mma.xvf32gerpn";
  case MMAOp::Xvf32gerpp:
    return "llvm.ppc.mma.xvf32gerpp";
  case MMAOp::Xvf64ger:
    return "llvm.ppc.mma.xvf64ger";
  case MMAOp::Xvf64gernn:
    return "llvm.ppc.mma.xvf64gernn";
  case MMAOp::Xvf64gernp:
    return "llvm.ppc.mma.xvf64gernp";
  case MMAOp::Xvf64gerpn:
    return "llvm.ppc.mma.xvf64gerpn";
  case MMAOp::Xvf64gerpp:
    return "llvm.ppc.mma.xvf64gerpp";
  case MMAOp::Xvi16ger2:
    return "llvm.ppc.mma.xvi16ger2";
  case MMAOp::Xvi16ger2pp:
    return "llvm.ppc.mma.xvi16ger2pp";
  case MMAOp::Xvi16ger2s:
    return "llvm.ppc.mma.xvi16ger2s";
  case MMAOp::Xvi16ger2spp:
    return "llvm.ppc.mma.xvi16ger2spp";
  case MMAOp::Xvi4ger8:
    return "llvm.ppc.mma.xvi4ger8";
  case MMAOp::Xvi4ger8pp:
    return "llvm.ppc.mma.xvi4ger8pp";
  case MMAOp::Xvi8ger4:
    return "llvm.ppc.mma.xvi8ger4";
  case MMAOp::Xvi8ger4pp:
    return "llvm.ppc.mma.xvi8ger4pp";
  case MMAOp::Xvi8ger4spp:
    return "llvm.ppc.mma.xvi8ger4spp";
  }
  llvm_unreachable("getMmaIrIntrName");
}

mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4);
  case MMAOp::AssemblePair:
    return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::DisassembleAcc:
    return genMmaDisassembleFuncType(context, mmaOp);
  case MMAOp::DisassemblePair:
    return genMmaDisassembleFuncType(context, mmaOp);
  case MMAOp::Xxmfacc:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Xxmtacc:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Xxsetaccz:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Pmxvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Xvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  }
  llvm_unreachable("getMmaIrFuncType");
}

template <MMAOp IntrId, MMAHandlerOp HandlerOp>
void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)};
  mlir::func::FuncOp funcOp{
      builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)};
  llvm::SmallVector<mlir::Value> intrArgs;

  // Depending on the handler op, change the subroutine call to a function
  // call: the first argument represents the result, and the remaining
  // arguments are shifted one position to form the actual argument list.
  size_t argStart{0};
  size_t argStep{1};
  size_t e{args.size()};
  if (HandlerOp == MMAHandlerOp::SubToFunc) {
    // The first argument becomes function result. Start from the second
    // argument.
    argStart = 1;
  } else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) {
    // Reverse argument order on little-endian target only.
    // The reversal does not depend on the setting of the non-native-order
    // option.
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      // Load the arguments in reverse order.
      argStart = args.size() - 1;
      // The first argument becomes function result. Stop at the second
      // argument.
      e = 0;
      argStep = -1;
    } else {
      // Load the arguments in natural order.
      // The first argument becomes function result. Start from the second
      // argument.
      argStart = 1;
    }
  }

  for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) {
    auto v{fir::getBase(args[i])};
    if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) {
      // First argument is passed in as an address. We need to load
      // the content to match the LLVM interface.
      v = builder.create<fir::LoadOp>(loc, v);
    }
    auto vType{v.getType()};
    mlir::Type targetType{intrFuncType.getInput(j)};
    if (vType != targetType) {
      if (mlir::isa<mlir::VectorType>(targetType)) {
        // Perform vector type conversion for arguments passed by value.
        auto eleTy{mlir::dyn_cast<fir::VectorType>(vType).getEleTy()};
        auto len{mlir::dyn_cast<fir::VectorType>(vType).getLen()};
        mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy);
        auto v0{builder.createConvert(loc, mlirType, v)};
        auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)};
        intrArgs.push_back(v1);
      } else if (mlir::isa<mlir::IntegerType>(targetType) &&
                 mlir::isa<mlir::IntegerType>(vType)) {
        auto v0{builder.createConvert(loc, targetType, v)};
        intrArgs.push_back(v0);
      } else {
        llvm::errs() << "\nUnexpected type conversion requested: "
                     << " from " << vType << " to " << targetType << "\n";
        llvm_unreachable("Unsupported type conversion for argument to PowerPC "
                         "MMA intrinsic");
      }
    } else {
      intrArgs.push_back(v);
    }
  }
  auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)};
  if (HandlerOp == MMAHandlerOp::SubToFunc ||
      HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE ||
      HandlerOp == MMAHandlerOp::FirstArgIsResult) {
    // Convert pointer type if needed.
    mlir::Value callResult{callSt.getResult(0)};
    mlir::Value destPtr{fir::getBase(args[0])};
    mlir::Type callResultPtrType{builder.getRefType(callResult.getType())};
    if (destPtr.getType() != callResultPtrType) {
      destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr);
    }
    // Copy the result.
    builder.create<fir::StoreOp>(loc, callResult, destPtr);
  }
}
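
// Example (illustrative, schematic FIR): under SubToFunc, a call such as
// __ppc_mma_xvf32ger(acc, a, b) is lowered roughly as
//   %r = fir.call @llvm.ppc.mma.xvf32ger(%a, %b)
//   fir.store %r to %acc
// i.e. the subroutine's first (address) argument becomes the function
// result of the LLVM intrinsic, which is then stored back through it.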

// VEC_ST, VEC_STE, VEC_STXVP
template <VecOp vop>
void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);

  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  llvm::StringRef fname{};
  mlir::VectorType stTy{nullptr};
  auto i32ty{mlir::IntegerType::get(context, 32)};
  switch (vop) {
  case VecOp::St:
    stTy = mlir::VectorType::get(4, i32ty);
    fname = "llvm.ppc.altivec.stvx";
    break;
  case VecOp::Ste: {
    const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()};
    const auto len{arg1TyInfo.len};

    if (arg1TyInfo.isFloat32()) {
      stTy = mlir::VectorType::get(len, i32ty);
      fname = "llvm.ppc.altivec.stvewx";
    } else if (mlir::isa<mlir::IntegerType>(arg1TyInfo.eleTy)) {
      stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width));

      switch (width) {
      case 8:
        fname = "llvm.ppc.altivec.stvebx";
        break;
      case 16:
        fname = "llvm.ppc.altivec.stvehx";
        break;
      case 32:
        fname = "llvm.ppc.altivec.stvewx";
        break;
      default:
        assert(false && "invalid element size");
      }
    } else
      assert(false && "unknown type");
    break;
  }
  case VecOp::Stxvp:
    // __vector_pair type
    stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    fname = "llvm.ppc.vsx.stxvp";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)};
  mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType);

  llvm::SmallVector<mlir::Value, 4> biArgs;

  if (vop == VecOp::Stxvp) {
    biArgs.push_back(argBases[0]);
    biArgs.push_back(addr);
    builder.create<fir::CallOp>(loc, funcOp, biArgs);
    return;
  }

  auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())};
  auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context),
                                 argBases[0])};

  mlir::Value newArg1{nullptr};
  if (stTy != arg1TyInfo.toMlirVectorType(context))
    newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv);
  else
    newArg1 = cnv;

  if (isBEVecElemOrderOnLE())
    newArg1 = builder.createConvert(
        loc, stTy, reverseVectorElements(builder, loc, newArg1, 4));

  biArgs.push_back(newArg1);
  biArgs.push_back(addr);

  builder.create<fir::CallOp>(loc, funcOp, biArgs);
}

// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
template <VecOp vop>
void PPCIntrinsicLibrary::genVecXStore(
    llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  mlir::Value trg{nullptr};
  mlir::Value src{nullptr};

  switch (vop) {
  case VecOp::Xst:
  case VecOp::Xst_be: {
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);

    if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) {
      auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                     argBases[0])};
      auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)};

      src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf);
    }
    break;
  }
  case VecOp::Xstd2:
  case VecOp::Xstw4: {
    // A 16-byte vector arg1 is treated as two 8-byte elements or
    // four 4-byte elements.
    mlir::IntegerType elemTy;
    uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4;
    elemTy = builder.getIntegerType(128 / numElem);

    mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)};
    fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)};

    auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                   argBases[0])};

    mlir::Type srcTy{nullptr};
    if (numElem != arg1TyInfo.len) {
      cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv);
      srcTy = firVecTy;
    } else {
      srcTy = arg1TyInfo.toFirVectorType();
    }

    trg = builder.createConvert(loc, builder.getRefType(srcTy), addr);

    if (isBEVecElemOrderOnLE()) {
      cnv = reverseVectorElements(builder, loc, cnv, numElem);
    }

    src = builder.createConvert(loc, srcTy, cnv);
    break;
  }
  case VecOp::Stxv:
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);
    break;
  default:
    assert(false && "Invalid vector operation for generator");
  }
  builder.create<fir::StoreOp>(loc, mlir::TypeRange{},
                               mlir::ValueRange{src, trg},
                               getAlignmentAttr(builder, 1));
}

} // namespace fir