//===-- PPCIntrinsicCall.cpp ----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Helper routines for constructing the FIR dialect of MLIR for PowerPC // intrinsics. Extensive use of MLIR interfaces and MLIR's coding style // (https://mlir.llvm.org/getting_started/DeveloperGuide/) is used in this // module. // //===----------------------------------------------------------------------===// #include "flang/Optimizer/Builder/PPCIntrinsicCall.h" #include "flang/Evaluate/common.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/MutableBox.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" namespace fir { using PI = PPCIntrinsicLibrary; // PPC specific intrinsic handlers. static constexpr IntrinsicHandler ppcHandlers[]{ {"__ppc_mma_assemble_acc", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}, {"arg4", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_assemble_pair", static_cast( &PI::genMmaIntr), {{{"pair", asAddr}, {"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_build_acc", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}, {"arg4", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_disassemble_acc", static_cast( &PI::genMmaIntr), {{{"data", asAddr}, {"acc", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_disassemble_pair", static_cast( &PI::genMmaIntr), {{{"data", asAddr}, {"pair", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvbf16ger2_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvbf16ger2nn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvbf16ger2np", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvbf16ger2pn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvbf16ger2pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf16ger2_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf16ger2nn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf16ger2np", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf16ger2pn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, 
{"__ppc_mma_pmxvf16ger2pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf32ger", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf32gernn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf32gernp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf32gerpn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf32gerpp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf64ger", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf64gernn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf64gernp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf64gerpn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvf64gerpp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi16ger2_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi16ger2pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi16ger2s", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi16ger2spp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi4ger8_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi4ger8pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi8ger4_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi8ger4pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_pmxvi8ger4spp", static_cast( &PI::genMmaIntr), {{{"acc", 
asAddr}, {"a", asValue}, {"b", asValue}, {"xmask", asValue}, {"ymask", asValue}, {"pmask", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvbf16ger2_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvbf16ger2nn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvbf16ger2np", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvbf16ger2pn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvbf16ger2pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf16ger2_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf16ger2nn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf16ger2np", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf16ger2pn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf16ger2pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf32ger", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf32gernn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf32gernp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf32gerpn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf32gerpp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf64ger", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf64gernn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf64gernp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf64gerpn", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvf64gerpp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi16ger2_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi16ger2pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi16ger2s", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi16ger2spp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi4ger8_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi4ger8pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 
/*isElemental=*/true}, {"__ppc_mma_xvi8ger4_", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi8ger4pp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xvi8ger4spp", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, /*isElemental=*/true}, {"__ppc_mma_xxmfacc", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}}}, /*isElemental=*/true}, {"__ppc_mma_xxmtacc", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}}}, /*isElemental=*/true}, {"__ppc_mma_xxsetaccz", static_cast( &PI::genMmaIntr), {{{"acc", asAddr}}}, /*isElemental=*/true}, {"__ppc_mtfsf", static_cast(&PI::genMtfsf), {{{"mask", asValue}, {"r", asValue}}}, /*isElemental=*/false}, {"__ppc_mtfsfi", static_cast(&PI::genMtfsf), {{{"bf", asValue}, {"i", asValue}}}, /*isElemental=*/false}, {"__ppc_vec_abs", static_cast(&PI::genVecAbs), {{{"arg1", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_add", static_cast( &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_and", static_cast( &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_any_ge", static_cast( &PI::genVecAnyCompare), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_cmpge", static_cast( &PI::genVecCmp), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_cmpgt", static_cast( &PI::genVecCmp), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_cmple", static_cast( &PI::genVecCmp), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_cmplt", static_cast( &PI::genVecCmp), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_convert", static_cast( &PI::genVecConvert), {{{"v", asValue}, {"mold", asValue}}}, /*isElemental=*/false}, {"__ppc_vec_ctf", static_cast( &PI::genVecConvert), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_cvf", static_cast( &PI::genVecConvert), {{{"arg1", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_extract", static_cast(&PI::genVecExtract), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_insert", static_cast(&PI::genVecInsert), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_ld", static_cast( &PI::genVecLdCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_lde", static_cast( &PI::genVecLdCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_ldl", static_cast( &PI::genVecLdCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_lvsl", static_cast( &PI::genVecLvsGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_lvsr", static_cast( &PI::genVecLvsGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_lxv", static_cast( &PI::genVecLdNoCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_lxvp", static_cast( &PI::genVecLdCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_mergeh", static_cast( &PI::genVecMerge), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_mergel", static_cast( &PI::genVecMerge), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_msub", static_cast( 
&PI::genVecNmaddMsub), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_mul", static_cast( &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_nmadd", static_cast( &PI::genVecNmaddMsub), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_perm", static_cast( &PI::genVecPerm), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_permi", static_cast( &PI::genVecPerm), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_sel", static_cast(&PI::genVecSel), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_sl", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_sld", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_sldw", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_sll", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_slo", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_splat", static_cast( &PI::genVecSplat), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_splat_s32_", static_cast( &PI::genVecSplat), {{{"arg1", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_splats", static_cast( &PI::genVecSplat), {{{"arg1", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_sr", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_srl", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_sro", static_cast( &PI::genVecShift), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_st", static_cast( &PI::genVecStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_ste", static_cast( &PI::genVecStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_stxv", static_cast( &PI::genVecXStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_stxvp", static_cast( &PI::genVecStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_sub", static_cast( &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_xl", static_cast(&PI::genVecXlGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_xl_be", static_cast( &PI::genVecLdNoCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_xld2_", static_cast( &PI::genVecLdCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_xlds", static_cast(&PI::genVecXlds), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_xlw4_", static_cast( &PI::genVecLdCallGrp), {{{"arg1", asValue}, {"arg2", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_xor", static_cast( &PI::genVecAddAndMulSubXor), {{{"arg1", asValue}, {"arg2", asValue}}}, /*isElemental=*/true}, {"__ppc_vec_xst", static_cast( &PI::genVecXStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 
/*isElemental=*/false}, {"__ppc_vec_xst_be", static_cast( &PI::genVecXStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_xstd2_", static_cast( &PI::genVecXStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, /*isElemental=*/false}, {"__ppc_vec_xstw4_", static_cast( &PI::genVecXStore), {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, /*isElemental=*/false}, }; static constexpr MathOperation ppcMathOperations[] = { // fcfi is just another name for fcfid, there is no llvm.ppc.fcfi. {"__ppc_fcfi", "llvm.ppc.fcfid", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fcfid", "llvm.ppc.fcfid", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fcfud", "llvm.ppc.fcfud", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fctid", "llvm.ppc.fctid", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fctidz", "llvm.ppc.fctidz", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fctiw", "llvm.ppc.fctiw", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fctiwz", "llvm.ppc.fctiwz", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fctudz", "llvm.ppc.fctudz", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fctuwz", "llvm.ppc.fctuwz", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fmadd", "llvm.fma.f32", genFuncType, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, genMathOp}, {"__ppc_fmadd", "llvm.fma.f64", genFuncType, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, genMathOp}, {"__ppc_fmsub", "llvm.ppc.fmsubs", genFuncType, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, genLibCall}, {"__ppc_fmsub", "llvm.ppc.fmsub", genFuncType, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, genLibCall}, {"__ppc_fnabs", "llvm.ppc.fnabss", genFuncType, Ty::Real<4>>, genLibCall}, {"__ppc_fnabs", "llvm.ppc.fnabs", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fnmadd", "llvm.ppc.fnmadds", genFuncType, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, genLibCall}, {"__ppc_fnmadd", "llvm.ppc.fnmadd", genFuncType, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, genLibCall}, {"__ppc_fnmsub", "llvm.ppc.fnmsub.f32", genFuncType, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, genLibCall}, {"__ppc_fnmsub", "llvm.ppc.fnmsub.f64", genFuncType, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, genLibCall}, {"__ppc_fre", "llvm.ppc.fre", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_fres", "llvm.ppc.fres", genFuncType, Ty::Real<4>>, genLibCall}, {"__ppc_frsqrte", "llvm.ppc.frsqrte", genFuncType, Ty::Real<8>>, genLibCall}, {"__ppc_frsqrtes", "llvm.ppc.frsqrtes", genFuncType, Ty::Real<4>>, genLibCall}, {"__ppc_vec_cvbf16spn", "llvm.ppc.vsx.xvcvbf16spn", genFuncType, Ty::UnsignedVector<1>>, genLibCall}, {"__ppc_vec_cvspbf16_", "llvm.ppc.vsx.xvcvspbf16", genFuncType, Ty::UnsignedVector<1>>, genLibCall}, {"__ppc_vec_madd", "llvm.fma.v4f32", genFuncType, Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>, genLibCall}, {"__ppc_vec_madd", "llvm.fma.v2f64", genFuncType, Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb", genFuncType, Ty::IntegerVector<1>, Ty::IntegerVector<1>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh", genFuncType, Ty::IntegerVector<2>, Ty::IntegerVector<2>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw", genFuncType, Ty::IntegerVector<4>, Ty::IntegerVector<4>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd", genFuncType, Ty::IntegerVector<8>, Ty::IntegerVector<8>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.altivec.vmaxub", genFuncType, Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall}, {"__ppc_vec_max", 
"llvm.ppc.altivec.vmaxuh", genFuncType, Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw", genFuncType, Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.altivec.vmaxud", genFuncType, Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp", genFuncType, Ty::RealVector<4>, Ty::RealVector<4>>, genLibCall}, {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp", genFuncType, Ty::RealVector<8>, Ty::RealVector<8>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminsb", genFuncType, Ty::IntegerVector<1>, Ty::IntegerVector<1>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminsh", genFuncType, Ty::IntegerVector<2>, Ty::IntegerVector<2>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminsw", genFuncType, Ty::IntegerVector<4>, Ty::IntegerVector<4>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminsd", genFuncType, Ty::IntegerVector<8>, Ty::IntegerVector<8>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminub", genFuncType, Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminuh", genFuncType, Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminuw", genFuncType, Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.altivec.vminud", genFuncType, Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.vsx.xvminsp", genFuncType, Ty::RealVector<4>, Ty::RealVector<4>>, genLibCall}, {"__ppc_vec_min", "llvm.ppc.vsx.xvmindp", genFuncType, Ty::RealVector<8>, Ty::RealVector<8>>, genLibCall}, {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32", genFuncType, Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>, genLibCall}, {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64", genFuncType, Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>, genLibCall}, }; const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) { auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) { return name.compare(ppcHandler.name) > 0; }; auto result = llvm::lower_bound(ppcHandlers, name, compare); return result != std::end(ppcHandlers) && result->name == name ? result : nullptr; } using RtMap = Fortran::common::StaticMultimapView; static constexpr RtMap ppcMathOps(ppcMathOperations); static_assert(ppcMathOps.Verify() && "map must be sorted"); std::pair checkPPCMathOperationsRange(llvm::StringRef name) { return ppcMathOps.equal_range(name); } // Helper functions for vector element ordering. 
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          !converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

bool PPCIntrinsicLibrary::changeVecElemOrder() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() !=
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_pair
  // Arguments:
  //   quadCnt: number of arguments that have __vector_quad type, followed by
  //   pairCnt: number of arguments that have __vector_pair type, followed by
  //   vecCnt: number of arguments that have vector(integer) type, followed by
  //   intCnt: number of arguments that have integer type
  //   vecElemBitSize: specifies the size of vector elements in bits
  //   intBitSize: specifies the size of integer arguments in bits
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vpType});
}

static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_quad
  // Arguments:
  //   quadCnt: number of arguments that have __vector_quad type, followed by
  //   pairCnt: number of arguments that have __vector_pair type, followed by
  //   vecCnt: number of arguments that have vector(integer) type, followed by
  //   intCnt: number of arguments that have integer type
  //   vecElemBitSize: specifies the size of vector elements in bits
  //   intBitSize: specifies the size of integer arguments in bits
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vqType});
}

mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context,
                                             MMAOp mmaOp) {
  auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
  llvm::SmallVector<mlir::Type> members;

  if (mmaOp ==
      MMAOp::DisassembleAcc) {
    auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vqType}, {resType});
  } else if (mmaOp == MMAOp::DisassemblePair) {
    auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vpType}, {resType});
  } else {
    llvm_unreachable(
        "Unsupported intrinsic code for function signature generator");
  }
}

//===----------------------------------------------------------------------===//
// PowerPC specific intrinsic handlers.
//===----------------------------------------------------------------------===//

// MTFSF, MTFSFI
template <bool isImm>
void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  llvm::SmallVector<mlir::Value> scalarArgs;
  for (const fir::ExtendedValue &arg : args)
    if (arg.getUnboxed())
      scalarArgs.emplace_back(fir::getBase(arg));
    else
      mlir::emitError(loc, "nonscalar intrinsic argument");

  mlir::FunctionType libFuncType;
  mlir::func::FuncOp funcOp;
  if (isImm) {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>(
        builder.getContext(), builder);
    funcOp = builder.addNamedFunction(loc, "llvm.ppc.mtfsfi", libFuncType);
  } else {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>(
        builder.getContext(), builder);
    funcOp = builder.addNamedFunction(loc, "llvm.ppc.mtfsf", libFuncType);
  }
  builder.create<fir::CallOp>(loc, funcOp, scalarArgs);
}

// VEC_ABS
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 1);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};

  mlir::func::FuncOp funcOp{nullptr};
  mlir::FunctionType ftype;
  llvm::StringRef fname{};
  if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.fabs.v4f32";
      ftype =
          genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder);
    } else if (vTypeInfo.isFloat64()) {
      fname = "llvm.fabs.v2f64";
      ftype =
          genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder);
    }

    funcOp = builder.addNamedFunction(loc, fname, ftype);
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])};
    return callOp.getResult(0);
  } else if (auto eleTy = vTypeInfo.eleTy.dyn_cast<mlir::IntegerType>()) {
    // vec_abs(arg1) = max(0 - arg1, arg1)
    auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)};
    auto varg1{builder.createConvert(loc, newVecTy, argBases[0])};

    // construct vector(0,..)
    auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)};
    auto vZero{
        builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)};
    auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)};

    mlir::func::FuncOp funcOp{nullptr};
    switch (eleTy.getWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.vmaxsb";
      ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                          Ty::IntegerVector<1>>(context, builder);
      break;
    case 16:
      fname = "llvm.ppc.altivec.vmaxsh";
      ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                          Ty::IntegerVector<2>>(context, builder);
      break;
    case 32:
      fname = "llvm.ppc.altivec.vmaxsw";
      ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                          Ty::IntegerVector<4>>(context, builder);
      break;
    case 64:
      fname = "llvm.ppc.altivec.vmaxsd";
      ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                          Ty::IntegerVector<8>>(context, builder);
      break;
    default:
      llvm_unreachable("invalid integer size");
    }
    funcOp = builder.addNamedFunction(loc, fname, ftype);

    mlir::Value args[] = {zeroSubVarg1, varg1};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)};
    return builder.createConvert(loc, argBases[0].getType(),
                                 callOp.getResult(0));
  }

  llvm_unreachable("unknown vector type");
}

// VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argsTy{getTypesForArgs(argBases)};
  assert(argsTy[0].isa<fir::VectorType>() && argsTy[1].isa<fir::VectorType>());

  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  const auto isInteger{vecTyInfo.eleTy.isa<mlir::IntegerType>()};
  const auto isFloat{vecTyInfo.eleTy.isa<mlir::FloatType>()};
  assert((isInteger || isFloat) && "unknown vector type");

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  mlir::Value r{nullptr};
  switch (vop) {
  case VecOp::Add:
    if (isInteger)
      r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Mul:
    if (isInteger)
      r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Sub:
    if (isInteger)
      r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::And:
  case VecOp::Xor: {
    mlir::Value arg1{nullptr};
    mlir::Value arg2{nullptr};
    if (isInteger) {
      arg1 = vargs[0];
      arg2 = vargs[1];
    } else if (isFloat) {
      // bitcast the arguments to integer
      auto wd{vecTyInfo.eleTy.dyn_cast<mlir::FloatType>().getWidth()};
      auto ftype{builder.getIntegerType(wd)};
      auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)};
      arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]);
      arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]);
    }
    if (vop == VecOp::And)
      r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2);
    else if (vop == VecOp::Xor)
      r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2);

    if (isFloat)
      r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r);

    break;
  }
  }

  return builder.createConvert(loc, argsTy[0], r);
}

// VEC_ANY_GE
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  assert(vop == VecOp::Anyge && "unknown vector compare operation");
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])};
  [[maybe_unused]] const auto isSupportedTy{
      mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>(
          vTypeInfo.eleTy)};
  assert(isSupportedTy && "unsupported vector type");

  // Constants for mapping CR6 bits to predicate result
  enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 };

  auto context{builder.getContext()};

  static std::map<std::pair<ParamTypeId, unsigned>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      uiBuiltin{
          {std::make_pair(ParamTypeId::IntegerVector, 8),
           std::make_pair("llvm.ppc.altivec.vcmpgtsb.p",
                          genFuncType<Ty::Integer<4>,
                                      Ty::Integer<4>, Ty::IntegerVector<1>,
                                      Ty::IntegerVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 16),
           std::make_pair("llvm.ppc.altivec.vcmpgtsh.p",
                          genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                                      Ty::IntegerVector<2>,
                                      Ty::IntegerVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 32),
           std::make_pair("llvm.ppc.altivec.vcmpgtsw.p",
                          genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                                      Ty::IntegerVector<4>,
                                      Ty::IntegerVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 64),
           std::make_pair("llvm.ppc.altivec.vcmpgtsd.p",
                          genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                                      Ty::IntegerVector<8>,
                                      Ty::IntegerVector<8>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 8),
           std::make_pair("llvm.ppc.altivec.vcmpgtub.p",
                          genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                                      Ty::UnsignedVector<1>,
                                      Ty::UnsignedVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 16),
           std::make_pair("llvm.ppc.altivec.vcmpgtuh.p",
                          genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                                      Ty::UnsignedVector<2>,
                                      Ty::UnsignedVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 32),
           std::make_pair("llvm.ppc.altivec.vcmpgtuw.p",
                          genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                                      Ty::UnsignedVector<4>,
                                      Ty::UnsignedVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 64),
           std::make_pair("llvm.ppc.altivec.vcmpgtud.p",
                          genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                                      Ty::UnsignedVector<8>,
                                      Ty::UnsignedVector<8>>(context, builder))},
      };

  mlir::FunctionType ftype{nullptr};
  llvm::StringRef fname;
  const auto i32Ty{mlir::IntegerType::get(context, 32)};
  llvm::SmallVector<mlir::Value> cmpArgs;
  mlir::Value op{nullptr};
  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};

  if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
    std::pair<llvm::StringRef, mlir::FunctionType> bi;
    bi = (elementTy.isUnsignedInteger())
             ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)]
             : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)];

    fname = std::get<0>(bi);
    ftype = std::get<1>(bi);

    op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV);
    cmpArgs.emplace_back(op);
    // reverse the argument order
    cmpArgs.emplace_back(argBases[1]);
    cmpArgs.emplace_back(argBases[0]);
  } else if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.ppc.vsx.xvcmpgesp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>,
                          Ty::RealVector<4>>(context, builder);
    } else {
      fname = "llvm.ppc.vsx.xvcmpgedp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>,
                          Ty::RealVector<8>>(context, builder);
    }
    op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV);
    cmpArgs.emplace_back(op);
    cmpArgs.emplace_back(argBases[0]);
    cmpArgs.emplace_back(argBases[1]);
  }
  assert((!fname.empty() && ftype) && "invalid type");

  mlir::func::FuncOp funcOp{builder.addNamedFunction(loc, fname, ftype)};
  auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)};

  return callOp.getResult(0);
}

static std::pair<llvm::StringRef, mlir::FunctionType>
getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop,
                         fir::FirOpBuilder &builder) {
  auto context{builder.getContext()};
  static std::map<std::pair<ParamTypeId, unsigned>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      iuBuiltinName{
          {std::make_pair(ParamTypeId::IntegerVector, 8),
           std::make_pair("llvm.ppc.altivec.vcmpgtsb",
                          genFuncType<Ty::UnsignedVector<1>,
                                      Ty::IntegerVector<1>,
                                      Ty::IntegerVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 16),
           std::make_pair("llvm.ppc.altivec.vcmpgtsh",
                          genFuncType<Ty::UnsignedVector<2>,
                                      Ty::IntegerVector<2>,
                                      Ty::IntegerVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 32),
           std::make_pair("llvm.ppc.altivec.vcmpgtsw",
                          genFuncType<Ty::UnsignedVector<4>,
                                      Ty::IntegerVector<4>,
                                      Ty::IntegerVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 64),
           std::make_pair("llvm.ppc.altivec.vcmpgtsd",
                          genFuncType<Ty::UnsignedVector<8>,
                                      Ty::IntegerVector<8>,
                                      Ty::IntegerVector<8>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 8),
           std::make_pair("llvm.ppc.altivec.vcmpgtub",
                          genFuncType<Ty::UnsignedVector<1>,
                                      Ty::UnsignedVector<1>,
                                      Ty::UnsignedVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 16),
           std::make_pair("llvm.ppc.altivec.vcmpgtuh",
                          genFuncType<Ty::UnsignedVector<2>,
                                      Ty::UnsignedVector<2>,
                                      Ty::UnsignedVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 32),
           std::make_pair("llvm.ppc.altivec.vcmpgtuw",
                          genFuncType<Ty::UnsignedVector<4>,
                                      Ty::UnsignedVector<4>,
                                      Ty::UnsignedVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 64),
           std::make_pair("llvm.ppc.altivec.vcmpgtud",
                          genFuncType<Ty::UnsignedVector<8>,
                                      Ty::UnsignedVector<8>,
                                      Ty::UnsignedVector<8>>(context,
                                                             builder))}};

  // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with
  // arguments reversed.
  enum class Cmp { gtOrLt, geOrLe };
  static std::map<std::pair<Cmp, int>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      rGBI{{std::make_pair(Cmp::geOrLe, 32),
            std::make_pair("llvm.ppc.vsx.xvcmpgesp",
                           genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
                                       Ty::RealVector<4>>(context, builder))},
           {std::make_pair(Cmp::geOrLe, 64),
            std::make_pair("llvm.ppc.vsx.xvcmpgedp",
                           genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
                                       Ty::RealVector<8>>(context, builder))},
           {std::make_pair(Cmp::gtOrLt, 32),
            std::make_pair("llvm.ppc.vsx.xvcmpgtsp",
                           genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
                                       Ty::RealVector<4>>(context, builder))},
           {std::make_pair(Cmp::gtOrLt, 64),
            std::make_pair("llvm.ppc.vsx.xvcmpgtdp",
                           genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
                                       Ty::RealVector<8>>(context, builder))}};

  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
  std::pair<llvm::StringRef, mlir::FunctionType> specFunc;
  if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy))
    specFunc =
        (elementTy.isUnsignedInteger())
            ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)]
            : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)];
  else if (vTypeInfo.isFloat())
    specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple)
                   ? rGBI[std::make_pair(Cmp::geOrLe, width)]
                   : rGBI[std::make_pair(Cmp::gtOrLt, width)];

  assert(!std::get<0>(specFunc).empty() && "unknown builtin name");
  assert(std::get<1>(specFunc) && "unknown function type");
  return specFunc;
}

// VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{
      getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)};

  mlir::func::FuncOp funcOp = builder.addNamedFunction(
      loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam));

  mlir::Value res{nullptr};

  if (auto eTy = vecTyInfo.eleTy.dyn_cast<mlir::IntegerType>()) {
    constexpr int firstArg{0};
    constexpr int secondArg{1};
    std::map<VecOp, std::array<int, 2>> argOrder{
        {VecOp::Cmpge, {secondArg, firstArg}},
        {VecOp::Cmple, {firstArg, secondArg}},
        {VecOp::Cmpgt, {firstArg, secondArg}},
        {VecOp::Cmplt, {secondArg, firstArg}}};

    // Construct the function return type, unsigned vector, for conversion.
    auto itype = mlir::IntegerType::get(context, eTy.getWidth(),
                                        mlir::IntegerType::Unsigned);
    auto returnType = fir::VectorType::get(vecTyInfo.len, itype);

    switch (vop) {
    case VecOp::Cmpgt:
    case VecOp::Cmplt: {
      // arg1 > arg2 --> vcmpgt(arg1, arg2)
      // arg1 < arg2 --> vcmpgt(arg2, arg1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};
      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      res = callOp.getResult(0);
      break;
    }
    case VecOp::Cmpge:
    case VecOp::Cmple: {
      // arg1 >= arg2 --> vcmpge(arg2, arg1) xor vector(-1)
      // arg1 <= arg2 --> vcmpge(arg1, arg2) xor vector(-1)
      mlir::Value vargs[]{argBases[argOrder[vop][0]],
                          argBases[argOrder[vop][1]]};

      // Construct a constant vector(-1)
      auto negOneVal{builder.createIntegerConstant(
          loc, getConvertedElementType(context, eTy), -1)};
      auto vNegOne{builder.create<mlir::vector::BroadcastOp>(
          loc, vecTyInfo.toMlirVectorType(context), negOneVal)};

      auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
      mlir::Value callRes{callOp.getResult(0)};
      auto vargs2{
          convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})};
      auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)};

      res = builder.createConvert(loc, returnType, xorRes);
      break;
    }
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
  } else if (vecTyInfo.isFloat()) {
    mlir::Value vargs[2];
    switch (vop) {
    case VecOp::Cmpge:
    case VecOp::Cmpgt:
      vargs[0] = argBases[0];
      vargs[1] = argBases[1];
      break;
    case VecOp::Cmple:
    case VecOp::Cmplt:
      // Swap the arguments as xvcmpg[et] is used
      vargs[0] = argBases[1];
      vargs[1] = argBases[0];
      break;
    default:
      llvm_unreachable("Invalid vector operation for generator");
    }
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
    res = callOp.getResult(0);
  } else
    llvm_unreachable("invalid vector type");

  return res;
}

static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder,
                                              mlir::Location loc,
                                              mlir::Value arg) {
  auto ty = arg.getType();
  auto context{builder.getContext()};
  auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};

  if (ty != vtype)
    arg = builder.create<mlir::vector::BitCastOp>(loc, vtype, arg).getResult();

  llvm::SmallVector<int64_t, 16> mask{4,  5,  6,  7,  0, 1, 2,  3,
                                      12, 13, 14, 15, 8, 9, 10, 11};
  arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask);
  if (ty != vtype)
    arg = builder.create<mlir::vector::BitCastOp>(loc, ty, arg);
  return arg;
}

// VEC_CONVERT, VEC_CTF, VEC_CVF
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
                                   llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};
  auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])};
  const auto i32Ty{mlir::IntegerType::get(context, 32)};

  switch (vop) {
  case VecOp::Ctf: {
    assert(args.size() == 2);
    auto convArg{builder.createConvert(loc, i32Ty, argBases[1])};
    auto eTy{vecTyInfo.eleTy.dyn_cast<mlir::IntegerType>()};
    assert(eTy && "Unsupported vector type");
    const auto isUnsigned{eTy.isUnsignedInteger()};
    const auto width{eTy.getWidth()};

    if (width == 32) {
      auto ftype{(isUnsigned)
                     ? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>,
                                   Ty::Integer<4>>(context, builder)
                     : genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>,
                                   Ty::Integer<4>>(context, builder)};
      const llvm::StringRef fname{(isUnsigned) ?
"llvm.ppc.altivec.vcfux" : "llvm.ppc.altivec.vcfsx"}; auto funcOp{builder.addNamedFunction(loc, fname, ftype)}; mlir::Value newArgs[] = {argBases[0], convArg}; auto callOp{builder.create(loc, funcOp, newArgs)}; return callOp.getResult(0); } else if (width == 64) { auto fTy{mlir::FloatType::getF64(context)}; auto ty{mlir::VectorType::get(2, fTy)}; // vec_vtf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1)) auto convOp{(isUnsigned) ? builder.create(loc, ty, vArg1) : builder.create(loc, ty, vArg1)}; // construct vector<1./(1< auto constInt{ mlir::dyn_cast(argBases[1].getDefiningOp()) .getValue() .dyn_cast_or_null()}; assert(constInt && "expected integer constant argument"); double f{1.0 / (1 << constInt.getInt())}; llvm::SmallVector vals{f, f}; auto constOp{builder.create( loc, ty, builder.getF64VectorAttr(vals))}; auto mulOp{builder.create( loc, ty, convOp->getResult(0), constOp)}; return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp); } llvm_unreachable("invalid element integer kind"); } case VecOp::Convert: { assert(args.size() == 2); // resultType has mold type (if scalar) or element type (if array) auto resTyInfo{getVecTypeFromFirType(resultType)}; auto moldTy{resTyInfo.toMlirVectorType(context)}; auto firTy{resTyInfo.toFirVectorType()}; // vec_convert(v, mold) = bitcast v to "type of mold" auto conv{builder.create(loc, moldTy, vArg1)}; return builder.createConvert(loc, firTy, conv); } case VecOp::Cvf: { assert(args.size() == 1); mlir::Value newArgs[]{vArg1}; if (vecTyInfo.isFloat32()) { if (changeVecElemOrder()) newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"}; auto ftype{ genFuncType, Ty::RealVector<4>>(context, builder)}; auto funcOp{builder.addNamedFunction(loc, fname, ftype)}; auto callOp{builder.create(loc, funcOp, newArgs)}; return callOp.getResult(0); } else if (vecTyInfo.isFloat64()) { const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"}; auto ftype{ genFuncType, Ty::RealVector<8>>(context, builder)}; auto funcOp{builder.addNamedFunction(loc, fname, ftype)}; newArgs[0] = builder.create(loc, funcOp, newArgs).getResult(0); auto fvf32Ty{newArgs[0].getType()}; auto f32type{mlir::FloatType::getF32(context)}; auto mvf32Ty{mlir::VectorType::get(4, f32type)}; newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]); if (changeVecElemOrder()) newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); return builder.createConvert(loc, fvf32Ty, newArgs[0]); } llvm_unreachable("invalid element integer kind"); } default: llvm_unreachable("Invalid vector operation for generator"); } } static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder, mlir::Location loc, VecTypeInfo vecInfo, mlir::Value idx) { mlir::Value numSub1{ builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)}; return builder.create(loc, idx.getType(), numSub1, idx); } // VEC_EXTRACT fir::ExtendedValue PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 2); auto argBases{getBasesForArgs(args)}; auto argTypes{getTypesForArgs(argBases)}; auto vecTyInfo{getVecTypeFromFir(argBases[0])}; auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; auto varg0{builder.createConvert(loc, mlirTy, argBases[0])}; // arg2 modulo the number of elements in arg1 to determine the element // position auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)}; mlir::Value uremOp{ builder.create(loc, argBases[1], numEle)}; if (!isNativeVecElemOrderOnLE()) 
uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); return builder.create(loc, varg0, uremOp); } // VEC_INSERT fir::ExtendedValue PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 3); auto argBases{getBasesForArgs(args)}; auto argTypes{getTypesForArgs(argBases)}; auto vecTyInfo{getVecTypeFromFir(argBases[1])}; auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; auto varg1{builder.createConvert(loc, mlirTy, argBases[1])}; auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)}; mlir::Value uremOp{ builder.create(loc, argBases[2], numEle)}; if (!isNativeVecElemOrderOnLE()) uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); auto res{builder.create(loc, argBases[0], varg1, uremOp)}; return builder.create(loc, vecTyInfo.toFirVectorType(), res); } // VEC_MERGEH, VEC_MERGEL template fir::ExtendedValue PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 2); auto argBases{getBasesForArgs(args)}; auto vecTyInfo{getVecTypeFromFir(argBases[0])}; llvm::SmallVector mMask; // native vector element order mask llvm::SmallVector rMask; // non-native vector element order mask switch (vop) { case VecOp::Mergeh: { switch (vecTyInfo.len) { case 2: { enum { V1 = 0, V2 = 2 }; mMask = {V1 + 0, V2 + 0}; rMask = {V2 + 1, V1 + 1}; break; } case 4: { enum { V1 = 0, V2 = 4 }; mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1}; rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3}; break; } case 8: { enum { V1 = 0, V2 = 8 }; mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3}; rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7}; break; } case 16: mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17}; rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B, 0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F}; break; default: llvm_unreachable("unexpected vector length"); } break; } case VecOp::Mergel: { switch (vecTyInfo.len) { case 2: { enum { V1 = 0, V2 = 2 }; mMask = {V1 + 1, V2 + 1}; rMask = {V2 + 0, V1 + 0}; break; } case 4: { enum { V1 = 0, V2 = 4 }; mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3}; rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1}; break; } case 8: { enum { V1 = 0, V2 = 8 }; mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7}; rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3}; break; } case 16: mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F}; rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03, 0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07}; break; default: llvm_unreachable("unexpected vector length"); } break; } default: llvm_unreachable("invalid vector operation for generator"); } auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)}; llvm::SmallVector &mergeMask = (isBEVecElemOrderOnLE()) ? 
rMask : mMask; auto callOp{builder.create(loc, vargs[0], vargs[1], mergeMask)}; return builder.createConvert(loc, resultType, callOp); } static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value baseAddr, mlir::Value offset) { auto typeExtent{fir::SequenceType::getUnknownExtent()}; // Construct an !fir.ref> type auto arrRefTy{builder.getRefType(fir::SequenceType::get( {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; // Convert arg to !fir.ref> auto resAddr{builder.create(loc, arrRefTy, baseAddr)}; return builder.create(loc, arrRefTy, resAddr, offset); } static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value v, int64_t len) { assert(v.getType().isa()); assert(len > 0); llvm::SmallVector mask; for (int64_t i = 0; i < len; ++i) { mask.push_back(len - 1 - i); } auto undefVec{builder.create(loc, v.getType())}; return builder.create(loc, v, undefVec, mask); } static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder, const int val) { auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)}; auto alignAttr{mlir::IntegerAttr::get(i64ty, val)}; return builder.getNamedAttr("alignment", alignAttr); } fir::ExtendedValue PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType, llvm::ArrayRef args) { VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)}; switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) { case 8: // vec_xlb1 return genVecLdNoCallGrp(resultType, args); case 16: // vec_xlh8 return genVecLdNoCallGrp(resultType, args); case 32: // vec_xlw4 return genVecLdCallGrp(resultType, args); case 64: // vec_xld2 return genVecLdCallGrp(resultType, args); default: llvm_unreachable("invalid kind"); } llvm_unreachable("invalid vector operation for generator"); } template fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp( mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 2); auto arg0{getBase(args[0])}; auto arg1{getBase(args[1])}; auto vecTyInfo{getVecTypeFromFirType(resultType)}; auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; auto firTy{vecTyInfo.toFirVectorType()}; // Add the %val of arg0 to %addr of arg1 auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; const auto triple{fir::getTargetTriple(builder.getModule())}; // Need to get align 1. auto result{builder.create(loc, mlirTy, addr, getAlignmentAttr(builder, 1))}; if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) || (vop == VecOp::Xlbe && triple.isLittleEndian())) return builder.createConvert( loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len)); return builder.createConvert(loc, firTy, result); } // VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4 template fir::ExtendedValue PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 2); auto context{builder.getContext()}; auto arg0{getBase(args[0])}; auto arg1{getBase(args[1])}; // Prepare the return type in FIR. 
auto vecResTyInfo{getVecTypeFromFirType(resultType)}; auto mlirTy{vecResTyInfo.toMlirVectorType(context)}; auto firTy{vecResTyInfo.toFirVectorType()}; // llvm.ppc.altivec.lvx* returns <4xi32> // Others, like "llvm.ppc.altivec.lvebx" too if arg2 is not of Integer type const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)}; const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)}; // For vec_ld, need to convert arg0 from i64 to i32 if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64) arg0 = builder.createConvert(loc, i32Ty, arg0); // Add the %val of arg0 to %addr of arg1 auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; llvm::SmallVector parsedArgs{addr}; mlir::Type intrinResTy{nullptr}; llvm::StringRef fname{}; switch (vop) { case VecOp::Ld: fname = "llvm.ppc.altivec.lvx"; intrinResTy = mVecI32Ty; break; case VecOp::Lde: switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) { case 8: fname = "llvm.ppc.altivec.lvebx"; intrinResTy = mlirTy; break; case 16: fname = "llvm.ppc.altivec.lvehx"; intrinResTy = mlirTy; break; case 32: fname = "llvm.ppc.altivec.lvewx"; if (mlir::isa(vecResTyInfo.eleTy)) intrinResTy = mlirTy; else intrinResTy = mVecI32Ty; break; default: llvm_unreachable("invalid vector for vec_lde"); } break; case VecOp::Ldl: fname = "llvm.ppc.altivec.lvxl"; intrinResTy = mVecI32Ty; break; case VecOp::Lxvp: fname = "llvm.ppc.vsx.lxvp"; intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1)); break; case VecOp::Xld2: { fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be" : "llvm.ppc.vsx.lxvd2x"; // llvm.ppc.altivec.lxvd2x* returns <2 x double> intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context)); } break; case VecOp::Xlw4: fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be" : "llvm.ppc.vsx.lxvw4x"; // llvm.ppc.altivec.lxvw4x* returns <4xi32> intrinResTy = mVecI32Ty; break; default: llvm_unreachable("invalid vector operation for generator"); } auto funcType{ mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})}; auto funcOp{builder.addNamedFunction(loc, fname, funcType)}; auto result{ builder.create(loc, funcOp, parsedArgs).getResult(0)}; if (vop == VecOp::Lxvp) return result; if (intrinResTy != mlirTy) result = builder.create(loc, mlirTy, result); if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE()) return builder.createConvert( loc, firTy, reverseVectorElements(builder, loc, result, vecResTyInfo.len)); return builder.createConvert(loc, firTy, result); } // VEC_LVSL, VEC_LVSR template fir::ExtendedValue PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 2); auto context{builder.getContext()}; auto arg0{getBase(args[0])}; auto arg1{getBase(args[1])}; auto vecTyInfo{getVecTypeFromFirType(resultType)}; auto mlirTy{vecTyInfo.toMlirVectorType(context)}; auto firTy{vecTyInfo.toFirVectorType()}; // Convert arg0 to i64 type if needed auto i64ty{mlir::IntegerType::get(context, 64)}; if (arg0.getType() != i64ty) arg0 = builder.create(loc, i64ty, arg0); // offset is modulo 16, so shift left 56 bits and then right 56 bits to clear // upper 56 bit while preserving sign auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)}; auto offset{builder.create(loc, arg0, shiftVal)}; auto offset2{builder.create(loc, offset, shiftVal)}; // Add the offsetArg to %addr of arg1 auto addr{addOffsetToAddress(builder, loc, arg1, offset2)}; llvm::SmallVector parsedArgs{addr}; llvm::StringRef fname{}; switch (vop) { case VecOp::Lvsl: fname = 
"llvm.ppc.altivec.lvsl"; break; case VecOp::Lvsr: fname = "llvm.ppc.altivec.lvsr"; break; default: llvm_unreachable("invalid vector operation for generator"); } auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})}; auto funcOp{builder.addNamedFunction(loc, fname, funcType)}; auto result{ builder.create(loc, funcOp, parsedArgs).getResult(0)}; if (isNativeVecElemOrderOnLE()) return builder.createConvert( loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len)); return builder.createConvert(loc, firTy, result); } // VEC_NMADD, VEC_MSUB template fir::ExtendedValue PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 3); auto context{builder.getContext()}; auto argBases{getBasesForArgs(args)}; auto vTypeInfo{getVecTypeFromFir(argBases[0])}; auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)}; const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; static std::map> fmaMap{ {32, std::make_pair( "llvm.fma.v4f32", genFuncType, Ty::RealVector<4>, Ty::RealVector<4>>( context, builder))}, {64, std::make_pair( "llvm.fma.v2f64", genFuncType, Ty::RealVector<8>, Ty::RealVector<8>>( context, builder))}}; auto funcOp{builder.addNamedFunction(loc, std::get<0>(fmaMap[width]), std::get<1>(fmaMap[width]))}; if (vop == VecOp::Nmadd) { // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3) auto callOp{builder.create(loc, funcOp, newArgs)}; // We need to convert fir.vector to MLIR vector to use fneg and then back // to fir.vector to store. auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context), callOp.getResult(0))}; auto neg{builder.create(loc, vCall)}; return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg); } else if (vop == VecOp::Msub) { // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3) newArgs[2] = builder.create(loc, newArgs[2]); auto callOp{builder.create(loc, funcOp, newArgs)}; return callOp.getResult(0); } llvm_unreachable("Invalid vector operation for generator"); } // VEC_PERM, VEC_PERMI template fir::ExtendedValue PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 3); auto context{builder.getContext()}; auto argBases{getBasesForArgs(args)}; auto argTypes{getTypesForArgs(argBases)}; auto vecTyInfo{getVecTypeFromFir(argBases[0])}; auto mlirTy{vecTyInfo.toMlirVectorType(context)}; auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))}; auto vf64Ty{mlir::VectorType::get(2, mlir::FloatType::getF64(context))}; auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])}; auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])}; switch (vop) { case VecOp::Perm: { VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])}; auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)}; auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])}; if (mlirTy != vi32Ty) { mArg0 = builder.create(loc, vi32Ty, mArg0).getResult(); mArg1 = builder.create(loc, vi32Ty, mArg1).getResult(); } auto funcOp{builder.addNamedFunction( loc, "llvm.ppc.altivec.vperm", genFuncType, Ty::IntegerVector<4>, Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context, builder))}; llvm::SmallVector newArgs; if (isNativeVecElemOrderOnLE()) { auto i8Ty{mlir::IntegerType::get(context, 8)}; auto v8Ty{mlir::VectorType::get(16, i8Ty)}; auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)}; auto vNegOne{ builder.create(loc, v8Ty, negOne)}; mMask = builder.create(loc, mMask, vNegOne); newArgs = {mArg1, mArg0, mMask}; } else { newArgs = 
// VEC_PERM, VEC_PERMI
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};

  auto vecTyInfo{getVecTypeFromFir(argBases[0])};
  auto mlirTy{vecTyInfo.toMlirVectorType(context)};

  auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))};
  auto vf64Ty{mlir::VectorType::get(2, mlir::FloatType::getF64(context))};

  auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])};
  auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])};

  switch (vop) {
  case VecOp::Perm: {
    VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])};
    auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)};
    auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])};

    if (mlirTy != vi32Ty) {
      mArg0 = builder.create<mlir::vector::BitCastOp>(loc, vi32Ty, mArg0)
                  .getResult();
      mArg1 = builder.create<mlir::vector::BitCastOp>(loc, vi32Ty, mArg1)
                  .getResult();
    }

    auto funcOp{builder.addNamedFunction(
        loc, "llvm.ppc.altivec.vperm",
        genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                    Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context,
                                                                builder))};

    llvm::SmallVector<mlir::Value> newArgs;
    if (isNativeVecElemOrderOnLE()) {
      auto i8Ty{mlir::IntegerType::get(context, 8)};
      auto v8Ty{mlir::VectorType::get(16, i8Ty)};
      auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)};
      auto vNegOne{builder.create<mlir::vector::SplatOp>(loc, v8Ty, negOne)};
      mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne);
      newArgs = {mArg1, mArg0, mMask};
    } else {
      newArgs = {mArg0, mArg1, mMask};
    }
    auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)};

    if (res.getType() != argTypes[0]) {
      // The fir.call to llvm.ppc.altivec.vperm returns a FIR vector of
      // 4 x i32; convert the result back to the original type.
      res = builder.createConvert(loc, vi32Ty, res);
      if (mlirTy != vi32Ty)
        res = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, res)
                  .getResult();
    }
    return builder.createConvert(loc, resultType, res);
  }
  case VecOp::Permi: {
    // arg3 is a constant.
    auto constIntOp{
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue()
            .dyn_cast_or_null<mlir::IntegerAttr>()};
    assert(constIntOp && "expected integer constant argument");
    auto constInt{constIntOp.getInt()};

    // arg1, arg2, and the result type share the same VecTypeInfo.
    if (vecTyInfo.isFloat()) {
      mArg0 = builder.create<mlir::vector::BitCastOp>(loc, vf64Ty, mArg0)
                  .getResult();
      mArg1 = builder.create<mlir::vector::BitCastOp>(loc, vf64Ty, mArg1)
                  .getResult();
    }

    llvm::SmallVector<int64_t> nMask; // native vector element order mask
    llvm::SmallVector<int64_t> rMask; // non-native vector element order mask
    enum { V1 = 0, V2 = 2 };
    switch (constInt) {
    case 0:
      nMask = {V1 + 0, V2 + 0};
      rMask = {V2 + 1, V1 + 1};
      break;
    case 1:
      nMask = {V1 + 0, V2 + 1};
      rMask = {V2 + 0, V1 + 1};
      break;
    case 2:
      nMask = {V1 + 1, V2 + 0};
      rMask = {V2 + 1, V1 + 0};
      break;
    case 3:
      nMask = {V1 + 1, V2 + 1};
      rMask = {V2 + 0, V1 + 0};
      break;
    default:
      llvm_unreachable("unexpected arg3 value for vec_permi");
    }

    llvm::SmallVector<int64_t> mask = isBEVecElemOrderOnLE() ? rMask : nMask;
    auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)};
    if (res.getType() != mlirTy) {
      auto cast{builder.create<mlir::vector::BitCastOp>(loc, mlirTy, res)};
      return builder.createConvert(loc, resultType, cast);
    }
    return builder.createConvert(loc, resultType, res);
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
}

// VEC_SEL
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSel(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto argBases{getBasesForArgs(args)};
  llvm::SmallVector<VecTypeInfo> vecTyInfos;
  for (size_t i = 0; i < argBases.size(); i++)
    vecTyInfos.push_back(getVecTypeFromFir(argBases[i]));
  auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)};

  auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)};
  auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)};

  // Construct a constant <16 x i8> vector with value -1 for the bitcasts.
  auto bcVecTy{mlir::VectorType::get(16, i8Ty)};
  auto vNegOne{builder.create<mlir::vector::SplatOp>(loc, bcVecTy, negOne)};

  // Bitcast the arguments to bcVecTy.
  auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])};
  auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])};
  auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])};

  // vec_sel(arg1, arg2, arg3) =
  //   (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...)))
  auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)};
  auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)};
  auto a1OrA2{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)};
  auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a1OrA2)};

  auto bcRes{
      builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)};

  return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes);
}
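// Illustrative note: vec_sel needs no PPC intrinsic call; on the <16 x i8>
// view of the operands it is computed purely with bitwise operations,
// conceptually result = (arg2 & arg3) | (arg1 & ~arg3).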
// VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecShift(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto argTypes{getTypesForArgs(argBases)};

  llvm::SmallVector<VecTypeInfo> vecTyInfoArgs;
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0]));
  vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1]));

  // Convert the first two arguments to MLIR vectors.
  llvm::SmallVector<mlir::Type> mlirTyArgs;
  mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context));
  mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context));

  llvm::SmallVector<mlir::Value> mlirVecArgs;
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0]));
  mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1]));

  mlir::Value shftRes{nullptr};

  if (vop == VecOp::Sl || vop == VecOp::Sr) {
    assert(args.size() == 2);

    // Construct the mask; the shift count is taken modulo the element width.
    auto width{
        mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()};
    auto vecVal{builder.createIntegerConstant(
        loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)};
    auto mask{
        builder.create<mlir::vector::SplatOp>(loc, mlirTyArgs[1], vecVal)};
    auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)};

    mlir::Value res{nullptr};
    if (vop == VecOp::Sr)
      res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft);
    else if (vop == VecOp::Sl)
      res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft);

    shftRes = builder.createConvert(loc, argTypes[0], res);
  } else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl ||
             vop == VecOp::Sro) {
    assert(args.size() == 2);

    // Bitcast to vector<4xi32>.
    auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))};
    if (mlirTyArgs[0] != bcVecTy)
      mlirVecArgs[0] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]);
    if (mlirTyArgs[1] != bcVecTy)
      mlirVecArgs[1] =
          builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]);

    llvm::StringRef funcName;
    switch (vop) {
    case VecOp::Srl:
      funcName = "llvm.ppc.altivec.vsr";
      break;
    case VecOp::Sro:
      funcName = "llvm.ppc.altivec.vsro";
      break;
    case VecOp::Sll:
      funcName = "llvm.ppc.altivec.vsl";
      break;
    case VecOp::Slo:
      funcName = "llvm.ppc.altivec.vslo";
      break;
    default:
      llvm_unreachable("unknown vector shift operation");
    }
    auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                            Ty::IntegerVector<4>>(context, builder)};
    mlir::func::FuncOp funcOp{builder.addNamedFunction(loc, funcName, funcTy)};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)};

    // If the result vector type differs from the original type, convert to an
    // MLIR vector, bitcast, and then convert back to a FIR vector.
    if (callOp.getResult(0).getType() != argTypes[0]) {
      auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0));
      res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res);
      shftRes = builder.createConvert(loc, argTypes[0], res);
    } else {
      shftRes = callOp.getResult(0);
    }
  } else if (vop == VecOp::Sld || vop == VecOp::Sldw) {
    assert(args.size() == 3);
    auto constIntOp =
        mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
            .getValue()
            .dyn_cast_or_null<mlir::IntegerAttr>();
    assert(constIntOp && "expected integer constant argument");

    // Bitcast to vector<16xi8>.
    auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))};
    if (mlirTyArgs[0] != vi8Ty) {
      mlirVecArgs[0] =
          builder.create<mlir::vector::BitCastOp>(loc, vi8Ty, mlirVecArgs[0])
              .getResult();
      mlirVecArgs[1] =
          builder.create<mlir::vector::BitCastOp>(loc, vi8Ty, mlirVecArgs[1])
              .getResult();
    }

    // Construct the mask for shuffling.
    auto shiftVal{constIntOp.getInt()};
    if (vop == VecOp::Sldw)
      shiftVal = shiftVal << 2;
    shiftVal &= 0xF;
    llvm::SmallVector<int64_t> mask;

    // Shuffle with a mask based on the endianness.
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      for (int i = 16; i < 32; ++i)
        mask.push_back(i - shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1],
                                                        mlirVecArgs[0], mask);
    } else {
      for (int i = 0; i < 16; ++i)
        mask.push_back(i + shiftVal);
      shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0],
                                                        mlirVecArgs[1], mask);
    }

    // Bitcast back to the original type.
    if (shftRes.getType() != mlirTyArgs[0])
      shftRes =
          builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], shftRes);

    return builder.createConvert(loc, resultType, shftRes);
  } else
    llvm_unreachable("Invalid vector operation for generator");

  return shftRes;
}
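// Illustrative summary of the shift families handled above (the intrinsic
// names are the ones selected in the switches; everything else is commentary):
//   vec_sl / vec_sr        element-wise arith shifts, count taken modulo the
//                          element width
//   vec_sll / vec_slo /
//   vec_srl / vec_sro      llvm.ppc.altivec.vsl/vslo/vsr/vsro on a
//                          vector<4xi32> bitcast of the operands
//   vec_sld / vec_sldw     a vector.shuffle over the <16 x i8> views with a
//                          mask derived from the constant shift argument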
// VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};

  mlir::vector::SplatOp splatOp{nullptr};
  mlir::Type retTy{nullptr};
  switch (vop) {
  case VecOp::Splat: {
    assert(args.size() == 2);
    auto vecTyInfo{getVecTypeFromFir(argBases[0])};
    auto extractOp{genVecExtract(resultType, args)};
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splats: {
    assert(args.size() == 1);
    auto vecTyInfo{getVecTypeFromEle(argBases[0])};
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, argBases[0], vecTyInfo.toMlirVectorType(context));
    retTy = vecTyInfo.toFirVectorType();
    break;
  }
  case VecOp::Splat_s32: {
    assert(args.size() == 1);
    auto eleTy{builder.getIntegerType(32)};
    auto intOp{builder.createConvert(loc, eleTy, argBases[0])};

    // The intrinsic always returns vector(integer(4)).
    splatOp = builder.create<mlir::vector::SplatOp>(
        loc, intOp, mlir::VectorType::get(4, eleTy));
    retTy = fir::VectorType::get(4, eleTy);
    break;
  }
  default:
    llvm_unreachable("invalid vector operation for generator");
  }
  return builder.createConvert(loc, retTy, splatOp);
}

fir::ExtendedValue
PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
                                llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto arg0{getBase(args[0])};
  auto arg1{getBase(args[1])};

  // Prepare the return type in FIR.
auto vecTyInfo{getVecTypeFromFirType(resultType)}; auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; auto firTy{vecTyInfo.toFirVectorType()}; // Add the %val of arg0 to %addr of arg1 auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)}; auto i64VecTy{mlir::VectorType::get(2, i64Ty)}; auto i64RefTy{builder.getRefType(i64Ty)}; auto addrConv{builder.create(loc, i64RefTy, addr)}; auto addrVal{builder.create(loc, addrConv)}; auto splatRes{builder.create(loc, addrVal, i64VecTy)}; mlir::Value result{nullptr}; if (mlirTy != splatRes.getType()) { result = builder.create(loc, mlirTy, splatRes); } else result = splatRes; return builder.createConvert(loc, firTy, result); } const char *getMmaIrIntrName(MMAOp mmaOp) { switch (mmaOp) { case MMAOp::AssembleAcc: return "llvm.ppc.mma.assemble.acc"; case MMAOp::AssemblePair: return "llvm.ppc.vsx.assemble.pair"; case MMAOp::DisassembleAcc: return "llvm.ppc.mma.disassemble.acc"; case MMAOp::DisassemblePair: return "llvm.ppc.vsx.disassemble.pair"; case MMAOp::Xxmfacc: return "llvm.ppc.mma.xxmfacc"; case MMAOp::Xxmtacc: return "llvm.ppc.mma.xxmtacc"; case MMAOp::Xxsetaccz: return "llvm.ppc.mma.xxsetaccz"; case MMAOp::Pmxvbf16ger2: return "llvm.ppc.mma.pmxvbf16ger2"; case MMAOp::Pmxvbf16ger2nn: return "llvm.ppc.mma.pmxvbf16ger2nn"; case MMAOp::Pmxvbf16ger2np: return "llvm.ppc.mma.pmxvbf16ger2np"; case MMAOp::Pmxvbf16ger2pn: return "llvm.ppc.mma.pmxvbf16ger2pn"; case MMAOp::Pmxvbf16ger2pp: return "llvm.ppc.mma.pmxvbf16ger2pp"; case MMAOp::Pmxvf16ger2: return "llvm.ppc.mma.pmxvf16ger2"; case MMAOp::Pmxvf16ger2nn: return "llvm.ppc.mma.pmxvf16ger2nn"; case MMAOp::Pmxvf16ger2np: return "llvm.ppc.mma.pmxvf16ger2np"; case MMAOp::Pmxvf16ger2pn: return "llvm.ppc.mma.pmxvf16ger2pn"; case MMAOp::Pmxvf16ger2pp: return "llvm.ppc.mma.pmxvf16ger2pp"; case MMAOp::Pmxvf32ger: return "llvm.ppc.mma.pmxvf32ger"; case MMAOp::Pmxvf32gernn: return "llvm.ppc.mma.pmxvf32gernn"; case MMAOp::Pmxvf32gernp: return "llvm.ppc.mma.pmxvf32gernp"; case MMAOp::Pmxvf32gerpn: return "llvm.ppc.mma.pmxvf32gerpn"; case MMAOp::Pmxvf32gerpp: return "llvm.ppc.mma.pmxvf32gerpp"; case MMAOp::Pmxvf64ger: return "llvm.ppc.mma.pmxvf64ger"; case MMAOp::Pmxvf64gernn: return "llvm.ppc.mma.pmxvf64gernn"; case MMAOp::Pmxvf64gernp: return "llvm.ppc.mma.pmxvf64gernp"; case MMAOp::Pmxvf64gerpn: return "llvm.ppc.mma.pmxvf64gerpn"; case MMAOp::Pmxvf64gerpp: return "llvm.ppc.mma.pmxvf64gerpp"; case MMAOp::Pmxvi16ger2: return "llvm.ppc.mma.pmxvi16ger2"; case MMAOp::Pmxvi16ger2pp: return "llvm.ppc.mma.pmxvi16ger2pp"; case MMAOp::Pmxvi16ger2s: return "llvm.ppc.mma.pmxvi16ger2s"; case MMAOp::Pmxvi16ger2spp: return "llvm.ppc.mma.pmxvi16ger2spp"; case MMAOp::Pmxvi4ger8: return "llvm.ppc.mma.pmxvi4ger8"; case MMAOp::Pmxvi4ger8pp: return "llvm.ppc.mma.pmxvi4ger8pp"; case MMAOp::Pmxvi8ger4: return "llvm.ppc.mma.pmxvi8ger4"; case MMAOp::Pmxvi8ger4pp: return "llvm.ppc.mma.pmxvi8ger4pp"; case MMAOp::Pmxvi8ger4spp: return "llvm.ppc.mma.pmxvi8ger4spp"; case MMAOp::Xvbf16ger2: return "llvm.ppc.mma.xvbf16ger2"; case MMAOp::Xvbf16ger2nn: return "llvm.ppc.mma.xvbf16ger2nn"; case MMAOp::Xvbf16ger2np: return "llvm.ppc.mma.xvbf16ger2np"; case MMAOp::Xvbf16ger2pn: return "llvm.ppc.mma.xvbf16ger2pn"; case MMAOp::Xvbf16ger2pp: return "llvm.ppc.mma.xvbf16ger2pp"; case MMAOp::Xvf16ger2: return "llvm.ppc.mma.xvf16ger2"; case MMAOp::Xvf16ger2nn: return "llvm.ppc.mma.xvf16ger2nn"; case MMAOp::Xvf16ger2np: return "llvm.ppc.mma.xvf16ger2np"; case MMAOp::Xvf16ger2pn: return 
"llvm.ppc.mma.xvf16ger2pn"; case MMAOp::Xvf16ger2pp: return "llvm.ppc.mma.xvf16ger2pp"; case MMAOp::Xvf32ger: return "llvm.ppc.mma.xvf32ger"; case MMAOp::Xvf32gernn: return "llvm.ppc.mma.xvf32gernn"; case MMAOp::Xvf32gernp: return "llvm.ppc.mma.xvf32gernp"; case MMAOp::Xvf32gerpn: return "llvm.ppc.mma.xvf32gerpn"; case MMAOp::Xvf32gerpp: return "llvm.ppc.mma.xvf32gerpp"; case MMAOp::Xvf64ger: return "llvm.ppc.mma.xvf64ger"; case MMAOp::Xvf64gernn: return "llvm.ppc.mma.xvf64gernn"; case MMAOp::Xvf64gernp: return "llvm.ppc.mma.xvf64gernp"; case MMAOp::Xvf64gerpn: return "llvm.ppc.mma.xvf64gerpn"; case MMAOp::Xvf64gerpp: return "llvm.ppc.mma.xvf64gerpp"; case MMAOp::Xvi16ger2: return "llvm.ppc.mma.xvi16ger2"; case MMAOp::Xvi16ger2pp: return "llvm.ppc.mma.xvi16ger2pp"; case MMAOp::Xvi16ger2s: return "llvm.ppc.mma.xvi16ger2s"; case MMAOp::Xvi16ger2spp: return "llvm.ppc.mma.xvi16ger2spp"; case MMAOp::Xvi4ger8: return "llvm.ppc.mma.xvi4ger8"; case MMAOp::Xvi4ger8pp: return "llvm.ppc.mma.xvi4ger8pp"; case MMAOp::Xvi8ger4: return "llvm.ppc.mma.xvi8ger4"; case MMAOp::Xvi8ger4pp: return "llvm.ppc.mma.xvi8ger4pp"; case MMAOp::Xvi8ger4spp: return "llvm.ppc.mma.xvi8ger4spp"; } llvm_unreachable("getMmaIrIntrName"); } mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) { switch (mmaOp) { case MMAOp::AssembleAcc: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4); case MMAOp::AssemblePair: return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::DisassembleAcc: return genMmaDisassembleFuncType(context, mmaOp); case MMAOp::DisassemblePair: return genMmaDisassembleFuncType(context, mmaOp); case MMAOp::Xxmfacc: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0); case MMAOp::Xxmtacc: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0); case MMAOp::Xxsetaccz: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0); case MMAOp::Pmxvbf16ger2: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvbf16ger2nn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvbf16ger2np: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvbf16ger2pn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvbf16ger2pp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvf16ger2: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvf16ger2nn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvf16ger2np: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvf16ger2pn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvf16ger2pp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvf32ger: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 2); case MMAOp::Pmxvf32gernn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 2); case MMAOp::Pmxvf32gernp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 2); case MMAOp::Pmxvf32gerpn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 2); case 
MMAOp::Pmxvf32gerpp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 2); case MMAOp::Pmxvf64ger: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1, /*Integer*/ 2); case MMAOp::Pmxvf64gernn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, /*Integer*/ 2); case MMAOp::Pmxvf64gernp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, /*Integer*/ 2); case MMAOp::Pmxvf64gerpn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, /*Integer*/ 2); case MMAOp::Pmxvf64gerpp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, /*Integer*/ 2); case MMAOp::Pmxvi16ger2: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi16ger2pp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi16ger2s: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi16ger2spp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi4ger8: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi4ger8pp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi8ger4: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi8ger4pp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Pmxvi8ger4spp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, /*Integer*/ 3); case MMAOp::Xvbf16ger2: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvbf16ger2nn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvbf16ger2np: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvbf16ger2pn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvbf16ger2pp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf16ger2: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf16ger2nn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf16ger2np: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf16ger2pn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf16ger2pp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf32ger: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf32gernn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf32gernp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf32gerpn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf32gerpp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); case MMAOp::Xvf64ger: return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1); case MMAOp::Xvf64gernn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); case MMAOp::Xvf64gernp: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); case MMAOp::Xvf64gerpn: return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); case MMAOp::Xvf64gerpp: return 
genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  }
  llvm_unreachable("getMmaIrFuncType");
}
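// Illustrative note for the MMA handler below: getMmaIrIntrName and
// getMmaIrFuncType pair each MMAOp with its llvm.ppc.mma.* / llvm.ppc.vsx.*
// intrinsic name and function type. Assuming the SubToFunc handling (argument
// names here are hypothetical), a subroutine call such as
//   call __ppc_mma_xvf32ger(acc, a, b)
// is rewritten roughly as
//   %r = fir.call @llvm.ppc.mma.xvf32ger(%a, %b)
//   fir.store %r to %acc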
template <MMAOp IntrId, MMAHandlerOp HandlerOp>
void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)};
  mlir::func::FuncOp funcOp{
      builder.addNamedFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)};
  llvm::SmallVector<mlir::Value> intrArgs;

  // Depending on SubToFunc, change the subroutine call to a function call.
  // The first argument represents the result; the rest of the arguments
  // are shifted one position to form the actual argument list.
  size_t argStart{0};
  size_t argStep{1};
  size_t e{args.size()};
  if (HandlerOp == MMAHandlerOp::SubToFunc) {
    // The first argument becomes the function result. Start from the second
    // argument.
    argStart = 1;
  } else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) {
    // Reverse the argument order on little-endian targets only.
    // The reversal does not depend on the setting of the non-native-order
    // option.
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      // Load the arguments in reverse order.
      argStart = args.size() - 1;
      // The first argument becomes the function result. Stop at the second
      // argument.
      e = 0;
      argStep = -1;
    } else {
      // Load the arguments in natural order.
      // The first argument becomes the function result. Start from the second
      // argument.
      argStart = 1;
    }
  }
  for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) {
    auto v{fir::getBase(args[i])};
    if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) {
      // The first argument is passed in as an address. We need to load
      // the content to match the LLVM interface.
      v = builder.create<fir::LoadOp>(loc, v);
    }
    auto vType{v.getType()};
    mlir::Type targetType{intrFuncType.getInput(j)};
    if (vType != targetType) {
      if (targetType.isa<mlir::VectorType>()) {
        // Perform a vector type conversion for arguments passed by value.
        auto eleTy{vType.dyn_cast<fir::VectorType>().getEleTy()};
        auto len{vType.dyn_cast<fir::VectorType>().getLen()};
        mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy);
        auto v0{builder.createConvert(loc, mlirType, v)};
        auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)};
        intrArgs.push_back(v1);
      } else if (targetType.isa<mlir::IntegerType>() &&
                 vType.isa<mlir::IntegerType>()) {
        auto v0{builder.createConvert(loc, targetType, v)};
        intrArgs.push_back(v0);
      } else {
        llvm::errs() << "\nUnexpected type conversion requested: "
                     << " from " << vType << " to " << targetType << "\n";
        llvm_unreachable("Unsupported type conversion for argument to PowerPC "
                         "MMA intrinsic");
      }
    } else {
      intrArgs.push_back(v);
    }
  }
  auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)};
  if (HandlerOp == MMAHandlerOp::SubToFunc ||
      HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE ||
      HandlerOp == MMAHandlerOp::FirstArgIsResult) {
    // Convert the pointer type if needed.
    mlir::Value callResult{callSt.getResult(0)};
    mlir::Value destPtr{fir::getBase(args[0])};
    mlir::Type callResultPtrType{builder.getRefType(callResult.getType())};
    if (destPtr.getType() != callResultPtrType) {
      destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr);
    }
    // Copy the result.
    builder.create<fir::StoreOp>(loc, callResult, destPtr);
  }
}

// VEC_ST, VEC_STE
template <VecOp vop>
void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  llvm::StringRef fname{};
  mlir::VectorType stTy{nullptr};
  auto i32ty{mlir::IntegerType::get(context, 32)};
  switch (vop) {
  case VecOp::St:
    stTy = mlir::VectorType::get(4, i32ty);
    fname = "llvm.ppc.altivec.stvx";
    break;
  case VecOp::Ste: {
    const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()};
    const auto len{arg1TyInfo.len};

    if (arg1TyInfo.isFloat32()) {
      stTy = mlir::VectorType::get(len, i32ty);
      fname = "llvm.ppc.altivec.stvewx";
    } else if (arg1TyInfo.eleTy.isa<mlir::IntegerType>()) {
      stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width));

      switch (width) {
      case 8:
        fname = "llvm.ppc.altivec.stvebx";
        break;
      case 16:
        fname = "llvm.ppc.altivec.stvehx";
        break;
      case 32:
        fname = "llvm.ppc.altivec.stvewx";
        break;
      default:
        assert(false && "invalid element size");
      }
    } else
      assert(false && "unknown type");
    break;
  }
  case VecOp::Stxvp:
    // __vector_pair type
    stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    fname = "llvm.ppc.vsx.stxvp";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)};
  mlir::func::FuncOp funcOp = builder.addNamedFunction(loc, fname, funcType);

  llvm::SmallVector<mlir::Value> biArgs;

  if (vop == VecOp::Stxvp) {
    biArgs.push_back(argBases[0]);
    biArgs.push_back(addr);
    builder.create<fir::CallOp>(loc, funcOp, biArgs);
    return;
  }

  auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())};
  auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context),
                                 argBases[0])};

  mlir::Value newArg1{nullptr};
  if (stTy != arg1TyInfo.toMlirVectorType(context))
    newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv);
  else
    newArg1 = cnv;

  if (isBEVecElemOrderOnLE())
    newArg1 = builder.createConvert(
        loc, stTy, reverseVectorElements(builder, loc, newArg1, 4));

  biArgs.push_back(newArg1);
  biArgs.push_back(addr);

  builder.create<fir::CallOp>(loc, funcOp, biArgs);
}
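// Illustrative sketch: vec_st mirrors the load path above, e.g.
// vec_st(v, i, target) roughly becomes
//   %addr = address of target advanced by i    (addOffsetToAddress)
//   fir.call @llvm.ppc.altivec.stvx(%v as vector<4xi32>, %addr)
// with an element reversal inserted first when BE element order is in effect
// on a little-endian target. SSA names are illustrative only.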
// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
template <VecOp vop>
void PPCIntrinsicLibrary::genVecXStore(
    llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  mlir::Value trg{nullptr};
  mlir::Value src{nullptr};

  switch (vop) {
  case VecOp::Xst:
  case VecOp::Xst_be: {
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);

    if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) {
      auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                     argBases[0])};
      auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)};

      src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf);
    }
    break;
  }
  case VecOp::Xstd2:
  case VecOp::Xstw4: {
    // A 16-byte vector arg1 is treated as two 8-byte elements or
    // four 4-byte elements.
    mlir::IntegerType elemTy;
    uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4;
    elemTy = builder.getIntegerType(128 / numElem);

    mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)};
    fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)};

    auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                   argBases[0])};

    mlir::Type srcTy{nullptr};
    if (numElem != arg1TyInfo.len) {
      cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv);
      srcTy = firVecTy;
    } else {
      srcTy = arg1TyInfo.toFirVectorType();
    }

    trg = builder.createConvert(loc, builder.getRefType(srcTy), addr);

    if (isBEVecElemOrderOnLE())
      cnv = reverseVectorElements(builder, loc, cnv, numElem);

    src = builder.createConvert(loc, srcTy, cnv);
    break;
  }
  case VecOp::Stxv:
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);
    break;
  default:
    assert(false && "Invalid vector operation for generator");
  }
  builder.create<fir::StoreOp>(loc, mlir::TypeRange{},
                               mlir::ValueRange{src, trg},
                               getAlignmentAttr(builder, 1));
}

} // namespace fir