//===-- Generate random but valid function descriptors -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "automemcpy/RandomFunctionGenerator.h"

#include <cstddef>
#include <optional>

#include <z3++.h>

namespace llvm {
namespace automemcpy {

// Exploration parameters
// ----------------------
// Here we define a set of values that constrain the exploration and limit
// combinatorial explosion.

// We limit the number of cases for individual sizes to sizes up to 4.
// More individual sizes don't bring much over the overlapping strategy.
static constexpr int kMaxIndividualSize = 4;

// We limit the Overlapping Strategy to sizes up to 256.
// An overlap of 256B means accessing 128B at once, which is usually not
// feasible by current CPUs. We rely on the compiler to generate multiple
// loads/stores if needed, but higher sizes are unlikely to benefit from
// hardware acceleration.
static constexpr int kMaxOverlapSize = 256;

// For the loop strategies, we make sure that they iterate at least a certain
// number of times to amortize the cost of looping.
static constexpr int kLoopMinIter = 3;
static constexpr int kAlignedLoopMinIter = 2;

// We restrict the size of the block of data to handle in a loop.
// Generally speaking, block sizes <= 16 perform poorly.
static constexpr int kLoopBlockSize[] = {16, 32, 64};

// We restrict alignment to the following values.
static constexpr int kLoopAlignments[] = {16, 32, 64};

// We make sure that the region bounds are one of the following values.
static constexpr int kAnchors[] = {0,  1,  2,   4,   8,   16,   32,      48,
                                   64, 96, 128, 256, 512, 1024, kMaxSize};

// We also allow disabling loops, aligned loops and accelerators.
static constexpr bool kDisableLoop = false;
static constexpr bool kDisableAlignedLoop = false;
static constexpr bool kDisableAccelerator = false;

// For memcpy, we can also explore whether aligning on source or destination
// has an effect.
static constexpr bool kExploreAlignmentArg = true;

// The functions we generate code for.
// BCMP is specifically disabled for now.
static constexpr int kFunctionTypes[] = {
    (int)FunctionType::MEMCPY,
    (int)FunctionType::MEMCMP,
    // (int)FunctionType::BCMP,
    (int)FunctionType::MEMSET,
    (int)FunctionType::BZERO,
};

// The actual implementation of each function can be handled via primitive
// types (SCALAR), vector types where available (NATIVE) or by the compiler
// (BUILTIN). We want to move toward delegating the code generation entirely
// to the compiler, but for now we have to make use of custom,
// per-microarchitecture implementations. SCALAR is more portable but also
// less performant, so we leave it out as well.
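// As with kFunctionTypes above, the disabled classes are kept as commented-out
// entries so the exploration space can be widened again by re-enabling them.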
static constexpr int kElementClasses[] = {
    // (int)ElementTypeClass::SCALAR,
    (int)ElementTypeClass::NATIVE,
    // (int)ElementTypeClass::BUILTIN
};

RandomFunctionGenerator::RandomFunctionGenerator()
    : Solver(Context), Type(Context.int_const("Type")),
      ContiguousBegin(Context.int_const("ContiguousBegin")),
      ContiguousEnd(Context.int_const("ContiguousEnd")),
      OverlapBegin(Context.int_const("OverlapBegin")),
      OverlapEnd(Context.int_const("OverlapEnd")),
      LoopBegin(Context.int_const("LoopBegin")),
      LoopEnd(Context.int_const("LoopEnd")),
      LoopBlockSize(Context.int_const("LoopBlockSize")),
      AlignedLoopBegin(Context.int_const("AlignedLoopBegin")),
      AlignedLoopEnd(Context.int_const("AlignedLoopEnd")),
      AlignedLoopBlockSize(Context.int_const("AlignedLoopBlockSize")),
      AlignedAlignment(Context.int_const("AlignedAlignment")),
      AlignedArg(Context.int_const("AlignedArg")),
      AcceleratorBegin(Context.int_const("AcceleratorBegin")),
      AcceleratorEnd(Context.int_const("AcceleratorEnd")),
      ElementClass(Context.int_const("ElementClass")) {
  // All possible functions.
  Solver.add(inSetConstraint(Type, kFunctionTypes));

  // Add constraints for region bounds.
  addBoundsAndAnchors(ContiguousBegin, ContiguousEnd);
  addBoundsAndAnchors(OverlapBegin, OverlapEnd);
  addBoundsAndAnchors(LoopBegin, LoopEnd);
  addBoundsAndAnchors(AlignedLoopBegin, AlignedLoopEnd);
  addBoundsAndAnchors(AcceleratorBegin, AcceleratorEnd);

  // We always consider strategies in this order, and we always end with the
  // `Accelerator` strategy, as it's typically more efficient for large sizes.
  // Contiguous <= Overlap <= Loop <= AlignedLoop <= Accelerator
  Solver.add(ContiguousEnd == OverlapBegin);
  Solver.add(OverlapEnd == LoopBegin);
  Solver.add(LoopEnd == AlignedLoopBegin);
  Solver.add(AlignedLoopEnd == AcceleratorBegin);

  // Fix endpoints: The minimum size that we want to copy is 0, and we always
  // start with the `Contiguous` strategy. The max size is `kMaxSize`.
  Solver.add(ContiguousBegin == 0);
  Solver.add(AcceleratorEnd == kMaxSize);

  // Contiguous
  Solver.add(ContiguousEnd <= kMaxIndividualSize + 1);

  // Overlap
  Solver.add(OverlapEnd <= kMaxOverlapSize + 1);
  // Overlap only ever makes sense when accessing multiple bytes at a time.
  // i.e. Overlap<1> is useless.
  Solver.add(OverlapBegin == OverlapEnd || OverlapBegin >= 2);

  // Loop
  addLoopConstraints(LoopBegin, LoopEnd, LoopBlockSize, kLoopMinIter);

  // Aligned Loop
  addLoopConstraints(AlignedLoopBegin, AlignedLoopEnd, AlignedLoopBlockSize,
                     kAlignedLoopMinIter);
  Solver.add(inSetConstraint(AlignedAlignment, kLoopAlignments));
  Solver.add(AlignedLoopBegin == AlignedLoopEnd || AlignedLoopBegin >= 64);
  Solver.add(AlignedLoopBlockSize >= AlignedAlignment);
  Solver.add(AlignedLoopBlockSize >= LoopBlockSize);
  z3::expr IsMemcpy = Type == (int)FunctionType::MEMCPY;
  z3::expr ExploreAlignment = IsMemcpy && kExploreAlignmentArg;
  Solver.add(
      (ExploreAlignment &&
       inSetConstraint(AlignedArg, {(int)AlignArg::_1, (int)AlignArg::_2})) ||
      (!ExploreAlignment && AlignedArg == (int)AlignArg::_1));

  // Accelerator
  // Only Memcpy has accelerator for now.
  Solver.add(IsMemcpy || (AcceleratorBegin == AcceleratorEnd));

  // Element classes
  Solver.add(inSetConstraint(ElementClass, kElementClasses));

  if (kDisableLoop)
    Solver.add(LoopBegin == LoopEnd);
  if (kDisableAlignedLoop)
    Solver.add(AlignedLoopBegin == AlignedLoopEnd);
  if (kDisableAccelerator)
    Solver.add(AcceleratorBegin == AcceleratorEnd);
}
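
// For illustration only (a hypothetical, hand-picked assignment, not
// necessarily the first one the solver produces): a memcpy descriptor could
// partition [0, kMaxSize] as Contiguous [0, 4), Overlap [4, 256),
// Loop [256, 1024) with a 64B block, AlignedLoop [1024, kMaxSize) with a 64B
// block aligned to 16B on the first argument, and an empty Accelerator
// region; such an assignment satisfies every constraint added above.
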
// Creates a SizeSpan from Begin/End values.
// Returns std::nullopt if Begin == End.
static std::optional<SizeSpan> AsSizeSpan(size_t Begin, size_t End) {
  if (Begin == End)
    return std::nullopt;
  SizeSpan SS;
  SS.Begin = Begin;
  SS.End = End;
  return SS;
}

// Generic method to create a `Region` struct with a Span, or std::nullopt if
// the span is empty.
template <typename Region>
static std::optional<Region> As(size_t Begin, size_t End) {
  if (auto Span = AsSizeSpan(Begin, End)) {
    Region Output;
    Output.Span = *Span;
    return Output;
  }
  return std::nullopt;
}

// Returns a Loop struct, or std::nullopt if the span is empty.
static std::optional<Loop> AsLoop(size_t Begin, size_t End, size_t BlockSize) {
  if (auto Span = AsSizeSpan(Begin, End)) {
    Loop Output;
    Output.Span = *Span;
    Output.BlockSize = BlockSize;
    return Output;
  }
  return std::nullopt;
}

// Returns an AlignedLoop struct, or std::nullopt if the span is empty.
static std::optional<AlignedLoop> AsAlignedLoop(size_t Begin, size_t End,
                                                size_t BlockSize,
                                                size_t Alignment,
                                                AlignArg AlignTo) {
  if (auto Loop = AsLoop(Begin, End, BlockSize)) {
    AlignedLoop Output;
    Output.Loop = *Loop;
    Output.Alignment = Alignment;
    Output.AlignTo = AlignTo;
    return Output;
  }
  return std::nullopt;
}

std::optional<FunctionDescriptor> RandomFunctionGenerator::next() {
  if (Solver.check() != z3::sat)
    return {};
  z3::model m = Solver.get_model();
  // Helper method to get the current numerical value of a z3::expr.
  const auto E = [&m](z3::expr &V) -> int {
    return m.eval(V).get_numeral_int();
  };
  // Fill in the function descriptor to return.
  FunctionDescriptor R;
  R.Type = FunctionType(E(Type));
  R.Contiguous = As<Contiguous>(E(ContiguousBegin), E(ContiguousEnd));
  R.Overlap = As<Overlap>(E(OverlapBegin), E(OverlapEnd));
  R.Loop = AsLoop(E(LoopBegin), E(LoopEnd), E(LoopBlockSize));
  R.AlignedLoop = AsAlignedLoop(E(AlignedLoopBegin), E(AlignedLoopEnd),
                                E(AlignedLoopBlockSize), E(AlignedAlignment),
                                AlignArg(E(AlignedArg)));
  R.Accelerator = As<Accelerator>(E(AcceleratorBegin), E(AcceleratorEnd));
  R.ElementClass = ElementTypeClass(E(ElementClass));
  // Express the current state as a set of constraints.
  z3::expr CurrentLayout =
      (Type == E(Type)) && (ContiguousBegin == E(ContiguousBegin)) &&
      (ContiguousEnd == E(ContiguousEnd)) &&
      (OverlapBegin == E(OverlapBegin)) && (OverlapEnd == E(OverlapEnd)) &&
      (LoopBegin == E(LoopBegin)) && (LoopEnd == E(LoopEnd)) &&
      (LoopBlockSize == E(LoopBlockSize)) &&
      (AlignedLoopBegin == E(AlignedLoopBegin)) &&
      (AlignedLoopEnd == E(AlignedLoopEnd)) &&
      (AlignedLoopBlockSize == E(AlignedLoopBlockSize)) &&
      (AlignedAlignment == E(AlignedAlignment)) &&
      (AlignedArg == E(AlignedArg)) &&
      (AcceleratorBegin == E(AcceleratorBegin)) &&
      (AcceleratorEnd == E(AcceleratorEnd)) &&
      (ElementClass == E(ElementClass));
  // Ask the solver to never show this configuration ever again.
  Solver.add(!CurrentLayout);
  return R;
}

// Make sure `Variable` is one of the provided values.
z3::expr RandomFunctionGenerator::inSetConstraint(z3::expr &Variable,
                                                  ArrayRef<int> Values) const {
  z3::expr_vector Args(Variable.ctx());
  for (int Value : Values)
    Args.push_back(Variable == Value);
  return z3::mk_or(Args);
}

void RandomFunctionGenerator::addBoundsAndAnchors(z3::expr &Begin,
                                                  z3::expr &End) {
  // Begin and End are picked amongst a set of predefined values.
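  // They are also ordered and kept within [0, kMaxSize], so the resulting
  // span is well formed (and empty when Begin == End).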
  Solver.add(inSetConstraint(Begin, kAnchors));
  Solver.add(inSetConstraint(End, kAnchors));
  Solver.add(Begin >= 0);
  Solver.add(Begin <= End);
  Solver.add(End <= kMaxSize);
}

void RandomFunctionGenerator::addLoopConstraints(const z3::expr &LoopBegin,
                                                 const z3::expr &LoopEnd,
                                                 z3::expr &LoopBlockSize,
                                                 int LoopMinIter) {
  Solver.add(inSetConstraint(LoopBlockSize, kLoopBlockSize));
  // Unless the loop region is empty, only let it start at sizes large enough
  // to run at least `LoopMinIter` iterations of `LoopBlockSize` bytes.
  Solver.add(LoopBegin == LoopEnd ||
             (LoopBegin > (LoopMinIter * LoopBlockSize)));
}

} // namespace automemcpy
} // namespace llvm
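
// Illustrative usage (a minimal sketch of a hypothetical caller; `Consume` is
// a placeholder for whatever processes the descriptors): `next()` is called
// until the solver has exhausted all valid configurations.
//
//   llvm::automemcpy::RandomFunctionGenerator Generator;
//   while (std::optional<llvm::automemcpy::FunctionDescriptor> FD =
//              Generator.next())
//     Consume(*FD); // Each valid descriptor is returned exactly once.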