//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This code analyzes the json files produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks
//   the median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores them from worst (0)
//   to best (1).
// - Each distribution categorizes each function into one of the following
//   categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
//   BAD.
// - A process similar to the Majority Judgment voting system is used to
//   `elect` the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example both functions grade EXCELLENT but we may prefer the second one.
//
//   |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
//   |------------|-----------|-----------|------|----------| ...
//   | Function_1 |         7 |         1 |    2 |          | ...
//   | Function_2 |         6 |         4 |      |          | ...
//
//===----------------------------------------------------------------------===//

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include <numeric>
#include <unordered_map>

namespace llvm {
namespace automemcpy {

StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
}

// Maps a normalized score in [0, 1] onto one of seven equally sized grade
// buckets.
Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}
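
// As an illustration (hypothetical scores, not from a real run): 0.90
// satisfies 0.90 >= 6./7 (~0.857) and grades EXCELLENT, while 0.50 only
// clears the 3./7 (~0.429) threshold and grades PASSABLE:
//
//   Grade::judge(0.90); // EXCELLENT
//   Grade::judge(0.50); // PASSABLE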

// Unbiased sample variance: sum of squared deviations from the sample mean,
// divided by (N - 1).
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}

// Reduces the raw throughput samples of a single (function, distribution)
// pair to their mean, unbiased variance and median.
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}

std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    // Aggregate samples trail the iteration samples, so we can stop at the
    // first non-iteration sample.
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}

// Normalizes the median throughput of each function to a [0, 1] score, where
// 0 (resp. 1) is the worst (resp. best) function of the same type for a given
// distribution.
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };

  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }

  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}
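
// A sketch, with hypothetical numbers, of the median-grade election that
// `castVotes` below implements: suppose a function received 10 grades, split
// as 4x EXCELLENT, 3x VERY_GOOD and 3x GOOD. The median vote index is
// 10 / 2 = 5; walking the histogram from best to worst, the cumulative count
// first exceeds 5 at VERY_GOOD (4 + 3 = 7 > 5), so the final grade is
// VERY_GOOD.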

void castVotes(MutableArrayRef<FunctionData> Functions) {
  // First pass: grade each (function, distribution) score and accumulate the
  // per function histogram of grades.
  for (FunctionData &Function : Functions) {
    Function.ScoresGeoMean = 1.0;
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }

  // Second pass: the final grade is the median grade of the histogram.
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

} // namespace automemcpy
} // namespace llvm