//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
///
//===----------------------------------------------------------------------===//

#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/CodeTemplate.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/ResultAggregator.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "lib/ValidationEvent.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <string>
namespace llvm {
namespace exegesis {
static cl::opt<int> OpcodeIndex(
    "opcode-index",
    cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
    cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<std::string>
    OpcodeNames("opcode-name",
                cl::desc("comma-separated list of opcodes to measure, by name"),
                cl::cat(BenchmarkOptions), cl::init(""));

static cl::opt<std::string> SnippetsFile("snippets-file",
                                         cl::desc("code snippets to measure"),
                                         cl::cat(BenchmarkOptions),
                                         cl::init(""));

static cl::opt<std::string>
    BenchmarkFile("benchmarks-file",
                  cl::desc("File to read (analysis mode) or write "
                           "(latency/uops/inverse_throughput modes) benchmark "
                           "results. “-” uses stdin/stdout."),
                  cl::cat(Options), cl::init(""));

static cl::opt<Benchmark::ModeE> BenchmarkMode(
    "mode", cl::desc("the mode to run"), cl::cat(Options),
    cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"),
               clEnumValN(Benchmark::InverseThroughput, "inverse_throughput",
                          "Instruction Inverse Throughput"),
               clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"),
               // When not asking for a specific benchmark mode,
               // we'll analyse the results.
               clEnumValN(Benchmark::Unknown, "analysis", "Analysis")));
static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode(
    "result-aggregation-mode",
    cl::desc("How to aggregate multi-value results"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"),
               clEnumValN(Benchmark::Max, "max", "Keep max reading"),
               clEnumValN(Benchmark::Mean, "mean",
                          "Compute mean of all readings"),
               clEnumValN(Benchmark::MinVariance, "min-variance",
                          "Keep readings set with min-variance")),
    cl::init(Benchmark::Min));
static cl::opt<Benchmark::RepetitionModeE> RepetitionMode(
    "repetition-mode", cl::desc("how to repeat the instruction snippet"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"),
        clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"),
        clEnumValN(Benchmark::AggregateMin, "min",
                   "All of the above and take the minimum of measurements"),
        clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate",
                   "Middle half duplicate mode"),
        clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop",
                   "Middle half loop mode")),
    cl::init(Benchmark::Duplicate));

static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
    "measurements-print-progress",
    cl::desc("Produce progress indicator when performing measurements"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
    "benchmark-phase",
    cl::desc(
        "it is possible to stop the benchmarking process after some phase"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet",
                   "Only generate the minimal instruction sequence"),
        clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
                   "prepare-and-assemble-snippet",
                   "Same as prepare-snippet, but also dumps an excerpt of the "
                   "sequence (hex encoded)"),
        clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode,
                   "assemble-measured-code",
                   "Same as prepare-and-assemble-snippet, but also creates the "
                   "full sequence "
                   "that can be dumped to a file using --dump-object-to-disk"),
        clEnumValN(
            BenchmarkPhaseSelectorE::Measure, "measure",
            "Same as prepare-measured-code, but also runs the measurement "
            "(default)")),
    cl::init(BenchmarkPhaseSelectorE::Measure));

static cl::opt<bool>
    UseDummyPerfCounters("use-dummy-perf-counters",
                         cl::desc("Do not read real performance counters, use "
                                  "dummy values (for testing)"),
                         cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<unsigned>
    MinInstructions("min-instructions",
                    cl::desc("The minimum number of instructions that should "
                             "be included in the snippet"),
                    cl::cat(BenchmarkOptions), cl::init(10000));
static cl::opt<unsigned>
    LoopBodySize("loop-body-size",
                 cl::desc("when repeating the instruction snippet by looping "
                          "over it, duplicate the snippet until the loop body "
                          "contains at least this many instructions"),
                 cl::cat(BenchmarkOptions), cl::init(0));
static cl::opt<unsigned> MaxConfigsPerOpcode(
    "max-configs-per-opcode",
    cl::desc(
        "allow the snippet generator to generate at most that many configs"),
    cl::cat(BenchmarkOptions), cl::init(1));
static cl::opt<bool> IgnoreInvalidSchedClass(
    "ignore-invalid-sched-class",
    cl::desc("ignore instructions that do not define a sched class"),
    cl::cat(BenchmarkOptions), cl::init(false));

static cl::opt<BenchmarkFilter> AnalysisSnippetFilter(
    "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
    cl::cat(BenchmarkOptions),
    cl::values(
        clEnumValN(BenchmarkFilter::All, "all",
                   "Keep all benchmarks (default)"),
        clEnumValN(BenchmarkFilter::RegOnly, "reg-only",
                   "Keep only those benchmarks that do *NOT* involve memory"),
        clEnumValN(BenchmarkFilter::WithMem, "mem-only",
                   "Keep only the benchmarks that *DO* involve memory")),
    cl::init(BenchmarkFilter::All));

static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm(
    "analysis-clustering", cl::desc("the clustering algorithm to use"),
    cl::cat(AnalysisOptions),
    cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan",
                          "use DBSCAN/OPTICS algorithm"),
               clEnumValN(BenchmarkClustering::Naive, "naive",
                          "one cluster per opcode")),
    cl::init(BenchmarkClustering::Dbscan));

static cl::opt<unsigned> AnalysisDbscanNumPoints(
    "analysis-numpoints",
    cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
    cl::cat(AnalysisOptions), cl::init(3));

static cl::opt<float> AnalysisClusteringEpsilon(
    "analysis-clustering-epsilon",
    cl::desc("epsilon for benchmark point clustering"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<float> AnalysisInconsistencyEpsilon(
    "analysis-inconsistency-epsilon",
    cl::desc("epsilon for detection of when the cluster is different from the "
             "LLVM schedule profile values"),
    cl::cat(AnalysisOptions), cl::init(0.1));

static cl::opt<std::string>
    AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
                               cl::cat(AnalysisOptions), cl::init(""));
static cl::opt<std::string>
    AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
                                      cl::desc(""), cl::cat(AnalysisOptions),
                                      cl::init(""));

static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
    "analysis-display-unstable-clusters",
    cl::desc("if there is more than one benchmark for an opcode, said "
             "benchmarks may end up not being clustered into the same cluster "
             "if the measured performance characteristics are different. by "
             "default all such opcodes are filtered out. this flag will "
             "instead show only such unstable opcodes"),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
    "analysis-override-benchmark-triple-and-cpu",
    cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
             "were measured for, but if you want to analyze them for some "
             "other combination (specified via -mtriple/-mcpu), you can "
             "pass this flag."),
    cl::cat(AnalysisOptions), cl::init(false));

static cl::opt<std::string>
    TripleName("mtriple",
               cl::desc("Target triple. See -version for available targets"),
               cl::cat(Options));

static cl::opt<std::string>
    MCPU("mcpu",
         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
         cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));

static cl::opt<std::string>
    DumpObjectToDisk("dump-object-to-disk",
                     cl::desc("dumps the generated benchmark object to disk "
                              "and prints a message to access it"),
                     cl::ValueOptional, cl::cat(BenchmarkOptions));

static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode(
    "execution-mode",
    cl::desc("Selects the execution mode to use for running snippets"),
    cl::cat(BenchmarkOptions),
    cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess,
                          "inprocess",
                          "Executes the snippets within the same process"),
               clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess,
                          "subprocess",
                          "Spawns a subprocess for each snippet execution, "
                          "allows for the use of memory annotations")),
    cl::init(BenchmarkRunner::ExecutionModeE::InProcess));
static cl::opt<unsigned> BenchmarkRepeatCount(
    "benchmark-repeat-count",
    cl::desc("The number of times to repeat measurements on the benchmark "
             "before aggregating the results"),
    cl::cat(BenchmarkOptions), cl::init(30));
static cl::list<ValidationEvent> ValidationCounters(
    "validation-counter",
    cl::desc(
        "The name of a validation counter to run concurrently with the main "
        "counter to validate benchmarking assumptions"),
    cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions());

static ExitOnError ExitOnErr("llvm-exegesis error: ");

// Helper function that logs the error(s) and exits.
template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
  ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
}

// Check Err. If it's in a failure state log the file error(s) and exit.
static void ExitOnFileError(const Twine &FileName, Error Err) {
  if (Err) {
    ExitOnErr(createFileError(FileName, std::move(Err)));
  }
}

// Check E. If it's in a success state then return the contained value.
// If it's in a failure state log the file error(s) and exit.
template <typename T>
T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
  ExitOnFileError(FileName, E.takeError());
  return std::move(*E);
}
// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
// and returns the opcode indices or {} if snippets should be read from
// `SnippetsFile`.
static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
  const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
                             (OpcodeIndex == 0 ? 0 : 1) +
                             (SnippetsFile.empty() ? 0 : 1);
  const auto &ET = State.getExegesisTarget();
  const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits();

  if (NumSetFlags != 1) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("please provide one and only one of 'opcode-index', "
                  "'opcode-name' or 'snippets-file'");
  }
  if (!SnippetsFile.empty())
    return {};
  if (OpcodeIndex > 0)
    return {static_cast<unsigned>(OpcodeIndex)};
  if (OpcodeIndex < 0) {
    std::vector<unsigned> Result;
    unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
    Result.reserve(NumOpcodes);
    for (unsigned I = 0, E = NumOpcodes; I < E; ++I) {
      if (!ET.isOpcodeAvailable(I, AvailableFeatures))
        continue;
      Result.push_back(I);
    }
    return Result;
  }
  // Resolve opcode name -> opcode.
  const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
    const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
    auto I = Map.find(OpcodeName);
    if (I != Map.end())
      return I->getSecond();
    return 0u;
  };
  SmallVector<StringRef, 2> Pieces;
  StringRef(OpcodeNames.getValue())
      .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
  std::vector<unsigned> Result;
  Result.reserve(Pieces.size());
  for (const StringRef &OpcodeName : Pieces) {
    if (unsigned Opcode = ResolveName(OpcodeName))
      Result.push_back(Opcode);
    else
      ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
  }
  return Result;
}

// Generates code snippets for opcode `Opcode`.
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
                 const BitVector &ForbiddenRegs) {
  const Instruction &Instr = State.getIC().getInstr(Opcode);
  const MCInstrDesc &InstrDesc = Instr.Description;
  // Ignore instructions that we cannot run.
  if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
    return make_error<Failure>(
        "Unsupported opcode: isPseudo/usesCustomInserter");
  if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
    return make_error<Failure>("Unsupported opcode: isBranch/isIndirectBranch");
  if (InstrDesc.isCall() || InstrDesc.isReturn())
    return make_error<Failure>("Unsupported opcode: isCall/isReturn");

  const std::vector<InstructionTemplate> InstructionVariants =
      State.getExegesisTarget().generateInstructionVariants(
          Instr, MaxConfigsPerOpcode);

  SnippetGenerator::Options SnippetOptions;
  SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
  const std::unique_ptr<SnippetGenerator> Generator =
      State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
                                                       SnippetOptions);
  if (!Generator)
    ExitWithError("cannot create snippet generator");

  std::vector<BenchmarkCode> Benchmarks;
  for (const InstructionTemplate &Variant : InstructionVariants) {
    if (Benchmarks.size() >= MaxConfigsPerOpcode)
      break;
    if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
                                                     ForbiddenRegs))
      return std::move(Err);
  }
  return Benchmarks;
}

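// Runs each configuration against every repetitor / instruction-count
// combination, aggregates the per-run measurements according to
// -repetition-mode, and streams the resulting benchmarks as YAML to
// -benchmarks-file (or stdout when the file is "-").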
static void runBenchmarkConfigurations(
    const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
    ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
    const BenchmarkRunner &Runner) {
  assert(!Configurations.empty() && "Don't have any configurations to run.");
  std::optional<raw_fd_ostream> FileOstr;
  if (BenchmarkFile != "-") {
    int ResultFD = 0;
    // Create output file or open existing file and truncate it, once.
    ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD,
                                                sys::fs::CD_CreateAlways,
                                                sys::fs::OF_TextWithCRLF)));
    FileOstr.emplace(ResultFD, true /*shouldClose*/);
  }
  raw_ostream &Ostr = FileOstr ? *FileOstr : outs();

  std::optional<ProgressMeter<>> Meter;
  if (BenchmarkMeasurementsPrintProgress)
    Meter.emplace(Configurations.size());

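  // The middle-half repetition modes run each snippet at both N and 2N
  // minimum instructions so that the difference between the two runs can be
  // used during aggregation.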
  SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions};
  if (RepetitionMode == Benchmark::MiddleHalfDuplicate ||
      RepetitionMode == Benchmark::MiddleHalfLoop)
    MinInstructionCounts.push_back(MinInstructions * 2);

  for (const BenchmarkCode &Conf : Configurations) {
    ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
    SmallVector<Benchmark, 2> AllResults;

    for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
         Repetitors) {
      for (unsigned IterationRepetitions : MinInstructionCounts) {
        auto RC = ExitOnErr(Runner.getRunnableConfiguration(
            Conf, IterationRepetitions, LoopBodySize, *Repetitor));
        std::optional<StringRef> DumpFile;
        if (DumpObjectToDisk.getNumOccurrences())
          DumpFile = DumpObjectToDisk;
        auto [Err, BenchmarkResult] =
            Runner.runConfiguration(std::move(RC), DumpFile);
        if (Err) {
          // Errors from executing the snippets are fine.
          // All other errors are a framework issue and should fail.
          if (!Err.isA<SnippetExecutionFailure>())
            ExitOnErr(std::move(Err));

          BenchmarkResult.Error = toString(std::move(Err));
        }
        AllResults.push_back(std::move(BenchmarkResult));
      }
    }

    Benchmark &Result = AllResults.front();

    // If any of our measurements failed, pretend they all have failed.
    if (AllResults.size() > 1 &&
        any_of(AllResults, [](const Benchmark &R) {
          return R.Measurements.empty();
        }))
      Result.Measurements.clear();

    std::unique_ptr<ResultAggregator> ResultAgg =
        ResultAggregator::CreateAggregator(RepetitionMode);
    ResultAgg->AggregateResults(Result,
                                ArrayRef<Benchmark>(AllResults).drop_front());

    // With dummy counters, measurements are rather meaningless,
    // so drop them altogether.
    if (UseDummyPerfCounters)
      Result.Measurements.clear();

    ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr));
  }
}

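// Entry point for the benchmarking modes (latency/uops/inverse_throughput):
// checks counter support, initializes the exegesis targets, builds snippet
// configurations for the requested opcodes or snippets file, and runs them.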
void benchmarkMain() {
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure &&
      !UseDummyPerfCounters) {
#ifndef HAVE_LIBPFM
    ExitWithError(
        "benchmarking unavailable, LLVM was built without libpfm. You can "
        "pass --benchmark-phase=... to skip the actual benchmarking or "
        "--use-dummy-perf-counters to not query the kernel for real event "
        "counts.");
#else
    if (pfm::pfmInitialize())
      ExitWithError("cannot initialize libpfm");
#endif
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/TargetExegesis.def"

  const LLVMState State =
      ExitOnErr(LLVMState::Create(TripleName, MCPU, "", UseDummyPerfCounters));

  // Preliminary check to ensure features needed for requested
  // benchmark mode are present on target CPU and/or OS.
  if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
    ExitOnErr(State.getExegesisTarget().checkFeatureSupport());

  if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess &&
      UseDummyPerfCounters)
    ExitWithError("Dummy perf counters are not supported in the subprocess "
                  "execution mode.");

  const std::unique_ptr<BenchmarkRunner> Runner =
      ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
          BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode,
          BenchmarkRepeatCount, ValidationCounters, ResultAggMode));
  if (!Runner) {
    ExitWithError("cannot create benchmark runner");
  }

  const auto Opcodes = getOpcodesOrDie(State);
  std::vector<BenchmarkCode> Configurations;

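  // Start from the target's default loop counter register; snippets loaded
  // from a file may override it via their own LoopRegister annotation below.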
  unsigned LoopRegister =
      State.getExegesisTarget().getDefaultLoopCounterRegister(
          State.getTargetMachine().getTargetTriple());

  if (Opcodes.empty()) {
    Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
    for (const auto &Configuration : Configurations) {
      if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
          (Configuration.Key.MemoryMappings.size() != 0 ||
           Configuration.Key.MemoryValues.size() != 0 ||
           Configuration.Key.SnippetAddress != 0))
        ExitWithError("Memory and snippet address annotations are only "
                      "supported in subprocess "
                      "execution mode");
    }
    LoopRegister = Configurations[0].Key.LoopRegister;
  }

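  // In "min" (AggregateMin) repetition mode, benchmark with both the
  // duplicate and loop repetitors; otherwise use the single requested
  // repetitor.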
  SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
  if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
    Repetitors.emplace_back(
        SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
  else {
    for (Benchmark::RepetitionModeE RepMode :
         {Benchmark::RepetitionModeE::Duplicate,
          Benchmark::RepetitionModeE::Loop})
      Repetitors.emplace_back(
          SnippetRepetitor::Create(RepMode, State, LoopRegister));
  }

  BitVector AllReservedRegs;
  for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
    AllReservedRegs |= Repetitor->getReservedRegs();

  if (!Opcodes.empty()) {
    for (const unsigned Opcode : Opcodes) {
      // Ignore instructions without a sched class if
      // -ignore-invalid-sched-class is passed.
      if (IgnoreInvalidSchedClass &&
          State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
        errs() << State.getInstrInfo().getName(Opcode)
               << ": ignoring instruction without sched class\n";
        continue;
      }

      auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
      if (!ConfigsForInstr) {
        logAllUnhandledErrors(
            ConfigsForInstr.takeError(), errs(),
            Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
        continue;
      }
      std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
                std::back_inserter(Configurations));
    }
  }

  if (MinInstructions == 0) {
    ExitOnErr.setBanner("llvm-exegesis: ");
    ExitWithError("--min-instructions must be greater than zero");
  }

  // Write to standard output if file is not set.
  if (BenchmarkFile.empty())
    BenchmarkFile = "-";

  if (!Configurations.empty())
    runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);

  pfm::pfmTerminate();
}

// Prints the results of running analysis pass `Pass` to file `OutputFilename`
// if OutputFilename is non-empty.
template <typename Pass>
static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
                             const std::string &OutputFilename) {
  if (OutputFilename.empty())
    return;
  if (OutputFilename != "-") {
    errs() << "Printing " << Name << " results to file '" << OutputFilename
           << "'\n";
  }
  std::error_code ErrorCode;
  raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
                            sys::fs::FA_Read | sys::fs::FA_Write);
  if (ErrorCode)
    ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
  if (auto Err = Analyzer.run<Pass>(ClustersOS))
    ExitOnFileError(OutputFilename, std::move(Err));
}

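// Marks benchmark points that do not match -analysis-filter as errored so
// that they are excluded from clustering.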
static void filterPoints(MutableArrayRef<Benchmark> Points,
                         const MCInstrInfo &MCII) {
  if (AnalysisSnippetFilter == BenchmarkFilter::All)
    return;

  bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem;
  for (Benchmark &Point : Points) {
    if (!Point.Error.empty())
      continue;
    if (WantPointsWithMemOps ==
        any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
          const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
          return MCDesc.mayLoad() || MCDesc.mayStore();
        }))
      continue;
    Point.Error = "filtered out by user";
  }
}

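// Entry point for --mode=analysis: reads the benchmark YAML, clusters the
// points, and prints the cluster / scheduling-inconsistency reports requested
// on the command line.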
static void analysisMain() {
  ExitOnErr.setBanner("llvm-exegesis: ");
  if (BenchmarkFile.empty())
    ExitWithError("--benchmarks-file must be set");

  if (AnalysisClustersOutputFile.empty() &&
      AnalysisInconsistenciesOutputFile.empty()) {
    ExitWithError(
        "for --mode=analysis: At least one of --analysis-clusters-output-file "
        "and --analysis-inconsistencies-output-file must be specified");
  }

  InitializeAllExegesisTargets();
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##AsmPrinter();                                    \
  LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/TargetExegesis.def"

  auto MemoryBuffer = ExitOnFileError(
      BenchmarkFile,
      errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true)));

  const auto TriplesAndCpus = ExitOnFileError(
      BenchmarkFile,
      Benchmark::readTriplesAndCpusFromYamls(*MemoryBuffer));
  if (TriplesAndCpus.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  if (TriplesAndCpus.size() > 1) {
    ExitWithError("analysis file contains benchmarks from several CPUs. This "
                  "is unsupported.");
  }
  auto TripleAndCpu = *TriplesAndCpus.begin();
  if (AnalysisOverrideBenchmarksTripleAndCpu) {
    errs() << "overriding file CPU name (" << TripleAndCpu.CpuName
           << ") with provided triple (" << TripleName << ") and CPU name ("
           << MCPU << ")\n";
    TripleAndCpu.LLVMTriple = TripleName;
    TripleAndCpu.CpuName = MCPU;
  }
  errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '"
         << TripleAndCpu.CpuName << "'\n";

  // Read benchmarks.
  const LLVMState State = ExitOnErr(
      LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName));
  std::vector<Benchmark> Points = ExitOnFileError(
      BenchmarkFile, Benchmark::readYamls(State, *MemoryBuffer));

  outs() << "Parsed " << Points.size() << " benchmark points\n";
  if (Points.empty()) {
    errs() << "no benchmarks to analyze\n";
    return;
  }
  // FIXME: Merge points from several runs (latency and uops).

  filterPoints(Points, State.getInstrInfo());

  const auto Clustering = ExitOnErr(BenchmarkClustering::create(
      Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
      AnalysisClusteringEpsilon, &State.getSubtargetInfo(),
      &State.getInstrInfo()));

  const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon,
                          AnalysisDisplayUnstableOpcodes);

  maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                            AnalysisClustersOutputFile);
  maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
      Analyzer, "sched class consistency analysis",
      AnalysisInconsistenciesOutputFile);
}

} // namespace exegesis
} // namespace llvm

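// Driver: parses the command line options and dispatches to analysisMain()
// or benchmarkMain() depending on --mode.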
int main(int Argc, char **Argv) {
  using namespace llvm;

  InitLLVM X(Argc, Argv);

  // Initialize targets so we can print them when flag --version is specified.
#define LLVM_EXEGESIS(TargetName)                                              \
  LLVMInitialize##TargetName##Target();                                        \
  LLVMInitialize##TargetName##TargetInfo();                                    \
  LLVMInitialize##TargetName##TargetMC();
#include "llvm/Config/TargetExegesis.def"

  // Register the Target and CPU printer for --version.
  cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU);

  // Enable printing of available targets when flag --version is specified.
  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);

  cl::HideUnrelatedOptions({&exegesis::Options, &exegesis::BenchmarkOptions,
                            &exegesis::AnalysisOptions});

  cl::ParseCommandLineOptions(Argc, Argv,
                              "llvm host machine instruction characteristics "
                              "measurement and analysis.\n");

  exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
    if (Err.isA<exegesis::ClusteringError>())
      return EXIT_SUCCESS;
    return EXIT_FAILURE;
  });

  if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) {
    exegesis::analysisMain();
  } else {
    exegesis::benchmarkMain();
  }
  return EXIT_SUCCESS;
}