Verilator

Форк
0
/
verilated_profiler.cpp 
230 строк · 8.6 Кб
1
// -*- mode: C++; c-file-style: "cc-mode" -*-
2
//=============================================================================
3
//
4
// Code available from: https://verilator.org
5
//
6
// Copyright 2012-2024 by Wilson Snyder. This program is free software; you can
7
// redistribute it and/or modify it under the terms of either the GNU
8
// Lesser General Public License Version 3 or the Perl Artistic License
9
// Version 2.0.
10
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
11
//
12
//=============================================================================
13
///
14
/// \file
15
/// \brief Verilated run-time profiling implementation code
16
///
17
//=============================================================================
18

19
#include "verilatedos.h"
20

21
#include "verilated_profiler.h"
22

23
#include "verilated_threads.h"
24

25
#include <fstream>
26
#include <string>
27

28
//=============================================================================
29
// Globals
30

31
// Internal note: Globals may multi-construct, see verilated.cpp top.
32

33
// Per-thread buffer of execution records. Each thread registers the address of its own instance
// with the profiler in VlExecutionProfiler::setupThread().
thread_local VlExecutionProfiler::ExecutionTrace VlExecutionProfiler::t_trace;
34

35
// Out-of-class definition of the static constexpr table of record type names. dump() indexes it
// at run time (an odr-use), which requires this definition before C++17; from C++17 onwards it
// is redundant but harmless.
constexpr const char* const VlExecutionRecord::s_ascii[];
36

37
//=============================================================================
38
// VlExecutionRecord implementation
39

40
uint16_t VlExecutionRecord::getcpu() {
41
#if defined(__linux)
42
    return sched_getcpu();  // TODO: this is a system call. Not exactly cheap.
43
#elif defined(__APPLE__) && !defined(__arm64__)
44
    uint32_t info[4];
45
    __cpuid_count(1, 0, info[0], info[1], info[2], info[3]);
46
    // info[1] is EBX, bits 24-31 are APIC ID
47
    if ((info[3] & (1 << 9)) == 0) {
48
        return -1;  // no APIC on chip
49
    } else {
50
        return (unsigned)info[1] >> 24;
51
    }
52
#elif defined(_WIN32)
53
    return GetCurrentProcessorNumber();
54
#else
55
    return 0;
56
#endif
57
}
58

59
//=============================================================================
60
// VlExecutionProfiler implementation
61

62
/// Round 'value' up to the nearest multiple of 'N'.
///
/// 'N' must be a non-zero power of two; this is enforced at compile time. A 'value' that is
/// already a multiple of 'N' (including 0) is returned unchanged.
///
/// Note: the addition wraps around if 'value' is within 'N - 1' of the maximum size_t, in which
/// case the result is smaller than 'value'.
template <size_t N>
static size_t roundUptoMultipleOf(size_t value) {
    // 'N == 0' would also pass a bare '(N & (N - 1)) == 0' test, so reject it explicitly
    static_assert(N != 0 && (N & (N - 1)) == 0, "'N' must be a power of 2");
    // For a power of two, 'N - 1' has exactly the bits below 'N' set
    constexpr size_t mask = N - 1;
    // Bump past the next boundary, then drop the low bits to land on it
    return (value + mask) & ~mask;
}
68

69
/// Create a profiler bound to 'context' and register the constructing thread's trace buffer.
///
/// The constructing thread is taken to be the main thread and is registered as thread id 0.
VlExecutionProfiler::VlExecutionProfiler(VerilatedContext& context)
    : m_context{context} {
    // Thread id 0 is the main thread
    setupThread(0);
}
74

75
void VlExecutionProfiler::configure() {
76

77
    if (VL_UNLIKELY(m_enabled)) {
78
        --m_windowCount;
79
        if (VL_UNLIKELY(m_windowCount == m_context.profExecWindow())) {
80
            VL_DEBUG_IF(VL_DBG_MSGF("+ profile start collection\n"););
81
            clear();  // Clear the profile after the cache warm-up cycles.
82
            m_tickBegin = VL_CPU_TICK();
83
        } else if (VL_UNLIKELY(m_windowCount == 0)) {
84
            const uint64_t tickEnd = VL_CPU_TICK();
85
            VL_DEBUG_IF(VL_DBG_MSGF("+ profile end\n"););
86
            const std::string& fileName = m_context.profExecFilename();
87
            dump(fileName.c_str(), tickEnd);
88
            m_enabled = false;
89
        }
90
        return;
91
    }
92

93
    const uint64_t startReq = m_context.profExecStart() + 1;  // + 1, so we can start at time 0
94

95
    if (VL_UNLIKELY(m_lastStartReq < startReq && VL_TIME_Q() >= m_context.profExecStart())) {
96
        VL_DEBUG_IF(VL_DBG_MSGF("+ profile start warmup\n"););
97
        VL_DEBUG_IF(assert(m_windowCount == 0););
98
        m_enabled = true;
99
        m_windowCount = m_context.profExecWindow() * 2;
100
        m_lastStartReq = startReq;
101
    }
102
}
103

104
/// Allocate a profiler for 'context' and register the trace buffer of every worker thread.
///
/// The constructor registers the calling thread as thread id 0. If the context has a thread
/// pool, each worker 'i' is then asked to register itself as thread id 'i + 1'; workers are
/// initialized one at a time.
///
/// Returns the newly allocated profiler as a raw pointer.
VerilatedVirtualBase* VlExecutionProfiler::construct(VerilatedContext& context) {
    VlExecutionProfiler* const selfp = new VlExecutionProfiler{context};

    VlThreadPool* const threadPoolp = static_cast<VlThreadPool*>(context.threadPoolp());
    if (!threadPoolp) return selfp;  // No thread pool, so no worker threads to set up

    // Data to pass to worker thread initialization
    struct Data final {
        VlExecutionProfiler* const selfp;
        const uint32_t threadId;
    };

    // Task run on the worker: register that worker's thread-local trace buffer
    const auto initWorker = [](void* userp, bool) {
        Data* const datap = static_cast<Data*>(userp);
        datap->selfp->setupThread(datap->threadId);
    };

    for (int i = 0; i < threadPoolp->numThreads(); ++i) {
        // Id 0 is taken by the constructing thread, so workers are numbered from 1
        Data data{selfp, static_cast<uint32_t>(i + 1)};
        auto* const workerp = threadPoolp->workerp(i);
        workerp->addTask(initWorker, &data);
        // Wait until initialization is complete; 'data' lives on this stack frame and is
        // handed to the task by address
        workerp->wait();
    }
    return selfp;
}
128

129
void VlExecutionProfiler::setupThread(uint32_t threadId) {
130
    // Reserve some space in the thread-local profiling buffer, in order to try to avoid malloc
131
    // while profiling.
132
    t_trace.reserve(RESERVED_TRACE_CAPACITY);
133
    // Register thread-local buffer in list of all buffers
134
    bool exists;
135
    {
136
        const VerilatedLockGuard lock{m_mutex};
137
        exists = !m_traceps.emplace(threadId, &t_trace).second;
138
    }
139
    if (VL_UNLIKELY(exists)) {
140
        VL_FATAL_MT(__FILE__, __LINE__, "", "multiple initialization of profiler on some thread");
141
    }
142
}
143

144
/// Discard the records of every registered trace buffer.
///
/// After clearing, each buffer is asked to reserve room for the number of records it held,
/// rounded up to a multiple of RESERVED_TRACE_CAPACITY. Takes m_mutex for the duration.
void VlExecutionProfiler::clear() VL_MT_SAFE_EXCLUDES(m_mutex) {
    const VerilatedLockGuard lock{m_mutex};
    for (const auto& entry : m_traceps) {
        ExecutionTrace& trace = *entry.second;
        // Sample the size before clearing, as it sizes the reservation below
        const size_t recorded = trace.size();
        trace.clear();
        trace.reserve(roundUptoMultipleOf<RESERVED_TRACE_CAPACITY>(recorded));
    }
}
153

154
void VlExecutionProfiler::dump(const char* filenamep, uint64_t tickEnd)
155
    VL_MT_SAFE_EXCLUDES(m_mutex) {
156
    const VerilatedLockGuard lock{m_mutex};
157
    VL_DEBUG_IF(VL_DBG_MSGF("+prof+exec writing to '%s'\n", filenamep););
158

159
    FILE* const fp = std::fopen(filenamep, "w");
160
    if (VL_UNLIKELY(!fp)) { VL_FATAL_MT(filenamep, 0, "", "+prof+exec+file file not writable"); }
161

162
    // TODO Perhaps merge with verilated_coverage output format, so can
163
    // have a common merging and reporting tool, etc.
164
    fprintf(fp, "VLPROFVERSION 2.1 # Verilator execution profile version 2.1\n");
165
    fprintf(fp, "VLPROF arg +verilator+prof+exec+start+%" PRIu64 "\n",
166
            Verilated::threadContextp()->profExecStart());
167
    fprintf(fp, "VLPROF arg +verilator+prof+exec+window+%u\n",
168
            Verilated::threadContextp()->profExecWindow());
169
    // Note that VerilatedContext will by default create as many threads as there are hardware
170
    // processors, but not all of them might be utilized. Report the actual number that has trace
171
    // entries to avoid over-counting.
172
    unsigned threads = 0;
173
    for (const auto& pair : m_traceps) {
174
        if (!pair.second->empty()) ++threads;
175
    }
176
    fprintf(fp, "VLPROF stat threads %u\n", threads);
177
    fprintf(fp, "VLPROF stat yields %" PRIu64 "\n", VlMTaskVertex::yields());
178

179
    // Copy /proc/cpuinfo into this output so verilator_gantt can be run on
180
    // a different machine
181
    {
182
        const std::unique_ptr<std::ifstream> ifp{new std::ifstream{"/proc/cpuinfo"}};
183
        if (!ifp->fail()) {
184
            std::string line;
185
            while (std::getline(*ifp, line)) { fprintf(fp, "VLPROFPROC %s\n", line.c_str()); }
186
        }
187
    }
188

189
    for (const auto& pair : m_traceps) {
190
        const uint32_t threadId = pair.first;
191
        ExecutionTrace* const tracep = pair.second;
192
        if (tracep->empty()) continue;
193
        fprintf(fp, "VLPROFTHREAD %" PRIu32 "\n", threadId);
194

195
        for (const VlExecutionRecord& er : *tracep) {
196
            const char* const name = VlExecutionRecord::s_ascii[static_cast<uint8_t>(er.m_type)];
197
            const uint64_t time = er.m_tick - m_tickBegin;
198
            fprintf(fp, "VLPROFEXEC %s %" PRIu64, name, time);
199

200
            switch (er.m_type) {
201
            case VlExecutionRecord::Type::SECTION_POP:
202
            case VlExecutionRecord::Type::EXEC_GRAPH_BEGIN:
203
            case VlExecutionRecord::Type::EXEC_GRAPH_END:
204
                // No payload
205
                fprintf(fp, "\n");
206
                break;
207
            case VlExecutionRecord::Type::MTASK_BEGIN: {
208
                const auto& payload = er.m_payload.mtaskBegin;
209
                fprintf(fp, " id %u predictStart %u cpu %u\n", payload.m_id,
210
                        payload.m_predictStart, payload.m_cpu);
211
                break;
212
            }
213
            case VlExecutionRecord::Type::MTASK_END: {
214
                const auto& payload = er.m_payload.mtaskEnd;
215
                fprintf(fp, " id %u predictCost %u\n", payload.m_id, payload.m_predictCost);
216
                break;
217
            }
218
            case VlExecutionRecord::Type::SECTION_PUSH: {
219
                const auto& payload = er.m_payload.sectionPush;
220
                fprintf(fp, " %s\n", payload.m_name);
221
                break;
222
            }
223
            default: abort();  // LCOV_EXCL_LINE
224
            }
225
        }
226
    }
227
    fprintf(fp, "VLPROF stat ticks %" PRIu64 "\n", tickEnd - m_tickBegin);
228

229
    std::fclose(fp);
230
}
231

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.