llama

test-barrier.cpp
93 строки · 2.6 Кб
Перенос по словам
1
#include "ggml.h"
2
#include "ggml-backend.h"
3

4
#include <chrono>
5
#include <iostream>
6
#include <cstdio>
7
#include <cstdlib>
8
#include <cassert>
9
#include <vector>
10

11
#define MAX_NARGS 2
12

13
int main(int argc, char *argv[]) {
14

15
    int n_threads = 4;
16
    int n_rounds  = 100;
17

18
    if (argc > 1) {
19
        n_threads = std::atoi(argv[1]);
20
    }
21

22
    if (argc > 2) {
23
        n_rounds  = std::atoi(argv[2]);
24
    }
25

26
    struct ggml_init_params params = {
27
        /* .mem_size   = */ 1024*1024*1024,
28
        /* .mem_buffer = */ NULL,
29
        /* .no_alloc   = */ false,
30
    };
31

32
    struct ggml_context * ctx = ggml_init(params);
33

34
    // Create graph
35
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
36

37
    // Lots of small, parallel ops where barriers in between will dominate
38
    struct ggml_tensor * out = ggml_new_tensor_1d(ctx, GGML_TYPE_F32,  64);
39
    for (int i = 0; i < 1000; i++) {
40
        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 64, 128);
41
        out = ggml_mul_mat(ctx, a, out);
42

43
        struct ggml_tensor * d = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 128, 64);
44
        out = ggml_mul_mat(ctx, d, out);
45
    }
46

47
    ggml_build_forward_expand(gf, out);
48
    int n_nodes = ggml_graph_n_nodes(gf);
49

50
    // Create threadpool
51
    struct ggml_threadpool_params tpp  = ggml_threadpool_params_default(n_threads);
52
    struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp);
53
    if (!threadpool) {
54
        fprintf(stderr, "threadpool create failed : n_threads %d\n", n_threads);
55
        exit(1);
56
    }
57

58
    // Create compute plan
59
    struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads, threadpool);
60

61
    std::vector<uint8_t> work_data(cplan.work_size);
62
    cplan.work_data = work_data.data();
63

64
    std::cerr << "graph-compute with"
65
              << "\n n_threads: " << n_threads
66
              << "\n   n_nodes: " << n_nodes
67
              << "\n  n_rounds: " << n_rounds
68
              << "\n";
69
    // ggml_graph_print(gf);
70

71
    // Warmup
72
    ggml_graph_compute(gf, &cplan);
73

74
    auto t0 = std::chrono::high_resolution_clock::now();
75

76
    for (int i=0; i < n_rounds; i++) {
77
        ggml_graph_compute(gf, &cplan);
78
    }
79

80
    auto t1 = std::chrono::high_resolution_clock::now();
81

82
    auto usec = std::chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
83
    auto nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(t1-t0).count();
84
    std::cerr << "graph-compute took " << usec << " usec "
85
              << "\n " << (float) usec / n_rounds << " usec per-iter"
86
              << "\n " << (float) nsec / (n_rounds * n_nodes) << " nsec per-node"
87
              << "\n";
88

89
    ggml_threadpool_free(threadpool);
90
    ggml_free(ctx);
91

92
    return 0;
93
}
94
llama

Использование cookies