glusterfs
244 строки · 7.3 Кб
/*
  Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
  This file is part of GlusterFS.

  This file is licensed to you under your choice of the GNU Lesser
  General Public License, version 3 or any later version (LGPLv3 or
  later), or the GNU General Public License, version 2 (GPLv2), in all
  cases as published by the Free Software Foundation.
*/
10
#include "glusterfs/monitoring.h"
#include "glusterfs/xlator.h"
#include "glusterfs/syscall.h"

#include <stdlib.h>
#include <time.h>

17static void18dump_mem_acct_details(xlator_t *xl, int fd)19{
20struct mem_acct_rec *mem_rec;21int i = 0;22
23if (!xl || !xl->mem_acct || (xl->ctx->active != xl->graph))24return;25
26dprintf(fd, "# %s.%s.total.num_types %d\n", xl->type, xl->name,27xl->mem_acct->num_types);28
29dprintf(fd,30"# type, in-use-size, in-use-units, max-size, "31"max-units, total-allocs\n");32
33for (i = 0; i < xl->mem_acct->num_types; i++) {34mem_rec = &xl->mem_acct->rec[i];35if (!GF_ATOMIC_GET(mem_rec->num_allocs))36continue;37#ifdef DEBUG38dprintf(fd, "# %s, %" PRIu64 ", %" PRIu64 ", %u, %" PRIu64 "\n",39mem_rec->typestr, mem_rec->size, mem_rec->max_size,40mem_rec->max_num_allocs, GF_ATOMIC_GET(mem_rec->num_allocs));41#else42dprintf(fd, "# %s, %" PRIu64 "\n", mem_rec->typestr,43GF_ATOMIC_GET(mem_rec->num_allocs));44#endif45}46}
47
48static void49dump_latency_and_count(xlator_t *xl, int fd)50{
51int32_t index = 0;52uint64_t fop = 0;53uint64_t cbk = 0;54uint64_t total_fop_count = 0;55uint64_t interval_fop_count = 0;56
57if (xl->winds) {58dprintf(fd, "%s.total.pending-winds.count %" PRIu64 "\n", xl->name,59xl->winds);60}61
62/* Need 'fuse' data, and don't need all the old graph info */63if ((xl != xl->ctx->root) && (xl->ctx->active != xl->graph))64return;65
66for (index = 0; index < GF_FOP_MAXVALUE; index++) {67fop = GF_ATOMIC_GET(xl->stats[index].total_fop);68if (fop) {69dprintf(fd, "%s.total.%s.count %" PRIu64 "\n", xl->name,70gf_fop_list[index], fop);71total_fop_count += fop;72}73fop = GF_ATOMIC_SWAP(xl->stats[index].interval_fop, 0);74if (fop) {75dprintf(fd, "%s.interval.%s.count %" PRIu64 "\n", xl->name,76gf_fop_list[index], fop);77interval_fop_count += fop;78}79cbk = GF_ATOMIC_SWAP(xl->stats[index].interval_fop_cbk, 0);80if (cbk) {81dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,82gf_fop_list[index], cbk);83}84if (xl->stats[index].latencies.count != 0) {85dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,86gf_fop_list[index],87(((double)xl->stats[index].latencies.total) /88xl->stats[index].latencies.count));89dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,90gf_fop_list[index], xl->stats[index].latencies.max);91dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,92gf_fop_list[index], xl->stats[index].latencies.min);93}94memset(&xl->stats[index].latencies, 0,95sizeof(xl->stats[index].latencies));96}97
98dprintf(fd, "%s.total.fop-count %" PRIu64 "\n", xl->name, total_fop_count);99dprintf(fd, "%s.interval.fop-count %" PRIu64 "\n", xl->name,100interval_fop_count);101}
102
103static inline void104dump_call_stack_details(glusterfs_ctx_t *ctx, int fd)105{
106dprintf(fd, "total.stack.count %" PRIu64 "\n",107GF_ATOMIC_GET(ctx->pool->total_count));108dprintf(fd, "total.stack.in-flight %" PRIu64 "\n", ctx->pool->cnt);109}
110
111static inline void112dump_dict_details(glusterfs_ctx_t *ctx, int fd)113{
114uint64_t total_dicts = 0;115uint64_t total_pairs = 0;116
117total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used);118total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used);119
120dprintf(fd, "total.dict.max-pairs-per %" PRIu64 "\n",121GF_ATOMIC_GET(ctx->stats.max_dict_pairs));122dprintf(fd, "total.dict.pairs-used %" PRIu64 "\n", total_pairs);123dprintf(fd, "total.dict.used %" PRIu64 "\n", total_dicts);124dprintf(fd, "total.dict.average-pairs %" PRIu64 "\n",125(total_pairs / total_dicts));126}
127
128static void129dump_inode_stats(glusterfs_ctx_t *ctx, int fd)130{
131}
132
133static void134dump_global_metrics(glusterfs_ctx_t *ctx, int fd)135{
136time_t nowtime;137struct tm *nowtm;138char tmbuf[64] = {1390,140};141
142nowtime = gf_time();143nowtm = localtime(&nowtime);144strftime(tmbuf, sizeof tmbuf, "%Y-%m-%d %H:%M:%S", nowtm);145
146/* Let every file have information on which process dumped info */147dprintf(fd, "## %s\n", ctx->cmdlinestr);148dprintf(fd, "### %s\n", tmbuf);149dprintf(fd, "### BrickName: %s\n", ctx->cmd_args.brick_name);150dprintf(fd, "### MountName: %s\n", ctx->cmd_args.mount_point);151dprintf(fd, "### VolumeName: %s\n", ctx->cmd_args.volume_name);152
153dump_call_stack_details(ctx, fd);154dump_dict_details(ctx, fd);155dprintf(fd, "# -----\n");156
157dump_inode_stats(ctx, fd);158dprintf(fd, "# -----\n");159}
160
161static void162dump_xl_metrics(glusterfs_ctx_t *ctx, int fd)163{
164xlator_t *xl;165
166xl = ctx->active->top;167
168while (xl) {169dump_latency_and_count(xl, fd);170dump_mem_acct_details(xl, fd);171if (xl->dump_metrics)172xl->dump_metrics(xl, fd);173xl = xl->next;174}175
176if (ctx->root) {177xl = ctx->root;178
179dump_latency_and_count(xl, fd);180dump_mem_acct_details(xl, fd);181if (xl->dump_metrics)182xl->dump_metrics(xl, fd);183}184
185return;186}
187
188char *189gf_monitor_metrics(glusterfs_ctx_t *ctx)190{
191int ret = -1;192int fd = 0;193char *filepath = NULL, *dumppath = NULL;194
195gf_msg_trace("monitoring", 0, "received monitoring request (sig:USR2)");196
197dumppath = ctx->config.metrics_dumppath;198if (dumppath == NULL) {199dumppath = GLUSTER_METRICS_DIR;200}201ret = mkdir_p(dumppath, 0755, true);202if (ret) {203/* EEXIST is handled in mkdir_p() itself */204gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,205"failed to create metrics dir %s (%s)", dumppath,206strerror(errno));207return NULL;208}209
210ret = gf_asprintf(&filepath, "%s/gmetrics.XXXXXX", dumppath);211if (ret < 0) {212return NULL;213}214
215/* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */216fd = mkstemp(filepath);217if (fd < 0) {218gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,219"failed to open tmp file %s (%s)", filepath, strerror(errno));220GF_FREE(filepath);221return NULL;222}223
224dump_global_metrics(ctx, fd);225
226dump_xl_metrics(ctx, fd);227
228/* This below line is used just to capture any errors with dprintf() */229ret = dprintf(fd, "\n# End of metrics\n");230if (ret < 0) {231gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,232"dprintf() failed: %s", strerror(errno));233}234
235ret = sys_fsync(fd);236if (ret < 0) {237gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,238"fsync() failed: %s", strerror(errno));239}240sys_close(fd);241
242/* Figure this out, not happy with returning this string */243return filepath;244}
245