2
Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
3
This file is part of GlusterFS.
5
This file is licensed to you under your choice of the GNU Lesser
6
General Public License, version 3 or any later version (LGPLv3 or
7
later), or the GNU General Public License, version 2 (GPLv2), in all
8
cases as published by the Free Software Foundation.
10
#include <glusterfs/common-utils.h>
11
#include <glusterfs/glusterfs.h>
12
#include "glusterd-op-sm.h"
13
#include "glusterd-geo-rep.h"
14
#include "glusterd-store.h"
15
#include "glusterd-utils.h"
16
#include "glusterd-svc-mgmt.h"
17
#include "glusterd-svc-helper.h"
18
#include "glusterd-volgen.h"
19
#include "glusterd-messages.h"
20
#include "glusterd-server-quorum.h"
21
#include "glusterd-mgmt.h"
22
#include <glusterfs/run.h>
23
#include <glusterfs/syscall.h>
28
glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req,
32
/*
 * CLI request handler body for brick replacement requests.
 *
 * Visible steps: decode the XDR request into cli_req, unserialize its
 * dictionary, read "volname", "operation" and "src-brick" (plus "dst-brick"
 * for the commit variants listed below), then hand the request to
 * glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(). A CLI response is
 * sent through glusterd_op_send_cli_response() and the XDR-allocated
 * dictionary buffer is released with free().
 *
 * NOTE(review): this listing is missing lines (error checks, braces,
 * labels), so the exact control flow between the visible statements
 * cannot be confirmed from here.
 */
__glusterd_handle_replace_brick(rpcsvc_request_t *req)
35
gf_cli_req cli_req = {{
39
char *src_brick = NULL;
40
char *dst_brick = NULL;
42
glusterd_op_t op = -1;
47
xlator_t *this = THIS;
48
glusterd_conf_t *conf = NULL;
53
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
55
// failed to decode msg;
56
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
58
"request received from cli");
59
req->rpc_err = GARBAGE_ARGS;
63
gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REPLACE_BRK_REQ_RCVD,
64
"Received replace brick req");
66
/* Only attempt to unserialize when the request carries a non-empty dict. */
if (cli_req.dict.dict_len) {
67
/* Unserialize the dictionary */
70
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
73
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
75
"unserialize req-buffer to dictionary");
76
snprintf(msg, sizeof(msg),
77
"Unable to decode the "
83
ret = dict_get_str(dict, "volname", &volname);
85
snprintf(msg, sizeof(msg), "Could not get volume name");
86
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
90
ret = dict_get_str(dict, "operation", &cli_op);
92
gf_msg_debug(this->name, 0, "dict_get on operation failed");
93
snprintf(msg, sizeof(msg), "Could not get operation");
97
/* Translate the CLI operation string into the glusterd_op_t kept in op. */
op = gd_cli_to_gd_op(cli_op);
99
/*
 * When the cluster op-version is below 3.9.0, any operation other than
 * GF_REPLACE_OP_COMMIT_FORCE gets an "unavailable in this version" message.
 */
if (conf->op_version < GD_OP_VERSION_3_9_0 &&
100
strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
101
snprintf(msg, sizeof(msg),
102
"Cannot execute command. The "
103
"cluster is operating at version %d. reset-brick "
104
"command %s is unavailable in this version.",
105
conf->op_version, gd_rb_op_to_str(cli_op));
110
ret = dict_get_str(dict, "src-brick", &src_brick);
113
snprintf(msg, sizeof(msg), "Failed to get src brick");
114
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
117
gf_msg_debug(this->name, 0, "src brick=%s", src_brick);
119
/* "dst-brick" is read only for these three commit-style operations. */
if (!strcmp(cli_op, "GF_RESET_OP_COMMIT") ||
120
!strcmp(cli_op, "GF_RESET_OP_COMMIT_FORCE") ||
121
!strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
122
ret = dict_get_str(dict, "dst-brick", &dst_brick);
125
snprintf(msg, sizeof(msg),
128
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
133
gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick);
136
/* The log message id depends on whether op is GD_OP_REPLACE_BRICK. */
gf_msg(this->name, GF_LOG_INFO, 0,
137
(op == GD_OP_REPLACE_BRICK)
138
? GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD
139
: GD_MSG_RESET_BRICK_COMMIT_FORCE_REQ_RCVD,
140
"Received %s request.", gd_rb_op_to_str(cli_op));
142
ret = glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(req, op, dict);
146
/* Report ret and the contents of msg back to the CLI. */
glusterd_op_send_cli_response(op, ret, 0, req, dict, msg);
149
free(cli_req.dict.dict_val); // malloced by xdr
155
/*
 * RPC entry point for reset-brick requests. Passes req together with
 * __glusterd_handle_replace_brick to glusterd_big_locked_handler() and
 * returns that call's result.
 */
glusterd_handle_reset_brick(rpcsvc_request_t *req)
157
return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick);
161
/*
 * RPC entry point for replace-brick requests. Passes req together with
 * __glusterd_handle_replace_brick to glusterd_big_locked_handler() and
 * returns that call's result.
 */
glusterd_handle_replace_brick(rpcsvc_request_t *req)
163
return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick);
167
/*
 * Compare the source/destination bricks recorded in volinfo->rep_brick
 * with the src and dst brickinfos supplied by the caller.
 *
 * A brick pair matches when both hostname and path compare equal with
 * strcmp(). An error is logged when either recorded brick is missing or
 * when a pair differs.
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the exact return values cannot be confirmed from here.
 */
glusterd_rb_check_bricks(glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src,
168
glusterd_brickinfo_t *dst)
170
glusterd_replace_brick_t *rb = NULL;
174
rb = &volinfo->rep_brick;
176
if (!rb->src_brick || !rb->dst_brick) {
177
/* NOTE(review): errno is logged although no visible call sets it - confirm. */
gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
181
if (strcmp(rb->src_brick->hostname, src->hostname) ||
182
strcmp(rb->src_brick->path, src->path)) {
183
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RB_SRC_BRICKS_MISMATCH,
184
"Replace brick src bricks differ");
188
if (strcmp(rb->dst_brick->hostname, dst->hostname) ||
189
strcmp(rb->dst_brick->path, dst->path)) {
190
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RB_DST_BRICKS_MISMATCH,
191
"Replace brick dst bricks differ");
199
/*
 * Staging (validation) step of the replace-brick operation.
 *
 * Checks visible in this listing, in order:
 *   - common prerequisites via glusterd_brick_op_prerequisites();
 *   - volumes of type GF_CLUSTER_TYPE_NONE get a "not permitted" error;
 *   - server quorum via glusterd_validate_quorum();
 *   - a warning is logged when the volume has snapshots;
 *   - destination brick lookup and glusterd_new_brick_validate();
 *   - for a local destination host the brick path is validated/created,
 *     for a remote one the peer must be known, connected and befriended;
 *   - "brick1.mount_dir" and "brick_count" are written to rsp_dict.
 *
 * Error text for the caller is returned through *op_errstr.
 *
 * NOTE(review): lines are missing from this listing (error checks, braces,
 * labels), so the control flow between statements cannot be confirmed.
 */
glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr,
203
char *src_brick = NULL;
204
char *dst_brick = NULL;
205
char *volname = NULL;
207
glusterd_op_t gd_op = -1;
208
glusterd_volinfo_t *volinfo = NULL;
209
glusterd_brickinfo_t *src_brickinfo = NULL;
211
char msg[2048] = {0};
212
glusterd_peerinfo_t *peerinfo = NULL;
213
glusterd_brickinfo_t *dst_brickinfo = NULL;
214
glusterd_conf_t *priv = NULL;
215
char pidfile[PATH_MAX] = {0};
216
xlator_t *this = THIS;
217
gf_boolean_t is_force = _gf_false;
218
char *dup_dstbrick = NULL;
220
priv = this->private;
223
/* Fills op, gd_op, volname, volinfo, src_brick, src_brickinfo and pidfile. */
ret = glusterd_brick_op_prerequisites(dict, &op, &gd_op, &volname, &volinfo,
224
&src_brick, &src_brickinfo, pidfile,
225
op_errstr, rsp_dict);
229
/* GF_CLUSTER_TYPE_NONE volumes are reported as "distribute only". */
if (volinfo->type == GF_CLUSTER_TYPE_NONE) {
230
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_NOT_PERMITTED,
231
"replace-brick is not permitted on distribute only "
233
gf_asprintf(op_errstr,
234
"replace-brick is not permitted on "
235
"distribute only volumes. Please use add-brick "
236
"and remove-brick operations instead.");
240
ret = glusterd_validate_quorum(this, gd_op, dict, op_errstr);
242
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
243
"Server quorum not met. Rejecting operation.");
247
/* Branch taken when op is anything other than GF_REPLACE_OP_COMMIT_FORCE. */
if (strcmp(op, "GF_REPLACE_OP_COMMIT_FORCE")) {
254
/*
 * Snapshots only produce a GF_LOG_WARNING here.
 * NOTE(review): the message text reads "effect" (likely "affect") and has
 * no space after "snapshots." - it is a runtime string, left untouched.
 */
if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) {
255
snprintf(msg, sizeof(msg),
256
"Volume %s has %" PRIu64
258
"Changing the volume configuration will not effect snapshots."
259
"But the snapshot brick mount should be intact to "
260
"make them function.",
261
volname, volinfo->snap_count);
262
gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg);
266
glusterd_add_peers_to_auth_list(volname);
268
ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr,
269
&dst_brickinfo, &host, dict,
274
ret = glusterd_new_brick_validate(dst_brick, dst_brickinfo, msg,
276
/* fail if brick being replaced with itself */
278
*op_errstr = gf_strdup(msg);
280
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s",
285
/*
 * NOTE(review): rep_brick is set to the very pointers that are then passed
 * to glusterd_rb_check_bricks(), so in the visible code the check compares
 * each brick with itself - confirm this is intended.
 */
volinfo->rep_brick.src_brick = src_brickinfo;
286
volinfo->rep_brick.dst_brick = dst_brickinfo;
288
if (glusterd_rb_check_bricks(volinfo, src_brickinfo, dst_brickinfo)) {
290
*op_errstr = gf_strdup(
291
"Incorrect source or "
292
"destination brick");
294
gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
299
/* Destination host is local: validate and create the brick path. */
if (glusterd_gf_is_local_addr(host)) {
300
ret = glusterd_validate_and_create_brickpath(
301
dst_brickinfo, volinfo->volume_id, volinfo->volname, op_errstr,
302
is_force, _gf_false);
307
/* Destination host is remote: it must be a known, connected, befriended peer. */
if (!glusterd_gf_is_local_addr(host)) {
310
peerinfo = glusterd_peerinfo_find(NULL, host);
311
if (peerinfo == NULL) {
314
snprintf(msg, sizeof(msg), "%s, is not a friend", host);
315
*op_errstr = gf_strdup(msg);
318
} else if (!peerinfo->connected) {
321
snprintf(msg, sizeof(msg),
322
"%s, is not connected at "
325
*op_errstr = gf_strdup(msg);
328
} else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state) {
331
snprintf(msg, sizeof(msg),
332
"%s, is not befriended "
335
*op_errstr = gf_strdup(msg);
341
/*
 * Taken when gf_uuid_compare() returns 0 for the destination brick's uuid
 * and MY_UUID (presumably: the brick belongs to this node - confirm).
 * The resolved mount dir is published as "brick1.mount_dir" in rsp_dict.
 */
if (!(gf_uuid_compare(dst_brickinfo->uuid, MY_UUID))) {
342
ret = glusterd_get_brick_mount_dir(dst_brickinfo->path,
343
dst_brickinfo->hostname,
344
dst_brickinfo->mount_dir);
346
gf_msg(this->name, GF_LOG_ERROR, 0,
347
GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
348
"Failed to get brick mount_dir");
351
ret = dict_set_dynstr_with_alloc(rsp_dict, "brick1.mount_dir",
352
dst_brickinfo->mount_dir);
354
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
355
"Failed to set brick.mount_dir");
360
/* NOTE(review): the key is "brick_count" but the log says "local_brick_count". */
ret = dict_set_int32_sizen(rsp_dict, "brick_count", 1);
362
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
363
"Failed to set local_brick_count");
371
GF_FREE(dup_dstbrick);
372
gf_msg_debug(this->name, 0, "Returning %d", ret);
378
/*
 * Replace old_brick with new_brick inside volinfo.
 *
 * Visible steps: build and resolve a brickinfo for new_brick, record its
 * filesystem id from statvfs when the uuid comparison with MY_UUID is 0,
 * copy brick_id and port from the old brick, copy "brick1.mount_dir" from
 * dict, link the new brickinfo into the brick list at the old brick's
 * position, remove the old brick, run the replicate-specific handling for
 * replicate volumes, regenerate volfiles and, when the volume status is
 * GLUSTERD_STATUS_STARTED, start the new brick.
 *
 * NOTE(review): lines are missing from this listing (error checks, braces,
 * labels), so the control flow between statements cannot be confirmed.
 */
glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick,
379
char *new_brick, dict_t *dict)
381
char *brick_mount_dir = NULL;
382
glusterd_brickinfo_t *old_brickinfo = NULL;
383
glusterd_brickinfo_t *new_brickinfo = NULL;
385
xlator_t *this = THIS;
386
struct statvfs brickstat = {
393
ret = glusterd_brickinfo_new_from_brick(new_brick, &new_brickinfo, _gf_true,
398
ret = glusterd_resolve_brick(new_brickinfo);
402
/* statvfs is only run when the new brick's uuid compares equal to MY_UUID. */
if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) {
403
ret = sys_statvfs(new_brickinfo->path, &brickstat);
405
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STATVFS_FAILED,
406
"Failed to fetch disk utilization "
407
"from the brick (%s:%s). Please check the health of "
408
"the brick. Error code was %s",
409
new_brickinfo->hostname, new_brickinfo->path,
414
new_brickinfo->statfs_fsid = brickstat.f_fsid;
417
ret = glusterd_volume_brickinfo_get_by_brick(old_brick, volinfo,
418
&old_brickinfo, _gf_false);
422
/* The new brick takes over the old brick's brick_id and port. */
(void)snprintf(new_brickinfo->brick_id, sizeof(new_brickinfo->brick_id),
423
"%s", old_brickinfo->brick_id);
424
new_brickinfo->port = old_brickinfo->port;
426
ret = dict_get_str(dict, "brick1.mount_dir", &brick_mount_dir);
428
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
429
"brick1.mount_dir not present");
432
(void)snprintf(new_brickinfo->mount_dir, sizeof(new_brickinfo->mount_dir),
433
"%s", brick_mount_dir);
435
/*
 * Link the new brickinfo next to the old one, bump brick_count, then ask
 * glusterd_op_perform_remove_brick() to remove the old brick.
 * (presumably that call decrements brick_count again - confirm)
 */
cds_list_add(&new_brickinfo->brick_list, &old_brickinfo->brick_list);
437
volinfo->brick_count++;
439
ret = glusterd_op_perform_remove_brick(volinfo, old_brick, 1, NULL);
443
/* if the volume is a replicate volume, do: */
444
if (glusterd_is_volume_replicate(volinfo)) {
445
if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) {
446
ret = glusterd_handle_replicate_brick_ops(volinfo, new_brickinfo,
447
GD_OP_REPLACE_BRICK);
453
ret = glusterd_create_volfiles_and_notify_services(volinfo);
457
/* The new brick is started only if the volume itself is started. */
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
458
ret = glusterd_brick_start(volinfo, new_brickinfo, _gf_false,
466
gf_msg_debug("glusterd", 0, "Returning %d", ret);
471
/*
 * Commit step of the replace-brick operation.
 *
 * Visible steps: read "src-brick", "dst-brick", "volname" and "operation"
 * from dict, look up the volinfo and the source/destination brickinfos,
 * update the destination brick port, stop the volume's services, perform
 * the replacement through glusterd_op_perform_replace_brick(), restart the
 * services, delete rep_brick.dst_brick and clear the rep_brick pointers,
 * then persist the volinfo with a version increment.
 *
 * NOTE(review): lines are missing from this listing (error checks, braces,
 * labels), so the control flow between statements cannot be confirmed.
 */
glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict)
474
char *replace_op = NULL;
475
glusterd_volinfo_t *volinfo = NULL;
476
char *volname = NULL;
477
xlator_t *this = THIS;
478
glusterd_conf_t *priv = NULL;
479
char *src_brick = NULL;
480
char *dst_brick = NULL;
481
glusterd_brickinfo_t *src_brickinfo = NULL;
482
glusterd_brickinfo_t *dst_brickinfo = NULL;
484
priv = this->private;
487
ret = dict_get_str(dict, "src-brick", &src_brick);
489
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
490
"Unable to get src brick");
494
gf_msg_debug(this->name, 0, "src brick=%s", src_brick);
496
ret = dict_get_str(dict, "dst-brick", &dst_brick);
498
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
499
"Unable to get dst brick");
503
gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick);
505
ret = dict_get_str(dict, "volname", &volname);
507
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
508
"Unable to get volume name");
512
ret = dict_get_str(dict, "operation", &replace_op);
514
gf_msg_debug(this->name, 0, "dict_get on operation failed");
518
/*
 * NOTE(review): the log that follows this lookup reports ENOMEM /
 * "Unable to allocate memory", which does not look like a volinfo lookup
 * failure - confirm whether the message id and text are intended.
 */
ret = glusterd_volinfo_find(volname, &volinfo);
520
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
521
"Unable to allocate memory");
525
ret = glusterd_volume_brickinfo_get_by_brick(src_brick, volinfo,
526
&src_brickinfo, _gf_false);
528
gf_msg_debug(this->name, 0, "Unable to get src-brickinfo");
532
ret = glusterd_get_rb_dst_brickinfo(volinfo, &dst_brickinfo);
534
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RB_BRICKINFO_GET_FAIL,
536
"replace brick destination brickinfo");
540
ret = glusterd_resolve_brick(dst_brickinfo);
542
gf_msg_debug(this->name, 0, "Unable to resolve dst-brickinfo");
546
ret = rb_update_dstbrick_port(dst_brickinfo, rsp_dict, dict);
550
/* Branch taken when the operation is not GF_REPLACE_OP_COMMIT_FORCE. */
if (strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
555
ret = glusterd_svcs_stop(volinfo);
557
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
558
"Unable to stop gluster services, ret: %d", ret);
561
ret = glusterd_op_perform_replace_brick(volinfo, src_brick, dst_brick,
564
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_ADD_FAIL,
565
"Unable to add dst-brick: "
567
dst_brick, volinfo->volname);
568
/* Result deliberately ignored via the (void) cast. */
(void)glusterd_svcs_manager(volinfo);
571
/* Any rebalance status other than NOT_STARTED is reset because of the replace. */
if (volinfo->rebal.defrag_status != GF_DEFRAG_STATUS_NOT_STARTED)
572
volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_RESET_DUE_REPLACE_BRC;
574
ret = glusterd_svcs_manager(volinfo);
576
gf_msg(this->name, GF_LOG_CRITICAL, 0,
577
GD_MSG_GLUSTER_SERVICE_START_FAIL,
578
"Failed to start one or more gluster services.");
581
ret = glusterd_fetchspec_notify(THIS);
582
/* Delete rep_brick.dst_brick and clear both rep_brick pointers. */
glusterd_brickinfo_delete(volinfo->rep_brick.dst_brick);
583
volinfo->rep_brick.src_brick = NULL;
584
volinfo->rep_brick.dst_brick = NULL;
587
ret = glusterd_store_volinfo(volinfo,
588
GLUSTERD_VOLINFO_VER_AC_INCREMENT);
590
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_STATE_STORE_FAIL,
592
" replace brick operation's state");
599
/*
 * Drive a replace-brick request through the mgmt_v3 transaction phases.
 *
 * Visible steps: store "originator_uuid" and "is_synctasked" in dict, keep
 * a copy of dict in tmp_dict, then run lockdown, payload build,
 * pre-validation and commit. Afterwards peer locks are released, the local
 * mgmt_v3 locks are released using tmp_dict, the CLI response is sent and
 * req_dict and tmp_dict are unreferenced.
 *
 * NOTE(review): lines are missing from this listing (error checks, braces,
 * labels), so the control flow between statements cannot be confirmed.
 */
glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req,
605
uint32_t txn_generation = 0;
606
uint32_t op_errno = 0;
607
char *op_errstr = NULL;
608
dict_t *req_dict = NULL;
609
dict_t *tmp_dict = NULL;
610
uuid_t *originator_uuid = NULL;
611
xlator_t *this = THIS;
612
glusterd_conf_t *conf = NULL;
613
gf_boolean_t is_acquired = _gf_false;
617
conf = this->private;
620
/* conf->generation is captured once and reused for lockdown and lock release. */
txn_generation = conf->generation;
621
/*
 * originator_uuid is heap-allocated, filled with MY_UUID and handed to
 * dict through dict_set_bin(); the GF_FREE below sits next to the
 * "Failed to set" log, i.e. on the failure path of that call.
 */
originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
622
if (!originator_uuid) {
627
gf_uuid_copy(*originator_uuid, MY_UUID);
628
ret = dict_set_bin(dict, "originator_uuid", originator_uuid,
631
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
632
"Failed to set originator_uuid.");
633
GF_FREE(originator_uuid);
637
ret = dict_set_int32_sizen(dict, "is_synctasked", _gf_true);
639
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
640
"Failed to set synctasked flag to true.");
644
/* tmp_dict holds a copy of dict taken before lockdown; it is used for the unlock below. */
tmp_dict = dict_new();
646
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
647
"Unable to create dict");
650
dict_copy(dict, tmp_dict);
652
ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno,
653
&is_acquired, txn_generation);
655
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
656
"mgmt_v3 lockdown failed.");
660
ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op);
662
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL,
663
LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
664
/* Supply a default error string only when none was set. */
if (op_errstr == NULL)
665
gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
669
ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno,
672
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
673
"Pre Validation Failed");
677
ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
680
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
690
/* Result deliberately ignored via the (void) cast. */
(void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
691
is_acquired, txn_generation);
694
ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID);
696
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
697
"Failed to release mgmt_v3 locks on "
702
/* SEND CLI RESPONSE */
703
glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, op_errstr);
706
dict_unref(req_dict);
709
dict_unref(tmp_dict);