oceanbase
853 строки · 35.8 Кб
// Copyright (c) 2021 OceanBase
// OceanBase is licensed under Mulan PubL v2.
// You can use this software according to the terms and conditions of the Mulan PubL v2.
// You may obtain a copy of Mulan PubL v2 at:
// http://license.coscl.org.cn/MulanPubL-2.0
// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
// See the Mulan PubL v2 for more details.
10#include <cstdio>
11#include <gtest/gtest.h>
12#include <signal.h>
13#define private public
14#include "env/ob_simple_log_cluster_env.h"
15#undef private
16
17const std::string TEST_NAME = "arb_service";
18
19using namespace oceanbase::common;
20using namespace oceanbase;
21namespace oceanbase
22{
23using namespace logservice;
24
25int64_t ARB_TIMEOUT_ARG = 2 * 1000 * 1000L;
26
27namespace logservice
28{
29
30void ObArbitrationService::update_arb_timeout_()
31{
32arb_timeout_us_ = ARB_TIMEOUT_ARG;
33if (REACH_TIME_INTERVAL(2 * 1000 * 1000)) {
34CLOG_LOG_RET(WARN, OB_ERR_UNEXPECTED, "update_arb_timeout_", K_(self), K_(arb_timeout_us));
35}
36}
37}
38
39namespace unittest
40{
41
42class TestObSimpleLogClusterArbService : public ObSimpleLogClusterTestEnv
43{
44public:
45TestObSimpleLogClusterArbService() : ObSimpleLogClusterTestEnv()
46{}
47};
48
49int64_t ObSimpleLogClusterTestBase::member_cnt_ = 3;
50int64_t ObSimpleLogClusterTestBase::node_cnt_ = 5;
51bool ObSimpleLogClusterTestBase::need_add_arb_server_ = true;
52std::string ObSimpleLogClusterTestBase::test_name_ = TEST_NAME;
53
54TEST_F(TestObSimpleLogClusterArbService, test_2f1a_degrade_upgrade)
55{
56oceanbase::common::ObClusterVersion::get_instance().cluster_version_ = CLUSTER_VERSION_4_1_0_0;
57SET_CASE_LOG_FILE(TEST_NAME, "arb_2f1a_degrade_upgrade");
58OB_LOGGER.set_log_level("TRACE");
59MockLocCB loc_cb;
60int ret = OB_SUCCESS;
61PALF_LOG(INFO, "begin test_2f1a_degrade_upgrade");
62int64_t leader_idx = 0;
63int64_t arb_replica_idx = -1;
64PalfHandleImplGuard leader;
65std::vector<PalfHandleImplGuard*> palf_list;
66const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
67const int64_t id = ATOMIC_AAF(&palf_id_, 1);
68common::ObMember dummy_member;
69EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
70EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
71const int64_t another_f_idx = (leader_idx+1)%3;
72EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
73// 为备副本设置location cb,用于备副本找leader
74palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
75block_net(leader_idx, another_f_idx);
76// do not check OB_SUCCESS, may return OB_NOT_MASTER during degrading member
77submit_log(leader, 100, id);
78
79PALF_LOG(INFO, "CASE[1] degrade caused by block_net ");
80EXPECT_TRUE(is_degraded(leader, another_f_idx));
81
82loc_cb.leader_ = leader.palf_handle_impl_->self_;
83unblock_net(leader_idx, another_f_idx);
84EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));
85
86EXPECT_TRUE(is_upgraded(leader, id));
87EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));
88
89// set clog disk error
90ObTenantEnv::set_tenant(get_cluster()[another_f_idx]->get_tenant_base());
91logservice::coordinator::ObFailureDetector *detector = MTL(logservice::coordinator::ObFailureDetector *);
92if (NULL != detector) {
93PALF_LOG(INFO, "set clog full event");
94detector->has_add_clog_full_event_ = true;
95}
96
97PALF_LOG(INFO, "CASE[2] degrade caused by clog disk error");
98EXPECT_TRUE(is_degraded(leader, another_f_idx));
99
100if (NULL != detector) {
101detector->has_add_clog_full_event_ = false;
102}
103
104EXPECT_TRUE(is_upgraded(leader, id));
105EXPECT_EQ(OB_SUCCESS, submit_log(leader, 1, id));
106
107// test disable sync
108PALF_LOG(INFO, "CASE[3] degrade caused by disable_sync");
109palf_list[another_f_idx]->palf_handle_impl_->disable_sync();
110EXPECT_TRUE(is_degraded(leader, another_f_idx));
111palf_list[another_f_idx]->palf_handle_impl_->enable_sync();
112EXPECT_TRUE(is_upgraded(leader, id));
113
114PALF_LOG(INFO, "CASE[4] degrade caused by disable_vote");
115// test disbale vote
116palf_list[another_f_idx]->palf_handle_impl_->disable_vote(false/*no need check log missing*/);
117EXPECT_TRUE(is_degraded(leader, another_f_idx));
118palf_list[another_f_idx]->palf_handle_impl_->enable_vote();
119EXPECT_TRUE(is_upgraded(leader, id));
120
121// test revoking the leader when arb service is degrading
122block_all_net(another_f_idx);
123const common::ObAddr follower_addr = get_cluster()[another_f_idx]->get_addr();
124LogConfigChangeArgs args(common::ObMember(follower_addr, 1), 0, DEGRADE_ACCEPTOR_TO_LEARNER);
125int64_t ele_epoch;
126common::ObRole ele_role;
127int64_t proposal_id = leader.palf_handle_impl_->state_mgr_.get_proposal_id();
128leader.palf_handle_impl_->election_.get_role(ele_role, ele_epoch);
129LogConfigVersion config_version;
130EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config_(args, proposal_id, ele_epoch, config_version));
131EXPECT_FALSE(leader.palf_handle_impl_->config_mgr_.alive_paxos_memberlist_.contains(follower_addr));
132EXPECT_EQ(leader.palf_handle_impl_->config_mgr_.state_, 1);
133
134// reset status supposing the lease is expried
135block_net(leader_idx, another_f_idx);
136leader.palf_handle_impl_->config_mgr_.reset_status();
137EXPECT_TRUE(is_degraded(leader, another_f_idx));
138unblock_net(leader_idx, another_f_idx);
139unblock_all_net(another_f_idx);
140
141revert_cluster_palf_handle_guard(palf_list);
142leader.reset();
143delete_paxos_group(id);
144PALF_LOG(INFO, "end test_2f1a_degrade_upgrade", K(id));
145}
146
147TEST_F(TestObSimpleLogClusterArbService, test_4f1a_degrade_upgrade)
148{
149SET_CASE_LOG_FILE(TEST_NAME, "arb_4f1a_degrade_upgrade");
150OB_LOGGER.set_log_level("TRACE");
151MockLocCB loc_cb;
152int ret = OB_SUCCESS;
153PALF_LOG(INFO, "begin test_4f1a_degrade_upgrade");
154int64_t leader_idx = 0;
155int64_t arb_replica_idx = -1;
156PalfHandleImplGuard leader;
157const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
158const int64_t id = ATOMIC_AAF(&palf_id_, 1);
159common::ObMember dummy_member;
160std::vector<PalfHandleImplGuard*> palf_list;
161EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
162LogConfigVersion config_version;
163ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
164EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[3]->get_addr(), 1), 3, config_version, CONFIG_CHANGE_TIMEOUT));
165ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
166EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[4]->get_addr(), 1), 4, config_version, CONFIG_CHANGE_TIMEOUT));
167EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
168
169const int64_t another_f1_idx = (leader_idx+3)%5;
170const int64_t another_f2_idx = (leader_idx+4)%5;
171palf_list[another_f1_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
172palf_list[another_f2_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
173EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
174sleep(2);
175block_all_net(another_f1_idx);
176block_all_net(another_f2_idx);
177
178
179EXPECT_TRUE(is_degraded(leader, another_f1_idx));
180EXPECT_TRUE(is_degraded(leader, another_f2_idx));
181
182unblock_all_net(another_f1_idx);
183unblock_all_net(another_f2_idx);
184loc_cb.leader_ = leader.palf_handle_impl_->self_;
185EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));
186
187EXPECT_TRUE(is_upgraded(leader, id));
188
189revert_cluster_palf_handle_guard(palf_list);
190leader.reset();
191delete_paxos_group(id);
192PALF_LOG(INFO, "end test_4f1a_degrade_upgrade", K(id));
193}
194
195TEST_F(TestObSimpleLogClusterArbService, test_2f1a_reconfirm_degrade_upgrade)
196{
197SET_CASE_LOG_FILE(TEST_NAME, "arb_2f1a_reconfirm_test");
198OB_LOGGER.set_log_level("TRACE");
199int ret = OB_SUCCESS;
200PALF_LOG(INFO, "begin test_2f1a_reconfirm_degrade_upgrade");
201MockLocCB loc_cb;
202int64_t leader_idx = 0;
203int64_t arb_replica_idx = -1;
204PalfHandleImplGuard leader;
205std::vector<PalfHandleImplGuard*> palf_list;
206const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
207const int64_t id = ATOMIC_AAF(&palf_id_, 1);
208common::ObMember dummy_member;
209EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
210EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
211const int64_t another_f_idx = (leader_idx+1)%3;
212EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
213sleep(2);
214palf_list[leader_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
215palf_list[another_f_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
216// block net of old leader, new leader will be elected
217// and degrade in RECONFIRM state
218ARB_TIMEOUT_ARG = 15 * 1000 * 1000;
219block_net(leader_idx, another_f_idx);
220block_net(leader_idx, arb_replica_idx);
221// block_net后会理解进行降级操作,导致旧主上有些单副本写成功的日志被committed
222submit_log(leader, 20, id);
223// submit some logs which will be truncated
224
225EXPECT_TRUE(is_degraded(*palf_list[another_f_idx], leader_idx));
226
227int64_t new_leader_idx = -1;
228PalfHandleImplGuard new_leader;
229EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
230loc_cb.leader_ = new_leader.palf_handle_impl_->self_;
231unblock_net(leader_idx, another_f_idx);
232unblock_net(leader_idx, arb_replica_idx);
233EXPECT_EQ(OB_SUCCESS, submit_log(new_leader, 100, id));
234
235EXPECT_TRUE(is_upgraded(new_leader, id));
236revert_cluster_palf_handle_guard(palf_list);
237leader.reset();
238new_leader.reset();
239delete_paxos_group(id);
240ARB_TIMEOUT_ARG = 2 * 1000 * 1000;
241PALF_LOG(INFO, "end test_2f1a_reconfirm_degrade_upgrade", K(id));
242}
243
244TEST_F(TestObSimpleLogClusterArbService, test_4f1a_reconfirm_degrade_upgrade)
245{
246SET_CASE_LOG_FILE(TEST_NAME, "arb_4f1a_reconfirm_test");
247OB_LOGGER.set_log_level("TRACE");
248MockLocCB loc_cb;
249int ret = OB_SUCCESS;
250PALF_LOG(INFO, "begin test_4f1a_reconfirm_degrade_upgrade");
251int64_t leader_idx = 0;
252int64_t arb_replica_idx = -1;
253auto cluster = get_cluster();
254PalfHandleImplGuard leader;
255const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
256const int64_t id = ATOMIC_AAF(&palf_id_, 1);
257common::ObMember dummy_member;
258std::vector<PalfHandleImplGuard*> palf_list;
259
260EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
261LogConfigVersion config_version;
262ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
263EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[3]->get_addr(), 1), 3, config_version, CONFIG_CHANGE_TIMEOUT));
264ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
265EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[4]->get_addr(), 1), 4, config_version, CONFIG_CHANGE_TIMEOUT));
266
267EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
268
269const int64_t another_f1_idx = 3;
270const int64_t another_f2_idx = 4;
271palf_list[leader_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
272palf_list[another_f1_idx]->palf_handle_impl_->set_location_cache_cb(&loc_cb);
273EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
274sleep(2);
275// stop leader and a follower
276block_all_net(leader_idx);
277block_all_net(another_f1_idx);
278
279//EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
280// wait for new leader is elected
281int64_t new_leader_idx = leader_idx;
282PalfHandleImplGuard new_leader;
283while (leader_idx == new_leader_idx) {
284new_leader.reset();
285EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
286}
287
288EXPECT_TRUE(is_degraded(new_leader, another_f1_idx));
289EXPECT_TRUE(is_degraded(new_leader, leader_idx));
290
291loc_cb.leader_ = new_leader.palf_handle_impl_->self_;
292// restart two servers
293unblock_all_net(leader_idx);
294unblock_all_net(another_f1_idx);
295
296EXPECT_EQ(OB_SUCCESS, submit_log(new_leader, 100, id));
297
298EXPECT_TRUE(is_upgraded(new_leader, id));
299leader.reset();
300new_leader.reset();
301revert_cluster_palf_handle_guard(palf_list);
302delete_paxos_group(id);
303PALF_LOG(INFO, "end test_4f1a_reconfirm_degrade_upgrade", K(id));
304}
305
306TEST_F(TestObSimpleLogClusterArbService, test_2f1a_config_change)
307{
308SET_CASE_LOG_FILE(TEST_NAME, "arb_2f1a_config_change");
309OB_LOGGER.set_log_level("DEBUG");
310MockLocCB loc_cb;
311int ret = OB_SUCCESS;
312PALF_LOG(INFO, "begin arb_2f1a_config_change");
313int64_t leader_idx = 0;
314int64_t arb_replica_idx = -1;
315PalfHandleImplGuard leader;
316std::vector<PalfHandleImplGuard*> palf_list;
317const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
318const int64_t id = ATOMIC_AAF(&palf_id_, 1);
319common::ObMember dummy_member;
320EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
321EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
322// 为备副本设置location cb,用于备副本找leader
323const int64_t another_f_idx = (leader_idx+1)%3;
324loc_cb.leader_ = leader.palf_handle_impl_->self_;
325palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
326palf_list[3]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
327palf_list[4]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
328EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
329sleep(2);
330
331LogConfigVersion config_version;
332ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
333// replace member
334EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->replace_member(
335ObMember(palf_list[3]->palf_handle_impl_->self_, 1),
336ObMember(palf_list[another_f_idx]->palf_handle_impl_->self_, 1),
337config_version,
338CONFIG_CHANGE_TIMEOUT));
339
340// add learner
341EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(
342ObMember(palf_list[4]->palf_handle_impl_->self_, 1),
343CONFIG_CHANGE_TIMEOUT));
344
345// switch learner
346EXPECT_EQ(OB_INVALID_ARGUMENT, leader.palf_handle_impl_->switch_learner_to_acceptor(
347ObMember(palf_list[4]->palf_handle_impl_->self_, 1),
3482,
349config_version,
350CONFIG_CHANGE_TIMEOUT));
351EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor(
352ObMember(palf_list[4]->palf_handle_impl_->self_, 1),
3533,
354config_version,
355CONFIG_CHANGE_TIMEOUT));
356revert_cluster_palf_handle_guard(palf_list);
357leader.reset();
358delete_paxos_group(id);
359PALF_LOG(INFO, "end arb_2f1a_config_change", K(id));
360}
361
362TEST_F(TestObSimpleLogClusterArbService, test_2f1a_arb_with_highest_version)
363{
364oceanbase::common::ObClusterVersion::get_instance().cluster_version_ = CLUSTER_VERSION_4_1_0_0;
365SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_arb_with_highest_version");
366OB_LOGGER.set_log_level("DEBUG");
367MockLocCB loc_cb;
368int ret = OB_SUCCESS;
369PALF_LOG(INFO, "begin test_2f1a_arb_with_highest_version");
370int64_t leader_idx = 0;
371int64_t arb_replica_idx = -1;
372PalfHandleImplGuard leader;
373std::vector<PalfHandleImplGuard*> palf_list;
374const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
375const int64_t id = ATOMIC_AAF(&palf_id_, 1);
376common::ObMember dummy_member;
377EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
378EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
379// 为备副本设置location cb,用于备副本找leader
380const int64_t another_f_idx = (leader_idx+1)%3;
381loc_cb.leader_ = leader.palf_handle_impl_->self_;
382palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
383EXPECT_EQ(OB_SUCCESS, submit_log(leader, 500, id));
384sleep(2);
385
386LogConfigChangeArgs args(ObMember(palf_list[3]->palf_handle_impl_->self_, 1), 0, ADD_LEARNER);
387int64_t proposal_id = 0;
388int64_t election_epoch = 0;
389LogConfigVersion config_version;
390EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
391EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
392// learner list and state_ has been changed
393EXPECT_TRUE(config_version.is_valid());
394EXPECT_EQ(1, leader.palf_handle_impl_->config_mgr_.state_);
395// only send config log to arb member
396ObMemberList member_list;
397member_list.add_server(get_cluster()[2]->get_addr());
398const int64_t prev_log_proposal_id = leader.palf_handle_impl_->config_mgr_.reconfig_barrier_.prev_log_proposal_id_;
399const LSN prev_lsn = leader.palf_handle_impl_->config_mgr_.reconfig_barrier_.prev_lsn_;
400const int64_t prev_mode_pid = leader.palf_handle_impl_->config_mgr_.reconfig_barrier_.prev_mode_pid_;
401const LogConfigMeta config_meta = leader.palf_handle_impl_->config_mgr_.log_ms_meta_;
402EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->log_engine_.submit_change_config_meta_req( \
403member_list, proposal_id, prev_log_proposal_id, prev_lsn, prev_mode_pid, config_meta));
404sleep(1);
405// check if arb member has received and persisted the config log
406while (true) {
407PalfHandleLiteGuard arb_member;
408if (OB_FAIL(get_arb_member_guard(id, arb_member))) {
409} else if (arb_member.palf_handle_lite_->config_mgr_.persistent_config_version_ == config_version) {
410break;
411} else {
412EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->log_engine_.submit_change_config_meta_req( \
413member_list, proposal_id, prev_log_proposal_id, prev_lsn, prev_mode_pid, config_meta));
414}
415::ob_usleep(10 * 1000);
416}
417EXPECT_GT(config_version, leader.palf_handle_impl_->config_mgr_.persistent_config_version_);
418EXPECT_GT(config_version, palf_list[1]->palf_handle_impl_->config_mgr_.persistent_config_version_);
419
420// restart cluster, close a follower, restart leader
421revert_cluster_palf_handle_guard(palf_list);
422leader.reset();
423// block_net, so two F cann't reach majority
424block_net(another_f_idx, leader_idx);
425EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
426
427const int64_t restart_finish_time_us_ = common::ObTimeUtility::current_time();
428PalfHandleImplGuard new_leader;
429int64_t new_leader_idx;
430get_leader(id, new_leader, new_leader_idx);
431EXPECT_EQ(OB_SUCCESS, submit_log(new_leader, 500, id));
432PALF_LOG(ERROR, "RTO", "RTO", common::ObTimeUtility::current_time() - restart_finish_time_us_);
433
434new_leader.reset();
435// must delete paxos group in here, otherwise memory of
436// MockLocCB will be relcaimed and core dump will occur
437// blacklist will not be deleted after reboot, clean it manually
438unblock_net(another_f_idx, leader_idx);
439delete_paxos_group(id);
440PALF_LOG(INFO, "end test_2f1a_arb_with_highest_version", K(id));
441}
442
443TEST_F(TestObSimpleLogClusterArbService, test_2f1a_defensive)
444{
445SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_defensive");
446OB_LOGGER.set_log_level("DEBUG");
447MockLocCB loc_cb;
448int ret = OB_SUCCESS;
449PALF_LOG(INFO, "begin test_2f1a_defensive");
450int64_t leader_idx = 0;
451int64_t arb_replica_idx = -1;
452PalfHandleImplGuard leader;
453std::vector<PalfHandleImplGuard*> palf_list;
454const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
455const int64_t id = ATOMIC_AAF(&palf_id_, 1);
456common::ObMember dummy_member;
457EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
458EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
459// 为备副本设置location cb,用于备副本找leader
460const int64_t another_f_idx = (leader_idx+1)%3;
461loc_cb.leader_ = leader.palf_handle_impl_->self_;
462palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
463EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
464sleep(2);
465const int64_t added_member_idx = 3;
466const common::ObMember added_member = ObMember(palf_list[added_member_idx]->palf_handle_impl_->self_, 1);
467
468// add a member, do not allow to append logs until config log reaches majority
469LogConfigVersion cur_config_version;
470ASSERT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(cur_config_version));
471LogConfigChangeArgs args(added_member, 3, cur_config_version, ADD_MEMBER);
472int64_t proposal_id = 0;
473int64_t election_epoch = 0;
474LogConfigVersion config_version;
475EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
476EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
477// do not allow to append log when changing config with arb
478// EXPECT_TRUE(leader.palf_handle_impl_->state_mgr_.is_changing_config_with_arb());
479while (true) {
480if (OB_SUCC(leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version))) {
481break;
482} else {
483(void) leader.palf_handle_impl_->config_mgr_.pre_sync_config_log_and_mode_meta(args.server_, proposal_id);
484::ob_usleep(10 * 1000);
485}
486}
487
488// flashback one follower
489LogEntryHeader header_origin;
490SCN base_scn;
491base_scn.set_base();
492SCN flashback_scn;
493palf::AccessMode unused_access_mode;
494int64_t mode_version;
495EXPECT_EQ(OB_SUCCESS, get_middle_scn(50, leader, flashback_scn, header_origin));
496switch_append_to_flashback(leader, mode_version);
497sleep(1);
498EXPECT_EQ(OB_SUCCESS, palf_list[another_f_idx]->palf_handle_impl_->flashback(mode_version, flashback_scn, CONFIG_CHANGE_TIMEOUT));
499
500// remove another follower
501EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->remove_member(added_member, 2, CONFIG_CHANGE_TIMEOUT));
502
503revert_cluster_palf_handle_guard(palf_list);
504leader.reset();
505delete_paxos_group(id);
506PALF_LOG(INFO, "end test_2f1a_defensive", K(id));
507}
508
509int get_palf_handle_lite(const int64_t tenant_id,
510const int64_t palf_id,
511ObSimpleArbServer *server,
512IPalfHandleImplGuard &handle_guard)
513{
514int ret = OB_SUCCESS;
515PalfEnvLiteGuard env_guard;
516if (NULL == server) {
517ret = OB_INVALID_ARGUMENT;
518} else if (OB_FAIL(server->get_palf_env_lite(tenant_id, env_guard))) {
519PALF_LOG(ERROR, "get_palf_env_lite failed", K(tenant_id), K(palf_id));
520} else if (OB_FAIL(env_guard.palf_env_lite_->get_palf_handle_impl(palf_id, handle_guard))) {
521PALF_LOG(ERROR, "get_palf_handle_impl failed", K(tenant_id), K(palf_id));
522} else {
523}
524return ret;
525}
526
527using namespace palflite;
528
529TEST_F(TestObSimpleLogClusterArbService, test_multi_meta_block)
530{
531SET_CASE_LOG_FILE(TEST_NAME, "test_mutli_meta_block");
532OB_LOGGER.set_log_level("INFO");
533MockLocCB loc_cb;
534int ret = OB_SUCCESS;
535PALF_LOG(INFO, "begin test_multi_meta_block");
536int64_t leader_idx = 0;
537int64_t arb_replica_idx = -1;
538PalfHandleImplGuard leader;
539std::vector<PalfHandleImplGuard*> palf_list;
540const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
541const int64_t id = ATOMIC_AAF(&palf_id_, 1);
542common::ObMember dummy_member;
543EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
544EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
545// 为备副本设置location cb,用于备副本找leader
546const int64_t another_f_idx = (leader_idx+1)%3;
547loc_cb.leader_ = leader.palf_handle_impl_->self_;
548palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
549EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
550sleep(2);
551ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
552IPalfHandleImplGuard arb_guard;
553ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
554PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
555LogEngine *log_engine = &arb_palf->log_engine_;
556LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
557LogStorage *meta_storage = &log_engine->log_meta_storage_;
558{
559while (1) {
560if (meta_storage->log_tail_ < LSN(meta_storage->logical_block_size_)) {
561EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
562} else {
563break;
564}
565}
566}
567meta_tail = log_engine->log_meta_storage_.log_tail_;
568ASSERT_EQ(meta_tail, LSN(log_engine->log_meta_storage_.logical_block_size_));
569revert_cluster_palf_handle_guard(palf_list);
570arb_guard.reset();
571leader.reset();
572EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
573{
574ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
575PalfHandleImplGuard leader;
576EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
577EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
578IPalfHandleImplGuard arb_guard;
579ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
580PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
581LogEngine *log_engine = &arb_palf->log_engine_;
582LogStorage *meta_storage = &log_engine->log_meta_storage_;
583EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
584LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
585ASSERT_NE(meta_tail, LSN(log_engine->log_meta_storage_.logical_block_size_));
586}
587EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
588{
589ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
590PalfHandleImplGuard leader;
591EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
592EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
593IPalfHandleImplGuard arb_guard;
594ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
595PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
596LogEngine *log_engine = &arb_palf->log_engine_;
597LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
598LogStorage *meta_storage = &log_engine->log_meta_storage_;
599while (1) {
600if (meta_storage->log_tail_ < LSN(32 * meta_storage->logical_block_size_)) {
601EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
602} else {
603break;
604}
605}
606}
607EXPECT_EQ(OB_SUCCESS, restart_paxos_groups());
608{
609ObSimpleArbServer *arb_server = dynamic_cast<ObSimpleArbServer*>(get_cluster()[arb_replica_idx]);
610PalfHandleImplGuard leader;
611EXPECT_EQ(OB_SUCCESS, get_leader(id, leader, leader_idx));
612EXPECT_EQ(OB_SUCCESS, submit_log(leader, 4000, id));
613IPalfHandleImplGuard arb_guard;
614ASSERT_EQ(OB_SUCCESS, get_palf_handle_lite(OB_SERVER_TENANT_ID, id, arb_server, arb_guard));
615PalfHandleLite *arb_palf = dynamic_cast<PalfHandleLite *>(arb_guard.palf_handle_impl_);
616LogEngine *log_engine = &arb_palf->log_engine_;
617LSN meta_tail = log_engine->log_meta_storage_.log_tail_;
618LogStorage *meta_storage = &log_engine->log_meta_storage_;
619while (1) {
620if (meta_storage->log_tail_ < LSN(34 * meta_storage->logical_block_size_ + 4*4*1024)) {
621EXPECT_EQ(OB_SUCCESS, log_engine->append_log_meta_(log_engine->log_meta_));
622} else {
623break;
624}
625}
626}
627delete_paxos_group(id);
628PALF_LOG(INFO, "end test_mutli_meta_block", K(id));
629}
630
631// 1. 2F1A, the leader starts to degrade another F
632// 2. after the config log has been accepted by another F, the leader revoked
633// 3. the previous leader has been elected as the new leader
634// 4. reconfirm may fail because leader's config_version is not same to that of the follower
635TEST_F(TestObSimpleLogClusterArbService, test_2f1a_degrade_when_no_leader)
636{
637SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_degrade_when_no_leader");
638MockLocCB loc_cb;
639int ret = OB_SUCCESS;
640PALF_LOG(INFO, "begin test_2f1a_degrade_when_no_leader");
641int64_t leader_idx = 0;
642int64_t arb_replica_idx = -1;
643PalfHandleImplGuard leader;
644std::vector<PalfHandleImplGuard*> palf_list;
645const int64_t id = ATOMIC_AAF(&palf_id_, 1);
646common::ObMember dummy_member;
647EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
648EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
649// 为备副本设置location cb,用于备副本找leader
650const int64_t another_f_idx = (leader_idx+1)%3;
651loc_cb.leader_ = leader.palf_handle_impl_->self_;
652palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
653EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
654sleep(2);
655
656const common::ObAddr b_addr = palf_list[another_f_idx]->palf_handle_impl_->self_;
657LogConfigChangeArgs args(ObMember(b_addr, 1), 0, DEGRADE_ACCEPTOR_TO_LEARNER);
658int64_t proposal_id = 0;
659int64_t election_epoch = 0;
660LogConfigVersion config_version;
661EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
662EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
663
664// leader appended config meta
665EXPECT_FALSE(palf_list[leader_idx]->get_palf_handle_impl()->config_mgr_.log_ms_meta_.curr_.config_.log_sync_memberlist_.contains(b_addr));
666EXPECT_TRUE(palf_list[another_f_idx]->get_palf_handle_impl()->config_mgr_.log_ms_meta_.curr_.config_.log_sync_memberlist_.contains(b_addr));
667
668// block all networks of arb member, and the network from the follower to the leader
669block_net(arb_replica_idx, another_f_idx, true);
670block_net(arb_replica_idx, leader_idx, true);
671block_net(another_f_idx, leader_idx, true);
672
673// waiting for leader revoke
674while (leader.palf_handle_impl_->state_mgr_.role_ == common::ObRole::LEADER) {
675sleep(1);
676}
677
678// unblock_net
679unblock_net(another_f_idx, leader_idx);
680unblock_net(arb_replica_idx, leader_idx);
681
682common::ObMemberList leader_member_list;
683int64_t leader_replica_num = 0;
684EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.get_log_sync_member_list( \
685leader_member_list, leader_replica_num));
686EXPECT_EQ(1, leader_member_list.get_member_number());
687EXPECT_EQ(1, leader_replica_num);
688
689int64_t new_leader_idx = 0;
690PalfHandleImplGuard new_leader;
691EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
692
693EXPECT_EQ(leader.palf_handle_impl_->self_, new_leader.palf_handle_impl_->self_);
694
695// waiting for upgrading
696is_upgraded(leader, id);
697EXPECT_EQ(OB_SUCCESS, new_leader.palf_handle_impl_->config_mgr_.get_log_sync_member_list( \
698leader_member_list, leader_replica_num));
699EXPECT_EQ(2, leader_member_list.get_member_number());
700EXPECT_EQ(2, leader_replica_num);
701
702revert_cluster_palf_handle_guard(palf_list);
703leader.reset();
704new_leader.reset();
705delete_paxos_group(id);
706PALF_LOG(INFO, "end test_2f1a_degrade_when_no_leader", K(id));
707}
708
709TEST_F(TestObSimpleLogClusterArbService, test_2f1a_upgrade_when_no_leader)
710{
711SET_CASE_LOG_FILE(TEST_NAME, "test_2f1a_upgrade_when_no_leader");
712// OB_LOGGER.set_log_level("TRACE");
713MockLocCB loc_cb;
714int ret = OB_SUCCESS;
715PALF_LOG(INFO, "begin test_2f1a_upgrade_when_no_leader");
716int64_t leader_idx = 0;
717int64_t arb_replica_idx = -1;
718PalfHandleImplGuard leader;
719std::vector<PalfHandleImplGuard*> palf_list;
720const int64_t id = ATOMIC_AAF(&palf_id_, 1);
721common::ObMember dummy_member;
722EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
723EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
724// 为备副本设置location cb,用于备副本找leader
725const int64_t another_f_idx = (leader_idx+1)%3;
726loc_cb.leader_ = leader.palf_handle_impl_->self_;
727palf_list[another_f_idx]->get_palf_handle_impl()->set_location_cache_cb(&loc_cb);
728EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
729sleep(2);
730
731// block the network from the follower to the leader
732block_net(another_f_idx, leader_idx, true);
733is_degraded(leader, another_f_idx);
734
735// upgrade follower manually
736int64_t proposal_id;
737int64_t election_epoch;
738LogConfigVersion config_version;
739LogConfigChangeArgs args(common::ObMember(get_cluster()[another_f_idx]->get_addr(), 1), 0, UPGRADE_LEARNER_TO_ACCEPTOR);
740EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.start_change_config(proposal_id, election_epoch, args.type_));
741
742block_net(arb_replica_idx, leader_idx, true);
743EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
744EXPECT_EQ(1, leader.palf_handle_impl_->config_mgr_.state_);
745EXPECT_EQ(OB_EAGAIN, leader.palf_handle_impl_->config_mgr_.change_config(args, proposal_id, election_epoch, config_version));
746EXPECT_EQ(1, leader.palf_handle_impl_->config_mgr_.state_);
747
748// waiting for leader revoke
749while (leader.palf_handle_impl_->state_mgr_.role_ == LEADER) {
750sleep(1);
751}
752
753// avoid the follower is elected to be leader
754block_net(arb_replica_idx, another_f_idx, true);
755unblock_all_net(leader_idx);
756
757// waiting for leader takeover
758while (!leader.palf_handle_impl_->state_mgr_.is_leader_active()) {
759sleep(1);
760}
761// waiting for upgrading
762is_upgraded(leader, id);
763
764revert_cluster_palf_handle_guard(palf_list);
765leader.reset();
766delete_paxos_group(id);
767PALF_LOG(INFO, "end test_2f1a_upgrade_when_no_leader", K(id));
768}
769
770TEST_F(TestObSimpleLogClusterArbService, test_1f1a_create_palf_group)
771{
772oceanbase::common::ObClusterVersion::get_instance().cluster_version_ = CLUSTER_VERSION_4_1_0_0;
773SET_CASE_LOG_FILE(TEST_NAME, "test_1f1a_create_palf_group");
774PALF_LOG(INFO, "begin test_1f1a_create_palf_group");
775OB_LOGGER.set_log_level("TRACE");
776const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
777MockLocCB loc_cb;
778int ret = OB_SUCCESS;
779int64_t leader_idx = 0;
780int64_t arb_replica_idx = -1;
781PalfHandleImplGuard leader;
782const int64_t id = ATOMIC_AAF(&palf_id_, 1);
783common::ObMemberList member_list = get_member_list();
784member_list.remove_server(get_cluster()[1]->get_addr());
785const int64_t member_cnt = 2;
786const common::ObMember &arb_member = get_arb_member();
787EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, &loc_cb, member_list, member_cnt, arb_member, arb_replica_idx, leader_idx, false, leader));
788EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id));
789
790LogConfigVersion config_version;
791EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_config_version(config_version));
792EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_member(ObMember(get_cluster()[1]->get_addr(), 1), 2, config_version, CONFIG_CHANGE_TIMEOUT));
793
794EXPECT_EQ(2, leader.palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.log_sync_replica_num_);
795EXPECT_EQ(2, leader.palf_handle_impl_->config_mgr_.log_ms_meta_.curr_.config_.log_sync_memberlist_.get_member_number());
796
797leader.reset();
798delete_paxos_group(id);
799PALF_LOG(INFO, "end test_2f1a_degrade_upgrade", K(id));
800}
801
802// 1. 2F1A
803// 2. lock_memberlist(just renew barrier)
804// 3. submit and commit logs
805// 4. kill leader
806// 5. check committed_end_lsn
807TEST_F(TestObSimpleLogClusterArbService, test_lock_memberlist_opt)
808{
809SET_CASE_LOG_FILE(TEST_NAME, "test_lock_memberlist_opt");
810int ret = OB_SUCCESS;
811const int64_t id = ATOMIC_AAF(&palf_id_, 1);
812PALF_LOG(INFO, "begin test_repeat_lock_memberlist", K(id));
813int64_t leader_idx = 0, arb_replica_idx = 0;
814PalfHandleImplGuard leader;
815oceanbase::common::ObClusterVersion::get_instance().cluster_version_ = CLUSTER_VERSION_4_2_0_0;
816EXPECT_EQ(OB_SUCCESS, create_paxos_group_with_arb(id, arb_replica_idx, leader_idx, leader));
817const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s
818const int64_t another_f_idx = (leader_idx+1)%3;
819
820EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));
821EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn(), leader.palf_handle_impl_->get_end_lsn());
822
823// 2. renew_barrier
824EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->config_mgr_.renew_config_change_barrier());
825
826// 3. submit and commit logs
827EXPECT_EQ(OB_SUCCESS, submit_log(leader, 10, id));
828const LSN max_lsn = leader.palf_handle_impl_->get_max_lsn();
829EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn(), leader.palf_handle_impl_->get_end_lsn());
830
831// 4. kill leader
832block_all_net(leader_idx);
833
834// 5. check committed_end_lsn
835int64_t new_leader_idx = -1;
836PalfHandleImplGuard new_leader;
837EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx));
838EXPECT_UNTIL_EQ(leader.palf_handle_impl_->get_max_lsn(), leader.palf_handle_impl_->get_end_lsn());
839EXPECT_EQ(max_lsn, leader.palf_handle_impl_->get_end_lsn());
840unblock_all_net(leader_idx);
841
842leader.reset();
843new_leader.reset();
844delete_paxos_group(id);
845PALF_LOG(INFO, "end test_lock_memberlist_opt", K(id));
846}
847} // end unittest
848} // end oceanbase
849
// Entry point of the test binary. All work is delegated to the project macro
// RUN_SIMPLE_LOG_CLUSTER_TEST, invoked with this file's TEST_NAME.
// NOTE(review): the macro is defined outside this file; it presumably consumes
// argc/argv by name and supplies the return statement - confirm at its definition.
int main(int argc, char **argv)
{
RUN_SIMPLE_LOG_CLUSTER_TEST(TEST_NAME);
}
854