// oceanbase · 1884 lines · 74.1 KB
/**
 * Copyright (c) 2021 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 *          http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */
12
13#define USING_LOG_PREFIX SHARE14
15#include "share/schema/ob_schema_mgr.h"16#include "rootserver/ob_balance_group_ls_stat_operator.h"17#include "lib/hash/ob_hashset.h"18#include "lib/oblog/ob_log_module.h"19#include "lib/utility/ob_print_utils.h"20#include "common/ob_timeout_ctx.h"21#include "observer/ob_server_struct.h" // for GCTX22#include "share/schema/ob_table_schema.h"23#include "share/schema/ob_schema_struct.h"24#include "share/schema/ob_schema_getter_guard.h"25#include "share/ob_share_util.h"26#include "share/inner_table/ob_inner_table_schema_constants.h"27#include "share/ob_srv_rpc_proxy.h" // ObSrvRpcProxy28#include "share/tablet/ob_tablet_to_ls_operator.h"29#include "share/ls/ob_ls_operator.h" // ObLSAttrOperator30#include "share/balance/ob_balance_task_table_operator.h" // ObBalanceTaskTableOperator31#include "share/schema/ob_part_mgr_util.h"32#include "share/ob_debug_sync.h" // DEBUG_SYNC33#include "storage/tablelock/ob_lock_utils.h" // ObLSObjLockUtil34#include "share/ls/ob_ls_table.h" // ObLSTable35#include "share/ls/ob_ls_table_operator.h" // ObLSTableOperator36#include "share/location_cache/ob_location_service.h" // ObLocationService37#include "share/ob_rpc_struct.h" // ObCreateDupLSArg & ObCreateDupLSResult38#include "rootserver/ob_root_service.h"39#include "rootserver/parallel_ddl/ob_tablet_balance_allocator.h"40
41namespace oceanbase42{
43using namespace common;44using namespace common::sqlclient;45using namespace share;46using namespace share::schema;47using namespace transaction::tablelock;48
49namespace rootserver50{
51
52
// Static member shared by all ObNewTableTabletAllocator instances, starting at 0.
// NOTE(review): presumably this is the counter behind fetch_ls_offset(), which
// alloc_tablet_for_create_balance_group() uses to pick its first LS — confirm
// against the fetch_ls_offset() definition (not visible in this part of the file).
int64_t ObNewTableTabletAllocator::alloc_tablet_ls_offset_ = 0;
55int ObBalanceGroupLSStat::build(56const uint64_t tenant_id,57const ObBalanceGroupID &balance_group_id,58const share::ObLSID &ls_id,59const int64_t tablet_group_count,60const ObBalanceGroupName &balance_group_name)61{
62int ret = OB_SUCCESS;63if (OB_UNLIKELY(OB_INVALID_ID == tenant_id64|| !balance_group_id.is_valid()65|| !ls_id.is_valid()66|| tablet_group_count < 067|| balance_group_name.is_empty())) {68ret = OB_INVALID_ARGUMENT;69LOG_WARN("invalid argument", KR(ret),70K(tenant_id),71K(balance_group_id),72K(ls_id),73K(tablet_group_count),74K(balance_group_name));75} else {76tenant_id_ = tenant_id;77balance_group_id_= balance_group_id;78ls_id_ = ls_id;79tablet_group_count_ = tablet_group_count;80balance_group_name_ = balance_group_name;81}82return ret;83}
84
85ObBalanceGroupLSStatOperator::ObBalanceGroupLSStatOperator()86: inited_(false),87sql_proxy_(nullptr)88{
89}
90
91ObBalanceGroupLSStatOperator::~ObBalanceGroupLSStatOperator()92{
93}
94
95int ObBalanceGroupLSStatOperator::init(96common::ObMySQLProxy *sql_proxy)97{
98int ret = OB_SUCCESS;99if (OB_UNLIKELY(inited_)) {100ret = OB_INIT_TWICE;101LOG_WARN("inited twice", KR(ret), K(inited_));102} else if (OB_UNLIKELY(nullptr == sql_proxy)) {103ret = OB_INVALID_ARGUMENT;104LOG_WARN("invalid argument", KR(ret), KP(sql_proxy));105} else {106sql_proxy_ = sql_proxy;107inited_ = true;108}109return ret;110}
111
112int ObBalanceGroupLSStatOperator::get_balance_group_ls_stat(113const int64_t timeout,114const uint64_t tenant_id,115const ObBalanceGroupID &balance_group_id,116common::ObIArray<ObBalanceGroupLSStat> &balance_group_ls_stat_array)117{
118int ret = OB_SUCCESS;119if (OB_UNLIKELY(!inited_)) {120ret = OB_NOT_INIT;121LOG_WARN("not init", KR(ret));122} else if (OB_UNLIKELY(timeout <= 0123|| OB_INVALID_ID == tenant_id124|| !balance_group_id.is_valid())) {125ret = OB_INVALID_ARGUMENT;126LOG_WARN("invalid argument", KR(ret),127K(timeout),128K(tenant_id),129K(balance_group_id));130} else if (OB_UNLIKELY(nullptr == sql_proxy_)) {131ret = OB_ERR_UNEXPECTED;132LOG_WARN("sql_proxy_ ptr is null", KR(ret), KP(sql_proxy_));133} else if (OB_FAIL(get_balance_group_ls_stat(134timeout,135*sql_proxy_,136tenant_id,137balance_group_id,138false, /* for update */139balance_group_ls_stat_array))) {140LOG_WARN("fail to get balance group ls stat", KR(ret),141K(tenant_id),142K(balance_group_id));143}144return ret;145}
146
147int ObBalanceGroupLSStatOperator::get_balance_group_ls_stat(148const int64_t timeout,149common::ObISQLClient &sql_client,150const uint64_t tenant_id,151const ObBalanceGroupID &balance_group_id,152const bool for_update,153common::ObIArray<ObBalanceGroupLSStat> &balance_group_ls_stat_array)154{
155int ret = OB_SUCCESS;156common::ObTimeoutCtx timeout_ctx;157if (OB_UNLIKELY(!inited_)) {158ret = OB_NOT_INIT;159LOG_WARN("not init", KR(ret));160} else if (OB_UNLIKELY(timeout <= 0161|| OB_INVALID_ID == tenant_id162|| !balance_group_id.is_valid())) {163ret = OB_INVALID_ARGUMENT;164LOG_WARN("invalid argument", KR(ret),165K(timeout),166K(tenant_id),167K(balance_group_id));168} else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(169timeout_ctx,170timeout))) {171LOG_WARN("fail to set timeout", KR(ret), K(timeout));172} else {173common::ObSqlString sql;174const uint64_t sql_tenant_id = gen_meta_tenant_id(tenant_id);175balance_group_ls_stat_array.reset();176SMART_VAR(ObISQLClient::ReadResult, res) {177sqlclient::ObMySQLResult *result = nullptr;178if (OB_FAIL(sql.append_fmt(179"SELECT * FROM %s WHERE "180"tenant_id = %ld AND "181"balance_group_id_high = %ld AND "182"balance_group_id_low = %ld%s",183OB_ALL_BALANCE_GROUP_LS_STAT_TNAME,184tenant_id,185balance_group_id.id_high_,186balance_group_id.id_low_,187(for_update ? 
" FOR UPDATE" : "")))) {188LOG_WARN("fail to assign sql", KR(ret));189} else if (OB_FAIL(sql_client.read(res, sql_tenant_id, sql.ptr()))) {190LOG_WARN("execute sql failed", KR(ret), K(sql_tenant_id), K(sql));191} else if (OB_UNLIKELY(nullptr == (result = res.get_result()))) {192ret = OB_ERR_UNEXPECTED;193LOG_WARN("get mysql res failed", KR(ret), K(sql));194} else {195while (OB_SUCC(ret) && OB_SUCC(result->next())) {196ObBalanceGroupLSStat tmp_bg_ls_stat;197uint64_t tenant_id = OB_INVALID_ID;198uint64_t id_high = OB_INVALID_ID;199uint64_t id_low = OB_INVALID_ID;200int64_t ls_id = 0;201int64_t tablet_group_count = -1;202ObString balance_group_name;203EXTRACT_INT_FIELD_MYSQL(*result, "tenant_id", tenant_id, uint64_t);204EXTRACT_INT_FIELD_MYSQL(*result, "balance_group_id_high", id_high, uint64_t);205EXTRACT_INT_FIELD_MYSQL(*result, "balance_group_id_low", id_low, uint64_t);206EXTRACT_INT_FIELD_MYSQL(*result, "ls_id", ls_id, int64_t);207EXTRACT_INT_FIELD_MYSQL(*result, "tablet_group_count", tablet_group_count, int64_t);208EXTRACT_VARCHAR_FIELD_MYSQL(*result, "balance_group_name", balance_group_name);209if (OB_SUCC(ret)) {210if (OB_FAIL(tmp_bg_ls_stat.build(211tenant_id,212ObBalanceGroupID(id_high, id_low),213share::ObLSID(ls_id),214tablet_group_count,215balance_group_name))) {216LOG_WARN("fail to build balance group ls stat", KR(ret),217K(tenant_id),218K(id_high),219K(id_low),220K(ls_id),221K(tablet_group_count),222K(balance_group_name));223} else if (OB_FAIL(balance_group_ls_stat_array.push_back(224tmp_bg_ls_stat))) {225LOG_WARN("fail to push back", KR(ret));226}227}228}229if (OB_ITER_END == ret) {230ret = OB_SUCCESS;231}232}233}234}235return ret;236}
237
238int ObBalanceGroupLSStatOperator::insert_update_balance_group_ls_stat(239const int64_t timeout,240const uint64_t tenant_id,241const ObBalanceGroupID &balance_group_id,242const common::ObIArray<ObBalanceGroupLSStat> &balance_group_ls_stat_array)243{
244int ret = OB_SUCCESS;245if (OB_UNLIKELY(!inited_)) {246ret = OB_NOT_INIT;247LOG_WARN("not init", KR(ret));248} else if (OB_UNLIKELY(timeout <= 0249|| OB_INVALID_ID == tenant_id250|| !balance_group_id.is_valid()251|| balance_group_ls_stat_array.count() <= 0)) {252ret = OB_INVALID_ARGUMENT;253LOG_WARN("invalid argument", KR(ret),254K(timeout),255K(tenant_id),256K(balance_group_id),257K(balance_group_ls_stat_array));258} else if (OB_UNLIKELY(nullptr == sql_proxy_)) {259ret = OB_ERR_UNEXPECTED;260LOG_WARN("sql_proxy_ ptr is null", KR(ret), KP(sql_proxy_));261} else {262ObMySQLTransaction trans;263if (OB_FAIL(trans.start(264sql_proxy_,265gen_meta_tenant_id(tenant_id)))) {266LOG_WARN("fail to start trans", KR(ret));267} else {268if (OB_FAIL(insert_update_balance_group_ls_stat(269timeout,270trans,271tenant_id,272balance_group_id,273balance_group_ls_stat_array))) {274LOG_WARN("fail to insert update balance group ls stat", KR(ret));275}276// commit/abort277int tmp_ret = OB_SUCCESS;278if (OB_SUCCESS != (tmp_ret = trans.end(OB_SUCC(ret)))) {279LOG_WARN("trans end failed", K(tmp_ret), "is_commit", OB_SUCCESS == ret);280ret = (OB_SUCCESS == ret ? tmp_ret : ret);281}282}283}284return ret;285}
286
287int ObBalanceGroupLSStatOperator::insert_update_balance_group_ls_stat(288const int64_t timeout,289common::ObISQLClient &sql_client,290const uint64_t tenant_id,291const ObBalanceGroupID &balance_group_id,292const common::ObIArray<ObBalanceGroupLSStat> &balance_group_ls_stat_array)293{
294int ret = OB_SUCCESS;295common::ObTimeoutCtx timeout_ctx;296if (OB_UNLIKELY(!inited_)) {297ret = OB_NOT_INIT;298LOG_WARN("not init", KR(ret));299} else if (OB_UNLIKELY(timeout <= 0300|| OB_INVALID_ID == tenant_id301|| !balance_group_id.is_valid()302|| balance_group_ls_stat_array.count() <= 0)) {303ret = OB_INVALID_ARGUMENT;304LOG_WARN("invalid argument", KR(ret),305K(tenant_id),306K(balance_group_id),307K(balance_group_ls_stat_array));308} else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(309timeout_ctx,310timeout))) {311LOG_WARN("fail to set timeout", KR(ret), K(timeout));312} else {313const uint64_t sql_tenant_id = gen_meta_tenant_id(tenant_id);314for (int64_t i = 0; OB_SUCC(ret) && i < balance_group_ls_stat_array.count(); ++i) {315int64_t affected_rows = -1;316common::ObSqlString insert_update_sql;317const ObBalanceGroupLSStat &balance_group_ls_stat = balance_group_ls_stat_array.at(i);318if (OB_FAIL(generate_insert_update_sql(319balance_group_ls_stat,320insert_update_sql))) {321LOG_WARN("fail to generate insert update sql", KR(ret),322K(balance_group_ls_stat),323K(insert_update_sql));324} else if (OB_FAIL(sql_client.write(325sql_tenant_id,326insert_update_sql.ptr(),327affected_rows))) {328LOG_WARN("fail to insert update", KR(ret),329K(sql_tenant_id),330K(insert_update_sql));331} else if (affected_rows > 2) {332ret = OB_ERR_UNEXPECTED;333LOG_WARN("unexpected affected rows", KR(ret),334K(sql_tenant_id),335K(insert_update_sql),336K(affected_rows));337}338}339}340return ret;341}
342
343int ObBalanceGroupLSStatOperator::inc_balance_group_ls_stat(344const int64_t timeout,345common::ObISQLClient &sql_client,346const uint64_t tenant_id,347const ObBalanceGroupLSStat &ls_stat)348{
349int ret = OB_SUCCESS;350common::ObTimeoutCtx timeout_ctx;351if (OB_UNLIKELY(!inited_)) {352ret = OB_NOT_INIT;353LOG_WARN("not init", KR(ret));354} else if (OB_UNLIKELY(355timeout <= 0356|| OB_INVALID_TENANT_ID == tenant_id357|| !ls_stat.get_balance_group_id().is_valid())) {358ret = OB_INVALID_ARGUMENT;359LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(ls_stat));360} else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(361timeout_ctx, timeout))) {362LOG_WARN("fail to set timeout", KR(ret), K(timeout));363} else {364const uint64_t sql_tenant_id = gen_meta_tenant_id(tenant_id);365common::ObSqlString inc_sql;366int64_t affected_rows = 0;367if (OB_FAIL(generate_inc_sql_(ls_stat, inc_sql))) {368LOG_WARN("fail to generate inc sql", KR(ret),369K(tenant_id), K(ls_stat), K(inc_sql));370} else if (OB_FAIL(sql_client.write(371sql_tenant_id, inc_sql.ptr(), affected_rows))) {372LOG_WARN("fail to insert update", KR(ret),373K(tenant_id), K(inc_sql));374} else if (OB_UNLIKELY(affected_rows > 2)) {375ret = OB_ERR_UNEXPECTED;376LOG_WARN("unexpected affected rows", KR(ret),377K(tenant_id), K(inc_sql), K(affected_rows));378}379}380return ret;381}
382
383int ObBalanceGroupLSStatOperator::delete_balance_group_ls_stat(384const int64_t timeout,385common::ObISQLClient &sql_client,386const uint64_t tenant_id)387{
388int ret = OB_SUCCESS;389ObSqlString sql;390int64_t affected_rows = 0;391if (OB_UNLIKELY(!inited_)) {392ret = OB_NOT_INIT;393LOG_WARN("not init", KR(ret));394} else if (OB_FAIL(sql.assign_fmt("delete from %s where tenant_id= %ld", OB_ALL_BALANCE_GROUP_LS_STAT_TNAME, tenant_id))) {395LOG_WARN("fail to format sql", KR(ret));396} else if (OB_FAIL(sql_client.write(gen_meta_tenant_id(tenant_id), sql.ptr(), affected_rows))) {397LOG_WARN("fail to delete inner table", KR(ret), K(sql));398}399return ret;400}
401
402int ObBalanceGroupLSStatOperator::generate_inc_sql_(403const ObBalanceGroupLSStat &bg_ls_stat,404common::ObSqlString &sql_string)405{
406int ret = OB_SUCCESS;407if (OB_UNLIKELY(!inited_)) {408ret = OB_NOT_INIT;409LOG_WARN("not init", KR(ret));410} else if (OB_UNLIKELY(!bg_ls_stat.is_valid())) {411ret = OB_INVALID_ARGUMENT;412LOG_WARN("invalid argument", KR(ret), K(bg_ls_stat));413} else if (OB_FAIL(sql_string.append_fmt(414"INSERT INTO %s ("415"tenant_id, "416"balance_group_id_high, "417"balance_group_id_low, "418"ls_id, "419"tablet_group_count, "420"balance_group_name)"421" VALUES ("422"%ld, %ld, %ld, %ld, %ld, '%s') "423"ON DUPLICATE KEY UPDATE "424"tablet_group_count = tablet_group_count + %ld, "425"balance_group_name = '%s'",426OB_ALL_BALANCE_GROUP_LS_STAT_TNAME,427bg_ls_stat.get_tenant_id(),428bg_ls_stat.get_balance_group_id().id_high_,429bg_ls_stat.get_balance_group_id().id_low_,430bg_ls_stat.get_ls_id().id(),431bg_ls_stat.get_tablet_group_count(),432to_cstring(ObHexEscapeSqlStr(bg_ls_stat.get_balance_group_name().str())),433bg_ls_stat.get_tablet_group_count(),434to_cstring(ObHexEscapeSqlStr(bg_ls_stat.get_balance_group_name().str()))))) {435LOG_WARN("fail to append fmt", KR(ret), K(bg_ls_stat));436} else {437LOG_INFO("balance group ls inc sql", K(sql_string));438}439return ret;440}
441
442int ObBalanceGroupLSStatOperator::generate_insert_update_sql(443const ObBalanceGroupLSStat &bg_ls_stat,444common::ObSqlString &sql_string)445{
446int ret = OB_SUCCESS;447if (OB_UNLIKELY(!inited_)) {448ret = OB_NOT_INIT;449LOG_WARN("not init", KR(ret));450} else if (OB_UNLIKELY(!bg_ls_stat.is_valid())) {451ret = OB_INVALID_ARGUMENT;452LOG_WARN("invalid argument", KR(ret), K(bg_ls_stat));453} else {454if (OB_FAIL(sql_string.append_fmt(455"INSERT INTO %s ("456"tenant_id, "457"balance_group_id_high, "458"balance_group_id_low, "459"ls_id, "460"tablet_group_count, "461"balance_group_name)"462" VALUES ("463"%ld, %ld, %ld, %ld, %ld, '%s') "464"ON DUPLICATE KEY UPDATE "465"tablet_group_count = %ld, "466"balance_group_name = '%s'",467OB_ALL_BALANCE_GROUP_LS_STAT_TNAME,468bg_ls_stat.get_tenant_id(),469bg_ls_stat.get_balance_group_id().id_high_,470bg_ls_stat.get_balance_group_id().id_low_,471bg_ls_stat.get_ls_id().id(),472bg_ls_stat.get_tablet_group_count(),473to_cstring(ObHexEscapeSqlStr(bg_ls_stat.get_balance_group_name().str())),474bg_ls_stat.get_tablet_group_count(),475to_cstring(ObHexEscapeSqlStr(bg_ls_stat.get_balance_group_name().str()))))) {476LOG_WARN("fail to append fmt", KR(ret), K(bg_ls_stat));477} else {478LOG_INFO("balance group ls update sql", K(sql_string));479}480}481return ret;482}
483
484ObNewTableTabletAllocator::ObNewTableTabletAllocator(485const uint64_t tenant_id,486share::schema::ObSchemaGetterGuard &schema_guard,487common::ObMySQLProxy *sql_proxy,488const bool use_parallel_ddl /*= false*/)489: tenant_id_(tenant_id),490schema_guard_(schema_guard),491sql_proxy_(sql_proxy),492bg_ls_stat_operator_(),493status_(MyStatus::INVALID),494ls_id_array_(),495inited_(false),496is_add_partition_(false),497use_parallel_ddl_(use_parallel_ddl)498{
499}
500
501ObNewTableTabletAllocator::~ObNewTableTabletAllocator()502{
503}
504
505int ObNewTableTabletAllocator::init()506{
507int ret = OB_SUCCESS;508const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id_);509if (OB_UNLIKELY(inited_)) {510ret = OB_INIT_TWICE;511LOG_WARN("init twice", KR(ret), K(inited_));512} else if (OB_UNLIKELY(nullptr == sql_proxy_)) {513ret = OB_ERR_UNEXPECTED;514LOG_WARN("sql proxy ptr is null", KR(ret), KP(sql_proxy_));515} else if (OB_FAIL(bg_ls_stat_operator_.init(sql_proxy_))) {516LOG_WARN("fail to init bg_ls_stat_operator_", KR(ret));517} else {518status_ = MyStatus::WAIT_TO_PREPARE;519is_add_partition_ = false;520inited_ = true;521}522return ret;523}
524
525int ObNewTableTabletAllocator::prepare(526ObMySQLTransaction &trans,527const share::schema::ObTableSchema &table_schema,528bool is_add_partition)529{
530int ret = OB_SUCCESS;531is_add_partition_ = is_add_partition;532if (OB_UNLIKELY(!inited_)) {533ret = OB_NOT_INIT;534LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));535} else if (OB_UNLIKELY(!table_schema.has_tablet())) {536ret = OB_INVALID_ARGUMENT;537LOG_WARN("tablet has not tablet", KR(ret), K(table_schema));538} else if (MyStatus::WAIT_TO_PREPARE != status_) {539ret = OB_STATE_NOT_MATCH;540LOG_WARN("NewTableTabletAllocator state not match", KR(ret), K(status_));541} else if ((is_meta_tenant(table_schema.get_tenant_id()))542|| (is_sys_tenant(table_schema.get_tenant_id()))) {543if (OB_FAIL(alloc_ls_for_meta_or_sys_tenant_tablet(table_schema))) {544LOG_WARN("fail to alloc ls for meta or sys tenant tablet", KR(ret));545}546} else if (table_schema.is_duplicate_table()) {547if (OB_FAIL(alloc_ls_for_duplicate_table_(table_schema))) {548LOG_WARN("fail to alloc ls for duplicate tablet", KR(ret), K(table_schema));549}550} else {551if (table_schema.is_index_table()) {552if (table_schema.is_index_local_storage()) {553// local index or global index with local storage554if (OB_FAIL(alloc_ls_for_local_index_tablet(table_schema))) {555LOG_WARN("fail to alloc ls for local index tablet", KR(ret));556}557} else {558// global index559if (OB_FAIL(alloc_ls_for_global_index_tablet(table_schema))) {560LOG_WARN("fail to alloc ls for global index tablet", KR(ret));561}562}563} else {564if (OB_INVALID_ID != table_schema.get_tablegroup_id()) {565if (OB_FAIL(alloc_ls_for_in_tablegroup_tablet(table_schema))) {566LOG_WARN("fail to alloc ls for in tablegroup tablet", KR(ret));567}568} else {569if (OB_FAIL(alloc_ls_for_normal_table_tablet(table_schema))) {570LOG_WARN("fail to alloc ls for normal table tablet", KR(ret));571}572}573}574
575DEBUG_SYNC(BEFORE_LOCK_LS_WHEN_CREATE_TABLE);576// If ls status is not normal or is blocking tablet in, choose new ls for tablet creating.577if (OB_FAIL(ret)) {578} else if (is_related_table(table_schema.get_table_type(), table_schema.get_index_type())) {579// skip lock ls580} else if (OB_FAIL(check_and_replace_ls_(trans, table_schema.get_tenant_id()))) {581LOG_WARN("lock user ls failed", KR(ret),582"tenant_id", table_schema.get_tenant_id(), K_(ls_id_array));583}584}585
586if (OB_SUCC(ret)) {587status_ = MyStatus::WAIT_TO_OUTPUT;588}589is_add_partition_ = false;590return ret;591}
592
593int ObNewTableTabletAllocator::prepare_like(594const share::schema::ObTableSchema &table_schema)595{
596int ret = OB_SUCCESS;597if (OB_UNLIKELY(!inited_)) {598ret = OB_NOT_INIT;599LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));600} else if (OB_UNLIKELY(!table_schema.has_tablet())) {601ret = OB_INVALID_ARGUMENT;602LOG_WARN("tablet has not tablet", KR(ret), K(table_schema));603} else if (MyStatus::WAIT_TO_PREPARE != status_) {604ret = OB_STATE_NOT_MATCH;605LOG_WARN("NewTableTabletAllocator state not match", KR(ret), K(status_));606} else if ((is_meta_tenant(table_schema.get_tenant_id()))607|| (is_sys_tenant(table_schema.get_tenant_id()))) {608if (OB_FAIL(alloc_ls_for_meta_or_sys_tenant_tablet(table_schema))) {609LOG_WARN("fail to alloc ls for meta or sys tenant tablet", KR(ret));610}611} else if (OB_FAIL(alloc_tablet_by_primary_schema(table_schema))) {612LOG_WARN("fail to alloc tablet by primary schema", KR(ret), K(table_schema));613}614if (OB_SUCC(ret)) {615status_ = MyStatus::WAIT_TO_OUTPUT;616}617return ret;618}
619
620int ObNewTableTabletAllocator::get_ls_id_array(621common::ObIArray<share::ObLSID> &ls_id_array)622{
623int ret = OB_SUCCESS;624if (OB_UNLIKELY(!inited_)) {625ret = OB_NOT_INIT;626LOG_WARN("not init", KR(ret));627} else if (MyStatus::WAIT_TO_OUTPUT != status_) {628ret = OB_STATE_NOT_MATCH;629LOG_WARN("NewTableTabletAllocator state not match", KR(ret), K(status_));630} else {631ls_id_array.reset();632if (OB_FAIL(ls_id_array.assign(ls_id_array_))) {633LOG_WARN("fail to assign ls id array", KR(ret));634} else {635ls_id_array_.reset();636status_ = MyStatus::WAIT_TO_PREPARE;637}638}639return ret;640}
641
642int ObNewTableTabletAllocator::finish(643const bool commit)644{
645UNUSED(commit);646return OB_SUCCESS;647}
648
649int ObNewTableTabletAllocator::get_tablet_id_array(650const share::schema::ObTableSchema &table_schema,651common::ObIArray<common::ObTabletID> &tablet_id_array)652{
653int ret = OB_SUCCESS;654tablet_id_array.reset();655if (OB_UNLIKELY(!inited_)) {656ret = OB_NOT_INIT;657LOG_WARN("not init", KR(ret));658} else {659schema::ObPartitionSchemaIter iter(table_schema, schema::CHECK_PARTITION_MODE_NORMAL);660schema::ObPartitionSchemaIter::Info info;661while (OB_SUCC(ret)) {662if (OB_FAIL(iter.next_partition_info(info))) {663if (OB_ITER_END == ret) {664ret = OB_SUCCESS;665break;666}667} else if (OB_FAIL(tablet_id_array.push_back(info.tablet_id_))) {668LOG_WARN("fail to push tablet_id to array", KR(ret), K(info.tablet_id_));669}670}671}672return ret;673}
674
675int ObNewTableTabletAllocator::alloc_tablet_by_primary_schema(676const share::schema::ObTableSchema &table_schema)677{
678int ret = OB_SUCCESS;679LOG_INFO("alloc tablet by primary schema",680"tenant_id", table_schema.get_tenant_id(),681"table_id", table_schema.get_table_id());682if (OB_UNLIKELY(!inited_)) {683ret = OB_NOT_INIT;684LOG_WARN("not init", KR(ret));685} else if (OB_UNLIKELY(nullptr == sql_proxy_)) {686ret = OB_ERR_UNEXPECTED;687LOG_WARN("sql_proxy_ ptr is null", KR(ret));688} else {689common::ObArray<common::ObTabletID> tablet_id_array;690if (OB_FAIL(get_tablet_id_array(table_schema, tablet_id_array))) {691LOG_WARN("fail to get tablet id array", KR(ret));692} else if (OB_FAIL(ObTabletToLSTableOperator::batch_get_ls(693*sql_proxy_,694tenant_id_,695tablet_id_array,696ls_id_array_))) {697LOG_WARN("fail to batch get ls", KR(ret));698}699}700return ret;701}
702
703int ObNewTableTabletAllocator::get_available_ls(704common::ObIArray<share::ObLSID> &ls_id_array)705{
706int ret = OB_SUCCESS;707if (OB_UNLIKELY(!inited_)) {708ret = OB_NOT_INIT;709LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));710} else if (OB_UNLIKELY(nullptr == sql_proxy_)) {711ret = OB_ERR_UNEXPECTED;712LOG_WARN("sql_proxy ptr is null", KR(ret));713} else {714share::ObLSAttrOperator ls_attr_operator(tenant_id_, sql_proxy_);715ObLSAttrArray ls_attr_array;716if (OB_FAIL(ls_attr_operator.get_all_ls_by_order(ls_attr_array))) {717LOG_WARN("fail to load all ls", KR(ret), K_(tenant_id));718} else {719ARRAY_FOREACH(ls_attr_array, idx) {720share::ObLSAttr &ls_attr = ls_attr_array.at(idx);721if (ls_attr.ls_is_normal()722&& SYS_LS != ls_attr.get_ls_id()723&& !ls_attr.get_ls_flag().is_block_tablet_in()724&& !ls_attr.get_ls_flag().is_duplicate_ls()) {725if (OB_FAIL(ls_id_array.push_back(ls_attr.get_ls_id()))) {726LOG_WARN("fail to push back", KR(ret), K(ls_attr), K(ls_id_array));727}728}729}730}731}732return ret;733}
734
735int ObNewTableTabletAllocator::alloc_tablet_for_create_balance_group(736const ObBalanceGroupName &bg_name,737const ObBalanceGroupID &bg_id,738const common::ObIArray<share::ObLSID> &ls_id_array,739const int64_t part_num)740{
741int ret = OB_SUCCESS;742if (OB_UNLIKELY(!inited_)) {743ret = OB_NOT_INIT;744LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));745} else if (OB_UNLIKELY(bg_name.is_empty()746|| !bg_id.is_valid()747|| ls_id_array.count() <= 0748|| part_num <= 0)) {749ret = OB_INVALID_ARGUMENT;750LOG_WARN("invalid argument", KR(ret),751K(bg_name),752K(bg_id),753K(ls_id_array),754K(part_num));755} else {756const int64_t bucket_num = ls_id_array.count();757const int64_t min_itl = part_num / bucket_num;758const int64_t max_itl = ((min_itl * bucket_num == part_num) ? (min_itl) : (min_itl + 1));759const int64_t min_cnt = max_itl * bucket_num - part_num;760const int64_t max_cnt = bucket_num - min_cnt;761common::ObArray<ObBalanceGroupLSStat> bg_ls_stat_array;762int64_t start_idx = fetch_ls_offset();763for (int64_t i = 0; OB_SUCC(ret) && i < ls_id_array.count(); ++i) {764const share::ObLSID &ls_id = ls_id_array.at((start_idx + i) % ls_id_array.count());765const int64_t tablet_cnt = ((i < min_cnt) ? min_itl : max_itl);766for (int64_t j = 0; OB_SUCC(ret) && j < tablet_cnt; ++j) {767if (OB_FAIL(ls_id_array_.push_back(ls_id))) {768LOG_WARN("fail to push back", KR(ret));769}770}771if (OB_SUCC(ret)) {772ObBalanceGroupLSStat bg_ls_stat;773if (OB_FAIL(bg_ls_stat.build(774tenant_id_,775bg_id,776ls_id,777tablet_cnt,778bg_name))) {779LOG_WARN("fail to build bg ls stat", KR(ret));780} else if (OB_FAIL(bg_ls_stat_array.push_back(781bg_ls_stat))) {782LOG_WARN("fail to push back", KR(ret));783}784}785}786if (OB_SUCC(ret)) {787if (OB_FAIL(bg_ls_stat_operator_.insert_update_balance_group_ls_stat(788THIS_WORKER.get_timeout_remain(),789*sql_proxy_,790tenant_id_,791bg_id,792bg_ls_stat_array))) {793LOG_WARN("fail to insert update balance group ls stat", KR(ret));794}795}796}797return ret;798}
799
800int ObNewTableTabletAllocator::alloc_tablet_for_add_balance_group(801const common::ObIArray<ObBalanceGroupLSStat> &bg_ls_stat_array,802const ObBalanceGroupName &bg_name,803const ObBalanceGroupID &bg_id,804const common::ObIArray<share::ObLSID> &ls_id_array,805const int64_t partition_num)806{
807int ret = OB_SUCCESS;808// suppose bg_ls_stat_array can be empty809common::hash::ObHashSet<share::ObLSID> ls_id_set;810if (OB_UNLIKELY(!inited_)) {811ret = OB_NOT_INIT;812LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));813} else if (OB_UNLIKELY(bg_name.is_empty()814|| !bg_id.is_valid()815|| ls_id_array.count() <= 0816|| partition_num <= 0)) {817ret = OB_INVALID_ARGUMENT;818LOG_WARN("invalid argument", KR(ret),819K(bg_name),820K(bg_id),821K(ls_id_array),822K(partition_num));823} else if (OB_FAIL(ls_id_set.create(MAX_TENANT_LS_CNT))) {824LOG_WARN("fail to create ls id set", KR(ret));825} else {826common::ObArray<ObBalanceGroupLSStat> final_ls_stat_array;827int64_t total_alloc_num = partition_num;828int64_t valid_bg_cnt = total_alloc_num;829for (int64_t i = 0; OB_SUCC(ret) && i < ls_id_array.count(); ++i) {830const share::ObLSID &ls_id = ls_id_array.at(i);831if (OB_FAIL(ls_id_set.set_refactored(ls_id, 0/*not overwrite*/))) {832LOG_WARN("fail to set refactored", KR(ret));833}834}835for (int64_t i = 0; OB_SUCC(ret) && i < bg_ls_stat_array.count(); ++i) {836const share::ObLSID &ls_id = bg_ls_stat_array.at(i).get_ls_id();837int tmp_ret = ls_id_set.exist_refactored(ls_id);838LOG_INFO("balance group ls stat", "bg_ls_stat", bg_ls_stat_array.at(i));839if (OB_HASH_NOT_EXIST == tmp_ret) {840// ls not available841} else if (OB_HASH_EXIST == tmp_ret) {842if (OB_FAIL(final_ls_stat_array.push_back(bg_ls_stat_array.at(i)))) {843LOG_WARN("fail to push back", KR(ret));844} else if (OB_FAIL(ls_id_set.erase_refactored(ls_id))) {845LOG_WARN("fail to erase refactored", KR(ret));846} else {847valid_bg_cnt += bg_ls_stat_array.at(i).get_tablet_group_count();848}849} else {850ret = tmp_ret;851LOG_WARN("fail to check exist", KR(ret), K(ls_id));852}853}854for (common::hash::ObHashSet<share::ObLSID>::iterator iter = ls_id_set.begin();855OB_SUCC(ret) && iter != ls_id_set.end();856++iter) {857ObBalanceGroupLSStat bg_ls_stat;858if 
(OB_FAIL(bg_ls_stat.build(859tenant_id_,860bg_id,861iter->first, /*ls_id*/8620,/*bg cnt*/863bg_name))) {864LOG_WARN("fail to build bg ls stat", KR(ret),865K(tenant_id_),866K(bg_id),867K(bg_name),868"ls_id", iter->first);869} else if (OB_FAIL(final_ls_stat_array.push_back(bg_ls_stat))) {870LOG_WARN("fail to push back", KR(ret));871}872}873
874if (OB_FAIL(ret)) {875// bypass876} else if (OB_UNLIKELY(final_ls_stat_array.count() <= 0)) {877ret = OB_ERR_UNEXPECTED;878LOG_WARN("final ls stat array count unexpected", KR(ret), K(final_ls_stat_array));879} else {880std::sort(final_ls_stat_array.begin(), final_ls_stat_array.end());881for (int64_t alloc_seq = 0; OB_SUCC(ret) && alloc_seq < total_alloc_num; alloc_seq++) {882int64_t min_ls_tg_idx = 0;883int64_t min_ls_tg_cnt = final_ls_stat_array.at(0).get_tablet_group_count();884// find min885for (int64_t i = 1; OB_SUCC(ret) && i < final_ls_stat_array.count(); ++i) {886ObBalanceGroupLSStat &bg_ls_stat = final_ls_stat_array.at(i);887if (bg_ls_stat.get_tablet_group_count() < min_ls_tg_cnt) {888min_ls_tg_idx = i;889min_ls_tg_cnt = bg_ls_stat.get_tablet_group_count();890}891}892if (OB_SUCC(ret)) {893final_ls_stat_array.at(min_ls_tg_idx).add_tablet_group_count(1);894if (OB_FAIL(ls_id_array_.push_back(final_ls_stat_array.at(min_ls_tg_idx).get_ls_id()))) {895LOG_WARN("fail to push back", KR(ret));896}897}898}899if (OB_SUCC(ret)) {900if (OB_FAIL(bg_ls_stat_operator_.insert_update_balance_group_ls_stat(901THIS_WORKER.get_timeout_remain(),902*sql_proxy_,903tenant_id_,904bg_id,905final_ls_stat_array))) {906LOG_WARN("fail to insert update balance group ls stat", KR(ret));907}908}909}910}911return ret;912}
913
914int ObNewTableTabletAllocator::alloc_tablet_for_one_level_partitioned_balance_group(915const share::schema::ObTableSchema &table_schema)916{
917int ret = OB_SUCCESS;918LOG_INFO("alloc tablet for one level partitioned balance group",919"tenant_id", table_schema.get_tenant_id(),920"table_id", table_schema.get_table_id());921common::ObArray<share::ObLSID> ls_id_array;922ObBalanceGroup bg;923if (OB_UNLIKELY(!inited_)) {924ret = OB_NOT_INIT;925LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));926} else if (OB_UNLIKELY(PARTITION_LEVEL_ONE != table_schema.get_part_level())) {927ret = OB_INVALID_ARGUMENT;928LOG_WARN("invalid argument", KR(ret), "part_level", table_schema.get_part_level());929} else if (OB_FAIL(bg.init_by_table(table_schema, NULL/*partition*/))) {930LOG_WARN("fail to get one level partitioned bg info", KR(ret));931} else if (OB_FAIL(get_available_ls(ls_id_array))) {932LOG_WARN("fail to get available ls", KR(ret));933} else {934if (!is_add_partition_) {935if (OB_FAIL(alloc_tablet_for_create_balance_group(936bg.name(),937bg.id(),938ls_id_array,939table_schema.get_partition_num()))) {940LOG_WARN("fail to alloc tablet for create balance group", KR(ret));941}942} else {943common::ObArray<ObBalanceGroupLSStat> bg_ls_stat_array;944if (OB_FAIL(bg_ls_stat_operator_.get_balance_group_ls_stat(945THIS_WORKER.get_timeout_remain(),946*sql_proxy_,947tenant_id_,948bg.id(),949false, /*for update*/950bg_ls_stat_array))) {951LOG_WARN("fail to get balance group ls stat", KR(ret),952K(tenant_id_), K(bg));953} else if (OB_FAIL(alloc_tablet_for_add_balance_group(954bg_ls_stat_array,955bg.name(),956bg.id(),957ls_id_array,958table_schema.get_partition_num()))) {959LOG_WARN("fail to alloc tablet for add balance group", KR(ret), K(bg));960}961}962}963return ret;964}
965
966int ObNewTableTabletAllocator::alloc_tablet_for_two_level_partitioned_balance_group(967const share::schema::ObTableSchema &table_schema,968const int64_t part_idx)969{
970int ret = OB_SUCCESS;971LOG_INFO("alloc tablet for two level partitioned balance group",972"tenant_id", table_schema.get_tenant_id(),973"table_id", table_schema.get_table_id());974common::ObArray<share::ObLSID> ls_id_array;975ObBalanceGroup bg;976if (OB_UNLIKELY(!inited_)) {977ret = OB_NOT_INIT;978LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));979} else if (OB_UNLIKELY(PARTITION_LEVEL_TWO != table_schema.get_part_level())) {980ret = OB_INVALID_ARGUMENT;981LOG_WARN("invalid argument", KR(ret), "part_level", table_schema.get_part_level());982} else if (OB_UNLIKELY(part_idx >= table_schema.get_partition_num())) {983ret = OB_INVALID_ARGUMENT;984LOG_WARN("invalid part idx", KR(ret), K(part_idx),985"part_num", table_schema.get_partition_num());986} else {987const schema::ObPartition *partition = NULL;988if (OB_FAIL(table_schema.get_partition_by_partition_index(part_idx, schema::CHECK_PARTITION_MODE_NORMAL, partition))) {989LOG_WARN("get_partition_by_partition_index fail", KR(ret), K(part_idx), K(table_schema));990} else if (OB_ISNULL(partition)) {991ret = OB_ERR_UNEXPECTED;992LOG_WARN("part ptr is null", KR(ret), K(part_idx), K(table_schema));993} else if (OB_FAIL(bg.init_by_table(table_schema, partition))) {994LOG_WARN("fail to init two level partitioned bg info", KR(ret), K(table_schema), K(partition));995} else if (OB_FAIL(get_available_ls(ls_id_array))) {996LOG_WARN("fail to get available ls", KR(ret));997} else {998if (!is_add_partition_) {999if (OB_FAIL(alloc_tablet_for_create_balance_group(1000bg.name(),1001bg.id(),1002ls_id_array,1003partition->get_subpartition_num()))) {1004LOG_WARN("fail to alloc tablet for create balance group", KR(ret));1005}1006} else {1007common::ObArray<ObBalanceGroupLSStat> bg_ls_stat_array;1008if (OB_FAIL(bg_ls_stat_operator_.get_balance_group_ls_stat(1009THIS_WORKER.get_timeout_remain(),1010*sql_proxy_,1011tenant_id_,1012bg.id(),1013false, /*for update*/1014bg_ls_stat_array))) {1015LOG_WARN("fail to get balance group ls 
stat", KR(ret),1016K(tenant_id_), K(bg));1017} else if (OB_FAIL(alloc_tablet_for_add_balance_group(1018bg_ls_stat_array,1019bg.name(),1020bg.id(),1021ls_id_array,1022partition->get_subpartition_num()))) {1023LOG_WARN("fail to alloc tablet for add balance group", KR(ret), K(bg));1024}1025}1026}1027}1028return ret;1029}
1030
1031int ObNewTableTabletAllocator::alloc_tablet_for_non_partitioned_balance_group(1032const share::schema::ObTableSchema &table_schema)1033{
1034int ret = OB_SUCCESS;1035LOG_INFO("alloc tablet for non partitioned balance group",1036"tenant_id", table_schema.get_tenant_id(),1037"table_id", table_schema.get_table_id());1038ObBalanceGroup bg;1039common::ObArray<ObBalanceGroupLSStat> bg_ls_stat_array;1040common::ObArray<share::ObLSID> ls_id_array;1041if (OB_UNLIKELY(!inited_)) {1042ret = OB_NOT_INIT;1043LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1044} else if (OB_UNLIKELY(PARTITION_LEVEL_ZERO != table_schema.get_part_level())) {1045ret = OB_INVALID_ARGUMENT;1046LOG_WARN("invalid argument", KR(ret),1047"part_num", table_schema.get_all_part_num(),1048"part_level", table_schema.get_part_level(),1049K(table_schema));1050} else if (OB_FAIL(bg.init_by_table(table_schema, NULL/*partition*/))) {1051LOG_WARN("fail to init non partitioned bg info", KR(ret), K(bg), K(table_schema));1052} else if (OB_FAIL(get_available_ls(ls_id_array))) {1053LOG_WARN("fail to get available ls", KR(ret));1054} else if (OB_FAIL(bg_ls_stat_operator_.get_balance_group_ls_stat(1055THIS_WORKER.get_timeout_remain(),1056*sql_proxy_,1057tenant_id_,1058bg.id(),1059false, /*for update*/1060bg_ls_stat_array))) {1061LOG_WARN("fail to get balance group ls stat", KR(ret), K(tenant_id_), K(bg));1062} else if (OB_FAIL(alloc_tablet_for_add_balance_group(1063bg_ls_stat_array,1064bg.name(),1065bg.id(),1066ls_id_array,1067table_schema.get_all_part_num()))) {1068LOG_WARN("fail to alloc tablet for add balance group", KR(ret), K(bg), K(bg_ls_stat_array),1069K(ls_id_array), K(table_schema.get_all_part_num()));1070}1071return ret;1072}
1073
1074int ObNewTableTabletAllocator::alloc_tablet_for_non_partitioned_balance_group_by_cache_(1075const share::schema::ObTableSchema &table_schema)1076{
1077int ret = OB_SUCCESS;1078LOG_INFO("alloc tablet for non partitioned balance group by cache",1079"tenant_id", table_schema.get_tenant_id(),1080"table_id", table_schema.get_table_id());1081common::ObArray<share::ObLSID> ls_id_array;1082share::ObLSID ls_id;1083if (OB_UNLIKELY(!inited_)) {1084ret = OB_NOT_INIT;1085LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1086} else if (OB_UNLIKELY(PARTITION_LEVEL_ZERO != table_schema.get_part_level())) {1087ret = OB_INVALID_ARGUMENT;1088LOG_WARN("invalid argument", KR(ret),1089"part_num", table_schema.get_all_part_num(),1090"part_level", table_schema.get_part_level(),1091K(table_schema));1092} else if (OB_FAIL(get_available_ls(ls_id_array))) {1093LOG_WARN("fail to get available ls", KR(ret));1094} else if (OB_ISNULL(GCTX.root_service_)) {1095ret = OB_ERR_UNEXPECTED;1096LOG_WARN("rootservice is null", KR(ret));1097} else if (OB_FAIL(GCTX.root_service_->get_ddl_service()1098.get_non_partitioned_tablet_allocator()1099.alloc_tablet(tenant_id_, ls_id_array, ls_id))) {1100LOG_WARN("fail to alloc tablet by cache", KR(ret), K_(tenant_id));1101} else if (OB_FAIL(ls_id_array_.push_back(ls_id))) {1102LOG_WARN("fail to push back ls id", KR(ret), K_(tenant_id), K(ls_id));1103}1104return ret;1105}
1106
1107int ObNewTableTabletAllocator::alloc_tablet_for_partitioned_balance_group(1108const share::schema::ObTableSchema &table_schema)1109{
1110int ret = OB_SUCCESS;1111if (OB_UNLIKELY(!inited_)) {1112ret = OB_NOT_INIT;1113LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1114} else {1115if (PARTITION_LEVEL_ONE == table_schema.get_part_level()) {1116if (OB_FAIL(alloc_tablet_for_one_level_partitioned_balance_group(1117table_schema))) {1118LOG_WARN("fail to alloc tablet for one level partitioned bg", KR(ret));1119}1120} else if (PARTITION_LEVEL_TWO == table_schema.get_part_level()) {1121for (int64_t i = 0; OB_SUCC(ret) && i < table_schema.get_partition_num(); ++i) {1122if (OB_FAIL(alloc_tablet_for_two_level_partitioned_balance_group(1123table_schema, i))) {1124LOG_WARN("fail to alloc tablet for two level partitioned bg", KR(ret));1125}1126}1127} else {1128ret = OB_ERR_UNEXPECTED;1129LOG_WARN("part level unexpected", KR(ret),1130"part_level", table_schema.get_part_level());1131}1132}1133return ret;1134}
1135
1136int ObNewTableTabletAllocator::alloc_tablet_by_count_balance(1137const share::schema::ObTableSchema &table_schema)1138{
1139int ret = OB_SUCCESS;1140if (OB_UNLIKELY(!inited_)) {1141ret = OB_NOT_INIT;1142LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1143} else if (table_schema.get_all_part_num() <= 0) {1144ret = OB_ERR_UNEXPECTED;1145LOG_WARN("table schema part num unexpected", KR(ret),1146"part_num", table_schema.get_all_part_num(),1147"table_schema", table_schema);1148} else if (is_sys_table(table_schema.get_table_id())1149|| is_sys_tenant(table_schema.get_tenant_id())) {1150for (int64_t i = 0; i < table_schema.get_all_part_num() && OB_SUCC(ret); i++) {1151if (OB_FAIL(ls_id_array_.push_back(ObLSID(SYS_LS)))) {1152LOG_WARN("failed to push_back", KR(ret), K(i));1153}1154}1155} else if (PARTITION_LEVEL_ZERO == table_schema.get_part_level()) {1156if (!use_parallel_ddl_) {1157if (OB_FAIL(alloc_tablet_for_non_partitioned_balance_group(table_schema))) {1158LOG_WARN("fail to alloc tablet by non partitioned balance group", KR(ret));1159}1160} else {1161if (OB_FAIL(alloc_tablet_for_non_partitioned_balance_group_by_cache_(table_schema))) {1162LOG_WARN("fail to alloc tablet by non partitioned balance group by cache", KR(ret));1163}1164}1165} else {1166if (OB_FAIL(alloc_tablet_for_partitioned_balance_group(table_schema))) {1167LOG_WARN("fail to alloc tablet by partitioned balance group", KR(ret));1168}1169}1170return ret;1171}
1172
1173int ObNewTableTabletAllocator::alloc_ls_for_meta_or_sys_tenant_tablet(1174const share::schema::ObTableSchema &table_schema)1175{
1176int ret = OB_SUCCESS;1177LOG_INFO("alloc ls for meta or sys tenant tablet",1178"tenant_id", table_schema.get_tenant_id(),1179"table_id", table_schema.get_table_id());1180if (OB_UNLIKELY(!inited_)) {1181ret = OB_NOT_INIT;1182LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1183} else if ((!is_meta_tenant(table_schema.get_tenant_id())1184&& (!is_sys_tenant(table_schema.get_tenant_id())))) {1185ret = OB_ERR_UNEXPECTED;1186LOG_WARN("unexpected table schema", KR(ret),1187"tenant_id", table_schema.get_tenant_id(), K(table_schema));1188} else {1189for (int64_t i = 0; i < table_schema.get_all_part_num() && OB_SUCC(ret); i++) {1190if (OB_FAIL(ls_id_array_.push_back(ObLSID(SYS_LS)))) {1191LOG_WARN("failed to push_back", KR(ret), K(i));1192}1193}1194}1195return ret;1196}
1197
1198int ObNewTableTabletAllocator::alloc_ls_for_local_index_tablet(1199const share::schema::ObTableSchema &index_schema)1200{
1201int ret = OB_SUCCESS;1202LOG_INFO("alloc ls for local index tablet",1203"tenant_id", index_schema.get_tenant_id(),1204"index_id", index_schema.get_table_id());1205if (OB_UNLIKELY(!inited_)) {1206ret = OB_NOT_INIT;1207LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1208} else {1209const uint64_t tenant_id = index_schema.get_tenant_id();1210const uint64_t data_table_id = index_schema.get_data_table_id();1211const share::schema::ObTableSchema *table_schema = nullptr;1212if (OB_FAIL(schema_guard_.get_table_schema(1213tenant_id, data_table_id, table_schema))) {1214LOG_WARN("fail to get table schema", KR(ret), K(tenant_id), K(data_table_id));1215} else if (OB_UNLIKELY(nullptr == table_schema)) {1216ret = OB_TABLE_NOT_EXIST;1217LOG_WARN("table not exist", KR(ret), K(data_table_id));1218} else if (OB_FAIL(alloc_tablet_by_primary_schema(1219*table_schema))) {1220LOG_WARN("fail to alloc tablet by guard", KR(ret), K(data_table_id));1221}1222}1223return ret;1224}
1225
1226int ObNewTableTabletAllocator::alloc_ls_for_global_index_tablet(1227const share::schema::ObTableSchema &index_schema)1228{
1229int ret = OB_SUCCESS;1230LOG_INFO("alloc ls for global index tablet",1231"tenant_id", index_schema.get_tenant_id(),1232"index_id", index_schema.get_table_id());1233if (OB_UNLIKELY(!inited_)) {1234ret = OB_NOT_INIT;1235LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1236} else if (OB_FAIL(alloc_tablet_by_count_balance(1237index_schema))) {1238LOG_WARN("fail to alloc tablet by count balance", KR(ret));1239}1240return ret;1241}
1242
1243int ObNewTableTabletAllocator::alloc_ls_for_in_tablegroup_tablet(1244const share::schema::ObTableSchema &table_schema)1245{
1246int ret = OB_SUCCESS;1247LOG_INFO("alloc ls for in tablegroup tablet",1248"tenant_id", table_schema.get_tenant_id(),1249"table_id", table_schema.get_table_id());1250if (OB_UNLIKELY(!inited_)) {1251ret = OB_NOT_INIT;1252LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1253} else if (OB_UNLIKELY(OB_INVALID_ID == table_schema.get_tablegroup_id())) {1254ret = OB_ERR_UNEXPECTED;1255LOG_WARN("shall not be here for a table without tablegroup", KR(ret), K(table_schema));1256} else if (is_sys_table(table_schema.get_table_id())1257|| is_sys_tenant(table_schema.get_tenant_id())) {1258for (int64_t i = 0; i < table_schema.get_all_part_num() && OB_SUCC(ret); i++) {1259if (OB_FAIL(ls_id_array_.push_back(ObLSID(SYS_LS)))) {1260LOG_WARN("failed to push_back", KR(ret), K(i));1261}1262}1263} else {1264common::ObArray<const share::schema::ObTableSchema *> table_schema_array;1265const share::schema::ObSimpleTablegroupSchema *tablegroup_schema = NULL;1266if (OB_FAIL(schema_guard_.get_table_schemas_in_tablegroup(1267tenant_id_,1268table_schema.get_tablegroup_id(),1269table_schema_array))) {1270LOG_WARN("fail to get table schemas in tablegroup", KR(ret),1271"tenant_id", tenant_id_,1272"tablegroup_id", table_schema.get_tablegroup_id());1273} else if (OB_FAIL(schema_guard_.get_tablegroup_schema(tenant_id_, table_schema.get_tablegroup_id(), tablegroup_schema))) {1274LOG_WARN("fail to get tablegroup_schema", KR(ret), K(table_schema.get_tablegroup_id()));1275} else if (OB_ISNULL(tablegroup_schema) || !tablegroup_schema->is_valid()) {1276ret = OB_ERR_UNEXPECTED;1277LOG_WARN("tablegroup_schema invalid", KR(ret), K(tablegroup_schema));1278} else if (table_schema_array.count() > 0) {1279if (OB_UNLIKELY(nullptr == table_schema_array.at(0))) {1280ret = OB_ERR_UNEXPECTED;1281LOG_WARN("table schema ptr is null", KR(ret), K(table_schema_array));1282} else if (!is_add_partition_ || tablegroup_schema->get_sharding() == OB_PARTITION_SHARDING_NONE) {1283if 
(OB_FAIL(alloc_tablet_for_tablegroup(*table_schema_array.at(0), table_schema, *tablegroup_schema))) {1284LOG_WARN("fail to alloc tablet for tablegroup", KR(ret), K(is_add_partition_), K(tablegroup_schema), K(*table_schema_array.at(0)), K(table_schema));1285}1286} else if (tablegroup_schema->get_sharding() == OB_PARTITION_SHARDING_ADAPTIVE) {1287// add partition for tablegroup table may break the constraint of sharding ADAPTIVE1288// so alloc tablet as new table1289if (OB_FAIL(alloc_tablet_for_tablegroup(table_schema, *tablegroup_schema))) {1290LOG_WARN("fail to alloc tablet for tablegroup", KR(ret), K(table_schema), K(tablegroup_schema));1291}1292} else if (tablegroup_schema->get_sharding() == OB_PARTITION_SHARDING_PARTITION) {1293/* add partition for tablegroup sharding=PARTITION, we process only add subpart binding to existing one level partition1294* otherwise alloc tablet as new table
1295*/
1296const ObTableSchema *origin_table_schema = NULL;1297if (OB_FAIL(schema_guard_.get_table_schema(table_schema.get_tenant_id(), table_schema.get_table_id(), origin_table_schema))) {1298LOG_WARN("fail to get origin table_schema", KR(ret), K(table_schema.get_table_id()));1299} else if (OB_ISNULL(origin_table_schema)) {1300ret = OB_ERR_UNDEFINED;1301LOG_WARN("origin_table_schema is null", KR(ret), K(table_schema.get_table_id()));1302} else if (OB_FAIL(alloc_tablet_for_add_part_in_tablegroup_sharding_partition(table_schema, *origin_table_schema))) {1303LOG_WARN("fail to alloc_tablet_for_tablegroup_add_part", KR(ret), K(table_schema), K(origin_table_schema));1304}1305}1306} else {1307if (OB_FAIL(alloc_tablet_for_tablegroup(table_schema, *tablegroup_schema))) {1308LOG_WARN("fail to alloc tablet for tablegroup", KR(ret), K(table_schema));1309}1310}1311}1312return ret;1313}
1314
1315int ObNewTableTabletAllocator::alloc_tablet_for_add_part_in_tablegroup_sharding_partition(1316const schema::ObTableSchema &table_schema,1317const schema::ObTableSchema &origin_table_schema)1318{
1319int ret = OB_SUCCESS;1320common::ObArray<share::ObLSID> origin_ls_id_array;1321common::ObArray<share::ObLSID> pre_ls_id_array;1322common::ObArray<share::ObLSID> avail_ls_id_array;1323if (table_schema.get_table_id() != origin_table_schema.get_table_id()) {1324ret = OB_ERR_UNEXPECTED;1325LOG_WARN("table schema not match", KR(ret), K(table_schema), K(origin_table_schema));1326} else if (OB_FAIL(generate_ls_array_by_primary_schema(origin_table_schema, origin_ls_id_array))) {1327LOG_WARN("fail to generate_ls_array_by_primary_schema", KR(ret), K(origin_table_schema));1328} else if (OB_FAIL(extract_one_level_ls_array_by_primary_schema(origin_table_schema, origin_ls_id_array, pre_ls_id_array))) {1329LOG_WARN("fail to extract_one_level_ls_array_by_primary_schema", KR(ret), K(origin_table_schema));1330} else if (OB_FAIL(get_available_ls(avail_ls_id_array))) {1331LOG_WARN("fail get_available_ls", KR(ret));1332} else if (avail_ls_id_array.empty()) {1333ret = OB_ERR_UNEXPECTED;1334LOG_WARN("no available ls", KR(ret));1335} else {1336for (int i = 0; OB_SUCC(ret) && i < table_schema.get_partition_num(); i++) {1337const schema::ObPartition *partition = NULL;1338if (OB_FAIL(table_schema.get_partition_by_partition_index(i, schema::CHECK_PARTITION_MODE_NORMAL, partition))) {1339LOG_WARN("get_partition_by_partition_index fail", KR(ret), K(i), K(table_schema));1340} else if (OB_ISNULL(partition)) {1341ret = OB_ERR_UNEXPECTED;1342LOG_WARN("part ptr is null", KR(ret), K(i), K(table_schema));1343} else {1344int64_t origin_part_index = OB_INVALID_INDEX;1345ObLSID dest_ls_id;1346int64_t need_ls_count = 1;1347if (schema::PARTITION_LEVEL_TWO == table_schema.get_part_level()) {1348need_ls_count = partition->get_sub_part_num();1349}1350if (OB_FAIL(origin_table_schema.get_partition_index_by_id(partition->get_part_id(), schema::CHECK_PARTITION_MODE_NORMAL, origin_part_index))) {1351if (OB_ENTRY_NOT_EXIST == ret) {1352ret = OB_SUCCESS;1353int64_t dest_idx = (fetch_ls_offset() % 
avail_ls_id_array.count());1354// table_group can't use count balance because no partition value assign ls rule1355dest_ls_id = avail_ls_id_array.at(dest_idx);1356}1357} else {1358dest_ls_id = pre_ls_id_array.at(origin_part_index);1359}1360for (int c = 0; OB_SUCC(ret) && c < need_ls_count; c++) {1361if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1362LOG_WARN("fail to push ls_id to array", KR(ret), K(pre_ls_id_array), K(origin_part_index));1363}1364}1365}1366}1367}1368return ret;1369}
1370
1371int ObNewTableTabletAllocator::alloc_tablet_for_tablegroup(1372const schema::ObTableSchema &table_schema,1373const schema::ObSimpleTablegroupSchema &tablegroup_schema)1374{
1375int ret = OB_SUCCESS;1376common::ObArray<share::ObLSID> ls_id_array;1377if (OB_FAIL(get_available_ls(ls_id_array))) {1378LOG_WARN("fail to get available ls", KR(ret), K(tenant_id_));1379} else if (ls_id_array.empty()) {1380ret = OB_STATE_NOT_MATCH;1381LOG_WARN("empty ls to alloc", KR(ret), K(tenant_id_));1382} else if (tablegroup_schema.get_sharding() == OB_PARTITION_SHARDING_NONE || schema::PARTITION_LEVEL_ZERO == table_schema.get_part_level()) {1383int64_t start_idx = fetch_ls_offset();1384ObLSID dest_ls_id = ls_id_array.at(start_idx % ls_id_array.count());1385for (int64_t i = 0; i < table_schema.get_all_part_num() && OB_SUCC(ret); i++) {1386if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1387LOG_WARN("failed to push_back", KR(ret), K(i));1388}1389}1390} else if (tablegroup_schema.get_sharding() == OB_PARTITION_SHARDING_PARTITION || schema::PARTITION_LEVEL_ONE == table_schema.get_part_level()) {1391int64_t start_idx = fetch_ls_offset();1392for (int64_t i = 0; i < table_schema.get_partition_num() && OB_SUCC(ret); i++) {1393ObLSID dest_ls_id = ls_id_array.at((start_idx + i) % ls_id_array.count());1394if (schema::PARTITION_LEVEL_ONE == table_schema.get_part_level()) {1395if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1396LOG_WARN("failed to push_back", KR(ret), K(i));1397}1398} else if (schema::PARTITION_LEVEL_TWO == table_schema.get_part_level()) {1399const schema::ObPartition *partition = NULL;1400if (OB_FAIL(table_schema.get_partition_by_partition_index(i, schema::CHECK_PARTITION_MODE_NORMAL, partition))) {1401LOG_WARN("get_partition_by_partition_index fail", KR(ret), K(i), K(table_schema));1402} else if (OB_ISNULL(partition)) {1403ret = OB_ERR_UNEXPECTED;1404LOG_WARN("part ptr is null", KR(ret), K(i), K(table_schema));1405} else {1406for (int64_t sp = 0; OB_SUCC(ret) && sp < partition->get_subpartition_num(); sp++) {1407if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1408LOG_WARN("failed to push ls_id to array", KR(ret), 
K(dest_ls_id));1409}1410}1411}1412} else {1413ret = OB_ERR_UNEXPECTED;1414LOG_WARN("unexpected part_level", KR(ret), K(table_schema.get_part_level()));1415}1416}1417} else if (tablegroup_schema.get_sharding() == OB_PARTITION_SHARDING_ADAPTIVE) {1418for (int64_t i = 0; i < table_schema.get_partition_num() && OB_SUCC(ret); i++) {1419int64_t start_idx = fetch_ls_offset();1420if (schema::PARTITION_LEVEL_TWO == table_schema.get_part_level()) {1421const schema::ObPartition *partition = NULL;1422if (OB_FAIL(table_schema.get_partition_by_partition_index(i, schema::CHECK_PARTITION_MODE_NORMAL, partition))) {1423LOG_WARN("get_partition_by_partition_index fail", KR(ret), K(i), K(table_schema));1424} else if (OB_ISNULL(partition)) {1425ret = OB_ERR_UNEXPECTED;1426LOG_WARN("part ptr is null", KR(ret), K(i), K(table_schema));1427} else {1428for (int64_t sp = 0; OB_SUCC(ret) && sp < partition->get_subpartition_num(); sp++) {1429ObLSID dest_ls_id = ls_id_array.at((start_idx + sp) % ls_id_array.count());1430if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1431LOG_WARN("failed to push ls_id to array", KR(ret), K(dest_ls_id));1432}1433}1434}1435} else {1436ret = OB_ERR_UNEXPECTED;1437LOG_WARN("unexpected part_level", KR(ret), K(table_schema.get_part_level()));1438}1439}1440} else {1441ret = OB_ERR_UNEXPECTED;1442LOG_WARN("unknow sharding option", KR(ret), K(tablegroup_schema.get_sharding()));1443}1444return ret;1445}
1446
1447int ObNewTableTabletAllocator::generate_ls_array_by_primary_schema(1448const schema::ObTableSchema &primary_schema,1449common::ObArray<share::ObLSID> &ls_id_array)1450{
1451int ret = OB_SUCCESS;1452
1453ls_id_array.reuse();1454common::ObArray<common::ObTabletID> tablet_id_array;1455if (OB_FAIL(get_tablet_id_array(primary_schema, tablet_id_array))) {1456LOG_WARN("fail to get tablet id array", KR(ret), K(primary_schema));1457} else if (OB_ISNULL(sql_proxy_)) {1458ret = OB_ERR_UNEXPECTED;1459LOG_WARN("sql_proxy is null", KR(ret));1460} else if (OB_FAIL(ObTabletToLSTableOperator::batch_get_ls(1461*sql_proxy_,1462tenant_id_,1463tablet_id_array,1464ls_id_array))) {1465LOG_WARN("fail to batch get ls", KR(ret), K(tenant_id_), K(tablet_id_array), K(primary_schema));1466} else if (ls_id_array.count() != primary_schema.get_all_part_num()) {1467ret = OB_ERR_UNEXPECTED;1468LOG_WARN("empty pre_ls_id_array", KR(ret), K(tenant_id_), K(tablet_id_array), K(primary_schema));1469}1470
1471return ret;1472}
1473
1474int ObNewTableTabletAllocator::extract_one_level_ls_array_by_primary_schema(1475const schema::ObTableSchema &primary_schema,1476common::ObArray<share::ObLSID> &all_ls_id_array,1477common::ObArray<share::ObLSID> &pre_ls_id_array)1478{
1479int ret = OB_SUCCESS;1480pre_ls_id_array.reuse();1481if (primary_schema.get_all_part_num() != all_ls_id_array.count()) {1482ret = OB_ERR_UNEXPECTED;1483LOG_WARN("part count not match", KR(ret), K(primary_schema.get_all_part_num()), K(all_ls_id_array.count()));1484} else {1485int64_t primary_partition_offset = 0;1486for (int64_t part_idx = 0; OB_SUCC(ret) && part_idx < primary_schema.get_partition_num(); part_idx++) {1487if (OB_FAIL(pre_ls_id_array.push_back(all_ls_id_array.at(primary_partition_offset)))) {1488LOG_WARN("push ls_id to array", KR(ret), K(primary_partition_offset), K(all_ls_id_array));1489} else if (schema::PARTITION_LEVEL_ONE == primary_schema.get_part_level()) {1490primary_partition_offset++;1491} else if (schema::PARTITION_LEVEL_TWO == primary_schema.get_part_level()) {1492const schema::ObPartition *partition = NULL;1493if (OB_FAIL(primary_schema.get_partition_by_partition_index(part_idx, schema::CHECK_PARTITION_MODE_NORMAL, partition))) {1494LOG_WARN("get_partition_by_partition_index fail", KR(ret), K(part_idx), K(primary_schema));1495} else if (OB_ISNULL(partition)) {1496ret = OB_ERR_UNEXPECTED;1497LOG_WARN("part ptr is null", KR(ret), K(part_idx), K(primary_schema));1498} else {1499primary_partition_offset += partition->get_subpartition_num();1500}1501} else {1502ret = OB_ERR_UNEXPECTED;1503LOG_WARN("unknow table part_level", KR(ret), K(primary_schema));1504}1505}1506}1507
1508return ret;1509}
1510
1511int ObNewTableTabletAllocator::alloc_tablet_for_tablegroup(1512const schema::ObTableSchema &primary_schema,1513const schema::ObTableSchema &table_schema,1514const schema::ObSimpleTablegroupSchema &tablegroup_schema)1515{
1516int ret = OB_SUCCESS;1517if (tablegroup_schema.get_sharding() == OB_PARTITION_SHARDING_NONE || table_schema.get_part_level() == schema::PARTITION_LEVEL_ZERO) {1518common::ObArray<share::ObLSID> pre_ls_id_array;1519if (OB_FAIL(generate_ls_array_by_primary_schema(primary_schema, pre_ls_id_array))) {1520LOG_WARN("fail to generate_ls_array_by_primary_schema", KR(ret), K(primary_schema));1521} else {1522// first tablet location ls1523ObLSID dest_ls_id = pre_ls_id_array.at(0);1524for (int64_t i = 0; i < table_schema.get_all_part_num() && OB_SUCC(ret); i++) {1525if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1526LOG_WARN("failed to push_back", KR(ret), K(i), K(tenant_id_), K(table_schema));1527}1528}1529}1530} else if (tablegroup_schema.get_sharding() == OB_PARTITION_SHARDING_PARTITION) {1531common::ObArray<share::ObLSID> all_ls_id_array;1532common::ObArray<share::ObLSID> pre_ls_id_array;1533if (primary_schema.get_partition_num() != table_schema.get_partition_num()) {1534ret = OB_ERR_UNEXPECTED;1535LOG_WARN("mismatch partition num in tablegroup", KR(ret), K(primary_schema), K(table_schema));1536} else if (OB_FAIL(generate_ls_array_by_primary_schema(primary_schema, all_ls_id_array))) {1537LOG_WARN("fail to generate_ls_array_by_primary_schema", KR(ret), K(primary_schema));1538} else if (OB_FAIL(extract_one_level_ls_array_by_primary_schema(primary_schema, all_ls_id_array, pre_ls_id_array))) {1539LOG_WARN("fail to extract_one_level_ls_array_by_primary_schema", KR(ret), K(primary_schema));1540} else {1541/*1542* keep align with one level partition
1543*/
1544for (int64_t i = 0; i < table_schema.get_partition_num() && OB_SUCC(ret); i++) {1545ObLSID dest_ls_id = pre_ls_id_array.at(i);1546if (schema::PARTITION_LEVEL_ONE == table_schema.get_part_level()) {1547if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1548LOG_WARN("failed to push_back", KR(ret), K(i));1549}1550} else if (schema::PARTITION_LEVEL_TWO == table_schema.get_part_level()) {1551const schema::ObPartition *partition = NULL;1552if (OB_FAIL(table_schema.get_partition_by_partition_index(i, schema::CHECK_PARTITION_MODE_NORMAL, partition))) {1553LOG_WARN("get_partition_by_partition_index fail", KR(ret), K(i), K(table_schema));1554} else if (OB_ISNULL(partition)) {1555ret = OB_ERR_UNEXPECTED;1556LOG_WARN("part ptr is null", KR(ret), K(i), K(table_schema));1557} else {1558for (int64_t sp = 0; OB_SUCC(ret) && sp < partition->get_subpartition_num(); sp++) {1559if (OB_FAIL(ls_id_array_.push_back(dest_ls_id))) {1560LOG_WARN("failed to push ls_id to array", KR(ret), K(dest_ls_id));1561}1562}1563}1564} else {1565ret = OB_ERR_UNEXPECTED;1566LOG_WARN("unknow table part_level", KR(ret), K(table_schema));1567}1568}1569}1570} else if (tablegroup_schema.get_sharding() == OB_PARTITION_SHARDING_ADAPTIVE) {1571if (primary_schema.get_all_part_num() != table_schema.get_all_part_num()) {1572ret = OB_ERR_UNEXPECTED;1573LOG_WARN("mismatch partition in tablegroup", KR(ret), K(table_schema), K(primary_schema));1574} else if (OB_FAIL(alloc_tablet_by_primary_schema(primary_schema))) {1575LOG_WARN("fail to alloc tablet by primary_schema", KR(ret), K(primary_schema));1576}1577} else {1578ret = OB_ERR_UNEXPECTED;1579LOG_WARN("unknow sharding option", KR(ret), K(tablegroup_schema.get_sharding()));1580}1581return ret;1582}
1583
1584int ObNewTableTabletAllocator::alloc_ls_for_normal_table_tablet(1585const share::schema::ObTableSchema &table_schema)1586{
1587int ret = OB_SUCCESS;1588LOG_INFO("alloc ls for normal table tablet",1589"tenant_id", table_schema.get_tenant_id(),1590"table_id", table_schema.get_table_id());1591if (OB_UNLIKELY(!inited_)) {1592ret = OB_NOT_INIT;1593LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1594} else if (OB_FAIL(alloc_tablet_by_count_balance(table_schema))) {1595LOG_WARN("fail to alloc tablet by count balance", KR(ret));1596}1597return ret;1598}
1599
1600int ObNewTableTabletAllocator::wait_ls_elect_leader_(1601const uint64_t tenant_id,1602const ObLSID &ls_id)1603{
1604int ret = OB_SUCCESS;1605ObTimeoutCtx ctx;1606if (OB_UNLIKELY(!inited_)) {1607ret = OB_NOT_INIT;1608LOG_WARN("ObNewTableTabletAllocator not init", KR(ret), K(tenant_id), K(ls_id));1609} else if (OB_ISNULL(GCTX.location_service_)1610|| OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || !ls_id.is_valid())) {1611ret = OB_INVALID_ARGUMENT;1612LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(ls_id));1613} else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.internal_sql_execute_timeout))) {1614LOG_WARN("failed to set default timeout", KR(ret));1615} else {1616bool has_leader = false;1617ObAddr ls_leader;1618while (OB_SUCC(ret) && !has_leader) {1619int tmp_ret = OB_SUCCESS;1620ls_leader.reset();1621const share::ObLSReplica *leader_replica = nullptr;1622if (0 > ctx.get_timeout()) {1623ret = OB_TIMEOUT;1624LOG_WARN("wait ls elect leader timeout", KR(ret));1625} else if (OB_TMP_FAIL(GCTX.location_service_->nonblock_get_leader(GCONF.cluster_id, tenant_id, ls_id, ls_leader))) {1626LOG_WARN("fail to get ls leader", KR(ret), K(tenant_id), K(ls_id), K(ls_leader));1627} else {1628has_leader = true;1629}1630if (OB_SUCC(ret) && !has_leader) {1631LOG_WARN("fail to wait log stream elect leader, need retry", K(tenant_id), K(ls_id), K(ls_leader));1632ob_usleep(WAIT_INTERVAL_US);1633}1634}1635}1636return ret;1637}
1638
1639int ObNewTableTabletAllocator::alloc_ls_for_duplicate_table_(1640const share::schema::ObTableSchema &table_schema)1641{
1642int ret = OB_SUCCESS;1643uint64_t tenant_id = table_schema.get_tenant_id();1644LOG_INFO("alloc ls for duplicate table tablet",1645"tenant_id", table_schema.get_tenant_id(),1646"table_id", table_schema.get_table_id());1647share::ObLSStatusOperator ls_status_operator;1648share::ObLSStatusInfo duplicate_ls_status_info;1649ObTimeoutCtx ctx;1650if (OB_UNLIKELY(!inited_)) {1651ret = OB_NOT_INIT;1652LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1653} else if (OB_ISNULL(GCTX.sql_proxy_)1654|| OB_ISNULL(GCTX.location_service_)1655|| OB_ISNULL(GCTX.srv_rpc_proxy_)) {1656ret = OB_INVALID_ARGUMENT;1657LOG_WARN("invalid argument", KR(ret));1658} else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.internal_sql_execute_timeout))) {1659LOG_WARN("failed to set default timeout", KR(ret));1660} else {1661obrpc::ObCreateDupLSArg arg;1662obrpc::ObCreateDupLSResult result;1663while (OB_SUCC(ret)) {1664int tmp_ret = OB_SUCCESS;1665duplicate_ls_status_info.reset();1666if (0 > ctx.get_timeout()) {1667ret = OB_TIMEOUT;1668LOG_WARN("wait creating duplicate log stream timeout", KR(ret));1669} else if (OB_TMP_FAIL(ls_status_operator.get_duplicate_ls_status_info(1670tenant_id,1671*GCTX.sql_proxy_,1672duplicate_ls_status_info,1673share::OBCG_DEFAULT/*group_id*/))) {1674if (OB_ENTRY_NOT_EXIST == tmp_ret) {1675LOG_INFO("duplicate log stream not exist, should create one duplicate log stream");1676tmp_ret = OB_SUCCESS;1677// create duplicate ls1678ObAddr leader;1679const int64_t timeout = ctx.get_timeout();1680if (OB_TMP_FAIL(GCTX.location_service_->get_leader(GCONF.cluster_id, tenant_id,1681SYS_LS, FALSE, leader))) {1682LOG_WARN("failed to get leader", KR(tmp_ret), K(tenant_id));1683} else if (OB_TMP_FAIL(arg.init(tenant_id))) {1684LOG_WARN("failed to init arg", KR(ret), K(tenant_id));1685} else if (OB_TMP_FAIL(GCTX.srv_rpc_proxy_->to(leader).timeout(timeout).notify_create_duplicate_ls(arg, result))) {1686LOG_WARN("failed to create tenant duplicate ls", KR(tmp_ret), 
K(tenant_id), K(leader), K(arg), K(timeout));1687if (OB_CONFLICT_WITH_CLONE == tmp_ret) {1688ret = tmp_ret;1689LOG_WARN("tenant is in clone procedure, can not create new log stream for now", KR(ret), K(tenant_id), K(arg));1690}1691}1692} else {1693LOG_WARN("fail to get duplicate log stream from table", KR(tmp_ret), K(tenant_id));1694}1695} else if (!duplicate_ls_status_info.ls_is_normal()) {1696LOG_TRACE("duplicate log stream is not in normal status", K(duplicate_ls_status_info));1697} else if (OB_FAIL(wait_ls_elect_leader_(1698duplicate_ls_status_info.tenant_id_,1699duplicate_ls_status_info.ls_id_))) {1700LOG_WARN("fail to wait duplicate ls elect leader", KR(ret), K(duplicate_ls_status_info));1701} else {1702for (int64_t i = 0; i < table_schema.get_all_part_num() && OB_SUCC(ret); i++) {1703if (OB_FAIL(ls_id_array_.push_back(duplicate_ls_status_info.ls_id_))) {1704LOG_WARN("failed to push_back", KR(ret), K(i), K(duplicate_ls_status_info));1705}1706}1707break;1708}1709if (OB_SUCC(ret)) {1710LOG_WARN("fail to get duplicate log stream, need retry", K(tenant_id), K(duplicate_ls_status_info));1711ob_usleep(WAIT_INTERVAL_US);1712}1713}1714}1715return ret;1716}
1717
1718int ObNewTableTabletAllocator::check_and_replace_ls_(1719ObMySQLTransaction &trans,1720const uint64_t tenant_id)1721{
1722int ret = OB_SUCCESS;1723ObArray<ObLSID> locked_ls_id_array;1724if (OB_UNLIKELY(!inited_) || OB_ISNULL(sql_proxy_)) {1725ret = OB_NOT_INIT;1726LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1727} else if (OB_FAIL(locked_ls_id_array.reserve(ls_id_array_.count()))) {1728LOG_WARN("reserve failed", KR(ret), K(tenant_id), K_(ls_id_array));1729} else {1730ARRAY_FOREACH(ls_id_array_, idx) {1731ObLSID prev_ls_id;1732ObLSID new_ls_id;1733ObLSAttr new_ls_attr;1734ObLSAttr curr_ls_attr;1735const ObLSID curr_ls_id = ls_id_array_.at(idx);1736if (idx > 0) {1737int64_t index = OB_INVALID_INDEX;1738find_last_user_ls_(locked_ls_id_array, index);1739if (index >= 0 && index < locked_ls_id_array.count()) {1740prev_ls_id = locked_ls_id_array.at(index);1741}1742}1743if (curr_ls_id.is_sys_ls()) { // do not lock sys ls1744new_ls_id = curr_ls_id;1745} else if (OB_FAIL(lock_and_check_ls_(1746trans,1747tenant_id,1748locked_ls_id_array,1749curr_ls_id,1750curr_ls_attr))) {1751if (OB_STATE_NOT_MATCH == ret) {1752if (OB_FAIL(choose_new_ls_(tenant_id, curr_ls_attr, prev_ls_id, new_ls_id))) {1753LOG_WARN("choose new ls failed", KR(ret),1754K(tenant_id), K(curr_ls_attr), K(prev_ls_id), K(new_ls_id));1755} else if (OB_FAIL(lock_and_check_ls_(1756trans,1757tenant_id,1758locked_ls_id_array,1759new_ls_id,1760new_ls_attr))) {1761// new ls should not be OB_STATE_NOT_MATCH1762LOG_WARN("check and lock ls failed", KR(ret),1763K(tenant_id), K(locked_ls_id_array), K(new_ls_id), K(new_ls_attr));1764} else {1765LOG_INFO("the ls allocated for tablet creating has changed",1766KR(ret), K(tenant_id), "old_ls_id", curr_ls_id, K(new_ls_id));1767}1768} else {1769LOG_WARN("check and lock ls failed", KR(ret),1770K(tenant_id), K(locked_ls_id_array), K(curr_ls_id), K(curr_ls_attr));1771}1772} else { // lock user ls successfully1773new_ls_id = curr_ls_id;1774}1775if (FAILEDx(locked_ls_id_array.push_back(new_ls_id))){1776LOG_WARN("push back failed", KR(ret), K(new_ls_id), K(locked_ls_id_array));1777}1778}1779if 
(OB_FAIL(ret)) {1780} else if (locked_ls_id_array.count() != ls_id_array_.count()) {1781ret = OB_ERR_UNEXPECTED;1782LOG_WARN("ls_id_array count not match", KR(ret), K(tenant_id),1783"tmp_ls_id_arry count", locked_ls_id_array.count(),1784"ls_id_array_ count", ls_id_array_.count(), K(locked_ls_id_array), K_(ls_id_array));1785} else if (OB_FAIL(ls_id_array_.assign(locked_ls_id_array))) {1786LOG_WARN("assign failed", KR(ret), K(locked_ls_id_array), K_(ls_id_array));1787}1788}1789return ret;1790}
1791
1792void ObNewTableTabletAllocator::find_last_user_ls_(1793const ObIArray<ObLSID> &ls_id_array,1794int64_t &index)1795{
1796index = OB_INVALID_INDEX;1797for (int64_t i = ls_id_array.count() - 1; i >= 0; --i) {1798const ObLSID &curr_ls = ls_id_array.at(i);1799if (curr_ls.id() > ObLSID::MIN_USER_LS_ID) {1800index = i;1801break;1802}1803}1804}
1805
1806int ObNewTableTabletAllocator::lock_and_check_ls_(1807ObMySQLTransaction &trans,1808const uint64_t tenant_id,1809const ObIArray<ObLSID> &locked_ls_id_array,1810const ObLSID &ls_id,1811ObLSAttr &ls_attr)1812{
1813int ret = OB_SUCCESS;1814ls_attr.reset();1815if (OB_UNLIKELY(!inited_) || OB_ISNULL(sql_proxy_)) {1816ret = OB_NOT_INIT;1817LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1818} else if (!ls_id.is_valid_with_tenant(tenant_id)) {1819ret = OB_INVALID_ARGUMENT;1820LOG_WARN("invalid args", KR(ret), K(tenant_id), K(ls_id));1821} else if (common::has_exist_in_array(locked_ls_id_array, ls_id)) {1822// ls has been locked1823} else {1824ObLSAttrOperator ls_operator(tenant_id, sql_proxy_);1825if (OB_FAIL(ObLSObjLockUtil::lock_ls_in_trans(1826trans,1827tenant_id,1828ls_id,1829SHARE))) {1830LOG_WARN("lock ls in trans failed", KR(ret), K(tenant_id), K(ls_id));1831} else if (OB_FAIL(ls_operator.get_ls_attr(ls_id, false/*for_update*/, trans, ls_attr))) {1832if (OB_ENTRY_NOT_EXIST == ret) {1833ls_attr.reset();1834ret = OB_STATE_NOT_MATCH;1835LOG_INFO("ls has been deleted when creating tablet", KR(ret), K(ls_id));1836} else {1837LOG_WARN("get ls attr failed", KR(ret), K(ls_id), K(ls_attr));1838}1839} else if (!ls_attr.ls_is_normal() || ls_attr.get_ls_flag().is_block_tablet_in()) {1840ret = OB_STATE_NOT_MATCH;1841LOG_TRACE("can not create tablet on this ls beacuse it is not in normal status or is block tablet in",1842KR(ret), K(tenant_id), K(ls_id), K(ls_attr));1843}1844}1845return ret;1846}
1847
1848int ObNewTableTabletAllocator::choose_new_ls_(1849const uint64_t tenant_id,1850const ObLSAttr &old_ls_attr,1851const ObLSID &prev_ls_id,1852ObLSID &new_ls_id)1853{
1854int ret = OB_SUCCESS;1855if (OB_UNLIKELY(!inited_) || OB_ISNULL(sql_proxy_)) {1856ret = OB_NOT_INIT;1857LOG_WARN("ObNewTableTabletAllocator not init", KR(ret));1858} else if (!old_ls_attr.is_valid() || !old_ls_attr.ls_is_normal()) {1859if (prev_ls_id.is_valid()) {1860new_ls_id = prev_ls_id;1861} else {1862ObLSAttrOperator ls_operator(tenant_id, sql_proxy_);1863if (OB_FAIL(ls_operator.get_random_normal_user_ls(new_ls_id))) {1864LOG_WARN("get random normal user ls failed", KR(ret), K(tenant_id), K(new_ls_id));1865}1866}1867} else if (old_ls_attr.get_ls_flag().is_block_tablet_in()) {1868//only in 4200 canbe block tablet in, no need process data_version1869if (OB_FAIL(ObBalanceTaskTableOperator::get_merge_task_dest_ls_by_src_ls(1870*sql_proxy_,1871tenant_id,1872old_ls_attr.get_ls_id(),1873new_ls_id))) {1874LOG_WARN("get dest ls by src ls failed", KR(ret), K(tenant_id), K(old_ls_attr), K(new_ls_id));1875}1876} else {1877ret = OB_ERR_UNEXPECTED;1878LOG_WARN("unexpected ls_attr", KR(ret), K(old_ls_attr));1879}1880return ret;1881}
1882
1883}//end namespace rootserver1884}//end namespace oceanbase1885