1
// Copyright 2018 The CubeFS Authors.
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12
// implied. See the License for the specific language governing
13
// permissions and limitations under the License.
22
"github.com/cubefs/cubefs/proto"
23
"github.com/cubefs/cubefs/sdk/master"
24
"github.com/spf13/cobra"
28
// Usage string and one-line description of the top-level "datapartition"
// command; both are consumed by newDataPartitionCmd.
cmdDataPartitionUse = "datapartition [COMMAND]"
29
cmdDataPartitionShort = "Manage data partition"
32
// newDataPartitionCmd builds the parent "datapartition" cobra command, using
// cmdDataPartitionUse and cmdDataPartitionShort for its usage and description.
// The given master client is handed to every sub-command constructor listed
// in the body.
func newDataPartitionCmd(client *master.MasterClient) *cobra.Command {
33
cmd := &cobra.Command{
34
Use: cmdDataPartitionUse,
35
Short: cmdDataPartitionShort,
38
// Sub-command constructors, one per data partition operation.
// NOTE(review): the call that attaches them (presumably cmd.AddCommand) is
// not visible in this view — confirm.
newDataPartitionGetCmd(client),
39
newListCorruptDataPartitionCmd(client),
40
newDataPartitionDecommissionCmd(client),
41
newDataPartitionReplicateCmd(client),
42
newDataPartitionDeleteReplicaCmd(client),
43
newDataPartitionGetDiscardCmd(client),
44
newDataPartitionSetDiscardCmd(client),
50
// One-line descriptions (cobra "Short" text) for the data partition
// sub-commands defined in this file.
cmdDataPartitionGetShort = "Display detail information of a data partition"
51
cmdCheckCorruptDataPartitionShort = "Check and list unhealthy data partitions"
52
cmdDataPartitionDecommissionShort = "Decommission a replication of the data partition to a new address"
53
cmdDataPartitionReplicateShort = "Add a replication of the data partition on a new address"
54
cmdDataPartitionDeleteReplicaShort = "Delete a replication of the data partition on a fixed address"
55
cmdDataPartitionGetDiscardShort = "Display all discard data partitions"
56
cmdDataPartitionSetDiscardShort = "Set discard flag for data partition"
59
// newDataPartitionGetCmd builds the sub-command that prints the details of a
// single data partition. It requires at least one positional argument, the
// data partition ID.
func newDataPartitionGetCmd(client *master.MasterClient) *cobra.Command {
60
cmd := &cobra.Command{
61
Use: CliOpInfo + " [DATA PARTITION ID]",
62
Short: cmdDataPartitionGetShort,
63
Args: cobra.MinimumNArgs(1),
64
Run: func(cmd *cobra.Command, args []string) {
68
partition *proto.DataPartitionInfo
73
// The ID is parsed as a base-10 unsigned 64-bit integer.
// NOTE(review): the error branches of this and the next check are not
// visible in this view.
if partitionID, err = strconv.ParseUint(args[0], 10, 64); err != nil {
76
// The partition is looked up by ID; the first argument is passed as an
// empty string.
if partition, err = client.AdminAPI().GetDataPartition("", partitionID); err != nil {
79
stdoutf("%v", formatDataPartitionInfo(partition))
85
func newListCorruptDataPartitionCmd(client *master.MasterClient) *cobra.Command {
86
var ignoreDiscardDp bool
89
cmd := &cobra.Command{
91
Short: cmdCheckCorruptDataPartitionShort,
92
Long: `If the data nodes are marked as "Inactive", it means the nodes has been not available for a time. It is suggested to
93
eliminate the network, disk or other problems first. Once the bad nodes can never be "active", they are called corrupt
94
nodes. The "decommission" command can be used to discard the corrupt nodes. However, if more than half replicas of
95
a partition are on the corrupt nodes, the few remaining replicas can not reach an agreement with one leader. In this case,
96
you can use the "reset" command to fix the problem.The "reset" command may lead to data loss, be careful to do this.
97
The "reset" command will be released in next version`,
98
Run: func(cmd *cobra.Command, args []string) {
100
diagnosis *proto.DataPartitionDiagnosis
101
dataNodes []*proto.DataNodeInfo
107
if diagnosis, err = client.AdminAPI().DiagnoseDataPartition(ignoreDiscardDp); err != nil {
110
stdoutln("[Inactive Data nodes]:")
111
stdoutlnf("%v", formatDataNodeDetailTableHeader())
112
for _, addr := range diagnosis.InactiveDataNodes {
113
var node *proto.DataNodeInfo
114
if node, err = client.NodeAPI().GetDataNode(addr); err != nil {
117
dataNodes = append(dataNodes, node)
119
sort.SliceStable(dataNodes, func(i, j int) bool {
120
return dataNodes[i].ID < dataNodes[j].ID
122
for _, node := range dataNodes {
123
stdoutln(formatDataNodeDetail(node, true))
126
stdoutln("[Corrupt data partitions](no leader):")
127
stdoutln(partitionInfoTableHeader)
128
sort.SliceStable(diagnosis.CorruptDataPartitionIDs, func(i, j int) bool {
129
return diagnosis.CorruptDataPartitionIDs[i] < diagnosis.CorruptDataPartitionIDs[j]
131
for _, pid := range diagnosis.CorruptDataPartitionIDs {
132
var partition *proto.DataPartitionInfo
133
if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil {
134
err = fmt.Errorf("Partition not found, err:[%v] ", err)
137
stdoutln(formatDataPartitionInfoRow(partition))
141
stdoutln("[Partition lack replicas]:")
142
stdoutln(partitionInfoTableHeader)
143
sort.SliceStable(diagnosis.LackReplicaDataPartitionIDs, func(i, j int) bool {
144
return diagnosis.LackReplicaDataPartitionIDs[i] < diagnosis.LackReplicaDataPartitionIDs[j]
146
for _, pid := range diagnosis.LackReplicaDataPartitionIDs {
147
var partition *proto.DataPartitionInfo
148
if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil {
149
err = fmt.Errorf("Partition not found, err:[%v] ", err)
152
if partition != nil {
153
stdoutln(formatDataPartitionInfoRow(partition))
158
stdoutln("[Bad data partitions(decommission not completed)]:")
159
badPartitionTablePattern := "%-8v %-10v %-10v"
160
stdoutlnf(badPartitionTablePattern, "PATH", "PARTITION ID", "REPAIR PROGRESS")
161
for _, bdpv := range diagnosis.BadDataPartitionInfos {
162
sort.SliceStable(bdpv.PartitionInfos, func(i, j int) bool {
163
return bdpv.PartitionInfos[i].PartitionID < bdpv.PartitionInfos[j].PartitionID
165
for _, pinfo := range bdpv.PartitionInfos {
166
percent := strconv.FormatFloat(pinfo.DecommissionRepairProgress*100, 'f', 2, 64) + "%"
167
stdoutlnf(badPartitionTablePattern, bdpv.Path, pinfo.PartitionID, percent)
172
stdoutln("[Partition has unavailable replica]:")
173
stdoutln(badReplicaPartitionInfoTableHeader)
174
sort.SliceStable(diagnosis.BadReplicaDataPartitionIDs, func(i, j int) bool {
175
return diagnosis.BadReplicaDataPartitionIDs[i] < diagnosis.BadReplicaDataPartitionIDs[j]
178
for _, dpId := range diagnosis.BadReplicaDataPartitionIDs {
179
var partition *proto.DataPartitionInfo
180
if partition, err = client.AdminAPI().GetDataPartition("", dpId); err != nil {
181
err = fmt.Errorf("Partition not found, err:[%v] ", err)
184
if partition != nil {
185
stdoutln(formatBadReplicaDpInfoRow(partition))
191
stdoutln("[Partition with replica file count differ significantly]:")
192
stdoutln(RepFileCountDifferInfoTableHeader)
193
sort.SliceStable(diagnosis.RepFileCountDifferDpIDs, func(i, j int) bool {
194
return diagnosis.RepFileCountDifferDpIDs[i] < diagnosis.RepFileCountDifferDpIDs[j]
196
for _, dpId := range diagnosis.RepFileCountDifferDpIDs {
197
var partition *proto.DataPartitionInfo
198
if partition, err = client.AdminAPI().GetDataPartition("", dpId); err != nil {
199
err = fmt.Errorf("Partition not found, err:[%v] ", err)
202
if partition != nil {
203
stdoutln(formatReplicaFileCountDiffDpInfoRow(partition))
208
stdoutln("[Partition with replica used size differ significantly]:")
209
stdoutln(RepUsedSizeDifferInfoTableHeader)
210
sort.SliceStable(diagnosis.RepUsedSizeDifferDpIDs, func(i, j int) bool {
211
return diagnosis.RepUsedSizeDifferDpIDs[i] < diagnosis.RepUsedSizeDifferDpIDs[j]
213
for _, dpId := range diagnosis.RepUsedSizeDifferDpIDs {
214
var partition *proto.DataPartitionInfo
215
if partition, err = client.AdminAPI().GetDataPartition("", dpId); err != nil {
216
err = fmt.Errorf("Partition not found, err:[%v] ", err)
219
if partition != nil {
220
stdoutln(formatReplicaSizeDiffDpInfoRow(partition))
225
stdoutlnf("%v %v", "[Number of Partition with replica file count differ significantly]:",
226
len(diagnosis.RepUsedSizeDifferDpIDs))
229
stdoutlnf("%v %v", "[Number of Partition with replica used size differ significantly]:",
230
len(diagnosis.RepUsedSizeDifferDpIDs))
234
stdoutln("[Partition with excessive replicas]:")
235
stdoutln(partitionInfoTableHeader)
236
sort.SliceStable(diagnosis.ExcessReplicaDpIDs, func(i, j int) bool {
237
return diagnosis.ExcessReplicaDpIDs[i] < diagnosis.ExcessReplicaDpIDs[j]
239
for _, pid := range diagnosis.ExcessReplicaDpIDs {
240
var partition *proto.DataPartitionInfo
241
if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil {
242
err = fmt.Errorf("Partition not found, err:[%v] ", err)
245
if partition != nil {
246
stdoutln(formatDataPartitionInfoRow(partition))
252
cmd.Flags().BoolVarP(&ignoreDiscardDp, "ignoreDiscard", "i", false, "true for not display discard dp")
253
cmd.Flags().BoolVarP(&diff, "diff", "d", false, "true for display dp those replica file count count or size differ significantly")
257
// newDataPartitionDecommissionCmd builds the sub-command that decommissions a
// replica of a data partition. It requires at least two positional
// arguments, documented as [ADDRESS] [DATA PARTITION ID].
//
// Flags: --raftForceDel / -r (bool, default false) and the client ID key flag
// (CliFlagClientIDKey), whose default is client.ClientIDKey().
func newDataPartitionDecommissionCmd(client *master.MasterClient) *cobra.Command {
258
var raftForceDel bool
259
var clientIDKey string
260
cmd := &cobra.Command{
261
Use: CliOpDecommission + " [ADDRESS] [DATA PARTITION ID]",
262
Short: cmdDataPartitionDecommissionShort,
263
Args: cobra.MinimumNArgs(2),
264
Run: func(cmd *cobra.Command, args []string) {
273
// The second positional argument is the partition ID (base-10 uint64).
// NOTE(review): the assignment of address (presumably args[0]) is not
// visible in this view — confirm.
partitionID, err = strconv.ParseUint(args[1], 10, 64)
277
if err := client.AdminAPI().DecommissionDataPartition(partitionID, address, raftForceDel, clientIDKey); err != nil {
278
stdout(fmt.Sprintf("failed:err(%v)\n", err.Error()))
281
stdoutln("Decommission data partition successfully")
283
// Shell completion: file completion is disabled, and candidates come from
// validDataNodes. NOTE(review): the condition guarding the first return is
// not visible in this view.
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
285
return nil, cobra.ShellCompDirectiveNoFileComp
287
return validDataNodes(client, toComplete), cobra.ShellCompDirectiveNoFileComp
290
cmd.Flags().BoolVarP(&raftForceDel, "raftForceDel", "r", false, "true for raftForceDel")
291
cmd.Flags().StringVar(&clientIDKey, CliFlagClientIDKey, client.ClientIDKey(), CliUsageClientIDKey)
295
// newDataPartitionReplicateCmd builds the sub-command that adds a replica of
// a data partition. It requires at least two positional arguments,
// documented as [ADDRESS] [DATA PARTITION ID], and registers the client ID
// key flag (CliFlagClientIDKey) with client.ClientIDKey() as its default.
func newDataPartitionReplicateCmd(client *master.MasterClient) *cobra.Command {
296
var clientIDKey string
297
cmd := &cobra.Command{
298
Use: CliOpReplicate + " [ADDRESS] [DATA PARTITION ID]",
299
Short: cmdDataPartitionReplicateShort,
300
Args: cobra.MinimumNArgs(2),
301
Run: func(cmd *cobra.Command, args []string) {
310
// The second positional argument is the partition ID (base-10 uint64).
// NOTE(review): the assignment of address and the error branches are not
// visible in this view.
if partitionID, err = strconv.ParseUint(args[1], 10, 64); err != nil {
313
if err = client.AdminAPI().AddDataReplica(partitionID, address, clientIDKey); err != nil {
316
stdoutln("Add replication successfully")
318
// Shell completion: file completion is disabled, and candidates come from
// validDataNodes. NOTE(review): the condition guarding the first return is
// not visible in this view.
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
320
return nil, cobra.ShellCompDirectiveNoFileComp
322
return validDataNodes(client, toComplete), cobra.ShellCompDirectiveNoFileComp
325
cmd.Flags().StringVar(&clientIDKey, CliFlagClientIDKey, client.ClientIDKey(), CliUsageClientIDKey)
329
// newDataPartitionDeleteReplicaCmd builds the sub-command that deletes a
// replica of a data partition. It requires at least two positional
// arguments, documented as [ADDRESS] [DATA PARTITION ID], and registers the
// client ID key flag (CliFlagClientIDKey) with client.ClientIDKey() as its
// default.
func newDataPartitionDeleteReplicaCmd(client *master.MasterClient) *cobra.Command {
330
var clientIDKey string
331
cmd := &cobra.Command{
332
Use: CliOpDelReplica + " [ADDRESS] [DATA PARTITION ID]",
333
Short: cmdDataPartitionDeleteReplicaShort,
334
Args: cobra.MinimumNArgs(2),
335
Run: func(cmd *cobra.Command, args []string) {
344
// The second positional argument is the partition ID (base-10 uint64).
// NOTE(review): the assignment of address and the error branches are not
// visible in this view.
if partitionID, err = strconv.ParseUint(args[1], 10, 64); err != nil {
347
if err = client.AdminAPI().DeleteDataReplica(partitionID, address, clientIDKey); err != nil {
350
stdoutln("Delete replication successfully")
352
// Shell completion: file completion is disabled, and candidates come from
// validDataNodes. NOTE(review): the condition guarding the first return is
// not visible in this view.
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
354
return nil, cobra.ShellCompDirectiveNoFileComp
356
return validDataNodes(client, toComplete), cobra.ShellCompDirectiveNoFileComp
359
cmd.Flags().StringVar(&clientIDKey, CliFlagClientIDKey, client.ClientIDKey(), CliUsageClientIDKey)
363
// newDataPartitionGetDiscardCmd builds the sub-command that lists discarded
// data partitions. It fetches them with GetDiscardDataPartition, then prints
// a header followed by one row per partition in ascending PartitionID order.
func newDataPartitionGetDiscardCmd(client *master.MasterClient) *cobra.Command {
364
cmd := &cobra.Command{
365
Use: CliOpGetDiscard,
366
Short: cmdDataPartitionGetDiscardShort,
367
Run: func(cmd *cobra.Command, args []string) {
369
infos *proto.DiscardDataPartitionInfos
377
// NOTE(review): the error branch of this call is not visible in this view.
if infos, err = client.AdminAPI().GetDiscardDataPartition(); err != nil {
382
stdoutln("[Discard Partitions]:")
383
stdoutln(partitionInfoTableHeader)
384
sort.SliceStable(infos.DiscardDps, func(i, j int) bool {
385
return infos.DiscardDps[i].PartitionID < infos.DiscardDps[j].PartitionID
387
for _, partition := range infos.DiscardDps {
388
// The row formatter receives the address of the loop variable, not of the
// slice element.
stdoutln(formatDataPartitionInfoRow(&partition))
395
// newDataPartitionSetDiscardCmd builds the sub-command that sets the discard
// flag of a data partition. It requires at least two positional arguments,
// documented as [DATA PARTITION ID] [DISCARD].
func newDataPartitionSetDiscardCmd(client *master.MasterClient) *cobra.Command {
396
cmd := &cobra.Command{
397
Use: CliOpSetDiscard + " [DATA PARTITION ID] [DISCARD]",
398
Short: cmdDataPartitionSetDiscardShort,
399
Args: cobra.MinimumNArgs(2),
400
Run: func(cmd *cobra.Command, args []string) {
411
// First argument: partition ID as a base-10 uint64.
dpId, err = strconv.ParseUint(args[0], 10, 64)
415
// Second argument: the discard value, parsed with strconv.ParseBool.
discard, err = strconv.ParseBool(args[1])
419
if err = client.AdminAPI().SetDataPartitionDiscard(dpId, discard); err != nil {
422
// NOTE(review): this passes a format verb to stdout, while other call sites
// in this file use stdoutf for formatted output and pre-format with
// fmt.Sprintf before calling stdout. If stdout does not format its
// arguments, the literal "%v" is printed — confirm against stdout's
// definition.
stdout("Discard %v successful", dpId)