cubefs

Форк
0
/
datapartition.go 
426 строк · 14.1 Кб
1
// Copyright 2018 The CubeFS Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12
// implied. See the License for the specific language governing
13
// permissions and limitations under the License.
14

15
package cmd
16

17
import (
18
	"fmt"
19
	"sort"
20
	"strconv"
21

22
	"github.com/cubefs/cubefs/proto"
23
	"github.com/cubefs/cubefs/sdk/master"
24
	"github.com/spf13/cobra"
25
)
26

27
// Usage line and short help text of the parent "datapartition" command.
const (
	cmdDataPartitionUse   = "datapartition [COMMAND]"
	cmdDataPartitionShort = "Manage data partition"
)
31

32
func newDataPartitionCmd(client *master.MasterClient) *cobra.Command {
33
	cmd := &cobra.Command{
34
		Use:   cmdDataPartitionUse,
35
		Short: cmdDataPartitionShort,
36
	}
37
	cmd.AddCommand(
38
		newDataPartitionGetCmd(client),
39
		newListCorruptDataPartitionCmd(client),
40
		newDataPartitionDecommissionCmd(client),
41
		newDataPartitionReplicateCmd(client),
42
		newDataPartitionDeleteReplicaCmd(client),
43
		newDataPartitionGetDiscardCmd(client),
44
		newDataPartitionSetDiscardCmd(client),
45
	)
46
	return cmd
47
}
48

49
// Short help strings, one per "datapartition" subcommand.
const (
	cmdDataPartitionGetShort           = "Display detail information of a data partition"
	cmdCheckCorruptDataPartitionShort  = "Check and list unhealthy data partitions"
	cmdDataPartitionDecommissionShort  = "Decommission a replication of the data partition to a new address"
	cmdDataPartitionReplicateShort     = "Add a replication of the data partition on a new address"
	cmdDataPartitionDeleteReplicaShort = "Delete a replication of the data partition on a fixed address"
	cmdDataPartitionGetDiscardShort    = "Display all discard data partitions"
	cmdDataPartitionSetDiscardShort    = "Set discard flag for data partition"
)
58

59
func newDataPartitionGetCmd(client *master.MasterClient) *cobra.Command {
60
	cmd := &cobra.Command{
61
		Use:   CliOpInfo + " [DATA PARTITION ID]",
62
		Short: cmdDataPartitionGetShort,
63
		Args:  cobra.MinimumNArgs(1),
64
		Run: func(cmd *cobra.Command, args []string) {
65
			var (
66
				err         error
67
				partitionID uint64
68
				partition   *proto.DataPartitionInfo
69
			)
70
			defer func() {
71
				errout(err)
72
			}()
73
			if partitionID, err = strconv.ParseUint(args[0], 10, 64); err != nil {
74
				return
75
			}
76
			if partition, err = client.AdminAPI().GetDataPartition("", partitionID); err != nil {
77
				return
78
			}
79
			stdoutf("%v", formatDataPartitionInfo(partition))
80
		},
81
	}
82
	return cmd
83
}
84

85
func newListCorruptDataPartitionCmd(client *master.MasterClient) *cobra.Command {
86
	var ignoreDiscardDp bool
87
	var diff bool
88

89
	cmd := &cobra.Command{
90
		Use:   CliOpCheck,
91
		Short: cmdCheckCorruptDataPartitionShort,
92
		Long: `If the data nodes are marked as "Inactive", it means the nodes has been not available for a time. It is suggested to
93
eliminate the network, disk or other problems first. Once the bad nodes can never be "active", they are called corrupt
94
nodes. The "decommission" command can be used to discard the corrupt nodes. However, if more than half replicas of
95
a partition are on the corrupt nodes, the few remaining replicas can not reach an agreement with one leader. In this case,
96
you can use the "reset" command to fix the problem.The "reset" command may lead to data loss, be careful to do this.
97
The "reset" command will be released in next version`,
98
		Run: func(cmd *cobra.Command, args []string) {
99
			var (
100
				diagnosis *proto.DataPartitionDiagnosis
101
				dataNodes []*proto.DataNodeInfo
102
				err       error
103
			)
104
			defer func() {
105
				errout(err)
106
			}()
107
			if diagnosis, err = client.AdminAPI().DiagnoseDataPartition(ignoreDiscardDp); err != nil {
108
				return
109
			}
110
			stdoutln("[Inactive Data nodes]:")
111
			stdoutlnf("%v", formatDataNodeDetailTableHeader())
112
			for _, addr := range diagnosis.InactiveDataNodes {
113
				var node *proto.DataNodeInfo
114
				if node, err = client.NodeAPI().GetDataNode(addr); err != nil {
115
					return
116
				}
117
				dataNodes = append(dataNodes, node)
118
			}
119
			sort.SliceStable(dataNodes, func(i, j int) bool {
120
				return dataNodes[i].ID < dataNodes[j].ID
121
			})
122
			for _, node := range dataNodes {
123
				stdoutln(formatDataNodeDetail(node, true))
124
			}
125
			stdoutln()
126
			stdoutln("[Corrupt data partitions](no leader):")
127
			stdoutln(partitionInfoTableHeader)
128
			sort.SliceStable(diagnosis.CorruptDataPartitionIDs, func(i, j int) bool {
129
				return diagnosis.CorruptDataPartitionIDs[i] < diagnosis.CorruptDataPartitionIDs[j]
130
			})
131
			for _, pid := range diagnosis.CorruptDataPartitionIDs {
132
				var partition *proto.DataPartitionInfo
133
				if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil {
134
					err = fmt.Errorf("Partition not found, err:[%v] ", err)
135
					return
136
				}
137
				stdoutln(formatDataPartitionInfoRow(partition))
138
			}
139

140
			stdoutln()
141
			stdoutln("[Partition lack replicas]:")
142
			stdoutln(partitionInfoTableHeader)
143
			sort.SliceStable(diagnosis.LackReplicaDataPartitionIDs, func(i, j int) bool {
144
				return diagnosis.LackReplicaDataPartitionIDs[i] < diagnosis.LackReplicaDataPartitionIDs[j]
145
			})
146
			for _, pid := range diagnosis.LackReplicaDataPartitionIDs {
147
				var partition *proto.DataPartitionInfo
148
				if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil {
149
					err = fmt.Errorf("Partition not found, err:[%v] ", err)
150
					return
151
				}
152
				if partition != nil {
153
					stdoutln(formatDataPartitionInfoRow(partition))
154
				}
155
			}
156

157
			stdoutln()
158
			stdoutln("[Bad data partitions(decommission not completed)]:")
159
			badPartitionTablePattern := "%-8v    %-10v    %-10v"
160
			stdoutlnf(badPartitionTablePattern, "PATH", "PARTITION ID", "REPAIR PROGRESS")
161
			for _, bdpv := range diagnosis.BadDataPartitionInfos {
162
				sort.SliceStable(bdpv.PartitionInfos, func(i, j int) bool {
163
					return bdpv.PartitionInfos[i].PartitionID < bdpv.PartitionInfos[j].PartitionID
164
				})
165
				for _, pinfo := range bdpv.PartitionInfos {
166
					percent := strconv.FormatFloat(pinfo.DecommissionRepairProgress*100, 'f', 2, 64) + "%"
167
					stdoutlnf(badPartitionTablePattern, bdpv.Path, pinfo.PartitionID, percent)
168
				}
169
			}
170

171
			stdoutln()
172
			stdoutln("[Partition has unavailable replica]:")
173
			stdoutln(badReplicaPartitionInfoTableHeader)
174
			sort.SliceStable(diagnosis.BadReplicaDataPartitionIDs, func(i, j int) bool {
175
				return diagnosis.BadReplicaDataPartitionIDs[i] < diagnosis.BadReplicaDataPartitionIDs[j]
176
			})
177

178
			for _, dpId := range diagnosis.BadReplicaDataPartitionIDs {
179
				var partition *proto.DataPartitionInfo
180
				if partition, err = client.AdminAPI().GetDataPartition("", dpId); err != nil {
181
					err = fmt.Errorf("Partition not found, err:[%v] ", err)
182
					return
183
				}
184
				if partition != nil {
185
					stdoutln(formatBadReplicaDpInfoRow(partition))
186
				}
187
			}
188

189
			if diff {
190
				stdoutln()
191
				stdoutln("[Partition with replica file count differ significantly]:")
192
				stdoutln(RepFileCountDifferInfoTableHeader)
193
				sort.SliceStable(diagnosis.RepFileCountDifferDpIDs, func(i, j int) bool {
194
					return diagnosis.RepFileCountDifferDpIDs[i] < diagnosis.RepFileCountDifferDpIDs[j]
195
				})
196
				for _, dpId := range diagnosis.RepFileCountDifferDpIDs {
197
					var partition *proto.DataPartitionInfo
198
					if partition, err = client.AdminAPI().GetDataPartition("", dpId); err != nil {
199
						err = fmt.Errorf("Partition not found, err:[%v] ", err)
200
						return
201
					}
202
					if partition != nil {
203
						stdoutln(formatReplicaFileCountDiffDpInfoRow(partition))
204
					}
205
				}
206

207
				stdoutln()
208
				stdoutln("[Partition with replica used size differ significantly]:")
209
				stdoutln(RepUsedSizeDifferInfoTableHeader)
210
				sort.SliceStable(diagnosis.RepUsedSizeDifferDpIDs, func(i, j int) bool {
211
					return diagnosis.RepUsedSizeDifferDpIDs[i] < diagnosis.RepUsedSizeDifferDpIDs[j]
212
				})
213
				for _, dpId := range diagnosis.RepUsedSizeDifferDpIDs {
214
					var partition *proto.DataPartitionInfo
215
					if partition, err = client.AdminAPI().GetDataPartition("", dpId); err != nil {
216
						err = fmt.Errorf("Partition not found, err:[%v] ", err)
217
						return
218
					}
219
					if partition != nil {
220
						stdoutln(formatReplicaSizeDiffDpInfoRow(partition))
221
					}
222
				}
223
			} else {
224
				stdoutln()
225
				stdoutlnf("%v %v", "[Number of Partition with replica file count differ significantly]:",
226
					len(diagnosis.RepUsedSizeDifferDpIDs))
227

228
				stdoutln()
229
				stdoutlnf("%v %v", "[Number of Partition with replica used size differ significantly]:",
230
					len(diagnosis.RepUsedSizeDifferDpIDs))
231
			}
232

233
			stdoutln()
234
			stdoutln("[Partition with excessive replicas]:")
235
			stdoutln(partitionInfoTableHeader)
236
			sort.SliceStable(diagnosis.ExcessReplicaDpIDs, func(i, j int) bool {
237
				return diagnosis.ExcessReplicaDpIDs[i] < diagnosis.ExcessReplicaDpIDs[j]
238
			})
239
			for _, pid := range diagnosis.ExcessReplicaDpIDs {
240
				var partition *proto.DataPartitionInfo
241
				if partition, err = client.AdminAPI().GetDataPartition("", pid); err != nil {
242
					err = fmt.Errorf("Partition not found, err:[%v] ", err)
243
					return
244
				}
245
				if partition != nil {
246
					stdoutln(formatDataPartitionInfoRow(partition))
247
				}
248
			}
249
		},
250
	}
251

252
	cmd.Flags().BoolVarP(&ignoreDiscardDp, "ignoreDiscard", "i", false, "true for not display discard dp")
253
	cmd.Flags().BoolVarP(&diff, "diff", "d", false, "true for display dp those replica file count count or size differ significantly")
254
	return cmd
255
}
256

257
func newDataPartitionDecommissionCmd(client *master.MasterClient) *cobra.Command {
258
	var raftForceDel bool
259
	var clientIDKey string
260
	cmd := &cobra.Command{
261
		Use:   CliOpDecommission + " [ADDRESS] [DATA PARTITION ID]",
262
		Short: cmdDataPartitionDecommissionShort,
263
		Args:  cobra.MinimumNArgs(2),
264
		Run: func(cmd *cobra.Command, args []string) {
265
			var (
266
				err         error
267
				partitionID uint64
268
			)
269
			defer func() {
270
				errout(err)
271
			}()
272
			address := args[0]
273
			partitionID, err = strconv.ParseUint(args[1], 10, 64)
274
			if err != nil {
275
				return
276
			}
277
			if err := client.AdminAPI().DecommissionDataPartition(partitionID, address, raftForceDel, clientIDKey); err != nil {
278
				stdout(fmt.Sprintf("failed:err(%v)\n", err.Error()))
279
				return
280
			}
281
			stdoutln("Decommission data partition successfully")
282
		},
283
		ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
284
			if len(args) != 0 {
285
				return nil, cobra.ShellCompDirectiveNoFileComp
286
			}
287
			return validDataNodes(client, toComplete), cobra.ShellCompDirectiveNoFileComp
288
		},
289
	}
290
	cmd.Flags().BoolVarP(&raftForceDel, "raftForceDel", "r", false, "true for raftForceDel")
291
	cmd.Flags().StringVar(&clientIDKey, CliFlagClientIDKey, client.ClientIDKey(), CliUsageClientIDKey)
292
	return cmd
293
}
294

295
func newDataPartitionReplicateCmd(client *master.MasterClient) *cobra.Command {
296
	var clientIDKey string
297
	cmd := &cobra.Command{
298
		Use:   CliOpReplicate + " [ADDRESS] [DATA PARTITION ID]",
299
		Short: cmdDataPartitionReplicateShort,
300
		Args:  cobra.MinimumNArgs(2),
301
		Run: func(cmd *cobra.Command, args []string) {
302
			var (
303
				err         error
304
				partitionID uint64
305
			)
306
			defer func() {
307
				errout(err)
308
			}()
309
			address := args[0]
310
			if partitionID, err = strconv.ParseUint(args[1], 10, 64); err != nil {
311
				return
312
			}
313
			if err = client.AdminAPI().AddDataReplica(partitionID, address, clientIDKey); err != nil {
314
				return
315
			}
316
			stdoutln("Add replication successfully")
317
		},
318
		ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
319
			if len(args) != 0 {
320
				return nil, cobra.ShellCompDirectiveNoFileComp
321
			}
322
			return validDataNodes(client, toComplete), cobra.ShellCompDirectiveNoFileComp
323
		},
324
	}
325
	cmd.Flags().StringVar(&clientIDKey, CliFlagClientIDKey, client.ClientIDKey(), CliUsageClientIDKey)
326
	return cmd
327
}
328

329
func newDataPartitionDeleteReplicaCmd(client *master.MasterClient) *cobra.Command {
330
	var clientIDKey string
331
	cmd := &cobra.Command{
332
		Use:   CliOpDelReplica + " [ADDRESS] [DATA PARTITION ID]",
333
		Short: cmdDataPartitionDeleteReplicaShort,
334
		Args:  cobra.MinimumNArgs(2),
335
		Run: func(cmd *cobra.Command, args []string) {
336
			var (
337
				err         error
338
				partitionID uint64
339
			)
340
			defer func() {
341
				errout(err)
342
			}()
343
			address := args[0]
344
			if partitionID, err = strconv.ParseUint(args[1], 10, 64); err != nil {
345
				return
346
			}
347
			if err = client.AdminAPI().DeleteDataReplica(partitionID, address, clientIDKey); err != nil {
348
				return
349
			}
350
			stdoutln("Delete replication successfully")
351
		},
352
		ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
353
			if len(args) != 0 {
354
				return nil, cobra.ShellCompDirectiveNoFileComp
355
			}
356
			return validDataNodes(client, toComplete), cobra.ShellCompDirectiveNoFileComp
357
		},
358
	}
359
	cmd.Flags().StringVar(&clientIDKey, CliFlagClientIDKey, client.ClientIDKey(), CliUsageClientIDKey)
360
	return cmd
361
}
362

363
func newDataPartitionGetDiscardCmd(client *master.MasterClient) *cobra.Command {
364
	cmd := &cobra.Command{
365
		Use:   CliOpGetDiscard,
366
		Short: cmdDataPartitionGetDiscardShort,
367
		Run: func(cmd *cobra.Command, args []string) {
368
			var (
369
				infos *proto.DiscardDataPartitionInfos
370
				err   error
371
			)
372

373
			defer func() {
374
				errout(err)
375
			}()
376

377
			if infos, err = client.AdminAPI().GetDiscardDataPartition(); err != nil {
378
				return
379
			}
380

381
			stdoutln()
382
			stdoutln("[Discard Partitions]:")
383
			stdoutln(partitionInfoTableHeader)
384
			sort.SliceStable(infos.DiscardDps, func(i, j int) bool {
385
				return infos.DiscardDps[i].PartitionID < infos.DiscardDps[j].PartitionID
386
			})
387
			for _, partition := range infos.DiscardDps {
388
				stdoutln(formatDataPartitionInfoRow(&partition))
389
			}
390
		},
391
	}
392
	return cmd
393
}
394

395
func newDataPartitionSetDiscardCmd(client *master.MasterClient) *cobra.Command {
396
	cmd := &cobra.Command{
397
		Use:   CliOpSetDiscard + " [DATA PARTITION ID] [DISCARD]",
398
		Short: cmdDataPartitionSetDiscardShort,
399
		Args:  cobra.MinimumNArgs(2),
400
		Run: func(cmd *cobra.Command, args []string) {
401
			var (
402
				err     error
403
				dpId    uint64
404
				discard bool
405
			)
406

407
			defer func() {
408
				errout(err)
409
			}()
410

411
			dpId, err = strconv.ParseUint(args[0], 10, 64)
412
			if err != nil {
413
				return
414
			}
415
			discard, err = strconv.ParseBool(args[1])
416
			if err != nil {
417
				return
418
			}
419
			if err = client.AdminAPI().SetDataPartitionDiscard(dpId, discard); err != nil {
420
				return
421
			}
422
			stdout("Discard %v successful", dpId)
423
		},
424
	}
425
	return cmd
426
}
427

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.