2
# The following script enables detecting, reporting and fixing
3
# anomalies in quota accounting. Run this script with -h option
7
Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
8
This file is part of GlusterFS.
10
This file is licensed to you under your choice of the GNU Lesser
11
General Public License, version 3 or any later version (LGPLv3 or
12
later), or the GNU General Public License, version 2 (GPLv2), in all
13
cases as published by the Free Software Foundation.
15
from __future__ import print_function
38
QUOTA_SIZE_MISMATCH = 2
45
The script attempts to find any gluster accounting issues in the
46
filesystem at the given subtree. The script crawls the given
47
subdirectory tree doing a stat for all files and compares the
48
size reported by gluster quota with the size reported by stat
49
calls. Any mismatch is reported. In addition integrity of marker
53
def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
54
if log_type == QUOTA_VERBOSE:
55
print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
56
elif log_type == QUOTA_META_ABSENT:
57
print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
58
elif log_type == QUOTA_SIZE_MISMATCH:
60
if dir_size is not None:
61
print('%24s %60s %12s %12s' % ("Size Mismatch", path,
62
xattr_dict, dir_size))
64
print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict,
67
def size_differs_lot(s1, s2):
69
There could be minor accounting differences between the stat based
70
accounting and gluster accounting. To keep these from throwing a lot
71
of false positives in our logs, we use a threshold of 1M for now.
72
TODO: For a deeply nested directory, at higher levels in hierarchy
73
differences may not be significant, hence this check needs to be improved.
80
def fix_hardlink_accounting(curr_dict, accounted_dict, curr_size):
82
Hard links are messy.. we have to account them for their parent
83
directory. But, stop accounting at the most common ancestor.
85
say we have 3 hardlinks : /d1/d2/h1, /d1/d3/h2 and /d1/h3
87
Suppose we encounter the hard links h1 first, then h2 and then h3.
88
while accounting for h1, we account the size until root(d2->d1->/)
89
while accounting for h2, we need to account only till d3. (as d1
90
and / are accounted for this inode).
91
while accounting for h3 we should not account at all.. as all
92
its ancestors are already accounted for same inode.
94
curr_dict : dict of hardlinks that were seen and
95
accounted by the current iteration.
96
accounted_dict : dict of hardlinks that have already been
99
size : size of the object as accounted by the
103
curr_size : size reduced by hardlink sizes for those
104
hardlinks that have already been accounted
106
Also delete the duplicate link from curr_dict.
109
dual_accounted_links = set(curr_dict.keys()) & set(accounted_dict.keys())
110
for link in dual_accounted_links:
111
curr_size = curr_size - curr_dict[link]
116
def fix_xattr(file_name, mark_dirty):
123
print("MARKING DIRTY: " + file_name)
124
out = subprocess.check_output (["/usr/bin/setfattr", "-n",
125
"trusted.glusterfs.quota.dirty",
126
"-v", IS_DIRTY, file_name])
127
rel_path = os.path.relpath(file_name, brick_path)
128
print("stat on " + mnt_path + "/" + rel_path)
129
stbuf = os.lstat(mnt_path + "/" + rel_path)
133
def get_quota_xattr_brick(dpath):
134
out = subprocess.check_output (["/usr/bin/getfattr", "--no-dereference",
135
"-d", "-m.", "-e", "hex", dpath])
136
pairs = out.splitlines()
139
Sample output to be parsed:
140
[root@dhcp35-100 mnt]# getfattr -d -m. -e hex /export/b1/B0/d14/d13/
141
# file: export/b1/B0/d14/d13/
142
security.selinux=0x756e636f6e66696e65645f753a6f626a6563745f723a7573725f743a733000
143
trusted.gfid=0xbae5e0d2d05043de9fd851d91ecf63e8
144
trusted.glusterfs.dht=0x000000010000000000000000ffffffff
145
trusted.glusterfs.dht.mds=0x00000000
146
trusted.glusterfs.quota.6a7675a3-b85a-40c5-830b-de9229d702ce.contri.39=0x00000000000000000000000000000000000000000000000e
147
trusted.glusterfs.quota.dirty=0x3000
148
trusted.glusterfs.quota.size.39=0x00000000000000000000000000000000000000000000000e
152
xattr_dict dictionary holds quota related xattrs
157
xattr_dict['parents'] = {}
159
for xattr in pairs[1:]:
160
xattr = xattr.decode("utf-8")
161
xattr_key = xattr.split("=")[0]
163
# skip any empty lines
165
elif not re.search("quota", xattr_key):
166
# skip all non quota xattr.
169
xattr_value = xattr.split("=")[1]
170
if re.search("contri", xattr_key):
172
xattr_version = xattr_key.split(".")[5]
173
if 'version' not in xattr_dict:
174
xattr_dict['version'] = xattr_version
176
if xattr_version != xattr_dict['version']:
177
print("Multiple xattr version found")
180
cur_parent = xattr_key.split(".")[3]
181
if cur_parent not in xattr_dict['parents']:
182
xattr_dict['parents'][cur_parent] = {}
184
contri_dict = xattr_dict['parents'][cur_parent]
185
if len(xattr_value) == 34:
186
# 34 characters ("0x" + 32 hex digits) implies a file contri xattr
187
# contri format = 0x<16 hex digits file size><16 hex digits file count>
188
# size is obtained in iatt, file count = 1, dir count=0
189
contri_dict['contri_size'] = int(xattr_value[2:18], 16)
190
contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
191
contri_dict['contri_dir_count'] = 0
193
# This is a directory contri.
194
contri_dict['contri_size'] = int(xattr_value[2:18], 16)
195
contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
196
contri_dict['contri_dir_count'] = int(xattr_value[34:], 16)
198
elif re.search("size", xattr_key):
199
xattr_dict['size'] = int(xattr_value[2:18], 16)
200
xattr_dict['file_count'] = int(xattr_value[18:34], 16)
201
xattr_dict['dir_count'] = int(xattr_value[34:], 16)
202
elif re.search("dirty", xattr_key):
203
if xattr_value == IS_CLEAN:
204
xattr_dict['dirty'] = False
205
elif xattr_value == IS_DIRTY:
206
xattr_dict['dirty'] = True
207
elif re.search("limit_objects", xattr_key):
208
xattr_dict['limit_objects'] = int(xattr_value[2:18], 16)
209
elif re.search("limit_set", xattr_key):
210
xattr_dict['limit_set'] = int(xattr_value[2:18], 16)
214
def verify_file_xattr(path, stbuf = None):
220
stbuf = os.lstat(path)
222
xattr_dict = get_quota_xattr_brick(path)
224
for parent in xattr_dict['parents']:
225
contri_dict = xattr_dict['parents'][parent]
227
if 'contri_size' not in contri_dict or \
228
'contri_file_count' not in contri_dict or \
229
'contri_dir_count' not in contri_dict:
230
print_msg(QUOTA_META_ABSENT, path, xattr_dict, stbuf)
231
fix_xattr(path, False)
233
elif size_differs_lot(contri_dict['contri_size'], stbuf.st_size):
234
print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf)
235
fix_xattr(path, False)
238
if verbose_mode is True:
239
print_msg(QUOTA_VERBOSE, path, xattr_dict, stbuf)
242
def verify_dir_xattr(path, dir_size):
246
xattr_dict = get_quota_xattr_brick(path)
248
stbuf = os.lstat(path)
250
for parent in xattr_dict['parents']:
251
contri_dict = xattr_dict['parents'][parent]
253
if 'size' not in xattr_dict or 'contri_size' not in contri_dict:
254
print_msg(QUOTA_META_ABSENT, path)
255
fix_xattr(path, True)
257
elif size_differs_lot(dir_size, xattr_dict['size']) or \
258
size_differs_lot(contri_dict['contri_size'], xattr_dict['size']):
259
print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf, dir_size)
260
fix_xattr(path, True)
263
if verbose_mode is True:
264
print_msg("VERBOSE", path, xattr_dict, stbuf, dir_size)
267
def walktree(t_dir, hard_link_dict):
268
'''Recursively descend the directory tree rooted at t_dir,
270
t_dir : directory to walk over.
271
hard_link_dict : dict of inodes with multiple hard_links under t_dir
276
for entry in os.listdir(t_dir):
277
pathname = os.path.join(t_dir, entry)
278
stbuf = os.lstat(pathname)
279
if S_ISDIR(stbuf.st_mode):
280
# It's a directory, recurse into it
281
if entry == '.glusterfs':
282
print("skipping " + pathname)
284
descendent_hardlinks = {}
285
subtree_size = walktree(pathname, descendent_hardlinks)
287
subtree_size = fix_hardlink_accounting(descendent_hardlinks,
291
aggr_size[t_dir] = aggr_size[t_dir] + subtree_size
293
elif S_ISREG(stbuf.st_mode) or S_ISLNK(stbuf.st_mode):
294
# Even a symbolic link file may have multiple hardlinks.
296
file_size = stbuf.st_size
297
if stbuf.st_nlink > 2:
298
# send a single element dict to check if file is accounted.
299
file_size = fix_hardlink_accounting({stbuf.st_ino:stbuf.st_size},
304
print_msg("HARD_LINK (skipped)", pathname, "",
307
print_msg("HARD_LINK (accounted)", pathname, "",
309
hard_link_dict[stbuf.st_ino] = stbuf.st_size
311
if t_dir in aggr_size:
312
aggr_size[t_dir] = aggr_size[t_dir] + file_size
314
aggr_size[t_dir] = file_size
315
verify_file_xattr(pathname, stbuf)
318
# Unknown file type, print a message
319
print('Skipping %s, due to file mode' % (pathname))
321
if t_dir not in aggr_size:
324
verify_dir_xattr(t_dir, aggr_size[t_dir])
325
# du also accounts for directory sizes
326
# aggr_size[t_dir] += 4096
329
ret = aggr_size[t_dir]
334
if __name__ == '__main__':
336
parser = argparse.ArgumentParser(description='Diagnose quota accounting issues.', epilog=epilog_msg)
337
parser.add_argument('brick_path', nargs=1,
338
help='The brick path (or any descendent sub-directory of brick path)',
340
parser.add_argument('--full-logs', dest='verbose', action='store_true',
342
log all the xattr values and stat values reported
343
for analysis. [CAUTION: This can give lot of output
344
depending on FS depth. So one has to make sure enough
345
disk space exists if redirecting to file]
348
parser.add_argument('--fix-issues', metavar='mount_path', dest='mnt', action='store',
350
fix accounting issues where the xattr values disagree
351
with stat sizes reported by gluster. A mount is also
352
required for this option to be used.
353
[CAUTION: This will directly modify backend xattr]
356
parser.add_argument('--sub-dir', metavar='sub_dir', dest='sub_dir', action='store',
358
limit the crawling and accounting verification/correction
359
to a specific subdirectory.
363
args = parser.parse_args()
364
verbose_mode = args.verbose
365
brick_path = args.brick_path[0]
366
sub_dir = args.sub_dir
369
if sub_dir is not None:
370
walktree(os.path.join(brick_path, sub_dir), hard_link_dict)
372
walktree(brick_path, hard_link_dict)
374
print("Files verified : " + str(file_count))
375
print("Directories verified : " + str(dir_count))
376
if mnt_path is not None:
377
print("Objects Fixed : " + str(obj_fix_count))