glusterfs

Форк
0
2232 строки · 56.8 Кб
1
/*
2
  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
3
  This file is part of GlusterFS.
4

5
  This file is licensed to you under your choice of the GNU Lesser
6
  General Public License, version 3 or any later version (LGPLv3 or
7
  later), or the GNU General Public License, version 2 (GPLv2), in all
8
  cases as published by the Free Software Foundation.
9
*/
10

11
#include <math.h>
12
#include <glusterfs/glusterfs.h>
13
#include <glusterfs/logging.h>
14
#include <glusterfs/dict.h>
15
#include "io-cache.h"
16
#include "ioc-mem-types.h"
17
#include <glusterfs/statedump.h>
18
#include <assert.h>
19
#include <sys/time.h>
20
#include "io-cache-messages.h"
21
int ioc_log2_page_size;
22

23
uint32_t
24
ioc_get_priority(ioc_table_t *table, const char *path);
25

26
struct volume_options options[];
27

28
static uint32_t
29
ioc_hashfn(void *data, int len)
30
{
31
    off_t offset;
32

33
    offset = *(off_t *)data;
34

35
    return (offset >> ioc_log2_page_size);
36
}
37

38
/* TODO: This function is not used, uncomment when we find a
39
         usage for this function.
40

41
static ioc_inode_t *
42
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
43
{
44
        ioc_table_t *table = NULL;
45

46
        table = ioc_inode->table;
47

48
        list_add_tail (&ioc_inode->inode_lru,
49
                       &table->inode_lru[ioc_inode->weight]);
50

51
        return ioc_inode;
52
}
53

54

55
static ioc_inode_t *
56
ioc_get_inode (dict_t *dict, char *name)
57
{
58
        ioc_inode_t *ioc_inode      = NULL;
59
        data_t      *ioc_inode_data = NULL;
60
        ioc_table_t *table          = NULL;
61

62
        ioc_inode_data = dict_get (dict, name);
63
        if (ioc_inode_data) {
64
                ioc_inode = data_to_ptr (ioc_inode_data);
65
                table = ioc_inode->table;
66

67
                ioc_table_lock (table);
68
                {
69
                        if (list_empty (&ioc_inode->inode_lru)) {
70
                                ioc_inode = ioc_inode_reupdate (ioc_inode);
71
                        }
72
                }
73
                ioc_table_unlock (table);
74
        }
75

76
        return ioc_inode;
77
}
78
*/
79

80
int
81
ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,
82
                 struct iovec *vector, int32_t count, int op_ret, off_t offset)
83
{
84
    size_t size = 0;
85
    off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0,
86
          write_offset = 0;
87
    off_t page_offset = 0, page_end = 0;
88
    ioc_page_t *trav = NULL;
89

90
    size = iov_length(vector, count);
91
    size = min(size, op_ret);
92

93
    rounded_offset = gf_floor(offset, ioc_inode->table->page_size);
94
    rounded_end = gf_roof(offset + size, ioc_inode->table->page_size);
95

96
    trav_offset = rounded_offset;
97
    ioc_inode_lock(ioc_inode);
98
    {
99
        while (trav_offset < rounded_end) {
100
            trav = __ioc_page_get(ioc_inode, trav_offset);
101
            if (trav && trav->ready) {
102
                if (trav_offset == rounded_offset)
103
                    page_offset = offset - rounded_offset;
104
                else
105
                    page_offset = 0;
106

107
                if ((trav_offset + ioc_inode->table->page_size) >=
108
                    rounded_end) {
109
                    page_end = trav->size - (rounded_end - (offset + size));
110
                } else {
111
                    page_end = trav->size;
112
                }
113

114
                iov_range_copy(trav->vector, trav->count, page_offset, vector,
115
                               count, write_offset, page_end - page_offset);
116
            } else if (trav) {
117
                if (!trav->waitq)
118
                    ioc_inode->table->cache_used -= __ioc_page_destroy(trav);
119
            }
120

121
            if (trav_offset == rounded_offset)
122
                write_offset += (ioc_inode->table->page_size -
123
                                 (offset - rounded_offset));
124
            else
125
                write_offset += ioc_inode->table->page_size;
126

127
            trav_offset += ioc_inode->table->page_size;
128
        }
129
    }
130
    ioc_inode_unlock(ioc_inode);
131

132
    return 0;
133
}
134

135
static gf_boolean_t
136
ioc_inode_need_revalidate(ioc_inode_t *ioc_inode)
137
{
138
    ioc_table_t *table = NULL;
139

140
    GF_ASSERT(ioc_inode);
141
    table = ioc_inode->table;
142
    GF_ASSERT(table);
143

144
    return (gf_time() - ioc_inode->cache.last_revalidate >=
145
            table->cache_timeout);
146
}
147

148
/*
149
 * __ioc_inode_flush - flush all the cached pages of the given inode
150
 *
151
 * @ioc_inode:
152
 *
153
 * assumes lock is held
154
 */
155
int64_t
156
__ioc_inode_flush(ioc_inode_t *ioc_inode)
157
{
158
    ioc_page_t *curr = NULL, *next = NULL;
159
    int64_t destroy_size = 0;
160
    int64_t ret = 0;
161

162
    list_for_each_entry_safe(curr, next, &ioc_inode->cache.page_lru, page_lru)
163
    {
164
        ret = __ioc_page_destroy(curr);
165

166
        if (ret != -1)
167
            destroy_size += ret;
168
    }
169

170
    return destroy_size;
171
}
172

173
void
174
ioc_inode_flush(ioc_inode_t *ioc_inode)
175
{
176
    int64_t destroy_size = 0;
177

178
    ioc_inode_lock(ioc_inode);
179
    {
180
        destroy_size = __ioc_inode_flush(ioc_inode);
181
    }
182
    ioc_inode_unlock(ioc_inode);
183

184
    if (destroy_size) {
185
        ioc_table_lock(ioc_inode->table);
186
        {
187
            ioc_inode->table->cache_used -= destroy_size;
188
        }
189
        ioc_table_unlock(ioc_inode->table);
190
    }
191

192
    return;
193
}
194

195
int32_t
196
ioc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
197
                int32_t op_ret, int32_t op_errno, struct iatt *preop,
198
                struct iatt *postop, dict_t *xdata)
199
{
200
    STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata);
201
    return 0;
202
}
203

204
int32_t
205
ioc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
206
            int32_t valid, dict_t *xdata)
207
{
208
    uint64_t ioc_inode = 0;
209

210
    inode_ctx_get(loc->inode, this, &ioc_inode);
211

212
    if (ioc_inode &&
213
        ((valid & GF_SET_ATTR_ATIME) || (valid & GF_SET_ATTR_MTIME)))
214
        ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
215

216
    STACK_WIND(frame, ioc_setattr_cbk, FIRST_CHILD(this),
217
               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
218

219
    return 0;
220
}
221

222
int32_t
223
ioc_inode_update(xlator_t *this, inode_t *inode, char *path, struct iatt *iabuf)
224
{
225
    ioc_table_t *table = NULL;
226
    uint64_t tmp_ioc_inode = 0;
227
    ioc_inode_t *ioc_inode = NULL;
228
    uint32_t weight = 0xffffffff;
229
    gf_boolean_t cache_still_valid = _gf_false;
230

231
    if (!this || !inode)
232
        goto out;
233

234
    table = this->private;
235

236
    LOCK(&inode->lock);
237
    {
238
        (void)__inode_ctx_get(inode, this, &tmp_ioc_inode);
239
        ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
240

241
        if (!ioc_inode) {
242
            weight = ioc_get_priority(table, path);
243

244
            ioc_inode = ioc_inode_create(table, inode, weight);
245

246
            (void)__inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
247
        }
248
    }
249
    UNLOCK(&inode->lock);
250

251
    ioc_inode_lock(ioc_inode);
252
    {
253
        if (ioc_inode->cache.mtime == 0) {
254
            ioc_inode->cache.mtime = iabuf->ia_mtime;
255
            ioc_inode->cache.mtime_nsec = iabuf->ia_mtime_nsec;
256
        }
257

258
        ioc_inode->ia_size = iabuf->ia_size;
259
    }
260
    ioc_inode_unlock(ioc_inode);
261

262
    cache_still_valid = ioc_cache_still_valid(ioc_inode, iabuf);
263

264
    if (!cache_still_valid) {
265
        ioc_inode_flush(ioc_inode);
266
    }
267

268
    ioc_table_lock(ioc_inode->table);
269
    {
270
        list_move_tail(&ioc_inode->inode_lru,
271
                       &table->inode_lru[ioc_inode->weight]);
272
    }
273
    ioc_table_unlock(ioc_inode->table);
274

275
out:
276
    return 0;
277
}
278

279
int32_t
280
ioc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
281
               int32_t op_ret, int32_t op_errno, inode_t *inode,
282
               struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
283
{
284
    ioc_local_t *local = NULL;
285

286
    if (op_ret != 0)
287
        goto out;
288

289
    local = frame->local;
290
    if (local == NULL) {
291
        op_ret = -1;
292
        op_errno = EINVAL;
293
        goto out;
294
    }
295

296
    if (!this || !this->private) {
297
        op_ret = -1;
298
        op_errno = EINVAL;
299
        goto out;
300
    }
301

302
    ioc_inode_update(this, inode, (char *)local->file_loc.path, stbuf);
303

304
out:
305
    if (frame->local != NULL) {
306
        local = frame->local;
307
        loc_wipe(&local->file_loc);
308
    }
309

310
    STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
311
                        postparent);
312
    return 0;
313
}
314

315
int32_t
316
ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
317
{
318
    ioc_local_t *local = NULL;
319
    int32_t op_errno = -1, ret = -1;
320

321
    local = mem_get0(this->local_pool);
322
    if (local == NULL) {
323
        op_errno = ENOMEM;
324
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
325
        goto unwind;
326
    }
327

328
    ret = loc_copy(&local->file_loc, loc);
329
    if (ret != 0) {
330
        op_errno = ENOMEM;
331
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
332
        goto unwind;
333
    }
334

335
    frame->local = local;
336

337
    STACK_WIND(frame, ioc_lookup_cbk, FIRST_CHILD(this),
338
               FIRST_CHILD(this)->fops->lookup, loc, xdata);
339

340
    return 0;
341

342
unwind:
343
    if (local != NULL) {
344
        loc_wipe(&local->file_loc);
345
        mem_put(local);
346
    }
347

348
    STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
349

350
    return 0;
351
}
352

353
/*
354
 * ioc_forget -
355
 *
356
 * @frame:
357
 * @this:
358
 * @inode:
359
 *
360
 */
361
int32_t
362
ioc_forget(xlator_t *this, inode_t *inode)
363
{
364
    uint64_t ioc_inode = 0;
365

366
    inode_ctx_get(inode, this, &ioc_inode);
367

368
    if (ioc_inode)
369
        ioc_inode_destroy((ioc_inode_t *)(long)ioc_inode);
370

371
    return 0;
372
}
373

374
static int32_t
375
ioc_invalidate(xlator_t *this, inode_t *inode)
376
{
377
    uint64_t ioc_inode = 0;
378

379
    inode_ctx_get(inode, this, &ioc_inode);
380

381
    if (ioc_inode)
382
        ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode);
383

384
    return 0;
385
}
386

387
/*
388
 * ioc_cache_validate_cbk -
389
 *
390
 * @frame:
391
 * @cookie:
392
 * @this:
393
 * @op_ret:
394
 * @op_errno:
395
 * @buf
396
 *
397
 */
398
int32_t
399
ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
400
                       int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
401
                       dict_t *xdata)
402
{
403
    ioc_local_t *local = NULL;
404
    ioc_inode_t *ioc_inode = NULL;
405
    size_t destroy_size = 0;
406
    struct iatt *local_stbuf = NULL;
407

408
    local = frame->local;
409
    ioc_inode = local->inode;
410
    local_stbuf = stbuf;
411

412
    if ((op_ret == -1) ||
413
        ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
414
        gf_msg_debug(ioc_inode->table->xl->name, 0,
415
                     "cache for inode(%p) is invalid. flushing all pages",
416
                     ioc_inode);
417
        /* NOTE: only pages with no waiting frames are flushed by
418
         * ioc_inode_flush. page_fault will be generated for all
419
         * the pages which have waiting frames by ioc_inode_wakeup()
420
         */
421
        ioc_inode_lock(ioc_inode);
422
        {
423
            destroy_size = __ioc_inode_flush(ioc_inode);
424
            if (op_ret >= 0) {
425
                ioc_inode->cache.mtime = stbuf->ia_mtime;
426
                ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
427
            }
428
        }
429
        ioc_inode_unlock(ioc_inode);
430
        local_stbuf = NULL;
431
    }
432

433
    if (destroy_size) {
434
        ioc_table_lock(ioc_inode->table);
435
        {
436
            ioc_inode->table->cache_used -= destroy_size;
437
        }
438
        ioc_table_unlock(ioc_inode->table);
439
    }
440

441
    if (op_ret < 0)
442
        local_stbuf = NULL;
443

444
    ioc_inode_lock(ioc_inode);
445
    {
446
        ioc_inode->cache.last_revalidate = gf_time();
447
    }
448
    ioc_inode_unlock(ioc_inode);
449

450
    ioc_inode_wakeup(frame, ioc_inode, local_stbuf);
451

452
    /* any page-fault initiated by ioc_inode_wakeup() will have its own
453
     * fd_ref on fd, safe to unref validate frame's private copy
454
     */
455
    fd_unref(local->fd);
456
    dict_unref(local->xattr_req);
457

458
    STACK_DESTROY(frame->root);
459

460
    return 0;
461
}
462

463
int32_t
464
ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page)
465
{
466
    ioc_waitq_t *waiter = NULL, *trav = NULL;
467
    uint32_t page_found = 0;
468
    int32_t ret = 0;
469

470
    trav = ioc_inode->waitq;
471

472
    while (trav) {
473
        if (trav->data == page) {
474
            page_found = 1;
475
            break;
476
        }
477
        trav = trav->next;
478
    }
479

480
    if (!page_found) {
481
        waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t);
482
        if (waiter == NULL) {
483
            gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM,
484
                    IO_CACHE_MSG_NO_MEMORY, NULL);
485
            ret = -ENOMEM;
486
            goto out;
487
        }
488

489
        waiter->data = page;
490
        waiter->next = ioc_inode->waitq;
491
        ioc_inode->waitq = waiter;
492
    }
493

494
out:
495
    return ret;
496
}
497

498
/*
499
 * ioc_cache_validate -
500
 *
501
 * @frame:
502
 * @ioc_inode:
503
 * @fd:
504
 *
505
 */
506
int32_t
507
ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
508
                   ioc_page_t *page)
509
{
510
    call_frame_t *validate_frame = NULL;
511
    ioc_local_t *validate_local = NULL;
512
    ioc_local_t *local = NULL;
513
    int32_t ret = 0;
514

515
    local = frame->local;
516
    validate_local = mem_get0(THIS->local_pool);
517
    if (validate_local == NULL) {
518
        ret = -1;
519
        local->op_ret = -1;
520
        local->op_errno = ENOMEM;
521
        gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
522
                IO_CACHE_MSG_NO_MEMORY, NULL);
523
        goto out;
524
    }
525

526
    validate_frame = copy_frame(frame);
527
    if (validate_frame == NULL) {
528
        ret = -1;
529
        local->op_ret = -1;
530
        local->op_errno = ENOMEM;
531
        mem_put(validate_local);
532
        gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,
533
                IO_CACHE_MSG_NO_MEMORY, NULL);
534
        goto out;
535
    }
536

537
    validate_local->fd = fd_ref(fd);
538
    validate_local->inode = ioc_inode;
539
    if (local && local->xattr_req)
540
        validate_local->xattr_req = dict_ref(local->xattr_req);
541
    validate_frame->local = validate_local;
542

543
    STACK_WIND(validate_frame, ioc_cache_validate_cbk, FIRST_CHILD(frame->this),
544
               FIRST_CHILD(frame->this)->fops->fstat, fd,
545
               validate_local->xattr_req);
546

547
out:
548
    return ret;
549
}
550

551
/*
 * is_match - test a path against a shell-style pattern
 *
 * @path: string to test
 * @pattern: fnmatch() pattern; FNM_NOESCAPE is set, so a backslash in the
 *           pattern is an ordinary character
 *
 * Returns 1 when fnmatch() reports a match, 0 otherwise.
 */
static uint32_t
is_match(const char *path, const char *pattern)
{
    return (fnmatch(pattern, path, FNM_NOESCAPE) == 0) ? 1 : 0;
}
560

561
uint32_t
562
ioc_get_priority(ioc_table_t *table, const char *path)
563
{
564
    uint32_t priority = 1;
565
    struct ioc_priority *curr = NULL;
566

567
    if (list_empty(&table->priority_list) || !path)
568
        return priority;
569

570
    priority = 0;
571
    list_for_each_entry(curr, &table->priority_list, list)
572
    {
573
        if (is_match(path, curr->pattern))
574
            priority = curr->priority;
575
    }
576

577
    return priority;
578
}
579

580
/*
581
 * ioc_open_cbk - open callback for io cache
582
 *
583
 * @frame: call frame
584
 * @cookie:
585
 * @this:
586
 * @op_ret:
587
 * @op_errno:
588
 * @fd:
589
 *
590
 */
591
int32_t
592
ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
593
             int32_t op_errno, fd_t *fd, dict_t *xdata)
594
{
595
    uint64_t tmp_ioc_inode = 0;
596
    ioc_local_t *local = NULL;
597
    ioc_table_t *table = NULL;
598
    ioc_inode_t *ioc_inode = NULL;
599

600
    local = frame->local;
601
    if (!this || !this->private) {
602
        op_ret = -1;
603
        op_errno = EINVAL;
604
        goto out;
605
    }
606

607
    table = this->private;
608

609
    if (op_ret != -1) {
610
        inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
611
        ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
612

613
        // TODO: see why inode context is NULL and handle it.
614
        if (!ioc_inode) {
615
            gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
616
                    IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s",
617
                    uuid_utoa(fd->inode->gfid), NULL);
618
            goto out;
619
        }
620

621
        ioc_table_lock(ioc_inode->table);
622
        {
623
            list_move_tail(&ioc_inode->inode_lru,
624
                           &table->inode_lru[ioc_inode->weight]);
625
        }
626
        ioc_table_unlock(ioc_inode->table);
627

628
        ioc_inode_lock(ioc_inode);
629
        {
630
            if ((table->min_file_size > ioc_inode->ia_size) ||
631
                ((table->max_file_size > 0) &&
632
                 (table->max_file_size < ioc_inode->ia_size))) {
633
                fd_ctx_set(fd, this, 1);
634
            }
635
        }
636
        ioc_inode_unlock(ioc_inode);
637

638
        /* If O_DIRECT open, we disable caching on it */
639
        if ((local->flags & O_DIRECT)) {
640
            /* O_DIRECT is only for one fd, not the inode
641
             * as a whole
642
             */
643
            fd_ctx_set(fd, this, 1);
644
        }
645
    }
646

647
out:
648
    mem_put(local);
649
    frame->local = NULL;
650

651
    STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
652

653
    return 0;
654
}
655

656
/*
657
 * ioc_create_cbk - create callback for io cache
658
 *
659
 * @frame: call frame
660
 * @cookie:
661
 * @this:
662
 * @op_ret:
663
 * @op_errno:
664
 * @fd:
665
 * @inode:
666
 * @buf:
667
 *
668
 */
669
int32_t
670
ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
671
               int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
672
               struct iatt *buf, struct iatt *preparent,
673
               struct iatt *postparent, dict_t *xdata)
674
{
675
    ioc_local_t *local = NULL;
676
    ioc_table_t *table = NULL;
677
    ioc_inode_t *ioc_inode = NULL;
678
    uint32_t weight = 0xffffffff;
679
    const char *path = NULL;
680
    int ret = -1;
681

682
    local = frame->local;
683
    if (!this || !this->private) {
684
        op_ret = -1;
685
        op_errno = EINVAL;
686
        goto out;
687
    }
688

689
    table = this->private;
690
    path = local->file_loc.path;
691

692
    if (op_ret != -1) {
693
        /* assign weight */
694
        weight = ioc_get_priority(table, path);
695

696
        ioc_inode = ioc_inode_create(table, inode, weight);
697

698
        ioc_inode_lock(ioc_inode);
699
        {
700
            ioc_inode->cache.mtime = buf->ia_mtime;
701
            ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
702
            ioc_inode->ia_size = buf->ia_size;
703

704
            if ((table->min_file_size > ioc_inode->ia_size) ||
705
                ((table->max_file_size > 0) &&
706
                 (table->max_file_size < ioc_inode->ia_size))) {
707
                ret = fd_ctx_set(fd, this, 1);
708
                if (ret)
709
                    gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
710
                            IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
711
                            local->file_loc.path, NULL);
712
            }
713
        }
714
        ioc_inode_unlock(ioc_inode);
715

716
        inode_ctx_put(fd->inode, this, (uint64_t)(long)ioc_inode);
717

718
        /* If O_DIRECT open, we disable caching on it */
719
        if (local->flags & O_DIRECT) {
720
            /*
721
             * O_DIRECT is only for one fd, not the inode
722
             * as a whole */
723
            ret = fd_ctx_set(fd, this, 1);
724
            if (ret)
725
                gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
726
                        IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
727
                        local->file_loc.path, NULL);
728
        }
729

730
        /* if weight == 0, we disable caching on it */
731
        if (!weight) {
732
            /* we allow a pattern-matched cache disable this way */
733
            ret = fd_ctx_set(fd, this, 1);
734
            if (ret)
735
                gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
736
                        IO_CACHE_MSG_SET_FD_FAILED, "path=%s",
737
                        local->file_loc.path, NULL);
738
        }
739
    }
740

741
out:
742
    frame->local = NULL;
743
    mem_put(local);
744

745
    STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
746
                        preparent, postparent, xdata);
747

748
    return 0;
749
}
750

751
int32_t
752
ioc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
753
              int32_t op_errno, inode_t *inode, struct iatt *buf,
754
              struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
755
{
756
    ioc_local_t *local = NULL;
757
    ioc_table_t *table = NULL;
758
    ioc_inode_t *ioc_inode = NULL;
759
    uint32_t weight = 0xffffffff;
760
    const char *path = NULL;
761

762
    local = frame->local;
763
    if (!this || !this->private) {
764
        op_ret = -1;
765
        op_errno = EINVAL;
766
        goto out;
767
    }
768

769
    table = this->private;
770
    path = local->file_loc.path;
771

772
    if (op_ret != -1) {
773
        /* assign weight */
774
        weight = ioc_get_priority(table, path);
775

776
        ioc_inode = ioc_inode_create(table, inode, weight);
777

778
        ioc_inode_lock(ioc_inode);
779
        {
780
            ioc_inode->cache.mtime = buf->ia_mtime;
781
            ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
782
            ioc_inode->ia_size = buf->ia_size;
783
        }
784
        ioc_inode_unlock(ioc_inode);
785

786
        inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);
787
    }
788

789
out:
790
    frame->local = NULL;
791

792
    loc_wipe(&local->file_loc);
793
    mem_put(local);
794

795
    STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
796
                        postparent, xdata);
797
    return 0;
798
}
799

800
int
801
ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
802
          dev_t rdev, mode_t umask, dict_t *xdata)
803
{
804
    ioc_local_t *local = NULL;
805
    int32_t op_errno = -1, ret = -1;
806

807
    local = mem_get0(this->local_pool);
808
    if (local == NULL) {
809
        op_errno = ENOMEM;
810
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
811
        goto unwind;
812
    }
813

814
    ret = loc_copy(&local->file_loc, loc);
815
    if (ret != 0) {
816
        op_errno = ENOMEM;
817
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);
818
        goto unwind;
819
    }
820

821
    frame->local = local;
822

823
    STACK_WIND(frame, ioc_mknod_cbk, FIRST_CHILD(this),
824
               FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
825
    return 0;
826

827
unwind:
828
    if (local != NULL) {
829
        loc_wipe(&local->file_loc);
830
        mem_put(local);
831
    }
832

833
    STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
834
                        NULL);
835

836
    return 0;
837
}
838

839
/*
840
 * ioc_open - open fop for io cache
841
 * @frame:
842
 * @this:
843
 * @loc:
844
 * @flags:
845
 *
846
 */
847
int32_t
848
ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
849
         fd_t *fd, dict_t *xdata)
850
{
851
    ioc_local_t *local = NULL;
852

853
    local = mem_get0(this->local_pool);
854
    if (local == NULL) {
855
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
856
        STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
857
        return 0;
858
    }
859

860
    local->flags = flags;
861
    local->file_loc.path = loc->path;
862
    local->file_loc.inode = loc->inode;
863

864
    frame->local = local;
865

866
    STACK_WIND(frame, ioc_open_cbk, FIRST_CHILD(this),
867
               FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
868

869
    return 0;
870
}
871

872
/*
873
 * ioc_create - create fop for io cache
874
 *
875
 * @frame:
876
 * @this:
877
 * @pathname:
878
 * @flags:
879
 * @mode:
880
 *
881
 */
882
int32_t
883
ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
884
           mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
885
{
886
    ioc_local_t *local = NULL;
887

888
    local = mem_get0(this->local_pool);
889
    if (local == NULL) {
890
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
891
        STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
892
                            NULL, NULL);
893
        return 0;
894
    }
895

896
    local->flags = flags;
897
    local->file_loc.path = loc->path;
898
    frame->local = local;
899

900
    STACK_WIND(frame, ioc_create_cbk, FIRST_CHILD(this),
901
               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
902
               xdata);
903

904
    return 0;
905
}
906

907
/*
908
 * ioc_release - release fop for io cache
909
 *
910
 * @frame:
911
 * @this:
912
 * @fd:
913
 *
914
 */
915
int32_t
916
ioc_release(xlator_t *this, fd_t *fd)
917
{
918
    return 0;
919
}
920

921
int32_t
922
ioc_need_prune(ioc_table_t *table)
923
{
924
    int64_t cache_difference = 0;
925

926
    ioc_table_lock(table);
927
    {
928
        cache_difference = table->cache_used - table->cache_size;
929
    }
930
    ioc_table_unlock(table);
931

932
    if (cache_difference > 0)
933
        return 1;
934
    else
935
        return 0;
936
}
937

938
/*
939
 * ioc_dispatch_requests -
940
 *
941
 * @frame:
942
 * @inode:
943
 *
944
 *
945
 */
946
void
947
ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
948
                      off_t offset, size_t size)
949
{
950
    ioc_local_t *local = NULL;
951
    ioc_table_t *table = NULL;
952
    ioc_page_t *trav = NULL;
953
    ioc_waitq_t *waitq = NULL;
954
    off_t rounded_offset = 0;
955
    off_t rounded_end = 0;
956
    off_t trav_offset = 0;
957
    int32_t fault = 0;
958
    size_t trav_size = 0;
959
    off_t local_offset = 0;
960
    int32_t ret = -1;
961
    int8_t need_validate = 0;
962
    int8_t might_need_validate = 0; /*
963
                                     * if a page exists, do we need
964
                                     * to validate it?
965
                                     */
966
    local = frame->local;
967
    table = ioc_inode->table;
968

969
    rounded_offset = gf_floor(offset, table->page_size);
970
    rounded_end = gf_roof(offset + size, table->page_size);
971
    trav_offset = rounded_offset;
972

973
    /* once a frame does read, it should be waiting on something */
974
    local->wait_count++;
975

976
    /* Requested region can fall in three different pages,
977
     * 1. Ready - region is already in cache, we just have to serve it.
978
     * 2. In-transit - page fault has been generated on this page, we need
979
     *    to wait till the page is ready
980
     * 3. Fault - page is not in cache, we have to generate a page fault
981
     */
982

983
    might_need_validate = ioc_inode_need_revalidate(ioc_inode);
984

985
    while (trav_offset < rounded_end) {
986
        ioc_inode_lock(ioc_inode);
987
        {
988
            /* look for requested region in the cache */
989
            trav = __ioc_page_get(ioc_inode, trav_offset);
990

991
            local_offset = max(trav_offset, offset);
992
            trav_size = min(((offset + size) - local_offset), table->page_size);
993

994
            if (!trav) {
995
                /* page not in cache, we need to generate page
996
                 * fault
997
                 */
998
                trav = __ioc_page_create(ioc_inode, trav_offset);
999
                fault = 1;
1000
                if (!trav) {
1001
                    gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,
1002
                            IO_CACHE_MSG_NO_MEMORY, NULL);
1003
                    local->op_ret = -1;
1004
                    local->op_errno = ENOMEM;
1005
                    ioc_inode_unlock(ioc_inode);
1006
                    goto out;
1007
                }
1008
            }
1009

1010
            __ioc_wait_on_page(trav, frame, local_offset, trav_size);
1011

1012
            if (trav->ready) {
1013
                /* page found in cache */
1014
                if (!might_need_validate && !ioc_inode->waitq) {
1015
                    /* fresh enough */
1016
                    gf_msg_trace(frame->this->name, 0,
1017
                                 "cache hit for "
1018
                                 "trav_offset=%" PRId64
1019
                                 "/local_"
1020
                                 "offset=%" PRId64 "",
1021
                                 trav_offset, local_offset);
1022
                    waitq = __ioc_page_wakeup(trav, trav->op_errno);
1023
                } else {
1024
                    /* if waitq already exists, fstat
1025
                     * revalidate is
1026
                     * already on the way
1027
                     */
1028
                    if (!ioc_inode->waitq) {
1029
                        need_validate = 1;
1030
                    }
1031

1032
                    ret = ioc_wait_on_inode(ioc_inode, trav);
1033
                    if (ret < 0) {
1034
                        local->op_ret = -1;
1035
                        local->op_errno = -ret;
1036
                        need_validate = 0;
1037

1038
                        waitq = __ioc_page_wakeup(trav, trav->op_errno);
1039
                        ioc_inode_unlock(ioc_inode);
1040

1041
                        ioc_waitq_return(waitq);
1042
                        waitq = NULL;
1043
                        goto out;
1044
                    }
1045
                }
1046
            }
1047
        }
1048
        ioc_inode_unlock(ioc_inode);
1049

1050
        ioc_waitq_return(waitq);
1051
        waitq = NULL;
1052

1053
        if (fault) {
1054
            fault = 0;
1055
            /* new page created, increase the table->cache_used */
1056
            ioc_page_fault(ioc_inode, frame, fd, trav_offset);
1057
        }
1058

1059
        if (need_validate) {
1060
            need_validate = 0;
1061
            gf_msg_trace(frame->this->name, 0,
1062
                         "sending validate request for "
1063
                         "inode(%s) at offset=%" PRId64 "",
1064
                         uuid_utoa(fd->inode->gfid), trav_offset);
1065
            ret = ioc_cache_validate(frame, ioc_inode, fd, trav);
1066
            if (ret == -1) {
1067
                ioc_inode_lock(ioc_inode);
1068
                {
1069
                    waitq = __ioc_page_wakeup(trav, trav->op_errno);
1070
                }
1071
                ioc_inode_unlock(ioc_inode);
1072

1073
                ioc_waitq_return(waitq);
1074
                waitq = NULL;
1075
                goto out;
1076
            }
1077
        }
1078

1079
        trav_offset += table->page_size;
1080
    }
1081

1082
out:
1083
    ioc_frame_return(frame);
1084

1085
    if (ioc_need_prune(ioc_inode->table)) {
1086
        ioc_prune(ioc_inode->table);
1087
    }
1088

1089
    return;
1090
}
1091

1092
/*
1093
 * ioc_readv -
1094
 *
1095
 * @frame:
1096
 * @this:
1097
 * @fd:
1098
 * @size:
1099
 * @offset:
1100
 *
1101
 */
1102
int32_t
1103
ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
1104
          off_t offset, uint32_t flags, dict_t *xdata)
1105
{
1106
    uint64_t tmp_ioc_inode = 0;
1107
    ioc_inode_t *ioc_inode = NULL;
1108
    ioc_local_t *local = NULL;
1109
    uint32_t weight = 0;
1110
    ioc_table_t *table = NULL;
1111
    int32_t op_errno = EINVAL;
1112
    uint64_t fd_ctx = 0;
1113

1114
    if (!this) {
1115
        goto out;
1116
    }
1117

1118
    inode_ctx_get(fd->inode, this, &tmp_ioc_inode);
1119
    ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
1120
    if (!ioc_inode) {
1121
        /* caching disabled, go ahead with normal readv */
1122
        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
1123
                        FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
1124
                        xdata);
1125
        return 0;
1126
    }
1127

1128
    if (flags & O_DIRECT) {
1129
        /* disable caching for this fd, if O_DIRECT is used */
1130
        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
1131
                        FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
1132
                        xdata);
1133
        return 0;
1134
    }
1135

1136
    table = this->private;
1137

1138
    if (!table) {
1139
        gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL,
1140
                NULL);
1141
        op_errno = EINVAL;
1142
        goto out;
1143
    }
1144

1145
    ioc_inode_lock(ioc_inode);
1146
    {
1147
        if (!ioc_inode->cache.page_table) {
1148
            ioc_inode->cache.page_table = rbthash_table_init(
1149
                this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, ioc_hashfn, NULL, 0,
1150
                table->mem_pool);
1151

1152
            if (ioc_inode->cache.page_table == NULL) {
1153
                op_errno = ENOMEM;
1154
                ioc_inode_unlock(ioc_inode);
1155
                goto out;
1156
            }
1157
        }
1158
    }
1159
    ioc_inode_unlock(ioc_inode);
1160

1161
    fd_ctx = fd_ctx_get(fd, this);
1162
    if (fd_ctx) {
1163
        /* disable caching for this fd, go ahead with normal readv */
1164
        STACK_WIND_TAIL(frame, FIRST_CHILD(this),
1165
                        FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
1166
                        xdata);
1167
        return 0;
1168
    }
1169

1170
    local = mem_get0(this->local_pool);
1171
    if (local == NULL) {
1172
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
1173
        op_errno = ENOMEM;
1174
        goto out;
1175
    }
1176

1177
    INIT_LIST_HEAD(&local->fill_list);
1178

1179
    frame->local = local;
1180
    local->pending_offset = offset;
1181
    local->pending_size = size;
1182
    local->offset = offset;
1183
    local->size = size;
1184
    local->inode = ioc_inode;
1185
    local->xattr_req = dict_ref(xdata);
1186

1187
    gf_msg_trace(this->name, 0,
1188
                 "NEW REQ (%p) offset "
1189
                 "= %" PRId64 " && size = %" GF_PRI_SIZET "",
1190
                 frame, offset, size);
1191

1192
    weight = ioc_inode->weight;
1193

1194
    ioc_table_lock(ioc_inode->table);
1195
    {
1196
        list_move_tail(&ioc_inode->inode_lru,
1197
                       &ioc_inode->table->inode_lru[weight]);
1198
    }
1199
    ioc_table_unlock(ioc_inode->table);
1200

1201
    ioc_dispatch_requests(frame, ioc_inode, fd, offset, size);
1202
    return 0;
1203

1204
out:
1205
    STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
1206
    return 0;
1207
}
1208

1209
/*
1210
 * ioc_writev_cbk -
1211
 *
1212
 * @frame:
1213
 * @cookie:
1214
 * @this:
1215
 * @op_ret:
1216
 * @op_errno:
1217
 *
1218
 */
1219
int32_t
1220
ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
1221
               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
1222
               struct iatt *postbuf, dict_t *xdata)
1223
{
1224
    ioc_local_t *local = NULL;
1225
    uint64_t ioc_inode = 0;
1226

1227
    local = frame->local;
1228
    frame->local = NULL;
1229
    inode_ctx_get(local->fd->inode, this, &ioc_inode);
1230

1231
    if (op_ret >= 0) {
1232
        ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector,
1233
                         local->op_ret, op_ret, local->offset);
1234
    }
1235

1236
    STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
1237
                        xdata);
1238
    if (local->iobref) {
1239
        iobref_unref(local->iobref);
1240
        GF_FREE(local->vector);
1241
    }
1242

1243
    mem_put(local);
1244
    return 0;
1245
}
1246

1247
/*
1248
 * ioc_writev
1249
 *
1250
 * @frame:
1251
 * @this:
1252
 * @fd:
1253
 * @vector:
1254
 * @count:
1255
 * @offset:
1256
 *
1257
 */
1258
int32_t
1259
ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
1260
           int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
1261
           dict_t *xdata)
1262
{
1263
    ioc_local_t *local = NULL;
1264
    uint64_t ioc_inode = 0;
1265

1266
    local = mem_get0(this->local_pool);
1267
    if (local == NULL) {
1268
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
1269

1270
        STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL);
1271
        return 0;
1272
    }
1273

1274
    /* TODO: why is it not fd_ref'ed */
1275
    local->fd = fd;
1276
    frame->local = local;
1277

1278
    inode_ctx_get(fd->inode, this, &ioc_inode);
1279
    if (ioc_inode) {
1280
        local->iobref = iobref_ref(iobref);
1281
        local->vector = iov_dup(vector, count);
1282
        local->op_ret = count;
1283
        local->offset = offset;
1284
    }
1285

1286
    STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this),
1287
               FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
1288
               flags, iobref, xdata);
1289

1290
    return 0;
1291
}
1292

1293
/*
1294
 * ioc_truncate_cbk -
1295
 *
1296
 * @frame:
1297
 * @cookie:
1298
 * @this:
1299
 * @op_ret:
1300
 * @op_errno:
1301
 * @buf:
1302
 *
1303
 */
1304
int32_t
1305
ioc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
1306
                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
1307
                 struct iatt *postbuf, dict_t *xdata)
1308
{
1309
    STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
1310
                        xdata);
1311
    return 0;
1312
}
1313

1314
/*
1315
 * ioc_ftruncate_cbk -
1316
 *
1317
 * @frame:
1318
 * @cookie:
1319
 * @this:
1320
 * @op_ret:
1321
 * @op_errno:
1322
 * @buf:
1323
 *
1324
 */
1325
int32_t
1326
ioc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
1327
                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
1328
                  struct iatt *postbuf, dict_t *xdata)
1329
{
1330
    STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
1331
                        xdata);
1332
    return 0;
1333
}
1334

1335
/*
1336
 * ioc_truncate -
1337
 *
1338
 * @frame:
1339
 * @this:
1340
 * @loc:
1341
 * @offset:
1342
 *
1343
 */
1344
int32_t
1345
ioc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
1346
             dict_t *xdata)
1347
{
1348
    uint64_t ioc_inode = 0;
1349

1350
    inode_ctx_get(loc->inode, this, &ioc_inode);
1351

1352
    if (ioc_inode)
1353
        ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
1354

1355
    STACK_WIND(frame, ioc_truncate_cbk, FIRST_CHILD(this),
1356
               FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
1357
    return 0;
1358
}
1359

1360
/*
1361
 * ioc_ftruncate -
1362
 *
1363
 * @frame:
1364
 * @this:
1365
 * @fd:
1366
 * @offset:
1367
 *
1368
 */
1369
int32_t
1370
ioc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
1371
              dict_t *xdata)
1372
{
1373
    uint64_t ioc_inode = 0;
1374

1375
    inode_ctx_get(fd->inode, this, &ioc_inode);
1376

1377
    if (ioc_inode)
1378
        ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
1379

1380
    STACK_WIND(frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
1381
               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
1382
    return 0;
1383
}
1384

1385
int32_t
1386
ioc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
1387
           int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
1388
{
1389
    STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata);
1390
    return 0;
1391
}
1392

1393
int32_t
1394
ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
1395
       struct gf_flock *lock, dict_t *xdata)
1396
{
1397
    ioc_inode_t *ioc_inode = NULL;
1398
    uint64_t tmp_inode = 0;
1399

1400
    inode_ctx_get(fd->inode, this, &tmp_inode);
1401
    ioc_inode = (ioc_inode_t *)(long)tmp_inode;
1402
    if (!ioc_inode) {
1403
        gf_msg_debug(this->name, EBADFD,
1404
                     "inode context is NULL: returning EBADFD");
1405
        STACK_UNWIND_STRICT(lk, frame, -1, EBADFD, NULL, NULL);
1406
        return 0;
1407
    }
1408

1409
    ioc_inode_lock(ioc_inode);
1410
    {
1411
        ioc_inode->cache.last_revalidate = gf_time();
1412
    }
1413
    ioc_inode_unlock(ioc_inode);
1414

1415
    STACK_WIND(frame, ioc_lk_cbk, FIRST_CHILD(this),
1416
               FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
1417

1418
    return 0;
1419
}
1420

1421
int
1422
ioc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
1423
                 int op_errno, gf_dirent_t *entries, dict_t *xdata)
1424
{
1425
    gf_dirent_t *entry = NULL;
1426
    char *path = NULL;
1427
    fd_t *fd = NULL;
1428

1429
    fd = frame->local;
1430
    frame->local = NULL;
1431

1432
    if (op_ret <= 0)
1433
        goto unwind;
1434

1435
    list_for_each_entry(entry, &entries->list, list)
1436
    {
1437
        inode_path(fd->inode, entry->d_name, &path);
1438
        ioc_inode_update(this, entry->inode, path, &entry->d_stat);
1439
        GF_FREE(path);
1440
        path = NULL;
1441
    }
1442

1443
unwind:
1444
    STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
1445

1446
    return 0;
1447
}
1448

1449
int
1450
ioc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
1451
             off_t offset, dict_t *dict)
1452
{
1453
    frame->local = fd;
1454

1455
    STACK_WIND(frame, ioc_readdirp_cbk, FIRST_CHILD(this),
1456
               FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
1457

1458
    return 0;
1459
}
1460

1461
static int32_t
1462
ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
1463
                int32_t op_ret, int32_t op_errno, struct iatt *pre,
1464
                struct iatt *post, dict_t *xdata)
1465
{
1466
    STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
1467
    return 0;
1468
}
1469

1470
static int32_t
1471
ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
1472
            size_t len, dict_t *xdata)
1473
{
1474
    uint64_t ioc_inode = 0;
1475

1476
    inode_ctx_get(fd->inode, this, &ioc_inode);
1477

1478
    if (ioc_inode)
1479
        ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
1480

1481
    STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
1482
               FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
1483
    return 0;
1484
}
1485

1486
static int32_t
1487
ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
1488
                 int32_t op_ret, int32_t op_errno, struct iatt *pre,
1489
                 struct iatt *post, dict_t *xdata)
1490
{
1491
    STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata);
1492
    return 0;
1493
}
1494

1495
static int32_t
1496
ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
1497
             off_t len, dict_t *xdata)
1498
{
1499
    uint64_t ioc_inode = 0;
1500

1501
    inode_ctx_get(fd->inode, this, &ioc_inode);
1502

1503
    if (ioc_inode)
1504
        ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);
1505

1506
    STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
1507
               FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
1508
    return 0;
1509
}
1510

1511
int32_t
1512
ioc_get_priority_list(const char *opt_str, struct list_head *first)
1513
{
1514
    int32_t max_pri = 1;
1515
    char *tmp_str = NULL;
1516
    char *tmp_str1 = NULL;
1517
    char *tmp_str2 = NULL;
1518
    char *dup_str = NULL;
1519
    char *stripe_str = NULL;
1520
    char *pattern = NULL;
1521
    char *priority = NULL;
1522
    char *string = NULL;
1523
    struct ioc_priority *curr = NULL, *tmp = NULL;
1524

1525
    string = gf_strdup(opt_str);
1526
    if (string == NULL) {
1527
        max_pri = -1;
1528
        goto out;
1529
    }
1530

1531
    /* Get the pattern for cache priority.
1532
     * "option priority *.jpg:1,abc*:2" etc
1533
     */
1534
    /* TODO: inode_lru in table is statically hard-coded to 5,
1535
     * should be changed to run-time configuration
1536
     */
1537
    stripe_str = strtok_r(string, ",", &tmp_str);
1538
    while (stripe_str) {
1539
        curr = GF_CALLOC(1, sizeof(struct ioc_priority),
1540
                         gf_ioc_mt_ioc_priority);
1541
        if (curr == NULL) {
1542
            max_pri = -1;
1543
            goto out;
1544
        }
1545

1546
        list_add_tail(&curr->list, first);
1547

1548
        dup_str = gf_strdup(stripe_str);
1549
        if (dup_str == NULL) {
1550
            max_pri = -1;
1551
            goto out;
1552
        }
1553

1554
        pattern = strtok_r(dup_str, ":", &tmp_str1);
1555
        if (!pattern) {
1556
            max_pri = -1;
1557
            goto out;
1558
        }
1559

1560
        priority = strtok_r(NULL, ":", &tmp_str1);
1561
        if (!priority) {
1562
            max_pri = -1;
1563
            goto out;
1564
        }
1565

1566
        gf_msg_trace("io-cache", 0, "ioc priority : pattern %s : priority %s",
1567
                     pattern, priority);
1568

1569
        curr->pattern = gf_strdup(pattern);
1570
        if (curr->pattern == NULL) {
1571
            max_pri = -1;
1572
            goto out;
1573
        }
1574

1575
        curr->priority = strtol(priority, &tmp_str2, 0);
1576
        if (tmp_str2 && (*tmp_str2)) {
1577
            max_pri = -1;
1578
            goto out;
1579
        } else {
1580
            max_pri = max(max_pri, curr->priority);
1581
        }
1582

1583
        GF_FREE(dup_str);
1584
        dup_str = NULL;
1585

1586
        stripe_str = strtok_r(NULL, ",", &tmp_str);
1587
    }
1588
out:
1589
    GF_FREE(string);
1590

1591
    GF_FREE(dup_str);
1592

1593
    if (max_pri == -1) {
1594
        list_for_each_entry_safe(curr, tmp, first, list)
1595
        {
1596
            list_del_init(&curr->list);
1597
            GF_FREE(curr->pattern);
1598
            GF_FREE(curr);
1599
        }
1600
    }
1601

1602
    return max_pri;
1603
}
1604

1605
int32_t
1606
mem_acct_init(xlator_t *this)
1607
{
1608
    int ret = -1;
1609

1610
    if (!this)
1611
        return ret;
1612

1613
    ret = xlator_mem_acct_init(this, gf_ioc_mt_end);
1614

1615
    if (ret != 0) {
1616
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
1617
                IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL);
1618
        return ret;
1619
    }
1620

1621
    return ret;
1622
}
1623

1624
static gf_boolean_t
1625
check_cache_size_ok(xlator_t *this, uint64_t cache_size)
1626
{
1627
    gf_boolean_t ret = _gf_true;
1628
    uint64_t total_mem = 0;
1629
    uint64_t max_cache_size = 0;
1630
    volume_option_t *opt = NULL;
1631

1632
    GF_ASSERT(this);
1633
    opt = xlator_volume_option_get(this, "cache-size");
1634
    if (!opt) {
1635
        ret = _gf_false;
1636
        gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
1637
                IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL);
1638
        goto out;
1639
    }
1640

1641
    total_mem = get_mem_size();
1642
    if (-1 == total_mem)
1643
        max_cache_size = opt->max;
1644
    else
1645
        max_cache_size = total_mem;
1646

1647
    gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size);
1648

1649
    if (cache_size > max_cache_size) {
1650
        ret = _gf_false;
1651
        gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
1652
                "Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64,
1653
                max_cache_size, NULL);
1654
        goto out;
1655
    }
1656
out:
1657
    return ret;
1658
}
1659

1660
int
1661
reconfigure(xlator_t *this, dict_t *options)
1662
{
1663
    data_t *data = NULL;
1664
    ioc_table_t *table = NULL;
1665
    int ret = -1;
1666
    uint64_t cache_size_new = 0;
1667
    if (!this || !this->private)
1668
        goto out;
1669

1670
    table = this->private;
1671

1672
    ioc_table_lock(table);
1673
    {
1674
        GF_OPTION_RECONF("pass-through", this->pass_through, options, bool,
1675
                         unlock);
1676

1677
        GF_OPTION_RECONF("cache-timeout", table->cache_timeout, options, time,
1678
                         unlock);
1679

1680
        data = dict_get(options, "priority");
1681
        if (data) {
1682
            char *option_list = data_to_str(data);
1683

1684
            gf_msg_trace(this->name, 0, "option path %s", option_list);
1685
            /* parse the list of pattern:priority */
1686
            table->max_pri = ioc_get_priority_list(option_list,
1687
                                                   &table->priority_list);
1688

1689
            if (table->max_pri == -1) {
1690
                goto unlock;
1691
            }
1692
            table->max_pri++;
1693
        }
1694

1695
        GF_OPTION_RECONF("max-file-size", table->max_file_size, options,
1696
                         size_uint64, unlock);
1697

1698
        GF_OPTION_RECONF("min-file-size", table->min_file_size, options,
1699
                         size_uint64, unlock);
1700

1701
        if ((table->max_file_size <= UINT64_MAX) &&
1702
            (table->min_file_size > table->max_file_size)) {
1703
            gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD,
1704
                    "minimum-size=%" PRIu64, table->min_file_size,
1705
                    "maximum-size=%" PRIu64, table->max_file_size, NULL);
1706
            goto unlock;
1707
        }
1708

1709
        GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64,
1710
                         unlock);
1711
        if (!check_cache_size_ok(this, cache_size_new)) {
1712
            ret = -1;
1713
            gf_smsg(this->name, GF_LOG_ERROR, 0,
1714
                    IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL);
1715
            goto unlock;
1716
        }
1717
        table->cache_size = cache_size_new;
1718

1719
        ret = 0;
1720
    }
1721
unlock:
1722
    ioc_table_unlock(table);
1723
out:
1724
    return ret;
1725
}
1726

1727
/*
1728
 * init -
1729
 * @this:
1730
 *
1731
 */
1732
int32_t
1733
init(xlator_t *this)
1734
{
1735
    ioc_table_t *table = NULL;
1736
    dict_t *xl_options = NULL;
1737
    uint32_t index = 0;
1738
    int32_t ret = -1;
1739
    glusterfs_ctx_t *ctx = NULL;
1740
    data_t *data = 0;
1741
    uint32_t num_pages = 0;
1742

1743
    xl_options = this->options;
1744

1745
    if (!this->children || this->children->next) {
1746
        gf_smsg(this->name, GF_LOG_ERROR, 0,
1747
                IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL);
1748
        goto out;
1749
    }
1750

1751
    if (!this->parents) {
1752
        gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED,
1753
                NULL);
1754
    }
1755

1756
    table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t);
1757
    if (table == NULL) {
1758
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);
1759
        goto out;
1760
    }
1761

1762
    table->xl = this;
1763
    table->page_size = this->ctx->page_size;
1764

1765
    GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
1766

1767
    GF_OPTION_INIT("cache-size", table->cache_size, size_uint64, out);
1768

1769
    GF_OPTION_INIT("cache-timeout", table->cache_timeout, time, out);
1770

1771
    GF_OPTION_INIT("min-file-size", table->min_file_size, size_uint64, out);
1772

1773
    GF_OPTION_INIT("max-file-size", table->max_file_size, size_uint64, out);
1774

1775
    if (!check_cache_size_ok(this, table->cache_size)) {
1776
        ret = -1;
1777
        goto out;
1778
    }
1779

1780
    INIT_LIST_HEAD(&table->priority_list);
1781
    table->max_pri = 1;
1782
    data = dict_get(xl_options, "priority");
1783
    if (data) {
1784
        char *option_list = data_to_str(data);
1785
        gf_msg_trace(this->name, 0, "option path %s", option_list);
1786
        /* parse the list of pattern:priority */
1787
        table->max_pri = ioc_get_priority_list(option_list,
1788
                                               &table->priority_list);
1789

1790
        if (table->max_pri == -1) {
1791
            goto out;
1792
        }
1793
    }
1794
    table->max_pri++;
1795

1796
    INIT_LIST_HEAD(&table->inodes);
1797

1798
    if ((table->max_file_size <= UINT64_MAX) &&
1799
        (table->min_file_size > table->max_file_size)) {
1800
        gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,
1801
                "minimum-size=%" PRIu64, table->min_file_size,
1802
                "maximum-size=%" PRIu64, table->max_file_size, NULL);
1803
        goto out;
1804
    }
1805

1806
    table->inode_lru = GF_CALLOC(table->max_pri, sizeof(struct list_head),
1807
                                 gf_ioc_mt_list_head);
1808
    if (table->inode_lru == NULL) {
1809
        goto out;
1810
    }
1811

1812
    for (index = 0; index < (table->max_pri); index++)
1813
        INIT_LIST_HEAD(&table->inode_lru[index]);
1814

1815
    this->local_pool = mem_pool_new(ioc_local_t, 64);
1816
    if (!this->local_pool) {
1817
        ret = -1;
1818
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
1819
                IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL);
1820
        goto out;
1821
    }
1822

1823
    pthread_mutex_init(&table->table_lock, NULL);
1824
    this->private = table;
1825

1826
    num_pages = (table->cache_size / table->page_size) +
1827
                ((table->cache_size % table->page_size) ? 1 : 0);
1828

1829
    table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages);
1830
    if (!table->mem_pool) {
1831
        gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
1832
                IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL);
1833
        goto out;
1834
    }
1835

1836
    ret = 0;
1837

1838
    ctx = this->ctx;
1839
    ioc_log2_page_size = log_base2(ctx->page_size);
1840

1841
out:
1842
    if (ret == -1) {
1843
        if (table != NULL) {
1844
            GF_FREE(table->inode_lru);
1845
            GF_FREE(table);
1846
        }
1847
    }
1848

1849
    return ret;
1850
}
1851

1852
void
1853
ioc_page_waitq_dump(ioc_page_t *page, char *prefix)
1854
{
1855
    ioc_waitq_t *trav = NULL;
1856
    call_frame_t *frame = NULL;
1857
    int32_t i = 0;
1858
    char key[GF_DUMP_MAX_BUF_LEN] = {
1859
        0,
1860
    };
1861

1862
    trav = page->waitq;
1863

1864
    while (trav) {
1865
        frame = trav->data;
1866
        sprintf(key, "waitq.frame[%d]", i++);
1867
        gf_proc_dump_write(key, "%" PRId64, frame->root->unique);
1868

1869
        trav = trav->next;
1870
    }
1871
}
1872

1873
void
1874
__ioc_inode_waitq_dump(ioc_inode_t *ioc_inode, char *prefix)
1875
{
1876
    ioc_waitq_t *trav = NULL;
1877
    ioc_page_t *page = NULL;
1878
    int32_t i = 0;
1879
    char key[GF_DUMP_MAX_BUF_LEN] = {
1880
        0,
1881
    };
1882

1883
    trav = ioc_inode->waitq;
1884

1885
    while (trav) {
1886
        page = trav->data;
1887

1888
        sprintf(key, "cache-validation-waitq.page[%d].offset", i++);
1889
        gf_proc_dump_write(key, "%" PRId64, page->offset);
1890

1891
        trav = trav->next;
1892
    }
1893
}
1894

1895
void
1896
__ioc_page_dump(ioc_page_t *page, char *prefix)
1897
{
1898
    int ret = -1;
1899

1900
    if (!page)
1901
        return;
1902
    /* ioc_page_lock can be used to hold the mutex. But in statedump
1903
     * its better to use trylock to avoid deadlocks.
1904
     */
1905
    ret = pthread_mutex_trylock(&page->page_lock);
1906
    if (ret)
1907
        goto out;
1908
    {
1909
        gf_proc_dump_write("offset", "%" PRId64, page->offset);
1910
        gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size);
1911
        gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no");
1912
        gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no");
1913
        ioc_page_waitq_dump(page, prefix);
1914
    }
1915
    pthread_mutex_unlock(&page->page_lock);
1916

1917
out:
1918
    if (ret && page)
1919
        gf_proc_dump_write("Unable to dump the page information",
1920
                           "(Lock acquisition failed) %p", page);
1921

1922
    return;
1923
}
1924

1925
void
1926
__ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)
1927
{
1928
    off_t offset = 0;
1929
    ioc_table_t *table = NULL;
1930
    ioc_page_t *page = NULL;
1931
    int i = 0;
1932
    char key[GF_DUMP_MAX_BUF_LEN] = {
1933
        0,
1934
    };
1935
    char timestr[GF_TIMESTR_SIZE] = {
1936
        0,
1937
    };
1938

1939
    if ((ioc_inode == NULL) || (prefix == NULL)) {
1940
        goto out;
1941
    }
1942

1943
    table = ioc_inode->table;
1944

1945
    if (ioc_inode->cache.last_revalidate) {
1946
        gf_time_fmt_FT(timestr, sizeof timestr,
1947
                       ioc_inode->cache.last_revalidate);
1948

1949
        gf_proc_dump_write("last-cache-validation-time", "%s", timestr);
1950
    }
1951

1952
    for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) {
1953
        page = __ioc_page_get(ioc_inode, offset);
1954
        if (page == NULL) {
1955
            continue;
1956
        }
1957

1958
        sprintf(key, "inode.cache.page[%d]", i++);
1959
        __ioc_page_dump(page, key);
1960
    }
1961
out:
1962
    return;
1963
}
1964

1965
int
1966
ioc_inode_dump(xlator_t *this, inode_t *inode)
1967
{
1968
    char *path = NULL;
1969
    int ret = -1;
1970
    char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
1971
        0,
1972
    };
1973
    uint64_t tmp_ioc_inode = 0;
1974
    ioc_inode_t *ioc_inode = NULL;
1975
    gf_boolean_t section_added = _gf_false;
1976
    char uuid_str[64] = {
1977
        0,
1978
    };
1979

1980
    if (this == NULL || inode == NULL)
1981
        goto out;
1982

1983
    gf_proc_dump_build_key(key_prefix, "io-cache", "inode");
1984

1985
    inode_ctx_get(inode, this, &tmp_ioc_inode);
1986
    ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
1987
    if (ioc_inode == NULL)
1988
        goto out;
1989

1990
    /* Similar to ioc_page_dump function its better to use
1991
     * pthread_mutex_trylock and not to use gf_log in statedump
1992
     * to avoid deadlocks.
1993
     */
1994
    ret = pthread_mutex_trylock(&ioc_inode->inode_lock);
1995
    if (ret)
1996
        goto out;
1997

1998
    {
1999
        if (gf_uuid_is_null(ioc_inode->inode->gfid))
2000
            goto unlock;
2001

2002
        gf_proc_dump_add_section("%s", key_prefix);
2003
        section_added = _gf_true;
2004

2005
        __inode_path(ioc_inode->inode, NULL, &path);
2006

2007
        gf_proc_dump_write("inode.weight", "%d", ioc_inode->weight);
2008

2009
        if (path) {
2010
            gf_proc_dump_write("path", "%s", path);
2011
            GF_FREE(path);
2012
        }
2013

2014
        gf_proc_dump_write("uuid", "%s",
2015
                           uuid_utoa_r(ioc_inode->inode->gfid, uuid_str));
2016
        __ioc_cache_dump(ioc_inode, key_prefix);
2017
        __ioc_inode_waitq_dump(ioc_inode, key_prefix);
2018
    }
2019
unlock:
2020
    pthread_mutex_unlock(&ioc_inode->inode_lock);
2021

2022
out:
2023
    if (ret && ioc_inode) {
2024
        if (section_added == _gf_false)
2025
            gf_proc_dump_add_section("%s", key_prefix);
2026
        gf_proc_dump_write("Unable to print the status of ioc_inode",
2027
                           "(Lock acquisition failed) %s",
2028
                           uuid_utoa(inode->gfid));
2029
    }
2030
    return ret;
2031
}
2032

2033
int
2034
ioc_priv_dump(xlator_t *this)
2035
{
2036
    ioc_table_t *priv = NULL;
2037
    char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
2038
        0,
2039
    };
2040
    int ret = -1;
2041
    gf_boolean_t add_section = _gf_false;
2042

2043
    if (!this || !this->private)
2044
        goto out;
2045

2046
    priv = this->private;
2047

2048
    gf_proc_dump_build_key(key_prefix, "io-cache", "priv");
2049
    gf_proc_dump_add_section("%s", key_prefix);
2050
    add_section = _gf_true;
2051

2052
    ret = pthread_mutex_trylock(&priv->table_lock);
2053
    if (ret)
2054
        goto out;
2055
    {
2056
        gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size);
2057
        gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size);
2058
        gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used);
2059
        gf_proc_dump_write("inode_count", "%u", priv->inode_count);
2060
        gf_proc_dump_write("cache_timeout", "%ld", priv->cache_timeout);
2061
        gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size);
2062
        gf_proc_dump_write("max-file-size", "%" PRIu64, priv->max_file_size);
2063
    }
2064
    pthread_mutex_unlock(&priv->table_lock);
2065
out:
2066
    if (ret && priv) {
2067
        if (!add_section) {
2068
            gf_proc_dump_build_key(key_prefix,
2069
                                   "xlator."
2070
                                   "performance.io-cache",
2071
                                   "priv");
2072
            gf_proc_dump_add_section("%s", key_prefix);
2073
        }
2074
        gf_proc_dump_write(
2075
            "Unable to dump the state of private "
2076
            "structure of io-cache xlator",
2077
            "(Lock "
2078
            "acquisition failed) %s",
2079
            this->name);
2080
    }
2081

2082
    return 0;
2083
}
2084

2085
/*
2086
 * fini -
2087
 *
2088
 * @this:
2089
 *
2090
 */
2091
void
2092
fini(xlator_t *this)
2093
{
2094
    ioc_table_t *table = NULL;
2095
    struct ioc_priority *curr = NULL, *tmp = NULL;
2096

2097
    table = this->private;
2098

2099
    if (table == NULL)
2100
        return;
2101

2102
    this->private = NULL;
2103

2104
    if (table->mem_pool != NULL) {
2105
        mem_pool_destroy(table->mem_pool);
2106
        table->mem_pool = NULL;
2107
    }
2108

2109
    list_for_each_entry_safe(curr, tmp, &table->priority_list, list)
2110
    {
2111
        list_del_init(&curr->list);
2112
        GF_FREE(curr->pattern);
2113
        GF_FREE(curr);
2114
    }
2115

2116
    /* inode_lru and inodes list can be empty in case fini() is
2117
     * called soon after init()? Hence commenting the below asserts.
2118
     */
2119
    /*for (i = 0; i < table->max_pri; i++) {
2120
            GF_ASSERT (list_empty (&table->inode_lru[i]));
2121
    }
2122

2123
    GF_ASSERT (list_empty (&table->inodes));
2124
    */
2125
    pthread_mutex_destroy(&table->table_lock);
2126
    GF_FREE(table);
2127

2128
    this->private = NULL;
2129
    return;
2130
}
2131

2132
struct xlator_fops fops = {
2133
    .open = ioc_open,
2134
    .create = ioc_create,
2135
    .readv = ioc_readv,
2136
    .writev = ioc_writev,
2137
    .truncate = ioc_truncate,
2138
    .ftruncate = ioc_ftruncate,
2139
    .lookup = ioc_lookup,
2140
    .lk = ioc_lk,
2141
    .setattr = ioc_setattr,
2142
    .mknod = ioc_mknod,
2143

2144
    .readdirp = ioc_readdirp,
2145
    .discard = ioc_discard,
2146
    .zerofill = ioc_zerofill,
2147
};
2148

2149
struct xlator_dumpops dumpops = {
2150
    .priv = ioc_priv_dump,
2151
    .inodectx = ioc_inode_dump,
2152
};
2153

2154
struct xlator_cbks cbks = {
2155
    .forget = ioc_forget,
2156
    .release = ioc_release,
2157
    .invalidate = ioc_invalidate,
2158
};
2159

2160
struct volume_options options[] = {
2161
    {
2162
        .key = {"io-cache"},
2163
        .type = GF_OPTION_TYPE_BOOL,
2164
        .default_value = "off",
2165
        .description = "enable/disable io-cache",
2166
        .op_version = {GD_OP_VERSION_6_0},
2167
        .flags = OPT_FLAG_SETTABLE,
2168
    },
2169
    {.key = {"priority"},
2170
     .type = GF_OPTION_TYPE_PRIORITY_LIST,
2171
     .default_value = "",
2172
     .description = "Assigns priority to filenames with specific "
2173
                    "patterns so that when a page needs to be ejected "
2174
                    "out of the cache, the page of a file whose "
2175
                    "priority is the lowest will be ejected earlier",
2176
     .op_version = {1},
2177
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
2178
    {.key = {"cache-timeout", "force-revalidate-timeout"},
2179
     .type = GF_OPTION_TYPE_INT,
2180
     .min = 0,
2181
     .max = 60,
2182
     .default_value = "1",
2183
     .description = "The cached data for a file will be retained for "
2184
                    "'cache-refresh-timeout' seconds, after which data "
2185
                    "re-validation is performed.",
2186
     .op_version = {1},
2187
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
2188
    {.key = {"cache-size"},
2189
     .type = GF_OPTION_TYPE_SIZET,
2190
     .min = 4 * GF_UNIT_MB,
2191
     .max = INFINITY,
2192
     .default_value = "32MB",
2193
     .description = "Size of the read cache.",
2194
     .op_version = {1},
2195
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
2196
    {.key = {"min-file-size"},
2197
     .type = GF_OPTION_TYPE_SIZET,
2198
     .default_value = "0",
2199
     .description = "Minimum file size which would be cached by the "
2200
                    "io-cache translator.",
2201
     .op_version = {1},
2202
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
2203
    {.key = {"max-file-size"},
2204
     .type = GF_OPTION_TYPE_SIZET,
2205
     .default_value = "0",
2206
     .description = "Maximum file size which would be cached by the "
2207
                    "io-cache translator.",
2208
     .op_version = {1},
2209
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
2210
    {.key = {"pass-through"},
2211
     .type = GF_OPTION_TYPE_BOOL,
2212
     .default_value = "false",
2213
     .op_version = {GD_OP_VERSION_4_1_0},
2214
     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
2215
     .tags = {"io-cache"},
2216
     .description = "Enable/Disable io cache translator"},
2217
    {.key = {NULL}},
2218
};
2219

2220
xlator_api_t xlator_api = {
2221
    .init = init,
2222
    .fini = fini,
2223
    .reconfigure = reconfigure,
2224
    .mem_acct_init = mem_acct_init,
2225
    .op_version = {1}, /* Present from the initial version */
2226
    .dumpops = &dumpops,
2227
    .fops = &fops,
2228
    .cbks = &cbks,
2229
    .options = options,
2230
    .identifier = "io-cache",
2231
    .category = GF_MAINTAINED,
2232
};
2233

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.