glusterfs
2232 строки · 56.8 Кб
/*
  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
  This file is part of GlusterFS.

  This file is licensed to you under your choice of the GNU Lesser
  General Public License, version 3 or any later version (LGPLv3 or
  later), or the GNU General Public License, version 2 (GPLv2), in all
  cases as published by the Free Software Foundation.
*/
10
11#include <math.h>12#include <glusterfs/glusterfs.h>13#include <glusterfs/logging.h>14#include <glusterfs/dict.h>15#include "io-cache.h"16#include "ioc-mem-types.h"17#include <glusterfs/statedump.h>18#include <assert.h>19#include <sys/time.h>20#include "io-cache-messages.h"21int ioc_log2_page_size;22
23uint32_t
24ioc_get_priority(ioc_table_t *table, const char *path);25
26struct volume_options options[];27
28static uint32_t29ioc_hashfn(void *data, int len)30{
31off_t offset;32
33offset = *(off_t *)data;34
35return (offset >> ioc_log2_page_size);36}
37
/* TODO: This function is not used, uncomment when we find a
         usage for this function.

static ioc_inode_t *
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
{
        ioc_table_t *table = NULL;

        table = ioc_inode->table;

        list_add_tail (&ioc_inode->inode_lru,
                       &table->inode_lru[ioc_inode->weight]);

        return ioc_inode;
}


static ioc_inode_t *
ioc_get_inode (dict_t *dict, char *name)
{
        ioc_inode_t *ioc_inode      = NULL;
        data_t      *ioc_inode_data = NULL;
        ioc_table_t *table          = NULL;

        ioc_inode_data = dict_get (dict, name);
        if (ioc_inode_data) {
                ioc_inode = data_to_ptr (ioc_inode_data);
                table = ioc_inode->table;

                ioc_table_lock (table);
                {
                        if (list_empty (&ioc_inode->inode_lru)) {
                                ioc_inode = ioc_inode_reupdate (ioc_inode);
                        }
                }
                ioc_table_unlock (table);
        }

        return ioc_inode;
}
*/
79
80int
81ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode,82struct iovec *vector, int32_t count, int op_ret, off_t offset)83{
84size_t size = 0;85off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0,86write_offset = 0;87off_t page_offset = 0, page_end = 0;88ioc_page_t *trav = NULL;89
90size = iov_length(vector, count);91size = min(size, op_ret);92
93rounded_offset = gf_floor(offset, ioc_inode->table->page_size);94rounded_end = gf_roof(offset + size, ioc_inode->table->page_size);95
96trav_offset = rounded_offset;97ioc_inode_lock(ioc_inode);98{99while (trav_offset < rounded_end) {100trav = __ioc_page_get(ioc_inode, trav_offset);101if (trav && trav->ready) {102if (trav_offset == rounded_offset)103page_offset = offset - rounded_offset;104else105page_offset = 0;106
107if ((trav_offset + ioc_inode->table->page_size) >=108rounded_end) {109page_end = trav->size - (rounded_end - (offset + size));110} else {111page_end = trav->size;112}113
114iov_range_copy(trav->vector, trav->count, page_offset, vector,115count, write_offset, page_end - page_offset);116} else if (trav) {117if (!trav->waitq)118ioc_inode->table->cache_used -= __ioc_page_destroy(trav);119}120
121if (trav_offset == rounded_offset)122write_offset += (ioc_inode->table->page_size -123(offset - rounded_offset));124else125write_offset += ioc_inode->table->page_size;126
127trav_offset += ioc_inode->table->page_size;128}129}130ioc_inode_unlock(ioc_inode);131
132return 0;133}
134
135static gf_boolean_t136ioc_inode_need_revalidate(ioc_inode_t *ioc_inode)137{
138ioc_table_t *table = NULL;139
140GF_ASSERT(ioc_inode);141table = ioc_inode->table;142GF_ASSERT(table);143
144return (gf_time() - ioc_inode->cache.last_revalidate >=145table->cache_timeout);146}
147
148/*
149* __ioc_inode_flush - flush all the cached pages of the given inode
150*
151* @ioc_inode:
152*
153* assumes lock is held
154*/
155int64_t
156__ioc_inode_flush(ioc_inode_t *ioc_inode)157{
158ioc_page_t *curr = NULL, *next = NULL;159int64_t destroy_size = 0;160int64_t ret = 0;161
162list_for_each_entry_safe(curr, next, &ioc_inode->cache.page_lru, page_lru)163{164ret = __ioc_page_destroy(curr);165
166if (ret != -1)167destroy_size += ret;168}169
170return destroy_size;171}
172
173void
174ioc_inode_flush(ioc_inode_t *ioc_inode)175{
176int64_t destroy_size = 0;177
178ioc_inode_lock(ioc_inode);179{180destroy_size = __ioc_inode_flush(ioc_inode);181}182ioc_inode_unlock(ioc_inode);183
184if (destroy_size) {185ioc_table_lock(ioc_inode->table);186{187ioc_inode->table->cache_used -= destroy_size;188}189ioc_table_unlock(ioc_inode->table);190}191
192return;193}
194
195int32_t
196ioc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,197int32_t op_ret, int32_t op_errno, struct iatt *preop,198struct iatt *postop, dict_t *xdata)199{
200STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata);201return 0;202}
203
204int32_t
205ioc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,206int32_t valid, dict_t *xdata)207{
208uint64_t ioc_inode = 0;209
210inode_ctx_get(loc->inode, this, &ioc_inode);211
212if (ioc_inode &&213((valid & GF_SET_ATTR_ATIME) || (valid & GF_SET_ATTR_MTIME)))214ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);215
216STACK_WIND(frame, ioc_setattr_cbk, FIRST_CHILD(this),217FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);218
219return 0;220}
221
222int32_t
223ioc_inode_update(xlator_t *this, inode_t *inode, char *path, struct iatt *iabuf)224{
225ioc_table_t *table = NULL;226uint64_t tmp_ioc_inode = 0;227ioc_inode_t *ioc_inode = NULL;228uint32_t weight = 0xffffffff;229gf_boolean_t cache_still_valid = _gf_false;230
231if (!this || !inode)232goto out;233
234table = this->private;235
236LOCK(&inode->lock);237{238(void)__inode_ctx_get(inode, this, &tmp_ioc_inode);239ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;240
241if (!ioc_inode) {242weight = ioc_get_priority(table, path);243
244ioc_inode = ioc_inode_create(table, inode, weight);245
246(void)__inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);247}248}249UNLOCK(&inode->lock);250
251ioc_inode_lock(ioc_inode);252{253if (ioc_inode->cache.mtime == 0) {254ioc_inode->cache.mtime = iabuf->ia_mtime;255ioc_inode->cache.mtime_nsec = iabuf->ia_mtime_nsec;256}257
258ioc_inode->ia_size = iabuf->ia_size;259}260ioc_inode_unlock(ioc_inode);261
262cache_still_valid = ioc_cache_still_valid(ioc_inode, iabuf);263
264if (!cache_still_valid) {265ioc_inode_flush(ioc_inode);266}267
268ioc_table_lock(ioc_inode->table);269{270list_move_tail(&ioc_inode->inode_lru,271&table->inode_lru[ioc_inode->weight]);272}273ioc_table_unlock(ioc_inode->table);274
275out:276return 0;277}
278
279int32_t
280ioc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,281int32_t op_ret, int32_t op_errno, inode_t *inode,282struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)283{
284ioc_local_t *local = NULL;285
286if (op_ret != 0)287goto out;288
289local = frame->local;290if (local == NULL) {291op_ret = -1;292op_errno = EINVAL;293goto out;294}295
296if (!this || !this->private) {297op_ret = -1;298op_errno = EINVAL;299goto out;300}301
302ioc_inode_update(this, inode, (char *)local->file_loc.path, stbuf);303
304out:305if (frame->local != NULL) {306local = frame->local;307loc_wipe(&local->file_loc);308}309
310STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,311postparent);312return 0;313}
314
315int32_t
316ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)317{
318ioc_local_t *local = NULL;319int32_t op_errno = -1, ret = -1;320
321local = mem_get0(this->local_pool);322if (local == NULL) {323op_errno = ENOMEM;324gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);325goto unwind;326}327
328ret = loc_copy(&local->file_loc, loc);329if (ret != 0) {330op_errno = ENOMEM;331gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);332goto unwind;333}334
335frame->local = local;336
337STACK_WIND(frame, ioc_lookup_cbk, FIRST_CHILD(this),338FIRST_CHILD(this)->fops->lookup, loc, xdata);339
340return 0;341
342unwind:343if (local != NULL) {344loc_wipe(&local->file_loc);345mem_put(local);346}347
348STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);349
350return 0;351}
352
353/*
354* ioc_forget -
355*
356* @frame:
357* @this:
358* @inode:
359*
360*/
361int32_t
362ioc_forget(xlator_t *this, inode_t *inode)363{
364uint64_t ioc_inode = 0;365
366inode_ctx_get(inode, this, &ioc_inode);367
368if (ioc_inode)369ioc_inode_destroy((ioc_inode_t *)(long)ioc_inode);370
371return 0;372}
373
374static int32_t375ioc_invalidate(xlator_t *this, inode_t *inode)376{
377uint64_t ioc_inode = 0;378
379inode_ctx_get(inode, this, &ioc_inode);380
381if (ioc_inode)382ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode);383
384return 0;385}
386
387/*
388* ioc_cache_validate_cbk -
389*
390* @frame:
391* @cookie:
392* @this:
393* @op_ret:
394* @op_errno:
395* @buf
396*
397*/
398int32_t
399ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,400int32_t op_ret, int32_t op_errno, struct iatt *stbuf,401dict_t *xdata)402{
403ioc_local_t *local = NULL;404ioc_inode_t *ioc_inode = NULL;405size_t destroy_size = 0;406struct iatt *local_stbuf = NULL;407
408local = frame->local;409ioc_inode = local->inode;410local_stbuf = stbuf;411
412if ((op_ret == -1) ||413((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {414gf_msg_debug(ioc_inode->table->xl->name, 0,415"cache for inode(%p) is invalid. flushing all pages",416ioc_inode);417/* NOTE: only pages with no waiting frames are flushed by418* ioc_inode_flush. page_fault will be generated for all
419* the pages which have waiting frames by ioc_inode_wakeup()
420*/
421ioc_inode_lock(ioc_inode);422{423destroy_size = __ioc_inode_flush(ioc_inode);424if (op_ret >= 0) {425ioc_inode->cache.mtime = stbuf->ia_mtime;426ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;427}428}429ioc_inode_unlock(ioc_inode);430local_stbuf = NULL;431}432
433if (destroy_size) {434ioc_table_lock(ioc_inode->table);435{436ioc_inode->table->cache_used -= destroy_size;437}438ioc_table_unlock(ioc_inode->table);439}440
441if (op_ret < 0)442local_stbuf = NULL;443
444ioc_inode_lock(ioc_inode);445{446ioc_inode->cache.last_revalidate = gf_time();447}448ioc_inode_unlock(ioc_inode);449
450ioc_inode_wakeup(frame, ioc_inode, local_stbuf);451
452/* any page-fault initiated by ioc_inode_wakeup() will have its own453* fd_ref on fd, safe to unref validate frame's private copy
454*/
455fd_unref(local->fd);456dict_unref(local->xattr_req);457
458STACK_DESTROY(frame->root);459
460return 0;461}
462
463int32_t
464ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page)465{
466ioc_waitq_t *waiter = NULL, *trav = NULL;467uint32_t page_found = 0;468int32_t ret = 0;469
470trav = ioc_inode->waitq;471
472while (trav) {473if (trav->data == page) {474page_found = 1;475break;476}477trav = trav->next;478}479
480if (!page_found) {481waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t);482if (waiter == NULL) {483gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM,484IO_CACHE_MSG_NO_MEMORY, NULL);485ret = -ENOMEM;486goto out;487}488
489waiter->data = page;490waiter->next = ioc_inode->waitq;491ioc_inode->waitq = waiter;492}493
494out:495return ret;496}
497
498/*
499* ioc_cache_validate -
500*
501* @frame:
502* @ioc_inode:
503* @fd:
504*
505*/
506int32_t
507ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,508ioc_page_t *page)509{
510call_frame_t *validate_frame = NULL;511ioc_local_t *validate_local = NULL;512ioc_local_t *local = NULL;513int32_t ret = 0;514
515local = frame->local;516validate_local = mem_get0(THIS->local_pool);517if (validate_local == NULL) {518ret = -1;519local->op_ret = -1;520local->op_errno = ENOMEM;521gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,522IO_CACHE_MSG_NO_MEMORY, NULL);523goto out;524}525
526validate_frame = copy_frame(frame);527if (validate_frame == NULL) {528ret = -1;529local->op_ret = -1;530local->op_errno = ENOMEM;531mem_put(validate_local);532gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0,533IO_CACHE_MSG_NO_MEMORY, NULL);534goto out;535}536
537validate_local->fd = fd_ref(fd);538validate_local->inode = ioc_inode;539if (local && local->xattr_req)540validate_local->xattr_req = dict_ref(local->xattr_req);541validate_frame->local = validate_local;542
543STACK_WIND(validate_frame, ioc_cache_validate_cbk, FIRST_CHILD(frame->this),544FIRST_CHILD(frame->this)->fops->fstat, fd,545validate_local->xattr_req);546
547out:548return ret;549}
550
/*
 * is_match - does a path match a shell wildcard pattern?
 *
 * @path: string to test
 * @pattern: fnmatch() pattern; backslash is an ordinary character
 *           (FNM_NOESCAPE)
 *
 * Returns 1 on a match, 0 otherwise (including fnmatch() errors).
 */
static uint32_t
is_match(const char *path, const char *pattern)
{
    return (fnmatch(pattern, path, FNM_NOESCAPE) == 0);
}
560
561uint32_t
562ioc_get_priority(ioc_table_t *table, const char *path)563{
564uint32_t priority = 1;565struct ioc_priority *curr = NULL;566
567if (list_empty(&table->priority_list) || !path)568return priority;569
570priority = 0;571list_for_each_entry(curr, &table->priority_list, list)572{573if (is_match(path, curr->pattern))574priority = curr->priority;575}576
577return priority;578}
579
580/*
581* ioc_open_cbk - open callback for io cache
582*
583* @frame: call frame
584* @cookie:
585* @this:
586* @op_ret:
587* @op_errno:
588* @fd:
589*
590*/
591int32_t
592ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,593int32_t op_errno, fd_t *fd, dict_t *xdata)594{
595uint64_t tmp_ioc_inode = 0;596ioc_local_t *local = NULL;597ioc_table_t *table = NULL;598ioc_inode_t *ioc_inode = NULL;599
600local = frame->local;601if (!this || !this->private) {602op_ret = -1;603op_errno = EINVAL;604goto out;605}606
607table = this->private;608
609if (op_ret != -1) {610inode_ctx_get(fd->inode, this, &tmp_ioc_inode);611ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;612
613// TODO: see why inode context is NULL and handle it.614if (!ioc_inode) {615gf_smsg(this->name, GF_LOG_ERROR, EINVAL,616IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s",617uuid_utoa(fd->inode->gfid), NULL);618goto out;619}620
621ioc_table_lock(ioc_inode->table);622{623list_move_tail(&ioc_inode->inode_lru,624&table->inode_lru[ioc_inode->weight]);625}626ioc_table_unlock(ioc_inode->table);627
628ioc_inode_lock(ioc_inode);629{630if ((table->min_file_size > ioc_inode->ia_size) ||631((table->max_file_size > 0) &&632(table->max_file_size < ioc_inode->ia_size))) {633fd_ctx_set(fd, this, 1);634}635}636ioc_inode_unlock(ioc_inode);637
638/* If O_DIRECT open, we disable caching on it */639if ((local->flags & O_DIRECT)) {640/* O_DIRECT is only for one fd, not the inode641* as a whole
642*/
643fd_ctx_set(fd, this, 1);644}645}646
647out:648mem_put(local);649frame->local = NULL;650
651STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);652
653return 0;654}
655
656/*
657* ioc_create_cbk - create callback for io cache
658*
659* @frame: call frame
660* @cookie:
661* @this:
662* @op_ret:
663* @op_errno:
664* @fd:
665* @inode:
666* @buf:
667*
668*/
669int32_t
670ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,671int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,672struct iatt *buf, struct iatt *preparent,673struct iatt *postparent, dict_t *xdata)674{
675ioc_local_t *local = NULL;676ioc_table_t *table = NULL;677ioc_inode_t *ioc_inode = NULL;678uint32_t weight = 0xffffffff;679const char *path = NULL;680int ret = -1;681
682local = frame->local;683if (!this || !this->private) {684op_ret = -1;685op_errno = EINVAL;686goto out;687}688
689table = this->private;690path = local->file_loc.path;691
692if (op_ret != -1) {693/* assign weight */694weight = ioc_get_priority(table, path);695
696ioc_inode = ioc_inode_create(table, inode, weight);697
698ioc_inode_lock(ioc_inode);699{700ioc_inode->cache.mtime = buf->ia_mtime;701ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;702ioc_inode->ia_size = buf->ia_size;703
704if ((table->min_file_size > ioc_inode->ia_size) ||705((table->max_file_size > 0) &&706(table->max_file_size < ioc_inode->ia_size))) {707ret = fd_ctx_set(fd, this, 1);708if (ret)709gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,710IO_CACHE_MSG_SET_FD_FAILED, "path=%s",711local->file_loc.path, NULL);712}713}714ioc_inode_unlock(ioc_inode);715
716inode_ctx_put(fd->inode, this, (uint64_t)(long)ioc_inode);717
718/* If O_DIRECT open, we disable caching on it */719if (local->flags & O_DIRECT) {720/*721* O_DIRECT is only for one fd, not the inode
722* as a whole */
723ret = fd_ctx_set(fd, this, 1);724if (ret)725gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,726IO_CACHE_MSG_SET_FD_FAILED, "path=%s",727local->file_loc.path, NULL);728}729
730/* if weight == 0, we disable caching on it */731if (!weight) {732/* we allow a pattern-matched cache disable this way */733ret = fd_ctx_set(fd, this, 1);734if (ret)735gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,736IO_CACHE_MSG_SET_FD_FAILED, "path=%s",737local->file_loc.path, NULL);738}739}740
741out:742frame->local = NULL;743mem_put(local);744
745STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,746preparent, postparent, xdata);747
748return 0;749}
750
751int32_t
752ioc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,753int32_t op_errno, inode_t *inode, struct iatt *buf,754struct iatt *preparent, struct iatt *postparent, dict_t *xdata)755{
756ioc_local_t *local = NULL;757ioc_table_t *table = NULL;758ioc_inode_t *ioc_inode = NULL;759uint32_t weight = 0xffffffff;760const char *path = NULL;761
762local = frame->local;763if (!this || !this->private) {764op_ret = -1;765op_errno = EINVAL;766goto out;767}768
769table = this->private;770path = local->file_loc.path;771
772if (op_ret != -1) {773/* assign weight */774weight = ioc_get_priority(table, path);775
776ioc_inode = ioc_inode_create(table, inode, weight);777
778ioc_inode_lock(ioc_inode);779{780ioc_inode->cache.mtime = buf->ia_mtime;781ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;782ioc_inode->ia_size = buf->ia_size;783}784ioc_inode_unlock(ioc_inode);785
786inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode);787}788
789out:790frame->local = NULL;791
792loc_wipe(&local->file_loc);793mem_put(local);794
795STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,796postparent, xdata);797return 0;798}
799
800int
801ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,802dev_t rdev, mode_t umask, dict_t *xdata)803{
804ioc_local_t *local = NULL;805int32_t op_errno = -1, ret = -1;806
807local = mem_get0(this->local_pool);808if (local == NULL) {809op_errno = ENOMEM;810gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);811goto unwind;812}813
814ret = loc_copy(&local->file_loc, loc);815if (ret != 0) {816op_errno = ENOMEM;817gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL);818goto unwind;819}820
821frame->local = local;822
823STACK_WIND(frame, ioc_mknod_cbk, FIRST_CHILD(this),824FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);825return 0;826
827unwind:828if (local != NULL) {829loc_wipe(&local->file_loc);830mem_put(local);831}832
833STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,834NULL);835
836return 0;837}
838
839/*
840* ioc_open - open fop for io cache
841* @frame:
842* @this:
843* @loc:
844* @flags:
845*
846*/
847int32_t
848ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,849fd_t *fd, dict_t *xdata)850{
851ioc_local_t *local = NULL;852
853local = mem_get0(this->local_pool);854if (local == NULL) {855gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);856STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);857return 0;858}859
860local->flags = flags;861local->file_loc.path = loc->path;862local->file_loc.inode = loc->inode;863
864frame->local = local;865
866STACK_WIND(frame, ioc_open_cbk, FIRST_CHILD(this),867FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);868
869return 0;870}
871
872/*
873* ioc_create - create fop for io cache
874*
875* @frame:
876* @this:
877* @pathname:
878* @flags:
879* @mode:
880*
881*/
882int32_t
883ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,884mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)885{
886ioc_local_t *local = NULL;887
888local = mem_get0(this->local_pool);889if (local == NULL) {890gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);891STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,892NULL, NULL);893return 0;894}895
896local->flags = flags;897local->file_loc.path = loc->path;898frame->local = local;899
900STACK_WIND(frame, ioc_create_cbk, FIRST_CHILD(this),901FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,902xdata);903
904return 0;905}
906
907/*
908* ioc_release - release fop for io cache
909*
910* @frame:
911* @this:
912* @fd:
913*
914*/
915int32_t
916ioc_release(xlator_t *this, fd_t *fd)917{
918return 0;919}
920
921int32_t
922ioc_need_prune(ioc_table_t *table)923{
924int64_t cache_difference = 0;925
926ioc_table_lock(table);927{928cache_difference = table->cache_used - table->cache_size;929}930ioc_table_unlock(table);931
932if (cache_difference > 0)933return 1;934else935return 0;936}
937
938/*
939* ioc_dispatch_requests -
940*
941* @frame:
942* @inode:
943*
944*
945*/
946void
947ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,948off_t offset, size_t size)949{
950ioc_local_t *local = NULL;951ioc_table_t *table = NULL;952ioc_page_t *trav = NULL;953ioc_waitq_t *waitq = NULL;954off_t rounded_offset = 0;955off_t rounded_end = 0;956off_t trav_offset = 0;957int32_t fault = 0;958size_t trav_size = 0;959off_t local_offset = 0;960int32_t ret = -1;961int8_t need_validate = 0;962int8_t might_need_validate = 0; /*963* if a page exists, do we need
964* to validate it?
965*/
966local = frame->local;967table = ioc_inode->table;968
969rounded_offset = gf_floor(offset, table->page_size);970rounded_end = gf_roof(offset + size, table->page_size);971trav_offset = rounded_offset;972
973/* once a frame does read, it should be waiting on something */974local->wait_count++;975
976/* Requested region can fall in three different pages,977* 1. Ready - region is already in cache, we just have to serve it.
978* 2. In-transit - page fault has been generated on this page, we need
979* to wait till the page is ready
980* 3. Fault - page is not in cache, we have to generate a page fault
981*/
982
983might_need_validate = ioc_inode_need_revalidate(ioc_inode);984
985while (trav_offset < rounded_end) {986ioc_inode_lock(ioc_inode);987{988/* look for requested region in the cache */989trav = __ioc_page_get(ioc_inode, trav_offset);990
991local_offset = max(trav_offset, offset);992trav_size = min(((offset + size) - local_offset), table->page_size);993
994if (!trav) {995/* page not in cache, we need to generate page996* fault
997*/
998trav = __ioc_page_create(ioc_inode, trav_offset);999fault = 1;1000if (!trav) {1001gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM,1002IO_CACHE_MSG_NO_MEMORY, NULL);1003local->op_ret = -1;1004local->op_errno = ENOMEM;1005ioc_inode_unlock(ioc_inode);1006goto out;1007}1008}1009
1010__ioc_wait_on_page(trav, frame, local_offset, trav_size);1011
1012if (trav->ready) {1013/* page found in cache */1014if (!might_need_validate && !ioc_inode->waitq) {1015/* fresh enough */1016gf_msg_trace(frame->this->name, 0,1017"cache hit for "1018"trav_offset=%" PRId641019"/local_"1020"offset=%" PRId64 "",1021trav_offset, local_offset);1022waitq = __ioc_page_wakeup(trav, trav->op_errno);1023} else {1024/* if waitq already exists, fstat1025* revalidate is
1026* already on the way
1027*/
1028if (!ioc_inode->waitq) {1029need_validate = 1;1030}1031
1032ret = ioc_wait_on_inode(ioc_inode, trav);1033if (ret < 0) {1034local->op_ret = -1;1035local->op_errno = -ret;1036need_validate = 0;1037
1038waitq = __ioc_page_wakeup(trav, trav->op_errno);1039ioc_inode_unlock(ioc_inode);1040
1041ioc_waitq_return(waitq);1042waitq = NULL;1043goto out;1044}1045}1046}1047}1048ioc_inode_unlock(ioc_inode);1049
1050ioc_waitq_return(waitq);1051waitq = NULL;1052
1053if (fault) {1054fault = 0;1055/* new page created, increase the table->cache_used */1056ioc_page_fault(ioc_inode, frame, fd, trav_offset);1057}1058
1059if (need_validate) {1060need_validate = 0;1061gf_msg_trace(frame->this->name, 0,1062"sending validate request for "1063"inode(%s) at offset=%" PRId64 "",1064uuid_utoa(fd->inode->gfid), trav_offset);1065ret = ioc_cache_validate(frame, ioc_inode, fd, trav);1066if (ret == -1) {1067ioc_inode_lock(ioc_inode);1068{1069waitq = __ioc_page_wakeup(trav, trav->op_errno);1070}1071ioc_inode_unlock(ioc_inode);1072
1073ioc_waitq_return(waitq);1074waitq = NULL;1075goto out;1076}1077}1078
1079trav_offset += table->page_size;1080}1081
1082out:1083ioc_frame_return(frame);1084
1085if (ioc_need_prune(ioc_inode->table)) {1086ioc_prune(ioc_inode->table);1087}1088
1089return;1090}
1091
1092/*
1093* ioc_readv -
1094*
1095* @frame:
1096* @this:
1097* @fd:
1098* @size:
1099* @offset:
1100*
1101*/
1102int32_t
1103ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,1104off_t offset, uint32_t flags, dict_t *xdata)1105{
1106uint64_t tmp_ioc_inode = 0;1107ioc_inode_t *ioc_inode = NULL;1108ioc_local_t *local = NULL;1109uint32_t weight = 0;1110ioc_table_t *table = NULL;1111int32_t op_errno = EINVAL;1112uint64_t fd_ctx = 0;1113
1114if (!this) {1115goto out;1116}1117
1118inode_ctx_get(fd->inode, this, &tmp_ioc_inode);1119ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;1120if (!ioc_inode) {1121/* caching disabled, go ahead with normal readv */1122STACK_WIND_TAIL(frame, FIRST_CHILD(this),1123FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,1124xdata);1125return 0;1126}1127
1128if (flags & O_DIRECT) {1129/* disable caching for this fd, if O_DIRECT is used */1130STACK_WIND_TAIL(frame, FIRST_CHILD(this),1131FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,1132xdata);1133return 0;1134}1135
1136table = this->private;1137
1138if (!table) {1139gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL,1140NULL);1141op_errno = EINVAL;1142goto out;1143}1144
1145ioc_inode_lock(ioc_inode);1146{1147if (!ioc_inode->cache.page_table) {1148ioc_inode->cache.page_table = rbthash_table_init(1149this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, ioc_hashfn, NULL, 0,1150table->mem_pool);1151
1152if (ioc_inode->cache.page_table == NULL) {1153op_errno = ENOMEM;1154ioc_inode_unlock(ioc_inode);1155goto out;1156}1157}1158}1159ioc_inode_unlock(ioc_inode);1160
1161fd_ctx = fd_ctx_get(fd, this);1162if (fd_ctx) {1163/* disable caching for this fd, go ahead with normal readv */1164STACK_WIND_TAIL(frame, FIRST_CHILD(this),1165FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,1166xdata);1167return 0;1168}1169
1170local = mem_get0(this->local_pool);1171if (local == NULL) {1172gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);1173op_errno = ENOMEM;1174goto out;1175}1176
1177INIT_LIST_HEAD(&local->fill_list);1178
1179frame->local = local;1180local->pending_offset = offset;1181local->pending_size = size;1182local->offset = offset;1183local->size = size;1184local->inode = ioc_inode;1185local->xattr_req = dict_ref(xdata);1186
1187gf_msg_trace(this->name, 0,1188"NEW REQ (%p) offset "1189"= %" PRId64 " && size = %" GF_PRI_SIZET "",1190frame, offset, size);1191
1192weight = ioc_inode->weight;1193
1194ioc_table_lock(ioc_inode->table);1195{1196list_move_tail(&ioc_inode->inode_lru,1197&ioc_inode->table->inode_lru[weight]);1198}1199ioc_table_unlock(ioc_inode->table);1200
1201ioc_dispatch_requests(frame, ioc_inode, fd, offset, size);1202return 0;1203
1204out:1205STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);1206return 0;1207}
1208
1209/*
1210* ioc_writev_cbk -
1211*
1212* @frame:
1213* @cookie:
1214* @this:
1215* @op_ret:
1216* @op_errno:
1217*
1218*/
1219int32_t
1220ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,1221int32_t op_ret, int32_t op_errno, struct iatt *prebuf,1222struct iatt *postbuf, dict_t *xdata)1223{
1224ioc_local_t *local = NULL;1225uint64_t ioc_inode = 0;1226
1227local = frame->local;1228frame->local = NULL;1229inode_ctx_get(local->fd->inode, this, &ioc_inode);1230
1231if (op_ret >= 0) {1232ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector,1233local->op_ret, op_ret, local->offset);1234}1235
1236STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,1237xdata);1238if (local->iobref) {1239iobref_unref(local->iobref);1240GF_FREE(local->vector);1241}1242
1243mem_put(local);1244return 0;1245}
1246
1247/*
1248* ioc_writev
1249*
1250* @frame:
1251* @this:
1252* @fd:
1253* @vector:
1254* @count:
1255* @offset:
1256*
1257*/
1258int32_t
1259ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,1260int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,1261dict_t *xdata)1262{
1263ioc_local_t *local = NULL;1264uint64_t ioc_inode = 0;1265
1266local = mem_get0(this->local_pool);1267if (local == NULL) {1268gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);1269
1270STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL);1271return 0;1272}1273
1274/* TODO: why is it not fd_ref'ed */1275local->fd = fd;1276frame->local = local;1277
1278inode_ctx_get(fd->inode, this, &ioc_inode);1279if (ioc_inode) {1280local->iobref = iobref_ref(iobref);1281local->vector = iov_dup(vector, count);1282local->op_ret = count;1283local->offset = offset;1284}1285
1286STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this),1287FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,1288flags, iobref, xdata);1289
1290return 0;1291}
1292
1293/*
1294* ioc_truncate_cbk -
1295*
1296* @frame:
1297* @cookie:
1298* @this:
1299* @op_ret:
1300* @op_errno:
1301* @buf:
1302*
1303*/
1304int32_t
1305ioc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,1306int32_t op_ret, int32_t op_errno, struct iatt *prebuf,1307struct iatt *postbuf, dict_t *xdata)1308{
1309STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,1310xdata);1311return 0;1312}
1313
1314/*
1315* ioc_ftruncate_cbk -
1316*
1317* @frame:
1318* @cookie:
1319* @this:
1320* @op_ret:
1321* @op_errno:
1322* @buf:
1323*
1324*/
1325int32_t
1326ioc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,1327int32_t op_ret, int32_t op_errno, struct iatt *prebuf,1328struct iatt *postbuf, dict_t *xdata)1329{
1330STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,1331xdata);1332return 0;1333}
1334
1335/*
1336* ioc_truncate -
1337*
1338* @frame:
1339* @this:
1340* @loc:
1341* @offset:
1342*
1343*/
1344int32_t
1345ioc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,1346dict_t *xdata)1347{
1348uint64_t ioc_inode = 0;1349
1350inode_ctx_get(loc->inode, this, &ioc_inode);1351
1352if (ioc_inode)1353ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);1354
1355STACK_WIND(frame, ioc_truncate_cbk, FIRST_CHILD(this),1356FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);1357return 0;1358}
1359
1360/*
1361* ioc_ftruncate -
1362*
1363* @frame:
1364* @this:
1365* @fd:
1366* @offset:
1367*
1368*/
1369int32_t
1370ioc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,1371dict_t *xdata)1372{
1373uint64_t ioc_inode = 0;1374
1375inode_ctx_get(fd->inode, this, &ioc_inode);1376
1377if (ioc_inode)1378ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);1379
1380STACK_WIND(frame, ioc_ftruncate_cbk, FIRST_CHILD(this),1381FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);1382return 0;1383}
1384
1385int32_t
1386ioc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,1387int32_t op_errno, struct gf_flock *lock, dict_t *xdata)1388{
1389STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata);1390return 0;1391}
1392
1393int32_t
1394ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,1395struct gf_flock *lock, dict_t *xdata)1396{
1397ioc_inode_t *ioc_inode = NULL;1398uint64_t tmp_inode = 0;1399
1400inode_ctx_get(fd->inode, this, &tmp_inode);1401ioc_inode = (ioc_inode_t *)(long)tmp_inode;1402if (!ioc_inode) {1403gf_msg_debug(this->name, EBADFD,1404"inode context is NULL: returning EBADFD");1405STACK_UNWIND_STRICT(lk, frame, -1, EBADFD, NULL, NULL);1406return 0;1407}1408
1409ioc_inode_lock(ioc_inode);1410{1411ioc_inode->cache.last_revalidate = gf_time();1412}1413ioc_inode_unlock(ioc_inode);1414
1415STACK_WIND(frame, ioc_lk_cbk, FIRST_CHILD(this),1416FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);1417
1418return 0;1419}
1420
1421int
1422ioc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,1423int op_errno, gf_dirent_t *entries, dict_t *xdata)1424{
1425gf_dirent_t *entry = NULL;1426char *path = NULL;1427fd_t *fd = NULL;1428
1429fd = frame->local;1430frame->local = NULL;1431
1432if (op_ret <= 0)1433goto unwind;1434
1435list_for_each_entry(entry, &entries->list, list)1436{1437inode_path(fd->inode, entry->d_name, &path);1438ioc_inode_update(this, entry->inode, path, &entry->d_stat);1439GF_FREE(path);1440path = NULL;1441}1442
1443unwind:1444STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);1445
1446return 0;1447}
1448
1449int
1450ioc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,1451off_t offset, dict_t *dict)1452{
1453frame->local = fd;1454
1455STACK_WIND(frame, ioc_readdirp_cbk, FIRST_CHILD(this),1456FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);1457
1458return 0;1459}
1460
1461static int32_t1462ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,1463int32_t op_ret, int32_t op_errno, struct iatt *pre,1464struct iatt *post, dict_t *xdata)1465{
1466STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);1467return 0;1468}
1469
1470static int32_t1471ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,1472size_t len, dict_t *xdata)1473{
1474uint64_t ioc_inode = 0;1475
1476inode_ctx_get(fd->inode, this, &ioc_inode);1477
1478if (ioc_inode)1479ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);1480
1481STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),1482FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);1483return 0;1484}
1485
1486static int32_t1487ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,1488int32_t op_ret, int32_t op_errno, struct iatt *pre,1489struct iatt *post, dict_t *xdata)1490{
1491STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata);1492return 0;1493}
1494
1495static int32_t1496ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,1497off_t len, dict_t *xdata)1498{
1499uint64_t ioc_inode = 0;1500
1501inode_ctx_get(fd->inode, this, &ioc_inode);1502
1503if (ioc_inode)1504ioc_inode_flush((ioc_inode_t *)(long)ioc_inode);1505
1506STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),1507FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);1508return 0;1509}
1510
1511int32_t
1512ioc_get_priority_list(const char *opt_str, struct list_head *first)1513{
1514int32_t max_pri = 1;1515char *tmp_str = NULL;1516char *tmp_str1 = NULL;1517char *tmp_str2 = NULL;1518char *dup_str = NULL;1519char *stripe_str = NULL;1520char *pattern = NULL;1521char *priority = NULL;1522char *string = NULL;1523struct ioc_priority *curr = NULL, *tmp = NULL;1524
1525string = gf_strdup(opt_str);1526if (string == NULL) {1527max_pri = -1;1528goto out;1529}1530
1531/* Get the pattern for cache priority.1532* "option priority *.jpg:1,abc*:2" etc
1533*/
1534/* TODO: inode_lru in table is statically hard-coded to 5,1535* should be changed to run-time configuration
1536*/
1537stripe_str = strtok_r(string, ",", &tmp_str);1538while (stripe_str) {1539curr = GF_CALLOC(1, sizeof(struct ioc_priority),1540gf_ioc_mt_ioc_priority);1541if (curr == NULL) {1542max_pri = -1;1543goto out;1544}1545
1546list_add_tail(&curr->list, first);1547
1548dup_str = gf_strdup(stripe_str);1549if (dup_str == NULL) {1550max_pri = -1;1551goto out;1552}1553
1554pattern = strtok_r(dup_str, ":", &tmp_str1);1555if (!pattern) {1556max_pri = -1;1557goto out;1558}1559
1560priority = strtok_r(NULL, ":", &tmp_str1);1561if (!priority) {1562max_pri = -1;1563goto out;1564}1565
1566gf_msg_trace("io-cache", 0, "ioc priority : pattern %s : priority %s",1567pattern, priority);1568
1569curr->pattern = gf_strdup(pattern);1570if (curr->pattern == NULL) {1571max_pri = -1;1572goto out;1573}1574
1575curr->priority = strtol(priority, &tmp_str2, 0);1576if (tmp_str2 && (*tmp_str2)) {1577max_pri = -1;1578goto out;1579} else {1580max_pri = max(max_pri, curr->priority);1581}1582
1583GF_FREE(dup_str);1584dup_str = NULL;1585
1586stripe_str = strtok_r(NULL, ",", &tmp_str);1587}1588out:1589GF_FREE(string);1590
1591GF_FREE(dup_str);1592
1593if (max_pri == -1) {1594list_for_each_entry_safe(curr, tmp, first, list)1595{1596list_del_init(&curr->list);1597GF_FREE(curr->pattern);1598GF_FREE(curr);1599}1600}1601
1602return max_pri;1603}
1604
1605int32_t
1606mem_acct_init(xlator_t *this)1607{
1608int ret = -1;1609
1610if (!this)1611return ret;1612
1613ret = xlator_mem_acct_init(this, gf_ioc_mt_end);1614
1615if (ret != 0) {1616gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,1617IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL);1618return ret;1619}1620
1621return ret;1622}
1623
1624static gf_boolean_t1625check_cache_size_ok(xlator_t *this, uint64_t cache_size)1626{
1627gf_boolean_t ret = _gf_true;1628uint64_t total_mem = 0;1629uint64_t max_cache_size = 0;1630volume_option_t *opt = NULL;1631
1632GF_ASSERT(this);1633opt = xlator_volume_option_get(this, "cache-size");1634if (!opt) {1635ret = _gf_false;1636gf_smsg(this->name, GF_LOG_ERROR, EINVAL,1637IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL);1638goto out;1639}1640
1641total_mem = get_mem_size();1642if (-1 == total_mem)1643max_cache_size = opt->max;1644else1645max_cache_size = total_mem;1646
1647gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size);1648
1649if (cache_size > max_cache_size) {1650ret = _gf_false;1651gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,1652"Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64,1653max_cache_size, NULL);1654goto out;1655}1656out:1657return ret;1658}
1659
1660int
1661reconfigure(xlator_t *this, dict_t *options)1662{
1663data_t *data = NULL;1664ioc_table_t *table = NULL;1665int ret = -1;1666uint64_t cache_size_new = 0;1667if (!this || !this->private)1668goto out;1669
1670table = this->private;1671
1672ioc_table_lock(table);1673{1674GF_OPTION_RECONF("pass-through", this->pass_through, options, bool,1675unlock);1676
1677GF_OPTION_RECONF("cache-timeout", table->cache_timeout, options, time,1678unlock);1679
1680data = dict_get(options, "priority");1681if (data) {1682char *option_list = data_to_str(data);1683
1684gf_msg_trace(this->name, 0, "option path %s", option_list);1685/* parse the list of pattern:priority */1686table->max_pri = ioc_get_priority_list(option_list,1687&table->priority_list);1688
1689if (table->max_pri == -1) {1690goto unlock;1691}1692table->max_pri++;1693}1694
1695GF_OPTION_RECONF("max-file-size", table->max_file_size, options,1696size_uint64, unlock);1697
1698GF_OPTION_RECONF("min-file-size", table->min_file_size, options,1699size_uint64, unlock);1700
1701if ((table->max_file_size <= UINT64_MAX) &&1702(table->min_file_size > table->max_file_size)) {1703gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD,1704"minimum-size=%" PRIu64, table->min_file_size,1705"maximum-size=%" PRIu64, table->max_file_size, NULL);1706goto unlock;1707}1708
1709GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64,1710unlock);1711if (!check_cache_size_ok(this, cache_size_new)) {1712ret = -1;1713gf_smsg(this->name, GF_LOG_ERROR, 0,1714IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL);1715goto unlock;1716}1717table->cache_size = cache_size_new;1718
1719ret = 0;1720}1721unlock:1722ioc_table_unlock(table);1723out:1724return ret;1725}
1726
1727/*
1728* init -
1729* @this:
1730*
1731*/
1732int32_t
1733init(xlator_t *this)1734{
1735ioc_table_t *table = NULL;1736dict_t *xl_options = NULL;1737uint32_t index = 0;1738int32_t ret = -1;1739glusterfs_ctx_t *ctx = NULL;1740data_t *data = 0;1741uint32_t num_pages = 0;1742
1743xl_options = this->options;1744
1745if (!this->children || this->children->next) {1746gf_smsg(this->name, GF_LOG_ERROR, 0,1747IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL);1748goto out;1749}1750
1751if (!this->parents) {1752gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED,1753NULL);1754}1755
1756table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t);1757if (table == NULL) {1758gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL);1759goto out;1760}1761
1762table->xl = this;1763table->page_size = this->ctx->page_size;1764
1765GF_OPTION_INIT("pass-through", this->pass_through, bool, out);1766
1767GF_OPTION_INIT("cache-size", table->cache_size, size_uint64, out);1768
1769GF_OPTION_INIT("cache-timeout", table->cache_timeout, time, out);1770
1771GF_OPTION_INIT("min-file-size", table->min_file_size, size_uint64, out);1772
1773GF_OPTION_INIT("max-file-size", table->max_file_size, size_uint64, out);1774
1775if (!check_cache_size_ok(this, table->cache_size)) {1776ret = -1;1777goto out;1778}1779
1780INIT_LIST_HEAD(&table->priority_list);1781table->max_pri = 1;1782data = dict_get(xl_options, "priority");1783if (data) {1784char *option_list = data_to_str(data);1785gf_msg_trace(this->name, 0, "option path %s", option_list);1786/* parse the list of pattern:priority */1787table->max_pri = ioc_get_priority_list(option_list,1788&table->priority_list);1789
1790if (table->max_pri == -1) {1791goto out;1792}1793}1794table->max_pri++;1795
1796INIT_LIST_HEAD(&table->inodes);1797
1798if ((table->max_file_size <= UINT64_MAX) &&1799(table->min_file_size > table->max_file_size)) {1800gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT,1801"minimum-size=%" PRIu64, table->min_file_size,1802"maximum-size=%" PRIu64, table->max_file_size, NULL);1803goto out;1804}1805
1806table->inode_lru = GF_CALLOC(table->max_pri, sizeof(struct list_head),1807gf_ioc_mt_list_head);1808if (table->inode_lru == NULL) {1809goto out;1810}1811
1812for (index = 0; index < (table->max_pri); index++)1813INIT_LIST_HEAD(&table->inode_lru[index]);1814
1815this->local_pool = mem_pool_new(ioc_local_t, 64);1816if (!this->local_pool) {1817ret = -1;1818gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,1819IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL);1820goto out;1821}1822
1823pthread_mutex_init(&table->table_lock, NULL);1824this->private = table;1825
1826num_pages = (table->cache_size / table->page_size) +1827((table->cache_size % table->page_size) ? 1 : 0);1828
1829table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages);1830if (!table->mem_pool) {1831gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,1832IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL);1833goto out;1834}1835
1836ret = 0;1837
1838ctx = this->ctx;1839ioc_log2_page_size = log_base2(ctx->page_size);1840
1841out:1842if (ret == -1) {1843if (table != NULL) {1844GF_FREE(table->inode_lru);1845GF_FREE(table);1846}1847}1848
1849return ret;1850}
1851
1852void
1853ioc_page_waitq_dump(ioc_page_t *page, char *prefix)1854{
1855ioc_waitq_t *trav = NULL;1856call_frame_t *frame = NULL;1857int32_t i = 0;1858char key[GF_DUMP_MAX_BUF_LEN] = {18590,1860};1861
1862trav = page->waitq;1863
1864while (trav) {1865frame = trav->data;1866sprintf(key, "waitq.frame[%d]", i++);1867gf_proc_dump_write(key, "%" PRId64, frame->root->unique);1868
1869trav = trav->next;1870}1871}
1872
1873void
1874__ioc_inode_waitq_dump(ioc_inode_t *ioc_inode, char *prefix)1875{
1876ioc_waitq_t *trav = NULL;1877ioc_page_t *page = NULL;1878int32_t i = 0;1879char key[GF_DUMP_MAX_BUF_LEN] = {18800,1881};1882
1883trav = ioc_inode->waitq;1884
1885while (trav) {1886page = trav->data;1887
1888sprintf(key, "cache-validation-waitq.page[%d].offset", i++);1889gf_proc_dump_write(key, "%" PRId64, page->offset);1890
1891trav = trav->next;1892}1893}
1894
1895void
1896__ioc_page_dump(ioc_page_t *page, char *prefix)1897{
1898int ret = -1;1899
1900if (!page)1901return;1902/* ioc_page_lock can be used to hold the mutex. But in statedump1903* its better to use trylock to avoid deadlocks.
1904*/
1905ret = pthread_mutex_trylock(&page->page_lock);1906if (ret)1907goto out;1908{1909gf_proc_dump_write("offset", "%" PRId64, page->offset);1910gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size);1911gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no");1912gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no");1913ioc_page_waitq_dump(page, prefix);1914}1915pthread_mutex_unlock(&page->page_lock);1916
1917out:1918if (ret && page)1919gf_proc_dump_write("Unable to dump the page information",1920"(Lock acquisition failed) %p", page);1921
1922return;1923}
1924
1925void
1926__ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix)1927{
1928off_t offset = 0;1929ioc_table_t *table = NULL;1930ioc_page_t *page = NULL;1931int i = 0;1932char key[GF_DUMP_MAX_BUF_LEN] = {19330,1934};1935char timestr[GF_TIMESTR_SIZE] = {19360,1937};1938
1939if ((ioc_inode == NULL) || (prefix == NULL)) {1940goto out;1941}1942
1943table = ioc_inode->table;1944
1945if (ioc_inode->cache.last_revalidate) {1946gf_time_fmt_FT(timestr, sizeof timestr,1947ioc_inode->cache.last_revalidate);1948
1949gf_proc_dump_write("last-cache-validation-time", "%s", timestr);1950}1951
1952for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) {1953page = __ioc_page_get(ioc_inode, offset);1954if (page == NULL) {1955continue;1956}1957
1958sprintf(key, "inode.cache.page[%d]", i++);1959__ioc_page_dump(page, key);1960}1961out:1962return;1963}
1964
1965int
1966ioc_inode_dump(xlator_t *this, inode_t *inode)1967{
1968char *path = NULL;1969int ret = -1;1970char key_prefix[GF_DUMP_MAX_BUF_LEN] = {19710,1972};1973uint64_t tmp_ioc_inode = 0;1974ioc_inode_t *ioc_inode = NULL;1975gf_boolean_t section_added = _gf_false;1976char uuid_str[64] = {19770,1978};1979
1980if (this == NULL || inode == NULL)1981goto out;1982
1983gf_proc_dump_build_key(key_prefix, "io-cache", "inode");1984
1985inode_ctx_get(inode, this, &tmp_ioc_inode);1986ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;1987if (ioc_inode == NULL)1988goto out;1989
1990/* Similar to ioc_page_dump function its better to use1991* pthread_mutex_trylock and not to use gf_log in statedump
1992* to avoid deadlocks.
1993*/
1994ret = pthread_mutex_trylock(&ioc_inode->inode_lock);1995if (ret)1996goto out;1997
1998{1999if (gf_uuid_is_null(ioc_inode->inode->gfid))2000goto unlock;2001
2002gf_proc_dump_add_section("%s", key_prefix);2003section_added = _gf_true;2004
2005__inode_path(ioc_inode->inode, NULL, &path);2006
2007gf_proc_dump_write("inode.weight", "%d", ioc_inode->weight);2008
2009if (path) {2010gf_proc_dump_write("path", "%s", path);2011GF_FREE(path);2012}2013
2014gf_proc_dump_write("uuid", "%s",2015uuid_utoa_r(ioc_inode->inode->gfid, uuid_str));2016__ioc_cache_dump(ioc_inode, key_prefix);2017__ioc_inode_waitq_dump(ioc_inode, key_prefix);2018}2019unlock:2020pthread_mutex_unlock(&ioc_inode->inode_lock);2021
2022out:2023if (ret && ioc_inode) {2024if (section_added == _gf_false)2025gf_proc_dump_add_section("%s", key_prefix);2026gf_proc_dump_write("Unable to print the status of ioc_inode",2027"(Lock acquisition failed) %s",2028uuid_utoa(inode->gfid));2029}2030return ret;2031}
2032
2033int
2034ioc_priv_dump(xlator_t *this)2035{
2036ioc_table_t *priv = NULL;2037char key_prefix[GF_DUMP_MAX_BUF_LEN] = {20380,2039};2040int ret = -1;2041gf_boolean_t add_section = _gf_false;2042
2043if (!this || !this->private)2044goto out;2045
2046priv = this->private;2047
2048gf_proc_dump_build_key(key_prefix, "io-cache", "priv");2049gf_proc_dump_add_section("%s", key_prefix);2050add_section = _gf_true;2051
2052ret = pthread_mutex_trylock(&priv->table_lock);2053if (ret)2054goto out;2055{2056gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size);2057gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size);2058gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used);2059gf_proc_dump_write("inode_count", "%u", priv->inode_count);2060gf_proc_dump_write("cache_timeout", "%ld", priv->cache_timeout);2061gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size);2062gf_proc_dump_write("max-file-size", "%" PRIu64, priv->max_file_size);2063}2064pthread_mutex_unlock(&priv->table_lock);2065out:2066if (ret && priv) {2067if (!add_section) {2068gf_proc_dump_build_key(key_prefix,2069"xlator."2070"performance.io-cache",2071"priv");2072gf_proc_dump_add_section("%s", key_prefix);2073}2074gf_proc_dump_write(2075"Unable to dump the state of private "2076"structure of io-cache xlator",2077"(Lock "2078"acquisition failed) %s",2079this->name);2080}2081
2082return 0;2083}
2084
2085/*
2086* fini -
2087*
2088* @this:
2089*
2090*/
2091void
2092fini(xlator_t *this)2093{
2094ioc_table_t *table = NULL;2095struct ioc_priority *curr = NULL, *tmp = NULL;2096
2097table = this->private;2098
2099if (table == NULL)2100return;2101
2102this->private = NULL;2103
2104if (table->mem_pool != NULL) {2105mem_pool_destroy(table->mem_pool);2106table->mem_pool = NULL;2107}2108
2109list_for_each_entry_safe(curr, tmp, &table->priority_list, list)2110{2111list_del_init(&curr->list);2112GF_FREE(curr->pattern);2113GF_FREE(curr);2114}2115
2116/* inode_lru and inodes list can be empty in case fini() is2117* called soon after init()? Hence commenting the below asserts.
2118*/
2119/*for (i = 0; i < table->max_pri; i++) {2120GF_ASSERT (list_empty (&table->inode_lru[i]));
2121}
2122
2123GF_ASSERT (list_empty (&table->inodes));
2124*/
2125pthread_mutex_destroy(&table->table_lock);2126GF_FREE(table);2127
2128this->private = NULL;2129return;2130}
2131
2132struct xlator_fops fops = {2133.open = ioc_open,2134.create = ioc_create,2135.readv = ioc_readv,2136.writev = ioc_writev,2137.truncate = ioc_truncate,2138.ftruncate = ioc_ftruncate,2139.lookup = ioc_lookup,2140.lk = ioc_lk,2141.setattr = ioc_setattr,2142.mknod = ioc_mknod,2143
2144.readdirp = ioc_readdirp,2145.discard = ioc_discard,2146.zerofill = ioc_zerofill,2147};2148
2149struct xlator_dumpops dumpops = {2150.priv = ioc_priv_dump,2151.inodectx = ioc_inode_dump,2152};2153
2154struct xlator_cbks cbks = {2155.forget = ioc_forget,2156.release = ioc_release,2157.invalidate = ioc_invalidate,2158};2159
/*
 * Volume options understood by the io-cache xlator. The table is terminated
 * by an entry whose key is NULL.
 */
struct volume_options options[] = {
    /* Boolean switch, off by default. */
    {
        .key = {"io-cache"},
        .type = GF_OPTION_TYPE_BOOL,
        .default_value = "off",
        .description = "enable/disable io-cache",
        .op_version = {GD_OP_VERSION_6_0},
        .flags = OPT_FLAG_SETTABLE,
    },
    /* List of pattern:priority items, parsed by ioc_get_priority_list(). */
    {.key = {"priority"},
     .type = GF_OPTION_TYPE_PRIORITY_LIST,
     .default_value = "",
     .description = "Assigns priority to filenames with specific "
                    "patterns so that when a page needs to be ejected "
                    "out of the cache, the page of a file whose "
                    "priority is the lowest will be ejected earlier",
     .op_version = {1},
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
    /* Integer in [0, 60], default 1; reachable under two key names. */
    {.key = {"cache-timeout", "force-revalidate-timeout"},
     .type = GF_OPTION_TYPE_INT,
     .min = 0,
     .max = 60,
     .default_value = "1",
     .description = "The cached data for a file will be retained for "
                    "'cache-refresh-timeout' seconds, after which data "
                    "re-validation is performed.",
     .op_version = {1},
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
    /* Size with a 4MB minimum and no finite maximum (max is INFINITY);
     * check_cache_size_ok() reads this entry's max as a fallback bound.
     */
    {.key = {"cache-size"},
     .type = GF_OPTION_TYPE_SIZET,
     .min = 4 * GF_UNIT_MB,
     .max = INFINITY,
     .default_value = "32MB",
     .description = "Size of the read cache.",
     .op_version = {1},
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
    /* Size, default 0. */
    {.key = {"min-file-size"},
     .type = GF_OPTION_TYPE_SIZET,
     .default_value = "0",
     .description = "Minimum file size which would be cached by the "
                    "io-cache translator.",
     .op_version = {1},
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
    /* Size, default 0. */
    {.key = {"max-file-size"},
     .type = GF_OPTION_TYPE_SIZET,
     .default_value = "0",
     .description = "Maximum file size which would be cached by the "
                    "io-cache translator.",
     .op_version = {1},
     .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
    /* Boolean, false by default; read into this->pass_through by init()
     * and reconfigure().
     */
    {.key = {"pass-through"},
     .type = GF_OPTION_TYPE_BOOL,
     .default_value = "false",
     .op_version = {GD_OP_VERSION_4_1_0},
     .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
     .tags = {"io-cache"},
     .description = "Enable/Disable io cache translator"},
    /* Terminator. */
    {.key = {NULL}},
};
2220xlator_api_t xlator_api = {2221.init = init,2222.fini = fini,2223.reconfigure = reconfigure,2224.mem_acct_init = mem_acct_init,2225.op_version = {1}, /* Present from the initial version */2226.dumpops = &dumpops,2227.fops = &fops,2228.cbks = &cbks,2229.options = options,2230.identifier = "io-cache",2231.category = GF_MAINTAINED,2232};2233