1 /*
2 * Copyright (C) 2021 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3 *
4 * This file is part of MooseFS.
5 *
6 * MooseFS is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, version 2 (only).
9 *
10 * MooseFS is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with MooseFS; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18 * or visit http://www.gnu.org/licenses/gpl-2.0.html
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24
25 #include <stdlib.h>
26 #include <inttypes.h>
27
28 #include "MFSCommunication.h"
29
30 #include "matoclserv.h"
31 #include "openfiles.h"
32 #include "metadata.h"
33 #include "main.h"
34 #include "changelog.h"
35 #include "datapack.h"
36 #include "bio.h"
37 #include "slogger.h"
38 #include "massert.h"
39
/* Lock-granting policy stored in FlocksMode. */
enum {
	MODE_CORRECT = 0,	/* classic readers/writers: a new reader also waits when any lock is already waiting */
	MODE_BSD = 1,		/* only readers at the front of the waiting queue are activated together */
	MODE_LINUX = 2		/* every waiting reader is activated, wherever it sits in the waiting queue */
};

/* Values of lock.state - tells which list of the inode record the lock is linked on. */
enum {
	STATE_WAITING = 0,
	STATE_ACTIVE = 1
};

/* Values of lock.ltype. */
enum {
	LTYPE_READER = 0,
	LTYPE_WRITER = 1
};
49
/* One client request attached to a lock (singly linked list, see lock.lock_instances). */
typedef struct _instance {
	uint32_t msgid;			/* handed to matoclserv_fuse_flock_wake_up() when the request is answered */
	uint32_t reqid;			/* for interruption only (valid in waiting_* queues) */
	struct _instance *next;		/* next request recorded on the same lock */
} instance;
55
56 typedef struct _lock {
57 uint64_t owner;
58 uint32_t sessionid;
59 uint8_t state;
60 uint8_t ltype;
61 instance *lock_instances;
62 struct _inodelocks *parent;
63 struct _lock *next,**prev;
64 } lock;
65
66 typedef struct _inodelocks {
67 uint32_t inode;
68 lock *active;
69 lock *waiting_head,**waiting_tail;
70 struct _inodelocks *next;
71 } inodelocks;
72
/* Number of buckets in the inode hash table. */
#define FLOCK_INODE_HASHSIZE 1024

/* Bucket index for an inode number: multiplicative hash reduced modulo the table size. */
#define FLOCK_INODE_HASH(inode) ( ( (inode) * 0x738A2379 ) % (FLOCK_INODE_HASHSIZE) )
76
77 static inodelocks **inodehash;
78
79 static uint8_t FlocksMode;
80
#if 0
/*
 * Debug helper (compiled out): logs every inode record with its active and
 * waiting locks and reports inconsistencies found in the list linkage.
 */
static inline void flock_dump(void) {
	uint32_t bucket;
	inodelocks *rec;
	lock *cur,**link;
	instance *req;

	syslog(LOG_NOTICE,"flock dump:");
	for (bucket=0 ; bucket<FLOCK_INODE_HASHSIZE ; bucket++) {
		for (rec = inodehash[bucket] ; rec!=NULL ; rec = rec->next) {
			syslog(LOG_NOTICE," inode: %"PRIu32" (active:%s,waiting:%s)",rec->inode,rec->active?"yes":"no",rec->waiting_head?"yes":"no");
			// active list: every lock must be in active state, correctly back-linked and without pending requests
			for (link = &(rec->active) ; (cur = *link)!=NULL ; link = &(cur->next)) {
				syslog(LOG_NOTICE," active lock: session:%"PRIu32",owner:%"PRIu64",type:%s",cur->sessionid,cur->owner,cur->ltype==LTYPE_READER?"R":"W");
				if (cur->state!=STATE_ACTIVE) {
					syslog(LOG_WARNING," wrong state !!!");
				}
				if (cur->prev != link) {
					syslog(LOG_WARNING," wrong prev pointer !!!");
				}
				if (cur->lock_instances) {
					syslog(LOG_WARNING," active lock with waiting processes !!!");
				}
			}
			// waiting queue: after the loop 'link' addresses the terminating NULL, which the tail pointer must match
			for (link = &(rec->waiting_head) ; (cur = *link)!=NULL ; link = &(cur->next)) {
				syslog(LOG_NOTICE," waiting lock: session:%"PRIu32",owner:%"PRIu64",type:%s",cur->sessionid,cur->owner,cur->ltype==LTYPE_READER?"R":"W");
				if (cur->state!=STATE_WAITING) {
					syslog(LOG_WARNING," wrong state !!!");
				}
				if (cur->prev != link) {
					syslog(LOG_WARNING," wrong prev pointer !!!");
				}
				for (req = cur->lock_instances ; req!=NULL ; req = req->next) {
					syslog(LOG_NOTICE," waiting process reqid: %"PRIu32,req->reqid);
				}
			}
			if (rec->waiting_tail != link) {
				syslog(LOG_WARNING," wrong tail pointer !!!");
			}
		}
	}
}
#endif
126
flock_inode_find(uint32_t inode)127 static inline inodelocks* flock_inode_find(uint32_t inode) {
128 inodelocks *il;
129
130 for (il = inodehash[FLOCK_INODE_HASH(inode)] ; il ; il=il->next) {
131 if (il->inode==inode) {
132 return il;
133 }
134 }
135 return NULL;
136 }
137
flock_inode_new(uint32_t inode)138 static inline inodelocks* flock_inode_new(uint32_t inode) {
139 inodelocks *il;
140 uint32_t hash;
141
142 il = malloc(sizeof(inodelocks));
143 passert(il);
144 il->inode = inode;
145 il->active = NULL;
146 il->waiting_head = NULL;
147 il->waiting_tail = &(il->waiting_head);
148 hash = FLOCK_INODE_HASH(inode);
149 il->next = inodehash[hash];
150 inodehash[hash] = il;
151 return il;
152 }
153
flock_inode_remove(uint32_t inode)154 static inline void flock_inode_remove(uint32_t inode) {
155 inodelocks *il,**ilp;
156 uint32_t hash;
157
158 hash = FLOCK_INODE_HASH(inode);
159 ilp = inodehash + hash;
160 while ((il=*ilp)) {
161 if (il->inode==inode) {
162 massert(il->active==NULL && il->waiting_head==NULL,"inode flock record not empty !!!");
163 *ilp = il->next;
164 free(il);
165 } else {
166 ilp = &(il->next);
167 }
168 }
169 }
170
/*
 * Unlink the lock from the list it is currently on (chosen by l->state)
 * without freeing it. When the last waiting lock is unlinked the waiting
 * queue tail pointer is moved back.
 */
static inline void flock_lock_inode_detach(lock *l) {
	lock *succ;

	succ = l->next;
	if (succ!=NULL) {
		succ->prev = l->prev;
	} else if (l->state==STATE_WAITING) {
		l->parent->waiting_tail = l->prev;
	}
	*(l->prev) = succ;
}
181
/*
 * Link the lock into its parent inode record: waiting locks are appended at
 * the end of the waiting queue, all other locks are pushed at the head of the
 * active list. No changelog entry is written here.
 */
static inline void flock_do_lock_inode_attach(lock *l) {
	inodelocks *rec;
	lock *old_head;

	rec = l->parent;
	if (l->state!=STATE_WAITING) {
		old_head = rec->active;
		l->next = old_head;
		if (old_head!=NULL) {
			old_head->prev = &(l->next);
		}
		l->prev = &(rec->active);
		rec->active = l;
		return;
	}
	l->next = NULL;
	l->prev = rec->waiting_tail;
	*(rec->waiting_tail) = l;
	rec->waiting_tail = &(l->next);
}
197
/*
 * Link the lock into its parent inode record; when the lock is active the
 * acquisition is first written to the changelog as a FLOCK entry with type
 * 'R' (reader) or 'W' (writer).
 */
static inline void flock_lock_inode_attach(lock *l) {
	char typechar;

	if (l->state==STATE_ACTIVE) {
		typechar = (l->ltype==LTYPE_READER)?'R':'W';
		changelog("%"PRIu32"|FLOCK(%"PRIu32",%"PRIu32",%"PRIu64",%c)",main_time(),l->parent->inode,l->sessionid,l->owner,typechar);
	}
	flock_do_lock_inode_attach(l);
}
204
/*
 * Answer (with the given status) and discard every request recorded on the
 * lock whose reqid equals the given one; other requests are left untouched.
 */
static inline void flock_lock_wake_up_one(lock *l,uint32_t reqid,uint8_t status) {
	instance *req,**link;

	link = &(l->lock_instances);
	while ((req = *link)!=NULL) {
		if (req->reqid!=reqid) {
			link = &(req->next);
			continue;
		}
		matoclserv_fuse_flock_wake_up(l->sessionid,req->msgid,status);
		*link = req->next;
		free(req);
	}
}
218
/* Answer every request recorded on the lock with the given status and empty the request list. */
static inline void flock_lock_wake_up_all(lock *l,uint8_t status) {
	instance *req,*following;

	for (req = l->lock_instances ; req!=NULL ; req = following) {
		following = req->next;	// saved before the node is freed
		matoclserv_fuse_flock_wake_up(l->sessionid,req->msgid,status);
		free(req);
	}
	l->lock_instances = NULL;
}
230
/*
 * Record request (msgid,reqid) on the lock. When a request with the same
 * reqid is already recorded only its msgid is refreshed; otherwise a new
 * entry is pushed at the head of the request list.
 */
static inline void flock_lock_append_req(lock *l,uint32_t msgid,uint32_t reqid) {
	instance *req;

	req = l->lock_instances;
	while (req!=NULL && req->reqid!=reqid) {
		req = req->next;
	}
	if (req!=NULL) {
		req->msgid = msgid;
		return;
	}
	req = malloc(sizeof(instance));
	passert(req);
	req->msgid = msgid;
	req->reqid = reqid;
	req->next = l->lock_instances;
	l->lock_instances = req;
}
247
/*
 * Free all requests recorded on the lock (without answering them), unlink the
 * lock from its list and free it. No changelog entry is written here.
 */
static inline void flock_do_lock_remove(lock *l) {
	instance *req;

	while ((req = l->lock_instances)!=NULL) {
		l->lock_instances = req->next;
		free(req);
	}
	flock_lock_inode_detach(l);
	free(l);
}
266
/*
 * Remove and free the lock; when the lock is active the release is first
 * written to the changelog as a FLOCK entry with type 'U'.
 */
static inline void flock_lock_remove(lock *l) {
	inodelocks *rec;

	if (l->state==STATE_ACTIVE) {
		rec = l->parent;
		changelog("%"PRIu32"|FLOCK(%"PRIu32",%"PRIu32",%"PRIu64",U)",main_time(),rec->inode,l->sessionid,l->owner);
	}
	flock_do_lock_remove(l);
}
273
/*
 * Tell whether a new lock of the given type would have to wait on this inode.
 * Returns 1 when it has to wait and 0 when it can be granted immediately.
 */
static inline uint8_t flock_check(inodelocks *il,uint8_t ltype) {
	if (ltype!=LTYPE_READER) {
		// a writer needs the inode to have no active lock at all
		return (il->active!=NULL)?1:0;
	}
	if (il->active!=NULL && il->active->ltype==LTYPE_WRITER) {
		return 1;
	}
	// additional condition for classic readers/writers algorithm (not seen in any
	// tested OS) - reader should wait when there are other waiting locks (even if
	// currently acquired lock(s) is a reader lock) - it avoids writer starvation
	if (FlocksMode==MODE_CORRECT && il->waiting_head!=NULL) {
		return 1;
	}
	return 0;
}
294
/*
 * Create a lock of the given type for (sessionid,owner) on the inode record.
 *
 * When flock_check() reports that the lock has to wait, it is queued in
 * waiting state with the request (msgid,reqid) recorded on it and
 * MFS_ERROR_WAITING is returned. Otherwise the lock is attached as active
 * (which also writes the changelog entry) and MFS_STATUS_OK is returned.
 */
static inline uint8_t flock_lock_new(inodelocks *il,uint8_t ltype,uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint64_t owner) {
	lock *l;

	l = malloc(sizeof(lock));
	passert(l);	// same allocation-failure policy as the other allocations in this file
	l->owner = owner;
	l->sessionid = sessionid;
	l->state = STATE_ACTIVE;
	l->ltype = ltype;
	l->lock_instances = NULL;
	l->parent = il;
	l->next = NULL;
	l->prev = NULL;
	if (flock_check(il,ltype)) {
		l->state = STATE_WAITING;
		flock_lock_append_req(l,msgid,reqid);
		flock_lock_inode_attach(l);
		return MFS_ERROR_WAITING;
	}
	flock_lock_inode_attach(l);
	return MFS_STATUS_OK;
}
315
/*
 * Promote waiting locks that can now be granted.
 *
 * - When nothing is active and the first waiting lock is a writer, that
 *   writer becomes active.
 * - When (after that step) nothing is active or the first active lock is a
 *   reader, waiting readers become active: in MODE_LINUX every waiting
 *   reader, in the other modes only the readers preceding the first waiting
 *   non-reader.
 * Requests recorded on each promoted lock are answered with MFS_STATUS_OK.
 */
static inline void flock_lock_check_waiting(inodelocks *il) {
	lock *cur,*following;

	cur = il->waiting_head;
	if (cur==NULL) {
		return;
	}
	if (il->active==NULL && cur->ltype==LTYPE_WRITER) {
		flock_lock_inode_detach(cur);
		cur->state = STATE_ACTIVE;
		flock_lock_inode_attach(cur);
		flock_lock_wake_up_all(cur,MFS_STATUS_OK);
	}
	if (il->active!=NULL && il->active->ltype!=LTYPE_READER) {
		return;
	}
	for ( ; cur!=NULL ; cur = following) {
		following = cur->next;	// saved before 'cur' is moved to the active list
		if (cur->ltype!=LTYPE_READER) {
			if (FlocksMode==MODE_LINUX) {
				continue;	// Linux: skip it and keep looking for readers
			}
			break;	// FreeBSD, OSX, Classic readers/writers algorithm: stop here
		}
		flock_lock_inode_detach(cur);
		cur->state = STATE_ACTIVE;
		flock_lock_inode_attach(cur);
		flock_lock_wake_up_all(cur,MFS_STATUS_OK);
	}
}
352
/*
 * Remove lock l (which must belong to il) and, when that leaves the inode
 * without active locks while some are waiting, promote waiting locks.
 */
static inline void flock_lock_unlock(inodelocks *il,lock *l) {
	massert(il==l->parent,"flock data structures mismatch");

	flock_lock_remove(l);

	if (il->active!=NULL || il->waiting_head==NULL) {
		return;
	}
	flock_lock_check_waiting(il);
}
362
flock_locks_cmd(uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint32_t inode,uint64_t owner,uint8_t op)363 uint8_t flock_locks_cmd(uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint32_t inode,uint64_t owner,uint8_t op) {
364 inodelocks *il;
365 lock *l,*nl;
366 uint8_t ltype;
367
368 // flock_dump();
369 // syslog(LOG_NOTICE,"flock op: sessionid:%"PRIu32",msgid:%"PRIu32",reqid:%"PRIu32",inode:%"PRIu32",owner:%"PRIu64",op:%u",sessionid,msgid,reqid,inode,owner,op);
370
371 if (op!=FLOCK_INTERRUPT && op!=FLOCK_RELEASE) {
372 if (of_checknode(sessionid,inode)==0) {
373 return MFS_ERROR_NOTOPENED;
374 }
375 }
376 il = flock_inode_find(inode);
377 if (il==NULL) {
378 if (op==FLOCK_UNLOCK || op==FLOCK_INTERRUPT || op==FLOCK_RELEASE) {
379 return MFS_STATUS_OK;
380 }
381 il = flock_inode_new(inode);
382 }
383 if (op==FLOCK_INTERRUPT) {
384 l = il->waiting_head;
385 while (l) {
386 nl = l->next;
387 if (l->sessionid==sessionid && l->owner==owner) {
388 flock_lock_wake_up_one(l,reqid,MFS_ERROR_EINTR);
389 if (l->lock_instances==NULL) { // remove
390 flock_lock_remove(l);
391 }
392 }
393 l = nl;
394 }
395 return MFS_STATUS_OK;
396 }
397 for (l=il->active ; l ; l=l->next) {
398 if (l->sessionid==sessionid && l->owner==owner) {
399 if (op==FLOCK_UNLOCK || op==FLOCK_RELEASE) {
400 flock_lock_unlock(il,l);
401 if (il->waiting_head==NULL && il->active==NULL) {
402 flock_inode_remove(il->inode);
403 }
404 return MFS_STATUS_OK;
405 } else if (op==FLOCK_TRY_SHARED) {
406 if (l->ltype==LTYPE_READER) {
407 return MFS_STATUS_OK;
408 } else { // l->ltype==LTYPE_WRITER
409 l->ltype=LTYPE_READER;
410 flock_lock_check_waiting(il);
411 return MFS_STATUS_OK;
412 }
413 } else if (op==FLOCK_LOCK_SHARED) {
414 if (l->ltype==LTYPE_READER) {
415 return MFS_STATUS_OK;
416 } else { // l->ltype==LTYPE_WRITER
417 flock_lock_unlock(il,l);
418 return flock_lock_new(il,LTYPE_READER,sessionid,msgid,reqid,owner);
419 }
420 } else if (op==FLOCK_TRY_EXCLUSIVE) {
421 if (l->ltype==LTYPE_WRITER) {
422 return MFS_STATUS_OK;
423 } else { // l->ltype==LTYPE_READER
424 if (il->active->next==NULL) { // this lock is the only one
425 l->ltype=LTYPE_WRITER;
426 return MFS_STATUS_OK;
427 }
428 return MFS_ERROR_EAGAIN;
429 }
430 } else if (op==FLOCK_LOCK_EXCLUSIVE) {
431 if (l->ltype==LTYPE_WRITER) {
432 return MFS_STATUS_OK;
433 } else { // l->ltype==LTYPE_READER
434 flock_lock_unlock(il,l);
435 return flock_lock_new(il,LTYPE_WRITER,sessionid,msgid,reqid,owner);
436 }
437 }
438 return MFS_ERROR_EINVAL;
439 }
440 }
441 for (l=il->waiting_head ; l ; l=l->next) {
442 if (l->sessionid==sessionid && l->owner==owner) {
443 if (op==FLOCK_RELEASE) {
444 flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
445 flock_lock_remove(l);
446 return MFS_STATUS_OK;
447 } else if (op==FLOCK_UNLOCK) {
448 if (FlocksMode==MODE_CORRECT) {
449 // logically this call should do this:
450 flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
451 flock_lock_remove(l);
452 }
453 // but in all tested kernels it was just ignored
454 return MFS_STATUS_OK;
455 } else if (op==FLOCK_TRY_SHARED || op==FLOCK_TRY_EXCLUSIVE) {
456 return MFS_ERROR_EAGAIN;
457 } else if (op==FLOCK_LOCK_SHARED) {
458 if (l->ltype==LTYPE_READER) {
459 flock_lock_append_req(l,msgid,reqid);
460 return MFS_ERROR_WAITING;
461 } else {
462 // return MFS_ERROR_EINVAL;
463 flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
464 l->ltype=LTYPE_READER;
465 flock_lock_append_req(l,msgid,reqid);
466 return MFS_ERROR_WAITING;
467 }
468 } else if (op==FLOCK_LOCK_EXCLUSIVE) {
469 if (l->ltype==LTYPE_WRITER) {
470 flock_lock_append_req(l,msgid,reqid);
471 return MFS_ERROR_WAITING;
472 } else {
473 // return MFS_ERROR_EINVAL;
474 flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
475 l->ltype=LTYPE_WRITER;
476 flock_lock_append_req(l,msgid,reqid);
477 return MFS_ERROR_WAITING;
478 }
479 }
480 return MFS_ERROR_EINVAL;
481 }
482 }
483 if (op==FLOCK_UNLOCK || op==FLOCK_RELEASE) {
484 return MFS_STATUS_OK;
485 }
486 ltype = (op==FLOCK_TRY_SHARED || op==FLOCK_LOCK_SHARED)?LTYPE_READER:LTYPE_WRITER;
487 if (op==FLOCK_TRY_SHARED || op==FLOCK_TRY_EXCLUSIVE) {
488 if (flock_check(il,ltype)) {
489 return MFS_ERROR_EAGAIN;
490 }
491 }
492 return flock_lock_new(il,ltype,sessionid,msgid,reqid,owner);
493 }
494
flock_file_closed(uint32_t sessionid,uint32_t inode)495 void flock_file_closed(uint32_t sessionid,uint32_t inode) {
496 inodelocks *il;
497 lock *l,*nl;
498
499 il = flock_inode_find(inode);
500 if (il==NULL) {
501 return;
502 }
503
504 l = il->waiting_head;
505 while (l) {
506 nl = l->next;
507 if (l->sessionid==sessionid) {
508 flock_lock_remove(l);
509 }
510 l = nl;
511 }
512
513 l = il->active;
514 while (l) {
515 nl = l->next;
516 if (l->sessionid==sessionid) {
517 flock_lock_unlock(il,l);
518 }
519 l = nl;
520 }
521 if (il->waiting_head==NULL && il->active==NULL) {
522 flock_inode_remove(il->inode);
523 }
524 }
525
flock_list(uint32_t inode,uint8_t * buff)526 uint32_t flock_list(uint32_t inode,uint8_t *buff) {
527 inodelocks *il;
528 lock *l;
529 uint32_t h;
530 uint32_t ret=0;
531
532 if (inode==0) {
533 for (h=0 ; h<FLOCK_INODE_HASHSIZE ; h++) {
534 for (il = inodehash[h] ; il ; il=il->next) {
535 for (l=il->active ; l ; l=l->next) {
536 if (buff==NULL) {
537 ret+=37;
538 } else {
539 put32bit(&buff,il->inode);
540 put32bit(&buff,l->sessionid);
541 put64bit(&buff,l->owner);
542 memset(buff,0,20); // pid,start,end
543 buff+=20;
544 switch (l->ltype) {
545 case LTYPE_READER:
546 put8bit(&buff,1);
547 break;
548 case LTYPE_WRITER:
549 put8bit(&buff,2);
550 break;
551 default:
552 put8bit(&buff,0);
553 }
554 }
555 }
556 }
557 }
558 } else {
559 il = flock_inode_find(inode);
560 if (il!=NULL) {
561 for (l=il->active ; l ; l=l->next) {
562 if (buff==NULL) {
563 ret+=33;
564 } else {
565 put32bit(&buff,l->sessionid);
566 put64bit(&buff,l->owner);
567 memset(buff,0,20); // pid,start,end
568 buff+=20;
569 switch (l->ltype) {
570 case LTYPE_READER:
571 put8bit(&buff,1);
572 break;
573 case LTYPE_WRITER:
574 put8bit(&buff,2);
575 break;
576 default:
577 put8bit(&buff,0);
578 }
579 }
580 }
581 }
582 }
583 return ret;
584 }
585
flock_mr_change(uint32_t inode,uint32_t sessionid,uint64_t owner,char cmd)586 uint8_t flock_mr_change(uint32_t inode,uint32_t sessionid,uint64_t owner,char cmd) {
587 inodelocks *il;
588 lock *l,*nl;
589 uint8_t ltype;
590
591 if (cmd=='U' || cmd=='u') {
592 il = flock_inode_find(inode);
593 if (il==NULL) {
594 return MFS_ERROR_MISMATCH;
595 }
596 l=il->active;
597 while (l) {
598 nl = l->next;
599 if (l->sessionid==sessionid && l->owner==owner) {
600 flock_do_lock_remove(l);
601 meta_version_inc();
602 }
603 l = nl;
604 }
605 if (il->waiting_head==NULL && il->active==NULL) {
606 flock_inode_remove(il->inode);
607 }
608 return MFS_STATUS_OK;
609 } else if (cmd=='R' || cmd=='r' || cmd=='S' || cmd=='s') {
610 ltype = LTYPE_READER;
611 } else if (cmd=='W' || cmd=='w' || cmd=='E' || cmd=='e') {
612 ltype = LTYPE_WRITER;
613 } else {
614 return MFS_ERROR_EINVAL;
615 }
616 il = flock_inode_find(inode);
617 if (il==NULL) {
618 il = flock_inode_new(inode);
619 }
620 if (il->active!=NULL && (il->active->ltype==LTYPE_WRITER || ltype==LTYPE_WRITER)) {
621 return MFS_ERROR_MISMATCH;
622 }
623 l = malloc(sizeof(lock));
624 l->owner = owner;
625 l->sessionid = sessionid;
626 l->state = STATE_ACTIVE;
627 l->ltype = ltype;
628 l->lock_instances = NULL;
629 l->parent = il;
630 l->next = NULL;
631 l->prev = NULL;
632 flock_do_lock_inode_attach(l);
633 meta_version_inc();
634 return MFS_STATUS_OK;
635 }
636
/* Size of one stored lock record: inode (4) + owner (8) + sessionid (4) + ltype (1). */
#define FLOCK_REC_SIZE 17
638
/*
 * Write all active locks to fd as FLOCK_REC_SIZE-byte records (inode, owner,
 * sessionid, ltype) followed by one all-zero record that ends the list.
 * Waiting locks are not stored.
 *
 * Returns 0 on success and 0xFF when a write fails. With fd==NULL nothing is
 * written and 0x10 is returned (presumably the format version - it is the
 * only 'mver' flock_load accepts; confirm against the caller).
 */
uint8_t flock_store(bio *fd) {
	uint8_t rec[FLOCK_REC_SIZE];
	uint8_t *wptr;
	uint32_t bucket;
	inodelocks *il;
	lock *l;

	if (fd==NULL) {
		return 0x10;
	}
	for (bucket=0 ; bucket<FLOCK_INODE_HASHSIZE ; bucket++) {
		for (il = inodehash[bucket] ; il!=NULL ; il = il->next) {
			for (l = il->active ; l!=NULL ; l = l->next) {
				wptr = rec;
				put32bit(&wptr,il->inode);
				put64bit(&wptr,l->owner);
				put32bit(&wptr,l->sessionid);
				put8bit(&wptr,l->ltype);
				if (bio_write(fd,rec,FLOCK_REC_SIZE)!=FLOCK_REC_SIZE) {
					return 0xFF;
				}
			}
		}
	}
	// terminating record
	memset(rec,0,FLOCK_REC_SIZE);
	if (bio_write(fd,rec,FLOCK_REC_SIZE)!=FLOCK_REC_SIZE) {
		return 0xFF;
	}
	return 0;
}
669
/*
 * Load active locks stored by flock_store.
 *
 * Records are read until the terminating record (inode, owner and sessionid
 * all zero). A record is rejected when of_checknode() returns 0 for its
 * (sessionid,inode) or when it conflicts with a lock already loaded for the
 * same inode (only readers may share an inode). With ignoreflag set rejected
 * records are logged and skipped, otherwise loading fails.
 *
 * Returns 0 on success and -1 on unsupported mver (only 0x10 is accepted),
 * short read or rejected record.
 */
int flock_load(bio *fd,uint8_t mver,uint8_t ignoreflag) {
	uint8_t loadbuff[FLOCK_REC_SIZE];
	const uint8_t *ptr;
	int32_t r;
	uint32_t inode,sessionid;
	uint64_t owner;
	uint8_t ltype;
	inodelocks *il;
	lock *l;

	if (mver!=0x10) {
		return -1;
	}

	for (;;) {
		r = bio_read(fd,loadbuff,FLOCK_REC_SIZE);
		if (r!=FLOCK_REC_SIZE) {
			return -1;
		}
		ptr = loadbuff;
		inode = get32bit(&ptr);
		owner = get64bit(&ptr);
		sessionid = get32bit(&ptr);
		ltype = get8bit(&ptr);
		if (inode==0 && owner==0 && sessionid==0) {
			return 0;	// terminating record
		}
		if (of_checknode(sessionid,inode)==0) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading flock_locks: lock on closed file !!! (ignoring)");
				continue;
			} else {
				mfs_syslog(LOG_ERR,"loading flock_locks: lock on closed file !!!");
				return -1;
			}
		}
		// add lock
		il = flock_inode_find(inode);
		if (il==NULL) {
			il = flock_inode_new(inode);
		}
		if (il->active!=NULL && (il->active->ltype==LTYPE_WRITER || ltype==LTYPE_WRITER)) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading flock_locks: wrong lock !!! (ignoring)");
				continue;
			} else {
				mfs_syslog(LOG_ERR,"loading flock_locks: wrong lock !!!");
				return -1;
			}
		}
		l = malloc(sizeof(lock));
		passert(l);	// same allocation-failure policy as the other allocations in this file
		l->owner = owner;
		l->sessionid = sessionid;
		l->state = STATE_ACTIVE;
		l->ltype = ltype;
		l->lock_instances = NULL;
		l->parent = il;
		l->next = NULL;
		l->prev = NULL;
		flock_do_lock_inode_attach(l);	// no changelog entry while loading
	}
	return 0; // unreachable
}
733
flock_cleanup(void)734 void flock_cleanup(void) {
735 uint32_t h,j;
736 inodelocks *il,*nil;
737 lock *l,*nl;
738 instance *i,*ni;
739 for (h=0 ; h<FLOCK_INODE_HASHSIZE ; h++) {
740 il = inodehash[h];
741 while (il) {
742 nil = il->next;
743 for (j=0 ; j<2 ; j++) {
744 l = j?il->active:il->waiting_head;
745 while (l) {
746 nl = l->next;
747 i = l->lock_instances;
748 while (i) {
749 ni = i->next;
750 free(i);
751 i = ni;
752 }
753 free(l);
754 l = nl;
755 }
756 }
757 free(il);
758 il = nil;
759 }
760 inodehash[h] = NULL;
761 }
762 }
763
flock_init(void)764 int flock_init(void) {
765 uint32_t i;
766 inodehash = malloc(sizeof(inodelocks*)*FLOCK_INODE_HASHSIZE);
767 for (i=0 ; i<FLOCK_INODE_HASHSIZE ; i++) {
768 inodehash[i] = NULL;
769 }
770 FlocksMode = 0;
771 return 0;
772 }
773