1 /*
2 * Copyright (C) 2021 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3 *
4 * This file is part of MooseFS.
5 *
6 * MooseFS is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, version 2 (only).
9 *
10 * MooseFS is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with MooseFS; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18 * or visit http://www.gnu.org/licenses/gpl-2.0.html
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24
25 #include <stdlib.h>
26 #include <inttypes.h>
27
28 #include "MFSCommunication.h"
29 #include "massert.h"
30
31 #ifndef MFSTEST
32
33 #include "matoclserv.h"
34 #include "openfiles.h"
35 #include "metadata.h"
36 #include "main.h"
37 #include "changelog.h"
38 #include "datapack.h"
39 #include "bio.h"
40 #include "slogger.h"
41
42 #endif
43
44 // ranges are closed-open: <start,end)
45
/*
 * One locked byte range, stored as a node of a singly linked list.
 * The range is closed-open: it covers offsets start .. end-1.
 */
typedef struct _range {
	uint64_t start;		// first offset covered by the range
	uint64_t end;		// first offset after the range (exclusive)
	uint8_t type;		// lock type code; the code compares it with POSIX_LOCK_RDLCK / POSIX_LOCK_WRLCK
	struct _range *next;	// next range in the list (NULL terminates the list)
} range;
52
53 #ifndef MFSTEST
54
/*
 * Active (granted) locks held on one inode by a single lock owner.
 * An owner is identified by the pair (sessionid,owner).
 */
typedef struct _alock {
	uint64_t owner;		// owner id supplied with the lock request
	uint32_t sessionid;	// session the owner belongs to
	uint32_t pid;		// pid stored when the entry was created; returned by GET to requests from the same session
	range *ranges;		// list of ranges locked by this owner
	struct _alock *next;	// next owner holding locks on the same inode
} alock;
62
/*
 * Lock request that could not be granted yet and waits in the per-inode queue.
 */
typedef struct _wlock {
	uint64_t owner;		// owner id of the requester
	uint32_t sessionid;	// session of the requester
	uint32_t pid;		// pid passed with the request
	uint32_t msgid;		// message id passed to matoclserv_fuse_posix_lock_wake_up when the request is answered
	uint32_t reqid;		// request id used to match an interrupt command with this entry
	uint64_t start;		// requested range start
	uint64_t end;		// requested range end (exclusive)
	uint8_t type;		// requested lock type
	struct _wlock *next,**prev;	// queue links; prev points at the pointer that points to this entry
} wlock;
74
/*
 * All lock state of one inode; records are chained in the buckets of inodehash.
 */
typedef struct _inodelocks {
	uint32_t inode;		// inode number (hash key)
	alock *active;		// granted locks, one entry per owner
	wlock *waiting_head,**waiting_tail;	// queue of waiting requests; waiting_tail points at the pointer where the next entry is appended
	struct _inodelocks *next;	// next record in the same hash bucket
} inodelocks;
81
// number of buckets in the inode hash table
#define POSIX_LOCK_INODE_HASHSIZE 1024

// bucket index of an inode: the inode number multiplied by a constant, reduced modulo the table size
#define POSIX_LOCK_INODE_HASH(inode) (((inode)*0x738A2379)%(POSIX_LOCK_INODE_HASHSIZE))

// hash table of per-inode lock records; the array is allocated by posix_lock_init
static inodelocks **inodehash;
87
88 #if 0
89 static inline void posix_lock_dump(void) {
90 uint32_t h;
91 inodelocks *il;
92 alock *al;
93 wlock *wl,**wlptr;
94 range *r;
95 syslog(LOG_NOTICE,"posix lock dump:");
96 for (h=0 ; h<POSIX_LOCK_INODE_HASHSIZE ; h++) {
97 for (il = inodehash[h] ; il ; il=il->next) {
98 syslog(LOG_NOTICE," inode: %"PRIu32" (active:%s,waiting:%s)",il->inode,il->active?"yes":"no",il->waiting_head?"yes":"no");
99 for (al = il->active ; al ; al=al->next) {
100 syslog(LOG_NOTICE," active lock: session:%"PRIu32",owner:%"PRIu64",pid:%"PRIu32,al->sessionid,al->owner,al->pid);
101 if (al->ranges==NULL) {
102 syslog(LOG_WARNING," no lock ranges !!!");
103 }
104 for (r = al->ranges ; r ; r=r->next) {
105 syslog(LOG_NOTICE," range: start:%"PRIu64",end:%"PRIu64",type:%c",r->start,r->end,(r->type==POSIX_LOCK_RDLCK)?'R':(r->type==POSIX_LOCK_WRLCK)?'W':'?');
106 }
107 }
108 wlptr = &(il->waiting_head);
109 for (wl = il->waiting_head ; wl ; wl=wl->next) {
110 syslog(LOG_NOTICE," waiting lock: session:%"PRIu32",owner:%"PRIu64",pid:%"PRIu32",start:%"PRIu64",end:%"PRIu64",type:%c",wl->sessionid,wl->owner,wl->pid,wl->start,wl->end,wl->type);
111 wlptr = &(wl->next);
112 }
113 if (il->waiting_tail != wlptr) {
114 syslog(LOG_WARNING," wrong tail pointer !!!");
115 }
116 }
117 }
118 }
119 #endif
120
121 #endif
122
/*
 * Searches list r for the first range that conflicts with the request passed
 * through type, start and end. Two ranges conflict when they intersect and at
 * least one of them is a write lock.
 * On conflict the description of the conflicting range is stored through the
 * three pointers and 1 is returned; otherwise nothing is modified and 0 is returned.
 */
static inline int posix_lock_test_wlock(range *r,uint8_t *type,uint64_t *start,uint64_t *end) {
	range *cur;

	for (cur=r ; cur!=NULL ; cur=cur->next) {
		if (*type!=POSIX_LOCK_WRLCK && cur->type!=POSIX_LOCK_WRLCK) {
			continue;	// neither side is a write lock
		}
		if (*end <= cur->start || *start >= cur->end) {
			continue;	// ranges do not intersect
		}
		*type = cur->type;
		*start = cur->start;
		*end = cur->end;
		return 1;
	}
	return 0;
}
137
/*
 * Applies a request of the given type on range <start,end) to the range list *rptr
 * (the ranges of a single owner).
 *
 * The list is walked from its head. Every existing range "r" is compared with the
 * request "wl" and, depending on their relative position, r is skipped, removed,
 * trimmed, split or merged with the request. When type is POSIX_LOCK_UNLCK no new
 * range is ever inserted, so the request only removes coverage.
 * The walk stops (added=1) as soon as an existing range reaches past the end of the
 * request; if the whole list is consumed, the request is appended at the tail (case 7).
 *
 * NOTE(review): the case analysis assumes the list is ordered by offset and that its
 * ranges do not overlap; the insert positions used here keep it that way.
 *
 * With MFSTEST defined each case additionally prints its name and every malloc/free.
 */
static inline void posix_lock_apply_range(range **rptr,uint8_t type,uint64_t start,uint64_t end) {
	range *nr,*r;
	uint8_t added;

	added = 0;
	while (added==0 && (r=*rptr)) {
		if (r->end < start) {
			// r lies entirely before the request (with a gap) - just skip it
			// wl:       |-----|
			// r:  |--|
#ifdef MFSTEST
			printf("case 1\n");
#endif
			rptr = &(r->next);
		} else if (r->start > end) {
			// r lies entirely after the request (with a gap) - insert the request before r
			// wl: |-----|
			// r:          |--|
			if (type!=POSIX_LOCK_UNLCK) {
#ifdef MFSTEST
				printf("case 2a\n");
				printf("malloc\n");
#endif
				nr = malloc(sizeof(range));
				passert(nr);
				nr->start = start;
				nr->end = end;
				nr->type = type;
				nr->next = *rptr;
				*rptr = nr;
#ifdef MFSTEST
			} else {
				printf("case 2b\n");
#endif
			}
			added = 1;
		} else if (start <= r->start && end >= r->end) {
			// r is fully covered by the request - drop it and keep walking
			// wl: |-----|    |-----|
			// r:    |--|     |-----|
#ifdef MFSTEST
			printf("case 3\n");
			printf("free\n");
#endif
			*rptr = r->next;
			free(r);
		} else if (r->start < start && r->end <= end) {
			// r begins before the request and ends inside it (or exactly at its start or end)
			// wl:   |-----|      |-----|
			// r:  |---|        |-------|
			if (r->type == type) {
				// same type: absorb r by extending the request to the left
#ifdef MFSTEST
				printf("case 4a\n");
				printf("free\n");
#endif
				start = r->start;
				*rptr = r->next;
				free(r);
			} else {
				// different type: cut off the overlapping tail of r
#ifdef MFSTEST
				printf("case 4b\n");
#endif
				r->end = start;
				rptr = &(r->next);
			}
		} else if (r->start >= start && r->end > end) {
			// r begins inside the request (or exactly at its start or end) and ends after it
			// wl: |-----|      |-----|
			// r:     |---|     |-------|
			if (r->type == type) {
				// same type: extend r to the left so that it covers the request
#ifdef MFSTEST
				printf("case 5a\n");
#endif
				r->start = start;
				added = 1;
			} else {
				// different type: cut off the overlapping head of r and put the request before it
				r->start = end;
				if (type!=POSIX_LOCK_UNLCK) {
#ifdef MFSTEST
					printf("case 5b\n");
					printf("malloc\n");
#endif
					nr = malloc(sizeof(range));
					passert(nr);
					nr->start = start;
					nr->end = end;
					nr->type = type;
					nr->next = r;
					*rptr = nr;
#ifdef MFSTEST
				} else {
					printf("case 5c\n");
#endif
				}
				added = 1;
			}
		} else {
			// r strictly contains the request on both sides
			// wl:   |-----|
			// r:  |---------|
			if (r->type != type) {
				// split r: first create the part that remains after the request ...
				nr = malloc(sizeof(range));
				passert(nr);
				nr->start = end;
				nr->end = r->end;
				nr->type = r->type;
				nr->next = r->next;
				r->next = nr;
				if (type!=POSIX_LOCK_UNLCK) {
					// ... then put the request itself between both parts
#ifdef MFSTEST
					printf("case 6a\n");
					printf("malloc\n");
					printf("malloc\n");
#endif
					nr = malloc(sizeof(range));
					passert(nr);
					nr->start = start;
					nr->end = end;
					nr->type = type;
					nr->next = r->next;
					r->next = nr;
#ifdef MFSTEST
				} else {
					printf("case 6b\n");
					printf("malloc\n");
#endif
				}
				// ... and finally shorten r to the part before the request
				r->end = start;
#ifdef MFSTEST
			} else {
				// same type: the request is already covered, nothing to do
				printf("case 6c\n");
#endif
			}
			added = 1;
		}
	}
	if (added==0 && type!=POSIX_LOCK_UNLCK) {
		// end of list reached: rptr points at the terminating pointer, append the request there
#ifdef MFSTEST
		printf("case 7\n");
		printf("malloc\n");
#endif
		nr = malloc(sizeof(range));
		passert(nr);
		nr->start = start;
		nr->end = end;
		nr->type = type;
		nr->next = NULL;
		*rptr = nr;
	}
}
282
283 #ifndef MFSTEST
284
posix_lock_inode_find(uint32_t inode)285 static inline inodelocks* posix_lock_inode_find(uint32_t inode) {
286 inodelocks *il;
287
288 for (il = inodehash[POSIX_LOCK_INODE_HASH(inode)] ; il ; il=il->next) {
289 if (il->inode==inode) {
290 return il;
291 }
292 }
293 return NULL;
294 }
295
posix_lock_inode_new(uint32_t inode)296 static inline inodelocks* posix_lock_inode_new(uint32_t inode) {
297 inodelocks *il;
298 uint32_t hash;
299
300 il = malloc(sizeof(inodelocks));
301 passert(il);
302 il->inode = inode;
303 il->active = NULL;
304 il->waiting_head = NULL;
305 il->waiting_tail = &(il->waiting_head);
306 hash = POSIX_LOCK_INODE_HASH(inode);
307 il->next = inodehash[hash];
308 inodehash[hash] = il;
309 return il;
310 }
311
posix_lock_inode_remove(uint32_t inode)312 static inline void posix_lock_inode_remove(uint32_t inode) {
313 inodelocks *il,**ilp;
314 uint32_t hash;
315
316 hash = POSIX_LOCK_INODE_HASH(inode);
317 ilp = inodehash + hash;
318 while ((il=*ilp)) {
319 if (il->inode==inode) {
320 massert(il->active==NULL && il->waiting_head==NULL,"inode posix lock record not empty !!!");
321 *ilp = il->next;
322 free(il);
323 } else {
324 ilp = &(il->next);
325 }
326 }
327 }
328
/*
 * Unlinks waiting request wl from the queue of il and frees it.
 * When wl was the last entry, the queue tail pointer is moved back.
 */
static inline void posix_lock_remove_lock(inodelocks *il,wlock *wl) {
	wlock *succ;
	wlock **back;

	succ = wl->next;
	back = wl->prev;
	*back = succ;
	if (succ!=NULL) {
		succ->prev = back;
	} else {
		il->waiting_tail = back;
	}
	free(wl);
}
338
/*
 * Looks for an active lock of another owner that conflicts with the request
 * given through type, start and end. When one is found, its description
 * replaces the request data, *pid receives the pid of the holder (or 0 when the
 * holder belongs to a different session) and 1 is returned. Returns 0 and
 * leaves all outputs untouched when there is no conflict.
 */
static inline int posix_lock_get_offensive_lock(inodelocks *il,uint32_t sessionid,uint64_t owner,uint8_t *type,uint64_t *start,uint64_t *end,uint32_t *pid) {
	alock *holder;

	for (holder=il->active ; holder!=NULL ; holder=holder->next) {
		if (holder->owner==owner && holder->sessionid==sessionid) {
			continue;	// locks of the requesting owner never conflict with the request
		}
		if (posix_lock_test_wlock(holder->ranges,type,start,end)==0) {
			continue;
		}
		*pid = (sessionid==holder->sessionid) ? holder->pid : 0;
		return 1;
	}
	return 0;
}
355
/*
 * Returns 1 when any owner other than (sessionid,owner) holds an active lock
 * conflicting with the request of the given type on <start,end), 0 otherwise.
 * The request data is passed by value, so the caller's variables are not changed.
 */
static inline int posix_lock_find_offensive_lock(inodelocks *il,uint32_t sessionid,uint64_t owner,uint8_t type,uint64_t start,uint64_t end) {
	alock *holder;

	for (holder=il->active ; holder!=NULL ; holder=holder->next) {
		if (holder->owner==owner && holder->sessionid==sessionid) {
			continue;
		}
		if (posix_lock_test_wlock(holder->ranges,&type,&start,&end)) {
			return 1;
		}
	}
	return 0;
}
367
/*
 * Applies a lock or unlock request to the active locks of il without any
 * conflict checking.
 * If the owner already has an entry, the request is applied to its ranges and
 * the entry is dropped when no range is left. Otherwise an unlock is ignored
 * and a lock creates a new entry (storing pid) appended at the end of the list.
 */
static inline void posix_lock_do_apply_lock(inodelocks *il,uint32_t sessionid,uint64_t owner,uint8_t type,uint64_t start,uint64_t end,uint32_t pid) {
	alock *al,**link;

	for (link=&(il->active) ; (al=*link)!=NULL ; link=&(al->next)) {
		if (al->owner!=owner || al->sessionid!=sessionid) {
			continue;
		}
		posix_lock_apply_range(&(al->ranges),type,start,end);
		if (al->ranges==NULL) {
			*link = al->next;
			free(al);
		}
		return;
	}
	// the owner holds nothing here, so there is nothing to unlock
	if (type==POSIX_LOCK_UNLCK) {
		return;
	}
	al = malloc(sizeof(alock));
	passert(al);
	al->sessionid = sessionid;
	al->owner = owner;
	al->pid = pid;
	al->ranges = NULL;
	al->next = NULL;
	*link = al;	// link still points at the terminating pointer of the list
	posix_lock_apply_range(&(al->ranges),type,start,end);
}
395
/*
 * Writes a POSIXLOCK entry to the changelog (type is logged as 'R', 'W' or 'U')
 * and then applies the request to the active locks of il.
 */
static inline void posix_lock_apply_lock(inodelocks *il,uint32_t sessionid,uint64_t owner,uint8_t type,uint64_t start,uint64_t end,uint32_t pid) {
	char typechar;

	if (type==POSIX_LOCK_RDLCK) {
		typechar = 'R';
	} else if (type==POSIX_LOCK_WRLCK) {
		typechar = 'W';
	} else {
		typechar = 'U';
	}
	changelog("%"PRIu32"|POSIXLOCK(%"PRIu32",%"PRIu32",%"PRIu64",%c,%"PRIu64",%"PRIu64",%"PRIu32")",main_time(),il->inode,sessionid,owner,typechar,start,end,pid);
	posix_lock_do_apply_lock(il,sessionid,owner,type,start,end,pid);
}
400
/*
 * Creates a waiting request from the given parameters and appends it at the
 * end of the waiting queue of il.
 */
static inline void posix_lock_append_lock(inodelocks *il,uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint64_t owner,uint8_t type,uint64_t start,uint64_t end,uint32_t pid) {
	wlock *wl;
	wlock **slot;

	wl = malloc(sizeof(wlock));
	passert(wl);
	// who is waiting
	wl->sessionid = sessionid;
	wl->owner = owner;
	wl->pid = pid;
	// how to answer / interrupt the request
	wl->msgid = msgid;
	wl->reqid = reqid;
	// what is requested
	wl->type = type;
	wl->start = start;
	wl->end = end;
	// link at the tail of the queue
	slot = il->waiting_tail;
	wl->next = NULL;
	wl->prev = slot;
	*slot = wl;
	il->waiting_tail = &(wl->next);
}
418
/*
 * Cancels the first waiting request of il that matches sessionid and reqid:
 * the requester is woken up with MFS_ERROR_EINTR and the request is removed
 * from the queue. Does nothing when no request matches.
 */
static inline void posix_lock_interrupt(inodelocks *il,uint32_t sessionid,uint32_t reqid) {
	wlock *wl;

	wl = il->waiting_head;
	while (wl!=NULL && (wl->sessionid!=sessionid || wl->reqid!=reqid)) {
		wl = wl->next;
	}
	if (wl==NULL) {
		return;
	}
	matoclserv_fuse_posix_lock_wake_up(sessionid,wl->msgid,MFS_ERROR_EINTR);
	posix_lock_remove_lock(il,wl);
}
429
/*
 * Called after the active locks of il have changed.
 * A record without active and waiting locks is removed. Otherwise the waiting
 * queue is scanned in order and every request that no longer conflicts with
 * locks of other owners is granted: applied (with a changelog entry), answered
 * with MFS_STATUS_OK and removed from the queue.
 */
static inline void posix_lock_check_waiting(inodelocks *il) {
	wlock *wl,*following;

	if (il->active==NULL && il->waiting_head==NULL) {
		posix_lock_inode_remove(il->inode);
		return;
	}
	for (wl=il->waiting_head ; wl!=NULL ; wl=following) {
		following = wl->next;	// remember the successor, wl may be freed below
		if (posix_lock_find_offensive_lock(il,wl->sessionid,wl->owner,wl->type,wl->start,wl->end)!=0) {
			continue;
		}
		posix_lock_apply_lock(il,wl->sessionid,wl->owner,wl->type,wl->start,wl->end,wl->pid);
		matoclserv_fuse_posix_lock_wake_up(wl->sessionid,wl->msgid,MFS_STATUS_OK);
		posix_lock_remove_lock(il,wl);
	}
}
447
posix_lock_cmd(uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint32_t inode,uint64_t owner,uint8_t op,uint8_t * type,uint64_t * start,uint64_t * end,uint32_t * pid)448 uint8_t posix_lock_cmd(uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint32_t inode,uint64_t owner,uint8_t op,uint8_t *type,uint64_t *start,uint64_t *end,uint32_t *pid) {
449 inodelocks *il;
450 uint8_t i_type;
451 uint64_t i_start;
452 uint64_t i_end;
453 uint32_t i_pid;
454
455 i_type = *type;
456 i_start = *start;
457 i_end = *end;
458 i_pid = *pid;
459
460 // posix_lock_dump();
461 // syslog(LOG_NOTICE,"new lock cmd: sessionid:%"PRIu32",msgid:%"PRIu32",reqid:%"PRIu32",inode:%"PRIu32",owner:%"PRIX64",op:%c,type:%c,start:%"PRIu64",end:%"PRIu64",pid:%"PRIu32,sessionid,msgid,reqid,inode,owner,(op==POSIX_LOCK_CMD_INT)?'I':(op==POSIX_LOCK_CMD_GET)?'G':(op==POSIX_LOCK_CMD_SET)?'S':(op==POSIX_LOCK_CMD_TRY)?'T':'?',(i_type==POSIX_LOCK_RDLCK)?'R':(i_type==POSIX_LOCK_WRLCK)?'W':(i_type==POSIX_LOCK_UNLCK)?'U':'?',i_start,i_end,i_pid);
462
463 if ((op==POSIX_LOCK_CMD_SET || op==POSIX_LOCK_CMD_TRY) && i_type!=POSIX_LOCK_UNLCK) {
464 if (of_checknode(sessionid,inode)==0) {
465 return MFS_ERROR_NOTOPENED;
466 }
467 }
468
469 il = posix_lock_inode_find(inode);
470
471 if (op==POSIX_LOCK_CMD_INT) {
472 if (il==NULL) {
473 return MFS_STATUS_OK;
474 }
475 posix_lock_interrupt(il,sessionid,reqid);
476 return MFS_STATUS_OK;
477 }
478 if (op==POSIX_LOCK_CMD_GET) {
479 if (il!=NULL && i_type!=POSIX_LOCK_UNLCK) {
480 if (posix_lock_get_offensive_lock(il,sessionid,owner,type,start,end,pid)) {
481 return MFS_STATUS_OK;
482 }
483 }
484 *type = POSIX_LOCK_UNLCK;
485 *start = 0;
486 *end = 0;
487 *pid = 0;
488 return MFS_STATUS_OK;
489 }
490 if (il!=NULL && i_type!=POSIX_LOCK_UNLCK) {
491 if (posix_lock_find_offensive_lock(il,sessionid,owner,i_type,i_start,i_end)) {
492 if (op==POSIX_LOCK_CMD_TRY) {
493 return MFS_ERROR_EAGAIN;
494 } else {
495 posix_lock_append_lock(il,sessionid,msgid,reqid,owner,i_type,i_start,i_end,i_pid);
496 return MFS_ERROR_WAITING;
497 }
498 }
499 }
500 if (i_type==POSIX_LOCK_UNLCK) {
501 if (il==NULL) {
502 return MFS_STATUS_OK;
503 }
504 posix_lock_apply_lock(il,sessionid,owner,i_type,i_start,i_end,i_pid);
505 posix_lock_check_waiting(il);
506 return MFS_STATUS_OK;
507 }
508 if (il==NULL) {
509 il = posix_lock_inode_new(inode);
510 }
511 if (posix_lock_find_offensive_lock(il,sessionid,owner,i_type,i_start,i_end)) {
512 posix_lock_append_lock(il,sessionid,msgid,reqid,owner,i_type,i_start,i_end,i_pid);
513 return MFS_ERROR_WAITING;
514 }
515 posix_lock_apply_lock(il,sessionid,owner,i_type,i_start,i_end,i_pid);
516 posix_lock_check_waiting(il);
517 return MFS_STATUS_OK;
518 }
519
posix_lock_file_closed(uint32_t sessionid,uint32_t inode)520 void posix_lock_file_closed(uint32_t sessionid,uint32_t inode) {
521 inodelocks *il;
522 wlock *wl,*nwl;
523 alock *al,**alptr;
524 uint8_t changed;
525
526 il = posix_lock_inode_find(inode);
527 if (il==NULL) {
528 return;
529 }
530
531 wl = il->waiting_head;
532 while (wl) {
533 nwl = wl->next;
534 if (wl->sessionid==sessionid) {
535 posix_lock_remove_lock(il,wl);
536 }
537 wl = nwl;
538 }
539
540 changed = 0;
541 alptr = &(il->active);
542 while ((al=*alptr)) {
543 if (al->sessionid==sessionid) {
544 posix_lock_apply_range(&(al->ranges),POSIX_LOCK_UNLCK,0,UINT64_MAX);
545 massert(al->ranges==NULL,"locks axists after unlocking everything !!!");
546 *alptr = al->next;
547 free(al);
548 changed = 1;
549 } else {
550 alptr = &(al->next);
551 }
552 }
553
554 if (changed) {
555 posix_lock_check_waiting(il);
556 } else if (il->active==NULL && il->waiting_head==NULL) {
557 posix_lock_inode_remove(il->inode);
558 }
559 }
560
posix_lock_list(uint32_t inode,uint8_t * buff)561 uint32_t posix_lock_list(uint32_t inode,uint8_t *buff) {
562 inodelocks *il;
563 alock *al;
564 range *r;
565 uint32_t h;
566 uint32_t ret=0;
567
568 if (inode==0) {
569 for (h=0 ; h<POSIX_LOCK_INODE_HASHSIZE ; h++) {
570 for (il = inodehash[h] ; il ; il=il->next) {
571 for (al=il->active ; al ; al=al->next) {
572 for (r=al->ranges ; r ; r=r->next) {
573 if (buff==NULL) {
574 ret+=37;
575 } else {
576 put32bit(&buff,il->inode);
577 put32bit(&buff,al->sessionid);
578 put64bit(&buff,al->owner);
579 put32bit(&buff,al->pid);
580 put64bit(&buff,r->start);
581 put64bit(&buff,r->end);
582 switch (r->type) {
583 case POSIX_LOCK_RDLCK:
584 put8bit(&buff,1);
585 break;
586 case POSIX_LOCK_WRLCK:
587 put8bit(&buff,2);
588 break;
589 default:
590 put8bit(&buff,0);
591 }
592 }
593 }
594 }
595 }
596 }
597 } else {
598 il = posix_lock_inode_find(inode);
599 if (il!=NULL) {
600 for (al=il->active ; al ; al=al->next) {
601 for (r=al->ranges ; r ; r=r->next) {
602 if (buff==NULL) {
603 ret+=33;
604 } else {
605 put32bit(&buff,al->sessionid);
606 put64bit(&buff,al->owner);
607 put32bit(&buff,al->pid);
608 put64bit(&buff,r->start);
609 put64bit(&buff,r->end);
610 switch (r->type) {
611 case POSIX_LOCK_RDLCK:
612 put8bit(&buff,1);
613 break;
614 case POSIX_LOCK_WRLCK:
615 put8bit(&buff,2);
616 break;
617 default:
618 put8bit(&buff,0);
619 }
620 }
621 }
622 }
623 }
624 }
625 return ret;
626 }
627
posix_lock_mr_change(uint32_t inode,uint32_t sessionid,uint64_t owner,char cmd,uint64_t start,uint64_t end,uint32_t pid)628 uint8_t posix_lock_mr_change(uint32_t inode,uint32_t sessionid,uint64_t owner,char cmd,uint64_t start,uint64_t end,uint32_t pid) {
629 inodelocks *il;
630 uint8_t type;
631
632 if (cmd=='U' || cmd=='u') {
633 il = posix_lock_inode_find(inode);
634 if (il==NULL) {
635 return MFS_ERROR_MISMATCH;
636 }
637 type = POSIX_LOCK_UNLCK;
638 } else if (cmd=='R' || cmd=='r' || cmd=='S' || cmd=='s') {
639 il = posix_lock_inode_find(inode);
640 if (il==NULL) {
641 il = posix_lock_inode_new(inode);
642 }
643 type = POSIX_LOCK_RDLCK;
644 } else if (cmd=='W' || cmd=='w' || cmd=='E' || cmd=='e') {
645 il = posix_lock_inode_find(inode);
646 if (il==NULL) {
647 il = posix_lock_inode_new(inode);
648 }
649 type = POSIX_LOCK_WRLCK;
650 } else {
651 return MFS_ERROR_EINVAL;
652 }
653 if (type!=POSIX_LOCK_UNLCK && posix_lock_find_offensive_lock(il,sessionid,owner,type,start,end)) {
654 return MFS_ERROR_MISMATCH;
655 }
656 posix_lock_do_apply_lock(il,sessionid,owner,type,start,end,pid);
657 meta_version_inc();
658 return MFS_STATUS_OK;
659 }
660
// size in bytes of one stored lock record: inode:32 owner:64 sessionid:32 pid:32 start:64 end:64 type:8
#define POSIX_LOCK_REC_SIZE 37
662
/*
 * Writes all active lock ranges to fd, one POSIX_LOCK_REC_SIZE-byte record per
 * range (inode, owner, sessionid, pid, start, end, type), followed by an
 * all-zero record that terminates the list. Waiting requests are not stored.
 *
 * Returns 0 on success and 0xFF when a write fails. When called with fd==NULL
 * nothing is written and 0x10 is returned (the same value posix_lock_load
 * accepts as mver).
 */
uint8_t posix_lock_store(bio *fd) {
	uint8_t storebuff[POSIX_LOCK_REC_SIZE];
	uint8_t *wptr;
	uint32_t bucket;
	inodelocks *il;
	alock *al;
	range *r;

	if (fd==NULL) {
		return 0x10;
	}
	bucket = 0;
	while (bucket<POSIX_LOCK_INODE_HASHSIZE) {
		for (il=inodehash[bucket] ; il!=NULL ; il=il->next) {
			for (al=il->active ; al!=NULL ; al=al->next) {
				for (r=al->ranges ; r!=NULL ; r=r->next) {
					wptr = storebuff;
					put32bit(&wptr,il->inode);
					put64bit(&wptr,al->owner);
					put32bit(&wptr,al->sessionid);
					put32bit(&wptr,al->pid);
					put64bit(&wptr,r->start);
					put64bit(&wptr,r->end);
					put8bit(&wptr,r->type);
					if (bio_write(fd,storebuff,POSIX_LOCK_REC_SIZE)!=POSIX_LOCK_REC_SIZE) {
						return 0xFF;
					}
				}
			}
		}
		bucket++;
	}
	// end-of-list marker
	memset(storebuff,0,POSIX_LOCK_REC_SIZE);
	if (bio_write(fd,storebuff,POSIX_LOCK_REC_SIZE)==POSIX_LOCK_REC_SIZE) {
		return 0;
	}
	return 0xFF;
}
699
/*
 * Reads lock records written by posix_lock_store from fd and rebuilds the
 * active lock lists.
 *
 * mver       - format version of the section; only 0x10 is accepted
 * ignoreflag - when non-zero, invalid records are logged and skipped instead of
 *              aborting the load
 *
 * Records are expected to be grouped by inode, then by (sessionid,owner), with
 * the ranges of one owner in ascending order. A record with inode, owner and
 * sessionid all equal to zero ends the list.
 *
 * Returns 0 on success and -1 on unsupported version, short read or (without
 * ignoreflag) an invalid record.
 */
int posix_lock_load(bio *fd,uint8_t mver,uint8_t ignoreflag) {
	uint8_t loadbuff[POSIX_LOCK_REC_SIZE];
	const uint8_t *ptr;
	int32_t l;
	uint32_t inode,lastinode,sessionid,lastsessionid,pid;
	uint64_t owner,lastowner,start,end,lastend;
	uint8_t type,lasttype;
	uint8_t fino,fses;
	inodelocks *il;
	alock *al,**altail;
	range *r,**rtail;

	if (mver!=0x10) {
		return -1;
	}

	fino = 1;	// no inode processed yet
	fses = 1;	// no owner processed yet for the current inode
	lastinode = 0;
	lastsessionid = 0;
	lastowner = 0;
	lasttype = 0; // make gcc happy
	lastend = 0; // make gcc happy
	il = NULL; // make gcc happy
	al = NULL; // make gcc happy
	r = NULL; // make gcc happy
	altail = NULL; // make gcc happy
	rtail = NULL; // make gcc happy
	for (;;) {
		l = bio_read(fd,loadbuff,POSIX_LOCK_REC_SIZE);
		if (l!=POSIX_LOCK_REC_SIZE) {
			return -1;
		}
		ptr = loadbuff;
		inode = get32bit(&ptr);
		owner = get64bit(&ptr);
		sessionid = get32bit(&ptr);
		pid = get32bit(&ptr);
		start = get64bit(&ptr);
		end = get64bit(&ptr);
		type = get8bit(&ptr);
		if (inode==0 && owner==0 && sessionid==0) {
			// terminating record
			return 0;
		}
		// a lock is valid only when of_checknode accepts this session/inode pair; check once per pair
		if (inode!=lastinode || sessionid!=lastsessionid || fino || fses) {
			if (of_checknode(sessionid,inode)==0) {
				if (ignoreflag) {
					mfs_syslog(LOG_ERR,"loading posix_locks: lock on closed file !!! (ignoring)");
					continue;
				} else {
					mfs_syslog(LOG_ERR,"loading posix_locks: lock on closed file !!!");
					return -1;
				}
			}
		}
		// add lock
		if (inode!=lastinode || fino) {
			// first record of a new inode
			lastinode = inode;
			lastsessionid = 0;
			lastowner = 0;
			fses = 1;
			il = posix_lock_inode_find(inode);
			if (il==NULL) {
				il = posix_lock_inode_new(inode);
			}
			altail = &(il->active);
			// the record may already have owners (inode seen before); append after
			// them instead of overwriting - and leaking - the existing list
			while (*altail!=NULL) {
				altail = &((*altail)->next);
			}
			fino = 0;
		}
		if (sessionid!=lastsessionid || owner!=lastowner || fses) {
			// first record of a new owner within the current inode
			lastsessionid = sessionid;
			lastowner = owner;
			lastend = 0;
			lasttype = POSIX_LOCK_UNLCK;	// marks "no previous range for this owner"
			al = malloc(sizeof(alock));
			passert(al);
			al->owner = owner;
			al->sessionid = sessionid;
			al->pid = pid;
			al->ranges = NULL;
			al->next = NULL;
			*altail = al;
			altail = &(al->next);
			rtail = &(al->ranges);
			fses = 0;
		}
		if (lasttype!=POSIX_LOCK_UNLCK) {
			// ranges of one owner have to be stored in ascending order without overlaps
			if (start<lastend) {
				if (ignoreflag) {
					mfs_syslog(LOG_ERR,"loading posix_locks: lock range not in order !!! (ignoring)");
					continue;
				} else {
					mfs_syslog(LOG_ERR,"loading posix_locks: lock range not in order !!!");
					return -1;
				}
			}
			// two touching ranges of the same type should have been stored as one range
			if (type==lasttype && start==lastend) {
				if (ignoreflag) {
					mfs_syslog(LOG_ERR,"loading posix_locks: lock range not connected !!! (ignoring)");
					continue;
				} else {
					mfs_syslog(LOG_ERR,"loading posix_locks: lock range not connected !!!");
					return -1;
				}
			}
		}
		r = malloc(sizeof(range));
		passert(r);
		r->start = start;
		r->end = end;
		r->type = type;
		r->next = NULL;
		*rtail = r;
		rtail = &(r->next);
		lastend = end;
		lasttype = type;
	}
	return 0; // unreachable
}
818
posix_lock_cleanup(void)819 void posix_lock_cleanup(void) {
820 uint32_t h;
821 inodelocks *il,*nil;
822 wlock *wl,*nwl;
823 alock *al,*nal;
824 range *r,*nr;
825
826 for (h=0 ; h<POSIX_LOCK_INODE_HASHSIZE ; h++) {
827 il = inodehash[h];
828 while (il) {
829 nil = il->next;
830 wl = il->waiting_head;
831 while (wl) {
832 nwl = wl->next;
833 free(wl);
834 wl = nwl;
835 }
836 al = il->active;
837 while (al) {
838 nal = al->next;
839 r = al->ranges;
840 while (r) {
841 nr = r->next;
842 free(r);
843 r = nr;
844 }
845 free(al);
846 al = nal;
847 }
848 free(il);
849 il = nil;
850 }
851 inodehash[h] = NULL;
852 }
853 }
854
posix_lock_init(void)855 int posix_lock_init(void) {
856 uint32_t i;
857 inodehash = malloc(sizeof(inodelocks*)*POSIX_LOCK_INODE_HASHSIZE);
858 passert(inodehash);
859 for (i=0 ; i<POSIX_LOCK_INODE_HASHSIZE ; i++) {
860 inodehash[i] = NULL;
861 }
862 return 0;
863 }
864
865 #endif
866
867 #ifdef MFSTEST
868
869 #include <stdio.h>
870
/*
 * Test helper: prints range list r in two forms - a textual list of ranges and
 * a 260 character wide map of offsets 0..259 ('.' - not locked, 'o' - read
 * lock, 'O' - write lock, '?' - unknown type). Prints "empty" for an empty list.
 */
void posix_lock_print_ranges(range *r) {
	uint64_t pos;
	range *cur;
	char mark;

	if (r==NULL) {
		printf("empty\n");
		return;
	}
	for (cur=r ; cur!=NULL ; cur=cur->next) {
		if (cur->type==POSIX_LOCK_RDLCK) {
			mark = 'R';
		} else if (cur->type==POSIX_LOCK_WRLCK) {
			mark = 'W';
		} else {
			mark = '?';
		}
		printf("%c:<%"PRIu64",%"PRIu64")%s",mark,cur->start,cur->end,(cur->next!=NULL)?" ; ":"\n");
	}
	cur = r;
	for (pos=0 ; pos<260 ; pos++) {
		// move to the first range that ends after pos
		while (cur!=NULL && pos>=cur->end) {
			cur = cur->next;
		}
		if (cur==NULL || pos<cur->start) {
			mark = '.';
		} else if (cur->type==POSIX_LOCK_RDLCK) {
			mark = 'o';
		} else if (cur->type==POSIX_LOCK_WRLCK) {
			mark = 'O';
		} else {
			mark = '?';
		}
		printf("%c",mark);
	}
	printf("\n");
}
896
/*
 * Test helper: prints the request (as text and as a 260 character wide map of
 * offsets 0..259, '-' outside the request) and then applies it to *rptr with
 * posix_lock_apply_range.
 */
void posix_lock_verbose_apply_range(range **rptr,uint8_t type,uint64_t start,uint64_t end) {
	uint64_t pos;
	char letter,mark;

	if (type==POSIX_LOCK_RDLCK) {
		letter = 'R';
		mark = 'o';
	} else if (type==POSIX_LOCK_WRLCK) {
		letter = 'W';
		mark = 'O';
	} else {
		letter = (type==POSIX_LOCK_UNLCK)?'U':'?';
		mark = '.';
	}
	printf(" + %c:<%"PRIu64",%"PRIu64")\n",letter,start,end);
	for (pos=0 ; pos<260 ; pos++) {
		printf("%c",(pos>=start && pos<end)?mark:'-');
	}
	printf("\n");
	posix_lock_apply_range(rptr,type,start,end);
}
910
main(int argc,char ** argv)911 int main(int argc,char **argv) {
912 range *r;
913 r = NULL;
914
915 if (argc<=1) {
916 printf("usage: %s 1|2\n",argv[0]);
917 return 1;
918 }
919 if (argv[1][0]=='1') {
920 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,20,25);
921 posix_lock_print_ranges(r);
922 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,30,35);
923 posix_lock_print_ranges(r);
924 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,10,15);
925 posix_lock_print_ranges(r);
926 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,19,26);
927 posix_lock_print_ranges(r);
928 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,18,25);
929 posix_lock_print_ranges(r);
930 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,20,27);
931 posix_lock_print_ranges(r);
932 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,20,25);
933 posix_lock_print_ranges(r);
934 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,11,34);
935 posix_lock_print_ranges(r);
936 posix_lock_verbose_apply_range(&r,POSIX_LOCK_WRLCK,20,25);
937 posix_lock_print_ranges(r);
938 posix_lock_verbose_apply_range(&r,POSIX_LOCK_UNLCK,15,20);
939 posix_lock_print_ranges(r);
940 posix_lock_verbose_apply_range(&r,POSIX_LOCK_UNLCK,25,30);
941 posix_lock_print_ranges(r);
942 posix_lock_verbose_apply_range(&r,POSIX_LOCK_WRLCK,15,20);
943 posix_lock_print_ranges(r);
944 posix_lock_verbose_apply_range(&r,POSIX_LOCK_WRLCK,25,30);
945 posix_lock_print_ranges(r);
946 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,15,20);
947 posix_lock_print_ranges(r);
948 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,25,30);
949 posix_lock_print_ranges(r);
950 posix_lock_verbose_apply_range(&r,POSIX_LOCK_RDLCK,20,25);
951 posix_lock_print_ranges(r);
952 posix_lock_verbose_apply_range(&r,POSIX_LOCK_UNLCK,25,30);
953 posix_lock_print_ranges(r);
954 posix_lock_verbose_apply_range(&r,POSIX_LOCK_UNLCK,15,20);
955 posix_lock_print_ranges(r);
956 posix_lock_verbose_apply_range(&r,POSIX_LOCK_UNLCK,0,5);
957 posix_lock_print_ranges(r);
958 posix_lock_verbose_apply_range(&r,POSIX_LOCK_UNLCK,0,UINT64_MAX);
959 posix_lock_print_ranges(r);
960 }
961 if (argv[1][0]=='2') {
962 uint16_t x,start,end;
963 uint8_t type;
964 uint32_t i;
965 for (i=0 ; i<1000 ; i++) {
966 do {
967 start = random()%250;
968 end = random()%250;
969 } while (start==end);
970 if (start>end) {
971 x = start;
972 start = end;
973 end = x;
974 }
975 switch (random()&3) {
976 case 0:
977 type = POSIX_LOCK_RDLCK;
978 break;
979 case 1:
980 type = POSIX_LOCK_WRLCK;
981 break;
982 case 2:
983 if (r==NULL) {
984 type = POSIX_LOCK_RDLCK;
985 } else {
986 type = POSIX_LOCK_UNLCK;
987 }
988 break;
989 case 3:
990 if (r==NULL) {
991 type = POSIX_LOCK_WRLCK;
992 } else {
993 type = POSIX_LOCK_UNLCK;
994 }
995 break;
996 }
997 posix_lock_verbose_apply_range(&r,type,start,end);
998 posix_lock_print_ranges(r);
999 }
1000 posix_lock_verbose_apply_range(&r,POSIX_LOCK_UNLCK,0,UINT64_MAX);
1001 posix_lock_print_ranges(r);
1002 }
1003 }
1004 #endif
1005