1 /*
2  * Copyright (C) 2021 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3  *
4  * This file is part of MooseFS.
5  *
6  * MooseFS is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, version 2 (only).
9  *
10  * MooseFS is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with MooseFS; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18  * or visit http://www.gnu.org/licenses/gpl-2.0.html
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include <stdlib.h>
26 #include <inttypes.h>
27 
28 #include "MFSCommunication.h"
29 
30 #include "matoclserv.h"
31 #include "openfiles.h"
32 #include "metadata.h"
33 #include "main.h"
34 #include "changelog.h"
35 #include "datapack.h"
36 #include "bio.h"
37 #include "slogger.h"
38 #include "massert.h"
39 
// Values of FlocksMode - they select how waiting locks are queued and woken up:
// MODE_CORRECT - classic readers/writers algorithm (a new reader also waits when
//                any other lock is already waiting, which avoids writer starvation)
// MODE_BSD     - only the leading run of waiting readers is woken up together
// MODE_LINUX   - all waiting readers are woken up together, regardless of position
#define MODE_CORRECT 0
#define MODE_BSD 1
#define MODE_LINUX 2

// Values of lock.state - tell which per-inode list the lock is linked on
#define STATE_WAITING 0
#define STATE_ACTIVE 1

// Values of lock.ltype - reader (shared) or writer (exclusive) lock
#define LTYPE_READER 0
#define LTYPE_WRITER 1
49 
// One blocked request attached to a lock (a lock keeps a singly linked list of them).
typedef struct _instance {
	uint32_t msgid; // passed to matoclserv_fuse_flock_wake_up when the request is answered
	uint32_t reqid; // for interruption only (valid in waiting_* queues)
	struct _instance *next; // next blocked request of the same lock
} instance;
55 
56 typedef struct _lock {
57 	uint64_t owner;
58 	uint32_t sessionid;
59 	uint8_t state;
60 	uint8_t ltype;
61 	instance *lock_instances;
62 	struct _inodelocks *parent;
63 	struct _lock *next,**prev;
64 } lock;
65 
66 typedef struct _inodelocks {
67 	uint32_t inode;
68 	lock *active;
69 	lock *waiting_head,**waiting_tail;
70 	struct _inodelocks *next;
71 } inodelocks;
72 
// number of buckets in the inode hash table
#define FLOCK_INODE_HASHSIZE 1024

// maps an inode number to a bucket index (multiplication by a constant, then modulo table size)
#define FLOCK_INODE_HASH(inode) (((inode)*0x738A2379)%(FLOCK_INODE_HASHSIZE))

// hash table of inode lock records (allocated in flock_init)
static inodelocks **inodehash;

// currently selected queueing policy (one of MODE_*)
static uint8_t FlocksMode;
80 
// Debugging aid (compiled out): logs every inode record with its active and waiting
// locks and reports inconsistencies found in states, prev pointers and the tail pointer.
#if 0
static inline void flock_dump(void) {
	uint32_t h;
	inodelocks *il;
	lock *l,**lptr;
	instance *i;
	syslog(LOG_NOTICE,"flock dump:");
	for (h=0 ; h<FLOCK_INODE_HASHSIZE ; h++) {
		for (il = inodehash[h] ; il ; il=il->next) {
			syslog(LOG_NOTICE,"  inode: %"PRIu32" (active:%s,waiting:%s)",il->inode,il->active?"yes":"no",il->waiting_head?"yes":"no");
			// walk the active list checking that every prev pointer matches the link we came through
			lptr = &(il->active);
			while ((l=*lptr)) {
				syslog(LOG_NOTICE,"    active lock: session:%"PRIu32",owner:%"PRIu64",type:%s",l->sessionid,l->owner,l->ltype==LTYPE_READER?"R":"W");
				if (l->state!=STATE_ACTIVE) {
					syslog(LOG_WARNING,"      wrong state !!!");
				}
				if (l->prev != lptr) {
					syslog(LOG_WARNING,"      wrong prev pointer !!!");
				}
				if (l->lock_instances) {
					syslog(LOG_WARNING,"      active lock with waiting processes !!!");
				}
				lptr = &(l->next);
			}
			// the same walk for the waiting queue; lptr ends up at the expected tail position
			lptr = &(il->waiting_head);
			while ((l=*lptr)) {
				syslog(LOG_NOTICE,"    waiting lock: session:%"PRIu32",owner:%"PRIu64",type:%s",l->sessionid,l->owner,l->ltype==LTYPE_READER?"R":"W");
				if (l->state!=STATE_WAITING) {
					syslog(LOG_WARNING,"      wrong state !!!");
				}
				if (l->prev != lptr) {
					syslog(LOG_WARNING,"      wrong prev pointer !!!");
				}
				for (i = l->lock_instances ; i ; i=i->next) {
					syslog(LOG_NOTICE,"      waiting process reqid: %"PRIu32,i->reqid);
				}
				lptr = &(l->next);
			}
			if (il->waiting_tail != lptr) {
				syslog(LOG_WARNING,"    wrong tail pointer !!!");
			}
		}
	}
}
#endif
126 
flock_inode_find(uint32_t inode)127 static inline inodelocks* flock_inode_find(uint32_t inode) {
128 	inodelocks *il;
129 
130 	for (il = inodehash[FLOCK_INODE_HASH(inode)] ; il ; il=il->next) {
131 		if (il->inode==inode) {
132 			return il;
133 		}
134 	}
135 	return NULL;
136 }
137 
flock_inode_new(uint32_t inode)138 static inline inodelocks* flock_inode_new(uint32_t inode) {
139 	inodelocks *il;
140 	uint32_t hash;
141 
142 	il = malloc(sizeof(inodelocks));
143 	passert(il);
144 	il->inode = inode;
145 	il->active = NULL;
146 	il->waiting_head = NULL;
147 	il->waiting_tail = &(il->waiting_head);
148 	hash = FLOCK_INODE_HASH(inode);
149 	il->next = inodehash[hash];
150 	inodehash[hash] = il;
151 	return il;
152 }
153 
flock_inode_remove(uint32_t inode)154 static inline void flock_inode_remove(uint32_t inode) {
155 	inodelocks *il,**ilp;
156 	uint32_t hash;
157 
158 	hash = FLOCK_INODE_HASH(inode);
159 	ilp = inodehash + hash;
160 	while ((il=*ilp)) {
161 		if (il->inode==inode) {
162 			massert(il->active==NULL && il->waiting_head==NULL,"inode flock record not empty !!!");
163 			*ilp = il->next;
164 			free(il);
165 		} else {
166 			ilp = &(il->next);
167 		}
168 	}
169 }
170 
// Unlinks the lock from the list it is currently on (chosen by l->state) without freeing it.
static inline void flock_lock_inode_detach(lock *l) {
	lock *succ;

	succ = l->next;
	if (succ!=NULL) {
		succ->prev = l->prev;
	} else if (l->state==STATE_WAITING) {
		// last element of the waiting queue - move the tail back
		l->parent->waiting_tail = l->prev;
	}
	*(l->prev) = succ;
}
181 
// Links the lock into its parent inode record: waiting locks are appended to the
// waiting queue, any other lock is pushed at the head of the active list.
static inline void flock_do_lock_inode_attach(lock *l) {
	inodelocks *rec;
	lock *first;

	rec = l->parent;
	if (l->state!=STATE_WAITING) {
		first = rec->active;
		l->next = first;
		if (first!=NULL) {
			first->prev = &(l->next);
		}
		l->prev = &(rec->active);
		rec->active = l;
		return;
	}
	l->next = NULL;
	l->prev = rec->waiting_tail;
	*(rec->waiting_tail) = l;
	rec->waiting_tail = &(l->next);
}
197 
// Links the lock into its inode record; a lock that becomes active is first
// written to the changelog (as 'R' for reader or 'W' for writer).
static inline void flock_lock_inode_attach(lock *l) {
	char mode;

	if (l->state==STATE_ACTIVE) {
		mode = (l->ltype==LTYPE_READER)?'R':'W';
		changelog("%"PRIu32"|FLOCK(%"PRIu32",%"PRIu32",%"PRIu64",%c)",main_time(),l->parent->inode,l->sessionid,l->owner,mode);
	}
	flock_do_lock_inode_attach(l);
}
204 
// Answers (with the given status) and drops every blocked request of the lock
// whose reqid matches; other requests stay on the list.
static inline void flock_lock_wake_up_one(lock *l,uint32_t reqid,uint8_t status) {
	instance **link;
	instance *req;

	link = &(l->lock_instances);
	while ((req=*link)!=NULL) {
		if (req->reqid!=reqid) {
			link = &(req->next);
			continue;
		}
		matoclserv_fuse_flock_wake_up(l->sessionid,req->msgid,status);
		*link = req->next;
		free(req);
	}
}
218 
// Answers all blocked requests of the lock with the given status and empties the list.
static inline void flock_lock_wake_up_all(lock *l,uint8_t status) {
	instance *req,*following;

	for (req = l->lock_instances ; req!=NULL ; req = following) {
		following = req->next;
		matoclserv_fuse_flock_wake_up(l->sessionid,req->msgid,status);
		free(req);
	}
	l->lock_instances = NULL;
}
230 
// Registers a blocked request on the lock. When a request with the same reqid is
// already registered only its msgid is refreshed.
static inline void flock_lock_append_req(lock *l,uint32_t msgid,uint32_t reqid) {
	instance *i;

	i = l->lock_instances;
	while (i!=NULL) {
		if (i->reqid==reqid) {
			i->msgid = msgid;
			return;
		}
		i = i->next;
	}
	i = malloc(sizeof(instance));
	passert(i);
	i->reqid = reqid;
	i->msgid = msgid;
	i->next = l->lock_instances;
	l->lock_instances = i;
}
247 
// Frees the lock together with its blocked requests (nobody is notified) after
// unlinking it from the inode record. Nothing is written to the changelog.
static inline void flock_do_lock_remove(lock *l) {
	instance *req;

	while ((req=l->lock_instances)!=NULL) {
		l->lock_instances = req->next;
		free(req);
	}
	flock_lock_inode_detach(l);
	free(l);
}
266 
// Removes the lock; removal of an active lock is recorded in the changelog as 'U'.
static inline void flock_lock_remove(lock *l) {
	inodelocks *rec;

	if (l->state==STATE_ACTIVE) {
		rec = l->parent;
		changelog("%"PRIu32"|FLOCK(%"PRIu32",%"PRIu32",%"PRIu64",U)",main_time(),rec->inode,l->sessionid,l->owner);
	}
	flock_do_lock_remove(l);
}
273 
// Tells whether a new lock of the given type would have to wait on this inode.
// Returns 1 when it has to wait and 0 when it can be granted immediately.
static inline uint8_t flock_check(inodelocks *il,uint8_t ltype) {
	if (ltype!=LTYPE_READER) {
		// a writer needs the inode without any active lock
		return (il->active!=NULL)?1:0;
	}
	if (il->active!=NULL && il->active->ltype==LTYPE_WRITER) {
		return 1;
	}
// additional condition for classic readers/writers algorithm (not seen in any
// tested OS) - reader should wait when there are other waiting locks (even if
// currently acquired lock(s) is a reader lock) - it avoids writer starvation
	if (FlocksMode==MODE_CORRECT && il->waiting_head!=NULL) {
		return 1;
	}
	return 0;
}
294 
// Creates a lock of the given type for (sessionid,owner) on the inode record.
// When the lock can not be granted right now it is queued as waiting with the
// request (msgid,reqid) attached and MFS_ERROR_WAITING is returned; otherwise it
// becomes active immediately and MFS_STATUS_OK is returned.
static inline uint8_t flock_lock_new(inodelocks *il,uint8_t ltype,uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint64_t owner) {
	lock *l;

	l = malloc(sizeof(lock));
	passert(l); // same out-of-memory policy as in flock_inode_new
	l->owner = owner;
	l->sessionid = sessionid;
	l->state = STATE_ACTIVE;
	l->ltype = ltype;
	l->lock_instances = NULL;
	l->parent = il;
	l->next = NULL;
	l->prev = NULL;
	if (flock_check(il,ltype)) {
		// state has to be set before attaching - it selects the list
		l->state = STATE_WAITING;
		flock_lock_append_req(l,msgid,reqid);
		flock_lock_inode_attach(l);
		return MFS_ERROR_WAITING;
	}
	flock_lock_inode_attach(l);
	return MFS_STATUS_OK;
}
315 
flock_lock_check_waiting(inodelocks * il)316 static inline void flock_lock_check_waiting(inodelocks *il) {
317 	lock *l,*nl;
318 	l = il->waiting_head;
319 	if (l==NULL) {
320 		return;
321 	}
322 	if (il->active==NULL && l->ltype==LTYPE_WRITER) {
323 		flock_lock_inode_detach(l);
324 		l->state = STATE_ACTIVE;
325 		flock_lock_inode_attach(l);
326 		flock_lock_wake_up_all(l,MFS_STATUS_OK);
327 	}
328 	if (il->active==NULL || il->active->ltype==LTYPE_READER) {
329 		if (FlocksMode==MODE_LINUX) {
330 			while (l) {
331 				nl = l->next;
332 				if (l->ltype==LTYPE_READER) {
333 					flock_lock_inode_detach(l);
334 					l->state = STATE_ACTIVE;
335 					flock_lock_inode_attach(l);
336 					flock_lock_wake_up_all(l,MFS_STATUS_OK);
337 				}
338 				l = nl;
339 			}
340 		} else { // FreeBSD, OSX, Classic readers/writers algorithm
341 			while (l && l->ltype==LTYPE_READER) {
342 				nl = l->next;
343 				flock_lock_inode_detach(l);
344 				l->state = STATE_ACTIVE;
345 				flock_lock_inode_attach(l);
346 				flock_lock_wake_up_all(l,MFS_STATUS_OK);
347 				l = nl;
348 			}
349 		}
350 	}
351 }
352 
// Removes a lock and, when that leaves the inode without active locks, grants
// locks from the waiting queue.
static inline void flock_lock_unlock(inodelocks *il,lock *l) {
	massert(il==l->parent,"flock data structures mismatch");

	flock_lock_remove(l);

	if (il->active!=NULL || il->waiting_head==NULL) {
		return;
	}
	flock_lock_check_waiting(il);
}
362 
flock_locks_cmd(uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint32_t inode,uint64_t owner,uint8_t op)363 uint8_t flock_locks_cmd(uint32_t sessionid,uint32_t msgid,uint32_t reqid,uint32_t inode,uint64_t owner,uint8_t op) {
364 	inodelocks *il;
365 	lock *l,*nl;
366 	uint8_t ltype;
367 
368 //	flock_dump();
369 //	syslog(LOG_NOTICE,"flock op: sessionid:%"PRIu32",msgid:%"PRIu32",reqid:%"PRIu32",inode:%"PRIu32",owner:%"PRIu64",op:%u",sessionid,msgid,reqid,inode,owner,op);
370 
371 	if (op!=FLOCK_INTERRUPT && op!=FLOCK_RELEASE) {
372 		if (of_checknode(sessionid,inode)==0) {
373 			return MFS_ERROR_NOTOPENED;
374 		}
375 	}
376 	il = flock_inode_find(inode);
377 	if (il==NULL) {
378 		if (op==FLOCK_UNLOCK || op==FLOCK_INTERRUPT || op==FLOCK_RELEASE) {
379 			return MFS_STATUS_OK;
380 		}
381 		il = flock_inode_new(inode);
382 	}
383 	if (op==FLOCK_INTERRUPT) {
384 		l = il->waiting_head;
385 		while (l) {
386 			nl = l->next;
387 			if (l->sessionid==sessionid && l->owner==owner) {
388 				flock_lock_wake_up_one(l,reqid,MFS_ERROR_EINTR);
389 				if (l->lock_instances==NULL) { // remove
390 					flock_lock_remove(l);
391 				}
392 			}
393 			l = nl;
394 		}
395 		return MFS_STATUS_OK;
396 	}
397 	for (l=il->active ; l ; l=l->next) {
398 		if (l->sessionid==sessionid && l->owner==owner) {
399 			if (op==FLOCK_UNLOCK || op==FLOCK_RELEASE) {
400 				flock_lock_unlock(il,l);
401 				if (il->waiting_head==NULL && il->active==NULL) {
402 					flock_inode_remove(il->inode);
403 				}
404 				return MFS_STATUS_OK;
405 			} else if (op==FLOCK_TRY_SHARED) {
406 				if (l->ltype==LTYPE_READER) {
407 					return MFS_STATUS_OK;
408 				} else { // l->ltype==LTYPE_WRITER
409 					l->ltype=LTYPE_READER;
410 					flock_lock_check_waiting(il);
411 					return MFS_STATUS_OK;
412 				}
413 			} else if (op==FLOCK_LOCK_SHARED) {
414 				if (l->ltype==LTYPE_READER) {
415 					return MFS_STATUS_OK;
416 				} else { // l->ltype==LTYPE_WRITER
417 					flock_lock_unlock(il,l);
418 					return flock_lock_new(il,LTYPE_READER,sessionid,msgid,reqid,owner);
419 				}
420 			} else if (op==FLOCK_TRY_EXCLUSIVE) {
421 				if (l->ltype==LTYPE_WRITER) {
422 					return MFS_STATUS_OK;
423 				} else { // l->ltype==LTYPE_READER
424 					if (il->active->next==NULL) { // this lock is the only one
425 						l->ltype=LTYPE_WRITER;
426 						return MFS_STATUS_OK;
427 					}
428 					return MFS_ERROR_EAGAIN;
429 				}
430 			} else if (op==FLOCK_LOCK_EXCLUSIVE) {
431 				if (l->ltype==LTYPE_WRITER) {
432 					return MFS_STATUS_OK;
433 				} else { // l->ltype==LTYPE_READER
434 					flock_lock_unlock(il,l);
435 					return flock_lock_new(il,LTYPE_WRITER,sessionid,msgid,reqid,owner);
436 				}
437 			}
438 			return MFS_ERROR_EINVAL;
439 		}
440 	}
441 	for (l=il->waiting_head ; l ; l=l->next) {
442 		if (l->sessionid==sessionid && l->owner==owner) {
443 			if (op==FLOCK_RELEASE) {
444 				flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
445 				flock_lock_remove(l);
446 				return MFS_STATUS_OK;
447 			} else if (op==FLOCK_UNLOCK) {
448 				if (FlocksMode==MODE_CORRECT) {
449 					// logically this call should do this:
450 					flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
451 					flock_lock_remove(l);
452 				}
453 				// but in all tested kernels it was just ignored
454 				return MFS_STATUS_OK;
455 			} else if (op==FLOCK_TRY_SHARED || op==FLOCK_TRY_EXCLUSIVE) {
456 				return MFS_ERROR_EAGAIN;
457 			} else if (op==FLOCK_LOCK_SHARED) {
458 				if (l->ltype==LTYPE_READER) {
459 					flock_lock_append_req(l,msgid,reqid);
460 					return MFS_ERROR_WAITING;
461 				} else {
462 //					return MFS_ERROR_EINVAL;
463 					flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
464 					l->ltype=LTYPE_READER;
465 					flock_lock_append_req(l,msgid,reqid);
466 					return MFS_ERROR_WAITING;
467 				}
468 			} else if (op==FLOCK_LOCK_EXCLUSIVE) {
469 				if (l->ltype==LTYPE_WRITER) {
470 					flock_lock_append_req(l,msgid,reqid);
471 					return MFS_ERROR_WAITING;
472 				} else {
473 //					return MFS_ERROR_EINVAL;
474 					flock_lock_wake_up_all(l,MFS_ERROR_ECANCELED);
475 					l->ltype=LTYPE_WRITER;
476 					flock_lock_append_req(l,msgid,reqid);
477 					return MFS_ERROR_WAITING;
478 				}
479 			}
480 			return MFS_ERROR_EINVAL;
481 		}
482 	}
483 	if (op==FLOCK_UNLOCK || op==FLOCK_RELEASE) {
484 		return MFS_STATUS_OK;
485 	}
486 	ltype = (op==FLOCK_TRY_SHARED || op==FLOCK_LOCK_SHARED)?LTYPE_READER:LTYPE_WRITER;
487 	if (op==FLOCK_TRY_SHARED || op==FLOCK_TRY_EXCLUSIVE) {
488 		if (flock_check(il,ltype)) {
489 			return MFS_ERROR_EAGAIN;
490 		}
491 	}
492 	return flock_lock_new(il,ltype,sessionid,msgid,reqid,owner);
493 }
494 
flock_file_closed(uint32_t sessionid,uint32_t inode)495 void flock_file_closed(uint32_t sessionid,uint32_t inode) {
496 	inodelocks *il;
497 	lock *l,*nl;
498 
499 	il = flock_inode_find(inode);
500 	if (il==NULL) {
501 		return;
502 	}
503 
504 	l = il->waiting_head;
505 	while (l) {
506 		nl = l->next;
507 		if (l->sessionid==sessionid) {
508 			flock_lock_remove(l);
509 		}
510 		l = nl;
511 	}
512 
513 	l = il->active;
514 	while (l) {
515 		nl = l->next;
516 		if (l->sessionid==sessionid) {
517 			flock_lock_unlock(il,l);
518 		}
519 		l = nl;
520 	}
521 	if (il->waiting_head==NULL && il->active==NULL) {
522 		flock_inode_remove(il->inode);
523 	}
524 }
525 
flock_list(uint32_t inode,uint8_t * buff)526 uint32_t flock_list(uint32_t inode,uint8_t *buff) {
527 	inodelocks *il;
528 	lock *l;
529 	uint32_t h;
530 	uint32_t ret=0;
531 
532 	if (inode==0) {
533 		for (h=0 ; h<FLOCK_INODE_HASHSIZE ; h++) {
534 			for (il = inodehash[h] ; il ; il=il->next) {
535 				for (l=il->active ; l ; l=l->next) {
536 					if (buff==NULL) {
537 						ret+=37;
538 					} else {
539 						put32bit(&buff,il->inode);
540 						put32bit(&buff,l->sessionid);
541 						put64bit(&buff,l->owner);
542 						memset(buff,0,20); // pid,start,end
543 						buff+=20;
544 						switch (l->ltype) {
545 							case LTYPE_READER:
546 								put8bit(&buff,1);
547 								break;
548 							case LTYPE_WRITER:
549 								put8bit(&buff,2);
550 								break;
551 							default:
552 								put8bit(&buff,0);
553 						}
554 					}
555 				}
556 			}
557 		}
558 	} else {
559 		il = flock_inode_find(inode);
560 		if (il!=NULL) {
561 			for (l=il->active ; l ; l=l->next) {
562 				if (buff==NULL) {
563 					ret+=33;
564 				} else {
565 					put32bit(&buff,l->sessionid);
566 					put64bit(&buff,l->owner);
567 					memset(buff,0,20); // pid,start,end
568 					buff+=20;
569 					switch (l->ltype) {
570 						case LTYPE_READER:
571 							put8bit(&buff,1);
572 							break;
573 						case LTYPE_WRITER:
574 							put8bit(&buff,2);
575 							break;
576 						default:
577 							put8bit(&buff,0);
578 					}
579 				}
580 			}
581 		}
582 	}
583 	return ret;
584 }
585 
flock_mr_change(uint32_t inode,uint32_t sessionid,uint64_t owner,char cmd)586 uint8_t flock_mr_change(uint32_t inode,uint32_t sessionid,uint64_t owner,char cmd) {
587 	inodelocks *il;
588 	lock *l,*nl;
589 	uint8_t ltype;
590 
591 	if (cmd=='U' || cmd=='u') {
592 		il = flock_inode_find(inode);
593 		if (il==NULL) {
594 			return MFS_ERROR_MISMATCH;
595 		}
596 		l=il->active;
597 		while (l) {
598 			nl = l->next;
599 			if (l->sessionid==sessionid && l->owner==owner) {
600 				flock_do_lock_remove(l);
601 				meta_version_inc();
602 			}
603 			l = nl;
604 		}
605 		if (il->waiting_head==NULL && il->active==NULL) {
606 			flock_inode_remove(il->inode);
607 		}
608 		return MFS_STATUS_OK;
609 	} else if (cmd=='R' || cmd=='r' || cmd=='S' || cmd=='s') {
610 		ltype = LTYPE_READER;
611 	} else if (cmd=='W' || cmd=='w' || cmd=='E' || cmd=='e') {
612 		ltype = LTYPE_WRITER;
613 	} else {
614 		return MFS_ERROR_EINVAL;
615 	}
616 	il = flock_inode_find(inode);
617 	if (il==NULL) {
618 		il = flock_inode_new(inode);
619 	}
620 	if (il->active!=NULL && (il->active->ltype==LTYPE_WRITER || ltype==LTYPE_WRITER)) {
621 		return MFS_ERROR_MISMATCH;
622 	}
623 	l = malloc(sizeof(lock));
624 	l->owner = owner;
625 	l->sessionid = sessionid;
626 	l->state = STATE_ACTIVE;
627 	l->ltype = ltype;
628 	l->lock_instances = NULL;
629 	l->parent = il;
630 	l->next = NULL;
631 	l->prev = NULL;
632 	flock_do_lock_inode_attach(l);
633 	meta_version_inc();
634 	return MFS_STATUS_OK;
635 }
636 
// size of one stored lock record: inode (4) + owner (8) + sessionid (4) + ltype (1)
#define FLOCK_REC_SIZE 17
638 
// Stores all active locks as FLOCK_REC_SIZE-byte records (inode, owner, sessionid,
// ltype) followed by one all-zero record that terminates the list.
// Called with fd==NULL it only returns 0x10 (the value flock_load expects as mver).
// Returns 0 on success and 0xFF when a write fails.
uint8_t flock_store(bio *fd) {
	uint8_t storebuff[FLOCK_REC_SIZE];
	uint8_t *wptr;
	uint32_t bucket;
	inodelocks *rec;
	lock *lck;

	if (fd==NULL) {
		return 0x10;
	}
	for (bucket=0 ; bucket<FLOCK_INODE_HASHSIZE ; bucket++) {
		rec = inodehash[bucket];
		while (rec!=NULL) {
			for (lck=rec->active ; lck!=NULL ; lck=lck->next) {
				wptr = storebuff;
				put32bit(&wptr,rec->inode);
				put64bit(&wptr,lck->owner);
				put32bit(&wptr,lck->sessionid);
				put8bit(&wptr,lck->ltype);
				if (bio_write(fd,storebuff,FLOCK_REC_SIZE)!=FLOCK_REC_SIZE) {
					return 0xFF;
				}
			}
			rec = rec->next;
		}
	}
	// end-of-list marker
	memset(storebuff,0,FLOCK_REC_SIZE);
	return (bio_write(fd,storebuff,FLOCK_REC_SIZE)!=FLOCK_REC_SIZE)?0xFF:0;
}
669 
// Loads active locks stored by flock_store.
//
// fd         - input stream
// mver       - version of the stored section, only 0x10 is accepted
// ignoreflag - when non-zero, inconsistent records (lock on a file rejected by
//              of_checknode, lock conflicting with already loaded ones) are
//              logged and skipped instead of aborting the load
//
// Returns 0 after reading the terminating all-zero record, -1 on unsupported
// version, short read or (without ignoreflag) inconsistent record.
int flock_load(bio *fd,uint8_t mver,uint8_t ignoreflag) {
	uint8_t loadbuff[FLOCK_REC_SIZE];
	const uint8_t *ptr;
	int32_t r;
	uint32_t inode,sessionid;
	uint64_t owner;
	uint8_t ltype;
	inodelocks *il;
	lock *l;

	if (mver!=0x10) {
		return -1;
	}

	for (;;) {
		r = bio_read(fd,loadbuff,FLOCK_REC_SIZE);
		if (r!=FLOCK_REC_SIZE) {
			return -1;
		}
		ptr = loadbuff;
		inode = get32bit(&ptr);
		owner = get64bit(&ptr);
		sessionid = get32bit(&ptr);
		ltype = get8bit(&ptr);
		if (inode==0 && owner==0 && sessionid==0) {
			// end-of-list marker
			return 0;
		}
		if (of_checknode(sessionid,inode)==0) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading flock_locks: lock on closed file !!! (ignoring)");
				continue;
			} else {
				mfs_syslog(LOG_ERR,"loading flock_locks: lock on closed file !!!");
				return -1;
			}
		}
		// add lock
		il = flock_inode_find(inode);
		if (il==NULL) {
			il = flock_inode_new(inode);
		}
		// only readers can share an inode
		if (il->active!=NULL && (il->active->ltype==LTYPE_WRITER || ltype==LTYPE_WRITER)) {
			if (ignoreflag) {
				mfs_syslog(LOG_ERR,"loading flock_locks: wrong lock !!! (ignoring)");
				continue;
			} else {
				mfs_syslog(LOG_ERR,"loading flock_locks: wrong lock !!!");
				return -1;
			}
		}
		l = malloc(sizeof(lock));
		passert(l); // same out-of-memory policy as in flock_inode_new
		l->owner = owner;
		l->sessionid = sessionid;
		l->state = STATE_ACTIVE;
		l->ltype = ltype;
		l->lock_instances = NULL;
		l->parent = il;
		l->next = NULL;
		l->prev = NULL;
		flock_do_lock_inode_attach(l);
	}
	return 0; // unreachable
}
733 
flock_cleanup(void)734 void flock_cleanup(void) {
735 	uint32_t h,j;
736 	inodelocks *il,*nil;
737 	lock *l,*nl;
738 	instance *i,*ni;
739 	for (h=0 ; h<FLOCK_INODE_HASHSIZE ; h++) {
740 		il = inodehash[h];
741 		while (il) {
742 			nil = il->next;
743 			for (j=0 ; j<2 ; j++) {
744 				l = j?il->active:il->waiting_head;
745 				while (l) {
746 					nl = l->next;
747 					i = l->lock_instances;
748 					while (i) {
749 						ni = i->next;
750 						free(i);
751 						i = ni;
752 					}
753 					free(l);
754 					l = nl;
755 				}
756 			}
757 			free(il);
758 			il = nil;
759 		}
760 		inodehash[h] = NULL;
761 	}
762 }
763 
flock_init(void)764 int flock_init(void) {
765 	uint32_t i;
766 	inodehash = malloc(sizeof(inodelocks*)*FLOCK_INODE_HASHSIZE);
767 	for (i=0 ; i<FLOCK_INODE_HASHSIZE ; i++) {
768 		inodehash[i] = NULL;
769 	}
770 	FlocksMode = 0;
771 	return 0;
772 }
773