1 /*
2 * Copyright (C) 2021 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3 *
4 * This file is part of MooseFS.
5 *
6 * MooseFS is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, version 2 (only).
9 *
10 * MooseFS is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with MooseFS; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18 * or visit http://www.gnu.org/licenses/gpl-2.0.html
19 */
20
21 // patch for old osxfuse
22 //#if defined(__APPLE__)
23 //# if ! defined(__DARWIN_64_BIT_INO_T) && ! defined(_DARWIN_USE_64_BIT_INODE)
24 //# define __DARWIN_64_BIT_INO_T 0
25 //# endif
26 //#endif
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "fusecommon.h"
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <fcntl.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <syslog.h>
42 #include <inttypes.h>
43 #include <pthread.h>
44 #ifdef HAVE_SYS_XATTR_H
45 #include <sys/xattr.h>
46 #endif
47 #ifdef HAVE_ATTR_XATTR_H
48 #include <attr/xattr.h>
49 #endif
50 #ifdef HAVE_SYS_FILE_H
51 #include <sys/file.h>
52 #endif
53
54 #include "stats.h"
55 #include "oplog.h"
56 #include "datapack.h"
57 #include "clocks.h"
58 #include "portable.h"
59 #include "mastercomm.h"
60 #include "masterproxy.h"
61 #include "getgroups.h"
62 #include "readdata.h"
63 #include "writedata.h"
64 #include "truncate.h"
65 #include "massert.h"
66 #include "strerr.h"
67 #include "mfsalloc.h"
68 #include "lwthread.h"
69 #include "MFSCommunication.h"
70
71 #include "mfsmount.h"
72 #include "sustained_stats.h"
73 #include "sustained_parents.h"
74 #include "chunksdatacache.h"
75 #include "dirattrcache.h"
76 #include "symlinkcache.h"
77 #include "negentrycache.h"
78 #include "xattrcache.h"
79 #include "fdcache.h"
80 #include "inoleng.h"
81 #if defined(__linux__) && (FUSE_VERSION >= 28)
82 #include "dentry_invalidator.h"
83 #define DENTRY_INVALIDATOR 1
84 #endif
85
86 #if MFS_ROOT_ID != FUSE_ROOT_ID
87 #error FUSE_ROOT_ID is not equal to MFS_ROOT_ID
88 #endif
89
/*
 * Fallback values for well known constants.
 * Each one is defined here only when no header included above provided it.
 */
/* flags argument of setxattr */
#ifndef XATTR_CREATE
#define XATTR_CREATE 1
#endif
#ifndef XATTR_REPLACE
#define XATTR_REPLACE 2
#endif
/* flock operations */
#ifndef LOCK_SH
#define LOCK_SH 1
#endif
#ifndef LOCK_EX
#define LOCK_EX 2
#endif
#ifndef LOCK_NB
#define LOCK_NB 4
#endif
#ifndef LOCK_UN
#define LOCK_UN 8
#endif
109
/* FreeBSD-only settings; none of these names exist on other platforms */
#if defined(__FreeBSD__)
/* initialised to 1; NOTE(review): presumably a run-time switch for the workarounds below - confirm where it is changed */
static int freebsd_workarounds = 1;
// workaround for bug in FreeBSD Fuse version (kernel part)
/* compiles in the delayed file-handle release code (finfo_delayed_release and friends) */
#	define FREEBSD_DELAYED_RELEASE 1
/* added to finfo.lastuse and compared with the monotonic clock before a released handle is freed */
#	define FREEBSD_RELEASE_DELAY 10.0
#	define FREEBSD_XONLY_ACCESS 1
#endif
117
#define RANDOM_BUFFSIZE 0x100000

#define READDIR_BUFFSIZE 50000

#define MAX_FILE_SIZE (int64_t)(MFS_MAX_FILE_SIZE)

/* version packed into one decimal number: major*1000000 + mid*10000 + (min>>1)*100 + release */
#define PKGVERSION ((VERSMAJ)*1000000+(VERSMID)*10000+((VERSMIN)>>1)*100+(RELEASE))

#define MASTERINFO_WITH_VERSION 1

/*
 * Special (virtual) files: each has a fixed name, a fixed inode number and
 * a static attribute record.
 * Record layout as decoded by mfs_attr_to_stat: 1 flag byte, 16 bits of
 * type<<12|mode, then 32-bit uid, gid, atime, mtime, ctime, nlink and a
 * 64-bit length (big-endian bytes in the initialisers below).
 */
#define MASTERINFO_NAME ".masterinfo"
#define MASTERINFO_INODE 0x7FFFFFFF
// 0x0124 == 0b100100100 == 0444
/* the two variants differ only in the length field: 14 with version, 10 without */
#ifdef MASTERINFO_WITH_VERSION
static uint8_t masterinfoattr[ATTR_RECORD_SIZE]={0, (TYPE_FILE << 4) | 0x01,0x24, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,14, 0};
#else
static uint8_t masterinfoattr[ATTR_RECORD_SIZE]={0, (TYPE_FILE << 4) | 0x01,0x24, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,10, 0};
#endif

#define STATS_NAME ".stats"
#define STATS_INODE 0x7FFFFFF0
// 0x01A4 == 0b110100100 == 0644
static uint8_t statsattr[ATTR_RECORD_SIZE]={0, (TYPE_FILE << 4) | 0x01,0xA4, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,0, 0};

/* .oplog and .ophistory have separate inodes; only one attribute record (oplogattr) is defined here */
#define OPLOG_NAME ".oplog"
#define OPLOG_INODE 0x7FFFFFF1
#define OPHISTORY_NAME ".ophistory"
#define OPHISTORY_INODE 0x7FFFFFF2
// 0x0100 == 0b100000000 == 0400
static uint8_t oplogattr[ATTR_RECORD_SIZE]={0, (TYPE_FILE << 4) | 0x01,0x00, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,0, 0};

#define MOOSE_NAME ".mooseart"
#define MOOSE_INODE 0x7FFFFFF3
// 0x01A4 == 0b110100100 == 0644
static uint8_t mooseattr[ATTR_RECORD_SIZE]={0, (TYPE_FILE << 4) | 0x01,0xA4, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,0, 0};

#define RANDOM_NAME ".random"
#define RANDOM_INODE 0x7FFFFFF4
// 0x0124 == 0b100100100 == 0444
static uint8_t randomattr[ATTR_RECORD_SIZE]={0, (TYPE_FILE << 4) | 0x01,0x24, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,0, 0};
158
/* special file ".params": fixed name, fixed inode and a static attribute record */
#define PARAMS_NAME ".params"
#define PARAMS_INODE 0x7FFFFFF5
// 0x0100 == 0b100000000 == 0400
static uint8_t paramsattr[ATTR_RECORD_SIZE]={0, (TYPE_FILE << 4) | 0x01,0x00, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,0, 0};
#define PARAMS_BUFFSIZE 10000
164
/* lowest inode number used by the special files (equal to STATS_INODE) */
#define MIN_SPECIAL_INODE 0x7FFFFFF0
/* true for every inode number at or above MIN_SPECIAL_INODE */
#define IS_SPECIAL_INODE(ino) ((ino)>=MIN_SPECIAL_INODE)
/* true when name equals one of the special file names; the leading-dot test is a cheap pre-filter before the strcmp chain. Evaluates 'name' several times. */
#define IS_SPECIAL_NAME(name) ((name)[0]=='.' && (strcmp(STATS_NAME,(name))==0 || strcmp(MASTERINFO_NAME,(name))==0 || strcmp(OPLOG_NAME,(name))==0 || strcmp(OPHISTORY_NAME,(name))==0 || strcmp(MOOSE_NAME,(name))==0 || strcmp(RANDOM_NAME,(name))==0 || strcmp(PARAMS_NAME,(name))==0))
168
#ifdef DENTRY_INVALIDATOR
/* exists only when DENTRY_INVALIDATOR is defined (Linux with FUSE_VERSION >= 28); starts at 0. NOTE(review): presumably enables use of the dentry invalidator - confirm at the place it is set */
static int dinval = 0;
#endif
172
// generators from: http://school.anhb.uwa.edu.au/personalpages/kwessen/shared/Marsaglia99.html (by George Marsaglia)

/* random state */
static uint32_t rndz=362436069;
static uint32_t rndw=521288629;
static uint32_t rndjsr=123456789;
static uint32_t rndjcong=380116160;

/*
 * Every macro below updates the shared state variables above as a side
 * effect, so each expansion yields the next value of its sequence.
 * KISS combines the three component generators: (MWC ^ CONG) + SHR3.
 */
#define znew (rndz=36969*(rndz&65535)+(rndz>>16))
#define wnew (rndw=18000*(rndw&65535)+(rndw>>16))
#define MWC ((znew<<16)+wnew)
#define SHR3 (rndjsr^=(rndjsr<<17), rndjsr^=(rndjsr>>13), rndjsr^=(rndjsr<<5))
#define CONG (rndjcong=69069*rndjcong+1234567)
#define KISS ((MWC^CONG)+SHR3)

/* NOTE(review): presumably serialises access to the generator state above - no user of this lock is visible in this part of the file */
static pthread_mutex_t randomlock = PTHREAD_MUTEX_INITIALIZER;
189
190 /* STATS INODE BUFFER */
191
/* One entry of the stats buffer table (sinfo_tab). */
typedef struct _sinfo {
	char *buff;		/* heap buffer; freed by sinfo_release and sinfo_freeall */
	uint32_t leng;
	uint8_t reset;
	uint8_t valid;
	pthread_mutex_t lock;	/* held by sinfo_release / sinfo_freeall while buff is freed */
	uint32_t next;		/* next entry on the free list (index into sinfo_tab), 0 ends the list */
} sinfo;

/*
 * Table state, all guarded by sinfo_tab_lock:
 *  sinfo_head - first entry of the free list (0 = list empty)
 *  sinfo_size - number of slots allocated in sinfo_tab
 *  sinfo_max  - first never-used index; starts at 1 because index 0 means "no entry"
 */
static uint32_t sinfo_head=0,sinfo_size=0,sinfo_max=1;
static sinfo* *sinfo_tab=NULL;
static pthread_mutex_t sinfo_tab_lock = PTHREAD_MUTEX_INITIALIZER;
204
sinfo_new(void)205 static uint32_t sinfo_new(void) {
206 uint32_t i;
207 zassert(pthread_mutex_lock(&sinfo_tab_lock));
208 if (sinfo_head!=0) {
209 i = sinfo_head;
210 sinfo_head = sinfo_tab[i]->next;
211 } else {
212 if (sinfo_max>=sinfo_size) {
213 if (sinfo_size==0) {
214 sinfo_size = 16;
215 sinfo_tab = malloc(sizeof(sinfo*)*sinfo_size);
216 } else {
217 sinfo_size *= 2;
218 sinfo_tab = mfsrealloc(sinfo_tab,sizeof(sinfo*)*sinfo_size);
219 }
220 passert(sinfo_tab);
221 }
222 i = sinfo_max++;
223 sinfo_tab[i] = malloc(sizeof(sinfo));
224 passert(sinfo_tab[i]);
225 memset(sinfo_tab[i],0,sizeof(sinfo));
226 zassert(pthread_mutex_init(&(sinfo_tab[i]->lock),NULL));
227 }
228 zassert(pthread_mutex_unlock(&sinfo_tab_lock));
229 return i;
230 }
231
sinfo_get(uint32_t sindex)232 static inline sinfo* sinfo_get(uint32_t sindex) {
233 zassert(pthread_mutex_lock(&sinfo_tab_lock));
234 if (sindex==0 || sindex>=sinfo_max) {
235 zassert(pthread_mutex_unlock(&sinfo_tab_lock));
236 return NULL;
237 } else {
238 zassert(pthread_mutex_unlock(&sinfo_tab_lock));
239 return sinfo_tab[sindex];
240 }
241 }
242
sinfo_release(uint32_t sindex)243 static void sinfo_release(uint32_t sindex) {
244 sinfo *statsinfo;
245 if (sindex>0) {
246 zassert(pthread_mutex_lock(&sinfo_tab_lock));
247 statsinfo = sinfo_tab[sindex];
248 zassert(pthread_mutex_lock(&(statsinfo->lock)));
249 if (statsinfo->buff!=NULL) {
250 free(statsinfo->buff);
251 statsinfo->buff = NULL;
252 }
253 zassert(pthread_mutex_unlock(&(statsinfo->lock)));
254 statsinfo->next = sinfo_head;
255 sinfo_head = sindex;
256 zassert(pthread_mutex_unlock(&sinfo_tab_lock));
257 }
258 }
259
sinfo_freeall(void)260 static void sinfo_freeall(void) {
261 uint32_t i;
262 sinfo *statsinfo;
263 zassert(pthread_mutex_lock(&sinfo_tab_lock));
264 if (sinfo_tab!=NULL) {
265 for (i=1 ; i<sinfo_max ; i++) {
266 statsinfo = sinfo_tab[i];
267 zassert(pthread_mutex_lock(&(statsinfo->lock)));
268 if (statsinfo->buff) {
269 free(statsinfo->buff);
270 }
271 zassert(pthread_mutex_unlock(&(statsinfo->lock)));
272 zassert(pthread_mutex_destroy(&(statsinfo->lock)));
273 free(statsinfo);
274 }
275 free(sinfo_tab);
276 }
277 sinfo_max = 1;
278 sinfo_size = 0;
279 sinfo_head = 0;
280 zassert(pthread_mutex_unlock(&sinfo_tab_lock));
281 }
282
283
284
285 /* DIRECTORY INODE BUFFERS */
286
/* One entry of the directory buffer table (dirbuf_tab). */
typedef struct _dirbuf {
	int wasread;
	int dataformat;
	uid_t uid;
	gid_t gid;
	const uint8_t *p;	/* heap buffer; freed by dirbuf_release and dirbuf_freeall */
	size_t size;
	void *dcache;
	pthread_mutex_t lock;	/* held by dirbuf_release / dirbuf_freeall while p is freed */
	uint32_t next;		/* next entry on the free list (index into dirbuf_tab), 0 ends the list */
} dirbuf;

/*
 * Table state, all guarded by dirbuf_tab_lock:
 *  dirbuf_head - first entry of the free list (0 = list empty)
 *  dirbuf_size - number of slots allocated in dirbuf_tab
 *  dirbuf_max  - first never-used index; starts at 1 because index 0 means "no entry"
 */
static uint32_t dirbuf_head=0,dirbuf_size=0,dirbuf_max=1;
static dirbuf* *dirbuf_tab=NULL;
static pthread_mutex_t dirbuf_tab_lock = PTHREAD_MUTEX_INITIALIZER;
302
dirbuf_new(void)303 static uint32_t dirbuf_new(void) {
304 uint32_t i;
305 zassert(pthread_mutex_lock(&dirbuf_tab_lock));
306 if (dirbuf_head!=0) {
307 i = dirbuf_head;
308 dirbuf_head = dirbuf_tab[i]->next;
309 } else {
310 if (dirbuf_max>=dirbuf_size) {
311 if (dirbuf_size==0) {
312 dirbuf_size = 32;
313 dirbuf_tab = malloc(sizeof(dirbuf*)*dirbuf_size);
314 } else {
315 dirbuf_size *= 2;
316 dirbuf_tab = mfsrealloc(dirbuf_tab,sizeof(dirbuf*)*dirbuf_size);
317 }
318 passert(dirbuf_tab);
319 }
320 i = dirbuf_max++;
321 dirbuf_tab[i] = malloc(sizeof(dirbuf));
322 passert(dirbuf_tab[i]);
323 memset(dirbuf_tab[i],0,sizeof(dirbuf));
324 zassert(pthread_mutex_init(&(dirbuf_tab[i]->lock),NULL));
325 }
326 zassert(pthread_mutex_unlock(&dirbuf_tab_lock));
327 return i;
328 }
329
dirbuf_get(uint32_t dindex)330 static inline dirbuf* dirbuf_get(uint32_t dindex) {
331 zassert(pthread_mutex_lock(&dirbuf_tab_lock));
332 if (dindex==0 || dindex>=dirbuf_max) {
333 zassert(pthread_mutex_unlock(&dirbuf_tab_lock));
334 return NULL;
335 } else {
336 zassert(pthread_mutex_unlock(&dirbuf_tab_lock));
337 return dirbuf_tab[dindex];
338 }
339 }
340
dirbuf_release(uint32_t dindex)341 static void dirbuf_release(uint32_t dindex) {
342 dirbuf *dirinfo;
343 if (dindex>0) {
344 zassert(pthread_mutex_lock(&dirbuf_tab_lock));
345 dirinfo = dirbuf_tab[dindex];
346 zassert(pthread_mutex_lock(&(dirinfo->lock)));
347 if (dirinfo->p!=NULL) {
348 free((uint8_t *)(dirinfo->p));
349 dirinfo->p = NULL;
350 }
351 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
352 dirinfo->next = dirbuf_head;
353 dirbuf_head = dindex;
354 zassert(pthread_mutex_unlock(&dirbuf_tab_lock));
355 }
356 }
357
dirbuf_freeall(void)358 static void dirbuf_freeall(void) {
359 uint32_t i;
360 dirbuf *dirinfo;
361 zassert(pthread_mutex_lock(&dirbuf_tab_lock));
362 if (dirbuf_tab!=NULL) {
363 for (i=1 ; i<dirbuf_max ; i++) {
364 dirinfo = dirbuf_tab[i];
365 zassert(pthread_mutex_lock(&(dirinfo->lock)));
366 if (dirinfo->p) {
367 free((uint8_t*)(dirinfo->p));
368 }
369 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
370 zassert(pthread_mutex_destroy(&(dirinfo->lock)));
371 free(dirinfo);
372 }
373 free(dirbuf_tab);
374 }
375 dirbuf_max = 1;
376 dirbuf_size = 0;
377 dirbuf_head = 0;
378 dirbuf_tab = NULL;
379 zassert(pthread_mutex_unlock(&dirbuf_tab_lock));
380 }
381
382
383
384 /* FILE INODE DATA */
385
/* NOTE(review): presumably the values stored in finfo.mode - confirm against the open handler */
enum {IO_RW,IO_RO,IO_RA};

/* Element of a singly linked list of lock owners attached to a file handle. */
typedef struct _lock_owner {
#ifdef FLUSH_EXTRA_LOCKS
	pid_t pid;
#endif
	uint64_t lock_owner;
	struct _lock_owner *next;
} finfo_lock_owner;

/* One entry of the file handle table (finfo_tab). */
typedef struct _finfo {
	void *flengptr;		/* released with inoleng_release in finfo_free_resources */
	uint32_t inode;		/* inode passed to finfo_new */
	uint8_t mode;
	uint8_t uselocks;
	uint8_t valid;
	uint8_t open_waiting;
	uint8_t open_in_master;
	int open_status;
	void *rdata;		/* released with read_data_end in finfo_free_resources */
	void *wdata;		/* released with write_data_end in finfo_free_resources */
	double create;
	finfo_lock_owner *posix_lo_head;
	finfo_lock_owner *flock_lo_head;
	pthread_mutex_t lock;
	pthread_cond_t opencond;
	uint32_t findex;	/* handle value: low 24 bits = index in finfo_tab, high 8 bits = counter increased by every finfo_new */
	uint32_t next,*prev;	/* in use: links of the per-inode hash chain (prev points at the link that refers to this entry); released: next links the free/released list and prev is NULL */
#ifdef FREEBSD_DELAYED_RELEASE
	uint32_t ops_in_progress;	/* must be 0 before finfo_delayed_release frees the entry */
	double lastuse;			/* monotonic time compared against FREEBSD_RELEASE_DELAY */
#endif
} finfo;
419
/*
 * File handle table state, all guarded by finfo_tab_lock.
 */
#ifdef FREEBSD_DELAYED_RELEASE
/* list of released entries that still wait for finfo_delayed_release (0 = empty) */
static uint32_t finfo_released_head=0;
#endif
/* finfo_head - free list (0 = empty); finfo_size - allocated slots; finfo_max - first never-used index (index 0 is not used) */
static uint32_t finfo_head=0,finfo_size=0,finfo_max=1;
static finfo* *finfo_tab=NULL;
static pthread_mutex_t finfo_tab_lock = PTHREAD_MUTEX_INITIALIZER;
/* heads of the chains of in-use entries, bucket selected by inode&1023 */
static uint32_t finfo_inode_hash[1024] = {0,};
427
/*
 * Releases the read data, write data and file length objects attached to
 * the file handle (each one only when present) and clears the pointers.
 */
static void finfo_free_resources(finfo *fileinfo) {
	if (fileinfo->rdata!=NULL) {
		read_data_end(fileinfo->rdata);
	}
	if (fileinfo->wdata!=NULL) {
		write_data_end(fileinfo->wdata);
	}
	if (fileinfo->flengptr!=NULL) {
		inoleng_release(fileinfo->flengptr);
	}
	fileinfo->flengptr = NULL;
	fileinfo->wdata = NULL;
	fileinfo->rdata = NULL;
}
440
441 #ifdef FREEBSD_DELAYED_RELEASE
finfo_delayed_release(double now)442 static void finfo_delayed_release(double now) {
443 finfo *fileinfo;
444 uint32_t i,*pi;
445 pi = &finfo_released_head;
446 while ((i=*pi)!=0) {
447 fileinfo = finfo_tab[i];
448 zassert(pthread_mutex_lock(&(fileinfo->lock)));
449 if (fileinfo->ops_in_progress==0 && fileinfo->lastuse+FREEBSD_RELEASE_DELAY<now) {
450 if (write_data_will_end_wait(fileinfo->wdata)) {
451 fileinfo->lastuse = now;
452 pi = &(fileinfo->next);
453 } else {
454 finfo_free_resources(fileinfo);
455 *pi = fileinfo->next;
456 fileinfo->next = finfo_head;
457 finfo_head = i;
458 }
459 } else {
460 pi = &(fileinfo->next);
461 }
462 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
463 }
464 }
465
finfo_delayed_release_cleanup_thread(void * arg)466 static void* finfo_delayed_release_cleanup_thread(void* arg) {
467 double now;
468
469 while (1) {
470 now = monotonic_seconds();
471 zassert(pthread_mutex_lock(&finfo_tab_lock));
472 finfo_delayed_release(now);
473 zassert(pthread_mutex_unlock(&finfo_tab_lock));
474 sleep(1);
475 }
476 return arg;
477 }
478 #endif
479
finfo_new(uint32_t inode)480 static uint32_t finfo_new(uint32_t inode) {
481 uint32_t i,ni,findex;
482 finfo *fileinfo;
483 #ifdef FREEBSD_DELAYED_RELEASE
484 double now;
485 now = monotonic_seconds();
486 #endif
487
488 zassert(pthread_mutex_lock(&finfo_tab_lock));
489 #ifdef FREEBSD_DELAYED_RELEASE
490 finfo_delayed_release(now);
491 #endif
492 if (finfo_head!=0) {
493 i = finfo_head;
494 fileinfo = finfo_tab[i];
495 finfo_head = fileinfo->next;
496 } else {
497 if (finfo_max>=finfo_size) {
498 if (finfo_size==0) {
499 finfo_size = 1024;
500 finfo_tab = malloc(sizeof(finfo*)*finfo_size);
501 } else {
502 finfo_size *= 2;
503 massert(finfo_size<=0x1000000,"file handle tabble too big");
504 finfo_tab = mfsrealloc(finfo_tab,sizeof(finfo*)*finfo_size);
505 }
506 passert(finfo_tab);
507 }
508 i = finfo_max++;
509 finfo_tab[i] = malloc(sizeof(finfo));
510 fileinfo = finfo_tab[i];
511 passert(fileinfo);
512 memset(fileinfo,0,sizeof(finfo));
513 zassert(pthread_mutex_init(&(fileinfo->lock),NULL));
514 zassert(pthread_cond_init(&(fileinfo->opencond),NULL));
515 fileinfo->rdata = NULL;
516 fileinfo->wdata = NULL;
517 fileinfo->flengptr = NULL;
518 fileinfo->findex = i;
519 }
520 findex = fileinfo->findex;
521 massert((findex&0xFFFFFF)==i,"file info record index mismatch");
522 findex += 0x1000000;
523 if ((findex & 0xFF000000)==0) {
524 findex += 0x1000000;
525 }
526 fileinfo->findex = findex;
527 fileinfo->inode = inode;
528 ni = fileinfo->next = finfo_inode_hash[inode&1023];
529 finfo_inode_hash[inode&1023] = i;
530 if (ni!=0) {
531 finfo_tab[ni]->prev = &(fileinfo->next);
532 }
533 fileinfo->prev = finfo_inode_hash + (inode&1023);
534 zassert(pthread_mutex_unlock(&finfo_tab_lock));
535 return findex;
536 }
537
finfo_get(uint32_t findex)538 static inline finfo* finfo_get(uint32_t findex) {
539 uint32_t tindex;
540 finfo *fileinfo;
541 if (findex>0) {
542 tindex = findex & 0xFFFFFF;
543 zassert(pthread_mutex_lock(&finfo_tab_lock));
544 if (tindex>=finfo_max) {
545 fileinfo = NULL;
546 } else {
547 fileinfo = finfo_tab[tindex];
548 if (fileinfo->findex!=findex) {
549 fileinfo = NULL;
550 }
551 }
552 zassert(pthread_mutex_unlock(&finfo_tab_lock));
553 } else {
554 fileinfo = NULL;
555 }
556 return fileinfo;
557 }
558
finfo_release(uint32_t findex)559 static void finfo_release(uint32_t findex) {
560 uint32_t tindex;
561 uint32_t ni;
562 finfo *fileinfo;
563 if (findex>0) {
564 tindex = findex & 0xFFFFFF;
565 zassert(pthread_mutex_lock(&finfo_tab_lock));
566 if (tindex<finfo_max) {
567 fileinfo = finfo_tab[tindex];
568 ni = fileinfo->next;
569 *(fileinfo->prev) = ni;
570 if (ni!=0) {
571 finfo_tab[ni]->prev = fileinfo->prev;
572 }
573 #ifdef FREEBSD_DELAYED_RELEASE
574 fileinfo->next = finfo_released_head;
575 fileinfo->prev = NULL;
576 finfo_released_head = tindex;
577 #else
578 fileinfo->next = finfo_head;
579 fileinfo->prev = NULL;
580 finfo_head = tindex;
581 #endif
582 }
583 zassert(pthread_mutex_unlock(&finfo_tab_lock));
584 }
585 }
586
finfo_freeall(void)587 static void finfo_freeall(void) {
588 uint32_t i;
589 finfo *fileinfo;
590 zassert(pthread_mutex_lock(&finfo_tab_lock));
591 if (finfo_tab!=NULL) {
592 for (i=1 ; i<finfo_max ; i++) {
593 fileinfo = finfo_tab[i];
594 zassert(pthread_mutex_lock(&(fileinfo->lock)));
595 finfo_free_resources(fileinfo);
596 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
597 zassert(pthread_mutex_destroy(&(fileinfo->lock)));
598 zassert(pthread_cond_destroy(&(fileinfo->opencond)));
599 free(fileinfo);
600 }
601 free(finfo_tab);
602 }
603 finfo_max = 1;
604 finfo_size = 0;
605 finfo_head = 0;
606 #ifdef FREEBSD_DELAYED_RELEASE
607 finfo_released_head = 0;
608 #endif
609 finfo_tab = NULL;
610 zassert(pthread_mutex_unlock(&finfo_tab_lock));
611 }
612
/* Passes the new file length of 'inode' on to inoleng_update_fleng; does nothing else. */
static void finfo_change_fleng(uint32_t inode,uint64_t fleng) {
	inoleng_update_fleng(inode,fleng);
}
616
617
618
/* handle of the FUSE connection: a session with FUSE3, a channel with FUSE2; NULL until set elsewhere */
#ifdef HAVE_FUSE3
static struct fuse_session *fuse_comm = NULL;
#else /* FUSE2 */
static struct fuse_chan *fuse_comm = NULL;
#endif
/*
 * Module-wide settings with their default values.
 * NOTE(review): the code that changes them is not in this part of the file.
 */
static int debug_mode = 0;	/* non-zero: mfs_errorconv prints every non-zero status to stderr */
static int usedircache = 1;	/* non-zero: mfs_statsptr_init also creates the dir-cache related counters */
static int keep_cache = 0;
static double direntry_cache_timeout = 0.1;
static double entry_cache_timeout = 0.0;
static double attr_cache_timeout = 0.1;
static int mkdir_copy_sgid = 0;
static int sugid_clear_mode = 0;
static int xattr_cache_on = 0;
static int xattr_acl_support = 0;
static double fsync_before_close_min_time = 10.0;
static int no_xattrs = 0;
static int no_posix_locks = 0;
static int no_bsd_locks = 0;
static int full_permissions = 0;
static uint32_t mfs_disables = 0;
640
641 //static int local_mode = 0;
642 //static int no_attr_cache = 0;
643
/*
 * Identifiers of the operation counters; each value is an index into
 * statsptr[]. Some identifiers exist only for sufficiently new FUSE
 * versions, so the numeric values depend on FUSE_VERSION.
 * STATNODES is the number of identifiers and must stay last.
 */
enum {
	OP_STATFS = 0,
	OP_ACCESS,
	OP_LOOKUP,
	OP_ERRLOOKUP,
	OP_POSLOOKUP,
	OP_NEGLOOKUP,
	OP_LOOKUP_INTERNAL,
	OP_DIRCACHE_LOOKUP,
	OP_NEGCACHE_LOOKUP,
//	OP_DIRCACHE_LOOKUP_POSITIVE,
//	OP_DIRCACHE_LOOKUP_NEGATIVE,
//	OP_DIRCACHE_LOOKUP_NOATTR,
	OP_GETATTR,
	OP_DIRCACHE_GETATTR,
	OP_SETATTR,
	OP_MKNOD,
	OP_UNLINK,
	OP_MKDIR,
	OP_RMDIR,
	OP_SYMLINK,
	OP_READLINK_MASTER,
	OP_READLINK_CACHED,
	OP_RENAME,
	OP_LINK,
	OP_OPENDIR,
	OP_READDIR,
	OP_RELEASEDIR,
	OP_CREATE,
	OP_OPEN,
	OP_RELEASE,
	OP_READ,
	OP_WRITE,
	OP_FLUSH,
	OP_FSYNC,
#if FUSE_VERSION >= 29
	OP_FLOCK,
#endif
#if FUSE_VERSION >= 26
	OP_GETLK,
	OP_SETLK,
#endif
	OP_SETXATTR,
	OP_GETXATTR,
	OP_LISTXATTR,
	OP_REMOVEXATTR,
//	OP_GETDIR_CACHED,
	OP_GETDIR_FULL,
	OP_GETDIR_SMALL,
#if FUSE_VERSION >= 30
	OP_READDIRPLUS,
	OP_GETDIR_PLUS,
#endif
	STATNODES
};

/* counter node for every OP_* identifier; filled by mfs_statsptr_init, entries it does not set stay NULL */
static void *statsptr[STATNODES];
701
/*
 * Creates the "fuse_ops" statistics node with one sub-node per operation
 * and stores the sub-nodes in statsptr[].
 * Entries left unset (NULL): OP_LOOKUP always, OP_DIRCACHE_LOOKUP and
 * OP_GETDIR_FULL when usedircache is 0.
 * NOTE(review): the order of the stats_get_subnode calls is kept as is - it
 * may determine the order in which the counters are presented; confirm in
 * stats.c before reordering.
 */
void mfs_statsptr_init(void) {
	void *s;
	s = stats_get_subnode(NULL,"fuse_ops",0,1);
	statsptr[OP_SETXATTR] = stats_get_subnode(s,"setxattr",0,1);
	statsptr[OP_GETXATTR] = stats_get_subnode(s,"getxattr",0,1);
	statsptr[OP_LISTXATTR] = stats_get_subnode(s,"listxattr",0,1);
	statsptr[OP_REMOVEXATTR] = stats_get_subnode(s,"removexattr",0,1);
#if FUSE_VERSION >= 29
	statsptr[OP_FLOCK] = stats_get_subnode(s,"flock",0,1);
#endif
#if FUSE_VERSION >= 26
	statsptr[OP_GETLK] = stats_get_subnode(s,"getlk",0,1);
	statsptr[OP_SETLK] = stats_get_subnode(s,"setlk",0,1);
#endif
	statsptr[OP_FSYNC] = stats_get_subnode(s,"fsync",0,1);
	statsptr[OP_FLUSH] = stats_get_subnode(s,"flush",0,1);
	statsptr[OP_WRITE] = stats_get_subnode(s,"write",0,1);
	statsptr[OP_READ] = stats_get_subnode(s,"read",0,1);
	statsptr[OP_RELEASE] = stats_get_subnode(s,"release",0,1);
	statsptr[OP_OPEN] = stats_get_subnode(s,"open",0,1);
	statsptr[OP_CREATE] = stats_get_subnode(s,"create",0,1);
	statsptr[OP_RELEASEDIR] = stats_get_subnode(s,"releasedir",0,1);
	statsptr[OP_READDIR] = stats_get_subnode(s,"readdir",0,1);
#if FUSE_VERSION >= 30
	statsptr[OP_READDIRPLUS] = stats_get_subnode(s,"readdirplus",0,1);
#endif
	statsptr[OP_OPENDIR] = stats_get_subnode(s,"opendir",0,1);
	statsptr[OP_LINK] = stats_get_subnode(s,"link",0,1);
	statsptr[OP_RENAME] = stats_get_subnode(s,"rename",0,1);
	/* readlink is split into two counters below a common "readlink" node */
	{
		void *rl;
		rl = stats_get_subnode(s,"readlink",0,1);
		statsptr[OP_READLINK_MASTER] = stats_get_subnode(rl,"master",0,1);
		statsptr[OP_READLINK_CACHED] = stats_get_subnode(rl,"cached",0,1);
	}
	statsptr[OP_SYMLINK] = stats_get_subnode(s,"symlink",0,1);
	statsptr[OP_RMDIR] = stats_get_subnode(s,"rmdir",0,1);
	statsptr[OP_MKDIR] = stats_get_subnode(s,"mkdir",0,1);
	statsptr[OP_UNLINK] = stats_get_subnode(s,"unlink",0,1);
	statsptr[OP_MKNOD] = stats_get_subnode(s,"mknod",0,1);
	statsptr[OP_SETATTR] = stats_get_subnode(s,"setattr",0,1);
	statsptr[OP_GETATTR] = stats_get_subnode(s,"getattr",0,1);
	statsptr[OP_DIRCACHE_GETATTR] = stats_get_subnode(s,"getattr-cached",0,1);
	/* lookup counters: "lookup" -> "cached" / "master" / "internal" */
	{
		void *l,*cl,*nl;
		l = stats_get_subnode(s,"lookup",0,1);
		cl = stats_get_subnode(l,"cached",0,1);
		nl = stats_get_subnode(l,"master",0,1);
		statsptr[OP_LOOKUP_INTERNAL] = stats_get_subnode(l,"internal",0,1);
		statsptr[OP_POSLOOKUP] = stats_get_subnode(nl,"positive",0,1);
		statsptr[OP_NEGLOOKUP] = stats_get_subnode(nl,"negative",0,1);
		statsptr[OP_ERRLOOKUP] = stats_get_subnode(nl,"error",0,1);
		if (usedircache) {
			statsptr[OP_DIRCACHE_LOOKUP] = stats_get_subnode(cl,"readdir",0,1);
		}
		statsptr[OP_NEGCACHE_LOOKUP] = stats_get_subnode(cl,"negative",0,1);
	}
	statsptr[OP_ACCESS] = stats_get_subnode(s,"access",0,1);
	statsptr[OP_STATFS] = stats_get_subnode(s,"statfs",0,1);
	/* directory fetch counters; the "readdir" node was already requested above for OP_READDIR */
	{
		void *rd;
		rd = stats_get_subnode(s,"readdir",0,1);
		if (usedircache) {
			statsptr[OP_GETDIR_FULL] = stats_get_subnode(rd,"with_attrs",0,1);
		}
		statsptr[OP_GETDIR_SMALL] = stats_get_subnode(rd,"without_attrs",0,1);
#if FUSE_VERSION >= 30
		statsptr[OP_GETDIR_PLUS] = stats_get_subnode(rd,"with_attrs+",0,1);
#endif
	}
}
773
mfs_stats_inc(uint8_t id)774 void mfs_stats_inc(uint8_t id) {
775 if (id<STATNODES) {
776 stats_counter_inc(statsptr[id]);
777 }
778 }
779
/* key of the per-thread ACL buffer (see mfs_aclstorage_get) */
static pthread_key_t aclstorage;

/*
 * Destructor registered for the aclstorage key: frees the buffer of a
 * thread. A NULL pointer is accepted (free does nothing for it).
 */
void mfs_aclstorage_free(void *ptr) {
	free(ptr);
}
787
/*
 * Creates the thread-specific key for the ACL buffers, with
 * mfs_aclstorage_free as its destructor, and sets the value of the calling
 * thread to NULL (no buffer yet).
 */
void mfs_aclstorage_init(void) {
	zassert(pthread_key_create(&aclstorage,mfs_aclstorage_free));
	zassert(pthread_setspecific(aclstorage,NULL));
}
792
mfs_aclstorage_get(uint32_t size)793 void* mfs_aclstorage_get(uint32_t size) {
794 uint8_t *buff,*p;
795 const uint8_t *cp;
796 uint32_t s;
797 buff = pthread_getspecific(aclstorage);
798 if (buff!=NULL) {
799 cp = p = buff;
800 s = get32bit(&cp);
801 if (size<=s) {
802 return (buff+4);
803 }
804 free(buff);
805 }
806 buff = malloc(size+4);
807 passert(buff);
808 p = buff;
809 put32bit(&p,size);
810 zassert(pthread_setspecific(aclstorage,buff));
811 return p;
812 }
813
/* substitutes for errno values that <errno.h> may not define on this platform */
#ifndef EDQUOT
#	define EDQUOT ENOSPC
#endif
/* ENOATTR falls back to ENODATA when that exists, to ENOENT otherwise */
#ifndef ENOATTR
#	ifdef ENODATA
#		define ENOATTR ENODATA
#	else
#		define ENOATTR ENOENT
#	endif
#endif
824
mfs_errorconv(int status)825 static int mfs_errorconv(int status) {
826 int ret;
827 switch (status) {
828 case MFS_STATUS_OK:
829 ret=0;
830 break;
831 case MFS_ERROR_EPERM:
832 ret=EPERM;
833 break;
834 case MFS_ERROR_ENOTDIR:
835 ret=ENOTDIR;
836 break;
837 case MFS_ERROR_ENOENT:
838 ret=ENOENT;
839 break;
840 case MFS_ERROR_EACCES:
841 ret=EACCES;
842 break;
843 case MFS_ERROR_EEXIST:
844 ret=EEXIST;
845 break;
846 case MFS_ERROR_EINVAL:
847 ret=EINVAL;
848 break;
849 case MFS_ERROR_ENOTEMPTY:
850 ret=ENOTEMPTY;
851 break;
852 case MFS_ERROR_IO:
853 ret=EIO;
854 break;
855 case MFS_ERROR_EROFS:
856 ret=EROFS;
857 break;
858 case MFS_ERROR_EINTR:
859 ret=EINTR;
860 break;
861 case MFS_ERROR_EAGAIN:
862 ret=EAGAIN;
863 break;
864 case MFS_ERROR_ECANCELED:
865 ret=ECANCELED;
866 break;
867 case MFS_ERROR_QUOTA:
868 ret=EDQUOT;
869 break;
870 case MFS_ERROR_ENOATTR:
871 ret=ENOATTR;
872 break;
873 case MFS_ERROR_ENOTSUP:
874 ret=ENOTSUP;
875 break;
876 case MFS_ERROR_ERANGE:
877 ret=ERANGE;
878 break;
879 case MFS_ERROR_NOSPACE:
880 ret=ENOSPC;
881 break;
882 case MFS_ERROR_CHUNKLOST:
883 ret=ENXIO;
884 break;
885 case MFS_ERROR_NOCHUNKSERVERS:
886 ret=ENOSPC;
887 break;
888 case MFS_ERROR_CSNOTPRESENT:
889 ret=ENXIO;
890 break;
891 case MFS_ERROR_NOTOPENED:
892 ret=EBADF;
893 break;
894 case MFS_ERROR_INDEXTOOBIG:
895 ret=EFBIG;
896 break;
897 default:
898 ret=EINVAL;
899 break;
900 }
901 if (debug_mode && ret!=0) {
902 #ifdef HAVE_STRERROR_R
903 char errorbuff[500];
904 # ifdef STRERROR_R_CHAR_P
905 fprintf(stderr,"status: %s\n",strerror_r(ret,errorbuff,500));
906 # else
907 strerror_r(ret,errorbuff,500);
908 fprintf(stderr,"status: %s\n",errorbuff);
909 # endif
910 #else
911 # ifdef HAVE_PERROR
912 errno=ret;
913 perror("status: ");
914 # else
915 fprintf(stderr,"status: %d\n",ret);
916 # endif
917 #endif
918 }
919 return ret;
920 }
921
fsnodes_type_convert(uint8_t type)922 static inline uint8_t fsnodes_type_convert(uint8_t type) {
923 switch (type) {
924 case DISP_TYPE_FILE:
925 return TYPE_FILE;
926 case DISP_TYPE_DIRECTORY:
927 return TYPE_DIRECTORY;
928 case DISP_TYPE_SYMLINK:
929 return TYPE_SYMLINK;
930 case DISP_TYPE_FIFO:
931 return TYPE_FIFO;
932 case DISP_TYPE_BLOCKDEV:
933 return TYPE_BLOCKDEV;
934 case DISP_TYPE_CHARDEV:
935 return TYPE_CHARDEV;
936 case DISP_TYPE_SOCKET:
937 return TYPE_SOCKET;
938 case DISP_TYPE_TRASH:
939 return TYPE_TRASH;
940 case DISP_TYPE_SUSTAINED:
941 return TYPE_SUSTAINED;
942 }
943 return 0;
944 }
945
mfs_type_to_stat(uint32_t inode,uint8_t type,struct stat * stbuf)946 static void mfs_type_to_stat(uint32_t inode,uint8_t type, struct stat *stbuf) {
947 memset(stbuf,0,sizeof(struct stat));
948 stbuf->st_ino = inode;
949 switch (type&0x7F) {
950 case DISP_TYPE_DIRECTORY:
951 case TYPE_DIRECTORY:
952 stbuf->st_mode = S_IFDIR;
953 break;
954 case DISP_TYPE_SYMLINK:
955 case TYPE_SYMLINK:
956 stbuf->st_mode = S_IFLNK;
957 break;
958 case DISP_TYPE_FILE:
959 case TYPE_FILE:
960 stbuf->st_mode = S_IFREG;
961 break;
962 case DISP_TYPE_FIFO:
963 case TYPE_FIFO:
964 stbuf->st_mode = S_IFIFO;
965 break;
966 case DISP_TYPE_SOCKET:
967 case TYPE_SOCKET:
968 stbuf->st_mode = S_IFSOCK;
969 break;
970 case DISP_TYPE_BLOCKDEV:
971 case TYPE_BLOCKDEV:
972 stbuf->st_mode = S_IFBLK;
973 break;
974 case DISP_TYPE_CHARDEV:
975 case TYPE_CHARDEV:
976 stbuf->st_mode = S_IFCHR;
977 break;
978 default:
979 stbuf->st_mode = 0;
980 }
981 }
982
/*
 * Returns the node type (TYPE_* value) stored in an attribute record.
 * Two record layouts exist, told apart by the first byte:
 *  < 64  (1.7.29 and up): the type is the high nibble of attr[1]
 *  >= 64 (older): attr[0] holds a DISP_TYPE_* value, which is converted
 */
static inline uint8_t mfs_attr_get_type(const uint8_t attr[ATTR_RECORD_SIZE]) {
	return (attr[0]<64) ? (attr[1]>>4) : fsnodes_type_convert(attr[0]&0x7F);
}
990
/*
 * Returns the flags byte ("mattr") stored in an attribute record.
 * Two record layouts exist, told apart by the first byte:
 *  < 64  (1.7.29 and up): the flags are attr[0] itself
 *  >= 64 (older): the flags are the high nibble of attr[1]
 */
static inline uint8_t mfs_attr_get_mattr(const uint8_t attr[ATTR_RECORD_SIZE]) {
	return (attr[0]<64) ? attr[0] : (attr[1]>>4);
}
998
/* Reads the 64-bit length field of an attribute record (stored at byte offset 27). */
static inline uint64_t mfs_attr_get_fleng(const uint8_t attr[ATTR_RECORD_SIZE]) {
	const uint8_t *rptr = attr+27;

	return get64bit(&rptr);
}
1004
/* Overwrites the 64-bit length field of an attribute record (stored at byte offset 27). */
static inline void mfs_attr_set_fleng(uint8_t attr[ATTR_RECORD_SIZE],uint64_t fleng) {
	uint8_t *wptr = attr+27;

	put64bit(&wptr,fleng);
}
1010
mfs_attr_modify(uint32_t to_set,uint8_t attr[ATTR_RECORD_SIZE],struct stat * stbuf)1011 static void mfs_attr_modify(uint32_t to_set,uint8_t attr[ATTR_RECORD_SIZE],struct stat *stbuf) {
1012 uint8_t mattr;
1013 uint16_t attrmode;
1014 uint8_t attrtype;
1015 uint32_t attruid,attrgid,attratime,attrmtime,attrctime;
1016 const uint8_t *ptr;
1017 uint8_t *wptr;
1018 ptr = attr;
1019 if (attr[0]<64) { // 1.7.29 and up
1020 mattr = get8bit(&ptr);
1021 attrmode = get16bit(&ptr);
1022 attrtype = (attrmode>>12);
1023 } else {
1024 attrtype = get8bit(&ptr);
1025 attrtype = fsnodes_type_convert(attrtype&0x7F);
1026 attrmode = get16bit(&ptr);
1027 mattr = attrmode >> 12;
1028 }
1029 attrmode &= 0x0FFF;
1030 attruid = get32bit(&ptr);
1031 attrgid = get32bit(&ptr);
1032 attratime = get32bit(&ptr);
1033 attrmtime = get32bit(&ptr);
1034 attrctime = get32bit(&ptr);
1035 if (to_set & FUSE_SET_ATTR_MODE) {
1036 attrmode = stbuf->st_mode & 07777;
1037 attrctime = time(NULL);
1038 }
1039 if (to_set & FUSE_SET_ATTR_UID) {
1040 attruid = stbuf->st_uid;
1041 attrctime = time(NULL);
1042 }
1043 if (to_set & FUSE_SET_ATTR_GID) {
1044 attrgid = stbuf->st_gid;
1045 attrctime = time(NULL);
1046 }
1047 if (to_set & FUSE_SET_ATTR_ATIME) {
1048 attratime = stbuf->st_atime;
1049 attrctime = time(NULL);
1050 }
1051 if (to_set & FUSE_SET_ATTR_MTIME) {
1052 attratime = stbuf->st_mtime;
1053 attrctime = time(NULL);
1054 }
1055 wptr = attr;
1056 put8bit(&wptr,mattr);
1057 attrmode |= ((uint16_t)attrtype)<<12;
1058 put16bit(&wptr,attrmode);
1059 put32bit(&wptr,attruid);
1060 put32bit(&wptr,attrgid);
1061 put32bit(&wptr,attratime);
1062 put32bit(&wptr,attrmtime);
1063 put32bit(&wptr,attrctime);
1064 }
1065
mfs_attr_to_stat(uint32_t inode,const uint8_t attr[ATTR_RECORD_SIZE],struct stat * stbuf)1066 static void mfs_attr_to_stat(uint32_t inode,const uint8_t attr[ATTR_RECORD_SIZE], struct stat *stbuf) {
1067 uint16_t attrmode;
1068 uint8_t attrtype;
1069 uint32_t attruid,attrgid,attratime,attrmtime,attrctime,attrnlink,attrrdev;
1070 uint64_t attrlength;
1071 const uint8_t *ptr;
1072 ptr = attr;
1073 if (attr[0]<64) { // 1.7.29 and up
1074 ptr++;
1075 attrmode = get16bit(&ptr);
1076 attrtype = (attrmode>>12);
1077 } else {
1078 attrtype = get8bit(&ptr);
1079 attrtype = fsnodes_type_convert(attrtype&0x7F);
1080 attrmode = get16bit(&ptr);
1081 }
1082 attrmode &= 0x0FFF;
1083 attruid = get32bit(&ptr);
1084 attrgid = get32bit(&ptr);
1085 attratime = get32bit(&ptr);
1086 attrmtime = get32bit(&ptr);
1087 attrctime = get32bit(&ptr);
1088 attrnlink = get32bit(&ptr);
1089 stbuf->st_ino = inode;
1090 #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
1091 stbuf->st_blksize = MFSBLOCKSIZE;
1092 #endif
1093 switch (attrtype & 0x7F) {
1094 case TYPE_DIRECTORY:
1095 stbuf->st_mode = S_IFDIR | attrmode;
1096 attrlength = get64bit(&ptr);
1097 stbuf->st_size = attrlength;
1098 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
1099 stbuf->st_blocks = (attrlength+511)/512;
1100 #endif
1101 break;
1102 case TYPE_SYMLINK:
1103 stbuf->st_mode = S_IFLNK | attrmode;
1104 attrlength = get64bit(&ptr);
1105 stbuf->st_size = attrlength;
1106 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
1107 stbuf->st_blocks = (attrlength+511)/512;
1108 #endif
1109 break;
1110 case TYPE_FILE:
1111 stbuf->st_mode = S_IFREG | attrmode;
1112 attrlength = get64bit(&ptr);
1113 stbuf->st_size = attrlength;
1114 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
1115 stbuf->st_blocks = (attrlength+511)/512;
1116 #endif
1117 break;
1118 case TYPE_FIFO:
1119 stbuf->st_mode = S_IFIFO | attrmode;
1120 stbuf->st_size = 0;
1121 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
1122 stbuf->st_blocks = 0;
1123 #endif
1124 break;
1125 case TYPE_SOCKET:
1126 stbuf->st_mode = S_IFSOCK | attrmode;
1127 stbuf->st_size = 0;
1128 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
1129 stbuf->st_blocks = 0;
1130 #endif
1131 break;
1132 case TYPE_BLOCKDEV:
1133 stbuf->st_mode = S_IFBLK | attrmode;
1134 attrrdev = get32bit(&ptr);
1135 #ifdef HAVE_STRUCT_STAT_ST_RDEV
1136 stbuf->st_rdev = attrrdev;
1137 #endif
1138 stbuf->st_size = 0;
1139 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
1140 stbuf->st_blocks = 0;
1141 #endif
1142 break;
1143 case TYPE_CHARDEV:
1144 stbuf->st_mode = S_IFCHR | attrmode;
1145 attrrdev = get32bit(&ptr);
1146 #ifdef HAVE_STRUCT_STAT_ST_RDEV
1147 stbuf->st_rdev = attrrdev;
1148 #endif
1149 stbuf->st_size = 0;
1150 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
1151 stbuf->st_blocks = 0;
1152 #endif
1153 break;
1154 default:
1155 stbuf->st_mode = 0;
1156 }
1157 stbuf->st_uid = attruid;
1158 stbuf->st_gid = attrgid;
1159 stbuf->st_atime = attratime;
1160 stbuf->st_mtime = attrmtime;
1161 stbuf->st_ctime = attrctime;
1162 #ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME
1163 stbuf->st_birthtime = attrctime; // for future use
1164 #endif
1165 stbuf->st_nlink = attrnlink;
1166 }
1167
/*
 * Render a mode word as a 10-character "ls -l" style string (plus NUL).
 *
 * modestr - output buffer, at least 11 bytes
 * mode    - file type and permission bits
 *
 * Character 0 is the file type ('?' when the type bits match nothing known,
 * 'f' is used for FIFOs). Characters 1..9 are the rwx triplets; set-uid,
 * set-gid and sticky replace the respective execute slot with 's'/'s'/'t',
 * upper-cased when the underlying execute bit is not set.
 */
static inline void mfs_makemodestr(char modestr[11],uint16_t mode) {
	static const char letters[9] = {'r','w','x','r','w','x','r','w','x'};
	uint32_t pos;
	char special;

	switch (mode & S_IFMT) {
		case S_IFSOCK:
			modestr[0] = 's';
			break;
		case S_IFLNK:
			modestr[0] = 'l';
			break;
		case S_IFREG:
			modestr[0] = '-';
			break;
		case S_IFBLK:
			modestr[0] = 'b';
			break;
		case S_IFDIR:
			modestr[0] = 'd';
			break;
		case S_IFCHR:
			modestr[0] = 'c';
			break;
		case S_IFIFO:
			modestr[0] = 'f';
			break;
		default:
			modestr[0] = '?';
	}
	for (pos=1 ; pos<=9 ; pos++) {
		// does one of the special bits own this slot?
		special = '\0';
		if (pos==3 && (mode & S_ISUID)) {
			special = 's';
		} else if (pos==6 && (mode & S_ISGID)) {
			special = 's';
		} else if (pos==9 && (mode & S_ISVTX)) {
			special = 't';
		}
		// slot 'pos' corresponds to permission bit (9-pos)
		if (mode & (1<<(9-pos))) {
			modestr[pos] = (special!='\0')?special:letters[pos-1];
		} else if (special!='\0') {
			modestr[pos] = (char)(special & 0xDF); // clear the ASCII lower-case bit: 's'->'S', 't'->'T'
		} else {
			modestr[pos] = '-';
		}
	}
	modestr[10] = '\0';
}
1213
/*
 * Format the contents of a struct stat as a single bracketed log fragment:
 *   [modestr:mode,nlink,uid,gid,atime,mtime,ctime,size]
 * When st_rdev is available and the node is a block or character device the
 * device number is appended as an extra hexadecimal field.
 *
 * buff  - output buffer
 * size  - capacity of buff (output is truncated by snprintf if needed)
 * stbuf - attributes to print
 */
static void mfs_makeattrstr(char *buff,uint32_t size,struct stat *stbuf) {
	char modestr[11];
	unsigned int modeval = (unsigned int)(stbuf->st_mode);
	unsigned int nlinkval = (unsigned int)(stbuf->st_nlink);
	long int uidval = (long int)stbuf->st_uid;
	long int gidval = (long int)stbuf->st_gid;
	unsigned long int atimeval = (unsigned long int)(stbuf->st_atime);
	unsigned long int mtimeval = (unsigned long int)(stbuf->st_mtime);
	unsigned long int ctimeval = (unsigned long int)(stbuf->st_ctime);
	unsigned long long int sizeval = (unsigned long long int)(stbuf->st_size);

	mfs_makemodestr(modestr,stbuf->st_mode);
#ifdef HAVE_STRUCT_STAT_ST_RDEV
	if (modestr[0]=='b' || modestr[0]=='c') {
		// device nodes: include the device number
		snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%llu,%08lX]",modestr,modeval,nlinkval,uidval,gidval,atimeval,mtimeval,ctimeval,sizeval,(unsigned long int)(stbuf->st_rdev));
		return;
	}
#endif
	snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%llu]",modestr,modeval,nlinkval,uidval,gidval,atimeval,mtimeval,ctimeval,sizeval);
}
1227
1228 #if FUSE_USE_VERSION >= 26
mfs_statfs(fuse_req_t req,fuse_ino_t ino)1229 void mfs_statfs(fuse_req_t req,fuse_ino_t ino) {
1230 #else
1231 void mfs_statfs(fuse_req_t req) {
1232 #endif
1233 uint64_t totalspace,availspace,freespace,trashspace,sustainedspace;
1234 uint32_t inodes;
1235 uint32_t bsize;
1236 struct statvfs stfsbuf;
1237 memset(&stfsbuf,0,sizeof(stfsbuf));
1238 struct fuse_ctx ctx;
1239
1240 ctx = *(fuse_req_ctx(req));
1241 mfs_stats_inc(OP_STATFS);
1242 if (debug_mode) {
1243 #if FUSE_USE_VERSION >= 26
1244 oplog_printf(&ctx,"statfs (%lu)",(unsigned long int)ino);
1245 #else
1246 oplog_printf(&ctx,"statfs ()");
1247 #endif
1248 }
1249 #if FUSE_USE_VERSION >= 26
1250 (void)ino;
1251 #endif
1252 fs_statfs(&totalspace,&availspace,&freespace,&trashspace,&sustainedspace,&inodes);
1253
1254 #if defined(__APPLE__)
1255 if (totalspace>0x0001000000000000ULL) {
1256 bsize = 0x20000;
1257 } else {
1258 bsize = 0x10000;
1259 }
1260 #else
1261 bsize = 0x10000;
1262 #endif
1263
1264 stfsbuf.f_namemax = MFS_NAME_MAX;
1265 stfsbuf.f_frsize = bsize;
1266 stfsbuf.f_bsize = bsize;
1267 #if defined(__APPLE__)
1268 // FUSE on apple (or other parts of kernel) expects 32-bit values, so it's better to saturate this values than let being cut on 32-bit
1269 // can't change bsize also because 64k seems to be the biggest acceptable value for bsize
1270
1271 if (totalspace/bsize>0xFFFFFFFFU) {
1272 stfsbuf.f_blocks = 0xFFFFFFFFU;
1273 } else {
1274 stfsbuf.f_blocks = totalspace/bsize;
1275 }
1276 if (availspace/bsize>0xFFFFFFFFU) {
1277 stfsbuf.f_bavail = 0xFFFFFFFFU;
1278 } else {
1279 stfsbuf.f_bavail = availspace/bsize;
1280 }
1281 if (freespace/bsize>0xFFFFFFFFU) {
1282 stfsbuf.f_bfree = 0xFFFFFFFFU;
1283 } else {
1284 stfsbuf.f_bfree = freespace/bsize;
1285 }
1286 #else
1287 stfsbuf.f_blocks = totalspace/bsize;
1288 stfsbuf.f_bfree = freespace/bsize;
1289 stfsbuf.f_bavail = availspace/bsize;
1290 #endif
1291 stfsbuf.f_files = 1000000000+PKGVERSION+inodes;
1292 stfsbuf.f_ffree = 1000000000+PKGVERSION;
1293 stfsbuf.f_favail = 1000000000+PKGVERSION;
1294 //stfsbuf.f_flag = ST_RDONLY;
1295 #if FUSE_USE_VERSION >= 26
1296 oplog_printf(&ctx,"statfs (%lu): OK (%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu32")",(unsigned long int)ino,totalspace,availspace,freespace,trashspace,sustainedspace,inodes);
1297 #else
1298 oplog_printf(&ctx,"statfs (): OK (%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu32")",totalspace,availspace,freespace,trashspace,sustainedspace,inodes);
1299 #endif
1300 fuse_reply_statfs(req,&stfsbuf);
1301 }
1302
1303 /*
1304 static int mfs_node_access(uint8_t attr[32],uint32_t uid,uint32_t gid,int mask) {
1305 uint32_t emode,mmode;
1306 uint32_t attruid,attrgid;
1307 uint16_t attrmode;
1308 uint8_t *ptr;
1309 if (uid == 0) {
1310 return 1;
1311 }
1312 ptr = attr+2;
1313 attrmode = get16bit(&ptr);
1314 attruid = get32bit(&ptr);
1315 attrgid = get32bit(&ptr);
1316 if (uid == attruid) {
1317 emode = (attrmode & 0700) >> 6;
1318 } else if (gid == attrgid) {
1319 emode = (attrmode & 0070) >> 3;
1320 } else {
1321 emode = attrmode & 0007;
1322 }
1323 mmode = 0;
1324 if (mask & R_OK) {
1325 mmode |= 4;
1326 }
1327 if (mask & W_OK) {
1328 mmode |= 2;
1329 }
1330 if (mask & X_OK) {
1331 mmode |= 1;
1332 }
1333 if ((emode & mmode) == mmode) {
1334 return 1;
1335 }
1336 return 0;
1337 }
1338 */
1339
1340 // simple access test for deleted cwd nodes - no ACL's
1341 int mfs_access_test(const uint8_t attr[ATTR_RECORD_SIZE],int mmode,uint32_t uid,uint32_t gidcnt,uint32_t *gidtab) {
1342 uint8_t modebits,gok;
1343 uint16_t attrmode;
1344 uint32_t attruid,attrgid;
1345 const uint8_t *ptr;
1346
1347 if (uid==0) {
1348 return 0;
1349 }
1350 ptr = attr+1;
1351 attrmode = get16bit(&ptr);
1352 attruid = get32bit(&ptr);
1353 attrgid = get32bit(&ptr);
1354
1355 modebits = 0; // makes cppcheck happy
1356 if (uid == attruid) {
1357 modebits = (attrmode >> 6) & 7;
1358 } else {
1359 gok = 0;
1360 while (gidcnt>0) {
1361 gidcnt--;
1362 if (gidtab[gidcnt] == attrgid) {
1363 modebits = (attrmode >> 3) & 7;
1364 gok = 1;
1365 break;
1366 }
1367 }
1368 if (gok==0) {
1369 modebits = attrmode & 7;
1370 }
1371 }
1372 if ((mmode & modebits) == mmode) {
1373 return 0;
1374 }
1375 return EACCES;
1376 }
1377
1378 void mfs_access(fuse_req_t req, fuse_ino_t ino, int mask) {
1379 int status;
1380 struct fuse_ctx ctx;
1381 uint8_t attr[ATTR_RECORD_SIZE];
1382 groups *gids;
1383 int mmode;
1384 uint16_t lflags;
1385 int force_mode;
1386
1387 ctx = *(fuse_req_ctx(req));
1388 if (debug_mode) {
1389 oplog_printf(&ctx,"access (%lu,0x%X) ...",(unsigned long int)ino,mask);
1390 fprintf(stderr,"access (%lu,0x%X)\n",(unsigned long int)ino,mask);
1391 }
1392 mfs_stats_inc(OP_ACCESS);
1393 #ifdef FREEBSD_XONLY_ACCESS
1394 if (mask & X_OK) { // have X?
1395 if (mask & (R_OK|W_OK)) {
1396 oplog_printf(&ctx,"access (%lu,0x%X): (change mask to X_OK only - kernel bug workaround)",(unsigned long int)ino,mask);
1397 mask = X_OK; // check X only
1398 }
1399 } else {
1400 oplog_printf(&ctx,"access (%lu,0x%X): OK (forced - kernel bug workaround)",(unsigned long int)ino,mask);
1401 fuse_reply_err(req,0);
1402 return;
1403 }
1404 #endif
1405 #if (R_OK==MODE_MASK_R) && (W_OK==MODE_MASK_W) && (X_OK==MODE_MASK_X)
1406 mmode = mask;
1407 #else
1408 mmode = 0;
1409 if (mask & R_OK) {
1410 mmode |= MODE_MASK_R;
1411 }
1412 if (mask & W_OK) {
1413 mmode |= MODE_MASK_W;
1414 }
1415 if (mask & X_OK) {
1416 mmode |= MODE_MASK_X;
1417 }
1418 #endif
1419 if (IS_SPECIAL_INODE(ino)) {
1420 if (mask & (W_OK | X_OK)) {
1421 fuse_reply_err(req,EACCES);
1422 } else {
1423 fuse_reply_err(req,0);
1424 }
1425 return;
1426 }
1427
1428 if (fdcache_find(&ctx,ino,NULL,&lflags)) {
1429 if ((lflags & LOOKUP_RO_FILESYSTEM) && (mmode & MODE_MASK_W)) {
1430 status = MFS_ERROR_EROFS;
1431 } else if ((lflags & LOOKUP_IMMUTABLE) && (mmode & MODE_MASK_W)) {
1432 status = MFS_ERROR_EPERM;
1433 } else {
1434 status = (lflags & (1<<(mmode&0x7)))?MFS_STATUS_OK:MFS_ERROR_EACCES;
1435 }
1436 } else {
1437 if (full_permissions) {
1438 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
1439 status = fs_access(ino,ctx.uid,gids->gidcnt,gids->gidtab,mmode);
1440 groups_rel(gids);
1441 } else {
1442 uint32_t gidtmp = ctx.gid;
1443 status = fs_access(ino,ctx.uid,1,&gidtmp,mmode);
1444 }
1445 }
1446 force_mode = 0;
1447 if (status==MFS_ERROR_ENOENT) {
1448 if (ctx.pid == getpid()) {
1449 force_mode = 1;
1450 }
1451 if (sstats_get(ino,attr,force_mode)==MFS_STATUS_OK) {
1452 if (force_mode==0) {
1453 force_mode = 2;
1454 }
1455 }
1456 }
1457 if (force_mode) {
1458 if (force_mode == 1) {
1459 if (debug_mode) {
1460 fprintf(stderr,"special case: internal access (%lu,0x%X) - positive answer\n",(unsigned long int)ino,mask);
1461 }
1462 oplog_printf(&ctx,"special case: internal access (%lu,0x%X): OK",(unsigned long int)ino,mask);
1463 status = 0;
1464 } else {
1465 if (debug_mode) {
1466 fprintf(stderr,"special case: sustained access (%lu,0x%X) - using stored data\n",(unsigned long int)ino,mask);
1467 }
1468 if (full_permissions) {
1469 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
1470 status = mfs_access_test(attr,mmode,ctx.uid,gids->gidcnt,gids->gidtab);
1471 groups_rel(gids);
1472 } else {
1473 uint32_t gidtmp = ctx.gid;
1474 status = mfs_access_test(attr,mmode,ctx.uid,1,&gidtmp);
1475 }
1476 }
1477 } else {
1478 status = mfs_errorconv(status);
1479 if (status!=0) {
1480 oplog_printf(&ctx,"access (%lu,0x%X): %s",(unsigned long int)ino,mask,strerr(status));
1481 } else {
1482 oplog_printf(&ctx,"access (%lu,0x%X): OK",(unsigned long int)ino,mask);
1483 }
1484 }
1485 fuse_reply_err(req,status);
1486 }
1487
1488 void mfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) {
1489 struct fuse_entry_param e;
1490 uint64_t maxfleng;
1491 uint32_t inode;
1492 uint32_t nleng;
1493 uint8_t attr[ATTR_RECORD_SIZE];
1494 uint8_t csdataver;
1495 uint16_t lflags;
1496 uint64_t chunkid;
1497 uint32_t version;
1498 const uint8_t *csdata;
1499 uint32_t csdatasize;
1500 char attrstr[256];
1501 uint8_t mattr,type;
1502 uint8_t icacheflag;
1503 int status,nocache;
1504 struct fuse_ctx ctx;
1505 groups *gids;
1506
1507 ctx = *(fuse_req_ctx(req));
1508 if (debug_mode) {
1509 oplog_printf(&ctx,"lookup (%lu,%s) ...",(unsigned long int)parent,name);
1510 fprintf(stderr,"lookup (%lu,%s)\n",(unsigned long int)parent,name);
1511 }
1512 nleng = strlen(name);
1513 if (nleng>MFS_NAME_MAX) {
1514 mfs_stats_inc(OP_ERRLOOKUP);
1515 oplog_printf(&ctx,"lookup (%lu,%s): %s",(unsigned long int)parent,name,strerr(ENAMETOOLONG));
1516 fuse_reply_err(req, ENAMETOOLONG);
1517 return;
1518 }
1519 if (parent==FUSE_ROOT_ID) {
1520 if (nleng==2 && name[0]=='.' && name[1]=='.') {
1521 nleng=1;
1522 }
1523 if (strcmp(name,MASTERINFO_NAME)==0) {
1524 memset(&e, 0, sizeof(e));
1525 e.ino = MASTERINFO_INODE;
1526 e.generation = 1;
1527 e.attr_timeout = 3600.0;
1528 e.entry_timeout = 3600.0;
1529 mfs_attr_to_stat(MASTERINFO_INODE,masterinfoattr,&e.attr);
1530 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1531 mfs_makeattrstr(attrstr,256,&e.attr);
1532 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: MASTERINFO): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1533 fuse_reply_entry(req, &e);
1534 return ;
1535 }
1536 if (strcmp(name,STATS_NAME)==0) {
1537 memset(&e, 0, sizeof(e));
1538 e.ino = STATS_INODE;
1539 e.generation = 1;
1540 e.attr_timeout = 3600.0;
1541 e.entry_timeout = 3600.0;
1542 mfs_attr_to_stat(STATS_INODE,statsattr,&e.attr);
1543 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1544 mfs_makeattrstr(attrstr,256,&e.attr);
1545 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: STATS): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1546 fuse_reply_entry(req, &e);
1547 return ;
1548 }
1549 if (strcmp(name,PARAMS_NAME)==0) {
1550 memset(&e, 0, sizeof(e));
1551 e.ino = PARAMS_INODE;
1552 e.generation = 1;
1553 e.attr_timeout = 3600.0;
1554 e.entry_timeout = 3600.0;
1555 mfs_attr_to_stat(PARAMS_INODE,paramsattr,&e.attr);
1556 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1557 mfs_makeattrstr(attrstr,256,&e.attr);
1558 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: PARAMS): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1559 fuse_reply_entry(req, &e);
1560 return ;
1561 }
1562 if (strcmp(name,RANDOM_NAME)==0) {
1563 memset(&e, 0, sizeof(e));
1564 e.ino = RANDOM_INODE;
1565 e.generation = 1;
1566 e.attr_timeout = 3600.0;
1567 e.entry_timeout = 3600.0;
1568 mfs_attr_to_stat(RANDOM_INODE,randomattr,&e.attr);
1569 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1570 mfs_makeattrstr(attrstr,256,&e.attr);
1571 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: RANDOM): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1572 fuse_reply_entry(req, &e);
1573 return ;
1574 }
1575 if (strcmp(name,MOOSE_NAME)==0) {
1576 memset(&e, 0, sizeof(e));
1577 e.ino = MOOSE_INODE;
1578 e.generation = 1;
1579 e.attr_timeout = 3600.0;
1580 e.entry_timeout = 3600.0;
1581 mfs_attr_to_stat(MOOSE_INODE,mooseattr,&e.attr);
1582 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1583 mfs_makeattrstr(attrstr,256,&e.attr);
1584 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: MOOSE): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1585 fuse_reply_entry(req, &e);
1586 return ;
1587 }
1588 if (strcmp(name,OPLOG_NAME)==0) {
1589 memset(&e, 0, sizeof(e));
1590 e.ino = OPLOG_INODE;
1591 e.generation = 1;
1592 e.attr_timeout = 3600.0;
1593 e.entry_timeout = 3600.0;
1594 mfs_attr_to_stat(OPLOG_INODE,oplogattr,&e.attr);
1595 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1596 mfs_makeattrstr(attrstr,256,&e.attr);
1597 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: OPLOG): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1598 fuse_reply_entry(req, &e);
1599 return ;
1600 }
1601 if (strcmp(name,OPHISTORY_NAME)==0) {
1602 memset(&e, 0, sizeof(e));
1603 e.ino = OPHISTORY_INODE;
1604 e.generation = 1;
1605 e.attr_timeout = 3600.0;
1606 e.entry_timeout = 3600.0;
1607 mfs_attr_to_stat(OPHISTORY_INODE,oplogattr,&e.attr);
1608 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1609 mfs_makeattrstr(attrstr,256,&e.attr);
1610 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: OPHISTORY): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1611 fuse_reply_entry(req, &e);
1612 return ;
1613 }
1614 /*
1615 if (strcmp(name,ATTRCACHE_NAME)==0) {
1616 memset(&e, 0, sizeof(e));
1617 e.ino = ATTRCACHE_INODE;
1618 e.generation = 1;
1619 e.attr_timeout = 3600.0;
1620 e.entry_timeout = 3600.0;
1621 mfs_attr_to_stat(ATTRCACHE_INODE,attrcacheattr,&e.attr);
1622 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1623 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: ATTRCACHE)",(unsigned long int)parent,name);
1624 fuse_reply_entry(req, &e);
1625 return ;
1626 }
1627 */
1628 }
1629 /*
1630 if (newdircache) {
1631 const uint8_t *dbuff;
1632 uint32_t dsize;
1633 switch (dir_cache_lookup(parent,nleng,(const uint8_t*)name,&inode,attr)) {
1634 case -1:
1635 mfs_stats_inc(OP_DIRCACHE_LOOKUP_NEGATIVE);
1636 oplog_printf(&ctx,"lookup (%lu,%s) (cached answer: %s)",(unsigned long int)parent,name,strerr(ENOENT));
1637 fuse_reply_err(req,ENOENT);
1638 return;
1639 case 1:
1640 mfs_stats_inc(OP_DIRCACHE_LOOKUP_POSITIVE);
1641 status = 0;
1642 oplog_printf(&ctx,"lookup (%lu,%s) (cached answer: %lu)",(unsigned long int)parent,name,(unsigned long int)inode);
1643 break;
1644 case -2:
1645 mfs_stats_inc(OP_LOOKUP);
1646 status = fs_lookup(parent,nleng,(const uint8_t*)name,ctx.uid,ctx.gid,&inode,attr);
1647 status = mfs_errorconv(status);
1648 if (status!=0) {
1649 oplog_printf(&ctx,"lookup (%lu,%s) (lookup forced by cache: %s)",(unsigned long int)parent,name,strerr(status));
1650 } else {
1651 oplog_printf(&ctx,"lookup (%lu,%s) (lookup forced by cache: %lu)",(unsigned long int)parent,name,(unsigned long int)inode);
1652 }
1653 break;
1654 case -3:
1655 mfs_stats_inc(OP_DIRCACHE_LOOKUP_NOATTR);
1656 status = fs_getattr(inode,ctx.uid,ctx.gid,attr);
1657 status = mfs_errorconv(status);
1658 if (status!=0) {
1659 oplog_printf(&ctx,"lookup (%lu,%s) (getattr forced by cache: %s)",(unsigned long int)parent,name,strerr(status));
1660 } else {
1661 oplog_printf(&ctx,"lookup (%lu,%s) (getattr forced by cache: %lu)",(unsigned long int)parent,name,(unsigned long int)inode);
1662 }
1663 break;
1664 default:
1665 status = fs_getdir_plus(parent,ctx.uid,ctx.gid,1,&dbuff,&dsize);
1666 status = mfs_errorconv(status);
1667 if (status!=0) {
1668 oplog_printf(&ctx,"lookup (%lu,%s) (readdir: %s)",(unsigned long int)parent,name,strerr(status));
1669 fuse_reply_err(req, status);
1670 return;
1671 }
1672 mfs_stats_inc(OP_GETDIR_FULL);
1673 dir_cache_newdirdata(parent,dsize,dbuff);
1674 switch (dir_cache_lookup(parent,nleng,(const uint8_t*)name,&inode,attr)) {
1675 case -1:
1676 mfs_stats_inc(OP_DIRCACHE_LOOKUP_NEGATIVE);
1677 oplog_printf(&ctx,"lookup (%lu,%s) (after readdir cached answer: %s)",(unsigned long int)parent,name,strerr(ENOENT));
1678 fuse_reply_err(req,ENOENT);
1679 return;
1680 case 1:
1681 mfs_stats_inc(OP_DIRCACHE_LOOKUP_POSITIVE);
1682 oplog_printf(&ctx,"lookup (%lu,%s) (after readdir cached answer: %lu)",(unsigned long int)parent,name,(unsigned long int)inode);
1683 break;
1684 default:
1685 mfs_stats_inc(OP_LOOKUP);
1686 status = fs_lookup(parent,nleng,(const uint8_t*)name,ctx.uid,ctx.gid,&inode,attr);
1687 status = mfs_errorconv(status);
1688 if (status!=0) {
1689 oplog_printf(&ctx,"lookup (%lu,%s) (after readdir lookup forced by cache: %s)",(unsigned long int)parent,name,strerr(status));
1690 } else {
1691 oplog_printf(&ctx,"lookup (%lu,%s) (after readdir lookup forced by cache: %lu)",(unsigned long int)parent,name,(unsigned long int)inode);
1692 }
1693 }
1694 }
1695 } else
1696 */
1697 if (usedircache && dcache_lookup(&ctx,parent,nleng,(const uint8_t*)name,&inode,attr)) {
1698 if (debug_mode) {
1699 fprintf(stderr,"lookup: sending data from dircache\n");
1700 }
1701 mfs_stats_inc(OP_DIRCACHE_LOOKUP);
1702 status = 0;
1703 lflags = 0xFFFF;
1704 icacheflag = 1;
1705 // oplog_printf(&ctx,"lookup (%lu,%s) (using open dir cache): OK (%lu)",(unsigned long int)parent,name,(unsigned long int)inode);
1706 } else {
1707 if (negentry_cache_search(parent,nleng,(const uint8_t*)name)) {
1708 if (debug_mode) {
1709 fprintf(stderr,"lookup: sending data from negcache\n");
1710 }
1711 oplog_printf(&ctx,"lookup (%lu,%s) (using negative entry cache): %s",(unsigned long int)parent,name,strerr(ENOENT));
1712 mfs_stats_inc(OP_NEGCACHE_LOOKUP);
1713 fuse_reply_err(req,ENOENT);
1714 return;
1715 }
1716 if (full_permissions) {
1717 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
1718 status = fs_lookup(parent,nleng,(const uint8_t*)name,ctx.uid,gids->gidcnt,gids->gidtab,&inode,attr,&lflags,&csdataver,&chunkid,&version,&csdata,&csdatasize);
1719 groups_rel(gids);
1720 } else {
1721 uint32_t gidtmp = ctx.gid;
1722 status = fs_lookup(parent,nleng,(const uint8_t*)name,ctx.uid,1,&gidtmp,&inode,attr,&lflags,&csdataver,&chunkid,&version,&csdata,&csdatasize);
1723 }
1724 if (status==MFS_ERROR_ENOENT_NOCACHE) {
1725 status = MFS_ERROR_ENOENT;
1726 nocache = 1;
1727 } else {
1728 nocache = 0;
1729 }
1730 status = mfs_errorconv(status);
1731 icacheflag = 0;
1732 if (status==0) {
1733 mfs_stats_inc(OP_POSLOOKUP);
1734 } else if (status==ENOENT) {
1735 if (strcmp(name,MASTERINFO_NAME)==0) {
1736 memset(&e, 0, sizeof(e));
1737 e.ino = MASTERINFO_INODE;
1738 e.generation = 1;
1739 e.attr_timeout = 3600.0;
1740 e.entry_timeout = 3600.0;
1741 mfs_attr_to_stat(MASTERINFO_INODE,masterinfoattr,&e.attr);
1742 mfs_stats_inc(OP_LOOKUP_INTERNAL);
1743 mfs_makeattrstr(attrstr,256,&e.attr);
1744 oplog_printf(&ctx,"lookup (%lu,%s) (internal node: MASTERINFO): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1745 fuse_reply_entry(req, &e);
1746 return ;
1747 } else {
1748 mfs_stats_inc(OP_NEGLOOKUP);
1749 if (nocache==0) {
1750 negentry_cache_insert(parent,nleng,(const uint8_t*)name);
1751 }
1752 }
1753 } else {
1754 mfs_stats_inc(OP_ERRLOOKUP);
1755 }
1756 }
1757 if (status!=0) {
1758 oplog_printf(&ctx,"lookup (%lu,%s): %s",(unsigned long int)parent,name,strerr(status));
1759 fuse_reply_err(req, status);
1760 return;
1761 }
1762 type = mfs_attr_get_type(attr);
1763 if (type==TYPE_FILE) {
1764 maxfleng = write_data_inode_getmaxfleng(inode);
1765 } else {
1766 maxfleng = 0;
1767 }
1768 if (type==TYPE_DIRECTORY) {
1769 sstats_set(inode,attr,1);
1770 sparents_add(inode,parent,direntry_cache_timeout+60);
1771 }
1772 memset(&e, 0, sizeof(e));
1773 e.ino = inode;
1774 e.generation = 1;
1775 mattr = mfs_attr_get_mattr(attr);
1776 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1777 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:((type==TYPE_DIRECTORY)?direntry_cache_timeout:entry_cache_timeout);
1778 #ifdef DENTRY_INVALIDATOR
1779 if (dinval && (mattr&MATTR_UNDELETABLE)==0 && type==TYPE_DIRECTORY) {
1780 dinval_add(parent,nleng,(const uint8_t *)name,inode);
1781 }
1782 #endif
1783 mfs_attr_to_stat(inode,attr,&e.attr);
1784 if (maxfleng>(uint64_t)(e.attr.st_size)) {
1785 e.attr.st_size=maxfleng;
1786 mfs_attr_set_fleng(attr,maxfleng);
1787 }
1788 if (lflags!=0xFFFF) { // store extra data in cache
1789 fdcache_insert(&ctx,inode,attr,lflags,csdataver,chunkid,version,csdata,csdatasize);
1790 }
1791 if (type==TYPE_FILE) {
1792 read_inode_set_length_passive(inode,e.attr.st_size);
1793 finfo_change_fleng(inode,e.attr.st_size);
1794 }
1795 fs_fix_amtime(inode,&(e.attr.st_atime),&(e.attr.st_mtime));
1796 // if (type==TYPE_FILE && debug_mode) {
1797 // fprintf(stderr,"lookup inode %lu - file size: %llu\n",(unsigned long int)inode,(unsigned long long int)e.attr.st_size);
1798 // }
1799 mfs_makeattrstr(attrstr,256,&e.attr);
1800 oplog_printf(&ctx,"lookup (%lu,%s)%s: OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,icacheflag?" (using open dir cache)":"",e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
1801 fuse_reply_entry(req, &e);
1802 if (debug_mode) {
1803 fprintf(stderr,"lookup: positive answer timeouts (attr:%.3lf,entry:%.3lf)\n",e.attr_timeout,e.entry_timeout);
1804 }
1805 }
1806
1807 void mfs_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) {
1808 uint64_t maxfleng;
1809 double attr_timeout;
1810 struct stat o_stbuf;
1811 uint8_t attr[ATTR_RECORD_SIZE];
1812 uint8_t type;
1813 char attrstr[256];
1814 int status;
1815 uint8_t icacheflag;
1816 struct fuse_ctx ctx;
1817 int force_mode;
1818 (void)fi;
1819
1820 ctx = *(fuse_req_ctx(req));
1821 // mfs_stats_inc(OP_GETATTR);
1822 if (debug_mode) {
1823 if (fi!=NULL) {
1824 oplog_printf(&ctx,"getattr (%lu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(uint32_t)(fi->fh));
1825 } else {
1826 oplog_printf(&ctx,"getattr (%lu) ...",(unsigned long int)ino);
1827 }
1828 fprintf(stderr,"getattr (%lu)\n",(unsigned long int)ino);
1829 }
1830 // if (ino==MASTER_INODE) {
1831 // memset(&o_stbuf, 0, sizeof(struct stat));
1832 // mfs_attr_to_stat(ino,masterattr,&o_stbuf);
1833 // fuse_reply_attr(req, &o_stbuf, 3600.0);
1834 // mfs_stats_inc(OP_GETATTR);
1835 // return;
1836 // }
1837 if (ino==MASTERINFO_INODE) {
1838 memset(&o_stbuf, 0, sizeof(struct stat));
1839 mfs_attr_to_stat(ino,masterinfoattr,&o_stbuf);
1840 mfs_stats_inc(OP_GETATTR);
1841 mfs_makeattrstr(attrstr,256,&o_stbuf);
1842 oplog_printf(&ctx,"getattr (%lu) (internal node: MASTERINFO): OK (3600,%s)",(unsigned long int)ino,attrstr);
1843 fuse_reply_attr(req, &o_stbuf, 3600.0);
1844 return;
1845 }
1846 if (ino==STATS_INODE) {
1847 memset(&o_stbuf, 0, sizeof(struct stat));
1848 mfs_attr_to_stat(ino,statsattr,&o_stbuf);
1849 mfs_stats_inc(OP_GETATTR);
1850 mfs_makeattrstr(attrstr,256,&o_stbuf);
1851 oplog_printf(&ctx,"getattr (%lu) (internal node: STATS): OK (3600,%s)",(unsigned long int)ino,attrstr);
1852 fuse_reply_attr(req, &o_stbuf, 3600.0);
1853 return;
1854 }
1855 if (ino==PARAMS_INODE) {
1856 memset(&o_stbuf, 0, sizeof(struct stat));
1857 mfs_attr_to_stat(ino,paramsattr,&o_stbuf);
1858 mfs_stats_inc(OP_GETATTR);
1859 mfs_makeattrstr(attrstr,256,&o_stbuf);
1860 oplog_printf(&ctx,"getattr (%lu) (internal node: PARAMS): OK (3600,%s)",(unsigned long int)ino,attrstr);
1861 fuse_reply_attr(req, &o_stbuf, 3600.0);
1862 return;
1863 }
1864 if (ino==RANDOM_INODE) {
1865 memset(&o_stbuf, 0, sizeof(struct stat));
1866 mfs_attr_to_stat(ino,randomattr,&o_stbuf);
1867 mfs_stats_inc(OP_GETATTR);
1868 mfs_makeattrstr(attrstr,256,&o_stbuf);
1869 oplog_printf(&ctx,"getattr (%lu) (internal node: RANDOM): OK (3600,%s)",(unsigned long int)ino,attrstr);
1870 fuse_reply_attr(req, &o_stbuf, 3600.0);
1871 return;
1872 }
1873 if (ino==MOOSE_INODE) {
1874 memset(&o_stbuf, 0, sizeof(struct stat));
1875 mfs_attr_to_stat(ino,mooseattr,&o_stbuf);
1876 mfs_stats_inc(OP_GETATTR);
1877 mfs_makeattrstr(attrstr,256,&o_stbuf);
1878 oplog_printf(&ctx,"getattr (%lu) (internal node: MOOSE): OK (3600,%s)",(unsigned long int)ino,attrstr);
1879 fuse_reply_attr(req, &o_stbuf, 3600.0);
1880 return;
1881 }
1882 if (ino==OPLOG_INODE || ino==OPHISTORY_INODE) {
1883 memset(&o_stbuf, 0, sizeof(struct stat));
1884 mfs_attr_to_stat(ino,oplogattr,&o_stbuf);
1885 // if (fi && fi->fh) {
1886 // uint64_t *posptr = (uint64_t*)(unsigned long)(fi->fh);
1887 // o_stbuf.st_size = (*posptr)+oplog_getpos();
1888 // }
1889 mfs_stats_inc(OP_GETATTR);
1890 mfs_makeattrstr(attrstr,256,&o_stbuf);
1891 oplog_printf(&ctx,"getattr (%lu) (internal node: %s): OK (3600,%s)",(unsigned long int)ino,(ino==OPLOG_INODE)?"OPLOG":"OPHISTORY",attrstr);
1892 fuse_reply_attr(req, &o_stbuf, 3600.0);
1893 return;
1894 }
1895 force_mode = 0;
1896 if (usedircache && dcache_getattr(&ctx,ino,attr)) {
1897 if (debug_mode) {
1898 fprintf(stderr,"getattr: sending data from dircache\n");
1899 }
1900 mfs_stats_inc(OP_DIRCACHE_GETATTR);
1901 status = 0;
1902 icacheflag = 1;
1903 } else {
1904 mfs_stats_inc(OP_GETATTR);
1905 if (fdcache_find(&ctx,ino,attr,NULL)) {
1906 if (debug_mode) {
1907 fprintf(stderr,"getattr: sending data from fdcache\n");
1908 }
1909 status = MFS_STATUS_OK;
1910 } else {
1911 status = fs_getattr(ino,(fi!=NULL || fs_isopen(ino))?1:0,ctx.uid,ctx.gid,attr);
1912 }
1913 if (status==MFS_ERROR_ENOENT) {
1914 if (ctx.pid==getpid()) {
1915 force_mode = 1;
1916 }
1917 status = sstats_get(ino,attr,force_mode);
1918 if (status==MFS_STATUS_OK && force_mode==0) {
1919 force_mode = 2;
1920 }
1921 }
1922 status = mfs_errorconv(status);
1923 icacheflag = 0;
1924 }
1925 if (status!=0) {
1926 if (fi!=NULL) {
1927 oplog_printf(&ctx,"getattr (%lu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(uint32_t)(fi->fh),strerr(status));
1928 } else {
1929 oplog_printf(&ctx,"getattr (%lu) [no handle]: %s",(unsigned long int)ino,strerr(status));
1930 }
1931 fuse_reply_err(req, status);
1932 return;
1933 }
1934 type = mfs_attr_get_type(attr);
1935 if (type==TYPE_FILE) {
1936 maxfleng = write_data_inode_getmaxfleng(ino);
1937 } else {
1938 maxfleng = 0;
1939 }
1940 if (type==TYPE_DIRECTORY && force_mode==0) {
1941 sstats_set(ino,attr,1);
1942 }
1943 memset(&o_stbuf, 0, sizeof(struct stat));
1944 mfs_attr_to_stat(ino,attr,&o_stbuf);
1945 if (maxfleng>(uint64_t)(o_stbuf.st_size)) {
1946 o_stbuf.st_size=maxfleng;
1947 mfs_attr_set_fleng(attr,maxfleng);
1948 }
1949 if (type==TYPE_FILE) {
1950 read_inode_set_length_passive(ino,o_stbuf.st_size);
1951 finfo_change_fleng(ino,o_stbuf.st_size);
1952 fdcache_invalidate(ino);
1953 }
1954 fs_fix_amtime(ino,&(o_stbuf.st_atime),&(o_stbuf.st_mtime));
1955 attr_timeout = ((mfs_attr_get_mattr(attr)&MATTR_NOACACHE) || force_mode)?0.0:attr_cache_timeout;
1956 mfs_makeattrstr(attrstr,256,&o_stbuf);
1957 if (fi!=NULL) {
1958 oplog_printf(&ctx,"getattr (%lu) [handle:%08"PRIX32"]%s: OK (%.1lf,%s)",(unsigned long int)ino,(uint32_t)(fi->fh),icacheflag?" (using open dir cache)":(force_mode==1)?" (internal getattr)":(force_mode==2)?" (sustained nodes)":"",attr_timeout,attrstr);
1959 } else {
1960 oplog_printf(&ctx,"getattr (%lu) [no handle]%s: OK (%.1lf,%s)",(unsigned long int)ino,icacheflag?" (using open dir cache)":(force_mode==1)?" (internal getattr)":(force_mode==2)?" (sustained nodes)":"",attr_timeout,attrstr);
1961 }
1962 fuse_reply_attr(req, &o_stbuf, attr_timeout);
1963 }
1964
1965 void mfs_make_setattr_str(char *strbuff,uint32_t strsize,struct stat *stbuf,int to_set) {
1966 uint32_t strleng = 0;
1967 char modestr[11];
1968 if (strleng<strsize && (to_set&FUSE_SET_ATTR_MODE)) {
1969 mfs_makemodestr(modestr,stbuf->st_mode);
1970 strleng+=snprintf(strbuff+strleng,strsize-strleng,"mode=%s:0%04o;",modestr+1,(unsigned int)(stbuf->st_mode & 07777));
1971 }
1972 if (strleng<strsize && (to_set&FUSE_SET_ATTR_UID)) {
1973 strleng+=snprintf(strbuff+strleng,strsize-strleng,"uid=%ld;",(long int)(stbuf->st_uid));
1974 }
1975 if (strleng<strsize && (to_set&FUSE_SET_ATTR_GID)) {
1976 strleng+=snprintf(strbuff+strleng,strsize-strleng,"gid=%ld;",(long int)(stbuf->st_gid));
1977 }
1978 #if defined(FUSE_SET_ATTR_ATIME_NOW)
1979 if (strleng<strsize && ((to_set & FUSE_SET_ATTR_ATIME_NOW) || ((to_set & FUSE_SET_ATTR_ATIME) && stbuf->st_atime<0))) {
1980 #else
1981 if (strleng<strsize && ((to_set & FUSE_SET_ATTR_ATIME) && stbuf->st_atime<0)) {
1982 #endif
1983 strleng+=snprintf(strbuff+strleng,strsize-strleng,"atime=NOW;");
1984 } else if (strleng<strsize && (to_set&FUSE_SET_ATTR_ATIME)) {
1985 strleng+=snprintf(strbuff+strleng,strsize-strleng,"atime=%lu;",(unsigned long int)(stbuf->st_atime));
1986 }
1987 #if defined(FUSE_SET_ATTR_MTIME_NOW)
1988 if (strleng<strsize && ((to_set & FUSE_SET_ATTR_MTIME_NOW) || ((to_set & FUSE_SET_ATTR_MTIME) && stbuf->st_mtime<0))) {
1989 #else
1990 if (strleng<strsize && ((to_set & FUSE_SET_ATTR_MTIME) && stbuf->st_mtime<0)) {
1991 #endif
1992 strleng+=snprintf(strbuff+strleng,strsize-strleng,"mtime=NOW;");
1993 } else if (strleng<strsize && (to_set&FUSE_SET_ATTR_MTIME)) {
1994 strleng+=snprintf(strbuff+strleng,strsize-strleng,"mtime=%lu;",(unsigned long int)(stbuf->st_mtime));
1995 }
1996 if (strleng<strsize && (to_set&FUSE_SET_ATTR_SIZE)) {
1997 strleng+=snprintf(strbuff+strleng,strsize-strleng,"size=%llu;",(unsigned long long int)(stbuf->st_size));
1998 }
1999 if (strleng>0) {
2000 strleng--;
2001 }
2002 strbuff[strleng]='\0';
2003 }
2004
2005 void mfs_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *stbuf, int to_set, struct fuse_file_info *fi) {
2006 struct stat o_stbuf;
2007 uint64_t maxfleng;
2008 uint8_t attr[ATTR_RECORD_SIZE];
2009 char setattr_str[150];
2010 char attrstr[256];
2011 double attr_timeout;
2012 int status;
2013 struct fuse_ctx ctx;
2014 groups *gids;
2015 uint8_t setmask = 0;
2016
2017 ctx = *(fuse_req_ctx(req));
2018 mfs_make_setattr_str(setattr_str,150,stbuf,to_set);
2019 mfs_stats_inc(OP_SETATTR);
2020 if (debug_mode) {
2021 if (fi!=NULL) {
2022 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"] ...",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh));
2023 } else {
2024 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) ...",(unsigned long int)ino,to_set,setattr_str);
2025 }
2026 fprintf(stderr,"setattr (%lu,0x%X,[%s])\n",(unsigned long int)ino,to_set,setattr_str);
2027 }
2028 if (ino==MASTERINFO_INODE) {
2029 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]): %s",(unsigned long int)ino,to_set,setattr_str,strerr(EPERM));
2030 fuse_reply_err(req, EPERM);
2031 return;
2032 }
2033 if (ino==STATS_INODE) {
2034 memset(&o_stbuf, 0, sizeof(struct stat));
2035 mfs_attr_to_stat(ino,statsattr,&o_stbuf);
2036 mfs_makeattrstr(attrstr,256,&o_stbuf);
2037 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) (internal node: STATS): OK (3600,%s)",(unsigned long int)ino,to_set,setattr_str,attrstr);
2038 fuse_reply_attr(req, &o_stbuf, 3600.0);
2039 return;
2040 }
2041 if (ino==PARAMS_INODE) {
2042 memset(&o_stbuf, 0, sizeof(struct stat));
2043 mfs_attr_to_stat(ino,paramsattr,&o_stbuf);
2044 mfs_makeattrstr(attrstr,256,&o_stbuf);
2045 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) (internal node: PARAMS): OK (3600,%s)",(unsigned long int)ino,to_set,setattr_str,attrstr);
2046 fuse_reply_attr(req, &o_stbuf, 3600.0);
2047 return;
2048 }
2049 if (ino==RANDOM_INODE) {
2050 memset(&o_stbuf, 0, sizeof(struct stat));
2051 mfs_attr_to_stat(ino,randomattr,&o_stbuf);
2052 mfs_makeattrstr(attrstr,256,&o_stbuf);
2053 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) (internal node: RANDOM): OK (3600,%s)",(unsigned long int)ino,to_set,setattr_str,attrstr);
2054 fuse_reply_attr(req, &o_stbuf, 3600.0);
2055 return;
2056 }
2057 if (ino==MOOSE_INODE) {
2058 memset(&o_stbuf, 0, sizeof(struct stat));
2059 mfs_attr_to_stat(ino,mooseattr,&o_stbuf);
2060 mfs_makeattrstr(attrstr,256,&o_stbuf);
2061 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) (internal node: MOOSE): OK (3600,%s)",(unsigned long int)ino,to_set,setattr_str,attrstr);
2062 fuse_reply_attr(req, &o_stbuf, 3600.0);
2063 return;
2064 }
2065 if (ino==OPLOG_INODE || ino==OPHISTORY_INODE) {
2066 memset(&o_stbuf, 0, sizeof(struct stat));
2067 mfs_attr_to_stat(ino,oplogattr,&o_stbuf);
2068 // if (fi && fi->fh) {
2069 // uint64_t *posptr = (uint64_t*)(unsigned long)(fi->fh);
2070 // o_stbuf.st_size = (*posptr)+oplog_getpos();
2071 // }
2072 mfs_makeattrstr(attrstr,256,&o_stbuf);
2073 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) (internal node: %s): OK (3600,%s)",(unsigned long int)ino,to_set,setattr_str,(ino==OPLOG_INODE)?"OPLOG":"OPHISTORY",attrstr);
2074 fuse_reply_attr(req, &o_stbuf, 3600.0);
2075 return;
2076 }
2077 /*
2078 if (ino==ATTRCACHE_INODE) {
2079 memset(&o_stbuf, 0, sizeof(struct stat));
2080 mfs_attr_to_stat(ino,attrcacheattr,&o_stbuf);
2081 fuse_reply_attr(req, &o_stbuf, 3600.0);
2082 return;
2083 }
2084 */
2085 status = EINVAL;
2086 #if defined(FUSE_SET_ATTR_ATIME_NOW) && defined(FUSE_SET_ATTR_MTIME_NOW)
2087 if ((to_set & (FUSE_SET_ATTR_MODE|FUSE_SET_ATTR_UID|FUSE_SET_ATTR_GID|FUSE_SET_ATTR_ATIME|FUSE_SET_ATTR_MTIME|FUSE_SET_ATTR_SIZE|FUSE_SET_ATTR_ATIME_NOW|FUSE_SET_ATTR_MTIME_NOW)) == 0) {
2088 #else
2089 if ((to_set & (FUSE_SET_ATTR_MODE|FUSE_SET_ATTR_UID|FUSE_SET_ATTR_GID|FUSE_SET_ATTR_ATIME|FUSE_SET_ATTR_MTIME|FUSE_SET_ATTR_SIZE)) == 0) { // change other flags or change nothing
2090 #endif
2091 // status = fs_getattr(ino,ctx.uid,ctx.gid,attr);
2092 // ext3 compatibility - change ctime during this operation (usually chown(-1,-1))
2093 if (full_permissions) {
2094 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2095 status = fs_setattr(ino,(fi!=NULL || fs_isopen(ino))?1:0,ctx.uid,gids->gidcnt,gids->gidtab,0,0,0,0,0,0,0,0,attr);
2096 groups_rel(gids);
2097 } else {
2098 uint32_t gidtmp = ctx.gid;
2099 status = fs_setattr(ino,(fi!=NULL || fs_isopen(ino))?1:0,ctx.uid,1,&gidtmp,0,0,0,0,0,0,0,0,attr);
2100 }
2101 if (status==MFS_ERROR_ENOENT) {
2102 status = sstats_get(ino,attr,0);
2103 if (status==MFS_STATUS_OK) {
2104 mfs_attr_modify(to_set,attr,stbuf);
2105 }
2106 }
2107 if (status==MFS_STATUS_OK) {
2108 sstats_set(ino,attr,0);
2109 }
2110 status = mfs_errorconv(status);
2111 if (status!=0) {
2112 if (fi!=NULL) {
2113 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(status));
2114 } else {
2115 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [no handle]: %s",(unsigned long int)ino,to_set,setattr_str,strerr(status));
2116 }
2117 fuse_reply_err(req, status);
2118 return;
2119 }
2120 }
2121 if (to_set & FUSE_SET_ATTR_SIZE) {
2122 if (stbuf->st_size<0) {
2123 if (fi!=NULL) {
2124 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(EINVAL));
2125 } else {
2126 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [no handle]: %s",(unsigned long int)ino,to_set,setattr_str,strerr(EINVAL));
2127 }
2128 fuse_reply_err(req, EINVAL);
2129 return;
2130 }
2131 if (stbuf->st_size>=MAX_FILE_SIZE) {
2132 if (fi!=NULL) {
2133 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(EFBIG));
2134 } else {
2135 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [no handle]: %s",(unsigned long int)ino,to_set,setattr_str,strerr(EFBIG));
2136 }
2137 fuse_reply_err(req, EFBIG);
2138 return;
2139 }
2140 if (fi!=NULL) {
2141 finfo *fileinfo;
2142 fileinfo = finfo_get(fi->fh);
2143 if (fi->fh==0 || fileinfo==NULL) {
2144 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(EBADF));
2145 fuse_reply_err(req,EBADF);
2146 return;
2147 }
2148 zassert(pthread_mutex_lock(&(fileinfo->lock)));
2149 if (fileinfo->inode!=ino) {
2150 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
2151 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(EBADF));
2152 fuse_reply_err(req,EBADF);
2153 return;
2154 }
2155 if (fileinfo->mode==IO_RO) {
2156 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
2157 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(EACCES));
2158 fuse_reply_err(req,EACCES);
2159 return;
2160 }
2161 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
2162 }
2163 write_data_flush_inode(ino);
2164 if (full_permissions) {
2165 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2166 #if defined(__FreeBSD__)
2167 if (freebsd_workarounds) {
2168 status = do_truncate(ino,(fi!=NULL)?(TRUNCATE_FLAG_OPENED|TRUNCATE_FLAG_TIMEFIX):TRUNCATE_FLAG_TIMEFIX,ctx.uid,gids->gidcnt,gids->gidtab,stbuf->st_size,attr,NULL);
2169 } else {
2170 #endif
2171 status = do_truncate(ino,(fi!=NULL)?TRUNCATE_FLAG_OPENED:0,ctx.uid,gids->gidcnt,gids->gidtab,stbuf->st_size,attr,NULL);
2172 #if defined(__FreeBSD__)
2173 }
2174 #endif
2175 groups_rel(gids);
2176 } else {
2177 uint32_t gidtmp = ctx.gid;
2178 #if defined(__FreeBSD__)
2179 if (freebsd_workarounds) {
2180 status = do_truncate(ino,(fi!=NULL)?(TRUNCATE_FLAG_OPENED|TRUNCATE_FLAG_TIMEFIX):TRUNCATE_FLAG_TIMEFIX,ctx.uid,1,&gidtmp,stbuf->st_size,attr,NULL);
2181 } else {
2182 #endif
2183 status = do_truncate(ino,(fi!=NULL)?TRUNCATE_FLAG_OPENED:0,ctx.uid,1,&gidtmp,stbuf->st_size,attr,NULL);
2184 #if defined(__FreeBSD__)
2185 }
2186 #endif
2187 }
2188 status = mfs_errorconv(status);
2189 // read_inode_ops(ino);
2190 if (status!=0) {
2191 if (fi!=NULL) {
2192 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(status));
2193 } else {
2194 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [no handle]: %s",(unsigned long int)ino,to_set,setattr_str,strerr(status));
2195 }
2196 fuse_reply_err(req, status);
2197 return;
2198 }
2199 chunksdatacache_clear_inode(ino,stbuf->st_size/MFSCHUNKSIZE);
2200 finfo_change_fleng(ino,stbuf->st_size);
2201 write_data_inode_setmaxfleng(ino,stbuf->st_size);
2202 read_inode_set_length_active(ino,stbuf->st_size);
2203 }
2204 #if defined(FUSE_SET_ATTR_ATIME_NOW) && defined(FUSE_SET_ATTR_MTIME_NOW)
2205 if (to_set & (FUSE_SET_ATTR_MODE|FUSE_SET_ATTR_UID|FUSE_SET_ATTR_GID|FUSE_SET_ATTR_ATIME|FUSE_SET_ATTR_MTIME|FUSE_SET_ATTR_ATIME_NOW|FUSE_SET_ATTR_MTIME_NOW)) {
2206 #else
2207 if (to_set & (FUSE_SET_ATTR_MODE|FUSE_SET_ATTR_UID|FUSE_SET_ATTR_GID|FUSE_SET_ATTR_ATIME|FUSE_SET_ATTR_MTIME)) {
2208 #endif
2209 uint32_t masterversion = master_version();
2210 //#if !(defined(FUSE_SET_ATTR_ATIME_NOW) && defined(FUSE_SET_ATTR_MTIME_NOW))
2211 // time_t now = time(NULL);
2212 //#endif
2213 setmask = 0;
2214 if (to_set & FUSE_SET_ATTR_MODE) {
2215 setmask |= SET_MODE_FLAG;
2216 if (no_xattrs==0 && xattr_cache_on) {
2217 xattr_cache_del(ino,6+1+5+1+3+1+6,(const uint8_t*)"system.posix_acl_access");
2218 }
2219 }
2220 if (to_set & FUSE_SET_ATTR_UID) {
2221 setmask |= SET_UID_FLAG;
2222 }
2223 if (to_set & FUSE_SET_ATTR_GID) {
2224 setmask |= SET_GID_FLAG;
2225 }
2226 #if defined(FUSE_SET_ATTR_ATIME_NOW)
2227 if (((to_set & FUSE_SET_ATTR_ATIME_NOW) || ((to_set & FUSE_SET_ATTR_ATIME) && stbuf->st_atime<0)) && masterversion>=VERSION2INT(2,1,13)) {
2228 #else
2229 if ((to_set & FUSE_SET_ATTR_ATIME) && stbuf->st_atime<0 && masterversion>=VERSION2INT(2,1,13)) {
2230 #endif
2231 setmask |= SET_ATIME_NOW_FLAG;
2232 } else if (to_set & FUSE_SET_ATTR_ATIME) {
2233 setmask |= SET_ATIME_FLAG;
2234 }
2235 #if defined(FUSE_SET_ATTR_MTIME_NOW)
2236 if (((to_set & FUSE_SET_ATTR_MTIME_NOW) || ((to_set & FUSE_SET_ATTR_MTIME) && stbuf->st_mtime<0)) && masterversion>=VERSION2INT(2,1,13)) {
2237 #else
2238 if ((to_set & FUSE_SET_ATTR_MTIME) && stbuf->st_mtime<0 && masterversion>=VERSION2INT(2,1,13)) {
2239 #endif
2240 setmask |= SET_MTIME_NOW_FLAG;
2241 } else if (to_set & FUSE_SET_ATTR_MTIME) {
2242 setmask |= SET_MTIME_FLAG;
2243 }
2244 if (setmask & (SET_ATIME_NOW_FLAG|SET_ATIME_FLAG)) {
2245 fs_no_atime(ino);
2246 }
2247 if (setmask & (SET_MTIME_NOW_FLAG|SET_MTIME_FLAG)) {
2248 fs_no_mtime(ino);
2249 }
2250 if (full_permissions) {
2251 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2252 status = fs_setattr(ino,(fi!=NULL || fs_isopen(ino))?1:0,ctx.uid,gids->gidcnt,gids->gidtab,setmask,stbuf->st_mode&07777,stbuf->st_uid,stbuf->st_gid,stbuf->st_atime,stbuf->st_mtime,0,sugid_clear_mode,attr);
2253 groups_rel(gids);
2254 } else {
2255 uint32_t gidtmp = ctx.gid;
2256 status = fs_setattr(ino,(fi!=NULL || fs_isopen(ino))?1:0,ctx.uid,1,&gidtmp,setmask,stbuf->st_mode&07777,stbuf->st_uid,stbuf->st_gid,stbuf->st_atime,stbuf->st_mtime,0,sugid_clear_mode,attr);
2257 }
2258 if (status==MFS_ERROR_ENOENT) {
2259 status = sstats_get(ino,attr,0);
2260 if (status==MFS_STATUS_OK) {
2261 mfs_attr_modify(to_set,attr,stbuf);
2262 }
2263 }
2264 if (status==MFS_STATUS_OK) {
2265 sstats_set(ino,attr,0);
2266 }
2267 status = mfs_errorconv(status);
2268 if (status!=0) {
2269 if (fi!=NULL) {
2270 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(status));
2271 } else {
2272 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [no handle]: %s",(unsigned long int)ino,to_set,setattr_str,strerr(status));
2273 }
2274 fuse_reply_err(req, status);
2275 return;
2276 }
2277 }
2278 if (status!=0) { // should never happened but better check than sorry
2279 if (fi!=NULL) {
2280 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),strerr(status));
2281 } else {
2282 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [no handle]: %s",(unsigned long int)ino,to_set,setattr_str,strerr(status));
2283 }
2284 fuse_reply_err(req, status);
2285 return;
2286 }
2287 dcache_setattr(ino,attr);
2288 if (mfs_attr_get_type(attr)==TYPE_FILE) {
2289 maxfleng = write_data_inode_getmaxfleng(ino);
2290 } else {
2291 maxfleng = 0;
2292 }
2293 memset(&o_stbuf, 0, sizeof(struct stat));
2294 mfs_attr_to_stat(ino,attr,&o_stbuf);
2295 if (maxfleng>(uint64_t)(o_stbuf.st_size)) {
2296 o_stbuf.st_size=maxfleng;
2297 mfs_attr_set_fleng(attr,maxfleng);
2298 }
2299 fdcache_invalidate(ino);
2300 attr_timeout = (mfs_attr_get_mattr(attr)&MATTR_NOACACHE)?0.0:attr_cache_timeout;
2301 mfs_makeattrstr(attrstr,256,&o_stbuf);
2302 if (fi!=NULL) {
2303 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [handle:%08"PRIX32"]: OK (%.1lf,%s)",(unsigned long int)ino,to_set,setattr_str,(uint32_t)(fi->fh),attr_timeout,attrstr);
2304 } else {
2305 oplog_printf(&ctx,"setattr (%lu,0x%X,[%s]) [no handle]: OK (%.1lf,%s)",(unsigned long int)ino,to_set,setattr_str,attr_timeout,attrstr);
2306 }
2307 fuse_reply_attr(req, &o_stbuf, attr_timeout);
2308 }
2309
2310 void mfs_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, dev_t rdev) {
2311 struct fuse_entry_param e;
2312 uint32_t inode;
2313 uint8_t attr[ATTR_RECORD_SIZE];
2314 char modestr[11];
2315 char attrstr[256];
2316 uint8_t mattr;
2317 uint32_t nleng;
2318 uint16_t cumask;
2319 int status;
2320 uint8_t type;
2321 struct fuse_ctx ctx;
2322 groups *gids;
2323
2324 ctx = *(fuse_req_ctx(req));
2325 mfs_makemodestr(modestr,mode);
2326 mfs_stats_inc(OP_MKNOD);
2327 if (debug_mode) {
2328 #ifdef FUSE_CAP_DONT_MASK
2329 char umaskstr[11];
2330 mfs_makemodestr(umaskstr,ctx.umask);
2331 oplog_printf(&ctx,"mknod (%lu,%s,%s:0%04o/%s:0%04o,0x%08lX) ...",(unsigned long int)parent,name,modestr,(unsigned int)mode,umaskstr+1,(unsigned int)(ctx.umask),(unsigned long int)rdev);
2332 fprintf(stderr,"mknod (%lu,%s,%s:0%04o/%s:0%04o,0x%08lX)\n",(unsigned long int)parent,name,modestr,(unsigned int)mode,umaskstr+1,(unsigned int)(ctx.umask),(unsigned long int)rdev);
2333 #else
2334 oplog_printf(&ctx,"mknod (%lu,%s,%s:0%04o,0x%08lX) ...",(unsigned long int)parent,name,modestr,(unsigned int)mode,(unsigned long int)rdev);
2335 fprintf(stderr,"mknod (%lu,%s,%s:0%04o,0x%08lX)\n",(unsigned long int)parent,name,modestr,(unsigned int)mode,(unsigned long int)rdev);
2336 #endif
2337 }
2338 nleng = strlen(name);
2339 if (nleng>MFS_NAME_MAX) {
2340 oplog_printf(&ctx,"mknod (%lu,%s,%s:0%04o,0x%08lX): %s",(unsigned long int)parent,name,modestr,(unsigned int)mode,(unsigned long int)rdev,strerr(ENAMETOOLONG));
2341 fuse_reply_err(req, ENAMETOOLONG);
2342 return;
2343 }
2344 if (S_ISFIFO(mode)) {
2345 type = TYPE_FIFO;
2346 } else if (S_ISCHR(mode)) {
2347 type = TYPE_CHARDEV;
2348 } else if (S_ISBLK(mode)) {
2349 type = TYPE_BLOCKDEV;
2350 } else if (S_ISSOCK(mode)) {
2351 type = TYPE_SOCKET;
2352 } else if (S_ISREG(mode) || (mode&0170000)==0) {
2353 type = TYPE_FILE;
2354 } else {
2355 oplog_printf(&ctx,"mknod (%lu,%s,%s:0%04o,0x%08lX): %s",(unsigned long int)parent,name,modestr,(unsigned int)mode,(unsigned long int)rdev,strerr(EPERM));
2356 fuse_reply_err(req, EPERM);
2357 return;
2358 }
2359
2360 if (parent==FUSE_ROOT_ID) {
2361 if (IS_SPECIAL_NAME(name)) {
2362 oplog_printf(&ctx,"mknod (%lu,%s,%s:0%04o,0x%08lX): %s",(unsigned long int)parent,name,modestr,(unsigned int)mode,(unsigned long int)rdev,strerr(EACCES));
2363 fuse_reply_err(req, EACCES);
2364 return;
2365 }
2366 }
2367
2368 #ifdef FUSE_CAP_DONT_MASK
2369 cumask = ctx.umask;
2370 #else
2371 cumask = 0;
2372 #endif
2373 if (full_permissions) {
2374 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2375 status = fs_mknod(parent,nleng,(const uint8_t*)name,type,mode&07777,cumask,ctx.uid,gids->gidcnt,gids->gidtab,rdev,&inode,attr);
2376 groups_rel(gids);
2377 } else {
2378 uint32_t gidtmp = ctx.gid;
2379 status = fs_mknod(parent,nleng,(const uint8_t*)name,type,mode&07777,cumask,ctx.uid,1,&gidtmp,rdev,&inode,attr);
2380 }
2381 status = mfs_errorconv(status);
2382 if (status!=0) {
2383 oplog_printf(&ctx,"mknod (%lu,%s,%s:0%04o,0x%08lX): %s",(unsigned long int)parent,name,modestr,(unsigned int)mode,(unsigned long int)rdev,strerr(status));
2384 fuse_reply_err(req, status);
2385 } else {
2386 negentry_cache_remove(parent,nleng,(const uint8_t*)name);
2387 // if (newdircache) {
2388 // dir_cache_link(parent,nleng,(const uint8_t*)name,inode,attr);
2389 // }
2390 dcache_invalidate_attr(parent);
2391 memset(&e, 0, sizeof(e));
2392 e.ino = inode;
2393 e.generation = 1;
2394 mattr = mfs_attr_get_mattr(attr);
2395 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
2396 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
2397 mfs_attr_to_stat(inode,attr,&e.attr);
2398 mfs_makeattrstr(attrstr,256,&e.attr);
2399 oplog_printf(&ctx,"mknod (%lu,%s,%s:0%04o,0x%08lX): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,modestr,(unsigned int)mode,(unsigned long int)rdev,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
2400 fuse_reply_entry(req, &e);
2401 }
2402 }
2403
2404 void mfs_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) {
2405 uint32_t nleng;
2406 uint32_t inode;
2407 int status;
2408 struct fuse_ctx ctx;
2409 groups *gids;
2410
2411 ctx = *(fuse_req_ctx(req));
2412 mfs_stats_inc(OP_UNLINK);
2413 if (debug_mode) {
2414 oplog_printf(&ctx,"unlink (%lu,%s) ...",(unsigned long int)parent,name);
2415 fprintf(stderr,"unlink (%lu,%s)\n",(unsigned long int)parent,name);
2416 }
2417 if (parent==FUSE_ROOT_ID) {
2418 if (IS_SPECIAL_NAME(name)) {
2419 oplog_printf(&ctx,"unlink (%lu,%s): %s",(unsigned long int)parent,name,strerr(EACCES));
2420 fuse_reply_err(req, EACCES);
2421 return;
2422 }
2423 }
2424
2425 nleng = strlen(name);
2426 if (nleng>MFS_NAME_MAX) {
2427 oplog_printf(&ctx,"unlink (%lu,%s): %s",(unsigned long int)parent,name,strerr(ENAMETOOLONG));
2428 fuse_reply_err(req, ENAMETOOLONG);
2429 return;
2430 }
2431
2432 if (full_permissions) {
2433 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2434 status = fs_unlink(parent,nleng,(const uint8_t*)name,ctx.uid,gids->gidcnt,gids->gidtab,&inode);
2435 groups_rel(gids);
2436 } else {
2437 uint32_t gidtmp = ctx.gid;
2438 status = fs_unlink(parent,nleng,(const uint8_t*)name,ctx.uid,1,&gidtmp,&inode);
2439 }
2440 status = mfs_errorconv(status);
2441 if (status!=0) {
2442 oplog_printf(&ctx,"unlink (%lu,%s): %s",(unsigned long int)parent,name,strerr(status));
2443 fuse_reply_err(req, status);
2444 } else {
2445 negentry_cache_insert(parent,nleng,(const uint8_t*)name);
2446 // if (newdircache) {
2447 // dir_cache_unlink(parent,nleng,(const uint8_t*)name);
2448 // }
2449 fdcache_invalidate(inode);
2450 dcache_invalidate_attr(parent);
2451 dcache_invalidate_name(&ctx,parent,nleng,(const uint8_t*)name);
2452 oplog_printf(&ctx,"unlink (%lu,%s): OK",(unsigned long int)parent,name);
2453 fuse_reply_err(req, 0);
2454 }
2455 }
2456
2457 void mfs_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode) {
2458 struct fuse_entry_param e;
2459 uint32_t inode;
2460 uint8_t attr[ATTR_RECORD_SIZE];
2461 char modestr[11];
2462 char attrstr[256];
2463 uint8_t mattr;
2464 uint32_t nleng;
2465 uint16_t cumask;
2466 int status;
2467 struct fuse_ctx ctx;
2468 groups *gids;
2469
2470 ctx = *(fuse_req_ctx(req));
2471 mfs_makemodestr(modestr,mode);
2472 mfs_stats_inc(OP_MKDIR);
2473 if (debug_mode) {
2474 #ifdef FUSE_CAP_DONT_MASK
2475 char umaskstr[11];
2476 mfs_makemodestr(umaskstr,ctx.umask);
2477 oplog_printf(&ctx,"mkdir (%lu,%s,d%s:0%04o/%s:0%04o) ...",(unsigned long int)parent,name,modestr+1,(unsigned int)mode,umaskstr+1,(unsigned int)(ctx.umask));
2478 fprintf(stderr,"mkdir (%lu,%s,d%s:0%04o/%s:0%04o)\n",(unsigned long int)parent,name,modestr+1,(unsigned int)mode,umaskstr+1,(unsigned int)(ctx.umask));
2479 #else
2480 oplog_printf(&ctx,"mkdir (%lu,%s,d%s:0%04o) ...",(unsigned long int)parent,name,modestr+1,(unsigned int)mode);
2481 fprintf(stderr,"mkdir (%lu,%s,d%s:0%04o)\n",(unsigned long int)parent,name,modestr+1,(unsigned int)mode);
2482 #endif
2483 }
2484 if (parent==FUSE_ROOT_ID) {
2485 if (IS_SPECIAL_NAME(name)) {
2486 oplog_printf(&ctx,"mkdir (%lu,%s,d%s:0%04o): %s",(unsigned long int)parent,name,modestr+1,(unsigned int)mode,strerr(EACCES));
2487 fuse_reply_err(req, EACCES);
2488 return;
2489 }
2490 }
2491 nleng = strlen(name);
2492 if (nleng>MFS_NAME_MAX) {
2493 oplog_printf(&ctx,"mkdir (%lu,%s,d%s:0%04o): %s",(unsigned long int)parent,name,modestr+1,(unsigned int)mode,strerr(ENAMETOOLONG));
2494 fuse_reply_err(req, ENAMETOOLONG);
2495 return;
2496 }
2497
2498 #ifdef FUSE_CAP_DONT_MASK
2499 cumask = ctx.umask;
2500 #else
2501 cumask = 0;
2502 #endif
2503 if (full_permissions) {
2504 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2505 status = fs_mkdir(parent,nleng,(const uint8_t*)name,mode,cumask,ctx.uid,gids->gidcnt,gids->gidtab,mkdir_copy_sgid,&inode,attr);
2506 groups_rel(gids);
2507 } else {
2508 uint32_t gidtmp = ctx.gid;
2509 status = fs_mkdir(parent,nleng,(const uint8_t*)name,mode,cumask,ctx.uid,1,&gidtmp,mkdir_copy_sgid,&inode,attr);
2510 }
2511 status = mfs_errorconv(status);
2512 if (status!=0) {
2513 oplog_printf(&ctx,"mkdir (%lu,%s,d%s:0%04o): %s",(unsigned long int)parent,name,modestr+1,(unsigned int)mode,strerr(status));
2514 fuse_reply_err(req, status);
2515 } else {
2516 sstats_set(inode,attr,1);
2517 sparents_add(inode,parent,direntry_cache_timeout+60);
2518 negentry_cache_remove(parent,nleng,(const uint8_t*)name);
2519 // if (newdircache) {
2520 // dir_cache_link(parent,nleng,(const uint8_t*)name,inode,attr);
2521 // }
2522 dcache_invalidate_attr(parent);
2523 memset(&e, 0, sizeof(e));
2524 e.ino = inode;
2525 e.generation = 1;
2526 mattr = mfs_attr_get_mattr(attr);
2527 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
2528 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:direntry_cache_timeout;
2529 #ifdef DENTRY_INVALIDATOR
2530 if (dinval && (mattr&MATTR_UNDELETABLE)==0) {
2531 dinval_add(parent,nleng,(const uint8_t*)name,inode);
2532 }
2533 #endif
2534 mfs_attr_to_stat(inode,attr,&e.attr);
2535 mfs_makeattrstr(attrstr,256,&e.attr);
2536 oplog_printf(&ctx,"mkdir (%lu,%s,d%s:0%04o): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)parent,name,modestr+1,(unsigned int)mode,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
2537 fuse_reply_entry(req, &e);
2538 }
2539 }
2540
2541 void mfs_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) {
2542 uint32_t nleng;
2543 uint32_t inode;
2544 int status;
2545 struct fuse_ctx ctx;
2546 groups *gids;
2547
2548 ctx = *(fuse_req_ctx(req));
2549 mfs_stats_inc(OP_RMDIR);
2550 if (debug_mode) {
2551 oplog_printf(&ctx,"rmdir (%lu,%s) ...",(unsigned long int)parent,name);
2552 fprintf(stderr,"rmdir (%lu,%s)\n",(unsigned long int)parent,name);
2553 }
2554 if (parent==FUSE_ROOT_ID) {
2555 if (IS_SPECIAL_NAME(name)) {
2556 oplog_printf(&ctx,"rmdir (%lu,%s): %s",(unsigned long int)parent,name,strerr(EACCES));
2557 fuse_reply_err(req, EACCES);
2558 return;
2559 }
2560 }
2561 nleng = strlen(name);
2562 if (nleng>MFS_NAME_MAX) {
2563 oplog_printf(&ctx,"rmdir (%lu,%s): %s",(unsigned long int)parent,name,strerr(ENAMETOOLONG));
2564 fuse_reply_err(req, ENAMETOOLONG);
2565 return;
2566 }
2567
2568 if (full_permissions) {
2569 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2570 status = fs_rmdir(parent,nleng,(const uint8_t*)name,ctx.uid,gids->gidcnt,gids->gidtab,&inode);
2571 groups_rel(gids);
2572 } else {
2573 uint32_t gidtmp = ctx.gid;
2574 status = fs_rmdir(parent,nleng,(const uint8_t*)name,ctx.uid,1,&gidtmp,&inode);
2575 }
2576 status = mfs_errorconv(status);
2577 if (status!=0) {
2578 oplog_printf(&ctx,"rmdir (%lu,%s): %s",(unsigned long int)parent,name,strerr(status));
2579 fuse_reply_err(req, status);
2580 } else {
2581 (void)inode; // for future use
2582 negentry_cache_insert(parent,nleng,(const uint8_t*)name);
2583 // if (newdircache) {
2584 // dir_cache_unlink(parent,nleng,(const uint8_t*)name);
2585 // }
2586 dcache_invalidate_attr(parent);
2587 dcache_invalidate_name(&ctx,parent,nleng,(const uint8_t*)name);
2588 #ifdef DENTRY_INVALIDATOR
2589 if (dinval) {
2590 dinval_remove(parent,nleng,(const uint8_t*)name);
2591 }
2592 #endif
2593 oplog_printf(&ctx,"rmdir (%lu,%s): OK",(unsigned long int)parent,name);
2594 fuse_reply_err(req, 0);
2595 }
2596 }
2597
2598 void mfs_symlink(fuse_req_t req, const char *path, fuse_ino_t parent, const char *name) {
2599 struct fuse_entry_param e;
2600 uint32_t inode;
2601 uint8_t attr[ATTR_RECORD_SIZE];
2602 char attrstr[256];
2603 uint8_t mattr;
2604 uint32_t nleng;
2605 int status;
2606 struct fuse_ctx ctx;
2607 groups *gids;
2608
2609 ctx = *(fuse_req_ctx(req));
2610 mfs_stats_inc(OP_SYMLINK);
2611 if (debug_mode) {
2612 oplog_printf(&ctx,"symlink (%s,%lu,%s) ...",path,(unsigned long int)parent,name);
2613 fprintf(stderr,"symlink (%s,%lu,%s)\n",path,(unsigned long int)parent,name);
2614 }
2615 if (parent==FUSE_ROOT_ID) {
2616 if (IS_SPECIAL_NAME(name)) {
2617 oplog_printf(&ctx,"symlink (%s,%lu,%s): %s",path,(unsigned long int)parent,name,strerr(EACCES));
2618 fuse_reply_err(req, EACCES);
2619 return;
2620 }
2621 }
2622 nleng = strlen(name);
2623 if (nleng>MFS_NAME_MAX || (strlen(path)+1)>MFS_SYMLINK_MAX) {
2624 oplog_printf(&ctx,"symlink (%s,%lu,%s): %s",path,(unsigned long int)parent,name,strerr(ENAMETOOLONG));
2625 fuse_reply_err(req, ENAMETOOLONG);
2626 return;
2627 }
2628
2629 if (full_permissions) {
2630 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2631 status = fs_symlink(parent,nleng,(const uint8_t*)name,(const uint8_t*)path,ctx.uid,gids->gidcnt,gids->gidtab,&inode,attr);
2632 groups_rel(gids);
2633 } else {
2634 uint32_t gidtmp = ctx.gid;
2635 status = fs_symlink(parent,nleng,(const uint8_t*)name,(const uint8_t*)path,ctx.uid,1,&gidtmp,&inode,attr);
2636 }
2637 status = mfs_errorconv(status);
2638 if (status!=0) {
2639 oplog_printf(&ctx,"symlink (%s,%lu,%s): %s",path,(unsigned long int)parent,name,strerr(status));
2640 fuse_reply_err(req, status);
2641 } else {
2642 negentry_cache_remove(parent,nleng,(const uint8_t*)name);
2643 // if (newdircache) {
2644 // dir_cache_link(parent,nleng,(const uint8_t*)name,inode,attr);
2645 // }
2646 dcache_invalidate_attr(parent);
2647 memset(&e, 0, sizeof(e));
2648 e.ino = inode;
2649 e.generation = 1;
2650 mattr = mfs_attr_get_mattr(attr);
2651 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
2652 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
2653 mfs_attr_to_stat(inode,attr,&e.attr);
2654 mfs_makeattrstr(attrstr,256,&e.attr);
2655 oplog_printf(&ctx,"symlink (%s,%lu,%s): OK (%.1lf,%lu,%.1lf,%s)",path,(unsigned long int)parent,name,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
2656 fuse_reply_entry(req, &e);
2657 }
2658 }
2659
2660 void mfs_readlink(fuse_req_t req, fuse_ino_t ino) {
2661 int status;
2662 uint8_t *path;
2663 const uint8_t *cpath;
2664 struct fuse_ctx ctx;
2665
2666 ctx = *(fuse_req_ctx(req));
2667 if (debug_mode) {
2668 oplog_printf(&ctx,"readlink (%lu) ...",(unsigned long int)ino);
2669 fprintf(stderr,"readlink (%lu)\n",(unsigned long int)ino);
2670 }
2671 path = symlink_cache_search(ino);
2672 if (path!=NULL) {
2673 mfs_stats_inc(OP_READLINK_CACHED);
2674 oplog_printf(&ctx,"readlink (%lu) (using cache): OK (%s)",(unsigned long int)ino,(char*)path);
2675 fuse_reply_readlink(req, (char*)path);
2676 free(path);
2677 return;
2678 }
2679 mfs_stats_inc(OP_READLINK_MASTER);
2680 status = fs_readlink(ino,&cpath);
2681 status = mfs_errorconv(status);
2682 if (status!=0) {
2683 oplog_printf(&ctx,"readlink (%lu): %s",(unsigned long int)ino,strerr(status));
2684 fuse_reply_err(req, status);
2685 } else {
2686 dcache_invalidate_attr(ino);
2687 symlink_cache_insert(ino,cpath);
2688 oplog_printf(&ctx,"readlink (%lu): OK (%s)",(unsigned long int)ino,(char*)cpath);
2689 fuse_reply_readlink(req, (char*)cpath);
2690 }
2691 }
2692
2693 #if FUSE_VERSION >= 30
2694 void mfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name, fuse_ino_t newparent, const char *newname,unsigned int flags) {
2695 #else /* FUSE2 */
2696 void mfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name, fuse_ino_t newparent, const char *newname) {
2697 #endif
2698 uint32_t nleng,newnleng;
2699 int status;
2700 uint32_t inode;
2701 uint8_t attr[ATTR_RECORD_SIZE];
2702 struct fuse_ctx ctx;
2703 groups *gids;
2704 #if FUSE_VERSION < 30
2705 unsigned int flags = 0;
2706 #endif
2707
2708 ctx = *(fuse_req_ctx(req));
2709 mfs_stats_inc(OP_RENAME);
2710 if (debug_mode) {
2711 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u) ...",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags);
2712 fprintf(stderr,"rename (%lu,%s,%lu,%s,%u)\n",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags);
2713 }
2714 // TODO implement support for new flags available in fuse3
2715 if (flags!=0) {
2716 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u): %s",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags,strerr(EINVAL));
2717 fuse_reply_err(req, EINVAL);
2718 return;
2719 }
2720 if (parent==FUSE_ROOT_ID) {
2721 if (IS_SPECIAL_NAME(name)) {
2722 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u): %s",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags,strerr(EACCES));
2723 fuse_reply_err(req, EACCES);
2724 return;
2725 }
2726 }
2727 if (newparent==FUSE_ROOT_ID) {
2728 if (IS_SPECIAL_NAME(newname)) {
2729 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u): %s",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags,strerr(EACCES));
2730 fuse_reply_err(req, EACCES);
2731 return;
2732 }
2733 }
2734 nleng = strlen(name);
2735 if (nleng>MFS_NAME_MAX) {
2736 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u): %s",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags,strerr(ENAMETOOLONG));
2737 fuse_reply_err(req, ENAMETOOLONG);
2738 return;
2739 }
2740 newnleng = strlen(newname);
2741 if (newnleng>MFS_NAME_MAX) {
2742 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u): %s",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags,strerr(ENAMETOOLONG));
2743 fuse_reply_err(req, ENAMETOOLONG);
2744 return;
2745 }
2746
2747 if (full_permissions) {
2748 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2749 status = fs_rename(parent,nleng,(const uint8_t*)name,newparent,newnleng,(const uint8_t*)newname,ctx.uid,gids->gidcnt,gids->gidtab,&inode,attr);
2750 groups_rel(gids);
2751 } else {
2752 uint32_t gidtmp = ctx.gid;
2753 status = fs_rename(parent,nleng,(const uint8_t*)name,newparent,newnleng,(const uint8_t*)newname,ctx.uid,1,&gidtmp,&inode,attr);
2754 }
2755 status = mfs_errorconv(status);
2756 if (status!=0) {
2757 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u): %s",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags,strerr(status));
2758 fuse_reply_err(req, status);
2759 } else {
2760 if (mfs_attr_get_type(attr)==TYPE_DIRECTORY) {
2761 sparents_add(inode,newparent,direntry_cache_timeout+60);
2762 }
2763 negentry_cache_insert(parent,nleng,(const uint8_t*)name);
2764 negentry_cache_remove(newparent,newnleng,(const uint8_t*)newname);
2765 // if (newdircache) {
2766 // dir_cache_unlink(parent,nleng,(const uint8_t*)name);
2767 // dir_cache_link(newparent,newnleng,(const uint8_t*)newname,inode,attr);
2768 // }
2769 dcache_invalidate_attr(parent);
2770 if (newparent!=parent) {
2771 dcache_invalidate_attr(newparent);
2772 }
2773 dcache_invalidate_name(&ctx,parent,nleng,(const uint8_t*)name);
2774 #ifdef DENTRY_INVALIDATOR
2775 if (dinval && mfs_attr_get_type(attr)==TYPE_DIRECTORY) {
2776 dinval_remove(parent,nleng,(const uint8_t*)name);
2777 if ((mfs_attr_get_mattr(attr)&MATTR_UNDELETABLE)==0) {
2778 dinval_add(newparent,newnleng,(const uint8_t*)newname,inode);
2779 }
2780 }
2781 #endif
2782 oplog_printf(&ctx,"rename (%lu,%s,%lu,%s,%u): OK",(unsigned long int)parent,name,(unsigned long int)newparent,newname,flags);
2783 fuse_reply_err(req, 0);
2784 }
2785 }
2786
2787 void mfs_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, const char *newname) {
2788 uint32_t newnleng;
2789 int status;
2790 struct fuse_entry_param e;
2791 uint32_t inode;
2792 uint8_t attr[ATTR_RECORD_SIZE];
2793 char attrstr[256];
2794 uint8_t mattr;
2795 struct fuse_ctx ctx;
2796 groups *gids;
2797
2798 ctx = *(fuse_req_ctx(req));
2799 mfs_stats_inc(OP_LINK);
2800 if (debug_mode) {
2801 oplog_printf(&ctx,"link (%lu,%lu,%s) ...",(unsigned long int)ino,(unsigned long int)newparent,newname);
2802 fprintf(stderr,"link (%lu,%lu,%s)\n",(unsigned long int)ino,(unsigned long int)newparent,newname);
2803 }
2804 if (IS_SPECIAL_INODE(ino)) {
2805 oplog_printf(&ctx,"link (%lu,%lu,%s): %s",(unsigned long int)ino,(unsigned long int)newparent,newname,strerr(EACCES));
2806 fuse_reply_err(req, EACCES);
2807 return;
2808 }
2809 if (newparent==FUSE_ROOT_ID) {
2810 if (IS_SPECIAL_NAME(newname)) {
2811 oplog_printf(&ctx,"link (%lu,%lu,%s): %s",(unsigned long int)ino,(unsigned long int)newparent,newname,strerr(EACCES));
2812 fuse_reply_err(req, EACCES);
2813 return;
2814 }
2815 }
2816 newnleng = strlen(newname);
2817 if (newnleng>MFS_NAME_MAX) {
2818 oplog_printf(&ctx,"link (%lu,%lu,%s): %s",(unsigned long int)ino,(unsigned long int)newparent,newname,strerr(ENAMETOOLONG));
2819 fuse_reply_err(req, ENAMETOOLONG);
2820 return;
2821 }
2822
2823 // write_data_flush_inode(ino);
2824 if (full_permissions) {
2825 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2826 status = fs_link(ino,newparent,newnleng,(const uint8_t*)newname,ctx.uid,gids->gidcnt,gids->gidtab,&inode,attr);
2827 groups_rel(gids);
2828 } else {
2829 uint32_t gidtmp = ctx.gid;
2830 status = fs_link(ino,newparent,newnleng,(const uint8_t*)newname,ctx.uid,1,&gidtmp,&inode,attr);
2831 }
2832 status = mfs_errorconv(status);
2833 if (status==ENOSPC) {
2834 status=EMLINK;
2835 }
2836 if (status!=0) {
2837 oplog_printf(&ctx,"link (%lu,%lu,%s): %s",(unsigned long int)ino,(unsigned long int)newparent,newname,strerr(status));
2838 fuse_reply_err(req, status);
2839 } else {
2840 negentry_cache_remove(newparent,newnleng,(const uint8_t*)newname);
2841 // if (newdircache) {
2842 // dir_cache_link(newparent,newnleng,(const uint8_t*)newname,inode,attr);
2843 // }
2844 if (ino!=inode) {
2845 dcache_invalidate_attr(ino);
2846 }
2847 dcache_invalidate_attr(newparent);
2848 dcache_setattr(inode,attr);
2849 memset(&e, 0, sizeof(e));
2850 e.ino = inode;
2851 e.generation = 1;
2852 mattr = mfs_attr_get_mattr(attr);
2853 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
2854 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
2855 mfs_attr_to_stat(inode,attr,&e.attr);
2856 mfs_makeattrstr(attrstr,256,&e.attr);
2857 oplog_printf(&ctx,"link (%lu,%lu,%s): OK (%.1lf,%lu,%.1lf,%s)",(unsigned long int)ino,(unsigned long int)newparent,newname,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr);
2858 fuse_reply_entry(req, &e);
2859 }
2860 }
2861
2862 void mfs_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) {
2863 dirbuf *dirinfo;
2864 uint32_t dindex;
2865 int status;
2866 uint8_t attr[ATTR_RECORD_SIZE];
2867 struct fuse_ctx ctx;
2868 groups *gids;
2869
2870 ctx = *(fuse_req_ctx(req));
2871 mfs_stats_inc(OP_OPENDIR);
2872 if (debug_mode) {
2873 oplog_printf(&ctx,"opendir (%lu) ...",(unsigned long int)ino);
2874 fprintf(stderr,"opendir (%lu)\n",(unsigned long int)ino);
2875 }
2876 if (IS_SPECIAL_INODE(ino)) {
2877 oplog_printf(&ctx,"opendir (%lu): %s",(unsigned long int)ino,strerr(ENOTDIR));
2878 fuse_reply_err(req, ENOTDIR);
2879 }
2880 if (mfs_disables & DISABLE_READDIR) {
2881 status = MFS_ERROR_EPERM;
2882 } else if (full_permissions) {
2883 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2884 status = fs_access(ino,ctx.uid,gids->gidcnt,gids->gidtab,MODE_MASK_R); // at least test rights
2885 groups_rel(gids);
2886 } else { // no acl means - we are using default permissions, so do not check supplementary groups
2887 uint32_t gidtmp = ctx.gid;
2888 status = fs_access(ino,ctx.uid,1,&gidtmp,MODE_MASK_R); // at least test rights
2889 }
2890 if (status==MFS_ERROR_ENOENT && sstats_get(ino,attr,0)==MFS_STATUS_OK) {
2891 if (full_permissions) {
2892 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
2893 status = mfs_access_test(attr,MODE_MASK_R,ctx.uid,gids->gidcnt,gids->gidtab);
2894 groups_rel(gids);
2895 } else {
2896 uint32_t gidtmp = ctx.gid;
2897 status = mfs_access_test(attr,MODE_MASK_R,ctx.uid,1,&gidtmp);
2898 }
2899 if (status!=MFS_STATUS_OK) {
2900 status = mfs_errorconv(status);
2901 oplog_printf(&ctx,"opendir (%lu): %s",(unsigned long int)ino,strerr(status));
2902 fuse_reply_err(req, status);
2903 } else {
2904 dindex = dirbuf_new();
2905 dirinfo = dirbuf_get(dindex);
2906 passert(dirinfo);
2907 pthread_mutex_lock(&(dirinfo->lock)); // make valgrind happy
2908 dirinfo->p = NULL;
2909 dirinfo->size = 0;
2910 dirinfo->dcache = NULL;
2911 dirinfo->wasread = 2;
2912 pthread_mutex_unlock(&(dirinfo->lock)); // make valgrind happy
2913 fi->fh = dindex;
2914 oplog_printf(&ctx,"sustained opendir (%lu): forced OK with empty directory",(unsigned long int)ino);
2915 if (fuse_reply_open(req,fi) == -ENOENT) {
2916 dirbuf_release(dindex);
2917 fi->fh = 0;
2918 }
2919 }
2920 } else if (status!=MFS_STATUS_OK) {
2921 status = mfs_errorconv(status);
2922 oplog_printf(&ctx,"opendir (%lu): %s",(unsigned long int)ino,strerr(status));
2923 fuse_reply_err(req, status);
2924 } else {
2925 dindex = dirbuf_new();
2926 dirinfo = dirbuf_get(dindex);
2927 passert(dirinfo);
2928 pthread_mutex_lock(&(dirinfo->lock)); // make valgrind happy
2929 dirinfo->p = NULL;
2930 dirinfo->size = 0;
2931 dirinfo->dcache = NULL;
2932 dirinfo->wasread = 0;
2933 pthread_mutex_unlock(&(dirinfo->lock)); // make valgrind happy
2934 fi->fh = dindex;
2935 oplog_printf(&ctx,"opendir (%lu): OK [handle:%08"PRIX32"]",(unsigned long int)ino,dindex);
2936 if (fuse_reply_open(req,fi) == -ENOENT) {
2937 dirbuf_release(dindex);
2938 fi->fh = 0;
2939 }
2940 }
2941 }
2942
2943 void mfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) {
2944 int status;
2945 dirbuf *dirinfo;
2946 char buffer[READDIR_BUFFSIZE];
2947 char name[MFS_NAME_MAX+1];
2948 const uint8_t *ptr,*eptr;
2949 uint8_t end;
2950 size_t opos,oleng;
2951 uint8_t nleng;
2952 uint32_t inode;
2953 uint8_t type;
2954 uint8_t attrsize;
2955 struct stat stbuf;
2956 struct fuse_ctx ctx;
2957 groups *gids;
2958
2959 ctx = *(fuse_req_ctx(req));
2960 mfs_stats_inc(OP_READDIR);
2961 if (debug_mode) {
2962 if (fi!=NULL) {
2963 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh));
2964 } else {
2965 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
2966 }
2967 fprintf(stderr,"readdir (%lu,%llu,%llu)\n",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
2968 }
2969 if (fi==NULL) {
2970 oplog_printf(&ctx,"readdir (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(EBADF));
2971 fuse_reply_err(req,EBADF);
2972 return;
2973 }
2974 dirinfo = dirbuf_get(fi->fh);
2975 if (dirinfo==NULL) {
2976 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EBADF));
2977 fuse_reply_err(req,EBADF);
2978 return;
2979 }
2980 if (off<0) {
2981 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EINVAL));
2982 fuse_reply_err(req,EINVAL);
2983 return;
2984 }
2985 attrsize = master_attrsize();
2986
2987 zassert(pthread_mutex_lock(&(dirinfo->lock)));
2988 if (dirinfo->wasread==0 || (dirinfo->wasread==1 && off==0)) {
2989 const uint8_t *dbuff;
2990 uint32_t dsize;
2991 uint8_t needscopy;
2992 /*
2993 if (newdircache) {
2994 status = dir_cache_getdirdata(ino,&dsize,&dbuff);
2995 if (status==1) { // got dir from new cache
2996 mfs_stats_inc(OP_GETDIR_CACHED);
2997 needscopy = 0;
2998 dirinfo->dataformat = 0;
2999 status = 0;
3000 } else {
3001 status = fs_getdir_plus(ino,ctx.uid,ctx.gid,1,&dbuff,&dsize);
3002 if (status==0) {
3003 mfs_stats_inc(OP_GETDIR_FULL);
3004 dir_cache_newdirdata(ino,dsize,dbuff);
3005 }
3006 needscopy = 1;
3007 dirinfo->dataformat = 1;
3008 }
3009 } else
3010 */
3011 if (usedircache) {
3012 uint8_t df;
3013 df = 1;
3014 if (full_permissions) {
3015 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
3016 status = fs_readdir(ino,ctx.uid,gids->gidcnt,gids->gidtab,1,0,&dbuff,&dsize);
3017 if (status==MFS_ERROR_EACCES) {
3018 df = 0;
3019 status = fs_readdir(ino,ctx.uid,gids->gidcnt,gids->gidtab,0,0,&dbuff,&dsize);
3020 }
3021 groups_rel(gids);
3022 } else {
3023 uint32_t gidtmp = ctx.gid;
3024 status = fs_readdir(ino,ctx.uid,1,&gidtmp,1,0,&dbuff,&dsize);
3025 if (status==MFS_ERROR_EACCES) {
3026 df = 0;
3027 status = fs_readdir(ino,ctx.uid,1,&gidtmp,0,0,&dbuff,&dsize);
3028 }
3029 }
3030 if (status==0) {
3031 if (df) {
3032 mfs_stats_inc(OP_GETDIR_FULL);
3033 } else {
3034 mfs_stats_inc(OP_GETDIR_SMALL);
3035 }
3036 }
3037 needscopy = 1;
3038 dirinfo->dataformat = df;
3039 } else {
3040 if (full_permissions) {
3041 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
3042 status = fs_readdir(ino,ctx.uid,gids->gidcnt,gids->gidtab,0,0,&dbuff,&dsize);
3043 groups_rel(gids);
3044 } else {
3045 uint32_t gidtmp = ctx.gid;
3046 status = fs_readdir(ino,ctx.uid,1,&gidtmp,0,0,&dbuff,&dsize);
3047 }
3048 if (status==0) {
3049 mfs_stats_inc(OP_GETDIR_SMALL);
3050 }
3051 needscopy = 1;
3052 dirinfo->dataformat = 0;
3053 }
3054 status = mfs_errorconv(status);
3055 if (status!=0) {
3056 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(status));
3057 fuse_reply_err(req, status);
3058 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
3059 return;
3060 }
3061 if (dirinfo->dcache) {
3062 dcache_release(dirinfo->dcache);
3063 dirinfo->dcache = NULL;
3064 }
3065 if (dirinfo->p) {
3066 free((uint8_t*)(dirinfo->p));
3067 dirinfo->p = NULL;
3068 }
3069 if (needscopy) {
3070 dirinfo->p = malloc(dsize);
3071 if (dirinfo->p == NULL) {
3072 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EINVAL));
3073 fuse_reply_err(req,EINVAL);
3074 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
3075 return;
3076 }
3077 memcpy((uint8_t*)(dirinfo->p),dbuff,dsize);
3078 } else {
3079 dirinfo->p = dbuff;
3080 }
3081 dirinfo->size = dsize;
3082 if (usedircache && dirinfo->dataformat==1) {
3083 dirinfo->dcache = dcache_new(&ctx,ino,dirinfo->p,dirinfo->size,attrsize);
3084 }
3085 }
3086 if (dirinfo->wasread<2) {
3087 dirinfo->wasread=1;
3088 }
3089
3090 if (off>=(off_t)(dirinfo->size)) {
3091 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) [handle:%08"PRIX32"]: OK (no data)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh));
3092 fuse_reply_buf(req, NULL, 0);
3093 } else {
3094 if (size>READDIR_BUFFSIZE) {
3095 size=READDIR_BUFFSIZE;
3096 }
3097 ptr = dirinfo->p+off;
3098 eptr = dirinfo->p+dirinfo->size;
3099 opos = 0;
3100 end = 0;
3101
3102 while (ptr<eptr && end==0) {
3103 nleng = ptr[0];
3104 ptr++;
3105 memcpy(name,ptr,nleng);
3106 name[nleng]=0;
3107 ptr+=nleng;
3108 off+=nleng+((dirinfo->dataformat)?(attrsize+5):6);
3109 if (ptr+5<=eptr) {
3110 inode = get32bit(&ptr);
3111 if (dirinfo->dataformat) {
3112 mfs_attr_to_stat(inode,ptr,&stbuf);
3113 ptr+=attrsize;
3114 } else {
3115 type = get8bit(&ptr);
3116 mfs_type_to_stat(inode,type,&stbuf);
3117 }
3118 oleng = fuse_add_direntry(req, buffer + opos, size - opos, name, &stbuf, off);
3119 if (opos+oleng>size) {
3120 end=1;
3121 } else {
3122 opos+=oleng;
3123 }
3124 }
3125 }
3126
3127 oplog_printf(&ctx,"readdir (%lu,%llu,%llu) [handle:%08"PRIX32"]: OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),(unsigned long int)opos);
3128 fuse_reply_buf(req,buffer,opos);
3129 }
3130 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
3131 }
3132
3133 #if FUSE_VERSION >= 30
3134 void mfs_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) {
3135 int status;
3136 dirbuf *dirinfo;
3137 char buffer[READDIR_BUFFSIZE];
3138 char name[MFS_NAME_MAX+1];
3139 const uint8_t *ptr,*eptr;
3140 uint8_t end;
3141 size_t opos,oleng;
3142 uint8_t nleng;
3143 uint32_t inode;
3144 uint64_t maxfleng;
3145 uint8_t type;
3146 uint8_t mattr;
3147 uint8_t attrsize;
3148 struct fuse_entry_param e;
3149 struct fuse_ctx ctx;
3150 groups *gids;
3151
3152 ctx = *(fuse_req_ctx(req));
3153 mfs_stats_inc(OP_READDIRPLUS);
3154 if (debug_mode) {
3155 if (fi!=NULL) {
3156 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh));
3157 } else {
3158 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
3159 }
3160 fprintf(stderr,"readdirplus (%lu,%llu,%llu)\n",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
3161 }
3162 if (fi==NULL) {
3163 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(EBADF));
3164 fuse_reply_err(req,EBADF);
3165 return;
3166 }
3167 dirinfo = dirbuf_get(fi->fh);
3168 if (dirinfo==NULL) {
3169 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EBADF));
3170 fuse_reply_err(req,EBADF);
3171 return;
3172 }
3173 if (off<0) {
3174 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EINVAL));
3175 fuse_reply_err(req,EINVAL);
3176 return;
3177 }
3178 attrsize = master_attrsize();
3179
3180 zassert(pthread_mutex_lock(&(dirinfo->lock)));
3181 if (dirinfo->wasread==0 || (dirinfo->wasread==1 && (off==0 || dirinfo->dataformat==0))) {
3182 const uint8_t *dbuff;
3183 uint32_t dsize;
3184 uint8_t needscopy;
3185
3186 if (full_permissions) {
3187 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
3188 status = fs_readdir(ino,ctx.uid,gids->gidcnt,gids->gidtab,1,0,&dbuff,&dsize);
3189 groups_rel(gids);
3190 } else {
3191 uint32_t gidtmp = ctx.gid;
3192 status = fs_readdir(ino,ctx.uid,1,&gidtmp,1,0,&dbuff,&dsize);
3193 }
3194 if (status==0) {
3195 mfs_stats_inc(OP_GETDIR_PLUS);
3196 }
3197 needscopy = 1;
3198 dirinfo->dataformat = 1;
3199 status = mfs_errorconv(status);
3200 if (status!=0) {
3201 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(status));
3202 fuse_reply_err(req, status);
3203 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
3204 return;
3205 }
3206 if (dirinfo->dcache) {
3207 dcache_release(dirinfo->dcache);
3208 dirinfo->dcache = NULL;
3209 }
3210 if (dirinfo->p) {
3211 free((uint8_t*)(dirinfo->p));
3212 dirinfo->p = NULL;
3213 }
3214 if (needscopy) {
3215 dirinfo->p = malloc(dsize);
3216 if (dirinfo->p == NULL) {
3217 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EINVAL));
3218 fuse_reply_err(req,EINVAL);
3219 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
3220 return;
3221 }
3222 memcpy((uint8_t*)(dirinfo->p),dbuff,dsize);
3223 } else {
3224 dirinfo->p = dbuff;
3225 }
3226 dirinfo->size = dsize;
3227 if (usedircache && dirinfo->dataformat==1) {
3228 dirinfo->dcache = dcache_new(&ctx,ino,dirinfo->p,dirinfo->size,attrsize);
3229 }
3230 }
3231
3232 if (dirinfo->wasread<2) {
3233 dirinfo->wasread=1;
3234 }
3235 // assert(dirinfo->dataformat>0);
3236
3237 if (off>=(off_t)(dirinfo->size)) {
3238 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) [handle:%08"PRIX32"]: OK (no data)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh));
3239 fuse_reply_buf(req, NULL, 0);
3240 } else {
3241 if (size>READDIR_BUFFSIZE) {
3242 size=READDIR_BUFFSIZE;
3243 }
3244 ptr = dirinfo->p+off;
3245 eptr = dirinfo->p+dirinfo->size;
3246 opos = 0;
3247 end = 0;
3248
3249 while (ptr<eptr && end==0) {
3250 nleng = ptr[0];
3251 ptr++;
3252 memcpy(name,ptr,nleng);
3253 name[nleng]=0;
3254 ptr+=nleng;
3255 off+=nleng+((dirinfo->dataformat)?(attrsize+5):6);
3256 if (ptr+5<=eptr) {
3257 inode = get32bit(&ptr);
3258 type = mfs_attr_get_type(ptr);
3259 if (type==TYPE_FILE) {
3260 maxfleng = write_data_inode_getmaxfleng(inode);
3261 } else {
3262 maxfleng = 0;
3263 }
3264
3265 memset(&e, 0, sizeof(e));
3266 e.ino = inode;
3267 e.generation = 1;
3268 mattr = mfs_attr_get_mattr(ptr);
3269 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
3270 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:((type==TYPE_DIRECTORY)?direntry_cache_timeout:entry_cache_timeout);
3271 #ifdef DENTRY_INVALIDATOR
3272 if (dinval && (mattr&MATTR_UNDELETABLE)==0 && type==TYPE_DIRECTORY) {
3273 dinval_add(ino,nleng,(const uint8_t *)name,inode);
3274 }
3275 #endif
3276 mfs_attr_to_stat(inode,ptr,&e.attr);
3277 if (maxfleng>(uint64_t)(e.attr.st_size)) {
3278 e.attr.st_size=maxfleng;
3279 // mfs_attr_set_fleng(ptr,maxfleng);
3280 }
3281 if (type==TYPE_FILE) {
3282 read_inode_set_length_passive(inode,e.attr.st_size);
3283 finfo_change_fleng(inode,e.attr.st_size);
3284 }
3285 fs_fix_amtime(inode,&(e.attr.st_atime),&(e.attr.st_mtime));
3286 ptr+=attrsize;
3287 oleng = fuse_add_direntry_plus(req, buffer + opos, size - opos, name, &e, off);
3288 if (opos+oleng>size) {
3289 end=1;
3290 } else {
3291 opos+=oleng;
3292 }
3293 }
3294 }
3295
3296 oplog_printf(&ctx,"readdirplus (%lu,%llu,%llu) [handle:%08"PRIX32"]: OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),(unsigned long int)opos);
3297 fuse_reply_buf(req,buffer,opos);
3298 }
3299 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
3300 }
3301 #endif
3302
3303 void mfs_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) {
3304 (void)ino;
3305 dirbuf *dirinfo;
3306 struct fuse_ctx ctx;
3307
3308 ctx = *(fuse_req_ctx(req));
3309 mfs_stats_inc(OP_RELEASEDIR);
3310 if (debug_mode) {
3311 if (fi!=NULL) {
3312 oplog_printf(&ctx,"releasedir (%lu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(uint32_t)(fi->fh));
3313 } else {
3314 oplog_printf(&ctx,"releasedir (%lu) ...",(unsigned long int)ino);
3315 }
3316 fprintf(stderr,"releasedir (%lu)\n",(unsigned long int)ino);
3317 }
3318 if (fi==NULL) {
3319 oplog_printf(&ctx,"releasedir (%lu): %s",(unsigned long int)ino,strerr(EBADF));
3320 fuse_reply_err(req,EBADF);
3321 return;
3322 }
3323 dirinfo = dirbuf_get(fi->fh);
3324 if (dirinfo==NULL) {
3325 oplog_printf(&ctx,"releasedir (%lu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(uint32_t)(fi->fh),strerr(EBADF));
3326 fuse_reply_err(req,EBADF);
3327 return;
3328 }
3329 zassert(pthread_mutex_lock(&(dirinfo->lock)));
3330 if (dirinfo->dcache) {
3331 dcache_release(dirinfo->dcache);
3332 }
3333 if (dirinfo->p) {
3334 free((uint8_t*)(dirinfo->p));
3335 dirinfo->p = NULL;
3336 }
3337 zassert(pthread_mutex_unlock(&(dirinfo->lock)));
3338 dirbuf_release(fi->fh);
3339 fi->fh = 0;
3340 oplog_printf(&ctx,"releasedir (%lu) [handle:%08"PRIX32"]: OK",(unsigned long int)ino,(uint32_t)(fi->fh));
3341 fuse_reply_err(req,0);
3342 }
3343
3344 static uint32_t mfs_newfileinfo(uint8_t accmode,uint32_t inode,uint64_t fleng,uint8_t open_in_master,uint8_t appendonly) {
3345 finfo *fileinfo;
3346 uint32_t findex;
3347 double now;
3348
3349 now = monotonic_seconds();
3350 findex = finfo_new(inode);
3351 fileinfo = finfo_get(findex);
3352 passert(fileinfo);
3353 pthread_mutex_lock(&(fileinfo->lock)); // make helgrind happy
3354 fileinfo->flengptr = inoleng_acquire(inode);
3355 inoleng_setfleng(fileinfo->flengptr,fleng);
3356 fileinfo->inode = inode;
3357 #ifdef HAVE___SYNC_OP_AND_FETCH
3358 __sync_and_and_fetch(&(fileinfo->uselocks),0);
3359 #else
3360 fileinfo->uselocks = 0;
3361 #endif
3362 fileinfo->posix_lo_head = NULL;
3363 fileinfo->flock_lo_head = NULL;
3364 if (accmode == O_RDONLY) {
3365 fileinfo->mode = IO_RO;
3366 } else { // with writeback cache enabled even in O_WRONLY accmode reading is allowed - hence only IO_RO and IO_RW !!!
3367 if (appendonly) {
3368 fileinfo->mode = IO_RA;
3369 } else {
3370 fileinfo->mode = IO_RW;
3371 }
3372 }
3373 fileinfo->rdata = NULL;
3374 fileinfo->wdata = NULL;
3375 fileinfo->create = now;
3376 #ifdef FREEBSD_DELAYED_RELEASE
3377 fileinfo->ops_in_progress = 0;
3378 fileinfo->lastuse = now;
3379 // fileinfo->next = NULL;
3380 #endif
3381 fileinfo->open_waiting = 0;
3382 fileinfo->open_in_master = open_in_master;
3383 fileinfo->open_status = 0;
3384 pthread_mutex_unlock(&(fileinfo->lock)); // make helgrind happy
3385 return findex;
3386 }
3387
3388 static void mfs_removefileinfo(uint32_t findex) {
3389 finfo *fileinfo = finfo_get(findex);
3390 if (fileinfo!=NULL) {
3391 zassert(pthread_mutex_lock(&(fileinfo->lock)));
3392 #ifdef FREEBSD_DELAYED_RELEASE
3393 fileinfo->lastuse = monotonic_seconds();
3394 #else
3395 finfo_free_resources(fileinfo);
3396 #endif
3397 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
3398 finfo_release(findex);
3399 }
3400 }
3401
/*
 * Builds a human readable description of open(2) flags, e.g. "O_RDWR|O_APPEND".
 *
 *   buf   - output buffer
 *   size  - size of 'buf' in bytes; when it is 0 nothing is written
 *   flags - flags to describe
 *
 * The access mode always comes first ("O_NONE" for an unknown one), followed
 * by "|NAME" for every flag known on this platform that is set. The result
 * is always NUL-terminated and silently truncated when 'buf' is too small.
 *
 * A flag is reported only when all of its bits are set. This matters for
 * flags made of several bits (on Linux O_SYNC contains O_DSYNC and O_TMPFILE
 * contains O_DIRECTORY). Flags defined as 0 are never reported.
 */
void mfs_make_oflags_string(char *buf,uint32_t size,uint32_t flags) {
	uint32_t leng;
#define add_flag(f) if ((f)!=0 && (flags&(f))==(uint32_t)(f)) { \
		if (leng<size) { \
			leng += snprintf(buf+leng,size-leng,"|" #f); \
		} \
	}
	if (size==0) { // no room even for the terminating zero
		return;
	}
	if ((flags&O_ACCMODE)==O_RDWR) {
		leng = snprintf(buf,size,"O_RDWR");
	} else if ((flags&O_ACCMODE)==O_RDONLY) {
		leng = snprintf(buf,size,"O_RDONLY");
	} else if ((flags&O_ACCMODE)==O_WRONLY) {
		leng = snprintf(buf,size,"O_WRONLY");
	} else {
		leng = snprintf(buf,size,"O_NONE");
	}
#ifdef O_NONBLOCK
	add_flag(O_NONBLOCK);
#endif
#ifdef O_APPEND
	add_flag(O_APPEND);
#endif
#ifdef O_CREAT
	add_flag(O_CREAT);
#endif
#ifdef O_TRUNC
	add_flag(O_TRUNC);
#endif
#ifdef O_EXCL
	add_flag(O_EXCL);
#endif
#ifdef O_SHLOCK
	add_flag(O_SHLOCK);
#endif
#ifdef O_EXLOCK
	add_flag(O_EXLOCK);
#endif
#ifdef O_NOFOLLOW
	add_flag(O_NOFOLLOW);
#endif
#ifdef O_SYMLINK
	add_flag(O_SYMLINK);
#endif
#ifdef O_EVTONLY
	add_flag(O_EVTONLY);
#endif
#ifdef O_CLOEXEC
	add_flag(O_CLOEXEC);
#endif
#ifdef O_ASYNC
	add_flag(O_ASYNC);
#endif
#ifdef O_DIRECT
	add_flag(O_DIRECT);
#endif
#ifdef O_DIRECTORY
	add_flag(O_DIRECTORY);
#endif
#ifdef O_DSYNC
	add_flag(O_DSYNC);
#endif
#ifdef O_SYNC
# ifdef O_DSYNC
#  if O_SYNC != O_DSYNC
	add_flag(O_SYNC);
#  endif
# else
	add_flag(O_SYNC);
# endif
#endif
#ifdef O_LARGEFILE
	add_flag(O_LARGEFILE);
#endif
#ifdef O_NOATIME
	add_flag(O_NOATIME);
#endif
#ifdef O_NOCTTY
	add_flag(O_NOCTTY);
#endif
#ifdef O_NDELAY
# ifdef O_NONBLOCK
#  if O_NDELAY != O_NONBLOCK
	add_flag(O_NDELAY);
#  endif
# else
	add_flag(O_NDELAY);
# endif
#endif
#ifdef O_PATH
	add_flag(O_PATH);
#endif
#ifdef O_TMPFILE
	add_flag(O_TMPFILE);
#endif
	// snprintf returns the length that would have been written, so leng may exceed size
	if (leng<size) {
		buf[leng]='\0';
	} else {
		buf[size-1]='\0';
	}
}
3497
3498 void mfs_create(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, struct fuse_file_info *fi) {
3499 struct fuse_entry_param e;
3500 uint32_t inode;
3501 uint8_t attr[ATTR_RECORD_SIZE];
3502 char modestr[11];
3503 char attrstr[256];
3504 uint8_t mattr;
3505 uint32_t nleng;
3506 uint16_t cumask;
3507 int status;
3508 struct fuse_ctx ctx;
3509 groups *gids;
3510 uint32_t findex;
3511 uint8_t oflags;
3512 char flagsstr[512];
3513
3514 ctx = *(fuse_req_ctx(req));
3515 mfs_make_oflags_string(flagsstr,512,fi->flags);
3516 mfs_makemodestr(modestr,mode);
3517 mfs_stats_inc(OP_CREATE);
3518 if (debug_mode) {
3519 #ifdef FUSE_CAP_DONT_MASK
3520 char umaskstr[11];
3521 mfs_makemodestr(umaskstr,ctx.umask);
3522 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o/%s:0%04o)",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,umaskstr+1,(unsigned int)(ctx.umask));
3523 fprintf(stderr,"create (%lu,%s,%s,-%s:0%04o/%s:0%04o)\n",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,umaskstr+1,(unsigned int)(ctx.umask));
3524 #else
3525 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o)",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode);
3526 fprintf(stderr,"create (%lu,%s,%s,-%s:0%04o)\n",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode);
3527 #endif
3528 }
3529 if (parent==FUSE_ROOT_ID) {
3530 if (IS_SPECIAL_NAME(name)) {
3531 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o): %s",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,strerr(EACCES));
3532 fuse_reply_err(req,EACCES);
3533 return;
3534 }
3535 }
3536 nleng = strlen(name);
3537 if (nleng>MFS_NAME_MAX) {
3538 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o): %s",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,strerr(ENAMETOOLONG));
3539 fuse_reply_err(req, ENAMETOOLONG);
3540 return;
3541 }
3542
3543 #ifdef FUSE_CAP_DONT_MASK
3544 cumask = ctx.umask;
3545 #else
3546 cumask = 0;
3547 #endif
3548 if (full_permissions) {
3549 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
3550 status = fs_create(parent,nleng,(const uint8_t*)name,mode&07777,cumask,ctx.uid,gids->gidcnt,gids->gidtab,&inode,attr,&oflags);
3551 groups_rel(gids);
3552 } else {
3553 uint32_t gidtmp = ctx.gid;
3554 status = fs_create(parent,nleng,(const uint8_t*)name,mode&07777,cumask,ctx.uid,1,&gidtmp,&inode,attr,&oflags);
3555 }
3556 if (status!=MFS_ERROR_ENOTSUP) {
3557 #if defined(__APPLE__)
3558 // due to bug in os x - create in deleted directory goes into infinite loop when it gets ENOENT, so we should change it to different error - we use EACCES
3559 if (status==MFS_ERROR_ENOENT && sstats_get(parent,attr,0)==MFS_STATUS_OK) {
3560 status=MFS_ERROR_EACCES;
3561 }
3562 #endif
3563 status = mfs_errorconv(status);
3564 if (status!=0) {
3565 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o): %s",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,strerr(status));
3566 fuse_reply_err(req, status);
3567 return;
3568 }
3569 negentry_cache_remove(parent,nleng,(const uint8_t*)name);
3570 if (no_xattrs==0 && xattr_cache_on) { // Linux asks for this xattr before every write, so after create we can safely assume that there is no such attribute, and set it in xattr cache (improve efficiency on small files)
3571 xattr_cache_set(inode,ctx.uid,ctx.gid,8+1+10,(const uint8_t*)"security.capability",NULL,0,MFS_ERROR_ENOATTR);
3572 xattr_cache_set(inode,ctx.uid,ctx.gid,8+1+3,(const uint8_t*)"security.ima",NULL,0,MFS_ERROR_ENOATTR);
3573 }
3574 // if (newdircache) {
3575 // dir_cache_link(parent,nleng,(const uint8_t*)name,inode,attr);
3576 // }
3577 } else {
3578 uint8_t flags;
3579 uint32_t gidtmp = ctx.gid;
3580 flags = OPEN_AFTER_CREATE;
3581 if ((fi->flags & O_ACCMODE) == O_RDONLY) {
3582 flags |= OPEN_READ;
3583 } else if ((fi->flags & O_ACCMODE) == O_WRONLY) {
3584 flags |= OPEN_WRITE;
3585 } else if ((fi->flags & O_ACCMODE) == O_RDWR) {
3586 flags |= OPEN_READ | OPEN_WRITE;
3587 } else {
3588 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o): %s",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,strerr(EINVAL));
3589 fuse_reply_err(req, EINVAL);
3590 return;
3591 }
3592 status = fs_mknod(parent,nleng,(const uint8_t*)name,TYPE_FILE,mode&07777,cumask,ctx.uid,1,&gidtmp,0,&inode,attr);
3593 status = mfs_errorconv(status);
3594 if (status!=0) {
3595 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o) (mknod): %s",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,strerr(status));
3596 fuse_reply_err(req, status);
3597 return;
3598 }
3599 negentry_cache_remove(parent,nleng,(const uint8_t*)name);
3600 // if (newdircache) {
3601 // dir_cache_link(parent,nleng,(const uint8_t*)name,inode,attr);
3602 // }
3603 status = fs_opencheck(inode,ctx.uid,1,&gidtmp,flags,attr,&oflags);
3604 status = mfs_errorconv(status);
3605 if (status!=0) {
3606 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o) (open): %s",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,strerr(status));
3607 fuse_reply_err(req, status);
3608 return;
3609 }
3610 }
3611
3612 mattr = mfs_attr_get_mattr(attr);
3613 if (oflags==0xFF) { // old masters compatibility
3614 oflags = 0;
3615 if (mattr&MATTR_DIRECTMODE) {
3616 oflags |= OPEN_DIRECTMODE;
3617 }
3618 if (mattr&MATTR_ALLOWDATACACHE) {
3619 oflags |= OPEN_KEEPCACHE;
3620 }
3621 }
3622 if (fi->flags & O_APPEND) {
3623 oflags |= OPEN_APPENDONLY;
3624 }
3625 findex = mfs_newfileinfo(fi->flags & O_ACCMODE,inode,0,1,(oflags&OPEN_APPENDONLY)?1:0);
3626 fi->fh = findex;
3627 if ((oflags&(OPEN_DIRECTMODE|OPEN_APPENDONLY)) || (mfs_disables&(DISABLE_READ|DISABLE_WRITE))) {
3628 fi->keep_cache = 0;
3629 fi->direct_io = 1;
3630 } else {
3631 if (keep_cache==1) {
3632 fi->keep_cache=1;
3633 } else if (keep_cache==2 || keep_cache>=3) {
3634 fi->keep_cache=0;
3635 } else {
3636 fi->keep_cache = (oflags&OPEN_KEEPCACHE)?1:0;
3637 }
3638 fi->direct_io = (keep_cache>=3)?1:0;
3639 }
3640 if (debug_mode) {
3641 fprintf(stderr,"create (%lu,%s) ok -> use %s io ; %s data cache ; can %s\n",(unsigned long int)inode,flagsstr,(fi->direct_io)?"direct":"cached",(fi->keep_cache)?"keep":"clear",(oflags&OPEN_APPENDONLY)?"append only":"write randomly");
3642 }
3643 // if (fi->keep_cache==0) {
3644 // chunksdatacache_clear_inode(inode,0);
3645 // }
3646 dcache_invalidate_attr(parent);
3647 memset(&e, 0, sizeof(e));
3648 e.ino = inode;
3649 e.generation = 1;
3650 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
3651 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
3652 mfs_attr_to_stat(inode,attr,&e.attr);
3653 mfs_makeattrstr(attrstr,256,&e.attr);
3654 oplog_printf(&ctx,"create (%lu,%s,%s,-%s:0%04o): OK (%.1lf,%lu,%.1lf,%s) (direct_io:%u,keep_cache:%u,append_mode:%u) [handle:%08"PRIX32"]",(unsigned long int)parent,name,flagsstr,modestr+1,(unsigned int)mode,e.entry_timeout,(unsigned long int)e.ino,e.attr_timeout,attrstr,(unsigned int)fi->direct_io,(unsigned int)fi->keep_cache,(oflags&OPEN_APPENDONLY)?1:0,findex);
3655 fs_inc_acnt(inode);
3656 if (fuse_reply_create(req, &e, fi) == -ENOENT) {
3657 fs_dec_acnt(inode);
3658 mfs_removefileinfo(findex);
3659 fi->fh = 0;
3660 }
3661 }
3662
3663 void mfs_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) {
3664 uint8_t flags,oflags,mmode,noatomictrunc;
3665 uint16_t lflags;
3666 void *fdrec;
3667 uint8_t attr[ATTR_RECORD_SIZE];
3668 uint8_t mattr;
3669 int status;
3670 struct fuse_ctx ctx;
3671 groups *gids;
3672 uint32_t findex;
3673 char flagsstr[512];
3674
3675 // extra fi->flags on Linux:
3676 // O_NONBLOCK (0x00000800 / 00004000)
3677 // O_DSYNC (0x00001000 / 00010000)
3678 // O_ASYNC (0x00002000 / 00020000)
3679 // O_LARGEFILE(0x00008000 / 00100000) - always set
3680 // O_NOATIME (0x00040000 / 01000000)
3681 // __O_SYNC (0x00100000 / 04000000)
3682
3683 ctx = *(fuse_req_ctx(req));
3684 mfs_make_oflags_string(flagsstr,512,fi->flags);
3685 mfs_stats_inc(OP_OPEN);
3686 if (debug_mode) {
3687 oplog_printf(&ctx,"open (%lu,%s) ...",(unsigned long int)ino,flagsstr);
3688 fprintf(stderr,"open (%lu,%s)\n",(unsigned long int)ino,flagsstr);
3689 }
3690 // if (ino==MASTER_INODE) {
3691 // minfo *masterinfo;
3692 // status = fs_direct_connect();
3693 // if (status<0) {
3694 // fuse_reply_err(req,EIO);
3695 // return;
3696 // }
3697 // masterinfo = malloc(sizeof(minfo));
3698 // if (masterinfo==NULL) {
3699 // fuse_reply_err(req,ENOMEM);
3700 // return;
3701 // }
3702 // masterinfo->sd = status;
3703 // masterinfo->sent = 0;
3704 // fi->direct_io = 1;
3705 // fi->fh = (unsigned long)masterinfo;
3706 // fuse_reply_open(req, fi);
3707 // return;
3708 // }
3709 if (ino==MASTERINFO_INODE) {
3710 if ((fi->flags & O_ACCMODE) != O_RDONLY) {
3711 oplog_printf(&ctx,"open (%lu,%s) (internal node: MASTERINFO): %s",(unsigned long int)ino,flagsstr,strerr(EACCES));
3712 fuse_reply_err(req,EACCES);
3713 return;
3714 }
3715 fi->fh = 0;
3716 fi->direct_io = 0;
3717 fi->keep_cache = 0;
3718 oplog_printf(&ctx,"open (%lu,%s) (internal node: MASTERINFO): OK (0,1)",(unsigned long int)ino,flagsstr);
3719 fuse_reply_open(req, fi);
3720 return;
3721 }
3722 if (ino==PARAMS_INODE) {
3723 if ((fi->flags & O_ACCMODE) != O_RDONLY) {
3724 oplog_printf(&ctx,"open (%lu,%s) (internal node: PARAMS): %s",(unsigned long int)ino,flagsstr,strerr(EACCES));
3725 fuse_reply_err(req,EACCES);
3726 return;
3727 }
3728 if (ctx.uid != 0) {
3729 oplog_printf(&ctx,"open (%lu,%s) (internal node: PARAMS): %s",(unsigned long int)ino,flagsstr,strerr(EPERM));
3730 fuse_reply_err(req,EPERM);
3731 return;
3732 }
3733 }
3734 if (ino==STATS_INODE || ino==PARAMS_INODE) {
3735 sinfo *statsinfo;
3736 uint32_t sindex;
3737 sindex = sinfo_new();
3738 statsinfo = sinfo_get(sindex);
3739 passert(statsinfo);
3740 pthread_mutex_lock(&(statsinfo->lock)); // make helgrind happy
3741 if (ino==STATS_INODE) {
3742 stats_show_all(&(statsinfo->buff),&(statsinfo->leng));
3743 } else { // ino==PARAMS_INODE
3744 statsinfo->buff = malloc(PARAMS_BUFFSIZE);
3745 statsinfo->leng = main_snprint_parameters(statsinfo->buff,PARAMS_BUFFSIZE);
3746 }
3747 statsinfo->reset = 0;
3748 pthread_mutex_unlock(&(statsinfo->lock)); // make helgrind happy
3749 fi->fh = sindex;
3750 fi->direct_io = 1;
3751 fi->keep_cache = 0;
3752 if (ino==STATS_INODE) {
3753 oplog_printf(&ctx,"open (%lu,%s) (internal node: STATS): OK (1,0)",(unsigned long int)ino,flagsstr);
3754 } else {
3755 oplog_printf(&ctx,"open (%lu,%s) (internal node: PARAMS): OK (1,0)",(unsigned long int)ino,flagsstr);
3756 }
3757 fuse_reply_open(req, fi);
3758 return;
3759 }
3760 if (ino==MOOSE_INODE || ino==RANDOM_INODE) {
3761 fi->fh = 0;
3762 fi->direct_io = 1;
3763 fi->keep_cache = 0;
3764 oplog_printf(&ctx,"open (%lu,%s) (internal node: %s): OK (1,0)",(unsigned long int)ino,flagsstr,(ino==MOOSE_INODE)?"MOOSE":"RANDOM");
3765 fuse_reply_open(req, fi);
3766 return;
3767 }
3768 if (ino==OPLOG_INODE || ino==OPHISTORY_INODE) {
3769 if ((fi->flags & O_ACCMODE) != O_RDONLY) {
3770 oplog_printf(&ctx,"open (%lu,%s) (internal node: %s): %s",(unsigned long int)ino,flagsstr,(ino==OPLOG_INODE)?"OPLOG":"OPHISTORY",strerr(EACCES));
3771 fuse_reply_err(req,EACCES);
3772 return;
3773 }
3774 if (ctx.uid != 0) {
3775 oplog_printf(&ctx,"open (%lu,%s) (internal node: %s): %s",(unsigned long int)ino,flagsstr,(ino==OPLOG_INODE)?"OPLOG":"OPHISTORY",strerr(EPERM));
3776 fuse_reply_err(req,EPERM);
3777 return;
3778 }
3779 fi->fh = oplog_newhandle((ino==OPHISTORY_INODE)?1:0);
3780 fi->direct_io = 1;
3781 fi->keep_cache = 0;
3782 oplog_printf(&ctx,"open (%lu,%s) (internal node: %s): OK (1,0)",(unsigned long int)ino,flagsstr,(ino==OPLOG_INODE)?"OPLOG":"OPHISTORY");
3783 fuse_reply_open(req, fi);
3784 return;
3785 }
3786 /*
3787 if (ino==ATTRCACHE_INODE) {
3788 fi->fh = 0;
3789 fi->direct_io = 1;
3790 fi->keep_cache = 0;
3791 fuse_reply_open(req, fi);
3792 return;
3793 }
3794 */
3795 if ((fi->flags & O_ACCMODE) == O_RDONLY) {
3796 flags = OPEN_READ;
3797 mmode = MODE_MASK_R;
3798 } else if ((fi->flags & O_ACCMODE) == O_WRONLY) {
3799 flags = OPEN_WRITE;
3800 mmode = MODE_MASK_W;
3801 } else if ((fi->flags & O_ACCMODE) == O_RDWR) {
3802 flags = OPEN_READ | OPEN_WRITE;
3803 mmode = MODE_MASK_R | MODE_MASK_W;
3804 } else {
3805 flags = 0;
3806 mmode = 0;
3807 }
3808 if (fi->flags & O_TRUNC) {
3809 uint32_t mver = master_version();
3810 noatomictrunc = (mver<VERSION2INT(3,0,113))?1:0;
3811 flags |= OPEN_TRUNCATE;
3812 fdrec = NULL; // with trunc flag we have to communicate with master, so can't use fdcache
3813 } else {
3814 fdrec = fdcache_acquire(&ctx,ino,attr,&lflags);
3815 noatomictrunc = 0;
3816 }
3817 if (fdrec) {
3818 if ((lflags & LOOKUP_RO_FILESYSTEM) && (mmode & MODE_MASK_W)) {
3819 status = MFS_ERROR_EROFS;
3820 } else if ((lflags & LOOKUP_IMMUTABLE) && (mmode & MODE_MASK_W)) {
3821 status = MFS_ERROR_EPERM;
3822 } else {
3823 status = (lflags & (1<<(mmode&0x7)))?MFS_STATUS_OK:MFS_ERROR_EACCES;
3824 }
3825 if (status==MFS_STATUS_OK) {
3826 fdcache_inject_chunkdata(fdrec);
3827 oflags = 0;
3828 if (lflags & LOOKUP_APPENDONLY) {
3829 oflags |= OPEN_APPENDONLY;
3830 }
3831 if (lflags & LOOKUP_DIRECTMODE) {
3832 oflags |= OPEN_DIRECTMODE;
3833 }
3834 if (lflags & LOOKUP_KEEPCACHE) {
3835 oflags |= OPEN_KEEPCACHE;
3836 }
3837 }
3838 fdcache_release(fdrec);
3839 } else {
3840 write_data_flush_inode(ino); // update file attributes in master !!!
3841 if (full_permissions) {
3842 gids = groups_get_x(ctx.pid,ctx.uid,ctx.gid,2); // allow group refresh again (see: getxattr for "com.apple.quarantine")
3843 status = fs_opencheck(ino,ctx.uid,gids->gidcnt,gids->gidtab,flags,attr,&oflags);
3844 if (status==MFS_STATUS_OK && (fi->flags & O_TRUNC) && noatomictrunc) {
3845 status = do_truncate(ino,TRUNCATE_FLAG_OPENED,ctx.uid,gids->gidcnt,gids->gidtab,0,attr,NULL);
3846 }
3847 groups_rel(gids);
3848 } else {
3849 uint32_t gidtmp = ctx.gid;
3850 status = fs_opencheck(ino,ctx.uid,1,&gidtmp,flags,attr,&oflags);
3851 if (status==MFS_STATUS_OK && (fi->flags & O_TRUNC) && noatomictrunc) {
3852 status = do_truncate(ino,TRUNCATE_FLAG_OPENED,ctx.uid,1,&gidtmp,0,attr,NULL);
3853 }
3854 }
3855 }
3856
3857 status = mfs_errorconv(status);
3858
3859 if (status!=0) {
3860 oplog_printf(&ctx,"open (%lu,%s)%s: %s",(unsigned long int)ino,flagsstr,(fdrec)?" (using cached data from lookup)":"",strerr(status));
3861 fuse_reply_err(req, status);
3862 return ;
3863 }
3864
3865 if (fi->flags & O_TRUNC) {
3866 chunksdatacache_clear_inode(ino,0);
3867 finfo_change_fleng(ino,0);
3868 write_data_inode_setmaxfleng(ino,0);
3869 read_inode_set_length_active(ino,0);
3870 dcache_setattr(ino,attr);
3871 fdcache_invalidate(ino);
3872 }
3873
3874 mattr = mfs_attr_get_mattr(attr);
3875 if (oflags==0xFF) { // old masters compatibility
3876 oflags = 0;
3877 if (mattr&MATTR_DIRECTMODE) {
3878 oflags |= OPEN_DIRECTMODE;
3879 }
3880 if (mattr&MATTR_ALLOWDATACACHE) {
3881 oflags |= OPEN_KEEPCACHE;
3882 }
3883 }
3884 if (fi->flags & O_APPEND) {
3885 oflags |= OPEN_APPENDONLY;
3886 }
3887 findex = mfs_newfileinfo(fi->flags & O_ACCMODE,ino,mfs_attr_get_fleng(attr),(fdrec)?0:1,(oflags&OPEN_APPENDONLY)?1:0);
3888 fi->fh = findex;
3889 if ((oflags&(OPEN_DIRECTMODE|OPEN_APPENDONLY)) || (mfs_disables&(DISABLE_READ|DISABLE_WRITE))) {
3890 fi->keep_cache = 0;
3891 fi->direct_io = 1;
3892 } else {
3893 if (keep_cache==1) {
3894 fi->keep_cache=1;
3895 } else if (keep_cache==2 || keep_cache>=3) {
3896 fi->keep_cache=0;
3897 } else {
3898 fi->keep_cache = (oflags&OPEN_KEEPCACHE)?1:0;
3899 }
3900 fi->direct_io = (keep_cache>=3)?1:0;
3901 }
3902 if (debug_mode) {
3903 fprintf(stderr,"open (%lu,%s) ok -> use %s io ; %s data cache ; can %s\n",(unsigned long int)ino,flagsstr,(fi->direct_io)?"direct":"cached",(fi->keep_cache)?"keep":"clear",(oflags&OPEN_APPENDONLY)?"append only":"write randomly");
3904 }
3905 // if (fi->keep_cache==0) {
3906 // chunksdatacache_clear_inode(ino,0);
3907 // }
3908 oplog_printf(&ctx,"open (%lu,%s)%s: OK (direct_io:%u,keep_cache:%u,append_mode:%u) [handle:%08"PRIX32"]",(unsigned long int)ino,flagsstr,(fdrec)?" (using cached data from lookup)":"",(unsigned int)fi->direct_io,(unsigned int)fi->keep_cache,(oflags&OPEN_APPENDONLY)?1:0,findex);
3909 fs_inc_acnt(ino);
3910 if (fuse_reply_open(req, fi) == -ENOENT) {
3911 mfs_removefileinfo(findex);
3912 fs_dec_acnt(ino);
3913 fi->fh = 0;
3914 } else if (fdrec) {
3915 finfo *fileinfo;
3916 uint32_t gidtmp = ctx.gid;
3917 if (fi->keep_cache==0) {
3918 flags |= OPEN_CACHE_CLEARED;
3919 }
3920 status = fs_opencheck(ino,ctx.uid,1,&gidtmp,flags|OPEN_AFTER_CREATE,NULL,NULL); // just send "opencheck" to make sure that master knows that this file is open, AFTER_CREATE means 'ignore permissions' here
3921 if (status!=MFS_STATUS_OK) {
3922 status = mfs_errorconv(status);
3923 oplog_printf(&ctx,"open (%lu,%s) (do actual open): %s",(unsigned long int)ino,flagsstr,strerr(status));
3924 }
3925 fileinfo = finfo_get(fi->fh);
3926 if (fileinfo!=NULL) {
3927 zassert(pthread_mutex_lock(&(fileinfo->lock)));
3928 fileinfo->open_status = status;
3929 fileinfo->open_in_master = 1;
3930 if (fileinfo->open_waiting) {
3931 zassert(pthread_cond_broadcast(&(fileinfo->opencond)));
3932 }
3933 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
3934 }
3935 }
3936 }
3937
3938 void mfs_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) {
3939 struct fuse_ctx ctx;
3940 finfo *fileinfo;
3941
3942 ctx = *(fuse_req_ctx(req));
3943 mfs_stats_inc(OP_RELEASE);
3944 if (debug_mode) {
3945 if (fi!=NULL) {
3946 oplog_printf(&ctx,"release (%lu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(uint32_t)(fi->fh));
3947 } else {
3948 oplog_printf(&ctx,"release (%lu) ...",(unsigned long int)ino);
3949 }
3950 fprintf(stderr,"release (%lu)\n",(unsigned long int)ino);
3951 }
3952 if (fi==NULL) {
3953 oplog_printf(&ctx,"release (%lu): %s",(unsigned long int)ino,strerr(EBADF));
3954 fuse_reply_err(req,EBADF);
3955 return;
3956 }
3957 // if (ino==MASTER_INODE) {
3958 // minfo *masterinfo = (minfo*)(unsigned long)(fi->fh);
3959 // if (masterinfo!=NULL) {
3960 // fs_direct_close(masterinfo->sd);
3961 // free(masterinfo);
3962 // }
3963 // fuse_reply_err(req,0);
3964 // return;
3965 // }
3966 if (ino==MASTERINFO_INODE/* || ino==ATTRCACHE_INODE*/) {
3967 oplog_printf(&ctx,"release (%lu) (internal node: MASTERINFO): OK",(unsigned long int)ino);
3968 fuse_reply_err(req,0);
3969 return;
3970 }
3971 if (ino==STATS_INODE || ino==PARAMS_INODE) {
3972 sinfo *statsinfo = sinfo_get(fi->fh);
3973 if (statsinfo!=NULL) {
3974 pthread_mutex_lock(&(statsinfo->lock)); // make helgrind happy
3975 if (statsinfo->buff!=NULL) {
3976 free(statsinfo->buff);
3977 statsinfo->buff = NULL;
3978 }
3979 if (statsinfo->reset) {
3980 stats_reset_all();
3981 }
3982 pthread_mutex_unlock(&(statsinfo->lock)); // make helgrind happy
3983 sinfo_release(fi->fh);
3984 }
3985 oplog_printf(&ctx,"release (%lu) (internal node: STATS): OK",(unsigned long int)ino);
3986 fuse_reply_err(req,0);
3987 return;
3988 }
3989 if (ino==MOOSE_INODE || ino==RANDOM_INODE) {
3990 oplog_printf(&ctx,"release (%lu) (internal node: %s): OK",(unsigned long int)ino,(ino==MOOSE_INODE)?"MOOSE":"RANDOM");
3991 fuse_reply_err(req,0);
3992 return;
3993 }
3994 if (ino==OPLOG_INODE || ino==OPHISTORY_INODE) {
3995 oplog_releasehandle(fi->fh);
3996 oplog_printf(&ctx,"release (%lu) (internal node: %s): OK",(unsigned long int)ino,(ino==OPLOG_INODE)?"OPLOG":"OPHISTORY");
3997 fuse_reply_err(req,0);
3998 return;
3999 }
4000 if (fi->fh>0) {
4001 fileinfo = finfo_get(fi->fh);
4002 } else {
4003 fileinfo = NULL;
4004 }
4005 if (fileinfo!=NULL) {
4006 uint8_t uselocks;
4007 uint64_t *lock_owner_tab;
4008 uint32_t lock_owner_posix_cnt;
4009 uint32_t lock_owner_flock_cnt;
4010 uint32_t indx;
4011
4012 inoleng_io_wait(fileinfo->flengptr);
4013 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4014 // rwlock_wait_for_unlock:
4015 // while (fileinfo->writing | fileinfo->writers_cnt | fileinfo->readers_cnt) {
4016 // zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
4017 // }
4018 #ifdef HAVE___SYNC_OP_AND_FETCH
4019 uselocks = __sync_or_and_fetch(&(fileinfo->uselocks),0);
4020 #else
4021 uselocks = fileinfo->uselocks;
4022 #endif
4023
4024 // any locks left?
4025 lock_owner_tab = NULL;
4026 lock_owner_posix_cnt = 0;
4027 lock_owner_flock_cnt = 0;
4028 if (fileinfo->posix_lo_head!=NULL || fileinfo->flock_lo_head!=NULL) {
4029 finfo_lock_owner *flo,**flop;
4030
4031 for (flo=fileinfo->posix_lo_head ; flo!=NULL ; flo=flo->next) {
4032 lock_owner_posix_cnt++;
4033 }
4034 for (flo=fileinfo->flock_lo_head ; flo!=NULL ; flo=flo->next) {
4035 if (flo->lock_owner!=fi->lock_owner) {
4036 lock_owner_flock_cnt++;
4037 }
4038 }
4039 if (lock_owner_posix_cnt+lock_owner_flock_cnt>0) {
4040 lock_owner_tab = malloc(sizeof(uint64_t)*(lock_owner_posix_cnt+lock_owner_flock_cnt));
4041 passert(lock_owner_tab);
4042 }
4043
4044 indx = 0;
4045 flop = &(fileinfo->posix_lo_head);
4046 while ((flo=*flop)!=NULL) {
4047 if (indx<lock_owner_posix_cnt) {
4048 lock_owner_tab[indx] = flo->lock_owner;
4049 }
4050 indx++;
4051 *flop = flo->next;
4052 free(flo);
4053 }
4054 massert(indx==lock_owner_posix_cnt,"loop mismatch");
4055 massert(fileinfo->posix_lo_head==NULL,"list not freed");
4056
4057 indx = 0;
4058 flop = &(fileinfo->flock_lo_head);
4059 while ((flo=*flop)!=NULL) {
4060 if (flo->lock_owner!=fi->lock_owner) {
4061 if (indx<lock_owner_flock_cnt) {
4062 lock_owner_tab[lock_owner_posix_cnt+indx] = flo->lock_owner;
4063 }
4064 indx++;
4065 }
4066 *flop = flo->next;
4067 free(flo);
4068 }
4069 massert(indx==lock_owner_flock_cnt,"loop mismatch");
4070 massert(fileinfo->flock_lo_head==NULL,"list not freed");
4071 }
4072
4073 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4074
4075 for (indx=0 ; indx<lock_owner_posix_cnt ; indx++) {
4076 int status;
4077 status = fs_posixlock(ino,0,lock_owner_tab[indx],POSIX_LOCK_CMD_SET,POSIX_LOCK_UNLCK,0,UINT64_MAX,0,NULL,NULL,NULL,NULL);
4078 status = mfs_errorconv(status);
4079 if (status!=0) {
4080 oplog_printf(&ctx,"release (%lu) - releasing all POSIX-type locks for %016"PRIX64" (left by kernel): %s",(unsigned long int)ino,lock_owner_tab[indx],strerr(status));
4081 } else {
4082 oplog_printf(&ctx,"release (%lu) - releasing all POSIX-type locks for %016"PRIX64" (left by kernel): OK",(unsigned long int)ino,lock_owner_tab[indx]);
4083 }
4084 }
4085
4086 if (uselocks&1) {
4087 int status;
4088 status = fs_flock(ino,0,fi->lock_owner,FLOCK_RELEASE);
4089 status = mfs_errorconv(status);
4090 if (status!=0) {
4091 oplog_printf(&ctx,"release (%lu) - releasing all FLOCK-type locks for %016"PRIX64" (received from kernel): %s",(unsigned long int)ino,(uint64_t)(fi->lock_owner),strerr(status));
4092 } else {
4093 oplog_printf(&ctx,"release (%lu) - releasing all FLOCK-type locks for %016"PRIX64" (received from kernel): OK",(unsigned long int)ino,(uint64_t)(fi->lock_owner));
4094 }
4095 }
4096
4097 for (indx=0 ; indx<lock_owner_flock_cnt ; indx++) {
4098 int status;
4099 status = fs_flock(ino,0,lock_owner_tab[lock_owner_posix_cnt+indx],FLOCK_RELEASE);
4100 status = mfs_errorconv(status);
4101 if (status!=0) {
4102 oplog_printf(&ctx,"release (%lu) - releasing all FLOCK-type locks for %016"PRIX64" (left by kernel): %s",(unsigned long int)ino,lock_owner_tab[indx],strerr(status));
4103 } else {
4104 oplog_printf(&ctx,"release (%lu) - releasing all FLOCK-type locks for %016"PRIX64" (left by kernel): OK",(unsigned long int)ino,lock_owner_tab[indx]);
4105 }
4106 }
4107
4108 if (lock_owner_tab!=NULL) {
4109 free(lock_owner_tab);
4110 }
4111
4112 }
4113 dcache_invalidate_attr(ino);
4114 if (fileinfo!=NULL) {
4115 oplog_printf(&ctx,"release (%lu) [handle:%08"PRIX32",uselocks:%u,lock_owner:%016"PRIX64"]: OK",(unsigned long int)ino,(uint32_t)(fi->fh),fileinfo->uselocks,(uint64_t)(fi->lock_owner));
4116 } else {
4117 oplog_printf(&ctx,"release (%lu) [handle:%08"PRIX32",lock_owner:%016"PRIX64"]: OK",(unsigned long int)ino,(uint32_t)(fi->fh),(uint64_t)(fi->lock_owner));
4118 }
4119 fuse_reply_err(req,0);
4120 if (fi->fh>0) {
4121 if (fileinfo!=NULL) {
4122 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4123 while (fileinfo->open_in_master==0) {
4124 fileinfo->open_waiting++;
4125 zassert(pthread_cond_wait(&(fileinfo->opencond),&(fileinfo->lock)));
4126 fileinfo->open_waiting--;
4127 }
4128 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4129 }
4130 mfs_removefileinfo(fi->fh); // after writes it waits for data sync, so do it after everything
4131 }
4132 fs_dec_acnt(ino);
4133 }
4134
4135 void mfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) {
4136 finfo *fileinfo;
4137 uint8_t *buff;
4138 uint32_t ssize;
4139 struct iovec *iov;
4140 uint32_t iovcnt;
4141 void *buffptr;
4142 int err;
4143 uint8_t oim,oerr;
4144 struct fuse_ctx ctx;
4145
4146 ctx = *(fuse_req_ctx(req));
4147 mfs_stats_inc(OP_READ);
4148 if (debug_mode) {
4149 if (ino!=OPLOG_INODE && ino!=OPHISTORY_INODE) {
4150 if (fi!=NULL) {
4151 oplog_printf(&ctx,"read (%lu,%llu,%llu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh));
4152 } else {
4153 oplog_printf(&ctx,"read (%lu,%llu,%llu) ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4154 }
4155 }
4156 fprintf(stderr,"read from inode %lu up to %llu bytes from position %llu\n",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4157 }
4158 if (ino==MASTERINFO_INODE) {
4159 uint8_t masterinfo[14];
4160 fs_getmasterlocation(masterinfo);
4161 masterproxy_getlocation(masterinfo);
4162 #ifdef MASTERINFO_WITH_VERSION
4163 if (off>=14) {
4164 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (no data)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4165 fuse_reply_buf(req,NULL,0);
4166 } else if (off+size>14) {
4167 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(unsigned long int)(14-off));
4168 fuse_reply_buf(req,(char*)(masterinfo+off),14-off);
4169 #else
4170 if (off>=10) {
4171 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (no data)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4172 fuse_reply_buf(req,NULL,0);
4173 } else if (off+size>10) {
4174 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(unsigned long int)(10-off));
4175 fuse_reply_buf(req,(char*)(masterinfo+off),10-off);
4176 #endif
4177 } else {
4178 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(unsigned long int)size);
4179 fuse_reply_buf(req,(char*)(masterinfo+off),size);
4180 }
4181 return;
4182 }
4183 if (ino==STATS_INODE || ino==PARAMS_INODE) {
4184 sinfo *statsinfo = (fi!=NULL)?sinfo_get(fi->fh):NULL;
4185 if (statsinfo!=NULL) {
4186 pthread_mutex_lock(&(statsinfo->lock)); // make helgrind happy
4187 if (off>=statsinfo->leng) {
4188 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (no data)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4189 fuse_reply_buf(req,NULL,0);
4190 } else if ((uint64_t)(off+size)>(uint64_t)(statsinfo->leng)) {
4191 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(unsigned long int)(statsinfo->leng-off));
4192 fuse_reply_buf(req,statsinfo->buff+off,statsinfo->leng-off);
4193 } else {
4194 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(unsigned long int)size);
4195 fuse_reply_buf(req,statsinfo->buff+off,size);
4196 }
4197 pthread_mutex_unlock(&(statsinfo->lock)); // make helgrind happy
4198 } else {
4199 oplog_printf(&ctx,"read (%lu,%llu,%llu): OK (no data)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4200 fuse_reply_buf(req,NULL,0);
4201 }
4202 return;
4203 }
4204 if (ino==RANDOM_INODE) {
4205 uint8_t *rbptr;
4206 uint32_t nextr;
4207 // if (size>RANDOM_BUFFSIZE) {
4208 // size = RANDOM_BUFFSIZE;
4209 // }
4210 buff = malloc(size);
4211 ssize = size;
4212 rbptr = buff;
4213 pthread_mutex_lock(&randomlock);
4214 while (ssize>=4) {
4215 nextr = KISS;
4216 *rbptr++ = nextr>>24;
4217 *rbptr++ = nextr>>16;
4218 *rbptr++ = nextr>>8;
4219 *rbptr++ = nextr;
4220 ssize-=4;
4221 }
4222 if (ssize>0) {
4223 nextr = KISS;
4224 while (ssize>0) {
4225 *rbptr++ = nextr>>24;
4226 nextr <<= 8;
4227 ssize--;
4228 }
4229 }
4230 pthread_mutex_unlock(&randomlock);
4231 fuse_reply_buf(req,(char*)buff,size);
4232 free(buff);
4233 return;
4234 }
4235 if (ino==MOOSE_INODE) {
4236 static char mooseascii[175] = {
4237 0x20, 0x5C, 0x5F, 0x5C, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
4238 0x2F, 0x5F, 0x2F, 0x0A, 0x20, 0x20, 0x20, 0x20, 0x5C, 0x5F, 0x5C, 0x5F, 0x20, 0x20, 0x20, 0x20,
4239 0x5F, 0x2F, 0x5F, 0x2F, 0x0A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5C, 0x2D, 0x2D,
4240 0x2F, 0x0A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2F, 0x40, 0x40, 0x5C, 0x5F, 0x2D,
4241 0x2D, 0x5F, 0x5F, 0x5F, 0x5F, 0x0A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x28, 0x5F, 0x5F,
4242 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x29, 0x0A, 0x20, 0x20, 0x20, 0x20, 0x20,
4243 0x20, 0x20, 0x20, 0x60, 0x60, 0x5C, 0x20, 0x20, 0x20, 0x20, 0x5F, 0x5F, 0x20, 0x20, 0x7C, 0x0A,
4244 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7C, 0x7C, 0x2D, 0x27, 0x20,
4245 0x20, 0x60, 0x7C, 0x7C, 0x0A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
4246 0x7C, 0x7C, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7C, 0x7C, 0x0A, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
4247 0x20, 0x20, 0x20, 0x20, 0x20, 0x22, 0x22, 0x20, 0x20, 0x20, 0x20, 0x20, 0x22, 0x22, 0x0A};
4248 uint32_t t = monotonic_useconds()%5000000;
4249 if (t<150000 || (t>=600000 && t<750000)) {
4250 mooseascii[59]='O';
4251 mooseascii[60]='O';
4252 } else if ((t>=150000 && t<300000) || (t>=450000 && t<600000)) {
4253 mooseascii[59]='O';
4254 mooseascii[60]='o';
4255 } else if (t>=300000 && t<450000) {
4256 mooseascii[59]='O';
4257 mooseascii[60]='-';
4258 } else {
4259 mooseascii[59]='O';
4260 mooseascii[60]='O';
4261 }
4262 if (off>=175) {
4263 fuse_reply_buf(req,NULL,0);
4264 } else if ((uint64_t)(off+size)>175) {
4265 fuse_reply_buf(req,mooseascii+off,175-off);
4266 } else {
4267 fuse_reply_buf(req,mooseascii+off,size);
4268 }
4269 return;
4270 }
4271 if (ino==OPLOG_INODE || ino==OPHISTORY_INODE) {
4272 oplog_getdata(fi->fh,&buff,&ssize,size);
4273 fuse_reply_buf(req,(char*)buff,ssize);
4274 oplog_releasedata(fi->fh);
4275 return;
4276 }
4277 /*
4278 if (ino==ATTRCACHE_INODE) {
4279 uint8_t info[2];
4280 info[0]=dir_cache_ison()+'0';
4281 if (info[0]!='0' && info[0]!='1') {
4282 info[0]='X';
4283 }
4284 info[1]='\n';
4285 if (off>2) {
4286 fuse_reply_buf(req,NULL,0);
4287 } else if (off+size>2) {
4288 fuse_reply_buf(req,(char*)(info+off),2-off);
4289 } else {
4290 fuse_reply_buf(req,(char*)(info+off),size);
4291 }
4292 return;
4293 }
4294 */
4295 if (mfs_disables & DISABLE_READ) {
4296 oplog_printf(&ctx,"read (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(EPERM));
4297 fuse_reply_err(req,EPERM);
4298 return;
4299 }
4300 if (fi==NULL) {
4301 oplog_printf(&ctx,"read (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(EBADF));
4302 fuse_reply_err(req,EBADF);
4303 return;
4304 }
4305 fileinfo = finfo_get(fi->fh);
4306 if (fi->fh==0 || fileinfo==NULL) {
4307 oplog_printf(&ctx,"read (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EBADF));
4308 fuse_reply_err(req,EBADF);
4309 return;
4310 }
4311 if (fileinfo->inode!=ino) {
4312 oplog_printf(&ctx,"read (%lu!=%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)(fileinfo->inode),(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EBADF));
4313 fuse_reply_err(req,EBADF);
4314 return;
4315 }
4316 // if (ino==MASTER_INODE) {
4317 // minfo *masterinfo = (minfo*)(unsigned long)(fi->fh);
4318 // if (masterinfo->sent) {
4319 // int rsize;
4320 // buff = malloc(size);
4321 // rsize = fs_direct_read(masterinfo->sd,buff,size);
4322 // fuse_reply_buf(req,(char*)buff,rsize);
4323 // //syslog(LOG_WARNING,"master received: %d/%llu",rsize,(unsigned long long int)size);
4324 // free(buff);
4325 // } else {
4326 // syslog(LOG_WARNING,"master: read before write");
4327 // fuse_reply_buf(req,NULL,0);
4328 // }
4329 // return;
4330 // }
4331 if (off>=MAX_FILE_SIZE || off+size>=MAX_FILE_SIZE) {
4332 oplog_printf(&ctx,"read (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EFBIG));
4333 fuse_reply_err(req,EFBIG);
4334 return;
4335 }
4336 inoleng_read_start(fileinfo->flengptr);
4337 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4338 // rwlock_rdlock begin
4339 // while (fileinfo->writing | fileinfo->writers_cnt) {
4340 // zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
4341 // }
4342 // fileinfo->readers_cnt++;
4343 // rwlock_rdlock_end
4344 #ifdef FREEBSD_DELAYED_RELEASE
4345 fileinfo->ops_in_progress++;
4346 #endif
4347 // if (fileinfo->mode==IO_WRITE) {
4348 // err = write_data_flush(fileinfo->wdata);
4349 // if (err!=0) {
4350 //#ifdef FREEBSD_DELAYED_RELEASE
4351 // fileinfo->ops_in_progress--;
4352 // fileinfo->lastuse = monotonic_seconds();
4353 //#endif
4354 // zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4355 // if (debug_mode) {
4356 // fprintf(stderr,"IO error occurred while writing inode %lu\n",(unsigned long int)ino);
4357 // }
4358 // oplog_printf(&ctx,"read (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(err));
4359 // fuse_reply_err(req,err);
4360 // return;
4361 // }
4362 // }
4363 if (fileinfo->rdata == NULL) {
4364 fileinfo->rdata = read_data_new(ino,inoleng_getfleng(fileinfo->flengptr));
4365 }
4366 oim = fileinfo->open_in_master;
4367 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4368
4369 write_data_flush_inode(ino);
4370 ssize = size;
4371 fs_atime(ino);
4372 err = read_data(fileinfo->rdata,off,&ssize,&buffptr,&iov,&iovcnt);
4373 fs_atime(ino);
4374
4375 oerr = 0;
4376 if (oim==0) {
4377 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4378
4379 // wait for full open
4380 while (fileinfo->open_in_master==0) {
4381 fileinfo->open_waiting++;
4382 zassert(pthread_cond_wait(&(fileinfo->opencond),&(fileinfo->lock)));
4383 fileinfo->open_waiting--;
4384 }
4385
4386 if (fileinfo->open_status!=0) {
4387 err = fileinfo->open_status;
4388 oerr = 1;
4389 }
4390 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4391 }
4392 if (oerr) {
4393 oplog_printf(&ctx,"read (%lu,%llu,%llu) [handle:%08"PRIX32"] (this is open error): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(err));
4394 fuse_reply_err(req,err);
4395 } else if (err!=0) {
4396 if (debug_mode) {
4397 fprintf(stderr,"IO error occurred while reading inode %lu (offset:%llu,size:%llu)\n",(unsigned long int)ino,(unsigned long long int)off,(unsigned long long int)size);
4398 }
4399 oplog_printf(&ctx,"read (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(err));
4400 fuse_reply_err(req,err);
4401 } else {
4402 if (debug_mode) {
4403 fprintf(stderr,"%"PRIu32" bytes have been read from inode %lu (offset:%llu)\n",ssize,(unsigned long int)ino,(unsigned long long int)off);
4404 }
4405 oplog_printf(&ctx,"read (%lu,%llu,%llu) [handle:%08"PRIX32"]: OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),(unsigned long int)ssize);
4406 // fuse_reply_buf(req,(char*)buff,ssize);
4407 fuse_reply_iov(req,iov,iovcnt);
4408 }
4409 // read_data_freebuff(fileinfo->rdata);
4410 read_data_free_buff(fileinfo->rdata,buffptr,iov);
4411 inoleng_read_end(fileinfo->flengptr);
4412 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4413 // rwlock_rdunlock begin
4414 // fileinfo->readers_cnt--;
4415 // if (fileinfo->readers_cnt==0) {
4416 // zassert(pthread_cond_broadcast(&(fileinfo->rwcond)));
4417 // }
4418 // rwlock_rdunlock_end
4419 #ifdef FREEBSD_DELAYED_RELEASE
4420 fileinfo->ops_in_progress--;
4421 fileinfo->lastuse = monotonic_seconds();
4422 #endif
4423 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4424 }
4425
4426 void mfs_write(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, off_t off, struct fuse_file_info *fi) {
4427 finfo *fileinfo;
4428 int err;
4429 uint8_t appendonly;
4430 off_t leng;
4431 struct fuse_ctx ctx;
4432
4433 ctx = *(fuse_req_ctx(req));
4434 mfs_stats_inc(OP_WRITE);
4435 if (debug_mode) {
4436 if (fi!=NULL) {
4437 oplog_printf(&ctx,"write (%lu,%llu,%llu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh));
4438 } else {
4439 oplog_printf(&ctx,"write (%lu,%llu,%llu) ...",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4440 }
4441 fprintf(stderr,"write to inode %lu %llu bytes at position %llu\n",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off);
4442 }
4443 if (ino==MASTERINFO_INODE || ino==OPLOG_INODE || ino==OPHISTORY_INODE || ino==MOOSE_INODE || ino==RANDOM_INODE || ino==PARAMS_INODE) {
4444 oplog_printf(&ctx,"write (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(EACCES));
4445 fuse_reply_err(req,EACCES);
4446 return;
4447 }
4448 if (ino==STATS_INODE) {
4449 sinfo *statsinfo = (fi!=NULL)?sinfo_get(fi->fh):NULL;
4450 if (statsinfo!=NULL) {
4451 pthread_mutex_lock(&(statsinfo->lock)); // make helgrind happy
4452 statsinfo->reset=1;
4453 pthread_mutex_unlock(&(statsinfo->lock)); // make helgrind happy
4454 }
4455 oplog_printf(&ctx,"write (%lu,%llu,%llu): OK (%lu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(unsigned long int)size);
4456 fuse_reply_write(req,size);
4457 return;
4458 }
4459 /*
4460 if (ino==ATTRCACHE_INODE) {
4461 if (off==0 && size>0 && buf[0]>='0' && buf[0]<='1') {
4462 dir_cache_user_switch(buf[0]-'0');
4463 newdircache = buf[0]-'0';
4464 }
4465 fuse_reply_write(req,size);
4466 return;
4467 }
4468 */
4469 if (mfs_disables & DISABLE_WRITE) {
4470 oplog_printf(&ctx,"write (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(EPERM));
4471 fuse_reply_err(req,EPERM);
4472 return;
4473 }
4474 if (fi==NULL) {
4475 oplog_printf(&ctx,"write (%lu,%llu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,strerr(EBADF));
4476 fuse_reply_err(req,EBADF);
4477 return;
4478 }
4479 fileinfo = finfo_get(fi->fh);
4480 if (fi->fh==0 || fileinfo==NULL) {
4481 oplog_printf(&ctx,"write (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EBADF));
4482 fuse_reply_err(req,EBADF);
4483 return;
4484 }
4485 if (fileinfo->inode!=ino) {
4486 oplog_printf(&ctx,"write (%lu!=%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)(fileinfo->inode),(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EBADF));
4487 fuse_reply_err(req,EBADF);
4488 return;
4489 }
4490 // if (ino==MASTER_INODE) {
4491 // minfo *masterinfo = (minfo*)(unsigned long)(fi->fh);
4492 // int wsize;
4493 // masterinfo->sent=1;
4494 // wsize = fs_direct_write(masterinfo->sd,(const uint8_t*)buf,size);
4495 // //syslog(LOG_WARNING,"master sent: %d/%llu",wsize,(unsigned long long int)size);
4496 // fuse_reply_write(req,wsize);
4497 // return;
4498 // }
4499 if (off>=MAX_FILE_SIZE || off+size>=MAX_FILE_SIZE) {
4500 oplog_printf(&ctx,"write (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EFBIG));
4501 fuse_reply_err(req, EFBIG);
4502 return;
4503 }
4504 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4505
4506 while (fileinfo->open_in_master==0) {
4507 fileinfo->open_waiting++;
4508 zassert(pthread_cond_wait(&(fileinfo->opencond),&(fileinfo->lock)));
4509 fileinfo->open_waiting--;
4510 }
4511
4512 if (fileinfo->open_status!=0) {
4513 err = fileinfo->open_status;
4514 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4515 oplog_printf(&ctx,"write (%lu,%llu,%llu) [handle:%08"PRIX32"] (this is open error): %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(err));
4516 fuse_reply_err(req,err);
4517 return;
4518 }
4519
4520 appendonly = (fileinfo->mode==IO_RA)?1:0;
4521 if (fileinfo->mode==IO_RO) {
4522 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4523 oplog_printf(&ctx,"write (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(EACCES));
4524 fuse_reply_err(req,EACCES);
4525 return;
4526 }
4527 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4528 inoleng_write_start(fileinfo->flengptr);
4529 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4530 // rwlock_wrlock begin
4531 // fileinfo->writers_cnt++;
4532 // while (fileinfo->readers_cnt | fileinfo->writing) {
4533 // zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
4534 // }
4535 // fileinfo->writers_cnt--;
4536 // fileinfo->writing = 1;
4537 // rwlock_wrlock end
4538 #ifdef FREEBSD_DELAYED_RELEASE
4539 fileinfo->ops_in_progress++;
4540 #endif
4541 leng = inoleng_getfleng(fileinfo->flengptr);
4542 err = 0;
4543 if (appendonly) {
4544 if (master_version()>=VERSION2INT(3,0,113)) {
4545 uint8_t status;
4546 uint64_t prevleng;
4547 uint32_t gid = 0;
4548 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4549 status = do_truncate(ino,TRUNCATE_FLAG_OPENED|TRUNCATE_FLAG_UPDATE|TRUNCATE_FLAG_RESERVE,0,1,&gid,size,NULL,&prevleng);
4550 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4551 if (status!=MFS_STATUS_OK) {
4552 err = mfs_errorconv(status);
4553 } else {
4554 off = prevleng;
4555 }
4556 } else {
4557 off = leng;
4558 if (off+size>=MAX_FILE_SIZE) {
4559 err = EFBIG;
4560 }
4561 }
4562 leng = off + size;
4563 }
4564 if (err==0) {
4565 if (fileinfo->wdata==NULL) {
4566 fileinfo->wdata = write_data_new(ino,leng);
4567 }
4568 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4569
4570 fs_mtime(ino);
4571 err = write_data(fileinfo->wdata,off,size,(const uint8_t*)buf,(ctx.uid==0)?1:0);
4572 fs_mtime(ino);
4573
4574 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4575 }
4576 // rwlock_wrunlock begin
4577 // fileinfo->writing = 0;
4578 // zassert(pthread_cond_broadcast(&(fileinfo->rwcond)));
4579 // wrlock_wrunlock end
4580 #ifdef FREEBSD_DELAYED_RELEASE
4581 fileinfo->ops_in_progress--;
4582 fileinfo->lastuse = monotonic_seconds();
4583 #endif
4584 if (err!=0) {
4585 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4586 if (debug_mode) {
4587 fprintf(stderr,"IO error occurred while writing inode %lu (offset:%llu,size:%llu)\n",(unsigned long int)ino,(unsigned long long int)off,(unsigned long long int)size);
4588 }
4589 oplog_printf(&ctx,"write (%lu,%llu,%llu) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),strerr(err));
4590 fuse_reply_err(req,err);
4591 } else {
4592 uint64_t newfleng;
4593 if ((uint64_t)(off+size)>inoleng_getfleng(fileinfo->flengptr)) {
4594 inoleng_setfleng(fileinfo->flengptr,off+size);
4595 newfleng = off+size;
4596 } else {
4597 newfleng = 0;
4598 }
4599 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4600 if (debug_mode) {
4601 fprintf(stderr,"%llu bytes have been written to inode %lu (offset:%llu)\n",(unsigned long long int)size,(unsigned long int)ino,(unsigned long long int)off);
4602 }
4603 oplog_printf(&ctx,"write (%lu,%llu,%llu) [handle:%08"PRIX32"]: OK (%llu)",(unsigned long int)ino,(unsigned long long int)size,(unsigned long long int)off,(uint32_t)(fi->fh),(unsigned long long int)size);
4604 if (newfleng>0) {
4605 read_inode_set_length_passive(ino,newfleng);
4606 write_data_inode_setmaxfleng(ino,newfleng);
4607 finfo_change_fleng(ino,newfleng);
4608 }
4609 read_inode_clear_cache(ino,off,size);
4610 fdcache_invalidate(ino);
4611 fuse_reply_write(req,size);
4612 }
4613 inoleng_write_end(fileinfo->flengptr);
4614 }
4615
4616 static inline int mfs_do_fsync(finfo *fileinfo) {
4617 uint32_t inode;
4618 int err;
4619 err = 0;
4620 inoleng_write_start(fileinfo->flengptr);
4621 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4622 inode = fileinfo->inode;
4623 if (fileinfo->wdata!=NULL && (fileinfo->mode==IO_RW || fileinfo->mode==IO_RA)) {
4624 // rwlock_wrlock begin
4625 // fileinfo->writers_cnt++;
4626 // while (fileinfo->readers_cnt | fileinfo->writing) {
4627 // zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
4628 // }
4629 // fileinfo->writers_cnt--;
4630 // fileinfo->writing = 1;
4631 // rwlock_wrlock end
4632 #ifdef FREEBSD_DELAYED_RELEASE
4633 fileinfo->ops_in_progress++;
4634 #endif
4635 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4636
4637 err = write_data_flush(fileinfo->wdata);
4638
4639 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4640 // rwlock_wrunlock begin
4641 // fileinfo->writing = 0;
4642 // zassert(pthread_cond_broadcast(&(fileinfo->rwcond)));
4643 // rwlock_wrunlock end
4644 #ifdef FREEBSD_DELAYED_RELEASE
4645 fileinfo->ops_in_progress--;
4646 fileinfo->lastuse = monotonic_seconds();
4647 #endif
4648 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4649 } else {
4650 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4651 }
4652 if (err==0) {
4653 fdcache_invalidate(inode);
4654 dcache_invalidate_attr(inode);
4655 }
4656 inoleng_write_end(fileinfo->flengptr);
4657 return err;
4658 }
4659
4660 void mfs_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) {
4661 finfo *fileinfo;
4662 int err;
4663 uint8_t uselocks;
4664 #ifdef FLUSH_EXTRA_LOCKS
4665 uint64_t *lock_owner_tab;
4666 uint32_t lock_owner_cnt;
4667 uint32_t indx;
4668 #endif
4669 groups *gids;
4670 struct fuse_ctx ctx;
4671
4672 ctx = *(fuse_req_ctx(req));
4673 mfs_stats_inc(OP_FLUSH);
4674 if (debug_mode) {
4675 if (fi!=NULL) {
4676 oplog_printf(&ctx,"flush (%lu) [handle:%08"PRIX32"] ...",(unsigned long int)ino,(uint32_t)(fi->fh));
4677 } else {
4678 oplog_printf(&ctx,"flush (%lu) ...",(unsigned long int)ino);
4679 }
4680 fprintf(stderr,"flush (%lu)\n",(unsigned long int)ino);
4681 }
4682 if (IS_SPECIAL_INODE(ino)) {
4683 oplog_printf(&ctx,"flush (%lu): OK",(unsigned long int)ino);
4684 fuse_reply_err(req,0);
4685 return;
4686 }
4687 if (fi==NULL) {
4688 oplog_printf(&ctx,"flush (%lu): %s",(unsigned long int)ino,strerr(EBADF));
4689 fuse_reply_err(req,EBADF);
4690 return;
4691 }
4692 fileinfo = finfo_get(fi->fh);
4693 if (fi->fh==0 || fileinfo==NULL) {
4694 oplog_printf(&ctx,"flush (%lu) [handle:%08X"PRIX32"]: %s",(unsigned long int)ino,(uint32_t)(fi->fh),strerr(EBADF));
4695 fuse_reply_err(req,EBADF);
4696 return;
4697 }
4698 if (fileinfo->inode!=ino) {
4699 oplog_printf(&ctx,"flush (%lu!=%lu) [handle:%08"PRIX32"]: %s",(unsigned long int)(fileinfo->inode),(unsigned long int)ino,(uint32_t)(fi->fh),strerr(EBADF));
4700 fuse_reply_err(req,EBADF);
4701 return;
4702 }
4703 // syslog(LOG_NOTICE,"remove_locks inode:%lu owner:%llu",(unsigned long int)ino,(unsigned long long int)fi->lock_owner);
4704 err = 0;
4705 // fuse_reply_err(req,err);
4706
4707 #ifdef HAVE___SYNC_OP_AND_FETCH
4708 uselocks = __sync_or_and_fetch(&(fileinfo->uselocks),0);
4709 #else
4710 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4711 uselocks = fileinfo->uselocks;
4712 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4713 #endif
4714
4715 inoleng_write_start(fileinfo->flengptr);
4716 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4717 if (fileinfo->wdata!=NULL && (fileinfo->mode==IO_RW || fileinfo->mode==IO_RA)) {
4718 // rwlock_wrlock begin
4719 // fileinfo->writers_cnt++;
4720 // while (fileinfo->readers_cnt | fileinfo->writing) {
4721 // zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
4722 // }
4723 // fileinfo->writers_cnt--;
4724 // fileinfo->writing = 1;
4725 // rwlock_wrlock end
4726 #ifdef FREEBSD_DELAYED_RELEASE
4727 fileinfo->ops_in_progress++;
4728 #endif
4729 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4730 if ((uselocks&2) || master_version()<VERSION2INT(3,0,43) || fileinfo->create + fsync_before_close_min_time < monotonic_seconds() || write_cache_almost_full()) {
4731 // fs_fsync_send(ino);
4732 err = write_data_flush(fileinfo->wdata);
4733 // fs_fsync_wait();
4734 } else {
4735 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
4736 do_truncate(ino,TRUNCATE_FLAG_OPENED|TRUNCATE_FLAG_UPDATE,ctx.uid,gids->gidcnt,gids->gidtab,write_data_getmaxfleng(fileinfo->wdata),NULL,NULL);
4737 groups_rel(gids);
4738 err = write_data_chunk_wait(fileinfo->wdata);
4739 }
4740 zassert(pthread_mutex_lock(&(fileinfo->lock)));
4741 // rwlock_wrunlock begin
4742 // fileinfo->writing = 0;
4743 // zassert(pthread_cond_broadcast(&(fileinfo->rwcond)));
4744 // rwlock_wrunlock end
4745 #ifdef FREEBSD_DELAYED_RELEASE
4746 fileinfo->ops_in_progress--;
4747 fileinfo->lastuse = monotonic_seconds();
4748 #endif
4749 }
4750
4751 #ifdef FLUSH_EXTRA_LOCKS
4752 lock_owner_tab = NULL;
4753 lock_owner_cnt = 0;
4754 if (fileinfo->posix_lo_head!=NULL) {
4755 finfo_lock_owner *flo,**flop;
4756
4757 for (flo=fileinfo->posix_lo_head ; flo!=NULL ; flo=flo->next) {
4758 if (flo->pid==ctx.pid && flo->lock_owner!=fi->lock_owner) {
4759 lock_owner_cnt++;
4760 }
4761 }
4762 if (lock_owner_cnt>0) {
4763 lock_owner_tab = malloc(sizeof(uint64_t)*lock_owner_cnt);
4764 passert(lock_owner_tab);
4765 }
4766 indx = 0;
4767 flop = &(fileinfo->posix_lo_head);
4768 while ((flo=*flop)!=NULL) {
4769 if (flo->pid==ctx.pid && flo->lock_owner!=fi->lock_owner) {
4770 if (indx<lock_owner_cnt) {
4771 lock_owner_tab[indx] = flo->lock_owner;
4772 }
4773 indx++;
4774 }
4775 if (flo->pid==ctx.pid || flo->lock_owner==fi->lock_owner) {
4776 *flop = flo->next;
4777 free(flo);
4778 } else {
4779 flop = &(flo->next);
4780 }
4781 }
4782 massert(indx==lock_owner_cnt,"loop mismatch");
4783 }
4784 #endif
4785 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
4786
4787 if (uselocks&2) {
4788 int status;
4789 status = fs_posixlock(ino,0,fi->lock_owner,POSIX_LOCK_CMD_SET,POSIX_LOCK_UNLCK,0,UINT64_MAX,0,NULL,NULL,NULL,NULL);
4790 status = mfs_errorconv(status);
4791 if (status!=0) {
4792 oplog_printf(&ctx,"flush (%lu) - releasing all POSIX-type locks for %016"PRIX64" (received from kernel): %s",(unsigned long int)ino,(uint64_t)(fi->lock_owner),strerr(status));
4793 } else {
4794 oplog_printf(&ctx,"flush (%lu) - releasing all POSIX-type locks for %016"PRIX64" (received from kernel): OK",(unsigned long int)ino,(uint64_t)(fi->lock_owner));
4795 }
4796 }
4797 #ifdef FLUSH_EXTRA_LOCKS
4798 for (indx=0 ; indx<lock_owner_cnt ; indx++) {
4799 int status;
4800 status = fs_posixlock(ino,0,lock_owner_tab[indx],POSIX_LOCK_CMD_SET,POSIX_LOCK_UNLCK,0,UINT64_MAX,0,NULL,NULL,NULL,NULL);
4801 status = mfs_errorconv(status);
4802 if (status!=0) {
4803 oplog_printf(&ctx,"flush (%lu) - releasing all POSIX-type locks for %016"PRIX64" (data structures): %s",(unsigned long int)ino,lock_owner_tab[indx],strerr(status));
4804 } else {
4805 oplog_printf(&ctx,"flush (%lu) - releasing all POSIX-type locks for %016"PRIX64" (data structures): OK",(unsigned long int)ino,lock_owner_tab[indx]);
4806 }
4807 }
4808
4809 if (lock_owner_tab!=NULL) {
4810 free(lock_owner_tab);
4811 }
4812 #endif
4813 if (err!=0) {
4814 oplog_printf(&ctx,"flush (%lu) [handle:%08"PRIX32",uselocks:%u,lock_owner:%016"PRIX64"]: %s",(unsigned long int)ino,(uint32_t)(fi->fh),uselocks,(uint64_t)(fi->lock_owner),strerr(err));
4815 } else {
4816 fdcache_invalidate(ino);
4817 dcache_invalidate_attr(ino);
4818 oplog_printf(&ctx,"flush (%lu) [handle:%08"PRIX32",uselocks:%u,lock_owner:%016"PRIX64"]: OK",(unsigned long int)ino,(uint32_t)(fi->fh),uselocks,(uint64_t)(fi->lock_owner));
4819 }
4820 inoleng_write_end(fileinfo->flengptr);
4821 fuse_reply_err(req,err);
4822 }
4823
4824 void mfs_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, struct fuse_file_info *fi) {
4825 finfo *fileinfo;
4826 int err;
4827 struct fuse_ctx ctx;
4828
4829 ctx = *(fuse_req_ctx(req));
4830 mfs_stats_inc(OP_FSYNC);
4831 if (debug_mode) {
4832 if (fi!=NULL) {
4833 oplog_printf(&ctx,"fsync (%lu,%d) [handle:%08"PRIX32"] ...",(unsigned long int)ino,datasync,(uint32_t)(fi->fh));
4834 } else {
4835 oplog_printf(&ctx,"fsync (%lu,%d) ...",(unsigned long int)ino,datasync);
4836 }
4837 fprintf(stderr,"fsync (%lu,%d)\n",(unsigned long int)ino,datasync);
4838 }
4839 if (IS_SPECIAL_INODE(ino)) {
4840 oplog_printf(&ctx,"fsync (%lu,%d): OK",(unsigned long int)ino,datasync);
4841 fuse_reply_err(req,0);
4842 return;
4843 }
4844 if (fi==NULL) {
4845 oplog_printf(&ctx,"fsync (%lu,%d): %s",(unsigned long int)ino,datasync,strerr(EBADF));
4846 fuse_reply_err(req,EBADF);
4847 return;
4848 }
4849 fileinfo = finfo_get(fi->fh);
4850 if (fi->fh==0 || fileinfo==NULL) {
4851 oplog_printf(&ctx,"fsync (%lu,%d) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,datasync,(uint32_t)(fi->fh),strerr(EBADF));
4852 fuse_reply_err(req,EBADF);
4853 return;
4854 }
4855 if (fileinfo->inode!=ino) {
4856 oplog_printf(&ctx,"fsync (%lu!=%lu,%d) [handle:%08"PRIX32"]: %s",(unsigned long int)(fileinfo->inode),(unsigned long int)ino,datasync,(uint32_t)(fi->fh),strerr(EBADF));
4857 fuse_reply_err(req,EBADF);
4858 return;
4859 }
4860 err = mfs_do_fsync(fileinfo);
4861 if (err!=0) {
4862 oplog_printf(&ctx,"fsync (%lu,%d) [handle:%08"PRIX32"]: %s",(unsigned long int)ino,datasync,(uint32_t)(fi->fh),strerr(err));
4863 } else {
4864 oplog_printf(&ctx,"fsync (%lu,%d) [handle:%08"PRIX32"]: OK",(unsigned long int)ino,datasync,(uint32_t)(fi->fh));
4865 }
4866 fuse_reply_err(req,err);
4867 }
4868
4869 #if FUSE_VERSION >= 29
4870
// State of one blocking flock request, shared between mfs_flock and the
// interrupt callback / interrupt thread.
typedef struct _flock_data {
	uint32_t reqid; // request id passed to fs_flock (also used when sending FLOCK_INTERRUPT)
	uint32_t inode; // inode the lock request refers to
	uint64_t owner; // lock owner, copied from fi->lock_owner
	uint32_t refs; // reference count: starts at 1, whoever drops it to 0 frees the structure
} flock_data;
4877
// Source of flock request ids: incremented for every request in mfs_flock, the value 0 is skipped.
static uint32_t flock_reqid = 0;
#ifndef HAVE___SYNC_OP_AND_FETCH
// Used only when the __sync builtins are unavailable: guards flock_reqid and flock_data.refs.
static pthread_mutex_t flock_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
4882
4883 void* mfs_flock_interrupt (void *data) {
4884 flock_data *fld = (flock_data*)data;
4885 uint32_t refs;
4886
4887 for (;;) {
4888 #ifdef HAVE___SYNC_OP_AND_FETCH
4889 refs = __sync_or_and_fetch(&(fld->refs),0);
4890 #else
4891 zassert(pthread_mutex_lock(&flock_lock));
4892 refs = fld->refs;
4893 zassert(pthread_mutex_unlock(&flock_lock));
4894 #endif
4895 if (refs<=1) {
4896 break;
4897 }
4898 fs_flock(fld->inode,fld->reqid,fld->owner,FLOCK_INTERRUPT);
4899 portable_usleep(100000);
4900 }
4901 #ifdef HAVE___SYNC_OP_AND_FETCH
4902 (void)__sync_sub_and_fetch(&(fld->refs),1);
4903 #else
4904 zassert(pthread_mutex_lock(&flock_lock));
4905 fld->refs--;
4906 refs = fld->refs;
4907 zassert(pthread_mutex_unlock(&flock_lock));
4908 #endif
4909 if (refs==0) {
4910 free(fld);
4911 }
4912 return NULL;
4913 }
4914
4915 void mfs_flock_interrupt_spawner(fuse_req_t req, void *data) {
4916 struct fuse_ctx ctx;
4917 pthread_t th;
4918 flock_data *fld = (flock_data*)data;
4919 ctx = *(fuse_req_ctx(req));
4920
4921 #ifdef HAVE___SYNC_OP_AND_FETCH
4922 (void)__sync_add_and_fetch(&(fld->refs),1);
4923 #else
4924 zassert(pthread_mutex_lock(&flock_lock));
4925 fld->refs++;
4926 zassert(pthread_mutex_unlock(&flock_lock));
4927 #endif
4928 if (debug_mode) {
4929 oplog_printf(&ctx,"flock (%"PRIu32",%"PRIu32",%016"PRIX64",-): interrupted",fld->reqid,fld->inode,fld->owner);
4930 fprintf(stderr,"flock (%"PRIu32",%"PRIu32",%016"PRIX64",-): interrupted\n",fld->reqid,fld->inode,fld->owner);
4931 }
4932 lwt_minthread_create(&th,1,mfs_flock_interrupt,data);
4933 }
4934
4935 void mfs_flock (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, int op) {
4936 int status;
4937 struct fuse_ctx ctx;
4938 uint32_t reqid;
4939 uint64_t owner;
4940 uint8_t lock_mode,lmvalid;
4941 char *lock_mode_str;
4942 finfo *fileinfo;
4943 flock_data *fld;
4944 uint32_t refs;
4945
4946 if (no_bsd_locks) {
4947 fuse_reply_err(req,ENOSYS);
4948 return;
4949 }
4950 if (op&LOCK_UN) {
4951 lmvalid = 1;
4952 lock_mode = FLOCK_UNLOCK;
4953 lock_mode_str = "UNLOCK";
4954 } else if (op&LOCK_SH) {
4955 lmvalid = 1;
4956 if (op&LOCK_NB) {
4957 lock_mode=FLOCK_TRY_SHARED;
4958 lock_mode_str = "TRYSH";
4959 } else {
4960 lock_mode=FLOCK_LOCK_SHARED;
4961 lock_mode_str = "LOCKSH";
4962 }
4963 } else if (op&LOCK_EX) {
4964 lmvalid = 1;
4965 if (op&LOCK_NB) {
4966 lock_mode=FLOCK_TRY_EXCLUSIVE;
4967 lock_mode_str = "TRYEX";
4968 } else {
4969 lock_mode=FLOCK_LOCK_EXCLUSIVE;
4970 lock_mode_str = "LOCKEX";
4971 }
4972 } else {
4973 lmvalid = 0;
4974 lock_mode = 0;
4975 lock_mode_str = "-";
4976 }
4977 ctx = *(fuse_req_ctx(req));
4978 mfs_stats_inc(OP_FLOCK);
4979 if (IS_SPECIAL_INODE(ino)) {
4980 if (debug_mode) {
4981 oplog_printf(&ctx,"flock (-,%lu,-,%s): %s",(unsigned long int)ino,lock_mode_str,strerr(EPERM));
4982 fprintf(stderr,"flock (-,%lu,-,%s)\n",(unsigned long int)ino,lock_mode_str);
4983 }
4984 fuse_reply_err(req,EPERM);
4985 return;
4986 }
4987 if (lmvalid==0) {
4988 if (debug_mode) {
4989 oplog_printf(&ctx,"flock (-,%lu,-,%s): %s",(unsigned long int)ino,lock_mode_str,strerr(EINVAL));
4990 fprintf(stderr,"flock (-,%lu,-,%s)\n",(unsigned long int)ino,lock_mode_str);
4991 }
4992 fuse_reply_err(req,EINVAL);
4993 return;
4994 }
4995 if (fi==NULL) {
4996 if (debug_mode) {
4997 oplog_printf(&ctx,"flock (-,%lu,-,%s): %s",(unsigned long int)ino,lock_mode_str,strerr(EBADF));
4998 fprintf(stderr,"flock (-,%lu,-,%s)\n",(unsigned long int)ino,lock_mode_str);
4999 }
5000 fuse_reply_err(req,EBADF);
5001 return;
5002 }
5003 fileinfo = finfo_get(fi->fh);
5004 if (fileinfo==NULL) {
5005 if (debug_mode) {
5006 oplog_printf(&ctx,"flock (-,%lu,-,%s): %s",(unsigned long int)ino,lock_mode_str,strerr(EBADF));
5007 fprintf(stderr,"flock (-,%lu,-,%s)\n",(unsigned long int)ino,lock_mode_str);
5008 }
5009 fuse_reply_err(req,EBADF);
5010 return;
5011 }
5012 if (fileinfo->inode!=ino) {
5013 if (debug_mode) {
5014 oplog_printf(&ctx,"flock (-,%lu!=%lu,-,%s): %s",(unsigned long int)(fileinfo->inode),(unsigned long int)ino,lock_mode_str,strerr(EBADF));
5015 fprintf(stderr,"flock (-,%lu,-,%s)\n",(unsigned long int)ino,lock_mode_str);
5016 }
5017 fuse_reply_err(req,EBADF);
5018 return;
5019 }
5020
5021 owner = fi->lock_owner;
5022
5023 zassert(pthread_mutex_lock(&(fileinfo->lock)));
5024
5025 // wait for full open
5026 while (fileinfo->open_in_master==0) {
5027 fileinfo->open_waiting++;
5028 zassert(pthread_cond_wait(&(fileinfo->opencond),&(fileinfo->lock)));
5029 fileinfo->open_waiting--;
5030 }
5031
5032 if (fileinfo->open_status!=0) {
5033 status = fileinfo->open_status;
5034 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
5035 oplog_printf(&ctx,"flock (-,%lu,%016"PRIX64",%s) [handle:%08"PRIX32"] (this is open error): %s",(unsigned long int)ino,owner,lock_mode_str,(uint32_t)(fi->fh),strerr(status));
5036 fuse_reply_err(req,status);
5037 return;
5038 }
5039
5040
5041 // track all locks to unlock them on release
5042 if (lock_mode!=FLOCK_UNLOCK) {
5043 finfo_lock_owner *flo;
5044
5045 // add owner_id to list
5046 for (flo=fileinfo->flock_lo_head ; flo!=NULL ; flo=flo->next) {
5047 if (flo->lock_owner==owner) {
5048 break;
5049 }
5050 }
5051 if (flo==NULL) {
5052 flo = malloc(sizeof(finfo_lock_owner));
5053 flo->lock_owner = owner;
5054 flo->next = fileinfo->flock_lo_head;
5055 fileinfo->flock_lo_head = flo;
5056 }
5057 }
5058
5059 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
5060
5061 #ifdef HAVE___SYNC_OP_AND_FETCH
5062 do {
5063 reqid = __sync_add_and_fetch(&flock_reqid,1);
5064 } while (reqid==0);
5065 __sync_or_and_fetch(&(fileinfo->uselocks),1);
5066 #else
5067 zassert(pthread_mutex_lock(&flock_lock));
5068 flock_reqid++;
5069 if (flock_reqid==0) {
5070 flock_reqid=1;
5071 }
5072 reqid = flock_reqid;
5073 zassert(pthread_mutex_unlock(&flock_lock));
5074 zassert(pthread_mutex_lock(&(fileinfo->lock)));
5075 fileinfo->uselocks |= 1;
5076 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
5077 #endif
5078 if (debug_mode) {
5079 oplog_printf(&ctx,"flock (%"PRIu32",%lu,%016"PRIX64",%s) [handle:%08"PRIX32"] ...",reqid,(unsigned long int)ino,owner,lock_mode_str,(uint32_t)(fi->fh));
5080 fprintf(stderr,"flock (%"PRIu32",%lu,%016"PRIX64",%s)\n",reqid,(unsigned long int)ino,owner,lock_mode_str);
5081 }
5082 if (lock_mode==FLOCK_UNLOCK) {
5083 mfs_do_fsync(fileinfo);
5084 }
5085 if (lock_mode==FLOCK_LOCK_SHARED || lock_mode==FLOCK_LOCK_EXCLUSIVE) {
5086 fld = malloc(sizeof(flock_data));
5087 passert(fld);
5088 fld->reqid = reqid;
5089 fld->inode = ino;
5090 fld->owner = owner;
5091 fld->refs = 1;
5092 fuse_req_interrupt_func(req,mfs_flock_interrupt_spawner,fld);
5093 if (fuse_req_interrupted(req)==0) {
5094 status = fs_flock(ino,reqid,owner,lock_mode);
5095 status = mfs_errorconv(status);
5096 } else {
5097 status = EINTR;
5098 }
5099 fuse_req_interrupt_func(req,NULL,NULL);
5100 } else {
5101 status = fs_flock(ino,reqid,owner,lock_mode);
5102 status = mfs_errorconv(status);
5103 fld = NULL;
5104 }
5105 if (status==0) {
5106 oplog_printf(&ctx,"flock (%"PRIu32",%lu,%016"PRIX64",%s) [handle:%08"PRIX32"]: OK",reqid,(unsigned long int)ino,owner,lock_mode_str,(uint32_t)(fi->fh));
5107 } else {
5108 oplog_printf(&ctx,"flock (%"PRIu32",%lu,%016"PRIX64",%s) [handle:%08"PRIX32"]: %s",reqid,(unsigned long int)ino,owner,lock_mode_str,(uint32_t)(fi->fh),strerr(status));
5109 }
5110 fuse_reply_err(req,status);
5111 if (fld!=NULL) {
5112 #ifdef HAVE___SYNC_OP_AND_FETCH
5113 refs = __sync_sub_and_fetch(&(fld->refs),1);
5114 #else
5115 zassert(pthread_mutex_lock(&flock_lock));
5116 fld->refs--;
5117 refs = fld->refs;
5118 zassert(pthread_mutex_unlock(&flock_lock));
5119 #endif
5120 if (refs==0) {
5121 free(fld);
5122 }
5123 }
5124 }
5125 #endif
5126
5127 #if FUSE_VERSION >= 26
5128
// State of one blocking POSIX lock request, shared with the interrupt
// callback / interrupt thread.
// NOTE(review): the code that allocates and fills this structure is outside
// this part of the file - presumably mfs_setlk; confirm there.
typedef struct _plock_data {
	uint32_t reqid; // request id passed to fs_posixlock when sending POSIX_LOCK_CMD_INT
	uint32_t inode; // inode the lock request refers to
	uint64_t owner; // lock owner
	uint64_t start; // presumably first byte of the requested range (only logged here)
	uint64_t end; // presumably end of the requested range (only logged here)
	char ctype; // lock type as a single character, printed with %c in the log
	uint32_t refs; // reference count: whoever drops it to 0 frees the structure
} plock_data;
5138
// Source of POSIX lock request ids: incremented for every request in mfs_setlk, the value 0 is skipped.
static uint32_t plock_reqid = 0;
#ifndef HAVE___SYNC_OP_AND_FETCH
// Used only when the __sync builtins are unavailable: guards plock_reqid and plock_data.refs.
static pthread_mutex_t plock_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
5143
5144 void* mfs_plock_interrupt (void *data) {
5145 plock_data *pld = (plock_data*)data;
5146 uint32_t refs;
5147
5148 for (;;) {
5149 #ifdef HAVE___SYNC_OP_AND_FETCH
5150 refs = __sync_or_and_fetch(&(pld->refs),0);
5151 #else
5152 zassert(pthread_mutex_lock(&plock_lock));
5153 refs = pld->refs;
5154 zassert(pthread_mutex_unlock(&plock_lock));
5155 #endif
5156 if (refs<=1) {
5157 break;
5158 }
5159 fs_posixlock(pld->inode,pld->reqid,pld->owner,POSIX_LOCK_CMD_INT,POSIX_LOCK_UNLCK,0,0,0,NULL,NULL,NULL,NULL);
5160 portable_usleep(100000);
5161 }
5162 #ifdef HAVE___SYNC_OP_AND_FETCH
5163 refs = __sync_sub_and_fetch(&(pld->refs),1);
5164 #else
5165 zassert(pthread_mutex_lock(&plock_lock));
5166 pld->refs++;
5167 refs = pld->refs;
5168 zassert(pthread_mutex_unlock(&plock_lock));
5169 #endif
5170 if (refs==0) {
5171 free(pld);
5172 }
5173 return NULL;
5174 }
5175
5176 void mfs_plock_interrupt_spawner (fuse_req_t req, void *data) {
5177 struct fuse_ctx ctx;
5178 pthread_t th;
5179 plock_data *pld = (plock_data*)data;
5180 ctx = *(fuse_req_ctx(req));
5181
5182 #ifdef HAVE___SYNC_OP_AND_FETCH
5183 (void)__sync_add_and_fetch(&(pld->refs),1);
5184 #else
5185 zassert(pthread_mutex_lock(&plock_lock));
5186 pld->refs++;
5187 zassert(pthread_mutex_unlock(&plock_lock));
5188 #endif
5189 if (debug_mode) {
5190 oplog_printf(&ctx,"setlkw (%"PRIu32",%016"PRIX64",%"PRIu64",%"PRIu64",%c): interrupted",pld->inode,pld->owner,pld->start,pld->end,pld->ctype);
5191 fprintf(stderr,"setlkw (%"PRIu32",%016"PRIX64",%"PRIu64",%"PRIu64",%c): interrupted\n",pld->inode,pld->owner,pld->start,pld->end,pld->ctype);
5192 }
5193 lwt_minthread_create(&th,1,mfs_plock_interrupt,data);
5194 }
5195
5196 void mfs_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, struct flock *lock) {
5197 int status;
5198 struct fuse_ctx ctx;
5199 struct flock rlock;
5200 uint64_t owner;
5201 uint64_t start,end,rstart,rend;
5202 uint32_t pid,rpid;
5203 uint8_t type,rtype;
5204 uint8_t invalid;
5205 char ctype,rctype;
5206
5207 if (no_posix_locks) {
5208 fuse_reply_err(req,ENOSYS);
5209 return;
5210 }
5211 ctx = *(fuse_req_ctx(req));
5212 mfs_stats_inc(OP_GETLK);
5213 if (IS_SPECIAL_INODE(ino)) {
5214 if (debug_mode) {
5215 oplog_printf(&ctx,"getlk (inode:%lu owner:- start:- end:- type:-): %s",(unsigned long int)ino,strerr(EPERM));
5216 fprintf(stderr,"getlk (inode:%lu owner:- start:- end:- type:-)\n",(unsigned long int)ino);
5217 }
5218 fuse_reply_err(req,EPERM);
5219 return;
5220 }
5221 invalid = 0;
5222 type = 0; // make gcc happy
5223 ctype = '-';
5224 if (lock->l_whence!=SEEK_SET) { // position has to be converted by the kernel
5225 invalid = 1;
5226 } else if (lock->l_type==F_UNLCK) {
5227 type = POSIX_LOCK_UNLCK;
5228 ctype = 'U';
5229 } else if (lock->l_type==F_RDLCK) {
5230 type = POSIX_LOCK_RDLCK;
5231 ctype = 'R';
5232 } else if (lock->l_type==F_WRLCK) {
5233 type = POSIX_LOCK_WRLCK;
5234 ctype = 'W';
5235 } else {
5236 invalid = 1;
5237 }
5238 if (invalid) {
5239 if (debug_mode) {
5240 oplog_printf(&ctx,"getlk (inode:%lu owner:- start:- end:- type:-): %s",(unsigned long int)ino,strerr(EINVAL));
5241 fprintf(stderr,"getlk (inode:%lu owner:- start:- end:- type:-)\n",(unsigned long int)ino);
5242 }
5243 fuse_reply_err(req,EINVAL);
5244 return;
5245 }
5246 if (fi==NULL || finfo_get(fi->fh)==NULL) {
5247 if (debug_mode) {
5248 oplog_printf(&ctx,"getlk (inode:%lu owner:- start:- end:- type:-): %s",(unsigned long int)ino,strerr(EBADF));
5249 fprintf(stderr,"getlk (inode:%lu owner:- start:- end:- type:-)\n",(unsigned long int)ino);
5250 }
5251 fuse_reply_err(req,EBADF);
5252 return;
5253 }
5254 owner = fi->lock_owner;
5255 start = lock->l_start;
5256 if (lock->l_len==0) {
5257 end = UINT64_MAX;
5258 } else {
5259 end = start + lock->l_len;
5260 }
5261 pid = ctx.pid;
5262 if (debug_mode) {
5263 oplog_printf(&ctx,"getlk (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c) [handle:%08"PRIX32"] ...",(unsigned long int)ino,owner,start,end,ctype,(uint32_t)(fi->fh));
5264 fprintf(stderr,"getlk (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c)\n",(unsigned long int)ino,owner,start,end,ctype);
5265 }
5266 status = fs_posixlock(ino,0,owner,POSIX_LOCK_CMD_GET,type,start,end,pid,&rtype,&rstart,&rend,&rpid);
5267 status = mfs_errorconv(status);
5268 if (status!=0) {
5269 oplog_printf(&ctx,"getlk (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c): %s",(unsigned long int)ino,owner,start,end,ctype,strerr(status));
5270 fuse_reply_err(req,status);
5271 return;
5272 }
5273 memset(&rlock,0,sizeof(struct flock));
5274 if (rtype==POSIX_LOCK_RDLCK) {
5275 rlock.l_type = F_RDLCK;
5276 rctype = 'R';
5277 } else if (rtype==POSIX_LOCK_WRLCK) {
5278 rlock.l_type = F_WRLCK;
5279 rctype = 'W';
5280 } else {
5281 rlock.l_type = F_UNLCK;
5282 rctype = 'U';
5283 }
5284 rlock.l_whence = SEEK_SET;
5285 rlock.l_start = rstart;
5286 if ((rend-rstart)>INT64_MAX) {
5287 rlock.l_len = 0;
5288 } else {
5289 rlock.l_len = (rend - rstart);
5290 }
5291 rlock.l_pid = rpid;
5292 oplog_printf(&ctx,"getlk (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c) [handle:%08"PRIX32"]: (start:%"PRIu64" end:%"PRIu64" type:%c pid:%"PRIu32")",(unsigned long int)ino,owner,start,end,ctype,(uint32_t)(fi->fh),rstart,rend,rctype,rpid);
5293 fuse_reply_lock(req,&rlock);
5294 }
5295
5296 void mfs_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, struct flock *lock, int sl) {
5297 int status;
5298 struct fuse_ctx ctx;
5299 uint32_t reqid;
5300 uint64_t owner;
5301 uint64_t start,end;
5302 uint32_t pid;
5303 uint8_t type;
5304 uint8_t invalid;
5305 char ctype;
5306 finfo *fileinfo;
5307 plock_data *pld;
5308 uint32_t refs;
5309 char *cmdname;
5310
5311 if (no_posix_locks) {
5312 fuse_reply_err(req,ENOSYS);
5313 return;
5314 }
5315 ctx = *(fuse_req_ctx(req));
5316 mfs_stats_inc(OP_SETLK);
5317 if (sl) {
5318 cmdname = "setlkw";
5319 } else {
5320 cmdname = "setlk";
5321 }
5322 if (IS_SPECIAL_INODE(ino)) {
5323 if (debug_mode) {
5324 oplog_printf(&ctx,"%s (inode:%lu owner:- start:- end:- type:-): %s",cmdname,(unsigned long int)ino,strerr(EPERM));
5325 fprintf(stderr,"%s (inode:%lu owner:- start:- end:- type:-)\n",cmdname,(unsigned long int)ino);
5326 }
5327 fuse_reply_err(req,EPERM);
5328 return;
5329 }
5330 invalid = 0;
5331 type = 0; // make gcc happy
5332 ctype = '-';
5333 if (lock->l_whence!=SEEK_SET) { // position has to be converted by the kernel
5334 invalid = 1;
5335 } else if (lock->l_type==F_UNLCK) {
5336 type = POSIX_LOCK_UNLCK;
5337 ctype = 'U';
5338 } else if (lock->l_type==F_RDLCK) {
5339 type = POSIX_LOCK_RDLCK;
5340 ctype = 'R';
5341 } else if (lock->l_type==F_WRLCK) {
5342 type = POSIX_LOCK_WRLCK;
5343 ctype = 'W';
5344 } else {
5345 invalid = 1;
5346 }
5347 if (invalid) {
5348 if (debug_mode) {
5349 oplog_printf(&ctx,"%s (inode:%lu owner:- start:- end:- type:-): %s",cmdname,(unsigned long int)ino,strerr(EINVAL));
5350 fprintf(stderr,"%s (inode:%lu owner:- start:- end:- type:-)\n",cmdname,(unsigned long int)ino);
5351 }
5352 fuse_reply_err(req,EINVAL);
5353 return;
5354 }
5355 if (fi==NULL) {
5356 if (debug_mode) {
5357 oplog_printf(&ctx,"%s (inode:%lu owner:- start:- end:- type:-): %s",cmdname,(unsigned long int)ino,strerr(EBADF));
5358 fprintf(stderr,"%s (inode:%lu owner:- start:- end:- type:-)\n",cmdname,(unsigned long int)ino);
5359 }
5360 fuse_reply_err(req,EBADF);
5361 return;
5362 }
5363 fileinfo = finfo_get(fi->fh);
5364 if (fileinfo==NULL) {
5365 if (debug_mode) {
5366 oplog_printf(&ctx,"%s (inode:%lu owner:- start:- end:- type:-): %s",cmdname,(unsigned long int)ino,strerr(EBADF));
5367 fprintf(stderr,"%s (inode:%lu owner:- start:- end:- type:-)\n",cmdname,(unsigned long int)ino);
5368 }
5369 fuse_reply_err(req,EBADF);
5370 return;
5371 }
5372 if (fileinfo->inode!=ino) {
5373 if (debug_mode) {
5374 oplog_printf(&ctx,"%s (handle_inode:%lu != inode:%lu owner:- start:- end:- type:-): %s",cmdname,(unsigned long int)(fileinfo->inode),(unsigned long int)ino,strerr(EBADF));
5375 fprintf(stderr,"%s (inode:%lu owner:- start:- end:- type:-)\n",cmdname,(unsigned long int)ino);
5376 }
5377 fuse_reply_err(req,EBADF);
5378 return;
5379 }
5380
5381 owner = fi->lock_owner;
5382 start = lock->l_start;
5383 if (lock->l_len==0) {
5384 end = UINT64_MAX;
5385 } else {
5386 end = start + lock->l_len;
5387 }
5388
5389 zassert(pthread_mutex_lock(&(fileinfo->lock)));
5390
5391 // wait for full open
5392 while (fileinfo->open_in_master==0) {
5393 fileinfo->open_waiting++;
5394 zassert(pthread_cond_wait(&(fileinfo->opencond),&(fileinfo->lock)));
5395 fileinfo->open_waiting--;
5396 }
5397
5398 if (fileinfo->open_status!=0) {
5399 status = fileinfo->open_status;
5400 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
5401 oplog_printf(&ctx,"%s (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c) [handle:%08"PRIX32"] (this is open error): %s",cmdname,(unsigned long int)ino,owner,start,end,ctype,(uint32_t)(fi->fh),strerr(status));
5402 fuse_reply_err(req,status);
5403 return;
5404 }
5405
5406 // track all locks to unlock them on release
5407 if (type!=POSIX_LOCK_UNLCK) {
5408 finfo_lock_owner *flo;
5409 // add pid,owner_id to list
5410 for (flo=fileinfo->posix_lo_head ; flo!=NULL ; flo=flo->next) {
5411 #ifdef FLUSH_EXTRA_LOCKS
5412 if (flo->pid==ctx.pid && flo->lock_owner==owner) {
5413 #else
5414 if (flo->lock_owner==owner) {
5415 #endif
5416 break;
5417 }
5418 }
5419 if (flo==NULL) {
5420 flo = malloc(sizeof(finfo_lock_owner));
5421 #ifdef FLUSH_EXTRA_LOCKS
5422 flo->pid = ctx.pid;
5423 #endif
5424 flo->lock_owner = owner;
5425 flo->next = fileinfo->posix_lo_head;
5426 fileinfo->posix_lo_head = flo;
5427 }
5428 }
5429
5430 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
5431
5432 pid = ctx.pid;
5433 #ifdef HAVE___SYNC_OP_AND_FETCH
5434 do {
5435 reqid = __sync_add_and_fetch(&plock_reqid,1);
5436 } while (reqid==0);
5437 __sync_or_and_fetch(&(fileinfo->uselocks),2);
5438 #else
5439 zassert(pthread_mutex_lock(&plock_lock));
5440 plock_reqid++;
5441 if (plock_reqid==0) {
5442 plock_reqid=1;
5443 }
5444 reqid = plock_reqid;
5445 zassert(pthread_mutex_unlock(&plock_lock));
5446 zassert(pthread_mutex_lock(&(fileinfo->lock)));
5447 fileinfo->uselocks |= 2;
5448 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
5449 #endif
5450 if (debug_mode) {
5451 oplog_printf(&ctx,"%s (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c) [handle:%08"PRIX32"] ...",cmdname,(unsigned long int)ino,owner,start,end,ctype,(uint32_t)(fi->fh));
5452 fprintf(stderr,"%s (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c)\n",cmdname,(unsigned long int)ino,owner,start,end,ctype);
5453 }
5454 if (type==POSIX_LOCK_UNLCK) {
5455 mfs_do_fsync(fileinfo);
5456 status = fs_posixlock(ino,reqid,owner,POSIX_LOCK_CMD_SET,POSIX_LOCK_UNLCK,start,end,pid,NULL,NULL,NULL,NULL);
5457 status = mfs_errorconv(status);
5458 pld = NULL;
5459 } else if (sl==0) {
5460 status = fs_posixlock(ino,reqid,owner,POSIX_LOCK_CMD_TRY,type,start,end,pid,NULL,NULL,NULL,NULL);
5461 status = mfs_errorconv(status);
5462 pld = NULL;
5463 } else {
5464 pld = malloc(sizeof(plock_data));
5465 passert(pld);
5466 pld->reqid = reqid;
5467 pld->inode = ino;
5468 pld->owner = owner;
5469 pld->start = start;
5470 pld->end = end;
5471 pld->ctype = ctype;
5472 pld->refs = 1;
5473 fuse_req_interrupt_func(req,mfs_plock_interrupt_spawner,pld);
5474 if (fuse_req_interrupted(req)==0) {
5475 status = fs_posixlock(ino,reqid,owner,POSIX_LOCK_CMD_SET,type,start,end,pid,NULL,NULL,NULL,NULL);
5476 status = mfs_errorconv(status);
5477 } else {
5478 status = EINTR;
5479 }
5480 fuse_req_interrupt_func(req,NULL,NULL);
5481 }
5482 if (status==0) {
5483 oplog_printf(&ctx,"%s (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c) [handle:%08"PRIX32"]: OK",cmdname,(unsigned long int)ino,owner,start,end,ctype,(uint32_t)(fi->fh));
5484 } else {
5485 oplog_printf(&ctx,"%s (inode:%lu owner:%016"PRIX64" start:%"PRIu64" end:%"PRIu64" type:%c) [handle:%08"PRIX32"]: %s",cmdname,(unsigned long int)ino,owner,start,end,ctype,(uint32_t)(fi->fh),strerr(status));
5486 }
5487 fuse_reply_err(req,status);
5488 if (pld!=NULL) {
5489 #ifdef HAVE___SYNC_OP_AND_FETCH
5490 refs = __sync_sub_and_fetch(&(pld->refs),1);
5491 #else
5492 zassert(pthread_mutex_lock(&plock_lock));
5493 pld->refs--;
5494 refs = pld->refs;
5495 zassert(pthread_mutex_unlock(&plock_lock));
5496 #endif
5497 if (refs==0) {
5498 free(pld);
5499 }
5500 }
5501 }
5502 #endif
5503
5504 // Linux ACL format:
5505 // version:8 (2)
5506 // flags:8 (0)
5507 // filler:16
5508 // N * [ tag:16 perm:16 id:32 ]
5509 // tag:
5510 // 01 - user
5511 // 02 - named user
5512 // 04 - group
5513 // 08 - named group
5514 // 10 - mask
5515 // 20 - other
5516
5517 int mfs_getfacl(fuse_req_t req,fuse_ino_t ino,/*uint8_t opened,uint32_t uid,uint32_t gids,uint32_t *gid,*/uint8_t aclxattr,const uint8_t **buff,uint32_t *leng) {
5518 uint16_t userperm;
5519 uint16_t groupperm;
5520 uint16_t otherperm;
5521 uint16_t maskperm;
5522 uint16_t namedusers;
5523 uint16_t namedgroups;
5524 const uint8_t *namedacls;
5525 uint8_t *b;
5526 uint32_t namedaclssize;
5527 const uint8_t *p;
5528 uint32_t i;
5529 int status;
5530
5531 (void)req;
5532 *buff = NULL;
5533 *leng = 0;
5534 status = fs_getfacl(ino,/*opened,uid,gids,gid,*/aclxattr,&userperm,&groupperm,&otherperm,&maskperm,&namedusers,&namedgroups,&namedacls,&namedaclssize);
5535
5536 if (status!=MFS_STATUS_OK) {
5537 return status;
5538 }
5539
5540 if (((namedusers+namedgroups)*6U) != namedaclssize) {
5541 return MFS_ERROR_EINVAL;
5542 }
5543
5544 *leng = 4+32+(namedusers+namedgroups)*8;
5545 b = mfs_aclstorage_get(4+32+(namedusers+namedgroups)*8);
5546 // fprintf(stderr,"getacl buff ptr: %p (size: %u)\n",(void*)b,4+32+(namedusers+namedgroups)*8);
5547 *buff = b;
5548 p = namedacls;
5549 b[0] = 2;
5550 b[1] = 0;
5551 b[2] = 0;
5552 b[3] = 0;
5553 b+=4;
5554 *(uint16_t*)(b) = 1;
5555 *(uint16_t*)(b+2) = userperm;
5556 *(uint32_t*)(b+4) = UINT32_C(0xFFFFFFFF);
5557 b+=8;
5558 for (i=0 ; i<namedusers ; i++) {
5559 *(uint32_t*)(b+4) = get32bit(&p);
5560 *(uint16_t*)(b) = 2;
5561 *(uint16_t*)(b+2) = get16bit(&p);
5562 b+=8;
5563 }
5564 *(uint16_t*)(b) = 4;
5565 *(uint16_t*)(b+2) = groupperm;
5566 *(uint32_t*)(b+4) = UINT32_C(0xFFFFFFFF);
5567 b+=8;
5568 for (i=0 ; i<namedgroups ; i++) {
5569 *(uint32_t*)(b+4) = get32bit(&p);
5570 *(uint16_t*)(b) = 8;
5571 *(uint16_t*)(b+2) = get16bit(&p);
5572 b+=8;
5573 }
5574 *(uint16_t*)(b) = 16;
5575 *(uint16_t*)(b+2) = maskperm;
5576 *(uint32_t*)(b+4) = UINT32_C(0xFFFFFFFF);
5577 b+=8;
5578 *(uint16_t*)(b) = 32;
5579 *(uint16_t*)(b+2) = otherperm;
5580 *(uint32_t*)(b+4) = UINT32_C(0xFFFFFFFF);
5581 // b+=8;
5582
5583 // fprintf(stderr,"getacl buff end ptr: %p\n",(void*)b);
5584 return MFS_STATUS_OK;
5585 }
5586
5587 int mfs_setfacl(fuse_req_t req,fuse_ino_t ino,uint32_t uid,uint8_t aclxattr,const char *buff,uint32_t leng) {
5588 uint16_t userperm;
5589 uint16_t groupperm;
5590 uint16_t otherperm;
5591 uint16_t maskperm;
5592 uint16_t namedusers;
5593 uint16_t namedgroups;
5594 uint16_t acls;
5595 uint8_t *p,*namedacls;
5596 uint32_t i;
5597 uint16_t tag;
5598
5599 (void)req;
5600 if (leng<4 || ((leng % 8) != 4) ) {
5601 return MFS_ERROR_EINVAL;
5602 }
5603
5604 if (buff[0]!=2) {
5605 return MFS_ERROR_EINVAL;
5606 }
5607
5608 acls = (leng - 4) / 8;
5609 userperm = 0xFFFF; // means empty
5610 groupperm = 0xFFFF; // means empty
5611 otherperm = 0xFFFF; // means empty
5612 maskperm = 0xFFFF; // means no mask
5613 namedusers = 0;
5614 namedgroups = 0;
5615
5616 for (i=0 ; i<acls ; i++) {
5617 tag = *(const uint16_t*)(buff+4+i*8);
5618 if (tag & 1) {
5619 if (userperm!=0xFFFF) {
5620 return MFS_ERROR_EINVAL;
5621 }
5622 userperm = *(const uint16_t*)(buff+6+i*8);
5623 }
5624 if (tag & 2) {
5625 namedusers++;
5626 }
5627 if (tag & 4) {
5628 if (groupperm!=0xFFFF) {
5629 return MFS_ERROR_EINVAL;
5630 }
5631 groupperm = *(const uint16_t*)(buff+6+i*8);
5632 }
5633 if (tag & 8) {
5634 namedgroups++;
5635 }
5636 if (tag & 16) {
5637 if (maskperm!=0xFFFF) {
5638 return MFS_ERROR_EINVAL;
5639 }
5640 maskperm = *(const uint16_t*)(buff+6+i*8);
5641 }
5642 if (tag & 32) {
5643 if (otherperm!=0xFFFF) {
5644 return MFS_ERROR_EINVAL;
5645 }
5646 otherperm = *(const uint16_t*)(buff+6+i*8);
5647 }
5648 }
5649 if (maskperm==0xFFFF && (namedusers|namedgroups)>0) {
5650 return MFS_ERROR_EINVAL;
5651 }
5652
5653 namedacls = mfs_aclstorage_get((namedusers+namedgroups)*6);
5654 // fprintf(stderr,"namedacls ptr: %p (size: %u)\n",(void*)namedacls,(namedusers+namedgroups)*6);
5655 p = namedacls;
5656 for (i=0 ; i<acls ; i++) {
5657 tag = *(const uint16_t*)(buff+4+i*8);
5658 if (tag & 2) {
5659 put32bit(&p,*(const uint32_t*)(buff+8+i*8));
5660 put16bit(&p,*(const uint16_t*)(buff+6+i*8));
5661 }
5662 }
5663 for (i=0 ; i<acls ; i++) {
5664 tag = *(const uint16_t*)(buff+4+i*8);
5665 if (tag & 8) {
5666 put32bit(&p,*(const uint32_t*)(buff+8+i*8));
5667 put16bit(&p,*(const uint16_t*)(buff+6+i*8));
5668 }
5669 }
5670 // fprintf(stderr,"namedacls end ptr: %p\n",(void*)p);
5671 return fs_setfacl(ino,uid,aclxattr,userperm,groupperm,otherperm,maskperm,namedusers,namedgroups,namedacls,(namedusers+namedgroups)*6);
5672 }
5673
5674 #if defined(__APPLE__)
5675 void mfs_setxattr (fuse_req_t req, fuse_ino_t ino, const char *name, const char *value, size_t size, int flags, uint32_t position) {
5676 #else
5677 void mfs_setxattr (fuse_req_t req, fuse_ino_t ino, const char *name, const char *value, size_t size, int flags) {
5678 uint32_t position=0;
5679 #endif
5680 uint32_t nleng;
5681 int status;
5682 uint8_t mode;
5683 struct fuse_ctx ctx;
5684 groups *gids;
5685 uint8_t aclxattr;
5686
5687 if (no_xattrs) {
5688 fuse_reply_err(req,ENOSYS);
5689 return;
5690 }
5691 ctx = *(fuse_req_ctx(req));
5692 mfs_stats_inc(OP_SETXATTR);
5693 if (debug_mode) {
5694 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d) ...",(unsigned long int)ino,name,(unsigned long long int)size,flags);
5695 fprintf(stderr,"setxattr (%lu,%s,%llu,%d)\n",(unsigned long int)ino,name,(unsigned long long int)size,flags);
5696 }
5697 if (IS_SPECIAL_INODE(ino)) {
5698 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(EPERM));
5699 fuse_reply_err(req,EPERM);
5700 return;
5701 }
5702 if (size>MFS_XATTR_SIZE_MAX) {
5703 #if defined(__APPLE__)
5704 // Mac OS X returns E2BIG here
5705 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(E2BIG));
5706 fuse_reply_err(req,E2BIG);
5707 #else
5708 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(ERANGE));
5709 fuse_reply_err(req,ERANGE);
5710 #endif
5711 return;
5712 }
5713 nleng = strlen(name);
5714 if (nleng>MFS_XATTR_NAME_MAX) {
5715 #if defined(__APPLE__)
5716 // Mac OS X returns EPERM here
5717 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(EPERM));
5718 fuse_reply_err(req,EPERM);
5719 #else
5720 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(ERANGE));
5721 fuse_reply_err(req,ERANGE);
5722 #endif
5723 return;
5724 }
5725 if (nleng==0) {
5726 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(EINVAL));
5727 fuse_reply_err(req,EINVAL);
5728 return;
5729 }
5730 if ((flags&XATTR_CREATE) && (flags&XATTR_REPLACE)) {
5731 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(EINVAL));
5732 fuse_reply_err(req,EINVAL);
5733 return;
5734 }
5735 mode = (flags==XATTR_CREATE)?MFS_XATTR_CREATE_ONLY:(flags==XATTR_REPLACE)?MFS_XATTR_REPLACE_ONLY:MFS_XATTR_CREATE_OR_REPLACE;
5736 aclxattr = POSIX_ACL_NONE;
5737 if (strcmp(name,"system.posix_acl_access")==0) {
5738 aclxattr = POSIX_ACL_ACCESS;
5739 } else if (strcmp(name,"system.posix_acl_default")==0) {
5740 aclxattr = POSIX_ACL_DEFAULT;
5741 }
5742 (void)position;
5743 if (xattr_cache_on) {
5744 xattr_cache_del(ino,nleng,(const uint8_t*)name);
5745 }
5746 if (aclxattr!=POSIX_ACL_NONE && xattr_acl_support==0) {
5747 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(ENOTSUP));
5748 fuse_reply_err(req,ENOTSUP);
5749 return;
5750 }
5751 if (aclxattr!=POSIX_ACL_NONE) {
5752 status = mfs_setfacl(req,ino,ctx.uid,aclxattr,value,size);
5753 } else {
5754 if (full_permissions) {
5755 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
5756 status = fs_setxattr(ino,0,ctx.uid,gids->gidcnt,gids->gidtab,nleng,(const uint8_t*)name,(uint32_t)size,(const uint8_t*)value,mode);
5757 groups_rel(gids);
5758 } else {
5759 uint32_t gidtmp = ctx.gid;
5760 status = fs_setxattr(ino,0,ctx.uid,1,&gidtmp,nleng,(const uint8_t*)name,(uint32_t)size,(const uint8_t*)value,mode);
5761 }
5762 }
5763 status = mfs_errorconv(status);
5764 if (status!=0) {
5765 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): %s",(unsigned long int)ino,name,(unsigned long long int)size,flags,strerr(status));
5766 fuse_reply_err(req,status);
5767 return;
5768 }
5769 oplog_printf(&ctx,"setxattr (%lu,%s,%llu,%d): OK",(unsigned long int)ino,name,(unsigned long long int)size,flags);
5770 xattr_cache_set(ino,ctx.uid,ctx.gid,nleng,(const uint8_t*)name,(const uint8_t*)value,(uint32_t)size,MFS_STATUS_OK);
5771 fuse_reply_err(req,0);
5772 }
5773
5774 #if defined(__APPLE__)
5775 void mfs_getxattr (fuse_req_t req, fuse_ino_t ino, const char *name, size_t size, uint32_t position) {
5776 #else
5777 void mfs_getxattr (fuse_req_t req, fuse_ino_t ino, const char *name, size_t size) {
5778 uint32_t position=0;
5779 #endif /* __APPLE__ */
5780 uint32_t nleng;
5781 uint8_t attr[ATTR_RECORD_SIZE];
5782 int status;
5783 uint8_t mode;
5784 const uint8_t *buff;
5785 uint32_t leng;
5786 struct fuse_ctx ctx;
5787 groups *gids;
5788 void *xattr_value_release;
5789 uint8_t aclxattr;
5790 uint8_t use_cache;
5791
5792 if (no_xattrs) {
5793 fuse_reply_err(req,ENOSYS);
5794 return;
5795 }
5796 ctx = *(fuse_req_ctx(req));
5797 mfs_stats_inc(OP_GETXATTR);
5798 if (debug_mode) {
5799 oplog_printf(&ctx,"getxattr (%lu,%s,%llu) ...",(unsigned long int)ino,name,(unsigned long long int)size);
5800 fprintf(stderr,"getxattr (%lu,%s,%llu)\n",(unsigned long int)ino,name,(unsigned long long int)size);
5801 }
5802 if (IS_SPECIAL_INODE(ino)) {
5803 oplog_printf(&ctx,"getxattr (%lu,%s,%llu): %s",(unsigned long int)ino,name,(unsigned long long int)size,strerr(EPERM));
5804 fuse_reply_err(req,EPERM);
5805 return;
5806 }
5807 // if (xattr_acl_support==0 && (strcmp(name,"system.posix_acl_default")==0 || strcmp(name,"system.posix_acl_access")==0)) {
5808 // oplog_printf(&ctx,"getxattr (%lu,%s,%llu): %s",(unsigned long int)ino,name,(unsigned long long int)size,strerr(ENOTSUP));
5809 // fuse_reply_err(req,ENOTSUP);
5810 // return;
5811 // }
5812 nleng = strlen(name);
5813 if (nleng>MFS_XATTR_NAME_MAX) {
5814 #if defined(__APPLE__)
5815 // Mac OS X returns EPERM here
5816 oplog_printf(&ctx,"getxattr (%lu,%s,%llu): %s",(unsigned long int)ino,name,(unsigned long long int)size,strerr(EPERM));
5817 fuse_reply_err(req,EPERM);
5818 #else
5819 oplog_printf(&ctx,"getxattr (%lu,%s,%llu): %s",(unsigned long int)ino,name,(unsigned long long int)size,strerr(ERANGE));
5820 fuse_reply_err(req,ERANGE);
5821 #endif
5822 return;
5823 }
5824 if (nleng==0) {
5825 oplog_printf(&ctx,"getxattr (%lu,%s,%llu): %s",(unsigned long int)ino,name,(unsigned long long int)size,strerr(EINVAL));
5826 fuse_reply_err(req,EINVAL);
5827 return;
5828 }
5829 if (size==0) {
5830 mode = MFS_XATTR_LENGTH_ONLY;
5831 } else {
5832 mode = MFS_XATTR_GETA_DATA;
5833 }
5834 aclxattr = POSIX_ACL_NONE;
5835 if (strcmp(name,"system.posix_acl_access")==0) {
5836 aclxattr = POSIX_ACL_ACCESS;
5837 } else if (strcmp(name,"system.posix_acl_default")==0) {
5838 aclxattr = POSIX_ACL_DEFAULT;
5839 }
5840 if (aclxattr!=POSIX_ACL_NONE && xattr_acl_support==0) {
5841 oplog_printf(&ctx,"getxattr (%lu,%s,%llu): %s",(unsigned long int)ino,name,(unsigned long long int)size,strerr(ENOTSUP));
5842 fuse_reply_err(req,ENOTSUP);
5843 return;
5844 }
5845 (void)position;
5846 if (xattr_cache_on) { // check cache before getting groups
5847 xattr_value_release = xattr_cache_get(ino,ctx.uid,ctx.gid,nleng,(const uint8_t*)name,&buff,&leng,&status);
5848 } else {
5849 xattr_value_release = NULL;
5850 }
5851 if (aclxattr==POSIX_ACL_NONE && full_permissions && xattr_value_release==NULL) { // and get groups only if data were not found in cache
5852 if (strcmp(name,"com.apple.quarantine")==0) { // special case - obtaining groups from the kernel here leads to freeze, so avoid it
5853 gids = groups_get_x(ctx.pid,ctx.uid,ctx.gid,1);
5854 } else {
5855 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
5856 }
5857 } else {
5858 gids = NULL;
5859 }
5860 use_cache = 0;
5861 if (xattr_cache_on) {
5862 if (xattr_value_release==NULL) {
5863 if (usedircache && dcache_getattr(&ctx,ino,attr) && (mfs_attr_get_mattr(attr)&MATTR_NOXATTR)) { // no xattr
5864 status = MFS_ERROR_ENOATTR;
5865 buff = NULL;
5866 leng = 0;
5867 use_cache = 2;
5868 if (debug_mode) {
5869 fprintf(stderr,"getxattr: sending negative answer using open dir cache\n");
5870 }
5871 } else {
5872 if (aclxattr!=POSIX_ACL_NONE) {
5873 status = mfs_getfacl(req,ino,aclxattr,&buff,&leng);
5874 } else {
5875 if (gids!=NULL) { // full_permissions
5876 status = fs_getxattr(ino,0,ctx.uid,gids->gidcnt,gids->gidtab,nleng,(const uint8_t*)name,MFS_XATTR_GETA_DATA,&buff,&leng);
5877 } else {
5878 uint32_t gidtmp = ctx.gid;
5879 status = fs_getxattr(ino,0,ctx.uid,1,&gidtmp,nleng,(const uint8_t*)name,MFS_XATTR_GETA_DATA,&buff,&leng);
5880 }
5881 }
5882 }
5883 xattr_cache_set(ino,ctx.uid,ctx.gid,nleng,(const uint8_t*)name,buff,leng,status);
5884 } else {
5885 use_cache = 1;
5886 if (debug_mode) {
5887 fprintf(stderr,"getxattr: sending data from cache\n");
5888 }
5889 }
5890 } else {
5891 if (usedircache && dcache_getattr(&ctx,ino,attr) && (mfs_attr_get_mattr(attr)&MATTR_NOXATTR)) { // no xattr
5892 status = MFS_ERROR_ENOATTR;
5893 buff = NULL;
5894 leng = 0;
5895 use_cache = 2;
5896 if (debug_mode) {
5897 fprintf(stderr,"getxattr: sending negative answer using open dir cache\n");
5898 }
5899 } else {
5900 if (aclxattr!=POSIX_ACL_NONE) {
5901 status = mfs_getfacl(req,ino,aclxattr,&buff,&leng);
5902 } else {
5903 if (gids!=NULL) { // full_permissions
5904 status = fs_getxattr(ino,0,ctx.uid,gids->gidcnt,gids->gidtab,nleng,(const uint8_t*)name,mode,&buff,&leng);
5905 } else {
5906 uint32_t gidtmp = ctx.gid;
5907 status = fs_getxattr(ino,0,ctx.uid,1,&gidtmp,nleng,(const uint8_t*)name,mode,&buff,&leng);
5908 }
5909 }
5910 }
5911 }
5912 if (gids!=NULL) {
5913 groups_rel(gids);
5914 }
5915 status = mfs_errorconv(status);
5916 if (status!=0) {
5917 oplog_printf(&ctx,"getxattr (%lu,%s,%llu)%s: %s",(unsigned long int)ino,name,(unsigned long long int)size,(use_cache==0)?"":(use_cache==1)?" (using cache)":" (using open dir cache)",strerr(status));
5918 fuse_reply_err(req,status);
5919 if (xattr_value_release!=NULL) {
5920 xattr_cache_rel(xattr_value_release);
5921 }
5922 return;
5923 }
5924 if (size==0) {
5925 oplog_printf(&ctx,"getxattr (%lu,%s,%llu)%s: OK (%"PRIu32")",(unsigned long int)ino,name,(unsigned long long int)size,(use_cache==0)?"":(use_cache==1)?" (using cache)":" (using open dir cache)",leng);
5926 fuse_reply_xattr(req,leng);
5927 } else {
5928 if (leng>size) {
5929 oplog_printf(&ctx,"getxattr (%lu,%s,%llu)%s: %s",(unsigned long int)ino,name,(unsigned long long int)size,(use_cache==0)?"":(use_cache==1)?" (using cache)":" (using open dir cache)",strerr(ERANGE));
5930 fuse_reply_err(req,ERANGE);
5931 } else {
5932 oplog_printf(&ctx,"getxattr (%lu,%s,%llu)%s: OK (%"PRIu32")",(unsigned long int)ino,name,(unsigned long long int)size,(use_cache==0)?"":(use_cache==1)?" (using cache)":" (using open dir cache)",leng);
5933 fuse_reply_buf(req,(const char*)buff,leng);
5934 }
5935 }
5936 if (xattr_value_release!=NULL) {
5937 xattr_cache_rel(xattr_value_release);
5938 }
5939 }
5940
5941 void mfs_listxattr (fuse_req_t req, fuse_ino_t ino, size_t size) {
5942 const uint8_t *buff;
5943 uint32_t leng;
5944 uint8_t attr[ATTR_RECORD_SIZE];
5945 int status;
5946 uint8_t mode;
5947 struct fuse_ctx ctx;
5948 groups *gids;
5949
5950 if (no_xattrs) {
5951 fuse_reply_err(req,ENOSYS);
5952 return;
5953 }
5954 ctx = *(fuse_req_ctx(req));
5955 mfs_stats_inc(OP_LISTXATTR);
5956 if (debug_mode) {
5957 oplog_printf(&ctx,"listxattr (%lu,%llu) ...",(unsigned long int)ino,(unsigned long long int)size);
5958 fprintf(stderr,"listxattr (%lu,%llu)\n",(unsigned long int)ino,(unsigned long long int)size);
5959 }
5960 if (IS_SPECIAL_INODE(ino)) {
5961 oplog_printf(&ctx,"listxattr (%lu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,strerr(EPERM));
5962 fuse_reply_err(req,EPERM);
5963 return;
5964 }
5965 if (size==0) {
5966 mode = MFS_XATTR_LENGTH_ONLY;
5967 } else {
5968 mode = MFS_XATTR_GETA_DATA;
5969 }
5970 // posix_acl_XXX are not added here - on purpose (on XFS getfattr doesn't list those ACL-like xattrs)
5971 if (usedircache && dcache_getattr(&ctx,ino,attr) && (mfs_attr_get_mattr(attr)&MATTR_NOXATTR)) { // no xattr
5972 status = MFS_STATUS_OK;
5973 buff = NULL;
5974 leng = 0;
5975 } else {
5976 if (full_permissions) {
5977 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
5978 status = fs_listxattr(ino,0,ctx.uid,gids->gidcnt,gids->gidtab,mode,&buff,&leng);
5979 groups_rel(gids);
5980 } else {
5981 uint32_t gidtmp = ctx.gid;
5982 status = fs_listxattr(ino,0,ctx.uid,1,&gidtmp,mode,&buff,&leng);
5983 }
5984 }
5985 status = mfs_errorconv(status);
5986 if (status!=0) {
5987 oplog_printf(&ctx,"listxattr (%lu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,strerr(status));
5988 fuse_reply_err(req,status);
5989 return;
5990 }
5991 if (size==0) {
5992 oplog_printf(&ctx,"listxattr (%lu,%llu): OK (%"PRIu32")",(unsigned long int)ino,(unsigned long long int)size,leng);
5993 fuse_reply_xattr(req,leng);
5994 } else {
5995 if (leng>size) {
5996 oplog_printf(&ctx,"listxattr (%lu,%llu): %s",(unsigned long int)ino,(unsigned long long int)size,strerr(ERANGE));
5997 fuse_reply_err(req,ERANGE);
5998 } else {
5999 oplog_printf(&ctx,"listxattr (%lu,%llu): OK (%"PRIu32")",(unsigned long int)ino,(unsigned long long int)size,leng);
6000 fuse_reply_buf(req,(const char*)buff,leng);
6001 }
6002 }
6003 }
6004
6005 void mfs_removexattr (fuse_req_t req, fuse_ino_t ino, const char *name) {
6006 uint32_t nleng;
6007 int status;
6008 uint8_t usecache;
6009 struct fuse_ctx ctx;
6010 groups *gids;
6011 uint8_t aclxattr;
6012 void *xattr_value_release;
6013
6014 if (no_xattrs) {
6015 fuse_reply_err(req,ENOSYS);
6016 return;
6017 }
6018 ctx = *(fuse_req_ctx(req));
6019 mfs_stats_inc(OP_REMOVEXATTR);
6020 if (debug_mode) {
6021 oplog_printf(&ctx,"removexattr (%lu,%s) ...",(unsigned long int)ino,name);
6022 fprintf(stderr,"removexattr (%lu,%s)\n",(unsigned long int)ino,name);
6023 }
6024 if (IS_SPECIAL_INODE(ino)) {
6025 oplog_printf(&ctx,"removexattr (%lu,%s): %s",(unsigned long int)ino,name,strerr(EPERM));
6026 fuse_reply_err(req,EPERM);
6027 return;
6028 }
6029 aclxattr = POSIX_ACL_NONE;
6030 if (strcmp(name,"system.posix_acl_access")==0) {
6031 aclxattr = POSIX_ACL_ACCESS;
6032 } else if (strcmp(name,"system.posix_acl_default")==0) {
6033 aclxattr = POSIX_ACL_DEFAULT;
6034 }
6035 if (aclxattr!=POSIX_ACL_NONE && xattr_acl_support==0) {
6036 oplog_printf(&ctx,"removexattr (%lu,%s): %s",(unsigned long int)ino,name,strerr(ENOTSUP));
6037 fuse_reply_err(req,ENOTSUP);
6038 return;
6039 }
6040 nleng = strlen(name);
6041 if (nleng>MFS_XATTR_NAME_MAX) {
6042 #if defined(__APPLE__)
6043 // Mac OS X returns EPERM here
6044 oplog_printf(&ctx,"removexattr (%lu,%s): %s",(unsigned long int)ino,name,strerr(EPERM));
6045 fuse_reply_err(req,EPERM);
6046 #else
6047 oplog_printf(&ctx,"removexattr (%lu,%s): %s",(unsigned long int)ino,name,strerr(ERANGE));
6048 fuse_reply_err(req,ERANGE);
6049 #endif
6050 return;
6051 }
6052 if (nleng==0) {
6053 oplog_printf(&ctx,"removexattr (%lu,%s): %s",(unsigned long int)ino,name,strerr(EINVAL));
6054 fuse_reply_err(req,EINVAL);
6055 return;
6056 }
6057 xattr_value_release = NULL;
6058 usecache = 0;
6059 if (xattr_cache_on) {
6060 xattr_value_release = xattr_cache_get(ino,ctx.uid,ctx.gid,nleng,(const uint8_t*)name,NULL,NULL,&status);
6061 if (xattr_value_release) {
6062 if (status==MFS_ERROR_ENOATTR) {
6063 usecache = 1;
6064 }
6065 xattr_cache_rel(xattr_value_release);
6066 }
6067 }
6068 if (usecache == 0) {
6069 if (aclxattr!=POSIX_ACL_NONE) {
6070 status = fs_setfacl(ino,ctx.uid,aclxattr,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0,0,NULL,0);
6071 } else {
6072 if (full_permissions) {
6073 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
6074 status = fs_removexattr(ino,0,ctx.uid,gids->gidcnt,gids->gidtab,nleng,(const uint8_t*)name);
6075 groups_rel(gids);
6076 } else {
6077 uint32_t gidtmp = ctx.gid;
6078 status = fs_removexattr(ino,0,ctx.uid,1,&gidtmp,nleng,(const uint8_t*)name);
6079 }
6080 }
6081 }
6082 if (xattr_cache_on && (status==MFS_STATUS_OK || status==MFS_ERROR_ENOATTR)) {
6083 xattr_cache_set(ino,ctx.uid,ctx.gid,nleng,(const uint8_t*)name,NULL,0,MFS_ERROR_ENOATTR);
6084 }
6085 status = mfs_errorconv(status);
6086 if (status!=0) {
6087 oplog_printf(&ctx,"removexattr (%lu,%s)%s: %s",(unsigned long int)ino,name,usecache?" (using cache)":"",strerr(status));
6088 fuse_reply_err(req,status);
6089 } else {
6090 oplog_printf(&ctx,"removexattr (%lu,%s): OK",(unsigned long int)ino,name);
6091 fuse_reply_err(req,0);
6092 }
6093 if (usecache) {
6094 if (aclxattr!=POSIX_ACL_NONE) {
6095 status = fs_setfacl(ino,ctx.uid,aclxattr,0xFFFF,0xFFFF,0xFFFF,0xFFFF,0,0,NULL,0);
6096 } else {
6097 if (full_permissions) {
6098 gids = groups_get(ctx.pid,ctx.uid,ctx.gid);
6099 status = fs_removexattr(ino,0,ctx.uid,gids->gidcnt,gids->gidtab,nleng,(const uint8_t*)name);
6100 groups_rel(gids);
6101 } else {
6102 uint32_t gidtmp = ctx.gid;
6103 status = fs_removexattr(ino,0,ctx.uid,1,&gidtmp,nleng,(const uint8_t*)name);
6104 }
6105 }
6106 }
6107 }
6108
6109 void mfs_inode_clear_cache(uint32_t inode,uint64_t offset,uint64_t leng) {
6110 struct fuse_ctx ctx;
6111 ctx.uid = 0;
6112 ctx.gid = 0;
6113 ctx.pid = 0;
6114
6115 fdcache_invalidate(inode);
6116 #if (FUSE_VERSION >= 28)
6117 #if defined(__FreeBSD__)
6118 if (freebsd_workarounds) {
6119 oplog_printf(&ctx,"invalidate cache (%"PRIu32":%"PRIu64":%"PRIu64"): not supported",inode,offset,leng);
6120 } else
6121 #endif
6122 if (fuse_comm!=NULL) {
6123 fuse_lowlevel_notify_inval_inode(fuse_comm,inode,offset,leng);
6124 oplog_printf(&ctx,"invalidate cache (%"PRIu32":%"PRIu64":%"PRIu64"): ok",inode,offset,leng);
6125 } else {
6126 oplog_printf(&ctx,"invalidate cache (%"PRIu32":%"PRIu64":%"PRIu64"): lost",inode,offset,leng);
6127 }
6128 #else
6129 oplog_printf(&ctx,"invalidate cache (%"PRIu32":%"PRIu64":%"PRIu64"): not supported",inode,offset,leng);
6130 #endif
6131 }
6132
6133 void mfs_dentry_invalidate(uint32_t parent,uint8_t nleng,const char *name) {
6134 struct fuse_ctx ctx;
6135 ctx.uid = 0;
6136 ctx.gid = 0;
6137 ctx.pid = 0;
6138
6139 #if (FUSE_VERSION >= 28)
6140 #if defined(__FreeBSD__)
6141 if (freebsd_workarounds) {
6142 oplog_printf(&ctx,"invalidate entry (%"PRIu32":%s): not supported",parent,name);
6143 } else
6144 #endif
6145 if (fuse_comm!=NULL) {
6146 fuse_lowlevel_notify_inval_entry(fuse_comm,parent,name,nleng);
6147 oplog_printf(&ctx,"invalidate entry (%"PRIu32":%s): ok",parent,name);
6148 } else {
6149 oplog_printf(&ctx,"invalidate entry (%"PRIu32":%s): lost",parent,name);
6150 }
6151 #else
6152 (void)nleng;
6153 oplog_printf(&ctx,"invalidate entry (%"PRIu32":%s): not supported",parent,name);
6154 #endif
6155 }
6156
/* Pass a new file length of 'inode' on to finfo_change_fleng. */
void mfs_inode_change_fleng(uint32_t inode,uint64_t fleng) {
	finfo_change_fleng(inode,fleng);
}
6160
6161 void mfs_term(void) {
6162 sinfo_freeall();
6163 dirbuf_freeall();
6164 finfo_freeall();
6165 xattr_cache_term();
6166 if (full_permissions) {
6167 groups_term();
6168 }
6169 }
6170
6171 void mfs_setdisables(uint32_t disables) {
6172 mfs_disables = disables;
6173 }
6174
#if defined(__FreeBSD__)
/*
 * Turn FreeBSD workarounds on or off. When the cache mode is still the
 * undecided "fbsdauto" (keep_cache==4) it is resolved here: to "direct" (3)
 * with workarounds on, to "auto" (0) with workarounds off.
 */
void mfs_freebsd_workarounds(int on) {
	freebsd_workarounds = on;
	if (keep_cache!=4) {
		return;
	}
	if (on) {
		if (debug_mode) {
			fprintf(stderr,"cachemode change fbsdauto -> direct\n");
		}
		keep_cache = 3;
	} else {
		if (debug_mode) {
			fprintf(stderr,"cachemode change fbsdauto -> auto\n");
		}
		keep_cache = 0;
	}
}
#endif
6193
6194 #ifdef HAVE_FUSE3
6195 void mfs_setsession(struct fuse_session *se) {
6196 fuse_comm = se;
6197 }
6198 #else
6199 void mfs_setchan(struct fuse_chan *ch) {
6200 fuse_comm = ch;
6201 }
6202 #endif
6203
6204 void mfs_init (int debug_mode_in,int keep_cache_in,double direntry_cache_timeout_in,double entry_cache_timeout_in,double attr_cache_timeout_in,double xattr_cache_timeout_in,double groups_cache_timeout,int mkdir_copy_sgid_in,int sugid_clear_mode_in,int xattr_acl_support_in,double fsync_before_close_min_time_in,int no_xattrs_in,int no_posix_locks_in,int no_bsd_locks_in) {
6205 #ifdef FREEBSD_DELAYED_RELEASE
6206 pthread_t th;
6207 #endif
6208 uint32_t kver;
6209 const char* sugid_clear_mode_strings[] = {SUGID_CLEAR_MODE_STRINGS};
6210
6211 kver = main_kernelversion();
6212 debug_mode = debug_mode_in;
6213 keep_cache = keep_cache_in;
6214 direntry_cache_timeout = direntry_cache_timeout_in;
6215 entry_cache_timeout = entry_cache_timeout_in;
6216 attr_cache_timeout = attr_cache_timeout_in;
6217 mkdir_copy_sgid = mkdir_copy_sgid_in;
6218 sugid_clear_mode = sugid_clear_mode_in;
6219 xattr_cache_init(xattr_cache_timeout_in);
6220 xattr_cache_on = (xattr_cache_timeout_in>0.0)?1:0;
6221 xattr_acl_support = xattr_acl_support_in;
6222 fsync_before_close_min_time = fsync_before_close_min_time_in;
6223 no_xattrs = no_xattrs_in;
6224 no_posix_locks = no_posix_locks_in;
6225 no_bsd_locks = no_bsd_locks_in;
6226 if (groups_cache_timeout>0.0) {
6227 groups_init(groups_cache_timeout,debug_mode);
6228 full_permissions = 1;
6229 } else {
6230 full_permissions = 0;
6231 }
6232 fdcache_init();
6233 mfs_aclstorage_init();
6234 if (debug_mode) {
6235 fprintf(stderr,"kernel version: %u.%u\n",kver>>16,kver&0xFFFF);
6236 fprintf(stderr,"cache parameters: file_keep_cache=%s direntry_cache_timeout=%.2lf entry_cache_timeout=%.2lf attr_cache_timeout=%.2lf xattr_cache_timeout_in=%.2lf (%s)\n",(keep_cache==1)?"always":(keep_cache==2)?"never":(keep_cache==3)?"direct":(keep_cache==4)?"fbsdauto":"auto",direntry_cache_timeout,entry_cache_timeout,attr_cache_timeout,xattr_cache_timeout_in,xattr_cache_on?"on":"off");
6237 fprintf(stderr,"mkdir copy sgid=%d\nsugid clear mode=%s\n",mkdir_copy_sgid_in,(sugid_clear_mode_in<SUGID_CLEAR_MODE_OPTIONS)?sugid_clear_mode_strings[sugid_clear_mode_in]:"???");
6238 }
6239 mfs_statsptr_init();
6240 #if defined(__linux__)
6241 if (kver<MAKE_KERNEL_VERSION(4,19)) {
6242 #ifdef DENTRY_INVALIDATOR
6243 dinval = 1;
6244 if (debug_mode) {
6245 fprintf(stderr,"turn on dentry invalidator\n");
6246 }
6247 dinval_init(direntry_cache_timeout);
6248 #else
6249 fprintf(stderr,"your libfuse version is too old to properly fix the EBUSY bug\n");
6250 #endif
6251 } else {
6252 #ifdef DENTRY_INVALIDATOR
6253 dinval = 0;
6254 #endif
6255 }
6256 #endif
6257 #ifdef FREEBSD_DELAYED_RELEASE
6258 lwt_minthread_create(&th,1,finfo_delayed_release_cleanup_thread,NULL);
6259 #endif
6260 }
6261