1 /*
2 * Copyright (C) 2021 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3 *
4 * This file is part of MooseFS.
5 *
6 * MooseFS is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, version 2 (only).
9 *
10 * MooseFS is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with MooseFS; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18 * or visit http://www.gnu.org/licenses/gpl-2.0.html
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <string.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <pthread.h>
34 #ifdef HAVE_SYS_FILE_H
35 #include <sys/file.h>
36 #endif
37
38 #include "MFSCommunication.h"
39 #include "mastercomm.h"
40 #include "inoleng.h"
41 #include "readdata.h"
42 #include "writedata.h"
43 #include "truncate.h"
44 #include "csdb.h"
45 #include "delayrun.h"
46 #include "conncache.h"
47 #include "chunkrwlock.h"
48 #include "chunksdatacache.h"
49 #include "portable.h"
50 #include "stats.h"
51 #include "crc.h"
52 #include "strerr.h"
53 #include "datapack.h"
54 #include "cfg.h"
55 #include "mfsstrerr.h"
56 #include "massert.h"
57 #include "md5.h"
58 #include "idstr.h"
59
60 #include "mfsio.h"
61
62 // #define DEBUG
63
// Upper bound (exclusive) for file sizes and offsets accepted by this layer, as a signed 64-bit value.
// The expansion is fully parenthesized so it behaves as a single operand inside any expression.
#define MAX_FILE_SIZE ((int64_t)(MFS_MAX_FILE_SIZE))

// Values of the 'existflag' argument of mfs_path_to_inodes - they select how the last path component is treated.
#define PATH_TO_INODES_EXPECT_NOENTRY 0	// last component must not exist (EEXIST when it does)
#define PATH_TO_INODES_EXPECT_OBJECT 1	// last component must exist (lookup error is returned otherwise)
#define PATH_TO_INODES_SKIP_LAST 2	// last component is not looked up at all
#define PATH_TO_INODES_CHECK_LAST 3	// last component is looked up; inode 0 is reported when the lookup fails

// Fallbacks for errno values that are not defined on every platform.
#ifndef EDQUOT
# define EDQUOT ENOSPC
#endif
#ifndef ENOATTR
# ifdef ENODATA
# define ENOATTR ENODATA
# else
# define ENOATTR ENOENT
# endif
#endif
81
mfs_errorconv(int status)82 static int mfs_errorconv(int status) {
83 int ret;
84 switch (status) {
85 case MFS_STATUS_OK:
86 ret=0;
87 break;
88 case MFS_ERROR_EPERM:
89 ret=EPERM;
90 break;
91 case MFS_ERROR_ENOTDIR:
92 ret=ENOTDIR;
93 break;
94 case MFS_ERROR_ENOENT:
95 ret=ENOENT;
96 break;
97 case MFS_ERROR_EACCES:
98 ret=EACCES;
99 break;
100 case MFS_ERROR_EEXIST:
101 ret=EEXIST;
102 break;
103 case MFS_ERROR_EINVAL:
104 ret=EINVAL;
105 break;
106 case MFS_ERROR_ENOTEMPTY:
107 ret=ENOTEMPTY;
108 break;
109 case MFS_ERROR_IO:
110 ret=EIO;
111 break;
112 case MFS_ERROR_EROFS:
113 ret=EROFS;
114 break;
115 case MFS_ERROR_EINTR:
116 ret=EINTR;
117 break;
118 case MFS_ERROR_EAGAIN:
119 ret=EAGAIN;
120 break;
121 case MFS_ERROR_ECANCELED:
122 ret=ECANCELED;
123 break;
124 case MFS_ERROR_QUOTA:
125 ret=EDQUOT;
126 break;
127 case MFS_ERROR_ENOATTR:
128 ret=ENOATTR;
129 break;
130 case MFS_ERROR_ENOTSUP:
131 ret=ENOTSUP;
132 break;
133 case MFS_ERROR_ERANGE:
134 ret=ERANGE;
135 break;
136 case MFS_ERROR_NOSPACE:
137 ret=ENOSPC;
138 break;
139 case MFS_ERROR_CHUNKLOST:
140 ret=ENXIO;
141 break;
142 case MFS_ERROR_NOCHUNKSERVERS:
143 ret=ENOSPC;
144 break;
145 case MFS_ERROR_CSNOTPRESENT:
146 ret=ENXIO;
147 break;
148 case MFS_ERROR_NOTOPENED:
149 ret=EBADF;
150 break;
151 default:
152 ret=EINVAL;
153 break;
154 }
155 return ret;
156 }
157
mfs_type_convert(uint8_t type)158 static inline uint8_t mfs_type_convert(uint8_t type) {
159 switch (type) {
160 case DISP_TYPE_FILE:
161 return TYPE_FILE;
162 case DISP_TYPE_DIRECTORY:
163 return TYPE_DIRECTORY;
164 case DISP_TYPE_SYMLINK:
165 return TYPE_SYMLINK;
166 case DISP_TYPE_FIFO:
167 return TYPE_FIFO;
168 case DISP_TYPE_BLOCKDEV:
169 return TYPE_BLOCKDEV;
170 case DISP_TYPE_CHARDEV:
171 return TYPE_CHARDEV;
172 case DISP_TYPE_SOCKET:
173 return TYPE_SOCKET;
174 case DISP_TYPE_TRASH:
175 return TYPE_TRASH;
176 case DISP_TYPE_SUSTAINED:
177 return TYPE_SUSTAINED;
178 }
179 return 0;
180 }
181
#if 0
// for future use
// Disabled code (excluded from compilation by '#if 0').
// Clears 'stbuf' and fills in only st_ino and the file-type bits of st_mode.
// 'type' may use either the DISP_TYPE_* or the TYPE_* encoding; its highest bit is ignored.
// For an unrecognized type st_mode stays 0.
static void mfs_type_to_stat(uint32_t inode,uint8_t type, struct stat *stbuf) {
	memset(stbuf,0,sizeof(struct stat));
	stbuf->st_ino = inode;
	switch (type&0x7F) {
	case DISP_TYPE_DIRECTORY:
	case TYPE_DIRECTORY:
		stbuf->st_mode = S_IFDIR;
		break;
	case DISP_TYPE_SYMLINK:
	case TYPE_SYMLINK:
		stbuf->st_mode = S_IFLNK;
		break;
	case DISP_TYPE_FILE:
	case TYPE_FILE:
		stbuf->st_mode = S_IFREG;
		break;
	case DISP_TYPE_FIFO:
	case TYPE_FIFO:
		stbuf->st_mode = S_IFIFO;
		break;
	case DISP_TYPE_SOCKET:
	case TYPE_SOCKET:
		stbuf->st_mode = S_IFSOCK;
		break;
	case DISP_TYPE_BLOCKDEV:
	case TYPE_BLOCKDEV:
		stbuf->st_mode = S_IFBLK;
		break;
	case DISP_TYPE_CHARDEV:
	case TYPE_CHARDEV:
		stbuf->st_mode = S_IFCHR;
		break;
	default:
		stbuf->st_mode = 0;
	}
}
#endif
221
fsnodes_type_convert(uint8_t type)222 static inline uint8_t fsnodes_type_convert(uint8_t type) {
223 switch (type) {
224 case DISP_TYPE_FILE:
225 return TYPE_FILE;
226 case DISP_TYPE_DIRECTORY:
227 return TYPE_DIRECTORY;
228 case DISP_TYPE_SYMLINK:
229 return TYPE_SYMLINK;
230 case DISP_TYPE_FIFO:
231 return TYPE_FIFO;
232 case DISP_TYPE_BLOCKDEV:
233 return TYPE_BLOCKDEV;
234 case DISP_TYPE_CHARDEV:
235 return TYPE_CHARDEV;
236 case DISP_TYPE_SOCKET:
237 return TYPE_SOCKET;
238 case DISP_TYPE_TRASH:
239 return TYPE_TRASH;
240 case DISP_TYPE_SUSTAINED:
241 return TYPE_SUSTAINED;
242 }
243 return 0;
244 }
245
/*
 * Extracts the node type (TYPE_*) from an attribute record.
 * When attr[0]<64 (record layout of 1.7.29 and up) the type is the high nibble of attr[1];
 * otherwise attr[0] (without its highest bit) is a DISP_TYPE_* value that gets converted.
 */
static inline uint8_t mfs_attr_get_type(const uint8_t attr[ATTR_RECORD_SIZE]) {
	if (attr[0]>=64) {
		return fsnodes_type_convert(attr[0]&0x7F);
	}
	return (attr[1]>>4); // 1.7.29 and up
}
253
mfs_attr_to_stat(uint32_t inode,const uint8_t attr[ATTR_RECORD_SIZE],struct stat * stbuf)254 static void mfs_attr_to_stat(uint32_t inode,const uint8_t attr[ATTR_RECORD_SIZE], struct stat *stbuf) {
255 uint16_t attrmode;
256 uint8_t attrtype;
257 uint32_t attruid,attrgid,attratime,attrmtime,attrctime,attrnlink,attrrdev;
258 uint64_t attrlength;
259 const uint8_t *ptr;
260 ptr = attr;
261 if (attr[0]<64) { // 1.7.29 and up
262 ptr++;
263 attrmode = get16bit(&ptr);
264 attrtype = (attrmode>>12);
265 } else {
266 attrtype = get8bit(&ptr);
267 attrtype = mfs_type_convert(attrtype&0x7F);
268 attrmode = get16bit(&ptr);
269 }
270 attrmode &= 0x0FFF;
271 attruid = get32bit(&ptr);
272 attrgid = get32bit(&ptr);
273 attratime = get32bit(&ptr);
274 attrmtime = get32bit(&ptr);
275 attrctime = get32bit(&ptr);
276 attrnlink = get32bit(&ptr);
277 stbuf->st_ino = inode;
278 #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
279 stbuf->st_blksize = MFSBLOCKSIZE;
280 #endif
281 switch (attrtype & 0x7F) {
282 case TYPE_DIRECTORY:
283 stbuf->st_mode = S_IFDIR | attrmode;
284 attrlength = get64bit(&ptr);
285 stbuf->st_size = attrlength;
286 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
287 stbuf->st_blocks = (attrlength+511)/512;
288 #endif
289 break;
290 case TYPE_SYMLINK:
291 stbuf->st_mode = S_IFLNK | attrmode;
292 attrlength = get64bit(&ptr);
293 stbuf->st_size = attrlength;
294 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
295 stbuf->st_blocks = (attrlength+511)/512;
296 #endif
297 break;
298 case TYPE_FILE:
299 stbuf->st_mode = S_IFREG | attrmode;
300 attrlength = get64bit(&ptr);
301 stbuf->st_size = attrlength;
302 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
303 stbuf->st_blocks = (attrlength+511)/512;
304 #endif
305 break;
306 case TYPE_FIFO:
307 stbuf->st_mode = S_IFIFO | attrmode;
308 stbuf->st_size = 0;
309 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
310 stbuf->st_blocks = 0;
311 #endif
312 break;
313 case TYPE_SOCKET:
314 stbuf->st_mode = S_IFSOCK | attrmode;
315 stbuf->st_size = 0;
316 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
317 stbuf->st_blocks = 0;
318 #endif
319 break;
320 case TYPE_BLOCKDEV:
321 stbuf->st_mode = S_IFBLK | attrmode;
322 attrrdev = get32bit(&ptr);
323 #ifdef HAVE_STRUCT_STAT_ST_RDEV
324 stbuf->st_rdev = attrrdev;
325 #endif
326 stbuf->st_size = 0;
327 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
328 stbuf->st_blocks = 0;
329 #endif
330 break;
331 case TYPE_CHARDEV:
332 stbuf->st_mode = S_IFCHR | attrmode;
333 attrrdev = get32bit(&ptr);
334 #ifdef HAVE_STRUCT_STAT_ST_RDEV
335 stbuf->st_rdev = attrrdev;
336 #endif
337 stbuf->st_size = 0;
338 #ifdef HAVE_STRUCT_STAT_ST_BLOCKS
339 stbuf->st_blocks = 0;
340 #endif
341 break;
342 default:
343 stbuf->st_mode = 0;
344 }
345 stbuf->st_uid = attruid;
346 stbuf->st_gid = attrgid;
347 stbuf->st_atime = attratime;
348 stbuf->st_mtime = attrmtime;
349 stbuf->st_ctime = attrctime;
350 #ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME
351 stbuf->st_birthtime = attrctime; // for future use
352 #endif
353 stbuf->st_nlink = attrnlink;
354 }
355
// Identity of the calling process in the form passed to the fs_* calls (filled by mfs_get_credentials).
typedef struct _cred {
	uint32_t uid;	// effective user id
	uint32_t gidcnt;	// number of valid entries in gidtab
	uint32_t gidtab[NGROUPS_MAX+1];	// gidtab[0] is the effective gid, the remaining entries are the other group ids
} cred;
361
/*
 * Fills 'ctx' with the credentials of the calling process.
 *
 * ctx->uid       - effective user id
 * ctx->gidtab[0] - effective group id
 * ctx->gidtab[1..] - groups reported by getgroups(), without duplicates of the effective gid
 * ctx->gidcnt    - number of valid entries in gidtab (always at least 1)
 *
 * When getgroups() fails only the effective gid is reported. Its -1 result must not be used
 * as an unsigned element count, because that would read and write far outside both tables.
 */
static void mfs_get_credentials(cred *ctx) {
	gid_t gids[NGROUPS_MAX];
	gid_t gid;
	int ngroups;
	uint32_t i,j;

	ctx->uid = geteuid();
	ngroups = getgroups(NGROUPS_MAX,gids);
	if (ngroups<0) { // error - fall back to the effective gid only
		ngroups = 0;
	}
	gid = getegid();
	ctx->gidtab[0] = gid;
	for (i=0,j=1 ; i<(uint32_t)ngroups ; i++) {
		if (gids[i]!=gid) {
			ctx->gidtab[j++] = gids[i];
		}
	}
	ctx->gidcnt = j;
}
378
mfs_path_to_inodes(const char * path,uint32_t * parent,uint32_t * inode,uint8_t name[256],uint8_t * nleng,uint8_t existflag,uint8_t attr[ATTR_RECORD_SIZE])379 static int mfs_path_to_inodes(const char *path,uint32_t *parent,uint32_t *inode,uint8_t name[256],uint8_t *nleng,uint8_t existflag,uint8_t attr[ATTR_RECORD_SIZE]) {
380 uint32_t cinode = MFS_ROOT_ID;
381 uint32_t pinode = MFS_ROOT_ID;
382 cred cr;
383 const char *pptr = path;
384 uint8_t partlen,status;
385
386 mfs_get_credentials(&cr);
387 if (inode!=NULL) {
388 *inode = 0;
389 }
390 memset(attr,0,ATTR_RECORD_SIZE);
391 if (path[0]==0) {
392 errno = EINVAL;
393 return -1;
394 }
395 if (path[0]=='/' && path[1]==0) {
396 if (existflag==PATH_TO_INODES_SKIP_LAST) {
397 if (parent!=NULL) {
398 *parent = pinode;
399 }
400 if (inode!=NULL) {
401 *inode = cinode;
402 }
403 name[0] = '.';
404 name[1] = 0;
405 *nleng = 1;
406 return 0;
407 }
408 name[0] = '.';
409 status = fs_simple_lookup(pinode,1,name,cr.uid,cr.gidcnt,cr.gidtab,&cinode,attr);
410 name[0] = 0;
411 if (status!=MFS_STATUS_OK) {
412 errno = mfs_errorconv(status);
413 return -1;
414 }
415 if (parent!=NULL) {
416 *parent = pinode;
417 }
418 if (inode!=NULL) {
419 *inode = cinode;
420 }
421 *nleng = 0;
422 return 0;
423 }
424 partlen = 0;
425 while (*pptr) {
426 if (*pptr=='/') {
427 pinode = cinode;
428 if (partlen>0) {
429 name[partlen] = 0;
430 #ifdef DEBUG
431 printf("perform lookup for (%u,%s) (internal part)\n",pinode,(char*)name);
432 #endif
433 status = fs_simple_lookup(pinode,partlen,name,cr.uid,cr.gidcnt,cr.gidtab,&cinode,attr);
434 if (status!=MFS_STATUS_OK) {
435 errno = mfs_errorconv(status);
436 return -1;
437 }
438 if (mfs_attr_get_type(attr)!=TYPE_DIRECTORY) {
439 errno = ENOTDIR;
440 return -1;
441 }
442 #ifdef DEBUG
443 printf("result inode: %u\n",cinode);
444 #endif
445 }
446 partlen = 0;
447 } else {
448 if (partlen==255) {
449 errno = ENAMETOOLONG;
450 return -1; // name too long
451 }
452 name[partlen++] = *pptr;
453 }
454 pptr++;
455 }
456 pinode = cinode;
457 if (partlen>0 && existflag!=PATH_TO_INODES_SKIP_LAST) {
458 #ifdef DEBUG
459 printf("perform lookup for (%u,%s) (last part)\n",pinode,(char*)name);
460 #endif
461 status = fs_simple_lookup(pinode,partlen,name,cr.uid,cr.gidcnt,cr.gidtab,&cinode,attr);
462 #ifdef DEBUG
463 if (status==MFS_STATUS_OK) {
464 printf("result inode: %u\n",cinode);
465 } else {
466 printf("lookup error: %s\n",mfsstrerr(status));
467 }
468 #endif
469 if (existflag==PATH_TO_INODES_EXPECT_NOENTRY) {
470 if (status==MFS_STATUS_OK) {
471 if (inode!=NULL) {
472 *inode = cinode;
473 }
474 errno = EEXIST;
475 return -1;
476 } else if (status!=MFS_ERROR_ENOENT) {
477 errno = mfs_errorconv(status);
478 return -1;
479 }
480 } else if (existflag==PATH_TO_INODES_EXPECT_OBJECT) {
481 if (status!=MFS_STATUS_OK) {
482 errno = mfs_errorconv(status);
483 return -1;
484 }
485 } else {
486 if (status!=MFS_STATUS_OK) {
487 cinode = 0;
488 }
489 }
490 name[partlen] = 0;
491 }
492 if (parent!=NULL) {
493 *parent = pinode;
494 }
495 if (inode!=NULL) {
496 *inode = cinode;
497 }
498 *nleng = partlen;
499 return 0;
500 }
501
/*
 * Returns the node type (TYPE_*) stored in an attribute record.
 * For attr[0]<64 the type is the high nibble of attr[1]; otherwise attr[0] (without its
 * highest bit) is a DISP_TYPE_* value converted by mfs_type_convert.
 */
static uint8_t mfs_attr_to_type(const uint8_t attr[ATTR_RECORD_SIZE]) {
	if (attr[0]>=64) {
		return mfs_type_convert(attr[0]&0x7f);
	}
	return attr[1]>>4;
}
512
/*
 * Reads the 64-bit value stored at byte offset 27 of an attribute record.
 * This is the position at which mfs_attr_to_stat reads the length of files,
 * directories and symlinks.
 */
static uint64_t mfs_attr_to_size(const uint8_t attr[ATTR_RECORD_SIZE]) {
	const uint8_t *lengptr = attr+27;

	return get64bit(&lengptr);
}
518
// Access modes kept in file_info.mode. MFS_IO_FORBIDDEN is the initial value of every slot.
enum {MFS_IO_READWRITE,MFS_IO_READONLY,MFS_IO_WRITEONLY,MFS_IO_READAPPEND,MFS_IO_APPENDONLY,MFS_IO_FORBIDDEN};

// State of one descriptor handed out by mfs_open.
typedef struct file_info {
	void *flengptr;	// handle returned by inoleng_acquire for the opened inode (used to get/set file length)
	uint32_t inode;	// inode of the opened object
	uint8_t mode;	// one of MFS_IO_*
	uint8_t writing;	// readers wait on rwcond while this is non-zero
	off_t offset;	// current position (changed by mfs_lseek)
	uint32_t readers_cnt;	// incremented by a reader after it passed the wait on rwcond
	uint32_t writers_cnt;	// readers wait on rwcond while this is non-zero
	void *rdata,*wdata;	// handles from read_data_new / write_data_new; NULL when the mode does not need them
	pthread_mutex_t lock;	// held while offset, mode and the reader/writer bookkeeping are accessed
	pthread_cond_t rwcond;	// condition used together with 'lock' by blocked readers
} file_info;

static file_info *fdtab;	// table of descriptor slots, indexed by descriptor number
static uint32_t fdtabsize;	// number of slots in fdtab
static uint32_t *fdtabusemask;	// bitmap with one bit per slot; a set bit means that the descriptor is in use
static pthread_mutex_t fdtablock;	// held while fdtab, fdtabsize and fdtabusemask are accessed

#define FDTABSIZE_INIT 1024
540
/*
 * Prepares a descriptor slot for use: all fields are zeroed, the mode is set to
 * MFS_IO_FORBIDDEN and the slot's mutex and condition variable are created.
 */
static void mfs_fi_init(file_info *fileinfo) {
	memset(fileinfo,0,sizeof(*fileinfo));

	fileinfo->mode = MFS_IO_FORBIDDEN;

	zassert(pthread_mutex_init(&(fileinfo->lock),NULL));
	zassert(pthread_cond_init(&(fileinfo->rwcond),NULL));
}
547
/*
 * Releases the synchronization objects of a descriptor slot.
 * The mutex is locked and unlocked once before both objects are destroyed.
 */
static void mfs_fi_term(file_info *fileinfo) {
	// lock/unlock pair - presumably lets a thread that still holds the lock finish first
	zassert(pthread_mutex_lock(&(fileinfo->lock)));
	zassert(pthread_mutex_unlock(&(fileinfo->lock)));

	zassert(pthread_mutex_destroy(&(fileinfo->lock)));
	zassert(pthread_cond_destroy(&(fileinfo->rwcond)));
}
554
mfs_resize_fd(void)555 static void mfs_resize_fd(void) {
556 file_info *newfdtab;
557 uint32_t *newfdtabusemask;
558 uint32_t newfdtabsize;
559 uint32_t i;
560
561 newfdtabsize = fdtabsize * 2;
562
563 newfdtab = realloc(fdtab,sizeof(file_info)*newfdtabsize);
564 passert(newfdtab);
565 newfdtabusemask = realloc(fdtabusemask,sizeof(uint32_t)*((newfdtabsize+31)/32));
566 passert(newfdtabusemask);
567 fdtab = newfdtab;
568 fdtabusemask = newfdtabusemask;
569 for (i=fdtabsize ; i<newfdtabsize ; i++) {
570 mfs_fi_init(fdtab+i);
571 }
572 i = fdtabsize+31/32;
573 memset(fdtabusemask+i,0,sizeof(uint32_t)*(((newfdtabsize+31)/32)-i));
574 if ((fdtabsize&0x1F)!=0) {
575 fdtabusemask[i-1] &= (0xFFFFFFFF >> (0x20-(fdtabsize&0x1F)));
576 }
577 fdtabsize = newfdtabsize;
578 }
579
mfs_next_fd(void)580 static int mfs_next_fd(void) {
581 uint32_t i,m;
582 int fd;
583 zassert(pthread_mutex_lock(&fdtablock));
584 for (i=0 ; i<(fdtabsize+31)/32 ; i++) {
585 if (fdtabusemask[i]!=0xFFFFFFFF) {
586 fd = i*32;
587 m = fdtabusemask[i];
588 while (m&1) {
589 fd++;
590 m>>=1;
591 }
592 while ((uint32_t)fd>=fdtabsize) {
593 mfs_resize_fd();
594 }
595 fdtabusemask[fd>>5] |= (1<<(fd&0x1F));
596 zassert(pthread_mutex_unlock(&fdtablock));
597 return fd;
598 }
599 }
600 fd = fdtabsize;
601 mfs_resize_fd();
602 fdtabusemask[fd>>5] |= (1<<(fd&0x1F));
603 zassert(pthread_mutex_unlock(&fdtablock));
604 return fd;
605 }
606
mfs_free_fd(int fd)607 static void mfs_free_fd(int fd) {
608 uint32_t i,m;
609 zassert(pthread_mutex_lock(&fdtablock));
610 if (fd>=0 && (uint32_t)fd<fdtabsize) {
611 i = fd>>5;
612 m = 1<<(fd&0x1F);
613 fdtabusemask[i] &= ~m;
614 }
615 zassert(pthread_mutex_unlock(&fdtablock));
616 }
617
mfs_get_fi(int fd)618 static file_info* mfs_get_fi(int fd) {
619 uint32_t i,m;
620 zassert(pthread_mutex_lock(&fdtablock));
621 if (fd>=0 && (uint32_t)fd<fdtabsize) {
622 i = fd>>5;
623 m = 1<<(fd&0x1F);
624 if (fdtabusemask[i] & m) {
625 zassert(pthread_mutex_unlock(&fdtablock));
626 return fdtab+fd;
627 }
628 }
629 zassert(pthread_mutex_unlock(&fdtablock));
630 return NULL;
631 }
632
/*
 * Records a new file length for 'inode'; the call is forwarded to inoleng_update_fleng.
 */
static void finfo_change_fleng(uint32_t inode,uint64_t fleng) {
	inoleng_update_fleng(inode,fleng);
}
636
//
// TODO sugid_clear_mode
// TODO mkdir_copy_sgid

// Passed unchanged to fs_setattr by mfs_setattr_int. Initialised to 0; no assignment is visible in this part of the file.
static int sugid_clear_mode = 0;
// Passed unchanged to fs_mkdir by mfs_mkdir. Initialised to 0; no assignment is visible in this part of the file.
static int mkdir_copy_sgid = 0;

// Most recently observed process umask. It is refreshed by the 'umask(last_umask)' round trip
// done before every operation that creates an object.
static mode_t last_umask = 0;
645
mfs_mknod(const char * path,mode_t mode,dev_t dev)646 int mfs_mknod(const char *path, mode_t mode, dev_t dev) {
647 uint32_t parent;
648 uint32_t inode;
649 uint8_t name[256];
650 uint8_t nleng;
651 uint8_t attr[ATTR_RECORD_SIZE];
652 uint8_t status;
653 uint8_t type;
654 cred cr;
655 if (mfs_path_to_inodes(path,&parent,NULL,name,&nleng,PATH_TO_INODES_SKIP_LAST,attr)<0) {
656 return -1;
657 }
658 mfs_get_credentials(&cr);
659 last_umask = umask(last_umask); // This is potential race-condition, but there is no portable way to obtain umask atomically. Last umask is remembered to minimize probability of changing umask here.
660 umask(last_umask);
661 if (S_ISFIFO(mode)) {
662 type = TYPE_FIFO;
663 } else if (S_ISCHR(mode)) {
664 type = TYPE_CHARDEV;
665 } else if (S_ISBLK(mode)) {
666 type = TYPE_BLOCKDEV;
667 } else if (S_ISSOCK(mode)) {
668 type = TYPE_SOCKET;
669 } else if (S_ISREG(mode) || (mode&0170000)==0) {
670 type = TYPE_FILE;
671 } else {
672 errno = EPERM;
673 return -1;
674 }
675 status = fs_mknod(parent,nleng,(const uint8_t*)name,type,mode&07777,last_umask,cr.uid,cr.gidcnt,cr.gidtab,dev,&inode,attr);
676 if (status!=MFS_STATUS_OK) {
677 errno = mfs_errorconv(status);
678 return -1;
679 }
680 return 0;
681 }
682
mfs_unlink(const char * path)683 int mfs_unlink(const char *path) {
684 uint32_t parent;
685 uint32_t inode;
686 uint8_t name[256];
687 uint8_t nleng;
688 uint8_t attr[ATTR_RECORD_SIZE];
689 uint8_t status;
690 cred cr;
691 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_EXPECT_OBJECT,attr)<0) {
692 return -1;
693 }
694 mfs_get_credentials(&cr);
695 status = fs_unlink(parent,nleng,(const uint8_t*)name,cr.uid,cr.gidcnt,cr.gidtab,&inode);
696 if (status!=MFS_STATUS_OK) {
697 errno = mfs_errorconv(status);
698 return -1;
699 }
700 return 0;
701 }
702
mfs_mkdir(const char * path,mode_t mode)703 int mfs_mkdir(const char *path, mode_t mode) {
704 uint32_t parent;
705 uint32_t inode;
706 uint8_t name[256];
707 uint8_t nleng;
708 uint8_t attr[ATTR_RECORD_SIZE];
709 uint8_t status;
710 cred cr;
711 if (mfs_path_to_inodes(path,&parent,NULL,name,&nleng,PATH_TO_INODES_SKIP_LAST,attr)<0) {
712 return -1;
713 }
714 mfs_get_credentials(&cr);
715 last_umask = umask(last_umask); // This is potential race-condition, but there is no portable way to obtain umask atomically. Last umask is remembered to minimize probability of changing umask here.
716 umask(last_umask);
717 status = fs_mkdir(parent,nleng,(const uint8_t*)name,mode,last_umask,cr.uid,cr.gidcnt,cr.gidtab,mkdir_copy_sgid,&inode,attr);
718 if (status!=MFS_STATUS_OK) {
719 errno = mfs_errorconv(status);
720 return -1;
721 }
722 return 0;
723 }
724
mfs_rmdir(const char * path)725 int mfs_rmdir(const char *path) {
726 uint32_t parent;
727 uint32_t inode;
728 uint8_t name[256];
729 uint8_t nleng;
730 uint8_t attr[ATTR_RECORD_SIZE];
731 uint8_t status;
732 cred cr;
733 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_EXPECT_OBJECT,attr)<0) {
734 return -1;
735 }
736 mfs_get_credentials(&cr);
737 status = fs_rmdir(parent,nleng,(const uint8_t*)name,cr.uid,cr.gidcnt,cr.gidtab,&inode);
738 if (status!=MFS_STATUS_OK) {
739 errno = mfs_errorconv(status);
740 return -1;
741 }
742 return 0;
743 }
744
mfs_rename(const char * src,const char * dst)745 int mfs_rename(const char *src, const char *dst) {
746 uint32_t src_parent;
747 uint8_t src_name[256];
748 uint8_t src_nleng;
749 uint32_t dst_parent;
750 uint8_t dst_name[256];
751 uint8_t dst_nleng;
752 uint32_t inode;
753 uint8_t attr[ATTR_RECORD_SIZE];
754 uint8_t status;
755 cred cr;
756 if (mfs_path_to_inodes(src,&src_parent,NULL,src_name,&src_nleng,PATH_TO_INODES_SKIP_LAST,attr)<0) {
757 return -1;
758 }
759 if (mfs_path_to_inodes(dst,&dst_parent,NULL,dst_name,&dst_nleng,PATH_TO_INODES_SKIP_LAST,attr)<0) {
760 return -1;
761 }
762 mfs_get_credentials(&cr);
763 status = fs_rename(src_parent,src_nleng,(const uint8_t*)src_name,dst_parent,dst_nleng,(const uint8_t*)dst_name,cr.uid,cr.gidcnt,cr.gidtab,&inode,attr);
764 if (status!=MFS_STATUS_OK) {
765 errno = mfs_errorconv(status);
766 return -1;
767 }
768 return 0;
769 }
770
mfs_setattr_int(uint32_t inode,uint8_t opened,uint8_t setmask,mode_t mode,uid_t uid,gid_t gid,time_t atime,time_t mtime)771 static int mfs_setattr_int(uint32_t inode,uint8_t opened,uint8_t setmask,mode_t mode,uid_t uid,gid_t gid,time_t atime,time_t mtime) {
772 uint8_t attr[ATTR_RECORD_SIZE];
773 uint8_t status;
774 cred cr;
775
776 mfs_get_credentials(&cr);
777 status = fs_setattr(inode,opened,cr.uid,cr.gidcnt,cr.gidtab,setmask,mode&07777,uid,gid,atime,mtime,0,sugid_clear_mode,attr);
778 if (status!=MFS_STATUS_OK) {
779 errno = mfs_errorconv(status);
780 return -1;
781 }
782 return 0;
783 }
784
mfs_chmod(const char * path,mode_t mode)785 int mfs_chmod(const char *path, mode_t mode) {
786 uint32_t parent;
787 uint32_t inode;
788 uint8_t name[256];
789 uint8_t nleng;
790 uint8_t attr[ATTR_RECORD_SIZE];
791 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_EXPECT_OBJECT,attr)<0) {
792 return -1;
793 }
794 return mfs_setattr_int(inode,0,SET_MODE_FLAG,mode,0,0,0,0);
795 }
796
mfs_fchmod(int fildes,mode_t mode)797 int mfs_fchmod(int fildes, mode_t mode) {
798 file_info *fileinfo;
799
800 fileinfo = mfs_get_fi(fildes);
801 if (fileinfo==NULL) {
802 errno = EBADF;
803 return -1;
804 }
805 return mfs_setattr_int(fileinfo->inode,1,SET_MODE_FLAG,mode,0,0,0,0);
806 }
807
mfs_chown(const char * path,uid_t owner,gid_t group)808 int mfs_chown(const char *path, uid_t owner, gid_t group) {
809 uint32_t parent;
810 uint32_t inode;
811 uint8_t name[256];
812 uint8_t nleng;
813 uint8_t attr[ATTR_RECORD_SIZE];
814 uint8_t setmask;
815 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_EXPECT_OBJECT,attr)<0) {
816 return -1;
817 }
818 setmask = 0;
819 if (owner!=(uid_t)-1) {
820 setmask |= SET_UID_FLAG;
821 }
822 if (group!=(gid_t)-1) {
823 setmask |= SET_GID_FLAG;
824 }
825 return mfs_setattr_int(inode,0,setmask,0,owner,group,0,0);
826 }
827
mfs_fchown(int fildes,uid_t owner,gid_t group)828 int mfs_fchown(int fildes, uid_t owner, gid_t group) {
829 file_info *fileinfo;
830 uint8_t setmask;
831
832 fileinfo = mfs_get_fi(fildes);
833 if (fileinfo==NULL) {
834 errno = EBADF;
835 return -1;
836 }
837 setmask = 0;
838 if (owner!=(uid_t)-1) {
839 setmask |= SET_UID_FLAG;
840 }
841 if (group!=(gid_t)-1) {
842 setmask |= SET_GID_FLAG;
843 }
844 return mfs_setattr_int(fileinfo->inode,1,setmask,0,owner,group,0,0);
845 }
846
mfs_utimes(const char * path,const struct timeval times[2])847 int mfs_utimes(const char *path, const struct timeval times[2]) {
848 uint32_t parent;
849 uint32_t inode;
850 uint8_t name[256];
851 uint8_t nleng;
852 uint8_t attr[ATTR_RECORD_SIZE];
853 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_EXPECT_OBJECT,attr)<0) {
854 return -1;
855 }
856 if (times==NULL) {
857 return mfs_setattr_int(inode,0,SET_ATIME_NOW_FLAG|SET_MTIME_NOW_FLAG,0,0,0,0,0);
858 } else {
859 return mfs_setattr_int(inode,0,SET_ATIME_FLAG|SET_MTIME_FLAG,0,0,0,times[0].tv_sec,times[1].tv_sec);
860 }
861 }
862
mfs_futimes(int fildes,const struct timeval times[2])863 int mfs_futimes(int fildes, const struct timeval times[2]) {
864 file_info *fileinfo;
865
866 fileinfo = mfs_get_fi(fildes);
867 if (fileinfo==NULL) {
868 errno = EBADF;
869 return -1;
870 }
871 if (times==NULL) {
872 return mfs_setattr_int(fileinfo->inode,1,SET_ATIME_NOW_FLAG|SET_MTIME_NOW_FLAG,0,0,0,0,0);
873 } else {
874 return mfs_setattr_int(fileinfo->inode,1,SET_ATIME_FLAG|SET_MTIME_FLAG,0,0,0,times[0].tv_sec,times[1].tv_sec);
875 }
876 }
877
mfs_futimens(int fildes,const struct timespec times[2])878 int mfs_futimens(int fildes, const struct timespec times[2]) {
879 file_info *fileinfo;
880 uint8_t setmask;
881 uint32_t atime,mtime;
882
883 fileinfo = mfs_get_fi(fildes);
884 if (fileinfo==NULL) {
885 errno = EBADF;
886 return -1;
887 }
888 atime = 0;
889 mtime = 0;
890 if (times==NULL) {
891 setmask = SET_ATIME_NOW_FLAG|SET_MTIME_NOW_FLAG;
892 } else {
893 setmask = 0;
894 if (times[0].tv_nsec == UTIME_NOW) {
895 setmask |= SET_ATIME_NOW_FLAG;
896 } else if (times[0].tv_nsec != UTIME_OMIT) {
897 setmask |= SET_ATIME_FLAG;
898 atime = times[0].tv_sec;
899 }
900 if (times[1].tv_nsec == UTIME_NOW) {
901 setmask |= SET_MTIME_NOW_FLAG;
902 } else if (times[1].tv_nsec != UTIME_OMIT) {
903 setmask |= SET_MTIME_FLAG;
904 mtime = times[1].tv_sec;
905 }
906 }
907 return mfs_setattr_int(fileinfo->inode,1,setmask,0,0,0,atime,mtime);
908 }
909
mfs_truncate_int(uint32_t inode,uint8_t opened,off_t size,uint8_t attr[ATTR_RECORD_SIZE])910 static int mfs_truncate_int(uint32_t inode,uint8_t opened,off_t size,uint8_t attr[ATTR_RECORD_SIZE]) {
911 uint8_t status;
912 cred cr;
913
914 if (size<0) {
915 errno = EINVAL;
916 return -1;
917 }
918 if (size>=MAX_FILE_SIZE) {
919 errno = EFBIG;
920 return -1;
921 }
922 write_data_flush_inode(inode);
923 mfs_get_credentials(&cr);
924 status = do_truncate(inode,(opened)?TRUNCATE_FLAG_OPENED:0,cr.uid,cr.gidcnt,cr.gidtab,size,attr,NULL);
925 if (status!=MFS_STATUS_OK) {
926 errno = mfs_errorconv(status);
927 return -1;
928 }
929 chunksdatacache_clear_inode(inode,size/MFSCHUNKSIZE);
930 finfo_change_fleng(inode,size);
931 write_data_inode_setmaxfleng(inode,size);
932 read_inode_set_length_active(inode,size);
933 return 0;
934 }
935
mfs_truncate(const char * path,off_t size)936 int mfs_truncate(const char *path, off_t size) {
937 uint32_t parent;
938 uint32_t inode;
939 uint8_t name[256];
940 uint8_t nleng;
941 uint8_t attr[ATTR_RECORD_SIZE];
942 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_EXPECT_OBJECT,attr)<0) {
943 return -1;
944 }
945 return mfs_truncate_int(inode,0,size,attr);
946 }
947
mfs_ftruncate(int fildes,off_t size)948 int mfs_ftruncate(int fildes, off_t size) {
949 file_info *fileinfo;
950 uint8_t attr[ATTR_RECORD_SIZE];
951
952 fileinfo = mfs_get_fi(fildes);
953 if (fileinfo==NULL) {
954 errno = EBADF;
955 return -1;
956 }
957 return mfs_truncate_int(fileinfo->inode,1,size,attr);
958 }
959
mfs_lseek(int fildes,off_t offset,int whence)960 off_t mfs_lseek(int fildes, off_t offset, int whence) {
961 file_info *fileinfo;
962 off_t ret;
963
964 fileinfo = mfs_get_fi(fildes);
965 if (fileinfo==NULL) {
966 errno = EBADF;
967 return -1;
968 }
969 zassert(pthread_mutex_lock(&(fileinfo->lock)));
970 switch (whence) {
971 case SEEK_SET:
972 fileinfo->offset = offset;
973 break;
974 case SEEK_CUR:
975 fileinfo->offset += offset;
976 break;
977 case SEEK_END:
978 fileinfo->offset = inoleng_getfleng(fileinfo->flengptr) + offset;
979 break;
980 default:
981 errno = EINVAL;
982 return -1;
983 }
984 if (fileinfo->offset<0) {
985 fileinfo->offset = 0;
986 }
987 ret = fileinfo->offset;
988 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
989 return ret;
990 }
991
mfs_fix_attr(uint8_t type,uint32_t inode,struct stat * buf)992 static void mfs_fix_attr(uint8_t type,uint32_t inode,struct stat *buf) {
993 if (type==TYPE_FILE) {
994 uint64_t maxfleng = write_data_inode_getmaxfleng(inode);
995 if (maxfleng>(uint64_t)(buf->st_size)) {
996 buf->st_size = maxfleng;
997 }
998 read_inode_set_length_passive(inode,buf->st_size);
999 finfo_change_fleng(inode,buf->st_size);
1000 }
1001 fs_fix_amtime(inode,&(buf->st_atime),&(buf->st_mtime));
1002 }
1003
mfs_stat(const char * path,struct stat * buf)1004 int mfs_stat(const char *path, struct stat *buf) {
1005 uint32_t parent;
1006 uint32_t inode;
1007 uint8_t name[256];
1008 uint8_t nleng;
1009 uint8_t attr[ATTR_RECORD_SIZE];
1010 uint8_t type;
1011
1012 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_EXPECT_OBJECT,attr)<0) {
1013 return -1;
1014 }
1015 memset(buf,0,sizeof(struct stat));
1016 mfs_attr_to_stat(inode,attr,buf);
1017 type = mfs_attr_get_type(attr);
1018 mfs_fix_attr(type,inode,buf);
1019 return 0;
1020 }
1021
mfs_fstat(int fildes,struct stat * buf)1022 int mfs_fstat(int fildes, struct stat *buf) {
1023 uint8_t attr[ATTR_RECORD_SIZE];
1024 file_info *fileinfo;
1025 uint8_t status;
1026 uint8_t type;
1027
1028 fileinfo = mfs_get_fi(fildes);
1029 if (fileinfo==NULL) {
1030 errno = EBADF;
1031 return -1;
1032 }
1033 status = fs_getattr(fileinfo->inode,1,geteuid(),getegid(),attr);
1034 if (status!=MFS_STATUS_OK) {
1035 errno = mfs_errorconv(status);
1036 return -1;
1037 }
1038 memset(buf,0,sizeof(struct stat));
1039 mfs_attr_to_stat(fileinfo->inode,attr,buf);
1040 type = mfs_attr_get_type(attr);
1041 mfs_fix_attr(type,fileinfo->inode,buf);
1042 return 0;
1043 }
1044
mfs_open(const char * path,int oflag,...)1045 int mfs_open(const char *path,int oflag,...) {
1046 uint64_t fsize;
1047 uint32_t parent;
1048 uint32_t inode;
1049 uint8_t noatomictrunc;
1050 uint8_t name[256];
1051 uint8_t nleng;
1052 uint8_t attr[ATTR_RECORD_SIZE];
1053 uint8_t status;
1054 cred cr;
1055 uint8_t mfsoflag;
1056 uint8_t oflags;
1057 int fildes;
1058 int needopen;
1059 file_info *fileinfo;
1060
1061 mfsoflag = 0;
1062 switch (oflag&O_ACCMODE) {
1063 case O_RDONLY:
1064 mfsoflag |= OPEN_READ;
1065 break;
1066 case O_WRONLY:
1067 mfsoflag |= OPEN_WRITE;
1068 break;
1069 case O_RDWR:
1070 mfsoflag |= OPEN_READ | OPEN_WRITE;
1071 break;
1072 }
1073 if (oflag&O_TRUNC) {
1074 uint32_t mver;
1075 mver = master_version();
1076 noatomictrunc = (mver<VERSION2INT(3,0,113))?1:0;
1077 mfsoflag |= OPEN_TRUNCATE;
1078 } else {
1079 noatomictrunc = 0;
1080 }
1081
1082 oflags = 0;
1083 needopen = 1;
1084 if (mfs_path_to_inodes(path,&parent,&inode,name,&nleng,PATH_TO_INODES_CHECK_LAST,attr)<0) {
1085 return -1;
1086 }
1087 if (oflag&O_CREAT) {
1088 if (oflag&O_EXCL) {
1089 if (inode!=0) { // file exists
1090 errno = EEXIST;
1091 return -1;
1092 }
1093 } else {
1094 if (inode==0) { // file doesn't exists - create it
1095 int mode;
1096 va_list ap;
1097 // create
1098 va_start(ap,oflag);
1099 mode = va_arg(ap,int);
1100 va_end(ap);
1101 mfs_get_credentials(&cr);
1102 last_umask = umask(last_umask); // see - mkdir
1103 umask(last_umask);
1104 status = fs_create(parent,nleng,(const uint8_t*)name,mode,last_umask,cr.uid,cr.gidcnt,cr.gidtab,&inode,attr,&oflags);
1105 if (status!=MFS_STATUS_OK) {
1106 errno = mfs_errorconv(status);
1107 return -1;
1108 }
1109 needopen = 0;
1110 }
1111 }
1112 } else {
1113 if (inode==0) {
1114 errno = ENOENT;
1115 return -1;
1116 }
1117 }
1118 if (needopen) {
1119 if (mfs_attr_to_type(attr)!=TYPE_FILE) {
1120 errno = EISDIR;
1121 return -1;
1122 }
1123
1124 // open
1125 mfs_get_credentials(&cr);
1126 status = fs_opencheck(inode,cr.uid,cr.gidcnt,cr.gidtab,mfsoflag,attr,&oflags);
1127 if (status!=MFS_STATUS_OK) {
1128 errno = mfs_errorconv(status);
1129 return -1;
1130 }
1131 if (mfsoflag&OPEN_TRUNCATE && noatomictrunc) {
1132 if (mfs_truncate_int(inode,1,0,attr)<0) {
1133 return -1;
1134 }
1135 }
1136 }
1137 if (oflags & OPEN_APPENDONLY) {
1138 if ((oflag&O_APPEND)==0) {
1139 errno = EPERM;
1140 return -1;
1141 }
1142 }
1143
1144 fs_inc_acnt(inode);
1145
1146 fsize = mfs_attr_to_size(attr);
1147 fildes = mfs_next_fd();
1148 fileinfo = mfs_get_fi(fildes);
1149 zassert(pthread_mutex_lock(&(fileinfo->lock)));
1150 fileinfo->flengptr = inoleng_acquire(inode);
1151 fileinfo->inode = inode;
1152 fileinfo->mode = MFS_IO_FORBIDDEN;
1153 fileinfo->offset = 0;
1154 fileinfo->rdata = NULL;
1155 fileinfo->wdata = NULL;
1156 fileinfo->readers_cnt = 0;
1157 fileinfo->writers_cnt = 0;
1158 fileinfo->writing = 0;
1159
1160 inoleng_setfleng(fileinfo->flengptr,fsize);
1161 if ((oflag&O_ACCMODE) == O_RDONLY) {
1162 fileinfo->mode = MFS_IO_READONLY;
1163 fileinfo->rdata = read_data_new(inode,fsize);
1164 } else if ((oflag&O_ACCMODE) == O_WRONLY) {
1165 if (oflag&O_APPEND) {
1166 fileinfo->mode = MFS_IO_APPENDONLY;
1167 } else {
1168 fileinfo->mode = MFS_IO_WRITEONLY;
1169 }
1170 fileinfo->wdata = write_data_new(inode,fsize);
1171 } else if ((oflag&O_ACCMODE) == O_RDWR) {
1172 if (oflag&O_APPEND) {
1173 fileinfo->mode = MFS_IO_READAPPEND;
1174 } else {
1175 fileinfo->mode = MFS_IO_READWRITE;
1176 }
1177 fileinfo->rdata = read_data_new(inode,fsize);
1178 fileinfo->wdata = write_data_new(inode,fsize);
1179 }
1180 if (oflag&O_APPEND) {
1181 fileinfo->offset = fsize;
1182 }
1183 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
1184 return fildes;
1185 }
1186
/*
 * Positional read shared by mfs_pread() and mfs_read().
 *
 * Registers as a reader on the descriptor (waiting while a writer is active
 * or queued), asks read_data() for up to nbyte bytes at offset and copies the
 * returned iovec segments into buf.
 *
 * Returns the byte count left in ssize by read_data(), or -1 with errno set:
 *   EBADF  - fileinfo is NULL
 *   EINVAL - offset is negative
 *   EFBIG  - offset or offset+nbyte reaches MAX_FILE_SIZE
 *   EACCES - the descriptor mode does not permit reading
 *   or the non-zero code returned by read_data().
 */
static ssize_t mfs_pread_int(file_info *fileinfo,void *buf,size_t nbyte,off_t offset) {
	uint32_t ssize;
	struct iovec *iov;
	uint32_t iovcnt,pos,i;
	void *buffptr;
	int err;

	if (fileinfo==NULL) {
		errno = EBADF;
		return -1;
	}
	// a negative offset is converted to a huge unsigned value in the range
	// check below and passes it whenever offset+nbyte wraps around - reject
	// it explicitly
	if (offset<0) {
		errno = EINVAL;
		return -1;
	}
	if (offset>=MAX_FILE_SIZE || offset+nbyte>=MAX_FILE_SIZE) {
		errno = EFBIG;
		return -1;
	}
	zassert(pthread_mutex_lock(&(fileinfo->lock)));
	if (fileinfo->mode==MFS_IO_WRITEONLY || fileinfo->mode==MFS_IO_APPENDONLY || fileinfo->mode==MFS_IO_FORBIDDEN) {
		zassert(pthread_mutex_unlock(&(fileinfo->lock)));
		errno = EACCES;
		return -1;
	}
	// rwlock_rdlock begin
	// readers wait while a write is in progress or any writer is queued
	while (fileinfo->writing | fileinfo->writers_cnt) {
		zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
	}
	fileinfo->readers_cnt++;
	// rwlock_rdlock_end
	zassert(pthread_mutex_unlock(&(fileinfo->lock)));

	write_data_flush_inode(fileinfo->inode);

	// NOTE(review): ssize is 32-bit, so where size_t is wider a request of
	// 4GiB or more is narrowed modulo 2^32 here - confirm this is acceptable
	ssize = nbyte;
	fs_atime(fileinfo->inode);
	err = read_data(fileinfo->rdata,offset,&ssize,&buffptr,&iov,&iovcnt);
	fs_atime(fileinfo->inode);

	if (err==0) {
		// gather the returned segments into the caller's buffer
		pos = 0;
		for (i=0 ; i<iovcnt ; i++) {
			memcpy((uint8_t*)buf+pos,iov[i].iov_base,iov[i].iov_len);
			pos += iov[i].iov_len;
		}
	}
	read_data_free_buff(fileinfo->rdata,buffptr,iov);
	zassert(pthread_mutex_lock(&(fileinfo->lock)));
	// rwlock_rdunlock begin
	fileinfo->readers_cnt--;
	if (fileinfo->readers_cnt==0) {
		// last reader leaving - wake up waiting writers / close
		zassert(pthread_cond_broadcast(&(fileinfo->rwcond)));
	}
	// rwlock_rdunlock_end
	zassert(pthread_mutex_unlock(&(fileinfo->lock)));
	if (err!=0) {
		errno = err;
		return -1;
	}
	return ssize;
}
1245
mfs_pread(int fildes,void * buf,size_t nbyte,off_t offset)1246 ssize_t mfs_pread(int fildes,void *buf,size_t nbyte,off_t offset) {
1247 return mfs_pread_int(mfs_get_fi(fildes),buf,nbyte,offset);
1248 }
1249
mfs_read(int fildes,void * buf,size_t nbyte)1250 ssize_t mfs_read(int fildes,void *buf,size_t nbyte) {
1251 ssize_t s;
1252 file_info *fileinfo;
1253 off_t offset;
1254
1255 fileinfo = mfs_get_fi(fildes);
1256 if (fileinfo==NULL) {
1257 return -1;
1258 }
1259 zassert(pthread_mutex_lock(&(fileinfo->lock)));
1260 offset = fileinfo->offset;
1261 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
1262 s = mfs_pread_int(fileinfo,buf,nbyte,offset);
1263 zassert(pthread_mutex_lock(&(fileinfo->lock)));
1264 if (s>0) {
1265 fileinfo->offset = offset + s;
1266 }
1267 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
1268 return s;
1269 }
1270
/*
 * Positional write shared by mfs_pwrite() and mfs_write().
 *
 * Takes the descriptor's exclusive writer slot, resolves the target offset
 * (for append modes the passed offset is replaced by the end of file), hands
 * the data to write_data() and, when the file grew, propagates the new length
 * to the length cache and the read/write layers.
 *
 * Returns nbyte on success, or -1 with errno set:
 *   EBADF  - fileinfo is NULL
 *   EINVAL - offset is negative
 *   EFBIG  - offset or offset+nbyte reaches MAX_FILE_SIZE
 *   EACCES - the descriptor mode does not permit writing
 *   or the converted status of do_truncate() / the code from write_data().
 */
static ssize_t mfs_pwrite_int(file_info *fileinfo,const void *buf,size_t nbyte,off_t offset) {
	uint64_t newfleng;
	uint8_t appendonly;
	int err;

	if (fileinfo==NULL) {
		errno = EBADF;
		return -1;
	}
	// a negative offset is converted to a huge unsigned value in the range
	// check below and passes it whenever offset+nbyte wraps around - reject
	// it explicitly instead of writing at a bogus position
	if (offset<0) {
		errno = EINVAL;
		return -1;
	}
	if (offset>=MAX_FILE_SIZE || offset+nbyte>=MAX_FILE_SIZE) {
		errno = EFBIG;
		return -1;
	}
	zassert(pthread_mutex_lock(&(fileinfo->lock)));
	appendonly = (fileinfo->mode==MFS_IO_APPENDONLY || fileinfo->mode==MFS_IO_READAPPEND)?1:0;
	if (fileinfo->mode==MFS_IO_READONLY || fileinfo->mode==MFS_IO_FORBIDDEN) {
		zassert(pthread_mutex_unlock(&(fileinfo->lock)));
		errno = EACCES;
		return -1;
	}
	// rwlock_wrlock begin
	// announce a queued writer (blocks new readers), then wait for exclusivity
	fileinfo->writers_cnt++;
	while (fileinfo->readers_cnt | fileinfo->writing) {
		zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
	}
	fileinfo->writers_cnt--;
	fileinfo->writing = 1;
	// rwlock_wrlock end

	err = 0;
	if (appendonly) {
		if (master_version()>=VERSION2INT(3,0,113)) {
			// the previous length returned by do_truncate (called with
			// TRUNCATE_FLAG_RESERVE and nbyte) becomes the write offset
			uint8_t status;
			uint64_t prevleng;
			uint32_t gid = 0;
			uint32_t inode = fileinfo->inode;
			zassert(pthread_mutex_unlock(&(fileinfo->lock)));
			status = do_truncate(inode,TRUNCATE_FLAG_OPENED|TRUNCATE_FLAG_UPDATE|TRUNCATE_FLAG_RESERVE,0,1,&gid,nbyte,NULL,&prevleng);
			zassert(pthread_mutex_lock(&(fileinfo->lock)));
			if (status!=MFS_STATUS_OK) {
				err = mfs_errorconv(status);
			} else {
				offset = prevleng;
			}
		} else {
			// older master: append at the locally tracked file length
			offset = inoleng_getfleng(fileinfo->flengptr);
			if (offset+nbyte>=MAX_FILE_SIZE) {
				err = EFBIG;
			}
		}
	}
	if (err==0) {
		// the data transfer itself runs without the descriptor mutex;
		// the 'writing' flag keeps other readers and writers out
		zassert(pthread_mutex_unlock(&(fileinfo->lock)));
		fs_mtime(fileinfo->inode);
		err = write_data(fileinfo->wdata,offset,nbyte,(const uint8_t*)buf,(geteuid()==0)?1:0);
		fs_mtime(fileinfo->inode);
		zassert(pthread_mutex_lock(&(fileinfo->lock)));
	}

	// rwlock_wrunlock begin
	fileinfo->writing = 0;
	zassert(pthread_cond_broadcast(&(fileinfo->rwcond)));
	// wrlock_wrunlock end

	if (err!=0) {
		zassert(pthread_mutex_unlock(&(fileinfo->lock)));
		errno = err;
		return -1;
	}
	// newfleng stays 0 when the write did not extend the file
	if ((uint64_t)(offset+nbyte)>inoleng_getfleng(fileinfo->flengptr)) {
		inoleng_setfleng(fileinfo->flengptr,offset+nbyte);
		newfleng = offset+nbyte;
	} else {
		newfleng = 0;
	}
	zassert(pthread_mutex_unlock(&(fileinfo->lock)));
	if (newfleng>0) {
		read_inode_set_length_passive(fileinfo->inode,newfleng);
		write_data_inode_setmaxfleng(fileinfo->inode,newfleng);
		finfo_change_fleng(fileinfo->inode,newfleng);
	}
	read_inode_clear_cache(fileinfo->inode,offset,nbyte);
	// fdcache_invalidate(fileinfo->inode);
	return nbyte;
}
1356
mfs_pwrite(int fildes,const void * buf,size_t nbyte,off_t offset)1357 ssize_t mfs_pwrite(int fildes,const void *buf,size_t nbyte,off_t offset) {
1358 return mfs_pwrite_int(mfs_get_fi(fildes),buf,nbyte,offset);
1359 }
1360
mfs_write(int fildes,const void * buf,size_t nbyte)1361 ssize_t mfs_write(int fildes,const void *buf,size_t nbyte) {
1362 ssize_t s;
1363 file_info *fileinfo;
1364 off_t offset;
1365
1366 fileinfo = mfs_get_fi(fildes);
1367 if (fileinfo==NULL) {
1368 return -1;
1369 }
1370 zassert(pthread_mutex_lock(&(fileinfo->lock)));
1371 offset = fileinfo->offset;
1372 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
1373 s = mfs_pwrite_int(fileinfo,buf,nbyte,fileinfo->offset);
1374 zassert(pthread_mutex_lock(&(fileinfo->lock)));
1375 if (fileinfo->mode==MFS_IO_APPENDONLY || fileinfo->mode==MFS_IO_READAPPEND) {
1376 fileinfo->offset = inoleng_getfleng(fileinfo->flengptr);
1377 } else {
1378 if (s>0) {
1379 fileinfo->offset = offset + s;
1380 }
1381 }
1382 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
1383 return s;
1384 }
1385
/*
 * Flushes write data of a descriptor.
 *
 * When the descriptor has write data and its mode is neither read-only nor
 * forbidden, the exclusive writer slot is taken around write_data_flush().
 * Otherwise there is nothing to do and 0 is returned.
 *
 * Returns 0 on success, -1 with errno set to EBADF (NULL fileinfo) or to the
 * non-zero code returned by write_data_flush().
 */
static int mfs_fsync_int(file_info *fileinfo) {
	int flusherr;

	if (fileinfo==NULL) {
		errno = EBADF;
		return -1;
	}

	zassert(pthread_mutex_lock(&(fileinfo->lock)));
	if (fileinfo->wdata==NULL || fileinfo->mode==MFS_IO_READONLY || fileinfo->mode==MFS_IO_FORBIDDEN) {
		// no write data to flush
		zassert(pthread_mutex_unlock(&(fileinfo->lock)));
		return 0;
	}

	// rwlock_wrlock begin
	fileinfo->writers_cnt++;
	while (fileinfo->readers_cnt!=0 || fileinfo->writing!=0) {
		zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
	}
	fileinfo->writers_cnt--;
	fileinfo->writing = 1;
	// rwlock_wrlock end
	zassert(pthread_mutex_unlock(&(fileinfo->lock)));

	// flush without holding the mutex; 'writing' keeps others out
	flusherr = write_data_flush(fileinfo->wdata);

	zassert(pthread_mutex_lock(&(fileinfo->lock)));
	// rwlock_wrunlock begin
	fileinfo->writing = 0;
	zassert(pthread_cond_broadcast(&(fileinfo->rwcond)));
	// rwlock_wrunlock end
	zassert(pthread_mutex_unlock(&(fileinfo->lock)));

	if (flusherr==0) {
		return 0;
	}
	errno = flusherr;
	return -1;
}
1425
mfs_fsync(int fildes)1426 int mfs_fsync(int fildes) {
1427 return mfs_fsync_int(mfs_get_fi(fildes));
1428 }
1429
mfs_close(int fildes)1430 int mfs_close(int fildes) {
1431 file_info *fileinfo;
1432 int err;
1433
1434 fileinfo = mfs_get_fi(fildes);
1435 if (fileinfo==NULL) {
1436 errno = EBADF;
1437 return -1;
1438 }
1439 zassert(pthread_mutex_lock(&(fileinfo->lock)));
1440 // rwlock_wait_for_unlock:
1441 while (fileinfo->writing | fileinfo->writers_cnt | fileinfo->readers_cnt) {
1442 zassert(pthread_cond_wait(&(fileinfo->rwcond),&(fileinfo->lock)));
1443 }
1444 zassert(pthread_mutex_unlock(&(fileinfo->lock)));
1445 err = mfs_fsync_int(fileinfo);
1446 if (fileinfo->rdata != NULL) {
1447 read_data_end(fileinfo->rdata);
1448 fileinfo->rdata = NULL;
1449 }
1450 if (fileinfo->wdata != NULL) {
1451 write_data_end(fileinfo->wdata);
1452 fileinfo->wdata = NULL;
1453 }
1454 if (fileinfo->flengptr != NULL) {
1455 inoleng_release(fileinfo->flengptr);
1456 fileinfo->flengptr = NULL;
1457 }
1458 if (fileinfo->mode != MFS_IO_FORBIDDEN) {
1459 fs_dec_acnt(fileinfo->inode);
1460 fileinfo->mode = MFS_IO_FORBIDDEN;
1461 }
1462 mfs_free_fd(fildes);
1463 if (err!=0) {
1464 errno = err;
1465 return -1;
1466 }
1467 return 0;
1468 }
1469
mfs_flock(int fildes,int op)1470 int mfs_flock(int fildes, int op) {
1471 uint8_t lock_mode;
1472 file_info *fileinfo;
1473 uint8_t status;
1474
1475 fileinfo = mfs_get_fi(fildes);
1476 if (fileinfo==NULL) {
1477 errno = EBADF;
1478 return -1;
1479 }
1480
1481 if (op&LOCK_UN) {
1482 lock_mode = FLOCK_UNLOCK;
1483 } else if (op&LOCK_SH) {
1484 if (op&LOCK_NB) {
1485 lock_mode=FLOCK_TRY_SHARED;
1486 } else {
1487 lock_mode=FLOCK_LOCK_SHARED;
1488 }
1489 } else if (op&LOCK_EX) {
1490 if (op&LOCK_NB) {
1491 lock_mode=FLOCK_TRY_EXCLUSIVE;
1492 } else {
1493 lock_mode=FLOCK_LOCK_EXCLUSIVE;
1494 }
1495 } else {
1496 errno = EINVAL;
1497 return -1;
1498 }
1499
1500 if (lock_mode==FLOCK_UNLOCK) {
1501 mfs_fsync_int(fileinfo);
1502 }
1503
1504 status = fs_flock(fileinfo->inode,0,fildes,lock_mode);
1505 if (status!=MFS_STATUS_OK) {
1506 errno = mfs_errorconv(status);
1507 return -1;
1508 }
1509 return 0;
1510 }
1511
mfs_lockf(int fildes,int function,off_t size)1512 int mfs_lockf(int fildes, int function, off_t size) {
1513 uint64_t start,end;
1514 uint32_t pid;
1515 file_info *fileinfo;
1516 uint8_t status;
1517
1518 fileinfo = mfs_get_fi(fildes);
1519 if (fileinfo==NULL) {
1520 errno = EBADF;
1521 return -1;
1522 }
1523
1524 if (size>0) {
1525 start = fileinfo->offset;
1526 end = start+size;
1527 if (end<start) {
1528 errno = EINVAL;
1529 return -1;
1530 }
1531 } else if (size<0) {
1532 end = fileinfo->offset;
1533 start = end+size;
1534 if (end<start) {
1535 errno = EINVAL;
1536 return -1;
1537 }
1538 } else { //size = 0;
1539 start = fileinfo->offset;
1540 end = UINT64_MAX;
1541 }
1542
1543 pid = getpid();
1544 if (function==F_ULOCK) {
1545 mfs_fsync_int(fileinfo);
1546 status = fs_posixlock(fileinfo->inode,0,fildes,POSIX_LOCK_CMD_SET,POSIX_LOCK_UNLCK,start,end,pid,NULL,NULL,NULL,NULL);
1547 } else if (function==F_LOCK) {
1548 status = fs_posixlock(fileinfo->inode,0,fildes,POSIX_LOCK_CMD_SET,POSIX_LOCK_WRLCK,start,end,pid,NULL,NULL,NULL,NULL);
1549 } else if (function==F_TLOCK) {
1550 status = fs_posixlock(fileinfo->inode,0,fildes,POSIX_LOCK_CMD_TRY,POSIX_LOCK_WRLCK,start,end,pid,NULL,NULL,NULL,NULL);
1551 } else if (function==F_TEST) {
1552 status = fs_posixlock(fileinfo->inode,0,fildes,POSIX_LOCK_CMD_GET,POSIX_LOCK_WRLCK,start,end,pid,NULL,NULL,NULL,NULL);
1553 } else {
1554 errno = EINVAL;
1555 return -1;
1556 }
1557
1558 if (status!=MFS_STATUS_OK) {
1559 errno = mfs_errorconv(status);
1560 return -1;
1561 }
1562 return 0;
1563 }
1564
mfs_fcntl_locks(int fildes,int function,struct flock * fl)1565 int mfs_fcntl_locks(int fildes, int function, struct flock *fl) {
1566 uint64_t start,end,rstart,rend;
1567 uint32_t pid,rpid;
1568 uint8_t type,rtype;
1569 file_info *fileinfo;
1570 uint8_t status;
1571
1572 fileinfo = mfs_get_fi(fildes);
1573 if (fileinfo==NULL) {
1574 errno = EBADF;
1575 return -1;
1576 }
1577
1578 if (fl->l_whence==SEEK_CUR) {
1579 if (fl->l_start > fileinfo->offset) {
1580 start = 0;
1581 } else {
1582 start = fileinfo->offset + fl->l_start;
1583 }
1584 } else if (fl->l_whence==SEEK_SET) {
1585 if (fl->l_start < 0) {
1586 start = 0;
1587 } else {
1588 start = fl->l_start;
1589 }
1590 } else if (fl->l_whence==SEEK_END) {
1591 if (fl->l_start > (off_t)inoleng_getfleng(fileinfo->flengptr)) {
1592 start = 0;
1593 } else {
1594 start = inoleng_getfleng(fileinfo->flengptr) + fl->l_start;
1595 }
1596 } else {
1597 errno = EINVAL;
1598 return -1;
1599 }
1600 if (fl->l_len <= 0) {
1601 end = UINT64_MAX;
1602 } else {
1603 end = start + fl->l_len;
1604 if (end<start) {
1605 end = UINT64_MAX;
1606 }
1607 }
1608 if (fl->l_type == F_UNLCK) {
1609 type = POSIX_LOCK_UNLCK;
1610 } else if (fl->l_type == F_RDLCK) {
1611 type = POSIX_LOCK_RDLCK;
1612 } else if (fl->l_type == F_WRLCK) {
1613 type = POSIX_LOCK_WRLCK;
1614 } else {
1615 errno = EINVAL;
1616 return -1;
1617 }
1618 pid = getpid();
1619
1620 if (type==POSIX_LOCK_UNLCK) {
1621 mfs_fsync_int(fileinfo);
1622 }
1623
1624
1625 if (function==F_GETLK) {
1626 status = fs_posixlock(fileinfo->inode,0,fildes,POSIX_LOCK_CMD_GET,type,start,end,pid,&rtype,&rstart,&rend,&rpid);
1627 } else if (function==F_SETLKW) {
1628 status = fs_posixlock(fileinfo->inode,0,fildes,POSIX_LOCK_CMD_SET,type,start,end,pid,NULL,NULL,NULL,NULL);
1629 } else if (function==F_SETLK) {
1630 status = fs_posixlock(fileinfo->inode,0,fildes,POSIX_LOCK_CMD_TRY,type,start,end,pid,NULL,NULL,NULL,NULL);
1631 } else {
1632 errno = EINVAL;
1633 return -1;
1634 }
1635
1636 if (status!=MFS_STATUS_OK) {
1637 errno = mfs_errorconv(status);
1638 return -1;
1639 }
1640
1641 if (function==F_GETLK) {
1642 memset(fl,0,sizeof(struct flock));
1643 if (rtype==POSIX_LOCK_RDLCK) {
1644 fl->l_type = F_RDLCK;
1645 } else if (rtype==POSIX_LOCK_WRLCK) {
1646 fl->l_type = F_WRLCK;
1647 } else {
1648 fl->l_type = F_UNLCK;
1649 }
1650 fl->l_whence = SEEK_SET;
1651 fl->l_start = rstart;
1652 if ((rend-rstart)>INT64_MAX) {
1653 fl->l_len = 0;
1654 } else {
1655 fl->l_len = (rend - rstart);
1656 }
1657 fl->l_pid = rpid;
1658 }
1659
1660 return 0;
1661 }
1662
1663
/*
 * Initialises the library.
 *
 * stage selects what is done: 1 - only the master connection phase,
 * 2 - only the local subsystems phase, 0 - both phases in that order.
 *
 * Phase 1 hashes the master password (if any) with MD5, wipes the plain-text
 * password in mcfg and opens the master connection.
 * Phase 2 initialises the caches and I/O modules, allocates the descriptor
 * table, records the current umask and resolves the mkdir_copy_sgid and
 * sugid_clear_mode settings (negative config values select the built-in
 * defaults).
 *
 * Returns 0 on success, -1 when the master connection cannot be initialised.
 */
int mfs_init(mfscfg *mcfg,uint8_t stage) {
	uint32_t i;
	md5ctx ctx;
	uint8_t md5pass[16];
	int connres;

	if (stage==0 || stage==1) {
		if (mcfg->masterpassword!=NULL) {
			md5_init(&ctx);
			md5_update(&ctx,(uint8_t*)(mcfg->masterpassword),strlen(mcfg->masterpassword));
			md5_final(md5pass,&ctx);
			// the plain-text password is not needed any more
			memset(mcfg->masterpassword,0,strlen(mcfg->masterpassword));
		}
		strerr_init();
		mycrc32_init();
		connres = fs_init_master_connection(NULL,mcfg->masterhost,mcfg->masterport,0,mcfg->mountpoint,mcfg->masterpath,(mcfg->masterpassword!=NULL)?md5pass:NULL,1,0);
		// wipe the password digest on the failure path as well, so it is
		// not left behind on the stack when we return early
		memset(md5pass,0,sizeof(md5pass));
		if (connres<0) {
			return -1;
		}
	}

	if (stage==0 || stage==2) {
		inoleng_init();
		conncache_init(200);
		chunkrwlock_init();
		chunksdatacache_init();
		fs_init_threads(mcfg->io_try_cnt,0);

		csdb_init();
		delay_init();
		read_data_init(mcfg->read_cache_mb*1024*1024,0x200000,10*0x200000,mcfg->io_try_cnt,0,5,mcfg->error_on_lost_chunk,mcfg->error_on_no_space);
		write_data_init(mcfg->write_cache_mb*1024*1024,mcfg->io_try_cnt,0,5,mcfg->error_on_lost_chunk,mcfg->error_on_no_space);

		// descriptor table with one "in use" bit per slot
		zassert(pthread_mutex_init(&fdtablock,NULL));
		fdtab = malloc(sizeof(file_info)*FDTABSIZE_INIT);
		fdtabsize = FDTABSIZE_INIT;
		fdtabusemask = malloc(sizeof(uint32_t)*((FDTABSIZE_INIT+31)/32));
		passert(fdtab);
		passert(fdtabusemask);
		for (i=0 ; i<fdtabsize ; i++) {
			mfs_fi_init(fdtab+i);
		}
		memset(fdtabusemask,0,sizeof(uint32_t)*((FDTABSIZE_INIT+31)/32));

		// umask() can only be read by setting it - set and restore
		last_umask = umask(0);
		umask(last_umask);

		if (mcfg->mkdir_copy_sgid<0) {
#ifdef __linux__
			mkdir_copy_sgid = 1;
#else
			mkdir_copy_sgid = 0;
#endif
		} else {
			mkdir_copy_sgid = mcfg->mkdir_copy_sgid;
		}

		if (mcfg->sugid_clear_mode<0) {
#if defined(DEFAULT_SUGID_CLEAR_MODE_EXT)
			sugid_clear_mode = SUGID_CLEAR_MODE_EXT;
#elif defined(DEFAULT_SUGID_CLEAR_MODE_BSD)
			sugid_clear_mode = SUGID_CLEAR_MODE_BSD;
#elif defined(DEFAULT_SUGID_CLEAR_MODE_OSX)
			sugid_clear_mode = SUGID_CLEAR_MODE_OSX;
#else
			sugid_clear_mode = SUGID_CLEAR_MODE_NEVER;
#endif
		} else {
			sugid_clear_mode = mcfg->sugid_clear_mode;
		}
	}

	return 0;
}
1737
mfs_term(void)1738 void mfs_term(void) {
1739 uint32_t i;
1740 for (i=0 ; i<fdtabsize ; i++) {
1741 mfs_close(i);
1742 mfs_fi_term(fdtab+i);
1743 }
1744 free(fdtabusemask);
1745 free(fdtab);
1746 zassert(pthread_mutex_lock(&fdtablock));
1747 zassert(pthread_mutex_unlock(&fdtablock));
1748 zassert(pthread_mutex_destroy(&fdtablock));
1749 write_data_term();
1750 read_data_term();
1751 delay_term();
1752 csdb_term();
1753
1754 fs_term();
1755 chunksdatacache_term();
1756 chunkrwlock_term();
1757 conncache_term();
1758 inoleng_term();
1759 stats_term();
1760 }
1761