1 /*
2 Copyright 2005-2010 Jakub Kruszona-Zawadzki, Gemius SA, 2013-2014 EditShare, 2013-2015 Skytechnology sp. z o.o..
3
4 This file was part of MooseFS and is part of LizardFS.
5
6 LizardFS is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, version 3.
9
10 LizardFS is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with LizardFS If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "common/platform.h"
20 #include "mount/lizard_client.h"
21
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <pthread.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <cstdint>
31 #include <fstream>
32 #include <map>
33 #include <memory>
34 #include <new>
35 #include <string>
36 #include <vector>
37
38 #include "common/access_control_list.h"
39 #include "common/acl_converter.h"
40 #include "common/acl_type.h"
41 #include "common/crc.h"
42 #include "common/datapack.h"
43 #include "common/errno_defs.h"
44 #include "common/lru_cache.h"
45 #include "common/mfserr.h"
46 #include "common/richacl_converter.h"
47 #include "common/slogger.h"
48 #include "common/sockets.h"
49 #include "common/special_inode_defs.h"
50 #include "common/time_utils.h"
51 #include "devtools/request_log.h"
52 #include "mount/acl_cache.h"
53 #include "mount/chunk_locator.h"
54 #include "mount/client_common.h"
55 #include "mount/direntry_cache.h"
56 #include "mount/g_io_limiters.h"
57 #include "mount/io_limit_group.h"
58 #include "mount/mastercomm.h"
59 #include "mount/masterproxy.h"
60 #include "mount/oplog.h"
61 #include "mount/readdata.h"
62 #include "mount/special_inode.h"
63 #include "mount/stats.h"
64 #include "mount/sugid_clear_mode_string.h"
65 #include "mount/symlinkcache.h"
66 #include "mount/tweaks.h"
67 #include "mount/writedata.h"
68 #include "protocol/MFSCommunication.h"
69 #include "protocol/matocl.h"
70
71 #ifdef __APPLE__
72 #include "mount/osx_acl_converter.h"
73 #endif
74
75 #include "mount/stat_defs.h" // !!! This must be last include. Do not move !!!
76
77 namespace LizardClient {
78
// Upper bound on file size as a signed 64-bit value, so it can be compared
// directly with the signed st_size in setattr().
#define MAX_FILE_SIZE (int64_t)(MFS_MAX_FILE_SIZE)

// Package version folded into one integer: major*1000000 + minor*1000 + micro.
#define PKGVERSION \
	((LIZARDFS_PACKAGE_VERSION_MAJOR)*1000000 + \
	(LIZARDFS_PACKAGE_VERSION_MINOR)*1000 + \
	(LIZARDFS_PACKAGE_VERSION_MICRO))
85
86 // #define MASTER_NAME ".master"
87 // #define MASTER_INODE 0x7FFFFFFF
88 // 0x01b6 == 0666
89 // static uint8_t masterattr[35]={'f', 0x01,0xB6, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,0};
90
// True when `ino` is at or above SPECIAL_INODE_BASE.
#define IS_SPECIAL_INODE(ino) ((ino)>=SPECIAL_INODE_BASE)
// True when `name` is exactly one of the SPECIAL_FILE_NAME_* strings listed below.
// Names that do not start with '.' are rejected before any strcmp is performed.
// `name` is evaluated several times, so pass an expression without side effects.
#define IS_SPECIAL_NAME(name) ((name)[0]=='.' && (strcmp(SPECIAL_FILE_NAME_STATS,(name))==0 \
	|| strcmp(SPECIAL_FILE_NAME_MASTERINFO,(name))==0 || strcmp(SPECIAL_FILE_NAME_OPLOG,(name))==0 \
	|| strcmp(SPECIAL_FILE_NAME_OPHISTORY,(name))==0 || strcmp(SPECIAL_FILE_NAME_TWEAKS,(name))==0 \
	|| strcmp(SPECIAL_FILE_NAME_FILE_BY_INODE,(name))==0))
96
// File-local cache of group sets. updateGroups() looks sets up / stores them here and
// RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS fetches them back by index.
static GroupCache gGroupCache;

// Forward declaration; the definition is not in this part of the file.
// It is called right after a group set is put into gGroupCache and again when a request
// fails with LIZARDFS_ERROR_GROUPNOTREGISTERED.
// NOTE(review): presumably it sends the group set to the master - confirm at the definition.
static void update_credentials(Context::IdType index, const GroupCache::Groups &groups);
100
/**
 * Evaluates `function_expression` and stores the result in `status`. If the result is
 * LIZARDFS_ERROR_GROUPNOTREGISTERED, the group set is looked up in gGroupCache by
 * `group_id` with bit 31 flipped; when a non-empty set is found it is passed to
 * update_credentials() and `function_expression` is evaluated one more time.
 *
 * \param status              lvalue receiving the status code
 * \param group_id            gid value that was sent with the request
 * \param function_expression expression yielding a status code; MAY BE EVALUATED TWICE,
 *                            so it must be safe to repeat
 *
 * All identifiers introduced by the expansion carry a `retry_..._` prefix/suffix so that
 * they cannot capture a caller variable (e.g. one named `index` or `groups`) referenced
 * inside `function_expression` during the second evaluation.
 *
 * The expansion deliberately still ends with `;` after `while (0)`, because call sites
 * outside this part of the file may rely on it. Do not use the macro as the unbraced body
 * of an `if` that has an `else`.
 */
#define RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, group_id, function_expression) \
	do { \
		const uint32_t retry_secondary_groups_bit_ = (uint32_t)1 << 31; \
		status = function_expression; \
		if (status == LIZARDFS_ERROR_GROUPNOTREGISTERED) { \
			const uint32_t retry_group_index_ = (group_id) ^ retry_secondary_groups_bit_; \
			GroupCache::Groups retry_group_set_ = gGroupCache.findByIndex(retry_group_index_); \
			if (!retry_group_set_.empty()) { \
				update_credentials(retry_group_index_, retry_group_set_); \
				status = function_expression; \
			} \
		} \
	} while (0);
114
updateGroups(Context & ctx)115 void updateGroups(Context &ctx) {
116 static const uint32_t kSecondaryGroupsBit = (uint32_t)1 << 31;
117
118 if (ctx.gids.empty()) {
119 return;
120 }
121
122 if (ctx.gids.size() == 1) {
123 ctx.gid = ctx.gids[0];
124 return;
125 }
126
127 static_assert(sizeof(Context::IdType) >= sizeof(uint32_t), "IdType too small");
128
129 auto result = gGroupCache.find(ctx.gids);
130 Context::IdType gid = 0;
131 if (result.found == false) {
132 try {
133 uint32_t index = gGroupCache.put(ctx.gids);
134 update_credentials(index, ctx.gids);
135 gid = index | kSecondaryGroupsBit;
136 } catch (RequestException &e) {
137 lzfs_pretty_syslog(LOG_ERR, "Cannot update groups: %d", e.system_error_code);
138 }
139 } else {
140 gid = result.index | kSecondaryGroupsBit;
141 }
142
143 ctx.gid = gid;
144 }
145
getSpecialInodeByName(const char * name)146 Inode getSpecialInodeByName(const char *name) {
147 assert(name);
148
149 while (name[0] == '/') {
150 ++name;
151 }
152
153 if (strcmp(name, SPECIAL_FILE_NAME_MASTERINFO) == 0) {
154 return SPECIAL_INODE_MASTERINFO;
155 } else if (strcmp(name, SPECIAL_FILE_NAME_STATS) == 0) {
156 return SPECIAL_INODE_STATS;
157 } else if (strcmp(name, SPECIAL_FILE_NAME_TWEAKS) == 0) {
158 return SPECIAL_INODE_TWEAKS;
159 } else if (strcmp(name, SPECIAL_FILE_NAME_OPLOG) == 0) {
160 return SPECIAL_INODE_OPLOG;
161 } else if (strcmp(name, SPECIAL_FILE_NAME_OPHISTORY) == 0) {
162 return SPECIAL_INODE_OPHISTORY;
163 } else if (strcmp(name, SPECIAL_FILE_NAME_FILE_BY_INODE) == 0) {
164 return SPECIAL_INODE_FILE_BY_INODE;
165 } else {
166 return MAX_REGULAR_INODE;
167 }
168 }
169
isSpecialInode(Inode ino)170 bool isSpecialInode(Inode ino) {
171 return IS_SPECIAL_INODE(ino);
172 }
173
// I/O state constants.
// NOTE(review): presumably the values stored in finfo::mode - no use is visible in this
// part of the file, confirm against the open/read/write code.
enum {IO_NONE,IO_READ,IO_WRITE,IO_READONLY,IO_WRITEONLY};

// Per-file-handle bookkeeping record.
// NOTE(review): field meanings below are inferred from names only; none of the fields is
// used in this part of the file - confirm before relying on them.
typedef struct _finfo {
	uint8_t mode;              // presumably one of the IO_* constants above
	void *data;                // opaque pointer; its owner and real type are not visible here
	uint8_t use_flocks;        // presumably a flag for flock-style locks
	uint8_t use_posixlocks;    // presumably a flag for POSIX locks
	pthread_mutex_t lock;
	pthread_mutex_t flushlock;
} finfo;
184
// Directory entry / attribute cache. lookup() and getattr() consult it when usedircache
// is non-zero; setattr() invalidates the affected inode in it.
static DirEntryCache gDirEntryCache;
// NOTE(review): presumably the size limit of gDirEntryCache; not referenced in this part of the file.
static unsigned gDirEntryCacheMaxSize = 100000;

// Non-zero enables the additional oplog / log_debug lines guarded by `if (debug_mode)`.
static int debug_mode = 0;
// Non-zero lets lookup()/getattr() answer from gDirEntryCache and decides which of the
// cache-related counters statsptr_init() registers.
static int usedircache = 1;
// NOTE(review): not referenced in this part of the file.
static int keep_cache = 0;
// Entry timeout reported by lookup() for directories (0.0 is used instead when MATTR_NOECACHE is set).
static double direntry_cache_timeout = 0.1;
// Entry timeout reported by lookup() for everything that is not a directory (same MATTR_NOECACHE rule).
static double entry_cache_timeout = 0.0;
// Attribute timeout reported by lookup()/getattr()/setattr() (0.0 is used instead when MATTR_NOACACHE is set).
static double attr_cache_timeout = 0.1;
// NOTE(review): not referenced in this part of the file.
static int mkdir_copy_sgid = 0;
// Forwarded unchanged to fs_setattr() by setattr().
static int sugid_clear_mode = 0;
// NOTE(review): has external linkage (no `static`) unlike its neighbours; not referenced in this part of the file.
bool use_rwlock = 0;
// NOTE(review): not referenced in this part of the file.
static std::atomic<bool> gDirectIo(false);

// lock_request_counter shared by flock and setlk
static uint32_t lock_request_counter = 0;
// NOTE(review): presumably guards lock_request_counter - confirm at the points of use.
static std::mutex lock_request_mutex;


// ACL cache used by eraseAclCache(); it is dereferenced there without a null check,
// so it has to be set before that function is called.
static std::unique_ptr<AclCache> acl_cache;
205
// Erases entries of `inode` from acl_cache by passing the two key triples
// (inode, 0, 0) and (inode + 1, 0, 0) to AclCache::erase.
// NOTE(review): presumably these are the lower and upper bound of a key range covering
// every entry of that inode - confirm against AclCache::erase.
// acl_cache is dereferenced unconditionally, so it must already be set.
inline void eraseAclCache(Inode inode) {
	acl_cache->erase(
			inode    , 0, 0,
			inode + 1, 0, 0);
}
211
212 // TODO consider making oplog_printf asynchronous
213
214 /**
215 * A wrapper around pthread_rwlock, acquiring a lock during construction and releasing it during
216 * destruction in case if the lock wasn't released beforehand.
217 */
218 struct PthreadRwLockWrapper {
PthreadRwLockWrapperLizardClient::PthreadRwLockWrapper219 PthreadRwLockWrapper(pthread_rwlock_t& mutex, bool write = true)
220 : rwlock_(mutex), locked_(false) {
221 lock(write);
222 }
223
~PthreadRwLockWrapperLizardClient::PthreadRwLockWrapper224 ~PthreadRwLockWrapper() {
225 if (locked_) {
226 unlock();
227 }
228 }
229
lockLizardClient::PthreadRwLockWrapper230 void lock(bool write = true) {
231 sassert(!locked_);
232 if (write) {
233 pthread_rwlock_wrlock(&rwlock_);
234 } else {
235 pthread_rwlock_rdlock(&rwlock_);
236 }
237 locked_ = true;
238 }
unlockLizardClient::PthreadRwLockWrapper239 void unlock() {
240 sassert(locked_);
241 locked_ = false;
242 pthread_rwlock_unlock(&rwlock_);
243 }
244
245 private:
246 pthread_rwlock_t& rwlock_;
247 bool locked_;
248 };
249
250 static uint64_t *statsptr[STATNODES];
251
statsptr_init(void)252 void statsptr_init(void) {
253 void *s;
254 s = stats_get_subnode(NULL,"fuse_ops",0);
255 statsptr[OP_SETXATTR] = stats_get_counterptr(stats_get_subnode(s,"setxattr",0));
256 statsptr[OP_GETXATTR] = stats_get_counterptr(stats_get_subnode(s,"getxattr",0));
257 statsptr[OP_LISTXATTR] = stats_get_counterptr(stats_get_subnode(s,"listxattr",0));
258 statsptr[OP_REMOVEXATTR] = stats_get_counterptr(stats_get_subnode(s,"removexattr",0));
259 statsptr[OP_FSYNC] = stats_get_counterptr(stats_get_subnode(s,"fsync",0));
260 statsptr[OP_FLUSH] = stats_get_counterptr(stats_get_subnode(s,"flush",0));
261 statsptr[OP_WRITE] = stats_get_counterptr(stats_get_subnode(s,"write",0));
262 statsptr[OP_READ] = stats_get_counterptr(stats_get_subnode(s,"read",0));
263 statsptr[OP_RELEASE] = stats_get_counterptr(stats_get_subnode(s,"release",0));
264 statsptr[OP_OPEN] = stats_get_counterptr(stats_get_subnode(s,"open",0));
265 statsptr[OP_CREATE] = stats_get_counterptr(stats_get_subnode(s,"create",0));
266 statsptr[OP_RELEASEDIR] = stats_get_counterptr(stats_get_subnode(s,"releasedir",0));
267 statsptr[OP_READDIR] = stats_get_counterptr(stats_get_subnode(s,"readdir",0));
268 statsptr[OP_READRESERVED] = stats_get_counterptr(stats_get_subnode(s,"readreserved",0));
269 statsptr[OP_READTRASH] = stats_get_counterptr(stats_get_subnode(s,"readtrash",0));
270 statsptr[OP_OPENDIR] = stats_get_counterptr(stats_get_subnode(s,"opendir",0));
271 statsptr[OP_LINK] = stats_get_counterptr(stats_get_subnode(s,"link",0));
272 statsptr[OP_RENAME] = stats_get_counterptr(stats_get_subnode(s,"rename",0));
273 statsptr[OP_READLINK] = stats_get_counterptr(stats_get_subnode(s,"readlink",0));
274 statsptr[OP_READLINK_CACHED] = stats_get_counterptr(stats_get_subnode(s,"readlink-cached",0));
275 statsptr[OP_SYMLINK] = stats_get_counterptr(stats_get_subnode(s,"symlink",0));
276 statsptr[OP_RMDIR] = stats_get_counterptr(stats_get_subnode(s,"rmdir",0));
277 statsptr[OP_MKDIR] = stats_get_counterptr(stats_get_subnode(s,"mkdir",0));
278 statsptr[OP_UNLINK] = stats_get_counterptr(stats_get_subnode(s,"unlink",0));
279 statsptr[OP_UNDEL] = stats_get_counterptr(stats_get_subnode(s,"undel",0));
280 statsptr[OP_MKNOD] = stats_get_counterptr(stats_get_subnode(s,"mknod",0));
281 statsptr[OP_SETATTR] = stats_get_counterptr(stats_get_subnode(s,"setattr",0));
282 statsptr[OP_GETATTR] = stats_get_counterptr(stats_get_subnode(s,"getattr",0));
283 statsptr[OP_DIRCACHE_GETATTR] = stats_get_counterptr(stats_get_subnode(s,"getattr-cached",0));
284 statsptr[OP_LOOKUP] = stats_get_counterptr(stats_get_subnode(s,"lookup",0));
285 statsptr[OP_LOOKUP_INTERNAL] = stats_get_counterptr(stats_get_subnode(s,"lookup-internal",0));
286 if (usedircache) {
287 statsptr[OP_DIRCACHE_LOOKUP] = stats_get_counterptr(stats_get_subnode(s,"lookup-cached",0));
288 }
289 statsptr[OP_ACCESS] = stats_get_counterptr(stats_get_subnode(s,"access",0));
290 statsptr[OP_STATFS] = stats_get_counterptr(stats_get_subnode(s,"statfs",0));
291 if (usedircache) {
292 statsptr[OP_GETDIR_FULL] = stats_get_counterptr(stats_get_subnode(s,"getdir-full",0));
293 } else {
294 statsptr[OP_GETDIR_SMALL] = stats_get_counterptr(stats_get_subnode(s,"getdir-small",0));
295 }
296 statsptr[OP_GETLK] = stats_get_counterptr(stats_get_subnode(s,"getlk",0));
297 statsptr[OP_SETLK] = stats_get_counterptr(stats_get_subnode(s,"setlk",0));
298 statsptr[OP_FLOCK] = stats_get_counterptr(stats_get_subnode(s,"flock",0));
299 }
300
stats_inc(uint8_t id)301 void stats_inc(uint8_t id) {
302 if (id < STATNODES) {
303 stats_lock();
304 (*statsptr[id])++;
305 stats_unlock();
306 }
307 }
308
type_to_stat(uint32_t inode,uint8_t type,struct stat * stbuf)309 void type_to_stat(uint32_t inode,uint8_t type, struct stat *stbuf) {
310 memset(stbuf,0,sizeof(struct stat));
311 stbuf->st_ino = inode;
312 switch (type) {
313 case TYPE_DIRECTORY:
314 stbuf->st_mode = S_IFDIR;
315 break;
316 case TYPE_SYMLINK:
317 stbuf->st_mode = S_IFLNK;
318 break;
319 case TYPE_FILE:
320 stbuf->st_mode = S_IFREG;
321 break;
322 case TYPE_FIFO:
323 stbuf->st_mode = S_IFIFO;
324 break;
325 case TYPE_SOCKET:
326 stbuf->st_mode = S_IFSOCK;
327 break;
328 case TYPE_BLOCKDEV:
329 stbuf->st_mode = S_IFBLK;
330 break;
331 case TYPE_CHARDEV:
332 stbuf->st_mode = S_IFCHR;
333 break;
334 default:
335 stbuf->st_mode = 0;
336 }
337 }
338
attr_get_mattr(const Attributes & attr)339 uint8_t attr_get_mattr(const Attributes &attr) {
340 return (attr[1]>>4); // higher 4 bits of mode
341 }
342
attr_to_stat(uint32_t inode,const Attributes & attr,struct stat * stbuf)343 void attr_to_stat(uint32_t inode, const Attributes &attr, struct stat *stbuf) {
344 uint16_t attrmode;
345 uint8_t attrtype;
346 uint32_t attruid,attrgid,attratime,attrmtime,attrctime,attrnlink,attrrdev;
347 uint64_t attrlength;
348 const uint8_t *ptr;
349 ptr = attr.data();
350 attrtype = get8bit(&ptr);
351 attrmode = get16bit(&ptr);
352 attruid = get32bit(&ptr);
353 attrgid = get32bit(&ptr);
354 attratime = get32bit(&ptr);
355 attrmtime = get32bit(&ptr);
356 attrctime = get32bit(&ptr);
357 attrnlink = get32bit(&ptr);
358 memset(stbuf, 0, sizeof(*stbuf));
359 stbuf->st_ino = inode;
360 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLKSIZE
361 stbuf->st_blksize = MFSBLOCKSIZE;
362 #endif
363 switch (attrtype) {
364 case TYPE_DIRECTORY:
365 stbuf->st_mode = S_IFDIR | (attrmode & 07777);
366 attrlength = get64bit(&ptr);
367 stbuf->st_size = attrlength;
368 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
369 stbuf->st_blocks = (attrlength+511)/512;
370 #endif
371 break;
372 case TYPE_SYMLINK:
373 stbuf->st_mode = S_IFLNK | (attrmode & 07777);
374 attrlength = get64bit(&ptr);
375 stbuf->st_size = attrlength;
376 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
377 stbuf->st_blocks = (attrlength+511)/512;
378 #endif
379 break;
380 case TYPE_FILE:
381 stbuf->st_mode = S_IFREG | (attrmode & 07777);
382 attrlength = get64bit(&ptr);
383 stbuf->st_size = attrlength;
384 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
385 stbuf->st_blocks = (attrlength+511)/512;
386 #endif
387 break;
388 case TYPE_FIFO:
389 stbuf->st_mode = S_IFIFO | (attrmode & 07777);
390 stbuf->st_size = 0;
391 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
392 stbuf->st_blocks = 0;
393 #endif
394 break;
395 case TYPE_SOCKET:
396 stbuf->st_mode = S_IFSOCK | (attrmode & 07777);
397 stbuf->st_size = 0;
398 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
399 stbuf->st_blocks = 0;
400 #endif
401 break;
402 case TYPE_BLOCKDEV:
403 stbuf->st_mode = S_IFBLK | (attrmode & 07777);
404 attrrdev = get32bit(&ptr);
405 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_RDEV
406 stbuf->st_rdev = attrrdev;
407 #endif
408 stbuf->st_size = 0;
409 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
410 stbuf->st_blocks = 0;
411 #endif
412 break;
413 case TYPE_CHARDEV:
414 stbuf->st_mode = S_IFCHR | (attrmode & 07777);
415 attrrdev = get32bit(&ptr);
416 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_RDEV
417 stbuf->st_rdev = attrrdev;
418 #endif
419 stbuf->st_size = 0;
420 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
421 stbuf->st_blocks = 0;
422 #endif
423 break;
424 default:
425 stbuf->st_mode = 0;
426 }
427 stbuf->st_uid = attruid;
428 stbuf->st_gid = attrgid;
429 stbuf->st_atime = attratime;
430 stbuf->st_mtime = attrmtime;
431 stbuf->st_ctime = attrctime;
432 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BIRTHTIME
433 stbuf->st_birthtime = attrctime; // for future use
434 #endif
435 stbuf->st_nlink = attrnlink;
436 }
437
/**
 * Renders `mode` as an `ls -l`-like, NUL-terminated 10-character string.
 *
 * modestr[0] is the file type ('s' socket, 'l' symlink, '-' regular, 'b' block device,
 * 'd' directory, 'c' character device, 'f' FIFO, '?' anything else); modestr[1..9] are
 * the rwx triplets for user, group and others. Set-uid, set-gid and sticky replace the
 * respective execute slot with 's'/'s'/'t', upper-cased when the execute bit is clear.
 */
void makemodestr(char modestr[11],uint16_t mode) {
	char kind = '?';
	switch (mode & S_IFMT) {
	case S_IFSOCK: kind = 's'; break;
	case S_IFLNK:  kind = 'l'; break;
	case S_IFREG:  kind = '-'; break;
	case S_IFBLK:  kind = 'b'; break;
	case S_IFDIR:  kind = 'd'; break;
	case S_IFCHR:  kind = 'c'; break;
	case S_IFIFO:  kind = 'f'; break;
	}
	modestr[0] = kind;

	// Slot 0 is the user-read bit (bit 8), slot 8 the others-execute bit (bit 0).
	const char *granted = "rwxrwxrwx";
	for (int slot = 0; slot < 9; ++slot) {
		const bool bitSet = (mode & (1 << (8 - slot))) != 0;
		modestr[slot + 1] = bitSet ? granted[slot] : '-';
	}

	if (mode & S_ISUID) {
		modestr[3] = (mode & 0100) ? 's' : 'S';
	}
	if (mode & S_ISGID) {
		modestr[6] = (mode & 0010) ? 's' : 'S';
	}
	if (mode & S_ISVTX) {
		modestr[9] = (mode & 0001) ? 't' : 'T';
	}
	modestr[10] = '\0';
}
483
makeattrstr(char * buff,uint32_t size,struct stat * stbuf)484 void makeattrstr(char *buff,uint32_t size,struct stat *stbuf) {
485 char modestr[11];
486 makemodestr(modestr,stbuf->st_mode);
487 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_RDEV
488 if (modestr[0]=='b' || modestr[0]=='c') {
489 snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%" PRIu64 ",%08lX]",modestr,(unsigned int)(stbuf->st_mode),(unsigned int)(stbuf->st_nlink),(long int)stbuf->st_uid,(long int)stbuf->st_gid,(unsigned long int)(stbuf->st_atime),(unsigned long int)(stbuf->st_mtime),(unsigned long int)(stbuf->st_ctime),(uint64_t)(stbuf->st_size),(unsigned long int)(stbuf->st_rdev));
490 } else {
491 snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%" PRIu64 "]",modestr,(unsigned int)(stbuf->st_mode),(unsigned int)(stbuf->st_nlink),(long int)stbuf->st_uid,(long int)stbuf->st_gid,(unsigned long int)(stbuf->st_atime),(unsigned long int)(stbuf->st_mtime),(unsigned long int)(stbuf->st_ctime),(uint64_t)(stbuf->st_size));
492 }
493 #else
494 snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%" PRIu64 "]",modestr,(unsigned int)(stbuf->st_mode),(unsigned int)(stbuf->st_nlink),(long int)stbuf->st_uid,(long int)stbuf->st_gid,(unsigned long int)(stbuf->st_atime),(unsigned long int)(stbuf->st_mtime),(unsigned long int)(stbuf->st_ctime),(uint64_t)(stbuf->st_size));
495 #endif
496 }
497
RequestException(int error_code)498 RequestException::RequestException(int error_code) : system_error_code(), lizardfs_error_code() {
499 assert(error_code != LIZARDFS_STATUS_OK);
500
501 lizardfs_error_code = error_code;
502 system_error_code = lizardfs_error_conv(error_code);
503 if (debug_mode) {
504 lzfs::log_debug("status: {}", lizardfs_error_string(error_code));
505 }
506 }
507
statfs(const Context & ctx,Inode ino)508 struct statvfs statfs(const Context &ctx, Inode ino) {
509 uint64_t totalspace,availspace,trashspace,reservedspace;
510 uint32_t inodes;
511 uint32_t bsize;
512 struct statvfs stfsbuf;
513 memset(&stfsbuf,0,sizeof(stfsbuf));
514
515 stats_inc(OP_STATFS);
516 if (debug_mode) {
517 oplog_printf(ctx, "statfs (%lu)", (unsigned long int)ino);
518 }
519 (void)ino;
520 fs_statfs(&totalspace,&availspace,&trashspace,&reservedspace,&inodes);
521
522 #if defined(__APPLE__)
523 if (totalspace>0x0001000000000000ULL) {
524 bsize = 0x20000;
525 } else {
526 bsize = 0x10000;
527 }
528 #else
529 bsize = 0x10000;
530 #endif
531
532 stfsbuf.f_namemax = MFS_NAME_MAX;
533 stfsbuf.f_frsize = bsize;
534 stfsbuf.f_bsize = bsize;
535 #if defined(__APPLE__)
536 // FUSE on apple (or other parts of kernel) expects 32-bit values, so it's better to saturate this values than let being cut on 32-bit
537 // can't change bsize also because 64k seems to be the biggest acceptable value for bsize
538
539 if (totalspace/bsize>0xFFFFFFFFU) {
540 stfsbuf.f_blocks = 0xFFFFFFFFU;
541 } else {
542 stfsbuf.f_blocks = totalspace/bsize;
543 }
544 if (availspace/bsize>0xFFFFFFFFU) {
545 stfsbuf.f_bfree = 0xFFFFFFFFU;
546 stfsbuf.f_bavail = 0xFFFFFFFFU;
547 } else {
548 stfsbuf.f_bfree = availspace/bsize;
549 stfsbuf.f_bavail = availspace/bsize;
550 }
551 #else
552 stfsbuf.f_blocks = totalspace/bsize;
553 stfsbuf.f_bfree = availspace/bsize;
554 stfsbuf.f_bavail = availspace/bsize;
555 #endif
556 stfsbuf.f_files = MAX_REGULAR_INODE;
557 stfsbuf.f_ffree = MAX_REGULAR_INODE - inodes;
558 stfsbuf.f_favail = MAX_REGULAR_INODE - inodes;
559 //stfsbuf.f_flag = ST_RDONLY;
560 oplog_printf(ctx, "statfs (%lu): OK (%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu32 ")",
561 (unsigned long int)ino,
562 totalspace,
563 availspace,
564 trashspace,
565 reservedspace,
566 inodes);
567 return stfsbuf;
568 }
569
access(const Context & ctx,Inode ino,int mask)570 void access(const Context &ctx, Inode ino, int mask) {
571 int status;
572
573 int mmode;
574
575 oplog_printf(ctx, "access (%lu,0x%X)",
576 (unsigned long int)ino,
577 mask);
578 stats_inc(OP_ACCESS);
579 #if (R_OK==MODE_MASK_R) && (W_OK==MODE_MASK_W) && (X_OK==MODE_MASK_X)
580 mmode = mask & (MODE_MASK_R | MODE_MASK_W | MODE_MASK_X);
581 #else
582 mmode = 0;
583 if (mask & R_OK) {
584 mmode |= MODE_MASK_R;
585 }
586 if (mask & W_OK) {
587 mmode |= MODE_MASK_W;
588 }
589 if (mask & X_OK) {
590 mmode |= MODE_MASK_X;
591 }
592 #endif
593 if (IS_SPECIAL_INODE(ino)) {
594 if (mask & (W_OK | X_OK)) {
595 throw RequestException(LIZARDFS_ERROR_EACCES);
596 }
597 return;
598 }
599 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
600 fs_access(ino,ctx.uid,ctx.gid,mmode));
601 if (status != LIZARDFS_STATUS_OK) {
602 throw RequestException(status);
603 }
604 }
605
lookup(const Context & ctx,Inode parent,const char * name)606 EntryParam lookup(const Context &ctx, Inode parent, const char *name) {
607 EntryParam e;
608 uint64_t maxfleng;
609 uint32_t inode;
610 uint32_t nleng;
611 Attributes attr;
612 char attrstr[256];
613 uint8_t mattr;
614 uint8_t icacheflag;
615 int status;
616
617 if (debug_mode) {
618 oplog_printf(ctx, "lookup (%lu,%s) ...", (unsigned long int)parent, name);
619 }
620 nleng = strlen(name);
621 if (nleng > MFS_NAME_MAX) {
622 stats_inc(OP_LOOKUP);
623 oplog_printf(ctx, "lookup (%lu,%s): %s",
624 (unsigned long int)parent,
625 name,
626 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
627 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
628 }
629 if (parent == SPECIAL_INODE_ROOT) {
630 if (nleng == 2 && name[0] == '.' && name[1] == '.') {
631 nleng = 1;
632 }
633
634 Inode ino = getSpecialInodeByName(name);
635 if (IS_SPECIAL_INODE(ino)) {
636 return special_lookup(ino, ctx, parent, name, attrstr);
637 }
638 }
639 if (parent == SPECIAL_INODE_FILE_BY_INODE) {
640 char *endptr = nullptr;
641 inode = strtol(name, &endptr, 10);
642 if (endptr == nullptr || *endptr != '\0') {
643 throw RequestException(LIZARDFS_ERROR_EINVAL);
644 }
645 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
646 fs_getattr(inode, ctx.uid, ctx.gid, attr));
647 icacheflag = 0;
648 } else if (usedircache && gDirEntryCache.lookup(ctx,parent,std::string(name,nleng),inode,attr)) {
649 if (debug_mode) {
650 lzfs::log_debug("lookup: sending data from dircache");
651 }
652 stats_inc(OP_DIRCACHE_LOOKUP);
653 status = 0;
654 icacheflag = 1;
655 // oplog_printf(ctx, "lookup (%lu,%s) (using open dir cache): OK (%lu)",(unsigned long int)parent,name,(unsigned long int)inode);
656 } else {
657 stats_inc(OP_LOOKUP);
658 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
659 fs_lookup(parent, std::string(name, nleng), ctx.uid, ctx.gid, &inode, attr));
660 icacheflag = 0;
661 }
662 if (status != LIZARDFS_STATUS_OK) {
663 oplog_printf(ctx, "lookup (%lu,%s): %s",
664 (unsigned long int)parent,
665 name,
666 lizardfs_error_string(status));
667 throw RequestException(status);
668 }
669 if (attr[0]==TYPE_FILE) {
670 maxfleng = write_data_getmaxfleng(inode);
671 } else {
672 maxfleng = 0;
673 }
674 e.ino = inode;
675 mattr = attr_get_mattr(attr);
676 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
677 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:((attr[0]==TYPE_DIRECTORY)?direntry_cache_timeout:entry_cache_timeout);
678 attr_to_stat(inode,attr,&e.attr);
679 if (maxfleng>(uint64_t)(e.attr.st_size)) {
680 e.attr.st_size=maxfleng;
681 }
682 makeattrstr(attrstr,256,&e.attr);
683 oplog_printf(ctx, "lookup (%lu,%s)%s: OK (%.1f,%lu,%.1f,%s)",
684 (unsigned long int)parent,
685 name,
686 icacheflag?" (using open dir cache)":"",
687 e.entry_timeout,
688 (unsigned long int)e.ino,
689 e.attr_timeout,
690 attrstr);
691 return e;
692 }
693
getattr(const Context & ctx,Inode ino)694 AttrReply getattr(const Context &ctx, Inode ino) {
695 uint64_t maxfleng;
696 double attr_timeout;
697 struct stat o_stbuf;
698 Attributes attr;
699 char attrstr[256];
700 int status;
701
702 if (debug_mode) {
703 oplog_printf(ctx, "getattr (%lu) ...", (unsigned long int)ino);
704 }
705
706 if (IS_SPECIAL_INODE(ino)) {
707 return special_getattr(ino, ctx, attrstr);
708 }
709
710 maxfleng = write_data_getmaxfleng(ino);
711 if (usedircache && gDirEntryCache.lookup(ctx,ino,attr)) {
712 if (debug_mode) {
713 lzfs::log_debug("getattr: sending data from dircache\n");
714 }
715 stats_inc(OP_DIRCACHE_GETATTR);
716 status = LIZARDFS_STATUS_OK;
717 } else {
718 stats_inc(OP_GETATTR);
719 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
720 fs_getattr(ino,ctx.uid,ctx.gid,attr));
721 }
722 if (status != LIZARDFS_STATUS_OK) {
723 oplog_printf(ctx, "getattr (%lu): %s",
724 (unsigned long int)ino,
725 lizardfs_error_string(status));
726 throw RequestException(status);
727 }
728 memset(&o_stbuf, 0, sizeof(struct stat));
729 attr_to_stat(ino,attr,&o_stbuf);
730 if (attr[0]==TYPE_FILE && maxfleng>(uint64_t)(o_stbuf.st_size)) {
731 o_stbuf.st_size=maxfleng;
732 }
733 attr_timeout = (attr_get_mattr(attr)&MATTR_NOACACHE)?0.0:attr_cache_timeout;
734 makeattrstr(attrstr,256,&o_stbuf);
735 oplog_printf(ctx, "getattr (%lu): OK (%.1f,%s)",
736 (unsigned long int)ino,
737 attr_timeout,
738 attrstr);
739 return AttrReply{o_stbuf, attr_timeout};
740 }
741
setattr(const Context & ctx,Inode ino,struct stat * stbuf,int to_set)742 AttrReply setattr(const Context &ctx, Inode ino, struct stat *stbuf, int to_set) {
743 struct stat o_stbuf;
744 uint64_t maxfleng;
745 Attributes attr;
746 char modestr[11];
747 char attrstr[256];
748 double attr_timeout;
749 int status;
750
751 makemodestr(modestr,stbuf->st_mode);
752 stats_inc(OP_SETATTR);
753 if (debug_mode) {
754 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]) ...",
755 (unsigned long int)ino,
756 to_set,
757 modestr+1,
758 (unsigned int)(stbuf->st_mode & 07777),
759 (long int)stbuf->st_uid,
760 (long int)stbuf->st_gid,
761 (unsigned long int)(stbuf->st_atime),
762 (unsigned long int)(stbuf->st_mtime),
763 (uint64_t)(stbuf->st_size));
764 }
765
766 if (IS_SPECIAL_INODE(ino)) {
767 return special_setattr(ino, ctx, stbuf, to_set, modestr, attrstr);
768 }
769
770 status = LIZARDFS_ERROR_EINVAL;
771 maxfleng = write_data_getmaxfleng(ino);
772 if ((to_set & (LIZARDFS_SET_ATTR_MODE
773 | LIZARDFS_SET_ATTR_UID
774 | LIZARDFS_SET_ATTR_GID
775 | LIZARDFS_SET_ATTR_ATIME
776 | LIZARDFS_SET_ATTR_ATIME_NOW
777 | LIZARDFS_SET_ATTR_MTIME
778 | LIZARDFS_SET_ATTR_MTIME_NOW
779 | LIZARDFS_SET_ATTR_SIZE)) == 0) { // change other flags or change nothing
780 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
781 fs_setattr(ino,ctx.uid,ctx.gid,0,0,0,0,0,0,0,attr)); // ext3 compatibility - change ctime during this operation (usually chown(-1,-1))
782 if (status != LIZARDFS_STATUS_OK) {
783 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
784 (unsigned long int)ino,
785 to_set,
786 modestr+1,
787 (unsigned int)(stbuf->st_mode & 07777),
788 (long int)stbuf->st_uid,
789 (long int)stbuf->st_gid,
790 (unsigned long int)(stbuf->st_atime),
791 (unsigned long int)(stbuf->st_mtime),
792 (uint64_t)(stbuf->st_size),
793 lizardfs_error_string(status));
794 throw RequestException(status);
795 }
796 }
797 if (to_set & (LIZARDFS_SET_ATTR_MODE
798 | LIZARDFS_SET_ATTR_UID
799 | LIZARDFS_SET_ATTR_GID
800 | LIZARDFS_SET_ATTR_ATIME
801 | LIZARDFS_SET_ATTR_MTIME
802 | LIZARDFS_SET_ATTR_ATIME_NOW
803 | LIZARDFS_SET_ATTR_MTIME_NOW)) {
804 uint8_t setmask = 0;
805 if (to_set & LIZARDFS_SET_ATTR_MODE) {
806 setmask |= SET_MODE_FLAG;
807 }
808 if (to_set & LIZARDFS_SET_ATTR_UID) {
809 setmask |= SET_UID_FLAG;
810 }
811 if (to_set & LIZARDFS_SET_ATTR_GID) {
812 setmask |= SET_GID_FLAG;
813 }
814 if (to_set & LIZARDFS_SET_ATTR_ATIME) {
815 setmask |= SET_ATIME_FLAG;
816 }
817 if (to_set & LIZARDFS_SET_ATTR_ATIME_NOW) {
818 setmask |= SET_ATIME_NOW_FLAG;
819 }
820 if (to_set & LIZARDFS_SET_ATTR_MTIME) {
821 setmask |= SET_MTIME_FLAG;
822 }
823 if (to_set & LIZARDFS_SET_ATTR_MTIME_NOW) {
824 setmask |= SET_MTIME_NOW_FLAG;
825 }
826 if (to_set & (LIZARDFS_SET_ATTR_MTIME | LIZARDFS_SET_ATTR_MTIME_NOW)) {
827 // in this case we want flush all pending writes because they could overwrite mtime
828 write_data_flush_inode(ino);
829 }
830 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
831 fs_setattr(ino,ctx.uid,ctx.gid,setmask,stbuf->st_mode&07777,stbuf->st_uid,stbuf->st_gid,stbuf->st_atime,stbuf->st_mtime,sugid_clear_mode,attr));
832 if (to_set & (LIZARDFS_SET_ATTR_MODE | LIZARDFS_SET_ATTR_UID | LIZARDFS_SET_ATTR_GID)) {
833 eraseAclCache(ino);
834 }
835 if (status != LIZARDFS_STATUS_OK) {
836 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
837 (unsigned long int)ino,
838 to_set,
839 modestr+1,
840 (unsigned int)(stbuf->st_mode & 07777),
841 (long int)stbuf->st_uid,
842 (long int)stbuf->st_gid,
843 (unsigned long int)(stbuf->st_atime),
844 (unsigned long int)(stbuf->st_mtime),
845 (uint64_t)(stbuf->st_size),
846 lizardfs_error_string(status));
847 throw RequestException(status);
848 }
849 }
850 if (to_set & LIZARDFS_SET_ATTR_SIZE) {
851 if (stbuf->st_size<0) {
852 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
853 (unsigned long int)ino,
854 to_set,
855 modestr+1,
856 (unsigned int)(stbuf->st_mode & 07777),
857 (long int)stbuf->st_uid,
858 (long int)stbuf->st_gid,
859 (unsigned long int)(stbuf->st_atime),
860 (unsigned long int)(stbuf->st_mtime),
861 (uint64_t)(stbuf->st_size),
862 lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
863 throw RequestException(LIZARDFS_ERROR_EINVAL);
864 }
865 if (stbuf->st_size>=MAX_FILE_SIZE) {
866 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
867 (unsigned long int)ino,
868 to_set,
869 modestr+1,
870 (unsigned int)(stbuf->st_mode & 07777),
871 (long int)stbuf->st_uid,
872 (long int)stbuf->st_gid,
873 (unsigned long int)(stbuf->st_atime),
874 (unsigned long int)(stbuf->st_mtime),
875 (uint64_t)(stbuf->st_size),
876 lizardfs_error_string(LIZARDFS_ERROR_EFBIG));
877 throw RequestException(LIZARDFS_ERROR_EFBIG);
878 }
879 try {
880 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
881 write_data_truncate(ino, false, ctx.uid, ctx.gid, stbuf->st_size, attr));
882 maxfleng = 0; // after the flush master server has valid length, don't use our length cache
883 } catch (Exception& ex) {
884 status = ex.status();
885 }
886 read_inode_ops(ino);
887 if (status != LIZARDFS_STATUS_OK) {
888 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
889 (unsigned long int)ino,
890 to_set,
891 modestr+1,
892 (unsigned int)(stbuf->st_mode & 07777),
893 (long int)stbuf->st_uid,
894 (long int)stbuf->st_gid,
895 (unsigned long int)(stbuf->st_atime),
896 (unsigned long int)(stbuf->st_mtime),
897 (uint64_t)(stbuf->st_size),
898 lizardfs_error_string(status));
899 throw RequestException(status);
900 }
901 }
902 if (status != LIZARDFS_STATUS_OK) { // should never happen but better check than sorry
903 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
904 (unsigned long int)ino,
905 to_set,
906 modestr+1,
907 (unsigned int)(stbuf->st_mode & 07777),
908 (long int)stbuf->st_uid,
909 (long int)stbuf->st_gid,
910 (unsigned long int)(stbuf->st_atime),
911 (unsigned long int)(stbuf->st_mtime),
912 (uint64_t)(stbuf->st_size),
913 lizardfs_error_string(status));
914 throw RequestException(status);
915 }
916 gDirEntryCache.lockAndInvalidateInode(ino);
917 memset(&o_stbuf, 0, sizeof(struct stat));
918 attr_to_stat(ino,attr,&o_stbuf);
919 if (attr[0]==TYPE_FILE && maxfleng>(uint64_t)(o_stbuf.st_size)) {
920 o_stbuf.st_size=maxfleng;
921 }
922 attr_timeout = (attr_get_mattr(attr)&MATTR_NOACACHE)?0.0:attr_cache_timeout;
923 makeattrstr(attrstr,256,&o_stbuf);
924 oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): OK (%.1f,%s)",
925 (unsigned long int)ino,
926 to_set,
927 modestr+1,
928 (unsigned int)(stbuf->st_mode & 07777),
929 (long int)stbuf->st_uid,
930 (long int)stbuf->st_gid,
931 (unsigned long int)(stbuf->st_atime),
932 (unsigned long int)(stbuf->st_mtime),
933 (uint64_t)(stbuf->st_size),
934 attr_timeout,
935 attrstr);
936 return AttrReply{o_stbuf, attr_timeout};
937 }
938
mknod(const Context & ctx,Inode parent,const char * name,mode_t mode,dev_t rdev)939 EntryParam mknod(const Context &ctx, Inode parent, const char *name, mode_t mode, dev_t rdev) {
940 EntryParam e;
941 uint32_t inode;
942 Attributes attr;
943 char modestr[11];
944 char attrstr[256];
945 uint8_t mattr;
946 uint32_t nleng;
947 int status;
948 uint8_t type;
949
950 makemodestr(modestr,mode);
951 stats_inc(OP_MKNOD);
952 if (debug_mode) {
953 oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX) ...",
954 (unsigned long int)parent,
955 name,
956 modestr,
957 (unsigned int)mode,
958 (unsigned long int)rdev);
959 }
960 nleng = strlen(name);
961 if (nleng>MFS_NAME_MAX) {
962 oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
963 (unsigned long int)parent,
964 name,
965 modestr,
966 (unsigned int)mode,
967 (unsigned long int)rdev,
968 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
969 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
970 }
971 if (S_ISFIFO(mode)) {
972 type = TYPE_FIFO;
973 } else if (S_ISCHR(mode)) {
974 type = TYPE_CHARDEV;
975 } else if (S_ISBLK(mode)) {
976 type = TYPE_BLOCKDEV;
977 } else if (S_ISSOCK(mode)) {
978 type = TYPE_SOCKET;
979 } else if (S_ISREG(mode) || (mode&0170000)==0) {
980 type = TYPE_FILE;
981 } else {
982 oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
983 (unsigned long int)parent,
984 name,
985 modestr,
986 (unsigned int)mode,
987 (unsigned long int)rdev,
988 lizardfs_error_string(LIZARDFS_ERROR_EPERM));
989 throw RequestException(LIZARDFS_ERROR_EPERM);
990 }
991
992 if (parent==SPECIAL_INODE_ROOT) {
993 if (IS_SPECIAL_NAME(name)) {
994 oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
995 (unsigned long int)parent,
996 name,
997 modestr,
998 (unsigned int)mode,
999 (unsigned long int)rdev,
1000 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1001 throw RequestException(LIZARDFS_ERROR_EACCES);
1002 }
1003 }
1004 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1005 fs_mknod(parent,nleng,(const uint8_t*)name,type,mode&07777,ctx.umask,ctx.uid,ctx.gid,rdev,inode,attr));
1006 if (status != LIZARDFS_STATUS_OK) {
1007 oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
1008 (unsigned long int)parent,
1009 name,
1010 modestr,
1011 (unsigned int)mode,
1012 (unsigned long int)rdev,
1013 lizardfs_error_string(status));
1014 throw RequestException(status);
1015 } else {
1016 gDirEntryCache.lockAndInvalidateParent(ctx, parent);
1017 e.ino = inode;
1018 mattr = attr_get_mattr(attr);
1019 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1020 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1021 attr_to_stat(inode,attr,&e.attr);
1022 makeattrstr(attrstr,256,&e.attr);
1023 oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): OK (%.1f,%lu,%.1f,%s)",
1024 (unsigned long int)parent,
1025 name,
1026 modestr,
1027 (unsigned int)mode,
1028 (unsigned long int)rdev,
1029 e.entry_timeout,
1030 (unsigned long int)e.ino,
1031 e.attr_timeout,
1032 attrstr);
1033 return e;
1034 }
1035 }
1036
unlink(const Context & ctx,Inode parent,const char * name)1037 void unlink(const Context &ctx, Inode parent, const char *name) {
1038 uint32_t nleng;
1039 int status;
1040
1041 stats_inc(OP_UNLINK);
1042 if (debug_mode) {
1043 oplog_printf(ctx, "unlink (%lu,%s) ...", (unsigned long int)parent, name);
1044 }
1045 if (parent==SPECIAL_INODE_ROOT) {
1046 if (IS_SPECIAL_NAME(name)) {
1047 oplog_printf(ctx, "unlink (%lu,%s): %s",
1048 (unsigned long int)parent,
1049 name,
1050 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1051 throw RequestException(LIZARDFS_ERROR_EACCES);
1052 }
1053 }
1054
1055 nleng = strlen(name);
1056 if (nleng>MFS_NAME_MAX) {
1057 oplog_printf(ctx, "unlink (%lu,%s): %s",
1058 (unsigned long int)parent,
1059 name,
1060 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1061 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1062 }
1063
1064 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1065 fs_unlink(parent,nleng,(const uint8_t*)name,ctx.uid,ctx.gid));
1066 gDirEntryCache.lockAndInvalidateParent(parent);
1067 if (status != LIZARDFS_STATUS_OK) {
1068 oplog_printf(ctx, "unlink (%lu,%s): %s",
1069 (unsigned long int)parent,
1070 name,
1071 lizardfs_error_string(status));
1072 throw RequestException(status);
1073 } else {
1074 oplog_printf(ctx, "unlink (%lu,%s): OK",
1075 (unsigned long int)parent,
1076 name);
1077 return;
1078 }
1079 }
1080
undel(const Context & ctx,Inode ino)1081 void undel(const Context &ctx, Inode ino) {
1082 stats_inc(OP_UNDEL);
1083 if (debug_mode) {
1084 oplog_printf(ctx, "undel (%lu) ...", (unsigned long)ino);
1085 }
1086 uint8_t status;
1087 // FIXME(haze): modify undel to return parent inode and call gDirEntryCache.lockAndInvalidateParent(parent)
1088 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid, fs_undel(ino));
1089 if (status != LIZARDFS_STATUS_OK) {
1090 throw RequestException(status);
1091 }
1092 }
1093
mkdir(const Context & ctx,Inode parent,const char * name,mode_t mode)1094 EntryParam mkdir(const Context &ctx, Inode parent, const char *name, mode_t mode) {
1095 struct EntryParam e;
1096 uint32_t inode;
1097 Attributes attr;
1098 char modestr[11];
1099 char attrstr[256];
1100 uint8_t mattr;
1101 uint32_t nleng;
1102 int status;
1103
1104 makemodestr(modestr,mode);
1105 stats_inc(OP_MKDIR);
1106 if (debug_mode) {
1107 oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o) ...",
1108 (unsigned long int)parent,
1109 name,
1110 modestr+1,
1111 (unsigned int)mode);
1112 }
1113 if (parent==SPECIAL_INODE_ROOT) {
1114 if (IS_SPECIAL_NAME(name)) {
1115 oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): %s",
1116 (unsigned long int)parent,
1117 name,
1118 modestr+1,
1119 (unsigned int)mode,
1120 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1121 throw RequestException(LIZARDFS_ERROR_EACCES);
1122 }
1123 }
1124 nleng = strlen(name);
1125 if (nleng>MFS_NAME_MAX) {
1126 oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): %s",
1127 (unsigned long int)parent,
1128 name,
1129 modestr+1,
1130 (unsigned int)mode,
1131 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1132 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1133 }
1134
1135 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1136 fs_mkdir(parent,nleng,(const uint8_t*)name,mode,ctx.umask,ctx.uid,ctx.gid,mkdir_copy_sgid,inode,attr));
1137 if (status != LIZARDFS_STATUS_OK) {
1138 oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): %s",
1139 (unsigned long int)parent,
1140 name,
1141 modestr+1,
1142 (unsigned int)mode,
1143 lizardfs_error_string(status));
1144 throw RequestException(status);
1145 } else {
1146 gDirEntryCache.lockAndInvalidateParent(parent);
1147 e.ino = inode;
1148 mattr = attr_get_mattr(attr);
1149 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1150 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:direntry_cache_timeout;
1151 attr_to_stat(inode,attr,&e.attr);
1152 makeattrstr(attrstr,256,&e.attr);
1153 oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): OK (%.1f,%lu,%.1f,%s)",
1154 (unsigned long int)parent,
1155 name,
1156 modestr+1,
1157 (unsigned int)mode,
1158 e.entry_timeout,
1159 (unsigned long int)e.ino,
1160 e.attr_timeout,
1161 attrstr);
1162 return e;
1163 }
1164 }
1165
rmdir(const Context & ctx,Inode parent,const char * name)1166 void rmdir(const Context &ctx, Inode parent, const char *name) {
1167 uint32_t nleng;
1168 int status;
1169
1170 stats_inc(OP_RMDIR);
1171 if (debug_mode) {
1172 oplog_printf(ctx, "rmdir (%lu,%s) ...", (unsigned long int)parent, name);
1173 }
1174 if (parent==SPECIAL_INODE_ROOT) {
1175 if (IS_SPECIAL_NAME(name)) {
1176 oplog_printf(ctx, "rmdir (%lu,%s): %s",
1177 (unsigned long int)parent,
1178 name,
1179 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1180 throw RequestException(LIZARDFS_ERROR_EACCES);
1181 }
1182 }
1183 nleng = strlen(name);
1184 if (nleng>MFS_NAME_MAX) {
1185 oplog_printf(ctx, "rmdir (%lu,%s): %s",
1186 (unsigned long int)parent,
1187 name,
1188 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1189 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1190 }
1191
1192 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1193 fs_rmdir(parent,nleng,(const uint8_t*)name,ctx.uid,ctx.gid));
1194 gDirEntryCache.lockAndInvalidateParent(parent);
1195 if (status != LIZARDFS_STATUS_OK) {
1196 oplog_printf(ctx, "rmdir (%lu,%s): %s",
1197 (unsigned long int)parent,
1198 name,
1199 lizardfs_error_string(status));
1200 throw RequestException(status);
1201 } else {
1202 oplog_printf(ctx, "rmdir (%lu,%s): OK",
1203 (unsigned long int)parent,
1204 name);
1205 return;
1206 }
1207 }
1208
symlink(const Context & ctx,const char * path,Inode parent,const char * name)1209 EntryParam symlink(const Context &ctx, const char *path, Inode parent,
1210 const char *name) {
1211 struct EntryParam e;
1212 uint32_t inode;
1213 Attributes attr;
1214 char attrstr[256];
1215 uint8_t mattr;
1216 uint32_t nleng;
1217 int status;
1218
1219 stats_inc(OP_SYMLINK);
1220 if (debug_mode) {
1221 oplog_printf(ctx, "symlink (%s,%lu,%s) ...",
1222 path,
1223 (unsigned long int)parent,
1224 name);
1225 }
1226 if (parent==SPECIAL_INODE_ROOT) {
1227 if (IS_SPECIAL_NAME(name)) {
1228 oplog_printf(ctx, "symlink (%s,%lu,%s): %s",
1229 path,
1230 (unsigned long int)parent,
1231 name,
1232 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1233 throw RequestException(LIZARDFS_ERROR_EACCES);
1234 }
1235 }
1236 nleng = strlen(name);
1237 if (nleng>MFS_NAME_MAX) {
1238 oplog_printf(ctx, "symlink (%s,%lu,%s): %s",
1239 path,
1240 (unsigned long int)parent,
1241 name,
1242 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1243 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1244 }
1245
1246 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1247 fs_symlink(parent,nleng,(const uint8_t*)name,(const uint8_t*)path,ctx.uid,ctx.gid,&inode,attr));
1248 if (status != LIZARDFS_STATUS_OK) {
1249 oplog_printf(ctx, "symlink (%s,%lu,%s): %s",
1250 path,
1251 (unsigned long int)parent,
1252 name,
1253 lizardfs_error_string(status));
1254 throw RequestException(status);
1255 } else {
1256 gDirEntryCache.lockAndInvalidateParent(parent);
1257 e.ino = inode;
1258 mattr = attr_get_mattr(attr);
1259 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1260 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1261 attr_to_stat(inode,attr,&e.attr);
1262 makeattrstr(attrstr,256,&e.attr);
1263 symlink_cache_insert(inode, (const uint8_t *)path);
1264 oplog_printf(ctx, "symlink (%s,%lu,%s): OK (%.1f,%lu,%.1f,%s)",
1265 path,
1266 (unsigned long int)parent,
1267 name,
1268 e.entry_timeout,
1269 (unsigned long int)e.ino,
1270 e.attr_timeout,
1271 attrstr);
1272 return e;
1273 }
1274 }
1275
readlink(const Context & ctx,Inode ino)1276 std::string readlink(const Context &ctx, Inode ino) {
1277 int status;
1278 const uint8_t *path;
1279
1280 if (debug_mode) {
1281 oplog_printf(ctx, "readlink (%lu) ...",
1282 (unsigned long int)ino);
1283 }
1284 if (symlink_cache_search(ino,&path)) {
1285 stats_inc(OP_READLINK_CACHED);
1286 oplog_printf(ctx, "readlink (%lu) (using cache): OK (%s)",
1287 (unsigned long int)ino,
1288 (char*)path);
1289 return std::string((char*)path);
1290 }
1291 stats_inc(OP_READLINK);
1292 status = fs_readlink(ino,&path);
1293 if (status != LIZARDFS_STATUS_OK) {
1294 oplog_printf(ctx, "readlink (%lu): %s",
1295 (unsigned long int)ino,
1296 lizardfs_error_string(status));
1297 throw RequestException(status);
1298 } else {
1299 symlink_cache_insert(ino,path);
1300 oplog_printf(ctx, "readlink (%lu): OK (%s)",
1301 (unsigned long int)ino,
1302 (char*)path);
1303 return std::string((char*)path);
1304 }
1305 }
1306
rename(const Context & ctx,Inode parent,const char * name,Inode newparent,const char * newname)1307 void rename(const Context &ctx, Inode parent, const char *name,
1308 Inode newparent, const char *newname) {
1309 uint32_t nleng,newnleng;
1310 int status;
1311 uint32_t inode;
1312 Attributes attr;
1313
1314 stats_inc(OP_RENAME);
1315 if (debug_mode) {
1316 oplog_printf(ctx, "rename (%lu,%s,%lu,%s) ...",
1317 (unsigned long int)parent,
1318 name,
1319 (unsigned long int)newparent,
1320 newname);
1321 }
1322 if (parent==SPECIAL_INODE_ROOT) {
1323 if (IS_SPECIAL_NAME(name)) {
1324 oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1325 (unsigned long int)parent,
1326 name,
1327 (unsigned long int)newparent,
1328 newname,
1329 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1330 throw RequestException(LIZARDFS_ERROR_EACCES);
1331 }
1332 }
1333 if (newparent==SPECIAL_INODE_ROOT) {
1334 if (IS_SPECIAL_NAME(newname)) {
1335 oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1336 (unsigned long int)parent,
1337 name,
1338 (unsigned long int)newparent,
1339 newname,
1340 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1341 throw RequestException(LIZARDFS_ERROR_EACCES);
1342 }
1343 }
1344 nleng = strlen(name);
1345 if (nleng>MFS_NAME_MAX) {
1346 oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1347 (unsigned long int)parent,
1348 name,
1349 (unsigned long int)newparent,
1350 newname,
1351 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1352 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1353 }
1354 newnleng = strlen(newname);
1355 if (newnleng>MFS_NAME_MAX) {
1356 oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1357 (unsigned long int)parent,
1358 name,
1359 (unsigned long int)newparent,
1360 newname,
1361 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1362 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1363 }
1364
1365 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1366 fs_rename(parent,nleng,(const uint8_t*)name,newparent,newnleng,(const uint8_t*)newname,ctx.uid,ctx.gid,&inode,attr));
1367 gDirEntryCache.lockAndInvalidateParent(parent);
1368 gDirEntryCache.lockAndInvalidateParent(newparent);
1369 if (status != LIZARDFS_STATUS_OK) {
1370 oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1371 (unsigned long int)parent,
1372 name,
1373 (unsigned long int)newparent,
1374 newname,
1375 lizardfs_error_string(status));
1376 throw RequestException(status);
1377 } else {
1378 oplog_printf(ctx, "rename (%lu,%s,%lu,%s): OK",
1379 (unsigned long int)parent,
1380 name,
1381 (unsigned long int)newparent,
1382 newname);
1383 return;
1384 }
1385 }
1386
link(const Context & ctx,Inode ino,Inode newparent,const char * newname)1387 EntryParam link(const Context &ctx, Inode ino, Inode newparent, const char *newname) {
1388 uint32_t newnleng;
1389 int status;
1390 EntryParam e;
1391 uint32_t inode;
1392 Attributes attr;
1393 char attrstr[256];
1394 uint8_t mattr;
1395
1396
1397 stats_inc(OP_LINK);
1398 if (debug_mode) {
1399 oplog_printf(ctx, "link (%lu,%lu,%s) ...",
1400 (unsigned long int)ino,
1401 (unsigned long int)newparent,
1402 newname);
1403 }
1404 if (IS_SPECIAL_INODE(ino)) {
1405 oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1406 (unsigned long int)ino,
1407 (unsigned long int)newparent,
1408 newname,
1409 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1410 throw RequestException(LIZARDFS_ERROR_EACCES);
1411 }
1412 if (newparent==SPECIAL_INODE_ROOT) {
1413 if (IS_SPECIAL_NAME(newname)) {
1414 oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1415 (unsigned long int)ino,
1416 (unsigned long int)newparent,
1417 newname,
1418 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1419 throw RequestException(LIZARDFS_ERROR_EACCES);
1420 }
1421 }
1422 newnleng = strlen(newname);
1423 if (newnleng>MFS_NAME_MAX) {
1424 oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1425 (unsigned long int)ino,
1426 (unsigned long int)newparent,
1427 newname,
1428 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1429 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1430 }
1431
1432 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1433 fs_link(ino,newparent,newnleng,(const uint8_t*)newname,ctx.uid,ctx.gid,&inode,attr));
1434 if (status != LIZARDFS_STATUS_OK) {
1435 oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1436 (unsigned long int)ino,
1437 (unsigned long int)newparent,
1438 newname,
1439 lizardfs_error_string(status));
1440 throw RequestException(status);
1441 } else {
1442 gDirEntryCache.lockAndInvalidateParent(newparent);
1443 e.ino = inode;
1444 mattr = attr_get_mattr(attr);
1445 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1446 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1447 attr_to_stat(inode,attr,&e.attr);
1448 makeattrstr(attrstr,256,&e.attr);
1449 oplog_printf(ctx, "link (%lu,%lu,%s): OK (%.1f,%lu,%.1f,%s)",
1450 (unsigned long int)ino,
1451 (unsigned long int)newparent,
1452 newname,
1453 e.entry_timeout,
1454 (unsigned long int)e.ino,
1455 e.attr_timeout,
1456 attrstr);
1457 return e;
1458 }
1459 }
1460
opendir(const Context & ctx,Inode ino)1461 void opendir(const Context &ctx, Inode ino) {
1462 int status;
1463
1464 stats_inc(OP_OPENDIR);
1465 if (debug_mode) {
1466 oplog_printf(ctx, "opendir (%lu) ...", (unsigned long int)ino);
1467 }
1468 if (IS_SPECIAL_INODE(ino)) {
1469 oplog_printf(ctx, "opendir (%lu): %s",
1470 (unsigned long int)ino,
1471 lizardfs_error_string(LIZARDFS_ERROR_ENOTDIR));
1472 throw RequestException(LIZARDFS_ERROR_ENOTDIR);
1473 }
1474
1475 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1476 fs_access(ino,ctx.uid,ctx.gid,MODE_MASK_R)); // at least test rights
1477 if (status != LIZARDFS_STATUS_OK) {
1478 oplog_printf(ctx, "opendir (%lu): %s",
1479 (unsigned long int)ino,
1480 lizardfs_error_string(status));
1481 throw RequestException(status);
1482 }
1483 }
1484
/**
 * Returns up to `max_entries` entries of directory `ino`, starting at position `off`.
 *
 * Entries are first taken from gDirEntryCache while they are valid and contiguous.
 * If the cache cannot satisfy the whole request, the remainder is fetched with
 * fs_getdir, stored in the cache, and appended to the result.
 *
 * Throws RequestException(LIZARDFS_ERROR_EINVAL) for a negative offset and
 * RequestException(status) when fs_getdir fails.
 */
std::vector<DirEntry> readdir(const Context &ctx, Inode ino, off_t off, size_t max_entries) {
    // Lower bound on the number of entries requested from fs_getdir in one call.
    static constexpr int kBatchSize = 1000;

    stats_inc(OP_READDIR);
    if (debug_mode) {
        oplog_printf(ctx, "readdir (%lu,%" PRIu64 ",%" PRIu64 ") ...",
                (unsigned long int)ino,
                (uint64_t)max_entries,
                (uint64_t)off);
    }
    if (off<0) {
        oplog_printf(ctx, "readdir (%lu,%" PRIu64 ",%" PRIu64 "): %s",
                (unsigned long int)ino,
                (uint64_t)max_entries,
                (uint64_t)off,
                lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
        throw RequestException(LIZARDFS_ERROR_EINVAL);
    }

    std::vector<DirEntry> result;
    // Phase 1: serve as much as possible from the cache under a shared (read) lock.
    shared_lock<shared_mutex> access_guard(gDirEntryCache.rwlock());
    gDirEntryCache.updateTime();

    uint64_t entry_index = off;
    auto it = gDirEntryCache.find(ctx, ino, entry_index);

    result.reserve(max_entries);
    for(;it != gDirEntryCache.index_end() && max_entries > 0;++it) {
        // Stop at the first invalid entry or at a gap in the index sequence.
        if (!gDirEntryCache.isValid(it) || it->index != entry_index) {
            break;
        }

        if (it->inode == 0) {
            // we have valid 'no more entries' marker
            assert(it->name.empty());
            // Zeroing max_entries makes the check below return what was collected so far.
            max_entries = 0;
            break;
        }

        ++entry_index;
        --max_entries;

        struct stat stats;
        attr_to_stat(it->inode,it->attr,&stats);
        // entry_index was already advanced, so each entry carries the index following it.
        result.emplace_back(it->name, stats, entry_index);
    }

    if (max_entries == 0) {
        return result;
    }

    // The read lock is released before the fs_getdir request is made.
    access_guard.unlock();

    // Phase 2: fetch the missing part, asking for at least kBatchSize entries but no
    // more than the fuseGetDir limit.
    std::vector<DirectoryEntry> dir_entries;
    uint8_t status;
    uint64_t request_size = std::min<std::size_t>(std::max<std::size_t>(kBatchSize, max_entries),
            matocl::fuseGetDir::kMaxNumberOfDirectoryEntries);
    RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
            fs_getdir(ino, ctx.uid, ctx.gid, entry_index, request_size, dir_entries));
    // Time taken right after the fetch; passed to the cache with the inserted entries.
    auto data_acquire_time = gDirEntryCache.updateTime();

    if(status != LIZARDFS_STATUS_OK) {
        throw RequestException(status);
    }

    // Phase 3: store the fetched entries in the cache under an exclusive (write) lock.
    std::unique_lock<shared_mutex> write_guard(gDirEntryCache.rwlock());
    gDirEntryCache.updateTime();

    gDirEntryCache.insertSubsequent(ctx, ino, entry_index, dir_entries, data_acquire_time);
    // Fewer entries than requested: the end of the directory was reached.
    if (dir_entries.size() < request_size) {
        // insert 'no more entries' marker
        gDirEntryCache.insert(ctx, ino, 0, entry_index + dir_entries.size(), "", Attributes{{}}, data_acquire_time);
        // Invalidate cached entries starting at the index just past the marker.
        gDirEntryCache.invalidate(ctx,ino,entry_index + dir_entries.size() + 1);
    }

    // Keep the cache within its configured size limit.
    if (gDirEntryCache.size() > gDirEntryCacheMaxSize) {
        gDirEntryCache.removeOldest(gDirEntryCache.size() - gDirEntryCacheMaxSize);
    }

    write_guard.unlock();

    // Append fetched entries until the caller's limit is reached; the fetch may have
    // returned more than was asked for by the caller.
    for(auto it = dir_entries.begin(); it != dir_entries.end() && max_entries > 0; ++it) {
        --max_entries;
        ++entry_index;

        struct stat stats;
        attr_to_stat(it->inode,it->attributes,&stats);
        result.emplace_back(it->name, stats, entry_index);
    }

    return result;
}
1577
readreserved(const Context & ctx,NamedInodeOffset off,NamedInodeOffset max_entries)1578 std::vector<NamedInodeEntry> readreserved(const Context &ctx, NamedInodeOffset off, NamedInodeOffset max_entries) {
1579 stats_inc(OP_READRESERVED);
1580 if (debug_mode) {
1581 oplog_printf(ctx, "readreserved (%" PRIu64 ",%" PRIu64 ") ...",
1582 (uint64_t)max_entries,
1583 (uint64_t)off);
1584 }
1585
1586 std::vector<NamedInodeEntry> entries;
1587 uint8_t status;
1588 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1589 fs_getreserved(off, max_entries, entries));
1590
1591 if (status != LIZARDFS_STATUS_OK) {
1592 throw RequestException(status);
1593 }
1594
1595 return entries;
1596 }
1597
readtrash(const Context & ctx,NamedInodeOffset off,NamedInodeOffset max_entries)1598 std::vector<NamedInodeEntry> readtrash(const Context &ctx, NamedInodeOffset off, NamedInodeOffset max_entries) {
1599 stats_inc(OP_READTRASH);
1600 if (debug_mode) {
1601 oplog_printf(ctx, "readtrash (%" PRIu64 ",%" PRIu64 ") ...",
1602 (uint64_t)max_entries,
1603 (uint64_t)off);
1604 }
1605
1606 std::vector<NamedInodeEntry> entries;
1607 uint8_t status;
1608 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1609 fs_gettrash(off, max_entries, entries));
1610
1611 if (status != LIZARDFS_STATUS_OK) {
1612 throw RequestException(status);
1613 }
1614
1615 return entries;
1616 }
1617
releasedir(Inode ino)1618 void releasedir(Inode ino) {
1619 static constexpr int kBatchSize = 1000;
1620
1621 stats_inc(OP_RELEASEDIR);
1622 if (debug_mode) {
1623 oplog_printf("releasedir (%lu) ...",
1624 (unsigned long int)ino);
1625 }
1626 oplog_printf("releasedir (%lu): OK",
1627 (unsigned long int)ino);
1628
1629 std::unique_lock<shared_mutex> write_guard(gDirEntryCache.rwlock());
1630 gDirEntryCache.updateTime();
1631 gDirEntryCache.removeExpired(kBatchSize);
1632 }
1633
1634
fs_newfileinfo(uint8_t accmode,uint32_t inode)1635 static finfo* fs_newfileinfo(uint8_t accmode, uint32_t inode) {
1636 finfo *fileinfo;
1637 fileinfo = (finfo*) malloc(sizeof(finfo));
1638 pthread_mutex_init(&(fileinfo->flushlock),NULL);
1639 pthread_mutex_init(&(fileinfo->lock),NULL);
1640 PthreadMutexWrapper lock((fileinfo->lock)); // make helgrind happy
1641 #ifdef __FreeBSD__
1642 /* old FreeBSD fuse reads whole file when opening with O_WRONLY|O_APPEND,
1643 * so can't open it write-only */
1644 (void)accmode;
1645 (void)inode;
1646 fileinfo->mode = IO_NONE;
1647 fileinfo->data = NULL;
1648 #else
1649 if (accmode == O_RDONLY) {
1650 fileinfo->mode = IO_READONLY;
1651 fileinfo->data = read_data_new(inode);
1652 } else if (accmode == O_WRONLY) {
1653 fileinfo->mode = IO_WRITEONLY;
1654 fileinfo->data = write_data_new(inode);
1655 } else {
1656 fileinfo->mode = IO_NONE;
1657 fileinfo->data = NULL;
1658 }
1659 #endif
1660 fileinfo->use_flocks = false;
1661 fileinfo->use_posixlocks = false;
1662
1663 return fileinfo;
1664 }
1665
remove_file_info(FileInfo * f)1666 void remove_file_info(FileInfo *f) {
1667 finfo* fileinfo = (finfo*)(f->fh);
1668 PthreadMutexWrapper lock(fileinfo->lock);
1669 if (fileinfo->mode == IO_READONLY || fileinfo->mode == IO_READ) {
1670 read_data_end(fileinfo->data);
1671 } else if (fileinfo->mode == IO_WRITEONLY || fileinfo->mode == IO_WRITE) {
1672 write_data_end(fileinfo->data);
1673 }
1674 lock.unlock(); // This unlock is needed, since we want to destroy the mutex
1675 pthread_mutex_destroy(&(fileinfo->lock));
1676 pthread_mutex_destroy(&(fileinfo->flushlock));
1677 free(fileinfo);
1678 }
1679
create(const Context & ctx,Inode parent,const char * name,mode_t mode,FileInfo * fi)1680 EntryParam create(const Context &ctx, Inode parent, const char *name, mode_t mode,
1681 FileInfo* fi) {
1682 struct EntryParam e;
1683 uint32_t inode;
1684 uint8_t oflags;
1685 Attributes attr;
1686 char modestr[11];
1687 char attrstr[256];
1688 uint8_t mattr;
1689 uint32_t nleng;
1690 int status;
1691
1692 finfo *fileinfo;
1693
1694 makemodestr(modestr,mode);
1695 stats_inc(OP_CREATE);
1696 if (debug_mode) {
1697 oplog_printf(ctx, "create (%lu,%s,-%s:0%04o)",
1698 (unsigned long int)parent,
1699 name,
1700 modestr+1,
1701 (unsigned int)mode);
1702 }
1703 if (parent==SPECIAL_INODE_ROOT) {
1704 if (IS_SPECIAL_NAME(name)) {
1705 oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): %s",
1706 (unsigned long int)parent,
1707 name,
1708 modestr+1,
1709 (unsigned int)mode,
1710 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1711 throw RequestException(LIZARDFS_ERROR_EACCES);
1712 }
1713 }
1714 nleng = strlen(name);
1715 if (nleng>MFS_NAME_MAX) {
1716 oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): %s",
1717 (unsigned long int)parent,
1718 name,
1719 modestr+1,
1720 (unsigned int)mode,
1721 lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1722 throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1723 }
1724
1725 oflags = AFTER_CREATE;
1726 if ((fi->flags & O_ACCMODE) == O_RDONLY) {
1727 oflags |= WANT_READ;
1728 } else if ((fi->flags & O_ACCMODE) == O_WRONLY) {
1729 oflags |= WANT_WRITE;
1730 } else if ((fi->flags & O_ACCMODE) == O_RDWR) {
1731 oflags |= WANT_READ | WANT_WRITE;
1732 } else {
1733 oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): %s",
1734 (unsigned long int)parent,
1735 name,
1736 modestr+1,
1737 (unsigned int)mode,
1738 lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
1739 throw RequestException(LIZARDFS_ERROR_EINVAL);
1740 }
1741
1742 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1743 fs_mknod(parent,nleng,(const uint8_t*)name,TYPE_FILE,mode&07777,ctx.umask,ctx.uid,ctx.gid,0,inode,attr));
1744 if (status != LIZARDFS_STATUS_OK) {
1745 oplog_printf(ctx, "create (%lu,%s,-%s:0%04o) (mknod): %s",
1746 (unsigned long int)parent,
1747 name,
1748 modestr+1,
1749 (unsigned int)mode,
1750 lizardfs_error_string(status));
1751 throw RequestException(status);
1752 }
1753 Attributes tmp_attr;
1754 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1755 fs_opencheck(inode,ctx.uid,ctx.gid,oflags,tmp_attr));
1756
1757 if (status != LIZARDFS_STATUS_OK) {
1758 oplog_printf(ctx, "create (%lu,%s,-%s:0%04o) (open): %s",
1759 (unsigned long int)parent,
1760 name,
1761 modestr+1,
1762 (unsigned int)mode,
1763 lizardfs_error_string(status));
1764 throw RequestException(status);
1765 }
1766
1767 mattr = attr_get_mattr(attr);
1768 fileinfo = fs_newfileinfo(fi->flags & O_ACCMODE,inode);
1769 fi->fh = reinterpret_cast<uintptr_t>(fileinfo);
1770 if (keep_cache==1) {
1771 fi->keep_cache=1;
1772 } else if (keep_cache==2) {
1773 fi->keep_cache=0;
1774 } else {
1775 fi->keep_cache = (mattr&MATTR_ALLOWDATACACHE)?1:0;
1776 }
1777 if (debug_mode) {
1778 lzfs::log_debug("create ({}) ok -> keep cache: {}\n", inode, (int)fi->keep_cache);
1779 }
1780 gDirEntryCache.lockAndInvalidateParent(ctx, parent);
1781 e.ino = inode;
1782 e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1783 e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1784 attr_to_stat(inode,attr,&e.attr);
1785 makeattrstr(attrstr,256,&e.attr);
1786 oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): OK (%.1f,%lu,%.1f,%s,%lu)",
1787 (unsigned long int)parent,
1788 name,
1789 modestr+1,
1790 (unsigned int)mode,
1791 e.entry_timeout,
1792 (unsigned long int)e.ino,
1793 e.attr_timeout,
1794 attrstr,
1795 (unsigned long int)fi->keep_cache);
1796 return e;
1797 }
1798
open(const Context & ctx,Inode ino,FileInfo * fi)1799 void open(const Context &ctx, Inode ino, FileInfo *fi) {
1800 uint8_t oflags;
1801 Attributes attr;
1802 uint8_t mattr;
1803 int status;
1804
1805 finfo *fileinfo;
1806
1807 stats_inc(OP_OPEN);
1808 if (debug_mode) {
1809 oplog_printf(ctx, "open (%lu) ...", (unsigned long int)ino);
1810 }
1811
1812 if (IS_SPECIAL_INODE(ino)) {
1813 special_open(ino, ctx, fi);
1814 return;
1815 }
1816
1817 oflags = 0;
1818 if ((fi->flags & O_CREAT) == O_CREAT) {
1819 oflags |= AFTER_CREATE;
1820 }
1821 if ((fi->flags & O_ACCMODE) == O_RDONLY) {
1822 oflags |= WANT_READ;
1823 } else if ((fi->flags & O_ACCMODE) == O_WRONLY) {
1824 oflags |= WANT_WRITE;
1825 } else if ((fi->flags & O_ACCMODE) == O_RDWR) {
1826 oflags |= WANT_READ | WANT_WRITE;
1827 }
1828 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1829 fs_opencheck(ino,ctx.uid,ctx.gid,oflags,attr));
1830 if (status != LIZARDFS_STATUS_OK) {
1831 oplog_printf(ctx, "open (%lu): %s",
1832 (unsigned long int)ino,
1833 lizardfs_error_string(status));
1834 throw RequestException(status);
1835 }
1836
1837 mattr = attr_get_mattr(attr);
1838 fileinfo = fs_newfileinfo(fi->flags & O_ACCMODE,ino);
1839 fi->fh = reinterpret_cast<uintptr_t>(fileinfo);
1840 if (keep_cache==1) {
1841 fi->keep_cache=1;
1842 } else if (keep_cache==2) {
1843 fi->keep_cache=0;
1844 } else {
1845 fi->keep_cache = (mattr&MATTR_ALLOWDATACACHE)?1:0;
1846 }
1847 if (debug_mode) {
1848 lzfs::log_debug("open ({}) ok -> keep cache: {}\n", ino, (int)fi->keep_cache);
1849 }
1850 fi->direct_io = gDirectIo;
1851 oplog_printf(ctx, "open (%lu): OK (%lu,%lu)",
1852 (unsigned long int)ino,
1853 (unsigned long int)fi->direct_io,
1854 (unsigned long int)fi->keep_cache);
1855 }
1856
update_credentials(Context::IdType index,const GroupCache::Groups & groups)1857 static void update_credentials(Context::IdType index, const GroupCache::Groups &groups) {
1858 uint8_t status = fs_update_credentials(index, groups);
1859 if (status != LIZARDFS_STATUS_OK) {
1860 throw RequestException(status);
1861 }
1862 }
1863
release(Inode ino,FileInfo * fi)1864 void release(Inode ino, FileInfo *fi) {
1865 finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
1866
1867 stats_inc(OP_RELEASE);
1868 if (debug_mode) {
1869 oplog_printf("release (%lu) ...", (unsigned long int)ino);
1870 }
1871
1872 if (IS_SPECIAL_INODE(ino)) {
1873 special_release(ino, fi);
1874 return;
1875 }
1876
1877 if (fileinfo != NULL){
1878 if (fileinfo->use_flocks) {
1879 fs_flock_send(ino, fi->lock_owner, 0, lzfs_locks::kRelease);
1880 fileinfo->use_flocks = false;
1881 }
1882 fileinfo->use_posixlocks = false;
1883 remove_file_info(fi);
1884 }
1885 fs_release(ino);
1886 oplog_printf("release (%lu): OK",
1887 (unsigned long int)ino);
1888 }
1889
read_special_inode(const Context & ctx,Inode ino,size_t size,off_t off,FileInfo * fi)1890 std::vector<uint8_t> read_special_inode(const Context &ctx,
1891 Inode ino,
1892 size_t size,
1893 off_t off,
1894 FileInfo* fi) {
1895 LOG_AVG_TILL_END_OF_SCOPE0("read");
1896 stats_inc(OP_READ);
1897
1898 return special_read(ino, ctx, size, off, fi, debug_mode);
1899 }
1900
read(const Context & ctx,Inode ino,size_t size,off_t off,FileInfo * fi)1901 ReadCache::Result read(const Context &ctx,
1902 Inode ino,
1903 size_t size,
1904 off_t off,
1905 FileInfo *fi) {
1906 LOG_AVG_TILL_END_OF_SCOPE0("read");
1907 stats_inc(OP_READ);
1908
1909 finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
1910 int err;
1911 ReadCache::Result ret;
1912 if (debug_mode) {
1913 lzfs::log_debug("read from inode {} up to {} bytes from position {}",
1914 ino, size, off);
1915 }
1916 if (fileinfo==NULL) {
1917 oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1918 (unsigned long int)ino,
1919 (uint64_t)size,
1920 (uint64_t)off,
1921 lizardfs_error_string(LIZARDFS_ERROR_EBADF));
1922 throw RequestException(LIZARDFS_ERROR_EBADF);
1923 }
1924 if (off>=MAX_FILE_SIZE || off+size>=MAX_FILE_SIZE) {
1925 oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1926 (unsigned long int)ino,
1927 (uint64_t)size,
1928 (uint64_t)off,
1929 lizardfs_error_string(LIZARDFS_ERROR_EFBIG));
1930 throw RequestException(LIZARDFS_ERROR_EFBIG);
1931 }
1932 try {
1933 const SteadyTimePoint deadline = SteadyClock::now() + std::chrono::seconds(30);
1934 uint8_t status = gLocalIoLimiter().waitForRead(ctx.pid, size, deadline);
1935 if (status == LIZARDFS_STATUS_OK) {
1936 status = gGlobalIoLimiter().waitForRead(ctx.pid, size, deadline);
1937 }
1938 if (status != LIZARDFS_STATUS_OK) {
1939 err = (status == LIZARDFS_ERROR_EPERM ? LIZARDFS_ERROR_EPERM : LIZARDFS_ERROR_IO);
1940 oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1941 (unsigned long int)ino,
1942 (uint64_t)size,
1943 (uint64_t)off,
1944 lizardfs_error_string(err));
1945 throw RequestException(err);
1946 }
1947 } catch (Exception& ex) {
1948 lzfs_pretty_syslog(LOG_WARNING, "I/O limiting error: %s", ex.what());
1949 throw RequestException(LIZARDFS_ERROR_IO);
1950 }
1951 PthreadMutexWrapper lock(fileinfo->lock);
1952 PthreadMutexWrapper flushlock(fileinfo->flushlock);
1953 if (fileinfo->mode==IO_WRITEONLY) {
1954 oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1955 (unsigned long int)ino,
1956 (uint64_t)size,
1957 (uint64_t)off,
1958 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1959 throw RequestException(LIZARDFS_ERROR_EACCES);
1960 }
1961 if (fileinfo->mode==IO_WRITE) {
1962 err = write_data_flush(fileinfo->data);
1963 if (err != LIZARDFS_STATUS_OK) {
1964 oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1965 (unsigned long int)ino,
1966 (uint64_t)size,
1967 (uint64_t)off,
1968 lizardfs_error_string(err));
1969 throw RequestException(err);
1970 }
1971 write_data_end(fileinfo->data);
1972 }
1973 if (fileinfo->mode==IO_WRITE || fileinfo->mode==IO_NONE) {
1974 fileinfo->mode = IO_READ;
1975 fileinfo->data = read_data_new(ino);
1976 }
1977 // end of reader critical section
1978 flushlock.unlock();
1979
1980 write_data_flush_inode(ino);
1981
1982 uint64_t firstBlockToRead = off / MFSBLOCKSIZE;
1983 uint64_t firstBlockNotToRead = (off + size + MFSBLOCKSIZE - 1) / MFSBLOCKSIZE;
1984 uint64_t alignedOffset = firstBlockToRead * MFSBLOCKSIZE;
1985 uint64_t alignedSize = (firstBlockNotToRead - firstBlockToRead) * MFSBLOCKSIZE;
1986
1987 uint32_t ssize = alignedSize;
1988
1989 err = read_data(fileinfo->data, alignedOffset, ssize, ret);
1990 ssize = ret.requestSize(alignedOffset, ssize);
1991 if (err != LIZARDFS_STATUS_OK) {
1992 oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1993 (unsigned long int)ino,
1994 (uint64_t)size,
1995 (uint64_t)off,
1996 lizardfs_error_string(err));
1997 throw RequestException(err);
1998 } else {
1999 uint32_t replyOffset = off - alignedOffset;
2000 if (ssize > replyOffset) {
2001 ssize -= replyOffset;
2002 if (ssize > size) {
2003 ssize = size;
2004 }
2005 } else {
2006 ssize = 0;
2007 }
2008 oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): OK (%lu)",
2009 (unsigned long int)ino,
2010 (uint64_t)size,
2011 (uint64_t)off,
2012 (unsigned long int)ssize);
2013 }
2014 return ret;
2015 }
2016
write(const Context & ctx,Inode ino,const char * buf,size_t size,off_t off,FileInfo * fi)2017 BytesWritten write(const Context &ctx, Inode ino, const char *buf, size_t size, off_t off,
2018 FileInfo *fi) {
2019 finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2020 int err;
2021
2022 stats_inc(OP_WRITE);
2023 if (debug_mode) {
2024 oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 ") ...",
2025 (unsigned long int)ino,
2026 (uint64_t)size,
2027 (uint64_t)off);
2028 }
2029
2030 if (IS_SPECIAL_INODE(ino)) {
2031 return special_write(ino, ctx, buf, size, off, fi);
2032 }
2033
2034 if (fileinfo==NULL) {
2035 oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2036 (unsigned long int)ino,
2037 (uint64_t)size,
2038 (uint64_t)off,
2039 lizardfs_error_string(LIZARDFS_ERROR_EBADF));
2040 throw RequestException(LIZARDFS_ERROR_EBADF);
2041 }
2042 if (off>=MAX_FILE_SIZE || off+size>=MAX_FILE_SIZE) {
2043 oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2044 (unsigned long int)ino,
2045 (uint64_t)size,
2046 (uint64_t)off,
2047 lizardfs_error_string(LIZARDFS_ERROR_EFBIG));
2048 throw RequestException(LIZARDFS_ERROR_EFBIG);
2049 }
2050 try {
2051 const SteadyTimePoint deadline = SteadyClock::now() + std::chrono::seconds(30);
2052 uint8_t status = gLocalIoLimiter().waitForWrite(ctx.pid, size, deadline);
2053 if (status == LIZARDFS_STATUS_OK) {
2054 status = gGlobalIoLimiter().waitForWrite(ctx.pid, size, deadline);
2055 }
2056 if (status != LIZARDFS_STATUS_OK) {
2057 err = status == LIZARDFS_ERROR_EPERM ? LIZARDFS_ERROR_EPERM : LIZARDFS_ERROR_IO;
2058 oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2059 (unsigned long int)ino,
2060 (uint64_t)size,
2061 (uint64_t)off,
2062 lizardfs_error_string(err));
2063 throw RequestException(err);
2064 }
2065 } catch (Exception& ex) {
2066 lzfs_pretty_syslog(LOG_WARNING, "I/O limiting error: %s", ex.what());
2067 throw RequestException(LIZARDFS_ERROR_IO);
2068 }
2069 PthreadMutexWrapper lock(fileinfo->lock);
2070 if (fileinfo->mode==IO_READONLY) {
2071 oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2072 (unsigned long int)ino,
2073 (uint64_t)size,
2074 (uint64_t)off,
2075 lizardfs_error_string(LIZARDFS_ERROR_EACCES));
2076 throw RequestException(LIZARDFS_ERROR_EACCES);
2077 }
2078 if (fileinfo->mode==IO_READ) {
2079 read_data_end(fileinfo->data);
2080 fileinfo->data = NULL;
2081 }
2082 if (fileinfo->mode==IO_READ || fileinfo->mode==IO_NONE) {
2083 fileinfo->mode = IO_WRITE;
2084 fileinfo->data = write_data_new(ino);
2085 }
2086 err = write_data(fileinfo->data,off,size,(const uint8_t*)buf);
2087 gDirEntryCache.lockAndInvalidateInode(ino);
2088 if (err != LIZARDFS_STATUS_OK) {
2089 oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2090 (unsigned long int)ino,
2091 (uint64_t)size,
2092 (uint64_t)off,
2093 lizardfs_error_string(err));
2094 throw RequestException(err);
2095 } else {
2096 oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): OK (%lu)",
2097 (unsigned long int)ino,
2098 (uint64_t)size,
2099 (uint64_t)off,
2100 (unsigned long int)size);
2101 return size;
2102 }
2103 }
2104
flush(const Context & ctx,Inode ino,FileInfo * fi)2105 void flush(const Context &ctx, Inode ino, FileInfo* fi) {
2106 finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2107 int err;
2108
2109 stats_inc(OP_FLUSH);
2110 if (debug_mode) {
2111 oplog_printf(ctx, "flush (%lu) ...",
2112 (unsigned long int)ino);
2113 }
2114 if (IS_SPECIAL_INODE(ino)) {
2115 oplog_printf(ctx, "flush (%lu): OK",
2116 (unsigned long int)ino);
2117 return;
2118 }
2119 if (fileinfo==NULL) {
2120 oplog_printf(ctx, "flush (%lu): %s",
2121 (unsigned long int)ino,
2122 lizardfs_error_string(LIZARDFS_ERROR_EBADF));
2123 throw RequestException(LIZARDFS_ERROR_EBADF);
2124 }
2125
2126 err = LIZARDFS_STATUS_OK;
2127 PthreadMutexWrapper lock(fileinfo->lock);
2128 if (fileinfo->mode==IO_WRITE || fileinfo->mode==IO_WRITEONLY) {
2129 err = write_data_flush(fileinfo->data);
2130 }
2131 lzfs_locks::FlockWrapper file_lock(lzfs_locks::kRelease,0,0,0);
2132 auto use_posixlocks = fileinfo->use_posixlocks;
2133 lock.unlock();
2134 if (use_posixlocks) {
2135 fs_setlk_send(ino, fi->lock_owner, 0, file_lock);
2136 }
2137 if (err != LIZARDFS_STATUS_OK) {
2138 oplog_printf(ctx, "flush (%lu): %s",
2139 (unsigned long int)ino,
2140 lizardfs_error_string(err));
2141 throw RequestException(err);
2142 } else {
2143 oplog_printf(ctx, "flush (%lu): OK",
2144 (unsigned long int)ino);
2145 }
2146 }
2147
fsync(const Context & ctx,Inode ino,int datasync,FileInfo * fi)2148 void fsync(const Context &ctx, Inode ino, int datasync, FileInfo* fi) {
2149 finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2150 int err;
2151
2152 stats_inc(OP_FSYNC);
2153 if (debug_mode) {
2154 oplog_printf(ctx, "fsync (%lu,%d) ...",
2155 (unsigned long int)ino,
2156 datasync);
2157 }
2158 if (IS_SPECIAL_INODE(ino)) {
2159 oplog_printf(ctx, "fsync (%lu,%d): OK",
2160 (unsigned long int)ino,
2161 datasync);
2162 return;
2163 }
2164 if (fileinfo==NULL) {
2165 oplog_printf(ctx, "fsync (%lu,%d): %s",
2166 (unsigned long int)ino,
2167 datasync,
2168 lizardfs_error_string(LIZARDFS_ERROR_EBADF));
2169 throw RequestException(LIZARDFS_ERROR_EBADF);
2170 }
2171 err = LIZARDFS_STATUS_OK;
2172 PthreadMutexWrapper lock(fileinfo->lock);
2173 if (fileinfo->mode==IO_WRITE || fileinfo->mode==IO_WRITEONLY) {
2174 err = write_data_flush(fileinfo->data);
2175 }
2176 if (err != LIZARDFS_STATUS_OK) {
2177 oplog_printf(ctx, "fsync (%lu,%d): %s",
2178 (unsigned long int)ino,
2179 datasync,
2180 lizardfs_error_string(err));
2181 throw RequestException(err);
2182 } else {
2183 oplog_printf(ctx, "fsync (%lu,%d): OK",
2184 (unsigned long int)ino,
2185 datasync);
2186 }
2187 }
2188
2189 namespace {
2190
2191 class XattrHandler {
2192 public:
~XattrHandler()2193 virtual ~XattrHandler() {}
2194
2195 /*
2196 * handler for request to set an extended attribute
2197 * mode - one of XATTR_SMODE_*
2198 * returns status
2199 */
2200 virtual uint8_t setxattr(const Context& ctx, Inode ino, const char *name,
2201 uint32_t nleng, const char *value, size_t size, int mode) = 0;
2202
2203 /*
2204 * handler for request to get an extended attribute
2205 * mode - one of XATTR_GMODE_*
2206 * returns status and:
2207 * * sets value is mode is XATTR_GMODE_GET_DATA
2208 * * sets valueLength is mode is XATTR_GMODE_LENGTH_ONLY
2209 */
2210 virtual uint8_t getxattr(const Context& ctx, Inode ino, const char *name,
2211 uint32_t nleng, int mode, uint32_t& valueLength, std::vector<uint8_t>& value) = 0;
2212
2213 /*
2214 * handler for request to remove an extended attribute
2215 * returns status
2216 */
2217 virtual uint8_t removexattr(const Context& ctx, Inode ino, const char *name,
2218 uint32_t nleng) = 0;
2219 };
2220
2221 class PlainXattrHandler : public XattrHandler {
2222 public:
setxattr(const Context & ctx,Inode ino,const char * name,uint32_t nleng,const char * value,size_t size,int mode)2223 uint8_t setxattr(const Context& ctx, Inode ino, const char *name,
2224 uint32_t nleng, const char *value, size_t size, int mode) override {
2225 uint8_t status;
2226 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2227 fs_setxattr(ino, 0, ctx.uid, ctx.gid, nleng, (const uint8_t*)name,
2228 (uint32_t)size, (const uint8_t*)value, mode));
2229 return status;
2230 }
2231
getxattr(const Context & ctx,Inode ino,const char * name,uint32_t nleng,int mode,uint32_t & valueLength,std::vector<uint8_t> & value)2232 uint8_t getxattr(const Context& ctx, Inode ino, const char *name,
2233 uint32_t nleng, int mode, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2234 const uint8_t *buff;
2235 uint8_t status;
2236 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2237 fs_getxattr(ino, 0, ctx.uid, ctx.gid, nleng, (const uint8_t*)name,
2238 mode, &buff, &valueLength));
2239 if (mode == XATTR_GMODE_GET_DATA && status == LIZARDFS_STATUS_OK) {
2240 value = std::vector<uint8_t>(buff, buff + valueLength);
2241 }
2242 return status;
2243 }
2244
removexattr(const Context & ctx,Inode ino,const char * name,uint32_t nleng)2245 uint8_t removexattr(const Context& ctx, Inode ino, const char *name,
2246 uint32_t nleng) override {
2247 uint8_t status;
2248 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2249 fs_removexattr(ino, 0, ctx.uid, ctx.gid, nleng, (const uint8_t*)name));
2250 return status;
2251 }
2252 };
2253
2254 class ErrorXattrHandler : public XattrHandler {
2255 public:
ErrorXattrHandler(uint8_t error)2256 ErrorXattrHandler(uint8_t error) : error_(error) {}
setxattr(const Context &,Inode,const char *,uint32_t,const char *,size_t,int)2257 uint8_t setxattr(const Context&, Inode, const char *,
2258 uint32_t, const char *, size_t, int) override {
2259 return error_;
2260 }
2261
getxattr(const Context &,Inode,const char *,uint32_t,int,uint32_t &,std::vector<uint8_t> &)2262 uint8_t getxattr(const Context&, Inode, const char *,
2263 uint32_t, int, uint32_t&, std::vector<uint8_t>&) override {
2264 return error_;
2265 }
2266
removexattr(const Context &,Inode,const char *,uint32_t)2267 uint8_t removexattr(const Context&, Inode, const char *,
2268 uint32_t) override {
2269 return error_;
2270 }
2271 private:
2272 uint8_t error_;
2273 };
2274
2275 class PosixAclXattrHandler : public XattrHandler {
2276 public:
PosixAclXattrHandler(AclType type)2277 PosixAclXattrHandler(AclType type) : type_(type) { }
2278
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2279 uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2280 uint32_t, const char *value, size_t size, int) override {
2281 static constexpr size_t kEmptyAclSize = 4;
2282 AccessControlList posix_acl;
2283 try {
2284 if (size <= kEmptyAclSize) {
2285 uint8_t status;
2286 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2287 fs_deletacl(ino, ctx.uid, ctx.gid, type_));
2288 return status;
2289 }
2290 posix_acl = aclConverter::extractAclObject((const uint8_t*)value, size);
2291 } catch (Exception&) {
2292 return LIZARDFS_ERROR_EINVAL;
2293 }
2294 uint8_t status;
2295 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2296 fs_setacl(ino, ctx.uid, ctx.gid, type_, posix_acl));
2297 eraseAclCache(ino);
2298 gDirEntryCache.lockAndInvalidateInode(ino);
2299 return status;
2300 }
2301
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2302 uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2303 uint32_t, int /*mode*/, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2304 try {
2305 AclCacheEntry cacheEntry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2306 if (cacheEntry) {
2307 std::pair<bool, AccessControlList> posix_acl;
2308 if (type_ == AclType::kAccess) {
2309 posix_acl = cacheEntry->acl.convertToPosixACL();
2310 } else {
2311 posix_acl = cacheEntry->acl.convertToDefaultPosixACL();
2312 }
2313 if (!posix_acl.first) {
2314 return LIZARDFS_ERROR_ENOATTR;
2315 }
2316 value = aclConverter::aclObjectToXattr(posix_acl.second);
2317 valueLength = value.size();
2318 return LIZARDFS_STATUS_OK;
2319 } else {
2320 return LIZARDFS_ERROR_ENOATTR;
2321 }
2322 } catch (AclAcquisitionException &e) {
2323 sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2324 return e.status();
2325 } catch (Exception &) {
2326 lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2327 return LIZARDFS_ERROR_IO;
2328 }
2329 }
2330
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2331 uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2332 uint32_t) override {
2333 uint8_t status;
2334 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2335 fs_deletacl(ino, ctx.uid, ctx.gid, type_));
2336 eraseAclCache(ino);
2337 return status;
2338 }
2339
2340 private:
2341 AclType type_;
2342 SteadyClock clock_;
2343 };
2344
2345 class NFSAclXattrHandler : public XattrHandler {
2346 public:
NFSAclXattrHandler()2347 NFSAclXattrHandler() { }
2348
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2349 uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2350 uint32_t, const char *value, size_t size, int) override {
2351 uint8_t status = LIZARDFS_STATUS_OK;
2352 RichACL acl = richAclConverter::extractObjectFromNFS((uint8_t *)value, size);
2353
2354 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2355 fs_setacl(ino, ctx.uid, ctx.gid, acl));
2356 eraseAclCache(ino);
2357 gDirEntryCache.lockAndInvalidateInode(ino);
2358 return status;
2359 }
2360
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2361 uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2362 uint32_t, int, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2363 try {
2364 AclCacheEntry cache_entry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2365 if (cache_entry) {
2366 value = richAclConverter::objectToNFSXattr(cache_entry->acl, cache_entry->owner_id);
2367 valueLength = value.size();
2368 } else {
2369 valueLength = 4;
2370 value.assign(valueLength, 0);
2371 }
2372 return LIZARDFS_STATUS_OK;
2373 } catch (AclAcquisitionException& e) {
2374 sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2375 return e.status();
2376 } catch (Exception&) {
2377 lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2378 return LIZARDFS_ERROR_IO;
2379 }
2380 }
2381
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2382 uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2383 uint32_t) override {
2384 uint8_t status;
2385 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2386 fs_deletacl(ino, ctx.uid, ctx.gid, AclType::kRichACL));
2387 eraseAclCache(ino);
2388 return status;
2389 }
2390 private:
2391 SteadyClock clock_;
2392 };
2393
2394 class RichAclXattrHandler : public XattrHandler {
2395 public:
RichAclXattrHandler()2396 RichAclXattrHandler() { }
2397
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2398 uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2399 uint32_t, const char *value, size_t size, int) override {
2400 uint8_t status = LIZARDFS_STATUS_OK;
2401 RichACL acl = richAclConverter::extractObjectFromRichACL((uint8_t *)value, size);
2402
2403 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2404 fs_setacl(ino, ctx.uid, ctx.gid, acl));
2405 eraseAclCache(ino);
2406 gDirEntryCache.lockAndInvalidateInode(ino);
2407 return status;
2408 }
2409
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2410 uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2411 uint32_t, int, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2412 try {
2413 AclCacheEntry cache_entry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2414 if (cache_entry) {
2415 value = richAclConverter::objectToRichACLXattr(cache_entry->acl);
2416 valueLength = value.size();
2417 return LIZARDFS_STATUS_OK;
2418 } else {
2419 return LIZARDFS_ERROR_ENOATTR;
2420 }
2421 } catch (AclAcquisitionException& e) {
2422 sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2423 return e.status();
2424 } catch (Exception&) {
2425 lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2426 return LIZARDFS_ERROR_IO;
2427 }
2428 }
2429
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2430 uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2431 uint32_t) override {
2432 uint8_t status;
2433 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2434 fs_deletacl(ino, ctx.uid, ctx.gid, AclType::kRichACL));
2435 eraseAclCache(ino);
2436 return status;
2437 }
2438 private:
2439 SteadyClock clock_;
2440 };
2441
2442 #ifdef __APPLE__
2443 class OsxAclXattrHandler : public XattrHandler {
2444 public:
OsxAclXattrHandler()2445 OsxAclXattrHandler() {}
2446
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2447 uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2448 uint32_t, const char *value, size_t size, int) override {
2449 static constexpr size_t kEmptyAclSize = 4;
2450 if (size <= kEmptyAclSize) {
2451 return LIZARDFS_ERROR_EINVAL;
2452 }
2453 RichACL result;
2454 try {
2455 AclCacheEntry cache_entry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2456 result = osxAclConverter::extractAclObject((const uint8_t*)value, size);
2457 } catch (RequestException &e) {
2458 return e.lizardfs_error_code;
2459 } catch (Exception&) {
2460 return LIZARDFS_ERROR_EINVAL;
2461 }
2462 uint8_t status = LIZARDFS_STATUS_OK;
2463 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2464 fs_setacl(ino, ctx.uid, ctx.gid, result));
2465 eraseAclCache(ino);
2466 gDirEntryCache.lockAndInvalidateInode(ino);
2467 return status;
2468 }
2469
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2470 uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2471 uint32_t, int /*mode*/, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2472 try {
2473 auto ts = clock_.now();
2474 AclCacheEntry cache_entry = acl_cache->get(ts, ino, ctx.uid, ctx.gid);
2475 if (cache_entry) {
2476 value = osxAclConverter::objectToOsxXattr(cache_entry->acl);
2477 valueLength = value.size();
2478 return LIZARDFS_STATUS_OK;
2479 } else {
2480 return LIZARDFS_ERROR_ENOATTR;
2481 }
2482 } catch (AclAcquisitionException& e) {
2483 sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2484 return e.status();
2485 } catch (RequestException &e) {
2486 return e.lizardfs_error_code;
2487 } catch (Exception&) {
2488 lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2489 return LIZARDFS_ERROR_IO;
2490 }
2491 valueLength = 0;
2492 }
2493
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2494 uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2495 uint32_t) override {
2496 uint8_t status;
2497 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2498 fs_deletacl(ino, ctx.uid, ctx.gid, AclType::kRichACL));
2499 eraseAclCache(ino);
2500 return status;
2501 }
2502
2503 private:
2504 SteadyClock clock_;
2505 };
2506
2507 #endif
2508
2509 } // anonymous namespace
2510
// Handler objects for the ACL-related attributes; xattr_handlers below stores
// pointers to them.
static PosixAclXattrHandler accessAclXattrHandler(AclType::kAccess);
static PosixAclXattrHandler defaultAclXattrHandler(AclType::kDefault);
static NFSAclXattrHandler nfsAclXattrHandler;
static RichAclXattrHandler richAclXattrHandler;
#ifdef __APPLE__
static OsxAclXattrHandler osxAclXattrHandler;
#endif

// Handler answering every request with LIZARDFS_ERROR_ENOTSUP.
static ErrorXattrHandler enotsupXattrHandler(LIZARDFS_ERROR_ENOTSUP);
// Handler returned by choose_xattr_handler() for names absent from xattr_handlers.
static PlainXattrHandler plainXattrHandler;

// Maps the names of attributes that need dedicated treatment to their handlers.
static std::map<std::string, XattrHandler*> xattr_handlers = {
	{"system.posix_acl_access", &accessAclXattrHandler},
	{"system.posix_acl_default", &defaultAclXattrHandler},
	{"system.nfs4_acl", &nfsAclXattrHandler},
	{"system.richacl", &richAclXattrHandler},
	{"security.capability", &enotsupXattrHandler},
#ifdef __APPLE__
	{"com.apple.system.Security", &osxAclXattrHandler},
#endif
};
2532
choose_xattr_handler(const char * name)2533 static XattrHandler* choose_xattr_handler(const char *name) {
2534 try {
2535 return xattr_handlers.at(name);
2536 } catch (std::out_of_range&) {
2537 return &plainXattrHandler;
2538 }
2539 }
2540
setxattr(const Context & ctx,Inode ino,const char * name,const char * value,size_t size,int flags,uint32_t position)2541 void setxattr(const Context &ctx, Inode ino, const char *name, const char *value,
2542 size_t size, int flags, uint32_t position) {
2543 uint32_t nleng;
2544 int status;
2545 uint8_t mode;
2546
2547
2548 stats_inc(OP_SETXATTR);
2549 if (debug_mode) {
2550 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d) ...",
2551 (unsigned long int)ino,
2552 name,
2553 (uint64_t)size,
2554 flags);
2555 }
2556 if (IS_SPECIAL_INODE(ino)) {
2557 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2558 (unsigned long int)ino,
2559 name,
2560 (uint64_t)size,
2561 flags,
2562 lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2563 throw RequestException(LIZARDFS_ERROR_EPERM);
2564 }
2565 if (size>MFS_XATTR_SIZE_MAX) {
2566 #if defined(__APPLE__)
2567 // Mac OS X returns E2BIG here
2568 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2569 (unsigned long int)ino,
2570 name,
2571 (uint64_t)size,
2572 flags,
2573 lizardfs_error_string(LIZARDFS_ERROR_E2BIG));
2574 throw RequestException(LIZARDFS_ERROR_E2BIG);
2575 #else
2576 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2577 (unsigned long int)ino,
2578 name,
2579 (uint64_t)size,
2580 flags,
2581 lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2582 throw RequestException(LIZARDFS_ERROR_ERANGE);
2583 #endif
2584 }
2585 nleng = strlen(name);
2586 if (nleng>MFS_XATTR_NAME_MAX) {
2587 #if defined(__APPLE__)
2588 // Mac OS X returns EPERM here
2589 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2590 (unsigned long int)ino,
2591 name,
2592 (uint64_t)size,
2593 flags,
2594 lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2595 throw RequestException(LIZARDFS_ERROR_EPERM);
2596 #else
2597 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2598 (unsigned long int)ino,
2599 name,
2600 (uint64_t)size,
2601 flags,
2602 lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2603 throw RequestException(LIZARDFS_ERROR_ERANGE);
2604 #endif
2605 }
2606 if (nleng==0) {
2607 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2608 (unsigned long int)ino,
2609 name,
2610 (uint64_t)size,
2611 flags,
2612 lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2613 throw RequestException(LIZARDFS_ERROR_EINVAL);
2614 }
2615 if (strcmp(name,"security.capability")==0) {
2616 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2617 (unsigned long int)ino,
2618 name,
2619 (uint64_t)size,
2620 flags,
2621 lizardfs_error_string(LIZARDFS_ERROR_ENOTSUP));
2622 throw RequestException(LIZARDFS_ERROR_ENOTSUP);
2623 }
2624 #if defined(XATTR_CREATE) && defined(XATTR_REPLACE)
2625 if ((flags&XATTR_CREATE) && (flags&XATTR_REPLACE)) {
2626 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2627 (unsigned long int)ino,
2628 name,
2629 (uint64_t)size,
2630 flags,
2631 lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2632 throw RequestException(LIZARDFS_ERROR_EINVAL);
2633 }
2634 mode = (flags==XATTR_CREATE)?XATTR_SMODE_CREATE_ONLY:(flags==XATTR_REPLACE)?XATTR_SMODE_REPLACE_ONLY:XATTR_SMODE_CREATE_OR_REPLACE;
2635 #else
2636 mode = 0;
2637 #endif
2638 (void)position;
2639 status = choose_xattr_handler(name)->setxattr(ctx, ino, name, nleng, value, size, mode);
2640 if (status != LIZARDFS_STATUS_OK) {
2641 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2642 (unsigned long int)ino,
2643 name,
2644 (uint64_t)size,
2645 flags,
2646 lizardfs_error_string(status));
2647 throw RequestException(status);
2648 }
2649 oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): OK",
2650 (unsigned long int)ino,
2651 name,
2652 (uint64_t)size,
2653 flags);
2654 }
2655
getxattr(const Context & ctx,Inode ino,const char * name,size_t size,uint32_t position)2656 XattrReply getxattr(const Context &ctx, Inode ino, const char *name, size_t size, uint32_t position) {
2657 uint32_t nleng;
2658 int status;
2659 uint8_t mode;
2660 std::vector<uint8_t> buffer;
2661 const uint8_t *buff;
2662 uint32_t leng;
2663
2664
2665 stats_inc(OP_GETXATTR);
2666 if (debug_mode) {
2667 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 ") ...",
2668 (unsigned long int)ino,
2669 name,
2670 (uint64_t)size);
2671 }
2672 if (IS_SPECIAL_INODE(ino)) {
2673 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2674 (unsigned long int)ino,
2675 name,
2676 (uint64_t)size,
2677 lizardfs_error_string(LIZARDFS_ERROR_ENODATA));
2678 throw RequestException(LIZARDFS_ERROR_ENODATA);
2679 }
2680 nleng = strlen(name);
2681 if (nleng>MFS_XATTR_NAME_MAX) {
2682 #if defined(__APPLE__)
2683 // Mac OS X returns EPERM here
2684 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2685 (unsigned long int)ino,
2686 name,
2687 (uint64_t)size,
2688 lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2689 throw RequestException(LIZARDFS_ERROR_EPERM);
2690 #else
2691 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2692 (unsigned long int)ino,
2693 name,
2694 (uint64_t)size,
2695 lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2696 throw RequestException(LIZARDFS_ERROR_ERANGE);
2697 #endif
2698 }
2699 if (nleng==0) {
2700 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2701 (unsigned long int)ino,
2702 name,
2703 (uint64_t)size,
2704 lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2705 throw RequestException(LIZARDFS_ERROR_EINVAL);
2706 }
2707 if (strcmp(name,"security.capability")==0) {
2708 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2709 (unsigned long int)ino,
2710 name,
2711 (uint64_t)size,
2712 lizardfs_error_string(LIZARDFS_ERROR_ENOTSUP));
2713 throw RequestException(LIZARDFS_ERROR_ENOTSUP);
2714 }
2715 if (size==0) {
2716 mode = XATTR_GMODE_LENGTH_ONLY;
2717 } else {
2718 mode = XATTR_GMODE_GET_DATA;
2719 }
2720 (void)position;
2721 status = choose_xattr_handler(name)->getxattr(ctx, ino, name, nleng, mode, leng, buffer);
2722 buff = buffer.data();
2723 if (status != LIZARDFS_STATUS_OK) {
2724 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2725 (unsigned long int)ino,
2726 name,
2727 (uint64_t)size,
2728 lizardfs_error_string(status));
2729 throw RequestException(status);
2730 }
2731 if (size==0) {
2732 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): OK (%" PRIu32 ")",
2733 (unsigned long int)ino,
2734 name,
2735 (uint64_t)size,
2736 leng);
2737 return XattrReply{leng, {}};
2738 } else {
2739 if (leng>size) {
2740 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2741 (unsigned long int)ino,
2742 name,
2743 (uint64_t)size,
2744 lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2745 throw RequestException(LIZARDFS_ERROR_ERANGE);
2746 } else {
2747 oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): OK (%" PRIu32 ")",
2748 (unsigned long int)ino,
2749 name,
2750 (uint64_t)size,
2751 leng);
2752 return XattrReply{leng, std::vector<uint8_t>(buff, buff + leng)};
2753 }
2754 }
2755 }
2756
listxattr(const Context & ctx,Inode ino,size_t size)2757 XattrReply listxattr(const Context &ctx, Inode ino, size_t size) {
2758 const uint8_t *buff;
2759 uint32_t leng;
2760 int status;
2761 uint8_t mode;
2762
2763 stats_inc(OP_LISTXATTR);
2764 if (debug_mode) {
2765 oplog_printf(ctx, "listxattr (%lu,%" PRIu64 ") ...",
2766 (unsigned long int)ino,
2767 (uint64_t)size);
2768 }
2769 if (IS_SPECIAL_INODE(ino)) {
2770 oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): %s",
2771 (unsigned long int)ino,
2772 (uint64_t)size,
2773 lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2774 throw RequestException(LIZARDFS_ERROR_EPERM);
2775 }
2776 if (size==0) {
2777 mode = XATTR_GMODE_LENGTH_ONLY;
2778 } else {
2779 mode = XATTR_GMODE_GET_DATA;
2780 }
2781 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2782 fs_listxattr(ino,0,ctx.uid,ctx.gid,mode,&buff,&leng));
2783 if (status != LIZARDFS_STATUS_OK) {
2784 oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): %s",
2785 (unsigned long int)ino,
2786 (uint64_t)size,
2787 lizardfs_error_string(status));
2788 throw RequestException(status);
2789 }
2790 if (size==0) {
2791 oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): OK (%" PRIu32 ")",
2792 (unsigned long int)ino,
2793 (uint64_t)size,
2794 leng);
2795 return XattrReply{leng, {}};
2796 } else {
2797 if (leng>size) {
2798 oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): %s",
2799 (unsigned long int)ino,
2800 (uint64_t)size,
2801 lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2802 throw RequestException(LIZARDFS_ERROR_ERANGE);
2803 } else {
2804 oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): OK (%" PRIu32 ")",
2805 (unsigned long int)ino,
2806 (uint64_t)size,
2807 leng);
2808 return XattrReply{leng, std::vector<uint8_t>(buff, buff + leng)};
2809 }
2810 }
2811 }
2812
removexattr(const Context & ctx,Inode ino,const char * name)2813 void removexattr(const Context &ctx, Inode ino, const char *name) {
2814 uint32_t nleng;
2815 int status;
2816
2817 stats_inc(OP_REMOVEXATTR);
2818 if (debug_mode) {
2819 oplog_printf(ctx, "removexattr (%lu,%s) ...",
2820 (unsigned long int)ino,
2821 name);
2822 }
2823 if (IS_SPECIAL_INODE(ino)) {
2824 oplog_printf(ctx, "removexattr (%lu,%s): %s",
2825 (unsigned long int)ino,
2826 name,
2827 lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2828 throw RequestException(LIZARDFS_ERROR_EPERM);
2829 }
2830 nleng = strlen(name);
2831 if (nleng>MFS_XATTR_NAME_MAX) {
2832 #if defined(__APPLE__)
2833 // Mac OS X returns EPERM here
2834 oplog_printf(ctx, "removexattr (%lu,%s): %s",
2835 (unsigned long int)ino,
2836 name,
2837 lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2838 throw RequestException(LIZARDFS_ERROR_EPERM);
2839 #else
2840 oplog_printf(ctx, "removexattr (%lu,%s): %s",
2841 (unsigned long int)ino,
2842 name,
2843 lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2844 throw RequestException(LIZARDFS_ERROR_ERANGE);
2845 #endif
2846 }
2847 if (nleng==0) {
2848 oplog_printf(ctx, "removexattr (%lu,%s): %s",
2849 (unsigned long int)ino,
2850 name,
2851 lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2852 throw RequestException(LIZARDFS_ERROR_EINVAL);
2853 }
2854 status = choose_xattr_handler(name)->removexattr(ctx, ino, name, nleng);
2855 if (status != LIZARDFS_STATUS_OK) {
2856 oplog_printf(ctx, "removexattr (%lu,%s): %s",
2857 (unsigned long int)ino,
2858 name,
2859 lizardfs_error_string(status));
2860 throw RequestException(status);
2861 } else {
2862 oplog_printf(ctx, "removexattr (%lu,%s): OK",
2863 (unsigned long int)ino,
2864 name);
2865 }
2866 }
2867
flock_interrupt(const lzfs_locks::InterruptData & data)2868 void flock_interrupt(const lzfs_locks::InterruptData &data) {
2869 fs_flock_interrupt(data);
2870 }
2871
setlk_interrupt(const lzfs_locks::InterruptData & data)2872 void setlk_interrupt(const lzfs_locks::InterruptData &data) {
2873 fs_setlk_interrupt(data);
2874 }
2875
getlk(const Context & ctx,Inode ino,FileInfo * fi,struct lzfs_locks::FlockWrapper & lock)2876 void getlk(const Context &ctx, Inode ino, FileInfo* fi, struct lzfs_locks::FlockWrapper &lock) {
2877 uint32_t status;
2878
2879 stats_inc(OP_FLOCK);
2880 if (IS_SPECIAL_INODE(ino)) {
2881 if (debug_mode) {
2882 oplog_printf(ctx, "flock(ctx, %lu, fi): %s", (unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2883 }
2884 throw RequestException(LIZARDFS_ERROR_EINVAL);
2885 }
2886
2887 if (!fi) {
2888 if (debug_mode) {
2889 oplog_printf(ctx,"flock(ctx, %lu, fi): %s",(unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2890 }
2891 throw RequestException(LIZARDFS_ERROR_EINVAL);
2892 }
2893
2894 // communicate with master
2895 status = fs_getlk(ino, fi->lock_owner, lock);
2896
2897 if (status) {
2898 throw RequestException(status);
2899 }
2900 }
2901
setlk_send(const Context & ctx,Inode ino,FileInfo * fi,struct lzfs_locks::FlockWrapper & lock)2902 uint32_t setlk_send(const Context &ctx, Inode ino, FileInfo* fi, struct lzfs_locks::FlockWrapper &lock) {
2903 uint32_t reqid;
2904 uint32_t status;
2905
2906 stats_inc(OP_SETLK);
2907 if (IS_SPECIAL_INODE(ino)) {
2908 if (debug_mode) {
2909 oplog_printf(ctx, "flock(ctx, %lu, fi): %s", (unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2910 }
2911 throw RequestException(LIZARDFS_ERROR_EINVAL);
2912 }
2913
2914 if (!fi) {
2915 if (debug_mode) {
2916 oplog_printf(ctx,"flock(ctx, %lu, fi): %s",(unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2917 }
2918 throw RequestException(LIZARDFS_ERROR_EINVAL);
2919 }
2920
2921 finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2922
2923 // increase flock_id counter
2924 lock_request_mutex.lock();
2925 reqid = lock_request_counter++;
2926 lock_request_mutex.unlock();
2927
2928 if (fileinfo != NULL) {
2929 PthreadMutexWrapper lock(fileinfo->lock);
2930 fileinfo->use_posixlocks = true;
2931 }
2932
2933 // communicate with master
2934 status = fs_setlk_send(ino, fi->lock_owner, reqid, lock);
2935
2936 if (status) {
2937 throw RequestException(status);
2938 }
2939
2940 return reqid;
2941 }
2942
setlk_recv()2943 void setlk_recv() {
2944 uint32_t status = fs_setlk_recv();
2945
2946 if (status) {
2947 throw RequestException(status);
2948 }
2949 }
2950
flock_send(const Context & ctx,Inode ino,FileInfo * fi,int op)2951 uint32_t flock_send(const Context &ctx, Inode ino, FileInfo* fi, int op) {
2952 uint32_t reqid;
2953 uint32_t status;
2954
2955 stats_inc(OP_FLOCK);
2956 if (IS_SPECIAL_INODE(ino)) {
2957 if (debug_mode) {
2958 oplog_printf(ctx, "flock(ctx, %lu, fi): %s", (unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2959 }
2960 throw RequestException(LIZARDFS_ERROR_EINVAL);
2961 }
2962
2963 if (!fi) {
2964 if (debug_mode) {
2965 oplog_printf(ctx,"flock(ctx, %lu, fi): %s",(unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2966 }
2967 throw RequestException(LIZARDFS_ERROR_EINVAL);
2968 }
2969
2970 finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2971
2972 // increase flock_id counter
2973 lock_request_mutex.lock();
2974 reqid = lock_request_counter++;
2975 lock_request_mutex.unlock();
2976
2977 if (fileinfo != NULL) {
2978 PthreadMutexWrapper lock(fileinfo->lock);
2979 fileinfo->use_flocks = true;
2980 }
2981
2982 // communicate with master
2983 status = fs_flock_send(ino, fi->lock_owner, reqid, op);
2984
2985 if (status) {
2986 throw RequestException(status);
2987 }
2988
2989 return reqid;
2990 }
2991
flock_recv()2992 void flock_recv() {
2993 uint32_t status = fs_flock_recv();
2994
2995 if (status) {
2996 throw RequestException(status);
2997 }
2998 }
2999
makesnapshot(const Context & ctx,Inode ino,Inode dst_parent,const std::string & dst_name,bool can_overwrite)3000 JobId makesnapshot(const Context &ctx, Inode ino, Inode dst_parent, const std::string &dst_name,
3001 bool can_overwrite) {
3002 if (IS_SPECIAL_INODE(ino)) {
3003 oplog_printf(ctx, "makesnapshot (%lu, %lu, %s): %s",
3004 (unsigned long)ino, (unsigned long)dst_parent, dst_name.c_str(), strerr(EINVAL));
3005 throw RequestException(EINVAL);
3006 }
3007
3008 JobId job_id;
3009 uint8_t status;
3010 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
3011 fs_makesnapshot(ino, dst_parent, dst_name, ctx.uid, ctx.gid, can_overwrite, job_id));
3012 if (status != LIZARDFS_STATUS_OK) {
3013 throw RequestException(status);
3014 }
3015
3016 return job_id;
3017 }
3018
getgoal(const Context & ctx,Inode ino)3019 std::string getgoal(const Context &ctx, Inode ino) {
3020 if (IS_SPECIAL_INODE(ino)) {
3021 oplog_printf(ctx, "getgoal (%lu): %s",
3022 (unsigned long)ino, strerr(EINVAL));
3023 throw RequestException(EINVAL);
3024 }
3025
3026 std::string goal;
3027 uint8_t status = fs_getgoal(ino, goal);
3028 if (status != LIZARDFS_STATUS_OK) {
3029 throw RequestException(status);
3030 }
3031
3032 return goal;
3033 }
3034
setgoal(const Context & ctx,Inode ino,const std::string & goal_name,uint8_t smode)3035 void setgoal(const Context &ctx, Inode ino, const std::string &goal_name, uint8_t smode) {
3036 if (IS_SPECIAL_INODE(ino)) {
3037 oplog_printf(ctx, "setgoal (%lu, %s): %s",
3038 (unsigned long)ino, goal_name.c_str(), strerr(EINVAL));
3039 throw RequestException(EINVAL);
3040 }
3041
3042 uint8_t status = fs_setgoal(ino, ctx.uid, goal_name, smode);
3043 if (status != LIZARDFS_STATUS_OK) {
3044 throw RequestException(status);
3045 }
3046 }
3047
statfs(uint64_t * totalspace,uint64_t * availspace,uint64_t * trashspace,uint64_t * reservedspace,uint32_t * inodes)3048 void statfs(uint64_t *totalspace, uint64_t *availspace, uint64_t *trashspace, uint64_t *reservedspace, uint32_t *inodes) {
3049 fs_statfs(totalspace, availspace, trashspace, reservedspace, inodes);
3050 }
3051
getchunksinfo(const Context & ctx,Inode ino,uint32_t chunk_index,uint32_t chunk_count)3052 std::vector<ChunkWithAddressAndLabel> getchunksinfo(const Context &ctx, Inode ino,
3053 uint32_t chunk_index, uint32_t chunk_count) {
3054 if (IS_SPECIAL_INODE(ino)) {
3055 oplog_printf(ctx, "getchunksinfo (%lu, %u, %u): %s",
3056 (unsigned long)ino, (unsigned)chunk_index, (unsigned)chunk_count, strerr(EINVAL));
3057 throw RequestException(EINVAL);
3058 }
3059 std::vector<ChunkWithAddressAndLabel> chunks;
3060 uint8_t status;
3061 RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
3062 fs_getchunksinfo(ctx.uid, ctx.gid, ino, chunk_index, chunk_count, chunks));
3063 if (status != LIZARDFS_STATUS_OK) {
3064 throw RequestException(status);
3065 }
3066 return chunks;
3067 }
3068
getchunkservers()3069 std::vector<ChunkserverListEntry> getchunkservers() {
3070 std::vector<ChunkserverListEntry> chunkservers;
3071 uint8_t status = fs_getchunkservers(chunkservers);
3072 if (status != LIZARDFS_STATUS_OK) {
3073 throw RequestException(status);
3074 }
3075 return chunkservers;
3076 }
3077
init(int debug_mode_,int keep_cache_,double direntry_cache_timeout_,unsigned direntry_cache_size_,double entry_cache_timeout_,double attr_cache_timeout_,int mkdir_copy_sgid_,SugidClearMode sugid_clear_mode_,bool use_rwlock_,double acl_cache_timeout_,unsigned acl_cache_size_)3078 void init(int debug_mode_, int keep_cache_, double direntry_cache_timeout_, unsigned direntry_cache_size_,
3079 double entry_cache_timeout_, double attr_cache_timeout_, int mkdir_copy_sgid_,
3080 SugidClearMode sugid_clear_mode_, bool use_rwlock_,
3081 double acl_cache_timeout_, unsigned acl_cache_size_) {
3082 debug_mode = debug_mode_;
3083 keep_cache = keep_cache_;
3084 direntry_cache_timeout = direntry_cache_timeout_;
3085 entry_cache_timeout = entry_cache_timeout_;
3086 attr_cache_timeout = attr_cache_timeout_;
3087 mkdir_copy_sgid = mkdir_copy_sgid_;
3088 sugid_clear_mode = static_cast<decltype (sugid_clear_mode)>(sugid_clear_mode_);
3089 use_rwlock = use_rwlock_;
3090 uint64_t timeout = (uint64_t)(direntry_cache_timeout * 1000000);
3091 gDirEntryCache.setTimeout(timeout);
3092 gDirEntryCacheMaxSize = direntry_cache_size_;
3093 if (debug_mode) {
3094 lzfs::log_debug("cache parameters: file_keep_cache={} direntry_cache_timeout={:.2f}"
3095 " entry_cache_timeout={:.2f} attr_cache_timeout={:.2f}",
3096 (keep_cache==1)?"always":(keep_cache==2)?"never":"auto",
3097 direntry_cache_timeout, entry_cache_timeout, attr_cache_timeout);
3098 lzfs::log_debug("mkdir copy sgid={} sugid clear mode={}",
3099 mkdir_copy_sgid_, sugidClearModeString(sugid_clear_mode_));
3100 lzfs::log_debug("RW lock {}", use_rwlock ? "enabled" : "disabled");
3101 lzfs::log_debug("ACL acl_cache_timeout={:.2f}, acl_cache_size={}\n",
3102 acl_cache_timeout_, acl_cache_size_);
3103 }
3104 statsptr_init();
3105
3106 acl_cache.reset(new AclCache(
3107 std::chrono::milliseconds((int)(1000 * acl_cache_timeout_)),
3108 acl_cache_size_,
3109 getAcl));
3110
3111 gTweaks.registerVariable("DirectIO", gDirectIo);
3112 gTweaks.registerVariable("AclCacheMaxTime", acl_cache->maxTime_ms);
3113 gTweaks.registerVariable("AclCacheHit", acl_cache->cacheHit);
3114 gTweaks.registerVariable("AclCacheExpired", acl_cache->cacheExpired);
3115 gTweaks.registerVariable("AclCacheMiss", acl_cache->cacheMiss);
3116 }
3117
fs_init(FsInitParams & params)3118 void fs_init(FsInitParams ¶ms) {
3119 socketinit();
3120 mycrc32_init();
3121 int connection_ret = fs_init_master_connection(params);
3122 if (!params.delayed_init && connection_ret < 0) {
3123 lzfs_pretty_syslog(LOG_ERR, "Can't initialize connection with master server");
3124 socketrelease();
3125 throw std::runtime_error("Can't initialize connection with master server");
3126 }
3127 symlink_cache_init(params.symlink_cache_timeout_s);
3128 gGlobalIoLimiter();
3129 fs_init_threads(params.io_retries);
3130 masterproxy_init();
3131
3132 gLocalIoLimiter();
3133 try {
3134 IoLimitsConfigLoader loader;
3135 if (!params.io_limits_config_file.empty()) {
3136 loader.load(std::ifstream(params.io_limits_config_file.c_str()));
3137 }
3138 gMountLimiter().loadConfiguration(loader);
3139 } catch (Exception &ex) {
3140 lzfs_pretty_syslog(LOG_ERR, "Can't initialize I/O limiting: %s", ex.what());
3141 masterproxy_term();
3142 ::fs_term();
3143 symlink_cache_term();
3144 socketrelease();
3145 throw std::runtime_error("Can't initialize I/O limiting");
3146 }
3147
3148 read_data_init(params.io_retries,
3149 params.chunkserver_round_time_ms,
3150 params.chunkserver_connect_timeout_ms,
3151 params.chunkserver_wave_read_timeout_ms,
3152 params.total_read_timeout_ms,
3153 params.cache_expiration_time_ms,
3154 params.readahead_max_window_size_kB,
3155 params.prefetch_xor_stripes,
3156 std::max(params.bandwidth_overuse, 1.));
3157 write_data_init(params.write_cache_size, params.io_retries, params.write_workers,
3158 params.write_window_size, params.chunkserver_write_timeout_ms, params.cache_per_inode_percentage);
3159
3160 init(params.debug_mode, params.keep_cache, params.direntry_cache_timeout, params.direntry_cache_size,
3161 params.entry_cache_timeout, params.attr_cache_timeout, params.mkdir_copy_sgid,
3162 params.sugid_clear_mode, params.use_rw_lock,
3163 params.acl_cache_timeout, params.acl_cache_size);
3164 }
3165
fs_term()3166 void fs_term() {
3167 write_data_term();
3168 read_data_term();
3169 masterproxy_term();
3170 ::fs_term();
3171 symlink_cache_term();
3172 socketrelease();
3173 }
3174
3175 } // namespace LizardClient
3176