1 /*
2    Copyright 2005-2010 Jakub Kruszona-Zawadzki, Gemius SA, 2013-2014 EditShare, 2013-2015 Skytechnology sp. z o.o..
3 
4    This file was part of MooseFS and is part of LizardFS.
5 
6    LizardFS is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation, version 3.
9 
10    LizardFS is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with LizardFS.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "common/platform.h"
20 #include "mount/lizard_client.h"
21 
22 #include <assert.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <pthread.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include <cstdint>
31 #include <fstream>
32 #include <map>
33 #include <memory>
34 #include <new>
35 #include <string>
36 #include <vector>
37 
38 #include "common/access_control_list.h"
39 #include "common/acl_converter.h"
40 #include "common/acl_type.h"
41 #include "common/crc.h"
42 #include "common/datapack.h"
43 #include "common/errno_defs.h"
44 #include "common/lru_cache.h"
45 #include "common/mfserr.h"
46 #include "common/richacl_converter.h"
47 #include "common/slogger.h"
48 #include "common/sockets.h"
49 #include "common/special_inode_defs.h"
50 #include "common/time_utils.h"
51 #include "devtools/request_log.h"
52 #include "mount/acl_cache.h"
53 #include "mount/chunk_locator.h"
54 #include "mount/client_common.h"
55 #include "mount/direntry_cache.h"
56 #include "mount/g_io_limiters.h"
57 #include "mount/io_limit_group.h"
58 #include "mount/mastercomm.h"
59 #include "mount/masterproxy.h"
60 #include "mount/oplog.h"
61 #include "mount/readdata.h"
62 #include "mount/special_inode.h"
63 #include "mount/stats.h"
64 #include "mount/sugid_clear_mode_string.h"
65 #include "mount/symlinkcache.h"
66 #include "mount/tweaks.h"
67 #include "mount/writedata.h"
68 #include "protocol/MFSCommunication.h"
69 #include "protocol/matocl.h"
70 
71 #ifdef __APPLE__
72 #include "mount/osx_acl_converter.h"
73 #endif
74 
75 #include "mount/stat_defs.h" // !!! This must be last include. Do not move !!!
76 
77 namespace LizardClient {
78 
// MFS_MAX_FILE_SIZE converted to int64_t so that it can be compared with signed sizes
// such as st_size. The whole expansion is parenthesized so that the cast cannot combine
// with operators written next to the macro at the expansion site.
#define MAX_FILE_SIZE ((int64_t)(MFS_MAX_FILE_SIZE))

// Package version folded into a single integer: major * 1000000 + minor * 1000 + micro.
#define PKGVERSION \
		((LIZARDFS_PACKAGE_VERSION_MAJOR)*1000000 + \
		(LIZARDFS_PACKAGE_VERSION_MINOR)*1000 + \
		(LIZARDFS_PACKAGE_VERSION_MICRO))
85 
86 // #define MASTER_NAME ".master"
87 // #define MASTER_INODE 0x7FFFFFFF
88 // 0x01b6 == 0666
89 // static uint8_t masterattr[35]={'f', 0x01,0xB6, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,0,0,0,0,0,0,0};
90 
// True when `ino` is at or above SPECIAL_INODE_BASE, i.e. it is treated as a special inode.
#define IS_SPECIAL_INODE(ino) ((ino) >= SPECIAL_INODE_BASE)

// True when `name` equals one of the special file names. The leading-dot test comes first,
// so no strcmp is executed for names that do not start with '.'.
// `name` is expanded several times; pass an expression without side effects.
#define IS_SPECIAL_NAME(name) \
		((name)[0] == '.' && ( \
			strcmp(SPECIAL_FILE_NAME_STATS, (name)) == 0 || \
			strcmp(SPECIAL_FILE_NAME_MASTERINFO, (name)) == 0 || \
			strcmp(SPECIAL_FILE_NAME_OPLOG, (name)) == 0 || \
			strcmp(SPECIAL_FILE_NAME_OPHISTORY, (name)) == 0 || \
			strcmp(SPECIAL_FILE_NAME_TWEAKS, (name)) == 0 || \
			strcmp(SPECIAL_FILE_NAME_FILE_BY_INODE, (name)) == 0))
96 
97 static GroupCache gGroupCache;
98 
99 static void update_credentials(Context::IdType index, const GroupCache::Groups &groups);
100 
101 #define RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, group_id, function_expression) \
102 		do { \
103 			const uint32_t kSecondaryGroupsBit = (uint32_t)1 << 31; \
104 			status = function_expression; \
105 			if (status == LIZARDFS_ERROR_GROUPNOTREGISTERED) { \
106 				uint32_t index = group_id ^ kSecondaryGroupsBit; \
107 				GroupCache::Groups groups = gGroupCache.findByIndex(index); \
108 				if (!groups.empty()) { \
109 					update_credentials(index, groups); \
110 					status = function_expression; \
111 				} \
112 			} \
113 		} while (0);
114 
updateGroups(Context & ctx)115 void updateGroups(Context &ctx) {
116 	static const uint32_t kSecondaryGroupsBit = (uint32_t)1 << 31;
117 
118 	if (ctx.gids.empty()) {
119 		return;
120 	}
121 
122 	if (ctx.gids.size() == 1) {
123 		ctx.gid = ctx.gids[0];
124 		return;
125 	}
126 
127 	static_assert(sizeof(Context::IdType) >= sizeof(uint32_t), "IdType too small");
128 
129 	auto result = gGroupCache.find(ctx.gids);
130 	Context::IdType gid = 0;
131 	if (result.found == false) {
132 		try {
133 			uint32_t index = gGroupCache.put(ctx.gids);
134 			update_credentials(index, ctx.gids);
135 			gid = index | kSecondaryGroupsBit;
136 		} catch (RequestException &e) {
137 			lzfs_pretty_syslog(LOG_ERR, "Cannot update groups: %d", e.system_error_code);
138 		}
139 	} else {
140 		gid = result.index | kSecondaryGroupsBit;
141 	}
142 
143 	ctx.gid = gid;
144 }
145 
getSpecialInodeByName(const char * name)146 Inode getSpecialInodeByName(const char *name) {
147 	assert(name);
148 
149 	while (name[0] == '/') {
150 		++name;
151 	}
152 
153 	if (strcmp(name, SPECIAL_FILE_NAME_MASTERINFO) == 0) {
154 		return SPECIAL_INODE_MASTERINFO;
155 	} else if (strcmp(name, SPECIAL_FILE_NAME_STATS) == 0) {
156 		return SPECIAL_INODE_STATS;
157 	} else if (strcmp(name, SPECIAL_FILE_NAME_TWEAKS) == 0) {
158 		return SPECIAL_INODE_TWEAKS;
159 	} else if (strcmp(name, SPECIAL_FILE_NAME_OPLOG) == 0) {
160 		return SPECIAL_INODE_OPLOG;
161 	} else if (strcmp(name, SPECIAL_FILE_NAME_OPHISTORY) == 0) {
162 		return SPECIAL_INODE_OPHISTORY;
163 	} else if (strcmp(name, SPECIAL_FILE_NAME_FILE_BY_INODE) == 0) {
164 		return SPECIAL_INODE_FILE_BY_INODE;
165 	} else {
166 		return MAX_REGULAR_INODE;
167 	}
168 }
169 
isSpecialInode(Inode ino)170 bool isSpecialInode(Inode ino) {
171 	return IS_SPECIAL_INODE(ino);
172 }
173 
// I/O mode constants (presumably the values stored in finfo::mode - confirm against users).
enum {
	IO_NONE,
	IO_READ,
	IO_WRITE,
	IO_READONLY,
	IO_WRITEONLY
};

// Per-open-file state.
// NOTE(review): field meanings below are inferred from names; confirm against the code
// that fills this structure.
typedef struct _finfo {
	uint8_t mode;               // presumably one of the IO_* constants
	void *data;                 // opaque pointer, presumably the read/write data handle
	uint8_t use_flocks;         // presumably non-zero once flock was used on this file
	uint8_t use_posixlocks;     // presumably non-zero once POSIX locks were used
	pthread_mutex_t lock;
	pthread_mutex_t flushlock;
} finfo;
184 
185 static DirEntryCache gDirEntryCache;
186 static unsigned gDirEntryCacheMaxSize = 100000;
187 
188 static int debug_mode = 0;
189 static int usedircache = 1;
190 static int keep_cache = 0;
191 static double direntry_cache_timeout = 0.1;
192 static double entry_cache_timeout = 0.0;
193 static double attr_cache_timeout = 0.1;
194 static int mkdir_copy_sgid = 0;
195 static int sugid_clear_mode = 0;
196 bool use_rwlock = 0;
197 static std::atomic<bool> gDirectIo(false);
198 
199 // lock_request_counter shared by flock and setlk
200 static uint32_t lock_request_counter = 0;
201 static std::mutex lock_request_mutex;
202 
203 
204 static std::unique_ptr<AclCache> acl_cache;
205 
eraseAclCache(Inode inode)206 inline void eraseAclCache(Inode inode) {
207 	acl_cache->erase(
208 			inode    , 0, 0,
209 			inode + 1, 0, 0);
210 }
211 
212 // TODO consider making oplog_printf asynchronous
213 
214 /**
215  * A wrapper around pthread_rwlock, acquiring a lock during construction and releasing it during
216  * destruction in case if the lock wasn't released beforehand.
217  */
218 struct PthreadRwLockWrapper {
PthreadRwLockWrapperLizardClient::PthreadRwLockWrapper219 	PthreadRwLockWrapper(pthread_rwlock_t& mutex, bool write = true)
220 		: rwlock_(mutex), locked_(false) {
221 		lock(write);
222 	}
223 
~PthreadRwLockWrapperLizardClient::PthreadRwLockWrapper224 	~PthreadRwLockWrapper() {
225 		if (locked_) {
226 			unlock();
227 		}
228 	}
229 
lockLizardClient::PthreadRwLockWrapper230 	void lock(bool write = true) {
231 		sassert(!locked_);
232 		if (write) {
233 			pthread_rwlock_wrlock(&rwlock_);
234 		} else {
235 			pthread_rwlock_rdlock(&rwlock_);
236 		}
237 		locked_ = true;
238 	}
unlockLizardClient::PthreadRwLockWrapper239 	void unlock() {
240 		sassert(locked_);
241 		locked_ = false;
242 		pthread_rwlock_unlock(&rwlock_);
243 	}
244 
245 private:
246 	pthread_rwlock_t& rwlock_;
247 	bool locked_;
248 };
249 
250 static uint64_t *statsptr[STATNODES];
251 
statsptr_init(void)252 void statsptr_init(void) {
253 	void *s;
254 	s = stats_get_subnode(NULL,"fuse_ops",0);
255 	statsptr[OP_SETXATTR] = stats_get_counterptr(stats_get_subnode(s,"setxattr",0));
256 	statsptr[OP_GETXATTR] = stats_get_counterptr(stats_get_subnode(s,"getxattr",0));
257 	statsptr[OP_LISTXATTR] = stats_get_counterptr(stats_get_subnode(s,"listxattr",0));
258 	statsptr[OP_REMOVEXATTR] = stats_get_counterptr(stats_get_subnode(s,"removexattr",0));
259 	statsptr[OP_FSYNC] = stats_get_counterptr(stats_get_subnode(s,"fsync",0));
260 	statsptr[OP_FLUSH] = stats_get_counterptr(stats_get_subnode(s,"flush",0));
261 	statsptr[OP_WRITE] = stats_get_counterptr(stats_get_subnode(s,"write",0));
262 	statsptr[OP_READ] = stats_get_counterptr(stats_get_subnode(s,"read",0));
263 	statsptr[OP_RELEASE] = stats_get_counterptr(stats_get_subnode(s,"release",0));
264 	statsptr[OP_OPEN] = stats_get_counterptr(stats_get_subnode(s,"open",0));
265 	statsptr[OP_CREATE] = stats_get_counterptr(stats_get_subnode(s,"create",0));
266 	statsptr[OP_RELEASEDIR] = stats_get_counterptr(stats_get_subnode(s,"releasedir",0));
267 	statsptr[OP_READDIR] = stats_get_counterptr(stats_get_subnode(s,"readdir",0));
268 	statsptr[OP_READRESERVED] = stats_get_counterptr(stats_get_subnode(s,"readreserved",0));
269 	statsptr[OP_READTRASH] = stats_get_counterptr(stats_get_subnode(s,"readtrash",0));
270 	statsptr[OP_OPENDIR] = stats_get_counterptr(stats_get_subnode(s,"opendir",0));
271 	statsptr[OP_LINK] = stats_get_counterptr(stats_get_subnode(s,"link",0));
272 	statsptr[OP_RENAME] = stats_get_counterptr(stats_get_subnode(s,"rename",0));
273 	statsptr[OP_READLINK] = stats_get_counterptr(stats_get_subnode(s,"readlink",0));
274 	statsptr[OP_READLINK_CACHED] = stats_get_counterptr(stats_get_subnode(s,"readlink-cached",0));
275 	statsptr[OP_SYMLINK] = stats_get_counterptr(stats_get_subnode(s,"symlink",0));
276 	statsptr[OP_RMDIR] = stats_get_counterptr(stats_get_subnode(s,"rmdir",0));
277 	statsptr[OP_MKDIR] = stats_get_counterptr(stats_get_subnode(s,"mkdir",0));
278 	statsptr[OP_UNLINK] = stats_get_counterptr(stats_get_subnode(s,"unlink",0));
279 	statsptr[OP_UNDEL] = stats_get_counterptr(stats_get_subnode(s,"undel",0));
280 	statsptr[OP_MKNOD] = stats_get_counterptr(stats_get_subnode(s,"mknod",0));
281 	statsptr[OP_SETATTR] = stats_get_counterptr(stats_get_subnode(s,"setattr",0));
282 	statsptr[OP_GETATTR] = stats_get_counterptr(stats_get_subnode(s,"getattr",0));
283 	statsptr[OP_DIRCACHE_GETATTR] = stats_get_counterptr(stats_get_subnode(s,"getattr-cached",0));
284 	statsptr[OP_LOOKUP] = stats_get_counterptr(stats_get_subnode(s,"lookup",0));
285 	statsptr[OP_LOOKUP_INTERNAL] = stats_get_counterptr(stats_get_subnode(s,"lookup-internal",0));
286 	if (usedircache) {
287 		statsptr[OP_DIRCACHE_LOOKUP] = stats_get_counterptr(stats_get_subnode(s,"lookup-cached",0));
288 	}
289 	statsptr[OP_ACCESS] = stats_get_counterptr(stats_get_subnode(s,"access",0));
290 	statsptr[OP_STATFS] = stats_get_counterptr(stats_get_subnode(s,"statfs",0));
291 	if (usedircache) {
292 		statsptr[OP_GETDIR_FULL] = stats_get_counterptr(stats_get_subnode(s,"getdir-full",0));
293 	} else {
294 		statsptr[OP_GETDIR_SMALL] = stats_get_counterptr(stats_get_subnode(s,"getdir-small",0));
295 	}
296 	statsptr[OP_GETLK] = stats_get_counterptr(stats_get_subnode(s,"getlk",0));
297 	statsptr[OP_SETLK] = stats_get_counterptr(stats_get_subnode(s,"setlk",0));
298 	statsptr[OP_FLOCK] = stats_get_counterptr(stats_get_subnode(s,"flock",0));
299 }
300 
stats_inc(uint8_t id)301 void stats_inc(uint8_t id) {
302 	if (id < STATNODES) {
303 		stats_lock();
304 		(*statsptr[id])++;
305 		stats_unlock();
306 	}
307 }
308 
type_to_stat(uint32_t inode,uint8_t type,struct stat * stbuf)309 void type_to_stat(uint32_t inode,uint8_t type, struct stat *stbuf) {
310 	memset(stbuf,0,sizeof(struct stat));
311 	stbuf->st_ino = inode;
312 	switch (type) {
313 	case TYPE_DIRECTORY:
314 		stbuf->st_mode = S_IFDIR;
315 		break;
316 	case TYPE_SYMLINK:
317 		stbuf->st_mode = S_IFLNK;
318 		break;
319 	case TYPE_FILE:
320 		stbuf->st_mode = S_IFREG;
321 		break;
322 	case TYPE_FIFO:
323 		stbuf->st_mode = S_IFIFO;
324 		break;
325 	case TYPE_SOCKET:
326 		stbuf->st_mode = S_IFSOCK;
327 		break;
328 	case TYPE_BLOCKDEV:
329 		stbuf->st_mode = S_IFBLK;
330 		break;
331 	case TYPE_CHARDEV:
332 		stbuf->st_mode = S_IFCHR;
333 		break;
334 	default:
335 		stbuf->st_mode = 0;
336 	}
337 }
338 
attr_get_mattr(const Attributes & attr)339 uint8_t attr_get_mattr(const Attributes &attr) {
340 	return (attr[1]>>4);    // higher 4 bits of mode
341 }
342 
attr_to_stat(uint32_t inode,const Attributes & attr,struct stat * stbuf)343 void attr_to_stat(uint32_t inode, const Attributes &attr, struct stat *stbuf) {
344 	uint16_t attrmode;
345 	uint8_t attrtype;
346 	uint32_t attruid,attrgid,attratime,attrmtime,attrctime,attrnlink,attrrdev;
347 	uint64_t attrlength;
348 	const uint8_t *ptr;
349 	ptr = attr.data();
350 	attrtype = get8bit(&ptr);
351 	attrmode = get16bit(&ptr);
352 	attruid = get32bit(&ptr);
353 	attrgid = get32bit(&ptr);
354 	attratime = get32bit(&ptr);
355 	attrmtime = get32bit(&ptr);
356 	attrctime = get32bit(&ptr);
357 	attrnlink = get32bit(&ptr);
358 	memset(stbuf, 0, sizeof(*stbuf));
359 	stbuf->st_ino = inode;
360 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLKSIZE
361 	stbuf->st_blksize = MFSBLOCKSIZE;
362 #endif
363 	switch (attrtype) {
364 	case TYPE_DIRECTORY:
365 		stbuf->st_mode = S_IFDIR | (attrmode & 07777);
366 		attrlength = get64bit(&ptr);
367 		stbuf->st_size = attrlength;
368 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
369 		stbuf->st_blocks = (attrlength+511)/512;
370 #endif
371 		break;
372 	case TYPE_SYMLINK:
373 		stbuf->st_mode = S_IFLNK | (attrmode & 07777);
374 		attrlength = get64bit(&ptr);
375 		stbuf->st_size = attrlength;
376 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
377 		stbuf->st_blocks = (attrlength+511)/512;
378 #endif
379 		break;
380 	case TYPE_FILE:
381 		stbuf->st_mode = S_IFREG | (attrmode & 07777);
382 		attrlength = get64bit(&ptr);
383 		stbuf->st_size = attrlength;
384 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
385 		stbuf->st_blocks = (attrlength+511)/512;
386 #endif
387 		break;
388 	case TYPE_FIFO:
389 		stbuf->st_mode = S_IFIFO | (attrmode & 07777);
390 		stbuf->st_size = 0;
391 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
392 		stbuf->st_blocks = 0;
393 #endif
394 		break;
395 	case TYPE_SOCKET:
396 		stbuf->st_mode = S_IFSOCK | (attrmode & 07777);
397 		stbuf->st_size = 0;
398 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
399 		stbuf->st_blocks = 0;
400 #endif
401 		break;
402 	case TYPE_BLOCKDEV:
403 		stbuf->st_mode = S_IFBLK | (attrmode & 07777);
404 		attrrdev = get32bit(&ptr);
405 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_RDEV
406 		stbuf->st_rdev = attrrdev;
407 #endif
408 		stbuf->st_size = 0;
409 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
410 		stbuf->st_blocks = 0;
411 #endif
412 		break;
413 	case TYPE_CHARDEV:
414 		stbuf->st_mode = S_IFCHR | (attrmode & 07777);
415 		attrrdev = get32bit(&ptr);
416 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_RDEV
417 		stbuf->st_rdev = attrrdev;
418 #endif
419 		stbuf->st_size = 0;
420 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BLOCKS
421 		stbuf->st_blocks = 0;
422 #endif
423 		break;
424 	default:
425 		stbuf->st_mode = 0;
426 	}
427 	stbuf->st_uid = attruid;
428 	stbuf->st_gid = attrgid;
429 	stbuf->st_atime = attratime;
430 	stbuf->st_mtime = attrmtime;
431 	stbuf->st_ctime = attrctime;
432 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_BIRTHTIME
433 	stbuf->st_birthtime = attrctime;        // for future use
434 #endif
435 	stbuf->st_nlink = attrnlink;
436 }
437 
/**
 * Renders \p mode as an ls-like, NUL-terminated 10-character string, e.g. "-rw-r--r--".
 *
 * Character 0 is the file type: 's' socket, 'l' symlink, '-' regular file, 'b' block
 * device, 'd' directory, 'c' character device, 'f' FIFO, '?' anything else.
 * Characters 1-9 are the rwx triplets for user, group and others. With set-uid, set-gid
 * or sticky set, the corresponding execute position shows 's'/'s'/'t' when the execute
 * bit is set and the upper-case letter when it is not.
 *
 * \param modestr  output buffer of at least 11 bytes
 * \param mode     file mode (type and permission bits)
 */
void makemodestr(char modestr[11],uint16_t mode) {
	char typeChar;
	switch (mode & S_IFMT) {
	case S_IFSOCK: typeChar = 's'; break;
	case S_IFLNK:  typeChar = 'l'; break;
	case S_IFREG:  typeChar = '-'; break;
	case S_IFBLK:  typeChar = 'b'; break;
	case S_IFDIR:  typeChar = 'd'; break;
	case S_IFCHR:  typeChar = 'c'; break;
	case S_IFIFO:  typeChar = 'f'; break;
	default:       typeChar = '?'; break;
	}
	modestr[0] = typeChar;

	static const char kPlainLetters[] = "rwxrwxrwx";
	for (unsigned pos = 1; pos <= 9; ++pos) {
		// Position 1 is the most significant permission bit (user read, bit 8).
		const bool granted = (mode & (1u << (9 - pos))) != 0;

		char special = '\0';
		if (pos == 3 && (mode & S_ISUID)) {
			special = 's';
		} else if (pos == 6 && (mode & S_ISGID)) {
			special = 's';
		} else if (pos == 9 && (mode & S_ISVTX)) {
			special = 't';
		}

		if (special != '\0') {
			// Clearing bit 0x20 turns the ASCII letter into upper case.
			modestr[pos] = granted ? special : static_cast<char>(special & 0xDF);
		} else {
			modestr[pos] = granted ? kPlainLetters[pos - 1] : '-';
		}
	}
	modestr[10] = '\0';
}
483 
makeattrstr(char * buff,uint32_t size,struct stat * stbuf)484 void makeattrstr(char *buff,uint32_t size,struct stat *stbuf) {
485 	char modestr[11];
486 	makemodestr(modestr,stbuf->st_mode);
487 #ifdef LIZARDFS_HAVE_STRUCT_STAT_ST_RDEV
488 	if (modestr[0]=='b' || modestr[0]=='c') {
489 		snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%" PRIu64 ",%08lX]",modestr,(unsigned int)(stbuf->st_mode),(unsigned int)(stbuf->st_nlink),(long int)stbuf->st_uid,(long int)stbuf->st_gid,(unsigned long int)(stbuf->st_atime),(unsigned long int)(stbuf->st_mtime),(unsigned long int)(stbuf->st_ctime),(uint64_t)(stbuf->st_size),(unsigned long int)(stbuf->st_rdev));
490 	} else {
491 		snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%" PRIu64 "]",modestr,(unsigned int)(stbuf->st_mode),(unsigned int)(stbuf->st_nlink),(long int)stbuf->st_uid,(long int)stbuf->st_gid,(unsigned long int)(stbuf->st_atime),(unsigned long int)(stbuf->st_mtime),(unsigned long int)(stbuf->st_ctime),(uint64_t)(stbuf->st_size));
492 	}
493 #else
494 	snprintf(buff,size,"[%s:0%06o,%u,%ld,%ld,%lu,%lu,%lu,%" PRIu64 "]",modestr,(unsigned int)(stbuf->st_mode),(unsigned int)(stbuf->st_nlink),(long int)stbuf->st_uid,(long int)stbuf->st_gid,(unsigned long int)(stbuf->st_atime),(unsigned long int)(stbuf->st_mtime),(unsigned long int)(stbuf->st_ctime),(uint64_t)(stbuf->st_size));
495 #endif
496 }
497 
RequestException(int error_code)498 RequestException::RequestException(int error_code) : system_error_code(), lizardfs_error_code() {
499 	assert(error_code != LIZARDFS_STATUS_OK);
500 
501 	lizardfs_error_code = error_code;
502 	system_error_code = lizardfs_error_conv(error_code);
503 	if (debug_mode) {
504 		lzfs::log_debug("status: {}", lizardfs_error_string(error_code));
505 	}
506 }
507 
statfs(const Context & ctx,Inode ino)508 struct statvfs statfs(const Context &ctx, Inode ino) {
509 	uint64_t totalspace,availspace,trashspace,reservedspace;
510 	uint32_t inodes;
511 	uint32_t bsize;
512 	struct statvfs stfsbuf;
513 	memset(&stfsbuf,0,sizeof(stfsbuf));
514 
515 	stats_inc(OP_STATFS);
516 	if (debug_mode) {
517 		oplog_printf(ctx, "statfs (%lu)", (unsigned long int)ino);
518 	}
519 	(void)ino;
520 	fs_statfs(&totalspace,&availspace,&trashspace,&reservedspace,&inodes);
521 
522 #if defined(__APPLE__)
523 	if (totalspace>0x0001000000000000ULL) {
524 		bsize = 0x20000;
525 	} else {
526 		bsize = 0x10000;
527 	}
528 #else
529 	bsize = 0x10000;
530 #endif
531 
532 	stfsbuf.f_namemax = MFS_NAME_MAX;
533 	stfsbuf.f_frsize = bsize;
534 	stfsbuf.f_bsize = bsize;
535 #if defined(__APPLE__)
536 	// FUSE on apple (or other parts of kernel) expects 32-bit values, so it's better to saturate this values than let being cut on 32-bit
537 	// can't change bsize also because 64k seems to be the biggest acceptable value for bsize
538 
539 	if (totalspace/bsize>0xFFFFFFFFU) {
540 		stfsbuf.f_blocks = 0xFFFFFFFFU;
541 	} else {
542 		stfsbuf.f_blocks = totalspace/bsize;
543 	}
544 	if (availspace/bsize>0xFFFFFFFFU) {
545 		stfsbuf.f_bfree = 0xFFFFFFFFU;
546 		stfsbuf.f_bavail = 0xFFFFFFFFU;
547 	} else {
548 		stfsbuf.f_bfree = availspace/bsize;
549 		stfsbuf.f_bavail = availspace/bsize;
550 	}
551 #else
552 	stfsbuf.f_blocks = totalspace/bsize;
553 	stfsbuf.f_bfree = availspace/bsize;
554 	stfsbuf.f_bavail = availspace/bsize;
555 #endif
556 	stfsbuf.f_files = MAX_REGULAR_INODE;
557 	stfsbuf.f_ffree = MAX_REGULAR_INODE - inodes;
558 	stfsbuf.f_favail = MAX_REGULAR_INODE - inodes;
559 	//stfsbuf.f_flag = ST_RDONLY;
560 	oplog_printf(ctx, "statfs (%lu): OK (%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu32 ")",
561 			(unsigned long int)ino,
562 			totalspace,
563 			availspace,
564 			trashspace,
565 			reservedspace,
566 			inodes);
567 	return stfsbuf;
568 }
569 
access(const Context & ctx,Inode ino,int mask)570 void access(const Context &ctx, Inode ino, int mask) {
571 	int status;
572 
573 	int mmode;
574 
575 	oplog_printf(ctx, "access (%lu,0x%X)",
576 			(unsigned long int)ino,
577 			mask);
578 	stats_inc(OP_ACCESS);
579 #if (R_OK==MODE_MASK_R) && (W_OK==MODE_MASK_W) && (X_OK==MODE_MASK_X)
580 	mmode = mask & (MODE_MASK_R | MODE_MASK_W | MODE_MASK_X);
581 #else
582 	mmode = 0;
583 	if (mask & R_OK) {
584 		mmode |= MODE_MASK_R;
585 	}
586 	if (mask & W_OK) {
587 		mmode |= MODE_MASK_W;
588 	}
589 	if (mask & X_OK) {
590 		mmode |= MODE_MASK_X;
591 	}
592 #endif
593 	if (IS_SPECIAL_INODE(ino)) {
594 		if (mask & (W_OK | X_OK)) {
595 			throw RequestException(LIZARDFS_ERROR_EACCES);
596 		}
597 		return;
598 	}
599 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
600 		fs_access(ino,ctx.uid,ctx.gid,mmode));
601 	if (status != LIZARDFS_STATUS_OK) {
602 		throw RequestException(status);
603 	}
604 }
605 
lookup(const Context & ctx,Inode parent,const char * name)606 EntryParam lookup(const Context &ctx, Inode parent, const char *name) {
607 	EntryParam e;
608 	uint64_t maxfleng;
609 	uint32_t inode;
610 	uint32_t nleng;
611 	Attributes attr;
612 	char attrstr[256];
613 	uint8_t mattr;
614 	uint8_t icacheflag;
615 	int status;
616 
617 	if (debug_mode) {
618 		oplog_printf(ctx, "lookup (%lu,%s) ...", (unsigned long int)parent, name);
619 	}
620 	nleng = strlen(name);
621 	if (nleng > MFS_NAME_MAX) {
622 		stats_inc(OP_LOOKUP);
623 		oplog_printf(ctx, "lookup (%lu,%s): %s",
624 				(unsigned long int)parent,
625 				name,
626 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
627 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
628 	}
629 	if (parent == SPECIAL_INODE_ROOT) {
630 		if (nleng == 2 && name[0] == '.' && name[1] == '.') {
631 			nleng = 1;
632 		}
633 
634 		Inode ino = getSpecialInodeByName(name);
635 		if (IS_SPECIAL_INODE(ino)) {
636 			return special_lookup(ino, ctx, parent, name, attrstr);
637 		}
638 	}
639 	if (parent == SPECIAL_INODE_FILE_BY_INODE) {
640 		char *endptr = nullptr;
641 		inode = strtol(name, &endptr, 10);
642 		if (endptr == nullptr || *endptr != '\0') {
643 			throw RequestException(LIZARDFS_ERROR_EINVAL);
644 		}
645 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
646 			fs_getattr(inode, ctx.uid, ctx.gid, attr));
647 		icacheflag = 0;
648 	} else if (usedircache && gDirEntryCache.lookup(ctx,parent,std::string(name,nleng),inode,attr)) {
649 		if (debug_mode) {
650 			lzfs::log_debug("lookup: sending data from dircache");
651 		}
652 		stats_inc(OP_DIRCACHE_LOOKUP);
653 		status = 0;
654 		icacheflag = 1;
655 //              oplog_printf(ctx, "lookup (%lu,%s) (using open dir cache): OK (%lu)",(unsigned long int)parent,name,(unsigned long int)inode);
656 	} else {
657 		stats_inc(OP_LOOKUP);
658 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
659 		fs_lookup(parent, std::string(name, nleng), ctx.uid, ctx.gid, &inode, attr));
660 		icacheflag = 0;
661 	}
662 	if (status != LIZARDFS_STATUS_OK) {
663 		oplog_printf(ctx, "lookup (%lu,%s): %s",
664 				(unsigned long int)parent,
665 				name,
666 				lizardfs_error_string(status));
667 		throw RequestException(status);
668 	}
669 	if (attr[0]==TYPE_FILE) {
670 		maxfleng = write_data_getmaxfleng(inode);
671 	} else {
672 		maxfleng = 0;
673 	}
674 	e.ino = inode;
675 	mattr = attr_get_mattr(attr);
676 	e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
677 	e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:((attr[0]==TYPE_DIRECTORY)?direntry_cache_timeout:entry_cache_timeout);
678 	attr_to_stat(inode,attr,&e.attr);
679 	if (maxfleng>(uint64_t)(e.attr.st_size)) {
680 		e.attr.st_size=maxfleng;
681 	}
682 	makeattrstr(attrstr,256,&e.attr);
683 	oplog_printf(ctx, "lookup (%lu,%s)%s: OK (%.1f,%lu,%.1f,%s)",
684 			(unsigned long int)parent,
685 			name,
686 			icacheflag?" (using open dir cache)":"",
687 			e.entry_timeout,
688 			(unsigned long int)e.ino,
689 			e.attr_timeout,
690 			attrstr);
691 	return e;
692 }
693 
getattr(const Context & ctx,Inode ino)694 AttrReply getattr(const Context &ctx, Inode ino) {
695 	uint64_t maxfleng;
696 	double attr_timeout;
697 	struct stat o_stbuf;
698 	Attributes attr;
699 	char attrstr[256];
700 	int status;
701 
702 	if (debug_mode) {
703 		oplog_printf(ctx, "getattr (%lu) ...", (unsigned long int)ino);
704 	}
705 
706 	if (IS_SPECIAL_INODE(ino)) {
707 		return special_getattr(ino, ctx, attrstr);
708 	}
709 
710 	maxfleng = write_data_getmaxfleng(ino);
711 	if (usedircache && gDirEntryCache.lookup(ctx,ino,attr)) {
712 		if (debug_mode) {
713 			lzfs::log_debug("getattr: sending data from dircache\n");
714 		}
715 		stats_inc(OP_DIRCACHE_GETATTR);
716 		status = LIZARDFS_STATUS_OK;
717 	} else {
718 		stats_inc(OP_GETATTR);
719 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
720 		fs_getattr(ino,ctx.uid,ctx.gid,attr));
721 	}
722 	if (status != LIZARDFS_STATUS_OK) {
723 		oplog_printf(ctx, "getattr (%lu): %s",
724 				(unsigned long int)ino,
725 				lizardfs_error_string(status));
726 		throw RequestException(status);
727 	}
728 	memset(&o_stbuf, 0, sizeof(struct stat));
729 	attr_to_stat(ino,attr,&o_stbuf);
730 	if (attr[0]==TYPE_FILE && maxfleng>(uint64_t)(o_stbuf.st_size)) {
731 		o_stbuf.st_size=maxfleng;
732 	}
733 	attr_timeout = (attr_get_mattr(attr)&MATTR_NOACACHE)?0.0:attr_cache_timeout;
734 	makeattrstr(attrstr,256,&o_stbuf);
735 	oplog_printf(ctx, "getattr (%lu): OK (%.1f,%s)",
736 			(unsigned long int)ino,
737 			attr_timeout,
738 			attrstr);
739 	return AttrReply{o_stbuf, attr_timeout};
740 }
741 
setattr(const Context & ctx,Inode ino,struct stat * stbuf,int to_set)742 AttrReply setattr(const Context &ctx, Inode ino, struct stat *stbuf, int to_set) {
743 	struct stat o_stbuf;
744 	uint64_t maxfleng;
745 	Attributes attr;
746 	char modestr[11];
747 	char attrstr[256];
748 	double attr_timeout;
749 	int status;
750 
751 	makemodestr(modestr,stbuf->st_mode);
752 	stats_inc(OP_SETATTR);
753 	if (debug_mode) {
754 		oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]) ...",
755 			(unsigned long int)ino,
756 			to_set,
757 			modestr+1,
758 			(unsigned int)(stbuf->st_mode & 07777),
759 			(long int)stbuf->st_uid,
760 			(long int)stbuf->st_gid,
761 			(unsigned long int)(stbuf->st_atime),
762 			(unsigned long int)(stbuf->st_mtime),
763 			(uint64_t)(stbuf->st_size));
764 	}
765 
766 	if (IS_SPECIAL_INODE(ino)) {
767 		return special_setattr(ino, ctx, stbuf, to_set, modestr, attrstr);
768 	}
769 
770 	status = LIZARDFS_ERROR_EINVAL;
771 	maxfleng = write_data_getmaxfleng(ino);
772 	if ((to_set & (LIZARDFS_SET_ATTR_MODE
773 			| LIZARDFS_SET_ATTR_UID
774 			| LIZARDFS_SET_ATTR_GID
775 			| LIZARDFS_SET_ATTR_ATIME
776 			| LIZARDFS_SET_ATTR_ATIME_NOW
777 			| LIZARDFS_SET_ATTR_MTIME
778 			| LIZARDFS_SET_ATTR_MTIME_NOW
779 			| LIZARDFS_SET_ATTR_SIZE)) == 0) { // change other flags or change nothing
780 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
781 		fs_setattr(ino,ctx.uid,ctx.gid,0,0,0,0,0,0,0,attr));    // ext3 compatibility - change ctime during this operation (usually chown(-1,-1))
782 		if (status != LIZARDFS_STATUS_OK) {
783 			oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
784 					(unsigned long int)ino,
785 					to_set,
786 					modestr+1,
787 					(unsigned int)(stbuf->st_mode & 07777),
788 					(long int)stbuf->st_uid,
789 					(long int)stbuf->st_gid,
790 					(unsigned long int)(stbuf->st_atime),
791 					(unsigned long int)(stbuf->st_mtime),
792 					(uint64_t)(stbuf->st_size),
793 					lizardfs_error_string(status));
794 			throw RequestException(status);
795 		}
796 	}
797 	if (to_set & (LIZARDFS_SET_ATTR_MODE
798 			| LIZARDFS_SET_ATTR_UID
799 			| LIZARDFS_SET_ATTR_GID
800 			| LIZARDFS_SET_ATTR_ATIME
801 			| LIZARDFS_SET_ATTR_MTIME
802 			| LIZARDFS_SET_ATTR_ATIME_NOW
803 			| LIZARDFS_SET_ATTR_MTIME_NOW)) {
804 		uint8_t setmask = 0;
805 		if (to_set & LIZARDFS_SET_ATTR_MODE) {
806 			setmask |= SET_MODE_FLAG;
807 		}
808 		if (to_set & LIZARDFS_SET_ATTR_UID) {
809 			setmask |= SET_UID_FLAG;
810 		}
811 		if (to_set & LIZARDFS_SET_ATTR_GID) {
812 			setmask |= SET_GID_FLAG;
813 		}
814 		if (to_set & LIZARDFS_SET_ATTR_ATIME) {
815 			setmask |= SET_ATIME_FLAG;
816 		}
817 		if (to_set & LIZARDFS_SET_ATTR_ATIME_NOW) {
818 			setmask |= SET_ATIME_NOW_FLAG;
819 		}
820 		if (to_set & LIZARDFS_SET_ATTR_MTIME) {
821 			setmask |= SET_MTIME_FLAG;
822 		}
823 		if (to_set & LIZARDFS_SET_ATTR_MTIME_NOW) {
824 			setmask |= SET_MTIME_NOW_FLAG;
825 		}
826 		if (to_set & (LIZARDFS_SET_ATTR_MTIME | LIZARDFS_SET_ATTR_MTIME_NOW)) {
827 			// in this case we want flush all pending writes because they could overwrite mtime
828 			write_data_flush_inode(ino);
829 		}
830 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
831 		fs_setattr(ino,ctx.uid,ctx.gid,setmask,stbuf->st_mode&07777,stbuf->st_uid,stbuf->st_gid,stbuf->st_atime,stbuf->st_mtime,sugid_clear_mode,attr));
832 		if (to_set & (LIZARDFS_SET_ATTR_MODE | LIZARDFS_SET_ATTR_UID | LIZARDFS_SET_ATTR_GID)) {
833 			eraseAclCache(ino);
834 		}
835 		if (status != LIZARDFS_STATUS_OK) {
836 			oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
837 					(unsigned long int)ino,
838 					to_set,
839 					modestr+1,
840 					(unsigned int)(stbuf->st_mode & 07777),
841 					(long int)stbuf->st_uid,
842 					(long int)stbuf->st_gid,
843 					(unsigned long int)(stbuf->st_atime),
844 					(unsigned long int)(stbuf->st_mtime),
845 					(uint64_t)(stbuf->st_size),
846 					lizardfs_error_string(status));
847 			throw RequestException(status);
848 		}
849 	}
850 	if (to_set & LIZARDFS_SET_ATTR_SIZE) {
851 		if (stbuf->st_size<0) {
852 			oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
853 					(unsigned long int)ino,
854 					to_set,
855 					modestr+1,
856 					(unsigned int)(stbuf->st_mode & 07777),
857 					(long int)stbuf->st_uid,
858 					(long int)stbuf->st_gid,
859 					(unsigned long int)(stbuf->st_atime),
860 					(unsigned long int)(stbuf->st_mtime),
861 					(uint64_t)(stbuf->st_size),
862 					lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
863 			throw RequestException(LIZARDFS_ERROR_EINVAL);
864 		}
865 		if (stbuf->st_size>=MAX_FILE_SIZE) {
866 			oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
867 					(unsigned long int)ino,
868 					to_set,
869 					modestr+1,
870 					(unsigned int)(stbuf->st_mode & 07777),
871 					(long int)stbuf->st_uid,
872 					(long int)stbuf->st_gid,
873 					(unsigned long int)(stbuf->st_atime),
874 					(unsigned long int)(stbuf->st_mtime),
875 					(uint64_t)(stbuf->st_size),
876 					lizardfs_error_string(LIZARDFS_ERROR_EFBIG));
877 			throw RequestException(LIZARDFS_ERROR_EFBIG);
878 		}
879 		try {
880 			RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
881 				write_data_truncate(ino, false, ctx.uid, ctx.gid, stbuf->st_size, attr));
882 			maxfleng = 0; // after the flush master server has valid length, don't use our length cache
883 		} catch (Exception& ex) {
884 			status = ex.status();
885 		}
886 		read_inode_ops(ino);
887 		if (status != LIZARDFS_STATUS_OK) {
888 			oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
889 					(unsigned long int)ino,
890 					to_set,
891 					modestr+1,
892 					(unsigned int)(stbuf->st_mode & 07777),
893 					(long int)stbuf->st_uid,
894 					(long int)stbuf->st_gid,
895 					(unsigned long int)(stbuf->st_atime),
896 					(unsigned long int)(stbuf->st_mtime),
897 					(uint64_t)(stbuf->st_size),
898 					lizardfs_error_string(status));
899 			throw RequestException(status);
900 		}
901 	}
902 	if (status != LIZARDFS_STATUS_OK) {        // should never happen but better check than sorry
903 		oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): %s",
904 				(unsigned long int)ino,
905 				to_set,
906 				modestr+1,
907 				(unsigned int)(stbuf->st_mode & 07777),
908 				(long int)stbuf->st_uid,
909 				(long int)stbuf->st_gid,
910 				(unsigned long int)(stbuf->st_atime),
911 				(unsigned long int)(stbuf->st_mtime),
912 				(uint64_t)(stbuf->st_size),
913 				lizardfs_error_string(status));
914 		throw RequestException(status);
915 	}
916 	gDirEntryCache.lockAndInvalidateInode(ino);
917 	memset(&o_stbuf, 0, sizeof(struct stat));
918 	attr_to_stat(ino,attr,&o_stbuf);
919 	if (attr[0]==TYPE_FILE && maxfleng>(uint64_t)(o_stbuf.st_size)) {
920 		o_stbuf.st_size=maxfleng;
921 	}
922 	attr_timeout = (attr_get_mattr(attr)&MATTR_NOACACHE)?0.0:attr_cache_timeout;
923 	makeattrstr(attrstr,256,&o_stbuf);
924 	oplog_printf(ctx, "setattr (%lu,0x%X,[%s:0%04o,%ld,%ld,%lu,%lu,%" PRIu64 "]): OK (%.1f,%s)",
925 			(unsigned long int)ino,
926 			to_set,
927 			modestr+1,
928 			(unsigned int)(stbuf->st_mode & 07777),
929 			(long int)stbuf->st_uid,
930 			(long int)stbuf->st_gid,
931 			(unsigned long int)(stbuf->st_atime),
932 			(unsigned long int)(stbuf->st_mtime),
933 			(uint64_t)(stbuf->st_size),
934 			attr_timeout,
935 			attrstr);
936 	return AttrReply{o_stbuf, attr_timeout};
937 }
938 
mknod(const Context & ctx,Inode parent,const char * name,mode_t mode,dev_t rdev)939 EntryParam mknod(const Context &ctx, Inode parent, const char *name, mode_t mode, dev_t rdev) {
940 	EntryParam e;
941 	uint32_t inode;
942 	Attributes attr;
943 	char modestr[11];
944 	char attrstr[256];
945 	uint8_t mattr;
946 	uint32_t nleng;
947 	int status;
948 	uint8_t type;
949 
950 	makemodestr(modestr,mode);
951 	stats_inc(OP_MKNOD);
952 	if (debug_mode) {
953 		oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX) ...",
954 				(unsigned long int)parent,
955 				name,
956 				modestr,
957 				(unsigned int)mode,
958 				(unsigned long int)rdev);
959 	}
960 	nleng = strlen(name);
961 	if (nleng>MFS_NAME_MAX) {
962 		oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
963 				(unsigned long int)parent,
964 				name,
965 				modestr,
966 				(unsigned int)mode,
967 				(unsigned long int)rdev,
968 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
969 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
970 	}
971 	if (S_ISFIFO(mode)) {
972 		type = TYPE_FIFO;
973 	} else if (S_ISCHR(mode)) {
974 		type = TYPE_CHARDEV;
975 	} else if (S_ISBLK(mode)) {
976 		type = TYPE_BLOCKDEV;
977 	} else if (S_ISSOCK(mode)) {
978 		type = TYPE_SOCKET;
979 	} else if (S_ISREG(mode) || (mode&0170000)==0) {
980 		type = TYPE_FILE;
981 	} else {
982 		oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
983 				(unsigned long int)parent,
984 				name,
985 				modestr,
986 				(unsigned int)mode,
987 				(unsigned long int)rdev,
988 				lizardfs_error_string(LIZARDFS_ERROR_EPERM));
989 		throw RequestException(LIZARDFS_ERROR_EPERM);
990 	}
991 
992 	if (parent==SPECIAL_INODE_ROOT) {
993 		if (IS_SPECIAL_NAME(name)) {
994 			oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
995 					(unsigned long int)parent,
996 					name,
997 					modestr,
998 					(unsigned int)mode,
999 					(unsigned long int)rdev,
1000 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1001 			throw RequestException(LIZARDFS_ERROR_EACCES);
1002 		}
1003 	}
1004 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1005 		fs_mknod(parent,nleng,(const uint8_t*)name,type,mode&07777,ctx.umask,ctx.uid,ctx.gid,rdev,inode,attr));
1006 	if (status != LIZARDFS_STATUS_OK) {
1007 		oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): %s",
1008 				(unsigned long int)parent,
1009 				name,
1010 				modestr,
1011 				(unsigned int)mode,
1012 				(unsigned long int)rdev,
1013 				lizardfs_error_string(status));
1014 		throw RequestException(status);
1015 	} else {
1016 		gDirEntryCache.lockAndInvalidateParent(ctx, parent);
1017 		e.ino = inode;
1018 		mattr = attr_get_mattr(attr);
1019 		e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1020 		e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1021 		attr_to_stat(inode,attr,&e.attr);
1022 		makeattrstr(attrstr,256,&e.attr);
1023 		oplog_printf(ctx, "mknod (%lu,%s,%s:0%04o,0x%08lX): OK (%.1f,%lu,%.1f,%s)",
1024 				(unsigned long int)parent,
1025 				name,
1026 				modestr,
1027 				(unsigned int)mode,
1028 				(unsigned long int)rdev,
1029 				e.entry_timeout,
1030 				(unsigned long int)e.ino,
1031 				e.attr_timeout,
1032 				attrstr);
1033 		return e;
1034 	}
1035 }
1036 
unlink(const Context & ctx,Inode parent,const char * name)1037 void unlink(const Context &ctx, Inode parent, const char *name) {
1038 	uint32_t nleng;
1039 	int status;
1040 
1041 	stats_inc(OP_UNLINK);
1042 	if (debug_mode) {
1043 		oplog_printf(ctx, "unlink (%lu,%s) ...", (unsigned long int)parent, name);
1044 	}
1045 	if (parent==SPECIAL_INODE_ROOT) {
1046 		if (IS_SPECIAL_NAME(name)) {
1047 			oplog_printf(ctx, "unlink (%lu,%s): %s",
1048 					(unsigned long int)parent,
1049 					name,
1050 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1051 			throw RequestException(LIZARDFS_ERROR_EACCES);
1052 		}
1053 	}
1054 
1055 	nleng = strlen(name);
1056 	if (nleng>MFS_NAME_MAX) {
1057 		oplog_printf(ctx, "unlink (%lu,%s): %s",
1058 				(unsigned long int)parent,
1059 				name,
1060 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1061 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1062 	}
1063 
1064 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1065 		fs_unlink(parent,nleng,(const uint8_t*)name,ctx.uid,ctx.gid));
1066 	gDirEntryCache.lockAndInvalidateParent(parent);
1067 	if (status != LIZARDFS_STATUS_OK) {
1068 		oplog_printf(ctx, "unlink (%lu,%s): %s",
1069 				(unsigned long int)parent,
1070 				name,
1071 				lizardfs_error_string(status));
1072 		throw RequestException(status);
1073 	} else {
1074 		oplog_printf(ctx, "unlink (%lu,%s): OK",
1075 				(unsigned long int)parent,
1076 				name);
1077 		return;
1078 	}
1079 }
1080 
undel(const Context & ctx,Inode ino)1081 void undel(const Context &ctx, Inode ino) {
1082 	stats_inc(OP_UNDEL);
1083 	if (debug_mode) {
1084 		oplog_printf(ctx, "undel (%lu) ...", (unsigned long)ino);
1085 	}
1086 	uint8_t status;
1087 	// FIXME(haze): modify undel to return parent inode and call gDirEntryCache.lockAndInvalidateParent(parent)
1088 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid, fs_undel(ino));
1089 	if (status != LIZARDFS_STATUS_OK) {
1090 		throw RequestException(status);
1091 	}
1092 }
1093 
mkdir(const Context & ctx,Inode parent,const char * name,mode_t mode)1094 EntryParam mkdir(const Context &ctx, Inode parent, const char *name, mode_t mode) {
1095 	struct EntryParam e;
1096 	uint32_t inode;
1097 	Attributes attr;
1098 	char modestr[11];
1099 	char attrstr[256];
1100 	uint8_t mattr;
1101 	uint32_t nleng;
1102 	int status;
1103 
1104 	makemodestr(modestr,mode);
1105 	stats_inc(OP_MKDIR);
1106 	if (debug_mode) {
1107 		oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o) ...",
1108 				(unsigned long int)parent,
1109 				name,
1110 				modestr+1,
1111 				(unsigned int)mode);
1112 	}
1113 	if (parent==SPECIAL_INODE_ROOT) {
1114 		if (IS_SPECIAL_NAME(name)) {
1115 			oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): %s",
1116 					(unsigned long int)parent,
1117 					name,
1118 					modestr+1,
1119 					(unsigned int)mode,
1120 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1121 			throw RequestException(LIZARDFS_ERROR_EACCES);
1122 		}
1123 	}
1124 	nleng = strlen(name);
1125 	if (nleng>MFS_NAME_MAX) {
1126 		oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): %s",
1127 				(unsigned long int)parent,
1128 				name,
1129 				modestr+1,
1130 				(unsigned int)mode,
1131 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1132 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1133 	}
1134 
1135 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1136 		fs_mkdir(parent,nleng,(const uint8_t*)name,mode,ctx.umask,ctx.uid,ctx.gid,mkdir_copy_sgid,inode,attr));
1137 	if (status != LIZARDFS_STATUS_OK) {
1138 		oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): %s",
1139 				(unsigned long int)parent,
1140 				name,
1141 				modestr+1,
1142 				(unsigned int)mode,
1143 				lizardfs_error_string(status));
1144 		throw RequestException(status);
1145 	} else {
1146 		gDirEntryCache.lockAndInvalidateParent(parent);
1147 		e.ino = inode;
1148 		mattr = attr_get_mattr(attr);
1149 		e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1150 		e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:direntry_cache_timeout;
1151 		attr_to_stat(inode,attr,&e.attr);
1152 		makeattrstr(attrstr,256,&e.attr);
1153 		oplog_printf(ctx, "mkdir (%lu,%s,d%s:0%04o): OK (%.1f,%lu,%.1f,%s)",
1154 				(unsigned long int)parent,
1155 				name,
1156 				modestr+1,
1157 				(unsigned int)mode,
1158 				e.entry_timeout,
1159 				(unsigned long int)e.ino,
1160 				e.attr_timeout,
1161 				attrstr);
1162 		return e;
1163 	}
1164 }
1165 
rmdir(const Context & ctx,Inode parent,const char * name)1166 void rmdir(const Context &ctx, Inode parent, const char *name) {
1167 	uint32_t nleng;
1168 	int status;
1169 
1170 	stats_inc(OP_RMDIR);
1171 	if (debug_mode) {
1172 		oplog_printf(ctx, "rmdir (%lu,%s) ...", (unsigned long int)parent, name);
1173 	}
1174 	if (parent==SPECIAL_INODE_ROOT) {
1175 		if (IS_SPECIAL_NAME(name)) {
1176 			oplog_printf(ctx, "rmdir (%lu,%s): %s",
1177 					(unsigned long int)parent,
1178 					name,
1179 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1180 			throw RequestException(LIZARDFS_ERROR_EACCES);
1181 		}
1182 	}
1183 	nleng = strlen(name);
1184 	if (nleng>MFS_NAME_MAX) {
1185 		oplog_printf(ctx, "rmdir (%lu,%s): %s",
1186 				(unsigned long int)parent,
1187 				name,
1188 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1189 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1190 	}
1191 
1192 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1193 		fs_rmdir(parent,nleng,(const uint8_t*)name,ctx.uid,ctx.gid));
1194 	gDirEntryCache.lockAndInvalidateParent(parent);
1195 	if (status != LIZARDFS_STATUS_OK) {
1196 		oplog_printf(ctx, "rmdir (%lu,%s): %s",
1197 				(unsigned long int)parent,
1198 				name,
1199 				lizardfs_error_string(status));
1200 		throw RequestException(status);
1201 	} else {
1202 		oplog_printf(ctx, "rmdir (%lu,%s): OK",
1203 				(unsigned long int)parent,
1204 				name);
1205 		return;
1206 	}
1207 }
1208 
symlink(const Context & ctx,const char * path,Inode parent,const char * name)1209 EntryParam symlink(const Context &ctx, const char *path, Inode parent,
1210 			 const char *name) {
1211 	struct EntryParam e;
1212 	uint32_t inode;
1213 	Attributes attr;
1214 	char attrstr[256];
1215 	uint8_t mattr;
1216 	uint32_t nleng;
1217 	int status;
1218 
1219 	stats_inc(OP_SYMLINK);
1220 	if (debug_mode) {
1221 		oplog_printf(ctx, "symlink (%s,%lu,%s) ...",
1222 				path,
1223 				(unsigned long int)parent,
1224 				name);
1225 	}
1226 	if (parent==SPECIAL_INODE_ROOT) {
1227 		if (IS_SPECIAL_NAME(name)) {
1228 			oplog_printf(ctx, "symlink (%s,%lu,%s): %s",
1229 					path,
1230 					(unsigned long int)parent,
1231 					name,
1232 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1233 			throw RequestException(LIZARDFS_ERROR_EACCES);
1234 		}
1235 	}
1236 	nleng = strlen(name);
1237 	if (nleng>MFS_NAME_MAX) {
1238 		oplog_printf(ctx, "symlink (%s,%lu,%s): %s",
1239 				path,
1240 				(unsigned long int)parent,
1241 				name,
1242 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1243 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1244 	}
1245 
1246 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1247 		fs_symlink(parent,nleng,(const uint8_t*)name,(const uint8_t*)path,ctx.uid,ctx.gid,&inode,attr));
1248 	if (status != LIZARDFS_STATUS_OK) {
1249 		oplog_printf(ctx, "symlink (%s,%lu,%s): %s",
1250 				path,
1251 				(unsigned long int)parent,
1252 				name,
1253 				lizardfs_error_string(status));
1254 		throw RequestException(status);
1255 	} else {
1256 		gDirEntryCache.lockAndInvalidateParent(parent);
1257 		e.ino = inode;
1258 		mattr = attr_get_mattr(attr);
1259 		e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1260 		e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1261 		attr_to_stat(inode,attr,&e.attr);
1262 		makeattrstr(attrstr,256,&e.attr);
1263 		symlink_cache_insert(inode, (const uint8_t *)path);
1264 		oplog_printf(ctx, "symlink (%s,%lu,%s): OK (%.1f,%lu,%.1f,%s)",
1265 				path,
1266 				(unsigned long int)parent,
1267 				name,
1268 				e.entry_timeout,
1269 				(unsigned long int)e.ino,
1270 				e.attr_timeout,
1271 				attrstr);
1272 		return e;
1273 	}
1274 }
1275 
readlink(const Context & ctx,Inode ino)1276 std::string readlink(const Context &ctx, Inode ino) {
1277 	int status;
1278 	const uint8_t *path;
1279 
1280 	if (debug_mode) {
1281 		oplog_printf(ctx, "readlink (%lu) ...",
1282 				(unsigned long int)ino);
1283 	}
1284 	if (symlink_cache_search(ino,&path)) {
1285 		stats_inc(OP_READLINK_CACHED);
1286 		oplog_printf(ctx, "readlink (%lu) (using cache): OK (%s)",
1287 				(unsigned long int)ino,
1288 				(char*)path);
1289 		return std::string((char*)path);
1290 	}
1291 	stats_inc(OP_READLINK);
1292 	status = fs_readlink(ino,&path);
1293 	if (status != LIZARDFS_STATUS_OK) {
1294 		oplog_printf(ctx, "readlink (%lu): %s",
1295 				(unsigned long int)ino,
1296 				lizardfs_error_string(status));
1297 		throw RequestException(status);
1298 	} else {
1299 		symlink_cache_insert(ino,path);
1300 		oplog_printf(ctx, "readlink (%lu): OK (%s)",
1301 				(unsigned long int)ino,
1302 				(char*)path);
1303 		return std::string((char*)path);
1304 	}
1305 }
1306 
rename(const Context & ctx,Inode parent,const char * name,Inode newparent,const char * newname)1307 void rename(const Context &ctx, Inode parent, const char *name,
1308 			Inode newparent, const char *newname) {
1309 	uint32_t nleng,newnleng;
1310 	int status;
1311 	uint32_t inode;
1312 	Attributes attr;
1313 
1314 	stats_inc(OP_RENAME);
1315 	if (debug_mode) {
1316 		oplog_printf(ctx, "rename (%lu,%s,%lu,%s) ...",
1317 				(unsigned long int)parent,
1318 				name,
1319 				(unsigned long int)newparent,
1320 				newname);
1321 	}
1322 	if (parent==SPECIAL_INODE_ROOT) {
1323 		if (IS_SPECIAL_NAME(name)) {
1324 			oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1325 					(unsigned long int)parent,
1326 					name,
1327 					(unsigned long int)newparent,
1328 					newname,
1329 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1330 			throw RequestException(LIZARDFS_ERROR_EACCES);
1331 		}
1332 	}
1333 	if (newparent==SPECIAL_INODE_ROOT) {
1334 		if (IS_SPECIAL_NAME(newname)) {
1335 			oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1336 					(unsigned long int)parent,
1337 					name,
1338 					(unsigned long int)newparent,
1339 					newname,
1340 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1341 			throw RequestException(LIZARDFS_ERROR_EACCES);
1342 		}
1343 	}
1344 	nleng = strlen(name);
1345 	if (nleng>MFS_NAME_MAX) {
1346 		oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1347 				(unsigned long int)parent,
1348 				name,
1349 				(unsigned long int)newparent,
1350 				newname,
1351 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1352 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1353 	}
1354 	newnleng = strlen(newname);
1355 	if (newnleng>MFS_NAME_MAX) {
1356 		oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1357 				(unsigned long int)parent,
1358 				name,
1359 				(unsigned long int)newparent,
1360 				newname,
1361 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1362 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1363 	}
1364 
1365 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1366 	fs_rename(parent,nleng,(const uint8_t*)name,newparent,newnleng,(const uint8_t*)newname,ctx.uid,ctx.gid,&inode,attr));
1367 	gDirEntryCache.lockAndInvalidateParent(parent);
1368 	gDirEntryCache.lockAndInvalidateParent(newparent);
1369 	if (status != LIZARDFS_STATUS_OK) {
1370 		oplog_printf(ctx, "rename (%lu,%s,%lu,%s): %s",
1371 				(unsigned long int)parent,
1372 				name,
1373 				(unsigned long int)newparent,
1374 				newname,
1375 				lizardfs_error_string(status));
1376 		throw RequestException(status);
1377 	} else {
1378 		oplog_printf(ctx, "rename (%lu,%s,%lu,%s): OK",
1379 				(unsigned long int)parent,
1380 				name,
1381 				(unsigned long int)newparent,
1382 				newname);
1383 		return;
1384 	}
1385 }
1386 
link(const Context & ctx,Inode ino,Inode newparent,const char * newname)1387 EntryParam link(const Context &ctx, Inode ino, Inode newparent, const char *newname) {
1388 	uint32_t newnleng;
1389 	int status;
1390 	EntryParam e;
1391 	uint32_t inode;
1392 	Attributes attr;
1393 	char attrstr[256];
1394 	uint8_t mattr;
1395 
1396 
1397 	stats_inc(OP_LINK);
1398 	if (debug_mode) {
1399 		oplog_printf(ctx, "link (%lu,%lu,%s) ...",
1400 				(unsigned long int)ino,
1401 				(unsigned long int)newparent,
1402 				newname);
1403 	}
1404 	if (IS_SPECIAL_INODE(ino)) {
1405 		oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1406 				(unsigned long int)ino,
1407 				(unsigned long int)newparent,
1408 				newname,
1409 				lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1410 		throw RequestException(LIZARDFS_ERROR_EACCES);
1411 	}
1412 	if (newparent==SPECIAL_INODE_ROOT) {
1413 		if (IS_SPECIAL_NAME(newname)) {
1414 			oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1415 					(unsigned long int)ino,
1416 					(unsigned long int)newparent,
1417 					newname,
1418 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1419 			throw RequestException(LIZARDFS_ERROR_EACCES);
1420 		}
1421 	}
1422 	newnleng = strlen(newname);
1423 	if (newnleng>MFS_NAME_MAX) {
1424 		oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1425 				(unsigned long int)ino,
1426 				(unsigned long int)newparent,
1427 				newname,
1428 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1429 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1430 	}
1431 
1432 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1433 		fs_link(ino,newparent,newnleng,(const uint8_t*)newname,ctx.uid,ctx.gid,&inode,attr));
1434 	if (status != LIZARDFS_STATUS_OK) {
1435 		oplog_printf(ctx, "link (%lu,%lu,%s): %s",
1436 				(unsigned long int)ino,
1437 				(unsigned long int)newparent,
1438 				newname,
1439 				lizardfs_error_string(status));
1440 		throw RequestException(status);
1441 	} else {
1442 		gDirEntryCache.lockAndInvalidateParent(newparent);
1443 		e.ino = inode;
1444 		mattr = attr_get_mattr(attr);
1445 		e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1446 		e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1447 		attr_to_stat(inode,attr,&e.attr);
1448 		makeattrstr(attrstr,256,&e.attr);
1449 		oplog_printf(ctx, "link (%lu,%lu,%s): OK (%.1f,%lu,%.1f,%s)",
1450 				(unsigned long int)ino,
1451 				(unsigned long int)newparent,
1452 				newname,
1453 				e.entry_timeout,
1454 				(unsigned long int)e.ino,
1455 				e.attr_timeout,
1456 				attrstr);
1457 		return e;
1458 	}
1459 }
1460 
opendir(const Context & ctx,Inode ino)1461 void opendir(const Context &ctx, Inode ino) {
1462 	int status;
1463 
1464 	stats_inc(OP_OPENDIR);
1465 	if (debug_mode) {
1466 		oplog_printf(ctx, "opendir (%lu) ...", (unsigned long int)ino);
1467 	}
1468 	if (IS_SPECIAL_INODE(ino)) {
1469 		oplog_printf(ctx, "opendir (%lu): %s",
1470 				(unsigned long int)ino,
1471 				lizardfs_error_string(LIZARDFS_ERROR_ENOTDIR));
1472 		throw RequestException(LIZARDFS_ERROR_ENOTDIR);
1473 	}
1474 
1475 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1476 		fs_access(ino,ctx.uid,ctx.gid,MODE_MASK_R));    // at least test rights
1477 	if (status != LIZARDFS_STATUS_OK) {
1478 		oplog_printf(ctx, "opendir (%lu): %s",
1479 				(unsigned long int)ino,
1480 				lizardfs_error_string(status));
1481 		throw RequestException(status);
1482 	}
1483 }
1484 
readdir(const Context & ctx,Inode ino,off_t off,size_t max_entries)1485 std::vector<DirEntry> readdir(const Context &ctx, Inode ino, off_t off, size_t max_entries) {
1486 	static constexpr int kBatchSize = 1000;
1487 
1488 	stats_inc(OP_READDIR);
1489 	if (debug_mode) {
1490 		oplog_printf(ctx, "readdir (%lu,%" PRIu64 ",%" PRIu64 ") ...",
1491 				(unsigned long int)ino,
1492 				(uint64_t)max_entries,
1493 				(uint64_t)off);
1494 	}
1495 	if (off<0) {
1496 		oplog_printf(ctx, "readdir (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1497 				(unsigned long int)ino,
1498 				(uint64_t)max_entries,
1499 				(uint64_t)off,
1500 				lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
1501 		throw RequestException(LIZARDFS_ERROR_EINVAL);
1502 	}
1503 
1504 	std::vector<DirEntry> result;
1505 	shared_lock<shared_mutex> access_guard(gDirEntryCache.rwlock());
1506 	gDirEntryCache.updateTime();
1507 
1508 	uint64_t entry_index = off;
1509 	auto it = gDirEntryCache.find(ctx, ino, entry_index);
1510 
1511 	result.reserve(max_entries);
1512 	for(;it != gDirEntryCache.index_end() && max_entries > 0;++it) {
1513 		if (!gDirEntryCache.isValid(it) || it->index != entry_index) {
1514 			break;
1515 		}
1516 
1517 		if (it->inode == 0) {
1518 			// we have valid 'no more entries' marker
1519 			assert(it->name.empty());
1520 			max_entries = 0;
1521 			break;
1522 		}
1523 
1524 		++entry_index;
1525 		--max_entries;
1526 
1527 		struct stat stats;
1528 		attr_to_stat(it->inode,it->attr,&stats);
1529 		result.emplace_back(it->name, stats, entry_index);
1530 	}
1531 
1532 	if (max_entries == 0) {
1533 		return result;
1534 	}
1535 
1536 	access_guard.unlock();
1537 
1538 	std::vector<DirectoryEntry> dir_entries;
1539 	uint8_t status;
1540 	uint64_t request_size = std::min<std::size_t>(std::max<std::size_t>(kBatchSize, max_entries),
1541 	                                              matocl::fuseGetDir::kMaxNumberOfDirectoryEntries);
1542 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1543 		fs_getdir(ino, ctx.uid, ctx.gid, entry_index, request_size, dir_entries));
1544 	auto data_acquire_time = gDirEntryCache.updateTime();
1545 
1546 	if(status != LIZARDFS_STATUS_OK) {
1547 		throw RequestException(status);
1548 	}
1549 
1550 	std::unique_lock<shared_mutex> write_guard(gDirEntryCache.rwlock());
1551 	gDirEntryCache.updateTime();
1552 
1553 	gDirEntryCache.insertSubsequent(ctx, ino, entry_index, dir_entries, data_acquire_time);
1554 	if (dir_entries.size() < request_size) {
1555 		// insert 'no more entries' marker
1556 		gDirEntryCache.insert(ctx, ino, 0, entry_index + dir_entries.size(), "", Attributes{{}}, data_acquire_time);
1557 		gDirEntryCache.invalidate(ctx,ino,entry_index + dir_entries.size() + 1);
1558 	}
1559 
1560 	if (gDirEntryCache.size() > gDirEntryCacheMaxSize) {
1561 		gDirEntryCache.removeOldest(gDirEntryCache.size() - gDirEntryCacheMaxSize);
1562 	}
1563 
1564 	write_guard.unlock();
1565 
1566 	for(auto it = dir_entries.begin(); it != dir_entries.end() && max_entries > 0; ++it) {
1567 		--max_entries;
1568 		++entry_index;
1569 
1570 		struct stat stats;
1571 		attr_to_stat(it->inode,it->attributes,&stats);
1572 		result.emplace_back(it->name, stats, entry_index);
1573 	}
1574 
1575 	return result;
1576 }
1577 
readreserved(const Context & ctx,NamedInodeOffset off,NamedInodeOffset max_entries)1578 std::vector<NamedInodeEntry> readreserved(const Context &ctx, NamedInodeOffset off, NamedInodeOffset max_entries) {
1579 	stats_inc(OP_READRESERVED);
1580 	if (debug_mode) {
1581 		oplog_printf(ctx, "readreserved (%" PRIu64 ",%" PRIu64 ") ...",
1582 				(uint64_t)max_entries,
1583 				(uint64_t)off);
1584 	}
1585 
1586 	std::vector<NamedInodeEntry> entries;
1587 	uint8_t status;
1588 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1589 		fs_getreserved(off, max_entries, entries));
1590 
1591 	if (status != LIZARDFS_STATUS_OK) {
1592 		throw RequestException(status);
1593 	}
1594 
1595 	return entries;
1596 }
1597 
readtrash(const Context & ctx,NamedInodeOffset off,NamedInodeOffset max_entries)1598 std::vector<NamedInodeEntry> readtrash(const Context &ctx, NamedInodeOffset off, NamedInodeOffset max_entries) {
1599 	stats_inc(OP_READTRASH);
1600 	if (debug_mode) {
1601 		oplog_printf(ctx, "readtrash (%" PRIu64 ",%" PRIu64 ") ...",
1602 				(uint64_t)max_entries,
1603 				(uint64_t)off);
1604 	}
1605 
1606 	std::vector<NamedInodeEntry> entries;
1607 	uint8_t status;
1608 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1609 		fs_gettrash(off, max_entries, entries));
1610 
1611 	if (status != LIZARDFS_STATUS_OK) {
1612 		throw RequestException(status);
1613 	}
1614 
1615 	return entries;
1616 }
1617 
releasedir(Inode ino)1618 void releasedir(Inode ino) {
1619 	static constexpr int kBatchSize = 1000;
1620 
1621 	stats_inc(OP_RELEASEDIR);
1622 	if (debug_mode) {
1623 		oplog_printf("releasedir (%lu) ...",
1624 				(unsigned long int)ino);
1625 	}
1626 	oplog_printf("releasedir (%lu): OK",
1627 			(unsigned long int)ino);
1628 
1629 	std::unique_lock<shared_mutex> write_guard(gDirEntryCache.rwlock());
1630 	gDirEntryCache.updateTime();
1631 	gDirEntryCache.removeExpired(kBatchSize);
1632 }
1633 
1634 
fs_newfileinfo(uint8_t accmode,uint32_t inode)1635 static finfo* fs_newfileinfo(uint8_t accmode, uint32_t inode) {
1636 	finfo *fileinfo;
1637 	fileinfo = (finfo*) malloc(sizeof(finfo));
1638 	pthread_mutex_init(&(fileinfo->flushlock),NULL);
1639 	pthread_mutex_init(&(fileinfo->lock),NULL);
1640 	PthreadMutexWrapper lock((fileinfo->lock)); // make helgrind happy
1641 #ifdef __FreeBSD__
1642 	/* old FreeBSD fuse reads whole file when opening with O_WRONLY|O_APPEND,
1643 	 * so can't open it write-only */
1644 	(void)accmode;
1645 	(void)inode;
1646 	fileinfo->mode = IO_NONE;
1647 	fileinfo->data = NULL;
1648 #else
1649 	if (accmode == O_RDONLY) {
1650 		fileinfo->mode = IO_READONLY;
1651 		fileinfo->data = read_data_new(inode);
1652 	} else if (accmode == O_WRONLY) {
1653 		fileinfo->mode = IO_WRITEONLY;
1654 		fileinfo->data = write_data_new(inode);
1655 	} else {
1656 		fileinfo->mode = IO_NONE;
1657 		fileinfo->data = NULL;
1658 	}
1659 #endif
1660 	fileinfo->use_flocks = false;
1661 	fileinfo->use_posixlocks = false;
1662 
1663 	return fileinfo;
1664 }
1665 
remove_file_info(FileInfo * f)1666 void remove_file_info(FileInfo *f) {
1667 	finfo* fileinfo = (finfo*)(f->fh);
1668 	PthreadMutexWrapper lock(fileinfo->lock);
1669 	if (fileinfo->mode == IO_READONLY || fileinfo->mode == IO_READ) {
1670 		read_data_end(fileinfo->data);
1671 	} else if (fileinfo->mode == IO_WRITEONLY || fileinfo->mode == IO_WRITE) {
1672 		write_data_end(fileinfo->data);
1673 	}
1674 	lock.unlock(); // This unlock is needed, since we want to destroy the mutex
1675 	pthread_mutex_destroy(&(fileinfo->lock));
1676 	pthread_mutex_destroy(&(fileinfo->flushlock));
1677 	free(fileinfo);
1678 }
1679 
create(const Context & ctx,Inode parent,const char * name,mode_t mode,FileInfo * fi)1680 EntryParam create(const Context &ctx, Inode parent, const char *name, mode_t mode,
1681 		FileInfo* fi) {
1682 	struct EntryParam e;
1683 	uint32_t inode;
1684 	uint8_t oflags;
1685 	Attributes attr;
1686 	char modestr[11];
1687 	char attrstr[256];
1688 	uint8_t mattr;
1689 	uint32_t nleng;
1690 	int status;
1691 
1692 	finfo *fileinfo;
1693 
1694 	makemodestr(modestr,mode);
1695 	stats_inc(OP_CREATE);
1696 	if (debug_mode) {
1697 		oplog_printf(ctx, "create (%lu,%s,-%s:0%04o)",
1698 				(unsigned long int)parent,
1699 				name,
1700 				modestr+1,
1701 				(unsigned int)mode);
1702 	}
1703 	if (parent==SPECIAL_INODE_ROOT) {
1704 		if (IS_SPECIAL_NAME(name)) {
1705 			oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): %s",
1706 					(unsigned long int)parent,
1707 					name,
1708 					modestr+1,
1709 					(unsigned int)mode,
1710 					lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1711 			throw RequestException(LIZARDFS_ERROR_EACCES);
1712 		}
1713 	}
1714 	nleng = strlen(name);
1715 	if (nleng>MFS_NAME_MAX) {
1716 		oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): %s",
1717 				(unsigned long int)parent,
1718 				name,
1719 				modestr+1,
1720 				(unsigned int)mode,
1721 				lizardfs_error_string(LIZARDFS_ERROR_ENAMETOOLONG));
1722 		throw RequestException(LIZARDFS_ERROR_ENAMETOOLONG);
1723 	}
1724 
1725 	oflags = AFTER_CREATE;
1726 	if ((fi->flags & O_ACCMODE) == O_RDONLY) {
1727 		oflags |= WANT_READ;
1728 	} else if ((fi->flags & O_ACCMODE) == O_WRONLY) {
1729 		oflags |= WANT_WRITE;
1730 	} else if ((fi->flags & O_ACCMODE) == O_RDWR) {
1731 		oflags |= WANT_READ | WANT_WRITE;
1732 	} else {
1733 		oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): %s",
1734 				(unsigned long int)parent,
1735 				name,
1736 				modestr+1,
1737 				(unsigned int)mode,
1738 				lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
1739 		throw RequestException(LIZARDFS_ERROR_EINVAL);
1740 	}
1741 
1742 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1743 		fs_mknod(parent,nleng,(const uint8_t*)name,TYPE_FILE,mode&07777,ctx.umask,ctx.uid,ctx.gid,0,inode,attr));
1744 	if (status != LIZARDFS_STATUS_OK) {
1745 		oplog_printf(ctx, "create (%lu,%s,-%s:0%04o) (mknod): %s",
1746 				(unsigned long int)parent,
1747 				name,
1748 				modestr+1,
1749 				(unsigned int)mode,
1750 				lizardfs_error_string(status));
1751 		throw RequestException(status);
1752 	}
1753 	Attributes tmp_attr;
1754 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1755 		fs_opencheck(inode,ctx.uid,ctx.gid,oflags,tmp_attr));
1756 
1757 	if (status != LIZARDFS_STATUS_OK) {
1758 		oplog_printf(ctx, "create (%lu,%s,-%s:0%04o) (open): %s",
1759 				(unsigned long int)parent,
1760 				name,
1761 				modestr+1,
1762 				(unsigned int)mode,
1763 				lizardfs_error_string(status));
1764 		throw RequestException(status);
1765 	}
1766 
1767 	mattr = attr_get_mattr(attr);
1768 	fileinfo = fs_newfileinfo(fi->flags & O_ACCMODE,inode);
1769 	fi->fh = reinterpret_cast<uintptr_t>(fileinfo);
1770 	if (keep_cache==1) {
1771 		fi->keep_cache=1;
1772 	} else if (keep_cache==2) {
1773 		fi->keep_cache=0;
1774 	} else {
1775 		fi->keep_cache = (mattr&MATTR_ALLOWDATACACHE)?1:0;
1776 	}
1777 	if (debug_mode) {
1778 		lzfs::log_debug("create ({}) ok -> keep cache: {}\n", inode, (int)fi->keep_cache);
1779 	}
1780 	gDirEntryCache.lockAndInvalidateParent(ctx, parent);
1781 	e.ino = inode;
1782 	e.attr_timeout = (mattr&MATTR_NOACACHE)?0.0:attr_cache_timeout;
1783 	e.entry_timeout = (mattr&MATTR_NOECACHE)?0.0:entry_cache_timeout;
1784 	attr_to_stat(inode,attr,&e.attr);
1785 	makeattrstr(attrstr,256,&e.attr);
1786 	oplog_printf(ctx, "create (%lu,%s,-%s:0%04o): OK (%.1f,%lu,%.1f,%s,%lu)",
1787 			(unsigned long int)parent,
1788 			name,
1789 			modestr+1,
1790 			(unsigned int)mode,
1791 			e.entry_timeout,
1792 			(unsigned long int)e.ino,
1793 			e.attr_timeout,
1794 			attrstr,
1795 			(unsigned long int)fi->keep_cache);
1796 	return e;
1797 }
1798 
open(const Context & ctx,Inode ino,FileInfo * fi)1799 void open(const Context &ctx, Inode ino, FileInfo *fi) {
1800 	uint8_t oflags;
1801 	Attributes attr;
1802 	uint8_t mattr;
1803 	int status;
1804 
1805 	finfo *fileinfo;
1806 
1807 	stats_inc(OP_OPEN);
1808 	if (debug_mode) {
1809 		oplog_printf(ctx, "open (%lu) ...", (unsigned long int)ino);
1810 	}
1811 
1812 	if (IS_SPECIAL_INODE(ino)) {
1813 		special_open(ino, ctx, fi);
1814 		return;
1815 	}
1816 
1817 	oflags = 0;
1818 	if ((fi->flags & O_CREAT) == O_CREAT) {
1819 		oflags |= AFTER_CREATE;
1820 	}
1821 	if ((fi->flags & O_ACCMODE) == O_RDONLY) {
1822 		oflags |= WANT_READ;
1823 	} else if ((fi->flags & O_ACCMODE) == O_WRONLY) {
1824 		oflags |= WANT_WRITE;
1825 	} else if ((fi->flags & O_ACCMODE) == O_RDWR) {
1826 		oflags |= WANT_READ | WANT_WRITE;
1827 	}
1828 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
1829 		fs_opencheck(ino,ctx.uid,ctx.gid,oflags,attr));
1830 	if (status != LIZARDFS_STATUS_OK) {
1831 		oplog_printf(ctx, "open (%lu): %s",
1832 				(unsigned long int)ino,
1833 				lizardfs_error_string(status));
1834 		throw RequestException(status);
1835 	}
1836 
1837 	mattr = attr_get_mattr(attr);
1838 	fileinfo = fs_newfileinfo(fi->flags & O_ACCMODE,ino);
1839 	fi->fh = reinterpret_cast<uintptr_t>(fileinfo);
1840 	if (keep_cache==1) {
1841 		fi->keep_cache=1;
1842 	} else if (keep_cache==2) {
1843 		fi->keep_cache=0;
1844 	} else {
1845 		fi->keep_cache = (mattr&MATTR_ALLOWDATACACHE)?1:0;
1846 	}
1847 	if (debug_mode) {
1848 		lzfs::log_debug("open ({}) ok -> keep cache: {}\n", ino, (int)fi->keep_cache);
1849 	}
1850 	fi->direct_io = gDirectIo;
1851 	oplog_printf(ctx, "open (%lu): OK (%lu,%lu)",
1852 			(unsigned long int)ino,
1853 			(unsigned long int)fi->direct_io,
1854 			(unsigned long int)fi->keep_cache);
1855 }
1856 
update_credentials(Context::IdType index,const GroupCache::Groups & groups)1857 static void update_credentials(Context::IdType index, const GroupCache::Groups &groups) {
1858 	uint8_t status = fs_update_credentials(index, groups);
1859 	if (status != LIZARDFS_STATUS_OK) {
1860 		throw RequestException(status);
1861 	}
1862 }
1863 
release(Inode ino,FileInfo * fi)1864 void release(Inode ino, FileInfo *fi) {
1865 	finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
1866 
1867 	stats_inc(OP_RELEASE);
1868 	if (debug_mode) {
1869 		oplog_printf("release (%lu) ...", (unsigned long int)ino);
1870 	}
1871 
1872 	if (IS_SPECIAL_INODE(ino)) {
1873 		special_release(ino, fi);
1874 		return;
1875 	}
1876 
1877 	if (fileinfo != NULL){
1878 		if (fileinfo->use_flocks) {
1879 			fs_flock_send(ino, fi->lock_owner, 0, lzfs_locks::kRelease);
1880 			fileinfo->use_flocks = false;
1881 		}
1882 		fileinfo->use_posixlocks = false;
1883 		remove_file_info(fi);
1884 	}
1885 	fs_release(ino);
1886 	oplog_printf("release (%lu): OK",
1887 			(unsigned long int)ino);
1888 }
1889 
read_special_inode(const Context & ctx,Inode ino,size_t size,off_t off,FileInfo * fi)1890 std::vector<uint8_t> read_special_inode(const Context &ctx,
1891 			Inode ino,
1892 			size_t size,
1893 			off_t off,
1894 			FileInfo* fi) {
1895 	LOG_AVG_TILL_END_OF_SCOPE0("read");
1896 	stats_inc(OP_READ);
1897 
1898 	return special_read(ino, ctx, size, off, fi, debug_mode);
1899 }
1900 
read(const Context & ctx,Inode ino,size_t size,off_t off,FileInfo * fi)1901 ReadCache::Result read(const Context &ctx,
1902 			Inode ino,
1903 			size_t size,
1904 			off_t off,
1905 			FileInfo *fi) {
1906 	LOG_AVG_TILL_END_OF_SCOPE0("read");
1907 	stats_inc(OP_READ);
1908 
1909 	finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
1910 	int err;
1911 	ReadCache::Result ret;
1912 	if (debug_mode) {
1913 		lzfs::log_debug("read from inode {} up to {} bytes from position {}",
1914 		                ino, size, off);
1915 	}
1916 	if (fileinfo==NULL) {
1917 		oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1918 				(unsigned long int)ino,
1919 				(uint64_t)size,
1920 				(uint64_t)off,
1921 				lizardfs_error_string(LIZARDFS_ERROR_EBADF));
1922 		throw RequestException(LIZARDFS_ERROR_EBADF);
1923 	}
1924 	if (off>=MAX_FILE_SIZE || off+size>=MAX_FILE_SIZE) {
1925 		oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1926 				(unsigned long int)ino,
1927 				(uint64_t)size,
1928 				(uint64_t)off,
1929 				lizardfs_error_string(LIZARDFS_ERROR_EFBIG));
1930 		throw RequestException(LIZARDFS_ERROR_EFBIG);
1931 	}
1932 	try {
1933 		const SteadyTimePoint deadline = SteadyClock::now() + std::chrono::seconds(30);
1934 		uint8_t status = gLocalIoLimiter().waitForRead(ctx.pid, size, deadline);
1935 		if (status == LIZARDFS_STATUS_OK) {
1936 			status = gGlobalIoLimiter().waitForRead(ctx.pid, size, deadline);
1937 		}
1938 		if (status != LIZARDFS_STATUS_OK) {
1939 			err = (status == LIZARDFS_ERROR_EPERM ? LIZARDFS_ERROR_EPERM : LIZARDFS_ERROR_IO);
1940 			oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1941 					(unsigned long int)ino,
1942 					(uint64_t)size,
1943 					(uint64_t)off,
1944 					lizardfs_error_string(err));
1945 			throw RequestException(err);
1946 		}
1947 	} catch (Exception& ex) {
1948 		lzfs_pretty_syslog(LOG_WARNING, "I/O limiting error: %s", ex.what());
1949 		throw RequestException(LIZARDFS_ERROR_IO);
1950 	}
1951 	PthreadMutexWrapper lock(fileinfo->lock);
1952 	PthreadMutexWrapper flushlock(fileinfo->flushlock);
1953 	if (fileinfo->mode==IO_WRITEONLY) {
1954 		oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1955 				(unsigned long int)ino,
1956 				(uint64_t)size,
1957 				(uint64_t)off,
1958 				lizardfs_error_string(LIZARDFS_ERROR_EACCES));
1959 		throw RequestException(LIZARDFS_ERROR_EACCES);
1960 	}
1961 	if (fileinfo->mode==IO_WRITE) {
1962 		err = write_data_flush(fileinfo->data);
1963 		if (err != LIZARDFS_STATUS_OK) {
1964 			oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1965 					(unsigned long int)ino,
1966 					(uint64_t)size,
1967 					(uint64_t)off,
1968 					lizardfs_error_string(err));
1969 			throw RequestException(err);
1970 		}
1971 		write_data_end(fileinfo->data);
1972 	}
1973 	if (fileinfo->mode==IO_WRITE || fileinfo->mode==IO_NONE) {
1974 		fileinfo->mode = IO_READ;
1975 		fileinfo->data = read_data_new(ino);
1976 	}
1977 	// end of reader critical section
1978 	flushlock.unlock();
1979 
1980 	write_data_flush_inode(ino);
1981 
1982 	uint64_t firstBlockToRead = off / MFSBLOCKSIZE;
1983 	uint64_t firstBlockNotToRead = (off + size + MFSBLOCKSIZE - 1) / MFSBLOCKSIZE;
1984 	uint64_t alignedOffset = firstBlockToRead * MFSBLOCKSIZE;
1985 	uint64_t alignedSize = (firstBlockNotToRead - firstBlockToRead) * MFSBLOCKSIZE;
1986 
1987 	uint32_t ssize = alignedSize;
1988 
1989 	err = read_data(fileinfo->data, alignedOffset, ssize, ret);
1990 	ssize = ret.requestSize(alignedOffset, ssize);
1991 	if (err != LIZARDFS_STATUS_OK) {
1992 		oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): %s",
1993 				(unsigned long int)ino,
1994 				(uint64_t)size,
1995 				(uint64_t)off,
1996 				lizardfs_error_string(err));
1997 		throw RequestException(err);
1998 	} else {
1999 		uint32_t replyOffset = off - alignedOffset;
2000 		if (ssize > replyOffset) {
2001 			ssize -= replyOffset;
2002 			if (ssize > size) {
2003 				ssize = size;
2004 			}
2005 		} else {
2006 			ssize = 0;
2007 		}
2008 		oplog_printf(ctx, "read (%lu,%" PRIu64 ",%" PRIu64 "): OK (%lu)",
2009 				(unsigned long int)ino,
2010 				(uint64_t)size,
2011 				(uint64_t)off,
2012 				(unsigned long int)ssize);
2013 	}
2014 	return ret;
2015 }
2016 
write(const Context & ctx,Inode ino,const char * buf,size_t size,off_t off,FileInfo * fi)2017 BytesWritten write(const Context &ctx, Inode ino, const char *buf, size_t size, off_t off,
2018 			FileInfo *fi) {
2019 	finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2020 	int err;
2021 
2022 	stats_inc(OP_WRITE);
2023 	if (debug_mode) {
2024 		oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 ") ...",
2025 				(unsigned long int)ino,
2026 				(uint64_t)size,
2027 				(uint64_t)off);
2028 	}
2029 
2030 	if (IS_SPECIAL_INODE(ino)) {
2031 		return special_write(ino, ctx, buf, size, off, fi);
2032 	}
2033 
2034 	if (fileinfo==NULL) {
2035 		oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2036 				(unsigned long int)ino,
2037 				(uint64_t)size,
2038 				(uint64_t)off,
2039 				lizardfs_error_string(LIZARDFS_ERROR_EBADF));
2040 		throw RequestException(LIZARDFS_ERROR_EBADF);
2041 	}
2042 	if (off>=MAX_FILE_SIZE || off+size>=MAX_FILE_SIZE) {
2043 		oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2044 				(unsigned long int)ino,
2045 				(uint64_t)size,
2046 				(uint64_t)off,
2047 				lizardfs_error_string(LIZARDFS_ERROR_EFBIG));
2048 		throw RequestException(LIZARDFS_ERROR_EFBIG);
2049 	}
2050 	try {
2051 		const SteadyTimePoint deadline = SteadyClock::now() + std::chrono::seconds(30);
2052 		uint8_t status = gLocalIoLimiter().waitForWrite(ctx.pid, size, deadline);
2053 		if (status == LIZARDFS_STATUS_OK) {
2054 			status = gGlobalIoLimiter().waitForWrite(ctx.pid, size, deadline);
2055 		}
2056 		if (status != LIZARDFS_STATUS_OK) {
2057 			err = status == LIZARDFS_ERROR_EPERM ? LIZARDFS_ERROR_EPERM : LIZARDFS_ERROR_IO;
2058 			oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2059 							(unsigned long int)ino,
2060 							(uint64_t)size,
2061 							(uint64_t)off,
2062 							lizardfs_error_string(err));
2063 			throw RequestException(err);
2064 		}
2065 	} catch (Exception& ex) {
2066 		lzfs_pretty_syslog(LOG_WARNING, "I/O limiting error: %s", ex.what());
2067 		throw RequestException(LIZARDFS_ERROR_IO);
2068 	}
2069 	PthreadMutexWrapper lock(fileinfo->lock);
2070 	if (fileinfo->mode==IO_READONLY) {
2071 		oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2072 				(unsigned long int)ino,
2073 				(uint64_t)size,
2074 				(uint64_t)off,
2075 				lizardfs_error_string(LIZARDFS_ERROR_EACCES));
2076 		throw RequestException(LIZARDFS_ERROR_EACCES);
2077 	}
2078 	if (fileinfo->mode==IO_READ) {
2079 		read_data_end(fileinfo->data);
2080 		fileinfo->data = NULL;
2081 	}
2082 	if (fileinfo->mode==IO_READ || fileinfo->mode==IO_NONE) {
2083 		fileinfo->mode = IO_WRITE;
2084 		fileinfo->data = write_data_new(ino);
2085 	}
2086 	err = write_data(fileinfo->data,off,size,(const uint8_t*)buf);
2087 	gDirEntryCache.lockAndInvalidateInode(ino);
2088 	if (err != LIZARDFS_STATUS_OK) {
2089 		oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): %s",
2090 				(unsigned long int)ino,
2091 				(uint64_t)size,
2092 				(uint64_t)off,
2093 				lizardfs_error_string(err));
2094 		throw RequestException(err);
2095 	} else {
2096 		oplog_printf(ctx, "write (%lu,%" PRIu64 ",%" PRIu64 "): OK (%lu)",
2097 				(unsigned long int)ino,
2098 				(uint64_t)size,
2099 				(uint64_t)off,
2100 				(unsigned long int)size);
2101 		return size;
2102 	}
2103 }
2104 
flush(const Context & ctx,Inode ino,FileInfo * fi)2105 void flush(const Context &ctx, Inode ino, FileInfo* fi) {
2106 	finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2107 	int err;
2108 
2109 	stats_inc(OP_FLUSH);
2110 	if (debug_mode) {
2111 		oplog_printf(ctx, "flush (%lu) ...",
2112 				(unsigned long int)ino);
2113 	}
2114 	if (IS_SPECIAL_INODE(ino)) {
2115 		oplog_printf(ctx, "flush (%lu): OK",
2116 				(unsigned long int)ino);
2117 		return;
2118 	}
2119 	if (fileinfo==NULL) {
2120 		oplog_printf(ctx, "flush (%lu): %s",
2121 				(unsigned long int)ino,
2122 				lizardfs_error_string(LIZARDFS_ERROR_EBADF));
2123 		throw RequestException(LIZARDFS_ERROR_EBADF);
2124 	}
2125 
2126 	err = LIZARDFS_STATUS_OK;
2127 	PthreadMutexWrapper lock(fileinfo->lock);
2128 	if (fileinfo->mode==IO_WRITE || fileinfo->mode==IO_WRITEONLY) {
2129 		err = write_data_flush(fileinfo->data);
2130 	}
2131 	lzfs_locks::FlockWrapper file_lock(lzfs_locks::kRelease,0,0,0);
2132 	auto use_posixlocks = fileinfo->use_posixlocks;
2133 	lock.unlock();
2134 	if (use_posixlocks) {
2135 		fs_setlk_send(ino, fi->lock_owner, 0, file_lock);
2136 	}
2137 	if (err != LIZARDFS_STATUS_OK) {
2138 		oplog_printf(ctx, "flush (%lu): %s",
2139 				(unsigned long int)ino,
2140 				lizardfs_error_string(err));
2141 		throw RequestException(err);
2142 	} else {
2143 		oplog_printf(ctx, "flush (%lu): OK",
2144 				(unsigned long int)ino);
2145 	}
2146 }
2147 
fsync(const Context & ctx,Inode ino,int datasync,FileInfo * fi)2148 void fsync(const Context &ctx, Inode ino, int datasync, FileInfo* fi) {
2149 	finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2150 	int err;
2151 
2152 	stats_inc(OP_FSYNC);
2153 	if (debug_mode) {
2154 		oplog_printf(ctx, "fsync (%lu,%d) ...",
2155 				(unsigned long int)ino,
2156 				datasync);
2157 	}
2158 	if (IS_SPECIAL_INODE(ino)) {
2159 		oplog_printf(ctx, "fsync (%lu,%d): OK",
2160 				(unsigned long int)ino,
2161 				datasync);
2162 		return;
2163 	}
2164 	if (fileinfo==NULL) {
2165 		oplog_printf(ctx, "fsync (%lu,%d): %s",
2166 				(unsigned long int)ino,
2167 				datasync,
2168 				lizardfs_error_string(LIZARDFS_ERROR_EBADF));
2169 		throw RequestException(LIZARDFS_ERROR_EBADF);
2170 	}
2171 	err = LIZARDFS_STATUS_OK;
2172 	PthreadMutexWrapper lock(fileinfo->lock);
2173 	if (fileinfo->mode==IO_WRITE || fileinfo->mode==IO_WRITEONLY) {
2174 		err = write_data_flush(fileinfo->data);
2175 	}
2176 	if (err != LIZARDFS_STATUS_OK) {
2177 		oplog_printf(ctx, "fsync (%lu,%d): %s",
2178 				(unsigned long int)ino,
2179 				datasync,
2180 				lizardfs_error_string(err));
2181 		throw RequestException(err);
2182 	} else {
2183 		oplog_printf(ctx, "fsync (%lu,%d): OK",
2184 				(unsigned long int)ino,
2185 				datasync);
2186 	}
2187 }
2188 
2189 namespace {
2190 
2191 class XattrHandler {
2192 public:
~XattrHandler()2193 	virtual ~XattrHandler() {}
2194 
2195 	/*
2196 	 * handler for request to set an extended attribute
2197 	 * mode - one of XATTR_SMODE_*
2198 	 * returns status
2199 	 */
2200 	virtual uint8_t setxattr(const Context& ctx, Inode ino, const char *name,
2201 			uint32_t nleng, const char *value, size_t size, int mode) = 0;
2202 
2203 	/*
2204 	 * handler for request to get an extended attribute
2205 	 * mode - one of XATTR_GMODE_*
2206 	 * returns status and:
2207 	 * * sets value is mode is XATTR_GMODE_GET_DATA
2208 	 * * sets valueLength is mode is XATTR_GMODE_LENGTH_ONLY
2209 	 */
2210 	virtual uint8_t getxattr(const Context& ctx, Inode ino, const char *name,
2211 			uint32_t nleng, int mode, uint32_t& valueLength, std::vector<uint8_t>& value) = 0;
2212 
2213 	/*
2214 	 * handler for request to remove an extended attribute
2215 	 * returns status
2216 	 */
2217 	virtual uint8_t removexattr(const Context& ctx, Inode ino, const char *name,
2218 			uint32_t nleng) = 0;
2219 };
2220 
2221 class PlainXattrHandler : public XattrHandler {
2222 public:
setxattr(const Context & ctx,Inode ino,const char * name,uint32_t nleng,const char * value,size_t size,int mode)2223 	uint8_t setxattr(const Context& ctx, Inode ino, const char *name,
2224 		uint32_t nleng, const char *value, size_t size, int mode) override {
2225 		uint8_t status;
2226 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2227 			fs_setxattr(ino, 0, ctx.uid, ctx.gid, nleng, (const uint8_t*)name,
2228 				(uint32_t)size, (const uint8_t*)value, mode));
2229 		return status;
2230 	}
2231 
getxattr(const Context & ctx,Inode ino,const char * name,uint32_t nleng,int mode,uint32_t & valueLength,std::vector<uint8_t> & value)2232 	uint8_t getxattr(const Context& ctx, Inode ino, const char *name,
2233 		uint32_t nleng, int mode, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2234 		const uint8_t *buff;
2235 		uint8_t status;
2236 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2237 			fs_getxattr(ino, 0, ctx.uid, ctx.gid, nleng, (const uint8_t*)name,
2238 				mode, &buff, &valueLength));
2239 		if (mode == XATTR_GMODE_GET_DATA && status == LIZARDFS_STATUS_OK) {
2240 			value = std::vector<uint8_t>(buff, buff + valueLength);
2241 		}
2242 		return status;
2243 	}
2244 
removexattr(const Context & ctx,Inode ino,const char * name,uint32_t nleng)2245 	uint8_t removexattr(const Context& ctx, Inode ino, const char *name,
2246 			uint32_t nleng) override {
2247 		uint8_t status;
2248 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2249 			fs_removexattr(ino, 0, ctx.uid, ctx.gid, nleng, (const uint8_t*)name));
2250 		return status;
2251 	}
2252 };
2253 
2254 class ErrorXattrHandler : public XattrHandler {
2255 public:
ErrorXattrHandler(uint8_t error)2256 	ErrorXattrHandler(uint8_t error) : error_(error) {}
setxattr(const Context &,Inode,const char *,uint32_t,const char *,size_t,int)2257 	uint8_t setxattr(const Context&, Inode, const char *,
2258 			uint32_t, const char *, size_t, int) override {
2259 		return error_;
2260 	}
2261 
getxattr(const Context &,Inode,const char *,uint32_t,int,uint32_t &,std::vector<uint8_t> &)2262 	uint8_t getxattr(const Context&, Inode, const char *,
2263 			uint32_t, int, uint32_t&, std::vector<uint8_t>&) override {
2264 		return error_;
2265 	}
2266 
removexattr(const Context &,Inode,const char *,uint32_t)2267 	uint8_t removexattr(const Context&, Inode, const char *,
2268 			uint32_t) override {
2269 		return error_;
2270 	}
2271 private:
2272 	uint8_t error_;
2273 };
2274 
2275 class PosixAclXattrHandler : public XattrHandler {
2276 public:
PosixAclXattrHandler(AclType type)2277 	PosixAclXattrHandler(AclType type) : type_(type) { }
2278 
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2279 	uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2280 			uint32_t, const char *value, size_t size, int) override {
2281 		static constexpr size_t kEmptyAclSize = 4;
2282 		AccessControlList posix_acl;
2283 		try {
2284 			if (size <= kEmptyAclSize) {
2285 				uint8_t status;
2286 				RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2287 					fs_deletacl(ino, ctx.uid, ctx.gid, type_));
2288 				return status;
2289 			}
2290 			posix_acl = aclConverter::extractAclObject((const uint8_t*)value, size);
2291 		} catch (Exception&) {
2292 			return LIZARDFS_ERROR_EINVAL;
2293 		}
2294 		uint8_t status;
2295 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2296 			fs_setacl(ino, ctx.uid, ctx.gid, type_, posix_acl));
2297 		eraseAclCache(ino);
2298 		gDirEntryCache.lockAndInvalidateInode(ino);
2299 		return status;
2300 	}
2301 
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2302 	uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2303 			uint32_t, int /*mode*/, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2304 		try {
2305 			AclCacheEntry cacheEntry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2306 			if (cacheEntry) {
2307 				std::pair<bool, AccessControlList> posix_acl;
2308 				if (type_ == AclType::kAccess) {
2309 					posix_acl = cacheEntry->acl.convertToPosixACL();
2310 				} else {
2311 					posix_acl = cacheEntry->acl.convertToDefaultPosixACL();
2312 				}
2313 				if (!posix_acl.first) {
2314 					return LIZARDFS_ERROR_ENOATTR;
2315 				}
2316 				value = aclConverter::aclObjectToXattr(posix_acl.second);
2317 				valueLength = value.size();
2318 				return LIZARDFS_STATUS_OK;
2319 			} else {
2320 				return LIZARDFS_ERROR_ENOATTR;
2321 			}
2322 		} catch (AclAcquisitionException &e) {
2323 			sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2324 			return e.status();
2325 		} catch (Exception &) {
2326 			lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2327 			return LIZARDFS_ERROR_IO;
2328 		}
2329 	}
2330 
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2331 	uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2332 			uint32_t) override {
2333 		uint8_t status;
2334 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2335 			fs_deletacl(ino, ctx.uid, ctx.gid, type_));
2336 		eraseAclCache(ino);
2337 		return status;
2338 	}
2339 
2340 private:
2341 	AclType type_;
2342 	SteadyClock clock_;
2343 };
2344 
2345 class NFSAclXattrHandler : public XattrHandler {
2346 public:
NFSAclXattrHandler()2347 	NFSAclXattrHandler() { }
2348 
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2349 	uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2350 			uint32_t, const char *value, size_t size, int) override {
2351 		uint8_t status = LIZARDFS_STATUS_OK;
2352 		RichACL acl = richAclConverter::extractObjectFromNFS((uint8_t *)value, size);
2353 
2354 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2355 			fs_setacl(ino, ctx.uid, ctx.gid, acl));
2356 		eraseAclCache(ino);
2357 		gDirEntryCache.lockAndInvalidateInode(ino);
2358 		return status;
2359 	}
2360 
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2361 	uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2362 			uint32_t, int, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2363 		try {
2364 			AclCacheEntry cache_entry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2365 			if (cache_entry) {
2366 				value = richAclConverter::objectToNFSXattr(cache_entry->acl, cache_entry->owner_id);
2367 				valueLength = value.size();
2368 			} else {
2369 				valueLength = 4;
2370 				value.assign(valueLength, 0);
2371 			}
2372 			return LIZARDFS_STATUS_OK;
2373 		} catch (AclAcquisitionException& e) {
2374 			sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2375 			return e.status();
2376 		} catch (Exception&) {
2377 			lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2378 			return LIZARDFS_ERROR_IO;
2379 		}
2380 	}
2381 
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2382 	uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2383 			uint32_t) override {
2384 		uint8_t status;
2385 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2386 			fs_deletacl(ino, ctx.uid, ctx.gid, AclType::kRichACL));
2387 		eraseAclCache(ino);
2388 		return status;
2389 	}
2390 private:
2391 	SteadyClock clock_;
2392 };
2393 
2394 class RichAclXattrHandler : public XattrHandler {
2395 public:
RichAclXattrHandler()2396 	RichAclXattrHandler() { }
2397 
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2398 	uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2399 			uint32_t, const char *value, size_t size, int) override {
2400 		uint8_t status = LIZARDFS_STATUS_OK;
2401 		RichACL acl = richAclConverter::extractObjectFromRichACL((uint8_t *)value, size);
2402 
2403 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2404 			fs_setacl(ino, ctx.uid, ctx.gid, acl));
2405 		eraseAclCache(ino);
2406 		gDirEntryCache.lockAndInvalidateInode(ino);
2407 		return status;
2408 	}
2409 
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2410 	uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2411 			uint32_t, int, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2412 		try {
2413 			AclCacheEntry cache_entry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2414 			if (cache_entry) {
2415 				value = richAclConverter::objectToRichACLXattr(cache_entry->acl);
2416 				valueLength = value.size();
2417 				return LIZARDFS_STATUS_OK;
2418 			} else {
2419 				return LIZARDFS_ERROR_ENOATTR;
2420 			}
2421 		} catch (AclAcquisitionException& e) {
2422 			sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2423 			return e.status();
2424 		} catch (Exception&) {
2425 			lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2426 			return LIZARDFS_ERROR_IO;
2427 		}
2428 	}
2429 
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2430 	uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2431 			uint32_t) override {
2432 		uint8_t status;
2433 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2434 			fs_deletacl(ino, ctx.uid, ctx.gid, AclType::kRichACL));
2435 		eraseAclCache(ino);
2436 		return status;
2437 	}
2438 private:
2439 	SteadyClock clock_;
2440 };
2441 
2442 #ifdef __APPLE__
2443 class OsxAclXattrHandler : public XattrHandler {
2444 public:
OsxAclXattrHandler()2445 	OsxAclXattrHandler() {}
2446 
setxattr(const Context & ctx,Inode ino,const char *,uint32_t,const char * value,size_t size,int)2447 	uint8_t setxattr(const Context& ctx, Inode ino, const char *,
2448 			uint32_t, const char *value, size_t size, int) override {
2449 		static constexpr size_t kEmptyAclSize = 4;
2450 		if (size <= kEmptyAclSize) {
2451 			return LIZARDFS_ERROR_EINVAL;
2452 		}
2453 		RichACL result;
2454 		try {
2455 			AclCacheEntry cache_entry = acl_cache->get(clock_.now(), ino, ctx.uid, ctx.gid);
2456 			result = osxAclConverter::extractAclObject((const uint8_t*)value, size);
2457 		} catch (RequestException &e) {
2458 			return e.lizardfs_error_code;
2459 		} catch (Exception&) {
2460 			return LIZARDFS_ERROR_EINVAL;
2461 		}
2462 		uint8_t status = LIZARDFS_STATUS_OK;
2463 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2464 			fs_setacl(ino, ctx.uid, ctx.gid, result));
2465 		eraseAclCache(ino);
2466 		gDirEntryCache.lockAndInvalidateInode(ino);
2467 		return status;
2468 	}
2469 
getxattr(const Context & ctx,Inode ino,const char *,uint32_t,int,uint32_t & valueLength,std::vector<uint8_t> & value)2470 	uint8_t getxattr(const Context& ctx, Inode ino, const char *,
2471 			uint32_t, int /*mode*/, uint32_t& valueLength, std::vector<uint8_t>& value) override {
2472 		try {
2473 			auto ts = clock_.now();
2474 			AclCacheEntry cache_entry = acl_cache->get(ts, ino, ctx.uid, ctx.gid);
2475 			if (cache_entry) {
2476 				value = osxAclConverter::objectToOsxXattr(cache_entry->acl);
2477 				valueLength = value.size();
2478 				return LIZARDFS_STATUS_OK;
2479 			} else {
2480 				return LIZARDFS_ERROR_ENOATTR;
2481 			}
2482 		} catch (AclAcquisitionException& e) {
2483 			sassert((e.status() != LIZARDFS_STATUS_OK) && (e.status() != LIZARDFS_ERROR_ENOATTR));
2484 			return e.status();
2485 		} catch (RequestException &e) {
2486 			return e.lizardfs_error_code;
2487 		} catch (Exception&) {
2488 			lzfs_pretty_syslog(LOG_WARNING, "Failed to convert ACL to xattr, looks like a bug");
2489 			return LIZARDFS_ERROR_IO;
2490 		}
2491 		valueLength = 0;
2492 	}
2493 
removexattr(const Context & ctx,Inode ino,const char *,uint32_t)2494 	uint8_t removexattr(const Context& ctx, Inode ino, const char *,
2495 			uint32_t) override {
2496 		uint8_t status;
2497 		RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2498 			fs_deletacl(ino, ctx.uid, ctx.gid, AclType::kRichACL));
2499 		eraseAclCache(ino);
2500 		return status;
2501 	}
2502 
2503 private:
2504 	SteadyClock clock_;
2505 };
2506 
2507 #endif
2508 
2509 } // anonymous namespace
2510 
// Handler instances referenced by the xattr_handlers map below. They are
// defined before the map because the map stores their addresses.
static PosixAclXattrHandler accessAclXattrHandler(AclType::kAccess);
static PosixAclXattrHandler defaultAclXattrHandler(AclType::kDefault);
static NFSAclXattrHandler nfsAclXattrHandler;
static RichAclXattrHandler richAclXattrHandler;
#ifdef __APPLE__
static OsxAclXattrHandler osxAclXattrHandler;
#endif

// Answers every request with LIZARDFS_ERROR_ENOTSUP.
static ErrorXattrHandler enotsupXattrHandler(LIZARDFS_ERROR_ENOTSUP);
// Used by choose_xattr_handler() for every name that is not listed in xattr_handlers.
static PlainXattrHandler plainXattrHandler;

// Extended attribute names that are served by a dedicated handler.
static std::map<std::string, XattrHandler*> xattr_handlers = {
	{"system.posix_acl_access", &accessAclXattrHandler},
	{"system.posix_acl_default", &defaultAclXattrHandler},
	{"system.nfs4_acl", &nfsAclXattrHandler},
	{"system.richacl", &richAclXattrHandler},
	{"security.capability", &enotsupXattrHandler},
#ifdef __APPLE__
	{"com.apple.system.Security", &osxAclXattrHandler},
#endif
};
2532 
choose_xattr_handler(const char * name)2533 static XattrHandler* choose_xattr_handler(const char *name) {
2534 	try {
2535 		return xattr_handlers.at(name);
2536 	} catch (std::out_of_range&) {
2537 		return &plainXattrHandler;
2538 	}
2539 }
2540 
setxattr(const Context & ctx,Inode ino,const char * name,const char * value,size_t size,int flags,uint32_t position)2541 void setxattr(const Context &ctx, Inode ino, const char *name, const char *value,
2542 			size_t size, int flags, uint32_t position) {
2543 	uint32_t nleng;
2544 	int status;
2545 	uint8_t mode;
2546 
2547 
2548 	stats_inc(OP_SETXATTR);
2549 	if (debug_mode) {
2550 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d) ...",
2551 				(unsigned long int)ino,
2552 				name,
2553 				(uint64_t)size,
2554 				flags);
2555 	}
2556 	if (IS_SPECIAL_INODE(ino)) {
2557 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2558 				(unsigned long int)ino,
2559 				name,
2560 				(uint64_t)size,
2561 				flags,
2562 				lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2563 		throw RequestException(LIZARDFS_ERROR_EPERM);
2564 	}
2565 	if (size>MFS_XATTR_SIZE_MAX) {
2566 #if defined(__APPLE__)
2567 		// Mac OS X returns E2BIG here
2568 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2569 				(unsigned long int)ino,
2570 				name,
2571 				(uint64_t)size,
2572 				flags,
2573 				lizardfs_error_string(LIZARDFS_ERROR_E2BIG));
2574 		throw RequestException(LIZARDFS_ERROR_E2BIG);
2575 #else
2576 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2577 				(unsigned long int)ino,
2578 				name,
2579 				(uint64_t)size,
2580 				flags,
2581 				lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2582 		throw RequestException(LIZARDFS_ERROR_ERANGE);
2583 #endif
2584 	}
2585 	nleng = strlen(name);
2586 	if (nleng>MFS_XATTR_NAME_MAX) {
2587 #if defined(__APPLE__)
2588 		// Mac OS X returns EPERM here
2589 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2590 				(unsigned long int)ino,
2591 				name,
2592 				(uint64_t)size,
2593 				flags,
2594 				lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2595 		throw RequestException(LIZARDFS_ERROR_EPERM);
2596 #else
2597 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2598 				(unsigned long int)ino,
2599 				name,
2600 				(uint64_t)size,
2601 				flags,
2602 				lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2603 		throw RequestException(LIZARDFS_ERROR_ERANGE);
2604 #endif
2605 	}
2606 	if (nleng==0) {
2607 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2608 				(unsigned long int)ino,
2609 				name,
2610 				(uint64_t)size,
2611 				flags,
2612 				lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2613 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2614 	}
2615 	if (strcmp(name,"security.capability")==0) {
2616 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2617 				(unsigned long int)ino,
2618 				name,
2619 				(uint64_t)size,
2620 				flags,
2621 				lizardfs_error_string(LIZARDFS_ERROR_ENOTSUP));
2622 		throw RequestException(LIZARDFS_ERROR_ENOTSUP);
2623 	}
2624 #if defined(XATTR_CREATE) && defined(XATTR_REPLACE)
2625 	if ((flags&XATTR_CREATE) && (flags&XATTR_REPLACE)) {
2626 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2627 				(unsigned long int)ino,
2628 				name,
2629 				(uint64_t)size,
2630 				flags,
2631 				lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2632 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2633 	}
2634 	mode = (flags==XATTR_CREATE)?XATTR_SMODE_CREATE_ONLY:(flags==XATTR_REPLACE)?XATTR_SMODE_REPLACE_ONLY:XATTR_SMODE_CREATE_OR_REPLACE;
2635 #else
2636 	mode = 0;
2637 #endif
2638 	(void)position;
2639 	status = choose_xattr_handler(name)->setxattr(ctx, ino, name, nleng, value, size, mode);
2640 	if (status != LIZARDFS_STATUS_OK) {
2641 		oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): %s",
2642 				(unsigned long int)ino,
2643 				name,
2644 				(uint64_t)size,
2645 				flags,
2646 				lizardfs_error_string(status));
2647 		throw RequestException(status);
2648 	}
2649 	oplog_printf(ctx, "setxattr (%lu,%s,%" PRIu64 ",%d): OK",
2650 			(unsigned long int)ino,
2651 			name,
2652 			(uint64_t)size,
2653 			flags);
2654 }
2655 
getxattr(const Context & ctx,Inode ino,const char * name,size_t size,uint32_t position)2656 XattrReply getxattr(const Context &ctx, Inode ino, const char *name, size_t size, uint32_t position) {
2657 	uint32_t nleng;
2658 	int status;
2659 	uint8_t mode;
2660 	std::vector<uint8_t> buffer;
2661 	const uint8_t *buff;
2662 	uint32_t leng;
2663 
2664 
2665 	stats_inc(OP_GETXATTR);
2666 	if (debug_mode) {
2667 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 ") ...",
2668 				(unsigned long int)ino,
2669 				name,
2670 				(uint64_t)size);
2671 	}
2672 	if (IS_SPECIAL_INODE(ino)) {
2673 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2674 				(unsigned long int)ino,
2675 				name,
2676 				(uint64_t)size,
2677 				lizardfs_error_string(LIZARDFS_ERROR_ENODATA));
2678 		throw RequestException(LIZARDFS_ERROR_ENODATA);
2679 	}
2680 	nleng = strlen(name);
2681 	if (nleng>MFS_XATTR_NAME_MAX) {
2682 #if defined(__APPLE__)
2683 		// Mac OS X returns EPERM here
2684 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2685 				(unsigned long int)ino,
2686 				name,
2687 				(uint64_t)size,
2688 				lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2689 		throw RequestException(LIZARDFS_ERROR_EPERM);
2690 #else
2691 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2692 				(unsigned long int)ino,
2693 				name,
2694 				(uint64_t)size,
2695 				lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2696 		throw RequestException(LIZARDFS_ERROR_ERANGE);
2697 #endif
2698 	}
2699 	if (nleng==0) {
2700 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2701 				(unsigned long int)ino,
2702 				name,
2703 				(uint64_t)size,
2704 				lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2705 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2706 	}
2707 	if (strcmp(name,"security.capability")==0) {
2708 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2709 				(unsigned long int)ino,
2710 				name,
2711 				(uint64_t)size,
2712 				lizardfs_error_string(LIZARDFS_ERROR_ENOTSUP));
2713 		throw RequestException(LIZARDFS_ERROR_ENOTSUP);
2714 	}
2715 	if (size==0) {
2716 		mode = XATTR_GMODE_LENGTH_ONLY;
2717 	} else {
2718 		mode = XATTR_GMODE_GET_DATA;
2719 	}
2720 	(void)position;
2721 	status = choose_xattr_handler(name)->getxattr(ctx, ino, name, nleng, mode, leng, buffer);
2722 	buff = buffer.data();
2723 	if (status != LIZARDFS_STATUS_OK) {
2724 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2725 				(unsigned long int)ino,
2726 				name,
2727 				(uint64_t)size,
2728 				lizardfs_error_string(status));
2729 		throw RequestException(status);
2730 	}
2731 	if (size==0) {
2732 		oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): OK (%" PRIu32 ")",
2733 				(unsigned long int)ino,
2734 				name,
2735 				(uint64_t)size,
2736 				leng);
2737 		return XattrReply{leng, {}};
2738 	} else {
2739 		if (leng>size) {
2740 			oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): %s",
2741 					(unsigned long int)ino,
2742 					name,
2743 					(uint64_t)size,
2744 					lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2745 			throw RequestException(LIZARDFS_ERROR_ERANGE);
2746 		} else {
2747 			oplog_printf(ctx, "getxattr (%lu,%s,%" PRIu64 "): OK (%" PRIu32 ")",
2748 					(unsigned long int)ino,
2749 					name,
2750 					(uint64_t)size,
2751 					leng);
2752 			return XattrReply{leng, std::vector<uint8_t>(buff, buff + leng)};
2753 		}
2754 	}
2755 }
2756 
listxattr(const Context & ctx,Inode ino,size_t size)2757 XattrReply listxattr(const Context &ctx, Inode ino, size_t size) {
2758 	const uint8_t *buff;
2759 	uint32_t leng;
2760 	int status;
2761 	uint8_t mode;
2762 
2763 	stats_inc(OP_LISTXATTR);
2764 	if (debug_mode) {
2765 		oplog_printf(ctx, "listxattr (%lu,%" PRIu64 ") ...",
2766 				(unsigned long int)ino,
2767 				(uint64_t)size);
2768 	}
2769 	if (IS_SPECIAL_INODE(ino)) {
2770 		oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): %s",
2771 				(unsigned long int)ino,
2772 				(uint64_t)size,
2773 				lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2774 		throw RequestException(LIZARDFS_ERROR_EPERM);
2775 	}
2776 	if (size==0) {
2777 		mode = XATTR_GMODE_LENGTH_ONLY;
2778 	} else {
2779 		mode = XATTR_GMODE_GET_DATA;
2780 	}
2781 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
2782 		fs_listxattr(ino,0,ctx.uid,ctx.gid,mode,&buff,&leng));
2783 	if (status != LIZARDFS_STATUS_OK) {
2784 		oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): %s",
2785 				(unsigned long int)ino,
2786 				(uint64_t)size,
2787 				lizardfs_error_string(status));
2788 		throw RequestException(status);
2789 	}
2790 	if (size==0) {
2791 		oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): OK (%" PRIu32 ")",
2792 				(unsigned long int)ino,
2793 				(uint64_t)size,
2794 				leng);
2795 		return XattrReply{leng, {}};
2796 	} else {
2797 		if (leng>size) {
2798 			oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): %s",
2799 					(unsigned long int)ino,
2800 					(uint64_t)size,
2801 					lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2802 			throw RequestException(LIZARDFS_ERROR_ERANGE);
2803 		} else {
2804 			oplog_printf(ctx, "listxattr (%lu,%" PRIu64 "): OK (%" PRIu32 ")",
2805 					(unsigned long int)ino,
2806 					(uint64_t)size,
2807 					leng);
2808 			return XattrReply{leng, std::vector<uint8_t>(buff, buff + leng)};
2809 		}
2810 	}
2811 }
2812 
removexattr(const Context & ctx,Inode ino,const char * name)2813 void removexattr(const Context &ctx, Inode ino, const char *name) {
2814 	uint32_t nleng;
2815 	int status;
2816 
2817 	stats_inc(OP_REMOVEXATTR);
2818 	if (debug_mode) {
2819 		oplog_printf(ctx, "removexattr (%lu,%s) ...",
2820 				(unsigned long int)ino,
2821 				name);
2822 	}
2823 	if (IS_SPECIAL_INODE(ino)) {
2824 		oplog_printf(ctx, "removexattr (%lu,%s): %s",
2825 				(unsigned long int)ino,
2826 				name,
2827 				lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2828 		throw RequestException(LIZARDFS_ERROR_EPERM);
2829 	}
2830 	nleng = strlen(name);
2831 	if (nleng>MFS_XATTR_NAME_MAX) {
2832 #if defined(__APPLE__)
2833 		// Mac OS X returns EPERM here
2834 		oplog_printf(ctx, "removexattr (%lu,%s): %s",
2835 				(unsigned long int)ino,
2836 				name,
2837 				lizardfs_error_string(LIZARDFS_ERROR_EPERM));
2838 		throw RequestException(LIZARDFS_ERROR_EPERM);
2839 #else
2840 		oplog_printf(ctx, "removexattr (%lu,%s): %s",
2841 				(unsigned long int)ino,
2842 				name,
2843 				lizardfs_error_string(LIZARDFS_ERROR_ERANGE));
2844 		throw RequestException(LIZARDFS_ERROR_ERANGE);
2845 #endif
2846 	}
2847 	if (nleng==0) {
2848 		oplog_printf(ctx, "removexattr (%lu,%s): %s",
2849 				(unsigned long int)ino,
2850 				name,
2851 				lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2852 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2853 	}
2854 	status = choose_xattr_handler(name)->removexattr(ctx, ino, name, nleng);
2855 	if (status != LIZARDFS_STATUS_OK) {
2856 		oplog_printf(ctx, "removexattr (%lu,%s): %s",
2857 				(unsigned long int)ino,
2858 				name,
2859 				lizardfs_error_string(status));
2860 		throw RequestException(status);
2861 	} else {
2862 		oplog_printf(ctx, "removexattr (%lu,%s): OK",
2863 				(unsigned long int)ino,
2864 				name);
2865 	}
2866 }
2867 
flock_interrupt(const lzfs_locks::InterruptData & data)2868 void flock_interrupt(const lzfs_locks::InterruptData &data) {
2869 	fs_flock_interrupt(data);
2870 }
2871 
setlk_interrupt(const lzfs_locks::InterruptData & data)2872 void setlk_interrupt(const lzfs_locks::InterruptData &data) {
2873 	fs_setlk_interrupt(data);
2874 }
2875 
getlk(const Context & ctx,Inode ino,FileInfo * fi,struct lzfs_locks::FlockWrapper & lock)2876 void getlk(const Context &ctx, Inode ino, FileInfo* fi, struct lzfs_locks::FlockWrapper &lock) {
2877 	uint32_t status;
2878 
2879 	stats_inc(OP_FLOCK);
2880 	if (IS_SPECIAL_INODE(ino)) {
2881 		if (debug_mode) {
2882 			oplog_printf(ctx, "flock(ctx, %lu, fi): %s", (unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2883 		}
2884 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2885 	}
2886 
2887 	if (!fi) {
2888 		if (debug_mode) {
2889 			oplog_printf(ctx,"flock(ctx, %lu, fi): %s",(unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2890 		}
2891 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2892 	}
2893 
2894 	// communicate with master
2895 	status = fs_getlk(ino, fi->lock_owner, lock);
2896 
2897 	if (status) {
2898 		throw RequestException(status);
2899 	}
2900 }
2901 
setlk_send(const Context & ctx,Inode ino,FileInfo * fi,struct lzfs_locks::FlockWrapper & lock)2902 uint32_t setlk_send(const Context &ctx, Inode ino, FileInfo* fi, struct lzfs_locks::FlockWrapper &lock) {
2903 	uint32_t reqid;
2904 	uint32_t status;
2905 
2906 	stats_inc(OP_SETLK);
2907 	if (IS_SPECIAL_INODE(ino)) {
2908 		if (debug_mode) {
2909 			oplog_printf(ctx, "flock(ctx, %lu, fi): %s", (unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2910 		}
2911 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2912 	}
2913 
2914 	if (!fi) {
2915 		if (debug_mode) {
2916 			oplog_printf(ctx,"flock(ctx, %lu, fi): %s",(unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2917 		}
2918 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2919 	}
2920 
2921 	finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2922 
2923 	// increase flock_id counter
2924 	lock_request_mutex.lock();
2925 	reqid = lock_request_counter++;
2926 	lock_request_mutex.unlock();
2927 
2928 	if (fileinfo != NULL) {
2929 		PthreadMutexWrapper lock(fileinfo->lock);
2930 		fileinfo->use_posixlocks = true;
2931 	}
2932 
2933 	// communicate with master
2934 	status = fs_setlk_send(ino, fi->lock_owner, reqid, lock);
2935 
2936 	if (status) {
2937 		throw RequestException(status);
2938 	}
2939 
2940 	return reqid;
2941 }
2942 
setlk_recv()2943 void setlk_recv() {
2944 	uint32_t status = fs_setlk_recv();
2945 
2946 	if (status) {
2947 		throw RequestException(status);
2948 	}
2949 }
2950 
flock_send(const Context & ctx,Inode ino,FileInfo * fi,int op)2951 uint32_t flock_send(const Context &ctx, Inode ino, FileInfo* fi, int op) {
2952 	uint32_t reqid;
2953 	uint32_t status;
2954 
2955 	stats_inc(OP_FLOCK);
2956 	if (IS_SPECIAL_INODE(ino)) {
2957 		if (debug_mode) {
2958 			oplog_printf(ctx, "flock(ctx, %lu, fi): %s", (unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2959 		}
2960 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2961 	}
2962 
2963 	if (!fi) {
2964 		if (debug_mode) {
2965 			oplog_printf(ctx,"flock(ctx, %lu, fi): %s",(unsigned long int)ino, lizardfs_error_string(LIZARDFS_ERROR_EINVAL));
2966 		}
2967 		throw RequestException(LIZARDFS_ERROR_EINVAL);
2968 	}
2969 
2970 	finfo *fileinfo = reinterpret_cast<finfo*>(fi->fh);
2971 
2972 	// increase flock_id counter
2973 	lock_request_mutex.lock();
2974 	reqid = lock_request_counter++;
2975 	lock_request_mutex.unlock();
2976 
2977 	if (fileinfo != NULL) {
2978 		PthreadMutexWrapper lock(fileinfo->lock);
2979 		fileinfo->use_flocks = true;
2980 	}
2981 
2982 	// communicate with master
2983 	status = fs_flock_send(ino, fi->lock_owner, reqid, op);
2984 
2985 	if (status) {
2986 		throw RequestException(status);
2987 	}
2988 
2989 	return reqid;
2990 }
2991 
flock_recv()2992 void flock_recv() {
2993 	uint32_t status = fs_flock_recv();
2994 
2995 	if (status) {
2996 		throw RequestException(status);
2997 	}
2998 }
2999 
makesnapshot(const Context & ctx,Inode ino,Inode dst_parent,const std::string & dst_name,bool can_overwrite)3000 JobId makesnapshot(const Context &ctx, Inode ino, Inode dst_parent, const std::string &dst_name,
3001 	          bool can_overwrite) {
3002 	if (IS_SPECIAL_INODE(ino)) {
3003 		oplog_printf(ctx, "makesnapshot (%lu, %lu, %s): %s",
3004 				(unsigned long)ino, (unsigned long)dst_parent, dst_name.c_str(), strerr(EINVAL));
3005 		throw RequestException(EINVAL);
3006 	}
3007 
3008 	JobId job_id;
3009 	uint8_t status;
3010 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
3011 		fs_makesnapshot(ino, dst_parent, dst_name, ctx.uid, ctx.gid, can_overwrite, job_id));
3012 	if (status != LIZARDFS_STATUS_OK) {
3013 		throw RequestException(status);
3014 	}
3015 
3016 	return job_id;
3017 }
3018 
getgoal(const Context & ctx,Inode ino)3019 std::string getgoal(const Context &ctx, Inode ino) {
3020 	if (IS_SPECIAL_INODE(ino)) {
3021 		oplog_printf(ctx, "getgoal (%lu): %s",
3022 				(unsigned long)ino, strerr(EINVAL));
3023 		throw RequestException(EINVAL);
3024 	}
3025 
3026 	std::string goal;
3027 	uint8_t status = fs_getgoal(ino, goal);
3028 	if (status != LIZARDFS_STATUS_OK) {
3029 		throw RequestException(status);
3030 	}
3031 
3032 	return goal;
3033 }
3034 
setgoal(const Context & ctx,Inode ino,const std::string & goal_name,uint8_t smode)3035 void setgoal(const Context &ctx, Inode ino, const std::string &goal_name, uint8_t smode) {
3036 	if (IS_SPECIAL_INODE(ino)) {
3037 		oplog_printf(ctx, "setgoal (%lu, %s): %s",
3038 				(unsigned long)ino, goal_name.c_str(), strerr(EINVAL));
3039 		throw RequestException(EINVAL);
3040 	}
3041 
3042 	uint8_t status = fs_setgoal(ino, ctx.uid, goal_name, smode);
3043 	if (status != LIZARDFS_STATUS_OK) {
3044 		throw RequestException(status);
3045 	}
3046 }
3047 
statfs(uint64_t * totalspace,uint64_t * availspace,uint64_t * trashspace,uint64_t * reservedspace,uint32_t * inodes)3048 void statfs(uint64_t *totalspace, uint64_t *availspace, uint64_t *trashspace, uint64_t *reservedspace, uint32_t *inodes) {
3049 	fs_statfs(totalspace, availspace, trashspace, reservedspace, inodes);
3050 }
3051 
getchunksinfo(const Context & ctx,Inode ino,uint32_t chunk_index,uint32_t chunk_count)3052 std::vector<ChunkWithAddressAndLabel> getchunksinfo(const Context &ctx, Inode ino,
3053 	                                  uint32_t chunk_index, uint32_t chunk_count) {
3054 	if (IS_SPECIAL_INODE(ino)) {
3055 		oplog_printf(ctx, "getchunksinfo (%lu, %u, %u): %s",
3056 				(unsigned long)ino, (unsigned)chunk_index, (unsigned)chunk_count, strerr(EINVAL));
3057 		throw RequestException(EINVAL);
3058 	}
3059 	std::vector<ChunkWithAddressAndLabel> chunks;
3060 	uint8_t status;
3061 	RETRY_ON_ERROR_WITH_UPDATED_CREDENTIALS(status, ctx.gid,
3062 		fs_getchunksinfo(ctx.uid, ctx.gid, ino, chunk_index, chunk_count, chunks));
3063 	if (status != LIZARDFS_STATUS_OK) {
3064 		throw RequestException(status);
3065 	}
3066 	return chunks;
3067 }
3068 
getchunkservers()3069 std::vector<ChunkserverListEntry> getchunkservers() {
3070 	std::vector<ChunkserverListEntry> chunkservers;
3071 	uint8_t status = fs_getchunkservers(chunkservers);
3072 	if (status != LIZARDFS_STATUS_OK) {
3073 		throw RequestException(status);
3074 	}
3075 	return chunkservers;
3076 }
3077 
init(int debug_mode_,int keep_cache_,double direntry_cache_timeout_,unsigned direntry_cache_size_,double entry_cache_timeout_,double attr_cache_timeout_,int mkdir_copy_sgid_,SugidClearMode sugid_clear_mode_,bool use_rwlock_,double acl_cache_timeout_,unsigned acl_cache_size_)3078 void init(int debug_mode_, int keep_cache_, double direntry_cache_timeout_, unsigned direntry_cache_size_,
3079 		double entry_cache_timeout_, double attr_cache_timeout_, int mkdir_copy_sgid_,
3080 		SugidClearMode sugid_clear_mode_, bool use_rwlock_,
3081 		double acl_cache_timeout_, unsigned acl_cache_size_) {
3082 	debug_mode = debug_mode_;
3083 	keep_cache = keep_cache_;
3084 	direntry_cache_timeout = direntry_cache_timeout_;
3085 	entry_cache_timeout = entry_cache_timeout_;
3086 	attr_cache_timeout = attr_cache_timeout_;
3087 	mkdir_copy_sgid = mkdir_copy_sgid_;
3088 	sugid_clear_mode = static_cast<decltype (sugid_clear_mode)>(sugid_clear_mode_);
3089 	use_rwlock = use_rwlock_;
3090 	uint64_t timeout = (uint64_t)(direntry_cache_timeout * 1000000);
3091 	gDirEntryCache.setTimeout(timeout);
3092 	gDirEntryCacheMaxSize = direntry_cache_size_;
3093 	if (debug_mode) {
3094 		lzfs::log_debug("cache parameters: file_keep_cache={} direntry_cache_timeout={:.2f}"
3095 		                " entry_cache_timeout={:.2f} attr_cache_timeout={:.2f}",
3096 		                (keep_cache==1)?"always":(keep_cache==2)?"never":"auto",
3097 		                direntry_cache_timeout, entry_cache_timeout, attr_cache_timeout);
3098 		lzfs::log_debug("mkdir copy sgid={} sugid clear mode={}",
3099 		                mkdir_copy_sgid_, sugidClearModeString(sugid_clear_mode_));
3100 		lzfs::log_debug("RW lock {}", use_rwlock ? "enabled" : "disabled");
3101 		lzfs::log_debug("ACL acl_cache_timeout={:.2f}, acl_cache_size={}\n",
3102 		                acl_cache_timeout_, acl_cache_size_);
3103 	}
3104 	statsptr_init();
3105 
3106 	acl_cache.reset(new AclCache(
3107 			std::chrono::milliseconds((int)(1000 * acl_cache_timeout_)),
3108 			acl_cache_size_,
3109 			getAcl));
3110 
3111 	gTweaks.registerVariable("DirectIO", gDirectIo);
3112 	gTweaks.registerVariable("AclCacheMaxTime", acl_cache->maxTime_ms);
3113 	gTweaks.registerVariable("AclCacheHit", acl_cache->cacheHit);
3114 	gTweaks.registerVariable("AclCacheExpired", acl_cache->cacheExpired);
3115 	gTweaks.registerVariable("AclCacheMiss", acl_cache->cacheMiss);
3116 }
3117 
fs_init(FsInitParams & params)3118 void fs_init(FsInitParams &params) {
3119 	socketinit();
3120 	mycrc32_init();
3121 	int connection_ret = fs_init_master_connection(params);
3122 	if (!params.delayed_init && connection_ret < 0) {
3123 		lzfs_pretty_syslog(LOG_ERR, "Can't initialize connection with master server");
3124 		socketrelease();
3125 		throw std::runtime_error("Can't initialize connection with master server");
3126 	}
3127 	symlink_cache_init(params.symlink_cache_timeout_s);
3128 	gGlobalIoLimiter();
3129 	fs_init_threads(params.io_retries);
3130 	masterproxy_init();
3131 
3132 	gLocalIoLimiter();
3133 	try {
3134 		IoLimitsConfigLoader loader;
3135 		if (!params.io_limits_config_file.empty()) {
3136 			loader.load(std::ifstream(params.io_limits_config_file.c_str()));
3137 		}
3138 		gMountLimiter().loadConfiguration(loader);
3139 	} catch (Exception &ex) {
3140 		lzfs_pretty_syslog(LOG_ERR, "Can't initialize I/O limiting: %s", ex.what());
3141 		masterproxy_term();
3142 		::fs_term();
3143 		symlink_cache_term();
3144 		socketrelease();
3145 		throw std::runtime_error("Can't initialize I/O limiting");
3146 	}
3147 
3148 	read_data_init(params.io_retries,
3149 			params.chunkserver_round_time_ms,
3150 			params.chunkserver_connect_timeout_ms,
3151 			params.chunkserver_wave_read_timeout_ms,
3152 			params.total_read_timeout_ms,
3153 			params.cache_expiration_time_ms,
3154 			params.readahead_max_window_size_kB,
3155 			params.prefetch_xor_stripes,
3156 			std::max(params.bandwidth_overuse, 1.));
3157 	write_data_init(params.write_cache_size, params.io_retries, params.write_workers,
3158 			params.write_window_size, params.chunkserver_write_timeout_ms, params.cache_per_inode_percentage);
3159 
3160 	init(params.debug_mode, params.keep_cache, params.direntry_cache_timeout, params.direntry_cache_size,
3161 		params.entry_cache_timeout, params.attr_cache_timeout, params.mkdir_copy_sgid,
3162 		params.sugid_clear_mode, params.use_rw_lock,
3163 		params.acl_cache_timeout, params.acl_cache_size);
3164 }
3165 
fs_term()3166 void fs_term() {
3167 	write_data_term();
3168 	read_data_term();
3169 	masterproxy_term();
3170 	::fs_term();
3171 	symlink_cache_term();
3172 	socketrelease();
3173 }
3174 
3175 } // namespace LizardClient
3176