1 /*
2  * Copyright (C) 2021 Jakub Kruszona-Zawadzki, Core Technology Sp. z o.o.
3  *
4  * This file is part of MooseFS.
5  *
6  * MooseFS is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, version 2 (only).
9  *
10  * MooseFS is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with MooseFS; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301, USA
18  * or visit http://www.gnu.org/licenses/gpl-2.0.html
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include <stdio.h>
26 #include <string.h>
27 #include <math.h>
28 #include <limits.h>
29 #include <stdlib.h>
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/stat.h>
33 #include <sys/statvfs.h>
34 #include <sys/time.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <inttypes.h>
38 #include <poll.h>
39 #include <errno.h>
40 
41 #include "labelparser.h"
42 #include "datapack.h"
43 #include "massert.h"
44 // #include "strerr.h"
45 #include "mfsstrerr.h"
46 #include "sockets.h"
47 #include "hashfn.h"
48 #include "clocks.h"
49 #include "mfsalloc.h"
50 #include "md5.h"
51 #include "MFSCommunication.h"
52 
/*
 * Shorthands for the timeout-aware socket helpers: every read/write issued
 * through them passes the same fixed timeout argument (20000; presumably
 * milliseconds - confirm against sockets.h).
 */
#define tcpread(s,b,l) tcptoread((s),(b),(l),20000)
#define tcpwrite(s,b,l) tcptowrite((s),(b),(l),20000)
55 
/*
 * Bit layout of inode numbers as checked by master_prepare_conn() when the
 * '.masterinfo' file identifies a meta master: the bits selected by
 * INODE_TYPE_MASK tell the kind of object, the bits selected by
 * INODE_VALUE_MASK are kept as the inode number.
 */
#define INODE_VALUE_MASK 0x1FFFFFFF
#define INODE_TYPE_TRASH 0x20000000
#define INODE_TYPE_SUSTAINED 0x40000000
#define INODE_TYPE_SPECIAL 0x00000000
/* both type bits together (0x60000000) */
#define INODE_TYPE_MASK (INODE_TYPE_TRASH|INODE_TYPE_SUSTAINED)
61 
62 
/*
 * Reduce 'path' (in place) to the name of its parent directory.
 *
 * - NULL is ignored.
 * - An empty string, or a path without any '/', becomes ".".
 * - A path whose only directory part is the root becomes "/".
 * - Trailing slashes of the input are ignored and the run of slashes that
 *   separates the directory from the last component is removed as a whole,
 *   so "a//b" yields "a" (not "a/").
 *
 * The buffer must have room for at least two bytes, because "." or "/" plus
 * the terminator may be stored even when the input string is empty.
 */
static void dirname_inplace(char *path) {
	char *endp;

	if (path==NULL) {
		return;
	}
	if (path[0]=='\0') {
		path[0]='.';
		path[1]='\0';
		return;
	}

	/* Skip trailing slashes */
	endp = path + strlen(path) - 1;
	while (endp > path && *endp == '/') {
		endp--;
	}

	/* Skip the last path component */
	while (endp > path && *endp != '/') {
		endp--;
	}

	if (endp == path) {
		/* Either the directory is the root or there is no slash at all */
		path[0] = (path[0]=='/') ? '/' : '.';
		path[1] = '\0';
		return;
	}

	/* endp points at a separator past position 0 - drop the whole run of separators */
	do {
		endp--;
	} while (endp > path && *endp == '/');
	endp[1] = '\0';
}
98 
master_register(int rfd,uint32_t cuid)99 static int master_register(int rfd,uint32_t cuid) {
100 	uint32_t i;
101 	const uint8_t *rptr;
102 	uint8_t *wptr,regbuff[8+73];
103 
104 	wptr = regbuff;
105 	put32bit(&wptr,CLTOMA_FUSE_REGISTER);
106 	put32bit(&wptr,73);
107 	memcpy(wptr,FUSE_REGISTER_BLOB_ACL,64);
108 	wptr+=64;
109 	put8bit(&wptr,REGISTER_TOOLS);
110 	put32bit(&wptr,cuid);
111 	put16bit(&wptr,VERSMAJ);
112 	put8bit(&wptr,VERSMID);
113 	put8bit(&wptr,VERSMIN);
114 	if (tcpwrite(rfd,regbuff,8+73)!=8+73) {
115 		printf("register to master: send error\n");
116 		return -1;
117 	}
118 	if (tcpread(rfd,regbuff,9)!=9) {
119 		printf("register to master: receive error\n");
120 		return -1;
121 	}
122 	rptr = regbuff;
123 	i = get32bit(&rptr);
124 	if (i!=MATOCL_FUSE_REGISTER) {
125 		printf("register to master: wrong answer (type)\n");
126 		return -1;
127 	}
128 	i = get32bit(&rptr);
129 	if (i!=1) {
130 		printf("register to master: wrong answer (length)\n");
131 		return -1;
132 	}
133 	if (*rptr) {
134 		printf("register to master: %s\n",mfsstrerr(*rptr));
135 		return -1;
136 	}
137 	return 0;
138 }
139 
/* State of the (single) connection to the master used by the tools. */
typedef struct _master_conn {
	/* where the master lives - filled in by master_prepare_conn() */
	dev_t device;            /* st_dev of the object the data below was found for */
	uint32_t masterversion;  /* version read from '.masterinfo' (0 when absent) */
	uint32_t masterip;       /* master address read from '.masterinfo' */
	uint16_t masterport;     /* master port read from '.masterinfo' */
	uint32_t mastercuid;     /* id passed to the master during registration */

	/* socket and packet buffers */
	int fd;                  /* connection socket, -1 when not connected */
	uint32_t sbuffsize;      /* allocated size of sbuff */
	uint32_t rbuffsize;      /* allocated size of rbuff */
	uint8_t *sbuff;          /* request buffer (first 12 bytes hold the header) */
	uint8_t *rbuff;          /* answer buffer */
	uint8_t *wptr;           /* write position inside sbuff */
	const uint8_t *rptr;     /* read position inside rbuff */
	uint32_t pleng;          /* length of the answer data currently in rbuff */
	uint8_t err;             /* set when a read went past the received data */
} master_conn;

/* the connection shared by all functions in this file (allocated by master_init) */
static master_conn *mc;
157 
master_connect(void)158 static int master_connect(void) {
159 	uint8_t cnt;
160 	cnt=0;
161 	while (cnt<10) {
162 		mc->fd = tcpsocket();
163 		if (mc->fd<0) {
164 			printf("can't create connection socket: %s\n",strerr(errno));
165 			return -1;
166 		}
167 		tcpreuseaddr(mc->fd);
168 		tcpnumbind(mc->fd,0,0);
169 		if (tcpnumtoconnect(mc->fd,mc->masterip,mc->masterport,(cnt%2)?(300*(1<<(cnt>>1))):(200*(1<<(cnt>>1))))<0) {
170 			tcpclose(mc->fd);
171 			mc->fd = -1;
172 			cnt++;
173 			if (cnt==10) {
174 				printf("can't connect to master (.masterinfo): %s\n",strerr(errno));
175 				return -1;
176 			}
177 		} else {
178 			cnt=10;
179 		}
180 	}
181 	tcpnodelay(mc->fd);
182 	if (master_register(mc->fd,mc->mastercuid)<0) {
183 		printf("can't register to master (.masterinfo)\n");
184 		tcpclose(mc->fd);
185 		mc->fd = -1;
186 		return -1;
187 	}
188 	return 0;
189 }
190 
master_prepare_conn(const char * name,uint32_t * inode,mode_t * mode,uint64_t * leng,uint8_t needsamedev,uint8_t needrwfs)191 int master_prepare_conn(const char *name,uint32_t *inode,mode_t *mode,uint64_t *leng,uint8_t needsamedev,uint8_t needrwfs) {
192 	char rpath[PATH_MAX+1];
193 	struct stat stb;
194 	struct statvfs stvfsb;
195 	int sd;
196 	uint8_t masterinfo[14];
197 	const uint8_t *miptr;
198 	uint32_t pinode;
199 	int rpathlen;
200 
201 	rpath[0]=0;
202 	if (realpath(name,rpath)==NULL) {
203 		printf("%s: realpath error on (%s): %s\n",name,rpath,strerr(errno));
204 		return -1;
205 	}
206 //	p = rpath;
207 	if (needrwfs) {
208 		if (statvfs(rpath,&stvfsb)!=0) {
209 			printf("%s: (%s) statvfs error: %s\n",name,rpath,strerr(errno));
210 			return -1;
211 		}
212 		if (stvfsb.f_flag&ST_RDONLY) {
213 			printf("%s: (%s) Read-only file system\n",name,rpath);
214 			return -1;
215 		}
216 	}
217 	if (lstat(rpath,&stb)!=0) {
218 		printf("%s: (%s) lstat error: %s\n",name,rpath,strerr(errno));
219 		return -1;
220 	}
221 	pinode = stb.st_ino;
222 	if (inode!=NULL) {
223 		*inode = pinode;
224 	}
225 	if (mode!=NULL) {
226 		*mode = stb.st_mode;
227 	}
228 	if (leng!=NULL) {
229 		*leng = stb.st_size;
230 	}
231 	if (mc->fd>=0) {
232 		if (mc->device==stb.st_dev) {
233 			return 0;
234 		}
235 		if (needsamedev) {
236 			printf("%s: different device\n",name);
237 			return -1;
238 		}
239 	}
240 	if (mc->fd>=0) {
241 		close(mc->fd);
242 		mc->fd=-1;
243 	}
244 	mc->device = stb.st_dev;
245 	for(;;) {
246 		rpathlen = strlen(rpath);
247 		if (rpathlen+strlen("/.masterinfo")<PATH_MAX) {
248 			strcpy(rpath+rpathlen,"/.masterinfo");
249 			if (lstat(rpath,&stb)==0) {
250 				if ((stb.st_ino==0x7FFFFFFF || stb.st_ino==0x7FFFFFFE) && stb.st_nlink==1 && stb.st_uid==0 && stb.st_gid==0 && (stb.st_size==10 || stb.st_size==14)) {
251 					if (stb.st_ino==0x7FFFFFFE && inode!=NULL) {	// meta master
252 						if (((*inode)&INODE_TYPE_MASK)!=INODE_TYPE_TRASH && ((*inode)&INODE_TYPE_MASK)!=INODE_TYPE_SUSTAINED) {
253 							printf("%s: only files in 'trash' and 'sustained' are usable in mfsmeta\n",name);
254 							return -1;
255 						}
256 						(*inode)&=INODE_VALUE_MASK;
257 					}
258 					sd = open(rpath,O_RDONLY);
259 					if (stb.st_size==10) {
260 						if (read(sd,masterinfo,10)!=10) {
261 							printf("%s: can't read '.masterinfo'\n",name);
262 							close(sd);
263 							return -1;
264 						}
265 					} else if (stb.st_size==14) {
266 						if (read(sd,masterinfo,14)!=14) {
267 							printf("%s: can't read '.masterinfo'\n",name);
268 							close(sd);
269 							return -1;
270 						}
271 					}
272 					close(sd);
273 					miptr = masterinfo;
274 					mc->masterip = get32bit(&miptr);
275 					mc->masterport = get16bit(&miptr);
276 					mc->mastercuid = get32bit(&miptr);
277 					if (stb.st_size==14) {
278 						mc->masterversion = get32bit(&miptr);
279 					} else {
280 						mc->masterversion = 0;
281 					}
282 					if (mc->masterip==0 || mc->masterport==0 || mc->mastercuid==0) {
283 						printf("%s: incorrect '.masterinfo'\n",name);
284 						return -1;
285 					}
286 					return 0;
287 				}
288 			} else if (pinode==1) { // this is root inode - if there is no .masterinfo here then it is not MFS.
289 				printf("%s: not MFS object\n",name);
290 				return -1;
291 			}
292 		} else if (pinode==1) { // found root inode, but path is still to long - give up
293 			printf("%s: path too long\n",name);
294 			return -1;
295 		}
296 		rpath[rpathlen]='\0';
297 		if (rpath[0]!='/' || rpath[1]=='\0') { // went to '/' without success - this is not MFS
298 			printf("%s: not MFS object\n",name);
299 			return -1;
300 		}
301 		dirname_inplace(rpath);
302 		if (lstat(rpath,&stb)!=0) {
303 			printf("%s: (%s) lstat error: %s\n",name,rpath,strerr(errno));
304 			return -1;
305 		}
306 		pinode = stb.st_ino;
307 	}
308 	return -1;
309 }
310 
master_close_conn(int err)311 static void master_close_conn(int err) {
312 	if (mc->fd<0) {
313 		return;
314 	}
315 	if (err) {
316 		close(mc->fd);
317 		mc->fd = -1;
318 		mc->device = 0;
319 	}
320 }
321 
master_get_version(void)322 uint32_t master_get_version(void) {
323 	return mc->masterversion;
324 }
325 
master_init(void)326 void master_init(void) {
327 	mc = malloc(sizeof(master_conn));
328 	passert(mc);
329 
330 	mc->device = 0;
331 	mc->masterversion = 0;
332 	mc->masterip = 0;
333 	mc->masterport = 0;
334 	mc->mastercuid = 0;
335 	mc->fd = -1;
336 	mc->sbuffsize = 0;
337 	mc->rbuffsize = 0;
338 	mc->sbuff = NULL;
339 	mc->rbuff = NULL;
340 	mc->wptr = NULL;
341 	mc->rptr = NULL;
342 	mc->err = 0;
343 }
344 
master_error(void)345 void master_error(void) {
346 	close(mc->fd);
347 	mc->fd = -1;
348 	mc->device = 0;
349 	mc->masterversion = 0;
350 	mc->masterip = 0;
351 	mc->masterport = 0;
352 	mc->mastercuid = 0;
353 }
354 
master_new_packet(void)355 void master_new_packet(void) {
356 	mc->err = 0;
357 	mc->wptr = mc->sbuff + 12;
358 }
359 
master_sendcheck(uint8_t bytes)360 static inline void master_sendcheck(uint8_t bytes) {
361 	if (mc->sbuffsize==0 || mc->sbuff==NULL || mc->wptr==NULL) {
362 		mc->sbuffsize = 100;
363 		mc->sbuff = malloc(mc->sbuffsize);
364 		mc->wptr = mc->sbuff + 12; // leave space for command and length
365 	} else if ((mc->wptr - mc->sbuff) + bytes > (long int)mc->sbuffsize) {
366 		uint32_t pleng;
367 		pleng = (mc->wptr - mc->sbuff);
368 		if (bytes>mc->sbuffsize) {
369 			mc->sbuffsize += (bytes * 3) / 2;
370 		} else {
371 			mc->sbuffsize *= 3;
372 			mc->sbuffsize /= 2;
373 		}
374 		mc->sbuff = mfsrealloc(mc->sbuff,mc->sbuffsize);
375 		mc->wptr = mc->sbuff + pleng;
376 	}
377 	passert(mc->sbuff);
378 }
379 
master_put8bit(uint8_t d8)380 void master_put8bit(uint8_t d8) {
381 	master_sendcheck(1);
382 	put8bit(&(mc->wptr),d8);
383 }
384 
master_put16bit(uint16_t d16)385 void master_put16bit(uint16_t d16) {
386 	master_sendcheck(2);
387 	put16bit(&(mc->wptr),d16);
388 }
389 
master_put32bit(uint32_t d32)390 void master_put32bit(uint32_t d32) {
391 	master_sendcheck(4);
392 	put32bit(&(mc->wptr),d32);
393 }
394 
master_put64bit(uint64_t d64)395 void master_put64bit(uint64_t d64) {
396 	master_sendcheck(8);
397 	put64bit(&(mc->wptr),d64);
398 }
399 
master_putname(uint8_t nleng,const char name[256])400 void master_putname(uint8_t nleng,const char name[256]) {
401 	master_sendcheck(nleng+1);
402 	put8bit(&(mc->wptr),nleng);
403 	memcpy(mc->wptr,name,nleng);
404 	mc->wptr += nleng;
405 }
406 
master_send_and_receive(uint32_t scmd,uint32_t ecmd)407 int master_send_and_receive(uint32_t scmd,uint32_t ecmd) {
408 	uint8_t hdr[12];
409 	uint8_t *wptr;
410 	const uint8_t *rptr;
411 	uint32_t pleng,rcmd;
412 	uint8_t cnt;
413 
414 	pleng = (mc->wptr - mc->sbuff);
415 	wptr = mc->sbuff;
416 	put32bit(&wptr,scmd);
417 	put32bit(&wptr,pleng-8);
418 	put32bit(&wptr,0); // query id
419 	cnt = 0;
420 	while(1) {
421 		if (mc->fd<0) {
422 			if (master_connect()<0) {
423 				cnt++;
424 				if (cnt>=10) {
425 					printf("can't connect to master\n");
426 					master_close_conn(1);
427 					return -1;
428 				}
429 				sleep(1);
430 				continue;
431 			}
432 		}
433 		if (tcpwrite(mc->fd,mc->sbuff,pleng)!=(ssize_t)pleng) {
434 			master_close_conn(1);
435 			cnt++;
436 			if (cnt>=10) {
437 				printf("master query: send error\n");
438 				return -1;
439 			}
440 			continue;
441 		}
442 		if (tcpread(mc->fd,hdr,12)!=12) {
443 			master_close_conn(1);
444 			cnt++;
445 			if (cnt>=10) {
446 				printf("master query: receive error\n");
447 				return -1;
448 			}
449 			continue;
450 		}
451 		rptr = hdr;
452 		rcmd = get32bit(&rptr);
453 		mc->pleng = get32bit(&rptr);
454 		if (rcmd!=ecmd) {
455 			printf("master query: unexpected answer\n");
456 			master_close_conn(1);
457 			return -1;
458 		}
459 		if (mc->pleng<4) {
460 			printf("master query: packet too short\n");
461 			master_close_conn(1);
462 			return -1;
463 		}
464 		if (get32bit(&rptr)!=0) {
465 			printf("master query: unexpected query id\n");
466 			master_close_conn(1);
467 			return -1;
468 		}
469 		mc->pleng -= 4;
470 		if (mc->rbuffsize<mc->pleng) {
471 			if (mc->rbuff!=NULL) {
472 				free(mc->rbuff);
473 			}
474 			mc->rbuff = malloc(mc->pleng);
475 			passert(mc->rbuff);
476 			mc->rbuffsize = mc->pleng;
477 		}
478 		mc->rptr = mc->rbuff;
479 		if (mc->pleng>0) {
480 			if (tcpread(mc->fd,mc->rbuff,mc->pleng)!=(ssize_t)(mc->pleng)) {
481 				master_close_conn(1);
482 				cnt++;
483 				if (cnt>=10) {
484 					printf("master query: receive error\n");
485 					return -1;
486 				}
487 				continue;
488 			}
489 		}
490 		return 0;
491 	}
492 	return 0;
493 }
494 
master_recvcheck(uint8_t bytes)495 static inline int master_recvcheck(uint8_t bytes) {
496 	if (mc->err || mc->rbuffsize==0 || mc->rbuff==NULL || mc->rptr==NULL) {
497 		mc->err = 1;
498 		return -1;
499 	} else if ((mc->rptr - mc->rbuff) + bytes > (long int)mc->pleng) {
500 		mc->err = 1;
501 		return -1;
502 	}
503 	return 0;
504 }
505 
master_get_leng(void)506 uint32_t master_get_leng(void) {
507 	return mc->pleng;
508 }
509 
master_get8bit(void)510 uint8_t master_get8bit(void) {
511 	if (master_recvcheck(1)<0) {
512 		return 0;
513 	} else {
514 		return get8bit(&(mc->rptr));
515 	}
516 }
517 
master_get16bit(void)518 uint16_t master_get16bit(void) {
519 	if (master_recvcheck(2)<0) {
520 		return 0;
521 	} else {
522 		return get16bit(&(mc->rptr));
523 	}
524 }
525 
master_get32bit(void)526 uint32_t master_get32bit(void) {
527 	if (master_recvcheck(4)<0) {
528 		return 0;
529 	} else {
530 		return get32bit(&(mc->rptr));
531 	}
532 }
533 
master_get64bit(void)534 uint64_t master_get64bit(void) {
535 	if (master_recvcheck(8)<0) {
536 		return 0;
537 	} else {
538 		return get64bit(&(mc->rptr));
539 	}
540 }
541 
master_getname(char name[256])542 void master_getname(char name[256]) {
543 	uint8_t nleng = master_get8bit();
544 	if (master_recvcheck(nleng)<0) {
545 		name[0] = 0;
546 	} else {
547 		memcpy(name,mc->rptr,nleng);
548 		name[nleng]=0;
549 		mc->rptr += nleng;
550 	}
551 	return;
552 }
553 
master_bytes_left(void)554 uint32_t master_bytes_left(void) {
555 	if (mc->err) {
556 		return 0;
557 	}
558 	return mc->pleng - (mc->rptr - mc->rbuff);
559 }
560 
master_end_packet(void)561 uint8_t master_end_packet(void) {
562 	if (mc->err==1) {
563 		master_error();
564 		return 0;
565 	}
566 	if ((mc->rptr - mc->rbuff) != (long int)mc->pleng) {
567 		master_error();
568 		return 0;
569 	}
570 	return 1;
571 }
572