xref: /linux/fs/xfs/libxfs/xfs_attr_remote.c (revision 9a6b55ac)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * Copyright (c) 2013 Red Hat, Inc.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_fs.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_bit.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_da_format.h"
17 #include "xfs_da_btree.h"
18 #include "xfs_inode.h"
19 #include "xfs_trans.h"
20 #include "xfs_bmap.h"
21 #include "xfs_attr.h"
22 #include "xfs_attr_remote.h"
23 #include "xfs_trace.h"
24 #include "xfs_error.h"
25 
26 #define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */
27 
28 /*
29  * Each contiguous block has a header, so it is not just a simple attribute
30  * length to FSB conversion.
31  */
32 int
33 xfs_attr3_rmt_blocks(
34 	struct xfs_mount *mp,
35 	int		attrlen)
36 {
37 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
38 		int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
39 		return (attrlen + buflen - 1) / buflen;
40 	}
41 	return XFS_B_TO_FSB(mp, attrlen);
42 }
43 
44 /*
45  * Checking of the remote attribute header is split into two parts. The verifier
46  * does CRC, location and bounds checking, the unpacking function checks the
47  * attribute parameters and owner.
48  */
49 static xfs_failaddr_t
50 xfs_attr3_rmt_hdr_ok(
51 	void			*ptr,
52 	xfs_ino_t		ino,
53 	uint32_t		offset,
54 	uint32_t		size,
55 	xfs_daddr_t		bno)
56 {
57 	struct xfs_attr3_rmt_hdr *rmt = ptr;
58 
59 	if (bno != be64_to_cpu(rmt->rm_blkno))
60 		return __this_address;
61 	if (offset != be32_to_cpu(rmt->rm_offset))
62 		return __this_address;
63 	if (size != be32_to_cpu(rmt->rm_bytes))
64 		return __this_address;
65 	if (ino != be64_to_cpu(rmt->rm_owner))
66 		return __this_address;
67 
68 	/* ok */
69 	return NULL;
70 }
71 
72 static xfs_failaddr_t
73 xfs_attr3_rmt_verify(
74 	struct xfs_mount	*mp,
75 	struct xfs_buf		*bp,
76 	void			*ptr,
77 	int			fsbsize,
78 	xfs_daddr_t		bno)
79 {
80 	struct xfs_attr3_rmt_hdr *rmt = ptr;
81 
82 	if (!xfs_sb_version_hascrc(&mp->m_sb))
83 		return __this_address;
84 	if (!xfs_verify_magic(bp, rmt->rm_magic))
85 		return __this_address;
86 	if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
87 		return __this_address;
88 	if (be64_to_cpu(rmt->rm_blkno) != bno)
89 		return __this_address;
90 	if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
91 		return __this_address;
92 	if (be32_to_cpu(rmt->rm_offset) +
93 				be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
94 		return __this_address;
95 	if (rmt->rm_owner == 0)
96 		return __this_address;
97 
98 	return NULL;
99 }
100 
101 static int
102 __xfs_attr3_rmt_read_verify(
103 	struct xfs_buf	*bp,
104 	bool		check_crc,
105 	xfs_failaddr_t	*failaddr)
106 {
107 	struct xfs_mount *mp = bp->b_mount;
108 	char		*ptr;
109 	int		len;
110 	xfs_daddr_t	bno;
111 	int		blksize = mp->m_attr_geo->blksize;
112 
113 	/* no verification of non-crc buffers */
114 	if (!xfs_sb_version_hascrc(&mp->m_sb))
115 		return 0;
116 
117 	ptr = bp->b_addr;
118 	bno = bp->b_bn;
119 	len = BBTOB(bp->b_length);
120 	ASSERT(len >= blksize);
121 
122 	while (len > 0) {
123 		if (check_crc &&
124 		    !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
125 			*failaddr = __this_address;
126 			return -EFSBADCRC;
127 		}
128 		*failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
129 		if (*failaddr)
130 			return -EFSCORRUPTED;
131 		len -= blksize;
132 		ptr += blksize;
133 		bno += BTOBB(blksize);
134 	}
135 
136 	if (len != 0) {
137 		*failaddr = __this_address;
138 		return -EFSCORRUPTED;
139 	}
140 
141 	return 0;
142 }
143 
144 static void
145 xfs_attr3_rmt_read_verify(
146 	struct xfs_buf	*bp)
147 {
148 	xfs_failaddr_t	fa;
149 	int		error;
150 
151 	error = __xfs_attr3_rmt_read_verify(bp, true, &fa);
152 	if (error)
153 		xfs_verifier_error(bp, error, fa);
154 }
155 
156 static xfs_failaddr_t
157 xfs_attr3_rmt_verify_struct(
158 	struct xfs_buf	*bp)
159 {
160 	xfs_failaddr_t	fa;
161 	int		error;
162 
163 	error = __xfs_attr3_rmt_read_verify(bp, false, &fa);
164 	return error ? fa : NULL;
165 }
166 
167 static void
168 xfs_attr3_rmt_write_verify(
169 	struct xfs_buf	*bp)
170 {
171 	struct xfs_mount *mp = bp->b_mount;
172 	xfs_failaddr_t	fa;
173 	int		blksize = mp->m_attr_geo->blksize;
174 	char		*ptr;
175 	int		len;
176 	xfs_daddr_t	bno;
177 
178 	/* no verification of non-crc buffers */
179 	if (!xfs_sb_version_hascrc(&mp->m_sb))
180 		return;
181 
182 	ptr = bp->b_addr;
183 	bno = bp->b_bn;
184 	len = BBTOB(bp->b_length);
185 	ASSERT(len >= blksize);
186 
187 	while (len > 0) {
188 		struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
189 
190 		fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
191 		if (fa) {
192 			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
193 			return;
194 		}
195 
196 		/*
197 		 * Ensure we aren't writing bogus LSNs to disk. See
198 		 * xfs_attr3_rmt_hdr_set() for the explanation.
199 		 */
200 		if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
201 			xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
202 			return;
203 		}
204 		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
205 
206 		len -= blksize;
207 		ptr += blksize;
208 		bno += BTOBB(blksize);
209 	}
210 
211 	if (len != 0)
212 		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
213 }
214 
215 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
216 	.name = "xfs_attr3_rmt",
217 	.magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) },
218 	.verify_read = xfs_attr3_rmt_read_verify,
219 	.verify_write = xfs_attr3_rmt_write_verify,
220 	.verify_struct = xfs_attr3_rmt_verify_struct,
221 };
222 
223 STATIC int
224 xfs_attr3_rmt_hdr_set(
225 	struct xfs_mount	*mp,
226 	void			*ptr,
227 	xfs_ino_t		ino,
228 	uint32_t		offset,
229 	uint32_t		size,
230 	xfs_daddr_t		bno)
231 {
232 	struct xfs_attr3_rmt_hdr *rmt = ptr;
233 
234 	if (!xfs_sb_version_hascrc(&mp->m_sb))
235 		return 0;
236 
237 	rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
238 	rmt->rm_offset = cpu_to_be32(offset);
239 	rmt->rm_bytes = cpu_to_be32(size);
240 	uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid);
241 	rmt->rm_owner = cpu_to_be64(ino);
242 	rmt->rm_blkno = cpu_to_be64(bno);
243 
244 	/*
245 	 * Remote attribute blocks are written synchronously, so we don't
246 	 * have an LSN that we can stamp in them that makes any sense to log
247 	 * recovery. To ensure that log recovery handles overwrites of these
248 	 * blocks sanely (i.e. once they've been freed and reallocated as some
249 	 * other type of metadata) we need to ensure that the LSN has a value
250 	 * that tells log recovery to ignore the LSN and overwrite the buffer
251 	 * with whatever is in it's log. To do this, we use the magic
252 	 * NULLCOMMITLSN to indicate that the LSN is invalid.
253 	 */
254 	rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
255 
256 	return sizeof(struct xfs_attr3_rmt_hdr);
257 }
258 
259 /*
260  * Helper functions to copy attribute data in and out of the one disk extents
261  */
262 STATIC int
263 xfs_attr_rmtval_copyout(
264 	struct xfs_mount *mp,
265 	struct xfs_buf	*bp,
266 	xfs_ino_t	ino,
267 	int		*offset,
268 	int		*valuelen,
269 	uint8_t		**dst)
270 {
271 	char		*src = bp->b_addr;
272 	xfs_daddr_t	bno = bp->b_bn;
273 	int		len = BBTOB(bp->b_length);
274 	int		blksize = mp->m_attr_geo->blksize;
275 
276 	ASSERT(len >= blksize);
277 
278 	while (len > 0 && *valuelen > 0) {
279 		int hdr_size = 0;
280 		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
281 
282 		byte_cnt = min(*valuelen, byte_cnt);
283 
284 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
285 			if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
286 						  byte_cnt, bno)) {
287 				xfs_alert(mp,
288 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
289 					bno, *offset, byte_cnt, ino);
290 				return -EFSCORRUPTED;
291 			}
292 			hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
293 		}
294 
295 		memcpy(*dst, src + hdr_size, byte_cnt);
296 
297 		/* roll buffer forwards */
298 		len -= blksize;
299 		src += blksize;
300 		bno += BTOBB(blksize);
301 
302 		/* roll attribute data forwards */
303 		*valuelen -= byte_cnt;
304 		*dst += byte_cnt;
305 		*offset += byte_cnt;
306 	}
307 	return 0;
308 }
309 
310 STATIC void
311 xfs_attr_rmtval_copyin(
312 	struct xfs_mount *mp,
313 	struct xfs_buf	*bp,
314 	xfs_ino_t	ino,
315 	int		*offset,
316 	int		*valuelen,
317 	uint8_t		**src)
318 {
319 	char		*dst = bp->b_addr;
320 	xfs_daddr_t	bno = bp->b_bn;
321 	int		len = BBTOB(bp->b_length);
322 	int		blksize = mp->m_attr_geo->blksize;
323 
324 	ASSERT(len >= blksize);
325 
326 	while (len > 0 && *valuelen > 0) {
327 		int hdr_size;
328 		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
329 
330 		byte_cnt = min(*valuelen, byte_cnt);
331 		hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
332 						 byte_cnt, bno);
333 
334 		memcpy(dst + hdr_size, *src, byte_cnt);
335 
336 		/*
337 		 * If this is the last block, zero the remainder of it.
338 		 * Check that we are actually the last block, too.
339 		 */
340 		if (byte_cnt + hdr_size < blksize) {
341 			ASSERT(*valuelen - byte_cnt == 0);
342 			ASSERT(len == blksize);
343 			memset(dst + hdr_size + byte_cnt, 0,
344 					blksize - hdr_size - byte_cnt);
345 		}
346 
347 		/* roll buffer forwards */
348 		len -= blksize;
349 		dst += blksize;
350 		bno += BTOBB(blksize);
351 
352 		/* roll attribute data forwards */
353 		*valuelen -= byte_cnt;
354 		*src += byte_cnt;
355 		*offset += byte_cnt;
356 	}
357 }
358 
359 /*
360  * Read the value associated with an attribute from the out-of-line buffer
361  * that we stored it in.
362  *
363  * Returns 0 on successful retrieval, otherwise an error.
364  */
365 int
366 xfs_attr_rmtval_get(
367 	struct xfs_da_args	*args)
368 {
369 	struct xfs_bmbt_irec	map[ATTR_RMTVALUE_MAPSIZE];
370 	struct xfs_mount	*mp = args->dp->i_mount;
371 	struct xfs_buf		*bp;
372 	xfs_dablk_t		lblkno = args->rmtblkno;
373 	uint8_t			*dst = args->value;
374 	int			valuelen;
375 	int			nmap;
376 	int			error;
377 	int			blkcnt = args->rmtblkcnt;
378 	int			i;
379 	int			offset = 0;
380 
381 	trace_xfs_attr_rmtval_get(args);
382 
383 	ASSERT(!(args->flags & ATTR_KERNOVAL));
384 	ASSERT(args->rmtvaluelen == args->valuelen);
385 
386 	valuelen = args->rmtvaluelen;
387 	while (valuelen > 0) {
388 		nmap = ATTR_RMTVALUE_MAPSIZE;
389 		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
390 				       blkcnt, map, &nmap,
391 				       XFS_BMAPI_ATTRFORK);
392 		if (error)
393 			return error;
394 		ASSERT(nmap >= 1);
395 
396 		for (i = 0; (i < nmap) && (valuelen > 0); i++) {
397 			xfs_daddr_t	dblkno;
398 			int		dblkcnt;
399 
400 			ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
401 			       (map[i].br_startblock != HOLESTARTBLOCK));
402 			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
403 			dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
404 			error = xfs_trans_read_buf(mp, args->trans,
405 						   mp->m_ddev_targp,
406 						   dblkno, dblkcnt, 0, &bp,
407 						   &xfs_attr3_rmt_buf_ops);
408 			if (error)
409 				return error;
410 
411 			error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
412 							&offset, &valuelen,
413 							&dst);
414 			xfs_trans_brelse(args->trans, bp);
415 			if (error)
416 				return error;
417 
418 			/* roll attribute extent map forwards */
419 			lblkno += map[i].br_blockcount;
420 			blkcnt -= map[i].br_blockcount;
421 		}
422 	}
423 	ASSERT(valuelen == 0);
424 	return 0;
425 }
426 
427 /*
428  * Write the value associated with an attribute into the out-of-line buffer
429  * that we have defined for it.
430  */
431 int
432 xfs_attr_rmtval_set(
433 	struct xfs_da_args	*args)
434 {
435 	struct xfs_inode	*dp = args->dp;
436 	struct xfs_mount	*mp = dp->i_mount;
437 	struct xfs_bmbt_irec	map;
438 	xfs_dablk_t		lblkno;
439 	xfs_fileoff_t		lfileoff = 0;
440 	uint8_t			*src = args->value;
441 	int			blkcnt;
442 	int			valuelen;
443 	int			nmap;
444 	int			error;
445 	int			offset = 0;
446 
447 	trace_xfs_attr_rmtval_set(args);
448 
449 	/*
450 	 * Find a "hole" in the attribute address space large enough for
451 	 * us to drop the new attribute's value into. Because CRC enable
452 	 * attributes have headers, we can't just do a straight byte to FSB
453 	 * conversion and have to take the header space into account.
454 	 */
455 	blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
456 	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
457 						   XFS_ATTR_FORK);
458 	if (error)
459 		return error;
460 
461 	args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
462 	args->rmtblkcnt = blkcnt;
463 
464 	/*
465 	 * Roll through the "value", allocating blocks on disk as required.
466 	 */
467 	while (blkcnt > 0) {
468 		/*
469 		 * Allocate a single extent, up to the size of the value.
470 		 *
471 		 * Note that we have to consider this a data allocation as we
472 		 * write the remote attribute without logging the contents.
473 		 * Hence we must ensure that we aren't using blocks that are on
474 		 * the busy list so that we don't overwrite blocks which have
475 		 * recently been freed but their transactions are not yet
476 		 * committed to disk. If we overwrite the contents of a busy
477 		 * extent and then crash then the block may not contain the
478 		 * correct metadata after log recovery occurs.
479 		 */
480 		nmap = 1;
481 		error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
482 				  blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map,
483 				  &nmap);
484 		if (error)
485 			return error;
486 		error = xfs_defer_finish(&args->trans);
487 		if (error)
488 			return error;
489 
490 		ASSERT(nmap == 1);
491 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
492 		       (map.br_startblock != HOLESTARTBLOCK));
493 		lblkno += map.br_blockcount;
494 		blkcnt -= map.br_blockcount;
495 
496 		/*
497 		 * Start the next trans in the chain.
498 		 */
499 		error = xfs_trans_roll_inode(&args->trans, dp);
500 		if (error)
501 			return error;
502 	}
503 
504 	/*
505 	 * Roll through the "value", copying the attribute value to the
506 	 * already-allocated blocks.  Blocks are written synchronously
507 	 * so that we can know they are all on disk before we turn off
508 	 * the INCOMPLETE flag.
509 	 */
510 	lblkno = args->rmtblkno;
511 	blkcnt = args->rmtblkcnt;
512 	valuelen = args->rmtvaluelen;
513 	while (valuelen > 0) {
514 		struct xfs_buf	*bp;
515 		xfs_daddr_t	dblkno;
516 		int		dblkcnt;
517 
518 		ASSERT(blkcnt > 0);
519 
520 		nmap = 1;
521 		error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
522 				       blkcnt, &map, &nmap,
523 				       XFS_BMAPI_ATTRFORK);
524 		if (error)
525 			return error;
526 		ASSERT(nmap == 1);
527 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
528 		       (map.br_startblock != HOLESTARTBLOCK));
529 
530 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
531 		dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
532 
533 		bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
534 		if (!bp)
535 			return -ENOMEM;
536 		bp->b_ops = &xfs_attr3_rmt_buf_ops;
537 
538 		xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
539 				       &valuelen, &src);
540 
541 		error = xfs_bwrite(bp);	/* GROT: NOTE: synchronous write */
542 		xfs_buf_relse(bp);
543 		if (error)
544 			return error;
545 
546 
547 		/* roll attribute extent map forwards */
548 		lblkno += map.br_blockcount;
549 		blkcnt -= map.br_blockcount;
550 	}
551 	ASSERT(valuelen == 0);
552 	return 0;
553 }
554 
555 /*
556  * Remove the value associated with an attribute by deleting the
557  * out-of-line buffer that it is stored on.
558  */
559 int
560 xfs_attr_rmtval_remove(
561 	struct xfs_da_args	*args)
562 {
563 	struct xfs_mount	*mp = args->dp->i_mount;
564 	xfs_dablk_t		lblkno;
565 	int			blkcnt;
566 	int			error;
567 	int			done;
568 
569 	trace_xfs_attr_rmtval_remove(args);
570 
571 	/*
572 	 * Roll through the "value", invalidating the attribute value's blocks.
573 	 */
574 	lblkno = args->rmtblkno;
575 	blkcnt = args->rmtblkcnt;
576 	while (blkcnt > 0) {
577 		struct xfs_bmbt_irec	map;
578 		struct xfs_buf		*bp;
579 		xfs_daddr_t		dblkno;
580 		int			dblkcnt;
581 		int			nmap;
582 
583 		/*
584 		 * Try to remember where we decided to put the value.
585 		 */
586 		nmap = 1;
587 		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
588 				       blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
589 		if (error)
590 			return error;
591 		ASSERT(nmap == 1);
592 		ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
593 		       (map.br_startblock != HOLESTARTBLOCK));
594 
595 		dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
596 		dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
597 
598 		/*
599 		 * If the "remote" value is in the cache, remove it.
600 		 */
601 		bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
602 		if (bp) {
603 			xfs_buf_stale(bp);
604 			xfs_buf_relse(bp);
605 			bp = NULL;
606 		}
607 
608 		lblkno += map.br_blockcount;
609 		blkcnt -= map.br_blockcount;
610 	}
611 
612 	/*
613 	 * Keep de-allocating extents until the remote-value region is gone.
614 	 */
615 	lblkno = args->rmtblkno;
616 	blkcnt = args->rmtblkcnt;
617 	done = 0;
618 	while (!done) {
619 		error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
620 				    XFS_BMAPI_ATTRFORK, 1, &done);
621 		if (error)
622 			return error;
623 		error = xfs_defer_finish(&args->trans);
624 		if (error)
625 			return error;
626 
627 		/*
628 		 * Close out trans and start the next one in the chain.
629 		 */
630 		error = xfs_trans_roll_inode(&args->trans, args->dp);
631 		if (error)
632 			return error;
633 	}
634 	return 0;
635 }
636