1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
21 #include "xfs_dir2.h"
22 #include "xfs_attr.h"
23 #include "xfs_attr_leaf.h"
24 #include "xfs_attr_sf.h"
25 #include "xfs_attr_remote.h"
26 #include "xfs_bmap.h"
27 #include "xfs_bmap_util.h"
28 #include "xfs_exchmaps.h"
29 #include "xfs_exchrange.h"
30 #include "xfs_acl.h"
31 #include "xfs_parent.h"
32 #include "scrub/xfs_scrub.h"
33 #include "scrub/scrub.h"
34 #include "scrub/common.h"
35 #include "scrub/trace.h"
36 #include "scrub/repair.h"
37 #include "scrub/tempfile.h"
38 #include "scrub/tempexch.h"
39 #include "scrub/xfile.h"
40 #include "scrub/xfarray.h"
41 #include "scrub/xfblob.h"
42 #include "scrub/attr.h"
43 #include "scrub/reap.h"
44 #include "scrub/attr_repair.h"
45
46 /*
47 * Extended Attribute Repair
48 * =========================
49 *
 * We repair extended attributes by reading the attr leaf blocks looking for
 * attribute entries that look salvageable (name passes verifiers, value can
 * be retrieved, etc). Each extended attribute worth salvaging is stashed in
53 * memory, and the stashed entries are periodically replayed into a temporary
54 * file to constrain memory use. Batching the construction of the temporary
55 * extended attribute structure in this fashion reduces lock cycling of the
56 * file being repaired and the temporary file.
57 *
58 * When salvaging completes, the remaining stashed attributes are replayed to
59 * the temporary file. An atomic file contents exchange is used to commit the
60 * new xattr blocks to the file being repaired. This will disrupt attrmulti
61 * cursors.
62 */
63
64 struct xrep_xattr_key {
65 /* Cookie for retrieval of the xattr name. */
66 xfblob_cookie name_cookie;
67
68 /* Cookie for retrieval of the xattr value. */
69 xfblob_cookie value_cookie;
70
71 /* XFS_ATTR_* flags */
72 int flags;
73
74 /* Length of the value and name. */
75 uint32_t valuelen;
76 uint16_t namelen;
77 };
78
/*
 * Upper bound on the salvaged attr data (xattr_records plus xattr_blobs) that
 * may accumulate before it is written to the temp file: eight pages' worth.
 */
#define XREP_XATTR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
84
85 struct xrep_xattr {
86 struct xfs_scrub *sc;
87
88 /* Information for exchanging attr fork mappings at the end. */
89 struct xrep_tempexch tx;
90
91 /* xattr keys */
92 struct xfarray *xattr_records;
93
94 /* xattr values */
95 struct xfblob *xattr_blobs;
96
97 /* Number of attributes that we are salvaging. */
98 unsigned long long attrs_found;
99
100 /* Can we flush stashed attrs to the tempfile? */
101 bool can_flush;
102
103 /* Did the live update fail, and hence the repair is now out of date? */
104 bool live_update_aborted;
105
106 /* Lock protecting parent pointer updates */
107 struct mutex lock;
108
109 /* Fixed-size array of xrep_xattr_pptr structures. */
110 struct xfarray *pptr_recs;
111
112 /* Blobs containing parent pointer names. */
113 struct xfblob *pptr_names;
114
115 /* Hook to capture parent pointer updates. */
116 struct xfs_dir_hook dhook;
117
118 /* Scratch buffer for capturing parent pointers. */
119 struct xfs_da_args pptr_args;
120
121 /* Name buffer */
122 struct xfs_name xname;
123 char namebuf[MAXNAMELEN];
124 };
125
126 /* Create a parent pointer in the tempfile. */
127 #define XREP_XATTR_PPTR_ADD (1)
128
129 /* Remove a parent pointer from the tempfile. */
130 #define XREP_XATTR_PPTR_REMOVE (2)
131
132 /* A stashed parent pointer update. */
133 struct xrep_xattr_pptr {
134 /* Cookie for retrieval of the pptr name. */
135 xfblob_cookie name_cookie;
136
137 /* Parent pointer record. */
138 struct xfs_parent_rec pptr_rec;
139
140 /* Length of the pptr name. */
141 uint8_t namelen;
142
143 /* XREP_XATTR_PPTR_{ADD,REMOVE} */
144 uint8_t action;
145 };
146
147 /* Set up to recreate the extended attributes. */
148 int
xrep_setup_xattr(struct xfs_scrub * sc)149 xrep_setup_xattr(
150 struct xfs_scrub *sc)
151 {
152 if (xfs_has_parent(sc->mp))
153 xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
154
155 return xrep_tempfile_create(sc, S_IFREG);
156 }
157
158 /*
159 * Decide if we want to salvage this attribute. We don't bother with
160 * incomplete or oversized keys or values. The @value parameter can be null
161 * for remote attrs.
162 */
163 STATIC int
xrep_xattr_want_salvage(struct xrep_xattr * rx,unsigned int attr_flags,const void * name,int namelen,const void * value,int valuelen)164 xrep_xattr_want_salvage(
165 struct xrep_xattr *rx,
166 unsigned int attr_flags,
167 const void *name,
168 int namelen,
169 const void *value,
170 int valuelen)
171 {
172 if (attr_flags & XFS_ATTR_INCOMPLETE)
173 return false;
174 if (namelen > XATTR_NAME_MAX || namelen <= 0)
175 return false;
176 if (!xfs_attr_namecheck(attr_flags, name, namelen))
177 return false;
178 if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
179 return false;
180 if (attr_flags & XFS_ATTR_PARENT)
181 return xfs_parent_valuecheck(rx->sc->mp, value, valuelen);
182
183 return true;
184 }
185
186 /* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
187 STATIC int
xrep_xattr_salvage_key(struct xrep_xattr * rx,int flags,unsigned char * name,int namelen,unsigned char * value,int valuelen)188 xrep_xattr_salvage_key(
189 struct xrep_xattr *rx,
190 int flags,
191 unsigned char *name,
192 int namelen,
193 unsigned char *value,
194 int valuelen)
195 {
196 struct xrep_xattr_key key = {
197 .valuelen = valuelen,
198 .flags = flags & XFS_ATTR_NSP_ONDISK_MASK,
199 };
200 unsigned int i = 0;
201 int error = 0;
202
203 if (xchk_should_terminate(rx->sc, &error))
204 return error;
205
206 /*
207 * Truncate the name to the first character that would trip namecheck.
208 * If we no longer have a name after that, ignore this attribute.
209 */
210 if (flags & XFS_ATTR_PARENT) {
211 key.namelen = namelen;
212
213 trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
214 key.namelen, value, valuelen);
215 } else {
216 while (i < namelen && name[i] != 0)
217 i++;
218 if (i == 0)
219 return 0;
220 key.namelen = i;
221
222 trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name,
223 key.namelen, valuelen);
224 }
225
226 error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
227 key.namelen);
228 if (error)
229 return error;
230
231 error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
232 key.valuelen);
233 if (error)
234 return error;
235
236 error = xfarray_append(rx->xattr_records, &key);
237 if (error)
238 return error;
239
240 rx->attrs_found++;
241 return 0;
242 }
243
244 /*
245 * Record a shortform extended attribute key & value for later reinsertion
246 * into the inode.
247 */
248 STATIC int
xrep_xattr_salvage_sf_attr(struct xrep_xattr * rx,struct xfs_attr_sf_hdr * hdr,struct xfs_attr_sf_entry * sfe)249 xrep_xattr_salvage_sf_attr(
250 struct xrep_xattr *rx,
251 struct xfs_attr_sf_hdr *hdr,
252 struct xfs_attr_sf_entry *sfe)
253 {
254 struct xfs_scrub *sc = rx->sc;
255 struct xchk_xattr_buf *ab = sc->buf;
256 unsigned char *name = sfe->nameval;
257 unsigned char *value = &sfe->nameval[sfe->namelen];
258
259 if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr,
260 sfe->namelen))
261 return 0;
262
263 if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr,
264 sfe->valuelen))
265 return 0;
266
267 if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval,
268 sfe->namelen, value, sfe->valuelen))
269 return 0;
270
271 return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
272 sfe->namelen, value, sfe->valuelen);
273 }
274
275 /*
276 * Record a local format extended attribute key & value for later reinsertion
277 * into the inode.
278 */
279 STATIC int
xrep_xattr_salvage_local_attr(struct xrep_xattr * rx,struct xfs_attr_leaf_entry * ent,unsigned int nameidx,const char * buf_end,struct xfs_attr_leaf_name_local * lentry)280 xrep_xattr_salvage_local_attr(
281 struct xrep_xattr *rx,
282 struct xfs_attr_leaf_entry *ent,
283 unsigned int nameidx,
284 const char *buf_end,
285 struct xfs_attr_leaf_name_local *lentry)
286 {
287 struct xchk_xattr_buf *ab = rx->sc->buf;
288 unsigned char *value;
289 unsigned int valuelen;
290 unsigned int namesize;
291
292 /*
293 * Decode the leaf local entry format. If something seems wrong, we
294 * junk the attribute.
295 */
296 value = &lentry->nameval[lentry->namelen];
297 valuelen = be16_to_cpu(lentry->valuelen);
298 namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
299 if ((char *)lentry + namesize > buf_end)
300 return 0;
301 if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
302 lentry->namelen, value, valuelen))
303 return 0;
304 if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
305 return 0;
306
307 /* Try to save this attribute. */
308 return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
309 lentry->namelen, value, valuelen);
310 }
311
312 /*
313 * Record a remote format extended attribute key & value for later reinsertion
314 * into the inode.
315 */
316 STATIC int
xrep_xattr_salvage_remote_attr(struct xrep_xattr * rx,struct xfs_attr_leaf_entry * ent,unsigned int nameidx,const char * buf_end,struct xfs_attr_leaf_name_remote * rentry,unsigned int ent_idx,struct xfs_buf * leaf_bp)317 xrep_xattr_salvage_remote_attr(
318 struct xrep_xattr *rx,
319 struct xfs_attr_leaf_entry *ent,
320 unsigned int nameidx,
321 const char *buf_end,
322 struct xfs_attr_leaf_name_remote *rentry,
323 unsigned int ent_idx,
324 struct xfs_buf *leaf_bp)
325 {
326 struct xchk_xattr_buf *ab = rx->sc->buf;
327 struct xfs_da_args args = {
328 .trans = rx->sc->tp,
329 .dp = rx->sc->ip,
330 .index = ent_idx,
331 .geo = rx->sc->mp->m_attr_geo,
332 .owner = rx->sc->ip->i_ino,
333 .attr_filter = ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
334 .namelen = rentry->namelen,
335 .name = rentry->name,
336 .value = ab->value,
337 .valuelen = be32_to_cpu(rentry->valuelen),
338 };
339 unsigned int namesize;
340 int error;
341
342 /*
343 * Decode the leaf remote entry format. If something seems wrong, we
344 * junk the attribute. Note that we should never find a zero-length
345 * remote attribute value.
346 */
347 namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
348 if ((char *)rentry + namesize > buf_end)
349 return 0;
350 if (args.valuelen == 0 ||
351 !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
352 rentry->namelen, NULL, args.valuelen))
353 return 0;
354 if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
355 return 0;
356
357 /*
358 * Enlarge the buffer (if needed) to hold the value that we're trying
359 * to salvage from the old extended attribute data.
360 */
361 error = xchk_setup_xattr_buf(rx->sc, args.valuelen);
362 if (error == -ENOMEM)
363 error = -EDEADLOCK;
364 if (error)
365 return error;
366
367 /* Look up the remote value and stash it for reconstruction. */
368 error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
369 if (error || args.rmtblkno == 0)
370 goto err_free;
371
372 error = xfs_attr_rmtval_get(&args);
373 if (error)
374 goto err_free;
375
376 /* Try to save this attribute. */
377 error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
378 rentry->namelen, ab->value, args.valuelen);
379 err_free:
380 /* remote value was garbage, junk it */
381 if (error == -EFSBADCRC || error == -EFSCORRUPTED)
382 error = 0;
383 return error;
384 }
385
386 /* Extract every xattr key that we can from this attr fork block. */
387 STATIC int
xrep_xattr_recover_leaf(struct xrep_xattr * rx,struct xfs_buf * bp)388 xrep_xattr_recover_leaf(
389 struct xrep_xattr *rx,
390 struct xfs_buf *bp)
391 {
392 struct xfs_attr3_icleaf_hdr leafhdr;
393 struct xfs_scrub *sc = rx->sc;
394 struct xfs_mount *mp = sc->mp;
395 struct xfs_attr_leafblock *leaf;
396 struct xfs_attr_leaf_name_local *lentry;
397 struct xfs_attr_leaf_name_remote *rentry;
398 struct xfs_attr_leaf_entry *ent;
399 struct xfs_attr_leaf_entry *entries;
400 struct xchk_xattr_buf *ab = rx->sc->buf;
401 char *buf_end;
402 size_t off;
403 unsigned int nameidx;
404 unsigned int hdrsize;
405 int i;
406 int error = 0;
407
408 bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
409
410 /* Check the leaf header */
411 leaf = bp->b_addr;
412 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
413 hdrsize = xfs_attr3_leaf_hdr_size(leaf);
414 xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);
415 entries = xfs_attr3_leaf_entryp(leaf);
416
417 buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
418 for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
419 if (xchk_should_terminate(sc, &error))
420 return error;
421
422 /* Skip key if it conflicts with something else? */
423 off = (char *)ent - (char *)leaf;
424 if (!xchk_xattr_set_map(sc, ab->usedmap, off,
425 sizeof(xfs_attr_leaf_entry_t)))
426 continue;
427
428 /* Check the name information. */
429 nameidx = be16_to_cpu(ent->nameidx);
430 if (nameidx < leafhdr.firstused ||
431 nameidx >= mp->m_attr_geo->blksize)
432 continue;
433
434 if (ent->flags & XFS_ATTR_LOCAL) {
435 lentry = xfs_attr3_leaf_name_local(leaf, i);
436 error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
437 buf_end, lentry);
438 } else {
439 rentry = xfs_attr3_leaf_name_remote(leaf, i);
440 error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
441 buf_end, rentry, i, bp);
442 }
443 if (error)
444 return error;
445 }
446
447 return 0;
448 }
449
450 /* Try to recover shortform attrs. */
451 STATIC int
xrep_xattr_recover_sf(struct xrep_xattr * rx)452 xrep_xattr_recover_sf(
453 struct xrep_xattr *rx)
454 {
455 struct xfs_scrub *sc = rx->sc;
456 struct xchk_xattr_buf *ab = sc->buf;
457 struct xfs_attr_sf_hdr *hdr;
458 struct xfs_attr_sf_entry *sfe;
459 struct xfs_attr_sf_entry *next;
460 struct xfs_ifork *ifp;
461 unsigned char *end;
462 int i;
463 int error = 0;
464
465 ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK);
466 hdr = ifp->if_data;
467
468 bitmap_zero(ab->usedmap, ifp->if_bytes);
469 end = (unsigned char *)ifp->if_data + ifp->if_bytes;
470 xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr));
471
472 sfe = xfs_attr_sf_firstentry(hdr);
473 if ((unsigned char *)sfe > end)
474 return 0;
475
476 for (i = 0; i < hdr->count; i++) {
477 if (xchk_should_terminate(sc, &error))
478 return error;
479
480 next = xfs_attr_sf_nextentry(sfe);
481 if ((unsigned char *)next > end)
482 break;
483
484 if (xchk_xattr_set_map(sc, ab->usedmap,
485 (char *)sfe - (char *)hdr,
486 sizeof(struct xfs_attr_sf_entry))) {
487 /*
488 * No conflicts with the sf entry; let's save this
489 * attribute.
490 */
491 error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe);
492 if (error)
493 return error;
494 }
495
496 sfe = next;
497 }
498
499 return 0;
500 }
501
502 /*
503 * Try to return a buffer of xattr data for a given physical extent.
504 *
505 * Because the buffer cache get function complains if it finds a buffer
506 * matching the block number but not matching the length, we must be careful to
507 * look for incore buffers (up to the maximum length of a remote value) that
508 * could be hiding anywhere in the physical range. If we find an incore
509 * buffer, we can pass that to the caller. Optionally, read a single block and
510 * pass that back.
511 *
512 * Note the subtlety that remote attr value blocks for which there is no incore
513 * buffer will be passed to the callback one block at a time. These buffers
514 * will not have any ops attached and must be staled to prevent aliasing with
515 * multiblock buffers once we drop the ILOCK.
516 */
517 STATIC int
xrep_xattr_find_buf(struct xfs_mount * mp,xfs_fsblock_t fsbno,xfs_extlen_t max_len,bool can_read,struct xfs_buf ** bpp)518 xrep_xattr_find_buf(
519 struct xfs_mount *mp,
520 xfs_fsblock_t fsbno,
521 xfs_extlen_t max_len,
522 bool can_read,
523 struct xfs_buf **bpp)
524 {
525 struct xrep_bufscan scan = {
526 .daddr = XFS_FSB_TO_DADDR(mp, fsbno),
527 .max_sectors = xrep_bufscan_max_sectors(mp, max_len),
528 .daddr_step = XFS_FSB_TO_BB(mp, 1),
529 };
530 struct xfs_buf *bp;
531
532 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
533 *bpp = bp;
534 return 0;
535 }
536
537 if (!can_read) {
538 *bpp = NULL;
539 return 0;
540 }
541
542 return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1),
543 XBF_TRYLOCK, bpp, NULL);
544 }
545
546 /*
547 * Deal with a buffer that we found during our walk of the attr fork.
548 *
549 * Attribute leaf and node blocks are simple -- they're a single block, so we
550 * can walk them one at a time and we never have to worry about discontiguous
551 * multiblock buffers like we do for directories.
552 *
553 * Unfortunately, remote attr blocks add a lot of complexity here. Each disk
554 * block is totally self contained, in the sense that the v5 header provides no
555 * indication that there could be more data in the next block. The incore
556 * buffers can span multiple blocks, though they never cross extent records.
557 * However, they don't necessarily start or end on an extent record boundary.
558 * Therefore, we need a special buffer find function to walk the buffer cache
559 * for us.
560 *
561 * The caller must hold the ILOCK on the file being repaired. We use
562 * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
563 * own the block and don't want to hang the system on a potentially garbage
564 * buffer.
565 */
566 STATIC int
xrep_xattr_recover_block(struct xrep_xattr * rx,xfs_dablk_t dabno,xfs_fsblock_t fsbno,xfs_extlen_t max_len,xfs_extlen_t * actual_len)567 xrep_xattr_recover_block(
568 struct xrep_xattr *rx,
569 xfs_dablk_t dabno,
570 xfs_fsblock_t fsbno,
571 xfs_extlen_t max_len,
572 xfs_extlen_t *actual_len)
573 {
574 struct xfs_da_blkinfo *info;
575 struct xfs_buf *bp;
576 int error;
577
578 error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
579 if (error)
580 return error;
581 info = bp->b_addr;
582 *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
583
584 trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
585 be16_to_cpu(info->magic));
586
587 /*
588 * If the buffer has the right magic number for an attr leaf block and
589 * passes a structure check (we don't care about checksums), salvage
590 * as much as we can from the block. */
591 if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
592 xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
593 xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL)
594 error = xrep_xattr_recover_leaf(rx, bp);
595
596 /*
597 * If the buffer didn't already have buffer ops set, it was read in by
598 * the _find_buf function and could very well be /part/ of a multiblock
599 * remote block. Mark it stale so that it doesn't hang around in
600 * memory to cause problems.
601 */
602 if (bp->b_ops == NULL)
603 xfs_buf_stale(bp);
604
605 xfs_buf_relse(bp);
606 return error;
607 }
608
609 /* Insert one xattr key/value. */
610 STATIC int
xrep_xattr_insert_rec(struct xrep_xattr * rx,const struct xrep_xattr_key * key)611 xrep_xattr_insert_rec(
612 struct xrep_xattr *rx,
613 const struct xrep_xattr_key *key)
614 {
615 struct xfs_da_args args = {
616 .dp = rx->sc->tempip,
617 .attr_filter = key->flags,
618 .namelen = key->namelen,
619 .valuelen = key->valuelen,
620 .owner = rx->sc->ip->i_ino,
621 .geo = rx->sc->mp->m_attr_geo,
622 .whichfork = XFS_ATTR_FORK,
623 .op_flags = XFS_DA_OP_OKNOENT,
624 };
625 struct xchk_xattr_buf *ab = rx->sc->buf;
626 int error;
627
628 /*
629 * Grab pointers to the scrub buffer so that we can use them to insert
630 * attrs into the temp file.
631 */
632 args.name = ab->name;
633 args.value = ab->value;
634
635 /*
636 * The attribute name is stored near the end of the in-core buffer,
637 * though we reserve one more byte to ensure null termination.
638 */
639 ab->name[XATTR_NAME_MAX] = 0;
640
641 error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
642 key->namelen);
643 if (error)
644 return error;
645
646 error = xfblob_free(rx->xattr_blobs, key->name_cookie);
647 if (error)
648 return error;
649
650 error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
651 key->valuelen);
652 if (error)
653 return error;
654
655 error = xfblob_free(rx->xattr_blobs, key->value_cookie);
656 if (error)
657 return error;
658
659 ab->name[key->namelen] = 0;
660
661 if (key->flags & XFS_ATTR_PARENT) {
662 trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
663 ab->name, key->namelen, ab->value,
664 key->valuelen);
665 args.op_flags |= XFS_DA_OP_LOGGED;
666 } else {
667 trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
668 ab->name, key->namelen, key->valuelen);
669 }
670
671 /*
672 * xfs_attr_set creates and commits its own transaction. If the attr
673 * already exists, we'll just drop it during the rebuild.
674 */
675 xfs_attr_sethash(&args);
676 error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE, false);
677 if (error == -EEXIST)
678 error = 0;
679
680 return error;
681 }
682
683 /*
684 * Periodically flush salvaged attributes to the temporary file. This is done
685 * to reduce the memory requirements of the xattr rebuild because files can
686 * contain millions of attributes.
687 */
688 STATIC int
xrep_xattr_flush_stashed(struct xrep_xattr * rx)689 xrep_xattr_flush_stashed(
690 struct xrep_xattr *rx)
691 {
692 xfarray_idx_t array_cur;
693 int error;
694
695 /*
696 * Entering this function, the scrub context has a reference to the
697 * inode being repaired, the temporary file, and a scrub transaction
698 * that we use during xattr salvaging to avoid livelocking if there
699 * are cycles in the xattr structures. We hold ILOCK_EXCL on both
700 * the inode being repaired, though it is not ijoined to the scrub
701 * transaction.
702 *
703 * To constrain kernel memory use, we occasionally flush salvaged
704 * xattrs from the xfarray and xfblob structures into the temporary
705 * file in preparation for exchanging the xattr structures at the end.
706 * Updating the temporary file requires a transaction, so we commit the
707 * scrub transaction and drop the two ILOCKs so that xfs_attr_set can
708 * allocate whatever transaction it wants.
709 *
710 * We still hold IOLOCK_EXCL on the inode being repaired, which
711 * prevents anyone from modifying the damaged xattr data while we
712 * repair it.
713 */
714 error = xrep_trans_commit(rx->sc);
715 if (error)
716 return error;
717 xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
718
719 /*
720 * Take the IOLOCK of the temporary file while we modify xattrs. This
721 * isn't strictly required because the temporary file is never revealed
722 * to userspace, but we follow the same locking rules. We still hold
723 * sc->ip's IOLOCK.
724 */
725 error = xrep_tempfile_iolock_polled(rx->sc);
726 if (error)
727 return error;
728
729 /* Add all the salvaged attrs to the temporary file. */
730 foreach_xfarray_idx(rx->xattr_records, array_cur) {
731 struct xrep_xattr_key key;
732
733 error = xfarray_load(rx->xattr_records, array_cur, &key);
734 if (error)
735 return error;
736
737 error = xrep_xattr_insert_rec(rx, &key);
738 if (error)
739 return error;
740 }
741
742 /* Empty out both arrays now that we've added the entries. */
743 xfarray_truncate(rx->xattr_records);
744 xfblob_truncate(rx->xattr_blobs);
745
746 xrep_tempfile_iounlock(rx->sc);
747
748 /* Recreate the salvage transaction and relock the inode. */
749 error = xchk_trans_alloc(rx->sc, 0);
750 if (error)
751 return error;
752 xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
753 return 0;
754 }
755
756 /* Decide if we've stashed too much xattr data in memory. */
757 static inline bool
xrep_xattr_want_flush_stashed(struct xrep_xattr * rx)758 xrep_xattr_want_flush_stashed(
759 struct xrep_xattr *rx)
760 {
761 unsigned long long bytes;
762
763 if (!rx->can_flush)
764 return false;
765
766 bytes = xfarray_bytes(rx->xattr_records) +
767 xfblob_bytes(rx->xattr_blobs);
768 return bytes > XREP_XATTR_MAX_STASH_BYTES;
769 }
770
771 /*
772 * Did we observe rename changing parent pointer xattrs while we were flushing
773 * salvaged attrs?
774 */
775 static inline bool
xrep_xattr_saw_pptr_conflict(struct xrep_xattr * rx)776 xrep_xattr_saw_pptr_conflict(
777 struct xrep_xattr *rx)
778 {
779 bool ret;
780
781 ASSERT(rx->can_flush);
782
783 if (!xfs_has_parent(rx->sc->mp))
784 return false;
785
786 xfs_assert_ilocked(rx->sc->ip, XFS_ILOCK_EXCL);
787
788 mutex_lock(&rx->lock);
789 ret = xfarray_bytes(rx->pptr_recs) > 0;
790 mutex_unlock(&rx->lock);
791
792 return ret;
793 }
794
795 /*
796 * Reset the entire repair state back to initial conditions, now that we've
797 * detected a parent pointer update to the attr structure while we were
798 * flushing salvaged attrs. See the locking notes in dir_repair.c for more
799 * information on why this is all necessary.
800 */
801 STATIC int
xrep_xattr_full_reset(struct xrep_xattr * rx)802 xrep_xattr_full_reset(
803 struct xrep_xattr *rx)
804 {
805 struct xfs_scrub *sc = rx->sc;
806 struct xfs_attr_sf_hdr *hdr;
807 struct xfs_ifork *ifp = &sc->tempip->i_af;
808 int error;
809
810 trace_xrep_xattr_full_reset(sc->ip, sc->tempip);
811
812 /* The temporary file's data fork had better not be in btree format. */
813 if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
814 ASSERT(0);
815 return -EIO;
816 }
817
818 /*
819 * We begin in transaction context with sc->ip ILOCKed but not joined
820 * to the transaction. To reset to the initial state, we must hold
821 * sc->ip's ILOCK to prevent rename from updating parent pointer
822 * information and the tempfile's ILOCK to clear its contents.
823 */
824 xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
825 xrep_tempfile_ilock_both(sc);
826 xfs_trans_ijoin(sc->tp, sc->ip, 0);
827 xfs_trans_ijoin(sc->tp, sc->tempip, 0);
828
829 /*
830 * Free all the blocks of the attr fork of the temp file, and reset
831 * it back to local format.
832 */
833 if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
834 error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
835 if (error)
836 return error;
837
838 ASSERT(ifp->if_bytes == 0);
839 ifp->if_format = XFS_DINODE_FMT_LOCAL;
840 xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK);
841 }
842
843 /* Reinitialize the attr fork to an empty shortform structure. */
844 hdr = ifp->if_data;
845 memset(hdr, 0, sizeof(*hdr));
846 hdr->totsize = cpu_to_be16(sizeof(*hdr));
847 xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
848
849 /*
850 * Roll this transaction to commit our reset ondisk. The tempfile
851 * should no longer be joined to the transaction, so we drop its ILOCK.
852 * This should leave us in transaction context with sc->ip ILOCKed but
853 * not joined to the transaction.
854 */
855 error = xrep_roll_trans(sc);
856 if (error)
857 return error;
858 xrep_tempfile_iunlock(sc);
859
860 /*
861 * Erase any accumulated parent pointer updates now that we've erased
862 * the tempfile's attr fork. We're resetting the entire repair state
863 * back to where we were initially, except now we won't flush salvaged
864 * xattrs until the very end.
865 */
866 mutex_lock(&rx->lock);
867 xfarray_truncate(rx->pptr_recs);
868 xfblob_truncate(rx->pptr_names);
869 mutex_unlock(&rx->lock);
870
871 rx->can_flush = false;
872 rx->attrs_found = 0;
873
874 ASSERT(xfarray_bytes(rx->xattr_records) == 0);
875 ASSERT(xfblob_bytes(rx->xattr_blobs) == 0);
876 return 0;
877 }
878
879 /* Extract as many attribute keys and values as we can. */
880 STATIC int
xrep_xattr_recover(struct xrep_xattr * rx)881 xrep_xattr_recover(
882 struct xrep_xattr *rx)
883 {
884 struct xfs_bmbt_irec got;
885 struct xfs_scrub *sc = rx->sc;
886 struct xfs_da_geometry *geo = sc->mp->m_attr_geo;
887 xfs_fileoff_t offset;
888 xfs_extlen_t len;
889 xfs_dablk_t dabno;
890 int nmap;
891 int error;
892
893 restart:
894 /*
895 * Iterate each xattr leaf block in the attr fork to scan them for any
896 * attributes that we might salvage.
897 */
898 for (offset = 0;
899 offset < XFS_MAX_FILEOFF;
900 offset = got.br_startoff + got.br_blockcount) {
901 nmap = 1;
902 error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
903 &got, &nmap, XFS_BMAPI_ATTRFORK);
904 if (error)
905 return error;
906 if (nmap != 1)
907 return -EFSCORRUPTED;
908 if (!xfs_bmap_is_written_extent(&got))
909 continue;
910
911 for (dabno = round_up(got.br_startoff, geo->fsbcount);
912 dabno < got.br_startoff + got.br_blockcount;
913 dabno += len) {
914 xfs_fileoff_t curr_offset = dabno - got.br_startoff;
915 xfs_extlen_t maxlen;
916
917 if (xchk_should_terminate(rx->sc, &error))
918 return error;
919
920 maxlen = min_t(xfs_filblks_t, INT_MAX,
921 got.br_blockcount - curr_offset);
922 error = xrep_xattr_recover_block(rx, dabno,
923 curr_offset + got.br_startblock,
924 maxlen, &len);
925 if (error)
926 return error;
927
928 if (xrep_xattr_want_flush_stashed(rx)) {
929 error = xrep_xattr_flush_stashed(rx);
930 if (error)
931 return error;
932
933 if (xrep_xattr_saw_pptr_conflict(rx)) {
934 error = xrep_xattr_full_reset(rx);
935 if (error)
936 return error;
937
938 goto restart;
939 }
940 }
941 }
942 }
943
944 return 0;
945 }
946
947 /*
948 * Reset the extended attribute fork to a state where we can start re-adding
949 * the salvaged attributes.
950 */
951 STATIC int
xrep_xattr_fork_remove(struct xfs_scrub * sc,struct xfs_inode * ip)952 xrep_xattr_fork_remove(
953 struct xfs_scrub *sc,
954 struct xfs_inode *ip)
955 {
956 struct xfs_attr_sf_hdr *hdr;
957 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
958
959 /*
960 * If the data fork is in btree format, we can't change di_forkoff
961 * because we could run afoul of the rule that the data fork isn't
962 * supposed to be in btree format if there's enough space in the fork
963 * that it could have used extents format. Instead, reinitialize the
964 * attr fork to have a shortform structure with zero attributes.
965 */
966 if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
967 ifp->if_format = XFS_DINODE_FMT_LOCAL;
968 hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
969 XFS_ATTR_FORK);
970 hdr->count = 0;
971 hdr->totsize = cpu_to_be16(sizeof(*hdr));
972 xfs_trans_log_inode(sc->tp, ip,
973 XFS_ILOG_CORE | XFS_ILOG_ADATA);
974 return 0;
975 }
976
977 /* If we still have attr fork extents, something's wrong. */
978 if (ifp->if_nextents != 0) {
979 struct xfs_iext_cursor icur;
980 struct xfs_bmbt_irec irec;
981 unsigned int i = 0;
982
983 xfs_emerg(sc->mp,
984 "inode 0x%llx attr fork still has %llu attr extents, format %d?!",
985 ip->i_ino, ifp->if_nextents, ifp->if_format);
986 for_each_xfs_iext(ifp, &icur, &irec) {
987 xfs_err(sc->mp,
988 "[%u]: startoff %llu startblock %llu blockcount %llu state %u",
989 i++, irec.br_startoff,
990 irec.br_startblock, irec.br_blockcount,
991 irec.br_state);
992 }
993 ASSERT(0);
994 return -EFSCORRUPTED;
995 }
996
997 xfs_attr_fork_remove(ip, sc->tp);
998 return 0;
999 }
1000
1001 /*
1002 * Free all the attribute fork blocks of the file being repaired and delete the
1003 * fork. The caller must ILOCK the scrub file and join it to the transaction.
1004 * This function returns with the inode joined to a clean transaction.
1005 */
1006 int
xrep_xattr_reset_fork(struct xfs_scrub * sc)1007 xrep_xattr_reset_fork(
1008 struct xfs_scrub *sc)
1009 {
1010 int error;
1011
1012 trace_xrep_xattr_reset_fork(sc->ip, sc->ip);
1013
1014 /* Unmap all the attr blocks. */
1015 if (xfs_ifork_has_extents(&sc->ip->i_af)) {
1016 error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
1017 if (error)
1018 return error;
1019 }
1020
1021 error = xrep_xattr_fork_remove(sc, sc->ip);
1022 if (error)
1023 return error;
1024
1025 return xfs_trans_roll_inode(&sc->tp, sc->ip);
1026 }
1027
1028 /*
1029 * Free all the attribute fork blocks of the temporary file and delete the attr
1030 * fork. The caller must ILOCK the tempfile and join it to the transaction.
1031 * This function returns with the inode joined to a clean scrub transaction.
1032 */
1033 int
xrep_xattr_reset_tempfile_fork(struct xfs_scrub * sc)1034 xrep_xattr_reset_tempfile_fork(
1035 struct xfs_scrub *sc)
1036 {
1037 int error;
1038
1039 trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
1040
1041 /*
1042 * Wipe out the attr fork of the temp file so that regular inode
1043 * inactivation won't trip over the corrupt attr fork.
1044 */
1045 if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
1046 error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
1047 if (error)
1048 return error;
1049 }
1050
1051 return xrep_xattr_fork_remove(sc, sc->tempip);
1052 }
1053
1054 /*
1055 * Find all the extended attributes for this inode by scraping them out of the
1056 * attribute key blocks by hand, and flushing them into the temp file.
1057 * When we're done, free the staging memory before exchanging the xattr
1058 * structures to reduce memory usage.
1059 */
1060 STATIC int
xrep_xattr_salvage_attributes(struct xrep_xattr * rx)1061 xrep_xattr_salvage_attributes(
1062 struct xrep_xattr *rx)
1063 {
1064 struct xfs_inode *ip = rx->sc->ip;
1065 int error;
1066
1067 /* Short format xattrs are easy! */
1068 if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
1069 error = xrep_xattr_recover_sf(rx);
1070 if (error)
1071 return error;
1072
1073 return xrep_xattr_flush_stashed(rx);
1074 }
1075
1076 /*
1077 * For non-inline xattr structures, the salvage function scans the
1078 * buffer cache looking for potential attr leaf blocks. The scan
1079 * requires the ability to lock any buffer found and runs independently
1080 * of any transaction <-> buffer item <-> buffer linkage. Therefore,
1081 * roll the transaction to ensure there are no buffers joined. We hold
1082 * the ILOCK independently of the transaction.
1083 */
1084 error = xfs_trans_roll(&rx->sc->tp);
1085 if (error)
1086 return error;
1087
1088 error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
1089 if (error)
1090 return error;
1091
1092 error = xrep_xattr_recover(rx);
1093 if (error)
1094 return error;
1095
1096 return xrep_xattr_flush_stashed(rx);
1097 }
1098
1099 /*
1100 * Add this stashed incore parent pointer to the temporary file. The caller
1101 * must hold the tempdir's IOLOCK, must not hold any ILOCKs, and must not be in
1102 * transaction context.
1103 */
1104 STATIC int
xrep_xattr_replay_pptr_update(struct xrep_xattr * rx,const struct xfs_name * xname,struct xrep_xattr_pptr * pptr)1105 xrep_xattr_replay_pptr_update(
1106 struct xrep_xattr *rx,
1107 const struct xfs_name *xname,
1108 struct xrep_xattr_pptr *pptr)
1109 {
1110 struct xfs_scrub *sc = rx->sc;
1111 int error;
1112
1113 switch (pptr->action) {
1114 case XREP_XATTR_PPTR_ADD:
1115 /* Create parent pointer. */
1116 trace_xrep_xattr_replay_parentadd(sc->tempip, xname,
1117 &pptr->pptr_rec);
1118
1119 error = xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
1120 &pptr->pptr_rec, &rx->pptr_args);
1121 ASSERT(error != -EEXIST);
1122 return error;
1123 case XREP_XATTR_PPTR_REMOVE:
1124 /* Remove parent pointer. */
1125 trace_xrep_xattr_replay_parentremove(sc->tempip, xname,
1126 &pptr->pptr_rec);
1127
1128 error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
1129 &pptr->pptr_rec, &rx->pptr_args);
1130 ASSERT(error != -ENOATTR);
1131 return error;
1132 }
1133
1134 ASSERT(0);
1135 return -EIO;
1136 }
1137
1138 /*
1139 * Flush stashed parent pointer updates that have been recorded by the scanner.
1140 * This is done to reduce the memory requirements of the xattr rebuild, since
1141 * files can have a lot of hardlinks and the fs can be busy.
1142 *
1143 * Caller must not hold transactions or ILOCKs. Caller must hold the tempfile
1144 * IOLOCK.
1145 */
1146 STATIC int
xrep_xattr_replay_pptr_updates(struct xrep_xattr * rx)1147 xrep_xattr_replay_pptr_updates(
1148 struct xrep_xattr *rx)
1149 {
1150 xfarray_idx_t array_cur;
1151 int error;
1152
1153 mutex_lock(&rx->lock);
1154 foreach_xfarray_idx(rx->pptr_recs, array_cur) {
1155 struct xrep_xattr_pptr pptr;
1156
1157 error = xfarray_load(rx->pptr_recs, array_cur, &pptr);
1158 if (error)
1159 goto out_unlock;
1160
1161 error = xfblob_loadname(rx->pptr_names, pptr.name_cookie,
1162 &rx->xname, pptr.namelen);
1163 if (error)
1164 goto out_unlock;
1165 mutex_unlock(&rx->lock);
1166
1167 error = xrep_xattr_replay_pptr_update(rx, &rx->xname, &pptr);
1168 if (error)
1169 return error;
1170
1171 mutex_lock(&rx->lock);
1172 }
1173
1174 /* Empty out both arrays now that we've added the entries. */
1175 xfarray_truncate(rx->pptr_recs);
1176 xfblob_truncate(rx->pptr_names);
1177 mutex_unlock(&rx->lock);
1178 return 0;
1179 out_unlock:
1180 mutex_unlock(&rx->lock);
1181 return error;
1182 }
1183
1184 /*
1185 * Remember that we want to create a parent pointer in the tempfile. These
1186 * stashed actions will be replayed later.
1187 */
1188 STATIC int
xrep_xattr_stash_parentadd(struct xrep_xattr * rx,const struct xfs_name * name,const struct xfs_inode * dp)1189 xrep_xattr_stash_parentadd(
1190 struct xrep_xattr *rx,
1191 const struct xfs_name *name,
1192 const struct xfs_inode *dp)
1193 {
1194 struct xrep_xattr_pptr pptr = {
1195 .action = XREP_XATTR_PPTR_ADD,
1196 .namelen = name->len,
1197 };
1198 int error;
1199
1200 trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name);
1201
1202 xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1203 error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1204 if (error)
1205 return error;
1206
1207 return xfarray_append(rx->pptr_recs, &pptr);
1208 }
1209
1210 /*
1211 * Remember that we want to remove a parent pointer from the tempfile. These
1212 * stashed actions will be replayed later.
1213 */
1214 STATIC int
xrep_xattr_stash_parentremove(struct xrep_xattr * rx,const struct xfs_name * name,const struct xfs_inode * dp)1215 xrep_xattr_stash_parentremove(
1216 struct xrep_xattr *rx,
1217 const struct xfs_name *name,
1218 const struct xfs_inode *dp)
1219 {
1220 struct xrep_xattr_pptr pptr = {
1221 .action = XREP_XATTR_PPTR_REMOVE,
1222 .namelen = name->len,
1223 };
1224 int error;
1225
1226 trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name);
1227
1228 xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1229 error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1230 if (error)
1231 return error;
1232
1233 return xfarray_append(rx->pptr_recs, &pptr);
1234 }
1235
1236 /*
1237 * Capture dirent updates being made by other threads. We will have to replay
1238 * the parent pointer updates before exchanging attr forks.
1239 */
1240 STATIC int
xrep_xattr_live_dirent_update(struct notifier_block * nb,unsigned long action,void * data)1241 xrep_xattr_live_dirent_update(
1242 struct notifier_block *nb,
1243 unsigned long action,
1244 void *data)
1245 {
1246 struct xfs_dir_update_params *p = data;
1247 struct xrep_xattr *rx;
1248 struct xfs_scrub *sc;
1249 int error;
1250
1251 rx = container_of(nb, struct xrep_xattr, dhook.dirent_hook.nb);
1252 sc = rx->sc;
1253
1254 /*
1255 * This thread updated a dirent that points to the file that we're
1256 * repairing, so stash the update for replay against the temporary
1257 * file.
1258 */
1259 if (p->ip->i_ino != sc->ip->i_ino)
1260 return NOTIFY_DONE;
1261
1262 mutex_lock(&rx->lock);
1263 if (p->delta > 0)
1264 error = xrep_xattr_stash_parentadd(rx, p->name, p->dp);
1265 else
1266 error = xrep_xattr_stash_parentremove(rx, p->name, p->dp);
1267 if (error)
1268 rx->live_update_aborted = true;
1269 mutex_unlock(&rx->lock);
1270 return NOTIFY_DONE;
1271 }
1272
1273 /*
1274 * Prepare both inodes' attribute forks for an exchange. Promote the tempfile
1275 * from short format to leaf format, and if the file being repaired has a short
1276 * format attr fork, turn it into an empty extent list.
1277 */
1278 STATIC int
xrep_xattr_swap_prep(struct xfs_scrub * sc,bool temp_local,bool ip_local)1279 xrep_xattr_swap_prep(
1280 struct xfs_scrub *sc,
1281 bool temp_local,
1282 bool ip_local)
1283 {
1284 int error;
1285
1286 /*
1287 * If the tempfile's attributes are in shortform format, convert that
1288 * to a single leaf extent so that we can use the atomic mapping
1289 * exchange.
1290 */
1291 if (temp_local) {
1292 struct xfs_da_args args = {
1293 .dp = sc->tempip,
1294 .geo = sc->mp->m_attr_geo,
1295 .whichfork = XFS_ATTR_FORK,
1296 .trans = sc->tp,
1297 .total = 1,
1298 .owner = sc->ip->i_ino,
1299 };
1300
1301 error = xfs_attr_shortform_to_leaf(&args);
1302 if (error)
1303 return error;
1304
1305 /*
1306 * Roll the deferred log items to get us back to a clean
1307 * transaction.
1308 */
1309 error = xfs_defer_finish(&sc->tp);
1310 if (error)
1311 return error;
1312 }
1313
1314 /*
1315 * If the file being repaired had a shortform attribute fork, convert
1316 * that to an empty extent list in preparation for the atomic mapping
1317 * exchange.
1318 */
1319 if (ip_local) {
1320 struct xfs_ifork *ifp;
1321
1322 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1323
1324 xfs_idestroy_fork(ifp);
1325 ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1326 ifp->if_nextents = 0;
1327 ifp->if_bytes = 0;
1328 ifp->if_data = NULL;
1329 ifp->if_height = 0;
1330
1331 xfs_trans_log_inode(sc->tp, sc->ip,
1332 XFS_ILOG_CORE | XFS_ILOG_ADATA);
1333 }
1334
1335 return 0;
1336 }
1337
1338 /* Exchange the temporary file's attribute fork with the one being repaired. */
1339 int
xrep_xattr_swap(struct xfs_scrub * sc,struct xrep_tempexch * tx)1340 xrep_xattr_swap(
1341 struct xfs_scrub *sc,
1342 struct xrep_tempexch *tx)
1343 {
1344 bool ip_local, temp_local;
1345 int error = 0;
1346
1347 ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1348 temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1349
1350 /*
1351 * If the both files have a local format attr fork and the rebuilt
1352 * xattr data would fit in the repaired file's attr fork, just copy
1353 * the contents from the tempfile and declare ourselves done.
1354 */
1355 if (ip_local && temp_local) {
1356 int forkoff;
1357 int newsize;
1358
1359 newsize = xfs_attr_sf_totsize(sc->tempip);
1360 forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
1361 if (forkoff > 0) {
1362 sc->ip->i_forkoff = forkoff;
1363 xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
1364 return 0;
1365 }
1366 }
1367
1368 /* Otherwise, make sure both attr forks are in block-mapping mode. */
1369 error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
1370 if (error)
1371 return error;
1372
1373 return xrep_tempexch_contents(sc, tx);
1374 }
1375
1376 /*
1377 * Finish replaying stashed parent pointer updates, allocate a transaction for
1378 * exchanging extent mappings, and take the ILOCKs of both files before we
1379 * commit the new extended attribute structure.
1380 */
1381 STATIC int
xrep_xattr_finalize_tempfile(struct xrep_xattr * rx)1382 xrep_xattr_finalize_tempfile(
1383 struct xrep_xattr *rx)
1384 {
1385 struct xfs_scrub *sc = rx->sc;
1386 int error;
1387
1388 if (!xfs_has_parent(sc->mp))
1389 return xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1390
1391 /*
1392 * Repair relies on the ILOCK to quiesce all possible xattr updates.
1393 * Replay all queued parent pointer updates into the tempfile before
1394 * exchanging the contents, even if that means dropping the ILOCKs and
1395 * the transaction.
1396 */
1397 do {
1398 error = xrep_xattr_replay_pptr_updates(rx);
1399 if (error)
1400 return error;
1401
1402 error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1403 if (error)
1404 return error;
1405
1406 if (xfarray_length(rx->pptr_recs) == 0)
1407 break;
1408
1409 xchk_trans_cancel(sc);
1410 xrep_tempfile_iunlock_both(sc);
1411 } while (!xchk_should_terminate(sc, &error));
1412 return error;
1413 }
1414
1415 /*
1416 * Exchange the new extended attribute data (which we created in the tempfile)
1417 * with the file being repaired.
1418 */
1419 STATIC int
xrep_xattr_rebuild_tree(struct xrep_xattr * rx)1420 xrep_xattr_rebuild_tree(
1421 struct xrep_xattr *rx)
1422 {
1423 struct xfs_scrub *sc = rx->sc;
1424 int error;
1425
1426 /*
1427 * If we didn't find any attributes to salvage, repair the file by
1428 * zapping its attr fork.
1429 */
1430 if (rx->attrs_found == 0) {
1431 xfs_trans_ijoin(sc->tp, sc->ip, 0);
1432 error = xrep_xattr_reset_fork(sc);
1433 if (error)
1434 return error;
1435
1436 goto forget_acls;
1437 }
1438
1439 trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);
1440
1441 /*
1442 * Commit the repair transaction and drop the ILOCKs so that we can use
1443 * the atomic file content exchange helper functions to compute the
1444 * correct resource reservations.
1445 *
1446 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
1447 * modifications, but there's nothing to prevent userspace from reading
1448 * the attributes until we're ready for the exchange operation. Reads
1449 * will return -EIO without shutting down the fs, so we're ok with
1450 * that.
1451 */
1452 error = xrep_trans_commit(sc);
1453 if (error)
1454 return error;
1455
1456 xchk_iunlock(sc, XFS_ILOCK_EXCL);
1457
1458 /*
1459 * Take the IOLOCK on the temporary file so that we can run xattr
1460 * operations with the same locks held as we would for a normal file.
1461 * We still hold sc->ip's IOLOCK.
1462 */
1463 error = xrep_tempfile_iolock_polled(rx->sc);
1464 if (error)
1465 return error;
1466
1467 /*
1468 * Allocate transaction, lock inodes, and make sure that we've replayed
1469 * all the stashed parent pointer updates to the temp file. After this
1470 * point, we're ready to exchange attr fork mappings.
1471 */
1472 error = xrep_xattr_finalize_tempfile(rx);
1473 if (error)
1474 return error;
1475
1476 /*
1477 * Exchange the blocks mapped by the tempfile's attr fork with the file
1478 * being repaired. The old attr blocks will then be attached to the
1479 * tempfile, so reap its attr fork.
1480 */
1481 error = xrep_xattr_swap(sc, &rx->tx);
1482 if (error)
1483 return error;
1484
1485 error = xrep_xattr_reset_tempfile_fork(sc);
1486 if (error)
1487 return error;
1488
1489 /*
1490 * Roll to get a transaction without any inodes joined to it. Then we
1491 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1492 * the scrub target file.
1493 */
1494 error = xfs_trans_roll(&sc->tp);
1495 if (error)
1496 return error;
1497
1498 xrep_tempfile_iunlock(sc);
1499 xrep_tempfile_iounlock(sc);
1500
1501 forget_acls:
1502 /* Invalidate cached ACLs now that we've reloaded all the xattrs. */
1503 xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
1504 xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
1505 return 0;
1506 }
1507
1508 /* Tear down all the incore scan stuff we created. */
1509 STATIC void
xrep_xattr_teardown(struct xrep_xattr * rx)1510 xrep_xattr_teardown(
1511 struct xrep_xattr *rx)
1512 {
1513 if (xfs_has_parent(rx->sc->mp))
1514 xfs_dir_hook_del(rx->sc->mp, &rx->dhook);
1515 if (rx->pptr_names)
1516 xfblob_destroy(rx->pptr_names);
1517 if (rx->pptr_recs)
1518 xfarray_destroy(rx->pptr_recs);
1519 xfblob_destroy(rx->xattr_blobs);
1520 xfarray_destroy(rx->xattr_records);
1521 mutex_destroy(&rx->lock);
1522 kfree(rx);
1523 }
1524
1525 /* Set up the filesystem scan so we can regenerate extended attributes. */
1526 STATIC int
xrep_xattr_setup_scan(struct xfs_scrub * sc,struct xrep_xattr ** rxp)1527 xrep_xattr_setup_scan(
1528 struct xfs_scrub *sc,
1529 struct xrep_xattr **rxp)
1530 {
1531 struct xrep_xattr *rx;
1532 char *descr;
1533 int max_len;
1534 int error;
1535
1536 rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
1537 if (!rx)
1538 return -ENOMEM;
1539 rx->sc = sc;
1540 rx->can_flush = true;
1541 rx->xname.name = rx->namebuf;
1542
1543 mutex_init(&rx->lock);
1544
1545 /*
1546 * Allocate enough memory to handle loading local attr values from the
1547 * xfblob data while flushing stashed attrs to the temporary file.
1548 * We only realloc the buffer when salvaging remote attr values.
1549 */
1550 max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
1551 error = xchk_setup_xattr_buf(rx->sc, max_len);
1552 if (error == -ENOMEM)
1553 error = -EDEADLOCK;
1554 if (error)
1555 goto out_rx;
1556
1557 /* Set up some staging for salvaged attribute keys and values */
1558 descr = xchk_xfile_ino_descr(sc, "xattr keys");
1559 error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
1560 &rx->xattr_records);
1561 kfree(descr);
1562 if (error)
1563 goto out_rx;
1564
1565 descr = xchk_xfile_ino_descr(sc, "xattr names");
1566 error = xfblob_create(descr, &rx->xattr_blobs);
1567 kfree(descr);
1568 if (error)
1569 goto out_keys;
1570
1571 if (xfs_has_parent(sc->mp)) {
1572 ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
1573
1574 descr = xchk_xfile_ino_descr(sc,
1575 "xattr retained parent pointer entries");
1576 error = xfarray_create(descr, 0,
1577 sizeof(struct xrep_xattr_pptr),
1578 &rx->pptr_recs);
1579 kfree(descr);
1580 if (error)
1581 goto out_values;
1582
1583 descr = xchk_xfile_ino_descr(sc,
1584 "xattr retained parent pointer names");
1585 error = xfblob_create(descr, &rx->pptr_names);
1586 kfree(descr);
1587 if (error)
1588 goto out_pprecs;
1589
1590 xfs_dir_hook_setup(&rx->dhook, xrep_xattr_live_dirent_update);
1591 error = xfs_dir_hook_add(sc->mp, &rx->dhook);
1592 if (error)
1593 goto out_ppnames;
1594 }
1595
1596 *rxp = rx;
1597 return 0;
1598 out_ppnames:
1599 xfblob_destroy(rx->pptr_names);
1600 out_pprecs:
1601 xfarray_destroy(rx->pptr_recs);
1602 out_values:
1603 xfblob_destroy(rx->xattr_blobs);
1604 out_keys:
1605 xfarray_destroy(rx->xattr_records);
1606 out_rx:
1607 mutex_destroy(&rx->lock);
1608 kfree(rx);
1609 return error;
1610 }
1611
1612 /*
1613 * Repair the extended attribute metadata.
1614 *
1615 * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
1616 * The buffer cache in XFS can't handle aliased multiblock buffers, so this
1617 * might misbehave if the attr fork is crosslinked with other filesystem
1618 * metadata.
1619 */
1620 int
xrep_xattr(struct xfs_scrub * sc)1621 xrep_xattr(
1622 struct xfs_scrub *sc)
1623 {
1624 struct xrep_xattr *rx = NULL;
1625 int error;
1626
1627 if (!xfs_inode_hasattr(sc->ip))
1628 return -ENOENT;
1629
1630 /* The rmapbt is required to reap the old attr fork. */
1631 if (!xfs_has_rmapbt(sc->mp))
1632 return -EOPNOTSUPP;
1633 /* We require atomic file exchange range to rebuild anything. */
1634 if (!xfs_has_exchange_range(sc->mp))
1635 return -EOPNOTSUPP;
1636
1637 error = xrep_xattr_setup_scan(sc, &rx);
1638 if (error)
1639 return error;
1640
1641 ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1642
1643 error = xrep_xattr_salvage_attributes(rx);
1644 if (error)
1645 goto out_scan;
1646
1647 if (rx->live_update_aborted) {
1648 error = -EIO;
1649 goto out_scan;
1650 }
1651
1652 /* Last chance to abort before we start committing fixes. */
1653 if (xchk_should_terminate(sc, &error))
1654 goto out_scan;
1655
1656 error = xrep_xattr_rebuild_tree(rx);
1657 if (error)
1658 goto out_scan;
1659
1660 out_scan:
1661 xrep_xattr_teardown(rx);
1662 return error;
1663 }
1664