xref: /linux/fs/xfs/libxfs/xfs_health.h (revision 37056912)
1 /* SPDX-License-Identifier: GPL-2.0+ */
2 /*
3  * Copyright (C) 2019 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #ifndef __XFS_HEALTH_H__
7 #define __XFS_HEALTH_H__
8 
9 /*
10  * In-Core Filesystem Health Assessments
11  * =====================================
12  *
13  * We'd like to be able to summarize the current health status of the
14  * filesystem so that the administrator knows when it's necessary to schedule
15  * some downtime for repairs.  Until then, we would also like to avoid abrupt
16  * shutdowns due to corrupt metadata.
17  *
18  * The online scrub feature evaluates the health of all filesystem metadata.
19  * When scrub detects corruption in a piece of metadata it will set the
20  * corresponding sickness flag, and repair will clear it if successful.  If
21  * problems remain at unmount time, we can also request manual intervention by
22  * logging a notice to run xfs_repair.
23  *
24  * Each health tracking group uses a pair of fields for reporting.  The
25  * "checked" field tells us if a given piece of metadata has ever been examined,
26  * and the "sick" field tells us if that piece was found to need repairs.
27  * Therefore we can conclude that for a given sick flag value:
28  *
29  *  - checked && sick   => metadata needs repair
30  *  - checked && !sick  => metadata is ok
31  *  - !checked && sick  => errors have been observed during normal operation,
32  *                         but the metadata has not been checked thoroughly
33  *  - !checked && !sick => has not been examined since mount
34  *
35  * Evidence of health problems can be sorted into three basic categories:
36  *
37  * a) Primary evidence, which signals that something is defective within the
38  *    general grouping of metadata.
39  *
40  * b) Secondary evidence, which are side effects of a primary problem but are
41  *    not themselves problems.  These can be forgotten when the primary
42  *    health problems are addressed.
43  *
44  * c) Indirect evidence, which points to something being wrong in another
45  *    group, but we had to release resources and this is all that's left of
46  *    that state.
47  */
48 
/*
 * Forward declarations for every structure that the prototypes in this
 * header reference through a pointer.  Declaring the tags here keeps a
 * prototype from introducing a struct tag whose scope is limited to that
 * one parameter list.
 */
struct xfs_mount;
struct xfs_perag;
struct xfs_inode;
struct xfs_fsop_geom;
struct xfs_ag_geometry;
struct xfs_bulkstat;
struct xfs_btree_cur;
struct xfs_da_args;
55 
/*
 * Sick flags for metadata that spans the entire filesystem.  Every flag
 * occupies its own bit so that several can be combined into one mask.
 */
#define XFS_SICK_FS_COUNTERS	(1 << 0)  /* summary counters */
#define XFS_SICK_FS_UQUOTA	(1 << 1)  /* user quota */
#define XFS_SICK_FS_GQUOTA	(1 << 2)  /* group quota */
#define XFS_SICK_FS_PQUOTA	(1 << 3)  /* project quota */
#define XFS_SICK_FS_QUOTACHECK	(1 << 4)  /* quota counts */
#define XFS_SICK_FS_NLINKS	(1 << 5)  /* inode link counts */
63 
/* Sick flags for the metadata of the realtime volume, one bit per item. */
#define XFS_SICK_RT_BITMAP	(1 << 0)  /* realtime bitmap */
#define XFS_SICK_RT_SUMMARY	(1 << 1)  /* realtime summary */
67 
/* Sick flags for allocation group (AG) metadata, one bit per item. */
#define XFS_SICK_AG_SB		(1 << 0)  /* superblock */
#define XFS_SICK_AG_AGF		(1 << 1)  /* AGF header */
#define XFS_SICK_AG_AGFL	(1 << 2)  /* AGFL header */
#define XFS_SICK_AG_AGI		(1 << 3)  /* AGI header */
#define XFS_SICK_AG_BNOBT	(1 << 4)  /* free space by block */
#define XFS_SICK_AG_CNTBT	(1 << 5)  /* free space by length */
#define XFS_SICK_AG_INOBT	(1 << 6)  /* inode index */
#define XFS_SICK_AG_FINOBT	(1 << 7)  /* free inode index */
#define XFS_SICK_AG_RMAPBT	(1 << 8)  /* reverse mappings */
#define XFS_SICK_AG_REFCNTBT	(1 << 9)  /* reference counts */
#define XFS_SICK_AG_INODES	(1 << 10) /* inactivated bad inodes */
80 
/* Sick flags for the metadata of a single inode, one bit per item. */
#define XFS_SICK_INO_CORE	(1 << 0)  /* inode core */
#define XFS_SICK_INO_BMBTD	(1 << 1)  /* data fork */
#define XFS_SICK_INO_BMBTA	(1 << 2)  /* attr fork */
#define XFS_SICK_INO_BMBTC	(1 << 3)  /* cow fork */
#define XFS_SICK_INO_DIR	(1 << 4)  /* directory */
#define XFS_SICK_INO_XATTR	(1 << 5)  /* extended attributes */
#define XFS_SICK_INO_SYMLINK	(1 << 6)  /* symbolic link remote target */
#define XFS_SICK_INO_PARENT	(1 << 7)  /* parent pointers */

/* Flags recording that part of an inode has been erased. */
#define XFS_SICK_INO_BMBTD_ZAPPED	(1 << 8)  /* data fork erased */
#define XFS_SICK_INO_BMBTA_ZAPPED	(1 << 9)  /* attr fork erased */
#define XFS_SICK_INO_DIR_ZAPPED		(1 << 10) /* directory erased */
#define XFS_SICK_INO_SYMLINK_ZAPPED	(1 << 11) /* symlink erased */

/* Don't propagate sick status to ag health summary during inactivation */
#define XFS_SICK_INO_FORGET	(1 << 12)

/* Another observable inode health issue; it just uses a later bit. */
#define XFS_SICK_INO_DIRTREE	(1 << 13)  /* directory tree structure */
99 
/*
 * Masks collecting the flags that count as primary evidence of health
 * problems in each group.
 */
#define XFS_SICK_FS_PRIMARY \
	(XFS_SICK_FS_COUNTERS | XFS_SICK_FS_UQUOTA | XFS_SICK_FS_GQUOTA | \
	 XFS_SICK_FS_PQUOTA | XFS_SICK_FS_QUOTACHECK | XFS_SICK_FS_NLINKS)

#define XFS_SICK_RT_PRIMARY \
	(XFS_SICK_RT_BITMAP | XFS_SICK_RT_SUMMARY)

#define XFS_SICK_AG_PRIMARY \
	(XFS_SICK_AG_SB | XFS_SICK_AG_AGF | XFS_SICK_AG_AGFL | \
	 XFS_SICK_AG_AGI | XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT | \
	 XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT | XFS_SICK_AG_RMAPBT | \
	 XFS_SICK_AG_REFCNTBT)

#define XFS_SICK_INO_PRIMARY \
	(XFS_SICK_INO_CORE | XFS_SICK_INO_BMBTD | XFS_SICK_INO_BMBTA | \
	 XFS_SICK_INO_BMBTC | XFS_SICK_INO_DIR | XFS_SICK_INO_XATTR | \
	 XFS_SICK_INO_SYMLINK | XFS_SICK_INO_PARENT | XFS_SICK_INO_DIRTREE)

/* Every inode flag that records an erased ("zapped") structure. */
#define XFS_SICK_INO_ZAPPED \
	(XFS_SICK_INO_BMBTD_ZAPPED | XFS_SICK_INO_BMBTA_ZAPPED | \
	 XFS_SICK_INO_DIR_ZAPPED | XFS_SICK_INO_SYMLINK_ZAPPED)
136 
/*
 * Masks of secondary state: flags that are related to health problems
 * without being primary evidence of them.  Only inodes have such a flag.
 */
#define XFS_SICK_FS_SECONDARY	(0)
#define XFS_SICK_RT_SECONDARY	(0)
#define XFS_SICK_AG_SECONDARY	(0)
#define XFS_SICK_INO_SECONDARY	(XFS_SICK_INO_FORGET)

/*
 * Masks of indirect evidence: flags that point at health problems
 * elsewhere.  Only allocation groups have such a flag.
 */
#define XFS_SICK_FS_INDIRECT	(0)
#define XFS_SICK_RT_INDIRECT	(0)
#define XFS_SICK_AG_INDIRECT	(XFS_SICK_AG_INODES)
#define XFS_SICK_INO_INDIRECT	(0)
148 
/*
 * Complete health masks: for each group, the union of its primary,
 * secondary and indirect masks.  The inode mask additionally includes the
 * zapped flags.
 */
#define XFS_SICK_FS_ALL \
	(XFS_SICK_FS_PRIMARY | XFS_SICK_FS_SECONDARY | XFS_SICK_FS_INDIRECT)

#define XFS_SICK_RT_ALL \
	(XFS_SICK_RT_PRIMARY | XFS_SICK_RT_SECONDARY | XFS_SICK_RT_INDIRECT)

#define XFS_SICK_AG_ALL \
	(XFS_SICK_AG_PRIMARY | XFS_SICK_AG_SECONDARY | XFS_SICK_AG_INDIRECT)

#define XFS_SICK_INO_ALL \
	(XFS_SICK_INO_PRIMARY | XFS_SICK_INO_SECONDARY | \
	 XFS_SICK_INO_INDIRECT | XFS_SICK_INO_ZAPPED)
166 
167 /*
168  * These functions must be provided by the xfs implementation.  Function
169  * behavior with respect to the first argument should be as follows:
170  *
171  * xfs_*_mark_sick:        Set the sick flags and do not set checked flags.
172  *                         Runtime code should call this upon encountering
173  *                         a corruption.
174  *
175  * xfs_*_mark_corrupt:     Set the sick and checked flags simultaneously.
176  *                         Fsck tools should call this when corruption is
177  *                         found.
178  *
179  * xfs_*_mark_healthy:     Clear the sick flags and set the checked flags.
180  *                         Fsck tools should call this after correcting errors.
181  *
182  * xfs_*_measure_sickness: Return the sick and checked status in the provided
183  *                         out parameters.
184  */
185 
186 void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask);
187 void xfs_fs_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
188 void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
189 void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
190 		unsigned int *checked);
191 
192 void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask);
193 void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
194 void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask);
195 void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
196 		unsigned int *checked);
197 
198 void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
199 		unsigned int mask);
200 void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask);
201 void xfs_ag_mark_corrupt(struct xfs_perag *pag, unsigned int mask);
202 void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask);
203 void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick,
204 		unsigned int *checked);
205 
206 void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask);
207 void xfs_inode_mark_corrupt(struct xfs_inode *ip, unsigned int mask);
208 void xfs_inode_mark_healthy(struct xfs_inode *ip, unsigned int mask);
209 void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
210 		unsigned int *checked);
211 
212 void xfs_health_unmount(struct xfs_mount *mp);
213 void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
214 void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
215 void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
216 void xfs_da_mark_sick(struct xfs_da_args *args);
217 
218 /* Now some helpers. */
219 
/*
 * Report whether any of the filesystem-wide sick flags selected by @mask
 * is set for @mp.  Only the sick state obtained from
 * xfs_fs_measure_sickness() is examined; the checked state is ignored.
 */
static inline bool
xfs_fs_has_sickness(struct xfs_mount *mp, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;	/* required out parameter, unused */

	xfs_fs_measure_sickness(mp, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
228 
/*
 * Report whether any of the realtime sick flags selected by @mask is set
 * for @mp.  Only the sick state obtained from xfs_rt_measure_sickness() is
 * examined; the checked state is ignored.
 */
static inline bool
xfs_rt_has_sickness(struct xfs_mount *mp, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;	/* required out parameter, unused */

	xfs_rt_measure_sickness(mp, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
237 
/*
 * Report whether any of the allocation group sick flags selected by @mask
 * is set for @pag.  Only the sick state obtained from
 * xfs_ag_measure_sickness() is examined; the checked state is ignored.
 */
static inline bool
xfs_ag_has_sickness(struct xfs_perag *pag, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;	/* required out parameter, unused */

	xfs_ag_measure_sickness(pag, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
246 
/*
 * Report whether any of the inode sick flags selected by @mask is set for
 * @ip.  Only the sick state obtained from xfs_inode_measure_sickness() is
 * examined; the checked state is ignored.
 */
static inline bool
xfs_inode_has_sickness(struct xfs_inode *ip, unsigned int mask)
{
	unsigned int	sick_flags;
	unsigned int	checked_flags;	/* required out parameter, unused */

	xfs_inode_measure_sickness(ip, &sick_flags, &checked_flags);
	return (sick_flags & mask) != 0;
}
255 
/*
 * Filesystem-wide metadata counts as healthy when xfs_fs_has_sickness()
 * finds no sick flag at all; an all-ones mask tests every flag at once.
 */
static inline bool
xfs_fs_is_healthy(struct xfs_mount *mp)
{
	if (xfs_fs_has_sickness(mp, ~0U))
		return false;
	return true;
}
261 
/*
 * Realtime metadata counts as healthy when xfs_rt_has_sickness() finds no
 * sick flag at all; an all-ones mask tests every flag at once.
 */
static inline bool
xfs_rt_is_healthy(struct xfs_mount *mp)
{
	if (xfs_rt_has_sickness(mp, ~0U))
		return false;
	return true;
}
267 
/*
 * An allocation group counts as healthy when xfs_ag_has_sickness() finds no
 * sick flag at all; an all-ones mask tests every flag at once.
 */
static inline bool
xfs_ag_is_healthy(struct xfs_perag *pag)
{
	if (xfs_ag_has_sickness(pag, ~0U))
		return false;
	return true;
}
273 
/*
 * An inode counts as healthy when xfs_inode_has_sickness() finds no sick
 * flag at all; an all-ones mask tests every flag at once.
 */
static inline bool
xfs_inode_is_healthy(struct xfs_inode *ip)
{
	if (xfs_inode_has_sickness(ip, ~0U))
		return false;
	return true;
}
279 
/*
 * NOTE(review): judging by the names and argument types, these report
 * health state through the filesystem geometry, AG geometry and bulkstat
 * structures respectively -- confirm against the implementation.
 */
void xfs_fsop_geom_health(struct xfs_mount *mp,
		struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag,
		struct xfs_ag_geometry *ageo);
void xfs_bulkstat_health(struct xfs_inode *ip,
		struct xfs_bulkstat *bs);
283 
/*
 * Decide whether an error code reports a metadata problem, i.e. whether it
 * equals -EFSCORRUPTED or -EFSBADCRC.  The result is wrapped in unlikely().
 *
 * The argument is copied into a local first so that it is evaluated exactly
 * once, even when the caller passes an expression with side effects.
 */
#define xfs_metadata_is_sick(error) \
({ \
	__typeof__(error)	__xmis_error = (error); \
	unlikely(__xmis_error == -EFSCORRUPTED || \
		 __xmis_error == -EFSBADCRC); \
})
286 
287 #endif	/* __XFS_HEALTH_H__ */
288