/*	$OpenBSD: buf.h,v 1.95 2014/08/31 21:08:48 tedu Exp $	*/
/*	$NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)buf.h	8.7 (Berkeley) 1/21/94
 */

#ifndef _SYS_BUF_H_
#define	_SYS_BUF_H_
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/mutex.h>

#define NOLIST ((struct buf *)0x87654321)

struct buf;
struct vnode;

struct buf_rb_bufs;
RB_PROTOTYPE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare);

LIST_HEAD(bufhead, buf);

/*
 * To avoid including <ufs/ffs/softdep.h>
 */

LIST_HEAD(workhead, worklist);

/*
 * Buffer queues
 */
#define BUFQ_NSCAN_N	128
#define BUFQ_FIFO	0
#define BUFQ_NSCAN	1
#define BUFQ_DEFAULT	BUFQ_NSCAN
#define BUFQ_HOWMANY	2

/*
 * Write limits for bufq - high and low water marks for the number of
 * kva slots any individual bufq may consume to parallelize writes
 * from the buffer cache.
 */
#define BUFQ_HI		128
#define BUFQ_LOW	64

struct bufq_impl;

struct bufq {
	SLIST_ENTRY(bufq)	 bufq_entries;
	struct mutex		 bufq_mtx;
	void			*bufq_data;
	u_int			 bufq_outstanding;
	u_int			 bufq_hi;
	u_int			 bufq_low;
	int			 bufq_waiting;
	int			 bufq_stop;
	int			 bufq_type;
	const struct bufq_impl	*bufq_impl;
};

int		 bufq_init(struct bufq *, int);
int		 bufq_switch(struct bufq *, int);
void		 bufq_destroy(struct bufq *);

void		 bufq_queue(struct bufq *, struct buf *);
struct buf	*bufq_dequeue(struct bufq *);
void		 bufq_requeue(struct bufq *, struct buf *);
int		 bufq_peek(struct bufq *);
void		 bufq_drain(struct bufq *);

void		 bufq_wait(struct bufq *, struct buf *);
void		 bufq_done(struct bufq *, struct buf *);
void		 bufq_quiesce(void);
void		 bufq_restart(void);
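
/*
 * Illustration only: how a block device driver typically drives a
 * bufq.  A minimal, hypothetical sketch - the driver "xd", its softc
 * and the xd* functions are made up - but the shape follows the real
 * disk drivers: the strategy routine enqueues, the start routine
 * dequeues, and the completion path calls bufq_done() before
 * biodone().
 */
#if 0
struct xd_softc {
	struct bufq	sc_bufq;	/* per-device I/O queue */
};

int
xdattach(struct xd_softc *sc)
{
	/* Pick the queueing discipline once, at attach time. */
	return (bufq_init(&sc->sc_bufq, BUFQ_DEFAULT));
}

void
xdstrategy(struct buf *bp)
{
	struct xd_softc *sc = xd_lookup(bp->b_dev);	/* hypothetical */

	/* Queue by the discipline chosen at bufq_init() time. */
	bufq_queue(&sc->sc_bufq, bp);
	xdstart(sc);
}

void
xdstart(struct xd_softc *sc)
{
	struct buf *bp;

	while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL) {
		/* program the hardware from bp->b_blkno/bp->b_bcount */
	}
}

void
xddone(struct xd_softc *sc, struct buf *bp)
{
	bufq_done(&sc->sc_bufq, bp);	/* drop the bufq accounting */
	biodone(bp);			/* complete the I/O */
}
#endif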

/* disksort */
struct bufq_disksort {
	struct buf	 *bqd_actf;
	struct buf	**bqd_actb;
};

/* fifo */
SIMPLEQ_HEAD(bufq_fifo_head, buf);
struct bufq_fifo {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
};

/* nscan */
SIMPLEQ_HEAD(bufq_nscan_head, buf);
struct bufq_nscan {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
};

/* bufq link in struct buf */
union bufq_data {
	struct bufq_disksort	bufq_data_disksort;
	struct bufq_fifo	bufq_data_fifo;
	struct bufq_nscan	bufq_data_nscan;
};

/*
 * These are currently used only by the soft dependency code, hence
 * are stored once in a global variable. If other subsystems wanted
 * to use these hooks, a pointer to a set of bio_ops could be added
 * to each buffer.
 */
extern struct bio_ops {
	void	(*io_start)(struct buf *);
	void	(*io_complete)(struct buf *);
	void	(*io_deallocate)(struct buf *);
	void	(*io_movedeps)(struct buf *, struct buf *);
	int	(*io_countdeps)(struct buf *, int, int);
} bioops;
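
/*
 * Illustration only: because bioops is a single global, a subsystem
 * installs its hooks once at initialization.  The handler names below
 * are hypothetical stand-ins; the real assignments are made by the
 * soft dependency code.
 */
#if 0
extern void	my_io_start(struct buf *);		/* hypothetical */
extern void	my_io_complete(struct buf *);		/* hypothetical */
extern void	my_io_deallocate(struct buf *);		/* hypothetical */
extern void	my_io_movedeps(struct buf *, struct buf *);
extern int	my_io_countdeps(struct buf *, int, int);

void
my_subsystem_init(void)
{
	bioops.io_start = my_io_start;
	bioops.io_complete = my_io_complete;
	bioops.io_deallocate = my_io_deallocate;
	bioops.io_movedeps = my_io_movedeps;
	bioops.io_countdeps = my_io_countdeps;
}
#endif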

/* XXX: disksort(); */
#define b_actf	b_bufq.bufq_data_disksort.bqd_actf
#define b_actb	b_bufq.bufq_data_disksort.bqd_actb

/* The buffer header describes an I/O operation in the kernel. */
struct buf {
	RB_ENTRY(buf) b_rbbufs;		/* vnode "hash" tree */
	LIST_ENTRY(buf) b_list;		/* All allocated buffers. */
	LIST_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
	struct	proc *b_proc;		/* Associated proc; NULL if kernel. */
	volatile long	b_flags;	/* B_* flags. */
	int	b_error;		/* Errno value. */
	long	b_bufsize;		/* Allocated buffer size. */
	long	b_bcount;		/* Valid bytes in buffer. */
	size_t	b_resid;		/* Remaining I/O. */
	dev_t	b_dev;			/* Device associated with buffer. */
	caddr_t	b_data;			/* Associated data. */
	void	*b_saveaddr;		/* Original b_data for physio. */

	TAILQ_ENTRY(buf) b_valist;	/* LRU of va to reuse. */

	union	bufq_data b_bufq;
	struct	bufq	  *b_bq;	/* What bufq this buf is on */

	struct uvm_object *b_pobj;	/* Object containing the pages */
	off_t	b_poffs;		/* Offset within object */

	daddr_t	b_lblkno;		/* Logical block number. */
	daddr_t	b_blkno;		/* Underlying physical block number. */
					/* Function to call upon completion.
					 * Will be called at splbio(). */
	void	(*b_iodone)(struct buf *);
	struct	vnode *b_vp;		/* Device vnode. */
	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
	int	b_dirtyend;		/* Offset of end of dirty region. */
	int	b_validoff;		/* Offset in buffer of valid region. */
	int	b_validend;		/* Offset of end of valid region. */
	struct	workhead b_dep;		/* List of filesystem dependencies. */
};
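
/*
 * Buffers hang off their vnode in the b_rbbufs red-black tree
 * (RB_PROTOTYPE above; the matching RB_GENERATE lives in the buffer
 * cache implementation).  A minimal sketch of the comparator,
 * assuming the tree is keyed on the logical block number:
 */
#if 0
int
rb_buf_compare(struct buf *b1, struct buf *b2)
{
	if (b1->b_lblkno < b2->b_lblkno)
		return (-1);
	if (b1->b_lblkno > b2->b_lblkno)
		return (1);
	return (0);
}
#endif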

/* Device driver compatibility definitions. */
#define	b_active b_bcount		/* Driver queue head: drive active. */

/*
 * These flags are kept in b_flags.
 */
#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
#define	B_NEEDCOMMIT	0x00000002	/* Needs committing to stable storage. */
#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
#define	B_BAD		0x00000008	/* Bad block revectoring in progress. */
#define	B_BUSY		0x00000010	/* I/O in progress. */
#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
#define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
#define	B_DONE		0x00000100	/* I/O completed. */
#define	B_EINTR		0x00000200	/* I/O was interrupted. */
#define	B_ERROR		0x00000400	/* I/O error occurred. */
#define	B_INVAL		0x00000800	/* Does not contain valid info. */
#define	B_NOCACHE	0x00001000	/* Do not cache block after use. */
#define	B_PHYS		0x00002000	/* I/O to user memory. */
#define	B_RAW		0x00004000	/* Set by physio for raw transfers. */
#define	B_READ		0x00008000	/* Read buffer. */
#define	B_WANTED	0x00010000	/* Process wants this buffer. */
#define	B_WRITEINPROG	0x00020000	/* Write in progress. */
#define	B_XXX		0x00040000	/* Debugging flag. */
#define	B_DEFERRED	0x00080000	/* Skipped over for cleaning. */
#define	B_SCANNED	0x00100000	/* Block already pushed during sync. */
#define	B_PDAEMON	0x00200000	/* I/O started by pagedaemon. */
#define	B_RELEASED	0x00400000	/* Free this buffer after its kvm. */
#define	B_WARM		0x00800000	/* Keep this buffer on warmqueue. */
#define	B_COLD		0x01000000	/* Keep this buffer on coldqueue. */

#define	B_BITS	"\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
    "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
    "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
    "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
    "\025SCANNED\026PDAEMON\027RELEASED\030WARM\031COLD"
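
/*
 * B_BITS is a printf(9) "%b" format string: the leading \20 gives the
 * numeric base (020 == 16, so the raw value prints in hex) and each
 * following \NNN names bit number NNN.  Illustration only; the
 * function name is hypothetical:
 */
#if 0
void
print_buf_flags(struct buf *bp)
{
	printf("buf %p flags %b\n", bp, (int)bp->b_flags, B_BITS);
	/* e.g. B_BUSY|B_DONE|B_READ -> "0x8110<BUSY,DONE,READ>" */
}
#endif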

/*
 * This structure describes a clustered I/O.  It is stored in the b_saveaddr
 * field of the buffer on which I/O is done.  At I/O completion, the cluster
 * callback uses the structure to parcel the I/O out to the individual
 * buffers, and then frees this structure.
 */
struct cluster_save {
	long	bs_bcount;		/* Saved b_bcount. */
	long	bs_bufsize;		/* Saved b_bufsize. */
	void	*bs_saveaddr;		/* Saved b_addr. */
	int	bs_nchildren;		/* Number of associated buffers. */
	struct buf **bs_children;	/* List of associated buffers. */
};

/*
 * Zero out the buffer's data area.
 */
#define	clrbuf(bp) {							\
	bzero((bp)->b_data, (u_int)(bp)->b_bcount);			\
	(bp)->b_resid = 0;						\
}

/* Flags to low-level allocation routines. */
#define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
#define B_SYNC		0x02	/* Do all allocations synchronously. */

struct cluster_info {
	daddr_t	ci_lastr;	/* last read (read-ahead) */
	daddr_t	ci_lastw;	/* last write (write cluster) */
	daddr_t	ci_cstart;	/* start block of cluster */
	daddr_t	ci_lasta;	/* last allocation */
	int	ci_clen;	/* length of current cluster */
	int	ci_ralen;	/* read-ahead length */
	daddr_t	ci_maxra;	/* last readahead block */
};

#ifdef _KERNEL
__BEGIN_DECLS
/* Kva slots (of size MAXPHYS) reserved for syncer and cleaner. */
#define RESERVE_SLOTS 4
/* Buffer cache pages reserved for syncer and cleaner. */
#define RESERVE_PAGES (RESERVE_SLOTS * MAXPHYS / PAGE_SIZE)
/* Minimum size of the buffer cache, in pages. */
#define BCACHE_MIN (RESERVE_PAGES * 2)
#define UNCLEAN_PAGES (bcstats.numbufpages - bcstats.numcleanpages)

extern struct proc *cleanerproc;
extern long bufpages;		/* Max number of pages for buffers' data */
extern struct pool bufpool;
extern struct bufhead bufhead;

void	bawrite(struct buf *);
void	bdwrite(struct buf *);
void	biodone(struct buf *);
int	biowait(struct buf *);
int	bread(struct vnode *, daddr_t, int, struct buf **);
int	breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
	    struct buf **);
void	brelse(struct buf *);
#define bremfree bufcache_take
void	bufinit(void);
void	buf_dirty(struct buf *);
void	buf_undirty(struct buf *);
int	bwrite(struct buf *);
struct buf *getblk(struct vnode *, daddr_t, int, int, int);
struct buf *geteblk(int);
struct buf *incore(struct vnode *, daddr_t);
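
/*
 * Illustration only: the classic bread()/brelse() cycle a filesystem
 * uses to look at one metadata block.  "examine_block" and its
 * arguments are hypothetical; note that bread() hands back a buffer
 * even on failure, so the error path must brelse() it too.
 */
#if 0
int
examine_block(struct vnode *vp, daddr_t lbn, int size)
{
	struct buf *bp;
	int error;

	error = bread(vp, lbn, size, &bp);
	if (error) {
		brelse(bp);	/* still holding the buffer on error */
		return (error);
	}
	/* ... inspect bp->b_data, up to bp->b_bcount valid bytes ... */
	brelse(bp);		/* return it to the buffer cache */
	return (0);
}
#endif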

/*
 * bufcache functions
 */
void bufcache_take(struct buf *);
void bufcache_release(struct buf *);

struct buf *bufcache_getcleanbuf(void);
struct buf *bufcache_getdirtybuf(void);

/*
 * buf_mem_init initializes the kvm handling for buffers.
 * buf_acquire sets the B_BUSY flag and ensures that the buffer is
 * mapped in the kvm.
 * buf_acquire_unmapped and buf_acquire_nomap set B_BUSY without
 * mapping the buffer into the kvm.
 * buf_release clears the B_BUSY flag and allows the buffer to become
 * unmapped.
 * buf_unmap is for internal use only. Unmaps the buffer from kvm.
 */
void	buf_mem_init(vsize_t);
void	buf_acquire(struct buf *);
void	buf_acquire_unmapped(struct buf *);
void	buf_acquire_nomap(struct buf *);
void	buf_map(struct buf *);
void	buf_release(struct buf *);
int	buf_dealloc_mem(struct buf *);
void	buf_fix_mapping(struct buf *, vsize_t);
void	buf_alloc_pages(struct buf *, vsize_t);
void	buf_free_pages(struct buf *);
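
/*
 * Illustration only: the B_BUSY discipline around a buffer's kvm
 * mapping.  "touch_buffer" is hypothetical; the point is that
 * bp->b_data may only be dereferenced between buf_acquire() and
 * buf_release().
 */
#if 0
void
touch_buffer(struct buf *bp)
{
	buf_acquire(bp);	/* sets B_BUSY, maps the buffer into kvm */
	/* ... bp->b_data is safe to use here ... */
	buf_release(bp);	/* clears B_BUSY; mapping may be reclaimed */
}
#endif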

void	minphys(struct buf *bp);
int	physio(void (*strategy)(struct buf *), dev_t dev, int flags,
	    void (*minphys)(struct buf *), struct uio *uio);
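
/*
 * Illustration only: a character device's raw read/write entry points
 * built on physio().  The "xd" names are hypothetical; physio() maps
 * the user's pages, issues B_PHYS/B_RAW transfers capped by the given
 * minphys, and waits for completion.
 */
#if 0
int
xdread(dev_t dev, struct uio *uio, int ioflag)
{
	return (physio(xdstrategy, dev, B_READ, minphys, uio));
}

int
xdwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (physio(xdstrategy, dev, B_WRITE, minphys, uio));
}
#endif
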
void	brelvp(struct buf *);
void	reassignbuf(struct buf *);
void	bgetvp(struct vnode *, struct buf *);

void	buf_replacevnode(struct buf *, struct vnode *);
void	buf_daemon(struct proc *);
int	bread_cluster(struct vnode *, daddr_t, int, struct buf **);

#ifdef DEBUG
void	buf_print(struct buf *);
#endif

static __inline void
buf_start(struct buf *bp)
{
	if (bioops.io_start)
		(*bioops.io_start)(bp);
}

static __inline void
buf_complete(struct buf *bp)
{
	if (bioops.io_complete)
		(*bioops.io_complete)(bp);
}

static __inline void
buf_deallocate(struct buf *bp)
{
	if (bioops.io_deallocate)
		(*bioops.io_deallocate)(bp);
}

static __inline void
buf_movedeps(struct buf *bp, struct buf *bp2)
{
	if (bioops.io_movedeps)
		(*bioops.io_movedeps)(bp, bp2);
}

static __inline int
buf_countdeps(struct buf *bp, int i, int islocked)
{
	if (bioops.io_countdeps)
		return ((*bioops.io_countdeps)(bp, i, islocked));
	else
		return (0);
}

void	cluster_write(struct buf *, struct cluster_info *, u_quad_t);

__END_DECLS
#endif /* _KERNEL */
#endif /* !_SYS_BUF_H_ */