/*	$OpenBSD: buf.h,v 1.93 2013/11/21 01:16:52 dlg Exp $	*/
/*	$NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)buf.h	8.7 (Berkeley) 1/21/94
 */

#ifndef _SYS_BUF_H_
#define	_SYS_BUF_H_
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/mutex.h>

#define NOLIST ((struct buf *)0x87654321)

struct buf;
struct vnode;

struct buf_rb_bufs;
RB_PROTOTYPE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare);

LIST_HEAD(bufhead, buf);

/*
 * To avoid including <ufs/ffs/softdep.h>
 */

LIST_HEAD(workhead, worklist);

/*
 * Buffer queues
 */
#define BUFQ_NSCAN_N	128
#define BUFQ_FIFO	0
#define BUFQ_NSCAN	1
#define BUFQ_DEFAULT	BUFQ_NSCAN
#define BUFQ_HOWMANY	2

/*
 * Write limits for bufq - high and low water marks for how many kva
 * slots any individual bufq may consume to parallelize writes from
 * the buffer cache.
 */
#define BUFQ_HI		128
#define BUFQ_LOW	64

struct bufq_impl;

struct bufq {
	SLIST_ENTRY(bufq)	 bufq_entries;
	struct mutex		 bufq_mtx;
	void			*bufq_data;
	u_int			 bufq_outstanding;
	u_int			 bufq_hi;
	u_int			 bufq_low;
	int			 bufq_waiting;
	int			 bufq_stop;
	int			 bufq_type;
	const struct bufq_impl	*bufq_impl;
};

int		 bufq_init(struct bufq *, int);
int		 bufq_switch(struct bufq *, int);
void		 bufq_destroy(struct bufq *);

void		 bufq_queue(struct bufq *, struct buf *);
struct buf	*bufq_dequeue(struct bufq *);
void		 bufq_requeue(struct bufq *, struct buf *);
int		 bufq_peek(struct bufq *);
void		 bufq_drain(struct bufq *);

void		 bufq_wait(struct bufq *, struct buf *);
void		 bufq_done(struct bufq *, struct buf *);
void		 bufq_quiesce(void);
void		 bufq_restart(void);

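/*
 * A minimal usage sketch of the bufq interface, not a definitive
 * driver template.  The xx_softc layout and the xxstrategy()/xxdone()
 * names are hypothetical; only the bufq_* calls are declared above.
 *
 *	struct xx_softc {
 *		struct bufq	sc_bufq;
 *	} *sc;
 *
 *	At attach time:
 *		bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);
 *
 *	In xxstrategy(), queue the buffer, then feed the hardware:
 *		bufq_queue(&sc->sc_bufq, bp);
 *		while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL)
 *			(submit bp to the hardware)
 *
 *	In xxdone(), once the transfer finishes:
 *		bufq_done(&sc->sc_bufq, bp);
 *		biodone(bp);
 */
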
/* disksort */
struct bufq_disksort {
	struct buf	 *bqd_actf;
	struct buf	**bqd_actb;
};

/* fifo */
SIMPLEQ_HEAD(bufq_fifo_head, buf);
struct bufq_fifo {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
};

/* nscan */
SIMPLEQ_HEAD(bufq_nscan_head, buf);
struct bufq_nscan {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
};

/* bufq link in struct buf */
union bufq_data {
	struct bufq_disksort	bufq_data_disksort;
	struct bufq_fifo	bufq_data_fifo;
	struct bufq_nscan	bufq_data_nscan;
};

/*
 * These are currently used only by the soft dependency code, hence
 * are stored once in a global variable. If other subsystems wanted
 * to use these hooks, a pointer to a set of bio_ops could be added
 * to each buffer.
 */
extern struct bio_ops {
	void	(*io_start)(struct buf *);
	void	(*io_complete)(struct buf *);
	void	(*io_deallocate)(struct buf *);
	void	(*io_movedeps)(struct buf *, struct buf *);
	int	(*io_countdeps)(struct buf *, int, int);
} bioops;

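/*
 * Sketch: a subsystem installs its hooks by filling in the global
 * bioops at initialization time.  The softdep_* names below are
 * illustrative of the soft dependency code, the only current user;
 * see ufs/ffs/softdep.c for the real assignments.
 *
 *	bioops.io_start = softdep_disk_io_initiation;
 *	bioops.io_complete = softdep_disk_write_complete;
 *	bioops.io_deallocate = softdep_deallocate_dependencies;
 *	bioops.io_movedeps = softdep_move_dependencies;
 *	bioops.io_countdeps = softdep_count_dependencies;
 */
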
/* XXX: disksort(); */
#define b_actf	b_bufq.bufq_data_disksort.bqd_actf
#define b_actb	b_bufq.bufq_data_disksort.bqd_actb

/* The buffer header describes an I/O operation in the kernel. */
struct buf {
	RB_ENTRY(buf) b_rbbufs;		/* vnode "hash" tree */
	LIST_ENTRY(buf) b_list;		/* All allocated buffers. */
	LIST_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
	time_t	b_synctime;		/* Time this buffer should be flushed. */
	struct	proc *b_proc;		/* Associated proc; NULL if kernel. */
	volatile long	b_flags;	/* B_* flags. */
	int	b_error;		/* Errno value. */
	long	b_bufsize;		/* Allocated buffer size. */
	long	b_bcount;		/* Valid bytes in buffer. */
	size_t	b_resid;		/* Remaining I/O. */
	dev_t	b_dev;			/* Device associated with buffer. */
	caddr_t	b_data;			/* Associated data. */
	void	*b_saveaddr;		/* Original b_data for physio. */

	TAILQ_ENTRY(buf) b_valist;	/* LRU of va to reuse. */

	union	bufq_data b_bufq;
	struct	bufq	  *b_bq;	/* What bufq this buf is on. */

	struct uvm_object *b_pobj;	/* Object containing the pages. */
	off_t	b_poffs;		/* Offset within object. */

	daddr_t	b_lblkno;		/* Logical block number. */
	daddr_t	b_blkno;		/* Underlying physical block number. */
					/* Function to call upon completion.
					 * Will be called at splbio(). */
	void	(*b_iodone)(struct buf *);
	struct	vnode *b_vp;		/* Device vnode. */
	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
	int	b_dirtyend;		/* Offset of end of dirty region. */
	int	b_validoff;		/* Offset in buffer of valid region. */
	int	b_validend;		/* Offset of end of valid region. */
	struct	workhead b_dep;		/* List of filesystem dependencies. */
};

/* Device driver compatibility definitions. */
#define	b_active b_bcount		/* Driver queue head: drive active. */

/*
 * These flags are kept in b_flags.
 */
#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
#define	B_NEEDCOMMIT	0x00000002	/* Needs committing to stable storage. */
#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
#define	B_BAD		0x00000008	/* Bad block revectoring in progress. */
#define	B_BUSY		0x00000010	/* I/O in progress. */
#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
#define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
#define	B_DONE		0x00000100	/* I/O completed. */
#define	B_EINTR		0x00000200	/* I/O was interrupted. */
#define	B_ERROR		0x00000400	/* I/O error occurred. */
#define	B_INVAL		0x00000800	/* Does not contain valid info. */
#define	B_NOCACHE	0x00001000	/* Do not cache block after use. */
#define	B_PHYS		0x00002000	/* I/O to user memory. */
#define	B_RAW		0x00004000	/* Set by physio for raw transfers. */
#define	B_READ		0x00008000	/* Read buffer. */
#define	B_WANTED	0x00010000	/* Process wants this buffer. */
#define	B_WRITEINPROG	0x00020000	/* Write in progress. */
#define	B_XXX		0x00040000	/* Debugging flag. */
#define	B_DEFERRED	0x00080000	/* Skipped over for cleaning. */
#define	B_SCANNED	0x00100000	/* Block already pushed during sync. */
#define	B_PDAEMON	0x00200000	/* I/O started by pagedaemon. */
#define	B_RELEASED	0x00400000	/* Free this buffer after its kvm. */

#define	B_BITS	"\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
    "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
    "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
    "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
    "\025SCANNED\026PDAEMON\027RELEASED"
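
/*
 * Sketch: an asynchronous transfer with a completion callback.
 * Setting B_CALL makes biodone() invoke b_iodone at splbio();
 * xxdone() is a hypothetical handler.
 *
 *	bp->b_flags |= B_ASYNC | B_CALL;
 *	bp->b_iodone = xxdone;
 *	VOP_STRATEGY(bp);
 */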

/*
 * This structure describes a clustered I/O.  It is stored in the
 * b_saveaddr field of the buffer on which I/O is done.  At I/O
 * completion, the cluster callback uses the structure to parcel the
 * I/O out to the individual buffers, and then frees this structure.
 */
struct cluster_save {
	long	bs_bcount;		/* Saved b_bcount. */
	long	bs_bufsize;		/* Saved b_bufsize. */
	void	*bs_saveaddr;		/* Saved b_addr. */
	int	bs_nchildren;		/* Number of associated buffers. */
	struct buf **bs_children;	/* List of associated buffers. */
};

/*
 * Zero out the buffer's data area.
 */
#define	clrbuf(bp) {							\
	bzero((bp)->b_data, (u_int)(bp)->b_bcount);			\
	(bp)->b_resid = 0;						\
}

/* Flags to low-level allocation routines. */
#define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
#define B_SYNC		0x02	/* Do all allocations synchronously. */

struct cluster_info {
	daddr_t	ci_lastr;	/* last read (read-ahead) */
	daddr_t	ci_lastw;	/* last write (write cluster) */
	daddr_t	ci_cstart;	/* start block of cluster */
	daddr_t	ci_lasta;	/* last allocation */
	int	ci_clen;	/* length of current cluster */
	int	ci_ralen;	/* read-ahead length */
	daddr_t	ci_maxra;	/* last readahead block */
};

#ifdef _KERNEL
__BEGIN_DECLS
/* Kva slots (of size MAXPHYS) reserved for syncer and cleaner. */
#define RESERVE_SLOTS 4
/* Buffer cache pages reserved for syncer and cleaner. */
#define RESERVE_PAGES (RESERVE_SLOTS * MAXPHYS / PAGE_SIZE)
/* Minimum size of the buffer cache, in pages. */
#define BCACHE_MIN (RESERVE_PAGES * 2)
#define UNCLEAN_PAGES (bcstats.numbufpages - bcstats.numcleanpages)

extern struct proc *cleanerproc;
extern long bufpages;		/* Max number of pages for buffers' data */
extern struct pool bufpool;
extern struct bufhead bufhead;

void	bawrite(struct buf *);
void	bdwrite(struct buf *);
void	biodone(struct buf *);
int	biowait(struct buf *);
int	bread(struct vnode *, daddr_t, int, struct buf **);
int	breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
	    struct buf **);
void	brelse(struct buf *);
void	bremfree(struct buf *);
void	bufinit(void);
void	buf_dirty(struct buf *);
void	buf_undirty(struct buf *);
int	bwrite(struct buf *);
struct buf *getblk(struct vnode *, daddr_t, int, int, int);
struct buf *geteblk(int);
struct buf *incore(struct vnode *, daddr_t);

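/*
 * Typical read path, as a minimal sketch: bread() returns a busy
 * buffer with valid data, or an error.  Either way the caller must
 * release the buffer with brelse() (or write it back with bwrite()
 * or bdwrite() after modifying b_data).
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lblkno, size, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	(use bp->b_data; valid bytes are bp->b_bcount)
 *	brelse(bp);
 */
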
/*
 * buf_mem_init initializes the kvm handling for buffers.
 * buf_acquire sets the B_BUSY flag and ensures that the buffer is
 * mapped in the kvm.
 * buf_release clears the B_BUSY flag and allows the buffer to become
 * unmapped.
 * buf_unmap is for internal use only.  It unmaps the buffer from kvm.
 */
void	buf_mem_init(vsize_t);
void	buf_acquire(struct buf *);
void	buf_acquire_unmapped(struct buf *);
void	buf_acquire_nomap(struct buf *);
void	buf_map(struct buf *);
void	buf_release(struct buf *);
int	buf_dealloc_mem(struct buf *);
void	buf_fix_mapping(struct buf *, vsize_t);
void	buf_alloc_pages(struct buf *, vsize_t);
void	buf_free_pages(struct buf *);

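/*
 * Expected pairing, a rough sketch, assuming bp came from the buffer
 * cache:
 *
 *	buf_acquire(bp);	sets B_BUSY, ensures a kvm mapping
 *	(access bp->b_data)
 *	buf_release(bp);	clears B_BUSY; mapping may be reclaimed
 */
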
void	minphys(struct buf *bp);
int	physio(void (*strategy)(struct buf *), dev_t dev, int flags,
	    void (*minphys)(struct buf *), struct uio *uio);
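/*
 * Sketch of a raw character device read routine built on physio();
 * xxread() and xxstrategy() are hypothetical driver entry points:
 *
 *	int
 *	xxread(dev_t dev, struct uio *uio, int ioflag)
 *	{
 *		return (physio(xxstrategy, dev, B_READ, minphys, uio));
 *	}
 */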
void	brelvp(struct buf *);
void	reassignbuf(struct buf *);
void	bgetvp(struct vnode *, struct buf *);

void	buf_replacevnode(struct buf *, struct vnode *);
void	buf_daemon(struct proc *);
int	bread_cluster(struct vnode *, daddr_t, int, struct buf **);

#ifdef DEBUG
void	buf_print(struct buf *);
#endif

static __inline void
buf_start(struct buf *bp)
{
	if (bioops.io_start)
		(*bioops.io_start)(bp);
}

static __inline void
buf_complete(struct buf *bp)
{
	if (bioops.io_complete)
		(*bioops.io_complete)(bp);
}

static __inline void
buf_deallocate(struct buf *bp)
{
	if (bioops.io_deallocate)
		(*bioops.io_deallocate)(bp);
}

static __inline void
buf_movedeps(struct buf *bp, struct buf *bp2)
{
	if (bioops.io_movedeps)
		(*bioops.io_movedeps)(bp, bp2);
}

static __inline int
buf_countdeps(struct buf *bp, int i, int islocked)
{
	if (bioops.io_countdeps)
		return ((*bioops.io_countdeps)(bp, i, islocked));
	else
		return (0);
}

void	cluster_write(struct buf *, struct cluster_info *, u_quad_t);

__END_DECLS
#endif /* _KERNEL */
#endif /* !_SYS_BUF_H_ */