/*	$OpenBSD: buf.h,v 1.76 2011/04/07 19:07:42 beck Exp $	*/
/*	$NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)buf.h	8.7 (Berkeley) 1/21/94
 */

#ifndef _SYS_BUF_H_
#define	_SYS_BUF_H_
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/mutex.h>
#include <sys/workq.h>

#define NOLIST ((struct buf *)0x87654321)

struct buf;
struct vnode;

struct buf_rb_bufs;
RB_PROTOTYPE(buf_rb_bufs, buf, b_rbbufs, rb_buf_compare);

LIST_HEAD(bufhead, buf);

/*
 * To avoid including <ufs/ffs/softdep.h>
 */

LIST_HEAD(workhead, worklist);

/*
 * Buffer queues
 */
#define BUFQ_DISKSORT	0
#define	BUFQ_FIFO	1
#define BUFQ_DEFAULT	BUFQ_DISKSORT
#define BUFQ_HOWMANY	2

struct bufq_impl;

struct bufq {
	SLIST_ENTRY(bufq)	 bufq_entries;
	struct mutex		 bufq_mtx;
	void			*bufq_data;
	u_int			 bufq_outstanding;
	int			 bufq_stop;
	int			 bufq_type;
	const struct bufq_impl	*bufq_impl;
};

int		 bufq_init(struct bufq *, int);
int		 bufq_switch(struct bufq *, int);
void		 bufq_destroy(struct bufq *);

void		 bufq_queue(struct bufq *, struct buf *);
struct buf	*bufq_dequeue(struct bufq *);
void		 bufq_requeue(struct bufq *, struct buf *);
int		 bufq_peek(struct bufq *);
void		 bufq_drain(struct bufq *);

void		 bufq_done(struct bufq *, struct buf *);
void		 bufq_quiesce(void);
void		 bufq_restart(void);

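/*
 * Example (editorial sketch, not part of the original header): a disk
 * driver typically owns one struct bufq and drives it with the functions
 * above.  The softc member "sc_bufq" and the routine "xxstart" are
 * hypothetical names used only for illustration.
 *
 *	bufq_init(&sc->sc_bufq, BUFQ_DEFAULT);	(at attach time)
 *
 *	In the strategy routine:
 *		bufq_queue(&sc->sc_bufq, bp);
 *		xxstart(sc);
 *
 *	In the start routine:
 *		while ((bp = bufq_dequeue(&sc->sc_bufq)) != NULL)
 *			... program the hardware from bp ...
 *
 *	When a transfer finishes:
 *		bufq_done(&sc->sc_bufq, bp);
 *		biodone(bp);
 *
 *	At detach time:
 *		bufq_drain(&sc->sc_bufq);
 *		bufq_destroy(&sc->sc_bufq);
 */
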
/* disksort */
struct bufq_disksort {
	struct buf	 *bqd_actf;
	struct buf	**bqd_actb;
};

/* fifo */
SIMPLEQ_HEAD(bufq_fifo_head, buf);
struct bufq_fifo {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
};

/* Abuse bufq_fifo, for swapping to regular files. */
struct bufq_swapreg {
	SIMPLEQ_ENTRY(buf)	bqf_entries;
	struct workq_task	bqf_wqtask;
};

/* bufq link in struct buf */
union bufq_data {
	struct bufq_disksort	bufq_data_disksort;
	struct bufq_fifo	bufq_data_fifo;
	struct bufq_swapreg	bufq_swapreg;
};

/*
 * These are currently used only by the soft dependency code, hence
 * are stored once in a global variable. If other subsystems wanted
 * to use these hooks, a pointer to a set of bio_ops could be added
 * to each buffer.
 */
extern struct bio_ops {
	void	(*io_start)(struct buf *);
	void	(*io_complete)(struct buf *);
	void	(*io_deallocate)(struct buf *);
	void	(*io_movedeps)(struct buf *, struct buf *);
	int	(*io_countdeps)(struct buf *, int, int);
} bioops;

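/*
 * Example (editorial sketch): a subsystem that wants these hooks fills in
 * the global bioops at initialization time; hooks left NULL are simply
 * skipped by the buf_start()/buf_complete()/... wrappers further down in
 * this header.  The "xx_*" handler names below are hypothetical.
 *
 *	void
 *	xx_init(void)
 *	{
 *		bioops.io_start = xx_io_start;
 *		bioops.io_complete = xx_io_complete;
 *		bioops.io_deallocate = xx_io_deallocate;
 *	}
 */
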
/* XXX: disksort(); */
#define b_actf	b_bufq.bufq_data_disksort.bqd_actf
#define b_actb	b_bufq.bufq_data_disksort.bqd_actb

/* The buffer header describes an I/O operation in the kernel. */
struct buf {
	RB_ENTRY(buf) b_rbbufs;		/* vnode "hash" tree */
	LIST_ENTRY(buf) b_list;		/* All allocated buffers. */
	LIST_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
	time_t	b_synctime;		/* Time this buffer should be flushed */
	struct  proc *b_proc;		/* Associated proc; NULL if kernel. */
	volatile long	b_flags;	/* B_* flags. */
	int	b_error;		/* Errno value. */
	long	b_bufsize;		/* Allocated buffer size. */
	long	b_bcount;		/* Valid bytes in buffer. */
	size_t	b_resid;		/* Remaining I/O. */
	dev_t	b_dev;			/* Device associated with buffer. */
	caddr_t	b_data;			/* associated data */
	void	*b_saveaddr;		/* Original b_data for physio. */

	TAILQ_ENTRY(buf) b_valist;	/* LRU of va to reuse. */

	union	bufq_data b_bufq;
	struct	bufq	  *b_bq;	/* What bufq this buf is on */

	struct uvm_object *b_pobj;	/* Object containing the pages */
	off_t	b_poffs;		/* Offset within object */

	daddr64_t	b_lblkno;	/* Logical block number. */
	daddr64_t	b_blkno;	/* Underlying physical block number. */
					/* Function to call upon completion.
					 * Will be called at splbio(). */
	void	(*b_iodone)(struct buf *);
	struct	vnode *b_vp;		/* Device vnode. */
	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
	int	b_dirtyend;		/* Offset of end of dirty region. */
	int	b_validoff;		/* Offset in buffer of valid region. */
	int	b_validend;		/* Offset of end of valid region. */
	struct	workhead b_dep;		/* List of filesystem dependencies. */
};

/*
 * For portability with historic industry practice, the cylinder number has
 * to be maintained in the `b_resid' field.
 */
#define	b_cylinder b_resid		/* Cylinder number for disksort(). */

/* Device driver compatibility definitions. */
#define	b_active b_bcount		/* Driver queue head: drive active. */
#define	b_errcnt b_resid		/* Retry count while I/O in progress. */

/*
 * These flags are kept in b_flags.
 */
#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
#define	B_NEEDCOMMIT	0x00000002	/* Needs committing to stable storage */
#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
#define	B_BAD		0x00000008	/* Bad block revectoring in progress. */
#define	B_BUSY		0x00000010	/* I/O in progress. */
#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
#define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
#define	B_DONE		0x00000100	/* I/O completed. */
#define	B_EINTR		0x00000200	/* I/O was interrupted */
#define	B_ERROR		0x00000400	/* I/O error occurred. */
#define	B_INVAL		0x00000800	/* Does not contain valid info. */
#define	B_NOCACHE	0x00001000	/* Do not cache block after use. */
#define	B_PHYS		0x00002000	/* I/O to user memory. */
#define	B_RAW		0x00004000	/* Set by physio for raw transfers. */
#define	B_READ		0x00008000	/* Read buffer. */
#define	B_WANTED	0x00010000	/* Process wants this buffer. */
#define	B_WRITEINPROG	0x00020000	/* Write in progress. */
#define	B_XXX		0x00040000	/* Debugging flag. */
#define	B_DEFERRED	0x00080000	/* Skipped over for cleaning */
#define	B_SCANNED	0x00100000	/* Block already pushed during sync */
#define	B_PDAEMON	0x00200000	/* I/O started by pagedaemon */
#define	B_RELEASED	0x00400000	/* free this buffer after its kvm */
#define	B_NOTMAPPED	0x00800000	/* BUSY, but not necessarily mapped */

#define	B_BITS	"\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
    "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
    "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
    "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
    "\025SCANNED\026DAEMON\027RELEASED\030NOTMAPPED"

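/*
 * Example (editorial sketch): B_BITS is a printf(9) "%b" bit string, so a
 * buffer's flag word can be decoded in diagnostics roughly as follows:
 *
 *	printf("buf %p flags %b\n", bp, bp->b_flags, B_BITS);
 */
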
/*
 * This structure describes a clustered I/O.  It is stored in the b_saveaddr
 * field of the buffer on which I/O is done.  At I/O completion, the cluster
 * callback uses the structure to parcel the I/O out to the individual
 * buffers, and then frees this structure.
 */
struct cluster_save {
	long	bs_bcount;		/* Saved b_bcount. */
	long	bs_bufsize;		/* Saved b_bufsize. */
	void	*bs_saveaddr;		/* Saved b_addr. */
	int	bs_nchildren;		/* Number of associated buffers. */
	struct buf **bs_children;	/* List of associated buffers. */
};

/*
 * Zero out the buffer's data area.
 */
#define	clrbuf(bp) {							\
	bzero((bp)->b_data, (u_int)(bp)->b_bcount);			\
	(bp)->b_resid = 0;						\
}

/* Flags to low-level allocation routines. */
#define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
#define B_SYNC		0x02	/* Do all allocations synchronously. */

struct cluster_info {
	daddr64_t	ci_lastr;	/* last read (read-ahead) */
	daddr64_t	ci_lastw;	/* last write (write cluster) */
	daddr64_t	ci_cstart;	/* start block of cluster */
	daddr64_t	ci_lasta;	/* last allocation */
	int		ci_clen;	/* length of current cluster */
	int		ci_ralen;	/* Read-ahead length */
	daddr64_t	ci_maxra;	/* last readahead block */
};

#ifdef _KERNEL
__BEGIN_DECLS
extern int bufpages;		/* Max number of pages for buffers' data */
extern struct pool bufpool;
extern struct bufhead bufhead;

void	bawrite(struct buf *);
void	bdwrite(struct buf *);
void	biodone(struct buf *);
int	biowait(struct buf *);
int	bread(struct vnode *, daddr64_t, int, struct ucred *, struct buf **);
int	breadn(struct vnode *, daddr64_t, int, daddr64_t *, int *, int,
	    struct ucred *, struct buf **);
void	brelse(struct buf *);
void	bremfree(struct buf *);
void	bufinit(void);
void	buf_dirty(struct buf *);
void	buf_undirty(struct buf *);
int	bwrite(struct buf *);
struct buf *getblk(struct vnode *, daddr64_t, int, int, int);
struct buf *geteblk(int);
struct buf *incore(struct vnode *, daddr64_t);

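/*
 * Example (editorial sketch): the classic buffer cache read pattern.  The
 * caller reads a logical block through the cache, uses b_data and then
 * releases the buffer with brelse(); brelse() is the conventional cleanup
 * on the error path as well.
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lblkno, size, cred, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... inspect or copy bp->b_data ...
 *	brelse(bp);
 *
 * For writes, getblk() obtains a buffer and bwrite(), bdwrite() or
 * bawrite() push it out synchronously, delayed or asynchronously.
 */
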
/*
 * buf_mem_init initializes the kvm handling for buffers.
 * buf_acquire sets the B_BUSY flag and ensures that the buffer is
 * mapped in the kvm.
 * buf_release clears the B_BUSY flag and allows the buffer to become
 * unmapped.
 * buf_unmap is for internal use only; it unmaps the buffer from kvm.
 */
void	buf_mem_init(vsize_t);
void	buf_acquire(struct buf *);
void	buf_acquire_unmapped(struct buf *);
void	buf_map(struct buf *);
void	buf_release(struct buf *);
int	buf_dealloc_mem(struct buf *);
void	buf_fix_mapping(struct buf *, vsize_t);
void	buf_alloc_pages(struct buf *, vsize_t);
void	buf_free_pages(struct buf *);

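/*
 * Example (editorial sketch): per the comment above, callers pair these as
 *
 *	buf_acquire(bp);	(or buf_acquire_unmapped() + buf_map())
 *	... operate on the now-busy, mapped buffer ...
 *	buf_release(bp);	(the kva may be reclaimed afterwards)
 */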

void	minphys(struct buf *bp);
int	physio(void (*strategy)(struct buf *), dev_t dev, int flags,
	    void (*minphys)(struct buf *), struct uio *uio);
void	brelvp(struct buf *);
void	reassignbuf(struct buf *);
void	bgetvp(struct vnode *, struct buf *);

void	buf_replacevnode(struct buf *, struct vnode *);
void	buf_daemon(struct proc *);
int	bread_cluster(struct vnode *, daddr64_t, int, struct buf **);

#ifdef DEBUG
void	buf_print(struct buf *);
#endif

static __inline void
buf_start(struct buf *bp)
{
	if (bioops.io_start)
		(*bioops.io_start)(bp);
}

static __inline void
buf_complete(struct buf *bp)
{
	if (bioops.io_complete)
		(*bioops.io_complete)(bp);
}

static __inline void
buf_deallocate(struct buf *bp)
{
	if (bioops.io_deallocate)
		(*bioops.io_deallocate)(bp);
}

static __inline void
buf_movedeps(struct buf *bp, struct buf *bp2)
{
	if (bioops.io_movedeps)
		(*bioops.io_movedeps)(bp, bp2);
}

static __inline int
buf_countdeps(struct buf *bp, int i, int islocked)
{
	if (bioops.io_countdeps)
		return ((*bioops.io_countdeps)(bp, i, islocked));
	else
		return (0);
}

void	cluster_write(struct buf *, struct cluster_info *, u_quad_t);

__END_DECLS
#endif /* _KERNEL */
#endif /* !_SYS_BUF_H_ */