xref: /openbsd/sys/sys/buf.h (revision 81fb472f)
1 /*	$OpenBSD: buf.h,v 1.114 2024/02/03 18:51:58 beck Exp $	*/
2 /*	$NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)buf.h	8.7 (Berkeley) 1/21/94
38  */
39 
40 #ifndef _SYS_BUF_H_
41 #define	_SYS_BUF_H_
42 #include <sys/queue.h>
43 #include <sys/tree.h>
44 #include <sys/mutex.h>
45 #include <uvm/uvm_extern.h>
46 
47 #define NOLIST ((struct buf *)0x87654321)
48 
49 struct buf;
50 struct vnode;
51 
52 LIST_HEAD(bufhead, buf);
53 
54 /*
55  * Buffer queues
56  */
57 #define BUFQ_NSCAN_N	128
58 #define BUFQ_FIFO	0
59 #define BUFQ_NSCAN	1
60 #define BUFQ_DEFAULT	BUFQ_NSCAN
61 #define BUFQ_HOWMANY	2
62 
63 /*
64  * Write limits for bufq - defines high and low water marks for how
65  * many kva slots are allowed to be consumed to parallelize writes from
66  * the buffer cache from any individual bufq.
67  */
68 #define BUFQ_HI		128
69 #define BUFQ_LOW	64
70 
71 struct bufq_impl;
72 
/*
 * A queue of pending buffer I/O with a pluggable scheduling
 * discipline (see bufq_impl and the BUFQ_* type constants above).
 */
struct bufq {
	SLIST_ENTRY(bufq)	 bufq_entries;	/* SLIST linkage; list head lives elsewhere */
	struct mutex	 	 bufq_mtx;	/* mutex; presumably guards the fields below -- confirm */
	void			*bufq_data;	/* discipline-private queue state */
	u_int			 bufq_outstanding;	/* presumably bufs dequeued but not yet done -- confirm */
	u_int			 bufq_hi;	/* high water mark (cf. BUFQ_HI above) */
	u_int			 bufq_low;	/* low water mark (cf. BUFQ_LOW above) */
	int			 bufq_waiting;	/* presumably sleepers in bufq_wait() -- confirm */
	int			 bufq_stop;	/* nonzero while stopping/quiescing */
	int			 bufq_type;	/* BUFQ_FIFO or BUFQ_NSCAN */
	const struct bufq_impl	*bufq_impl;	/* discipline method table */
};
85 
/* Bufq lifecycle: create with a discipline type, switch type, tear down. */
int		 bufq_init(struct bufq *, int);
int		 bufq_switch(struct bufq *, int);
void		 bufq_destroy(struct bufq *);

/* Enqueue/dequeue bufs; peek presumably reports whether work is pending. */
void		 bufq_queue(struct bufq *, struct buf *);
struct buf	*bufq_dequeue(struct bufq *);
void		 bufq_requeue(struct bufq *, struct buf *);
int		 bufq_peek(struct bufq *);
void		 bufq_drain(struct bufq *);

/* Completion notification and machinery for suspending/resuming I/O. */
void		 bufq_wait(struct bufq *);
void		 bufq_done(struct bufq *, struct buf *);
void		 bufq_quiesce(void);
void		 bufq_restart(void);
100 
/* fifo: per-buf linkage for the simple FIFO discipline. */
SIMPLEQ_HEAD(bufq_fifo_head, buf);
struct bufq_fifo {
	SIMPLEQ_ENTRY(buf)	bqf_entries;	/* entry on a bufq_fifo_head */
};
106 
/* nscan: per-buf linkage for the NSCAN discipline (cf. BUFQ_NSCAN_N). */
SIMPLEQ_HEAD(bufq_nscan_head, buf);
struct bufq_nscan {
	SIMPLEQ_ENTRY(buf)	bqf_entries;	/* entry on a bufq_nscan_head */
};
112 
/*
 * bufq link in struct buf: one member per discipline; only the member
 * matching the owning bufq's bufq_type is in use at any given time.
 */
union bufq_data {
	struct bufq_fifo	bufq_data_fifo;
	struct bufq_nscan	bufq_data_nscan;
};
118 
/*
 * The buffer header describes an I/O operation in the kernel.
 * A buf can hang off a vnode (b_rbbufs/b_vnbufs), sit on a free list
 * (b_freelist) and be queued for I/O on a bufq (b_bufq/b_bq).
 */
struct buf {
	RBT_ENTRY(buf) b_rbbufs;	/* vnode "hash" tree */
	LIST_ENTRY(buf) b_list;		/* All allocated buffers. */
	LIST_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
	int cache;			/* which cache are we in */
	struct  proc *b_proc;		/* Associated proc; NULL if kernel. */
	volatile long	b_flags;	/* B_* flags. */
	long	b_bufsize;		/* Allocated buffer size. */
	long	b_bcount;		/* Valid bytes in buffer. */
	size_t	b_resid;		/* Remaining I/O. */
	int	b_error;		/* Errno value. */
	dev_t	b_dev;			/* Device associated with buffer. */
	caddr_t	b_data;			/* associated data */
	void	*b_saveaddr;		/* Original b_data for physio. */

	TAILQ_ENTRY(buf) b_valist;	/* LRU of va to reuse. */

	union	bufq_data b_bufq;	/* Discipline-specific bufq linkage. */
	struct	bufq	  *b_bq;	/* What bufq this buf is on */

	struct uvm_object *b_pobj;	/* NOTE(review): presumably points at b_uobj when pages are attached -- confirm */
	struct uvm_object b_uobj;	/* Object containing the pages */
	off_t	b_poffs;		/* Offset within object */

	daddr_t	b_lblkno;		/* Logical block number. */
	daddr_t	b_blkno;		/* Underlying physical block number. */
					/* Function to call upon completion.
					 * Will be called at splbio(). */
	void	(*b_iodone)(struct buf *);
	struct	vnode *b_vp;		/* Device vnode. */
	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
	int	b_dirtyend;		/* Offset of end of dirty region. */
	int	b_validoff;		/* Offset in buffer of valid region. */
	int	b_validend;		/* Offset of end of valid region. */
};
156 
TAILQ_HEAD(bufqueue, buf);

/*
 * Per-cache LRU bookkeeping: bufs migrate between the hot, cold and
 * warm queues (cf. the B_WARM/B_COLD flags below).
 */
struct bufcache {
	int64_t hotbufpages;	/* presumably pages held by bufs on hotqueue -- confirm */
	int64_t warmbufpages;	/* presumably pages held by bufs on warmqueue -- confirm */
	int64_t cachepages;	/* presumably total pages accounted to this cache -- confirm */
	struct bufqueue hotqueue;
	struct bufqueue coldqueue;
	struct bufqueue warmqueue;
};
167 
168 /* Device driver compatibility definitions. */
169 #define	b_active b_bcount		/* Driver queue head: drive active. */
170 
171 /*
172  * These flags are kept in b_flags.
173  */
174 #define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
175 #define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
176 #define	B_NEEDCOMMIT	0x00000002	/* Needs committing to stable storage */
177 #define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
178 #define	B_BAD		0x00000008	/* Bad block revectoring in progress. */
179 #define	B_BUSY		0x00000010	/* I/O in progress. */
180 #define	B_CACHE		0x00000020	/* Bread found us in the cache. */
181 #define	B_CALL		0x00000040	/* Call b_iodone from biodone. */
182 #define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
183 #define	B_DONE		0x00000100	/* I/O completed. */
184 #define	B_EINTR		0x00000200	/* I/O was interrupted */
185 #define	B_ERROR		0x00000400	/* I/O error occurred. */
186 #define	B_INVAL		0x00000800	/* Does not contain valid info. */
187 #define	B_NOCACHE	0x00001000	/* Do not cache block after use. */
188 #define	B_PHYS		0x00002000	/* I/O to user memory. */
189 #define	B_RAW		0x00004000	/* Set by physio for raw transfers. */
190 #define	B_READ		0x00008000	/* Read buffer. */
191 #define	B_WANTED	0x00010000	/* Process wants this buffer. */
192 #define	B_WRITEINPROG	0x00020000	/* Write in progress. */
193 #define	B_XXX		0x00040000	/* Debugging flag. */
194 #define	B_DEFERRED	0x00080000	/* Skipped over for cleaning */
195 #define	B_SCANNED	0x00100000	/* Block already pushed during sync */
196 #define	B_PDAEMON	0x00200000	/* I/O started by pagedaemon */
197 #define	B_RELEASED	0x00400000	/* free this buffer after its kvm */
198 #define	B_WARM		0x00800000	/* buffer is or has been on the warm queue */
199 #define	B_COLD		0x01000000	/* buffer is on the cold queue */
200 #define	B_BC		0x02000000	/* buffer is managed by the cache */
201 #define	B_DMA		0x04000000	/* buffer is DMA reachable */
202 
/*
 * Kernel "%b" format string (see printf(9)) for pretty-printing
 * b_flags: the leading \20 is the numeric base (hex), then each octal
 * \NNN escape gives the 1-based bit number of the flag name following.
 * NOTE(review): bit \026 is spelled "DAEMON" although the flag macro
 * is B_PDAEMON, and \023 prints "XXX(FORMAT)" for B_XXX.
 */
#define	B_BITS	"\20\001AGE\002NEEDCOMMIT\003ASYNC\004BAD\005BUSY" \
    "\006CACHE\007CALL\010DELWRI\011DONE\012EINTR\013ERROR" \
    "\014INVAL\015NOCACHE\016PHYS\017RAW\020READ" \
    "\021WANTED\022WRITEINPROG\023XXX(FORMAT)\024DEFERRED" \
    "\025SCANNED\026DAEMON\027RELEASED\030WARM\031COLD\032BC\033DMA"
208 
209 /*
210  * Zero out the buffer's data area.
211  */
212 #define	clrbuf(bp) {							\
213 	bzero((bp)->b_data, (bp)->b_bcount);				\
214 	(bp)->b_resid = 0;						\
215 }
216 
217 
218 /* Flags to low-level allocation routines. */
219 #define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
220 #define B_SYNC		0x02	/* Do all allocations synchronously. */
221 
/*
 * Per-file read-ahead / write-clustering state (presumably maintained
 * by the cluster read/write code -- confirm against the consumer).
 */
struct cluster_info {
	daddr_t	ci_lastr;	/* last read (read-ahead) */
	daddr_t	ci_lastw;	/* last write (write cluster) */
	daddr_t	ci_cstart;	/* start block of cluster */
	daddr_t	ci_lasta;	/* last allocation */
	int	ci_clen; 	/* length of current cluster */
	int	ci_ralen;	/* Read-ahead length */
	daddr_t	ci_maxra;	/* last readahead block */
};
231 
232 #ifdef _KERNEL
233 __BEGIN_DECLS
234 /* Kva slots (of size MAXPHYS) reserved for syncer and cleaner. */
235 #define RESERVE_SLOTS 4
236 /* Buffer cache pages reserved for syncer and cleaner. */
237 #define RESERVE_PAGES (RESERVE_SLOTS * MAXPHYS / PAGE_SIZE)
238 /* Minimum size of the buffer cache, in pages. */
239 #define BCACHE_MIN (RESERVE_PAGES * 2)
240 #define UNCLEAN_PAGES (bcstats.numbufpages - bcstats.numcleanpages)
241 
242 extern struct proc *cleanerproc;
243 extern long bufpages;		/* Max number of pages for buffers' data */
244 extern struct pool bufpool;
245 extern struct bufhead bufhead;
246 
/* Core buffer-cache read/write entry points. */
void	bawrite(struct buf *);		/* asynchronous write (cf. B_ASYNC) */
void	bdwrite(struct buf *);		/* delayed write (cf. B_DELWRI) */
void	biodone(struct buf *);		/* mark I/O on a buf complete */
int	biowait(struct buf *);		/* wait for I/O; returns errno value */
int bread(struct vnode *, daddr_t, int, struct buf **);
int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
    struct buf **);
void	brelse(struct buf *);		/* release buf back to the cache */
#define bremfree bufcache_take		/* historical alias */
void	bufinit(void);
void	buf_dirty(struct buf *);
void    buf_undirty(struct buf *);
void	buf_adjcnt(struct buf *, long);	/* presumably adjusts b_bcount -- confirm */
int	bwrite(struct buf *);		/* synchronous write */
struct buf *getblk(struct vnode *, daddr_t, int, int, uint64_t);
struct buf *geteblk(size_t);		/* presumably an empty, unassociated buf */
struct buf *incore(struct vnode *, daddr_t);	/* cache lookup, no I/O */
264 
265 /*
266  * bufcache functions
267  */
268 void bufcache_take(struct buf *);
269 void bufcache_release(struct buf *);
270 
271 int buf_flip_high(struct buf *);
272 void buf_flip_dma(struct buf *);
273 struct buf *bufcache_getcleanbuf(int, int);
274 struct buf *bufcache_getdirtybuf(void);
275 
276 /*
277  * buf_kvm_init initializes the kvm handling for buffers.
278  * buf_acquire sets the B_BUSY flag and ensures that the buffer is
279  * mapped in the kvm.
280  * buf_release clears the B_BUSY flag and allows the buffer to become
281  * unmapped.
282  * buf_unmap is for internal use only. Unmaps the buffer from kvm.
283  */
284 void	buf_mem_init(vsize_t);
285 void	buf_acquire(struct buf *);
286 void	buf_acquire_nomap(struct buf *);
287 void	buf_map(struct buf *);
288 void	buf_release(struct buf *);
289 int	buf_dealloc_mem(struct buf *);
290 void	buf_fix_mapping(struct buf *, vsize_t);
291 void	buf_alloc_pages(struct buf *, vsize_t);
292 void	buf_free_pages(struct buf *);
293 
/* Raw (physical) I/O to user memory, and buf<->vnode association. */
void	minphys(struct buf *bp);	/* presumably clamps a transfer to MAXPHYS -- confirm */
int	physio(void (*strategy)(struct buf *), dev_t dev, int flags,
	    void (*minphys)(struct buf *), struct uio *uio);
void  brelvp(struct buf *);	/* disassociate buf from its vnode */
void  reassignbuf(struct buf *);	/* presumably moves buf between vnode buffer lists -- confirm */
void  bgetvp(struct vnode *, struct buf *);	/* associate buf with vnode */
300 
301 void  buf_replacevnode(struct buf *, struct vnode *);
302 void  buf_daemon(void *);
303 void  buf_replacevnode(struct buf *, struct vnode *);
304 int bread_cluster(struct vnode *, daddr_t, int, struct buf **);
305 
306 __END_DECLS
307 #endif /* _KERNEL */
308 #endif /* !_SYS_BUF_H_ */
309