xref: /dragonfly/sys/dev/raid/vinum/vinumvar.h (revision dcd1a9c7)
1 /*-
2  * Copyright (c) 1997, 1998, 1999
3  *	Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6  *
7  *  Written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by Nan Yang Computer
23  *	Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  */
40 
41 #ifdef _KERNEL
42 #include "opt_vinum.h"
43 #endif
44 
45 #include <sys/time.h>
46 #include "vinumstate.h"
47 
/*
 * vinum_off_t: signed 64 bit quantity used for disk block
 * numbers and offsets.
 */
typedef int64_t vinum_off_t;
52 
53 /*
54  * Some configuration maxima.  They're an enum because
55  * we can't define global constants.  Sorry about that.
56  *
57  * These aren't as bad as they look: most of them are soft limits.
58  */
59 
60 #define VINUMROOT
61 enum constants {
62 	VINUM_HEADER = 512,		/* size of header on disk */
63 	MAXCONFIGLINE = 1024,		/* maximum size of one config line */
64 	MINVINUMSLICE = 1048576,	/* minimum size of a slice */
65 
66 	ROUND_ROBIN_READPOL = -1,	/* round robin read policy */
67 
68 	/*
69 	 * type field in minor number
70 	 */
71 	VINUM_VOLUME_TYPE = 0,
72 	VINUM_PLEX_TYPE = 1,
73 	VINUM_SD_TYPE = 2,
74 	VINUM_DRIVE_TYPE = 3,
75 	VINUM_SUPERDEV_TYPE = 4,	/* super device. */
76 	VINUM_RAWPLEX_TYPE = 5,		/* anonymous plex */
77 	VINUM_RAWSD_TYPE = 6,		/* anonymous subdisk */
78 
79 	/*
80 	 * Shifts for the individual fields in the device
81 	 */
82 	VINUM_TYPE_SHIFT = 28,
83 	VINUM_VOL_SHIFT = 0,
84 	VINUM_PLEX_SHIFT = 16,
85 	VINUM_SD_SHIFT = 20,
86 	VINUM_VOL_WIDTH = 8,
87 	VINUM_PLEX_WIDTH = 3,
88 	VINUM_SD_WIDTH = 8,
89 
90 	/*
91 	 * Shifts for the second half of raw plex and
92 	 * subdisk numbers
93 	 */
94 	VINUM_RAWPLEX_SHIFT = 8,	/* shift the second half this much */
95 	VINUM_RAWPLEX_WIDTH = 12,	/* width of second half */
96 
97 	MAJORDEV_SHIFT = 8,
98 
99 	MAXPLEX = 8,			/* max number of plexes in a volume */
100 	MAXSD = 256,			/* max number of subdisks in a plex */
101 	MAXDRIVENAME = 32,		/* max length of a device name */
102 	MAXSDNAME = 64,			/* max length of a subdisk name */
103 	MAXPLEXNAME = 64,		/* max length of a plex name */
104 	MAXVOLNAME = 64,		/* max length of a volume name */
105 	MAXNAME = 64,			/* max length of any name */
106 
107 
108 	/*
109 	 * Define a minor device number.
110 	 * This is not used directly; instead, it's
111 	 * called by the other macros.
112 	 */
113 #define VINUMMINOR(v,p,s,t)  (  (v << VINUM_VOL_SHIFT)		\
114 			      | (p << VINUM_PLEX_SHIFT)		\
115 			      | (s << VINUM_SD_SHIFT)		\
116 			      | (t << VINUM_TYPE_SHIFT) )
117 
118 /* Create device minor numbers */
119 
120 #ifdef _KERNEL
121 
122 #define VINUMDEV(v,p,s,t)  	\
123 		VINUMMINOR (v, p, s, t)
124 
125 #define VINUM_PLEX(p)		\
126 		 ((VINUM_RAWPLEX_TYPE << VINUM_TYPE_SHIFT) 	\
127 		 | (p & 0xff)					\
128 		 | ((p & ~0xff) << 8))
129 
130 #define VINUM_SD(s)		\
131 		 ((VINUM_RAWSD_TYPE << VINUM_TYPE_SHIFT) 	\
132 		 | (s & 0xff)					\
133 		 | ((s & ~0xff) << 8))
134 
135 #endif
136 
137     /* Create a bit mask for x bits */
138 #define MASK(x)	 ((1 << (x)) - 1)
139 
140     /* Create a raw block device minor number */
141 #define VINUMRMINOR(d,t)	\
142 		( ((d & MASK(VINUM_VOL_WIDTH)) << VINUM_VOL_SHIFT)	\
143 		 | ((d & ~MASK(VINUM_VOL_WIDTH)) <<			\
144 			(VINUM_PLEX_SHIFT + VINUM_VOL_WIDTH))		\
145 		 | (t << VINUM_TYPE_SHIFT) )
146 
147     /* extract device type */
148 #define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 7)
149 
150     /*
151      * This mess is used to catch people who compile
152      * a debug vinum(8) and non-debug kernel module,
153      * or the other way round.
154      */
155 
156 #ifdef VINUMDEBUG
157 
158 /* superdevice number */
159 #define	VINUM_SUPERDEV		VINUMMINOR(1, 0, 0, VINUM_SUPERDEV_TYPE)
160 
161 /* non-debug superdevice number */
162 #define	VINUM_WRONGSUPERDEV	VINUMMINOR(2, 0, 0, VINUM_SUPERDEV_TYPE)
163 
164 #else
165 
166 /* superdevice number */
167 #define	VINUM_SUPERDEV		VINUMMINOR(2, 0, 0, VINUM_SUPERDEV_TYPE)
168 
169 /* debug superdevice number */
170 #define	VINUM_WRONGSUPERDEV	VINUMMINOR(1, 0, 0, VINUM_SUPERDEV_TYPE)
171 
172 #endif
173 
174 /* daemon superdevice number */
175 #define	VINUM_DAEMON_DEV	VINUMMINOR(0, 0, 0, VINUM_SUPERDEV_TYPE)
176 
177 	/*
178 	 * the number of object entries to cater for initially, and also the
179 	 * value by which they are incremented.  It doesn't take long
180 	 * to extend them, so theoretically we could start with 1 of each, but
181 	 * it's untidy to allocate such small areas.  These values are
182 	 * probably too small.
183 	 */
184 
185 	INITIAL_DRIVES = 4,
186 	INITIAL_VOLUMES = 4,
187 	INITIAL_PLEXES = 8,
188 	INITIAL_SUBDISKS = 16,
189 	INITIAL_SUBDISKS_IN_PLEX = 4,	/* num subdisks to alloc to a plex */
190 	INITIAL_SUBDISKS_IN_DRIVE = 4,	/* num subdisks to alloc to a drive */
191 	INITIAL_DRIVE_FREELIST = 16,	/* num entries in drive freelist */
192 	PLEX_REGION_TABLE_SIZE = 8,	/* num entries in plex region tables */
193 	PLEX_LOCKS = 256,		/* num locks to alloc to a plex */
194 	MAX_REVIVE_BLOCKSIZE = MAXPHYS,	/* maximum revive block size */
195 	DEFAULT_REVIVE_BLOCKSIZE = 65536,/* default revive block size */
196 	VINUMHOSTNAMELEN = 32,		/* host name field in label */
197 };
198 
199 /* device numbers */
200 
201 /*
202  *  31 30    28 27                   20 19 18    16 15         8 7      0
203  * |---------------------------------------------------------------------|
204  * |X |  Type  |    Subdisk number     | X| Plex   |      Major |  volno |
205  * |---------------------------------------------------------------------|
206  *
207  *    0x2                 03                 1           19	    06
208  *
209  * The fields in the minor number are interpreted as follows:
210  *
211  * Volume:              Only type and volume number are relevant
212  * Plex in volume:      type, plex number in volume and volume number
213  *			are relevant
214  * raw plex:            type, plex number is made of bits 27-16 and 7-0
215  * raw subdisk:         type, subdisk number is made of bits 27-16 and 7-0
216  */
217 
#if 0

/* This doesn't get used.  Consider removing it. */
/*
 * Bit-field view of a vinum minor number; compiled out by the
 * surrounding #if 0.  The declared widths add up to 32 bits.
 * NOTE(review): C leaves the allocation order of bit-fields inside a
 * storage unit implementation-defined, so this struct is not
 * guaranteed to match the shift-based layout -- confirm before
 * re-enabling it.
 */
struct devcode {
	/*
	 * CARE.  These fields assume a big-endian word.  On a
	 * little-endian system, they're the wrong way around
	 */
	unsigned volume:8;		/* up to 256 volumes */
	unsigned major:8;		/* major number fits */
	unsigned plex:3;		/* up to 8 plexes per volume */
	unsigned unused:1;		/* up for grabs */
	unsigned sd:8;			/* up to 256 subdisks per plex */
	unsigned type:3;		/* type of object */
	/*
	 * type field
	 VINUM_VOLUME = 0,
	 VINUM_PLEX = 1,
	 VINUM_SUBDISK = 2,
	 VINUM_DRIVE = 3,
	 VINUM_SUPERDEV = 4,
	 VINUM_RAWPLEX = 5,
	 VINUM_RAWSD = 6 */
	unsigned signbit:1;		/* to make 32 bits */
};

#endif
245 
/* Path pieces shared by all vinum device names. */
#define VINUM_BASE	"vinum/"
#define VINUM_DIR	"/dev/vinum"

/*
 * Super device names.  The debug and non-debug builds use names
 * that differ only in the case of the leading letter, which helps
 * catch userland/kernel mismatches: the name belonging to the
 * other kind of build is available as VINUM_WRONGSUPERDEV_*.
 */
#ifndef VINUMDEBUG

/* non-debug build: "control" is ours, "Control" is the debug one */
#define VINUM_SUPERDEV_NAME		VINUM_DIR "/control"
#define VINUM_SUPERDEV_BASE		VINUM_BASE "control"
#define VINUM_WRONGSUPERDEV_NAME	VINUM_DIR "/Control"
#define VINUM_WRONGSUPERDEV_BASE	VINUM_BASE "Control"

#else /* VINUMDEBUG */

/* debug build: "Control" is ours, "control" is the normal one */
#define VINUM_SUPERDEV_NAME		VINUM_DIR "/Control"
#define VINUM_SUPERDEV_BASE		VINUM_BASE "Control"
#define VINUM_WRONGSUPERDEV_NAME	VINUM_DIR "/control"
#define VINUM_WRONGSUPERDEV_BASE	VINUM_BASE "control"

#endif /* VINUMDEBUG */

/* super device for daemon only */
#define VINUM_DAEMON_DEV_NAME		VINUM_DIR "/controld"
#define VINUM_DAEMON_DEV_BASE		VINUM_BASE "controld"
278 
279 /*
280  * Flags for all objects.  Most of them only apply to
281  * specific objects, but we have space for all in any
282  * 32 bit flags word.
283  */
enum objflags {
	VF_LOCKED		= 0x000001, /* locked access to this object */
	VF_LOCKING		= 0x000002, /* we want access to this object */
	VF_OPEN			= 0x000004, /* object has openers */
	VF_WRITETHROUGH		= 0x000008, /* volume: write through */
	VF_INITED		= 0x000010, /* unit has been initialized */
	/* 0x000020 unused, was: VF_WLABEL: label area is writable */
	VF_LABELLING		= 0x000040, /* unit is currently being labelled */
	VF_WANTED		= 0x000080, /* waiting to obtain a lock */
	VF_RAW			= 0x000100, /* raw volume (no file system) */
	VF_LOADED		= 0x000200, /* module is loaded */
	VF_CONFIGURING		= 0x000400, /* someone is changing the config */
	VF_WILL_CONFIGURE	= 0x000800, /* someone wants to change the config */
	VF_CONFIG_INCOMPLETE	= 0x001000, /* not finished changing the config */
	VF_CONFIG_SETUPSTATE	= 0x002000, /* set a vol up if all plexes empty */
	VF_READING_CONFIG	= 0x004000, /* reading config database from disk */
	VF_FORCECONFIG		= 0x008000, /* config drives even with diff names */
	VF_NEWBORN		= 0x010000, /* for objects: we've just created it */
	VF_CONFIGURED		= 0x020000, /* for drives: we read the config */
	VF_STOPPING		= 0x040000, /* for vinum_conf: stop on last close */
	VF_DAEMONOPEN		= 0x080000, /* the daemon has us open (only
					     * superdev) */
	VF_CREATED		= 0x100000, /* for vols: freshly created,
					     * more than new */
	VF_HOTSPARE		= 0x200000, /* for drives: use as hot spare */
	VF_RETRYERRORS		= 0x400000, /* don't down subdisks on I/O errors */
};
314 
/*
 * Global configuration information for the vinum subsystem:
 * the four object tables, their allocation and usage counts,
 * the global flags word and request accounting.
 */
struct _vinum_conf {
	/* object tables */
	struct drive *drive;
	struct sd *sd;
	struct plex *plex;
	struct volume *volume;

	/* entries allocated in each table */
	int drives_allocated;
	int subdisks_allocated;
	int plexes_allocated;
	int volumes_allocated;

	/* entries currently in use in each table */
	int drives_used;
	int subdisks_used;
	int plexes_used;
	int volumes_used;

	int flags;

	/* max number of active requests */
#define VINUM_MAXACTIVE  30000
	/* current number of requests outstanding */
	int active;
	/* max number of requests ever outstanding */
	int maxactive;
#ifdef VINUMDEBUG
	struct request *lastrq;
	struct bio *lastbio;
#endif
	int physbufs;
};

/*
 * Shorthand for the members of the global vinum_conf, to
 * simplify code.
 */
#define DRIVE	vinum_conf.drive
#define SD	vinum_conf.sd
#define PLEX	vinum_conf.plex
#define VOL	vinum_conf.volume
#define VFLAGS	vinum_conf.flags
353 
354 /*
355  * Slice header
356  *
357  * Vinum drives start with this structure:
358  *
 *                                             Sector
360  * |--------------------------------------|
361  * |   PDP-11 memorial boot block         |      0
362  * |--------------------------------------|
363  * |   Disk label, maybe                  |      1
364  * |--------------------------------------|
365  * |   Slice definition  (vinum_hdr)      |      8
366  * |--------------------------------------|
367  * |                                      |
368  * |   Configuration info, first copy     |      9
369  * |                                      |
370  * |--------------------------------------|
371  * |                                      |
372  * |   Configuration info, second copy    |      9 + size of config
373  * |                                      |
374  * |--------------------------------------|
375  */
376 
377 /*
378  * Sizes and offsets of our information
379  */
380 enum {
381 	VINUM_LABEL_OFFSET = 4096,	/* offset of vinum label */
382 	VINUMHEADERLEN = 512,		/* size of vinum label */
383 	VINUM_CONFIG_OFFSET = 4608,	/* offset of first config copy */
384 	MAXCONFIG = 65536,		/* and size of config copy */
385 
386 	/* this is where the data starts */
387 	DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE
388 };
389 
390 /*
391  * hostname is 256 bytes long, but we don't need to shlep
392  * multiple copies in vinum.  We use the host name just
393  * to identify this system, and 32 bytes should be ample
394  * for that purpose
395  */
396 
397 struct vinum_label {
398 	char sysname[VINUMHOSTNAMELEN];	/* system name at time of creation */
399 	char name[MAXDRIVENAME];	/* our name of the drive */
400 	struct timeval date_of_birth;	/* the time it was created */
401 	struct timeval last_update;	/* and the time of last update */
402 	/*
403 	 * total size in bytes of the drive.  This value
404 	 * includes the headers.
405 	 */
406 	off_t drive_size;
407 };
408 
409 struct vinum_hdr {
410 	uint64_t magic;			/* we're long on magic numbers */
411 
412 	/*
413 	 * Size in bytes of each copy of the
414 	 * configuration info.  This must be a multiple
415 	 * of the sector size.
416 	 */
417 	int config_length;
418 	struct vinum_label label;	/* unique label */
419 };
420 
/*
 * Values of the magic field in struct vinum_hdr.
 *
 * VINUM_MAGIC (0x004f4e4956204e49) is what the field should be;
 * stored least significant byte first, its bytes spell "IN VINO".
 * VINUM_NOMAGIC (0x004f4e4956204f4e) is what the field becomes
 * after obliteration; the same byte order spells "NO VINO".
 */
#define VINUM_MAGIC    22322600044678729LL
#define VINUM_NOMAGIC  22322600044678990LL
426 
/* Result codes returned from read_drive_label */
enum drive_label_info {
	DL_CANT_OPEN = 0,		/* invalid partition */
	DL_NOT_OURS = 1,		/* valid part, but no vinum label */
	DL_DELETED_LABEL = 2,		/* valid part, deleted label found */
	DL_WRONG_DRIVE = 3,		/* drive name doesn't match */
	DL_OURS = 4			/* valid partition and label found */
};
435 
436 /*** Drive definitions ***/
437 /*
438  * A drive corresponds to a disk slice.  We use a different term to show
439  * the difference in usage: it doesn't have to be a slice, and could
440  * theoretically be a complete, unpartitioned disk
441  */
442 
443 struct drive {
444 	char devicename[MAXDRIVENAME];	/* name of the slice it's on */
445 	enum drivestate state;		/* current state */
446 	int flags;			/* flags */
447 	int subdisks_allocated;		/* number of entries in sd */
448 	int subdisks_used;		/* and the number used */
449 	int blocksize;			/* size of fs blocks */
450 	int pid;			/* of locker */
451 	u_int64_t sectors_available;	/* number of sectors still available */
452 	int secsperblock;
453 	int lasterror;			/* last error on drive */
454 	int driveno;			/* index of drive in vinum_conf */
455 	int opencount;			/* number of up subdisks */
456 	u_int64_t reads;		/* number of reads on this drive */
457 	u_int64_t writes;		/* number of writes on this drive */
458 	u_int64_t bytes_read;		/* number of bytes read */
459 	u_int64_t bytes_written;	/* number of bytes written */
460 	struct vinum_label label;	/* and the label information */
461 #define DRIVE_MAXACTIVE  30000		/* maximum number of active requests */
462 	int active;			/* current number of reqs outstanding */
463 	int maxactive;			/* max num of reqs ever outstanding */
464 	int freelist_size;		/* entries alloced in free list */
465 	int freelist_entries;		/* entries used in free list */
466 	struct drive_freelist {		/* sorted list of free space on drive */
467 		u_int64_t offset;	/* offset of entry */
468 		u_int64_t sectors;	/* and length in sectors */
469 	} *freelist;
470 	struct partinfo partinfo;	/* partition information */
471 	/* XXX kludge until we get this struct cleaned up */
472 #ifdef _KERNEL
473 	struct vnode *vp;
474 	struct cdev *dev;
475 #else
476 	void	*vp_dummy;
477 	void	*dev_dummy;
478 #endif
479 #ifdef VINUMDEBUG
480 	char lockfilename[16];		/* locked with file */
481 	int lockline;			/* and the line number */
482 #endif
483 };
484 
485 /*** Subdisk definitions ***/
486 
487 struct sd {
488 	char name[MAXSDNAME];		/* name of subdisk */
489 	enum sdstate state;		/* state */
490 	int flags;
491 	int lasterror;			/* last error occurred */
492 	/* offsets in blocks */
493 	int64_t driveoffset;		/* offset on drive */
494 #ifdef _KERNEL
495 	cdev_t	sd_dev;
496 #else
497 	void	*sd_dev_dummy;
498 #endif
499 
500 	/*
501 	 * plexoffset is the offset from the beginning
502 	 * of the plex to the very first part of the
503 	 * subdisk, in sectors.  For striped, RAID-4 and
504 	 * RAID-5 plexes, only the first stripe is
505 	 * located at this offset
506 	 */
507 	int64_t plexoffset;		/* offset in plex */
508 	u_int64_t sectors;		/* and length in sectors */
509 	int plexno;			/* index of plex, if it belongs */
510 	int driveno;			/* index of the drive */
511 	int sdno;			/* our index in vinum_conf */
512 	int plexsdno;			/* and our number in our plex */
513 	/* (undefined if no plex) */
514 	u_int64_t reads;		/* number of reads on this subdisk */
515 	u_int64_t writes;		/* number of writes on this subdisk */
516 	u_int64_t bytes_read;		/* number of bytes read */
517 	u_int64_t bytes_written;	/* number of bytes written */
518 	/* revive parameters */
519 	u_int64_t revived;		/* blkno of current revive request */
520 	int revive_blocksize;		/* revive block size (bytes) */
521 	int revive_interval;		/* and time to wait between transfers */
522 	pid_t reviver;			/* PID of reviving process */
523 	/* init parameters */
524 	u_int64_t initialized;		/* blkno of current init request */
525 	int init_blocksize;		/* init block size (bytes) */
526 	int init_interval;		/* time to wait between transfers */
527 	struct request *waitlist;	/* list of reqs waiting on revive op */
528 };
529 
530 /*** Plex definitions ***/
531 
/*
 * Kinds of plex organization.  The order matters: isstriped()
 * and isparity() below rely on striped < RAID4 < RAID5.
 */
enum plexorg {
	plex_disorg,			/* disorganized */
	plex_concat,			/* concatenated plex */
	plex_striped,			/* striped plex */
	plex_raid4,			/* RAID4 plex */
	plex_raid5			/* RAID5 plex */
};

/*
 * Recognize plex organizations.  p is a pointer to an object with
 * an "organization" member; it is parenthesized so that any pointer
 * expression (for example &plex or base + n) can be passed.
 */

/* striped, RAID 4 or RAID 5 */
#define isstriped(p)	((p)->organization >= plex_striped)

/* RAID 4 or 5 */
#define isparity(p)	((p)->organization >= plex_raid4)
547 
548 struct plex {
549 	char name[MAXPLEXNAME];		/* name of plex */
550 	enum plexorg organization;	/* Plex organization */
551 	enum plexstate state;		/* and current state */
552 #ifdef _KERNEL
553 	cdev_t	plex_dev;
554 #else
555 	void	*plex_dev_dummy;
556 #endif
557 	u_int64_t length;		/* total length of plex (sectors) */
558 	int flags;
559 	int stripesize;			/* size of stripe or raid band,
560 					 * in sectors */
561 	int subdisks;			/* number of associated subdisks */
562 	int subdisks_allocated;		/* number of subdisks allocated
563 					 * space for */
564 	int *sdnos;			/* list of component subdisks */
565 	int plexno;			/* index of plex in vinum_conf */
566 	int volno;			/* index of volume */
567 	int volplexno;			/* number of plex in volume */
568 	/* Statistics */
569 	u_int64_t reads;		/* number of reads on this plex */
570 	u_int64_t writes;		/* number of writes on this plex */
571 	u_int64_t bytes_read;		/* number of bytes read */
572 	u_int64_t bytes_written;	/* number of bytes written */
573 	u_int64_t recovered_reads;	/* number of recovered read
574 					 * operations */
575 	u_int64_t degraded_writes;	/* number of degraded writes */
576 	u_int64_t parityless_writes;	/* number of parityless writes */
577 	u_int64_t multiblock;		/* requests that needed more than
578 					 * one block */
579 	u_int64_t multistripe;		/* requests that needed more than
580 					 * one stripe */
581 	int sddowncount;		/* number of subdisks down */
582 
583 	/* Lock information */
584 	int usedlocks;			/* number currently in use */
585 	int lockwaits;			/* and number of waits for locks */
586 	off_t checkblock;		/* block number for parity op */
587 	struct rangelock *lock;		/* ranges of locked addresses */
588 };
589 
590 /*** Volume definitions ***/
591 
592 /* Address range definitions, for locking volumes */
593 struct rangelock {
594 	vinum_off_t stripe;		/* address + 1 of the range being locked  */
595 	struct buf *bp;		/* user's buffer pointer */
596 };
597 
598 struct volume {
599 	char name[MAXVOLNAME];		/* name of volume */
600 	enum volumestate state;		/* current state */
601 	int plexes;			/* number of plexes */
602 	int preferred_plex;		/* plex to read from, -1 for
603 					 * round-robin */
604 #ifdef _KERNEL
605 	cdev_t	vol_dev;
606 #else
607 	void	*vol_dev_dummy;
608 #endif
609 
610 	/*
611 	 * index of plex used for last read, for
612 	 * round-robin.
613 	 */
614 	int last_plex_read;
615 	int volno;			/* volume number */
616 	int flags;			/* status and configuration flags */
617 	int openflags;			/* flags supplied to last open(2) */
618 	u_int64_t size;			/* size of volume */
619 	int blocksize;			/* logical block size */
620 	int active;			/* number of outstanding
621 					 * requests active */
622 	int subops;			/* and the number of suboperations */
623 	/* Statistics */
624 	u_int64_t bytes_read;		/* number of bytes read */
625 	u_int64_t bytes_written;	/* number of bytes written */
626 	u_int64_t reads;		/* number of reads on this volume */
627 	u_int64_t writes;		/* number of writes on this volume */
628 	u_int64_t recovered_reads;	/* reads recovered from another plex */
629 
630 	/*
631 	 * Unlike subdisks in the plex, space for the
632 	 * plex pointers is static.
633 	 */
634 	int plex[MAXPLEX];		/* index of plexes */
635 };
636 
/*
 * Table expansion.  Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly.
 *
 *   table     - lvalue holding the table pointer; its address is
 *               passed to expand_table()
 *   element   - type name of one table entry
 *   oldcount  - modifiable lvalue holding the current entry count;
 *               it is evaluated more than once, so it must not
 *               have side effects
 *   increment - number of entries to add
 *
 * expand_table() receives the old and the new table size in bytes.
 * The macro expands to a single statement (do { } while (0)), so it
 * must be followed by a semicolon and is safe as the body of an
 * unbraced if/else.  All arguments are parenthesized so expressions
 * such as "a ? 1 : 2" can be passed as increment.
 */
#define EXPAND(table, element, oldcount, increment)			\
do {									\
	expand_table((void **) &(table),				\
		     (oldcount) * sizeof (element),			\
		     ((oldcount) + (increment)) * sizeof (element));	\
	(oldcount) += (increment);					\
} while (0)
649 
/*
 * Summary of vinum's memory usage.
 */
struct meminfo {
	/* number of malloced blocks */
	int mallocs;
	/* total amount malloced */
	int total_malloced;
	/* maximum number of mallocs */
	int highwater;
	/* pointer to kernel table */
	struct mc *malloced;
};
659 
660 #define MCFILENAMELEN	16
661 struct mc {
662 	struct timeval time;
663 	int seq;
664 	int size;
665 	short line;
666 	caddr_t address;
667 	char file[MCFILENAMELEN];
668 };
669 
/*
 * These enums are used by the state transition
 * routines.  The values form a bit map:
 *
 * Bit 0 (1): Other plexes in the volume are down
 * Bit 1 (2): Other plexes in the volume are up
 * Bit 2 (4): The current plex is up
 *
 * Maybe they should be local to state.c
 */
enum volplexstate {
	volplex_onlyusdown = 0,		/* we're the only plex, and we're down */
	volplex_alldown = 1,		/* another plex is down, and so are we */
	volplex_otherup = 2,		/* another plex is up */
	volplex_otherupdown = 3,	/* other plexes are up and down */
	volplex_onlyus = 4,		/* we're up and alone */
	volplex_onlyusup = 5,		/* only we are up, others are down */
	volplex_allup = 6,		/* all plexes are up */
	volplex_someup = 7		/* some plexes are up, including us */
};
693 
/* state map for plex: one bit per subdisk state */
enum sdstates {
	sd_emptystate		= 1 << 0,
	sd_downstate		= 1 << 1,	/* SD is down */
	sd_crashedstate		= 1 << 2,	/* SD is crashed */
	sd_obsoletestate	= 1 << 3,	/* SD is obsolete */
	sd_stalestate		= 1 << 4,	/* SD is stale */
	sd_rebornstate		= 1 << 5,	/* SD is reborn */
	sd_upstate		= 1 << 6,	/* SD is up */
	sd_initstate		= 1 << 7,	/* SD is initializing */
	sd_initializedstate	= 1 << 8,	/* SD is initialized */
	sd_otherstate		= 1 << 9,	/* SD is in some other state */
};
707 
/*
 * Flags passed as a parameter to set_<foo>_state.  They are
 * defined here because the external definitions need to
 * know them.
 */
enum setstateflags {
	/* no flags */
	setstate_none = 0,
	/* force the state change */
	setstate_force = 1,
	/* we're currently configuring, don't save */
	setstate_configuring = 2,
};
720 
/* Operations for parityops to perform. */
enum parityop {
	checkparity = 0,
	rebuildparity = 1,
	/* rebuildparity with the -v option */
	rebuildandcheckparity = 2,
};
727 
728 #ifdef VINUMDEBUG
729 
/*
 * Debugging stuff: one bit per debug feature.
 */
enum debugflags {
	/* show buffer information during requests */
	DEBUG_ADDRESSES		= 1 << 0,
	/* show the value of vp->v_numoutput */
	DEBUG_NUMOUTPUT		= 1 << 1,
	/* go into debugger in complete_rqe */
	DEBUG_RESID		= 1 << 2,
	/* keep a circular buffer of last requests */
	DEBUG_LASTREQS		= 1 << 3,
	/* print info about revive conflicts */
	DEBUG_REVIVECONFLICT	= 1 << 4,
	/* print info about EOF detection */
	DEBUG_EOFINFO		= 1 << 5,
	/* keep info about Frees */
	DEBUG_MEMFREE		= 1 << 6,
	/* pretend our drives are 100 times the size */
	DEBUG_BIGDRIVE		= 1 << 7,
	/* go into remote gdb */
	DEBUG_REMOTEGDB		= 1 << 8,
	/* log various relatively harmless warnings */
	DEBUG_WARNINGS		= 1 << 9,
};
749 
750 #endif
751