xref: /dragonfly/sys/dev/raid/vinum/vinumvar.h (revision dca3c15d)
1 /*-
2  * Copyright (c) 1997, 1998, 1999
3  *	Nan Yang Computer Services Limited.  All rights reserved.
4  *
5  *  Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
6  *
7  *  Written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by Nan Yang Computer
23  *	Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  */
40 
41 #include <sys/time.h>
42 #include "vinumstate.h"
43 
/*
 * Signed 64-bit quantity used for disk block numbers and offsets.
 */
typedef int64_t vinum_off_t;
48 
49 /*
50  * Some configuration maxima.  They're an enum because
51  * we can't define global constants.  Sorry about that.
52  *
53  * These aren't as bad as they look: most of them are soft limits.
54  */
55 
56 #define VINUMROOT
57 enum constants {
58 	VINUM_HEADER = 512,		/* size of header on disk */
59 	MAXCONFIGLINE = 1024,		/* maximum size of one config line */
60 	MINVINUMSLICE = 1048576,	/* minimum size of a slice */
61 
62 	VINUM_CDEV_MAJOR = 91,		/* major number for character device */
63 
64 	ROUND_ROBIN_READPOL = -1,	/* round robin read policy */
65 
66 	/*
67 	 * type field in minor number
68 	 */
69 	VINUM_VOLUME_TYPE = 0,
70 	VINUM_PLEX_TYPE = 1,
71 	VINUM_SD_TYPE = 2,
72 	VINUM_DRIVE_TYPE = 3,
73 	VINUM_SUPERDEV_TYPE = 4,	/* super device. */
74 	VINUM_RAWPLEX_TYPE = 5,		/* anonymous plex */
75 	VINUM_RAWSD_TYPE = 6,		/* anonymous subdisk */
76 
77 	/*
78 	 * Shifts for the individual fields in the device
79 	 */
80 	VINUM_TYPE_SHIFT = 28,
81 	VINUM_VOL_SHIFT = 0,
82 	VINUM_PLEX_SHIFT = 16,
83 	VINUM_SD_SHIFT = 20,
84 	VINUM_VOL_WIDTH = 8,
85 	VINUM_PLEX_WIDTH = 3,
86 	VINUM_SD_WIDTH = 8,
87 
88 	/*
89 	 * Shifts for the second half of raw plex and
90 	 * subdisk numbers
91 	 */
92 	VINUM_RAWPLEX_SHIFT = 8,	/* shift the second half this much */
93 	VINUM_RAWPLEX_WIDTH = 12,	/* width of second half */
94 
95 	MAJORDEV_SHIFT = 8,
96 
97 	MAXPLEX = 8,			/* max number of plexes in a volume */
98 	MAXSD = 256,			/* max number of subdisks in a plex */
99 	MAXDRIVENAME = 32,		/* max length of a device name */
100 	MAXSDNAME = 64,			/* max length of a subdisk name */
101 	MAXPLEXNAME = 64,		/* max length of a plex name */
102 	MAXVOLNAME = 64,		/* max length of a volume name */
103 	MAXNAME = 64,			/* max length of any name */
104 
105 
106 	/*
107 	 * Define a minor device number.
108 	 * This is not used directly; instead, it's
109 	 * called by the other macros.
110 	 */
111 #define VINUMMINOR(v,p,s,t)  (  (v << VINUM_VOL_SHIFT)		\
112 			      | (p << VINUM_PLEX_SHIFT)		\
113 			      | (s << VINUM_SD_SHIFT)		\
114 			      | (t << VINUM_TYPE_SHIFT) )
115 
116 /* Create device minor numbers */
117 
118 #ifdef _KERNEL
119 
120 #define VINUMDEV(v,p,s,t)  	\
121 		VINUMMINOR (v, p, s, t)
122 
123 #define VINUM_PLEX(p)		\
124 		 ((VINUM_RAWPLEX_TYPE << VINUM_TYPE_SHIFT) 	\
125 		 | (p & 0xff)					\
126 		 | ((p & ~0xff) << 8))
127 
128 #define VINUM_SD(s)		\
129 		 ((VINUM_RAWSD_TYPE << VINUM_TYPE_SHIFT) 	\
130 		 | (s & 0xff)					\
131 		 | ((s & ~0xff) << 8))
132 
133 #endif
134 
135     /* Create a bit mask for x bits */
136 #define MASK(x)	 ((1 << (x)) - 1)
137 
138     /* Create a raw block device minor number */
139 #define VINUMRMINOR(d,t)	\
140 		( ((d & MASK(VINUM_VOL_WIDTH)) << VINUM_VOL_SHIFT)	\
141 		 | ((d & ~MASK(VINUM_VOL_WIDTH)) <<			\
142 			(VINUM_PLEX_SHIFT + VINUM_VOL_WIDTH))		\
143 		 | (t << VINUM_TYPE_SHIFT) )
144 
145     /* extract device type */
146 #define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 7)
147 
148     /*
149      * This mess is used to catch people who compile
150      * a debug vinum(8) and non-debug kernel module,
151      * or the other way round.
152      */
153 
154 #ifdef VINUMDEBUG
155 
156 /* superdevice number */
157 #define	VINUM_SUPERDEV		VINUMMINOR(1, 0, 0, VINUM_SUPERDEV_TYPE)
158 
159 /* non-debug superdevice number */
160 #define	VINUM_WRONGSUPERDEV	VINUMMINOR(2, 0, 0, VINUM_SUPERDEV_TYPE)
161 
162 #else
163 
164 /* superdevice number */
165 #define	VINUM_SUPERDEV		VINUMMINOR(2, 0, 0, VINUM_SUPERDEV_TYPE)
166 
167 /* debug superdevice number */
168 #define	VINUM_WRONGSUPERDEV	VINUMMINOR(1, 0, 0, VINUM_SUPERDEV_TYPE)
169 
170 #endif
171 
172 /* daemon superdevice number */
173 #define	VINUM_DAEMON_DEV	VINUMMINOR(0, 0, 0, VINUM_SUPERDEV_TYPE)
174 
175 	/*
176 	 * the number of object entries to cater for initially, and also the
177 	 * value by which they are incremented.  It doesn't take long
178 	 * to extend them, so theoretically we could start with 1 of each, but
179 	 * it's untidy to allocate such small areas.  These values are
180 	 * probably too small.
181 	 */
182 
183 	INITIAL_DRIVES = 4,
184 	INITIAL_VOLUMES = 4,
185 	INITIAL_PLEXES = 8,
186 	INITIAL_SUBDISKS = 16,
187 	INITIAL_SUBDISKS_IN_PLEX = 4,	/* num subdisks to alloc to a plex */
188 	INITIAL_SUBDISKS_IN_DRIVE = 4,	/* num subdisks to alloc to a drive */
189 	INITIAL_DRIVE_FREELIST = 16,	/* num entries in drive freelist */
190 	PLEX_REGION_TABLE_SIZE = 8,	/* num entries in plex region tables */
191 	PLEX_LOCKS = 256,		/* num locks to alloc to a plex */
192 	MAX_REVIVE_BLOCKSIZE = MAXPHYS,	/* maximum revive block size */
193 	DEFAULT_REVIVE_BLOCKSIZE = 65536,/* default revive block size */
194 	VINUMHOSTNAMELEN = 32,		/* host name field in label */
195 };
196 
197 /* device numbers */
198 
199 /*
200  *  31 30    28 27                   20 19 18    16 15         8 7      0
201  * |---------------------------------------------------------------------|
202  * |X |  Type  |    Subdisk number     | X| Plex   |      Major |  volno |
203  * |---------------------------------------------------------------------|
204  *
205  *    0x2                 03                 1           19	    06
206  *
207  * The fields in the minor number are interpreted as follows:
208  *
209  * Volume:              Only type and volume number are relevant
210  * Plex in volume:      type, plex number in volume and volume number
211  *			are relevant
212  * raw plex:            type, plex number is made of bits 27-16 and 7-0
213  * raw subdisk:         type, subdisk number is made of bits 27-16 and 7-0
214  */
215 
#if 0

/*
 * Bit-field view of a device number.  It is compiled out and not used
 * anywhere; consider removing it.
 */
struct devcode {
	/*
	 * CARE.  The field order below assumes a big-endian word.
	 * On a little-endian system the fields come out the wrong
	 * way around.
	 */
	unsigned volume:8;		/* volume number: up to 256 volumes */
	unsigned major:8;		/* the major number fits here */
	unsigned plex:3;		/* plex number: up to 8 per volume */
	unsigned unused:1;		/* spare bit, up for grabs */
	unsigned sd:8;			/* subdisk number: up to 256 per plex */
	unsigned type:3;		/* type of object, see list below */
	/*
	 * Values of the type field:
	 *   VINUM_VOLUME = 0,
	 *   VINUM_PLEX = 1,
	 *   VINUM_SUBDISK = 2,
	 *   VINUM_DRIVE = 3,
	 *   VINUM_SUPERDEV = 4,
	 *   VINUM_RAWPLEX = 5,
	 *   VINUM_RAWSD = 6
	 */
	unsigned signbit:1;		/* pads the word to 32 bits */
};

#endif
243 
/* Path components shared by all vinum device names. */
#define VINUM_BASE  "vinum/"
#define VINUM_DIR   "/dev/vinum"

/*
 * These definitions help catch userland/kernel mismatches: a debug
 * build and a non-debug build use different super device names.
 *
 * The test is #ifdef (not #if) so that it selects the same branch as
 * the #ifdef VINUMDEBUG that chooses the super device numbers, however
 * VINUMDEBUG happens to be defined.
 */
#ifdef VINUMDEBUG

/* normal super device (the wrong one for a debug build) */
#define VINUM_WRONGSUPERDEV_NAME	VINUM_DIR "/control"
#define VINUM_WRONGSUPERDEV_BASE	VINUM_BASE "control"

/* debug super device */
#define VINUM_SUPERDEV_NAME		VINUM_DIR "/Control"
#define VINUM_SUPERDEV_BASE		VINUM_BASE "Control"

#else

/* debug super device (the wrong one for a non-debug build) */
#define VINUM_WRONGSUPERDEV_NAME	VINUM_DIR "/Control"
#define VINUM_WRONGSUPERDEV_BASE	VINUM_BASE "Control"

/* normal super device */
#define VINUM_SUPERDEV_NAME		VINUM_DIR "/control"
#define VINUM_SUPERDEV_BASE		VINUM_BASE "control"

#endif

/* super device for daemon only */
#define VINUM_DAEMON_DEV_NAME		VINUM_DIR "/controld"
#define VINUM_DAEMON_DEV_BASE		VINUM_BASE "controld"
276 
/*
 * Flag bits shared by all object types.  Most bits are meaningful for
 * only some kinds of object, but a 32 bit flags word has room for all
 * of them, so they live in one enum.  Each flag is a single bit.
 */
enum objflags {
	VF_LOCKED = 1 << 0,		/* access to this object is locked */
	VF_LOCKING = 1 << 1,		/* we want access to this object */
	VF_OPEN = 1 << 2,		/* the object has openers */
	VF_WRITETHROUGH = 1 << 3,	/* volume: write through */
	VF_INITED = 1 << 4,		/* the unit has been initialized */

	/* bit 5 (0x20) is unused; was VF_WLABEL: label area is writable */
	VF_LABELLING = 1 << 6,		/* the unit is being labelled now */
	VF_WANTED = 1 << 7,		/* waiting to obtain a lock */
	VF_RAW = 1 << 8,		/* raw volume (no file system) */
	VF_LOADED = 1 << 9,		/* the module is loaded */
	VF_CONFIGURING = 1 << 10,	/* someone is changing the config */
	VF_WILL_CONFIGURE = 1 << 11,	/* someone wants to change the config */
	VF_CONFIG_INCOMPLETE = 1 << 12,	/* config change not finished yet */
	VF_CONFIG_SETUPSTATE = 1 << 13,	/* set a vol up if all plexes empty */
	VF_READING_CONFIG = 1 << 14,	/* reading config database from disk */
	VF_FORCECONFIG = 1 << 15,	/* config drives even with diff names */
	VF_NEWBORN = 1 << 16,		/* objects: we've just created it */
	VF_CONFIGURED = 1 << 17,	/* drives: we read the config */
	VF_STOPPING = 1 << 18,		/* vinum_conf: stop on last close */

	VF_DAEMONOPEN = 1 << 19,	/* superdev only: the daemon has
					 * us open */

	VF_CREATED = 1 << 20,		/* vols: freshly created, more
					 * than new */
	VF_HOTSPARE = 1 << 21,		/* drives: use as hot spare */
	VF_RETRYERRORS = 1 << 22,	/* don't down subdisks on I/O errors */
};
312 
/*
 * Global configuration information for the vinum subsystem: the four
 * object tables, how many entries each has allocated and in use, the
 * global flags word and request accounting.
 */
struct _vinum_conf {
	/* Pointers to vinum structures */
	struct drive *drive;
	struct sd *sd;
	struct plex *plex;
	struct volume *volume;

	/* the number allocated */
	int drives_allocated;
	int subdisks_allocated;
	int plexes_allocated;
	int volumes_allocated;

	/* and the number currently in use */
	int drives_used;
	int subdisks_used;
	int plexes_used;
	int volumes_used;

	int flags;

#define VINUM_MAXACTIVE  30000	/* max number of active requests */
	int active;		/* current number of requests outstanding */
	int maxactive;		/* max number of requests ever outstanding */
	/*
	 * Tested with #ifdef, like the other VINUMDEBUG-only members in
	 * this file, so that all structure layouts agree however
	 * VINUMDEBUG is defined.
	 */
#ifdef VINUMDEBUG
	struct request *lastrq;
	struct bio *lastbio;
#endif
	int physbufs;
};
344 
/*
 * Shorthand for the members of the global vinum_conf object.
 */
#define DRIVE	(vinum_conf.drive)
#define SD	(vinum_conf.sd)
#define PLEX	(vinum_conf.plex)
#define VOL	(vinum_conf.volume)
#define VFLAGS	(vinum_conf.flags)
351 
352 /*
353  * Slice header
354  *
355  * Vinum drives start with this structure:
356  *
357  *\                                            Sector
358  * |--------------------------------------|
359  * |   PDP-11 memorial boot block         |      0
360  * |--------------------------------------|
361  * |   Disk label, maybe                  |      1
362  * |--------------------------------------|
363  * |   Slice definition  (vinum_hdr)      |      8
364  * |--------------------------------------|
365  * |                                      |
366  * |   Configuration info, first copy     |      9
367  * |                                      |
368  * |--------------------------------------|
369  * |                                      |
370  * |   Configuration info, second copy    |      9 + size of config
371  * |                                      |
372  * |--------------------------------------|
373  */
374 
/*
 * Sizes and offsets (in bytes, except DATASTART) of the information
 * vinum keeps at the start of a drive.
 */
enum {
	/* offset of vinum label */
	VINUM_LABEL_OFFSET = 4096,

	/* size of vinum label */
	VINUMHEADERLEN = 512,

	/* offset of first config copy: directly after the label */
	VINUM_CONFIG_OFFSET = VINUM_LABEL_OFFSET + VINUMHEADERLEN,

	/* size of one config copy */
	MAXCONFIG = 65536,

	/*
	 * This is where the data starts: after both config copies,
	 * expressed in units of DEV_BSIZE.
	 */
	DATASTART = (VINUM_CONFIG_OFFSET + 2 * MAXCONFIG) / DEV_BSIZE
};
387 
388 /*
389  * hostname is 256 bytes long, but we don't need to shlep
390  * multiple copies in vinum.  We use the host name just
391  * to identify this system, and 32 bytes should be ample
392  * for that purpose
393  */
394 
395 struct vinum_label {
396 	char sysname[VINUMHOSTNAMELEN];	/* system name at time of creation */
397 	char name[MAXDRIVENAME];	/* our name of the drive */
398 	struct timeval date_of_birth;	/* the time it was created */
399 	struct timeval last_update;	/* and the time of last update */
400 	/*
401 	 * total size in bytes of the drive.  This value
402 	 * includes the headers.
403 	 */
404 	off_t drive_size;
405 };
406 
407 struct vinum_hdr {
408 	uint64_t magic;			/* we're long on magic numbers */
409 
410 	/*
411 	 * Size in bytes of each copy of the
412 	 * configuration info.  This must be a multiple
413 	 * of the sector size.
414 	 */
415 	int config_length;
416 	struct vinum_label label;	/* unique label */
417 };
418 
/*
 * Magic number of a valid vinum header.  Stored as a little-endian
 * 64-bit word, the bytes read "IN VINO" followed by a NUL.
 */
#define VINUM_MAGIC    0x004F4E4956204E49LL

/*
 * The magic number becomes this after obliteration; the same way, the
 * bytes read "NO VINO".
 */
#define VINUM_NOMAGIC  0x004F4E4956204F4ELL
424 
/*
 * Result codes returned from read_drive_label.
 */
enum drive_label_info {
	DL_CANT_OPEN = 0,		/* the partition is invalid */
	DL_NOT_OURS = 1,		/* valid partition without a vinum label */
	DL_DELETED_LABEL = 2,		/* valid partition with a deleted label */
	DL_WRONG_DRIVE = 3,		/* the drive name doesn't match */
	DL_OURS = 4			/* valid partition, label found */
};
433 
434 /*** Drive definitions ***/
435 /*
436  * A drive corresponds to a disk slice.  We use a different term to show
437  * the difference in usage: it doesn't have to be a slice, and could
438  * theoretically be a complete, unpartitioned disk
439  */
440 
441 struct drive {
442 	char devicename[MAXDRIVENAME];	/* name of the slice it's on */
443 	enum drivestate state;		/* current state */
444 	int flags;			/* flags */
445 	int subdisks_allocated;		/* number of entries in sd */
446 	int subdisks_used;		/* and the number used */
447 	int blocksize;			/* size of fs blocks */
448 	int pid;			/* of locker */
449 	u_int64_t sectors_available;	/* number of sectors still available */
450 	int secsperblock;
451 	int lasterror;			/* last error on drive */
452 	int driveno;			/* index of drive in vinum_conf */
453 	int opencount;			/* number of up subdisks */
454 	u_int64_t reads;		/* number of reads on this drive */
455 	u_int64_t writes;		/* number of writes on this drive */
456 	u_int64_t bytes_read;		/* number of bytes read */
457 	u_int64_t bytes_written;	/* number of bytes written */
458 	struct vinum_label label;	/* and the label information */
459 #define DRIVE_MAXACTIVE  30000		/* maximum number of active requests */
460 	int active;			/* current number of reqs outstanding */
461 	int maxactive;			/* max num of reqs ever outstanding */
462 	int freelist_size;		/* entries alloced in free list */
463 	int freelist_entries;		/* entries used in free list */
464 	struct drive_freelist {		/* sorted list of free space on drive */
465 		u_int64_t offset;	/* offset of entry */
466 		u_int64_t sectors;	/* and length in sectors */
467 	} *freelist;
468 	struct partinfo partinfo;	/* partition information */
469 	/* XXX kludge until we get this struct cleaned up */
470 #if _KERNEL
471 	struct vnode *vp;
472 	struct cdev *dev;
473 #else
474 	void	*vp_dummy;
475 	void	*dev_dummy;
476 #endif
477 #ifdef VINUMDEBUG
478 	char lockfilename[16];		/* locked with file */
479 	int lockline;			/* and the line number */
480 #endif
481 };
482 
483 /*** Subdisk definitions ***/
484 
485 struct sd {
486 	char name[MAXSDNAME];		/* name of subdisk */
487 	enum sdstate state;		/* state */
488 	int flags;
489 	int lasterror;			/* last error occurred */
490 	/* offsets in blocks */
491 	int64_t driveoffset;		/* offset on drive */
492 #ifdef _KERNEL
493 	cdev_t	sd_dev;
494 #else
495 	void	*sd_dev_dummy;
496 #endif
497 
498 	/*
499 	 * plexoffset is the offset from the beginning
500 	 * of the plex to the very first part of the
501 	 * subdisk, in sectors.  For striped, RAID-4 and
502 	 * RAID-5 plexes, only the first stripe is
503 	 * located at this offset
504 	 */
505 	int64_t plexoffset;		/* offset in plex */
506 	u_int64_t sectors;		/* and length in sectors */
507 	int plexno;			/* index of plex, if it belongs */
508 	int driveno;			/* index of the drive */
509 	int sdno;			/* our index in vinum_conf */
510 	int plexsdno;			/* and our number in our plex */
511 	/* (undefined if no plex) */
512 	u_int64_t reads;		/* number of reads on this subdisk */
513 	u_int64_t writes;		/* number of writes on this subdisk */
514 	u_int64_t bytes_read;		/* number of bytes read */
515 	u_int64_t bytes_written;	/* number of bytes written */
516 	/* revive parameters */
517 	u_int64_t revived;		/* blkno of current revive request */
518 	int revive_blocksize;		/* revive block size (bytes) */
519 	int revive_interval;		/* and time to wait between transfers */
520 	pid_t reviver;			/* PID of reviving process */
521 	/* init parameters */
522 	u_int64_t initialized;		/* blkno of current init request */
523 	int init_blocksize;		/* init block size (bytes) */
524 	int init_interval;		/* time to wait between transfers */
525 	struct request *waitlist;	/* list of reqs waiting on revive op */
526 };
527 
/*** Plex definitions ***/

/*
 * Kinds of plex organization.  The order matters: isstriped() and
 * isparity() below compare against it.
 */
enum plexorg {
	plex_disorg,			/* disorganized */
	plex_concat,			/* concatenated plex */
	plex_striped,			/* striped plex */
	plex_raid4,			/* RAID4 plex */
	plex_raid5			/* RAID5 plex */
};

/*
 * Recognize plex organizations.  p is a pointer to an object with an
 * "organization" member; it is parenthesized in the expansion so that
 * any pointer expression (for example &table[i]) may be passed.
 */
/* striped, RAID-4 or RAID-5 */
#define isstriped(p)	((p)->organization >= plex_striped)

/* RAID-4 or RAID-5 */
#define isparity(p)	((p)->organization >= plex_raid4)
545 
546 struct plex {
547 	char name[MAXPLEXNAME];		/* name of plex */
548 	enum plexorg organization;	/* Plex organization */
549 	enum plexstate state;		/* and current state */
550 #ifdef _KERNEL
551 	cdev_t	plex_dev;
552 #else
553 	void	*plex_dev_dummy;
554 #endif
555 	u_int64_t length;		/* total length of plex (sectors) */
556 	int flags;
557 	int stripesize;			/* size of stripe or raid band,
558 					 * in sectors */
559 	int subdisks;			/* number of associated subdisks */
560 	int subdisks_allocated;		/* number of subdisks allocated
561 					 * space for */
562 	int *sdnos;			/* list of component subdisks */
563 	int plexno;			/* index of plex in vinum_conf */
564 	int volno;			/* index of volume */
565 	int volplexno;			/* number of plex in volume */
566 	/* Statistics */
567 	u_int64_t reads;		/* number of reads on this plex */
568 	u_int64_t writes;		/* number of writes on this plex */
569 	u_int64_t bytes_read;		/* number of bytes read */
570 	u_int64_t bytes_written;	/* number of bytes written */
571 	u_int64_t recovered_reads;	/* number of recovered read
572 					 * operations */
573 	u_int64_t degraded_writes;	/* number of degraded writes */
574 	u_int64_t parityless_writes;	/* number of parityless writes */
575 	u_int64_t multiblock;		/* requests that needed more than
576 					 * one block */
577 	u_int64_t multistripe;		/* requests that needed more than
578 					 * one stripe */
579 	int sddowncount;		/* number of subdisks down */
580 
581 	/* Lock information */
582 	int usedlocks;			/* number currently in use */
583 	int lockwaits;			/* and number of waits for locks */
584 	off_t checkblock;		/* block number for parity op */
585 	struct rangelock *lock;		/* ranges of locked addresses */
586 };
587 
588 /*** Volume definitions ***/
589 
590 /* Address range definitions, for locking volumes */
591 struct rangelock {
592 	vinum_off_t stripe;		/* address + 1 of the range being locked  */
593 	struct buf *bp;		/* user's buffer pointer */
594 };
595 
596 struct volume {
597 	char name[MAXVOLNAME];		/* name of volume */
598 	enum volumestate state;		/* current state */
599 	int plexes;			/* number of plexes */
600 	int preferred_plex;		/* plex to read from, -1 for
601 					 * round-robin */
602 #ifdef _KERNEL
603 	cdev_t	vol_dev;
604 #else
605 	void	*vol_dev_dummy;
606 #endif
607 
608 	/*
609 	 * index of plex used for last read, for
610 	 * round-robin.
611 	 */
612 	int last_plex_read;
613 	int volno;			/* volume number */
614 	int flags;			/* status and configuration flags */
615 	int openflags;			/* flags supplied to last open(2) */
616 	u_int64_t size;			/* size of volume */
617 	int blocksize;			/* logical block size */
618 	int active;			/* number of outstanding
619 					 * requests active */
620 	int subops;			/* and the number of suboperations */
621 	/* Statistics */
622 	u_int64_t bytes_read;		/* number of bytes read */
623 	u_int64_t bytes_written;	/* number of bytes written */
624 	u_int64_t reads;		/* number of reads on this volume */
625 	u_int64_t writes;		/* number of writes on this volume */
626 	u_int64_t recovered_reads;	/* reads recovered from another plex */
627 
628 	/*
629 	 * Unlike subdisks in the plex, space for the
630 	 * plex pointers is static.
631 	 */
632 	int plex[MAXPLEX];		/* index of plexes */
633 };
634 
/*
 * Table expansion.  Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly.
 *
 * table and oldcount must be lvalues: the address of table is passed
 * to expand_table() together with the old and new sizes in bytes, and
 * oldcount is then advanced by increment.  All arguments are
 * parenthesized in the expansion, so increment may be any expression.
 * oldcount and increment are evaluated more than once; do not pass
 * expressions with side effects.
 *
 * The expansion is a brace-enclosed block, not a single statement, so
 * take care when using it as the unbraced body of an if with an else.
 */
#define EXPAND(table, element, oldcount, increment)         \
{							    \
      expand_table((void **) &(table),			    \
		   (oldcount) * sizeof (element),	    \
		   ((oldcount) + (increment)) * sizeof (element)); \
      (oldcount) += (increment);			    \
}
647 
/*
 * Summary of vinum's memory usage.
 */
struct meminfo {
	int mallocs;		/* how many blocks are malloced */
	int total_malloced;	/* total amount malloced */
	int highwater;		/* maximum number of mallocs */
	struct mc *malloced;	/* points to the kernel table */
};
657 
658 #define MCFILENAMELEN	16
659 struct mc {
660 	struct timeval time;
661 	int seq;
662 	int size;
663 	short line;
664 	caddr_t address;
665 	char file[MCFILENAMELEN];
666 };
667 
/*
 * These enums are used by the state transition routines.  The values
 * form a bit map:
 *
 * Bit 0: Other plexes in the volume are down
 * Bit 1: Other plexes in the volume are up
 * Bit 2: The current plex is up
 *
 * Maybe they should be local to state.c
 */
enum volplexstate {
	volplex_onlyusdown = 0,		/* we're the only plex, and
					 * we're down */
	volplex_alldown = 1,		/* another plex is down, and
					 * so are we */
	volplex_otherup = 2,		/* another plex is up */
	volplex_otherupdown = 3,	/* other plexes are up and down */
	volplex_onlyus = 4,		/* we're up and alone */
	volplex_onlyusup = 5,		/* only we are up, others are down */
	volplex_allup = 6,		/* all plexes are up */
	volplex_someup = 7		/* some plexes are up, including us */
};
691 
/*
 * State map for plex: one bit per subdisk state.
 */
enum sdstates {
	sd_emptystate = 1 << 0,
	sd_downstate = 1 << 1,		/* subdisk is down */
	sd_crashedstate = 1 << 2,	/* subdisk is crashed */
	sd_obsoletestate = 1 << 3,	/* subdisk is obsolete */
	sd_stalestate = 1 << 4,		/* subdisk is stale */
	sd_rebornstate = 1 << 5,	/* subdisk is reborn */
	sd_upstate = 1 << 6,		/* subdisk is up */
	sd_initstate = 1 << 7,		/* subdisk is initializing */
	sd_initializedstate = 1 << 8,	/* subdisk is initialized */
	sd_otherstate = 1 << 9,		/* subdisk is in some other state */
};
705 
/*
 * Flags passed as a parameter to set_<foo>_state.  They are defined
 * here because the external definitions need to know them.
 */
enum setstateflags {
	setstate_none = 0,		/* no flags */
	setstate_force = 1 << 0,	/* force the state change */
	setstate_configuring = 1 << 1,	/* we're configuring right now,
					 * don't save */
};
718 
/*
 * The operations that parityops can perform.
 */
enum parityop {
	checkparity = 0,
	rebuildparity = 1,
	rebuildandcheckparity = 2,	/* rebuildparity with the -v option */
};
725 
#ifdef VINUMDEBUG

/*
 * Debugging flags, one bit each.  Only present in debug builds.
 */
enum debugflags {
	DEBUG_ADDRESSES = 1 << 0,	/* show buffer information during
					 * requests */
	DEBUG_NUMOUTPUT = 1 << 1,	/* show the value of vp->v_numoutput */
	DEBUG_RESID = 1 << 2,		/* go into debugger in complete_rqe */
	DEBUG_LASTREQS = 1 << 3,	/* keep a circular buffer of the
					 * last requests */
	DEBUG_REVIVECONFLICT = 1 << 4,	/* print info about revive conflicts */
	DEBUG_EOFINFO = 1 << 5,		/* print info about EOF detection */
	DEBUG_MEMFREE = 1 << 6,		/* keep info about Frees */
	DEBUG_BIGDRIVE = 1 << 7,	/* pretend our drives are 100 times
					 * the size */
	DEBUG_REMOTEGDB = 1 << 8,	/* go into remote gdb */
	DEBUG_WARNINGS = 1 << 9,	/* log various relatively harmless
					 * warnings */
};

/* In a debug i386 kernel, redirect longjmp to LongJmp to test our longjmps. */
#if defined(_KERNEL) && defined(__i386__)
#define longjmp LongJmp
#endif

#endif
754