1 /*
2  *	Copyright 1996, University Corporation for Atmospheric Research
3  *	See netcdf/COPYRIGHT file for copying and redistribution conditions.
4  */
5 /* $Id: posixio.c,v 1.89 2010/05/22 21:59:08 dmh Exp $ */
6 
7 #include <config.h>
8 #include <assert.h>
9 #include <stdlib.h>
10 #include <errno.h>
11 #ifndef ENOERR
12 #define ENOERR 0
13 #endif
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include <fcntl.h>
17 #include <string.h>
18 #ifdef _MSC_VER /* Microsoft Compilers */
19 #include <io.h>
20 /* Take the following warning disable out when NetCDF is updated */
21 /* to support 64-bit versions of "read" and "write" used below */
22 /* in "px_pgin" and "px_pgout" */
23 #  ifdef _WIN64
24 #    pragma warning ( disable : 4267 )
25 #  endif
26 #endif
27 #ifdef __BORLANDC__
28 #include <io.h>
29 #endif
30 #ifdef HAVE_UNISTD_H
31 #include <unistd.h>
32 #endif
33 #ifndef HAVE_SSIZE_T
34 #define ssize_t int
35 #endif
36 
37 #ifndef SEEK_SET
38 #define SEEK_SET 0
39 #define SEEK_CUR 1
40 #define SEEK_END 2
41 #endif
42 
43 #include "ncio.h"
44 #include "fbits.h"
45 #include "rnd.h"
46 
47 /* #define INSTRUMENT 1 */
48 #if INSTRUMENT /* debugging */
49 #undef NDEBUG
50 #include <stdio.h>
51 #include "instr.h"
52 #endif
53 
54 #undef MIN  /* system may define MIN somewhere and complain */
55 #define MIN(mm,nn) (((mm) < (nn)) ? (mm) : (nn))
56 
57 #if !defined(NDEBUG) && !defined(X_INT_MAX)
58 #define  X_INT_MAX 2147483647
59 #endif
60 
61 #if 0 /* !defined(NDEBUG) && !defined(X_ALIGN) */
62 #define  X_ALIGN 4
63 #else
64 #undef X_ALIGN
65 #endif
66 
/* These are needed on mingw to get a dll to compile. They really
 * should be provided in sys/stat.h, but what the heck. Let's not be
 * too picky! */
/* Borland bcc32 doesn't have these definitions of permission bits */
71 #ifndef S_IRUSR
72 #define S_IRUSR   0000400
73 #endif
74 #ifndef S_IRGRP
75 #define S_IRGRP   0000040
76 #endif
77 #ifndef S_IROTH
78 #define S_IROTH   0000004
79 #endif
80 #ifndef S_IWUSR
81 #define S_IWUSR   0000200
82 #endif
83 #ifndef S_IWGRP
84 #define S_IWGRP   0000020
85 #endif
86 #ifndef S_IWOTH
87 #define S_IWOTH   0000002
88 #endif
89 #ifndef S_IXUSR
90 #define S_IXUSR   0000100
91 #endif
92 #ifndef S_IXGRP
93 #define S_IXGRP   0000010
94 #endif
95 #ifndef S_IXOTH
96 #define S_IXOTH   0000001
97 #endif
98 #ifndef S_IRWXU
99 #define S_IRWXU   0000700
100 #endif
101 #ifndef S_IRWXG
102 #define S_IRWXG   0000070
103 #endif
104 #ifndef S_IRWXO
105 #define S_IRWXO   0000007
106 #endif
107 
108 /*
109  * Define the following for debugging.
110  */
111 /* #define ALWAYS_NC_SHARE 1 */
112 
113 #if defined(__BORLANDC__)
114 #pragma warn -8004 /* "assigned a value that is never used" */
115 #pragma warn -8065 /* "Call to function 'XXX' with no prototype" */
116 #endif
117 
118 /* Begin OS */
119 
#ifndef POSIXIO_DEFAULT_PAGESIZE
#define POSIXIO_DEFAULT_PAGESIZE 4096
#endif

/* Some systems spell the sysconf() key _SC_PAGE_SIZE; accept either. */
#if defined(_SC_PAGE_SIZE) && !defined(_SC_PAGESIZE)
#define _SC_PAGESIZE _SC_PAGE_SIZE
#endif

/*
 * Report the system page size in bytes.
 *
 * Uses sysconf(_SC_PAGESIZE) when that key is defined, otherwise
 * getpagesize() when HAVE_GETPAGESIZE is defined.  When neither is
 * available, or sysconf() reports a non-positive value, the answer is
 * POSIXIO_DEFAULT_PAGESIZE.  Errors are deliberately not reported.
 */
static size_t
pagesize(void)
{
#ifdef _SC_PAGESIZE
	const long reported = sysconf(_SC_PAGESIZE);

	if(reported > 0)
		return (size_t) reported;
	/* sysconf() failed: stay silent and use the default */
	return (size_t) POSIXIO_DEFAULT_PAGESIZE;
#elif defined(HAVE_GETPAGESIZE)
	return (size_t) getpagesize();
#else
	return (size_t) POSIXIO_DEFAULT_PAGESIZE;
#endif
}
146 
/*
 * Report the preferred I/O block size, in bytes, for descriptor fd.
 *
 * When the build defines HAVE_ST_BLKSIZE and fstat() succeeds, the
 * answer is st_blksize, but never less than 8192.  In every other case
 * (including a silent fstat() failure) it is twice the system page size.
 */
static size_t
blksize(int fd)
{
#if defined(HAVE_ST_BLKSIZE)
	struct stat info;

	if (fstat(fd, &info) >= 0)
		return (info.st_blksize >= 8192) ? (size_t) info.st_blksize
						 : (size_t) 8192;
	/* fstat() failed: stay silent and use the page-size based default */
#endif
	return (size_t) 2 * pagesize();
}
165 
166 
/*
 * Sort of like ftruncate(), except it never makes the file shorter.
 *
 * fd  - open file descriptor.
 * len - minimum length, in bytes, the file should have afterwards.
 *
 * If the file is already at least len bytes long nothing is done.
 * Otherwise zero bytes are written so that the file ends exactly at
 * len: at most sizeof(long) of them, starting at len - sizeof(long)
 * but never before the current end of file, so existing data is never
 * overwritten and a len smaller than sizeof(long) cannot produce a
 * negative seek offset.  ftruncate() is not used because of a problem
 * with FAT32 file systems.
 *
 * The file position in effect on entry is restored on success.
 *
 * Returns ENOERR on success, otherwise the errno of the failing call
 * (EIO if write() makes no progress).
 */
static int
fgrow(const int fd, const off_t len)
{
	struct stat sb;
	const long dumb = 0;
	const char *zp = (const char *) &dumb;
	off_t start;
	off_t pos;
	size_t nleft;

	if (fstat(fd, &sb) < 0)
		return errno;
	if (len <= sb.st_size)
		return ENOERR;	/* already long enough */

	/* start = max(st_size, len - sizeof(dumb)), without underflow */
	start = sb.st_size;
	if (len - start > (off_t) sizeof(dumb))
		start = len - (off_t) sizeof(dumb);
	nleft = (size_t) (len - start);

	/* cache current position */
	pos = lseek(fd, 0, SEEK_CUR);
	if (pos < 0)
		return errno;
	if (lseek(fd, start, SEEK_SET) < 0)
		return errno;

	/* write() may be partial; keep going until the tail is complete */
	while (nleft > 0)
	{
		const ssize_t nw = write(fd, zp, nleft);
		if (nw < 0)
			return errno;
		if (nw == 0)
			return EIO;	/* no progress; avoid spinning */
		zp += nw;
		nleft -= (size_t) nw;
	}

	if (lseek(fd, pos, SEEK_SET) < 0)
		return errno;
	return ENOERR;
}
195 
196 
/*
 * Sort of like ftruncate(), except it never makes the file shorter.
 * Unlike fgrow(), only a single zero byte is written, at offset
 * len - 1, and only when the file is currently shorter than len.
 *
 * fd  - open file descriptor.
 * len - minimum length, in bytes, the file should have afterwards.
 *
 * The file position in effect on entry is restored on success.
 * Returns ENOERR on success, otherwise the errno of the failing call.
 */
static int
fgrow2(const int fd, const off_t len)
{
	struct stat info;
	const char zero = 0;
	off_t saved;

	if (fstat(fd, &info) < 0)
		return errno;
	if (len <= info.st_size)
		return ENOERR;	/* already long enough */

	/* ftruncate() is avoided due to a problem with FAT32 file systems */
	saved = lseek(fd, 0, SEEK_CUR);	/* remember where we were */
	if (saved < 0)
		return errno;

	/* seek to the last byte, write it, then go back; stop at first failure */
	if (lseek(fd, len - 1, SEEK_SET) < 0
	    || write(fd, &zero, sizeof(zero)) < 0
	    || lseek(fd, saved, SEEK_SET) < 0)
		return errno;

	return ENOERR;
}
226 /* End OS */
227 /* Begin px */
228 
229 /* The px_ functions are for posix systems, when NC_SHARE is not in
230    effect. */
231 
232 /* Write out a "page" of data to the file. The size of the page
233    (i.e. the extent) varies.
234 
235    nciop - pointer to the file metadata.
236    offset - where in the file should this page be written.
237    extent - how many bytes should be written.
238    vp - pointer to the data to write.
239    posp - pointer to current position in file, updated after write.
240 */
241 static int
px_pgout(ncio * const nciop,off_t const offset,const size_t extent,void * const vp,off_t * posp)242 px_pgout(ncio *const nciop,
243 	off_t const offset,  const size_t extent,
244 	void *const vp, off_t *posp)
245 {
246     ssize_t partial;
247     size_t nextent;
248     char *nvp;
249 #ifdef X_ALIGN
250 	assert(offset % X_ALIGN == 0);
251 #endif
252 
253 	assert(*posp == OFF_NONE || *posp == lseek(nciop->fd, 0, SEEK_CUR));
254 
255 	if(*posp != offset)
256 	{
257 		if(lseek(nciop->fd, offset, SEEK_SET) != offset)
258 		{
259 			return errno;
260 		}
261 		*posp = offset;
262 	}
263 	/* Old write, didn't handle partial writes correctly */
264 	/* if(write(nciop->fd, vp, extent) != (ssize_t) extent) */
265 	/* { */
266 	/* 	return errno; */
267 	/* } */
268 	nextent = extent;
269         nvp = vp;
270 	while((partial = write(nciop->fd, nvp, nextent)) != -1) {
271 	    if(partial == nextent)
272 		break;
273 	    nvp += partial;
274 	    nextent -= partial;
275 	}
276 	if(partial == -1)
277 	    return errno;
278 	*posp += extent;
279 
280 	return ENOERR;
281 }
282 
283 /* Read in a page of data.
284 
285    nciop - a pointer to the ncio struct for this file.
286    offset - byte offset in file where read starts.
287    extent - the size of the page that will be read.
288    vp - a pointer to where the data will end up.
289    nreadp - returned number of bytes actually read (may be less than extent).
290    posp - pointer to current position in file, updated after read.
291 */
292 static int
px_pgin(ncio * const nciop,off_t const offset,const size_t extent,void * const vp,size_t * nreadp,off_t * posp)293 px_pgin(ncio *const nciop,
294 	off_t const offset, const size_t extent,
295 	void *const vp, size_t *nreadp, off_t *posp)
296 {
297 	int status;
298 	ssize_t nread;
299 
300 #ifdef X_ALIGN
301 	assert(offset % X_ALIGN == 0);
302 	assert(extent % X_ALIGN == 0);
303 #endif
304 
305 	assert(*posp == OFF_NONE || *posp == lseek(nciop->fd, 0, SEEK_CUR));
306 
307 	if(*posp != offset)
308 	{
309 		if(lseek(nciop->fd, offset, SEEK_SET) != offset)
310 		{
311 			status = errno;
312 			return status;
313 		}
314 		*posp = offset;
315 	}
316 
317 	errno = 0;
318 	nread = read(nciop->fd, vp, extent);
319 	if(nread != (ssize_t) extent)
320 	{
321 		status = errno;
322 		if(nread == -1 || status != ENOERR)
323 			return status;
324 		/* else it's okay we read less than asked for */
325 		(void) memset((char *)vp + nread, 0, (ssize_t)extent - nread);
326 	}
327 	*nreadp = nread;
328 	*posp += nread;
329 
330 	return ENOERR;
331 }
332 
/* Private state for POSIX systems with NC_SHARE not in effect. If
   NC_SHARE is used, see ncio_spx.
*/
typedef struct ncio_px ncio_px;

struct ncio_px {
	size_t	blksz;		/* block size for reads and writes to file */
	off_t	pos;		/* current read/write position in file */

	/* buffer */
	off_t	bf_offset;	/* file offset corresponding to start of
				   memory buffer */
	size_t	bf_extent;	/* number of bytes in I/O request */
	size_t	bf_cnt;		/* number of bytes available in buffer */
	void	*bf_base;	/* pointer to beginning of buffer */
	int	bf_rflags;	/* buffer region flags (defined in ncio.h):
				   lock status, read/write permissions and
				   modification status of regions of data in
				   the buffer */
	int	bf_refcount;	/* buffer reference count */

	/* chain for double buffering in px_move */
	ncio_px	*slave;		/* used in moves */
};
361 
362 
363 /*ARGSUSED*/
364 /* This function indicates the file region starting at offset may be
365    released.
366 
367    This is for POSIX, without NC_SHARE.  If called with RGN_MODIFIED
368    flag, sets the modified flag in pxp->bf_rflags and decrements the
369    reference count.
370 
371    pxp - pointer to posix non-share ncio_px struct.
372 
373    offset - file offset for beginning of to region to be
374    released.
375 
376    rflags - only RGN_MODIFIED is relevent to this function, others ignored
377 */
378 static int
px_rel(ncio_px * const pxp,off_t offset,int rflags)379 px_rel(ncio_px *const pxp, off_t offset, int rflags)
380 {
381 	assert(pxp->bf_offset <= offset
382 		 && offset < pxp->bf_offset + (off_t) pxp->bf_extent);
383 	assert(pIf(fIsSet(rflags, RGN_MODIFIED),
384 		fIsSet(pxp->bf_rflags, RGN_WRITE)));
385 
386 	if(fIsSet(rflags, RGN_MODIFIED))
387 	{
388 		fSet(pxp->bf_rflags, RGN_MODIFIED);
389 	}
390 	pxp->bf_refcount--;
391 
392 	return ENOERR;
393 }
394 
395 /* This function indicates the file region starting at offset may be
396    released.  Each read or write to the file is bracketed by a call to
397    the "get" region function and a call to the "rel" region function.
398    If you only read from the memory region, release it with a flag of
399    0, if you modify the region, release it with a flag of
400    RGN_MODIFIED.
401 
402    For POSIX system, without NC_SHARE, this becomes the rel function
403    pointed to by the ncio rel function pointer. It mearly checks for
404    file write permission, then calls px_rel to do everything.
405 
406    nciop - pointer to ncio struct.
407    offset - num bytes from beginning of buffer to region to be
408    released.
409    rflags - only RGN_MODIFIED is relevent to this function, others ignored
410 */
411 static int
ncio_px_rel(ncio * const nciop,off_t offset,int rflags)412 ncio_px_rel(ncio *const nciop, off_t offset, int rflags)
413 {
414 	ncio_px *const pxp = (ncio_px *)nciop->pvt;
415 
416 	if(fIsSet(rflags, RGN_MODIFIED) && !fIsSet(nciop->ioflags, NC_WRITE))
417 		return EPERM; /* attempt to write readonly file */
418 
419 	return px_rel(pxp, offset, rflags);
420 }
421 
422 /* POSIX get. This will "make a region available." Since we're using
423    buffered IO, this means that if needed, we'll fetch a new page from
424    the file, otherwise, just return a pointer to what's in memory
425    already.
426 
427    nciop - pointer to ncio struct, containing file info.
428    pxp - pointer to ncio_px struct, which contains special metadate
429    for posix files without NC_SHARE.
430    offset - start byte of region to get.
431    extent - how many bytes to read.
432    rflags - One of the RGN_* flags defined in ncio.h.
433    vpp - pointer to pointer that will recieve data.
434 
435    NOTES:
436 
437    * For blkoffset round offset down to the nearest pxp->blksz. This
438    provides the offset (in bytes) to the beginning of the block that
439    holds the current offset.
440 
441    * diff tells how far into the current block we are.
442 
443    * For blkextent round up to the number of bytes at the beginning of
444    the next block, after the one that holds our current position, plus
445    whatever extra (i.e. the extent) that we are about to grab.
446 
447    * The blkextent can't be more than twice the pxp->blksz. That's
448    because the pxp->blksize is the sizehint, and in ncio_px_init2 the
449    buffer (pointed to by pxp->bf-base) is allocated with 2 *
450    *sizehintp. This is checked (unneccesarily) more than once in
451    asserts.
452 
453    * If this is called on a newly opened file, pxp->bf_offset will be
454    OFF_NONE and we'll jump to label pgin to immediately read in a
455    page.
456 */
457 static int
px_get(ncio * const nciop,ncio_px * const pxp,off_t offset,size_t extent,int rflags,void ** const vpp)458 px_get(ncio *const nciop, ncio_px *const pxp,
459 		off_t offset, size_t extent,
460 		int rflags,
461 		void **const vpp)
462 {
463 	int status = ENOERR;
464 
465 	const off_t blkoffset = _RNDDOWN(offset, (off_t)pxp->blksz);
466 	off_t diff = (size_t)(offset - blkoffset);
467 	off_t blkextent = _RNDUP(diff + extent, pxp->blksz);
468 
469 	assert(extent != 0);
470 	assert(extent < X_INT_MAX); /* sanity check */
471 	assert(offset >= 0); /* sanity check */
472 
473 	if(2 * pxp->blksz < blkextent)
474 		return E2BIG; /* TODO: temporary kludge */
475 	if(pxp->bf_offset == OFF_NONE)
476 	{
477 		/* Uninitialized */
478 		if(pxp->bf_base == NULL)
479 		{
480 			assert(pxp->bf_extent == 0);
481 			assert(blkextent <= 2 * pxp->blksz);
482 			pxp->bf_base = malloc(2 * pxp->blksz);
483 			if(pxp->bf_base == NULL)
484 				return ENOMEM;
485 		}
486 		goto pgin;
487 	}
488 	/* else */
489 	assert(blkextent <= 2 * pxp->blksz);
490 
491 	if(blkoffset == pxp->bf_offset)
492 	{
493 		/* hit */
494  		if(blkextent > pxp->bf_extent)
495 		{
496 			/* page in upper */
497 			void *const middle =
498 			 	(void *)((char *)pxp->bf_base + pxp->blksz);
499 			assert(pxp->bf_extent == pxp->blksz);
500 			status = px_pgin(nciop,
501 				 pxp->bf_offset + (off_t)pxp->blksz,
502 				 pxp->blksz,
503 				 middle,
504 				 &pxp->bf_cnt,
505 				 &pxp->pos);
506 			if(status != ENOERR)
507 				return status;
508 			pxp->bf_extent = 2 * pxp->blksz;
509 			pxp->bf_cnt += pxp->blksz;
510 		}
511 		goto done;
512 	}
513 	/* else */
514 
515 	if(pxp->bf_extent > pxp->blksz
516 		 && blkoffset == pxp->bf_offset + (off_t)pxp->blksz)
517 	{
518 		/* hit in upper half */
519 		if(blkextent == pxp->blksz)
520 		{
521 			/* all in upper half, no fault needed */
522 			diff += pxp->blksz;
523 			goto done;
524 		}
525 		/* else */
526 		if(pxp->bf_cnt > pxp->blksz)
527 		{
528 			/* data in upper half */
529 			void *const middle =
530 				(void *)((char *)pxp->bf_base + pxp->blksz);
531 			assert(pxp->bf_extent == 2 * pxp->blksz);
532 			if(fIsSet(pxp->bf_rflags, RGN_MODIFIED))
533 			{
534 				/* page out lower half */
535 				assert(pxp->bf_refcount <= 0);
536 				status = px_pgout(nciop,
537 					pxp->bf_offset,
538 					pxp->blksz,
539 					pxp->bf_base,
540 					&pxp->pos);
541 				if(status != ENOERR)
542 					return status;
543 			}
544 			pxp->bf_cnt -= pxp->blksz;
545 			/* copy upper half into lower half */
546 			(void) memcpy(pxp->bf_base, middle, pxp->bf_cnt);
547 		}
548 		pxp->bf_offset = blkoffset;
549 		/* pxp->bf_extent = pxp->blksz; */
550 
551  		assert(blkextent == 2 * pxp->blksz);
552 		{
553 			/* page in upper */
554 			void *const middle =
555 			 	(void *)((char *)pxp->bf_base + pxp->blksz);
556 			status = px_pgin(nciop,
557 				 pxp->bf_offset + (off_t)pxp->blksz,
558 				 pxp->blksz,
559 				 middle,
560 				 &pxp->bf_cnt,
561 				 &pxp->pos);
562 			if(status != ENOERR)
563 				return status;
564 			pxp->bf_extent = 2 * pxp->blksz;
565 			pxp->bf_cnt += pxp->blksz;
566 		}
567 		goto done;
568 	}
569 	/* else */
570 
571 	if(blkoffset == pxp->bf_offset - (off_t)pxp->blksz)
572 	{
573 		/* wants the page below */
574 		void *const middle =
575 			(void *)((char *)pxp->bf_base + pxp->blksz);
576 		size_t upper_cnt = 0;
577 		if(pxp->bf_cnt > pxp->blksz)
578 		{
579 			/* data in upper half */
580 			assert(pxp->bf_extent == 2 * pxp->blksz);
581 			if(fIsSet(pxp->bf_rflags, RGN_MODIFIED))
582 			{
583 				/* page out upper half */
584 				assert(pxp->bf_refcount <= 0);
585 				status = px_pgout(nciop,
586 					pxp->bf_offset + (off_t)pxp->blksz,
587 					pxp->bf_cnt - pxp->blksz,
588 					middle,
589 					&pxp->pos);
590 				if(status != ENOERR)
591 					return status;
592 			}
593 			pxp->bf_cnt = pxp->blksz;
594 			pxp->bf_extent = pxp->blksz;
595 		}
596 		if(pxp->bf_cnt > 0)
597 		{
598 			/* copy lower half into upper half */
599 			(void) memcpy(middle, pxp->bf_base, pxp->blksz);
600 			upper_cnt = pxp->bf_cnt;
601 		}
602 		/* read page below into lower half */
603 		status = px_pgin(nciop,
604 			 blkoffset,
605 			 pxp->blksz,
606 			 pxp->bf_base,
607 			 &pxp->bf_cnt,
608 			 &pxp->pos);
609 		if(status != ENOERR)
610 			return status;
611 		pxp->bf_offset = blkoffset;
612 		if(upper_cnt != 0)
613 		{
614 			pxp->bf_extent = 2 * pxp->blksz;
615 			pxp->bf_cnt = pxp->blksz + upper_cnt;
616 		}
617 		else
618 		{
619 			pxp->bf_extent = pxp->blksz;
620 		}
621 		goto done;
622 	}
623 	/* else */
624 
625 	/* no overlap */
626 	if(fIsSet(pxp->bf_rflags, RGN_MODIFIED))
627 	{
628 		assert(pxp->bf_refcount <= 0);
629 		status = px_pgout(nciop,
630 			pxp->bf_offset,
631 			pxp->bf_cnt,
632 			pxp->bf_base,
633 			&pxp->pos);
634 		if(status != ENOERR)
635 			return status;
636 		pxp->bf_rflags = 0;
637 	}
638 
639 pgin:
640 	status = px_pgin(nciop,
641 		 blkoffset,
642 		 blkextent,
643 		 pxp->bf_base,
644 		 &pxp->bf_cnt,
645 		 &pxp->pos);
646 	if(status != ENOERR)
647 		return status;
648 	 pxp->bf_offset = blkoffset;
649 	 pxp->bf_extent = blkextent;
650 
651 done:
652 	extent += diff;
653 	if(pxp->bf_cnt < extent)
654 		pxp->bf_cnt = extent;
655 	assert(pxp->bf_cnt <= pxp->bf_extent);
656 
657 	pxp->bf_rflags |= rflags;
658 	pxp->bf_refcount++;
659 
660 	*vpp = (char *)pxp->bf_base + diff;
661 	return ENOERR;
662 }
663 
664 /* Request that the region (offset, extent) be made available through
665    *vpp.
666 
667    This function converts a file region specified by an offset and
668    extent to a memory pointer. The region may be locked until the
669    corresponding call to rel().
670 
671    For POSIX systems, without NC_SHARE. This function gets a page of
672    size extent?
673 
674    This is a wrapper for the function px_get, which does all the heavy
675    lifting.
676 
677    nciop - pointer to ncio struct for this file.
678    offset - offset (from beginning of file?) to the data we want to
679    read.
680    extent - the number of bytes to read from the file.
681    rflags - One of the RGN_* flags defined in ncio.h.
682    vpp - handle to point at data when it's been read.
683 */
684 static int
ncio_px_get(ncio * const nciop,off_t offset,size_t extent,int rflags,void ** const vpp)685 ncio_px_get(ncio *const nciop,
686 		off_t offset, size_t extent,
687 		int rflags,
688 		void **const vpp)
689 {
690 	ncio_px *const pxp = (ncio_px *)nciop->pvt;
691 
692 	if(fIsSet(rflags, RGN_WRITE) && !fIsSet(nciop->ioflags, NC_WRITE))
693 		return EPERM; /* attempt to write readonly file */
694 
695 	/* reclaim space used in move */
696 	if(pxp->slave != NULL)
697 	{
698 		if(pxp->slave->bf_base != NULL)
699 		{
700 			free(pxp->slave->bf_base);
701 			pxp->slave->bf_base = NULL;
702 			pxp->slave->bf_extent = 0;
703 			pxp->slave->bf_offset = OFF_NONE;
704 		}
705 		free(pxp->slave);
706 		pxp->slave = NULL;
707 	}
708 	return px_get(nciop, pxp, offset, extent, rflags, vpp);
709 }
710 
711 
712 /* ARGSUSED */
713 static int
px_double_buffer(ncio * const nciop,off_t to,off_t from,size_t nbytes,int rflags)714 px_double_buffer(ncio *const nciop, off_t to, off_t from,
715 			size_t nbytes, int rflags)
716 {
717 	ncio_px *const pxp = (ncio_px *)nciop->pvt;
718 	int status = ENOERR;
719 	void *src;
720 	void *dest;
721 
722 #if INSTRUMENT
723 fprintf(stderr, "\tdouble_buffr %ld %ld %ld\n",
724 		 (long)to, (long)from, (long)nbytes);
725 #endif
726 	status = px_get(nciop, pxp, to, nbytes, RGN_WRITE,
727 			&dest);
728 	if(status != ENOERR)
729 		return status;
730 
731 	if(pxp->slave == NULL)
732 	{
733 		pxp->slave = (ncio_px *) malloc(sizeof(ncio_px));
734 		if(pxp->slave == NULL)
735 			return ENOMEM;
736 
737 		pxp->slave->blksz = pxp->blksz;
738 		/* pos done below */
739 		pxp->slave->bf_offset = pxp->bf_offset;
740 		pxp->slave->bf_extent = pxp->bf_extent;
741 		pxp->slave->bf_cnt = pxp->bf_cnt;
742 		pxp->slave->bf_base = malloc(2 * pxp->blksz);
743 		if(pxp->slave->bf_base == NULL)
744 			return ENOMEM;
745 		(void) memcpy(pxp->slave->bf_base, pxp->bf_base,
746 			 pxp->bf_extent);
747 		pxp->slave->bf_rflags = 0;
748 		pxp->slave->bf_refcount = 0;
749 		pxp->slave->slave = NULL;
750 	}
751 
752 	pxp->slave->pos = pxp->pos;
753 	status = px_get(nciop, pxp->slave, from, nbytes, 0,
754 			&src);
755 	if(status != ENOERR)
756 		return status;
757 	if(pxp->pos != pxp->slave->pos)
758 	{
759 		/* position changed, sync */
760 		pxp->pos = pxp->slave->pos;
761 	}
762 
763 	(void) memcpy(dest, src, nbytes);
764 
765 	(void)px_rel(pxp->slave, from, 0);
766 	(void)px_rel(pxp, to, RGN_MODIFIED);
767 
768 	return status;
769 }
770 
771 /* Like memmove(), safely move possibly overlapping data.
772 
773    Copy one region to another without making anything available to
774    higher layers. May be just implemented in terms of get() and rel(),
775    or may be tricky to be efficient. Only used in by nc_enddef()
776    after redefinition.
777 
778    nciop - pointer to ncio struct with file info.
779    to - src for move?
780    from - dest for move?
781    nbytes - number of bytes to move.
782    rflags - One of the RGN_* flags defined in ncio.h. The only
783    reasonable flag value is RGN_NOLOCK.
784 */
785 static int
ncio_px_move(ncio * const nciop,off_t to,off_t from,size_t nbytes,int rflags)786 ncio_px_move(ncio *const nciop, off_t to, off_t from,
787 			size_t nbytes, int rflags)
788 {
789 	ncio_px *const pxp = (ncio_px *)nciop->pvt;
790 	int status = ENOERR;
791 	off_t lower;
792 	off_t upper;
793 	char *base;
794 	size_t diff;
795 	size_t extent;
796 
797 	if(to == from)
798 		return ENOERR; /* NOOP */
799 
800 	if(fIsSet(rflags, RGN_WRITE) && !fIsSet(nciop->ioflags, NC_WRITE))
801 		return EPERM; /* attempt to write readonly file */
802 
803 	rflags &= RGN_NOLOCK; /* filter unwanted flags */
804 
805 	if(to > from)
806 	{
807 		/* growing */
808 		lower = from;
809 		upper = to;
810 	}
811 	else
812 	{
813 		/* shrinking */
814 		lower = to;
815 		upper = from;
816 	}
817 	diff = (size_t)(upper - lower);
818 	extent = diff + nbytes;
819 
820 #if INSTRUMENT
821 fprintf(stderr, "ncio_px_move %ld %ld %ld %ld %ld\n",
822 		 (long)to, (long)from, (long)nbytes, (long)lower, (long)extent);
823 #endif
824 	if(extent > pxp->blksz)
825 	{
826 		size_t remaining = nbytes;
827 
828 if(to > from)
829 {
830 		off_t frm = from + nbytes;
831 		off_t toh = to + nbytes;
832 		for(;;)
833 		{
834 			size_t loopextent = MIN(remaining, pxp->blksz);
835 			frm -= loopextent;
836 			toh -= loopextent;
837 
838 			status = px_double_buffer(nciop, toh, frm,
839 				 	loopextent, rflags) ;
840 			if(status != ENOERR)
841 				return status;
842 			remaining -= loopextent;
843 
844 			if(remaining == 0)
845 				break; /* normal loop exit */
846 		}
847 }
848 else
849 {
850 		for(;;)
851 		{
852 			size_t loopextent = MIN(remaining, pxp->blksz);
853 
854 			status = px_double_buffer(nciop, to, from,
855 				 	loopextent, rflags) ;
856 			if(status != ENOERR)
857 				return status;
858 			remaining -= loopextent;
859 
860 			if(remaining == 0)
861 				break; /* normal loop exit */
862 			to += loopextent;
863 			from += loopextent;
864 		}
865 }
866 		return ENOERR;
867 	}
868 
869 #if INSTRUMENT
870 fprintf(stderr, "\tncio_px_move small\n");
871 #endif
872 	status = px_get(nciop, pxp, lower, extent, RGN_WRITE|rflags,
873 			(void **)&base);
874 
875 	if(status != ENOERR)
876 		return status;
877 
878 	if(to > from)
879 		(void) memmove(base + diff, base, nbytes);
880 	else
881 		(void) memmove(base, base + diff, nbytes);
882 
883 	(void) px_rel(pxp, lower, RGN_MODIFIED);
884 
885 	return status;
886 }
887 
888 
889 /* Flush any buffers to disk. May be a no-op on if I/O is unbuffered.
890    This function is used when NC_SHARE is NOT used.
891 */
892 static int
ncio_px_sync(ncio * const nciop)893 ncio_px_sync(ncio *const nciop)
894 {
895 	ncio_px *const pxp = (ncio_px *)nciop->pvt;
896 	int status = ENOERR;
897 	if(fIsSet(pxp->bf_rflags, RGN_MODIFIED))
898 	{
899 		assert(pxp->bf_refcount <= 0);
900 		status = px_pgout(nciop, pxp->bf_offset,
901 			pxp->bf_cnt,
902 			pxp->bf_base, &pxp->pos);
903 		if(status != ENOERR)
904 			return status;
905 		pxp->bf_rflags = 0;
906 	}
907 	else if (!fIsSet(pxp->bf_rflags, RGN_WRITE))
908 	{
909 	    /*
910 	     * The dataset is readonly.  Invalidate the buffers so
911 	     * that the next ncio_px_get() will actually read data.
912 	     */
913 	    pxp->bf_offset = OFF_NONE;
914 	    pxp->bf_cnt = 0;
915 	}
916 	return status;
917 }
918 
919 /* Internal function called at close to
920    free up anything hanging off pvt.
921 */
922 static void
ncio_px_free(void * const pvt)923 ncio_px_free(void *const pvt)
924 {
925 	ncio_px *const pxp = (ncio_px *)pvt;
926 	if(pxp == NULL)
927 		return;
928 
929 	if(pxp->slave != NULL)
930 	{
931 		if(pxp->slave->bf_base != NULL)
932 		{
933 			free(pxp->slave->bf_base);
934 			pxp->slave->bf_base = NULL;
935 			pxp->slave->bf_extent = 0;
936 			pxp->slave->bf_offset = OFF_NONE;
937 		}
938 		free(pxp->slave);
939 		pxp->slave = NULL;
940 	}
941 
942 	if(pxp->bf_base != NULL)
943 	{
944 		free(pxp->bf_base);
945 		pxp->bf_base = NULL;
946 		pxp->bf_extent = 0;
947 		pxp->bf_offset = OFF_NONE;
948 	}
949 }
950 
951 
952 /* This is the second half of the ncio initialization. This is called
953    after the file has actually been opened.
954 
955    The most important thing that happens is the allocation of a block
956    of memory at pxp->bf_base. This is going to be twice the size of
957    the chunksizehint (rounded up to the nearest sizeof(double)) passed
958    in from nc__create or nc__open. The rounded chunksizehint (passed
959    in here in sizehintp) is going to be stored as pxp->blksize.
960 
961    According to our "contract" we are not allowed to ask for an extent
962    larger than this chunksize/sizehint/blksize from the ncio get
963    function.
964 
965    nciop - pointer to the ncio struct
966    sizehintp - pointer to a size hint that will be rounded up and
967    passed back to the caller.
968    isNew - true if this is being called from ncio_create for a new
969    file.
970 */
971 static int
ncio_px_init2(ncio * const nciop,size_t * sizehintp,int isNew)972 ncio_px_init2(ncio *const nciop, size_t *sizehintp, int isNew)
973 {
974 	ncio_px *const pxp = (ncio_px *)nciop->pvt;
975 	const size_t bufsz = 2 * *sizehintp;
976 
977 	assert(nciop->fd >= 0);
978 
979 	pxp->blksz = *sizehintp;
980 
981 	assert(pxp->bf_base == NULL);
982 
983 	/* this is separate allocation because it may grow */
984 	pxp->bf_base = malloc(bufsz);
985 	if(pxp->bf_base == NULL)
986 		return ENOMEM;
987 	/* else */
988 	pxp->bf_cnt = 0;
989 	if(isNew)
990 	{
991 		/* save a read */
992 		pxp->pos = 0;
993 		pxp->bf_offset = 0;
994 		pxp->bf_extent = bufsz;
995 		(void) memset(pxp->bf_base, 0, pxp->bf_extent);
996 	}
997 	return ENOERR;
998 }
999 
1000 
1001 /* This is the first of a two-part initialization of the ncio struct.
1002    Here the rel, get, move, sync, and free function pointers are set
1003    to their POSIX non-NC_SHARE functions (ncio_px_*).
1004 
1005    The ncio_px struct is also partially initialized.
1006 */
1007 static void
ncio_px_init(ncio * const nciop)1008 ncio_px_init(ncio *const nciop)
1009 {
1010 	ncio_px *const pxp = (ncio_px *)nciop->pvt;
1011 
1012 	*((ncio_relfunc **)&nciop->rel) = ncio_px_rel; /* cast away const */
1013 	*((ncio_getfunc **)&nciop->get) = ncio_px_get; /* cast away const */
1014 	*((ncio_movefunc **)&nciop->move) = ncio_px_move; /* cast away const */
1015 	*((ncio_syncfunc **)&nciop->sync) = ncio_px_sync; /* cast away const */
1016 	*((ncio_freefunc **)&nciop->free) = ncio_px_free; /* cast away const */
1017 
1018 	pxp->blksz = 0;
1019 	pxp->pos = -1;
1020 	pxp->bf_offset = OFF_NONE;
1021 	pxp->bf_extent = 0;
1022 	pxp->bf_rflags = 0;
1023 	pxp->bf_refcount = 0;
1024 	pxp->bf_base = NULL;
1025 	pxp->slave = NULL;
1026 
1027 }
1028 
1029 /* Begin spx */
1030 
/* Private state hung off ncio->pvt by the ncio_spx_* functions, which
   are the ones used when the NC_SHARE flag is in effect.
*/
typedef struct ncio_spx ncio_spx;

struct ncio_spx {
	off_t	pos;		/* cached file position, maintained by
				   px_pgin/px_pgout */

	/* buffer */
	off_t	bf_offset;	/* file offset of the buffered region, or
				   OFF_NONE when nothing is held */
	size_t	bf_extent;	/* allocated size of bf_base, in bytes */
	size_t	bf_cnt;		/* bytes currently held; 0 when released */
	void	*bf_base;	/* the buffer itself */
};
1042 
1043 
1044 /*ARGSUSED*/
1045 /* This function releases the region specified by offset.
1046 
1047    For POSIX system, with NC_SHARE, this becomes the rel function
1048    pointed to by the ncio rel function pointer. It mearly checks for
1049    file write permission, then calls px_rel to do everything.
1050 
1051    nciop - pointer to ncio struct.
1052 
1053    offset - beginning of region.
1054 
1055    rflags - One of the RGN_* flags defined in ncio.h. If set to
1056    RGN_MODIFIED it means that the data in this region were modified,
1057    and it needs to be written out to the disk immediately (since we
1058    are not buffering with NC_SHARE on).
1059 
1060 */
1061 static int
ncio_spx_rel(ncio * const nciop,off_t offset,int rflags)1062 ncio_spx_rel(ncio *const nciop, off_t offset, int rflags)
1063 {
1064 	ncio_spx *const pxp = (ncio_spx *)nciop->pvt;
1065 	int status = ENOERR;
1066 
1067 	assert(pxp->bf_offset <= offset);
1068 	assert(pxp->bf_cnt != 0);
1069 	assert(pxp->bf_cnt <= pxp->bf_extent);
1070 #ifdef X_ALIGN
1071 	assert(offset < pxp->bf_offset + X_ALIGN);
1072 	assert(pxp->bf_cnt % X_ALIGN == 0 );
1073 #endif
1074 
1075 	if(fIsSet(rflags, RGN_MODIFIED))
1076 	{
1077 		if(!fIsSet(nciop->ioflags, NC_WRITE))
1078 			return EPERM; /* attempt to write readonly file */
1079 
1080 		status = px_pgout(nciop, pxp->bf_offset,
1081 			pxp->bf_cnt,
1082 			pxp->bf_base, &pxp->pos);
1083 		/* if error, invalidate buffer anyway */
1084 	}
1085 	pxp->bf_offset = OFF_NONE;
1086 	pxp->bf_cnt = 0;
1087 	return status;
1088 }
1089 
1090 
1091 /* Request that the region (offset, extent) be made available through
1092    *vpp.
1093 
1094    This function converts a file region specified by an offset and
1095    extent to a memory pointer. The region may be locked until the
1096    corresponding call to rel().
1097 
1098    For POSIX systems, with NC_SHARE.
1099 
1100    nciop - pointer to ncio struct for this file.
1101    offset - offset (from beginning of file?) to the data we want to
1102    read.
1103    extent - the number of bytes we want.
1104    rflags - One of the RGN_* flags defined in ncio.h. May be RGN_NOLOCK.
1105    vpp - handle to point at data when it's been read.
1106 */
1107 static int
ncio_spx_get(ncio * const nciop,off_t offset,size_t extent,int rflags,void ** const vpp)1108 ncio_spx_get(ncio *const nciop,
1109 		off_t offset, size_t extent,
1110 		int rflags,
1111 		void **const vpp)
1112 {
1113 	ncio_spx *const pxp = (ncio_spx *)nciop->pvt;
1114 	int status = ENOERR;
1115 #ifdef X_ALIGN
1116 	size_t rem;
1117 #endif
1118 
1119 	if(fIsSet(rflags, RGN_WRITE) && !fIsSet(nciop->ioflags, NC_WRITE))
1120 		return EPERM; /* attempt to write readonly file */
1121 
1122 	assert(extent != 0);
1123 	assert(extent < X_INT_MAX); /* sanity check */
1124 
1125 	assert(pxp->bf_cnt == 0);
1126 
1127 #ifdef X_ALIGN
1128 	rem = (size_t)(offset % X_ALIGN);
1129 	if(rem != 0)
1130 	{
1131 		offset -= rem;
1132 		extent += rem;
1133 	}
1134 
1135 	{
1136 		const size_t rndup = extent % X_ALIGN;
1137 		if(rndup != 0)
1138 			extent += X_ALIGN - rndup;
1139 	}
1140 
1141 	assert(offset % X_ALIGN == 0);
1142 	assert(extent % X_ALIGN == 0);
1143 #endif
1144 
1145 	if(pxp->bf_extent < extent)
1146 	{
1147 		if(pxp->bf_base != NULL)
1148 		{
1149 			free(pxp->bf_base);
1150 			pxp->bf_base = NULL;
1151 			pxp->bf_extent = 0;
1152 		}
1153 		assert(pxp->bf_extent == 0);
1154 		pxp->bf_base = malloc(extent);
1155 		if(pxp->bf_base == NULL)
1156 			return ENOMEM;
1157 		pxp->bf_extent = extent;
1158 	}
1159 
1160 	status = px_pgin(nciop, offset,
1161 		 extent,
1162 		 pxp->bf_base,
1163 		 &pxp->bf_cnt, &pxp->pos);
1164 	if(status != ENOERR)
1165 		return status;
1166 
1167 	pxp->bf_offset = offset;
1168 
1169 	if(pxp->bf_cnt < extent)
1170 		pxp->bf_cnt = extent;
1171 
1172 #ifdef X_ALIGN
1173 	*vpp = (char *)pxp->bf_base + rem;
1174 #else
1175 	*vpp = pxp->bf_base;
1176 #endif
1177 	return ENOERR;
1178 }
1179 
1180 
#if 0
/* Disabled code, never compiled.  Reads extent bytes at offset with
   px_pgin() and writes them back at 'to' with px_pgout(), using a
   function-static ncio_spx as scratch state. */
/*ARGSUSED*/
static int
strategy(ncio *const nciop, off_t to, off_t offset,
			size_t extent, int rflags)
{
	static ncio_spx scratch[1];
	int err = ENOERR;
#ifdef X_ALIGN
	size_t lead;
#endif

	assert(extent != 0);
	assert(extent < X_INT_MAX); /* sanity check */
#if INSTRUMENT
fprintf(stderr, "strategy %ld at %ld to %ld\n",
	 (long)extent, (long)offset, (long)to);
#endif

#ifdef X_ALIGN
	lead = (size_t)(offset % X_ALIGN);
	if(lead != 0)
	{
		offset -= lead;
		extent += lead;
	}

	{
		const size_t tail = extent % X_ALIGN;
		if(tail != 0)
			extent += X_ALIGN - tail;
	}

	assert(offset % X_ALIGN == 0);
	assert(extent % X_ALIGN == 0);
#endif

	if(extent > scratch->bf_extent)
	{
		if(scratch->bf_base != NULL)
		{
			free(scratch->bf_base);
			scratch->bf_base = NULL;
			scratch->bf_extent = 0;
		}
		assert(scratch->bf_extent == 0);

		scratch->bf_base = malloc(extent);
		if(scratch->bf_base == NULL)
			return ENOMEM;
		scratch->bf_extent = extent;
	}

	err = px_pgin(nciop, offset, extent,
		scratch->bf_base, &scratch->bf_cnt, &scratch->pos);
	if(err != ENOERR)
		return err;

	scratch->bf_offset = to; /* TODO: XALIGN */

	if(scratch->bf_cnt < extent)
		scratch->bf_cnt = extent;

	err = px_pgout(nciop, scratch->bf_offset, scratch->bf_cnt,
		scratch->bf_base, &scratch->pos);

	/* if error, invalidate buffer anyway */
	scratch->bf_offset = OFF_NONE;
	scratch->bf_cnt = 0;
	return err;
}
#endif
1255 
1256 /* Copy one region to another without making anything available to
1257    higher layers. May be just implemented in terms of get() and rel(),
1258    or may be tricky to be efficient.  Only used in by nc_enddef()
1259    after redefinition.
1260 
1261    nciop - pointer to ncio struct for this file.
1262    to - dest for move?
1263    from - src for move?
1264    nbytes - number of bytes to move.
1265    rflags - One of the RGN_* flags defined in ncio.h.
1266 */
1267 static int
ncio_spx_move(ncio * const nciop,off_t to,off_t from,size_t nbytes,int rflags)1268 ncio_spx_move(ncio *const nciop, off_t to, off_t from,
1269 			size_t nbytes, int rflags)
1270 {
1271 	int status = ENOERR;
1272 	off_t lower = from;
1273 	off_t upper = to;
1274 	char *base;
1275 	size_t diff;
1276 	size_t extent;
1277 
1278 	rflags &= RGN_NOLOCK; /* filter unwanted flags */
1279 
1280 	if(to == from)
1281 		return ENOERR; /* NOOP */
1282 
1283 	if(to > from)
1284 	{
1285 		/* growing */
1286 		lower = from;
1287 		upper = to;
1288 	}
1289 	else
1290 	{
1291 		/* shrinking */
1292 		lower = to;
1293 		upper = from;
1294 	}
1295 
1296 	diff = (size_t)(upper - lower);
1297 	extent = diff + nbytes;
1298 
1299 	status = ncio_spx_get(nciop, lower, extent, RGN_WRITE|rflags,
1300 			(void **)&base);
1301 
1302 	if(status != ENOERR)
1303 		return status;
1304 
1305 	if(to > from)
1306 		(void) memmove(base + diff, base, nbytes);
1307 	else
1308 		(void) memmove(base, base + diff, nbytes);
1309 
1310 	(void) ncio_spx_rel(nciop, lower, RGN_MODIFIED);
1311 
1312 	return status;
1313 }
1314 
1315 
1316 /*ARGSUSED*/
1317 /* Flush any buffers to disk. May be a no-op on if I/O is unbuffered.
1318 */
1319 static int
ncio_spx_sync(ncio * const nciop)1320 ncio_spx_sync(ncio *const nciop)
1321 {
1322 	/* NOOP */
1323 	return ENOERR;
1324 }
1325 
1326 static void
ncio_spx_free(void * const pvt)1327 ncio_spx_free(void *const pvt)
1328 {
1329 	ncio_spx *const pxp = (ncio_spx *)pvt;
1330 	if(pxp == NULL)
1331 		return;
1332 
1333 	if(pxp->bf_base != NULL)
1334 	{
1335 		free(pxp->bf_base);
1336 		pxp->bf_base = NULL;
1337 		pxp->bf_offset = OFF_NONE;
1338 		pxp->bf_extent = 0;
1339 		pxp->bf_cnt = 0;
1340 	}
1341 }
1342 
1343 
1344 /* This does the second half of the ncio_spx struct initialization for
1345    POSIX systems, with NC_SHARE on.
1346 
1347    nciop - pointer to ncio struct for this file. File has been opened.
1348    sizehintp - pointer to a size which will be rounded up to the
1349    nearest 8-byt boundary and then used as the max size "chunk" (or
1350    page) to read from the file.
1351 */
1352 static int
ncio_spx_init2(ncio * const nciop,const size_t * const sizehintp)1353 ncio_spx_init2(ncio *const nciop, const size_t *const sizehintp)
1354 {
1355 	ncio_spx *const pxp = (ncio_spx *)nciop->pvt;
1356 
1357 	assert(nciop->fd >= 0);
1358 
1359 	pxp->bf_extent = *sizehintp;
1360 
1361 	assert(pxp->bf_base == NULL);
1362 
1363 	/* this is separate allocation because it may grow */
1364 	pxp->bf_base = malloc(pxp->bf_extent);
1365 	if(pxp->bf_base == NULL)
1366 	{
1367 		pxp->bf_extent = 0;
1368 		return ENOMEM;
1369 	}
1370 	/* else */
1371 	return ENOERR;
1372 }
1373 
1374 
1375 /* First half of init for ncio_spx struct, setting the rel, get, move,
1376    snyc, and free function pointers to the NC_SHARE versions of these
1377    functions (i.e. the ncio_spx_* functions).
1378 */
1379 static void
ncio_spx_init(ncio * const nciop)1380 ncio_spx_init(ncio *const nciop)
1381 {
1382 	ncio_spx *const pxp = (ncio_spx *)nciop->pvt;
1383 
1384 	*((ncio_relfunc **)&nciop->rel) = ncio_spx_rel; /* cast away const */
1385 	*((ncio_getfunc **)&nciop->get) = ncio_spx_get; /* cast away const */
1386 	*((ncio_movefunc **)&nciop->move) = ncio_spx_move; /* cast away const */
1387 	*((ncio_syncfunc **)&nciop->sync) = ncio_spx_sync; /* cast away const */
1388 	*((ncio_freefunc **)&nciop->free) = ncio_spx_free; /* cast away const */
1389 
1390 	pxp->pos = -1;
1391 	pxp->bf_offset = OFF_NONE;
1392 	pxp->bf_extent = 0;
1393 	pxp->bf_cnt = 0;
1394 	pxp->bf_base = NULL;
1395 }
1396 
1397 
1398 /* */
1399 
1400 /* This will call whatever free function is attached to the free
1401    function pointer in ncio. It's called from ncio_close, and from
1402    ncio_open and ncio_create when an error occurs that the file
1403    metadata must be freed.
1404 */
1405 static void
ncio_free(ncio * nciop)1406 ncio_free(ncio *nciop)
1407 {
1408 	if(nciop == NULL)
1409 		return;
1410 
1411 	if(nciop->free != NULL)
1412 		nciop->free(nciop->pvt);
1413 
1414 	free(nciop);
1415 }
1416 
1417 
1418 /* Create a new ncio struct to hold info about the file. This will
1419    create and init the ncio_px or ncio_spx struct (the latter if
1420    NC_SHARE is used.)
1421 */
1422 static ncio *
ncio_new(const char * path,int ioflags)1423 ncio_new(const char *path, int ioflags)
1424 {
1425 	size_t sz_ncio = M_RNDUP(sizeof(ncio));
1426 	size_t sz_path = M_RNDUP(strlen(path) +1);
1427 	size_t sz_ncio_pvt;
1428 	ncio *nciop;
1429 
1430 #if ALWAYS_NC_SHARE /* DEBUG */
1431 	fSet(ioflags, NC_SHARE);
1432 #endif
1433 
1434 	if(fIsSet(ioflags, NC_SHARE))
1435 		sz_ncio_pvt = sizeof(ncio_spx);
1436 	else
1437 		sz_ncio_pvt = sizeof(ncio_px);
1438 
1439 	nciop = (ncio *) malloc(sz_ncio + sz_path + sz_ncio_pvt);
1440 	if(nciop == NULL)
1441 		return NULL;
1442 
1443 	nciop->ioflags = ioflags;
1444 	*((int *)&nciop->fd) = -1; /* cast away const */
1445 
1446 	nciop->path = (char *) ((char *)nciop + sz_ncio);
1447 	(void) strcpy((char *)nciop->path, path); /* cast away const */
1448 
1449 				/* cast away const */
1450 	*((void **)&nciop->pvt) = (void *)(nciop->path + sz_path);
1451 
1452 	if(fIsSet(ioflags, NC_SHARE))
1453 		ncio_spx_init(nciop);
1454 	else
1455 		ncio_px_init(nciop);
1456 
1457 	return nciop;
1458 }
1459 
1460 
/* Public below this point */

/* Size hints below this value are replaced by the blksize() default
   in ncio_create() and ncio_open(). */
#if !defined(NCIO_MINBLOCKSIZE)
#  define NCIO_MINBLOCKSIZE 256
#endif

/* Size hints at or above this value are clamped to it. */
#if !defined(NCIO_MAXBLOCKSIZE)
#  define NCIO_MAXBLOCKSIZE 268435456 /* sanity check, about X_SIZE_T_MAX/8 */
#endif

/* Permission bits passed to open() when a file is created. */
#if defined(S_IRUSR)
#  define NC_DEFAULT_CREAT_MODE \
	(S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) /* 0666 */
#else
#  define NC_DEFAULT_CREAT_MODE 0666
#endif
1476 
1477 /* Create a file, and the ncio struct to go with it. This funtion is
1478    only called from nc__create_mp.
1479 
1480    path - path of file to create.
1481    ioflags - flags from nc_create
1482    initialsz - From the netcdf man page: "The argument
1483    Iinitialsize sets the initial size of the file at creation time."
1484    igeto -
1485    igetsz -
1486    sizehintp - this eventually goes into pxp->blksz and is the size of
1487    a page of data for buffered reads and writes.
1488    nciopp - pointer to a pointer that will get location of newly
1489    created and inited ncio struct.
1490    igetvpp - pointer to pointer which will get the location of ?
1491 */
1492 int
ncio_create(const char * path,int ioflags,size_t initialsz,off_t igeto,size_t igetsz,size_t * sizehintp,ncio ** nciopp,void ** const igetvpp)1493 ncio_create(const char *path, int ioflags,
1494 	size_t initialsz,
1495 	off_t igeto, size_t igetsz, size_t *sizehintp,
1496 	ncio **nciopp, void **const igetvpp)
1497 {
1498 	ncio *nciop;
1499 	int oflags = (O_RDWR|O_CREAT);
1500 	int fd;
1501 	int status;
1502 
1503 	if(initialsz < (size_t)igeto + igetsz)
1504 		initialsz = (size_t)igeto + igetsz;
1505 
1506 	fSet(ioflags, NC_WRITE);
1507 
1508 	if(path == NULL || *path == 0)
1509 		return EINVAL;
1510 
1511 	nciop = ncio_new(path, ioflags);
1512 	if(nciop == NULL)
1513 		return ENOMEM;
1514 
1515 	if(fIsSet(ioflags, NC_NOCLOBBER))
1516 		fSet(oflags, O_EXCL);
1517 	else
1518 		fSet(oflags, O_TRUNC);
1519 #ifdef O_BINARY
1520 	fSet(oflags, O_BINARY);
1521 #endif
1522 #ifdef vms
1523 	fd = open(path, oflags, NC_DEFAULT_CREAT_MODE, "ctx=stm");
1524 #else
1525 	/* Should we mess with the mode based on NC_SHARE ?? */
1526 	fd = open(path, oflags, NC_DEFAULT_CREAT_MODE);
1527 #endif
1528 #if 0
1529 	(void) fprintf(stderr, "ncio_create(): path=\"%s\"\n", path);
1530 	(void) fprintf(stderr, "ncio_create(): oflags=0x%x\n", oflags);
1531 #endif
1532 	if(fd < 0)
1533 	{
1534 		status = errno;
1535 		goto unwind_new;
1536 	}
1537 	*((int *)&nciop->fd) = fd; /* cast away const */
1538 
1539 	if(*sizehintp < NCIO_MINBLOCKSIZE)
1540 	{
1541 		/* Use default */
1542 		*sizehintp = blksize(fd);
1543 	}
1544 	else if(*sizehintp >= NCIO_MAXBLOCKSIZE)
1545 	{
1546 		/* Use maximum allowed value */
1547 		*sizehintp = NCIO_MAXBLOCKSIZE;
1548 	}
1549 	else
1550 	{
1551 		*sizehintp = M_RNDUP(*sizehintp);
1552 	}
1553 
1554 	if(fIsSet(nciop->ioflags, NC_SHARE))
1555 		status = ncio_spx_init2(nciop, sizehintp);
1556 	else
1557 		status = ncio_px_init2(nciop, sizehintp, 1);
1558 
1559 	if(status != ENOERR)
1560 		goto unwind_open;
1561 
1562 	if(initialsz != 0)
1563 	{
1564 		status = fgrow(fd, (off_t)initialsz);
1565 		if(status != ENOERR)
1566 			goto unwind_open;
1567 	}
1568 
1569 	if(igetsz != 0)
1570 	{
1571 		status = nciop->get(nciop,
1572 				igeto, igetsz,
1573                         	RGN_WRITE,
1574                         	igetvpp);
1575 		if(status != ENOERR)
1576 			goto unwind_open;
1577 	}
1578 
1579 	*nciopp = nciop;
1580 	return ENOERR;
1581 
1582 unwind_open:
1583 	(void) close(fd);
1584 	/* ?? unlink */
1585 	/*FALLTHRU*/
1586 unwind_new:
1587 	ncio_free(nciop);
1588 	return status;
1589 }
1590 
1591 
1592 /* This function opens the data file. It is only called from nc.c,
1593    from nc__open_mp and nc_delete_mp.
1594 
1595    path - path of data file.
1596 
1597    ioflags - flags passed into nc_open.
1598 
1599    igeto - looks like this function can do an initial page get, and
1600    igeto is going to be the offset for that. But it appears to be
1601    unused
1602 
1603    igetsz - the size in bytes of initial page get (a.k.a. extent). Not
1604    ever used in the library.
1605 
1606    sizehintp - pointer to sizehint parameter from nc__open or
1607    nc__create. This is used to set pxp->blksz.
1608 
1609    Here's what the man page has to say:
1610 
1611    "The argument referenced by chunksize controls a space versus time
1612    tradeoff, memory allocated in the netcdf library versus number of
1613    system calls.
1614 
1615    Because of internal requirements, the value may not be set to
1616    exactly the value requested. The actual value chosen is returned by reference.
1617 
1618    Using the value NC_SIZEHINT_DEFAULT causes the library to choose a
1619    default. How the system choses the default depends on the
1620    system. On many systems, the "preferred I/O block size" is
1621    available from the stat() system call, struct stat member
1622    st_blksize. If this is available it is used. Lacking that, twice
1623    the system pagesize is used. Lacking a call to discover the system
1624    pagesize, we just set default chunksize to 8192.
1625 
1626    The chunksize is a property of a given open netcdf descriptor ncid,
1627    it is not a persistent property of the netcdf dataset."
1628 
1629    nciopp - pointer to pointer that will get address of newly created
1630    and inited ncio struct.
1631 
1632    igetvpp - handle to pass back pointer to data from inital page
1633    read, if this were ever used, which it isn't.
1634 */
1635 int
ncio_open(const char * path,int ioflags,off_t igeto,size_t igetsz,size_t * sizehintp,ncio ** nciopp,void ** const igetvpp)1636 ncio_open(const char *path,
1637 	int ioflags,
1638 	off_t igeto, size_t igetsz, size_t *sizehintp,
1639 	ncio **nciopp, void **const igetvpp)
1640 {
1641 	ncio *nciop;
1642 	int oflags = fIsSet(ioflags, NC_WRITE) ? O_RDWR : O_RDONLY;
1643 	int fd;
1644 	int status;
1645 
1646 	if(path == NULL || *path == 0)
1647 		return EINVAL;
1648 
1649 	nciop = ncio_new(path, ioflags);
1650 	if(nciop == NULL)
1651 		return ENOMEM;
1652 
1653 #ifdef O_BINARY
1654 	fSet(oflags, O_BINARY);
1655 #endif
1656 #ifdef vms
1657 	fd = open(path, oflags, 0, "ctx=stm");
1658 #else
1659 	fd = open(path, oflags, 0);
1660 #endif
1661 	if(fd < 0)
1662 	{
1663 		status = errno;
1664 		goto unwind_new;
1665 	}
1666 	*((int *)&nciop->fd) = fd; /* cast away const */
1667 
1668 	if(*sizehintp < NCIO_MINBLOCKSIZE)
1669 	{
1670 		/* Use default */
1671 		*sizehintp = blksize(fd);
1672 	}
1673 	else if(*sizehintp >= NCIO_MAXBLOCKSIZE)
1674 	{
1675 		/* Use maximum allowed value */
1676 		*sizehintp = NCIO_MAXBLOCKSIZE;
1677 	}
1678 	else
1679 	{
1680 		*sizehintp = M_RNDUP(*sizehintp);
1681 	}
1682 
1683 	if(fIsSet(nciop->ioflags, NC_SHARE))
1684 		status = ncio_spx_init2(nciop, sizehintp);
1685 	else
1686 		status = ncio_px_init2(nciop, sizehintp, 0);
1687 
1688 	if(status != ENOERR)
1689 		goto unwind_open;
1690 
1691 	if(igetsz != 0)
1692 	{
1693 		status = nciop->get(nciop,
1694 				igeto, igetsz,
1695                         	0,
1696                         	igetvpp);
1697 		if(status != ENOERR)
1698 			goto unwind_open;
1699 	}
1700 
1701 	*nciopp = nciop;
1702 	return ENOERR;
1703 
1704 unwind_open:
1705 	(void) close(fd);
1706 	/*FALLTHRU*/
1707 unwind_new:
1708 	ncio_free(nciop);
1709 	return status;
1710 }
1711 
1712 /*
1713  * Get file size in bytes.
1714  */
1715 int
ncio_filesize(ncio * nciop,off_t * filesizep)1716 ncio_filesize(ncio *nciop, off_t *filesizep)
1717 {
1718     struct stat sb;
1719 
1720     assert(nciop != NULL);
1721     if (fstat(nciop->fd, &sb) < 0)
1722 	return errno;
1723     *filesizep = sb.st_size;
1724     return ENOERR;
1725 }
1726 
1727 /*
1728  * Sync any changes to disk, then truncate or extend file so its size
1729  * is length.  This is only intended to be called before close, if the
1730  * file is open for writing and the actual size does not match the
1731  * calculated size, perhaps as the result of having been previously
1732  * written in NOFILL mode.
1733  */
1734 int
ncio_pad_length(ncio * nciop,off_t length)1735 ncio_pad_length(ncio *nciop, off_t length)
1736 {
1737 	int status = ENOERR;
1738 
1739 	if(nciop == NULL)
1740 		return EINVAL;
1741 
1742 	if(!fIsSet(nciop->ioflags, NC_WRITE))
1743 	        return EPERM; /* attempt to write readonly file */
1744 
1745 	status = nciop->sync(nciop);
1746 	if(status != ENOERR)
1747 	        return status;
1748 
1749  	status = fgrow2(nciop->fd, length);
1750  	if(status != ENOERR)
1751 	        return status;
1752 	return ENOERR;
1753 }
1754 
1755 
1756 /* Write out any dirty buffers to disk and
1757    ensure that next read will get data from disk.
1758 
1759    Sync any changes, then close the open file associated with the ncio
1760    struct, and free its memory.
1761 
1762    nciop - pointer to ncio to close.
1763 
1764    doUnlink - if true, unlink file
1765 */
1766 int
ncio_close(ncio * nciop,int doUnlink)1767 ncio_close(ncio *nciop, int doUnlink)
1768 {
1769 	int status = ENOERR;
1770 
1771 	if(nciop == NULL)
1772 		return EINVAL;
1773 
1774 	status = nciop->sync(nciop);
1775 
1776 	(void) close(nciop->fd);
1777 
1778 	if(doUnlink)
1779 		(void) unlink(nciop->path);
1780 
1781 	ncio_free(nciop);
1782 
1783 	return status;
1784 }
1785