1 /*
2  * Copyright (c) 2003 Gunnar Ritter
3  *
4  * This software is provided 'as-is', without any express or implied
5  * warranty. In no event will the authors be held liable for any damages
6  * arising from the use of this software.
7  *
8  * Permission is granted to anyone to use this software for any purpose,
9  * including commercial applications, and to alter it and redistribute
10  * it freely, subject to the following restrictions:
11  *
12  * 1. The origin of this software must not be misrepresented; you must not
13  *    claim that you wrote the original software. If you use this software
14  *    in a product, an acknowledgment in the product documentation would be
15  *    appreciated but is not required.
16  *
17  * 2. Altered source versions must be plainly marked as such, and must not be
18  *    misrepresented as being the original software.
19  *
20  * 3. This notice may not be removed or altered from any source distribution.
21  */
22 /*	Sccsid @(#)iblok.h	1.5 (gritter) 7/16/04	*/
23 
24 /*
25  * Functions to read a file sequentially.
26  */
27 
28 #include	<sys/types.h>		/* for off_t, pid_t */
29 #include	<stdio.h>		/* for EOF */
30 #include	<wchar.h>		/* for wchar_t */
31 #include	<limits.h>		/* for MB_LEN_MAX */
32 
/*
 * Bookkeeping for one input file that is read sequentially through a
 * block buffer. The member order is part of the binary layout and must
 * not be changed.
 */
struct iblok {
	/*
	 * File offset, counted from the start of the file, that belongs to
	 * the end of the buffered data (the original note reads "offset of
	 * endc" -- TODO confirm the exact reference point in iblok.c).
	 */
	long long ib_endoff;

	/* Overflow area for multibyte sequences, plus its read window. */
	char ib_mbuf[MB_LEN_MAX + 1];
	char *ib_mcur;			/* next byte to read in ib_mbuf */
	char *ib_mend;			/* one beyond last byte in ib_mbuf */

	/* The block buffer itself and the read window inside it. */
	char *ib_blk;			/* buffered data */
	char *ib_cur;			/* next character in ib_blk */
	char *ib_end;			/* one beyond last byte in ib_blk */

	int ib_fd;			/* input file descriptor */
	int ib_errno;			/* errno on error, or 0 */
	int ib_incompl;			/* had an incomplete last line */
	int ib_mb_cur_max;		/* MB_CUR_MAX at time of ib_alloc() */
	int ib_seekable;		/* had a successful lseek() */
	pid_t ib_pid;			/* child from ib_popen() */
	unsigned int ib_blksize;	/* buffer size */
};
49 
/*
 * Allocate an input buffer that reads from file descriptor fd.
 *
 * blksize is either the size of the buffer to allocate in ib_blk, or 0
 * to have the size determined automatically.
 *
 * Returns the new input buffer. On error, NULL is returned and errno
 * indicates the offending error.
 */
extern struct iblok	*ib_alloc(int fd, unsigned blksize);
57 
/*
 * Deallocate the input buffer ip. The file descriptor stored in it is
 * not closed; ib_close() is the variant that also closes the descriptor.
 */
extern void		ib_free(struct iblok *ip);
63 
/*
 * Open the file called name and do ib_alloc() on the resulting
 * descriptor.
 *
 * NOTE(review): blksize is presumably handed to ib_alloc() unchanged and
 * NULL presumably signals failure, as with ib_alloc() -- confirm against
 * the implementation.
 */
extern struct iblok	*ib_open(const char *name, unsigned blksize);
68 
/*
 * Close the file descriptor held in ip and release ip with ib_free().
 *
 * Returns the result of close().
 */
extern int		ib_close(struct iblok *ip);
74 
/*
 * A workalike of popen(cmd, "r") using iblok facilities: the result is
 * an input buffer instead of a stdio stream. A buffer obtained this way
 * is closed with ib_pclose().
 *
 * NOTE(review): blksize presumably has the same meaning as for
 * ib_alloc() -- confirm against the implementation.
 */
extern struct iblok	*ib_popen(const char *cmd, unsigned blksize);
79 
/*
 * Close an iblok that was opened with ib_popen().
 *
 * NOTE(review): the return value presumably mirrors that of pclose() --
 * confirm against the implementation.
 */
extern int		ib_pclose(struct iblok *ip);
84 
/*
 * Read a new input buffer for ip.
 *
 * Returns the next character (or EOF) and advances ib_cur by one above
 * the bottom of the buffer, so that the returned character counts as
 * consumed. The ib_get() macro calls this function whenever the buffered
 * data is exhausted.
 */
extern int		ib_read(struct iblok *ip);
90 
/*
 * Fetch the next character from ip as a value in the range 0..255.
 * Once the buffered data is used up, the result of ib_read() is passed
 * on instead, which is EOF at end-of-file or on a read error.
 * The argument ip is evaluated more than once.
 */
#define	ib_get(ip)	((ip)->ib_cur >= (ip)->ib_end ? ib_read(ip) : \
				(*(ip)->ib_cur++ & 0xff))
96 
/*
 * Push the character c back onto ip by stepping ib_cur back one byte and
 * storing c there; the read buffer itself is therefore modified. The
 * value of the expression is the stored character.
 * Caution: Using this macro more than once in a row might underflow
 * ib_blk.
 */
#define ib_unget(c, ip)	(*--(ip)->ib_cur = (char)(c))
102 
/*
 * Get file offset of last read character, derived from ib_endoff and
 * the number of bytes still unread in the buffer.
 * The argument ip is evaluated more than once.
 */
#define	ib_offs(ip)	((ip)->ib_endoff - ((ip)->ib_end - (ip)->ib_cur) + 1)
107 
/*
 * Read a wide character using ib_get() facilities.
 *
 * *wc receives the wide character, or WEOF if an invalid byte sequence
 * was found. *len receives the number of bytes consumed.
 *
 * Returns the corresponding byte sequence, or NULL at end-of-file in
 * input.
 *
 * Note that it is not possible to mix calls to ib_getw() with calls to
 * ib_get(), ib_unget() or ib_seek() unless the last character read by
 * ib_getw() was L'\n'.
 */
extern char	*ib_getw(struct iblok *ip, wint_t *wc, int *len);
119 
/*
 * Get a line from ip, returning the line length.
 *
 * line and alcd are either the pointer to a malloc()ed buffer and a
 * pointer to its size, or (NULL, 0) if ib_getlin() shall allocate the
 * buffer itself.
 *
 * reallc is a realloc-style function that ib_getlin() uses to increase
 * the buffer if necessary. It is expected never to fail, i.e. it must
 * longjmp() or abort if it cannot allocate a buffer of the demanded
 * size.
 *
 * On end-of-file or error, 0 is returned.
 */
extern size_t	ib_getlin(struct iblok *ip, char **line, size_t *alcd,
			void *(*reallc)(void *, size_t));
131 
/*
 * Like lseek(), but operating on the input buffer ip instead of a bare
 * file descriptor.
 *
 * NOTE(review): off, whence and the return value presumably carry their
 * lseek() meaning -- confirm against the implementation.
 */
extern off_t	ib_seek(struct iblok *ip, off_t off, int whence);
136