1 /* Buffered input from a source.
2  *
3  * J.Cupitt, 18/11/19
4  */
5 
6 /*
7 
8     This file is part of VIPS.
9 
10     VIPS is free software; you can redistribute it and/or modify
11     it under the terms of the GNU Lesser General Public License as published by
12     the Free Software Foundation; either version 2 of the License, or
13     (at your option) any later version.
14 
15     This program is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18     GNU Lesser General Public License for more details.
19 
20     You should have received a copy of the GNU Lesser General Public License
21     along with this program; if not, write to the Free Software
22     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23     02110-1301  USA
24 
25  */
26 
27 /*
28 
29     These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
30 
31  */
32 
33 /*
34 #define VIPS_DEBUG
35  */
36 
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif /*HAVE_CONFIG_H*/
40 #include <vips/intl.h>
41 
42 #include <stdio.h>
43 #include <ctype.h>
44 #include <stdlib.h>
45 #ifdef HAVE_UNISTD_H
46 #include <unistd.h>
47 #endif /*HAVE_UNISTD_H*/
48 #include <string.h>
49 #include <errno.h>
50 #include <sys/types.h>
51 #include <sys/stat.h>
52 #include <fcntl.h>
53 #include <unistd.h>
54 
55 #include <vips/vips.h>
56 #include <vips/internal.h>
57 #include <vips/debug.h>
58 
59 /**
60  * SECTION: sbuf
61  * @short_description: buffered read from a source
62  * @stability: Stable
63  * @see_also: <link linkend="libvips-foreign">foreign</link>
64  * @include: vips/vips.h
65  * @title: VipsSbuf
66  *
67  * #VipsSbuf wraps up a #VipsSource and provides a set of calls for
68  * text-oriented buffered reading. You can fetch lines of text, skip
69  * whitespace, and so on.
70  *
71  * It is useful for implementing things like CSV readers, for example.
72  */
73 
74 G_DEFINE_TYPE( VipsSbuf, vips_sbuf, VIPS_TYPE_OBJECT );
75 
76 static void
vips_sbuf_class_init(VipsSbufClass * class)77 vips_sbuf_class_init( VipsSbufClass *class )
78 {
79 	VipsObjectClass *object_class = VIPS_OBJECT_CLASS( class );
80 	GObjectClass *gobject_class = G_OBJECT_CLASS( class );
81 
82 	gobject_class->set_property = vips_object_set_property;
83 	gobject_class->get_property = vips_object_get_property;
84 
85 	object_class->nickname = "sbuf";
86 	object_class->description = _( "buffered source" );
87 
88 	VIPS_ARG_OBJECT( class, "input", 1,
89 		_( "Input" ),
90 		_( "Source to load from" ),
91 		VIPS_ARGUMENT_REQUIRED_INPUT,
92 		G_STRUCT_OFFSET( VipsSbuf, source ),
93 		VIPS_TYPE_SOURCE );
94 
95 }
96 
97 static void
vips_sbuf_init(VipsSbuf * sbuf)98 vips_sbuf_init( VipsSbuf *sbuf )
99 {
100 	sbuf->read_point = 0;
101 	sbuf->chars_in_buffer = 0;
102 	sbuf->input_buffer[0] = '\0';
103 }
104 
105 /**
106  * vips_sbuf_new_from_source:
107  * @source: source to operate on
108  *
109  * Create a VipsSbuf wrapping a source.
110  *
111  * Returns: a new #VipsSbuf
112  */
113 VipsSbuf *
vips_sbuf_new_from_source(VipsSource * source)114 vips_sbuf_new_from_source( VipsSource *source )
115 {
116 	VipsSbuf *sbuf;
117 
118 	g_assert( source );
119 
120 	sbuf = VIPS_SBUF( g_object_new( VIPS_TYPE_SBUF,
121 		"input", source,
122 		NULL ) );
123 
124 	if( vips_object_build( VIPS_OBJECT( sbuf ) ) ) {
125 		VIPS_UNREF( sbuf );
126 		return( NULL );
127 	}
128 
129 	return( sbuf );
130 }
131 
132 /**
133  * vips_sbuf_unbuffer:
134  * @sbuf: source to operate on
135  *
136  * Discard the input buffer and reset the read point. You must call this
137  * before using read or seek on the underlying #VipsSource class.
138  */
139 void
vips_sbuf_unbuffer(VipsSbuf * sbuf)140 vips_sbuf_unbuffer( VipsSbuf *sbuf )
141 {
142 	/* We'd read ahead a little way -- seek backwards by that amount.
143 	 */
144 	vips_source_seek( sbuf->source,
145 		sbuf->read_point - sbuf->chars_in_buffer, SEEK_CUR );
146 	sbuf->read_point = 0;
147 	sbuf->chars_in_buffer = 0;
148 }
149 
150 /* Returns -1 on error, 0 on EOF, otherwise bytes read.
151  */
152 static gint64
vips_sbuf_refill(VipsSbuf * sbuf)153 vips_sbuf_refill( VipsSbuf *sbuf )
154 {
155 	gint64 bytes_read;
156 
157 	VIPS_DEBUG_MSG( "vips_sbuf_refill:\n" );
158 
159 	/* We should not discard any unread bytes.
160 	 */
161 	g_assert( sbuf->read_point == sbuf->chars_in_buffer );
162 
163 	bytes_read = vips_source_read( sbuf->source,
164 		sbuf->input_buffer, VIPS_SBUF_BUFFER_SIZE );
165 	if( bytes_read == -1 )
166 		return( -1 );
167 
168 	sbuf->read_point = 0;
169 	sbuf->chars_in_buffer = bytes_read;
170 
171 	/* Always add a null byte so we can use strchr() etc. on lines. This is
172 	 * safe because input_buffer is VIPS_SBUF_BUFFER_SIZE + 1 bytes.
173 	 */
174 	sbuf->input_buffer[bytes_read] = '\0';
175 
176 	return( bytes_read );
177 }
178 
179 /**
180  * vips_sbuf_getc:
181  * @sbuf: source to operate on
182  *
183  * Fetch the next character from the source.
184  *
185  * If you can, use the macro VIPS_SBUF_GETC() instead for speed.
186  *
187  * Returns: the next char from @sbuf, -1 on read error or EOF.
188  */
189 int
vips_sbuf_getc(VipsSbuf * sbuf)190 vips_sbuf_getc( VipsSbuf *sbuf )
191 {
192 	if( sbuf->read_point == sbuf->chars_in_buffer &&
193 		vips_sbuf_refill( sbuf ) <= 0 )
194 		return( -1 );
195 
196 	g_assert( sbuf->read_point < sbuf->chars_in_buffer );
197 
198 	return( sbuf->input_buffer[sbuf->read_point++] );
199 }
200 
201 /**
202  * VIPS_SBUF_GETC:
203  * @sbuf: source to operate on
204  *
205  * Fetch the next character from the source.
206  *
207  * Returns: the next char from @sbuf, -1 on read error or EOF.
208  */
209 
210 /**
211  * vips_sbuf_ungetc:
212  * @sbuf: source to operate on
213  *
214  * The opposite of vips_sbuf_getc(): undo the previous getc.
215  *
216  * unget more than one character is undefined. Unget at the start of the file
217  * does nothing.
218  *
219  * If you can, use the macro VIPS_SBUF_UNGETC() instead for speed.
220  */
221 void
vips_sbuf_ungetc(VipsSbuf * sbuf)222 vips_sbuf_ungetc( VipsSbuf *sbuf )
223 {
224 	if( sbuf->read_point > 0 )
225 		sbuf->read_point -= 1;
226 }
227 
228 /**
229  * VIPS_SBUF_UNGETC:
230  * @sbuf: source to operate on
231  *
232  * The opposite of vips_sbuf_getc(): undo the previous getc.
233  *
 * Ungetting more than one character in a row is undefined. Unget at the
 * start of the file does nothing.
236  */
237 
238 /**
239  * vips_sbuf_require:
240  * @sbuf: source to operate on
241  * @require: make sure we have at least this many chars available
242  *
243  * Make sure there are at least @require bytes of readahead available.
244  *
245  * Returns: 0 on success, -1 on error or EOF.
246  */
247 int
vips_sbuf_require(VipsSbuf * sbuf,int require)248 vips_sbuf_require( VipsSbuf *sbuf, int require )
249 {
250 	g_assert( require < VIPS_SBUF_BUFFER_SIZE );
251 	g_assert( sbuf->chars_in_buffer >= 0 );
252 	g_assert( sbuf->chars_in_buffer <= VIPS_SBUF_BUFFER_SIZE );
253 	g_assert( sbuf->read_point >= 0 );
254 	g_assert( sbuf->read_point <= sbuf->chars_in_buffer );
255 
256 	VIPS_DEBUG_MSG( "vips_sbuf_require: %d\n", require );
257 
258 	if( sbuf->read_point + require > sbuf->chars_in_buffer ) {
259 		/* Areas can overlap, so we must memmove().
260 		 */
261 		memmove( sbuf->input_buffer,
262 			sbuf->input_buffer + sbuf->read_point,
263 			sbuf->chars_in_buffer - sbuf->read_point );
264 		sbuf->chars_in_buffer -= sbuf->read_point;
265 		sbuf->read_point = 0;
266 
267 		while( require > sbuf->chars_in_buffer ) {
268 			unsigned char *to = sbuf->input_buffer +
269 				sbuf->chars_in_buffer;
270 			int space_available =
271 				VIPS_SBUF_BUFFER_SIZE -
272 				sbuf->chars_in_buffer;
273 			size_t bytes_read;
274 
275 			if( (bytes_read = vips_source_read( sbuf->source,
276 				to, space_available )) == -1 )
277 				return( -1 );
278 			if( bytes_read == 0 ) {
279 				vips_error(
280 					vips_connection_nick( VIPS_CONNECTION(
281 						sbuf->source ) ),
282 					"%s", _( "end of file" ) );
283 				return( -1 );
284 			}
285 
286 			to[bytes_read] = '\0';
287 			sbuf->chars_in_buffer += bytes_read;
288 		}
289 	}
290 
291 	return( 0 );
292 }
293 
294 /**
295  * VIPS_SBUF_REQUIRE:
296  * @sbuf: source to operate on
297  * @require: need this many characters
298  *
299  * Make sure at least @require characters are available for
300  * VIPS_SBUF_PEEK() and VIPS_SBUF_FETCH().
301  *
302  * Returns: 0 on success, -1 on read error or EOF.
303  */
304 
305 /**
306  * VIPS_SBUF_PEEK:
307  * @sbuf: source to operate on
308  *
 * After a successful VIPS_SBUF_REQUIRE(), you can index this to get the
 * required characters of input.
 *
 * Returns: a pointer to the next required characters of input.
313  */
314 
315 /**
316  * VIPS_SBUF_FETCH:
317  * @sbuf: source to operate on
318  *
 * After a successful VIPS_SBUF_REQUIRE(), you can use this up to that many
 * times to fetch successive characters of input.
321  *
322  * Returns: the next input character.
323  */
324 
325 /**
326  * vips_sbuf_get_line:
327  * @sbuf: source to operate on
328  *
329  * Fetch the next line of text from @sbuf and return it. The end of
330  * line character (or characters, for DOS files) are removed, and the string
331  * is terminated with a null (`\0` character).
332  *
333  * Returns NULL on end of file or read error.
334  *
335  * If the line is longer than some arbitrary (but large) limit, it is
336  * truncated. If you need to be able to read very long lines, use the
337  * slower vips_sbuf_get_line_copy().
338  *
339  * The return value is owned by @sbuf and must not be freed. It
340  * is valid until the next get call to @sbuf.
341  *
342  * Returns: the next line of text, or NULL on EOF or read error.
343  */
344 const char *
vips_sbuf_get_line(VipsSbuf * sbuf)345 vips_sbuf_get_line( VipsSbuf *sbuf )
346 {
347 	int write_point;
348 	int space_remaining;
349 	int ch;
350 
351 	VIPS_DEBUG_MSG( "vips_sbuf_get_line:\n" );
352 
353 	write_point = 0;
354 	space_remaining = VIPS_SBUF_BUFFER_SIZE;
355 
356 	while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
357 		ch != '\n' &&
358 		space_remaining > 0 ) {
359 		sbuf->line[write_point] = ch;
360 		write_point += 1;
361 		space_remaining -= 1;
362 	}
363 	sbuf->line[write_point] = '\0';
364 
365 	/* If we hit EOF immediately, return EOF.
366 	 */
367 	if( ch == -1 &&
368 		write_point == 0 )
369 		return( NULL );
370 
371 	/* If the final char in the buffer is \r, this is probably a DOS file
372 	 * and we should remove that too.
373 	 *
374 	 * There's a chance this could incorrectly remove \r in very long
375 	 * lines, but ignore this.
376 	 */
377 	if( write_point > 0 &&
378 		sbuf->line[write_point - 1] == '\r' )
379 		sbuf->line[write_point - 1] = '\0';
380 	/* If we filled the output line without seeing \n, keep going to the
381 	 * next \n.
382 	 */
383 	if( ch != '\n' &&
384 		space_remaining == 0 ) {
385 		while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
386 			ch != '\n' )
387 			;
388 	}
389 
390 	VIPS_DEBUG_MSG( "    %s\n", sbuf->line );
391 
392 	return( (const char *) sbuf->line );
393 }
394 
395 /**
396  * vips_sbuf_get_line_copy:
397  * @sbuf: source to operate on
398  *
399  * Fetch the next line of text from @sbuf and return it. The end of
400  * line character (or characters, for DOS files) are removed, and the string
401  * is terminated with a null (`\0` character).
402  *
403  * The return result must be freed with g_free().
404  *
405  * This is slower than vips_sbuf_get_line(), but can work with lines of
406  * any length.
407  *
408  * Returns: the next line of text, or NULL on EOF or read error.
409  */
410 char *
vips_sbuf_get_line_copy(VipsSbuf * sbuf)411 vips_sbuf_get_line_copy( VipsSbuf *sbuf )
412 {
413 	static const unsigned char null = '\0';
414 
415 	VIPS_DEBUG_MSG( "vips_sbuf_get_line_copy:\n" );
416 
417 	GByteArray *buffer;
418 	int ch;
419 	char *result;
420 
421 	buffer = g_byte_array_new();
422 
423 	while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
424 		ch != '\n' ) {
425 		unsigned char c = ch;
426 
427 		g_byte_array_append( buffer, &c, 1 );
428 	}
429 
430 	/* Immediate EOF.
431 	 */
432 	if( ch == -1 &&
433 		buffer->len == 0 ) {
434 		VIPS_FREEF( g_byte_array_unref, buffer );
435 		return( NULL );
436 	}
437 
438 	/* If the character before the \n was \r, this is probably a DOS file
439 	 * and we should remove the \r.
440 	 */
441 	if( ch == '\n' &&
442 		buffer->len > 0 &&
443 		buffer->data[buffer->len - 1] == '\r' )
444 		g_byte_array_set_size( buffer, buffer->len - 1 );
445 
446 	g_byte_array_append( buffer, &null, 1 );
447 
448 	result = (char *) g_byte_array_free( buffer, FALSE );
449 
450 	VIPS_DEBUG_MSG( "    %s\n", result );
451 
452 	return( result );
453 }
454 
455 /**
456  * vips_sbuf_get_non_whitespace:
457  * @sbuf: source to operate on
458  *
459  * Fetch the next chunk of non-whitespace text from the source, and
460  * null-terminate it.
461  *
462  * After this, the next getc will be the first char of the next block of
463  * whitespace (or EOF).
464  *
465  * If the first getc is whitespace, stop instantly and return the empty
466  * string.
467  *
468  * If the item is longer than some arbitrary (but large) limit, it is
469  * truncated.
470  *
471  * The return value is owned by @sbuf and must not be freed. It
472  * is valid until the next get call to @sbuf.
473  *
474  * Returns: the next block of non-whitespace, or NULL on EOF or read error.
475  */
476 const char *
vips_sbuf_get_non_whitespace(VipsSbuf * sbuf)477 vips_sbuf_get_non_whitespace( VipsSbuf *sbuf )
478 {
479 	int ch;
480 	int i;
481 
482 	for( i = 0; i < VIPS_SBUF_BUFFER_SIZE &&
483 		!isspace( ch = VIPS_SBUF_GETC( sbuf ) ) &&
484 		ch != EOF; i++ )
485 		sbuf->line[i] = ch;
486 	sbuf->line[i] = '\0';
487 
488 	/* If we stopped before seeing any whitespace, skip to the end of the
489 	 * block of non-whitespace.
490 	 */
491 	if( !isspace( ch ) )
492 		while( !isspace( ch = VIPS_SBUF_GETC( sbuf ) ) &&
493 			ch != EOF )
494 			;
495 
496 	/* If we finally stopped on whitespace, step back one so the next get
497 	 * will be whitespace (or EOF).
498 	 */
499 	if( isspace( ch ) )
500 		VIPS_SBUF_UNGETC( sbuf );
501 
502 	return( (const char *) sbuf->line );
503 }
504 
505 /**
506  * vips_sbuf_skip_whitespace:
507  * @sbuf: source to operate on
508  *
509  * After this, the next getc will be the first char of the next block of
510  * non-whitespace (or EOF).
511  *
512  * Also skip comments, ie. from any '#' character to the end of the line.
513  *
514  * Returns: 0 on success, or -1 on EOF.
515  */
516 int
vips_sbuf_skip_whitespace(VipsSbuf * sbuf)517 vips_sbuf_skip_whitespace( VipsSbuf *sbuf )
518 {
519 	int ch;
520 
521 	do {
522 		ch = VIPS_SBUF_GETC( sbuf );
523 
524 		/* # skip comments too.
525 		 */
526 		if( ch == '#' ) {
527 			/* Probably EOF.
528 			 */
529 			if( !vips_sbuf_get_line( sbuf ) )
530 				return( -1 );
531 			ch = VIPS_SBUF_GETC( sbuf );
532 		}
533 	} while( isspace( ch ) );
534 
535 	VIPS_SBUF_UNGETC( sbuf );
536 
537 	return( 0 );
538 }
539