1 /* Buffered input from a source.
2 *
3 * J.Cupitt, 18/11/19
4 */
5
6 /*
7
8 This file is part of VIPS.
9
10 VIPS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU Lesser General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 02110-1301 USA
24
25 */
26
27 /*
28
29 These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
30
31 */
32
33 /*
34 #define VIPS_DEBUG
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif /*HAVE_CONFIG_H*/
40 #include <vips/intl.h>
41
42 #include <stdio.h>
43 #include <ctype.h>
44 #include <stdlib.h>
45 #ifdef HAVE_UNISTD_H
46 #include <unistd.h>
47 #endif /*HAVE_UNISTD_H*/
48 #include <string.h>
49 #include <errno.h>
50 #include <sys/types.h>
51 #include <sys/stat.h>
52 #include <fcntl.h>
53 #include <unistd.h>
54
55 #include <vips/vips.h>
56 #include <vips/internal.h>
57 #include <vips/debug.h>
58
59 /**
60 * SECTION: sbuf
61 * @short_description: buffered read from a source
62 * @stability: Stable
63 * @see_also: <link linkend="libvips-foreign">foreign</link>
64 * @include: vips/vips.h
65 * @title: VipsSbuf
66 *
67 * #VipsSbuf wraps up a #VipsSource and provides a set of calls for
68 * text-oriented buffered reading. You can fetch lines of text, skip
69 * whitespace, and so on.
70 *
71 * It is useful for implementing things like CSV readers, for example.
72 */
73
74 G_DEFINE_TYPE( VipsSbuf, vips_sbuf, VIPS_TYPE_OBJECT );
75
76 static void
vips_sbuf_class_init(VipsSbufClass * class)77 vips_sbuf_class_init( VipsSbufClass *class )
78 {
79 VipsObjectClass *object_class = VIPS_OBJECT_CLASS( class );
80 GObjectClass *gobject_class = G_OBJECT_CLASS( class );
81
82 gobject_class->set_property = vips_object_set_property;
83 gobject_class->get_property = vips_object_get_property;
84
85 object_class->nickname = "sbuf";
86 object_class->description = _( "buffered source" );
87
88 VIPS_ARG_OBJECT( class, "input", 1,
89 _( "Input" ),
90 _( "Source to load from" ),
91 VIPS_ARGUMENT_REQUIRED_INPUT,
92 G_STRUCT_OFFSET( VipsSbuf, source ),
93 VIPS_TYPE_SOURCE );
94
95 }
96
97 static void
vips_sbuf_init(VipsSbuf * sbuf)98 vips_sbuf_init( VipsSbuf *sbuf )
99 {
100 sbuf->read_point = 0;
101 sbuf->chars_in_buffer = 0;
102 sbuf->input_buffer[0] = '\0';
103 }
104
105 /**
106 * vips_sbuf_new_from_source:
107 * @source: source to operate on
108 *
109 * Create a VipsSbuf wrapping a source.
110 *
111 * Returns: a new #VipsSbuf
112 */
113 VipsSbuf *
vips_sbuf_new_from_source(VipsSource * source)114 vips_sbuf_new_from_source( VipsSource *source )
115 {
116 VipsSbuf *sbuf;
117
118 g_assert( source );
119
120 sbuf = VIPS_SBUF( g_object_new( VIPS_TYPE_SBUF,
121 "input", source,
122 NULL ) );
123
124 if( vips_object_build( VIPS_OBJECT( sbuf ) ) ) {
125 VIPS_UNREF( sbuf );
126 return( NULL );
127 }
128
129 return( sbuf );
130 }
131
132 /**
133 * vips_sbuf_unbuffer:
134 * @sbuf: source to operate on
135 *
136 * Discard the input buffer and reset the read point. You must call this
137 * before using read or seek on the underlying #VipsSource class.
138 */
139 void
vips_sbuf_unbuffer(VipsSbuf * sbuf)140 vips_sbuf_unbuffer( VipsSbuf *sbuf )
141 {
142 /* We'd read ahead a little way -- seek backwards by that amount.
143 */
144 vips_source_seek( sbuf->source,
145 sbuf->read_point - sbuf->chars_in_buffer, SEEK_CUR );
146 sbuf->read_point = 0;
147 sbuf->chars_in_buffer = 0;
148 }
149
150 /* Returns -1 on error, 0 on EOF, otherwise bytes read.
151 */
152 static gint64
vips_sbuf_refill(VipsSbuf * sbuf)153 vips_sbuf_refill( VipsSbuf *sbuf )
154 {
155 gint64 bytes_read;
156
157 VIPS_DEBUG_MSG( "vips_sbuf_refill:\n" );
158
159 /* We should not discard any unread bytes.
160 */
161 g_assert( sbuf->read_point == sbuf->chars_in_buffer );
162
163 bytes_read = vips_source_read( sbuf->source,
164 sbuf->input_buffer, VIPS_SBUF_BUFFER_SIZE );
165 if( bytes_read == -1 )
166 return( -1 );
167
168 sbuf->read_point = 0;
169 sbuf->chars_in_buffer = bytes_read;
170
171 /* Always add a null byte so we can use strchr() etc. on lines. This is
172 * safe because input_buffer is VIPS_SBUF_BUFFER_SIZE + 1 bytes.
173 */
174 sbuf->input_buffer[bytes_read] = '\0';
175
176 return( bytes_read );
177 }
178
179 /**
180 * vips_sbuf_getc:
181 * @sbuf: source to operate on
182 *
183 * Fetch the next character from the source.
184 *
185 * If you can, use the macro VIPS_SBUF_GETC() instead for speed.
186 *
187 * Returns: the next char from @sbuf, -1 on read error or EOF.
188 */
189 int
vips_sbuf_getc(VipsSbuf * sbuf)190 vips_sbuf_getc( VipsSbuf *sbuf )
191 {
192 if( sbuf->read_point == sbuf->chars_in_buffer &&
193 vips_sbuf_refill( sbuf ) <= 0 )
194 return( -1 );
195
196 g_assert( sbuf->read_point < sbuf->chars_in_buffer );
197
198 return( sbuf->input_buffer[sbuf->read_point++] );
199 }
200
201 /**
202 * VIPS_SBUF_GETC:
203 * @sbuf: source to operate on
204 *
205 * Fetch the next character from the source.
206 *
207 * Returns: the next char from @sbuf, -1 on read error or EOF.
208 */
209
210 /**
211 * vips_sbuf_ungetc:
212 * @sbuf: source to operate on
213 *
214 * The opposite of vips_sbuf_getc(): undo the previous getc.
215 *
216 * unget more than one character is undefined. Unget at the start of the file
217 * does nothing.
218 *
219 * If you can, use the macro VIPS_SBUF_UNGETC() instead for speed.
220 */
221 void
vips_sbuf_ungetc(VipsSbuf * sbuf)222 vips_sbuf_ungetc( VipsSbuf *sbuf )
223 {
224 if( sbuf->read_point > 0 )
225 sbuf->read_point -= 1;
226 }
227
228 /**
229 * VIPS_SBUF_UNGETC:
230 * @sbuf: source to operate on
231 *
232 * The opposite of vips_sbuf_getc(): undo the previous getc.
233 *
 * Ungetting more than one character is undefined. Unget at the start of the
 * file does nothing.
236 */
237
238 /**
239 * vips_sbuf_require:
240 * @sbuf: source to operate on
241 * @require: make sure we have at least this many chars available
242 *
243 * Make sure there are at least @require bytes of readahead available.
244 *
245 * Returns: 0 on success, -1 on error or EOF.
246 */
247 int
vips_sbuf_require(VipsSbuf * sbuf,int require)248 vips_sbuf_require( VipsSbuf *sbuf, int require )
249 {
250 g_assert( require < VIPS_SBUF_BUFFER_SIZE );
251 g_assert( sbuf->chars_in_buffer >= 0 );
252 g_assert( sbuf->chars_in_buffer <= VIPS_SBUF_BUFFER_SIZE );
253 g_assert( sbuf->read_point >= 0 );
254 g_assert( sbuf->read_point <= sbuf->chars_in_buffer );
255
256 VIPS_DEBUG_MSG( "vips_sbuf_require: %d\n", require );
257
258 if( sbuf->read_point + require > sbuf->chars_in_buffer ) {
259 /* Areas can overlap, so we must memmove().
260 */
261 memmove( sbuf->input_buffer,
262 sbuf->input_buffer + sbuf->read_point,
263 sbuf->chars_in_buffer - sbuf->read_point );
264 sbuf->chars_in_buffer -= sbuf->read_point;
265 sbuf->read_point = 0;
266
267 while( require > sbuf->chars_in_buffer ) {
268 unsigned char *to = sbuf->input_buffer +
269 sbuf->chars_in_buffer;
270 int space_available =
271 VIPS_SBUF_BUFFER_SIZE -
272 sbuf->chars_in_buffer;
273 size_t bytes_read;
274
275 if( (bytes_read = vips_source_read( sbuf->source,
276 to, space_available )) == -1 )
277 return( -1 );
278 if( bytes_read == 0 ) {
279 vips_error(
280 vips_connection_nick( VIPS_CONNECTION(
281 sbuf->source ) ),
282 "%s", _( "end of file" ) );
283 return( -1 );
284 }
285
286 to[bytes_read] = '\0';
287 sbuf->chars_in_buffer += bytes_read;
288 }
289 }
290
291 return( 0 );
292 }
293
294 /**
295 * VIPS_SBUF_REQUIRE:
296 * @sbuf: source to operate on
297 * @require: need this many characters
298 *
299 * Make sure at least @require characters are available for
300 * VIPS_SBUF_PEEK() and VIPS_SBUF_FETCH().
301 *
302 * Returns: 0 on success, -1 on read error or EOF.
303 */
304
305 /**
306 * VIPS_SBUF_PEEK:
307 * @sbuf: source to operate on
308 *
309 * After a successful VIPS_SBUF_REQUIRE(), you can index this to get
310 * require characters of input.
311 *
 * Returns: a pointer to the next @require characters of input.
313 */
314
315 /**
316 * VIPS_SBUF_FETCH:
317 * @sbuf: source to operate on
318 *
319 * After a successful VIPS_SBUF_REQUIRE(), you can use this require times
320 * to fetch characters of input.
321 *
322 * Returns: the next input character.
323 */
324
325 /**
326 * vips_sbuf_get_line:
327 * @sbuf: source to operate on
328 *
329 * Fetch the next line of text from @sbuf and return it. The end of
330 * line character (or characters, for DOS files) are removed, and the string
331 * is terminated with a null (`\0` character).
332 *
333 * Returns NULL on end of file or read error.
334 *
335 * If the line is longer than some arbitrary (but large) limit, it is
336 * truncated. If you need to be able to read very long lines, use the
337 * slower vips_sbuf_get_line_copy().
338 *
339 * The return value is owned by @sbuf and must not be freed. It
340 * is valid until the next get call to @sbuf.
341 *
342 * Returns: the next line of text, or NULL on EOF or read error.
343 */
344 const char *
vips_sbuf_get_line(VipsSbuf * sbuf)345 vips_sbuf_get_line( VipsSbuf *sbuf )
346 {
347 int write_point;
348 int space_remaining;
349 int ch;
350
351 VIPS_DEBUG_MSG( "vips_sbuf_get_line:\n" );
352
353 write_point = 0;
354 space_remaining = VIPS_SBUF_BUFFER_SIZE;
355
356 while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
357 ch != '\n' &&
358 space_remaining > 0 ) {
359 sbuf->line[write_point] = ch;
360 write_point += 1;
361 space_remaining -= 1;
362 }
363 sbuf->line[write_point] = '\0';
364
365 /* If we hit EOF immediately, return EOF.
366 */
367 if( ch == -1 &&
368 write_point == 0 )
369 return( NULL );
370
371 /* If the final char in the buffer is \r, this is probably a DOS file
372 * and we should remove that too.
373 *
374 * There's a chance this could incorrectly remove \r in very long
375 * lines, but ignore this.
376 */
377 if( write_point > 0 &&
378 sbuf->line[write_point - 1] == '\r' )
379 sbuf->line[write_point - 1] = '\0';
380 /* If we filled the output line without seeing \n, keep going to the
381 * next \n.
382 */
383 if( ch != '\n' &&
384 space_remaining == 0 ) {
385 while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
386 ch != '\n' )
387 ;
388 }
389
390 VIPS_DEBUG_MSG( " %s\n", sbuf->line );
391
392 return( (const char *) sbuf->line );
393 }
394
395 /**
396 * vips_sbuf_get_line_copy:
397 * @sbuf: source to operate on
398 *
399 * Fetch the next line of text from @sbuf and return it. The end of
400 * line character (or characters, for DOS files) are removed, and the string
401 * is terminated with a null (`\0` character).
402 *
403 * The return result must be freed with g_free().
404 *
405 * This is slower than vips_sbuf_get_line(), but can work with lines of
406 * any length.
407 *
408 * Returns: the next line of text, or NULL on EOF or read error.
409 */
410 char *
vips_sbuf_get_line_copy(VipsSbuf * sbuf)411 vips_sbuf_get_line_copy( VipsSbuf *sbuf )
412 {
413 static const unsigned char null = '\0';
414
415 VIPS_DEBUG_MSG( "vips_sbuf_get_line_copy:\n" );
416
417 GByteArray *buffer;
418 int ch;
419 char *result;
420
421 buffer = g_byte_array_new();
422
423 while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
424 ch != '\n' ) {
425 unsigned char c = ch;
426
427 g_byte_array_append( buffer, &c, 1 );
428 }
429
430 /* Immediate EOF.
431 */
432 if( ch == -1 &&
433 buffer->len == 0 ) {
434 VIPS_FREEF( g_byte_array_unref, buffer );
435 return( NULL );
436 }
437
438 /* If the character before the \n was \r, this is probably a DOS file
439 * and we should remove the \r.
440 */
441 if( ch == '\n' &&
442 buffer->len > 0 &&
443 buffer->data[buffer->len - 1] == '\r' )
444 g_byte_array_set_size( buffer, buffer->len - 1 );
445
446 g_byte_array_append( buffer, &null, 1 );
447
448 result = (char *) g_byte_array_free( buffer, FALSE );
449
450 VIPS_DEBUG_MSG( " %s\n", result );
451
452 return( result );
453 }
454
455 /**
456 * vips_sbuf_get_non_whitespace:
457 * @sbuf: source to operate on
458 *
459 * Fetch the next chunk of non-whitespace text from the source, and
460 * null-terminate it.
461 *
462 * After this, the next getc will be the first char of the next block of
463 * whitespace (or EOF).
464 *
465 * If the first getc is whitespace, stop instantly and return the empty
466 * string.
467 *
468 * If the item is longer than some arbitrary (but large) limit, it is
469 * truncated.
470 *
471 * The return value is owned by @sbuf and must not be freed. It
472 * is valid until the next get call to @sbuf.
473 *
474 * Returns: the next block of non-whitespace, or NULL on EOF or read error.
475 */
476 const char *
vips_sbuf_get_non_whitespace(VipsSbuf * sbuf)477 vips_sbuf_get_non_whitespace( VipsSbuf *sbuf )
478 {
479 int ch;
480 int i;
481
482 for( i = 0; i < VIPS_SBUF_BUFFER_SIZE &&
483 !isspace( ch = VIPS_SBUF_GETC( sbuf ) ) &&
484 ch != EOF; i++ )
485 sbuf->line[i] = ch;
486 sbuf->line[i] = '\0';
487
488 /* If we stopped before seeing any whitespace, skip to the end of the
489 * block of non-whitespace.
490 */
491 if( !isspace( ch ) )
492 while( !isspace( ch = VIPS_SBUF_GETC( sbuf ) ) &&
493 ch != EOF )
494 ;
495
496 /* If we finally stopped on whitespace, step back one so the next get
497 * will be whitespace (or EOF).
498 */
499 if( isspace( ch ) )
500 VIPS_SBUF_UNGETC( sbuf );
501
502 return( (const char *) sbuf->line );
503 }
504
505 /**
506 * vips_sbuf_skip_whitespace:
507 * @sbuf: source to operate on
508 *
509 * After this, the next getc will be the first char of the next block of
510 * non-whitespace (or EOF).
511 *
512 * Also skip comments, ie. from any '#' character to the end of the line.
513 *
514 * Returns: 0 on success, or -1 on EOF.
515 */
516 int
vips_sbuf_skip_whitespace(VipsSbuf * sbuf)517 vips_sbuf_skip_whitespace( VipsSbuf *sbuf )
518 {
519 int ch;
520
521 do {
522 ch = VIPS_SBUF_GETC( sbuf );
523
524 /* # skip comments too.
525 */
526 if( ch == '#' ) {
527 /* Probably EOF.
528 */
529 if( !vips_sbuf_get_line( sbuf ) )
530 return( -1 );
531 ch = VIPS_SBUF_GETC( sbuf );
532 }
533 } while( isspace( ch ) );
534
535 VIPS_SBUF_UNGETC( sbuf );
536
537 return( 0 );
538 }
539